#!/usr/bin/env python # coding: utf8 """ AudioAdapter class defintion. """ from abc import ABC, abstractmethod from importlib import import_module from pathlib import Path from typing import Any, Dict, List, Optional, Union # pyright: reportMissingImports=false # pylint: disable=import-error import numpy as np import tensorflow as tf from spleeter.audio import Codec from .. import SpleeterError from ..types import AudioDescriptor, Signal from ..utils.logging import logger # pylint: enable=import-error __email__ = "spleeter@deezer.com" __author__ = "Deezer Research" __license__ = "MIT License" class AudioAdapter(ABC): """ An abstract class for manipulating audio signal. """ _DEFAULT: "AudioAdapter" = None """ Default audio adapter singleton instance. """ @abstractmethod def load( self, audio_descriptor: AudioDescriptor, offset: Optional[float] = None, duration: Optional[float] = None, sample_rate: Optional[float] = None, dtype: np.dtype = np.float32, ) -> Signal: """ Loads the audio file denoted by the given audio descriptor and returns it data as a waveform. Aims to be implemented by client. Parameters: audio_descriptor (AudioDescriptor): Describe song to load, in case of file based audio adapter, such descriptor would be a file path. offset (Optional[float]): Start offset to load from in seconds. duration (Optional[float]): Duration to load in seconds. sample_rate (Optional[float]): Sample rate to load audio with. dtype (numpy.dtype): (Optional) Numpy data type to use, default to `float32`. Returns: Signal: Loaded data as (wf, sample_rate) tuple. """ pass def load_tf_waveform( self, audio_descriptor, offset: float = 0.0, duration: float = 1800.0, sample_rate: int = 44100, dtype: bytes = b"float32", waveform_name: str = "waveform", ) -> Dict[str, Any]: """ Load the audio and convert it to a tensorflow waveform. Parameters: audio_descriptor (): Describe song to load, in case of file based audio adapter, such descriptor would be a file path. offset (float): Start offset to load from in seconds. duration (float): Duration to load in seconds. sample_rate (float): Sample rate to load audio with. dtype (bytes): (Optional)data type to use, default to `b'float32'`. waveform_name (str): (Optional) Name of the key in output dict, default to `'waveform'`. Returns: Dict[str, Any]: TF output dict with waveform as `(T x chan numpy array)` and a boolean that tells whether there were an error while trying to load the waveform. """ # Cast parameters to TF format. offset = tf.cast(offset, tf.float64) duration = tf.cast(duration, tf.float64) # Defined safe loading function. def safe_load(path, offset, duration, sample_rate, dtype): logger.info(f"Loading audio {path} from {offset} to {offset + duration}") try: (data, _) = self.load( path.numpy(), offset.numpy(), duration.numpy(), sample_rate.numpy(), dtype=dtype.numpy(), ) logger.info("Audio data loaded successfully") return (data, False) except Exception as e: logger.exception("An error occurs while loading audio", exc_info=e) return (np.float32(-1.0), True) # Execute function and format results. results = ( tf.py_function( safe_load, [audio_descriptor, offset, duration, sample_rate, dtype], (tf.float32, tf.bool), ), ) waveform, error = results[0] return {waveform_name: waveform, f"{waveform_name}_error": error} @abstractmethod def save( self, path: Union[Path, str], data: np.ndarray, sample_rate: float, codec: Codec = None, bitrate: str = None, ) -> None: """ Save the given audio data to the file denoted by the given path. Parameters: path (Union[Path, str]): Path like of the audio file to save data in. data (numpy.ndarray): Waveform data to write. sample_rate (float): Sample rate to write file in. codec (): (Optional) Writing codec to use, default to `None`. bitrate (str): (Optional) Bitrate of the written audio file, default to `None`. """ pass @classmethod def default(cls: type) -> "AudioAdapter": """ Builds and returns a default audio adapter instance. Returns: AudioAdapter: Default adapter instance to use. """ if cls._DEFAULT is None: from .ffmpeg import FFMPEGProcessAudioAdapter cls._DEFAULT = FFMPEGProcessAudioAdapter() return cls._DEFAULT @classmethod def get(cls: type, descriptor: str) -> "AudioAdapter": """ Load dynamically an AudioAdapter from given class descriptor. Parameters: descriptor (str): Adapter class descriptor (module.Class) Returns: AudioAdapter: Created adapter instance. """ if not descriptor: return cls.default() module_path: List[str] = descriptor.split(".") adapter_class_name: str = module_path[-1] module_path: str = ".".join(module_path[:-1]) adapter_module = import_module(module_path) adapter_class = getattr(adapter_module, adapter_class_name) if not issubclass(adapter_class, AudioAdapter): raise SpleeterError( f"{adapter_class_name} is not a valid AudioAdapter class" ) return adapter_class()