|
|
- #!/usr/bin/env python
- # coding: utf8
-
- """
- This module provides an AudioAdapter implementation based on the FFMPEG
- process. Such an implementation is POSIX-ish and depends on nothing except
- standard Python libraries. Thus, this implementation is the default one
- used within this library.
- """
-
- import datetime as dt
- import os
- import shutil
- from pathlib import Path
- from typing import Dict, Optional, Union
-
- # pyright: reportMissingImports=false
- # pylint: disable=import-error
- import ffmpeg
- import numpy as np
-
- from .. import SpleeterError
- from ..types import Signal
- from ..utils.logging import logger
- from . import Codec
- from .adapter import AudioAdapter
-
- # pylint: enable=import-error
-
- __email__ = "spleeter@deezer.com"
- __author__ = "Deezer Research"
- __license__ = "MIT License"
-
-
class FFMPEGProcessAudioAdapter(AudioAdapter):
    """
    An AudioAdapter implementation that uses the FFMPEG binaries through
    subprocesses in order to perform I/O operations for audio processing.

    When an instance is created, the `ffmpeg` and `ffprobe` executables are
    looked up on the system `PATH`, and a SpleeterError is raised if either
    of them cannot be found.
    """

    SUPPORTED_CODECS: Dict[Codec, str] = {
        Codec.M4A: "aac",
        Codec.OGG: "libvorbis",
        Codec.WMA: "wmav2",
    }
    """ FFMPEG codec name mapping. """

    def __init__(_) -> None:
        """
        Default constructor, ensures FFMPEG binaries are available.

        Raises:
            SpleeterError:
                If ffmpeg or ffprobe is not found.
        """
        for binary in ("ffmpeg", "ffprobe"):
            if shutil.which(binary) is None:
                raise SpleeterError("{} binary not found".format(binary))

    def load(
        _,
        path: Union[Path, str],
        offset: Optional[float] = None,
        duration: Optional[float] = None,
        sample_rate: Optional[float] = None,
        dtype: np.dtype = np.float32,
    ) -> Signal:
        """
        Loads the audio file denoted by the given path
        and returns its data as a waveform.

        Parameters:
            path (Union[Path, str]):
                Path of the audio file to load data from.
            offset (Optional[float]):
                Start offset to load from in seconds.
            duration (Optional[float]):
                Duration to load in seconds.
            sample_rate (Optional[float]):
                Sample rate to load audio with. When omitted, the rate
                reported by ffprobe for the audio stream is used.
            dtype (numpy.dtype):
                (Optional) Numpy data type to use, default to `float32`.

        Returns:
            Signal:
                Loaded data as a (waveform, sample_rate) tuple, the
                waveform being shaped as (samples, channels).

        Raises:
            SpleeterError:
                If ffprobe fails, if the file holds no audio stream, or
                if the ffmpeg decoding process exits with an error.
        """
        if isinstance(path, Path):
            path = str(path)
        if not isinstance(path, str):
            # Anything that is neither a Path nor a str is expected to be a
            # bytes-like path that can be decoded to text.
            path = path.decode()
        try:
            probe = ffmpeg.probe(path)
        except ffmpeg.Error as e:
            raise SpleeterError(
                "An error occurs with ffprobe (see ffprobe output below)\n\n{}".format(
                    e.stderr.decode()
                )
            ) from e
        streams = probe.get("streams") or []
        if not streams:
            raise SpleeterError("No stream was found with ffprobe")
        # Supplying a default keeps a file without any audio stream from
        # leaking a bare StopIteration out of this method.
        metadata = next(
            (stream for stream in streams if stream.get("codec_type") == "audio"),
            None,
        )
        if metadata is None:
            raise SpleeterError("No audio stream was found with ffprobe")
        n_channels = metadata["channels"]
        if sample_rate is None:
            # ffprobe's JSON output encodes the sample rate as text; convert
            # it so that callers get a number back, as the signature states.
            sample_rate = int(metadata["sample_rate"])
        output_kwargs = {"format": "f32le", "ar": sample_rate}
        if duration is not None:
            output_kwargs["t"] = str(dt.timedelta(seconds=duration))
        if offset is not None:
            output_kwargs["ss"] = str(dt.timedelta(seconds=offset))
        process = (
            ffmpeg.input(path)
            .output("pipe:", **output_kwargs)
            .run_async(pipe_stdout=True, pipe_stderr=True)
        )
        buffer, stderr = process.communicate()
        if process.returncode != 0:
            # Surface decoding failures instead of silently returning
            # whatever (possibly empty) data made it through the pipe.
            raise SpleeterError(
                "An error occurs with ffmpeg (see ffmpeg output below)\n\n{}".format(
                    (stderr or b"").decode(errors="replace")
                )
            )
        # Raw output is interleaved little-endian float32 samples.
        waveform = np.frombuffer(buffer, dtype="<f4").reshape(-1, n_channels)
        if not waveform.dtype == np.dtype(dtype):
            waveform = waveform.astype(dtype)
        return (waveform, sample_rate)

    def save(
        self,
        path: Union[Path, str],
        data: np.ndarray,
        sample_rate: float,
        codec: Optional[Codec] = None,
        bitrate: Optional[str] = None,
    ) -> None:
        """
        Write waveform data to the file denoted by the given path using
        FFMPEG process.

        Parameters:
            path (Union[Path, str]):
                Path like of the audio file to save data in.
            data (numpy.ndarray):
                Waveform data to write; its second dimension is used as
                the channel count.
            sample_rate (float):
                Sample rate to write file in.
            codec (Optional[Codec]):
                (Optional) Writing codec to use, default to `None`.
            bitrate (Optional[str]):
                (Optional) Bitrate of the written audio file, default to
                `None`.

        Raises:
            SpleeterError:
                If the output directory does not exist or if any error
                occurs while using FFMPEG to write data.
        """
        if isinstance(path, Path):
            path = str(path)
        directory = os.path.dirname(path)
        # A bare file name has an empty dirname, which designates the current
        # working directory and therefore must not be reported as missing.
        if directory and not os.path.exists(directory):
            raise SpleeterError(f"output directory does not exists: {directory}")
        logger.debug(f"Writing file {path}")
        input_kwargs = {"ar": sample_rate, "ac": data.shape[1]}
        output_kwargs = {"ar": sample_rate, "strict": "-2"}
        if bitrate:
            output_kwargs["audio_bitrate"] = bitrate
        if codec is not None and codec != "wav":
            # Codecs without a dedicated mapping are passed through as is.
            output_kwargs["codec"] = self.SUPPORTED_CODECS.get(codec, codec)
        process = (
            ffmpeg.input("pipe:", format="f32le", **input_kwargs)
            .output(path, **output_kwargs)
            .overwrite_output()
            .run_async(pipe_stdin=True, pipe_stderr=True, quiet=True)
        )
        payload = data.astype("<f4").tobytes()
        try:
            # communicate() feeds stdin while draining the output pipes, which
            # avoids the deadlock wait() can hit once a pipe buffer fills up.
            _, stderr = process.communicate(input=payload)
        except IOError as error:
            raise SpleeterError(f"FFMPEG error: {error}") from error
        if process.returncode != 0:
            # stderr may not be captured depending on how the subprocess was
            # spawned, so guard against a missing stream before decoding.
            details = stderr.decode(errors="replace") if stderr else ""
            raise SpleeterError(f"FFMPEG error: {details}")
        logger.info(f"File {path} written succesfully")
|