Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

185 řádky
6.1 KiB

před 2 roky
  1. #!/usr/bin/env python
  2. # coding: utf8
  3. """
  4. This module provides an AudioAdapter implementation based on FFMPEG
  5. process. Such implementation is POSIXish and depends on nothing except
  6. standard Python libraries. Thus this implementation is the default one
  7. used within this library.
  8. """
  9. import datetime as dt
  10. import os
  11. import shutil
  12. from pathlib import Path
  13. from typing import Dict, Optional, Union
  14. # pyright: reportMissingImports=false
  15. # pylint: disable=import-error
  16. import ffmpeg
  17. import numpy as np
  18. from .. import SpleeterError
  19. from ..types import Signal
  20. from ..utils.logging import logger
  21. from . import Codec
  22. from .adapter import AudioAdapter
  23. # pylint: enable=import-error
  24. __email__ = "spleeter@deezer.com"
  25. __author__ = "Deezer Research"
  26. __license__ = "MIT License"
  27. class FFMPEGProcessAudioAdapter(AudioAdapter):
  28. """
  29. An AudioAdapter implementation that use FFMPEG binary through
  30. subprocess in order to perform I/O operation for audio processing.
  31. When created, FFMPEG binary path will be checked and expended,
  32. raising exception if not found. Such path could be infered using
  33. `FFMPEG_PATH` environment variable.
  34. """
  35. SUPPORTED_CODECS: Dict[Codec, str] = {
  36. Codec.M4A: "aac",
  37. Codec.OGG: "libvorbis",
  38. Codec.WMA: "wmav2",
  39. }
  40. """ FFMPEG codec name mapping. """
  41. def __init__(_) -> None:
  42. """
  43. Default constructor, ensure FFMPEG binaries are available.
  44. Raises:
  45. SpleeterError:
  46. If ffmpeg or ffprobe is not found.
  47. """
  48. for binary in ("ffmpeg", "ffprobe"):
  49. if shutil.which(binary) is None:
  50. raise SpleeterError("{} binary not found".format(binary))
  51. def load(
  52. _,
  53. path: Union[Path, str],
  54. offset: Optional[float] = None,
  55. duration: Optional[float] = None,
  56. sample_rate: Optional[float] = None,
  57. dtype: np.dtype = np.float32,
  58. ) -> Signal:
  59. """
  60. Loads the audio file denoted by the given path
  61. and returns it data as a waveform.
  62. Parameters:
  63. path (Union[Path, str]:
  64. Path of the audio file to load data from.
  65. offset (Optional[float]):
  66. Start offset to load from in seconds.
  67. duration (Optional[float]):
  68. Duration to load in seconds.
  69. sample_rate (Optional[float]):
  70. Sample rate to load audio with.
  71. dtype (numpy.dtype):
  72. (Optional) Numpy data type to use, default to `float32`.
  73. Returns:
  74. Signal:
  75. Loaded data a (waveform, sample_rate) tuple.
  76. Raises:
  77. SpleeterError:
  78. If any error occurs while loading audio.
  79. """
  80. if isinstance(path, Path):
  81. path = str(path)
  82. if not isinstance(path, str):
  83. path = path.decode()
  84. try:
  85. probe = ffmpeg.probe(path)
  86. except ffmpeg._run.Error as e:
  87. raise SpleeterError(
  88. "An error occurs with ffprobe (see ffprobe output below)\n\n{}".format(
  89. e.stderr.decode()
  90. )
  91. )
  92. if "streams" not in probe or len(probe["streams"]) == 0:
  93. raise SpleeterError("No stream was found with ffprobe")
  94. metadata = next(
  95. stream for stream in probe["streams"] if stream["codec_type"] == "audio"
  96. )
  97. n_channels = metadata["channels"]
  98. if sample_rate is None:
  99. sample_rate = metadata["sample_rate"]
  100. output_kwargs = {"format": "f32le", "ar": sample_rate}
  101. if duration is not None:
  102. output_kwargs["t"] = str(dt.timedelta(seconds=duration))
  103. if offset is not None:
  104. output_kwargs["ss"] = str(dt.timedelta(seconds=offset))
  105. process = (
  106. ffmpeg.input(path)
  107. .output("pipe:", **output_kwargs)
  108. .run_async(pipe_stdout=True, pipe_stderr=True)
  109. )
  110. buffer, _ = process.communicate()
  111. waveform = np.frombuffer(buffer, dtype="<f4").reshape(-1, n_channels)
  112. if not waveform.dtype == np.dtype(dtype):
  113. waveform = waveform.astype(dtype)
  114. return (waveform, sample_rate)
  115. def save(
  116. self,
  117. path: Union[Path, str],
  118. data: np.ndarray,
  119. sample_rate: float,
  120. codec: Codec = None,
  121. bitrate: str = None,
  122. ) -> None:
  123. """
  124. Write waveform data to the file denoted by the given path using
  125. FFMPEG process.
  126. Parameters:
  127. path (Union[Path, str]):
  128. Path like of the audio file to save data in.
  129. data (numpy.ndarray):
  130. Waveform data to write.
  131. sample_rate (float):
  132. Sample rate to write file in.
  133. codec ():
  134. (Optional) Writing codec to use, default to `None`.
  135. bitrate (str):
  136. (Optional) Bitrate of the written audio file, default to
  137. `None`.
  138. Raises:
  139. IOError:
  140. If any error occurs while using FFMPEG to write data.
  141. """
  142. if isinstance(path, Path):
  143. path = str(path)
  144. directory = os.path.dirname(path)
  145. if not os.path.exists(directory):
  146. raise SpleeterError(f"output directory does not exists: {directory}")
  147. logger.debug(f"Writing file {path}")
  148. input_kwargs = {"ar": sample_rate, "ac": data.shape[1]}
  149. output_kwargs = {"ar": sample_rate, "strict": "-2"}
  150. if bitrate:
  151. output_kwargs["audio_bitrate"] = bitrate
  152. if codec is not None and codec != "wav":
  153. output_kwargs["codec"] = self.SUPPORTED_CODECS.get(codec, codec)
  154. process = (
  155. ffmpeg.input("pipe:", format="f32le", **input_kwargs)
  156. .output(path, **output_kwargs)
  157. .overwrite_output()
  158. .run_async(pipe_stdin=True, pipe_stderr=True, quiet=True)
  159. )
  160. try:
  161. process.stdin.write(data.astype("<f4").tobytes())
  162. process.stdin.close()
  163. process.wait()
  164. except IOError:
  165. raise SpleeterError(f"FFMPEG error: {process.stderr.read()}")
  166. logger.info(f"File {path} written succesfully")