@ -0,0 +1,3 @@ | |||
# Default ignored files | |||
/shelf/ | |||
/workspace.xml |
@ -0,0 +1,10 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<module type="PYTHON_MODULE" version="4"> | |||
<component name="NewModuleRootManager"> | |||
<content url="file://$MODULE_DIR$"> | |||
<excludeFolder url="file://$MODULE_DIR$/venv" /> | |||
</content> | |||
<orderEntry type="jdk" jdkName="Python 3.8 (base)" jdkType="Python SDK" /> | |||
<orderEntry type="sourceFolder" forTests="false" /> | |||
</component> | |||
</module> |
@ -0,0 +1,24 @@ | |||
<component name="InspectionProjectProfileManager"> | |||
<profile version="1.0"> | |||
<option name="myName" value="Project Default" /> | |||
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true"> | |||
<option name="ignoredPackages"> | |||
<value> | |||
<list size="11"> | |||
<item index="0" class="java.lang.String" itemvalue="scikit-image" /> | |||
<item index="1" class="java.lang.String" itemvalue="scipy" /> | |||
<item index="2" class="java.lang.String" itemvalue="python" /> | |||
<item index="3" class="java.lang.String" itemvalue="natsort" /> | |||
<item index="4" class="java.lang.String" itemvalue="tensorboardx" /> | |||
<item index="5" class="java.lang.String" itemvalue="pillow" /> | |||
<item index="6" class="java.lang.String" itemvalue="sklearn" /> | |||
<item index="7" class="java.lang.String" itemvalue="torch" /> | |||
<item index="8" class="java.lang.String" itemvalue="numpy" /> | |||
<item index="9" class="java.lang.String" itemvalue="torchvision" /> | |||
<item index="10" class="java.lang.String" itemvalue="torchsummary" /> | |||
</list> | |||
</value> | |||
</option> | |||
</inspection_tool> | |||
</profile> | |||
</component> |
@ -0,0 +1,6 @@ | |||
<component name="InspectionProjectProfileManager"> | |||
<settings> | |||
<option name="USE_PROJECT_PROFILE" value="false" /> | |||
<version value="1.0" /> | |||
</settings> | |||
</component> |
@ -0,0 +1,4 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (base)" project-jdk-type="Python SDK" /> | |||
</project> |
@ -0,0 +1,8 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="ProjectModuleManager"> | |||
<modules> | |||
<module fileurl="file://$PROJECT_DIR$/.idea/Wave-U-Net.iml" filepath="$PROJECT_DIR$/.idea/Wave-U-Net.iml" /> | |||
</modules> | |||
</component> | |||
</project> |
@ -0,0 +1,6 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="VcsDirectoryMappings"> | |||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> | |||
</component> | |||
</project> |
@ -0,0 +1,16 @@ | |||
# This is a sample Python script. | |||
# Press Shift+F10 to execute it or replace it with your code. | |||
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. | |||
def print_hi(name): | |||
# Use a breakpoint in the code line below to debug your script. | |||
print(f'Hi, {name}') # Press Ctrl+F8 to toggle the breakpoint. | |||
# Press the green button in the gutter to run the script. | |||
if __name__ == '__main__': | |||
print_hi('PyCharm') | |||
# See PyCharm help at https://www.jetbrains.com/help/pycharm/ |
@ -0,0 +1,28 @@ | |||
import torch.nn as nn
import torch.nn.functional as F

class ConvLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, transpose=False):
        super(ConvLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.transpose = transpose
        if self.transpose:
            self.conv = nn.ConvTranspose1d(in_channels, out_channels, kernel_size=self.kernel_size,
                                           stride=self.stride, padding=self.kernel_size - 1)
        else:
            self.conv = nn.Conv1d(in_channels, out_channels, kernel_size=self.kernel_size, stride=self.stride)
        # Group normalisation over groups of 8 channels; assumes out_channels is divisible by 8
        self.norm = nn.GroupNorm(out_channels // 8, out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        # nn.ReLU(x) would construct a module rather than apply the activation;
        # use the functional form on the tensor instead
        return F.relu(x)
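
# Minimal usage sketch (not part of the original module); the channel counts and
# input length below are assumptions chosen for illustration only.
if __name__ == "__main__":
    import torch
    layer = ConvLayer(1, 8, kernel_size=15, stride=1)
    x = torch.randn(2, 1, 1024)  # (batch, channels, samples)
    y = layer(x)
    print(y.shape)  # torch.Size([2, 8, 1010]): no padding, so 1024 - 15 + 1 samples remain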
@ -0,0 +1,15 @@ | |||
import torch
import torch.nn as nn
from model.Conv import ConvLayer
from model.Resample import Resample

class DownsamplingBlock(nn.Module):
    def __init__(self, stride, kernel_size, padding):
        super(DownsamplingBlock, self).__init__()
        self.stride = stride
        self.kernel_size = kernel_size
        self.padding = padding

    def forward(self, x):
        # Placeholder: the ConvLayer/Resample downsampling path is not implemented yet,
        # so the input is passed through unchanged
        out = x
        return out
@ -0,0 +1,17 @@ | |||
import torch.nn as nn
import torch.nn.functional as F

class Resample(nn.Module):
    def __init__(self, channels, kernel_size, stride, padding):
        super(Resample, self).__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        # Placeholder: resampling is not implemented yet; pass the input through unchanged
        out = x
        return out
@ -0,0 +1,17 @@ | |||
import torch
import torch.nn as nn
from model.Conv import ConvLayer
from model.Resample import Resample

class UpsamplingBlock(nn.Module):
    def __init__(self, stride, kernel_size, padding):
        super(UpsamplingBlock, self).__init__()
        self.stride = stride
        self.kernel_size = kernel_size
        self.padding = padding

    def forward(self, x):
        # Placeholder: upsampling is not implemented yet; pass the input through unchanged
        out = x
        return out
@ -0,0 +1,41 @@ | |||
import numpy as np | |||
import torch.nn as nn | |||
import torch | |||
# k = 3 | |||
# | |||
# dconv1 = nn.Conv1d(1, 1, kernel_size=k, stride=1, padding=0, bias=False) | |||
# | |||
# dconv1.weight.data = torch.ones(1, 1, k) | |||
# | |||
# x = torch.ones(1, 1, 4) | |||
# | |||
# # print('=====dconv1=====') | |||
# # | |||
# # for name, l in dconv1.named_parameters(): | |||
# # print('{}={}'.format(name, l.data)) | |||
# | |||
# x3 = dconv1(x) | |||
# | |||
# class MyModule(nn.Module): | |||
# def __init__(self): | |||
# super(MyModule, self).__init__() | |||
# self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)]) | |||
# | |||
# def forward(self, x): | |||
# # ModuleList can act as an iterable, or be indexed using ints | |||
# for i, l in enumerate(self.linears): | |||
# x = self.linears[i // 2](x) + l(x) | |||
# return x | |||
# | |||
# x = np.random.randint(2, size=(1,2,3,4)) | |||
# | |||
# z = np.random.randint(2, size=(2,2,2)) | |||
# y = x[0,:,1:3,2:4] | |||
# print(x) | |||
# print(y.shape) | |||
# print(y.dot(z).shape) | |||
from py2neo import Graph, Node, Relationship
# Connect to the neo4j database with its address, username, and password
graph = Graph('http://localhost:7474', auth=("neo4j", "test"))
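
# Minimal usage sketch (an assumption for illustration; requires the neo4j
# instance above to be reachable): create two nodes and a KNOWS relationship.
alice = Node("Person", name="Alice")
bob = Node("Person", name="Bob")
graph.create(Relationship(alice, "KNOWS", bob))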
@ -0,0 +1,29 @@ | |||
import torch | |||
import torch.nn as nn | |||
from model.Conv import ConvLayer | |||
from model.Resample import Resample | |||
class DownsamplingBlock(nn.Module): | |||
def __init__(self, stride, kernel_size): | |||
super(DownsamplingBlock, self).__init__() | |||
self.stride = stride | |||
self.kernel_size = kernel_size | |||
def forward(self,x): | |||
out = x | |||
return out | |||
class UpsamplingBlock(nn.Module): | |||
def __init__(self, stride, kernel_size, padding): | |||
        super(UpsamplingBlock, self).__init__()
self.stride = stride | |||
self.kernel_size = kernel_size | |||
self.padding = padding | |||
def forward(self, x): | |||
out = x | |||
return out |
@ -0,0 +1,74 @@ | |||
import torch | |||
import torchaudio | |||
import matplotlib.pyplot as plt
import musdb | |||
import os | |||
import numpy as np | |||
import glob | |||
import librosa | |||
import soundfile | |||
def load(path, sr=22050, mono=True, mode="numpy", offset=0.0, duration=None): | |||
y, curr_sr = librosa.load(path, sr=sr, mono=mono, res_type='kaiser_fast', offset=offset, duration=duration) | |||
if len(y.shape) == 1: | |||
# Expand channel dimension | |||
y = y[np.newaxis, :] | |||
if mode == "pytorch": | |||
y = torch.tensor(y) | |||
return y, curr_sr | |||
def write_wav(path, audio, sr): | |||
soundfile.write(path, audio.T, sr, "PCM_16") | |||
def get_musdbhq(database_path): | |||
''' | |||
Retrieve audio file paths for MUSDB HQ dataset | |||
:param database_path: MUSDB HQ root directory | |||
:return: dictionary with train and test keys, each containing list of samples, each sample containing all audio paths | |||
''' | |||
subsets = list() | |||
for subset in ["train", "test"]: | |||
print("Loading " + subset + " set...") | |||
tracks = glob.glob(os.path.join(database_path, subset, "*")) | |||
samples = list() | |||
# Go through tracks | |||
for track_folder in sorted(tracks): | |||
        # Collect the mixture and stem paths for this track
example = dict() | |||
for stem in ["mix", "bass", "drums", "other", "vocals"]: | |||
filename = stem if stem != "mix" else "mixture" | |||
audio_path = os.path.join(track_folder, filename + ".wav") | |||
example[stem] = audio_path | |||
# Add other instruments to form accompaniment | |||
acc_path = os.path.join(track_folder, "accompaniment.wav") | |||
if not os.path.exists(acc_path): | |||
print("Writing accompaniment to " + track_folder) | |||
stem_audio = [] | |||
for stem in ["bass", "drums", "other"]: | |||
audio, sr = load(example[stem], sr=None, mono=False) | |||
stem_audio.append(audio) | |||
acc_audio = np.clip(sum(stem_audio), -1.0, 1.0) | |||
write_wav(acc_path, acc_audio, sr) | |||
example["accompaniment"] = acc_path | |||
samples.append(example) | |||
subsets.append(samples) | |||
return subsets | |||
path = "C:/Users/IAN/Desktop/Wave-U-Net/musdb18-hq/" | |||
res = get_musdbhq(path) | |||
print(res) |
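
# Minimal usage sketch (assumes the MUSDB HQ files exist under the path above):
# load the mixture of the first training track as a PyTorch tensor.
first_track = res[0][0]
mix, sr = load(first_track["mix"], sr=None, mono=False, mode="pytorch")
print(mix.shape, sr)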
@ -0,0 +1 @@ | |||
* @deezer @Faylixe @romi1502 @mmoussallam @alreadytaikeune |
@ -0,0 +1,41 @@ | |||
# How-to contribute | |||
These are the main guidelines for contributing to this project:
- Verify that your contribution does not embed proprietary code or infringe any copyright of any sort.
- Avoid adding any unnecessary dependencies to the project, especially if they are not easily packaged and installed through `conda` or `pip`.
- Python contributions must follow the [PEP 8 style guide](https://www.python.org/dev/peps/pep-0008/).
- Use the [Pull Request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) mechanism and please be patient while waiting for reviews.
- Remain polite and civil in all exchanges with the maintainers and other contributors.
- Any issue submitted which does not respect the provided template, or lacks information, will be considered invalid and automatically closed.
## Get started | |||
This project is managed using [Poetry](https://python-poetry.org/docs/basic-usage/).
In order to contribute, the safest path is to create your
[own fork of spleeter](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) first and then set up your development environment:
```bash | |||
# Clone spleeter repository fork | |||
git clone https://github.com/<your_name>/spleeter && cd spleeter | |||
# Install poetry | |||
pip install poetry | |||
# Install spleeter dependencies | |||
poetry install | |||
# Run unit test suite | |||
poetry run pytest tests/ | |||
``` | |||
You can then make your changes and experiment freely. Once you're done, remember to check that the tests still run. If you've added a new feature, add tests! | |||
Then finally, you're more than welcome to create a [Pull Request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) in the **Spleeter** main repo. We will look at it as soon as possible and hopefully integrate your changes into the project.
## PR requirements | |||
The following commands should run successfully before a PR is considered for merging:
```bash | |||
poetry run pytest tests/ | |||
poetry run black spleeter | |||
poetry run isort spleeter | |||
``` |
@ -0,0 +1,44 @@ | |||
--- | |||
name: Bug | |||
about: Report a bug | |||
title: "[Bug] name your bug" | |||
labels: bug, invalid | |||
--- | |||
- [ ] I didn't find a similar issue already open. | |||
- [ ] I read the documentation (README AND Wiki) | |||
- [ ] I have installed FFMpeg | |||
- [ ] My problem is related to Spleeter only, not a derivative product (such as a web application or GUI provided by others)
## Description | |||
<!-- Give us a clear and concise description of the bug you are reporting. --> | |||
## Steps to reproduce
<!-- Clearly indicate the steps to reproduce the behavior: -->
1. Installed using `...` | |||
2. Run as `...` | |||
3. Got `...` error | |||
## Output | |||
```bash | |||
Share what your terminal says when you run the script (as well as what you would expect). | |||
``` | |||
## Environment | |||
<!-- Fill the following table --> | |||
| | | | |||
| ----------------- | ------------------------------- | | |||
| OS | Windows / Linux / MacOS / other | | |||
| Installation type | Conda / pip / other | | |||
| RAM available     | X GB                            |
| Hardware spec | GPU / CPU / etc ... | | |||
## Additional context | |||
<!-- Add any other context about the problem here, references, cites, etc.. --> |
@ -0,0 +1 @@ | |||
blank_issues_enabled: false |
@ -0,0 +1,8 @@ | |||
--- | |||
name: Discussion | |||
about: Ideas sharing or theoretical question solving
labels: question | |||
title: "[Discussion] your question" | |||
--- | |||
<!-- Please respect the title [Discussion] tag. --> |
@ -0,0 +1,14 @@ | |||
--- | |||
name: Feature request | |||
about: Submit idea for new feature | |||
labels: feature, enhancement | |||
title: "[Feature] your feature name" | |||
--- | |||
## Description | |||
<!-- Describe your feature request here. --> | |||
## Additional information | |||
<!-- Add any additional description --> |
@ -0,0 +1,21 @@ | |||
# Pull request title | |||
- [ ] I read [contributing guideline](https://github.com/deezer/spleeter/blob/master/.github/CONTRIBUTING.md) | |||
- [ ] I didn't find a similar pull request already open. | |||
- [ ] My PR is related to Spleeter only, not a derivative product (such as a web application or GUI provided by others)
## Description | |||
A few sentences describing the overall goals of the pull request's commits. | |||
## How this patch was tested | |||
You tested it, right? | |||
- [ ] I implemented unit tests which ran successfully using `poetry run pytest tests/`
- [ ] Code has been formatted using `poetry run black spleeter`
- [ ] Imports have been formatted using `poetry run isort spleeter`
## Documentation link and external references | |||
Please provide any info that may help us better understand your code. |
@ -0,0 +1,58 @@ | |||
name: conda | |||
on: | |||
- workflow_dispatch | |||
jobs: | |||
build-linux: | |||
strategy: | |||
matrix: | |||
python: [3.7, 3.8] | |||
runs-on: ubuntu-latest | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- name: Set up Python ${{ matrix.python }} | |||
uses: actions/setup-python@v2 | |||
with: | |||
python-version: ${{ matrix.python }} | |||
- name: Install dependencies | |||
run: | | |||
$CONDA/bin/conda install conda-build | |||
$CONDA/bin/conda install anaconda-client | |||
- name: Build package | |||
run: | | |||
$CONDA/bin/conda config --add channels anaconda | |||
$CONDA/bin/conda config --add channels conda-forge | |||
$CONDA/bin/conda build --python ${{ matrix.python }} conda/spleeter | |||
- name: Push package | |||
run: | | |||
$CONDA/bin/anaconda login --username ${{ secrets.ANACONDA_USERNAME }} --password ${{ secrets.ANACONDA_PASSWORD }} | |||
for package in /usr/share/miniconda/conda-bld/linux-64/spleeter*.bz2; do | |||
$CONDA/bin/anaconda upload $package | |||
done | |||
build-windows: | |||
strategy: | |||
matrix: | |||
python: [3.7] | |||
runs-on: windows-latest | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- name: Set up Python ${{ matrix.python }} | |||
uses: actions/setup-python@v2 | |||
with: | |||
python-version: ${{ matrix.python }} | |||
- name: Install dependencies | |||
run: | | |||
C:\Miniconda\condabin\conda.bat init powershell | |||
C:\Miniconda\condabin\conda.bat install conda-build | |||
C:\Miniconda\condabin\conda.bat install anaconda-client | |||
- name: Build package | |||
run: | | |||
C:\Miniconda\condabin\conda.bat config --add channels anaconda | |||
C:\Miniconda\condabin\conda.bat config --add channels conda-forge | |||
C:\Miniconda\condabin\conda.bat build --python ${{ matrix.python }} conda\spleeter | |||
- name: Push package | |||
run: | | |||
anaconda login --username ${{ secrets.ANACONDA_USERNAME }} --password ${{ secrets.ANACONDA_PASSWORD }} | |||
$packages = Get-ChildItem "C:\Miniconda\conda-bld\win-64\" | |||
foreach ($package in $packages){ | |||
anaconda upload $package.FullName | |||
} |
@ -0,0 +1,124 @@ | |||
name: docker | |||
on: | |||
workflow_dispatch: | |||
inputs: | |||
version: | |||
description: "Spleeter version to build image for" | |||
required: true | |||
default: "2.1.2" | |||
jobs: | |||
cuda-base: | |||
runs-on: ubuntu-latest | |||
strategy: | |||
matrix: | |||
distribution: [3.6, 3.7, 3.8] | |||
fail-fast: true | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- name: Build CUDA base image | |||
run: | | |||
docker build \ | |||
--build-arg BASE=python:${{ matrix.distribution }} \ | |||
-t deezer/python-cuda-10-1:${{ matrix.distribution }} \ | |||
-f docker/cuda-10-1.dockerfile . | |||
- name: Docker login | |||
run: echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin | |||
- name: Push deezer/python-cuda-10-1:${{ matrix.distribution }} image | |||
run: docker push deezer/python-cuda-10-1:${{ matrix.distribution }} | |||
pip-images: | |||
needs: cuda-base | |||
runs-on: ubuntu-latest | |||
strategy: | |||
matrix: | |||
platform: [cpu, gpu] | |||
distribution: [3.6, 3.7, 3.8] | |||
fail-fast: true | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- if: ${{ matrix.platform == 'cpu' }} | |||
run: | | |||
echo "base=python:${{ matrix.distribution }}" >> $GITHUB_ENV | |||
echo "image=spleeter" >> $GITHUB_ENV | |||
- if: ${{ matrix.platform == 'gpu' }} | |||
run: | | |||
echo "base=deezer/python-cuda-10-1:${{ matrix.distribution }}" >> $GITHUB_ENV | |||
echo "image=spleeter-gpu" >> $GITHUB_ENV | |||
- name: Build deezer/${{ env.image }}:${{ matrix.distribution }} image | |||
run: | | |||
docker build \ | |||
--build-arg BASE=${{ env.base }} \ | |||
--build-arg SPLEETER_VERSION=${{ github.event.inputs.version }} \ | |||
-t deezer/${{ env.image }}:${{ matrix.distribution }} \ | |||
-f docker/spleeter.dockerfile . | |||
- name: Test deezer/${{ env.image }}:${{ matrix.distribution }} image | |||
run: | | |||
docker run \ | |||
-v $(pwd):/runtime \ | |||
deezer/${{ env.image }}:${{ matrix.distribution }} \ | |||
separate -o /tmp /runtime/audio_example.mp3 | |||
- name: Docker login | |||
run: echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin | |||
- name: Push deezer/${{ env.image }}:${{ matrix.distribution }} image | |||
run: docker push deezer/${{ env.image }}:${{ matrix.distribution }} | |||
conda-images: | |||
needs: cuda-base | |||
runs-on: ubuntu-latest | |||
strategy: | |||
matrix: | |||
platform: [cpu, gpu] | |||
fail-fast: true | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- if: ${{ matrix.platform == 'cpu' }} | |||
name: Build Conda base image | |||
run: | | |||
docker build -t conda:cpu -f docker/conda.dockerfile . | |||
echo "image=spleeter" >> $GITHUB_ENV | |||
- if: ${{ matrix.platform == 'gpu' }} | |||
name: Build Conda base image | |||
run: | | |||
docker build --build-arg BASE=deezer/python-cuda-10-1:3.8 -t conda:gpu -f docker/conda.dockerfile . | |||
echo "image=spleeter-gpu" >> $GITHUB_ENV | |||
      - name: Build deezer/${{ env.image }}:conda image
run: | | |||
docker build \ | |||
--build-arg BASE=conda:${{ matrix.platform }} \ | |||
--build-arg SPLEETER_VERSION=${{ github.event.inputs.version }} \ | |||
-t deezer/${{ env.image }}:conda \ | |||
-f docker/spleeter-conda.dockerfile . | |||
- name: Docker login | |||
run: echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin | |||
- name: Push deezer/${{ env.image }}:conda image | |||
run: docker push deezer/${{ env.image }}:conda | |||
images-with-model: | |||
needs: [pip-images, conda-images] | |||
runs-on: ubuntu-latest | |||
strategy: | |||
matrix: | |||
platform: [cpu, gpu] | |||
distribution: [3.6, 3.7, 3.8] | |||
model: [2stems, 4stems, 5stems] | |||
fail-fast: true | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- if: ${{ matrix.platform == 'cpu' }} | |||
run: echo "image=spleeter" >> $GITHUB_ENV | |||
- if: ${{ matrix.platform == 'gpu' }} | |||
run: echo "image=spleeter-gpu" >> $GITHUB_ENV | |||
- name: Build deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} image | |||
run: | | |||
docker build \ | |||
--build-arg BASE=deezer/${{ env.image }}:${{ matrix.distribution }} \ | |||
--build-arg MODEL=${{ matrix.model }} \ | |||
-t deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} \ | |||
-f docker/spleeter-model.dockerfile . | |||
- name: Test deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} image | |||
run: | | |||
docker run \ | |||
-v $(pwd):/runtime \ | |||
          deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} \
separate -o /tmp -p spleeter:${{ matrix.model }} /runtime/audio_example.mp3 | |||
- name: Docker login | |||
run: echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin | |||
- name: Push deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} image | |||
run: docker push deezer/${{ env.image }}:${{ matrix.distribution }}-${{ matrix.model }} |
@ -0,0 +1,23 @@ | |||
name: pypi | |||
on: | |||
- workflow_dispatch | |||
env: | |||
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} | |||
jobs: | |||
package-and-deploy: | |||
runs-on: ubuntu-latest | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- uses: actions/setup-python@v2 | |||
with: | |||
python-version: 3.7 | |||
- name: Install Poetry | |||
run: | | |||
pip install poetry | |||
poetry config virtualenvs.in-project false | |||
poetry config virtualenvs.path ~/.virtualenvs | |||
poetry config pypi-token.pypi $PYPI_TOKEN | |||
- name: Deploy to pypi | |||
run: | | |||
poetry build | |||
poetry publish |
@ -0,0 +1,51 @@ | |||
name: test | |||
on: | |||
pull_request: | |||
branches: | |||
- master | |||
jobs: | |||
tests: | |||
runs-on: ubuntu-latest | |||
strategy: | |||
matrix: | |||
python-version: [3.6, 3.7, 3.8, 3.9] | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- name: Set up Python ${{ matrix.python-version }} | |||
uses: actions/setup-python@v2 | |||
with: | |||
python-version: ${{ matrix.python-version }} | |||
- uses: actions/cache@v2 | |||
env: | |||
model-release: 1 | |||
id: spleeter-model-cache | |||
with: | |||
path: ${{ env.GITHUB_WORKSPACE }}/pretrained_models | |||
key: models-${{ env.model-release }} | |||
restore-keys: | | |||
models-${{ env.model-release }} | |||
- name: Install ffmpeg | |||
run: | | |||
sudo apt-get update && sudo apt-get install -y ffmpeg | |||
- name: Install Poetry | |||
run: | | |||
pip install poetry | |||
poetry config virtualenvs.in-project false | |||
poetry config virtualenvs.path ~/.virtualenvs | |||
- name: Cache Poetry virtualenv | |||
uses: actions/cache@v1 | |||
id: cache | |||
with: | |||
path: ~/.virtualenvs | |||
key: poetry-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} | |||
restore-keys: | | |||
poetry-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} | |||
- name: Install Dependencies | |||
run: poetry install | |||
if: steps.cache.outputs.cache-hit != 'true' | |||
- name: Code quality checks | |||
run: | | |||
poetry run black spleeter --check | |||
poetry run isort spleeter --check | |||
- name: Test with pytest | |||
run: poetry run pytest tests/ |
@ -0,0 +1,114 @@ | |||
# Byte-compiled / optimized / DLL files | |||
__pycache__/ | |||
*.py[cod] | |||
*$py.class | |||
# C extensions | |||
*.so | |||
# Distribution / packaging | |||
.Python | |||
build/ | |||
develop-eggs/ | |||
dist/ | |||
downloads/ | |||
eggs/ | |||
.eggs/ | |||
lib/ | |||
lib64/ | |||
parts/ | |||
sdist/ | |||
var/ | |||
wheels/ | |||
*.egg-info/ | |||
.installed.cfg | |||
*.egg | |||
MANIFEST | |||
# PyInstaller | |||
# Usually these files are written by a python script from a template | |||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |||
*.manifest | |||
*.spec | |||
# Installer logs | |||
pip-log.txt | |||
pip-delete-this-directory.txt | |||
# Unit test / coverage reports | |||
htmlcov/ | |||
.tox/ | |||
.coverage | |||
.coverage.* | |||
.cache | |||
nosetests.xml | |||
coverage.xml | |||
*.cover | |||
.hypothesis/ | |||
.pytest_cache/ | |||
# Translations | |||
*.mo | |||
*.pot | |||
# Django stuff: | |||
*.log | |||
local_settings.py | |||
db.sqlite3 | |||
# Flask stuff: | |||
instance/ | |||
.webassets-cache | |||
# Scrapy stuff: | |||
.scrapy | |||
# Sphinx documentation | |||
docs/_build/ | |||
# PyBuilder | |||
target/ | |||
# Jupyter Notebook | |||
.ipynb_checkpoints | |||
# pyenv | |||
.python-version | |||
# celery beat schedule file | |||
celerybeat-schedule | |||
# SageMath parsed files | |||
*.sage.py | |||
# Environments | |||
.env | |||
.venv | |||
env/ | |||
venv/ | |||
ENV/ | |||
env.bak/ | |||
venv.bak/ | |||
# Spyder project settings | |||
.spyderproject | |||
.spyproject | |||
# Rope project settings | |||
.ropeproject | |||
# mkdocs documentation | |||
/site | |||
# mypy | |||
.mypy_cache/ | |||
.vscode | |||
.DS_Store | |||
__pycache__ | |||
**/reporting | |||
pretrained_models | |||
docs/build | |||
.vscode | |||
spleeter-feedstock/ | |||
*FAKE_MUSDB_DIR |
@ -0,0 +1,99 @@ | |||
# Changelog History | |||
## 2.3.0 | |||
Updating dependencies to enable TensorFlow 2.5 support (and Python 3.9 overall) | |||
Removing the destructor from the `Separator` class | |||
## 2.2.0 | |||
Minor changes mainly fixing some issues: | |||
* mono training was not working due to hardcoded filters in the dataset
* the default argument of `separate` was of the wrong type
* added a way to request the spleeter version with the `--version` argument in the CLI
## 2.1.0 | |||
This version introduces design-related changes, especially the transition to Typer for CLI management and Poetry as
the library build backend.
* The `-i` option is now deprecated and replaced by traditional CLI input argument listing
* The project is now built using Poetry
* The project requires code formatting using Black and isort
* The dedicated GPU package `spleeter-gpu` is not supported anymore; the `spleeter` package now supports both CPU and GPU hardware
### API changes: | |||
* function `get_default_audio_adapter` is now available as `default()` class method within `AudioAdapter` class | |||
* function `get_default_model_provider` is now available as `default()` class method within `ModelProvider` class | |||
* `STFTBackend` and `Codec` are now string enums
* `GithubModelProvider` now uses `httpx` with HTTP/2 support
* Commands are now located in the `__main__` module and wrapped as simple functions using Typer; the `options` module provides specifications for each available option and argument
* The `types` module provides custom type specifications and will be enhanced in future releases to provide more robust typing support with MyPy
* The `utils.logging` module has been cleaned up: the logger instance is now a module singleton, and a single function is used to configure it with a verbose parameter
* Added a custom logger handler (see tiangolo/typer#203 discussion) | |||
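As an illustration of the new adapter and separator entry points, a minimal sketch (the file name and sample rate below are placeholder assumptions):
```python
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator

# Load a waveform with the default audio adapter (formerly get_default_audio_adapter)
audio_loader = AudioAdapter.default()
waveform, _ = audio_loader.load("audio_example.mp3", sample_rate=44100)

# Separate it in memory with the pretrained 2stems model
separator = Separator("spleeter:2stems")
prediction = separator.separate(waveform)  # dict with "vocals" and "accompaniment" keys
```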
## 2.0 | |||
First released, October 9th 2020
TensorFlow 2 compatible version, allowing use with Python 3.8.
## 1.5.4 | |||
First released, July 24th 2020
Add some padding to the input waveform to avoid separation artefacts on the edges due to instabilities in the inverse Fourier transform.
Also add tests to ensure both the librosa and tensorflow backends have the same outputs.
## 1.5.2 | |||
First released, May 15th 2020 | |||
### Major changes | |||
* PR #375 merged to avoid multiple tf.graph instantiation failures
### Minor changes | |||
* PR #362 use tf.abs instead of numpy | |||
* PR #352 tempdir cleaning | |||
## 1.5.1 | |||
First released, April 15th 2020 | |||
### Major changes | |||
* Bugfixes on the LibRosa STFT backend | |||
### Minor changes | |||
* Typos, and small bugfixes | |||
## 1.5.0 | |||
First released, March 20th 2020 | |||
### Major changes | |||
* Implement a new STFT backend using LibRosa, faster on CPU than TF implementation | |||
* Switch tensorflow version to 1.15.2 | |||
### Minor changes | |||
* Typos, and small bugfixes | |||
## 1.4.9 | |||
First released, Dec 27th 2019 | |||
### Major changes | |||
* Add new configuration for processing up to 16kHz
### Minor changes | |||
* Typos, and small bugfixes |
@ -0,0 +1,21 @@ | |||
MIT License | |||
Copyright (c) 2019-present, Deezer SA. | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@ -0,0 +1,133 @@ | |||
<img src="https://github.com/deezer/spleeter/raw/master/images/spleeter_logo.png" height="80" /> | |||
[![Github actions](https://github.com/deezer/spleeter/workflows/pytest/badge.svg)](https://github.com/deezer/spleeter/actions) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/spleeter) [![PyPI version](https://badge.fury.io/py/spleeter.svg)](https://badge.fury.io/py/spleeter) [![Conda](https://img.shields.io/conda/vn/deezer-research/spleeter)](https://anaconda.org/deezer-research/spleeter) [![Docker Pulls](https://img.shields.io/docker/pulls/deezer/spleeter)](https://hub.docker.com/r/researchdeezer/spleeter) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deezer/spleeter/blob/master/spleeter.ipynb) [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/spleeter/community) [![status](https://joss.theoj.org/papers/259e5efe669945a343bad6eccb89018b/status.svg)](https://joss.theoj.org/papers/259e5efe669945a343bad6eccb89018b) | |||
> :warning: The [Spleeter 2.1.0](https://pypi.org/project/spleeter/) release introduces some breaking changes, including new CLI option naming for input, and the drop
> of the dedicated GPU package. Please read the [CHANGELOG](CHANGELOG.md) for more details.
## About | |||
**Spleeter** is [Deezer](https://www.deezer.com/)'s source separation library with pretrained models,
written in [Python](https://www.python.org/) and based on [Tensorflow](https://tensorflow.org/). It makes it easy
to train source separation models (assuming you have a dataset of isolated sources), and provides
already-trained, state-of-the-art models for performing various flavours of separation:
* Vocals (singing voice) / accompaniment separation ([2 stems](https://github.com/deezer/spleeter/wiki/2.-Getting-started#using-2stems-model)) | |||
* Vocals / drums / bass / other separation ([4 stems](https://github.com/deezer/spleeter/wiki/2.-Getting-started#using-4stems-model)) | |||
* Vocals / drums / bass / piano / other separation ([5 stems](https://github.com/deezer/spleeter/wiki/2.-Getting-started#using-5stems-model)) | |||
2 stems and 4 stems models have [high performances](https://github.com/deezer/spleeter/wiki/Separation-Performances) on the [musdb](https://sigsep.github.io/datasets/musdb.html) dataset. **Spleeter** is also very fast as it can perform separation of audio files to 4 stems 100x faster than real-time when run on a GPU. | |||
We designed **Spleeter** so you can use it straight from [command line](https://github.com/deezer/spleeter/wiki/2.-Getting-started#usage) | |||
as well as directly in your own development pipeline as a [Python library](https://github.com/deezer/spleeter/wiki/4.-API-Reference#separator). It can be installed with [pip](https://github.com/deezer/spleeter/wiki/1.-Installation#using-pip) or be used with | |||
[Docker](https://github.com/deezer/spleeter/wiki/2.-Getting-started#using-docker-image). | |||
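As a sketch of library use (assuming the pretrained 2stems model and the bundled example file):
```python
from spleeter.separator import Separator

# Split audio_example.mp3 into vocals.wav and accompaniment.wav under output/
separator = Separator("spleeter:2stems")
separator.separate_to_file("audio_example.mp3", "output/")
```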
### Projects and Softwares using **Spleeter** | |||
Since its release, multiple forks have exposed **Spleeter** through either a Graphical User Interface (GUI) or a standalone free or paid website. Please note that we do not host, maintain or directly support any of these initiatives.
That being said, many cool projects have been built on top of ours, notably the port to the *Ableton Live* ecosystem through the [Spleeter 4 Max](https://github.com/diracdeltas/spleeter4max#spleeter-for-max) project.
**Spleeter** pre-trained models have also been used by professional audio software. Here's a non-exhaustive list:
* [iZotope](https://www.izotope.com/en/shop/rx-8-standard.html) in its *Music Rebalance* feature within **RX 8** | |||
* [SpectralLayers](https://new.steinberg.net/spectralayers/) in its *Unmix* feature in **SpectralLayers 7** | |||
* [Acon Digital](https://acondigital.com/products/acoustica-audio-editor/) within **Acoustica 7** | |||
* [VirtualDJ](https://www.virtualdj.com/stems/) in their stem isolation feature | |||
* [Algoriddim](https://www.algoriddim.com/apps) in their **NeuralMix** and **djayPRO** app suite | |||
🆕 **Spleeter** is a baseline in the ongoing [Music Demixing Challenge](https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021)! | |||
## Quick start | |||
Want to try it out but don't want to install anything? We have set up a [Google Colab](https://colab.research.google.com/github/deezer/spleeter/blob/master/spleeter.ipynb).
Ready to dig into it? In a few lines you can install **Spleeter** and separate the vocal and accompaniment parts of an example audio file.
You first need to install `ffmpeg` and `libsndfile`. This can be done on most platforms using [Conda](https://github.com/deezer/spleeter/wiki/1.-Installation#using-conda):
```bash | |||
# install dependencies using conda | |||
conda install -c conda-forge ffmpeg libsndfile | |||
# install spleeter with pip | |||
pip install spleeter | |||
# download an example audio file (if you don't have wget, use another tool for downloading) | |||
wget https://github.com/deezer/spleeter/raw/master/audio_example.mp3 | |||
# separate the example audio into two components | |||
spleeter separate -p spleeter:2stems -o output audio_example.mp3 | |||
``` | |||
> :warning: Note that we no longer recommend using `conda` for installing spleeter. | |||
> ⚠️ There are known issues with Apple M1 chips, mostly due to TensorFlow compatibility. Until these are fixed, you can use [this workaround](https://github.com/deezer/spleeter/issues/607#issuecomment-828352392).
You should get two separated audio files (`vocals.wav` and `accompaniment.wav`) in the `output/audio_example` folder. | |||
For detailed documentation, please check the [repository wiki](https://github.com/deezer/spleeter/wiki/1.-Installation).
## Development and Testing | |||
This project is managed using [Poetry](https://python-poetry.org/docs/basic-usage/). To run the test suite, you
can execute the following set of commands:
```bash | |||
# Clone spleeter repository | |||
git clone https://github.com/Deezer/spleeter && cd spleeter | |||
# Install poetry | |||
pip install poetry | |||
# Install spleeter dependencies | |||
poetry install | |||
# Run unit test suite | |||
poetry run pytest tests/ | |||
``` | |||
## Reference | |||
* Deezer Research - Source Separation Engine Story - deezer.io blog post: | |||
* [English version](https://deezer.io/releasing-spleeter-deezer-r-d-source-separation-engine-2b88985e797e) | |||
* [Japanese version](http://dzr.fm/splitterjp) | |||
* [Music Source Separation tool with pre-trained models / ISMIR2019 extended abstract](http://archives.ismir.net/ismir2019/latebreaking/000036.pdf) | |||
If you use **Spleeter** in your work, please cite: | |||
```BibTeX | |||
@article{spleeter2020, | |||
doi = {10.21105/joss.02154}, | |||
url = {https://doi.org/10.21105/joss.02154}, | |||
year = {2020}, | |||
publisher = {The Open Journal}, | |||
volume = {5}, | |||
number = {50}, | |||
pages = {2154}, | |||
author = {Romain Hennequin and Anis Khlif and Felix Voituret and Manuel Moussallam}, | |||
title = {Spleeter: a fast and efficient music source separation tool with pre-trained models}, | |||
journal = {Journal of Open Source Software}, | |||
note = {Deezer Research} | |||
} | |||
``` | |||
## License | |||
The code of **Spleeter** is [MIT-licensed](LICENSE). | |||
## Disclaimer | |||
If you plan to use **Spleeter** on copyrighted material, make sure you get proper authorization from right owners beforehand. | |||
## Troubleshooting | |||
**Spleeter** is a complex piece of software and although we continuously try to improve and test it, you may encounter unexpected issues running it. If that's the case, please check the [FAQ page](https://github.com/deezer/spleeter/wiki/5.-FAQ) first as well as the list of [currently open issues](https://github.com/deezer/spleeter/issues).
### Windows users | |||
It appears that sometimes the shortcut command `spleeter` does not work properly on Windows. This is a known issue that we will hopefully fix soon. In the meantime, replace `spleeter separate` with `python -m spleeter separate` in the command line and it should work.
## Contributing | |||
If you would like to participate in the development of **Spleeter** you are more than welcome to do so. Don't hesitate to throw us a pull request and we'll do our best to examine it quickly. Please check out our [guidelines](.github/CONTRIBUTING.md) first. | |||
## Note | |||
This repository includes a demo audio file, `audio_example.mp3`, which is an excerpt
from Slow Motion Dream by Steven M Bryant, (c) 2011, licensed under a Creative
Commons Attribution (3.0) [license](http://dig.ccmixter.org/files/stevieb357/34740).
Ft: CSoul, Alex Beroza & Robert Siekawitch
@ -0,0 +1,55 @@ | |||
{% set name = "spleeter" %} | |||
{% set version = "2.2.1" %} | |||
package: | |||
name: {{ name|lower }} | |||
version: {{ version }} | |||
source: | |||
- url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz | |||
sha256: 6cbe9e572474948515430804a22da255f774243aab77e58edb147566dbff7a42 | |||
build: | |||
number: 0 | |||
script: {{ PYTHON }} -m pip install . -vv | |||
skip: True # [osx] | |||
entry_points: | |||
- spleeter = spleeter.__main__:entrypoint | |||
requirements: | |||
host: | |||
- python {{ python }} | |||
- pip | |||
- poetry | |||
run: | |||
- python {{ python }} | |||
- tensorflow ==2.3.0 # [linux] | |||
- tensorflow ==2.3.0 # [win] | |||
- numpy <1.20.0 | |||
- pandas | |||
- ffmpeg-python | |||
- norbert | |||
- librosa | |||
- typer | |||
- httpx | |||
test: | |||
imports: | |||
- spleeter | |||
- spleeter.model | |||
- spleeter.utils | |||
- spleeter.separator | |||
about: | |||
home: https://github.com/deezer/spleeter | |||
license: MIT | |||
license_family: MIT | |||
license_file: LICENSE | |||
summary: The Deezer source separation library with pretrained models based on tensorflow. | |||
doc_url: https://github.com/deezer/spleeter/wiki | |||
dev_url: https://github.com/deezer/spleeter | |||
extra: | |||
recipe-maintainers: | |||
- Faylixe | |||
- romi1502 |
@ -0,0 +1,28 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "2stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "accompaniment"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1000000, | |||
"throttle_secs":300, | |||
"random_seed":0, | |||
"save_checkpoints_steps":150, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "4stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1500000, | |||
"throttle_secs":600, | |||
"random_seed":3, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "5stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "piano", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 2500000, | |||
"throttle_secs":600, | |||
"random_seed":8, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.softmax_unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,32 @@ | |||
{ | |||
"train_csv": "configs/musdb_train.csv", | |||
"validation_csv": "configs/musdb_validation.csv", | |||
"model_dir": "musdb_model", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"n_chunks_per_song":40, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"cache/training", | |||
"validation_cache":"cache/validation", | |||
"train_max_steps": 200000, | |||
"throttle_secs":1800, | |||
"random_seed":3, | |||
"save_checkpoints_steps":1000, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,87 @@ | |||
mix_path,vocals_path,drums_path,bass_path,other_path,duration | |||
train/A Classic Education - NightOwl/mixture.wav,train/A Classic Education - NightOwl/vocals.wav,train/A Classic Education - NightOwl/drums.wav,train/A Classic Education - NightOwl/bass.wav,train/A Classic Education - NightOwl/other.wav,171.247166 | |||
train/ANiMAL - Clinic A/mixture.wav,train/ANiMAL - Clinic A/vocals.wav,train/ANiMAL - Clinic A/drums.wav,train/ANiMAL - Clinic A/bass.wav,train/ANiMAL - Clinic A/other.wav,237.865215 | |||
train/ANiMAL - Easy Tiger/mixture.wav,train/ANiMAL - Easy Tiger/vocals.wav,train/ANiMAL - Easy Tiger/drums.wav,train/ANiMAL - Easy Tiger/bass.wav,train/ANiMAL - Easy Tiger/other.wav,205.473379 | |||
train/Actions - Devil's Words/mixture.wav,train/Actions - Devil's Words/vocals.wav,train/Actions - Devil's Words/drums.wav,train/Actions - Devil's Words/bass.wav,train/Actions - Devil's Words/other.wav,196.626576 | |||
train/Actions - South Of The Water/mixture.wav,train/Actions - South Of The Water/vocals.wav,train/Actions - South Of The Water/drums.wav,train/Actions - South Of The Water/bass.wav,train/Actions - South Of The Water/other.wav,176.610975 | |||
train/Aimee Norwich - Child/mixture.wav,train/Aimee Norwich - Child/vocals.wav,train/Aimee Norwich - Child/drums.wav,train/Aimee Norwich - Child/bass.wav,train/Aimee Norwich - Child/other.wav,189.080091 | |||
train/Alexander Ross - Velvet Curtain/mixture.wav,train/Alexander Ross - Velvet Curtain/vocals.wav,train/Alexander Ross - Velvet Curtain/drums.wav,train/Alexander Ross - Velvet Curtain/bass.wav,train/Alexander Ross - Velvet Curtain/other.wav,514.298776 | |||
train/Angela Thomas Wade - Milk Cow Blues/mixture.wav,train/Angela Thomas Wade - Milk Cow Blues/vocals.wav,train/Angela Thomas Wade - Milk Cow Blues/drums.wav,train/Angela Thomas Wade - Milk Cow Blues/bass.wav,train/Angela Thomas Wade - Milk Cow Blues/other.wav,210.906848 | |||
train/Atlantis Bound - It Was My Fault For Waiting/mixture.wav,train/Atlantis Bound - It Was My Fault For Waiting/vocals.wav,train/Atlantis Bound - It Was My Fault For Waiting/drums.wav,train/Atlantis Bound - It Was My Fault For Waiting/bass.wav,train/Atlantis Bound - It Was My Fault For Waiting/other.wav,268.051156 | |||
train/Auctioneer - Our Future Faces/mixture.wav,train/Auctioneer - Our Future Faces/vocals.wav,train/Auctioneer - Our Future Faces/drums.wav,train/Auctioneer - Our Future Faces/bass.wav,train/Auctioneer - Our Future Faces/other.wav,207.702494 | |||
train/AvaLuna - Waterduct/mixture.wav,train/AvaLuna - Waterduct/vocals.wav,train/AvaLuna - Waterduct/drums.wav,train/AvaLuna - Waterduct/bass.wav,train/AvaLuna - Waterduct/other.wav,259.111474 | |||
train/BigTroubles - Phantom/mixture.wav,train/BigTroubles - Phantom/vocals.wav,train/BigTroubles - Phantom/drums.wav,train/BigTroubles - Phantom/bass.wav,train/BigTroubles - Phantom/other.wav,146.750113 | |||
train/Bill Chudziak - Children Of No-one/mixture.wav,train/Bill Chudziak - Children Of No-one/vocals.wav,train/Bill Chudziak - Children Of No-one/drums.wav,train/Bill Chudziak - Children Of No-one/bass.wav,train/Bill Chudziak - Children Of No-one/other.wav,230.736689 | |||
train/Black Bloc - If You Want Success/mixture.wav,train/Black Bloc - If You Want Success/vocals.wav,train/Black Bloc - If You Want Success/drums.wav,train/Black Bloc - If You Want Success/bass.wav,train/Black Bloc - If You Want Success/other.wav,398.547302 | |||
train/Celestial Shore - Die For Us/mixture.wav,train/Celestial Shore - Die For Us/vocals.wav,train/Celestial Shore - Die For Us/drums.wav,train/Celestial Shore - Die For Us/bass.wav,train/Celestial Shore - Die For Us/other.wav,278.476916 | |||
train/Chris Durban - Celebrate/mixture.wav,train/Chris Durban - Celebrate/vocals.wav,train/Chris Durban - Celebrate/drums.wav,train/Chris Durban - Celebrate/bass.wav,train/Chris Durban - Celebrate/other.wav,301.603991 | |||
train/Clara Berry And Wooldog - Air Traffic/mixture.wav,train/Clara Berry And Wooldog - Air Traffic/vocals.wav,train/Clara Berry And Wooldog - Air Traffic/drums.wav,train/Clara Berry And Wooldog - Air Traffic/bass.wav,train/Clara Berry And Wooldog - Air Traffic/other.wav,173.267302 | |||
train/Clara Berry And Wooldog - Stella/mixture.wav,train/Clara Berry And Wooldog - Stella/vocals.wav,train/Clara Berry And Wooldog - Stella/drums.wav,train/Clara Berry And Wooldog - Stella/bass.wav,train/Clara Berry And Wooldog - Stella/other.wav,195.558458 | |||
train/Cnoc An Tursa - Bannockburn/mixture.wav,train/Cnoc An Tursa - Bannockburn/vocals.wav,train/Cnoc An Tursa - Bannockburn/drums.wav,train/Cnoc An Tursa - Bannockburn/bass.wav,train/Cnoc An Tursa - Bannockburn/other.wav,294.521905 | |||
train/Creepoid - OldTree/mixture.wav,train/Creepoid - OldTree/vocals.wav,train/Creepoid - OldTree/drums.wav,train/Creepoid - OldTree/bass.wav,train/Creepoid - OldTree/other.wav,302.02195 | |||
train/Dark Ride - Burning Bridges/mixture.wav,train/Dark Ride - Burning Bridges/vocals.wav,train/Dark Ride - Burning Bridges/drums.wav,train/Dark Ride - Burning Bridges/bass.wav,train/Dark Ride - Burning Bridges/other.wav,232.663946 | |||
train/Dreamers Of The Ghetto - Heavy Love/mixture.wav,train/Dreamers Of The Ghetto - Heavy Love/vocals.wav,train/Dreamers Of The Ghetto - Heavy Love/drums.wav,train/Dreamers Of The Ghetto - Heavy Love/bass.wav,train/Dreamers Of The Ghetto - Heavy Love/other.wav,294.800544 | |||
train/Drumtracks - Ghost Bitch/mixture.wav,train/Drumtracks - Ghost Bitch/vocals.wav,train/Drumtracks - Ghost Bitch/drums.wav,train/Drumtracks - Ghost Bitch/bass.wav,train/Drumtracks - Ghost Bitch/other.wav,356.913923 | |||
train/Faces On Film - Waiting For Ga/mixture.wav,train/Faces On Film - Waiting For Ga/vocals.wav,train/Faces On Film - Waiting For Ga/drums.wav,train/Faces On Film - Waiting For Ga/bass.wav,train/Faces On Film - Waiting For Ga/other.wav,257.439637 | |||
train/Fergessen - Back From The Start/mixture.wav,train/Fergessen - Back From The Start/vocals.wav,train/Fergessen - Back From The Start/drums.wav,train/Fergessen - Back From The Start/bass.wav,train/Fergessen - Back From The Start/other.wav,168.553651 | |||
train/Fergessen - The Wind/mixture.wav,train/Fergessen - The Wind/vocals.wav,train/Fergessen - The Wind/drums.wav,train/Fergessen - The Wind/bass.wav,train/Fergessen - The Wind/other.wav,191.820045 | |||
train/Flags - 54/mixture.wav,train/Flags - 54/vocals.wav,train/Flags - 54/drums.wav,train/Flags - 54/bass.wav,train/Flags - 54/other.wav,315.164444 | |||
train/Giselle - Moss/mixture.wav,train/Giselle - Moss/vocals.wav,train/Giselle - Moss/drums.wav,train/Giselle - Moss/bass.wav,train/Giselle - Moss/other.wav,201.711746 | |||
train/Grants - PunchDrunk/mixture.wav,train/Grants - PunchDrunk/vocals.wav,train/Grants - PunchDrunk/drums.wav,train/Grants - PunchDrunk/bass.wav,train/Grants - PunchDrunk/other.wav,204.405261 | |||
train/Helado Negro - Mitad Del Mundo/mixture.wav,train/Helado Negro - Mitad Del Mundo/vocals.wav,train/Helado Negro - Mitad Del Mundo/drums.wav,train/Helado Negro - Mitad Del Mundo/bass.wav,train/Helado Negro - Mitad Del Mundo/other.wav,181.672925 | |||
train/Hezekiah Jones - Borrowed Heart/mixture.wav,train/Hezekiah Jones - Borrowed Heart/vocals.wav,train/Hezekiah Jones - Borrowed Heart/drums.wav,train/Hezekiah Jones - Borrowed Heart/bass.wav,train/Hezekiah Jones - Borrowed Heart/other.wav,241.394649 | |||
train/Hollow Ground - Left Blind/mixture.wav,train/Hollow Ground - Left Blind/vocals.wav,train/Hollow Ground - Left Blind/drums.wav,train/Hollow Ground - Left Blind/bass.wav,train/Hollow Ground - Left Blind/other.wav,159.103129 | |||
train/Hop Along - Sister Cities/mixture.wav,train/Hop Along - Sister Cities/vocals.wav,train/Hop Along - Sister Cities/drums.wav,train/Hop Along - Sister Cities/bass.wav,train/Hop Along - Sister Cities/other.wav,283.237007 | |||
train/Invisible Familiars - Disturbing Wildlife/mixture.wav,train/Invisible Familiars - Disturbing Wildlife/vocals.wav,train/Invisible Familiars - Disturbing Wildlife/drums.wav,train/Invisible Familiars - Disturbing Wildlife/bass.wav,train/Invisible Familiars - Disturbing Wildlife/other.wav,218.499773 | |||
train/James May - All Souls Moon/mixture.wav,train/James May - All Souls Moon/vocals.wav,train/James May - All Souls Moon/drums.wav,train/James May - All Souls Moon/bass.wav,train/James May - All Souls Moon/other.wav,220.844989 | |||
train/James May - Dont Let Go/mixture.wav,train/James May - Dont Let Go/vocals.wav,train/James May - Dont Let Go/drums.wav,train/James May - Dont Let Go/bass.wav,train/James May - Dont Let Go/other.wav,241.951927 | |||
train/James May - If You Say/mixture.wav,train/James May - If You Say/vocals.wav,train/James May - If You Say/drums.wav,train/James May - If You Say/bass.wav,train/James May - If You Say/other.wav,258.321995 | |||
train/Jay Menon - Through My Eyes/mixture.wav,train/Jay Menon - Through My Eyes/vocals.wav,train/Jay Menon - Through My Eyes/drums.wav,train/Jay Menon - Through My Eyes/bass.wav,train/Jay Menon - Through My Eyes/other.wav,253.167166 | |||
train/Johnny Lokke - Whisper To A Scream/mixture.wav,train/Johnny Lokke - Whisper To A Scream/vocals.wav,train/Johnny Lokke - Whisper To A Scream/drums.wav,train/Johnny Lokke - Whisper To A Scream/bass.wav,train/Johnny Lokke - Whisper To A Scream/other.wav,255.326621 | |||
"train/Jokers, Jacks & Kings - Sea Of Leaves/mixture.wav","train/Jokers, Jacks & Kings - Sea Of Leaves/vocals.wav","train/Jokers, Jacks & Kings - Sea Of Leaves/drums.wav","train/Jokers, Jacks & Kings - Sea Of Leaves/bass.wav","train/Jokers, Jacks & Kings - Sea Of Leaves/other.wav",191.471746 | |||
train/Leaf - Come Around/mixture.wav,train/Leaf - Come Around/vocals.wav,train/Leaf - Come Around/drums.wav,train/Leaf - Come Around/bass.wav,train/Leaf - Come Around/other.wav,264.382404 | |||
train/Leaf - Wicked/mixture.wav,train/Leaf - Wicked/vocals.wav,train/Leaf - Wicked/drums.wav,train/Leaf - Wicked/bass.wav,train/Leaf - Wicked/other.wav,190.635828 | |||
train/Lushlife - Toynbee Suite/mixture.wav,train/Lushlife - Toynbee Suite/vocals.wav,train/Lushlife - Toynbee Suite/drums.wav,train/Lushlife - Toynbee Suite/bass.wav,train/Lushlife - Toynbee Suite/other.wav,628.378413 | |||
train/Matthew Entwistle - Dont You Ever/mixture.wav,train/Matthew Entwistle - Dont You Ever/vocals.wav,train/Matthew Entwistle - Dont You Ever/drums.wav,train/Matthew Entwistle - Dont You Ever/bass.wav,train/Matthew Entwistle - Dont You Ever/other.wav,113.824218 | |||
train/Meaxic - You Listen/mixture.wav,train/Meaxic - You Listen/vocals.wav,train/Meaxic - You Listen/drums.wav,train/Meaxic - You Listen/bass.wav,train/Meaxic - You Listen/other.wav,412.525714 | |||
train/Music Delta - 80s Rock/mixture.wav,train/Music Delta - 80s Rock/vocals.wav,train/Music Delta - 80s Rock/drums.wav,train/Music Delta - 80s Rock/bass.wav,train/Music Delta - 80s Rock/other.wav,36.733968 | |||
train/Music Delta - Beatles/mixture.wav,train/Music Delta - Beatles/vocals.wav,train/Music Delta - Beatles/drums.wav,train/Music Delta - Beatles/bass.wav,train/Music Delta - Beatles/other.wav,36.176689 | |||
train/Music Delta - Britpop/mixture.wav,train/Music Delta - Britpop/vocals.wav,train/Music Delta - Britpop/drums.wav,train/Music Delta - Britpop/bass.wav,train/Music Delta - Britpop/other.wav,36.594649 | |||
train/Music Delta - Country1/mixture.wav,train/Music Delta - Country1/vocals.wav,train/Music Delta - Country1/drums.wav,train/Music Delta - Country1/bass.wav,train/Music Delta - Country1/other.wav,34.551293 | |||
train/Music Delta - Country2/mixture.wav,train/Music Delta - Country2/vocals.wav,train/Music Delta - Country2/drums.wav,train/Music Delta - Country2/bass.wav,train/Music Delta - Country2/other.wav,17.275646 | |||
train/Music Delta - Disco/mixture.wav,train/Music Delta - Disco/vocals.wav,train/Music Delta - Disco/drums.wav,train/Music Delta - Disco/bass.wav,train/Music Delta - Disco/other.wav,124.598277 | |||
train/Music Delta - Gospel/mixture.wav,train/Music Delta - Gospel/vocals.wav,train/Music Delta - Gospel/drums.wav,train/Music Delta - Gospel/bass.wav,train/Music Delta - Gospel/other.wav,75.557732 | |||
train/Music Delta - Grunge/mixture.wav,train/Music Delta - Grunge/vocals.wav,train/Music Delta - Grunge/drums.wav,train/Music Delta - Grunge/bass.wav,train/Music Delta - Grunge/other.wav,41.656599 | |||
train/Music Delta - Hendrix/mixture.wav,train/Music Delta - Hendrix/vocals.wav,train/Music Delta - Hendrix/drums.wav,train/Music Delta - Hendrix/bass.wav,train/Music Delta - Hendrix/other.wav,19.644082 | |||
train/Music Delta - Punk/mixture.wav,train/Music Delta - Punk/vocals.wav,train/Music Delta - Punk/drums.wav,train/Music Delta - Punk/bass.wav,train/Music Delta - Punk/other.wav,28.583764 | |||
train/Music Delta - Reggae/mixture.wav,train/Music Delta - Reggae/vocals.wav,train/Music Delta - Reggae/drums.wav,train/Music Delta - Reggae/bass.wav,train/Music Delta - Reggae/other.wav,17.275646 | |||
train/Music Delta - Rock/mixture.wav,train/Music Delta - Rock/vocals.wav,train/Music Delta - Rock/drums.wav,train/Music Delta - Rock/bass.wav,train/Music Delta - Rock/other.wav,12.910295 | |||
train/Music Delta - Rockabilly/mixture.wav,train/Music Delta - Rockabilly/vocals.wav,train/Music Delta - Rockabilly/drums.wav,train/Music Delta - Rockabilly/bass.wav,train/Music Delta - Rockabilly/other.wav,25.75093 | |||
train/Night Panther - Fire/mixture.wav,train/Night Panther - Fire/vocals.wav,train/Night Panther - Fire/drums.wav,train/Night Panther - Fire/bass.wav,train/Night Panther - Fire/other.wav,212.810884 | |||
train/North To Alaska - All The Same/mixture.wav,train/North To Alaska - All The Same/vocals.wav,train/North To Alaska - All The Same/drums.wav,train/North To Alaska - All The Same/bass.wav,train/North To Alaska - All The Same/other.wav,247.965896 | |||
train/Patrick Talbot - Set Me Free/mixture.wav,train/Patrick Talbot - Set Me Free/vocals.wav,train/Patrick Talbot - Set Me Free/drums.wav,train/Patrick Talbot - Set Me Free/bass.wav,train/Patrick Talbot - Set Me Free/other.wav,289.785034 | |||
train/Phre The Eon - Everybody's Falling Apart/mixture.wav,train/Phre The Eon - Everybody's Falling Apart/vocals.wav,train/Phre The Eon - Everybody's Falling Apart/drums.wav,train/Phre The Eon - Everybody's Falling Apart/bass.wav,train/Phre The Eon - Everybody's Falling Apart/other.wav,224.235102 | |||
train/Port St Willow - Stay Even/mixture.wav,train/Port St Willow - Stay Even/vocals.wav,train/Port St Willow - Stay Even/drums.wav,train/Port St Willow - Stay Even/bass.wav,train/Port St Willow - Stay Even/other.wav,316.836281 | |||
train/Remember December - C U Next Time/mixture.wav,train/Remember December - C U Next Time/vocals.wav,train/Remember December - C U Next Time/drums.wav,train/Remember December - C U Next Time/bass.wav,train/Remember December - C U Next Time/other.wav,242.532426 | |||
train/Secret Mountains - High Horse/mixture.wav,train/Secret Mountains - High Horse/vocals.wav,train/Secret Mountains - High Horse/drums.wav,train/Secret Mountains - High Horse/bass.wav,train/Secret Mountains - High Horse/other.wav,355.311746 | |||
train/Skelpolu - Together Alone/mixture.wav,train/Skelpolu - Together Alone/vocals.wav,train/Skelpolu - Together Alone/drums.wav,train/Skelpolu - Together Alone/bass.wav,train/Skelpolu - Together Alone/other.wav,325.822404 | |||
train/Snowmine - Curfews/mixture.wav,train/Snowmine - Curfews/vocals.wav,train/Snowmine - Curfews/drums.wav,train/Snowmine - Curfews/bass.wav,train/Snowmine - Curfews/other.wav,275.017143 | |||
train/Spike Mullings - Mike's Sulking/mixture.wav,train/Spike Mullings - Mike's Sulking/vocals.wav,train/Spike Mullings - Mike's Sulking/drums.wav,train/Spike Mullings - Mike's Sulking/bass.wav,train/Spike Mullings - Mike's Sulking/other.wav,256.696599 | |||
train/St Vitus - Word Gets Around/mixture.wav,train/St Vitus - Word Gets Around/vocals.wav,train/St Vitus - Word Gets Around/drums.wav,train/St Vitus - Word Gets Around/bass.wav,train/St Vitus - Word Gets Around/other.wav,247.013878 | |||
train/Steven Clark - Bounty/mixture.wav,train/Steven Clark - Bounty/vocals.wav,train/Steven Clark - Bounty/drums.wav,train/Steven Clark - Bounty/bass.wav,train/Steven Clark - Bounty/other.wav,289.274195 | |||
train/Strand Of Oaks - Spacestation/mixture.wav,train/Strand Of Oaks - Spacestation/vocals.wav,train/Strand Of Oaks - Spacestation/drums.wav,train/Strand Of Oaks - Spacestation/bass.wav,train/Strand Of Oaks - Spacestation/other.wav,243.670204 | |||
train/Sweet Lights - You Let Me Down/mixture.wav,train/Sweet Lights - You Let Me Down/vocals.wav,train/Sweet Lights - You Let Me Down/drums.wav,train/Sweet Lights - You Let Me Down/bass.wav,train/Sweet Lights - You Let Me Down/other.wav,391.790295 | |||
train/Swinging Steaks - Lost My Way/mixture.wav,train/Swinging Steaks - Lost My Way/vocals.wav,train/Swinging Steaks - Lost My Way/drums.wav,train/Swinging Steaks - Lost My Way/bass.wav,train/Swinging Steaks - Lost My Way/other.wav,309.963175 | |||
train/The Districts - Vermont/mixture.wav,train/The Districts - Vermont/vocals.wav,train/The Districts - Vermont/drums.wav,train/The Districts - Vermont/bass.wav,train/The Districts - Vermont/other.wav,227.973515 | |||
train/The Long Wait - Back Home To Blue/mixture.wav,train/The Long Wait - Back Home To Blue/vocals.wav,train/The Long Wait - Back Home To Blue/drums.wav,train/The Long Wait - Back Home To Blue/bass.wav,train/The Long Wait - Back Home To Blue/other.wav,260.458231 | |||
train/The Scarlet Brand - Les Fleurs Du Mal/mixture.wav,train/The Scarlet Brand - Les Fleurs Du Mal/vocals.wav,train/The Scarlet Brand - Les Fleurs Du Mal/drums.wav,train/The Scarlet Brand - Les Fleurs Du Mal/bass.wav,train/The Scarlet Brand - Les Fleurs Du Mal/other.wav,303.438367 | |||
train/The So So Glos - Emergency/mixture.wav,train/The So So Glos - Emergency/vocals.wav,train/The So So Glos - Emergency/drums.wav,train/The So So Glos - Emergency/bass.wav,train/The So So Glos - Emergency/other.wav,166.812154 | |||
train/The Wrong'Uns - Rothko/mixture.wav,train/The Wrong'Uns - Rothko/vocals.wav,train/The Wrong'Uns - Rothko/drums.wav,train/The Wrong'Uns - Rothko/bass.wav,train/The Wrong'Uns - Rothko/other.wav,202.152925 | |||
train/Tim Taler - Stalker/mixture.wav,train/Tim Taler - Stalker/vocals.wav,train/Tim Taler - Stalker/drums.wav,train/Tim Taler - Stalker/bass.wav,train/Tim Taler - Stalker/other.wav,237.633016 | |||
train/Titanium - Haunted Age/mixture.wav,train/Titanium - Haunted Age/vocals.wav,train/Titanium - Haunted Age/drums.wav,train/Titanium - Haunted Age/bass.wav,train/Titanium - Haunted Age/other.wav,248.105215 | |||
train/Traffic Experiment - Once More (With Feeling)/mixture.wav,train/Traffic Experiment - Once More (With Feeling)/vocals.wav,train/Traffic Experiment - Once More (With Feeling)/drums.wav,train/Traffic Experiment - Once More (With Feeling)/bass.wav,train/Traffic Experiment - Once More (With Feeling)/other.wav,435.07229 | |||
train/Triviul - Dorothy/mixture.wav,train/Triviul - Dorothy/vocals.wav,train/Triviul - Dorothy/drums.wav,train/Triviul - Dorothy/bass.wav,train/Triviul - Dorothy/other.wav,187.361814 | |||
train/Voelund - Comfort Lives In Belief/mixture.wav,train/Voelund - Comfort Lives In Belief/vocals.wav,train/Voelund - Comfort Lives In Belief/drums.wav,train/Voelund - Comfort Lives In Belief/bass.wav,train/Voelund - Comfort Lives In Belief/other.wav,209.90839 | |||
train/Wall Of Death - Femme/mixture.wav,train/Wall Of Death - Femme/vocals.wav,train/Wall Of Death - Femme/drums.wav,train/Wall Of Death - Femme/bass.wav,train/Wall Of Death - Femme/other.wav,238.933333 | |||
train/Young Griffo - Blood To Bone/mixture.wav,train/Young Griffo - Blood To Bone/vocals.wav,train/Young Griffo - Blood To Bone/drums.wav,train/Young Griffo - Blood To Bone/bass.wav,train/Young Griffo - Blood To Bone/other.wav,254.397823 | |||
train/Young Griffo - Facade/mixture.wav,train/Young Griffo - Facade/vocals.wav,train/Young Griffo - Facade/drums.wav,train/Young Griffo - Facade/bass.wav,train/Young Griffo - Facade/other.wav,167.857052 |
@@ -0,0 +1,15 @@
mix_path,vocals_path,drums_path,bass_path,other_path,duration | |||
train/ANiMAL - Rockshow/mixture.wav,train/ANiMAL - Rockshow/vocals.wav,train/ANiMAL - Rockshow/drums.wav,train/ANiMAL - Rockshow/bass.wav,train/ANiMAL - Rockshow/other.wav,165.511837 | |||
train/Actions - One Minute Smile/mixture.wav,train/Actions - One Minute Smile/vocals.wav,train/Actions - One Minute Smile/drums.wav,train/Actions - One Minute Smile/bass.wav,train/Actions - One Minute Smile/other.wav,163.375601 | |||
train/Alexander Ross - Goodbye Bolero/mixture.wav,train/Alexander Ross - Goodbye Bolero/vocals.wav,train/Alexander Ross - Goodbye Bolero/drums.wav,train/Alexander Ross - Goodbye Bolero/bass.wav,train/Alexander Ross - Goodbye Bolero/other.wav,418.632562 | |||
train/Clara Berry And Wooldog - Waltz For My Victims/mixture.wav,train/Clara Berry And Wooldog - Waltz For My Victims/vocals.wav,train/Clara Berry And Wooldog - Waltz For My Victims/drums.wav,train/Clara Berry And Wooldog - Waltz For My Victims/bass.wav,train/Clara Berry And Wooldog - Waltz For My Victims/other.wav,175.240998 | |||
train/Fergessen - Nos Palpitants/mixture.wav,train/Fergessen - Nos Palpitants/vocals.wav,train/Fergessen - Nos Palpitants/drums.wav,train/Fergessen - Nos Palpitants/bass.wav,train/Fergessen - Nos Palpitants/other.wav,198.228753 | |||
train/James May - On The Line/mixture.wav,train/James May - On The Line/vocals.wav,train/James May - On The Line/drums.wav,train/James May - On The Line/bass.wav,train/James May - On The Line/other.wav,256.09288 | |||
train/Johnny Lokke - Promises & Lies/mixture.wav,train/Johnny Lokke - Promises & Lies/vocals.wav,train/Johnny Lokke - Promises & Lies/drums.wav,train/Johnny Lokke - Promises & Lies/bass.wav,train/Johnny Lokke - Promises & Lies/other.wav,285.814422 | |||
train/Leaf - Summerghost/mixture.wav,train/Leaf - Summerghost/vocals.wav,train/Leaf - Summerghost/drums.wav,train/Leaf - Summerghost/bass.wav,train/Leaf - Summerghost/other.wav,231.804807 | |||
train/Meaxic - Take A Step/mixture.wav,train/Meaxic - Take A Step/vocals.wav,train/Meaxic - Take A Step/drums.wav,train/Meaxic - Take A Step/bass.wav,train/Meaxic - Take A Step/other.wav,282.517188 | |||
train/Patrick Talbot - A Reason To Leave/mixture.wav,train/Patrick Talbot - A Reason To Leave/vocals.wav,train/Patrick Talbot - A Reason To Leave/drums.wav,train/Patrick Talbot - A Reason To Leave/bass.wav,train/Patrick Talbot - A Reason To Leave/other.wav,259.552653 | |||
train/Skelpolu - Human Mistakes/mixture.wav,train/Skelpolu - Human Mistakes/vocals.wav,train/Skelpolu - Human Mistakes/drums.wav,train/Skelpolu - Human Mistakes/bass.wav,train/Skelpolu - Human Mistakes/other.wav,324.498866 | |||
train/Traffic Experiment - Sirens/mixture.wav,train/Traffic Experiment - Sirens/vocals.wav,train/Traffic Experiment - Sirens/drums.wav,train/Traffic Experiment - Sirens/bass.wav,train/Traffic Experiment - Sirens/other.wav,421.279637 | |||
train/Triviul - Angelsaint/mixture.wav,train/Triviul - Angelsaint/vocals.wav,train/Triviul - Angelsaint/drums.wav,train/Triviul - Angelsaint/bass.wav,train/Triviul - Angelsaint/other.wav,236.704218 | |||
train/Young Griffo - Pennies/mixture.wav,train/Young Griffo - Pennies/vocals.wav,train/Young Griffo - Pennies/drums.wav,train/Young Griffo - Pennies/bass.wav,train/Young Griffo - Pennies/other.wav,277.803537 |
@@ -0,0 +1,13 @@
#!/bin/bash | |||
###################################################################### | |||
# Custom entrypoint that activates conda before running spleeter.
# | |||
# @author Félix Voituret <fvoituret@deezer.com> | |||
# @version 1.0.0 | |||
###################################################################### | |||
# shellcheck disable=1091 | |||
. "/opt/conda/etc/profile.d/conda.sh" | |||
conda activate base | |||
spleeter "$@" |
@@ -0,0 +1,16 @@
ARG BASE=python:3.7 | |||
FROM ${BASE} | |||
RUN apt-get update --fix-missing \ | |||
&& apt-get install -y wget bzip2 ca-certificates curl git \ | |||
&& apt-get clean \ | |||
&& rm -rf /var/lib/apt/lists/* \ | |||
&& wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O ~/miniconda.sh \ | |||
&& /bin/bash ~/miniconda.sh -b -p /opt/conda \ | |||
&& rm ~/miniconda.sh \ | |||
&& /opt/conda/bin/conda clean -tipsy \ | |||
&& ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \ | |||
&& echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \ | |||
&& echo "conda activate base" >> ~/.bashrc \ | |||
&& ln -s /opt/conda/bin/conda /usr/bin/conda | |||
SHELL ["/bin/bash", "-c"] |
@@ -0,0 +1,43 @@
ARG BASE=python:3.7 | |||
FROM ${BASE} | |||
ENV CUDA_VERSION 10.0.130 | |||
ENV CUDA_PKG_VERSION 10-0=$CUDA_VERSION-1 | |||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} | |||
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 | |||
ENV NVIDIA_VISIBLE_DEVICES=all | |||
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility | |||
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411" | |||
ENV NCCL_VERSION 2.4.2 | |||
ENV CUDNN_VERSION 7.6.0.64 | |||
LABEL com.nvidia.cuda.version="${CUDA_VERSION}" | |||
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" | |||
LABEL com.nvidia.volumes.needed="nvidia_driver" | |||
RUN apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
gnupg2 \ | |||
curl \ | |||
ca-certificates \ | |||
&& curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list \ | |||
&& apt-get purge --autoremove -y curl \ | |||
&& apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-cudart-$CUDA_PKG_VERSION \ | |||
cuda-compat-10-0 \ | |||
&& ln -s cuda-10.0 /usr/local/cuda \ | |||
&& echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \ | |||
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-toolkit-10-0 \ | |||
cuda-libraries-$CUDA_PKG_VERSION \ | |||
cuda-nvtx-$CUDA_PKG_VERSION \ | |||
libnccl2=$NCCL_VERSION-1+cuda10.0 \ | |||
libcudnn7=$CUDNN_VERSION-1+cuda10.0 \ | |||
&& apt-mark hold libnccl2 \ | |||
&& apt-mark hold libcudnn7 \ | |||
&& rm -rf /var/lib/apt/lists/* |
@@ -0,0 +1,45 @@
ARG BASE=python:3.8 | |||
FROM ${BASE} | |||
ENV CUDA_VERSION 10.1.243 | |||
ENV CUDA_PKG_VERSION 10-1=$CUDA_VERSION-1 | |||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} | |||
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 | |||
ENV NVIDIA_VISIBLE_DEVICES all | |||
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility | |||
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411 brand=tesla,driver>=418,driver<419" | |||
ENV CUDNN_VERSION 7.6.5.32 | |||
ENV NCCL_VERSION 2.7.8 | |||
LABEL com.nvidia.cuda.version="${CUDA_VERSION}" | |||
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" | |||
LABEL com.nvidia.volumes.needed="nvidia_driver" | |||
RUN apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
gnupg2 \ | |||
curl \ | |||
ca-certificates \ | |||
&& curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list \ | |||
&& apt-get purge --autoremove -y curl \ | |||
&& apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-cudart-$CUDA_PKG_VERSION \ | |||
cuda-compat-10-1 \ | |||
&& ln -s cuda-10.1 /usr/local/cuda \ | |||
&& echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \ | |||
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-libraries-$CUDA_PKG_VERSION \ | |||
cuda-npp-$CUDA_PKG_VERSION \ | |||
cuda-nvtx-$CUDA_PKG_VERSION \ | |||
libcublas10=10.2.1.243-1 \ | |||
libcudnn7=$CUDNN_VERSION-1+cuda10.1 \ | |||
libnccl2=$NCCL_VERSION-1+cuda10.1 \ | |||
&& apt-mark hold libnccl2 \ | |||
&& apt-mark hold libcudnn7 \ | |||
&& apt-mark hold libcublas10 \ | |||
&& rm -rf /var/lib/apt/lists/* |
@@ -0,0 +1,58 @@
ARG BASE=python:3.7 | |||
FROM ${BASE} | |||
# FROM 9.2-base-ubuntu18.04 | |||
# https://gitlab.com/nvidia/container-images/cuda/blob/ubuntu18.04/9.2/base/Dockerfile | |||
RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates \ | |||
&& curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64/7fa2af80.pub | apt-key add - \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64 /" > /etc/apt/sources.list.d/cuda.list \ | |||
&& echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list \ | |||
&& apt-get purge --autoremove -y curl \ | |||
&& rm -rf /var/lib/apt/lists/* | |||
ENV CUDA_VERSION 9.2.148 | |||
ENV CUDA_PKG_VERSION 9-2=$CUDA_VERSION-1 | |||
RUN apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-cudart-$CUDA_PKG_VERSION \ | |||
&& ln -s cuda-9.2 /usr/local/cuda \ | |||
&& rm -rf /var/lib/apt/lists/* | |||
LABEL com.nvidia.volumes.needed="nvidia_driver" | |||
LABEL com.nvidia.cuda.version="${CUDA_VERSION}" | |||
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \ | |||
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf | |||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} | |||
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 | |||
ENV NVIDIA_VISIBLE_DEVICES all | |||
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility | |||
ENV NVIDIA_REQUIRE_CUDA "cuda>=9.2" | |||
# FROM 9.2-runtime-ubuntu18.04 | |||
# https://gitlab.com/nvidia/container-images/cuda/blob/ubuntu18.04/9.2/runtime/Dockerfile | |||
ENV NCCL_VERSION 2.3.7 | |||
RUN apt-get update \ | |||
&& apt-get install -y --no-install-recommends \ | |||
cuda-libraries-$CUDA_PKG_VERSION \ | |||
cuda-nvtx-$CUDA_PKG_VERSION \ | |||
libnccl2=$NCCL_VERSION-1+cuda9.2 \ | |||
&& apt-mark hold libnccl2 \ | |||
&& rm -rf /var/lib/apt/lists/* | |||
# FROM 9.2-runtime-cudnn7-ubuntu18.04 | |||
# https://gitlab.com/nvidia/container-images/cuda/blob/ubuntu18.04/9.2/runtime/cudnn7/Dockerfile | |||
ENV CUDNN_VERSION 7.5.0.56 | |||
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" | |||
RUN apt-get update \ | |||
&& apt-get install -y --no-install-recommends libcudnn7=$CUDNN_VERSION-1+cuda9.2 \ | |||
&& apt-mark hold libcudnn7 \ | |||
&& rm -rf /var/lib/apt/lists/* | |||
RUN mkdir -p /model | |||
ENV MODEL_PATH /model | |||
COPY audio_example.mp3 . | |||
# Spleeter installation. | |||
RUN apt-get update && apt-get install -y ffmpeg libsndfile1 | |||
RUN pip install musdb museval | |||
RUN pip install spleeter-gpu==1.4.9 | |||
ENTRYPOINT ["spleeter"] |
@@ -0,0 +1,13 @@
ARG BASE=conda | |||
FROM ${BASE} | |||
ARG SPLEETER_VERSION=1.5.3 | |||
ENV MODEL_PATH /model | |||
RUN mkdir -p /model | |||
RUN conda config --add channels conda-forge | |||
RUN conda install -y -c conda-forge musdb | |||
RUN conda install -y -c deezer-research spleeter | |||
COPY docker/conda-entrypoint.sh spleeter-entrypoint.sh | |||
ENTRYPOINT ["/bin/bash", "spleeter-entrypoint.sh"] |
@@ -0,0 +1,9 @@
ARG BASE=researchdeezer/spleeter | |||
FROM ${BASE} | |||
ARG MODEL=2stems | |||
RUN mkdir -p /model/$MODEL \ | |||
&& wget -O /tmp/$MODEL.tar.gz https://github.com/deezer/spleeter/releases/download/v1.4.0/$MODEL.tar.gz \ | |||
&& tar -xvzf /tmp/$MODEL.tar.gz -C /model/$MODEL/ \ | |||
&& touch /model/$MODEL/.probe |
@@ -0,0 +1,13 @@
ARG BASE=python:3.6 | |||
FROM ${BASE} | |||
ARG SPLEETER_VERSION=1.5.3 | |||
ENV MODEL_PATH /model | |||
RUN mkdir -p /model | |||
RUN apt-get update && apt-get install -y ffmpeg libsndfile1 | |||
RUN pip install musdb museval | |||
RUN pip install spleeter==${SPLEETER_VERSION} | |||
ENTRYPOINT ["spleeter"] |
@@ -0,0 +1,145 @@
% bibtex | |||
@inproceedings{SISEC18, | |||
author = {{St{\"o}ter}, Fabian-Robert and {Liutkus}, Antoine and {Ito}, Nobutaka}, | |||
title = {The 2018 Signal Separation Evaluation Campaign}, | |||
year = {2018}, | |||
booktitle = {Latent Variable Analysis and Signal Separation. {LVA}/{ICA}}, | |||
volume = {10891},
doi = {10.1007/978-3-319-93764-9_28},
publisher = {Springer, Cham}
} | |||
@misc{spleeter2019, | |||
title={Spleeter: A Fast And State-of-the Art Music Source Separation Tool With Pre-trained Models}, | |||
author={Romain Hennequin and Anis Khlif and Felix Voituret and Manuel Moussallam}, | |||
howpublished={Late-Breaking/Demo ISMIR 2019}, | |||
month={November}, | |||
note={Deezer Research}, | |||
year={2019} | |||
} | |||
@inproceedings{unet2017, | |||
title={Singing voice separation with deep U-Net convolutional networks}, | |||
author={Jansson, Andreas and Humphrey, Eric J. and Montecchio, Nicola and Bittner, Rachel and Kumar, Aparna and Weyde, Tillman}, | |||
booktitle={Proceedings of the International Society for Music Information Retrieval Conference (ISMIR)}, | |||
pages={323--332}, | |||
year={2017} | |||
} | |||
@inproceedings{deezerICASSP2019, | |||
author={Laure {Pr\'etet} and Romain {Hennequin} and Jimena {Royo-Letelier} and Andrea {Vaglio}}, | |||
booktitle={ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, | |||
title={Singing Voice Separation: A Study on Training Data}, | |||
year={2019}, | |||
volume={}, | |||
number={}, | |||
pages={506-510}, | |||
keywords={feature extraction;source separation;speech processing;supervised training;separation quality;data augmentation;singing voice separation systems;singing voice separation algorithms;separation diversity;source separation;supervised learning;training data;data augmentation}, | |||
doi={10.1109/ICASSP.2019.8683555}, | |||
ISSN={},
month={May}
}
@misc{Norbert, | |||
author = {Antoine Liutkus and | |||
Fabian-Robert St{\"o}ter}, | |||
title = {sigsep/norbert: First official Norbert release}, | |||
month = jul, | |||
year = 2019, | |||
doi = {10.5281/zenodo.3269749}, | |||
url = {https://doi.org/10.5281/zenodo.3269749} | |||
} | |||
@ARTICLE{separation_metrics, | |||
author={Emmanuel {Vincent} and Remi {Gribonval} and Cedric {Fevotte}}, | |||
journal={IEEE Transactions on Audio, Speech, and Language Processing}, | |||
title={Performance measurement in blind audio source separation}, | |||
year={2006}, | |||
volume={14}, | |||
number={4}, | |||
pages={1462-1469}, | |||
keywords={audio signal processing;blind source separation;distortion;time-varying filters;blind audio source separation;distortions;time-invariant gains;time-varying filters;source estimation;interference;additive noise;algorithmic artifacts;Source separation;Data mining;Filters;Additive noise;Microphones;Distortion measurement;Energy measurement;Independent component analysis;Interference;Image analysis;Audio source separation;evaluation;measure;performance;quality}, | |||
doi={10.1109/TSA.2005.858005}, | |||
ISSN={},
month={July}
}
@misc{musdb18, | |||
author = {Rafii, Zafar and | |||
Liutkus, Antoine and | |||
Fabian-Robert St{\"o}ter and | |||
Mimilakis, Stylianos Ioannis and | |||
Bittner, Rachel}, | |||
title = {The {MUSDB18} corpus for music separation}, | |||
month = dec, | |||
year = 2017, | |||
doi = {10.5281/zenodo.1117372}, | |||
url = {https://doi.org/10.5281/zenodo.1117372} | |||
} | |||
@misc{tensorflow2015-whitepaper, | |||
title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems}, | |||
url={https://www.tensorflow.org/}, | |||
note={Software available from tensorflow.org}, | |||
author={ | |||
Abadi, Mart{\'{\i}}n et al.}, | |||
year={2015}, | |||
} | |||
@article{2019arXiv190611139L, | |||
author = {{Lee}, Kyungyun and {Nam}, Juhan}, | |||
title = "{Learning a Joint Embedding Space of Monophonic and Mixed Music Signals for Singing Voice}", | |||
journal = {arXiv e-prints}, | |||
keywords = {Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing}, | |||
year = "2019", | |||
month = "Jun", | |||
eid = {arXiv:1906.11139}, | |||
pages = {arXiv:1906.11139}, | |||
archivePrefix = {arXiv}, | |||
eprint = {1906.11139}, | |||
primaryClass = {cs.SD}, | |||
adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190611139L}, | |||
adsnote = {Provided by the SAO/NASA Astrophysics Data System} | |||
} | |||
@article{Adam, | |||
author = {{Kingma}, Diederik P. and {Ba}, Jimmy}, | |||
title = "{Adam: A Method for Stochastic Optimization}", | |||
journal = {arXiv e-prints}, | |||
keywords = {Computer Science - Machine Learning}, | |||
year = "2014", | |||
month = "Dec", | |||
eid = {arXiv:1412.6980}, | |||
pages = {arXiv:1412.6980}, | |||
archivePrefix = {arXiv}, | |||
eprint = {1412.6980}, | |||
primaryClass = {cs.LG}, | |||
adsurl = {https://ui.adsabs.harvard.edu/abs/2014arXiv1412.6980K}, | |||
adsnote = {Provided by the SAO/NASA Astrophysics Data System} | |||
} | |||
@article{Open-Unmix, | |||
author={Fabian-Robert St\"{o}ter and Stefan Uhlich and Antoine Liutkus and Yuki Mitsufuji}, | |||
title={Open-Unmix - A Reference Implementation for Music Source Separation}, | |||
journal={Journal of Open Source Software}, | |||
year=2019, | |||
doi = {10.21105/joss.01667}, | |||
url = {https://doi.org/10.21105/joss.01667} | |||
} | |||
@misc{spleeter, | |||
author={Romain Hennequin and Anis Khlif and Felix Voituret and Manuel Moussallam}, | |||
title={Spleeter}, | |||
year=2019, | |||
url = {https://www.github.com/deezer/spleeter} | |||
} | |||
@misc{demucs, | |||
title={Music Source Separation in the Waveform Domain}, | |||
author={Alexandre Défossez and Nicolas Usunier and Léon Bottou and Francis Bach}, | |||
year={2019}, | |||
eprint={1911.13254}, | |||
archivePrefix={arXiv}, | |||
primaryClass={cs.SD} | |||
} |
@@ -0,0 +1,95 @@
--- | |||
title: 'Spleeter: a fast and efficient music source separation tool with pre-trained models' | |||
tags: | |||
- Python | |||
- musical signal processing | |||
- source separation | |||
- vocal isolation | |||
authors: | |||
- name: Romain Hennequin | |||
orcid: 0000-0001-8158-5562 | |||
affiliation: 1 | |||
- name: Anis Khlif | |||
affiliation: 1 | |||
- name: Felix Voituret | |||
affiliation: 1 | |||
- name: Manuel Moussallam | |||
orcid: 0000-0003-0886-5423 | |||
affiliation: 1 | |||
affiliations: | |||
- name: Deezer Research, Paris | |||
index: 1 | |||
date: 04 March 2020 | |||
bibliography: paper.bib | |||
--- | |||
## Summary | |||
We present and release a new tool for music source separation with pre-trained models called Spleeter. Spleeter was designed with ease of use, separation performance, and speed in mind. Spleeter is based on Tensorflow [@tensorflow2015-whitepaper] and makes it possible to: | |||
- split music audio files into several stems with a single command line using pre-trained models (a short usage sketch of the corresponding Python API is given below). A music audio file can be separated into $2$ stems (vocals and accompaniment), $4$ stems (vocals, drums, bass, and other) or $5$ stems (vocals, drums, bass, piano, and other).
- train source separation models or fine-tune pre-trained ones with Tensorflow (provided you have a dataset of isolated sources). | |||
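For illustration, the separation described in the first bullet can also be driven from Python; here is a minimal sketch using the `Separator` class defined in this repository (the file names are placeholders, and `spleeter:4stems` denotes the released $4$-stems configuration):

```python
from spleeter.separator import Separator

# Load the pre-trained 4-stems model (vocals, drums, bass, other).
separator = Separator("spleeter:4stems")

# Writes vocals.wav, drums.wav, bass.wav and other.wav
# under output/audio_example/.
separator.separate_to_file("audio_example.mp3", "output/")
```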
The performance of the pre-trained models is very close to the published state-of-the-art, and Spleeter includes one of the best performing publicly released $4$-stems separation models on the common musdb18 benchmark [@musdb18]. Spleeter is also very fast: it can separate a mix audio file into $4$ stems $100$ times faster than real-time (we note, though, that the model cannot be applied in real-time as it needs buffering) on a single Graphics Processing Unit (GPU) using the pre-trained $4$-stems model.
## Purpose | |||
We release Spleeter with pre-trained state-of-the-art models in order to help the Music Information Retrieval (MIR) research community leverage the power of source separation in various MIR tasks, such as vocal lyrics analysis from audio (audio/lyrics alignment, lyrics transcription...), music transcription (chord transcription, drums transcription, bass transcription, chord estimation, beat tracking), singer identification, any type of multilabel classification (mood/genre...), vocal melody extraction or cover detection. | |||
We believe that source separation has reached a level of maturity that makes it worth considering for these tasks, and that specific features computed from isolated vocals, drums or bass may help increase performance, especially in low data availability scenarios (small datasets, limited annotation availability) for which supervised learning might be difficult.
Spleeter also makes it possible to fine-tune the provided state-of-the-art models in order to adapt the system to a specific use-case. | |||
Finally, having an available source separation tool such as Spleeter will allow researchers to compare the performance of their new models to a state-of-the-art one on their private datasets instead of musdb18, which is usually the only dataset used for reporting separation performance of unreleased models.
Note that we cannot release the training data for copyright reasons, and thus sharing pre-trained models was the only way to make these results available to the community.
## Implementation details | |||
Spleeter contains pre-trained models for: | |||
- vocals/accompaniment separation. | |||
- $4$ stems separation as in SiSec [@SISEC18] (vocals, bass, drums and other). | |||
- $5$ stems separation with an extra piano stem (vocals, bass, drums, piano, and other). It is, to the authors' knowledge, the first released model to perform such a separation. | |||
The pre-trained models are U-nets [@unet2017] and follow similar specifications as in [@deezerICASSP2019]. The U-net is an encoder/decoder Convolutional Neural Network (CNN) architecture with skip connections. We used $12$-layer U-nets ($6$ layers for the encoder and $6$ for the decoder). A U-net is used for estimating a soft mask for each source (stem). The training loss is an $L_1$-norm between masked input mix spectrograms and source-target spectrograms. The models were trained on Deezer's internal datasets (notably the Bean dataset that was used in [@deezerICASSP2019]) using Adam [@Adam]. Training took approximately a full week on a single GPU. Separation is then done from the estimated source spectrograms using soft masking or multi-channel Wiener filtering.
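As an illustration of this masking scheme, here is a minimal numpy sketch (shapes, names and values are purely illustrative, not the repository's actual training code):

```python
import numpy as np

rng = np.random.default_rng(0)

# Illustrative magnitude spectrograms: (time frames, frequency bins).
mix_spectrogram = rng.random((100, 1024)).astype(np.float32)
target_vocals = mix_spectrogram * rng.random((100, 1024)).astype(np.float32)

# A U-net outputs a soft mask in [0, 1] with the same shape as its input.
soft_mask = rng.random((100, 1024)).astype(np.float32)

# The estimated source spectrogram is the masked input mix spectrogram.
estimated_vocals = soft_mask * mix_spectrogram

# Training minimizes the L1-norm between estimate and target.
l1_loss = np.mean(np.abs(estimated_vocals - target_vocals))
print(f"L1 loss: {l1_loss:.4f}")
```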
Training and inference are implemented in Tensorflow which makes it possible to run the code on Central Processing Unit (CPU) or GPU. | |||
## Speed | |||
As the whole separation pipeline can be run on a GPU and the model is based on a CNN, computations are efficiently parallelized and model inference is very fast. For instance, Spleeter is able to separate the whole musdb18 test dataset (about $3$ hours and $27$ minutes of audio) into $4$ stems in less than $2$ minutes, including model loading time (about $15$ seconds), and audio wav files export, using a single GeForce RTX 2080 GPU, and a double Intel Xeon Gold 6134 CPU @ 3.20GHz (CPU is used for mix files loading and stem files export only). In this setup, Spleeter is able to process $100$ seconds of stereo audio in less than $1$ second, which makes it very useful for efficiently processing large datasets. | |||
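(For reference: $3$ hours and $27$ minutes amount to about $12420$ seconds of audio, so separating it in less than $2$ minutes, i.e. $120$ seconds, is how the roughly $100\times$ real-time figure follows.)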
## Separation performances | |||
The models compete with the published state-of-the-art on the standard musdb18 dataset [@musdb18] even though they were not trained, validated or optimized in any way with musdb18 data. Results in terms of standard source separation metrics [@separation_metrics], namely Signal to Distortion Ratio (SDR), Signal to Artifacts Ratio (SAR), Signal to Interference Ratio (SIR) and source Image to Spatial distortion Ratio (ISR), are presented in the following table alongside Open-Unmix [@Open-Unmix] and Demucs [@demucs] (only SDR is reported for Demucs since other metrics are not available in the paper), which are, to the authors' knowledge, the only released systems that perform close to the state-of-the-art.
We present results for soft masking and for multi-channel Wiener filtering (applied using Norbert [@Norbert]); a sketch of the Wiener filtering call is given after the table. As can be seen, Spleeter is competitive with Open-Unmix on most metrics, especially SDR for all instruments, and is almost on par with Demucs.
| |Spleeter Mask |Spleeter MWF |Open-Unmix |Demucs| | |||
|-----------|---------------|---------------|-----------|------| | |||
| Vocals SDR|6.55 |6.86 |6.32 |7.05 | | |||
| Vocals SIR|15.19 |15.86 |13.33 |13.94 | | |||
| Vocals SAR|6.44 |6.99 |6.52 |7.00 | | |||
| Vocals ISR|12.01 |11.95 |11.93 |12.04 | | |||
| Bass SDR |5.10 |5.51 |5.23 |6.70 | | |||
| Bass SIR |10.01 |10.30 |10.93 |13.03 | | |||
| Bass SAR |5.15 |5.96 |6.34 |6.68 | | |||
| Bass ISR |9.18 |9.61 |9.23 |9.99 | | |||
| Drums SDR |5.93 |6.71 |5.73 |7.08 | | |||
| Drums SIR |12.24 |13.67 |11.12 |13.74 | | |||
| Drums SAR |5.78 |6.54 |6.02 |7.04 | | |||
| Drums ISR |10.50 |10.69 |10.51 |11.96 | | |||
| Other SDR |4.24 |4.55 |4.02 |4.47 | | |||
| Other SIR |7.86 |8.16 |6.59 |7.11 | | |||
| Other SAR |4.63 |4.88 |4.74 |5.26 | | |||
| Other ISR |9.83 |9.87 |9.31 |10.86 | | |||
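The multi-channel Wiener filtering variant post-processes the estimated spectrograms with Norbert; the following minimal sketch shows the expected call and array shapes with random placeholder data (shapes follow Norbert's conventions):

```python
import norbert
import numpy as np

rng = np.random.default_rng(0)
nb_frames, nb_bins, nb_channels, nb_sources = 100, 1025, 2, 4
shape = (nb_frames, nb_bins, nb_channels)

# Complex STFT of the stereo mixture: (frames, bins, channels).
mix_stft = rng.standard_normal(shape) + 1j * rng.standard_normal(shape)

# Non-negative source spectrogram estimates from the network:
# (frames, bins, channels, sources).
estimates = rng.random((nb_frames, nb_bins, nb_channels, nb_sources))

# Multi-channel Wiener filtering returns complex source STFTs
# of shape (frames, bins, channels, sources).
source_stfts = norbert.wiener(estimates, mix_stft)
print(source_stfts.shape)
```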
Spleeter [@spleeter] source code and pre-trained models are available on [github](https://www.github.com/deezer/spleeter) and distributed under an MIT license. This repository will eventually be used for releasing other models with improved performance, or models separating into more than $5$ stems.
## Distribution | |||
Spleeter is available as a standalone Python package, and is also provided as a [conda](https://github.com/conda-forge/spleeter-feedstock) recipe and as self-contained [Docker images](https://hub.docker.com/r/researchdeezer/spleeter), which make it usable as-is on various platforms.
## Acknowledgements | |||
We acknowledge contributions from Laure Pretet, who trained the first models and wrote the first piece of code that led to Spleeter.
## References |
@@ -0,0 +1,86 @@
[tool.poetry] | |||
name = "spleeter" | |||
version = "2.3.0" | |||
description = "The Deezer source separation library with pretrained models based on tensorflow." | |||
authors = ["Deezer Research <spleeter@deezer.com>"] | |||
license = "MIT License" | |||
readme = "README.md" | |||
repository = "https://github.com/deezer/spleeter" | |||
homepage = "https://github.com/deezer/spleeter" | |||
classifiers = [ | |||
"Environment :: Console", | |||
"Environment :: MacOS X", | |||
"Intended Audience :: Developers", | |||
"Intended Audience :: Information Technology", | |||
"Intended Audience :: Science/Research", | |||
"License :: OSI Approved :: MIT License", | |||
"Natural Language :: English", | |||
"Operating System :: MacOS", | |||
"Operating System :: Microsoft :: Windows", | |||
"Operating System :: POSIX :: Linux", | |||
"Operating System :: Unix", | |||
"Programming Language :: Python", | |||
"Programming Language :: Python :: 3", | |||
"Programming Language :: Python :: 3.6", | |||
"Programming Language :: Python :: 3.7", | |||
"Programming Language :: Python :: 3.8", | |||
"Programming Language :: Python :: 3 :: Only", | |||
"Programming Language :: Python :: Implementation :: CPython", | |||
"Topic :: Artistic Software", | |||
"Topic :: Multimedia", | |||
"Topic :: Multimedia :: Sound/Audio", | |||
"Topic :: Multimedia :: Sound/Audio :: Analysis", | |||
"Topic :: Multimedia :: Sound/Audio :: Conversion", | |||
"Topic :: Multimedia :: Sound/Audio :: Sound Synthesis", | |||
"Topic :: Scientific/Engineering", | |||
"Topic :: Scientific/Engineering :: Artificial Intelligence", | |||
"Topic :: Scientific/Engineering :: Information Analysis", | |||
"Topic :: Software Development", | |||
"Topic :: Software Development :: Libraries", | |||
"Topic :: Software Development :: Libraries :: Python Modules", | |||
"Topic :: Utilities" | |||
] | |||
packages = [ { include = "spleeter" } ] | |||
include = ["LICENSE", "spleeter/resources/*.json"] | |||
[tool.poetry.dependencies] | |||
python = ">=3.6.1,<3.10" | |||
ffmpeg-python = "0.2.0" | |||
norbert = "0.2.1" | |||
httpx = {extras = ["http2"], version = "^0.19.0"} | |||
typer = "^0.3.2" | |||
librosa = "0.8.0" | |||
musdb = {version = "0.3.1", optional = true} | |||
museval = {version = "0.3.0", optional = true} | |||
tensorflow = "2.5.0" | |||
pandas = "^1.1.2" | |||
numpy = "<1.20.0,>=1.16.0" | |||
importlib-resources = {version = "^4.1.1", python = "<3.7"} | |||
importlib-metadata = {version = "^3.0.0", python = "<3.8"} | |||
llvmlite = "^0.36.0" | |||
[tool.poetry.dev-dependencies] | |||
pytest = "^6.2.1" | |||
isort = "^5.7.0" | |||
black = "^20.8b1" | |||
mypy = "^0.790" | |||
pytest-forked = "^1.3.0" | |||
musdb = "0.3.1" | |||
museval = "0.3.0" | |||
[tool.poetry.scripts] | |||
spleeter = 'spleeter.__main__:entrypoint' | |||
[tool.poetry.extras] | |||
evaluation = ["musdb", "museval"] | |||
[tool.isort] | |||
profile = "black" | |||
multi_line_output = 3 | |||
[tool.pytest.ini_options] | |||
addopts = "-W ignore::FutureWarning -W ignore::DeprecationWarning -vv --forked" | |||
[build-system] | |||
requires = ["poetry-core>=1.0.0"] | |||
build-backend = "poetry.core.masonry.api" |
@@ -0,0 +1,202 @@
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "markdown", | |||
"metadata": { | |||
"colab_type": "text", | |||
"id": "K6mcSc0mmp3i" | |||
}, | |||
"source": [ | |||
"# Install spleeter" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 109 | |||
}, | |||
"colab_type": "code", | |||
"id": "f8Brdfh6mzEz", | |||
"outputId": "c63dae8e-1d33-48f2-879f-dd15393a5034" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!apt install ffmpeg" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 1000 | |||
}, | |||
"colab_type": "code", | |||
"id": "V_6Ram1lmc1F", | |||
"outputId": "26a8df7b-6b6c-41e7-d874-acea0247d181" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"pip install spleeter" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": {}, | |||
"colab_type": "code", | |||
"id": "W0LktyMypXqE" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"from IPython.display import Audio" | |||
] | |||
}, | |||
{ | |||
"cell_type": "markdown", | |||
"metadata": { | |||
"colab_type": "text", | |||
"id": "afbcUSken16L" | |||
}, | |||
"source": [ | |||
"# Separate from command line" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 311 | |||
}, | |||
"colab_type": "code", | |||
"id": "O1kQaoJSoAD0", | |||
"outputId": "cd1868b4-6992-47c3-8a2b-920e6f288614" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!wget https://github.com/deezer/spleeter/raw/master/audio_example.mp3" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 60 | |||
}, | |||
"colab_type": "code", | |||
"id": "ibG6uF55p4lH", | |||
"outputId": "f2785922-0ee1-4769-807a-6ee69313993c" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"Audio('audio_example.mp3')" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 660 | |||
}, | |||
"colab_type": "code", | |||
"id": "kOAqBcPhn6IU", | |||
"outputId": "23e14ad5-209d-4ed6-b909-7c0cd966bd0c" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!spleeter separate -h" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 533 | |||
}, | |||
"colab_type": "code", | |||
"id": "dGL-k5xxoKbu", | |||
"outputId": "dd8d6a7f-515c-47f0-8388-39e179ef652a" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!spleeter separate -o output/ audio_example.mp3" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 63 | |||
}, | |||
"colab_type": "code", | |||
"id": "IDuPWcAMoZP_", | |||
"outputId": "3f9a05fd-afab-41c7-d47c-433fc614283b" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!ls output/audio_example" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 60 | |||
}, | |||
"colab_type": "code", | |||
"id": "e7CHpyiloxrk", | |||
"outputId": "d1ff17ac-8cef-4b9d-913a-01c2688ffef1" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"Audio('output/audio_example/vocals.wav')" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": { | |||
"colab": { | |||
"base_uri": "https://localhost:8080/", | |||
"height": 60 | |||
}, | |||
"colab_type": "code", | |||
"id": "ibXd-WCTpT0w", | |||
"outputId": "6716708d-1cdb-4be5-da22-593075de78ca" | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"Audio('output/audio_example/accompaniment.wav')" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"colab": { | |||
"name": "spleeter.ipynb", | |||
"provenance": [] | |||
}, | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"name": "python3" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 0 | |||
} |
@@ -0,0 +1,24 @@
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
Spleeter is the Deezer source separation library with pretrained models. | |||
The library is based on Tensorflow: | |||
- It provides already trained models for performing separation.
- It makes it easy to train source separation models with tensorflow
(provided you have a dataset of isolated sources).
This module allows easy interaction with Spleeter from the command line
by providing train, evaluation and source separation actions.
""" | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class SpleeterError(Exception): | |||
""" Custom exception for Spleeter related error. """ | |||
pass |
@@ -0,0 +1,262 @@
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
Python one-liner script usage.
USAGE: python -m spleeter {train,evaluate,separate} ... | |||
Notes:
All critical imports involving TF, numpy or Pandas are deferred to
command function scope to avoid heavy imports on CLI evaluation,
leading to large bootstrapping times.
""" | |||
import json | |||
from functools import partial | |||
from glob import glob | |||
from itertools import product | |||
from os.path import join | |||
from pathlib import Path | |||
from typing import Container, Dict, List, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
from typer import Exit, Typer | |||
from . import SpleeterError | |||
from .options import * | |||
from .utils.logging import configure_logger, logger | |||
# pylint: enable=import-error | |||
spleeter: Typer = Typer(add_completion=False, no_args_is_help=True, short_help="-h") | |||
""" CLI application. """ | |||
@spleeter.callback() | |||
def default( | |||
version: bool = VersionOption, | |||
) -> None: | |||
pass | |||
@spleeter.command(no_args_is_help=True) | |||
def train( | |||
adapter: str = AudioAdapterOption, | |||
data: Path = TrainingDataDirectoryOption, | |||
params_filename: str = ModelParametersOption, | |||
verbose: bool = VerboseOption, | |||
) -> None: | |||
""" | |||
Train a source separation model | |||
""" | |||
import tensorflow as tf | |||
from .audio.adapter import AudioAdapter | |||
from .dataset import get_training_dataset, get_validation_dataset | |||
from .model import model_fn | |||
from .model.provider import ModelProvider | |||
from .utils.configuration import load_configuration | |||
configure_logger(verbose) | |||
audio_adapter = AudioAdapter.get(adapter) | |||
audio_path = str(data) | |||
params = load_configuration(params_filename) | |||
session_config = tf.compat.v1.ConfigProto() | |||
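# Limit each TensorFlow process to a fraction of the available GPU memory.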
session_config.gpu_options.per_process_gpu_memory_fraction = 0.45 | |||
estimator = tf.estimator.Estimator( | |||
model_fn=model_fn, | |||
model_dir=params["model_dir"], | |||
params=params, | |||
config=tf.estimator.RunConfig( | |||
save_checkpoints_steps=params["save_checkpoints_steps"], | |||
tf_random_seed=params["random_seed"], | |||
save_summary_steps=params["save_summary_steps"], | |||
session_config=session_config, | |||
log_step_count_steps=10, | |||
keep_checkpoint_max=2, | |||
), | |||
) | |||
input_fn = partial(get_training_dataset, params, audio_adapter, audio_path) | |||
train_spec = tf.estimator.TrainSpec( | |||
input_fn=input_fn, max_steps=params["train_max_steps"] | |||
) | |||
input_fn = partial(get_validation_dataset, params, audio_adapter, audio_path) | |||
evaluation_spec = tf.estimator.EvalSpec( | |||
input_fn=input_fn, steps=None, throttle_secs=params["throttle_secs"] | |||
) | |||
logger.info("Start model training") | |||
tf.estimator.train_and_evaluate(estimator, train_spec, evaluation_spec) | |||
ModelProvider.writeProbe(params["model_dir"]) | |||
logger.info("Model training done") | |||
@spleeter.command(no_args_is_help=True) | |||
def separate( | |||
deprecated_files: Optional[str] = AudioInputOption, | |||
files: List[Path] = AudioInputArgument, | |||
adapter: str = AudioAdapterOption, | |||
bitrate: str = AudioBitrateOption, | |||
codec: Codec = AudioCodecOption, | |||
duration: float = AudioDurationOption, | |||
offset: float = AudioOffsetOption, | |||
output_path: Path = AudioOutputOption, | |||
stft_backend: STFTBackend = AudioSTFTBackendOption, | |||
filename_format: str = FilenameFormatOption, | |||
params_filename: str = ModelParametersOption, | |||
mwf: bool = MWFOption, | |||
verbose: bool = VerboseOption, | |||
) -> None: | |||
""" | |||
Separate audio file(s) | |||
""" | |||
from .audio.adapter import AudioAdapter | |||
from .separator import Separator | |||
configure_logger(verbose) | |||
if deprecated_files is not None: | |||
logger.error( | |||
"⚠️ -i option is not supported anymore, audio files must be supplied " | |||
"using input argument instead (see spleeter separate --help)" | |||
) | |||
raise Exit(20) | |||
audio_adapter: AudioAdapter = AudioAdapter.get(adapter) | |||
separator: Separator = Separator( | |||
params_filename, MWF=mwf, stft_backend=stft_backend | |||
) | |||
for filename in files: | |||
separator.separate_to_file( | |||
str(filename), | |||
str(output_path), | |||
audio_adapter=audio_adapter, | |||
offset=offset, | |||
duration=duration, | |||
codec=codec, | |||
bitrate=bitrate, | |||
filename_format=filename_format, | |||
synchronous=False, | |||
) | |||
separator.join() | |||
EVALUATION_SPLIT: str = "test" | |||
EVALUATION_METRICS_DIRECTORY: str = "metrics" | |||
EVALUATION_INSTRUMENTS: Container[str] = ("vocals", "drums", "bass", "other") | |||
EVALUATION_METRICS: Container[str] = ("SDR", "SAR", "SIR", "ISR") | |||
EVALUATION_MIXTURE: str = "mixture.wav" | |||
EVALUATION_AUDIO_DIRECTORY: str = "audio" | |||
def _compile_metrics(metrics_output_directory) -> Dict: | |||
""" | |||
Compiles metrics from given directory and returns results as dict. | |||
Parameters: | |||
metrics_output_directory (str): | |||
Directory to get metrics from. | |||
Returns: | |||
Dict: | |||
Compiled metrics as dict. | |||
""" | |||
import numpy as np | |||
import pandas as pd | |||
songs = glob(join(metrics_output_directory, "test/*.json")) | |||
index = pd.MultiIndex.from_tuples( | |||
product(EVALUATION_INSTRUMENTS, EVALUATION_METRICS), | |||
names=["instrument", "metric"], | |||
) | |||
pd.DataFrame([], index=["config1", "config2"], columns=index) | |||
metrics = { | |||
instrument: {k: [] for k in EVALUATION_METRICS} | |||
for instrument in EVALUATION_INSTRUMENTS | |||
} | |||
for song in songs: | |||
with open(song, "r") as stream: | |||
data = json.load(stream) | |||
for target in data["targets"]: | |||
instrument = target["name"] | |||
for metric in EVALUATION_METRICS: | |||
median_metric = np.median(
[
frame["metrics"][metric]
for frame in target["frames"]
if not np.isnan(frame["metrics"][metric])
]
)
metrics[instrument][metric].append(median_metric)
return metrics | |||
@spleeter.command(no_args_is_help=True) | |||
def evaluate( | |||
adapter: str = AudioAdapterOption, | |||
output_path: Path = AudioOutputOption, | |||
stft_backend: STFTBackend = AudioSTFTBackendOption, | |||
params_filename: str = ModelParametersOption, | |||
mus_dir: Path = MUSDBDirectoryOption, | |||
mwf: bool = MWFOption, | |||
verbose: bool = VerboseOption, | |||
) -> Dict: | |||
""" | |||
Evaluate a model on the musDB test dataset | |||
""" | |||
import numpy as np | |||
configure_logger(verbose) | |||
try: | |||
import musdb | |||
import museval | |||
except ImportError: | |||
logger.error("Extra dependencies musdb and museval not found") | |||
logger.error("Please install musdb and museval first, abort") | |||
raise Exit(10) | |||
# Separate musdb sources. | |||
songs = glob(join(mus_dir, EVALUATION_SPLIT, "*/")) | |||
mixtures = [join(song, EVALUATION_MIXTURE) for song in songs] | |||
audio_output_directory = join(output_path, EVALUATION_AUDIO_DIRECTORY) | |||
separate( | |||
deprecated_files=None, | |||
files=mixtures, | |||
adapter=adapter, | |||
bitrate="128k", | |||
codec=Codec.WAV, | |||
duration=600.0, | |||
offset=0, | |||
output_path=join(audio_output_directory, EVALUATION_SPLIT), | |||
stft_backend=stft_backend, | |||
filename_format="{foldername}/{instrument}.{codec}", | |||
params_filename=params_filename, | |||
mwf=mwf, | |||
verbose=verbose, | |||
) | |||
# Compute metrics with musdb. | |||
metrics_output_directory = join(output_path, EVALUATION_METRICS_DIRECTORY) | |||
logger.info("Starting musdb evaluation (this could be long) ...") | |||
dataset = musdb.DB(root=mus_dir, is_wav=True, subsets=[EVALUATION_SPLIT]) | |||
museval.eval_mus_dir( | |||
dataset=dataset, | |||
estimates_dir=audio_output_directory, | |||
output_dir=metrics_output_directory, | |||
) | |||
logger.info("musdb evaluation done") | |||
# Compute and pretty print median metrics. | |||
metrics = _compile_metrics(metrics_output_directory) | |||
for instrument, instrument_metrics in metrics.items():
logger.info(f"{instrument}:")
for metric, value in instrument_metrics.items():
logger.info(f"{metric}: {np.median(value):.3f}")
return metrics | |||
def entrypoint(): | |||
""" Application entrypoint. """ | |||
try: | |||
spleeter() | |||
except SpleeterError as e: | |||
logger.error(e) | |||
if __name__ == "__main__": | |||
entrypoint() |
@@ -0,0 +1,52 @@
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
`spleeter.audio` package provides various
tools for manipulating audio content such as:
- Audio adapter class for abstract interaction with audio files.
- FFMPEG implementation for audio adapter.
- Waveform conversion and transformation functions.
""" | |||
from enum import Enum | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class Codec(str, Enum): | |||
""" Enumeration of supported audio codec. """ | |||
WAV: str = "wav" | |||
MP3: str = "mp3" | |||
OGG: str = "ogg" | |||
M4A: str = "m4a" | |||
WMA: str = "wma" | |||
FLAC: str = "flac" | |||
class STFTBackend(str, Enum): | |||
""" Enumeration of supported STFT backend. """ | |||
AUTO: str = "auto" | |||
TENSORFLOW: str = "tensorflow" | |||
LIBROSA: str = "librosa" | |||
@classmethod | |||
def resolve(cls: type, backend: str) -> str: | |||
# NOTE: import is resolved here to avoid performance issues on command | |||
# evaluation. | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
if backend not in cls.__members__.values(): | |||
raise ValueError(f"Unsupported backend {backend}") | |||
if backend == cls.AUTO: | |||
if len(tf.config.list_physical_devices("GPU")): | |||
return cls.TENSORFLOW | |||
return cls.LIBROSA | |||
return backend |
@@ -0,0 +1,200 @@
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" AudioAdapter class defintion. """ | |||
from abc import ABC, abstractmethod | |||
from importlib import import_module | |||
from pathlib import Path | |||
from typing import Any, Dict, List, Optional, Union | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import numpy as np | |||
import tensorflow as tf | |||
from spleeter.audio import Codec | |||
from .. import SpleeterError | |||
from ..types import AudioDescriptor, Signal | |||
from ..utils.logging import logger | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class AudioAdapter(ABC): | |||
""" An abstract class for manipulating audio signal. """ | |||
_DEFAULT: "AudioAdapter" = None | |||
""" Default audio adapter singleton instance. """ | |||
@abstractmethod | |||
def load( | |||
self, | |||
audio_descriptor: AudioDescriptor, | |||
offset: Optional[float] = None, | |||
duration: Optional[float] = None, | |||
sample_rate: Optional[float] = None, | |||
dtype: np.dtype = np.float32, | |||
) -> Signal: | |||
""" | |||
Loads the audio file denoted by the given audio descriptor and
returns its data as a waveform. To be implemented by client adapters.
Parameters: | |||
audio_descriptor (AudioDescriptor): | |||
Describe song to load, in case of file based audio adapter, | |||
such descriptor would be a file path. | |||
offset (Optional[float]): | |||
Start offset to load from in seconds. | |||
duration (Optional[float]): | |||
Duration to load in seconds. | |||
sample_rate (Optional[float]): | |||
Sample rate to load audio with. | |||
dtype (numpy.dtype): | |||
(Optional) Numpy data type to use, default to `float32`. | |||
Returns: | |||
Signal: | |||
Loaded data as (wf, sample_rate) tuple. | |||
""" | |||
pass | |||
def load_tf_waveform( | |||
self, | |||
audio_descriptor, | |||
offset: float = 0.0, | |||
duration: float = 1800.0, | |||
sample_rate: int = 44100, | |||
dtype: bytes = b"float32", | |||
waveform_name: str = "waveform", | |||
) -> Dict[str, Any]: | |||
""" | |||
Load the audio and convert it to a tensorflow waveform. | |||
Parameters: | |||
audio_descriptor (): | |||
Describe song to load, in case of file based audio adapter, | |||
such descriptor would be a file path. | |||
offset (float): | |||
Start offset to load from in seconds. | |||
duration (float): | |||
Duration to load in seconds. | |||
sample_rate (float): | |||
Sample rate to load audio with. | |||
dtype (bytes): | |||
(Optional) Data type to use, default to `b'float32'`.
waveform_name (str): | |||
(Optional) Name of the key in output dict, default to | |||
`'waveform'`. | |||
Returns: | |||
Dict[str, Any]: | |||
TF output dict with waveform as `(T x chan numpy array)`
and a boolean that tells whether there was an error while
trying to load the waveform.
""" | |||
# Cast parameters to TF format. | |||
offset = tf.cast(offset, tf.float64) | |||
duration = tf.cast(duration, tf.float64) | |||
# Define safe loading function.
def safe_load(path, offset, duration, sample_rate, dtype): | |||
logger.info(f"Loading audio {path} from {offset} to {offset + duration}") | |||
try: | |||
(data, _) = self.load( | |||
path.numpy(), | |||
offset.numpy(), | |||
duration.numpy(), | |||
sample_rate.numpy(), | |||
dtype=dtype.numpy(), | |||
) | |||
logger.info("Audio data loaded successfully") | |||
return (data, False) | |||
except Exception as e: | |||
logger.exception("An error occurs while loading audio", exc_info=e) | |||
return (np.float32(-1.0), True) | |||
# Execute function and format results. | |||
results = ( | |||
tf.py_function( | |||
safe_load, | |||
[audio_descriptor, offset, duration, sample_rate, dtype], | |||
(tf.float32, tf.bool), | |||
), | |||
) | |||
waveform, error = results[0] | |||
return {waveform_name: waveform, f"{waveform_name}_error": error} | |||
@abstractmethod | |||
def save( | |||
self, | |||
path: Union[Path, str], | |||
data: np.ndarray, | |||
sample_rate: float, | |||
codec: Codec = None, | |||
bitrate: str = None, | |||
) -> None: | |||
""" | |||
Save the given audio data to the file denoted by the given path. | |||
Parameters: | |||
path (Union[Path, str]): | |||
Path like of the audio file to save data in. | |||
data (numpy.ndarray): | |||
Waveform data to write. | |||
sample_rate (float): | |||
Sample rate to write file in. | |||
codec (): | |||
(Optional) Writing codec to use, default to `None`. | |||
bitrate (str): | |||
(Optional) Bitrate of the written audio file, default to | |||
`None`. | |||
""" | |||
pass | |||
@classmethod | |||
def default(cls: type) -> "AudioAdapter": | |||
""" | |||
Builds and returns a default audio adapter instance. | |||
Returns: | |||
AudioAdapter: | |||
Default adapter instance to use. | |||
""" | |||
if cls._DEFAULT is None: | |||
from .ffmpeg import FFMPEGProcessAudioAdapter | |||
cls._DEFAULT = FFMPEGProcessAudioAdapter() | |||
return cls._DEFAULT | |||
@classmethod | |||
def get(cls: type, descriptor: str) -> "AudioAdapter": | |||
""" | |||
        Dynamically load an AudioAdapter from the given class descriptor.
Parameters: | |||
descriptor (str): | |||
Adapter class descriptor (module.Class) | |||
Returns: | |||
AudioAdapter: | |||
Created adapter instance. | |||
""" | |||
if not descriptor: | |||
return cls.default() | |||
module_path: List[str] = descriptor.split(".") | |||
adapter_class_name: str = module_path[-1] | |||
module_path: str = ".".join(module_path[:-1]) | |||
adapter_module = import_module(module_path) | |||
adapter_class = getattr(adapter_module, adapter_class_name) | |||
if not issubclass(adapter_class, AudioAdapter): | |||
raise SpleeterError( | |||
f"{adapter_class_name} is not a valid AudioAdapter class" | |||
) | |||
return adapter_class() |
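# A minimal usage sketch (illustrative, not part of the library). The audio
# path below is a hypothetical placeholder; loading requires the ffmpeg and
# ffprobe binaries to be installed.
if __name__ == "__main__":
    adapter = AudioAdapter.default()
    # Dynamic lookup by class descriptor resolves to the same adapter:
    # AudioAdapter.get("spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter")
    waveform, rate = adapter.load(
        "/path/to/audio.wav", offset=0.0, duration=5.0, sample_rate=44100
    )
    print(f"Loaded waveform of shape {waveform.shape} at {rate} Hz")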
@ -0,0 +1,139 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" This module provides audio data convertion functions. """ | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import numpy as np | |||
import tensorflow as tf | |||
from ..utils.tensor import from_float32_to_uint8, from_uint8_to_float32 | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def to_n_channels(waveform: tf.Tensor, n_channels: int) -> tf.Tensor: | |||
""" | |||
Convert a waveform to n_channels by removing or duplicating channels if | |||
needed (in tensorflow). | |||
Parameters: | |||
waveform (tensorflow.Tensor): | |||
Waveform to transform. | |||
        n_channels (int):
            Number of channels to reshape the waveform to.
Returns: | |||
tensorflow.Tensor: | |||
Reshaped waveform. | |||
""" | |||
return tf.cond( | |||
tf.shape(waveform)[1] >= n_channels, | |||
true_fn=lambda: waveform[:, :n_channels], | |||
false_fn=lambda: tf.tile(waveform, [1, n_channels])[:, :n_channels], | |||
) | |||
def to_stereo(waveform: np.ndarray) -> np.ndarray: | |||
""" | |||
Convert a waveform to stereo by duplicating if mono, or truncating | |||
if too many channels. | |||
Parameters: | |||
waveform (numpy.ndarray): | |||
a `(N, d)` numpy array. | |||
Returns: | |||
numpy.ndarray: | |||
            A stereo waveform as a `(N, 2)` numpy array.
""" | |||
if waveform.shape[1] == 1: | |||
return np.repeat(waveform, 2, axis=-1) | |||
if waveform.shape[1] > 2: | |||
return waveform[:, :2] | |||
return waveform | |||
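# Quick shape check for `to_stereo` (illustrative): a mono `(N, 1)` input is
# duplicated to `(N, 2)`, while extra channels are truncated to the first two.
if __name__ == "__main__":
    assert to_stereo(np.ones((100, 1))).shape == (100, 2)
    assert to_stereo(np.ones((100, 4))).shape == (100, 2)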
def gain_to_db(tensor: tf.Tensor, epsilon: float = 10e-10) -> tf.Tensor:
""" | |||
Convert from gain to decibel in tensorflow. | |||
Parameters: | |||
tensor (tensorflow.Tensor): | |||
Tensor to convert | |||
epsilon (float): | |||
Operation constant. | |||
Returns: | |||
tensorflow.Tensor: | |||
Converted tensor. | |||
""" | |||
    return 20.0 / np.log(10) * tf.math.log(tf.maximum(tensor, epsilon))
def db_to_gain(tensor: tf.Tensor) -> tf.Tensor: | |||
""" | |||
Convert from decibel to gain in tensorflow. | |||
Parameters: | |||
tensor (tensorflow.Tensor): | |||
Tensor to convert | |||
Returns: | |||
tensorflow.Tensor: | |||
Converted tensor. | |||
""" | |||
return tf.pow(10.0, (tensor / 20.0)) | |||
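# Numerical sanity check (illustrative): `gain_to_db` computes 20*log10(x),
# so a gain of 10 maps to 20 dB and `db_to_gain` inverts it. Assumes eager
# TensorFlow execution.
if __name__ == "__main__":
    gains = tf.constant([0.1, 1.0, 10.0])
    decibels = gain_to_db(gains)  # approximately [-20., 0., 20.]
    recovered = db_to_gain(decibels)  # back to approximately [0.1, 1., 10.]
    print(decibels.numpy(), recovered.numpy())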
def spectrogram_to_db_uint( | |||
spectrogram: tf.Tensor, db_range: float = 100.0, **kwargs | |||
) -> tf.Tensor: | |||
""" | |||
Encodes given spectrogram into uint8 using decibel scale. | |||
Parameters: | |||
spectrogram (tensorflow.Tensor): | |||
Spectrogram to be encoded as TF float tensor. | |||
db_range (float): | |||
Range in decibel for encoding. | |||
Returns: | |||
tensorflow.Tensor: | |||
Encoded decibel spectrogram as `uint8` tensor. | |||
""" | |||
db_spectrogram: tf.Tensor = gain_to_db(spectrogram) | |||
max_db_spectrogram: tf.Tensor = tf.reduce_max(db_spectrogram) | |||
db_spectrogram: tf.Tensor = tf.maximum( | |||
db_spectrogram, max_db_spectrogram - db_range | |||
) | |||
return from_float32_to_uint8(db_spectrogram, **kwargs) | |||
def db_uint_spectrogram_to_gain( | |||
db_uint_spectrogram: tf.Tensor, min_db: tf.Tensor, max_db: tf.Tensor | |||
) -> tf.Tensor: | |||
""" | |||
Decode spectrogram from uint8 decibel scale. | |||
    Parameters:
db_uint_spectrogram (tensorflow.Tensor): | |||
Decibel spectrogram to decode. | |||
min_db (tensorflow.Tensor): | |||
Lower bound limit for decoding. | |||
max_db (tensorflow.Tensor): | |||
Upper bound limit for decoding. | |||
Returns: | |||
tensorflow.Tensor: | |||
Decoded spectrogram as `float32` tensor. | |||
""" | |||
db_spectrogram: tf.Tensor = from_uint8_to_float32( | |||
db_uint_spectrogram, min_db, max_db | |||
) | |||
return db_to_gain(db_spectrogram) |
@ -0,0 +1,185 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
This module provides an AudioAdapter implementation based on FFMPEG | |||
process. Such an implementation is POSIX-like and depends on nothing except
standard Python libraries. Thus this implementation is the default one | |||
used within this library. | |||
""" | |||
import datetime as dt | |||
import os | |||
import shutil | |||
from pathlib import Path | |||
from typing import Dict, Optional, Union | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import ffmpeg | |||
import numpy as np | |||
from .. import SpleeterError | |||
from ..types import Signal | |||
from ..utils.logging import logger | |||
from . import Codec | |||
from .adapter import AudioAdapter | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class FFMPEGProcessAudioAdapter(AudioAdapter): | |||
""" | |||
    An AudioAdapter implementation that uses the FFMPEG binary through
    a subprocess in order to perform I/O operations for audio processing.
    When created, the FFMPEG binary path will be checked and expanded,
    raising an exception if not found. Such path can be inferred using
    the `FFMPEG_PATH` environment variable.
""" | |||
SUPPORTED_CODECS: Dict[Codec, str] = { | |||
Codec.M4A: "aac", | |||
Codec.OGG: "libvorbis", | |||
Codec.WMA: "wmav2", | |||
} | |||
""" FFMPEG codec name mapping. """ | |||
    def __init__(self) -> None:
""" | |||
Default constructor, ensure FFMPEG binaries are available. | |||
Raises: | |||
SpleeterError: | |||
If ffmpeg or ffprobe is not found. | |||
""" | |||
for binary in ("ffmpeg", "ffprobe"): | |||
if shutil.which(binary) is None: | |||
raise SpleeterError("{} binary not found".format(binary)) | |||
    def load(
        self,
path: Union[Path, str], | |||
offset: Optional[float] = None, | |||
duration: Optional[float] = None, | |||
sample_rate: Optional[float] = None, | |||
dtype: np.dtype = np.float32, | |||
) -> Signal: | |||
""" | |||
        Loads the audio file denoted by the given path
        and returns its data as a waveform.
        Parameters:
            path (Union[Path, str]):
Path of the audio file to load data from. | |||
offset (Optional[float]): | |||
Start offset to load from in seconds. | |||
duration (Optional[float]): | |||
Duration to load in seconds. | |||
sample_rate (Optional[float]): | |||
Sample rate to load audio with. | |||
dtype (numpy.dtype): | |||
(Optional) Numpy data type to use, default to `float32`. | |||
Returns: | |||
Signal: | |||
                Loaded data as a `(waveform, sample_rate)` tuple.
Raises: | |||
SpleeterError: | |||
If any error occurs while loading audio. | |||
""" | |||
if isinstance(path, Path): | |||
path = str(path) | |||
if not isinstance(path, str): | |||
path = path.decode() | |||
try: | |||
probe = ffmpeg.probe(path) | |||
except ffmpeg._run.Error as e: | |||
raise SpleeterError( | |||
"An error occurs with ffprobe (see ffprobe output below)\n\n{}".format( | |||
e.stderr.decode() | |||
) | |||
) | |||
if "streams" not in probe or len(probe["streams"]) == 0: | |||
raise SpleeterError("No stream was found with ffprobe") | |||
metadata = next( | |||
stream for stream in probe["streams"] if stream["codec_type"] == "audio" | |||
) | |||
n_channels = metadata["channels"] | |||
if sample_rate is None: | |||
sample_rate = metadata["sample_rate"] | |||
output_kwargs = {"format": "f32le", "ar": sample_rate} | |||
if duration is not None: | |||
output_kwargs["t"] = str(dt.timedelta(seconds=duration)) | |||
if offset is not None: | |||
output_kwargs["ss"] = str(dt.timedelta(seconds=offset)) | |||
process = ( | |||
ffmpeg.input(path) | |||
.output("pipe:", **output_kwargs) | |||
.run_async(pipe_stdout=True, pipe_stderr=True) | |||
) | |||
buffer, _ = process.communicate() | |||
waveform = np.frombuffer(buffer, dtype="<f4").reshape(-1, n_channels) | |||
        if waveform.dtype != np.dtype(dtype):
waveform = waveform.astype(dtype) | |||
return (waveform, sample_rate) | |||
def save( | |||
self, | |||
path: Union[Path, str], | |||
data: np.ndarray, | |||
sample_rate: float, | |||
codec: Codec = None, | |||
bitrate: str = None, | |||
) -> None: | |||
""" | |||
Write waveform data to the file denoted by the given path using | |||
FFMPEG process. | |||
Parameters: | |||
path (Union[Path, str]): | |||
Path like of the audio file to save data in. | |||
data (numpy.ndarray): | |||
Waveform data to write. | |||
sample_rate (float): | |||
Sample rate to write file in. | |||
codec (): | |||
(Optional) Writing codec to use, default to `None`. | |||
bitrate (str): | |||
(Optional) Bitrate of the written audio file, default to | |||
`None`. | |||
Raises: | |||
IOError: | |||
If any error occurs while using FFMPEG to write data. | |||
""" | |||
if isinstance(path, Path): | |||
path = str(path) | |||
directory = os.path.dirname(path) | |||
if not os.path.exists(directory): | |||
raise SpleeterError(f"output directory does not exists: {directory}") | |||
logger.debug(f"Writing file {path}") | |||
input_kwargs = {"ar": sample_rate, "ac": data.shape[1]} | |||
output_kwargs = {"ar": sample_rate, "strict": "-2"} | |||
if bitrate: | |||
output_kwargs["audio_bitrate"] = bitrate | |||
if codec is not None and codec != "wav": | |||
output_kwargs["codec"] = self.SUPPORTED_CODECS.get(codec, codec) | |||
process = ( | |||
ffmpeg.input("pipe:", format="f32le", **input_kwargs) | |||
.output(path, **output_kwargs) | |||
.overwrite_output() | |||
.run_async(pipe_stdin=True, pipe_stderr=True, quiet=True) | |||
) | |||
try: | |||
process.stdin.write(data.astype("<f4").tobytes()) | |||
process.stdin.close() | |||
process.wait() | |||
except IOError: | |||
raise SpleeterError(f"FFMPEG error: {process.stderr.read()}") | |||
logger.info(f"File {path} written succesfully") |
@ -0,0 +1,176 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Spectrogram specific data augmentation. """ | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import numpy as np | |||
import tensorflow as tf | |||
from tensorflow.signal import hann_window, stft | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def compute_spectrogram_tf( | |||
waveform: tf.Tensor, | |||
frame_length: int = 2048, | |||
frame_step: int = 512, | |||
spec_exponent: float = 1.0, | |||
window_exponent: float = 1.0, | |||
) -> tf.Tensor: | |||
""" | |||
Compute magnitude / power spectrogram from waveform as a | |||
`n_samples x n_channels` tensor. | |||
Parameters: | |||
waveform (tensorflow.Tensor): | |||
Input waveform as `(times x number of channels)` tensor. | |||
frame_length (int): | |||
Length of a STFT frame to use. | |||
        frame_step (int):
            Hop size between successive frames.
spec_exponent (float): | |||
Exponent of the spectrogram (usually 1 for magnitude | |||
spectrogram, or 2 for power spectrogram). | |||
window_exponent (float): | |||
Exponent applied to the Hann windowing function (may be | |||
useful for making perfect STFT/iSTFT reconstruction). | |||
Returns: | |||
tensorflow.Tensor: | |||
Computed magnitude / power spectrogram as a | |||
`(T x F x n_channels)` tensor. | |||
""" | |||
stft_tensor: tf.Tensor = tf.transpose( | |||
stft( | |||
tf.transpose(waveform), | |||
frame_length, | |||
frame_step, | |||
window_fn=lambda f, dtype: hann_window( | |||
f, periodic=True, dtype=waveform.dtype | |||
) | |||
** window_exponent, | |||
), | |||
perm=[1, 2, 0], | |||
) | |||
return tf.abs(stft_tensor) ** spec_exponent | |||
def time_stretch( | |||
spectrogram: tf.Tensor, | |||
factor: float = 1.0, | |||
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR, | |||
) -> tf.Tensor: | |||
""" | |||
Time stretch a spectrogram preserving shape in tensorflow. Note that | |||
this is an approximation in the frequency domain. | |||
Parameters: | |||
spectrogram (tensorflow.Tensor): | |||
Input spectrogram to be time stretched as tensor. | |||
factor (float): | |||
(Optional) Time stretch factor, must be > 0, default to `1`. | |||
method (tensorflow.image.ResizeMethod): | |||
(Optional) Interpolation method, default to `BILINEAR`. | |||
Returns: | |||
tensorflow.Tensor: | |||
Time stretched spectrogram as tensor with same shape. | |||
""" | |||
T = tf.shape(spectrogram)[0] | |||
T_ts = tf.cast(tf.cast(T, tf.float32) * factor, tf.int32)[0] | |||
F = tf.shape(spectrogram)[1] | |||
ts_spec = tf.image.resize_images( | |||
spectrogram, [T_ts, F], method=method, align_corners=True | |||
) | |||
return tf.image.resize_image_with_crop_or_pad(ts_spec, T, F) | |||
def random_time_stretch( | |||
spectrogram: tf.Tensor, factor_min: float = 0.9, factor_max: float = 1.1, **kwargs | |||
) -> tf.Tensor: | |||
""" | |||
Time stretch a spectrogram preserving shape with random ratio in | |||
tensorflow. Applies time_stretch to spectrogram with a random ratio | |||
drawn uniformly in `[factor_min, factor_max]`. | |||
Parameters: | |||
spectrogram (tensorflow.Tensor): | |||
Input spectrogram to be time stretched as tensor. | |||
factor_min (float): | |||
(Optional) Min time stretch factor, default to `0.9`. | |||
factor_max (float): | |||
(Optional) Max time stretch factor, default to `1.1`. | |||
Returns: | |||
tensorflow.Tensor: | |||
Randomly time stretched spectrogram as tensor with same shape. | |||
""" | |||
factor = ( | |||
tf.random_uniform(shape=(1,), seed=0) * (factor_max - factor_min) + factor_min | |||
) | |||
return time_stretch(spectrogram, factor=factor, **kwargs) | |||
def pitch_shift( | |||
spectrogram: tf.Tensor, | |||
semitone_shift: float = 0.0, | |||
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR, | |||
) -> tf.Tensor: | |||
""" | |||
Pitch shift a spectrogram preserving shape in tensorflow. Note that | |||
this is an approximation in the frequency domain. | |||
Parameters: | |||
spectrogram (tensorflow.Tensor): | |||
Input spectrogram to be pitch shifted as tensor. | |||
semitone_shift (float): | |||
(Optional) Pitch shift in semitone, default to `0.0`. | |||
method (tensorflow.image.ResizeMethod): | |||
(Optional) Interpolation method, default to `BILINEAR`. | |||
Returns: | |||
tensorflow.Tensor: | |||
Pitch shifted spectrogram (same shape as spectrogram). | |||
""" | |||
factor = 2 ** (semitone_shift / 12.0) | |||
T = tf.shape(spectrogram)[0] | |||
F = tf.shape(spectrogram)[1] | |||
F_ps = tf.cast(tf.cast(F, tf.float32) * factor, tf.int32)[0] | |||
ps_spec = tf.image.resize_images( | |||
spectrogram, [T, F_ps], method=method, align_corners=True | |||
) | |||
paddings = [[0, 0], [0, tf.maximum(0, F - F_ps)], [0, 0]] | |||
return tf.pad(ps_spec[:, :F, :], paddings, "CONSTANT") | |||
def random_pitch_shift( | |||
spectrogram: tf.Tensor, shift_min: float = -1.0, shift_max: float = 1.0, **kwargs | |||
) -> tf.Tensor: | |||
""" | |||
Pitch shift a spectrogram preserving shape with random ratio in | |||
tensorflow. Applies pitch_shift to spectrogram with a random shift | |||
amount (expressed in semitones) drawn uniformly in | |||
`[shift_min, shift_max]`. | |||
Parameters: | |||
spectrogram (tensorflow.Tensor): | |||
Input spectrogram to be pitch shifted as tensor. | |||
shift_min (float): | |||
(Optional) Min pitch shift in semitone, default to -1. | |||
shift_max (float): | |||
(Optional) Max pitch shift in semitone, default to 1. | |||
Returns: | |||
tensorflow.Tensor: | |||
Randomly pitch shifted spectrogram (same shape as spectrogram). | |||
""" | |||
semitone_shift = ( | |||
tf.random_uniform(shape=(1,), seed=0) * (shift_max - shift_min) + shift_min | |||
) | |||
return pitch_shift(spectrogram, semitone_shift=semitone_shift, **kwargs) |
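# Usage sketch (illustrative): computing a magnitude spectrogram from two
# seconds of random stereo audio. Note that the augmentation helpers above
# use TF1-style ops (`tf.random_uniform`, `tf.image.resize_images`) and are
# intended to run inside a graph-mode input pipeline.
if __name__ == "__main__":
    waveform = tf.random.normal((2 * 44100, 2))
    spectrogram = compute_spectrogram_tf(waveform, frame_length=4096, frame_step=1024)
    print(spectrogram.shape)  # (n_frames, 2049, 2)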
@ -0,0 +1,625 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
Module for building data preprocessing pipeline using the tensorflow | |||
data API. Data preprocessing such as audio loading, spectrogram | |||
computation, cropping, feature caching or data augmentation is done | |||
using a tensorflow dataset object that outputs a tuple (input_, output)
where:
- input_ is a dictionary with a single key that contains the (batched)
  mix spectrogram of audio samples
- output is a dictionary of spectrograms of the isolated tracks
  (ground truth)
""" | |||
import os | |||
import time | |||
from os.path import exists | |||
from os.path import sep as SEPARATOR | |||
from typing import Any, Dict, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
from .audio.adapter import AudioAdapter | |||
from .audio.convertor import db_uint_spectrogram_to_gain, spectrogram_to_db_uint | |||
from .audio.spectrogram import ( | |||
compute_spectrogram_tf, | |||
random_pitch_shift, | |||
random_time_stretch, | |||
) | |||
from .utils.logging import logger | |||
from .utils.tensor import ( | |||
check_tensor_shape, | |||
dataset_from_csv, | |||
set_tensor_shape, | |||
sync_apply, | |||
) | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
# Default audio parameters to use. | |||
DEFAULT_AUDIO_PARAMS: Dict = { | |||
"instrument_list": ("vocals", "accompaniment"), | |||
"mix_name": "mix", | |||
"sample_rate": 44100, | |||
"frame_length": 4096, | |||
"frame_step": 1024, | |||
"T": 512, | |||
"F": 1024, | |||
} | |||
def get_training_dataset( | |||
audio_params: Dict, audio_adapter: AudioAdapter, audio_path: str | |||
) -> Any: | |||
""" | |||
Builds training dataset. | |||
Parameters: | |||
audio_params (Dict): | |||
Audio parameters. | |||
audio_adapter (AudioAdapter): | |||
Adapter to load audio from. | |||
audio_path (str): | |||
Path of directory containing audio. | |||
Returns: | |||
Any: | |||
Built dataset. | |||
""" | |||
builder = DatasetBuilder( | |||
audio_params, | |||
audio_adapter, | |||
audio_path, | |||
chunk_duration=audio_params.get("chunk_duration", 20.0), | |||
random_seed=audio_params.get("random_seed", 0), | |||
) | |||
return builder.build( | |||
audio_params.get("train_csv"), | |||
cache_directory=audio_params.get("training_cache"), | |||
batch_size=audio_params.get("batch_size"), | |||
n_chunks_per_song=audio_params.get("n_chunks_per_song", 2), | |||
random_data_augmentation=False, | |||
convert_to_uint=True, | |||
wait_for_cache=False, | |||
) | |||
def get_validation_dataset( | |||
audio_params: Dict, audio_adapter: AudioAdapter, audio_path: str | |||
) -> Any: | |||
""" | |||
Builds validation dataset. | |||
Parameters: | |||
audio_params (Dict): | |||
Audio parameters. | |||
audio_adapter (AudioAdapter): | |||
Adapter to load audio from. | |||
audio_path (str): | |||
Path of directory containing audio. | |||
Returns: | |||
Any: | |||
Built dataset. | |||
""" | |||
builder = DatasetBuilder( | |||
audio_params, audio_adapter, audio_path, chunk_duration=12.0 | |||
) | |||
return builder.build( | |||
audio_params.get("validation_csv"), | |||
batch_size=audio_params.get("batch_size"), | |||
cache_directory=audio_params.get("validation_cache"), | |||
convert_to_uint=True, | |||
infinite_generator=False, | |||
n_chunks_per_song=1, | |||
# should not perform data augmentation for eval: | |||
random_data_augmentation=False, | |||
random_time_crop=False, | |||
shuffle=False, | |||
) | |||
class InstrumentDatasetBuilder(object): | |||
""" Instrument based filter and mapper provider. """ | |||
def __init__(self, parent, instrument) -> None: | |||
""" | |||
Default constructor. | |||
Parameters: | |||
parent: | |||
Parent dataset builder. | |||
instrument: | |||
Target instrument. | |||
""" | |||
self._parent = parent | |||
self._instrument = instrument | |||
self._spectrogram_key = f"{instrument}_spectrogram" | |||
self._min_spectrogram_key = f"min_{instrument}_spectrogram" | |||
self._max_spectrogram_key = f"max_{instrument}_spectrogram" | |||
def load_waveform(self, sample): | |||
""" Load waveform for given sample. """ | |||
return dict( | |||
sample, | |||
**self._parent._audio_adapter.load_tf_waveform( | |||
sample[f"{self._instrument}_path"], | |||
offset=sample["start"], | |||
duration=self._parent._chunk_duration, | |||
sample_rate=self._parent._sample_rate, | |||
waveform_name="waveform", | |||
), | |||
) | |||
def compute_spectrogram(self, sample): | |||
""" Compute spectrogram of the given sample. """ | |||
return dict( | |||
sample, | |||
**{ | |||
self._spectrogram_key: compute_spectrogram_tf( | |||
sample["waveform"], | |||
frame_length=self._parent._frame_length, | |||
frame_step=self._parent._frame_step, | |||
spec_exponent=1.0, | |||
window_exponent=1.0, | |||
) | |||
}, | |||
) | |||
def filter_frequencies(self, sample): | |||
""" """ | |||
return dict( | |||
sample, | |||
**{ | |||
self._spectrogram_key: sample[self._spectrogram_key][ | |||
:, : self._parent._F, : | |||
] | |||
}, | |||
) | |||
def convert_to_uint(self, sample): | |||
""" Convert given sample from float to unit. """ | |||
return dict( | |||
sample, | |||
**spectrogram_to_db_uint( | |||
sample[self._spectrogram_key], | |||
tensor_key=self._spectrogram_key, | |||
min_key=self._min_spectrogram_key, | |||
max_key=self._max_spectrogram_key, | |||
), | |||
) | |||
def filter_infinity(self, sample): | |||
""" Filter infinity sample. """ | |||
return tf.logical_not(tf.math.is_inf(sample[self._min_spectrogram_key])) | |||
def convert_to_float32(self, sample): | |||
""" Convert given sample from unit to float. """ | |||
return dict( | |||
sample, | |||
**{ | |||
self._spectrogram_key: db_uint_spectrogram_to_gain( | |||
sample[self._spectrogram_key], | |||
sample[self._min_spectrogram_key], | |||
sample[self._max_spectrogram_key], | |||
) | |||
}, | |||
) | |||
def time_crop(self, sample): | |||
""" """ | |||
def start(sample): | |||
""" mid_segment_start """ | |||
return tf.cast( | |||
tf.maximum( | |||
tf.shape(sample[self._spectrogram_key])[0] / 2 | |||
- self._parent._T / 2, | |||
0, | |||
), | |||
tf.int32, | |||
) | |||
return dict( | |||
sample, | |||
**{ | |||
self._spectrogram_key: sample[self._spectrogram_key][ | |||
start(sample) : start(sample) + self._parent._T, :, : | |||
] | |||
}, | |||
) | |||
def filter_shape(self, sample): | |||
""" Filter badly shaped sample. """ | |||
return check_tensor_shape( | |||
sample[self._spectrogram_key], | |||
(self._parent._T, self._parent._F, self._parent._n_channels), | |||
) | |||
def reshape_spectrogram(self, sample): | |||
""" Reshape given sample. """ | |||
return dict( | |||
sample, | |||
**{ | |||
self._spectrogram_key: set_tensor_shape( | |||
sample[self._spectrogram_key], | |||
(self._parent._T, self._parent._F, self._parent._n_channels), | |||
) | |||
}, | |||
) | |||
class DatasetBuilder(object): | |||
""" | |||
    Builds tensorflow datasets for training and validation: segments
    songs into chunks, loads waveforms, computes spectrograms, and
    applies optional caching, augmentation, shuffling and batching.
""" | |||
MARGIN: float = 0.5 | |||
""" Margin at beginning and end of songs in seconds. """ | |||
WAIT_PERIOD: int = 60 | |||
""" Wait period for cache (in seconds). """ | |||
def __init__( | |||
self, | |||
audio_params: Dict, | |||
audio_adapter: AudioAdapter, | |||
audio_path: str, | |||
random_seed: int = 0, | |||
chunk_duration: float = 20.0, | |||
) -> None: | |||
""" | |||
Default constructor. | |||
        Parameters:
            audio_params (Dict):
                Audio parameters to use.
            audio_adapter (AudioAdapter):
                Audio adapter to use.
            audio_path (str):
                Path of the directory containing audio files.
            random_seed (int):
                Seed to use for random operations (shuffling, cropping).
            chunk_duration (float):
                Duration in seconds of the chunks extracted from songs.
""" | |||
# Length of segment in frames (if fs=22050 and | |||
# frame_step=512, then T=512 corresponds to 11.89s) | |||
self._T = audio_params["T"] | |||
# Number of frequency bins to be used (should | |||
# be less than frame_length/2 + 1) | |||
self._F = audio_params["F"] | |||
self._sample_rate = audio_params["sample_rate"] | |||
self._frame_length = audio_params["frame_length"] | |||
self._frame_step = audio_params["frame_step"] | |||
self._mix_name = audio_params["mix_name"] | |||
self._n_channels = audio_params["n_channels"] | |||
self._instruments = [self._mix_name] + audio_params["instrument_list"] | |||
self._instrument_builders = None | |||
self._chunk_duration = chunk_duration | |||
self._audio_adapter = audio_adapter | |||
self._audio_params = audio_params | |||
self._audio_path = audio_path | |||
self._random_seed = random_seed | |||
self.check_parameters_compatibility() | |||
def check_parameters_compatibility(self): | |||
if self._frame_length / 2 + 1 < self._F: | |||
raise ValueError( | |||
"F is too large and must be set to at most frame_length/2+1. Decrease F or increase frame_length to fix." | |||
) | |||
if ( | |||
self._chunk_duration * self._sample_rate - self._frame_length | |||
) / self._frame_step < self._T: | |||
raise ValueError( | |||
"T is too large considering STFT parameters and chunk duratoin. Make sure spectrogram time dimension of chunks is larger than T (for instance reducing T or frame_step or increasing chunk duration)." | |||
) | |||
def expand_path(self, sample): | |||
""" Expands audio paths for the given sample. """ | |||
return dict( | |||
sample, | |||
**{ | |||
f"{instrument}_path": tf.strings.join( | |||
(self._audio_path, sample[f"{instrument}_path"]), SEPARATOR | |||
) | |||
for instrument in self._instruments | |||
}, | |||
) | |||
def filter_error(self, sample): | |||
""" Filter errored sample. """ | |||
return tf.logical_not(sample["waveform_error"]) | |||
    def filter_waveform(self, sample):
        """ Remove the waveform entry from the given sample. """
        return {k: v for k, v in sample.items() if k != "waveform"}
def harmonize_spectrogram(self, sample): | |||
""" Ensure same size for vocals and mix spectrograms. """ | |||
def _reduce(sample): | |||
return tf.reduce_min( | |||
[ | |||
tf.shape(sample[f"{instrument}_spectrogram"])[0] | |||
for instrument in self._instruments | |||
] | |||
) | |||
return dict( | |||
sample, | |||
**{ | |||
f"{instrument}_spectrogram": sample[f"{instrument}_spectrogram"][ | |||
: _reduce(sample), :, : | |||
] | |||
for instrument in self._instruments | |||
}, | |||
) | |||
def filter_short_segments(self, sample): | |||
""" Filter out too short segment. """ | |||
return tf.reduce_any( | |||
[ | |||
tf.shape(sample[f"{instrument}_spectrogram"])[0] >= self._T | |||
for instrument in self._instruments | |||
] | |||
) | |||
def random_time_crop(self, sample): | |||
""" Random time crop of 11.88s. """ | |||
return dict( | |||
sample, | |||
**sync_apply( | |||
{ | |||
f"{instrument}_spectrogram": sample[f"{instrument}_spectrogram"] | |||
for instrument in self._instruments | |||
}, | |||
lambda x: tf.image.random_crop( | |||
x, | |||
(self._T, len(self._instruments) * self._F, self._n_channels), | |||
seed=self._random_seed, | |||
), | |||
), | |||
) | |||
def random_time_stretch(self, sample): | |||
""" Randomly time stretch the given sample. """ | |||
return dict( | |||
sample, | |||
**sync_apply( | |||
{ | |||
f"{instrument}_spectrogram": sample[f"{instrument}_spectrogram"] | |||
for instrument in self._instruments | |||
}, | |||
lambda x: random_time_stretch(x, factor_min=0.9, factor_max=1.1), | |||
), | |||
) | |||
def random_pitch_shift(self, sample): | |||
""" Randomly pitch shift the given sample. """ | |||
return dict( | |||
sample, | |||
**sync_apply( | |||
{ | |||
f"{instrument}_spectrogram": sample[f"{instrument}_spectrogram"] | |||
for instrument in self._instruments | |||
}, | |||
lambda x: random_pitch_shift(x, shift_min=-1.0, shift_max=1.0), | |||
concat_axis=0, | |||
), | |||
) | |||
def map_features(self, sample): | |||
""" Select features and annotation of the given sample. """ | |||
input_ = { | |||
f"{self._mix_name}_spectrogram": sample[f"{self._mix_name}_spectrogram"] | |||
} | |||
output = { | |||
f"{instrument}_spectrogram": sample[f"{instrument}_spectrogram"] | |||
for instrument in self._audio_params["instrument_list"] | |||
} | |||
return (input_, output) | |||
def compute_segments(self, dataset: Any, n_chunks_per_song: int) -> Any: | |||
""" | |||
Computes segments for each song of the dataset. | |||
Parameters: | |||
dataset (Any): | |||
Dataset to compute segments for. | |||
n_chunks_per_song (int): | |||
                Number of segments per song to compute.
Returns: | |||
Any: | |||
Segmented dataset. | |||
""" | |||
if n_chunks_per_song <= 0: | |||
raise ValueError("n_chunks_per_song must be positif") | |||
datasets = [] | |||
for k in range(n_chunks_per_song): | |||
if n_chunks_per_song > 1: | |||
datasets.append( | |||
dataset.map( | |||
lambda sample: dict( | |||
sample, | |||
start=tf.maximum( | |||
k | |||
* ( | |||
sample["duration"] | |||
- self._chunk_duration | |||
- 2 * self.MARGIN | |||
) | |||
/ (n_chunks_per_song - 1) | |||
+ self.MARGIN, | |||
0, | |||
), | |||
) | |||
) | |||
) | |||
elif n_chunks_per_song == 1: # Take central segment. | |||
datasets.append( | |||
dataset.map( | |||
lambda sample: dict( | |||
sample, | |||
start=tf.maximum( | |||
sample["duration"] / 2 - self._chunk_duration / 2, 0 | |||
), | |||
) | |||
) | |||
) | |||
dataset = datasets[-1] | |||
for d in datasets[:-1]: | |||
dataset = dataset.concatenate(d) | |||
return dataset | |||
@property | |||
def instruments(self) -> Any: | |||
""" | |||
Instrument dataset builder generator. | |||
Yields: | |||
Any: | |||
InstrumentBuilder instance. | |||
""" | |||
if self._instrument_builders is None: | |||
self._instrument_builders = [] | |||
for instrument in self._instruments: | |||
self._instrument_builders.append( | |||
InstrumentDatasetBuilder(self, instrument) | |||
) | |||
for builder in self._instrument_builders: | |||
yield builder | |||
def cache(self, dataset: Any, cache: str, wait: bool) -> Any: | |||
""" | |||
        Cache the given dataset if cache is enabled. Optionally waits for
        the cache to be available (useful if another process is already
        computing it) when the provided wait flag is `True`.
        Parameters:
            dataset (Any):
                Dataset to be cached if cache is required.
            cache (str):
                Path of cache directory to be used, None if no cache.
            wait (bool):
                If caching is enabled, True if the cache should be
                waited for.
Returns: | |||
Any: | |||
Cached dataset if needed, original dataset otherwise. | |||
""" | |||
if cache is not None: | |||
if wait: | |||
while not exists(f"{cache}.index"): | |||
logger.info(f"Cache not available, wait {self.WAIT_PERIOD}") | |||
time.sleep(self.WAIT_PERIOD) | |||
cache_path = os.path.split(cache)[0] | |||
os.makedirs(cache_path, exist_ok=True) | |||
return dataset.cache(cache) | |||
return dataset | |||
def build( | |||
self, | |||
csv_path: str, | |||
batch_size: int = 8, | |||
shuffle: bool = True, | |||
convert_to_uint: bool = True, | |||
random_data_augmentation: bool = False, | |||
random_time_crop: bool = True, | |||
infinite_generator: bool = True, | |||
cache_directory: Optional[str] = None, | |||
wait_for_cache: bool = False, | |||
num_parallel_calls: int = 4, | |||
        n_chunks_per_song: int = 2,
) -> Any: | |||
""" | |||
        Builds the dataset from the given CSV: computes segments, loads
        audio, computes spectrograms, then applies optional caching,
        cropping, augmentation, shuffling and batching.
""" | |||
dataset = dataset_from_csv(csv_path) | |||
dataset = self.compute_segments(dataset, n_chunks_per_song) | |||
# Shuffle data | |||
if shuffle: | |||
dataset = dataset.shuffle( | |||
buffer_size=200000, | |||
seed=self._random_seed, | |||
# useless since it is cached : | |||
reshuffle_each_iteration=True, | |||
) | |||
# Expand audio path. | |||
dataset = dataset.map(self.expand_path) | |||
# Load waveform, compute spectrogram, and filtering error, | |||
# K bins frequencies, and waveform. | |||
N = num_parallel_calls | |||
for instrument in self.instruments: | |||
dataset = ( | |||
dataset.map(instrument.load_waveform, num_parallel_calls=N) | |||
.filter(self.filter_error) | |||
.map(instrument.compute_spectrogram, num_parallel_calls=N) | |||
.map(instrument.filter_frequencies) | |||
) | |||
dataset = dataset.map(self.filter_waveform) | |||
# Convert to uint before caching in order to save space. | |||
if convert_to_uint: | |||
for instrument in self.instruments: | |||
dataset = dataset.map(instrument.convert_to_uint) | |||
dataset = self.cache(dataset, cache_directory, wait_for_cache) | |||
# Check for INFINITY (should not happen) | |||
for instrument in self.instruments: | |||
dataset = dataset.filter(instrument.filter_infinity) | |||
        # Repeat indefinitely.
if infinite_generator: | |||
dataset = dataset.repeat(count=-1) | |||
# Ensure same size for vocals and mix spectrograms. | |||
# NOTE: could be done before caching ? | |||
dataset = dataset.map(self.harmonize_spectrogram) | |||
# Filter out too short segment. | |||
# NOTE: could be done before caching ? | |||
dataset = dataset.filter(self.filter_short_segments) | |||
# Random time crop of 11.88s | |||
if random_time_crop: | |||
dataset = dataset.map(self.random_time_crop, num_parallel_calls=N) | |||
else: | |||
# frame_duration = 11.88/T | |||
# take central segment (for validation) | |||
for instrument in self.instruments: | |||
dataset = dataset.map(instrument.time_crop) | |||
# Post cache shuffling. Done where the data are the lightest: | |||
        # after cropping but before converting back to float.
if shuffle: | |||
dataset = dataset.shuffle( | |||
buffer_size=256, seed=self._random_seed, reshuffle_each_iteration=True | |||
) | |||
# Convert back to float32 | |||
if convert_to_uint: | |||
for instrument in self.instruments: | |||
dataset = dataset.map( | |||
instrument.convert_to_float32, num_parallel_calls=N | |||
) | |||
M = 8 # Parallel call post caching. | |||
# Must be applied with the same factor on mix and vocals. | |||
if random_data_augmentation: | |||
dataset = dataset.map(self.random_time_stretch, num_parallel_calls=M).map( | |||
self.random_pitch_shift, num_parallel_calls=M | |||
) | |||
# Filter by shape (remove badly shaped tensors). | |||
for instrument in self.instruments: | |||
dataset = dataset.filter(instrument.filter_shape).map( | |||
instrument.reshape_spectrogram | |||
) | |||
# Select features and annotation. | |||
dataset = dataset.map(self.map_features) | |||
# Make batch (done after selection to avoid | |||
# error due to unprocessed instrument spectrogram batching). | |||
dataset = dataset.batch(batch_size) | |||
return dataset |
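# Usage sketch (illustrative): building a training dataset. The CSV path and
# audio directory are hypothetical placeholders; the CSV is expected to list
# per-instrument `{instrument}_path` columns and a `duration` column.
if __name__ == "__main__":
    params = dict(
        DEFAULT_AUDIO_PARAMS,
        n_channels=2,
        train_csv="/path/to/train.csv",
        batch_size=8,
    )
    dataset = get_training_dataset(params, AudioAdapter.default(), "/path/to/audio")
    for features, labels in dataset.take(1):
        print(features["mix_spectrogram"].shape)  # (batch, T, F, n_channels)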
@ -0,0 +1,573 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" This package provide an estimator builder as well as model functions. """ | |||
import importlib | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
from tensorflow.signal import hann_window, inverse_stft, stft | |||
from ..utils.tensor import pad_and_partition, pad_and_reshape | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
placeholder = tf.compat.v1.placeholder | |||
def get_model_function(model_type): | |||
""" | |||
Get tensorflow function of the model to be applied to the input tensor. | |||
For instance "unet.softmax_unet" will return the softmax_unet function | |||
in the "unet.py" submodule of the current module (spleeter.model). | |||
Params: | |||
- model_type: str | |||
the relative module path to the model function. | |||
Returns: | |||
A tensorflow function to be applied to the input tensor to get the | |||
multitrack output. | |||
""" | |||
relative_path_to_module = ".".join(model_type.split(".")[:-1]) | |||
model_name = model_type.split(".")[-1] | |||
main_module = ".".join((__name__, "functions")) | |||
path_to_module = f"{main_module}.{relative_path_to_module}" | |||
module = importlib.import_module(path_to_module) | |||
model_function = getattr(module, model_name) | |||
return model_function | |||
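# Resolution sketch (illustrative): the default descriptor "unet.unet" maps
# to the `unet` function in the `functions.unet` submodule of this package.
if __name__ == "__main__":
    model_function = get_model_function("unet.unet")
    print(model_function.__module__, model_function.__name__)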
class InputProvider(object): | |||
def __init__(self, params): | |||
self.params = params | |||
def get_input_dict_placeholders(self): | |||
raise NotImplementedError() | |||
@property | |||
def input_names(self): | |||
raise NotImplementedError() | |||
def get_feed_dict(self, features, *args): | |||
raise NotImplementedError() | |||
class WaveformInputProvider(InputProvider): | |||
@property | |||
def input_names(self): | |||
return ["audio_id", "waveform"] | |||
def get_input_dict_placeholders(self): | |||
shape = (None, self.params["n_channels"]) | |||
features = { | |||
"waveform": placeholder(tf.float32, shape=shape, name="waveform"), | |||
"audio_id": placeholder(tf.string, name="audio_id"), | |||
} | |||
return features | |||
def get_feed_dict(self, features, waveform, audio_id): | |||
return {features["audio_id"]: audio_id, features["waveform"]: waveform} | |||
class SpectralInputProvider(InputProvider): | |||
def __init__(self, params): | |||
super().__init__(params) | |||
self.stft_input_name = "{}_stft".format(self.params["mix_name"]) | |||
@property | |||
def input_names(self): | |||
return ["audio_id", self.stft_input_name] | |||
def get_input_dict_placeholders(self): | |||
features = { | |||
self.stft_input_name: placeholder( | |||
tf.complex64, | |||
shape=( | |||
None, | |||
self.params["frame_length"] // 2 + 1, | |||
self.params["n_channels"], | |||
), | |||
name=self.stft_input_name, | |||
), | |||
"audio_id": placeholder(tf.string, name="audio_id"), | |||
} | |||
return features | |||
def get_feed_dict(self, features, stft, audio_id): | |||
return {features["audio_id"]: audio_id, features[self.stft_input_name]: stft} | |||
class InputProviderFactory(object): | |||
@staticmethod | |||
def get(params): | |||
stft_backend = params["stft_backend"] | |||
assert stft_backend in ( | |||
"tensorflow", | |||
"librosa", | |||
), "Unexpected backend {}".format(stft_backend) | |||
if stft_backend == "tensorflow": | |||
return WaveformInputProvider(params) | |||
else: | |||
return SpectralInputProvider(params) | |||
class EstimatorSpecBuilder(object): | |||
"""A builder class that allows to builds a multitrack unet model | |||
estimator. The built model estimator has a different behaviour when | |||
used in a train/eval mode and in predict mode. | |||
* In train/eval mode: it takes as input and outputs magnitude spectrogram | |||
* In predict mode: it takes as input and outputs waveform. The whole | |||
separation process is then done in this function | |||
for performance reason: it makes it possible to run | |||
the whole spearation process (including STFT and | |||
inverse STFT) on GPU. | |||
:Example: | |||
>>> from spleeter.model import EstimatorSpecBuilder | |||
>>> builder = EstimatorSpecBuilder() | |||
>>> builder.build_predict_model() | |||
>>> builder.build_evaluation_model() | |||
>>> builder.build_train_model() | |||
>>> from spleeter.model import model_fn | |||
>>> estimator = tf.estimator.Estimator(model_fn=model_fn, ...) | |||
""" | |||
# Supported model functions. | |||
DEFAULT_MODEL = "unet.unet" | |||
# Supported loss functions. | |||
L1_MASK = "L1_mask" | |||
WEIGHTED_L1_MASK = "weighted_L1_mask" | |||
# Supported optimizers. | |||
ADADELTA = "Adadelta" | |||
SGD = "SGD" | |||
# Math constants. | |||
WINDOW_COMPENSATION_FACTOR = 2.0 / 3.0 | |||
EPSILON = 1e-10 | |||
def __init__(self, features, params): | |||
"""Default constructor. Depending on built model | |||
usage, the provided features should be different: | |||
* In train/eval mode: features is a dictionary with a | |||
"mix_spectrogram" key, associated to the | |||
mix magnitude spectrogram. | |||
* In predict mode: features is a dictionary with a "waveform" | |||
key, associated to the waveform of the sound | |||
to be separated. | |||
:param features: The input features for the estimator. | |||
:param params: Some hyperparameters as a dictionary. | |||
""" | |||
self._features = features | |||
self._params = params | |||
# Get instrument name. | |||
self._mix_name = params["mix_name"] | |||
self._instruments = params["instrument_list"] | |||
# Get STFT/signals parameters | |||
self._n_channels = params["n_channels"] | |||
self._T = params["T"] | |||
self._F = params["F"] | |||
self._frame_length = params["frame_length"] | |||
self._frame_step = params["frame_step"] | |||
def include_stft_computations(self): | |||
return self._params["stft_backend"] == "tensorflow" | |||
def _build_model_outputs(self): | |||
"""Created a batch_sizexTxFxn_channels input tensor containing | |||
mix magnitude spectrogram, then an output dict from it according | |||
to the selected model in internal parameters. | |||
:returns: Build output dict. | |||
:raise ValueError: If required model_type is not supported. | |||
""" | |||
input_tensor = self.spectrogram_feature | |||
model = self._params.get("model", None) | |||
if model is not None: | |||
model_type = model.get("type", self.DEFAULT_MODEL) | |||
else: | |||
model_type = self.DEFAULT_MODEL | |||
try: | |||
apply_model = get_model_function(model_type) | |||
except ModuleNotFoundError: | |||
raise ValueError(f"No model function {model_type} found") | |||
self._model_outputs = apply_model( | |||
input_tensor, self._instruments, self._params["model"]["params"] | |||
) | |||
    def _build_loss(self, labels):
        """Construct tensorflow loss and metrics.
        :param labels: dictionary of target outputs (key: instrument
            name, value: ground truth spectrogram of the instrument)
        :returns: tensorflow (loss, metrics) tuple.
        """
output_dict = self.model_outputs | |||
loss_type = self._params.get("loss_type", self.L1_MASK) | |||
if loss_type == self.L1_MASK: | |||
losses = { | |||
name: tf.reduce_mean(tf.abs(output - labels[name])) | |||
for name, output in output_dict.items() | |||
} | |||
elif loss_type == self.WEIGHTED_L1_MASK: | |||
losses = { | |||
name: tf.reduce_mean( | |||
tf.reduce_mean(labels[name], axis=[1, 2, 3], keep_dims=True) | |||
* tf.abs(output - labels[name]) | |||
) | |||
for name, output in output_dict.items() | |||
} | |||
else: | |||
raise ValueError(f"Unkwnown loss type: {loss_type}") | |||
loss = tf.reduce_sum(list(losses.values())) | |||
# Add metrics for monitoring each instrument. | |||
metrics = {k: tf.compat.v1.metrics.mean(v) for k, v in losses.items()} | |||
metrics["absolute_difference"] = tf.compat.v1.metrics.mean(loss) | |||
return loss, metrics | |||
def _build_optimizer(self): | |||
"""Builds an optimizer instance from internal parameter values. | |||
Default to AdamOptimizer if not specified. | |||
:returns: Optimizer instance from internal configuration. | |||
""" | |||
name = self._params.get("optimizer") | |||
if name == self.ADADELTA: | |||
return tf.compat.v1.train.AdadeltaOptimizer() | |||
rate = self._params["learning_rate"] | |||
if name == self.SGD: | |||
return tf.compat.v1.train.GradientDescentOptimizer(rate) | |||
return tf.compat.v1.train.AdamOptimizer(rate) | |||
@property | |||
def instruments(self): | |||
return self._instruments | |||
@property | |||
def stft_name(self): | |||
return f"{self._mix_name}_stft" | |||
@property | |||
def spectrogram_name(self): | |||
return f"{self._mix_name}_spectrogram" | |||
def _build_stft_feature(self): | |||
"""Compute STFT of waveform and slice the STFT in segment | |||
with the right length to feed the network. | |||
""" | |||
stft_name = self.stft_name | |||
spec_name = self.spectrogram_name | |||
if stft_name not in self._features: | |||
# pad input with a frame of zeros | |||
waveform = tf.concat( | |||
[ | |||
tf.zeros((self._frame_length, self._n_channels)), | |||
self._features["waveform"], | |||
], | |||
0, | |||
) | |||
stft_feature = tf.transpose( | |||
stft( | |||
tf.transpose(waveform), | |||
self._frame_length, | |||
self._frame_step, | |||
window_fn=lambda frame_length, dtype: ( | |||
hann_window(frame_length, periodic=True, dtype=dtype) | |||
), | |||
pad_end=True, | |||
), | |||
perm=[1, 2, 0], | |||
) | |||
self._features[f"{self._mix_name}_stft"] = stft_feature | |||
if spec_name not in self._features: | |||
self._features[spec_name] = tf.abs( | |||
pad_and_partition(self._features[stft_name], self._T) | |||
)[:, :, : self._F, :] | |||
@property | |||
def model_outputs(self): | |||
if not hasattr(self, "_model_outputs"): | |||
self._build_model_outputs() | |||
return self._model_outputs | |||
@property | |||
def outputs(self): | |||
if not hasattr(self, "_outputs"): | |||
self._build_outputs() | |||
return self._outputs | |||
@property | |||
def stft_feature(self): | |||
if self.stft_name not in self._features: | |||
self._build_stft_feature() | |||
return self._features[self.stft_name] | |||
@property | |||
def spectrogram_feature(self): | |||
if self.spectrogram_name not in self._features: | |||
self._build_stft_feature() | |||
return self._features[self.spectrogram_name] | |||
@property | |||
def masks(self): | |||
if not hasattr(self, "_masks"): | |||
self._build_masks() | |||
return self._masks | |||
@property | |||
def masked_stfts(self): | |||
if not hasattr(self, "_masked_stfts"): | |||
self._build_masked_stfts() | |||
return self._masked_stfts | |||
def _inverse_stft(self, stft_t, time_crop=None): | |||
"""Inverse and reshape the given STFT | |||
:param stft_t: input STFT | |||
:returns: inverse STFT (waveform) | |||
""" | |||
inversed = ( | |||
inverse_stft( | |||
tf.transpose(stft_t, perm=[2, 0, 1]), | |||
self._frame_length, | |||
self._frame_step, | |||
window_fn=lambda frame_length, dtype: ( | |||
hann_window(frame_length, periodic=True, dtype=dtype) | |||
), | |||
) | |||
* self.WINDOW_COMPENSATION_FACTOR | |||
) | |||
reshaped = tf.transpose(inversed) | |||
if time_crop is None: | |||
time_crop = tf.shape(self._features["waveform"])[0] | |||
return reshaped[self._frame_length : self._frame_length + time_crop, :] | |||
def _build_mwf_output_waveform(self): | |||
"""Perform separation with multichannel Wiener Filtering using Norbert. | |||
Note: multichannel Wiener Filtering is not coded in Tensorflow and thus | |||
may be quite slow. | |||
:returns: dictionary of separated waveforms (key: instrument name, | |||
value: estimated waveform of the instrument) | |||
""" | |||
import norbert # pylint: disable=import-error | |||
output_dict = self.model_outputs | |||
x = self.stft_feature | |||
v = tf.stack( | |||
[ | |||
pad_and_reshape( | |||
output_dict[f"{instrument}_spectrogram"], | |||
self._frame_length, | |||
self._F, | |||
)[: tf.shape(x)[0], ...] | |||
for instrument in self._instruments | |||
], | |||
axis=3, | |||
) | |||
input_args = [v, x] | |||
stft_function = ( | |||
tf.py_function( | |||
lambda v, x: norbert.wiener(v.numpy(), x.numpy()), | |||
input_args, | |||
tf.complex64, | |||
), | |||
) | |||
return { | |||
instrument: self._inverse_stft(stft_function[0][:, :, :, k]) | |||
for k, instrument in enumerate(self._instruments) | |||
} | |||
def _extend_mask(self, mask): | |||
"""Extend mask, from reduced number of frequency bin to the number of | |||
frequency bin in the STFT. | |||
:param mask: restricted mask | |||
:returns: extended mask | |||
:raise ValueError: If invalid mask_extension parameter is set. | |||
""" | |||
extension = self._params["mask_extension"] | |||
# Extend with average | |||
# (dispatch according to energy in the processed band) | |||
if extension == "average": | |||
extension_row = tf.reduce_mean(mask, axis=2, keepdims=True) | |||
# Extend with 0 | |||
# (avoid extension artifacts but not conservative separation) | |||
elif extension == "zeros": | |||
mask_shape = tf.shape(mask) | |||
extension_row = tf.zeros((mask_shape[0], mask_shape[1], 1, mask_shape[-1])) | |||
else: | |||
raise ValueError(f"Invalid mask_extension parameter {extension}") | |||
n_extra_row = self._frame_length // 2 + 1 - self._F | |||
extension = tf.tile(extension_row, [1, 1, n_extra_row, 1]) | |||
return tf.concat([mask, extension], axis=2) | |||
def _build_masks(self): | |||
""" | |||
        Compute masks from the output spectrograms of the model and
        store them in `self._masks`.
""" | |||
output_dict = self.model_outputs | |||
stft_feature = self.stft_feature | |||
separation_exponent = self._params["separation_exponent"] | |||
output_sum = ( | |||
tf.reduce_sum( | |||
[e ** separation_exponent for e in output_dict.values()], axis=0 | |||
) | |||
+ self.EPSILON | |||
) | |||
out = {} | |||
for instrument in self._instruments: | |||
output = output_dict[f"{instrument}_spectrogram"] | |||
# Compute mask with the model. | |||
instrument_mask = ( | |||
output ** separation_exponent + (self.EPSILON / len(output_dict)) | |||
) / output_sum | |||
# Extend mask; | |||
instrument_mask = self._extend_mask(instrument_mask) | |||
# Stack back mask. | |||
old_shape = tf.shape(instrument_mask) | |||
new_shape = tf.concat( | |||
[[old_shape[0] * old_shape[1]], old_shape[2:]], axis=0 | |||
) | |||
instrument_mask = tf.reshape(instrument_mask, new_shape) | |||
# Remove padded part (for mask having the same size as STFT); | |||
instrument_mask = instrument_mask[: tf.shape(stft_feature)[0], ...] | |||
out[instrument] = instrument_mask | |||
self._masks = out | |||
def _build_masked_stfts(self): | |||
input_stft = self.stft_feature | |||
out = {} | |||
for instrument, mask in self.masks.items(): | |||
out[instrument] = tf.cast(mask, dtype=tf.complex64) * input_stft | |||
self._masked_stfts = out | |||
def _build_manual_output_waveform(self, masked_stft): | |||
"""Perform ratio mask separation | |||
:param output_dict: dictionary of estimated spectrogram (key: instrument | |||
name, value: estimated spectrogram of the instrument) | |||
:returns: dictionary of separated waveforms (key: instrument name, | |||
value: estimated waveform of the instrument) | |||
""" | |||
output_waveform = {} | |||
for instrument, stft_data in masked_stft.items(): | |||
output_waveform[instrument] = self._inverse_stft(stft_data) | |||
return output_waveform | |||
def _build_output_waveform(self, masked_stft): | |||
"""Build output waveform from given output dict in order to be used in | |||
prediction context. Regarding of the configuration building method will | |||
be using MWF. | |||
:returns: Built output waveform. | |||
""" | |||
if self._params.get("MWF", False): | |||
output_waveform = self._build_mwf_output_waveform() | |||
else: | |||
output_waveform = self._build_manual_output_waveform(masked_stft) | |||
return output_waveform | |||
def _build_outputs(self): | |||
if self.include_stft_computations(): | |||
self._outputs = self._build_output_waveform(self.masked_stfts) | |||
else: | |||
self._outputs = self.masked_stfts | |||
if "audio_id" in self._features: | |||
self._outputs["audio_id"] = self._features["audio_id"] | |||
def build_predict_model(self): | |||
"""Builder interface for creating model instance that aims to perform | |||
prediction / inference over given track. The output of such estimator | |||
        will be a dictionary with a "<instrument>" key per separated
        instrument, associated with the estimated separated waveform of the
        instrument.
:returns: An estimator for performing prediction. | |||
""" | |||
return tf.estimator.EstimatorSpec( | |||
tf.estimator.ModeKeys.PREDICT, predictions=self.outputs | |||
) | |||
def build_evaluation_model(self, labels): | |||
"""Builder interface for creating model instance that aims to perform | |||
model evaluation. The output of such estimator will be a dictionary | |||
with a key "<instrument>_spectrogram" per separated instrument, | |||
associated to the estimated separated instrument magnitude spectrogram. | |||
:param labels: Model labels. | |||
:returns: An estimator for performing model evaluation. | |||
""" | |||
loss, metrics = self._build_loss(labels) | |||
return tf.estimator.EstimatorSpec( | |||
tf.estimator.ModeKeys.EVAL, loss=loss, eval_metric_ops=metrics | |||
) | |||
def build_train_model(self, labels): | |||
"""Builder interface for creating model instance that aims to perform | |||
model training. The output of such estimator will be a dictionary | |||
with a key "<instrument>_spectrogram" per separated instrument, | |||
associated to the estimated separated instrument magnitude spectrogram. | |||
:param labels: Model labels. | |||
:returns: An estimator for performing model training. | |||
""" | |||
loss, metrics = self._build_loss(labels) | |||
optimizer = self._build_optimizer() | |||
train_operation = optimizer.minimize( | |||
loss=loss, global_step=tf.compat.v1.train.get_global_step() | |||
) | |||
return tf.estimator.EstimatorSpec( | |||
mode=tf.estimator.ModeKeys.TRAIN, | |||
loss=loss, | |||
train_op=train_operation, | |||
eval_metric_ops=metrics, | |||
) | |||
def model_fn(features, labels, mode, params, config): | |||
""" | |||
    :param features: Input features dictionary for the estimator.
    :param labels: Ground truth labels (train/eval modes only).
    :param mode: Estimator mode.
    :param params: Model hyperparameters as a dictionary.
    :param config: TF configuration (not used).
:returns: Built EstimatorSpec. | |||
:raise ValueError: If estimator mode is not supported. | |||
""" | |||
builder = EstimatorSpecBuilder(features, params) | |||
if mode == tf.estimator.ModeKeys.PREDICT: | |||
return builder.build_predict_model() | |||
elif mode == tf.estimator.ModeKeys.EVAL: | |||
return builder.build_evaluation_model(labels) | |||
elif mode == tf.estimator.ModeKeys.TRAIN: | |||
return builder.build_train_model(labels) | |||
raise ValueError(f"Unknown mode {mode}") |
@ -0,0 +1,47 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" This package provide model functions. """ | |||
from typing import Callable, Dict, Iterable, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def apply( | |||
function: Callable, | |||
input_tensor: tf.Tensor, | |||
instruments: Iterable[str], | |||
params: Optional[Dict] = None, | |||
) -> Dict: | |||
""" | |||
Apply given function to the input tensor. | |||
Parameters: | |||
function: | |||
Function to be applied to tensor. | |||
        input_tensor (tensorflow.Tensor):
            Tensor to apply the function to.
        instruments (Iterable[str]):
            Iterable that provides a collection of instruments.
        params:
            (Optional) dict of parameters passed to the given function.
Returns: | |||
Created output tensor dict. | |||
""" | |||
output_dict: Dict = {} | |||
for instrument in instruments: | |||
out_name = f"{instrument}_spectrogram" | |||
output_dict[out_name] = function( | |||
input_tensor, output_name=out_name, params=params or {} | |||
) | |||
return output_dict |
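# Usage sketch (illustrative): building the per-instrument output dict with
# a trivial identity function standing in for a real model function.
if __name__ == "__main__":
    def identity(
        tensor: tf.Tensor, output_name: str = "output", params: Optional[Dict] = None
    ) -> tf.Tensor:
        return tf.identity(tensor, name=output_name)

    batch = tf.zeros((1, 512, 1024, 2))
    outputs = apply(identity, batch, ("vocals", "accompaniment"))
    print(sorted(outputs))  # ['accompaniment_spectrogram', 'vocals_spectrogram']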
@ -0,0 +1,98 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
This system (UHL1) uses a bi-directional LSTM network as described in : | |||
`S. Uhlich, M. Porcu, F. Giron, M. Enenkl, T. Kemp, N. Takahashi and | |||
Y. Mitsufuji. | |||
"Improving music source separation based on deep neural networks through | |||
data augmentation and network blending", Proc. ICASSP, 2017.` | |||
It has three BLSTM layers, each having 500 cells. For each instrument, | |||
a network is trained which predicts the target instrument amplitude from | |||
the mixture amplitude in the STFT domain (frame size: 4096, hop size: | |||
1024). The raw output of each network is then combined by a multichannel | |||
Wiener filter. The network is trained on musdb where we split train into | |||
train_train and train_valid with 86 and 14 songs, respectively. The | |||
validation set is used to perform early stopping and hyperparameter | |||
selection (LSTM layer dropout rate, regularization strength). | |||
""" | |||
from typing import Dict, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
from tensorflow.compat.v1.keras.initializers import he_uniform | |||
from tensorflow.compat.v1.keras.layers import CuDNNLSTM | |||
from tensorflow.keras.layers import ( | |||
Bidirectional, | |||
Dense, | |||
Flatten, | |||
Reshape, | |||
TimeDistributed, | |||
) | |||
from . import apply | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def apply_blstm( | |||
input_tensor: tf.Tensor, output_name: str = "output", params: Optional[Dict] = None | |||
) -> tf.Tensor: | |||
""" | |||
Apply BLSTM to the given input_tensor. | |||
Parameters: | |||
input_tensor (tensorflow.Tensor): | |||
Input of the model. | |||
output_name (str): | |||
(Optional) name of the output, default to 'output'. | |||
params (Optional[Dict]): | |||
(Optional) dict of BLSTM parameters. | |||
Returns: | |||
tensorflow.Tensor: | |||
Output tensor. | |||
""" | |||
if params is None: | |||
params = {} | |||
units: int = params.get("lstm_units", 250) | |||
kernel_initializer = he_uniform(seed=50) | |||
    flatten_input = TimeDistributed(Flatten())(input_tensor)
def create_bidirectional(): | |||
return Bidirectional( | |||
CuDNNLSTM( | |||
units, kernel_initializer=kernel_initializer, return_sequences=True | |||
) | |||
) | |||
    l1 = create_bidirectional()(flatten_input)
    l2 = create_bidirectional()(l1)
    l3 = create_bidirectional()(l2)
dense = TimeDistributed( | |||
Dense( | |||
int(flatten_input.shape[2]), | |||
activation="relu", | |||
kernel_initializer=kernel_initializer, | |||
) | |||
    )(l3)
output: tf.Tensor = TimeDistributed( | |||
Reshape(input_tensor.shape[2:]), name=output_name | |||
)(dense) | |||
return output | |||
def blstm(
    input_tensor: tf.Tensor, instruments: Iterable[str], params: Optional[Dict] = None
) -> Dict:
    """ Model function applier. """
    return apply(apply_blstm, input_tensor, instruments, params)
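# Usage sketch (added for illustration): apply the BLSTM model function to a
# dummy batch of spectrogram chunks. CuDNNLSTM only runs on a CUDA-enabled
# GPU, so treat this as a shape sanity check rather than a CPU demo.
if __name__ == "__main__":
    _dummy = tf.zeros((1, 64, 128, 2))
    _outputs = blstm(_dummy, ["vocals", "accompaniment"], params={"lstm_units": 16})
    for _name, _tensor in _outputs.items():
        print(_name, _tensor.shape)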
@ -0,0 +1,234 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
This module contains building functions for U-net source | |||
separation models in a similar way as in A. Jansson et al. : | |||
"Singing voice separation with deep u-net convolutional networks", | |||
ISMIR 2017 | |||
Each instrument is modeled by a single U-net
convolutional / deconvolutional network that takes a mix spectrogram
as input and outputs the estimated sound spectrogram.
""" | |||
from functools import partial | |||
from typing import Any, Dict, Iterable, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
from tensorflow.compat.v1 import logging | |||
from tensorflow.compat.v1.keras.initializers import he_uniform | |||
from tensorflow.keras.layers import ( | |||
ELU, | |||
BatchNormalization, | |||
Concatenate, | |||
Conv2D, | |||
Conv2DTranspose, | |||
Dropout, | |||
LeakyReLU, | |||
Multiply, | |||
ReLU, | |||
Softmax, | |||
) | |||
from . import apply | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def _get_conv_activation_layer(params: Dict) -> Any: | |||
""" | |||
> To be documented. | |||
Parameters: | |||
params (Dict): | |||
Returns: | |||
Any: | |||
Required Activation function. | |||
""" | |||
conv_activation: str = params.get("conv_activation") | |||
if conv_activation == "ReLU": | |||
return ReLU() | |||
elif conv_activation == "ELU": | |||
return ELU() | |||
return LeakyReLU(0.2) | |||
def _get_deconv_activation_layer(params: Dict) -> Any: | |||
""" | |||
> To be documented. | |||
Parameters: | |||
params (Dict): | |||
Returns: | |||
Any: | |||
Required Activation function. | |||
""" | |||
deconv_activation: str = params.get("deconv_activation") | |||
if deconv_activation == "LeakyReLU": | |||
return LeakyReLU(0.2) | |||
elif deconv_activation == "ELU": | |||
return ELU() | |||
return ReLU() | |||
def apply_unet( | |||
input_tensor: tf.Tensor, | |||
output_name: str = "output", | |||
params: Optional[Dict] = None, | |||
output_mask_logit: bool = False, | |||
) -> Any: | |||
""" | |||
Apply a convolutionnal U-net to model a single instrument (one U-net | |||
is used for each instrument). | |||
Parameters: | |||
input_tensor (tensorflow.Tensor): | |||
output_name (str): | |||
params (Optional[Dict]): | |||
output_mask_logit (bool): | |||
""" | |||
logging.info(f"Apply unet for {output_name}") | |||
conv_n_filters = params.get("conv_n_filters", [16, 32, 64, 128, 256, 512]) | |||
conv_activation_layer = _get_conv_activation_layer(params) | |||
deconv_activation_layer = _get_deconv_activation_layer(params) | |||
kernel_initializer = he_uniform(seed=50) | |||
conv2d_factory = partial( | |||
Conv2D, strides=(2, 2), padding="same", kernel_initializer=kernel_initializer | |||
) | |||
# First layer. | |||
conv1 = conv2d_factory(conv_n_filters[0], (5, 5))(input_tensor) | |||
batch1 = BatchNormalization(axis=-1)(conv1) | |||
rel1 = conv_activation_layer(batch1) | |||
# Second layer. | |||
conv2 = conv2d_factory(conv_n_filters[1], (5, 5))(rel1) | |||
batch2 = BatchNormalization(axis=-1)(conv2) | |||
rel2 = conv_activation_layer(batch2) | |||
# Third layer. | |||
conv3 = conv2d_factory(conv_n_filters[2], (5, 5))(rel2) | |||
batch3 = BatchNormalization(axis=-1)(conv3) | |||
rel3 = conv_activation_layer(batch3) | |||
# Fourth layer. | |||
conv4 = conv2d_factory(conv_n_filters[3], (5, 5))(rel3) | |||
batch4 = BatchNormalization(axis=-1)(conv4) | |||
rel4 = conv_activation_layer(batch4) | |||
# Fifth layer. | |||
conv5 = conv2d_factory(conv_n_filters[4], (5, 5))(rel4) | |||
batch5 = BatchNormalization(axis=-1)(conv5) | |||
rel5 = conv_activation_layer(batch5) | |||
# Sixth layer | |||
conv6 = conv2d_factory(conv_n_filters[5], (5, 5))(rel5) | |||
batch6 = BatchNormalization(axis=-1)(conv6) | |||
_ = conv_activation_layer(batch6) | |||
# | |||
# | |||
conv2d_transpose_factory = partial( | |||
Conv2DTranspose, | |||
strides=(2, 2), | |||
padding="same", | |||
kernel_initializer=kernel_initializer, | |||
) | |||
# | |||
    up1 = conv2d_transpose_factory(conv_n_filters[4], (5, 5))(conv6)
up1 = deconv_activation_layer(up1) | |||
batch7 = BatchNormalization(axis=-1)(up1) | |||
drop1 = Dropout(0.5)(batch7) | |||
merge1 = Concatenate(axis=-1)([conv5, drop1]) | |||
# | |||
    up2 = conv2d_transpose_factory(conv_n_filters[3], (5, 5))(merge1)
up2 = deconv_activation_layer(up2) | |||
batch8 = BatchNormalization(axis=-1)(up2) | |||
drop2 = Dropout(0.5)(batch8) | |||
merge2 = Concatenate(axis=-1)([conv4, drop2]) | |||
# | |||
    up3 = conv2d_transpose_factory(conv_n_filters[2], (5, 5))(merge2)
up3 = deconv_activation_layer(up3) | |||
batch9 = BatchNormalization(axis=-1)(up3) | |||
drop3 = Dropout(0.5)(batch9) | |||
merge3 = Concatenate(axis=-1)([conv3, drop3]) | |||
# | |||
    up4 = conv2d_transpose_factory(conv_n_filters[1], (5, 5))(merge3)
up4 = deconv_activation_layer(up4) | |||
batch10 = BatchNormalization(axis=-1)(up4) | |||
merge4 = Concatenate(axis=-1)([conv2, batch10]) | |||
# | |||
    up5 = conv2d_transpose_factory(conv_n_filters[0], (5, 5))(merge4)
up5 = deconv_activation_layer(up5) | |||
batch11 = BatchNormalization(axis=-1)(up5) | |||
merge5 = Concatenate(axis=-1)([conv1, batch11]) | |||
# | |||
    up6 = conv2d_transpose_factory(1, (5, 5))(merge5)
up6 = deconv_activation_layer(up6) | |||
batch12 = BatchNormalization(axis=-1)(up6) | |||
# Last layer to ensure initial shape reconstruction. | |||
if not output_mask_logit: | |||
up7 = Conv2D( | |||
2, | |||
(4, 4), | |||
dilation_rate=(2, 2), | |||
activation="sigmoid", | |||
padding="same", | |||
kernel_initializer=kernel_initializer, | |||
        )(batch12)
output = Multiply(name=output_name)([up7, input_tensor]) | |||
return output | |||
return Conv2D( | |||
2, | |||
(4, 4), | |||
dilation_rate=(2, 2), | |||
padding="same", | |||
kernel_initializer=kernel_initializer, | |||
    )(batch12)
def unet( | |||
input_tensor: tf.Tensor, instruments: Iterable[str], params: Optional[Dict] = None | |||
) -> Dict: | |||
""" Model function applier. """ | |||
return apply(apply_unet, input_tensor, instruments, params) | |||
def softmax_unet( | |||
input_tensor: tf.Tensor, instruments: Iterable[str], params: Optional[Dict] = None | |||
) -> Dict: | |||
""" | |||
    Apply softmax to multitrack unet in order to have masks summing to one.
    Parameters:
        input_tensor (tensorflow.Tensor):
            Tensor to apply the U-net to.
        instruments (Iterable[str]):
            Iterable that provides a collection of instruments.
        params (Optional[Dict]):
            (Optional) dict of U-net parameters.
Returns: | |||
Dict: | |||
Created output tensor dict. | |||
""" | |||
logit_mask_list = [] | |||
for instrument in instruments: | |||
out_name = f"{instrument}_spectrogram" | |||
logit_mask_list.append( | |||
apply_unet( | |||
input_tensor, | |||
output_name=out_name, | |||
params=params, | |||
output_mask_logit=True, | |||
) | |||
) | |||
masks = Softmax(axis=4)(tf.stack(logit_mask_list, axis=4)) | |||
output_dict = {} | |||
for i, instrument in enumerate(instruments): | |||
out_name = f"{instrument}_spectrogram" | |||
output_dict[out_name] = Multiply(name=out_name)([masks[..., i], input_tensor]) | |||
return output_dict |
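# Usage sketch (added for illustration): build a 2-stem softmax U-net over a
# dummy spectrogram batch. Time and frequency dimensions should be divisible
# by 64 (2 ** 6) to survive the six stride-2 convolutions cleanly.
if __name__ == "__main__":
    _dummy = tf.zeros((1, 512, 1024, 2))
    _outputs = softmax_unet(_dummy, ["vocals", "accompaniment"], params={})
    for _name, _tensor in _outputs.items():
        print(_name, _tensor.shape)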
@ -0,0 +1,95 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
This package provides tools for downloading model from network | |||
using remote storage abstraction. | |||
Examples: | |||
```python | |||
>>> provider = MyProviderImplementation() | |||
>>> provider.get('/path/to/local/storage', params) | |||
``` | |||
""" | |||
from abc import ABC, abstractmethod | |||
from os import environ, makedirs | |||
from os.path import exists, isabs, join, sep | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class ModelProvider(ABC): | |||
""" | |||
    A ModelProvider manages model files on disk and downloads them
    when they are not available locally.
""" | |||
DEFAULT_MODEL_PATH: str = environ.get("MODEL_PATH", "pretrained_models") | |||
MODEL_PROBE_PATH: str = ".probe" | |||
@abstractmethod | |||
    def download(self, name: str, path: str) -> None:
""" | |||
Download model denoted by the given name to disk. | |||
Parameters: | |||
name (str): | |||
Name of the model to download. | |||
path (str): | |||
Path of the directory to save model into. | |||
""" | |||
pass | |||
@staticmethod | |||
def writeProbe(directory: str) -> None: | |||
""" | |||
Write a model probe file into the given directory. | |||
Parameters: | |||
directory (str): | |||
Directory to write probe into. | |||
""" | |||
probe: str = join(directory, ModelProvider.MODEL_PROBE_PATH) | |||
with open(probe, "w") as stream: | |||
stream.write("OK") | |||
def get(self, model_directory: str) -> str: | |||
""" | |||
Ensures required model is available at given location. | |||
Parameters: | |||
model_directory (str): | |||
Expected model_directory to be available. | |||
Raises: | |||
IOError: | |||
If model can not be retrieved. | |||
""" | |||
        # Expand model directory if needed.
        if not isabs(model_directory):
            model_directory = join(self.DEFAULT_MODEL_PATH, model_directory)
        # Download it if it does not exist.
model_probe: str = join(model_directory, self.MODEL_PROBE_PATH) | |||
if not exists(model_probe): | |||
if not exists(model_directory): | |||
makedirs(model_directory) | |||
self.download(model_directory.split(sep)[-1], model_directory) | |||
self.writeProbe(model_directory) | |||
return model_directory | |||
@classmethod | |||
def default(_: type) -> "ModelProvider": | |||
""" | |||
Builds and returns a default model provider. | |||
Returns: | |||
ModelProvider: | |||
A default model provider instance to use. | |||
""" | |||
from .github import GithubModelProvider | |||
return GithubModelProvider.from_environ() |
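# Sketch of a custom provider (hypothetical, added for illustration): serves
# models by copying them from a local source directory instead of downloading,
# e.g. for offline usage. `dirs_exist_ok` requires Python 3.8+.
if __name__ == "__main__":
    import shutil
    class LocalDirectoryModelProvider(ModelProvider):
        """ Copy-based provider for offline usage. """
        def __init__(self, source_directory: str) -> None:
            self._source_directory = source_directory
        def download(self, name: str, path: str) -> None:
            shutil.copytree(
                join(self._source_directory, name), path, dirs_exist_ok=True
            )
    _provider = LocalDirectoryModelProvider("/path/to/local/models")
    print(_provider.DEFAULT_MODEL_PATH)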
@ -0,0 +1,156 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
A ModelProvider backed by Github Release feature. | |||
Examples: | |||
```python | |||
>>> from spleeter.model.provider import github | |||
>>> provider = github.GithubModelProvider( | |||
'github.com', | |||
'Deezer/spleeter', | |||
'latest') | |||
>>> provider.download('2stems', '/path/to/local/storage') | |||
``` | |||
""" | |||
import hashlib | |||
import os | |||
import tarfile | |||
from os import environ | |||
from tempfile import NamedTemporaryFile | |||
from typing import Dict | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import httpx | |||
from ...utils.logging import logger | |||
from . import ModelProvider | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def compute_file_checksum(path): | |||
"""Computes given path file sha256. | |||
:param path: Path of the file to compute checksum for. | |||
:returns: File checksum. | |||
""" | |||
sha256 = hashlib.sha256() | |||
with open(path, "rb") as stream: | |||
for chunk in iter(lambda: stream.read(4096), b""): | |||
sha256.update(chunk) | |||
return sha256.hexdigest() | |||
class GithubModelProvider(ModelProvider): | |||
""" A ModelProvider implementation backed on Github for remote storage. """ | |||
DEFAULT_HOST: str = "https://github.com" | |||
DEFAULT_REPOSITORY: str = "deezer/spleeter" | |||
CHECKSUM_INDEX: str = "checksum.json" | |||
LATEST_RELEASE: str = "v1.4.0" | |||
RELEASE_PATH: str = "releases/download" | |||
def __init__(self, host: str, repository: str, release: str) -> None: | |||
"""Default constructor. | |||
Parameters: | |||
host (str): | |||
Host to the Github instance to reach. | |||
repository (str): | |||
Repository path within target Github. | |||
release (str): | |||
Release name to get models from. | |||
""" | |||
self._host: str = host | |||
self._repository: str = repository | |||
self._release: str = release | |||
@classmethod | |||
def from_environ(cls: type) -> "GithubModelProvider": | |||
""" | |||
Factory method that creates provider from envvars. | |||
Returns: | |||
GithubModelProvider: | |||
Created instance. | |||
""" | |||
return cls( | |||
environ.get("GITHUB_HOST", cls.DEFAULT_HOST), | |||
environ.get("GITHUB_REPOSITORY", cls.DEFAULT_REPOSITORY), | |||
environ.get("GITHUB_RELEASE", cls.LATEST_RELEASE), | |||
) | |||
def checksum(self, name: str) -> str: | |||
""" | |||
Downloads and returns reference checksum for the given model name. | |||
Parameters: | |||
name (str): | |||
Name of the model to get checksum for. | |||
Returns: | |||
str: | |||
Checksum of the required model. | |||
Raises: | |||
ValueError: | |||
If the given model name is not indexed. | |||
""" | |||
url: str = "/".join( | |||
( | |||
self._host, | |||
self._repository, | |||
self.RELEASE_PATH, | |||
self._release, | |||
self.CHECKSUM_INDEX, | |||
) | |||
) | |||
response: httpx.Response = httpx.get(url) | |||
response.raise_for_status() | |||
index: Dict = response.json() | |||
if name not in index: | |||
raise ValueError(f"No checksum for model {name}") | |||
return index[name] | |||
def download(self, name: str, path: str) -> None: | |||
""" | |||
Download model denoted by the given name to disk. | |||
Parameters: | |||
name (str): | |||
Name of the model to download. | |||
path (str): | |||
Path of the directory to save model into. | |||
""" | |||
url: str = "/".join( | |||
(self._host, self._repository, self.RELEASE_PATH, self._release, name) | |||
) | |||
url = f"{url}.tar.gz" | |||
logger.info(f"Downloading model archive {url}") | |||
with httpx.Client(http2=True) as client: | |||
with client.stream("GET", url) as response: | |||
response.raise_for_status() | |||
archive = NamedTemporaryFile(delete=False) | |||
try: | |||
with archive as stream: | |||
for chunk in response.iter_raw(): | |||
stream.write(chunk) | |||
logger.info("Validating archive checksum") | |||
checksum: str = compute_file_checksum(archive.name) | |||
if checksum != self.checksum(name): | |||
raise IOError("Downloaded file is corrupted, please retry") | |||
logger.info(f"Extracting downloaded {name} archive") | |||
with tarfile.open(name=archive.name) as tar: | |||
tar.extractall(path=path) | |||
finally: | |||
os.unlink(archive.name) | |||
logger.info(f"{name} model file(s) extracted") |
@ -0,0 +1,148 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" This modules provides spleeter command as well as CLI parsing methods. """ | |||
from os.path import join | |||
from tempfile import gettempdir | |||
from typer import Argument, Exit, Option, echo | |||
from typer.models import ArgumentInfo, OptionInfo | |||
from .audio import Codec, STFTBackend | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
AudioInputArgument: ArgumentInfo = Argument( | |||
..., | |||
help="List of input audio file path", | |||
exists=True, | |||
file_okay=True, | |||
dir_okay=False, | |||
readable=True, | |||
resolve_path=True, | |||
) | |||
AudioInputOption: OptionInfo = Option( | |||
None, "--inputs", "-i", help="(DEPRECATED) placeholder for deprecated input option" | |||
) | |||
AudioAdapterOption: OptionInfo = Option( | |||
"spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter", | |||
"--adapter", | |||
"-a", | |||
help="Name of the audio adapter to use for audio I/O", | |||
) | |||
AudioOutputOption: OptionInfo = Option( | |||
join(gettempdir(), "separated_audio"), | |||
"--output_path", | |||
"-o", | |||
help="Path of the output directory to write audio files in", | |||
) | |||
AudioOffsetOption: OptionInfo = Option( | |||
0.0, "--offset", "-s", help="Set the starting offset to separate audio from" | |||
) | |||
AudioDurationOption: OptionInfo = Option( | |||
600.0, | |||
"--duration", | |||
"-d", | |||
help=( | |||
"Set a maximum duration for processing audio " | |||
"(only separate offset + duration first seconds of " | |||
"the input file)" | |||
), | |||
) | |||
AudioSTFTBackendOption: OptionInfo = Option( | |||
STFTBackend.AUTO, | |||
"--stft-backend", | |||
"-B", | |||
case_sensitive=False, | |||
help=( | |||
"Who should be in charge of computing the stfts. Librosa is faster " | |||
'than tensorflow on CPU and uses less memory. "auto" will use ' | |||
"tensorflow when GPU acceleration is available and librosa when not" | |||
), | |||
) | |||
AudioCodecOption: OptionInfo = Option( | |||
Codec.WAV, "--codec", "-c", help="Audio codec to be used for the separated output" | |||
) | |||
AudioBitrateOption: OptionInfo = Option( | |||
"128k", "--bitrate", "-b", help="Audio bitrate to be used for the separated output" | |||
) | |||
FilenameFormatOption: OptionInfo = Option( | |||
"{filename}/{instrument}.{codec}", | |||
"--filename_format", | |||
"-f", | |||
help=( | |||
"Template string that will be formatted to generated" | |||
"output filename. Such template should be Python formattable" | |||
"string, and could use {filename}, {instrument}, and {codec}" | |||
"variables" | |||
), | |||
) | |||
ModelParametersOption: OptionInfo = Option( | |||
"spleeter:2stems", | |||
"--params_filename", | |||
"-p", | |||
help="JSON filename that contains params", | |||
) | |||
MWFOption: OptionInfo = Option( | |||
False, "--mwf", help="Whether to use multichannel Wiener filtering for separation" | |||
) | |||
MUSDBDirectoryOption: OptionInfo = Option( | |||
..., | |||
"--mus_dir", | |||
exists=True, | |||
dir_okay=True, | |||
file_okay=False, | |||
readable=True, | |||
resolve_path=True, | |||
help="Path to musDB dataset directory", | |||
) | |||
TrainingDataDirectoryOption: OptionInfo = Option( | |||
..., | |||
"--data", | |||
"-d", | |||
exists=True, | |||
dir_okay=True, | |||
file_okay=False, | |||
readable=True, | |||
resolve_path=True, | |||
help="Path of the folder containing audio data for training", | |||
) | |||
VerboseOption: OptionInfo = Option(False, "--verbose", help="Enable verbose logs") | |||
def version_callback(value: bool): | |||
if value: | |||
try: | |||
from importlib.metadata import version | |||
except ImportError: | |||
from importlib_metadata import version | |||
echo(f"Spleeter Version: {version('spleeter')}") | |||
raise Exit() | |||
VersionOption: OptionInfo = Option( | |||
None, | |||
"--version", | |||
callback=version_callback, | |||
is_eager=True, | |||
help="Return Spleeter version", | |||
) |
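# Sketch (added for illustration): how these Argument/Option declarations are
# consumed by a Typer command. The `separate` signature below is a simplified
# hypothetical example, not the actual Spleeter entrypoint.
if __name__ == "__main__":
    from typing import List
    from typer import Typer
    app = Typer()
    @app.command()
    def separate(
        files: List[str] = AudioInputArgument,
        output_path: str = AudioOutputOption,
        codec: Codec = AudioCodecOption,
        verbose: bool = VerboseOption,
    ) -> None:
        echo(f"Would separate {len(files)} file(s) into {output_path} as {codec}")
    app()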
@ -0,0 +1,28 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "2stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "accompaniment"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1536, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1000000, | |||
"throttle_secs":300, | |||
"random_seed":0, | |||
"save_checkpoints_steps":150, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{} | |||
} | |||
} |
@ -0,0 +1,28 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "2stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "accompaniment"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1000000, | |||
"throttle_secs":300, | |||
"random_seed":0, | |||
"save_checkpoints_steps":150, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/val.csv", | |||
"model_dir": "4stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1536, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1500000, | |||
"throttle_secs":600, | |||
"random_seed":3, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/val.csv", | |||
"model_dir": "4stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 1500000, | |||
"throttle_secs":600, | |||
"random_seed":3, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "5stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "piano", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1536, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 2500000, | |||
"throttle_secs":600, | |||
"random_seed":8, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.softmax_unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,31 @@ | |||
{ | |||
"train_csv": "path/to/train.csv", | |||
"validation_csv": "path/to/test.csv", | |||
"model_dir": "5stems", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "piano", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 2500000, | |||
"throttle_secs":600, | |||
"random_seed":8, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.softmax_unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,8 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Packages that provides static resources file for the library. """ | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" |
@ -0,0 +1,32 @@ | |||
{ | |||
"train_csv": "configs/musdb_train.csv", | |||
"validation_csv": "configs/musdb_validation.csv", | |||
"model_dir": "musdb_model", | |||
"mix_name": "mix", | |||
"instrument_list": ["vocals", "drums", "bass", "other"], | |||
"sample_rate":44100, | |||
"frame_length":4096, | |||
"frame_step":1024, | |||
"T":512, | |||
"F":1024, | |||
"n_channels":2, | |||
"n_chunks_per_song":1, | |||
"separation_exponent":2, | |||
"mask_extension":"zeros", | |||
"learning_rate": 1e-4, | |||
"batch_size":4, | |||
"training_cache":"training_cache", | |||
"validation_cache":"validation_cache", | |||
"train_max_steps": 100000, | |||
"throttle_secs":600, | |||
"random_seed":3, | |||
"save_checkpoints_steps":300, | |||
"save_summary_steps":5, | |||
"model":{ | |||
"type":"unet.unet", | |||
"params":{ | |||
"conv_activation":"ELU", | |||
"deconv_activation":"ELU" | |||
} | |||
} | |||
} |
@ -0,0 +1,461 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" | |||
Module that provides a class wrapper for source separation. | |||
Examples: | |||
```python | |||
>>> from spleeter.separator import Separator | |||
>>> separator = Separator('spleeter:2stems') | |||
    >>> prediction = separator.separate(waveform)
>>> separator.separate_to_file(...) | |||
``` | |||
""" | |||
import atexit | |||
import os | |||
from multiprocessing import Pool | |||
from os.path import basename, dirname, join, splitext | |||
from typing import Dict, Generator, Optional | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import numpy as np | |||
import tensorflow as tf | |||
from librosa.core import istft, stft | |||
from scipy.signal.windows import hann | |||
from . import SpleeterError | |||
from .audio import Codec, STFTBackend | |||
from .audio.adapter import AudioAdapter | |||
from .audio.convertor import to_stereo | |||
from .model import EstimatorSpecBuilder, InputProviderFactory, model_fn | |||
from .model.provider import ModelProvider | |||
from .types import AudioDescriptor | |||
from .utils.configuration import load_configuration | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
class DataGenerator(object): | |||
""" | |||
    Generator object that stores a sample and generates it once when called.
Used to feed a tensorflow estimator without knowing the whole data at | |||
build time. | |||
""" | |||
def __init__(self) -> None: | |||
""" Default constructor. """ | |||
self._current_data = None | |||
def update_data(self, data) -> None: | |||
""" Replace internal data. """ | |||
self._current_data = data | |||
def __call__(self) -> Generator: | |||
""" Generation process. """ | |||
buffer = self._current_data | |||
while buffer: | |||
yield buffer | |||
buffer = self._current_data | |||
def create_estimator(params, MWF): | |||
""" | |||
    Initialize tensorflow estimator that will perform separation.
    Params:
    - params: a dictionary of parameters for building the model
    - MWF: `True` if multichannel Wiener filtering should be used
Returns: | |||
a tensorflow estimator | |||
""" | |||
# Load model. | |||
provider: ModelProvider = ModelProvider.default() | |||
params["model_dir"] = provider.get(params["model_dir"]) | |||
params["MWF"] = MWF | |||
# Setup config | |||
session_config = tf.compat.v1.ConfigProto() | |||
session_config.gpu_options.per_process_gpu_memory_fraction = 0.7 | |||
config = tf.estimator.RunConfig(session_config=session_config) | |||
# Setup estimator | |||
estimator = tf.estimator.Estimator( | |||
model_fn=model_fn, model_dir=params["model_dir"], params=params, config=config | |||
) | |||
return estimator | |||
class Separator(object): | |||
""" A wrapper class for performing separation. """ | |||
def __init__( | |||
self, | |||
params_descriptor: str, | |||
MWF: bool = False, | |||
stft_backend: STFTBackend = STFTBackend.AUTO, | |||
multiprocess: bool = True, | |||
) -> None: | |||
""" | |||
Default constructor. | |||
Parameters: | |||
params_descriptor (str): | |||
Descriptor for TF params to be used. | |||
MWF (bool): | |||
                (Optional) `True` if MWF should be used, `False` otherwise.
            stft_backend (STFTBackend):
                (Optional) backend used to compute STFTs.
            multiprocess (bool):
                (Optional) `True` to export separated files through a
                process pool.
        """
self._params = load_configuration(params_descriptor) | |||
self._sample_rate = self._params["sample_rate"] | |||
self._MWF = MWF | |||
self._tf_graph = tf.Graph() | |||
self._prediction_generator = None | |||
self._input_provider = None | |||
self._builder = None | |||
self._features = None | |||
self._session = None | |||
if multiprocess: | |||
self._pool = Pool() | |||
atexit.register(self._pool.close) | |||
else: | |||
self._pool = None | |||
self._tasks = [] | |||
self._params["stft_backend"] = STFTBackend.resolve(stft_backend) | |||
self._data_generator = DataGenerator() | |||
def _get_prediction_generator(self) -> Generator: | |||
""" | |||
Lazy loading access method for internal prediction generator | |||
returned by the predict method of a tensorflow estimator. | |||
Returns: | |||
Generator: | |||
Generator of prediction. | |||
""" | |||
if self._prediction_generator is None: | |||
estimator = create_estimator(self._params, self._MWF) | |||
def get_dataset(): | |||
return tf.data.Dataset.from_generator( | |||
self._data_generator, | |||
output_types={"waveform": tf.float32, "audio_id": tf.string}, | |||
output_shapes={"waveform": (None, 2), "audio_id": ()}, | |||
) | |||
self._prediction_generator = estimator.predict( | |||
get_dataset, yield_single_examples=False | |||
) | |||
return self._prediction_generator | |||
def join(self, timeout: int = 200) -> None: | |||
""" | |||
Wait for all pending tasks to be finished. | |||
Parameters: | |||
timeout (int): | |||
(Optional) task waiting timeout. | |||
""" | |||
while len(self._tasks) > 0: | |||
task = self._tasks.pop() | |||
task.get() | |||
task.wait(timeout=timeout) | |||
def _stft( | |||
self, data: np.ndarray, inverse: bool = False, length: Optional[int] = None | |||
) -> np.ndarray: | |||
""" | |||
Single entrypoint for both stft and istft. This computes stft and | |||
istft with librosa on stereo data. The two channels are processed | |||
separately and are concatenated together in the result. The | |||
expected input formats are: (n_samples, 2) for stft and (T, F, 2) | |||
for istft. | |||
Parameters: | |||
data (numpy.array): | |||
Array with either the waveform or the complex spectrogram | |||
depending on the parameter inverse | |||
inverse (bool): | |||
(Optional) Should a stft or an istft be computed. | |||
            length (Optional[int]):
                (Optional) expected output waveform length, used to trim
                the inverse transform.
Returns: | |||
numpy.ndarray: | |||
Stereo data as numpy array for the transform. The channels | |||
are stored in the last dimension. | |||
""" | |||
assert not (inverse and length is None) | |||
data = np.asfortranarray(data) | |||
N = self._params["frame_length"] | |||
H = self._params["frame_step"] | |||
win = hann(N, sym=False) | |||
fstft = istft if inverse else stft | |||
win_len_arg = {"win_length": None, "length": None} if inverse else {"n_fft": N} | |||
n_channels = data.shape[-1] | |||
out = [] | |||
for c in range(n_channels): | |||
d = ( | |||
np.concatenate((np.zeros((N,)), data[:, c], np.zeros((N,)))) | |||
if not inverse | |||
else data[:, :, c].T | |||
) | |||
s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) | |||
if inverse: | |||
s = s[N : N + length] | |||
s = np.expand_dims(s.T, 2 - inverse) | |||
out.append(s) | |||
if len(out) == 1: | |||
return out[0] | |||
return np.concatenate(out, axis=2 - inverse) | |||
def _get_input_provider(self): | |||
if self._input_provider is None: | |||
self._input_provider = InputProviderFactory.get(self._params) | |||
return self._input_provider | |||
def _get_features(self): | |||
if self._features is None: | |||
provider = self._get_input_provider() | |||
self._features = provider.get_input_dict_placeholders() | |||
return self._features | |||
def _get_builder(self): | |||
if self._builder is None: | |||
self._builder = EstimatorSpecBuilder(self._get_features(), self._params) | |||
return self._builder | |||
def _get_session(self): | |||
if self._session is None: | |||
saver = tf.compat.v1.train.Saver() | |||
provider = ModelProvider.default() | |||
model_directory: str = provider.get(self._params["model_dir"]) | |||
latest_checkpoint = tf.train.latest_checkpoint(model_directory) | |||
self._session = tf.compat.v1.Session() | |||
saver.restore(self._session, latest_checkpoint) | |||
return self._session | |||
def _separate_librosa( | |||
self, waveform: np.ndarray, audio_descriptor: AudioDescriptor | |||
) -> Dict: | |||
""" | |||
Performs separation with librosa backend for STFT. | |||
Parameters: | |||
waveform (numpy.ndarray): | |||
Waveform to be separated (as a numpy array) | |||
            audio_descriptor (AudioDescriptor):
                Descriptor of the audio (e.g. filename).
        Returns:
            Dict:
                Separated waveforms.
        """
with self._tf_graph.as_default(): | |||
out = {} | |||
features = self._get_features() | |||
# TODO: fix the logic, build sometimes return, | |||
# sometimes set attribute. | |||
outputs = self._get_builder().outputs | |||
stft = self._stft(waveform) | |||
if stft.shape[-1] == 1: | |||
stft = np.concatenate([stft, stft], axis=-1) | |||
elif stft.shape[-1] > 2: | |||
stft = stft[:, :2] | |||
sess = self._get_session() | |||
outputs = sess.run( | |||
outputs, | |||
feed_dict=self._get_input_provider().get_feed_dict( | |||
features, stft, audio_descriptor | |||
), | |||
) | |||
for inst in self._get_builder().instruments: | |||
out[inst] = self._stft( | |||
outputs[inst], inverse=True, length=waveform.shape[0] | |||
) | |||
return out | |||
def _separate_tensorflow( | |||
self, waveform: np.ndarray, audio_descriptor: AudioDescriptor | |||
) -> Dict: | |||
""" | |||
Performs source separation over the given waveform with tensorflow | |||
backend. | |||
Parameters: | |||
waveform (numpy.ndarray): | |||
Waveform to be separated (as a numpy array) | |||
            audio_descriptor (AudioDescriptor):
                Descriptor of the audio (e.g. filename).
Returns: | |||
Separated waveforms. | |||
""" | |||
if not waveform.shape[-1] == 2: | |||
waveform = to_stereo(waveform) | |||
prediction_generator = self._get_prediction_generator() | |||
# NOTE: update data in generator before performing separation. | |||
self._data_generator.update_data( | |||
{"waveform": waveform, "audio_id": np.array(audio_descriptor)} | |||
) | |||
# NOTE: perform separation. | |||
prediction = next(prediction_generator) | |||
prediction.pop("audio_id") | |||
return prediction | |||
def separate( | |||
self, waveform: np.ndarray, audio_descriptor: Optional[str] = "" | |||
    ) -> Dict:
""" | |||
Performs separation on a waveform. | |||
Parameters: | |||
waveform (numpy.ndarray): | |||
Waveform to be separated (as a numpy array) | |||
            audio_descriptor (str):
                (Optional) string describing the waveform (e.g. filename).
        Returns:
            Dict:
                Separated waveforms keyed by instrument name.
        """
backend: str = self._params["stft_backend"] | |||
if backend == STFTBackend.TENSORFLOW: | |||
return self._separate_tensorflow(waveform, audio_descriptor) | |||
elif backend == STFTBackend.LIBROSA: | |||
return self._separate_librosa(waveform, audio_descriptor) | |||
raise ValueError(f"Unsupported STFT backend {backend}") | |||
def separate_to_file( | |||
self, | |||
audio_descriptor: AudioDescriptor, | |||
destination: str, | |||
audio_adapter: Optional[AudioAdapter] = None, | |||
offset: int = 0, | |||
duration: float = 600.0, | |||
codec: Codec = Codec.WAV, | |||
bitrate: str = "128k", | |||
filename_format: str = "{filename}/{instrument}.{codec}", | |||
synchronous: bool = True, | |||
) -> None: | |||
""" | |||
Performs source separation and export result to file using | |||
given audio adapter. | |||
Filename format should be a Python formattable string that could | |||
use following parameters : | |||
- {instrument} | |||
- {filename} | |||
- {foldername} | |||
- {codec}. | |||
Parameters: | |||
audio_descriptor (AudioDescriptor): | |||
Describe song to separate, used by audio adapter to | |||
retrieve and load audio data, in case of file based | |||
audio adapter, such descriptor would be a file path. | |||
destination (str): | |||
Target directory to write output to. | |||
audio_adapter (Optional[AudioAdapter]): | |||
(Optional) Audio adapter to use for I/O. | |||
offset (int): | |||
(Optional) Offset of loaded song. | |||
duration (float): | |||
(Optional) Duration of loaded song (default: 600s). | |||
codec (Codec): | |||
(Optional) Export codec. | |||
bitrate (str): | |||
(Optional) Export bitrate. | |||
filename_format (str): | |||
(Optional) Filename format. | |||
synchronous (bool): | |||
                (Optional) `True` if operation should be synchronous.
""" | |||
if audio_adapter is None: | |||
audio_adapter = AudioAdapter.default() | |||
waveform, _ = audio_adapter.load( | |||
audio_descriptor, | |||
offset=offset, | |||
duration=duration, | |||
sample_rate=self._sample_rate, | |||
) | |||
sources = self.separate(waveform, audio_descriptor) | |||
self.save_to_file( | |||
sources, | |||
audio_descriptor, | |||
destination, | |||
filename_format, | |||
codec, | |||
audio_adapter, | |||
bitrate, | |||
synchronous, | |||
) | |||
def save_to_file( | |||
self, | |||
sources: Dict, | |||
audio_descriptor: AudioDescriptor, | |||
destination: str, | |||
filename_format: str = "{filename}/{instrument}.{codec}", | |||
codec: Codec = Codec.WAV, | |||
audio_adapter: Optional[AudioAdapter] = None, | |||
bitrate: str = "128k", | |||
synchronous: bool = True, | |||
) -> None: | |||
""" | |||
Export dictionary of sources to files. | |||
Parameters: | |||
sources (Dict): | |||
Dictionary of sources to be exported. The keys are the name | |||
of the instruments, and the values are `N x 2` numpy arrays | |||
                containing the corresponding instrument waveform, as
returned by the separate method | |||
audio_descriptor (AudioDescriptor): | |||
Describe song to separate, used by audio adapter to | |||
retrieve and load audio data, in case of file based audio | |||
adapter, such descriptor would be a file path. | |||
destination (str): | |||
Target directory to write output to. | |||
filename_format (str): | |||
(Optional) Filename format. | |||
codec (Codec): | |||
(Optional) Export codec. | |||
audio_adapter (Optional[AudioAdapter]): | |||
(Optional) Audio adapter to use for I/O. | |||
bitrate (str): | |||
(Optional) Export bitrate. | |||
synchronous (bool): | |||
                (Optional) `True` if operation should be synchronous.
""" | |||
if audio_adapter is None: | |||
audio_adapter = AudioAdapter.default() | |||
foldername = basename(dirname(audio_descriptor)) | |||
filename = splitext(basename(audio_descriptor))[0] | |||
generated = [] | |||
for instrument, data in sources.items(): | |||
path = join( | |||
destination, | |||
filename_format.format( | |||
filename=filename, | |||
instrument=instrument, | |||
foldername=foldername, | |||
codec=codec, | |||
), | |||
) | |||
directory = os.path.dirname(path) | |||
if not os.path.exists(directory): | |||
os.makedirs(directory) | |||
if path in generated: | |||
raise SpleeterError( | |||
( | |||
f"Separated source path conflict : {path}," | |||
"please check your filename format" | |||
) | |||
) | |||
generated.append(path) | |||
if self._pool: | |||
task = self._pool.apply_async( | |||
audio_adapter.save, (path, data, self._sample_rate, codec, bitrate) | |||
) | |||
self._tasks.append(task) | |||
else: | |||
audio_adapter.save(path, data, self._sample_rate, codec, bitrate) | |||
if synchronous and self._pool: | |||
self.join() |
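# Usage sketch (added for illustration): separate a local file into WAV stems.
# Assumes `audio_example.mp3` exists and that pretrained 2stems weights can be
# downloaded on first use.
if __name__ == "__main__":
    _separator = Separator("spleeter:2stems", multiprocess=False)
    _separator.separate_to_file("audio_example.mp3", "/tmp/separated")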
@ -0,0 +1,15 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Custom types definition. """ | |||
from typing import Any, Tuple | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import numpy as np | |||
# pylint: enable=import-error | |||
AudioDescriptor: type = Any | |||
Signal: type = Tuple[np.ndarray, float] |
@ -0,0 +1,8 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" This package provides utility function and classes. """ | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" |
@ -0,0 +1,57 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Module that provides configuration loading function. """ | |||
import json | |||
from os.path import exists | |||
from typing import Dict | |||
try: | |||
import importlib.resources as loader | |||
except ImportError: | |||
# Try backported to PY<37 `importlib_resources`. | |||
# pyright: reportMissingImports=false | |||
import importlib_resources as loader | |||
from .. import SpleeterError, resources | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
_EMBEDDED_CONFIGURATION_PREFIX: str = "spleeter:" | |||
def load_configuration(descriptor: str) -> Dict: | |||
""" | |||
Load configuration from the given descriptor. Could be either a | |||
`spleeter:` prefixed embedded configuration name or a file system path | |||
to read configuration from. | |||
Parameters: | |||
descriptor (str): | |||
Configuration descriptor to use for lookup. | |||
    Returns:
        Dict:
            Loaded configuration as dict.
    Raises:
        SpleeterError:
            If the required embedded configuration or configuration
            file does not exist.
""" | |||
# Embedded configuration reading. | |||
if descriptor.startswith(_EMBEDDED_CONFIGURATION_PREFIX): | |||
name = descriptor[len(_EMBEDDED_CONFIGURATION_PREFIX) :] | |||
if not loader.is_resource(resources, f"{name}.json"): | |||
raise SpleeterError(f"No embedded configuration {name} found") | |||
with loader.open_text(resources, f"{name}.json") as stream: | |||
return json.load(stream) | |||
# Standard file reading. | |||
if not exists(descriptor): | |||
raise SpleeterError(f"Configuration file {descriptor} not found") | |||
with open(descriptor, "r") as stream: | |||
return json.load(stream) |
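# Usage sketch (added for illustration): load the embedded 2stems
# configuration and inspect a few of its parameters.
if __name__ == "__main__":
    _config = load_configuration("spleeter:2stems")
    print(_config["sample_rate"], _config["frame_length"], _config["instrument_list"])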
@ -0,0 +1,56 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Centralized logging facilities for Spleeter. """ | |||
import logging | |||
import warnings | |||
from os import environ | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
from typer import echo | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
environ["TF_CPP_MIN_LOG_LEVEL"] = "3" | |||
class TyperLoggerHandler(logging.Handler): | |||
""" A custom logger handler that use Typer echo. """ | |||
def emit(self, record: logging.LogRecord) -> None: | |||
echo(self.format(record)) | |||
formatter = logging.Formatter("%(levelname)s:%(name)s:%(message)s") | |||
handler = TyperLoggerHandler() | |||
handler.setFormatter(formatter) | |||
logger: logging.Logger = logging.getLogger("spleeter") | |||
logger.addHandler(handler) | |||
logger.setLevel(logging.INFO) | |||
def configure_logger(verbose: bool) -> None: | |||
""" | |||
Configure application logger. | |||
Parameters: | |||
verbose (bool): | |||
`True` to use verbose logger, `False` otherwise. | |||
""" | |||
from tensorflow import get_logger | |||
from tensorflow.compat.v1 import logging as tf_logging | |||
tf_logger = get_logger() | |||
tf_logger.handlers = [handler] | |||
if verbose: | |||
tf_logging.set_verbosity(tf_logging.INFO) | |||
logger.setLevel(logging.DEBUG) | |||
else: | |||
warnings.filterwarnings("ignore") | |||
tf_logging.set_verbosity(tf_logging.ERROR) |
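# Usage sketch (added for illustration): switch the shared `spleeter` logger
# to verbose mode, then emit a debug message rendered through Typer echo.
# Note that `configure_logger` imports tensorflow, which must be installed.
if __name__ == "__main__":
    configure_logger(verbose=True)
    logger.debug("Verbose logging enabled")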
@ -0,0 +1,230 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Utility function for tensorflow. """ | |||
from typing import Any, Callable, Dict | |||
import pandas as pd | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
import tensorflow as tf | |||
# pylint: enable=import-error | |||
__email__ = "spleeter@deezer.com" | |||
__author__ = "Deezer Research" | |||
__license__ = "MIT License" | |||
def sync_apply(
    tensor_dict: Dict[str, tf.Tensor], func: Callable, concat_axis: int = 1
) -> Dict[str, tf.Tensor]:
    """
    Apply the provided func synchronously on the provided dictionary
    of tensors. This means that func is applied to the concatenation
    of the tensors in tensor_dict. This is useful for performing a random
    operation that needs the same drawn value on multiple tensors, such
    as a random time-crop on both input data and label (the same crop
    should be applied to both input data and label, so random crop cannot
    be applied separately on each of them).
    Notes:
        All tensors are assumed to have the same shape.
    Parameters:
        tensor_dict (Dict[str, tensorflow.Tensor]):
            A dictionary of tensors.
func (Callable): | |||
Function to be applied to the concatenation of the tensors in | |||
`tensor_dict`. | |||
concat_axis (int): | |||
The axis on which to perform the concatenation. | |||
Returns: | |||
Dict[str, tensorflow.Tensor]: | |||
Processed tensors dictionary with the same name (keys) as input | |||
tensor_dict. | |||
""" | |||
if concat_axis not in {0, 1}: | |||
raise NotImplementedError( | |||
"Function only implemented for concat_axis equal to 0 or 1" | |||
) | |||
tensor_list = list(tensor_dict.values()) | |||
concat_tensor = tf.concat(tensor_list, concat_axis) | |||
processed_concat_tensor = func(concat_tensor) | |||
tensor_shape = tf.shape(list(tensor_dict.values())[0]) | |||
D = tensor_shape[concat_axis] | |||
if concat_axis == 0: | |||
return { | |||
name: processed_concat_tensor[index * D : (index + 1) * D, :, :] | |||
for index, name in enumerate(tensor_dict) | |||
} | |||
return { | |||
name: processed_concat_tensor[:, index * D : (index + 1) * D, :] | |||
for index, name in enumerate(tensor_dict) | |||
} | |||
def from_float32_to_uint8(
    tensor: tf.Tensor,
    tensor_key: str = "tensor",
    min_key: str = "min",
    max_key: str = "max",
) -> Dict[str, tf.Tensor]:
    """
    Quantize a float32 tensor to uint8, keeping track of its range.
    Parameters:
        tensor (tensorflow.Tensor):
            Tensor to quantize.
        tensor_key (str):
            Key of the quantized tensor in the output dict.
        min_key (str):
            Key of the tensor minimum in the output dict.
        max_key (str):
            Key of the tensor maximum in the output dict.
    Returns:
        Dict[str, tensorflow.Tensor]:
            Dict with the quantized tensor and its original range.
    """
tensor_min = tf.reduce_min(tensor) | |||
tensor_max = tf.reduce_max(tensor) | |||
return { | |||
tensor_key: tf.cast( | |||
(tensor - tensor_min) / (tensor_max - tensor_min + 1e-16) * 255.9999, | |||
dtype=tf.uint8, | |||
), | |||
min_key: tensor_min, | |||
max_key: tensor_max, | |||
} | |||
def from_uint8_to_float32(
    tensor: tf.Tensor, tensor_min: tf.Tensor, tensor_max: tf.Tensor
) -> tf.Tensor:
    """
    Dequantize a uint8 tensor back to float32 given its original range.
    Parameters:
        tensor (tensorflow.Tensor):
            Tensor to dequantize.
        tensor_min (tensorflow.Tensor):
            Original minimum of the tensor.
        tensor_max (tensorflow.Tensor):
            Original maximum of the tensor.
    Returns:
        tensorflow.Tensor:
            Dequantized float32 tensor.
    """
return ( | |||
tf.cast(tensor, tf.float32) * (tensor_max - tensor_min) / 255.9999 + tensor_min | |||
) | |||
def pad_and_partition(tensor: tf.Tensor, segment_len: int) -> tf.Tensor:
    """
    Pad and partition a tensor into segments of length `segment_len`
    along the first dimension. The tensor is padded with 0 in order
    to ensure that the first dimension is a multiple of `segment_len`.
    Tensor must be of known fixed rank.
    Examples:
    ```python
    >>> tensor = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    >>> segment_len = 2
    >>> pad_and_partition(tensor, segment_len)
    [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [0, 0, 0]]]
    ```
    Parameters:
        tensor (tensorflow.Tensor):
            Tensor to pad and partition.
        segment_len (int):
            Segment length along the first dimension.
    Returns:
        tensorflow.Tensor:
            Padded and partitioned tensor.
    """
tensor_size = tf.math.floormod(tf.shape(tensor)[0], segment_len) | |||
pad_size = tf.math.floormod(segment_len - tensor_size, segment_len) | |||
padded = tf.pad(tensor, [[0, pad_size]] + [[0, 0]] * (len(tensor.shape) - 1)) | |||
split = (tf.shape(padded)[0] + segment_len - 1) // segment_len | |||
return tf.reshape( | |||
padded, tf.concat([[split, segment_len], tf.shape(padded)[1:]], axis=0) | |||
) | |||
def pad_and_reshape(instr_spec, frame_length, F) -> Any: | |||
""" | |||
Parameters: | |||
instr_spec: | |||
frame_length: | |||
F: | |||
Returns: | |||
Any: | |||
""" | |||
spec_shape = tf.shape(instr_spec) | |||
extension_row = tf.zeros((spec_shape[0], spec_shape[1], 1, spec_shape[-1])) | |||
n_extra_row = (frame_length) // 2 + 1 - F | |||
extension = tf.tile(extension_row, [1, 1, n_extra_row, 1]) | |||
extended_spec = tf.concat([instr_spec, extension], axis=2) | |||
old_shape = tf.shape(extended_spec) | |||
new_shape = tf.concat([[old_shape[0] * old_shape[1]], old_shape[2:]], axis=0) | |||
processed_instr_spec = tf.reshape(extended_spec, new_shape) | |||
return processed_instr_spec | |||
def dataset_from_csv(csv_path: str, **kwargs) -> Any: | |||
""" | |||
Load dataset from a CSV file using Pandas. kwargs if any are | |||
forwarded to the `pandas.read_csv` function. | |||
Parameters: | |||
csv_path (str): | |||
Path of the CSV file to load dataset from. | |||
Returns: | |||
Any: | |||
Loaded dataset. | |||
""" | |||
df = pd.read_csv(csv_path, **kwargs) | |||
dataset = tf.data.Dataset.from_tensor_slices({key: df[key].values for key in df}) | |||
return dataset | |||
def check_tensor_shape(tensor_tf: tf.Tensor, target_shape: Any) -> bool: | |||
""" | |||
Return a Tensorflow boolean graph that indicates whether | |||
sample[features_key] has the specified target shape. Only check | |||
not None entries of target_shape. | |||
Parameters: | |||
tensor_tf (tensorflow.Tensor): | |||
Tensor to check shape for. | |||
target_shape (Any): | |||
Target shape to compare tensor to. | |||
Returns: | |||
bool: | |||
`True` if shape is valid, `False` otherwise (as TF boolean). | |||
""" | |||
result = tf.constant(True) | |||
for i, target_length in enumerate(target_shape): | |||
if target_length: | |||
result = tf.logical_and( | |||
result, tf.equal(tf.constant(target_length), tf.shape(tensor_tf)[i]) | |||
) | |||
return result | |||
def set_tensor_shape(tensor: tf.Tensor, tensor_shape: Any) -> tf.Tensor: | |||
""" | |||
Set shape for a tensor (not in place, as opposed to tf.set_shape) | |||
Parameters: | |||
tensor (tensorflow.Tensor): | |||
Tensor to reshape. | |||
tensor_shape (Any): | |||
Shape to apply to the tensor. | |||
Returns: | |||
tensorflow.Tensor: | |||
A reshaped tensor. | |||
""" | |||
# NOTE: That SOUND LIKE IN PLACE HERE ? | |||
tensor.set_shape(tensor_shape) | |||
return tensor |
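# Usage sketch (added for illustration): apply the same crop to two tensors at
# once with `sync_apply` (a deterministic time-crop standing in for a random
# one), then pad and partition a tensor along its first dimension.
if __name__ == "__main__":
    _a = tf.reshape(tf.range(24, dtype=tf.float32), (4, 3, 2))
    _cropped = sync_apply({"mix": _a, "vocals": _a}, lambda t: t[:2])
    print({_k: tuple(_v.shape) for _k, _v in _cropped.items()})  # both (2, 3, 2)
    print(pad_and_partition(tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), 2).shape)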
@ -0,0 +1,8 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing package. """ | |||
__email__ = 'spleeter@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' |
@ -0,0 +1,21 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing for Separator class. """ | |||
__email__ = 'research@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' | |||
from spleeter.__main__ import spleeter | |||
from typer.testing import CliRunner | |||
def test_version():
    runner = CliRunner()
    # execute spleeter version command
    result = runner.invoke(spleeter, [
        '--version',
    ])
    assert result.exit_code == 0
@ -0,0 +1,88 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing for Separator class. """ | |||
__email__ = 'spleeter@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' | |||
from os import makedirs | |||
from os.path import join | |||
from tempfile import TemporaryDirectory | |||
import pytest | |||
import numpy as np | |||
from spleeter.__main__ import evaluate | |||
from spleeter.audio.adapter import AudioAdapter | |||
BACKENDS = ['tensorflow', 'librosa'] | |||
TEST_CONFIGURATIONS = {el: el for el in BACKENDS} | |||
res_4stems = { | |||
'vocals': { | |||
'SDR': 3.25e-05, | |||
'SAR': -11.153575, | |||
'SIR': -1.3849, | |||
'ISR': 2.75e-05 | |||
}, | |||
'drums': { | |||
'SDR': -0.079505, | |||
'SAR': -15.7073575, | |||
'SIR': -4.972755, | |||
'ISR': 0.0013575 | |||
}, | |||
'bass': { | |||
'SDR': 2.5e-06, | |||
'SAR': -10.3520575, | |||
'SIR': -4.272325, | |||
'ISR': 2.5e-06 | |||
}, | |||
'other': { | |||
'SDR': -1.359175, | |||
'SAR': -14.7076775, | |||
'SIR': -4.761505, | |||
'ISR': -0.01528 | |||
} | |||
} | |||
def generate_fake_eval_dataset(path): | |||
""" | |||
    Generate a fake evaluation dataset.
""" | |||
aa = AudioAdapter.default() | |||
n_songs = 2 | |||
fs = 44100 | |||
duration = 3 | |||
n_channels = 2 | |||
rng = np.random.RandomState(seed=0) | |||
for song in range(n_songs): | |||
song_path = join(path, 'test', f'song{song}') | |||
makedirs(song_path, exist_ok=True) | |||
for instr in ['mixture', 'vocals', 'bass', 'drums', 'other']: | |||
filename = join(song_path, f'{instr}.wav') | |||
data = rng.rand(duration*fs, n_channels)-0.5 | |||
aa.save(filename, data, fs) | |||
@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS) | |||
def test_evaluate(backend): | |||
with TemporaryDirectory() as dataset: | |||
with TemporaryDirectory() as evaluation: | |||
generate_fake_eval_dataset(dataset) | |||
metrics = evaluate( | |||
adapter='spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter', | |||
output_path=evaluation, | |||
stft_backend=backend, | |||
params_filename='spleeter:4stems', | |||
mus_dir=dataset, | |||
mwf=False, | |||
verbose=False) | |||
for instrument, metric in metrics.items(): | |||
for m, value in metric.items(): | |||
assert np.allclose( | |||
np.median(value), | |||
res_4stems[instrument][m], | |||
atol=1e-3) |
@ -0,0 +1,87 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing for audio adapter. """ | |||
__email__ = 'spleeter@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' | |||
from os.path import join | |||
from tempfile import TemporaryDirectory | |||
from spleeter import SpleeterError | |||
from spleeter.audio.adapter import AudioAdapter | |||
from spleeter.audio.ffmpeg import FFMPEGProcessAudioAdapter | |||
# pyright: reportMissingImports=false | |||
# pylint: disable=import-error | |||
from pytest import fixture, raises | |||
import numpy as np | |||
import ffmpeg | |||
# pylint: enable=import-error | |||
TEST_AUDIO_DESCRIPTOR = 'audio_example.mp3' | |||
TEST_OFFSET = 0 | |||
TEST_DURATION = 600. | |||
TEST_SAMPLE_RATE = 44100 | |||
@fixture(scope='session') | |||
def adapter(): | |||
""" Target test audio adapter fixture. """ | |||
return AudioAdapter.default() | |||
@fixture(scope='session') | |||
def audio_data(adapter): | |||
""" Audio data fixture based on sample loading from adapter. """ | |||
return adapter.load( | |||
TEST_AUDIO_DESCRIPTOR, | |||
TEST_OFFSET, | |||
TEST_DURATION, | |||
TEST_SAMPLE_RATE) | |||
def test_default_adapter(adapter): | |||
""" Test adapter as default adapter. """ | |||
assert isinstance(adapter, FFMPEGProcessAudioAdapter) | |||
assert adapter is AudioAdapter._DEFAULT | |||
def test_load(audio_data): | |||
""" Test audio loading. """ | |||
waveform, sample_rate = audio_data | |||
assert sample_rate == TEST_SAMPLE_RATE | |||
assert waveform is not None | |||
assert waveform.dtype == np.dtype('float32') | |||
assert len(waveform.shape) == 2 | |||
assert waveform.shape[0] == 479832 | |||
assert waveform.shape[1] == 2 | |||
def test_load_error(adapter): | |||
""" Test load ffprobe exception """ | |||
with raises(SpleeterError): | |||
adapter.load( | |||
'Paris City Jazz', | |||
TEST_OFFSET, | |||
TEST_DURATION, | |||
TEST_SAMPLE_RATE) | |||
def test_save(adapter, audio_data): | |||
""" Test audio saving. """ | |||
with TemporaryDirectory() as directory: | |||
path = join(directory, 'ffmpeg-save.mp3') | |||
adapter.save( | |||
path, | |||
audio_data[0], | |||
audio_data[1]) | |||
        assert exists(path)
        # Probe the freshly saved file rather than the source fixture.
        probe = ffmpeg.probe(path)
        assert len(probe['streams']) == 1
        stream = probe['streams'][0]
        assert stream['codec_type'] == 'audio'
        assert stream['channels'] == 2
        # MP3 encoding pads the stream slightly, so check the duration loosely.
        assert abs(float(stream['duration']) - 10.92) < 0.1
@ -0,0 +1,21 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" TO DOCUMENT """ | |||
from pytest import raises | |||
from spleeter.model.provider import ModelProvider | |||
def test_checksum(): | |||
""" Test archive checksum index retrieval. """ | |||
provider = ModelProvider.default() | |||
assert provider.checksum('2stems') == \ | |||
'f3a90b39dd2874269e8b05a48a86745df897b848c61f3958efc80a39152bd692' | |||
assert provider.checksum('4stems') == \ | |||
'3adb4a50ad4eb18c7c4d65fcf4cf2367a07d48408a5eb7d03cd20067429dfaa8' | |||
assert provider.checksum('5stems') == \ | |||
'25a1e87eb5f75cc72a4d2d5467a0a50ac75f05611f877c278793742513cc7218' | |||
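    # Requesting an unknown model name should raise rather than return a checksum.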
with raises(ValueError): | |||
provider.checksum('laisse moi stems stems stems') |
@ -0,0 +1,140 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing for Separator class. """ | |||
__email__ = 'spleeter@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' | |||
import itertools | |||
from os.path import splitext, basename, exists, join | |||
from tempfile import TemporaryDirectory | |||
import pytest | |||
import numpy as np | |||
import tensorflow as tf | |||
from spleeter import SpleeterError | |||
from spleeter.audio.adapter import AudioAdapter | |||
from spleeter.separator import Separator | |||
TEST_AUDIO_DESCRIPTORS = ['audio_example.mp3', 'audio_example_mono.mp3'] | |||
BACKENDS = ["tensorflow", "librosa"] | |||
MODELS = ['spleeter:2stems', 'spleeter:4stems', 'spleeter:5stems'] | |||
MODEL_TO_INST = { | |||
'spleeter:2stems': ('vocals', 'accompaniment'), | |||
'spleeter:4stems': ('vocals', 'drums', 'bass', 'other'), | |||
'spleeter:5stems': ('vocals', 'drums', 'bass', 'piano', 'other'), | |||
} | |||
MODELS_AND_TEST_FILES = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS)) | |||
TEST_CONFIGURATIONS = list(itertools.product( | |||
TEST_AUDIO_DESCRIPTORS, MODELS, BACKENDS)) | |||
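# Each test case covers one combination of input file, model, and STFT backend.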
print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) | |||
@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS) | |||
def test_separator_backends(test_file): | |||
adapter = AudioAdapter.default() | |||
waveform, _ = adapter.load(test_file) | |||
separator_lib = Separator( | |||
"spleeter:2stems", stft_backend="librosa", multiprocess=False) | |||
separator_tf = Separator( | |||
"spleeter:2stems", stft_backend="tensorflow", multiprocess=False) | |||
    # Test that the STFT followed by its inverse gives near-exact reconstruction.
stft_matrix = separator_lib._stft(waveform) | |||
reconstructed = separator_lib._stft( | |||
stft_matrix, inverse=True, length=waveform.shape[0]) | |||
assert np.allclose(reconstructed, waveform, atol=3e-2) | |||
    # Compare both separations; they should be close.
out_tf = separator_tf._separate_tensorflow(waveform, test_file) | |||
out_lib = separator_lib._separate_librosa(waveform, test_file) | |||
for instrument in out_lib.keys(): | |||
# test that both outputs are close everywhere | |||
assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5) | |||
@pytest.mark.parametrize( | |||
'test_file, configuration, backend', | |||
TEST_CONFIGURATIONS) | |||
def test_separate(test_file, configuration, backend): | |||
""" Test separation from raw data. """ | |||
instruments = MODEL_TO_INST[configuration] | |||
adapter = AudioAdapter.default() | |||
waveform, _ = adapter.load(test_file) | |||
separator = Separator( | |||
configuration, stft_backend=backend, multiprocess=False) | |||
prediction = separator.separate(waveform, test_file) | |||
assert len(prediction) == len(instruments) | |||
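    # Every expected stem must be present, differ from the mixture,
    # and differ from every other stem.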
for instrument in instruments: | |||
assert instrument in prediction | |||
for instrument in instruments: | |||
track = prediction[instrument] | |||
assert waveform.shape[:-1] == track.shape[:-1] | |||
assert not np.allclose(waveform, track) | |||
for compared in instruments: | |||
if instrument != compared: | |||
assert not np.allclose(track, prediction[compared]) | |||
@pytest.mark.parametrize( | |||
'test_file, configuration, backend', | |||
TEST_CONFIGURATIONS) | |||
def test_separate_to_file(test_file, configuration, backend): | |||
""" Test file based separation. """ | |||
instruments = MODEL_TO_INST[configuration] | |||
separator = Separator( | |||
configuration, stft_backend=backend, multiprocess=False) | |||
name = splitext(basename(test_file))[0] | |||
with TemporaryDirectory() as directory: | |||
separator.separate_to_file( | |||
test_file, | |||
directory) | |||
for instrument in instruments: | |||
assert exists(join( | |||
directory, | |||
'{}/{}.wav'.format(name, instrument))) | |||
@pytest.mark.parametrize( | |||
'test_file, configuration, backend', | |||
TEST_CONFIGURATIONS) | |||
def test_filename_format(test_file, configuration, backend): | |||
""" Test custom filename format. """ | |||
instruments = MODEL_TO_INST[configuration] | |||
separator = Separator( | |||
configuration, stft_backend=backend, multiprocess=False) | |||
name = splitext(basename(test_file))[0] | |||
with TemporaryDirectory() as directory: | |||
separator.separate_to_file( | |||
test_file, | |||
directory, | |||
filename_format='export/{filename}/{instrument}.{codec}') | |||
for instrument in instruments: | |||
assert exists(join( | |||
directory, | |||
'export/{}/{}.wav'.format(name, instrument))) | |||
@pytest.mark.parametrize( | |||
'test_file, configuration', | |||
MODELS_AND_TEST_FILES) | |||
def test_filename_conflict(test_file, configuration): | |||
""" Test error handling with static pattern. """ | |||
separator = Separator(configuration, multiprocess=False) | |||
with TemporaryDirectory() as directory: | |||
with pytest.raises(SpleeterError): | |||
separator.separate_to_file( | |||
test_file, | |||
directory, | |||
filename_format='I wanna be your lover') |
@ -0,0 +1,124 @@ | |||
#!/usr/bin/env python | |||
# coding: utf8 | |||
""" Unit testing for Separator class. """ | |||
__email__ = 'research@deezer.com' | |||
__author__ = 'Deezer Research' | |||
__license__ = 'MIT License' | |||
import json | |||
import os | |||
from os import makedirs | |||
from os.path import join | |||
from tempfile import TemporaryDirectory | |||
import numpy as np | |||
import pandas as pd | |||
from spleeter.audio.adapter import AudioAdapter | |||
from spleeter.__main__ import spleeter | |||
from typer.testing import CliRunner | |||
TRAIN_CONFIG = { | |||
'mix_name': 'mix', | |||
'instrument_list': ['vocals', 'other'], | |||
'sample_rate': 44100, | |||
'frame_length': 4096, | |||
'frame_step': 1024, | |||
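    # 'T' and 'F' are the time-frame and frequency-bin dimensions of the
    # spectrogram patches fed to the network.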
'T': 128, | |||
'F': 128, | |||
'n_channels': 2, | |||
'chunk_duration': 4, | |||
'n_chunks_per_song': 1, | |||
'separation_exponent': 2, | |||
'mask_extension': 'zeros', | |||
'learning_rate': 1e-4, | |||
'batch_size': 2, | |||
'train_max_steps': 10, | |||
'throttle_secs': 20, | |||
'save_checkpoints_steps': 100, | |||
'save_summary_steps': 5, | |||
'random_seed': 0, | |||
'model': { | |||
'type': 'unet.unet', | |||
'params': { | |||
'conv_activation': 'ELU', | |||
'deconv_activation': 'ELU' | |||
} | |||
} | |||
} | |||
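# A deliberately tiny configuration (2 stems, 10 training steps) so that the
# end-to-end training test stays fast.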
def generate_fake_training_dataset(path,
                                   instrument_list=('vocals', 'other'),
                                   n_channels=2,
                                   n_songs=2,
                                   fs=44100,
                                   duration=6):
    """
    Generate a fake training dataset in `path`:
    - generate the audio files
    - generate a csv file describing the dataset
    """
aa = AudioAdapter.default() | |||
rng = np.random.RandomState(seed=0) | |||
dataset_df = pd.DataFrame( | |||
columns=['mix_path'] + [ | |||
f'{instr}_path' for instr in instrument_list] + ['duration']) | |||
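    # One row per song; paths are stored relative to the dataset root
    # that is later passed to `spleeter train` via -d.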
for song in range(n_songs): | |||
song_path = join(path, 'train', f'song{song}') | |||
makedirs(song_path, exist_ok=True) | |||
        dataset_df.loc[song, 'duration'] = duration
        for instr in list(instrument_list) + ['mix']:
filename = join(song_path, f'{instr}.wav') | |||
data = rng.rand(duration*fs, n_channels)-0.5 | |||
aa.save(filename, data, fs) | |||
dataset_df.loc[song, f'{instr}_path'] = join( | |||
'train', | |||
f'song{song}', | |||
f'{instr}.wav') | |||
dataset_df.to_csv(join(path, 'train', 'train.csv'), index=False) | |||
def test_train(): | |||
with TemporaryDirectory() as path: | |||
# generate training dataset | |||
        for n_channels in [1, 2]:
TRAIN_CONFIG["n_channels"] = n_channels | |||
generate_fake_training_dataset(path, | |||
n_channels=n_channels, | |||
fs=TRAIN_CONFIG["sample_rate"] | |||
) | |||
# set training command arguments | |||
runner = CliRunner() | |||
model_dir = join(path, f'model_{n_channels}') | |||
            train_dir = join(path, 'train')
cache_dir = join(path, f'cache_{n_channels}') | |||
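            # The training CSV doubles as the validation CSV; the test only
            # exercises the pipeline, not separation quality.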
TRAIN_CONFIG['train_csv'] = join(train_dir, 'train.csv') | |||
TRAIN_CONFIG['validation_csv'] = join(train_dir, 'train.csv') | |||
TRAIN_CONFIG['model_dir'] = model_dir | |||
TRAIN_CONFIG['training_cache'] = join(cache_dir, 'training') | |||
TRAIN_CONFIG['validation_cache'] = join(cache_dir, 'validation') | |||
            # Write the config inside the temporary directory so the test
            # does not leave artifacts in the working directory.
            config_path = join(path, 'useless_config.json')
            with open(config_path, 'w') as stream:
                json.dump(TRAIN_CONFIG, stream)
            # execute training
            result = runner.invoke(spleeter, [
                'train',
                '-p', config_path,
                '-d', path,
                '--verbose'
            ])
# assert that model checkpoint was created. | |||
assert os.path.exists(join(model_dir, 'model.ckpt-10.index')) | |||
assert os.path.exists(join(model_dir, 'checkpoint')) | |||
assert os.path.exists(join(model_dir, 'model.ckpt-0.meta')) | |||
assert result.exit_code == 0 |