bgmseparator / app.py
masszhou's picture
update mdxnet outputs
47b5e1d
import os
import subprocess
import sys
from pathlib import Path

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from scipy.io.wavfile import write

from utils import convert_to_stereo_and_wav
from uvr_processing import get_model_params, run_mdx
# Hugging Face Hub repository the ONNX files below are downloaded from.
MODEL_ID = "masszhou/mdxnet"

# Paths returned by hf_hub_download, keyed by a short model name.
# The downloads run at import time, in the order listed here.
MODELS_PATH = {
    key: Path(hf_hub_download(repo_id=MODEL_ID, filename=onnx_file))
    for key, onnx_file in (
        ("bgm", "UVR-MDX-NET-Inst_HQ_3.onnx"),
        ("basic_vocal", "UVR-MDX-NET-Voc_FT.onnx"),
        ("main_vocal", "UVR_MDXNET_KARA_2.onnx"),
    )
}
def inference_mdx(audio_file: str) -> tuple[str, str]:
    """Separate an audio file into vocals and background music with MDX-Net.

    Args:
        audio_file: Path of the input audio file.

    Returns:
        A ``(vocal_path, background_path)`` pair of output paths, as strings
        and in that order.
    """
    mdx_model_params = get_model_params(Path("./mdx_models"))

    # Per the original author's note, this conversion also resamples the
    # audio to 44100 Hz.
    wav_file = convert_to_stereo_and_wav(Path(audio_file))

    device_base = "cuda" if torch.cuda.is_available() else "cpu"

    output_dir = Path("./out/mdx")
    output_dir.mkdir(parents=True, exist_ok=True)

    # run_mdx yields (background, vocal); this function returns vocals first.
    background_path, vocal_path = run_mdx(
        model_params=mdx_model_params,
        input_filename=wav_file,
        output_dir=output_dir,
        model_path=MODELS_PATH["bgm"],
        denoise=False,
        device_base=device_base,
    )
    return str(vocal_path), str(background_path)
def inference_demucs(audio):
    """Separate an in-memory recording into vocals and accompaniment with Demucs.

    Args:
        audio: A ``(sample_rate, samples)`` pair; the samples are written to
            disk with ``scipy.io.wavfile.write`` before Demucs is invoked.

    Returns:
        A ``(vocals_path, no_vocals_path)`` pair pointing at the files the
        Demucs command line is expected to write under ``./out/htdemucs/test``.

    Raises:
        subprocess.CalledProcessError: If the Demucs process exits with a
            non-zero status.
    """
    sample_rate, samples = audio

    os.makedirs("out", exist_ok=True)

    # NOTE(review): the scratch file name is fixed, so overlapping requests
    # would overwrite each other's input and output — confirm whether the app
    # can serve requests concurrently.
    write("test.wav", sample_rate, samples)

    # Use an argument list (no shell) and the current interpreter so Demucs
    # runs in the same environment as this app. check=True turns a failed
    # separation into an exception instead of silently returning paths that
    # are missing or left over from an earlier run.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "demucs.separate",
            "-n",
            "htdemucs",
            "--two-stems=vocals",
            "test.wav",
            "-o",
            "out",
        ],
        check=True,
    )
    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"
if __name__ == "__main__":
    # Footer HTML shown under each tab: paper, repository and license links.
    demucs_article = (
        "<p style='text-align: center'>"
        "<a href='https://arxiv.org/abs/1911.13254' target='_blank'>Music Source Separation in the Waveform Domain</a>"
        " | <a href='https://github.com/facebookresearch/demucs' target='_blank'>Github Repo</a>"
        " | <a href='https://github.com/facebookresearch/demucs/blob/main/LICENSE' target='_blank'>MIT License</a>"
        "</p>"
    )
    mdx_article = (
        "<p style='text-align: center'>"
        "<a href='https://arxiv.org/abs/2111.12203' target='_blank'>KUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing</a>"
        " | <a href='https://github.com/kuielab/mdx-net' target='_blank'>Github Repo</a>"
        " | <a href='https://github.com/kuielab/mdx-net/blob/main/LICENSE' target='_blank'>MIT License</a>"
        "</p>"
    )

    # Demucs tab: the input component is configured with type="numpy".
    demucs_tab = gr.Interface(
        fn=inference_demucs,
        inputs=gr.Audio(type="numpy", label="Input"),
        outputs=[
            gr.Audio(type="filepath", label="Vocals"),
            gr.Audio(type="filepath", label="BGM"),
        ],
        title="Demucs Music Source Separation (v4)",
        article=demucs_article,
        api_name="demucs_separation",
    )

    # MDX-Net tab: the input component is configured with type="filepath".
    mdx_tab = gr.Interface(
        fn=inference_mdx,
        inputs=gr.Audio(type="filepath", label="Input"),
        outputs=[
            gr.Audio(type="filepath", label="Vocals"),
            gr.Audio(type="filepath", label="BGM"),
        ],
        title="MDXNET Music Source Separation",
        article=mdx_article,
        api_name="mdxnet_separation",
    )

    # Combine both interfaces into one tabbed app and start it.
    demo = gr.TabbedInterface([demucs_tab, mdx_tab], ["Demucs", "MDXNET"])
    demo.launch()