"""Gradio app exposing Demucs and MDX-Net vocal/background separation."""

import os
import subprocess
import sys
from pathlib import Path

from huggingface_hub import hf_hub_download
import gradio as gr
from scipy.io.wavfile import write
import torch

from utils import convert_to_stereo_and_wav
from uvr_processing import get_model_params, run_mdx

MODEL_ID = "masszhou/mdxnet"

# ONNX checkpoints are fetched (or read from the local hub cache) at import
# time. Only the "bgm" entry is used by inference_mdx below.
MODELS_PATH = {
    "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
    "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
    "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx")),
}


def inference_mdx(audio_file: str) -> tuple[str, str]:
    """Split an audio file into vocals and background with the MDX "bgm" model.

    Args:
        audio_file: Path of the input audio file, as delivered by a
            ``gr.Audio(type="filepath")`` component.

    Returns:
        ``(vocal_path, background_path)`` as strings, in the order expected by
        the two output components of the MDXNET tab.

    Raises:
        gr.Error: If no input file was supplied.
    """
    if not audio_file:
        # Gradio passes None when the user submits without providing audio.
        raise gr.Error("Please provide an input audio file.")

    mdx_model_params = get_model_params(Path("./mdx_models"))
    wav_file = convert_to_stereo_and_wav(Path(audio_file))  # resampling at 44100 Hz
    device_base = "cuda" if torch.cuda.is_available() else "cpu"

    output_dir = Path("./out/mdx")
    output_dir.mkdir(parents=True, exist_ok=True)

    # run_mdx's result is unpacked as (background, vocal); the return below
    # swaps the order to match the UI outputs (Vocals first, then BGM).
    background_path, vocal_path = run_mdx(
        model_params=mdx_model_params,
        input_filename=wav_file,
        output_dir=output_dir,
        model_path=MODELS_PATH["bgm"],
        denoise=False,
        device_base=device_base,
    )
    return str(vocal_path), str(background_path)


def inference_demucs(audio) -> tuple[str, str]:
    """Split audio into vocals and accompaniment by running Demucs (htdemucs).

    The input is written to ``test.wav`` in the working directory and the
    ``demucs.separate`` module is run on it as a subprocess, writing its
    results under ``out``.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by a
            ``gr.Audio(type="numpy")`` component.

    Returns:
        ``(vocals_path, no_vocals_path)`` pointing at the files Demucs writes
        for the ``test`` track.

    Raises:
        gr.Error: If no audio was supplied or the Demucs process exits with a
            non-zero status.
    """
    if audio is None:
        # Gradio passes None when the user submits without providing audio.
        raise gr.Error("Please provide an input audio clip.")

    sample_rate, samples = audio[0], audio[1]
    os.makedirs("out", exist_ok=True)
    write('test.wav', sample_rate, samples)

    # Use the running interpreter so Demucs is resolved from the same
    # environment, and check the exit status so a failed run is reported
    # instead of returning paths to missing or stale files.
    command = [
        sys.executable, "-m", "demucs.separate",
        "-n", "htdemucs",
        "--two-stems=vocals",
        "test.wav",
        "-o", "out",
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as err:
        raise gr.Error(
            f"Demucs separation failed (exit code {err.returncode})."
        ) from err

    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"


if __name__ == "__main__":
    tab_1 = gr.Interface(
        fn=inference_demucs,
        inputs=gr.Audio(type="numpy", label="Input"),
        outputs=[
            gr.Audio(type="filepath", label="Vocals"),
            gr.Audio(type="filepath", label="BGM"),
        ],
        title="Demucs Music Source Separation (v4)",
        article="\n\nMusic Source Separation in the Waveform Domain | Github Repo | MIT License\n\n",
        api_name="demucs_separation",
    )
    tab_2 = gr.Interface(
        fn=inference_mdx,
        inputs=gr.Audio(type="filepath", label="Input"),
        outputs=[
            gr.Audio(type="filepath", label="Vocals"),
            gr.Audio(type="filepath", label="BGM"),
        ],
        title="MDXNET Music Source Separation",
        article="\n\nKUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing | Github Repo | MIT License\n\n",
        api_name="mdxnet_separation",
    )

    demo = gr.TabbedInterface([tab_1, tab_2], ["Demucs", "MDXNET"])
    demo.launch()