Spaces:

masszhou
/

bgmseparator

Running

App Files Files Community

bgmseparator / app.py

masszhou

update mdxnet outputs

47b5e1d 18 days ago

raw

history blame contribute delete

3.17 kB

	import os
	from pathlib import Path
	from huggingface_hub import hf_hub_download
	import gradio as gr
	from scipy.io.wavfile import write
	import torch
	from utils import convert_to_stereo_and_wav
	from uvr_processing import get_model_params, run_mdx


	# Hugging Face Hub repository that hosts the MDX-Net ONNX checkpoints.
	MODEL_ID = "masszhou/mdxnet"

	# Local paths of the checkpoints, keyed by the role each model plays.
	# hf_hub_download is called once per entry, in the listed order, while this
	# module is being loaded.
	MODELS_PATH: dict[str, Path] = {
	    role: Path(hf_hub_download(repo_id=MODEL_ID, filename=onnx_name))
	    for role, onnx_name in (
	        ("bgm", "UVR-MDX-NET-Inst_HQ_3.onnx"),
	        ("basic_vocal", "UVR-MDX-NET-Voc_FT.onnx"),
	        ("main_vocal", "UVR_MDXNET_KARA_2.onnx"),
	    )
	}


	def inference_mdx(audio_file: str) -> tuple[str, str]:
	    """Split an audio file into vocal and background stems with MDX-Net.

	    The input is first passed through ``convert_to_stereo_and_wav`` and the
	    result is separated by ``run_mdx`` using the ``"bgm"`` checkpoint from
	    ``MODELS_PATH``. Output files are written under ``./out/mdx``, which is
	    created if missing. Inference runs on CUDA when torch reports it as
	    available, otherwise on the CPU.

	    Args:
	        audio_file: Path of the audio file to separate.

	    Returns:
	        ``(vocal_path, background_path)`` as strings, in that order.
	    """
	    mdx_model_params = get_model_params(Path("./mdx_models"))

	    # Per the original author's note, the conversion resamples to 44100 Hz.
	    # A separate name is used so the str parameter is not rebound to a Path.
	    stereo_wav = convert_to_stereo_and_wav(Path(audio_file))

	    device_base = "cuda" if torch.cuda.is_available() else "cpu"

	    output_dir = Path("./out/mdx")
	    output_dir.mkdir(parents=True, exist_ok=True)

	    # run_mdx hands back the background stem first and the vocal stem second.
	    background_path, vocal_path = run_mdx(
	        model_params=mdx_model_params,
	        input_filename=stereo_wav,
	        output_dir=output_dir,
	        model_path=MODELS_PATH["bgm"],
	        denoise=False,
	        device_base=device_base,
	    )

	    # Callers expect vocals first, then background.
	    return str(vocal_path), str(background_path)


	def inference_demucs(audio):
	    """Separate vocals from accompaniment by running Demucs as a subprocess.

	    The samples are written to ``test.wav`` in the current working directory
	    and ``demucs.separate`` is invoked on that file with the ``htdemucs``
	    model in two-stem (vocals) mode, writing its results under ``out``.

	    Args:
	        audio: Indexable pair where ``audio[0]`` is the sample rate and
	            ``audio[1]`` is the sample data, passed in that order to
	            ``scipy.io.wavfile.write``.

	    Returns:
	        A tuple ``(vocals_path, no_vocals_path)`` of output file paths.

	    Raises:
	        subprocess.CalledProcessError: If the Demucs process exits with a
	            non-zero status. Previously the status was ignored and paths to
	            missing or stale files were returned.
	    """
	    # Local import so this block does not depend on the file's import header.
	    import subprocess

	    sample_rate, samples = audio[0], audio[1]
	    os.makedirs("out", exist_ok=True)

	    # NOTE(review): the fixed file name is shared by every call, so concurrent
	    # requests would overwrite each other's input and output - confirm whether
	    # the app can serve requests in parallel.
	    write("test.wav", sample_rate, samples)

	    # An argv list avoids going through a shell; check=True surfaces failures.
	    subprocess.run(
	        [
	            "python3", "-m", "demucs.separate",
	            "-n", "htdemucs",
	            "--two-stems=vocals",
	            "test.wav",
	            "-o", "out",
	        ],
	        check=True,
	    )
	    return "./out/htdemucs/test/vocals.wav", "./out/htdemucs/test/no_vocals.wav"


	if __name__ == "__main__":
	    # Build one Gradio interface per separation backend, then serve both as
	    # tabs of a single app. The link separators in the article HTML are plain
	    # "|" characters: a backslash before "|" is not a valid Python escape and
	    # would show up as a stray backslash on the page.
	    tab_1 = gr.Interface(
	        fn=inference_demucs,
	        # Demucs tab receives the audio as a (sample_rate, samples) pair.
	        inputs=gr.Audio(type="numpy", label="Input"),
	        outputs=[
	            gr.Audio(type="filepath", label="Vocals"),
	            gr.Audio(type="filepath", label="BGM"),
	        ],
	        title="Demucs Music Source Separation (v4)",
	        article="<p style='text-align: center'><a href='https://arxiv.org/abs/1911.13254' target='_blank'>Music Source Separation in the Waveform Domain</a> | <a href='https://github.com/facebookresearch/demucs' target='_blank'>Github Repo</a> | <a href='https://github.com/facebookresearch/demucs/blob/main/LICENSE' target='_blank'>MIT License</a></p>",
	        api_name="demucs_separation",
	    )
	    tab_2 = gr.Interface(
	        fn=inference_mdx,
	        # MDX-Net tab receives the audio as a path on disk.
	        inputs=gr.Audio(type="filepath", label="Input"),
	        outputs=[
	            gr.Audio(type="filepath", label="Vocals"),
	            gr.Audio(type="filepath", label="BGM"),
	        ],
	        title="MDXNET Music Source Separation",
	        article="<p style='text-align: center'><a href='https://arxiv.org/abs/2111.12203' target='_blank'>KUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing</a> | <a href='https://github.com/kuielab/mdx-net' target='_blank'>Github Repo</a> | <a href='https://github.com/kuielab/mdx-net/blob/main/LICENSE' target='_blank'>MIT License</a></p>",
	        api_name="mdxnet_separation",
	    )
	    demo = gr.TabbedInterface([tab_1, tab_2], ["Demucs", "MDXNET"])
	    demo.launch()