Politrees committed on
Commit
c52c271
·
verified ·
1 Parent(s): 35e695a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -796
app.py CHANGED
@@ -1,797 +1,3 @@
1
- import os
2
- import re
3
- import torch
4
- import shutil
5
- import logging
6
- import subprocess
7
- import gradio as gr
8
 
9
- from audio_separator.separator import Separator
10
-
11
# Autocast is enabled only when a CUDA device is available; the device name
# is derived from that same check.
use_autocast = torch.cuda.is_available()
device = "cuda" if use_autocast else "cpu"
13
-
14
- #=========================#
15
- # Roformer Models #
16
- #=========================#
17
# Display name (offered in the Roformer tab's model dropdown) -> model file
# name handed to ``Separator.load_model``. Insertion order is the dropdown order.
ROFORMER_MODELS = {
    # --- BS Roformer ---
    "BS-Roformer-Viperx-1053": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "BS-Roformer-Viperx-1296": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1297": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-De-Reverb": "deverb_bs_roformer_8_384dim_10depth.ckpt",
    "BS Roformer | Chorus Male-Female by Sucial": "model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt",
    "BS Roformer | Male-Female by aufr33": "bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt",
    "BS Roformer | Vocals by Gabox": "bs_roformer_vocals_gabox.ckpt",

    # --- MelBand Roformer ---
    "Mel-Roformer-Viperx-1143": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
    "Mel-Roformer-Crowd-Aufr33-Viperx": "mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt",
    "Mel-Roformer-Karaoke-Aufr33-Viperx": "mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
    "Mel-Roformer-Denoise-Aufr33": "denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt",
    "Mel-Roformer-Denoise-Aufr33-Aggr": "denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt",
    "MelBand Roformer | Aspiration by Sucial": "aspiration_mel_band_roformer_sdr_18.9845.ckpt",
    "MelBand Roformer | Aspiration Less Aggressive by Sucial": "aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt",
    "MelBand Roformer | Bleed Suppressor V1 by unwa-97chris": "mel_band_roformer_bleed_suppressor_v1.ckpt",
    "MelBand Roformer | De-Reverb by anvuew": "dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt",
    "MelBand Roformer | De-Reverb Mono by anvuew": "dereverb_mel_band_roformer_mono_anvuew.ckpt",
    "MelBand Roformer | De-Reverb Less Aggressive by anvuew": "dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt",
    "MelBand Roformer | De-Reverb-Echo by Sucial": "dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt",
    "MelBand Roformer | De-Reverb-Echo V2 by Sucial": "dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt",
    "MelBand Roformer | De-Reverb Big by Sucial": "dereverb_big_mbr_ep_362.ckpt",
    "MelBand Roformer | De-Reverb Super Big by Sucial": "dereverb_super_big_mbr_ep_346.ckpt",
    "MelBand Roformer | De-Reverb-Echo Fused by Sucial": "dereverb_echo_mbr_fused.ckpt",
    "MelBand Roformer | Denoise-Debleed by Gabox": "mel_band_roformer_denoise_debleed_gabox.ckpt",
    "MelBand Roformer | Instrumental by becruily": "mel_band_roformer_instrumental_becruily.ckpt",
    "MelBand Roformer | Instrumental by Gabox": "mel_band_roformer_instrumental_gabox.ckpt",
    "MelBand Roformer | Instrumental 2 by Gabox": "mel_band_roformer_instrumental_2_gabox.ckpt",
    "MelBand Roformer | Instrumental 3 by Gabox": "mel_band_roformer_instrumental_3_gabox.ckpt",
    "MelBand Roformer | Instrumental Bleedless V1 by Gabox": "mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt",
    "MelBand Roformer | Instrumental Bleedless V2 by Gabox": "mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt",
    "MelBand Roformer | Instrumental Fullness V1 by Gabox": "mel_band_roformer_instrumental_fullness_v1_gabox.ckpt",
    "MelBand Roformer | Instrumental Fullness V2 by Gabox": "mel_band_roformer_instrumental_fullness_v2_gabox.ckpt",
    "MelBand Roformer | Instrumental Fullness V3 by Gabox": "mel_band_roformer_instrumental_fullness_v3_gabox.ckpt",
    "MelBand Roformer | Instrumental Fullness Noisy V4 by Gabox": "mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt",
    "MelBand Roformer | INSTV5 by Gabox": "mel_band_roformer_instrumental_instv5_gabox.ckpt",
    "MelBand Roformer | INSTV5N by Gabox": "mel_band_roformer_instrumental_instv5n_gabox.ckpt",
    "MelBand Roformer | INSTV6 by Gabox": "mel_band_roformer_instrumental_instv6_gabox.ckpt",
    "MelBand Roformer | INSTV6N by Gabox": "mel_band_roformer_instrumental_instv6n_gabox.ckpt",
    "MelBand Roformer | INSTV7 by Gabox": "mel_band_roformer_instrumental_instv7_gabox.ckpt",
    "MelBand Roformer | Vocals by becruily": "mel_band_roformer_vocals_becruily.ckpt",
    "MelBand Roformer | Vocals by Kimberley Jensen": "vocals_mel_band_roformer.ckpt",
    "MelBand Roformer | Vocals Fullness by Aname": "mel_band_roformer_vocal_fullness_aname.ckpt",
    "MelBand Roformer | Vocals by Gabox": "mel_band_roformer_vocals_gabox.ckpt",
    "MelBand Roformer | Vocals FV1 by Gabox": "mel_band_roformer_vocals_fv1_gabox.ckpt",
    "MelBand Roformer | Vocals FV2 by Gabox": "mel_band_roformer_vocals_fv2_gabox.ckpt",
    "MelBand Roformer | Vocals FV3 by Gabox": "mel_band_roformer_vocals_fv3_gabox.ckpt",
    "MelBand Roformer | Vocals FV4 by Gabox": "mel_band_roformer_vocals_fv4_gabox.ckpt",
    "MelBand Roformer | Karaoke by Gabox": "mel_band_roformer_karaoke_gabox.ckpt",

    # --- MelBand Roformer Kim ---
    "MelBand Roformer Kim | FT by unwa": "mel_band_roformer_kim_ft_unwa.ckpt",
    "MelBand Roformer Kim | FT 2 by unwa": "mel_band_roformer_kim_ft2_unwa.ckpt",
    "MelBand Roformer Kim | FT 2 Bleedless by unwa": "mel_band_roformer_kim_ft2_bleedless_unwa.ckpt",
    "MelBand Roformer Kim | Big Beta 4 FT by unwa": "melband_roformer_big_beta4.ckpt",
    "MelBand Roformer Kim | Big Beta 5e FT by unwa": "melband_roformer_big_beta5e.ckpt",
    "MelBand Roformer Kim | Big Beta 6 by unwa": "melband_roformer_big_beta6.ckpt",
    "MelBand Roformer Kim | Inst V1 by Unwa": "melband_roformer_inst_v1.ckpt",
    "MelBand Roformer Kim | Inst V1 (E) by Unwa": "melband_roformer_inst_v1e.ckpt",
    "MelBand Roformer Kim | Inst V2 by Unwa": "melband_roformer_inst_v2.ckpt",
    "MelBand Roformer Kim | InstVoc Duality V1 by Unwa": "melband_roformer_instvoc_duality_v1.ckpt",
    "MelBand Roformer Kim | InstVoc Duality V2 by Unwa": "melband_roformer_instvox_duality_v2.ckpt",
    "MelBand Roformer Kim | SYHFT by SYH99999": "MelBandRoformerSYHFT.ckpt",
    "MelBand Roformer Kim | SYHFT V2 by SYH99999": "MelBandRoformerSYHFTV2.ckpt",
    "MelBand Roformer Kim | SYHFT V2.5 by SYH99999": "MelBandRoformerSYHFTV2.5.ckpt",
    "MelBand Roformer Kim | SYHFT V3 by SYH99999": "MelBandRoformerSYHFTV3Epsilon.ckpt",
    "MelBand Roformer Kim | Big SYHFT V1 by SYH99999": "MelBandRoformerBigSYHFTV1.ckpt",
}
88
- #=========================#
89
- # MDX23C Models #
90
- #=========================#
91
# Display name (offered in the MDX23C tab's model dropdown) -> model file name
# handed to ``Separator.load_model``. Insertion order is the dropdown order.
MDX23C_MODELS = {
    "MDX23C DrumSep by aufr33-jarredou": "MDX23C-DrumSep-aufr33-jarredou.ckpt",
    "MDX23C De-Reverb by aufr33-jarredou": "MDX23C-De-Reverb-aufr33-jarredou.ckpt",
    "MDX23C-InstVoc HQ": "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "VIP | MDX23C-InstVoc HQ 2": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
    "VIP | MDX23C_D1581": "MDX23C_D1581.ckpt",
}
98
- #=========================#
99
- # MDX-NET Models #
100
- #=========================#
101
# Display name (offered in the MDX-NET tab's model dropdown) -> model file name
# handed to ``Separator.load_model``. Insertion order is the dropdown order.
MDXNET_MODELS = {
    "UVR-MDX-NET 1": "UVR_MDXNET_1_9703.onnx",
    "UVR-MDX-NET 2": "UVR_MDXNET_2_9682.onnx",
    "UVR-MDX-NET 3": "UVR_MDXNET_3_9662.onnx",
    "UVR_MDXNET_9482": "UVR_MDXNET_9482.onnx",
    "UVR-MDX-NET Inst 1": "UVR-MDX-NET-Inst_1.onnx",
    "UVR-MDX-NET Inst 2": "UVR-MDX-NET-Inst_2.onnx",
    "UVR-MDX-NET Inst 3": "UVR-MDX-NET-Inst_3.onnx",
    "UVR-MDX-NET Inst HQ 1": "UVR-MDX-NET-Inst_HQ_1.onnx",
    "UVR-MDX-NET Inst HQ 2": "UVR-MDX-NET-Inst_HQ_2.onnx",
    "UVR-MDX-NET Inst HQ 3": "UVR-MDX-NET-Inst_HQ_3.onnx",
    "UVR-MDX-NET Inst HQ 4": "UVR-MDX-NET-Inst_HQ_4.onnx",
    "UVR-MDX-NET Inst HQ 5": "UVR-MDX-NET-Inst_HQ_5.onnx",
    "UVR-MDX-NET Inst Main": "UVR-MDX-NET-Inst_Main.onnx",
    "UVR-MDX-NET Karaoke": "UVR_MDXNET_KARA.onnx",
    "UVR-MDX-NET Karaoke 2": "UVR_MDXNET_KARA_2.onnx",
    "UVR-MDX-NET Main": "UVR_MDXNET_Main.onnx",
    "UVR-MDX-NET Voc FT": "UVR-MDX-NET-Voc_FT.onnx",
    "Kim Inst": "Kim_Inst.onnx",
    "Kim Vocal 1": "Kim_Vocal_1.onnx",
    "Kim Vocal 2": "Kim_Vocal_2.onnx",
    "kuielab_a_bass": "kuielab_a_bass.onnx",
    "kuielab_a_drums": "kuielab_a_drums.onnx",
    "kuielab_a_other": "kuielab_a_other.onnx",
    "kuielab_a_vocals": "kuielab_a_vocals.onnx",
    "kuielab_b_bass": "kuielab_b_bass.onnx",
    "kuielab_b_drums": "kuielab_b_drums.onnx",
    "kuielab_b_other": "kuielab_b_other.onnx",
    "kuielab_b_vocals": "kuielab_b_vocals.onnx",
    "Reverb HQ By FoxJoy": "Reverb_HQ_By_FoxJoy.onnx",
    "VIP | UVR-MDX-NET_Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
    "VIP | UVR-MDX-NET_Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
    "VIP | UVR-MDX-NET_Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
    "VIP | UVR-MDX-NET-Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx",
    "VIP | UVR-MDX-NET_Main_340": "UVR-MDX-NET_Main_340.onnx",
    "VIP | UVR-MDX-NET_Main_390": "UVR-MDX-NET_Main_390.onnx",
    "VIP | UVR-MDX-NET_Main_406": "UVR-MDX-NET_Main_406.onnx",
    "VIP | UVR-MDX-NET_Main_427": "UVR-MDX-NET_Main_427.onnx",
    "VIP | UVR-MDX-NET_Main_438": "UVR-MDX-NET_Main_438.onnx",
}
141
- #========================#
142
- # VR-ARCH Models #
143
- #========================#
144
# Display name (offered in the VR ARCH tab's model dropdown) -> model file name
# handed to ``Separator.load_model``. Insertion order is the dropdown order.
VR_ARCH_MODELS = {
    "1_HP-UVR": "1_HP-UVR.pth",
    "2_HP-UVR": "2_HP-UVR.pth",
    "3_HP-Vocal-UVR": "3_HP-Vocal-UVR.pth",
    "4_HP-Vocal-UVR": "4_HP-Vocal-UVR.pth",
    "5_HP-Karaoke-UVR": "5_HP-Karaoke-UVR.pth",
    "6_HP-Karaoke-UVR": "6_HP-Karaoke-UVR.pth",
    "7_HP2-UVR": "7_HP2-UVR.pth",
    "8_HP2-UVR": "8_HP2-UVR.pth",
    "9_HP2-UVR": "9_HP2-UVR.pth",
    "10_SP-UVR-2B-32000-1": "10_SP-UVR-2B-32000-1.pth",
    "11_SP-UVR-2B-32000-2": "11_SP-UVR-2B-32000-2.pth",
    "12_SP-UVR-3B-44100": "12_SP-UVR-3B-44100.pth",
    "13_SP-UVR-4B-44100-1": "13_SP-UVR-4B-44100-1.pth",
    "14_SP-UVR-4B-44100-2": "14_SP-UVR-4B-44100-2.pth",
    "15_SP-UVR-MID-44100-1": "15_SP-UVR-MID-44100-1.pth",
    "16_SP-UVR-MID-44100-2": "16_SP-UVR-MID-44100-2.pth",
    "17_HP-Wind_Inst-UVR": "17_HP-Wind_Inst-UVR.pth",
    "MGM_HIGHEND_v4": "MGM_HIGHEND_v4.pth",
    "MGM_LOWEND_A_v4": "MGM_LOWEND_A_v4.pth",
    "MGM_LOWEND_B_v4": "MGM_LOWEND_B_v4.pth",
    "MGM_MAIN_v4": "MGM_MAIN_v4.pth",
    "UVR-BVE-4B_SN-44100-1": "UVR-BVE-4B_SN-44100-1.pth",
    "UVR-De-Reverb by aufr33-jarredou": "UVR-De-Reverb-aufr33-jarredou.pth",
    "UVR-De-Echo-Aggressive by FoxJoy": "UVR-De-Echo-Aggressive.pth",
    "UVR-De-Echo-Normal by FoxJoy": "UVR-De-Echo-Normal.pth",
    "UVR-DeEcho-DeReverb by FoxJoy": "UVR-DeEcho-DeReverb.pth",
    "UVR-DeNoise-Lite by FoxJoy": "UVR-DeNoise-Lite.pth",
    "UVR-DeNoise by FoxJoy": "UVR-DeNoise.pth",
}
174
- #=======================#
175
- # DEMUCS Models #
176
- #=======================#
177
# Display name (offered in the Demucs tab's model dropdown) -> model file name
# handed to ``Separator.load_model``. Insertion order is the dropdown order.
DEMUCS_MODELS = {
    "htdemucs": "htdemucs.yaml",
    "htdemucs_6s": "htdemucs_6s.yaml",
    "htdemucs_ft": "htdemucs_ft.yaml",
    "hdemucs_mmi": "hdemucs_mmi.yaml",
}
183
-
184
# Choices offered by every tab's "Output Format" dropdown, in display order.
OUTPUT_FORMAT = [
    "wav",
    "flac",
    "mp3",
    "ogg",
    "opus",
    "m4a",
    "aiff",
    "ac3",
]
185
-
186
def print_message(input_file, model_name):
    """Print a short banner announcing a separation run.

    Args:
        input_file: Path of the input audio; only its base name without the
            extension is printed.
        model_name: Model label printed after "Separation Model:".
    """
    track_title = os.path.splitext(os.path.basename(input_file))[0]
    banner_rows = (
        ("\n",),  # printing "\n" yields two line breaks before the banner
        ("🎵 Audio-Separator 🎵",),
        ("Input audio:", track_title),
        ("Separation Model:", model_name),
        ("Audio Separation Process...",),
    )
    for row in banner_rows:
        print(*row)
194
-
195
def prepare_output_dir(input_file, output_dir):
    """Create an empty per-track output directory and return its path.

    The directory is ``output_dir/<base name of input_file without extension>``.
    If it already exists it is deleted with everything inside it and then
    recreated, so each run starts from an empty directory.

    Args:
        input_file: Path of the input audio file; only its base name is used.
        output_dir: Parent directory under which the per-track directory is made.

    Returns:
        Path of the freshly created directory.

    Raises:
        RuntimeError: If removing or creating the directory fails. The
            underlying exception is attached as ``__cause__``.
    """
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    out_dir = os.path.join(output_dir, base_name)
    try:
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the
        # cause, matching how the separator functions re-raise.
        raise RuntimeError(f"Failed to prepare output directory {out_dir}: {e}") from e
    return out_dir
206
-
207
def rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model):
    """Expand the naming template of every stem into a concrete output name.

    Each template may contain the placeholders ``NAME`` (base name of
    ``audio`` without its extension), ``STEM`` (the stem label, e.g.
    ``"Vocals"``) and ``MODEL`` (``model``). All placeholders are substituted
    in a single pass over the template, so text inserted for one placeholder
    is never re-scanned: a track called ``STEM_MODEL.wav`` keeps its name
    intact instead of having ``STEM``/``MODEL`` replaced inside it.

    Args:
        audio: Path of the input audio file.
        vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem,
            guitar_stem, piano_stem: Naming template for the respective stem.
        model: Text substituted for ``MODEL``.

    Returns:
        dict mapping the stem labels ``"Vocals"``, ``"Instrumental"``,
        ``"Drums"``, ``"Bass"``, ``"Other"``, ``"Guitar"`` and ``"Piano"``
        (in that order) to the expanded names.
    """
    base_name = os.path.splitext(os.path.basename(audio))[0]
    templates = {
        "Vocals": vocals_stem,
        "Instrumental": instrumental_stem,
        "Drums": drums_stem,
        "Bass": bass_stem,
        "Other": other_stem,
        "Guitar": guitar_stem,
        "Piano": piano_stem,
    }

    def fill(template, stem):
        # A callable replacement avoids both re-scanning of inserted text and
        # backslash-escape processing of the substituted values.
        values = {"NAME": base_name, "STEM": stem, "MODEL": model}
        return re.sub(r"NAME|STEM|MODEL", lambda match: values[match.group(0)], template)

    return {stem: fill(template, stem) for stem, template in templates.items()}
219
-
220
def leaderboard(list_filter, list_limit):
    """Run ``audio-separator -l`` and render its text output as an HTML table.

    Args:
        list_filter: Value passed as ``--list_filter=``.
        list_limit: Value passed as ``--list_limit=``.

    Returns:
        An HTML ``<table>`` string with one row per output line (lines that
        start with ``-`` are dropped, and the first remaining line is styled
        bold), or a string starting with ``"Error: "`` when the command exits
        with a non-zero status or anything raises.
    """
    try:
        command = [
            "audio-separator",
            "-l",
            f"--list_filter={list_filter}",
            f"--list_limit={list_limit}",
        ]
        completed = subprocess.run(command, capture_output=True, text=True)
        if completed.returncode != 0:
            return f"Error: {completed.stderr}"

        # Keep every non-empty line that does not begin with a dash.
        kept_lines = re.findall(r"^(?!-+)(.+)$", completed.stdout.strip(), re.MULTILINE)

        html_rows = []
        for position, text_line in enumerate(kept_lines):
            row_style = "font-weight: bold; font-size: 1.2em;" if position == 0 else ""
            # Columns are separated by runs of two or more whitespace characters.
            columns = re.split(r"\s{2,}", text_line.strip())
            cells = "".join(f"<td>{cell}</td>" for cell in columns)
            html_rows.append(f"<tr style='{row_style}'>" + cells + "</tr>")

        return "<table border='1'>" + "".join(html_rows) + "</table>"

    except Exception as e:
        return f"Error: {e}"
239
-
240
def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate ``audio`` with the Roformer model selected by ``model_key``.

    Args:
        audio: Path of the input audio file.
        model_key: Key into ``ROFORMER_MODELS``.
        seg_size, override_seg_size, overlap, pitch_shift, batch_size:
            Forwarded to ``Separator`` inside ``mdxc_params``.
        model_dir: Forwarded as ``model_file_dir``.
        out_dir: Parent directory handed to ``prepare_output_dir``.
        out_format, norm_thresh, amp_thresh: Forwarded as ``output_format``,
            ``normalization_threshold`` and ``amplification_threshold``.
        vocals_stem ... piano_stem: Stem naming templates for ``rename_stems``.
        progress: Gradio progress callback.

    Returns:
        Tuple with the first two output file paths, each joined onto the
        prepared output directory.

    Raises:
        KeyError: If ``model_key`` is unknown (the lookup precedes the ``try``).
        RuntimeError: Wraps any exception raised by the steps inside the ``try``.
    """
    custom_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
    print_message(audio, model_key)
    checkpoint = ROFORMER_MODELS[model_key]
    try:
        target_dir = prepare_output_dir(audio, out_dir)
        mdxc_settings = {
            "segment_size": seg_size,
            "override_model_segment_size": override_seg_size,
            "batch_size": batch_size,
            "overlap": overlap,
            "pitch_shift": pitch_shift,
        }
        engine = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=target_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params=mdxc_settings,
        )

        # NOTE(review): each status text is reported before the step it names runs.
        progress(0.2, desc="Model loaded...")
        engine.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        produced = engine.separate(audio, custom_names)
        print(f"Separation complete!\nResults: {', '.join(produced)}")

        stem_paths = [os.path.join(target_dir, produced_name) for produced_name in produced]
        return stem_paths[0], stem_paths[1]
    except Exception as e:
        raise RuntimeError(f"Roformer separation failed: {e}") from e
275
-
276
def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate ``audio`` with the MDX23C model selected by ``model_key``.

    Args:
        audio: Path of the input audio file.
        model_key: Key into ``MDX23C_MODELS``.
        seg_size, override_seg_size, overlap, pitch_shift, batch_size:
            Forwarded to ``Separator`` inside ``mdxc_params``.
        model_dir: Forwarded as ``model_file_dir``.
        out_dir: Parent directory handed to ``prepare_output_dir``.
        out_format, norm_thresh, amp_thresh: Forwarded as ``output_format``,
            ``normalization_threshold`` and ``amplification_threshold``.
        vocals_stem ... piano_stem: Stem naming templates for ``rename_stems``.
        progress: Gradio progress callback.

    Returns:
        Tuple with the first two output file paths, each joined onto the
        prepared output directory.

    Raises:
        KeyError: If ``model_key`` is unknown (the lookup precedes the ``try``).
        RuntimeError: Wraps any exception raised by the steps inside the ``try``.
    """
    custom_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
    print_message(audio, model_key)
    checkpoint = MDX23C_MODELS[model_key]
    try:
        target_dir = prepare_output_dir(audio, out_dir)
        mdxc_settings = {
            "segment_size": seg_size,
            "override_model_segment_size": override_seg_size,
            "batch_size": batch_size,
            "overlap": overlap,
            "pitch_shift": pitch_shift,
        }
        engine = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=target_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params=mdxc_settings,
        )

        # NOTE(review): each status text is reported before the step it names runs.
        progress(0.2, desc="Model loaded...")
        engine.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        produced = engine.separate(audio, custom_names)
        print(f"Separation complete!\nResults: {', '.join(produced)}")

        stem_paths = [os.path.join(target_dir, produced_name) for produced_name in produced]
        return stem_paths[0], stem_paths[1]
    except Exception as e:
        raise RuntimeError(f"MDX23C separation failed: {e}") from e
311
-
312
def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate ``audio`` with the MDX-NET model selected by ``model_key``.

    Args:
        audio: Path of the input audio file.
        model_key: Key into ``MDXNET_MODELS``.
        hop_length, seg_size, overlap, batch_size: Forwarded to ``Separator``
            inside ``mdx_params``.
        denoise: Forwarded as ``mdx_params["enable_denoise"]``.
        model_dir: Forwarded as ``model_file_dir``.
        out_dir: Parent directory handed to ``prepare_output_dir``.
        out_format, norm_thresh, amp_thresh: Forwarded as ``output_format``,
            ``normalization_threshold`` and ``amplification_threshold``.
        vocals_stem ... piano_stem: Stem naming templates for ``rename_stems``.
        progress: Gradio progress callback.

    Returns:
        Tuple with the first two output file paths, each joined onto the
        prepared output directory.

    Raises:
        KeyError: If ``model_key`` is unknown (the lookup precedes the ``try``).
        RuntimeError: Wraps any exception raised by the steps inside the ``try``.
    """
    custom_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
    print_message(audio, model_key)
    checkpoint = MDXNET_MODELS[model_key]
    try:
        target_dir = prepare_output_dir(audio, out_dir)
        mdx_settings = {
            "hop_length": hop_length,
            "segment_size": seg_size,
            "overlap": overlap,
            "batch_size": batch_size,
            "enable_denoise": denoise,
        }
        engine = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=target_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdx_params=mdx_settings,
        )

        # NOTE(review): each status text is reported before the step it names runs.
        progress(0.2, desc="Model loaded...")
        engine.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        produced = engine.separate(audio, custom_names)
        print(f"Separation complete!\nResults: {', '.join(produced)}")

        stem_paths = [os.path.join(target_dir, produced_name) for produced_name in produced]
        return stem_paths[0], stem_paths[1]
    except Exception as e:
        raise RuntimeError(f"MDX-NET separation failed: {e}") from e
347
-
348
def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate ``audio`` with the VR ARCH model selected by ``model_key``.

    Args:
        audio: Path of the input audio file.
        model_key: Key into ``VR_ARCH_MODELS``.
        window_size, aggression, post_process_threshold, high_end_process,
            batch_size: Forwarded to ``Separator`` inside ``vr_params``.
        tta: Forwarded as ``vr_params["enable_tta"]``.
        post_process: Forwarded as ``vr_params["enable_post_process"]``.
        model_dir: Forwarded as ``model_file_dir``.
        out_dir: Parent directory handed to ``prepare_output_dir``.
        out_format, norm_thresh, amp_thresh: Forwarded as ``output_format``,
            ``normalization_threshold`` and ``amplification_threshold``.
        vocals_stem ... piano_stem: Stem naming templates for ``rename_stems``.
        progress: Gradio progress callback.

    Returns:
        Tuple with the first two output file paths, each joined onto the
        prepared output directory.

    Raises:
        KeyError: If ``model_key`` is unknown (the lookup precedes the ``try``).
        RuntimeError: Wraps any exception raised by the steps inside the ``try``.
    """
    custom_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
    print_message(audio, model_key)
    checkpoint = VR_ARCH_MODELS[model_key]
    try:
        target_dir = prepare_output_dir(audio, out_dir)
        vr_settings = {
            "batch_size": batch_size,
            "window_size": window_size,
            "aggression": aggression,
            "enable_tta": tta,
            "enable_post_process": post_process,
            "post_process_threshold": post_process_threshold,
            "high_end_process": high_end_process,
        }
        engine = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=target_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            vr_params=vr_settings,
        )

        # NOTE(review): each status text is reported before the step it names runs.
        progress(0.2, desc="Model loaded...")
        engine.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        produced = engine.separate(audio, custom_names)
        print(f"Separation complete!\nResults: {', '.join(produced)}")

        stem_paths = [os.path.join(target_dir, produced_name) for produced_name in produced]
        return stem_paths[0], stem_paths[1]
    except Exception as e:
        raise RuntimeError(f"VR ARCH separation failed: {e}") from e
385
-
386
def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate ``audio`` with the Demucs model selected by ``model_key``.

    Args:
        audio: Path of the input audio file.
        model_key: Key into ``DEMUCS_MODELS``.
        seg_size, shifts, overlap, segments_enabled: Forwarded to
            ``Separator`` inside ``demucs_params``.
        model_dir: Forwarded as ``model_file_dir``.
        out_dir: Parent directory handed to ``prepare_output_dir``.
        out_format, norm_thresh, amp_thresh: Forwarded as ``output_format``,
            ``normalization_threshold`` and ``amplification_threshold``.
        vocals_stem ... piano_stem: Stem naming templates for ``rename_stems``.
        progress: Gradio progress callback.

    Returns:
        A 6-tuple of output paths joined onto the prepared output directory.
        For ``"htdemucs_6s"`` all six slots are paths; for every other model
        the first four are paths and the last two are ``None``.

    Raises:
        KeyError: If ``model_key`` is unknown (the lookup precedes the ``try``).
        RuntimeError: Wraps any exception raised by the steps inside the ``try``.
    """
    custom_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
    print_message(audio, model_key)
    checkpoint = DEMUCS_MODELS[model_key]
    try:
        target_dir = prepare_output_dir(audio, out_dir)
        demucs_settings = {
            "segment_size": seg_size,
            "shifts": shifts,
            "overlap": overlap,
            "segments_enabled": segments_enabled,
        }
        engine = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=target_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            demucs_params=demucs_settings,
        )

        # NOTE(review): each status text is reported before the step it names runs.
        progress(0.2, desc="Model loaded...")
        engine.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        produced = engine.separate(audio, custom_names)
        print(f"Separation complete!\nResults: {', '.join(produced)}")

        stem_paths = [os.path.join(target_dir, produced_name) for produced_name in produced]

        # Only the six-stem model fills the last two output slots.
        if model_key != "htdemucs_6s":
            return stem_paths[0], stem_paths[1], stem_paths[2], stem_paths[3], None, None
        return stem_paths[0], stem_paths[1], stem_paths[2], stem_paths[3], stem_paths[4], stem_paths[5]
    except Exception as e:
        raise RuntimeError(f"Demucs separation failed: {e}") from e
424
-
425
def update_stems(model):
    """Return a Gradio update that shows a component only for ``htdemucs_6s``.

    Args:
        model: Name of the selected Demucs model.

    Returns:
        ``gr.update(visible=True)`` when ``model`` equals ``"htdemucs_6s"``,
        otherwise ``gr.update(visible=False)``.
    """
    show_extra_stems = True if model == "htdemucs_6s" else False
    return gr.update(visible=show_extra_stems)
431
-
432
def show_hide_params(param):
    """Return a Gradio update whose visibility mirrors ``param``.

    Args:
        param: Checkbox state; passed unchanged as the ``visible`` value.

    Returns:
        The result of ``gr.update(visible=param)``.
    """
    visibility_update = gr.update(visible=param)
    return visibility_update
435
-
436
- with gr.Blocks(
437
- title="🎵 Audio-Separator by Politrees 🎵",
438
- css="footer{display:none !important}",
439
- theme=gr.themes.Default(
440
- spacing_size="sm",
441
- radius_size="lg",
442
- )
443
- ) as app:
444
- gr.HTML("<h1><center> 🎵 Audio-Separator by Politrees 🎵 </center></h1>")
445
-
446
- with gr.Tab("Roformer"):
447
- with gr.Group():
448
- with gr.Row():
449
- roformer_model = gr.Dropdown(value="MelBand Roformer Kim | Big Beta 5e FT by unwa", label="Select the Model", choices=list(ROFORMER_MODELS.keys()), scale=3)
450
- roformer_output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMAT, label="Output Format", info="The format of the output audio file.", scale=1)
451
- with gr.Accordion("Advanced settings", open=False):
452
- with gr.Column(variant='panel'):
453
- with gr.Group():
454
- roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
455
- with gr.Row():
456
- roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
457
- roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.", show_reset_button=False)
458
- roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
459
- with gr.Column(variant='panel'):
460
- with gr.Group():
461
- with gr.Row():
462
- roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
463
- roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
464
- roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
465
- with gr.Row():
466
- roformer_audio = gr.Audio(label="Input Audio", type="filepath")
467
- with gr.Row():
468
- roformer_button = gr.Button("Separate!", variant="primary")
469
- with gr.Row():
470
- roformer_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
471
- roformer_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
472
-
473
- with gr.Tab("MDX23C"):
474
- with gr.Group():
475
- with gr.Row():
476
- mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
477
- mdx23c_output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMAT, label="Output Format", info="The format of the output audio file.", scale=1)
478
- with gr.Accordion("Advanced settings", open=False):
479
- with gr.Column(variant='panel'):
480
- with gr.Group():
481
- mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
482
- with gr.Row():
483
- mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
484
- mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
485
- mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
486
- with gr.Column(variant='panel'):
487
- with gr.Group():
488
- with gr.Row():
489
- mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
490
- mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
491
- mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
492
- with gr.Row():
493
- mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
494
- with gr.Row():
495
- mdx23c_button = gr.Button("Separate!", variant="primary")
496
- with gr.Row():
497
- mdx23c_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
498
- mdx23c_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
499
-
500
- with gr.Tab("MDX-NET"):
501
- with gr.Group():
502
- with gr.Row():
503
- mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
504
- mdx_output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMAT, label="Output Format", info="The format of the output audio file.", scale=1)
505
- with gr.Accordion("Advanced settings", open=False):
506
- with gr.Column(variant='panel'):
507
- with gr.Group():
508
- mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
509
- with gr.Row():
510
- mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.", show_reset_button=False)
511
- mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False)
512
- mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
513
- with gr.Column(variant='panel'):
514
- with gr.Group():
515
- with gr.Row():
516
- mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
517
- mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
518
- mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
519
- with gr.Row():
520
- mdx_audio = gr.Audio(label="Input Audio", type="filepath")
521
- with gr.Row():
522
- mdx_button = gr.Button("Separate!", variant="primary")
523
- with gr.Row():
524
- mdx_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
525
- mdx_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
526
-
527
- with gr.Tab("VR ARCH"):
528
- with gr.Group():
529
- with gr.Row():
530
- vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
531
- vr_output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMAT, label="Output Format", info="The format of the output audio file.", scale=1)
532
- with gr.Accordion("Advanced settings", open=False):
533
- with gr.Column(variant='panel'):
534
- with gr.Group():
535
- with gr.Row():
536
- vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
537
- vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
538
- vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
539
- with gr.Row():
540
- vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", show_reset_button=False, visible=False)
541
- vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.", show_reset_button=False)
542
- vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.", show_reset_button=False)
543
- with gr.Column(variant='panel'):
544
- with gr.Group():
545
- with gr.Row():
546
- vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
547
- vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
548
- vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
549
- with gr.Row():
550
- vr_audio = gr.Audio(label="Input Audio", type="filepath")
551
- with gr.Row():
552
- vr_button = gr.Button("Separate!", variant="primary")
553
- with gr.Row():
554
- vr_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
555
- vr_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
556
-
557
- with gr.Tab("Demucs"):
558
- with gr.Group():
559
- with gr.Row():
560
- demucs_model = gr.Dropdown(value="htdemucs_ft", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
561
- demucs_output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMAT, label="Output Format", info="The format of the output audio file.", scale=1)
562
- with gr.Accordion("Advanced settings", open=False):
563
- with gr.Column(variant='panel'):
564
- with gr.Group():
565
- demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
566
- with gr.Row():
567
- demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.", show_reset_button=False)
568
- demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.", show_reset_button=False)
569
- demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.", show_reset_button=False)
570
- with gr.Column(variant='panel'):
571
- with gr.Group():
572
- with gr.Row():
573
- demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
574
- demucs_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
575
- with gr.Row():
576
- demucs_audio = gr.Audio(label="Input Audio", type="filepath")
577
- with gr.Row():
578
- demucs_button = gr.Button("Separate!", variant="primary")
579
- with gr.Row():
580
- demucs_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
581
- demucs_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
582
- with gr.Row():
583
- demucs_stem3 = gr.Audio(label="Stem 3", type="filepath", interactive=False)
584
- demucs_stem4 = gr.Audio(label="Stem 4", type="filepath", interactive=False)
585
- with gr.Row(visible=False) as stem6:
586
- demucs_stem5 = gr.Audio(label="Stem 5", type="filepath", interactive=False)
587
- demucs_stem6 = gr.Audio(label="Stem 6", type="filepath", interactive=False)
588
-
589
- with gr.Tab("Settings"):
590
- with gr.Group():
591
- with gr.Row():
592
- model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="Directory to cache model files", info="The directory where model files are stored.", placeholder="/tmp/audio-separator-models/")
593
- output_dir = gr.Textbox(value="output", label="File output directory", info="The directory where output files will be saved.", placeholder="output")
594
-
595
- with gr.Accordion("Rename Stems", open=False):
596
- gr.Markdown(
597
- """
598
- Keys for automatic determination of input file names, stems, and models to simplify the construction of output file names.
599
-
600
- Keys:
601
- * **NAME** - Input File Name
602
- * **STEM** - Stem Name (e.g., Vocals, Instrumental)
603
- * **MODEL** - Model Name (e.g., BS-Roformer-Viperx-1297)
604
-
605
- > Example:
606
- > * **Usage:** NAME_(STEM)_MODEL
607
- > * **Output File Name:** Music_(Vocals)_BS-Roformer-Viperx-1297
608
- """
609
- )
610
- with gr.Row():
611
- vocals_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Vocals Stem", info="Output example: Music_(Vocals)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
612
- instrumental_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Instrumental Stem", info="Output example: Music_(Instrumental)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
613
- other_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Other Stem", info="Output example: Music_(Other)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
614
- with gr.Row():
615
- drums_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Drums Stem", info="Output example: Music_(Drums)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
616
- bass_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Bass Stem", info="Output example: Music_(Bass)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
617
- with gr.Row():
618
- guitar_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Guitar Stem", info="Output example: Music_(Guitar)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
619
- piano_stem = gr.Textbox(value="NAME_(STEM)_MODEL", label="Piano Stem", info="Output example: Music_(Piano)_BS-Roformer-Viperx-1297", placeholder="NAME_(STEM)_MODEL")
620
-
621
- with gr.Tab("Leaderboard"):
622
- with gr.Group():
623
- with gr.Row(equal_height=True):
624
- list_filter = gr.Dropdown(value="vocals", choices=["vocals", "instrumental", "drums", "bass", "guitar", "piano", "other"], label="List filter", info="Filter and sort the model list by 'stem'")
625
- list_limit = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="List limit", info="Limit the number of models shown.", show_reset_button=False)
626
- list_button = gr.Button("Show list", variant="primary")
627
-
628
- output_list = gr.HTML(label="Leaderboard")
629
-
630
- with gr.Tab("Credits"):
631
- gr.Markdown(
632
- """
633
- This Space created by **[Politrees](https://github.com/Bebra777228)**.
634
- * python-audio-separator by **[beveradb](https://github.com/beveradb)**.
635
- * Thanks to **[NeoPy](https://huggingface.co/NeoPy)** for the help with the code.
636
- """
637
- )
638
-
639
- roformer_override_seg_size.change(show_hide_params, inputs=[roformer_override_seg_size], outputs=[roformer_seg_size])
640
- mdx23c_override_seg_size.change(show_hide_params, inputs=[mdx23c_override_seg_size], outputs=[mdx23c_seg_size])
641
- vr_post_process.change(show_hide_params, inputs=[vr_post_process], outputs=[vr_post_process_threshold])
642
-
643
- demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
644
-
645
- list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)
646
-
647
- roformer_button.click(
648
- roformer_separator,
649
- inputs=[
650
- roformer_audio,
651
- roformer_model,
652
- roformer_seg_size,
653
- roformer_override_seg_size,
654
- roformer_overlap,
655
- roformer_pitch_shift,
656
- model_file_dir,
657
- output_dir,
658
- roformer_output_format,
659
- roformer_norm_threshold,
660
- roformer_amp_threshold,
661
- roformer_batch_size,
662
- vocals_stem,
663
- instrumental_stem,
664
- other_stem,
665
- drums_stem,
666
- bass_stem,
667
- guitar_stem,
668
- piano_stem,
669
- ],
670
- outputs=[
671
- roformer_stem1,
672
- roformer_stem2,
673
- ], concurrency_limit=1,
674
- )
675
- mdx23c_button.click(
676
- mdx23c_separator,
677
- inputs=[
678
- mdx23c_audio,
679
- mdx23c_model,
680
- mdx23c_seg_size,
681
- mdx23c_override_seg_size,
682
- mdx23c_overlap,
683
- mdx23c_pitch_shift,
684
- model_file_dir,
685
- output_dir,
686
- mdx23c_output_format,
687
- mdx23c_norm_threshold,
688
- mdx23c_amp_threshold,
689
- mdx23c_batch_size,
690
- vocals_stem,
691
- instrumental_stem,
692
- other_stem,
693
- drums_stem,
694
- bass_stem,
695
- guitar_stem,
696
- piano_stem,
697
- ],
698
- outputs=[
699
- mdx23c_stem1,
700
- mdx23c_stem2,
701
- ], concurrency_limit=1,
702
- )
703
- mdx_button.click(
704
- mdx_separator,
705
- inputs=[
706
- mdx_audio,
707
- mdx_model,
708
- mdx_hop_length,
709
- mdx_seg_size,
710
- mdx_overlap,
711
- mdx_denoise,
712
- model_file_dir,
713
- output_dir,
714
- mdx_output_format,
715
- mdx_norm_threshold,
716
- mdx_amp_threshold,
717
- mdx_batch_size,
718
- vocals_stem,
719
- instrumental_stem,
720
- other_stem,
721
- drums_stem,
722
- bass_stem,
723
- guitar_stem,
724
- piano_stem,
725
- ],
726
- outputs=[
727
- mdx_stem1,
728
- mdx_stem2,
729
- ], concurrency_limit=1,
730
- )
731
- vr_button.click(
732
- vr_separator,
733
- inputs=[
734
- vr_audio,
735
- vr_model,
736
- vr_window_size,
737
- vr_aggression,
738
- vr_tta,
739
- vr_post_process,
740
- vr_post_process_threshold,
741
- vr_high_end_process,
742
- model_file_dir,
743
- output_dir,
744
- vr_output_format,
745
- vr_norm_threshold,
746
- vr_amp_threshold,
747
- vr_batch_size,
748
- vocals_stem,
749
- instrumental_stem,
750
- other_stem,
751
- drums_stem,
752
- bass_stem,
753
- guitar_stem,
754
- piano_stem,
755
- ],
756
- outputs=[
757
- vr_stem1,
758
- vr_stem2,
759
- ], concurrency_limit=1,
760
- )
761
- demucs_button.click(
762
- demucs_separator,
763
- inputs=[
764
- demucs_audio,
765
- demucs_model,
766
- demucs_seg_size,
767
- demucs_shifts,
768
- demucs_overlap,
769
- demucs_segments_enabled,
770
- model_file_dir,
771
- output_dir,
772
- demucs_output_format,
773
- demucs_norm_threshold,
774
- demucs_amp_threshold,
775
- vocals_stem,
776
- instrumental_stem,
777
- other_stem,
778
- drums_stem,
779
- bass_stem,
780
- guitar_stem,
781
- piano_stem,
782
- ],
783
- outputs=[
784
- demucs_stem1,
785
- demucs_stem2,
786
- demucs_stem3,
787
- demucs_stem4,
788
- demucs_stem5,
789
- demucs_stem6,
790
- ], concurrency_limit=1,
791
- )
792
-
793
- def main():
794
- app.queue().launch(share=True, debug=True)
795
-
796
- if __name__ == "__main__":
797
- main()
 
1
+ from PolUVR.utils import PolUVR_UI
 
 
 
 
 
 
2
 
3
+ PolUVR_UI()