Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -63,5 +63,196 @@ with gr.Blocks(title="RVC UI") as app:
|
|
63 |
format1 = gr.Radio(label="Export file format",choices=["wav", "flac", "mp3", "m4a"],value="wav",interactive=True)
|
64 |
but1 = gr.Button("Convert", variant="primary")
|
65 |
vc_output3 = gr.Textbox(label="Output information",interactive=False)
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
app.launch()
|
|
|
63 |
format1 = gr.Radio(label="Export file format",choices=["wav", "flac", "mp3", "m4a"],value="wav",interactive=True)
|
64 |
but1 = gr.Button("Convert", variant="primary")
|
65 |
vc_output3 = gr.Textbox(label="Output information",interactive=False)
|
66 |
+
with gr.Tabs():
|
67 |
+
with gr.TabItem("Train"):
|
68 |
+
gr.Markdown("### Step 1. Fill in the experimental configuration.\nExperimental data is stored in the 'logs' folder, with each experiment having a separate folder. Manually enter the experiment name path, which contains the experimental configuration, logs, and trained model files.")
|
69 |
+
with gr.Row():
|
70 |
+
exp_dir1 = gr.Textbox(label="Enter the experiment name", value="my-voice")
|
71 |
+
author = gr.Textbox(label="Model Author (Nullable)")
|
72 |
+
np7 = gr.Slider(minimum=0,maximum=config.n_cpu,step=1,label="Number of CPU processes used for pitch extraction and data processing",value=int(np.ceil(config.n_cpu / 1.5)),interactive=True)
|
73 |
+
with gr.Row():
|
74 |
+
sr2 = gr.Radio(label=("Target sample rate"),choices=["40k", "48k"],value="40k",interactive=True)
|
75 |
+
if_f0_3 = gr.Radio(label="Whether the model has pitch guidance (required for singing, optional for speech)",choices=[("Yes"), ("No")],value=("Yes"),interactive=True)
|
76 |
+
version19 = gr.Radio(label=("Version"),choices=["v1", "v2"],value="v2",interactive=True,visible=True)
|
77 |
+
gr.Markdown("### Step 2. Audio processing. \n#### 1. Slicing.\nAutomatically traverse all files in the training folder that can be decoded into audio and perform slice normalization. Generates 2 wav folders in the experiment directory. Currently, only single-singer/speaker training is supported.")
|
78 |
+
with gr.Row():
|
79 |
+
with gr.Column():
|
80 |
+
trainset_dir4 = gr.Textbox(
|
81 |
+
label=i18n("Enter the path of the training folder"),
|
82 |
+
)
|
83 |
+
spk_id5 = gr.Slider(
|
84 |
+
minimum=0,
|
85 |
+
maximum=4,
|
86 |
+
step=1,
|
87 |
+
label=i18n("Please specify the speaker/singer ID"),
|
88 |
+
value=0,
|
89 |
+
interactive=True,
|
90 |
+
)
|
91 |
+
but1 = gr.Button(i18n("Process data"), variant="primary")
|
92 |
+
with gr.Column():
|
93 |
+
info1 = gr.Textbox(label=i18n("Output information"), value="")
|
94 |
+
but1.click(
|
95 |
+
preprocess_dataset,
|
96 |
+
[trainset_dir4, exp_dir1, sr2, np7],
|
97 |
+
[info1],
|
98 |
+
api_name="train_preprocess",
|
99 |
+
)
|
100 |
+
gr.Markdown(
|
101 |
+
value=i18n(
|
102 |
+
"#### 2. Feature extraction.\nUse CPU to extract pitch (if the model has pitch), use GPU to extract features (select GPU index)."
|
103 |
+
)
|
104 |
+
)
|
105 |
+
with gr.Row():
|
106 |
+
with gr.Column():
|
107 |
+
gpu_info9 = gr.Textbox(
|
108 |
+
label=i18n("GPU Information"),
|
109 |
+
value=gpu_info,
|
110 |
+
visible=F0GPUVisible,
|
111 |
+
)
|
112 |
+
gpus6 = gr.Textbox(
|
113 |
+
label=i18n(
|
114 |
+
"Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2"
|
115 |
+
),
|
116 |
+
value=gpus,
|
117 |
+
interactive=True,
|
118 |
+
visible=F0GPUVisible,
|
119 |
+
)
|
120 |
+
gpus_rmvpe = gr.Textbox(
|
121 |
+
label=i18n(
|
122 |
+
"Enter the GPU index(es) separated by '-', e.g., 0-0-1 to use 2 processes in GPU0 and 1 process in GPU1"
|
123 |
+
),
|
124 |
+
value="%s-%s" % (gpus, gpus),
|
125 |
+
interactive=True,
|
126 |
+
visible=F0GPUVisible,
|
127 |
+
)
|
128 |
+
f0method8 = gr.Radio(
|
129 |
+
label=i18n(
|
130 |
+
"Select the pitch extraction algorithm: when extracting singing, you can use 'pm' to speed up. For high-quality speech with fast performance, but worse CPU usage, you can use 'dio'. 'harvest' results in better quality but is slower. 'rmvpe' has the best results and consumes less CPU/GPU"
|
131 |
+
),
|
132 |
+
choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"],
|
133 |
+
value="rmvpe_gpu",
|
134 |
+
interactive=True,
|
135 |
+
)
|
136 |
+
with gr.Column():
|
137 |
+
but2 = gr.Button(i18n("Feature extraction"), variant="primary")
|
138 |
+
info2 = gr.Textbox(label=i18n("Output information"), value="")
|
139 |
+
f0method8.change(
|
140 |
+
fn=change_f0_method,
|
141 |
+
inputs=[f0method8],
|
142 |
+
outputs=[gpus_rmvpe],
|
143 |
+
)
|
144 |
+
but2.click(
|
145 |
+
extract_f0_feature,
|
146 |
+
[
|
147 |
+
gpus6,
|
148 |
+
np7,
|
149 |
+
f0method8,
|
150 |
+
if_f0_3,
|
151 |
+
exp_dir1,
|
152 |
+
version19,
|
153 |
+
gpus_rmvpe,
|
154 |
+
],
|
155 |
+
[info2],
|
156 |
+
api_name="train_extract_f0_feature",
|
157 |
+
)
|
158 |
+
gr.Markdown(
|
159 |
+
value=i18n(
|
160 |
+
"### Step 3. Start training.\nFill in the training settings and start training the model and index."
|
161 |
+
)
|
162 |
+
)
|
163 |
+
with gr.Row():
|
164 |
+
with gr.Column():
|
165 |
+
save_epoch10 = gr.Slider(
|
166 |
+
minimum=1,
|
167 |
+
maximum=50,
|
168 |
+
step=1,
|
169 |
+
label=i18n("Save frequency (save_every_epoch)"),
|
170 |
+
value=5,
|
171 |
+
interactive=True,
|
172 |
+
)
|
173 |
+
total_epoch11 = gr.Slider(
|
174 |
+
minimum=2,
|
175 |
+
maximum=1000,
|
176 |
+
step=1,
|
177 |
+
label=i18n("Total training epochs (total_epoch)"),
|
178 |
+
value=20,
|
179 |
+
interactive=True,
|
180 |
+
)
|
181 |
+
batch_size12 = gr.Slider(
|
182 |
+
minimum=1,
|
183 |
+
maximum=40,
|
184 |
+
step=1,
|
185 |
+
label=i18n("Batch size per GPU"),
|
186 |
+
value=default_batch_size,
|
187 |
+
interactive=True,
|
188 |
+
)
|
189 |
+
if_save_latest13 = gr.Radio(
|
190 |
+
label=i18n(
|
191 |
+
"Save only the latest '.ckpt' file to save disk space"
|
192 |
+
),
|
193 |
+
choices=[i18n("Yes"), i18n("No")],
|
194 |
+
value=i18n("No"),
|
195 |
+
interactive=True,
|
196 |
+
)
|
197 |
+
if_cache_gpu17 = gr.Radio(
|
198 |
+
label=i18n(
|
199 |
+
"Cache all training sets to GPU memory. Caching small datasets (less than 10 minutes) can speed up training, but caching large datasets will consume a lot of GPU memory and may not provide much speed improvement"
|
200 |
+
),
|
201 |
+
choices=[i18n("Yes"), i18n("No")],
|
202 |
+
value=i18n("No"),
|
203 |
+
interactive=True,
|
204 |
+
)
|
205 |
+
if_save_every_weights18 = gr.Radio(
|
206 |
+
label=i18n(
|
207 |
+
"Save a small final model to the 'weights' folder at each save point"
|
208 |
+
),
|
209 |
+
choices=[i18n("Yes"), i18n("No")],
|
210 |
+
value=i18n("No"),
|
211 |
+
interactive=True,
|
212 |
+
)
|
213 |
+
with gr.Column():
|
214 |
+
pretrained_G14 = gr.Textbox(
|
215 |
+
label="Load pre-trained base model G path",
|
216 |
+
value="assets/pretrained_v2/f0G40k.pth",
|
217 |
+
interactive=True,
|
218 |
+
)
|
219 |
+
pretrained_D15 = gr.Textbox(
|
220 |
+
label=i18n("Load pre-trained base model D path"),
|
221 |
+
value="assets/pretrained_v2/f0D40k.pth",
|
222 |
+
interactive=True,
|
223 |
+
)
|
224 |
+
gpus16 = gr.Textbox(
|
225 |
+
label=i18n(
|
226 |
+
"Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2"
|
227 |
+
),
|
228 |
+
value=gpus,
|
229 |
+
interactive=True,
|
230 |
+
)
|
231 |
+
sr2.change(
|
232 |
+
change_sr2,
|
233 |
+
[sr2, if_f0_3, version19],
|
234 |
+
[pretrained_G14, pretrained_D15],
|
235 |
+
)
|
236 |
+
version19.change(
|
237 |
+
change_version19,
|
238 |
+
[sr2, if_f0_3, version19],
|
239 |
+
[pretrained_G14, pretrained_D15, sr2],
|
240 |
+
)
|
241 |
+
if_f0_3.change(
|
242 |
+
change_f0,
|
243 |
+
[if_f0_3, sr2, version19],
|
244 |
+
[f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
|
245 |
+
)
|
246 |
+
|
247 |
+
# Button that starts model training; its click handler is wired up further below.
but3 = gr.Button("Train model", variant="primary")
|
248 |
+
# Button that builds the feature index; its click handler is wired up further below.
but4 = gr.Button("Train feature index", variant="primary")
|
249 |
+
# Button that triggers the one-click training handler wired up further below.
but5 = gr.Button("One-click training", variant="primary")
|
250 |
+
with gr.Row():
|
251 |
+
info3 = gr.Textbox(label=i18n("Output information"), value="")
|
252 |
+
but3.click(click_train,[exp_dir1,sr2,if_f0_3,spk_id5,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16,if_cache_gpu17,if_save_every_weights18,version19,author,],info3,api_name="train_start")
|
253 |
+
but4.click(train_index, [exp_dir1, version19], info3)
|
254 |
+
but5.click(train1key,[exp_dir1,sr2,if_f0_3,trainset_dir4,spk_id5,np7,f0method8,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16,if_cache_gpu17,if_save_every_weights18,version19,gpus_rmvpe,author],info3,api_name="train_start_all")
|
255 |
+
|
256 |
+
|
257 |
+
|
258 |
app.launch()
|