Update demo.py
Browse files
demo.py
CHANGED
@@ -184,30 +184,17 @@ with gr.Blocks() as audio_inf:
|
|
184 |
interactive=True,
|
185 |
type='filepath',
|
186 |
waveform_options={'waveform_color': '#a3ffc3', 'waveform_progress_color': '#e972ab'})
|
187 |
-
spk_id = gr.Number(label="Speaker ID (randomly picking a sample based on the ID - may result in subpar / broken audio)",
|
188 |
-
info="Input speaker ID (max 196 Ru / 2006 En) to use a random sample from that speaker on the server. 9999 disables.",
|
189 |
-
value=9999,
|
190 |
-
interactive=True)
|
191 |
-
|
192 |
-
random_spk_btn = gr.Button("Random")
|
193 |
|
194 |
|
195 |
with gr.Accordion("Advanced Parameters", open=False):
|
196 |
-
|
197 |
-
def update_audio_inf_defaults(is_english):
    """Build the updates applied to the text box and speaker-ID field on a language switch.

    Args:
        is_english: Truthy to select the English defaults, falsy for the
            Russian ones.

    Returns:
        A 2-tuple of ``gr.update`` results: the first sets the text value to
        the first entry of the matching random-text list, the second replaces
        the speaker-ID help text and resets its value to 9999.
    """
    if is_english:
        sample_text = en_random_texts_list[0]
        spk_hint = "Input speaker ID (max 2006 En) or use Randomize. 9999 disables."
    else:
        sample_text = ru_random_texts_list[0]
        spk_hint = "Input speaker ID (max 196 Ru) or use Randomize. 9999 disables."

    text_update = gr.update(value=sample_text)
    # 9999 is the value the help text describes as "disables".
    spk_update = gr.update(info=spk_hint, value=9999)
    return text_update, spk_update
|
202 |
-
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
|
|
209 |
|
210 |
-
|
211 |
beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1,
|
212 |
label="Beta (Diffusion Strength vs. Reference)",
|
213 |
info="Diffusion parameter. Higher means LESS like the reference audio. 0 disables diffusion.",
|
@@ -231,7 +218,6 @@ with gr.Blocks() as audio_inf:
|
|
231 |
label="T (Duration / Temperature)",
|
232 |
info="inflence of previous sentence on the current one",
|
233 |
interactive=True)
|
234 |
-
|
235 |
|
236 |
with gr.Column(scale=1):
|
237 |
btn = gr.Button("Synthesize (Voice Guided)", variant="primary")
|
@@ -240,6 +226,18 @@ with gr.Blocks() as audio_inf:
|
|
240 |
waveform_options={'waveform_color': '#a3ffc3', 'waveform_progress_color': '#e972ab'})
|
241 |
|
242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
244 |
btn.click(Client_Synthesize_Audio,
|
245 |
inputs=[inp, voice, voice_2, spk_id, multispeakersteps, embscale, beta, rate_of_speech, t, language_checkbox_audio],
|
@@ -455,7 +453,7 @@ with gr.Blocks(title="The Poor Man's TTS (Experimental 🔧)", theme="Respair/Sh
|
|
455 |
|
456 |
gr.TabbedInterface(
|
457 |
[audio_inf, longform, info_tab, model_details_tab],
|
458 |
-
['Voice-guided Synthesis',
|
459 |
title="The Poor Man's TTS (Experimental)",
|
460 |
theme="Respair/[email protected]"
|
461 |
)
|
|
|
184 |
interactive=True,
|
185 |
type='filepath',
|
186 |
waveform_options={'waveform_color': '#a3ffc3', 'waveform_progress_color': '#e972ab'})
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
|
189 |
with gr.Accordion("Advanced Parameters", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
+
spk_id = gr.Number(label="Speaker ID (randomly picking a sample based on the ID - may result in subpar / broken audio)",
|
192 |
+
info="Input speaker ID (max 196 Ru / 2006 En) to use a random sample from that speaker on the server. 9999 disables.",
|
193 |
+
value=9999,
|
194 |
+
interactive=True)
|
195 |
+
|
196 |
+
random_spk_btn = gr.Button("Random")
|
197 |
|
|
|
198 |
beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1,
|
199 |
label="Beta (Diffusion Strength vs. Reference)",
|
200 |
info="Diffusion parameter. Higher means LESS like the reference audio. 0 disables diffusion.",
|
|
|
218 |
label="T (Duration / Temperature)",
|
219 |
info="inflence of previous sentence on the current one",
|
220 |
interactive=True)
|
|
|
221 |
|
222 |
with gr.Column(scale=1):
|
223 |
btn = gr.Button("Synthesize (Voice Guided)", variant="primary")
|
|
|
226 |
waveform_options={'waveform_color': '#a3ffc3', 'waveform_progress_color': '#e972ab'})
|
227 |
|
228 |
|
229 |
+
def update_audio_inf_defaults(is_english):
    """Build the updates applied to the text box and speaker-ID field on a language switch.

    Args:
        is_english: Truthy to select the English defaults, falsy for the
            Russian ones.

    Returns:
        A 2-tuple of ``gr.update`` results: the first sets the text value to
        the first entry of the matching random-text list, the second replaces
        the speaker-ID help text and resets its value to 9999.
    """
    if is_english:
        sample_text = en_random_texts_list[0]
        spk_hint = "Input speaker ID (max 2006 En) or use Randomize. 9999 disables."
    else:
        sample_text = ru_random_texts_list[0]
        spk_hint = "Input speaker ID (max 196 Ru) or use Randomize. 9999 disables."

    text_update = gr.update(value=sample_text)
    # 9999 is the value the help text describes as "disables".
    spk_update = gr.update(info=spk_hint, value=9999)
    return text_update, spk_update
|
234 |
+
|
235 |
+
|
236 |
+
language_checkbox_audio.change(update_audio_inf_defaults,
|
237 |
+
inputs=[language_checkbox_audio],
|
238 |
+
outputs=[inp, spk_id])
|
239 |
+
|
240 |
+
random_spk_btn.click(fn=generate_random_spk, inputs=[language_checkbox_audio], outputs=spk_id)
|
241 |
|
242 |
btn.click(Client_Synthesize_Audio,
|
243 |
inputs=[inp, voice, voice_2, spk_id, multispeakersteps, embscale, beta, rate_of_speech, t, language_checkbox_audio],
|
|
|
453 |
|
454 |
gr.TabbedInterface(
|
455 |
[audio_inf, longform, info_tab, model_details_tab],
|
456 |
+
['Voice-guided Synthesis','Text-guided Synthesis', 'Intuition & Tips', 'Model Details'],
|
457 |
title="The Poor Man's TTS (Experimental)",
|
458 |
theme="Respair/[email protected]"
|
459 |
)
|