Spaces: Running on Zero
Commit · 2761ffc
1 Parent(s): a6f6f36
new model

app.py CHANGED
@@ -24,8 +24,7 @@ model = StyleTTS2(config_path, models_path).to(device)
 
 # Core inference function
 def process_inputs(text_prompt, reference_audio_paths,
-                   n_merge,
-                   denoise, t_denoise, split_dur):
+                   n_merge, denoise, avg_style, stabilize):
 
     speakers = {}
     for i, path in enumerate(reference_audio_paths, 1):
@@ -36,11 +35,7 @@ def process_inputs(text_prompt, reference_audio_paths,
             "speed": 1.1
         }
 
-
-    r = model.generate(
-        text_prompt, speakers, denoise, t_denoise,
-        split_dur, "[id_1]", n_merge, randomness, smooth_dur
-    )
+    r = model.generate(text_prompt, speakers, avg_style, stabilize, denoise, n_merge, "[id_1]")
 
     r = r / np.abs(r).max()
     sf.write("output.wav", r, samplerate=24000)
@@ -68,12 +63,10 @@ with gr.Blocks(css=custom_css) as demo:
     reference_audios = gr.File(label="Reference Audios", file_types=[".wav", ".mp3", ".flac"], file_count="multiple", elem_id="custom-box")
     # Parameters
     with gr.Accordion("Advanced Settings", open=False):
-        denoise = gr.
-
-
-
-        smooth_dur = gr.Slider(0.0, 1.0, value=0.15, label="Smooth Duration")
-        split_dur = gr.Slider(0, 10, step=1, value=3, label="Split Ref Audio Duration")
+        denoise = gr.Slider(0.0, 1.0, value=0.6, label="Denoise Strength")
+        avg_style = gr.Checkbox(label="Use Average Styles", value=True)
+        stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=True)
+        n_merge = gr.Slider(10, 30, value=16, label="Min Words to Merge")
 
     submit_button = gr.Button("Synthesize")
     synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")
@@ -84,11 +77,9 @@ with gr.Blocks(css=custom_css) as demo:
             text_prompt,
             reference_audios,
             n_merge,
-            randomness,
-            smooth_dur,
             denoise,
-
-
+            avg_style,
+            stabilize
         ],
         outputs=synthesized_audio
     )
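Taken together, the hunks drop the old controls (t_denoise, split_dur, randomness, smooth_dur) in favor of denoise, avg_style, stabilize and n_merge, collapse model.generate onto a single call with a new argument order, and rewire the click handler so its inputs list matches the new process_inputs signature. The sketch below shows how the touched pieces fit together after this commit; it is reconstructed from the diff alone, so the StubTTS stand-in, the speaker-dict fields, and the text_prompt placeholder are illustrative assumptions rather than the Space's actual StyleTTS2 code.

# Minimal runnable sketch of the code paths touched by this commit.
# StubTTS, the speaker-dict fields, and the UI placeholders are assumptions
# for illustration, not the Space's real implementation.
import numpy as np
import soundfile as sf
import gradio as gr


class StubTTS:
    """Stand-in exposing the argument order used by the new call site."""
    def generate(self, text, speakers, avg_style, stabilize, denoise, n_merge, speaker_tag):
        return np.random.uniform(-1, 1, 24000)  # 1 s of noise instead of real speech


model = StubTTS()


def process_inputs(text_prompt, reference_audio_paths,
                   n_merge, denoise, avg_style, stabilize):
    speakers = {}
    for i, path in enumerate(reference_audio_paths or [], 1):
        speakers[f"id_{i}"] = {"path": path, "speed": 1.1}  # fields assumed from the hunk

    # Single-line call with the argument order introduced by this commit
    r = model.generate(text_prompt, speakers, avg_style, stabilize,
                       denoise, n_merge, "[id_1]")

    r = r / np.abs(r).max()                      # peak-normalize before writing
    sf.write("output.wav", r, samplerate=24000)
    return "output.wav"                          # path for the gr.Audio(type="filepath") output


with gr.Blocks() as demo:
    text_prompt = gr.Textbox(label="Text")       # placeholder for the Space's prompt box
    reference_audios = gr.File(label="Reference Audios",
                               file_types=[".wav", ".mp3", ".flac"],
                               file_count="multiple")
    with gr.Accordion("Advanced Settings", open=False):
        denoise = gr.Slider(0.0, 1.0, value=0.6, label="Denoise Strength")
        avg_style = gr.Checkbox(label="Use Average Styles", value=True)
        stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=True)
        n_merge = gr.Slider(10, 30, value=16, label="Min Words to Merge")
    submit_button = gr.Button("Synthesize")
    synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")

    submit_button.click(
        fn=process_inputs,
        inputs=[text_prompt, reference_audios, n_merge, denoise, avg_style, stabilize],
        outputs=synthesized_audio,
    )

if __name__ == "__main__":
    demo.launch()

Because gr.Button.click maps the inputs components to the function arguments positionally, the inputs list order (n_merge, denoise, avg_style, stabilize) must match the parameter order of process_inputs, which this commit keeps consistent even though model.generate itself takes the values in a different order.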