dangtr0408 committed on
Commit
2761ffc
·
1 Parent(s): a6f6f36
Files changed (1) hide show
  1. app.py +8 -17
app.py CHANGED
@@ -24,8 +24,7 @@ model = StyleTTS2(config_path, models_path).to(device)
24
 
25
  # Core inference function
26
  def process_inputs(text_prompt, reference_audio_paths,
27
- n_merge, randomness, smooth_dur,
28
- denoise, t_denoise, split_dur):
29
 
30
  speakers = {}
31
  for i, path in enumerate(reference_audio_paths, 1):
@@ -36,11 +35,7 @@ def process_inputs(text_prompt, reference_audio_paths,
36
  "speed": 1.1
37
  }
38
 
39
- # Synthesize audio
40
- r = model.generate(
41
- text_prompt, speakers, denoise, t_denoise,
42
- split_dur, "[id_1]", n_merge, randomness, smooth_dur
43
- )
44
 
45
  r = r / np.abs(r).max()
46
  sf.write("output.wav", r, samplerate=24000)
@@ -68,12 +63,10 @@ with gr.Blocks(css=custom_css) as demo:
68
  reference_audios = gr.File(label="Reference Audios", file_types=[".wav", ".mp3", ".flac"], file_count="multiple", elem_id="custom-box")
69
  # Parameters
70
  with gr.Accordion("Advanced Settings", open=False):
71
- denoise = gr.Checkbox(label="Apply Denoising", value=True)
72
- t_denoise = gr.Slider(0.0, 1.0, value=0.3, label="Denoise Strength")
73
- n_merge = gr.Slider(1, 30, value=16, label="Min Words to Merge")
74
- randomness = gr.Slider(0.0, 1.0, value=0.2, label="Randomness")
75
- smooth_dur = gr.Slider(0.0, 1.0, value=0.15, label="Smooth Duration")
76
- split_dur = gr.Slider(0, 10, step=1, value=3, label="Split Ref Audio Duration")
77
 
78
  submit_button = gr.Button("Synthesize")
79
  synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")
@@ -84,11 +77,9 @@ with gr.Blocks(css=custom_css) as demo:
84
  text_prompt,
85
  reference_audios,
86
  n_merge,
87
- randomness,
88
- smooth_dur,
89
  denoise,
90
- t_denoise,
91
- split_dur
92
  ],
93
  outputs=synthesized_audio
94
  )
 
24
 
25
  # Core inference function
26
  def process_inputs(text_prompt, reference_audio_paths,
27
+ n_merge, denoise, avg_style,stabilize):
 
28
 
29
  speakers = {}
30
  for i, path in enumerate(reference_audio_paths, 1):
 
35
  "speed": 1.1
36
  }
37
 
38
+ r = model.generate(text_prompt, speakers, avg_style, stabilize, denoise, n_merge, "[id_1]")
 
 
 
 
39
 
40
  r = r / np.abs(r).max()
41
  sf.write("output.wav", r, samplerate=24000)
 
63
  reference_audios = gr.File(label="Reference Audios", file_types=[".wav", ".mp3", ".flac"], file_count="multiple", elem_id="custom-box")
64
  # Parameters
65
  with gr.Accordion("Advanced Settings", open=False):
66
+ denoise = gr.Slider(0.0, 1.0, value=0.6, label="Denoise Strength")
67
+ avg_style = gr.Checkbox(label="Use Average Styles", value=True)
68
+ stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=True)
69
+ n_merge = gr.Slider(10, 30, value=16, label="Min Words to Merge")
 
 
70
 
71
  submit_button = gr.Button("Synthesize")
72
  synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")
 
77
  text_prompt,
78
  reference_audios,
79
  n_merge,
 
 
80
  denoise,
81
+ avg_style,
82
+ stabilize
83
  ],
84
  outputs=synthesized_audio
85
  )