Spaces:
Build error
Build error
积极的屁孩
committed on
Commit
·
c65b183
1
Parent(s):
92e6065
adjust badges
Browse files
app.py
CHANGED
@@ -525,7 +525,7 @@ def vevo_voice(content_wav, style_reference_wav, timbre_reference_wav):
|
|
525 |
traceback.print_exc()
|
526 |
raise e
|
527 |
|
528 |
-
def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_language="en", ref_language="en"):
|
529 |
temp_ref_path = "wav/temp_ref.wav"
|
530 |
temp_timbre_path = "wav/temp_timbre.wav"
|
531 |
output_path = "wav/output_vevotts.wav"
|
@@ -561,7 +561,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
|
|
561 |
# 打印debug信息
|
562 |
print(f"Reference audio shape: {ref_tensor.shape}, sample rate: {ref_sr}")
|
563 |
if style_ref_text:
|
564 |
-
print(f"Style reference text: {style_ref_text}")
|
565 |
|
566 |
# 保存上传的音频
|
567 |
torchaudio.save(temp_ref_path, ref_tensor, ref_sr)
|
@@ -607,7 +607,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
|
|
607 |
timbre_ref_wav_path=temp_timbre_path,
|
608 |
style_ref_wav_text=style_ref_text,
|
609 |
src_text_language=src_language,
|
610 |
-
style_ref_wav_text_language=
|
611 |
)
|
612 |
|
613 |
# 检查生成音频是否为数值异常
|
@@ -693,15 +693,15 @@ with gr.Blocks(title="Vevo DEMO") as demo:
|
|
693 |
tts_src_language = gr.Dropdown(["en", "zh", "de", "fr", "ja", "ko"], label="Text Language", value="en")
|
694 |
tts_reference = gr.Audio(label="Style Reference", type="numpy")
|
695 |
tts_style_ref_text = gr.Textbox(label="Style Reference Text", placeholder="Enter style reference text...", lines=3)
|
|
|
696 |
tts_timbre_reference = gr.Audio(label="Timbre Reference", type="numpy")
|
697 |
-
tts_ref_language = gr.Dropdown(["en", "zh", "de", "fr", "ja", "ko"], label="Reference Audio Language", value="en")
|
698 |
tts_button = gr.Button("Generate")
|
699 |
with gr.Column():
|
700 |
tts_output = gr.Audio(label="Result")
|
701 |
|
702 |
tts_button.click(
|
703 |
vevo_tts,
|
704 |
-
inputs=[tts_text, tts_reference, tts_timbre_reference, tts_style_ref_text, tts_src_language, tts_ref_language],
|
705 |
outputs=tts_output
|
706 |
)
|
707 |
|
|
|
525 |
traceback.print_exc()
|
526 |
raise e
|
527 |
|
528 |
+
def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_language="en", ref_language="en", style_ref_text_language="en"):
|
529 |
temp_ref_path = "wav/temp_ref.wav"
|
530 |
temp_timbre_path = "wav/temp_timbre.wav"
|
531 |
output_path = "wav/output_vevotts.wav"
|
|
|
561 |
# 打印debug信息
|
562 |
print(f"Reference audio shape: {ref_tensor.shape}, sample rate: {ref_sr}")
|
563 |
if style_ref_text:
|
564 |
+
print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
|
565 |
|
566 |
# 保存上传的音频
|
567 |
torchaudio.save(temp_ref_path, ref_tensor, ref_sr)
|
|
|
607 |
timbre_ref_wav_path=temp_timbre_path,
|
608 |
style_ref_wav_text=style_ref_text,
|
609 |
src_text_language=src_language,
|
610 |
+
style_ref_wav_text_language=style_ref_text_language,
|
611 |
)
|
612 |
|
613 |
# 检查生成音频是否为数值异常
|
|
|
693 |
tts_src_language = gr.Dropdown(["en", "zh", "de", "fr", "ja", "ko"], label="Text Language", value="en")
|
694 |
tts_reference = gr.Audio(label="Style Reference", type="numpy")
|
695 |
tts_style_ref_text = gr.Textbox(label="Style Reference Text", placeholder="Enter style reference text...", lines=3)
|
696 |
+
tts_style_ref_text_language = gr.Dropdown(["en", "zh", "de", "fr", "ja", "ko"], label="Style Reference Text Language", value="en")
|
697 |
tts_timbre_reference = gr.Audio(label="Timbre Reference", type="numpy")
|
|
|
698 |
tts_button = gr.Button("Generate")
|
699 |
with gr.Column():
|
700 |
tts_output = gr.Audio(label="Result")
|
701 |
|
702 |
tts_button.click(
|
703 |
vevo_tts,
|
704 |
+
inputs=[tts_text, tts_reference, tts_timbre_reference, tts_style_ref_text, tts_src_language, tts_ref_language, tts_style_ref_text_language],
|
705 |
outputs=tts_output
|
706 |
)
|
707 |
|