kennethli319 committed on
Commit
7b0aa8e
·
1 Parent(s): b1ab8b4

update app

Browse files
Files changed (1) hide show
  1. app.py +99 -1
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import torch
3
  import torchaudio
4
  import tempfile
5
-
6
  import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
@@ -32,6 +32,101 @@ def generate_tts(text: str, speaker: int = 0):
32
  return (sr, audio.squeeze(0).cpu().numpy())
33
 
34
  def run():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  demo = gr.Interface(
36
  fn=generate_tts,
37
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
@@ -39,6 +134,9 @@ def run():
39
  outputs=gr.Audio(label="Output", type="numpy"),
40
  )
41
 
 
 
 
42
  demo.launch(server_name="0.0.0.0", server_port=7860)
43
 
44
 
 
2
  import torch
3
  import torchaudio
4
  import tempfile
5
+ import logging
6
  import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
 
32
  return (sr, audio.squeeze(0).cpu().numpy())
33
 
34
  def run():
35
+ logging.basicConfig(level=logging.INFO)
36
+
37
+ with gr.Blocks() as demo:
38
+ gr.Markdown(
39
+ """
40
+ <h1 align="center">Balacoon🦝 Text-to-Speech</h1>
41
+ 1. Write an utterance to generate,
42
+ 2. Select the model to synthesize with
43
+ 3. Select speaker
44
+ 4. Hit "Generate" and listen to the result!
45
+ You can learn more about models available
46
+ [here](https://huggingface.co/balacoon/tts).
47
+ Visit [Balacoon website](https://balacoon.com/) for more info.
48
+ """
49
+ )
50
+ with gr.Row(variant="panel"):
51
+ text = gr.Textbox(label="Text", placeholder="Type something here...")
52
+
53
+ with gr.Row():
54
+ with gr.Column(variant="panel"):
55
+ repo_files = os.listdir(model_repo_dir)
56
+ model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
57
+ model_name = gr.Dropdown(
58
+ label="Model",
59
+ choices=model_files,
60
+ )
61
+ with gr.Column(variant="panel"):
62
+ speaker = gr.Dropdown(label="Speaker", choices=[])
63
+
64
+ def set_model(model_name_str: str):
65
+ """
66
+ gets value from `model_name`. either
67
+ uses cached list of speakers for the given model name
68
+ or loads the addon and checks what are the speakers.
69
+ """
70
+ global model_to_speakers
71
+ if model_name_str in model_to_speakers:
72
+ speakers = model_to_speakers[model_name_str]
73
+ else:
74
+ global tts, cur_model_path, locker
75
+ with locker:
76
+ # need to load this model to learn the list of speakers
77
+ model_path = os.path.join(model_repo_dir, model_name_str)
78
+ if tts is not None:
79
+ del tts
80
+ tts = TTS(model_path)
81
+ cur_model_path = model_path
82
+ speakers = tts.get_speakers()
83
+ model_to_speakers[model_name_str] = speakers
84
+
85
+ value = speakers[-1]
86
+ return gr.Dropdown.update(
87
+ choices=speakers, value=value, visible=True
88
+ )
89
+
90
+ model_name.change(set_model, inputs=model_name, outputs=speaker)
91
+
92
+ with gr.Row(variant="panel"):
93
+ generate = gr.Button("Generate")
94
+ with gr.Row(variant="panel"):
95
+ audio = gr.Audio()
96
+
97
+ def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
98
+ """
99
+ gets utterance to synthesize from `text` Textbox
100
+ and speaker name from `speaker` dropdown list.
101
+ speaker name might be empty for single-speaker models.
102
+ Synthesizes the waveform and updates `audio` with it.
103
+ """
104
+ if not text_str or not model_name_str or not speaker_str:
105
+ logging.info("text, model name or speaker are not provided")
106
+ return None
107
+ expected_model_path = os.path.join(model_repo_dir, model_name_str)
108
+ global tts, cur_model_path, locker
109
+ with locker:
110
+ if expected_model_path != cur_model_path:
111
+ # reload model
112
+ if tts is not None:
113
+ del tts
114
+ tts = TTS(expected_model_path)
115
+ cur_model_path = expected_model_path
116
+ if len(text_str) > 1024:
117
+ # truncate the text
118
+ text_str = text_str[:1024]
119
+ samples = tts.synthesize(text_str, speaker_str)
120
+ return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
121
+
122
+ generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio)
123
+
124
+ demo.queue(concurrency_count=1).launch()
125
+
126
+
127
+
128
+
129
+
130
  demo = gr.Interface(
131
  fn=generate_tts,
132
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
 
134
  outputs=gr.Audio(label="Output", type="numpy"),
135
  )
136
 
137
+ with gr.Row(variant="panel"):
138
+ generate = gr.Button("Generate")
139
+
140
  demo.launch(server_name="0.0.0.0", server_port=7860)
141
 
142