kennethli319 committed on
Commit
7b0aa8e
·
1 Parent(s): b1ab8b4

update app

Browse files
Files changed (1) hide show
  1. app.py +99 -1
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import torch
3
  import torchaudio
4
  import tempfile
5
-
6
  import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
@@ -32,6 +32,101 @@ def generate_tts(text: str, speaker: int = 0):
32
  return (sr, audio.squeeze(0).cpu().numpy())
33
 
34
  def run():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  demo = gr.Interface(
36
  fn=generate_tts,
37
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
@@ -39,6 +134,9 @@ def run():
39
  outputs=gr.Audio(label="Output", type="numpy"),
40
  )
41
 
 
 
 
42
  demo.launch(server_name="0.0.0.0", server_port=7860)
43
 
44
 
 
2
  import torch
3
  import torchaudio
4
  import tempfile
5
+ import logging
6
  import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
 
32
  return (sr, audio.squeeze(0).cpu().numpy())
33
 
34
  def run():
35
+ logging.basicConfig(level=logging.INFO)
36
+
37
+ with gr.Blocks() as demo:
38
+ gr.Markdown(
39
+ """
40
+ <h1 align="center">Balacoon🦝 Text-to-Speech</h1>
41
+ 1. Write an utterance to generate,
42
+ 2. Select the model to synthesize with
43
+ 3. Select speaker
44
+ 4. Hit "Generate" and listen to the result!
45
+ You can learn more about models available
46
+ [here](https://huggingface.co/balacoon/tts).
47
+ Visit [Balacoon website](https://balacoon.com/) for more info.
48
+ """
49
+ )
50
+ with gr.Row(variant="panel"):
51
+ text = gr.Textbox(label="Text", placeholder="Type something here...")
52
+
53
+ with gr.Row():
54
+ with gr.Column(variant="panel"):
55
+ repo_files = os.listdir(model_repo_dir)
56
+ model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
57
+ model_name = gr.Dropdown(
58
+ label="Model",
59
+ choices=model_files,
60
+ )
61
+ with gr.Column(variant="panel"):
62
+ speaker = gr.Dropdown(label="Speaker", choices=[])
63
+
64
+ def set_model(model_name_str: str):
65
+ """
66
+ gets value from `model_name`. either
67
+ uses cached list of speakers for the given model name
68
+ or loads the addon and checks what are the speakers.
69
+ """
70
+ global model_to_speakers
71
+ if model_name_str in model_to_speakers:
72
+ speakers = model_to_speakers[model_name_str]
73
+ else:
74
+ global tts, cur_model_path, locker
75
+ with locker:
76
+ # need to load this model to learn the list of speakers
77
+ model_path = os.path.join(model_repo_dir, model_name_str)
78
+ if tts is not None:
79
+ del tts
80
+ tts = TTS(model_path)
81
+ cur_model_path = model_path
82
+ speakers = tts.get_speakers()
83
+ model_to_speakers[model_name_str] = speakers
84
+
85
+ value = speakers[-1]
86
+ return gr.Dropdown.update(
87
+ choices=speakers, value=value, visible=True
88
+ )
89
+
90
+ model_name.change(set_model, inputs=model_name, outputs=speaker)
91
+
92
+ with gr.Row(variant="panel"):
93
+ generate = gr.Button("Generate")
94
+ with gr.Row(variant="panel"):
95
+ audio = gr.Audio()
96
+
97
+ def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
98
+ """
99
+ gets utterance to synthesize from `text` Textbox
100
+ and speaker name from `speaker` dropdown list.
101
+ speaker name might be empty for single-speaker models.
102
+ Synthesizes the waveform and updates `audio` with it.
103
+ """
104
+ if not text_str or not model_name_str or not speaker_str:
105
+ logging.info("text, model name or speaker are not provided")
106
+ return None
107
+ expected_model_path = os.path.join(model_repo_dir, model_name_str)
108
+ global tts, cur_model_path, locker
109
+ with locker:
110
+ if expected_model_path != cur_model_path:
111
+ # reload model
112
+ if tts is not None:
113
+ del tts
114
+ tts = TTS(expected_model_path)
115
+ cur_model_path = expected_model_path
116
+ if len(text_str) > 1024:
117
+ # truncate the text
118
+ text_str = text_str[:1024]
119
+ samples = tts.synthesize(text_str, speaker_str)
120
+ return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
121
+
122
+ generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio)
123
+
124
+ demo.queue(concurrency_count=1).launch()
125
+
126
+
127
+
128
+
129
+
130
  demo = gr.Interface(
131
  fn=generate_tts,
132
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
 
134
  outputs=gr.Audio(label="Output", type="numpy"),
135
  )
136
 
137
+ with gr.Row(variant="panel"):
138
+ generate = gr.Button("Generate")
139
+
140
  demo.launch(server_name="0.0.0.0", server_port=7860)
141
 
142