tdurzynski committed on
Commit
951b505
·
verified ·
1 Parent(s): 1dc3846

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -50
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Speech Translation Demo with Automatic TTS and Restart Option
3
 
4
  This demo performs the following:
5
  1. Accepts up to 15 seconds of audio recording from the microphone.
@@ -10,7 +10,6 @@ This demo performs the following:
10
  5. Automatically converts the final translated text to speech using gTTS.
11
  6. Provides a "Restart Recording" button (located just below the recording section)
12
  to reset the audio input, translated text, and TTS output.
13
-
14
  Note: True real-time translation (i.e. while speaking) requires a continuous streaming
15
  solution which is not provided by the standard browser microphone input.
16
  """
@@ -109,56 +108,80 @@ def restart_recording():
109
  return None, "", None
110
 
111
  # -----------------------------------------------------------------------------
112
- # Gradio Interface Definition with Updated Layout and Chained Events
113
  # -----------------------------------------------------------------------------
114
  with gr.Blocks() as demo:
115
- gr.Markdown("# Real-time Speech Translation Demo")
116
- gr.Markdown(
117
- "Speak into the microphone and your speech will be transcribed and translated "
118
- "segment-by-segment. (Recording is limited to 15 seconds.)\n\n"
119
- "**Note:** The translation and speech synthesis occur automatically after recording."
120
- )
121
-
122
- # Top row: Audio input and target language selection.
123
- with gr.Row():
124
- audio_input = gr.Audio(
125
- sources=["microphone"],
126
- type="filepath",
127
- label="Record your speech (max 15 seconds)",
128
- elem_id="audio_input"
129
- )
130
- target_lang_dropdown = gr.Dropdown(
131
- choices=list(LANGUAGES.keys()),
132
- value="English",
133
- label="Select Target Language"
134
- )
135
-
136
- # Restart Recording button placed directly below the recording section.
137
- with gr.Row():
138
- restart_button = gr.Button("Restart Recording")
139
-
140
- # Output components: Translated text and TTS audio.
141
- output_text = gr.Textbox(label="Translated Text", lines=10)
142
- tts_audio = gr.Audio(label="Translated Speech", type="filepath")
143
-
144
- # Chain the audio input change event: first stream translation text, then automatically generate TTS.
145
- audio_input.change(
146
- fn=translate_audio,
147
- inputs=[audio_input, target_lang_dropdown],
148
- outputs=output_text,
149
- stream=True
150
- ).then(
151
- fn=generate_tts,
152
- inputs=[output_text, target_lang_dropdown],
153
- outputs=tts_audio
154
- )
155
-
156
- # Restart button clears the audio input, translation text, and TTS output.
157
- restart_button.click(
158
- fn=restart_recording,
159
- inputs=[],
160
- outputs=[audio_input, output_text, tts_audio]
161
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  # Launch the Gradio app (suitable for Hugging Face Spaces).
164
  demo.launch()
 
 
1
  """
2
+ Speech Translation Demo with Automatic TTS, Restart Option, and About Tab
3
 
4
  This demo performs the following:
5
  1. Accepts up to 15 seconds of audio recording from the microphone.
 
10
  5. Automatically converts the final translated text to speech using gTTS.
11
  6. Provides a "Restart Recording" button (located just below the recording section)
12
  to reset the audio input, translated text, and TTS output.
 
13
  Note: True real-time translation (i.e. while speaking) requires a continuous streaming
14
  solution which is not provided by the standard browser microphone input.
15
  """
 
108
  return None, "", None
109
 
110
  # -----------------------------------------------------------------------------
111
+ # Gradio Interface Definition with Tabs
112
  # -----------------------------------------------------------------------------
113
  with gr.Blocks() as demo:
114
+ with gr.Tabs():
115
+ # "Demo" Tab: Contains the interactive interface.
116
+ with gr.TabItem("Demo"):
117
+ gr.Markdown("# Real-time Speech Translation Demo")
118
+ gr.Markdown(
119
+ "Speak into the microphone and your speech will be transcribed and translated "
120
+ "segment-by-segment. (Recording is limited to 15 seconds.)\n\n"
121
+ "**Note:** The translation and speech synthesis occur automatically after recording."
122
+ )
123
+
124
+ # Row for audio input and target language selection.
125
+ with gr.Row():
126
+ audio_input = gr.Audio(
127
+ sources=["microphone"],
128
+ type="filepath",
129
+ label="Record your speech (max 15 seconds)",
130
+ elem_id="audio_input"
131
+ )
132
+ target_lang_dropdown = gr.Dropdown(
133
+ choices=list(LANGUAGES.keys()),
134
+ value="English",
135
+ label="Select Target Language"
136
+ )
137
+
138
+ # Row for the Restart Recording button (placed just below the recording section).
139
+ with gr.Row():
140
+ restart_button = gr.Button("Restart Recording")
141
+
142
+ # Output components: Translated text and TTS audio.
143
+ output_text = gr.Textbox(label="Translated Text", lines=10)
144
+ tts_audio = gr.Audio(label="Translated Speech", type="filepath")
145
+
146
+ # Chain the events:
147
+ # 1. When new audio is recorded, stream the translation text.
148
+ # 2. Once translation is complete, automatically generate the TTS audio.
149
+ audio_input.change(
150
+ fn=translate_audio,
151
+ inputs=[audio_input, target_lang_dropdown],
152
+ outputs=output_text,
153
+ stream=True
154
+ ).then(
155
+ fn=generate_tts,
156
+ inputs=[output_text, target_lang_dropdown],
157
+ outputs=tts_audio
158
+ )
159
+
160
+ # The Restart button clears the audio input, translation text, and TTS audio.
161
+ restart_button.click(
162
+ fn=restart_recording,
163
+ inputs=[],
164
+ outputs=[audio_input, output_text, tts_audio]
165
+ )
166
+
167
+ # "About" Tab: Displays the descriptive text.
168
+ with gr.TabItem("About"):
169
+ gr.Markdown(
170
+ """
171
+ **Speech Translation Demo with Automatic TTS and Restart Option**
172
+
173
+ This demo performs the following:
174
+ 1. Accepts up to 15 seconds of audio recording from the microphone.
175
+ 2. Uses OpenAI’s Whisper model to transcribe the speech.
176
+ 3. Splits the transcription into segments and translates each segment on-the-fly using Facebook’s M2M100 model.
177
+ 4. Streams the cumulative translation output to the user.
178
+ 5. Automatically converts the final translated text to speech using gTTS.
179
+ 6. Provides a "Restart Recording" button (located just below the recording section) to reset the audio input, translated text, and TTS output.
180
+
181
+ **Note:** True real-time translation (i.e. while speaking) requires a continuous streaming solution which is not provided by the standard browser microphone input.
182
+ """
183
+ )
184
 
185
  # Launch the Gradio app (suitable for Hugging Face Spaces).
186
  demo.launch()
187
+