Spaces:

tdurzynski
/

real-time-speech-translation

Running

App Files Files Community

tdurzynski committed on Feb 7

Commit

951b505

verified ·

1 Parent(s): 1dc3846

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -50

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Speech Translation Demo with Automatic TTS and Restart Option
 This demo performs the following:
   1. Accepts up to 15 seconds of audio recording from the microphone.
@@ -10,7 +10,6 @@ This demo performs the following:
   5. Automatically converts the final translated text to speech using gTTS.
   6. Provides a "Restart Recording" button (located just below the recording section)
      to reset the audio input, translated text, and TTS output.
 Note: True real-time translation (i.e. while speaking) requires a continuous streaming
 solution which is not provided by the standard browser microphone input.
 """
@@ -109,56 +108,80 @@ def restart_recording():
     return None, "", None
 # -----------------------------------------------------------------------------
-# Gradio Interface Definition with Updated Layout and Chained Events
 # -----------------------------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Real-time Speech Translation Demo")
-    gr.Markdown(
-        "Speak into the microphone and your speech will be transcribed and translated "
-        "segment-by-segment. (Recording is limited to 15 seconds.)\n\n"
-        "**Note:** The translation and speech synthesis occur automatically after recording."
-    )
-    # Top row: Audio input and target language selection.
-    with gr.Row():
-        audio_input = gr.Audio(
-            sources=["microphone"],
-            type="filepath",
-            label="Record your speech (max 15 seconds)",
-            elem_id="audio_input"
-        )
-        target_lang_dropdown = gr.Dropdown(
-            choices=list(LANGUAGES.keys()),
-            value="English",
-            label="Select Target Language"
-        )
-    # Restart Recording button placed directly below the recording section.
-    with gr.Row():
-        restart_button = gr.Button("Restart Recording")
-    # Output components: Translated text and TTS audio.
-    output_text = gr.Textbox(label="Translated Text", lines=10)
-    tts_audio = gr.Audio(label="Translated Speech", type="filepath")
-    # Chain the audio input change event: first stream translation text, then automatically generate TTS.
-    audio_input.change(
-        fn=translate_audio,
-        inputs=[audio_input, target_lang_dropdown],
-        outputs=output_text,
-        stream=True
-    ).then(
-        fn=generate_tts,
-        inputs=[output_text, target_lang_dropdown],
-        outputs=tts_audio
-    )
-    # Restart button clears the audio input, translation text, and TTS output.
-    restart_button.click(
-        fn=restart_recording,
-        inputs=[],
-        outputs=[audio_input, output_text, tts_audio]
-    )
 # Launch the Gradio app (suitable for Hugging Face Spaces).
 demo.launch()

 """
+Speech Translation Demo with Automatic TTS, Restart Option, and About Tab
 This demo performs the following:
   1. Accepts up to 15 seconds of audio recording from the microphone.
   5. Automatically converts the final translated text to speech using gTTS.
   6. Provides a "Restart Recording" button (located just below the recording section)
      to reset the audio input, translated text, and TTS output.
 Note: True real-time translation (i.e. while speaking) requires a continuous streaming
 solution which is not provided by the standard browser microphone input.
 """
     return None, "", None
 # -----------------------------------------------------------------------------
+# Gradio Interface Definition with Tabs
 # -----------------------------------------------------------------------------
 with gr.Blocks() as demo:
+    with gr.Tabs():
+        # "Demo" Tab: Contains the interactive interface.
+        with gr.TabItem("Demo"):
+            gr.Markdown("# Real-time Speech Translation Demo")
+            gr.Markdown(
+                "Speak into the microphone and your speech will be transcribed and translated "
+                "segment-by-segment. (Recording is limited to 15 seconds.)\n\n"
+                "**Note:** The translation and speech synthesis occur automatically after recording."
+            )
+            # Row for audio input and target language selection.
+            with gr.Row():
+                audio_input = gr.Audio(
+                    sources=["microphone"],
+                    type="filepath",
+                    label="Record your speech (max 15 seconds)",
+                    elem_id="audio_input"
+                )
+                target_lang_dropdown = gr.Dropdown(
+                    choices=list(LANGUAGES.keys()),
+                    value="English",
+                    label="Select Target Language"
+                )
+            # Row for the Restart Recording button (placed just below the recording section).
+            with gr.Row():
+                restart_button = gr.Button("Restart Recording")
+            # Output components: Translated text and TTS audio.
+            output_text = gr.Textbox(label="Translated Text", lines=10)
+            tts_audio = gr.Audio(label="Translated Speech", type="filepath")
+            # Chain the events:
+            # 1. When new audio is recorded, stream the translation text.
+            # 2. Once translation is complete, automatically generate the TTS audio.
+            audio_input.change(
+                fn=translate_audio,
+                inputs=[audio_input, target_lang_dropdown],
+                outputs=output_text,
+                stream=True
+            ).then(
+                fn=generate_tts,
+                inputs=[output_text, target_lang_dropdown],
+                outputs=tts_audio
+            )
+            # The Restart button clears the audio input, translation text, and TTS audio.
+            restart_button.click(
+                fn=restart_recording,
+                inputs=[],
+                outputs=[audio_input, output_text, tts_audio]
+            )
+        # "About" Tab: Displays the descriptive text.
+        with gr.TabItem("About"):
+            gr.Markdown(
+                """
+**Speech Translation Demo with Automatic TTS and Restart Option**
+This demo performs the following:
+  1. Accepts up to 15 seconds of audio recording from the microphone.
+  2. Uses OpenAI’s Whisper model to transcribe the speech.
+  3. Splits the transcription into segments and translates each segment on-the-fly using Facebook’s M2M100 model.
+  4. Streams the cumulative translation output to the user.
+  5. Automatically converts the final translated text to speech using gTTS.
+  6. Provides a "Restart Recording" button (located just below the recording section) to reset the audio input, translated text, and TTS output.
+**Note:** True real-time translation (i.e. while speaking) requires a continuous streaming solution which is not provided by the standard browser microphone input.
+                """
+            )
 # Launch the Gradio app (suitable for Hugging Face Spaces).
 demo.launch()