Spaces:

GavinHuang
/

asr-demo

Running on Zero

App Files Files Community

GavinHuang committed 26 days ago

Commit

8d5b897

1 Parent(s): 2b1f9fe

fix: update transcription handling to return last transcription and improve state management

Browse files

Files changed (1) hide show

app.py +25 -9

app.py CHANGED Viewed

@@ -135,9 +135,8 @@ def transcribe(audio, model_name="nvidia/parakeet-tdt-0.6b-v2", state="", audio_
                 audio_buffer = [full_audio[-keep_samples:]]
             else:
                 audio_buffer = []
             print(f"New state: {new_state}")
-            return new_state, new_state, audio_buffer, last_processed_time
         except Exception as e:
             print(f"Error processing audio: {e}")
@@ -243,7 +242,8 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
                         placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
                         lines=10
                     )
-      # State to store the ongoing transcription
     state = gr.State("")
     audio_buffer = gr.State(value=None)
     last_processed_time = gr.State(value=0)
@@ -287,7 +287,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
     )    # Clear the real-time transcription
     def clear_transcription():
         print("Clearing real-time transcription")
-        return "", "", None, 0  # Clear state, streaming_text, audio_buffer, and last_processed_time
     # Clear the file transcription
     def clear_file_transcription():
@@ -298,20 +298,36 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
     clear_btn.click(
         fn=clear_transcription,
         inputs=[],
-        outputs=[state, streaming_text, audio_buffer, last_processed_time]
     )
     clear_file_btn.click(
         fn=clear_file_transcription,
         inputs=[],
         outputs=[file_transcription]
-    )
-    # Update the main text output when the state changes
     state.change(
-        fn=lambda s: s,
         inputs=[state],
-        outputs=[text_output]    )

                 audio_buffer = [full_audio[-keep_samples:]]
             else:
                 audio_buffer = []
             print(f"New state: {new_state}")
+            return new_state, transcription, audio_buffer, last_processed_time  # Return last transcription for streaming_text
         except Exception as e:
             print(f"Error processing audio: {e}")
                         placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
                         lines=10
                     )
+    # State to store the ongoing transcription
     state = gr.State("")
     audio_buffer = gr.State(value=None)
     last_processed_time = gr.State(value=0)
     )    # Clear the real-time transcription
     def clear_transcription():
         print("Clearing real-time transcription")
+        return "", "", None, 0  # Return empty values for state, text_output, audio_buffer, and last_processed_time
     # Clear the file transcription
     def clear_file_transcription():
     clear_btn.click(
         fn=clear_transcription,
         inputs=[],
+        outputs=[state, text_output, audio_buffer, last_processed_time]
+    )
+    # Also clear streaming_text when clearing the transcription
+    clear_btn.click(
+        fn=lambda: "",
+        inputs=[],
+        outputs=[streaming_text]
     )
     clear_file_btn.click(
         fn=clear_file_transcription,
         inputs=[],
         outputs=[file_transcription]
+    )    # Update the main text output and the streaming preview when the state changes
+    def update_output(transcript):  # state.change handler: returns (full transcript, streaming preview)
+        # For streaming_text, show only the last 15 whitespace-separated words of the transcript
+        words = transcript.split()
+        if len(words) > 15:
+            streaming_text = " ".join(words[-15:])  # local name, not the streaming_text output component
+        else:
+            streaming_text = transcript  # 15 words or fewer: show the transcript unchanged
+        return transcript, streaming_text
     state.change(
+        fn=update_output,
         inputs=[state],
+        outputs=[text_output, streaming_text]
+    )