GavinHuang committed on
Commit
8d5b897
·
1 Parent(s): 2b1f9fe

fix: update transcription handling to return last transcription and improve state management

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -135,9 +135,8 @@ def transcribe(audio, model_name="nvidia/parakeet-tdt-0.6b-v2", state="", audio_
135
  audio_buffer = [full_audio[-keep_samples:]]
136
  else:
137
  audio_buffer = []
138
-
139
  print(f"New state: {new_state}")
140
- return new_state, new_state, audio_buffer, last_processed_time
141
 
142
  except Exception as e:
143
  print(f"Error processing audio: {e}")
@@ -243,7 +242,8 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
243
  placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
244
  lines=10
245
  )
246
- # State to store the ongoing transcription
 
247
  state = gr.State("")
248
  audio_buffer = gr.State(value=None)
249
  last_processed_time = gr.State(value=0)
@@ -287,7 +287,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
287
  ) # Clear the real-time transcription
288
  def clear_transcription():
289
  print("Clearing real-time transcription")
290
- return "", "", None, 0 # Clear state, streaming_text, audio_buffer, and last_processed_time
291
 
292
  # Clear the file transcription
293
  def clear_file_transcription():
@@ -298,20 +298,36 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
298
  clear_btn.click(
299
  fn=clear_transcription,
300
  inputs=[],
301
- outputs=[state, streaming_text, audio_buffer, last_processed_time]
 
 
 
 
 
 
 
302
  )
303
 
304
  clear_file_btn.click(
305
  fn=clear_file_transcription,
306
  inputs=[],
307
  outputs=[file_transcription]
308
- )
 
 
 
 
 
 
 
 
 
309
 
310
- # Update the main text output when the state changes
311
  state.change(
312
- fn=lambda s: s,
313
  inputs=[state],
314
- outputs=[text_output] )
 
315
 
316
 
317
 
 
135
  audio_buffer = [full_audio[-keep_samples:]]
136
  else:
137
  audio_buffer = []
 
138
  print(f"New state: {new_state}")
139
+ return new_state, transcription, audio_buffer, last_processed_time # Return last transcription for streaming_text
140
 
141
  except Exception as e:
142
  print(f"Error processing audio: {e}")
 
242
  placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
243
  lines=10
244
  )
245
+
246
+ # State to store the ongoing transcription
247
  state = gr.State("")
248
  audio_buffer = gr.State(value=None)
249
  last_processed_time = gr.State(value=0)
 
287
  ) # Clear the real-time transcription
288
  def clear_transcription():
289
  print("Clearing real-time transcription")
290
+ return "", "", None, 0 # Return empty values for state, text_output, audio_buffer, and last_processed_time
291
 
292
  # Clear the file transcription
293
  def clear_file_transcription():
 
298
  clear_btn.click(
299
  fn=clear_transcription,
300
  inputs=[],
301
+ outputs=[state, text_output, audio_buffer, last_processed_time]
302
+ )
303
+
304
+ # Also clear streaming_text when clearing the transcription
305
+ clear_btn.click(
306
+ fn=lambda: "",
307
+ inputs=[],
308
+ outputs=[streaming_text]
309
  )
310
 
311
  clear_file_btn.click(
312
  fn=clear_file_transcription,
313
  inputs=[],
314
  outputs=[file_transcription]
315
+ ) # Update the main text output when the state changes
316
+ def update_output(transcript):
317
+ # For streaming_text, show just the last few words or chunks
318
+ words = transcript.split()
319
+ if len(words) > 15:
320
+ streaming_text = " ".join(words[-15:])
321
+ else:
322
+ streaming_text = transcript
323
+
324
+ return transcript, streaming_text
325
 
 
326
  state.change(
327
+ fn=update_output,
328
  inputs=[state],
329
+ outputs=[text_output, streaming_text]
330
+ )
331
 
332
 
333