GavinHuang committed on
Commit
6dbf680
·
1 Parent(s): f374409

fix: add clear transcription functionality for real-time and file transcription, update UI elements

Browse files
Files changed (1) hide show
  1. app.py +32 -7
app.py CHANGED
@@ -159,6 +159,7 @@ def transcribe_file(audio_file, model_name="nvidia/parakeet-tdt-0.6b-v2"):
159
  return "No audio file provided. Please upload an audio file."
160
 
161
  try:
 
162
  model = load_model(model_name)
163
 
164
  print(f"Processing file: {audio_file}")
@@ -209,7 +210,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
209
  label="Speak into your microphone"
210
  )
211
 
212
- # clear_btn = gr.Button("Clear Transcript")
213
 
214
  with gr.Column(scale=3):
215
  text_output = gr.Textbox(
@@ -232,7 +233,9 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
232
  type="filepath",
233
  label="Record or upload audio file"
234
  )
235
- transcribe_btn = gr.Button("Transcribe Audio File")
 
 
236
 
237
  with gr.Column(scale=3):
238
  file_transcription = gr.Textbox(
@@ -281,24 +284,45 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
281
  fn=transcribe_file,
282
  inputs=[audio_recorder, model_dropdown],
283
  outputs=[file_transcription]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  )
285
 
286
  # Update the main text output when the state changes
287
  state.change(
288
  fn=lambda s: s,
289
  inputs=[state],
290
- outputs=[text_output]
291
- )
 
292
 
293
- gr.Markdown("## 📝 Instructions")
294
- gr.Markdown("""
295
  ### Real-time Transcription:
296
  1. Select an ASR model from the dropdown menu
297
  2. Click 'Load Selected Model' to load the model
298
  3. Click the microphone button to start recording
299
  4. Speak clearly into your microphone
300
  5. The transcription will appear in real-time
301
- 6. Click 'Clear Transcript' to start a new transcription
302
 
303
  ### File Transcription:
304
  1. Select an ASR model from the dropdown menu
@@ -307,6 +331,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
307
  4. Record audio by clicking the microphone button or upload an existing audio file
308
  5. Click 'Transcribe Audio File' to process the recording
309
  6. The complete transcription will appear in the text box
 
310
  """)
311
 
312
  # Launch the app
 
159
  return "No audio file provided. Please upload an audio file."
160
 
161
  try:
162
+ global model
163
  model = load_model(model_name)
164
 
165
  print(f"Processing file: {audio_file}")
 
210
  label="Speak into your microphone"
211
  )
212
 
213
+ clear_btn = gr.Button("Clear Transcript", variant="secondary")
214
 
215
  with gr.Column(scale=3):
216
  text_output = gr.Textbox(
 
233
  type="filepath",
234
  label="Record or upload audio file"
235
  )
236
+ with gr.Row():
237
+ transcribe_btn = gr.Button("Transcribe Audio File", variant="primary")
238
+ clear_file_btn = gr.Button("Clear Transcript", variant="secondary")
239
 
240
  with gr.Column(scale=3):
241
  file_transcription = gr.Textbox(
 
284
  fn=transcribe_file,
285
  inputs=[audio_recorder, model_dropdown],
286
  outputs=[file_transcription]
287
+ ) # Clear the real-time transcription
288
+ def clear_transcription():
289
+ print("Clearing real-time transcription")
290
+ return "", "", None, 0 # Clear state, streaming_text, audio_buffer, and last_processed_time
291
+
292
+ # Clear the file transcription
293
+ def clear_file_transcription():
294
+ print("Clearing file transcription")
295
+ return "" # Clear file_transcription
296
+
297
+ # Set up clear button event handlers
298
+ clear_btn.click(
299
+ fn=clear_transcription,
300
+ inputs=[],
301
+ outputs=[state, streaming_text, audio_buffer, last_processed_time]
302
+ )
303
+
304
+ clear_file_btn.click(
305
+ fn=clear_file_transcription,
306
+ inputs=[],
307
+ outputs=[file_transcription]
308
  )
309
 
310
  # Update the main text output when the state changes
311
  state.change(
312
  fn=lambda s: s,
313
  inputs=[state],
314
+ outputs=[text_output] )
315
+
316
+
317
 
318
+ gr.Markdown("## 📝 Instructions")gr.Markdown("""
 
319
  ### Real-time Transcription:
320
  1. Select an ASR model from the dropdown menu
321
  2. Click 'Load Selected Model' to load the model
322
  3. Click the microphone button to start recording
323
  4. Speak clearly into your microphone
324
  5. The transcription will appear in real-time
325
+ 6. Click 'Clear Transcript' to reset the transcription
326
 
327
  ### File Transcription:
328
  1. Select an ASR model from the dropdown menu
 
331
  4. Record audio by clicking the microphone button or upload an existing audio file
332
  5. Click 'Transcribe Audio File' to process the recording
333
  6. The complete transcription will appear in the text box
334
+ 7. Click 'Clear Transcript' to reset the file transcription
335
  """)
336
 
337
  # Launch the app