GavinHuang committed on
Commit
0011522
·
1 Parent(s): 779d79b

fix: enhance transcription process with audio feedback and temporary file management

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -36,13 +36,16 @@ def transcribe(audio, state=""):
36
  audio_data = audio_data.mean(axis=1) if audio_data.ndim > 1 else audio_data # To mono
37
  temp_file = "temp_audio.wav"
38
  sf.write(temp_file, audio_data, samplerate=16000)
 
39
 
40
  # Transcribe
41
  if torch.cuda.is_available():
42
  model = model.cuda()
43
  transcription = model.transcribe([temp_file])[0]
 
44
  model = model.cpu()
45
  os.remove(temp_file)
 
46
 
47
  # Clear buffer
48
  audio_buffer = []
@@ -86,7 +89,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
86
  inputs=[audio_input, state],
87
  outputs=[state, streaming_text],
88
  )
89
-
90
  # Clear the transcription
91
  def clear_transcription():
92
  return "", "", ""
 
36
  audio_data = audio_data.mean(axis=1) if audio_data.ndim > 1 else audio_data # To mono
37
  temp_file = "temp_audio.wav"
38
  sf.write(temp_file, audio_data, samplerate=16000)
39
+ print("Transcribing audio...")
40
 
41
  # Transcribe
42
  if torch.cuda.is_available():
43
  model = model.cuda()
44
  transcription = model.transcribe([temp_file])[0]
45
+ print(f"Transcription: {transcription}")
46
  model = model.cpu()
47
  os.remove(temp_file)
48
+ print("Temporary file removed.")
49
 
50
  # Clear buffer
51
  audio_buffer = []
 
89
  inputs=[audio_input, state],
90
  outputs=[state, streaming_text],
91
  )
92
+
93
  # Clear the transcription
94
  def clear_transcription():
95
  return "", "", ""