Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
8d5b897
1
Parent(s):
2b1f9fe
fix: update transcription handling to return last transcription and improve state management
Browse files
app.py
CHANGED
@@ -135,9 +135,8 @@ def transcribe(audio, model_name="nvidia/parakeet-tdt-0.6b-v2", state="", audio_
|
|
135 |
audio_buffer = [full_audio[-keep_samples:]]
|
136 |
else:
|
137 |
audio_buffer = []
|
138 |
-
|
139 |
print(f"New state: {new_state}")
|
140 |
-
return new_state,
|
141 |
|
142 |
except Exception as e:
|
143 |
print(f"Error processing audio: {e}")
|
@@ -243,7 +242,8 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
243 |
placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
|
244 |
lines=10
|
245 |
)
|
246 |
-
|
|
|
247 |
state = gr.State("")
|
248 |
audio_buffer = gr.State(value=None)
|
249 |
last_processed_time = gr.State(value=0)
|
@@ -287,7 +287,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
287 |
) # Clear the real-time transcription
|
288 |
def clear_transcription():
|
289 |
print("Clearing real-time transcription")
|
290 |
-
return "", "", None, 0 #
|
291 |
|
292 |
# Clear the file transcription
|
293 |
def clear_file_transcription():
|
@@ -298,20 +298,36 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
298 |
clear_btn.click(
|
299 |
fn=clear_transcription,
|
300 |
inputs=[],
|
301 |
-
outputs=[state,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
302 |
)
|
303 |
|
304 |
clear_file_btn.click(
|
305 |
fn=clear_file_transcription,
|
306 |
inputs=[],
|
307 |
outputs=[file_transcription]
|
308 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
-
# Update the main text output when the state changes
|
311 |
state.change(
|
312 |
-
fn=
|
313 |
inputs=[state],
|
314 |
-
outputs=[text_output]
|
|
|
315 |
|
316 |
|
317 |
|
|
|
135 |
audio_buffer = [full_audio[-keep_samples:]]
|
136 |
else:
|
137 |
audio_buffer = []
|
|
|
138 |
print(f"New state: {new_state}")
|
139 |
+
return new_state, transcription, audio_buffer, last_processed_time # Return last transcription for streaming_text
|
140 |
|
141 |
except Exception as e:
|
142 |
print(f"Error processing audio: {e}")
|
|
|
242 |
placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
|
243 |
lines=10
|
244 |
)
|
245 |
+
|
246 |
+
# State to store the ongoing transcription
|
247 |
state = gr.State("")
|
248 |
audio_buffer = gr.State(value=None)
|
249 |
last_processed_time = gr.State(value=0)
|
|
|
287 |
) # Clear the real-time transcription
|
288 |
def clear_transcription():
    """Reset the real-time transcription.

    Returns:
        A 4-tuple of cleared values, in the order used by the clear
        button's ``outputs`` list: state, text_output, audio_buffer,
        last_processed_time.
    """
    print("Clearing real-time transcription")
    cleared_state, cleared_text = "", ""
    cleared_buffer, cleared_time = None, 0
    return cleared_state, cleared_text, cleared_buffer, cleared_time
|
291 |
|
292 |
# Clear the file transcription
|
293 |
def clear_file_transcription():
|
|
|
298 |
clear_btn.click(
|
299 |
fn=clear_transcription,
|
300 |
inputs=[],
|
301 |
+
outputs=[state, text_output, audio_buffer, last_processed_time]
|
302 |
+
)
|
303 |
+
|
304 |
+
# Also clear streaming_text when clearing the transcription
|
305 |
+
clear_btn.click(
|
306 |
+
fn=lambda: "",
|
307 |
+
inputs=[],
|
308 |
+
outputs=[streaming_text]
|
309 |
)
|
310 |
|
311 |
clear_file_btn.click(
|
312 |
fn=clear_file_transcription,
|
313 |
inputs=[],
|
314 |
outputs=[file_transcription]
|
315 |
+
) # Update the main text output when the state changes
|
316 |
+
def update_output(transcript, max_words=15):
    """Fan the accumulated transcript out to the two display fields.

    Wired to ``state.change`` with ``outputs=[text_output, streaming_text]``:
    the first returned value is the full transcript, the second is a short
    tail of it for the streaming view.

    Args:
        transcript: Accumulated transcription text. ``None`` is treated as
            an empty transcript instead of raising ``AttributeError``.
        max_words: Maximum number of trailing words kept in the streaming
            preview. Defaults to 15; values <= 0 yield an empty preview
            once the transcript contains more words than that.

    Returns:
        A ``(full_text, preview)`` tuple of strings.
    """
    if transcript is None:
        return "", ""

    words = transcript.split()
    # The local is named ``preview`` (not ``streaming_text``) so it does not
    # shadow the ``streaming_text`` component referenced by the event wiring.
    if len(words) > max_words:
        # Slice from an explicit start index: ``words[-0:]`` would return
        # the whole list rather than nothing when max_words is 0.
        preview = " ".join(words[len(words) - max_words:])
    else:
        # Short transcripts are passed through untouched so their original
        # whitespace is preserved.
        preview = transcript
    return transcript, preview
|
325 |
|
|
|
326 |
state.change(
|
327 |
+
fn=update_output,
|
328 |
inputs=[state],
|
329 |
+
outputs=[text_output, streaming_text]
|
330 |
+
)
|
331 |
|
332 |
|
333 |
|