Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
6dbf680
1
Parent(s):
f374409
fix: add clear transcription functionality for real-time and file transcription, update UI elements
Browse files
app.py
CHANGED
@@ -159,6 +159,7 @@ def transcribe_file(audio_file, model_name="nvidia/parakeet-tdt-0.6b-v2"):
|
|
159 |
return "No audio file provided. Please upload an audio file."
|
160 |
|
161 |
try:
|
|
|
162 |
model = load_model(model_name)
|
163 |
|
164 |
print(f"Processing file: {audio_file}")
|
@@ -209,7 +210,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
209 |
label="Speak into your microphone"
|
210 |
)
|
211 |
|
212 |
-
|
213 |
|
214 |
with gr.Column(scale=3):
|
215 |
text_output = gr.Textbox(
|
@@ -232,7 +233,9 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
232 |
type="filepath",
|
233 |
label="Record or upload audio file"
|
234 |
)
|
235 |
-
|
|
|
|
|
236 |
|
237 |
with gr.Column(scale=3):
|
238 |
file_transcription = gr.Textbox(
|
@@ -281,24 +284,45 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
281 |
fn=transcribe_file,
|
282 |
inputs=[audio_recorder, model_dropdown],
|
283 |
outputs=[file_transcription]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
)
|
285 |
|
286 |
# Update the main text output when the state changes
|
287 |
state.change(
|
288 |
fn=lambda s: s,
|
289 |
inputs=[state],
|
290 |
-
outputs=[text_output]
|
291 |
-
|
|
|
292 |
|
293 |
-
gr.Markdown("## ๐ Instructions")
|
294 |
-
gr.Markdown("""
|
295 |
### Real-time Transcription:
|
296 |
1. Select an ASR model from the dropdown menu
|
297 |
2. Click 'Load Selected Model' to load the model
|
298 |
3. Click the microphone button to start recording
|
299 |
4. Speak clearly into your microphone
|
300 |
5. The transcription will appear in real-time
|
301 |
-
6. Click 'Clear Transcript' to
|
302 |
|
303 |
### File Transcription:
|
304 |
1. Select an ASR model from the dropdown menu
|
@@ -307,6 +331,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
307 |
4. Record audio by clicking the microphone button or upload an existing audio file
|
308 |
5. Click 'Transcribe Audio File' to process the recording
|
309 |
6. The complete transcription will appear in the text box
|
|
|
310 |
""")
|
311 |
|
312 |
# Launch the app
|
|
|
159 |
return "No audio file provided. Please upload an audio file."
|
160 |
|
161 |
try:
|
162 |
+
global model
|
163 |
model = load_model(model_name)
|
164 |
|
165 |
print(f"Processing file: {audio_file}")
|
|
|
210 |
label="Speak into your microphone"
|
211 |
)
|
212 |
|
213 |
+
clear_btn = gr.Button("Clear Transcript", variant="secondary")
|
214 |
|
215 |
with gr.Column(scale=3):
|
216 |
text_output = gr.Textbox(
|
|
|
233 |
type="filepath",
|
234 |
label="Record or upload audio file"
|
235 |
)
|
236 |
+
with gr.Row():
|
237 |
+
transcribe_btn = gr.Button("Transcribe Audio File", variant="primary")
|
238 |
+
clear_file_btn = gr.Button("Clear Transcript", variant="secondary")
|
239 |
|
240 |
with gr.Column(scale=3):
|
241 |
file_transcription = gr.Textbox(
|
|
|
284 |
fn=transcribe_file,
|
285 |
inputs=[audio_recorder, model_dropdown],
|
286 |
outputs=[file_transcription]
|
287 |
+
) # Clear the real-time transcription
|
288 |
+
def clear_transcription():
|
289 |
+
print("Clearing real-time transcription")
|
290 |
+
return "", "", None, 0 # Clear state, streaming_text, audio_buffer, and last_processed_time
|
291 |
+
|
292 |
+
# Clear the file transcription
|
293 |
+
def clear_file_transcription():
|
294 |
+
print("Clearing file transcription")
|
295 |
+
return "" # Clear file_transcription
|
296 |
+
|
297 |
+
# Set up clear button event handlers
|
298 |
+
clear_btn.click(
|
299 |
+
fn=clear_transcription,
|
300 |
+
inputs=[],
|
301 |
+
outputs=[state, streaming_text, audio_buffer, last_processed_time]
|
302 |
+
)
|
303 |
+
|
304 |
+
clear_file_btn.click(
|
305 |
+
fn=clear_file_transcription,
|
306 |
+
inputs=[],
|
307 |
+
outputs=[file_transcription]
|
308 |
)
|
309 |
|
310 |
# Update the main text output when the state changes
|
311 |
state.change(
|
312 |
fn=lambda s: s,
|
313 |
inputs=[state],
|
314 |
+
outputs=[text_output] )
|
315 |
+
|
316 |
+
|
317 |
|
318 |
+
gr.Markdown("## ๐ Instructions")gr.Markdown("""
|
|
|
319 |
### Real-time Transcription:
|
320 |
1. Select an ASR model from the dropdown menu
|
321 |
2. Click 'Load Selected Model' to load the model
|
322 |
3. Click the microphone button to start recording
|
323 |
4. Speak clearly into your microphone
|
324 |
5. The transcription will appear in real-time
|
325 |
+
6. Click 'Clear Transcript' to reset the transcription
|
326 |
|
327 |
### File Transcription:
|
328 |
1. Select an ASR model from the dropdown menu
|
|
|
331 |
4. Record audio by clicking the microphone button or upload an existing audio file
|
332 |
5. Click 'Transcribe Audio File' to process the recording
|
333 |
6. The complete transcription will appear in the text box
|
334 |
+
7. Click 'Clear Transcript' to reset the file transcription
|
335 |
""")
|
336 |
|
337 |
# Launch the app
|