Emmanuel08 committed on
Commit
32f1cf7
·
verified ·
1 Parent(s): ac68fd5

This is the test, let's see if this would be perfect.

Files changed (1) hide show
  1. app.py +30 -0
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForCTC
3
+ import torch
4
+ import soundfile as sf
5
+
6
+ # Load the FastConformer model and processor
7
# One checkpoint id shared by the processor and the model so the two
# cannot drift apart.
# NOTE(review): this checkpoint appears to be published as a NeMo model;
# confirm it can actually be loaded through the transformers Auto classes.
MODEL_ID = "nvidia/stt_en_fastconformer_hybrid_large_pc"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCTC.from_pretrained(MODEL_ID)
9
+
10
+ # Function to transcribe audio
11
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level model.

    Args:
        audio_file: Path to an audio file readable by ``soundfile``, or
            ``None`` (e.g. the form was submitted without any audio).

    Returns:
        The decoded transcription string for the single input waveform.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        # Surface a readable message in the UI instead of an opaque
        # failure from inside the audio reader.
        raise gr.Error("Please upload an audio file before transcribing.")

    # Decode straight to float32 to avoid carrying a float64 copy around.
    audio_input, sample_rate = sf.read(audio_file, dtype="float32")

    # soundfile returns a (frames, channels) array for multi-channel
    # files; average the channels so a single mono waveform is passed on.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    # NOTE(review): the file's native sample rate is forwarded unchanged;
    # if the processor expects one fixed rate, resample first - confirm.
    inputs = processor(
        audio_input,
        sampling_rate=sample_rate,
        return_tensors="pt",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decoding: take the highest-scoring token id at each step.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
19
+
20
+ # Create a Gradio interface
21
# The audio widget hands the callback a file path (type="filepath").
audio_component = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_component,
    outputs="text",
    title="Real-Time Transcription with FastConformer",
    description="Upload an audio file to transcribe it using NVIDIA FastConformer.",
)

# Start serving the interface.
iface.launch()