reallynicejam committed on
Commit
fa084e8
·
verified ·
1 Parent(s): 37f6be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -1,3 +1,22 @@
1
  import gradio as gr
 
2
 
3
- gr.load("models/facebook/xm_transformer_s2ut_hk-en").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
3
 
4
# Load the CTC speech-recognition model and its matching processor once, at
# import time, from the same pretrained checkpoint. `transcribe` uses the
# processor to build model inputs and to decode the predicted token ids.
_CHECKPOINT = "facebook/wav2vec2-base-960h"
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
7
+
8
def transcribe(audio):
    """Transcribe speech to text with the module-level Wav2Vec2 model.

    Args:
        audio: One of
            * ``None`` - nothing was recorded; an empty string is returned.
            * ``str`` - path to a PCM WAV file (what
              ``gr.Audio(type="filepath")`` hands to the callback). The file
              is decoded, downmixed to mono and resampled to 16 kHz.
            * anything else - passed to the processor unchanged; it is
              expected to already be a mono waveform sampled at 16 kHz.

    Returns:
        The decoded transcription of the first (only) item in the batch, or
        ``""`` when there is no audio to transcribe.

    Raises:
        ValueError: If the WAV file uses a sample width other than 8, 16 or
            32 bits.
        wave.Error: If the path does not point to a readable PCM WAV file.
    """
    # A live interface can invoke the callback before anything is recorded.
    if audio is None:
        return ""

    # Imported here because the file's import block does not provide them
    # (the original body used `torch` without importing it -> NameError).
    import wave

    import numpy as np
    import torch

    target_rate = 16000  # sampling rate declared to the processor below

    if isinstance(audio, str):
        # The processor needs raw samples, not a file path: decode the WAV.
        with wave.open(audio, "rb") as wav_file:
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            source_rate = wav_file.getframerate()
            frames = wav_file.readframes(wav_file.getnframes())

        pcm_dtypes = {1: "u1", 2: "<i2", 4: "<i4"}  # WAV PCM is little-endian
        if sample_width not in pcm_dtypes:
            raise ValueError(
                f"Unsupported WAV sample width: {sample_width} bytes"
            )

        samples = np.frombuffer(frames, dtype=pcm_dtypes[sample_width])
        samples = samples.astype(np.float32)
        # Scale integer PCM to [-1, 1); 8-bit WAV is unsigned, centred on 128.
        if sample_width == 1:
            samples = (samples - 128.0) / 128.0
        else:
            samples = samples / float(2 ** (8 * sample_width - 1))

        if channels > 1:
            # Frames are interleaved per channel; average them down to mono.
            samples = samples.reshape(-1, channels).mean(axis=1)

        if samples.size == 0:
            return ""

        if source_rate != target_rate:
            # Simple linear-interpolation resampling (no anti-alias filter);
            # keeps the dependency footprint at numpy only.
            target_len = max(
                1, int(round(samples.size * target_rate / source_rate))
            )
            samples = np.interp(
                np.linspace(0.0, samples.size - 1, num=target_len),
                np.arange(samples.size),
                samples,
            ).astype(np.float32)

        audio = samples

    input_values = processor(
        audio, return_tensors="pt", sampling_rate=target_rate
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)

    return transcription[0]
16
+
17
# Build the demo UI: microphone audio in, transcription text out, with
# `transcribe` as the callback, then start the app.
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; Gradio 4
# renamed it to `sources=["microphone"]` - confirm against the pinned version.
microphone_input = gr.Audio(source="microphone", type="filepath")
demo = gr.Interface(
    fn=transcribe,
    inputs=microphone_input,
    outputs="text",
    live=True,
)
demo.launch()