pheodoraa committed on
Commit
b8590a9
·
verified ·
1 Parent(s): 5c85c67

Update 3 app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -1,23 +1,47 @@
1
  import gradio as gr
2
  import torch
3
- from speechbrain.pretrained import EncoderASR
4
  import torchaudio
 
5
 
6
- # Charger le modèle
7
- asr_model = EncoderASR.from_hparams(source="speechbrain/asr-wav2vec2-dvoice-darija", savedir="tmp_model")
 
 
 
 
 
 
 
8
 
9
  def transcribe(audio):
10
- waveform, sample_rate = torchaudio.load(audio)
11
- transcription = asr_model.transcribe_batch(waveform)
12
- return transcription[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- # Interface Gradio
15
  iface = gr.Interface(
16
  fn=transcribe,
17
- inputs = gr.Audio(type="filepath"),
18
  outputs="text",
19
  title="Reconnaissance Vocale Darija",
20
  description="Parlez en Darija et obtenez la transcription."
21
  )
22
 
23
- iface.launch()
 
 
 
1
  import gradio as gr
2
  import torch
 
3
  import torchaudio
4
+ from speechbrain.pretrained import EncoderASR
5
 
6
# Load the pretrained Darija ASR model once at import time.
# Loading is best-effort: on failure the error is printed and ``asr_model``
# is bound to None so the name always exists and later code can fail with a
# clear message instead of a NameError.
try:
    asr_model = EncoderASR.from_hparams(
        source="speechbrain/asr-wav2vec2-dvoice-darija",
        savedir="tmp_model",
        run_opts={"device": "cpu"},  # Ensure compatibility with CPU if needed
    )
except Exception as e:
    print(f"Error loading model: {str(e)}")
    asr_model = None
15
 
16
def transcribe(audio):
    """Transcribe an audio file to text using the SpeechBrain ASR model.

    Args:
        audio: Path to the audio file handed over by the Gradio audio
            component, or None when no audio was provided.

    Returns:
        The transcribed text, or a human-readable error message when no
        audio was supplied or processing failed. Errors are reported as
        strings (not raised) so they show up in the Gradio text output.
    """
    if audio is None:
        return "No audio file uploaded. Please upload a valid file."

    try:
        # Load audio; torchaudio returns a (channels, samples) tensor.
        waveform, sample_rate = torchaudio.load(audio)

        # Mix multi-channel audio down to mono. Otherwise each channel would
        # be treated as a separate item of the batch passed to the model.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Ensure correct sample rate (16kHz expected)
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=16000
            )
            waveform = resampler(waveform)

        # transcribe_batch requires the relative length of every batch item;
        # a single, unpadded utterance has relative length 1.0.
        wav_lens = torch.tensor([1.0])

        # The call returns (predicted_words, predicted_tokens); the first
        # entry of predicted_words is the transcript of our only utterance.
        predicted_words, _ = asr_model.transcribe_batch(waveform, wav_lens)
        return str(predicted_words[0])

    except Exception as e:
        return f"Error processing audio: {str(e)}"
35
 
36
# Gradio UI: a single audio input (passed to the callback as a file path)
# mapped to a plain-text transcription output.
iface = gr.Interface(
    title="Reconnaissance Vocale Darija",
    description="Parlez en Darija et obtenez la transcription.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()