Spaces:

FrexG
/

MMS-Ethiopian_Language-ASR

Runtime error

FrexG committed on Jun 4, 2023

Commit

4bb6cd4

1 Parent(s): 46de166

support for mp3 audio

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,17 +2,19 @@ import gradio as gr
 import torch
 import torchaudio
 import torchaudio.functional as AF
 from asr import Transcribe
 def transcribe(audio_file, lang_id: str):
-    print(f"audio_file={audio_file}")
-    print(lang_id)
     freq = 16000
     # Return the transcript.
     transcript = ""
     # load the auido file to tensor
-    waveform, orig_freq = torchaudio.load(audio_file.name)
     # resample audio to 16Khz
     if orig_freq != freq:
         waveform = AF.resample(waveform, orig_freq, freq)
@@ -21,7 +23,7 @@ def transcribe(audio_file, lang_id: str):
 if __name__ == "__main__":
     transcriber = Transcribe()
-    inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"])]
     outputs = [
         gr.Textbox(label="Transcript"),
         gr.Audio(label="Audio", type="filepath"),

 import torch
 import torchaudio
 import torchaudio.functional as AF
+from pydub import AudioSegment
 from asr import Transcribe
 def transcribe(audio_file, lang_id: str):
     freq = 16000
     # Return the transcript.
     transcript = ""
     # load the auido file to tensor
+    audio = AudioSegment.from_file(audio_file.name)
+    orig_freq = audio.frame_rate
+    waveform = torch.tensor(audio.get_array_of_samples())
+    waveform = (waveform / waveform.max()).unsqueeze(0)
     # resample audio to 16Khz
     if orig_freq != freq:
         waveform = AF.resample(waveform, orig_freq, freq)
 if __name__ == "__main__":
     transcriber = Transcribe()
+    inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"], value=["amh"])]
     outputs = [
         gr.Textbox(label="Transcript"),
         gr.Audio(label="Audio", type="filepath"),