Spaces:
Runtime error
Runtime error
support for mp3 audio
Browse files
app.py
CHANGED
@@ -2,17 +2,19 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import torchaudio
|
4 |
import torchaudio.functional as AF
|
|
|
5 |
from asr import Transcribe
|
6 |
|
7 |
|
8 |
def transcribe(audio_file, lang_id: str):
|
9 |
-
print(f"audio_file={audio_file}")
|
10 |
-
print(lang_id)
|
11 |
freq = 16000
|
12 |
# Return the transcript.
|
13 |
transcript = ""
|
14 |
# load the audio file into a tensor
|
15 |
-
|
|
|
|
|
|
|
16 |
# resample audio to 16 kHz
|
17 |
if orig_freq != freq:
|
18 |
waveform = AF.resample(waveform, orig_freq, freq)
|
@@ -21,7 +23,7 @@ def transcribe(audio_file, lang_id: str):
|
|
21 |
|
22 |
if __name__ == "__main__":
|
23 |
transcriber = Transcribe()
|
24 |
-
inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"])]
|
25 |
outputs = [
|
26 |
gr.Textbox(label="Transcript"),
|
27 |
gr.Audio(label="Audio", type="filepath"),
|
|
|
2 |
import torch
|
3 |
import torchaudio
|
4 |
import torchaudio.functional as AF
|
5 |
+
from pydub import AudioSegment
|
6 |
from asr import Transcribe
|
7 |
|
8 |
|
9 |
def transcribe(audio_file, lang_id: str):
|
|
|
|
|
10 |
freq = 16000
|
11 |
# Return the transcript.
|
12 |
transcript = ""
|
13 |
# load the audio file into a tensor
|
14 |
+
audio = AudioSegment.from_file(audio_file.name)
|
15 |
+
orig_freq = audio.frame_rate
|
16 |
+
waveform = torch.tensor(audio.get_array_of_samples())
|
17 |
+
waveform = (waveform / waveform.max()).unsqueeze(0)
|
18 |
# resample audio to 16 kHz
|
19 |
if orig_freq != freq:
|
20 |
waveform = AF.resample(waveform, orig_freq, freq)
|
|
|
23 |
|
24 |
if __name__ == "__main__":
|
25 |
transcriber = Transcribe()
|
26 |
+
inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"], value=["amh"])]
|
27 |
outputs = [
|
28 |
gr.Textbox(label="Transcript"),
|
29 |
gr.Audio(label="Audio", type="filepath"),
|