FrexG committed on
Commit
4bb6cd4
·
1 Parent(s): 46de166

support for mp3 audio

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -2,17 +2,19 @@ import gradio as gr
2
  import torch
3
  import torchaudio
4
  import torchaudio.functional as AF
 
5
  from asr import Transcribe
6
 
7
 
8
  def transcribe(audio_file, lang_id: str):
9
- print(f"audio_file={audio_file}")
10
- print(lang_id)
11
  freq = 16000
12
  # Return the transcript.
13
  transcript = ""
14
  # load the audio file into a tensor
15
- waveform, orig_freq = torchaudio.load(audio_file.name)
 
 
 
16
  # resample audio to 16 kHz
17
  if orig_freq != freq:
18
  waveform = AF.resample(waveform, orig_freq, freq)
@@ -21,7 +23,7 @@ def transcribe(audio_file, lang_id: str):
21
 
22
  if __name__ == "__main__":
23
  transcriber = Transcribe()
24
- inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"])]
25
  outputs = [
26
  gr.Textbox(label="Transcript"),
27
  gr.Audio(label="Audio", type="filepath"),
 
2
  import torch
3
  import torchaudio
4
  import torchaudio.functional as AF
5
+ from pydub import AudioSegment
6
  from asr import Transcribe
7
 
8
 
9
  def transcribe(audio_file, lang_id: str):
 
 
10
  freq = 16000
11
  # Return the transcript.
12
  transcript = ""
13
  # load the audio file into a tensor
14
+ audio = AudioSegment.from_file(audio_file.name)
15
+ orig_freq = audio.frame_rate
16
+ waveform = torch.tensor(audio.get_array_of_samples())
17
+ waveform = (waveform / waveform.max()).unsqueeze(0)
18
  # resample audio to 16 kHz
19
  if orig_freq != freq:
20
  waveform = AF.resample(waveform, orig_freq, freq)
 
23
 
24
  if __name__ == "__main__":
25
  transcriber = Transcribe()
26
+ inputs = [gr.File(), gr.Dropdown(choices=["amh", "orm", "som"], value=["amh"])]
27
  outputs = [
28
  gr.Textbox(label="Transcript"),
29
  gr.Audio(label="Audio", type="filepath"),