camparchimedes committed
Commit c5571fa · verified · 1 Parent(s): 889bcb0

Update app.py

Files changed (1)
  1. app.py +17 -13
app.py CHANGED
@@ -17,7 +17,7 @@ import spaces
 import gradio as gr
 from PIL import Image
 #from pydub import AudioSegment
-from scipy.io import wavfile
+#from scipy.io import wavfile
 
 import os
 import re
@@ -26,6 +26,7 @@ import warnings
 #import datetime
 import subprocess
 from pathlib import Path
+import tempfile
 from fpdf import FPDF
 
 import psutil
@@ -69,23 +70,26 @@ def convert_to_wav(filepath):
 pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
 
 @spaces.GPU()
-def transcribe_audio(filepath, batch_size=16, sample_rate=16000):
-
-    waveform, sample_rate = torchaudio.load(filepath)
+def transcribe_audio(audio_file, batch_size=16, sample_rate=16000):
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        temp_path = Path(tmpdirname) / "audio_file"
+
+        with open(temp_path, "wb") as f:
+            f.write(audio_file.read())
+
+        waveform, sample_rate = torchaudio.load(str(temp_path))
+
+        samples = waveform.numpy()
 
-    # --convert to mono
-    if waveform.ndim > 1:
-        waveform = waveform[0, :]
-
-    # --waveform to ndnumpy array
-    waveform = waveform.numpy()
 
-    start_time = time.time()
-
+        if samples.ndim > 1:
+            samples = samples[0, :]
+
 
     # --pipe it
     with torch.no_grad():
-        outputs = pipe(waveform, sampling_rate=sample_rate, batch_size=batch_size, return_timestamps=False)
+        outputs = pipe(samples, sampling_rate=sample_rate, batch_size=batch_size, return_timestamps=False)
 
     end_time = time.time()
 
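
For context, a minimal sketch of how the reworked function could be called after this commit: transcribe_audio now expects a file-like object exposing .read() (for example, an open binary file) rather than a filesystem path. The file name "audio.wav" below is an assumed placeholder, not part of the commit.

# Hypothetical caller; "audio.wav" is an assumed example file.
with open("audio.wav", "rb") as audio_file:
    text = transcribe_audio(audio_file, batch_size=16)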
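
And a small self-contained sketch (assumed shapes, not from the commit) of the channel handling behind the new samples[0, :] line: torchaudio.load returns a (channels, frames) tensor, so after converting to a numpy array, indexing the first dimension keeps only channel 0.

# Illustrative only: fake stereo signal with an assumed shape of (2 channels, 16000 frames).
import torch
stereo = torch.zeros(2, 16000)   # same (channels, frames) layout torchaudio.load produces
samples = stereo.numpy()
if samples.ndim > 1:             # mirrors the commit's mono check
    samples = samples[0, :]      # keep the first channel only
assert samples.shape == (16000,)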