KDM999 committed on
Commit
b6c2d8b
·
verified ·
1 Parent(s): 9b1fd29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -11
app.py CHANGED
@@ -20,10 +20,15 @@ accents = sorted(set(entry["accent"] for entry in data))
20
 
21
  # Load ASR pipelines
22
  device = 0
23
- pipe_whisper_medium = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device, generate_kwargs={"language": "en"})
24
- pipe_whisper_base = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device, generate_kwargs={"language": "en"})
25
- pipe_whisper_tiny = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device, generate_kwargs={"language": "en"})
 
 
 
 
26
  pipe_wav2vec2_base_960h = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=device)
 
27
  pipe_hubert_large_ls960_ft = pipeline("automatic-speech-recognition", model="facebook/hubert-large-ls960-ft", device=device)
28
 
29
  # Functions
@@ -84,10 +89,15 @@ def transcribe_audio(file_path):
84
 
85
  outputs = {}
86
  models = {
87
- "openai/whisper-medium": pipe_whisper_medium,
88
- "openai/whisper-base": pipe_whisper_base,
89
  "openai/whisper-tiny": pipe_whisper_tiny,
 
 
 
 
 
 
90
  "facebook/wav2vec2-base-960h": pipe_wav2vec2_base_960h,
 
91
  "facebook/hubert-large-ls960-ft": pipe_hubert_large_ls960_ft,
92
  }
93
 
@@ -122,10 +132,16 @@ with gr.Blocks() as demo:
122
 
123
  transcribe_btn = gr.Button("Transcribe with All Models")
124
  gold_text = gr.Textbox(label="Reference (Gold Standard)")
125
- whisper_medium_html = gr.HTML(label="Whisper Medium")
126
- whisper_base_html = gr.HTML(label="Whisper Base")
127
  whisper_tiny_html = gr.HTML(label="Whisper Tiny")
128
- wav2vec_html = gr.HTML(label="Wav2Vec2 Base")
 
 
 
 
 
 
 
129
  hubert_html = gr.HTML(label="HuBERT Large")
130
 
131
  transcribe_btn.click(
@@ -133,10 +149,15 @@ with gr.Blocks() as demo:
133
  inputs=[file_path_output],
134
  outputs=[
135
  gold_text,
136
- whisper_medium_html,
137
- whisper_base_html,
138
  whisper_tiny_html,
139
- wav2vec_html,
 
 
 
 
 
 
 
140
  hubert_html,
141
  ],
142
  )
 
20
 
21
  # Load ASR pipelines
22
  device = 0
23
+ pipe_whisper_tiny = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
24
+ pipe_whisper_tiny_en = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=device)
25
+ pipe_whisper_base = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
26
+ pipe_whisper_base_en = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", device=device)
27
+ pipe_whisper_medium = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)
28
+ pipe_whisper_medium_en = pipeline("automatic-speech-recognition", model="openai/whisper-medium.en", device=device)
29
+ pipe_distil_whisper_large = pipeline("automatic-speech-recognition", model="distil-whisper/distil-large-v3.5", device=device)
30
  pipe_wav2vec2_base_960h = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=device)
31
+ pipe_wav2vec2_large_960h = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h", device=device)
32
  pipe_hubert_large_ls960_ft = pipeline("automatic-speech-recognition", model="facebook/hubert-large-ls960-ft", device=device)
33
 
34
  # Functions
 
89
 
90
  outputs = {}
91
  models = {
 
 
92
  "openai/whisper-tiny": pipe_whisper_tiny,
93
+ "openai/whisper-tiny.en": pipe_whisper_tiny_en,
94
+ "openai/whisper-base": pipe_whisper_base,
95
+ "openai/whisper-base.en": pipe_whisper_base_en,
96
+ "openai/whisper-medium": pipe_whisper_medium,
97
+ "openai/whisper-medium.en": pipe_whisper_medium_en,
98
+ "distil-whisper/distil-large-v3.5": pipe_distil_whisper_large,
99
  "facebook/wav2vec2-base-960h": pipe_wav2vec2_base_960h,
100
+ "facebook/wav2vec2-large-960h": pipe_wav2vec2_large_960h,
101
  "facebook/hubert-large-ls960-ft": pipe_hubert_large_ls960_ft,
102
  }
103
 
 
132
 
133
  transcribe_btn = gr.Button("Transcribe with All Models")
134
  gold_text = gr.Textbox(label="Reference (Gold Standard)")
135
+
 
136
  whisper_tiny_html = gr.HTML(label="Whisper Tiny")
137
+ whisper_tiny_en_html = gr.HTML(label="Whisper Tiny English")
138
+ whisper_base_html = gr.HTML(label="Whisper Base")
139
+ whisper_base_en_html = gr.HTML(label="Whisper Base English")
140
+ whisper_medium_html = gr.HTML(label="Whisper Medium")
141
+ whisper_medium_en_html = gr.HTML(label="Whisper Medium English")
142
+ distil_html = gr.HTML(label="Distil-Whisper Large")
143
+ wav2vec_base_html = gr.HTML(label="Wav2Vec2 Base")
144
+ wav2vec_large_html = gr.HTML(label="Wav2Vec2 Large")
145
  hubert_html = gr.HTML(label="HuBERT Large")
146
 
147
  transcribe_btn.click(
 
149
  inputs=[file_path_output],
150
  outputs=[
151
  gold_text,
 
 
152
  whisper_tiny_html,
153
+ whisper_tiny_en_html,
154
+ whisper_base_html,
155
+ whisper_base_en_html,
156
+ whisper_medium_html,
157
+ whisper_medium_en_html,
158
+ distil_html,
159
+ wav2vec_base_html,
160
+ wav2vec_large_html,
161
  hubert_html,
162
  ],
163
  )