razhan committed on
Commit
0239856
·
verified ·
1 Parent(s): 9040397

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -223
app.py CHANGED
@@ -1,201 +1,24 @@
1
 
2
 
3
 
4
- # import spaces
5
- # import torch
6
-
7
- # import gradio as gr
8
- # import yt_dlp as youtube_dl
9
- # from pytubefix import YouTube
10
- # from pytubefix.cli import on_progress
11
-
12
- # from transformers import pipeline
13
- # from transformers.pipelines.audio_utils import ffmpeg_read
14
-
15
- # import tempfile
16
- # import os
17
-
18
- # MODEL_NAME = "razhan/whisper-base-hawrami-transcription"
19
- # BATCH_SIZE = 1
20
- # FILE_LIMIT_MB = 10
21
- # YT_LENGTH_LIMIT_S = 60 * 10 # limit to 1 hour YouTube files
22
-
23
- # device = 0 if torch.cuda.is_available() else "cpu"
24
-
25
- # pipe = pipeline(
26
- # task="automatic-speech-recognition",
27
- # model=MODEL_NAME,
28
- # chunk_length_s=30,
29
- # device=device,
30
- # )
31
-
32
-
33
- # # @spaces.GPU
34
- # def transcribe(inputs, task="transcribe"):
35
- # if inputs is None:
36
- # raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
37
-
38
- # text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
39
- # return text
40
-
41
-
42
- # def _return_yt_html_embed(yt_url):
43
- # video_id = yt_url.split("?v=")[-1]
44
- # HTML_str = (
45
- # f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
46
- # " </center>"
47
- # )
48
- # return HTML_str
49
-
50
- # # def download_yt_audio(yt_url, filename):
51
- # # info_loader = youtube_dl.YoutubeDL()
52
-
53
- # # try:
54
- # # info = info_loader.extract_info(yt_url, download=False)
55
- # # except youtube_dl.utils.DownloadError as err:
56
- # # raise gr.Error(str(err))
57
-
58
- # # file_length = info["duration_string"]
59
- # # file_h_m_s = file_length.split(":")
60
- # # file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
61
-
62
- # # if len(file_h_m_s) == 1:
63
- # # file_h_m_s.insert(0, 0)
64
- # # if len(file_h_m_s) == 2:
65
- # # file_h_m_s.insert(0, 0)
66
- # # file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
67
-
68
- # # if file_length_s > YT_LENGTH_LIMIT_S:
69
- # # yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
70
- # # file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
71
- # # raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
72
-
73
- # # ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
74
-
75
- # # with youtube_dl.YoutubeDL(ydl_opts) as ydl:
76
- # # try:
77
- # # ydl.download([yt_url])
78
- # # except youtube_dl.utils.ExtractorError as err:
79
- # # raise gr.Error(str(err))
80
- # # yt = pt.YouTube(yt_url)
81
- # # stream = yt.streams.filter(only_audio=True)[0]
82
- # # stream.download(filename=filename)
83
-
84
- # # @spaces.GPU
85
- # # def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
86
- # # html_embed_str = _return_yt_html_embed(yt_url)
87
-
88
- # # with tempfile.TemporaryDirectory() as tmpdirname:
89
- # # # filepath = os.path.join(tmpdirname, "video.mp4")
90
- # # filepath = os.path.join(tmpdirname, "audio.mp3")
91
- # # download_yt_audio(yt_url, filepath)
92
- # # with open(filepath, "rb") as f:
93
- # # inputs = f.read()
94
-
95
- # # inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
96
- # # inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
97
-
98
- # # text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
99
-
100
- # # return html_embed_str, text
101
-
102
-
103
- # def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
104
- # progress(0, desc="Loading audio file...")
105
- # html_embed_str = _return_yt_html_embed(yt_url)
106
- # try:
107
- # # yt = pytube.YouTube(yt_url)
108
- # # stream = yt.streams.filter(only_audio=True)[0]
109
- # yt = YouTube(yt_url, on_progress_callback = on_progress, use_po_token=True)
110
-
111
- # stream = yt.streams.get_audio_only()
112
-
113
- # except:
114
- # raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
115
-
116
- # if stream.filesize_mb > max_filesize:
117
- # raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
118
-
119
- # # stream.download(filename="audio.mp3")
120
- # stream.download(filename="audio.mp3", mp3=True)
121
-
122
- # with open("audio.mp3", "rb") as f:
123
- # inputs = f.read()
124
-
125
- # inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
126
- # inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
127
- # text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
128
- # return html_embed_str, text
129
-
130
-
131
- # demo = gr.Blocks(theme=gr.themes.Ocean())
132
-
133
- # mf_transcribe = gr.Interface(
134
- # fn=transcribe,
135
- # inputs=[
136
- # gr.Audio(sources="microphone", type="filepath"),
137
- # # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
138
- # ],
139
- # outputs="text",
140
- # title="Whisper Horami Demo: Transcribe Audio",
141
- # description=(
142
- # "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
143
- # f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
144
- # " of arbitrary length."
145
- # ),
146
- # flagging_mode="never",
147
- # )
148
-
149
- # file_transcribe = gr.Interface(
150
- # fn=transcribe,
151
- # inputs=[
152
- # gr.Audio(sources="upload", type="filepath", label="Audio file"),
153
- # # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
154
- # ],
155
- # outputs="text",
156
- # title="Whisper Horami Demo: Transcribe Audio",
157
- # description=(
158
- # "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
159
- # f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
160
- # " of arbitrary length."
161
- # ),
162
- # flagging_mode="never",
163
- # )
164
-
165
- # yt_transcribe = gr.Interface(
166
- # fn=yt_transcribe,
167
- # inputs=[
168
- # gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
169
- # # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
170
- # ],
171
- # outputs=["html", "text"],
172
- # title="Whisper Horami Demo: Translate YouTube",
173
- # description=(
174
- # "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
175
- # f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
176
- # " arbitrary length."
177
- # ),
178
- # flagging_mode="never",
179
- # )
180
-
181
- # with demo:
182
- # # gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
183
- # gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
184
-
185
- # demo.queue().launch(ssr_mode=False)
186
-
187
  import spaces
188
  import torch
 
189
  import gradio as gr
 
190
  from pytubefix import YouTube
191
  from pytubefix.cli import on_progress
 
192
  from transformers import pipeline
193
  from transformers.pipelines.audio_utils import ffmpeg_read
 
194
  import tempfile
195
  import os
196
 
197
  MODEL_NAME = "razhan/whisper-base-hawrami-transcription"
198
  BATCH_SIZE = 1
 
 
199
 
200
  device = 0 if torch.cuda.is_available() else "cpu"
201
 
@@ -206,83 +29,260 @@ pipe = pipeline(
206
  device=device,
207
  )
208
 
 
 
209
  def transcribe(inputs, task="transcribe"):
210
  if inputs is None:
211
- raise gr.Error("Please upload or record an audio file before submitting.")
 
 
 
212
 
213
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
214
- return result["text"]
215
 
216
  def _return_yt_html_embed(yt_url):
217
  video_id = yt_url.split("?v=")[-1]
218
- return f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe></center>'
 
 
 
 
219
 
220
- def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress()):
221
- progress(0, desc="Loading audio file...")
222
- html_embed = _return_yt_html_embed(yt_url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  try:
225
- yt = YouTube(yt_url, on_progress_callback=on_progress, use_po_token=True)
 
 
 
226
  stream = yt.streams.get_audio_only()
227
- except Exception as e:
228
- raise gr.Error(f"Error loading YouTube video: {str(e)}")
229
-
230
- with tempfile.TemporaryDirectory() as tmpdir:
231
- file_path = os.path.join(tmpdir, "audio.mp3")
232
- stream.download(filename=file_path)
233
 
234
- with open(file_path, "rb") as f:
235
- audio_data = f.read()
236
 
237
- audio = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
238
- inputs = {"array": audio, "sampling_rate": pipe.feature_extractor.sampling_rate}
239
-
240
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
241
- return html_embed, result["text"]
242
 
243
- demo = gr.Blocks(theme=gr.themes.Ocean())
 
 
 
 
244
 
245
- common_inputs = [
246
- gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
247
- ]
 
 
 
 
248
 
249
  mf_transcribe = gr.Interface(
250
  fn=transcribe,
251
  inputs=[
252
  gr.Audio(sources="microphone", type="filepath"),
253
- *common_inputs
254
  ],
255
  outputs="text",
256
- title="Whisper Horami: Live Transcription",
257
- description="Transcribe audio from your microphone in real-time"
 
 
 
 
 
258
  )
259
 
260
  file_transcribe = gr.Interface(
261
  fn=transcribe,
262
  inputs=[
263
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
264
- *common_inputs
265
  ],
266
  outputs="text",
267
- title="Whisper Horami: File Transcription",
268
- description="Upload an audio file for transcription"
 
 
 
 
 
269
  )
270
 
271
- yt_interface = gr.Interface(
272
  fn=yt_transcribe,
273
  inputs=[
274
- gr.Textbox(placeholder="YouTube URL", label="Video URL"),
275
- *common_inputs
276
  ],
277
  outputs=["html", "text"],
278
- title="Whisper Horami: YouTube Transcription",
279
- description="Transcribe audio from YouTube videos"
 
 
 
 
 
280
  )
281
 
282
  with demo:
283
- gr.TabbedInterface(
284
- [mf_transcribe, file_transcribe],
285
- ["Microphone", "Audio File",]
286
- )
287
 
288
  demo.queue().launch(ssr_mode=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import spaces
5
  import torch
6
+
7
  import gradio as gr
8
+ import yt_dlp as youtube_dl
9
  from pytubefix import YouTube
10
  from pytubefix.cli import on_progress
11
+
12
  from transformers import pipeline
13
  from transformers.pipelines.audio_utils import ffmpeg_read
14
+
15
  import tempfile
16
  import os
17
 
18
  MODEL_NAME = "razhan/whisper-base-hawrami-transcription"
19
  BATCH_SIZE = 1
20
+ FILE_LIMIT_MB = 10
21
+ YT_LENGTH_LIMIT_S = 60 * 10 # limit YouTube files to 10 minutes (600 seconds)
22
 
23
  device = 0 if torch.cuda.is_available() else "cpu"
24
 
 
29
  device=device,
30
  )
31
 
32
+
33
+ # @spaces.GPU
34
def transcribe(inputs, task="transcribe"):
    """Run the speech-recognition pipeline on one audio input and return its text.

    Args:
        inputs: Audio handed straight to ``pipe``; ``None`` means the user
            submitted nothing.
        task: Forwarded to generation as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    generation_options = {"task": task}
    prediction = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs=generation_options,
        return_timestamps=True,
    )
    return prediction["text"]
40
 
 
 
41
 
42
  def _return_yt_html_embed(yt_url):
43
  video_id = yt_url.split("?v=")[-1]
44
+ HTML_str = (
45
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
46
+ " </center>"
47
+ )
48
+ return HTML_str
49
 
50
+ # def download_yt_audio(yt_url, filename):
51
+ # info_loader = youtube_dl.YoutubeDL()
52
+
53
+ # try:
54
+ # info = info_loader.extract_info(yt_url, download=False)
55
+ # except youtube_dl.utils.DownloadError as err:
56
+ # raise gr.Error(str(err))
57
+
58
+ # file_length = info["duration_string"]
59
+ # file_h_m_s = file_length.split(":")
60
+ # file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
61
+
62
+ # if len(file_h_m_s) == 1:
63
+ # file_h_m_s.insert(0, 0)
64
+ # if len(file_h_m_s) == 2:
65
+ # file_h_m_s.insert(0, 0)
66
+ # file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
67
+
68
+ # if file_length_s > YT_LENGTH_LIMIT_S:
69
+ # yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
70
+ # file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
71
+ # raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
72
 
73
+ # ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
74
+
75
+ # with youtube_dl.YoutubeDL(ydl_opts) as ydl:
76
+ # try:
77
+ # ydl.download([yt_url])
78
+ # except youtube_dl.utils.ExtractorError as err:
79
+ # raise gr.Error(str(err))
80
+ # yt = pt.YouTube(yt_url)
81
+ # stream = yt.streams.filter(only_audio=True)[0]
82
+ # stream.download(filename=filename)
83
+
84
+ # @spaces.GPU
85
+ # def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
86
+ # html_embed_str = _return_yt_html_embed(yt_url)
87
+
88
+ # with tempfile.TemporaryDirectory() as tmpdirname:
89
+ # # filepath = os.path.join(tmpdirname, "video.mp4")
90
+ # filepath = os.path.join(tmpdirname, "audio.mp3")
91
+ # download_yt_audio(yt_url, filepath)
92
+ # with open(filepath, "rb") as f:
93
+ # inputs = f.read()
94
+
95
+ # inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
96
+ # inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
97
+
98
+ # text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
99
+
100
+ # return html_embed_str, text
101
+
102
+
103
def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.
        task: Forwarded to generation as ``generate_kwargs["task"]``.
        progress: Gradio progress tracker; called once with ``0`` before the
            video is loaded.
        max_filesize: Upper bound, compared against ``stream.filesize_mb``,
            above which the request is rejected.

    Returns:
        A ``(html_embed_str, text)`` tuple: the iframe embed for the video and
        the ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If the video or its audio stream cannot be loaded, or if the
            audio exceeds ``max_filesize``.
    """
    progress(0, desc="Loading audio file...")
    html_embed_str = _return_yt_html_embed(yt_url)

    try:
        yt = YouTube(yt_url, on_progress_callback=on_progress, use_po_token=True)
        stream = yt.streams.get_audio_only()
    except Exception as err:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate; chaining keeps the root cause in the server logs.
        raise gr.Error("An error occurred while loading the YouTube video. Please try again.") from err

    if stream is None:
        # No audio-only stream was found for this video.
        raise gr.Error("An error occurred while loading the YouTube video. Please try again.")

    if stream.filesize_mb > max_filesize:
        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")

    # A per-request temporary directory stops concurrent requests from
    # overwriting each other's "audio.mp3" and removes the file afterwards.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Open the path that download() returns instead of assuming the final
        # file name (NOTE(review): the `mp3` flag may change the name on some
        # pytubefix versions; confirm).
        audio_path = stream.download(output_path=tmpdirname, filename="audio.mp3", mp3=True)
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()

    sampling_rate = pipe.feature_extractor.sampling_rate
    inputs = {
        "array": ffmpeg_read(audio_bytes, sampling_rate),
        "sampling_rate": sampling_rate,
    }
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return html_embed_str, text
129
+
130
+
131
# ---------------------------------------------------------------------------
# Gradio UI: one Interface per input source, shown as tabs in a Blocks app.
# ---------------------------------------------------------------------------
demo = gr.Blocks(theme=gr.themes.Ocean())

# Title and description shared by the microphone and file-upload tabs
# (previously duplicated verbatim in both Interface definitions).
_AUDIO_TITLE = "Whisper Horami Demo: Transcribe Audio"
_AUDIO_DESCRIPTION = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title=_AUDIO_TITLE,
    description=_AUDIO_DESCRIPTION,
    flagging_mode="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title=_AUDIO_TITLE,
    description=_AUDIO_DESCRIPTION,
    flagging_mode="never",
)

# Bound to its own name so the `yt_transcribe` function is not shadowed by
# the Interface that wraps it.
yt_interface = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
    ],
    outputs=["html", "text"],
    # The tab only transcribes (the task selector is disabled), so the title
    # says "Transcribe" rather than "Translate".
    title="Whisper Horami Demo: Transcribe YouTube",
    description=(
        "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
        " arbitrary length."
    ),
    flagging_mode="never",
)

with demo:
    # The YouTube tab is currently disabled; re-enable it with:
    # gr.TabbedInterface([mf_transcribe, file_transcribe, yt_interface], ["Microphone", "Audio file", "YouTube"])
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

demo.queue().launch(ssr_mode=False)
186
+
187
+ # import spaces
188
+ # import torch
189
+ # import gradio as gr
190
+ # from pytubefix import YouTube
191
+ # from pytubefix.cli import on_progress
192
+ # from transformers import pipeline
193
+ # from transformers.pipelines.audio_utils import ffmpeg_read
194
+ # import tempfile
195
+ # import os
196
+
197
+ # MODEL_NAME = "razhan/whisper-base-hawrami-transcription"
198
+ # BATCH_SIZE = 1
199
+
200
+ # device = 0 if torch.cuda.is_available() else "cpu"
201
+
202
+ # pipe = pipeline(
203
+ # task="automatic-speech-recognition",
204
+ # model=MODEL_NAME,
205
+ # chunk_length_s=30,
206
+ # device=device,
207
+ # )
208
+
209
+ # def transcribe(inputs, task="transcribe"):
210
+ # if inputs is None:
211
+ # raise gr.Error("Please upload or record an audio file before submitting.")
212
+
213
+ # result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
214
+ # return result["text"]
215
+
216
+ # def _return_yt_html_embed(yt_url):
217
+ # video_id = yt_url.split("?v=")[-1]
218
+ # return f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe></center>'
219
+
220
+ # def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress()):
221
+ # progress(0, desc="Loading audio file...")
222
+ # html_embed = _return_yt_html_embed(yt_url)
223
+
224
+ # try:
225
+ # yt = YouTube(yt_url, on_progress_callback=on_progress, use_po_token=True)
226
+ # stream = yt.streams.get_audio_only()
227
+ # except Exception as e:
228
+ # raise gr.Error(f"Error loading YouTube video: {str(e)}")
229
+
230
+ # with tempfile.TemporaryDirectory() as tmpdir:
231
+ # file_path = os.path.join(tmpdir, "audio.mp3")
232
+ # stream.download(filename=file_path)
233
+
234
+ # with open(file_path, "rb") as f:
235
+ # audio_data = f.read()
236
+
237
+ # audio = ffmpeg_read(audio_data, pipe.feature_extractor.sampling_rate)
238
+ # inputs = {"array": audio, "sampling_rate": pipe.feature_extractor.sampling_rate}
239
+
240
+ # result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
241
+ # return html_embed, result["text"]
242
+
243
+ # demo = gr.Blocks(theme=gr.themes.Ocean())
244
+
245
+ # common_inputs = [
246
+ # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
247
+ # ]
248
+
249
+ # mf_transcribe = gr.Interface(
250
+ # fn=transcribe,
251
+ # inputs=[
252
+ # gr.Audio(sources="microphone", type="filepath"),
253
+ # *common_inputs
254
+ # ],
255
+ # outputs="text",
256
+ # title="Whisper Horami: Live Transcription",
257
+ # description="Transcribe audio from your microphone in real-time"
258
+ # )
259
+
260
+ # file_transcribe = gr.Interface(
261
+ # fn=transcribe,
262
+ # inputs=[
263
+ # gr.Audio(sources="upload", type="filepath", label="Audio file"),
264
+ # *common_inputs
265
+ # ],
266
+ # outputs="text",
267
+ # title="Whisper Horami: File Transcription",
268
+ # description="Upload an audio file for transcription"
269
+ # )
270
+
271
+ # yt_interface = gr.Interface(
272
+ # fn=yt_transcribe,
273
+ # inputs=[
274
+ # gr.Textbox(placeholder="YouTube URL", label="Video URL"),
275
+ # *common_inputs
276
+ # ],
277
+ # outputs=["html", "text"],
278
+ # title="Whisper Horami: YouTube Transcription",
279
+ # description="Transcribe audio from YouTube videos"
280
+ # )
281
+
282
+ # with demo:
283
+ # gr.TabbedInterface(
284
+ # [mf_transcribe, file_transcribe],
285
+ # ["Microphone", "Audio File",]
286
+ # )
287
+
288
+ # demo.queue().launch(ssr_mode=False)