siddqamar committed on
Commit
dad4b00
·
verified ·
1 Parent(s): 90ec329

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -19
app.py CHANGED
@@ -1,31 +1,132 @@
 
1
  import gradio as gr
2
- import whisper
 
 
3
  import os
 
4
 
5
- model = whisper.load_model("base")
 
 
 
 
6
 
7
- def transcribe_audio(audio_file):
8
- # Check file size (e.g., 25MB limit)
9
- if os.path.getsize(audio_file.name) > 25 * 1024 * 1024:
10
- return "Error: File size exceeds 25MB limit.", None
11
 
12
- result = model.transcribe(audio_file.name)
13
- output_filename = os.path.splitext(os.path.basename(audio_file.name))[0] + ".txt"
14
-
15
- with open(output_filename, "w") as text_file:
16
- text_file.write(result["text"])
17
-
18
- return result["text"], output_filename
19
 
20
- iface = gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  fn=transcribe_audio,
22
- inputs=gr.File(label="Upload Audio File (Max 25MB)"),
23
- outputs=[
24
- gr.Textbox(label="Transcription"),
25
- gr.File(label="Download Transcript")
26
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  title="Free Transcript Maker",
28
  description="Upload an audio file (WAV, MP3, etc.) up to 25MB to get its transcription. The transcript will be displayed and available for download. Please use responsibly."
29
  )
30
 
31
- iface.launch(share=True)
 
 
 
 
1
+ import torch
2
  import gradio as gr
3
+ import yt_dlp as youtube_dl
4
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
5
+ import tempfile
6
  import os
7
+ import time
8
 
9
# Checkpoint identifier and request limits used throughout the app.
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
FILE_LIMIT_MB = 25  # largest accepted upload, in megabytes
YT_LENGTH_LIMIT_S = 3600  # longest accepted YouTube video, in seconds (1 hour)

# Run on CUDA device 0 when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Load the processor and the model for MODEL_NAME, then move the model
# onto the selected device.
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
model = model.to(device)
 
 
 
 
21
 
22
def transcribe_audio(inputs):
    """Transcribe an audio clip with the module-level Whisper model.

    Args:
        inputs: Either a path to an audio file on disk (what the Gradio
            audio components configured with ``type="filepath"`` provide)
            or the raw encoded bytes of a media file.

    Returns:
        The decoded transcription text of the clip.

    Raises:
        gr.Error: If nothing was submitted, or if the file on disk is larger
            than ``FILE_LIMIT_MB`` megabytes.
    """
    # Local import: transformers is already a dependency of this file, and
    # only this function needs the ffmpeg-based decoder.
    from transformers.pipelines.audio_utils import ffmpeg_read

    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    if isinstance(inputs, (bytes, bytearray)):
        # Raw media bytes: there is no file on disk to size-check, so the
        # caller is responsible for bounding the input.
        payload = bytes(inputs)
    else:
        # Check file size (max FILE_LIMIT_MB) before reading it into memory.
        if os.path.getsize(inputs) > FILE_LIMIT_MB * 1024 * 1024:
            raise gr.Error(f"File size exceeds {FILE_LIMIT_MB}MB limit.")
        with open(inputs, "rb") as audio_file:
            payload = audio_file.read()

    # The processor expects a decoded waveform, not a path or encoded bytes,
    # so decode and resample to the rate the feature extractor was built for.
    sampling_rate = processor.feature_extractor.sampling_rate
    waveform = ffmpeg_read(payload, sampling_rate)

    # Whisper's feature extractor yields log-mel `input_features`
    # (not `input_values`).
    # NOTE(review): a single generate() call covers one 30-second window;
    # longer audio is presumably truncated by the feature extractor - confirm
    # whether long-form chunking is wanted here.
    features = processor(waveform, sampling_rate=sampling_rate, return_tensors="pt")
    input_features = features.input_features.to(device)

    # Generate token ids and decode them to text.
    predicted_ids = model.generate(input_features, max_length=448)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
40
+
41
+ def _return_yt_html_embed(yt_url):
42
+ """Return YouTube embed HTML for display."""
43
+ video_id = yt_url.split("?v=")[-1]
44
+ html_embed = f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe></center>'
45
+ return html_embed
46
+
47
def download_yt_audio(yt_url, filename):
    """Download the media for a YouTube URL to ``filename``.

    The video's metadata is fetched first so that videos longer than
    ``YT_LENGTH_LIMIT_S`` seconds are rejected before anything is downloaded.

    Args:
        yt_url: URL of the YouTube video.
        filename: Output path template handed to the downloader.

    Raises:
        gr.Error: If the metadata lookup fails, the video exceeds the length
            limit, or the download itself fails.
    """
    try:
        # Context manager so the metadata loader is closed after use.
        with youtube_dl.YoutubeDL() as info_loader:
            info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(f"Download error: {str(err)}") from err

    # Check video length. "duration" can be present but None, which int()
    # rejects, so fall back to 0 in that case as well as when it is missing.
    # NOTE(review): an unknown duration therefore bypasses the length limit,
    # matching the original default of 0 - confirm this is acceptable.
    file_length_s = int(info.get("duration") or 0)

    if file_length_s > YT_LENGTH_LIMIT_S:
        # The format renders as e.g. "01H:00M:00S".
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube video length is {yt_length_limit_hms}, but video is {file_length_hms}.")

    # Download the video
    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            # DownloadError is handled too, as for the metadata lookup above,
            # so download failures surface as a gr.Error instead of escaping.
            raise gr.Error(f"Error while downloading video: {str(err)}") from err
71
+
72
def yt_transcribe(yt_url):
    """Download a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        A ``(html_embed, transcription)`` tuple: the embed markup for the
        video and the text returned by ``transcribe_audio``.
    """
    embed_markup = _return_yt_html_embed(yt_url)

    # The download lives in a temporary directory; its bytes are read into
    # memory before the directory is removed.
    with tempfile.TemporaryDirectory() as workdir:
        target_path = os.path.join(workdir, "video.mp4")
        download_yt_audio(yt_url, target_path)
        with open(target_path, "rb") as media:
            payload = media.read()

    # Hand the raw file bytes to the transcriber.
    return embed_markup, transcribe_audio(payload)
86
+
87
# Top-level Blocks container that the tabbed interface is rendered into.
demo = gr.Blocks()

# Tab 1: transcribe a microphone recording, passed to the callback as a file path.
mf_transcribe = gr.Interface(
    title="Whisper Transcription (Microphone)",
    description="Transcribe audio from your microphone. File size limit is 25MB.",
    fn=transcribe_audio,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath", optional=True)],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
)
102
+
103
# Tab 2: transcribe an uploaded audio file, passed to the callback as a file path.
file_transcribe = gr.Interface(
    title="Whisper Transcription (File)",
    description="Upload an audio file to transcribe. File size limit is 25MB.",
    fn=transcribe_audio,
    inputs=[gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
)
115
+
116
# YouTube video transcription interface.
# Bound to its own name so the `yt_transcribe` function is not rebound to
# the Interface object and stays callable after this point.
yt_interface = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.inputs.Textbox(lines=1, placeholder="Paste YouTube URL", label="YouTube URL"),
    ],
    outputs=["html", "text"],
    layout="horizontal",
    theme="huggingface",
    title="Free Transcript Maker",
    # This tab takes a URL, not an upload, and offers no transcript download,
    # so the description states what the tab actually does.
    description=(
        "Paste a YouTube URL to get the video's transcription. "
        f"Videos longer than {YT_LENGTH_LIMIT_S // 60} minutes are rejected. "
        "Please use responsibly."
    ),
)

# Assemble the three interfaces into tabs inside the Blocks container.
with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_interface], ["Microphone", "Audio file", "YouTube"])

# NOTE(review): `gr.inputs.*`, `layout=`, `theme="huggingface"` and
# `enable_queue=` look like Gradio 3.x-era API - confirm the pinned Gradio version.
demo.launch(enable_queue=True)