JaganathC commited on
Commit
083a014
·
verified ·
1 Parent(s): 2f88fda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +228 -228
app.py CHANGED
@@ -1,228 +1,228 @@
1
- import gradio as gr
2
- import torch
3
- import yt_dlp
4
- import os
5
- import subprocess
6
- import json
7
- from threading import Thread
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
- import spaces
10
- import moviepy.editor as mp
11
- import time
12
- import langdetect
13
- import uuid
14
-
15
- HF_TOKEN = os.environ.get("HF_TOKEN")
16
- print("Starting the program...")
17
-
18
- model_path = "Qwen/Qwen2.5-7B-Instruct"
19
- print(f"Loading model {model_path}...")
20
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
21
- model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
22
- model = model.eval()
23
- print("Model successfully loaded.")
24
-
25
- def generate_unique_filename(extension):
26
- return f"{uuid.uuid4()}{extension}"
27
-
28
- def cleanup_files(*files):
29
- for file in files:
30
- if file and os.path.exists(file):
31
- os.remove(file)
32
- print(f"Removed file: {file}")
33
-
34
- def download_youtube_audio(url):
35
- print(f"Downloading audio from YouTube: {url}")
36
- output_path = generate_unique_filename(".wav")
37
- ydl_opts = {
38
- 'format': 'bestaudio/best',
39
- 'postprocessors': [{
40
- 'key': 'FFmpegExtractAudio',
41
- 'preferredcodec': 'wav',
42
- }],
43
- 'outtmpl': output_path,
44
- 'keepvideo': True,
45
- }
46
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
47
- ydl.download([url])
48
-
49
- # Check if the file was renamed to .wav.wav
50
- if os.path.exists(output_path + ".wav"):
51
- os.rename(output_path + ".wav", output_path)
52
-
53
- if os.path.exists(output_path):
54
- print(f"Audio download completed. File saved at: {output_path}")
55
- print(f"File size: {os.path.getsize(output_path)} bytes")
56
- else:
57
- print(f"Error: File {output_path} not found after download.")
58
-
59
- return output_path
60
-
61
- @spaces.GPU(duration=90)
62
- def transcribe_audio(file_path):
63
- print(f"Starting transcription of file: {file_path}")
64
- temp_audio = None
65
- if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
66
- print("Video file detected. Extracting audio...")
67
- try:
68
- video = mp.VideoFileClip(file_path)
69
- temp_audio = generate_unique_filename(".wav")
70
- video.audio.write_audiofile(temp_audio)
71
- file_path = temp_audio
72
- except Exception as e:
73
- print(f"Error extracting audio from video: {e}")
74
- raise
75
-
76
- print(f"Does the file exist? {os.path.exists(file_path)}")
77
- print(f"File size: {os.path.getsize(file_path) if os.path.exists(file_path) else 'N/A'} bytes")
78
-
79
- output_file = generate_unique_filename(".json")
80
- command = [
81
- "insanely-fast-whisper",
82
- "--file-name", file_path,
83
- "--device-id", "0",
84
- "--model-name", "openai/whisper-large-v3",
85
- "--task", "transcribe",
86
- "--timestamp", "chunk",
87
- "--transcript-path", output_file
88
- ]
89
- print(f"Executing command: {' '.join(command)}")
90
- try:
91
- result = subprocess.run(command, check=True, capture_output=True, text=True)
92
- print(f"Standard output: {result.stdout}")
93
- print(f"Error output: {result.stderr}")
94
- except subprocess.CalledProcessError as e:
95
- print(f"Error running insanely-fast-whisper: {e}")
96
- print(f"Standard output: {e.stdout}")
97
- print(f"Error output: {e.stderr}")
98
- raise
99
-
100
- print(f"Reading transcription file: {output_file}")
101
- try:
102
- with open(output_file, "r") as f:
103
- transcription = json.load(f)
104
- except json.JSONDecodeError as e:
105
- print(f"Error decoding JSON: {e}")
106
- print(f"File content: {open(output_file, 'r').read()}")
107
- raise
108
-
109
- if "text" in transcription:
110
- result = transcription["text"]
111
- else:
112
- result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
113
-
114
- print("Transcription completed.")
115
-
116
- # Cleanup
117
- cleanup_files(output_file)
118
- if temp_audio:
119
- cleanup_files(temp_audio)
120
-
121
- return result
122
-
123
- @spaces.GPU(duration=90)
124
- def generate_summary_stream(transcription):
125
- print("Starting summary generation...")
126
- print(f"Transcription length: {len(transcription)} characters")
127
-
128
- detected_language = langdetect.detect(transcription)
129
-
130
- prompt = f"""Summarize the following video transcription in 150-300 words.
131
- The summary should be in the same language as the transcription, which is detected as {detected_language}.
132
- Please ensure that the summary captures the main points and key ideas of the transcription:
133
-
134
- {transcription[:300000]}..."""
135
-
136
- response, history = model.chat(tokenizer, prompt, history=[])
137
- print(f"Final summary generated: {response[:100]}...")
138
- print("Summary generation completed.")
139
- return response
140
-
141
- def process_youtube(url):
142
- if not url:
143
- print("YouTube URL not provided.")
144
- return "Please enter a YouTube URL.", None
145
- print(f"Processing YouTube URL: {url}")
146
-
147
- audio_file = None
148
- try:
149
- audio_file = download_youtube_audio(url)
150
- if not os.path.exists(audio_file):
151
- raise FileNotFoundError(f"File {audio_file} does not exist after download.")
152
-
153
- print(f"Audio file found: {audio_file}")
154
- print("Starting transcription...")
155
- transcription = transcribe_audio(audio_file)
156
- print(f"Transcription completed. Length: {len(transcription)} characters")
157
- return transcription, None
158
- except Exception as e:
159
- print(f"Error processing YouTube: {e}")
160
- return f"Processing error: {str(e)}", None
161
- finally:
162
- if audio_file and os.path.exists(audio_file):
163
- cleanup_files(audio_file)
164
- print(f"Directory content after processing: {os.listdir('.')}")
165
-
166
- def process_uploaded_video(video_path):
167
- print(f"Processing uploaded video: {video_path}")
168
- try:
169
- print("Starting transcription...")
170
- transcription = transcribe_audio(video_path)
171
- print(f"Transcription completed. Length: {len(transcription)} characters")
172
- return transcription, None
173
- except Exception as e:
174
- print(f"Error processing video: {e}")
175
- return f"Processing error: {str(e)}", None
176
-
177
- print("Setting up Gradio interface...")
178
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
179
- gr.Markdown(
180
- """
181
- # 🎥 Video Transcription and Smart Summary
182
-
183
- Upload a video or provide a YouTube link to get a transcription and AI-generated summary. HF Zero GPU has a usage time limit. So if you want to run longer videos I recommend you clone the space. Remove @Spaces.gpu from the code and run it locally on your GPU!
184
- """
185
- )
186
-
187
- with gr.Tabs():
188
- with gr.TabItem("📤 Video Upload"):
189
- video_input = gr.Video(label="Drag and drop or click to upload")
190
- video_button = gr.Button("🚀 Process Video", variant="primary")
191
-
192
- with gr.TabItem("🔗 YouTube Link"):
193
- url_input = gr.Textbox(label="Paste YouTube URL here", placeholder="https://www.youtube.com/watch?v=...")
194
- url_button = gr.Button("🚀 Process URL", variant="primary")
195
-
196
- with gr.Row():
197
- with gr.Column():
198
- transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
199
- with gr.Column():
200
- summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
201
-
202
- summary_button = gr.Button("📝 Generate Summary", variant="secondary")
203
-
204
- gr.Markdown(
205
- """
206
- ### How to use:
207
- 1. Upload a video or paste a YouTube link.
208
- 2. Click 'Process' to get the transcription.
209
- 3. Click 'Generate Summary' to get a summary of the content.
210
-
211
- *Note: Processing may take a few minutes depending on the video length.*
212
- """
213
- )
214
-
215
- def process_video_and_update(video):
216
- if video is None:
217
- return "No video uploaded.", "Please upload a video."
218
- print(f"Video received: {video}")
219
- transcription, _ = process_uploaded_video(video)
220
- print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
221
- return transcription or "Transcription error", ""
222
-
223
- video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
224
- url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
225
- summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
226
-
227
- print("Launching Gradio interface...")
228
- demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import yt_dlp
4
+ import os
5
+ import subprocess
6
+ import json
7
+ from threading import Thread
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ import spaces
10
+ import moviepy.editor as mp
11
+ import time
12
+ import langdetect
13
+ import uuid
14
+
15
+ HF_TOKEN = os.environ.get("HF_TOKEN")
16
+ print("Starting the program...")
17
+
18
+ model_path = "Qwen/Qwen2.5-7B-Instruct"
19
+ print(f"Loading model {model_path}...")
20
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
21
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
22
+ model = model.eval()
23
+ print("Model successfully loaded.")
24
+
25
+ def generate_unique_filename(extension):
26
+ return f"{uuid.uuid4()}{extension}"
27
+
28
+ def cleanup_files(*files):
29
+ for file in files:
30
+ if file and os.path.exists(file):
31
+ os.remove(file)
32
+ print(f"Removed file: {file}")
33
+
34
+ def download_youtube_audio(url):
35
+ print(f"Downloading audio from YouTube: {url}")
36
+ output_path = generate_unique_filename(".wav")
37
+ ydl_opts = {
38
+ 'format': 'bestaudio/best',
39
+ 'postprocessors': [{
40
+ 'key': 'FFmpegExtractAudio',
41
+ 'preferredcodec': 'wav',
42
+ }],
43
+ 'outtmpl': output_path,
44
+ 'keepvideo': True,
45
+ }
46
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
47
+ ydl.download([url])
48
+
49
+ # Check if the file was renamed to .wav.wav
50
+ if os.path.exists(output_path + ".wav"):
51
+ os.rename(output_path + ".wav", output_path)
52
+
53
+ if os.path.exists(output_path):
54
+ print(f"Audio download completed. File saved at: {output_path}")
55
+ print(f"File size: {os.path.getsize(output_path)} bytes")
56
+ else:
57
+ print(f"Error: File {output_path} not found after download.")
58
+
59
+ return output_path
60
+
61
+
62
+ def transcribe_audio(file_path):
63
+ print(f"Starting transcription of file: {file_path}")
64
+ temp_audio = None
65
+ if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
66
+ print("Video file detected. Extracting audio...")
67
+ try:
68
+ video = mp.VideoFileClip(file_path)
69
+ temp_audio = generate_unique_filename(".wav")
70
+ video.audio.write_audiofile(temp_audio)
71
+ file_path = temp_audio
72
+ except Exception as e:
73
+ print(f"Error extracting audio from video: {e}")
74
+ raise
75
+
76
+ print(f"Does the file exist? {os.path.exists(file_path)}")
77
+ print(f"File size: {os.path.getsize(file_path) if os.path.exists(file_path) else 'N/A'} bytes")
78
+
79
+ output_file = generate_unique_filename(".json")
80
+ command = [
81
+ "insanely-fast-whisper",
82
+ "--file-name", file_path,
83
+ "--device-id", "0",
84
+ "--model-name", "openai/whisper-large-v3",
85
+ "--task", "transcribe",
86
+ "--timestamp", "chunk",
87
+ "--transcript-path", output_file
88
+ ]
89
+ print(f"Executing command: {' '.join(command)}")
90
+ try:
91
+ result = subprocess.run(command, check=True, capture_output=True, text=True)
92
+ print(f"Standard output: {result.stdout}")
93
+ print(f"Error output: {result.stderr}")
94
+ except subprocess.CalledProcessError as e:
95
+ print(f"Error running insanely-fast-whisper: {e}")
96
+ print(f"Standard output: {e.stdout}")
97
+ print(f"Error output: {e.stderr}")
98
+ raise
99
+
100
+ print(f"Reading transcription file: {output_file}")
101
+ try:
102
+ with open(output_file, "r") as f:
103
+ transcription = json.load(f)
104
+ except json.JSONDecodeError as e:
105
+ print(f"Error decoding JSON: {e}")
106
+ print(f"File content: {open(output_file, 'r').read()}")
107
+ raise
108
+
109
+ if "text" in transcription:
110
+ result = transcription["text"]
111
+ else:
112
+ result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
113
+
114
+ print("Transcription completed.")
115
+
116
+ # Cleanup
117
+ cleanup_files(output_file)
118
+ if temp_audio:
119
+ cleanup_files(temp_audio)
120
+
121
+ return result
122
+
123
+ @spaces.GPU(duration=90)
124
+ def generate_summary_stream(transcription):
125
+ print("Starting summary generation...")
126
+ print(f"Transcription length: {len(transcription)} characters")
127
+
128
+ detected_language = langdetect.detect(transcription)
129
+
130
+ prompt = f"""Summarize the following video transcription in 150-300 words.
131
+ The summary should be in the same language as the transcription, which is detected as {detected_language}.
132
+ Please ensure that the summary captures the main points and key ideas of the transcription:
133
+
134
+ {transcription[:300000]}..."""
135
+
136
+ response, history = model.chat(tokenizer, prompt, history=[])
137
+ print(f"Final summary generated: {response[:100]}...")
138
+ print("Summary generation completed.")
139
+ return response
140
+
141
+ def process_youtube(url):
142
+ if not url:
143
+ print("YouTube URL not provided.")
144
+ return "Please enter a YouTube URL.", None
145
+ print(f"Processing YouTube URL: {url}")
146
+
147
+ audio_file = None
148
+ try:
149
+ audio_file = download_youtube_audio(url)
150
+ if not os.path.exists(audio_file):
151
+ raise FileNotFoundError(f"File {audio_file} does not exist after download.")
152
+
153
+ print(f"Audio file found: {audio_file}")
154
+ print("Starting transcription...")
155
+ transcription = transcribe_audio(audio_file)
156
+ print(f"Transcription completed. Length: {len(transcription)} characters")
157
+ return transcription, None
158
+ except Exception as e:
159
+ print(f"Error processing YouTube: {e}")
160
+ return f"Processing error: {str(e)}", None
161
+ finally:
162
+ if audio_file and os.path.exists(audio_file):
163
+ cleanup_files(audio_file)
164
+ print(f"Directory content after processing: {os.listdir('.')}")
165
+
166
+ def process_uploaded_video(video_path):
167
+ print(f"Processing uploaded video: {video_path}")
168
+ try:
169
+ print("Starting transcription...")
170
+ transcription = transcribe_audio(video_path)
171
+ print(f"Transcription completed. Length: {len(transcription)} characters")
172
+ return transcription, None
173
+ except Exception as e:
174
+ print(f"Error processing video: {e}")
175
+ return f"Processing error: {str(e)}", None
176
+
177
+ print("Setting up Gradio interface...")
178
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
179
+ gr.Markdown(
180
+ """
181
+ # 🎥 Video Transcription and Smart Summary
182
+
183
+ Upload a video or provide a YouTube link to get a transcription and AI-generated summary. HF Zero GPU has a usage time limit. So if you want to run longer videos I recommend you clone the space. Remove @Spaces.gpu from the code and run it locally on your GPU!
184
+ """
185
+ )
186
+
187
+ with gr.Tabs():
188
+ with gr.TabItem("📤 Video Upload"):
189
+ video_input = gr.Video(label="Drag and drop or click to upload")
190
+ video_button = gr.Button("🚀 Process Video", variant="primary")
191
+
192
+ with gr.TabItem("🔗 YouTube Link"):
193
+ url_input = gr.Textbox(label="Paste YouTube URL here", placeholder="https://www.youtube.com/watch?v=...")
194
+ url_button = gr.Button("🚀 Process URL", variant="primary")
195
+
196
+ with gr.Row():
197
+ with gr.Column():
198
+ transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
199
+ with gr.Column():
200
+ summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
201
+
202
+ summary_button = gr.Button("📝 Generate Summary", variant="secondary")
203
+
204
+ gr.Markdown(
205
+ """
206
+ ### How to use:
207
+ 1. Upload a video or paste a YouTube link.
208
+ 2. Click 'Process' to get the transcription.
209
+ 3. Click 'Generate Summary' to get a summary of the content.
210
+
211
+ *Note: Processing may take a few minutes depending on the video length.*
212
+ """
213
+ )
214
+
215
+ def process_video_and_update(video):
216
+ if video is None:
217
+ return "No video uploaded.", "Please upload a video."
218
+ print(f"Video received: {video}")
219
+ transcription, _ = process_uploaded_video(video)
220
+ print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
221
+ return transcription or "Transcription error", ""
222
+
223
+ video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
224
+ url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
225
+ summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
226
+
227
+ print("Launching Gradio interface...")
228
+ demo.launch()