artificialguybr committed on
Commit
240de18
·
verified ·
1 Parent(s): cafd3d2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +206 -0
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import yt_dlp
4
+ import os
5
+ import subprocess
6
+ import json
7
+ from threading import Thread
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ import spaces
10
+ import moviepy.editor as mp
11
+ import time
12
+ import langdetect
13
+
14
# Hugging Face access token taken from the environment (None when unset).
# NOTE(review): nothing in the visible code passes this token anywhere.
HF_TOKEN = os.environ.get("HF_TOKEN")
print("Starting the program...")

# Checkpoint used for summarisation; loaded once at import time.
model_path = "internlm/internlm2_5-7b-chat"
print(f"Loading model {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
# Move the half-precision weights to the GPU and switch to inference mode.
model = model.cuda().eval()
print("Model successfully loaded.")
23
+
24
def download_youtube_audio(url, output_path):
    """Download the audio of a YouTube video as WAV using yt-dlp.

    The outcome is only reported on stdout: nothing is returned and nothing
    is raised here when the expected file is missing afterwards.

    Args:
        url: Address of the video to fetch.
        output_path: Output template handed to yt-dlp, and the path the
            resulting file is expected at.
    """
    print(f"Downloading audio from YouTube: {url}")

    extract_wav = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [extract_wav],
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    # The post-processor may have appended a second ".wav"; undo that so the
    # file ends up at the requested path.
    doubled_suffix_path = output_path + ".wav"
    if os.path.exists(doubled_suffix_path):
        os.rename(doubled_suffix_path, output_path)

    if not os.path.exists(output_path):
        print(f"Error: File {output_path} not found after download.")
        return

    print(f"Audio download completed. File saved at: {output_path}")
    print(f"File size: {os.path.getsize(output_path)} bytes")
46
+
47
+
48
@spaces.GPU(duration=60)
def transcribe_audio(file_path):
    """Transcribe an audio or video file with the insanely-fast-whisper CLI.

    Paths ending in .mp4/.avi/.mov/.flv (case-insensitive) first have their
    audio track written to ``temp_audio.wav`` with moviepy; any other path is
    handed to the CLI unchanged. The CLI is asked to write its result to
    ``output.json`` in the working directory, which is then parsed.

    Args:
        file_path: Path of the media file to transcribe.

    Returns:
        The transcript text: the top-level ``"text"`` field when present,
        otherwise the ``"text"`` of each entry in ``"chunks"`` joined by
        single spaces.

    Raises:
        ValueError: The video file has no audio track.
        subprocess.CalledProcessError: The CLI exited with a non-zero status.
        json.JSONDecodeError: The transcript file is not valid JSON.
    """
    video_suffixes = ('.mp4', '.avi', '.mov', '.flv')
    output_file = "output.json"
    # Set only when this call creates the temporary WAV, so cleanup never
    # touches a caller-supplied file that merely has a similar name.
    temp_audio = None

    print(f"Starting transcription of file: {file_path}")
    try:
        if file_path.lower().endswith(video_suffixes):
            print("Video file detected. Extracting audio...")
            temp_audio = "temp_audio.wav"
            try:
                video = mp.VideoFileClip(file_path)
                try:
                    if video.audio is None:
                        raise ValueError("The video has no audio track.")
                    video.audio.write_audiofile(temp_audio)
                finally:
                    # Release the reader resources held by the clip.
                    video.close()
            except Exception as e:
                print(f"Error extracting audio from video: {e}")
                raise
            file_path = temp_audio

        file_exists = os.path.exists(file_path)
        print(f"Does the file exist? {file_exists}")
        print(f"File size: {os.path.getsize(file_path) if file_exists else 'N/A'} bytes")

        command = [
            "insanely-fast-whisper",
            "--file-name", file_path,
            "--device-id", "0",
            "--model-name", "openai/whisper-large-v3",
            "--task", "transcribe",
            "--timestamp", "chunk",
            "--transcript-path", output_file,
        ]
        print(f"Executing command: {' '.join(command)}")
        try:
            completed = subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            print(f"Error running insanely-fast-whisper: {e}")
            print(f"Standard output: {e.stdout}")
            print(f"Error output: {e.stderr}")
            raise
        print(f"Standard output: {completed.stdout}")
        print(f"Error output: {completed.stderr}")

        print(f"Reading transcription file: {output_file}")
        # Read once with an explicit encoding: JSON is conventionally UTF-8,
        # and the raw text is reused for diagnostics if parsing fails.
        with open(output_file, "r", encoding="utf-8") as f:
            raw_json = f.read()
        try:
            transcription = json.loads(raw_json)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            print(f"File content: {raw_json}")
            raise

        if "text" in transcription:
            text = transcription["text"]
        else:
            text = " ".join(chunk["text"] for chunk in transcription.get("chunks", []))
        print("Transcription completed.")
        return text
    finally:
        # Remove the extracted audio on success and on failure alike.
        if temp_audio is not None and os.path.exists(temp_audio):
            os.remove(temp_audio)
99
+
100
@spaces.GPU(duration=60)
def generate_summary_stream(transcription):
    """Summarise a transcription with the module-level chat model.

    Despite the name, the summary is produced by a single ``model.chat``
    call and returned in one piece.

    Args:
        transcription: Text to summarise. Only the first 30000 characters
            are placed in the prompt.

    Returns:
        The model's summary, or a short notice when there is no text to
        summarise.
    """
    max_prompt_chars = 30000

    print("Starting summary generation...")
    # The summary button can be pressed before any transcription exists;
    # language detection would raise on empty text, so stop early.
    if not transcription or not transcription.strip():
        print("No transcription provided; skipping summary generation.")
        return "No transcription available to summarize."
    print(f"Transcription length: {len(transcription)} characters")

    try:
        detected_language = langdetect.detect(transcription)
    except langdetect.LangDetectException as e:
        # Raised when the text has no usable features (e.g. only digits or
        # punctuation). The prompt still asks for the same language.
        print(f"Language detection failed: {e}")
        detected_language = "unknown"

    excerpt = transcription[:max_prompt_chars]
    if len(transcription) > max_prompt_chars:
        # Mark the cut only when text was actually dropped.
        excerpt += "..."

    prompt = (
        "Summarize the following video transcription in 150-300 words.\n"
        "The summary should be in the same language as the transcription, "
        f"which is detected as {detected_language}.\n"
        "Please ensure that the summary captures the main points and key ideas "
        "of the transcription:\n"
        "\n"
        f"{excerpt}"
    )

    response, _ = model.chat(tokenizer, prompt, history=[])
    print(f"Final summary generated: {response[:100]}...")
    print("Summary generation completed.")
    return response
117
+
118
def process_youtube(url):
    """Download a YouTube video's audio and transcribe it.

    Args:
        url: Address of the video; a falsy value short-circuits with a
            prompt to enter one.

    Returns:
        A ``(text, None)`` pair. ``text`` is the transcription on success,
        or a message describing what went wrong. Exceptions raised while
        downloading or transcribing are reported in ``text`` rather than
        propagated.
    """
    if not url:
        print("YouTube URL not provided.")
        return "Please enter a YouTube URL.", None

    print(f"Processing YouTube URL: {url}")
    audio_file = "youtube_audio.wav"
    try:
        download_youtube_audio(url, audio_file)

        # The download may have left the file with a doubled ".wav" suffix.
        doubled_suffix_path = audio_file + ".wav"
        if os.path.exists(doubled_suffix_path):
            audio_file = doubled_suffix_path
        if not os.path.exists(audio_file):
            raise FileNotFoundError(f"File {audio_file} does not exist after download.")

        print(f"Audio file found: {audio_file}")
        print("Starting transcription...")
        text = transcribe_audio(audio_file)
        print(f"Transcription completed. Length: {len(text)} characters")
        outcome = (text, None)
    except Exception as e:
        print(f"Error processing YouTube: {e}")
        reason = str(e)
        outcome = (f"Processing error: {reason}", None)
    finally:
        # Always discard the downloaded audio, whatever happened above.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        print(f"Directory content after processing: {os.listdir('.')}")
    return outcome
143
+
144
def process_uploaded_video(video_path):
    """Transcribe an uploaded video file.

    Args:
        video_path: Path of the uploaded file.

    Returns:
        A ``(text, None)`` pair. ``text`` is the transcription, or a
        "Processing error: ..." message when transcription raised.
    """
    print(f"Processing uploaded video: {video_path}")
    try:
        print("Starting transcription...")
        text = transcribe_audio(video_path)
        print(f"Transcription completed. Length: {len(text)} characters")
    except Exception as e:
        print(f"Error processing video: {e}")
        reason = str(e)
        return f"Processing error: {reason}", None
    return text, None
154
+
155
print("Setting up Gradio interface...")


def process_video_and_update(video):
    """Handle the "Process Video" button.

    Args:
        video: Value of the video input component; None when nothing has
            been uploaded.

    Returns:
        A pair of strings for the transcription box and the summary box.
    """
    if video is None:
        return "No video uploaded.", "Please upload a video."

    print(f"Video received: {video}")
    transcription, _ = process_uploaded_video(video)
    preview = transcription[:100] if transcription else 'No transcription generated'
    print(f"Returned transcription: {preview}...")

    if not transcription:
        return "Transcription error", ""
    return transcription, ""


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown(
        """
        # 🎥 Video Transcription and Smart Summary

        Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
        """
    )

    # Two alternative input sources, one per tab.
    with gr.Tabs():
        with gr.TabItem("📤 Video Upload"):
            video_input = gr.Video(label="Drag and drop or click to upload")
            video_button = gr.Button("🚀 Process Video", variant="primary")

        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(
                label="Paste YouTube URL here",
                placeholder="https://www.youtube.com/watch?v=...",
            )
            url_button = gr.Button("🚀 Process URL", variant="primary")

    # Results side by side: transcription on the left, summary on the right.
    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(
                label="📝 Transcription",
                lines=10,
                show_copy_button=True,
            )
        with gr.Column():
            summary_output = gr.Textbox(
                label="📊 Summary",
                lines=10,
                show_copy_button=True,
            )

    summary_button = gr.Button("📝 Generate Summary", variant="secondary")

    gr.Markdown(
        """
        ### How to use:
        1. Upload a video or paste a YouTube link.
        2. Click 'Process' to get the transcription.
        3. Click 'Generate Summary' to get a summary of the content.

        *Note: Processing may take a few minutes depending on the video length.*
        """
    )

    # Both processing buttons fill the transcription box and reset the
    # summary box; the summary button reads the transcription box.
    video_button.click(
        process_video_and_update,
        inputs=[video_input],
        outputs=[transcription_output, summary_output],
    )
    url_button.click(
        process_youtube,
        inputs=[url_input],
        outputs=[transcription_output, summary_output],
    )
    summary_button.click(
        generate_summary_stream,
        inputs=[transcription_output],
        outputs=[summary_output],
    )

print("Launching Gradio interface...")
demo.launch()