wangston9 committed on
Commit
3036801
·
verified ·
1 Parent(s): 8913a74

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +107 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import subprocess
4
+ import glob
5
+ import re
6
+ import traceback
7
+ import gradio as gr
8
+ from openai import OpenAI
9
+
10
# The OpenAI API key is read from the environment; on Hugging Face Spaces it
# is configured under Settings -> Secrets.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Shared client used by the transcription and summarization functions below.
openai = OpenAI(api_key=openai_api_key)
13
+
14
def download_audio(youtube_url):
    """Download the audio track of a video as MP3 into ``/tmp`` using yt-dlp.

    Files matching ``/tmp/downloaded_audio.*`` left over from an earlier call
    are deleted before the download starts. The captured stdout/stderr of
    yt-dlp are printed for debugging.

    Args:
        youtube_url: URL of the video to download.

    Returns:
        Path of the downloaded audio file (the ``.mp3`` when one exists).

    Raises:
        RuntimeError: If the URL is empty, yt-dlp cannot be started or exits
            with a non-zero status, or no output file is found. The message
            is always prefixed with ``"Download error: "``.
    """
    try:
        # Reject missing input before touching the filesystem or spawning
        # a process.
        if not isinstance(youtube_url, str) or not youtube_url.strip():
            raise ValueError("No YouTube URL provided.")
        youtube_url = youtube_url.strip()

        output_template = "/tmp/downloaded_audio.%(ext)s"
        output_pattern = "/tmp/downloaded_audio.*"

        # Remove any old files.
        # NOTE: the output path is fixed, so all callers share it.
        for stale_path in glob.glob(output_pattern):
            os.remove(stale_path)

        command = [
            "yt-dlp", "-f", "bestaudio",
            "--extract-audio", "--audio-format", "mp3",
            "--audio-quality", "0",
            "-o", output_template,
            # End of options: the URL comes from the user, so it must never
            # be parsed as a yt-dlp flag even if it starts with "-".
            "--",
            youtube_url,
        ]

        result = subprocess.run(command, capture_output=True, text=True)
        print("stdout:\n", result.stdout)
        print("stderr:\n", result.stderr)

        if result.returncode != 0:
            raise RuntimeError(f"yt-dlp failed: {result.stderr}")

        files = sorted(glob.glob(output_pattern))
        if not files:
            raise FileNotFoundError("No audio file downloaded.")

        # Prefer the converted MP3 over any other file matching the pattern.
        mp3_files = [path for path in files if path.endswith(".mp3")]
        return mp3_files[0] if mp3_files else files[0]
    except Exception as e:
        raise RuntimeError(f"Download error: {e}") from e
44
+
45
def transcribe_audio(file_path):
    """Transcribe an audio file with the ``whisper-1`` model.

    The request uses ``response_format="verbose_json"`` so that the detected
    language is returned together with the transcript.

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        A ``(text, language)`` tuple taken from the transcription response.

    Raises:
        RuntimeError: If the file cannot be opened or the API call fails.
            The message is prefixed with ``"Transcription error: "``.
    """
    try:
        with open(file_path, "rb") as f:
            result = openai.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                response_format="verbose_json",
            )

        # The v1 client returns a model object, which is not subscriptable,
        # so read attributes. Plain dict responses are still accepted.
        if isinstance(result, dict):
            return result["text"], result["language"]
        return result.text, result.language
    except Exception as e:
        raise RuntimeError(f"Transcription error: {e}") from e
56
+
57
def summarize_text(text, lang):
    """Summarize a transcript with ``gpt-3.5-turbo``.

    The system prompt is chosen from the detected language: Traditional
    Chinese for ``zh*``/"chinese", Japanese for ``ja*``/"japanese", and
    English for everything else.

    Args:
        text: Transcript to summarize.
        lang: Detected language code or name; may be ``None`` or empty, in
            which case the English prompt is used.

    Returns:
        A ``(summary, debug_info)`` tuple, where ``debug_info`` reports the
        normalized language and the system prompt that was used.
    """
    # A missing language must not crash the request; fall back to English.
    lang = (lang or "unknown").lower()

    if lang.startswith("zh") or "chinese" in lang:
        prompt = "你是一位聰明的助手,能夠用繁體中文清楚且完整地摘要影片內容。"
    elif lang.startswith("ja") or "japanese" in lang:
        prompt = "あなたは日本語で要点を簡潔かつ分かりやすく要約する有能なアシスタントです。"
    else:
        prompt = "You are a helpful assistant that summarizes transcripts clearly and concisely."

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": f"Summarize the following transcript:\n\n{text}"},
        ],
    )

    # The message content can be None; always hand a string to the UI.
    summary = response.choices[0].message.content or ""
    debug_info = f"🌐 Detected Language: {lang}\n🧠 Prompt Used: {prompt}"
    return summary, debug_info
77
+
78
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognizes ``watch?v=<id>``, ``shorts/<id>``, ``youtu.be/<id>``,
    ``embed/<id>`` and ``live/<id>`` forms.

    Args:
        url: The URL to inspect; ``None`` and non-string values are accepted.

    Returns:
        The video ID, or ``None`` when no ID can be found.
    """
    if not isinstance(url, str):
        return None
    match = re.search(
        r"(?:v=|shorts/|youtu\.be/|embed/|live/)([a-zA-Z0-9_-]{11})", url
    )
    return match.group(1) if match else None
81
+
82
def full_process(youtube_url):
    """Run the whole pipeline for one URL: download, transcribe, summarize.

    This is the Gradio click handler, so it never raises; failures are
    reported through the first return value instead.

    Args:
        youtube_url: Video link entered by the user.

    Returns:
        A ``(summary, debug_info, thumbnail_url)`` tuple. On failure the
        tuple is ``("❌ Error: <message>", "", None)``. ``thumbnail_url`` is
        ``None`` when no video ID can be extracted from the URL.
    """
    # Answer immediately on empty input instead of starting a download.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "❌ Error: Please enter a YouTube video link.", "", None
    youtube_url = youtube_url.strip()

    try:
        video_id = extract_video_id(youtube_url)
        # NOTE(review): not every video may have a "maxresdefault" thumbnail;
        # confirm the image component handles a missing image gracefully.
        thumbnail_url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg" if video_id else None
        audio_path = download_audio(youtube_url)
        transcript, lang = transcribe_audio(audio_path)
        summary, debug = summarize_text(transcript, lang)
        return summary, debug, thumbnail_url
    except Exception as e:
        # Keep the full stack trace in the server log; the user only sees
        # the short message.
        traceback.print_exc()
        return f"❌ Error: {str(e)}", "", None
92
+
93
# Gradio UI: one input row (URL box + button) followed by three outputs that
# receive the values returned by full_process, in the same order.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 YouTube AI Summarizer\nEasily extract summaries from YouTube videos using Whisper + GPT. Supports English/Japanese/Chinese.")

    with gr.Row():
        youtube_input = gr.Textbox(label="🎥 Enter YouTube Video Link")
        submit_btn = gr.Button("🔍 Summarize")

    summary_output = gr.Textbox(label="📝 AI Video Summary", lines=6)
    info_output = gr.Textbox(label="📄 Language & Model Info", lines=4)
    thumbnail_output = gr.Image(label="🎞️ Video Thumbnail", visible=True)

    # Wire the button to the pipeline.
    submit_btn.click(
        fn=full_process,
        inputs=youtube_input,
        outputs=[summary_output, info_output, thumbnail_output],
    )

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ yt-dlp
4
+ ffmpeg-python