Spaces:

wangston9
/

youtube-summary-ai

Sleeping

App Files Community

youtube-summary-ai / app.py

wangston9

Update app.py

84eefde verified 11 days ago

raw

history blame contribute delete

3.87 kB

	# app.py
	import os
	import subprocess
	import glob
	import re
	import traceback
	import gradio as gr
	from openai import OpenAI

	# The API key is read from the OPENAI_API_KEY environment variable
	# (on Hugging Face Spaces: Settings -> Secrets). The resulting client is
	# shared by the transcription and summarization functions below.
	openai_api_key = os.environ.get("OPENAI_API_KEY")
	openai = OpenAI(api_key=openai_api_key)

	def download_audio(youtube_url):
	    """Download a video's audio track as MP3 using the ``yt-dlp`` CLI.

	    Existing ``/tmp/downloaded_audio.*`` files are deleted first, then
	    ``yt-dlp`` is invoked to extract the best audio stream and convert it
	    to MP3. yt-dlp's stdout and stderr are printed for debugging.

	    Args:
	        youtube_url: URL of the video to download.

	    Returns:
	        Path of the downloaded audio file under ``/tmp``.

	    Raises:
	        RuntimeError: If the URL is empty, yt-dlp exits with a non-zero
	            status, or no output file is found. The underlying exception
	            is attached as ``__cause__``.
	    """
	    try:
	        url = (youtube_url or "").strip()
	        if not url:
	            # Fail before touching the filesystem or spawning a process.
	            raise ValueError("No YouTube URL provided.")

	        output_template = "/tmp/downloaded_audio.%(ext)s"

	        # Remove any old files
	        for stale in glob.glob("/tmp/downloaded_audio.*"):
	            os.remove(stale)

	        command = [
	            "yt-dlp", "-f", "bestaudio",
	            "--extract-audio", "--audio-format", "mp3",
	            "--audio-quality", "0",
	            "-o", output_template,
	            # "--" ends option parsing, so user input that starts with "-"
	            # is treated as a URL and cannot inject yt-dlp options.
	            "--", url,
	        ]

	        result = subprocess.run(command, capture_output=True, text=True)
	        print("stdout:\n", result.stdout)
	        print("stderr:\n", result.stderr)

	        if result.returncode != 0:
	            raise RuntimeError(f"yt-dlp failed: {result.stderr}")

	        # glob order is arbitrary; put the converted .mp3 first so a
	        # leftover intermediate file is never picked over it.
	        files = sorted(
	            glob.glob("/tmp/downloaded_audio.*"),
	            key=lambda path: not path.endswith(".mp3"),
	        )
	        if not files:
	            raise FileNotFoundError("No audio file downloaded.")

	        return files[0]
	    except Exception as e:
	        raise RuntimeError(f"Download error: {e}") from e

	def transcribe_audio(file_path):
	    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

	    Args:
	        file_path: Path of the audio file to upload.

	    Returns:
	        A ``(text, language)`` tuple read from the transcription response.

	    Raises:
	        RuntimeError: If the file cannot be opened or the API call fails.
	            The underlying exception is attached as ``__cause__``.
	    """
	    try:
	        with open(file_path, "rb") as audio_file:
	            result = openai.audio.transcriptions.create(
	                model="whisper-1",
	                file=audio_file,
	                # The code below reads ``result.language``, hence the
	                # verbose response format.
	                response_format="verbose_json",
	            )
	        return result.text, result.language
	    except Exception as e:
	        # Chain the cause so the original traceback is preserved.
	        raise RuntimeError(f"Transcription error: {e}") from e

	def summarize_text(text, lang):
	    """Summarize a transcript with ``gpt-3.5-turbo``.

	    The system prompt is chosen from the detected language: Traditional
	    Chinese for ``zh*``/"chinese", Japanese for ``ja*``/"japanese", and
	    English otherwise.

	    Args:
	        text: Transcript to summarize.
	        lang: Language code or name for the transcript; ``None`` or an
	            empty string selects the English prompt.

	    Returns:
	        A ``(summary, debug_info)`` tuple, where ``debug_info`` reports the
	        normalized language and the system prompt that was used.
	    """
	    # A missing language must not crash on ``.lower()``; treat it as
	    # "unknown" and fall through to the English prompt.
	    lang = (lang or "").lower()
	    if lang.startswith("zh") or "chinese" in lang:
	        prompt = "你是一位聰明的助手，能夠用繁體中文清楚且完整地摘要影片內容。"
	    elif lang.startswith("ja") or "japanese" in lang:
	        prompt = "あなたは日本語で要点を簡潔かつ分かりやすく要約する有能なアシスタントです。"
	    else:
	        prompt = "You are a helpful assistant that summarizes transcripts clearly and concisely."

	    response = openai.chat.completions.create(
	        model="gpt-3.5-turbo",
	        messages=[
	            {"role": "system", "content": prompt},
	            {"role": "user", "content": f"Summarize the following transcript:\n\n{text}"},
	        ],
	    )

	    summary = response.choices[0].message.content
	    debug_info = f"🌐 Detected Language: {lang}\n🧠 Prompt Used: {prompt}"
	    return summary, debug_info

	def extract_video_id(url):
	    """Extract the 11-character video ID from a YouTube URL.

	    Recognizes ``...?v=<id>``, ``/shorts/<id>``, ``youtu.be/<id>`` and
	    ``/embed/<id>`` forms.

	    Args:
	        url: The URL to inspect; may be ``None`` or empty.

	    Returns:
	        The video ID string, or ``None`` if no ID is found.
	    """
	    if not url:
	        return None
	    # The alternation must use a bare "|"; an escaped "\|" matches a
	    # literal pipe character and would never match a real URL.
	    match = re.search(
	        r"(?:v=|shorts/|youtu\.be/|embed/)([a-zA-Z0-9_-]{11})", url
	    )
	    return match.group(1) if match else None

	def full_process(youtube_url):
	    """Run the full pipeline for one video: download, transcribe, summarize.

	    Args:
	        youtube_url: The link entered by the user.

	    Returns:
	        A ``(summary, debug_info, thumbnail_url)`` tuple. On failure the
	        first element is an error message prefixed with "❌ Error:", the
	        second is an empty string and the third is ``None``.
	    """
	    url = (youtube_url or "").strip()
	    if not url:
	        # Nothing to process; skip the download and API calls entirely.
	        return "❌ Error: Please enter a YouTube video link.", "", None

	    try:
	        video_id = extract_video_id(url)
	        thumbnail_url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg" if video_id else None
	        audio_path = download_audio(url)
	        transcript, lang = transcribe_audio(audio_path)
	        summary, debug = summarize_text(transcript, lang)
	        return summary, debug, thumbnail_url
	    except Exception as e:
	        # The UI only shows the short message; keep the full traceback in
	        # the process logs so failures can be diagnosed.
	        traceback.print_exc()
	        return f"❌ Error: {str(e)}", "", None

	# Gradio UI: one input row (link + button) followed by three outputs.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "## 🧠 YouTube AI Summarizer\nEasily extract summaries from YouTube videos using Whisper + GPT. Supports English/Japanese/Chinese."
	    )

	    # Input row: the video link and the button that starts processing.
	    with gr.Row():
	        youtube_input = gr.Textbox(
	            label="🎥 Enter YouTube Video Link",
	        )
	        submit_btn = gr.Button(
	            "🔍 Summarize",
	        )

	    # Outputs, in the same order as the tuple returned by full_process.
	    summary_output = gr.Textbox(
	        label="📝 AI Video Summary",
	        lines=6,
	    )
	    info_output = gr.Textbox(
	        label="📄 Language & Model Info",
	        lines=4,
	    )
	    thumbnail_output = gr.Image(
	        label="🎞️ Video Thumbnail",
	        visible=True,
	    )

	    # Clicking the button runs the whole pipeline on the entered link.
	    submit_btn.click(
	        fn=full_process,
	        inputs=youtube_input,
	        outputs=[summary_output, info_output, thumbnail_output],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()