Spaces:
Sleeping
Sleeping
# app.py | |
import os | |
import subprocess | |
import glob | |
import re | |
import traceback | |
import gradio as gr | |
from openai import OpenAI | |
# The OpenAI API key is read from the environment; on Hugging Face Spaces it
# is configured under Settings -> Secrets.
openai_api_key = os.environ.get("OPENAI_API_KEY")
# This client instance is bound to the name `openai` and is what the
# transcription and summarization functions in this file call.
openai = OpenAI(api_key=openai_api_key)
# def download_audio(youtube_url): | |
# try: | |
# output_template = "/tmp/downloaded_audio.%(ext)s" | |
# # Remove any old files | |
# for f in glob.glob("/tmp/downloaded_audio.*"): | |
# os.remove(f) | |
# command = [ | |
# "yt-dlp", "-f", "bestaudio", | |
# "--extract-audio", "--audio-format", "mp3", | |
# "--audio-quality", "0", | |
# "-o", output_template, | |
# youtube_url | |
# ] | |
# result = subprocess.run(command, capture_output=True, text=True) | |
# print("stdout:\n", result.stdout) | |
# print("stderr:\n", result.stderr) | |
# if result.returncode != 0: | |
# raise RuntimeError(f"yt-dlp failed: {result.stderr}") | |
# files = glob.glob("/tmp/downloaded_audio.*") | |
# if not files: | |
# raise FileNotFoundError("No audio file downloaded.") | |
# return files[0] | |
# except Exception as e: | |
# raise RuntimeError(f"Download error: {e}") | |
from pytube import YouTube | |
def clean_url(url):
    """Reduce a YouTube link to its canonical ``watch?v=<id>`` form.

    The 11-character video ID is looked up after any of ``v=``, ``shorts/``,
    ``live/`` or ``youtu.be/``. Extra query parameters (playlist, timestamp,
    tracking) are dropped from the result.

    Args:
        url: The link as entered by the user.

    Returns:
        ``https://www.youtube.com/watch?v=<id>`` when an ID is found,
        otherwise ``url`` unchanged.
    """
    match = re.search(
        r"(?:v=|shorts/|live/|youtu\.be/)([a-zA-Z0-9_-]{11})", url
    )
    if match is None:
        # Not a recognizable video link: hand it back untouched.
        return url
    return f"https://www.youtube.com/watch?v={match.group(1)}"
def download_audio(youtube_url):
    """Download the audio of a YouTube video into ``/tmp`` and return its path.

    yt-dlp is tried first (best audio stream, extracted as MP3). If yt-dlp
    cannot be started, exits with a non-zero status, or leaves no output
    file, the function falls back to pytube and saves the first audio-only
    stream as ``/tmp/fallback_audio.mp4``.

    Args:
        youtube_url: The video link as entered by the user.

    Returns:
        Path of the downloaded audio file.

    Raises:
        RuntimeError: If the download fails; the message is prefixed with
            ``"Download error: "`` and the original exception is chained.
    """
    output_template = "/tmp/downloaded_audio.%(ext)s"
    try:
        # Remove files left by earlier runs so the glob below can only pick
        # up the file produced by this call.
        for stale in glob.glob("/tmp/downloaded_audio.*"):
            os.remove(stale)

        command = [
            "yt-dlp",
            "-f", "bestaudio",
            "--extract-audio", "--audio-format", "mp3",
            "--audio-quality", "0",
            "-o", output_template,
            # "--" ends option parsing: the URL comes from the user, and a
            # value starting with "-" must not be read as a yt-dlp option.
            "--",
            youtube_url,
        ]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # yt-dlp is missing or not executable; still try pytube.
            print(f"yt-dlp could not be started: {exc}")
        else:
            if result.returncode == 0:
                files = glob.glob("/tmp/downloaded_audio.*")
                if files:
                    return files[0]
            else:
                # Keep the reason visible in the logs instead of dropping it.
                print("yt-dlp stderr:\n", result.stderr)

        # Fallback to pytube.
        print("๐ yt-dlp failed, trying pytube...")
        safe_url = clean_url(youtube_url)
        yt = YouTube(safe_url)
        stream = yt.streams.filter(only_audio=True).first()
        if stream is None:
            # .first() yields None when the filter matches nothing.
            raise RuntimeError("no audio-only stream available")
        output_path = "/tmp/fallback_audio.mp4"
        stream.download(filename=output_path)
        return output_path
    except Exception as e:
        raise RuntimeError(f"Download error: {e}") from e
def transcribe_audio(file_path):
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        A ``(text, language)`` tuple taken from the ``verbose_json``
        transcription response.

    Raises:
        RuntimeError: If opening the file or the API call fails; the message
            is prefixed with ``"Transcription error: "`` and the original
            exception is chained.
    """
    try:
        with open(file_path, "rb") as f:
            result = openai.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                response_format="verbose_json",
            )
        # The OpenAI v1 client returns a response object, not a dict, so the
        # fields are read as attributes; result["text"] raises TypeError.
        return result.text, result.language
    except Exception as e:
        raise RuntimeError(f"Transcription error: {e}") from e
def summarize_text(text, lang):
    """Summarize a transcript with ``gpt-3.5-turbo``.

    The system prompt is chosen from the detected language: a Chinese prompt
    when ``lang`` starts with ``zh`` or contains ``chinese``, a Japanese
    prompt when it starts with ``ja`` or contains ``japanese``, and an
    English prompt otherwise. The comparison is done on the lower-cased value.

    Args:
        text: The transcript to summarize.
        lang: Language code or name reported for the transcript.

    Returns:
        A ``(summary, debug_info)`` tuple; ``debug_info`` reports the
        lower-cased language and the system prompt that was used.
    """
    lang = lang.lower()
    is_chinese = lang.startswith("zh") or "chinese" in lang
    is_japanese = lang.startswith("ja") or "japanese" in lang

    if is_chinese:
        prompt = "ไฝ ๆฏไธไฝ่ฐๆ็ๅฉๆ๏ผ่ฝๅค ็จ็น้ซไธญๆๆธ ๆฅไธๅฎๆดๅฐๆ่ฆๅฝฑ็ๅ งๅฎนใ"
    elif is_japanese:
        prompt = "ใใชใใฏๆฅๆฌ่ชใง่ฆ็นใ็ฐกๆฝใใคๅใใใใใ่ฆ็ดใใๆ่ฝใชใขใทในใฟใณใใงใใ"
    else:
        prompt = "You are a helpful assistant that summarizes transcripts clearly and concisely."

    # The system message carries the language-specific instruction; the user
    # message carries the transcript itself.
    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": f"Summarize the following transcript:\n\n{text}"},
    ]
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    summary = response.choices[0].message.content
    return summary, f"๐ Detected Language: {lang}\n๐ง Prompt Used: {prompt}"
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in ``url``.

    The ID is looked up after any of ``v=``, ``shorts/``, ``live/`` or
    ``youtu.be/``.

    Args:
        url: The link as entered by the user; may be empty or ``None``.

    Returns:
        The video ID, or ``None`` when ``url`` is empty or contains no
        recognizable ID.
    """
    if not url:
        # Nothing to search; avoids a TypeError from re.search(None).
        return None
    match = re.search(
        r"(?:v=|shorts/|live/|youtu\.be/)([a-zA-Z0-9_-]{11})", url
    )
    return match.group(1) if match else None
def full_process(youtube_url):
    """Run the download -> transcribe -> summarize pipeline for one video.

    This is the click handler of the Gradio UI, so it never raises: every
    failure is turned into an error message in the first return slot.

    Args:
        youtube_url: The link typed into the input box.

    Returns:
        A ``(summary, debug_info, thumbnail_url)`` tuple. On failure the
        tuple is ``(error_message, "", None)``. ``thumbnail_url`` is ``None``
        when no video ID can be extracted from the link.
    """
    # Pasted links often carry surrounding whitespace; a blank input would
    # otherwise run both downloaders only to fail.
    youtube_url = (youtube_url or "").strip()
    if not youtube_url:
        return "โ Error: Please enter a YouTube video link.", "", None

    try:
        video_id = extract_video_id(youtube_url)
        thumbnail_url = (
            f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
            if video_id
            else None
        )
        audio_path = download_audio(youtube_url)
        transcript, lang = transcribe_audio(audio_path)
        summary, debug = summarize_text(transcript, lang)
        return summary, debug, thumbnail_url
    except Exception as e:
        # The UI only shows the short message; keep the full traceback in
        # the server log so failures can be diagnosed.
        traceback.print_exc()
        return f"โ Error: {str(e)}", "", None
# Gradio UI: one text input and a button, three outputs (summary, language and
# prompt info, thumbnail), all wired to full_process.
with gr.Blocks() as demo:
    gr.Markdown(
        "## ๐ง YouTube AI Summarizer\nEasily extract summaries from YouTube videos using Whisper + GPT. Supports English/Japanese/Chinese."
    )

    # NOTE(review): the nesting under gr.Row() is reconstructed (input box and
    # button side by side) - confirm against the deployed layout.
    with gr.Row():
        youtube_input = gr.Textbox(label="๐ฅ Enter YouTube Video Link")
        submit_btn = gr.Button("๐ Summarize")

    summary_output = gr.Textbox(label="๐ AI Video Summary", lines=6)
    info_output = gr.Textbox(label="๐ Language & Model Info", lines=4)
    thumbnail_output = gr.Image(label="๐๏ธ Video Thumbnail", visible=True)

    # The outputs are listed in the order full_process returns its tuple.
    submit_btn.click(
        fn=full_process,
        inputs=youtube_input,
        outputs=[summary_output, info_output, thumbnail_output],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()