tool-YoutubeTranscript-blog

Running

App Files Files Community

tool-YoutubeTranscript-blog / app.py

VPCSinfo

Add language-aware summarization and DOCX generation

41a98e7 about 22 hours ago

raw

history blame

4.95 kB

	import gradio as gr
	import os
	from dotenv import load_dotenv
	from tool import YouTubeTranscriptExtractor, TranscriptSummarizer, TranscriptToDocx

	# Load environment variables (the UI below reads GEMINI_API_KEY via os.getenv
	# and tells the user to put it in a .env file).
	load_dotenv()

	# Module-level tool instances; process_youtube_video uses these on every call.
	youtube_tool = YouTubeTranscriptExtractor()
	docx_tool = TranscriptToDocx()
	# No module-level TranscriptSummarizer: process_youtube_video constructs one
	# per call because it needs the hf_api_key supplied with that call.

	# Delimiters that may separate the "LANGUAGE:<code>" header from the transcript.
	# The backslash-escaped form is what the original literal matched; the plain
	# form is what the documented format ("LANGUAGE:lang||transcript_text") implies.
	_TRANSCRIPT_DELIMITERS = ("\\|\\|", "||")
	_LANGUAGE_PREFIX = "LANGUAGE:"
	_IMAGE_URL_MARKER = "\n\nImage URL: "
	_DEFAULT_LANGUAGE = "en"


	def _parse_transcript_result(transcript_result):
	    """Split the extractor output into ``(transcript, language)``.

	    Falls back to the whole text and English when no language header is found.
	    """
	    if not isinstance(transcript_result, str):
	        print(
	            "Warning: Error parsing transcript data: "
	            f"expected str, got {type(transcript_result).__name__}, using English as default"
	        )
	        return "Error extracting transcript", _DEFAULT_LANGUAGE

	    # Use whichever delimiter appears first so a later "||" inside the
	    # transcript text is never mistaken for the header separator.
	    positions = [
	        (transcript_result.find(delimiter), delimiter)
	        for delimiter in _TRANSCRIPT_DELIMITERS
	    ]
	    positions = [(index, delimiter) for index, delimiter in positions if index != -1]
	    if positions:
	        _, delimiter = min(positions)
	        header, _, body = transcript_result.partition(delimiter)
	        # The marker must be in the header; a "LANGUAGE:" that only occurs in
	        # the transcript body is not a language declaration.
	        if _LANGUAGE_PREFIX in header:
	            language = header.replace(_LANGUAGE_PREFIX, "").strip() or _DEFAULT_LANGUAGE
	            print(f"Detected language: {language}")
	            return body, language

	    print("Warning: Could not detect language, using English as default")
	    return transcript_result, _DEFAULT_LANGUAGE


	def _split_summary_and_image(summary_and_blog):
	    """Split the summarizer output into ``(summary, image_url_or_None)``."""
	    if not isinstance(summary_and_blog, str):
	        return summary_and_blog, None

	    # rpartition splits on the LAST marker, so a summary that itself contains
	    # the marker text no longer causes the image to be dropped.
	    # NOTE(review): assumes the summarizer appends the image line at the end.
	    summary, marker, image_url = summary_and_blog.rpartition(_IMAGE_URL_MARKER)
	    if not marker:
	        return summary_and_blog, None
	    return summary, image_url.strip() or None


	def _resolve_docx_path(file_value):
	    """Return an existing filesystem path from a Gradio file value, or None.

	    Accepts a path string, an object with a ``name`` attribute, or a list
	    whose first element is one of those.
	    """
	    if isinstance(file_value, list):
	        file_value = file_value[0] if file_value else None
	    if file_value is None or file_value == "":
	        return None

	    if isinstance(file_value, str):
	        candidate = file_value
	    else:
	        candidate = getattr(file_value, "name", None)

	    if isinstance(candidate, str) and candidate and os.path.exists(candidate):
	        return candidate
	    return None


	def process_youtube_video(video_url, hf_api_key, existing_docx_path=None):
	    """Extract, summarize and export a YouTube video's transcript.

	    Args:
	        video_url: URL of the YouTube video to process.
	        hf_api_key: Hugging Face API key passed to ``TranscriptSummarizer``.
	        existing_docx_path: Optional existing DOCX to update, as a path string,
	            an object with a ``name`` attribute, or a list of either.

	    Returns:
	        A tuple ``(transcript, summary, image_url, docx_path)``; ``image_url``
	        is None when the summarizer output carries no image line.
	    """
	    # Built per call because it depends on the caller-supplied key.
	    summarizer_tool = TranscriptSummarizer(hf_api_key=hf_api_key)

	    # The title is best-effort: any lookup failure falls back to a generic one.
	    from pytubefix import YouTube
	    try:
	        video_title = YouTube(video_url).title
	    except Exception as e:
	        print(f"Warning: Could not fetch video title: {str(e)}, using default title")
	        video_title = "YouTube Video"

	    # Extract transcript and detect language.
	    transcript_result = youtube_tool.forward(video_url=video_url)
	    transcript, language = _parse_transcript_result(transcript_result)

	    # Generate summary and pull out the optional image URL.
	    summary_and_blog = summarizer_tool.forward(transcript=transcript, language=language)
	    summary, image_url = _split_summary_and_image(summary_and_blog)

	    # Generate or update the DOCX file.
	    docx_path = docx_tool.forward(
	        transcript=transcript,
	        summary=summary,
	        video_title=video_title,
	        image_path=image_url,
	        existing_docx_path=_resolve_docx_path(existing_docx_path),
	    )

	    return transcript, summary, image_url, docx_path

	# Gradio UI: inputs in the left column, results in the right column, and a
	# button that runs process_youtube_video.
	with gr.Blocks() as demo:
	    gr.Markdown("# YouTube Transcript Summarizer and Blog Content Generator")
	    gr.Markdown(
	        "Enter a YouTube video URL and Hugging Face API Key to extract the transcript, summarize it, and generate blog content with an image and DOCX file. Optionally, you can provide an existing DOCX file to update."
	    )

	    # Warn in the page when GEMINI_API_KEY is unset, empty, or still the
	    # placeholder value.
	    gemini_api_key = os.environ.get("GEMINI_API_KEY")
	    if gemini_api_key in (None, "", "your_gemini_api_key_here"):
	        gr.Markdown(
	            "⚠️ Warning: Gemini API key is not set in the .env file. Please add your Gemini API key to the .env file to use the summarization feature."
	        )

	    with gr.Row():
	        # Input side.
	        with gr.Column():
	            video_url = gr.Textbox(label="YouTube Video URL")
	            hf_api_key = gr.Textbox(
	                label="Hugging Face API Key",
	                type="password",
	            )
	            existing_docx = gr.File(
	                label="Existing DOCX file (optional)",
	                file_types=[".docx"],
	            )
	            submit_btn = gr.Button("Process Video")

	        # Output side.
	        with gr.Column():
	            transcript_output = gr.Textbox(label="Transcript")
	            summary_output = gr.Textbox(label="Summary and Blog Content")
	            image_output = gr.Image(
	                label="Generated Image",
	                image_mode="RGBA",
	            )
	            docx_output = gr.File(label="Generated DOCX File")

	    # Component order here matches the function's parameters and return tuple.
	    submit_btn.click(
	        fn=process_youtube_video,
	        inputs=[video_url, hf_api_key, existing_docx],
	        outputs=[transcript_output, summary_output, image_output, docx_output],
	    )

	# Second module-level name bound to the same Blocks app.
	iface = demo

	# Launch the app as soon as this module runs; there is no __main__ guard, so
	# importing the module also starts it.
	iface.launch()