Spaces:

anurag629
/

YoutubeVideoSummarizer

Sleeping

App Files Files Community

YoutubeVideoSummarizer / app.py

anurag629

completed

62e9276 5 months ago

raw

history blame contribute delete

4.11 kB

	import streamlit as st
	import yt_dlp
	import whisper
	import os
	from openai import OpenAI

	# App title
	st.title("Youtube Video Summarizer/Notes Maker by Anurag")

	# Input fields for YouTube video URL and OpenAI API key
	video_url = st.text_input("Enter a valid YouTube video URL:")
	api_key = st.text_input("Enter your OpenAI API Key:", type="password")

	# Option to choose between formatted markdown notes or summarization
	option = st.radio("Select an option:", ["Create formatted markdown notes", "Summarize video with a given length"])

	# Input field for summary length if "Summarize" is selected
	summary_length = None
	if option == "Summarize video with a given length":
	summary_length = st.slider("Select summary length (words):", min_value=50, max_value=500, step=50)

	# Function to download audio using yt-dlp
	def download_audio_with_ytdlp(video_url, output_path="downloads"):
	os.makedirs(output_path, exist_ok=True)
	ydl_opts = {
	'format': 'bestaudio/best',
	'outtmpl': f'{output_path}/%(id)s.%(ext)s',
	'postprocessors': [{
	'key': 'FFmpegExtractAudio',
	'preferredcodec': 'mp3',
	'preferredquality': '192',
	}],
	}
	with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	info_dict = ydl.extract_info(video_url, download=True)
	audio_file = f"{output_path}/{info_dict['id']}.mp3"
	return audio_file

	# Whisper transcription function
	def transcribe_audio(audio_file):
	model = whisper.load_model("base")
	result = model.transcribe(audio_file)
	return result["text"]

	# Button to start processing
	if st.button("Generate"):
	if not video_url or not api_key:
	st.error("Please provide both a valid YouTube URL and OpenAI API Key.")
	else:
	try:
	# Download audio using yt-dlp
	st.info("Downloading audio...")
	audio_file = download_audio_with_ytdlp(video_url)
	st.success(f"Audio downloaded successfully! File: {audio_file}")

	# Transcribe audio using Whisper
	st.info("Transcribing audio...")
	transcript = transcribe_audio(audio_file)
	st.success("Audio transcribed successfully!")

	# OpenAI API configuration
	client = OpenAI(api_key=api_key)

	# Prompt based on user selection
	if option == "Create formatted markdown notes":
	system_message = "Generate detailed and well-structured markdown notes from the following transcript."
	else:
	system_message = f"Summarize the following transcript into approximately {summary_length} words."

	# Call OpenAI chat completion API
	st.info("Processing with OpenAI...")
	try:
	response = client.chat.completions.create(
	model="gpt-4o",
	messages=[
	{
	"role": "system",
	"content": [
	{
	"type": "text",
	"text": system_message
	}
	]
	},
	{
	"role": "user",
	"content": transcript
	}
	],
	response_format={
	"type": "text"
	},
	temperature=1,
	max_tokens=16383,
	top_p=1,
	frequency_penalty=0,
	presence_penalty=0
	)

	# Extract the generated content
	output = response['choices'][0]['message']['content']
	st.success("Output generated successfully!")
	st.text_area("Generated Output:", output, height=300)

	except Exception as e:
	st.error(f"Error with OpenAI API: {e}")


	except Exception as e:
	st.error(f"An error occurred: {e}")