import streamlit as st
import yt_dlp
import whisper
import os
from openai import OpenAI

# App title
st.title("Youtube Video Summarizer/Notes Maker by Anurag") | |
# Input fields for YouTube video URL and OpenAI API key
video_url = st.text_input("Enter a valid YouTube video URL:")
api_key = st.text_input("Enter your OpenAI API Key:", type="password")

# Option to choose between formatted markdown notes or summarization
option = st.radio("Select an option:", ["Create formatted markdown notes", "Summarize video with a given length"])

# Input field for summary length if "Summarize" is selected
summary_length = None
if option == "Summarize video with a given length":
    summary_length = st.slider("Select summary length (words):", min_value=50, max_value=500, step=50)

# Function to download audio using yt-dlp
def download_audio_with_ytdlp(video_url, output_path="downloads"):
    os.makedirs(output_path, exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{output_path}/%(id)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_url, download=True)
        audio_file = f"{output_path}/{info_dict['id']}.mp3"
    return audio_file
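
# Note: the FFmpegExtractAudio post-processor requires ffmpeg to be installed and
# available on PATH; without it, the .mp3 conversion step will fail.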

# Whisper transcription function
def transcribe_audio(audio_file):
    model = whisper.load_model("base")
    result = model.transcribe(audio_file)
    return result["text"]

# Button to start processing
if st.button("Generate"):
    if not video_url or not api_key:
        st.error("Please provide both a valid YouTube URL and OpenAI API Key.")
    else:
        try:
            # Download audio using yt-dlp
            st.info("Downloading audio...")
            audio_file = download_audio_with_ytdlp(video_url)
            st.success(f"Audio downloaded successfully! File: {audio_file}")

            # Transcribe audio using Whisper
            st.info("Transcribing audio...")
            transcript = transcribe_audio(audio_file)
            st.success("Audio transcribed successfully!")

            # OpenAI API configuration
            client = OpenAI(api_key=api_key)

            # Prompt based on user selection
            if option == "Create formatted markdown notes":
                system_message = "Generate detailed and well-structured markdown notes from the following transcript."
            else:
                system_message = f"Summarize the following transcript into approximately {summary_length} words."
            # Call OpenAI chat completion API
            st.info("Processing with OpenAI...")
            try:
                response = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": transcript},
                    ],
                    response_format={"type": "text"},
                    temperature=1,
                    max_tokens=16383,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                )
                # Extract the generated content (the v1 OpenAI SDK returns objects, not dicts)
                output = response.choices[0].message.content
                st.success("Output generated successfully!")
                st.text_area("Generated Output:", output, height=300)
            except Exception as e:
                st.error(f"Error with OpenAI API: {e}")
        except Exception as e:
            st.error(f"An error occurred: {e}")