import os
import streamlit as st
import moviepy.editor as mp
import whisper
from transformers import pipeline
# ✅ Ensure ffmpeg is installed (needed for moviepy)
if not os.path.exists("/usr/bin/ffmpeg"):
    os.system("apt-get update && apt-get install -y ffmpeg")
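# Note: on Hugging Face Spaces, system packages are usually declared in a packages.txt
# file (a single line containing "ffmpeg") rather than installed at runtime; the
# apt-get call above only works if the container user is allowed to install packages.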
# ✅ Function to extract audio from a video
def extract_audio(video_path, audio_path="audio.wav"):
    if os.path.exists(audio_path):
        os.remove(audio_path)  # remove any leftover audio from a previous run
    video = mp.VideoFileClip(video_path)  # open the video with moviepy
    video.audio.write_audiofile(audio_path)  # write the audio track to a WAV file
    return audio_path
# ✅ Function to transcribe audio using Whisper
def transcribe_audio(audio_path):
    model = whisper.load_model("base", download_root="./models")  # downloads the model on first use
    result = model.transcribe(audio_path)
    return result["text"]
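# Note: load_model runs on every call, so the Whisper weights are reloaded on each
# Streamlit rerun. A minimal caching sketch (assumes Streamlit >= 1.18 for
# st.cache_resource; get_whisper_model is illustrative, not part of the original app):
#
# @st.cache_resource
# def get_whisper_model():
#     return whisper.load_model("base", download_root="./models")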
# ✅ Function to summarize text
def summarize_text(text):
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    # Split the transcript into 1000-character chunks so each piece stays within the
    # model's input limit, then summarize the chunks independently.
    max_chunk_size = 1000
    chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
    summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)
# ✅ Function to generate study notes
def generate_study_notes(summary):
    generator = pipeline("text-generation", model="gpt2")
    prompt = f"Create study notes from the following summary:\n{summary}"
    # max_new_tokens bounds the generated continuation; passing max_length as well
    # would conflict with it, so only max_new_tokens is set.
    study_notes = generator(prompt, max_new_tokens=200, num_return_sequences=1, truncation=True)
    return study_notes[0]["generated_text"]
# ✅ Function to answer user questions
def answer_question(question, context):
    qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    result = qa_pipeline(question=question, context=context)
    return result["answer"]
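# Note: like the Whisper model above, the three transformers pipelines are rebuilt on
# every call. A minimal sketch of caching one of them with st.cache_resource (the
# get_summarizer name is illustrative, not part of the original app):
#
# @st.cache_resource
# def get_summarizer():
#     return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")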
# ✅ Streamlit UI
st.title("Lecture Video Processor 🎥📝")

# File uploader
uploaded_file = st.file_uploader("📤 Upload a video file", type=["mp4", "mov", "avi", "mkv"])
if uploaded_file:
    # Save the upload to disk so moviepy can open it by path
    video_path = uploaded_file.name
    with open(video_path, "wb") as f:
        f.write(uploaded_file.read())
    st.success("✅ Video uploaded successfully!")

    # Extract audio
    st.info("🔊 Extracting audio...")
    audio_path = extract_audio(video_path)
    st.success("✅ Audio extracted!")

    # Transcribe audio
    st.info("🎙️ Transcribing audio...")
    transcript = transcribe_audio(audio_path)
    st.text_area("📜 Transcript", transcript, height=200)

    # Summarize transcript
    st.info("📄 Summarizing transcript...")
    video_summary = summarize_text(transcript)
    st.text_area("📌 Summary", video_summary, height=150)

    # Generate study notes
    st.info("📝 Generating study notes...")
    study_notes = generate_study_notes(video_summary)
    st.text_area("📖 Study Notes", study_notes, height=150)

    # Q&A Section (uses the summary as context)
    question = st.text_input("❓ Ask a question about the video:")
    if question:
        answer = answer_question(question, video_summary)
        st.write("💡 Answer:", answer)
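# To run locally, a minimal sketch (the package list is an assumption, not pinned by
# this file; "moviepy<2" because the moviepy.editor module was removed in moviepy 2.x):
#   pip install streamlit "moviepy<2" openai-whisper transformers torch
#   streamlit run app.py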