# Spaces: Running
import os
import shutil
import subprocess
import tempfile
import textwrap

import moviepy.editor as mp
import streamlit as st
import whisper
from transformers import pipeline
# Ensure ffmpeg is installed (needed for moviepy)
if shutil.which("ffmpeg") is None:
    # Look ffmpeg up on PATH instead of testing one hard-coded location, so an
    # ffmpeg installed elsewhere (e.g. /usr/local/bin) is not reinstalled.
    # The two commands mirror `apt-get update && apt-get install -y ffmpeg`:
    # the install only runs when the update succeeded.
    for command in (["apt-get", "update"], ["apt-get", "install", "-y", "ffmpeg"]):
        try:
            status = subprocess.run(command, check=False).returncode
        except OSError:
            # apt-get could not be launched; stay best-effort (the previous
            # os.system call did not raise either).
            break
        if status != 0:
            break
# Function to extract audio from a video
def extract_audio(video_path, audio_path="audio.wav"):
    """Write the audio track of a video to ``audio_path`` and return that path.

    Any existing file at ``audio_path`` is removed first.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path``, unchanged.

    Raises:
        ValueError: If the video has no audio track.
    """
    try:
        os.remove(audio_path)
    except FileNotFoundError:
        pass  # nothing to replace

    video = mp.VideoFileClip(video_path)
    try:
        # ``video.audio`` is None for clips without sound; fail with a clear
        # message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Always release the clip, even when extraction fails.
        video.close()
    return audio_path
# Function to transcribe audio using Whisper
def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper "base" model.

    The model is loaded with ``./models`` as its download root.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the Whisper transcription result.
    """
    whisper_model = whisper.load_model("base", download_root="./models")
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
# Function to summarize text
def summarize_text(text):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split into chunks of at most 1000 characters, each chunk is
    summarized separately, and the chunk summaries are joined with spaces.

    Args:
        text: The text to summarize.

    Returns:
        The combined summary, or ``""`` when ``text`` is empty or contains
        only whitespace.
    """
    if not text:
        return ""

    max_chunk_size = 1000
    # Split on whitespace so chunk borders do not cut words in half;
    # textwrap only breaks inside a word that is itself longer than the limit.
    chunks = textwrap.wrap(text, width=max_chunk_size)
    if not chunks:
        # Whitespace-only input: nothing to summarize, so skip loading the model.
        return ""

    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)
# Function to generate study notes
def generate_study_notes(summary):
    """Generate study notes from a summary with the GPT-2 text-generation pipeline.

    Args:
        summary: The summary text the notes are based on.

    Returns:
        The generated notes without the prompt, or ``""`` when ``summary`` is
        empty or contains only whitespace.
    """
    if not summary or not summary.strip():
        # Nothing to base notes on; skip loading the model.
        return ""

    generator = pipeline("text-generation", model="gpt2")
    prompt = f"Create study notes from the following summary:\n{summary}"
    # NOTE(review): both max_length and max_new_tokens are passed; how they
    # interact depends on the installed transformers version - confirm.
    study_notes = generator(
        prompt,
        max_length=400,
        max_new_tokens=200,
        num_return_sequences=1,
        truncation=True,
        # Return only the newly generated text; by default the pipeline
        # echoes the prompt at the start of "generated_text".
        return_full_text=False,
    )
    return study_notes[0]["generated_text"]
# Function to answer user questions
def answer_question(question, context):
    """Answer ``question`` from ``context`` with an extractive QA pipeline.

    Args:
        question: The question to answer.
        context: The text the answer is extracted from.

    Returns:
        The ``"answer"`` entry of the pipeline result, or ``""`` when either
        the question or the context is empty or contains only whitespace.
    """
    # The QA pipeline rejects empty inputs, so answer with an empty string
    # instead of loading the model and failing.
    if not question or not question.strip():
        return ""
    if not context or not context.strip():
        return ""

    qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    result = qa_pipeline(question=question, context=context)
    return result["answer"]
# Streamlit UI
st.title("Lecture Video Processor π₯π")

# File uploader
uploaded_file = st.file_uploader("π€ Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if uploaded_file:
    # Streamlit re-executes this script from the top on every widget
    # interaction (e.g. typing a question). Keep each pipeline result in
    # session state and recompute only when a different file is uploaded.
    upload_key = (uploaded_file.name, uploaded_file.size)
    results = st.session_state.get("lecture_results")
    if results is None or results.get("upload_key") != upload_key:
        results = {"upload_key": upload_key}
        st.session_state["lecture_results"] = results

    if "transcript" not in results:
        # Work in a private temporary directory rather than the app's working
        # directory: the client-supplied file name is not used as a path,
        # sessions do not share one "audio.wav", and the media files are
        # removed once the transcript exists.
        suffix = os.path.splitext(uploaded_file.name)[1]
        with tempfile.TemporaryDirectory() as work_dir:
            video_path = os.path.join(work_dir, "video" + suffix)
            with open(video_path, "wb") as f:
                # getvalue() returns the whole upload regardless of the
                # buffer's current read position.
                f.write(uploaded_file.getvalue())
            st.success("β Video uploaded successfully!")

            # Extract audio
            st.info("π Extracting audio...")
            audio_path = extract_audio(video_path, os.path.join(work_dir, "audio.wav"))
            st.success("β Audio extracted!")

            # Transcribe audio
            st.info("ποΈ Transcribing audio...")
            results["transcript"] = transcribe_audio(audio_path)
    st.text_area("π Transcript", results["transcript"], height=200)

    # Summarize transcript
    if "summary" not in results:
        st.info("π Summarizing transcript...")
        results["summary"] = summarize_text(results["transcript"])
    st.text_area("π Summary", results["summary"], height=150)

    # Generate study notes
    if "study_notes" not in results:
        st.info("π Generating study notes...")
        results["study_notes"] = generate_study_notes(results["summary"])
    st.text_area("π Study Notes", results["study_notes"], height=150)

    # Q&A Section
    question = st.text_input("β Ask a question about the video:")
    if question:
        if results["summary"].strip():
            answer = answer_question(question, results["summary"])
            st.write("π‘ Answer:", answer)
        else:
            # The QA model needs a non-empty context to extract an answer from.
            st.warning("No summary is available to answer questions from.")