Spaces:
Running
Running
File size: 3,106 Bytes
f4bbee0 2dee61d 53e2c95 5466137 2dee61d f4bbee0 5466137 2dee61d 5466137 f4bbee0 2dee61d f4bbee0 2dee61d f4bbee0 2dee61d f4bbee0 2dee61d f4bbee0 2dee61d f4bbee0 2dee61d 53e2c95 f4bbee0 2dee61d 53e2c95 f4bbee0 53e2c95 2dee61d 53e2c95 f4bbee0 53e2c95 5466137 2dee61d 53e2c95 f4bbee0 53e2c95 5466137 2dee61d 53e2c95 f4bbee0 53e2c95 5466137 2dee61d 53e2c95 f4bbee0 53e2c95 5466137 2dee61d 53e2c95 f4bbee0 53e2c95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import os
import streamlit as st
import moviepy.editor as mp
import whisper
from transformers import pipeline
# Ensure ffmpeg is installed (moviepy needs the ffmpeg binary to read/write media).
# NOTE(review): apt-get assumes a Debian-based host with root — fine for a hosted
# Space, but a no-op/failure elsewhere.
if not os.path.exists("/usr/bin/ffmpeg"):
    os.system("apt-get update && apt-get install -y ffmpeg")
# Function to extract audio from a video
def extract_audio(video_path, audio_path="audio.wav"):
    """Extract the audio track of a video file and write it as a WAV file.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted audio; any existing
            file at this path is removed first so the output is fresh.

    Returns:
        The path the audio was written to (same value as ``audio_path``).
    """
    # Remove a stale output from a previous run before writing a new one.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    video = mp.VideoFileClip(video_path)  # Use mp.VideoFileClip
    video.audio.write_audiofile(audio_path)
    return audio_path
# Function to transcribe audio using Whisper
def transcribe_audio(audio_path):
    """Transcribe speech in an audio file using OpenAI Whisper.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcribed text (the ``"text"`` field of Whisper's result dict).
    """
    # "base" model cached under ./models so repeated runs skip the download.
    model = whisper.load_model("base", download_root="./models")
    result = model.transcribe(audio_path)
    return result["text"]
# Function to summarize text
def summarize_text(text):
    """Summarize arbitrary-length text with a DistilBART summarization model.

    The text is split into fixed-size character chunks because the model has
    a limited input length; each chunk is summarized independently and the
    partial summaries are joined with spaces.

    Args:
        text: The text to summarize. An empty string yields an empty summary.

    Returns:
        A single string containing the concatenated chunk summaries.
    """
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    max_chunk_size = 1000
    chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
    summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)
# Function to generate study notes
def generate_study_notes(summary):
    """Generate study notes from a summary using a GPT-2 text-generation model.

    Args:
        summary: The summary text to expand into study notes.

    Returns:
        The generated text (prompt included, as returned by the pipeline).
    """
    generator = pipeline("text-generation", model="gpt2")
    prompt = f"Create study notes from the following summary:\n{summary}"
    # Pass only max_new_tokens: supplying max_length alongside it is
    # contradictory (transformers warns and max_new_tokens wins anyway).
    study_notes = generator(prompt, max_new_tokens=200, num_return_sequences=1, truncation=True)
    return study_notes[0]["generated_text"]
# Function to answer user questions
def answer_question(question, context):
    """Answer a question from a given context with an extractive QA model.

    Args:
        question: The user's question.
        context: The text the answer should be extracted from.

    Returns:
        The answer span selected by the model (the ``"answer"`` field).
    """
    qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    result = qa_pipeline(question=question, context=context)
    return result["answer"]
# Streamlit UI
st.title("Lecture Video Processor 🎥📚")

# File uploader
uploaded_file = st.file_uploader("📤 Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if uploaded_file:
    # Persist the uploaded bytes to disk so moviepy can open the file by path.
    video_path = uploaded_file.name
    with open(video_path, "wb") as f:
        f.write(uploaded_file.read())
    st.success("✅ Video uploaded successfully!")

    # Extract audio
    st.info("🔊 Extracting audio...")
    audio_path = extract_audio(video_path)
    st.success("✅ Audio extracted!")

    # Transcribe audio
    st.info("🎙️ Transcribing audio...")
    transcript = transcribe_audio(audio_path)
    st.text_area("📜 Transcript", transcript, height=200)

    # Summarize transcript
    st.info("📝 Summarizing transcript...")
    video_summary = summarize_text(transcript)
    st.text_area("📝 Summary", video_summary, height=150)

    # Generate study notes
    st.info("📚 Generating study notes...")
    study_notes = generate_study_notes(video_summary)
    st.text_area("📖 Study Notes", study_notes, height=150)

    # Q&A Section — answers are grounded in the summary, not the full transcript.
    question = st.text_input("❓ Ask a question about the video:")
    if question:
        answer = answer_question(question, video_summary)
        st.write("💡 Answer:", answer)