File size: 3,106 Bytes
f4bbee0
2dee61d
53e2c95
5466137
 
 
2dee61d
 
 
 
 
f4bbee0
5466137
 
2dee61d
5466137
f4bbee0
 
2dee61d
f4bbee0
2dee61d
f4bbee0
 
 
2dee61d
f4bbee0
 
 
 
 
 
 
2dee61d
f4bbee0
 
 
 
 
 
2dee61d
f4bbee0
 
 
 
 
2dee61d
53e2c95
f4bbee0
2dee61d
53e2c95
f4bbee0
 
 
 
 
 
53e2c95
 
2dee61d
53e2c95
f4bbee0
53e2c95
5466137
2dee61d
53e2c95
f4bbee0
53e2c95
5466137
2dee61d
53e2c95
f4bbee0
53e2c95
5466137
2dee61d
53e2c95
f4bbee0
53e2c95
5466137
2dee61d
53e2c95
f4bbee0
 
53e2c95
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import functools
import os
import shutil

import streamlit as st
import moviepy.editor as mp
import whisper
from transformers import pipeline

# βœ… Ensure ffmpeg is installed (needed for moviepy)
# shutil.which searches the whole PATH, so an ffmpeg installed anywhere
# (e.g. /usr/local/bin, conda env) is detected and no needless apt-get runs.
if shutil.which("ffmpeg") is None:
    os.system("apt-get update && apt-get install -y ffmpeg")

# βœ… Function to extract audio from a video
def extract_audio(video_path, audio_path="audio.wav"):
    """Extract the audio track of *video_path* into *audio_path* (WAV).

    Any pre-existing file at *audio_path* is removed first so the result
    always reflects the current video. Returns *audio_path*.
    """
    if os.path.exists(audio_path):
        os.remove(audio_path)
    video = mp.VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Close the clip to release the underlying ffmpeg reader / file
        # handles — the original leaked them on every call.
        video.close()
    return audio_path

# βœ… Function to transcribe audio using Whisper
@functools.lru_cache(maxsize=1)
def _get_whisper_model():
    """Load the Whisper "base" model once and reuse it across calls."""
    return whisper.load_model("base", download_root="./models")  # Ensure model is downloaded

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    The model is loaded lazily on first use and cached — the original
    reloaded it from disk on every transcription, which is very slow.
    """
    result = _get_whisper_model().transcribe(audio_path)
    return result["text"]

# βœ… Function to summarize text
@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Load the DistilBART summarization pipeline once (expensive)."""
    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

def summarize_text(text):
    """Summarize *text* and return the combined summary string.

    The input is split into fixed-size character chunks because the model
    cannot accept arbitrarily long inputs; each chunk is summarized
    independently and the partial summaries are joined with spaces.
    Empty input yields "" (no chunks, no model call).
    """
    summarizer = _get_summarizer()
    max_chunk_size = 1000
    chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
    summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)

# βœ… Function to generate study notes
@functools.lru_cache(maxsize=1)
def _get_text_generator():
    """Load the GPT-2 text-generation pipeline once and reuse it."""
    return pipeline("text-generation", model="gpt2")

def generate_study_notes(summary):
    """Generate study notes from *summary* using GPT-2.

    Returns only the newly generated text: GPT-2's ``generated_text``
    echoes the prompt, so the instruction prefix is stripped before
    returning — otherwise the "study notes" start with the prompt itself.
    """
    generator = _get_text_generator()
    prompt = f"Create study notes from the following summary:\n{summary}"
    # The original passed both max_length and max_new_tokens, which conflict
    # in transformers (one is ignored with a warning); keep only
    # max_new_tokens, which bounds the freshly generated tokens.
    outputs = generator(prompt, max_new_tokens=200, num_return_sequences=1, truncation=True)
    text = outputs[0]["generated_text"]
    return text[len(prompt):].lstrip() if text.startswith(prompt) else text

# βœ… Function to answer user questions
@functools.lru_cache(maxsize=1)
def _get_qa_pipeline():
    """Load the extractive QA pipeline once; reloading per question is slow."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

def answer_question(question, context):
    """Answer *question* by extracting a span from *context*."""
    result = _get_qa_pipeline()(question=question, context=context)
    return result["answer"]

# βœ… Streamlit UI
st.title("Lecture Video Processor πŸŽ₯πŸ“")

# Let the user pick a lecture recording to process.
uploaded_file = st.file_uploader("πŸ“€ Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if uploaded_file:
    # Persist the upload to disk so moviepy/whisper can open it by path.
    local_video = uploaded_file.name
    payload = uploaded_file.read()
    with open(local_video, "wb") as out:
        out.write(payload)
    st.success("βœ… Video uploaded successfully!")

    # Step 1: pull the audio track out of the video.
    st.info("πŸ”Š Extracting audio...")
    wav_path = extract_audio(local_video)
    st.success("βœ… Audio extracted!")

    # Step 2: speech-to-text with Whisper.
    st.info("πŸŽ™οΈ Transcribing audio...")
    transcript = transcribe_audio(wav_path)
    st.text_area("πŸ“œ Transcript", transcript, height=200)

    # Step 3: condense the transcript.
    st.info("πŸ“„ Summarizing transcript...")
    video_summary = summarize_text(transcript)
    st.text_area("πŸ“Œ Summary", video_summary, height=150)

    # Step 4: turn the summary into study notes.
    st.info("πŸ“ Generating study notes...")
    notes = generate_study_notes(video_summary)
    st.text_area("πŸ“– Study Notes", notes, height=150)

    # Step 5: interactive Q&A over the summary.
    user_question = st.text_input("❓ Ask a question about the video:")
    if user_question:
        st.write("πŸ’‘ Answer:", answer_question(user_question, video_summary))