SankethShetty001 committed on
Commit 7b3c770 · verified · 1 Parent(s): 394ea13

Create app.py

Files changed (1)
  1. app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
+ import gradio as gr
+ import subprocess
+ import whisper
+ from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
+ import os
+ import torch
+ import spacy
+
+ # Load models once at startup so they are reused across requests
+ whisper_model = whisper.load_model("base")
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)
+
+ # Load question-generation model and tokenizer
+ model_name = "valhalla/t5-base-qg-hl"
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
+ model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+ # Load spaCy for NER
+ nlp = spacy.load("en_core_web_sm")
+
+ # Load QA pipeline
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+
+ def extract_audio(video_path, audio_output_path):
+     # Use FFmpeg to extract a 16-bit PCM stereo WAV track from the video
+     command = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', audio_output_path]
+     subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     return audio_output_path
+
+ def process_video(video_file):
+     try:
+         audio_path = "extracted_audio.wav"
+
+         # Extract audio from the video using FFmpeg
+         extract_audio(video_file, audio_path)
+
+         if not os.path.exists(audio_path):
+             return "Audio extraction failed.", "No summary generated."
+
+         # Transcribe with the Whisper model loaded at startup
+         result = whisper_model.transcribe(audio_path)
+         transcript_text = result['text']
+
+         # Summarize in 1024-character chunks to stay within the model's input limit
+         chunks = [transcript_text[i:i + 1024] for i in range(0, len(transcript_text), 1024)]
+         summaries = [summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]['summary_text'] for chunk in chunks]
+         final_summary = ' '.join(summaries)
+
+         return transcript_text, final_summary
+
+     except Exception as e:
+         return f"Error: {str(e)}", f"Error: {str(e)}"
+
+ # Extract top named entities for highlighting
+ def select_top_entities(text, max_entities=3):
+     doc = nlp(text)
+     # Keep short, deduplicated entity spans (2-30 characters, at most 5 words)
+     candidates = [ent.text for ent in doc.ents if 2 <= len(ent.text) <= 30 and len(ent.text.split()) <= 5]
+     seen = set()
+     top_entities = []
+     for entity in candidates:
+         if entity not in seen:
+             seen.add(entity)
+             top_entities.append(entity)
+         if len(top_entities) >= max_entities:
+             break
+     return top_entities
+
+ # Generate one question per highlighted entity
+ def generate_questions(context):
+     entities = select_top_entities(context, max_entities=3)
+     questions = []
+
+     for ent in entities:
+         # The qg-hl model expects the answer span wrapped in <hl> tokens
+         highlighted = context.replace(ent, f"<hl> {ent} <hl>", 1)
+         input_text = f"generate question: {highlighted}"
+         input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
+         outputs = model.generate(
+             input_ids=input_ids,
+             max_length=64,
+             num_beams=4,
+             num_return_sequences=1,
+             no_repeat_ngram_size=2,
+             early_stopping=True
+         )
+         question = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         questions.append(question)
+
+     return "\n".join(f"Q{i+1}: {q}" for i, q in enumerate(questions))
+
+ def generate_answers(context, questions):
+     """
+     context: str - typically the summary
+     questions: list[str] or str - can be a multiline string or a list
+     returns: str - formatted answers
+     """
+     if isinstance(questions, str):
+         questions = questions.strip().split('\n')
+
+     answers = []
+     for q in questions:
+         if q.strip():
+             result = qa_pipeline(question=q.strip(), context=context)
+             answers.append(f"Q: {q.strip()}\nA: {result['answer']}")
+
+     return "\n\n".join(answers)
+
+ # Full pipeline: transcribe and summarize, then generate questions and answers from the summary
+ def process_video_(video_path):
+     # Step 1: Transcribe and summarize the video
+     transcript, summary = process_video(video_path)
+
+     # Step 2: Generate questions from the summary
+     questions = generate_questions(summary)
+
+     # Step 3: Answer the generated questions against the summary
+     answers = generate_answers(summary, questions)
+
+     return transcript, summary, questions, answers
+
+ # Gradio Interface
+ iface = gr.Interface(
+     fn=process_video_,
+     inputs=gr.Video(label="Upload a video"),
+     outputs=[
+         gr.Textbox(label="Transcript"),
+         gr.Textbox(label="Summary"),
+         gr.Textbox(label="Generated Questions"),
+         gr.Textbox(label="Generated Answers")
+     ],
+     title="Vision to Insight",
+     description="Upload a video to extract a transcript, generate a summary, and get 2–3 meaningful questions based on the summary."
+ )
+
+ iface.launch()