Spaces:
Runtime error
Runtime error
Jai Ansh Bindra
committed on
Commit
·
aeb5f89
1
Parent(s):
a9c9290
Add Flask app and requirements.
Browse files- app.py +176 -0
- requirements.txt +0 -0
app.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, request, send_file, jsonify
|
2 |
+
import subprocess
|
3 |
+
import numpy as np
|
4 |
+
import ffmpeg
|
5 |
+
import whisper
|
6 |
+
import re
|
7 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
8 |
+
from io import BytesIO
|
9 |
+
import torch
|
10 |
+
from reportlab.lib.pagesizes import letter
|
11 |
+
from reportlab.pdfgen import canvas
|
12 |
+
import textwrap
|
13 |
+
import os
|
14 |
+
|
15 |
+
# Flask application object; the route decorators further down attach to it.
app = Flask(__name__)

# -------------------------------
# Global setup
# -------------------------------
# Everything below runs once, at import time, so request handlers reuse the
# already-loaded tokenizer, causal language model and summarization pipeline.
model_name = "Qwen/Qwen2.5-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
|
28 |
+
|
29 |
+
# -------------------------------
|
30 |
+
# Pipeline functions
|
31 |
+
# -------------------------------
|
32 |
+
def stream_youtube_audio(video_url):
    """Fetch the best audio stream of a video through ``yt-dlp``.

    Args:
        video_url: URL passed to ``yt-dlp`` as its only positional argument.

    Returns:
        The bytes ``yt-dlp`` wrote to stdout, or ``b""`` when the program
        could not be started or exited with a non-zero status.
    """
    command = [
        "yt-dlp",
        "-f", "bestaudio",
        "--no-playlist",
        "-o", "-",
        # End-of-options marker: the URL comes from the request body, so a
        # value starting with "-" must never be parsed as a yt-dlp flag.
        "--",
        video_url,
    ]
    try:
        # run() drains stdout and stderr concurrently and waits for the child.
        # Reading only stdout from a Popen with a piped stderr can block
        # forever once the stderr pipe buffer fills up.
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (OSError, ValueError) as exc:
        # OSError: yt-dlp missing or not executable; ValueError: e.g. NUL byte.
        print("yt-dlp could not be started:", exc)
        return b""

    if completed.returncode != 0:
        print(
            "yt-dlp failed:",
            completed.stderr.decode("utf-8", errors="replace").strip(),
        )
        return b""
    return completed.stdout
|
42 |
+
|
43 |
+
def audio_stream_to_numpy(audio_bytes):
    """Decode encoded audio bytes into a mono 16 kHz float32 sample array.

    Args:
        audio_bytes: Encoded audio fed to ffmpeg on stdin.

    Returns:
        A 1-D ``numpy.float32`` array scaled by 1/32768, or ``None`` when
        ffmpeg reports an error.
    """
    try:
        out, _ = (
            ffmpeg.input("pipe:0")
            # Raw PCM ("s16le"), not "wav": a WAV container would prepend a
            # RIFF header that np.frombuffer would decode as audio samples.
            .output("pipe:1", format="s16le", acodec="pcm_s16le", ac=1, ar="16000")
            .run(input=audio_bytes, capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # The exception text is generic; the captured stderr has the details.
        details = e.stderr.decode("utf-8", errors="replace") if e.stderr else e
        print("FFmpeg error:", details)
        return None

    # Signed 16-bit samples -> floats scaled by 1/32768.
    audio_data = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
    return audio_data
|
55 |
+
|
56 |
+
# Whisper model shared by all transcriptions; populated on first use.
_WHISPER_MODEL = None


def _get_whisper_model():
    """Return the cached Whisper "tiny" model, loading it on the first call."""
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = whisper.load_model("tiny")
    return _WHISPER_MODEL


def transcribe_audio_numpy(audio_data):
    """Transcribe an audio sample array with the Whisper "tiny" model.

    Args:
        audio_data: Audio samples as produced by ``audio_stream_to_numpy``.

    Returns:
        The ``"text"`` entry of the Whisper transcription result.
    """
    # Reuse the cached model instead of reloading it for every call.
    result = _get_whisper_model().transcribe(audio_data)
    print("Transcription completed.")
    return result["text"]
|
61 |
+
|
62 |
+
def summarize_text(transcription, max_tokens=512):
    """Summarize a transcription, passing short texts through unchanged.

    Args:
        transcription: Text to summarize.
        max_tokens: Upper bound passed to the summarizer as ``max_length``.

    Returns:
        ``transcription`` itself when it has fewer than 100 whitespace-separated
        words, otherwise the ``summary_text`` produced by the summarizer.
    """
    if len(transcription.split()) < 100:
        return transcription

    summary = summarizer(
        transcription,
        max_length=max_tokens,
        # Keep min_length <= max_length when a caller passes a small max_tokens.
        min_length=min(100, max_tokens),
        do_sample=False,
        # Long transcripts exceed the model's input window; truncate them
        # instead of letting the pipeline fail on over-long input.
        truncation=True,
    )
    return summary[0]['summary_text']
|
67 |
+
|
68 |
+
def generate_questionnaire(summary):
    """Generate a questionnaire about ``summary`` with the causal language model.

    Args:
        summary: Text the questions must be based on; embedded in the prompt.

    Returns:
        The generated questionnaire after ``clean_questionnaire`` has trimmed it.
    """
    prompt = f"""
You are a professional questionnaire generator reputed for generating diverse questionnaires, given any
information sample.

The questionnaire you generate must contain:
1. Three simple multiple-choice questions (each with 4 options).
2. One moderately difficult multiple-choice question (4 options).
3. Two simple open-ended questions.
4. Three moderately difficult open-ended questions.
5. One hard scenario-based open-ended question.

Make sure to cover each and every type of question mentioned.
Nothing else, no code. Stick strictly to the provided context.
Also, provide the questions in a structured, well-formatted, sequential manner.
Start question sections with ### Multiple-Choice Questions etc.
Generate a well-structured questionnaire based on the following content:
"{summary}"
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=2000,
        temperature=0.2,
        top_p=0.8,
        repetition_penalty=1.1,
        do_sample=True
    )
    # The returned sequence starts with the prompt tokens. The prompt itself
    # contains "### Multiple-Choice Questions", so decoding it would make the
    # cleanup step anchor on the instructions instead of the generated section.
    # Decode only the tokens produced after the prompt.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    print("Questionnaire generation completed.")
    return clean_questionnaire(output_text)
|
99 |
+
|
100 |
+
def clean_questionnaire(raw_text):
    """Trim model output so it starts at the multiple-choice section header.

    Everything from the first "### Multiple-Choice Questions" to the end of the
    text is kept; when the header is absent the whole text is kept. Surrounding
    whitespace is stripped in both cases.
    """
    found = re.search(r"(### Multiple-Choice Questions.*?)$", raw_text, re.DOTALL)
    if found is None:
        # No header present: fall back to the untouched text.
        return raw_text.strip()
    return found.group(1).strip()
|
104 |
+
|
105 |
+
def save_text_as_pdf(text, filename):
    """Write ``text`` to a letter-size PDF, wrapping lines and adding pages.

    Args:
        text: Text to render; ``"\\n"`` separates paragraphs.
        filename: Destination passed to the ReportLab canvas.
    """
    # Local import keeps this block self-contained; reportlab is already a
    # dependency of this file.
    from reportlab.lib.utils import simpleSplit

    font_name, font_size = "Helvetica", 12
    margin = 50
    width, height = letter
    # Wrap by rendered width, not character count: a fixed 100-character limit
    # runs past the right margin in a proportional 12 pt font.
    usable_width = width - 2 * margin

    c = canvas.Canvas(filename, pagesize=letter)

    def start_text_object():
        """Begin a text object at the top-left margin of the current page."""
        fresh = c.beginText(margin, height - margin)
        fresh.setFont(font_name, font_size)
        return fresh

    text_object = start_text_object()
    for paragraph in text.split("\n"):
        # An empty paragraph yields no lines; render it as one blank line so
        # vertical spacing is kept and it goes through the page-break check.
        lines = simpleSplit(paragraph, font_name, font_size, usable_width) or [""]
        for line in lines:
            text_object.textLine(line)
            if text_object.getY() < margin:
                # The next line would fall below the bottom margin: flush the
                # current page and continue on a new one.
                c.drawText(text_object)
                c.showPage()
                text_object = start_text_object()
    c.drawText(text_object)
    c.save()
|
126 |
+
|
127 |
+
def process_stream(video_url, output_pdf="questionnaire.pdf"):
    """Run the full pipeline: audio -> transcript -> summary -> questionnaire PDF.

    Args:
        video_url: URL of the video whose audio is processed.
        output_pdf: Path the questionnaire PDF is written to.

    Returns:
        ``output_pdf`` on success, or ``None`` when fetching, decoding or
        transcribing the audio yields nothing.
    """
    def fail(message):
        # Report the failing stage and give the caller its None result.
        print(message)
        return None

    print("Streaming audio...")
    raw_audio = stream_youtube_audio(video_url)
    if not raw_audio:
        return fail("Error: Unable to fetch audio.")

    print("Converting audio stream to NumPy array...")
    samples = audio_stream_to_numpy(raw_audio)
    if samples is None:
        return fail("Error: Unable to process audio data.")

    print("Transcribing audio...")
    transcript = transcribe_audio_numpy(samples)
    if not transcript:
        return fail("Error: Transcription failed.")

    print("Summarizing transcription...")
    condensed = summarize_text(transcript)

    print("Generating questionnaire...")
    questions = generate_questionnaire(condensed)

    print("Converting questionnaire to PDF...")
    save_text_as_pdf(questions, output_pdf)

    print(f"PDF generated: {output_pdf}")
    return output_pdf
|
157 |
+
|
158 |
+
# -------------------------------
|
159 |
+
# API endpoints
|
160 |
+
# -------------------------------
|
161 |
+
@app.route('/process', methods=['POST'])
def process_video():
    """Handle ``POST /process``: build a questionnaire PDF for a video URL.

    Expects a JSON object with a ``"video_url"`` string. Responds with the PDF
    as an attachment, a 400 JSON error when no usable URL is supplied, or a
    500 JSON error when ``process_stream`` returns nothing.
    """
    # silent=True gives None for a missing or malformed JSON body instead of a
    # framework error, so every bad request gets the same 400 response below.
    data = request.get_json(silent=True)
    # The body may be valid JSON without being an object (list, string, null).
    video_url = data.get("video_url") if isinstance(data, dict) else None
    if not video_url or not isinstance(video_url, str):
        return jsonify({"error": "No video URL provided."}), 400

    # NOTE(review): process_stream writes to its default output filename, so
    # overlapping requests may overwrite each other's PDF. Confirm whether
    # concurrent use is expected.
    pdf_file = process_stream(video_url)
    if not pdf_file:
        return jsonify({"error": "Processing failed. Check logs for details."}), 500

    return send_file(pdf_file, as_attachment=True)
|
173 |
+
|
174 |
+
if __name__ == '__main__':
    # When deploying on a cloud service, make sure the port is set appropriately.
    # The PORT environment variable overrides the default of 5000.
    port = int(os.environ.get("PORT", "5000"))
    # Debug mode enables the interactive debugger, which must not be reachable
    # on a server bound to all interfaces; it is now opt-in via FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, use_reloader=False, host="0.0.0.0", port=port)
|
requirements.txt
ADDED
File without changes
|