Spaces:
Running
Running
working doc
Browse files
app.py
CHANGED
@@ -72,7 +72,7 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
72 |
@app.get("/")
|
73 |
def root():
|
74 |
return RedirectResponse(url="/")
|
75 |
-
|
76 |
import gradio as gr
|
77 |
import fitz # PyMuPDF for PDFs
|
78 |
import easyocr # OCR for images
|
@@ -95,7 +95,7 @@ reader = easyocr.Reader(['en', 'fr']) # OCR for English & French
|
|
95 |
|
96 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
97 |
def extract_text_from_pdf(pdf_file):
|
98 |
-
"Extract text from a PDF file.""
|
99 |
text = []
|
100 |
try:
|
101 |
with fitz.open(pdf_file) as doc:
|
@@ -106,12 +106,12 @@ def extract_text_from_pdf(pdf_file):
|
|
106 |
return "\n".join(text)
|
107 |
|
108 |
def extract_text_from_docx(docx_file):
|
109 |
-
"Extract text from a DOCX file."
|
110 |
doc = docx.Document(docx_file)
|
111 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
112 |
|
113 |
def extract_text_from_pptx(pptx_file):
|
114 |
-
"Extract text from a PPTX file."
|
115 |
text = []
|
116 |
try:
|
117 |
presentation = pptx.Presentation(pptx_file)
|
@@ -124,7 +124,7 @@ def extract_text_from_pptx(pptx_file):
|
|
124 |
return "\n".join(text)
|
125 |
|
126 |
def extract_text_from_xlsx(xlsx_file):
|
127 |
-
"Extract text from an XLSX file."
|
128 |
text = []
|
129 |
try:
|
130 |
wb = openpyxl.load_workbook(xlsx_file)
|
@@ -137,13 +137,13 @@ def extract_text_from_xlsx(xlsx_file):
|
|
137 |
return "\n".join(text)
|
138 |
|
139 |
def extract_text_from_image(image_path):
|
140 |
-
"Extract text from an image using EasyOCR.""
|
141 |
result = reader.readtext(image_path, detail=0)
|
142 |
return " ".join(result) # Return text as a single string
|
143 |
|
144 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
145 |
def answer_question_from_doc(file, question):
|
146 |
-
"Process document and answer a question based on its content."
|
147 |
ext = file.name.split(".")[-1].lower()
|
148 |
|
149 |
if ext == "pdf":
|
@@ -155,10 +155,10 @@ def answer_question_from_doc(file, question):
|
|
155 |
elif ext == "xlsx":
|
156 |
context = extract_text_from_xlsx(file.name)
|
157 |
else:
|
158 |
-
return "Unsupported file format."
|
159 |
|
160 |
if not context.strip():
|
161 |
-
return "No text found in the document."
|
162 |
|
163 |
# Generate answer using QA pipeline correctly
|
164 |
try:
|
@@ -168,10 +168,10 @@ def answer_question_from_doc(file, question):
|
|
168 |
return f"Error generating answer: {e}"
|
169 |
|
170 |
def answer_question_from_image(image, question):
|
171 |
-
"Process an image, extract text, and answer a question.""
|
172 |
img_text = extract_text_from_image(image)
|
173 |
if not img_text.strip():
|
174 |
-
return "No readable text found in the image."
|
175 |
|
176 |
try:
|
177 |
result = qa_model({"question": question, "context": img_text})
|
@@ -203,146 +203,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
203 |
@app.get("/")
|
204 |
def home():
|
205 |
return RedirectResponse(url="/")
|
206 |
-
"""
|
207 |
-
from fastapi import FastAPI
|
208 |
-
from fastapi.responses import RedirectResponse
|
209 |
-
import gradio as gr
|
210 |
-
|
211 |
-
import fitz # PyMuPDF for PDFs
|
212 |
-
import easyocr # OCR for images
|
213 |
-
import openpyxl # XLSX processing
|
214 |
-
import pptx # PPTX processing
|
215 |
-
import docx # DOCX processing
|
216 |
-
|
217 |
-
from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering
|
218 |
-
from PIL import Image
|
219 |
-
import torch
|
220 |
-
|
221 |
-
# === Initialize FastAPI App ===
|
222 |
-
app = FastAPI()
|
223 |
-
|
224 |
-
# === Initialize QA Model for Documents and OCR ===
|
225 |
-
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
226 |
-
|
227 |
-
# === Initialize Image QA Model (VQA) ===
|
228 |
-
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
229 |
-
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
230 |
-
|
231 |
-
# === OCR Reader ===
|
232 |
-
reader = easyocr.Reader(['en', 'fr'])
|
233 |
-
|
234 |
-
# === Document Text Extraction Functions ===
|
235 |
-
def extract_text_from_pdf(file_obj):
|
236 |
-
doc = fitz.open(stream=file_obj.read(), filetype="pdf")
|
237 |
-
return "\n".join([page.get_text() for page in doc])
|
238 |
-
|
239 |
-
def extract_text_from_docx(docx_file):
|
240 |
-
doc = docx.Document(docx_file)
|
241 |
-
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
242 |
-
|
243 |
-
def extract_text_from_pptx(pptx_file):
|
244 |
-
text = []
|
245 |
-
try:
|
246 |
-
presentation = pptx.Presentation(pptx_file)
|
247 |
-
for slide in presentation.slides:
|
248 |
-
for shape in slide.shapes:
|
249 |
-
if hasattr(shape, "text"):
|
250 |
-
text.append(shape.text)
|
251 |
-
except Exception as e:
|
252 |
-
return f"Error reading PPTX: {e}"
|
253 |
-
return "\n".join(text)
|
254 |
-
|
255 |
-
def extract_text_from_xlsx(xlsx_file):
|
256 |
-
text = []
|
257 |
-
try:
|
258 |
-
wb = openpyxl.load_workbook(xlsx_file)
|
259 |
-
for sheet in wb.sheetnames:
|
260 |
-
ws = wb[sheet]
|
261 |
-
for row in ws.iter_rows(values_only=True):
|
262 |
-
text.append(" ".join(str(cell) for cell in row if cell))
|
263 |
-
except Exception as e:
|
264 |
-
return f"Error reading XLSX: {e}"
|
265 |
-
return "\n".join(text)
|
266 |
-
|
267 |
-
# === Image OCR ===
|
268 |
-
def extract_text_from_image(image_path):
|
269 |
-
result = reader.readtext(image_path, detail=0)
|
270 |
-
return " ".join(result)
|
271 |
-
|
272 |
-
# === QA for Document Files ===
|
273 |
-
def answer_question_from_doc(file, question):
|
274 |
-
if file is None or not question.strip():
|
275 |
-
return "Please upload a document and ask a question."
|
276 |
-
|
277 |
-
ext = file.name.split(".")[-1].lower()
|
278 |
-
if ext == "pdf":
|
279 |
-
context = extract_text_from_pdf(file)
|
280 |
-
elif ext == "docx":
|
281 |
-
context = extract_text_from_docx(file)
|
282 |
-
elif ext == "pptx":
|
283 |
-
context = extract_text_from_pptx(file)
|
284 |
-
elif ext == "xlsx":
|
285 |
-
context = extract_text_from_xlsx(file)
|
286 |
-
else:
|
287 |
-
return "Unsupported file format."
|
288 |
-
|
289 |
-
if not context.strip():
|
290 |
-
return "No text found in the document."
|
291 |
-
|
292 |
-
try:
|
293 |
-
result = qa_model({"question": question, "context": context})
|
294 |
-
return result["answer"]
|
295 |
-
except Exception as e:
|
296 |
-
return f"Error generating answer: {e}"
|
297 |
-
|
298 |
-
# === QA for Images using EasyOCR and QA model ===
|
299 |
-
def answer_question_from_image_text(image, question):
|
300 |
-
img_text = extract_text_from_image(image)
|
301 |
-
if not img_text.strip():
|
302 |
-
return "No readable text found in the image."
|
303 |
-
try:
|
304 |
-
result = qa_model({"question": question, "context": img_text})
|
305 |
-
return result["answer"]
|
306 |
-
except Exception as e:
|
307 |
-
return f"Error generating answer: {e}"
|
308 |
|
309 |
-
# === QA for Images using ViLT (Visual QA Model) ===
|
310 |
-
def answer_question_from_image_visual(image, question):
|
311 |
-
if image is None or not question.strip():
|
312 |
-
return "Please upload an image and ask a question."
|
313 |
-
inputs = vqa_processor(image, question, return_tensors="pt")
|
314 |
-
with torch.no_grad():
|
315 |
-
outputs = vqa_model(**inputs)
|
316 |
-
predicted_id = outputs.logits.argmax(-1).item()
|
317 |
-
return vqa_model.config.id2label[predicted_id]
|
318 |
-
|
319 |
-
# === Gradio Interfaces ===
|
320 |
-
with gr.Blocks() as doc_interface:
|
321 |
-
gr.Markdown("## 📄 Document Question Answering")
|
322 |
-
file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
|
323 |
-
question_input = gr.Textbox(label="Ask a Question")
|
324 |
-
answer_output = gr.Textbox(label="Answer")
|
325 |
-
file_submit = gr.Button("Get Answer")
|
326 |
-
file_submit.click(fn=answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
|
327 |
-
|
328 |
-
with gr.Blocks() as image_interface:
|
329 |
-
gr.Markdown("## 🖼️ Image Question Answering (OCR + VQA)")
|
330 |
-
with gr.Tabs():
|
331 |
-
with gr.TabItem("OCR-based Image QA"):
|
332 |
-
image_input = gr.Image(label="Upload Image")
|
333 |
-
img_question_input = gr.Textbox(label="Ask a Question")
|
334 |
-
img_answer_output = gr.Textbox(label="Answer")
|
335 |
-
gr.Button("Get Answer").click(fn=answer_question_from_image_text, inputs=[image_input, img_question_input], outputs=img_answer_output)
|
336 |
-
with gr.TabItem("Visual QA (ViLT)"):
|
337 |
-
image_input_vqa = gr.Image(label="Upload Image")
|
338 |
-
vqa_question_input = gr.Textbox(label="Ask a Question")
|
339 |
-
vqa_answer_output = gr.Textbox(label="Answer")
|
340 |
-
gr.Button("Get Answer").click(fn=answer_question_from_image_visual, inputs=[image_input_vqa, vqa_question_input], outputs=vqa_answer_output)
|
341 |
-
|
342 |
-
# === Mount Gradio on FastAPI ===
|
343 |
-
demo = gr.TabbedInterface([doc_interface, image_interface], ["Document QA", "Image QA"])
|
344 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
345 |
-
|
346 |
-
@app.get("/")
|
347 |
-
def root():
|
348 |
-
return RedirectResponse(url="/")
|
|
|
72 |
@app.get("/")
|
73 |
def root():
|
74 |
return RedirectResponse(url="/")
|
75 |
+
"""
|
76 |
import gradio as gr
|
77 |
import fitz # PyMuPDF for PDFs
|
78 |
import easyocr # OCR for images
|
|
|
95 |
|
96 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
97 |
def extract_text_from_pdf(pdf_file):
|
98 |
+
"""Extract text from a PDF file."""
|
99 |
text = []
|
100 |
try:
|
101 |
with fitz.open(pdf_file) as doc:
|
|
|
106 |
return "\n".join(text)
|
107 |
|
108 |
def extract_text_from_docx(docx_file):
|
109 |
+
"""Extract text from a DOCX file."""
|
110 |
doc = docx.Document(docx_file)
|
111 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
112 |
|
113 |
def extract_text_from_pptx(pptx_file):
|
114 |
+
"""Extract text from a PPTX file."""
|
115 |
text = []
|
116 |
try:
|
117 |
presentation = pptx.Presentation(pptx_file)
|
|
|
124 |
return "\n".join(text)
|
125 |
|
126 |
def extract_text_from_xlsx(xlsx_file):
|
127 |
+
"""Extract text from an XLSX file."""
|
128 |
text = []
|
129 |
try:
|
130 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
|
137 |
return "\n".join(text)
|
138 |
|
139 |
def extract_text_from_image(image_path):
|
140 |
+
"""Extract text from an image using EasyOCR."""
|
141 |
result = reader.readtext(image_path, detail=0)
|
142 |
return " ".join(result) # Return text as a single string
|
143 |
|
144 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
145 |
def answer_question_from_doc(file, question):
|
146 |
+
"""Process document and answer a question based on its content."""
|
147 |
ext = file.name.split(".")[-1].lower()
|
148 |
|
149 |
if ext == "pdf":
|
|
|
155 |
elif ext == "xlsx":
|
156 |
context = extract_text_from_xlsx(file.name)
|
157 |
else:
|
158 |
+
return """Unsupported file format."""
|
159 |
|
160 |
if not context.strip():
|
161 |
+
return """No text found in the document."""
|
162 |
|
163 |
# Generate answer using QA pipeline correctly
|
164 |
try:
|
|
|
168 |
return f"Error generating answer: {e}"
|
169 |
|
170 |
def answer_question_from_image(image, question):
|
171 |
+
"""Process an image, extract text, and answer a question."""
|
172 |
img_text = extract_text_from_image(image)
|
173 |
if not img_text.strip():
|
174 |
+
return """No readable text found in the image."""
|
175 |
|
176 |
try:
|
177 |
result = qa_model({"question": question, "context": img_text})
|
|
|
203 |
@app.get("/")
|
204 |
def home():
|
205 |
return RedirectResponse(url="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|