Spaces:

ikraamkb
/

qtAnswering

Running

App Files Files Community

ikraamkb committed on Apr 4

Commit

1cafb18

verified ·

1 Parent(s): 5976e32

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -79

app.py CHANGED Viewed

@@ -205,74 +205,40 @@ def home():
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 import gradio as gr
-from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
-from PIL import Image
-import torch
-import fitz  # PyMuPDF for PDF
 import easyocr  # OCR for images
 import openpyxl  # XLSX processing
 import pptx  # PPTX processing
 import docx  # DOCX processing
-# Initialize FastAPI app
-app = FastAPI()
-# ========== Document QA Setup ==========
-doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-def read_pdf(file):
-    doc = fitz.open(stream=file.read(), filetype="pdf")
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-def answer_question_from_doc(file, question):
-    if file is None or not question.strip():
-        return "Please upload a document and ask a question."
-    text = read_pdf(file)
-    prompt = f"Context: {text}\nQuestion: {question}\nAnswer:"
-    inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
-    with torch.no_grad():
-        outputs = doc_model.generate(**inputs, max_new_tokens=100)
-    answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return answer.split("Answer:")[-1].strip()
-# ========== Image QA Setup ==========
 vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
 vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-def answer_question_from_image(image, question):
-    if image is None or not question.strip():
-        return "Please upload an image and ask a question."
-    inputs = vqa_processor(image, question, return_tensors="pt")
-    with torch.no_grad():
-        outputs = vqa_model(**inputs)
-    predicted_id = outputs.logits.argmax(-1).item()
-    return vqa_model.config.id2label[predicted_id]
-# ========== Text Extraction Functions ==========
-reader = easyocr.Reader(['en', 'fr'])  # OCR for English & French
-def extract_text_from_pdf(pdf_file):
-    """Extract text from a PDF file."""
-    text = []
-    try:
-        with fitz.open(pdf_file) as doc:
-            for page in doc:
-                text.append(page.get_text("text"))
-    except Exception as e:
-        return f"Error reading PDF: {e}"
-    return "\n".join(text)
 def extract_text_from_docx(docx_file):
-    """Extract text from a DOCX file."""
     doc = docx.Document(docx_file)
     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 def extract_text_from_pptx(pptx_file):
-    """Extract text from a PPTX file."""
     text = []
     try:
         presentation = pptx.Presentation(pptx_file)
@@ -285,7 +251,6 @@ def extract_text_from_pptx(pptx_file):
     return "\n".join(text)
 def extract_text_from_xlsx(xlsx_file):
-    """Extract text from an XLSX file."""
     text = []
     try:
         wb = openpyxl.load_workbook(xlsx_file)
@@ -297,39 +262,39 @@ def extract_text_from_xlsx(xlsx_file):
         return f"Error reading XLSX: {e}"
     return "\n".join(text)
 def extract_text_from_image(image_path):
-    """Extract text from an image using EasyOCR."""
     result = reader.readtext(image_path, detail=0)
-    return " ".join(result)  # Return text as a single string
-# ========== Main Processing Functions ==========
 def answer_question_from_doc(file, question):
-    """Process document and answer a question based on its content."""
-    ext = file.name.split(".")[-1].lower()
     if ext == "pdf":
-        context = extract_text_from_pdf(file.name)
     elif ext == "docx":
-        context = extract_text_from_docx(file.name)
     elif ext == "pptx":
-        context = extract_text_from_pptx(file.name)
     elif ext == "xlsx":
-        context = extract_text_from_xlsx(file.name)
     else:
         return "Unsupported file format."
     if not context.strip():
         return "No text found in the document."
-    # Generate answer using QA pipeline correctly
     try:
         result = qa_model({"question": question, "context": context})
         return result["answer"]
     except Exception as e:
         return f"Error generating answer: {e}"
-def answer_question_from_image(image, question):
-    """Process an image, extract text, and answer a question."""
     img_text = extract_text_from_image(image)
     if not img_text.strip():
         return "No readable text found in the image."
@@ -339,27 +304,43 @@ def answer_question_from_image(image, question):
     except Exception as e:
         return f"Error generating answer: {e}"
-# ========== Gradio Interfaces ==========
 with gr.Blocks() as doc_interface:
     gr.Markdown("## 📄 Document Question Answering")
     file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
-    question_input = gr.Textbox(label="Ask a question")
     answer_output = gr.Textbox(label="Answer")
     file_submit = gr.Button("Get Answer")
-    file_submit.click(answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
-with gr.Blocks() as img_interface:
-    gr.Markdown("## 🖼️ Image Question Answering")
-    image_input = gr.Image(label="Upload an Image")
-    img_question_input = gr.Textbox(label="Ask a question")
-    img_answer_output = gr.Textbox(label="Answer")
-    image_submit = gr.Button("Get Answer")
-    image_submit.click(answer_question_from_image, inputs=[image_input, img_question_input], outputs=img_answer_output)
-# ========== Mount Gradio App ==========
-demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
-def home():
     return RedirectResponse(url="/")

 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 import gradio as gr
+import fitz  # PyMuPDF for PDFs
 import easyocr  # OCR for images
 import openpyxl  # XLSX processing
 import pptx  # PPTX processing
 import docx  # DOCX processing
+from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering
+from PIL import Image
+import torch
+# === Initialize FastAPI App ===
+app = FastAPI()
+# === Initialize QA Model for Documents and OCR ===
+qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
+# === Initialize Image QA Model (VQA) ===
 vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
 vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+# === OCR Reader ===
+reader = easyocr.Reader(['en', 'fr'])
+# === Document Text Extraction Functions ===
+def extract_text_from_pdf(file_obj):
+    doc = fitz.open(stream=file_obj.read(), filetype="pdf")
+    return "\n".join([page.get_text() for page in doc])
 def extract_text_from_docx(docx_file):
     doc = docx.Document(docx_file)
     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 def extract_text_from_pptx(pptx_file):
     text = []
     try:
         presentation = pptx.Presentation(pptx_file)
     return "\n".join(text)
 def extract_text_from_xlsx(xlsx_file):
     text = []
     try:
         wb = openpyxl.load_workbook(xlsx_file)
         return f"Error reading XLSX: {e}"
     return "\n".join(text)
+# === Image OCR ===
 def extract_text_from_image(image_path):
     result = reader.readtext(image_path, detail=0)
+    return " ".join(result)
+# === QA for Document Files ===
 def answer_question_from_doc(file, question):
+    if file is None or not question.strip():
+        return "Please upload a document and ask a question."
+    ext = file.name.split(".")[-1].lower()
     if ext == "pdf":
+        context = extract_text_from_pdf(file)
     elif ext == "docx":
+        context = extract_text_from_docx(file)
     elif ext == "pptx":
+        context = extract_text_from_pptx(file)
     elif ext == "xlsx":
+        context = extract_text_from_xlsx(file)
     else:
         return "Unsupported file format."
     if not context.strip():
         return "No text found in the document."
     try:
         result = qa_model({"question": question, "context": context})
         return result["answer"]
     except Exception as e:
         return f"Error generating answer: {e}"
+# === QA for Images using EasyOCR and QA model ===
+def answer_question_from_image_text(image, question):
     img_text = extract_text_from_image(image)
     if not img_text.strip():
         return "No readable text found in the image."
     except Exception as e:
         return f"Error generating answer: {e}"
+# === QA for Images using ViLT (Visual QA Model) ===
+def answer_question_from_image_visual(image, question):
+    if image is None or not question.strip():
+        return "Please upload an image and ask a question."
+    inputs = vqa_processor(image, question, return_tensors="pt")
+    with torch.no_grad():
+        outputs = vqa_model(**inputs)
+    predicted_id = outputs.logits.argmax(-1).item()
+    return vqa_model.config.id2label[predicted_id]
+# === Gradio Interfaces ===
 with gr.Blocks() as doc_interface:
     gr.Markdown("## 📄 Document Question Answering")
     file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
+    question_input = gr.Textbox(label="Ask a Question")
     answer_output = gr.Textbox(label="Answer")
     file_submit = gr.Button("Get Answer")
+    file_submit.click(fn=answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
+with gr.Blocks() as image_interface:
+    gr.Markdown("## 🖼️ Image Question Answering (OCR + VQA)")
+    with gr.Tabs():
+        with gr.TabItem("OCR-based Image QA"):
+            image_input = gr.Image(label="Upload Image")
+            img_question_input = gr.Textbox(label="Ask a Question")
+            img_answer_output = gr.Textbox(label="Answer")
+            gr.Button("Get Answer").click(fn=answer_question_from_image_text, inputs=[image_input, img_question_input], outputs=img_answer_output)
+        with gr.TabItem("Visual QA (ViLT)"):
+            image_input_vqa = gr.Image(label="Upload Image")
+            vqa_question_input = gr.Textbox(label="Ask a Question")
+            vqa_answer_output = gr.Textbox(label="Answer")
+            gr.Button("Get Answer").click(fn=answer_question_from_image_visual, inputs=[image_input_vqa, vqa_question_input], outputs=vqa_answer_output)
+# === Mount Gradio on FastAPI ===
+demo = gr.TabbedInterface([doc_interface, image_interface], ["Document QA", "Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
+def root():
     return RedirectResponse(url="/")