Spaces:

ikraamkb
/

qtAnswering

Running

App Files Files Community

ikraamkb committed on Apr 4

Commit

5976e32

verified ·

1 Parent(s): 6218efd

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -1

app.py CHANGED Viewed

@@ -201,4 +201,165 @@ app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
     return RedirectResponse(url="/")
-"""

 @app.get("/")
 def home():
     return RedirectResponse(url="/")
+"""
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+import gradio as gr
+from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
+from PIL import Image
+import torch
+import fitz  # PyMuPDF for PDF
+import easyocr  # OCR for images
+import openpyxl  # XLSX processing
+import pptx  # PPTX processing
+import docx  # DOCX processing
+# Initialize FastAPI app
+app = FastAPI()
+# ========== Document QA Setup ==========
+doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+def read_pdf(file):
+    doc = fitz.open(stream=file.read(), filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+def answer_question_from_doc(file, question):
+    if file is None or not question.strip():
+        return "Please upload a document and ask a question."
+    text = read_pdf(file)
+    prompt = f"Context: {text}\nQuestion: {question}\nAnswer:"
+    inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
+    with torch.no_grad():
+        outputs = doc_model.generate(**inputs, max_new_tokens=100)
+    answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return answer.split("Answer:")[-1].strip()
+# ========== Image QA Setup ==========
+vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+def answer_question_from_image(image, question):
+    if image is None or not question.strip():
+        return "Please upload an image and ask a question."
+    inputs = vqa_processor(image, question, return_tensors="pt")
+    with torch.no_grad():
+        outputs = vqa_model(**inputs)
+    predicted_id = outputs.logits.argmax(-1).item()
+    return vqa_model.config.id2label[predicted_id]
+# ========== Text Extraction Functions ==========
+reader = easyocr.Reader(['en', 'fr'])  # OCR for English & French
+def extract_text_from_pdf(pdf_file):
+    """Extract text from a PDF file."""
+    text = []
+    try:
+        with fitz.open(pdf_file) as doc:
+            for page in doc:
+                text.append(page.get_text("text"))
+    except Exception as e:
+        return f"Error reading PDF: {e}"
+    return "\n".join(text)
+def extract_text_from_docx(docx_file):
+    """Extract text from a DOCX file."""
+    doc = docx.Document(docx_file)
+    return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
+def extract_text_from_pptx(pptx_file):
+    """Extract text from a PPTX file."""
+    text = []
+    try:
+        presentation = pptx.Presentation(pptx_file)
+        for slide in presentation.slides:
+            for shape in slide.shapes:
+                if hasattr(shape, "text"):
+                    text.append(shape.text)
+    except Exception as e:
+        return f"Error reading PPTX: {e}"
+    return "\n".join(text)
+def extract_text_from_xlsx(xlsx_file):
+    """Extract text from an XLSX file."""
+    text = []
+    try:
+        wb = openpyxl.load_workbook(xlsx_file)
+        for sheet in wb.sheetnames:
+            ws = wb[sheet]
+            for row in ws.iter_rows(values_only=True):
+                text.append(" ".join(str(cell) for cell in row if cell))
+    except Exception as e:
+        return f"Error reading XLSX: {e}"
+    return "\n".join(text)
+def extract_text_from_image(image_path):
+    """Extract text from an image using EasyOCR."""
+    result = reader.readtext(image_path, detail=0)
+    return " ".join(result)  # Return text as a single string
+# ========== Main Processing Functions ==========
+def answer_question_from_doc(file, question):
+    """Process document and answer a question based on its content."""
+    ext = file.name.split(".")[-1].lower()
+    if ext == "pdf":
+        context = extract_text_from_pdf(file.name)
+    elif ext == "docx":
+        context = extract_text_from_docx(file.name)
+    elif ext == "pptx":
+        context = extract_text_from_pptx(file.name)
+    elif ext == "xlsx":
+        context = extract_text_from_xlsx(file.name)
+    else:
+        return "Unsupported file format."
+    if not context.strip():
+        return "No text found in the document."
+    # Generate answer using QA pipeline correctly
+    try:
+        result = qa_model({"question": question, "context": context})
+        return result["answer"]
+    except Exception as e:
+        return f"Error generating answer: {e}"
+def answer_question_from_image(image, question):
+    """Process an image, extract text, and answer a question."""
+    img_text = extract_text_from_image(image)
+    if not img_text.strip():
+        return "No readable text found in the image."
+    try:
+        result = qa_model({"question": question, "context": img_text})
+        return result["answer"]
+    except Exception as e:
+        return f"Error generating answer: {e}"
+# ========== Gradio Interfaces ==========
+# Document tab: a file upload and a question box feed
+# answer_question_from_doc; its return value is shown in the answer box.
+with gr.Blocks() as doc_interface:
+    gr.Markdown("## 📄 Document Question Answering")
+    file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
+    question_input = gr.Textbox(label="Ask a question")
+    answer_output = gr.Textbox(label="Answer")
+    file_submit = gr.Button("Get Answer")
+    # Clicking the button calls the handler with (file, question).
+    file_submit.click(answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
+# Image tab: an image upload and a question box feed
+# answer_question_from_image; its return value is shown in the answer box.
+with gr.Blocks() as img_interface:
+    gr.Markdown("## 🖼️ Image Question Answering")
+    image_input = gr.Image(label="Upload an Image")
+    img_question_input = gr.Textbox(label="Ask a question")
+    img_answer_output = gr.Textbox(label="Answer")
+    image_submit = gr.Button("Get Answer")
+    # Clicking the button calls the handler with (image, question).
+    image_submit.click(answer_question_from_image, inputs=[image_input, img_question_input], outputs=img_answer_output)
+# ========== Mount Gradio App ==========
+# Combine the two Blocks UIs into one tabbed demo and mount it on the
+# FastAPI app at the root path.
+demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
+app = gr.mount_gradio_app(app, demo, path="/")
+# NOTE(review): this handler redirects "/" to "/" (itself), and it is
+# registered after the Gradio app was mounted at the same path. Presumably
+# the mount takes the request first so this route is never reached - TODO
+# confirm, and either drop the route or point it at a different URL.
+@app.get("/")
+def home():
+    return RedirectResponse(url="/")