Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Apr 4

Commit

b36b2d0

verified ·

1 Parent(s): 4f113b7

working code

Browse files

Files changed (1) hide show

app.py +45 -36

app.py CHANGED Viewed

@@ -103,59 +103,68 @@ async def get_docs(request: Request):
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 import gradio as gr
-from transformers import VilBertForQuestionAnswering, ViltProcessor
 from PIL import Image
 import torch
-# Initialize FastAPI
 app = FastAPI()
-# Load VilBERT model and processor
-model = VilBertForQuestionAnswering.from_pretrained("facebook/vilbert-vqa")
-processor = ViltProcessor.from_pretrained("facebook/vilbert-vqa")
-# Function to handle image question answering
-def answer_question_from_image(image, question):
-    if image is None or question.strip() == "":
-        return "Please upload an image and enter a question."
-    # Process input
-    inputs = processor(images=image, text=question, return_tensors="pt")
     with torch.no_grad():
-        outputs = model(**inputs)
-        predicted_idx = outputs.logits.argmax(-1).item()
-    # For VilBERT VQA, class index maps to predefined answers (like "yes", "no", etc.)
-    # You'd need the VQA label mapping to decode this properly
-    # For now, just return the index
-    return f"Predicted answer ID: {predicted_idx}"
-# Create Image QA interface
-img_interface = gr.Interface(
-    fn=answer_question_from_image,
-    inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
-    outputs="text",
-    title="AI Image Question Answering"
-)
-# Dummy doc QA interface (replace with your own implementation)
-def dummy_doc_qa(doc, question):
-    return "This is a placeholder for Document QA."
 doc_interface = gr.Interface(
-    fn=dummy_doc_qa,
-    inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
     outputs="text",
     title="Document Question Answering"
 )
-# Combine into a tabbed interface
-demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
-# Mount Gradio inside FastAPI at root "/"
 app = gr.mount_gradio_app(app, demo, path="/")
-# Redirect root URL to Gradio UI
 @app.get("/")
-def home():
     return RedirectResponse(url="/")

 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 import gradio as gr
+from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
 from PIL import Image
 import torch
+import fitz  # PyMuPDF for PDF
 app = FastAPI()
+# ========== Document QA Setup ==========
+doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+def read_pdf(file):
+    doc = fitz.open(stream=file.read(), filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+def answer_question_from_doc(file, question):
+    if file is None or not question.strip():
+        return "Please upload a document and ask a question."
+    text = read_pdf(file)
+    prompt = f"Context: {text}\nQuestion: {question}\nAnswer:"
+    inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
     with torch.no_grad():
+        outputs = doc_model.generate(**inputs, max_new_tokens=100)
+    answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return answer.split("Answer:")[-1].strip()
+# ========== Image QA Setup ==========
+vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+def answer_question_from_image(image, question):
+    if image is None or not question.strip():
+        return "Please upload an image and ask a question."
+    inputs = vqa_processor(image, question, return_tensors="pt")
+    with torch.no_grad():
+        outputs = vqa_model(**inputs)
+    predicted_id = outputs.logits.argmax(-1).item()
+    return vqa_model.config.id2label[predicted_id]
+# ========== Gradio Interfaces ==========
 doc_interface = gr.Interface(
+    fn=answer_question_from_doc,
+    inputs=[gr.File(label="Upload Document (PDF)"), gr.Textbox(label="Ask a Question")],
     outputs="text",
     title="Document Question Answering"
 )
+img_interface = gr.Interface(
+    fn=answer_question_from_image,
+    inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
+    outputs="text",
+    title="Image Question Answering"
+)
+# ========== Combine and Mount ==========
+demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])  # tab titles are listed in the same order as the interfaces
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
+def root():  # NOTE(review): redirects "/" to "/", the path this route is registered on and where Gradio is mounted above - confirm this handler is reachable and needed
     return RedirectResponse(url="/")