# qtAnswering / app.py
import gradio as gr
import fitz # PyMuPDF for PDF parsing
from tika import parser # Apache Tika for document parsing
import openpyxl
from pptx import Presentation
from PIL import Image
from transformers import pipeline
import torch
import numpy as np
import io  # used to wrap raw bytes in a file-like object for openpyxl

# Load Optimized Hugging Face Models
print("🔄 Loading models...")
qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
print("✅ Models loaded (Optimized for Speed)")
# Allowed File Extensions
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
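# Image uploads are handled by the separate gr.Image input, so image extensions are intentionally not listed here.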

def validate_file_type(file):
    ext = file.name.split(".")[-1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        return f"❌ Unsupported file format: {ext}"
    return None
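# validate_file_type returns None for a supported extension, otherwise an error string that is shown directly in the UI.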

# Truncate text to roughly 450 tokens (approximated by whitespace-separated words)
def truncate_text(text, max_tokens=450):
    words = text.split()
    return " ".join(words[:max_tokens])

# Document Text Extraction Functions
def extract_text_from_pdf(pdf_bytes):
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    text = "\n".join(page.get_text("text") for page in doc)
    doc.close()
    return text if text.strip() else "⚠️ No text found."
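# fitz.open(stream=...) reads the PDF directly from memory, so no temporary file is needed.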

def extract_text_with_tika(file_bytes):
    parsed = parser.from_buffer(file_bytes)
    content = parsed.get("content")
    # Tika returns None (not a missing key) when no text could be extracted
    return content.strip() if content else "⚠️ No text found."
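# Note: the tika client talks to a local Tika server that it starts on first use, which requires a Java runtime on the host.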

def extract_text_from_excel(excel_bytes):
    # openpyxl expects a path or file-like object, so wrap the raw bytes
    wb = openpyxl.load_workbook(io.BytesIO(excel_bytes), read_only=True)
    text = []
    for sheet in wb.worksheets:
        for row in sheet.iter_rows(values_only=True):
            text.append(" ".join(str(cell) for cell in row if cell is not None))
    return "\n".join(text) if text else "⚠️ No text found."

# Process a document and answer a question about its contents
def answer_question_from_document(file, question):
    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error
    file_ext = file.name.split(".")[-1].lower()
    file_bytes = file.read()
    if file_ext == "pdf":
        text = extract_text_from_pdf(file_bytes)
    elif file_ext in ["docx", "pptx"]:
        text = extract_text_with_tika(file_bytes)
    elif file_ext == "xlsx":
        text = extract_text_from_excel(file_bytes)
    else:
        return "❌ Unsupported file format!"
    if not text:
        return "⚠️ No text extracted from the document."
    truncated_text = truncate_text(text)
    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
    return response[0]["generated_text"]
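# Note: text-generation pipelines return the prompt plus the completion in "generated_text", so the reply echoes the question and context.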

# Process an image and answer a question about it (caption first, then QA)
def answer_question_from_image(image, question):
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    caption = image_captioning_pipeline(image)[0]["generated_text"]
    response = qa_pipeline(f"Question: {question}\nContext: {caption}")
    return response[0]["generated_text"]
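# Answer quality depends entirely on the caption: details BLIP does not mention cannot be recovered by the text model.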

# Gradio Interface
interface = gr.Interface(
    fn=lambda file, image, question: (
        answer_question_from_document(file, question) if file else answer_question_from_image(image, question)
    ),
    inputs=[
        # Both uploads may be left empty; the lambda above picks whichever one was provided
        gr.File(label="📂 Upload Document (PDF, DOCX, PPTX, XLSX)"),
        gr.Image(label="🖼️ Upload Image"),
        gr.Textbox(label="💬 Ask a Question")
    ],
    outputs="text",
    title="📄 AI Document & Image Question Answering",
    description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content."
)

interface.launch()
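# launch() serves the app locally (http://127.0.0.1:7860 by default); pass share=True for a temporary public URL.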