Spaces:

ikraamkb
/

qtAnswering

Running

App Files Files Community

qtAnswering / app.py

ikraamkb

Update app.py

753db53 verified 2 months ago

raw

history blame

3.57 kB

	import io
	import os

	import gradio as gr
	import fitz # PyMuPDF for PDF parsing
	from tika import parser # Apache Tika for document parsing
	import openpyxl
	from pptx import Presentation
	from PIL import Image
	from transformers import pipeline
	import torch
	import numpy as np

	# Load both Hugging Face pipelines once, at import time, so that every
	# request handled by the app reuses the same model instances.
	# NOTE(review): device=-1 is presumably the transformers convention for
	# running on CPU — confirm against the pinned transformers version.
	print("🔄 Loading models...")

	# Text-generation model; the handlers below prompt it with a
	# "Question: ...\nContext: ..." string.
	qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
	# Image-to-text model; its caption is used as the context for image questions.
	image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)

	print("✅ Models loaded (Optimized for Speed)")

	# Lower-case file extensions (without the leading dot) that
	# validate_file_type accepts.
	ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}

	def validate_file_type(file):
	    """Check that an uploaded file has one of the supported extensions.

	    Args:
	        file: Upload object exposing a ``name`` attribute, or a plain path
	            string.

	    Returns:
	        ``None`` when the extension is listed in ``ALLOWED_EXTENSIONS``;
	        otherwise a user-facing error string.
	    """
	    # Accept both file-like uploads (``.name``) and bare path strings, and
	    # look only at the final path component so that a dot inside a directory
	    # name is never mistaken for the extension separator.
	    filename = os.path.basename(str(getattr(file, "name", file)))

	    # rpartition leaves ``dot`` empty when the name contains no "." at all,
	    # so a file such as "README" is not reported as having format "readme".
	    _, dot, suffix = filename.rpartition(".")
	    if not dot:
	        return "❌ Unsupported file format: (no extension)"

	    ext = suffix.lower()
	    if ext not in ALLOWED_EXTENSIONS:
	        return f"❌ Unsupported file format: {ext}"
	    return None

	# Truncate text to a maximum number of whitespace-separated words
	def truncate_text(text, max_tokens=450):
	    """Return the first ``max_tokens`` whitespace-separated words of ``text``.

	    Despite the parameter name, the limit counts words as produced by
	    ``str.split()``, not model tokens. Runs of whitespace are collapsed to a
	    single space in the result.

	    Args:
	        text: Text to shorten.
	        max_tokens: Maximum number of words to keep. Zero or a negative
	            value yields an empty string.

	    Returns:
	        The kept words joined by single spaces.
	    """
	    # A negative slice bound would drop words from the END of the text
	    # instead of truncating it, so treat every non-positive limit as "keep
	    # nothing".
	    if not text or max_tokens <= 0:
	        return ""

	    # Bound the split so a very large document is not tokenised in full just
	    # to keep its first few hundred words; the unsplit remainder (if any)
	    # lands at index ``max_tokens`` and is discarded by the slice.
	    words = text.split(None, max_tokens)[:max_tokens]
	    return " ".join(words)

	# Document Text Extraction Functions
	def extract_text_from_pdf(pdf_bytes):
	    """Extract the plain text of every page of an in-memory PDF.

	    Args:
	        pdf_bytes: Raw PDF content.

	    Returns:
	        The page texts joined by newlines, or "⚠️ No text found." when no
	        page yields any non-whitespace text.
	    """
	    # The context manager releases the document even if a page fails to parse.
	    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	        text = "\n".join(page.get_text("text") for page in doc)

	    # Test the stripped text: joining several empty pages produces "\n",
	    # which is truthy and would otherwise bypass the fallback message.
	    return text if text.strip() else "⚠️ No text found."

	def extract_text_with_tika(file_bytes):
	    """Extract text from a document buffer using Apache Tika.

	    Args:
	        file_bytes: Raw document content.

	    Returns:
	        The extracted text with surrounding whitespace removed, or
	        "⚠️ No text found." when Tika returns no usable content.
	    """
	    parsed = parser.from_buffer(file_bytes)

	    # The "content" key can be present with a None value, in which case
	    # dict.get's default is not used; coalesce explicitly so .strip() is
	    # never called on None.
	    content = ((parsed or {}).get("content") or "").strip()
	    return content or "⚠️ No text found."

	def extract_text_from_excel(excel_bytes):
	    """Extract the cell values of every worksheet of an XLSX workbook.

	    Args:
	        excel_bytes: Raw workbook content. A path or file-like object is
	            passed through to openpyxl unchanged.

	    Returns:
	        One line per non-empty row, with cell values separated by spaces, or
	        "⚠️ No text found." when the workbook holds no values.
	    """
	    # load_workbook needs a filename or a file-like object, so raw bytes
	    # have to be wrapped in an in-memory stream first.
	    if isinstance(excel_bytes, (bytes, bytearray)):
	        source = io.BytesIO(excel_bytes)
	    else:
	        source = excel_bytes

	    wb = openpyxl.load_workbook(source, read_only=True)
	    try:
	        lines = []
	        for sheet in wb.worksheets:
	            for row in sheet.iter_rows(values_only=True):
	                # Skip empty cells so they do not show up as the literal
	                # string "None" in the extracted text.
	                cells = [str(value) for value in row if value is not None]
	                if cells:
	                    lines.append(" ".join(cells))
	    finally:
	        # Read-only workbooks keep the underlying source open until closed.
	        wb.close()

	    return "\n".join(lines) if lines else "⚠️ No text found."

	# Function to process document and answer question
	def answer_question_from_document(file, question):
	    """Answer a question using the text of an uploaded document.

	    The file type is validated, the text is extracted with the extractor
	    matching the extension, truncated, and passed to the text-generation
	    pipeline as the context of a "Question/Context" prompt.

	    Args:
	        file: Upload object exposing ``name`` (and usually ``read()``), or a
	            plain path string.
	        question: The user's question.

	    Returns:
	        The generated answer, or a user-facing warning/error string when the
	        file type is unsupported or no text could be extracted.
	    """
	    validation_error = validate_file_type(file)
	    if validation_error:
	        return validation_error

	    # One extractor per supported extension; DOCX and PPTX both go via Tika.
	    extractors = {
	        "pdf": extract_text_from_pdf,
	        "docx": extract_text_with_tika,
	        "pptx": extract_text_with_tika,
	        "xlsx": extract_text_from_excel,
	    }
	    file_ext = str(getattr(file, "name", file)).split(".")[-1].lower()
	    extractor = extractors.get(file_ext)
	    if extractor is None:
	        # Defensive: only reachable if ALLOWED_EXTENSIONS gains an entry
	        # that has no extractor registered above.
	        return "❌ Unsupported file format!"

	    file_bytes = _read_upload_bytes(file)
	    if not file_bytes:
	        return "⚠️ No text extracted from the document."

	    text = extractor(file_bytes)
	    # The extractors signal "nothing found" with a (truthy) warning string;
	    # treat it like empty text instead of sending it to the model as context.
	    if not text or not text.strip() or text.strip() == "⚠️ No text found.":
	        return "⚠️ No text extracted from the document."

	    truncated_text = truncate_text(text)
	    # return_full_text=False keeps the prompt (question + document context)
	    # out of the returned answer.
	    response = qa_pipeline(
	        f"Question: {question}\nContext: {truncated_text}",
	        return_full_text=False,
	    )
	    return response[0]["generated_text"].strip()


	def _read_upload_bytes(file):
	    """Return the raw bytes of an upload given as a file-like object or a path."""
	    reader = getattr(file, "read", None)
	    data = reader() if callable(reader) else b""
	    if data:
	        return data

	    # NOTE(review): depending on the Gradio version the upload may arrive as
	    # a plain path, or as a wrapper whose handle yields no data while
	    # ``name`` points at the real file — confirm against the pinned version.
	    path = getattr(file, "name", file)
	    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
	        with open(path, "rb") as handle:
	            return handle.read()
	    return data

	# Function to process image and answer question
	def answer_question_from_image(image, question):
	    """Answer a question about an image via its generated caption.

	    The image is captioned by the image-to-text pipeline and the caption is
	    passed to the text-generation pipeline as the context of a
	    "Question/Context" prompt.

	    Args:
	        image: The uploaded image as a NumPy array or a PIL image, or
	            ``None`` when nothing was uploaded.
	        question: The user's question.

	    Returns:
	        The generated answer, or a user-facing warning when no image was
	        supplied.
	    """
	    # The UI routes here whenever no document was uploaded, so the image may
	    # be missing as well; fail with a message instead of crashing the
	    # captioning pipeline on None.
	    if image is None:
	        return "⚠️ Please upload a document or an image."

	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)

	    caption = image_captioning_pipeline(image)[0]["generated_text"]
	    # return_full_text=False keeps the prompt out of the returned answer.
	    response = qa_pipeline(
	        f"Question: {question}\nContext: {caption}",
	        return_full_text=False,
	    )
	    return response[0]["generated_text"].strip()

	# Gradio interface: one form with a document upload, an image upload and a
	# question box, producing a single text answer.
	interface = gr.Interface(
	# Route to the document handler whenever a file was uploaded; otherwise
	# fall through to the image handler (also when no image was provided).
	fn=lambda file, image, question: (
	answer_question_from_document(file, question) if file else answer_question_from_image(image, question)
	),
	# Input order must match the lambda's parameters: file, image, question.
	# NOTE(review): the `optional=True` keyword may not be accepted by every
	# Gradio release — confirm against the pinned Gradio version.
	inputs=[
	gr.File(label="📂 Upload Document (PDF, DOCX, PPTX, XLSX)", optional=True),
	gr.Image(label="🖼️ Upload Image", optional=True),
	gr.Textbox(label="💬 Ask a Question")
	],
	outputs="text",
	title="📄 AI Document & Image Question Answering",
	description="Upload a document (PDF, DOCX, PPTX, XLSX) or an image, then ask a question about its content."
	)

	# Start the web app; this runs at import time because the file is a script.
	interface.launch()