Spaces:
Running
Running
""" | |
from fastapi import FastAPI | |
from fastapi.responses import RedirectResponse | |
import gradio as gr | |
from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM | |
from PIL import Image | |
import torch | |
import fitz # PyMuPDF for PDF | |
app = FastAPI() | |
# ========== Document QA Setup ========== | |
doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") | |
doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") | |
def read_pdf(file): | |
doc = fitz.open(stream=file.read(), filetype="pdf") | |
text = "" | |
for page in doc: | |
text += page.get_text() | |
return text | |
def answer_question_from_doc(file, question): | |
if file is None or not question.strip(): | |
return "Please upload a document and ask a question." | |
text = read_pdf(file) | |
prompt = f"Context: {text}\nQuestion: {question}\nAnswer:" | |
inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048) | |
with torch.no_grad(): | |
outputs = doc_model.generate(**inputs, max_new_tokens=100) | |
answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True) | |
return answer.split("Answer:")[-1].strip() | |
# ========== Image QA Setup ========== | |
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa") | |
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") | |
def answer_question_from_image(image, question): | |
if image is None or not question.strip(): | |
return "Please upload an image and ask a question." | |
inputs = vqa_processor(image, question, return_tensors="pt") | |
with torch.no_grad(): | |
outputs = vqa_model(**inputs) | |
predicted_id = outputs.logits.argmax(-1).item() | |
return vqa_model.config.id2label[predicted_id] | |
# ========== Gradio Interfaces ========== | |
doc_interface = gr.Interface( | |
fn=answer_question_from_doc, | |
inputs=[gr.File(label="Upload Document (PDF)"), gr.Textbox(label="Ask a Question")], | |
outputs="text", | |
title="Document Question Answering" | |
) | |
img_interface = gr.Interface( | |
fn=answer_question_from_image, | |
inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")], | |
outputs="text", | |
title="Image Question Answering" | |
) | |
# ========== Combine and Mount ========== | |
demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"]) | |
app = gr.mount_gradio_app(app, demo, path="/") | |
@app.get("/") | |
def root(): | |
return RedirectResponse(url="/") | |
""" | |
import gradio as gr | |
import fitz # PyMuPDF for PDFs | |
import easyocr # OCR for images | |
import openpyxl # XLSX processing | |
import pptx # PPTX processing | |
import docx # DOCX processing | |
import json # Exporting results | |
from deep_translator import GoogleTranslator | |
from transformers import pipeline | |
from fastapi import FastAPI | |
from starlette.responses import RedirectResponse | |
# Initialize the FastAPI app that hosts the Gradio UI.
app = FastAPI()

# Initialize the AI models once at import time so every request reuses them.
# Extractive question answering: given a question and a context passage, the
# pipeline returns a span of the context as the answer.
qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
# Image captioning model. NOTE(review): loaded here but not referenced by the
# handlers in the code reviewed — confirm whether it is still needed.
image_captioning = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
# EasyOCR reader for image text extraction (English & French).
reader = easyocr.Reader(['en', 'fr'])
# ---- TEXT EXTRACTION FUNCTIONS ---- | |
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF file.

    Args:
        pdf_file: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        The page texts joined by newlines, or a string starting with
        ``"Error reading PDF:"`` if the document could not be read.
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_file) as doc:
            pages = [page.get_text("text") for page in doc]
    except Exception as e:  # best-effort boundary: report instead of raising
        return f"Error reading PDF: {e}"
    return "\n".join(pages)
def extract_text_from_docx(docx_file):
    """Extract the text of all non-blank paragraphs of a DOCX file.

    Args:
        docx_file: Path of the document, passed straight to ``docx.Document``.

    Returns:
        The paragraph texts joined by newlines, or a string starting with
        ``"Error reading DOCX:"`` if the document could not be read (same
        convention as the PDF/PPTX/XLSX extractors).
    """
    try:
        document = docx.Document(docx_file)
        # Skip paragraphs that contain only whitespace.
        paragraphs = [p.text for p in document.paragraphs if p.text.strip()]
    except Exception as e:  # best-effort boundary: report instead of raising
        return f"Error reading DOCX: {e}"
    return "\n".join(paragraphs)
def extract_text_from_pptx(pptx_file):
    """Extract the text of every text-bearing shape of a PPTX file.

    Args:
        pptx_file: Path of the presentation, passed straight to
            ``pptx.Presentation``.

    Returns:
        The shape texts joined by newlines (slide order, then shape order),
        or a string starting with ``"Error reading PPTX:"`` if the
        presentation could not be read.
    """
    text = []
    try:
        presentation = pptx.Presentation(pptx_file)
        for slide in presentation.slides:
            for shape in slide.shapes:
                # Not every shape exposes text (pictures, for instance).
                if hasattr(shape, "text"):
                    text.append(shape.text)
    except Exception as e:  # best-effort boundary: report instead of raising
        return f"Error reading PPTX: {e}"
    return "\n".join(text)
def extract_text_from_xlsx(xlsx_file):
    """Extract the cell values of every worksheet of an XLSX file.

    Args:
        xlsx_file: Path of the workbook, passed straight to
            ``openpyxl.load_workbook``.

    Returns:
        One line per row (cell values separated by single spaces), joined by
        newlines, or a string starting with ``"Error reading XLSX:"`` if the
        workbook could not be read.
    """
    text = []
    try:
        wb = openpyxl.load_workbook(xlsx_file)
        # ``worksheets`` holds only real worksheets, so chartsheets (which
        # have no rows to iterate) cannot abort the extraction.
        for ws in wb.worksheets:
            for row in ws.iter_rows(values_only=True):
                # Only empty cells are skipped: a plain truthiness test would
                # also drop legitimate values such as 0 or False.
                text.append(" ".join(str(cell) for cell in row if cell is not None))
    except Exception as e:  # best-effort boundary: report instead of raising
        return f"Error reading XLSX: {e}"
    return "\n".join(text)
def extract_text_from_image(image_path):
    """Extract text from an image using the module-level EasyOCR reader.

    Args:
        image_path: The image to read, forwarded unchanged to
            ``reader.readtext``. NOTE(review): despite the name, the Gradio
            image component may hand over image data rather than a path —
            confirm against the component configuration.

    Returns:
        The recognized text fragments joined by single spaces; an empty
        string when nothing was recognized.
    """
    # detail=0 asks EasyOCR for the recognized strings only.
    fragments = reader.readtext(image_path, detail=0)
    return " ".join(fragments)
# ---- MAIN PROCESSING FUNCTIONS ---- | |
def answer_question_from_doc(file, question):
    """Answer a question from the text content of an uploaded document.

    Args:
        file: The upload from the Gradio file component: either an object
            exposing the file path as ``.name`` or a plain path string.
        question: The user's question.

    Returns:
        The answer span found by the question-answering model, or a
        user-facing message when the input is missing, the format is
        unsupported, the document has no text, or it could not be read.
    """
    if file is None or not question or not question.strip():
        return "Please upload a document and ask a question."

    # Accept both an upload wrapper (path in ``.name``) and a bare path.
    path = getattr(file, "name", file)
    ext = str(path).rsplit(".", 1)[-1].lower()

    if ext == "pdf":
        context = extract_text_from_pdf(path)
    elif ext == "docx":
        context = extract_text_from_docx(path)
    elif ext == "pptx":
        context = extract_text_from_pptx(path)
    elif ext == "xlsx":
        context = extract_text_from_xlsx(path)
    else:
        return "Unsupported file format."

    if not context.strip():
        return "No text found in the document."

    # The extractors report failures as "Error reading <EXT>: ..." strings;
    # show that to the user instead of running QA over an error message.
    if context.startswith(f"Error reading {ext.upper()}: "):
        return context

    # The question-answering pipeline takes the question and the context as
    # separate arguments and returns a dict whose "answer" key holds the
    # extracted span (it has no "generated_text" field).
    result = qa_model(question=question, context=context)
    return result["answer"]
def answer_question_from_image(image, question):
    """Answer a question from the text recognized in an uploaded image.

    Args:
        image: The image from the Gradio image component, handed to
            ``extract_text_from_image`` unchanged; ``None`` when nothing was
            uploaded.
        question: The user's question.

    Returns:
        The answer span found by the question-answering model, or a
        user-facing message when the input is missing or the image contains
        no readable text.
    """
    if image is None or not question or not question.strip():
        return "Please upload an image and ask a question."

    img_text = extract_text_from_image(image)
    if not img_text.strip():
        return "No readable text found in the image."

    # The question-answering pipeline takes the question and the context as
    # separate arguments and returns a dict whose "answer" key holds the
    # extracted span (it has no "generated_text" field).
    result = qa_model(question=question, context=img_text)
    return result["answer"]
# ---- GRADIO INTERFACES ---- | |
# Document tab: upload a file, type a question, read the answer.
with gr.Blocks() as doc_interface:
    gr.Markdown("## Document Question Answering")
    file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
    question_input = gr.Textbox(label="Ask a question")
    answer_output = gr.Textbox(label="Answer")
    file_submit = gr.Button("Get Answer")
    # The listener is registered inside the Blocks context so it is attached
    # to this interface.
    file_submit.click(
        answer_question_from_doc,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )
# Image tab: upload an image, type a question, read the answer.
with gr.Blocks() as img_interface:
    gr.Markdown("## Image Question Answering")
    image_input = gr.Image(label="Upload an Image")
    img_question_input = gr.Textbox(label="Ask a question")
    img_answer_output = gr.Textbox(label="Answer")
    image_submit = gr.Button("Get Answer")
    # The listener is registered inside the Blocks context so it is attached
    # to this interface.
    image_submit.click(
        answer_question_from_image,
        inputs=[image_input, img_question_input],
        outputs=img_answer_output,
    )
# ---- MOUNT GRADIO APP ----
# Combine both interfaces into one tabbed UI and serve it from the FastAPI
# app at the site root.
demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
app = gr.mount_gradio_app(app, demo, path="/")
def home():
    """Return a redirect response pointing at the site root ("/").

    NOTE(review): this function is not registered as a route in the code
    reviewed, and the Gradio UI is mounted at "/". Registering it for "/"
    would redirect the root to itself — confirm the intended path before
    wiring it up.
    """
    return RedirectResponse(url="/")