"""Document and image question answering, served by Gradio on FastAPI.

Two Gradio tabs are exposed:

* Document QA - extracts text from an uploaded PDF, DOCX, PPTX or XLSX file
  and answers a question with an extractive question-answering model.
* Image QA - runs OCR on an uploaded image and answers a question from the
  recognised text.
"""

import json  # Exporting results

import docx  # DOCX processing
import easyocr  # OCR for images
import fitz  # PyMuPDF for PDFs
import gradio as gr
import openpyxl  # XLSX processing
import pptx  # PPTX processing
from deep_translator import GoogleTranslator
from fastapi import FastAPI
from starlette.responses import RedirectResponse
from transformers import pipeline

# Initialize FastAPI app
app = FastAPI()

# Initialize AI models (loaded once, at import time).
qa_model = pipeline(
    "question-answering",
    model="distilbert-base-uncased-distilled-squad",
)
image_captioning = pipeline(
    "image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
# EasyOCR for image text extraction (English & French)
reader = easyocr.Reader(["en", "fr"])

# Every extractor reports failure as a string starting with this prefix, so
# the handlers can tell a failed extraction from real document text.
_ERROR_PREFIX = "Error reading"

_MISSING_DOC_INPUT = "Please upload a document and ask a question."
_MISSING_IMAGE_INPUT = "Please upload an image and ask a question."


# ---- TEXT EXTRACTION FUNCTIONS ----

def extract_text_from_pdf(pdf_file) -> str:
    """Extract text from a PDF file.

    Args:
        pdf_file: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined by newlines, or a string starting with
        ``"Error reading PDF:"`` if the file could not be processed.
    """
    try:
        with fitz.open(pdf_file) as doc:
            pages = [page.get_text("text") for page in doc]
    except Exception as e:  # best effort: report the failure as text
        return f"{_ERROR_PREFIX} PDF: {e}"
    return "\n".join(pages)


def extract_text_from_docx(docx_file) -> str:
    """Extract text from a DOCX file.

    Args:
        docx_file: Path of the document, passed to ``docx.Document``.

    Returns:
        All non-blank paragraphs joined by newlines, or a string starting
        with ``"Error reading DOCX:"`` if the file could not be processed.
    """
    try:
        doc = docx.Document(docx_file)
        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
    except Exception as e:  # same error contract as the other extractors
        return f"{_ERROR_PREFIX} DOCX: {e}"
    return "\n".join(paragraphs)


def extract_text_from_pptx(pptx_file) -> str:
    """Extract text from a PPTX file.

    Args:
        pptx_file: Path of the presentation, passed to ``pptx.Presentation``.

    Returns:
        The text of every shape exposing a ``text`` attribute, joined by
        newlines, or a string starting with ``"Error reading PPTX:"`` if the
        file could not be processed.
    """
    try:
        presentation = pptx.Presentation(pptx_file)
        text = [
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
    except Exception as e:
        return f"{_ERROR_PREFIX} PPTX: {e}"
    return "\n".join(text)


def extract_text_from_xlsx(xlsx_file) -> str:
    """Extract text from an XLSX file.

    Args:
        xlsx_file: Path of the workbook, passed to ``openpyxl.load_workbook``.

    Returns:
        One line per row (cell values separated by spaces) across all
        sheets, or a string starting with ``"Error reading XLSX:"`` if the
        file could not be processed.
    """
    text = []
    try:
        wb = openpyxl.load_workbook(xlsx_file)
        for sheet in wb.sheetnames:
            for row in wb[sheet].iter_rows(values_only=True):
                # Skip only empty cells: a truthiness test would also drop
                # legitimate values such as 0 or False.
                text.append(
                    " ".join(str(cell) for cell in row if cell is not None)
                )
    except Exception as e:
        return f"{_ERROR_PREFIX} XLSX: {e}"
    return "\n".join(text)


def extract_text_from_image(image_path) -> str:
    """Extract text from an image using EasyOCR.

    Args:
        image_path: Image handed unchanged to ``reader.readtext``. Despite
            the name, the Gradio image tab supplies whatever ``gr.Image``
            produces (presumably a numpy array by default - confirm against
            the installed Gradio version).

    Returns:
        All recognised text fragments joined by single spaces.
    """
    result = reader.readtext(image_path, detail=0)
    return " ".join(result)


# ---- MAIN PROCESSING FUNCTIONS ----

# Maps a lower-cased file extension to the extractor that handles it.
_EXTRACTORS = {
    "pdf": extract_text_from_pdf,
    "docx": extract_text_from_docx,
    "pptx": extract_text_from_pptx,
    "xlsx": extract_text_from_xlsx,
}


def _answer(question: str, context: str, max_answer_len: int) -> str:
    """Run the extractive QA model and return the answer text."""
    # A question-answering pipeline takes the question and context as
    # separate arguments and returns a dict with an "answer" key.
    result = qa_model(
        question=question,
        context=context,
        max_answer_len=max_answer_len,
    )
    return result["answer"]


def answer_question_from_doc(file, question):
    """Process a document and answer a question based on its content.

    Args:
        file: Uploaded file from ``gr.File``: either an object whose
            ``name`` attribute is the path on disk, or the path itself.
        question: The user's question.

    Returns:
        The model's answer, or a human-readable message when input is
        missing, the format is unsupported, extraction failed, or the
        document contains no text.
    """
    if file is None or not (question or "").strip():
        return _MISSING_DOC_INPUT

    path = str(getattr(file, "name", file))
    ext = path.split(".")[-1].lower()
    extractor = _EXTRACTORS.get(ext)
    if extractor is None:
        return "Unsupported file format."

    context = extractor(path)
    if context.startswith(_ERROR_PREFIX):
        # Show the extraction failure instead of asking the model to
        # answer from an error message.
        return context
    if not context.strip():
        return "No text found in the document."

    # Generate answer using AI
    return _answer(question, context, max_answer_len=100)


def answer_question_from_image(image, question):
    """Process an image, extract its text, and answer a question.

    Args:
        image: Image value supplied by ``gr.Image``.
        question: The user's question.

    Returns:
        The model's answer, or a human-readable message when input is
        missing or no text could be read from the image.
    """
    # Compare with None explicitly: the image may be an array, whose
    # truth value is ambiguous.
    if image is None or not (question or "").strip():
        return _MISSING_IMAGE_INPUT

    img_text = extract_text_from_image(image)
    if not img_text.strip():
        return "No readable text found in the image."

    # Generate answer using AI
    return _answer(question, img_text, max_answer_len=50)


# ---- GRADIO INTERFACES ----

with gr.Blocks() as doc_interface:
    gr.Markdown("## Document Question Answering")
    file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
    question_input = gr.Textbox(label="Ask a question")
    answer_output = gr.Textbox(label="Answer")
    file_submit = gr.Button("Get Answer")
    file_submit.click(
        answer_question_from_doc,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

with gr.Blocks() as img_interface:
    gr.Markdown("## Image Question Answering")
    image_input = gr.Image(label="Upload an Image")
    img_question_input = gr.Textbox(label="Ask a question")
    img_answer_output = gr.Textbox(label="Answer")
    image_submit = gr.Button("Get Answer")
    image_submit.click(
        answer_question_from_image,
        inputs=[image_input, img_question_input],
        outputs=img_answer_output,
    )

# ---- MOUNT GRADIO APP ----

demo = gr.TabbedInterface(
    [doc_interface, img_interface],
    ["Document QA", "Image QA"],
)
app = gr.mount_gradio_app(app, demo, path="/")


# NOTE(review): Gradio is mounted at "/" above, so this route is presumably
# never reached; if it were, it would redirect to itself. Kept unchanged to
# preserve the URL layout - confirm whether it can be removed.
@app.get("/")
def home():
    """Redirect the root URL to the Gradio UI mounted at "/"."""
    return RedirectResponse(url="/")