"""Document and image question answering served with Gradio on FastAPI.

This module wires together a text-generation model, an image-captioning
model and a set of document text extractors (PDF, DOCX, PPTX, XLSX), and
exposes them through a Gradio UI mounted on a FastAPI application.
"""

from io import BytesIO
from pathlib import Path

import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import tika
import torch
import uvicorn
from fastapi import FastAPI
from openpyxl import load_workbook
from PIL import Image
from starlette.responses import RedirectResponse
from tika import parser
from transformers import pipeline

# Initialize Tika for DOCX & PPTX parsing
tika.initVM()

# Initialize FastAPI
app = FastAPI()

# Load models
device = "cuda" if torch.cuda.is_available() else "cpu"
qa_pipeline = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device=device,
)
# Run the captioning model on the same device as the QA model.
image_captioning_pipeline = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=device,
)

ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}


# ✅ Function to Validate File Type
def validate_file_type(file):
    """Check that an uploaded file has a supported document extension.

    Args:
        file: The upload to check. Either a path string (this includes str
            subclasses such as Gradio's ``NamedString``) or any object that
            exposes the file name through a ``name`` attribute.

    Returns:
        None when the extension is in ``ALLOWED_EXTENSIONS``; otherwise a
        user-facing error message describing the problem.
    """
    if file is None:
        return "❌ No file uploaded!"

    # A string upload is treated as the file's path, so it is validated
    # like any other upload instead of being waved through unchecked.
    if isinstance(file, str):
        filename = file
    elif hasattr(file, "name"):
        filename = file.name
    else:
        return "❌ Invalid file format!"

    # Path.suffix is empty for names without an extension, which keeps the
    # full path out of the error message below.
    ext = Path(str(filename)).suffix.lstrip(".").lower()
    if ext not in ALLOWED_EXTENSIONS:
        return f"❌ Unsupported file format: {ext or '(no extension)'}"
    return None
def _log_extraction_failure(kind):
    """Log the exception currently being handled for a failed extraction.

    Args:
        kind: Short label of the document type (e.g. ``"PDF"``) used in the
            log message.
    """
    # Function-scope import keeps this section independent of the file's
    # top-level import block.
    import logging

    logging.getLogger(__name__).exception("Failed to extract text from %s", kind)


# ✅ Extract Text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Raw PDF content as bytes.

    Returns:
        The extracted text, or None if the document could not be processed.
    """
    try:
        # The context manager closes the document on every exit path.
        with fitz.open(stream=file, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception:
        # Best-effort boundary: the caller turns None into a user message.
        _log_extraction_failure("PDF")
        return None


# ✅ Extract Text from DOCX & PPTX using Tika
def extract_text_with_tika(file):
    """Return the ``"content"`` entry Tika produces for a document buffer.

    Args:
        file: Raw document content as bytes.

    Returns:
        The value stored under ``"content"`` in Tika's result, or None if
        parsing failed.
    """
    try:
        return parser.from_buffer(file)["content"]
    except Exception:
        _log_extraction_failure("DOCX/PPTX")
        return None


# ✅ Extract Text from Excel
def extract_text_from_excel(file):
    """Return the cell values of every worksheet as newline-separated rows.

    Cells within a row are joined by single spaces. Empty cells and rows
    without any value are skipped; zero and ``False`` cells are kept.

    Args:
        file: Raw XLSX content as bytes.

    Returns:
        The extracted text (possibly empty), or None if the workbook could
        not be processed.
    """
    try:
        wb = load_workbook(BytesIO(file), data_only=True)
        lines = []
        for sheet in wb.worksheets:
            for row in sheet.iter_rows(values_only=True):
                # Compare against None so that 0 and False are not dropped.
                cells = [str(cell) for cell in row if cell is not None]
                if cells:
                    lines.append(" ".join(cells))
        return "\n".join(lines)
    except Exception:
        _log_extraction_failure("XLSX")
        return None


# ✅ Truncate Long Text for Model
def truncate_text(text, max_length=2048):
    """Return at most the first ``max_length`` characters of ``text``.

    The limit counts characters, not model tokens.
    """
    return text[:max_length]


def _read_upload_bytes(file):
    """Return the raw bytes of an upload, or None if they cannot be read.

    A path string (or the path found in ``file.name``) is opened and read in
    binary mode; if that is not possible the object's own ``read()`` is used.
    """
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if isinstance(path, str):
        try:
            with open(path, "rb") as handle:
                return handle.read()
        except OSError:
            # Not a readable path; fall back to the object's read() below.
            pass
    if hasattr(file, "read"):
        return file.read()
    return None


def _generate_answer(question, context):
    """Run the QA model on a question/context pair and return its answer."""
    prompt = f"Question: {question}\nContext: {context}"
    # return_full_text=False makes the pipeline return only the newly
    # generated text instead of echoing the whole prompt back to the user.
    response = qa_pipeline(prompt, return_full_text=False)
    return response[0]["generated_text"].strip()


# ✅ Answer Questions from Image or Document
def answer_question(file, question: str):
    """Answer a question about an uploaded document or an image array.

    Args:
        file: A NumPy image array, a path string, or a file-like object
            exposing ``name`` and/or ``read()``.
        question: The user's question.

    Returns:
        The model's answer, or a user-facing error/warning message when the
        input cannot be processed.
    """
    # Image Processing (images arrive as NumPy arrays)
    # NOTE(review): the gr.File input below presumably never delivers an
    # array, so this branch likely only serves direct callers — confirm.
    if isinstance(file, np.ndarray):
        if not question or not question.strip():
            return "❌ Please enter a question!"
        image = Image.fromarray(file)
        caption = image_captioning_pipeline(image)[0]["generated_text"]
        return _generate_answer(question, caption)

    # Validate File
    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error

    if not question or not question.strip():
        return "❌ Please enter a question!"

    # ✅ Read File Bytes Properly
    name = file if isinstance(file, str) else getattr(file, "name", None)
    file_ext = str(name).rsplit(".", 1)[-1].lower() if name else None
    file_bytes = _read_upload_bytes(file)
    if not file_bytes:
        return "❌ Could not read file content!"

    # Extract Text from Supported Documents
    extractors = {
        "pdf": extract_text_from_pdf,
        "docx": extract_text_with_tika,
        "pptx": extract_text_with_tika,
        "xlsx": extract_text_from_excel,
    }
    extractor = extractors.get(file_ext)
    text = extractor(file_bytes) if extractor else None

    # Whitespace-only output carries no usable context for the model.
    if not text or not text.strip():
        return "⚠️ No text extracted from the document."

    # Strip first so leading blank lines do not consume the length budget.
    truncated_text = truncate_text(text.strip())
    return _generate_answer(question, truncated_text)


# ✅ Gradio Interface (Unified for Images & Documents)
with gr.Blocks() as demo:
    gr.Markdown("## 📄 AI-Powered Document & Image QA")

    with gr.Row():
        file_input = gr.File(label="Upload Document / Image")
        question_input = gr.Textbox(
            label="Ask a Question",
            placeholder="What is this document about?",
        )

    answer_output = gr.Textbox(label="Answer")
    submit_btn = gr.Button("Get Answer")

    submit_btn.click(
        answer_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

# ✅ Mount Gradio with FastAPI
app = gr.mount_gradio_app(app, demo, path="/")


# NOTE(review): this route is registered after the Gradio app is mounted at
# "/", so it is presumably shadowed by the mount; if it were ever reached it
# would redirect to itself. Confirm and remove or point it elsewhere.
@app.get("/")
def home():
    """Redirect the root URL to "/"."""
    return RedirectResponse(url="/")


# ✅ Run FastAPI + Gradio
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)