Spaces:

ikraamkb
/

qtAnswering

Running

App Files Files Community

ikraamkb committed on Mar 24

Commit

28de64c

verified ·

1 Parent(s): 6bf4ee9

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -46

app.py CHANGED Viewed

@@ -3,11 +3,8 @@ import fitz  # PyMuPDF for PDF parsing
 from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
-import torch
-from torchvision import transforms
-from torchvision.models.detection import fasterrcnn_resnet50_fpn
 from PIL import Image
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 from fastapi.responses import RedirectResponse
 import numpy as np
@@ -20,62 +17,49 @@ print(f"🔄 Loading models")
 doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-print("models loaded")
 # Initialize OCR model (constructed eagerly when this line runs, not lazily)
 reader = easyocr.Reader(["en"], gpu=True)
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
-def validate_file_type(file):
-    ext = file.name.split(".")[-1].lower()
     print(f"🔍 Validating file type: {ext}")
     if ext not in ALLOWED_EXTENSIONS:
         return f"❌ Unsupported file format: {ext}"
     return None
-# Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     """Return at most the first ``max_tokens`` whitespace-separated words of ``text``.

     The limit counts words produced by ``str.split()``, not model tokens.
     Runs of whitespace are collapsed to single spaces in the result, whether
     or not anything was cut.

     Args:
         text: The string to shorten.
         max_tokens: Maximum number of words to keep; values below zero are
             treated as zero.

     Returns:
         The kept words joined by single spaces.
     """
     words = text.split()
     # Clamp so a negative limit cannot turn into a "drop from the end" slice.
     limit = max(max_tokens, 0)
     if len(words) > limit:
         # Only report truncation when words were actually removed.
         print(f"✂️ Truncated text to {limit} tokens.")
     return " ".join(words[:limit])
-# Document Text Extraction Functions
-def extract_text_from_pdf(pdf_file):
     try:
-        print("📄 Extracting text from PDF...")
-        doc = fitz.open(pdf_file)
         text = "\n".join([page.get_text("text") for page in doc])
         return text if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading PDF: {str(e)}"
-def extract_text_with_tika(file):
     try:
         print("📝 Extracting text with Tika...")
-        parsed = parser.from_buffer(file)
         return parsed.get("content", "⚠️ No text found.").strip()
     except Exception as e:
         return f"❌ Error reading document: {str(e)}"
-def extract_text_from_pptx(pptx_file):
-    try:
-        print("📊 Extracting text from PPTX...")
-        ppt = Presentation(pptx_file)
-        text = []
-        for slide in ppt.slides:
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text.append(shape.text)
-        return "\n".join(text) if text else "⚠️ No text found."
-    except Exception as e:
-        return f"❌ Error reading PPTX: {str(e)}"
-def extract_text_from_excel(excel_file):
     try:
-        print("📊 Extracting text from Excel...")
-        wb = openpyxl.load_workbook(excel_file, read_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
@@ -84,13 +68,14 @@ def extract_text_from_excel(excel_file):
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
-def answer_question_from_document(file, question):
     print("📂 Processing document for QA...")
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
-    file_ext = file.name.split(".")[-1].lower()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file)
     elif file_ext in ["docx", "pptx"]:
@@ -109,25 +94,22 @@ def answer_question_from_document(file, question):
     return response[0]["generated_text"]
-def answer_question_from_image(image, question):
     try:
-        print("🖼️ Converting image for processing...")
-        if isinstance(image, np.ndarray):  # If it's a NumPy array from Gradio
-            image = Image.fromarray(image)  # Convert to PIL Image
-        print("🖼️ Generating caption for image...")
         caption = image_captioning_pipeline(image)[0]['generated_text']
         print("🤖 Answering question based on caption...")
         response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
     except Exception as e:
         return f"❌ Error processing image: {str(e)}"
-# Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
     inputs=[gr.File(label="📂 Upload Document"), gr.Textbox(label="💬 Ask a Question")],
@@ -137,15 +119,18 @@ doc_interface = gr.Interface(
 img_interface = gr.Interface(
     fn=answer_question_from_image,
-    inputs=[gr.Image(label="🖼️ Upload Image"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
-    title="🖼️ AI Image Question Answering"
 )
-# Mount Gradio Interfaces
-demo = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
-app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
     """Handle GET requests to "/" by returning a redirect response.

     NOTE(review): the redirect target ("/") is the same path this route is
     registered on, so a client following it would be sent straight back
     here -- confirm the intended target (presumably wherever the Gradio UI
     is served) with the author.
     """
     return RedirectResponse(url="/")

 from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
+from transformers import pipeline
 import gradio as gr
 from fastapi.responses import RedirectResponse
 import numpy as np
 doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+print("Models loaded")
 # Initialize OCR model (constructed eagerly when this line runs, not lazily)
 reader = easyocr.Reader(["en"], gpu=True)
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
+def validate_file_type(file: UploadFile):
+    ext = file.filename.split(".")[-1].lower()
     print(f"🔍 Validating file type: {ext}")
     if ext not in ALLOWED_EXTENSIONS:
         return f"❌ Unsupported file format: {ext}"
     return None
 def truncate_text(text, max_tokens=450):
     """Return at most the first ``max_tokens`` whitespace-separated words of ``text``.

     The limit counts words produced by ``str.split()``, not model tokens.
     Runs of whitespace are collapsed to single spaces in the result, whether
     or not anything was cut.

     Args:
         text: The string to shorten.
         max_tokens: Maximum number of words to keep; values below zero are
             treated as zero.

     Returns:
         The kept words joined by single spaces.
     """
     words = text.split()
     # Clamp so a negative limit cannot turn into a "drop from the end" slice.
     limit = max(max_tokens, 0)
     if len(words) > limit:
         # Only report truncation when words were actually removed.
         print(f"✂️ Truncated text to {limit} tokens.")
     return " ".join(words[:limit])
+def extract_text_from_pdf(pdf_file: UploadFile):
     try:
+        print("📝 Extracting text from PDF...")
+        pdf_bytes = pdf_file.file.read()
+        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
         text = "\n".join([page.get_text("text") for page in doc])
         return text if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading PDF: {str(e)}"
+def extract_text_with_tika(file: UploadFile):
     try:
         print("📝 Extracting text with Tika...")
+        parsed = parser.from_buffer(file.file.read())
         return parsed.get("content", "⚠️ No text found.").strip()
     except Exception as e:
         return f"❌ Error reading document: {str(e)}"
+def extract_text_from_excel(excel_file: UploadFile):
     try:
+        print("📝 Extracting text from Excel...")
+        wb = openpyxl.load_workbook(excel_file.file, read_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
+def answer_question_from_document(file: UploadFile, question: str):
     print("📂 Processing document for QA...")
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
+    file_ext = file.filename.split(".")[-1].lower()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file)
     elif file_ext in ["docx", "pptx"]:
     return response[0]["generated_text"]
+def answer_question_from_image(image, question: str):
     try:
+        print("🎨 Converting image for processing...")
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+        print("🎨 Generating caption for image...")
         caption = image_captioning_pipeline(image)[0]['generated_text']
         print("🤖 Answering question based on caption...")
         response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
     except Exception as e:
         return f"❌ Error processing image: {str(e)}"
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
     inputs=[gr.File(label="📂 Upload Document"), gr.Textbox(label="💬 Ask a Question")],
 img_interface = gr.Interface(
     fn=answer_question_from_image,
+    inputs=[gr.Image(label="🎨 Upload Image"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
+    title="🎨 AI Image Question Answering"
 )
+demo = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🎨 Image QA"])
 @app.get("/")
 def home():
     """Handle GET requests to "/" by returning a redirect response.

     NOTE(review): the redirect target ("/") is the same path this route is
     registered on, so a client following it would be sent straight back
     here -- confirm the intended target (presumably wherever the Gradio UI
     is served) with the author.
     """
     return RedirectResponse(url="/")
+if __name__ == "__main__":
+    demo.launch()
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)