Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 28

Commit

93ae425

verified ·

1 Parent(s): 1cd6a53

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -34

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-import uvicorn
 import numpy as np
 import fitz  # PyMuPDF
 import tika
@@ -12,7 +11,7 @@ from starlette.responses import RedirectResponse
 from tika import parser
 from openpyxl import load_workbook
-# Initialize Tika for DOCX & PPTX parsing
 tika.initVM()
 # Initialize FastAPI
@@ -21,14 +20,12 @@ app = FastAPI()
 # Load models
 device = "cuda" if torch.cuda.is_available() else "cpu"
 qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
-image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
 # ✅ Function to Validate File Type
 def validate_file_type(file):
-    if isinstance(file, str):  # Text-based input (NamedString)
-        return None
     if hasattr(file, "name"):
         ext = file.name.split(".")[-1].lower()
         if ext not in ALLOWED_EXTENSIONS:
@@ -37,17 +34,17 @@ def validate_file_type(file):
     return "❌ Invalid file format!"
 # ✅ Extract Text from PDF
-def extract_text_from_pdf(pdf_bytes):
-    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
-    return "\n".join([page.get_text() for page in doc])
 # ✅ Extract Text from DOCX & PPTX using Tika
-def extract_text_with_tika(file_bytes):
-    return parser.from_buffer(file_bytes)["content"]
 # ✅ Extract Text from Excel
-def extract_text_from_excel(file_bytes):
-    wb = load_workbook(BytesIO(file_bytes), data_only=True)
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
@@ -60,30 +57,25 @@ def truncate_text(text, max_length=2048):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
-    # Image Processing (Gradio sends images as NumPy arrays)
-    if isinstance(file, np.ndarray):
         image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
-    # Validate File
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
-    file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
-    file_bytes = file.read() if hasattr(file, "read") else None
-    if not file_bytes:
-        return "❌ Could not read file content!"
     # Extract Text from Supported Documents
     if file_ext == "pdf":
-        text = extract_text_from_pdf(file_bytes)
     elif file_ext in ["docx", "pptx"]:
-        text = extract_text_with_tika(file_bytes)
     elif file_ext == "xlsx":
-        text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
@@ -95,27 +87,23 @@ def answer_question(file, question: str):
     return response[0]["generated_text"]
-# ✅ Gradio Interface (Unified for Images & Documents)
 with gr.Blocks() as demo:
     gr.Markdown("## 📄 AI-Powered Document & Image QA")
     with gr.Row():
-        file_input = gr.File(label="Upload Document / Image")
-        question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
     answer_output = gr.Textbox(label="Answer")
     submit_btn = gr.Button("Get Answer")
     submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
 # ✅ Mount Gradio with FastAPI
-app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
-    return RedirectResponse(url="/")
-# ✅ Run FastAPI + Gradio
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import gradio as gr
 import numpy as np
 import fitz  # PyMuPDF
 import tika
 from tika import parser
 from openpyxl import load_workbook
+# Initialize Tika for DOCX & PPTX parsing (Ensure Java is installed)
 tika.initVM()
 # Initialize FastAPI
 # Load models
 device = "cuda" if torch.cuda.is_available() else "cpu"
 qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
+image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
 # ✅ Function to Validate File Type
 def validate_file_type(file):
     if hasattr(file, "name"):
         ext = file.name.split(".")[-1].lower()
         if ext not in ALLOWED_EXTENSIONS:
     return "❌ Invalid file format!"
 # ✅ Extract Text from PDF
+def extract_text_from_pdf(file):
+    with fitz.open(file.name) as doc:
+        return "\n".join([page.get_text() for page in doc])
 # ✅ Extract Text from DOCX & PPTX using Tika
+def extract_text_with_tika(file):
+    return parser.from_file(file.name)["content"]
 # ✅ Extract Text from Excel
+def extract_text_from_excel(file):
+    wb = load_workbook(file.name, data_only=True)
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
+    if isinstance(file, np.ndarray):  # Image Processing
         image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
+    file_ext = file.name.split(".")[-1].lower()
     # Extract Text from Supported Documents
     if file_ext == "pdf":
+        text = extract_text_from_pdf(file)
     elif file_ext in ["docx", "pptx"]:
+        text = extract_text_with_tika(file)
     elif file_ext == "xlsx":
+        text = extract_text_from_excel(file)
     else:
         return "❌ Unsupported file format!"
     return response[0]["generated_text"]
+# ✅ Gradio Interface (Separate File & Image Inputs)
 with gr.Blocks() as demo:
     gr.Markdown("## 📄 AI-Powered Document & Image QA")
     with gr.Row():
+        file_input = gr.File(label="Upload Document")
+        image_input = gr.Image(label="Upload Image")
+    question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
     answer_output = gr.Textbox(label="Answer")
     submit_btn = gr.Button("Get Answer")
     submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
 # ✅ Mount Gradio with FastAPI
+app = gr.mount_gradio_app(app, demo, path="/demo")
 @app.get("/")
 def home():
+    return RedirectResponse(url="/demo")