Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 27

Commit

3403b3e

verified ·

1 Parent(s): 384cf9d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -12

app.py CHANGED Viewed

@@ -132,7 +132,7 @@ from io import BytesIO
 from starlette.responses import RedirectResponse
 from tika import parser
 from openpyxl import load_workbook
 # Initialize Tika for DOCX & PPTX parsing
 tika.initVM()
@@ -197,7 +197,7 @@ def truncate_text(text, max_length=2048):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
     try:
-        # ✅ Handle Image Files
         if isinstance(file, np.ndarray):
             image = Image.fromarray(file)
             caption = image_captioning_pipeline(image)[0]['generated_text']
@@ -209,21 +209,26 @@ def answer_question(file, question: str):
         if validation_error:
             return validation_error
-        # ✅ Extract File Bytes Correctly
         file_bytes = None
-        if isinstance(file, bytes):
-            file_bytes = file  # Directly received bytes
-        elif hasattr(file, "read"):
-            file_bytes = file.read()  # Read bytes from file object
-        elif isinstance(file, str):
-            return "❌ Error: File received as a string, expected binary data!"
         else:
             return f"❌ Unexpected file type received! Type: {type(file)}"
-        # ✅ Extract Text Based on File Extension
-        file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
         if file_ext == "pdf":
             text = extract_text_from_pdf(file_bytes)
         elif file_ext in ["docx", "pptx"]:

 from starlette.responses import RedirectResponse
 from tika import parser
 from openpyxl import load_workbook
+import os
 # Initialize Tika for DOCX & PPTX parsing
 tika.initVM()
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
     try:
+        # ✅ Handle Image Files (Gradio sends images as NumPy arrays)
         if isinstance(file, np.ndarray):
             image = Image.fromarray(file)
             caption = image_captioning_pipeline(image)[0]['generated_text']
         if validation_error:
             return validation_error
+        # ✅ Determine File Path or Read Bytes
         file_bytes = None
+        file_ext = None
+        if isinstance(file, str):  # Gradio sometimes passes a file path string
+            if os.path.exists(file):  # If it's a valid file path
+                file_ext = file.split(".")[-1].lower()
+                with open(file, "rb") as f:
+                    file_bytes = f.read()
+            else:
+                return f"❌ Error: File path does not exist! Path: {file}"
+        elif hasattr(file, "read"):  # If it's a file-like object
+            file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
+            file_bytes = file.read()
         else:
             return f"❌ Unexpected file type received! Type: {type(file)}"
+        # ✅ Extract Text Based on File Type
         if file_ext == "pdf":
             text = extract_text_from_pdf(file_bytes)
         elif file_ext in ["docx", "pptx"]: