Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 27

Commit

1f136e0

verified ·

1 Parent(s): 2553b67

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -31

app.py CHANGED Viewed

@@ -165,6 +165,7 @@ def extract_text_from_pdf(file_bytes):
         doc = fitz.open(stream=file_bytes, filetype="pdf")
         return "\n".join([page.get_text() for page in doc])
     except Exception as e:
         return f"❌ PDF Error: {str(e)}"
 # ✅ Extract Text from DOCX & PPTX using Tika
@@ -173,6 +174,7 @@ def extract_text_with_tika(file_bytes):
         parsed = parser.from_buffer(file_bytes)
         return parsed["content"]
     except Exception as e:
         return f"❌ Tika Error: {str(e)}"
 # ✅ Extract Text from Excel
@@ -185,6 +187,7 @@ def extract_text_from_excel(file_bytes):
                 text.append(" ".join(str(cell) for cell in row if cell))
         return "\n".join(text)
     except Exception as e:
         return f"❌ Excel Error: {str(e)}"
 # ✅ Truncate Long Text for Model
@@ -193,48 +196,50 @@ def truncate_text(text, max_length=2048):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
-    # ✅ Image Processing (Gradio sends images as NumPy arrays)
-    if isinstance(file, np.ndarray):
-        image = Image.fromarray(file)
-        caption = image_captioning_pipeline(image)[0]['generated_text']
-        response = qa_pipeline(f"Question: {question}\nContext: {caption}")
-        return response[0]["generated_text"]
-    # ✅ Validate File
-    validation_error = validate_file_type(file)
-    if validation_error:
-        return validation_error
-    # ✅ Read File Bytes Properly
     try:
         if hasattr(file, "read"):  # Gradio passes file objects
             file_bytes = file.read()
         elif isinstance(file, bytes):  # Direct bytes input
             file_bytes = file
         else:
-            return "❌ Could not read file content!"
-    except Exception as e:
-        return f"❌ File Read Error: {str(e)}"
-    # ✅ Get File Extension
-    file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
-    # ✅ Extract Text from Supported Documents
-    text = None
-    if file_ext == "pdf":
-        text = extract_text_from_pdf(file_bytes)
-    elif file_ext in ["docx", "pptx"]:
-        text = extract_text_with_tika(file_bytes)
-    elif file_ext == "xlsx":
-        text = extract_text_from_excel(file_bytes)
-    if not text or "❌" in text:
-        return f"⚠️ No text extracted. Error: {text}"
-    truncated_text = truncate_text(text)
-    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
-    return response[0]["generated_text"]
 # ✅ Gradio Interface (Unified for Images & Documents)
 with gr.Blocks() as demo:

         doc = fitz.open(stream=file_bytes, filetype="pdf")
         return "\n".join([page.get_text() for page in doc])
     except Exception as e:
+        print(f"❌ PDF Extraction Error: {e}")  # Log error
         return f"❌ PDF Error: {str(e)}"
 # ✅ Extract Text from DOCX & PPTX using Tika
         parsed = parser.from_buffer(file_bytes)
         return parsed["content"]
     except Exception as e:
+        print(f"❌ Tika Extraction Error: {e}")  # Log error
         return f"❌ Tika Error: {str(e)}"
 # ✅ Extract Text from Excel
                 text.append(" ".join(str(cell) for cell in row if cell))
         return "\n".join(text)
     except Exception as e:
+        print(f"❌ Excel Extraction Error: {e}")  # Log error
         return f"❌ Excel Error: {str(e)}"
 # ✅ Truncate Long Text for Model
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
     try:
+        # ✅ Image Processing (Gradio sends images as NumPy arrays)
+        if isinstance(file, np.ndarray):
+            image = Image.fromarray(file)
+            caption = image_captioning_pipeline(image)[0]['generated_text']
+            response = qa_pipeline(f"Question: {question}\nContext: {caption}")
+            return response[0]["generated_text"]
+        # ✅ Validate File
+        validation_error = validate_file_type(file)
+        if validation_error:
+            return validation_error
+        # ✅ Read File Bytes Properly
         if hasattr(file, "read"):  # Gradio passes file objects
             file_bytes = file.read()
         elif isinstance(file, bytes):  # Direct bytes input
             file_bytes = file
         else:
+            raise ValueError("Unexpected file type received!")
+        # ✅ Get File Extension
+        file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
+        # ✅ Extract Text from Supported Documents
+        text = None
+        if file_ext == "pdf":
+            text = extract_text_from_pdf(file_bytes)
+        elif file_ext in ["docx", "pptx"]:
+            text = extract_text_with_tika(file_bytes)
+        elif file_ext == "xlsx":
+            text = extract_text_from_excel(file_bytes)
+        if not text or "❌" in text:
+            return f"⚠️ No text extracted. Error: {text}"
+        truncated_text = truncate_text(text)
+        response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
+        return response[0]["generated_text"]
+    except Exception as e:
+        print(f"❌ General Processing Error: {e}")  # Log error
+        return f"❌ Processing Error: {str(e)}"
 # ✅ Gradio Interface (Unified for Images & Documents)
 with gr.Blocks() as demo: