Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 27

Commit

2553b67

verified ·

1 Parent(s): 669e074

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -20

app.py CHANGED Viewed

@@ -160,31 +160,32 @@ def validate_file_type(file):
     return "❌ Invalid file format!"
 # ✅ Extract Text from PDF
-def extract_text_from_pdf(file):
     try:
-        doc = fitz.open(stream=file, filetype="pdf")
         return "\n".join([page.get_text() for page in doc])
-    except Exception:
-        return None
 # ✅ Extract Text from DOCX & PPTX using Tika
-def extract_text_with_tika(file):
     try:
-        return parser.from_buffer(file)["content"]
-    except Exception:
-        return None
 # ✅ Extract Text from Excel
-def extract_text_from_excel(file):
     try:
-        wb = load_workbook(BytesIO(file), data_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 text.append(" ".join(str(cell) for cell in row if cell))
         return "\n".join(text)
-    except Exception:
-        return None
 # ✅ Truncate Long Text for Model
 def truncate_text(text, max_length=2048):
@@ -192,25 +193,33 @@ def truncate_text(text, max_length=2048):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
-    # Image Processing (Gradio sends images as NumPy arrays)
     if isinstance(file, np.ndarray):
         image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
-    # Validate File
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
     # ✅ Read File Bytes Properly
     file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
-    file_bytes = file.read() if hasattr(file, "read") else None
-    if not file_bytes:
-        return "❌ Could not read file content!"
-    # Extract Text from Supported Documents
     text = None
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
@@ -219,8 +228,8 @@ def answer_question(file, question: str):
     elif file_ext == "xlsx":
         text = extract_text_from_excel(file_bytes)
-    if not text:
-        return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")

     return "❌ Invalid file format!"
 # ✅ Extract Text from PDF
+def extract_text_from_pdf(file_bytes):
     try:
+        doc = fitz.open(stream=file_bytes, filetype="pdf")
         return "\n".join([page.get_text() for page in doc])
+    except Exception as e:
+        return f"❌ PDF Error: {str(e)}"
 # ✅ Extract Text from DOCX & PPTX using Tika
+def extract_text_with_tika(file_bytes):
     try:
+        parsed = parser.from_buffer(file_bytes)
+        return parsed["content"]
+    except Exception as e:
+        return f"❌ Tika Error: {str(e)}"
 # ✅ Extract Text from Excel
+def extract_text_from_excel(file_bytes):
     try:
+        wb = load_workbook(BytesIO(file_bytes), data_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 text.append(" ".join(str(cell) for cell in row if cell))
         return "\n".join(text)
+    except Exception as e:
+        return f"❌ Excel Error: {str(e)}"
 # ✅ Truncate Long Text for Model
 def truncate_text(text, max_length=2048):
 # ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
+    # ✅ Image Processing (Gradio sends images as NumPy arrays)
     if isinstance(file, np.ndarray):
         image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
+    # ✅ Validate File
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
     # ✅ Read File Bytes Properly
+    try:
+        if hasattr(file, "read"):  # Gradio passes file objects
+            file_bytes = file.read()
+        elif isinstance(file, bytes):  # Direct bytes input
+            file_bytes = file
+        else:
+            return "❌ Could not read file content!"
+    except Exception as e:
+        return f"❌ File Read Error: {str(e)}"
+    # ✅ Get File Extension
     file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
+    # ✅ Extract Text from Supported Documents
     text = None
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
     elif file_ext == "xlsx":
         text = extract_text_from_excel(file_bytes)
+    if not text or "❌" in text:
+        return f"⚠️ No text extracted. Error: {text}"
     truncated_text = truncate_text(text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")