ikraamkb committed on
Commit
3403b3e
·
verified ·
1 Parent(s): 384cf9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -132,7 +132,7 @@ from io import BytesIO
132
  from starlette.responses import RedirectResponse
133
  from tika import parser
134
  from openpyxl import load_workbook
135
-
136
  # Initialize Tika for DOCX & PPTX parsing
137
  tika.initVM()
138
 
@@ -197,7 +197,7 @@ def truncate_text(text, max_length=2048):
197
  # ✅ Answer Questions from Image or Document
198
  def answer_question(file, question: str):
199
  try:
200
- # ✅ Handle Image Files
201
  if isinstance(file, np.ndarray):
202
  image = Image.fromarray(file)
203
  caption = image_captioning_pipeline(image)[0]['generated_text']
@@ -209,21 +209,26 @@ def answer_question(file, question: str):
209
  if validation_error:
210
  return validation_error
211
 
212
- # ✅ Extract File Bytes Correctly
213
  file_bytes = None
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- if isinstance(file, bytes):
216
- file_bytes = file # Directly received bytes
217
- elif hasattr(file, "read"):
218
- file_bytes = file.read() # Read bytes from file object
219
- elif isinstance(file, str):
220
- return "❌ Error: File received as a string, expected binary data!"
221
  else:
222
  return f"❌ Unexpected file type received! Type: {type(file)}"
223
 
224
- # ✅ Extract Text Based on File Extension
225
- file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
226
-
227
  if file_ext == "pdf":
228
  text = extract_text_from_pdf(file_bytes)
229
  elif file_ext in ["docx", "pptx"]:
 
132
  from starlette.responses import RedirectResponse
133
  from tika import parser
134
  from openpyxl import load_workbook
135
+ import os
136
  # Initialize Tika for DOCX & PPTX parsing
137
  tika.initVM()
138
 
 
197
  # ✅ Answer Questions from Image or Document
198
  def answer_question(file, question: str):
199
  try:
200
+ # ✅ Handle Image Files (Gradio sends images as NumPy arrays)
201
  if isinstance(file, np.ndarray):
202
  image = Image.fromarray(file)
203
  caption = image_captioning_pipeline(image)[0]['generated_text']
 
209
  if validation_error:
210
  return validation_error
211
 
212
+ # ✅ Determine File Path or Read Bytes
213
  file_bytes = None
214
+ file_ext = None
215
+
216
+ if isinstance(file, str): # Gradio sometimes passes a file path string
217
+ if os.path.exists(file): # If it's a valid file path
218
+ file_ext = file.split(".")[-1].lower()
219
+ with open(file, "rb") as f:
220
+ file_bytes = f.read()
221
+ else:
222
+ return f"❌ Error: File path does not exist! Path: {file}"
223
+
224
+ elif hasattr(file, "read"): # If it's a file-like object
225
+ file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
226
+ file_bytes = file.read()
227
 
 
 
 
 
 
 
228
  else:
229
  return f"❌ Unexpected file type received! Type: {type(file)}"
230
 
231
+ # ✅ Extract Text Based on File Type
 
 
232
  if file_ext == "pdf":
233
  text = extract_text_from_pdf(file_bytes)
234
  elif file_ext in ["docx", "pptx"]: