ikraamkb committed on
Commit
171f476
·
verified ·
1 Parent(s): 1f136e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -197,7 +197,7 @@ def truncate_text(text, max_length=2048):
197
  # ✅ Answer Questions from Image or Document
198
  def answer_question(file, question: str):
199
  try:
200
- # ✅ Image Processing (Gradio sends images as NumPy arrays)
201
  if isinstance(file, np.ndarray):
202
  image = Image.fromarray(file)
203
  caption = image_captioning_pipeline(image)[0]['generated_text']
@@ -209,26 +209,31 @@ def answer_question(file, question: str):
209
  if validation_error:
210
  return validation_error
211
 
212
- # ✅ Read File Bytes Properly
213
- if hasattr(file, "read"): # Gradio passes file objects
214
- file_bytes = file.read()
215
- elif isinstance(file, bytes): # Direct bytes input
216
- file_bytes = file
 
 
 
 
217
  else:
218
- raise ValueError("Unexpected file type received!")
219
 
220
- # ✅ Get File Extension
221
  file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
222
 
223
- # ✅ Extract Text from Supported Documents
224
- text = None
225
  if file_ext == "pdf":
226
  text = extract_text_from_pdf(file_bytes)
227
  elif file_ext in ["docx", "pptx"]:
228
  text = extract_text_with_tika(file_bytes)
229
  elif file_ext == "xlsx":
230
  text = extract_text_from_excel(file_bytes)
 
 
231
 
 
232
  if not text or "❌" in text:
233
  return f"⚠️ No text extracted. Error: {text}"
234
 
@@ -238,7 +243,7 @@ def answer_question(file, question: str):
238
  return response[0]["generated_text"]
239
 
240
  except Exception as e:
241
- print(f"❌ General Processing Error: {e}") # Log error
242
  return f"❌ Processing Error: {str(e)}"
243
 
244
  # ✅ Gradio Interface (Unified for Images & Documents)
 
197
  # ✅ Answer Questions from Image or Document
198
  def answer_question(file, question: str):
199
  try:
200
+ # ✅ Handle Image Files
201
  if isinstance(file, np.ndarray):
202
  image = Image.fromarray(file)
203
  caption = image_captioning_pipeline(image)[0]['generated_text']
 
209
  if validation_error:
210
  return validation_error
211
 
212
+ # ✅ Extract File Bytes Correctly
213
+ file_bytes = None
214
+
215
+ if isinstance(file, bytes):
216
+ file_bytes = file # Directly received bytes
217
+ elif hasattr(file, "read"):
218
+ file_bytes = file.read() # Read bytes from file object
219
+ elif isinstance(file, str):
220
+ return "❌ Error: File received as a string, expected binary data!"
221
  else:
222
+ return f"❌ Unexpected file type received! Type: {type(file)}"
223
 
224
+ # ✅ Extract Text Based on File Extension
225
  file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
226
 
 
 
227
  if file_ext == "pdf":
228
  text = extract_text_from_pdf(file_bytes)
229
  elif file_ext in ["docx", "pptx"]:
230
  text = extract_text_with_tika(file_bytes)
231
  elif file_ext == "xlsx":
232
  text = extract_text_from_excel(file_bytes)
233
+ else:
234
+ return f"❌ Unsupported file format: {file_ext}"
235
 
236
+ # ✅ Validate Extraction
237
  if not text or "❌" in text:
238
  return f"⚠️ No text extracted. Error: {text}"
239
 
 
243
  return response[0]["generated_text"]
244
 
245
  except Exception as e:
246
+ print(f"❌ General Processing Error: {e}") # Log error to console
247
  return f"❌ Processing Error: {str(e)}"
248
 
249
  # ✅ Gradio Interface (Unified for Images & Documents)