Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -197,7 +197,7 @@ def truncate_text(text, max_length=2048):
|
|
197 |
# ✅ Answer Questions from Image or Document
|
198 |
def answer_question(file, question: str):
|
199 |
try:
|
200 |
-
# ✅ Image
|
201 |
if isinstance(file, np.ndarray):
|
202 |
image = Image.fromarray(file)
|
203 |
caption = image_captioning_pipeline(image)[0]['generated_text']
|
@@ -209,26 +209,31 @@ def answer_question(file, question: str):
|
|
209 |
if validation_error:
|
210 |
return validation_error
|
211 |
|
212 |
-
# β
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
file_bytes = file
|
|
|
|
|
|
|
|
|
217 |
else:
|
218 |
-
|
219 |
|
220 |
-
# β
|
221 |
file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
|
222 |
|
223 |
-
# ✅ Extract Text from Supported Documents
|
224 |
-
text = None
|
225 |
if file_ext == "pdf":
|
226 |
text = extract_text_from_pdf(file_bytes)
|
227 |
elif file_ext in ["docx", "pptx"]:
|
228 |
text = extract_text_with_tika(file_bytes)
|
229 |
elif file_ext == "xlsx":
|
230 |
text = extract_text_from_excel(file_bytes)
|
|
|
|
|
231 |
|
|
|
232 |
if not text or "❌" in text:
|
233 |
return f"⚠️ No text extracted. Error: {text}"
|
234 |
|
@@ -238,7 +243,7 @@ def answer_question(file, question: str):
|
|
238 |
return response[0]["generated_text"]
|
239 |
|
240 |
except Exception as e:
|
241 |
-
print(f"❌ General Processing Error: {e}") # Log error
|
242 |
return f"❌ Processing Error: {str(e)}"
|
243 |
|
244 |
# ✅ Gradio Interface (Unified for Images & Documents)
|
|
|
197 |
# ✅ Answer Questions from Image or Document
|
198 |
def answer_question(file, question: str):
|
199 |
try:
|
200 |
+
# ✅ Handle Image Files
|
201 |
if isinstance(file, np.ndarray):
|
202 |
image = Image.fromarray(file)
|
203 |
caption = image_captioning_pipeline(image)[0]['generated_text']
|
|
|
209 |
if validation_error:
|
210 |
return validation_error
|
211 |
|
212 |
+
# ✅ Extract File Bytes Correctly
|
213 |
+
file_bytes = None
|
214 |
+
|
215 |
+
if isinstance(file, bytes):
|
216 |
+
file_bytes = file # Directly received bytes
|
217 |
+
elif hasattr(file, "read"):
|
218 |
+
file_bytes = file.read() # Read bytes from file object
|
219 |
+
elif isinstance(file, str):
|
220 |
+
return "❌ Error: File received as a string, expected binary data!"
|
221 |
else:
|
222 |
+
return f"❌ Unexpected file type received! Type: {type(file)}"
|
223 |
|
224 |
+
# ✅ Extract Text Based on File Extension
|
225 |
file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
|
226 |
|
|
|
|
|
227 |
if file_ext == "pdf":
|
228 |
text = extract_text_from_pdf(file_bytes)
|
229 |
elif file_ext in ["docx", "pptx"]:
|
230 |
text = extract_text_with_tika(file_bytes)
|
231 |
elif file_ext == "xlsx":
|
232 |
text = extract_text_from_excel(file_bytes)
|
233 |
+
else:
|
234 |
+
return f"❌ Unsupported file format: {file_ext}"
|
235 |
|
236 |
+
# ✅ Validate Extraction
|
237 |
if not text or "❌" in text:
|
238 |
return f"⚠️ No text extracted. Error: {text}"
|
239 |
|
|
|
243 |
return response[0]["generated_text"]
|
244 |
|
245 |
except Exception as e:
|
246 |
+
print(f"❌ General Processing Error: {e}") # Log error to console
|
247 |
return f"❌ Processing Error: {str(e)}"
|
248 |
|
249 |
# ✅ Gradio Interface (Unified for Images & Documents)
|