Add handling for DependencyError in PDF extraction and update requirements to include pycryptodome
Browse files
- controllers/utils.py +1 -1
- requirements.txt +1 -0
controllers/utils.py
CHANGED
@@ -405,7 +405,7 @@ def extract_from_pdf_by_pattern(url, pattern):
|
|
405 |
text = text.strip()
|
406 |
extracted_text += text
|
407 |
except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
|
408 |
-
PyPDF2.errors.PdfReadError) as e:
|
409 |
logging.error(e)
|
410 |
extracted_text = ''
|
411 |
return extracted_text.replace('?\n', '?-\n').replace(
|
|
|
405 |
text = text.strip()
|
406 |
extracted_text += text
|
407 |
except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
|
408 |
+
PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError) as e:
|
409 |
logging.error(e)
|
410 |
extracted_text = ''
|
411 |
return extracted_text.replace('?\n', '?-\n').replace(
|
requirements.txt
CHANGED
@@ -195,3 +195,4 @@ Werkzeug==3.0.3
|
|
195 |
wrapt==1.16.0
|
196 |
yarl==1.9.4
|
197 |
prefect==2.20.2
|
|
|
|
195 |
wrapt==1.16.0
|
196 |
yarl==1.9.4
|
197 |
prefect==2.20.2
|
198 |
+
pycryptodome==3.21.0
|