gavinzli committed on
Commit
beed350
·
1 Parent(s): b68d569

Add handling for DependencyError in PDF extraction and update requirements to include pycryptodome

Browse files
Files changed (2) hide show
  1. controllers/utils.py +1 -1
  2. requirements.txt +1 -0
controllers/utils.py CHANGED
@@ -405,7 +405,7 @@ def extract_from_pdf_by_pattern(url, pattern):
405
  text = text.strip()
406
  extracted_text += text
407
  except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
408
- PyPDF2.errors.PdfReadError) as e:
409
  logging.error(e)
410
  extracted_text = ''
411
  return extracted_text.replace('?\n', '?-\n').replace(
 
405
  text = text.strip()
406
  extracted_text += text
407
  except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
408
+ PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError) as e:
409
  logging.error(e)
410
  extracted_text = ''
411
  return extracted_text.replace('?\n', '?-\n').replace(
requirements.txt CHANGED
@@ -195,3 +195,4 @@ Werkzeug==3.0.3
195
  wrapt==1.16.0
196
  yarl==1.9.4
197
  prefect==2.20.2
 
 
195
  wrapt==1.16.0
196
  yarl==1.9.4
197
  prefect==2.20.2
198
+ pycryptodome==3.21.0