Didier committed on
Commit
7315ce3
·
verified ·
1 Parent(s): 0ee2031

Update ocr.py

Browse files
Files changed (1) hide show
  1. ocr.py +9 -0
ocr.py CHANGED
@@ -10,6 +10,15 @@ Date: 2024-11-23
10
  import os
11
  os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
12
 
 
 
 
 
 
 
 
 
 
13
  import pytesseract
14
  from pdf2image import convert_from_path
15
  from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
 
10
  import os
11
  os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
12
 
13
+ # Check Ghostscript installation
14
+ gs_path = "/usr/bin/gs" # Default Ghostscript location on Ubuntu
15
+
16
+ if not os.path.exists(gs_path):
17
+     raise FileNotFoundError(f"Ghostscript not found at {gs_path}")
18
+
19
+ # Set Ghostscript path explicitly
20
+ os.environ["OCRMYPDF_GS"] = gs_path
21
+
22
  import pytesseract
23
  from pdf2image import convert_from_path
24
  from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError