Update ocr.py
Browse files
ocr.py
CHANGED
@@ -10,6 +10,15 @@ Date: 2024-11-23
|
|
10 |
import os
|
11 |
# Runs setup.sh through bash as a module-level side effect. The script path is
# relative, so it is resolved against the current working directory, and the
# exit status returned by os.system is discarded, so a failing script does not
# stop execution here.
os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
import pytesseract
|
14 |
from pdf2image import convert_from_path
|
15 |
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
|
|
|
10 |
import os
|
11 |
# Runs setup.sh through bash as a module-level side effect. The script path is
# relative, so it is resolved against the current working directory, and the
# exit status returned by os.system is discarded, so a failing script does not
# stop execution here.
os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
|
12 |
|
13 |
+
# Check Ghostscript installation
|
14 |
+
import shutil

# Prefer whichever `gs` executable is on PATH so installs outside /usr/bin are
# found; fall back to the default Ghostscript location on Ubuntu otherwise.
gs_path = shutil.which("gs") or "/usr/bin/gs"
|
15 |
+
|
16 |
+
# Stop module execution with FileNotFoundError when nothing exists at gs_path.
if not os.path.exists(gs_path):
|
17 |
+
raise FileNotFoundError(f"Ghostscript not found at {gs_path}")
|
18 |
+
|
19 |
+
# Set Ghostscript path explicitly
|
20 |
+
# Publishes the Ghostscript path through the process environment, where it is
# visible to later code in this process and inherited by child processes.
# NOTE(review): confirm that the tool being configured actually reads
# OCRMYPDF_GS; nothing in the visible portion of this file consumes it.
os.environ["OCRMYPDF_GS"] = gs_path
|
21 |
+
|
22 |
import pytesseract
|
23 |
from pdf2image import convert_from_path
|
24 |
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
|