Spaces:

Ansemin101
/

Markit_v2

Runtime error

AnseMin committed on Mar 17

Commit

e5140fc

1 Parent(s): baf5fd5

removing marker attempt 2

Files changed (4) hide show

README.md CHANGED Viewed

@@ -121,6 +121,7 @@ build:
    - **PyPdfium**: Best for standard PDFs with selectable text
    - **Docling**: Best for complex document layouts
    - **Gemini Flash**: Best for AI-powered conversions (requires API key)
 3. Choose an OCR option based on your selected parser:
    - **None**: No OCR processing (for documents with selectable text)
    - **Tesseract**: Basic OCR using Tesseract

    - **PyPdfium**: Best for standard PDFs with selectable text
    - **Docling**: Best for complex document layouts
    - **Gemini Flash**: Best for AI-powered conversions (requires API key)
+   - **GOT-OCR**: Best for high-quality OCR on images (JPG/PNG only)
 3. Choose an OCR option based on your selected parser:
    - **None**: No OCR processing (for documents with selectable text)
    - **Tesseract**: Basic OCR using Tesseract

build.sh CHANGED Viewed

@@ -80,10 +80,7 @@ echo "Google Gemini API client installed successfully"
 # Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
-# Use compatible versions to avoid conflicts
-pip install -q -U torch==2.0.1 torchvision==0.15.2
-pip install -q -U transformers==4.37.2
-pip install -q -U tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
 # Install Python dependencies

 # Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
+pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.47.0 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
 # Install Python dependencies

requirements.txt CHANGED Viewed

@@ -25,7 +25,7 @@ pydantic==2.7.1
 # GOT-OCR dependencies
 torch>=2.0.1
 torchvision>=0.15.2
-transformers==4.37.2  # Pin to a compatible version for GOT-OCR
 tiktoken>=0.6.0
 verovio>=4.3.1
 accelerate>=0.28.0

 # GOT-OCR dependencies
 torch>=2.0.1
 torchvision>=0.15.2
+transformers>=4.37.2,<4.48.0  # Pin to a compatible version for GOT-OCR
 tiktoken>=0.6.0
 verovio>=4.3.1
 accelerate>=0.28.0

setup.sh CHANGED Viewed

@@ -15,19 +15,12 @@ fi
 # Install Python dependencies
 echo "Installing Python dependencies..."
 pip install -q -U pytesseract pillow opencv-python-headless pdf2image
-echo "Python dependencies installed successfully"
-# Install Gemini API client
-echo "Installing Google Gemini API client..."
 pip install -q -U google-genai
-echo "Google Gemini API client installed successfully"
 # Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
-# Use compatible versions to avoid conflicts
-pip install -q -U torch==2.0.1 torchvision==0.15.2
-pip install -q -U transformers==4.37.2
-pip install -q -U tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
 # Install tesserocr with pip

 # Install Python dependencies
 echo "Installing Python dependencies..."
 pip install -q -U pytesseract pillow opencv-python-headless pdf2image
 pip install -q -U google-genai
+echo "Python dependencies installed successfully"
 # Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
+pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.47.0 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
 # Install tesserocr with pip