Spaces:

Ansemin101
/

Markit_v2

Runtime error

AnseMin committed on Mar 17

Commit

98f25ae

1 Parent(s): 5367fe1

resolving compatibility issue with docling and GOT OCR

Files changed (4) hide show

app.py CHANGED Viewed

@@ -134,5 +134,26 @@ except ModuleNotFoundError:
 # Call setup function at import time
 setup_tesseract()
 if __name__ == "__main__":
     main()

 # Call setup function at import time
 setup_tesseract()
+# Add this near the top of app.py after imports
+# Handle potential import conflicts
+try:
+    import transformers
+    print(f"Transformers version: {transformers.__version__}")
+except ImportError:
+    print("Warning: Transformers not installed or not working")
+try:
+    import torch
+    print(f"Torch version: {torch.__version__}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+except ImportError:
+    print("Warning: PyTorch not installed or not working")
+try:
+    import docling
+    print(f"Docling version: {docling.__version__ if hasattr(docling, '__version__') else 'unknown'}")
+except ImportError:
+    print("Warning: Docling not installed or not working")
 if __name__ == "__main__":
     main()

build.sh CHANGED Viewed

@@ -78,17 +78,20 @@ echo "Installing Google Gemini API client..."
 pip install -q -U google-genai
 echo "Google Gemini API client installed successfully"
-# Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
-# Install transformers and other dependencies first
-pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
-# Install torch and torchvision separately with --no-deps to avoid conflicts
 pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
 echo "GOT-OCR dependencies installed successfully"
-# Install Python dependencies
-echo "Installing Python dependencies..."
-pip install -e .
 # Create .env file if it doesn't exist
 if [ ! -f .env ]; then

 pip install -q -U google-genai
 echo "Google Gemini API client installed successfully"
+# Install GOT-OCR dependencies first
 echo "Installing GOT-OCR dependencies..."
 pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
+pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
+# Install docling separately with --no-deps to avoid conflicts
+echo "Installing docling..."
+pip install -q -U docling==2.25.0 --no-deps
+echo "Docling installed successfully"
+# Install remaining Python dependencies
+echo "Installing remaining Python dependencies..."
+pip install -e . --no-deps
 # Create .env file if it doesn't exist
 if [ ! -f .env ]; then

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-docling==2.25.0
 gradio==5.14.0
 grpcio-status==1.70.0
 markdown==3.7
@@ -9,28 +9,35 @@ pytesseract==0.3.13
 semchunk==2.2.2
 Pillow>=9.0.0
 numpy>=1.21.0
 # Tesseract dependencies
 tesseract==0.1.3
 tesserocr>=2.5.0; platform_system != "Windows"  # Only install on non-Windows systems
 # Additional dependencies for image processing
 opencv-python-headless>=4.5.0  # Headless version for server environments
 pdf2image>=1.16.0  # For PDF processing
 dill==0.3.8  # Downgraded to be compatible with datasets
 # Gemini API client
 google-genai>=0.1.0
 # Environment variables
 python-dotenv>=1.0.0
 # Pin pydantic to resolve compatibility issues with gradio
 pydantic==2.7.1
-# GOT-OCR dependencies - pinned versions to avoid conflicts
-transformers==4.37.2  # Exact version for GOT-OCR2
-tiktoken==0.6.0
-verovio==4.3.1
-accelerate==0.28.0
-safetensors>=0.4.0
 packaging>=21.0  # For version comparison
-# Torch dependencies - install separately to avoid conflicts
-# torch==2.0.1
-# torchvision==0.15.2

+# Core dependencies
 gradio==5.14.0
 grpcio-status==1.70.0
 markdown==3.7
 semchunk==2.2.2
 Pillow>=9.0.0
 numpy>=1.21.0
 # Tesseract dependencies
 tesseract==0.1.3
 tesserocr>=2.5.0; platform_system != "Windows"  # Only install on non-Windows systems
 # Additional dependencies for image processing
 opencv-python-headless>=4.5.0  # Headless version for server environments
 pdf2image>=1.16.0  # For PDF processing
 dill==0.3.8  # Downgraded to be compatible with datasets
 # Gemini API client
 google-genai>=0.1.0
 # Environment variables
 python-dotenv>=1.0.0
 # Pin pydantic to resolve compatibility issues with gradio
 pydantic==2.7.1
+# Common dependencies - not pinned to allow resolution
 packaging>=21.0  # For version comparison
+safetensors>=0.4.0
+# Note: The following packages will be installed separately in setup.sh and build.sh
+# to avoid dependency conflicts:
+# - docling
+# - transformers
+# - torch
+# - torchvision
+# - tiktoken
+# - verovio
+# - accelerate

setup.sh CHANGED Viewed

@@ -18,14 +18,17 @@ pip install -q -U pytesseract pillow opencv-python-headless pdf2image
 pip install -q -U google-genai
 echo "Python dependencies installed successfully"
-# Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
-# Install transformers and other dependencies first
-pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
-# Install torch and torchvision separately with --no-deps to avoid conflicts
 pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
 echo "GOT-OCR dependencies installed successfully"
 # Install tesserocr with pip
 echo "Installing tesserocr..."
 pip install -q -U tesserocr || echo "Failed to install tesserocr with pip, trying with specific compiler flags..."

 pip install -q -U google-genai
 echo "Python dependencies installed successfully"
+# Install GOT-OCR dependencies first
 echo "Installing GOT-OCR dependencies..."
 pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
+pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
+# Install docling separately with --no-deps to avoid conflicts
+echo "Installing docling..."
+pip install -q -U docling==2.25.0 --no-deps
+echo "Docling installed successfully"
 # Install tesserocr with pip
 echo "Installing tesserocr..."
 pip install -q -U tesserocr || echo "Failed to install tesserocr with pip, trying with specific compiler flags..."