AnseMin committed on
Commit
4523ddf
·
1 Parent(s): 17cb3f3

Error: Error processing document with GOT-OCR (ZeroGPU): Numpy is not available

Browse files
Files changed (4) hide show
  1. .gitignore +2 -2
  2. requirements.txt +1 -4
  3. setup.sh +0 -5
  4. src/parsers/got_ocr_parser.py +6 -89
.gitignore CHANGED
@@ -79,8 +79,8 @@ requirement.txt
79
  test_gemini_parser.py
80
 
81
  # Ignore documents folder
82
- # /documents/
83
- # /documents/*
84
 
85
  # Ignore tessdata folder
86
  /tessdata/
 
79
  test_gemini_parser.py
80
 
81
  # Ignore documents folder
82
+ /documents/
83
+ /documents/*
84
 
85
  # Ignore tessdata folder
86
  /tessdata/
requirements.txt CHANGED
@@ -24,7 +24,4 @@ transformers==4.37.2 # Pin to a specific version that works with safetensors 0.
24
  tiktoken==0.6.0
25
  verovio==4.3.1
26
  accelerate==0.28.0
27
- safetensors==0.4.3 # Updated to meet minimum version required by accelerate
28
-
29
- # ZeroGPU support for HuggingFace Spaces
30
- spaces>=0.19.1
 
24
  tiktoken==0.6.0
25
  verovio==4.3.1
26
  accelerate==0.28.0
27
+ safetensors==0.4.3 # Updated to meet minimum version required by accelerate
 
 
 
setup.sh CHANGED
@@ -34,11 +34,6 @@ echo "Installing GOT-OCR dependencies..."
34
  pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.3
35
  echo "GOT-OCR dependencies installed successfully"
36
 
37
- # Install ZeroGPU support
38
- echo "Installing ZeroGPU support..."
39
- pip install -q -U spaces>=0.19.1
40
- echo "ZeroGPU support installed successfully"
41
-
42
  # Install the project in development mode
43
  echo "Installing project in development mode..."
44
  pip install -e .
 
34
  pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.3
35
  echo "GOT-OCR dependencies installed successfully"
36
 
 
 
 
 
 
37
  # Install the project in development mode
38
  echo "Installing project in development mode..."
39
  pip install -e .
src/parsers/got_ocr_parser.py CHANGED
@@ -34,19 +34,9 @@ try:
34
  "Consider downgrading to version <4.48.0"
35
  )
36
 
37
- # Import spaces for ZeroGPU support
38
- try:
39
- import spaces
40
- ZEROGPU_AVAILABLE = True
41
- logger.info("ZeroGPU support is available")
42
- except ImportError:
43
- ZEROGPU_AVAILABLE = False
44
- logger.info("ZeroGPU not available, will use standard GPU if available")
45
-
46
  GOT_AVAILABLE = True and NUMPY_AVAILABLE
47
  except ImportError:
48
  GOT_AVAILABLE = False
49
- ZEROGPU_AVAILABLE = False
50
  NUMPY_AVAILABLE = False
51
  logger.warning("GOT-OCR dependencies not installed. The parser will not be available.")
52
 
@@ -75,6 +65,10 @@ class GotOcrParser(DocumentParser):
75
  }
76
  ]
77
 
 
 
 
 
78
  @classmethod
79
  def _load_model(cls):
80
  """Load the GOT-OCR model and tokenizer if not already loaded."""
@@ -162,21 +156,6 @@ class GotOcrParser(DocumentParser):
162
  # Determine OCR type based on method
163
  ocr_type = "format" if ocr_method == "format" else "ocr"
164
 
165
- # Use ZeroGPU if available, otherwise use regular processing
166
- if ZEROGPU_AVAILABLE:
167
- try:
168
- return self._parse_with_zerogpu(file_path, ocr_type, **kwargs)
169
- except RuntimeError as e:
170
- if "numpy" in str(e).lower():
171
- logger.warning("NumPy issues in ZeroGPU environment, falling back to regular processing")
172
- return self._parse_regular(file_path, ocr_type, **kwargs)
173
- else:
174
- raise
175
- else:
176
- return self._parse_regular(file_path, ocr_type, **kwargs)
177
-
178
- def _parse_regular(self, file_path: Path, ocr_type: str, **kwargs) -> str:
179
- """Regular parsing without ZeroGPU."""
180
  try:
181
  # Load the model
182
  self._load_model()
@@ -189,7 +168,8 @@ class GotOcrParser(DocumentParser):
189
  ocr_type=ocr_type
190
  )
191
 
192
- return self._format_result(result, **kwargs)
 
193
 
194
  except torch.cuda.OutOfMemoryError:
195
  self.release_model() # Release memory
@@ -213,69 +193,6 @@ class GotOcrParser(DocumentParser):
213
  except Exception as e:
214
  logger.error(f"Error processing document with GOT-OCR: {str(e)}")
215
  raise RuntimeError(f"Error processing document with GOT-OCR: {str(e)}")
216
-
217
- def _parse_with_zerogpu(self, file_path: Path, ocr_type: str, **kwargs) -> str:
218
- """Parse using ZeroGPU for dynamic GPU allocation."""
219
- try:
220
- # Define the GPU-dependent function
221
- @spaces.GPU
222
- def process_with_gpu():
223
- # Ensure NumPy is available
224
- try:
225
- import numpy
226
- except ImportError:
227
- # Try to install numpy if not available
228
- import subprocess
229
- import sys
230
- logger.warning("NumPy not found in ZeroGPU environment, attempting to install...")
231
- subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy>=1.24.0"])
232
- import numpy
233
- logger.info(f"NumPy {numpy.__version__} installed successfully in ZeroGPU environment")
234
-
235
- # Load the model
236
- self._load_model()
237
-
238
- # Use the model's chat method
239
- logger.info(f"Processing image with GOT-OCR using ZeroGPU: {file_path}")
240
- return self._model.chat(
241
- self._tokenizer,
242
- str(file_path),
243
- ocr_type=ocr_type
244
- )
245
-
246
- # Call the GPU-decorated function
247
- result = process_with_gpu()
248
-
249
- # Format and return the result
250
- return self._format_result(result, **kwargs)
251
-
252
- except ImportError as e:
253
- if "numpy" in str(e).lower():
254
- logger.error(f"NumPy import error in ZeroGPU environment: {str(e)}")
255
- raise RuntimeError(
256
- "NumPy is not available in the ZeroGPU environment. "
257
- "This is a known issue with some HuggingFace Spaces. "
258
- "Please try using a different parser or contact support."
259
- )
260
- else:
261
- logger.error(f"Import error in ZeroGPU environment: {str(e)}")
262
- raise RuntimeError(f"Error processing document with GOT-OCR (ZeroGPU): {str(e)}")
263
- except Exception as e:
264
- logger.error(f"Error processing document with GOT-OCR (ZeroGPU): {str(e)}")
265
- raise RuntimeError(f"Error processing document with GOT-OCR (ZeroGPU): {str(e)}")
266
-
267
- def _format_result(self, result: str, **kwargs) -> str:
268
- """Format the OCR result based on the requested format."""
269
- output_format = kwargs.get("output_format", "markdown").lower()
270
- if output_format == "json":
271
- return json.dumps({"content": result}, ensure_ascii=False, indent=2)
272
- elif output_format == "text":
273
- # Simple markdown to text conversion
274
- return result.replace("#", "").replace("*", "").replace("_", "")
275
- elif output_format == "document_tags":
276
- return f"<doc>\n{result}\n</doc>"
277
- else:
278
- return result
279
 
280
  # Register the parser with the registry if GOT is available
281
  if GOT_AVAILABLE:
 
34
  "Consider downgrading to version <4.48.0"
35
  )
36
 
 
 
 
 
 
 
 
 
 
37
  GOT_AVAILABLE = True and NUMPY_AVAILABLE
38
  except ImportError:
39
  GOT_AVAILABLE = False
 
40
  NUMPY_AVAILABLE = False
41
  logger.warning("GOT-OCR dependencies not installed. The parser will not be available.")
42
 
 
65
  }
66
  ]
67
 
68
+ @classmethod
69
+ def get_description(cls) -> str:
70
+ return "GOT-OCR 2.0 parser for converting images to text (requires CUDA)"
71
+
72
  @classmethod
73
  def _load_model(cls):
74
  """Load the GOT-OCR model and tokenizer if not already loaded."""
 
156
  # Determine OCR type based on method
157
  ocr_type = "format" if ocr_method == "format" else "ocr"
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  try:
160
  # Load the model
161
  self._load_model()
 
168
  ocr_type=ocr_type
169
  )
170
 
171
+ # Return the result directly as markdown
172
+ return result
173
 
174
  except torch.cuda.OutOfMemoryError:
175
  self.release_model() # Release memory
 
193
  except Exception as e:
194
  logger.error(f"Error processing document with GOT-OCR: {str(e)}")
195
  raise RuntimeError(f"Error processing document with GOT-OCR: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  # Register the parser with the registry if GOT is available
198
  if GOT_AVAILABLE: