Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,414 Bytes
dda982a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Any, Union
class DocumentParser(ABC):
    """Base interface for all document parsers in the system.

    Concrete parsers must implement :meth:`parse`, :meth:`get_name` and
    :meth:`get_supported_ocr_methods`. :meth:`get_description` has a default
    implementation derived from :meth:`get_name` and may be overridden.
    """

    @abstractmethod
    def parse(
        self,
        file_path: Union[str, Path],
        ocr_method: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Parse a document and return its content.

        Args:
            file_path: Path to the document.
            ocr_method: OCR method to use (if applicable).
            **kwargs: Additional parser-specific options.

        Returns:
            The parsed content.
        """

    @classmethod
    @abstractmethod
    def get_name(cls) -> str:
        """Return the display name of this parser."""

    @classmethod
    @abstractmethod
    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:
        """Return a list of supported OCR methods.

        Returns:
            A list of dictionaries with keys:

            - ``id``: Unique identifier for the OCR method.
            - ``name``: Display name for the OCR method.
            - ``default_params``: Default parameters for this OCR method.
        """

    @classmethod
    def get_description(cls) -> str:
        """Return a description of this parser.

        The default description is the parser's display name followed by
        ``" document parser"``.
        """
        return f"{cls.get_name()} document parser"