Spaces:
Running
on
Zero
Running
on
Zero
from abc import ABC, abstractmethod | |
from pathlib import Path | |
from typing import Dict, List, Optional, Any, Union | |
class DocumentParser(ABC):
    """Base interface for all document parsers in the system.

    Concrete parsers must implement ``parse``, ``get_name`` and
    ``get_supported_ocr_methods``. The metadata accessors are class methods,
    so they can be queried on the parser class itself without creating an
    instance.
    """

    @abstractmethod
    def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
        """Parse a document and return its content.

        Args:
            file_path: Path to the document.
            ocr_method: OCR method to use (if applicable).
            **kwargs: Additional parser-specific options.

        Returns:
            The parsed content.
        """

    # ``@classmethod`` is the outer decorator and ``@abstractmethod`` the
    # inner one: this is the stacking order the ``abc`` module requires for
    # abstract class methods.
    @classmethod
    @abstractmethod
    def get_name(cls) -> str:
        """Return the display name of this parser."""

    @classmethod
    @abstractmethod
    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:
        """Return a list of supported OCR methods.

        Returns:
            List of dictionaries with keys:
                - id: Unique identifier for the OCR method
                - name: Display name for the OCR method
                - default_params: Default parameters for this OCR method
        """

    @classmethod
    def get_description(cls) -> str:
        """Return a description of this parser.

        The default description is built from ``get_name()``; subclasses may
        override it.
        """
        return f"{cls.get_name()} document parser"