File size: 2,377 Bytes
dda982a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from typing import Dict, List, Type, Any, Optional
from src.parsers.parser_interface import DocumentParser


class ParserRegistry:
    """Central registry for all document parsers in the system.

    Parser classes are stored at class level, keyed by the name each class
    reports through ``get_name()``. Every method is a classmethod, so the
    registry is used directly without being instantiated.
    """

    # Maps parser name -> parser class; shared class-level state.
    _parsers: Dict[str, Type[DocumentParser]] = {}

    @classmethod
    def register(cls, parser_class: Type[DocumentParser]) -> None:
        """
        Register a parser with the system.

        The parser is stored under the name returned by
        ``parser_class.get_name()``. Registering a second class under an
        already-used name replaces the earlier entry.

        Args:
            parser_class: The parser class to register
        """
        parser_name = parser_class.get_name()
        cls._parsers[parser_name] = parser_class
        print(f"Registered parser: {parser_name}")

    @classmethod
    def get_available_parsers(cls) -> Dict[str, Type[DocumentParser]]:
        """
        Return all registered parsers.

        Returns:
            A new dict mapping parser name to parser class. It is a shallow
            copy, so adding or removing keys on the result does not alter
            the registry itself.
        """
        # Hand out a copy: returning the internal dict would let any caller
        # add, replace or clear registrations by accident.
        return dict(cls._parsers)

    @classmethod
    def get_parser_class(cls, name: str) -> Optional[Type[DocumentParser]]:
        """
        Get a specific parser class by name.

        Args:
            name: Name the parser was registered under

        Returns:
            The registered parser class, or None if the name is unknown
        """
        return cls._parsers.get(name)

    @classmethod
    def get_parser_names(cls) -> List[str]:
        """Get a list of all registered parser names, in registration order."""
        return list(cls._parsers)

    @classmethod
    def get_ocr_options(cls, parser_name: str) -> List[str]:
        """
        Get OCR methods supported by a parser.

        Each entry yielded by the parser's ``get_supported_ocr_methods()``
        is expected to be a mapping with a ``"name"`` key; a missing key
        raises KeyError.

        Args:
            parser_name: Name of the parser

        Returns:
            List of OCR method display names, or an empty list if no parser
            is registered under ``parser_name``
        """
        parser_class = cls.get_parser_class(parser_name)
        if not parser_class:
            return []

        return [method["name"] for method in parser_class.get_supported_ocr_methods()]

    @classmethod
    def get_ocr_method_id(cls, parser_name: str, ocr_display_name: str) -> Optional[str]:
        """
        Get the internal ID for an OCR method based on its display name.

        Each entry yielded by the parser's ``get_supported_ocr_methods()``
        is expected to be a mapping with ``"name"`` and ``"id"`` keys. When
        several entries share a display name, the first one wins.

        Args:
            parser_name: Name of the parser
            ocr_display_name: Display name of the OCR method

        Returns:
            Internal ID of the OCR method or None if not found
        """
        parser_class = cls.get_parser_class(parser_name)
        if not parser_class:
            return None

        # First matching entry, or None when no display name matches.
        return next(
            (
                method["id"]
                for method in parser_class.get_supported_ocr_methods()
                if method["name"] == ocr_display_name
            ),
            None,
        )