File size: 4,642 Bytes
14586a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
λ™μ˜μ–΄ 처리 λͺ¨λ“ˆ
"""
import os
import sys
import re
from typing import Dict, List, Optional, Set

# Default synonym dictionary (used when no MP_synonyms.py file is available).
# Each key is a variant spelling or alias; each value is the standardized part
# name it maps to (a standardized name may also appear as its own key).
# NOTE(review): the string literals below look mis-encoded (UTF-8 text decoded
# with the wrong codec) in this copy of the file — confirm against the original.
DEFAULT_SYNONYMS = {
    "μ—‘μΈ„λ ˆμ΄ν„°": "앑츄에이터",
    "앑츄에이터": "앑츄에이터",
    "λͺ¨ν„°": "앑츄에이터",
    "컨박": "μ»¨νŠΈλ‘€λ°•μŠ€"
}


class SynonymsHandler:
    """
    Map part-name synonyms onto their standardized part names.

    The mapping is read from a ``SYNONYMS = {...}`` literal inside a Python
    source file (parsed as text with regular expressions, never executed) or,
    failing that, copied from the module-level ``DEFAULT_SYNONYMS`` table.

    Attributes:
        synonyms: Synonym keyword -> standardized part name.
        loaded: True once a mapping (from a file or the defaults) is in place.
    """

    def __init__(self, synonyms_file: Optional[str] = None):
        """
        Initialize the handler and load a synonym mapping.

        Candidate sources are tried in order and the first existing file wins:
          1. ``synonyms_file`` if given and present on disk
          2. ``.venv/SYNONYMS/MP_synonyms.py``
          3. ``MP_synonyms.py`` in the current working directory
          4. the built-in default table (a notice is printed)

        Args:
            synonyms_file: Path to a synonyms file (optional).
        """
        self.synonyms: Dict[str, str] = {}
        self.loaded = False

        candidates = (
            synonyms_file,
            ".venv/SYNONYMS/MP_synonyms.py",
            "MP_synonyms.py",
        )
        for candidate in candidates:
            if candidate and os.path.exists(candidate):
                self._load_from_file(candidate)
                break
        else:
            # No candidate file exists: fall back to the built-in table.
            print("λ™μ˜μ–΄ νŒŒμΌμ„ 찾을 수 μ—†μ–΄ κΈ°λ³Έ λ™μ˜μ–΄ 사전을 μ‚¬μš©ν•©λ‹ˆλ‹€.")
            self._use_defaults()

    def _use_defaults(self) -> None:
        """Install a private copy of the default synonym table."""
        # Copy so that mutating this instance's mapping can never alter the
        # shared module-level defaults seen by other handlers.
        self.synonyms = dict(DEFAULT_SYNONYMS)
        self.loaded = True

    def _load_from_file(self, file_path: str) -> None:
        """
        Load the synonym dictionary from a file.

        The file is read as UTF-8 text and searched for a ``SYNONYMS = {...}``
        literal; every ``"key": "value"`` pair of double-quoted strings inside
        it becomes an entry. If the literal is missing, or any error occurs
        while reading or parsing, the default table is used instead and a
        message is printed.

        Args:
            file_path: Path to the synonyms file.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Extract the body of the SYNONYMS dict literal. The match is
            # non-greedy, so it ends at the first closing brace.
            synonyms_match = re.search(r'SYNONYMS\s*=\s*\{(.*?)\}', content, re.DOTALL)
            if synonyms_match:
                # Parse the pairs textually instead of executing the file.
                pattern = r'"([^"]*)"\s*:\s*"([^"]*)"'
                matches = re.findall(pattern, synonyms_match.group(1))

                self.synonyms = dict(matches)
                self.loaded = True
                print(f"λ™μ˜μ–΄ 사전 λ‘œλ“œ μ™„λ£Œ: {file_path}, {len(self.synonyms)}개 ν•­λͺ©")
            else:
                print(f"νŒŒμΌμ—μ„œ SYNONYMS λ”•μ…”λ„ˆλ¦¬λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {file_path}")
                self._use_defaults()

        except Exception as e:
            # Deliberate best-effort: any failure falls back to the defaults
            # rather than aborting construction.
            print(f"λ™μ˜μ–΄ 사전 λ‘œλ“œ 쀑 였λ₯˜: {e}")
            self._use_defaults()

    def find_in_text(self, text: str) -> List[str]:
        """
        Find standardized part names whose synonyms occur in a text.

        Matching is a case-insensitive substring test of every synonym
        keyword against ``text``.

        Args:
            text: Text to search.

        Returns:
            De-duplicated standardized part names, ordered by the position of
            their first matching keyword in the synonym mapping. Empty when
            ``text`` is empty or no mapping is loaded.
        """
        if not text or not self.loaded:
            return []

        # Only case is normalized here; whitespace is left untouched.
        haystack = text.lower()

        # dict.fromkeys de-duplicates while keeping a deterministic order
        # (a set would make the result order vary between runs).
        return list(dict.fromkeys(
            standard_name
            for keyword, standard_name in self.synonyms.items()
            if keyword.lower() in haystack
        ))

    def standardize(self, part_name: str) -> str:
        """
        Standardize a single part name.

        The name is stripped and lowercased, then compared for equality with
        each lowercased synonym keyword.

        Args:
            part_name: Part name to standardize.

        Returns:
            The standardized part name of the first matching keyword, or
            ``part_name`` unchanged when nothing matches, the input is empty,
            or no mapping is loaded.
        """
        if not part_name or not self.loaded:
            return part_name

        part_lower = part_name.lower().strip()

        for keyword, standard_name in self.synonyms.items():
            if part_lower == keyword.lower():
                return standard_name

        # No match: hand back the original name.
        return part_name

    def standardize_parts_list(self, parts: List[str]) -> List[str]:
        """
        Standardize a list of part names.

        Args:
            parts: Part names to standardize.

        Returns:
            De-duplicated standardized part names in first-seen order; falsy
            entries are skipped. ``parts`` itself is returned unchanged when
            it is empty or no mapping is loaded.
        """
        if not parts or not self.loaded:
            return parts

        # dict.fromkeys de-duplicates while preserving input order, so the
        # result is deterministic.
        return list(dict.fromkeys(
            self.standardize(part) for part in parts if part
        ))