File size: 12,169 Bytes
4044010
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# -*- coding: utf-8 -*-
"""
Speech recognition (STT) module built on the VITO API.
"""

import os
import logging
import requests
import json
import time # time import μΆ”κ°€
from dotenv import load_dotenv

# Pull environment variables in from a .env file before anything reads them.
load_dotenv()

# Logging: this module uses its own named logger rather than sharing the
# application's logger; swap in the application logger here if that is preferred.
logger = logging.getLogger("VitoSTT")

# Only install a default console handler when neither this logger nor any of
# its ancestors already has one; otherwise the existing logging setup is left
# untouched (including the level).
if not logger.hasHandlers():
    formatter = logging.Formatter(
        fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)  # default verbosity for the fallback setup
    logger.addHandler(handler)

class VitoSTT:
    """Client wrapper around the VITO speech-to-text (STT) REST API.

    Credentials are read from the ``VITO_CLIENT_ID`` / ``VITO_CLIENT_SECRET``
    environment variables. An access token is fetched on demand, cached on the
    instance together with its expiry, and refreshed automatically when it has
    expired or when the API answers 401.
    """

    # Safety margin (seconds) subtracted from the reported expiry so a token is
    # refreshed slightly before the server would start rejecting it.
    _TOKEN_EXPIRY_MARGIN_SEC = 60
    # Lifetime (seconds) assumed when the authenticate response has no usable expiry.
    _DEFAULT_TOKEN_LIFETIME_SEC = 3600
    # Result polling budget: number of polls and pause (seconds) before each poll.
    _POLL_MAX_TRIES = 15
    _POLL_INTERVAL_SEC = 2

    def __init__(self):
        """Load credentials from the environment and set up endpoints.

        Missing credentials only produce a warning here; the check is enforced
        when a token or a transcription is actually requested.
        """
        self.client_id = os.getenv("VITO_CLIENT_ID")
        self.client_secret = os.getenv("VITO_CLIENT_SECRET")

        if not self.client_id or not self.client_secret:
            logger.warning("VITO API 인증 정보가 .env νŒŒμΌμ— μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            logger.warning("VITO_CLIENT_ID와 VITO_CLIENT_SECRETλ₯Ό ν™•μΈν•˜μ„Έμš”.")
        else:
            logger.info("VITO STT API ν΄λΌμ΄μ–ΈνŠΈ ID/Secret λ‘œλ“œ μ™„λ£Œ.")

        # API endpoints
        self.token_url = "https://openapi.vito.ai/v1/authenticate"
        self.stt_url = "https://openapi.vito.ai/v1/transcribe"

        # Cached access token and the epoch time after which it must be refreshed.
        self.access_token = None
        self._token_expires_at = 0

    def _token_is_valid(self):
        """Return True when a token is cached and its tracked expiry has not passed."""
        return bool(self.access_token) and time.time() < self._token_expires_at

    @classmethod
    def _resolve_expiry(cls, payload, now):
        """Compute the epoch time at which a freshly issued token should be refreshed.

        Args:
            payload: Decoded JSON body of the authenticate response.
            now: Current epoch time in seconds.

        Returns:
            Epoch seconds, already reduced by the safety margin.
        """
        def _is_number(value):
            # bool is an int subclass; a True/False here would be a malformed payload.
            return isinstance(value, (int, float)) and not isinstance(value, bool)

        # NOTE(review): VITO's authenticate response is understood to report an
        # absolute ``expire_at`` Unix timestamp - confirm against the current API
        # docs. ``expires_in`` (relative seconds) is the field the original code
        # read and is kept as a fallback.
        expire_at = payload.get("expire_at")
        if _is_number(expire_at):
            return expire_at - cls._TOKEN_EXPIRY_MARGIN_SEC

        expires_in = payload.get("expires_in")
        if not _is_number(expires_in):
            expires_in = cls._DEFAULT_TOKEN_LIFETIME_SEC
        return now + expires_in - cls._TOKEN_EXPIRY_MARGIN_SEC

    @staticmethod
    def _join_utterances(utterances):
        """Join the non-empty ``msg`` fields of the utterance dicts with single spaces."""
        return " ".join(seg.get("msg", "") for seg in utterances if seg.get("msg")).strip()

    def get_access_token(self):
        """Request a new access token from the VITO API and cache it.

        Always performs a network request (it does not reuse the cached token),
        so it can also be used to force a refresh.

        Returns:
            The access token string.

        Raises:
            ValueError: Credentials are not configured, or the response has no token.
            TimeoutError: The token request timed out.
            ConnectionError: Any other request failure (including HTTP errors).
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•„ 토큰을 νšλ“ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
            raise ValueError("VITO API 인증 정보가 μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

        logger.info("VITO API μ•‘μ„ΈμŠ€ 토큰 μš”μ²­ 쀑...")
        try:
            response = requests.post(
                self.token_url,
                data={"client_id": self.client_id, "client_secret": self.client_secret},
                timeout=10,
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.Timeout as e:
            logger.error(f"VITO API 토큰 νšλ“ μ‹œκ°„ 초과: {self.token_url}")
            raise TimeoutError("VITO API 토큰 νšλ“ μ‹œκ°„ 초과") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API 토큰 νšλ“ μ‹€νŒ¨: {e}")
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                logger.error(f"응닡 μ½”λ“œ: {failed_response.status_code}, λ‚΄μš©: {failed_response.text}")
            raise ConnectionError(f"VITO API 토큰 νšλ“ μ‹€νŒ¨: {e}") from e

        self.access_token = result.get("access_token")
        if not self.access_token:
            logger.error("VITO API μ‘λ‹΅μ—μ„œ 토큰을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
            raise ValueError("VITO API 토큰을 λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")

        # Record the expiry only once a usable token has actually been received.
        self._token_expires_at = self._resolve_expiry(result, time.time())

        logger.info("VITO API μ•‘μ„ΈμŠ€ 토큰 νšλ“ 성곡")
        return self.access_token

    def _submit_and_poll(self, audio_bytes, language):
        """Upload the audio, then poll the job until it finishes or the budget runs out.

        Returns:
            A result dict in the same shape as ``transcribe_audio``.

        Raises:
            requests.exceptions.RequestException: Any HTTP/network failure, left
                for ``transcribe_audio`` to translate into a result dict.
        """
        headers = {"Authorization": f"Bearer {self.access_token}"}
        # Sent as a (filename, content) tuple in the multipart upload.
        files = {"file": ("audio_file", audio_bytes)}

        # Transcription options (adjust as needed).
        config = {
            "use_multi_channel": False,
            "use_itn": True,  # inverse text normalization (numbers, dates, ...)
            "use_disfluency_filter": True,  # drop filler sounds
            "use_profanity_filter": False,
            "language": language,
        }
        data = {"config": json.dumps(config)}

        logger.info(f"VITO STT API ({self.stt_url}) μš”μ²­ 전솑 쀑...")
        response = requests.post(
            self.stt_url,
            headers=headers,
            files=files,
            data=data,
            timeout=20,  # upload timeout
        )
        response.raise_for_status()

        job_id = response.json().get("id")
        if not job_id:
            logger.error("VITO API μž‘μ—… IDλ₯Ό λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")
            return {"success": False, "error": "VITO API μž‘μ—… IDλ₯Ό λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€."}

        logger.info(f"VITO STT μž‘μ—… ID: {job_id}, κ²°κ³Ό 확인 μ‹œμž‘...")

        transcript_url = f"{self.stt_url}/{job_id}"
        max_tries = self._POLL_MAX_TRIES
        wait_time = self._POLL_INTERVAL_SEC

        for try_count in range(max_tries):
            time.sleep(wait_time)  # pause before every poll to keep load on the API low
            logger.debug(f"κ²°κ³Ό 확인 μ‹œλ„ ({try_count + 1}/{max_tries}) - URL: {transcript_url}")
            get_response = requests.get(
                transcript_url,
                headers=headers,
                timeout=10,  # per-poll timeout
            )
            get_response.raise_for_status()

            result = get_response.json()
            status = result.get("status")
            logger.debug(f"ν˜„μž¬ μƒνƒœ: {status}")

            if status == "completed":
                # ``results`` / ``utterances`` may be absent or null; treat both as empty.
                utterances = (result.get("results") or {}).get("utterances") or []
                if not utterances:
                    logger.warning("VITO STT μ™„λ£Œλ˜μ—ˆμœΌλ‚˜ κ²°κ³Ό utterancesκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
                    return {"success": True, "text": ""}  # success, but nothing recognized
                transcript = self._join_utterances(utterances)
                logger.info(f"VITO STT 인식 성곡 (일뢀): {transcript[:50]}...")
                return {"success": True, "text": transcript}

            if status == "failed":
                error_msg = f"VITO API λ³€ν™˜ μ‹€νŒ¨: {result.get('message', 'μ•Œ 수 μ—†λŠ” 였λ₯˜')}"
                logger.error(error_msg)
                return {"success": False, "error": error_msg, "details": result}

            if status == "transcribing":
                logger.info(f"VITO API 처리 쀑... ({try_count + 1}/{max_tries})")
            else:  # any other, not-yet-finished status
                logger.info(f"VITO API μƒνƒœ '{status}', λŒ€κΈ° 쀑... ({try_count + 1}/{max_tries})")

        logger.error(f"VITO API 응닡 νƒ€μž„μ•„μ›ƒ ({max_tries * wait_time}초 초과)")
        return {"success": False, "error": "VITO API 응닡 νƒ€μž„μ•„μ›ƒ"}

    def transcribe_audio(self, audio_bytes, language="ko"):
        """Convert audio byte data to text.

        A token is obtained first when none is cached or the cached one has
        expired. If the API rejects the token with 401, the token is reissued
        and the whole request is retried exactly once.

        Args:
            audio_bytes: Audio file content as bytes. (A one-shot stream would
                already be consumed by the time of a retry.)
            language: Language code (default: 'ko').

        Returns:
            A dict; this method does not raise for request failures:
            {'success': True, 'text': <recognized text>}
            {'success': False, 'error': <message>, 'details': <optional detail>}
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            return {"success": False, "error": "API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}

        try:
            if not self._token_is_valid():
                logger.info("VITO API 토큰 νšλ“/κ°±μ‹  μ‹œλ„...")
                self.get_access_token()

            try:
                return self._submit_and_poll(audio_bytes, language)
            except requests.exceptions.HTTPError as e:
                if e.response is None or e.response.status_code != 401:
                    raise  # handled by the generic HTTP error branch below
                logger.warning("VITO API 토큰이 λ§Œλ£Œλ˜μ—ˆκ±°λ‚˜ μœ νš¨ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. 토큰 μž¬λ°œκΈ‰ μ‹œλ„...")
                self.access_token = None  # invalidate the rejected token
                try:
                    self.get_access_token()
                except Exception as token_e:
                    logger.error(f"토큰 μž¬νšλ“ μ‹€νŒ¨: {token_e}")
                    return {"success": False, "error": f"토큰 μž¬νšλ“ μ‹€νŒ¨: {str(token_e)}"}
                logger.info("μƒˆ ν† ν°μœΌλ‘œ μž¬μ‹œλ„ν•©λ‹ˆλ‹€.")

            # Single retry with the fresh token. A second 401 is not retried
            # again; it is reported through the generic HTTP error branch.
            return self._submit_and_poll(audio_bytes, language)

        except requests.exceptions.HTTPError as e:
            error_body = ""
            try:
                error_body = e.response.text
            except Exception:
                # Best effort only: the body is extra detail for the log/result.
                pass
            logger.error(f"VITO API HTTP 였λ₯˜: {e.response.status_code}, 응닡: {error_body}")
            return {
                "success": False,
                "error": f"API HTTP 였λ₯˜: {e.response.status_code}",
                "details": error_body,
            }
        except requests.exceptions.Timeout:
            logger.error("VITO API μš”μ²­ μ‹œκ°„ 초과")
            return {"success": False, "error": "API μš”μ²­ μ‹œκ°„ 초과"}
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API μš”μ²­ 쀑 λ„€νŠΈμ›Œν¬ 였λ₯˜ λ°œμƒ: {str(e)}")
            return {"success": False, "error": "API μš”μ²­ λ„€νŠΈμ›Œν¬ 였λ₯˜", "details": str(e)}
        except Exception as e:
            # Boundary handler: callers rely on always getting a result dict back.
            logger.error(f"μŒμ„±μΈμ‹ 처리 쀑 μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
            return {
                "success": False,
                "error": "μŒμ„±μΈμ‹ λ‚΄λΆ€ 처리 μ‹€νŒ¨",
                "details": str(e),
            }