# RAG4_Voice_Fast / vito_stt.py
# (repository-viewer residue kept for provenance: "jeongsoo's picture", commit message 'update', revision 211ce10)
# -*- coding: utf-8 -*-
"""
Speech recognition (STT) module built on the VITO API.
"""
import os
import logging
import requests
import json
import time # added: time import
from dotenv import load_dotenv
# Load environment variables (python-dotenv reads them from a .env file).
load_dotenv()
# Logger setup (could be shared with app.py or configured independently).
# A standalone logger is used here; switch to app.py's logger if that is preferred.
logger = logging.getLogger("VitoSTT")
# Default logging setup: without any handler nothing may be printed, so attach a basic stream handler.
# NOTE(review): indentation was lost in this copy of the file. The handler setup below
# presumably belongs inside the `if` body; whether `setLevel` is inside it too cannot be
# determined from here - confirm against the original file.
if not logger.hasHandlers():
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO) # default level: INFO
class VitoSTT:
    """Client wrapper for the VITO speech-to-text (STT) REST API.

    Credentials are read from the ``VITO_CLIENT_ID`` and ``VITO_CLIENT_SECRET``
    environment variables. The access token is cached on the instance and is
    refreshed when it is missing, past its recorded expiry, or rejected by the
    API with HTTP 401.
    """

    # Polling budget used while waiting for a transcription job to finish.
    POLL_MAX_TRIES = 15
    POLL_INTERVAL_SECONDS = 2

    # Safety margin (seconds) subtracted from the token lifetime so that a
    # token is refreshed slightly before it actually expires.
    TOKEN_EXPIRY_MARGIN_SECONDS = 60

    def __init__(self):
        """Read the credentials from the environment and set up endpoints.

        Missing credentials only produce a warning here; they are enforced
        when a token or a transcription is actually requested.
        """
        self.client_id = os.getenv("VITO_CLIENT_ID")
        self.client_secret = os.getenv("VITO_CLIENT_SECRET")
        if not self.client_id or not self.client_secret:
            logger.warning("VITO API 인증 정보가 .env νŒŒμΌμ— μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            logger.warning("VITO_CLIENT_ID와 VITO_CLIENT_SECRETλ₯Ό ν™•μΈν•˜μ„Έμš”.")
        else:
            logger.info("VITO STT API ν΄λΌμ΄μ–ΈνŠΈ ID/Secret λ‘œλ“œ μ™„λ£Œ.")

        # API endpoints
        self.token_url = "https://openapi.vito.ai/v1/authenticate"
        self.stt_url = "https://openapi.vito.ai/v1/transcribe"

        # Cached access token and the epoch time after which it must be refreshed.
        self.access_token = None
        self._token_expires_at = 0

    @staticmethod
    def _token_expiry(result, now):
        """Return the epoch time at which the token in *result* should be refreshed.

        An absolute ``expire_at`` timestamp is preferred when the response
        carries one (the VITO authenticate response is believed to use this
        field - confirm against the API reference). Otherwise the relative
        ``expires_in`` value is used, defaulting to one hour.
        """
        margin = VitoSTT.TOKEN_EXPIRY_MARGIN_SECONDS
        expire_at = result.get("expire_at")
        if expire_at is not None:
            try:
                return float(expire_at) - margin
            except (TypeError, ValueError):
                pass  # unusable value: fall back to the relative lifetime below
        return now + result.get("expires_in", 3600) - margin

    @staticmethod
    def _join_utterances(utterances):
        """Concatenate the non-empty ``msg`` fields of *utterances* with spaces."""
        return " ".join(seg["msg"] for seg in utterances if seg.get("msg")).strip()

    def _has_valid_token(self):
        """Return True when a cached token exists and has not reached its expiry."""
        return bool(self.access_token) and time.time() < self._token_expires_at

    def get_access_token(self):
        """Request a fresh access token from the VITO API and cache it.

        Returns:
            The new access token string.

        Raises:
            ValueError: credentials are not configured, or the response did
                not contain a token.
            TimeoutError: the authentication request timed out.
            ConnectionError: any other request failure (including HTTP errors).
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•„ 토큰을 νšλ“ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
            raise ValueError("VITO API 인증 정보가 μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

        logger.info("VITO API μ•‘μ„ΈμŠ€ 토큰 μš”μ²­ 쀑...")
        try:
            response = requests.post(
                self.token_url,
                data={"client_id": self.client_id, "client_secret": self.client_secret},
                timeout=10,
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.Timeout as e:
            logger.error(f"VITO API 토큰 νšλ“ μ‹œκ°„ 초과: {self.token_url}")
            raise TimeoutError("VITO API 토큰 νšλ“ μ‹œκ°„ 초과") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API 토큰 νšλ“ μ‹€νŒ¨: {e}")
            if getattr(e, "response", None) is not None:
                logger.error(f"응닡 μ½”λ“œ: {e.response.status_code}, λ‚΄μš©: {e.response.text}")
            raise ConnectionError(f"VITO API 토큰 νšλ“ μ‹€νŒ¨: {e}") from e

        token = result.get("access_token")
        if not token:
            # Do not keep a stale token or a bogus expiry around.
            self.access_token = None
            self._token_expires_at = 0
            logger.error("VITO API μ‘λ‹΅μ—μ„œ 토큰을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
            raise ValueError("VITO API 토큰을 λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")

        # Record the expiry only once the token itself has been validated.
        self.access_token = token
        self._token_expires_at = self._token_expiry(result, time.time())
        logger.info("VITO API μ•‘μ„ΈμŠ€ 토큰 νšλ“ 성곡")
        return self.access_token

    def _transcribe_once(self, audio_bytes, language):
        """Submit one transcription job and poll it until it finishes.

        Uses the currently cached access token. Returns a result dictionary
        in the format documented on ``transcribe_audio``. Request failures
        (``requests`` exceptions, including ``HTTPError`` raised by
        ``raise_for_status``) propagate to the caller.
        """
        headers = {"Authorization": f"Bearer {self.access_token}"}
        files = {"file": ("audio_file", audio_bytes)}  # (filename, content) tuple
        # Job configuration sent as a JSON string in the "config" form field.
        config = {
            "use_multi_channel": False,
            "use_itn": True,  # inverse text normalization (numbers, dates, ...)
            "use_disfluency_filter": True,  # drop filler sounds
            "use_profanity_filter": False,
            "language": language,
        }

        logger.info(f"VITO STT API ({self.stt_url}) μš”μ²­ 전솑 쀑...")
        response = requests.post(
            self.stt_url,
            headers=headers,
            files=files,
            data={"config": json.dumps(config)},
            timeout=20,  # upload timeout
        )
        response.raise_for_status()
        job_id = response.json().get("id")
        if not job_id:
            logger.error("VITO API μž‘μ—… IDλ₯Ό λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")
            return {"success": False, "error": "VITO API μž‘μ—… IDλ₯Ό λ°›μ•„μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€."}

        logger.info(f"VITO STT μž‘μ—… ID: {job_id}, κ²°κ³Ό 확인 μ‹œμž‘...")
        transcript_url = f"{self.stt_url}/{job_id}"
        max_tries = self.POLL_MAX_TRIES
        wait_time = self.POLL_INTERVAL_SECONDS

        for try_count in range(max_tries):
            time.sleep(wait_time)  # wait before each poll to limit API load
            logger.debug(f"κ²°κ³Ό 확인 μ‹œλ„ ({try_count + 1}/{max_tries}) - URL: {transcript_url}")
            get_response = requests.get(transcript_url, headers=headers, timeout=10)
            get_response.raise_for_status()
            result = get_response.json()
            status = result.get("status")
            logger.debug(f"ν˜„μž¬ μƒνƒœ: {status}")

            if status == "completed":
                utterances = result.get("results", {}).get("utterances", [])
                if not utterances:
                    logger.warning("VITO STT μ™„λ£Œλ˜μ—ˆμœΌλ‚˜ κ²°κ³Ό utterancesκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
                    return {"success": True, "text": ""}  # finished, but no speech recognized
                transcript = self._join_utterances(utterances)
                logger.info(f"VITO STT 인식 성곡 (일뢀): {transcript[:50]}...")
                return {"success": True, "text": transcript}

            if status == "failed":
                error_msg = f"VITO API λ³€ν™˜ μ‹€νŒ¨: {result.get('message', 'μ•Œ 수 μ—†λŠ” 였λ₯˜')}"
                logger.error(error_msg)
                return {"success": False, "error": error_msg, "details": result}

            if status == "transcribing":
                logger.info(f"VITO API 처리 쀑... ({try_count + 1}/{max_tries})")
            else:  # any other, not-yet-finished status
                logger.info(f"VITO API μƒνƒœ '{status}', λŒ€κΈ° 쀑... ({try_count + 1}/{max_tries})")

        logger.error(f"VITO API 응닡 νƒ€μž„μ•„μ›ƒ ({max_tries * wait_time}초 초과)")
        return {"success": False, "error": "VITO API 응닡 νƒ€μž„μ•„μ›ƒ"}

    def transcribe_audio(self, audio_bytes, language="ko"):
        """Convert audio data to text.

        The cached token is refreshed first when it is missing or expired.
        If the API still answers 401, the token is re-issued and the job is
        retried exactly once.

        Args:
            audio_bytes: Content of the audio file. It should be ``bytes`` so
                that it can be re-sent if the request has to be retried.
            language: Language code passed to the API (default ``'ko'``).

        Returns:
            A dictionary, never an exception:
            ``{'success': True, 'text': <recognized text>}`` on success, or
            ``{'success': False, 'error': <message>, 'details': <optional>}``
            on failure.
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            return {"success": False, "error": "API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}

        try:
            if not self._has_valid_token():
                logger.info("VITO API 토큰 νšλ“/κ°±μ‹  μ‹œλ„...")
                self.get_access_token()

            try:
                return self._transcribe_once(audio_bytes, language)
            except requests.exceptions.HTTPError as e:
                if getattr(e.response, "status_code", None) != 401:
                    raise
                logger.warning("VITO API 토큰이 λ§Œλ£Œλ˜μ—ˆκ±°λ‚˜ μœ νš¨ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. 토큰 μž¬λ°œκΈ‰ μ‹œλ„...")
                self.access_token = None  # invalidate the rejected token
                try:
                    self.get_access_token()
                except Exception as token_e:
                    logger.error(f"토큰 μž¬νšλ“ μ‹€νŒ¨: {token_e}")
                    return {"success": False, "error": f"토큰 μž¬νšλ“ μ‹€νŒ¨: {str(token_e)}"}
                logger.info("μƒˆ ν† ν°μœΌλ‘œ μž¬μ‹œλ„ν•©λ‹ˆλ‹€.")
                # Single retry only: a second 401 falls through to the generic
                # HTTP error handler below, so this cannot loop.
                return self._transcribe_once(audio_bytes, language)

        except requests.exceptions.HTTPError as e:
            status_code = getattr(e.response, "status_code", None)
            error_body = getattr(e.response, "text", "") or ""
            logger.error(f"VITO API HTTP 였λ₯˜: {status_code}, 응닡: {error_body}")
            return {
                "success": False,
                "error": f"API HTTP 였λ₯˜: {status_code}",
                "details": error_body,
            }
        except requests.exceptions.Timeout:
            logger.error("VITO API μš”μ²­ μ‹œκ°„ 초과")
            return {"success": False, "error": "API μš”μ²­ μ‹œκ°„ 초과"}
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API μš”μ²­ 쀑 λ„€νŠΈμ›Œν¬ 였λ₯˜ λ°œμƒ: {str(e)}")
            return {"success": False, "error": "API μš”μ²­ λ„€νŠΈμ›Œν¬ 였λ₯˜", "details": str(e)}
        except Exception as e:
            # Top-level boundary: callers always receive a result dictionary.
            logger.error(f"μŒμ„±μΈμ‹ 처리 쀑 μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
            return {
                "success": False,
                "error": "μŒμ„±μΈμ‹ λ‚΄λΆ€ 처리 μ‹€νŒ¨",
                "details": str(e),
            }