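# NOTE(review): "Spaces: Paused / Paused" appeared here - it looks like page-header residue from the site this file was copied from, not part of the module.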
# -*- coding: utf-8 -*-
"""
Speech recognition (STT) module that uses the VITO API.
"""
import os | |
import logging | |
import requests | |
import json | |
import time # time import μΆκ° | |
from dotenv import load_dotenv | |
# Load environment variables
load_dotenv() | |
# Logger setup. This module uses its own named logger; if needed it can be
# changed to share the logger configured in app.py instead.
logger = logging.getLogger("VitoSTT")

# Attach a default stream handler only when no handler is configured yet,
# so log records from this module are not silently dropped.
if not logger.hasHandlers():
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Default level is INFO.
    # NOTE(review): the indentation of this file was lost before review, so
    # it is not certain whether this call belongs inside the guard or should
    # run unconditionally - confirm against the original file.
    logger.setLevel(logging.INFO)
class VitoSTT:
    """Wrapper class around the VITO speech-to-text (STT) REST API.

    Credentials are read from the ``VITO_CLIENT_ID`` and
    ``VITO_CLIENT_SECRET`` environment variables. An access token is fetched
    on demand, reused until shortly before its recorded expiry, and refreshed
    once automatically when an upload is rejected with HTTP 401.
    """

    # Request timeouts, in seconds.
    TOKEN_TIMEOUT_SEC = 10
    UPLOAD_TIMEOUT_SEC = 20
    POLL_TIMEOUT_SEC = 10

    # Result polling: at most POLL_MAX_TRIES checks, POLL_INTERVAL_SEC apart.
    POLL_MAX_TRIES = 15
    POLL_INTERVAL_SEC = 2

    # Token lifetime assumed when the response carries no usable expiry, and
    # the safety margin subtracted so a token is not used right at its limit.
    DEFAULT_TOKEN_TTL_SEC = 3600
    TOKEN_EXPIRY_MARGIN_SEC = 60

    def __init__(self):
        """Load the API credentials from the environment and set defaults."""
        self.client_id = os.getenv("VITO_CLIENT_ID")
        self.client_secret = os.getenv("VITO_CLIENT_SECRET")
        if not self.client_id or not self.client_secret:
            # Only warn here; the methods that need the credentials check
            # again and fail at the point of use.
            logger.warning("VITO API μΈμ¦ μ λ³΄κ° .env νμΌμ μ€μ λμ§ μμμ΅λλ€.")
            logger.warning("VITO_CLIENT_IDμ VITO_CLIENT_SECRETλ₯Ό νμΈνμΈμ.")
        else:
            logger.info("VITO STT API ν΄λΌμ΄μΈνΈ ID/Secret λ‘λ μλ£.")

        # API endpoints
        self.token_url = "https://openapi.vito.ai/v1/authenticate"
        self.stt_url = "https://openapi.vito.ai/v1/transcribe"

        # Access token and the epoch time after which it must be refreshed.
        self.access_token = None
        self._token_expires_at = 0

    @classmethod
    def _token_expiry(cls, result, now):
        """Return the epoch time after which the token should be refreshed.

        Args:
            result: Parsed JSON body of the token response.
            now: Current epoch time in seconds.

        Returns:
            The refresh deadline, i.e. the expiry minus a safety margin.
        """
        # NOTE(review): VITO's documentation describes an absolute
        # ``expire_at`` UNIX timestamp in this response - confirm. When it is
        # absent, fall back to a relative ``expires_in`` as before.
        expire_at = result.get("expire_at")
        if isinstance(expire_at, (int, float)):
            return expire_at - cls.TOKEN_EXPIRY_MARGIN_SEC

        expires_in = result.get("expires_in", cls.DEFAULT_TOKEN_TTL_SEC)
        if not isinstance(expires_in, (int, float)):
            # A malformed value must not break authentication.
            expires_in = cls.DEFAULT_TOKEN_TTL_SEC
        return now + expires_in - cls.TOKEN_EXPIRY_MARGIN_SEC

    @staticmethod
    def _join_utterances(utterances):
        """Join the non-empty ``msg`` fields of the utterances with spaces."""
        return " ".join(seg["msg"] for seg in utterances if seg.get("msg")).strip()

    def get_access_token(self):
        """Request a new VITO API access token and store it on the instance.

        Every call contacts the token endpoint; the caller decides whether a
        refresh is needed.

        Returns:
            The access token string.

        Raises:
            ValueError: If the credentials are not configured, or the
                response contains no ``access_token``.
            TimeoutError: If the token request times out.
            ConnectionError: If the token request fails for any other
                reason, including an HTTP error status.
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν€κ° μ€μ λμ§ μμ ν ν°μ νλν μ μμ΅λλ€.")
            raise ValueError("VITO API μΈμ¦ μ λ³΄κ° μ€μ λμ§ μμμ΅λλ€.")

        logger.info("VITO API μ‘μΈμ€ ν ν° μμ² μ€...")
        try:
            response = requests.post(
                self.token_url,
                data={"client_id": self.client_id, "client_secret": self.client_secret},
                timeout=self.TOKEN_TIMEOUT_SEC,
            )
            response.raise_for_status()  # raise on HTTP error status
            result = response.json()
        except requests.exceptions.Timeout as e:
            logger.error(f"VITO API ν ν° νλ μκ° μ΄κ³Ό: {self.token_url}")
            raise TimeoutError("VITO API ν ν° νλ μκ° μ΄κ³Ό") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API ν ν° νλ μ€ν¨: {e}")
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                logger.error(f"μλ΅ μ½λ: {failed_response.status_code}, λ΄μ©: {failed_response.text}")
            raise ConnectionError(f"VITO API ν ν° νλ μ€ν¨: {e}") from e

        self.access_token = result.get("access_token")
        if not self.access_token:
            logger.error("VITO API μλ΅μμ ν ν°μ μ°Ύμ μ μμ΅λλ€.")
            raise ValueError("VITO API ν ν°μ λ°μμ€μ§ λͺ»νμ΅λλ€.")

        self._token_expires_at = self._token_expiry(result, time.time())
        logger.info("VITO API μ‘μΈμ€ ν ν° νλ μ±κ³΅")
        return self.access_token

    def _submit_job(self, audio_bytes, language):
        """Upload the audio for transcription and return the job id.

        Returns the ``id`` field of the response, or None when it is absent.
        HTTP error statuses are raised as ``requests`` HTTPError.
        """
        headers = {"Authorization": f"Bearer {self.access_token}"}
        files = {"file": ("audio_file", audio_bytes)}  # sent as a named file part
        # API options (adjust as needed)
        config = {
            "use_multi_channel": False,
            "use_itn": True,  # inverse text normalization (numbers, dates, ...)
            "use_disfluency_filter": True,  # remove fillers (uh, um, ...)
            "use_profanity_filter": False,  # profanity filtering
            "language": language,
            # A "type" parameter is not required per the VITO docs (auto-detected).
        }

        logger.info(f"VITO STT API ({self.stt_url}) μμ² μ μ‘ μ€...")
        response = requests.post(
            self.stt_url,
            headers=headers,
            files=files,
            data={"config": json.dumps(config)},
            timeout=self.UPLOAD_TIMEOUT_SEC,
        )
        response.raise_for_status()
        return response.json().get("id")

    def _poll_result(self, job_id):
        """Poll the job until it completes, fails, or the tries run out.

        Returns a result dictionary in the shape documented on
        ``transcribe_audio``.
        """
        headers = {"Authorization": f"Bearer {self.access_token}"}
        transcript_url = f"{self.stt_url}/{job_id}"
        max_tries = self.POLL_MAX_TRIES
        wait_time = self.POLL_INTERVAL_SEC

        for try_count in range(max_tries):
            time.sleep(wait_time)  # wait between checks to limit API load
            logger.debug(f"κ²°κ³Ό νμΈ μλ ({try_count + 1}/{max_tries}) - URL: {transcript_url}")
            get_response = requests.get(
                transcript_url,
                headers=headers,
                timeout=self.POLL_TIMEOUT_SEC,
            )
            get_response.raise_for_status()
            result = get_response.json()
            status = result.get("status")
            logger.debug(f"νμ¬ μν: {status}")

            if status == "completed":
                # Tolerate a missing or null "results"/"utterances" entry.
                utterances = (result.get("results") or {}).get("utterances") or []
                if not utterances:
                    logger.warning("VITO STT μλ£λμμΌλ κ²°κ³Ό utterancesκ° λΉμ΄μμ΅λλ€.")
                    return {"success": True, "text": ""}  # success, but no text
                # Merge all segments into a single text.
                transcript = self._join_utterances(utterances)
                logger.info(f"VITO STT μΈμ μ±κ³΅ (μΌλΆ): {transcript[:50]}...")
                return {"success": True, "text": transcript}

            if status == "failed":
                error_msg = f"VITO API λ³ν μ€ν¨: {result.get('message', 'μ μ μλ μ€λ₯')}"
                logger.error(error_msg)
                return {"success": False, "error": error_msg, "details": result}

            if status == "transcribing":
                logger.info(f"VITO API μ²λ¦¬ μ€... ({try_count + 1}/{max_tries})")
            else:  # any other status (e.g. registered, waiting)
                logger.info(f"VITO API μν '{status}', λκΈ° μ€... ({try_count + 1}/{max_tries})")

        logger.error(f"VITO API μλ΅ νμμμ ({max_tries * wait_time}μ΄ μ΄κ³Ό)")
        return {"success": False, "error": "VITO API μλ΅ νμμμ"}

    def transcribe_audio(self, audio_bytes, language="ko"):
        """Convert audio byte data to text.

        The token is fetched when none is held or the recorded expiry has
        passed. If the upload is rejected with HTTP 401, the token is
        refreshed and the upload is retried once.

        Args:
            audio_bytes: Byte data of the audio file. It may be sent a second
                time when the upload is retried.
            language: Language code (default: 'ko').

        Returns:
            A dictionary with the recognized text or an error description:
            {'success': True, 'text': <recognized text>}
            {'success': False, 'error': <message>, 'details': <details>}
            ('details' is only present for some errors.)
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
            return {"success": False, "error": "API ν€κ° μ€μ λμ§ μμμ΅λλ€."}

        try:
            # Acquire a token, or refresh one that has reached its deadline.
            if not self.access_token or time.time() >= self._token_expires_at:
                logger.info("VITO API ν ν° νλ/κ°±μ μλ...")
                self.get_access_token()

            try:
                job_id = self._submit_job(audio_bytes, language)
            except requests.exceptions.HTTPError as e:
                if getattr(getattr(e, "response", None), "status_code", None) != 401:
                    raise
                # 401 Unauthorized: the token expired or is invalid.
                logger.warning("VITO API ν ν°μ΄ λ§λ£λμκ±°λ μ ν¨νμ§ μμ΅λλ€. ν ν° μ¬λ°κΈ μλ...")
                self.access_token = None  # invalidate the rejected token
                try:
                    self.get_access_token()
                except Exception as token_e:
                    logger.error(f"ν ν° μ¬νλ μ€ν¨: {token_e}")
                    return {"success": False, "error": f"ν ν° μ¬νλ μ€ν¨: {str(token_e)}"}
                logger.info("μ ν ν°μΌλ‘ μ¬μλν©λλ€.")
                # Single retry only; a second 401 is reported as an HTTP
                # error below, so this cannot loop.
                job_id = self._submit_job(audio_bytes, language)

            if not job_id:
                logger.error("VITO API μμ IDλ₯Ό λ°μμ€μ§ λͺ»νμ΅λλ€.")
                return {"success": False, "error": "VITO API μμ IDλ₯Ό λ°μμ€μ§ λͺ»νμ΅λλ€."}

            logger.info(f"VITO STT μμ ID: {job_id}, κ²°κ³Ό νμΈ μμ...")
            return self._poll_result(job_id)

        except requests.exceptions.HTTPError as e:
            failed_response = getattr(e, "response", None)
            status_code = getattr(failed_response, "status_code", None)
            if status_code == 401:
                # Drop the rejected token so the next call fetches a new one.
                self.access_token = None
            error_body = ""
            if failed_response is not None:
                try:
                    error_body = failed_response.text
                except Exception:
                    # Best effort: the body is only used for diagnostics.
                    pass
            logger.error(f"VITO API HTTP μ€λ₯: {status_code}, μλ΅: {error_body}")
            return {
                "success": False,
                "error": f"API HTTP μ€λ₯: {status_code}",
                "details": error_body,
            }
        except requests.exceptions.Timeout:
            logger.error("VITO API μμ² μκ° μ΄κ³Ό")
            return {"success": False, "error": "API μμ² μκ° μ΄κ³Ό"}
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API μμ² μ€ λ€νΈμν¬ μ€λ₯ λ°μ: {str(e)}")
            return {"success": False, "error": "API μμ² λ€νΈμν¬ μ€λ₯", "details": str(e)}
        except Exception as e:
            # Boundary handler: callers expect a result dictionary, never an
            # exception, so anything unexpected is logged with its traceback.
            logger.error(f"μμ±μΈμ μ²λ¦¬ μ€ μμμΉ λͺ»ν μ€λ₯ λ°μ: {str(e)}", exc_info=True)
            return {
                "success": False,
                "error": "μμ±μΈμ λ΄λΆ μ²λ¦¬ μ€ν¨",
                "details": str(e),
            }