zach
Improve Hume errors, fix error message text in UI
2f050a8
raw
history blame
5.29 kB
"""
hume_api.py
This file defines the interaction with the Hume text-to-speech (TTS) API.
It includes functionality for API request handling and processing API responses.
Key Features:
- Encapsulates all logic related to the Hume TTS API.
- Implements retry logic for handling transient API errors.
- Handles received audio and processes it for playback on the web.
- Provides detailed logging for debugging and error tracking.
Classes:
- HumeConfig: Immutable configuration for interacting with Hume's TTS API.
- HumeError: Custom exception for Hume API-related errors.
- UnretryableHumeError: HumeError subclass for Hume API errors that should not be retried.
Functions:
- text_to_speech_with_hume: Synthesizes speech from text using Hume's TTS API.
"""
# Standard Library Imports
from dataclasses import dataclass
import logging
import os
import random
from typing import Literal, Optional, Tuple

# Third-Party Library Imports
import requests
from requests.exceptions import HTTPError
from tenacity import (
    after_log,
    before_log,
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_fixed,
)

# Local Application Imports
from src.config import logger
from src.utils import save_base64_audio_to_file, validate_env_var
# Closed set of values used as the annotation for HumeConfig.file_format.
HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
"""Supported audio file formats for the Hume TTS API."""
@dataclass(frozen=True)
class HumeConfig:
    """Immutable configuration for interacting with the Hume TTS API.

    The required fields are validated in ``__post_init__``. ``headers`` is
    always rebuilt there from ``api_key``, so any value supplied for it at
    construction time is overwritten.
    """

    # Evaluated once, when the class body runs (i.e. when the module is imported).
    api_key: str = validate_env_var("HUME_API_KEY")
    url: str = "https://test-api.hume.ai/v0/tts/octave"
    # Placeholder only; replaced with the real request headers in __post_init__.
    headers: Optional[dict] = None
    file_format: HumeSupportedFileFormat = "mp3"

    def __post_init__(self):
        """Validate the required fields, then derive the request headers.

        Raises:
            ValueError: If ``api_key``, ``url`` or ``file_format`` is empty.
        """
        # Validate required attributes
        if not self.api_key:
            raise ValueError("Hume API key is not set.")
        if not self.url:
            raise ValueError("Hume TTS endpoint URL is not set.")
        if not self.file_format:
            raise ValueError("Hume TTS file format is not set.")

        # The dataclass is frozen, so ordinary attribute assignment is blocked;
        # object.__setattr__ is the way to populate a derived field after init.
        object.__setattr__(
            self,
            "headers",
            {
                "X-Hume-Api-Key": f"{self.api_key}",
                "Content-Type": "application/json",
            },
        )
class HumeError(Exception):
    """Base exception for failures while talking to the Hume TTS API.

    Attributes:
        message: Human-readable description of the failure.
        original_exception: The underlying exception that triggered this
            error, or ``None`` when there is no underlying cause.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Record the extra context first, then let Exception store the message
        # so that str(error) and error.args behave as usual.
        self.message = message
        self.original_exception = original_exception
        super().__init__(message)
class UnretryableHumeError(HumeError):
    """Hume TTS API error that should not be retried.

    Takes the same arguments as ``HumeError`` and stores them the same way;
    the distinct type is what lets callers tell the two cases apart.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # The parent initializer records both the message and the original exception.
        super().__init__(message, original_exception)
# Module-level Hume configuration, built once when this module is imported;
# text_to_speech_with_hume reads its `url` and `headers` for each request.
hume_config = HumeConfig()
# Upper bound (in seconds) on a single Hume request, so a stalled connection
# cannot block the caller indefinitely.
_HUME_REQUEST_TIMEOUT_SECONDS = 60


@retry(
    # Client errors (4xx) are raised as UnretryableHumeError; repeating the same
    # request cannot succeed, so those are excluded from the retry loop.
    retry=retry_if_not_exception_type(UnretryableHumeError),
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    reraise=True,
)
def text_to_speech_with_hume(character_description: str, text: str) -> Tuple[str, str]:
    """
    Synthesizes text to speech using the Hume TTS API and writes the audio to a file.

    The call is attempted up to three times, two seconds apart, except when the
    API answers with a 4xx status, which is raised immediately.

    Args:
        character_description (str): The original user character description to use
            as the description for generating the voice.
        text (str): The generated text to be converted to speech.

    Returns:
        Tuple[str, str]: A tuple containing:
            - generation_id (str): The generation ID returned from the Hume API.
            - file_path (str): The value returned by ``save_base64_audio_to_file``
              for the saved audio.

    Raises:
        UnretryableHumeError: If the Hume API responds with a 4xx status code.
        HumeError: For any other failure while calling the API, parsing its
            response, or saving the audio.
    """
    logger.debug(
        f"Processing TTS with Hume. Prompt length: {len(character_description)} characters. Text length: {len(text)} characters."
    )
    request_body = {
        "utterances": [{"text": text, "description": character_description}]
    }

    try:
        # Synthesize speech using the Hume TTS API
        response = requests.post(
            url=hume_config.url,
            headers=hume_config.headers,
            json=request_body,
            timeout=_HUME_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        response_data = response.json()

        generations = response_data.get("generations")
        if not generations:
            msg = "No generations returned by Hume API."
            logger.error(msg)
            raise HumeError(msg)

        # Extract the base64 encoded audio and generation ID from the generation
        generation = generations[0]
        generation_id = generation.get("generation_id")
        base64_audio = generation.get("audio")
        if not generation_id or not base64_audio:
            # Without this check a missing field would produce "None.mp3" and
            # hand None to the file-saving helper.
            msg = "Hume API generation is missing its generation ID or audio data."
            logger.error(msg)
            raise HumeError(msg)

        filename = f"{generation_id}.mp3"

        # Write audio to file and return the relative path
        return generation_id, save_base64_audio_to_file(base64_audio, filename)

    except HumeError:
        # Already a domain error raised above; do not wrap it a second time.
        raise
    except HTTPError as e:
        if e.response is not None and 400 <= e.response.status_code < 500:
            raise UnretryableHumeError(
                message=f'"{e.response.text}"',
                original_exception=e,
            ) from e
        raise HumeError(
            message=f"{e}",
            original_exception=e,
        ) from e
    except Exception as e:
        raise HumeError(
            message=f"{e}",
            original_exception=e,
        ) from e