File size: 6,368 Bytes
adecb62
a375dbf
e1385f3
fc85b67
bf6610d
5a007ca
adecb62
bf6610d
 
1a6c67a
c0a60aa
5a007ca
adecb62
5ed9749
 
0e508c8
adecb62
 
 
e9bcee8
d1ed6b1
fc85b67
bf6610d
a35c804
adecb62
fc85b67
bf6610d
0e508c8
 
e9bcee8
fc85b67
 
adecb62
e91a94a
 
 
 
 
 
 
 
 
 
 
 
 
a6d4367
adecb62
d1ed6b1
fc85b67
2f050a8
 
 
 
 
 
 
fc85b67
 
adecb62
104737f
adecb62
a5cafbd
104737f
c0a60aa
 
a5cafbd
 
d1ed6b1
a5cafbd
104737f
fc85b67
 
 
7854f13
adecb62
104737f
adecb62
bf6610d
7854f13
 
ba3994f
adecb62
104737f
 
 
adecb62
 
7854f13
 
 
adecb62
 
104737f
 
adecb62
e91a94a
1ed6720
e91a94a
e1385f3
adecb62
e91a94a
 
bf6610d
 
 
 
 
5ed9749
bf6610d
 
d4b2b49
e91a94a
0e508c8
e91a94a
 
 
 
 
0e508c8
e91a94a
0e508c8
bf6610d
 
e1385f3
5ed9749
7854f13
104737f
5ed9749
fe85e28
 
 
 
 
7854f13
 
ba3994f
104737f
 
 
 
2192d9b
ba3994f
e91a94a
7854f13
5ed9749
7854f13
 
 
 
 
 
 
 
 
2192d9b
7854f13
323c87d
 
7854f13
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# Standard Library Imports
import logging
import time
from dataclasses import dataclass, field
from typing import Tuple, Union

# Third-Party Library Imports
from hume import AsyncHumeClient
from hume.core.api_error import ApiError
from hume.tts.types import Format, FormatMp3, PostedUtterance, ReturnTts
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_fixed

# Local Application Imports
from src.common import Config, logger, save_base64_audio_to_file, validate_env_var
from src.common.constants import CLIENT_ERROR_CODE, GENERIC_API_ERROR_MESSAGE, RATE_LIMIT_ERROR_CODE, SERVER_ERROR_CODE


@dataclass(frozen=True)
class HumeConfig:
    """Immutable configuration for interacting with the Hume TTS API."""

    api_key: str = field(init=False)
    file_format: Format = field(default_factory=FormatMp3)
    request_timeout: float = 40.0

    def __post_init__(self) -> None:
        """Validate required attributes and set computed fields."""
        if not self.file_format:
            raise ValueError("Hume TTS file format is not set.")

        computed_api_key = validate_env_var("HUME_API_KEY")
        object.__setattr__(self, "api_key", computed_api_key)

    @property
    def client(self) -> AsyncHumeClient:
        """
        Lazy initialization of the asynchronous Hume client.

        Returns:
            AsyncHumeClient: Configured async client instance.
        """
        return AsyncHumeClient(
            api_key=self.api_key,
            timeout=self.request_timeout
        )

class HumeError(Exception):
    """Custom exception for errors related to the Hume TTS API."""

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        super().__init__(message)
        self.original_exception = original_exception
        self.message = message

class UnretryableHumeError(HumeError):
    """Custom exception for errors related to the Hume TTS API that should not be retried."""

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        super().__init__(message, original_exception)
        self.original_exception = original_exception
        self.message = message

@retry(
    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
    stop=stop_after_attempt(2),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    reraise=True,
)
async def text_to_speech_with_hume(
    character_description: str,
    text: str,
    config: Config,
) -> Tuple[str, str]:
    """
    Asynchronously synthesizes speech using the Hume TTS API, processes audio data, and writes audio to a file.

    This function uses the Hume Python SDK to send a request to the Hume TTS API with a character description
    and text to be converted to speech. It extracts the base64-encoded audio and generation ID from the response,
    saves the audio as an MP3 file, and returns the relevant details.

    Args:
        character_description (str): Description used for voice synthesis.
        text (str): Text to be converted to speech.
        config (Config): Application configuration containing Hume API settings.

    Returns:
        Tuple[str, str]: A tuple containing:
            - generation_id (str): Unique identifier for the generated audio.
            - audio_file_path (str): Path to the saved audio file.

    Raises:
        HumeError: For errors communicating with the Hume API.
        UnretryableHumeError: For client-side HTTP errors (status code 4xx).
    """
    logger.debug(f"Synthesizing speech with Hume. Text length: {len(text)} characters.")
    hume_config = config.hume_config
    client = hume_config.client
    start_time = time.time()
    try:
        utterance = PostedUtterance(text=text, description=character_description)
        response: ReturnTts = await client.tts.synthesize_json(
            utterances=[utterance],
            format=hume_config.file_format,
        )

        elapsed_time = time.time() - start_time
        logger.info(f"Hume API request completed in {elapsed_time:.2f} seconds.")

        generations = response.generations
        if not generations:
            raise HumeError("No generations returned by Hume API.")

        generation = generations[0]
        generation_id = generation.generation_id
        base64_audio = generation.audio
        filename = f"{generation_id}.mp3"
        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)

        return generation_id, audio_file_path

    except ApiError as e:
        elapsed_time = time.time() - start_time
        logger.error(f"Hume API request failed after {elapsed_time:.2f} seconds: {e!s}")
        clean_message = __extract_hume_api_error_message(e)
        logger.error(f"Full Hume API error: {e!s}")

        if hasattr(e, 'status_code') and e.status_code is not None:
            if e.status_code == RATE_LIMIT_ERROR_CODE:
                rate_limit_error_message = "We're working on scaling capacity. Please try again in a few seconds."
                raise HumeError(message=rate_limit_error_message, original_exception=e) from e
            if CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE:
                raise UnretryableHumeError(message=clean_message, original_exception=e) from e

        raise HumeError(message=clean_message, original_exception=e) from e

    except Exception as e:
        error_type = type(e).__name__
        error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
        logger.error("Error during Hume API call: %s - %s", error_type, error_message)
        clean_message = GENERIC_API_ERROR_MESSAGE

        raise HumeError(message=clean_message, original_exception=e) from e

def __extract_hume_api_error_message(e: ApiError) -> str:
    """
    Extracts a clean, user-friendly error message from a Hume API error response.

    Args:
        e (ApiError): The Hume API error exception containing response information.

    Returns:
        str: A clean, user-friendly error message suitable for display to end users.
    """
    clean_message = GENERIC_API_ERROR_MESSAGE

    if hasattr(e, "body") and isinstance(e.body, dict) and "message" in e.body:
        clean_message = e.body["message"]

    return clean_message