Spaces:
Running
Running
zach
committed on
Commit
·
e91a94a
1
Parent(s):
7854f13
Clean up integration code
Browse files
src/integrations/elevenlabs_api.py
CHANGED
@@ -15,6 +15,7 @@ Key Features:
|
|
15 |
# Standard Library Imports
|
16 |
import logging
|
17 |
import random
|
|
|
18 |
from dataclasses import dataclass, field
|
19 |
from typing import Optional, Tuple
|
20 |
|
@@ -102,28 +103,26 @@ async def text_to_speech_with_elevenlabs(
|
|
102 |
"""
|
103 |
logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
|
104 |
elevenlabs_config = config.elevenlabs_config
|
105 |
-
|
|
|
106 |
try:
|
107 |
-
|
108 |
-
response = await elevenlabs_config.client.text_to_voice.create_previews(
|
109 |
voice_description=character_description,
|
110 |
text=text,
|
111 |
output_format=elevenlabs_config.output_format,
|
112 |
)
|
113 |
|
|
|
|
|
|
|
114 |
previews = response.previews
|
115 |
if not previews:
|
116 |
-
|
117 |
-
logger.error(msg)
|
118 |
-
raise ElevenLabsError(message=msg)
|
119 |
|
120 |
-
# Extract the base64 encoded audio and generated voice ID from the preview
|
121 |
preview = random.choice(previews)
|
122 |
generated_voice_id = preview.generated_voice_id
|
123 |
base64_audio = preview.audio_base_64
|
124 |
filename = f"{generated_voice_id}.mp3"
|
125 |
-
|
126 |
-
# Write audio to file and return the relative path
|
127 |
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
128 |
|
129 |
return None, audio_file_path
|
|
|
15 |
# Standard Library Imports
|
16 |
import logging
|
17 |
import random
|
18 |
+
import time
|
19 |
from dataclasses import dataclass, field
|
20 |
from typing import Optional, Tuple
|
21 |
|
|
|
103 |
"""
|
104 |
logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
|
105 |
elevenlabs_config = config.elevenlabs_config
|
106 |
+
client = elevenlabs_config.client
|
107 |
+
start_time = time.time()
|
108 |
try:
|
109 |
+
response = await client.text_to_voice.create_previews(
|
|
|
110 |
voice_description=character_description,
|
111 |
text=text,
|
112 |
output_format=elevenlabs_config.output_format,
|
113 |
)
|
114 |
|
115 |
+
elapsed_time = time.time() - start_time
|
116 |
+
logger.info(f"Elevenlabs API request completed in {elapsed_time:.2f} seconds")
|
117 |
+
|
118 |
previews = response.previews
|
119 |
if not previews:
|
120 |
+
raise ElevenLabsError(message="No previews returned by ElevenLabs API.")
|
|
|
|
|
121 |
|
|
|
122 |
preview = random.choice(previews)
|
123 |
generated_voice_id = preview.generated_voice_id
|
124 |
base64_audio = preview.audio_base_64
|
125 |
filename = f"{generated_voice_id}.mp3"
|
|
|
|
|
126 |
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
127 |
|
128 |
return None, audio_file_path
|
src/integrations/hume_api.py
CHANGED
@@ -21,8 +21,7 @@ from typing import Tuple, Union
|
|
21 |
from hume import AsyncHumeClient
|
22 |
from hume.core.api_error import ApiError
|
23 |
from hume.tts import PostedUtterance
|
24 |
-
from hume.tts.types import
|
25 |
-
from hume.tts.types.format import Format, FormatMp3
|
26 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
27 |
|
28 |
# Local Application Imports
|
@@ -47,6 +46,19 @@ class HumeConfig:
|
|
47 |
computed_api_key = validate_env_var("HUME_API_KEY")
|
48 |
object.__setattr__(self, "api_key", computed_api_key)
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
class HumeError(Exception):
|
52 |
"""Custom exception for errors related to the Hume TTS API."""
|
@@ -100,27 +112,13 @@ async def text_to_speech_with_hume(
|
|
100 |
HumeError: For errors communicating with the Hume API.
|
101 |
UnretryableHumeError: For client-side HTTP errors (status code 4xx).
|
102 |
"""
|
103 |
-
logger.debug(
|
104 |
-
"Processing TTS with Hume. "
|
105 |
-
f"Character description length: {len(character_description)}. "
|
106 |
-
f"Text length: {len(text)}."
|
107 |
-
)
|
108 |
-
|
109 |
hume_config = config.hume_config
|
110 |
-
|
111 |
start_time = time.time()
|
112 |
try:
|
113 |
-
|
114 |
-
|
115 |
-
timeout=hume_config.request_timeout
|
116 |
-
)
|
117 |
-
|
118 |
-
utterance = PostedUtterance(
|
119 |
-
text=text,
|
120 |
-
description=character_description or None
|
121 |
-
)
|
122 |
-
|
123 |
-
response: ReturnTts = await hume_client.tts.synthesize_json(
|
124 |
utterances=[utterance],
|
125 |
format=hume_config.file_format,
|
126 |
)
|
@@ -130,14 +128,15 @@ async def text_to_speech_with_hume(
|
|
130 |
|
131 |
generations = response.generations
|
132 |
if not generations:
|
133 |
-
|
134 |
-
logger.error(msg)
|
135 |
-
raise HumeError(msg)
|
136 |
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
139 |
|
140 |
-
return
|
141 |
|
142 |
except ApiError as e:
|
143 |
elapsed_time = time.time() - start_time
|
@@ -145,11 +144,7 @@ async def text_to_speech_with_hume(
|
|
145 |
clean_message = _extract_hume_api_error_message(e)
|
146 |
logger.error(f"Full Hume API error: {e!s}")
|
147 |
|
148 |
-
if
|
149 |
-
hasattr(e, 'status_code')
|
150 |
-
and e.status_code is not None
|
151 |
-
and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
|
152 |
-
):
|
153 |
raise UnretryableHumeError(message=clean_message, original_exception=e) from e
|
154 |
|
155 |
raise HumeError(message=clean_message, original_exception=e) from e
|
@@ -158,33 +153,9 @@ async def text_to_speech_with_hume(
|
|
158 |
error_type = type(e).__name__
|
159 |
error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
|
160 |
logger.error("Error during Hume API call: %s - %s", error_type, error_message)
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
def _parse_hume_tts_generation(generation: ReturnGeneration, config: Config) -> Tuple[str, str]:
|
165 |
-
"""
|
166 |
-
Parses a Hume TTS generation response and saves the decoded audio as an MP3 file.
|
167 |
-
|
168 |
-
Args:
|
169 |
-
generation (ReturnGeneration): TTS generation response containing generation_id and audio.
|
170 |
-
config (Config): Application configuration for saving the audio file.
|
171 |
-
|
172 |
-
Returns:
|
173 |
-
Tuple[str, str]: (generation_id, audio_path)
|
174 |
|
175 |
-
|
176 |
-
KeyError: If expected attributes are missing.
|
177 |
-
Exception: Propagates exceptions from saving the audio file.
|
178 |
-
"""
|
179 |
-
if not generation.generation_id:
|
180 |
-
raise KeyError("The generation is missing the generation_id.")
|
181 |
-
|
182 |
-
if not generation.audio:
|
183 |
-
raise KeyError("The generation is missing the audio data.")
|
184 |
-
|
185 |
-
filename = f"{generation.generation_id}.mp3"
|
186 |
-
audio_file_path = save_base64_audio_to_file(generation.audio, filename, config)
|
187 |
-
return generation.generation_id, audio_file_path
|
188 |
|
189 |
|
190 |
def _extract_hume_api_error_message(e: ApiError) -> str:
|
|
|
21 |
from hume import AsyncHumeClient
|
22 |
from hume.core.api_error import ApiError
|
23 |
from hume.tts import PostedUtterance
|
24 |
+
from hume.tts.types import Format, FormatMp3, ReturnTts
|
|
|
25 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
26 |
|
27 |
# Local Application Imports
|
|
|
46 |
computed_api_key = validate_env_var("HUME_API_KEY")
|
47 |
object.__setattr__(self, "api_key", computed_api_key)
|
48 |
|
49 |
+
@property
|
50 |
+
def client(self) -> AsyncHumeClient:
|
51 |
+
"""
|
52 |
+
Lazy initialization of the asynchronous Hume client.
|
53 |
+
|
54 |
+
Returns:
|
55 |
+
AsyncHumeClient: Configured async client instance.
|
56 |
+
"""
|
57 |
+
return AsyncHumeClient(
|
58 |
+
api_key=self.api_key,
|
59 |
+
timeout=self.request_timeout
|
60 |
+
)
|
61 |
+
|
62 |
|
63 |
class HumeError(Exception):
|
64 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
|
112 |
HumeError: For errors communicating with the Hume API.
|
113 |
UnretryableHumeError: For client-side HTTP errors (status code 4xx).
|
114 |
"""
|
115 |
+
logger.debug(f"Synthesizing speech with Hume. Text length: {len(text)} characters.")
|
|
|
|
|
|
|
|
|
|
|
116 |
hume_config = config.hume_config
|
117 |
+
client = hume_config.client
|
118 |
start_time = time.time()
|
119 |
try:
|
120 |
+
utterance = PostedUtterance(text=text, description=character_description)
|
121 |
+
response: ReturnTts = await client.tts.synthesize_json(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
utterances=[utterance],
|
123 |
format=hume_config.file_format,
|
124 |
)
|
|
|
128 |
|
129 |
generations = response.generations
|
130 |
if not generations:
|
131 |
+
raise HumeError("No generations returned by Hume API.")
|
|
|
|
|
132 |
|
133 |
+
generation = generations[0]
|
134 |
+
generation_id = generation.generation_id
|
135 |
+
base64_audio = generation.audio
|
136 |
+
filename = f"{generation_id}.mp3"
|
137 |
+
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
138 |
|
139 |
+
return generation_id, audio_file_path
|
140 |
|
141 |
except ApiError as e:
|
142 |
elapsed_time = time.time() - start_time
|
|
|
144 |
clean_message = _extract_hume_api_error_message(e)
|
145 |
logger.error(f"Full Hume API error: {e!s}")
|
146 |
|
147 |
+
if e.status_code is not None and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE:
|
|
|
|
|
|
|
|
|
148 |
raise UnretryableHumeError(message=clean_message, original_exception=e) from e
|
149 |
|
150 |
raise HumeError(message=clean_message, original_exception=e) from e
|
|
|
153 |
error_type = type(e).__name__
|
154 |
error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
|
155 |
logger.error("Error during Hume API call: %s - %s", error_type, error_message)
|
156 |
+
clean_message = "An unexpected error occurred while processing your speech request. Please try again later."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
+
raise HumeError(message=clean_message, original_exception=e) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
|
161 |
def _extract_hume_api_error_message(e: ApiError) -> str:
|