Spaces:
Running
Running
zach
committed on
Commit
·
fc85b67
1
Parent(s):
9dc43bf
Fix types in integrations package
Browse files
- src/integrations/anthropic_api.py +85 -71
- src/integrations/elevenlabs_api.py +18 -9
- src/integrations/hume_api.py +38 -33
src/integrations/anthropic_api.py
CHANGED
@@ -20,12 +20,12 @@ Functions:
|
|
20 |
|
21 |
# Standard Library Imports
|
22 |
import logging
|
23 |
-
from dataclasses import dataclass
|
24 |
-
from typing import List, Optional, Union
|
25 |
|
26 |
# Third-Party Library Imports
|
27 |
from anthropic import Anthropic, APIError
|
28 |
-
from anthropic.types import Message, ModelParam, TextBlock
|
29 |
from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
|
30 |
|
31 |
# Local Application Imports
|
@@ -33,65 +33,65 @@ from src.config import Config, logger
|
|
33 |
from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
34 |
from src.utils import truncate_text, validate_env_var
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
@dataclass(frozen=True)
|
38 |
class AnthropicConfig:
|
39 |
"""Immutable configuration for interacting with the Anthropic API."""
|
40 |
|
41 |
-
api_key:
|
|
|
42 |
model: ModelParam = "claude-3-5-sonnet-latest"
|
43 |
max_tokens: int = 150
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
def __post_init__(self):
|
49 |
-
# Validate that required attributes are set
|
50 |
-
if not self.api_key:
|
51 |
-
api_key = validate_env_var("ANTHROPIC_API_KEY")
|
52 |
-
object.__setattr__(self, "api_key", api_key)
|
53 |
if not self.model:
|
54 |
raise ValueError("Anthropic Model is not set.")
|
55 |
if not self.max_tokens:
|
56 |
raise ValueError("Anthropic Max Tokens is not set.")
|
57 |
-
if self.system_prompt is None:
|
58 |
-
system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech
|
59 |
-
synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
|
60 |
-
|
61 |
-
CRITICAL LENGTH CONSTRAINTS:
|
62 |
-
|
63 |
-
Maximum length: {self.max_tokens} tokens (approximately 400 characters)
|
64 |
-
You MUST complete all thoughts and sentences
|
65 |
-
Responses should be 25% shorter than you initially plan
|
66 |
-
Never exceed 400 characters total
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
Cut it down to 75% of its original length
|
72 |
-
Reserve the last 100 characters for a proper conclusion
|
73 |
-
If you start running long, immediately wrap up
|
74 |
-
End every piece with a clear conclusion
|
75 |
-
|
76 |
-
Content Requirements:
|
77 |
-
|
78 |
-
Allow natural emotional progression
|
79 |
-
Create an arc of connected moments
|
80 |
-
Use efficient but expressive language
|
81 |
-
Balance description with emotional depth
|
82 |
-
Ensure perfect completion
|
83 |
-
No meta-commentary or formatting
|
84 |
-
|
85 |
-
Structure for Emotional Pieces:
|
86 |
-
|
87 |
-
Opening hook (50-75 characters)
|
88 |
-
Emotional journey (200-250 characters)
|
89 |
-
Resolution (75-100 characters)
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
object.__setattr__(self, "system_prompt", system_prompt)
|
95 |
|
96 |
@property
|
97 |
def client(self) -> Anthropic:
|
@@ -127,7 +127,7 @@ Remember: A shorter, complete response is ALWAYS better than a longer, truncated
|
|
127 |
class AnthropicError(Exception):
|
128 |
"""Custom exception for errors related to the Anthropic API."""
|
129 |
|
130 |
-
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
131 |
super().__init__(message)
|
132 |
self.original_exception = original_exception
|
133 |
self.message = message
|
@@ -136,7 +136,7 @@ class AnthropicError(Exception):
|
|
136 |
class UnretryableAnthropicError(AnthropicError):
|
137 |
"""Custom exception for errors related to the Anthropic API that should not be retried."""
|
138 |
|
139 |
-
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
140 |
super().__init__(message, original_exception)
|
141 |
|
142 |
|
@@ -151,23 +151,29 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
151 |
"""
|
152 |
Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
|
153 |
|
|
|
|
|
|
|
|
|
154 |
Args:
|
155 |
-
character_description (str): The input character description used to assist with generating text
|
|
|
156 |
|
157 |
Returns:
|
158 |
str: The generated text.
|
159 |
|
160 |
Raises:
|
161 |
-
|
|
|
162 |
"""
|
163 |
-
# Build prompt for claude with character description
|
164 |
-
anthropic_config = config.anthropic_config
|
165 |
-
prompt = anthropic_config.build_expressive_prompt(character_description)
|
166 |
-
logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
|
167 |
-
|
168 |
-
response = None
|
169 |
try:
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
response: Message = anthropic_config.client.messages.create(
|
172 |
model=anthropic_config.model,
|
173 |
max_tokens=anthropic_config.max_tokens,
|
@@ -176,17 +182,17 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
176 |
)
|
177 |
logger.debug(f"API response received: {truncate_text(str(response))}")
|
178 |
|
179 |
-
|
180 |
-
if not hasattr(response, "content"):
|
181 |
logger.error("Response is missing 'content'. Response: %s", response)
|
182 |
raise AnthropicError('Invalid API response: Missing "content".')
|
183 |
|
184 |
-
|
185 |
-
|
186 |
if isinstance(blocks, list):
|
187 |
result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
|
188 |
logger.debug(f"Processed response from list: {truncate_text(result)}")
|
189 |
return result
|
|
|
190 |
if isinstance(blocks, TextBlock):
|
191 |
logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
|
192 |
return blocks.text
|
@@ -195,13 +201,21 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
195 |
return str(blocks or "No content generated.")
|
196 |
|
197 |
except Exception as e:
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
raise AnthropicError(
|
205 |
-
message=(
|
206 |
original_exception=e,
|
207 |
) from e
|
|
|
20 |
|
21 |
# Standard Library Imports
|
22 |
import logging
|
23 |
+
from dataclasses import dataclass, field
|
24 |
+
from typing import Any, Dict, List, Optional, Union, cast
|
25 |
|
26 |
# Third-Party Library Imports
|
27 |
from anthropic import Anthropic, APIError
|
28 |
+
from anthropic.types import Message, ModelParam, TextBlock, ToolUseBlock
|
29 |
from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
|
30 |
|
31 |
# Local Application Imports
|
|
|
33 |
from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
34 |
from src.utils import truncate_text, validate_env_var
|
35 |
|
36 |
+
PROMPT_TEMPLATE: str = (
|
37 |
+
"""You are an expert at generating micro-content optimized for text-to-speech synthesis.
|
38 |
+
Your absolute priority is delivering complete, untruncated responses within strict length limits.
|
39 |
+
|
40 |
+
CRITICAL LENGTH CONSTRAINTS:
|
41 |
+
- Maximum length: {max_tokens} tokens (approximately 400 characters)
|
42 |
+
- You MUST complete all thoughts and sentences
|
43 |
+
- Responses should be 25% shorter than you initially plan
|
44 |
+
- Never exceed 400 characters total
|
45 |
+
|
46 |
+
Response Generation Process:
|
47 |
+
- Draft your response mentally first
|
48 |
+
- Cut it down to 75% of its original length
|
49 |
+
- Reserve the last 100 characters for a proper conclusion
|
50 |
+
- If you start running long, immediately wrap up
|
51 |
+
- End every piece with a clear conclusion
|
52 |
+
|
53 |
+
Content Requirements:
|
54 |
+
- Allow natural emotional progression
|
55 |
+
- Create an arc of connected moments
|
56 |
+
- Use efficient but expressive language
|
57 |
+
- Balance description with emotional depth
|
58 |
+
- Ensure perfect completion
|
59 |
+
- No meta-commentary or formatting
|
60 |
+
|
61 |
+
Structure for Emotional Pieces:
|
62 |
+
- Opening hook (50-75 characters)
|
63 |
+
- Emotional journey (200-250 characters)
|
64 |
+
- Resolution (75-100 characters)
|
65 |
+
|
66 |
+
MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of
|
67 |
+
where you are in the narrative.
|
68 |
+
|
69 |
+
Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
|
70 |
+
)
|
71 |
|
72 |
@dataclass(frozen=True)
|
73 |
class AnthropicConfig:
|
74 |
"""Immutable configuration for interacting with the Anthropic API."""
|
75 |
|
76 |
+
api_key: str = field(init=False)
|
77 |
+
system_prompt: str = field(init=False)
|
78 |
model: ModelParam = "claude-3-5-sonnet-latest"
|
79 |
max_tokens: int = 150
|
80 |
+
|
81 |
+
def __post_init__(self) -> None:
|
82 |
+
# Validate required non-computed attributes.
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
if not self.model:
|
84 |
raise ValueError("Anthropic Model is not set.")
|
85 |
if not self.max_tokens:
|
86 |
raise ValueError("Anthropic Max Tokens is not set.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
+
# Compute the API key from the environment.
|
89 |
+
computed_api_key = validate_env_var("ANTHROPIC_API_KEY")
|
90 |
+
object.__setattr__(self, "api_key", computed_api_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
+
# Compute the system prompt using max_tokens and other logic.
|
93 |
+
computed_prompt = PROMPT_TEMPLATE.format(max_tokens=self.max_tokens)
|
94 |
+
object.__setattr__(self, "system_prompt", computed_prompt)
|
|
|
95 |
|
96 |
@property
|
97 |
def client(self) -> Anthropic:
|
|
|
127 |
class AnthropicError(Exception):
|
128 |
"""Custom exception for errors related to the Anthropic API."""
|
129 |
|
130 |
+
def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
|
131 |
super().__init__(message)
|
132 |
self.original_exception = original_exception
|
133 |
self.message = message
|
|
|
136 |
class UnretryableAnthropicError(AnthropicError):
|
137 |
"""Custom exception for errors related to the Anthropic API that should not be retried."""
|
138 |
|
139 |
+
def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
|
140 |
super().__init__(message, original_exception)
|
141 |
|
142 |
|
|
|
151 |
"""
|
152 |
Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
|
153 |
|
154 |
+
This function includes retry logic and error translation. It raises a custom
|
155 |
+
UnretryableAnthropicError for API errors deemed unretryable and AnthropicError
|
156 |
+
for other errors.
|
157 |
+
|
158 |
Args:
|
159 |
+
character_description (str): The input character description used to assist with generating text.
|
160 |
+
config (Config): Application configuration including Anthropic settings.
|
161 |
|
162 |
Returns:
|
163 |
str: The generated text.
|
164 |
|
165 |
Raises:
|
166 |
+
UnretryableAnthropicError: For errors that should not be retried.
|
167 |
+
AnthropicError: For other errors communicating with the Anthropic API.
|
168 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
try:
|
170 |
+
anthropic_config = config.anthropic_config
|
171 |
+
prompt = anthropic_config.build_expressive_prompt(character_description)
|
172 |
+
logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
|
173 |
+
|
174 |
+
# Ensure system_prompt is set (guaranteed by __post_init__)
|
175 |
+
assert anthropic_config.system_prompt is not None, "system_prompt must be set."
|
176 |
+
|
177 |
response: Message = anthropic_config.client.messages.create(
|
178 |
model=anthropic_config.model,
|
179 |
max_tokens=anthropic_config.max_tokens,
|
|
|
182 |
)
|
183 |
logger.debug(f"API response received: {truncate_text(str(response))}")
|
184 |
|
185 |
+
if not hasattr(response, "content") or response.content is None:
|
|
|
186 |
logger.error("Response is missing 'content'. Response: %s", response)
|
187 |
raise AnthropicError('Invalid API response: Missing "content".')
|
188 |
|
189 |
+
blocks: Union[List[Union[TextBlock, ToolUseBlock]], TextBlock, None] = response.content
|
190 |
+
|
191 |
if isinstance(blocks, list):
|
192 |
result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
|
193 |
logger.debug(f"Processed response from list: {truncate_text(result)}")
|
194 |
return result
|
195 |
+
|
196 |
if isinstance(blocks, TextBlock):
|
197 |
logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
|
198 |
return blocks.text
|
|
|
201 |
return str(blocks or "No content generated.")
|
202 |
|
203 |
except Exception as e:
|
204 |
+
# If the error is an APIError, check if it's unretryable.
|
205 |
+
if isinstance(e, APIError):
|
206 |
+
status_code: Optional[int] = getattr(e, "status_code", None)
|
207 |
+
if status_code is not None and CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
|
208 |
+
error_body: Any = e.body
|
209 |
+
error_message: str = "Unknown error"
|
210 |
+
if isinstance(error_body, dict):
|
211 |
+
error_message = cast(Dict[str, Any], error_body).get("error", {}).get("message", "Unknown error")
|
212 |
+
raise UnretryableAnthropicError(
|
213 |
+
message=f'"{error_message}"',
|
214 |
+
original_exception=e,
|
215 |
+
) from e
|
216 |
+
|
217 |
+
# For all other errors, wrap them in an AnthropicError.
|
218 |
raise AnthropicError(
|
219 |
+
message=str(e),
|
220 |
original_exception=e,
|
221 |
) from e
|
src/integrations/elevenlabs_api.py
CHANGED
@@ -22,7 +22,7 @@ Functions:
|
|
22 |
# Standard Library Imports
|
23 |
import logging
|
24 |
import random
|
25 |
-
from dataclasses import dataclass
|
26 |
from typing import Optional, Tuple
|
27 |
|
28 |
# Third-Party Library Imports
|
@@ -40,14 +40,17 @@ from src.utils import save_base64_audio_to_file, validate_env_var
|
|
40 |
class ElevenLabsConfig:
|
41 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
42 |
|
43 |
-
api_key:
|
44 |
output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
|
45 |
|
46 |
def __post_init__(self):
|
47 |
-
# Validate
|
48 |
-
if not self.
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
|
52 |
@property
|
53 |
def client(self) -> ElevenLabs:
|
@@ -83,7 +86,9 @@ class UnretryableElevenLabsError(ElevenLabsError):
|
|
83 |
after=after_log(logger, logging.DEBUG),
|
84 |
reraise=True,
|
85 |
)
|
86 |
-
def text_to_speech_with_elevenlabs(
|
|
|
|
|
87 |
"""
|
88 |
Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
|
89 |
|
@@ -94,7 +99,7 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
|
|
94 |
Returns:
|
95 |
Tuple[None, str]: A tuple containing:
|
96 |
- generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
|
97 |
-
across TTS integrations
|
98 |
- file_path (str): The relative file path to the audio file where the synthesized speech was saved.
|
99 |
|
100 |
Raises:
|
@@ -129,7 +134,11 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
|
|
129 |
return None, audio_file_path
|
130 |
|
131 |
except Exception as e:
|
132 |
-
if
|
|
|
|
|
|
|
|
|
133 |
raise UnretryableElevenLabsError(
|
134 |
message=f"{e.body['detail']['message']}",
|
135 |
original_exception=e,
|
|
|
22 |
# Standard Library Imports
|
23 |
import logging
|
24 |
import random
|
25 |
+
from dataclasses import dataclass, field
|
26 |
from typing import Optional, Tuple
|
27 |
|
28 |
# Third-Party Library Imports
|
|
|
40 |
class ElevenLabsConfig:
|
41 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
42 |
|
43 |
+
api_key: str = field(init=False)
|
44 |
output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
|
45 |
|
46 |
def __post_init__(self):
|
47 |
+
# Validate required attributes.
|
48 |
+
if not self.output_format:
|
49 |
+
raise ValueError("ElevenLabs TTS API output format is not set.")
|
50 |
+
|
51 |
+
# Compute the API key from the environment.
|
52 |
+
computed_key = validate_env_var("ELEVENLABS_API_KEY")
|
53 |
+
object.__setattr__(self, "api_key", computed_key)
|
54 |
|
55 |
@property
|
56 |
def client(self) -> ElevenLabs:
|
|
|
86 |
after=after_log(logger, logging.DEBUG),
|
87 |
reraise=True,
|
88 |
)
|
89 |
+
def text_to_speech_with_elevenlabs(
|
90 |
+
character_description: str, text: str, config: Config
|
91 |
+
) -> Tuple[None, str]:
|
92 |
"""
|
93 |
Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
|
94 |
|
|
|
99 |
Returns:
|
100 |
Tuple[None, str]: A tuple containing:
|
101 |
- generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
|
102 |
+
across TTS integrations.
|
103 |
- file_path (str): The relative file path to the audio file where the synthesized speech was saved.
|
104 |
|
105 |
Raises:
|
|
|
134 |
return None, audio_file_path
|
135 |
|
136 |
except Exception as e:
|
137 |
+
if (
|
138 |
+
isinstance(e, ApiError)
|
139 |
+
and e.status_code is not None
|
140 |
+
and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
|
141 |
+
):
|
142 |
raise UnretryableElevenLabsError(
|
143 |
message=f"{e.body['detail']['message']}",
|
144 |
original_exception=e,
|
src/integrations/hume_api.py
CHANGED
@@ -20,8 +20,8 @@ Functions:
|
|
20 |
|
21 |
# Standard Library Imports
|
22 |
import logging
|
23 |
-
from dataclasses import dataclass
|
24 |
-
from typing import Any, Dict, Literal,
|
25 |
|
26 |
# Third-Party Library Imports
|
27 |
import requests
|
@@ -34,43 +34,44 @@ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
|
34 |
from src.utils import save_base64_audio_to_file, validate_env_var
|
35 |
|
36 |
HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
|
37 |
-
"""
|
38 |
|
39 |
|
40 |
@dataclass(frozen=True)
|
41 |
class HumeConfig:
|
42 |
"""Immutable configuration for interacting with the Hume TTS API."""
|
43 |
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
url: str = "https://test-api.hume.ai/v0/tts/octave"
|
46 |
-
headers: dict = None
|
47 |
file_format: HumeSupportedFileFormat = "mp3"
|
48 |
|
49 |
-
def __post_init__(self):
|
50 |
-
# Validate required attributes
|
51 |
-
if not self.api_key:
|
52 |
-
api_key = validate_env_var("HUME_API_KEY")
|
53 |
-
object.__setattr__(self, "api_key", api_key)
|
54 |
if not self.url:
|
55 |
raise ValueError("Hume TTS endpoint URL is not set.")
|
56 |
if not self.file_format:
|
57 |
raise ValueError("Hume TTS file format is not set.")
|
58 |
|
59 |
-
#
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
68 |
|
69 |
|
70 |
class HumeError(Exception):
|
71 |
"""Custom exception for errors related to the Hume TTS API."""
|
72 |
|
73 |
-
def __init__(self, message: str, original_exception:
|
74 |
super().__init__(message)
|
75 |
self.original_exception = original_exception
|
76 |
self.message = message
|
@@ -79,14 +80,11 @@ class HumeError(Exception):
|
|
79 |
class UnretryableHumeError(HumeError):
|
80 |
"""Custom exception for errors related to the Hume TTS API that should not be retried."""
|
81 |
|
82 |
-
def __init__(self, message: str, original_exception:
|
83 |
-
super().__init__(message)
|
84 |
self.original_exception = original_exception
|
85 |
|
86 |
|
87 |
-
# Initialize the Hume client
|
88 |
-
|
89 |
-
|
90 |
@retry(
|
91 |
stop=stop_after_attempt(3),
|
92 |
wait=wait_fixed(2),
|
@@ -95,7 +93,10 @@ class UnretryableHumeError(HumeError):
|
|
95 |
reraise=True,
|
96 |
)
|
97 |
def text_to_speech_with_hume(
|
98 |
-
character_description: str,
|
|
|
|
|
|
|
99 |
) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
100 |
"""
|
101 |
Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
|
@@ -110,9 +111,10 @@ def text_to_speech_with_hume(
|
|
110 |
character_description (str): A description of the character, which is used as contextual input
|
111 |
for generating the voice.
|
112 |
text (str): The text to be converted to speech.
|
113 |
-
num_generations (int
|
114 |
Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
|
115 |
-
generation is expected in the API response.
|
|
|
116 |
|
117 |
Returns:
|
118 |
Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
@@ -137,9 +139,7 @@ def text_to_speech_with_hume(
|
|
137 |
hume_config = config.hume_config
|
138 |
request_body = {
|
139 |
"utterances": [{"text": text, "description": character_description or None}],
|
140 |
-
"format": {
|
141 |
-
"type": hume_config.file_format,
|
142 |
-
},
|
143 |
"num_generations": num_generations,
|
144 |
}
|
145 |
|
@@ -159,7 +159,7 @@ def text_to_speech_with_hume(
|
|
159 |
logger.error(msg)
|
160 |
raise HumeError(msg)
|
161 |
|
162 |
-
# Extract the base64 encoded audio and generation ID from the generation
|
163 |
generation_a = generations[0]
|
164 |
generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
|
165 |
|
@@ -171,7 +171,11 @@ def text_to_speech_with_hume(
|
|
171 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
172 |
|
173 |
except Exception as e:
|
174 |
-
if
|
|
|
|
|
|
|
|
|
175 |
raise UnretryableHumeError(
|
176 |
message=f"{e.response.text}",
|
177 |
original_exception=e,
|
@@ -197,6 +201,7 @@ def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tup
|
|
197 |
Expected keys are:
|
198 |
- "generation_id" (str): A unique identifier for the generated audio.
|
199 |
- "audio" (str): A base64 encoded string of the audio data.
|
|
|
200 |
|
201 |
Returns:
|
202 |
Tuple[str, str]: A tuple containing:
|
|
|
20 |
|
21 |
# Standard Library Imports
|
22 |
import logging
|
23 |
+
from dataclasses import dataclass, field
|
24 |
+
from typing import Any, Dict, Literal, Tuple, Union
|
25 |
|
26 |
# Third-Party Library Imports
|
27 |
import requests
|
|
|
34 |
from src.utils import save_base64_audio_to_file, validate_env_var
|
35 |
|
36 |
HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
|
37 |
+
"""Supported audio file formats for the Hume TTS API"""
|
38 |
|
39 |
|
40 |
@dataclass(frozen=True)
|
41 |
class HumeConfig:
|
42 |
"""Immutable configuration for interacting with the Hume TTS API."""
|
43 |
|
44 |
+
# Computed fields.
|
45 |
+
api_key: str = field(init=False)
|
46 |
+
headers: Dict[str, str] = field(init=False)
|
47 |
+
|
48 |
+
# Provided fields.
|
49 |
url: str = "https://test-api.hume.ai/v0/tts/octave"
|
|
|
50 |
file_format: HumeSupportedFileFormat = "mp3"
|
51 |
|
52 |
+
def __post_init__(self) -> None:
|
53 |
+
# Validate required attributes.
|
|
|
|
|
|
|
54 |
if not self.url:
|
55 |
raise ValueError("Hume TTS endpoint URL is not set.")
|
56 |
if not self.file_format:
|
57 |
raise ValueError("Hume TTS file format is not set.")
|
58 |
|
59 |
+
# Compute the API key from the environment.
|
60 |
+
computed_api_key = validate_env_var("HUME_API_KEY")
|
61 |
+
object.__setattr__(self, "api_key", computed_api_key)
|
62 |
+
|
63 |
+
# Compute the headers.
|
64 |
+
computed_headers = {
|
65 |
+
"X-Hume-Api-Key": f"{computed_api_key}",
|
66 |
+
"Content-Type": "application/json",
|
67 |
+
}
|
68 |
+
object.__setattr__(self, "headers", computed_headers)
|
69 |
|
70 |
|
71 |
class HumeError(Exception):
|
72 |
"""Custom exception for errors related to the Hume TTS API."""
|
73 |
|
74 |
+
def __init__(self, message: str, original_exception: Union[Exception, None] = None):
|
75 |
super().__init__(message)
|
76 |
self.original_exception = original_exception
|
77 |
self.message = message
|
|
|
80 |
class UnretryableHumeError(HumeError):
|
81 |
"""Custom exception for errors related to the Hume TTS API that should not be retried."""
|
82 |
|
83 |
+
def __init__(self, message: str, original_exception: Union[Exception, None] = None):
|
84 |
+
super().__init__(message, original_exception)
|
85 |
self.original_exception = original_exception
|
86 |
|
87 |
|
|
|
|
|
|
|
88 |
@retry(
|
89 |
stop=stop_after_attempt(3),
|
90 |
wait=wait_fixed(2),
|
|
|
93 |
reraise=True,
|
94 |
)
|
95 |
def text_to_speech_with_hume(
|
96 |
+
character_description: str,
|
97 |
+
text: str,
|
98 |
+
num_generations: int,
|
99 |
+
config: Config,
|
100 |
) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
101 |
"""
|
102 |
Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
|
|
|
111 |
character_description (str): A description of the character, which is used as contextual input
|
112 |
for generating the voice.
|
113 |
text (str): The text to be converted to speech.
|
114 |
+
num_generations (int): The number of audio generations to request from the API.
|
115 |
Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
|
116 |
+
generation is expected in the API response.
|
117 |
+
config (Config): The application configuration containing Hume API settings.
|
118 |
|
119 |
Returns:
|
120 |
Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
|
|
139 |
hume_config = config.hume_config
|
140 |
request_body = {
|
141 |
"utterances": [{"text": text, "description": character_description or None}],
|
142 |
+
"format": {"type": hume_config.file_format},
|
|
|
|
|
143 |
"num_generations": num_generations,
|
144 |
}
|
145 |
|
|
|
159 |
logger.error(msg)
|
160 |
raise HumeError(msg)
|
161 |
|
162 |
+
# Extract the base64 encoded audio and generation ID from the generation.
|
163 |
generation_a = generations[0]
|
164 |
generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
|
165 |
|
|
|
171 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
172 |
|
173 |
except Exception as e:
|
174 |
+
if (
|
175 |
+
isinstance(e, HTTPError)
|
176 |
+
and e.response is not None
|
177 |
+
and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
|
178 |
+
):
|
179 |
raise UnretryableHumeError(
|
180 |
message=f"{e.response.text}",
|
181 |
original_exception=e,
|
|
|
201 |
Expected keys are:
|
202 |
- "generation_id" (str): A unique identifier for the generated audio.
|
203 |
- "audio" (str): A base64 encoded string of the audio data.
|
204 |
+
config (Config): The application configuration used for saving the audio file.
|
205 |
|
206 |
Returns:
|
207 |
Tuple[str, str]: A tuple containing:
|