zach committed
Commit fc85b67 · 1 Parent(s): 9dc43bf

Fix types in integrations package

src/integrations/anthropic_api.py CHANGED
@@ -20,12 +20,12 @@ Functions:
 
 # Standard Library Imports
 import logging
-from dataclasses import dataclass
-from typing import List, Optional, Union
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union, cast
 
 # Third-Party Library Imports
 from anthropic import Anthropic, APIError
-from anthropic.types import Message, ModelParam, TextBlock
+from anthropic.types import Message, ModelParam, TextBlock, ToolUseBlock
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 
 # Local Application Imports
@@ -33,65 +33,65 @@ from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import truncate_text, validate_env_var
 
+PROMPT_TEMPLATE: str = (
+    """You are an expert at generating micro-content optimized for text-to-speech synthesis.
+Your absolute priority is delivering complete, untruncated responses within strict length limits.
+
+CRITICAL LENGTH CONSTRAINTS:
+- Maximum length: {max_tokens} tokens (approximately 400 characters)
+- You MUST complete all thoughts and sentences
+- Responses should be 25% shorter than you initially plan
+- Never exceed 400 characters total
+
+Response Generation Process:
+- Cut it down to 75% of its original length
+- Draft your response mentally first
+- Reserve the last 100 characters for a proper conclusion
+- If you start running long, immediately wrap up
+- End every piece with a clear conclusion
+
+Content Requirements:
+- Allow natural emotional progression
+- Create an arc of connected moments
+- Use efficient but expressive language
+- Balance description with emotional depth
+- Ensure perfect completion
+- No meta-commentary or formatting
+
+Structure for Emotional Pieces:
+- Opening hook (50-75 characters)
+- Emotional journey (200-250 characters)
+- Resolution (75-100 characters)
+
+MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of
+where you are in the narrative.
+
+Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
+)
 
 @dataclass(frozen=True)
 class AnthropicConfig:
     """Immutable configuration for interacting with the Anthropic API."""
 
-    api_key: Optional[str] = None
+    api_key: str = field(init=False)
+    system_prompt: str = field(init=False)
     model: ModelParam = "claude-3-5-sonnet-latest"
     max_tokens: int = 150
-    system_prompt: Optional[str] = (
-        None  # system prompt is set post initialization, since self.max_tokens is leveraged in the prompt.
-    )
-
-    def __post_init__(self):
-        # Validate that required attributes are set
-        if not self.api_key:
-            api_key = validate_env_var("ANTHROPIC_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
+
+    def __post_init__(self) -> None:
+        # Validate required non-computed attributes.
         if not self.model:
             raise ValueError("Anthropic Model is not set.")
         if not self.max_tokens:
             raise ValueError("Anthropic Max Tokens is not set.")
-        if self.system_prompt is None:
-            system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech
-synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
-
-CRITICAL LENGTH CONSTRAINTS:
-
-Maximum length: {self.max_tokens} tokens (approximately 400 characters)
-You MUST complete all thoughts and sentences
-Responses should be 25% shorter than you initially plan
-Never exceed 400 characters total
 
-Response Generation Process:
-
-Draft your response mentally first
-Cut it down to 75% of its original length
-Reserve the last 100 characters for a proper conclusion
-If you start running long, immediately wrap up
-End every piece with a clear conclusion
-
-Content Requirements:
-
-Allow natural emotional progression
-Create an arc of connected moments
-Use efficient but expressive language
-Balance description with emotional depth
-Ensure perfect completion
-No meta-commentary or formatting
-
-Structure for Emotional Pieces:
-
-Opening hook (50-75 characters)
-Emotional journey (200-250 characters)
-Resolution (75-100 characters)
+        # Compute the API key from the environment.
+        computed_api_key = validate_env_var("ANTHROPIC_API_KEY")
+        object.__setattr__(self, "api_key", computed_api_key)
 
-MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of where you
-are in the narrative.
-Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
-            object.__setattr__(self, "system_prompt", system_prompt)
+        # Compute the system prompt using max_tokens and other logic.
+        computed_prompt = PROMPT_TEMPLATE.format(max_tokens=self.max_tokens)
+        object.__setattr__(self, "system_prompt", computed_prompt)
 
     @property
     def client(self) -> Anthropic:
@@ -127,7 +127,7 @@ Remember: A shorter, complete response is ALWAYS better than a longer, truncated
 class AnthropicError(Exception):
     """Custom exception for errors related to the Anthropic API."""
 
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
+    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
@@ -136,7 +136,7 @@ class AnthropicError(Exception):
 class UnretryableAnthropicError(AnthropicError):
     """Custom exception for errors related to the Anthropic API that should not be retried."""
 
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
+    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message, original_exception)
 
 
@@ -151,23 +151,29 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
     """
     Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
 
+    This function includes retry logic and error translation. It raises a custom
+    UnretryableAnthropicError for API errors deemed unretryable and AnthropicError
+    for other errors.
+
     Args:
-        character_description (str): The input character description used to assist with generating text with Claude.
+        character_description (str): The input character description used to assist with generating text.
+        config (Config): Application configuration including Anthropic settings.
 
     Returns:
         str: The generated text.
 
     Raises:
-        AnthropicError: If there is an error communicating with the Anthropic API.
+        UnretryableAnthropicError: For errors that should not be retried.
+        AnthropicError: For other errors communicating with the Anthropic API.
     """
-    # Build prompt for claude with character description
-    anthropic_config = config.anthropic_config
-    prompt = anthropic_config.build_expressive_prompt(character_description)
-    logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
-
-    response = None
     try:
-        # Generate text using the Anthropic SDK
+        anthropic_config = config.anthropic_config
+        prompt = anthropic_config.build_expressive_prompt(character_description)
+        logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
+
+        # Ensure system_prompt is set (guaranteed by __post_init__)
+        assert anthropic_config.system_prompt is not None, "system_prompt must be set."
+
         response: Message = anthropic_config.client.messages.create(
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
@@ -176,17 +182,17 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
         )
         logger.debug(f"API response received: {truncate_text(str(response))}")
 
-        # Validate response
-        if not hasattr(response, "content"):
+        if not hasattr(response, "content") or response.content is None:
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
 
-        # Process response
-        blocks: Union[List[TextBlock], TextBlock, None] = response.content
+        blocks: Union[List[Union[TextBlock, ToolUseBlock]], TextBlock, None] = response.content
+
         if isinstance(blocks, list):
             result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
             logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
+
         if isinstance(blocks, TextBlock):
             logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
             return blocks.text
@@ -195,13 +201,21 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
         return str(blocks or "No content generated.")
 
     except Exception as e:
-        if isinstance(e, APIError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
-            raise UnretryableAnthropicError(
-                message=f'"{e.body["error"]["message"]}"',
-                original_exception=e,
-            ) from e
-
+        # If the error is an APIError, check if it's unretryable.
+        if isinstance(e, APIError):
+            status_code: Optional[int] = getattr(e, "status_code", None)
+            if status_code is not None and CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
+                error_body: Any = e.body
+                error_message: str = "Unknown error"
+                if isinstance(error_body, dict):
+                    error_message = cast(Dict[str, Any], error_body).get("error", {}).get("message", "Unknown error")
+                raise UnretryableAnthropicError(
+                    message=f'"{error_message}"',
+                    original_exception=e,
+                ) from e
+
+        # For all other errors, wrap them in an AnthropicError.
        raise AnthropicError(
-            message=(f"{e.message}"),
+            message=str(e),
            original_exception=e,
        ) from e
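
Note on the pattern above: api_key and system_prompt stop being Optional[str] constructor arguments and become plain str fields declared with field(init=False) and filled in __post_init__ via object.__setattr__ (required because the dataclass is frozen). A minimal, self-contained sketch of that pattern follows; ExampleConfig, GREETING_TEMPLATE, and the direct os.environ lookup are hypothetical stand-ins, not code from this repository.

import os
from dataclasses import dataclass, field

# Hypothetical template standing in for PROMPT_TEMPLATE.
GREETING_TEMPLATE = "You have a budget of {max_tokens} tokens."


@dataclass(frozen=True)
class ExampleConfig:
    # Computed fields: excluded from the generated __init__, filled in __post_init__.
    api_key: str = field(init=False)
    system_prompt: str = field(init=False)

    # Caller-provided fields with defaults.
    model: str = "claude-3-5-sonnet-latest"
    max_tokens: int = 150

    def __post_init__(self) -> None:
        if not self.model:
            raise ValueError("Model is not set.")
        if not self.max_tokens:
            raise ValueError("Max tokens is not set.")
        # frozen=True blocks normal attribute assignment, so use object.__setattr__.
        object.__setattr__(self, "api_key", os.environ.get("ANTHROPIC_API_KEY", ""))
        object.__setattr__(self, "system_prompt", GREETING_TEMPLATE.format(max_tokens=self.max_tokens))


config = ExampleConfig(max_tokens=200)
print(config.system_prompt)  # "You have a budget of 200 tokens."

Because the computed fields never appear in the generated __init__, callers cannot pass None for them, which is what allows the annotations to tighten from Optional[str] to str.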
src/integrations/elevenlabs_api.py CHANGED
@@ -22,7 +22,7 @@ Functions:
 # Standard Library Imports
 import logging
 import random
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional, Tuple
 
 # Third-Party Library Imports
@@ -40,14 +40,17 @@ from src.utils import save_base64_audio_to_file, validate_env_var
 class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
 
-    api_key: Optional[str] = None
+    api_key: str = field(init=False)
     output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
 
     def __post_init__(self):
-        # Validate that required attributes are set
-        if not self.api_key:
-            api_key = validate_env_var("ELEVENLABS_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
+        # Validate required attributes.
+        if not self.output_format:
+            raise ValueError("ElevenLabs TTS API output format is not set.")
+
+        # Compute the API key from the environment.
+        computed_key = validate_env_var("ELEVENLABS_API_KEY")
+        object.__setattr__(self, "api_key", computed_key)
 
     @property
     def client(self) -> ElevenLabs:
@@ -83,7 +86,9 @@ class UnretryableElevenLabsError(ElevenLabsError):
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_elevenlabs(character_description: str, text: str, config: Config) -> Tuple[None, str]:
+def text_to_speech_with_elevenlabs(
+    character_description: str, text: str, config: Config
+) -> Tuple[None, str]:
     """
     Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
 
@@ -94,7 +99,7 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
     Returns:
         Tuple[None, str]: A tuple containing:
             - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
-              across TTS integrations
+              across TTS integrations.
             - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
 
     Raises:
@@ -129,7 +134,11 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
         return None, audio_file_path
 
     except Exception as e:
-        if isinstance(e, ApiError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
+        if (
+            isinstance(e, ApiError)
+            and e.status_code is not None
+            and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
+        ):
             raise UnretryableElevenLabsError(
                 message=f"{e.body['detail']['message']}",
                 original_exception=e,
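
Note on the retry/error split above: the tenacity decorator retries the whole call, while UnretryableElevenLabsError marks 4xx failures that retrying cannot fix. The sketch below shows one way such an exception can bypass retries using tenacity's retry_if_not_exception_type predicate; this is an illustration with hypothetical names (FakeApiError, synthesize) and is not necessarily how this repository wires its decorator.

from typing import Optional

from tenacity import retry, retry_if_not_exception_type, stop_after_attempt, wait_fixed

# Local stand-ins for the repository's status-code constants.
CLIENT_ERROR_CODE = 400
SERVER_ERROR_CODE = 500


class FakeApiError(Exception):
    """Stand-in for an SDK error that carries an HTTP status code."""

    def __init__(self, status_code: Optional[int] = None) -> None:
        super().__init__(f"API error (status {status_code})")
        self.status_code = status_code


class SynthesisError(Exception):
    """Base error for the synthesis call; treated as retryable."""


class UnretryableSynthesisError(SynthesisError):
    """4xx-style errors that retrying will not fix."""


@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    retry=retry_if_not_exception_type(UnretryableSynthesisError),
    reraise=True,
)
def synthesize(text: str) -> str:
    try:
        # A real implementation would call the TTS SDK here; simulate a 401 response.
        raise FakeApiError(status_code=401)
    except FakeApiError as e:
        if e.status_code is not None and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE:
            # Client errors bypass the retry loop entirely.
            raise UnretryableSynthesisError(f"client error {e.status_code}") from e
        # Anything else is wrapped in the retryable base error.
        raise SynthesisError("transient failure, will be retried") from e

Calling synthesize("hello") raises UnretryableSynthesisError once, with no retry attempts, because the predicate excludes that exception type from retrying.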
src/integrations/hume_api.py CHANGED
@@ -20,8 +20,8 @@ Functions:
 
 # Standard Library Imports
 import logging
-from dataclasses import dataclass
-from typing import Any, Dict, Literal, Optional, Tuple, Union
+from dataclasses import dataclass, field
+from typing import Any, Dict, Literal, Tuple, Union
 
 # Third-Party Library Imports
 import requests
@@ -34,43 +34,44 @@ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import save_base64_audio_to_file, validate_env_var
 
 HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
-""" Support audio file formats for the Hume TTS API"""
+"""Supported audio file formats for the Hume TTS API"""
 
 
 @dataclass(frozen=True)
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
 
-    api_key: Optional[str] = None
+    # Computed fields.
+    api_key: str = field(init=False)
+    headers: Dict[str, str] = field(init=False)
+
+    # Provided fields.
     url: str = "https://test-api.hume.ai/v0/tts/octave"
-    headers: dict = None
     file_format: HumeSupportedFileFormat = "mp3"
 
-    def __post_init__(self):
-        # Validate required attributes
-        if not self.api_key:
-            api_key = validate_env_var("HUME_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
+    def __post_init__(self) -> None:
+        # Validate required attributes.
         if not self.url:
             raise ValueError("Hume TTS endpoint URL is not set.")
         if not self.file_format:
             raise ValueError("Hume TTS file format is not set.")
 
-        # Set headers dynamically after validation
-        object.__setattr__(
-            self,
-            "headers",
-            {
-                "X-Hume-Api-Key": f"{self.api_key}",
-                "Content-Type": "application/json",
-            },
-        )
+        # Compute the API key from the environment.
+        computed_api_key = validate_env_var("HUME_API_KEY")
+        object.__setattr__(self, "api_key", computed_api_key)
+
+        # Compute the headers.
+        computed_headers = {
+            "X-Hume-Api-Key": f"{computed_api_key}",
+            "Content-Type": "application/json",
+        }
+        object.__setattr__(self, "headers", computed_headers)
 
 
 class HumeError(Exception):
     """Custom exception for errors related to the Hume TTS API."""
 
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
+    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
@@ -79,14 +80,11 @@ class HumeError(Exception):
 class UnretryableHumeError(HumeError):
     """Custom exception for errors related to the Hume TTS API that should not be retried."""
 
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
-        super().__init__(message)
+    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
+        super().__init__(message, original_exception)
         self.original_exception = original_exception
 
 
-# Initialize the Hume client
-
-
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
@@ -95,7 +93,10 @@ class UnretryableHumeError(HumeError):
     reraise=True,
 )
 def text_to_speech_with_hume(
-    character_description: str, text: str, num_generations: int, config: Config
+    character_description: str,
+    text: str,
+    num_generations: int,
+    config: Config,
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
@@ -110,9 +111,10 @@ def text_to_speech_with_hume(
         character_description (str): A description of the character, which is used as contextual input
             for generating the voice.
         text (str): The text to be converted to speech.
-        num_generations (int, optional): The number of audio generations to request from the API.
+        num_generations (int): The number of audio generations to request from the API.
            Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
-            generation is expected in the API response. Defaults to 1.
+            generation is expected in the API response.
+        config (Config): The application configuration containing Hume API settings.
 
     Returns:
         Union[Tuple[str, str], Tuple[str, str, str, str]]:
@@ -137,9 +139,7 @@ def text_to_speech_with_hume(
         hume_config = config.hume_config
         request_body = {
             "utterances": [{"text": text, "description": character_description or None}],
-            "format": {
-                "type": hume_config.file_format,
-            },
+            "format": {"type": hume_config.file_format},
             "num_generations": num_generations,
         }
 
@@ -159,7 +159,7 @@ def text_to_speech_with_hume(
            logger.error(msg)
            raise HumeError(msg)
 
-        # Extract the base64 encoded audio and generation ID from the generation
+        # Extract the base64 encoded audio and generation ID from the generation.
        generation_a = generations[0]
        generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
 
@@ -171,7 +171,11 @@ def text_to_speech_with_hume(
        return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
 
    except Exception as e:
-        if isinstance(e, HTTPError) and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE:
+        if (
+            isinstance(e, HTTPError)
+            and e.response is not None
+            and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
+        ):
            raise UnretryableHumeError(
                message=f"{e.response.text}",
                original_exception=e,
@@ -197,6 +201,7 @@ def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tup
            Expected keys are:
            - "generation_id" (str): A unique identifier for the generated audio.
            - "audio" (str): A base64 encoded string of the audio data.
+        config (Config): The application configuration used for saving the audio file.
 
     Returns:
         Tuple[str, str]: A tuple containing:
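
Note on the added response guards above: requests types HTTPError.response as Optional, so reading e.response.status_code without a None check is exactly what a type checker flags. A small, self-contained sketch of the defensive classification, with hypothetical helper names and local stand-ins for the repository's status-code constants:

import requests
from requests.exceptions import HTTPError

# Local stand-ins for the repository's CLIENT_ERROR_CODE / SERVER_ERROR_CODE constants.
CLIENT_ERROR_CODE = 400
SERVER_ERROR_CODE = 500


def is_unretryable(error: Exception) -> bool:
    """Return True only for HTTP errors in the 4xx range, which retrying will not fix."""
    if not isinstance(error, HTTPError):
        return False
    # HTTPError.response is typed as Optional, so check it before reading status_code.
    if error.response is None:
        return False
    return CLIENT_ERROR_CODE <= error.response.status_code < SERVER_ERROR_CODE


def post_json(url: str, body: dict, headers: dict) -> dict:
    """POST a JSON body and raise HTTPError for 4xx/5xx responses."""
    response = requests.post(url, json=body, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()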