Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Community

zach committed on Feb 14

Commit

fc85b67

1 Parent(s): 9dc43bf

Fix types in integrations package

Browse files

Files changed (3) hide show

src/integrations/anthropic_api.py +85 -71
src/integrations/elevenlabs_api.py +18 -9
src/integrations/hume_api.py +38 -33

src/integrations/anthropic_api.py CHANGED Viewed

@@ -20,12 +20,12 @@ Functions:
 # Standard Library Imports
 import logging
-from dataclasses import dataclass
-from typing import List, Optional, Union
 # Third-Party Library Imports
 from anthropic import Anthropic, APIError
-from anthropic.types import Message, ModelParam, TextBlock
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
@@ -33,65 +33,65 @@ from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import truncate_text, validate_env_var
 @dataclass(frozen=True)
 class AnthropicConfig:
     """Immutable configuration for interacting with the Anthropic API."""
-    api_key: Optional[str] = None
     model: ModelParam = "claude-3-5-sonnet-latest"
     max_tokens: int = 150
-    system_prompt: Optional[str] = (
-        None  # system prompt is set post initialization, since self.max_tokens is leveraged in the prompt.
-    )
-    def __post_init__(self):
-        # Validate that required attributes are set
-        if not self.api_key:
-            api_key = validate_env_var("ANTHROPIC_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
         if not self.model:
             raise ValueError("Anthropic Model is not set.")
         if not self.max_tokens:
             raise ValueError("Anthropic Max Tokens is not set.")
-        if self.system_prompt is None:
-            system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech
-synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
-CRITICAL LENGTH CONSTRAINTS:
-Maximum length: {self.max_tokens} tokens (approximately 400 characters)
-You MUST complete all thoughts and sentences
-Responses should be 25% shorter than you initially plan
-Never exceed 400 characters total
-Response Generation Process:
-Draft your response mentally first
-Cut it down to 75% of its original length
-Reserve the last 100 characters for a proper conclusion
-If you start running long, immediately wrap up
-End every piece with a clear conclusion
-Content Requirements:
-Allow natural emotional progression
-Create an arc of connected moments
-Use efficient but expressive language
-Balance description with emotional depth
-Ensure perfect completion
-No meta-commentary or formatting
-Structure for Emotional Pieces:
-Opening hook (50-75 characters)
-Emotional journey (200-250 characters)
-Resolution (75-100 characters)
-MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of where you
-are in the narrative.
-Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
-            object.__setattr__(self, "system_prompt", system_prompt)
     @property
     def client(self) -> Anthropic:
@@ -127,7 +127,7 @@ Remember: A shorter, complete response is ALWAYS better than a longer, truncated
 class AnthropicError(Exception):
     """Custom exception for errors related to the Anthropic API."""
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
@@ -136,7 +136,7 @@ class AnthropicError(Exception):
 class UnretryableAnthropicError(AnthropicError):
     """Custom exception for errors related to the Anthropic API that should not be retried."""
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message, original_exception)
@@ -151,23 +151,29 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
     """
     Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
     Args:
-        character_description (str): The input character description used to assist with generating text with Claude.
     Returns:
         str: The generated text.
     Raises:
-        AnthropicError: If there is an error communicating with the Anthropic API.
     """
-    # Build prompt for claude with character description
-    anthropic_config = config.anthropic_config
-    prompt = anthropic_config.build_expressive_prompt(character_description)
-    logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
-    response = None
     try:
-        # Generate text using the Anthropic SDK
         response: Message = anthropic_config.client.messages.create(
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
@@ -176,17 +182,17 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
         )
         logger.debug(f"API response received: {truncate_text(str(response))}")
-        # Validate response
-        if not hasattr(response, "content"):
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
-        # Process response
-        blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
             result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
             logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
         if isinstance(blocks, TextBlock):
             logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
             return blocks.text
@@ -195,13 +201,21 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
         return str(blocks or "No content generated.")
     except Exception as e:
-        if isinstance(e, APIError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
-            raise UnretryableAnthropicError(
-                message=f'"{e.body["error"]["message"]}"',
-                original_exception=e,
-            ) from e
         raise AnthropicError(
-            message=(f"{e.message}"),
             original_exception=e,
         ) from e

 # Standard Library Imports
 import logging
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union, cast
 # Third-Party Library Imports
 from anthropic import Anthropic, APIError
+from anthropic.types import Message, ModelParam, TextBlock, ToolUseBlock
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import truncate_text, validate_env_var
+PROMPT_TEMPLATE: str = (
+    """You are an expert at generating micro-content optimized for text-to-speech synthesis.
+Your absolute priority is delivering complete, untruncated responses within strict length limits.
+CRITICAL LENGTH CONSTRAINTS:
+- Maximum length: {max_tokens} tokens (approximately 400 characters)
+- You MUST complete all thoughts and sentences
+- Responses should be 25% shorter than you initially plan
+- Never exceed 400 characters total
+Response Generation Process:
+- Draft your response mentally first
+- Cut it down to 75% of its original length
+- Reserve the last 100 characters for a proper conclusion
+- If you start running long, immediately wrap up
+- End every piece with a clear conclusion
+Content Requirements:
+- Allow natural emotional progression
+- Create an arc of connected moments
+- Use efficient but expressive language
+- Balance description with emotional depth
+- Ensure perfect completion
+- No meta-commentary or formatting
+Structure for Emotional Pieces:
+- Opening hook (50-75 characters)
+- Emotional journey (200-250 characters)
+- Resolution (75-100 characters)
+MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of
+where you are in the narrative.
+Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
+)
 @dataclass(frozen=True)
 class AnthropicConfig:
     """Immutable configuration for interacting with the Anthropic API."""
+    api_key: str = field(init=False)
+    system_prompt: str = field(init=False)
     model: ModelParam = "claude-3-5-sonnet-latest"
     max_tokens: int = 150
+    def __post_init__(self) -> None:
+        # Validate required non-computed attributes.
         if not self.model:
             raise ValueError("Anthropic Model is not set.")
         if not self.max_tokens:
             raise ValueError("Anthropic Max Tokens is not set.")
+        # Compute the API key from the environment.
+        computed_api_key = validate_env_var("ANTHROPIC_API_KEY")
+        object.__setattr__(self, "api_key", computed_api_key)
+        # Compute the system prompt using max_tokens and other logic.
+        computed_prompt = PROMPT_TEMPLATE.format(max_tokens=self.max_tokens)
+        object.__setattr__(self, "system_prompt", computed_prompt)
     @property
     def client(self) -> Anthropic:
 class AnthropicError(Exception):
     """Custom exception for errors related to the Anthropic API."""
+    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
 class UnretryableAnthropicError(AnthropicError):
     """Custom exception for errors related to the Anthropic API that should not be retried."""
+    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message, original_exception)
     """
     Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
+    This function includes retry logic and error translation. It raises a custom
+    UnretryableAnthropicError for API errors deemed unretryable and AnthropicError
+    for other errors.
     Args:
+        character_description (str): The input character description used to assist with generating text.
+        config (Config): Application configuration including Anthropic settings.
     Returns:
         str: The generated text.
     Raises:
+        UnretryableAnthropicError: For errors that should not be retried.
+        AnthropicError: For other errors communicating with the Anthropic API.
     """
     try:
+        anthropic_config = config.anthropic_config
+        prompt = anthropic_config.build_expressive_prompt(character_description)
+        logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
+        # Ensure system_prompt is set (guaranteed by __post_init__)
+        assert anthropic_config.system_prompt is not None, "system_prompt must be set."
         response: Message = anthropic_config.client.messages.create(
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
         )
         logger.debug(f"API response received: {truncate_text(str(response))}")
+        if not hasattr(response, "content") or response.content is None:
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
+        blocks: Union[List[Union[TextBlock, ToolUseBlock]], TextBlock, None] = response.content
         if isinstance(blocks, list):
             result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
             logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
         if isinstance(blocks, TextBlock):
             logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
             return blocks.text
         return str(blocks or "No content generated.")
     except Exception as e:
+        # If the error is an APIError, check if it's unretryable.
+        if isinstance(e, APIError):
+            status_code: Optional[int] = getattr(e, "status_code", None)
+            if status_code is not None and CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
+                error_body: Any = e.body
+                error_message: str = "Unknown error"
+                if isinstance(error_body, dict):
+                    error_message = cast(Dict[str, Any], error_body).get("error", {}).get("message", "Unknown error")
+                raise UnretryableAnthropicError(
+                    message=f'"{error_message}"',
+                    original_exception=e,
+                ) from e
+        # For all other errors, wrap them in an AnthropicError.
         raise AnthropicError(
+            message=str(e),
             original_exception=e,
         ) from e

src/integrations/elevenlabs_api.py CHANGED Viewed

@@ -22,7 +22,7 @@ Functions:
 # Standard Library Imports
 import logging
 import random
-from dataclasses import dataclass
 from typing import Optional, Tuple
 # Third-Party Library Imports
@@ -40,14 +40,17 @@ from src.utils import save_base64_audio_to_file, validate_env_var
 class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
-    api_key: Optional[str] = None
     output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
     def __post_init__(self):
-        # Validate that required attributes are set
-        if not self.api_key:
-            api_key = validate_env_var("ELEVENLABS_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
     @property
     def client(self) -> ElevenLabs:
@@ -83,7 +86,9 @@ class UnretryableElevenLabsError(ElevenLabsError):
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_elevenlabs(character_description: str, text: str, config: Config) -> Tuple[None, str]:
     """
     Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
@@ -94,7 +99,7 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
     Returns:
         Tuple[None, str]: A tuple containing:
             - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
-                                    across TTS integrations
             - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
     Raises:
@@ -129,7 +134,11 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
         return None, audio_file_path
     except Exception as e:
-        if isinstance(e, ApiError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
             raise UnretryableElevenLabsError(
                 message=f"{e.body['detail']['message']}",
                 original_exception=e,

 # Standard Library Imports
 import logging
 import random
+from dataclasses import dataclass, field
 from typing import Optional, Tuple
 # Third-Party Library Imports
 class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
+    api_key: str = field(init=False)
     output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
     def __post_init__(self):
+        # Validate required attributes.
+        if not self.output_format:
+            raise ValueError("ElevenLabs TTS API output format is not set.")
+        # Compute the API key from the environment.
+        computed_key = validate_env_var("ELEVENLABS_API_KEY")
+        object.__setattr__(self, "api_key", computed_key)
     @property
     def client(self) -> ElevenLabs:
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
+def text_to_speech_with_elevenlabs(
+    character_description: str, text: str, config: Config
+) -> Tuple[None, str]:
     """
     Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
     Returns:
         Tuple[None, str]: A tuple containing:
             - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
+                                    across TTS integrations.
             - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
     Raises:
         return None, audio_file_path
     except Exception as e:
+        if (
+            isinstance(e, ApiError)
+            and e.status_code is not None
+            and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
+        ):
             raise UnretryableElevenLabsError(
                 message=f"{e.body['detail']['message']}",
                 original_exception=e,

src/integrations/hume_api.py CHANGED Viewed

@@ -20,8 +20,8 @@ Functions:
 # Standard Library Imports
 import logging
-from dataclasses import dataclass
-from typing import Any, Dict, Literal, Optional, Tuple, Union
 # Third-Party Library Imports
 import requests
@@ -34,43 +34,44 @@ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import save_base64_audio_to_file, validate_env_var
 HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
-""" Support audio file formats for the Hume TTS API"""
 @dataclass(frozen=True)
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
-    api_key: Optional[str] = None
     url: str = "https://test-api.hume.ai/v0/tts/octave"
-    headers: dict = None
     file_format: HumeSupportedFileFormat = "mp3"
-    def __post_init__(self):
-        # Validate required attributes
-        if not self.api_key:
-            api_key = validate_env_var("HUME_API_KEY")
-            object.__setattr__(self, "api_key", api_key)
         if not self.url:
             raise ValueError("Hume TTS endpoint URL is not set.")
         if not self.file_format:
             raise ValueError("Hume TTS file format is not set.")
-        # Set headers dynamically after validation
-        object.__setattr__(
-            self,
-            "headers",
-            {
-                "X-Hume-Api-Key": f"{self.api_key}",
-                "Content-Type": "application/json",
-            },
-        )
 class HumeError(Exception):
     """Custom exception for errors related to the Hume TTS API."""
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
@@ -79,14 +80,11 @@ class HumeError(Exception):
 class UnretryableHumeError(HumeError):
     """Custom exception for errors related to the Hume TTS API that should not be retried."""
-    def __init__(self, message: str, original_exception: Optional[Exception] = None):
-        super().__init__(message)
         self.original_exception = original_exception
-# Initialize the Hume client
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
@@ -95,7 +93,10 @@ class UnretryableHumeError(HumeError):
     reraise=True,
 )
 def text_to_speech_with_hume(
-    character_description: str, text: str, num_generations: int, config: Config
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
@@ -110,9 +111,10 @@ def text_to_speech_with_hume(
         character_description (str): A description of the character, which is used as contextual input
             for generating the voice.
         text (str): The text to be converted to speech.
-        num_generations (int, optional): The number of audio generations to request from the API.
             Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
-            generation is expected in the API response. Defaults to 1.
     Returns:
         Union[Tuple[str, str], Tuple[str, str, str, str]]:
@@ -137,9 +139,7 @@ def text_to_speech_with_hume(
     hume_config = config.hume_config
     request_body = {
         "utterances": [{"text": text, "description": character_description or None}],
-        "format": {
-            "type": hume_config.file_format,
-        },
         "num_generations": num_generations,
     }
@@ -159,7 +159,7 @@ def text_to_speech_with_hume(
             logger.error(msg)
             raise HumeError(msg)
-        # Extract the base64 encoded audio and generation ID from the generation
         generation_a = generations[0]
         generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
@@ -171,7 +171,11 @@ def text_to_speech_with_hume(
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
     except Exception as e:
-        if isinstance(e, HTTPError) and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE:
             raise UnretryableHumeError(
                 message=f"{e.response.text}",
                 original_exception=e,
@@ -197,6 +201,7 @@ def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tup
             Expected keys are:
                 - "generation_id" (str): A unique identifier for the generated audio.
                 - "audio" (str): A base64 encoded string of the audio data.
     Returns:
         Tuple[str, str]: A tuple containing:

 # Standard Library Imports
 import logging
+from dataclasses import dataclass, field
+from typing import Any, Dict, Literal, Tuple, Union
 # Third-Party Library Imports
 import requests
 from src.utils import save_base64_audio_to_file, validate_env_var
 HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
+"""Supported audio file formats for the Hume TTS API"""
 @dataclass(frozen=True)
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
+    # Computed fields.
+    api_key: str = field(init=False)
+    headers: Dict[str, str] = field(init=False)
+    # Provided fields.
     url: str = "https://test-api.hume.ai/v0/tts/octave"
     file_format: HumeSupportedFileFormat = "mp3"
+    def __post_init__(self) -> None:
+        # Validate required attributes.
         if not self.url:
             raise ValueError("Hume TTS endpoint URL is not set.")
         if not self.file_format:
             raise ValueError("Hume TTS file format is not set.")
+        # Compute the API key from the environment.
+        computed_api_key = validate_env_var("HUME_API_KEY")
+        object.__setattr__(self, "api_key", computed_api_key)
+        # Compute the headers.
+        computed_headers = {
+            "X-Hume-Api-Key": f"{computed_api_key}",
+            "Content-Type": "application/json",
+        }
+        object.__setattr__(self, "headers", computed_headers)
 class HumeError(Exception):
     """Custom exception for errors related to the Hume TTS API."""
+    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
         super().__init__(message)
         self.original_exception = original_exception
         self.message = message
 class UnretryableHumeError(HumeError):
     """Custom exception for errors related to the Hume TTS API that should not be retried."""
+    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
+        super().__init__(message, original_exception)
         self.original_exception = original_exception
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     reraise=True,
 )
 def text_to_speech_with_hume(
+    character_description: str,
+    text: str,
+    num_generations: int,
+    config: Config,
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
         character_description (str): A description of the character, which is used as contextual input
             for generating the voice.
         text (str): The text to be converted to speech.
+        num_generations (int): The number of audio generations to request from the API.
             Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
+            generation is expected in the API response.
+        config (Config): The application configuration containing Hume API settings.
     Returns:
         Union[Tuple[str, str], Tuple[str, str, str, str]]:
     hume_config = config.hume_config
     request_body = {
         "utterances": [{"text": text, "description": character_description or None}],
+        "format": {"type": hume_config.file_format},
         "num_generations": num_generations,
     }
             logger.error(msg)
             raise HumeError(msg)
+        # Extract the base64 encoded audio and generation ID from the generation.
         generation_a = generations[0]
         generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
     except Exception as e:
+        if (
+            isinstance(e, HTTPError)
+            and e.response is not None
+            and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
+        ):
             raise UnretryableHumeError(
                 message=f"{e.response.text}",
                 original_exception=e,
             Expected keys are:
                 - "generation_id" (str): A unique identifier for the generated audio.
                 - "audio" (str): A base64 encoded string of the audio data.
+        config (Config): The application configuration used for saving the audio file.
     Returns:
         Tuple[str, str]: A tuple containing: