Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Files Community

zach committed on Jan 28

Commit

adecb62

1 Parent(s): 829d0b8

Add base Hume integration

Browse files

Files changed (4) hide show

src/app.py +33 -16
src/integrations/__init__.py +2 -1
src/integrations/hume_api.py +121 -1
src/utils.py +1 -1

src/app.py CHANGED Viewed

@@ -1,24 +1,25 @@
 """
 app.py
-This file defines the Gradio user interface for interacting with the Anthropic API.
-Users can input prompts, which are processed and passed to the Claude model via the API.
-The generated responses are displayed back to the user in the Gradio UI.
 Key Features:
 - Gradio interface for user interaction.
 - Input validation via prompt length constraints.
-- Logging of user interactions and API responses.
 Functions:
-- process_prompt: Handles user input, calls the API, and returns generated text.
 - build_gradio_interface: Constructs the Gradio Blocks-based interface.
 """
 # Third-Party Library Imports
 import gradio as gr
 # Local Application Imports
-from src.integrations import generate_text_with_claude
 from src.config import logger
 from src.utils import truncate_text, validate_prompt_length
@@ -42,16 +43,24 @@ def process_prompt(prompt: str) -> str:
     try:
         # Validate prompt length before processing
         validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
         generated_text = generate_text_with_claude(prompt)
         logger.debug(f"Generated text: {generated_text}")
-        logger.info("Successfully generated text.")
-        return generated_text
     except ValueError as ve:
         logger.warning(f"Validation error: {ve}")
-        return str(ve)  # Return validation error directly to the UI
     except Exception as e:
-        logger.error(f"Unexpected error generating text: {e}")
-        return "An unexpected error occurred. Please try again."
 def build_gradio_interface() -> gr.Blocks:
@@ -63,12 +72,15 @@ def build_gradio_interface() -> gr.Blocks:
     """
     with gr.Blocks() as demo:
         gr.Markdown("# TTS Arena")
-        gr.Markdown("Generate text from a prompt using **Claude by Anthropic**.")
         with gr.Row():
             prompt_input = gr.Textbox(
                 label="Enter your prompt",
-                placeholder=f"Prompt Claude to generate a poem or short story...",
                 lines=2,
             )
@@ -76,13 +88,18 @@ def build_gradio_interface() -> gr.Blocks:
             generate_button = gr.Button("Generate")
         with gr.Row():
-            output_text = gr.Textbox(label="Generated Text", interactive=False, lines=10)
-        # Attach the validation and processing logic
         generate_button.click(
             fn=process_prompt,
             inputs=prompt_input,
-            outputs=output_text,
         )
     logger.debug("Gradio interface built successfully")

 """
 app.py
+This file defines the Gradio user interface for interacting with the Anthropic API and Hume TTS API.
+Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
+The generated text is then converted to audio using the Hume TTS API, allowing playback in the Gradio UI.
 Key Features:
 - Gradio interface for user interaction.
 - Input validation via prompt length constraints.
+- Integration with the Anthropic and Hume APIs.
+- Playback support for TTS audio responses.
 Functions:
+- process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
 - build_gradio_interface: Constructs the Gradio Blocks-based interface.
 """
 # Third-Party Library Imports
 import gradio as gr
 # Local Application Imports
+from src.integrations import generate_text_with_claude, text_to_speech_with_hume
 from src.config import logger
 from src.utils import truncate_text, validate_prompt_length
     try:
         # Validate prompt length before processing
         validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
+        # Generate text with Claude API
         generated_text = generate_text_with_claude(prompt)
         logger.debug(f"Generated text: {generated_text}")
+        # Convert text to speech with Hume TTS API
+        generated_hume_audio = text_to_speech_with_hume(prompt, generated_text)
+        logger.debug(f"Generated audio data: {len(generated_hume_audio)} bytes")
+        logger.info("Successfully processed prompt.")
+        return generated_text, generated_hume_audio
     except ValueError as ve:
         logger.warning(f"Validation error: {ve}")
+        return str(ve), b""  # Return validation error directly to the UI with no audio
     except Exception as e:
+        logger.error(f"Unexpected error during processing: {e}")
+        return "An unexpected error occurred. Please try again.", b""
 def build_gradio_interface() -> gr.Blocks:
     """
     with gr.Blocks() as demo:
         gr.Markdown("# TTS Arena")
+        gr.Markdown(
+            "Generate text from a prompt using **Claude by Anthropic**, "
+            "and listen to the generated text-to-speech using **Hume TTS API**."
+        )
         with gr.Row():
             prompt_input = gr.Textbox(
                 label="Enter your prompt",
+                placeholder="Prompt Claude to generate a poem or short story...",
                 lines=2,
             )
             generate_button = gr.Button("Generate")
         with gr.Row():
+            output_text = gr.Textbox(
+                label="Generated Text",
+                interactive=False,
+                lines=10,
+            )
+            audio_output = gr.Audio(label="Generated Audio", type="filepath")  # Fix: type="filepath"
+        # Attach the validation, text generation, and TTS processing logic
         generate_button.click(
             fn=process_prompt,
             inputs=prompt_input,
+            outputs=[output_text, audio_output],
         )
     logger.debug("Gradio interface built successfully")

src/integrations/__init__.py CHANGED Viewed

	@@ -1 +1,2 @@
1	- from .anthropic_api import generate_text_with_claude


1	+ from .anthropic_api import generate_text_with_claude
2	+ from .hume_api import text_to_speech_with_hume

src/integrations/hume_api.py CHANGED Viewed

	@@ -1 +1,121 @@
1	- ~~# coming soon...~~

+"""
+hume_api.py
+This file defines the interaction with the Hume TTS API, focusing on converting text to audio.
+It includes functionality for API request handling, retrying failed requests, and processing API responses.
+Key Features:
+- Encapsulates all logic related to the Hume TTS API.
+- Implements retry logic for handling transient API errors.
+- Handles received audio and processes it for playback on the web.
+- Provides detailed logging for debugging and error tracking.
+Classes:
+- HumeException: Custom exception for TTS API-related errors.
+- HumeConfig: Immutable configuration for interacting with the TTS API.
+Functions:
+- text_to_speech_with_hume: Converts text to speech using the Hume TTS API with retry logic.
+"""
+# Standard Library Imports
+import logging
+from dataclasses import dataclass
+from typing import Optional
+# Third-Party Library Imports
+import requests
+from tenacity import retry, stop_after_attempt, wait_fixed
+# Local Application Imports
+from src.config import logger
+from src.utils import validate_env_var, truncate_text
+@dataclass(frozen=True)
+class HumeConfig:
+    """
+    Immutable configuration for interacting with the TTS API.
+    The instance is frozen, so fields cannot be reassigned after construction; the
+    request headers are derived from `api_key` once, in `__post_init__`.
+    """
+    # Endpoint that TTS requests are POSTed to.
+    tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
+    # NOTE: this default is evaluated once, when the class body is executed, so
+    # validate_env_var("HUME_API_KEY") runs when this module is first imported.
+    api_key: str = validate_env_var("HUME_API_KEY")
+    # Name of the voice sent in the request body.
+    voice: str = "KORA"
+    # Desired audio format.
+    audio_format: str = 'wav'
+    # Always overwritten in __post_init__; None is only a placeholder default,
+    # hence Optional rather than a bare dict annotation.
+    headers: Optional[dict] = None
+    def __post_init__(self) -> None:
+        # The dataclass is frozen, so normal attribute assignment would raise
+        # FrozenInstanceError; object.__setattr__ bypasses that guard.
+        object.__setattr__(self, "headers", {
+            'X-Hume-Api-Key': f"{self.api_key}",
+            'Content-Type': 'application/json',
+        })
+class HumeException(Exception):
+    """
+    Raised when a call to the Hume TTS API fails.
+    The triggering lower-level exception, if any, is kept on
+    `original_exception` so callers can inspect the root cause.
+    """
+    def __init__(self, message: str, original_exception: Optional[Exception] = None):
+        # Remember the underlying cause (None when there is none) ...
+        self.original_exception = original_exception
+        # ... then let Exception store the human-readable message.
+        super().__init__(message)
+# Initialize the shared Hume TTS configuration (reads HUME_API_KEY and builds the request headers)
+hume_config = HumeConfig()
+# reraise=True: once all attempts are exhausted, surface the last HumeException
+# itself instead of tenacity's RetryError, matching the documented contract below.
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
+def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
+    """
+    Converts text to speech using the Hume TTS API and returns the raw binary audio data.
+    Any failure is retried (up to 3 attempts, 2 seconds apart) before the error is
+    propagated to the caller.
+    Args:
+        prompt (str): The original user prompt (only used for debug logging).
+        text (str): The generated text to be converted to speech.
+    Returns:
+        bytes: The raw binary audio data for playback.
+    Raises:
+        HumeException: If the request fails, the API responds with a non-200 status,
+            or the response does not carry an audio Content-Type.
+    """
+    logger.debug(f"Preparing TTS request for prompt: {truncate_text(prompt)}")
+    logger.debug(f"Generated text for TTS: {truncate_text(text)}")
+    request_body = {
+        "text": text,
+        "voice": {"name": hume_config.voice},
+        # "voice_description": prompt, # <-- breaking request!?
+        # "format": hume_config.audio_format, # <-- breaking request!?
+    }
+    try:
+        response = requests.post(
+            url=hume_config.tts_endpoint_url,
+            headers=hume_config.headers,
+            json=request_body,
+            # Without a timeout, requests waits indefinitely on a stalled connection.
+            timeout=60,
+        )
+        # Log the status for debugging
+        logger.debug(f"Hume TTS API Response Status: {response.status_code}")
+        if response.status_code != 200:
+            logger.error(f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)")
+            raise HumeException(f"Hume TTS API responded with status {response.status_code}: {response.text}")
+        content_type = response.headers.get("Content-Type", "Unknown")
+        # If Content-Type is audio, return the binary audio data
+        if content_type.startswith("audio/"):
+            audio_data = response.content  # Raw binary audio data
+            logger.debug(f"Received binary audio data: {len(audio_data)} bytes")
+            return audio_data
+        # Unexpected content type
+        logger.error(f"Unexpected Content-Type: {content_type}")
+        raise HumeException(f"Unexpected Content-Type: {content_type}")
+    except HumeException:
+        # Already a fully-described domain error raised above; let it through
+        # unchanged so the generic handler below does not log and re-wrap it.
+        raise
+    except requests.exceptions.RequestException as e:
+        logger.exception("Request to Hume TTS API failed.")
+        raise HumeException(
+            message=f"Failed to communicate with Hume TTS API: {e}",
+            original_exception=e,
+        ) from e
+    except Exception as e:
+        logger.exception(f"Unexpected error: {e}")
+        raise HumeException(
+            message=f"Unexpected error while processing the Hume TTS response: {e}",
+            original_exception=e,
+        ) from e

src/utils.py CHANGED Viewed

@@ -9,8 +9,8 @@ Key Features:
 - Provides helper functions for text validation and truncation.
 Functions:
-- validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
 - truncate_text: Truncates a string to a specified length with ellipses.
 - validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
 """

 - Provides helper functions for text validation and truncation.
 Functions:
 - truncate_text: Truncates a string to a specified length with ellipses.
+- validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
 - validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
 """