zach committed on
Commit
adecb62
·
1 Parent(s): 829d0b8

Add base Hume integration

Browse files
src/app.py CHANGED
@@ -1,24 +1,25 @@
1
  """
2
  app.py
3
 
4
- This file defines the Gradio user interface for interacting with the Anthropic API.
5
- Users can input prompts, which are processed and passed to the Claude model via the API.
6
- The generated responses are displayed back to the user in the Gradio UI.
7
 
8
  Key Features:
9
  - Gradio interface for user interaction.
10
  - Input validation via prompt length constraints.
11
- - Logging of user interactions and API responses.
 
12
 
13
  Functions:
14
- - process_prompt: Handles user input, calls the API, and returns generated text.
15
  - build_gradio_interface: Constructs the Gradio Blocks-based interface.
16
  """
17
 
18
  # Third-Party Library Imports
19
  import gradio as gr
20
  # Local Application Imports
21
- from src.integrations import generate_text_with_claude
22
  from src.config import logger
23
  from src.utils import truncate_text, validate_prompt_length
24
 
@@ -42,16 +43,24 @@ def process_prompt(prompt: str) -> str:
42
  try:
43
  # Validate prompt length before processing
44
  validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
 
 
45
  generated_text = generate_text_with_claude(prompt)
46
  logger.debug(f"Generated text: {generated_text}")
47
- logger.info("Successfully generated text.")
48
- return generated_text
 
 
 
 
 
 
49
  except ValueError as ve:
50
  logger.warning(f"Validation error: {ve}")
51
- return str(ve) # Return validation error directly to the UI
52
  except Exception as e:
53
- logger.error(f"Unexpected error generating text: {e}")
54
- return "An unexpected error occurred. Please try again."
55
 
56
 
57
  def build_gradio_interface() -> gr.Blocks:
@@ -63,12 +72,15 @@ def build_gradio_interface() -> gr.Blocks:
63
  """
64
  with gr.Blocks() as demo:
65
  gr.Markdown("# TTS Arena")
66
- gr.Markdown("Generate text from a prompt using **Claude by Anthropic**.")
 
 
 
67
 
68
  with gr.Row():
69
  prompt_input = gr.Textbox(
70
  label="Enter your prompt",
71
- placeholder=f"Prompt Claude to generate a poem or short story...",
72
  lines=2,
73
  )
74
 
@@ -76,13 +88,18 @@ def build_gradio_interface() -> gr.Blocks:
76
  generate_button = gr.Button("Generate")
77
 
78
  with gr.Row():
79
- output_text = gr.Textbox(label="Generated Text", interactive=False, lines=10)
 
 
 
 
 
80
 
81
- # Attach the validation and processing logic
82
  generate_button.click(
83
  fn=process_prompt,
84
  inputs=prompt_input,
85
- outputs=output_text,
86
  )
87
 
88
  logger.debug("Gradio interface built successfully")
 
1
  """
2
  app.py
3
 
4
+ This file defines the Gradio user interface for interacting with the Anthropic API and Hume TTS API.
5
+ Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
6
+ The generated text is then converted to audio using the Hume TTS API, allowing playback in the Gradio UI.
7
 
8
  Key Features:
9
  - Gradio interface for user interaction.
10
  - Input validation via prompt length constraints.
11
+ - Integration with the Anthropic and Hume APIs.
12
+ - Playback support for TTS audio responses.
13
 
14
  Functions:
15
+ - process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
16
  - build_gradio_interface: Constructs the Gradio Blocks-based interface.
17
  """
18
 
19
  # Third-Party Library Imports
20
  import gradio as gr
21
  # Local Application Imports
22
+ from src.integrations import generate_text_with_claude, text_to_speech_with_hume
23
  from src.config import logger
24
  from src.utils import truncate_text, validate_prompt_length
25
 
 
43
  try:
44
  # Validate prompt length before processing
45
  validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
46
+
47
+ # Generate text with Claude API
48
  generated_text = generate_text_with_claude(prompt)
49
  logger.debug(f"Generated text: {generated_text}")
50
+
51
+ # Convert text to speech with Hume TTS API
52
+ generated_hume_audio = text_to_speech_with_hume(prompt, generated_text)
53
+ logger.debug(f"Generated audio data: {len(generated_hume_audio)} bytes")
54
+
55
+ logger.info("Successfully processed prompt.")
56
+ return generated_text, generated_hume_audio
57
+
58
  except ValueError as ve:
59
  logger.warning(f"Validation error: {ve}")
60
+ return str(ve), b"" # Return validation error directly to the UI with no audio
61
  except Exception as e:
62
+ logger.error(f"Unexpected error during processing: {e}")
63
+ return "An unexpected error occurred. Please try again.", b""
64
 
65
 
66
  def build_gradio_interface() -> gr.Blocks:
 
72
  """
73
  with gr.Blocks() as demo:
74
  gr.Markdown("# TTS Arena")
75
+ gr.Markdown(
76
+ "Generate text from a prompt using **Claude by Anthropic**, "
77
+ "and listen to the generated text-to-speech using **Hume TTS API**."
78
+ )
79
 
80
  with gr.Row():
81
  prompt_input = gr.Textbox(
82
  label="Enter your prompt",
83
+ placeholder="Prompt Claude to generate a poem or short story...",
84
  lines=2,
85
  )
86
 
 
88
  generate_button = gr.Button("Generate")
89
 
90
  with gr.Row():
91
+ output_text = gr.Textbox(
92
+ label="Generated Text",
93
+ interactive=False,
94
+ lines=10,
95
+ )
96
+ audio_output = gr.Audio(label="Generated Audio", type="filepath") # Fix: type="filepath"
97
 
98
+ # Attach the validation, text generation, and TTS processing logic
99
  generate_button.click(
100
  fn=process_prompt,
101
  inputs=prompt_input,
102
+ outputs=[output_text, audio_output],
103
  )
104
 
105
  logger.debug("Gradio interface built successfully")
src/integrations/__init__.py CHANGED
@@ -1 +1,2 @@
1
- from .anthropic_api import generate_text_with_claude
 
 
1
+ from .anthropic_api import generate_text_with_claude
2
+ from .hume_api import text_to_speech_with_hume
src/integrations/hume_api.py CHANGED
@@ -1 +1,121 @@
1
- # coming soon...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ hume_api.py
3
+
4
+ This file defines the interaction with the Hume TTS API, focusing on converting text to audio.
5
+ It includes functionality for input validation, API request handling, and processing API responses.
6
+
7
+ Key Features:
8
+ - Encapsulates all logic related to the Hume TTS API.
9
+ - Implements retry logic for handling transient API errors.
10
+ - Handles received audio and processes it for playback on the web.
11
+ - Provides detailed logging for debugging and error tracking.
12
+
13
+ Classes:
14
+ - HumeException: Custom exception for TTS API-related errors.
15
+ - HumeConfig: Immutable configuration for interacting with the TTS API.
16
+
17
+ Functions:
18
+ - text_to_speech_with_hume: Converts text to speech using the Hume TTS API with input validation and retry logic.
19
+ """
20
+
21
+ # Standard Library Imports
22
+ import logging
23
+ from dataclasses import dataclass
24
+ from typing import Optional
25
+ # Third-Party Library Imports
26
+ import requests
27
+ from tenacity import retry, stop_after_attempt, wait_fixed
28
+ # Local Application Imports
29
+ from src.config import logger
30
+ from src.utils import validate_env_var, truncate_text
31
+
32
+
@dataclass(frozen=True)
class HumeConfig:
    """Immutable configuration for interacting with the Hume TTS API.

    The ``headers`` field is derived from ``api_key`` in ``__post_init__``;
    any value passed for ``headers`` at construction time is overwritten.
    """
    # URL that TTS requests are POSTed to.
    tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
    # NOTE: this default is evaluated once, when the class body executes
    # (i.e. when this module is imported), not per instance.
    api_key: str = validate_env_var("HUME_API_KEY")
    # Name of the Hume voice requested for synthesis.
    voice: str = "KORA"
    # Audio format identifier.
    audio_format: str = "wav"
    # HTTP headers sent with every request; populated in __post_init__.
    # Annotated Optional because the declared default is None.
    headers: Optional[dict] = None

    def __post_init__(self) -> None:
        """Build the request headers from the configured API key."""
        # The dataclass is frozen, so regular attribute assignment raises
        # FrozenInstanceError; object.__setattr__ bypasses that guard.
        object.__setattr__(self, "headers", {
            'X-Hume-Api-Key': f"{self.api_key}",
            'Content-Type': 'application/json',
        })
48
+
49
+
class HumeException(Exception):
    """Error raised when an interaction with the Hume TTS API fails.

    Args:
        message (str): Human-readable description of the failure.
        original_exception (Optional[Exception]): The underlying exception
            that triggered this error, if any. Stored on the instance as
            ``original_exception``.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Keep a handle on the root cause so callers can inspect it.
        self.original_exception = original_exception
        super().__init__(message)
55
+
56
+
57
+ # Module-level Hume TTS configuration shared by all requests
58
+ hume_config = HumeConfig()
59
+
60
+
# Upper bound (in seconds) on how long a single HTTP request to the Hume TTS
# API may take. Without a timeout, requests.post can block indefinitely.
_HUME_REQUEST_TIMEOUT_SECONDS = 30


@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
    """
    Converts text to speech using the Hume TTS API and returns the raw binary audio data.

    The call is attempted up to 3 times with a fixed 2-second wait between
    attempts. ``reraise=True`` makes the last HumeException propagate to the
    caller instead of tenacity's RetryError.

    Args:
        prompt (str): The original user prompt (only used for debug logging).
        text (str): The generated text to be converted to speech.

    Returns:
        bytes: The raw binary audio data for playback.

    Raises:
        HumeException: If the request fails, the API responds with a non-200
            status, or the response does not carry an ``audio/*`` Content-Type.
    """
    logger.debug(f"Preparing TTS request for prompt: {truncate_text(prompt)}")
    logger.debug(f"Generated text for TTS: {truncate_text(text)}")

    # NOTE(review): sending "voice_description" (the prompt) or "format"
    # (hume_config.audio_format) appeared to break the request, so both are
    # left out of the payload until the expected schema is confirmed.
    request_body = {
        "text": text,
        "voice": {"name": hume_config.voice},
    }

    try:
        response = requests.post(
            url=hume_config.tts_endpoint_url,
            headers=hume_config.headers,
            json=request_body,
            timeout=_HUME_REQUEST_TIMEOUT_SECONDS,
        )

        # Log the status for debugging
        logger.debug(f"Hume TTS API Response Status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)")
            raise HumeException(f"Hume TTS API responded with status {response.status_code}: {response.text}")

        # If Content-Type is audio, return the binary audio data
        content_type = response.headers.get("Content-Type", "")
        if content_type.startswith("audio/"):
            audio_data = response.content  # Raw binary audio data
            logger.debug(f"Received binary audio data: {len(audio_data)} bytes")
            return audio_data

        # Unexpected content type
        logger.error(f"Unexpected Content-Type: {content_type or 'Unknown'}")
        raise HumeException(f"Unexpected Content-Type: {content_type or 'Unknown'}")

    except HumeException:
        # Already a domain error with an accurate message (bad status or
        # content type); let it through instead of re-wrapping it below.
        raise
    except requests.exceptions.RequestException as e:
        logger.exception("Request to Hume TTS API failed.")
        raise HumeException(
            message=f"Failed to communicate with Hume TTS API: {e}",
            original_exception=e,
        ) from e
    except Exception as e:
        logger.exception(f"Unexpected error: {e}")
        raise HumeException(
            message=f"Unexpected error while processing the Hume TTS response: {e}",
            original_exception=e,
        ) from e
src/utils.py CHANGED
@@ -9,8 +9,8 @@ Key Features:
9
  - Provides helper functions for text validation and truncation.
10
 
11
  Functions:
12
- - validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
13
  - truncate_text: Truncates a string to a specified length with ellipses.
 
14
  - validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
15
  """
16
 
 
9
  - Provides helper functions for text validation and truncation.
10
 
11
  Functions:
 
12
  - truncate_text: Truncates a string to a specified length with ellipses.
13
+ - validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
14
  - validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
15
  """
16