Spaces:
Running
Running
zach
committed on
Commit
·
adecb62
1
Parent(s):
829d0b8
Add base Hume integration
Browse files- src/app.py +33 -16
- src/integrations/__init__.py +2 -1
- src/integrations/hume_api.py +121 -1
- src/utils.py +1 -1
src/app.py
CHANGED
@@ -1,24 +1,25 @@
|
|
1 |
"""
|
2 |
app.py
|
3 |
|
4 |
-
This file defines the Gradio user interface for interacting with the Anthropic API.
|
5 |
-
Users can input prompts, which are processed
|
6 |
-
The generated
|
7 |
|
8 |
Key Features:
|
9 |
- Gradio interface for user interaction.
|
10 |
- Input validation via prompt length constraints.
|
11 |
-
-
|
|
|
12 |
|
13 |
Functions:
|
14 |
-
- process_prompt: Handles user input, calls the
|
15 |
- build_gradio_interface: Constructs the Gradio Blocks-based interface.
|
16 |
"""
|
17 |
|
18 |
# Third-Party Library Imports
|
19 |
import gradio as gr
|
20 |
# Local Application Imports
|
21 |
-
from src.integrations import generate_text_with_claude
|
22 |
from src.config import logger
|
23 |
from src.utils import truncate_text, validate_prompt_length
|
24 |
|
@@ -42,16 +43,24 @@ def process_prompt(prompt: str) -> str:
|
|
42 |
try:
|
43 |
# Validate prompt length before processing
|
44 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
|
|
|
|
45 |
generated_text = generate_text_with_claude(prompt)
|
46 |
logger.debug(f"Generated text: {generated_text}")
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
except ValueError as ve:
|
50 |
logger.warning(f"Validation error: {ve}")
|
51 |
-
return str(ve) # Return validation error directly to the UI
|
52 |
except Exception as e:
|
53 |
-
logger.error(f"Unexpected error
|
54 |
-
return "An unexpected error occurred. Please try again."
|
55 |
|
56 |
|
57 |
def build_gradio_interface() -> gr.Blocks:
|
@@ -63,12 +72,15 @@ def build_gradio_interface() -> gr.Blocks:
|
|
63 |
"""
|
64 |
with gr.Blocks() as demo:
|
65 |
gr.Markdown("# TTS Arena")
|
66 |
-
gr.Markdown(
|
|
|
|
|
|
|
67 |
|
68 |
with gr.Row():
|
69 |
prompt_input = gr.Textbox(
|
70 |
label="Enter your prompt",
|
71 |
-
placeholder=
|
72 |
lines=2,
|
73 |
)
|
74 |
|
@@ -76,13 +88,18 @@ def build_gradio_interface() -> gr.Blocks:
|
|
76 |
generate_button = gr.Button("Generate")
|
77 |
|
78 |
with gr.Row():
|
79 |
-
output_text = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
# Attach the validation and processing logic
|
82 |
generate_button.click(
|
83 |
fn=process_prompt,
|
84 |
inputs=prompt_input,
|
85 |
-
outputs=output_text,
|
86 |
)
|
87 |
|
88 |
logger.debug("Gradio interface built successfully")
|
|
|
1 |
"""
|
2 |
app.py
|
3 |
|
4 |
+
This file defines the Gradio user interface for interacting with the Anthropic API and Hume TTS API.
|
5 |
+
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
|
6 |
+
The generated text is then converted to audio using the Hume TTS API, allowing playback in the Gradio UI.
|
7 |
|
8 |
Key Features:
|
9 |
- Gradio interface for user interaction.
|
10 |
- Input validation via prompt length constraints.
|
11 |
+
- Integration with the Anthropic and Hume APIs.
|
12 |
+
- Playback support for TTS audio responses.
|
13 |
|
14 |
Functions:
|
15 |
+
- process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
|
16 |
- build_gradio_interface: Constructs the Gradio Blocks-based interface.
|
17 |
"""
|
18 |
|
19 |
# Third-Party Library Imports
|
20 |
import gradio as gr
|
21 |
# Local Application Imports
|
22 |
+
from src.integrations import generate_text_with_claude, text_to_speech_with_hume
|
23 |
from src.config import logger
|
24 |
from src.utils import truncate_text, validate_prompt_length
|
25 |
|
|
|
43 |
try:
|
44 |
# Validate prompt length before processing
|
45 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
46 |
+
|
47 |
+
# Generate text with Claude API
|
48 |
generated_text = generate_text_with_claude(prompt)
|
49 |
logger.debug(f"Generated text: {generated_text}")
|
50 |
+
|
51 |
+
# Convert text to speech with Hume TTS API
|
52 |
+
generated_hume_audio = text_to_speech_with_hume(prompt, generated_text)
|
53 |
+
logger.debug(f"Generated audio data: {len(generated_hume_audio)} bytes")
|
54 |
+
|
55 |
+
logger.info("Successfully processed prompt.")
|
56 |
+
return generated_text, generated_hume_audio
|
57 |
+
|
58 |
except ValueError as ve:
|
59 |
logger.warning(f"Validation error: {ve}")
|
60 |
+
return str(ve), b"" # Return validation error directly to the UI with no audio
|
61 |
except Exception as e:
|
62 |
+
logger.error(f"Unexpected error during processing: {e}")
|
63 |
+
return "An unexpected error occurred. Please try again.", b""
|
64 |
|
65 |
|
66 |
def build_gradio_interface() -> gr.Blocks:
|
|
|
72 |
"""
|
73 |
with gr.Blocks() as demo:
|
74 |
gr.Markdown("# TTS Arena")
|
75 |
+
gr.Markdown(
|
76 |
+
"Generate text from a prompt using **Claude by Anthropic**, "
|
77 |
+
"and listen to the generated text-to-speech using **Hume TTS API**."
|
78 |
+
)
|
79 |
|
80 |
with gr.Row():
|
81 |
prompt_input = gr.Textbox(
|
82 |
label="Enter your prompt",
|
83 |
+
placeholder="Prompt Claude to generate a poem or short story...",
|
84 |
lines=2,
|
85 |
)
|
86 |
|
|
|
88 |
generate_button = gr.Button("Generate")
|
89 |
|
90 |
with gr.Row():
|
91 |
+
output_text = gr.Textbox(
|
92 |
+
label="Generated Text",
|
93 |
+
interactive=False,
|
94 |
+
lines=10,
|
95 |
+
)
|
96 |
+
audio_output = gr.Audio(label="Generated Audio", type="filepath") # Fix: type="filepath"
|
97 |
|
98 |
+
# Attach the validation, text generation, and TTS processing logic
|
99 |
generate_button.click(
|
100 |
fn=process_prompt,
|
101 |
inputs=prompt_input,
|
102 |
+
outputs=[output_text, audio_output],
|
103 |
)
|
104 |
|
105 |
logger.debug("Gradio interface built successfully")
|
src/integrations/__init__.py
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
from .anthropic_api import generate_text_with_claude
|
|
|
|
1 |
+
from .anthropic_api import generate_text_with_claude
|
2 |
+
from .hume_api import text_to_speech_with_hume
|
src/integrations/hume_api.py
CHANGED
@@ -1 +1,121 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
hume_api.py
|
3 |
+
|
4 |
+
This file defines the interaction with the Hume TTS API, focusing on converting text to audio.
|
5 |
+
It includes functionality for input validation, API request handling, and processing API responses.
|
6 |
+
|
7 |
+
Key Features:
|
8 |
+
- Encapsulates all logic related to the Hume TTS API.
|
9 |
+
- Implements retry logic for handling transient API errors.
|
10 |
+
- Handles received audio and processes it for playback on the web.
|
11 |
+
- Provides detailed logging for debugging and error tracking.
|
12 |
+
|
13 |
+
Classes:
|
14 |
+
- HumeException: Custom exception for TTS API-related errors.
|
15 |
+
- HumeConfig: Immutable configuration for interacting with the TTS API.
|
16 |
+
|
17 |
+
Functions:
|
18 |
+
- text_to_speech_with_hume: Converts text to speech using the Hume TTS API with input validation and retry logic.
|
19 |
+
"""
|
20 |
+
|
21 |
+
# Standard Library Imports
|
22 |
+
import logging
|
23 |
+
from dataclasses import dataclass
|
24 |
+
from typing import Optional
|
25 |
+
# Third-Party Library Imports
|
26 |
+
import requests
|
27 |
+
from tenacity import retry, stop_after_attempt, wait_fixed
|
28 |
+
# Local Application Imports
|
29 |
+
from src.config import logger
|
30 |
+
from src.utils import validate_env_var, truncate_text
|
31 |
+
|
32 |
+
|
33 |
+
@dataclass(frozen=True)
class HumeConfig:
    """Immutable configuration for interacting with the Hume TTS API.

    The ``api_key`` default is obtained by calling
    ``validate_env_var("HUME_API_KEY")`` while the class body executes, so the
    lookup happens once, when this module is imported.

    ``headers`` is always rebuilt from ``api_key`` in ``__post_init__``; any
    value passed for it to the constructor is overwritten.
    """
    # URL that TTS requests are POSTed to.
    tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
    # Secret API key; the default is evaluated once at class-definition time.
    api_key: str = validate_env_var("HUME_API_KEY")
    # Name of the voice to request.
    voice: str = "KORA"
    # Audio format identifier.
    audio_format: str = 'wav'
    # Derived in __post_init__; None is only a placeholder default.
    headers: Optional[dict] = None

    def __post_init__(self):
        # The dataclass is frozen, so plain attribute assignment would raise;
        # object.__setattr__ bypasses that to store the derived headers.
        object.__setattr__(self, "headers", {
            'X-Hume-Api-Key': f"{self.api_key}",
            'Content-Type': 'application/json',
        })

    def __repr__(self) -> str:
        # Mask the secret: the auto-generated dataclass repr would print the
        # API key verbatim, both as ``api_key`` and inside ``headers``.
        return (
            f"{type(self).__name__}("
            f"tts_endpoint_url={self.tts_endpoint_url!r}, "
            f"api_key='***', "
            f"voice={self.voice!r}, "
            f"audio_format={self.audio_format!r})"
        )
|
48 |
+
|
49 |
+
|
50 |
+
class HumeException(Exception):
    """Error raised for failures related to the Hume TTS API.

    Attributes:
        original_exception: The underlying exception that led to this error,
            or None when no underlying exception was supplied.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Keep a handle on the root cause so callers can inspect it.
        self.original_exception = original_exception
        super().__init__(message)
|
55 |
+
|
56 |
+
|
57 |
+
# Instantiate the shared Hume TTS configuration (endpoint, API key, voice, headers)
|
58 |
+
hume_config = HumeConfig()
|
59 |
+
|
60 |
+
|
61 |
+
# reraise=True makes the final failed attempt surface the underlying
# HumeException instead of tenacity's RetryError wrapper.
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def text_to_speech_with_hume(prompt: str, text: str, *, timeout: float = 60.0) -> bytes:
    """
    Converts text to speech using the Hume TTS API and returns the raw binary audio data.

    The call is attempted up to 3 times with a fixed 2-second wait between
    attempts; every raised exception triggers a retry.

    Args:
        prompt (str): The original user prompt (only used for debug logging).
        text (str): The generated text to be converted to speech.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        bytes: The raw binary audio data for playback.

    Raises:
        HumeException: If the request fails, the API responds with a non-200
            status, the response is not audio, or any other error occurs
            while processing the response.
    """
    logger.debug(f"Preparing TTS request for prompt: {truncate_text(prompt)}")
    logger.debug(f"Generated text for TTS: {truncate_text(text)}")

    # NOTE: "voice_description" (the prompt) and "format"
    # (hume_config.audio_format) are deliberately omitted; including them
    # appeared to break the request. Confirm against the API schema before
    # re-adding.
    request_body = {
        "text": text,
        "voice": {"name": hume_config.voice},
    }

    try:
        response = requests.post(
            url=hume_config.tts_endpoint_url,
            headers=hume_config.headers,
            json=request_body,
            timeout=timeout,  # Without a timeout the call can block indefinitely
        )

        # Log the status for debugging
        logger.debug(f"Hume TTS API Response Status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)")
            raise HumeException(f"Hume TTS API responded with status {response.status_code}: {response.text}")

        content_type = response.headers.get("Content-Type", "")

        # If Content-Type is audio, return the binary audio data
        if content_type.startswith("audio/"):
            audio_data = response.content  # Raw binary audio data
            logger.debug(f"Received binary audio data: {len(audio_data)} bytes")
            return audio_data

        # Unexpected content type
        unexpected = f"Unexpected Content-Type: {response.headers.get('Content-Type', 'Unknown')}"
        logger.error(unexpected)
        raise HumeException(unexpected)

    except HumeException:
        # Already logged and typed above; let it through untouched so the
        # catch-all below does not re-wrap it as an "unexpected" error.
        raise
    except requests.exceptions.RequestException as e:
        logger.exception("Request to Hume TTS API failed.")
        raise HumeException(
            message=f"Failed to communicate with Hume TTS API: {e}",
            original_exception=e,
        ) from e
    except Exception as e:
        logger.exception(f"Unexpected error: {e}")
        raise HumeException(
            message=f"Unexpected error while processing the Hume TTS response: {e}",
            original_exception=e,
        ) from e
|
src/utils.py
CHANGED
@@ -9,8 +9,8 @@ Key Features:
|
|
9 |
- Provides helper functions for text validation and truncation.
|
10 |
|
11 |
Functions:
|
12 |
-
- validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
|
13 |
- truncate_text: Truncates a string to a specified length with ellipses.
|
|
|
14 |
- validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
|
15 |
"""
|
16 |
|
|
|
9 |
- Provides helper functions for text validation and truncation.
|
10 |
|
11 |
Functions:
|
|
|
12 |
- truncate_text: Truncates a string to a specified length with ellipses.
|
13 |
+
- validate_env_var: Ensures the presence of a specific environment variable and retrieves its value.
|
14 |
- validate_prompt_length: Ensures that a prompt does not exceed the specified maximum length.
|
15 |
"""
|
16 |
|