zach committed on
Commit
d4b2b49
·
1 Parent(s): 0d3ca01

Update ElevenLabs integration to consume the voice generation API

Browse files
src/config.py CHANGED
@@ -18,7 +18,7 @@ from dotenv import load_dotenv
18
 
19
 
20
  # Load environment variables
21
- load_dotenv()
22
 
23
 
24
  # Enable debugging mode based on an environment variable
 
18
 
19
 
20
  # Load environment variables
21
+ load_dotenv(override=True)
22
 
23
 
24
  # Enable debugging mode based on an environment variable
src/integrations/elevenlabs_api.py CHANGED
@@ -20,6 +20,7 @@ Functions:
20
  """
21
 
22
  # Standard Library Imports
 
23
  from dataclasses import dataclass
24
  from enum import Enum
25
  import logging
@@ -35,44 +36,16 @@ from src.config import logger
35
  from src.utils import validate_env_var
36
 
37
 
38
- ElevenlabsVoiceName = Literal["Adam", "Antoni", "Rachel", "Matilda"]
39
-
40
-
41
- class ElevenLabsVoice(Enum):
42
- ADAM = ("Adam", "pNInz6obpgDQGcFmaJgB")
43
- ANTONI = ("Antoni", "ErXwobaYiN019PkySvjV")
44
- RACHEL = ("Rachel", "21m00Tcm4TlvDq8ikWAM")
45
- MATILDA = ("Matilda", "XrExE9yKIg1WjnnlVkGX")
46
-
47
- @property
48
- def voice_name(self) -> ElevenlabsVoiceName:
49
- """Returns the display name of the voice."""
50
- return self.value[0]
51
-
52
- @property
53
- def voice_id(self) -> str:
54
- """Returns the ElevenLabs voice ID."""
55
- return self.value[1]
56
-
57
-
58
  @dataclass(frozen=True)
59
  class ElevenLabsConfig:
60
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
61
 
62
  api_key: str = validate_env_var("ELEVENLABS_API_KEY")
63
- model_id: str = (
64
- "eleven_multilingual_v2" # ElevenLab's most emotionally expressive model
65
- )
66
- output_format: str = "mp3_44100_128" # Output format of the generated audio
67
 
68
  def __post_init__(self):
69
  # Validate that required attributes are set
70
  if not self.api_key:
71
  raise ValueError("ElevenLabs API key is not set.")
72
- if not self.model_id:
73
- raise ValueError("ElevenLabs Model ID is not set.")
74
- if not self.output_format:
75
- raise ValueError("ElevenLabs Output Format is not set.")
76
 
77
  @property
78
  def client(self) -> ElevenLabs:
@@ -84,16 +57,6 @@ class ElevenLabsConfig:
84
  """
85
  return ElevenLabs(api_key=self.api_key)
86
 
87
- @property
88
- def random_voice(self) -> ElevenLabsVoice:
89
- """
90
- Selects a random ElevenLabs voice.
91
-
92
- Returns:
93
- ElevenLabsVoice: A randomly selected voice enum member.
94
- """
95
- return random.choice(list(ElevenLabsVoice))
96
-
97
 
98
  class ElevenLabsError(Exception):
99
  """Custom exception for errors related to the ElevenLabs TTS API."""
@@ -132,6 +95,8 @@ def text_to_speech_with_elevenlabs(prompt: str, text: str) -> bytes:
132
  f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters."
133
  )
134
 
 
 
135
  try:
136
  # Synthesize speech using the ElevenLabs SDK
137
  response = elevenlabs_config.client.text_to_voice.create_previews(
@@ -145,7 +110,8 @@ def text_to_speech_with_elevenlabs(prompt: str, text: str) -> bytes:
145
  logger.error(msg)
146
  raise ElevenLabsError(message=msg)
147
 
148
- base64_audio = previews[0].audio_base64
 
149
  audio = base64.b64decode(base64_audio)
150
  return audio
151
 
 
20
  """
21
 
22
  # Standard Library Imports
23
+ import base64
24
  from dataclasses import dataclass
25
  from enum import Enum
26
  import logging
 
36
  from src.utils import validate_env_var
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  @dataclass(frozen=True)
40
  class ElevenLabsConfig:
41
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
42
 
43
  api_key: str = validate_env_var("ELEVENLABS_API_KEY")
 
 
 
 
44
 
45
  def __post_init__(self):
46
  # Validate that required attributes are set
47
  if not self.api_key:
48
  raise ValueError("ElevenLabs API key is not set.")
 
 
 
 
49
 
50
  @property
51
  def client(self) -> ElevenLabs:
 
57
  """
58
  return ElevenLabs(api_key=self.api_key)
59
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  class ElevenLabsError(Exception):
62
  """Custom exception for errors related to the ElevenLabs TTS API."""
 
95
  f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters."
96
  )
97
 
98
+ request_body = {"text": text, "voice_description": prompt}
99
+
100
  try:
101
  # Synthesize speech using the ElevenLabs SDK
102
  response = elevenlabs_config.client.text_to_voice.create_previews(
 
110
  logger.error(msg)
111
  raise ElevenLabsError(message=msg)
112
 
113
+ preview = random.choice(previews)
114
+ base64_audio = preview.audio_base_64
115
  audio = base64.b64decode(base64_audio)
116
  return audio
117
 
src/integrations/hume_api.py CHANGED
@@ -39,14 +39,14 @@ class HumeConfig:
39
  """Immutable configuration for interacting with the Hume TTS API."""
40
 
41
  api_key: str = validate_env_var("HUME_API_KEY")
42
- tts_endpoint_url: str = "https://test-api.hume.ai/v0/tts/octave"
43
  headers: dict = None
44
 
45
  def __post_init__(self):
46
  # Validate required attributes
47
  if not self.api_key:
48
  raise ValueError("Hume API key is not set.")
49
- if not self.tts_endpoint_url:
50
  raise ValueError("Hume TTS endpoint URL is not set.")
51
 
52
  # Set headers dynamically after validation
@@ -102,29 +102,25 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
102
  try:
103
  # Synthesize speech using the Hume TTS API
104
  response = requests.post(
105
- url=hume_config.tts_endpoint_url,
106
  headers=hume_config.headers,
107
  json=request_body,
108
  )
109
  response.raise_for_status()
110
- except requests.RequestException as re:
111
- logger.exception(f"Error communicating with Hume TTS API: {re}")
112
- raise HumeError(f"Error communicating with Hume TTS API: {re}") from re
113
-
114
- try:
115
- # Parse JSON response
116
  response_data = response.json()
117
- except ValueError as ve:
118
- logger.exception("Invalid JSON response from Hume TTS API")
119
- raise HumeError("Invalid JSON response from Hume TTS API") from ve
 
120
 
121
  try:
122
  # Safely extract the generation result from the response JSON
123
  generations = response_data.get("generations", [])
124
- if not generations or "audio" not in generations[0]:
125
  logger.error("Missing 'audio' data in the response.")
126
  raise HumeError("Missing audio data in response from Hume TTS API")
127
- base64_audio = generations[0]["audio"]
 
128
  # Decode base64 encoded audio
129
  audio = base64.b64decode(base64_audio)
130
  except (KeyError, TypeError, base64.binascii.Error) as ae:
 
39
  """Immutable configuration for interacting with the Hume TTS API."""
40
 
41
  api_key: str = validate_env_var("HUME_API_KEY")
42
+ url: str = "https://test-api.hume.ai/v0/tts/octave"
43
  headers: dict = None
44
 
45
  def __post_init__(self):
46
  # Validate required attributes
47
  if not self.api_key:
48
  raise ValueError("Hume API key is not set.")
49
+ if not self.url:
50
  raise ValueError("Hume TTS endpoint URL is not set.")
51
 
52
  # Set headers dynamically after validation
 
102
  try:
103
  # Synthesize speech using the Hume TTS API
104
  response = requests.post(
105
+ url=hume_config.url,
106
  headers=hume_config.headers,
107
  json=request_body,
108
  )
109
  response.raise_for_status()
 
 
 
 
 
 
110
  response_data = response.json()
111
+ except requests.RequestException as re:
112
+ request_error_msg = f"Error communicating with Hume TTS API: {re}"
113
+ logger.exception(request_error_msg)
114
+ raise HumeError(request_error_msg) from re
115
 
116
  try:
117
  # Safely extract the generation result from the response JSON
118
  generations = response_data.get("generations", [])
119
+ if not generations:
120
  logger.error("Missing 'audio' data in the response.")
121
  raise HumeError("Missing audio data in response from Hume TTS API")
122
+ generation = generations[0]
123
+ base64_audio = generation.get("audio")
124
  # Decode base64 encoded audio
125
  audio = base64.b64decode(base64_audio)
126
  except (KeyError, TypeError, base64.binascii.Error) as ae: