zach committed on
Commit
048c3fc
Β·
1 Parent(s): aad8b40

Move env var validation util to config, refactor to fix linting errors across project

Browse files
README.md CHANGED
@@ -39,6 +39,7 @@ Expressive TTS Arena/
39
  β”‚ β”œβ”€β”€ app.py # Entry file
40
  β”‚ β”œβ”€β”€ config.py # Global config and logger setup
41
  β”‚ β”œβ”€β”€ constants.py # Global constants
 
42
  β”‚ β”œβ”€β”€ theme.py # Custom Gradio Theme
43
  β”‚ └── utils.py # Utility functions
44
  β”œβ”€β”€ .env.example
 
39
  β”‚ β”œβ”€β”€ app.py # Entry file
40
  β”‚ β”œβ”€β”€ config.py # Global config and logger setup
41
  β”‚ β”œβ”€β”€ constants.py # Global constants
42
+ β”‚ β”œβ”€β”€ custom_types.py # Global custom types
43
  β”‚ β”œβ”€β”€ theme.py # Custom Gradio Theme
44
  β”‚ └── utils.py # Utility functions
45
  β”œβ”€β”€ .env.example
pyproject.toml CHANGED
@@ -29,7 +29,20 @@ line-length = 120
29
  src = ["src"]
30
 
31
  [tool.ruff.lint]
32
- ignore = ["D100", "D104", "FIX002", "TD002", "TD003"]
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  select = [
34
  "A",
35
  "ARG",
 
29
  src = ["src"]
30
 
31
  [tool.ruff.lint]
32
+ ignore = [
33
+ "B904",
34
+ "BLE001",
35
+ "D100",
36
+ "D104",
37
+ "EM101",
38
+ "EM102",
39
+ "FIX002",
40
+ "G004",
41
+ "PLR0913",
42
+ "PLR2004",
43
+ "TD002",
44
+ "TD003",
45
+ ]
46
  select = [
47
  "A",
48
  "ARG",
src/app.py CHANGED
@@ -9,26 +9,26 @@ Users can compare the outputs and vote for their favorite in an interactive UI.
9
  """
10
 
11
  # Standard Library Imports
12
- from concurrent.futures import ThreadPoolExecutor
13
  import time
 
14
  from typing import Tuple, Union
15
 
16
  # Third-Party Library Imports
17
  import gradio as gr
18
 
19
  # Local Application Imports
20
- from src.config import AUDIO_DIR, logger
21
  from src import constants
 
 
22
  from src.integrations import (
23
  AnthropicError,
24
  ElevenLabsError,
25
- generate_text_with_claude,
26
  HumeError,
 
27
  text_to_speech_with_elevenlabs,
28
  text_to_speech_with_hume,
29
  )
30
  from src.theme import CustomTheme
31
- from src.types import ComparisonType, OptionMap
32
  from src.utils import (
33
  choose_providers,
34
  create_shuffled_tts_options,
@@ -66,7 +66,7 @@ def generate_text(
66
  logger.info(f"Generated text ({len(generated_text)} characters).")
67
  return gr.update(value=generated_text), generated_text
68
  except AnthropicError as ae:
69
- logger.error(f"AnthropicError while generating text: {str(ae)}")
70
  raise gr.Error(
71
  f'There was an issue communicating with the Anthropic API: "{ae.message}"'
72
  )
@@ -94,7 +94,8 @@ def synthesize_speech(
94
  Args:
95
  character_description (str): The description of the character used for generating the voice.
96
  text (str): The text content to be synthesized into speech.
97
- generated_text_state (str): The previously generated text state, used to determine if the text has been modified.
 
98
 
99
  Returns:
100
  Tuple containing:
@@ -118,7 +119,7 @@ def synthesize_speech(
118
 
119
  # Select 2 TTS providers based on whether the text has been modified.
120
  text_modified = text != generated_text_state
121
- comparison_type, provider_a, provider_b = choose_providers(
122
  text_modified, character_description
123
  )
124
 
@@ -151,9 +152,9 @@ def synthesize_speech(
151
  generation_id_b, audio_b = future_audio_b.result()
152
 
153
  # Shuffle options so that placement of options in the UI will always be random
154
- options_map: OptionMap = create_shuffled_tts_options(
155
- provider_a, audio_a, generation_id_a, provider_b, audio_b, generation_id_b
156
- )
157
 
158
  option_a_audio = options_map["option_a"]["audio_file_path"]
159
  option_b_audio = options_map["option_b"]["audio_file_path"]
@@ -162,18 +163,17 @@ def synthesize_speech(
162
  gr.update(value=option_a_audio, visible=True, autoplay=True),
163
  gr.update(value=option_b_audio, visible=True),
164
  options_map,
165
- comparison_type,
166
  text_modified,
167
  text,
168
  character_description,
169
  )
170
  except ElevenLabsError as ee:
171
- logger.error(f"ElevenLabsError while synthesizing speech from text: {str(ee)}")
172
  raise gr.Error(
173
  f'There was an issue communicating with the Elevenlabs API: "{ee.message}"'
174
  )
175
  except HumeError as he:
176
- logger.error(f"HumeError while synthesizing speech from text: {str(he)}")
177
  raise gr.Error(
178
  f'There was an issue communicating with the Hume API: "{he.message}"'
179
  )
@@ -186,7 +186,6 @@ def vote(
186
  vote_submitted: bool,
187
  option_map: OptionMap,
188
  clicked_option_button: str,
189
- comparison_type: ComparisonType,
190
  text_modified: bool,
191
  character_description: str,
192
  text: str,
@@ -222,7 +221,6 @@ def vote(
222
  submit_voting_results(
223
  option_map,
224
  selected_option,
225
- comparison_type,
226
  text_modified,
227
  character_description,
228
  text,
@@ -272,7 +270,10 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
272
 
273
 
274
  def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
275
- """Builds the input section including the sample character description dropdown, character description input, and generate text button"""
 
 
 
276
  sample_character_description_dropdown = gr.Dropdown(
277
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
278
  label="Choose a sample character description",
@@ -298,7 +299,9 @@ def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
298
  def build_output_section() -> (
299
  Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
300
  ):
301
- """Builds the output section including text input, audio players, and vote buttons."""
 
 
302
  text_input = gr.Textbox(
303
  label="Input Text",
304
  placeholder="Enter or generate text for synthesis...",
@@ -348,11 +351,15 @@ def build_gradio_interface() -> gr.Blocks:
348
  gr.Markdown("# Expressive TTS Arena")
349
  gr.Markdown(
350
  """
351
- 1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide text and voice generation.
352
- 2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated text will appear in the input field automaticallyβ€”edit it if needed.
353
- 3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to two TTS APIs. Each API generates a voice and synthesizes speech in that voice.
 
 
 
354
  4. **Listen & compare**: Play both audio options and assess their expressiveness.
355
- 5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most expressive output.
 
356
  """
357
  )
358
 
@@ -384,8 +391,6 @@ def build_gradio_interface() -> gr.Blocks:
384
  # Track whether text that was used was generated or modified/custom
385
  text_modified_state = gr.State()
386
 
387
- # Track comparison type (which set of providers are being compared)
388
- comparison_type_state = gr.State()
389
  # Track option map (option A and option B are randomized)
390
  option_map_state = gr.State()
391
 
@@ -450,7 +455,6 @@ def build_gradio_interface() -> gr.Blocks:
450
  option_a_audio_player,
451
  option_b_audio_player,
452
  option_map_state,
453
- comparison_type_state,
454
  text_modified_state,
455
  text_state,
456
  character_description_state,
@@ -472,7 +476,6 @@ def build_gradio_interface() -> gr.Blocks:
472
  vote_submitted_state,
473
  option_map_state,
474
  vote_button_a,
475
- comparison_type_state,
476
  text_modified_state,
477
  character_description_state,
478
  text_state,
@@ -490,7 +493,6 @@ def build_gradio_interface() -> gr.Blocks:
490
  vote_submitted_state,
491
  option_map_state,
492
  vote_button_b,
493
- comparison_type_state,
494
  text_modified_state,
495
  character_description_state,
496
  text_state,
 
9
  """
10
 
11
  # Standard Library Imports
 
12
  import time
13
+ from concurrent.futures import ThreadPoolExecutor
14
  from typing import Tuple, Union
15
 
16
  # Third-Party Library Imports
17
  import gradio as gr
18
 
19
  # Local Application Imports
 
20
  from src import constants
21
+ from src.config import AUDIO_DIR, logger
22
+ from src.custom_types import ComparisonType, Option, OptionMap
23
  from src.integrations import (
24
  AnthropicError,
25
  ElevenLabsError,
 
26
  HumeError,
27
+ generate_text_with_claude,
28
  text_to_speech_with_elevenlabs,
29
  text_to_speech_with_hume,
30
  )
31
  from src.theme import CustomTheme
 
32
  from src.utils import (
33
  choose_providers,
34
  create_shuffled_tts_options,
 
66
  logger.info(f"Generated text ({len(generated_text)} characters).")
67
  return gr.update(value=generated_text), generated_text
68
  except AnthropicError as ae:
69
+ logger.error(f"AnthropicError while generating text: {ae!s}")
70
  raise gr.Error(
71
  f'There was an issue communicating with the Anthropic API: "{ae.message}"'
72
  )
 
94
  Args:
95
  character_description (str): The description of the character used for generating the voice.
96
  text (str): The text content to be synthesized into speech.
97
+ generated_text_state (str): The previously generated text state, used to determine if the text has
98
+ been modified.
99
 
100
  Returns:
101
  Tuple containing:
 
119
 
120
  # Select 2 TTS providers based on whether the text has been modified.
121
  text_modified = text != generated_text_state
122
+ provider_a, provider_b = choose_providers(
123
  text_modified, character_description
124
  )
125
 
 
152
  generation_id_b, audio_b = future_audio_b.result()
153
 
154
  # Shuffle options so that placement of options in the UI will always be random
155
+ option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
156
+ option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
157
+ options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
158
 
159
  option_a_audio = options_map["option_a"]["audio_file_path"]
160
  option_b_audio = options_map["option_b"]["audio_file_path"]
 
163
  gr.update(value=option_a_audio, visible=True, autoplay=True),
164
  gr.update(value=option_b_audio, visible=True),
165
  options_map,
 
166
  text_modified,
167
  text,
168
  character_description,
169
  )
170
  except ElevenLabsError as ee:
171
+ logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
172
  raise gr.Error(
173
  f'There was an issue communicating with the Elevenlabs API: "{ee.message}"'
174
  )
175
  except HumeError as he:
176
+ logger.error(f"HumeError while synthesizing speech from text: {he!s}")
177
  raise gr.Error(
178
  f'There was an issue communicating with the Hume API: "{he.message}"'
179
  )
 
186
  vote_submitted: bool,
187
  option_map: OptionMap,
188
  clicked_option_button: str,
 
189
  text_modified: bool,
190
  character_description: str,
191
  text: str,
 
221
  submit_voting_results(
222
  option_map,
223
  selected_option,
 
224
  text_modified,
225
  character_description,
226
  text,
 
270
 
271
 
272
  def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
273
+ """
274
+ Builds the input section including the sample character description dropdown, character
275
+ description input, and generate text button.
276
+ """
277
  sample_character_description_dropdown = gr.Dropdown(
278
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
279
  label="Choose a sample character description",
 
299
  def build_output_section() -> (
300
  Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
301
  ):
302
+ """
303
+ Builds the output section including text input, audio players, and vote buttons.
304
+ """
305
  text_input = gr.Textbox(
306
  label="Input Text",
307
  placeholder="Enter or generate text for synthesis...",
 
351
  gr.Markdown("# Expressive TTS Arena")
352
  gr.Markdown(
353
  """
354
+ 1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide
355
+ text and voice generation.
356
+ 2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated
357
+ text will appear in the input field automaticallyβ€”edit it if needed.
358
+ 3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to two
359
+ TTS APIs. Each API generates a voice and synthesizes speech in that voice.
360
  4. **Listen & compare**: Play both audio options and assess their expressiveness.
361
+ 5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most
362
+ expressive output.
363
  """
364
  )
365
 
 
391
  # Track whether text that was used was generated or modified/custom
392
  text_modified_state = gr.State()
393
 
 
 
394
  # Track option map (option A and option B are randomized)
395
  option_map_state = gr.State()
396
 
 
455
  option_a_audio_player,
456
  option_b_audio_player,
457
  option_map_state,
 
458
  text_modified_state,
459
  text_state,
460
  character_description_state,
 
476
  vote_submitted_state,
477
  option_map_state,
478
  vote_button_a,
 
479
  text_modified_state,
480
  character_description_state,
481
  text_state,
 
493
  vote_submitted_state,
494
  option_map_state,
495
  vote_button_b,
 
496
  text_modified_state,
497
  character_description_state,
498
  text_state,
src/config.py CHANGED
@@ -13,25 +13,20 @@ Key Features:
13
  # Standard Library Imports
14
  import logging
15
  import os
 
16
 
17
  # Third-Party Library Imports
18
  from dotenv import load_dotenv
19
 
20
-
21
  # Determine the environment (defaults to "dev" if not explicitly set)
22
  APP_ENV = os.getenv("APP_ENV", "dev").lower()
23
  if APP_ENV not in {"dev", "prod"}:
24
- print(f'Warning: Invalid APP_ENV "{APP_ENV}". Defaulting to "dev".')
25
  APP_ENV = "dev"
26
 
27
 
28
  # In development, load environment variables from .env file (not used in production)
29
- if APP_ENV == "dev":
30
- if os.path.exists(".env"):
31
- # Load environment variables
32
- load_dotenv(".env", override=True)
33
- else:
34
- print("Warning: .env file not found. Using system environment variables.")
35
 
36
 
37
  # Enable debug mode if in development (or if explicitly set in env variables)
@@ -47,10 +42,42 @@ logger.info(f'App running in "{APP_ENV}" mode.')
47
  logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
48
 
49
  if DEBUG:
50
- logger.debug(f"DEBUG mode enabled.")
51
 
52
 
53
  # Define the directory for audio files relative to the project root
54
- AUDIO_DIR = os.path.join(os.getcwd(), "static", "audio")
55
- os.makedirs(AUDIO_DIR, exist_ok=True)
56
  logger.info(f"Audio directory set to {AUDIO_DIR}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Standard Library Imports
14
  import logging
15
  import os
16
+ from pathlib import Path
17
 
18
  # Third-Party Library Imports
19
  from dotenv import load_dotenv
20
 
 
21
  # Determine the environment (defaults to "dev" if not explicitly set)
22
  APP_ENV = os.getenv("APP_ENV", "dev").lower()
23
  if APP_ENV not in {"dev", "prod"}:
 
24
  APP_ENV = "dev"
25
 
26
 
27
  # In development, load environment variables from .env file (not used in production)
28
+ if APP_ENV == "dev" and Path(".env").exists():
29
+ load_dotenv(".env", override=True)
 
 
 
 
30
 
31
 
32
  # Enable debug mode if in development (or if explicitly set in env variables)
 
42
  logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
43
 
44
  if DEBUG:
45
+ logger.debug("DEBUG mode enabled.")
46
 
47
 
48
  # Define the directory for audio files relative to the project root
49
+ AUDIO_DIR = Path.cwd() / "static" / "audio"
50
+ AUDIO_DIR.mkdir(parents=True, exist_ok=True)
51
  logger.info(f"Audio directory set to {AUDIO_DIR}")
52
+
53
+
54
+ def validate_env_var(var_name: str) -> str:
55
+ """
56
+ Validates that an environment variable is set and returns its value.
57
+
58
+ Args:
59
+ var_name (str): The name of the environment variable to validate.
60
+
61
+ Returns:
62
+ str: The value of the environment variable.
63
+
64
+ Raises:
65
+ ValueError: If the environment variable is not set.
66
+
67
+ Examples:
68
+ >>> import os
69
+ >>> os.environ["EXAMPLE_VAR"] = "example_value"
70
+ >>> validate_env_var("EXAMPLE_VAR")
71
+ 'example_value'
72
+
73
+ >>> validate_env_var("MISSING_VAR")
74
+ Traceback (most recent call last):
75
+ ...
76
+ ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
77
+ """
78
+ value = os.environ.get(var_name, "")
79
+ if not value:
80
+ raise ValueError(
81
+ f"{var_name} is not set. Please ensure it is defined in your environment variables."
82
+ )
83
+ return value
src/constants.py CHANGED
@@ -8,8 +8,10 @@ This module defines global constants used throughout the project.
8
  from typing import List
9
 
10
  # Third-Party Library Imports
11
- from src.types import ComparisonType, OptionKey, OptionLabel, TTSProviderName
12
 
 
 
13
 
14
  # UI constants
15
  HUME_AI: TTSProviderName = "Hume AI"
@@ -35,23 +37,28 @@ SELECT_OPTION_B: str = "Select Option B"
35
  # inspiration for generating creative text for expressive TTS, and generating novel voices.
36
  SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
37
  "πŸš€ Stranded Astronaut": (
38
- "A lone astronaut whose voice mirrors the silent vastness of spaceβ€”a low, steady tone imbued with isolation and quiet wonder. "
39
- "It carries the measured resolve of someone sending a final transmission, with an undercurrent of wistful melancholy."
 
40
  ),
41
  "πŸ“œ Timeless Poet": (
42
- "An ageless poet with a voice that flows like gentle verseβ€”a soft, reflective tone marked by deliberate pauses. "
43
- "It speaks with the measured cadence of classic sonnets, evoking both the fragile beauty of time and heartfelt introspection."
 
44
  ),
45
  "🐱 Whimsical Feline": (
46
- "A mischievous cat whose voice is playful yet mysteriousβ€”light, quick-witted, and infused with an enchanting purr. "
47
- "It hints at secret adventures and hidden charm, balancing exuberance with a subtle, smooth allure."
 
48
  ),
49
  "πŸ”₯ Revolutionary Orator": (
50
- "A defiant orator whose voice builds from quiet determination to passionate fervorβ€”a clear, commanding tone that resonates with conviction. "
51
- "It starts measured and resolute, then rises to a crescendo of fervor, punctuated by deliberate pauses that emphasize each rallying cry."
 
52
  ),
53
  "πŸ‘» Haunted Keeper": (
54
- "A solitary lighthouse keeper with a voice that carries the weight of forgotten stormsβ€”a soft, measured tone with an echo of sorrow. "
55
- "It speaks as if whispering long-held secrets in the dark, blending quiet melancholy with an air of enduring mystery."
 
56
  ),
57
  }
 
8
  from typing import List
9
 
10
  # Third-Party Library Imports
11
+ from src.custom_types import ComparisonType, OptionKey, OptionLabel, TTSProviderName
12
 
13
+ CLIENT_ERROR_CODE = 400
14
+ SERVER_ERROR_CODE = 500
15
 
16
  # UI constants
17
  HUME_AI: TTSProviderName = "Hume AI"
 
37
  # inspiration for generating creative text for expressive TTS, and generating novel voices.
38
  SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
39
  "πŸš€ Stranded Astronaut": (
40
+ "A lone astronaut whose voice mirrors the silent vastness of spaceβ€”a low, steady tone imbued "
41
+ "with isolation and quiet wonder. It carries the measured resolve of someone sending a final "
42
+ "transmission, with an undercurrent of wistful melancholy."
43
  ),
44
  "πŸ“œ Timeless Poet": (
45
+ "An ageless poet with a voice that flows like gentle verseβ€”a soft, reflective tone marked by "
46
+ "deliberate pauses. It speaks with the measured cadence of classic sonnets, evoking both the "
47
+ "fragile beauty of time and heartfelt introspection."
48
  ),
49
  "🐱 Whimsical Feline": (
50
+ "A mischievous cat whose voice is playful yet mysteriousβ€”light, quick-witted, and infused with "
51
+ "an enchanting purr. It hints at secret adventures and hidden charm, balancing exuberance with "
52
+ "a subtle, smooth allure."
53
  ),
54
  "πŸ”₯ Revolutionary Orator": (
55
+ "A defiant orator whose voice builds from quiet determination to passionate fervorβ€”a clear, "
56
+ "commanding tone that resonates with conviction. It starts measured and resolute, then rises "
57
+ "to a crescendo of fervor, punctuated by deliberate pauses that emphasize each rallying cry."
58
  ),
59
  "πŸ‘» Haunted Keeper": (
60
+ "A solitary lighthouse keeper with a voice that carries the weight of forgotten stormsβ€”a soft, "
61
+ "measured tone with an echo of sorrow. It speaks as if whispering long-held secrets in the dark, "
62
+ "blending quiet melancholy with an air of enduring mystery."
63
  ),
64
  }
src/custom_types.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- types.py
3
 
4
  This module defines custom types for the application.
5
  """
 
1
  """
2
+ custom_types.py
3
 
4
  This module defines custom types for the application.
5
  """
src/integrations/anthropic_api.py CHANGED
@@ -19,25 +19,26 @@ Functions:
19
  """
20
 
21
  # Standard Library Imports
22
- from dataclasses import dataclass
23
  import logging
 
24
  from typing import List, Optional, Union
25
 
26
  # Third-Party Library Imports
27
- from anthropic import APIError, Anthropic
28
  from anthropic.types import Message, ModelParam, TextBlock
29
- from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
30
 
31
  # Local Application Imports
32
- from src.config import logger
33
- from src.utils import truncate_text, validate_env_var
 
34
 
35
 
36
  @dataclass(frozen=True)
37
  class AnthropicConfig:
38
  """Immutable configuration for interacting with the Anthropic API."""
39
 
40
- api_key: str = validate_env_var("ANTHROPIC_API_KEY")
41
  model: ModelParam = "claude-3-5-sonnet-latest"
42
  max_tokens: int = 150
43
  system_prompt: Optional[str] = (
@@ -47,13 +48,16 @@ class AnthropicConfig:
47
  def __post_init__(self):
48
  # Validate that required attributes are set
49
  if not self.api_key:
50
- raise ValueError("Anthropic API key is not set.")
 
51
  if not self.model:
52
  raise ValueError("Anthropic Model is not set.")
53
  if not self.max_tokens:
54
  raise ValueError("Anthropic Max Tokens is not set.")
55
  if self.system_prompt is None:
56
- system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
 
 
57
  CRITICAL LENGTH CONSTRAINTS:
58
 
59
  Maximum length: {self.max_tokens} tokens (approximately 400 characters)
@@ -84,7 +88,8 @@ Opening hook (50-75 characters)
84
  Emotional journey (200-250 characters)
85
  Resolution (75-100 characters)
86
 
87
- MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of where you are in the narrative.
 
88
  Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
89
  object.__setattr__(self, "system_prompt", system_prompt)
90
 
@@ -110,12 +115,13 @@ Remember: A shorter, complete response is ALWAYS better than a longer, truncated
110
  Returns:
111
  str: The prompt to be passed to the Anthropic API.
112
  """
113
- prompt = (
114
  f"Character Description: {character_description}\n\n"
115
- "Based on the above character description, please generate a line of dialogue that captures the character's unique personality, emotional depth, and distinctive tone. "
116
- "The response should sound like something the character would naturally say, reflecting their background and emotional state, and be fully developed for text-to-speech synthesis."
 
 
117
  )
118
- return prompt
119
 
120
 
121
  class AnthropicError(Exception):
@@ -198,12 +204,15 @@ def generate_text_with_claude(character_description: str) -> str:
198
  return str(blocks or "No content generated.")
199
 
200
  except Exception as e:
201
- if isinstance(e, APIError):
202
- if e.status_code >= 400 and e.status_code < 500:
203
- raise UnretryableAnthropicError(
204
- message=f"\"{e.body['error']['message']}\"",
205
- original_exception=e,
206
- ) from e
 
 
 
207
  raise AnthropicError(
208
  message=(f"{e.message}"),
209
  original_exception=e,
 
19
  """
20
 
21
  # Standard Library Imports
 
22
  import logging
23
+ from dataclasses import dataclass
24
  from typing import List, Optional, Union
25
 
26
  # Third-Party Library Imports
27
+ from anthropic import Anthropic, APIError
28
  from anthropic.types import Message, ModelParam, TextBlock
29
+ from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
+ from src.config import logger, validate_env_var
33
+ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
+ from src.utils import truncate_text
35
 
36
 
37
  @dataclass(frozen=True)
38
  class AnthropicConfig:
39
  """Immutable configuration for interacting with the Anthropic API."""
40
 
41
+ api_key: Optional[str] = None
42
  model: ModelParam = "claude-3-5-sonnet-latest"
43
  max_tokens: int = 150
44
  system_prompt: Optional[str] = (
 
48
  def __post_init__(self):
49
  # Validate that required attributes are set
50
  if not self.api_key:
51
+ api_key = validate_env_var("ANTHROPIC_API_KEY")
52
+ object.__setattr__(self, "api_key", api_key)
53
  if not self.model:
54
  raise ValueError("Anthropic Model is not set.")
55
  if not self.max_tokens:
56
  raise ValueError("Anthropic Max Tokens is not set.")
57
  if self.system_prompt is None:
58
+ system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech
59
+ synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
60
+
61
  CRITICAL LENGTH CONSTRAINTS:
62
 
63
  Maximum length: {self.max_tokens} tokens (approximately 400 characters)
 
88
  Emotional journey (200-250 characters)
89
  Resolution (75-100 characters)
90
 
91
+ MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of where you
92
+ are in the narrative.
93
  Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
94
  object.__setattr__(self, "system_prompt", system_prompt)
95
 
 
115
  Returns:
116
  str: The prompt to be passed to the Anthropic API.
117
  """
118
+ return (
119
  f"Character Description: {character_description}\n\n"
120
+ "Based on the above character description, please generate a line of dialogue that captures the "
121
+ "character's unique personality, emotional depth, and distinctive tone. The response should sound "
122
+ "like something the character would naturally say, reflecting their background and emotional state, "
123
+ "and be fully developed for text-to-speech synthesis."
124
  )
 
125
 
126
 
127
  class AnthropicError(Exception):
 
204
  return str(blocks or "No content generated.")
205
 
206
  except Exception as e:
207
+ if (
208
+ isinstance(e, APIError)
209
+ and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
210
+ ):
211
+ raise UnretryableAnthropicError(
212
+ message=f"\"{e.body['error']['message']}\"",
213
+ original_exception=e,
214
+ ) from e
215
+
216
  raise AnthropicError(
217
  message=(f"{e.message}"),
218
  original_exception=e,
src/integrations/elevenlabs_api.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
  elevenlabs_api.py
3
 
4
- This file defines the interaction with the ElevenLabs text-to-speech (TTS) API using the ElevenLabs Python SDK.
5
- It includes functionality for API request handling and processing API responses.
6
 
7
  Key Features:
8
  - Encapsulates all logic related to the ElevenLabs TTS API.
@@ -20,32 +20,34 @@ Functions:
20
  """
21
 
22
  # Standard Library Imports
23
- from dataclasses import dataclass
24
  import logging
25
  import random
 
26
  from typing import Optional, Tuple
27
 
28
  # Third-Party Library Imports
29
  from elevenlabs import ElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
30
  from elevenlabs.core import ApiError
31
- from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
32
 
33
  # Local Application Imports
34
- from src.config import logger
35
- from src.utils import save_base64_audio_to_file, validate_env_var
 
36
 
37
 
38
  @dataclass(frozen=True)
39
  class ElevenLabsConfig:
40
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
41
 
42
- api_key: str = validate_env_var("ELEVENLABS_API_KEY")
43
  output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
44
 
45
  def __post_init__(self):
46
  # Validate that required attributes are set
47
  if not self.api_key:
48
- raise ValueError("ElevenLabs API key is not set.")
 
49
 
50
  @property
51
  def client(self) -> ElevenLabs:
@@ -97,7 +99,8 @@ def text_to_speech_with_elevenlabs(
97
 
98
  Returns:
99
  Tuple[None, str]: A tuple containing:
100
- - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity across TTS integrations
 
101
  - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
102
 
103
  Raises:
@@ -132,12 +135,15 @@ def text_to_speech_with_elevenlabs(
132
  return None, audio_file_path
133
 
134
  except Exception as e:
135
- if isinstance(e, ApiError):
136
- if e.status_code >= 400 and e.status_code < 500:
137
- raise UnretryableElevenLabsError(
138
- message=f"{e.body['detail']['message']}",
139
- original_exception=e,
140
- ) from e
 
 
 
141
  raise ElevenLabsError(
142
  message=f"{e}",
143
  original_exception=e,
 
1
  """
2
  elevenlabs_api.py
3
 
4
+ This file defines the interaction with the ElevenLabs text-to-speech (TTS) API using the
5
+ ElevenLabs Python SDK. It includes functionality for API request handling and processing API responses.
6
 
7
  Key Features:
8
  - Encapsulates all logic related to the ElevenLabs TTS API.
 
20
  """
21
 
22
  # Standard Library Imports
 
23
  import logging
24
  import random
25
+ from dataclasses import dataclass
26
  from typing import Optional, Tuple
27
 
28
  # Third-Party Library Imports
29
  from elevenlabs import ElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
30
  from elevenlabs.core import ApiError
31
+ from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
32
 
33
  # Local Application Imports
34
+ from src.config import logger, validate_env_var
35
+ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
36
+ from src.utils import save_base64_audio_to_file
37
 
38
 
39
  @dataclass(frozen=True)
40
  class ElevenLabsConfig:
41
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
42
 
43
+ api_key: Optional[str] = None
44
  output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
45
 
46
  def __post_init__(self):
47
  # Validate that required attributes are set
48
  if not self.api_key:
49
+ api_key = validate_env_var("ELEVENLABS_API_KEY")
50
+ object.__setattr__(self, "api_key", api_key)
51
 
52
  @property
53
  def client(self) -> ElevenLabs:
 
99
 
100
  Returns:
101
  Tuple[None, str]: A tuple containing:
102
+ - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
103
+ across TTS integrations
104
  - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
105
 
106
  Raises:
 
135
  return None, audio_file_path
136
 
137
  except Exception as e:
138
+ if (
139
+ isinstance(e, ApiError)
140
+ and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
141
+ ):
142
+ raise UnretryableElevenLabsError(
143
+ message=f"{e.body['detail']['message']}",
144
+ original_exception=e,
145
+ ) from e
146
+
147
  raise ElevenLabsError(
148
  message=f"{e}",
149
  original_exception=e,
src/integrations/hume_api.py CHANGED
@@ -19,21 +19,19 @@ Functions:
19
  """
20
 
21
  # Standard Library Imports
22
- from dataclasses import dataclass
23
  import logging
24
- import os
25
- import random
26
  from typing import Any, Dict, Literal, Optional, Tuple, Union
27
 
28
  # Third-Party Library Imports
29
  import requests
30
  from requests.exceptions import HTTPError
31
- from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
32
 
33
  # Local Application Imports
34
- from src.config import logger
35
- from src.utils import save_base64_audio_to_file, validate_env_var
36
-
37
 
38
  HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
39
  """ Support audio file formats for the Hume TTS API"""
@@ -43,7 +41,7 @@ HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
43
  class HumeConfig:
44
  """Immutable configuration for interacting with the Hume TTS API."""
45
 
46
- api_key: str = validate_env_var("HUME_API_KEY")
47
  url: str = "https://test-api.hume.ai/v0/tts/octave"
48
  headers: dict = None
49
  file_format: HumeSupportedFileFormat = "mp3"
@@ -51,7 +49,8 @@ class HumeConfig:
51
  def __post_init__(self):
52
  # Validate required attributes
53
  if not self.api_key:
54
- raise ValueError("Hume API key is not set.")
 
55
  if not self.url:
56
  raise ValueError("Hume TTS endpoint URL is not set.")
57
  if not self.file_format:
@@ -118,17 +117,19 @@ def text_to_speech_with_hume(
118
 
119
  Returns:
120
  Union[Tuple[str, str], Tuple[str, str, str, str]]:
121
- - If num_generations == 1: A tuple in the form (generation_a_id, audio_a_path).
122
- - If num_generations == 2: A tuple in the form (generation_a_id, audio_a_path, generation_b_id, audio_b_path).
123
 
124
  Raises:
125
  ValueError: If num_generations is not 1 or 2.
126
  HumeError: If there is an error communicating with the Hume TTS API or parsing its response.
127
  UnretryableHumeError: If a client-side HTTP error (status code in the 4xx range) is encountered.
128
- Exception: Any other exceptions raised during the request or processing will be wrapped and re-raised as HumeError.
 
129
  """
130
  logger.debug(
131
- f"Processing TTS with Hume. Prompt length: {len(character_description)} characters. Text length: {len(text)} characters."
 
132
  )
133
 
134
  if num_generations < 1 or num_generations > 2:
@@ -170,12 +171,19 @@ def text_to_speech_with_hume(
170
  return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
171
 
172
  except Exception as e:
173
- if isinstance(e, HTTPError):
174
- if e.response.status_code >= 400 and e.response.status_code < 500:
175
- raise UnretryableHumeError(
176
- message=f"{e.response.text}", original_exception=e
177
- ) from e
178
- raise HumeError(message=f"{e}", original_exception=e) from e
 
 
 
 
 
 
 
179
 
180
 
181
  def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
 
19
  """
20
 
21
  # Standard Library Imports
 
22
  import logging
23
+ from dataclasses import dataclass
 
24
  from typing import Any, Dict, Literal, Optional, Tuple, Union
25
 
26
  # Third-Party Library Imports
27
  import requests
28
  from requests.exceptions import HTTPError
29
+ from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
+ from src.config import logger, validate_env_var
33
+ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
+ from src.utils import save_base64_audio_to_file
35
 
36
  HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
37
  """ Support audio file formats for the Hume TTS API"""
 
41
  class HumeConfig:
42
  """Immutable configuration for interacting with the Hume TTS API."""
43
 
44
+ api_key: Optional[str] = None
45
  url: str = "https://test-api.hume.ai/v0/tts/octave"
46
  headers: dict = None
47
  file_format: HumeSupportedFileFormat = "mp3"
 
49
  def __post_init__(self):
50
  # Validate required attributes
51
  if not self.api_key:
52
+ api_key = validate_env_var("HUME_API_KEY")
53
+ object.__setattr__(self, "api_key", api_key)
54
  if not self.url:
55
  raise ValueError("Hume TTS endpoint URL is not set.")
56
  if not self.file_format:
 
117
 
118
  Returns:
119
  Union[Tuple[str, str], Tuple[str, str, str, str]]:
120
+ - If num_generations == 1: (generation_a_id, audio_a_path).
121
+ - If num_generations == 2: (generation_a_id, audio_a_path, generation_b_id, audio_b_path).
122
 
123
  Raises:
124
  ValueError: If num_generations is not 1 or 2.
125
  HumeError: If there is an error communicating with the Hume TTS API or parsing its response.
126
  UnretryableHumeError: If a client-side HTTP error (status code in the 4xx range) is encountered.
127
+ Exception: Any other exceptions raised during the request or processing will be wrapped and
128
+ re-raised as HumeError.
129
  """
130
  logger.debug(
131
+ f"Processing TTS with Hume. Prompt length: {len(character_description)} characters. "
132
+ f"Text length: {len(text)} characters."
133
  )
134
 
135
  if num_generations < 1 or num_generations > 2:
 
171
  return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
172
 
173
  except Exception as e:
174
+ if (
175
+ isinstance(e, HTTPError)
176
+ and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
177
+ ):
178
+ raise UnretryableHumeError(
179
+ message=f"{e.response.text}",
180
+ original_exception=e,
181
+ ) from e
182
+
183
+ raise HumeError(
184
+ message=f"{e}",
185
+ original_exception=e,
186
+ ) from e
187
 
188
 
189
  def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
src/utils.py CHANGED
@@ -8,15 +8,15 @@ These functions provide reusable logic to simplify code in other modules.
8
  # Standard Library Imports
9
  import base64
10
  import json
11
- import os
12
  import random
13
  import time
 
14
  from typing import Tuple
15
 
16
  # Local Application Imports
17
  from src import constants
18
  from src.config import AUDIO_DIR, logger
19
- from src.types import (
20
  ComparisonType,
21
  Option,
22
  OptionKey,
@@ -56,38 +56,6 @@ def truncate_text(text: str, max_length: int = 50) -> str:
56
  return text[:max_length] + ("..." if is_truncated else "")
57
 
58
 
59
- def validate_env_var(var_name: str) -> str:
60
- """
61
- Validates that an environment variable is set and returns its value.
62
-
63
- Args:
64
- var_name (str): The name of the environment variable to validate.
65
-
66
- Returns:
67
- str: The value of the environment variable.
68
-
69
- Raises:
70
- ValueError: If the environment variable is not set.
71
-
72
- Examples:
73
- >>> import os
74
- >>> os.environ["EXAMPLE_VAR"] = "example_value"
75
- >>> validate_env_var("EXAMPLE_VAR")
76
- 'example_value'
77
-
78
- >>> validate_env_var("MISSING_VAR")
79
- Traceback (most recent call last):
80
- ...
81
- ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
82
- """
83
- value = os.environ.get(var_name, "")
84
- if not value:
85
- raise ValueError(
86
- f"{var_name} is not set. Please ensure it is defined in your environment variables."
87
- )
88
- return value
89
-
90
-
91
  def validate_character_description_length(character_description: str) -> None:
92
  """
93
  Validates that a voice description is within specified minimum and maximum length limits.
@@ -114,16 +82,20 @@ def validate_character_description_length(character_description: str) -> None:
114
 
115
  if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
116
  raise ValueError(
117
- f"Your character description is too short. Please enter at least {constants.CHARACTER_DESCRIPTION_MIN_LENGTH} characters. "
 
118
  f"(Current length: {character_description_length})"
119
  )
120
  if character_description_length > constants.CHARACTER_DESCRIPTION_MAX_LENGTH:
121
  raise ValueError(
122
- f"Your character description is too long. Please limit it to {constants.CHARACTER_DESCRIPTION_MAX_LENGTH} characters. "
 
123
  f"(Current length: {character_description_length})"
124
  )
 
 
125
  logger.debug(
126
- f"Character description length validation passed for character_description: {truncate_text(stripped_character_description)}"
127
  )
128
 
129
 
@@ -145,28 +117,29 @@ def delete_files_older_than(directory: str, minutes: int = 30) -> None:
145
  now = time.time()
146
  # Convert the minutes threshold to seconds.
147
  cutoff = now - (minutes * 60)
 
148
 
149
  # Iterate over all files in the directory.
150
- for filename in os.listdir(directory):
151
- file_path = os.path.join(directory, filename)
152
- file_mod_time = os.path.getmtime(file_path)
153
- # If the file's modification time is older than the cutoff, delete it.
154
- if file_mod_time < cutoff:
155
- try:
156
- os.remove(file_path)
157
- print(f"Deleted: {file_path}")
158
- except Exception as e:
159
- print(f"Error deleting {file_path}: {e}")
160
 
161
 
162
  def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
163
  """
164
  Decode a base64-encoded audio string and write the resulting binary data to a file
165
  within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
166
- file all files within the directory which are more than 30 minutes old are deleted.
167
- This function verifies the file was created, logs the absolute and relative file
168
- paths, and returns a path relative to the current working directory (which is what
169
- Gradio requires to serve static files).
170
 
171
  Args:
172
  base64_audio (str): The base64-encoded string representing the audio data.
@@ -178,38 +151,38 @@ def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
178
  str: The relative file path to the saved audio file.
179
 
180
  Raises:
181
- Exception: Propagates any exceptions raised during the decoding or file I/O operations.
182
  """
183
  # Decode the base64-encoded audio into binary data.
184
  audio_bytes = base64.b64decode(base64_audio)
185
 
186
- # Construct the full absolute file path within the AUDIO_DIR directory.
187
- file_path = os.path.join(AUDIO_DIR, filename)
188
 
189
- # Delete all audio files older than 30 minutes before writing new audio file.
190
  num_minutes = 30
191
  delete_files_older_than(AUDIO_DIR, num_minutes)
192
 
193
  # Write the binary audio data to the file.
194
- with open(file_path, "wb") as audio_file:
195
  audio_file.write(audio_bytes)
196
 
197
  # Verify that the file was created.
198
- if not os.path.exists(file_path):
199
  raise FileNotFoundError(f"Audio file was not created at {file_path}")
200
 
201
- # Compute a relative path for Gradio to serve (relative to the project root).
202
- relative_path = os.path.relpath(file_path, os.getcwd())
203
  logger.debug(f"Audio file absolute path: {file_path}")
204
  logger.debug(f"Audio file relative path: {relative_path}")
205
 
206
- return relative_path
207
 
208
 
209
  def choose_providers(
210
  text_modified: bool,
211
  character_description: str,
212
- ) -> Tuple[ComparisonType, TTSProviderName, TTSProviderName]:
213
  """
214
  Select two TTS providers based on whether the text has been modified.
215
 
@@ -236,69 +209,48 @@ def choose_providers(
236
  else random.choice(constants.TTS_PROVIDERS)
237
  )
238
 
239
- match provider_b:
240
- case constants.HUME_AI:
241
- comparison_type = constants.HUME_TO_HUME
242
- case constants.ELEVENLABS:
243
- comparison_type = constants.HUME_TO_ELEVENLABS
244
-
245
- return comparison_type, provider_a, provider_b
246
 
247
 
248
- def create_shuffled_tts_options(
249
- provider_a: TTSProviderName,
250
- audio_a: str,
251
- generation_id_a: str,
252
- provider_b: TTSProviderName,
253
- audio_b: str,
254
- generation_id_b: str,
255
- ) -> OptionMap:
256
  """
257
  Create and shuffle TTS generation options.
258
 
259
- This function creates two Option instances from the provided TTS details, shuffles them,
260
- then extracts the providers, audio file paths, and generation IDs from the shuffled options,
261
- and finally maps the options to an OptionMap.
262
 
263
  Args:
264
- provider_a (TTSProviderName): The TTS provider for the first generation.
265
- audio_a (str): The relative file path to the audio file for the first generation.
266
- generation_id_a (str): The generation ID for the first generation.
267
- provider_b (TTSProviderName): The TTS provider for the second generation.
268
- audio_b (str): The relative file path to the audio file for the second generation.
269
- generation_id_b (str): The generation ID for the second generation.
270
 
271
  Returns:
272
- options_map (OptionMap): Mapping of TTS output options.
 
273
  """
274
  # Create a list of Option instances for the available providers.
275
- options = [
276
- Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a),
277
- Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b),
278
- ]
279
 
280
  # Randomly shuffle the list of options.
281
  random.shuffle(options)
282
 
283
  # Unpack the two options.
284
- option_a, option_b = options
285
 
286
  # Build a mapping from option constants to the corresponding providers.
287
- options_map: OptionMap = {
288
  "option_a": {
289
- "provider": option_a.provider,
290
- "generation_id": option_a.generation_id,
291
- "audio_file_path": option_a.audio,
292
  },
293
  "option_b": {
294
- "provider": option_b.provider,
295
- "generation_id": option_b.generation_id,
296
- "audio_file_path": option_b.audio,
297
  },
298
  }
299
 
300
- return options_map
301
-
302
 
303
  def determine_selected_option(
304
  selected_option_button: str,
@@ -324,11 +276,38 @@ def determine_selected_option(
324
 
325
  return selected_option, other_option
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  def submit_voting_results(
329
  option_map: OptionMap,
330
  selected_option: str,
331
- comparison_type: ComparisonType,
332
  text_modified: bool,
333
  character_description: str,
334
  text: str,
@@ -347,12 +326,16 @@ def submit_voting_results(
347
  Returns:
348
  VotingResults: The constructed voting results dictionary.
349
  """
 
 
 
 
350
  voting_results: VotingResults = {
351
  "comparison_type": comparison_type,
352
  "winning_provider": option_map[selected_option]["provider"],
353
  "winning_option": selected_option,
354
- "option_a_provider": option_map[constants.OPTION_A_KEY]["provider"],
355
- "option_b_provider": option_map[constants.OPTION_B_KEY]["provider"],
356
  "option_a_generation_id": option_map[constants.OPTION_A_KEY]["generation_id"],
357
  "option_b_generation_id": option_map[constants.OPTION_B_KEY]["generation_id"],
358
  "voice_description": character_description,
 
8
  # Standard Library Imports
9
  import base64
10
  import json
 
11
  import random
12
  import time
13
+ from pathlib import Path
14
  from typing import Tuple
15
 
16
  # Local Application Imports
17
  from src import constants
18
  from src.config import AUDIO_DIR, logger
19
+ from src.custom_types import (
20
  ComparisonType,
21
  Option,
22
  OptionKey,
 
56
  return text[:max_length] + ("..." if is_truncated else "")
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def validate_character_description_length(character_description: str) -> None:
60
  """
61
  Validates that a voice description is within specified minimum and maximum length limits.
 
82
 
83
  if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
84
  raise ValueError(
85
+ f"Your character description is too short. Please enter at least "
86
+ f"{constants.CHARACTER_DESCRIPTION_MIN_LENGTH} characters. "
87
  f"(Current length: {character_description_length})"
88
  )
89
  if character_description_length > constants.CHARACTER_DESCRIPTION_MAX_LENGTH:
90
  raise ValueError(
91
+ f"Your character description is too long. Please limit it to "
92
+ f"{constants.CHARACTER_DESCRIPTION_MAX_LENGTH} characters. "
93
  f"(Current length: {character_description_length})"
94
  )
95
+
96
+ truncated_description = truncate_text(stripped_character_description)
97
  logger.debug(
98
+ f"Character description length validation passed for character_description: {truncated_description}"
99
  )
100
 
101
 
 
117
  now = time.time()
118
  # Convert the minutes threshold to seconds.
119
  cutoff = now - (minutes * 60)
120
+ dir_path = Path(directory)
121
 
122
  # Iterate over all files in the directory.
123
+ for file_path in dir_path.iterdir():
124
+ if file_path.is_file():
125
+ file_mod_time = file_path.stat().st_mtime
126
+ # If the file's modification time is older than the cutoff, delete it.
127
+ if file_mod_time < cutoff:
128
+ try:
129
+ file_path.unlink()
130
+ logger.info(f"Deleted: {file_path}")
131
+ except Exception as e:
132
+ logger.exception(f"Error deleting {file_path}: {e}")
133
 
134
 
135
def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
    """
    Decode a base64-encoded audio string and write the resulting binary data to a file
    within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
    file, all files within the directory that are more than 30 minutes old are deleted.
    This function verifies the file was created, logs both the absolute and relative
    file paths, and returns a path relative to the current working directory
    (as required by Gradio for serving static files).

    Args:
        base64_audio (str): The base64-encoded string representing the audio data.
        filename (str): The name of the file (including extension) to write the audio to.

    Returns:
        str: The relative file path to the saved audio file.

    Raises:
        FileNotFoundError: If the audio file was not created.
    """
    # Decode the base64-encoded audio into binary data.
    audio_bytes = base64.b64decode(base64_audio)

    # Construct the full file path within the AUDIO_DIR directory using Path.
    file_path = Path(AUDIO_DIR) / filename

    # Delete all audio files older than 30 minutes before writing the new audio file.
    num_minutes = 30
    delete_files_older_than(AUDIO_DIR, num_minutes)

    # Write the binary audio data to the file.
    with file_path.open("wb") as audio_file:
        audio_file.write(audio_bytes)

    # Verify that the file was created.
    if not file_path.exists():
        raise FileNotFoundError(f"Audio file was not created at {file_path}")

    # Compute a path relative to the current working directory for Gradio to serve.
    # NOTE: Path.relative_to raises ValueError when the path is not a subpath of cwd
    # (e.g. when AUDIO_DIR is itself a relative path, the unresolved file_path can
    # never be relative to the absolute cwd). Resolve first, and fall back to the
    # resolved path when it lies outside the working directory — this preserves the
    # tolerant behavior of the os.path.relpath call this code replaced.
    resolved_path = file_path.resolve()
    cwd = Path.cwd()
    relative_path = (
        resolved_path.relative_to(cwd) if resolved_path.is_relative_to(cwd) else resolved_path
    )
    logger.debug(f"Audio file absolute path: {file_path}")
    logger.debug(f"Audio file relative path: {relative_path}")

    return str(relative_path)
180
 
181
 
182
  def choose_providers(
183
  text_modified: bool,
184
  character_description: str,
185
+ ) -> Tuple[TTSProviderName, TTSProviderName]:
186
  """
187
  Select two TTS providers based on whether the text has been modified.
188
 
 
209
  else random.choice(constants.TTS_PROVIDERS)
210
  )
211
 
212
+ return provider_a, provider_b
 
 
 
 
 
 
213
 
214
 
215
def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
    """
    Randomly order two TTS generation options and map them into an OptionMap.

    The two options are shuffled so that neither provider is predictably assigned
    to 'option_a' or 'option_b'; each shuffled option is then flattened into a
    dict carrying its provider, generation ID, and audio file path.

    Args:
        option_a (Option): The first TTS generation option.
        option_b (Option): The second TTS generation option.

    Returns:
        OptionMap: A mapping with keys 'option_a' and 'option_b', each holding
        the provider, generation ID, and audio file path of one shuffled option.
    """
    shuffled = [option_a, option_b]
    random.shuffle(shuffled)

    def to_entry(option: Option) -> dict:
        # Flatten an Option into the dict shape expected by OptionMap.
        return {
            "provider": option.provider,
            "generation_id": option.generation_id,
            "audio_file_path": option.audio,
        }

    first, second = shuffled
    return {"option_a": to_entry(first), "option_b": to_entry(second)}
253
 
 
 
254
 
255
  def determine_selected_option(
256
  selected_option_button: str,
 
276
 
277
  return selected_option, other_option
278
 
279
def determine_comparison_type(
    provider_a: TTSProviderName,
    provider_b: TTSProviderName
) -> ComparisonType:
    """
    Map a pair of TTS provider names to the comparison type they represent.

    A Hume/Hume pairing yields HUME_TO_HUME; any pairing that includes
    ElevenLabs yields HUME_TO_ELEVENLABS.

    Args:
        provider_a (TTSProviderName): The first TTS provider.
        provider_b (TTSProviderName): The second TTS provider.

    Returns:
        ComparisonType: The determined comparison type.

    Raises:
        ValueError: If the combination of providers is not recognized.
    """
    providers = (provider_a, provider_b)

    if providers == (constants.HUME_AI, constants.HUME_AI):
        return constants.HUME_TO_HUME

    if constants.ELEVENLABS in providers:
        return constants.HUME_TO_ELEVENLABS

    raise ValueError(f"Invalid provider combination: {provider_a}, {provider_b}")
306
+
307
 
308
  def submit_voting_results(
309
  option_map: OptionMap,
310
  selected_option: str,
 
311
  text_modified: bool,
312
  character_description: str,
313
  text: str,
 
326
  Returns:
327
  VotingResults: The constructed voting results dictionary.
328
  """
329
+ provider_a: TTSProviderName = option_map[constants.OPTION_A_KEY]["provider"]
330
+ provider_b: TTSProviderName = option_map[constants.OPTION_B_KEY]["provider"]
331
+ comparison_type: ComparisonType = determine_comparison_type(provider_a, provider_b)
332
+
333
  voting_results: VotingResults = {
334
  "comparison_type": comparison_type,
335
  "winning_provider": option_map[selected_option]["provider"],
336
  "winning_option": selected_option,
337
+ "option_a_provider": provider_a,
338
+ "option_b_provider": provider_b,
339
  "option_a_generation_id": option_map[constants.OPTION_A_KEY]["generation_id"],
340
  "option_b_generation_id": option_map[constants.OPTION_B_KEY]["generation_id"],
341
  "voice_description": character_description,