Zachary Greathouse twitchard committed on
Commit
9ed181c
·
unverified ·
1 Parent(s): 548169b

Zg/add openai (#18)

Browse files

* Add OpenAI python SDK to dependencies

* Fix Anthropic clean API Error message.

* Update constants and custom types associated with TTS providers to include OpenAI

* Add OpenAI integration

* Update logic for selecting providers, add OpenAI tts to UI

* Fix typo in openai_api.py

* Update docstrings in openai_api.py

* Update leaderboard results query to include OpenAI results

* Add citation

* Adjust padding in UI components

* Adjust padding in UI components in citation

* Add transitive dependency override for sounddevice in pyproject.toml

* remove sounddevice

* Add warning toast for custom text inputs

* Improve leaderboard results query to account for zero records, and update to only include relevant comparison types for each provider.

---------

Co-authored-by: twitchard <[email protected]>

pyproject.toml CHANGED
@@ -12,13 +12,17 @@ dependencies = [
12
  "gradio>=5.18.0",
13
  "greenlet>=2.0.0",
14
  "hume>=0.7.8",
 
15
  "python-dotenv>=1.0.1",
16
  "sqlalchemy>=2.0.0",
17
  "tenacity>=9.0.0",
18
  ]
19
 
20
  [tool.uv]
21
- override-dependencies = ["aiofiles==24.1.0"]
 
 
 
22
  dev-dependencies = [
23
  "mypy>=1.15.0",
24
  "pre-commit>=4.1.0",
@@ -84,7 +88,7 @@ select = [
84
  "TID",
85
  "W",
86
  ]
87
- per-file-ignores = { "src/constants.py" = ["E501"] }
88
 
89
  [tool.ruff.lint.pycodestyle]
90
  max-line-length = 120
 
12
  "gradio>=5.18.0",
13
  "greenlet>=2.0.0",
14
  "hume>=0.7.8",
15
+ "openai>=1.68.0",
16
  "python-dotenv>=1.0.1",
17
  "sqlalchemy>=2.0.0",
18
  "tenacity>=9.0.0",
19
  ]
20
 
21
  [tool.uv]
22
+ override-dependencies = [
23
+ "aiofiles==24.1.0",
24
+ "sounddevice; sys_platform == 'never'",
25
+ ]
26
  dev-dependencies = [
27
  "mypy>=1.15.0",
28
  "pre-commit>=4.1.0",
 
88
  "TID",
89
  "W",
90
  ]
91
+ per-file-ignores = { "src/constants.py" = ["E501"], "src/frontend.py" = ["E501"] }
92
 
93
  [tool.ruff.lint.pycodestyle]
94
  max-line-length = 120
src/config.py CHANGED
@@ -22,7 +22,7 @@ from dotenv import load_dotenv
22
 
23
  # Local Application Imports
24
  if TYPE_CHECKING:
25
- from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig
26
 
27
  logger: logging.Logger = logging.getLogger("expressive_tts_arena")
28
 
@@ -37,6 +37,7 @@ class Config:
37
  anthropic_config: "AnthropicConfig"
38
  hume_config: "HumeConfig"
39
  elevenlabs_config: "ElevenLabsConfig"
 
40
 
41
  @classmethod
42
  def get(cls) -> "Config":
@@ -79,7 +80,7 @@ class Config:
79
  if debug:
80
  logger.debug("DEBUG mode enabled.")
81
 
82
- from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig
83
 
84
  return Config(
85
  app_env=app_env,
@@ -89,4 +90,5 @@ class Config:
89
  anthropic_config=AnthropicConfig(),
90
  hume_config=HumeConfig(),
91
  elevenlabs_config=ElevenLabsConfig(),
 
92
  )
 
22
 
23
  # Local Application Imports
24
  if TYPE_CHECKING:
25
+ from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig, OpenAIConfig
26
 
27
  logger: logging.Logger = logging.getLogger("expressive_tts_arena")
28
 
 
37
  anthropic_config: "AnthropicConfig"
38
  hume_config: "HumeConfig"
39
  elevenlabs_config: "ElevenLabsConfig"
40
+ openai_config: "OpenAIConfig"
41
 
42
  @classmethod
43
  def get(cls) -> "Config":
 
80
  if debug:
81
  logger.debug("DEBUG mode enabled.")
82
 
83
+ from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig, OpenAIConfig
84
 
85
  return Config(
86
  app_env=app_env,
 
90
  anthropic_config=AnthropicConfig(),
91
  hume_config=HumeConfig(),
92
  elevenlabs_config=ElevenLabsConfig(),
93
+ openai_config=OpenAIConfig(),
94
  )
src/constants.py CHANGED
@@ -10,6 +10,7 @@ from typing import Dict, List
10
  # Third-Party Library Imports
11
  from src.custom_types import (
12
  ComparisonType,
 
13
  OptionKey,
14
  OptionLabel,
15
  TTSProviderName,
@@ -23,8 +24,9 @@ RATE_LIMIT_ERROR_CODE = 429
23
  # UI constants
24
  HUME_AI: TTSProviderName = "Hume AI"
25
  ELEVENLABS: TTSProviderName = "ElevenLabs"
 
26
 
27
- TTS_PROVIDERS: List[TTSProviderName] = ["Hume AI", "ElevenLabs"]
28
  TTS_PROVIDER_LINKS = {
29
  "Hume AI": {
30
  "provider_link": "https://hume.ai/",
@@ -33,11 +35,17 @@ TTS_PROVIDER_LINKS = {
33
  "ElevenLabs": {
34
  "provider_link": "https://elevenlabs.io/",
35
  "model_link": "https://elevenlabs.io/blog/rvg",
 
 
 
 
36
  }
37
  }
38
 
39
  HUME_TO_HUME: ComparisonType = "Hume AI - Hume AI"
40
  HUME_TO_ELEVENLABS: ComparisonType = "Hume AI - ElevenLabs"
 
 
41
 
42
  CHARACTER_DESCRIPTION_MIN_LENGTH: int = 20
43
  CHARACTER_DESCRIPTION_MAX_LENGTH: int = 400
@@ -162,3 +170,9 @@ META_TAGS: List[Dict[str, str]] = [
162
  }
163
  ]
164
 
 
 
 
 
 
 
 
10
  # Third-Party Library Imports
11
  from src.custom_types import (
12
  ComparisonType,
13
+ LeaderboardEntry,
14
  OptionKey,
15
  OptionLabel,
16
  TTSProviderName,
 
24
  # UI constants
25
  HUME_AI: TTSProviderName = "Hume AI"
26
  ELEVENLABS: TTSProviderName = "ElevenLabs"
27
+ OPENAI: TTSProviderName = "OpenAI"
28
 
29
+ TTS_PROVIDERS: List[TTSProviderName] = ["Hume AI", "ElevenLabs", "OpenAI"]
30
  TTS_PROVIDER_LINKS = {
31
  "Hume AI": {
32
  "provider_link": "https://hume.ai/",
 
35
  "ElevenLabs": {
36
  "provider_link": "https://elevenlabs.io/",
37
  "model_link": "https://elevenlabs.io/blog/rvg",
38
+ },
39
+ "OpenAI": {
40
+ "provider_link": "https://openai.com/",
41
+ "model_link": "https://platform.openai.com/docs/models/gpt-4o-mini-tts",
42
  }
43
  }
44
 
45
  HUME_TO_HUME: ComparisonType = "Hume AI - Hume AI"
46
  HUME_TO_ELEVENLABS: ComparisonType = "Hume AI - ElevenLabs"
47
+ HUME_TO_OPENAI: ComparisonType = "Hume AI - OpenAI"
48
+ OPENAI_TO_ELEVENLABS: ComparisonType = "OpenAI - ElevenLabs"
49
 
50
  CHARACTER_DESCRIPTION_MIN_LENGTH: int = 20
51
  CHARACTER_DESCRIPTION_MAX_LENGTH: int = 400
 
170
  }
171
  ]
172
 
173
+ # Reflects an empty leaderboard state
174
+ DEFAULT_LEADERBOARD: List[LeaderboardEntry] = [
175
+ LeaderboardEntry("1", "", "", "0%", "0"),
176
+ LeaderboardEntry("2", "", "", "0%", "0"),
177
+ LeaderboardEntry("3", "", "", "0%", "0"),
178
+ ]
src/custom_types.py CHANGED
@@ -7,11 +7,16 @@ This module defines custom types for the application.
7
  # Standard Library Imports
8
  from typing import List, Literal, NamedTuple, Optional, TypedDict
9
 
10
- TTSProviderName = Literal["Hume AI", "ElevenLabs"]
11
  """TTSProviderName represents the allowed provider names for TTS services."""
12
 
13
 
14
- ComparisonType = Literal["Hume AI - Hume AI", "Hume AI - ElevenLabs"]
 
 
 
 
 
15
  """Comparison type denoting which providers are compared."""
16
 
17
 
 
7
  # Standard Library Imports
8
  from typing import List, Literal, NamedTuple, Optional, TypedDict
9
 
10
+ TTSProviderName = Literal["Hume AI", "ElevenLabs", "OpenAI"]
11
  """TTSProviderName represents the allowed provider names for TTS services."""
12
 
13
 
14
+ ComparisonType = Literal[
15
+ "Hume AI - Hume AI",
16
+ "Hume AI - ElevenLabs",
17
+ "Hume AI - OpenAI",
18
+ "OpenAI - ElevenLabs"
19
+ ]
20
  """Comparison type denoting which providers are compared."""
21
 
22
 
src/database/crud.py CHANGED
@@ -12,6 +12,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
12
 
13
  # Local Application Imports
14
  from src.config import logger
 
15
  from src.custom_types import LeaderboardEntry, LeaderboardTableEntries, VotingResults
16
  from src.database.models import VoteResult
17
 
@@ -72,8 +73,8 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
72
  """
73
  Fetches voting statistics from the database to populate a leaderboard.
74
 
75
- This function calculates voting statistics for TTS providers, excluding Hume-to-Hume
76
- comparisons, and returns data structured for a leaderboard display.
77
 
78
  Args:
79
  db (AsyncSession): The SQLAlchemy async database session.
@@ -82,46 +83,54 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
82
  LeaderboardTableEntries: A list of LeaderboardEntry objects containing rank,
83
  provider name, model name, win rate, and total votes.
84
  """
85
- default_leaderboard = [
86
- LeaderboardEntry("1", "", "", "0%", "0"),
87
- LeaderboardEntry("2", "", "", "0%", "0")
88
- ]
89
-
90
  try:
91
  query = text(
92
  """
93
- WITH provider_stats AS (
94
- -- Get wins for Hume AI
 
 
95
  SELECT
96
  'Hume AI' as provider,
97
  COUNT(*) as total_comparisons,
98
  SUM(CASE WHEN winning_provider = 'Hume AI' THEN 1 ELSE 0 END) as wins
99
  FROM vote_results
100
- WHERE comparison_type != 'Hume AI - Hume AI'
101
 
102
  UNION ALL
103
 
104
- -- Get wins for ElevenLabs
105
  SELECT
106
  'ElevenLabs' as provider,
107
  COUNT(*) as total_comparisons,
108
  SUM(CASE WHEN winning_provider = 'ElevenLabs' THEN 1 ELSE 0 END) as wins
109
  FROM vote_results
110
- WHERE comparison_type != 'Hume AI - Hume AI'
 
 
 
 
 
 
 
 
 
111
  )
112
  SELECT
113
- provider,
114
  CASE
115
- WHEN provider = 'Hume AI' THEN 'Octave'
116
- WHEN provider = 'ElevenLabs' THEN 'Voice Design'
 
117
  END as model,
118
  CASE
119
- WHEN total_comparisons > 0 THEN ROUND((wins * 100.0 / total_comparisons)::numeric, 2)
 
120
  ELSE 0
121
  END as win_rate,
122
- wins as total_votes
123
- FROM provider_stats
124
- ORDER BY win_rate DESC;
 
125
  """
126
  )
127
 
@@ -143,13 +152,14 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
143
 
144
  # If no data was found, return default entries
145
  if not leaderboard_data:
146
- return default_leaderboard
147
 
148
  return leaderboard_data
149
 
150
  except SQLAlchemyError as e:
151
  logger.error(f"Database error while fetching leaderboard stats: {e}")
152
- return default_leaderboard
153
  except Exception as e:
154
  logger.error(f"Unexpected error while fetching leaderboard stats: {e}")
155
- return default_leaderboard
 
 
12
 
13
  # Local Application Imports
14
  from src.config import logger
15
+ from src.constants import DEFAULT_LEADERBOARD
16
  from src.custom_types import LeaderboardEntry, LeaderboardTableEntries, VotingResults
17
  from src.database.models import VoteResult
18
 
 
73
  """
74
  Fetches voting statistics from the database to populate a leaderboard.
75
 
76
+ This function calculates voting statistics for TTS providers, using only the relevant
77
+ comparison types for each provider, and returns data structured for a leaderboard display.
78
 
79
  Args:
80
  db (AsyncSession): The SQLAlchemy async database session.
 
83
  LeaderboardTableEntries: A list of LeaderboardEntry objects containing rank,
84
  provider name, model name, win rate, and total votes.
85
  """
 
 
 
 
 
86
  try:
87
  query = text(
88
  """
89
+ WITH all_providers AS (
90
+ SELECT provider FROM (VALUES ('Hume AI'), ('ElevenLabs'), ('OpenAI')) AS p(provider)
91
+ ),
92
+ provider_stats AS (
93
  SELECT
94
  'Hume AI' as provider,
95
  COUNT(*) as total_comparisons,
96
  SUM(CASE WHEN winning_provider = 'Hume AI' THEN 1 ELSE 0 END) as wins
97
  FROM vote_results
98
+ WHERE comparison_type IN ('Hume AI - ElevenLabs', 'Hume AI - OpenAI')
99
 
100
  UNION ALL
101
 
 
102
  SELECT
103
  'ElevenLabs' as provider,
104
  COUNT(*) as total_comparisons,
105
  SUM(CASE WHEN winning_provider = 'ElevenLabs' THEN 1 ELSE 0 END) as wins
106
  FROM vote_results
107
+ WHERE comparison_type IN ('Hume AI - ElevenLabs', 'OpenAI - ElevenLabs')
108
+
109
+ UNION ALL
110
+
111
+ SELECT
112
+ 'OpenAI' as provider,
113
+ COUNT(*) as total_comparisons,
114
+ SUM(CASE WHEN winning_provider = 'OpenAI' THEN 1 ELSE 0 END) as wins
115
+ FROM vote_results
116
+ WHERE comparison_type IN ('Hume AI - OpenAI', 'OpenAI - ElevenLabs')
117
  )
118
  SELECT
119
+ p.provider,
120
  CASE
121
+ WHEN p.provider = 'Hume AI' THEN 'Octave'
122
+ WHEN p.provider = 'ElevenLabs' THEN 'Voice Design'
123
+ WHEN p.provider = 'OpenAI' THEN 'gpt-4o-mini-tts'
124
  END as model,
125
  CASE
126
+ WHEN COALESCE(ps.total_comparisons, 0) > 0
127
+ THEN ROUND((COALESCE(ps.wins, 0) * 100.0 / COALESCE(ps.total_comparisons, 1))::numeric, 2)
128
  ELSE 0
129
  END as win_rate,
130
+ COALESCE(ps.wins, 0) as total_votes
131
+ FROM all_providers p
132
+ LEFT JOIN provider_stats ps ON p.provider = ps.provider
133
+ ORDER BY win_rate DESC, total_votes DESC;
134
  """
135
  )
136
 
 
152
 
153
  # If no data was found, return default entries
154
  if not leaderboard_data:
155
+ return DEFAULT_LEADERBOARD
156
 
157
  return leaderboard_data
158
 
159
  except SQLAlchemyError as e:
160
  logger.error(f"Database error while fetching leaderboard stats: {e}")
161
+ return DEFAULT_LEADERBOARD
162
  except Exception as e:
163
  logger.error(f"Unexpected error while fetching leaderboard stats: {e}")
164
+ return DEFAULT_LEADERBOARD
165
+
src/frontend.py CHANGED
@@ -13,7 +13,7 @@ import asyncio
13
  import hashlib
14
  import json
15
  import time
16
- from typing import List, Tuple
17
 
18
  # Third-Party Library Imports
19
  import gradio as gr
@@ -27,15 +27,17 @@ from src.integrations import (
27
  AnthropicError,
28
  ElevenLabsError,
29
  HumeError,
 
30
  generate_text_with_claude,
31
  text_to_speech_with_elevenlabs,
32
  text_to_speech_with_hume,
 
33
  )
34
  from src.utils import (
35
  create_shuffled_tts_options,
36
  determine_selected_option,
37
  get_leaderboard_data,
38
- get_random_provider,
39
  submit_voting_results,
40
  validate_character_description_length,
41
  validate_text_length,
@@ -52,40 +54,40 @@ class Frontend:
52
 
53
  # leaderboard update state
54
  self._leaderboard_data: List[List[str]] = [[]]
55
- self._leaderboard_cache_hash = None
56
- self._last_leaderboard_update_time = 0
57
  self._min_refresh_interval = 30
58
 
59
  async def _update_leaderboard_data(self, force: bool = False) -> bool:
60
  """
61
  Fetches the latest leaderboard data only if needed based on cache and time constraints.
62
-
63
  Args:
64
  force (bool): If True, bypass the time-based throttling.
65
-
66
  Returns:
67
  bool: True if the leaderboard was updated, False otherwise.
68
  """
69
  current_time = time.time()
70
  time_since_last_update = current_time - self._last_leaderboard_update_time
71
-
72
  # Skip update if it's been less than min_refresh_interval seconds and not forced
73
  if not force and time_since_last_update < self._min_refresh_interval:
74
  logger.debug(f"Skipping leaderboard update: last updated {time_since_last_update:.1f}s ago.")
75
  return False
76
-
77
  # Fetch the latest data
78
  latest_leaderboard_data = await get_leaderboard_data(self.db_session_maker)
79
-
80
  # Generate a hash of the new data to check if it's changed
81
  data_str = json.dumps(str(latest_leaderboard_data))
82
  data_hash = hashlib.md5(data_str.encode()).hexdigest()
83
-
84
  # Check if the data has changed
85
  if data_hash == self._leaderboard_cache_hash and not force:
86
  logger.debug("Leaderboard data unchanged since last fetch.")
87
  return False
88
-
89
  # Update the cache and timestamp
90
  self._leaderboard_data = latest_leaderboard_data
91
  self._leaderboard_cache_hash = data_hash
@@ -125,6 +127,24 @@ class Frontend:
125
  logger.error(f"Text Generation Failed: Unexpected error while generating text: {e!s}")
126
  raise gr.Error("Failed to generate text. Please try again shortly.")
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  async def _synthesize_speech(
129
  self,
130
  character_description: str,
@@ -135,9 +155,7 @@ class Frontend:
135
  Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
136
 
137
  This function generates TTS outputs using different providers based on the input text and its modification
138
- state. Depending on the selected providers, it may:
139
- - Synthesize one Hume and one ElevenLabs output (50% chance), or
140
- - Synthesize two Hume outputs (50% chance).
141
 
142
  The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
143
  Additional metadata such as the comparison type, generation IDs, and state information are also returned.
@@ -150,8 +168,8 @@ class Frontend:
150
 
151
  Returns:
152
  Tuple containing:
153
- - dict: Update for the first audio player (with autoplay enabled).
154
- - dict: Update for the second audio player.
155
  - OptionMap: A mapping of option constants to their corresponding TTS providers.
156
  - bool: Flag indicating whether the text was modified.
157
  - str: The original text that was synthesized.
@@ -169,22 +187,19 @@ class Frontend:
169
  raise gr.Error(str(ve))
170
 
171
  text_modified = text != generated_text_state
172
- provider_a = constants.HUME_AI # always compare with Hume
173
- provider_b = get_random_provider(text_modified)
174
 
175
  tts_provider_funcs = {
176
  constants.HUME_AI: text_to_speech_with_hume,
 
177
  constants.ELEVENLABS: text_to_speech_with_elevenlabs,
178
  }
179
 
180
- if provider_b not in tts_provider_funcs:
181
- raise ValueError(f"Unsupported provider: {provider_b}")
182
-
183
  try:
184
  logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
185
 
186
  # Create two tasks for concurrent execution
187
- task_a = text_to_speech_with_hume(character_description, text, self.config)
188
  task_b = tts_provider_funcs[provider_b](character_description, text, self.config)
189
 
190
  # Await both tasks concurrently using asyncio.gather()
@@ -204,12 +219,15 @@ class Frontend:
204
  character_description,
205
  True,
206
  )
207
- except ElevenLabsError as ee:
208
- logger.error(f"Synthesis failed with ElevenLabsError during TTS generation: {ee!s}")
209
- raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
210
  except HumeError as he:
211
  logger.error(f"Synthesis failed with HumeError during TTS generation: {he!s}")
212
  raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
 
 
 
 
 
 
213
  except Exception as e:
214
  logger.error(f"Synthesis failed with an unexpected error during TTS generation: {e!s}")
215
  raise gr.Error("An unexpected error occurred. Please try again shortly.")
@@ -243,7 +261,7 @@ class Frontend:
243
 
244
  Returns:
245
  A tuple of:
246
- - A boolean indicating if the vote was accepted.
247
  - A dict update for hiding vote button A.
248
  - A dict update for hiding vote button B.
249
  - A dict update for showing vote result A textbox.
@@ -330,13 +348,12 @@ class Frontend:
330
  # Only return an update if the data changed or force=True
331
  if data_updated:
332
  return gr.update(value=self._leaderboard_data)
333
- else:
334
- return gr.skip()
335
 
336
  async def _handle_tab_select(self, evt: gr.SelectData):
337
  """
338
  Handles tab selection events and refreshes the leaderboard if the Leaderboard tab is selected.
339
-
340
  Args:
341
  evt (gr.SelectData): Event data containing information about the selected tab
342
 
@@ -431,7 +448,7 @@ class Frontend:
431
  Builds the Title section
432
  """
433
  gr.HTML(
434
- """
435
  <div class="title-container">
436
  <h1>Expressive TTS Arena</h1>
437
  <div class="social-links">
@@ -468,9 +485,9 @@ class Frontend:
468
  with gr.Row():
469
  with gr.Column(scale=5):
470
  gr.HTML(
471
- """
472
  <h2 class="tab-header">📋 Instructions</h2>
473
- <ol>
474
  <li>
475
  Select a sample character, or input a custom character description and click
476
  <strong>"Generate Text"</strong>, to generate your text input.
@@ -487,7 +504,8 @@ class Frontend:
487
  <strong>"Select Option B"</strong>.
488
  </li>
489
  </ol>
490
- """
 
491
  )
492
  randomize_all_button = gr.Button(
493
  "🎲 Randomize All",
@@ -726,6 +744,13 @@ class Frontend:
726
  ],
727
  )
728
 
 
 
 
 
 
 
 
729
  # "Synthesize Speech" button click event handler chain:
730
  # 1. Disable components in the UI
731
  # 2. Reset UI state for audio players and voting results
@@ -854,15 +879,16 @@ class Frontend:
854
  with gr.Row():
855
  with gr.Column(scale=5):
856
  gr.HTML(
857
- """
858
  <h2 class="tab-header">🏆 Leaderboard</h2>
859
- <p>
860
  This leaderboard presents community voting results for different TTS providers, showing which
861
  ones users found more expressive and natural-sounding. The win rate reflects how often each
862
  provider was selected as the preferred option in head-to-head comparisons. Click the refresh
863
  button to see the most up-to-date voting results.
864
  </p>
865
- """
 
866
  )
867
  refresh_button = gr.Button(
868
  "↻ Refresh",
@@ -883,10 +909,64 @@ class Frontend:
883
  elem_id="leaderboard-table"
884
  )
885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886
  # Wrapper for the async refresh function
887
  async def async_refresh_handler():
888
  return await self._refresh_leaderboard(force=True)
889
-
890
  # Handler to re-enable the button after a refresh
891
  def reenable_button():
892
  time.sleep(3) # wait 3 seconds before enabling to prevent excessive data fetching
 
13
  import hashlib
14
  import json
15
  import time
16
+ from typing import List, Optional, Tuple
17
 
18
  # Third-Party Library Imports
19
  import gradio as gr
 
27
  AnthropicError,
28
  ElevenLabsError,
29
  HumeError,
30
+ OpenAIError,
31
  generate_text_with_claude,
32
  text_to_speech_with_elevenlabs,
33
  text_to_speech_with_hume,
34
+ text_to_speech_with_openai,
35
  )
36
  from src.utils import (
37
  create_shuffled_tts_options,
38
  determine_selected_option,
39
  get_leaderboard_data,
40
+ get_random_providers,
41
  submit_voting_results,
42
  validate_character_description_length,
43
  validate_text_length,
 
54
 
55
  # leaderboard update state
56
  self._leaderboard_data: List[List[str]] = [[]]
57
+ self._leaderboard_cache_hash: Optional[str] = None
58
+ self._last_leaderboard_update_time: float = 0.0
59
  self._min_refresh_interval = 30
60
 
61
  async def _update_leaderboard_data(self, force: bool = False) -> bool:
62
  """
63
  Fetches the latest leaderboard data only if needed based on cache and time constraints.
64
+
65
  Args:
66
  force (bool): If True, bypass the time-based throttling.
67
+
68
  Returns:
69
  bool: True if the leaderboard was updated, False otherwise.
70
  """
71
  current_time = time.time()
72
  time_since_last_update = current_time - self._last_leaderboard_update_time
73
+
74
  # Skip update if it's been less than min_refresh_interval seconds and not forced
75
  if not force and time_since_last_update < self._min_refresh_interval:
76
  logger.debug(f"Skipping leaderboard update: last updated {time_since_last_update:.1f}s ago.")
77
  return False
78
+
79
  # Fetch the latest data
80
  latest_leaderboard_data = await get_leaderboard_data(self.db_session_maker)
81
+
82
  # Generate a hash of the new data to check if it's changed
83
  data_str = json.dumps(str(latest_leaderboard_data))
84
  data_hash = hashlib.md5(data_str.encode()).hexdigest()
85
+
86
  # Check if the data has changed
87
  if data_hash == self._leaderboard_cache_hash and not force:
88
  logger.debug("Leaderboard data unchanged since last fetch.")
89
  return False
90
+
91
  # Update the cache and timestamp
92
  self._leaderboard_data = latest_leaderboard_data
93
  self._leaderboard_cache_hash = data_hash
 
127
  logger.error(f"Text Generation Failed: Unexpected error while generating text: {e!s}")
128
  raise gr.Error("Failed to generate text. Please try again shortly.")
129
 
130
+ def _warn_user_about_custom_text(self, text: str, generated_text: str) -> None:
131
+ """
132
+ Shows a warning to the user if they have modified the generated text.
133
+
134
+ When users edit the generated text instead of using it as-is, only Hume Octave
135
+ outputs will be generated for comparison rather than comparing against other
136
+ providers. This function displays a warning to inform users of this limitation.
137
+
138
+ Args:
139
+ text (str): The current text that will be used for synthesis.
140
+ generated_text (str): The original text that was generated by the system.
141
+
142
+ Returns:
143
+ None: This function displays a warning but does not return any value.
144
+ """
145
+ if text != generated_text:
146
+ gr.Warning("When custom text is used, only Hume Octave outputs are generated.")
147
+
148
  async def _synthesize_speech(
149
  self,
150
  character_description: str,
 
155
  Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
156
 
157
  This function generates TTS outputs using different providers based on the input text and its modification
158
+ state.
 
 
159
 
160
  The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
161
  Additional metadata such as the comparison type, generation IDs, and state information are also returned.
 
168
 
169
  Returns:
170
  Tuple containing:
171
+ - gr.Audio: Update for the first audio player (with autoplay enabled).
172
+ - gr.Audio: Update for the second audio player.
173
  - OptionMap: A mapping of option constants to their corresponding TTS providers.
174
  - bool: Flag indicating whether the text was modified.
175
  - str: The original text that was synthesized.
 
187
  raise gr.Error(str(ve))
188
 
189
  text_modified = text != generated_text_state
190
+ provider_a, provider_b = get_random_providers(text_modified)
 
191
 
192
  tts_provider_funcs = {
193
  constants.HUME_AI: text_to_speech_with_hume,
194
+ constants.OPENAI: text_to_speech_with_openai,
195
  constants.ELEVENLABS: text_to_speech_with_elevenlabs,
196
  }
197
 
 
 
 
198
  try:
199
  logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
200
 
201
  # Create two tasks for concurrent execution
202
+ task_a = tts_provider_funcs[provider_a](character_description, text, self.config)
203
  task_b = tts_provider_funcs[provider_b](character_description, text, self.config)
204
 
205
  # Await both tasks concurrently using asyncio.gather()
 
219
  character_description,
220
  True,
221
  )
 
 
 
222
  except HumeError as he:
223
  logger.error(f"Synthesis failed with HumeError during TTS generation: {he!s}")
224
  raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
225
+ except OpenAIError as oe:
226
+ logger.error(f"Synthesis failed with OpenAIError during TTS generation: {oe!s}")
227
+ raise gr.Error(f'There was an issue communicating with the OpenAI API: "{oe.message}"')
228
+ except ElevenLabsError as ee:
229
+ logger.error(f"Synthesis failed with ElevenLabsError during TTS generation: {ee!s}")
230
+ raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
231
  except Exception as e:
232
  logger.error(f"Synthesis failed with an unexpected error during TTS generation: {e!s}")
233
  raise gr.Error("An unexpected error occurred. Please try again shortly.")
 
261
 
262
  Returns:
263
  A tuple of:
264
+ - bool: A boolean indicating if the vote was accepted.
265
  - A dict update for hiding vote button A.
266
  - A dict update for hiding vote button B.
267
  - A dict update for showing vote result A textbox.
 
348
  # Only return an update if the data changed or force=True
349
  if data_updated:
350
  return gr.update(value=self._leaderboard_data)
351
+ return gr.skip()
 
352
 
353
  async def _handle_tab_select(self, evt: gr.SelectData):
354
  """
355
  Handles tab selection events and refreshes the leaderboard if the Leaderboard tab is selected.
356
+
357
  Args:
358
  evt (gr.SelectData): Event data containing information about the selected tab
359
 
 
448
  Builds the Title section
449
  """
450
  gr.HTML(
451
+ value="""
452
  <div class="title-container">
453
  <h1>Expressive TTS Arena</h1>
454
  <div class="social-links">
 
485
  with gr.Row():
486
  with gr.Column(scale=5):
487
  gr.HTML(
488
+ value="""
489
  <h2 class="tab-header">📋 Instructions</h2>
490
+ <ol style="padding-left: 8px;">
491
  <li>
492
  Select a sample character, or input a custom character description and click
493
  <strong>"Generate Text"</strong>, to generate your text input.
 
504
  <strong>"Select Option B"</strong>.
505
  </li>
506
  </ol>
507
+ """,
508
+ padding=False,
509
  )
510
  randomize_all_button = gr.Button(
511
  "🎲 Randomize All",
 
744
  ],
745
  )
746
 
747
+ # "Text Input" blur event handler
748
+ text_input.blur(
749
+ fn=self._warn_user_about_custom_text,
750
+ inputs=[text_input, generated_text_state],
751
+ outputs=[],
752
+ )
753
+
754
  # "Synthesize Speech" button click event handler chain:
755
  # 1. Disable components in the UI
756
  # 2. Reset UI state for audio players and voting results
 
879
  with gr.Row():
880
  with gr.Column(scale=5):
881
  gr.HTML(
882
+ value="""
883
  <h2 class="tab-header">🏆 Leaderboard</h2>
884
+ <p style="padding-left: 8px;">
885
  This leaderboard presents community voting results for different TTS providers, showing which
886
  ones users found more expressive and natural-sounding. The win rate reflects how often each
887
  provider was selected as the preferred option in head-to-head comparisons. Click the refresh
888
  button to see the most up-to-date voting results.
889
  </p>
890
+ """,
891
+ padding=False,
892
  )
893
  refresh_button = gr.Button(
894
  "↻ Refresh",
 
909
  elem_id="leaderboard-table"
910
  )
911
 
912
+ with gr.Accordion(label="Citation", open=False):
913
+ with gr.Column(variant="panel"):
914
+ with gr.Column(variant="panel"):
915
+ gr.HTML(
916
+ value="""
917
+ <h2>Citation</h2>
918
+ <p style="padding: 0 8px;">
919
+ When referencing this leaderboard or its dataset in academic publications, please cite:
920
+ </p>
921
+ """,
922
+ padding=False,
923
+ )
924
+ gr.Markdown(
925
+ value="""
926
+ **BibTeX**
927
+ ```BibTeX
928
+ @misc{expressive-tts-arena,
929
+ title = {Expressive TTS Arena: An Open Platform for Evaluating Text-to-Speech Expressiveness by Human Preference},
930
+ author = {Alan Cowen, Zachary Greathouse, Richard Marmorstein, Jeremy Hadfield},
931
+ year = {2025},
932
+ publisher = {Hugging Face},
933
+ howpublished = {\\url{https://huggingface.co/spaces/HumeAI/expressive-tts-arena}}
934
+ }
935
+ ```
936
+ """
937
+ )
938
+ gr.HTML(
939
+ value="""
940
+ <h2>Terms of Use</h2>
941
+ <p style="padding: 0 8px;">
942
+ Users are required to agree to the following terms before using the service:
943
+ </p>
944
+ <p style="padding: 0 8px;">
945
+ All generated audio clips are provided for research and evaluation purposes only.
946
+ The audio content may not be redistributed or used for commercial purposes without
947
+ explicit permission. Users should not upload any private or personally identifiable
948
+ information. Please report any bugs, issues, or concerns to our
949
+ <a href="https://discord.com/invite/humeai" target="_blank" class="provider-link">
950
+ Discord community
951
+ </a>.
952
+ </p>
953
+ """,
954
+ padding=False,
955
+ )
956
+ gr.HTML(
957
+ value="""
958
+ <h2>Acknowledgements</h2>
959
+ <p style="padding: 0 8px;">
960
+ We thank all participants who contributed their votes to help build this leaderboard.
961
+ </p>
962
+ """,
963
+ padding=False,
964
+ )
965
+
966
  # Wrapper for the async refresh function
967
  async def async_refresh_handler():
968
  return await self._refresh_leaderboard(force=True)
969
+
970
  # Handler to re-enable the button after a refresh
971
  def reenable_button():
972
  time.sleep(3) # wait 3 seconds before enabling to prevent excessive data fetching
src/integrations/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
  from .anthropic_api import AnthropicConfig, AnthropicError, generate_text_with_claude
2
  from .elevenlabs_api import ElevenLabsConfig, ElevenLabsError, text_to_speech_with_elevenlabs
3
  from .hume_api import HumeConfig, HumeError, text_to_speech_with_hume
 
4
 
5
  __all__ = [
6
  "AnthropicConfig",
@@ -9,7 +10,10 @@ __all__ = [
9
  "ElevenLabsError",
10
  "HumeConfig",
11
  "HumeError",
 
 
12
  "generate_text_with_claude",
13
  "text_to_speech_with_elevenlabs",
14
  "text_to_speech_with_hume",
 
15
  ]
 
1
  from .anthropic_api import AnthropicConfig, AnthropicError, generate_text_with_claude
2
  from .elevenlabs_api import ElevenLabsConfig, ElevenLabsError, text_to_speech_with_elevenlabs
3
  from .hume_api import HumeConfig, HumeError, text_to_speech_with_hume
4
+ from .openai_api import OpenAIConfig, OpenAIError, text_to_speech_with_openai
5
 
6
  __all__ = [
7
  "AnthropicConfig",
 
10
  "ElevenLabsError",
11
  "HumeConfig",
12
  "HumeError",
13
+ "OpenAIConfig",
14
+ "OpenAIError",
15
  "generate_text_with_claude",
16
  "text_to_speech_with_elevenlabs",
17
  "text_to_speech_with_hume",
18
+ "text_to_speech_with_openai",
19
  ]
src/integrations/anthropic_api.py CHANGED
@@ -23,7 +23,7 @@ from tenacity import after_log, before_log, retry, retry_if_exception, stop_afte
23
 
24
  # Local Application Imports
25
  from src.config import Config, logger
26
- from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
27
  from src.utils import truncate_text, validate_env_var
28
 
29
  PROMPT_TEMPLATE: str = """
@@ -246,7 +246,7 @@ def _extract_anthropic_error_message(e: APIError) -> str:
246
  Returns:
247
  str: A clean, user-friendly error message suitable for display to end users.
248
  """
249
- clean_message = "An unknown error has occurred. Please try again later."
250
 
251
  if hasattr(e, 'body') and isinstance(e.body, dict):
252
  error_body = e.body
 
23
 
24
  # Local Application Imports
25
  from src.config import Config, logger
26
+ from src.constants import CLIENT_ERROR_CODE, GENERIC_API_ERROR_MESSAGE, SERVER_ERROR_CODE
27
  from src.utils import truncate_text, validate_env_var
28
 
29
  PROMPT_TEMPLATE: str = """
 
246
  Returns:
247
  str: A clean, user-friendly error message suitable for display to end users.
248
  """
249
+ clean_message = GENERIC_API_ERROR_MESSAGE
250
 
251
  if hasattr(e, 'body') and isinstance(e.body, dict):
252
  error_body = e.body
src/integrations/openai_api.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ openai_api.py
3
+
4
+ This file defines the interaction with the OpenAI text-to-speech (TTS) API using the
5
+ OpenAI Python SDK. It includes functionality for API request handling and processing API responses.
6
+
7
+ Key Features:
8
+ - Encapsulates all logic related to the OpenAI TTS API.
9
+ - Implements retry logic using Tenacity for handling transient API errors.
10
+ - Handles received audio and processes it for playback on the web.
11
+ - Provides detailed logging for debugging and error tracking.
12
+ - Utilizes robust error handling (EAFP) to validate API responses.
13
+ """
14
+
15
+ # Standard Library Imports
16
+ import logging
17
+ import random
18
+ import time
19
+ from dataclasses import dataclass, field
20
+ from pathlib import Path
21
+ from typing import Literal, Tuple, Union
22
+
23
+ # Third-Party Library Imports
24
+ from openai import APIError, AsyncOpenAI
25
+ from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_fixed
26
+
27
+ # Local Application Imports
28
+ from src.config import Config, logger
29
+ from src.constants import CLIENT_ERROR_CODE, GENERIC_API_ERROR_MESSAGE, SERVER_ERROR_CODE
30
+ from src.utils import validate_env_var
31
+
32
+
33
@dataclass(frozen=True)
class OpenAIConfig:
    """Immutable configuration for interacting with the OpenAI TTS API."""

    # Resolved from the OPENAI_API_KEY environment variable in __post_init__.
    api_key: str = field(init=False)
    model: str = "gpt-4o-mini-tts"
    response_format: Literal['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] = "mp3"

    def __post_init__(self) -> None:
        """Resolve and pin the API key after dataclass initialization."""
        # object.__setattr__ is required because the dataclass is frozen.
        object.__setattr__(self, "api_key", validate_env_var("OPENAI_API_KEY"))

    @property
    def client(self) -> AsyncOpenAI:
        """
        Build an asynchronous OpenAI client configured with this API key.

        NOTE(review): a new AsyncOpenAI instance is constructed on every
        access of this property; it is not cached.

        Returns:
            AsyncOpenAI: Configured async client instance.
        """
        return AsyncOpenAI(api_key=self.api_key)

    @staticmethod
    def select_random_base_voice() -> str:
        """
        Pick one of OpenAI's built-in TTS voices at random.

        The OpenAI Python SDK does not export a type enumerating the base
        voice names, so the available options are hardcoded here.

        Returns:
            str: A randomly selected OpenAI base voice name (e.g., 'alloy', 'nova').
        """
        voice_options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
        return random.choice(voice_options)
70
+
71
+
72
class OpenAIError(Exception):
    """Custom exception for errors related to the OpenAI TTS API."""

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        """
        Args:
            message (str): Human-readable description of the failure.
            original_exception (Exception | None): The underlying exception, if any.
        """
        super().__init__(message)
        self.message = message
        self.original_exception = original_exception
79
+
80
+
81
class UnretryableOpenAIError(OpenAIError):
    """OpenAI TTS API error that should not be retried (e.g. client-side 4xx failures)."""

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        """
        Args:
            message (str): Human-readable description of the failure.
            original_exception (Exception | None): The underlying exception, if any.
        """
        # The parent initializer already records both `message` and
        # `original_exception`, so no further assignment is needed here.
        super().__init__(message, original_exception)
88
+
89
+
90
@retry(
    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableOpenAIError)),
    stop=stop_after_attempt(2),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    reraise=True,
)
async def text_to_speech_with_openai(
    character_description: str,
    text: str,
    config: Config,
) -> Tuple[None, str]:
    """
    Asynchronously synthesizes speech using the OpenAI TTS API and writes the audio to a file.

    This function uses the OpenAI Python SDK to request speech synthesis with a randomly
    selected base voice, passing the character description as voice instructions. The
    audio response is streamed directly to disk and the relative file path is returned.

    Args:
        character_description (str): Description used to instruct the voice style.
        text (str): Text to be converted to speech.
        config (Config): Application configuration containing OpenAI API settings.

    Returns:
        Tuple[None, str]: A tuple containing:
            - generation_id (None): OpenAI does not return a generation ID; None keeps
              the return shape consistent with the other TTS integrations.
            - audio_file_path (str): Path to the saved audio file, relative to the CWD.

    Raises:
        OpenAIError: For errors communicating with the OpenAI API.
        UnretryableOpenAIError: For client-side HTTP errors (status code 4xx).
    """
    logger.debug(f"Synthesizing speech with OpenAI. Text length: {len(text)} characters.")
    openai_config = config.openai_config
    client = openai_config.client
    start_time = time.time()
    try:
        voice = openai_config.select_random_base_voice()
        async with client.audio.speech.with_streaming_response.create(
            model=openai_config.model,
            input=text,
            instructions=character_description,
            response_format=openai_config.response_format,
            voice=voice,  # OpenAI requires a base voice to be specified
        ) as response:
            elapsed_time = time.time() - start_time
            logger.info(f"OpenAI API request completed in {elapsed_time:.2f} seconds")

            # Include the audio format as the file extension so players and
            # browsers can recognize the file; use an integer timestamp to
            # avoid a raw float in the filename.
            filename = f"openai_{voice}_{int(start_time)}.{openai_config.response_format}"
            audio_file_path = Path(config.audio_dir) / filename
            await response.stream_to_file(audio_file_path)
            relative_audio_file_path = audio_file_path.relative_to(Path.cwd())

            return None, str(relative_audio_file_path)

    except APIError as e:
        elapsed_time = time.time() - start_time
        logger.error(f"OpenAI API request failed after {elapsed_time:.2f} seconds: {e!s}")
        logger.error(f"Full OpenAI API error: {e!s}")
        clean_message = _extract_openai_error_message(e)

        # 4xx responses indicate a client-side problem (bad request, auth,
        # rate limit policy violation) that retrying cannot fix.
        if (
            hasattr(e, 'status_code')
            and e.status_code is not None
            and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
        ):
            raise UnretryableOpenAIError(message=clean_message, original_exception=e) from e

        raise OpenAIError(message=clean_message, original_exception=e) from e

    except Exception as e:
        error_type = type(e).__name__
        error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
        logger.error("Error during OpenAI API call: %s - %s", error_type, error_message)
        clean_message = GENERIC_API_ERROR_MESSAGE

        raise OpenAIError(message=clean_message, original_exception=e) from e
169
+
170
+
171
def _extract_openai_error_message(e: APIError) -> str:
    """
    Extract a clean, user-friendly error message from an OpenAI API error response.

    Falls back to a generic message when the error body does not carry a
    nested ``{'error': {'message': ...}}`` payload.

    Args:
        e (APIError): The OpenAI API error exception containing response information.

    Returns:
        str: A clean, user-friendly error message suitable for display to end users.
    """
    body = getattr(e, 'body', None)
    if isinstance(body, dict):
        error_details = body.get('error')
        if isinstance(error_details, dict) and 'message' in error_details:
            return error_details['message']

    return GENERIC_API_ERROR_MESSAGE
src/utils.py CHANGED
@@ -204,22 +204,37 @@ def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config)
204
  return str(relative_path)
205
 
206
 
207
- def get_random_provider(text_modified: bool) -> TTSProviderName:
208
  """
209
- Select a TTS provider based on whether the text has been modified.
 
 
 
 
 
 
 
 
210
 
211
  Args:
212
- text_modified (bool): A flag indicating whether the text has been modified.
213
 
214
  Returns:
215
- provider: A TTS provider selected based on the following criteria:
216
- - If the text has been modified, it will be "Hume AI"
217
- - Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
218
  """
219
  if text_modified:
220
- return constants.HUME_AI
221
 
222
- return constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
 
 
 
 
 
 
 
 
 
223
 
224
 
225
  def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
@@ -285,9 +300,6 @@ def _determine_comparison_type(provider_a: TTSProviderName, provider_b: TTSProvi
285
  """
286
  Determine the comparison type based on the given TTS provider names.
287
 
288
- If both providers are HUME_AI, the comparison type is HUME_TO_HUME.
289
- If either provider is ELEVENLABS, the comparison type is HUME_TO_ELEVENLABS.
290
-
291
  Args:
292
  provider_a (TTSProviderName): The first TTS provider.
293
  provider_b (TTSProviderName): The second TTS provider.
@@ -302,9 +314,17 @@ def _determine_comparison_type(provider_a: TTSProviderName, provider_b: TTSProvi
302
  if provider_a == constants.HUME_AI and provider_b == constants.HUME_AI:
303
  return constants.HUME_TO_HUME
304
 
305
- if constants.ELEVENLABS in (provider_a, provider_b):
 
 
306
  return constants.HUME_TO_ELEVENLABS
307
 
 
 
 
 
 
 
308
  raise ValueError(f"Invalid provider combination: {provider_a}, {provider_b}")
309
 
310
 
 
204
  return str(relative_path)
205
 
206
 
207
def get_random_providers(text_modified: bool) -> Tuple[TTSProviderName, TTSProviderName]:
    """
    Pick the pair of TTS providers to compare for a single generation round.

    Custom (modified) text always yields a Hume-to-Hume comparison. Otherwise
    the pair is drawn from a weighted distribution:
      - 50%: HUME_AI, OPENAI
      - 25%: OPENAI, ELEVENLABS
      - 20%: HUME_AI, ELEVENLABS
      -  5%: HUME_AI, HUME_AI

    Args:
        text_modified (bool): A flag indicating whether the text has been modified,
            indicating a custom text input.

    Returns:
        tuple: A tuple (TTSProviderName, TTSProviderName)
    """
    if text_modified:
        return constants.HUME_AI, constants.HUME_AI

    # Each entry couples a provider pair with its selection probability, so
    # the weights can never drift out of sync with the pair ordering.
    weighted_pairs = [
        ((constants.HUME_AI, constants.OPENAI), 0.5),
        ((constants.OPENAI, constants.ELEVENLABS), 0.25),
        ((constants.HUME_AI, constants.ELEVENLABS), 0.2),
        ((constants.HUME_AI, constants.HUME_AI), 0.05),
    ]
    pairs = [pair for pair, _ in weighted_pairs]
    probabilities = [weight for _, weight in weighted_pairs]

    return random.choices(pairs, weights=probabilities, k=1)[0]
238
 
239
 
240
  def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
 
300
  """
301
  Determine the comparison type based on the given TTS provider names.
302
 
 
 
 
303
  Args:
304
  provider_a (TTSProviderName): The first TTS provider.
305
  provider_b (TTSProviderName): The second TTS provider.
 
314
  if provider_a == constants.HUME_AI and provider_b == constants.HUME_AI:
315
  return constants.HUME_TO_HUME
316
 
317
+ providers = (provider_a, provider_b)
318
+
319
+ if constants.HUME_AI in providers and constants.ELEVENLABS in providers:
320
  return constants.HUME_TO_ELEVENLABS
321
 
322
+ if constants.HUME_AI in providers and constants.OPENAI in providers:
323
+ return constants.HUME_TO_OPENAI
324
+
325
+ if constants.ELEVENLABS in providers and constants.OPENAI in providers:
326
+ return constants.OPENAI_TO_ELEVENLABS
327
+
328
  raise ValueError(f"Invalid provider combination: {provider_a}, {provider_b}")
329
 
330
 
uv.lock CHANGED
@@ -7,7 +7,10 @@ resolution-markers = [
7
  ]
8
 
9
  [manifest]
10
- overrides = [{ name = "aiofiles", specifier = "==24.1.0" }]
 
 
 
11
 
12
  [[package]]
13
  name = "aiofiles"
@@ -165,6 +168,15 @@ wheels = [
165
  { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
166
  ]
167
 
 
 
 
 
 
 
 
 
 
168
  [[package]]
169
  name = "cfgv"
170
  version = "3.4.0"
@@ -227,7 +239,7 @@ name = "click"
227
  version = "8.1.8"
228
  source = { registry = "https://pypi.org/simple" }
229
  dependencies = [
230
- { name = "colorama", marker = "sys_platform == 'win32'" },
231
  ]
232
  sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
233
  wheels = [
@@ -299,6 +311,7 @@ dependencies = [
299
  { name = "gradio" },
300
  { name = "greenlet" },
301
  { name = "hume" },
 
302
  { name = "python-dotenv" },
303
  { name = "sqlalchemy" },
304
  { name = "tenacity" },
@@ -324,6 +337,7 @@ requires-dist = [
324
  { name = "gradio", specifier = ">=5.18.0" },
325
  { name = "greenlet", specifier = ">=2.0.0" },
326
  { name = "hume", specifier = ">=0.7.8" },
 
327
  { name = "python-dotenv", specifier = ">=1.0.1" },
328
  { name = "sqlalchemy", specifier = ">=2.0.0" },
329
  { name = "tenacity", specifier = ">=9.0.0" },
@@ -783,6 +797,27 @@ wheels = [
783
  { url = "https://files.pythonhosted.org/packages/80/94/cd9e9b04012c015cb6320ab3bf43bc615e248dddfeb163728e800a5d96f0/numpy-2.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:97b974d3ba0fb4612b77ed35d7627490e8e3dff56ab41454d9e8b23448940576", size = 12696208 },
784
  ]
785
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  [[package]]
787
  name = "orjson"
788
  version = "3.10.15"
@@ -963,6 +998,15 @@ wheels = [
963
  { url = "https://files.pythonhosted.org/packages/43/b3/df14c580d82b9627d173ceea305ba898dca135feb360b6d84019d0803d3b/pre_commit-4.1.0-py2.py3-none-any.whl", hash = "sha256:d29e7cb346295bcc1cc75fc3e92e343495e3ea0196c9ec6ba53f49f10ab6ae7b", size = 220560 },
964
  ]
965
 
 
 
 
 
 
 
 
 
 
966
  [[package]]
967
  name = "pydantic"
968
  version = "2.10.6"
@@ -1251,6 +1295,18 @@ wheels = [
1251
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
1252
  ]
1253
 
 
 
 
 
 
 
 
 
 
 
 
 
1254
  [[package]]
1255
  name = "soupsieve"
1256
  version = "2.6"
@@ -1332,7 +1388,7 @@ name = "tqdm"
1332
  version = "4.67.1"
1333
  source = { registry = "https://pypi.org/simple" }
1334
  dependencies = [
1335
- { name = "colorama", marker = "sys_platform == 'win32'" },
1336
  ]
1337
  sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
1338
  wheels = [
 
7
  ]
8
 
9
  [manifest]
10
+ overrides = [
11
+ { name = "aiofiles", specifier = "==24.1.0" },
12
+ { name = "sounddevice", marker = "sys_platform == 'never'" },
13
+ ]
14
 
15
  [[package]]
16
  name = "aiofiles"
 
168
  { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
169
  ]
170
 
171
+ [[package]]
172
+ name = "cffi"
173
+ version = "1.17.1"
174
+ source = { registry = "https://pypi.org/simple" }
175
+ dependencies = [
176
+ { name = "pycparser" },
177
+ ]
178
+ sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 }
179
+
180
  [[package]]
181
  name = "cfgv"
182
  version = "3.4.0"
 
239
  version = "8.1.8"
240
  source = { registry = "https://pypi.org/simple" }
241
  dependencies = [
242
+ { name = "colorama", marker = "platform_system == 'Windows'" },
243
  ]
244
  sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
245
  wheels = [
 
311
  { name = "gradio" },
312
  { name = "greenlet" },
313
  { name = "hume" },
314
+ { name = "openai" },
315
  { name = "python-dotenv" },
316
  { name = "sqlalchemy" },
317
  { name = "tenacity" },
 
337
  { name = "gradio", specifier = ">=5.18.0" },
338
  { name = "greenlet", specifier = ">=2.0.0" },
339
  { name = "hume", specifier = ">=0.7.8" },
340
+ { name = "openai", specifier = ">=1.68.0" },
341
  { name = "python-dotenv", specifier = ">=1.0.1" },
342
  { name = "sqlalchemy", specifier = ">=2.0.0" },
343
  { name = "tenacity", specifier = ">=9.0.0" },
 
797
  { url = "https://files.pythonhosted.org/packages/80/94/cd9e9b04012c015cb6320ab3bf43bc615e248dddfeb163728e800a5d96f0/numpy-2.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:97b974d3ba0fb4612b77ed35d7627490e8e3dff56ab41454d9e8b23448940576", size = 12696208 },
798
  ]
799
 
800
+ [[package]]
801
+ name = "openai"
802
+ version = "1.68.0"
803
+ source = { registry = "https://pypi.org/simple" }
804
+ dependencies = [
805
+ { name = "anyio" },
806
+ { name = "distro" },
807
+ { name = "httpx" },
808
+ { name = "jiter" },
809
+ { name = "numpy" },
810
+ { name = "pydantic" },
811
+ { name = "sniffio" },
812
+ { name = "sounddevice", marker = "sys_platform == 'never'" },
813
+ { name = "tqdm" },
814
+ { name = "typing-extensions" },
815
+ ]
816
+ sdist = { url = "https://files.pythonhosted.org/packages/58/ea/58102e9bfda09edc963e6e877e39cca12706b46ebf35d5fc9da7b8af10f2/openai-1.68.0.tar.gz", hash = "sha256:c570c06c9ba10f98b891ac30a3dd7b5c89ed48094c711c7a3f35fb5ade6c0757", size = 413039 }
817
+ wheels = [
818
+ { url = "https://files.pythonhosted.org/packages/a5/b6/bd67b7031572cba7d8451d82ac4a990b3a96bbd3b037634726b48ac972c8/openai-1.68.0-py3-none-any.whl", hash = "sha256:20e279b0f3a78cb4a95f3eab2a180f3ee30c6a196aeebd6bf642a4f88ab85ee1", size = 605645 },
819
+ ]
820
+
821
  [[package]]
822
  name = "orjson"
823
  version = "3.10.15"
 
998
  { url = "https://files.pythonhosted.org/packages/43/b3/df14c580d82b9627d173ceea305ba898dca135feb360b6d84019d0803d3b/pre_commit-4.1.0-py2.py3-none-any.whl", hash = "sha256:d29e7cb346295bcc1cc75fc3e92e343495e3ea0196c9ec6ba53f49f10ab6ae7b", size = 220560 },
999
  ]
1000
 
1001
+ [[package]]
1002
+ name = "pycparser"
1003
+ version = "2.22"
1004
+ source = { registry = "https://pypi.org/simple" }
1005
+ sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 }
1006
+ wheels = [
1007
+ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 },
1008
+ ]
1009
+
1010
  [[package]]
1011
  name = "pydantic"
1012
  version = "2.10.6"
 
1295
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
1296
  ]
1297
 
1298
+ [[package]]
1299
+ name = "sounddevice"
1300
+ version = "0.5.1"
1301
+ source = { registry = "https://pypi.org/simple" }
1302
+ dependencies = [
1303
+ { name = "cffi" },
1304
+ ]
1305
+ sdist = { url = "https://files.pythonhosted.org/packages/80/2d/b04ae180312b81dbb694504bee170eada5372242e186f6298139fd3a0513/sounddevice-0.5.1.tar.gz", hash = "sha256:09ca991daeda8ce4be9ac91e15a9a81c8f81efa6b695a348c9171ea0c16cb041", size = 52896 }
1306
+ wheels = [
1307
+ { url = "https://files.pythonhosted.org/packages/06/d1/464b5fca3decdd0cfec8c47f7b4161a0b12972453201c1bf03811f367c5e/sounddevice-0.5.1-py3-none-any.whl", hash = "sha256:e2017f182888c3f3c280d9fbac92e5dbddac024a7e3442f6e6116bd79dab8a9c", size = 32276 },
1308
+ ]
1309
+
1310
  [[package]]
1311
  name = "soupsieve"
1312
  version = "2.6"
 
1388
  version = "4.67.1"
1389
  source = { registry = "https://pypi.org/simple" }
1390
  dependencies = [
1391
+ { name = "colorama", marker = "platform_system == 'Windows'" },
1392
  ]
1393
  sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
1394
  wheels = [