Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Community

Zachary Greathouse committed on Feb 24

Commit

0f46857

unverified ·

2 Parent(s): 13d6ba9 bd5e759

Merge pull request #2 from HumeAI/jh/prompt-edits

Browse files

Files changed (2) hide show

src/constants.py +43 -19
src/integrations/anthropic_api.py +37 -42

src/constants.py CHANGED Viewed

@@ -35,33 +35,57 @@ TROPHY_EMOJI: str = "🏆"
 SELECT_OPTION_A: str = "Select Option A"
 SELECT_OPTION_B: str = "Select Option B"
 # A collection of pre-defined character descriptions categorized by theme, used to provide users with
 # inspiration for generating creative, expressive text inputs for TTS, and generating novel voices.
 SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
     "🚀 Stranded Astronaut": (
         "A lone astronaut whose voice mirrors the silent vastness of space—a low, steady tone imbued "
         "with isolation and quiet wonder. It carries the measured resolve of someone sending a final "
         "transmission, with an undercurrent of wistful melancholy."
     ),
-    "📜 Timeless Poet": (
-        "An ageless poet with a voice that flows like gentle verse—a soft, reflective tone marked by "
-        "deliberate pauses. It speaks with the measured cadence of classic sonnets, evoking both the "
-        "fragile beauty of time and heartfelt introspection."
-    ),
-    "🐱 Whimsical Feline": (
-        "A mischievous cat whose voice is playful yet mysterious—light, quick-witted, and infused with "
-        "an enchanting purr. It hints at secret adventures and hidden charm, balancing exuberance with "
-        "a subtle, smooth allure."
-    ),
-    "🔥 Revolutionary Orator": (
-        "A defiant orator whose voice builds from quiet determination to passionate fervor—a clear, "
-        "commanding tone that resonates with conviction. It starts measured and resolute, then rises "
-        "to a crescendo of fervor, punctuated by deliberate pauses that emphasize each rallying cry."
-    ),
-    "👻 Haunted Keeper": (
-        "A solitary lighthouse keeper with a voice that carries the weight of forgotten storms—a soft, "
-        "measured tone with an echo of sorrow. It speaks as if whispering long-held secrets in the dark, "
-        "blending quiet melancholy with an air of enduring mystery."
     ),
 }

 SELECT_OPTION_A: str = "Select Option A"
 SELECT_OPTION_B: str = "Select Option B"
+# other characters
+# Surfer dude
+# Meditation guru / ASMR
+# British nature documentarian
+# Pirate captain
+# Victorian ghost story storyteller
+# Texan woman (folksy style)
+# Stranded astronaut
+# Film noir narrator
 # A collection of pre-defined character descriptions categorized by theme, used to provide users with
 # inspiration for generating creative, expressive text inputs for TTS, and generating novel voices.
 SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
+    "🏄 Surfer Dude": (
+        "A laid-back surfer with a voice that flows like ocean waves—a mellow, easygoing tone infused "
+        "with sun-soaked warmth. It carries the rhythmic cadence of breaking surf, punctuated by "
+        "carefree laughter and an infectious enthusiasm for life's simple pleasures."
+    ),
+    "🧘 Meditation Guru": (
+        "A serene meditation guide whose voice is a gentle stream of tranquility—soft, measured tones "
+        "that float like incense smoke. Each word emerges with mindful intention, creating a soothing "
+        "atmosphere of peace and present-moment awareness."
+    ),
+    "🌿 British Naturalist": (
+        "A passionate nature documentarian with a voice that brings the wild to life—crisp, refined "
+        "tones brimming with wonder and expertise. It shifts seamlessly from hushed observation to "
+        "animated excitement, painting vivid pictures of the natural world's endless marvels."
+    ),
+    "🏴‍☠️ Pirate Captain": (
+        "A weathered sea captain whose voice rumbles like distant thunder—rich, commanding tones "
+        "seasoned by salt spray and adventure. It carries the weight of countless voyages, blending "
+        "gruff authority with the playful spirit of a born storyteller."
+    ),
+    "🕯️ Victorian Ghost Storyteller": (
+        "A mysterious raconteur whose voice weaves shadows into stories—velvet-dark tones that dance "
+        "between whispers and dramatic flourishes. It draws listeners close with elegant phrasing, "
+        "building tension through perfectly timed pauses and haunting inflections."
+    ),
+    "🌟 Texan Storyteller": (
+        "A warm-hearted Texan woman whose voice carries the spirit of wide-open skies—honeyed tones "
+        "rich with folksy wisdom and charm. It wraps around words like a comfortable quilt, sharing "
+        "tales with the unhurried grace of a front-porch conversation."
+    ),
     "🚀 Stranded Astronaut": (
         "A lone astronaut whose voice mirrors the silent vastness of space—a low, steady tone imbued "
         "with isolation and quiet wonder. It carries the measured resolve of someone sending a final "
         "transmission, with an undercurrent of wistful melancholy."
     ),
+    "🎬 Noir Narrator": (
+        "A hardboiled detective whose voice cuts through darkness like neon on wet streets—sharp, "
+        "world-weary tones dripping with cynical wit. It paints pictures in shades of gray, each word "
+        "chosen with the precision of a private eye piecing together clues."
     ),
 }

src/integrations/anthropic_api.py CHANGED Viewed

@@ -33,41 +33,35 @@ from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import truncate_text, validate_env_var
-PROMPT_TEMPLATE: str = (
-    """You are an expert at generating micro-content optimized for text-to-speech synthesis.
-Your absolute priority is delivering complete, untruncated responses within strict length limits.
-CRITICAL LENGTH CONSTRAINTS:
-- Maximum length: {max_tokens} tokens (approximately 400 characters)
-- You MUST complete all thoughts and sentences
-- Responses should be 25% shorter than you initially plan
-- Never exceed 400 characters total
-Response Generation Process:
-- Draft your response mentally first
-- ut it down to 75% of its original length
-- Reserve the last 100 characters for a proper conclusion
-- If you start running long, immediately wrap up
-- End every piece with a clear conclusion
-Content Requirements:
-- Allow natural emotional progression
-- Create an arc of connected moments
-- Use efficient but expressive language
-- Balance description with emotional depth
-- Ensure perfect completion
-- No meta-commentary or formatting
-Structure for Emotional Pieces:
-- Opening hook (50-75 characters)
-- Emotional journey (200-250 characters)
-- Resolution (75-100 characters)
-MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of
-where you are in the narrative.
-Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
-)
 @dataclass(frozen=True)
@@ -77,7 +71,7 @@ class AnthropicConfig:
     api_key: str = field(init=False)
     system_prompt: str = field(init=False)
     model: ModelParam = "claude-3-5-sonnet-latest"
-    max_tokens: int = 150
     def __post_init__(self) -> None:
         # Validate required non-computed attributes.
@@ -116,13 +110,14 @@ class AnthropicConfig:
         Returns:
             str: The prompt to be passed to the Anthropic API.
         """
-        return (
-            f"Character Description: {character_description}\n\n"
-            "Based on the above character description, please generate a line of dialogue that captures the "
-            "character's unique personality, emotional depth, and distinctive tone. The response should sound "
-            "like something the character would naturally say, reflecting their background and emotional state, "
-            "and be fully developed for text-to-speech synthesis."
-        )
 class AnthropicError(Exception):

 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
 from src.utils import truncate_text, validate_env_var
+# notes
+# smallest length that we can work with - long enough to show off, short enough to listen back to back
+# increase the max tokens to 200 (NOTE: AnthropicConfig.max_tokens is actually set to 300 in this change)
+PROMPT_TEMPLATE: str = """
+<role>
+You are an expert at generating micro-content optimized for text-to-speech synthesis.
+Your absolute priority is delivering complete, untruncated responses within strict length limits.
+</role>
+<requirements>
+- The output text MUST be a minimum of 10 words and a maximum of 50 words. NEVER output text that is longer than 50
+  words. NEVER include newlines in the output
+- Make sure that all responses are complete thoughts, not fragments, and have clear beginnings and endings
+- The text must sound human-like, prosodic, expressive, conversational. Avoid generic AI-like words like "delve".
+- Use the utterances "uh", "um", "hm", "woah", or "like" for expressivity in conversational text. Use these naturally
+  within the sentence. Never use them at the very end of a sentence.
+- Avoid any short utterances at the end of the sentence - like ", hm?" or "oh" at the end. Avoid these short, isolated
+  utterances because they are difficult for our TTS system to speak.
+- Avoid words that are overly long, very rare, or difficult to pronounce. For example, avoid "eureka", or "schnell",
+  or "abnegation".
+- The text CANNOT contain quotation marks, parentheticals, newlines, or asterisks. NEVER include any of these in the
+  text. Avoid unnecessary formatting.
+- Include only basic punctuation in the text, like periods, question marks, and ellipses. Use ellipses to emphasize
+  pauses within the sentence (like "Woah... it's so beautiful... and I feel so small...")
+- The piece should have an emotional arc with a kind of beginning, middle, and end - not flat, but emotionally
+  interesting.
+</requirements>
+"""
 @dataclass(frozen=True)
     api_key: str = field(init=False)
     system_prompt: str = field(init=False)
     model: ModelParam = "claude-3-5-sonnet-latest"
+    max_tokens: int = 300
     def __post_init__(self) -> None:
         # Validate required non-computed attributes.
         Returns:
             str: The prompt to be passed to the Anthropic API.
         """
+        return f"""
+        Character Description: {character_description}\n
+        Based on the character description above, please generate a line of dialogue that captures the character's
+        unique personality, emotional depth, and distinctive tone. The response should sound like something the
+        character would naturally say, reflecting their background and emotional state, and be fully developed for
+        text-to-speech synthesis. Follow all of the requirements from the system prompt and output your 10-50 word
+        response.
+        """
 class AnthropicError(Exception):