Spaces:
Running
Running
zach
committed on
Commit
·
4a475d2
1
Parent(s):
a35c804
Add randomize all button, improve UI interaction
Browse files- src/app.py +254 -64
src/app.py
CHANGED
@@ -50,7 +50,7 @@ class App:
|
|
50 |
async def _generate_text(
|
51 |
self,
|
52 |
character_description: str,
|
53 |
-
) -> Tuple[
|
54 |
"""
|
55 |
Validates the character_description and generates text using Anthropic API.
|
56 |
|
@@ -87,7 +87,7 @@ class App:
|
|
87 |
character_description: str,
|
88 |
text: str,
|
89 |
generated_text_state: str,
|
90 |
-
) -> Tuple[
|
91 |
"""
|
92 |
Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
|
93 |
|
@@ -209,7 +209,14 @@ class App:
|
|
209 |
text_modified: bool,
|
210 |
character_description: str,
|
211 |
text: str,
|
212 |
-
) -> Tuple[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
"""
|
214 |
Handles user voting and updates the UI to display vote results.
|
215 |
|
@@ -271,20 +278,88 @@ class App:
|
|
271 |
gr.update(interactive=True),
|
272 |
)
|
273 |
|
274 |
-
def
|
275 |
"""
|
276 |
-
|
277 |
|
278 |
Returns:
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
"""
|
289 |
default_option_map: OptionMap = {
|
290 |
"option_a": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
|
@@ -295,12 +370,50 @@ class App:
|
|
295 |
gr.update(value=None, autoplay=False), # clear audio player B
|
296 |
gr.update(visible=True, interactive=False), # show vote button A
|
297 |
gr.update(visible=True, interactive=False), # show vote button B
|
298 |
-
gr.update(visible=False
|
299 |
-
gr.update(visible=False
|
300 |
default_option_map, # Reset option_map_state as a default OptionMap
|
301 |
False, # Reset vote_submitted_state
|
302 |
)
|
303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
def _build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
|
305 |
"""
|
306 |
Builds the input section including the sample character description dropdown, character
|
@@ -418,35 +531,11 @@ class App:
|
|
418 |
css_paths="src/assets/styles.css",
|
419 |
) as demo:
|
420 |
# --- UI components ---
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
</p>
|
427 |
-
<ol style="margin-left: 8px;">
|
428 |
-
<li>
|
429 |
-
Choose or enter a character description by selecting a sample or typing your own to guide
|
430 |
-
text generation and voice synthesis.
|
431 |
-
</li>
|
432 |
-
<li>
|
433 |
-
Click the <strong>"Generate Text"</strong> button to create dialogue for the character;
|
434 |
-
the text automatically populates the input field for further editing.
|
435 |
-
</li>
|
436 |
-
<li>
|
437 |
-
Click the <strong>"Synthesize Speech"</strong> button to convert your text and character
|
438 |
-
description into two synthesized speech options for direct comparison.
|
439 |
-
</li>
|
440 |
-
<li>
|
441 |
-
Listen to both audio outputs to assess their expressiveness.
|
442 |
-
</li>
|
443 |
-
<li>
|
444 |
-
Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
|
445 |
-
the most expressive result.
|
446 |
-
</li>
|
447 |
-
</ol>
|
448 |
-
"""
|
449 |
-
)
|
450 |
(
|
451 |
sample_character_description_dropdown,
|
452 |
character_description_input,
|
@@ -478,6 +567,79 @@ class App:
|
|
478 |
vote_submitted_state = gr.State(False)
|
479 |
|
480 |
# --- Register event handlers ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
# When a sample character description is chosen, update the character description textbox
|
482 |
sample_character_description_dropdown.change(
|
483 |
fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
|
@@ -490,34 +652,57 @@ class App:
|
|
490 |
# 2. Generate text
|
491 |
# 3. Enable the "Generate text" button
|
492 |
generate_text_button.click(
|
493 |
-
fn=
|
494 |
inputs=[],
|
495 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
496 |
).then(
|
497 |
fn=self._generate_text,
|
498 |
inputs=[character_description_input],
|
499 |
outputs=[text_input, generated_text_state],
|
500 |
).then(
|
501 |
-
fn=
|
502 |
inputs=[],
|
503 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
)
|
505 |
|
506 |
# Synthesize speech button click event handler chain:
|
507 |
-
# 1. Disable
|
508 |
-
# 2. Reset UI state
|
509 |
# 3. Synthesize speech, load audio players, and display vote button
|
510 |
# 4. Enable the "Synthesize speech" button and display vote buttons
|
511 |
synthesize_speech_button.click(
|
512 |
-
fn=
|
513 |
-
gr.update(interactive=False),
|
514 |
-
gr.update(interactive=False),
|
515 |
-
gr.update(interactive=False),
|
516 |
-
),
|
517 |
inputs=[],
|
518 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
).then(
|
520 |
-
fn=self.
|
521 |
inputs=[],
|
522 |
outputs=[
|
523 |
option_a_audio_player,
|
@@ -541,13 +726,18 @@ class App:
|
|
541 |
character_description_state,
|
542 |
],
|
543 |
).then(
|
544 |
-
fn=
|
545 |
-
gr.update(interactive=True),
|
546 |
-
gr.update(interactive=True),
|
547 |
-
gr.update(interactive=True),
|
548 |
-
),
|
549 |
inputs=[],
|
550 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
551 |
)
|
552 |
|
553 |
# Vote button click event handlers
|
|
|
50 |
async def _generate_text(
|
51 |
self,
|
52 |
character_description: str,
|
53 |
+
) -> Tuple[gr.Textbox, str]:
|
54 |
"""
|
55 |
Validates the character_description and generates text using Anthropic API.
|
56 |
|
|
|
87 |
character_description: str,
|
88 |
text: str,
|
89 |
generated_text_state: str,
|
90 |
+
) -> Tuple[gr.Audio, gr.Audio, OptionMap, bool, str, str]:
|
91 |
"""
|
92 |
Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
|
93 |
|
|
|
209 |
text_modified: bool,
|
210 |
character_description: str,
|
211 |
text: str,
|
212 |
+
) -> Tuple[
|
213 |
+
bool,
|
214 |
+
gr.Button,
|
215 |
+
gr.Button,
|
216 |
+
gr.Textbox,
|
217 |
+
gr.Textbox,
|
218 |
+
gr.Button
|
219 |
+
]:
|
220 |
"""
|
221 |
Handles user voting and updates the UI to display vote results.
|
222 |
|
|
|
278 |
gr.update(interactive=True),
|
279 |
)
|
280 |
|
281 |
+
async def _randomize_character_description(self) -> Tuple[gr.Dropdown, gr.Textbox]:
    """
    Picks one of the sample character descriptions at random.

    This method only chooses the sample and reports the choice; it does not
    generate text or synthesize speech itself.

    Returns:
        Tuple containing updates for:
            - sample_character_description_dropdown (set to the randomly chosen sample key)
            - character_description_input (set to that sample's description text)
    """
    import random

    # Draw a key first, then look up its description, so both updates agree.
    chosen_key = random.choice(list(constants.SAMPLE_CHARACTER_DESCRIPTIONS))
    chosen_description = constants.SAMPLE_CHARACTER_DESCRIPTIONS[chosen_key]
    logger.info(f"Randomize All: Selected '{chosen_key}'")

    dropdown_update = gr.update(value=chosen_key)
    description_update = gr.update(value=chosen_description)
    return dropdown_update, description_update
|
302 |
+
|
303 |
+
def _disable_ui(self) -> Tuple[
    gr.Button,
    gr.Dropdown,
    gr.Textbox,
    gr.Button,
    gr.Textbox,
    gr.Button,
    gr.Button,
    gr.Button,
]:
    """
    Produces a "not interactive" update for every input control in the UI
    (audio players are not included).

    Returns:
        Tuple of eight updates, one per component, in this order:
            - Randomize All button
            - Character Description dropdown
            - Character Description input
            - Generate Text button
            - Input Text input
            - Synthesize Speech button
            - Select Option A button
            - Select Option B button
    """
    # Every component gets the same update, so build one per slot.
    component_count = 8
    return tuple(gr.update(interactive=False) for _ in range(component_count))
|
326 |
+
|
327 |
+
def _enable_ui(self) -> Tuple[
    gr.Button,
    gr.Dropdown,
    gr.Textbox,
    gr.Button,
    gr.Textbox,
    gr.Button,
    gr.Button,
    gr.Button,
]:
    """
    Produces an "interactive" update for every input control in the UI
    (audio players are not included).

    Returns:
        Tuple of eight updates, one per component, in this order:
            - Randomize All button
            - Character Description dropdown
            - Character Description input
            - Generate Text button
            - Input Text input
            - Synthesize Speech button
            - Select Option A button
            - Select Option B button
    """
    # Every component gets the same update, so build one per slot.
    component_count = 8
    return tuple(gr.update(interactive=True) for _ in range(component_count))
|
350 |
+
|
351 |
+
def _reset_voting_ui(self) -> Tuple[
|
352 |
+
gr.Audio,
|
353 |
+
gr.Audio,
|
354 |
+
gr.Button,
|
355 |
+
gr.Button,
|
356 |
+
gr.Textbox,
|
357 |
+
gr.Textbox,
|
358 |
+
OptionMap,
|
359 |
+
bool
|
360 |
+
]:
|
361 |
+
"""
|
362 |
+
Resets voting UI state and clears audio players
|
363 |
"""
|
364 |
default_option_map: OptionMap = {
|
365 |
"option_a": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
|
|
|
370 |
gr.update(value=None, autoplay=False), # clear audio player B
|
371 |
gr.update(visible=True, interactive=False), # show vote button A
|
372 |
gr.update(visible=True, interactive=False), # show vote button B
|
373 |
+
gr.update(visible=False), # hide vote result A
|
374 |
+
gr.update(visible=False), # hide vote result B
|
375 |
default_option_map, # Reset option_map_state as a default OptionMap
|
376 |
False, # Reset vote_submitted_state
|
377 |
)
|
378 |
|
379 |
+
def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
|
380 |
+
"""
|
381 |
+
Builds heading section including title, randomize all button, and instructions
|
382 |
+
"""
|
383 |
+
with gr.Row():
|
384 |
+
with gr.Column(scale=5):
|
385 |
+
title = gr.HTML("<h1>Expressive TTS Arena</h1>")
|
386 |
+
randomize_all_button = gr.Button("🎲 Randomize All", variant="primary", scale=1)
|
387 |
+
instructions = gr.HTML(
|
388 |
+
"""
|
389 |
+
<p style="font-size: 16px; font-weight: bold;">
|
390 |
+
<strong>Instructions</strong>
|
391 |
+
</p>
|
392 |
+
<ol style="margin-left: 8px;">
|
393 |
+
<li>
|
394 |
+
Choose or enter a character description by selecting a sample or typing your own to guide
|
395 |
+
text generation and voice synthesis.
|
396 |
+
</li>
|
397 |
+
<li>
|
398 |
+
Click the <strong>"Generate Text"</strong> button to create dialogue for the character;
|
399 |
+
the text automatically populates the input field for further editing.
|
400 |
+
</li>
|
401 |
+
<li>
|
402 |
+
Click the <strong>"Synthesize Speech"</strong> button to convert your text and character
|
403 |
+
description into two synthesized speech options for direct comparison.
|
404 |
+
</li>
|
405 |
+
<li>
|
406 |
+
Listen to both audio outputs to assess their expressiveness.
|
407 |
+
</li>
|
408 |
+
<li>
|
409 |
+
Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
|
410 |
+
the most expressive result.
|
411 |
+
</li>
|
412 |
+
</ol>
|
413 |
+
"""
|
414 |
+
)
|
415 |
+
return (title, randomize_all_button, instructions)
|
416 |
+
|
417 |
def _build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
|
418 |
"""
|
419 |
Builds the input section including the sample character description dropdown, character
|
|
|
531 |
css_paths="src/assets/styles.css",
|
532 |
) as demo:
|
533 |
# --- UI components ---
|
534 |
+
(
|
535 |
+
title,
|
536 |
+
randomize_all_button,
|
537 |
+
instructions,
|
538 |
+
) = self._build_heading_section()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
(
|
540 |
sample_character_description_dropdown,
|
541 |
character_description_input,
|
|
|
567 |
vote_submitted_state = gr.State(False)
|
568 |
|
569 |
# --- Register event handlers ---
|
570 |
+
# Select a character description, generate text, and synthesize speech with a single button click.
|
571 |
+
# 1. Select random character descriptions and disable buttons
|
572 |
+
# 2. Generate text
|
573 |
+
# 3. Reset UI state for audio players and voting results
|
574 |
+
# 4. Synthesize speech
|
575 |
+
# 5. Reenable buttons
|
576 |
+
randomize_all_button.click(
|
577 |
+
fn=self._randomize_character_description,
|
578 |
+
inputs=[],
|
579 |
+
outputs=[
|
580 |
+
sample_character_description_dropdown,
|
581 |
+
character_description_input,
|
582 |
+
],
|
583 |
+
).then(
|
584 |
+
fn=self._disable_ui,
|
585 |
+
inputs=[],
|
586 |
+
outputs=[
|
587 |
+
randomize_all_button,
|
588 |
+
sample_character_description_dropdown,
|
589 |
+
character_description_input,
|
590 |
+
generate_text_button,
|
591 |
+
text_input,
|
592 |
+
synthesize_speech_button,
|
593 |
+
vote_button_a,
|
594 |
+
vote_button_b,
|
595 |
+
],
|
596 |
+
).then(
|
597 |
+
fn=self._generate_text,
|
598 |
+
inputs=[character_description_input],
|
599 |
+
outputs=[text_input, generated_text_state],
|
600 |
+
).then(
|
601 |
+
fn=self._reset_voting_ui,
|
602 |
+
inputs=[],
|
603 |
+
outputs=[
|
604 |
+
option_a_audio_player,
|
605 |
+
option_b_audio_player,
|
606 |
+
vote_button_a,
|
607 |
+
vote_button_b,
|
608 |
+
vote_result_a,
|
609 |
+
vote_result_b,
|
610 |
+
option_map_state,
|
611 |
+
vote_submitted_state,
|
612 |
+
],
|
613 |
+
).then(
|
614 |
+
fn=lambda _=None: gr.update(interactive=False),
|
615 |
+
inputs=[],
|
616 |
+
outputs=[text_input],
|
617 |
+
).then(
|
618 |
+
fn=self._synthesize_speech,
|
619 |
+
inputs=[character_description_input, text_input, generated_text_state],
|
620 |
+
outputs=[
|
621 |
+
option_a_audio_player,
|
622 |
+
option_b_audio_player,
|
623 |
+
option_map_state,
|
624 |
+
text_modified_state,
|
625 |
+
text_state,
|
626 |
+
character_description_state,
|
627 |
+
],
|
628 |
+
).then(
|
629 |
+
fn=self._enable_ui,
|
630 |
+
inputs=[],
|
631 |
+
outputs=[
|
632 |
+
randomize_all_button,
|
633 |
+
sample_character_description_dropdown,
|
634 |
+
character_description_input,
|
635 |
+
generate_text_button,
|
636 |
+
text_input,
|
637 |
+
synthesize_speech_button,
|
638 |
+
vote_button_a,
|
639 |
+
vote_button_b,
|
640 |
+
],
|
641 |
+
)
|
642 |
+
|
643 |
# When a sample character description is chosen, update the character description textbox
|
644 |
sample_character_description_dropdown.change(
|
645 |
fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
|
|
|
652 |
# 2. Generate text
|
653 |
# 3. Enable the "Generate text" button
|
654 |
generate_text_button.click(
|
655 |
+
fn=self._disable_ui,
|
656 |
inputs=[],
|
657 |
+
outputs=[
|
658 |
+
randomize_all_button,
|
659 |
+
sample_character_description_dropdown,
|
660 |
+
character_description_input,
|
661 |
+
generate_text_button,
|
662 |
+
text_input,
|
663 |
+
synthesize_speech_button,
|
664 |
+
vote_button_a,
|
665 |
+
vote_button_b,
|
666 |
+
],
|
667 |
).then(
|
668 |
fn=self._generate_text,
|
669 |
inputs=[character_description_input],
|
670 |
outputs=[text_input, generated_text_state],
|
671 |
).then(
|
672 |
+
fn=self._enable_ui,
|
673 |
inputs=[],
|
674 |
+
outputs=[
|
675 |
+
randomize_all_button,
|
676 |
+
sample_character_description_dropdown,
|
677 |
+
character_description_input,
|
678 |
+
generate_text_button,
|
679 |
+
text_input,
|
680 |
+
synthesize_speech_button,
|
681 |
+
vote_button_a,
|
682 |
+
vote_button_b,
|
683 |
+
],
|
684 |
)
|
685 |
|
686 |
# Synthesize speech button click event handler chain:
|
687 |
+
# 1. Disable UI
|
688 |
+
# 2. Reset UI state for audio players and voting results
|
689 |
# 3. Synthesize speech, load audio players, and display vote button
|
690 |
# 4. Enable the "Synthesize speech" button and display vote buttons
|
691 |
synthesize_speech_button.click(
|
692 |
+
fn=self._disable_ui,
|
|
|
|
|
|
|
|
|
693 |
inputs=[],
|
694 |
+
outputs=[
|
695 |
+
randomize_all_button,
|
696 |
+
sample_character_description_dropdown,
|
697 |
+
character_description_input,
|
698 |
+
generate_text_button,
|
699 |
+
text_input,
|
700 |
+
synthesize_speech_button,
|
701 |
+
vote_button_a,
|
702 |
+
vote_button_b,
|
703 |
+
],
|
704 |
).then(
|
705 |
+
fn=self._reset_voting_ui,
|
706 |
inputs=[],
|
707 |
outputs=[
|
708 |
option_a_audio_player,
|
|
|
726 |
character_description_state,
|
727 |
],
|
728 |
).then(
|
729 |
+
fn=self._enable_ui,
|
|
|
|
|
|
|
|
|
730 |
inputs=[],
|
731 |
+
outputs=[
|
732 |
+
randomize_all_button,
|
733 |
+
sample_character_description_dropdown,
|
734 |
+
character_description_input,
|
735 |
+
generate_text_button,
|
736 |
+
text_input,
|
737 |
+
synthesize_speech_button,
|
738 |
+
vote_button_a,
|
739 |
+
vote_button_b,
|
740 |
+
],
|
741 |
)
|
742 |
|
743 |
# Vote button click event handlers
|