zach committed on
Commit
4a475d2
·
1 Parent(s): a35c804

Add randomize all button, improve UI interaction

Browse files
Files changed (1) hide show
  1. src/app.py +254 -64
src/app.py CHANGED
@@ -50,7 +50,7 @@ class App:
50
  async def _generate_text(
51
  self,
52
  character_description: str,
53
- ) -> Tuple[dict, str]:
54
  """
55
  Validates the character_description and generates text using Anthropic API.
56
 
@@ -87,7 +87,7 @@ class App:
87
  character_description: str,
88
  text: str,
89
  generated_text_state: str,
90
- ) -> Tuple[dict, dict, OptionMap, bool, str, str]:
91
  """
92
  Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
93
 
@@ -209,7 +209,14 @@ class App:
209
  text_modified: bool,
210
  character_description: str,
211
  text: str,
212
- ) -> Tuple[bool, dict, dict, dict, dict, dict]:
 
 
 
 
 
 
 
213
  """
214
  Handles user voting and updates the UI to display vote results.
215
 
@@ -271,20 +278,88 @@ class App:
271
  gr.update(interactive=True),
272
  )
273
 
274
- def _reset_ui(self) -> Tuple[dict, dict, dict, dict, dict, dict, OptionMap, bool]:
275
  """
276
- Resets UI state before generating new text.
277
 
278
  Returns:
279
- A tuple of updates for:
280
- - option_a_audio_player (clear audio)
281
- - option_b_audio_player (clear audio)
282
- - vote_button_a (show)
283
- - vote_button_b (show)
284
- - vote_result_a (hide)
285
- - vote_result_b (hide)
286
- - option_map_state (reset option map state)
287
- - vote_submitted_state (reset submitted vote state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  """
289
  default_option_map: OptionMap = {
290
  "option_a": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
@@ -295,12 +370,50 @@ class App:
295
  gr.update(value=None, autoplay=False), # clear audio player B
296
  gr.update(visible=True, interactive=False), # show vote button A
297
  gr.update(visible=True, interactive=False), # show vote button B
298
- gr.update(visible=False, elem_classes=[]), # hide vote result A
299
- gr.update(visible=False, elem_classes=[]), # hide vote result B
300
  default_option_map, # Reset option_map_state as a default OptionMap
301
  False, # Reset vote_submitted_state
302
  )
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  def _build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
305
  """
306
  Builds the input section including the sample character description dropdown, character
@@ -418,35 +531,11 @@ class App:
418
  css_paths="src/assets/styles.css",
419
  ) as demo:
420
  # --- UI components ---
421
- gr.HTML("<h1>Expressive TTS Arena</h1>")
422
- gr.HTML(
423
- """
424
- <p style="font-size: 16px; font-weight: bold;">
425
- <strong>Instructions</strong>
426
- </p>
427
- <ol style="margin-left: 8px;">
428
- <li>
429
- Choose or enter a character description by selecting a sample or typing your own to guide
430
- text generation and voice synthesis.
431
- </li>
432
- <li>
433
- Click the <strong>"Generate Text"</strong> button to create dialogue for the character;
434
- the text automatically populates the input field for further editing.
435
- </li>
436
- <li>
437
- Click the <strong>"Synthesize Speech"</strong> button to convert your text and character
438
- description into two synthesized speech options for direct comparison.
439
- </li>
440
- <li>
441
- Listen to both audio outputs to assess their expressiveness.
442
- </li>
443
- <li>
444
- Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
445
- the most expressive result.
446
- </li>
447
- </ol>
448
- """
449
- )
450
  (
451
  sample_character_description_dropdown,
452
  character_description_input,
@@ -478,6 +567,79 @@ class App:
478
  vote_submitted_state = gr.State(False)
479
 
480
  # --- Register event handlers ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  # When a sample character description is chosen, update the character description textbox
482
  sample_character_description_dropdown.change(
483
  fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
@@ -490,34 +652,57 @@ class App:
490
  # 2. Generate text
491
  # 3. Enable the "Generate text" button
492
  generate_text_button.click(
493
- fn=lambda _=None: gr.update(interactive=False),
494
  inputs=[],
495
- outputs=[generate_text_button],
 
 
 
 
 
 
 
 
 
496
  ).then(
497
  fn=self._generate_text,
498
  inputs=[character_description_input],
499
  outputs=[text_input, generated_text_state],
500
  ).then(
501
- fn=lambda _=None: gr.update(interactive=True),
502
  inputs=[],
503
- outputs=[generate_text_button],
 
 
 
 
 
 
 
 
 
504
  )
505
 
506
  # Synthesize speech button click event handler chain:
507
- # 1. Disable the "Synthesize speech" button
508
- # 2. Reset UI state
509
  # 3. Synthesize speech, load audio players, and display vote button
510
  # 4. Enable the "Synthesize speech" button and display vote buttons
511
  synthesize_speech_button.click(
512
- fn=lambda _=None: (
513
- gr.update(interactive=False),
514
- gr.update(interactive=False),
515
- gr.update(interactive=False),
516
- ),
517
  inputs=[],
518
- outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
 
 
 
 
 
 
 
 
 
519
  ).then(
520
- fn=self._reset_ui,
521
  inputs=[],
522
  outputs=[
523
  option_a_audio_player,
@@ -541,13 +726,18 @@ class App:
541
  character_description_state,
542
  ],
543
  ).then(
544
- fn=lambda _=None: (
545
- gr.update(interactive=True),
546
- gr.update(interactive=True),
547
- gr.update(interactive=True),
548
- ),
549
  inputs=[],
550
- outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
 
 
 
 
 
 
 
 
 
551
  )
552
 
553
  # Vote button click event handlers
 
50
  async def _generate_text(
51
  self,
52
  character_description: str,
53
+ ) -> Tuple[gr.Textbox, str]:
54
  """
55
  Validates the character_description and generates text using Anthropic API.
56
 
 
87
  character_description: str,
88
  text: str,
89
  generated_text_state: str,
90
+ ) -> Tuple[gr.Audio, gr.Audio, OptionMap, bool, str, str]:
91
  """
92
  Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
93
 
 
209
  text_modified: bool,
210
  character_description: str,
211
  text: str,
212
+ ) -> Tuple[
213
+ bool,
214
+ gr.Button,
215
+ gr.Button,
216
+ gr.Textbox,
217
+ gr.Textbox,
218
+ gr.Button
219
+ ]:
220
  """
221
  Handles user voting and updates the UI to display vote results.
222
 
 
278
  gr.update(interactive=True),
279
  )
280
 
281
+ async def _randomize_character_description(self) -> Tuple[gr.Dropdown, gr.Textbox]:
282
  """
283
+ Randomly selects a sample character description; text generation and speech synthesis are chained separately by the event handler.
284
 
285
  Returns:
286
+ Tuple containing updates for:
287
+ - sample_character_description_dropdown (select random)
288
+ - character_description_input (update value)
289
+ """
290
+ import random
291
+
292
+ sample_keys = list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys())
293
+ random_sample = random.choice(sample_keys)
294
+ character_description = constants.SAMPLE_CHARACTER_DESCRIPTIONS[random_sample]
295
+
296
+ logger.info(f"Randomize All: Selected '{random_sample}'")
297
+
298
+ return (
299
+ gr.update(value=random_sample), # Update dropdown
300
+ gr.update(value=character_description), # Update character description
301
+ )
302
+
303
+ def _disable_ui(self) -> Tuple[
304
+ gr.Button,
305
+ gr.Dropdown,
306
+ gr.Textbox,
307
+ gr.Button,
308
+ gr.Textbox,
309
+ gr.Button,
310
+ gr.Button,
311
+ gr.Button
312
+ ]:
313
+ """
314
+ Disables all interactive components in the UI (except audio players)
315
+ """
316
+ return(
317
+ gr.update(interactive=False), # disable Randomize All button
318
+ gr.update(interactive=False), # disable Character Description dropdown
319
+ gr.update(interactive=False), # disable Character Description input
320
+ gr.update(interactive=False), # disable Generate Text button
321
+ gr.update(interactive=False), # disable Input Text input
322
+ gr.update(interactive=False), # disable Synthesize Speech Button
323
+ gr.update(interactive=False), # disable Choose Select A Button
324
+ gr.update(interactive=False), # disable Choose Select B Button
325
+ )
326
+
327
+ def _enable_ui(self) -> Tuple[
328
+ gr.Button,
329
+ gr.Dropdown,
330
+ gr.Textbox,
331
+ gr.Button,
332
+ gr.Textbox,
333
+ gr.Button,
334
+ gr.Button,
335
+ gr.Button
336
+ ]:
337
+ """
338
+ Enables all interactive components in the UI (except audio players)
339
+ """
340
+ return(
341
+ gr.update(interactive=True), # enable Randomize All button
342
+ gr.update(interactive=True), # enable Character Description dropdown
343
+ gr.update(interactive=True), # enable Character Description input
344
+ gr.update(interactive=True), # enable Generate Text button
345
+ gr.update(interactive=True), # enable Input Text input
346
+ gr.update(interactive=True), # enable Synthesize Speech Button
347
+ gr.update(interactive=True), # enable Choose Select A Button
348
+ gr.update(interactive=True), # enable Choose Select B Button
349
+ )
350
+
351
+ def _reset_voting_ui(self) -> Tuple[
352
+ gr.Audio,
353
+ gr.Audio,
354
+ gr.Button,
355
+ gr.Button,
356
+ gr.Textbox,
357
+ gr.Textbox,
358
+ OptionMap,
359
+ bool
360
+ ]:
361
+ """
362
+ Resets voting UI state and clears audio players
363
  """
364
  default_option_map: OptionMap = {
365
  "option_a": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
 
370
  gr.update(value=None, autoplay=False), # clear audio player B
371
  gr.update(visible=True, interactive=False), # show vote button A
372
  gr.update(visible=True, interactive=False), # show vote button B
373
+ gr.update(visible=False), # hide vote result A
374
+ gr.update(visible=False), # hide vote result B
375
  default_option_map, # Reset option_map_state as a default OptionMap
376
  False, # Reset vote_submitted_state
377
  )
378
 
379
+ def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
380
+ """
381
+ Builds heading section including title, randomize all button, and instructions
382
+ """
383
+ with gr.Row():
384
+ with gr.Column(scale=5):
385
+ title = gr.HTML("<h1>Expressive TTS Arena</h1>")
386
+ randomize_all_button = gr.Button("🎲 Randomize All", variant="primary", scale=1)
387
+ instructions = gr.HTML(
388
+ """
389
+ <p style="font-size: 16px; font-weight: bold;">
390
+ <strong>Instructions</strong>
391
+ </p>
392
+ <ol style="margin-left: 8px;">
393
+ <li>
394
+ Choose or enter a character description by selecting a sample or typing your own to guide
395
+ text generation and voice synthesis.
396
+ </li>
397
+ <li>
398
+ Click the <strong>"Generate Text"</strong> button to create dialogue for the character;
399
+ the text automatically populates the input field for further editing.
400
+ </li>
401
+ <li>
402
+ Click the <strong>"Synthesize Speech"</strong> button to convert your text and character
403
+ description into two synthesized speech options for direct comparison.
404
+ </li>
405
+ <li>
406
+ Listen to both audio outputs to assess their expressiveness.
407
+ </li>
408
+ <li>
409
+ Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
410
+ the most expressive result.
411
+ </li>
412
+ </ol>
413
+ """
414
+ )
415
+ return (title, randomize_all_button, instructions)
416
+
417
  def _build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
418
  """
419
  Builds the input section including the sample character description dropdown, character
 
531
  css_paths="src/assets/styles.css",
532
  ) as demo:
533
  # --- UI components ---
534
+ (
535
+ title,
536
+ randomize_all_button,
537
+ instructions,
538
+ ) = self._build_heading_section()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  (
540
  sample_character_description_dropdown,
541
  character_description_input,
 
567
  vote_submitted_state = gr.State(False)
568
 
569
  # --- Register event handlers ---
570
+ # Select a character description, generate text, and synthesize speech with a single button click.
571
+ # 1. Select a random character description and disable the UI
572
+ # 2. Generate text
573
+ # 3. Reset UI state for audio players and voting results
574
+ # 4. Synthesize speech
575
+ # 5. Re-enable the UI
576
+ randomize_all_button.click(
577
+ fn=self._randomize_character_description,
578
+ inputs=[],
579
+ outputs=[
580
+ sample_character_description_dropdown,
581
+ character_description_input,
582
+ ],
583
+ ).then(
584
+ fn=self._disable_ui,
585
+ inputs=[],
586
+ outputs=[
587
+ randomize_all_button,
588
+ sample_character_description_dropdown,
589
+ character_description_input,
590
+ generate_text_button,
591
+ text_input,
592
+ synthesize_speech_button,
593
+ vote_button_a,
594
+ vote_button_b,
595
+ ],
596
+ ).then(
597
+ fn=self._generate_text,
598
+ inputs=[character_description_input],
599
+ outputs=[text_input, generated_text_state],
600
+ ).then(
601
+ fn=self._reset_voting_ui,
602
+ inputs=[],
603
+ outputs=[
604
+ option_a_audio_player,
605
+ option_b_audio_player,
606
+ vote_button_a,
607
+ vote_button_b,
608
+ vote_result_a,
609
+ vote_result_b,
610
+ option_map_state,
611
+ vote_submitted_state,
612
+ ],
613
+ ).then(
614
+ fn=lambda _=None: gr.update(interactive=False),
615
+ inputs=[],
616
+ outputs=[text_input],
617
+ ).then(
618
+ fn=self._synthesize_speech,
619
+ inputs=[character_description_input, text_input, generated_text_state],
620
+ outputs=[
621
+ option_a_audio_player,
622
+ option_b_audio_player,
623
+ option_map_state,
624
+ text_modified_state,
625
+ text_state,
626
+ character_description_state,
627
+ ],
628
+ ).then(
629
+ fn=self._enable_ui,
630
+ inputs=[],
631
+ outputs=[
632
+ randomize_all_button,
633
+ sample_character_description_dropdown,
634
+ character_description_input,
635
+ generate_text_button,
636
+ text_input,
637
+ synthesize_speech_button,
638
+ vote_button_a,
639
+ vote_button_b,
640
+ ],
641
+ )
642
+
643
  # When a sample character description is chosen, update the character description textbox
644
  sample_character_description_dropdown.change(
645
  fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
 
652
  # 2. Generate text
653
  # 3. Re-enable the UI (including the "Generate text" button)
654
  generate_text_button.click(
655
+ fn=self._disable_ui,
656
  inputs=[],
657
+ outputs=[
658
+ randomize_all_button,
659
+ sample_character_description_dropdown,
660
+ character_description_input,
661
+ generate_text_button,
662
+ text_input,
663
+ synthesize_speech_button,
664
+ vote_button_a,
665
+ vote_button_b,
666
+ ],
667
  ).then(
668
  fn=self._generate_text,
669
  inputs=[character_description_input],
670
  outputs=[text_input, generated_text_state],
671
  ).then(
672
+ fn=self._enable_ui,
673
  inputs=[],
674
+ outputs=[
675
+ randomize_all_button,
676
+ sample_character_description_dropdown,
677
+ character_description_input,
678
+ generate_text_button,
679
+ text_input,
680
+ synthesize_speech_button,
681
+ vote_button_a,
682
+ vote_button_b,
683
+ ],
684
  )
685
 
686
  # Synthesize speech button click event handler chain:
687
+ # 1. Disable UI
688
+ # 2. Reset UI state for audio players and voting results
689
  # 3. Synthesize speech, load audio players, and display vote button
690
  # 4. Re-enable the UI (including the "Synthesize speech" and vote buttons)
691
  synthesize_speech_button.click(
692
+ fn=self._disable_ui,
 
 
 
 
693
  inputs=[],
694
+ outputs=[
695
+ randomize_all_button,
696
+ sample_character_description_dropdown,
697
+ character_description_input,
698
+ generate_text_button,
699
+ text_input,
700
+ synthesize_speech_button,
701
+ vote_button_a,
702
+ vote_button_b,
703
+ ],
704
  ).then(
705
+ fn=self._reset_voting_ui,
706
  inputs=[],
707
  outputs=[
708
  option_a_audio_player,
 
726
  character_description_state,
727
  ],
728
  ).then(
729
+ fn=self._enable_ui,
 
 
 
 
730
  inputs=[],
731
+ outputs=[
732
+ randomize_all_button,
733
+ sample_character_description_dropdown,
734
+ character_description_input,
735
+ generate_text_button,
736
+ text_input,
737
+ synthesize_speech_button,
738
+ vote_button_a,
739
+ vote_button_b,
740
+ ],
741
  )
742
 
743
  # Vote button click event handlers