Prof-Hunt committed (verified)
Commit 7d77471 · Parent(s): d10f43f

Update app.py

Files changed (1):
  app.py (+229, -107)
app.py CHANGED
@@ -359,7 +359,9 @@ def generate_all_scenes(prompts_text):

@spaces.GPU(duration=60)
def add_text_to_scenes(gallery_images, prompts_text):
+    """Add text overlay to scene images"""
    if not isinstance(gallery_images, list):
+        print("Error: gallery_images is not a list")
        return [], []

    clear_memory()
@@ -385,22 +387,40 @@ def add_text_to_scenes(gallery_images, prompts_text):

        if paragraph and image_data is not None:
            try:
-                # Handle tuple case (image, label) from gallery
-                if isinstance(image_data, tuple):
-                    image_data = image_data[0]
+                print(f"Processing image {i+1}, type: {type(image_data)}")
+                print(f"Image data: {str(image_data)[:100]}...")  # Print first 100 chars

-                # Convert numpy array to PIL Image
-                if isinstance(image_data, np.ndarray):
+                # Handle different types of image data
+                if isinstance(image_data, str):
+                    # If it's a string, try to load it as a numpy array
+                    try:
+                        import ast
+                        # Convert string representation of array to actual array
+                        array_data = ast.literal_eval(image_data)
+                        image = Image.fromarray(np.array(array_data, dtype=np.uint8))
+                    except:
+                        print(f"Failed to convert string to array for image {i+1}")
+                        continue
+                elif isinstance(image_data, tuple):
+                    # Handle gallery tuple format (image, label)
+                    image_data = image_data[0]
+                    if isinstance(image_data, np.ndarray):
+                        image = Image.fromarray(image_data)
+                    else:
+                        print(f"Unexpected tuple data type: {type(image_data)}")
+                        continue
+                elif isinstance(image_data, np.ndarray):
                    image = Image.fromarray(image_data)
-                else:
+                elif isinstance(image_data, Image.Image):
                    image = image_data
+                else:
+                    print(f"Unsupported image data type: {type(image_data)}")
+                    continue

-                print(f"Processing image {i+1}, type: {type(image)}")
+                # Ensure we have a valid RGB image
+                if image.mode != 'RGB':
+                    image = image.convert('RGB')

-                # Ensure we have a PIL Image
-                if not isinstance(image, Image.Image):
-                    raise TypeError(f"Expected PIL Image, got {type(image)}")
-
                overlaid_img = overlay_text_on_image(image, paragraph)
                if overlaid_img is not None:
                    overlaid_array = np.array(overlaid_img)
@@ -410,8 +430,11 @@ def add_text_to_scenes(gallery_images, prompts_text):
                    overlaid_img.save(output_path)
                    output_files.append(output_path)
                    print(f"Successfully processed image {i+1}")
+
            except Exception as e:
                print(f"Error processing image {i+1}: {str(e)}")
+                import traceback
+                print(traceback.format_exc())  # Print full error trace
                continue

    if not overlaid_images:
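Taken together, the new branches in add_text_to_scenes simply normalize whatever the gallery hands back (an (image, caption) tuple, a NumPy array, a PIL image, or a stringified array) into an RGB PIL image before the text overlay runs. A minimal standalone sketch of that pattern; the helper name normalize_gallery_item is illustrative and not part of app.py:

import ast
import numpy as np
from PIL import Image

def normalize_gallery_item(item):
    """Best-effort conversion of one Gradio gallery entry into an RGB PIL image."""
    if isinstance(item, tuple):              # gallery entries can be (image, caption)
        item = item[0]
    if isinstance(item, str):                # stringified array, as handled in the commit
        try:
            item = np.array(ast.literal_eval(item), dtype=np.uint8)
        except (ValueError, SyntaxError):
            return None
    if isinstance(item, np.ndarray):
        image = Image.fromarray(item)
    elif isinstance(item, Image.Image):
        image = item
    else:
        return None                          # unsupported type; caller skips the entry
    return image.convert("RGB") if image.mode != "RGB" else image

Returning None for anything unconvertible mirrors the continue statements above, so one bad gallery entry does not abort the whole book.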
@@ -581,121 +604,220 @@ def process_generated_prompt(prompt, paragraph):

# Create the interface
def create_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("# Tech Tales: Story Creation")
+    # Define CSS for custom styling
+    css = """
+    /* Global styles */
+    .gradio-container {
+        background-color: #EBF8FF !important;
+    }

-        with gr.Row():
-            generate_btn = gr.Button("1. Generate Random Landscape")
-        with gr.Row():
-            image_output = gr.Image(label="Generated Image", type="pil", interactive=False)
-
-        with gr.Row():
-            analyze_btn = gr.Button("2. Get Brief Description")
-        with gr.Row():
-            analysis_output = gr.Textbox(label="Image Description", lines=3)
-
-        with gr.Row():
-            story_btn = gr.Button("3. Create Children's Story")
-        with gr.Row():
-            story_output = gr.Textbox(label="Generated Story", lines=10)
-
-        with gr.Row():
-            prompts_btn = gr.Button("4. Generate Scene Prompts")
-        with gr.Row():
-            prompts_output = gr.Textbox(label="Generated Scene Prompts", lines=20)
-
-        with gr.Row():
-            generate_scenes_btn = gr.Button("5. Generate Story Scenes", variant="primary")
-
-        with gr.Row():
-            scene_progress = gr.Textbox(
-                label="Generation Progress",
-                lines=6,
-                interactive=False
-            )
-
-        with gr.Row():
-            gallery = gr.Gallery(
-                label="Story Scenes",
-                show_label=True,
-                columns=2,
-                height="auto",
-                interactive=False
-            )
-
-        with gr.Row():
-            scene_prompts_display = gr.Textbox(
-                label="Scene Descriptions",
-                lines=8,
-                interactive=False
-            )
+    /* Custom button styling */
+    .custom-button {
+        background-color: #3B82F6 !important;
+        color: white !important;
+        border: none !important;
+        border-radius: 8px !important;
+        padding: 10px 20px !important;
+        margin: 10px 0 !important;
+        min-width: 200px !important;
+    }

-        with gr.Row():
-            add_text_btn = gr.Button("6. Add Text to Scenes", variant="primary")
+    .custom-button:hover {
+        background-color: #2563EB !important;
+    }

-        with gr.Row():
-            final_gallery = gr.Gallery(
-                label="Story Book Pages",
-                show_label=True,
-                columns=2,
-                height="auto",
-                interactive=False
-            )
-
-        with gr.Row():
-            download_btn = gr.File(
-                label="Download Story Book",
-                file_count="multiple",
-                interactive=False
-            )
-
-        with gr.Row():
-            tts_btn = gr.Button("7. Read Story Aloud")
-            audio_output = gr.Audio(label="Story Audio")
-
-        # Event handlers
-        generate_btn.click(
-            fn=generate_image,
-            outputs=image_output
-        )
+    /* Section styling */
+    .section-content {
+        background-color: white !important;
+        border-radius: 12px !important;
+        padding: 20px !important;
+        margin: 10px 0 !important;
+        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05) !important;
+    }

-        analyze_btn.click(
-            fn=analyze_image,
-            inputs=[image_output],
-            outputs=analysis_output
-        )
+    /* AI Lesson box styling */
+    .ai-lesson {
+        background-color: #FEE2E2 !important;
+        border-radius: 8px !important;
+        padding: 15px !important;
+        margin: 10px 0 !important;
+        border: 1px solid #FCA5A5 !important;
+    }
+    """
+
+    with gr.Blocks(css=css) as demo:
+        gr.Markdown("""
+        # 🎨 Tech Tales: AI Children's Story Creator

-        story_btn.click(
-            fn=generate_story,
-            inputs=[analysis_output],
-            outputs=story_output
-        )
+        Welcome to this educational AI story creation tool! This app demonstrates how multiple AI models
+        work together to create an illustrated children's story. Each step includes a brief AI lesson
+        to help you understand the technology being used.

-        prompts_btn.click(
-            fn=generate_image_prompts,
-            inputs=[story_output],
-            outputs=prompts_output
-        )
+        Let's create something magical! ✨
+        """)

+        # Step 1: Generate Landscape
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 1: Setting the Scene with AI 🖼️
+
+                    🤖 **AI Lesson: Text-to-Image Generation**
+                    We're using Stable Diffusion, a powerful AI model that turns text into images.
+                    How it works:
+                    - Starts with random noise and gradually refines it into an image
+                    - Uses millions of image-text pairs from its training
+                    - Combines understanding of both language and visual elements
+                    - Takes about 50 steps to create each image
+
+                    Real-world applications: Book illustrations, concept art, product visualization
+                    """)
+                generate_btn = gr.Button("1. Generate Random Landscape", elem_classes="custom-button")
+                image_output = gr.Image(label="Your AI-Generated Landscape", type="pil", interactive=False)
+
+        # Step 2: Analyze Scene
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 2: Teaching AI to See 👁️
+
+                    🤖 **AI Lesson: Vision-Language Models (VLM)**
+                    Our VLM acts like an AI art critic, understanding and describing images.
+                    How it works:
+                    - Processes images through neural networks
+                    - Identifies objects, scenes, colors, and relationships
+                    - Translates visual features into natural language
+                    - Uses attention mechanisms to focus on important details
+
+                    Real-world applications: Image search, accessibility tools, medical imaging
+                    """)
+                analyze_btn = gr.Button("2. Get Brief Description", elem_classes="custom-button")
+                analysis_output = gr.Textbox(label="What the AI Sees", lines=3)
+
+        # Step 3: Create Story
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 3: Crafting the Narrative 📖
+
+                    🤖 **AI Lesson: Large Language Models**
+                    Meet our AI storyteller! It uses a Large Language Model (LLM) to write creative stories.
+                    How it works:
+                    - Processes the scene description as context
+                    - Uses pattern recognition from millions of stories
+                    - Maintains narrative consistency and character development
+                    - Adapts its writing style for children
+
+                    Real-world applications: Content creation, creative writing, education
+                    """)
+                story_btn = gr.Button("3. Create Children's Story", elem_classes="custom-button")
+                story_output = gr.Textbox(label="Your AI-Generated Story", lines=10)
+
+        # Step 4: Generate Prompts
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 4: Planning the Illustrations 🎯
+
+                    🤖 **AI Lesson: Natural Language Processing**
+                    The AI breaks down the story into key scenes and creates optimal image prompts.
+                    How it works:
+                    - Analyzes story structure and pacing
+                    - Identifies key narrative moments
+                    - Generates specialized prompts for each scene
+                    - Ensures visual consistency across illustrations
+
+                    Real-world applications: Content planning, storyboarding, scene composition
+                    """)
+                prompts_btn = gr.Button("4. Generate Scene Prompts", elem_classes="custom-button")
+                prompts_output = gr.Textbox(label="Scene Descriptions", lines=20)
+
+        # Step 5: Generate Scenes
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 5: Bringing Scenes to Life 🎨
+
+                    🤖 **AI Lesson: Specialized Image Generation**
+                    Using a fine-tuned model to create consistent character illustrations.
+                    How it works:
+                    - Uses LoRA (Low-Rank Adaptation) for specialized training
+                    - Maintains consistent character appearance
+                    - Processes multiple scenes in parallel
+                    - Balances creativity with prompt adherence
+
+                    Real-world applications: Character design, animation, book illustration
+                    """)
+                generate_scenes_btn = gr.Button("5. Generate Story Scenes", elem_classes="custom-button")
+                scene_progress = gr.Textbox(label="Generation Progress", lines=6, interactive=False)
+                gallery = gr.Gallery(label="Story Scenes", columns=2, height="auto", interactive=False)
+                scene_prompts_display = gr.Textbox(label="Scene Details", lines=8, interactive=False)
+
+        # Step 6: Add Text
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 6: Creating Book Pages 📚
+
+                    🤖 **AI Lesson: Computer Vision & Layout**
+                    Combining images and text requires sophisticated layout algorithms.
+                    How it works:
+                    - Analyzes image composition for text placement
+                    - Adjusts font size and style for readability
+                    - Creates visual hierarchy between elements
+                    - Ensures consistent formatting across pages
+
+                    Real-world applications: Desktop publishing, web design, digital books
+                    """)
+                add_text_btn = gr.Button("6. Add Text to Scenes", elem_classes="custom-button")
+                final_gallery = gr.Gallery(label="Final Book Pages", columns=2, height="auto", interactive=False)
+                download_btn = gr.File(label="Download Your Story Book", file_count="multiple", interactive=False)
+
+        # Step 7: Audio Generation
+        with gr.Row(elem_classes="section-content"):
+            with gr.Column():
+                with gr.Box(elem_classes="ai-lesson"):
+                    gr.Markdown("""
+                    ### Step 7: Adding Narration 🎧
+
+                    🤖 **AI Lesson: Text-to-Speech Synthesis**
+                    Converting our story into natural-sounding speech.
+                    How it works:
+                    - Uses neural networks for voice synthesis
+                    - Adds appropriate emotion and emphasis
+                    - Controls pacing and pronunciation
+                    - Maintains consistent voice throughout
+
+                    Real-world applications: Audiobooks, accessibility tools, virtual assistants
+                    """)
+                tts_btn = gr.Button("7. Read Story Aloud", elem_classes="custom-button")
+                audio_output = gr.Audio(label="Story Narration")
+
+        # Event handlers
+        generate_btn.click(fn=generate_image, outputs=image_output)
+        analyze_btn.click(fn=analyze_image, inputs=[image_output], outputs=analysis_output)
+        story_btn.click(fn=generate_story, inputs=[analysis_output], outputs=story_output)
+        prompts_btn.click(fn=generate_image_prompts, inputs=[story_output], outputs=prompts_output)
        generate_scenes_btn.click(
            fn=generate_all_scenes,
            inputs=[prompts_output],
            outputs=[gallery, scene_prompts_display, scene_progress]
        )
-
        add_text_btn.click(
            fn=add_text_to_scenes,
            inputs=[gallery, prompts_output],
            outputs=[final_gallery, download_btn]
        )
-
-        tts_btn.click(
-            fn=generate_combined_audio_from_story,
-            inputs=[story_output],
-            outputs=audio_output
-        )
+        tts_btn.click(fn=generate_combined_audio_from_story, inputs=[story_output], outputs=audio_output)

    return demo
+
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
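The interface rewrite leans on two Gradio features used throughout the new create_interface: a css string passed to gr.Blocks and elem_classes on individual components, which attach the page's classes (custom-button, section-content, ai-lesson) to stock widgets. A stripped-down sketch of the same pattern, reduced to one styled row and button; the stylesheet here is a hypothetical cut-down version of the one in app.py:

import gradio as gr

# Hypothetical cut-down stylesheet; the commit's real CSS defines more classes.
css = """
.custom-button { background-color: #3B82F6 !important; color: white !important; }
.custom-button:hover { background-color: #2563EB !important; }
.section-content { background-color: white !important; border-radius: 12px !important; }
"""

with gr.Blocks(css=css) as demo:
    # elem_classes ties a component to the classes declared in the css string above
    with gr.Row(elem_classes="section-content"):
        styled_btn = gr.Button("Styled button", elem_classes="custom-button")

if __name__ == "__main__":
    demo.launch()

The !important flags mirror the commit's CSS, which has to win over Gradio's built-in theme rules for the custom colors to show.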