File size: 7,456 Bytes
9dd5dc1
28c2183
 
 
 
4fddba4
3928fc8
4fddba4
a63c8c4
3928fc8
a63c8c4
 
 
 
 
28c2183
a63c8c4
28c2183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fddba4
2bdfc3f
4fddba4
a63c8c4
aee30ec
a63c8c4
 
 
 
 
2bdfc3f
a63c8c4
28c2183
 
 
 
 
7b153c6
aee30ec
28c2183
 
7b153c6
28c2183
 
 
 
 
aee30ec
 
 
7b153c6
aee30ec
 
28c2183
 
 
 
4fddba4
a63c8c4
4fddba4
a63c8c4
 
 
 
 
 
 
 
 
28c2183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fddba4
28c2183
 
 
b1e0ecf
28c2183
 
 
 
f9e85ad
28c2183
 
a63c8c4
aee30ec
 
 
 
 
 
 
 
28c2183
 
 
 
 
4fddba4
28c2183
 
4fddba4
28c2183
 
aee30ec
 
 
 
 
 
4e17b21
28c2183
 
aee30ec
 
 
 
 
3a25fa2
28c2183
 
aee30ec
 
 
28c2183
 
 
 
 
 
9323a68
aee30ec
28c2183
 
8de2446
28c2183
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Import necessary libraries
import streamlit as st  # For building the web application
from transformers import pipeline  # For using pre-trained models (image-to-text and text-generation)
from gtts import gTTS  # For converting text to speech
import os  # For file handling (saving and deleting temporary files)

# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
    """
    Converts an image to text using the Salesforce/blip-image-captioning-base model.

    Filler words such as "illustration" are removed from the caption as whole
    words only, so longer words that merely contain one of them (for example
    "dreaming" or "pictures") are left intact. Runs of whitespace left behind
    by a removed word are collapsed to a single space.

    Args:
        url (str): Path to the image file.

    Returns:
        str | None: The cleaned caption (may be an empty string if nothing is
        left after cleaning), or None if captioning failed. On failure the
        error is shown in the app via st.error.
    """
    import re  # Local import: only this function needs it

    try:
        # Load the image-to-text model
        image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

        # Generate text caption from the image
        text = image_to_text_model(url)[0]["generated_text"]

        # Remove unwanted words like "illustration" to make the caption cleaner.
        # \b anchors restrict the match to whole words; a plain str.replace
        # would also eat the inside of longer words ("dreaming" -> "ing").
        unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
        unwanted_pattern = r"\b(?:" + "|".join(re.escape(word) for word in unwanted_words) + r")\b"
        text = re.sub(unwanted_pattern, "", text, flags=re.IGNORECASE)

        # Collapse the gaps left by removed words and trim both ends
        return " ".join(text.split())
    except Exception as e:
        st.error(f"Error processing image: {e}")  # Display error message if something goes wrong
        return None

# Function to generate a kid-friendly superhero story from the text caption
def text2story(text):
    """
    Generates a superhero story from the text caption using the pranavpsv/gpt2-genre-story-generator model.

    The caption is wrapped in the "<BOS> <superhero>" prompt tags, the tags are
    stripped from the model output, and the result is capped at 100 words. If
    the first attempt has fewer than 50 words, one more attempt is made with a
    larger max_length and the longer of the two stories is kept.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str | None: The generated story (at most 100 words), or None if the
        caption is empty or generation failed. On failure the error is shown
        in the app via st.error.
    """
    # An empty caption gives the model nothing to build a story on
    if not text or not text.strip():
        st.error("Error generating story: the picture caption is empty.")
        return None

    try:
        # Load the text-generation model
        story_generator = pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator")

        # Add genre tags to the prompt
        prompt = f"<BOS> <superhero> {text}"

        def generate(max_length):
            """Run one generation pass and return the tag-free story capped at 100 words."""
            raw_story = story_generator(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']
            without_tags = raw_story.replace("<BOS>", "").replace("<superhero>", "")
            # split()/join also normalises whitespace left where the tags were
            return " ".join(without_tags.split()[:100])

        story = generate(150)

        # If the story is too short (under 50 words), try once more with a
        # larger budget, but never trade the first attempt for a shorter one.
        if len(story.split()) < 50:
            retry_story = generate(200)
            if len(retry_story.split()) > len(story.split()):
                story = retry_story

        return story
    except Exception as e:
        st.error(f"Error generating story: {e}")  # Display error message if something goes wrong
        return None

# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Turns the story text into spoken English audio with gTTS.

    Args:
        story_text (str): The generated story text.

    Returns:
        str | None: Path of the saved MP3 file ("story_audio.mp3"), or None
        if speech synthesis or saving failed (the error is shown via st.error).
    """
    output_path = "story_audio.mp3"  # Fixed output file name, relative to the working directory
    try:
        # Synthesize the speech and write it straight to disk
        gTTS(text=story_text, lang='en').save(output_path)
    except Exception as err:
        st.error(f"Error generating audio: {err}")  # Surface the failure in the app
        return None
    return output_path

# Main application function
def main():
    """
    Main function to run the Streamlit application.

    Flow: the user uploads a picture, the picture is captioned (img2text), the
    caption is turned into a story (text2story), and the story is turned into
    audio (text2audio). Results are kept in st.session_state so that reruns
    triggered by widget interaction (such as pressing the play button) do not
    redo the work. A new upload is detected by hashing the image bytes, and it
    resets every cached stage so the story and audio always match the picture
    currently shown.
    """
    # Local imports: only this function needs them
    import hashlib
    import tempfile

    # Configure the Streamlit app page
    st.set_page_config(page_title="Picture Stories 🎨📖", page_icon="🦄")
    st.title("Picture Stories 🎨📖")
    st.markdown("### Turn your pictures into fun superhero stories and listen to them! 🎉")

    # Instructions for kids
    st.markdown("""
    **How to use this app:**
    1. **Upload a picture** of something fun, like your favorite toy, your pet, or your hero.
    2. Wait for the app to **create a superhero story** from your picture.
    3. **Listen to the story** by clicking the "Play Audio" button.
    4. Enjoy your fun superhero story! 🎧
    """)

    # Upload image
    uploaded_file = st.file_uploader("📷 **Upload your picture here!**", type=["jpg", "jpeg", "png"])

    # Initialize session state variables
    for key in ("scenario", "story", "audio_file", "uploaded_file_name", "uploaded_file_digest"):
        if key not in st.session_state:
            st.session_state[key] = None

    if uploaded_file is None:
        return

    image_bytes = uploaded_file.getvalue()

    # Display the uploaded image in the app
    st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

    # Detect a new picture by content rather than by name, so a different
    # image that happens to reuse a file name is not mistaken for the old one.
    # All downstream results are dropped together so they cannot go stale.
    digest = hashlib.sha256(image_bytes).hexdigest()
    if digest != st.session_state.uploaded_file_digest:
        st.session_state.uploaded_file_digest = digest
        st.session_state.uploaded_file_name = uploaded_file.name  # Store the uploaded file name
        st.session_state.scenario = None
        st.session_state.story = None
        st.session_state.audio_file = None

    # Stage 1: Image to Text
    if st.session_state.scenario is None:
        with st.spinner('✨ Turning your picture into words...'):
            # Write the image to a private temporary file instead of using the
            # client-supplied file name as a path: that name could collide
            # with (and later delete) an existing file in the working directory.
            suffix = os.path.splitext(uploaded_file.name)[1]
            temp_path = None
            try:
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_image:
                    temp_image.write(image_bytes)
                    temp_path = temp_image.name
                st.session_state.scenario = img2text(temp_path)  # Generate text caption from the image
            finally:
                # Clean up the temporary image even if captioning raised
                if temp_path is not None and os.path.exists(temp_path):
                    os.remove(temp_path)

    if not st.session_state.scenario:
        return
    st.write("**What we see:**", st.session_state.scenario)  # Display the generated caption

    # Stage 2: Text to Story
    if st.session_state.story is None:
        with st.spinner('📖 Creating a fun superhero story for you...'):
            st.session_state.story = text2story(st.session_state.scenario)  # Generate a superhero story from the caption

    if not st.session_state.story:
        return
    st.write("**Your superhero story:**", st.session_state.story)  # Display the generated story

    # Stage 3: Story to Audio
    if st.session_state.audio_file is None:
        with st.spinner('🎧 Turning your story into audio...'):
            st.session_state.audio_file = text2audio(st.session_state.story)  # Generate audio file

    # Play button for the generated audio
    if st.button("🎵 **Play Audio**"):
        audio_path = st.session_state.audio_file
        # audio_path is None when text2audio failed; guard before touching the file system
        if audio_path and os.path.exists(audio_path):
            st.audio(audio_path, format="audio/mp3")  # Play the audio
        else:
            # Forget the missing file so the next rerun generates the audio again
            st.session_state.audio_file = None
            st.error("Audio file not found. Please try again.")  # Display error if audio file is missing

# Run the application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()