Spaces:
Sleeping
Sleeping
File size: 7,456 Bytes
9dd5dc1 28c2183 4fddba4 3928fc8 4fddba4 a63c8c4 3928fc8 a63c8c4 28c2183 a63c8c4 28c2183 4fddba4 2bdfc3f 4fddba4 a63c8c4 aee30ec a63c8c4 2bdfc3f a63c8c4 28c2183 7b153c6 aee30ec 28c2183 7b153c6 28c2183 aee30ec 7b153c6 aee30ec 28c2183 4fddba4 a63c8c4 4fddba4 a63c8c4 28c2183 4fddba4 28c2183 b1e0ecf 28c2183 f9e85ad 28c2183 a63c8c4 aee30ec 28c2183 4fddba4 28c2183 4fddba4 28c2183 aee30ec 4e17b21 28c2183 aee30ec 3a25fa2 28c2183 aee30ec 28c2183 9323a68 aee30ec 28c2183 8de2446 28c2183 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# Import necessary libraries
import streamlit as st # For building the web application
from transformers import pipeline # For using pre-trained models (image-to-text and text-generation)
from gtts import gTTS # For converting text to speech
import os # For file handling (saving and deleting temporary files)
# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
    """
    Caption an image with the Salesforce/blip-image-captioning-base model.

    Args:
        url (str): Path to the image file; handed to the pipeline unchanged.

    Returns:
        str | None: The caption with filler words such as "illustration"
        removed and whitespace normalised, or None when captioning fails
        (the error is reported through st.error).
    """
    import re  # Local import: only this function needs it.

    try:
        # NOTE: the pipeline object is rebuilt on every call.
        image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        text = image_to_text_model(url)[0]["generated_text"]

        # Strip filler words as WHOLE words only. A plain str.replace would
        # also cut them out of longer words (e.g. "dreaming" -> "ing").
        unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
        pattern = r"\b(?:" + "|".join(re.escape(word) for word in unwanted_words) + r")\b"
        text = re.sub(pattern, "", text)

        # Removing a word leaves a gap; collapse runs of whitespace and trim.
        return " ".join(text.split())
    except Exception as e:
        st.error(f"Error processing image: {e}")  # Surface the failure in the UI
        return None
# Function to generate a kid-friendly superhero story from the text caption
def text2story(text):
    """
    Generate a superhero story from a caption with the
    pranavpsv/gpt2-genre-story-generator model.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str | None: The story with the genre tags removed, truncated to at
        most 100 words, or None when generation fails (the error is reported
        through st.error).
    """
    word_limit = 100  # Hard cap on the returned story length, in words
    min_words = 50    # Below this a second, longer generation is attempted

    try:
        # NOTE: the pipeline object is rebuilt on every call.
        story_generator = pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator")
        prompt = f"<BOS> <superhero> {text}"  # Genre tags expected by the model

        def generate_words(max_length):
            """Run one generation and return its cleaned, capped word list."""
            raw = story_generator(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']
            cleaned = raw.replace("<BOS>", "").replace("<superhero>", "")
            return cleaned.split()[:word_limit]

        words = generate_words(150)
        if len(words) < min_words:
            # Retry with a larger budget, but only adopt the retry when it is
            # actually longer, so a better first attempt is never discarded.
            retry_words = generate_words(200)
            if len(retry_words) > len(words):
                words = retry_words
        return " ".join(words)
    except Exception as e:
        st.error(f"Error generating story: {e}")  # Surface the failure in the UI
        return None
# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Synthesise speech for a story with gTTS and save it as an MP3 file.

    Args:
        story_text (str): The story to read aloud.

    Returns:
        str | None: Path of the saved audio file ("story_audio.mp3"), or
        None when synthesis or saving fails (the error is reported through
        st.error).
    """
    output_path = "story_audio.mp3"  # Fixed output name, overwritten on each call
    try:
        # Build the English speech object and write it straight to disk.
        gTTS(text=story_text, lang='en').save(output_path)
    except Exception as err:
        st.error(f"Error generating audio: {err}")  # Surface the failure in the UI
        return None
    return output_path
# Main application function
def main():
    """
    Run the Streamlit application.

    Flow: upload a picture -> caption it (img2text) -> turn the caption into
    a story (text2story) -> synthesise audio (text2audio) -> offer playback.
    Results are kept in st.session_state so that a rerun (for example a
    button click) does not recompute them, and they are reset whenever a
    different picture is uploaded.
    """
    import hashlib
    import tempfile

    # Configure the Streamlit app page
    st.set_page_config(page_title="Picture Stories π¨π", page_icon="π¦")
    st.title("Picture Stories π¨π")
    st.markdown("### Turn your pictures into fun superhero stories and listen to them! π")

    # Instructions for kids
    st.markdown("""
    **How to use this app:**
    1. **Upload a picture** of something fun, like your favorite toy, your pet, or your hero.
    2. Wait for the app to **create a superhero story** from your picture.
    3. **Listen to the story** by clicking the "Play Audio" button.
    4. Enjoy your fun superhero story! π§
    """)

    # Upload image
    uploaded_file = st.file_uploader("π· **Upload your picture here!**", type=["jpg", "jpeg", "png"])

    # Initialize session state variables
    for key in ('scenario', 'story', 'audio_file'):
        if key not in st.session_state:
            st.session_state[key] = None

    if uploaded_file is None:
        return

    image_bytes = uploaded_file.getvalue()

    # Display the uploaded image in the app
    st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

    # Identify the upload by name AND content. When it differs from the one
    # already processed, drop every cached result so the caption, story and
    # audio are all rebuilt for the new picture.
    upload_id = (uploaded_file.name, hashlib.sha256(image_bytes).hexdigest())
    if st.session_state.get('uploaded_file_id') != upload_id:
        st.session_state.scenario = None
        st.session_state.story = None
        st.session_state.audio_file = None
        st.session_state.uploaded_file_id = upload_id
        st.session_state.uploaded_file_name = uploaded_file.name

    # Stage 1: Image to Text
    with st.spinner('β¨ Turning your picture into words...'):
        if st.session_state.scenario is None:
            # Write the image to a private temporary file rather than to the
            # client-supplied name in the working directory, so an upload can
            # never overwrite (or later delete) an existing file.
            suffix = os.path.splitext(uploaded_file.name)[1]
            tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
            try:
                tmp.write(image_bytes)
                tmp.close()  # Close before reading so the path can be reopened
                st.session_state.scenario = img2text(tmp.name)
            finally:
                tmp.close()
                if os.path.exists(tmp.name):
                    os.remove(tmp.name)  # Always remove the temporary image

    if not st.session_state.scenario:
        return
    st.write("**What we see:**", st.session_state.scenario)  # Display the generated caption

    # Stage 2: Text to Story
    with st.spinner('π Creating a fun superhero story for you...'):
        if st.session_state.story is None:
            st.session_state.story = text2story(st.session_state.scenario)

    if not st.session_state.story:
        return
    st.write("**Your superhero story:**", st.session_state.story)  # Display the generated story

    # Stage 3: Story to Audio
    with st.spinner('π§ Turning your story into audio...'):
        if st.session_state.audio_file is None:
            st.session_state.audio_file = text2audio(st.session_state.story)

    # Play button for the generated audio
    if st.button("π΅ **Play Audio**"):
        audio_path = st.session_state.audio_file
        # audio_path is None when text2audio failed; check it before touching
        # the filesystem (os.path.exists(None) raises TypeError).
        if audio_path and os.path.exists(audio_path):
            st.audio(audio_path, format="audio/mp3")  # Play the audio
        else:
            st.error("Audio file not found. Please try again.")
# Run the application
if __name__ == "__main__":
    # Start the app only when the file is executed as a script, not on import.
    main()