Spaces:
Sleeping
Sleeping
File size: 4,004 Bytes
9dd5dc1 4fddba4 3603f06 8de2446 4fddba4 1510296 4fddba4 a63c8c4 1510296 a63c8c4 02b4ede a63c8c4 1510296 3a25fa2 02b4ede 4fddba4 a63c8c4 4fddba4 a63c8c4 02b4ede a63c8c4 02b4ede 72f3264 02c1ec5 4636e10 a63c8c4 b343e37 f9e85ad 4636e10 f9e85ad 02b4ede 18423bf 9dd5dc1 4fddba4 a63c8c4 4fddba4 a63c8c4 a107515 3603f06 796d097 8de2446 4fddba4 9dd5dc1 f9e85ad a63c8c4 f9e85ad 4fddba4 9dd5dc1 4fddba4 a107515 f9e85ad 4e17b21 f9e85ad 4636e10 3a25fa2 f9e85ad 4636e10 9323a68 a107515 f9e85ad 4636e10 9323a68 a107515 f9e85ad a107515 8de2446 9dd5dc1 4636e10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# Import necessary libraries
import os
import re
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
# Function to convert image to text using Hugging Face's vit-gpt2-image-captioning model
def img2text(url):
    """
    Converts an image to text using the nlpconnect/vit-gpt2-image-captioning model.

    Args:
        url (str): Path to the image file.

    Returns:
        str: Generated text caption from the image, with filler words such as
            "illustration" removed and whitespace collapsed to single spaces.
    """
    # NOTE(review): the pipeline is rebuilt on every call; consider caching it
    # (e.g. with st.cache_resource) if load time becomes a problem.
    image_to_text_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    text = image_to_text_model(url)[0]["generated_text"]

    # Remove unwanted words like "illustration". Matching on word boundaries
    # (with an optional plural ending) avoids chopping up longer words, e.g.
    # turning "dreaming" into "ing" or "pictures" into "s".
    unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
    pattern = r"\b(?:" + "|".join(re.escape(word) for word in unwanted_words) + r")(?:es|s)?\b"
    text = re.sub(pattern, "", text)

    # Removing a word from the middle of the caption leaves two adjacent
    # spaces; split/join collapses every whitespace run and trims the ends.
    return " ".join(text.split())
# Function to generate a kid-friendly story from the text caption
def text2story(text):
    """
    Builds a short story from an image caption with the
    aspis/gpt2-genre-story-generation text-generation model.

    Args:
        text (str): Caption describing the picture.

    Returns:
        str: The generated text with the prompt removed, cut to at most
            100 whitespace-separated words.
    """
    # The caption is embedded in a fixed instruction sentence
    prompt = f"Write a kids story based on below scenario :{text}."

    story_generator = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    outputs = story_generator(prompt, max_length=150, num_return_sequences=1, temperature=0.7)

    # Drop the prompt text from the output so only the story remains
    story = outputs[0]["generated_text"].replace(prompt, "").strip()

    # Stories of 100 words or fewer are returned untouched; longer ones are
    # rebuilt from their first 100 words
    words = story.split()
    if len(words) <= 100:
        return story
    return " ".join(words[:100])
# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Synthesises English speech for the story with gTTS and saves it as an MP3.

    Args:
        story_text (str): The story to be read aloud.

    Returns:
        str: Path of the MP3 file that was written ("story_audio.mp3").
    """
    output_path = "story_audio.mp3"
    # Build the speech object and write it to disk in one step
    gTTS(text=story_text, lang='en').save(output_path)
    return output_path
# Main application
# Page setup and header.
# NOTE(review): the emoji in the user-facing strings below look mis-encoded;
# restore them from the original source if it is available.
st.set_page_config(page_title="Picture Stories π¨π", page_icon="π¦")
st.title("Picture Stories π¨π")
st.markdown("### Turn your pictures into fun stories and listen to them! π")

# Instructions for kids
st.markdown("""
**How to use this app:**
1. **Upload a picture** of something fun, like your favorite toy, a park, or your pet.
2. Wait for the app to **create a story** from your picture.
3. **Listen to the story** by clicking the "Play Audio" button.
4. Enjoy your fun story! π§
""")

# Upload image
uploaded_file = st.file_uploader("π· **Upload your picture here!**", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image_path = None
    audio_file = None
    try:
        # Save the upload under a generated temporary name instead of the
        # client-supplied filename, so an upload can neither choose where it
        # is written nor overwrite an existing file. Only the extension of
        # the original name is kept.
        suffix = os.path.splitext(uploaded_file.name)[1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as image_file:
            image_file.write(uploaded_file.getvalue())
            image_path = image_file.name

        # Display the uploaded image
        st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

        # Stage 1: Image to Text
        st.text('β¨ Turning your picture into words...')
        scenario = img2text(image_path)
        st.write("**What we see:**", scenario)

        # Stage 2: Text to Story
        st.text('π Creating a fun story for you...')
        story = text2story(scenario)
        st.write("**Your story:**", story)

        if story:
            # Stage 3: Story to Audio
            st.text('π§ Turning your story into audio...')
            audio_file = text2audio(story)

            # Load the clip into memory so playback does not depend on the
            # file, which is deleted in the cleanup step below.
            with open(audio_file, "rb") as audio_stream:
                audio_bytes = audio_stream.read()

            # Play button for the generated audio
            if st.button("π΅ **Play Audio**"):
                st.audio(audio_bytes, format="audio/mp3")
        else:
            # NOTE(review): gTTS is expected to reject empty text; skip the
            # audio stage rather than crash -- confirm against gTTS docs.
            st.warning("We could not make a story from this picture. Please try another one!")
    finally:
        # Clean up the temporary image and the generated audio file, even
        # when one of the stages above raised.
        for leftover in (image_path, audio_file):
            if leftover is None:
                continue
            try:
                os.remove(leftover)
            except FileNotFoundError:
                # Already gone (never written, or removed elsewhere).
                pass