# Spaces: Sleeping
# (page-status text captured along with the source; not part of the program)
# Import necessary libraries
import hashlib
import os
import re
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
def _load_captioner():
    """Build the BLIP image-captioning pipeline (loads the model weights)."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


# Art-medium words the captioner tends to add; matched as whole words
# (optionally plural) so longer words such as "withdrawing" are left intact.
_UNWANTED_WORDS_RE = re.compile(
    r"\b(?:illustration|painting|drawing|artwork)s?\b", re.IGNORECASE
)


# Function to convert image to text
def img2text(url):
    """Caption an image and remove art-medium words from the caption.

    Args:
        url: Image reference handed unchanged to the captioning pipeline
            (presumably a local path or URL -- confirm against the caller).

    Returns:
        The generated caption with "illustration", "painting", "drawing"
        and "artwork" removed and runs of whitespace collapsed to one space.
    """
    # Streamlit re-executes the script on every widget interaction, so the
    # model is fetched through st.cache_resource instead of being rebuilt
    # per call. The wrapper is applied at call time (not as a decorator) so
    # that merely defining this function has no Streamlit side effects.
    image_to_text_model = st.cache_resource(_load_captioner)()
    text = image_to_text_model(url)[0]["generated_text"]
    text = _UNWANTED_WORDS_RE.sub("", text)
    # Removing a word leaves a double space behind; split/join also strips.
    return " ".join(text.split())
def _load_story_generator():
    """Build the T5 text2text-generation pipeline (loads the model weights)."""
    return pipeline("text2text-generation", model="t5-small")


# Function to generate a story from text using T5
def text2story(text, max_words=95):
    """Generate a short children's story about *text* with T5.

    Args:
        text: Subject of the story; interpolated into the prompt.
        max_words: Upper bound on the number of words in the returned story.
            Also quoted in the prompt and used as the generation length
            limit. Defaults to 95.

    Returns:
        The generated story, stripped, and cut to at most ``max_words``
        whitespace-separated words.
    """
    # Cached across Streamlit reruns; wrapped at call time so that defining
    # this function has no Streamlit side effects.
    text_generator = st.cache_resource(_load_story_generator)()

    # Add a strong prompt to guide the model
    prompt = (
        "Write a short, happy, and fun story for kids aged 3-10. "
        f"The story should be about: {text}. "
        "Make it cheerful, imaginative, and suitable for young children. "
        f"Avoid any scary or sad elements. The story should be under {max_words} words."
    )

    # max_length limits generated tokens, not words, so the word cap is
    # enforced separately below.
    outputs = text_generator(prompt, max_length=max_words, num_return_sequences=1)
    story = outputs[0]["generated_text"]

    # Clean up the output in case the model echoed the prompt
    story = story.replace(prompt, "").strip()

    # Ensure the story is within the word limit
    words = story.split()
    if len(words) > max_words:
        story = " ".join(words[:max_words])
    return story
# Function to convert text to audio using gTTS
def text2audio(story_text):
    """Synthesize *story_text* to speech and return the audio file's path.

    gTTS writes MP3 data, so the file gets an ``.mp3`` suffix. Each call
    writes to its own temporary file so concurrent sessions do not overwrite
    one another's audio.

    Args:
        story_text: English text to speak.

    Returns:
        Absolute path of the newly written audio file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        ValueError: If ``story_text`` is empty or only whitespace.
    """
    if not story_text or not story_text.strip():
        raise ValueError("story_text must contain some text to speak")

    fd, audio_file = tempfile.mkstemp(prefix="story_", suffix=".mp3")
    os.close(fd)  # gTTS opens the path itself; only the unique name is needed
    try:
        tts = gTTS(story_text, lang="en")
        tts.save(audio_file)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_file)
        raise
    return audio_file
# Main application
# NOTE(review): the emoji in this section were mangled by an encoding error.
# The picture, microphone, headphone and palette glyphs were recovered from the
# surviving bytes; the book, memo and refresh glyphs are best-guess
# replacements -- confirm against the original.
st.set_page_config(page_title="Image to Story", page_icon="📖")
st.header("📖 Image to Story")
st.markdown("### Turn your image into a fun story!")

# Initialize session state. Results are kept per uploaded image so that a
# rerun (every button click re-executes this script) does not redo the work.
for _key in ("image_id", "scenario", "story", "audio_bytes"):
    if _key not in st.session_state:
        st.session_state[_key] = None

uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Your Uploaded Image", use_column_width=True)

    # Identify the picture by content; a different picture invalidates
    # everything derived from the previous one.
    image_id = hashlib.sha256(bytes_data).hexdigest()
    if st.session_state.image_id != image_id:
        st.session_state.image_id = image_id
        st.session_state.scenario = None
        st.session_state.story = None
        st.session_state.audio_bytes = None

    # Stage 1: Image to Text
    if st.session_state.scenario is None:
        with st.spinner('🖼️ Processing image...'):
            # Write the upload to a private temp file rather than to a
            # user-supplied name in the working directory, and remove it
            # as soon as the caption is available.
            suffix = os.path.splitext(uploaded_file.name)[1]
            fd, image_path = tempfile.mkstemp(suffix=suffix)
            try:
                with os.fdopen(fd, "wb") as file:
                    file.write(bytes_data)
                st.session_state.scenario = img2text(image_path)
            finally:
                os.remove(image_path)
    scenario = st.session_state.scenario
    st.write("**What I see:**", scenario)

    # Stage 2: Text to Story
    regenerate = st.button("🔄 Generate New Story")
    if st.session_state.story is None or regenerate:
        with st.spinner('📝 Creating a story...'):
            st.session_state.story = text2story(scenario)
        # The old narration no longer matches the story.
        st.session_state.audio_bytes = None
    st.write("**Your Story:**", st.session_state.story)

    # Stage 3: Story to Audio (skipped for an empty story, which cannot be spoken)
    if st.session_state.audio_bytes is None and st.session_state.story:
        with st.spinner('🎙️ Turning your story into audio...'):
            audio_path = text2audio(st.session_state.story)
            try:
                with open(audio_path, "rb") as audio:
                    st.session_state.audio_bytes = audio.read()
            finally:
                # Clean up the generated audio file; the bytes live in session state
                if os.path.exists(audio_path):
                    os.remove(audio_path)

    # Play button for audio
    if st.button("🎧 Listen to the Story"):
        if st.session_state.audio_bytes:
            # gTTS produces MP3 data, hence audio/mpeg rather than audio/wav.
            st.audio(st.session_state.audio_bytes, format="audio/mpeg")
        else:
            st.error("Audio file not found. Please generate the story again.")

# Add some fun prompts for kids
st.markdown("### 🎨 Tips for a Great Story!")
st.write("1. Upload a picture of your favorite animal, place, or toy!")
st.write("2. Imagine what's happening in the picture and let the story begin!")
st.write("3. Listen to your story and share it with your friends!")