Spaces:

smtsead
/

Assignment1

Sleeping

File size: 5,234 Bytes

9dd5dc1
4fddba4
 
3603f06
8de2446
dbfc4d0
 
 
 
 
 
4fddba4
a63c8c4
dbfc4d0
4fddba4
a63c8c4
 
 
 
 
 
 
02b4ede
a63c8c4
dbfc4d0
 
3a25fa2
02b4ede
 
 
 
 
 
 
4fddba4
a63c8c4
dbfc4d0
4fddba4
a63c8c4
 
 
 
 
 
 
02b4ede
a63c8c4
02b4ede
dbfc4d0
02c1ec5
dbfc4d0
 
a63c8c4
 
02b4ede
f9e85ad
 
 
 
dbfc4d0
f9e85ad
02b4ede
 
18423bf
9dd5dc1
4fddba4
a63c8c4
4fddba4
a63c8c4
 
 
 
 
 
 
 
 
 
a107515
 
3603f06
796d097
8de2446
4fddba4
9dd5dc1
f9e85ad
 
 
 
 
 
 
 
 
 
 
 
a63c8c4
 
f9e85ad
4fddba4
 
9dd5dc1
4fddba4
 
 
 
a107515
f9e85ad
4e17b21
 
f9e85ad
dbfc4d0
 
 
 
 
3a25fa2
 
f9e85ad
dbfc4d0
 
 
 
 
9323a68
a107515
f9e85ad
dbfc4d0
 
 
 
9323a68
a107515
f9e85ad
a107515
8de2446
9dd5dc1
dbfc4d0

# Import necessary libraries
import os
import tempfile

import streamlit as st
from gtts import gTTS
from retrying import retry  # For retry logic
from transformers import pipeline

# Predicate handed to the retry decorators: only read-timeout errors are retried
def retry_if_timeout_error(exception):
    """Tell the retry machinery whether *exception* looks like a read timeout.

    Args:
        exception: The object raised by the wrapped call.

    Returns:
        bool: True only when *exception* is an ``Exception`` instance whose
        message contains "Read timed out"; False otherwise.
    """
    # Anything that is not a regular Exception is never retried.
    if not isinstance(exception, Exception):
        return False
    message = str(exception)
    return "Read timed out" in message

# Function to convert image to text using Hugging Face's BLIP model

# Caption words that are dropped from the model output before it is used as a
# story scenario.
_UNWANTED_CAPTION_WORDS = frozenset(
    {"illustration", "drawing", "sketch", "picture", "dream", "imagination"}
)


@st.cache_resource(show_spinner=False)
def _load_captioner():
    """Build the BLIP captioning pipeline once and reuse it across reruns."""
    # Streamlit re-executes the script on every interaction; without caching
    # the model would be re-created on every call. timeout=30 is passed
    # through exactly as before.
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device_map="auto", timeout=30)


@retry(stop_max_attempt_number=3, wait_fixed=2000, retry_on_exception=retry_if_timeout_error)  # Retry 3 times with a 2-second delay
def img2text(url):
    """
    Converts an image to text using the Salesforce/blip-image-captioning-base model.

    The call is retried up to 3 times, 2 seconds apart, when it fails with a
    "Read timed out" error.

    Args:
        url (str): Path to the image file.

    Returns:
        str: Generated text caption from the image, with whole words such as
        "illustration" removed and whitespace normalised to single spaces.
    """
    caption = _load_captioner()(url)[0]["generated_text"]

    # Drop unwanted words as whole words only. A plain substring replace would
    # mangle unrelated words ("dreaming" -> "ing") and leave double spaces.
    kept_words = [
        word
        for word in caption.split()
        if word.strip(".,;:!?\"'").lower() not in _UNWANTED_CAPTION_WORDS
    ]
    return " ".join(kept_words)

# Function to generate a kid-friendly story from the text caption
@st.cache_resource(show_spinner=False)
def _load_story_generator():
    """Build the GPT-2 story pipeline once and reuse it across reruns."""
    # Streamlit re-executes the script on every interaction; without caching
    # the model would be re-created on every call. timeout=30 is passed
    # through exactly as before.
    return pipeline("text-generation", model="aspis/gpt2-genre-story-generation", device_map="auto", timeout=30)


@retry(stop_max_attempt_number=3, wait_fixed=2000, retry_on_exception=retry_if_timeout_error)  # Retry 3 times with a 2-second delay
def text2story(text):
    """
    Generates a kid-friendly story from the text caption using the aspis/gpt2-genre-story-generation model.

    The call is retried up to 3 times, 2 seconds apart, when it fails with a
    "Read timed out" error.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str: Generated story with the prompt removed, truncated to at most
        100 whitespace-separated words.
    """
    # Add a prompt to ensure the story is happy, fun, and appropriate for kids
    prompt = f"Base on the following scenario: {text}, write a happy,fun and complete story for kids with no sad, violent, scary elements."

    # Generate the story. do_sample=True is required for temperature/top_k/top_p
    # to take effect; with greedy decoding they are ignored.
    # NOTE(review): max_length counts the prompt tokens too, so a very long
    # caption leaves little or no room for the story — confirm this is intended.
    outputs = _load_story_generator()(
        prompt,
        max_length=150,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
    )
    story = outputs[0]["generated_text"]

    # Remove the prompt from the generated story
    story = story.replace(prompt, "").strip()

    # Ensure the story is within 100 words
    story_words = story.split()
    if len(story_words) > 100:
        story = " ".join(story_words[:100])

    return story

# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Synthesises English speech for the story with gTTS and saves it as an MP3.

    Args:
        story_text (str): The generated story text.

    Returns:
        str: Path to the generated audio file ("story_audio.mp3", relative to
        the current working directory).
    """
    output_path = "story_audio.mp3"

    # Build the speech object and write it straight to disk
    gTTS(text=story_text, lang='en').save(output_path)

    return output_path

# Main application
st.set_page_config(page_title="Picture Stories 🎨📖", page_icon="🦄")
st.title("Picture Stories 🎨📖")
st.markdown("### Turn your pictures into fun stories and listen to them! 🎉")

# Instructions for kids
st.markdown("""
**How to use this app:**
1. **Upload a picture** of something fun, like your favorite toy, a park, or your pet.
2. Wait for the app to **create a story** from your picture.
3. **Listen to the story** by clicking the "Play Audio" button.
4. Enjoy your fun story! 🎧
""")

# Upload image
uploaded_file = st.file_uploader("📷 **Upload your picture here!**", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Save the upload to a private temporary file rather than to the working
    # directory under the client-supplied name: the name is untrusted input
    # and two sessions uploading the same filename would overwrite each other.
    image_suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=image_suffix, delete=False) as image_tmp:
        image_tmp.write(uploaded_file.getvalue())
    image_path = image_tmp.name

    # Each stage runs only if the previous one produced a result, so one
    # failure shows one error message instead of a cascade of NameErrors.
    scenario = None
    story = None
    audio_file = None

    try:
        # Display the uploaded image
        st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

        # Stage 1: Image to Text
        st.text('✨ Turning your picture into words...')
        try:
            scenario = img2text(image_path)
            st.write("**What we see:**", scenario)
        except Exception as e:
            st.error(f"Oops! Something went wrong while processing the image. Please try again. Error: {e}")

        # Stage 2: Text to Story
        if scenario is not None:
            st.text('📖 Creating a fun story for you...')
            try:
                story = text2story(scenario)
                st.write("**Your story:**", story)
            except Exception as e:
                st.error(f"Oops! Something went wrong while generating the story. Please try again. Error: {e}")

        # Stage 3: Story to Audio
        if story is not None:
            st.text('🎧 Turning your story into audio...')
            try:
                audio_file = text2audio(story)
            except Exception as e:
                st.error(f"Oops! Something went wrong while converting the story to audio. Please try again. Error: {e}")

        # Play button for the generated audio.
        # NOTE: clicking the button triggers a Streamlit rerun, so the stages
        # above execute again before the player is shown.
        if audio_file is not None and st.button("🎵 **Play Audio**"):
            # Read the bytes up front so the player does not depend on the
            # file, which is deleted below.
            with open(audio_file, "rb") as audio_stream:
                st.audio(audio_stream.read(), format="audio/mp3")
    finally:
        # Clean up the temporary image and the generated audio file, even if a
        # stage failed or Streamlit interrupted the run.
        for leftover in (image_path, audio_file):
            if leftover is not None and os.path.exists(leftover):
                os.remove(leftover)