Spaces:

smtsead
/

Assignment1

Sleeping

File size: 3,001 Bytes

9dd5dc1
4fddba4
 
3603f06
8de2446
4fddba4
a107515
4fddba4
a107515
 
 
 
 
 
 
 
 
3a25fa2
 
a107515
4fddba4
a107515
4fddba4
a107515
 
3603f06
a107515
 
02c1ec5
a107515
 
 
081f6e3
 
02c1ec5
a107515
18423bf
 
 
 
 
 
9dd5dc1
4fddba4
3603f06
4fddba4
a107515
 
 
 
 
 
 
 
 
 
 
3603f06
8de2446
4fddba4
9dd5dc1
a107515
 
 
9dd5dc1
4fddba4
 
9dd5dc1
4fddba4
 
 
 
a107515
 
4e17b21
 
a107515
4fddba4
a107515
3a25fa2
 
a107515
 
 
9323a68
a107515
 
 
9323a68
a107515
 
 
8de2446
9dd5dc1
a107515

# Import necessary libraries
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline

# Function to convert image to text using Hugging Face's BLIP model
@st.cache_resource
def _load_image_captioner():
    """
    Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the whole script on every user interaction, so
    constructing the pipeline inside img2text reloaded the model each time.
    st.cache_resource keeps a single instance alive across reruns.
    NOTE(review): the cached pipeline is shared between sessions - confirm
    that concurrent inference on one pipeline object is acceptable here.

    Returns:
        The "image-to-text" pipeline for Salesforce/blip-image-captioning-base.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """
    Converts an image to text using the Salesforce/blip-image-captioning-base model.

    Args:
        url (str): Path to the image file; it is handed to the pipeline unchanged.

    Returns:
        str: The "generated_text" of the first caption returned by the pipeline.
    """
    captions = _load_image_captioner()(url)
    # The pipeline returns a list of dicts; only the first caption is used.
    return captions[0]["generated_text"]

# Function to generate a story from the text using T5-small model
@st.cache_resource
def _load_story_generator():
    """
    Build the T5-small text2text pipeline once and reuse it.

    Streamlit re-executes the whole script on every user interaction, so
    constructing the pipeline inside text2story reloaded the model each time.
    st.cache_resource keeps a single instance alive across reruns.
    NOTE(review): the cached pipeline is shared between sessions - confirm
    that concurrent inference on one pipeline object is acceptable here.

    Returns:
        The "text2text-generation" pipeline for t5-small.
    """
    return pipeline("text2text-generation", model="t5-small")


def text2story(text):
    """
    Expands the text caption into a story using the T5-small model.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str: The model output for a prompt asking for a short, cheerful
            kids' story about the caption, with any verbatim echo of the
            prompt removed.
    """
    # Simplified and rephrased prompt to guide the model
    prompt = f"Write a short, happy story for kids about {text}. The story should be cheerful, imaginative, and under 95 words."

    text_generator = _load_story_generator()
    # max_length caps the generated sequence; a single candidate is requested.
    outputs = text_generator(prompt, max_length=100, num_return_sequences=1)
    story = outputs[0]['generated_text']

    # Remove any repetition of the prompt in the output
    if prompt in story:
        story = story.replace(prompt, "").strip()

    return story

# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Synthesize English speech for the story and save it as an MP3 file.

    Args:
        story_text (str): The generated story text to be spoken.

    Returns:
        str: Path of the MP3 file that was written ("story_audio.mp3",
            relative to the current working directory).
    """
    output_path = "story_audio.mp3"
    # Build the gTTS request and write the result to disk in one step.
    gTTS(text=story_text, lang="en").save(output_path)
    return output_path

# Main application
# Flow: upload image -> caption (img2text) -> story (text2story) -> audio
# (text2audio). Streamlit reruns this script top to bottom on every interaction.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image into a Fun Audio Story!")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Save the upload to a private temp file instead of the working directory.
    # The client-supplied name is only used for its extension, so an upload
    # can neither overwrite existing files nor collide with another upload.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        image_path = tmp.name

    audio_file = None
    try:
        # Display the uploaded image
        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

        # Stage 1: Image to Text
        st.text('Processing image to text...')
        scenario = img2text(image_path)
        st.write("**Caption:**", scenario)

        # Stage 2: Text to Story
        st.text('Generating a fun story...')
        story = text2story(scenario)
        st.write("**Story:**", story)

        # Stage 3: Story to Audio
        if story.strip():
            st.text('Converting story to audio...')
            audio_file = text2audio(story)

            # Load the audio into memory so playback does not depend on the
            # file still existing once cleanup has run.
            with open(audio_file, "rb") as audio_stream:
                audio_bytes = audio_stream.read()

            # Play button for the generated audio
            if st.button("Play Audio"):
                st.audio(audio_bytes, format="audio/mp3")
        else:
            # gTTS cannot synthesize empty text, so skip the audio stage.
            st.warning("The story came out empty, so no audio was generated.")
    finally:
        # Always remove the per-request files, even if a stage above failed.
        for path in (image_path, audio_file):
            if path is None:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass