Spaces:
Sleeping
Sleeping
File size: 2,955 Bytes
9dd5dc1 4fddba4 3603f06 8de2446 4fddba4 a107515 4fddba4 a107515 3a25fa2 a107515 4fddba4 a107515 4fddba4 a107515 3603f06 a107515 02c1ec5 a107515 02c1ec5 a107515 9dd5dc1 4fddba4 3603f06 4fddba4 a107515 3603f06 8de2446 4fddba4 9dd5dc1 a107515 9dd5dc1 4fddba4 9dd5dc1 4fddba4 a107515 4e17b21 a107515 4fddba4 a107515 3a25fa2 a107515 9323a68 a107515 9323a68 a107515 8de2446 9dd5dc1 a107515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# Import necessary libraries
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
    """
    Converts an image to text using the Salesforce/blip-image-captioning-base model.

    Args:
        url (str): Path to the image file; passed unchanged to the pipeline.

    Returns:
        str: Generated text caption from the image (the "generated_text"
            field of the first pipeline result).
    """
    # Reuse the cached pipeline instead of rebuilding the model on every call;
    # Streamlit reruns the whole script on each interaction, so an uncached
    # pipeline would be re-created every time.
    captioner = _load_captioner()
    return captioner(url)[0]["generated_text"]


@st.cache_resource
def _load_captioner():
    """Build the BLIP image-captioning pipeline once; Streamlit caches the result."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# Function to generate a story from the text using T5-small model
def text2story(text):
    """
    Expands the text caption into a story using the T5-small model.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str: Generated story text (the 'generated_text' field of the first
            pipeline result).
    """
    # Prompt asking for a happy story suitable for kids. The pieces below
    # concatenate to exactly one prompt string.
    prompt = (
        "Write a short, happy, and fun story for kids aged 3-10. "
        f"The story should be about: {text}. "
        "Make it cheerful, imaginative, and suitable for young children. "
        "Avoid any scary or sad elements. "
        "The story should be under 95 words."
    )
    # Reuse the cached pipeline instead of rebuilding the model on every call;
    # Streamlit reruns the whole script on each interaction.
    story_generator = _load_story_generator()
    # max_length=100 bounds the length of the generated output.
    return story_generator(prompt, max_length=100)[0]['generated_text']


@st.cache_resource
def _load_story_generator():
    """Build the t5-small text2text-generation pipeline once; Streamlit caches it."""
    return pipeline("text2text-generation", model="t5-small")
# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Converts the generated story text into audio using gTTS.

    The MP3 is written to a uniquely named temporary file so that concurrent
    sessions do not overwrite or delete each other's audio. The caller is
    responsible for deleting the returned file.

    Args:
        story_text (str): The generated story text.

    Returns:
        str: Path to the generated audio file.
    """
    # mkstemp reserves a unique path; close the descriptor right away because
    # gTTS opens the path itself when saving.
    fd, audio_path = tempfile.mkstemp(prefix="story_audio_", suffix=".mp3")
    os.close(fd)
    try:
        gTTS(text=story_text, lang='en').save(audio_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_path)
        raise
    return audio_path
# Main application
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image into a Fun Audio Story!")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Save the upload under a unique temporary name rather than the
    # client-supplied filename, so uploads cannot collide with each other or
    # with files in the working directory. The original extension is kept.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as image_file:
        image_file.write(uploaded_file.getvalue())
    image_path = image_file.name

    audio_file = None
    try:
        # Stage 1: Image to Text
        st.text('Processing image to text...')
        scenario = img2text(image_path)
        st.write("**Caption:**", scenario)

        # Stage 2: Text to Story
        st.text('Generating a fun story...')
        story = text2story(scenario)
        st.write("**Story:**", story)

        # Stage 3: Story to Audio
        st.text('Converting story to audio...')
        audio_file = text2audio(story)

        # Read the audio into memory so playback does not depend on the file
        # still existing after cleanup.
        with open(audio_file, "rb") as audio_stream:
            audio_bytes = audio_stream.read()
    finally:
        # Clean up the saved image and the generated audio file, even when a
        # stage above raised.
        for leftover in (image_path, audio_file):
            if leftover is not None and os.path.exists(leftover):
                os.remove(leftover)

    # Play button for the generated audio.
    # NOTE(review): clicking the button reruns the script, so the stages above
    # run again before the player appears — consider caching results in
    # st.session_state.
    if st.button("Play Audio"):
        st.audio(audio_bytes, format="audio/mp3")