import streamlit as st # Streamlit for building the web application
from transformers import pipeline # Hugging Face Transformers pipeline for models
from PIL import Image # PIL for handling image files
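
# Note: the transformers pipelines below also need a deep-learning backend
# (typically PyTorch) installed for the models to load.
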
# Function to convert image to text
def img2text(image):
    # Load the image-to-text model
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    # Generate a caption for the image
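    # The pipeline accepts a PIL Image and returns a list of dicts,
    # e.g. [{"generated_text": "..."}], so we take the first result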
    text = image_to_text_model(image)[0]["generated_text"]
    return text # Return the generated caption

# Function to generate a story based on the caption
def text2story(text):
    # Load the text generation model
    story_model = pipeline("text-generation", model="gpt2")
    # Generate a story based on the input text
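    # gpt2 continues the prompt, so the caption is woven into a "Once upon a time"
    # opening; max_length is counted in tokens and includes the prompt itself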
    story_text = story_model(f"Once upon a time, {text}.", max_length=100, num_return_sequences=1)
    return story_text[0]["generated_text"] # Return the generated story

# Function to convert text to audio
def text2audio(story_text):
    # Load the text-to-speech model
    text_to_audio_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    # Generate audio data from the story text
    audio_data = text_to_audio_model(story_text)
    return audio_data # Return the audio data

# Main part of the application
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image into an Audio Story")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"]) # File uploader for images
if uploaded_file is not None:
    # Open and read the uploaded image
    image = Image.open(uploaded_file) # Use PIL to open the uploaded image
    st.image(image, caption="Uploaded Image", use_container_width=True) # Display the uploaded image

    # Stage 1: Image to Text
    st.text('Processing image to text...')
    scenario = img2text(image) # Get the caption for the uploaded image
    st.write("Caption:", scenario) # Display the generated caption

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario) # Generate a story based on the caption
    st.write("Story:", story) # Display the generated story

    # Stage 3: Story to Audio data
    st.text('Generating audio data...') # Inform the user about the audio generation stage
    audio_data = text2audio(story) # Convert the generated story into audio
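
    # The text-to-speech pipeline returns a dict with an "audio" numpy array and a
    # "sampling_rate" key, which st.audio can play directly below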
    # Play button for the audio
    if st.button("Play Audio"): # Create a button to play the audio
        st.audio(audio_data['audio'], format="audio/wav", start_time=0, sample_rate=audio_data['sampling_rate']) # Play the audio