Spaces:
Running
Running
File size: 3,117 Bytes
f34973b 815a364 7e1bb2a 815a364 bb6f5bb 815a364 7e1bb2a 815a364 2da2d23 815a364 7e1bb2a 815a364 7e1bb2a 815a364 2da2d23 815a364 2da2d23 815a364 2da2d23 815a364 7e1bb2a 815a364 2da2d23 815a364 7e1bb2a 815a364 7e1bb2a 815a364 f34973b 815a364 2da2d23 815a364 2da2d23 815a364 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# Import necessary libraries
import streamlit as st # Streamlit for creating the web application
from transformers import pipeline # Pipeline for using Hugging Face models
from PIL import Image # PIL for image processing
# Function to load models
@st.cache_resource
def load_models():
    """Build the three Hugging Face pipelines used by the app.

    Streamlit re-executes the whole script on every user interaction, so
    without caching each rerun would construct all three pipelines again.
    ``st.cache_resource`` keeps a single shared instance of the returned
    objects and hands it back on later calls.

    Returns:
        A ``(caption_model, story_model, tts_model)`` tuple:
        an "image-to-text" pipeline (Salesforce/blip-image-captioning-large),
        a "text-generation" pipeline (gpt2), and a "text-to-speech"
        pipeline (suno/bark).
    """
    caption_model = pipeline(
        "image-to-text", model="Salesforce/blip-image-captioning-large"
    )
    story_model = pipeline("text-generation", model="gpt2")
    tts_model = pipeline("text-to-speech", model="suno/bark")
    return caption_model, story_model, tts_model
# Function to generate story from caption
def generate_story(caption, story_model):
    """Expand an image caption into a short story.

    Args:
        caption: Prompt text handed to the generator.
        story_model: Callable invoked as
            ``story_model(caption, max_length=100, num_return_sequences=1)``
            and expected to return a sequence of dicts that carry a
            ``'generated_text'`` entry.

    Returns:
        The ``'generated_text'`` value of the first returned sequence.
    """
    outputs = story_model(caption, max_length=100, num_return_sequences=1)
    first_sequence = outputs[0]
    return first_sequence['generated_text']
# Function to convert text to audio
def text_to_audio(text, tts_model):
    """Synthesize speech for ``text``.

    Args:
        text: The text to be spoken.
        tts_model: Callable that accepts the text as its only argument.

    Returns:
        Whatever ``tts_model(text)`` returns, passed through unchanged.
    """
    return tts_model(text)
# Function to process the uploaded image and generate a story
def process_image(image, caption_model, story_model):
    """Caption an image and turn that caption into a story.

    Args:
        image: The image passed straight to ``caption_model``.
        caption_model: Callable returning a sequence of dicts that carry a
            ``'generated_text'`` entry; only the first item is used.
        story_model: Text generator forwarded to ``generate_story``.

    Returns:
        A ``(caption, story)`` tuple.
    """
    captions = caption_model(image)
    caption = captions[0]['generated_text']
    return caption, generate_story(caption, story_model)
# Main part
def main():
    """Render the Streamlit page: upload an image, caption it, narrate a story.

    Flow: configure the page, show the uploader, obtain the models from
    ``load_models()``, and — once a file has been uploaded — display the
    image, its generated caption, the generated story, and an audio player
    for the spoken story.
    """
    # set_page_config has to be the first Streamlit call on the page.
    st.set_page_config(page_title="Storytelling Friend", page_icon="🦦")
    st.write("Upload an image to generate a story!")

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"]
    )

    caption_model, story_model, tts_model = load_models()

    # Nothing more to render until the user has picked a file.
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    caption, story = process_image(image, caption_model, story_model)
    st.subheader("Generated Caption:")
    st.write(caption)
    st.subheader("Generated Story:")
    st.write(story)

    audio = text_to_audio(story, tts_model)
    # The text-to-speech pipeline returns a dict holding the waveform under
    # "audio" and its rate under "sampling_rate". st.audio cannot play the
    # dict itself, so pass the raw array and tell it the sample rate.
    st.audio(
        audio["audio"],
        format="audio/wav",
        sample_rate=audio["sampling_rate"],
    )
# Run the app
if __name__ == "__main__":
    # Only start the app when this file is executed as the entry script.
    main()