"""Streamlit app: caption an uploaded image, expand it into a story, narrate it."""

# Import necessary libraries
import streamlit as st  # Streamlit for creating the web application
from transformers import pipeline  # Pipeline for using Hugging Face models
from PIL import Image  # PIL for image processing


# Streamlit re-executes the whole script on every user interaction; caching the
# pipelines as a shared resource avoids rebuilding three models per rerun.
@st.cache_resource
def load_models():
    """Build the three Hugging Face pipelines used by the app.

    Returns:
        A ``(caption_model, story_model, tts_model)`` tuple: the image-to-text,
        text-generation and text-to-speech pipelines, in that order.
    """
    caption_model = pipeline(
        "image-to-text", model="Salesforce/blip-image-captioning-large"
    )
    story_model = pipeline("text-generation", model="gpt2")
    tts_model = pipeline("text-to-speech", model="suno/bark")
    return caption_model, story_model, tts_model


def generate_story(caption, story_model):
    """Generate a story that continues ``caption``.

    Args:
        caption: Prompt text for the text-generation model.
        story_model: Callable text-generation pipeline.

    Returns:
        The ``'generated_text'`` of the first (and only requested) sequence.
    """
    outputs = story_model(caption, max_length=100, num_return_sequences=1)
    return outputs[0]['generated_text']


def text_to_audio(text, tts_model):
    """Run ``tts_model`` on ``text`` and return its raw result.

    Args:
        text: Text to synthesize.
        tts_model: Callable text-to-speech pipeline.

    Returns:
        Whatever ``tts_model`` returns, unmodified. For the transformers
        text-to-speech pipeline this is expected to be a dict with ``"audio"``
        and ``"sampling_rate"`` keys (confirm against the installed version).
    """
    return tts_model(text)


def process_image(image, caption_model, story_model):
    """Caption ``image`` and turn that caption into a story.

    Args:
        image: Image accepted by ``caption_model`` (``main`` passes a PIL image).
        caption_model: Callable image-to-text pipeline.
        story_model: Callable text-generation pipeline.

    Returns:
        A ``(caption, story)`` tuple of strings.
    """
    result = caption_model(image)
    caption = result[0]['generated_text']
    story = generate_story(caption, story_model)
    return caption, story


def main():
    """Render the page: upload an image, then show its caption, story and audio."""
    # Must be the first Streamlit command that renders on the page.
    st.set_page_config(page_title="Storytelling Friend", page_icon="🦦")
    st.write("Upload an image to generate a story!")

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"]
    )

    # Cached by st.cache_resource, so reruns reuse the same pipelines.
    caption_model, story_model, tts_model = load_models()

    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    caption, story = process_image(image, caption_model, story_model)

    st.subheader("Generated Caption:")
    st.write(caption)

    st.subheader("Generated Story:")
    st.write(story)

    audio = text_to_audio(story, tts_model)
    if isinstance(audio, dict):
        # The pipeline result carries a raw waveform plus its sampling rate;
        # st.audio needs the array itself and an explicit sample_rate to play it.
        st.audio(
            audio["audio"],
            format='audio/wav',
            sample_rate=audio["sampling_rate"],
        )
    else:
        # Already something st.audio understands (bytes, path, URL, ...).
        st.audio(audio, format='audio/wav')


# Run the app
if __name__ == "__main__":
    main()