"""Streamlit app that captions an uploaded image, writes a story, and narrates it.

Pipeline: image -> caption (BLIP) -> story (GPT-2) -> MP3 narration (gTTS).
"""

import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline

CAPTION_MODEL_NAME = "facebook/blip-image-captioning-base"
STORY_MODEL_NAME = "gpt2"
DEFAULT_AUDIO_PATH = "story_audio.mp3"

# Streamlit re-executes the script on every interaction, so an uncached
# pipeline() call would rebuild the model each time. st.cache_resource keeps
# a single instance alive across reruns; Streamlit releases that lack it
# fall back to an uncached loader.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pipeline(task, model_name):
    """Build the transformers pipeline for *task* backed by *model_name*."""
    return pipeline(task, model=model_name)


def _spool_to_temp_file(stream):
    """Copy a binary file-like object to a temporary file and return its path.

    The caller is responsible for deleting the returned file.
    """
    if hasattr(stream, "seek"):
        # An earlier consumer (e.g. st.image) may have moved the read cursor.
        stream.seek(0)
    # delete=False so the file can be reopened by path after this handle is
    # closed (reopening an open NamedTemporaryFile fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(stream.read())
    return tmp.name


def generate_caption(image):
    """Return a caption for *image* generated by the BLIP captioning model.

    Args:
        image: Anything the image-to-text pipeline accepts directly (for
            example a local path or URL string), or a binary file-like object
            such as the value returned by ``st.file_uploader``.

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    caption_model = _load_pipeline("image-to-text", CAPTION_MODEL_NAME)

    if not hasattr(image, "read"):
        return caption_model(image)[0]["generated_text"]

    # File-like objects are not passed to the pipeline as-is; hand it a path
    # to a temporary copy of the bytes instead.
    temp_path = _spool_to_temp_file(image)
    try:
        return caption_model(temp_path)[0]["generated_text"]
    finally:
        os.unlink(temp_path)
        if hasattr(image, "seek"):
            # Leave the stream rewound so the caller can read it again.
            image.seek(0)


def generate_story(caption):
    """Return a story that continues *caption*, generated with GPT-2.

    Args:
        caption: Prompt text for the text-generation model.

    Returns:
        The ``generated_text`` of the single returned sequence.
    """
    story_model = _load_pipeline("text-generation", STORY_MODEL_NAME)
    results = story_model(caption, max_length=200, num_return_sequences=1)
    return results[0]["generated_text"]


def convert_to_audio(story, output_path=DEFAULT_AUDIO_PATH):
    """Synthesize *story* as English speech with gTTS and save it as an MP3.

    Args:
        story: Text to narrate.
        output_path: Destination file; defaults to ``story_audio.mp3`` in the
            current working directory.

    Returns:
        The path the audio was written to.
    """
    gTTS(text=story, lang="en").save(output_path)
    return output_path


def main():
    """Render the page: upload an image, then show its caption, story, and audio."""
    st.title("Storytelling Application")

    uploaded_image = st.file_uploader(
        "Upload an image", type=["jpg", "jpeg", "png"]
    )
    if uploaded_image is None:
        # Nothing to process until the user provides an image.
        return

    st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)

    caption = generate_caption(uploaded_image)
    st.subheader("Generated Caption:")
    st.write(caption)

    story = generate_story(caption)
    st.subheader("Generated Story:")
    st.write(story)

    audio_path = convert_to_audio(story)
    # Read inside a context manager so the file handle is always closed.
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/mp3")


if __name__ == "__main__":
    main()