import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os

def _load_caption_model():
    """Build the BLIP image-captioning pipeline."""
    return pipeline("image-to-text", model="facebook/blip-image-captioning-base")


def generate_caption(image):
    """Return a generated caption for ``image``.

    Args:
        image: Either something the image-to-text pipeline accepts directly
            (a path/URL string or a PIL image), or a binary file-like object
            such as the value returned by ``st.file_uploader``.

    Returns:
        The ``"generated_text"`` of the first pipeline result.
    """
    import tempfile

    # Streamlit re-runs the script on every interaction, so building the
    # pipeline on each call would reload the model weights every time.
    # Reuse a cached instance when the installed Streamlit offers the API.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        caption_model = cache_resource(_load_caption_model)()
    else:
        caption_model = _load_caption_model()

    if not hasattr(image, "read"):
        # Not file-like: hand it to the pipeline unchanged.
        return caption_model(image)[0]["generated_text"]

    # The pipeline rejects raw file objects, so spool the bytes to a
    # temporary file and pass its path instead.
    data = image.getvalue() if hasattr(image, "getvalue") else image.read()

    # delete=False so the file can be reopened by path on every platform;
    # it is removed explicitly once the pipeline has finished with it.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            tmp.write(data)
        return caption_model(tmp.name)[0]["generated_text"]
    finally:
        os.remove(tmp.name)

def _load_story_model():
    """Build the GPT-2 text-generation pipeline."""
    return pipeline("text-generation", model="gpt2")


def generate_story(caption):
    """Generate a story that uses ``caption`` as the prompt.

    Args:
        caption: Prompt text passed to the text-generation pipeline.

    Returns:
        The ``"generated_text"`` of the single sequence requested from the
        pipeline (``max_length=200``).
    """
    # Streamlit re-runs the script on every interaction, so building the
    # pipeline on each call would reload the model weights every time.
    # Reuse a cached instance when the installed Streamlit offers the API.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        text_generation_model = cache_resource(_load_story_model)()
    else:
        text_generation_model = _load_story_model()

    sequences = text_generation_model(
        caption, max_length=200, num_return_sequences=1
    )
    return sequences[0]["generated_text"]

def convert_to_audio(story, output_path="story_audio.mp3"):
    """Synthesize ``story`` as English speech with gTTS and save it as MP3.

    Args:
        story: Text to be spoken.
        output_path: Destination file for the audio. Defaults to
            ``"story_audio.mp3"``, the name used before this parameter
            existed, so existing one-argument callers are unaffected.

    Returns:
        The path the audio was written to.
    """
    tts = gTTS(text=story, lang="en")
    tts.save(output_path)
    return output_path

def main():
    """Render the Streamlit page: upload an image, caption it, tell a story.

    Once an image is uploaded the page shows the image, a generated caption,
    a story generated from that caption, and an audio player for the story.
    """
    st.title("Storytelling Application")

    # File uploader for the image
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    # Nothing to do until the user has picked a file.
    if uploaded_image is None:
        return

    # Display the uploaded image
    st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)

    # Generate the caption for the image
    caption = generate_caption(uploaded_image)
    st.subheader("Generated Caption:")
    st.write(caption)

    # Generate the story based on the caption
    story = generate_story(caption)
    st.subheader("Generated Story:")
    st.write(story)

    # Convert the story to audio
    convert_to_audio(story)

    # Read the audio inside a context manager so the file handle is closed
    # promptly instead of leaking one per rerun.
    with open("story_audio.mp3", "rb") as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/mp3")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()