# Import necessary libraries
import hashlib  # For fingerprinting uploads so a changed image is detected
import os  # For file handling (saving and deleting temporary files)
import re  # For whole-word cleanup of generated captions
import tempfile  # For storing the uploaded image in a private temporary file

import streamlit as st  # For building the web application
from gtts import gTTS  # For converting text to speech
from transformers import pipeline  # For using pre-trained models (image-to-text and text-generation)

# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
    """
    Caption an image using the Salesforce/blip-image-captioning-base model.

    Filler words such as "illustration" are removed from the caption as whole
    words only, so longer words that merely contain one of them (for example
    "pictures" or "sketchbook") are left intact, and any whitespace left
    behind by a removal is collapsed to single spaces.

    Args:
        url (str): Path to the image file.

    Returns:
        str: The cleaned caption, or None if captioning failed (the error is
        shown to the user through st.error).
    """
    try:
        # Load the image-to-text model
        # NOTE(review): the pipeline is rebuilt on every call; consider caching it.
        image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

        # Generate text caption from the image
        text = image_to_text_model(url)[0]["generated_text"]

        # Remove unwanted words like "illustration" to make the caption cleaner.
        # \b anchors restrict the match to whole words; a plain str.replace
        # would also cut these letters out of the middle of other words.
        unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
        unwanted_pattern = r"\b(?:" + "|".join(re.escape(word) for word in unwanted_words) + r")\b"
        text = re.sub(unwanted_pattern, " ", text, flags=re.IGNORECASE)

        # Collapse the gaps left by removed words and trim both ends
        return " ".join(text.split())
    except Exception as e:
        st.error(f"Error processing image: {e}")  # Display error message if something goes wrong
        return None

# Function to generate a kid-friendly superhero story from the text caption
def text2story(text):
    """
    Turn an image caption into a short superhero story.

    The caption is prefixed with the "<BOS> <superhero>" genre tags and passed
    to the pranavpsv/gpt2-genre-story-generator model. The tags are stripped
    from the output and the story is capped at 100 words. If the first attempt
    yields fewer than 50 words, generation is retried once with a larger
    max_length, and that second result is returned whatever its length.

    Args:
        text (str): Text caption generated from the image.

    Returns:
        str: The generated story (at most 100 words), or None if generation
        failed (the error is shown to the user through st.error).
    """
    try:
        # NOTE(review): the pipeline is rebuilt on every call; consider caching it.
        generator = pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator")
        genre_prompt = f"<BOS> <superhero> {text}"

        # First pass uses max_length=150; the second pass (max_length=200) only
        # runs when the first story came out shorter than 50 words.
        for length_limit in (150, 200):
            outputs = generator(genre_prompt, max_length=length_limit, num_return_sequences=1)
            raw_story = outputs[0]['generated_text']

            # Drop the prompt tags, then keep at most the first 100 words
            untagged = raw_story.replace("<BOS>", "").replace("<superhero>", "").strip()
            kept_words = untagged.split()[:100]
            story = " ".join(kept_words)

            if len(kept_words) >= 50:
                break

        return story
    except Exception as e:
        st.error(f"Error generating story: {e}")  # Surface the failure in the UI
        return None

# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Synthesize English speech for the story with gTTS and save it as an MP3.

    Args:
        story_text (str): The generated story text.

    Returns:
        str: Path of the saved audio file ("story_audio.mp3"), or None if
        synthesis or saving failed (the error is shown through st.error).
    """
    output_path = "story_audio.mp3"  # Fixed output name, relative to the working directory
    try:
        # Build the speech object and write it straight to disk
        gTTS(text=story_text, lang='en').save(output_path)
    except Exception as e:
        st.error(f"Error generating audio: {e}")  # Surface the failure in the UI
        return None
    return output_path

# Helper: caption an uploaded image via a private temporary file
def _caption_upload(image_bytes, original_name):
    """
    Write the uploaded bytes to a temporary file, caption it, and delete the file.

    The client-supplied file name is used only for its extension, never as a
    path, so an upload cannot overwrite files in the working directory. The
    temporary file is removed even if captioning raises.

    Args:
        image_bytes (bytes): Raw content of the uploaded image.
        original_name (str): File name reported by the uploader.

    Returns:
        str: Caption returned by img2text, or None if captioning failed.
    """
    suffix = os.path.splitext(original_name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        temp_file.write(image_bytes)
        temp_path = temp_file.name
    # The file is closed here so the captioning model can reopen it by path.
    try:
        return img2text(temp_path)
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)


# Main application function
def main():
    """
    Main function to run the Streamlit application.

    Renders the page, then walks the uploaded picture through three stages:
    image -> caption, caption -> superhero story, story -> audio. Results are
    kept in st.session_state so Streamlit reruns (for example a button click)
    do not recompute them. Uploading a different picture, detected by file
    name and content hash, discards all cached results so the caption, story
    and audio always belong to the picture currently shown.
    """
    # Configure the Streamlit app page
    st.set_page_config(page_title="Picture Stories 🎨📖", page_icon="🦄")
    st.title("Picture Stories 🎨📖")
    st.markdown("### Turn your pictures into fun superhero stories and listen to them! 🎉")

    # Instructions for kids
    st.markdown("""
    **How to use this app:**
    1. **Upload a picture** of something fun, like your favorite toy, your pet, or your hero.
    2. Wait for the app to **create a superhero story** from your picture.
    3. **Listen to the story** by clicking the "Play Audio" button.
    4. Enjoy your fun superhero story! 🎧
    """)

    # Upload image
    uploaded_file = st.file_uploader("📷 **Upload your picture here!**", type=["jpg", "jpeg", "png"])

    # Initialize session state variables
    for key in ("scenario", "story", "audio_file", "uploaded_file_name", "uploaded_file_digest"):
        if key not in st.session_state:
            st.session_state[key] = None

    if uploaded_file is None:
        return

    image_bytes = uploaded_file.getvalue()

    # Display the uploaded image in the app
    st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

    # A new upload is one whose name or content differs from the last one
    # processed. All three cached results are reset together; otherwise the
    # story and audio of the previous picture would be shown for the new one.
    digest = hashlib.sha256(image_bytes).hexdigest()
    is_new_upload = (
        uploaded_file.name != st.session_state.uploaded_file_name
        or digest != st.session_state.uploaded_file_digest
    )
    if is_new_upload:
        st.session_state.scenario = None
        st.session_state.story = None
        st.session_state.audio_file = None
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.uploaded_file_digest = digest

    # Stage 1: Image to Text
    if st.session_state.scenario is None:
        with st.spinner('✨ Turning your picture into words...'):
            st.session_state.scenario = _caption_upload(image_bytes, uploaded_file.name)
    if not st.session_state.scenario:
        return
    st.write("**What we see:**", st.session_state.scenario)  # Display the generated caption

    # Stage 2: Text to Story
    if st.session_state.story is None:
        with st.spinner('📖 Creating a fun superhero story for you...'):
            st.session_state.story = text2story(st.session_state.scenario)
    if not st.session_state.story:
        return
    st.write("**Your superhero story:**", st.session_state.story)  # Display the generated story

    # Stage 3: Story to Audio
    if st.session_state.audio_file is None:
        with st.spinner('🎧 Turning your story into audio...'):
            st.session_state.audio_file = text2audio(st.session_state.story)

    # Play button for the generated audio
    if st.button("🎵 **Play Audio**"):
        audio_path = st.session_state.audio_file
        # audio_path is None when audio generation failed; check it before
        # touching the filesystem because os.path.exists(None) raises TypeError.
        if audio_path and os.path.exists(audio_path):
            st.audio(audio_path, format="audio/mp3")  # Play the audio
        else:
            # Forget the missing file so the next rerun regenerates the audio
            st.session_state.audio_file = None
            st.error("Audio file not found. Please try again.")

# Run the application only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()