import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from gtts import gTTS
import io
from PIL import Image

# Load the captioning and text-generation models once and cache them,
# so Streamlit reruns do not reload them on every interaction
@st.cache_resource
def load_models():
    caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return caption, model, tokenizer

caption_model, text_generation_model, tokenizer = load_models()

def generate_caption(image):
    # Generate the caption for the uploaded image
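    # The image-to-text pipeline accepts a PIL image and returns a list of dicts with a "generated_text" key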
    caption = caption_model(image)[0]["generated_text"]
    return caption

def generate_story(caption):
    # Generate the story based on the caption
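    # GPT-2 continues the caption as a prompt; max_length counts prompt tokens plus generated tokens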
    input_ids = tokenizer.encode(caption, return_tensors="pt")
    # pad_token_id=eos_token_id silences GPT-2's "no pad token" warning
    output = text_generation_model.generate(
        input_ids, max_length=100, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id
    )
    story = tokenizer.decode(output[0], skip_special_tokens=True)
    return story

def convert_to_audio(story):
    # Convert the story to audio using gTTS
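    # gTTS sends the text to Google's Text-to-Speech service, so network access is required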
    tts = gTTS(text=story, lang="en")
    audio_bytes = io.BytesIO()
    tts.write_to_fp(audio_bytes)
    audio_bytes.seek(0)
    return audio_bytes

def main():
    st.title("Storytelling Application")
    
    # File uploader for the image (restricted to JPG)
    uploaded_image = st.file_uploader("Upload an image", type=["jpg"])
    
    if uploaded_image is not None:
        # Convert the uploaded image to PIL image
        image = Image.open(uploaded_image)
        
        # Display the uploaded image
        st.image(image, caption="Uploaded Image", use_column_width=True)
        
        # Generate the caption for the image
        caption = generate_caption(image)
        st.subheader("Generated Caption:")
        st.write(caption)
        
        # Generate the story based on the caption
        story = generate_story(caption)
        st.subheader("Generated Story:")
        st.write(story)
        
        # Convert the story to audio
        audio_bytes = convert_to_audio(story)
        
        # Display the audio player
        st.audio(audio_bytes, format="audio/mp3")

if __name__ == "__main__":
    main()
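
# To run the app locally: streamlit run <path-to-this-script>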