Spaces:
Sleeping
Sleeping
File size: 2,109 Bytes
478e2ea dc29be0 478e2ea dc29be0 224b704 dc29be0 478e2ea dc29be0 224b704 dc29be0 478e2ea dc29be0 478e2ea 224b704 478e2ea 224b704 478e2ea 224b704 478e2ea 224b704 478e2ea dc29be0 478e2ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from gtts import gTTS
import io
from PIL import Image
# Streamlit re-executes this script from the top on every user interaction,
# so loading the models inline would rebuild all of them on each rerun.
# st.cache_resource keeps a single shared instance per server process; on
# Streamlit releases that do not provide it, fall back to plain (uncached)
# loading, which matches the previous behavior.
_cache_resource = getattr(st, "cache_resource", None) or (lambda loader: loader)


@_cache_resource
def _load_caption_model():
    """Build the image-to-text pipeline used for captioning."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


@_cache_resource
def _load_story_model():
    """Load the GPT-2 causal language model together with its tokenizer."""
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return model, gpt2_tokenizer


# Module-level handles used by generate_caption() and generate_story().
caption_model = _load_caption_model()
text_generation_model, tokenizer = _load_story_model()
def generate_caption(image):
    """Return the caption text produced by the captioning pipeline for ``image``.

    The pipeline output is indexed as a sequence of dicts; the
    ``"generated_text"`` entry of the first element is returned.
    """
    first_result = caption_model(image)[0]
    return first_result["generated_text"]
def generate_story(caption):
    """Continue ``caption`` into a short story with the text generation model.

    The caption is encoded to token ids, a single sequence is generated with
    ``max_length=100``, and the first generated sequence is decoded back to
    text with special tokens removed.
    """
    prompt_ids = tokenizer.encode(caption, return_tensors="pt")
    generated = text_generation_model.generate(
        prompt_ids,
        max_length=100,
        num_return_sequences=1,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def convert_to_audio(story):
    """Synthesize ``story`` as English speech using gTTS.

    Returns an in-memory ``io.BytesIO`` containing the audio written by gTTS,
    rewound to position 0 so a consumer can read it from the beginning.
    """
    speech = gTTS(text=story, lang="en")
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    # Rewind: write_to_fp leaves the cursor at the end of the data.
    buffer.seek(0)
    return buffer
def main():
    """Render the page: upload an image, caption it, and narrate a story.

    Once a JPG has been uploaded, the page shows the image, the generated
    caption, the story generated from that caption, and an audio player for
    the spoken story. Until then only the title and the uploader are drawn.
    """
    st.title("Storytelling Application")

    # The uploader widget only accepts JPG files.
    upload = st.file_uploader("Upload an image", type=["jpg"])
    if upload is None:
        # Nothing uploaded yet; every later step needs an image.
        return

    # Open the upload as a PIL image and echo it back to the user.
    picture = Image.open(upload)
    st.image(picture, caption="Uploaded Image", use_column_width=True)

    # Step 1: image -> caption.
    caption = generate_caption(picture)
    st.subheader("Generated Caption:")
    st.write(caption)

    # Step 2: caption -> story.
    story = generate_story(caption)
    st.subheader("Generated Story:")
    st.write(story)

    # Step 3: story -> speech, presented in an audio player.
    st.audio(convert_to_audio(story), format="audio/mp3")
# Start the app only when this file is run as the main script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()