Spaces:

joey1101
/

storytelling

Build error

App Files Files Community

joey1101 committed on Mar 9

Commit

26d92c7

verified ·

1 Parent(s): a5258a2

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -42

app.py CHANGED Viewed

@@ -1,57 +1,56 @@
-# import part
-import streamlit as st
-from transformers import pipeline
-from PIL import Image  # Import PIL to handle image files
-# function part
-# img2text
 def img2text(image):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text = image_to_text_model(image)[0]["generated_text"]
-    return text
-# text2story
-def text2story(text):
-    story_model = pipeline("text-generation", model="distilbert/distilgpt2")
-    if isinstance(text, list):
-        text = " ".join(text)
-    story_text = story_model(text, max_length=100, num_return_sequences=1)
-    return story_text[0]["generated_text"]
-# text2audio
 def text2audio(story_text):
     text_to_audio_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
     audio_data = text_to_audio_model(story_text)
-    return audio_data
-# main part
-st.set_page_config(page_title="Your Image to Audio Story",
-                   page_icon="🦜")
-st.header("Turn Your Image to Audio Story")
-uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
     # Open and read the uploaded image
-    image = Image.open(uploaded_file)  # Use PIL to open the image
-    st.image(image, caption="Uploaded Image", use_container_width=True)
     # Stage 1: Image to Text
-    st.text('Processing img2text...')
-    scenario = img2text(image)  # Pass the image object instead of file name
-    st.write(scenario)
     # Stage 2: Text to Story
-    st.text('Generating a story...')
-    story = text2story(scenario)
-    st.write(story)
     # Stage 3: Story to Audio data
-    st.text('Generating audio data...')
-    audio_data = text2audio(story)
-    # Play button
-    if st.button("Play Audio"):
-        st.audio(audio_data['audio'],
-                 format="audio/wav",
-                 start_time=0,
-                 sample_rate=audio_data['sampling_rate'])

+# Import necessary libraries
+import streamlit as st  # Streamlit for building the web application
+from transformers import pipeline  # Hugging Face Transformers pipeline for models
+from PIL import Image  # PIL for handling image files
+# Function to convert image to text
 def img2text(image):
+    """Return the caption generated for ``image``.
+
+    ``image`` is handed to the pipeline unchanged; the caller in this file
+    passes a PIL image. The return value is the ``generated_text`` field of
+    the first item the pipeline produces.
+    """
+    # "image-to-text" is the task name Transformers registers for captioning
+    # checkpoints such as BLIP; "image-captioning" is not a known task and
+    # makes pipeline() fail before the model is loaded.
+    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+    # The pipeline yields a list of dicts keyed by "generated_text" (there is
+    # no "caption" key), so read the first result's generated text.
+    text = image_to_text_model(image)[0]["generated_text"]
+    return text
+# Function to generate a story based on the caption
+def text2story(text):
+    """Continue a fairy-tale opening built from ``text`` and return the result.
+
+    The prompt is "Once upon a time, <text>. "; the returned string is the
+    ``generated_text`` of the single sequence requested from the gpt2 model.
+    """
+    generator = pipeline("text-generation", model="gpt2")
+    prompt = f"Once upon a time, {text}. "
+    # Request exactly one sequence with max_length=150.
+    outputs = generator(prompt, max_length=150, num_return_sequences=1)
+    first_sequence = outputs[0]
+    return first_sequence["generated_text"]
+# Function to convert text to audio
 def text2audio(story_text):
+    # Synthesize speech for story_text: build a text-to-speech pipeline backed by facebook/mms-tts-eng
     text_to_audio_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
+    # Run the whole story through the model in a single call
     audio_data = text_to_audio_model(story_text)
+    return audio_data  # Raw pipeline output; the main script reads its 'audio' and 'sampling_rate' entries
+# Page script: upload an image, caption it, expand the caption into a story, then voice the story
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")  # Browser tab title and icon
+st.header("Turn Your Image into an Audio Story")  # Heading shown at the top of the page
+uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])  # None until the user picks a jpg/jpeg/png file
 if uploaded_file is not None:
     # Open and read the uploaded image
+    image = Image.open(uploaded_file)  # Open the upload as a PIL image
+    st.image(image, caption="Uploaded Image", use_container_width=True)  # Echo the image back to the user
     # Stage 1: Image to Text
+    st.text('Processing image to text...')  # Status line written before the captioning call
+    scenario = img2text(image)  # Caption for the uploaded image
+    st.write(scenario)  # Show the caption
     # Stage 2: Text to Story
+    st.text('Generating a story...')  # Status line written before the story call
+    story = text2story(scenario)  # Story text generated from the caption
+    st.write(story)  # Show the story
     # Stage 3: Story to Audio data
+    st.text('Generating audio data...')  # Status line written before the speech call
+    audio_data = text2audio(story)  # Speech output for the story; indexed by 'audio' and 'sampling_rate' below
+    # The audio player is only rendered once the button has been pressed
+    if st.button("Play Audio"):  # NOTE(review): Streamlit presumably re-executes this script on click, repeating all three stages above - confirm and consider caching
+        st.audio(audio_data['audio'], format="audio/wav", start_time=0, sample_rate=audio_data['sampling_rate'])  # Render the player with the pipeline's own sampling rate