Spaces:

smtsead
/

Assignment1

Sleeping

App Files Community

smtsead committed on Mar 7

Commit

a107515

verified ·

1 Parent(s): 07aa0f1

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -61

app.py CHANGED Viewed

@@ -4,59 +4,59 @@ from transformers import pipeline
 from gtts import gTTS
 import os
-# Function to convert image to text
 def img2text(url):
     image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
     text = image_to_text_model(url)[0]["generated_text"]
-    # Remove unwanted words like "illustration"
-    unwanted_words = ["illustration", "painting", "drawing", "artwork"]
-    for word in unwanted_words:
-        text = text.replace(word, "")
-    return text.strip()
-# Function to generate a story from text using T5
 def text2story(text):
-    # Use the T5 model for text generation
-    text_generator = pipeline("text2text-generation", model="t5-small")
-    # Add a strong prompt to guide the model
-    prompt = f"Write a short, happy, and fun story for kids aged 3-10. " \
-             f"The story should be about: {text}. " \
-             "Make it cheerful, imaginative, and suitable for young children. " \
-             "Avoid any scary or sad elements. The story should be under 95 words."
-    # Generate the story
-    story = text_generator(prompt, max_length=95, num_return_sequences=1)[0]["generated_text"]
-    # Clean up the output to remove the prompt
-    story = story.replace(prompt, "").strip()
-    # Ensure the story is under 95 words
-    words = story.split()
-    if len(words) > 95:
-        story = " ".join(words[:95])
     return story
 # Function to convert text to audio using gTTS
 def text2audio(story_text):
-    audio_file = os.path.abspath("kids_playing_audio.wav")
-    tts = gTTS(story_text, lang="en")
     tts.save(audio_file)
     return audio_file
 # Main application
-st.set_page_config(page_title="Image to Story", page_icon="📖")
-st.header("📖 Image to Story")
-st.markdown("### Turn your image into a fun story!")
-# Initialize session state
-if "story" not in st.session_state:
-    st.session_state.story = None
-if "audio_file" not in st.session_state:
-    st.session_state.audio_file = None
 uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
@@ -65,36 +65,26 @@ if uploaded_file is not None:
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
-    st.image(uploaded_file, caption="Your Uploaded Image", use_column_width=True)
     # Stage 1: Image to Text
-    st.text('🖼️ Processing image...')
     scenario = img2text(uploaded_file.name)
-    st.write("**What I see:**", scenario)
     # Stage 2: Text to Story
-    if st.session_state.story is None or st.button("🔄 Generate New Story"):
-        st.text('📝 Creating a story...')
-        st.session_state.story = text2story(scenario)
-        st.write("**Your Story:**", st.session_state.story)
-        # Stage 3: Story to Audio
-        st.text('🎙️ Turning your story into audio...')
-        st.session_state.audio_file = text2audio(st.session_state.story)
-    # Play button for audio
-    if st.session_state.audio_file and st.button("🎧 Listen to the Story"):
-        if os.path.exists(st.session_state.audio_file):
-            st.audio(st.session_state.audio_file, format="audio/wav")
-        else:
-            st.error("Audio file not found. Please generate the story again.")
     # Clean up the generated audio file
-    if st.session_state.audio_file and os.path.exists(st.session_state.audio_file):
-        os.remove(st.session_state.audio_file)
-# Add some fun prompts for kids
-st.markdown("### 🎨 Tips for a Great Story!")
-st.write("1. Upload a picture of your favorite animal, place, or toy!")
-st.write("2. Imagine what's happening in the picture and let the story begin!")
-st.write("3. Listen to your story and share it with your friends!")

 from gtts import gTTS
 import os
+# Function to convert image to text using Hugging Face's BLIP model
 def img2text(url):
+    """
+    Generates a caption for an image with the Salesforce/blip-image-captioning-base model.
+    The pipeline is obtained from _load_captioner(), which is wrapped in
+    st.cache_resource so the model is built once rather than on every call.
+    Args:
+        url (str): Image input handed straight to the pipeline; the caller in
+            this file passes the path of the saved upload.
+    Returns:
+        str: The "generated_text" field of the first result the pipeline returns.
+    """
+    captions = _load_captioner()(url)
+    return captions[0]["generated_text"]
+# Helper for img2text; it is looked up when img2text is called, so defining it
+# after img2text is fine.
+@st.cache_resource
+def _load_captioner():
+    """Builds the BLIP image-to-text pipeline; cached so it is constructed only once."""
+    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+# Function to generate a story from the text using T5-small model
 def text2story(text):
+    """
+    Expands the text caption into a short story using the T5-small model.
+    The pipeline is obtained from _load_story_generator(), which is wrapped in
+    st.cache_resource so the model is built once rather than on every call.
+    Args:
+        text (str): Text caption generated from the image.
+    Returns:
+        str: The "generated_text" field of the first result. The prompt asks
+            for a happy story for kids aged 3-10 in under 95 words, but the
+            output is returned as-is and is not checked against that request.
+    """
+    # Prompt asking for a happy, kid-friendly story about the caption.
+    prompt = (
+        "Write a short, happy, and fun story for kids aged 3-10. "
+        f"The story should be about: {text}. "
+        "Make it cheerful, imaginative, and suitable for young children. "
+        "Avoid any scary or sad elements. "
+        "The story should be under 95 words."
+    )
+    # NOTE(review): max_length is presumably a token limit, not a word limit - confirm.
+    story = _load_story_generator()(prompt, max_length=100)[0]['generated_text']
     return story
+# Helper for text2story; it is looked up when text2story is called, so defining
+# it after text2story is fine.
+@st.cache_resource
+def _load_story_generator():
+    """Builds the T5-small text2text-generation pipeline; cached so it is constructed only once."""
+    return pipeline("text2text-generation", model="t5-small")
 # Function to convert text to audio using gTTS
 def text2audio(story_text):
+    """
+    Synthesizes the story as English speech with gTTS and saves it to a file.
+    Args:
+        story_text (str): The story text to turn into speech.
+    Returns:
+        str: The relative path "story_audio.mp3" that the audio was saved to.
+    """
+    output_path = "story_audio.mp3"
+    speech = gTTS(text=story_text, lang='en')
+    speech.save(output_path)
+    return output_path
 # Main application
+st.set_page_config(page_title="Your Image to Audio Story",
+                   page_icon="🦜")
+st.header("Turn Your Image into a Fun Audio Story!")
+# Everything below the uploader runs only once a file has been provided.
 uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
+    # Save the upload under its own name so img2text can be given a file path.
     with open(uploaded_file.name, "wb") as file:
+        # NOTE(review): bytes_data is not assigned in the lines shown here;
+        # presumably it is set in the part of the file this diff omits - confirm.
         file.write(bytes_data)
+    # Display the uploaded image
+    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
     # Stage 1: Image to Text
+    st.text('Processing image to text...')
     scenario = img2text(uploaded_file.name)
+    st.write("**Caption:**", scenario)
     # Stage 2: Text to Story
+    st.text('Generating a fun story...')
+    story = text2story(scenario)
+    st.write("**Story:**", story)
+    # Stage 3: Story to Audio
+    st.text('Converting story to audio...')
+    audio_file = text2audio(story)
+    # Play button for the generated audio
+    if st.button("Play Audio"):
+        st.audio(audio_file, format="audio/mp3")
     # Clean up the generated audio file
+    # The removal sits outside the button branch, so it runs whether or not
+    # the button was pressed. The saved image file is not removed here.
+    os.remove(audio_file)