Spaces:

smtsead
/

Assignment1

Sleeping

App Files Community

smtsead committed on Mar 7

Commit

4e17b21

verified ·

1 Parent(s): e619a81

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -36

app.py CHANGED Viewed

@@ -1,60 +1,71 @@
 # import part
 import streamlit as st
 from transformers import pipeline
 # function part
 # img2text
 def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text = image_to_text_model(url)[0]["generated_text"]
-    return text
 # text2story
 def text2story(text):
-    story_text = ""   # to be completed
-    return story_text
 # text2audio
 def text2audio(story_text):
-    audio_data = ""     # to be completed
-    return audio_data
 # main part
-st.set_page_config(page_title="Your Image to Audio Story",
-                   page_icon="🦜")
 st.header("Turn Your Image to Audio Story")
-uploaded_file = st.file_uploader("Select an Image...")
 if uploaded_file is not None:
-    print(uploaded_file)
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
-    st.image(uploaded_file, caption="Uploaded Image",
-             use_container_width=True)
-    #Stage 1: Image to Text
     st.text('Processing img2text...')
     scenario = img2text(uploaded_file.name)
-    st.write(scenario)
-    #Stage 2: Text to Story
-    st.text('Generating a story...')
-    #story = text2story(scenario)
-    #st.write(story)
-    #Stage 3: Story to Audio data
-    #st.text('Generating audio data...')
-    #audio_data =text2audio(story)
-    # Play button
-    if st.button("Play Audio"):
-        #st.audio(audio_data['audio'],
-        #            format="audio/wav",
-        #            start_time=0,
-        #            sample_rate = audio_data['sampling_rate'])
-        st.audio("kids_playing_audio.wav")

 # import part
 import streamlit as st
 from transformers import pipeline
+from gtts import gTTS
+import os
 # function part
 # img2text
 def img2text(url):
+    try:
+        image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+        text = image_to_text_model(url)[0]["generated_text"]
+        return text
+    except Exception as e:
+        st.error(f"Error in image to text conversion: {e}")
+        return None
 # text2story
 def text2story(text):
+    try:
+        story_generator = pipeline("text-generation", model="gpt2")
+        story = story_generator(text, max_length=100, num_return_sequences=1)[0]["generated_text"]
+        return story
+    except Exception as e:
+        st.error(f"Error in story generation: {e}")
+        return None
 # text2audio
 def text2audio(story_text):
+    try:
+        tts = gTTS(text=story_text, lang='en')
+        audio_file = "story_audio.mp3"
+        tts.save(audio_file)
+        return audio_file
+    except Exception as e:
+        st.error(f"Error in text to audio conversion: {e}")
+        return None
 # main part
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 st.header("Turn Your Image to Audio Story")
+uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
+    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
+    # Stage 1: Image to Text
     st.text('Processing img2text...')
     scenario = img2text(uploaded_file.name)
+    if scenario:
+        st.write("Image Caption:", scenario)
+        # Stage 2: Text to Story
+        st.text('Generating a story...')
+        story = text2story(scenario)
+        if story:
+            st.write("Generated Story:", story)
+            # Stage 3: Story to Audio data
+            st.text('Generating audio data...')
+            audio_file = text2audio(story)
+            if audio_file:
+                # Play button
+                if st.button("Play Audio"):
+                    st.audio(audio_file, format="audio/mp3")
+                # Clean up the audio file after playing
+                os.remove(audio_file)