koey811 committed
Commit dc29be0 · verified
1 Parent(s): 1a7366d

Update app.py

Files changed (1)
app.py +17 -15
app.py CHANGED
@@ -1,30 +1,34 @@
 import streamlit as st
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from gtts import gTTS
-import os
+import io
+
+# Load the image captioning model
+caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+
+# Load the text generation model
+text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2")
+tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
 def generate_caption(image):
-    # Load the image captioning model
-    caption_model = pipeline("image-to-text", model="facebook/blip-image-captioning-base")
-
     # Generate the caption for the uploaded image
     caption = caption_model(image)[0]["generated_text"]
-
     return caption
 
 def generate_story(caption):
-    # Load the text generation model
-    text_generation_model = pipeline("text-generation", model="gpt2")
-
     # Generate the story based on the caption
-    story = text_generation_model(caption, max_length=200, num_return_sequences=1)[0]["generated_text"]
-
+    input_ids = tokenizer.encode(caption, return_tensors="pt")
+    output = text_generation_model.generate(input_ids, max_length=200, num_return_sequences=1)
+    story = tokenizer.decode(output[0], skip_special_tokens=True)
     return story
 
 def convert_to_audio(story):
     # Convert the story to audio using gTTS
     tts = gTTS(text=story, lang="en")
-    tts.save("story_audio.mp3")
+    audio_bytes = io.BytesIO()
+    tts.write_to_fp(audio_bytes)
+    audio_bytes.seek(0)
+    return audio_bytes
 
 def main():
     st.title("Storytelling Application")
@@ -47,11 +51,9 @@ def main():
         st.write(story)
 
         # Convert the story to audio
-        convert_to_audio(story)
+        audio_bytes = convert_to_audio(story)
 
         # Display the audio player
-        audio_file = open("story_audio.mp3", "rb")
-        audio_bytes = audio_file.read()
         st.audio(audio_bytes, format="audio/mp3")
 
 if __name__ == "__main__":
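The net effect of the diff: both models are loaded at module scope instead of inside each helper, story generation uses an explicit encode → generate → decode sequence against GPT-2, and the audio is produced in memory instead of via a story_audio.mp3 temp file. A minimal sketch of the in-memory audio pattern, assuming only gtts and the standard library (the helper name text_to_mp3_bytes is illustrative, not part of this commit):

import io

from gtts import gTTS

def text_to_mp3_bytes(text: str, lang: str = "en") -> io.BytesIO:
    # gTTS.write_to_fp() accepts any file-like object, so the MP3 bytes
    # never touch the filesystem.
    buf = io.BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(buf)
    buf.seek(0)  # rewind so the consumer reads from the first byte
    return buf

st.audio accepts a file-like object (or raw bytes), so the buffer returned by convert_to_audio can be passed to it directly. One caveat: Streamlit re-executes the script on every interaction, so the module-scope loads still run on each rerun; wrapping them in st.cache_resource would be the usual way to load the models only once per process.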