smtsead committed on
Commit
a107515
·
verified ·
1 Parent(s): 07aa0f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -61
app.py CHANGED
@@ -4,59 +4,59 @@ from transformers import pipeline
4
  from gtts import gTTS
5
  import os
6
 
7
- # Function to convert image to text
8
  def img2text(url):
 
 
 
 
 
 
 
 
 
9
  image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
  text = image_to_text_model(url)[0]["generated_text"]
11
-
12
- # Remove unwanted words like "illustration"
13
- unwanted_words = ["illustration", "painting", "drawing", "artwork"]
14
- for word in unwanted_words:
15
- text = text.replace(word, "")
16
- return text.strip()
17
 
18
- # Function to generate a story from text using T5
19
  def text2story(text):
20
- # Use the T5 model for text generation
21
- text_generator = pipeline("text2text-generation", model="t5-small")
22
-
23
- # Add a strong prompt to guide the model
24
- prompt = f"Write a short, happy, and fun story for kids aged 3-10. " \
25
- f"The story should be about: {text}. " \
26
- "Make it cheerful, imaginative, and suitable for young children. " \
27
- "Avoid any scary or sad elements. The story should be under 95 words."
28
-
29
- # Generate the story
30
- story = text_generator(prompt, max_length=95, num_return_sequences=1)[0]["generated_text"]
31
 
32
- # Clean up the output to remove the prompt
33
- story = story.replace(prompt, "").strip()
34
 
35
- # Ensure the story is under 95 words
36
- words = story.split()
37
- if len(words) > 95:
38
- story = " ".join(words[:95])
 
39
 
 
 
40
  return story
41
 
42
  # Function to convert text to audio using gTTS
43
  def text2audio(story_text):
44
- audio_file = os.path.abspath("kids_playing_audio.wav")
45
- tts = gTTS(story_text, lang="en")
 
 
 
 
 
 
 
 
 
46
  tts.save(audio_file)
47
  return audio_file
48
 
49
  # Main application
50
- st.set_page_config(page_title="Image to Story", page_icon="📖")
51
- st.header("📖 Image to Story")
52
- st.markdown("### Turn your image into a fun story!")
53
-
54
- # Initialize session state
55
- if "story" not in st.session_state:
56
- st.session_state.story = None
57
- if "audio_file" not in st.session_state:
58
- st.session_state.audio_file = None
59
-
60
  uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
61
 
62
  if uploaded_file is not None:
@@ -65,36 +65,26 @@ if uploaded_file is not None:
65
  with open(uploaded_file.name, "wb") as file:
66
  file.write(bytes_data)
67
 
68
- st.image(uploaded_file, caption="Your Uploaded Image", use_column_width=True)
 
69
 
70
  # Stage 1: Image to Text
71
- st.text('🖼️ Processing image...')
72
  scenario = img2text(uploaded_file.name)
73
- st.write("**What I see:**", scenario)
74
 
75
  # Stage 2: Text to Story
76
- if st.session_state.story is None or st.button("🔄 Generate New Story"):
77
- st.text('📝 Creating a story...')
78
- st.session_state.story = text2story(scenario)
79
- st.write("**Your Story:**", st.session_state.story)
80
 
81
- # Stage 3: Story to Audio
82
- st.text('🎙️ Turning your story into audio...')
83
- st.session_state.audio_file = text2audio(st.session_state.story)
84
 
85
- # Play button for audio
86
- if st.session_state.audio_file and st.button("🎧 Listen to the Story"):
87
- if os.path.exists(st.session_state.audio_file):
88
- st.audio(st.session_state.audio_file, format="audio/wav")
89
- else:
90
- st.error("Audio file not found. Please generate the story again.")
91
 
92
  # Clean up the generated audio file
93
- if st.session_state.audio_file and os.path.exists(st.session_state.audio_file):
94
- os.remove(st.session_state.audio_file)
95
-
96
- # Add some fun prompts for kids
97
- st.markdown("### 🎨 Tips for a Great Story!")
98
- st.write("1. Upload a picture of your favorite animal, place, or toy!")
99
- st.write("2. Imagine what's happening in the picture and let the story begin!")
100
- st.write("3. Listen to your story and share it with your friends!")
 
4
  from gtts import gTTS
5
  import os
6
 
7
+ # Function to convert image to text using Hugging Face's BLIP model
8
  def img2text(url):
9
+ """
10
+ Converts an image to text using the Salesforce/blip-image-captioning-base model.
11
+
12
+ Args:
13
+ url (str): Path to the image file.
14
+
15
+ Returns:
16
+ str: Generated text caption from the image.
17
+ """
18
  image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
19
  text = image_to_text_model(url)[0]["generated_text"]
20
+ return text
 
 
 
 
 
21
 
22
+ # Function to generate a story from the text using T5-small model
23
  def text2story(text):
24
+ """
25
+ Expands the text caption into a full story using the T5-small model.
 
 
 
 
 
 
 
 
 
26
 
27
+ Args:
28
+ text (str): Text caption generated from the image.
29
 
30
+ Returns:
31
+ str: Generated story suitable for kids aged 3-10.
32
+ """
33
+ # Prompt to ensure the story is happy and suitable for kids
34
+ prompt = f"Write a short, happy, and fun story for kids aged 3-10. The story should be about: {text}. Make it cheerful, imaginative, and suitable for young children. Avoid any scary or sad elements. The story should be under 95 words."
35
 
36
+ text_generator = pipeline("text2text-generation", model="t5-small")
37
+ story = text_generator(prompt, max_length=100)[0]['generated_text']
38
  return story
39
 
40
  # Function to convert text to audio using gTTS
41
  def text2audio(story_text):
42
+ """
43
+ Converts the generated story text into audio using gTTS.
44
+
45
+ Args:
46
+ story_text (str): The generated story text.
47
+
48
+ Returns:
49
+ str: Path to the generated audio file.
50
+ """
51
+ tts = gTTS(text=story_text, lang='en')
52
+ audio_file = "story_audio.mp3"
53
  tts.save(audio_file)
54
  return audio_file
55
 
56
  # Main application
57
+ st.set_page_config(page_title="Your Image to Audio Story",
58
+ page_icon="🦜")
59
+ st.header("Turn Your Image into a Fun Audio Story!")
 
 
 
 
 
 
 
60
  uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
61
 
62
  if uploaded_file is not None:
 
65
  with open(uploaded_file.name, "wb") as file:
66
  file.write(bytes_data)
67
 
68
+ # Display the uploaded image
69
+ st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
70
 
71
  # Stage 1: Image to Text
72
+ st.text('Processing image to text...')
73
  scenario = img2text(uploaded_file.name)
74
+ st.write("**Caption:**", scenario)
75
 
76
  # Stage 2: Text to Story
77
+ st.text('Generating a fun story...')
78
+ story = text2story(scenario)
79
+ st.write("**Story:**", story)
 
80
 
81
+ # Stage 3: Story to Audio
82
+ st.text('Converting story to audio...')
83
+ audio_file = text2audio(story)
84
 
85
+ # Play button for the generated audio
86
+ if st.button("Play Audio"):
87
+ st.audio(audio_file, format="audio/mp3")
 
 
 
88
 
89
  # Clean up the generated audio file
90
+ os.remove(audio_file)