Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,58 +4,32 @@ from transformers import pipeline
|
|
4 |
from gtts import gTTS
|
5 |
import os
|
6 |
|
7 |
-
# Function to convert image to text
|
8 |
def img2text(url):
|
9 |
-
"""
|
10 |
-
Converts an image to text using the Salesforce/blip-image-captioning-base model.
|
11 |
-
|
12 |
-
Args:
|
13 |
-
url (str): Path to the image file.
|
14 |
-
|
15 |
-
Returns:
|
16 |
-
str: Generated text caption from the image.
|
17 |
-
"""
|
18 |
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
|
19 |
text = image_to_text_model(url)[0]["generated_text"]
|
20 |
return text
|
21 |
|
22 |
-
# Function to
|
23 |
def text2story(text):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
Args:
|
28 |
-
text (str): Text caption generated from the image.
|
29 |
-
|
30 |
-
Returns:
|
31 |
-
str: Generated story suitable for kids aged 3-10.
|
32 |
-
"""
|
33 |
-
# Simplified and rephrased prompt to guide the model
|
34 |
-
prompt = f"Write a short, happy story for kids about {text}. The story should be cheerful, imaginative, and under 95 words."
|
35 |
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
-
story = story.replace(prompt, "").strip()
|
42 |
|
43 |
return story
|
44 |
|
45 |
-
# Function to convert text to audio
|
46 |
def text2audio(story_text):
|
47 |
-
|
48 |
-
Converts the generated story text into audio using gTTS.
|
49 |
-
|
50 |
-
Args:
|
51 |
-
story_text (str): The generated story text.
|
52 |
-
|
53 |
-
Returns:
|
54 |
-
str: Path to the generated audio file.
|
55 |
-
"""
|
56 |
tts = gTTS(text=story_text, lang='en')
|
57 |
audio_file = "story_audio.mp3"
|
58 |
tts.save(audio_file)
|
|
|
59 |
return audio_file
|
60 |
|
61 |
# Main application
|
@@ -76,10 +50,10 @@ if uploaded_file is not None:
|
|
76 |
# Stage 1: Image to Text
|
77 |
st.text('Processing image to text...')
|
78 |
scenario = img2text(uploaded_file.name)
|
79 |
-
st.write("**
|
80 |
|
81 |
# Stage 2: Text to Story
|
82 |
-
st.text('Generating a fun story...')
|
83 |
story = text2story(scenario)
|
84 |
st.write("**Story:**", story)
|
85 |
|
|
|
4 |
from gtts import gTTS
|
5 |
import os
|
6 |
|
7 |
+
# Function to convert image to text
|
8 |
def img2text(url):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
|
10 |
text = image_to_text_model(url)[0]["generated_text"]
|
11 |
return text
|
12 |
|
13 |
+
# Function to convert text to a kid-friendly story
|
14 |
def text2story(text):
|
15 |
+
# Initialize the text generation pipeline
|
16 |
+
text_generator = pipeline("text-generation", model="distilbert/distilgpt2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
# Create a prompt to ensure the story is kid-friendly and happy
|
19 |
+
prompt = f"Write a happy and fun story for kids aged 3-10 based on the following scenario: {text}. The story should be suitable for children and should not contain any magical elements. Keep the story under 95 words."
|
20 |
|
21 |
+
# Generate the story
|
22 |
+
story = text_generator(prompt, max_length=95, num_return_sequences=1)[0]['generated_text']
|
|
|
23 |
|
24 |
return story
|
25 |
|
26 |
+
# Function to convert text to audio
|
27 |
def text2audio(story_text):
|
28 |
+
# Convert the story text to audio using gTTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
tts = gTTS(text=story_text, lang='en')
|
30 |
audio_file = "story_audio.mp3"
|
31 |
tts.save(audio_file)
|
32 |
+
|
33 |
return audio_file
|
34 |
|
35 |
# Main application
|
|
|
50 |
# Stage 1: Image to Text
|
51 |
st.text('Processing image to text...')
|
52 |
scenario = img2text(uploaded_file.name)
|
53 |
+
st.write("**Scenario:**", scenario)
|
54 |
|
55 |
# Stage 2: Text to Story
|
56 |
+
st.text('Generating a fun story for kids...')
|
57 |
story = text2story(scenario)
|
58 |
st.write("**Story:**", story)
|
59 |
|