joey1101 committed on
Commit
26d92c7
·
verified ·
1 Parent(s): a5258a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -42
app.py CHANGED
@@ -1,57 +1,56 @@
1
- # import part
2
- import streamlit as st
3
- from transformers import pipeline
4
- from PIL import Image # Import PIL to handle image files
5
 
6
- # function part
7
- # img2text
8
  def img2text(image):
9
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- text = image_to_text_model(image)[0]["generated_text"]
11
- return text
12
-
13
- # text2story
14
- def text2story(text):
15
- story_model = pipeline("text-generation", model="distilbert/distilgpt2")
16
- if isinstance(text, list):
17
- text = " ".join(text)
18
- story_text = story_model(text, max_length=100, num_return_sequences=1)
19
- return story_text[0]["generated_text"]
20
-
21
- # text2audio
 
 
22
  def text2audio(story_text):
 
23
  text_to_audio_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
 
24
  audio_data = text_to_audio_model(story_text)
25
- return audio_data
26
 
27
- # main part
28
- st.set_page_config(page_title="Your Image to Audio Story",
29
- page_icon="🦜")
30
- st.header("Turn Your Image to Audio Story")
31
- uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
32
 
33
  if uploaded_file is not None:
34
  # Open and read the uploaded image
35
- image = Image.open(uploaded_file) # Use PIL to open the image
36
- st.image(image, caption="Uploaded Image", use_container_width=True)
37
 
38
  # Stage 1: Image to Text
39
- st.text('Processing img2text...')
40
- scenario = img2text(image) # Pass the image object instead of file name
41
- st.write(scenario)
42
 
43
  # Stage 2: Text to Story
44
- st.text('Generating a story...')
45
- story = text2story(scenario)
46
- st.write(story)
47
 
48
  # Stage 3: Story to Audio data
49
- st.text('Generating audio data...')
50
- audio_data = text2audio(story)
51
-
52
- # Play button
53
- if st.button("Play Audio"):
54
- st.audio(audio_data['audio'],
55
- format="audio/wav",
56
- start_time=0,
57
- sample_rate=audio_data['sampling_rate'])
 
1
+ # Import necessary libraries
2
+ import streamlit as st # Streamlit for building the web application
3
+ from transformers import pipeline # Hugging Face Transformers pipeline for models
4
+ from PIL import Image # PIL for handling image files
5
 
6
+ # Function to convert image to text
 
7
def img2text(image):
    """Generate a short text caption describing the given image.

    Args:
        image: A PIL.Image.Image (as produced by Image.open on the upload).

    Returns:
        str: The caption produced by the BLIP image-captioning model.
    """
    # "image-to-text" is the registered transformers task name;
    # "image-captioning" is not a valid task and raises at pipeline creation.
    image_to_text_model = pipeline(
        "image-to-text", model="Salesforce/blip-image-captioning-base"
    )
    # The pipeline returns a list of dicts keyed by "generated_text";
    # there is no "caption" key, so take the first candidate's text.
    text = image_to_text_model(image)[0]["generated_text"]
    return text
13
+
14
+ # Function to generate a story based on the caption
15
def text2story(text):
    """Generate a short story seeded by *text* (e.g. an image caption).

    Args:
        text: A caption string; a list of strings is tolerated and joined
              with spaces first (defensive, matches earlier behavior).

    Returns:
        str: The generated story, including the "Once upon a time" prefix.
    """
    # Load the text-generation model (GPT-2 for storytelling).
    story_model = pipeline("text-generation", model="gpt2")
    # Be tolerant of list input from upstream pipelines.
    if isinstance(text, list):
        text = " ".join(text)
    # max_new_tokens bounds only the continuation; max_length would also
    # count the prompt tokens and could cut the story short unexpectedly.
    story_text = story_model(
        f"Once upon a time, {text}. ",
        max_new_tokens=150,
        num_return_sequences=1,
    )
    return story_text[0]["generated_text"]
21
+
22
+ # Function to convert text to audio
23
def text2audio(story_text):
    """Synthesize speech for *story_text* with the MMS English TTS model.

    Returns the pipeline's output dict, which carries the raw waveform
    under 'audio' and its 'sampling_rate'.
    """
    tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return tts(story_text)
29
 
30
# --- Application entry: page setup and the upload-to-audio workflow ---
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image into an Audio Story")

# Accept a single image upload from the user.
img_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if img_file is not None:
    # Echo the upload back so the user can confirm it.
    picture = Image.open(img_file)
    st.image(picture, caption="Uploaded Image", use_container_width=True)

    # Stage 1: caption the image.
    st.text('Processing image to text...')
    caption = img2text(picture)
    st.write(caption)

    # Stage 2: expand the caption into a story.
    st.text('Generating a story...')
    tale = text2story(caption)
    st.write(tale)

    # Stage 3: synthesize audio for the story.
    st.text('Generating audio data...')
    speech = text2audio(tale)

    # Playback only on demand.
    if st.button("Play Audio"):
        st.audio(
            speech['audio'],
            format="audio/wav",
            start_time=0,
            sample_rate=speech['sampling_rate'],
        )