smtsead committed on
Commit
4e17b21
·
verified ·
1 Parent(s): e619a81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -36
app.py CHANGED
@@ -1,60 +1,71 @@
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
-
 
5
 
6
  # function part
7
  # img2text
8
  def img2text(url):
9
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- text = image_to_text_model(url)[0]["generated_text"]
11
- return text
 
 
 
 
12
 
13
  # text2story
14
  def text2story(text):
15
- story_text = "" # to be completed
16
- return story_text
 
 
 
 
 
17
 
18
  # text2audio
19
  def text2audio(story_text):
20
- audio_data = "" # to be completed
21
- return audio_data
 
 
 
 
 
 
22
 
23
  # main part
24
-
25
- st.set_page_config(page_title="Your Image to Audio Story",
26
- page_icon="🦜")
27
  st.header("Turn Your Image to Audio Story")
28
- uploaded_file = st.file_uploader("Select an Image...")
29
-
30
 
31
  if uploaded_file is not None:
32
- print(uploaded_file)
33
  bytes_data = uploaded_file.getvalue()
34
  with open(uploaded_file.name, "wb") as file:
35
  file.write(bytes_data)
36
-
37
- st.image(uploaded_file, caption="Uploaded Image",
38
- use_container_width=True)
39
 
40
- #Stage 1: Image to Text
 
 
41
  st.text('Processing img2text...')
42
  scenario = img2text(uploaded_file.name)
43
- st.write(scenario)
44
-
45
- #Stage 2: Text to Story
46
- st.text('Generating a story...')
47
- #story = text2story(scenario)
48
- #st.write(story)
49
-
50
- #Stage 3: Story to Audio data
51
- #st.text('Generating audio data...')
52
- #audio_data =text2audio(story)
53
-
54
- # Play button
55
- if st.button("Play Audio"):
56
- #st.audio(audio_data['audio'],
57
- # format="audio/wav",
58
- # start_time=0,
59
- # sample_rate = audio_data['sampling_rate'])
60
- st.audio("kids_playing_audio.wav")
 
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
+ from gtts import gTTS
5
+ import os
6
 
7
  # function part
8
  # img2text
9
  def img2text(url):
10
+ try:
11
+ image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
12
+ text = image_to_text_model(url)[0]["generated_text"]
13
+ return text
14
+ except Exception as e:
15
+ st.error(f"Error in image to text conversion: {e}")
16
+ return None
17
 
18
  # text2story
19
  def text2story(text):
20
+ try:
21
+ story_generator = pipeline("text-generation", model="gpt2")
22
+ story = story_generator(text, max_length=100, num_return_sequences=1)[0]["generated_text"]
23
+ return story
24
+ except Exception as e:
25
+ st.error(f"Error in story generation: {e}")
26
+ return None
27
 
28
  # text2audio
29
  def text2audio(story_text):
30
+ try:
31
+ tts = gTTS(text=story_text, lang='en')
32
+ audio_file = "story_audio.mp3"
33
+ tts.save(audio_file)
34
+ return audio_file
35
+ except Exception as e:
36
+ st.error(f"Error in text to audio conversion: {e}")
37
+ return None
38
 
39
  # main part
40
+ st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 
 
41
  st.header("Turn Your Image to Audio Story")
42
+ uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
 
43
 
44
  if uploaded_file is not None:
 
45
  bytes_data = uploaded_file.getvalue()
46
  with open(uploaded_file.name, "wb") as file:
47
  file.write(bytes_data)
 
 
 
48
 
49
+ st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
50
+
51
+ # Stage 1: Image to Text
52
  st.text('Processing img2text...')
53
  scenario = img2text(uploaded_file.name)
54
+ if scenario:
55
+ st.write("Image Caption:", scenario)
56
+
57
+ # Stage 2: Text to Story
58
+ st.text('Generating a story...')
59
+ story = text2story(scenario)
60
+ if story:
61
+ st.write("Generated Story:", story)
62
+
63
+ # Stage 3: Story to Audio data
64
+ st.text('Generating audio data...')
65
+ audio_file = text2audio(story)
66
+ if audio_file:
67
+ # Play button
68
+ if st.button("Play Audio"):
69
+ st.audio(audio_file, format="audio/mp3")
70
+ # Clean up the audio file after playing
71
+ os.remove(audio_file)