Fanny1366 committed on
Commit
c8359bd
·
verified ·
1 Parent(s): 85a0c1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -19
app.py CHANGED
@@ -1,45 +1,51 @@
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
- import soundfile as sf
5
- import numpy as np
6
- import tempfile
7
 
8
  # function part
9
  # img2text
10
  def img2text(url):
11
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 
12
  text = image_to_text_model(url)[0]["generated_text"]
13
  return text
14
 
15
  # text2story
16
  def text2story(text):
17
- story_text_model = pipeline("text-generation", model="google/gemma-2-9b-it")
18
- story = story_text_model(text, max_length=150)[0]['generated_text']
19
- return story
 
 
20
 
21
  # text2audio
22
  def text2audio(story_text):
23
- tts_model = pipeline("text-to-speech", model="tts_models/en/ljspeech/tacotron2")
24
- audio_data = tts_model(story_text)
25
-
26
- # Save audio to a temporary file
27
- audio_filename = tempfile.mktemp(suffix=".wav")
28
- sf.write(audio_filename, audio_data['audio'], audio_data['sampling_rate'])
29
-
30
- return audio_filename
 
31
 
32
  # main part
33
- st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 
34
  st.header("Turn Your Image to Audio Story")
35
  uploaded_file = st.file_uploader("Select an Image...")
36
 
37
  if uploaded_file is not None:
 
38
  bytes_data = uploaded_file.getvalue()
39
  with open(uploaded_file.name, "wb") as file:
40
  file.write(bytes_data)
41
 
42
- st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
 
43
 
44
  # Stage 1: Image to Text
45
  st.text('Processing img2text...')
@@ -53,8 +59,8 @@ if uploaded_file is not None:
53
 
54
  # Stage 3: Story to Audio data
55
  st.text('Generating audio data...')
56
- audio_filename = text2audio(story)
57
 
58
  # Play button
59
  if st.button("Play Audio"):
60
- st.audio(audio_filename, format="audio/wav")
 
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
+ from gtts import gTTS
5
+ import io
 
6
 
7
  # function part
8
  # img2text
9
def img2text(url):
    """Caption an image and return the generated description.

    Parameters
    ----------
    url : str or image object
        Image path/URL/object accepted by the HF image-to-text pipeline.

    Returns
    -------
    str
        Caption text produced by the BLIP base captioning model.
    """
    # Build the pipeline once and cache it on the function object:
    # reloading the BLIP weights on every call is the dominant cost.
    if getattr(img2text, "_model", None) is None:
        img2text._model = pipeline(
            "image-to-text", model="Salesforce/blip-image-captioning-base"
        )
    text = img2text._model(url)[0]["generated_text"]
    return text
14
 
15
  # text2story
16
def text2story(text):
    """Expand a short caption into a story continuation.

    Parameters
    ----------
    text : str
        Seed prompt (typically the image caption).

    Returns
    -------
    str
        The prompt followed by the model's generated continuation
        (the pipeline's generated_text includes the input prompt).
    """
    # Build the generator once; re-downloading/loading OPT-125m per call
    # is wasteful and slows every page interaction.
    if getattr(text2story, "_pipeline", None) is None:
        text2story._pipeline = pipeline("text-generation",
                                        model="facebook/opt-125m")
    # max_new_tokens bounds only the continuation; the original
    # max_length=200 also counted prompt tokens and could truncate or
    # warn/error when the caption was long.
    result = text2story._pipeline(text, max_new_tokens=200,
                                  num_return_sequences=1)
    return result[0]['generated_text']
22
 
23
  # text2audio
24
def text2audio(story_text):
    """Synthesize English speech for *story_text*.

    Returns an in-memory audio stream (io.BytesIO) rewound to offset 0,
    ready to be passed to st.audio(); the caller plays it as audio/mpeg.
    """
    speech = gTTS(text=story_text, lang='en')
    buffer = io.BytesIO()
    # gTTS streams the synthesized bytes directly into the buffer,
    # so no temporary file on disk is needed.
    speech.write_to_fp(buffer)
    buffer.seek(0)  # rewind so the caller reads from the start
    return buffer
34
 
35
  # main part
36
+ st.set_page_config(page_title="Your Image to Audio Story",
37
+ page_icon="🦜")
38
  st.header("Turn Your Image to Audio Story")
39
  uploaded_file = st.file_uploader("Select an Image...")
40
 
41
  if uploaded_file is not None:
42
+ print(uploaded_file)
43
  bytes_data = uploaded_file.getvalue()
44
  with open(uploaded_file.name, "wb") as file:
45
  file.write(bytes_data)
46
 
47
+ st.image(uploaded_file, caption="Uploaded Image",
48
+ use_column_width=True)
49
 
50
  # Stage 1: Image to Text
51
  st.text('Processing img2text...')
 
59
 
60
  # Stage 3: Story to Audio data
61
  st.text('Generating audio data...')
62
+ audio_data = text2audio(story)
63
 
64
  # Play button
65
  if st.button("Play Audio"):
66
+ st.audio(audio_data, format="audio/mpeg")