Spaces:
Sleeping
Sleeping
File size: 2,006 Bytes
8d2615a 5866ba2 c8359bd 5866ba2 8d2615a 5866ba2 c8359bd 5866ba2 8d2615a 5866ba2 c8359bd 5866ba2 8d2615a 5866ba2 c8359bd 5866ba2 8d2615a c8359bd 5866ba2 c8359bd 5866ba2 c8359bd 8d2615a 67f1091 5866ba2 67f1091 5866ba2 67f1091 5866ba2 c8359bd 5866ba2 c8359bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# import part
import hashlib
import io
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
# function part
# img2text
@st.cache_resource
def _load_image_to_text_model():
    """Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the script on every user interaction, so
    constructing the pipeline inside ``img2text`` would reload the model on
    each rerun. ``st.cache_resource`` keeps a single instance alive instead.
    """
    return pipeline("image-to-text",
                    model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Generate a caption for an image.

    Args:
        url: Image reference handed straight to the image-to-text pipeline
            (the app passes a local file path).

    Returns:
        The ``generated_text`` field of the first result produced by the
        pipeline.
    """
    image_to_text_model = _load_image_to_text_model()
    text = image_to_text_model(url)[0]["generated_text"]
    return text
# text2story
@st.cache_resource
def _load_story_model():
    """Build the OPT text-generation pipeline once and reuse it.

    Streamlit re-executes the script on every user interaction, so
    constructing the pipeline inside ``text2story`` would reload the model on
    each rerun. ``st.cache_resource`` keeps a single instance alive instead.
    """
    return pipeline("text-generation", model="facebook/opt-125m")


def text2story(text):
    """Expand a short prompt into a story.

    Args:
        text: Prompt passed to the text-generation pipeline (the app passes
            the image caption).

    Returns:
        The ``generated_text`` field of the single sequence requested from
        the pipeline.
    """
    story_pipeline = _load_story_model()
    # Ask the pipeline for exactly one sequence, capped at max_length=200.
    result = story_pipeline(text, max_length=200, num_return_sequences=1)
    story_text = result[0]['generated_text']
    return story_text
# text2audio
def text2audio(story_text):
    """Convert text to spoken English audio held in memory.

    Args:
        story_text: The text to synthesise with gTTS.

    Returns:
        An ``io.BytesIO`` containing the audio written by gTTS, positioned at
        offset 0 so it can be read from the start.
    """
    # In-memory buffer that receives the synthesised audio.
    buffer = io.BytesIO()
    gTTS(text=story_text, lang='en').write_to_fp(buffer)
    # Rewind so the caller reads from the beginning of the stream.
    buffer.seek(0)
    return buffer
# main part
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Uploaded Image",
             use_column_width=True)

    # Streamlit re-executes this script on every interaction, including the
    # "Play Audio" click below. Keep the results of the three stages in the
    # session, keyed by a hash of the uploaded bytes, so a rerun for the same
    # image does not repeat captioning, story generation and speech synthesis.
    upload_key = hashlib.sha256(bytes_data).hexdigest()
    results = st.session_state.get("story_results")
    if results is None or results.get("upload_key") != upload_key:
        results = {"upload_key": upload_key}
        st.session_state["story_results"] = results

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    if "scenario" not in results:
        # Write the image to a temporary file rather than to a path built
        # from the client-supplied file name: that name could collide with
        # (and overwrite) files in the working directory, and the copy was
        # never cleaned up.
        tmp_image = tempfile.NamedTemporaryFile(delete=False)
        try:
            tmp_image.write(bytes_data)
            # Close before handing the path on so the data is flushed and the
            # file can be reopened by name.
            tmp_image.close()
            results["scenario"] = img2text(tmp_image.name)
        finally:
            tmp_image.close()
            os.remove(tmp_image.name)
    scenario = results["scenario"]
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    if "story" not in results:
        results["story"] = text2story(scenario)
    story = results["story"]
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    if "audio" not in results:
        # Store the raw bytes so the cached audio is independent of any
        # stream position.
        results["audio"] = text2audio(story).getvalue()
    audio_data = results["audio"]

    # Play button
    if st.button("Play Audio"):
        st.audio(audio_data, format="audio/mpeg")