File size: 2,648 Bytes
4cc910e 221ba92 fc48446 4fcc6b4 221ba92 4fcc6b4 fc48446 221ba92 fc48446 4fcc6b4 221ba92 4fcc6b4 221ba92 4fcc6b4 fc48446 4fcc6b4 221ba92 4fcc6b4 221ba92 4fcc6b4 221ba92 4fcc6b4 221ba92 4fcc6b4 221ba92 4fcc6b4 fc48446 221ba92 df4ad39 221ba92 fc48446 221ba92 fc48446 4fcc6b4 fc48446 221ba92 4fcc6b4 fc48446 221ba92 4fcc6b4 221ba92 df4ad39 221ba92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import io
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
# function part
# img2text
def img2text(url):
try:
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
text = image_to_text_model(url)[0]["generated_text"]
return text
except Exception as e:
st.error(f"图像转文本出错: {e}")
return None
# text2story
def text2story(text):
try:
story_generator = pipeline("text-generation", model="Qwen/QwQ-32B-Preview")
# 生成故事文本
result = story_generator(text, max_length=200, num_return_sequences=1)
story = result[0]['generated_text']
return story
except Exception as e:
st.error(f"文本生成故事出错: {e}")
return None
# text2audio
def text2audio(story_text):
try:
# 创建 gTTS 对象,将文本转换为语音
tts = gTTS(text=story_text, lang='en')
# 创建字节流对象用于存储音频数据
audio_file = io.BytesIO()
# 将音频数据写入字节流
tts.write_to_fp(audio_file)
# 移动文件指针到开头
audio_file.seek(0)
return audio_file
except Exception as e:
st.error(f"文本转音频出错: {e}")
return None
st.set_page_config(page_title="Your Image to Audio Story",
page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")
if uploaded_file is not None:
# 使用临时文件存储上传的图像
temp_file_path = "temp_image.jpg"
bytes_data = uploaded_file.getvalue()
with open(temp_file_path, "wb") as file:
file.write(bytes_data)
# 显示上传的图像,使用 use_container_width 避免弃用警告
st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
# Stage 1: Image to Text
st.text('Processing img2text...')
scenario = img2text(temp_file_path)
if scenario:
st.write(scenario)
# Stage 2: Text to Story
st.text('Generating a story...')
story = text2story(scenario)
if story:
st.write(story)
# Stage 3: Story to Audio data
st.text('Generating audio data...')
audio_data = text2audio(story)
if audio_data:
# Play button
if st.button("Play Audio"):
st.audio(audio_data, format="audio/mpeg", start_time=0)
# 删除临时文件
if os.path.exists(temp_file_path):
os.remove(temp_file_path) |