Spaces:
Sleeping
Sleeping
File size: 1,817 Bytes
8d2615a 5866ba2 67f1091 85a0c1a 5866ba2 8d2615a 5866ba2 67f1091 5866ba2 8d2615a 5866ba2 85a0c1a 67f1091 5866ba2 8d2615a 5866ba2 67f1091 85a0c1a 67f1091 5866ba2 8d2615a 67f1091 5866ba2 67f1091 8d2615a 67f1091 5866ba2 67f1091 5866ba2 67f1091 5866ba2 67f1091 5866ba2 67f1091 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# import part
import os
import tempfile

import numpy as np
import soundfile as sf
import streamlit as st
from transformers import pipeline
# function part
# img2text
def img2text(url):
    """Caption an image with the BLIP image-captioning model.

    Args:
        url: Image reference handed straight to the "image-to-text"
            pipeline (the script below passes a local file path).

    Returns:
        The "generated_text" field of the first prediction.
    """
    # The pipeline is built on every call; nothing is cached here.
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    predictions = captioner(url)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]
# text2story
def text2story(text):
    """Expand a short piece of text into a story with a text-generation model.

    Args:
        text: Prompt passed unchanged to the "text-generation" pipeline.

    Returns:
        The 'generated_text' field of the first generated candidate.
    """
    # The pipeline is built on every call; nothing is cached here.
    generator = pipeline("text-generation", model="google/gemma-2-9b-it")
    # NOTE(review): max_length presumably bounds prompt plus continuation
    # rather than only the newly generated part - confirm against the
    # transformers generation docs.
    candidates = generator(text, max_length=150)
    return candidates[0]['generated_text']
# text2audio
def text2audio(story_text):
    """Synthesize speech for a story and save it as a temporary WAV file.

    Args:
        story_text: Text passed unchanged to the "text-to-speech" pipeline.

    Returns:
        Path of the ``.wav`` file that was written. The file is not removed
        by this function; the caller is responsible for cleaning it up.
    """
    # NOTE(review): "tts_models/en/ljspeech/tacotron2" looks like a Coqui TTS
    # model name rather than a Hugging Face Hub repo id - confirm that
    # transformers.pipeline can actually load it.
    tts_model = pipeline("text-to-speech", model="tts_models/en/ljspeech/tacotron2")
    audio_data = tts_model(story_text)

    # tempfile.mktemp() is deprecated: it only returns a name, so another
    # process can create that path before we do. NamedTemporaryFile creates
    # the file atomically; delete=False keeps it on disk for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_filename = tmp.name

    # Write only after the handle above is closed so the path can be reopened
    # on platforms that forbid opening a file twice.
    sf.write(audio_filename, audio_data['audio'], audio_data['sampling_rate'])
    return audio_filename
# main part
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Streamlit re-executes this script on every widget interaction, including
    # a click on "Play Audio". Keep the stage outputs for the current upload
    # in session state so that the click plays the audio that was already
    # generated instead of running all three models a second time.
    upload_key = (uploaded_file.name, hash(bytes_data))
    result = st.session_state.get("story_result")
    if result is None or result["key"] != upload_key:
        result = {"key": upload_key}
        st.session_state["story_result"] = result

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    if "scenario" not in result:
        # The upload's name is chosen by the client, so it is never used as a
        # filesystem path: the bytes go to a fixed name inside a private
        # temporary directory that is removed once captioning has finished.
        with tempfile.TemporaryDirectory() as work_dir:
            image_path = os.path.join(work_dir, "upload")
            with open(image_path, "wb") as file:
                file.write(bytes_data)
            result["scenario"] = img2text(image_path)
    scenario = result["scenario"]
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    if "story" not in result:
        result["story"] = text2story(scenario)
    story = result["story"]
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    if "audio_filename" not in result:
        result["audio_filename"] = text2audio(story)
    audio_filename = result["audio_filename"]

    # Play button
    if st.button("Play Audio"):
        st.audio(audio_filename, format="audio/wav")