# Streamlit app: turn an uploaded image into a narrated audio story.
# import part
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
# function part
# img2text
@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded model alive across those reruns
    instead of instantiating it again for every caption.
    """
    return pipeline("image-to-text",
                    model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Generate a caption for an image.

    Args:
        url: Image reference handed straight to the captioning pipeline
            (the app passes the path of the saved upload).

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    image_to_text_model = _load_captioner()
    text = image_to_text_model(url)[0]["generated_text"]
    return text
# text2story
@st.cache_resource
def _load_story_generator():
    """Build the text-generation pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    the model is cached rather than loaded again for each story.
    """
    # NOTE(review): trust_remote_code=True runs Python code shipped in the
    # model repository; confirm it is required and the repository is trusted.
    return pipeline("text-generation",
                    model="perplexity-ai/r1-1776",
                    trust_remote_code=True)


def text2story(text):
    """Expand a short scenario into a story with a text-generation model.

    Args:
        text: Prompt for the generator (the app passes the image caption).

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    story_generator = _load_story_generator()
    story_text = story_generator(text)[0]["generated_text"]
    return story_text
# text2audio
def text2audio(story_text):
    """Synthesize English speech for the story and save it to disk.

    Args:
        story_text: Text to be spoken.

    Returns:
        Path of the audio file that was written.
    """
    # Convert text to audio using gTTS
    tts = gTTS(story_text, lang="en")
    # gTTS writes MP3 data, so the file name carries an .mp3 extension;
    # a ".wav" name would mislabel the content for players and MIME lookup.
    audio_file = "story_audio.mp3"
    tts.save(audio_file)
    return audio_file
# main part
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")  # prepare configuration
st.header("Turn Your Image to Audio Story")

# Upload image
uploaded_file = st.file_uploader("Select an Image...")

# Nothing below runs until the user has provided a file.
if uploaded_file is not None:
    print(uploaded_file)
    bytes_data = uploaded_file.getvalue()

    # The upload's name is client-supplied. Keep only its basename and write
    # under the temp directory so an upload can never overwrite files that
    # live next to the app script.
    image_path = os.path.join(tempfile.gettempdir(),
                              os.path.basename(uploaded_file.name))
    with open(image_path, "wb") as file:
        file.write(bytes_data)
    st.image(uploaded_file, caption="Uploaded Image",
             use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    scenario = img2text(image_path)
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_data = text2audio(story)

    # Play button
    if st.button("Play Audio"):
        # text2audio returns the path of the saved file (a str), not a dict
        # of samples, so the path is passed directly. gTTS output is MP3,
        # hence the audio/mpeg MIME type.
        st.audio(audio_data, format="audio/mpeg")