# Spaces: Sleeping  (appears to be a hosting-page status banner captured with the source; not part of the program)
import os | |
import random | |
import torch | |
from kokoro import KModel, KPipeline | |
import gradio as gr | |
# App configuration: heading text shown at the top of the page and the Gradio theme.
APP_TITLE = "✨ VoiceBloom ✨"
APP_SUBTITLE = "Transform wisdom into delightful speech!"

# CSS values applied on top of the Soft base theme via ``.set(...)``.
_THEME_OVERRIDES = {
    "body_background_fill": "linear-gradient(to right top, #d16ba5, #c777b9, #ba83ca, #aa8fd8, #9a9ae1, #8aa7ec, #79b3f4, #69bff8, #52cffe, #41dfff, #46eefa, #5ffbf1)",
    "button_primary_background_fill": "linear-gradient(90deg, rgba(255,124,0,1) 0%, rgba(255,194,23,1) 100%)",
    "button_primary_background_fill_hover": "linear-gradient(90deg, rgba(255,194,23,1) 0%, rgba(255,124,0,1) 100%)",
    "button_secondary_background_fill": "linear-gradient(90deg, rgba(144,95,255,1) 0%, rgba(110,72,220,1) 100%)",
    "button_secondary_background_fill_hover": "linear-gradient(90deg, rgba(110,72,220,1) 0%, rgba(144,95,255,1) 100%)",
    "block_background_fill": "rgba(255, 255, 255, 0.8)",
    "block_shadow": "0px 4px 12px rgba(0, 0, 0, 0.1)",
    "block_radius": "12px",
}

THEME = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="purple",
).set(**_THEME_OVERRIDES)
# Initialize TTS models without GPU
# ``models`` is keyed by a boolean; only the ``False`` entry (the CPU model,
# switched to eval mode) is created here.
_cpu_model = KModel().to('cpu').eval()
models = {False: _cpu_model}

# One pipeline per language code. ``model=False`` is passed to each pipeline;
# presumably this stops it from loading its own model, since synthesis below
# goes through ``models`` instead -- TODO confirm against the kokoro docs.
pipelines = {code: KPipeline(lang_code=code, model=False) for code in ('a', 'b')}

# Pin the lexicon entry for the word "kokoro" in each pipeline.
for _code, _kokoro_entry in (('a', 'kˈOkəɹO'), ('b', 'kˈQkəɹQ')):
    pipelines[_code].g2p.lexicon.golds['kokoro'] = _kokoro_entry
# Define voice options with fun emoji indicators
# Ordered (display label, voice id) pairs. The order matters: the first entry
# is used as the dropdown's default, and the first character of each voice id
# ('a' or 'b') is used elsewhere to pick the matching pipeline.
_VOICE_TABLE = (
    ('✨ Sarah (US) ✨', 'af_sarah'),
    ('🌟 Nicole (US) 🌟', 'af_nicole'),
    ('💖 Heart (US) 💖', 'af_heart'),
    ('🔮 Bella (US) 🔮', 'af_bella'),
    ('🌈 Aoede (US) 🌈', 'af_aoede'),
    ('🎵 Michael (US) 🎵', 'am_michael'),
    ('🌠 Echo (US) 🌠', 'am_echo'),
    ('🧙 Fenrir (US) 🧙', 'am_fenrir'),
    ('🎭 Puck (US) 🎭', 'am_puck'),
    ('👑 Emma (UK) 👑', 'bf_emma'),
    ('🌹 Isabella (UK) 🌹', 'bf_isabella'),
    ('🎩 George (UK) 🎩', 'bm_george'),
    ('✨ Fable (UK) ✨', 'bm_fable'),
)
VOICES = dict(_VOICE_TABLE)
# Preload voices
# Call load_voice once for every configured voice at import time; the first
# character of a voice id selects which pipeline handles it.
for voice_id in VOICES.values():
    lang_pipeline = pipelines[voice_id[0]]
    lang_pipeline.load_voice(voice_id)
# Inspirational quotes
# Grouped by theme for readability; QUOTES is the flat list, in the same order,
# that the random-quote button samples from.
_CONFUCIUS_QUOTES = (
    "When it is obvious that the goals cannot be reached, don't adjust the goals, adjust the action steps. - Confucius",
    "The man who moves a mountain begins by carrying away small stones. - Confucius",
    "Life is really simple, but we insist on making it complicated. - Confucius",
    "It does not matter how slowly you go as long as you do not stop. - Confucius",
    "Our greatest glory is not in never falling, but in rising every time we fall. - Confucius",
    "Silence is a true friend who never betrays. - Confucius",
)
_FRUIT_QUOTES = (
    "Eating fruit daily provides essential vitamins, minerals and fiber that help maintain good health.",
    "An apple a day keeps the doctor away - a simple habit with profound health benefits.",
    "Fruits are nature's candy - sweet, nutritious, and vital for our wellbeing.",
    "Regular consumption of fruits boosts your immune system and reduces risk of chronic diseases.",
    "Colorful fruits on your plate mean a rainbow of nutrients for your body.",
    "The wisdom of health lies in eating seasonal fruits that nature provides us.",
)
_PROVERB_QUOTES = (
    "A journey of a thousand miles begins with a single step.",
    "Happiness is not something ready-made. It comes from your own actions.",
    "The best time to plant a tree was 20 years ago. The second best time is now.",
)
QUOTES = [*_CONFUCIUS_QUOTES, *_FRUIT_QUOTES, *_PROVERB_QUOTES]
def generate_audio(text, voice, speed):
    """Generate audio from text using the selected voice and speed.

    The pipeline can yield more than one segment for a single input. Every
    segment is synthesized with the CPU model and the pieces are joined into
    one clip, so text beyond the first segment is not dropped.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text
            produces no audio.
        voice: Voice identifier such as ``'af_sarah'``; its first character
            selects the entry of ``pipelines`` to use.
        speed: Speed value forwarded to both the pipeline and the model.

    Returns:
        A ``(24000, samples)`` tuple, where ``samples`` is the NumPy array of
        the joined audio, or ``None`` when there is nothing to synthesize.
    """
    # Nothing to say: skip the pipeline entirely.
    if not text or not text.strip():
        return None

    pipeline = pipelines[voice[0]]
    pack = pipeline.load_voice(voice)

    chunks = []
    for _, ps, _ in pipeline(text, voice, speed):
        # The voice pack is indexed by the length of the segment's second
        # element (presumably its phoneme string) minus one.
        ref_s = pack[len(ps) - 1]
        chunks.append(models[False](ps, ref_s, speed))

    if not chunks:
        return None

    # A single segment is returned as-is; several are joined end to end.
    audio = chunks[0] if len(chunks) == 1 else torch.cat(chunks)
    return (24000, audio.numpy())
def stream_audio(text, voice, speed):
    """Stream audio for longer texts, one pipeline segment at a time.

    Args:
        text: Text to speak.
        voice: Voice identifier; its first character selects the entry of
            ``pipelines`` to use.
        speed: Speed value forwarded to both the pipeline and the model.

    Yields:
        ``(24000, samples)`` for each segment the pipeline produces, where
        ``samples`` is the NumPy array returned for that segment.
    """
    lang_pipeline = pipelines[voice[0]]
    voice_pack = lang_pipeline.load_voice(voice)
    for _, phonemes, _ in lang_pipeline(text, voice, speed):
        # The voice pack is indexed by segment length minus one.
        reference = voice_pack[len(phonemes) - 1]
        waveform = models[False](phonemes, reference, speed)
        yield 24000, waveform.numpy()
def get_random_quote():
    """Return one entry of ``QUOTES`` chosen at random."""
    picked = random.choice(QUOTES)
    return picked
def get_confucius():
    """Return the preset five-line passage of Confucius sayings."""
    sayings = (
        "The Master said, 'Learning without thought is labor lost; thought without learning is perilous.'",
        "He who exercises government by means of his virtue may be compared to the north polar star, which keeps its place and all the stars turn towards it.",
        "To see what is right and not to do it is want of courage, or of principle.",
        "The superior man is modest in his speech, but exceeds in his actions.",
        "The journey of a thousand miles begins with a single step.",
    )
    # One saying per line, no trailing newline.
    return "\n".join(sayings)
def get_fruit_wisdom():
    """Return the preset five-line passage about the health benefits of fruit."""
    passage_lines = (
        "The regular consumption of fruits is one of the wisest habits for maintaining good health.",
        "Fruits provide a natural source of vitamins, minerals, and fiber that our bodies need for optimal functioning. They contain antioxidants that help fight inflammation and reduce the risk of chronic diseases.",
        "Different colored fruits offer different nutrients - oranges for vitamin C, bananas for potassium, blueberries for antioxidants, and apples for fiber.",
        "Eating seasonal fruits connects us with nature's rhythm and ensures we get the freshest, most nutritious options available.",
        "Remember: 'Nature's first green is gold' - and nowhere is this more true than in the vibrant colors of fruit that nourish our bodies daily.",
    )
    # One paragraph per line, no trailing newline.
    return "\n".join(passage_lines)
# Build the UI
# NOTE(review): the container nesting below is reconstructed from the order of
# the layout calls -- confirm it against the intended page layout.
voice_choices = list(VOICES.items())

with gr.Blocks(theme=THEME) as app:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(f"### {APP_SUBTITLE}")

    with gr.Row():
        # Left column: text entry, quote shortcuts, voice and speed controls.
        with gr.Column():
            with gr.Group():
                gr.Markdown("### 📝 What would you like me to say?")
                text_input = gr.Textbox(
                    label="",
                    placeholder="Type your text here or choose a wisdom quote below...",
                    lines=5,
                )
                with gr.Row():
                    random_btn = gr.Button("🎲 Random Quote", variant="secondary")
                    confucius_btn = gr.Button("🧙 Confucius Wisdom", variant="secondary")
                    fruit_btn = gr.Button("🍎 Fruit Wisdom", variant="secondary")

            with gr.Group():
                gr.Markdown("### 🎤 Choose Your Voice")
                # Choices are (label, voice id) pairs; the first voice id is the default.
                voice_dropdown = gr.Dropdown(
                    voice_choices,
                    value=voice_choices[0][1],
                    label="Voice Style",
                )
                gr.Markdown("### ⏱️ Adjust Speaking Speed")
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speed",
                )

        # Right column: one tab for single-shot playback, one for streaming.
        with gr.Column():
            with gr.Tabs():
                with gr.TabItem("🔊 Listen"):
                    audio_output = gr.Audio(
                        label="Your VoiceBloom Creation",
                        interactive=False,
                        autoplay=True,
                    )
                    generate_btn = gr.Button("🎵 Generate Audio", variant="primary", size="lg")
                with gr.TabItem("📻 Stream Longer Text"):
                    stream_output = gr.Audio(
                        label="Streaming Audio",
                        interactive=False,
                        streaming=True,
                        autoplay=True,
                    )
                    with gr.Row():
                        stream_btn = gr.Button("▶️ Start Streaming", variant="primary")
                        stop_btn = gr.Button("⏹️ Stop", variant="stop")

    # Footer
    gr.Markdown("---")
    gr.Markdown("### ✨ VoiceBloom - Bringing wisdom to life through the art of voice ✨")

    # Event handlers: each quote button fills the text box from its source function.
    for quote_button, quote_source in (
        (random_btn, get_random_quote),
        (confucius_btn, get_confucius),
        (fruit_btn, get_fruit_wisdom),
    ):
        quote_button.click(fn=quote_source, outputs=[text_input])

    generate_btn.click(
        fn=generate_audio,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )

    # Keep the streaming event handle so the stop button can cancel it.
    stream_event = stream_btn.click(
        fn=stream_audio,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[stream_output],
    )
    stop_btn.click(fn=None, cancels=stream_event)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()