# KokAudio / app.py
# BryanBradfo's picture
# no gradio error
# a0e43b6
import os
import random
import torch
from kokoro import KModel, KPipeline
import gradio as gr
# App configuration
APP_TITLE = "✨ VoiceBloom ✨"
APP_SUBTITLE = "Transform wisdom into delightful speech!"

# Style overrides layered on top of gradio's Soft theme: a multi-stop page
# gradient, gradient-filled primary/secondary buttons (each hover state swaps
# the two colour stops of its resting state), and translucent white blocks
# with a soft shadow and rounded corners.
_THEME_OVERRIDES = {
    "body_background_fill": "linear-gradient(to right top, #d16ba5, #c777b9, #ba83ca, #aa8fd8, #9a9ae1, #8aa7ec, #79b3f4, #69bff8, #52cffe, #41dfff, #46eefa, #5ffbf1)",
    "button_primary_background_fill": "linear-gradient(90deg, rgba(255,124,0,1) 0%, rgba(255,194,23,1) 100%)",
    "button_primary_background_fill_hover": "linear-gradient(90deg, rgba(255,194,23,1) 0%, rgba(255,124,0,1) 100%)",
    "button_secondary_background_fill": "linear-gradient(90deg, rgba(144,95,255,1) 0%, rgba(110,72,220,1) 100%)",
    "button_secondary_background_fill_hover": "linear-gradient(90deg, rgba(110,72,220,1) 0%, rgba(144,95,255,1) 100%)",
    "block_background_fill": "rgba(255, 255, 255, 0.8)",
    "block_shadow": "0px 4px 12px rgba(0, 0, 0, 0.1)",
    "block_radius": "12px",
}

THEME = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="purple",
).set(**_THEME_OVERRIDES)
# Initialize TTS models without GPU
# The only model is a CPU copy switched to eval mode, stored under the key
# False (the key the synthesis functions look up).
models = {False: KModel().to('cpu').eval()}

# One pipeline per language code. model=False is passed to each pipeline;
# presumably this keeps the pipeline from loading a model of its own, since
# synthesis goes through `models` instead -- TODO confirm against kokoro docs.
pipelines = {}
for _code in ('a', 'b'):
    pipelines[_code] = KPipeline(lang_code=_code, model=False)

# Pin the phoneme string each pipeline's lexicon uses for the word "kokoro".
for _code, _phonemes in (('a', 'kˈOkəɹO'), ('b', 'kˈQkəɹQ')):
    pipelines[_code].g2p.lexicon.golds['kokoro'] = _phonemes
# Define voice options with fun emoji indicators
# Each entry maps a display label to a voice id. The first character of the
# id ('a' or 'b') is what the rest of the file uses to pick a pipeline.
_US_VOICES = {
    '✨ Sarah (US) ✨': 'af_sarah',
    '🌟 Nicole (US) 🌟': 'af_nicole',
    '💖 Heart (US) 💖': 'af_heart',
    '🔮 Bella (US) 🔮': 'af_bella',
    '🌈 Aoede (US) 🌈': 'af_aoede',
    '🎵 Michael (US) 🎵': 'am_michael',
    '🌠 Echo (US) 🌠': 'am_echo',
    '🧙 Fenrir (US) 🧙': 'am_fenrir',
    '🎭 Puck (US) 🎭': 'am_puck',
}
_UK_VOICES = {
    '👑 Emma (UK) 👑': 'bf_emma',
    '🌹 Isabella (UK) 🌹': 'bf_isabella',
    '🎩 George (UK) 🎩': 'bm_george',
    '✨ Fable (UK) ✨': 'bm_fable',
}

# US entries are merged first so the overall ordering (and therefore the
# first entry, Sarah) is unchanged.
VOICES = {**_US_VOICES, **_UK_VOICES}
# Preload voices
# Call load_voice once per configured voice at import time, on the pipeline
# selected by the id's first character. Presumably this warms a per-pipeline
# cache so later load_voice calls are cheap -- TODO confirm in kokoro.
for voice_id in VOICES.values():
    lang_pipeline = pipelines[voice_id[0]]
    lang_pipeline.load_voice(voice_id)
# Inspirational quotes
# Three themed groups, concatenated below into the single pool that the
# random-quote helper draws from.
_CONFUCIUS_QUOTES = [
    "When it is obvious that the goals cannot be reached, don't adjust the goals, adjust the action steps. - Confucius",
    "The man who moves a mountain begins by carrying away small stones. - Confucius",
    "Life is really simple, but we insist on making it complicated. - Confucius",
    "It does not matter how slowly you go as long as you do not stop. - Confucius",
    "Our greatest glory is not in never falling, but in rising every time we fall. - Confucius",
    "Silence is a true friend who never betrays. - Confucius",
]
_FRUIT_QUOTES = [
    "Eating fruit daily provides essential vitamins, minerals and fiber that help maintain good health.",
    "An apple a day keeps the doctor away - a simple habit with profound health benefits.",
    "Fruits are nature's candy - sweet, nutritious, and vital for our wellbeing.",
    "Regular consumption of fruits boosts your immune system and reduces risk of chronic diseases.",
    "Colorful fruits on your plate mean a rainbow of nutrients for your body.",
    "The wisdom of health lies in eating seasonal fruits that nature provides us.",
]
_PROVERB_QUOTES = [
    "A journey of a thousand miles begins with a single step.",
    "Happiness is not something ready-made. It comes from your own actions.",
    "The best time to plant a tree was 20 years ago. The second best time is now.",
]

QUOTES = _CONFUCIUS_QUOTES + _FRUIT_QUOTES + _PROVERB_QUOTES
def generate_audio(text, voice, speed):
    """Generate one audio clip covering the whole of `text`.

    The pipeline can yield several segments for a single input. Every
    segment is synthesized and the results are joined end to end, so
    multi-line input (such as the built-in wisdom presets) is spoken in full
    instead of stopping after the first segment.

    Args:
        text: Text to speak. Empty or whitespace-only text produces no audio.
        voice: Voice id such as 'af_sarah'; its first character selects the
            pipeline.
        speed: Speed value passed through to both the pipeline and the model.

    Returns:
        A ``(24000, samples)`` tuple, where ``samples`` is the NumPy array of
        the concatenated model output, or None when there is nothing to
        synthesize.
    """
    # Nothing to say: skip the pipeline entirely.
    if not text or not text.strip():
        return None

    pipeline = pipelines[voice[0]]
    pack = pipeline.load_voice(voice)
    model = models[False]

    chunks = []
    for _, ps, _ in pipeline(text, voice, speed):
        # The voice pack is indexed by the segment's length minus one.
        ref_s = pack[len(ps) - 1]
        chunks.append(model(ps, ref_s, speed))

    if not chunks:
        return None
    # Join along the last axis (assumed to be the sample axis of the model
    # output -- TODO confirm) so the segments play back to back.
    return (24000, torch.cat(chunks, dim=-1).numpy())
def stream_audio(text, voice, speed):
    """Yield synthesized audio segment by segment.

    Generator used for longer texts: each segment produced by the pipeline is
    run through the CPU model and yielded straight away as a
    ``(24000, samples)`` pair, rather than waiting for the whole text.

    Args:
        text: Text to speak.
        voice: Voice id; its first character selects the pipeline.
        speed: Speed value passed through to both the pipeline and the model.
    """
    tts = pipelines[voice[0]]
    voice_pack = tts.load_voice(voice)
    for segment in tts(text, voice, speed):
        # Only the middle item of each result is used (presumably the
        # segment's phoneme string -- TODO confirm against kokoro).
        _, phonemes, _ = segment
        # The voice pack is indexed by the segment's length minus one.
        style_ref = voice_pack[len(phonemes) - 1]
        waveform = models[False](phonemes, style_ref, speed)
        yield 24000, waveform.numpy()
def get_random_quote():
    """Return one entry of QUOTES, chosen at random."""
    picked = random.choice(QUOTES)
    return picked
def get_confucius():
    """Return a fixed five-line passage of sayings, one saying per line."""
    sayings = (
        "The Master said, 'Learning without thought is labor lost; thought without learning is perilous.'",
        "He who exercises government by means of his virtue may be compared to the north polar star, which keeps its place and all the stars turn towards it.",
        "To see what is right and not to do it is want of courage, or of principle.",
        "The superior man is modest in his speech, but exceeds in his actions.",
        "The journey of a thousand miles begins with a single step.",
    )
    # Newline-separated, with no trailing newline.
    return "\n".join(sayings)
def get_fruit_wisdom():
    """Return a fixed five-line passage about the benefits of eating fruit."""
    paragraphs = (
        "The regular consumption of fruits is one of the wisest habits for maintaining good health.",
        "Fruits provide a natural source of vitamins, minerals, and fiber that our bodies need for optimal functioning. They contain antioxidants that help fight inflammation and reduce the risk of chronic diseases.",
        "Different colored fruits offer different nutrients - oranges for vitamin C, bananas for potassium, blueberries for antioxidants, and apples for fiber.",
        "Eating seasonal fruits connects us with nature's rhythm and ensures we get the freshest, most nutritious options available.",
        "Remember: 'Nature's first green is gold' - and nowhere is this more true than in the vibrant colors of fruit that nourish our bodies daily.",
    )
    # Newline-separated, with no trailing newline.
    return "\n".join(paragraphs)
# Build the UI
# Page structure: title and subtitle, then a row holding an input column
# (text box, preset buttons, voice and speed controls) and an output column
# (one-shot and streaming tabs), then a footer. Event wiring comes last,
# still inside the Blocks context.
with gr.Blocks(theme=THEME) as app:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(f"### {APP_SUBTITLE}")

    with gr.Row():
        with gr.Column():
            # Input section with colorful styling
            with gr.Group():
                gr.Markdown("### 📝 What would you like me to say?")
                text_input = gr.Textbox(
                    label="",
                    placeholder="Type your text here or choose a wisdom quote below...",
                    lines=5,
                )
                with gr.Row():
                    random_btn = gr.Button("🎲 Random Quote", variant="secondary")
                    confucius_btn = gr.Button("🧙 Confucius Wisdom", variant="secondary")
                    fruit_btn = gr.Button("🍎 Fruit Wisdom", variant="secondary")

            # Voice & Speed controls
            with gr.Group():
                gr.Markdown("### 🎤 Choose Your Voice")
                # (label, voice id) pairs; the first pair's id is the default.
                voice_choices = list(VOICES.items())
                voice_dropdown = gr.Dropdown(
                    voice_choices,
                    value=voice_choices[0][1],
                    label="Voice Style",
                )
                gr.Markdown("### ⏱️ Adjust Speaking Speed")
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speed",
                )

        # Output section
        with gr.Column():
            with gr.Tabs():
                with gr.TabItem("🔊 Listen"):
                    audio_output = gr.Audio(
                        label="Your VoiceBloom Creation",
                        interactive=False,
                        autoplay=True,
                    )
                    generate_btn = gr.Button("🎵 Generate Audio", variant="primary", size="lg")

                with gr.TabItem("📻 Stream Longer Text"):
                    stream_output = gr.Audio(
                        label="Streaming Audio",
                        interactive=False,
                        streaming=True,
                        autoplay=True,
                    )
                    with gr.Row():
                        stream_btn = gr.Button("▶️ Start Streaming", variant="primary")
                        stop_btn = gr.Button("⏹️ Stop", variant="stop")

    # Footer
    gr.Markdown("---")
    gr.Markdown("### ✨ VoiceBloom - Bringing wisdom to life through the art of voice ✨")

    # Set up event handlers
    # Each preset button replaces the text box content with its helper's text.
    for preset_btn, preset_fn in (
        (random_btn, get_random_quote),
        (confucius_btn, get_confucius),
        (fruit_btn, get_fruit_wisdom),
    ):
        preset_btn.click(fn=preset_fn, outputs=[text_input])

    # One-shot synthesis into the "Listen" player.
    generate_btn.click(
        fn=generate_audio,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )

    # Streaming synthesis; the event handle is kept so Stop can cancel it.
    stream_event = stream_btn.click(
        fn=stream_audio,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[stream_output],
    )
    # Stop runs no function of its own; it only cancels the stream event.
    stop_btn.click(fn=None, cancels=stream_event)


if __name__ == "__main__":
    app.launch()