import sys
import os
import gc
import platform
import shutil

import torch
import gradio as gr

sys.path.append('.')
from KOKORO.models import build_model
from KOKORO.utils import tts, tts_file_name

# Fetch the model checkpoints if they are not already present.
os.system("python download_model.py")
# Initialize model
print("Loading model...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')
MODEL = build_model('./KOKORO/kokoro-v0_19.pth', device)
print("Model loaded successfully.")
# Get list of available voices
voice_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir("./KOKORO/voices")
    if filename.endswith('.pt')
]
voice_list = sorted(voice_list, key=len)
model_list = ["kokoro-v0_19.pth", "kokoro-v0_19-half.pth"]
current_model = model_list[0]
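
# Two checkpoints ship with the app: the full-precision model and an fp16 copy
# under ./KOKORO/fp16/ that roughly halves GPU memory use. update_model() swaps
# between them on demand, releasing the old weights before loading the new ones.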
def update_model(model_name):
    """Updates the TTS model only if the specified model is not already loaded."""
    global MODEL, current_model
    if current_model == model_name:
        return f"Model already set to {model_name}"
    model_path = f"./KOKORO/{model_name}"
    if model_name == "kokoro-v0_19-half.pth":
        model_path = f"./KOKORO/fp16/{model_name}"
    del MODEL
    gc.collect()
    torch.cuda.empty_cache()
    MODEL = build_model(model_path, device)
    current_model = model_name
    return f"Model updated to {model_name}"
def manage_files(file_path):
    """Validates uploaded voicepack files."""
    if os.path.exists(file_path):
        file_extension = os.path.splitext(file_path)[1]
        file_size = os.path.getsize(file_path)
        if file_extension == ".pt" and file_size <= 5 * 1024 * 1024:
            return True
        else:
            os.remove(file_path)
            return False
    return False
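
# Single synthesis entry point, wired to both the textbox submit event and the
# Generate button below. A valid custom voicepack path replaces the selected
# built-in voice.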
def text_to_speech(text, model_name="kokoro-v0_19.pth", voice_name="af", speed=1.0,
                   pad_between_segments=0, remove_silence=True, minimum_silence=0.20,
                   custom_voicepack=None, trim=0.0):
    """Converts text to speech using specified parameters."""
    update_model(model_name)
    if not minimum_silence:
        minimum_silence = 0.05
    save_at = tts_file_name(text)
    if custom_voicepack:
        if manage_files(custom_voicepack):
            voice_name = custom_voicepack
        else:
            gr.Warning("Invalid voicepack file. Using default voice instead.")
    audio_path = tts(MODEL, device, text, voice_name, speed=speed, trim=trim,
                     pad_between_segments=pad_between_segments, output_file=save_at,
                     remove_silence=remove_silence, minimum_silence=minimum_silence)
    return audio_path
def toggle_autoplay(autoplay):
    return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
# Main Gradio interface
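# Layout: text input, voice picker, and audio settings in the left column;
# the output audio player and an autoplay toggle in the right column.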
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro TTS - Batched Text-to-Speech")
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label='Enter Text', lines=3, placeholder="Type your text here...")
            with gr.Row():
                voice = gr.Dropdown(voice_list, value='af_bella', allow_custom_value=False,
                                    label='Voice', info='Select a voice')
            with gr.Row():
                generate_btn = gr.Button('Generate', variant='primary')
            with gr.Accordion('Audio Settings', open=False):
                model_name = gr.Dropdown(model_list, label="Model", value=model_list[0])
                speed = gr.Slider(minimum=0.25, maximum=2, value=1, step=0.1,
                                  label='Speed', info='Adjust speaking speed')
                remove_silence = gr.Checkbox(value=False, label='Remove Silence')
                minimum_silence = gr.Number(label="Minimum Silence (seconds)", value=0.05)
                pad_between = gr.Slider(minimum=0, maximum=2, value=0, step=0.1,
                                        label='Pad Between', info='Silent duration between segments')
                custom_voicepack = gr.File(label='Upload Custom VoicePack (.pt file)')
        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Autoplay Settings', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])

    text.submit(text_to_speech,
                inputs=[text, model_name, voice, speed, pad_between, remove_silence,
                        minimum_silence, custom_voicepack],
                outputs=[audio])
    generate_btn.click(text_to_speech,
                       inputs=[text, model_name, voice, speed, pad_between, remove_silence,
                               minimum_silence, custom_voicepack],
                       outputs=[audio])

if __name__ == "__main__":
    demo.queue().launch()
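
# Note: launch() serves on localhost by default. To reach the app from another
# machine or get a temporary public URL, you could pass, for example:
#   demo.queue().launch(server_name="0.0.0.0", share=True)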