import sys
sys.path.append('.')
import os

# Download the model weights before anything tries to load them
os.system("python download_model.py")

import gc
import platform
import shutil

import torch
import gradio as gr

from KOKORO.models import build_model
from KOKORO.utils import tts, tts_file_name

# Initialize model
print("Loading model...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')
MODEL = build_model('./KOKORO/kokoro-v0_19.pth', device)
print("Model loaded successfully.")
# Get list of available voices
voice_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir("./KOKORO/voices")
    if filename.endswith('.pt')
]
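# Sort by name length so the short base voices (e.g. 'af') appear first in the dropdown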
voice_list = sorted(voice_list, key=len)
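# Two checkpoints are available: full precision, and a half-precision (fp16) copy stored under ./KOKORO/fp16/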
model_list = ["kokoro-v0_19.pth", "kokoro-v0_19-half.pth"]
current_model = model_list[0]


def update_model(model_name):
    """Updates the TTS model only if the specified model is not already loaded."""
    global MODEL, current_model
    if current_model == model_name:
        return f"Model already set to {model_name}"
    model_path = f"./KOKORO/{model_name}"
    if model_name == "kokoro-v0_19-half.pth":
        model_path = f"./KOKORO/fp16/{model_name}"
    # Release the current model and reclaim memory before loading the new weights
    del MODEL
    gc.collect()
    if device == 'cuda':
        torch.cuda.empty_cache()
    MODEL = build_model(model_path, device)
    current_model = model_name
    return f"Model updated to {model_name}"


def manage_files(file_path):
    """Validates uploaded voicepack files: must be a .pt file no larger than 5 MB."""
    if os.path.exists(file_path):
        file_extension = os.path.splitext(file_path)[1]
        file_size = os.path.getsize(file_path)
        # Accept only .pt files up to 5 MB; anything else is deleted
        if file_extension == ".pt" and file_size <= 5 * 1024 * 1024:
            return True
        else:
            os.remove(file_path)
            return False
    return False


def text_to_speech(text, model_name="kokoro-v0_19.pth", voice_name="af", speed=1.0,
                   pad_between_segments=0, remove_silence=True, minimum_silence=0.20,
                   custom_voicepack=None, trim=0.0):
    """Converts text to speech using the specified parameters."""
    update_model(model_name)
    if not minimum_silence:
        minimum_silence = 0.05
    save_at = tts_file_name(text)
    if custom_voicepack:
        # Use the uploaded voicepack only if it passes validation; otherwise fall back to the selected voice
        if manage_files(custom_voicepack):
            voice_name = custom_voicepack
        else:
            gr.Warning("Invalid voicepack file. Using default voice instead.")
    audio_path = tts(MODEL, device, text, voice_name, speed=speed, trim=trim,
                     pad_between_segments=pad_between_segments, output_file=save_at,
                     remove_silence=remove_silence, minimum_silence=minimum_silence)
    return audio_path


def toggle_autoplay(autoplay):
    return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)


# Main Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro TTS - Batched Text-to-Speech")
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label='Enter Text', lines=3, placeholder="Type your text here...")
            with gr.Row():
                voice = gr.Dropdown(voice_list, value='af_bella', allow_custom_value=False,
                                    label='Voice', info='Select a voice')
            with gr.Row():
                generate_btn = gr.Button('Generate', variant='primary')
            with gr.Accordion('Audio Settings', open=False):
                model_name = gr.Dropdown(model_list, label="Model", value=model_list[0])
                speed = gr.Slider(minimum=0.25, maximum=2, value=1, step=0.1,
                                  label='Speed', info='Adjust speaking speed')
                remove_silence = gr.Checkbox(value=False, label='Remove Silence')
                minimum_silence = gr.Number(label="Minimum Silence (seconds)", value=0.05)
                pad_between = gr.Slider(minimum=0, maximum=2, value=0, step=0.1,
                                        label='Pad Between', info='Silent duration between segments')
                custom_voicepack = gr.File(label='Upload Custom VoicePack (.pt file)')
        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Autoplay Settings', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])

    # Both pressing Enter in the textbox and clicking Generate run the same TTS handler
    text.submit(text_to_speech,
                inputs=[text, model_name, voice, speed, pad_between, remove_silence,
                        minimum_silence, custom_voicepack],
                outputs=[audio])
    generate_btn.click(text_to_speech,
                       inputs=[text, model_name, voice, speed, pad_between, remove_silence,
                               minimum_silence, custom_voicepack],
                       outputs=[audio])

if __name__ == "__main__":
    demo.queue().launch()