Spaces:

DroolingPanda
/

kokoro-tts-server

Runtime error

kokoro-tts-server/examples/streaming_refactor/benchmark_unified_streaming.py

Michael Hu

initial check in

05b45a5 about 1 month ago

5.38 kB

	#!/usr/bin/env python3
	"""Benchmark script for unified streaming implementation"""

	import asyncio
	import time
	from pathlib import Path
	from typing import List, Tuple

	from openai import OpenAI
	import numpy as np
	import matplotlib.pyplot as plt

	# OpenAI-compatible client pointed at a server on localhost:8880 (path /v1).
	# The api_key is a placeholder string; this module-level client is shared by
	# both benchmark functions below.
	client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")

	# Benchmark inputs keyed by a size label; main() iterates over this mapping
	# and runs both benchmarks on every entry.
	# Line breaks (and any leading whitespace on continuation lines) inside the
	# triple-quoted strings are part of the text that is sent, and they count
	# toward the len(text) values that main() reports.
	TEST_TEXTS = {
	"short": "The quick brown fox jumps over the lazy dog.",
	"medium": """In a bustling city, life moves at a rapid pace.
	People hurry along the sidewalks, while cars navigate
	through the busy streets. The air is filled with the
	sounds of urban activity.""",
	"long": """The technological revolution has transformed how we live and work.
	From artificial intelligence to renewable energy, innovations continue
	to shape our future. As we face global challenges, scientific advances
	offer new solutions. The intersection of technology and human creativity
	drives progress forward, opening new possibilities for tomorrow."""
	}

	async def benchmark_streaming(text_name: str, text: str) -> Tuple[float, float, int]:
	    """Benchmark streaming performance.

	    Requests PCM audio for ``text`` through the module-level ``client`` and
	    iterates the response body with ``chunk_size=1024``, timing the arrival
	    of the first non-empty chunk and the end of the stream.

	    Note: this coroutine contains no ``await``; the request is issued with
	    the synchronous client, so it runs to completion before returning
	    control to the event loop.

	    Args:
	        text_name: Label of the text being benchmarked. Not used by this
	            function.
	        text: Text to synthesize.

	    Returns:
	        Tuple of (time to first byte, total time, total bytes). Both times
	        are in seconds. Time to first byte is ``nan`` when the response
	        carried no data at all.
	    """
	    # perf_counter() is monotonic, so the measured intervals cannot be
	    # distorted by system clock adjustments the way time.time() can.
	    start_time = time.perf_counter()
	    total_bytes = 0
	    first_byte_time = None

	    with client.audio.speech.with_streaming_response.create(
	        model="kokoro",
	        voice="af_bella",
	        response_format="pcm",
	        input=text,
	    ) as response:
	        for chunk in response.iter_bytes(chunk_size=1024):
	            # Only a chunk that actually carries data counts as "first byte".
	            if first_byte_time is None and chunk:
	                first_byte_time = time.perf_counter() - start_time
	            total_bytes += len(chunk)

	    total_time = time.perf_counter() - start_time
	    if first_byte_time is None:
	        # Nothing was received: report NaN instead of None so callers that
	        # format or plot the value as a float keep working.
	        first_byte_time = float("nan")
	    return first_byte_time, total_time, total_bytes

	async def benchmark_non_streaming(text_name: str, text: str) -> Tuple[float, int]:
	    """Benchmark non-streaming performance.

	    Requests audio for ``text`` through the module-level ``client`` and
	    writes the whole response to ``non_stream_<text_name>.mp3`` in this
	    script's directory, then reports the elapsed time and the resulting
	    file size. The output file is left on disk.

	    Note: this coroutine contains no ``await``; the request is issued with
	    the synchronous client.

	    Args:
	        text_name: Label of the text; used to build the output file name.
	        text: Text to synthesize.

	    Returns:
	        Tuple of (total time, total bytes). Total time is in seconds and
	        includes writing the file; total bytes is the size of that file.
	    """
	    # perf_counter() is monotonic, so the measured interval cannot be
	    # distorted by system clock adjustments the way time.time() can.
	    start_time = time.perf_counter()
	    speech_file = Path(__file__).parent / f"non_stream_{text_name}.mp3"

	    # NOTE(review): no response_format is passed, so the ".mp3" suffix
	    # assumes the server's default output format is MP3 - confirm.
	    with client.audio.speech.with_streaming_response.create(
	        model="kokoro",
	        voice="af_bella",
	        input=text,
	    ) as response:
	        response.stream_to_file(speech_file)

	    total_time = time.perf_counter() - start_time
	    total_bytes = speech_file.stat().st_size
	    return total_time, total_bytes

	def plot_results(results: dict) -> None:
	    """Plot benchmark results as a grouped bar chart and save it as a PNG.

	    For every entry in ``results`` three bars are drawn side by side:
	    streaming total time, non-streaming total time and time to first byte.
	    The figure is written to ``benchmark_results.png`` in this script's
	    directory and then closed.

	    Args:
	        results: Mapping of text name to a dict containing
	            ``"streaming"`` (with ``"total_time"`` and
	            ``"first_byte_time"``) and ``"non_streaming"`` (with
	            ``"total_time"``). An optional ``"text_length"`` entry is used
	            as the x-axis tick label; when it is missing the text name is
	            used instead.
	    """
	    plt.figure(figsize=(12, 6))

	    # Prepare data. Everything is derived from `results` itself so that the
	    # number of bars and the tick labels always match what was measured,
	    # even if `results` covers a different set of texts than TEST_TEXTS.
	    text_lengths = [
	        entry.get("text_length", name) for name, entry in results.items()
	    ]
	    entries = list(results.values())
	    streaming_times = [r["streaming"]["total_time"] for r in entries]
	    non_streaming_times = [r["non_streaming"]["total_time"] for r in entries]
	    first_byte_times = [r["streaming"]["first_byte_time"] for r in entries]

	    # Plot times: one group per result, three bars of equal width per group.
	    x = np.arange(len(entries))
	    width = 0.25

	    plt.bar(x - width, streaming_times, width, label='Streaming Total Time')
	    plt.bar(x, non_streaming_times, width, label='Non-Streaming Total Time')
	    plt.bar(x + width, first_byte_times, width, label='Time to First Byte')

	    plt.xlabel('Text Length (characters)')
	    plt.ylabel('Time (seconds)')
	    plt.title('Unified Streaming Performance Comparison')
	    plt.xticks(x, text_lengths)
	    plt.legend()

	    # Save plot
	    plt.savefig(Path(__file__).parent / 'benchmark_results.png')
	    plt.close()

	async def main():
	    """Benchmark every text in TEST_TEXTS, print the numbers, and plot them.

	    For each entry the streaming benchmark runs first, then the
	    non-streaming one. The measurements are collected into a dict keyed by
	    text name, printed per text, and finally handed to plot_results().
	    """
	    print("Starting unified streaming benchmarks...")

	    results = {}

	    for name, text in TEST_TEXTS.items():
	        char_count = len(text)
	        print(f"\nTesting {name} text ({char_count} chars)...")

	        # Streaming run
	        print("Running streaming test...")
	        ttfb, stream_secs, stream_size = await benchmark_streaming(name, text)

	        # Non-streaming run
	        print("Running non-streaming test...")
	        plain_secs, plain_size = await benchmark_non_streaming(name, text)

	        # Throughput in KB/s, computed once and reused for storage and output.
	        stream_kbps = stream_size / stream_secs / 1024
	        plain_kbps = plain_size / plain_secs / 1024

	        streaming_stats = {
	            "first_byte_time": ttfb,
	            "total_time": stream_secs,
	            "total_bytes": stream_size,
	            "throughput": stream_kbps,
	        }
	        non_streaming_stats = {
	            "total_time": plain_secs,
	            "total_bytes": plain_size,
	            "throughput": plain_kbps,
	        }
	        results[name] = {
	            "text_length": char_count,
	            "streaming": streaming_stats,
	            "non_streaming": non_streaming_stats,
	        }

	        # Report the numbers for this text
	        print(f"\nResults for {name} text:")
	        print("Streaming:")
	        print(f" Time to first byte: {ttfb:.3f}s")
	        print(f" Total time: {stream_secs:.3f}s")
	        print(f" Throughput: {stream_kbps:.1f} KB/s")
	        print("Non-streaming:")
	        print(f" Total time: {plain_secs:.3f}s")
	        print(f" Throughput: {plain_kbps:.1f} KB/s")

	    # Render the comparison chart
	    plot_results(results)
	    print("\nBenchmark results have been plotted to benchmark_results.png")


	# Run the benchmarks only when executed as a script, not on import.
	if __name__ == "__main__":
	    asyncio.run(main())