Spaces:
Runtime error
Runtime error
File size: 5,378 Bytes
05b45a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
#!/usr/bin/env python3
"""Benchmark script for unified streaming implementation"""
import asyncio
import time
from pathlib import Path
from typing import List, Tuple
from openai import OpenAI
import numpy as np
import matplotlib.pyplot as plt
# Initialize the OpenAI SDK client against a server on localhost port 8880.
# The api_key value is a placeholder string.
# NOTE(review): presumably a local OpenAI-compatible TTS server that does not
# validate the key - confirm.
client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")
# Sample inputs keyed by a size label; main() benchmarks each entry in
# insertion order. The triple-quoted entries span several lines, so their
# embedded newlines are part of the text that gets sent for synthesis.
TEST_TEXTS = {
# Single sentence.
"short": "The quick brown fox jumps over the lazy dog.",
# Four-line paragraph.
"medium": """In a bustling city, life moves at a rapid pace.
People hurry along the sidewalks, while cars navigate
through the busy streets. The air is filled with the
sounds of urban activity.""",
# Five-line paragraph.
"long": """The technological revolution has transformed how we live and work.
From artificial intelligence to renewable energy, innovations continue
to shape our future. As we face global challenges, scientific advances
offer new solutions. The intersection of technology and human creativity
drives progress forward, opening new possibilities for tomorrow."""
}
async def benchmark_streaming(text_name: str, text: str) -> Tuple[float, float, int]:
    """Benchmark streaming performance.

    Requests PCM audio for ``text`` through the module-level ``client`` and
    consumes the response in 1024-byte chunks, timing the arrival of the
    first non-empty chunk and the whole transfer.

    Note: although declared ``async``, the body contains no ``await``; the
    request and the chunk loop run synchronously on the calling thread.

    Args:
        text_name: Label of the sample being benchmarked. Not used by this
            function; it mirrors the signature of ``benchmark_non_streaming``.
        text: Text to synthesize.

    Returns:
        Tuple of (time to first byte, total time, total bytes). Both times
        are in seconds, measured from just before the request is issued.

    Raises:
        RuntimeError: If the response finishes without delivering any bytes,
            in which case there is no meaningful time-to-first-byte.
    """
    # perf_counter() is monotonic and high resolution, so elapsed times cannot
    # be skewed by system clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()
    total_bytes = 0
    first_byte_time = None

    with client.audio.speech.with_streaming_response.create(
        model="kokoro",
        voice="af_bella",
        response_format="pcm",
        input=text,
    ) as response:
        for chunk in response.iter_bytes(chunk_size=1024):
            if not chunk:
                # An empty chunk carries no audio; it must not count as the
                # first byte.
                continue
            if first_byte_time is None:
                first_byte_time = time.perf_counter() - start_time
            total_bytes += len(chunk)

    total_time = time.perf_counter() - start_time

    if first_byte_time is None:
        # Fail here with a clear message instead of returning None and
        # letting the caller crash while formatting the result.
        raise RuntimeError(
            f"Streaming response for {text_name!r} returned no audio data"
        )

    return first_byte_time, total_time, total_bytes
async def benchmark_non_streaming(text_name: str, text: str) -> Tuple[float, int]:
    """Benchmark non-streaming performance.

    Requests audio for ``text`` through the module-level ``client`` and writes
    the whole response to ``non_stream_<text_name>.mp3`` next to this script.
    No ``response_format`` is passed, so the server's default format is used.

    Note: although declared ``async``, the body contains no ``await``; the
    request and the file write run synchronously on the calling thread.

    Args:
        text_name: Label of the sample; used to build the output file name.
        text: Text to synthesize.

    Returns:
        Tuple of (total time, total bytes). The time is in seconds and
        includes writing the file to disk; the byte count is the size of the
        written file.
    """
    # perf_counter() is monotonic and high resolution, so the elapsed time
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    speech_file = Path(__file__).parent / f"non_stream_{text_name}.mp3"

    with client.audio.speech.with_streaming_response.create(
        model="kokoro",
        voice="af_bella",
        input=text,
    ) as response:
        response.stream_to_file(speech_file)

    total_time = time.perf_counter() - start_time
    total_bytes = speech_file.stat().st_size
    return total_time, total_bytes
def plot_results(results: dict):
    """Plot benchmark results as a grouped bar chart.

    Draws three bars per benchmarked text (streaming total time,
    non-streaming total time, time to first byte) and saves the figure as
    ``benchmark_results.png`` next to this script.

    Args:
        results: Mapping of text name to a dict holding a ``"streaming"``
            entry (with ``"total_time"`` and ``"first_byte_time"``) and a
            ``"non_streaming"`` entry (with ``"total_time"``). If an entry
            has a ``"text_length"`` value it is used as the x-axis tick
            label; otherwise the text name is used.
    """
    # Take labels and bar positions from `results` itself rather than from
    # the global sample table, so the tick count always matches the number of
    # bars even when only some samples were benchmarked.
    text_lengths = [
        entry.get("text_length", name) for name, entry in results.items()
    ]
    entries = list(results.values())
    streaming_times = [e["streaming"]["total_time"] for e in entries]
    non_streaming_times = [e["non_streaming"]["total_time"] for e in entries]
    first_byte_times = [e["streaming"]["first_byte_time"] for e in entries]

    x = np.arange(len(entries))
    width = 0.25

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Three bars per group, centred on each x position.
        ax.bar(x - width, streaming_times, width, label='Streaming Total Time')
        ax.bar(x, non_streaming_times, width, label='Non-Streaming Total Time')
        ax.bar(x + width, first_byte_times, width, label='Time to First Byte')

        ax.set_xlabel('Text Length (characters)')
        ax.set_ylabel('Time (seconds)')
        ax.set_title('Unified Streaming Performance Comparison')
        ax.set_xticks(x)
        ax.set_xticklabels(text_lengths)
        ax.legend()

        fig.savefig(Path(__file__).parent / 'benchmark_results.png')
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
async def main():
    """Run both benchmarks for every sample text, report, and plot.

    For each entry in ``TEST_TEXTS`` the streaming benchmark runs first, then
    the non-streaming one. Timings, byte counts and throughput (bytes divided
    by seconds divided by 1024) are collected per text name, printed, and
    finally passed to ``plot_results``.
    """
    print("Starting unified streaming benchmarks...")

    results = {}
    for name, text in TEST_TEXTS.items():
        char_count = len(text)
        print(f"\nTesting {name} text ({char_count} chars)...")

        print("Running streaming test...")
        ttfb, stream_elapsed, stream_size = await benchmark_streaming(name, text)

        print("Running non-streaming test...")
        plain_elapsed, plain_size = await benchmark_non_streaming(name, text)

        # Throughput in KB/s, computed once and reused for storage and output.
        stream_rate = stream_size / stream_elapsed / 1024
        plain_rate = plain_size / plain_elapsed / 1024

        streaming_stats = {
            "first_byte_time": ttfb,
            "total_time": stream_elapsed,
            "total_bytes": stream_size,
            "throughput": stream_rate,
        }
        non_streaming_stats = {
            "total_time": plain_elapsed,
            "total_bytes": plain_size,
            "throughput": plain_rate,
        }
        results[name] = {
            "text_length": char_count,
            "streaming": streaming_stats,
            "non_streaming": non_streaming_stats,
        }

        # Per-text summary.
        print(f"\nResults for {name} text:")
        print("Streaming:")
        print(f" Time to first byte: {ttfb:.3f}s")
        print(f" Total time: {stream_elapsed:.3f}s")
        print(f" Throughput: {stream_rate:.1f} KB/s")
        print("Non-streaming:")
        print(f" Total time: {plain_elapsed:.3f}s")
        print(f" Throughput: {plain_rate:.1f} KB/s")

    plot_results(results)
    print("\nBenchmark results have been plotted to benchmark_results.png")
if __name__ == "__main__":
    # Run the async benchmark driver only when executed as a script.
    asyncio.run(main())