File size: 5,378 Bytes
05b45a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
"""Benchmark script for unified streaming implementation"""

import asyncio
import time
from pathlib import Path
from typing import List, Tuple

from openai import OpenAI
import numpy as np
import matplotlib.pyplot as plt

# Initialize OpenAI client pointed at a locally hosted, OpenAI-compatible
# TTS server; the API key is unused by the local server but required by the SDK.
client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")

# Benchmark inputs of increasing length, keyed by a short label that is also
# used for per-test output filenames and result keys.
TEST_TEXTS = {
    "short": "The quick brown fox jumps over the lazy dog.",
    "medium": """In a bustling city, life moves at a rapid pace. 
                People hurry along the sidewalks, while cars navigate 
                through the busy streets. The air is filled with the 
                sounds of urban activity.""",
    "long": """The technological revolution has transformed how we live and work. 
              From artificial intelligence to renewable energy, innovations continue 
              to shape our future. As we face global challenges, scientific advances 
              offer new solutions. The intersection of technology and human creativity 
              drives progress forward, opening new possibilities for tomorrow."""
}

async def benchmark_streaming(text_name: str, text: str) -> Tuple[float, float, int]:
    """Benchmark streaming TTS performance for one test text.

    Args:
        text_name: Label for the test case (unused here; kept for symmetry
            with ``benchmark_non_streaming``, which uses it for its filename).
        text: Input text to synthesize.

    Returns:
        Tuple of (time to first byte, total time, total bytes received),
        times in seconds.
    """
    # perf_counter is monotonic and high-resolution, so elapsed-time
    # measurements are immune to wall-clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    total_bytes = 0
    first_byte_time = None

    # NOTE(review): the OpenAI client call is synchronous, so this coroutine
    # blocks the event loop while streaming; acceptable for this sequential
    # benchmark, but it would serialize any concurrent callers.
    with client.audio.speech.with_streaming_response.create(
        model="kokoro",
        voice="af_bella",
        response_format="pcm",
        input=text,
    ) as response:
        for chunk in response.iter_bytes(chunk_size=1024):
            if first_byte_time is None:
                first_byte_time = time.perf_counter() - start_time
            total_bytes += len(chunk)

    total_time = time.perf_counter() - start_time
    # Guard against an empty stream: otherwise first_byte_time stays None
    # and callers formatting it with ":.3f" would crash.
    if first_byte_time is None:
        first_byte_time = total_time
    return first_byte_time, total_time, total_bytes

async def benchmark_non_streaming(text_name: str, text: str) -> Tuple[float, int]:
    """Benchmark non-streaming (download-to-file) TTS performance.

    Args:
        text_name: Label for the test case; used to name the output file
            ``non_stream_<text_name>.mp3`` next to this script.
        text: Input text to synthesize.

    Returns:
        Tuple of (total time in seconds, total bytes written to disk).
    """
    # Monotonic high-resolution clock for interval timing (not time.time()).
    start_time = time.perf_counter()
    speech_file = Path(__file__).parent / f"non_stream_{text_name}.mp3"

    # No response_format here, so the server's default (mp3) is used —
    # NOTE(review): this differs from the streaming test's "pcm"; confirm the
    # formats should intentionally differ when comparing throughput numbers.
    with client.audio.speech.with_streaming_response.create(
        model="kokoro",
        voice="af_bella",
        input=text,
    ) as response:
        response.stream_to_file(speech_file)

    total_time = time.perf_counter() - start_time
    total_bytes = speech_file.stat().st_size
    return total_time, total_bytes

def plot_results(results: dict):
    """Plot benchmark timings and save them as benchmark_results.png.

    Args:
        results: Mapping of test name -> result dict as built by ``main``,
            each containing "text_length", "streaming" and "non_streaming"
            sub-dicts with timing fields.
    """
    plt.figure(figsize=(12, 6))

    # Derive every series (including x positions and labels) from the results
    # dict itself, so bar heights and positions always agree even if only a
    # subset of TEST_TEXTS was benchmarked.
    text_lengths = [r["text_length"] for r in results.values()]
    streaming_times = [r["streaming"]["total_time"] for r in results.values()]
    non_streaming_times = [r["non_streaming"]["total_time"] for r in results.values()]
    first_byte_times = [r["streaming"]["first_byte_time"] for r in results.values()]

    # Three grouped bars per test case, offset around each tick position.
    x = np.arange(len(results))
    width = 0.25

    plt.bar(x - width, streaming_times, width, label='Streaming Total Time')
    plt.bar(x, non_streaming_times, width, label='Non-Streaming Total Time')
    plt.bar(x + width, first_byte_times, width, label='Time to First Byte')

    plt.xlabel('Text Length (characters)')
    plt.ylabel('Time (seconds)')
    plt.title('Unified Streaming Performance Comparison')
    plt.xticks(x, text_lengths)
    plt.legend()

    # Save next to this script and close the figure to free its memory.
    plt.savefig(Path(__file__).parent / 'benchmark_results.png')
    plt.close()

async def main():
    """Run the streaming and non-streaming benchmarks for every test text."""
    print("Starting unified streaming benchmarks...")

    results = {}

    for name, text in TEST_TEXTS.items():
        print(f"\nTesting {name} text ({len(text)} chars)...")

        # Streaming run: time to first byte, total time, bytes received.
        print("Running streaming test...")
        ttfb, stream_time, stream_size = await benchmark_streaming(name, text)

        # Non-streaming run: total time and size of the saved file.
        print("Running non-streaming test...")
        plain_time, plain_size = await benchmark_non_streaming(name, text)

        # Compute throughput once and reuse for both the results dict
        # and the console report below.
        stream_kbps = stream_size / stream_time / 1024  # KB/s
        plain_kbps = plain_size / plain_time / 1024  # KB/s

        results[name] = {
            "text_length": len(text),
            "streaming": {
                "first_byte_time": ttfb,
                "total_time": stream_time,
                "total_bytes": stream_size,
                "throughput": stream_kbps,
            },
            "non_streaming": {
                "total_time": plain_time,
                "total_bytes": plain_size,
                "throughput": plain_kbps,
            },
        }

        # Per-test console summary.
        print(f"\nResults for {name} text:")
        print("Streaming:")
        print(f"  Time to first byte: {ttfb:.3f}s")
        print(f"  Total time: {stream_time:.3f}s")
        print(f"  Throughput: {stream_kbps:.1f} KB/s")
        print("Non-streaming:")
        print(f"  Total time: {plain_time:.3f}s")
        print(f"  Throughput: {plain_kbps:.1f} KB/s")

    # Render the comparison chart for all collected results.
    plot_results(results)
    print("\nBenchmark results have been plotted to benchmark_results.png")

# Script entry point: drive the async benchmark suite to completion.
if __name__ == "__main__":
    asyncio.run(main())