hlarcher HF Staff committed on
Commit
ecea89f
·
verified ·
1 Parent(s): 463adeb

Upload 3 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3-bookworm
2
+ RUN apt-get update && apt-get install -y \
3
+ git \
4
+ && rm -rf /var/lib/apt/lists/*
5
+ RUN curl -sSL https://install.python-poetry.org | python3 -
6
+ ENV PATH="/root/.local/bin:$PATH"
7
+ WORKDIR /app
8
+ RUN git clone https://github.com/huggingface/inference-benchmarker.git \
9
+ && cd inference-benchmarker \
10
+ && poetry install --no-root
11
+ COPY results results
12
+ WORKDIR /app/inference-benchmarker
13
+ CMD ["poetry", "run", "python", "extra/dashboard/app.py", "--from-results-dir", "/app/results"]
results/meta-llama_Llama-3_1-8B-Instruct_2025-04-02-07-50-23.json ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "max_vus": 128,
4
+ "duration_secs": 120,
5
+ "benchmark_kind": "Rate",
6
+ "warmup_duration_secs": 30,
7
+ "rates": [
8
+ 0.8,
9
+ 2.4,
10
+ 4.0,
11
+ 5.6,
12
+ 7.2,
13
+ 8.8,
14
+ 10.4,
15
+ 12.0,
16
+ 13.6,
17
+ 15.2,
18
+ 16.8,
19
+ 18.4,
20
+ 20.0,
21
+ 21.6,
22
+ 23.2,
23
+ 24.0
24
+ ],
25
+ "num_rates": 10,
26
+ "prompt_options": null,
27
+ "decode_options": {
28
+ "num_tokens": 200,
29
+ "min_tokens": 200,
30
+ "max_tokens": 200,
31
+ "variance": 100
32
+ },
33
+ "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
34
+ "model_name": "meta-llama/Llama-3.1-8B-Instruct",
35
+ "profile": null,
36
+ "meta": {
37
+ "version": "4d28897b4e345f4dfdd93d3434e50ac8afcdf9e1",
38
+ "engine": "TGI",
39
+ "tp": "1"
40
+ },
41
+ "run_id": "TGI-3.2.1"
42
+ },
43
+ "results": [
44
+ {
45
+ "id": "warmup",
46
+ "executor_type": "ConstantVUs",
47
+ "config": {
48
+ "max_vus": 1,
49
+ "duration_secs": 30,
50
+ "rate": null
51
+ },
52
+ "total_requests": 22,
53
+ "total_tokens": 3726,
54
+ "token_throughput_secs": 121.61045252166194,
55
+ "duration_ms": 30638,
56
+ "time_to_first_token_ms": {
57
+ "p50": 46.607,
58
+ "p60": 46.786,
59
+ "p70": 46.957,
60
+ "p80": 47.13,
61
+ "p90": 48.231,
62
+ "p95": 78.293,
63
+ "p99": 80.85,
64
+ "avg": 48.554
65
+ },
66
+ "inter_token_latency_ms": {
67
+ "p50": 7.973,
68
+ "p60": 7.982,
69
+ "p70": 7.99,
70
+ "p80": 8.0,
71
+ "p90": 8.043,
72
+ "p95": 8.083,
73
+ "p99": 8.125,
74
+ "avg": 7.927
75
+ },
76
+ "failed_requests": 0,
77
+ "successful_requests": 22,
78
+ "request_rate": 0.7180434663114769,
79
+ "total_tokens_sent": 1613,
80
+ "e2e_latency_ms": {
81
+ "p50": 1632.456,
82
+ "p60": 1633.79,
83
+ "p70": 1636.485,
84
+ "p80": 1638.246,
85
+ "p90": 1647.577,
86
+ "p95": 1688.143,
87
+ "p99": 1697.2,
88
+ "avg": 1392.459
89
+ }
90
+ },
91
+ {
92
+ "id": "[email protected]/s",
93
+ "executor_type": "ConstantArrivalRate",
94
+ "config": {
95
+ "max_vus": 128,
96
+ "duration_secs": 120,
97
+ "rate": 0.8
98
+ },
99
+ "total_requests": 94,
100
+ "total_tokens": 14823,
101
+ "token_throughput_secs": 125.70685354915,
102
+ "duration_ms": 117917,
103
+ "time_to_first_token_ms": {
104
+ "p50": 26.689,
105
+ "p60": 28.276,
106
+ "p70": 29.925,
107
+ "p80": 31.171,
108
+ "p90": 31.927,
109
+ "p95": 32.252,
110
+ "p99": 33.737,
111
+ "avg": 27.559
112
+ },
113
+ "inter_token_latency_ms": {
114
+ "p50": 8.297,
115
+ "p60": 8.308,
116
+ "p70": 8.327,
117
+ "p80": 8.348,
118
+ "p90": 8.372,
119
+ "p95": 8.387,
120
+ "p99": 8.48,
121
+ "avg": 8.274
122
+ },
123
+ "failed_requests": 0,
124
+ "successful_requests": 94,
125
+ "request_rate": 0.7971695495932065,
126
+ "total_tokens_sent": 13915,
127
+ "e2e_latency_ms": {
128
+ "p50": 1675.47,
129
+ "p60": 1680.097,
130
+ "p70": 1683.628,
131
+ "p80": 1686.037,
132
+ "p90": 1689.112,
133
+ "p95": 1692.598,
134
+ "p99": 1711.849,
135
+ "avg": 1329.915
136
+ }
137
+ },
138
+ {
139
+ "id": "[email protected]/s",
140
+ "executor_type": "ConstantArrivalRate",
141
+ "config": {
142
+ "max_vus": 128,
143
+ "duration_secs": 120,
144
+ "rate": 2.4
145
+ },
146
+ "total_requests": 283,
147
+ "total_tokens": 49472,
148
+ "token_throughput_secs": 417.57128544344323,
149
+ "duration_ms": 118475,
150
+ "time_to_first_token_ms": {
151
+ "p50": 29.056,
152
+ "p60": 29.943,
153
+ "p70": 30.586,
154
+ "p80": 31.684,
155
+ "p90": 32.58,
156
+ "p95": 33.297,
157
+ "p99": 42.848,
158
+ "avg": 29.12
159
+ },
160
+ "inter_token_latency_ms": {
161
+ "p50": 8.925,
162
+ "p60": 8.931,
163
+ "p70": 8.939,
164
+ "p80": 8.947,
165
+ "p90": 8.954,
166
+ "p95": 8.965,
167
+ "p99": 8.982,
168
+ "avg": 8.669
169
+ },
170
+ "failed_requests": 0,
171
+ "successful_requests": 283,
172
+ "request_rate": 2.3886779143858026,
173
+ "total_tokens_sent": 44403,
174
+ "e2e_latency_ms": {
175
+ "p50": 1804.541,
176
+ "p60": 1805.909,
177
+ "p70": 1807.748,
178
+ "p80": 1809.553,
179
+ "p90": 1812.308,
180
+ "p95": 1814.507,
181
+ "p99": 1817.862,
182
+ "avg": 1579.6
183
+ }
184
+ },
185
+ {
186
+ "id": "[email protected]/s",
187
+ "executor_type": "ConstantArrivalRate",
188
+ "config": {
189
+ "max_vus": 128,
190
+ "duration_secs": 120,
191
+ "rate": 4.0
192
+ },
193
+ "total_requests": 474,
194
+ "total_tokens": 85697,
195
+ "token_throughput_secs": 720.8187174458998,
196
+ "duration_ms": 118888,
197
+ "time_to_first_token_ms": {
198
+ "p50": 28.261,
199
+ "p60": 29.236,
200
+ "p70": 30.047,
201
+ "p80": 31.071,
202
+ "p90": 32.258,
203
+ "p95": 32.956,
204
+ "p99": 44.447,
205
+ "avg": 28.686
206
+ },
207
+ "inter_token_latency_ms": {
208
+ "p50": 9.365,
209
+ "p60": 9.383,
210
+ "p70": 9.396,
211
+ "p80": 9.415,
212
+ "p90": 9.433,
213
+ "p95": 9.446,
214
+ "p99": 9.457,
215
+ "avg": 9.313
216
+ },
217
+ "failed_requests": 0,
218
+ "successful_requests": 474,
219
+ "request_rate": 3.9869315386694577,
220
+ "total_tokens_sent": 56591,
221
+ "e2e_latency_ms": {
222
+ "p50": 1890.864,
223
+ "p60": 1894.873,
224
+ "p70": 1898.381,
225
+ "p80": 1902.406,
226
+ "p90": 1906.151,
227
+ "p95": 1909.095,
228
+ "p99": 1912.544,
229
+ "avg": 1712.674
230
+ }
231
+ },
232
+ {
233
+ "id": "[email protected]/s",
234
+ "executor_type": "ConstantArrivalRate",
235
+ "config": {
236
+ "max_vus": 128,
237
+ "duration_secs": 120,
238
+ "rate": 5.6
239
+ },
240
+ "total_requests": 660,
241
+ "total_tokens": 115159,
242
+ "token_throughput_secs": 966.2563530237055,
243
+ "duration_ms": 119180,
244
+ "time_to_first_token_ms": {
245
+ "p50": 29.312,
246
+ "p60": 30.64,
247
+ "p70": 31.75,
248
+ "p80": 32.952,
249
+ "p90": 44.096,
250
+ "p95": 46.404,
251
+ "p99": 48.15,
252
+ "avg": 30.915
253
+ },
254
+ "inter_token_latency_ms": {
255
+ "p50": 9.98,
256
+ "p60": 9.996,
257
+ "p70": 10.004,
258
+ "p80": 10.013,
259
+ "p90": 10.022,
260
+ "p95": 10.028,
261
+ "p99": 10.062,
262
+ "avg": 9.803
263
+ },
264
+ "failed_requests": 0,
265
+ "successful_requests": 660,
266
+ "request_rate": 5.537814612801827,
267
+ "total_tokens_sent": 113449,
268
+ "e2e_latency_ms": {
269
+ "p50": 2013.741,
270
+ "p60": 2017.376,
271
+ "p70": 2019.935,
272
+ "p80": 2021.638,
273
+ "p90": 2024.968,
274
+ "p95": 2026.227,
275
+ "p99": 2032.895,
276
+ "avg": 1754.082
277
+ }
278
+ },
279
+ {
280
+ "id": "[email protected]/s",
281
+ "executor_type": "ConstantArrivalRate",
282
+ "config": {
283
+ "max_vus": 128,
284
+ "duration_secs": 120,
285
+ "rate": 7.2
286
+ },
287
+ "total_requests": 849,
288
+ "total_tokens": 151779,
289
+ "token_throughput_secs": 1267.369912316427,
290
+ "duration_ms": 119759,
291
+ "time_to_first_token_ms": {
292
+ "p50": 30.493,
293
+ "p60": 31.719,
294
+ "p70": 33.123,
295
+ "p80": 36.078,
296
+ "p90": 46.155,
297
+ "p95": 46.769,
298
+ "p99": 48.162,
299
+ "avg": 32.604
300
+ },
301
+ "inter_token_latency_ms": {
302
+ "p50": 10.423,
303
+ "p60": 10.475,
304
+ "p70": 10.5,
305
+ "p80": 10.524,
306
+ "p90": 10.555,
307
+ "p95": 10.575,
308
+ "p99": 10.597,
309
+ "avg": 10.321
310
+ },
311
+ "failed_requests": 0,
312
+ "successful_requests": 849,
313
+ "request_rate": 7.089235372196724,
314
+ "total_tokens_sent": 145128,
315
+ "e2e_latency_ms": {
316
+ "p50": 2100.596,
317
+ "p60": 2114.214,
318
+ "p70": 2119.354,
319
+ "p80": 2124.055,
320
+ "p90": 2130.161,
321
+ "p95": 2134.421,
322
+ "p99": 2140.161,
323
+ "avg": 1884.625
324
+ }
325
+ },
326
+ {
327
+ "id": "[email protected]/s",
328
+ "executor_type": "ConstantArrivalRate",
329
+ "config": {
330
+ "max_vus": 128,
331
+ "duration_secs": 120,
332
+ "rate": 8.8
333
+ },
334
+ "total_requests": 1041,
335
+ "total_tokens": 181439,
336
+ "token_throughput_secs": 1528.9336762174746,
337
+ "duration_ms": 118670,
338
+ "time_to_first_token_ms": {
339
+ "p50": 33.409,
340
+ "p60": 41.426,
341
+ "p70": 45.116,
342
+ "p80": 45.65,
343
+ "p90": 47.216,
344
+ "p95": 47.524,
345
+ "p99": 47.814,
346
+ "avg": 36.264
347
+ },
348
+ "inter_token_latency_ms": {
349
+ "p50": 11.019,
350
+ "p60": 11.044,
351
+ "p70": 11.072,
352
+ "p80": 11.102,
353
+ "p90": 11.136,
354
+ "p95": 11.159,
355
+ "p99": 11.222,
356
+ "avg": 10.862
357
+ },
358
+ "failed_requests": 0,
359
+ "successful_requests": 1041,
360
+ "request_rate": 8.77220419503189,
361
+ "total_tokens_sent": 191937,
362
+ "e2e_latency_ms": {
363
+ "p50": 2223.571,
364
+ "p60": 2232.271,
365
+ "p70": 2241.046,
366
+ "p80": 2246.104,
367
+ "p90": 2253.422,
368
+ "p95": 2259.594,
369
+ "p99": 2267.867,
370
+ "avg": 1944.18
371
+ }
372
+ },
373
+ {
374
+ "id": "[email protected]/s",
375
+ "executor_type": "ConstantArrivalRate",
376
+ "config": {
377
+ "max_vus": 128,
378
+ "duration_secs": 120,
379
+ "rate": 10.4
380
+ },
381
+ "total_requests": 1225,
382
+ "total_tokens": 208853,
383
+ "token_throughput_secs": 1750.44303623107,
384
+ "duration_ms": 119314,
385
+ "time_to_first_token_ms": {
386
+ "p50": 34.056,
387
+ "p60": 40.904,
388
+ "p70": 44.889,
389
+ "p80": 45.567,
390
+ "p90": 47.156,
391
+ "p95": 47.537,
392
+ "p99": 47.805,
393
+ "avg": 36.461
394
+ },
395
+ "inter_token_latency_ms": {
396
+ "p50": 11.655,
397
+ "p60": 11.693,
398
+ "p70": 11.73,
399
+ "p80": 11.772,
400
+ "p90": 11.827,
401
+ "p95": 11.861,
402
+ "p99": 11.909,
403
+ "avg": 11.475
404
+ },
405
+ "failed_requests": 0,
406
+ "successful_requests": 1225,
407
+ "request_rate": 10.266995060559632,
408
+ "total_tokens_sent": 219623,
409
+ "e2e_latency_ms": {
410
+ "p50": 2351.289,
411
+ "p60": 2360.059,
412
+ "p70": 2368.244,
413
+ "p80": 2378.908,
414
+ "p90": 2388.768,
415
+ "p95": 2395.775,
416
+ "p99": 2405.872,
417
+ "avg": 2012.23
418
+ }
419
+ },
420
+ {
421
+ "id": "[email protected]/s",
422
+ "executor_type": "ConstantArrivalRate",
423
+ "config": {
424
+ "max_vus": 128,
425
+ "duration_secs": 120,
426
+ "rate": 12.0
427
+ },
428
+ "total_requests": 1411,
429
+ "total_tokens": 255188,
430
+ "token_throughput_secs": 2159.7525641398156,
431
+ "duration_ms": 118156,
432
+ "time_to_first_token_ms": {
433
+ "p50": 40.727,
434
+ "p60": 44.09,
435
+ "p70": 44.798,
436
+ "p80": 46.295,
437
+ "p90": 46.858,
438
+ "p95": 48.094,
439
+ "p99": 48.34,
440
+ "avg": 38.454
441
+ },
442
+ "inter_token_latency_ms": {
443
+ "p50": 12.566,
444
+ "p60": 12.629,
445
+ "p70": 12.67,
446
+ "p80": 12.718,
447
+ "p90": 12.802,
448
+ "p95": 12.951,
449
+ "p99": 13.08,
450
+ "avg": 12.404
451
+ },
452
+ "failed_requests": 0,
453
+ "successful_requests": 1411,
454
+ "request_rate": 11.941826684645358,
455
+ "total_tokens_sent": 272561,
456
+ "e2e_latency_ms": {
457
+ "p50": 2532.282,
458
+ "p60": 2547.776,
459
+ "p70": 2560.142,
460
+ "p80": 2570.947,
461
+ "p90": 2585.7,
462
+ "p95": 2612.572,
463
+ "p99": 2635.79,
464
+ "avg": 2297.575
465
+ }
466
+ },
467
+ {
468
+ "id": "[email protected]/s",
469
+ "executor_type": "ConstantArrivalRate",
470
+ "config": {
471
+ "max_vus": 128,
472
+ "duration_secs": 120,
473
+ "rate": 13.6
474
+ },
475
+ "total_requests": 1564,
476
+ "total_tokens": 272803,
477
+ "token_throughput_secs": 2293.5215462094866,
478
+ "duration_ms": 118945,
479
+ "time_to_first_token_ms": {
480
+ "p50": 44.852,
481
+ "p60": 46.319,
482
+ "p70": 46.872,
483
+ "p80": 48.169,
484
+ "p90": 49.674,
485
+ "p95": 51.154,
486
+ "p99": 53.485,
487
+ "avg": 42.864
488
+ },
489
+ "inter_token_latency_ms": {
490
+ "p50": 25.88,
491
+ "p60": 26.011,
492
+ "p70": 26.149,
493
+ "p80": 26.243,
494
+ "p90": 26.308,
495
+ "p95": 26.354,
496
+ "p99": 26.436,
497
+ "avg": 24.547
498
+ },
499
+ "failed_requests": 0,
500
+ "successful_requests": 1564,
501
+ "request_rate": 13.148930540615892,
502
+ "total_tokens_sent": 255933,
503
+ "e2e_latency_ms": {
504
+ "p50": 5177.262,
505
+ "p60": 5201.64,
506
+ "p70": 5238.214,
507
+ "p80": 5261.34,
508
+ "p90": 5275.17,
509
+ "p95": 5286.208,
510
+ "p99": 5302.579,
511
+ "avg": 4359.271
512
+ }
513
+ },
514
+ {
515
+ "id": "[email protected]/s",
516
+ "executor_type": "ConstantArrivalRate",
517
+ "config": {
518
+ "max_vus": 128,
519
+ "duration_secs": 120,
520
+ "rate": 15.2
521
+ },
522
+ "total_requests": 1759,
523
+ "total_tokens": 309348,
524
+ "token_throughput_secs": 2580.5086165701296,
525
+ "duration_ms": 119878,
526
+ "time_to_first_token_ms": {
527
+ "p50": 45.707,
528
+ "p60": 47.05,
529
+ "p70": 47.656,
530
+ "p80": 48.721,
531
+ "p90": 51.476,
532
+ "p95": 53.138,
533
+ "p99": 55.565,
534
+ "avg": 44.101
535
+ },
536
+ "inter_token_latency_ms": {
537
+ "p50": 26.733,
538
+ "p60": 26.968,
539
+ "p70": 27.102,
540
+ "p80": 27.19,
541
+ "p90": 27.265,
542
+ "p95": 27.368,
543
+ "p99": 27.441,
544
+ "avg": 26.359
545
+ },
546
+ "failed_requests": 0,
547
+ "successful_requests": 1759,
548
+ "request_rate": 14.673166325778274,
549
+ "total_tokens_sent": 270134,
550
+ "e2e_latency_ms": {
551
+ "p50": 5326.968,
552
+ "p60": 5382.346,
553
+ "p70": 5428.823,
554
+ "p80": 5452.294,
555
+ "p90": 5469.218,
556
+ "p95": 5483.97,
557
+ "p99": 5502.975,
558
+ "avg": 4685.437
559
+ }
560
+ },
561
+ {
562
+ "id": "[email protected]/s",
563
+ "executor_type": "ConstantArrivalRate",
564
+ "config": {
565
+ "max_vus": 128,
566
+ "duration_secs": 120,
567
+ "rate": 16.8
568
+ },
569
+ "total_requests": 1928,
570
+ "total_tokens": 330549,
571
+ "token_throughput_secs": 2759.3148813615194,
572
+ "duration_ms": 119793,
573
+ "time_to_first_token_ms": {
574
+ "p50": 46.472,
575
+ "p60": 46.857,
576
+ "p70": 48.168,
577
+ "p80": 50.368,
578
+ "p90": 53.407,
579
+ "p95": 55.802,
580
+ "p99": 72.801,
581
+ "avg": 45.931
582
+ },
583
+ "inter_token_latency_ms": {
584
+ "p50": 27.881,
585
+ "p60": 28.021,
586
+ "p70": 28.093,
587
+ "p80": 28.16,
588
+ "p90": 28.336,
589
+ "p95": 28.602,
590
+ "p99": 29.638,
591
+ "avg": 27.078
592
+ },
593
+ "failed_requests": 0,
594
+ "successful_requests": 1928,
595
+ "request_rate": 16.094313070876055,
596
+ "total_tokens_sent": 320581,
597
+ "e2e_latency_ms": {
598
+ "p50": 5553.009,
599
+ "p60": 5600.423,
600
+ "p70": 5629.869,
601
+ "p80": 5644.616,
602
+ "p90": 5671.609,
603
+ "p95": 5710.139,
604
+ "p99": 5944.138,
605
+ "avg": 4721.266
606
+ }
607
+ },
608
+ {
609
+ "id": "[email protected]/s",
610
+ "executor_type": "ConstantArrivalRate",
611
+ "config": {
612
+ "max_vus": 128,
613
+ "duration_secs": 120,
614
+ "rate": 18.4
615
+ },
616
+ "total_requests": 2118,
617
+ "total_tokens": 357373,
618
+ "token_throughput_secs": 2982.4464814578682,
619
+ "duration_ms": 119825,
620
+ "time_to_first_token_ms": {
621
+ "p50": 59.209,
622
+ "p60": 64.399,
623
+ "p70": 69.696,
624
+ "p80": 75.067,
625
+ "p90": 80.705,
626
+ "p95": 84.049,
627
+ "p99": 88.502,
628
+ "avg": 59.862
629
+ },
630
+ "inter_token_latency_ms": {
631
+ "p50": 28.563,
632
+ "p60": 28.749,
633
+ "p70": 28.916,
634
+ "p80": 29.057,
635
+ "p90": 29.191,
636
+ "p95": 29.317,
637
+ "p99": 30.202,
638
+ "avg": 28.34
639
+ },
640
+ "failed_requests": 0,
641
+ "successful_requests": 2118,
642
+ "request_rate": 17.67571038586509,
643
+ "total_tokens_sent": 398184,
644
+ "e2e_latency_ms": {
645
+ "p50": 5708.277,
646
+ "p60": 5743.555,
647
+ "p70": 5787.666,
648
+ "p80": 5828.147,
649
+ "p90": 5862.892,
650
+ "p95": 5894.271,
651
+ "p99": 6066.881,
652
+ "avg": 4844.433
653
+ }
654
+ },
655
+ {
656
+ "id": "[email protected]/s",
657
+ "executor_type": "ConstantArrivalRate",
658
+ "config": {
659
+ "max_vus": 128,
660
+ "duration_secs": 120,
661
+ "rate": 20.0
662
+ },
663
+ "total_requests": 2290,
664
+ "total_tokens": 386940,
665
+ "token_throughput_secs": 3227.511698507537,
666
+ "duration_ms": 119888,
667
+ "time_to_first_token_ms": {
668
+ "p50": 58.122,
669
+ "p60": 63.978,
670
+ "p70": 70.049,
671
+ "p80": 76.219,
672
+ "p90": 82.824,
673
+ "p95": 86.111,
674
+ "p99": 90.923,
675
+ "avg": 59.703
676
+ },
677
+ "inter_token_latency_ms": {
678
+ "p50": 29.32,
679
+ "p60": 29.492,
680
+ "p70": 29.895,
681
+ "p80": 30.4,
682
+ "p90": 30.546,
683
+ "p95": 31.209,
684
+ "p99": 31.92,
685
+ "avg": 28.595
686
+ },
687
+ "failed_requests": 0,
688
+ "successful_requests": 2290,
689
+ "request_rate": 19.10115725844384,
690
+ "total_tokens_sent": 346259,
691
+ "e2e_latency_ms": {
692
+ "p50": 5821.093,
693
+ "p60": 5905.813,
694
+ "p70": 5981.542,
695
+ "p80": 6088.576,
696
+ "p90": 6138.484,
697
+ "p95": 6172.066,
698
+ "p99": 6421.6,
699
+ "avg": 4983.079
700
+ }
701
+ },
702
+ {
703
+ "id": "[email protected]/s",
704
+ "executor_type": "ConstantArrivalRate",
705
+ "config": {
706
+ "max_vus": 128,
707
+ "duration_secs": 120,
708
+ "rate": 21.6
709
+ },
710
+ "total_requests": 2445,
711
+ "total_tokens": 438323,
712
+ "token_throughput_secs": 3667.2404231554137,
713
+ "duration_ms": 119523,
714
+ "time_to_first_token_ms": {
715
+ "p50": 60.891,
716
+ "p60": 67.221,
717
+ "p70": 73.352,
718
+ "p80": 80.009,
719
+ "p90": 86.246,
720
+ "p95": 89.966,
721
+ "p99": 95.937,
722
+ "avg": 62.545
723
+ },
724
+ "inter_token_latency_ms": {
725
+ "p50": 31.059,
726
+ "p60": 31.174,
727
+ "p70": 31.267,
728
+ "p80": 31.363,
729
+ "p90": 31.564,
730
+ "p95": 31.98,
731
+ "p99": 32.368,
732
+ "avg": 30.585
733
+ },
734
+ "failed_requests": 0,
735
+ "successful_requests": 2445,
736
+ "request_rate": 20.456154102374246,
737
+ "total_tokens_sent": 375764,
738
+ "e2e_latency_ms": {
739
+ "p50": 6210.518,
740
+ "p60": 6251.837,
741
+ "p70": 6276.884,
742
+ "p80": 6301.657,
743
+ "p90": 6339.383,
744
+ "p95": 6390.942,
745
+ "p99": 6504.767,
746
+ "avg": 5545.311
747
+ }
748
+ },
749
+ {
750
+ "id": "[email protected]/s",
751
+ "executor_type": "ConstantArrivalRate",
752
+ "config": {
753
+ "max_vus": 128,
754
+ "duration_secs": 120,
755
+ "rate": 23.2
756
+ },
757
+ "total_requests": 2557,
758
+ "total_tokens": 451146,
759
+ "token_throughput_secs": 3774.5098883710634,
760
+ "duration_ms": 119524,
761
+ "time_to_first_token_ms": {
762
+ "p50": 61.047,
763
+ "p60": 67.493,
764
+ "p70": 74.009,
765
+ "p80": 80.579,
766
+ "p90": 87.143,
767
+ "p95": 90.788,
768
+ "p99": 95.729,
769
+ "avg": 63.122
770
+ },
771
+ "inter_token_latency_ms": {
772
+ "p50": 31.013,
773
+ "p60": 31.094,
774
+ "p70": 31.187,
775
+ "p80": 31.29,
776
+ "p90": 31.621,
777
+ "p95": 32.321,
778
+ "p99": 32.547,
779
+ "avg": 30.814
780
+ },
781
+ "failed_requests": 0,
782
+ "successful_requests": 2557,
783
+ "request_rate": 21.393122812935964,
784
+ "total_tokens_sent": 397220,
785
+ "e2e_latency_ms": {
786
+ "p50": 6207.811,
787
+ "p60": 6232.278,
788
+ "p70": 6255.491,
789
+ "p80": 6280.109,
790
+ "p90": 6327.14,
791
+ "p95": 6467.71,
792
+ "p99": 6530.251,
793
+ "avg": 5488.01
794
+ }
795
+ },
796
+ {
797
+ "id": "[email protected]/s",
798
+ "executor_type": "ConstantArrivalRate",
799
+ "config": {
800
+ "max_vus": 128,
801
+ "duration_secs": 120,
802
+ "rate": 24.0
803
+ },
804
+ "total_requests": 2540,
805
+ "total_tokens": 454462,
806
+ "token_throughput_secs": 3793.8392202012997,
807
+ "duration_ms": 119789,
808
+ "time_to_first_token_ms": {
809
+ "p50": 61.504,
810
+ "p60": 67.687,
811
+ "p70": 74.328,
812
+ "p80": 81.247,
813
+ "p90": 87.806,
814
+ "p95": 91.29,
815
+ "p99": 96.912,
816
+ "avg": 63.516
817
+ },
818
+ "inter_token_latency_ms": {
819
+ "p50": 31.192,
820
+ "p60": 31.251,
821
+ "p70": 31.333,
822
+ "p80": 31.475,
823
+ "p90": 31.907,
824
+ "p95": 32.12,
825
+ "p99": 32.242,
826
+ "avg": 30.921
827
+ },
828
+ "failed_requests": 0,
829
+ "successful_requests": 2540,
830
+ "request_rate": 21.203866592391226,
831
+ "total_tokens_sent": 455432,
832
+ "e2e_latency_ms": {
833
+ "p50": 6257.817,
834
+ "p60": 6275.79,
835
+ "p70": 6293.476,
836
+ "p80": 6317.243,
837
+ "p90": 6372.71,
838
+ "p95": 6449.819,
839
+ "p99": 6483.148,
840
+ "avg": 5596.165
841
+ }
842
+ }
843
+ ],
844
+ "start_time": "2025-04-02T07:17:06.471305459+00:00",
845
+ "end_time": "2025-04-02T07:50:23.157564514+00:00",
846
+ "system": {
847
+ "cpu": [
848
+ "AMD EPYC 7R13 Processor cpu0@2649MHz",
849
+ "AMD EPYC 7R13 Processor cpu1@2516MHz",
850
+ "AMD EPYC 7R13 Processor cpu2@2516MHz",
851
+ "AMD EPYC 7R13 Processor cpu3@2649MHz",
852
+ "AMD EPYC 7R13 Processor cpu4@2516MHz",
853
+ "AMD EPYC 7R13 Processor cpu5@2516MHz",
854
+ "AMD EPYC 7R13 Processor cpu6@2516MHz",
855
+ "AMD EPYC 7R13 Processor cpu7@2649MHz",
856
+ "AMD EPYC 7R13 Processor cpu8@2649MHz",
857
+ "AMD EPYC 7R13 Processor cpu9@2649MHz",
858
+ "AMD EPYC 7R13 Processor cpu10@2649MHz",
859
+ "AMD EPYC 7R13 Processor cpu11@2649MHz",
860
+ "AMD EPYC 7R13 Processor cpu12@2516MHz",
861
+ "AMD EPYC 7R13 Processor cpu13@2881MHz",
862
+ "AMD EPYC 7R13 Processor cpu14@2516MHz",
863
+ "AMD EPYC 7R13 Processor cpu15@2649MHz",
864
+ "AMD EPYC 7R13 Processor cpu16@2649MHz",
865
+ "AMD EPYC 7R13 Processor cpu17@2649MHz",
866
+ "AMD EPYC 7R13 Processor cpu18@2516MHz",
867
+ "AMD EPYC 7R13 Processor cpu19@2516MHz",
868
+ "AMD EPYC 7R13 Processor cpu20@2516MHz",
869
+ "AMD EPYC 7R13 Processor cpu21@2516MHz",
870
+ "AMD EPYC 7R13 Processor cpu22@3083MHz",
871
+ "AMD EPYC 7R13 Processor cpu23@2649MHz",
872
+ "AMD EPYC 7R13 Processor cpu24@2516MHz",
873
+ "AMD EPYC 7R13 Processor cpu25@2516MHz",
874
+ "AMD EPYC 7R13 Processor cpu26@2516MHz",
875
+ "AMD EPYC 7R13 Processor cpu27@2516MHz",
876
+ "AMD EPYC 7R13 Processor cpu28@2649MHz",
877
+ "AMD EPYC 7R13 Processor cpu29@2516MHz",
878
+ "AMD EPYC 7R13 Processor cpu30@2516MHz",
879
+ "AMD EPYC 7R13 Processor cpu31@2516MHz",
880
+ "AMD EPYC 7R13 Processor cpu32@2516MHz",
881
+ "AMD EPYC 7R13 Processor cpu33@2516MHz",
882
+ "AMD EPYC 7R13 Processor cpu34@2649MHz",
883
+ "AMD EPYC 7R13 Processor cpu35@2649MHz",
884
+ "AMD EPYC 7R13 Processor cpu36@2516MHz",
885
+ "AMD EPYC 7R13 Processor cpu37@2516MHz",
886
+ "AMD EPYC 7R13 Processor cpu38@2516MHz",
887
+ "AMD EPYC 7R13 Processor cpu39@2649MHz",
888
+ "AMD EPYC 7R13 Processor cpu40@2649MHz",
889
+ "AMD EPYC 7R13 Processor cpu41@2649MHz",
890
+ "AMD EPYC 7R13 Processor cpu42@2516MHz",
891
+ "AMD EPYC 7R13 Processor cpu43@2516MHz",
892
+ "AMD EPYC 7R13 Processor cpu44@2881MHz",
893
+ "AMD EPYC 7R13 Processor cpu45@2516MHz",
894
+ "AMD EPYC 7R13 Processor cpu46@2516MHz",
895
+ "AMD EPYC 7R13 Processor cpu47@2649MHz",
896
+ "AMD EPYC 7R13 Processor cpu48@2516MHz",
897
+ "AMD EPYC 7R13 Processor cpu49@2516MHz",
898
+ "AMD EPYC 7R13 Processor cpu50@2516MHz",
899
+ "AMD EPYC 7R13 Processor cpu51@2649MHz",
900
+ "AMD EPYC 7R13 Processor cpu52@2649MHz",
901
+ "AMD EPYC 7R13 Processor cpu53@2649MHz",
902
+ "AMD EPYC 7R13 Processor cpu54@2516MHz",
903
+ "AMD EPYC 7R13 Processor cpu55@2516MHz",
904
+ "AMD EPYC 7R13 Processor cpu56@2516MHz",
905
+ "AMD EPYC 7R13 Processor cpu57@2516MHz",
906
+ "AMD EPYC 7R13 Processor cpu58@2516MHz",
907
+ "AMD EPYC 7R13 Processor cpu59@2516MHz",
908
+ "AMD EPYC 7R13 Processor cpu60@2516MHz",
909
+ "AMD EPYC 7R13 Processor cpu61@2516MHz",
910
+ "AMD EPYC 7R13 Processor cpu62@3007MHz",
911
+ "AMD EPYC 7R13 Processor cpu63@2649MHz",
912
+ "AMD EPYC 7R13 Processor cpu64@2649MHz",
913
+ "AMD EPYC 7R13 Processor cpu65@2649MHz",
914
+ "AMD EPYC 7R13 Processor cpu66@2649MHz",
915
+ "AMD EPYC 7R13 Processor cpu67@2649MHz",
916
+ "AMD EPYC 7R13 Processor cpu68@2649MHz",
917
+ "AMD EPYC 7R13 Processor cpu69@2649MHz",
918
+ "AMD EPYC 7R13 Processor cpu70@2649MHz",
919
+ "AMD EPYC 7R13 Processor cpu71@2649MHz",
920
+ "AMD EPYC 7R13 Processor cpu72@2649MHz",
921
+ "AMD EPYC 7R13 Processor cpu73@2649MHz",
922
+ "AMD EPYC 7R13 Processor cpu74@2649MHz",
923
+ "AMD EPYC 7R13 Processor cpu75@2649MHz",
924
+ "AMD EPYC 7R13 Processor cpu76@2649MHz",
925
+ "AMD EPYC 7R13 Processor cpu77@2649MHz",
926
+ "AMD EPYC 7R13 Processor cpu78@2516MHz",
927
+ "AMD EPYC 7R13 Processor cpu79@2516MHz",
928
+ "AMD EPYC 7R13 Processor cpu80@2516MHz",
929
+ "AMD EPYC 7R13 Processor cpu81@2649MHz",
930
+ "AMD EPYC 7R13 Processor cpu82@2649MHz",
931
+ "AMD EPYC 7R13 Processor cpu83@2649MHz",
932
+ "AMD EPYC 7R13 Processor cpu84@2516MHz",
933
+ "AMD EPYC 7R13 Processor cpu85@2516MHz",
934
+ "AMD EPYC 7R13 Processor cpu86@2649MHz",
935
+ "AMD EPYC 7R13 Processor cpu87@2649MHz",
936
+ "AMD EPYC 7R13 Processor cpu88@2649MHz",
937
+ "AMD EPYC 7R13 Processor cpu89@2649MHz",
938
+ "AMD EPYC 7R13 Processor cpu90@2649MHz",
939
+ "AMD EPYC 7R13 Processor cpu91@2649MHz",
940
+ "AMD EPYC 7R13 Processor cpu92@2649MHz",
941
+ "AMD EPYC 7R13 Processor cpu93@2649MHz",
942
+ "AMD EPYC 7R13 Processor cpu94@2649MHz",
943
+ "AMD EPYC 7R13 Processor cpu95@2649MHz"
944
+ ],
945
+ "memory": "1999.99 GB",
946
+ "os_name": "Debian GNU/Linux",
947
+ "os_version": "11",
948
+ "kernel": "5.15.0-1048-aws",
949
+ "hostname": "ip-26-0-161-153"
950
+ }
951
+ }
results/meta-llama_Llama-3_1-8B-Instruct_2025-04-02-08-06-23.json ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "max_vus": 128,
4
+ "duration_secs": 120,
5
+ "benchmark_kind": "Rate",
6
+ "warmup_duration_secs": 30,
7
+ "rates": [
8
+ 0.8,
9
+ 2.4,
10
+ 4.0,
11
+ 5.6,
12
+ 7.2,
13
+ 8.8,
14
+ 10.4,
15
+ 12.0,
16
+ 13.6,
17
+ 15.2,
18
+ 16.8,
19
+ 18.4,
20
+ 20.0,
21
+ 21.6,
22
+ 23.2,
23
+ 24.0
24
+ ],
25
+ "num_rates": 10,
26
+ "prompt_options": null,
27
+ "decode_options": {
28
+ "num_tokens": 200,
29
+ "min_tokens": 200,
30
+ "max_tokens": 200,
31
+ "variance": 100
32
+ },
33
+ "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
34
+ "model_name": "meta-llama/Llama-3.1-8B-Instruct",
35
+ "profile": null,
36
+ "meta": {
37
+ "tp": "1",
38
+ "version": "d2ed52f531cf8098ca62375248e007022eaadc65",
39
+ "engine": "TGI"
40
+ },
41
+ "run_id": "TGI-2.4.1"
42
+ },
43
+ "results": [
44
+ {
45
+ "id": "warmup",
46
+ "executor_type": "ConstantVUs",
47
+ "config": {
48
+ "max_vus": 1,
49
+ "duration_secs": 30,
50
+ "rate": null
51
+ },
52
+ "total_requests": 21,
53
+ "total_tokens": 3526,
54
+ "token_throughput_secs": 122.88666224584397,
55
+ "duration_ms": 28693,
56
+ "time_to_first_token_ms": {
57
+ "p50": 46.249,
58
+ "p60": 46.647,
59
+ "p70": 47.564,
60
+ "p80": 48.066,
61
+ "p90": 48.771,
62
+ "p95": 84.208,
63
+ "p99": 86.256,
64
+ "avg": 49.712
65
+ },
66
+ "inter_token_latency_ms": {
67
+ "p50": 7.881,
68
+ "p60": 7.884,
69
+ "p70": 7.887,
70
+ "p80": 7.912,
71
+ "p90": 7.984,
72
+ "p95": 7.986,
73
+ "p99": 7.999,
74
+ "avg": 7.821
75
+ },
76
+ "failed_requests": 0,
77
+ "successful_requests": 21,
78
+ "request_rate": 0.7318831273859113,
79
+ "total_tokens_sent": 1337,
80
+ "e2e_latency_ms": {
81
+ "p50": 1614.476,
82
+ "p60": 1615.106,
83
+ "p70": 1615.452,
84
+ "p80": 1619.496,
85
+ "p90": 1634.138,
86
+ "p95": 1637.031,
87
+ "p99": 1668.807,
88
+ "avg": 1366.18
89
+ }
90
+ },
91
+ {
92
+ "id": "[email protected]/s",
93
+ "executor_type": "ConstantArrivalRate",
94
+ "config": {
95
+ "max_vus": 128,
96
+ "duration_secs": 120,
97
+ "rate": 0.8
98
+ },
99
+ "total_requests": 94,
100
+ "total_tokens": 17800,
101
+ "token_throughput_secs": 150.94862460675677,
102
+ "duration_ms": 117920,
103
+ "time_to_first_token_ms": {
104
+ "p50": 24.484,
105
+ "p60": 25.247,
106
+ "p70": 26.255,
107
+ "p80": 27.172,
108
+ "p90": 28.51,
109
+ "p95": 29.611,
110
+ "p99": 30.397,
111
+ "avg": 24.771
112
+ },
113
+ "inter_token_latency_ms": {
114
+ "p50": 8.272,
115
+ "p60": 8.28,
116
+ "p70": 8.286,
117
+ "p80": 8.292,
118
+ "p90": 8.326,
119
+ "p95": 8.376,
120
+ "p99": 8.475,
121
+ "avg": 8.281
122
+ },
123
+ "failed_requests": 0,
124
+ "successful_requests": 94,
125
+ "request_rate": 0.7971444220806255,
126
+ "total_tokens_sent": 11826,
127
+ "e2e_latency_ms": {
128
+ "p50": 1670.039,
129
+ "p60": 1671.341,
130
+ "p70": 1673.034,
131
+ "p80": 1675.422,
132
+ "p90": 1677.624,
133
+ "p95": 1684.827,
134
+ "p99": 1704.448,
135
+ "avg": 1584.628
136
+ }
137
+ },
138
+ {
139
+ "id": "[email protected]/s",
140
+ "executor_type": "ConstantArrivalRate",
141
+ "config": {
142
+ "max_vus": 128,
143
+ "duration_secs": 120,
144
+ "rate": 2.4
145
+ },
146
+ "total_requests": 284,
147
+ "total_tokens": 48304,
148
+ "token_throughput_secs": 406.254834027723,
149
+ "duration_ms": 118900,
150
+ "time_to_first_token_ms": {
151
+ "p50": 25.492,
152
+ "p60": 26.266,
153
+ "p70": 27.157,
154
+ "p80": 28.116,
155
+ "p90": 29.53,
156
+ "p95": 30.532,
157
+ "p99": 46.87,
158
+ "avg": 26.066
159
+ },
160
+ "inter_token_latency_ms": {
161
+ "p50": 8.977,
162
+ "p60": 9.004,
163
+ "p70": 9.027,
164
+ "p80": 9.052,
165
+ "p90": 9.082,
166
+ "p95": 9.114,
167
+ "p99": 9.179,
168
+ "avg": 8.716
169
+ },
170
+ "failed_requests": 0,
171
+ "successful_requests": 284,
172
+ "request_rate": 2.3885469705174174,
173
+ "total_tokens_sent": 43082,
174
+ "e2e_latency_ms": {
175
+ "p50": 1809.698,
176
+ "p60": 1814.702,
177
+ "p70": 1820.864,
178
+ "p80": 1827.06,
179
+ "p90": 1833.784,
180
+ "p95": 1838.2,
181
+ "p99": 1849.697,
182
+ "avg": 1543.709
183
+ }
184
+ },
185
+ {
186
+ "id": "[email protected]/s",
187
+ "executor_type": "ConstantArrivalRate",
188
+ "config": {
189
+ "max_vus": 128,
190
+ "duration_secs": 120,
191
+ "rate": 4.0
192
+ },
193
+ "total_requests": 472,
194
+ "total_tokens": 83257,
195
+ "token_throughput_secs": 697.279766847058,
196
+ "duration_ms": 119402,
197
+ "time_to_first_token_ms": {
198
+ "p50": 25.684,
199
+ "p60": 26.779,
200
+ "p70": 27.553,
201
+ "p80": 28.486,
202
+ "p90": 29.463,
203
+ "p95": 30.455,
204
+ "p99": 45.113,
205
+ "avg": 26.146
206
+ },
207
+ "inter_token_latency_ms": {
208
+ "p50": 9.418,
209
+ "p60": 9.445,
210
+ "p70": 9.461,
211
+ "p80": 9.49,
212
+ "p90": 9.526,
213
+ "p95": 9.532,
214
+ "p99": 9.547,
215
+ "avg": 9.371
216
+ },
217
+ "failed_requests": 0,
218
+ "successful_requests": 472,
219
+ "request_rate": 3.953013559842552,
220
+ "total_tokens_sent": 53690,
221
+ "e2e_latency_ms": {
222
+ "p50": 1898.831,
223
+ "p60": 1904.467,
224
+ "p70": 1907.994,
225
+ "p80": 1913.596,
226
+ "p90": 1920.946,
227
+ "p95": 1923.942,
228
+ "p99": 1928.04,
229
+ "avg": 1679.7
230
+ }
231
+ },
232
+ {
233
+ "id": "[email protected]/s",
234
+ "executor_type": "ConstantArrivalRate",
235
+ "config": {
236
+ "max_vus": 128,
237
+ "duration_secs": 120,
238
+ "rate": 5.6
239
+ },
240
+ "total_requests": 662,
241
+ "total_tokens": 118380,
242
+ "token_throughput_secs": 989.0969943504368,
243
+ "duration_ms": 119684,
244
+ "time_to_first_token_ms": {
245
+ "p50": 26.614,
246
+ "p60": 27.599,
247
+ "p70": 28.411,
248
+ "p80": 29.519,
249
+ "p90": 40.521,
250
+ "p95": 45.386,
251
+ "p99": 47.427,
252
+ "avg": 27.98
253
+ },
254
+ "inter_token_latency_ms": {
255
+ "p50": 9.948,
256
+ "p60": 9.96,
257
+ "p70": 9.969,
258
+ "p80": 9.98,
259
+ "p90": 9.995,
260
+ "p95": 10.005,
261
+ "p99": 10.037,
262
+ "avg": 9.848
263
+ },
264
+ "failed_requests": 0,
265
+ "successful_requests": 662,
266
+ "request_rate": 5.531189476769633,
267
+ "total_tokens_sent": 111696,
268
+ "e2e_latency_ms": {
269
+ "p50": 2004.271,
270
+ "p60": 2007.295,
271
+ "p70": 2010.406,
272
+ "p80": 2012.985,
273
+ "p90": 2015.977,
274
+ "p95": 2017.766,
275
+ "p99": 2025.136,
276
+ "avg": 1791.085
277
+ }
278
+ },
279
+ {
280
+ "id": "[email protected]/s",
281
+ "executor_type": "ConstantArrivalRate",
282
+ "config": {
283
+ "max_vus": 128,
284
+ "duration_secs": 120,
285
+ "rate": 7.2
286
+ },
287
+ "total_requests": 850,
288
+ "total_tokens": 150442,
289
+ "token_throughput_secs": 1255.1886063661461,
290
+ "duration_ms": 119856,
291
+ "time_to_first_token_ms": {
292
+ "p50": 42.393,
293
+ "p60": 44.487,
294
+ "p70": 45.999,
295
+ "p80": 46.482,
296
+ "p90": 47.894,
297
+ "p95": 48.207,
298
+ "p99": 48.37,
299
+ "avg": 37.807
300
+ },
301
+ "inter_token_latency_ms": {
302
+ "p50": 10.186,
303
+ "p60": 10.207,
304
+ "p70": 10.229,
305
+ "p80": 10.259,
306
+ "p90": 10.316,
307
+ "p95": 10.336,
308
+ "p99": 10.359,
309
+ "avg": 9.933
310
+ },
311
+ "failed_requests": 0,
312
+ "successful_requests": 850,
313
+ "request_rate": 7.091838152984034,
314
+ "total_tokens_sent": 168386,
315
+ "e2e_latency_ms": {
316
+ "p50": 2065.737,
317
+ "p60": 2070.244,
318
+ "p70": 2074.586,
319
+ "p80": 2078.847,
320
+ "p90": 2083.084,
321
+ "p95": 2085.846,
322
+ "p99": 2094.979,
323
+ "avg": 1832.984
324
+ }
325
+ },
326
+ {
327
+ "id": "[email protected]/s",
328
+ "executor_type": "ConstantArrivalRate",
329
+ "config": {
330
+ "max_vus": 128,
331
+ "duration_secs": 120,
332
+ "rate": 8.8
333
+ },
334
+ "total_requests": 1044,
335
+ "total_tokens": 189381,
336
+ "token_throughput_secs": 1581.3245632308367,
337
+ "duration_ms": 119760,
338
+ "time_to_first_token_ms": {
339
+ "p50": 42.904,
340
+ "p60": 44.702,
341
+ "p70": 45.143,
342
+ "p80": 45.649,
343
+ "p90": 47.202,
344
+ "p95": 47.429,
345
+ "p99": 47.647,
346
+ "avg": 38.138
347
+ },
348
+ "inter_token_latency_ms": {
349
+ "p50": 10.601,
350
+ "p60": 10.617,
351
+ "p70": 10.64,
352
+ "p80": 10.669,
353
+ "p90": 10.709,
354
+ "p95": 10.745,
355
+ "p99": 10.837,
356
+ "avg": 10.457
357
+ },
358
+ "failed_requests": 0,
359
+ "successful_requests": 1044,
360
+ "request_rate": 8.717362586600522,
361
+ "total_tokens_sent": 164348,
362
+ "e2e_latency_ms": {
363
+ "p50": 2149.601,
364
+ "p60": 2152.628,
365
+ "p70": 2155.129,
366
+ "p80": 2157.878,
367
+ "p90": 2161.992,
368
+ "p95": 2166.405,
369
+ "p99": 2185.247,
370
+ "avg": 1952.745
371
+ }
372
+ },
373
+ {
374
+ "id": "[email protected]/s",
375
+ "executor_type": "ConstantArrivalRate",
376
+ "config": {
377
+ "max_vus": 128,
378
+ "duration_secs": 120,
379
+ "rate": 10.4
380
+ },
381
+ "total_requests": 1227,
382
+ "total_tokens": 215756,
383
+ "token_throughput_secs": 1801.727543102545,
384
+ "duration_ms": 119749,
385
+ "time_to_first_token_ms": {
386
+ "p50": 28.996,
387
+ "p60": 29.94,
388
+ "p70": 31.219,
389
+ "p80": 33.038,
390
+ "p90": 43.255,
391
+ "p95": 45.051,
392
+ "p99": 47.322,
393
+ "avg": 30.686
394
+ },
395
+ "inter_token_latency_ms": {
396
+ "p50": 11.152,
397
+ "p60": 11.172,
398
+ "p70": 11.192,
399
+ "p80": 11.214,
400
+ "p90": 11.247,
401
+ "p95": 11.271,
402
+ "p99": 11.413,
403
+ "avg": 11.041
404
+ },
405
+ "failed_requests": 0,
406
+ "successful_requests": 1227,
407
+ "request_rate": 10.246388028081828,
408
+ "total_tokens_sent": 216589,
409
+ "e2e_latency_ms": {
410
+ "p50": 2246.941,
411
+ "p60": 2250.773,
412
+ "p70": 2254.922,
413
+ "p80": 2260.186,
414
+ "p90": 2267.355,
415
+ "p95": 2272.509,
416
+ "p99": 2299.613,
417
+ "avg": 1980.137
418
+ }
419
+ },
420
+ {
421
+ "id": "[email protected]/s",
422
+ "executor_type": "ConstantArrivalRate",
423
+ "config": {
424
+ "max_vus": 128,
425
+ "duration_secs": 120,
426
+ "rate": 12.0
427
+ },
428
+ "total_requests": 1413,
429
+ "total_tokens": 257715,
430
+ "token_throughput_secs": 2152.3457887331883,
431
+ "duration_ms": 119736,
432
+ "time_to_first_token_ms": {
433
+ "p50": 32.129,
434
+ "p60": 41.129,
435
+ "p70": 42.905,
436
+ "p80": 43.618,
437
+ "p90": 45.582,
438
+ "p95": 47.077,
439
+ "p99": 47.609,
440
+ "avg": 34.989
441
+ },
442
+ "inter_token_latency_ms": {
443
+ "p50": 11.524,
444
+ "p60": 11.571,
445
+ "p70": 11.619,
446
+ "p80": 11.681,
447
+ "p90": 11.737,
448
+ "p95": 11.759,
449
+ "p99": 11.825,
450
+ "avg": 11.473
451
+ },
452
+ "failed_requests": 0,
453
+ "successful_requests": 1413,
454
+ "request_rate": 11.800883144093262,
455
+ "total_tokens_sent": 224022,
456
+ "e2e_latency_ms": {
457
+ "p50": 2327.912,
458
+ "p60": 2333.025,
459
+ "p70": 2339.512,
460
+ "p80": 2355.095,
461
+ "p90": 2364.745,
462
+ "p95": 2369.454,
463
+ "p99": 2381.415,
464
+ "avg": 2131.083
465
+ }
466
+ },
467
+ {
468
+ "id": "[email protected]/s",
469
+ "executor_type": "ConstantArrivalRate",
470
+ "config": {
471
+ "max_vus": 128,
472
+ "duration_secs": 120,
473
+ "rate": 13.6
474
+ },
475
+ "total_requests": 1586,
476
+ "total_tokens": 273066,
477
+ "token_throughput_secs": 2286.726108001893,
478
+ "duration_ms": 119413,
479
+ "time_to_first_token_ms": {
480
+ "p50": 41.765,
481
+ "p60": 43.496,
482
+ "p70": 45.121,
483
+ "p80": 45.647,
484
+ "p90": 47.119,
485
+ "p95": 47.381,
486
+ "p99": 47.644,
487
+ "avg": 39.252
488
+ },
489
+ "inter_token_latency_ms": {
490
+ "p50": 21.018,
491
+ "p60": 21.058,
492
+ "p70": 21.094,
493
+ "p80": 21.136,
494
+ "p90": 21.204,
495
+ "p95": 21.361,
496
+ "p99": 21.474,
497
+ "avg": 20.088
498
+ },
499
+ "failed_requests": 0,
500
+ "successful_requests": 1586,
501
+ "request_rate": 13.281578839148787,
502
+ "total_tokens_sent": 263160,
503
+ "e2e_latency_ms": {
504
+ "p50": 4215.628,
505
+ "p60": 4224.579,
506
+ "p70": 4233.564,
507
+ "p80": 4241.334,
508
+ "p90": 4253.157,
509
+ "p95": 4281.605,
510
+ "p99": 4307.594,
511
+ "avg": 3499.01
512
+ }
513
+ },
514
+ {
515
+ "id": "[email protected]/s",
516
+ "executor_type": "ConstantArrivalRate",
517
+ "config": {
518
+ "max_vus": 128,
519
+ "duration_secs": 120,
520
+ "rate": 15.2
521
+ },
522
+ "total_requests": 1767,
523
+ "total_tokens": 318782,
524
+ "token_throughput_secs": 2659.660245067388,
525
+ "duration_ms": 119858,
526
+ "time_to_first_token_ms": {
527
+ "p50": 41.941,
528
+ "p60": 42.561,
529
+ "p70": 43.982,
530
+ "p80": 44.599,
531
+ "p90": 46.402,
532
+ "p95": 47.889,
533
+ "p99": 48.314,
534
+ "avg": 38.969
535
+ },
536
+ "inter_token_latency_ms": {
537
+ "p50": 21.711,
538
+ "p60": 21.755,
539
+ "p70": 21.812,
540
+ "p80": 21.867,
541
+ "p90": 21.933,
542
+ "p95": 21.976,
543
+ "p99": 22.027,
544
+ "avg": 21.316
545
+ },
546
+ "failed_requests": 0,
547
+ "successful_requests": 1767,
548
+ "request_rate": 14.742424770012343,
549
+ "total_tokens_sent": 299995,
550
+ "e2e_latency_ms": {
551
+ "p50": 4354.219,
552
+ "p60": 4361.816,
553
+ "p70": 4377.337,
554
+ "p80": 4389.415,
555
+ "p90": 4402.615,
556
+ "p95": 4409.06,
557
+ "p99": 4416.879,
558
+ "avg": 3900.022
559
+ }
560
+ },
561
+ {
562
+ "id": "[email protected]/s",
563
+ "executor_type": "ConstantArrivalRate",
564
+ "config": {
565
+ "max_vus": 128,
566
+ "duration_secs": 120,
567
+ "rate": 16.8
568
+ },
569
+ "total_requests": 1952,
570
+ "total_tokens": 349899,
571
+ "token_throughput_secs": 2924.5946715959903,
572
+ "duration_ms": 119640,
573
+ "time_to_first_token_ms": {
574
+ "p50": 42.018,
575
+ "p60": 42.508,
576
+ "p70": 43.793,
577
+ "p80": 44.153,
578
+ "p90": 44.83,
579
+ "p95": 46.44,
580
+ "p99": 48.232,
581
+ "avg": 38.955
582
+ },
583
+ "inter_token_latency_ms": {
584
+ "p50": 22.202,
585
+ "p60": 22.257,
586
+ "p70": 22.348,
587
+ "p80": 22.464,
588
+ "p90": 22.555,
589
+ "p95": 22.618,
590
+ "p99": 22.716,
591
+ "avg": 21.964
592
+ },
593
+ "failed_requests": 0,
594
+ "successful_requests": 1952,
595
+ "request_rate": 16.315590495987053,
596
+ "total_tokens_sent": 308143,
597
+ "e2e_latency_ms": {
598
+ "p50": 4448.481,
599
+ "p60": 4462.093,
600
+ "p70": 4478.598,
601
+ "p80": 4507.043,
602
+ "p90": 4525.206,
603
+ "p95": 4538.11,
604
+ "p99": 4560.079,
605
+ "avg": 3971.144
606
+ }
607
+ },
608
+ {
609
+ "id": "[email protected]/s",
610
+ "executor_type": "ConstantArrivalRate",
611
+ "config": {
612
+ "max_vus": 128,
613
+ "duration_secs": 120,
614
+ "rate": 18.4
615
+ },
616
+ "total_requests": 2135,
617
+ "total_tokens": 377085,
618
+ "token_throughput_secs": 3157.5918029420154,
619
+ "duration_ms": 119421,
620
+ "time_to_first_token_ms": {
621
+ "p50": 42.037,
622
+ "p60": 42.627,
623
+ "p70": 43.838,
624
+ "p80": 44.183,
625
+ "p90": 45.567,
626
+ "p95": 46.603,
627
+ "p99": 48.266,
628
+ "avg": 39.44
629
+ },
630
+ "inter_token_latency_ms": {
631
+ "p50": 22.519,
632
+ "p60": 22.586,
633
+ "p70": 22.678,
634
+ "p80": 22.785,
635
+ "p90": 22.851,
636
+ "p95": 22.888,
637
+ "p99": 22.962,
638
+ "avg": 22.143
639
+ },
640
+ "failed_requests": 0,
641
+ "successful_requests": 2135,
642
+ "request_rate": 17.87782197457126,
643
+ "total_tokens_sent": 378767,
644
+ "e2e_latency_ms": {
645
+ "p50": 4511.199,
646
+ "p60": 4525.581,
647
+ "p70": 4542.862,
648
+ "p80": 4569.463,
649
+ "p90": 4584.328,
650
+ "p95": 4592.49,
651
+ "p99": 4605.219,
652
+ "avg": 3978.179
653
+ }
654
+ },
655
+ {
656
+ "id": "[email protected]/s",
657
+ "executor_type": "ConstantArrivalRate",
658
+ "config": {
659
+ "max_vus": 128,
660
+ "duration_secs": 120,
661
+ "rate": 20.0
662
+ },
663
+ "total_requests": 2314,
664
+ "total_tokens": 410851,
665
+ "token_throughput_secs": 3431.8999694521895,
666
+ "duration_ms": 119715,
667
+ "time_to_first_token_ms": {
668
+ "p50": 42.054,
669
+ "p60": 42.898,
670
+ "p70": 43.131,
671
+ "p80": 43.857,
672
+ "p90": 45.58,
673
+ "p95": 47.057,
674
+ "p99": 53.486,
675
+ "avg": 40.178
676
+ },
677
+ "inter_token_latency_ms": {
678
+ "p50": 23.178,
679
+ "p60": 23.241,
680
+ "p70": 23.306,
681
+ "p80": 23.362,
682
+ "p90": 23.489,
683
+ "p95": 23.649,
684
+ "p99": 24.696,
685
+ "avg": 22.962
686
+ },
687
+ "failed_requests": 0,
688
+ "successful_requests": 2314,
689
+ "request_rate": 19.329188755320946,
690
+ "total_tokens_sent": 428632,
691
+ "e2e_latency_ms": {
692
+ "p50": 4641.862,
693
+ "p60": 4658.169,
694
+ "p70": 4671.517,
695
+ "p80": 4685.088,
696
+ "p90": 4705.846,
697
+ "p95": 4725.362,
698
+ "p99": 4949.917,
699
+ "avg": 4118.614
700
+ }
701
+ },
702
+ {
703
+ "id": "[email protected]/s",
704
+ "executor_type": "ConstantArrivalRate",
705
+ "config": {
706
+ "max_vus": 128,
707
+ "duration_secs": 120,
708
+ "rate": 21.6
709
+ },
710
+ "total_requests": 2496,
711
+ "total_tokens": 425184,
712
+ "token_throughput_secs": 3546.9270370066156,
713
+ "duration_ms": 119873,
714
+ "time_to_first_token_ms": {
715
+ "p50": 47.532,
716
+ "p60": 51.096,
717
+ "p70": 55.311,
718
+ "p80": 59.955,
719
+ "p90": 64.818,
720
+ "p95": 68.566,
721
+ "p99": 73.174,
722
+ "avg": 49.832
723
+ },
724
+ "inter_token_latency_ms": {
725
+ "p50": 23.615,
726
+ "p60": 23.819,
727
+ "p70": 23.937,
728
+ "p80": 24.142,
729
+ "p90": 24.335,
730
+ "p95": 24.424,
731
+ "p99": 24.675,
732
+ "avg": 23.117
733
+ },
734
+ "failed_requests": 0,
735
+ "successful_requests": 2496,
736
+ "request_rate": 20.821879196697225,
737
+ "total_tokens_sent": 449445,
738
+ "e2e_latency_ms": {
739
+ "p50": 4705.977,
740
+ "p60": 4760.663,
741
+ "p70": 4802.56,
742
+ "p80": 4835.503,
743
+ "p90": 4887.331,
744
+ "p95": 4911.378,
745
+ "p99": 4950.871,
746
+ "avg": 4041.088
747
+ }
748
+ },
749
+ {
750
+ "id": "[email protected]/s",
751
+ "executor_type": "ConstantArrivalRate",
752
+ "config": {
753
+ "max_vus": 128,
754
+ "duration_secs": 120,
755
+ "rate": 23.2
756
+ },
757
+ "total_requests": 2693,
758
+ "total_tokens": 477598,
759
+ "token_throughput_secs": 3987.677624302409,
760
+ "duration_ms": 119768,
761
+ "time_to_first_token_ms": {
762
+ "p50": 48.917,
763
+ "p60": 53.875,
764
+ "p70": 58.427,
765
+ "p80": 63.212,
766
+ "p90": 68.231,
767
+ "p95": 71.137,
768
+ "p99": 75.227,
769
+ "avg": 51.515
770
+ },
771
+ "inter_token_latency_ms": {
772
+ "p50": 24.172,
773
+ "p60": 24.458,
774
+ "p70": 24.565,
775
+ "p80": 24.718,
776
+ "p90": 24.831,
777
+ "p95": 25.263,
778
+ "p99": 25.541,
779
+ "avg": 23.951
780
+ },
781
+ "failed_requests": 0,
782
+ "successful_requests": 2693,
783
+ "request_rate": 22.485051952157228,
784
+ "total_tokens_sent": 418148,
785
+ "e2e_latency_ms": {
786
+ "p50": 4820.55,
787
+ "p60": 4895.607,
788
+ "p70": 4932.562,
789
+ "p80": 4963.156,
790
+ "p90": 4990.976,
791
+ "p95": 5047.438,
792
+ "p99": 5133.803,
793
+ "avg": 4302.865
794
+ }
795
+ },
796
+ {
797
+ "id": "[email protected]/s",
798
+ "executor_type": "ConstantArrivalRate",
799
+ "config": {
800
+ "max_vus": 128,
801
+ "duration_secs": 120,
802
+ "rate": 24.0
803
+ },
804
+ "total_requests": 2770,
805
+ "total_tokens": 491476,
806
+ "token_throughput_secs": 4122.591533026405,
807
+ "duration_ms": 119215,
808
+ "time_to_first_token_ms": {
809
+ "p50": 49.685,
810
+ "p60": 54.302,
811
+ "p70": 59.464,
812
+ "p80": 64.151,
813
+ "p90": 69.885,
814
+ "p95": 72.497,
815
+ "p99": 75.82,
816
+ "avg": 51.673
817
+ },
818
+ "inter_token_latency_ms": {
819
+ "p50": 24.572,
820
+ "p60": 24.655,
821
+ "p70": 24.758,
822
+ "p80": 24.89,
823
+ "p90": 25.177,
824
+ "p95": 25.712,
825
+ "p99": 25.93,
826
+ "avg": 24.429
827
+ },
828
+ "failed_requests": 0,
829
+ "successful_requests": 2770,
830
+ "request_rate": 23.235272010196105,
831
+ "total_tokens_sent": 417041,
832
+ "e2e_latency_ms": {
833
+ "p50": 4917.332,
834
+ "p60": 4944.149,
835
+ "p70": 4968.491,
836
+ "p80": 4997.781,
837
+ "p90": 5053.09,
838
+ "p95": 5149.163,
839
+ "p99": 5204.539,
840
+ "avg": 4376.615
841
+ }
842
+ }
843
+ ],
844
+ "start_time": "2025-04-02T07:33:13.875090549+00:00",
845
+ "end_time": "2025-04-02T08:06:23.692065899+00:00",
846
+ "system": {
847
+ "cpu": [
848
+ "AMD EPYC 7R13 Processor cpu0@2363MHz",
849
+ "AMD EPYC 7R13 Processor cpu1@2649MHz",
850
+ "AMD EPYC 7R13 Processor cpu2@2673MHz",
851
+ "AMD EPYC 7R13 Processor cpu3@2673MHz",
852
+ "AMD EPYC 7R13 Processor cpu4@2649MHz",
853
+ "AMD EPYC 7R13 Processor cpu5@2673MHz",
854
+ "AMD EPYC 7R13 Processor cpu6@2673MHz",
855
+ "AMD EPYC 7R13 Processor cpu7@2649MHz",
856
+ "AMD EPYC 7R13 Processor cpu8@2649MHz",
857
+ "AMD EPYC 7R13 Processor cpu9@2673MHz",
858
+ "AMD EPYC 7R13 Processor cpu10@2673MHz",
859
+ "AMD EPYC 7R13 Processor cpu11@2673MHz",
860
+ "AMD EPYC 7R13 Processor cpu12@2649MHz",
861
+ "AMD EPYC 7R13 Processor cpu13@2673MHz",
862
+ "AMD EPYC 7R13 Processor cpu14@2673MHz",
863
+ "AMD EPYC 7R13 Processor cpu15@2673MHz",
864
+ "AMD EPYC 7R13 Processor cpu16@2673MHz",
865
+ "AMD EPYC 7R13 Processor cpu17@2673MHz",
866
+ "AMD EPYC 7R13 Processor cpu18@2673MHz",
867
+ "AMD EPYC 7R13 Processor cpu19@2649MHz",
868
+ "AMD EPYC 7R13 Processor cpu20@2649MHz",
869
+ "AMD EPYC 7R13 Processor cpu21@2649MHz",
870
+ "AMD EPYC 7R13 Processor cpu22@2649MHz",
871
+ "AMD EPYC 7R13 Processor cpu23@2673MHz",
872
+ "AMD EPYC 7R13 Processor cpu24@2649MHz",
873
+ "AMD EPYC 7R13 Processor cpu25@2649MHz",
874
+ "AMD EPYC 7R13 Processor cpu26@2649MHz",
875
+ "AMD EPYC 7R13 Processor cpu27@2649MHz",
876
+ "AMD EPYC 7R13 Processor cpu28@2649MHz",
877
+ "AMD EPYC 7R13 Processor cpu29@2649MHz",
878
+ "AMD EPYC 7R13 Processor cpu30@2673MHz",
879
+ "AMD EPYC 7R13 Processor cpu31@3391MHz",
880
+ "AMD EPYC 7R13 Processor cpu32@2649MHz",
881
+ "AMD EPYC 7R13 Processor cpu33@2649MHz",
882
+ "AMD EPYC 7R13 Processor cpu34@2649MHz",
883
+ "AMD EPYC 7R13 Processor cpu35@2649MHz",
884
+ "AMD EPYC 7R13 Processor cpu36@2649MHz",
885
+ "AMD EPYC 7R13 Processor cpu37@2649MHz",
886
+ "AMD EPYC 7R13 Processor cpu38@2649MHz",
887
+ "AMD EPYC 7R13 Processor cpu39@2673MHz",
888
+ "AMD EPYC 7R13 Processor cpu40@2649MHz",
889
+ "AMD EPYC 7R13 Processor cpu41@2673MHz",
890
+ "AMD EPYC 7R13 Processor cpu42@2673MHz",
891
+ "AMD EPYC 7R13 Processor cpu43@2673MHz",
892
+ "AMD EPYC 7R13 Processor cpu44@2673MHz",
893
+ "AMD EPYC 7R13 Processor cpu45@2673MHz",
894
+ "AMD EPYC 7R13 Processor cpu46@2673MHz",
895
+ "AMD EPYC 7R13 Processor cpu47@2649MHz",
896
+ "AMD EPYC 7R13 Processor cpu48@2649MHz",
897
+ "AMD EPYC 7R13 Processor cpu49@2649MHz",
898
+ "AMD EPYC 7R13 Processor cpu50@2649MHz",
899
+ "AMD EPYC 7R13 Processor cpu51@2649MHz",
900
+ "AMD EPYC 7R13 Processor cpu52@2649MHz",
901
+ "AMD EPYC 7R13 Processor cpu53@2649MHz",
902
+ "AMD EPYC 7R13 Processor cpu54@2673MHz",
903
+ "AMD EPYC 7R13 Processor cpu55@2673MHz",
904
+ "AMD EPYC 7R13 Processor cpu56@2673MHz",
905
+ "AMD EPYC 7R13 Processor cpu57@2649MHz",
906
+ "AMD EPYC 7R13 Processor cpu58@2649MHz",
907
+ "AMD EPYC 7R13 Processor cpu59@2649MHz",
908
+ "AMD EPYC 7R13 Processor cpu60@2673MHz",
909
+ "AMD EPYC 7R13 Processor cpu61@2649MHz",
910
+ "AMD EPYC 7R13 Processor cpu62@2649MHz",
911
+ "AMD EPYC 7R13 Processor cpu63@2649MHz",
912
+ "AMD EPYC 7R13 Processor cpu64@2649MHz",
913
+ "AMD EPYC 7R13 Processor cpu65@2649MHz",
914
+ "AMD EPYC 7R13 Processor cpu66@2649MHz",
915
+ "AMD EPYC 7R13 Processor cpu67@2649MHz",
916
+ "AMD EPYC 7R13 Processor cpu68@2649MHz",
917
+ "AMD EPYC 7R13 Processor cpu69@2649MHz",
918
+ "AMD EPYC 7R13 Processor cpu70@2649MHz",
919
+ "AMD EPYC 7R13 Processor cpu71@2649MHz",
920
+ "AMD EPYC 7R13 Processor cpu72@2649MHz",
921
+ "AMD EPYC 7R13 Processor cpu73@2649MHz",
922
+ "AMD EPYC 7R13 Processor cpu74@2649MHz",
923
+ "AMD EPYC 7R13 Processor cpu75@2649MHz",
924
+ "AMD EPYC 7R13 Processor cpu76@2649MHz",
925
+ "AMD EPYC 7R13 Processor cpu77@2649MHz",
926
+ "AMD EPYC 7R13 Processor cpu78@2649MHz",
927
+ "AMD EPYC 7R13 Processor cpu79@2649MHz",
928
+ "AMD EPYC 7R13 Processor cpu80@2649MHz",
929
+ "AMD EPYC 7R13 Processor cpu81@2649MHz",
930
+ "AMD EPYC 7R13 Processor cpu82@2649MHz",
931
+ "AMD EPYC 7R13 Processor cpu83@2649MHz",
932
+ "AMD EPYC 7R13 Processor cpu84@2673MHz",
933
+ "AMD EPYC 7R13 Processor cpu85@2673MHz",
934
+ "AMD EPYC 7R13 Processor cpu86@3111MHz",
935
+ "AMD EPYC 7R13 Processor cpu87@2649MHz",
936
+ "AMD EPYC 7R13 Processor cpu88@2649MHz",
937
+ "AMD EPYC 7R13 Processor cpu89@2649MHz",
938
+ "AMD EPYC 7R13 Processor cpu90@3111MHz",
939
+ "AMD EPYC 7R13 Processor cpu91@2649MHz",
940
+ "AMD EPYC 7R13 Processor cpu92@2649MHz",
941
+ "AMD EPYC 7R13 Processor cpu93@2649MHz",
942
+ "AMD EPYC 7R13 Processor cpu94@2649MHz",
943
+ "AMD EPYC 7R13 Processor cpu95@2649MHz"
944
+ ],
945
+ "memory": "1999.99 GB",
946
+ "os_name": "Debian GNU/Linux",
947
+ "os_version": "11",
948
+ "kernel": "5.15.0-1048-aws",
949
+ "hostname": "ip-26-0-161-123"
950
+ }
951
+ }