Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -96,6 +96,5 @@ def main(num_gpu, prompt_size, response_size, n_concurrent_request, avg_context_
|
|
96 |
estimated_response_time = calc_estimated_response_time(prefill_time_per_token, generation_time_per_token, prompt_size, response_size)
|
97 |
capacity_latency_table.append([model['name'], gpu['name'], f"{kv_cache_tokens}", f"{prefill_time_per_token:.3f} ms", f"{generation_time_per_token:.3f} ms", f"{estimated_response_time:.1f} s"])
|
98 |
print(tabulate(capacity_latency_table, headers=['Model', 'GPU', 'KV Cache Tokens', 'Prefill Time', 'Generation Time', 'Estimated Response Time'], tablefmt='orgtbl'))
|
99 |
-
|
100 |
if __name__ == '__main__':
|
101 |
-
main()
|
|
|
96 |
estimated_response_time = calc_estimated_response_time(prefill_time_per_token, generation_time_per_token, prompt_size, response_size)
|
97 |
capacity_latency_table.append([model['name'], gpu['name'], f"{kv_cache_tokens}", f"{prefill_time_per_token:.3f} ms", f"{generation_time_per_token:.3f} ms", f"{estimated_response_time:.1f} s"])
|
98 |
print(tabulate(capacity_latency_table, headers=['Model', 'GPU', 'KV Cache Tokens', 'Prefill Time', 'Generation Time', 'Estimated Response Time'], tablefmt='orgtbl'))
|
|
|
99 |
if __name__ == '__main__':
|
100 |
+
main(num_gpu, prompt_size, response_size, n_concurrent_request, avg_context_window)
|