farmax committed
Commit 01ed4ac · verified · 1 parent: 74ff3ad

Update app.py

Files changed (1): app.py +14 -1
app.py CHANGED
@@ -32,7 +32,7 @@ def estimate_capacity_latency(model, gpu):
     return f"{prefill_time_per_token:.3f} ms", f"{generation_time_per_token:.3f} ms", f"{estimated_response_time:.1f} s"
 
 def create_gradio_interface():
-    # Define gpu_specs here so it's available in the function
+    # Define gpu_specs here so it's available in the function
     gpu_specs = [
         {"name": "A10", "fp16_tflops": 125, "memory_gb": 24, "memory_bandwidth_gbps": 600},
         {"name": "A30", "fp16_tflops": 330, "memory_gb": 24, "memory_bandwidth_gbps": 933},
@@ -47,10 +47,23 @@ def create_gradio_interface():
         {"name": "H100 NVL", "fp16_tflops": 3958, "memory_gb": 188, "memory_bandwidth_gbps": 7800}
     ]
 
+    # Define model_specs here so it's available in the function
+    model_specs = [
+        {"name": "Llama-3-8B", "params_billion": 8, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 8192, "d_head": 128},
+        {"name": "Llama-3-70B", "params_billion": 70, "d_model": 8192, "n_heads": 64, "n_layers": 80, "max_context_window": 8192, "d_head": 128},
+        {"name": "Llama-3.1-8B", "params_billion": 8, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 131072, "d_head": 128},
+        {"name": "Llama-3.1-70B", "params_billion": 70, "d_model": 8192, "n_heads": 64, "n_layers": 80, "max_context_window": 131072, "d_head": 128},
+        {"name": "Mistral-7B-v0.3", "params_billion": 7, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 32768, "d_head": 128},
+        {"name": "Falcon-7B", "params_billion": 7, "d_model": 4544, "n_heads": 71, "n_layers": 32, "max_context_window": 2048, "d_head": 64},
+        {"name": "Falcon-40B", "params_billion": 40, "d_model": 8192, "n_heads": 128, "n_layers": 60, "max_context_window": 2048, "d_head": 64},
+        {"name": "Falcon-180B", "params_billion": 180, "d_model": 14848, "n_heads": 232, "n_layers": 80, "max_context_window": 2048, "d_head": 64}
+    ]
+
     demo = gr.Interface(
         fn=estimate_capacity_latency,
         inputs=[
             gr.Textbox(label="Model Name"),
+            gr.Dropdown(choices=[model['name'] for model in model_specs], label="Model Type"),
             gr.Dropdown(choices=[gpu['name'] for gpu in gpu_specs], label="GPU Type")
         ],
         outputs=[
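
The body of estimate_capacity_latency is not shown in this diff, only its three formatted outputs. As a rough illustration of how the new spec fields could produce those outputs, here is a minimal Python sketch (a hypothetical helper, not the app's actual code) that resolves the dropdown names against the two spec lists and applies common roofline-style approximations: prefill treated as compute-bound (~2 FLOPs per parameter per token) and decoding as memory-bandwidth-bound at FP16 (2 bytes per parameter). The default token counts are invented for the example.

# Hypothetical sketch; assumes compute-bound prefill and bandwidth-bound decode.
def estimate_capacity_latency_sketch(model_name, gpu_name, model_specs, gpu_specs,
                                     prompt_tokens=350, output_tokens=150):
    # Resolve the dropdown selections to their spec dicts.
    model = next(m for m in model_specs if m["name"] == model_name)
    gpu = next(g for g in gpu_specs if g["name"] == gpu_name)

    n_params = model["params_billion"] * 1e9

    # Prefill: ~2 FLOPs per parameter per token, limited by FP16 compute.
    prefill_time_per_token = (2 * n_params) / (gpu["fp16_tflops"] * 1e12) * 1e3  # ms

    # Decode: each generated token streams all weights (2 bytes/param at FP16).
    generation_time_per_token = (2 * n_params) / (gpu["memory_bandwidth_gbps"] * 1e9) * 1e3  # ms

    # Total response time for the assumed request shape, in seconds.
    estimated_response_time = (prompt_tokens * prefill_time_per_token
                               + output_tokens * generation_time_per_token) / 1e3
    return prefill_time_per_token, generation_time_per_token, estimated_response_time

Under these assumptions, Llama-3-8B on an A10 works out to about 0.13 ms per prefill token (2·8e9 FLOPs / 125 TFLOPS) and about 27 ms per generated token (16 GB of weights / 600 GB/s), which matches the millisecond-scale formatting of the returned strings.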