Update app.py
app.py CHANGED
@@ -32,7 +32,7 @@ def estimate_capacity_latency(model, gpu):
     return f"{prefill_time_per_token:.3f} ms", f"{generation_time_per_token:.3f} ms", f"{estimated_response_time:.1f} s"
 
 def create_gradio_interface():
-    #
+    # Define gpu_specs here so it is available inside the function
     gpu_specs = [
         {"name": "A10", "fp16_tflops": 125, "memory_gb": 24, "memory_bandwidth_gbps": 600},
         {"name": "A30", "fp16_tflops": 330, "memory_gb": 24, "memory_bandwidth_gbps": 933},
@@ -47,10 +47,23 @@ def create_gradio_interface():
         {"name": "H100 NVL", "fp16_tflops": 3958, "memory_gb": 188, "memory_bandwidth_gbps": 7800}
     ]
 
+    # Define model_specs here so it is available inside the function
+    model_specs = [
+        {"name": "Llama-3-8B", "params_billion": 8, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 8192, "d_head": 128},
+        {"name": "Llama-3-70B", "params_billion": 70, "d_model": 8192, "n_heads": 64, "n_layers": 80, "max_context_window": 8192, "d_head": 128},
+        {"name": "Llama-3.1-8B", "params_billion": 8, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 131072, "d_head": 128},
+        {"name": "Llama-3.1-70B", "params_billion": 70, "d_model": 8192, "n_heads": 64, "n_layers": 80, "max_context_window": 131072, "d_head": 128},
+        {"name": "Mistral-7B-v0.3", "params_billion": 7, "d_model": 4096, "n_heads": 32, "n_layers": 32, "max_context_window": 32768, "d_head": 128},
+        {"name": "Falcon-7B", "params_billion": 7, "d_model": 4544, "n_heads": 71, "n_layers": 32, "max_context_window": 2048, "d_head": 64},
+        {"name": "Falcon-40B", "params_billion": 40, "d_model": 8192, "n_heads": 128, "n_layers": 60, "max_context_window": 2048, "d_head": 64},
+        {"name": "Falcon-180B", "params_billion": 180, "d_model": 14848, "n_heads": 232, "n_layers": 80, "max_context_window": 2048, "d_head": 64}
+    ]
+
     demo = gr.Interface(
         fn=estimate_capacity_latency,
         inputs=[
             gr.Textbox(label="Model Name"),
+            gr.Dropdown(choices=[model['name'] for model in model_specs], label="Model Type"),
             gr.Dropdown(choices=[gpu['name'] for gpu in gpu_specs], label="GPU Type")
         ],
         outputs=[
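
The body of estimate_capacity_latency is outside these hunks, and the hunk header still shows the old two-parameter signature; since the interface now passes three inputs, the callback presumably gains a matching third parameter. Below is a minimal sketch, not code from this commit, of how the lookup and the returned strings could fit together. The three-parameter signature, the one-entry spec lists, the output_tokens parameter, and the roofline-style formulas (compute-bound prefill at roughly 2N FLOPs per token, bandwidth-bound decoding that reads the fp16 weights once per token) are all assumptions.

# Sketch only: the spec lists here are truncated stand-ins for the full
# gpu_specs/model_specs tables added in the commit above.
model_specs = [
    {"name": "Llama-3-8B", "params_billion": 8},
]
gpu_specs = [
    {"name": "A10", "fp16_tflops": 125, "memory_bandwidth_gbps": 600},
]

def estimate_capacity_latency(model_name, model_type, gpu_type, output_tokens=256):
    # Resolve the dropdown selections to their spec entries
    model = next(m for m in model_specs if m["name"] == model_type)
    gpu = next(g for g in gpu_specs if g["name"] == gpu_type)

    # Prefill: compute-bound, ~2N FLOPs per token against peak fp16 throughput
    flops_per_token = 2 * model["params_billion"] * 1e9
    prefill_time_per_token = flops_per_token / (gpu["fp16_tflops"] * 1e12) * 1e3  # ms

    # Decoding: bandwidth-bound, read all fp16 weights (2 bytes/param) per token
    weight_bytes = 2 * model["params_billion"] * 1e9
    generation_time_per_token = weight_bytes / (gpu["memory_bandwidth_gbps"] * 1e9) * 1e3  # ms

    # End-to-end response for an assumed output length (hypothetical default)
    estimated_response_time = generation_time_per_token * output_tokens / 1e3  # s

    return (f"{prefill_time_per_token:.3f} ms",
            f"{generation_time_per_token:.3f} ms",
            f"{estimated_response_time:.1f} s")

print(estimate_capacity_latency("my-deployment", "Llama-3-8B", "A10"))
# -> ('0.128 ms', '26.667 ms', '6.8 s')

The return values mirror the formatted strings in the diff's context line, so a sketch like this would slot into gr.Interface unchanged apart from the label wiring.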