derek-thomas committed on
Commit
08b6656
·
verified ·
1 Parent(s): 60517f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -27
app.py CHANGED
@@ -325,6 +325,10 @@ with gr.Blocks() as demo:
325
  """)
326
  with gr.Row():
327
  with gr.Column():
 
 
 
 
328
  vocab_size = gr.Number(
329
  label="Vocab Size",
330
  value=51200,
@@ -350,25 +354,10 @@ with gr.Blocks() as demo:
350
  value=1.0,
351
  info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
352
  )
353
- topk = gr.Number(
354
- label="Top K Routing for MoE",
355
- value=1,
356
- info="Top k routing for Mixture of Experts (MoE)"
357
- )
358
- moe = gr.Checkbox(
359
- label="Mixture of Experts (MoE)",
360
- value=False,
361
- info="Whether the model uses Mixture of Experts"
362
- )
363
- num_experts = gr.Number(
364
- label="Number of Experts",
365
- value=128,
366
- info="Number of experts for Mixture of Experts (MoE)"
367
- )
368
- expert_interval = gr.Number(
369
- label="Expert Interval",
370
- value=2,
371
- info="Expert interval for Mixture of Experts (MoE)"
372
  )
373
  batch_size = gr.Number(
374
  label="Batch Size",
@@ -385,17 +374,35 @@ with gr.Blocks() as demo:
385
  value=True,
386
  info="Whether Megatron-style activation checkpointing is being used"
387
  )
388
- ffn_expansion_factor = gr.Number(
389
- label="FFN Expansion Factor",
390
- value=4,
391
- info="How much the MLP hidden size expands"
392
- )
393
  infer = gr.Checkbox(
394
  label="Inference-Only",
395
  value=False,
396
  info="Whether the model is being used for inference-only"
397
  )
398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  calc_flops_button = gr.Button("Calculate FLOPs")
400
  flops_result = gr.JSON(label="FLOP Calculation Result", interactive=False)
401
  calc_flops_button.click(
@@ -404,8 +411,8 @@ with gr.Blocks() as demo:
404
  outputs=flops_result
405
  )
406
 
407
- hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path", info="Name of the HuggingFace model or local path")
408
- hf_model_name_or_path.change(fn=get_hf_model_args, inputs=[hf_model_name_or_path], outputs=[num_layers, hidden_size, vocab_size, sequence_length])
409
-
410
 
411
  demo.launch()
 
325
  """)
326
  with gr.Row():
327
  with gr.Column():
328
+ hf_model_name_or_path = gr.Textbox(
329
+ label="HuggingFace Model Name or Path",
330
+ info="Name of the HuggingFace Hub repository or the local file path for it"
331
+ )
332
  vocab_size = gr.Number(
333
  label="Vocab Size",
334
  value=51200,
 
354
  value=1.0,
355
  info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
356
  )
357
+ ffn_expansion_factor = gr.Number(
358
+ label="FFN Expansion Factor",
359
+ value=4,
360
+ info="How much the MLP hidden size expands"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  )
362
  batch_size = gr.Number(
363
  label="Batch Size",
 
374
  value=True,
375
  info="Whether Megatron-style activation checkpointing is being used"
376
  )
 
 
 
 
 
377
  infer = gr.Checkbox(
378
  label="Inference-Only",
379
  value=False,
380
  info="Whether the model is being used for inference-only"
381
  )
382
 
383
+ # MoE parameters hidden in accordion
384
+ with gr.Accordion("Mixture of Experts (MoE)", open=False):
385
+ moe = gr.Checkbox(
386
+ label="Mixture of Experts (MoE)",
387
+ value=False,
388
+ info="Whether the model uses Mixture of Experts"
389
+ )
390
+ num_experts = gr.Number(
391
+ label="Number of Experts",
392
+ value=128,
393
+ info="Number of experts for Mixture of Experts (MoE)"
394
+ )
395
+ expert_interval = gr.Number(
396
+ label="Expert Interval",
397
+ value=2,
398
+ info="Expert interval for Mixture of Experts (MoE)"
399
+ )
400
+ topk = gr.Number(
401
+ label="Top K Routing for MoE",
402
+ value=1,
403
+ info="Top k routing for Mixture of Experts (MoE)"
404
+ )
405
+
406
  calc_flops_button = gr.Button("Calculate FLOPs")
407
  flops_result = gr.JSON(label="FLOP Calculation Result", interactive=False)
408
  calc_flops_button.click(
 
411
  outputs=flops_result
412
  )
413
 
414
+ hf_model_name_or_path.change(fn=update_from_hf_model,
415
+ inputs=[hf_model_name_or_path],
416
+ outputs=[num_layers, hidden_size, vocab_size, sequence_length])
417
 
418
  demo.launch()