Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoConfig # Required for Hugging Face integration
|
3 |
from calc_params import calc_params # Import calc_params from the new file
|
|
|
4 |
|
5 |
# ---- Helper Functions ---- #
|
6 |
def get_hf_model_args(hf_model_name_or_path):
|
@@ -320,35 +321,39 @@ with gr.Blocks() as demo:
|
|
320 |
gr.Markdown("""
|
321 |
## FLOP Calculation
|
322 |
|
323 |
-
FLOP Calculation
|
324 |
-
|
|
|
|
|
325 |
""")
|
326 |
with gr.Row():
|
327 |
-
with gr.Column():
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
|
|
|
|
352 |
kv_size_ratio = gr.Number(
|
353 |
label="KV Size Ratio",
|
354 |
value=1.0,
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoConfig # Required for Hugging Face integration
|
3 |
from calc_params import calc_params # Import calc_params from the new file
|
4 |
+
import math
|
5 |
|
6 |
# ---- Helper Functions ---- #
|
7 |
def get_hf_model_args(hf_model_name_or_path):
|
|
|
321 |
gr.Markdown("""
|
322 |
## FLOP Calculation
|
323 |
|
324 |
+
This section calculates the theoretical number of FLOPs required to train a model on t tokens.
|
325 |
+
See [Transformer Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
|
326 |
+
Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
|
327 |
+
[Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
|
328 |
""")
|
329 |
with gr.Row():
|
330 |
+
with gr.Column("Generatable"):
|
331 |
+
with gr.Group():
|
332 |
+
hf_model_name_or_path = gr.Textbox(
|
333 |
+
label="HuggingFace Model Name or Path",
|
334 |
+
info="Name of the HuggingFace Hub repository or the local file path for it"
|
335 |
+
)
|
336 |
+
vocab_size = gr.Number(
|
337 |
+
label="Vocab Size",
|
338 |
+
value=51200,
|
339 |
+
info="How many tokens are in the embedding layer"
|
340 |
+
)
|
341 |
+
hidden_size = gr.Number(
|
342 |
+
label="Hidden Size",
|
343 |
+
value=6144,
|
344 |
+
info="Dimension of the model's hidden size"
|
345 |
+
)
|
346 |
+
sequence_length = gr.Number(
|
347 |
+
label="Sequence Length",
|
348 |
+
value=2048,
|
349 |
+
info="Sequence length used for training"
|
350 |
+
)
|
351 |
+
num_layers = gr.Number(
|
352 |
+
label="Number of Layers",
|
353 |
+
value=44,
|
354 |
+
info="Number of transformer layers used in the model"
|
355 |
+
)
|
356 |
+
with gr.Column("Generatable"):
|
357 |
kv_size_ratio = gr.Number(
|
358 |
label="KV Size Ratio",
|
359 |
value=1.0,
|