Spaces:
Running
Running
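Add a warning when ctx_len exceeds the model's max_position_embeddings. Try to fix HF token handling: strip surrounding whitespace and pass None when the token is empty.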
Browse files
app.py
CHANGED
@@ -3,11 +3,12 @@ from transformers import AutoConfig
|
|
3 |
|
4 |
|
5 |
def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
|
|
|
6 |
try:
|
7 |
cfg = AutoConfig.from_pretrained(
|
8 |
name,
|
9 |
trust_remote_code=True,
|
10 |
-
token=hf_token,
|
11 |
)
|
12 |
except Exception as e:
|
13 |
raise gr.Error(e)
|
@@ -22,6 +23,10 @@ def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str
|
|
22 |
["num_layers", num_layers],
|
23 |
["max_ctx_len", cfg.max_position_embeddings],
|
24 |
]
|
|
|
|
|
|
|
|
|
25 |
|
26 |
# TODO: show attention type, show calculation
|
27 |
if use_mla:
|
|
|
3 |
|
4 |
|
5 |
def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
|
6 |
+
hf_token = hf_token.strip()
|
7 |
try:
|
8 |
cfg = AutoConfig.from_pretrained(
|
9 |
name,
|
10 |
trust_remote_code=True,
|
11 |
+
token=hf_token or None,
|
12 |
)
|
13 |
except Exception as e:
|
14 |
raise gr.Error(e)
|
|
|
23 |
["num_layers", num_layers],
|
24 |
["max_ctx_len", cfg.max_position_embeddings],
|
25 |
]
|
26 |
+
if ctx_len > cfg.max_position_embeddings:
|
27 |
+
gr.Warning(
|
28 |
+
"Requested context length is larger than the max value supported by the model"
|
29 |
+
)
|
30 |
|
31 |
# TODO: show attention type, show calculation
|
32 |
if use_mla:
|