gaunernst committed
Commit 0e5a78d · Parent: 36a071f

add a warning when ctx_len is too large. try to fix HF token

Files changed (1): app.py (+6 −1)
app.py CHANGED
@@ -3,11 +3,12 @@ from transformers import AutoConfig
 
 
 def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
+    hf_token = hf_token.strip()
     try:
         cfg = AutoConfig.from_pretrained(
             name,
             trust_remote_code=True,
-            token=hf_token,
+            token=hf_token or None,
         )
     except Exception as e:
         raise gr.Error(e)
@@ -22,6 +23,10 @@ def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str
         ["num_layers", num_layers],
         ["max_ctx_len", cfg.max_position_embeddings],
     ]
+    if ctx_len > cfg.max_position_embeddings:
+        gr.Warning(
+            "Requested context length is larger than the max value supported by the model"
+        )
 
     # TODO: show attention type, show calculation
     if use_mla:
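For context on the first hunk: the commit message only says "try to fix HF token", but the likely issue is that tokens pasted into a textbox often carry stray whitespace, and an empty string would be sent to the Hub as a (bogus) credential rather than treated as "no token". A minimal sketch of the resulting pattern, trimmed to the lines this commit touches (load_config is a hypothetical name used here for illustration; the app's real function is calculate):

import gradio as gr
from transformers import AutoConfig


def load_config(name: str, hf_token: str):
    # Tokens copy-pasted into a textbox often pick up stray whitespace,
    # which breaks authentication even when the token itself is valid.
    hf_token = hf_token.strip()
    try:
        cfg = AutoConfig.from_pretrained(
            name,
            trust_remote_code=True,
            # An empty string would be sent as a bogus credential;
            # None means anonymous access or the locally cached login.
            token=hf_token or None,
        )
    except Exception as e:
        raise gr.Error(e)
    return cfg

The second hunk deliberately calls gr.Warning instead of raising gr.Error: gr.Warning pops a non-blocking toast and lets the calculation proceed with the oversized ctx_len, whereas a raised gr.Error would abort the handler, as it does for config-loading failures above.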