ethanlshen committed on
Commit
f9e22c8
·
verified ·
1 Parent(s): 0633ac3

Added model loading function

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -17,6 +17,15 @@ os.environ['WORLD_SIZE'] = "1"
17
  os.environ['MASTER_PORT'] = "12193"
18
  os.environ['MASTER_ADDR'] = "127.0.0.1"
19
 
 
 
 
 
 
 
 
 
 
20
  # load_dotenv()
21
  # print(os.getenv("HF_ACCESS_TOKEN"))
22
  login(os.getenv("HF_ACCESS_TOKEN"))
@@ -36,11 +45,7 @@ n_token_sample = params["n_token_sample"]
36
  i_weights = params["i_weights"]
37
  i_length = params["i_length"]
38
  # Load main model
39
- model = SuperposedLlama.build(ckpt_dir=weight_path,
40
- tokenizer_path=f'{weight_path}/tokenizer.model',
41
- max_seq_len=100,
42
- max_batch_size=32,
43
- model_parallel_size=1)
44
  tokenizer = Tokenizer(f'{weight_path}/tokenizer.model')
45
  # Create ngram models
46
  ngrams = make_models("ckpts-200k", bigram=True, trigram=True, fourgram=True, fivegram=True, sixgram=True, sevengram=False)
 
17
  os.environ['MASTER_PORT'] = "12193"
18
  os.environ['MASTER_ADDR'] = "127.0.0.1"
19
 
20
+ @spaces.GPU
21
+ def load_models():
22
+ model = SuperposedLlama.build(ckpt_dir=weight_path,
23
+ tokenizer_path=f'{weight_path}/tokenizer.model',
24
+ max_seq_len=100,
25
+ max_batch_size=32,
26
+ model_parallel_size=1)
27
+ return model
28
+
29
  # load_dotenv()
30
  # print(os.getenv("HF_ACCESS_TOKEN"))
31
  login(os.getenv("HF_ACCESS_TOKEN"))
 
45
  i_weights = params["i_weights"]
46
  i_length = params["i_length"]
47
  # Load main model
48
+ model = load_models()
 
 
 
 
49
  tokenizer = Tokenizer(f'{weight_path}/tokenizer.model')
50
  # Create ngram models
51
  ngrams = make_models("ckpts-200k", bigram=True, trigram=True, fourgram=True, fivegram=True, sixgram=True, sevengram=False)