ethanlshen committed on
Commit
02f45d3
·
verified ·
1 Parent(s): d8add38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,6 @@ os.environ['WORLD_SIZE'] = "1"
17
  os.environ['MASTER_PORT'] = "12193"
18
  os.environ['MASTER_ADDR'] = "127.0.0.1"
19
 
20
- @spaces.GPU
21
  def load_models():
22
  model = SuperposedLlama.build(ckpt_dir=weight_path,
23
  tokenizer_path=f'{weight_path}/tokenizer.model',
@@ -47,6 +46,8 @@ i_weights = params["i_weights"]
47
  i_length = params["i_length"]
48
  # Load main model
49
  model = load_models()
 
 
50
  tokenizer = Tokenizer(f'{weight_path}/tokenizer.model')
51
  # Create ngram models
52
  ngrams = make_models("ckpts-200k", bigram=True, trigram=True, fourgram=True, fivegram=True, sixgram=True, sevengram=False)
@@ -66,7 +67,6 @@ def decode(tokenizer, encoding):
66
 
67
  @spaces.GPU
68
  def update_options(input, num_tokens):
69
- model.to("cuda")
70
  tokenized_prompts = tokenizer.encode([input], True, False)
71
  alive_gens, _ = model.sup_generate(prompt_tokens=tokenized_prompts,
72
  smoothing="geom",
 
17
  os.environ['MASTER_PORT'] = "12193"
18
  os.environ['MASTER_ADDR'] = "127.0.0.1"
19
 
 
20
  def load_models():
21
  model = SuperposedLlama.build(ckpt_dir=weight_path,
22
  tokenizer_path=f'{weight_path}/tokenizer.model',
 
46
  i_length = params["i_length"]
47
  # Load main model
48
  model = load_models()
49
+ model.model.to("cuda")
50
+ model.device = "cuda"
51
  tokenizer = Tokenizer(f'{weight_path}/tokenizer.model')
52
  # Create ngram models
53
  ngrams = make_models("ckpts-200k", bigram=True, trigram=True, fourgram=True, fivegram=True, sixgram=True, sevengram=False)
 
67
 
68
  @spaces.GPU
69
  def update_options(input, num_tokens):
 
70
  tokenized_prompts = tokenizer.encode([input], True, False)
71
  alive_gens, _ = model.sup_generate(prompt_tokens=tokenized_prompts,
72
  smoothing="geom",