sandz7 commited on
Commit
9633993
·
1 Parent(s): 516209b

max new tokens on generation 256

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -42,7 +42,8 @@ def llama_generation(input_text: str,
42
  return_tensors='pt').to('cuda')
43
 
44
  # llama generation looks for the numeric vectors not the tensors so there is no need for **input_ids rather just input_ids
45
- output_ids = llama_model.generate(input_ids=input_ids)
 
46
 
47
  # Decode
48
  output_text = llama_tokenizer.decode(output_ids[0],
 
42
  return_tensors='pt').to('cuda')
43
 
44
  # llama generation looks for the numeric vectors not the tensors so there is no need for **input_ids rather just input_ids
45
+ output_ids = llama_model.generate(input_ids=input_ids,
46
+ max_new_tokens=256)
47
 
48
  # Decode
49
  output_text = llama_tokenizer.decode(output_ids[0],