placed input_ids on cuda and passed input_ids directly to generate() for the model
app.py
CHANGED
@@ -34,9 +34,10 @@ def llama_generation(input_text):
     """

     input_ids = llama_tokenizer.encode(input_text,
-                                       return_tensors='pt')
+                                       return_tensors='pt').to('cuda')

-
+    # llama generation looks for the numeric vectors not the tensors so there is no need for **input_ids rather just input_ids
+    output_ids = llama_model.generate(input_ids=input_ids)

     # Decode
     output_text = llama_tokenizer.decode(output_ids,
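For context, here is a minimal sketch of how the changed lines fit into llama_generation once this commit is applied. Moving the tensor with .to('cuda') matters because generate() raises a device-mismatch error when the model weights are on the GPU while input_ids is still on the CPU. Only the encode, .to('cuda'), and generate(input_ids=input_ids) lines come from the diff above; the checkpoint name, model/tokenizer loading, and the decode/return details are assumptions for illustration.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed setup: the checkpoint actually used by app.py is not shown in this diff,
# so "meta-llama/Llama-2-7b-chat-hf" is only a placeholder.
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
llama_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    torch_dtype=torch.float16,
).to("cuda")

def llama_generation(input_text):
    """Generate a completion for input_text with the Llama model."""
    # encode() returns a token-id tensor; move it to the same device as the model.
    input_ids = llama_tokenizer.encode(input_text,
                                       return_tensors='pt').to('cuda')

    # generate() takes the id tensor directly through the input_ids keyword,
    # so no **input_ids unpacking is needed.
    output_ids = llama_model.generate(input_ids=input_ids)

    # Decode the first (and only) generated sequence back to text.
    output_text = llama_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text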