added 2 parameters for llama_generation
app.py CHANGED
@@ -28,7 +28,8 @@ llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
 llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN, torch_dtype=torch.float16).to('cuda')
 
 # Place just input pass and return generation output
-def llama_generation(input_text):
+def llama_generation(input_text: str,
+                     history):
     """
     Pass input texts, tokenize, output and back to text.
     """
@@ -37,11 +38,10 @@ def llama_generation(input_text):
                                       return_tensors='pt').to('cuda')
 
     # llama generation looks for the numeric vectors not the tensors so there is no need for **input_ids rather just input_ids
-    output_ids = llama_model.generate(input_ids=input_ids,
-                                      max_new_tokens=400)
+    output_ids = llama_model.generate(input_ids=input_ids)
 
     # Decode
-    output_text = llama_tokenizer.decode(output_ids
+    output_text = llama_tokenizer.decode(output_ids,
                                          skip_special_tokens=True)
 
     return output_text
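The added history parameter matches the (message, history) signature that Gradio's ChatInterface passes to its callback, so a plausible reading of this commit is that llama_generation is being wired into a chat UI. The sketch below is only an illustration under that assumption, not the repository's actual app.py: the gr.ChatInterface hookup, the MODEL_ID constant, keeping max_new_tokens=400 (which this commit removes), dropping the token=TOKEN argument used in the original file, and decoding output_ids[0] are all choices made here for the example.

# Hypothetical sketch: the updated two-argument function wired into a Gradio
# ChatInterface. Everything outside the diff (the Gradio hookup, MODEL_ID,
# max_new_tokens, decoding the first sequence) is assumed for illustration.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Meta-Llama-3-8B"
# The original app.py also passes token=TOKEN here for gated-model access.
llama_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
llama_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")

def llama_generation(input_text: str, history):
    # ChatInterface calls fn(message, history); this sketch accepts history
    # but does not fold earlier turns into the prompt.
    input_ids = llama_tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

    # generate() takes the input_ids tensor directly; no ** unpacking of the
    # tokenizer output is required.
    output_ids = llama_model.generate(input_ids=input_ids, max_new_tokens=400)

    # generate() returns a batch of sequences, so decode the first one.
    return llama_tokenizer.decode(output_ids[0], skip_special_tokens=True)

demo = gr.ChatInterface(fn=llama_generation)
demo.launch()

Two things worth noting about the diff itself: with max_new_tokens removed, generate() falls back to the defaults in the model's generation_config, which can cut responses short; and since generate() returns a 2-D batch of token ids, tokenizer.decode() is normally given a single sequence such as output_ids[0] (or batch_decode() is used), which is why the sketch indexes the first row.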