added 2 parameters for llama_generation
app.py CHANGED
@@ -28,7 +28,8 @@ llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
 llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN, torch_dtype=torch.float16).to('cuda')
 
 # Place just input pass and return generation output
-def llama_generation(input_text):
+def llama_generation(input_text: str,
+                     history):
     """
     Pass input texts, tokenize, output and back to text.
     """
@@ -37,11 +38,10 @@ def llama_generation(input_text):
                                       return_tensors='pt').to('cuda')
 
     # llama generation looks for the numeric vectors not the tensors so there is no need for **input_ids rather just input_ids
-    output_ids = llama_model.generate(input_ids=input_ids,
-                                      max_new_tokens=400)
+    output_ids = llama_model.generate(input_ids=input_ids)
 
     # Decode
-    output_text = llama_tokenizer.decode(output_ids
+    output_text = llama_tokenizer.decode(output_ids,
                                          skip_special_tokens=True)
 
     return output_text
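The added history parameter matches the (message, history) signature that Gradio's ChatInterface passes to its callback, so a plausible reading of this commit is that llama_generation is being wired into a chat UI. The sketch below is only an illustration under that assumption, not the repository's actual app.py: the gr.ChatInterface hookup, the MODEL_ID constant, keeping max_new_tokens=400 (which this commit removes), dropping the token=TOKEN argument used in the original file, and decoding output_ids[0] are all choices made here for the example.

# Hypothetical sketch: the updated two-argument function wired into a Gradio
# ChatInterface. Everything outside the diff (the Gradio hookup, MODEL_ID,
# max_new_tokens, decoding the first sequence) is assumed for illustration.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Meta-Llama-3-8B"
# The original app.py also passes token=TOKEN here for gated-model access.
llama_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
llama_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")

def llama_generation(input_text: str, history):
    # ChatInterface calls fn(message, history); this sketch accepts history
    # but does not fold earlier turns into the prompt.
    input_ids = llama_tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

    # generate() takes the input_ids tensor directly; no ** unpacking of the
    # tokenizer output is required.
    output_ids = llama_model.generate(input_ids=input_ids, max_new_tokens=400)

    # generate() returns a batch of sequences, so decode the first one.
    return llama_tokenizer.decode(output_ids[0], skip_special_tokens=True)

demo = gr.ChatInterface(fn=llama_generation)
demo.launch()

Two things worth noting about the diff itself: with max_new_tokens removed, generate() falls back to the defaults in the model's generation_config, which can cut responses short; and since generate() returns a 2-D batch of token ids, tokenizer.decode() is normally given a single sequence such as output_ids[0] (or batch_decode() is used), which is why the sketch indexes the first row.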