IST199655 committed on
Commit
4acb7f4
Β·
1 Parent(s): d44f7f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import InferenceClient
4
  """
5
  Copied from inference in colab notebook
6
  """
7
- from unsloth.chat_templates import get_chat_template
8
  from unsloth import FastLanguageModel
9
 
10
  # IMPORTING MODEL AND TOKENIZER β€”β€”β€”β€”β€”β€”β€”β€”
@@ -20,10 +20,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
20
  load_in_4bit = load_in_4bit,
21
  )
22
 
23
- tokenizer = get_chat_template(
24
- tokenizer,
25
- chat_template = "llama-3.1",
26
- )
27
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
28
 
29
  # RUNNING INFERENCE β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
 
4
  """
5
  Copied from inference in colab notebook
6
  """
7
+ # from unsloth.chat_templates import get_chat_template
8
  from unsloth import FastLanguageModel
9
 
10
  # IMPORTING MODEL AND TOKENIZER β€”β€”β€”β€”β€”β€”β€”β€”
 
20
  load_in_4bit = load_in_4bit,
21
  )
22
 
23
+ # tokenizer = get_chat_template(
24
+ # tokenizer,
25
+ # chat_template = "llama-3.1",
26
+ # )
27
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
28
 
29
  # RUNNING INFERENCE β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”