IST199655 committed
Commit 26b0879 · 1 Parent(s): 4acb7f4
Files changed (2)
  1. app.py +5 -5
  2. requirements.txt +4 -1
app.py CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import InferenceClient
 """
 Copied from inference in colab notebook
 """
-# from unsloth.chat_templates import get_chat_template
+from unsloth.chat_templates import get_chat_template
 from unsloth import FastLanguageModel
 
 # IMPORTING MODEL AND TOKENIZER ————————
@@ -20,10 +20,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     load_in_4bit = load_in_4bit,
 )
 
-# tokenizer = get_chat_template(
-#     tokenizer,
-#     chat_template = "llama-3.1",
-# )
+tokenizer = get_chat_template(
+    tokenizer,
+    chat_template = "llama-3.1",
+)
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 # RUNNING INFERENCE ————————————————————————
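
With the chat template no longer commented out, every conversation is wrapped in the Llama 3.1 prompt format before generation. A minimal sketch of how the re-enabled tokenizer would typically be driven at inference time, assuming an illustrative checkpoint name, message, and generation settings (none of which are part of this commit):

from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template

# Load a 4-bit model and tokenizer (model_name is a placeholder, not the Space's actual checkpoint).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",
    max_seq_length = 2048,
    load_in_4bit = True,
)
tokenizer = get_chat_template(tokenizer, chat_template = "llama-3.1")
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Format one chat turn with the llama-3.1 template, then generate a reply.
messages = [{"role": "user", "content": "Hello!"}]
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # append the assistant header so the model answers
    return_tensors = "pt",
).to(model.device)
outputs = model.generate(input_ids = input_ids, max_new_tokens = 64)
print(tokenizer.decode(outputs[0], skip_special_tokens = True))
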
requirements.txt CHANGED
@@ -1,3 +1,6 @@
 huggingface_hub==0.25.2
 
-unsloth
+unsloth
+torch==2.0.1+cpu --force-reinstall
+torchvision==0.15.2+cpu --force-reinstall
+-f https://download.pytorch.org/whl/cpu
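
The added lines pin CPU-only PyTorch builds: "-f https://download.pytorch.org/whl/cpu" is pip's --find-links option pointing at PyTorch's CPU wheel index, which is where the "+cpu" tagged builds of torch 2.0.1 and torchvision 0.15.2 are published. Presumably this keeps the Space from pulling the much larger default CUDA wheels.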