Steph254 committed on
Commit
f2b9562
·
verified ·
1 Parent(s): 65774c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -24,7 +24,7 @@ def load_quantized_model(model_path):
24
  # Use Hugging Face transformers to load the quantized model directly
25
  model = LlamaForCausalLM.from_pretrained(
26
  model_path,
27
- use_auth_token="HUGGINGFACE_TOKEN",
28
  device_map="auto", # Auto-distributes across CPU/GPU
29
  torch_dtype=torch.float16, # Reduces memory usage
30
  low_cpu_mem_usage=True # Optimized RAM loading
 
24
  # Use Hugging Face transformers to load the quantized model directly
25
  model = LlamaForCausalLM.from_pretrained(
26
  model_path,
27
+ use_auth_token=HUGGINGFACE_TOKEN,
28
  device_map="auto", # Auto-distributes across CPU/GPU
29
  torch_dtype=torch.float16, # Reduces memory usage
30
  low_cpu_mem_usage=True # Optimized RAM loading