Update app.py
app.py
CHANGED
@@ -24,7 +24,7 @@ def load_quantized_model(model_path):
     # Use Hugging Face transformers to load the quantized model directly
     model = LlamaForCausalLM.from_pretrained(
         model_path,
-        use_auth_token=
+        use_auth_token=HUGGINGFACE_TOKEN,
         device_map="auto",  # Auto-distributes across CPU/GPU
         torch_dtype=torch.float16,  # Reduces memory usage
         low_cpu_mem_usage=True  # Optimized RAM loading
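For context, a minimal sketch of the surrounding code as it might look after this change. The commit does not show where HUGGINGFACE_TOKEN is defined, so reading it from an environment variable here is an assumption, not part of the actual app.py.

# Sketch only: HUGGINGFACE_TOKEN's real source is not shown in this commit;
# loading it from an environment variable is an assumption for illustration.
import os

import torch
from transformers import LlamaForCausalLM

HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")  # assumed token source

def load_quantized_model(model_path):
    # Use Hugging Face transformers to load the quantized model directly
    model = LlamaForCausalLM.from_pretrained(
        model_path,
        use_auth_token=HUGGINGFACE_TOKEN,  # access token for gated/private repos
        device_map="auto",                 # Auto-distributes across CPU/GPU
        torch_dtype=torch.float16,         # Reduces memory usage
        low_cpu_mem_usage=True             # Optimized RAM loading
    )
    return model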