Steph254 committed
Commit 196f1dd · verified · 1 Parent(s): 3e8ef05

Update app.py

Files changed (1)
  1. app.py +10 -12
app.py CHANGED
@@ -18,20 +18,18 @@ QUANTIZED_MODEL = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8" # Directly
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 
 # Function to load Llama model (without LoRA)
-def load_llama_model(model_name, is_guard=False):
+def load_llama_model(model_name):
     print(f"🔄 Loading Model: {model_name}")
-
+
     tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        token=HUGGINGFACE_TOKEN,
-        torch_dtype=torch.float32,
-        low_cpu_mem_usage=True
-    )
-
-    model.eval()
-    print("✅ Model Loaded Successfully")
-    return tokenizer, model
+
+    # Load the checkpoint manually
+    model_path = f"{model_name}/consolidated.00.pth"
+    state_dict = torch.load(model_path, map_location="cpu")  # Adjust for GPU if needed
+
+    print("✅ Model state dictionary loaded successfully!")
+
+    return tokenizer, state_dict
 
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)
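
Note that after this change, torch.load expects consolidated.00.pth to sit on local disk under a directory named after the repo id, which is not the case when the checkpoint only lives on the Hugging Face Hub. A minimal sketch of fetching the file first, assuming the repo actually ships a consolidated.00.pth (an unverified assumption, and load_checkpoint_from_hub is a hypothetical helper, not part of this commit):

# Hypothetical helper, not part of this commit: download the raw checkpoint
# from the Hub so that torch.load can read it from a real local path.
import torch
from huggingface_hub import hf_hub_download

def load_checkpoint_from_hub(repo_id, token):
    # hf_hub_download returns the local cache path of the requested file
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename="consolidated.00.pth",  # assumed filename, as used in the commit
        token=token,
    )
    # Load tensors onto CPU; move to GPU afterwards if needed
    return torch.load(local_path, map_location="cpu")

Note also that the call site still unpacks tokenizer, model = load_llama_model(QUANTIZED_MODEL), so downstream code now receives a raw state dict rather than an nn.Module; it would need to instantiate the architecture and call load_state_dict on it before running inference.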