Steph254 committed on
Commit
48bf8a4
·
verified ·
1 Parent(s): f2b9562

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import gradio as gr
3
  import torch
4
  import json
5
- from transformers import LlamaTokenizer, LlamaForCausalLM
6
  from peft import PeftModel
7
 
8
  # Set Hugging Face Token for Authentication
@@ -21,14 +21,25 @@ LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
21
  def load_quantized_model(model_path):
22
  print(f"🔄 Loading Quantized Model: {model_path}")
23
 
24
- # Use Hugging Face transformers to load the quantized model directly
25
- model = LlamaForCausalLM.from_pretrained(
26
- model_path,
27
- use_auth_token=HUGGINGFACE_TOKEN,
28
- device_map="auto", # Auto-distributes across CPU/GPU
29
- torch_dtype=torch.float16, # Reduces memory usage
30
- low_cpu_mem_usage=True # Optimized RAM loading
31
- )
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  print("✅ Quantized model loaded successfully!")
34
  return model
 
2
  import gradio as gr
3
  import torch
4
  import json
5
+ from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaConfig
6
  from peft import PeftModel
7
 
8
  # Set Hugging Face Token for Authentication
 
21
  def load_quantized_model(model_path):
22
  print(f"🔄 Loading Quantized Model: {model_path}")
23
 
24
+ # Load the config manually
25
+ config = LlamaConfig.from_pretrained(model_path)
26
+
27
+ # Initialize model
28
+ model = LlamaForCausalLM(config)
29
+
30
+ # Load the quantized weights manually
31
+ checkpoint_path = os.path.join(model_path, "consolidated.00.pth")
32
+ if not os.path.exists(checkpoint_path):
33
+ raise FileNotFoundError(f"❌ Checkpoint file not found: {checkpoint_path}")
34
+
35
+ state_dict = torch.load(checkpoint_path, map_location="cpu")
36
+
37
+ # Load the state dict into the model
38
+ model.load_state_dict(state_dict, strict=False)
39
+
40
+ # Move model to GPU if available
41
+ device = "cuda" if torch.cuda.is_available() else "cpu"
42
+ model.to(device)
43
 
44
  print("✅ Quantized model loaded successfully!")
45
  return model