Steph254 committed · Commit f8d604d · verified · 1 Parent(s): 72aeff1

Update app.py

Files changed (1): app.py +50 -30
app.py CHANGED
@@ -18,53 +18,73 @@ def load_llama_model(model_path, is_guard=False):
     print(f"Loading model: {model_path}")
 
     try:
-        # Check if token exists and is valid
+        # Check if token exists
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
-            raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
-
-        # Load tokenizer with proper token
-        tokenizer = LlamaTokenizer.from_pretrained(
-            BASE_MODEL,
-            token=token,
-            use_fast=False  # Sometimes helps with compatibility issues
-        )
-
-        # Load config first (to avoid shape mismatch errors)
-        config = AutoModelForCausalLM.from_pretrained(
-            BASE_MODEL,
-            config_only=True,
-            token=token
-        ).config
-
-        # Load model from config
-        model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            token=token,
-            config=config,
-            device_map="auto",  # Better device management
-            torch_dtype=torch.float16  # Use half precision for efficiency
-        )
+            print("Warning: HUGGINGFACE_TOKEN not set, attempting to load without authentication")
+            token = None  # Set to None explicitly
 
-        model.eval()  # Set to inference mode
+        # First, try standard loading method with token handling
+        try:
+            tokenizer = LlamaTokenizer.from_pretrained(
+                BASE_MODEL,
+                use_auth_token=token  # Use this parameter instead of token=
+            )
+
+            model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                use_auth_token=token,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True
+            )
+
+        except Exception as e:
+            print(f"Standard loading failed: {e}, trying alternative method...")
+
+            # Fall back to alternative loading method
+            # Download files first to ensure they exist locally
+            from huggingface_hub import snapshot_download
+
+            cache_dir = snapshot_download(
+                BASE_MODEL,
+                use_auth_token=token,
+                local_dir="./model_cache"
+            )
+
+            # Load tokenizer from local files
+            tokenizer = LlamaTokenizer.from_pretrained(
+                cache_dir,
+                local_files_only=True
+            )
+
+            # Load model from local files
+            model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                use_auth_token=token,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True
+            )
 
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
+            from peft import PeftConfig, PeftModel
+
             model = PeftModel.from_pretrained(
                 model,
-                model_path,
-                token=token
+                model_path,
+                use_auth_token=token
             )
             print("Merging LoRA weights...")
             model = model.merge_and_unload()
-
+
+        model.eval()
         return tokenizer, model
 
     except Exception as e:
         print(f"❌ Error loading model {model_path}: {e}")
         raise
-
+
 # Load Llama 3.2 model
 tokenizer, model = load_llama_model(QLORA_ADAPTER)
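
For context, a minimal usage sketch of the updated loader, not part of the commit. It assumes BASE_MODEL and QLORA_ADAPTER are defined earlier in app.py (their values are not shown in this hunk) and that a valid token is exported as HUGGINGFACE_TOKEN:

# Hypothetical usage sketch; identifiers marked "hypothetical" are not in the commit.
import os

os.environ["HUGGINGFACE_TOKEN"] = "hf_..."  # placeholder; export a real token instead

# The "QLORA" substring in the path triggers the adapter-load-and-merge branch.
tokenizer, model = load_llama_model(QLORA_ADAPTER)

# With is_guard=True the adapter branch is skipped (guard model id is hypothetical).
guard_tokenizer, guard_model = load_llama_model("meta-llama/Llama-Guard-3-1B", is_guard=True)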
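
One compatibility note on the authentication keyword this commit switches to: use_auth_token= is the older transformers spelling, while newer releases accept token= and emit a deprecation warning for use_auth_token=. If app.py must run across versions, a small shim can pick the keyword at runtime; this is a hypothetical helper, not in the commit, and the version cutoff is an assumption:

import transformers
from packaging import version  # already a transformers dependency

def hf_auth_kwargs(token):
    # Roughly transformers v4.32+ accepts token=; older releases expect
    # use_auth_token=. The exact cutoff version is an assumption.
    if version.parse(transformers.__version__) >= version.parse("4.32.0"):
        return {"token": token}
    return {"use_auth_token": token}

# Example: LlamaTokenizer.from_pretrained(BASE_MODEL, **hf_auth_kwargs(token))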