Spaces:

Steph254
/

demo_1

Runtime error

Steph254 committed on Mar 18

Commit

72aeff1

verified ·

1 Parent(s): b7a28cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,28 +18,44 @@ def load_llama_model(model_path, is_guard=False):
     print(f"Loading model: {model_path}")
     try:
-        # Load tokenizer
-        tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL, token=HUGGINGFACE_TOKEN)
         # Load config first (to avoid shape mismatch errors)
-        config = AutoModelForCausalLM.from_pretrained(BASE_MODEL, config_only=True).config
-        # 🔹 Manually load the `.pth` file
-        state_dict_path = os.path.join(model_path, "consolidated.00.pth")
-        if not os.path.exists(state_dict_path):
-            raise FileNotFoundError(f"Missing model weights: {state_dict_path}")
-        state_dict = torch.load(state_dict_path, map_location="cpu")
-        # Load model from config and manually apply weights
-        model = AutoModelForCausalLM.from_config(config)
-        model.load_state_dict(state_dict, strict=False)  # Use strict=False to allow missing keys
         model.eval()  # Set to inference mode
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
-            model = PeftModel.from_pretrained(model, model_path, token=HUGGINGFACE_TOKEN)
             print("Merging LoRA weights...")
             model = model.merge_and_unload()

     print(f"Loading model: {model_path}")
     try:
+        # Check if token exists and is valid
+        token = os.getenv("HUGGINGFACE_TOKEN")
+        if not token:
+            raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+        # Load tokenizer with proper token
+        tokenizer = LlamaTokenizer.from_pretrained(
+            BASE_MODEL,
+            token=token,
+            use_fast=False  # Sometimes helps with compatibility issues
+        )
         # Load config first (to avoid shape mismatch errors)
+        config = AutoModelForCausalLM.from_pretrained(
+            BASE_MODEL,
+            config_only=True,
+            token=token
+        ).config
+        # Load model from config
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            token=token,
+            config=config,
+            device_map="auto",  # Better device management
+            torch_dtype=torch.float16  # Use half precision for efficiency
+        )
         model.eval()  # Set to inference mode
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
+            model = PeftModel.from_pretrained(
+                model,
+                model_path,
+                token=token
+            )
             print("Merging LoRA weights...")
             model = model.merge_and_unload()