Spaces:

Steph254
/

demo_1

Runtime error

App Files Community

Steph254 committed on Mar 18

Commit

f8d604d

verified ·

1 Parent(s): 72aeff1

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -30

app.py CHANGED Viewed

@@ -18,53 +18,73 @@ def load_llama_model(model_path, is_guard=False):
     print(f"Loading model: {model_path}")
     try:
-        # Check if token exists and is valid
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
-            raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
-        # Load tokenizer with proper token
-        tokenizer = LlamaTokenizer.from_pretrained(
-            BASE_MODEL,
-            token=token,
-            use_fast=False  # Sometimes helps with compatibility issues
-        )
-        # Load config first (to avoid shape mismatch errors)
-        config = AutoModelForCausalLM.from_pretrained(
-            BASE_MODEL,
-            config_only=True,
-            token=token
-        ).config
-        # Load model from config
-        model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            token=token,
-            config=config,
-            device_map="auto",  # Better device management
-            torch_dtype=torch.float16  # Use half precision for efficiency
-        )
-        model.eval()  # Set to inference mode
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
             model = PeftModel.from_pretrained(
                 model,
-                model_path,
-                token=token
             )
             print("Merging LoRA weights...")
             model = model.merge_and_unload()
         return tokenizer, model
     except Exception as e:
         print(f"❌ Error loading model {model_path}: {e}")
         raise
 # Load Llama 3.2 model
 tokenizer, model = load_llama_model(QLORA_ADAPTER)

     print(f"Loading model: {model_path}")
     try:
+        # Check if token exists
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
+            print("Warning: HUGGINGFACE_TOKEN not set, attempting to load without authentication")
+            token = None  # Set to None explicitly
+        # First, try standard loading method with token handling
+        try:
+            tokenizer = LlamaTokenizer.from_pretrained(
+                BASE_MODEL,
+                use_auth_token=token  # Use this parameter instead of token=
+            )
+            model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                use_auth_token=token,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True
+            )
+        except Exception as e:
+            print(f"Standard loading failed: {e}, trying alternative method...")
+            # Fall back to alternative loading method
+            # Download files first to ensure they exist locally
+            from huggingface_hub import snapshot_download
+            cache_dir = snapshot_download(
+                BASE_MODEL,
+                use_auth_token=token,
+                local_dir="./model_cache"
+            )
+            # Load tokenizer from local files
+            tokenizer = LlamaTokenizer.from_pretrained(
+                cache_dir,
+                local_files_only=True
+            )
+            # Load model from local files
+            model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                use_auth_token=token,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True
+            )
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
+            from peft import PeftConfig, PeftModel
             model = PeftModel.from_pretrained(
                 model,
+                model_path,
+                use_auth_token=token
             )
             print("Merging LoRA weights...")
             model = model.merge_and_unload()
+        model.eval()
         return tokenizer, model
     except Exception as e:
         print(f"❌ Error loading model {model_path}: {e}")
         raise
 # Load Llama 3.2 model
 tokenizer, model = load_llama_model(QLORA_ADAPTER)