George-API committed · Commit 90cba36 · verified · 1 Parent(s): 7e2aaf9

Upload run_cloud_training.py with huggingface_hub

Files changed (1):
  1. run_cloud_training.py (+32, -3)
run_cloud_training.py CHANGED
@@ -28,6 +28,9 @@ os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 # Force GPU mode in Space if we're using a pre-quantized model
 os.environ["FORCE_GPU"] = "1"
 
+# Disable tokenizers parallelism warning
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
 # Create triton directory to avoid warning
 os.makedirs(os.path.expanduser("~/.triton/autotune"), exist_ok=True)
 
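Note on the TOKENIZERS_PARALLELISM change above: the warning it silences comes from the Hugging Face tokenizers library, and the variable only helps if it is set before any tokenizer does work in a process that later forks (e.g. DataLoader workers). A minimal standalone sketch of the intended ordering, not part of this commit (the checkpoint id is a placeholder):

import os

# Set the flag before the tokenizer is ever used, otherwise the
# "process just got forked after parallelism has been used" warning may still fire.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
print(tokenizer("hello world")["input_ids"])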
@@ -524,12 +527,24 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
 
 # Create LoRA config
 logger.info("Creating LoRA configuration")
+
+# For pre-quantized models, we need proper target modules
+default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
+
+# For pre-quantized models, especially Unsloth ones, we need to be careful with the target modules
+if is_pre_quantized:
+    # For Unsloth models, use special configuration
+    if "unsloth" in model_name.lower():
+        default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+        logger.info("Using Unsloth-specific LoRA target modules")
+
 lora_config_obj = LoraConfig(
     r=lora_config.get("r", 16),
     lora_alpha=lora_config.get("lora_alpha", 32),
     lora_dropout=lora_config.get("lora_dropout", 0.05),
     bias=lora_config.get("bias", "none"),
-    target_modules=lora_config.get("target_modules", ["q_proj", "k_proj", "v_proj", "o_proj"])
+    task_type="CAUSAL_LM",  # Explicitly set the task type
+    target_modules=lora_config.get("target_modules", default_target_modules)
 )
 
 # Apply LoRA to model
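A quick way to check that the chosen target modules actually exist in the loaded checkpoint (pre-quantized Unsloth variants included) is to scan the module names; a hedged sketch, assuming `model` is the already-loaded base model and using the candidate list from the hunk above:

# Sketch: verify which LoRA-targetable projections the loaded model exposes.
candidate_targets = {"q_proj", "k_proj", "v_proj", "o_proj",
                     "gate_proj", "up_proj", "down_proj"}

found = set()
for module_name, _ in model.named_modules():
    leaf = module_name.rsplit(".", 1)[-1]
    if leaf in candidate_targets:
        found.add(leaf)

print(f"Projections available for LoRA: {sorted(found)}")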
@@ -537,6 +552,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
 model = get_peft_model(model, lora_config_obj)
 logger.info("Successfully applied LoRA")
 
+# Ensure model parameters that need gradients are properly set
+if is_pre_quantized:
+    logger.info("Verifying gradient settings for pre-quantized model")
+    for name, param in model.named_parameters():
+        if 'lora' in name:  # Only LoRA parameters should be trained
+            if not param.requires_grad:
+                logger.warning(f"LoRA parameter {name} doesn't have requires_grad=True, fixing...")
+                param.requires_grad = True
+
 # Always use minimal batch size for HF Space CPU
 if is_running_in_space() and not can_use_4bit and not is_pre_quantized:
     per_device_train_batch_size = 1
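Besides the manual requires_grad loop added here, PEFT ships a summary helper that makes the same check easy to eyeball; a short sketch using the objects already in this function:

# After get_peft_model(...), the PEFT-wrapped model can report trainable vs. total parameters.
model.print_trainable_parameters()

# Manual cross-check mirroring the loop in this commit: everything trainable
# should be a LoRA tensor when bias="none" and no modules_to_save are set.
trainable = [n for n, p in model.named_parameters() if p.requires_grad]
print(f"{len(trainable)} trainable tensors, e.g. {trainable[:3]}")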
@@ -566,10 +590,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
     gradient_accumulation_steps = training_config.get("gradient_accumulation_steps", 4)
     fp16 = torch.cuda.is_available() and hardware_config.get("fp16", True)
     bf16 = torch.cuda.is_available() and hardware_config.get("bf16", False)
-    gradient_checkpointing = torch.cuda.is_available() and hardware_config.get("gradient_checkpointing", True)
+    # Disable gradient checkpointing for pre-quantized models as it can cause gradient issues
+    gradient_checkpointing = torch.cuda.is_available() and hardware_config.get("gradient_checkpointing", True) and not is_pre_quantized
     dataloader_workers = training_config.get("dataloader_num_workers", 4)
-    eval_strategy = training_config.get("eval_strategy", "no")  # Changed from "steps" to "no" since we don't have an eval_dataset
+    eval_strategy = training_config.get("eval_strategy", "no")
     load_best_model_at_end = False  # Must be False when eval_strategy is "no"
+
+    if is_pre_quantized:
+        logger.info("Disabled gradient checkpointing for pre-quantized model to avoid gradient issues")
+
     logger.info("Using full training parameters for GPU mode")
 else:
     # For Space CPU training mode, use minimal parameters
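These values are normally passed straight into transformers.TrainingArguments; a hedged sketch of that wiring, not the commit's actual call site (on older transformers releases the eval_strategy keyword is spelled evaluation_strategy):

from transformers import TrainingArguments

# Sketch only: how the flags computed above are typically forwarded.
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=fp16,
    bf16=bf16,
    gradient_checkpointing=gradient_checkpointing,   # False for pre-quantized models
    dataloader_num_workers=dataloader_workers,
    eval_strategy=eval_strategy,                     # "no" here, so no eval loop runs
    load_best_model_at_end=load_best_model_at_end,   # must stay False with eval_strategy="no"
)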