Upload run_cloud_training.py with huggingface_hub
run_cloud_training.py  CHANGED  (+32 -3)
@@ -28,6 +28,9 @@ os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 # Force GPU mode in Space if we're using a pre-quantized model
 os.environ["FORCE_GPU"] = "1"
 
+# Disable tokenizers parallelism warning
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
 # Create triton directory to avoid warning
 os.makedirs(os.path.expanduser("~/.triton/autotune"), exist_ok=True)
 
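Context for the new TOKENIZERS_PARALLELISM line: the tokenizers library emits its fork-after-parallelism warning when a process that has already tokenized in parallel later forks, which is what happens once the Trainer spawns dataloader worker processes (dataloader_num_workers defaults to 4 further down in this file). A minimal standalone sketch of the safe ordering; the tokenizer below is illustrative and not taken from the script:

import os

# Set before any tokenization so forked dataloader workers do not trigger the
# tokenizers fork/parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint, fast tokenizer by default
_ = tokenizer("hello world")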
@@ -524,12 +527,24 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
 
     # Create LoRA config
     logger.info("Creating LoRA configuration")
+
+    # For pre-quantized models, we need proper target modules
+    default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
+
+    # For pre-quantized models, especially Unsloth ones, we need to be careful with the target modules
+    if is_pre_quantized:
+        # For Unsloth models, use special configuration
+        if "unsloth" in model_name.lower():
+            default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+            logger.info("Using Unsloth-specific LoRA target modules")
+
     lora_config_obj = LoraConfig(
         r=lora_config.get("r", 16),
         lora_alpha=lora_config.get("lora_alpha", 32),
         lora_dropout=lora_config.get("lora_dropout", 0.05),
         bias=lora_config.get("bias", "none"),
-
+        task_type="CAUSAL_LM",  # Explicitly set the task type
+        target_modules=lora_config.get("target_modules", default_target_modules)
     )
 
     # Apply LoRA to model
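For orientation, the adapter setup this hunk produces is roughly equivalent to the standalone sketch below; the checkpoint name and hyperparameter values are illustrative stand-ins for what the script reads from lora_config:

from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Hypothetical pre-quantized (bitsandbytes 4-bit) Unsloth checkpoint
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/llama-2-7b-bnb-4bit",
    device_map="auto",
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Unsloth-style checkpoints: target the MLP projections as well as attention
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices should be trainable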
@@ -537,6 +552,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
     model = get_peft_model(model, lora_config_obj)
     logger.info("Successfully applied LoRA")
 
+    # Ensure model parameters that need gradients are properly set
+    if is_pre_quantized:
+        logger.info("Verifying gradient settings for pre-quantized model")
+        for name, param in model.named_parameters():
+            if 'lora' in name:  # Only LoRA parameters should be trained
+                if not param.requires_grad:
+                    logger.warning(f"LoRA parameter {name} doesn't have requires_grad=True, fixing...")
+                    param.requires_grad = True
+
     # Always use minimal batch size for HF Space CPU
     if is_running_in_space() and not can_use_4bit and not is_pre_quantized:
         per_device_train_batch_size = 1
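A quick sanity check for the requires_grad fix-up (a sketch, not part of the script) is to count trainable versus frozen parameters on the PEFT-wrapped model:

def summarize_trainable(model):
    # Count parameters that will receive gradients vs. those that stay frozen
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

# summarize_trainable(model)  # call on the model returned by get_peft_model above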
@@ -566,10 +590,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
         gradient_accumulation_steps = training_config.get("gradient_accumulation_steps", 4)
         fp16 = torch.cuda.is_available() and hardware_config.get("fp16", True)
         bf16 = torch.cuda.is_available() and hardware_config.get("bf16", False)
-
+        # Disable gradient checkpointing for pre-quantized models as it can cause gradient issues
+        gradient_checkpointing = torch.cuda.is_available() and hardware_config.get("gradient_checkpointing", True) and not is_pre_quantized
         dataloader_workers = training_config.get("dataloader_num_workers", 4)
-        eval_strategy = training_config.get("eval_strategy", "no")
+        eval_strategy = training_config.get("eval_strategy", "no")
         load_best_model_at_end = False  # Must be False when eval_strategy is "no"
+
+        if is_pre_quantized:
+            logger.info("Disabled gradient checkpointing for pre-quantized model to avoid gradient issues")
+
         logger.info("Using full training parameters for GPU mode")
     else:
         # For Space CPU training mode, use minimal parameters
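These flags presumably feed a transformers TrainingArguments call further down in the file; a hedged sketch of how such values are typically passed, using the defaults visible in this diff (on older transformers releases the eval_strategy keyword is spelled evaluation_strategy):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",              # illustrative path; the script receives its own output_dir argument
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    fp16=True,
    bf16=False,
    gradient_checkpointing=False,        # disabled for pre-quantized models in this diff
    dataloader_num_workers=4,
    eval_strategy="no",
    load_best_model_at_end=False,        # must be False when eval_strategy is "no"
)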
|