---
# Fine-tuning configuration (Hydra) for LoRA training of SmolLM2-135M.
defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  # name: "HuggingFaceTB/SmolLM2-135M-Instruct"
  max_seq_length: 2048  # Auto supports RoPE Scaling internally
  provider: "openai"
  dtype: null  # null for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true  # Use 4bit quantization to reduce memory usage

# PEFT (LoRA) configuration
peft:
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true
  loftq_config: null
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Dataset configuration
dataset:
  validation_split: 0.1  # 10% of data held out for validation
  seed: 3407  # Random seed for dataset splitting

# Training configuration
training:
  args:
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16
    warmup_steps: 100
    max_steps: 120
    # Written as 5.0e-5 (not 5e-5): YAML 1.1 resolvers such as PyYAML only
    # recognize floats containing a '.', so bare 5e-5 loads as a string.
    learning_rate: 5.0e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  # NOTE(review): sft/data_collator nested under `training` based on section
  # comments; original indentation was lost — confirm against the consumer.
  sft:
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false
    pad_to_multiple_of: 8

# Output configuration
output:
  dir: "final_model"

# Training control
train: false

# Testing configuration
test: true  # Whether to run testing after training
test_dataset:
  name: "gaia-benchmark/GAIA"
  config: "2023_level1"  # Use level 1 questions for testing
  split: "test"  # Use test split for testing
  max_samples: 3  # Number of samples to test on
  max_length: 2048  # Maximum sequence length for testing