George-API committed on
Commit
8d5f419
·
verified ·
1 Parent(s): e278512

Add: Configuration file for training

Browse files
Files changed (1) hide show
  1. transformers_config.json +72 -0
transformers_config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
4
+ "use_cache": false,
5
+ "rope_scaling": {
6
+ "type": "dynamic",
7
+ "factor": 2.0
8
+ }
9
+ },
10
+ "training_config": {
11
+ "num_train_epochs": 3,
12
+ "per_device_train_batch_size": 2,
13
+ "gradient_accumulation_steps": 4,
14
+ "learning_rate": 2e-5,
15
+ "lr_scheduler_type": "cosine",
16
+ "warmup_ratio": 0.03,
17
+ "weight_decay": 0.01,
18
+ "optim": "adamw_torch",
19
+ "max_grad_norm": 0.3,
20
+ "max_seq_length": 2048,
21
+ "logging_steps": 10,
22
+ "save_steps": 200,
23
+ "save_total_limit": 3,
24
+ "evaluation_strategy": "steps",
25
+ "eval_steps": 200,
26
+ "load_best_model_at_end": true,
27
+ "output_dir": "fine_tuned_model",
28
+ "disable_tqdm": false,
29
+ "report_to": ["tensorboard"],
30
+ "logging_first_step": true
31
+ },
32
+ "hardware_config": {
33
+ "fp16": true,
34
+ "bf16": false,
35
+ "gradient_checkpointing": true,
36
+ "device_map": "auto",
37
+ "use_flash_attention": false,
38
+ "attn_implementation": "eager"
39
+ },
40
+ "quantization_config": {
41
+ "load_in_4bit": true,
42
+ "bnb_4bit_compute_dtype": "float16",
43
+ "bnb_4bit_quant_type": "nf4",
44
+ "bnb_4bit_use_double_quant": true
45
+ },
46
+ "lora_config": {
47
+ "r": 16,
48
+ "lora_alpha": 32,
49
+ "lora_dropout": 0.05,
50
+ "bias": "none",
51
+ "target_modules": [
52
+ "q_proj",
53
+ "k_proj",
54
+ "v_proj",
55
+ "o_proj",
56
+ "gate_proj",
57
+ "up_proj",
58
+ "down_proj"
59
+ ]
60
+ },
61
+ "dataset_config": {
62
+ "sort_by_field": "prompt_number",
63
+ "sort_direction": "ascending",
64
+ "max_tokens": 2048,
65
+ "text_field": "conversations",
66
+ "shuffle_seed": 42,
67
+ "training_phase_only": true,
68
+ "pre_tokenized": true,
69
+ "input_ids_field": "input_ids",
70
+ "skip_tokenization": true
71
+ }
72
+ }