mtasic85 committed on
Commit
199602f
·
1 Parent(s): cc31c8f

pretrain core 4

Browse files
Files changed (1) hide show
  1. scripts/pretrain_core_model_4.yaml +20 -4
scripts/pretrain_core_model_4.yaml CHANGED
@@ -124,15 +124,31 @@ eval:
124
  # - 0.9
125
  # - 0.999
126
 
 
 
 
 
 
 
 
 
 
 
127
  optimizer:
128
- class_path: sophia_opt.SophiaG
 
 
 
 
129
  init_args:
 
130
  lr: 1e-4
 
 
 
131
  betas:
132
  - 0.9
133
- - 0.95
134
- rho: 0.05
135
- weight_decay: 0.1
136
 
137
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
138
  devices: auto
 
124
  # - 0.9
125
  # - 0.999
126
 
127
+ # optimizer:
128
+ # class_path: sophia_opt.SophiaG
129
+ # init_args:
130
+ # lr: 1e-4
131
+ # betas:
132
+ # - 0.9
133
+ # - 0.95
134
+ # rho: 0.05
135
+ # weight_decay: 0.1
136
+
137
  optimizer:
138
+ # class_path: torch.optim.AdamW
139
+ class_path: torchao.prototype.low_bit_optim.AdamW8bit
140
+ # class_path: torchao.prototype.low_bit_optim.AdamW4bit
141
+ # class_path: bitsandbytes.optim.AdamW8bit
142
+ # class_path: bitsandbytes.optim.PagedAdamW8bit
143
  init_args:
144
+ # (type: float, default: 0.001)
145
  lr: 1e-4
146
+ # (type: float, default: 0.01)
147
+ weight_decay: 0.01
148
+ # (type: tuple, default: (0.9,0.999))
149
  betas:
150
  - 0.9
151
+ - 0.999
 
 
152
 
153
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
154
  devices: auto