mtasic85 committed on
Commit
a412c7b
·
1 Parent(s): 7c5858c

pretrain core 4

Browse files
Files changed (1) hide show
  1. scripts/pretrain_core_model_4.yaml +5 -21
scripts/pretrain_core_model_4.yaml CHANGED
@@ -60,7 +60,7 @@ train:
60
  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
61
  # global_batch_size: 512
62
  # global_batch_size: 256
63
- global_batch_size: 64
64
 
65
  # Number of samples per data-parallel rank (type: int, default: 4)
66
  micro_batch_size: 1
@@ -124,31 +124,15 @@ eval:
124
  # - 0.9
125
  # - 0.999
126
 
127
- # optimizer:
128
- # class_path: sophia_opt.SophiaG
129
- # init_args:
130
- # lr: 1e-4
131
- # betas:
132
- # - 0.9
133
- # - 0.95
134
- # rho: 0.05
135
- # weight_decay: 0.1
136
-
137
  optimizer:
138
- # class_path: torch.optim.AdamW
139
- # class_path: torchao.prototype.low_bit_optim.AdamW8bit
140
- # class_path: torchao.prototype.low_bit_optim.AdamW4bit
141
- # class_path: bitsandbytes.optim.AdamW8bit
142
- class_path: bitsandbytes.optim.PagedAdamW8bit
143
  init_args:
144
- # (type: float, default: 0.001)
145
  lr: 1e-4
146
- # (type: float, default: 0.01)
147
- weight_decay: 0.01
148
- # (type: tuple, default: (0.9,0.999))
149
  betas:
150
  - 0.9
151
- - 0.999
 
 
152
 
153
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
154
  devices: auto
 
60
  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
61
  # global_batch_size: 512
62
  # global_batch_size: 256
63
+ global_batch_size: 32
64
 
65
  # Number of samples per data-parallel rank (type: int, default: 4)
66
  micro_batch_size: 1
 
124
  # - 0.9
125
  # - 0.999
126
 
 
 
 
 
 
 
 
 
 
 
127
  optimizer:
128
+ class_path: sophia_opt.SophiaG
 
 
 
 
129
  init_args:
 
130
  lr: 1e-4
 
 
 
131
  betas:
132
  - 0.9
133
+ - 0.95
134
+ rho: 0.05
135
+ weight_decay: 0.1
136
 
137
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
138
  devices: auto