valoomba commited on
Commit
d72b603
·
verified ·
1 Parent(s): bc30e8a

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bad9034785763b086b60a73ca966d0ddc01f299b32c43137a9d43513800d579
3
  size 536991984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21ba522228845174d32cacd7669d20a73f3023311098562af2f92c68ec74141f
3
  size 536991984
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0be7dc8b6c2c3408c9f271d985ac75371c63f7f6e395e7647353a0c823bf5a8
3
  size 273692564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f89bb4f12f1bc27da4b337212804d8fbe0d4d436638a7a785f9b382734e8738c
3
  size 273692564
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d3e3111608832edd4c3ec5c3f7270df3317520d5f3b484383a9ebced45d72b0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f7b4a7087f66c43babf2b65b54d2577d3e2a7ac4870488e548b8ce38a147b3
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fdaf50130575b5890af18d121235b5d095d7e70fa6a567281438c1db678114e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9933be5dcd705040f69563dec6acc92d3f8a3165edb72320c05d24fa315f93
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 128,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -207,6 +207,54 @@
207
  "loss": 1.136,
208
  "mean_token_accuracy": 0.7119275834411383,
209
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
  ],
212
  "logging_steps": 5,
@@ -221,12 +269,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 7.738212146404147e+18,
230
  "train_batch_size": 1,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.864,
5
  "eval_steps": 500,
6
+ "global_step": 155,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
207
  "loss": 1.136,
208
  "mean_token_accuracy": 0.7119275834411383,
209
  "step": 125
210
+ },
211
+ {
212
+ "epoch": 4.064,
213
+ "grad_norm": 0.0652015432715416,
214
+ "learning_rate": 6.968339090999186e-08,
215
+ "loss": 1.1041,
216
+ "mean_token_accuracy": 0.7163285718244665,
217
+ "step": 130
218
+ },
219
+ {
220
+ "epoch": 4.224,
221
+ "grad_norm": 0.06359368562698364,
222
+ "learning_rate": 4.498244441786675e-08,
223
+ "loss": 1.1359,
224
+ "mean_token_accuracy": 0.7156530544161797,
225
+ "step": 135
226
+ },
227
+ {
228
+ "epoch": 4.384,
229
+ "grad_norm": 0.06807275116443634,
230
+ "learning_rate": 2.547212649466568e-08,
231
+ "loss": 1.1229,
232
+ "mean_token_accuracy": 0.7130052808672189,
233
+ "step": 140
234
+ },
235
+ {
236
+ "epoch": 4.5440000000000005,
237
+ "grad_norm": 0.06558381021022797,
238
+ "learning_rate": 1.1375001769727999e-08,
239
+ "loss": 1.0983,
240
+ "mean_token_accuracy": 0.7218218572437763,
241
+ "step": 145
242
+ },
243
+ {
244
+ "epoch": 4.704,
245
+ "grad_norm": 0.06619936972856522,
246
+ "learning_rate": 2.851883682973233e-09,
247
+ "loss": 1.1149,
248
+ "mean_token_accuracy": 0.7202201712876558,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 4.864,
253
+ "grad_norm": 0.0672445297241211,
254
+ "learning_rate": 0.0,
255
+ "loss": 1.0885,
256
+ "mean_token_accuracy": 0.7229270905256271,
257
+ "step": 155
258
  }
259
  ],
260
  "logging_steps": 5,
 
269
  "should_evaluate": false,
270
  "should_log": false,
271
  "should_save": true,
272
+ "should_training_stop": true
273
  },
274
  "attributes": {}
275
  }
276
  },
277
+ "total_flos": 9.40617510529582e+18,
278
  "train_batch_size": 1,
279
  "trial_name": null,
280
  "trial_params": null