teapotllm/checkpoint-8200/trainer_state.json
{
"best_metric": 0.07522870600223541,
"best_model_checkpoint": "./teapotllm/checkpoint-1640",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 8200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.528669357299805,
"learning_rate": 4.75e-05,
"loss": 6.5278,
"step": 410
},
{
"epoch": 1.0,
"eval_loss": 0.08746226876974106,
"eval_runtime": 4.8581,
"eval_samples_per_second": 45.285,
"eval_steps_per_second": 5.764,
"step": 410
},
{
"epoch": 2.0,
"grad_norm": 0.16966140270233154,
"learning_rate": 4.5e-05,
"loss": 0.0895,
"step": 820
},
{
"epoch": 2.0,
"eval_loss": 0.0768546611070633,
"eval_runtime": 4.8636,
"eval_samples_per_second": 45.234,
"eval_steps_per_second": 5.757,
"step": 820
},
{
"epoch": 3.0,
"grad_norm": 0.40588390827178955,
"learning_rate": 4.25e-05,
"loss": 0.1199,
"step": 1230
},
{
"epoch": 3.0,
"eval_loss": 0.07700426131486893,
"eval_runtime": 4.8598,
"eval_samples_per_second": 45.269,
"eval_steps_per_second": 5.762,
"step": 1230
},
{
"epoch": 4.0,
"grad_norm": 0.0932304635643959,
"learning_rate": 4e-05,
"loss": 0.0639,
"step": 1640
},
{
"epoch": 4.0,
"eval_loss": 0.07522870600223541,
"eval_runtime": 4.8695,
"eval_samples_per_second": 45.179,
"eval_steps_per_second": 5.75,
"step": 1640
},
{
"epoch": 5.0,
"grad_norm": 0.08160164952278137,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0692,
"step": 2050
},
{
"epoch": 5.0,
"eval_loss": 0.07598432898521423,
"eval_runtime": 4.8511,
"eval_samples_per_second": 45.351,
"eval_steps_per_second": 5.772,
"step": 2050
},
{
"epoch": 6.0,
"grad_norm": 0.24587534368038177,
"learning_rate": 3.5e-05,
"loss": 0.061,
"step": 2460
},
{
"epoch": 6.0,
"eval_loss": 0.0796995759010315,
"eval_runtime": 4.8601,
"eval_samples_per_second": 45.266,
"eval_steps_per_second": 5.761,
"step": 2460
},
{
"epoch": 7.0,
"grad_norm": 0.5349581837654114,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0395,
"step": 2870
},
{
"epoch": 7.0,
"eval_loss": 0.07927663624286652,
"eval_runtime": 4.8456,
"eval_samples_per_second": 45.402,
"eval_steps_per_second": 5.778,
"step": 2870
},
{
"epoch": 8.0,
"grad_norm": 0.19880425930023193,
"learning_rate": 3e-05,
"loss": 0.036,
"step": 3280
},
{
"epoch": 8.0,
"eval_loss": 0.0807080939412117,
"eval_runtime": 4.8744,
"eval_samples_per_second": 45.134,
"eval_steps_per_second": 5.744,
"step": 3280
},
{
"epoch": 9.0,
"grad_norm": 0.10939527302980423,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.0345,
"step": 3690
},
{
"epoch": 9.0,
"eval_loss": 0.08234285563230515,
"eval_runtime": 4.863,
"eval_samples_per_second": 45.24,
"eval_steps_per_second": 5.758,
"step": 3690
},
{
"epoch": 10.0,
"grad_norm": 0.45931077003479004,
"learning_rate": 2.5e-05,
"loss": 0.0303,
"step": 4100
},
{
"epoch": 10.0,
"eval_loss": 0.08353324979543686,
"eval_runtime": 4.8563,
"eval_samples_per_second": 45.302,
"eval_steps_per_second": 5.766,
"step": 4100
},
{
"epoch": 11.0,
"grad_norm": 43.76845932006836,
"learning_rate": 2.25e-05,
"loss": 0.0267,
"step": 4510
},
{
"epoch": 11.0,
"eval_loss": 0.08770459145307541,
"eval_runtime": 4.8587,
"eval_samples_per_second": 45.28,
"eval_steps_per_second": 5.763,
"step": 4510
},
{
"epoch": 12.0,
"grad_norm": 0.8835374116897583,
"learning_rate": 2e-05,
"loss": 0.0243,
"step": 4920
},
{
"epoch": 12.0,
"eval_loss": 0.08723071962594986,
"eval_runtime": 4.867,
"eval_samples_per_second": 45.203,
"eval_steps_per_second": 5.753,
"step": 4920
},
{
"epoch": 13.0,
"grad_norm": 0.07083321362733841,
"learning_rate": 1.75e-05,
"loss": 0.0215,
"step": 5330
},
{
"epoch": 13.0,
"eval_loss": 0.09219814836978912,
"eval_runtime": 4.8773,
"eval_samples_per_second": 45.107,
"eval_steps_per_second": 5.741,
"step": 5330
},
{
"epoch": 14.0,
"grad_norm": 0.1330474615097046,
"learning_rate": 1.5e-05,
"loss": 0.0206,
"step": 5740
},
{
"epoch": 14.0,
"eval_loss": 0.09269961714744568,
"eval_runtime": 4.8707,
"eval_samples_per_second": 45.168,
"eval_steps_per_second": 5.749,
"step": 5740
},
{
"epoch": 15.0,
"grad_norm": 0.20611341297626495,
"learning_rate": 1.25e-05,
"loss": 0.0193,
"step": 6150
},
{
"epoch": 15.0,
"eval_loss": 0.09489532560110092,
"eval_runtime": 4.8553,
"eval_samples_per_second": 45.311,
"eval_steps_per_second": 5.767,
"step": 6150
},
{
"epoch": 16.0,
"grad_norm": 24.089698791503906,
"learning_rate": 1e-05,
"loss": 0.019,
"step": 6560
},
{
"epoch": 16.0,
"eval_loss": 0.09438970685005188,
"eval_runtime": 4.8722,
"eval_samples_per_second": 45.154,
"eval_steps_per_second": 5.747,
"step": 6560
},
{
"epoch": 17.0,
"grad_norm": 0.29387062788009644,
"learning_rate": 7.5e-06,
"loss": 0.018,
"step": 6970
},
{
"epoch": 17.0,
"eval_loss": 0.097468800842762,
"eval_runtime": 4.8614,
"eval_samples_per_second": 45.254,
"eval_steps_per_second": 5.76,
"step": 6970
},
{
"epoch": 18.0,
"grad_norm": 0.06152157112956047,
"learning_rate": 5e-06,
"loss": 0.0174,
"step": 7380
},
{
"epoch": 18.0,
"eval_loss": 0.09819656610488892,
"eval_runtime": 4.8553,
"eval_samples_per_second": 45.312,
"eval_steps_per_second": 5.767,
"step": 7380
},
{
"epoch": 19.0,
"grad_norm": 0.20427298545837402,
"learning_rate": 2.5e-06,
"loss": 0.0674,
"step": 7790
},
{
"epoch": 19.0,
"eval_loss": 0.09810814261436462,
"eval_runtime": 4.8519,
"eval_samples_per_second": 45.343,
"eval_steps_per_second": 5.771,
"step": 7790
},
{
"epoch": 20.0,
"grad_norm": 12.638993263244629,
"learning_rate": 0.0,
"loss": 0.0171,
"step": 8200
},
{
"epoch": 20.0,
"eval_loss": 0.0987108051776886,
"eval_runtime": 4.8753,
"eval_samples_per_second": 45.126,
"eval_steps_per_second": 5.743,
"step": 8200
}
],
"logging_steps": 500,
"max_steps": 8200,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.12300205801472e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
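
For quick inspection, a minimal sketch (Python standard library only; the file path below is illustrative and assumes the checkpoint layout shown above) that reads this trainer_state.json, reports the best checkpoint recorded by the Trainer, and lists the per-epoch eval losses from log_history:

    import json

    # Illustrative path; point this at wherever the checkpoint folder lives.
    with open("./teapotllm/checkpoint-8200/trainer_state.json") as f:
        state = json.load(f)

    # Best checkpoint and metric as tracked during training.
    print("best checkpoint:", state["best_model_checkpoint"])  # ./teapotllm/checkpoint-1640
    print("best eval_loss: ", state["best_metric"])            # 0.07522870600223541

    # log_history interleaves training and evaluation entries;
    # only the evaluation entries carry an "eval_loss" key.
    for entry in state["log_history"]:
        if "eval_loss" in entry:
            print(f"epoch {entry['epoch']:>4}: eval_loss={entry['eval_loss']:.4f}")

The per-epoch listing makes it easy to see that eval_loss bottoms out at epoch 4 (step 1640) and drifts upward afterwards, which is why best_model_checkpoint points at checkpoint-1640 rather than the final checkpoint-8200.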