ameerazam08's picture
Azam
3328618
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.5347222222222223,
"global_step": 730,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 9.88425925925926e-05,
"loss": 2.0821,
"step": 10
},
{
"epoch": 0.03,
"eval_accuracy": 0.1527777761220932,
"eval_loss": 2.099255084991455,
"eval_runtime": 90.7163,
"eval_samples_per_second": 6.349,
"eval_steps_per_second": 1.587,
"step": 10
},
{
"epoch": 0.07,
"learning_rate": 9.768518518518519e-05,
"loss": 1.9991,
"step": 20
},
{
"epoch": 0.07,
"eval_accuracy": 0.2326388955116272,
"eval_loss": 2.011439085006714,
"eval_runtime": 90.9685,
"eval_samples_per_second": 6.332,
"eval_steps_per_second": 1.583,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 9.664351851851853e-05,
"loss": 2.0133,
"step": 30
},
{
"epoch": 0.1,
"eval_accuracy": 0.1822916716337204,
"eval_loss": 2.0611398220062256,
"eval_runtime": 91.1422,
"eval_samples_per_second": 6.32,
"eval_steps_per_second": 1.58,
"step": 30
},
{
"epoch": 0.14,
"learning_rate": 9.548611111111112e-05,
"loss": 1.9912,
"step": 40
},
{
"epoch": 0.14,
"eval_accuracy": 0.1944444477558136,
"eval_loss": 1.9874335527420044,
"eval_runtime": 91.2497,
"eval_samples_per_second": 6.312,
"eval_steps_per_second": 1.578,
"step": 40
},
{
"epoch": 0.17,
"learning_rate": 9.432870370370372e-05,
"loss": 1.9825,
"step": 50
},
{
"epoch": 0.17,
"eval_accuracy": 0.1875,
"eval_loss": 1.9108080863952637,
"eval_runtime": 91.363,
"eval_samples_per_second": 6.305,
"eval_steps_per_second": 1.576,
"step": 50
},
{
"epoch": 0.21,
"learning_rate": 9.31712962962963e-05,
"loss": 1.8281,
"step": 60
},
{
"epoch": 0.21,
"eval_accuracy": 0.2760416567325592,
"eval_loss": 1.8094313144683838,
"eval_runtime": 90.7999,
"eval_samples_per_second": 6.344,
"eval_steps_per_second": 1.586,
"step": 60
},
{
"epoch": 0.24,
"learning_rate": 9.201388888888889e-05,
"loss": 1.7768,
"step": 70
},
{
"epoch": 0.24,
"eval_accuracy": 0.28125,
"eval_loss": 1.8212822675704956,
"eval_runtime": 90.3717,
"eval_samples_per_second": 6.374,
"eval_steps_per_second": 1.593,
"step": 70
},
{
"epoch": 0.28,
"learning_rate": 9.085648148148149e-05,
"loss": 1.8747,
"step": 80
},
{
"epoch": 0.28,
"eval_accuracy": 0.2916666567325592,
"eval_loss": 1.819143533706665,
"eval_runtime": 89.6205,
"eval_samples_per_second": 6.427,
"eval_steps_per_second": 1.607,
"step": 80
},
{
"epoch": 0.31,
"learning_rate": 8.969907407407407e-05,
"loss": 1.8258,
"step": 90
},
{
"epoch": 0.31,
"eval_accuracy": 0.2673611044883728,
"eval_loss": 1.8833109140396118,
"eval_runtime": 90.3547,
"eval_samples_per_second": 6.375,
"eval_steps_per_second": 1.594,
"step": 90
},
{
"epoch": 0.35,
"learning_rate": 8.854166666666667e-05,
"loss": 1.8466,
"step": 100
},
{
"epoch": 0.35,
"eval_accuracy": 0.3020833432674408,
"eval_loss": 1.794202208518982,
"eval_runtime": 89.9211,
"eval_samples_per_second": 6.406,
"eval_steps_per_second": 1.601,
"step": 100
},
{
"epoch": 0.38,
"learning_rate": 8.738425925925926e-05,
"loss": 1.7949,
"step": 110
},
{
"epoch": 0.38,
"eval_accuracy": 0.234375,
"eval_loss": 1.737123727798462,
"eval_runtime": 90.2733,
"eval_samples_per_second": 6.381,
"eval_steps_per_second": 1.595,
"step": 110
},
{
"epoch": 0.42,
"learning_rate": 8.622685185185186e-05,
"loss": 1.6993,
"step": 120
},
{
"epoch": 0.42,
"eval_accuracy": 0.3333333432674408,
"eval_loss": 1.714468240737915,
"eval_runtime": 90.6521,
"eval_samples_per_second": 6.354,
"eval_steps_per_second": 1.588,
"step": 120
},
{
"epoch": 0.45,
"learning_rate": 8.506944444444444e-05,
"loss": 1.9949,
"step": 130
},
{
"epoch": 0.45,
"eval_accuracy": 0.3628472089767456,
"eval_loss": 1.7169595956802368,
"eval_runtime": 89.657,
"eval_samples_per_second": 6.424,
"eval_steps_per_second": 1.606,
"step": 130
},
{
"epoch": 0.49,
"learning_rate": 8.391203703703704e-05,
"loss": 1.6402,
"step": 140
},
{
"epoch": 0.49,
"eval_accuracy": 0.3506944477558136,
"eval_loss": 1.7289636135101318,
"eval_runtime": 90.3787,
"eval_samples_per_second": 6.373,
"eval_steps_per_second": 1.593,
"step": 140
},
{
"epoch": 0.52,
"learning_rate": 8.275462962962963e-05,
"loss": 1.7599,
"step": 150
},
{
"epoch": 0.52,
"eval_accuracy": 0.3090277910232544,
"eval_loss": 1.7477116584777832,
"eval_runtime": 90.5736,
"eval_samples_per_second": 6.359,
"eval_steps_per_second": 1.59,
"step": 150
},
{
"epoch": 0.56,
"learning_rate": 8.159722222222223e-05,
"loss": 1.5776,
"step": 160
},
{
"epoch": 0.56,
"eval_accuracy": 0.3715277910232544,
"eval_loss": 1.6158109903335571,
"eval_runtime": 89.7843,
"eval_samples_per_second": 6.415,
"eval_steps_per_second": 1.604,
"step": 160
},
{
"epoch": 0.59,
"learning_rate": 8.043981481481482e-05,
"loss": 1.7169,
"step": 170
},
{
"epoch": 0.59,
"eval_accuracy": 0.3663194477558136,
"eval_loss": 1.6075005531311035,
"eval_runtime": 90.228,
"eval_samples_per_second": 6.384,
"eval_steps_per_second": 1.596,
"step": 170
},
{
"epoch": 0.62,
"learning_rate": 7.928240740740742e-05,
"loss": 1.653,
"step": 180
},
{
"epoch": 0.62,
"eval_accuracy": 0.4201388955116272,
"eval_loss": 1.524334192276001,
"eval_runtime": 91.575,
"eval_samples_per_second": 6.29,
"eval_steps_per_second": 1.572,
"step": 180
},
{
"epoch": 0.66,
"learning_rate": 7.8125e-05,
"loss": 1.5733,
"step": 190
},
{
"epoch": 0.66,
"eval_accuracy": 0.359375,
"eval_loss": 1.7072927951812744,
"eval_runtime": 90.2896,
"eval_samples_per_second": 6.379,
"eval_steps_per_second": 1.595,
"step": 190
},
{
"epoch": 0.69,
"learning_rate": 7.69675925925926e-05,
"loss": 1.6704,
"step": 200
},
{
"epoch": 0.69,
"eval_accuracy": 0.4722222089767456,
"eval_loss": 1.4267817735671997,
"eval_runtime": 90.57,
"eval_samples_per_second": 6.36,
"eval_steps_per_second": 1.59,
"step": 200
},
{
"epoch": 0.73,
"learning_rate": 7.581018518518519e-05,
"loss": 1.4389,
"step": 210
},
{
"epoch": 0.73,
"eval_accuracy": 0.3940972089767456,
"eval_loss": 1.7218824625015259,
"eval_runtime": 89.8269,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 1.603,
"step": 210
},
{
"epoch": 0.76,
"learning_rate": 7.465277777777779e-05,
"loss": 1.5342,
"step": 220
},
{
"epoch": 0.76,
"eval_accuracy": 0.3940972089767456,
"eval_loss": 1.5133135318756104,
"eval_runtime": 89.6337,
"eval_samples_per_second": 6.426,
"eval_steps_per_second": 1.607,
"step": 220
},
{
"epoch": 0.8,
"learning_rate": 7.349537037037037e-05,
"loss": 1.5165,
"step": 230
},
{
"epoch": 0.8,
"eval_accuracy": 0.4322916567325592,
"eval_loss": 1.4692301750183105,
"eval_runtime": 90.2666,
"eval_samples_per_second": 6.381,
"eval_steps_per_second": 1.595,
"step": 230
},
{
"epoch": 0.83,
"learning_rate": 7.233796296296297e-05,
"loss": 1.4743,
"step": 240
},
{
"epoch": 0.83,
"eval_accuracy": 0.3975694477558136,
"eval_loss": 1.5509642362594604,
"eval_runtime": 90.8282,
"eval_samples_per_second": 6.342,
"eval_steps_per_second": 1.585,
"step": 240
},
{
"epoch": 0.87,
"learning_rate": 7.118055555555556e-05,
"loss": 1.4903,
"step": 250
},
{
"epoch": 0.87,
"eval_accuracy": 0.4739583432674408,
"eval_loss": 1.3426711559295654,
"eval_runtime": 91.1121,
"eval_samples_per_second": 6.322,
"eval_steps_per_second": 1.58,
"step": 250
},
{
"epoch": 0.9,
"learning_rate": 7.002314814814816e-05,
"loss": 1.2193,
"step": 260
},
{
"epoch": 0.9,
"eval_accuracy": 0.5329861044883728,
"eval_loss": 1.3175561428070068,
"eval_runtime": 89.5523,
"eval_samples_per_second": 6.432,
"eval_steps_per_second": 1.608,
"step": 260
},
{
"epoch": 0.94,
"learning_rate": 6.886574074074074e-05,
"loss": 1.56,
"step": 270
},
{
"epoch": 0.94,
"eval_accuracy": 0.4635416567325592,
"eval_loss": 1.4102365970611572,
"eval_runtime": 89.6275,
"eval_samples_per_second": 6.427,
"eval_steps_per_second": 1.607,
"step": 270
},
{
"epoch": 0.97,
"learning_rate": 6.770833333333334e-05,
"loss": 1.4563,
"step": 280
},
{
"epoch": 0.97,
"eval_accuracy": 0.5347222089767456,
"eval_loss": 1.291764736175537,
"eval_runtime": 89.5795,
"eval_samples_per_second": 6.43,
"eval_steps_per_second": 1.608,
"step": 280
},
{
"epoch": 1.01,
"learning_rate": 6.655092592592593e-05,
"loss": 1.3766,
"step": 290
},
{
"epoch": 1.01,
"eval_accuracy": 0.4305555522441864,
"eval_loss": 1.5331988334655762,
"eval_runtime": 89.8777,
"eval_samples_per_second": 6.409,
"eval_steps_per_second": 1.602,
"step": 290
},
{
"epoch": 1.04,
"learning_rate": 6.539351851851853e-05,
"loss": 1.218,
"step": 300
},
{
"epoch": 1.04,
"eval_accuracy": 0.4861111044883728,
"eval_loss": 1.317336916923523,
"eval_runtime": 91.03,
"eval_samples_per_second": 6.328,
"eval_steps_per_second": 1.582,
"step": 300
},
{
"epoch": 1.08,
"learning_rate": 6.423611111111112e-05,
"loss": 1.3211,
"step": 310
},
{
"epoch": 1.08,
"eval_accuracy": 0.5034722089767456,
"eval_loss": 1.263780951499939,
"eval_runtime": 90.5509,
"eval_samples_per_second": 6.361,
"eval_steps_per_second": 1.59,
"step": 310
},
{
"epoch": 1.11,
"learning_rate": 6.307870370370372e-05,
"loss": 1.1933,
"step": 320
},
{
"epoch": 1.11,
"eval_accuracy": 0.4982638955116272,
"eval_loss": 1.3839294910430908,
"eval_runtime": 90.7629,
"eval_samples_per_second": 6.346,
"eval_steps_per_second": 1.587,
"step": 320
},
{
"epoch": 1.15,
"learning_rate": 6.192129629629629e-05,
"loss": 1.2803,
"step": 330
},
{
"epoch": 1.15,
"eval_accuracy": 0.5121527910232544,
"eval_loss": 1.2145192623138428,
"eval_runtime": 90.0175,
"eval_samples_per_second": 6.399,
"eval_steps_per_second": 1.6,
"step": 330
},
{
"epoch": 1.18,
"learning_rate": 6.076388888888889e-05,
"loss": 1.1112,
"step": 340
},
{
"epoch": 1.18,
"eval_accuracy": 0.5711805820465088,
"eval_loss": 1.1930283308029175,
"eval_runtime": 89.5796,
"eval_samples_per_second": 6.43,
"eval_steps_per_second": 1.608,
"step": 340
},
{
"epoch": 1.22,
"learning_rate": 5.960648148148148e-05,
"loss": 1.0907,
"step": 350
},
{
"epoch": 1.22,
"eval_accuracy": 0.5815972089767456,
"eval_loss": 1.145843505859375,
"eval_runtime": 90.3053,
"eval_samples_per_second": 6.378,
"eval_steps_per_second": 1.595,
"step": 350
},
{
"epoch": 1.25,
"learning_rate": 5.844907407407407e-05,
"loss": 1.013,
"step": 360
},
{
"epoch": 1.25,
"eval_accuracy": 0.6180555820465088,
"eval_loss": 1.0559839010238647,
"eval_runtime": 89.8917,
"eval_samples_per_second": 6.408,
"eval_steps_per_second": 1.602,
"step": 360
},
{
"epoch": 1.28,
"learning_rate": 5.7291666666666666e-05,
"loss": 0.9308,
"step": 370
},
{
"epoch": 1.28,
"eval_accuracy": 0.6163194179534912,
"eval_loss": 1.0486806631088257,
"eval_runtime": 90.7378,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 1.587,
"step": 370
},
{
"epoch": 1.32,
"learning_rate": 5.613425925925926e-05,
"loss": 1.3225,
"step": 380
},
{
"epoch": 1.32,
"eval_accuracy": 0.6145833134651184,
"eval_loss": 1.050321340560913,
"eval_runtime": 89.4251,
"eval_samples_per_second": 6.441,
"eval_steps_per_second": 1.61,
"step": 380
},
{
"epoch": 1.35,
"learning_rate": 5.497685185185185e-05,
"loss": 1.0774,
"step": 390
},
{
"epoch": 1.35,
"eval_accuracy": 0.5434027910232544,
"eval_loss": 1.25161612033844,
"eval_runtime": 89.4532,
"eval_samples_per_second": 6.439,
"eval_steps_per_second": 1.61,
"step": 390
},
{
"epoch": 1.39,
"learning_rate": 5.3819444444444444e-05,
"loss": 1.2251,
"step": 400
},
{
"epoch": 1.39,
"eval_accuracy": 0.609375,
"eval_loss": 1.076072335243225,
"eval_runtime": 90.2082,
"eval_samples_per_second": 6.385,
"eval_steps_per_second": 1.596,
"step": 400
},
{
"epoch": 1.42,
"learning_rate": 5.266203703703704e-05,
"loss": 0.9848,
"step": 410
},
{
"epoch": 1.42,
"eval_accuracy": 0.6440972089767456,
"eval_loss": 1.0271832942962646,
"eval_runtime": 89.9601,
"eval_samples_per_second": 6.403,
"eval_steps_per_second": 1.601,
"step": 410
},
{
"epoch": 1.46,
"learning_rate": 5.150462962962963e-05,
"loss": 0.9913,
"step": 420
},
{
"epoch": 1.46,
"eval_accuracy": 0.6041666865348816,
"eval_loss": 1.0503506660461426,
"eval_runtime": 89.8786,
"eval_samples_per_second": 6.409,
"eval_steps_per_second": 1.602,
"step": 420
},
{
"epoch": 1.49,
"learning_rate": 5.034722222222222e-05,
"loss": 0.9081,
"step": 430
},
{
"epoch": 1.49,
"eval_accuracy": 0.6666666865348816,
"eval_loss": 0.9094821214675903,
"eval_runtime": 91.6907,
"eval_samples_per_second": 6.282,
"eval_steps_per_second": 1.57,
"step": 430
},
{
"epoch": 1.53,
"learning_rate": 4.9189814814814815e-05,
"loss": 0.8339,
"step": 440
},
{
"epoch": 1.53,
"eval_accuracy": 0.6631944179534912,
"eval_loss": 0.9030921459197998,
"eval_runtime": 90.547,
"eval_samples_per_second": 6.361,
"eval_steps_per_second": 1.59,
"step": 440
},
{
"epoch": 1.56,
"learning_rate": 4.803240740740741e-05,
"loss": 0.8893,
"step": 450
},
{
"epoch": 1.56,
"eval_accuracy": 0.6423611044883728,
"eval_loss": 0.9375382661819458,
"eval_runtime": 90.2281,
"eval_samples_per_second": 6.384,
"eval_steps_per_second": 1.596,
"step": 450
},
{
"epoch": 1.6,
"learning_rate": 4.6875e-05,
"loss": 0.9362,
"step": 460
},
{
"epoch": 1.6,
"eval_accuracy": 0.6197916865348816,
"eval_loss": 0.9755175113677979,
"eval_runtime": 91.0163,
"eval_samples_per_second": 6.329,
"eval_steps_per_second": 1.582,
"step": 460
},
{
"epoch": 1.63,
"learning_rate": 4.5717592592592594e-05,
"loss": 0.835,
"step": 470
},
{
"epoch": 1.63,
"eval_accuracy": 0.6545138955116272,
"eval_loss": 0.9399816989898682,
"eval_runtime": 89.8555,
"eval_samples_per_second": 6.41,
"eval_steps_per_second": 1.603,
"step": 470
},
{
"epoch": 1.67,
"learning_rate": 4.456018518518519e-05,
"loss": 0.6733,
"step": 480
},
{
"epoch": 1.67,
"eval_accuracy": 0.6927083134651184,
"eval_loss": 0.8480438590049744,
"eval_runtime": 89.8841,
"eval_samples_per_second": 6.408,
"eval_steps_per_second": 1.602,
"step": 480
},
{
"epoch": 1.7,
"learning_rate": 4.340277777777778e-05,
"loss": 1.0115,
"step": 490
},
{
"epoch": 1.7,
"eval_accuracy": 0.6840277910232544,
"eval_loss": 0.8332173824310303,
"eval_runtime": 91.0011,
"eval_samples_per_second": 6.33,
"eval_steps_per_second": 1.582,
"step": 490
},
{
"epoch": 1.74,
"learning_rate": 4.224537037037037e-05,
"loss": 0.7473,
"step": 500
},
{
"epoch": 1.74,
"eval_accuracy": 0.6475694179534912,
"eval_loss": 0.9618370532989502,
"eval_runtime": 90.2515,
"eval_samples_per_second": 6.382,
"eval_steps_per_second": 1.596,
"step": 500
},
{
"epoch": 1.77,
"learning_rate": 4.1087962962962965e-05,
"loss": 0.8355,
"step": 510
},
{
"epoch": 1.77,
"eval_accuracy": 0.6840277910232544,
"eval_loss": 0.8845413327217102,
"eval_runtime": 89.7252,
"eval_samples_per_second": 6.42,
"eval_steps_per_second": 1.605,
"step": 510
},
{
"epoch": 1.81,
"learning_rate": 3.993055555555556e-05,
"loss": 0.8487,
"step": 520
},
{
"epoch": 1.81,
"eval_accuracy": 0.6875,
"eval_loss": 0.8297374844551086,
"eval_runtime": 91.9685,
"eval_samples_per_second": 6.263,
"eval_steps_per_second": 1.566,
"step": 520
},
{
"epoch": 1.84,
"learning_rate": 3.877314814814815e-05,
"loss": 0.6038,
"step": 530
},
{
"epoch": 1.84,
"eval_accuracy": 0.6493055820465088,
"eval_loss": 0.9539130330085754,
"eval_runtime": 90.0856,
"eval_samples_per_second": 6.394,
"eval_steps_per_second": 1.598,
"step": 530
},
{
"epoch": 1.88,
"learning_rate": 3.7615740740740744e-05,
"loss": 0.75,
"step": 540
},
{
"epoch": 1.88,
"eval_accuracy": 0.6857638955116272,
"eval_loss": 0.8455307483673096,
"eval_runtime": 89.5522,
"eval_samples_per_second": 6.432,
"eval_steps_per_second": 1.608,
"step": 540
},
{
"epoch": 1.91,
"learning_rate": 3.6458333333333336e-05,
"loss": 0.8561,
"step": 550
},
{
"epoch": 1.91,
"eval_accuracy": 0.7013888955116272,
"eval_loss": 0.7813519239425659,
"eval_runtime": 90.1129,
"eval_samples_per_second": 6.392,
"eval_steps_per_second": 1.598,
"step": 550
},
{
"epoch": 1.94,
"learning_rate": 3.530092592592593e-05,
"loss": 0.7552,
"step": 560
},
{
"epoch": 1.94,
"eval_accuracy": 0.6822916865348816,
"eval_loss": 0.8651251196861267,
"eval_runtime": 89.9146,
"eval_samples_per_second": 6.406,
"eval_steps_per_second": 1.602,
"step": 560
},
{
"epoch": 1.98,
"learning_rate": 3.414351851851852e-05,
"loss": 0.6972,
"step": 570
},
{
"epoch": 1.98,
"eval_accuracy": 0.71875,
"eval_loss": 0.7325252890586853,
"eval_runtime": 90.7375,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 1.587,
"step": 570
},
{
"epoch": 2.01,
"learning_rate": 3.2986111111111115e-05,
"loss": 0.7483,
"step": 580
},
{
"epoch": 2.01,
"eval_accuracy": 0.7690972089767456,
"eval_loss": 0.6722133159637451,
"eval_runtime": 90.3148,
"eval_samples_per_second": 6.378,
"eval_steps_per_second": 1.594,
"step": 580
},
{
"epoch": 2.05,
"learning_rate": 3.182870370370371e-05,
"loss": 0.5419,
"step": 590
},
{
"epoch": 2.05,
"eval_accuracy": 0.7326388955116272,
"eval_loss": 0.7046216130256653,
"eval_runtime": 92.1898,
"eval_samples_per_second": 6.248,
"eval_steps_per_second": 1.562,
"step": 590
},
{
"epoch": 2.08,
"learning_rate": 3.06712962962963e-05,
"loss": 0.5203,
"step": 600
},
{
"epoch": 2.08,
"eval_accuracy": 0.7326388955116272,
"eval_loss": 0.7062063813209534,
"eval_runtime": 90.2661,
"eval_samples_per_second": 6.381,
"eval_steps_per_second": 1.595,
"step": 600
},
{
"epoch": 2.12,
"learning_rate": 2.951388888888889e-05,
"loss": 0.5235,
"step": 610
},
{
"epoch": 2.12,
"eval_accuracy": 0.7534722089767456,
"eval_loss": 0.6795992255210876,
"eval_runtime": 91.0148,
"eval_samples_per_second": 6.329,
"eval_steps_per_second": 1.582,
"step": 610
},
{
"epoch": 2.15,
"learning_rate": 2.8356481481481483e-05,
"loss": 0.514,
"step": 620
},
{
"epoch": 2.15,
"eval_accuracy": 0.7204861044883728,
"eval_loss": 0.746653139591217,
"eval_runtime": 89.8044,
"eval_samples_per_second": 6.414,
"eval_steps_per_second": 1.603,
"step": 620
},
{
"epoch": 2.19,
"learning_rate": 2.7199074074074076e-05,
"loss": 0.5402,
"step": 630
},
{
"epoch": 2.19,
"eval_accuracy": 0.7447916865348816,
"eval_loss": 0.716274619102478,
"eval_runtime": 90.8833,
"eval_samples_per_second": 6.338,
"eval_steps_per_second": 1.584,
"step": 630
},
{
"epoch": 2.22,
"learning_rate": 2.604166666666667e-05,
"loss": 0.7235,
"step": 640
},
{
"epoch": 2.22,
"eval_accuracy": 0.7222222089767456,
"eval_loss": 0.7545790076255798,
"eval_runtime": 89.6633,
"eval_samples_per_second": 6.424,
"eval_steps_per_second": 1.606,
"step": 640
},
{
"epoch": 2.26,
"learning_rate": 2.488425925925926e-05,
"loss": 0.551,
"step": 650
},
{
"epoch": 2.26,
"eval_accuracy": 0.7534722089767456,
"eval_loss": 0.6994116902351379,
"eval_runtime": 89.8085,
"eval_samples_per_second": 6.414,
"eval_steps_per_second": 1.603,
"step": 650
},
{
"epoch": 2.29,
"learning_rate": 2.3726851851851854e-05,
"loss": 0.5769,
"step": 660
},
{
"epoch": 2.29,
"eval_accuracy": 0.7534722089767456,
"eval_loss": 0.7151244282722473,
"eval_runtime": 90.9508,
"eval_samples_per_second": 6.333,
"eval_steps_per_second": 1.583,
"step": 660
},
{
"epoch": 2.33,
"learning_rate": 2.2569444444444447e-05,
"loss": 0.5501,
"step": 670
},
{
"epoch": 2.33,
"eval_accuracy": 0.7604166865348816,
"eval_loss": 0.695513129234314,
"eval_runtime": 90.6092,
"eval_samples_per_second": 6.357,
"eval_steps_per_second": 1.589,
"step": 670
},
{
"epoch": 2.36,
"learning_rate": 2.141203703703704e-05,
"loss": 0.5416,
"step": 680
},
{
"epoch": 2.36,
"eval_accuracy": 0.7725694179534912,
"eval_loss": 0.6533116102218628,
"eval_runtime": 89.82,
"eval_samples_per_second": 6.413,
"eval_steps_per_second": 1.603,
"step": 680
},
{
"epoch": 2.4,
"learning_rate": 2.0254629629629632e-05,
"loss": 0.5452,
"step": 690
},
{
"epoch": 2.4,
"eval_accuracy": 0.7777777910232544,
"eval_loss": 0.6232606172561646,
"eval_runtime": 90.4116,
"eval_samples_per_second": 6.371,
"eval_steps_per_second": 1.593,
"step": 690
},
{
"epoch": 2.43,
"learning_rate": 1.9097222222222222e-05,
"loss": 0.8518,
"step": 700
},
{
"epoch": 2.43,
"eval_accuracy": 0.7777777910232544,
"eval_loss": 0.6136298179626465,
"eval_runtime": 90.8648,
"eval_samples_per_second": 6.339,
"eval_steps_per_second": 1.585,
"step": 700
},
{
"epoch": 2.47,
"learning_rate": 1.7939814814814815e-05,
"loss": 0.3372,
"step": 710
},
{
"epoch": 2.47,
"eval_accuracy": 0.7986111044883728,
"eval_loss": 0.5700623393058777,
"eval_runtime": 90.1438,
"eval_samples_per_second": 6.39,
"eval_steps_per_second": 1.597,
"step": 710
},
{
"epoch": 2.5,
"learning_rate": 1.6782407407407408e-05,
"loss": 0.4488,
"step": 720
},
{
"epoch": 2.5,
"eval_accuracy": 0.7847222089767456,
"eval_loss": 0.5789040327072144,
"eval_runtime": 90.7633,
"eval_samples_per_second": 6.346,
"eval_steps_per_second": 1.587,
"step": 720
},
{
"epoch": 2.53,
"learning_rate": 1.5625e-05,
"loss": 0.3977,
"step": 730
},
{
"epoch": 2.53,
"eval_accuracy": 0.7829861044883728,
"eval_loss": 0.5748720169067383,
"eval_runtime": 89.6735,
"eval_samples_per_second": 6.423,
"eval_steps_per_second": 1.606,
"step": 730
}
],
"max_steps": 864,
"num_train_epochs": 3,
"total_flos": 2.1573660231214095e+18,
"trial_name": null,
"trial_params": null
}