|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.5347222222222223, |
|
"global_step": 730, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.88425925925926e-05, |
|
"loss": 2.0821, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.1527777761220932, |
|
"eval_loss": 2.099255084991455, |
|
"eval_runtime": 90.7163, |
|
"eval_samples_per_second": 6.349, |
|
"eval_steps_per_second": 1.587, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.768518518518519e-05, |
|
"loss": 1.9991, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.2326388955116272, |
|
"eval_loss": 2.011439085006714, |
|
"eval_runtime": 90.9685, |
|
"eval_samples_per_second": 6.332, |
|
"eval_steps_per_second": 1.583, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.664351851851853e-05, |
|
"loss": 2.0133, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.1822916716337204, |
|
"eval_loss": 2.0611398220062256, |
|
"eval_runtime": 91.1422, |
|
"eval_samples_per_second": 6.32, |
|
"eval_steps_per_second": 1.58, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.548611111111112e-05, |
|
"loss": 1.9912, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.1944444477558136, |
|
"eval_loss": 1.9874335527420044, |
|
"eval_runtime": 91.2497, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 1.578, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.432870370370372e-05, |
|
"loss": 1.9825, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.1875, |
|
"eval_loss": 1.9108080863952637, |
|
"eval_runtime": 91.363, |
|
"eval_samples_per_second": 6.305, |
|
"eval_steps_per_second": 1.576, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.31712962962963e-05, |
|
"loss": 1.8281, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.2760416567325592, |
|
"eval_loss": 1.8094313144683838, |
|
"eval_runtime": 90.7999, |
|
"eval_samples_per_second": 6.344, |
|
"eval_steps_per_second": 1.586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.201388888888889e-05, |
|
"loss": 1.7768, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.28125, |
|
"eval_loss": 1.8212822675704956, |
|
"eval_runtime": 90.3717, |
|
"eval_samples_per_second": 6.374, |
|
"eval_steps_per_second": 1.593, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.085648148148149e-05, |
|
"loss": 1.8747, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.2916666567325592, |
|
"eval_loss": 1.819143533706665, |
|
"eval_runtime": 89.6205, |
|
"eval_samples_per_second": 6.427, |
|
"eval_steps_per_second": 1.607, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.969907407407407e-05, |
|
"loss": 1.8258, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.2673611044883728, |
|
"eval_loss": 1.8833109140396118, |
|
"eval_runtime": 90.3547, |
|
"eval_samples_per_second": 6.375, |
|
"eval_steps_per_second": 1.594, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.854166666666667e-05, |
|
"loss": 1.8466, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.3020833432674408, |
|
"eval_loss": 1.794202208518982, |
|
"eval_runtime": 89.9211, |
|
"eval_samples_per_second": 6.406, |
|
"eval_steps_per_second": 1.601, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.738425925925926e-05, |
|
"loss": 1.7949, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.234375, |
|
"eval_loss": 1.737123727798462, |
|
"eval_runtime": 90.2733, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 1.595, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.622685185185186e-05, |
|
"loss": 1.6993, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.3333333432674408, |
|
"eval_loss": 1.714468240737915, |
|
"eval_runtime": 90.6521, |
|
"eval_samples_per_second": 6.354, |
|
"eval_steps_per_second": 1.588, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.506944444444444e-05, |
|
"loss": 1.9949, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.3628472089767456, |
|
"eval_loss": 1.7169595956802368, |
|
"eval_runtime": 89.657, |
|
"eval_samples_per_second": 6.424, |
|
"eval_steps_per_second": 1.606, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.391203703703704e-05, |
|
"loss": 1.6402, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.3506944477558136, |
|
"eval_loss": 1.7289636135101318, |
|
"eval_runtime": 90.3787, |
|
"eval_samples_per_second": 6.373, |
|
"eval_steps_per_second": 1.593, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.275462962962963e-05, |
|
"loss": 1.7599, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.3090277910232544, |
|
"eval_loss": 1.7477116584777832, |
|
"eval_runtime": 90.5736, |
|
"eval_samples_per_second": 6.359, |
|
"eval_steps_per_second": 1.59, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.159722222222223e-05, |
|
"loss": 1.5776, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.3715277910232544, |
|
"eval_loss": 1.6158109903335571, |
|
"eval_runtime": 89.7843, |
|
"eval_samples_per_second": 6.415, |
|
"eval_steps_per_second": 1.604, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.043981481481482e-05, |
|
"loss": 1.7169, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.3663194477558136, |
|
"eval_loss": 1.6075005531311035, |
|
"eval_runtime": 90.228, |
|
"eval_samples_per_second": 6.384, |
|
"eval_steps_per_second": 1.596, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.928240740740742e-05, |
|
"loss": 1.653, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.4201388955116272, |
|
"eval_loss": 1.524334192276001, |
|
"eval_runtime": 91.575, |
|
"eval_samples_per_second": 6.29, |
|
"eval_steps_per_second": 1.572, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.8125e-05, |
|
"loss": 1.5733, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.359375, |
|
"eval_loss": 1.7072927951812744, |
|
"eval_runtime": 90.2896, |
|
"eval_samples_per_second": 6.379, |
|
"eval_steps_per_second": 1.595, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.69675925925926e-05, |
|
"loss": 1.6704, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.4722222089767456, |
|
"eval_loss": 1.4267817735671997, |
|
"eval_runtime": 90.57, |
|
"eval_samples_per_second": 6.36, |
|
"eval_steps_per_second": 1.59, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.581018518518519e-05, |
|
"loss": 1.4389, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.3940972089767456, |
|
"eval_loss": 1.7218824625015259, |
|
"eval_runtime": 89.8269, |
|
"eval_samples_per_second": 6.412, |
|
"eval_steps_per_second": 1.603, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.465277777777779e-05, |
|
"loss": 1.5342, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.3940972089767456, |
|
"eval_loss": 1.5133135318756104, |
|
"eval_runtime": 89.6337, |
|
"eval_samples_per_second": 6.426, |
|
"eval_steps_per_second": 1.607, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.349537037037037e-05, |
|
"loss": 1.5165, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.4322916567325592, |
|
"eval_loss": 1.4692301750183105, |
|
"eval_runtime": 90.2666, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 1.595, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.233796296296297e-05, |
|
"loss": 1.4743, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.3975694477558136, |
|
"eval_loss": 1.5509642362594604, |
|
"eval_runtime": 90.8282, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 1.585, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.118055555555556e-05, |
|
"loss": 1.4903, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.4739583432674408, |
|
"eval_loss": 1.3426711559295654, |
|
"eval_runtime": 91.1121, |
|
"eval_samples_per_second": 6.322, |
|
"eval_steps_per_second": 1.58, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.002314814814816e-05, |
|
"loss": 1.2193, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5329861044883728, |
|
"eval_loss": 1.3175561428070068, |
|
"eval_runtime": 89.5523, |
|
"eval_samples_per_second": 6.432, |
|
"eval_steps_per_second": 1.608, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.886574074074074e-05, |
|
"loss": 1.56, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.4635416567325592, |
|
"eval_loss": 1.4102365970611572, |
|
"eval_runtime": 89.6275, |
|
"eval_samples_per_second": 6.427, |
|
"eval_steps_per_second": 1.607, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.770833333333334e-05, |
|
"loss": 1.4563, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5347222089767456, |
|
"eval_loss": 1.291764736175537, |
|
"eval_runtime": 89.5795, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 1.608, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.655092592592593e-05, |
|
"loss": 1.3766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.4305555522441864, |
|
"eval_loss": 1.5331988334655762, |
|
"eval_runtime": 89.8777, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 1.602, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.539351851851853e-05, |
|
"loss": 1.218, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_accuracy": 0.4861111044883728, |
|
"eval_loss": 1.317336916923523, |
|
"eval_runtime": 91.03, |
|
"eval_samples_per_second": 6.328, |
|
"eval_steps_per_second": 1.582, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.423611111111112e-05, |
|
"loss": 1.3211, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.5034722089767456, |
|
"eval_loss": 1.263780951499939, |
|
"eval_runtime": 90.5509, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 1.59, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.307870370370372e-05, |
|
"loss": 1.1933, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.4982638955116272, |
|
"eval_loss": 1.3839294910430908, |
|
"eval_runtime": 90.7629, |
|
"eval_samples_per_second": 6.346, |
|
"eval_steps_per_second": 1.587, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.192129629629629e-05, |
|
"loss": 1.2803, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.5121527910232544, |
|
"eval_loss": 1.2145192623138428, |
|
"eval_runtime": 90.0175, |
|
"eval_samples_per_second": 6.399, |
|
"eval_steps_per_second": 1.6, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.076388888888889e-05, |
|
"loss": 1.1112, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.5711805820465088, |
|
"eval_loss": 1.1930283308029175, |
|
"eval_runtime": 89.5796, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 1.608, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.960648148148148e-05, |
|
"loss": 1.0907, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.5815972089767456, |
|
"eval_loss": 1.145843505859375, |
|
"eval_runtime": 90.3053, |
|
"eval_samples_per_second": 6.378, |
|
"eval_steps_per_second": 1.595, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.844907407407407e-05, |
|
"loss": 1.013, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.6180555820465088, |
|
"eval_loss": 1.0559839010238647, |
|
"eval_runtime": 89.8917, |
|
"eval_samples_per_second": 6.408, |
|
"eval_steps_per_second": 1.602, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.7291666666666666e-05, |
|
"loss": 0.9308, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.6163194179534912, |
|
"eval_loss": 1.0486806631088257, |
|
"eval_runtime": 90.7378, |
|
"eval_samples_per_second": 6.348, |
|
"eval_steps_per_second": 1.587, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.613425925925926e-05, |
|
"loss": 1.3225, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.6145833134651184, |
|
"eval_loss": 1.050321340560913, |
|
"eval_runtime": 89.4251, |
|
"eval_samples_per_second": 6.441, |
|
"eval_steps_per_second": 1.61, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.497685185185185e-05, |
|
"loss": 1.0774, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.5434027910232544, |
|
"eval_loss": 1.25161612033844, |
|
"eval_runtime": 89.4532, |
|
"eval_samples_per_second": 6.439, |
|
"eval_steps_per_second": 1.61, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.3819444444444444e-05, |
|
"loss": 1.2251, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.609375, |
|
"eval_loss": 1.076072335243225, |
|
"eval_runtime": 90.2082, |
|
"eval_samples_per_second": 6.385, |
|
"eval_steps_per_second": 1.596, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.266203703703704e-05, |
|
"loss": 0.9848, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.6440972089767456, |
|
"eval_loss": 1.0271832942962646, |
|
"eval_runtime": 89.9601, |
|
"eval_samples_per_second": 6.403, |
|
"eval_steps_per_second": 1.601, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.150462962962963e-05, |
|
"loss": 0.9913, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.6041666865348816, |
|
"eval_loss": 1.0503506660461426, |
|
"eval_runtime": 89.8786, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 1.602, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.034722222222222e-05, |
|
"loss": 0.9081, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.6666666865348816, |
|
"eval_loss": 0.9094821214675903, |
|
"eval_runtime": 91.6907, |
|
"eval_samples_per_second": 6.282, |
|
"eval_steps_per_second": 1.57, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.9189814814814815e-05, |
|
"loss": 0.8339, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.6631944179534912, |
|
"eval_loss": 0.9030921459197998, |
|
"eval_runtime": 90.547, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 1.59, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.803240740740741e-05, |
|
"loss": 0.8893, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.6423611044883728, |
|
"eval_loss": 0.9375382661819458, |
|
"eval_runtime": 90.2281, |
|
"eval_samples_per_second": 6.384, |
|
"eval_steps_per_second": 1.596, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.9362, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.6197916865348816, |
|
"eval_loss": 0.9755175113677979, |
|
"eval_runtime": 91.0163, |
|
"eval_samples_per_second": 6.329, |
|
"eval_steps_per_second": 1.582, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.5717592592592594e-05, |
|
"loss": 0.835, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.6545138955116272, |
|
"eval_loss": 0.9399816989898682, |
|
"eval_runtime": 89.8555, |
|
"eval_samples_per_second": 6.41, |
|
"eval_steps_per_second": 1.603, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.456018518518519e-05, |
|
"loss": 0.6733, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.6927083134651184, |
|
"eval_loss": 0.8480438590049744, |
|
"eval_runtime": 89.8841, |
|
"eval_samples_per_second": 6.408, |
|
"eval_steps_per_second": 1.602, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.340277777777778e-05, |
|
"loss": 1.0115, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.6840277910232544, |
|
"eval_loss": 0.8332173824310303, |
|
"eval_runtime": 91.0011, |
|
"eval_samples_per_second": 6.33, |
|
"eval_steps_per_second": 1.582, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.224537037037037e-05, |
|
"loss": 0.7473, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.6475694179534912, |
|
"eval_loss": 0.9618370532989502, |
|
"eval_runtime": 90.2515, |
|
"eval_samples_per_second": 6.382, |
|
"eval_steps_per_second": 1.596, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.1087962962962965e-05, |
|
"loss": 0.8355, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.6840277910232544, |
|
"eval_loss": 0.8845413327217102, |
|
"eval_runtime": 89.7252, |
|
"eval_samples_per_second": 6.42, |
|
"eval_steps_per_second": 1.605, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.8487, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.6875, |
|
"eval_loss": 0.8297374844551086, |
|
"eval_runtime": 91.9685, |
|
"eval_samples_per_second": 6.263, |
|
"eval_steps_per_second": 1.566, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.877314814814815e-05, |
|
"loss": 0.6038, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.6493055820465088, |
|
"eval_loss": 0.9539130330085754, |
|
"eval_runtime": 90.0856, |
|
"eval_samples_per_second": 6.394, |
|
"eval_steps_per_second": 1.598, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7615740740740744e-05, |
|
"loss": 0.75, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.6857638955116272, |
|
"eval_loss": 0.8455307483673096, |
|
"eval_runtime": 89.5522, |
|
"eval_samples_per_second": 6.432, |
|
"eval_steps_per_second": 1.608, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"loss": 0.8561, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.7013888955116272, |
|
"eval_loss": 0.7813519239425659, |
|
"eval_runtime": 90.1129, |
|
"eval_samples_per_second": 6.392, |
|
"eval_steps_per_second": 1.598, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.530092592592593e-05, |
|
"loss": 0.7552, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.6822916865348816, |
|
"eval_loss": 0.8651251196861267, |
|
"eval_runtime": 89.9146, |
|
"eval_samples_per_second": 6.406, |
|
"eval_steps_per_second": 1.602, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.414351851851852e-05, |
|
"loss": 0.6972, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_accuracy": 0.71875, |
|
"eval_loss": 0.7325252890586853, |
|
"eval_runtime": 90.7375, |
|
"eval_samples_per_second": 6.348, |
|
"eval_steps_per_second": 1.587, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.2986111111111115e-05, |
|
"loss": 0.7483, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.7690972089767456, |
|
"eval_loss": 0.6722133159637451, |
|
"eval_runtime": 90.3148, |
|
"eval_samples_per_second": 6.378, |
|
"eval_steps_per_second": 1.594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.182870370370371e-05, |
|
"loss": 0.5419, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.7326388955116272, |
|
"eval_loss": 0.7046216130256653, |
|
"eval_runtime": 92.1898, |
|
"eval_samples_per_second": 6.248, |
|
"eval_steps_per_second": 1.562, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.06712962962963e-05, |
|
"loss": 0.5203, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.7326388955116272, |
|
"eval_loss": 0.7062063813209534, |
|
"eval_runtime": 90.2661, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 1.595, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.5235, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.7534722089767456, |
|
"eval_loss": 0.6795992255210876, |
|
"eval_runtime": 91.0148, |
|
"eval_samples_per_second": 6.329, |
|
"eval_steps_per_second": 1.582, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8356481481481483e-05, |
|
"loss": 0.514, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_accuracy": 0.7204861044883728, |
|
"eval_loss": 0.746653139591217, |
|
"eval_runtime": 89.8044, |
|
"eval_samples_per_second": 6.414, |
|
"eval_steps_per_second": 1.603, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.7199074074074076e-05, |
|
"loss": 0.5402, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.7447916865348816, |
|
"eval_loss": 0.716274619102478, |
|
"eval_runtime": 90.8833, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 1.584, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.604166666666667e-05, |
|
"loss": 0.7235, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.7222222089767456, |
|
"eval_loss": 0.7545790076255798, |
|
"eval_runtime": 89.6633, |
|
"eval_samples_per_second": 6.424, |
|
"eval_steps_per_second": 1.606, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.488425925925926e-05, |
|
"loss": 0.551, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.7534722089767456, |
|
"eval_loss": 0.6994116902351379, |
|
"eval_runtime": 89.8085, |
|
"eval_samples_per_second": 6.414, |
|
"eval_steps_per_second": 1.603, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3726851851851854e-05, |
|
"loss": 0.5769, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_accuracy": 0.7534722089767456, |
|
"eval_loss": 0.7151244282722473, |
|
"eval_runtime": 90.9508, |
|
"eval_samples_per_second": 6.333, |
|
"eval_steps_per_second": 1.583, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2569444444444447e-05, |
|
"loss": 0.5501, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.7604166865348816, |
|
"eval_loss": 0.695513129234314, |
|
"eval_runtime": 90.6092, |
|
"eval_samples_per_second": 6.357, |
|
"eval_steps_per_second": 1.589, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.141203703703704e-05, |
|
"loss": 0.5416, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.7725694179534912, |
|
"eval_loss": 0.6533116102218628, |
|
"eval_runtime": 89.82, |
|
"eval_samples_per_second": 6.413, |
|
"eval_steps_per_second": 1.603, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0254629629629632e-05, |
|
"loss": 0.5452, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.7777777910232544, |
|
"eval_loss": 0.6232606172561646, |
|
"eval_runtime": 90.4116, |
|
"eval_samples_per_second": 6.371, |
|
"eval_steps_per_second": 1.593, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.8518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_accuracy": 0.7777777910232544, |
|
"eval_loss": 0.6136298179626465, |
|
"eval_runtime": 90.8648, |
|
"eval_samples_per_second": 6.339, |
|
"eval_steps_per_second": 1.585, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7939814814814815e-05, |
|
"loss": 0.3372, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.7986111044883728, |
|
"eval_loss": 0.5700623393058777, |
|
"eval_runtime": 90.1438, |
|
"eval_samples_per_second": 6.39, |
|
"eval_steps_per_second": 1.597, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6782407407407408e-05, |
|
"loss": 0.4488, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.7847222089767456, |
|
"eval_loss": 0.5789040327072144, |
|
"eval_runtime": 90.7633, |
|
"eval_samples_per_second": 6.346, |
|
"eval_steps_per_second": 1.587, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.3977, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.7829861044883728, |
|
"eval_loss": 0.5748720169067383, |
|
"eval_runtime": 89.6735, |
|
"eval_samples_per_second": 6.423, |
|
"eval_steps_per_second": 1.606, |
|
"step": 730 |
|
} |
|
], |
|
"max_steps": 864, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.1573660231214095e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|