|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 1757, |
|
"global_step": 35140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 23.659637451171875, |
|
"learning_rate": 9.97723392145703e-07, |
|
"loss": 3.8346, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_nli-pairs_loss": 2.8535287380218506, |
|
"eval_nli-pairs_runtime": 23.1384, |
|
"eval_nli-pairs_samples_per_second": 294.229, |
|
"eval_nli-pairs_steps_per_second": 18.411, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_scitail-pairs-pos_loss": 2.323117256164551, |
|
"eval_scitail-pairs-pos_runtime": 5.1803, |
|
"eval_scitail-pairs-pos_samples_per_second": 251.722, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.829, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_qnli-contrastive_loss": 3.09728741645813, |
|
"eval_qnli-contrastive_runtime": 15.5151, |
|
"eval_qnli-contrastive_samples_per_second": 352.109, |
|
"eval_qnli-contrastive_steps_per_second": 22.043, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 13.776155471801758, |
|
"learning_rate": 1.9965850882185546e-06, |
|
"loss": 1.8532, |
|
"step": 3514 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_nli-pairs_loss": 1.3508331775665283, |
|
"eval_nli-pairs_runtime": 22.8642, |
|
"eval_nli-pairs_samples_per_second": 297.758, |
|
"eval_nli-pairs_steps_per_second": 18.632, |
|
"step": 3514 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_scitail-pairs-pos_loss": 0.9754649996757507, |
|
"eval_scitail-pairs-pos_runtime": 5.1924, |
|
"eval_scitail-pairs-pos_samples_per_second": 251.136, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.792, |
|
"step": 3514 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_qnli-contrastive_loss": 2.0602548122406006, |
|
"eval_qnli-contrastive_runtime": 15.6036, |
|
"eval_qnli-contrastive_samples_per_second": 350.111, |
|
"eval_qnli-contrastive_steps_per_second": 21.918, |
|
"step": 3514 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.6704504489898682, |
|
"learning_rate": 2.99601593625498e-06, |
|
"loss": 1.2185, |
|
"step": 5271 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_nli-pairs_loss": 0.9380640983581543, |
|
"eval_nli-pairs_runtime": 23.1102, |
|
"eval_nli-pairs_samples_per_second": 294.589, |
|
"eval_nli-pairs_steps_per_second": 18.433, |
|
"step": 5271 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_scitail-pairs-pos_loss": 0.7407301664352417, |
|
"eval_scitail-pairs-pos_runtime": 5.2512, |
|
"eval_scitail-pairs-pos_samples_per_second": 248.322, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.615, |
|
"step": 5271 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_qnli-contrastive_loss": 1.2534083127975464, |
|
"eval_qnli-contrastive_runtime": 15.6475, |
|
"eval_qnli-contrastive_samples_per_second": 349.129, |
|
"eval_qnli-contrastive_steps_per_second": 21.857, |
|
"step": 5271 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 26.072860717773438, |
|
"learning_rate": 3.99601593625498e-06, |
|
"loss": 0.9584, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_nli-pairs_loss": 0.749484658241272, |
|
"eval_nli-pairs_runtime": 23.0514, |
|
"eval_nli-pairs_samples_per_second": 295.34, |
|
"eval_nli-pairs_steps_per_second": 18.48, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_scitail-pairs-pos_loss": 0.661561131477356, |
|
"eval_scitail-pairs-pos_runtime": 5.2207, |
|
"eval_scitail-pairs-pos_samples_per_second": 249.774, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.707, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_qnli-contrastive_loss": 0.5139556527137756, |
|
"eval_qnli-contrastive_runtime": 15.681, |
|
"eval_qnli-contrastive_samples_per_second": 348.384, |
|
"eval_qnli-contrastive_steps_per_second": 21.81, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 24.09697914123535, |
|
"learning_rate": 4.995446784291406e-06, |
|
"loss": 0.8157, |
|
"step": 8785 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_nli-pairs_loss": 0.6549726724624634, |
|
"eval_nli-pairs_runtime": 23.2274, |
|
"eval_nli-pairs_samples_per_second": 293.102, |
|
"eval_nli-pairs_steps_per_second": 18.34, |
|
"step": 8785 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_scitail-pairs-pos_loss": 0.6056841611862183, |
|
"eval_scitail-pairs-pos_runtime": 5.2473, |
|
"eval_scitail-pairs-pos_samples_per_second": 248.508, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.627, |
|
"step": 8785 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_qnli-contrastive_loss": 0.3295331299304962, |
|
"eval_qnli-contrastive_runtime": 15.7204, |
|
"eval_qnli-contrastive_samples_per_second": 347.511, |
|
"eval_qnli-contrastive_steps_per_second": 21.755, |
|
"step": 8785 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 9.664803504943848, |
|
"learning_rate": 5.994877632327832e-06, |
|
"loss": 0.6698, |
|
"step": 10542 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_nli-pairs_loss": 0.5809468626976013, |
|
"eval_nli-pairs_runtime": 22.9525, |
|
"eval_nli-pairs_samples_per_second": 296.612, |
|
"eval_nli-pairs_steps_per_second": 18.56, |
|
"step": 10542 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_scitail-pairs-pos_loss": 0.5820835828781128, |
|
"eval_scitail-pairs-pos_runtime": 5.1829, |
|
"eval_scitail-pairs-pos_samples_per_second": 251.599, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.821, |
|
"step": 10542 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_qnli-contrastive_loss": 0.24226614832878113, |
|
"eval_qnli-contrastive_runtime": 15.6321, |
|
"eval_qnli-contrastive_samples_per_second": 349.473, |
|
"eval_qnli-contrastive_steps_per_second": 21.878, |
|
"step": 10542 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 66.77753448486328, |
|
"learning_rate": 6.994877632327832e-06, |
|
"loss": 0.6497, |
|
"step": 12299 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_nli-pairs_loss": 0.5178281664848328, |
|
"eval_nli-pairs_runtime": 23.0673, |
|
"eval_nli-pairs_samples_per_second": 295.136, |
|
"eval_nli-pairs_steps_per_second": 18.468, |
|
"step": 12299 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_scitail-pairs-pos_loss": 0.504002571105957, |
|
"eval_scitail-pairs-pos_runtime": 5.1845, |
|
"eval_scitail-pairs-pos_samples_per_second": 251.52, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.816, |
|
"step": 12299 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_qnli-contrastive_loss": 0.24089547991752625, |
|
"eval_qnli-contrastive_runtime": 15.5228, |
|
"eval_qnli-contrastive_samples_per_second": 351.933, |
|
"eval_qnli-contrastive_steps_per_second": 22.032, |
|
"step": 12299 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.6044542193412781, |
|
"learning_rate": 7.994308480364257e-06, |
|
"loss": 0.5737, |
|
"step": 14056 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_nli-pairs_loss": 0.5019380450248718, |
|
"eval_nli-pairs_runtime": 23.0659, |
|
"eval_nli-pairs_samples_per_second": 295.154, |
|
"eval_nli-pairs_steps_per_second": 18.469, |
|
"step": 14056 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_scitail-pairs-pos_loss": 0.49418017268180847, |
|
"eval_scitail-pairs-pos_runtime": 5.2457, |
|
"eval_scitail-pairs-pos_samples_per_second": 248.585, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.632, |
|
"step": 14056 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_qnli-contrastive_loss": 0.14995019137859344, |
|
"eval_qnli-contrastive_runtime": 15.7177, |
|
"eval_qnli-contrastive_samples_per_second": 347.57, |
|
"eval_qnli-contrastive_steps_per_second": 21.759, |
|
"step": 14056 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.4454790949821472, |
|
"learning_rate": 8.993739328400684e-06, |
|
"loss": 0.5896, |
|
"step": 15813 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_nli-pairs_loss": 0.4803747236728668, |
|
"eval_nli-pairs_runtime": 23.0746, |
|
"eval_nli-pairs_samples_per_second": 295.043, |
|
"eval_nli-pairs_steps_per_second": 18.462, |
|
"step": 15813 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_scitail-pairs-pos_loss": 0.47568026185035706, |
|
"eval_scitail-pairs-pos_runtime": 5.2076, |
|
"eval_scitail-pairs-pos_samples_per_second": 250.402, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.746, |
|
"step": 15813 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_qnli-contrastive_loss": 0.14648529887199402, |
|
"eval_qnli-contrastive_runtime": 15.5997, |
|
"eval_qnli-contrastive_samples_per_second": 350.199, |
|
"eval_qnli-contrastive_steps_per_second": 21.924, |
|
"step": 15813 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 196.14842224121094, |
|
"learning_rate": 9.993739328400683e-06, |
|
"loss": 0.5174, |
|
"step": 17570 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_nli-pairs_loss": 0.4586646258831024, |
|
"eval_nli-pairs_runtime": 22.8967, |
|
"eval_nli-pairs_samples_per_second": 297.336, |
|
"eval_nli-pairs_steps_per_second": 18.605, |
|
"step": 17570 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_scitail-pairs-pos_loss": 0.5253121256828308, |
|
"eval_scitail-pairs-pos_runtime": 5.1603, |
|
"eval_scitail-pairs-pos_samples_per_second": 252.699, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.891, |
|
"step": 17570 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_qnli-contrastive_loss": 0.0533733032643795, |
|
"eval_qnli-contrastive_runtime": 15.5083, |
|
"eval_qnli-contrastive_samples_per_second": 352.263, |
|
"eval_qnli-contrastive_steps_per_second": 22.053, |
|
"step": 17570 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 18.839372634887695, |
|
"learning_rate": 9.75831232890717e-06, |
|
"loss": 0.5059, |
|
"step": 19327 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_nli-pairs_loss": 0.45871272683143616, |
|
"eval_nli-pairs_runtime": 22.8984, |
|
"eval_nli-pairs_samples_per_second": 297.313, |
|
"eval_nli-pairs_steps_per_second": 18.604, |
|
"step": 19327 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_scitail-pairs-pos_loss": 0.5492986440658569, |
|
"eval_scitail-pairs-pos_runtime": 5.1782, |
|
"eval_scitail-pairs-pos_samples_per_second": 251.824, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.836, |
|
"step": 19327 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_qnli-contrastive_loss": 0.027841920033097267, |
|
"eval_qnli-contrastive_runtime": 15.522, |
|
"eval_qnli-contrastive_samples_per_second": 351.952, |
|
"eval_qnli-contrastive_steps_per_second": 22.033, |
|
"step": 19327 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 6.800241947174072, |
|
"learning_rate": 9.051905444616243e-06, |
|
"loss": 0.4654, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_nli-pairs_loss": 0.44151321053504944, |
|
"eval_nli-pairs_runtime": 23.1311, |
|
"eval_nli-pairs_samples_per_second": 294.323, |
|
"eval_nli-pairs_steps_per_second": 18.417, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_scitail-pairs-pos_loss": 0.4850437045097351, |
|
"eval_scitail-pairs-pos_runtime": 5.2939, |
|
"eval_scitail-pairs-pos_samples_per_second": 246.321, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.49, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_qnli-contrastive_loss": 0.05170624330639839, |
|
"eval_qnli-contrastive_runtime": 15.7737, |
|
"eval_qnli-contrastive_samples_per_second": 346.336, |
|
"eval_qnli-contrastive_steps_per_second": 21.682, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.41899746656417847, |
|
"learning_rate": 7.948320938272786e-06, |
|
"loss": 0.4224, |
|
"step": 22841 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_nli-pairs_loss": 0.39569494128227234, |
|
"eval_nli-pairs_runtime": 23.2638, |
|
"eval_nli-pairs_samples_per_second": 292.643, |
|
"eval_nli-pairs_steps_per_second": 18.312, |
|
"step": 22841 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_scitail-pairs-pos_loss": 0.42922988533973694, |
|
"eval_scitail-pairs-pos_runtime": 5.2769, |
|
"eval_scitail-pairs-pos_samples_per_second": 247.114, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.539, |
|
"step": 22841 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_qnli-contrastive_loss": 0.0938122496008873, |
|
"eval_qnli-contrastive_runtime": 15.6681, |
|
"eval_qnli-contrastive_samples_per_second": 348.67, |
|
"eval_qnli-contrastive_steps_per_second": 21.828, |
|
"step": 22841 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 3.0029168128967285, |
|
"learning_rate": 6.556983832253587e-06, |
|
"loss": 0.4125, |
|
"step": 24598 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_nli-pairs_loss": 0.3794442415237427, |
|
"eval_nli-pairs_runtime": 23.2107, |
|
"eval_nli-pairs_samples_per_second": 293.313, |
|
"eval_nli-pairs_steps_per_second": 18.354, |
|
"step": 24598 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_scitail-pairs-pos_loss": 0.4623956084251404, |
|
"eval_scitail-pairs-pos_runtime": 5.2884, |
|
"eval_scitail-pairs-pos_samples_per_second": 246.577, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.506, |
|
"step": 24598 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_qnli-contrastive_loss": 0.0838843286037445, |
|
"eval_qnli-contrastive_runtime": 15.7017, |
|
"eval_qnli-contrastive_samples_per_second": 347.924, |
|
"eval_qnli-contrastive_steps_per_second": 21.781, |
|
"step": 24598 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 10.91913890838623, |
|
"learning_rate": 5.012516292320938e-06, |
|
"loss": 0.4072, |
|
"step": 26355 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_nli-pairs_loss": 0.3877629041671753, |
|
"eval_nli-pairs_runtime": 23.1072, |
|
"eval_nli-pairs_samples_per_second": 294.627, |
|
"eval_nli-pairs_steps_per_second": 18.436, |
|
"step": 26355 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_scitail-pairs-pos_loss": 0.4480924606323242, |
|
"eval_scitail-pairs-pos_runtime": 5.2741, |
|
"eval_scitail-pairs-pos_samples_per_second": 247.244, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.548, |
|
"step": 26355 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_qnli-contrastive_loss": 0.06811495870351791, |
|
"eval_qnli-contrastive_runtime": 15.7641, |
|
"eval_qnli-contrastive_samples_per_second": 346.546, |
|
"eval_qnli-contrastive_steps_per_second": 21.695, |
|
"step": 26355 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 3.676146984100342, |
|
"learning_rate": 3.4668235704897813e-06, |
|
"loss": 0.3572, |
|
"step": 28112 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_nli-pairs_loss": 0.3715905547142029, |
|
"eval_nli-pairs_runtime": 23.1744, |
|
"eval_nli-pairs_samples_per_second": 293.773, |
|
"eval_nli-pairs_steps_per_second": 18.382, |
|
"step": 28112 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_scitail-pairs-pos_loss": 0.49534013867378235, |
|
"eval_scitail-pairs-pos_runtime": 5.2856, |
|
"eval_scitail-pairs-pos_samples_per_second": 246.708, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.514, |
|
"step": 28112 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_qnli-contrastive_loss": 0.06735851615667343, |
|
"eval_qnli-contrastive_runtime": 15.7308, |
|
"eval_qnli-contrastive_samples_per_second": 347.281, |
|
"eval_qnli-contrastive_steps_per_second": 21.741, |
|
"step": 28112 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 229.6580047607422, |
|
"learning_rate": 2.072658211127134e-06, |
|
"loss": 0.371, |
|
"step": 29869 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_nli-pairs_loss": 0.36217835545539856, |
|
"eval_nli-pairs_runtime": 23.1495, |
|
"eval_nli-pairs_samples_per_second": 294.089, |
|
"eval_nli-pairs_steps_per_second": 18.402, |
|
"step": 29869 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_scitail-pairs-pos_loss": 0.47673526406288147, |
|
"eval_scitail-pairs-pos_runtime": 5.2158, |
|
"eval_scitail-pairs-pos_samples_per_second": 250.008, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.721, |
|
"step": 29869 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_qnli-contrastive_loss": 0.06000087782740593, |
|
"eval_qnli-contrastive_runtime": 15.6328, |
|
"eval_qnli-contrastive_samples_per_second": 349.458, |
|
"eval_qnli-contrastive_steps_per_second": 21.877, |
|
"step": 29869 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.6022229194641113, |
|
"learning_rate": 9.638670801112644e-07, |
|
"loss": 0.3332, |
|
"step": 31626 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_nli-pairs_loss": 0.3600439131259918, |
|
"eval_nli-pairs_runtime": 23.0874, |
|
"eval_nli-pairs_samples_per_second": 294.879, |
|
"eval_nli-pairs_steps_per_second": 18.452, |
|
"step": 31626 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_scitail-pairs-pos_loss": 0.465911865234375, |
|
"eval_scitail-pairs-pos_runtime": 5.3369, |
|
"eval_scitail-pairs-pos_samples_per_second": 244.338, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.365, |
|
"step": 31626 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_qnli-contrastive_loss": 0.05613844096660614, |
|
"eval_qnli-contrastive_runtime": 15.7089, |
|
"eval_qnli-contrastive_samples_per_second": 347.764, |
|
"eval_qnli-contrastive_steps_per_second": 21.771, |
|
"step": 31626 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.23106251657009125, |
|
"learning_rate": 2.4943593464921476e-07, |
|
"loss": 0.3695, |
|
"step": 33383 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_nli-pairs_loss": 0.35667526721954346, |
|
"eval_nli-pairs_runtime": 23.1588, |
|
"eval_nli-pairs_samples_per_second": 293.971, |
|
"eval_nli-pairs_steps_per_second": 18.395, |
|
"step": 33383 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_scitail-pairs-pos_loss": 0.4603894352912903, |
|
"eval_scitail-pairs-pos_runtime": 5.248, |
|
"eval_scitail-pairs-pos_samples_per_second": 248.476, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.625, |
|
"step": 33383 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_qnli-contrastive_loss": 0.06141861155629158, |
|
"eval_qnli-contrastive_runtime": 15.6709, |
|
"eval_qnli-contrastive_samples_per_second": 348.608, |
|
"eval_qnli-contrastive_steps_per_second": 21.824, |
|
"step": 33383 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": null, |
|
"learning_rate": 2.5896487759191624e-11, |
|
"loss": 0.3315, |
|
"step": 35140 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_nli-pairs_loss": 0.3597075045108795, |
|
"eval_nli-pairs_runtime": 23.1058, |
|
"eval_nli-pairs_samples_per_second": 294.645, |
|
"eval_nli-pairs_steps_per_second": 18.437, |
|
"step": 35140 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_scitail-pairs-pos_loss": 0.47120198607444763, |
|
"eval_scitail-pairs-pos_runtime": 5.2532, |
|
"eval_scitail-pairs-pos_samples_per_second": 248.23, |
|
"eval_scitail-pairs-pos_steps_per_second": 15.61, |
|
"step": 35140 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_qnli-contrastive_loss": 0.05398999899625778, |
|
"eval_qnli-contrastive_runtime": 15.7099, |
|
"eval_qnli-contrastive_samples_per_second": 347.743, |
|
"eval_qnli-contrastive_steps_per_second": 21.77, |
|
"step": 35140 |
|
} |
|
], |
|
"logging_steps": 1757, |
|
"max_steps": 35140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 17570, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|