{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 11722, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.530967411704488e-05, "grad_norm": 6.629314529173358, "learning_rate": 2.840909090909091e-07, "loss": 0.6284, "step": 1 }, { "epoch": 0.00017061934823408976, "grad_norm": 5.932271803860879, "learning_rate": 5.681818181818182e-07, "loss": 0.5329, "step": 2 }, { "epoch": 0.0002559290223511346, "grad_norm": 7.1147730193722625, "learning_rate": 8.522727272727273e-07, "loss": 0.6039, "step": 3 }, { "epoch": 0.0003412386964681795, "grad_norm": 7.096217864707822, "learning_rate": 1.1363636363636364e-06, "loss": 0.5143, "step": 4 }, { "epoch": 0.00042654837058522436, "grad_norm": 5.912930378966415, "learning_rate": 1.4204545454545456e-06, "loss": 0.5325, "step": 5 }, { "epoch": 0.0005118580447022692, "grad_norm": 7.2287544184246935, "learning_rate": 1.7045454545454546e-06, "loss": 0.6042, "step": 6 }, { "epoch": 0.0005971677188193141, "grad_norm": 6.21850922851663, "learning_rate": 1.9886363636363638e-06, "loss": 0.5608, "step": 7 }, { "epoch": 0.000682477392936359, "grad_norm": 7.053744399862654, "learning_rate": 2.2727272727272728e-06, "loss": 0.565, "step": 8 }, { "epoch": 0.0007677870670534039, "grad_norm": 7.493507626951199, "learning_rate": 2.556818181818182e-06, "loss": 0.5962, "step": 9 }, { "epoch": 0.0008530967411704487, "grad_norm": 6.192947301807031, "learning_rate": 2.840909090909091e-06, "loss": 0.5282, "step": 10 }, { "epoch": 0.0009384064152874935, "grad_norm": 6.684495197372885, "learning_rate": 3.125e-06, "loss": 0.596, "step": 11 }, { "epoch": 0.0010237160894045385, "grad_norm": 6.875621420778997, "learning_rate": 3.409090909090909e-06, "loss": 0.6384, "step": 12 }, { "epoch": 0.0011090257635215834, "grad_norm": 5.34180401622009, "learning_rate": 3.6931818181818186e-06, "loss": 0.5144, "step": 13 }, { "epoch": 0.0011943354376386282, "grad_norm": 5.761761122725548, "learning_rate": 3.9772727272727275e-06, "loss": 0.4941, "step": 14 }, { "epoch": 0.0012796451117556731, "grad_norm": 6.087771255747434, "learning_rate": 4.2613636363636365e-06, "loss": 0.5588, "step": 15 }, { "epoch": 0.001364954785872718, "grad_norm": 6.210298095035311, "learning_rate": 4.5454545454545455e-06, "loss": 0.5241, "step": 16 }, { "epoch": 0.0014502644599897628, "grad_norm": 4.096960774214127, "learning_rate": 4.8295454545454545e-06, "loss": 0.5402, "step": 17 }, { "epoch": 0.0015355741341068077, "grad_norm": 5.166936908340425, "learning_rate": 5.113636363636364e-06, "loss": 0.5525, "step": 18 }, { "epoch": 0.0016208838082238527, "grad_norm": 4.522643867933041, "learning_rate": 5.397727272727273e-06, "loss": 0.5006, "step": 19 }, { "epoch": 0.0017061934823408974, "grad_norm": 5.4051509026592015, "learning_rate": 5.681818181818182e-06, "loss": 0.5601, "step": 20 }, { "epoch": 0.0017915031564579424, "grad_norm": 4.2363985863924665, "learning_rate": 5.965909090909091e-06, "loss": 0.4607, "step": 21 }, { "epoch": 0.001876812830574987, "grad_norm": 3.10201538776686, "learning_rate": 6.25e-06, "loss": 0.4585, "step": 22 }, { "epoch": 0.0019621225046920323, "grad_norm": 3.816799303662491, "learning_rate": 6.534090909090909e-06, "loss": 0.4631, "step": 23 }, { "epoch": 0.002047432178809077, "grad_norm": 3.6358645508468124, "learning_rate": 6.818181818181818e-06, "loss": 0.46, "step": 24 }, { "epoch": 0.0021327418529261217, "grad_norm": 4.04790751714191, "learning_rate": 7.102272727272728e-06, "loss": 0.4904, "step": 25 }, { "epoch": 0.002218051527043167, "grad_norm": 4.332183147274963, "learning_rate": 7.386363636363637e-06, "loss": 0.5344, "step": 26 }, { "epoch": 0.0023033612011602116, "grad_norm": 3.4435144693517357, "learning_rate": 7.670454545454545e-06, "loss": 0.5198, "step": 27 }, { "epoch": 0.0023886708752772563, "grad_norm": 2.999834373988388, "learning_rate": 7.954545454545455e-06, "loss": 0.4124, "step": 28 }, { "epoch": 0.0024739805493943015, "grad_norm": 2.7988314472186757, "learning_rate": 8.238636363636363e-06, "loss": 0.4453, "step": 29 }, { "epoch": 0.0025592902235113462, "grad_norm": 3.5867232914062677, "learning_rate": 8.522727272727273e-06, "loss": 0.5019, "step": 30 }, { "epoch": 0.002644599897628391, "grad_norm": 3.568366590971768, "learning_rate": 8.806818181818183e-06, "loss": 0.503, "step": 31 }, { "epoch": 0.002729909571745436, "grad_norm": 3.576671988301675, "learning_rate": 9.090909090909091e-06, "loss": 0.4749, "step": 32 }, { "epoch": 0.002815219245862481, "grad_norm": 3.4684813799249006, "learning_rate": 9.375000000000001e-06, "loss": 0.4596, "step": 33 }, { "epoch": 0.0029005289199795256, "grad_norm": 2.7005813184761975, "learning_rate": 9.659090909090909e-06, "loss": 0.445, "step": 34 }, { "epoch": 0.0029858385940965708, "grad_norm": 3.461783346659491, "learning_rate": 9.943181818181819e-06, "loss": 0.5361, "step": 35 }, { "epoch": 0.0030711482682136155, "grad_norm": 2.85171566904558, "learning_rate": 1.0227272727272729e-05, "loss": 0.4596, "step": 36 }, { "epoch": 0.0031564579423306602, "grad_norm": 2.9458556142465286, "learning_rate": 1.0511363636363637e-05, "loss": 0.4231, "step": 37 }, { "epoch": 0.0032417676164477054, "grad_norm": 2.7769807774198187, "learning_rate": 1.0795454545454547e-05, "loss": 0.437, "step": 38 }, { "epoch": 0.00332707729056475, "grad_norm": 2.7680138222784776, "learning_rate": 1.1079545454545455e-05, "loss": 0.5033, "step": 39 }, { "epoch": 0.003412386964681795, "grad_norm": 2.878143582196053, "learning_rate": 1.1363636363636365e-05, "loss": 0.4611, "step": 40 }, { "epoch": 0.00349769663879884, "grad_norm": 2.6572662204753765, "learning_rate": 1.1647727272727273e-05, "loss": 0.4371, "step": 41 }, { "epoch": 0.0035830063129158847, "grad_norm": 2.6815801670624753, "learning_rate": 1.1931818181818183e-05, "loss": 0.4843, "step": 42 }, { "epoch": 0.0036683159870329295, "grad_norm": 2.562583549812194, "learning_rate": 1.2215909090909092e-05, "loss": 0.432, "step": 43 }, { "epoch": 0.003753625661149974, "grad_norm": 2.739143964880151, "learning_rate": 1.25e-05, "loss": 0.4844, "step": 44 }, { "epoch": 0.0038389353352670194, "grad_norm": 2.780612315249518, "learning_rate": 1.2784090909090909e-05, "loss": 0.431, "step": 45 }, { "epoch": 0.0039242450093840645, "grad_norm": 2.8748069366647395, "learning_rate": 1.3068181818181819e-05, "loss": 0.4636, "step": 46 }, { "epoch": 0.004009554683501109, "grad_norm": 2.696568823769511, "learning_rate": 1.3352272727272727e-05, "loss": 0.4669, "step": 47 }, { "epoch": 0.004094864357618154, "grad_norm": 3.0441281187587963, "learning_rate": 1.3636363636363637e-05, "loss": 0.523, "step": 48 }, { "epoch": 0.004180174031735199, "grad_norm": 2.7147998394605377, "learning_rate": 1.3920454545454545e-05, "loss": 0.4549, "step": 49 }, { "epoch": 0.0042654837058522434, "grad_norm": 2.957650405786925, "learning_rate": 1.4204545454545456e-05, "loss": 0.49, "step": 50 }, { "epoch": 0.004350793379969288, "grad_norm": 2.905629409652124, "learning_rate": 1.4488636363636366e-05, "loss": 0.4072, "step": 51 }, { "epoch": 0.004436103054086334, "grad_norm": 2.601004234190255, "learning_rate": 1.4772727272727274e-05, "loss": 0.3886, "step": 52 }, { "epoch": 0.0045214127282033785, "grad_norm": 2.6294151595784374, "learning_rate": 1.5056818181818182e-05, "loss": 0.4422, "step": 53 }, { "epoch": 0.004606722402320423, "grad_norm": 2.619601374869889, "learning_rate": 1.534090909090909e-05, "loss": 0.4018, "step": 54 }, { "epoch": 0.004692032076437468, "grad_norm": 2.510556345070487, "learning_rate": 1.5625e-05, "loss": 0.3996, "step": 55 }, { "epoch": 0.004777341750554513, "grad_norm": 2.2963729621139355, "learning_rate": 1.590909090909091e-05, "loss": 0.4232, "step": 56 }, { "epoch": 0.004862651424671557, "grad_norm": 2.4640898360125156, "learning_rate": 1.619318181818182e-05, "loss": 0.4405, "step": 57 }, { "epoch": 0.004947961098788603, "grad_norm": 2.9500789373149257, "learning_rate": 1.6477272727272726e-05, "loss": 0.4912, "step": 58 }, { "epoch": 0.005033270772905648, "grad_norm": 3.1716084485733753, "learning_rate": 1.6761363636363636e-05, "loss": 0.4228, "step": 59 }, { "epoch": 0.0051185804470226925, "grad_norm": 2.4454277425794544, "learning_rate": 1.7045454545454546e-05, "loss": 0.4536, "step": 60 }, { "epoch": 0.005203890121139737, "grad_norm": 2.8109240885187194, "learning_rate": 1.7329545454545456e-05, "loss": 0.4177, "step": 61 }, { "epoch": 0.005289199795256782, "grad_norm": 2.6953534551632554, "learning_rate": 1.7613636363636366e-05, "loss": 0.4327, "step": 62 }, { "epoch": 0.005374509469373827, "grad_norm": 3.3716333339801574, "learning_rate": 1.7897727272727276e-05, "loss": 0.4671, "step": 63 }, { "epoch": 0.005459819143490872, "grad_norm": 2.8254756425797627, "learning_rate": 1.8181818181818182e-05, "loss": 0.4411, "step": 64 }, { "epoch": 0.005545128817607917, "grad_norm": 2.1775193454658286, "learning_rate": 1.8465909090909092e-05, "loss": 0.4334, "step": 65 }, { "epoch": 0.005630438491724962, "grad_norm": 2.887497978705157, "learning_rate": 1.8750000000000002e-05, "loss": 0.3884, "step": 66 }, { "epoch": 0.0057157481658420065, "grad_norm": 3.0250875223912845, "learning_rate": 1.9034090909090908e-05, "loss": 0.4735, "step": 67 }, { "epoch": 0.005801057839959051, "grad_norm": 1.9973627226139639, "learning_rate": 1.9318181818181818e-05, "loss": 0.4331, "step": 68 }, { "epoch": 0.005886367514076096, "grad_norm": 2.6540890149824823, "learning_rate": 1.9602272727272728e-05, "loss": 0.411, "step": 69 }, { "epoch": 0.0059716771881931415, "grad_norm": 2.7941918316018985, "learning_rate": 1.9886363636363638e-05, "loss": 0.3893, "step": 70 }, { "epoch": 0.006056986862310186, "grad_norm": 2.468703740326332, "learning_rate": 2.0170454545454544e-05, "loss": 0.4396, "step": 71 }, { "epoch": 0.006142296536427231, "grad_norm": 2.2721514128346905, "learning_rate": 2.0454545454545457e-05, "loss": 0.4542, "step": 72 }, { "epoch": 0.006227606210544276, "grad_norm": 2.5257997587064462, "learning_rate": 2.0738636363636367e-05, "loss": 0.4239, "step": 73 }, { "epoch": 0.0063129158846613204, "grad_norm": 2.391049415961601, "learning_rate": 2.1022727272727274e-05, "loss": 0.4455, "step": 74 }, { "epoch": 0.006398225558778365, "grad_norm": 2.2439387590072073, "learning_rate": 2.1306818181818183e-05, "loss": 0.4087, "step": 75 }, { "epoch": 0.006483535232895411, "grad_norm": 2.7118541915598113, "learning_rate": 2.1590909090909093e-05, "loss": 0.399, "step": 76 }, { "epoch": 0.0065688449070124555, "grad_norm": 2.4380295007186668, "learning_rate": 2.1875e-05, "loss": 0.3821, "step": 77 }, { "epoch": 0.0066541545811295, "grad_norm": 2.2267576968645812, "learning_rate": 2.215909090909091e-05, "loss": 0.4067, "step": 78 }, { "epoch": 0.006739464255246545, "grad_norm": 2.3983794463149195, "learning_rate": 2.244318181818182e-05, "loss": 0.4176, "step": 79 }, { "epoch": 0.00682477392936359, "grad_norm": 2.701910374463013, "learning_rate": 2.272727272727273e-05, "loss": 0.4055, "step": 80 }, { "epoch": 0.006910083603480634, "grad_norm": 2.438091890901868, "learning_rate": 2.3011363636363636e-05, "loss": 0.4329, "step": 81 }, { "epoch": 0.00699539327759768, "grad_norm": 3.0371775138055743, "learning_rate": 2.3295454545454546e-05, "loss": 0.4478, "step": 82 }, { "epoch": 0.007080702951714725, "grad_norm": 2.9669190231602607, "learning_rate": 2.3579545454545455e-05, "loss": 0.4853, "step": 83 }, { "epoch": 0.0071660126258317695, "grad_norm": 2.859139354563873, "learning_rate": 2.3863636363636365e-05, "loss": 0.4051, "step": 84 }, { "epoch": 0.007251322299948814, "grad_norm": 2.285670974014553, "learning_rate": 2.4147727272727275e-05, "loss": 0.4564, "step": 85 }, { "epoch": 0.007336631974065859, "grad_norm": 2.2653677958800316, "learning_rate": 2.4431818181818185e-05, "loss": 0.4419, "step": 86 }, { "epoch": 0.007421941648182904, "grad_norm": 2.7272967467550795, "learning_rate": 2.471590909090909e-05, "loss": 0.3965, "step": 87 }, { "epoch": 0.007507251322299948, "grad_norm": 2.3618557248284207, "learning_rate": 2.5e-05, "loss": 0.3885, "step": 88 }, { "epoch": 0.007592560996416994, "grad_norm": 2.844057506661265, "learning_rate": 2.5284090909090914e-05, "loss": 0.4297, "step": 89 }, { "epoch": 0.007677870670534039, "grad_norm": 3.2240440016268805, "learning_rate": 2.5568181818181817e-05, "loss": 0.479, "step": 90 }, { "epoch": 0.0077631803446510834, "grad_norm": 2.5809231456501065, "learning_rate": 2.585227272727273e-05, "loss": 0.39, "step": 91 }, { "epoch": 0.007848490018768129, "grad_norm": 2.1449746818204254, "learning_rate": 2.6136363636363637e-05, "loss": 0.398, "step": 92 }, { "epoch": 0.007933799692885173, "grad_norm": 2.6658356881814447, "learning_rate": 2.6420454545454547e-05, "loss": 0.4163, "step": 93 }, { "epoch": 0.008019109367002219, "grad_norm": 2.816925720475951, "learning_rate": 2.6704545454545453e-05, "loss": 0.4579, "step": 94 }, { "epoch": 0.008104419041119262, "grad_norm": 3.1909705323353306, "learning_rate": 2.6988636363636367e-05, "loss": 0.4421, "step": 95 }, { "epoch": 0.008189728715236308, "grad_norm": 2.6903140394552256, "learning_rate": 2.7272727272727273e-05, "loss": 0.4332, "step": 96 }, { "epoch": 0.008275038389353352, "grad_norm": 2.261479656646674, "learning_rate": 2.7556818181818183e-05, "loss": 0.4383, "step": 97 }, { "epoch": 0.008360348063470397, "grad_norm": 2.4101742538200126, "learning_rate": 2.784090909090909e-05, "loss": 0.4747, "step": 98 }, { "epoch": 0.008445657737587443, "grad_norm": 3.189572875746799, "learning_rate": 2.8125000000000003e-05, "loss": 0.4188, "step": 99 }, { "epoch": 0.008530967411704487, "grad_norm": 2.8665742556144713, "learning_rate": 2.8409090909090912e-05, "loss": 0.4555, "step": 100 }, { "epoch": 0.008616277085821532, "grad_norm": 3.107580692781122, "learning_rate": 2.869318181818182e-05, "loss": 0.4371, "step": 101 }, { "epoch": 0.008701586759938576, "grad_norm": 2.6561859123128118, "learning_rate": 2.8977272727272732e-05, "loss": 0.4273, "step": 102 }, { "epoch": 0.008786896434055622, "grad_norm": 2.4387686924533307, "learning_rate": 2.9261363636363635e-05, "loss": 0.4319, "step": 103 }, { "epoch": 0.008872206108172668, "grad_norm": 2.367498497000744, "learning_rate": 2.954545454545455e-05, "loss": 0.4138, "step": 104 }, { "epoch": 0.008957515782289711, "grad_norm": 2.399062482191123, "learning_rate": 2.9829545454545455e-05, "loss": 0.3934, "step": 105 }, { "epoch": 0.009042825456406757, "grad_norm": 2.2303430418275383, "learning_rate": 3.0113636363636365e-05, "loss": 0.3882, "step": 106 }, { "epoch": 0.0091281351305238, "grad_norm": 2.7945423308364234, "learning_rate": 3.039772727272727e-05, "loss": 0.4444, "step": 107 }, { "epoch": 0.009213444804640846, "grad_norm": 2.33809641200874, "learning_rate": 3.068181818181818e-05, "loss": 0.4412, "step": 108 }, { "epoch": 0.00929875447875789, "grad_norm": 2.890506721344916, "learning_rate": 3.096590909090909e-05, "loss": 0.4226, "step": 109 }, { "epoch": 0.009384064152874936, "grad_norm": 2.756478654368825, "learning_rate": 3.125e-05, "loss": 0.3945, "step": 110 }, { "epoch": 0.009469373826991982, "grad_norm": 2.347354609300478, "learning_rate": 3.153409090909091e-05, "loss": 0.3965, "step": 111 }, { "epoch": 0.009554683501109025, "grad_norm": 2.7855857742625565, "learning_rate": 3.181818181818182e-05, "loss": 0.3814, "step": 112 }, { "epoch": 0.009639993175226071, "grad_norm": 2.810281853709702, "learning_rate": 3.210227272727273e-05, "loss": 0.4418, "step": 113 }, { "epoch": 0.009725302849343115, "grad_norm": 2.4997447837258604, "learning_rate": 3.238636363636364e-05, "loss": 0.3808, "step": 114 }, { "epoch": 0.00981061252346016, "grad_norm": 2.224526635725398, "learning_rate": 3.267045454545455e-05, "loss": 0.4399, "step": 115 }, { "epoch": 0.009895922197577206, "grad_norm": 2.239553572582664, "learning_rate": 3.295454545454545e-05, "loss": 0.3651, "step": 116 }, { "epoch": 0.00998123187169425, "grad_norm": 2.558876639299239, "learning_rate": 3.323863636363637e-05, "loss": 0.4254, "step": 117 }, { "epoch": 0.010066541545811295, "grad_norm": 2.562609600421485, "learning_rate": 3.352272727272727e-05, "loss": 0.4547, "step": 118 }, { "epoch": 0.01015185121992834, "grad_norm": 2.329367491697468, "learning_rate": 3.380681818181818e-05, "loss": 0.4442, "step": 119 }, { "epoch": 0.010237160894045385, "grad_norm": 2.34291611141999, "learning_rate": 3.409090909090909e-05, "loss": 0.435, "step": 120 }, { "epoch": 0.010322470568162429, "grad_norm": 2.341304876903332, "learning_rate": 3.4375e-05, "loss": 0.4276, "step": 121 }, { "epoch": 0.010407780242279474, "grad_norm": 2.388019413394707, "learning_rate": 3.465909090909091e-05, "loss": 0.3708, "step": 122 }, { "epoch": 0.01049308991639652, "grad_norm": 2.301087959713805, "learning_rate": 3.494318181818182e-05, "loss": 0.4625, "step": 123 }, { "epoch": 0.010578399590513564, "grad_norm": 2.4122638606589377, "learning_rate": 3.522727272727273e-05, "loss": 0.41, "step": 124 }, { "epoch": 0.01066370926463061, "grad_norm": 2.9180038838141718, "learning_rate": 3.5511363636363635e-05, "loss": 0.44, "step": 125 }, { "epoch": 0.010749018938747653, "grad_norm": 2.8819515938012685, "learning_rate": 3.579545454545455e-05, "loss": 0.4522, "step": 126 }, { "epoch": 0.010834328612864699, "grad_norm": 2.3626546738465866, "learning_rate": 3.6079545454545454e-05, "loss": 0.3913, "step": 127 }, { "epoch": 0.010919638286981745, "grad_norm": 2.3690482402252093, "learning_rate": 3.6363636363636364e-05, "loss": 0.4064, "step": 128 }, { "epoch": 0.011004947961098788, "grad_norm": 2.324628521355312, "learning_rate": 3.6647727272727274e-05, "loss": 0.4163, "step": 129 }, { "epoch": 0.011090257635215834, "grad_norm": 2.3571162242953627, "learning_rate": 3.6931818181818184e-05, "loss": 0.3537, "step": 130 }, { "epoch": 0.011175567309332878, "grad_norm": 2.6333834880768654, "learning_rate": 3.721590909090909e-05, "loss": 0.4197, "step": 131 }, { "epoch": 0.011260876983449923, "grad_norm": 2.62791444567765, "learning_rate": 3.7500000000000003e-05, "loss": 0.3753, "step": 132 }, { "epoch": 0.011346186657566967, "grad_norm": 2.6389534077927514, "learning_rate": 3.778409090909091e-05, "loss": 0.3776, "step": 133 }, { "epoch": 0.011431496331684013, "grad_norm": 2.7011152083603265, "learning_rate": 3.8068181818181816e-05, "loss": 0.4444, "step": 134 }, { "epoch": 0.011516806005801059, "grad_norm": 2.467105172675951, "learning_rate": 3.835227272727273e-05, "loss": 0.4716, "step": 135 }, { "epoch": 0.011602115679918102, "grad_norm": 2.567534641377614, "learning_rate": 3.8636363636363636e-05, "loss": 0.4099, "step": 136 }, { "epoch": 0.011687425354035148, "grad_norm": 2.6657081510537193, "learning_rate": 3.8920454545454546e-05, "loss": 0.4344, "step": 137 }, { "epoch": 0.011772735028152192, "grad_norm": 3.0208726200749925, "learning_rate": 3.9204545454545456e-05, "loss": 0.4845, "step": 138 }, { "epoch": 0.011858044702269237, "grad_norm": 3.148781580143838, "learning_rate": 3.9488636363636366e-05, "loss": 0.4329, "step": 139 }, { "epoch": 0.011943354376386283, "grad_norm": 2.4785205308942286, "learning_rate": 3.9772727272727275e-05, "loss": 0.5395, "step": 140 }, { "epoch": 0.012028664050503327, "grad_norm": 2.701888843635922, "learning_rate": 4.0056818181818185e-05, "loss": 0.41, "step": 141 }, { "epoch": 0.012113973724620372, "grad_norm": 2.244890026465999, "learning_rate": 4.034090909090909e-05, "loss": 0.4041, "step": 142 }, { "epoch": 0.012199283398737416, "grad_norm": 2.4629690352927773, "learning_rate": 4.0625000000000005e-05, "loss": 0.4514, "step": 143 }, { "epoch": 0.012284593072854462, "grad_norm": 2.606842182312283, "learning_rate": 4.0909090909090915e-05, "loss": 0.4666, "step": 144 }, { "epoch": 0.012369902746971506, "grad_norm": 2.437699138625212, "learning_rate": 4.119318181818182e-05, "loss": 0.4337, "step": 145 }, { "epoch": 0.012455212421088551, "grad_norm": 2.3627027071420614, "learning_rate": 4.1477272727272734e-05, "loss": 0.3842, "step": 146 }, { "epoch": 0.012540522095205597, "grad_norm": 2.3157634904763573, "learning_rate": 4.176136363636364e-05, "loss": 0.4732, "step": 147 }, { "epoch": 0.012625831769322641, "grad_norm": 2.0017455351593814, "learning_rate": 4.204545454545455e-05, "loss": 0.3461, "step": 148 }, { "epoch": 0.012711141443439686, "grad_norm": 2.56314757933644, "learning_rate": 4.232954545454546e-05, "loss": 0.4096, "step": 149 }, { "epoch": 0.01279645111755673, "grad_norm": 2.1440659315234414, "learning_rate": 4.261363636363637e-05, "loss": 0.4548, "step": 150 }, { "epoch": 0.012881760791673776, "grad_norm": 2.3080827951175045, "learning_rate": 4.289772727272727e-05, "loss": 0.3785, "step": 151 }, { "epoch": 0.012967070465790822, "grad_norm": 2.013942400634266, "learning_rate": 4.318181818181819e-05, "loss": 0.3873, "step": 152 }, { "epoch": 0.013052380139907865, "grad_norm": 2.7874178271305814, "learning_rate": 4.346590909090909e-05, "loss": 0.4876, "step": 153 }, { "epoch": 0.013137689814024911, "grad_norm": 2.1405483705490416, "learning_rate": 4.375e-05, "loss": 0.4122, "step": 154 }, { "epoch": 0.013222999488141955, "grad_norm": 2.632660722744027, "learning_rate": 4.4034090909090916e-05, "loss": 0.4822, "step": 155 }, { "epoch": 0.013308309162259, "grad_norm": 2.10643863455855, "learning_rate": 4.431818181818182e-05, "loss": 0.3805, "step": 156 }, { "epoch": 0.013393618836376044, "grad_norm": 2.414338031340607, "learning_rate": 4.460227272727273e-05, "loss": 0.4072, "step": 157 }, { "epoch": 0.01347892851049309, "grad_norm": 3.2263062308376123, "learning_rate": 4.488636363636364e-05, "loss": 0.4084, "step": 158 }, { "epoch": 0.013564238184610136, "grad_norm": 2.2316863416051835, "learning_rate": 4.517045454545455e-05, "loss": 0.3704, "step": 159 }, { "epoch": 0.01364954785872718, "grad_norm": 2.4844122350049687, "learning_rate": 4.545454545454546e-05, "loss": 0.404, "step": 160 }, { "epoch": 0.013734857532844225, "grad_norm": 2.4101932467151324, "learning_rate": 4.573863636363637e-05, "loss": 0.4523, "step": 161 }, { "epoch": 0.013820167206961269, "grad_norm": 2.3242541398631826, "learning_rate": 4.602272727272727e-05, "loss": 0.4241, "step": 162 }, { "epoch": 0.013905476881078314, "grad_norm": 2.6891403515023393, "learning_rate": 4.630681818181818e-05, "loss": 0.451, "step": 163 }, { "epoch": 0.01399078655519536, "grad_norm": 2.8190388432127422, "learning_rate": 4.659090909090909e-05, "loss": 0.4069, "step": 164 }, { "epoch": 0.014076096229312404, "grad_norm": 2.4510707150440365, "learning_rate": 4.6875e-05, "loss": 0.3978, "step": 165 }, { "epoch": 0.01416140590342945, "grad_norm": 2.4501163961048196, "learning_rate": 4.715909090909091e-05, "loss": 0.4643, "step": 166 }, { "epoch": 0.014246715577546493, "grad_norm": 3.5419141047190177, "learning_rate": 4.744318181818182e-05, "loss": 0.4195, "step": 167 }, { "epoch": 0.014332025251663539, "grad_norm": 2.4948616628576574, "learning_rate": 4.772727272727273e-05, "loss": 0.4084, "step": 168 }, { "epoch": 0.014417334925780583, "grad_norm": 2.5015318945514182, "learning_rate": 4.801136363636364e-05, "loss": 0.4598, "step": 169 }, { "epoch": 0.014502644599897628, "grad_norm": 2.406353737712857, "learning_rate": 4.829545454545455e-05, "loss": 0.3859, "step": 170 }, { "epoch": 0.014587954274014674, "grad_norm": 2.370111353065027, "learning_rate": 4.857954545454545e-05, "loss": 0.3997, "step": 171 }, { "epoch": 0.014673263948131718, "grad_norm": 2.514239196068343, "learning_rate": 4.886363636363637e-05, "loss": 0.3698, "step": 172 }, { "epoch": 0.014758573622248763, "grad_norm": 2.6636046074830406, "learning_rate": 4.914772727272727e-05, "loss": 0.415, "step": 173 }, { "epoch": 0.014843883296365807, "grad_norm": 2.374428730572856, "learning_rate": 4.943181818181818e-05, "loss": 0.3849, "step": 174 }, { "epoch": 0.014929192970482853, "grad_norm": 2.433557696863759, "learning_rate": 4.971590909090909e-05, "loss": 0.4298, "step": 175 }, { "epoch": 0.015014502644599897, "grad_norm": 1.9580069601385302, "learning_rate": 5e-05, "loss": 0.3604, "step": 176 }, { "epoch": 0.015099812318716942, "grad_norm": 2.288392681770112, "learning_rate": 5.0284090909090905e-05, "loss": 0.4093, "step": 177 }, { "epoch": 0.015185121992833988, "grad_norm": 2.9733852139324513, "learning_rate": 5.056818181818183e-05, "loss": 0.3911, "step": 178 }, { "epoch": 0.015270431666951032, "grad_norm": 2.6770649969954836, "learning_rate": 5.085227272727273e-05, "loss": 0.4516, "step": 179 }, { "epoch": 0.015355741341068077, "grad_norm": 2.4377382602152724, "learning_rate": 5.1136363636363635e-05, "loss": 0.4476, "step": 180 }, { "epoch": 0.015441051015185121, "grad_norm": 2.5861934293198994, "learning_rate": 5.1420454545454545e-05, "loss": 0.4685, "step": 181 }, { "epoch": 0.015526360689302167, "grad_norm": 2.896575741088906, "learning_rate": 5.170454545454546e-05, "loss": 0.4433, "step": 182 }, { "epoch": 0.015611670363419212, "grad_norm": 2.6599358510764954, "learning_rate": 5.1988636363636364e-05, "loss": 0.4251, "step": 183 }, { "epoch": 0.015696980037536258, "grad_norm": 2.438004759341845, "learning_rate": 5.2272727272727274e-05, "loss": 0.4284, "step": 184 }, { "epoch": 0.0157822897116533, "grad_norm": 2.227061326215854, "learning_rate": 5.255681818181818e-05, "loss": 0.4044, "step": 185 }, { "epoch": 0.015867599385770346, "grad_norm": 2.380804698087815, "learning_rate": 5.2840909090909094e-05, "loss": 0.4194, "step": 186 }, { "epoch": 0.01595290905988739, "grad_norm": 2.279491426063942, "learning_rate": 5.3125000000000004e-05, "loss": 0.431, "step": 187 }, { "epoch": 0.016038218734004437, "grad_norm": 2.6766441568811343, "learning_rate": 5.340909090909091e-05, "loss": 0.3904, "step": 188 }, { "epoch": 0.016123528408121483, "grad_norm": 1.9764639486841136, "learning_rate": 5.3693181818181823e-05, "loss": 0.3652, "step": 189 }, { "epoch": 0.016208838082238525, "grad_norm": 2.5636267627128007, "learning_rate": 5.397727272727273e-05, "loss": 0.4372, "step": 190 }, { "epoch": 0.01629414775635557, "grad_norm": 2.298835073766845, "learning_rate": 5.4261363636363636e-05, "loss": 0.4377, "step": 191 }, { "epoch": 0.016379457430472616, "grad_norm": 2.186628549561476, "learning_rate": 5.4545454545454546e-05, "loss": 0.4144, "step": 192 }, { "epoch": 0.01646476710458966, "grad_norm": 2.692304707619826, "learning_rate": 5.482954545454546e-05, "loss": 0.48, "step": 193 }, { "epoch": 0.016550076778706704, "grad_norm": 2.2535190613327503, "learning_rate": 5.5113636363636366e-05, "loss": 0.4124, "step": 194 }, { "epoch": 0.01663538645282375, "grad_norm": 2.0387039746317264, "learning_rate": 5.5397727272727276e-05, "loss": 0.3935, "step": 195 }, { "epoch": 0.016720696126940795, "grad_norm": 2.4423187747752397, "learning_rate": 5.568181818181818e-05, "loss": 0.4202, "step": 196 }, { "epoch": 0.01680600580105784, "grad_norm": 2.269094612284175, "learning_rate": 5.5965909090909095e-05, "loss": 0.3815, "step": 197 }, { "epoch": 0.016891315475174886, "grad_norm": 2.2772363339670623, "learning_rate": 5.6250000000000005e-05, "loss": 0.4252, "step": 198 }, { "epoch": 0.016976625149291928, "grad_norm": 2.493049496425207, "learning_rate": 5.653409090909091e-05, "loss": 0.4346, "step": 199 }, { "epoch": 0.017061934823408974, "grad_norm": 2.7503155613957935, "learning_rate": 5.6818181818181825e-05, "loss": 0.4178, "step": 200 }, { "epoch": 0.01714724449752602, "grad_norm": 2.2266195724935978, "learning_rate": 5.7102272727272735e-05, "loss": 0.412, "step": 201 }, { "epoch": 0.017232554171643065, "grad_norm": 2.8407013029923203, "learning_rate": 5.738636363636364e-05, "loss": 0.4639, "step": 202 }, { "epoch": 0.01731786384576011, "grad_norm": 2.617368777245781, "learning_rate": 5.767045454545454e-05, "loss": 0.4439, "step": 203 }, { "epoch": 0.017403173519877153, "grad_norm": 2.0892848898637983, "learning_rate": 5.7954545454545464e-05, "loss": 0.402, "step": 204 }, { "epoch": 0.0174884831939942, "grad_norm": 2.436676864533104, "learning_rate": 5.823863636363637e-05, "loss": 0.4087, "step": 205 }, { "epoch": 0.017573792868111244, "grad_norm": 2.5845663348109458, "learning_rate": 5.852272727272727e-05, "loss": 0.4186, "step": 206 }, { "epoch": 0.01765910254222829, "grad_norm": 2.3642438961408896, "learning_rate": 5.880681818181818e-05, "loss": 0.4086, "step": 207 }, { "epoch": 0.017744412216345335, "grad_norm": 2.1335219995201653, "learning_rate": 5.90909090909091e-05, "loss": 0.3728, "step": 208 }, { "epoch": 0.017829721890462377, "grad_norm": 2.6074355907293754, "learning_rate": 5.9375e-05, "loss": 0.4244, "step": 209 }, { "epoch": 0.017915031564579423, "grad_norm": 3.25477205735282, "learning_rate": 5.965909090909091e-05, "loss": 0.4111, "step": 210 }, { "epoch": 0.01800034123869647, "grad_norm": 2.454606696048798, "learning_rate": 5.9943181818181826e-05, "loss": 0.4215, "step": 211 }, { "epoch": 0.018085650912813514, "grad_norm": 2.1353496509984127, "learning_rate": 6.022727272727273e-05, "loss": 0.419, "step": 212 }, { "epoch": 0.01817096058693056, "grad_norm": 2.500536479608336, "learning_rate": 6.051136363636364e-05, "loss": 0.4496, "step": 213 }, { "epoch": 0.0182562702610476, "grad_norm": 2.9987885890320753, "learning_rate": 6.079545454545454e-05, "loss": 0.4507, "step": 214 }, { "epoch": 0.018341579935164647, "grad_norm": 2.247094928021411, "learning_rate": 6.107954545454547e-05, "loss": 0.4054, "step": 215 }, { "epoch": 0.018426889609281693, "grad_norm": 2.402179892309456, "learning_rate": 6.136363636363636e-05, "loss": 0.4485, "step": 216 }, { "epoch": 0.01851219928339874, "grad_norm": 1.7680762500588991, "learning_rate": 6.164772727272727e-05, "loss": 0.3624, "step": 217 }, { "epoch": 0.01859750895751578, "grad_norm": 2.6790701374642354, "learning_rate": 6.193181818181818e-05, "loss": 0.4293, "step": 218 }, { "epoch": 0.018682818631632826, "grad_norm": 2.4588525083784476, "learning_rate": 6.221590909090909e-05, "loss": 0.433, "step": 219 }, { "epoch": 0.018768128305749872, "grad_norm": 2.0963968655678356, "learning_rate": 6.25e-05, "loss": 0.3591, "step": 220 }, { "epoch": 0.018853437979866917, "grad_norm": 2.794223146247957, "learning_rate": 6.278409090909091e-05, "loss": 0.4308, "step": 221 }, { "epoch": 0.018938747653983963, "grad_norm": 2.2081393270624314, "learning_rate": 6.306818181818182e-05, "loss": 0.3923, "step": 222 }, { "epoch": 0.019024057328101005, "grad_norm": 2.12053817863608, "learning_rate": 6.335227272727273e-05, "loss": 0.4308, "step": 223 }, { "epoch": 0.01910936700221805, "grad_norm": 2.3852945377008883, "learning_rate": 6.363636363636364e-05, "loss": 0.4389, "step": 224 }, { "epoch": 0.019194676676335096, "grad_norm": 2.1586937808351845, "learning_rate": 6.392045454545455e-05, "loss": 0.4132, "step": 225 }, { "epoch": 0.019279986350452142, "grad_norm": 2.079858042934605, "learning_rate": 6.420454545454546e-05, "loss": 0.4216, "step": 226 }, { "epoch": 0.019365296024569188, "grad_norm": 1.9909462446488657, "learning_rate": 6.448863636363637e-05, "loss": 0.35, "step": 227 }, { "epoch": 0.01945060569868623, "grad_norm": 2.0339874601624626, "learning_rate": 6.477272727272728e-05, "loss": 0.3714, "step": 228 }, { "epoch": 0.019535915372803275, "grad_norm": 1.601909544025692, "learning_rate": 6.505681818181818e-05, "loss": 0.3724, "step": 229 }, { "epoch": 0.01962122504692032, "grad_norm": 1.9721372015929162, "learning_rate": 6.53409090909091e-05, "loss": 0.4024, "step": 230 }, { "epoch": 0.019706534721037366, "grad_norm": 2.4817827243793587, "learning_rate": 6.562500000000001e-05, "loss": 0.4502, "step": 231 }, { "epoch": 0.019791844395154412, "grad_norm": 2.0375358157140164, "learning_rate": 6.59090909090909e-05, "loss": 0.3958, "step": 232 }, { "epoch": 0.019877154069271454, "grad_norm": 2.3366166470335035, "learning_rate": 6.619318181818183e-05, "loss": 0.4338, "step": 233 }, { "epoch": 0.0199624637433885, "grad_norm": 1.8930909020170057, "learning_rate": 6.647727272727274e-05, "loss": 0.3942, "step": 234 }, { "epoch": 0.020047773417505545, "grad_norm": 2.694706857057218, "learning_rate": 6.676136363636364e-05, "loss": 0.4838, "step": 235 }, { "epoch": 0.02013308309162259, "grad_norm": 2.4290366328285473, "learning_rate": 6.704545454545455e-05, "loss": 0.4179, "step": 236 }, { "epoch": 0.020218392765739637, "grad_norm": 2.3474409414059116, "learning_rate": 6.732954545454547e-05, "loss": 0.4183, "step": 237 }, { "epoch": 0.02030370243985668, "grad_norm": 2.2787788279640795, "learning_rate": 6.761363636363636e-05, "loss": 0.4061, "step": 238 }, { "epoch": 0.020389012113973724, "grad_norm": 2.373541534735438, "learning_rate": 6.789772727272727e-05, "loss": 0.4365, "step": 239 }, { "epoch": 0.02047432178809077, "grad_norm": 2.3081579942821513, "learning_rate": 6.818181818181818e-05, "loss": 0.3781, "step": 240 }, { "epoch": 0.020559631462207816, "grad_norm": 2.5766238379363178, "learning_rate": 6.84659090909091e-05, "loss": 0.4276, "step": 241 }, { "epoch": 0.020644941136324858, "grad_norm": 2.38974830658828, "learning_rate": 6.875e-05, "loss": 0.436, "step": 242 }, { "epoch": 0.020730250810441903, "grad_norm": 2.5043939124493533, "learning_rate": 6.903409090909091e-05, "loss": 0.4271, "step": 243 }, { "epoch": 0.02081556048455895, "grad_norm": 2.2455676184058695, "learning_rate": 6.931818181818182e-05, "loss": 0.3763, "step": 244 }, { "epoch": 0.020900870158675994, "grad_norm": 2.0126630918393773, "learning_rate": 6.960227272727273e-05, "loss": 0.4216, "step": 245 }, { "epoch": 0.02098617983279304, "grad_norm": 2.4314806046744857, "learning_rate": 6.988636363636364e-05, "loss": 0.3868, "step": 246 }, { "epoch": 0.021071489506910082, "grad_norm": 2.683584332623123, "learning_rate": 7.017045454545454e-05, "loss": 0.4434, "step": 247 }, { "epoch": 0.021156799181027128, "grad_norm": 2.522366039397641, "learning_rate": 7.045454545454546e-05, "loss": 0.4322, "step": 248 }, { "epoch": 0.021242108855144173, "grad_norm": 2.4736583541232173, "learning_rate": 7.073863636363637e-05, "loss": 0.4598, "step": 249 }, { "epoch": 0.02132741852926122, "grad_norm": 1.9516230797508671, "learning_rate": 7.102272727272727e-05, "loss": 0.4696, "step": 250 }, { "epoch": 0.021412728203378265, "grad_norm": 1.8823724129490143, "learning_rate": 7.130681818181818e-05, "loss": 0.3884, "step": 251 }, { "epoch": 0.021498037877495307, "grad_norm": 2.302613336878057, "learning_rate": 7.15909090909091e-05, "loss": 0.4278, "step": 252 }, { "epoch": 0.021583347551612352, "grad_norm": 2.064295363014741, "learning_rate": 7.1875e-05, "loss": 0.4239, "step": 253 }, { "epoch": 0.021668657225729398, "grad_norm": 2.090826242109761, "learning_rate": 7.215909090909091e-05, "loss": 0.4356, "step": 254 }, { "epoch": 0.021753966899846443, "grad_norm": 1.9675281911378966, "learning_rate": 7.244318181818183e-05, "loss": 0.403, "step": 255 }, { "epoch": 0.02183927657396349, "grad_norm": 2.2684160601544314, "learning_rate": 7.272727272727273e-05, "loss": 0.4429, "step": 256 }, { "epoch": 0.02192458624808053, "grad_norm": 1.8001039104138832, "learning_rate": 7.301136363636364e-05, "loss": 0.3878, "step": 257 }, { "epoch": 0.022009895922197577, "grad_norm": 2.824482294768277, "learning_rate": 7.329545454545455e-05, "loss": 0.4372, "step": 258 }, { "epoch": 0.022095205596314622, "grad_norm": 2.114413763554116, "learning_rate": 7.357954545454546e-05, "loss": 0.4565, "step": 259 }, { "epoch": 0.022180515270431668, "grad_norm": 2.410582765897125, "learning_rate": 7.386363636363637e-05, "loss": 0.4433, "step": 260 }, { "epoch": 0.02226582494454871, "grad_norm": 2.2112227077320132, "learning_rate": 7.414772727272728e-05, "loss": 0.4087, "step": 261 }, { "epoch": 0.022351134618665756, "grad_norm": 1.6947096354821942, "learning_rate": 7.443181818181817e-05, "loss": 0.3613, "step": 262 }, { "epoch": 0.0224364442927828, "grad_norm": 2.301747765112511, "learning_rate": 7.47159090909091e-05, "loss": 0.4189, "step": 263 }, { "epoch": 0.022521753966899847, "grad_norm": 2.30536468384744, "learning_rate": 7.500000000000001e-05, "loss": 0.4359, "step": 264 }, { "epoch": 0.022607063641016893, "grad_norm": 1.9573892658039487, "learning_rate": 7.52840909090909e-05, "loss": 0.4206, "step": 265 }, { "epoch": 0.022692373315133935, "grad_norm": 2.129782959889996, "learning_rate": 7.556818181818183e-05, "loss": 0.3936, "step": 266 }, { "epoch": 0.02277768298925098, "grad_norm": 2.1100318168663894, "learning_rate": 7.585227272727274e-05, "loss": 0.432, "step": 267 }, { "epoch": 0.022862992663368026, "grad_norm": 2.5995322430271646, "learning_rate": 7.613636363636363e-05, "loss": 0.4807, "step": 268 }, { "epoch": 0.02294830233748507, "grad_norm": 2.0337347238971604, "learning_rate": 7.642045454545454e-05, "loss": 0.3795, "step": 269 }, { "epoch": 0.023033612011602117, "grad_norm": 2.190910840942133, "learning_rate": 7.670454545454547e-05, "loss": 0.4297, "step": 270 }, { "epoch": 0.02311892168571916, "grad_norm": 2.419609141982105, "learning_rate": 7.698863636363636e-05, "loss": 0.458, "step": 271 }, { "epoch": 0.023204231359836205, "grad_norm": 2.168031103938609, "learning_rate": 7.727272727272727e-05, "loss": 0.41, "step": 272 }, { "epoch": 0.02328954103395325, "grad_norm": 2.3960852006847397, "learning_rate": 7.755681818181818e-05, "loss": 0.4996, "step": 273 }, { "epoch": 0.023374850708070296, "grad_norm": 2.2335931437045566, "learning_rate": 7.784090909090909e-05, "loss": 0.4293, "step": 274 }, { "epoch": 0.02346016038218734, "grad_norm": 2.041486096344372, "learning_rate": 7.8125e-05, "loss": 0.3993, "step": 275 }, { "epoch": 0.023545470056304384, "grad_norm": 2.2405684204619236, "learning_rate": 7.840909090909091e-05, "loss": 0.3819, "step": 276 }, { "epoch": 0.02363077973042143, "grad_norm": 2.2809759524096678, "learning_rate": 7.869318181818182e-05, "loss": 0.4047, "step": 277 }, { "epoch": 0.023716089404538475, "grad_norm": 1.8354656505940115, "learning_rate": 7.897727272727273e-05, "loss": 0.4128, "step": 278 }, { "epoch": 0.02380139907865552, "grad_norm": 2.1087050892966253, "learning_rate": 7.926136363636364e-05, "loss": 0.4591, "step": 279 }, { "epoch": 0.023886708752772566, "grad_norm": 1.9526127257879837, "learning_rate": 7.954545454545455e-05, "loss": 0.3926, "step": 280 }, { "epoch": 0.023972018426889608, "grad_norm": 2.231732065820366, "learning_rate": 7.982954545454546e-05, "loss": 0.4224, "step": 281 }, { "epoch": 0.024057328101006654, "grad_norm": 2.3258765711422074, "learning_rate": 8.011363636363637e-05, "loss": 0.4486, "step": 282 }, { "epoch": 0.0241426377751237, "grad_norm": 2.097291598600573, "learning_rate": 8.039772727272728e-05, "loss": 0.4555, "step": 283 }, { "epoch": 0.024227947449240745, "grad_norm": 2.0576139431037546, "learning_rate": 8.068181818181818e-05, "loss": 0.4242, "step": 284 }, { "epoch": 0.024313257123357787, "grad_norm": 2.1073030821911694, "learning_rate": 8.09659090909091e-05, "loss": 0.4592, "step": 285 }, { "epoch": 0.024398566797474833, "grad_norm": 2.1904471027227888, "learning_rate": 8.125000000000001e-05, "loss": 0.4315, "step": 286 }, { "epoch": 0.02448387647159188, "grad_norm": 2.39197317766697, "learning_rate": 8.15340909090909e-05, "loss": 0.4379, "step": 287 }, { "epoch": 0.024569186145708924, "grad_norm": 2.1958233113455274, "learning_rate": 8.181818181818183e-05, "loss": 0.4387, "step": 288 }, { "epoch": 0.02465449581982597, "grad_norm": 2.1177392219608735, "learning_rate": 8.210227272727274e-05, "loss": 0.4244, "step": 289 }, { "epoch": 0.02473980549394301, "grad_norm": 2.163896244424515, "learning_rate": 8.238636363636364e-05, "loss": 0.4233, "step": 290 }, { "epoch": 0.024825115168060057, "grad_norm": 2.321665051750211, "learning_rate": 8.267045454545455e-05, "loss": 0.4715, "step": 291 }, { "epoch": 0.024910424842177103, "grad_norm": 1.8350982695831577, "learning_rate": 8.295454545454547e-05, "loss": 0.4368, "step": 292 }, { "epoch": 0.02499573451629415, "grad_norm": 2.5456302151812715, "learning_rate": 8.323863636363637e-05, "loss": 0.4511, "step": 293 }, { "epoch": 0.025081044190411194, "grad_norm": 2.0739950573088883, "learning_rate": 8.352272727272727e-05, "loss": 0.4007, "step": 294 }, { "epoch": 0.025166353864528236, "grad_norm": 2.5200214707504403, "learning_rate": 8.380681818181818e-05, "loss": 0.4289, "step": 295 }, { "epoch": 0.025251663538645282, "grad_norm": 2.1590787604323283, "learning_rate": 8.40909090909091e-05, "loss": 0.419, "step": 296 }, { "epoch": 0.025336973212762327, "grad_norm": 2.8775934877959846, "learning_rate": 8.4375e-05, "loss": 0.4577, "step": 297 }, { "epoch": 0.025422282886879373, "grad_norm": 1.9942457150856439, "learning_rate": 8.465909090909091e-05, "loss": 0.3722, "step": 298 }, { "epoch": 0.02550759256099642, "grad_norm": 2.259699893875751, "learning_rate": 8.494318181818182e-05, "loss": 0.4225, "step": 299 }, { "epoch": 0.02559290223511346, "grad_norm": 2.1876651156643416, "learning_rate": 8.522727272727273e-05, "loss": 0.4391, "step": 300 }, { "epoch": 0.025678211909230506, "grad_norm": 2.239210005997261, "learning_rate": 8.551136363636364e-05, "loss": 0.4622, "step": 301 }, { "epoch": 0.025763521583347552, "grad_norm": 1.9997042198811548, "learning_rate": 8.579545454545454e-05, "loss": 0.4283, "step": 302 }, { "epoch": 0.025848831257464597, "grad_norm": 1.9990316669423305, "learning_rate": 8.607954545454546e-05, "loss": 0.392, "step": 303 }, { "epoch": 0.025934140931581643, "grad_norm": 2.1912395665496494, "learning_rate": 8.636363636363637e-05, "loss": 0.4888, "step": 304 }, { "epoch": 0.026019450605698685, "grad_norm": 1.9437906831365956, "learning_rate": 8.664772727272727e-05, "loss": 0.4438, "step": 305 }, { "epoch": 0.02610476027981573, "grad_norm": 2.2897292067825545, "learning_rate": 8.693181818181818e-05, "loss": 0.4411, "step": 306 }, { "epoch": 0.026190069953932776, "grad_norm": 1.8667725936035295, "learning_rate": 8.72159090909091e-05, "loss": 0.3792, "step": 307 }, { "epoch": 0.026275379628049822, "grad_norm": 2.001616539921716, "learning_rate": 8.75e-05, "loss": 0.3993, "step": 308 }, { "epoch": 0.026360689302166864, "grad_norm": 1.9698526836636576, "learning_rate": 8.778409090909091e-05, "loss": 0.4038, "step": 309 }, { "epoch": 0.02644599897628391, "grad_norm": 1.984745141298858, "learning_rate": 8.806818181818183e-05, "loss": 0.4541, "step": 310 }, { "epoch": 0.026531308650400955, "grad_norm": 1.884512865580798, "learning_rate": 8.835227272727273e-05, "loss": 0.4381, "step": 311 }, { "epoch": 0.026616618324518, "grad_norm": 2.4831388265060133, "learning_rate": 8.863636363636364e-05, "loss": 0.424, "step": 312 }, { "epoch": 0.026701927998635046, "grad_norm": 2.0911087913288613, "learning_rate": 8.892045454545455e-05, "loss": 0.4585, "step": 313 }, { "epoch": 0.02678723767275209, "grad_norm": 2.099454709056239, "learning_rate": 8.920454545454546e-05, "loss": 0.4413, "step": 314 }, { "epoch": 0.026872547346869134, "grad_norm": 1.857717849463511, "learning_rate": 8.948863636363637e-05, "loss": 0.4365, "step": 315 }, { "epoch": 0.02695785702098618, "grad_norm": 2.2091446471822693, "learning_rate": 8.977272727272728e-05, "loss": 0.3983, "step": 316 }, { "epoch": 0.027043166695103225, "grad_norm": 1.8647576007353133, "learning_rate": 9.005681818181819e-05, "loss": 0.3884, "step": 317 }, { "epoch": 0.02712847636922027, "grad_norm": 1.9841011639383348, "learning_rate": 9.03409090909091e-05, "loss": 0.4302, "step": 318 }, { "epoch": 0.027213786043337313, "grad_norm": 1.859998710590859, "learning_rate": 9.062500000000001e-05, "loss": 0.4028, "step": 319 }, { "epoch": 0.02729909571745436, "grad_norm": 2.023175903848956, "learning_rate": 9.090909090909092e-05, "loss": 0.3932, "step": 320 }, { "epoch": 0.027384405391571404, "grad_norm": 2.2051830013395652, "learning_rate": 9.119318181818183e-05, "loss": 0.4163, "step": 321 }, { "epoch": 0.02746971506568845, "grad_norm": 2.235208955934696, "learning_rate": 9.147727272727274e-05, "loss": 0.4609, "step": 322 }, { "epoch": 0.027555024739805496, "grad_norm": 2.1994964803679227, "learning_rate": 9.176136363636363e-05, "loss": 0.4547, "step": 323 }, { "epoch": 0.027640334413922538, "grad_norm": 2.0965232133333056, "learning_rate": 9.204545454545454e-05, "loss": 0.4096, "step": 324 }, { "epoch": 0.027725644088039583, "grad_norm": 1.9607746922967504, "learning_rate": 9.232954545454547e-05, "loss": 0.4394, "step": 325 }, { "epoch": 0.02781095376215663, "grad_norm": 2.1625424772291324, "learning_rate": 9.261363636363636e-05, "loss": 0.4258, "step": 326 }, { "epoch": 0.027896263436273674, "grad_norm": 2.0981794731243633, "learning_rate": 9.289772727272727e-05, "loss": 0.4054, "step": 327 }, { "epoch": 0.02798157311039072, "grad_norm": 2.1449846854919206, "learning_rate": 9.318181818181818e-05, "loss": 0.3627, "step": 328 }, { "epoch": 0.028066882784507762, "grad_norm": 2.014754351274066, "learning_rate": 9.346590909090909e-05, "loss": 0.4327, "step": 329 }, { "epoch": 0.028152192458624808, "grad_norm": 1.9631347504440135, "learning_rate": 9.375e-05, "loss": 0.37, "step": 330 }, { "epoch": 0.028237502132741853, "grad_norm": 2.3106651243647502, "learning_rate": 9.403409090909091e-05, "loss": 0.4704, "step": 331 }, { "epoch": 0.0283228118068589, "grad_norm": 1.8457726198061013, "learning_rate": 9.431818181818182e-05, "loss": 0.4334, "step": 332 }, { "epoch": 0.02840812148097594, "grad_norm": 2.2932677715747793, "learning_rate": 9.460227272727273e-05, "loss": 0.4389, "step": 333 }, { "epoch": 0.028493431155092987, "grad_norm": 1.9535980872354977, "learning_rate": 9.488636363636364e-05, "loss": 0.4002, "step": 334 }, { "epoch": 0.028578740829210032, "grad_norm": 2.082547751768784, "learning_rate": 9.517045454545455e-05, "loss": 0.4885, "step": 335 }, { "epoch": 0.028664050503327078, "grad_norm": 1.8107495240807714, "learning_rate": 9.545454545454546e-05, "loss": 0.3809, "step": 336 }, { "epoch": 0.028749360177444123, "grad_norm": 2.1616497154228065, "learning_rate": 9.573863636363637e-05, "loss": 0.4312, "step": 337 }, { "epoch": 0.028834669851561166, "grad_norm": 1.862738508275819, "learning_rate": 9.602272727272728e-05, "loss": 0.4555, "step": 338 }, { "epoch": 0.02891997952567821, "grad_norm": 1.9347831381781693, "learning_rate": 9.630681818181818e-05, "loss": 0.4439, "step": 339 }, { "epoch": 0.029005289199795257, "grad_norm": 2.127971030783837, "learning_rate": 9.65909090909091e-05, "loss": 0.466, "step": 340 }, { "epoch": 0.029090598873912302, "grad_norm": 2.091898312856188, "learning_rate": 9.687500000000001e-05, "loss": 0.4355, "step": 341 }, { "epoch": 0.029175908548029348, "grad_norm": 2.1397767996925383, "learning_rate": 9.71590909090909e-05, "loss": 0.4852, "step": 342 }, { "epoch": 0.02926121822214639, "grad_norm": 2.0469305598748244, "learning_rate": 9.744318181818183e-05, "loss": 0.4438, "step": 343 }, { "epoch": 0.029346527896263436, "grad_norm": 2.070087348643038, "learning_rate": 9.772727272727274e-05, "loss": 0.4791, "step": 344 }, { "epoch": 0.02943183757038048, "grad_norm": 2.08017096513764, "learning_rate": 9.801136363636364e-05, "loss": 0.3927, "step": 345 }, { "epoch": 0.029517147244497527, "grad_norm": 1.7758976345632271, "learning_rate": 9.829545454545455e-05, "loss": 0.4143, "step": 346 }, { "epoch": 0.029602456918614573, "grad_norm": 2.2037809220696207, "learning_rate": 9.857954545454547e-05, "loss": 0.4435, "step": 347 }, { "epoch": 0.029687766592731615, "grad_norm": 1.9215947388782628, "learning_rate": 9.886363636363637e-05, "loss": 0.3775, "step": 348 }, { "epoch": 0.02977307626684866, "grad_norm": 2.0231537491078866, "learning_rate": 9.914772727272728e-05, "loss": 0.4202, "step": 349 }, { "epoch": 0.029858385940965706, "grad_norm": 2.1361756131349754, "learning_rate": 9.943181818181819e-05, "loss": 0.3969, "step": 350 }, { "epoch": 0.02994369561508275, "grad_norm": 1.9714897710470474, "learning_rate": 9.97159090909091e-05, "loss": 0.4254, "step": 351 }, { "epoch": 0.030029005289199794, "grad_norm": 1.9625615541560266, "learning_rate": 0.0001, "loss": 0.446, "step": 352 }, { "epoch": 0.03011431496331684, "grad_norm": 1.9589553480954867, "learning_rate": 9.999999809138285e-05, "loss": 0.4354, "step": 353 }, { "epoch": 0.030199624637433885, "grad_norm": 1.9948155440718336, "learning_rate": 9.999999236553155e-05, "loss": 0.4147, "step": 354 }, { "epoch": 0.03028493431155093, "grad_norm": 2.6046457892581096, "learning_rate": 9.999998282244653e-05, "loss": 0.4593, "step": 355 }, { "epoch": 0.030370243985667976, "grad_norm": 2.279216957815973, "learning_rate": 9.999996946212851e-05, "loss": 0.4585, "step": 356 }, { "epoch": 0.030455553659785018, "grad_norm": 2.1956250385704656, "learning_rate": 9.999995228457853e-05, "loss": 0.4435, "step": 357 }, { "epoch": 0.030540863333902064, "grad_norm": 1.7413102066574255, "learning_rate": 9.99999312897979e-05, "loss": 0.4181, "step": 358 }, { "epoch": 0.03062617300801911, "grad_norm": 1.7067792693383537, "learning_rate": 9.99999064777882e-05, "loss": 0.4008, "step": 359 }, { "epoch": 0.030711482682136155, "grad_norm": 1.9150008412252226, "learning_rate": 9.999987784855135e-05, "loss": 0.4394, "step": 360 }, { "epoch": 0.0307967923562532, "grad_norm": 2.18074059426832, "learning_rate": 9.999984540208954e-05, "loss": 0.3823, "step": 361 }, { "epoch": 0.030882102030370243, "grad_norm": 2.0830388306044387, "learning_rate": 9.99998091384052e-05, "loss": 0.4554, "step": 362 }, { "epoch": 0.030967411704487288, "grad_norm": 2.0196632327369417, "learning_rate": 9.999976905750114e-05, "loss": 0.4342, "step": 363 }, { "epoch": 0.031052721378604334, "grad_norm": 2.0923935780870955, "learning_rate": 9.999972515938044e-05, "loss": 0.4247, "step": 364 }, { "epoch": 0.03113803105272138, "grad_norm": 1.9031563341289148, "learning_rate": 9.999967744404639e-05, "loss": 0.3841, "step": 365 }, { "epoch": 0.031223340726838425, "grad_norm": 2.1459903721304556, "learning_rate": 9.999962591150267e-05, "loss": 0.4739, "step": 366 }, { "epoch": 0.03130865040095547, "grad_norm": 2.3628572950694235, "learning_rate": 9.999957056175321e-05, "loss": 0.5259, "step": 367 }, { "epoch": 0.031393960075072516, "grad_norm": 1.7150793934676076, "learning_rate": 9.999951139480224e-05, "loss": 0.3993, "step": 368 }, { "epoch": 0.031479269749189555, "grad_norm": 1.6935744275524314, "learning_rate": 9.999944841065427e-05, "loss": 0.4182, "step": 369 }, { "epoch": 0.0315645794233066, "grad_norm": 2.1951437237029383, "learning_rate": 9.999938160931412e-05, "loss": 0.4445, "step": 370 }, { "epoch": 0.031649889097423646, "grad_norm": 1.8465669739779238, "learning_rate": 9.999931099078688e-05, "loss": 0.4114, "step": 371 }, { "epoch": 0.03173519877154069, "grad_norm": 2.0087053859197708, "learning_rate": 9.999923655507792e-05, "loss": 0.4389, "step": 372 }, { "epoch": 0.03182050844565774, "grad_norm": 1.8213840326906363, "learning_rate": 9.999915830219296e-05, "loss": 0.4106, "step": 373 }, { "epoch": 0.03190581811977478, "grad_norm": 2.053223875501273, "learning_rate": 9.999907623213796e-05, "loss": 0.463, "step": 374 }, { "epoch": 0.03199112779389183, "grad_norm": 1.899658940472844, "learning_rate": 9.999899034491919e-05, "loss": 0.4402, "step": 375 }, { "epoch": 0.032076437468008874, "grad_norm": 1.7400649616567545, "learning_rate": 9.999890064054318e-05, "loss": 0.4165, "step": 376 }, { "epoch": 0.03216174714212592, "grad_norm": 1.9470843476577795, "learning_rate": 9.999880711901682e-05, "loss": 0.4461, "step": 377 }, { "epoch": 0.032247056816242965, "grad_norm": 1.574740794211691, "learning_rate": 9.999870978034722e-05, "loss": 0.3808, "step": 378 }, { "epoch": 0.032332366490360004, "grad_norm": 1.6731729193213831, "learning_rate": 9.999860862454182e-05, "loss": 0.3732, "step": 379 }, { "epoch": 0.03241767616447705, "grad_norm": 2.3072586459056654, "learning_rate": 9.999850365160836e-05, "loss": 0.4685, "step": 380 }, { "epoch": 0.032502985838594095, "grad_norm": 2.4653633162533937, "learning_rate": 9.999839486155482e-05, "loss": 0.4687, "step": 381 }, { "epoch": 0.03258829551271114, "grad_norm": 2.1617605587880204, "learning_rate": 9.999828225438954e-05, "loss": 0.4416, "step": 382 }, { "epoch": 0.032673605186828186, "grad_norm": 1.8432090741654392, "learning_rate": 9.999816583012109e-05, "loss": 0.38, "step": 383 }, { "epoch": 0.03275891486094523, "grad_norm": 1.7255542970270525, "learning_rate": 9.999804558875835e-05, "loss": 0.3897, "step": 384 }, { "epoch": 0.03284422453506228, "grad_norm": 2.2437285865888232, "learning_rate": 9.999792153031055e-05, "loss": 0.5137, "step": 385 }, { "epoch": 0.03292953420917932, "grad_norm": 1.8928977609785098, "learning_rate": 9.999779365478712e-05, "loss": 0.4257, "step": 386 }, { "epoch": 0.03301484388329637, "grad_norm": 1.9243978053863642, "learning_rate": 9.999766196219784e-05, "loss": 0.4317, "step": 387 }, { "epoch": 0.03310015355741341, "grad_norm": 2.462422821849337, "learning_rate": 9.999752645255273e-05, "loss": 0.478, "step": 388 }, { "epoch": 0.03318546323153045, "grad_norm": 2.213257559701024, "learning_rate": 9.99973871258622e-05, "loss": 0.4339, "step": 389 }, { "epoch": 0.0332707729056475, "grad_norm": 1.8182274660969233, "learning_rate": 9.99972439821368e-05, "loss": 0.4266, "step": 390 }, { "epoch": 0.033356082579764544, "grad_norm": 2.277835223654649, "learning_rate": 9.999709702138756e-05, "loss": 0.4718, "step": 391 }, { "epoch": 0.03344139225388159, "grad_norm": 1.7945496481391174, "learning_rate": 9.99969462436256e-05, "loss": 0.423, "step": 392 }, { "epoch": 0.033526701927998635, "grad_norm": 1.8387622286703071, "learning_rate": 9.999679164886251e-05, "loss": 0.424, "step": 393 }, { "epoch": 0.03361201160211568, "grad_norm": 2.2155144969248077, "learning_rate": 9.999663323711004e-05, "loss": 0.4615, "step": 394 }, { "epoch": 0.033697321276232726, "grad_norm": 2.1710132014269887, "learning_rate": 9.999647100838032e-05, "loss": 0.4169, "step": 395 }, { "epoch": 0.03378263095034977, "grad_norm": 1.708057505352017, "learning_rate": 9.999630496268572e-05, "loss": 0.3975, "step": 396 }, { "epoch": 0.03386794062446682, "grad_norm": 1.7820872965707593, "learning_rate": 9.999613510003891e-05, "loss": 0.4088, "step": 397 }, { "epoch": 0.033953250298583856, "grad_norm": 1.7093968531242085, "learning_rate": 9.999596142045286e-05, "loss": 0.4073, "step": 398 }, { "epoch": 0.0340385599727009, "grad_norm": 1.7906389911630942, "learning_rate": 9.999578392394085e-05, "loss": 0.4295, "step": 399 }, { "epoch": 0.03412386964681795, "grad_norm": 2.029904198476538, "learning_rate": 9.99956026105164e-05, "loss": 0.4461, "step": 400 }, { "epoch": 0.03420917932093499, "grad_norm": 2.509424944958969, "learning_rate": 9.999541748019337e-05, "loss": 0.46, "step": 401 }, { "epoch": 0.03429448899505204, "grad_norm": 1.9037876172021797, "learning_rate": 9.999522853298589e-05, "loss": 0.4192, "step": 402 }, { "epoch": 0.034379798669169084, "grad_norm": 1.5985656985186654, "learning_rate": 9.999503576890838e-05, "loss": 0.4096, "step": 403 }, { "epoch": 0.03446510834328613, "grad_norm": 2.1697783744601433, "learning_rate": 9.999483918797556e-05, "loss": 0.4264, "step": 404 }, { "epoch": 0.034550418017403176, "grad_norm": 2.249842850177404, "learning_rate": 9.999463879020246e-05, "loss": 0.4737, "step": 405 }, { "epoch": 0.03463572769152022, "grad_norm": 1.8537895233406445, "learning_rate": 9.999443457560434e-05, "loss": 0.4518, "step": 406 }, { "epoch": 0.03472103736563727, "grad_norm": 1.6848283033875446, "learning_rate": 9.999422654419682e-05, "loss": 0.3914, "step": 407 }, { "epoch": 0.034806347039754305, "grad_norm": 1.9247505571330115, "learning_rate": 9.999401469599577e-05, "loss": 0.4369, "step": 408 }, { "epoch": 0.03489165671387135, "grad_norm": 2.0357216545679098, "learning_rate": 9.999379903101735e-05, "loss": 0.4292, "step": 409 }, { "epoch": 0.0349769663879884, "grad_norm": 1.7980703607186028, "learning_rate": 9.999357954927808e-05, "loss": 0.3672, "step": 410 }, { "epoch": 0.03506227606210544, "grad_norm": 2.0037425310801225, "learning_rate": 9.999335625079464e-05, "loss": 0.4021, "step": 411 }, { "epoch": 0.03514758573622249, "grad_norm": 1.9993254000684548, "learning_rate": 9.999312913558413e-05, "loss": 0.3912, "step": 412 }, { "epoch": 0.03523289541033953, "grad_norm": 1.862805959673738, "learning_rate": 9.999289820366387e-05, "loss": 0.3825, "step": 413 }, { "epoch": 0.03531820508445658, "grad_norm": 1.8277655354057292, "learning_rate": 9.999266345505149e-05, "loss": 0.4321, "step": 414 }, { "epoch": 0.035403514758573625, "grad_norm": 1.9668139671939164, "learning_rate": 9.999242488976493e-05, "loss": 0.4225, "step": 415 }, { "epoch": 0.03548882443269067, "grad_norm": 1.7374065127024416, "learning_rate": 9.999218250782239e-05, "loss": 0.4075, "step": 416 }, { "epoch": 0.03557413410680771, "grad_norm": 1.783508408351506, "learning_rate": 9.999193630924236e-05, "loss": 0.4371, "step": 417 }, { "epoch": 0.035659443780924754, "grad_norm": 1.842813156823864, "learning_rate": 9.999168629404365e-05, "loss": 0.4322, "step": 418 }, { "epoch": 0.0357447534550418, "grad_norm": 1.7963075488168845, "learning_rate": 9.999143246224536e-05, "loss": 0.4051, "step": 419 }, { "epoch": 0.035830063129158846, "grad_norm": 1.92854701637278, "learning_rate": 9.999117481386684e-05, "loss": 0.3736, "step": 420 }, { "epoch": 0.03591537280327589, "grad_norm": 1.9874747988464672, "learning_rate": 9.999091334892779e-05, "loss": 0.4192, "step": 421 }, { "epoch": 0.03600068247739294, "grad_norm": 1.8661637951512327, "learning_rate": 9.999064806744816e-05, "loss": 0.4531, "step": 422 }, { "epoch": 0.03608599215150998, "grad_norm": 1.8672563228931698, "learning_rate": 9.999037896944819e-05, "loss": 0.3954, "step": 423 }, { "epoch": 0.03617130182562703, "grad_norm": 1.9541365178540537, "learning_rate": 9.999010605494843e-05, "loss": 0.4929, "step": 424 }, { "epoch": 0.036256611499744074, "grad_norm": 1.8289543943981572, "learning_rate": 9.998982932396972e-05, "loss": 0.388, "step": 425 }, { "epoch": 0.03634192117386112, "grad_norm": 2.289052188576658, "learning_rate": 9.998954877653319e-05, "loss": 0.5287, "step": 426 }, { "epoch": 0.03642723084797816, "grad_norm": 1.743389860857203, "learning_rate": 9.998926441266026e-05, "loss": 0.3843, "step": 427 }, { "epoch": 0.0365125405220952, "grad_norm": 1.6717398045147527, "learning_rate": 9.998897623237263e-05, "loss": 0.427, "step": 428 }, { "epoch": 0.03659785019621225, "grad_norm": 2.0092337361420687, "learning_rate": 9.998868423569231e-05, "loss": 0.4794, "step": 429 }, { "epoch": 0.036683159870329295, "grad_norm": 2.1469976975917686, "learning_rate": 9.998838842264158e-05, "loss": 0.3978, "step": 430 }, { "epoch": 0.03676846954444634, "grad_norm": 2.02892992595597, "learning_rate": 9.998808879324304e-05, "loss": 0.4603, "step": 431 }, { "epoch": 0.036853779218563386, "grad_norm": 1.9340274825249049, "learning_rate": 9.998778534751956e-05, "loss": 0.4586, "step": 432 }, { "epoch": 0.03693908889268043, "grad_norm": 2.045793669938324, "learning_rate": 9.998747808549429e-05, "loss": 0.4316, "step": 433 }, { "epoch": 0.03702439856679748, "grad_norm": 1.9406671786677268, "learning_rate": 9.998716700719071e-05, "loss": 0.3865, "step": 434 }, { "epoch": 0.03710970824091452, "grad_norm": 1.7564992110166182, "learning_rate": 9.998685211263257e-05, "loss": 0.448, "step": 435 }, { "epoch": 0.03719501791503156, "grad_norm": 1.944023164646452, "learning_rate": 9.998653340184387e-05, "loss": 0.4768, "step": 436 }, { "epoch": 0.03728032758914861, "grad_norm": 1.9590477823814212, "learning_rate": 9.998621087484901e-05, "loss": 0.4118, "step": 437 }, { "epoch": 0.03736563726326565, "grad_norm": 1.9193376368193606, "learning_rate": 9.998588453167256e-05, "loss": 0.427, "step": 438 }, { "epoch": 0.0374509469373827, "grad_norm": 1.775878033599765, "learning_rate": 9.998555437233946e-05, "loss": 0.3688, "step": 439 }, { "epoch": 0.037536256611499744, "grad_norm": 1.7138817941418163, "learning_rate": 9.998522039687488e-05, "loss": 0.3891, "step": 440 }, { "epoch": 0.03762156628561679, "grad_norm": 1.8589044825088654, "learning_rate": 9.998488260530436e-05, "loss": 0.4357, "step": 441 }, { "epoch": 0.037706875959733835, "grad_norm": 1.8775306789568564, "learning_rate": 9.998454099765368e-05, "loss": 0.4409, "step": 442 }, { "epoch": 0.03779218563385088, "grad_norm": 1.734729696209684, "learning_rate": 9.99841955739489e-05, "loss": 0.388, "step": 443 }, { "epoch": 0.037877495307967926, "grad_norm": 1.7025289629966738, "learning_rate": 9.998384633421641e-05, "loss": 0.3993, "step": 444 }, { "epoch": 0.03796280498208497, "grad_norm": 1.9216380399682018, "learning_rate": 9.998349327848286e-05, "loss": 0.4663, "step": 445 }, { "epoch": 0.03804811465620201, "grad_norm": 2.16444002569889, "learning_rate": 9.998313640677522e-05, "loss": 0.4499, "step": 446 }, { "epoch": 0.038133424330319056, "grad_norm": 2.0420511493534943, "learning_rate": 9.998277571912073e-05, "loss": 0.4324, "step": 447 }, { "epoch": 0.0382187340044361, "grad_norm": 1.9204375931646704, "learning_rate": 9.998241121554692e-05, "loss": 0.4707, "step": 448 }, { "epoch": 0.03830404367855315, "grad_norm": 1.8202521187259524, "learning_rate": 9.99820428960816e-05, "loss": 0.4616, "step": 449 }, { "epoch": 0.03838935335267019, "grad_norm": 2.9452914533191987, "learning_rate": 9.998167076075293e-05, "loss": 0.4195, "step": 450 }, { "epoch": 0.03847466302678724, "grad_norm": 1.9652133237635931, "learning_rate": 9.998129480958929e-05, "loss": 0.431, "step": 451 }, { "epoch": 0.038559972700904284, "grad_norm": 1.928231682445954, "learning_rate": 9.99809150426194e-05, "loss": 0.4129, "step": 452 }, { "epoch": 0.03864528237502133, "grad_norm": 1.8863727892124031, "learning_rate": 9.998053145987223e-05, "loss": 0.4236, "step": 453 }, { "epoch": 0.038730592049138375, "grad_norm": 2.029140844578974, "learning_rate": 9.998014406137709e-05, "loss": 0.4914, "step": 454 }, { "epoch": 0.038815901723255414, "grad_norm": 1.8084088554374762, "learning_rate": 9.997975284716354e-05, "loss": 0.4323, "step": 455 }, { "epoch": 0.03890121139737246, "grad_norm": 1.9739399274170912, "learning_rate": 9.997935781726147e-05, "loss": 0.4625, "step": 456 }, { "epoch": 0.038986521071489505, "grad_norm": 1.7282298740896487, "learning_rate": 9.9978958971701e-05, "loss": 0.4361, "step": 457 }, { "epoch": 0.03907183074560655, "grad_norm": 1.6874368443856747, "learning_rate": 9.99785563105126e-05, "loss": 0.3953, "step": 458 }, { "epoch": 0.039157140419723596, "grad_norm": 1.670781713942468, "learning_rate": 9.997814983372702e-05, "loss": 0.4435, "step": 459 }, { "epoch": 0.03924245009384064, "grad_norm": 1.7607385314801893, "learning_rate": 9.997773954137528e-05, "loss": 0.4634, "step": 460 }, { "epoch": 0.03932775976795769, "grad_norm": 2.1285991482196147, "learning_rate": 9.99773254334887e-05, "loss": 0.451, "step": 461 }, { "epoch": 0.03941306944207473, "grad_norm": 2.0539464738403295, "learning_rate": 9.997690751009892e-05, "loss": 0.4879, "step": 462 }, { "epoch": 0.03949837911619178, "grad_norm": 1.877152860019274, "learning_rate": 9.997648577123782e-05, "loss": 0.4216, "step": 463 }, { "epoch": 0.039583688790308824, "grad_norm": 1.8978055679292676, "learning_rate": 9.99760602169376e-05, "loss": 0.3862, "step": 464 }, { "epoch": 0.03966899846442586, "grad_norm": 1.6654749503907371, "learning_rate": 9.997563084723077e-05, "loss": 0.4125, "step": 465 }, { "epoch": 0.03975430813854291, "grad_norm": 1.7587102335410838, "learning_rate": 9.997519766215009e-05, "loss": 0.4306, "step": 466 }, { "epoch": 0.039839617812659954, "grad_norm": 1.6985023128211911, "learning_rate": 9.997476066172863e-05, "loss": 0.4102, "step": 467 }, { "epoch": 0.039924927486777, "grad_norm": 2.1099651500119943, "learning_rate": 9.997431984599976e-05, "loss": 0.4818, "step": 468 }, { "epoch": 0.040010237160894045, "grad_norm": 1.5380006796806829, "learning_rate": 9.997387521499714e-05, "loss": 0.3916, "step": 469 }, { "epoch": 0.04009554683501109, "grad_norm": 1.7876247975993698, "learning_rate": 9.99734267687547e-05, "loss": 0.4408, "step": 470 }, { "epoch": 0.040180856509128136, "grad_norm": 1.668714584374819, "learning_rate": 9.997297450730669e-05, "loss": 0.4599, "step": 471 }, { "epoch": 0.04026616618324518, "grad_norm": 1.7888597931611698, "learning_rate": 9.997251843068762e-05, "loss": 0.3982, "step": 472 }, { "epoch": 0.04035147585736223, "grad_norm": 1.831909305767981, "learning_rate": 9.997205853893234e-05, "loss": 0.419, "step": 473 }, { "epoch": 0.04043678553147927, "grad_norm": 2.05642278809104, "learning_rate": 9.997159483207594e-05, "loss": 0.4107, "step": 474 }, { "epoch": 0.04052209520559631, "grad_norm": 1.8964990208666261, "learning_rate": 9.997112731015382e-05, "loss": 0.4196, "step": 475 }, { "epoch": 0.04060740487971336, "grad_norm": 1.6556136150263872, "learning_rate": 9.997065597320165e-05, "loss": 0.3846, "step": 476 }, { "epoch": 0.0406927145538304, "grad_norm": 1.785570226035507, "learning_rate": 9.997018082125546e-05, "loss": 0.4453, "step": 477 }, { "epoch": 0.04077802422794745, "grad_norm": 2.1324823703909197, "learning_rate": 9.996970185435152e-05, "loss": 0.444, "step": 478 }, { "epoch": 0.040863333902064494, "grad_norm": 1.8015351610185433, "learning_rate": 9.996921907252636e-05, "loss": 0.4458, "step": 479 }, { "epoch": 0.04094864357618154, "grad_norm": 1.8162515553876672, "learning_rate": 9.996873247581689e-05, "loss": 0.4437, "step": 480 }, { "epoch": 0.041033953250298585, "grad_norm": 1.7954509897493156, "learning_rate": 9.996824206426021e-05, "loss": 0.3887, "step": 481 }, { "epoch": 0.04111926292441563, "grad_norm": 2.009533333954896, "learning_rate": 9.996774783789377e-05, "loss": 0.4562, "step": 482 }, { "epoch": 0.04120457259853268, "grad_norm": 1.6419429753074182, "learning_rate": 9.996724979675533e-05, "loss": 0.4176, "step": 483 }, { "epoch": 0.041289882272649715, "grad_norm": 1.7105855362019193, "learning_rate": 9.996674794088288e-05, "loss": 0.4719, "step": 484 }, { "epoch": 0.04137519194676676, "grad_norm": 2.0389576150512156, "learning_rate": 9.996624227031474e-05, "loss": 0.4272, "step": 485 }, { "epoch": 0.041460501620883806, "grad_norm": 1.9720316587386792, "learning_rate": 9.996573278508953e-05, "loss": 0.5048, "step": 486 }, { "epoch": 0.04154581129500085, "grad_norm": 1.955573050765085, "learning_rate": 9.996521948524615e-05, "loss": 0.4689, "step": 487 }, { "epoch": 0.0416311209691179, "grad_norm": 1.8340900622253422, "learning_rate": 9.996470237082378e-05, "loss": 0.4552, "step": 488 }, { "epoch": 0.04171643064323494, "grad_norm": 1.515981101449214, "learning_rate": 9.996418144186188e-05, "loss": 0.392, "step": 489 }, { "epoch": 0.04180174031735199, "grad_norm": 1.8807489634080876, "learning_rate": 9.996365669840024e-05, "loss": 0.4006, "step": 490 }, { "epoch": 0.041887049991469034, "grad_norm": 2.2866320938670124, "learning_rate": 9.996312814047892e-05, "loss": 0.4432, "step": 491 }, { "epoch": 0.04197235966558608, "grad_norm": 1.5705188976172262, "learning_rate": 9.996259576813828e-05, "loss": 0.4681, "step": 492 }, { "epoch": 0.042057669339703126, "grad_norm": 2.2225209816799407, "learning_rate": 9.996205958141894e-05, "loss": 0.5061, "step": 493 }, { "epoch": 0.042142979013820164, "grad_norm": 2.0396317312273657, "learning_rate": 9.996151958036186e-05, "loss": 0.4875, "step": 494 }, { "epoch": 0.04222828868793721, "grad_norm": 1.9566085876778616, "learning_rate": 9.996097576500825e-05, "loss": 0.4598, "step": 495 }, { "epoch": 0.042313598362054256, "grad_norm": 1.797909513468858, "learning_rate": 9.996042813539964e-05, "loss": 0.4057, "step": 496 }, { "epoch": 0.0423989080361713, "grad_norm": 1.991157115678473, "learning_rate": 9.995987669157781e-05, "loss": 0.453, "step": 497 }, { "epoch": 0.04248421771028835, "grad_norm": 1.4267809563756437, "learning_rate": 9.99593214335849e-05, "loss": 0.4081, "step": 498 }, { "epoch": 0.04256952738440539, "grad_norm": 1.9387760882217988, "learning_rate": 9.995876236146327e-05, "loss": 0.403, "step": 499 }, { "epoch": 0.04265483705852244, "grad_norm": 1.5274801754328375, "learning_rate": 9.995819947525563e-05, "loss": 0.3929, "step": 500 }, { "epoch": 0.042740146732639483, "grad_norm": 1.6270046340463735, "learning_rate": 9.995763277500493e-05, "loss": 0.4367, "step": 501 }, { "epoch": 0.04282545640675653, "grad_norm": 1.5348653347824193, "learning_rate": 9.995706226075445e-05, "loss": 0.3822, "step": 502 }, { "epoch": 0.04291076608087357, "grad_norm": 1.7936442646403976, "learning_rate": 9.995648793254772e-05, "loss": 0.4047, "step": 503 }, { "epoch": 0.04299607575499061, "grad_norm": 1.7599888193252196, "learning_rate": 9.995590979042861e-05, "loss": 0.4401, "step": 504 }, { "epoch": 0.04308138542910766, "grad_norm": 1.9607489141840362, "learning_rate": 9.995532783444126e-05, "loss": 0.4356, "step": 505 }, { "epoch": 0.043166695103224705, "grad_norm": 2.145043928501278, "learning_rate": 9.995474206463009e-05, "loss": 0.4305, "step": 506 }, { "epoch": 0.04325200477734175, "grad_norm": 1.6572994290339318, "learning_rate": 9.995415248103982e-05, "loss": 0.3654, "step": 507 }, { "epoch": 0.043337314451458796, "grad_norm": 2.007720826427349, "learning_rate": 9.995355908371546e-05, "loss": 0.4444, "step": 508 }, { "epoch": 0.04342262412557584, "grad_norm": 1.6977641577379594, "learning_rate": 9.995296187270233e-05, "loss": 0.4443, "step": 509 }, { "epoch": 0.04350793379969289, "grad_norm": 1.8624156804807661, "learning_rate": 9.9952360848046e-05, "loss": 0.4124, "step": 510 }, { "epoch": 0.04359324347380993, "grad_norm": 2.0336253435961242, "learning_rate": 9.995175600979236e-05, "loss": 0.4193, "step": 511 }, { "epoch": 0.04367855314792698, "grad_norm": 1.863073565346736, "learning_rate": 9.995114735798761e-05, "loss": 0.4529, "step": 512 }, { "epoch": 0.04376386282204402, "grad_norm": 1.5486438540583327, "learning_rate": 9.99505348926782e-05, "loss": 0.4217, "step": 513 }, { "epoch": 0.04384917249616106, "grad_norm": 1.7909336216914036, "learning_rate": 9.994991861391088e-05, "loss": 0.4431, "step": 514 }, { "epoch": 0.04393448217027811, "grad_norm": 1.664011063474776, "learning_rate": 9.99492985217327e-05, "loss": 0.4209, "step": 515 }, { "epoch": 0.044019791844395154, "grad_norm": 1.6830434231655653, "learning_rate": 9.994867461619101e-05, "loss": 0.4088, "step": 516 }, { "epoch": 0.0441051015185122, "grad_norm": 1.6184268938252164, "learning_rate": 9.994804689733344e-05, "loss": 0.4377, "step": 517 }, { "epoch": 0.044190411192629245, "grad_norm": 2.0369541771088304, "learning_rate": 9.994741536520792e-05, "loss": 0.457, "step": 518 }, { "epoch": 0.04427572086674629, "grad_norm": 1.9305325406880656, "learning_rate": 9.994678001986265e-05, "loss": 0.4623, "step": 519 }, { "epoch": 0.044361030540863336, "grad_norm": 1.7573249649423603, "learning_rate": 9.994614086134616e-05, "loss": 0.4328, "step": 520 }, { "epoch": 0.04444634021498038, "grad_norm": 2.176255234433211, "learning_rate": 9.994549788970721e-05, "loss": 0.4343, "step": 521 }, { "epoch": 0.04453164988909742, "grad_norm": 1.5776720057808933, "learning_rate": 9.99448511049949e-05, "loss": 0.3983, "step": 522 }, { "epoch": 0.044616959563214466, "grad_norm": 1.977000553607622, "learning_rate": 9.994420050725863e-05, "loss": 0.4057, "step": 523 }, { "epoch": 0.04470226923733151, "grad_norm": 1.7487069529974277, "learning_rate": 9.994354609654806e-05, "loss": 0.4294, "step": 524 }, { "epoch": 0.04478757891144856, "grad_norm": 1.6073724824865063, "learning_rate": 9.994288787291313e-05, "loss": 0.4342, "step": 525 }, { "epoch": 0.0448728885855656, "grad_norm": 2.064182751002176, "learning_rate": 9.994222583640412e-05, "loss": 0.4617, "step": 526 }, { "epoch": 0.04495819825968265, "grad_norm": 2.005432142838248, "learning_rate": 9.994155998707155e-05, "loss": 0.4528, "step": 527 }, { "epoch": 0.045043507933799694, "grad_norm": 1.6786739396328332, "learning_rate": 9.994089032496627e-05, "loss": 0.4335, "step": 528 }, { "epoch": 0.04512881760791674, "grad_norm": 1.5681755746810122, "learning_rate": 9.994021685013939e-05, "loss": 0.3518, "step": 529 }, { "epoch": 0.045214127282033785, "grad_norm": 1.7847948772849345, "learning_rate": 9.993953956264235e-05, "loss": 0.4301, "step": 530 }, { "epoch": 0.04529943695615083, "grad_norm": 1.81006122812224, "learning_rate": 9.993885846252682e-05, "loss": 0.4555, "step": 531 }, { "epoch": 0.04538474663026787, "grad_norm": 2.0485164670759537, "learning_rate": 9.993817354984486e-05, "loss": 0.4285, "step": 532 }, { "epoch": 0.045470056304384915, "grad_norm": 1.8557016969314828, "learning_rate": 9.993748482464868e-05, "loss": 0.4561, "step": 533 }, { "epoch": 0.04555536597850196, "grad_norm": 1.9225785743123498, "learning_rate": 9.993679228699091e-05, "loss": 0.4395, "step": 534 }, { "epoch": 0.045640675652619006, "grad_norm": 2.003022056469577, "learning_rate": 9.993609593692442e-05, "loss": 0.484, "step": 535 }, { "epoch": 0.04572598532673605, "grad_norm": 1.5764848098289237, "learning_rate": 9.993539577450237e-05, "loss": 0.3652, "step": 536 }, { "epoch": 0.0458112950008531, "grad_norm": 1.8432079746924845, "learning_rate": 9.993469179977821e-05, "loss": 0.4036, "step": 537 }, { "epoch": 0.04589660467497014, "grad_norm": 1.7259818779445988, "learning_rate": 9.993398401280567e-05, "loss": 0.4005, "step": 538 }, { "epoch": 0.04598191434908719, "grad_norm": 1.772212205989413, "learning_rate": 9.993327241363881e-05, "loss": 0.4143, "step": 539 }, { "epoch": 0.046067224023204234, "grad_norm": 1.863213687847433, "learning_rate": 9.993255700233194e-05, "loss": 0.3589, "step": 540 }, { "epoch": 0.04615253369732128, "grad_norm": 2.106266698572994, "learning_rate": 9.99318377789397e-05, "loss": 0.4102, "step": 541 }, { "epoch": 0.04623784337143832, "grad_norm": 1.7667209970769158, "learning_rate": 9.993111474351698e-05, "loss": 0.4535, "step": 542 }, { "epoch": 0.046323153045555364, "grad_norm": 1.883366673379803, "learning_rate": 9.993038789611897e-05, "loss": 0.4883, "step": 543 }, { "epoch": 0.04640846271967241, "grad_norm": 1.9041348588931537, "learning_rate": 9.992965723680117e-05, "loss": 0.406, "step": 544 }, { "epoch": 0.046493772393789455, "grad_norm": 1.897527217443621, "learning_rate": 9.992892276561938e-05, "loss": 0.4507, "step": 545 }, { "epoch": 0.0465790820679065, "grad_norm": 1.821830280468326, "learning_rate": 9.992818448262965e-05, "loss": 0.4342, "step": 546 }, { "epoch": 0.046664391742023546, "grad_norm": 1.9887043740669104, "learning_rate": 9.992744238788836e-05, "loss": 0.4759, "step": 547 }, { "epoch": 0.04674970141614059, "grad_norm": 1.9065217621744515, "learning_rate": 9.992669648145215e-05, "loss": 0.4771, "step": 548 }, { "epoch": 0.04683501109025764, "grad_norm": 1.7387867396166499, "learning_rate": 9.992594676337797e-05, "loss": 0.4246, "step": 549 }, { "epoch": 0.04692032076437468, "grad_norm": 1.6872756243698814, "learning_rate": 9.992519323372307e-05, "loss": 0.4017, "step": 550 }, { "epoch": 0.04700563043849172, "grad_norm": 1.6476389320154368, "learning_rate": 9.992443589254496e-05, "loss": 0.4187, "step": 551 }, { "epoch": 0.04709094011260877, "grad_norm": 1.6445618397819353, "learning_rate": 9.99236747399015e-05, "loss": 0.4144, "step": 552 }, { "epoch": 0.04717624978672581, "grad_norm": 1.980788887829494, "learning_rate": 9.992290977585072e-05, "loss": 0.4644, "step": 553 }, { "epoch": 0.04726155946084286, "grad_norm": 1.6482974892406739, "learning_rate": 9.99221410004511e-05, "loss": 0.4687, "step": 554 }, { "epoch": 0.047346869134959904, "grad_norm": 1.7027941741209258, "learning_rate": 9.99213684137613e-05, "loss": 0.4141, "step": 555 }, { "epoch": 0.04743217880907695, "grad_norm": 1.7904192847940812, "learning_rate": 9.99205920158403e-05, "loss": 0.4302, "step": 556 }, { "epoch": 0.047517488483193995, "grad_norm": 1.6913486673539364, "learning_rate": 9.991981180674737e-05, "loss": 0.4185, "step": 557 }, { "epoch": 0.04760279815731104, "grad_norm": 1.737653503223708, "learning_rate": 9.991902778654207e-05, "loss": 0.3884, "step": 558 }, { "epoch": 0.047688107831428087, "grad_norm": 1.6587591059189746, "learning_rate": 9.991823995528428e-05, "loss": 0.3838, "step": 559 }, { "epoch": 0.04777341750554513, "grad_norm": 1.9741398133233161, "learning_rate": 9.991744831303416e-05, "loss": 0.4409, "step": 560 }, { "epoch": 0.04785872717966217, "grad_norm": 1.5490685525283021, "learning_rate": 9.991665285985209e-05, "loss": 0.3878, "step": 561 }, { "epoch": 0.047944036853779216, "grad_norm": 1.6516440783719395, "learning_rate": 9.991585359579884e-05, "loss": 0.3828, "step": 562 }, { "epoch": 0.04802934652789626, "grad_norm": 2.068359605538821, "learning_rate": 9.991505052093541e-05, "loss": 0.4306, "step": 563 }, { "epoch": 0.04811465620201331, "grad_norm": 1.7979381567362094, "learning_rate": 9.991424363532314e-05, "loss": 0.4099, "step": 564 }, { "epoch": 0.04819996587613035, "grad_norm": 1.7529154062353483, "learning_rate": 9.991343293902361e-05, "loss": 0.4068, "step": 565 }, { "epoch": 0.0482852755502474, "grad_norm": 2.0502111605307616, "learning_rate": 9.991261843209872e-05, "loss": 0.4285, "step": 566 }, { "epoch": 0.048370585224364444, "grad_norm": 1.8357865941139286, "learning_rate": 9.991180011461063e-05, "loss": 0.41, "step": 567 }, { "epoch": 0.04845589489848149, "grad_norm": 1.4585383498178595, "learning_rate": 9.991097798662183e-05, "loss": 0.3725, "step": 568 }, { "epoch": 0.048541204572598536, "grad_norm": 1.9377198863617282, "learning_rate": 9.99101520481951e-05, "loss": 0.4036, "step": 569 }, { "epoch": 0.048626514246715574, "grad_norm": 1.9609964673420446, "learning_rate": 9.99093222993935e-05, "loss": 0.4594, "step": 570 }, { "epoch": 0.04871182392083262, "grad_norm": 1.92798338482848, "learning_rate": 9.990848874028032e-05, "loss": 0.4216, "step": 571 }, { "epoch": 0.048797133594949665, "grad_norm": 1.9774213892458463, "learning_rate": 9.990765137091927e-05, "loss": 0.4127, "step": 572 }, { "epoch": 0.04888244326906671, "grad_norm": 1.7822954556461847, "learning_rate": 9.990681019137424e-05, "loss": 0.4474, "step": 573 }, { "epoch": 0.04896775294318376, "grad_norm": 1.6003575700321564, "learning_rate": 9.990596520170945e-05, "loss": 0.4175, "step": 574 }, { "epoch": 0.0490530626173008, "grad_norm": 1.7745673659783288, "learning_rate": 9.99051164019894e-05, "loss": 0.4284, "step": 575 }, { "epoch": 0.04913837229141785, "grad_norm": 1.9852321905978498, "learning_rate": 9.990426379227894e-05, "loss": 0.4105, "step": 576 }, { "epoch": 0.04922368196553489, "grad_norm": 1.8535749870096034, "learning_rate": 9.990340737264311e-05, "loss": 0.4431, "step": 577 }, { "epoch": 0.04930899163965194, "grad_norm": 1.6749429095203776, "learning_rate": 9.990254714314732e-05, "loss": 0.4179, "step": 578 }, { "epoch": 0.049394301313768985, "grad_norm": 2.265222447905475, "learning_rate": 9.990168310385726e-05, "loss": 0.4605, "step": 579 }, { "epoch": 0.04947961098788602, "grad_norm": 1.8783480951270148, "learning_rate": 9.990081525483885e-05, "loss": 0.4486, "step": 580 }, { "epoch": 0.04956492066200307, "grad_norm": 2.0766327742029986, "learning_rate": 9.989994359615836e-05, "loss": 0.4747, "step": 581 }, { "epoch": 0.049650230336120114, "grad_norm": 1.7237823335023126, "learning_rate": 9.989906812788235e-05, "loss": 0.478, "step": 582 }, { "epoch": 0.04973554001023716, "grad_norm": 1.5445648822586222, "learning_rate": 9.989818885007766e-05, "loss": 0.453, "step": 583 }, { "epoch": 0.049820849684354206, "grad_norm": 1.9257722742208105, "learning_rate": 9.98973057628114e-05, "loss": 0.4403, "step": 584 }, { "epoch": 0.04990615935847125, "grad_norm": 1.8812706676881346, "learning_rate": 9.989641886615101e-05, "loss": 0.4751, "step": 585 }, { "epoch": 0.0499914690325883, "grad_norm": 1.6575149887580998, "learning_rate": 9.989552816016418e-05, "loss": 0.4307, "step": 586 }, { "epoch": 0.05007677870670534, "grad_norm": 1.6603679925859307, "learning_rate": 9.989463364491893e-05, "loss": 0.3732, "step": 587 }, { "epoch": 0.05016208838082239, "grad_norm": 1.5386976400185586, "learning_rate": 9.989373532048353e-05, "loss": 0.3842, "step": 588 }, { "epoch": 0.05024739805493943, "grad_norm": 1.7476727133168597, "learning_rate": 9.989283318692657e-05, "loss": 0.459, "step": 589 }, { "epoch": 0.05033270772905647, "grad_norm": 1.4539096313634023, "learning_rate": 9.989192724431694e-05, "loss": 0.3551, "step": 590 }, { "epoch": 0.05041801740317352, "grad_norm": 1.8311567679878624, "learning_rate": 9.989101749272378e-05, "loss": 0.4704, "step": 591 }, { "epoch": 0.050503327077290563, "grad_norm": 1.674485921969712, "learning_rate": 9.989010393221656e-05, "loss": 0.4231, "step": 592 }, { "epoch": 0.05058863675140761, "grad_norm": 1.5810920847569117, "learning_rate": 9.988918656286503e-05, "loss": 0.4378, "step": 593 }, { "epoch": 0.050673946425524655, "grad_norm": 1.788070203627527, "learning_rate": 9.98882653847392e-05, "loss": 0.4168, "step": 594 }, { "epoch": 0.0507592560996417, "grad_norm": 1.3916835399373817, "learning_rate": 9.988734039790942e-05, "loss": 0.416, "step": 595 }, { "epoch": 0.050844565773758746, "grad_norm": 1.6962849161803455, "learning_rate": 9.98864116024463e-05, "loss": 0.4042, "step": 596 }, { "epoch": 0.05092987544787579, "grad_norm": 1.7238480301189036, "learning_rate": 9.988547899842076e-05, "loss": 0.426, "step": 597 }, { "epoch": 0.05101518512199284, "grad_norm": 1.7432537835803728, "learning_rate": 9.988454258590398e-05, "loss": 0.4885, "step": 598 }, { "epoch": 0.051100494796109876, "grad_norm": 1.580743863971221, "learning_rate": 9.988360236496745e-05, "loss": 0.4259, "step": 599 }, { "epoch": 0.05118580447022692, "grad_norm": 2.032638896592715, "learning_rate": 9.988265833568298e-05, "loss": 0.4687, "step": 600 }, { "epoch": 0.05127111414434397, "grad_norm": 1.7424026151176844, "learning_rate": 9.98817104981226e-05, "loss": 0.3844, "step": 601 }, { "epoch": 0.05135642381846101, "grad_norm": 1.6243691320192233, "learning_rate": 9.988075885235873e-05, "loss": 0.3535, "step": 602 }, { "epoch": 0.05144173349257806, "grad_norm": 1.9235100962545184, "learning_rate": 9.987980339846395e-05, "loss": 0.452, "step": 603 }, { "epoch": 0.051527043166695104, "grad_norm": 1.9567644314275794, "learning_rate": 9.987884413651127e-05, "loss": 0.4421, "step": 604 }, { "epoch": 0.05161235284081215, "grad_norm": 1.8302624588245187, "learning_rate": 9.987788106657387e-05, "loss": 0.4096, "step": 605 }, { "epoch": 0.051697662514929195, "grad_norm": 2.018242253086773, "learning_rate": 9.987691418872532e-05, "loss": 0.4417, "step": 606 }, { "epoch": 0.05178297218904624, "grad_norm": 1.8376379700847743, "learning_rate": 9.987594350303941e-05, "loss": 0.43, "step": 607 }, { "epoch": 0.051868281863163286, "grad_norm": 1.9614028682419764, "learning_rate": 9.987496900959026e-05, "loss": 0.4157, "step": 608 }, { "epoch": 0.051953591537280325, "grad_norm": 1.8620728226873853, "learning_rate": 9.987399070845226e-05, "loss": 0.4577, "step": 609 }, { "epoch": 0.05203890121139737, "grad_norm": 1.423864879578551, "learning_rate": 9.98730085997001e-05, "loss": 0.4333, "step": 610 }, { "epoch": 0.052124210885514416, "grad_norm": 1.727476404583737, "learning_rate": 9.987202268340876e-05, "loss": 0.4493, "step": 611 }, { "epoch": 0.05220952055963146, "grad_norm": 1.7847592770118559, "learning_rate": 9.98710329596535e-05, "loss": 0.4631, "step": 612 }, { "epoch": 0.05229483023374851, "grad_norm": 1.8576834541592206, "learning_rate": 9.987003942850989e-05, "loss": 0.461, "step": 613 }, { "epoch": 0.05238013990786555, "grad_norm": 1.8219587879314543, "learning_rate": 9.986904209005378e-05, "loss": 0.433, "step": 614 }, { "epoch": 0.0524654495819826, "grad_norm": 1.9570009149267584, "learning_rate": 9.98680409443613e-05, "loss": 0.3849, "step": 615 }, { "epoch": 0.052550759256099644, "grad_norm": 1.8562457254790852, "learning_rate": 9.986703599150891e-05, "loss": 0.4703, "step": 616 }, { "epoch": 0.05263606893021669, "grad_norm": 1.426696650703955, "learning_rate": 9.986602723157332e-05, "loss": 0.4023, "step": 617 }, { "epoch": 0.05272137860433373, "grad_norm": 1.8172716929775283, "learning_rate": 9.986501466463152e-05, "loss": 0.37, "step": 618 }, { "epoch": 0.052806688278450774, "grad_norm": 2.1353723164736773, "learning_rate": 9.986399829076084e-05, "loss": 0.4564, "step": 619 }, { "epoch": 0.05289199795256782, "grad_norm": 1.5738562836332872, "learning_rate": 9.986297811003886e-05, "loss": 0.3926, "step": 620 }, { "epoch": 0.052977307626684865, "grad_norm": 1.9655220574864012, "learning_rate": 9.986195412254349e-05, "loss": 0.4247, "step": 621 }, { "epoch": 0.05306261730080191, "grad_norm": 1.7257172599868804, "learning_rate": 9.986092632835286e-05, "loss": 0.4088, "step": 622 }, { "epoch": 0.053147926974918956, "grad_norm": 1.9636922385634132, "learning_rate": 9.985989472754549e-05, "loss": 0.438, "step": 623 }, { "epoch": 0.053233236649036, "grad_norm": 1.5309201099274732, "learning_rate": 9.985885932020011e-05, "loss": 0.4287, "step": 624 }, { "epoch": 0.05331854632315305, "grad_norm": 1.622793900919288, "learning_rate": 9.985782010639577e-05, "loss": 0.4248, "step": 625 }, { "epoch": 0.05340385599727009, "grad_norm": 1.4829005899582366, "learning_rate": 9.98567770862118e-05, "loss": 0.404, "step": 626 }, { "epoch": 0.05348916567138714, "grad_norm": 1.6558352976868793, "learning_rate": 9.985573025972785e-05, "loss": 0.426, "step": 627 }, { "epoch": 0.05357447534550418, "grad_norm": 1.880196491179506, "learning_rate": 9.985467962702382e-05, "loss": 0.4172, "step": 628 }, { "epoch": 0.05365978501962122, "grad_norm": 1.4922713011532882, "learning_rate": 9.985362518817993e-05, "loss": 0.3653, "step": 629 }, { "epoch": 0.05374509469373827, "grad_norm": 1.7043214847345076, "learning_rate": 9.985256694327669e-05, "loss": 0.3725, "step": 630 }, { "epoch": 0.053830404367855314, "grad_norm": 1.8786667256735436, "learning_rate": 9.985150489239486e-05, "loss": 0.4031, "step": 631 }, { "epoch": 0.05391571404197236, "grad_norm": 2.037920986751495, "learning_rate": 9.985043903561555e-05, "loss": 0.4325, "step": 632 }, { "epoch": 0.054001023716089405, "grad_norm": 1.6646395117200277, "learning_rate": 9.984936937302013e-05, "loss": 0.4986, "step": 633 }, { "epoch": 0.05408633339020645, "grad_norm": 1.9408354812463864, "learning_rate": 9.984829590469025e-05, "loss": 0.4124, "step": 634 }, { "epoch": 0.054171643064323496, "grad_norm": 1.9568076853107614, "learning_rate": 9.984721863070788e-05, "loss": 0.4499, "step": 635 }, { "epoch": 0.05425695273844054, "grad_norm": 1.8127518840585877, "learning_rate": 9.984613755115525e-05, "loss": 0.3865, "step": 636 }, { "epoch": 0.05434226241255758, "grad_norm": 1.9867568252041108, "learning_rate": 9.984505266611491e-05, "loss": 0.4375, "step": 637 }, { "epoch": 0.054427572086674626, "grad_norm": 1.4679024056571948, "learning_rate": 9.984396397566965e-05, "loss": 0.4237, "step": 638 }, { "epoch": 0.05451288176079167, "grad_norm": 1.686093839479765, "learning_rate": 9.984287147990263e-05, "loss": 0.3952, "step": 639 }, { "epoch": 0.05459819143490872, "grad_norm": 1.565691625582272, "learning_rate": 9.984177517889724e-05, "loss": 0.406, "step": 640 }, { "epoch": 0.05468350110902576, "grad_norm": 1.7322620890147777, "learning_rate": 9.984067507273715e-05, "loss": 0.4011, "step": 641 }, { "epoch": 0.05476881078314281, "grad_norm": 1.5636645745500795, "learning_rate": 9.98395711615064e-05, "loss": 0.3824, "step": 642 }, { "epoch": 0.054854120457259854, "grad_norm": 1.8386579770336298, "learning_rate": 9.983846344528923e-05, "loss": 0.4574, "step": 643 }, { "epoch": 0.0549394301313769, "grad_norm": 1.9048548110942412, "learning_rate": 9.983735192417021e-05, "loss": 0.4624, "step": 644 }, { "epoch": 0.055024739805493945, "grad_norm": 2.044961631993242, "learning_rate": 9.983623659823422e-05, "loss": 0.4143, "step": 645 }, { "epoch": 0.05511004947961099, "grad_norm": 1.8062537856540806, "learning_rate": 9.983511746756638e-05, "loss": 0.437, "step": 646 }, { "epoch": 0.05519535915372803, "grad_norm": 1.5146183721729956, "learning_rate": 9.983399453225216e-05, "loss": 0.4035, "step": 647 }, { "epoch": 0.055280668827845075, "grad_norm": 1.9668894862443678, "learning_rate": 9.983286779237727e-05, "loss": 0.4834, "step": 648 }, { "epoch": 0.05536597850196212, "grad_norm": 1.861829596071821, "learning_rate": 9.983173724802772e-05, "loss": 0.4225, "step": 649 }, { "epoch": 0.055451288176079166, "grad_norm": 1.8725588483947024, "learning_rate": 9.983060289928984e-05, "loss": 0.4628, "step": 650 }, { "epoch": 0.05553659785019621, "grad_norm": 1.754375437487421, "learning_rate": 9.982946474625024e-05, "loss": 0.4096, "step": 651 }, { "epoch": 0.05562190752431326, "grad_norm": 1.7151826771238572, "learning_rate": 9.982832278899582e-05, "loss": 0.4226, "step": 652 }, { "epoch": 0.0557072171984303, "grad_norm": 1.7291879154722833, "learning_rate": 9.982717702761371e-05, "loss": 0.4568, "step": 653 }, { "epoch": 0.05579252687254735, "grad_norm": 1.5367950156776902, "learning_rate": 9.982602746219142e-05, "loss": 0.4073, "step": 654 }, { "epoch": 0.055877836546664394, "grad_norm": 1.7327844698318313, "learning_rate": 9.982487409281671e-05, "loss": 0.4243, "step": 655 }, { "epoch": 0.05596314622078144, "grad_norm": 1.60194943103676, "learning_rate": 9.982371691957764e-05, "loss": 0.4268, "step": 656 }, { "epoch": 0.05604845589489848, "grad_norm": 1.4197391775517751, "learning_rate": 9.982255594256253e-05, "loss": 0.4085, "step": 657 }, { "epoch": 0.056133765569015524, "grad_norm": 1.8696801055807888, "learning_rate": 9.982139116186004e-05, "loss": 0.4509, "step": 658 }, { "epoch": 0.05621907524313257, "grad_norm": 1.760451171657228, "learning_rate": 9.98202225775591e-05, "loss": 0.3803, "step": 659 }, { "epoch": 0.056304384917249616, "grad_norm": 1.7266064806553745, "learning_rate": 9.981905018974888e-05, "loss": 0.4099, "step": 660 }, { "epoch": 0.05638969459136666, "grad_norm": 2.029331063798562, "learning_rate": 9.981787399851894e-05, "loss": 0.5058, "step": 661 }, { "epoch": 0.05647500426548371, "grad_norm": 1.9639903465740756, "learning_rate": 9.981669400395906e-05, "loss": 0.4188, "step": 662 }, { "epoch": 0.05656031393960075, "grad_norm": 1.5458439945699876, "learning_rate": 9.98155102061593e-05, "loss": 0.4375, "step": 663 }, { "epoch": 0.0566456236137178, "grad_norm": 1.6156104317751672, "learning_rate": 9.981432260521006e-05, "loss": 0.4734, "step": 664 }, { "epoch": 0.056730933287834844, "grad_norm": 1.9446740998780048, "learning_rate": 9.981313120120199e-05, "loss": 0.376, "step": 665 }, { "epoch": 0.05681624296195188, "grad_norm": 1.580613544221983, "learning_rate": 9.981193599422608e-05, "loss": 0.4064, "step": 666 }, { "epoch": 0.05690155263606893, "grad_norm": 1.9939371840283002, "learning_rate": 9.981073698437355e-05, "loss": 0.4597, "step": 667 }, { "epoch": 0.05698686231018597, "grad_norm": 1.627673077968262, "learning_rate": 9.980953417173594e-05, "loss": 0.392, "step": 668 }, { "epoch": 0.05707217198430302, "grad_norm": 1.811943560721299, "learning_rate": 9.980832755640509e-05, "loss": 0.421, "step": 669 }, { "epoch": 0.057157481658420065, "grad_norm": 1.508057016104302, "learning_rate": 9.98071171384731e-05, "loss": 0.3473, "step": 670 }, { "epoch": 0.05724279133253711, "grad_norm": 1.706799663835291, "learning_rate": 9.980590291803241e-05, "loss": 0.4089, "step": 671 }, { "epoch": 0.057328101006654156, "grad_norm": 1.9110688757788352, "learning_rate": 9.98046848951757e-05, "loss": 0.4346, "step": 672 }, { "epoch": 0.0574134106807712, "grad_norm": 1.5839432829807862, "learning_rate": 9.980346306999596e-05, "loss": 0.417, "step": 673 }, { "epoch": 0.05749872035488825, "grad_norm": 2.150819103435792, "learning_rate": 9.980223744258644e-05, "loss": 0.4435, "step": 674 }, { "epoch": 0.05758403002900529, "grad_norm": 1.8969270953545716, "learning_rate": 9.980100801304077e-05, "loss": 0.4885, "step": 675 }, { "epoch": 0.05766933970312233, "grad_norm": 1.4218275460509366, "learning_rate": 9.979977478145276e-05, "loss": 0.3559, "step": 676 }, { "epoch": 0.05775464937723938, "grad_norm": 1.771348033230026, "learning_rate": 9.97985377479166e-05, "loss": 0.4363, "step": 677 }, { "epoch": 0.05783995905135642, "grad_norm": 1.5391672941054115, "learning_rate": 9.97972969125267e-05, "loss": 0.3595, "step": 678 }, { "epoch": 0.05792526872547347, "grad_norm": 2.0226092318731266, "learning_rate": 9.979605227537781e-05, "loss": 0.4473, "step": 679 }, { "epoch": 0.058010578399590514, "grad_norm": 1.5755657935755187, "learning_rate": 9.979480383656494e-05, "loss": 0.3948, "step": 680 }, { "epoch": 0.05809588807370756, "grad_norm": 1.7941827258737757, "learning_rate": 9.979355159618343e-05, "loss": 0.4114, "step": 681 }, { "epoch": 0.058181197747824605, "grad_norm": 1.983599293397938, "learning_rate": 9.979229555432882e-05, "loss": 0.3877, "step": 682 }, { "epoch": 0.05826650742194165, "grad_norm": 1.8565561421004084, "learning_rate": 9.979103571109706e-05, "loss": 0.3932, "step": 683 }, { "epoch": 0.058351817096058696, "grad_norm": 1.5969606858050187, "learning_rate": 9.97897720665843e-05, "loss": 0.4063, "step": 684 }, { "epoch": 0.058437126770175735, "grad_norm": 1.5477181650916576, "learning_rate": 9.978850462088704e-05, "loss": 0.3773, "step": 685 }, { "epoch": 0.05852243644429278, "grad_norm": 1.9571922982169399, "learning_rate": 9.978723337410202e-05, "loss": 0.4324, "step": 686 }, { "epoch": 0.058607746118409826, "grad_norm": 1.4798391810239806, "learning_rate": 9.978595832632632e-05, "loss": 0.402, "step": 687 }, { "epoch": 0.05869305579252687, "grad_norm": 1.7406158200472204, "learning_rate": 9.978467947765724e-05, "loss": 0.4159, "step": 688 }, { "epoch": 0.05877836546664392, "grad_norm": 1.7907633463356754, "learning_rate": 9.978339682819246e-05, "loss": 0.3977, "step": 689 }, { "epoch": 0.05886367514076096, "grad_norm": 1.9310916625856633, "learning_rate": 9.978211037802986e-05, "loss": 0.5006, "step": 690 }, { "epoch": 0.05894898481487801, "grad_norm": 1.869786197863332, "learning_rate": 9.978082012726768e-05, "loss": 0.4278, "step": 691 }, { "epoch": 0.059034294488995054, "grad_norm": 1.4471769936381407, "learning_rate": 9.977952607600442e-05, "loss": 0.4572, "step": 692 }, { "epoch": 0.0591196041631121, "grad_norm": 1.767667820799123, "learning_rate": 9.977822822433886e-05, "loss": 0.407, "step": 693 }, { "epoch": 0.059204913837229145, "grad_norm": 1.658400237428728, "learning_rate": 9.977692657237013e-05, "loss": 0.4301, "step": 694 }, { "epoch": 0.059290223511346184, "grad_norm": 1.9816144830598752, "learning_rate": 9.977562112019754e-05, "loss": 0.4262, "step": 695 }, { "epoch": 0.05937553318546323, "grad_norm": 1.6016108807955403, "learning_rate": 9.97743118679208e-05, "loss": 0.4183, "step": 696 }, { "epoch": 0.059460842859580275, "grad_norm": 1.8153864140762759, "learning_rate": 9.977299881563984e-05, "loss": 0.3544, "step": 697 }, { "epoch": 0.05954615253369732, "grad_norm": 1.5616597777506123, "learning_rate": 9.977168196345492e-05, "loss": 0.4283, "step": 698 }, { "epoch": 0.059631462207814366, "grad_norm": 1.7108400918065563, "learning_rate": 9.977036131146656e-05, "loss": 0.3845, "step": 699 }, { "epoch": 0.05971677188193141, "grad_norm": 1.740349728307172, "learning_rate": 9.97690368597756e-05, "loss": 0.3917, "step": 700 }, { "epoch": 0.05980208155604846, "grad_norm": 1.6789937560582797, "learning_rate": 9.976770860848315e-05, "loss": 0.4141, "step": 701 }, { "epoch": 0.0598873912301655, "grad_norm": 1.9257531463862783, "learning_rate": 9.976637655769061e-05, "loss": 0.5062, "step": 702 }, { "epoch": 0.05997270090428255, "grad_norm": 1.4936235318455626, "learning_rate": 9.976504070749969e-05, "loss": 0.4378, "step": 703 }, { "epoch": 0.06005801057839959, "grad_norm": 1.741272827349332, "learning_rate": 9.976370105801234e-05, "loss": 0.4518, "step": 704 }, { "epoch": 0.06014332025251663, "grad_norm": 1.8145108085472286, "learning_rate": 9.976235760933086e-05, "loss": 0.4071, "step": 705 }, { "epoch": 0.06022862992663368, "grad_norm": 1.658808549366544, "learning_rate": 9.976101036155783e-05, "loss": 0.4207, "step": 706 }, { "epoch": 0.060313939600750724, "grad_norm": 1.653848238194744, "learning_rate": 9.975965931479607e-05, "loss": 0.3834, "step": 707 }, { "epoch": 0.06039924927486777, "grad_norm": 1.796642056168611, "learning_rate": 9.975830446914876e-05, "loss": 0.407, "step": 708 }, { "epoch": 0.060484558948984815, "grad_norm": 1.8764388603498008, "learning_rate": 9.975694582471932e-05, "loss": 0.3788, "step": 709 }, { "epoch": 0.06056986862310186, "grad_norm": 1.713628594726069, "learning_rate": 9.975558338161146e-05, "loss": 0.4424, "step": 710 }, { "epoch": 0.060655178297218906, "grad_norm": 1.5238020240721182, "learning_rate": 9.975421713992923e-05, "loss": 0.3971, "step": 711 }, { "epoch": 0.06074048797133595, "grad_norm": 1.4240721609873204, "learning_rate": 9.97528470997769e-05, "loss": 0.3728, "step": 712 }, { "epoch": 0.060825797645453, "grad_norm": 1.671246990009984, "learning_rate": 9.975147326125908e-05, "loss": 0.3764, "step": 713 }, { "epoch": 0.060911107319570036, "grad_norm": 1.6296077836784801, "learning_rate": 9.975009562448066e-05, "loss": 0.4083, "step": 714 }, { "epoch": 0.06099641699368708, "grad_norm": 1.7082977136139423, "learning_rate": 9.974871418954681e-05, "loss": 0.3684, "step": 715 }, { "epoch": 0.06108172666780413, "grad_norm": 1.9369928249921435, "learning_rate": 9.9747328956563e-05, "loss": 0.4308, "step": 716 }, { "epoch": 0.06116703634192117, "grad_norm": 2.2116112642839973, "learning_rate": 9.974593992563498e-05, "loss": 0.4336, "step": 717 }, { "epoch": 0.06125234601603822, "grad_norm": 1.9965355789105876, "learning_rate": 9.974454709686878e-05, "loss": 0.4791, "step": 718 }, { "epoch": 0.061337655690155264, "grad_norm": 1.8203665270994214, "learning_rate": 9.974315047037077e-05, "loss": 0.4443, "step": 719 }, { "epoch": 0.06142296536427231, "grad_norm": 1.7504232439757355, "learning_rate": 9.974175004624756e-05, "loss": 0.4516, "step": 720 }, { "epoch": 0.061508275038389355, "grad_norm": 1.9204218262978112, "learning_rate": 9.974034582460606e-05, "loss": 0.3909, "step": 721 }, { "epoch": 0.0615935847125064, "grad_norm": 1.8106615674214386, "learning_rate": 9.973893780555346e-05, "loss": 0.4174, "step": 722 }, { "epoch": 0.06167889438662345, "grad_norm": 1.6481255692678836, "learning_rate": 9.973752598919728e-05, "loss": 0.4045, "step": 723 }, { "epoch": 0.061764204060740485, "grad_norm": 1.8095463989456402, "learning_rate": 9.973611037564529e-05, "loss": 0.4089, "step": 724 }, { "epoch": 0.06184951373485753, "grad_norm": 1.4843783729916447, "learning_rate": 9.973469096500558e-05, "loss": 0.4075, "step": 725 }, { "epoch": 0.061934823408974576, "grad_norm": 1.483033788989333, "learning_rate": 9.97332677573865e-05, "loss": 0.3781, "step": 726 }, { "epoch": 0.06202013308309162, "grad_norm": 1.6702006860175476, "learning_rate": 9.973184075289672e-05, "loss": 0.38, "step": 727 }, { "epoch": 0.06210544275720867, "grad_norm": 1.8546676673246565, "learning_rate": 9.973040995164515e-05, "loss": 0.4095, "step": 728 }, { "epoch": 0.06219075243132571, "grad_norm": 2.0802081000325225, "learning_rate": 9.972897535374106e-05, "loss": 0.4388, "step": 729 }, { "epoch": 0.06227606210544276, "grad_norm": 2.0643990761314117, "learning_rate": 9.972753695929397e-05, "loss": 0.4426, "step": 730 }, { "epoch": 0.062361371779559804, "grad_norm": 1.5342278621693195, "learning_rate": 9.972609476841367e-05, "loss": 0.4353, "step": 731 }, { "epoch": 0.06244668145367685, "grad_norm": 1.5758553982539307, "learning_rate": 9.972464878121028e-05, "loss": 0.3547, "step": 732 }, { "epoch": 0.0625319911277939, "grad_norm": 1.3762799721069183, "learning_rate": 9.972319899779422e-05, "loss": 0.3908, "step": 733 }, { "epoch": 0.06261730080191094, "grad_norm": 1.6035432801550966, "learning_rate": 9.97217454182761e-05, "loss": 0.3907, "step": 734 }, { "epoch": 0.06270261047602799, "grad_norm": 1.7161712721780478, "learning_rate": 9.972028804276697e-05, "loss": 0.4628, "step": 735 }, { "epoch": 0.06278792015014503, "grad_norm": 1.7743157048276244, "learning_rate": 9.971882687137805e-05, "loss": 0.4329, "step": 736 }, { "epoch": 0.06287322982426208, "grad_norm": 1.6373236000618754, "learning_rate": 9.97173619042209e-05, "loss": 0.4325, "step": 737 }, { "epoch": 0.06295853949837911, "grad_norm": 1.8875907320358087, "learning_rate": 9.971589314140738e-05, "loss": 0.3883, "step": 738 }, { "epoch": 0.06304384917249616, "grad_norm": 1.9402580319692888, "learning_rate": 9.97144205830496e-05, "loss": 0.4356, "step": 739 }, { "epoch": 0.0631291588466132, "grad_norm": 1.829180551300995, "learning_rate": 9.971294422925999e-05, "loss": 0.4227, "step": 740 }, { "epoch": 0.06321446852073025, "grad_norm": 1.5307310353782888, "learning_rate": 9.971146408015126e-05, "loss": 0.3852, "step": 741 }, { "epoch": 0.06329977819484729, "grad_norm": 1.7781374992910104, "learning_rate": 9.970998013583643e-05, "loss": 0.4087, "step": 742 }, { "epoch": 0.06338508786896434, "grad_norm": 1.6321747186763973, "learning_rate": 9.970849239642875e-05, "loss": 0.369, "step": 743 }, { "epoch": 0.06347039754308138, "grad_norm": 1.7753962652034996, "learning_rate": 9.970700086204184e-05, "loss": 0.4346, "step": 744 }, { "epoch": 0.06355570721719843, "grad_norm": 1.5127150138424117, "learning_rate": 9.970550553278956e-05, "loss": 0.4206, "step": 745 }, { "epoch": 0.06364101689131547, "grad_norm": 1.7462342480964155, "learning_rate": 9.970400640878605e-05, "loss": 0.4002, "step": 746 }, { "epoch": 0.06372632656543252, "grad_norm": 1.9813579541948918, "learning_rate": 9.97025034901458e-05, "loss": 0.3988, "step": 747 }, { "epoch": 0.06381163623954957, "grad_norm": 1.7700899096999652, "learning_rate": 9.970099677698353e-05, "loss": 0.3815, "step": 748 }, { "epoch": 0.06389694591366661, "grad_norm": 1.828453099730764, "learning_rate": 9.969948626941426e-05, "loss": 0.4163, "step": 749 }, { "epoch": 0.06398225558778366, "grad_norm": 1.7354645614456705, "learning_rate": 9.969797196755331e-05, "loss": 0.4387, "step": 750 }, { "epoch": 0.0640675652619007, "grad_norm": 1.9481513210130315, "learning_rate": 9.969645387151629e-05, "loss": 0.431, "step": 751 }, { "epoch": 0.06415287493601775, "grad_norm": 1.972405566915169, "learning_rate": 9.96949319814191e-05, "loss": 0.3986, "step": 752 }, { "epoch": 0.0642381846101348, "grad_norm": 1.5148859483449306, "learning_rate": 9.969340629737794e-05, "loss": 0.4384, "step": 753 }, { "epoch": 0.06432349428425184, "grad_norm": 1.7965166480941033, "learning_rate": 9.969187681950928e-05, "loss": 0.4262, "step": 754 }, { "epoch": 0.06440880395836888, "grad_norm": 1.7650395325726247, "learning_rate": 9.96903435479299e-05, "loss": 0.4456, "step": 755 }, { "epoch": 0.06449411363248593, "grad_norm": 1.672672901311217, "learning_rate": 9.968880648275682e-05, "loss": 0.3911, "step": 756 }, { "epoch": 0.06457942330660296, "grad_norm": 2.093351525492899, "learning_rate": 9.968726562410744e-05, "loss": 0.4478, "step": 757 }, { "epoch": 0.06466473298072001, "grad_norm": 1.5839073077737664, "learning_rate": 9.968572097209934e-05, "loss": 0.4166, "step": 758 }, { "epoch": 0.06475004265483705, "grad_norm": 2.127672198237853, "learning_rate": 9.968417252685049e-05, "loss": 0.4647, "step": 759 }, { "epoch": 0.0648353523289541, "grad_norm": 1.8028922791929458, "learning_rate": 9.968262028847908e-05, "loss": 0.4237, "step": 760 }, { "epoch": 0.06492066200307114, "grad_norm": 1.547818599313833, "learning_rate": 9.968106425710364e-05, "loss": 0.3278, "step": 761 }, { "epoch": 0.06500597167718819, "grad_norm": 2.002709460787104, "learning_rate": 9.967950443284293e-05, "loss": 0.4704, "step": 762 }, { "epoch": 0.06509128135130524, "grad_norm": 1.6936164493625083, "learning_rate": 9.967794081581606e-05, "loss": 0.4133, "step": 763 }, { "epoch": 0.06517659102542228, "grad_norm": 1.3878576625320678, "learning_rate": 9.96763734061424e-05, "loss": 0.3611, "step": 764 }, { "epoch": 0.06526190069953933, "grad_norm": 1.6748449737774969, "learning_rate": 9.96748022039416e-05, "loss": 0.3931, "step": 765 }, { "epoch": 0.06534721037365637, "grad_norm": 1.4758210111707109, "learning_rate": 9.967322720933363e-05, "loss": 0.4096, "step": 766 }, { "epoch": 0.06543252004777342, "grad_norm": 1.9596742003057535, "learning_rate": 9.967164842243872e-05, "loss": 0.4294, "step": 767 }, { "epoch": 0.06551782972189046, "grad_norm": 1.8062267262368437, "learning_rate": 9.967006584337741e-05, "loss": 0.407, "step": 768 }, { "epoch": 0.06560313939600751, "grad_norm": 2.028697361289656, "learning_rate": 9.966847947227054e-05, "loss": 0.49, "step": 769 }, { "epoch": 0.06568844907012455, "grad_norm": 1.5384473781667618, "learning_rate": 9.966688930923917e-05, "loss": 0.3758, "step": 770 }, { "epoch": 0.0657737587442416, "grad_norm": 1.6759639186935664, "learning_rate": 9.966529535440475e-05, "loss": 0.426, "step": 771 }, { "epoch": 0.06585906841835865, "grad_norm": 1.900145653110966, "learning_rate": 9.966369760788895e-05, "loss": 0.4584, "step": 772 }, { "epoch": 0.06594437809247569, "grad_norm": 1.69237013348179, "learning_rate": 9.966209606981373e-05, "loss": 0.4171, "step": 773 }, { "epoch": 0.06602968776659274, "grad_norm": 1.8458526389301464, "learning_rate": 9.966049074030141e-05, "loss": 0.4165, "step": 774 }, { "epoch": 0.06611499744070978, "grad_norm": 1.672360643816351, "learning_rate": 9.96588816194745e-05, "loss": 0.3889, "step": 775 }, { "epoch": 0.06620030711482681, "grad_norm": 2.1442795346923385, "learning_rate": 9.965726870745586e-05, "loss": 0.4473, "step": 776 }, { "epoch": 0.06628561678894386, "grad_norm": 1.5987477200083995, "learning_rate": 9.965565200436865e-05, "loss": 0.438, "step": 777 }, { "epoch": 0.0663709264630609, "grad_norm": 1.5985370623623631, "learning_rate": 9.965403151033628e-05, "loss": 0.4019, "step": 778 }, { "epoch": 0.06645623613717795, "grad_norm": 1.621407719736719, "learning_rate": 9.965240722548245e-05, "loss": 0.4483, "step": 779 }, { "epoch": 0.066541545811295, "grad_norm": 1.3318707769390519, "learning_rate": 9.965077914993119e-05, "loss": 0.4301, "step": 780 }, { "epoch": 0.06662685548541204, "grad_norm": 1.4576918508112018, "learning_rate": 9.964914728380677e-05, "loss": 0.3954, "step": 781 }, { "epoch": 0.06671216515952909, "grad_norm": 1.5354488181375885, "learning_rate": 9.96475116272338e-05, "loss": 0.3983, "step": 782 }, { "epoch": 0.06679747483364613, "grad_norm": 1.768767674226376, "learning_rate": 9.964587218033715e-05, "loss": 0.4189, "step": 783 }, { "epoch": 0.06688278450776318, "grad_norm": 1.5955260609285875, "learning_rate": 9.964422894324197e-05, "loss": 0.3875, "step": 784 }, { "epoch": 0.06696809418188023, "grad_norm": 1.570512218016105, "learning_rate": 9.964258191607372e-05, "loss": 0.3878, "step": 785 }, { "epoch": 0.06705340385599727, "grad_norm": 1.5519721313511008, "learning_rate": 9.964093109895816e-05, "loss": 0.4308, "step": 786 }, { "epoch": 0.06713871353011432, "grad_norm": 1.4750880392923433, "learning_rate": 9.963927649202127e-05, "loss": 0.3928, "step": 787 }, { "epoch": 0.06722402320423136, "grad_norm": 1.5314689012658513, "learning_rate": 9.963761809538943e-05, "loss": 0.4001, "step": 788 }, { "epoch": 0.06730933287834841, "grad_norm": 1.5760632634705454, "learning_rate": 9.963595590918921e-05, "loss": 0.4013, "step": 789 }, { "epoch": 0.06739464255246545, "grad_norm": 1.7537355743489207, "learning_rate": 9.963428993354751e-05, "loss": 0.4113, "step": 790 }, { "epoch": 0.0674799522265825, "grad_norm": 1.8104378380395372, "learning_rate": 9.963262016859154e-05, "loss": 0.4171, "step": 791 }, { "epoch": 0.06756526190069954, "grad_norm": 1.3351155033222046, "learning_rate": 9.963094661444878e-05, "loss": 0.3501, "step": 792 }, { "epoch": 0.06765057157481659, "grad_norm": 1.5681311796453534, "learning_rate": 9.962926927124697e-05, "loss": 0.4002, "step": 793 }, { "epoch": 0.06773588124893364, "grad_norm": 1.7074925129001612, "learning_rate": 9.962758813911419e-05, "loss": 0.4493, "step": 794 }, { "epoch": 0.06782119092305067, "grad_norm": 1.5223582822240553, "learning_rate": 9.962590321817878e-05, "loss": 0.3961, "step": 795 }, { "epoch": 0.06790650059716771, "grad_norm": 2.066416287893566, "learning_rate": 9.962421450856936e-05, "loss": 0.5521, "step": 796 }, { "epoch": 0.06799181027128476, "grad_norm": 1.6601151764220508, "learning_rate": 9.962252201041486e-05, "loss": 0.4907, "step": 797 }, { "epoch": 0.0680771199454018, "grad_norm": 1.6354872410928762, "learning_rate": 9.96208257238445e-05, "loss": 0.4299, "step": 798 }, { "epoch": 0.06816242961951885, "grad_norm": 1.7317840193244083, "learning_rate": 9.961912564898779e-05, "loss": 0.3806, "step": 799 }, { "epoch": 0.0682477392936359, "grad_norm": 1.533140378020366, "learning_rate": 9.96174217859745e-05, "loss": 0.3975, "step": 800 }, { "epoch": 0.06833304896775294, "grad_norm": 1.6066696984247963, "learning_rate": 9.961571413493474e-05, "loss": 0.393, "step": 801 }, { "epoch": 0.06841835864186999, "grad_norm": 1.3395906882234951, "learning_rate": 9.961400269599886e-05, "loss": 0.3388, "step": 802 }, { "epoch": 0.06850366831598703, "grad_norm": 1.8338195271693283, "learning_rate": 9.961228746929752e-05, "loss": 0.44, "step": 803 }, { "epoch": 0.06858897799010408, "grad_norm": 1.6121721865981693, "learning_rate": 9.961056845496167e-05, "loss": 0.4034, "step": 804 }, { "epoch": 0.06867428766422112, "grad_norm": 1.9502288097242129, "learning_rate": 9.960884565312255e-05, "loss": 0.4171, "step": 805 }, { "epoch": 0.06875959733833817, "grad_norm": 2.065080819753614, "learning_rate": 9.960711906391167e-05, "loss": 0.4814, "step": 806 }, { "epoch": 0.06884490701245521, "grad_norm": 1.7157795987539437, "learning_rate": 9.960538868746087e-05, "loss": 0.4213, "step": 807 }, { "epoch": 0.06893021668657226, "grad_norm": 1.7273950425101938, "learning_rate": 9.960365452390226e-05, "loss": 0.4027, "step": 808 }, { "epoch": 0.0690155263606893, "grad_norm": 1.8160994075715577, "learning_rate": 9.960191657336821e-05, "loss": 0.4404, "step": 809 }, { "epoch": 0.06910083603480635, "grad_norm": 1.8604421678641454, "learning_rate": 9.960017483599142e-05, "loss": 0.4306, "step": 810 }, { "epoch": 0.0691861457089234, "grad_norm": 1.7108569539937606, "learning_rate": 9.959842931190485e-05, "loss": 0.3651, "step": 811 }, { "epoch": 0.06927145538304044, "grad_norm": 1.6826427632677905, "learning_rate": 9.959668000124177e-05, "loss": 0.4278, "step": 812 }, { "epoch": 0.06935676505715749, "grad_norm": 1.5020084445324864, "learning_rate": 9.959492690413573e-05, "loss": 0.4216, "step": 813 }, { "epoch": 0.06944207473127453, "grad_norm": 1.7381082170067803, "learning_rate": 9.959317002072057e-05, "loss": 0.4279, "step": 814 }, { "epoch": 0.06952738440539157, "grad_norm": 1.8988422229261541, "learning_rate": 9.95914093511304e-05, "loss": 0.4126, "step": 815 }, { "epoch": 0.06961269407950861, "grad_norm": 1.603104904673485, "learning_rate": 9.958964489549968e-05, "loss": 0.3818, "step": 816 }, { "epoch": 0.06969800375362566, "grad_norm": 1.4444876301659328, "learning_rate": 9.958787665396308e-05, "loss": 0.3948, "step": 817 }, { "epoch": 0.0697833134277427, "grad_norm": 1.7364559577082002, "learning_rate": 9.958610462665561e-05, "loss": 0.4178, "step": 818 }, { "epoch": 0.06986862310185975, "grad_norm": 1.7727928838614577, "learning_rate": 9.958432881371253e-05, "loss": 0.4731, "step": 819 }, { "epoch": 0.0699539327759768, "grad_norm": 1.6411653627883929, "learning_rate": 9.958254921526946e-05, "loss": 0.4093, "step": 820 }, { "epoch": 0.07003924245009384, "grad_norm": 1.5903632137812598, "learning_rate": 9.958076583146224e-05, "loss": 0.3813, "step": 821 }, { "epoch": 0.07012455212421088, "grad_norm": 1.6717520695350219, "learning_rate": 9.957897866242702e-05, "loss": 0.403, "step": 822 }, { "epoch": 0.07020986179832793, "grad_norm": 1.6821384734170965, "learning_rate": 9.957718770830022e-05, "loss": 0.4502, "step": 823 }, { "epoch": 0.07029517147244498, "grad_norm": 1.5958567140358806, "learning_rate": 9.957539296921862e-05, "loss": 0.3937, "step": 824 }, { "epoch": 0.07038048114656202, "grad_norm": 1.5158406526970096, "learning_rate": 9.95735944453192e-05, "loss": 0.372, "step": 825 }, { "epoch": 0.07046579082067907, "grad_norm": 1.8311654263388588, "learning_rate": 9.957179213673926e-05, "loss": 0.397, "step": 826 }, { "epoch": 0.07055110049479611, "grad_norm": 1.699377784740752, "learning_rate": 9.956998604361644e-05, "loss": 0.4464, "step": 827 }, { "epoch": 0.07063641016891316, "grad_norm": 1.8911148571635248, "learning_rate": 9.956817616608857e-05, "loss": 0.4565, "step": 828 }, { "epoch": 0.0707217198430302, "grad_norm": 1.445494356828545, "learning_rate": 9.956636250429389e-05, "loss": 0.4068, "step": 829 }, { "epoch": 0.07080702951714725, "grad_norm": 1.7614699228162274, "learning_rate": 9.95645450583708e-05, "loss": 0.4759, "step": 830 }, { "epoch": 0.0708923391912643, "grad_norm": 1.4079481679766486, "learning_rate": 9.95627238284581e-05, "loss": 0.4135, "step": 831 }, { "epoch": 0.07097764886538134, "grad_norm": 1.8147319337224361, "learning_rate": 9.956089881469482e-05, "loss": 0.4389, "step": 832 }, { "epoch": 0.07106295853949839, "grad_norm": 1.4168203027479986, "learning_rate": 9.955907001722025e-05, "loss": 0.3573, "step": 833 }, { "epoch": 0.07114826821361542, "grad_norm": 1.696448863775742, "learning_rate": 9.955723743617407e-05, "loss": 0.4214, "step": 834 }, { "epoch": 0.07123357788773246, "grad_norm": 1.4369650965083927, "learning_rate": 9.955540107169614e-05, "loss": 0.4278, "step": 835 }, { "epoch": 0.07131888756184951, "grad_norm": 1.6617398237820984, "learning_rate": 9.955356092392668e-05, "loss": 0.4181, "step": 836 }, { "epoch": 0.07140419723596655, "grad_norm": 1.553726512703792, "learning_rate": 9.955171699300617e-05, "loss": 0.393, "step": 837 }, { "epoch": 0.0714895069100836, "grad_norm": 1.572362520063593, "learning_rate": 9.954986927907539e-05, "loss": 0.3599, "step": 838 }, { "epoch": 0.07157481658420065, "grad_norm": 1.650089177698375, "learning_rate": 9.95480177822754e-05, "loss": 0.3725, "step": 839 }, { "epoch": 0.07166012625831769, "grad_norm": 1.490219656095398, "learning_rate": 9.954616250274754e-05, "loss": 0.4052, "step": 840 }, { "epoch": 0.07174543593243474, "grad_norm": 1.5664394308557068, "learning_rate": 9.954430344063347e-05, "loss": 0.4184, "step": 841 }, { "epoch": 0.07183074560655178, "grad_norm": 1.4774834722636927, "learning_rate": 9.95424405960751e-05, "loss": 0.3963, "step": 842 }, { "epoch": 0.07191605528066883, "grad_norm": 1.5085276750726058, "learning_rate": 9.954057396921467e-05, "loss": 0.4077, "step": 843 }, { "epoch": 0.07200136495478587, "grad_norm": 1.8681920114790531, "learning_rate": 9.953870356019466e-05, "loss": 0.4168, "step": 844 }, { "epoch": 0.07208667462890292, "grad_norm": 1.6623221596270337, "learning_rate": 9.953682936915791e-05, "loss": 0.4083, "step": 845 }, { "epoch": 0.07217198430301996, "grad_norm": 1.3297857335670609, "learning_rate": 9.953495139624744e-05, "loss": 0.4112, "step": 846 }, { "epoch": 0.07225729397713701, "grad_norm": 1.5688661965427195, "learning_rate": 9.953306964160669e-05, "loss": 0.4289, "step": 847 }, { "epoch": 0.07234260365125406, "grad_norm": 1.753138996687177, "learning_rate": 9.953118410537928e-05, "loss": 0.4013, "step": 848 }, { "epoch": 0.0724279133253711, "grad_norm": 1.5223926580263065, "learning_rate": 9.952929478770916e-05, "loss": 0.3671, "step": 849 }, { "epoch": 0.07251322299948815, "grad_norm": 1.517479931417427, "learning_rate": 9.952740168874059e-05, "loss": 0.4124, "step": 850 }, { "epoch": 0.07259853267360519, "grad_norm": 1.611307409904723, "learning_rate": 9.95255048086181e-05, "loss": 0.3898, "step": 851 }, { "epoch": 0.07268384234772224, "grad_norm": 1.455565669159701, "learning_rate": 9.952360414748649e-05, "loss": 0.3649, "step": 852 }, { "epoch": 0.07276915202183927, "grad_norm": 1.6649097560219357, "learning_rate": 9.952169970549088e-05, "loss": 0.4284, "step": 853 }, { "epoch": 0.07285446169595632, "grad_norm": 1.872216638329716, "learning_rate": 9.951979148277664e-05, "loss": 0.4314, "step": 854 }, { "epoch": 0.07293977137007336, "grad_norm": 1.2739952597747002, "learning_rate": 9.951787947948947e-05, "loss": 0.3456, "step": 855 }, { "epoch": 0.0730250810441904, "grad_norm": 1.827941102779828, "learning_rate": 9.951596369577535e-05, "loss": 0.4185, "step": 856 }, { "epoch": 0.07311039071830745, "grad_norm": 2.0090824371027454, "learning_rate": 9.951404413178052e-05, "loss": 0.4551, "step": 857 }, { "epoch": 0.0731957003924245, "grad_norm": 1.5864202957786435, "learning_rate": 9.951212078765155e-05, "loss": 0.3764, "step": 858 }, { "epoch": 0.07328101006654154, "grad_norm": 1.425126769467023, "learning_rate": 9.951019366353524e-05, "loss": 0.3928, "step": 859 }, { "epoch": 0.07336631974065859, "grad_norm": 1.9067345925660544, "learning_rate": 9.950826275957877e-05, "loss": 0.4583, "step": 860 }, { "epoch": 0.07345162941477563, "grad_norm": 1.5230793849922206, "learning_rate": 9.950632807592951e-05, "loss": 0.3857, "step": 861 }, { "epoch": 0.07353693908889268, "grad_norm": 1.7489930389335564, "learning_rate": 9.950438961273517e-05, "loss": 0.396, "step": 862 }, { "epoch": 0.07362224876300973, "grad_norm": 1.8098799085030821, "learning_rate": 9.950244737014376e-05, "loss": 0.3724, "step": 863 }, { "epoch": 0.07370755843712677, "grad_norm": 1.6872481405429087, "learning_rate": 9.950050134830355e-05, "loss": 0.3844, "step": 864 }, { "epoch": 0.07379286811124382, "grad_norm": 1.5512414821049696, "learning_rate": 9.94985515473631e-05, "loss": 0.4108, "step": 865 }, { "epoch": 0.07387817778536086, "grad_norm": 1.3910940000912388, "learning_rate": 9.949659796747129e-05, "loss": 0.3812, "step": 866 }, { "epoch": 0.07396348745947791, "grad_norm": 1.6244988769170436, "learning_rate": 9.949464060877723e-05, "loss": 0.4056, "step": 867 }, { "epoch": 0.07404879713359495, "grad_norm": 1.554041282498229, "learning_rate": 9.949267947143038e-05, "loss": 0.4036, "step": 868 }, { "epoch": 0.074134106807712, "grad_norm": 1.6257391495686464, "learning_rate": 9.949071455558046e-05, "loss": 0.4229, "step": 869 }, { "epoch": 0.07421941648182905, "grad_norm": 1.8976478347183405, "learning_rate": 9.948874586137747e-05, "loss": 0.4045, "step": 870 }, { "epoch": 0.07430472615594609, "grad_norm": 1.5924332358888864, "learning_rate": 9.948677338897172e-05, "loss": 0.4074, "step": 871 }, { "epoch": 0.07439003583006312, "grad_norm": 1.389861036325033, "learning_rate": 9.948479713851379e-05, "loss": 0.3431, "step": 872 }, { "epoch": 0.07447534550418017, "grad_norm": 1.5992253067187814, "learning_rate": 9.948281711015458e-05, "loss": 0.3875, "step": 873 }, { "epoch": 0.07456065517829721, "grad_norm": 1.8158786458627252, "learning_rate": 9.948083330404522e-05, "loss": 0.4464, "step": 874 }, { "epoch": 0.07464596485241426, "grad_norm": 1.7525795589569202, "learning_rate": 9.947884572033717e-05, "loss": 0.4039, "step": 875 }, { "epoch": 0.0747312745265313, "grad_norm": 1.5980749351109855, "learning_rate": 9.947685435918219e-05, "loss": 0.3322, "step": 876 }, { "epoch": 0.07481658420064835, "grad_norm": 1.4224825389038447, "learning_rate": 9.947485922073231e-05, "loss": 0.3688, "step": 877 }, { "epoch": 0.0749018938747654, "grad_norm": 1.3936716203076156, "learning_rate": 9.947286030513983e-05, "loss": 0.4074, "step": 878 }, { "epoch": 0.07498720354888244, "grad_norm": 2.045672114197119, "learning_rate": 9.947085761255735e-05, "loss": 0.4267, "step": 879 }, { "epoch": 0.07507251322299949, "grad_norm": 1.7085090019652056, "learning_rate": 9.94688511431378e-05, "loss": 0.3809, "step": 880 }, { "epoch": 0.07515782289711653, "grad_norm": 2.0479668187791304, "learning_rate": 9.946684089703434e-05, "loss": 0.4752, "step": 881 }, { "epoch": 0.07524313257123358, "grad_norm": 1.4061678544529634, "learning_rate": 9.946482687440042e-05, "loss": 0.3936, "step": 882 }, { "epoch": 0.07532844224535062, "grad_norm": 1.597122886812165, "learning_rate": 9.946280907538985e-05, "loss": 0.3786, "step": 883 }, { "epoch": 0.07541375191946767, "grad_norm": 1.554040131859552, "learning_rate": 9.946078750015664e-05, "loss": 0.3981, "step": 884 }, { "epoch": 0.07549906159358472, "grad_norm": 1.756019932892268, "learning_rate": 9.945876214885513e-05, "loss": 0.4367, "step": 885 }, { "epoch": 0.07558437126770176, "grad_norm": 1.4751623872496558, "learning_rate": 9.945673302163997e-05, "loss": 0.3971, "step": 886 }, { "epoch": 0.0756696809418188, "grad_norm": 1.477371639995077, "learning_rate": 9.945470011866604e-05, "loss": 0.3826, "step": 887 }, { "epoch": 0.07575499061593585, "grad_norm": 1.4438786172756286, "learning_rate": 9.945266344008857e-05, "loss": 0.4266, "step": 888 }, { "epoch": 0.0758403002900529, "grad_norm": 1.5147279267202027, "learning_rate": 9.945062298606305e-05, "loss": 0.3798, "step": 889 }, { "epoch": 0.07592560996416994, "grad_norm": 1.9949974438643752, "learning_rate": 9.944857875674522e-05, "loss": 0.4973, "step": 890 }, { "epoch": 0.07601091963828698, "grad_norm": 2.0277055773952624, "learning_rate": 9.94465307522912e-05, "loss": 0.5057, "step": 891 }, { "epoch": 0.07609622931240402, "grad_norm": 1.434747714817044, "learning_rate": 9.944447897285729e-05, "loss": 0.3779, "step": 892 }, { "epoch": 0.07618153898652107, "grad_norm": 1.680964991012642, "learning_rate": 9.944242341860016e-05, "loss": 0.3885, "step": 893 }, { "epoch": 0.07626684866063811, "grad_norm": 1.6874224327062108, "learning_rate": 9.944036408967674e-05, "loss": 0.4701, "step": 894 }, { "epoch": 0.07635215833475516, "grad_norm": 2.102312939608402, "learning_rate": 9.943830098624426e-05, "loss": 0.4663, "step": 895 }, { "epoch": 0.0764374680088722, "grad_norm": 1.5505283255156348, "learning_rate": 9.943623410846021e-05, "loss": 0.3868, "step": 896 }, { "epoch": 0.07652277768298925, "grad_norm": 1.7846137960955586, "learning_rate": 9.943416345648238e-05, "loss": 0.4214, "step": 897 }, { "epoch": 0.0766080873571063, "grad_norm": 1.7010477679910776, "learning_rate": 9.943208903046888e-05, "loss": 0.4608, "step": 898 }, { "epoch": 0.07669339703122334, "grad_norm": 1.402390078148613, "learning_rate": 9.943001083057805e-05, "loss": 0.3096, "step": 899 }, { "epoch": 0.07677870670534039, "grad_norm": 1.5524866830251585, "learning_rate": 9.942792885696856e-05, "loss": 0.3926, "step": 900 }, { "epoch": 0.07686401637945743, "grad_norm": 1.6487801001449627, "learning_rate": 9.942584310979939e-05, "loss": 0.3803, "step": 901 }, { "epoch": 0.07694932605357448, "grad_norm": 1.9104782488377685, "learning_rate": 9.942375358922971e-05, "loss": 0.4275, "step": 902 }, { "epoch": 0.07703463572769152, "grad_norm": 1.8667932197836743, "learning_rate": 9.94216602954191e-05, "loss": 0.4328, "step": 903 }, { "epoch": 0.07711994540180857, "grad_norm": 1.4881630197255973, "learning_rate": 9.941956322852735e-05, "loss": 0.4024, "step": 904 }, { "epoch": 0.07720525507592561, "grad_norm": 1.5590928122425167, "learning_rate": 9.941746238871457e-05, "loss": 0.4154, "step": 905 }, { "epoch": 0.07729056475004266, "grad_norm": 1.519331107405912, "learning_rate": 9.941535777614112e-05, "loss": 0.3777, "step": 906 }, { "epoch": 0.0773758744241597, "grad_norm": 1.5344739955812983, "learning_rate": 9.941324939096772e-05, "loss": 0.4009, "step": 907 }, { "epoch": 0.07746118409827675, "grad_norm": 1.7731666526164567, "learning_rate": 9.941113723335531e-05, "loss": 0.3811, "step": 908 }, { "epoch": 0.0775464937723938, "grad_norm": 1.3621317217005404, "learning_rate": 9.940902130346513e-05, "loss": 0.3969, "step": 909 }, { "epoch": 0.07763180344651083, "grad_norm": 1.6877891857879768, "learning_rate": 9.940690160145874e-05, "loss": 0.4359, "step": 910 }, { "epoch": 0.07771711312062787, "grad_norm": 1.4189524535533058, "learning_rate": 9.940477812749795e-05, "loss": 0.3851, "step": 911 }, { "epoch": 0.07780242279474492, "grad_norm": 1.6472573055557493, "learning_rate": 9.94026508817449e-05, "loss": 0.3833, "step": 912 }, { "epoch": 0.07788773246886196, "grad_norm": 1.7339434387503534, "learning_rate": 9.940051986436198e-05, "loss": 0.4093, "step": 913 }, { "epoch": 0.07797304214297901, "grad_norm": 1.6406845808563764, "learning_rate": 9.939838507551188e-05, "loss": 0.3531, "step": 914 }, { "epoch": 0.07805835181709606, "grad_norm": 1.6379536997547528, "learning_rate": 9.939624651535757e-05, "loss": 0.4152, "step": 915 }, { "epoch": 0.0781436614912131, "grad_norm": 1.3937606605840542, "learning_rate": 9.939410418406234e-05, "loss": 0.3626, "step": 916 }, { "epoch": 0.07822897116533015, "grad_norm": 1.3210644809541832, "learning_rate": 9.939195808178974e-05, "loss": 0.394, "step": 917 }, { "epoch": 0.07831428083944719, "grad_norm": 1.608640484642789, "learning_rate": 9.938980820870361e-05, "loss": 0.3775, "step": 918 }, { "epoch": 0.07839959051356424, "grad_norm": 2.0557496549134475, "learning_rate": 9.938765456496808e-05, "loss": 0.4419, "step": 919 }, { "epoch": 0.07848490018768128, "grad_norm": 1.5819830239563057, "learning_rate": 9.938549715074757e-05, "loss": 0.3605, "step": 920 }, { "epoch": 0.07857020986179833, "grad_norm": 1.5182668725942867, "learning_rate": 9.938333596620677e-05, "loss": 0.3646, "step": 921 }, { "epoch": 0.07865551953591537, "grad_norm": 1.4535549717216238, "learning_rate": 9.93811710115107e-05, "loss": 0.3816, "step": 922 }, { "epoch": 0.07874082921003242, "grad_norm": 1.9238440498428777, "learning_rate": 9.937900228682465e-05, "loss": 0.4419, "step": 923 }, { "epoch": 0.07882613888414947, "grad_norm": 1.702658423112307, "learning_rate": 9.937682979231416e-05, "loss": 0.3985, "step": 924 }, { "epoch": 0.07891144855826651, "grad_norm": 1.4350917380114725, "learning_rate": 9.93746535281451e-05, "loss": 0.3599, "step": 925 }, { "epoch": 0.07899675823238356, "grad_norm": 1.5024821090267786, "learning_rate": 9.937247349448362e-05, "loss": 0.3671, "step": 926 }, { "epoch": 0.0790820679065006, "grad_norm": 1.7560829300338785, "learning_rate": 9.937028969149617e-05, "loss": 0.4359, "step": 927 }, { "epoch": 0.07916737758061765, "grad_norm": 1.8567038832084304, "learning_rate": 9.936810211934944e-05, "loss": 0.4298, "step": 928 }, { "epoch": 0.07925268725473468, "grad_norm": 1.8759775792227853, "learning_rate": 9.936591077821045e-05, "loss": 0.4406, "step": 929 }, { "epoch": 0.07933799692885173, "grad_norm": 1.459218937046371, "learning_rate": 9.936371566824651e-05, "loss": 0.3663, "step": 930 }, { "epoch": 0.07942330660296877, "grad_norm": 1.6614193408500193, "learning_rate": 9.936151678962523e-05, "loss": 0.4242, "step": 931 }, { "epoch": 0.07950861627708582, "grad_norm": 1.3331618447092377, "learning_rate": 9.93593141425144e-05, "loss": 0.3685, "step": 932 }, { "epoch": 0.07959392595120286, "grad_norm": 1.6156127929226338, "learning_rate": 9.935710772708225e-05, "loss": 0.4048, "step": 933 }, { "epoch": 0.07967923562531991, "grad_norm": 1.76858030118462, "learning_rate": 9.93548975434972e-05, "loss": 0.4054, "step": 934 }, { "epoch": 0.07976454529943695, "grad_norm": 1.777548574865013, "learning_rate": 9.935268359192802e-05, "loss": 0.4314, "step": 935 }, { "epoch": 0.079849854973554, "grad_norm": 1.5316126646969312, "learning_rate": 9.935046587254368e-05, "loss": 0.3974, "step": 936 }, { "epoch": 0.07993516464767104, "grad_norm": 1.7106172444812717, "learning_rate": 9.934824438551353e-05, "loss": 0.3985, "step": 937 }, { "epoch": 0.08002047432178809, "grad_norm": 1.707210574998789, "learning_rate": 9.934601913100716e-05, "loss": 0.397, "step": 938 }, { "epoch": 0.08010578399590514, "grad_norm": 1.5681650841866768, "learning_rate": 9.934379010919446e-05, "loss": 0.4235, "step": 939 }, { "epoch": 0.08019109367002218, "grad_norm": 1.5493426436430673, "learning_rate": 9.934155732024557e-05, "loss": 0.3884, "step": 940 }, { "epoch": 0.08027640334413923, "grad_norm": 1.7369232702531296, "learning_rate": 9.933932076433101e-05, "loss": 0.4151, "step": 941 }, { "epoch": 0.08036171301825627, "grad_norm": 1.6376699810764517, "learning_rate": 9.933708044162149e-05, "loss": 0.4585, "step": 942 }, { "epoch": 0.08044702269237332, "grad_norm": 1.7226191579410883, "learning_rate": 9.933483635228804e-05, "loss": 0.3886, "step": 943 }, { "epoch": 0.08053233236649036, "grad_norm": 2.163295348215504, "learning_rate": 9.933258849650202e-05, "loss": 0.4314, "step": 944 }, { "epoch": 0.08061764204060741, "grad_norm": 2.1622896610344675, "learning_rate": 9.9330336874435e-05, "loss": 0.4333, "step": 945 }, { "epoch": 0.08070295171472446, "grad_norm": 1.7243527041048436, "learning_rate": 9.932808148625891e-05, "loss": 0.4377, "step": 946 }, { "epoch": 0.0807882613888415, "grad_norm": 1.590156843019075, "learning_rate": 9.932582233214593e-05, "loss": 0.4057, "step": 947 }, { "epoch": 0.08087357106295855, "grad_norm": 1.7529260151859165, "learning_rate": 9.932355941226854e-05, "loss": 0.3875, "step": 948 }, { "epoch": 0.08095888073707558, "grad_norm": 1.6521807651083074, "learning_rate": 9.932129272679949e-05, "loss": 0.3834, "step": 949 }, { "epoch": 0.08104419041119262, "grad_norm": 1.5230304663322867, "learning_rate": 9.931902227591183e-05, "loss": 0.3491, "step": 950 }, { "epoch": 0.08112950008530967, "grad_norm": 1.8186539064171319, "learning_rate": 9.93167480597789e-05, "loss": 0.3894, "step": 951 }, { "epoch": 0.08121480975942671, "grad_norm": 1.6622873786972638, "learning_rate": 9.931447007857432e-05, "loss": 0.3661, "step": 952 }, { "epoch": 0.08130011943354376, "grad_norm": 1.6468036382802227, "learning_rate": 9.931218833247203e-05, "loss": 0.416, "step": 953 }, { "epoch": 0.0813854291076608, "grad_norm": 1.6972524205650052, "learning_rate": 9.930990282164617e-05, "loss": 0.4439, "step": 954 }, { "epoch": 0.08147073878177785, "grad_norm": 1.7626942980649782, "learning_rate": 9.930761354627129e-05, "loss": 0.3822, "step": 955 }, { "epoch": 0.0815560484558949, "grad_norm": 1.7519823835262383, "learning_rate": 9.930532050652212e-05, "loss": 0.4457, "step": 956 }, { "epoch": 0.08164135813001194, "grad_norm": 1.530050469607025, "learning_rate": 9.930302370257374e-05, "loss": 0.4138, "step": 957 }, { "epoch": 0.08172666780412899, "grad_norm": 1.6239593548268876, "learning_rate": 9.93007231346015e-05, "loss": 0.4023, "step": 958 }, { "epoch": 0.08181197747824603, "grad_norm": 1.6215288796195353, "learning_rate": 9.929841880278104e-05, "loss": 0.3921, "step": 959 }, { "epoch": 0.08189728715236308, "grad_norm": 1.5372116655876398, "learning_rate": 9.929611070728826e-05, "loss": 0.3739, "step": 960 }, { "epoch": 0.08198259682648013, "grad_norm": 1.7176908524153687, "learning_rate": 9.929379884829939e-05, "loss": 0.4127, "step": 961 }, { "epoch": 0.08206790650059717, "grad_norm": 1.7384728486840317, "learning_rate": 9.929148322599093e-05, "loss": 0.4104, "step": 962 }, { "epoch": 0.08215321617471422, "grad_norm": 1.4674845986800955, "learning_rate": 9.928916384053965e-05, "loss": 0.3564, "step": 963 }, { "epoch": 0.08223852584883126, "grad_norm": 1.5796299170196961, "learning_rate": 9.928684069212264e-05, "loss": 0.3841, "step": 964 }, { "epoch": 0.08232383552294831, "grad_norm": 1.960962667721647, "learning_rate": 9.928451378091726e-05, "loss": 0.4391, "step": 965 }, { "epoch": 0.08240914519706535, "grad_norm": 1.323450726619773, "learning_rate": 9.928218310710115e-05, "loss": 0.3329, "step": 966 }, { "epoch": 0.0824944548711824, "grad_norm": 1.126890977438387, "learning_rate": 9.927984867085224e-05, "loss": 0.3167, "step": 967 }, { "epoch": 0.08257976454529943, "grad_norm": 1.6888137224826205, "learning_rate": 9.927751047234875e-05, "loss": 0.4243, "step": 968 }, { "epoch": 0.08266507421941648, "grad_norm": 1.7334523539150009, "learning_rate": 9.927516851176921e-05, "loss": 0.3877, "step": 969 }, { "epoch": 0.08275038389353352, "grad_norm": 1.9393448199162233, "learning_rate": 9.92728227892924e-05, "loss": 0.4617, "step": 970 }, { "epoch": 0.08283569356765057, "grad_norm": 1.6531613899364932, "learning_rate": 9.92704733050974e-05, "loss": 0.3988, "step": 971 }, { "epoch": 0.08292100324176761, "grad_norm": 1.6326109839388958, "learning_rate": 9.926812005936359e-05, "loss": 0.4279, "step": 972 }, { "epoch": 0.08300631291588466, "grad_norm": 1.667556890520914, "learning_rate": 9.926576305227063e-05, "loss": 0.4067, "step": 973 }, { "epoch": 0.0830916225900017, "grad_norm": 1.8090461840662317, "learning_rate": 9.926340228399845e-05, "loss": 0.4325, "step": 974 }, { "epoch": 0.08317693226411875, "grad_norm": 1.5629815694658298, "learning_rate": 9.926103775472728e-05, "loss": 0.4121, "step": 975 }, { "epoch": 0.0832622419382358, "grad_norm": 1.4291245649863116, "learning_rate": 9.925866946463766e-05, "loss": 0.3706, "step": 976 }, { "epoch": 0.08334755161235284, "grad_norm": 1.5995724106687563, "learning_rate": 9.925629741391038e-05, "loss": 0.3909, "step": 977 }, { "epoch": 0.08343286128646989, "grad_norm": 1.4782008210105717, "learning_rate": 9.925392160272655e-05, "loss": 0.4415, "step": 978 }, { "epoch": 0.08351817096058693, "grad_norm": 1.3095293031511217, "learning_rate": 9.925154203126754e-05, "loss": 0.3964, "step": 979 }, { "epoch": 0.08360348063470398, "grad_norm": 1.7113070154270404, "learning_rate": 9.924915869971503e-05, "loss": 0.3867, "step": 980 }, { "epoch": 0.08368879030882102, "grad_norm": 1.5976306568138428, "learning_rate": 9.924677160825094e-05, "loss": 0.3896, "step": 981 }, { "epoch": 0.08377409998293807, "grad_norm": 1.426009486494571, "learning_rate": 9.924438075705756e-05, "loss": 0.3929, "step": 982 }, { "epoch": 0.08385940965705511, "grad_norm": 1.4266695782521803, "learning_rate": 9.924198614631736e-05, "loss": 0.3729, "step": 983 }, { "epoch": 0.08394471933117216, "grad_norm": 2.246026026492598, "learning_rate": 9.92395877762132e-05, "loss": 0.4402, "step": 984 }, { "epoch": 0.0840300290052892, "grad_norm": 1.5353601528966365, "learning_rate": 9.92371856469282e-05, "loss": 0.3632, "step": 985 }, { "epoch": 0.08411533867940625, "grad_norm": 2.1849212705626555, "learning_rate": 9.92347797586457e-05, "loss": 0.4491, "step": 986 }, { "epoch": 0.08420064835352328, "grad_norm": 1.539180073386258, "learning_rate": 9.92323701115494e-05, "loss": 0.4718, "step": 987 }, { "epoch": 0.08428595802764033, "grad_norm": 1.3549210378999164, "learning_rate": 9.922995670582325e-05, "loss": 0.3542, "step": 988 }, { "epoch": 0.08437126770175737, "grad_norm": 1.7466439355336911, "learning_rate": 9.922753954165154e-05, "loss": 0.4268, "step": 989 }, { "epoch": 0.08445657737587442, "grad_norm": 1.5277756180410822, "learning_rate": 9.922511861921878e-05, "loss": 0.4446, "step": 990 }, { "epoch": 0.08454188704999147, "grad_norm": 1.4524606806411642, "learning_rate": 9.922269393870976e-05, "loss": 0.3923, "step": 991 }, { "epoch": 0.08462719672410851, "grad_norm": 1.6235109255710627, "learning_rate": 9.922026550030965e-05, "loss": 0.4238, "step": 992 }, { "epoch": 0.08471250639822556, "grad_norm": 1.5380084305868262, "learning_rate": 9.921783330420383e-05, "loss": 0.4412, "step": 993 }, { "epoch": 0.0847978160723426, "grad_norm": 1.5170293377860202, "learning_rate": 9.921539735057798e-05, "loss": 0.4138, "step": 994 }, { "epoch": 0.08488312574645965, "grad_norm": 1.5693752851223886, "learning_rate": 9.921295763961806e-05, "loss": 0.4232, "step": 995 }, { "epoch": 0.0849684354205767, "grad_norm": 1.5846192843123548, "learning_rate": 9.921051417151035e-05, "loss": 0.3814, "step": 996 }, { "epoch": 0.08505374509469374, "grad_norm": 1.832724564077769, "learning_rate": 9.92080669464414e-05, "loss": 0.4544, "step": 997 }, { "epoch": 0.08513905476881078, "grad_norm": 1.8473920895416924, "learning_rate": 9.920561596459801e-05, "loss": 0.43, "step": 998 }, { "epoch": 0.08522436444292783, "grad_norm": 1.4406206838414872, "learning_rate": 9.920316122616732e-05, "loss": 0.3599, "step": 999 }, { "epoch": 0.08530967411704488, "grad_norm": 1.4872798404133472, "learning_rate": 9.920070273133674e-05, "loss": 0.4043, "step": 1000 }, { "epoch": 0.08539498379116192, "grad_norm": 1.7160227552474645, "learning_rate": 9.919824048029397e-05, "loss": 0.3566, "step": 1001 }, { "epoch": 0.08548029346527897, "grad_norm": 1.451563088013496, "learning_rate": 9.919577447322697e-05, "loss": 0.4176, "step": 1002 }, { "epoch": 0.08556560313939601, "grad_norm": 1.6625469631359495, "learning_rate": 9.919330471032401e-05, "loss": 0.4186, "step": 1003 }, { "epoch": 0.08565091281351306, "grad_norm": 1.6576207085063346, "learning_rate": 9.919083119177366e-05, "loss": 0.4013, "step": 1004 }, { "epoch": 0.0857362224876301, "grad_norm": 1.509768941266167, "learning_rate": 9.918835391776474e-05, "loss": 0.3862, "step": 1005 }, { "epoch": 0.08582153216174714, "grad_norm": 1.4240339049364124, "learning_rate": 9.918587288848638e-05, "loss": 0.3902, "step": 1006 }, { "epoch": 0.08590684183586418, "grad_norm": 1.5366954124862304, "learning_rate": 9.918338810412801e-05, "loss": 0.3574, "step": 1007 }, { "epoch": 0.08599215150998123, "grad_norm": 2.1244654263551386, "learning_rate": 9.91808995648793e-05, "loss": 0.39, "step": 1008 }, { "epoch": 0.08607746118409827, "grad_norm": 1.7710669045586542, "learning_rate": 9.917840727093028e-05, "loss": 0.3513, "step": 1009 }, { "epoch": 0.08616277085821532, "grad_norm": 1.860610679543165, "learning_rate": 9.917591122247119e-05, "loss": 0.4803, "step": 1010 }, { "epoch": 0.08624808053233236, "grad_norm": 1.5647940008337091, "learning_rate": 9.917341141969258e-05, "loss": 0.4285, "step": 1011 }, { "epoch": 0.08633339020644941, "grad_norm": 1.8128510496936157, "learning_rate": 9.917090786278533e-05, "loss": 0.444, "step": 1012 }, { "epoch": 0.08641869988056645, "grad_norm": 1.8520976391795119, "learning_rate": 9.916840055194057e-05, "loss": 0.4153, "step": 1013 }, { "epoch": 0.0865040095546835, "grad_norm": 1.492804115276654, "learning_rate": 9.91658894873497e-05, "loss": 0.3822, "step": 1014 }, { "epoch": 0.08658931922880055, "grad_norm": 1.802817031767372, "learning_rate": 9.916337466920443e-05, "loss": 0.4125, "step": 1015 }, { "epoch": 0.08667462890291759, "grad_norm": 1.8392670652949337, "learning_rate": 9.916085609769677e-05, "loss": 0.4009, "step": 1016 }, { "epoch": 0.08675993857703464, "grad_norm": 1.4804431440006092, "learning_rate": 9.915833377301898e-05, "loss": 0.3876, "step": 1017 }, { "epoch": 0.08684524825115168, "grad_norm": 1.4741682390657833, "learning_rate": 9.915580769536362e-05, "loss": 0.4192, "step": 1018 }, { "epoch": 0.08693055792526873, "grad_norm": 1.5700872458981556, "learning_rate": 9.915327786492357e-05, "loss": 0.3859, "step": 1019 }, { "epoch": 0.08701586759938577, "grad_norm": 1.3552529110957288, "learning_rate": 9.915074428189195e-05, "loss": 0.3401, "step": 1020 }, { "epoch": 0.08710117727350282, "grad_norm": 1.495064961862393, "learning_rate": 9.91482069464622e-05, "loss": 0.4151, "step": 1021 }, { "epoch": 0.08718648694761987, "grad_norm": 2.100037656173963, "learning_rate": 9.914566585882801e-05, "loss": 0.4525, "step": 1022 }, { "epoch": 0.08727179662173691, "grad_norm": 1.5408126252541476, "learning_rate": 9.91431210191834e-05, "loss": 0.3732, "step": 1023 }, { "epoch": 0.08735710629585396, "grad_norm": 1.6693361203126729, "learning_rate": 9.914057242772266e-05, "loss": 0.4283, "step": 1024 }, { "epoch": 0.08744241596997099, "grad_norm": 1.6234628302733933, "learning_rate": 9.913802008464033e-05, "loss": 0.4522, "step": 1025 }, { "epoch": 0.08752772564408803, "grad_norm": 1.6754695006588272, "learning_rate": 9.913546399013127e-05, "loss": 0.3877, "step": 1026 }, { "epoch": 0.08761303531820508, "grad_norm": 1.8914325345153844, "learning_rate": 9.913290414439068e-05, "loss": 0.3737, "step": 1027 }, { "epoch": 0.08769834499232212, "grad_norm": 2.0629846408066994, "learning_rate": 9.913034054761392e-05, "loss": 0.415, "step": 1028 }, { "epoch": 0.08778365466643917, "grad_norm": 1.7163253327553218, "learning_rate": 9.912777319999675e-05, "loss": 0.4216, "step": 1029 }, { "epoch": 0.08786896434055622, "grad_norm": 1.6523081097016943, "learning_rate": 9.912520210173515e-05, "loss": 0.4181, "step": 1030 }, { "epoch": 0.08795427401467326, "grad_norm": 1.5346185650157314, "learning_rate": 9.912262725302543e-05, "loss": 0.3902, "step": 1031 }, { "epoch": 0.08803958368879031, "grad_norm": 1.6811868053031946, "learning_rate": 9.912004865406415e-05, "loss": 0.4457, "step": 1032 }, { "epoch": 0.08812489336290735, "grad_norm": 1.5684970210992253, "learning_rate": 9.911746630504818e-05, "loss": 0.4554, "step": 1033 }, { "epoch": 0.0882102030370244, "grad_norm": 1.5903704846201183, "learning_rate": 9.911488020617467e-05, "loss": 0.4074, "step": 1034 }, { "epoch": 0.08829551271114144, "grad_norm": 1.454215430213596, "learning_rate": 9.911229035764106e-05, "loss": 0.3906, "step": 1035 }, { "epoch": 0.08838082238525849, "grad_norm": 1.4879976738327056, "learning_rate": 9.910969675964506e-05, "loss": 0.4066, "step": 1036 }, { "epoch": 0.08846613205937554, "grad_norm": 1.6579680259299645, "learning_rate": 9.910709941238467e-05, "loss": 0.3805, "step": 1037 }, { "epoch": 0.08855144173349258, "grad_norm": 1.6002155873967299, "learning_rate": 9.910449831605821e-05, "loss": 0.408, "step": 1038 }, { "epoch": 0.08863675140760963, "grad_norm": 1.6985540383372362, "learning_rate": 9.910189347086423e-05, "loss": 0.3787, "step": 1039 }, { "epoch": 0.08872206108172667, "grad_norm": 1.7142880871165975, "learning_rate": 9.909928487700162e-05, "loss": 0.4317, "step": 1040 }, { "epoch": 0.08880737075584372, "grad_norm": 1.7859913147889228, "learning_rate": 9.909667253466952e-05, "loss": 0.4137, "step": 1041 }, { "epoch": 0.08889268042996076, "grad_norm": 1.9552960345828005, "learning_rate": 9.909405644406738e-05, "loss": 0.4606, "step": 1042 }, { "epoch": 0.08897799010407781, "grad_norm": 1.7600755831354542, "learning_rate": 9.909143660539491e-05, "loss": 0.4318, "step": 1043 }, { "epoch": 0.08906329977819484, "grad_norm": 1.5755435490087217, "learning_rate": 9.908881301885212e-05, "loss": 0.3671, "step": 1044 }, { "epoch": 0.08914860945231189, "grad_norm": 1.575248286946329, "learning_rate": 9.908618568463932e-05, "loss": 0.409, "step": 1045 }, { "epoch": 0.08923391912642893, "grad_norm": 1.5931405790935835, "learning_rate": 9.908355460295708e-05, "loss": 0.3748, "step": 1046 }, { "epoch": 0.08931922880054598, "grad_norm": 1.4993942150110713, "learning_rate": 9.908091977400629e-05, "loss": 0.3486, "step": 1047 }, { "epoch": 0.08940453847466302, "grad_norm": 2.1220722224810205, "learning_rate": 9.907828119798807e-05, "loss": 0.4525, "step": 1048 }, { "epoch": 0.08948984814878007, "grad_norm": 1.569125813610626, "learning_rate": 9.90756388751039e-05, "loss": 0.4133, "step": 1049 }, { "epoch": 0.08957515782289711, "grad_norm": 1.3632042106131326, "learning_rate": 9.907299280555549e-05, "loss": 0.3431, "step": 1050 }, { "epoch": 0.08966046749701416, "grad_norm": 1.5988150500320493, "learning_rate": 9.907034298954485e-05, "loss": 0.4283, "step": 1051 }, { "epoch": 0.0897457771711312, "grad_norm": 1.470254492755597, "learning_rate": 9.906768942727427e-05, "loss": 0.4077, "step": 1052 }, { "epoch": 0.08983108684524825, "grad_norm": 1.555915845669338, "learning_rate": 9.906503211894635e-05, "loss": 0.4007, "step": 1053 }, { "epoch": 0.0899163965193653, "grad_norm": 1.7254064495733312, "learning_rate": 9.906237106476397e-05, "loss": 0.3762, "step": 1054 }, { "epoch": 0.09000170619348234, "grad_norm": 1.5656357389831763, "learning_rate": 9.905970626493029e-05, "loss": 0.418, "step": 1055 }, { "epoch": 0.09008701586759939, "grad_norm": 1.3060219615853983, "learning_rate": 9.905703771964872e-05, "loss": 0.3476, "step": 1056 }, { "epoch": 0.09017232554171643, "grad_norm": 1.5550532078609287, "learning_rate": 9.905436542912301e-05, "loss": 0.4301, "step": 1057 }, { "epoch": 0.09025763521583348, "grad_norm": 1.5778565886713063, "learning_rate": 9.905168939355717e-05, "loss": 0.342, "step": 1058 }, { "epoch": 0.09034294488995052, "grad_norm": 1.5104082129230916, "learning_rate": 9.90490096131555e-05, "loss": 0.4198, "step": 1059 }, { "epoch": 0.09042825456406757, "grad_norm": 1.3957969841091087, "learning_rate": 9.904632608812261e-05, "loss": 0.3572, "step": 1060 }, { "epoch": 0.09051356423818462, "grad_norm": 1.7691411478888746, "learning_rate": 9.904363881866334e-05, "loss": 0.3713, "step": 1061 }, { "epoch": 0.09059887391230166, "grad_norm": 1.7050065312861753, "learning_rate": 9.904094780498288e-05, "loss": 0.4063, "step": 1062 }, { "epoch": 0.0906841835864187, "grad_norm": 1.4737981522760653, "learning_rate": 9.903825304728664e-05, "loss": 0.3834, "step": 1063 }, { "epoch": 0.09076949326053574, "grad_norm": 1.483115454899053, "learning_rate": 9.903555454578038e-05, "loss": 0.3886, "step": 1064 }, { "epoch": 0.09085480293465278, "grad_norm": 1.4788564795549235, "learning_rate": 9.903285230067011e-05, "loss": 0.3872, "step": 1065 }, { "epoch": 0.09094011260876983, "grad_norm": 1.5319753991684157, "learning_rate": 9.903014631216213e-05, "loss": 0.3589, "step": 1066 }, { "epoch": 0.09102542228288688, "grad_norm": 1.6564863504157736, "learning_rate": 9.902743658046301e-05, "loss": 0.3989, "step": 1067 }, { "epoch": 0.09111073195700392, "grad_norm": 1.693743141976862, "learning_rate": 9.902472310577963e-05, "loss": 0.4293, "step": 1068 }, { "epoch": 0.09119604163112097, "grad_norm": 1.5194523258845558, "learning_rate": 9.902200588831918e-05, "loss": 0.4046, "step": 1069 }, { "epoch": 0.09128135130523801, "grad_norm": 1.7269375078814895, "learning_rate": 9.901928492828907e-05, "loss": 0.4226, "step": 1070 }, { "epoch": 0.09136666097935506, "grad_norm": 1.521525978626275, "learning_rate": 9.901656022589705e-05, "loss": 0.3932, "step": 1071 }, { "epoch": 0.0914519706534721, "grad_norm": 1.919615557150131, "learning_rate": 9.901383178135113e-05, "loss": 0.4149, "step": 1072 }, { "epoch": 0.09153728032758915, "grad_norm": 1.6133622340826805, "learning_rate": 9.901109959485961e-05, "loss": 0.3713, "step": 1073 }, { "epoch": 0.0916225900017062, "grad_norm": 1.4541133677744429, "learning_rate": 9.900836366663108e-05, "loss": 0.389, "step": 1074 }, { "epoch": 0.09170789967582324, "grad_norm": 1.3994029032015374, "learning_rate": 9.900562399687443e-05, "loss": 0.3841, "step": 1075 }, { "epoch": 0.09179320934994029, "grad_norm": 1.5215401596400473, "learning_rate": 9.900288058579879e-05, "loss": 0.4038, "step": 1076 }, { "epoch": 0.09187851902405733, "grad_norm": 1.8865868816534737, "learning_rate": 9.900013343361361e-05, "loss": 0.4415, "step": 1077 }, { "epoch": 0.09196382869817438, "grad_norm": 1.6944788979673775, "learning_rate": 9.899738254052863e-05, "loss": 0.4326, "step": 1078 }, { "epoch": 0.09204913837229142, "grad_norm": 1.589216703811722, "learning_rate": 9.899462790675389e-05, "loss": 0.3788, "step": 1079 }, { "epoch": 0.09213444804640847, "grad_norm": 1.6845066224373106, "learning_rate": 9.899186953249965e-05, "loss": 0.3978, "step": 1080 }, { "epoch": 0.09221975772052551, "grad_norm": 1.5364348156524648, "learning_rate": 9.898910741797652e-05, "loss": 0.4035, "step": 1081 }, { "epoch": 0.09230506739464256, "grad_norm": 1.6268229896183486, "learning_rate": 9.898634156339534e-05, "loss": 0.3581, "step": 1082 }, { "epoch": 0.09239037706875959, "grad_norm": 1.4574031406365084, "learning_rate": 9.898357196896733e-05, "loss": 0.3842, "step": 1083 }, { "epoch": 0.09247568674287664, "grad_norm": 1.4712451275507412, "learning_rate": 9.898079863490389e-05, "loss": 0.3898, "step": 1084 }, { "epoch": 0.09256099641699368, "grad_norm": 1.4833253060170108, "learning_rate": 9.897802156141673e-05, "loss": 0.3747, "step": 1085 }, { "epoch": 0.09264630609111073, "grad_norm": 1.7434297930773963, "learning_rate": 9.897524074871792e-05, "loss": 0.4327, "step": 1086 }, { "epoch": 0.09273161576522777, "grad_norm": 1.4671203012890899, "learning_rate": 9.897245619701972e-05, "loss": 0.3948, "step": 1087 }, { "epoch": 0.09281692543934482, "grad_norm": 1.5519702878742814, "learning_rate": 9.896966790653475e-05, "loss": 0.4525, "step": 1088 }, { "epoch": 0.09290223511346186, "grad_norm": 1.509922113241415, "learning_rate": 9.896687587747584e-05, "loss": 0.3953, "step": 1089 }, { "epoch": 0.09298754478757891, "grad_norm": 1.5969285123634192, "learning_rate": 9.896408011005617e-05, "loss": 0.4278, "step": 1090 }, { "epoch": 0.09307285446169596, "grad_norm": 1.4260223602853117, "learning_rate": 9.896128060448917e-05, "loss": 0.3696, "step": 1091 }, { "epoch": 0.093158164135813, "grad_norm": 1.948090802193288, "learning_rate": 9.89584773609886e-05, "loss": 0.4142, "step": 1092 }, { "epoch": 0.09324347380993005, "grad_norm": 1.650198623955504, "learning_rate": 9.895567037976842e-05, "loss": 0.4049, "step": 1093 }, { "epoch": 0.09332878348404709, "grad_norm": 1.2563500283875293, "learning_rate": 9.895285966104298e-05, "loss": 0.3523, "step": 1094 }, { "epoch": 0.09341409315816414, "grad_norm": 1.6418263714301033, "learning_rate": 9.895004520502683e-05, "loss": 0.4137, "step": 1095 }, { "epoch": 0.09349940283228118, "grad_norm": 1.3887513214434664, "learning_rate": 9.894722701193486e-05, "loss": 0.37, "step": 1096 }, { "epoch": 0.09358471250639823, "grad_norm": 1.632794320767332, "learning_rate": 9.894440508198219e-05, "loss": 0.3961, "step": 1097 }, { "epoch": 0.09367002218051527, "grad_norm": 1.6558447288014835, "learning_rate": 9.894157941538428e-05, "loss": 0.3836, "step": 1098 }, { "epoch": 0.09375533185463232, "grad_norm": 1.4728806221025965, "learning_rate": 9.893875001235689e-05, "loss": 0.3982, "step": 1099 }, { "epoch": 0.09384064152874937, "grad_norm": 1.7992775155863208, "learning_rate": 9.893591687311598e-05, "loss": 0.4274, "step": 1100 }, { "epoch": 0.09392595120286641, "grad_norm": 1.6533621316883294, "learning_rate": 9.893307999787787e-05, "loss": 0.3966, "step": 1101 }, { "epoch": 0.09401126087698344, "grad_norm": 1.9266611685008936, "learning_rate": 9.893023938685911e-05, "loss": 0.3717, "step": 1102 }, { "epoch": 0.09409657055110049, "grad_norm": 1.6508890155616975, "learning_rate": 9.89273950402766e-05, "loss": 0.4307, "step": 1103 }, { "epoch": 0.09418188022521753, "grad_norm": 1.5506256563553502, "learning_rate": 9.892454695834747e-05, "loss": 0.4003, "step": 1104 }, { "epoch": 0.09426718989933458, "grad_norm": 1.7875866528828586, "learning_rate": 9.892169514128919e-05, "loss": 0.3843, "step": 1105 }, { "epoch": 0.09435249957345163, "grad_norm": 1.5571259205198327, "learning_rate": 9.891883958931943e-05, "loss": 0.4094, "step": 1106 }, { "epoch": 0.09443780924756867, "grad_norm": 1.6311131445525264, "learning_rate": 9.891598030265623e-05, "loss": 0.3992, "step": 1107 }, { "epoch": 0.09452311892168572, "grad_norm": 1.8431034570924505, "learning_rate": 9.891311728151789e-05, "loss": 0.3912, "step": 1108 }, { "epoch": 0.09460842859580276, "grad_norm": 1.6368297457329255, "learning_rate": 9.891025052612295e-05, "loss": 0.4616, "step": 1109 }, { "epoch": 0.09469373826991981, "grad_norm": 1.5346020190699587, "learning_rate": 9.890738003669029e-05, "loss": 0.3699, "step": 1110 }, { "epoch": 0.09477904794403685, "grad_norm": 1.8251247729145925, "learning_rate": 9.890450581343907e-05, "loss": 0.4143, "step": 1111 }, { "epoch": 0.0948643576181539, "grad_norm": 1.5400596525355503, "learning_rate": 9.89016278565887e-05, "loss": 0.4, "step": 1112 }, { "epoch": 0.09494966729227095, "grad_norm": 1.750011716530951, "learning_rate": 9.88987461663589e-05, "loss": 0.4156, "step": 1113 }, { "epoch": 0.09503497696638799, "grad_norm": 1.3417097171645092, "learning_rate": 9.889586074296968e-05, "loss": 0.4015, "step": 1114 }, { "epoch": 0.09512028664050504, "grad_norm": 1.8260464059086732, "learning_rate": 9.889297158664134e-05, "loss": 0.4168, "step": 1115 }, { "epoch": 0.09520559631462208, "grad_norm": 1.6398348404132068, "learning_rate": 9.889007869759442e-05, "loss": 0.4124, "step": 1116 }, { "epoch": 0.09529090598873913, "grad_norm": 1.5294689018377727, "learning_rate": 9.88871820760498e-05, "loss": 0.4227, "step": 1117 }, { "epoch": 0.09537621566285617, "grad_norm": 1.47828839872781, "learning_rate": 9.88842817222286e-05, "loss": 0.4052, "step": 1118 }, { "epoch": 0.09546152533697322, "grad_norm": 1.9175464297247529, "learning_rate": 9.88813776363523e-05, "loss": 0.4136, "step": 1119 }, { "epoch": 0.09554683501109026, "grad_norm": 1.5384366849579048, "learning_rate": 9.887846981864255e-05, "loss": 0.3643, "step": 1120 }, { "epoch": 0.0956321446852073, "grad_norm": 1.8326630306191638, "learning_rate": 9.887555826932136e-05, "loss": 0.4405, "step": 1121 }, { "epoch": 0.09571745435932434, "grad_norm": 1.507893614489566, "learning_rate": 9.887264298861103e-05, "loss": 0.3678, "step": 1122 }, { "epoch": 0.09580276403344139, "grad_norm": 1.5183016550895867, "learning_rate": 9.88697239767341e-05, "loss": 0.411, "step": 1123 }, { "epoch": 0.09588807370755843, "grad_norm": 1.7459468225025743, "learning_rate": 9.886680123391347e-05, "loss": 0.4344, "step": 1124 }, { "epoch": 0.09597338338167548, "grad_norm": 1.62865513757338, "learning_rate": 9.886387476037222e-05, "loss": 0.4144, "step": 1125 }, { "epoch": 0.09605869305579252, "grad_norm": 1.5695072975438684, "learning_rate": 9.886094455633381e-05, "loss": 0.3678, "step": 1126 }, { "epoch": 0.09614400272990957, "grad_norm": 1.3191018959349314, "learning_rate": 9.88580106220219e-05, "loss": 0.4078, "step": 1127 }, { "epoch": 0.09622931240402662, "grad_norm": 1.5473825411983755, "learning_rate": 9.885507295766054e-05, "loss": 0.3629, "step": 1128 }, { "epoch": 0.09631462207814366, "grad_norm": 1.4698459513386102, "learning_rate": 9.885213156347398e-05, "loss": 0.3643, "step": 1129 }, { "epoch": 0.0963999317522607, "grad_norm": 1.4540536845710843, "learning_rate": 9.884918643968676e-05, "loss": 0.3825, "step": 1130 }, { "epoch": 0.09648524142637775, "grad_norm": 1.6116368963154155, "learning_rate": 9.884623758652373e-05, "loss": 0.3739, "step": 1131 }, { "epoch": 0.0965705511004948, "grad_norm": 1.2001706300064445, "learning_rate": 9.884328500421005e-05, "loss": 0.3662, "step": 1132 }, { "epoch": 0.09665586077461184, "grad_norm": 2.0876537791733516, "learning_rate": 9.884032869297111e-05, "loss": 0.3898, "step": 1133 }, { "epoch": 0.09674117044872889, "grad_norm": 1.7509255005539186, "learning_rate": 9.88373686530326e-05, "loss": 0.3524, "step": 1134 }, { "epoch": 0.09682648012284593, "grad_norm": 1.6995866048484765, "learning_rate": 9.883440488462051e-05, "loss": 0.4097, "step": 1135 }, { "epoch": 0.09691178979696298, "grad_norm": 1.7312260815452045, "learning_rate": 9.883143738796113e-05, "loss": 0.4218, "step": 1136 }, { "epoch": 0.09699709947108003, "grad_norm": 1.7035071268024957, "learning_rate": 9.882846616328099e-05, "loss": 0.4213, "step": 1137 }, { "epoch": 0.09708240914519707, "grad_norm": 1.5778563620170611, "learning_rate": 9.882549121080694e-05, "loss": 0.3801, "step": 1138 }, { "epoch": 0.09716771881931412, "grad_norm": 1.4849994349879978, "learning_rate": 9.882251253076606e-05, "loss": 0.3667, "step": 1139 }, { "epoch": 0.09725302849343115, "grad_norm": 1.616663013860502, "learning_rate": 9.881953012338583e-05, "loss": 0.4065, "step": 1140 }, { "epoch": 0.0973383381675482, "grad_norm": 1.581303709422227, "learning_rate": 9.881654398889389e-05, "loss": 0.3948, "step": 1141 }, { "epoch": 0.09742364784166524, "grad_norm": 1.598247241557984, "learning_rate": 9.881355412751822e-05, "loss": 0.4098, "step": 1142 }, { "epoch": 0.09750895751578229, "grad_norm": 1.5234990034159601, "learning_rate": 9.88105605394871e-05, "loss": 0.4214, "step": 1143 }, { "epoch": 0.09759426718989933, "grad_norm": 1.5790295416379894, "learning_rate": 9.880756322502904e-05, "loss": 0.4273, "step": 1144 }, { "epoch": 0.09767957686401638, "grad_norm": 1.985062007175787, "learning_rate": 9.88045621843729e-05, "loss": 0.4206, "step": 1145 }, { "epoch": 0.09776488653813342, "grad_norm": 1.4718279140578403, "learning_rate": 9.880155741774779e-05, "loss": 0.4045, "step": 1146 }, { "epoch": 0.09785019621225047, "grad_norm": 1.7200293894408203, "learning_rate": 9.87985489253831e-05, "loss": 0.4221, "step": 1147 }, { "epoch": 0.09793550588636751, "grad_norm": 1.3375900487801116, "learning_rate": 9.879553670750852e-05, "loss": 0.4364, "step": 1148 }, { "epoch": 0.09802081556048456, "grad_norm": 1.4401269213055474, "learning_rate": 9.879252076435402e-05, "loss": 0.4302, "step": 1149 }, { "epoch": 0.0981061252346016, "grad_norm": 1.5858898437555806, "learning_rate": 9.878950109614982e-05, "loss": 0.3944, "step": 1150 }, { "epoch": 0.09819143490871865, "grad_norm": 1.6086530825477052, "learning_rate": 9.878647770312649e-05, "loss": 0.4261, "step": 1151 }, { "epoch": 0.0982767445828357, "grad_norm": 1.4579444457565012, "learning_rate": 9.878345058551484e-05, "loss": 0.3774, "step": 1152 }, { "epoch": 0.09836205425695274, "grad_norm": 1.6876734891649727, "learning_rate": 9.878041974354598e-05, "loss": 0.4191, "step": 1153 }, { "epoch": 0.09844736393106979, "grad_norm": 1.4242374789313832, "learning_rate": 9.877738517745127e-05, "loss": 0.4332, "step": 1154 }, { "epoch": 0.09853267360518683, "grad_norm": 1.5677374988094595, "learning_rate": 9.877434688746241e-05, "loss": 0.4124, "step": 1155 }, { "epoch": 0.09861798327930388, "grad_norm": 1.5289290608228245, "learning_rate": 9.877130487381137e-05, "loss": 0.3929, "step": 1156 }, { "epoch": 0.09870329295342092, "grad_norm": 1.776384769832928, "learning_rate": 9.876825913673036e-05, "loss": 0.439, "step": 1157 }, { "epoch": 0.09878860262753797, "grad_norm": 1.8766999485775469, "learning_rate": 9.876520967645191e-05, "loss": 0.4472, "step": 1158 }, { "epoch": 0.098873912301655, "grad_norm": 1.6100449880391952, "learning_rate": 9.876215649320885e-05, "loss": 0.431, "step": 1159 }, { "epoch": 0.09895922197577205, "grad_norm": 1.6265385020425152, "learning_rate": 9.875909958723426e-05, "loss": 0.4094, "step": 1160 }, { "epoch": 0.09904453164988909, "grad_norm": 1.2372097831043276, "learning_rate": 9.875603895876154e-05, "loss": 0.3572, "step": 1161 }, { "epoch": 0.09912984132400614, "grad_norm": 1.67016242899159, "learning_rate": 9.875297460802431e-05, "loss": 0.386, "step": 1162 }, { "epoch": 0.09921515099812318, "grad_norm": 1.7102329423419294, "learning_rate": 9.874990653525656e-05, "loss": 0.378, "step": 1163 }, { "epoch": 0.09930046067224023, "grad_norm": 1.3530464184785223, "learning_rate": 9.874683474069248e-05, "loss": 0.3678, "step": 1164 }, { "epoch": 0.09938577034635727, "grad_norm": 1.6979773886964495, "learning_rate": 9.874375922456662e-05, "loss": 0.4041, "step": 1165 }, { "epoch": 0.09947108002047432, "grad_norm": 1.598814006177353, "learning_rate": 9.874067998711378e-05, "loss": 0.4096, "step": 1166 }, { "epoch": 0.09955638969459137, "grad_norm": 1.8828590988787421, "learning_rate": 9.873759702856901e-05, "loss": 0.3822, "step": 1167 }, { "epoch": 0.09964169936870841, "grad_norm": 1.5416888759491065, "learning_rate": 9.873451034916772e-05, "loss": 0.4037, "step": 1168 }, { "epoch": 0.09972700904282546, "grad_norm": 1.5233080686704237, "learning_rate": 9.873141994914553e-05, "loss": 0.3681, "step": 1169 }, { "epoch": 0.0998123187169425, "grad_norm": 1.5702742386896573, "learning_rate": 9.872832582873837e-05, "loss": 0.4108, "step": 1170 }, { "epoch": 0.09989762839105955, "grad_norm": 1.5870196779758186, "learning_rate": 9.87252279881825e-05, "loss": 0.388, "step": 1171 }, { "epoch": 0.0999829380651766, "grad_norm": 1.5261349004011449, "learning_rate": 9.872212642771439e-05, "loss": 0.3727, "step": 1172 }, { "epoch": 0.10006824773929364, "grad_norm": 1.7874588754898342, "learning_rate": 9.871902114757084e-05, "loss": 0.45, "step": 1173 }, { "epoch": 0.10015355741341068, "grad_norm": 1.3776359001595844, "learning_rate": 9.87159121479889e-05, "loss": 0.4098, "step": 1174 }, { "epoch": 0.10023886708752773, "grad_norm": 1.609574907349762, "learning_rate": 9.871279942920595e-05, "loss": 0.355, "step": 1175 }, { "epoch": 0.10032417676164478, "grad_norm": 1.7237507982369746, "learning_rate": 9.870968299145965e-05, "loss": 0.3967, "step": 1176 }, { "epoch": 0.10040948643576182, "grad_norm": 1.397749960899182, "learning_rate": 9.870656283498786e-05, "loss": 0.3615, "step": 1177 }, { "epoch": 0.10049479610987885, "grad_norm": 1.4835170474922745, "learning_rate": 9.870343896002884e-05, "loss": 0.3363, "step": 1178 }, { "epoch": 0.1005801057839959, "grad_norm": 1.8112041839849518, "learning_rate": 9.870031136682107e-05, "loss": 0.3984, "step": 1179 }, { "epoch": 0.10066541545811294, "grad_norm": 1.7234056015036947, "learning_rate": 9.869718005560331e-05, "loss": 0.3869, "step": 1180 }, { "epoch": 0.10075072513222999, "grad_norm": 1.8164199664510667, "learning_rate": 9.869404502661464e-05, "loss": 0.4562, "step": 1181 }, { "epoch": 0.10083603480634704, "grad_norm": 1.8665875863740202, "learning_rate": 9.869090628009438e-05, "loss": 0.4412, "step": 1182 }, { "epoch": 0.10092134448046408, "grad_norm": 1.6523345153406248, "learning_rate": 9.868776381628218e-05, "loss": 0.3965, "step": 1183 }, { "epoch": 0.10100665415458113, "grad_norm": 1.7243462747446727, "learning_rate": 9.868461763541791e-05, "loss": 0.4083, "step": 1184 }, { "epoch": 0.10109196382869817, "grad_norm": 1.8541724244217646, "learning_rate": 9.868146773774183e-05, "loss": 0.3999, "step": 1185 }, { "epoch": 0.10117727350281522, "grad_norm": 1.589616763995373, "learning_rate": 9.867831412349438e-05, "loss": 0.3402, "step": 1186 }, { "epoch": 0.10126258317693226, "grad_norm": 1.7469538334782408, "learning_rate": 9.86751567929163e-05, "loss": 0.4134, "step": 1187 }, { "epoch": 0.10134789285104931, "grad_norm": 1.1933070794117626, "learning_rate": 9.867199574624867e-05, "loss": 0.3199, "step": 1188 }, { "epoch": 0.10143320252516635, "grad_norm": 1.4790102737204052, "learning_rate": 9.86688309837328e-05, "loss": 0.3539, "step": 1189 }, { "epoch": 0.1015185121992834, "grad_norm": 1.6499019824883077, "learning_rate": 9.866566250561033e-05, "loss": 0.4077, "step": 1190 }, { "epoch": 0.10160382187340045, "grad_norm": 1.780945601303489, "learning_rate": 9.866249031212311e-05, "loss": 0.4102, "step": 1191 }, { "epoch": 0.10168913154751749, "grad_norm": 1.545558099158801, "learning_rate": 9.865931440351337e-05, "loss": 0.355, "step": 1192 }, { "epoch": 0.10177444122163454, "grad_norm": 1.3645443898630554, "learning_rate": 9.865613478002354e-05, "loss": 0.3488, "step": 1193 }, { "epoch": 0.10185975089575158, "grad_norm": 1.4253241655490843, "learning_rate": 9.865295144189638e-05, "loss": 0.3726, "step": 1194 }, { "epoch": 0.10194506056986863, "grad_norm": 1.6206468816501767, "learning_rate": 9.864976438937493e-05, "loss": 0.4445, "step": 1195 }, { "epoch": 0.10203037024398567, "grad_norm": 1.5960701901960548, "learning_rate": 9.864657362270247e-05, "loss": 0.4029, "step": 1196 }, { "epoch": 0.10211567991810272, "grad_norm": 1.8306916957907553, "learning_rate": 9.864337914212263e-05, "loss": 0.4351, "step": 1197 }, { "epoch": 0.10220098959221975, "grad_norm": 1.576321090196278, "learning_rate": 9.864018094787928e-05, "loss": 0.4124, "step": 1198 }, { "epoch": 0.1022862992663368, "grad_norm": 1.7461625668066525, "learning_rate": 9.863697904021661e-05, "loss": 0.4567, "step": 1199 }, { "epoch": 0.10237160894045384, "grad_norm": 1.6362002332097125, "learning_rate": 9.863377341937903e-05, "loss": 0.4255, "step": 1200 }, { "epoch": 0.10245691861457089, "grad_norm": 1.51766751491278, "learning_rate": 9.863056408561129e-05, "loss": 0.3907, "step": 1201 }, { "epoch": 0.10254222828868793, "grad_norm": 1.9369109704270973, "learning_rate": 9.86273510391584e-05, "loss": 0.4045, "step": 1202 }, { "epoch": 0.10262753796280498, "grad_norm": 1.5247763203908735, "learning_rate": 9.862413428026567e-05, "loss": 0.3816, "step": 1203 }, { "epoch": 0.10271284763692203, "grad_norm": 1.657787760730636, "learning_rate": 9.862091380917868e-05, "loss": 0.4283, "step": 1204 }, { "epoch": 0.10279815731103907, "grad_norm": 1.6339007679749813, "learning_rate": 9.861768962614328e-05, "loss": 0.4271, "step": 1205 }, { "epoch": 0.10288346698515612, "grad_norm": 1.476235489737626, "learning_rate": 9.861446173140563e-05, "loss": 0.4041, "step": 1206 }, { "epoch": 0.10296877665927316, "grad_norm": 1.6481990551359238, "learning_rate": 9.861123012521219e-05, "loss": 0.4019, "step": 1207 }, { "epoch": 0.10305408633339021, "grad_norm": 1.6030342596894294, "learning_rate": 9.860799480780963e-05, "loss": 0.4216, "step": 1208 }, { "epoch": 0.10313939600750725, "grad_norm": 1.6063339983280471, "learning_rate": 9.860475577944497e-05, "loss": 0.4122, "step": 1209 }, { "epoch": 0.1032247056816243, "grad_norm": 1.44555859919143, "learning_rate": 9.86015130403655e-05, "loss": 0.3574, "step": 1210 }, { "epoch": 0.10331001535574134, "grad_norm": 1.5948460867591736, "learning_rate": 9.859826659081875e-05, "loss": 0.3932, "step": 1211 }, { "epoch": 0.10339532502985839, "grad_norm": 1.7040489560289802, "learning_rate": 9.859501643105262e-05, "loss": 0.4171, "step": 1212 }, { "epoch": 0.10348063470397544, "grad_norm": 1.49418338888966, "learning_rate": 9.859176256131522e-05, "loss": 0.3896, "step": 1213 }, { "epoch": 0.10356594437809248, "grad_norm": 1.6102877500717088, "learning_rate": 9.858850498185496e-05, "loss": 0.3851, "step": 1214 }, { "epoch": 0.10365125405220953, "grad_norm": 1.4004155666199827, "learning_rate": 9.858524369292054e-05, "loss": 0.3586, "step": 1215 }, { "epoch": 0.10373656372632657, "grad_norm": 1.5642049261558193, "learning_rate": 9.858197869476096e-05, "loss": 0.3681, "step": 1216 }, { "epoch": 0.1038218734004436, "grad_norm": 1.6491179304871362, "learning_rate": 9.857870998762544e-05, "loss": 0.4156, "step": 1217 }, { "epoch": 0.10390718307456065, "grad_norm": 1.4927825540146684, "learning_rate": 9.85754375717636e-05, "loss": 0.4352, "step": 1218 }, { "epoch": 0.1039924927486777, "grad_norm": 1.601007579406677, "learning_rate": 9.85721614474252e-05, "loss": 0.4239, "step": 1219 }, { "epoch": 0.10407780242279474, "grad_norm": 1.4816717328105489, "learning_rate": 9.85688816148604e-05, "loss": 0.3902, "step": 1220 }, { "epoch": 0.10416311209691179, "grad_norm": 1.5008161231958201, "learning_rate": 9.856559807431958e-05, "loss": 0.3427, "step": 1221 }, { "epoch": 0.10424842177102883, "grad_norm": 1.5248566106704817, "learning_rate": 9.856231082605342e-05, "loss": 0.3898, "step": 1222 }, { "epoch": 0.10433373144514588, "grad_norm": 1.5842109640404005, "learning_rate": 9.855901987031289e-05, "loss": 0.4067, "step": 1223 }, { "epoch": 0.10441904111926292, "grad_norm": 1.396698877893307, "learning_rate": 9.855572520734923e-05, "loss": 0.3882, "step": 1224 }, { "epoch": 0.10450435079337997, "grad_norm": 1.5758987435910594, "learning_rate": 9.8552426837414e-05, "loss": 0.395, "step": 1225 }, { "epoch": 0.10458966046749701, "grad_norm": 1.5757710491979335, "learning_rate": 9.854912476075897e-05, "loss": 0.3997, "step": 1226 }, { "epoch": 0.10467497014161406, "grad_norm": 1.7864627522969065, "learning_rate": 9.854581897763626e-05, "loss": 0.37, "step": 1227 }, { "epoch": 0.1047602798157311, "grad_norm": 1.5659695061629322, "learning_rate": 9.854250948829824e-05, "loss": 0.4159, "step": 1228 }, { "epoch": 0.10484558948984815, "grad_norm": 1.4632729207793176, "learning_rate": 9.853919629299758e-05, "loss": 0.3841, "step": 1229 }, { "epoch": 0.1049308991639652, "grad_norm": 1.5297284254263077, "learning_rate": 9.853587939198721e-05, "loss": 0.4282, "step": 1230 }, { "epoch": 0.10501620883808224, "grad_norm": 1.9485977804402128, "learning_rate": 9.853255878552036e-05, "loss": 0.4081, "step": 1231 }, { "epoch": 0.10510151851219929, "grad_norm": 1.4308508637112918, "learning_rate": 9.852923447385056e-05, "loss": 0.4181, "step": 1232 }, { "epoch": 0.10518682818631633, "grad_norm": 1.6101219147672696, "learning_rate": 9.85259064572316e-05, "loss": 0.3784, "step": 1233 }, { "epoch": 0.10527213786043338, "grad_norm": 1.4463636545216476, "learning_rate": 9.852257473591754e-05, "loss": 0.3869, "step": 1234 }, { "epoch": 0.10535744753455042, "grad_norm": 1.326822786889586, "learning_rate": 9.851923931016275e-05, "loss": 0.3678, "step": 1235 }, { "epoch": 0.10544275720866746, "grad_norm": 1.5155002503327226, "learning_rate": 9.851590018022187e-05, "loss": 0.425, "step": 1236 }, { "epoch": 0.1055280668827845, "grad_norm": 1.4388646821318285, "learning_rate": 9.851255734634983e-05, "loss": 0.349, "step": 1237 }, { "epoch": 0.10561337655690155, "grad_norm": 1.6247457158598184, "learning_rate": 9.850921080880183e-05, "loss": 0.41, "step": 1238 }, { "epoch": 0.1056986862310186, "grad_norm": 1.6010424258003428, "learning_rate": 9.850586056783334e-05, "loss": 0.4321, "step": 1239 }, { "epoch": 0.10578399590513564, "grad_norm": 1.3526174584539745, "learning_rate": 9.850250662370017e-05, "loss": 0.3773, "step": 1240 }, { "epoch": 0.10586930557925268, "grad_norm": 1.6244951344209075, "learning_rate": 9.849914897665837e-05, "loss": 0.4125, "step": 1241 }, { "epoch": 0.10595461525336973, "grad_norm": 1.7072075026106859, "learning_rate": 9.849578762696426e-05, "loss": 0.4521, "step": 1242 }, { "epoch": 0.10603992492748678, "grad_norm": 1.85888819372561, "learning_rate": 9.849242257487447e-05, "loss": 0.3828, "step": 1243 }, { "epoch": 0.10612523460160382, "grad_norm": 1.4227107180580392, "learning_rate": 9.848905382064591e-05, "loss": 0.396, "step": 1244 }, { "epoch": 0.10621054427572087, "grad_norm": 1.48960781299599, "learning_rate": 9.848568136453577e-05, "loss": 0.4042, "step": 1245 }, { "epoch": 0.10629585394983791, "grad_norm": 1.5669292994107318, "learning_rate": 9.84823052068015e-05, "loss": 0.4538, "step": 1246 }, { "epoch": 0.10638116362395496, "grad_norm": 1.672856408249695, "learning_rate": 9.847892534770086e-05, "loss": 0.4006, "step": 1247 }, { "epoch": 0.106466473298072, "grad_norm": 1.9015568979975386, "learning_rate": 9.84755417874919e-05, "loss": 0.4345, "step": 1248 }, { "epoch": 0.10655178297218905, "grad_norm": 1.6684366682114486, "learning_rate": 9.84721545264329e-05, "loss": 0.4623, "step": 1249 }, { "epoch": 0.1066370926463061, "grad_norm": 1.6426271846307634, "learning_rate": 9.84687635647825e-05, "loss": 0.3938, "step": 1250 }, { "epoch": 0.10672240232042314, "grad_norm": 1.478091139939872, "learning_rate": 9.846536890279956e-05, "loss": 0.351, "step": 1251 }, { "epoch": 0.10680771199454019, "grad_norm": 1.4979277442801942, "learning_rate": 9.846197054074325e-05, "loss": 0.5531, "step": 1252 }, { "epoch": 0.10689302166865723, "grad_norm": 1.4097373107620492, "learning_rate": 9.845856847887302e-05, "loss": 0.3603, "step": 1253 }, { "epoch": 0.10697833134277428, "grad_norm": 1.5448990348738334, "learning_rate": 9.84551627174486e-05, "loss": 0.3629, "step": 1254 }, { "epoch": 0.10706364101689131, "grad_norm": 1.7209932426927315, "learning_rate": 9.845175325672998e-05, "loss": 0.4615, "step": 1255 }, { "epoch": 0.10714895069100835, "grad_norm": 1.5232230793614938, "learning_rate": 9.844834009697748e-05, "loss": 0.3495, "step": 1256 }, { "epoch": 0.1072342603651254, "grad_norm": 2.080378636963071, "learning_rate": 9.844492323845167e-05, "loss": 0.3665, "step": 1257 }, { "epoch": 0.10731957003924245, "grad_norm": 1.5691315874556921, "learning_rate": 9.844150268141338e-05, "loss": 0.3874, "step": 1258 }, { "epoch": 0.10740487971335949, "grad_norm": 1.830554879702474, "learning_rate": 9.843807842612383e-05, "loss": 0.4361, "step": 1259 }, { "epoch": 0.10749018938747654, "grad_norm": 1.3373991348879906, "learning_rate": 9.843465047284434e-05, "loss": 0.3539, "step": 1260 }, { "epoch": 0.10757549906159358, "grad_norm": 1.6267925059520882, "learning_rate": 9.84312188218367e-05, "loss": 0.4305, "step": 1261 }, { "epoch": 0.10766080873571063, "grad_norm": 1.483530948991082, "learning_rate": 9.842778347336286e-05, "loss": 0.3691, "step": 1262 }, { "epoch": 0.10774611840982767, "grad_norm": 1.6209282848066056, "learning_rate": 9.84243444276851e-05, "loss": 0.4131, "step": 1263 }, { "epoch": 0.10783142808394472, "grad_norm": 1.3782218985540946, "learning_rate": 9.842090168506596e-05, "loss": 0.4179, "step": 1264 }, { "epoch": 0.10791673775806176, "grad_norm": 1.626571482286378, "learning_rate": 9.841745524576829e-05, "loss": 0.4032, "step": 1265 }, { "epoch": 0.10800204743217881, "grad_norm": 1.5944011330490948, "learning_rate": 9.84140051100552e-05, "loss": 0.3961, "step": 1266 }, { "epoch": 0.10808735710629586, "grad_norm": 1.1376499936487545, "learning_rate": 9.841055127819009e-05, "loss": 0.3187, "step": 1267 }, { "epoch": 0.1081726667804129, "grad_norm": 1.7387678172599317, "learning_rate": 9.840709375043663e-05, "loss": 0.4055, "step": 1268 }, { "epoch": 0.10825797645452995, "grad_norm": 1.32277986114486, "learning_rate": 9.840363252705882e-05, "loss": 0.3478, "step": 1269 }, { "epoch": 0.10834328612864699, "grad_norm": 1.7420258725037387, "learning_rate": 9.840016760832088e-05, "loss": 0.3884, "step": 1270 }, { "epoch": 0.10842859580276404, "grad_norm": 1.779872696428237, "learning_rate": 9.839669899448733e-05, "loss": 0.3929, "step": 1271 }, { "epoch": 0.10851390547688108, "grad_norm": 1.834160646980156, "learning_rate": 9.8393226685823e-05, "loss": 0.4124, "step": 1272 }, { "epoch": 0.10859921515099813, "grad_norm": 1.621495870330285, "learning_rate": 9.838975068259297e-05, "loss": 0.3922, "step": 1273 }, { "epoch": 0.10868452482511516, "grad_norm": 1.5903085689904624, "learning_rate": 9.838627098506264e-05, "loss": 0.3317, "step": 1274 }, { "epoch": 0.1087698344992322, "grad_norm": 1.5102437238229727, "learning_rate": 9.838278759349762e-05, "loss": 0.3669, "step": 1275 }, { "epoch": 0.10885514417334925, "grad_norm": 1.9501843096365878, "learning_rate": 9.837930050816387e-05, "loss": 0.4103, "step": 1276 }, { "epoch": 0.1089404538474663, "grad_norm": 1.6191298020832865, "learning_rate": 9.837580972932762e-05, "loss": 0.4041, "step": 1277 }, { "epoch": 0.10902576352158334, "grad_norm": 1.9215272418236897, "learning_rate": 9.837231525725537e-05, "loss": 0.4508, "step": 1278 }, { "epoch": 0.10911107319570039, "grad_norm": 1.4453831011050189, "learning_rate": 9.836881709221391e-05, "loss": 0.4119, "step": 1279 }, { "epoch": 0.10919638286981743, "grad_norm": 1.6616362314041955, "learning_rate": 9.836531523447028e-05, "loss": 0.3843, "step": 1280 }, { "epoch": 0.10928169254393448, "grad_norm": 1.7282234591533776, "learning_rate": 9.836180968429185e-05, "loss": 0.4232, "step": 1281 }, { "epoch": 0.10936700221805153, "grad_norm": 1.6753903802332095, "learning_rate": 9.835830044194625e-05, "loss": 0.3911, "step": 1282 }, { "epoch": 0.10945231189216857, "grad_norm": 1.5600835807124547, "learning_rate": 9.835478750770137e-05, "loss": 0.4044, "step": 1283 }, { "epoch": 0.10953762156628562, "grad_norm": 1.633122902893285, "learning_rate": 9.835127088182543e-05, "loss": 0.3767, "step": 1284 }, { "epoch": 0.10962293124040266, "grad_norm": 1.6126620603045914, "learning_rate": 9.834775056458691e-05, "loss": 0.4242, "step": 1285 }, { "epoch": 0.10970824091451971, "grad_norm": 1.4246066336870817, "learning_rate": 9.834422655625454e-05, "loss": 0.3948, "step": 1286 }, { "epoch": 0.10979355058863675, "grad_norm": 1.5370305779120086, "learning_rate": 9.834069885709738e-05, "loss": 0.3835, "step": 1287 }, { "epoch": 0.1098788602627538, "grad_norm": 1.568087619595953, "learning_rate": 9.833716746738474e-05, "loss": 0.3756, "step": 1288 }, { "epoch": 0.10996416993687085, "grad_norm": 1.6278975402916556, "learning_rate": 9.833363238738623e-05, "loss": 0.4073, "step": 1289 }, { "epoch": 0.11004947961098789, "grad_norm": 1.9149982889630912, "learning_rate": 9.833009361737174e-05, "loss": 0.4147, "step": 1290 }, { "epoch": 0.11013478928510494, "grad_norm": 1.5711263948195051, "learning_rate": 9.83265511576114e-05, "loss": 0.3836, "step": 1291 }, { "epoch": 0.11022009895922198, "grad_norm": 1.7813469040595487, "learning_rate": 9.83230050083757e-05, "loss": 0.4483, "step": 1292 }, { "epoch": 0.11030540863333901, "grad_norm": 1.429942540835023, "learning_rate": 9.831945516993537e-05, "loss": 0.4145, "step": 1293 }, { "epoch": 0.11039071830745606, "grad_norm": 1.7153658756312815, "learning_rate": 9.831590164256139e-05, "loss": 0.4321, "step": 1294 }, { "epoch": 0.1104760279815731, "grad_norm": 1.2645690657404158, "learning_rate": 9.831234442652508e-05, "loss": 0.332, "step": 1295 }, { "epoch": 0.11056133765569015, "grad_norm": 2.0952090972487323, "learning_rate": 9.8308783522098e-05, "loss": 0.4872, "step": 1296 }, { "epoch": 0.1106466473298072, "grad_norm": 1.690779572082313, "learning_rate": 9.830521892955202e-05, "loss": 0.4693, "step": 1297 }, { "epoch": 0.11073195700392424, "grad_norm": 1.3825371247147757, "learning_rate": 9.830165064915926e-05, "loss": 0.3593, "step": 1298 }, { "epoch": 0.11081726667804129, "grad_norm": 1.5723969398773467, "learning_rate": 9.829807868119214e-05, "loss": 0.4269, "step": 1299 }, { "epoch": 0.11090257635215833, "grad_norm": 1.5077870421162862, "learning_rate": 9.829450302592338e-05, "loss": 0.3697, "step": 1300 }, { "epoch": 0.11098788602627538, "grad_norm": 1.5145973575717588, "learning_rate": 9.829092368362596e-05, "loss": 0.4178, "step": 1301 }, { "epoch": 0.11107319570039242, "grad_norm": 1.4921497120491412, "learning_rate": 9.828734065457313e-05, "loss": 0.3778, "step": 1302 }, { "epoch": 0.11115850537450947, "grad_norm": 1.5286776676361635, "learning_rate": 9.828375393903842e-05, "loss": 0.4323, "step": 1303 }, { "epoch": 0.11124381504862652, "grad_norm": 2.1955338227375747, "learning_rate": 9.828016353729569e-05, "loss": 0.4829, "step": 1304 }, { "epoch": 0.11132912472274356, "grad_norm": 1.3011428284645377, "learning_rate": 9.827656944961903e-05, "loss": 0.4151, "step": 1305 }, { "epoch": 0.1114144343968606, "grad_norm": 1.4524720068120107, "learning_rate": 9.827297167628283e-05, "loss": 0.3663, "step": 1306 }, { "epoch": 0.11149974407097765, "grad_norm": 1.5432733548966495, "learning_rate": 9.826937021756177e-05, "loss": 0.4006, "step": 1307 }, { "epoch": 0.1115850537450947, "grad_norm": 1.2655838853727643, "learning_rate": 9.82657650737308e-05, "loss": 0.3665, "step": 1308 }, { "epoch": 0.11167036341921174, "grad_norm": 1.3456348684244057, "learning_rate": 9.826215624506516e-05, "loss": 0.376, "step": 1309 }, { "epoch": 0.11175567309332879, "grad_norm": 1.6014489110347983, "learning_rate": 9.825854373184033e-05, "loss": 0.366, "step": 1310 }, { "epoch": 0.11184098276744583, "grad_norm": 1.7080189795589777, "learning_rate": 9.825492753433215e-05, "loss": 0.4203, "step": 1311 }, { "epoch": 0.11192629244156288, "grad_norm": 1.4676621649334691, "learning_rate": 9.825130765281668e-05, "loss": 0.3927, "step": 1312 }, { "epoch": 0.11201160211567991, "grad_norm": 1.2565992676412976, "learning_rate": 9.824768408757028e-05, "loss": 0.377, "step": 1313 }, { "epoch": 0.11209691178979696, "grad_norm": 1.4671182699394456, "learning_rate": 9.824405683886957e-05, "loss": 0.4233, "step": 1314 }, { "epoch": 0.112182221463914, "grad_norm": 1.7965780925090677, "learning_rate": 9.824042590699151e-05, "loss": 0.4421, "step": 1315 }, { "epoch": 0.11226753113803105, "grad_norm": 1.5845552631586517, "learning_rate": 9.823679129221326e-05, "loss": 0.4125, "step": 1316 }, { "epoch": 0.1123528408121481, "grad_norm": 1.7729428311429618, "learning_rate": 9.823315299481235e-05, "loss": 0.4303, "step": 1317 }, { "epoch": 0.11243815048626514, "grad_norm": 1.5337183772948835, "learning_rate": 9.82295110150665e-05, "loss": 0.3794, "step": 1318 }, { "epoch": 0.11252346016038219, "grad_norm": 1.906214791895175, "learning_rate": 9.822586535325378e-05, "loss": 0.4701, "step": 1319 }, { "epoch": 0.11260876983449923, "grad_norm": 1.5446220714704637, "learning_rate": 9.822221600965252e-05, "loss": 0.3735, "step": 1320 }, { "epoch": 0.11269407950861628, "grad_norm": 1.3881394536598284, "learning_rate": 9.821856298454131e-05, "loss": 0.3974, "step": 1321 }, { "epoch": 0.11277938918273332, "grad_norm": 1.5055643824326006, "learning_rate": 9.821490627819904e-05, "loss": 0.3758, "step": 1322 }, { "epoch": 0.11286469885685037, "grad_norm": 1.3416855500750358, "learning_rate": 9.821124589090491e-05, "loss": 0.3732, "step": 1323 }, { "epoch": 0.11295000853096741, "grad_norm": 1.5067982004499716, "learning_rate": 9.820758182293834e-05, "loss": 0.4057, "step": 1324 }, { "epoch": 0.11303531820508446, "grad_norm": 1.4089252879394418, "learning_rate": 9.820391407457907e-05, "loss": 0.3883, "step": 1325 }, { "epoch": 0.1131206278792015, "grad_norm": 1.2497344688675995, "learning_rate": 9.820024264610713e-05, "loss": 0.3774, "step": 1326 }, { "epoch": 0.11320593755331855, "grad_norm": 1.6150411469499941, "learning_rate": 9.81965675378028e-05, "loss": 0.373, "step": 1327 }, { "epoch": 0.1132912472274356, "grad_norm": 1.577665054222673, "learning_rate": 9.819288874994663e-05, "loss": 0.4036, "step": 1328 }, { "epoch": 0.11337655690155264, "grad_norm": 2.0629577128837333, "learning_rate": 9.818920628281953e-05, "loss": 0.5256, "step": 1329 }, { "epoch": 0.11346186657566969, "grad_norm": 1.7705437666561015, "learning_rate": 9.818552013670258e-05, "loss": 0.4064, "step": 1330 }, { "epoch": 0.11354717624978673, "grad_norm": 1.2225871203687202, "learning_rate": 9.818183031187724e-05, "loss": 0.3702, "step": 1331 }, { "epoch": 0.11363248592390376, "grad_norm": 1.6248428562182757, "learning_rate": 9.81781368086252e-05, "loss": 0.3859, "step": 1332 }, { "epoch": 0.11371779559802081, "grad_norm": 1.9829095550663949, "learning_rate": 9.817443962722843e-05, "loss": 0.4094, "step": 1333 }, { "epoch": 0.11380310527213786, "grad_norm": 1.6569095234132327, "learning_rate": 9.817073876796918e-05, "loss": 0.3798, "step": 1334 }, { "epoch": 0.1138884149462549, "grad_norm": 1.4432439899910845, "learning_rate": 9.816703423113001e-05, "loss": 0.3956, "step": 1335 }, { "epoch": 0.11397372462037195, "grad_norm": 1.5501731622013872, "learning_rate": 9.816332601699374e-05, "loss": 0.4374, "step": 1336 }, { "epoch": 0.11405903429448899, "grad_norm": 1.5937735611913058, "learning_rate": 9.815961412584347e-05, "loss": 0.3575, "step": 1337 }, { "epoch": 0.11414434396860604, "grad_norm": 1.5248041527484413, "learning_rate": 9.815589855796259e-05, "loss": 0.3712, "step": 1338 }, { "epoch": 0.11422965364272308, "grad_norm": 1.5531794455980519, "learning_rate": 9.815217931363475e-05, "loss": 0.4478, "step": 1339 }, { "epoch": 0.11431496331684013, "grad_norm": 1.887019415803533, "learning_rate": 9.814845639314387e-05, "loss": 0.4183, "step": 1340 }, { "epoch": 0.11440027299095717, "grad_norm": 1.2627957592491499, "learning_rate": 9.814472979677424e-05, "loss": 0.3484, "step": 1341 }, { "epoch": 0.11448558266507422, "grad_norm": 1.7990233473818644, "learning_rate": 9.814099952481032e-05, "loss": 0.433, "step": 1342 }, { "epoch": 0.11457089233919127, "grad_norm": 1.5176635089748975, "learning_rate": 9.81372655775369e-05, "loss": 0.3927, "step": 1343 }, { "epoch": 0.11465620201330831, "grad_norm": 1.6394191852044078, "learning_rate": 9.813352795523907e-05, "loss": 0.455, "step": 1344 }, { "epoch": 0.11474151168742536, "grad_norm": 1.7374117959227313, "learning_rate": 9.812978665820216e-05, "loss": 0.4564, "step": 1345 }, { "epoch": 0.1148268213615424, "grad_norm": 1.7858034792831694, "learning_rate": 9.812604168671178e-05, "loss": 0.3965, "step": 1346 }, { "epoch": 0.11491213103565945, "grad_norm": 1.846893217510088, "learning_rate": 9.812229304105387e-05, "loss": 0.4022, "step": 1347 }, { "epoch": 0.1149974407097765, "grad_norm": 1.256660502534179, "learning_rate": 9.811854072151461e-05, "loss": 0.3399, "step": 1348 }, { "epoch": 0.11508275038389354, "grad_norm": 1.6968271547319982, "learning_rate": 9.811478472838046e-05, "loss": 0.4116, "step": 1349 }, { "epoch": 0.11516806005801059, "grad_norm": 1.6642709520902645, "learning_rate": 9.811102506193818e-05, "loss": 0.3786, "step": 1350 }, { "epoch": 0.11525336973212762, "grad_norm": 1.3958334946513675, "learning_rate": 9.810726172247482e-05, "loss": 0.3523, "step": 1351 }, { "epoch": 0.11533867940624466, "grad_norm": 1.373098315473216, "learning_rate": 9.810349471027765e-05, "loss": 0.3804, "step": 1352 }, { "epoch": 0.11542398908036171, "grad_norm": 1.516880734326338, "learning_rate": 9.809972402563427e-05, "loss": 0.3788, "step": 1353 }, { "epoch": 0.11550929875447875, "grad_norm": 1.55027397564645, "learning_rate": 9.809594966883259e-05, "loss": 0.4251, "step": 1354 }, { "epoch": 0.1155946084285958, "grad_norm": 1.3693968441995499, "learning_rate": 9.809217164016071e-05, "loss": 0.3769, "step": 1355 }, { "epoch": 0.11567991810271284, "grad_norm": 1.2341805316954158, "learning_rate": 9.80883899399071e-05, "loss": 0.3276, "step": 1356 }, { "epoch": 0.11576522777682989, "grad_norm": 1.5374970971056818, "learning_rate": 9.808460456836047e-05, "loss": 0.3999, "step": 1357 }, { "epoch": 0.11585053745094694, "grad_norm": 1.5645725999947389, "learning_rate": 9.808081552580978e-05, "loss": 0.3875, "step": 1358 }, { "epoch": 0.11593584712506398, "grad_norm": 1.7284793483569694, "learning_rate": 9.807702281254432e-05, "loss": 0.3936, "step": 1359 }, { "epoch": 0.11602115679918103, "grad_norm": 1.5586725277220796, "learning_rate": 9.807322642885369e-05, "loss": 0.3567, "step": 1360 }, { "epoch": 0.11610646647329807, "grad_norm": 1.6864249018967754, "learning_rate": 9.806942637502764e-05, "loss": 0.4268, "step": 1361 }, { "epoch": 0.11619177614741512, "grad_norm": 1.45904091517369, "learning_rate": 9.806562265135635e-05, "loss": 0.3764, "step": 1362 }, { "epoch": 0.11627708582153216, "grad_norm": 2.0317401001291127, "learning_rate": 9.806181525813019e-05, "loss": 0.4017, "step": 1363 }, { "epoch": 0.11636239549564921, "grad_norm": 1.4815695502453996, "learning_rate": 9.805800419563982e-05, "loss": 0.4218, "step": 1364 }, { "epoch": 0.11644770516976626, "grad_norm": 1.5189653534343968, "learning_rate": 9.805418946417622e-05, "loss": 0.3713, "step": 1365 }, { "epoch": 0.1165330148438833, "grad_norm": 1.7889270981103493, "learning_rate": 9.805037106403062e-05, "loss": 0.4397, "step": 1366 }, { "epoch": 0.11661832451800035, "grad_norm": 1.6257278572931584, "learning_rate": 9.804654899549451e-05, "loss": 0.367, "step": 1367 }, { "epoch": 0.11670363419211739, "grad_norm": 1.6412421382765152, "learning_rate": 9.804272325885971e-05, "loss": 0.4149, "step": 1368 }, { "epoch": 0.11678894386623444, "grad_norm": 1.584753562349142, "learning_rate": 9.80388938544183e-05, "loss": 0.3904, "step": 1369 }, { "epoch": 0.11687425354035147, "grad_norm": 1.5889911286746807, "learning_rate": 9.803506078246262e-05, "loss": 0.4006, "step": 1370 }, { "epoch": 0.11695956321446851, "grad_norm": 1.667042387891971, "learning_rate": 9.803122404328529e-05, "loss": 0.3638, "step": 1371 }, { "epoch": 0.11704487288858556, "grad_norm": 1.4141709091191823, "learning_rate": 9.802738363717928e-05, "loss": 0.3669, "step": 1372 }, { "epoch": 0.1171301825627026, "grad_norm": 1.3680804143326395, "learning_rate": 9.80235395644377e-05, "loss": 0.3411, "step": 1373 }, { "epoch": 0.11721549223681965, "grad_norm": 1.7187270423049488, "learning_rate": 9.80196918253541e-05, "loss": 0.4354, "step": 1374 }, { "epoch": 0.1173008019109367, "grad_norm": 1.5847461152938551, "learning_rate": 9.80158404202222e-05, "loss": 0.3928, "step": 1375 }, { "epoch": 0.11738611158505374, "grad_norm": 1.4948959934455248, "learning_rate": 9.801198534933603e-05, "loss": 0.3393, "step": 1376 }, { "epoch": 0.11747142125917079, "grad_norm": 1.5565831880969498, "learning_rate": 9.800812661298992e-05, "loss": 0.3798, "step": 1377 }, { "epoch": 0.11755673093328783, "grad_norm": 2.070211731959517, "learning_rate": 9.800426421147845e-05, "loss": 0.4498, "step": 1378 }, { "epoch": 0.11764204060740488, "grad_norm": 1.563526426779397, "learning_rate": 9.800039814509653e-05, "loss": 0.4104, "step": 1379 }, { "epoch": 0.11772735028152193, "grad_norm": 1.8858104596652112, "learning_rate": 9.799652841413927e-05, "loss": 0.3933, "step": 1380 }, { "epoch": 0.11781265995563897, "grad_norm": 1.7562107502634245, "learning_rate": 9.799265501890211e-05, "loss": 0.3926, "step": 1381 }, { "epoch": 0.11789796962975602, "grad_norm": 1.5138740256899084, "learning_rate": 9.798877795968078e-05, "loss": 0.3721, "step": 1382 }, { "epoch": 0.11798327930387306, "grad_norm": 1.5016082248612883, "learning_rate": 9.798489723677126e-05, "loss": 0.3616, "step": 1383 }, { "epoch": 0.11806858897799011, "grad_norm": 1.3448811582794293, "learning_rate": 9.798101285046983e-05, "loss": 0.3442, "step": 1384 }, { "epoch": 0.11815389865210715, "grad_norm": 1.5657330443359392, "learning_rate": 9.797712480107304e-05, "loss": 0.3917, "step": 1385 }, { "epoch": 0.1182392083262242, "grad_norm": 1.3038337335369345, "learning_rate": 9.797323308887773e-05, "loss": 0.3936, "step": 1386 }, { "epoch": 0.11832451800034124, "grad_norm": 1.25113435772954, "learning_rate": 9.796933771418098e-05, "loss": 0.3694, "step": 1387 }, { "epoch": 0.11840982767445829, "grad_norm": 1.4249662395292801, "learning_rate": 9.796543867728023e-05, "loss": 0.4161, "step": 1388 }, { "epoch": 0.11849513734857532, "grad_norm": 1.6731172028945198, "learning_rate": 9.79615359784731e-05, "loss": 0.4024, "step": 1389 }, { "epoch": 0.11858044702269237, "grad_norm": 1.549849992076673, "learning_rate": 9.795762961805758e-05, "loss": 0.4639, "step": 1390 }, { "epoch": 0.11866575669680941, "grad_norm": 1.533167669773171, "learning_rate": 9.795371959633189e-05, "loss": 0.3811, "step": 1391 }, { "epoch": 0.11875106637092646, "grad_norm": 1.7096656008339188, "learning_rate": 9.794980591359453e-05, "loss": 0.3541, "step": 1392 }, { "epoch": 0.1188363760450435, "grad_norm": 1.480641377633408, "learning_rate": 9.79458885701443e-05, "loss": 0.3706, "step": 1393 }, { "epoch": 0.11892168571916055, "grad_norm": 1.4680215368276182, "learning_rate": 9.794196756628025e-05, "loss": 0.4078, "step": 1394 }, { "epoch": 0.1190069953932776, "grad_norm": 2.0333404348072217, "learning_rate": 9.793804290230176e-05, "loss": 0.4307, "step": 1395 }, { "epoch": 0.11909230506739464, "grad_norm": 1.5056631947538524, "learning_rate": 9.793411457850841e-05, "loss": 0.3994, "step": 1396 }, { "epoch": 0.11917761474151169, "grad_norm": 1.5480438594321893, "learning_rate": 9.793018259520014e-05, "loss": 0.3985, "step": 1397 }, { "epoch": 0.11926292441562873, "grad_norm": 1.5129375423895168, "learning_rate": 9.792624695267714e-05, "loss": 0.4052, "step": 1398 }, { "epoch": 0.11934823408974578, "grad_norm": 1.323289843612551, "learning_rate": 9.792230765123987e-05, "loss": 0.3458, "step": 1399 }, { "epoch": 0.11943354376386282, "grad_norm": 1.5912265621835522, "learning_rate": 9.791836469118905e-05, "loss": 0.3862, "step": 1400 }, { "epoch": 0.11951885343797987, "grad_norm": 1.4382672128401612, "learning_rate": 9.791441807282573e-05, "loss": 0.337, "step": 1401 }, { "epoch": 0.11960416311209691, "grad_norm": 1.7203557229913673, "learning_rate": 9.791046779645121e-05, "loss": 0.4037, "step": 1402 }, { "epoch": 0.11968947278621396, "grad_norm": 1.7282535332724058, "learning_rate": 9.790651386236707e-05, "loss": 0.381, "step": 1403 }, { "epoch": 0.119774782460331, "grad_norm": 1.5214418300022805, "learning_rate": 9.790255627087517e-05, "loss": 0.4411, "step": 1404 }, { "epoch": 0.11986009213444805, "grad_norm": 1.4565843812206998, "learning_rate": 9.789859502227766e-05, "loss": 0.4098, "step": 1405 }, { "epoch": 0.1199454018085651, "grad_norm": 1.5294891665256767, "learning_rate": 9.789463011687694e-05, "loss": 0.3994, "step": 1406 }, { "epoch": 0.12003071148268214, "grad_norm": 1.7070665857773644, "learning_rate": 9.789066155497573e-05, "loss": 0.3829, "step": 1407 }, { "epoch": 0.12011602115679917, "grad_norm": 1.3438002111235776, "learning_rate": 9.788668933687699e-05, "loss": 0.3306, "step": 1408 }, { "epoch": 0.12020133083091622, "grad_norm": 1.2411904802294773, "learning_rate": 9.7882713462884e-05, "loss": 0.3644, "step": 1409 }, { "epoch": 0.12028664050503327, "grad_norm": 1.3312865382091927, "learning_rate": 9.78787339333003e-05, "loss": 0.3601, "step": 1410 }, { "epoch": 0.12037195017915031, "grad_norm": 1.7941095051723244, "learning_rate": 9.787475074842967e-05, "loss": 0.4069, "step": 1411 }, { "epoch": 0.12045725985326736, "grad_norm": 1.5671632987347308, "learning_rate": 9.787076390857623e-05, "loss": 0.3886, "step": 1412 }, { "epoch": 0.1205425695273844, "grad_norm": 1.6347027656018862, "learning_rate": 9.786677341404436e-05, "loss": 0.4117, "step": 1413 }, { "epoch": 0.12062787920150145, "grad_norm": 1.7290905702078447, "learning_rate": 9.78627792651387e-05, "loss": 0.4242, "step": 1414 }, { "epoch": 0.1207131888756185, "grad_norm": 1.392462298911892, "learning_rate": 9.785878146216417e-05, "loss": 0.3774, "step": 1415 }, { "epoch": 0.12079849854973554, "grad_norm": 1.4643828213621637, "learning_rate": 9.7854780005426e-05, "loss": 0.3564, "step": 1416 }, { "epoch": 0.12088380822385258, "grad_norm": 1.4858958434033112, "learning_rate": 9.78507748952297e-05, "loss": 0.3103, "step": 1417 }, { "epoch": 0.12096911789796963, "grad_norm": 1.5458534026921007, "learning_rate": 9.7846766131881e-05, "loss": 0.3479, "step": 1418 }, { "epoch": 0.12105442757208668, "grad_norm": 1.8892902044561743, "learning_rate": 9.784275371568596e-05, "loss": 0.4358, "step": 1419 }, { "epoch": 0.12113973724620372, "grad_norm": 1.5890666740310146, "learning_rate": 9.783873764695091e-05, "loss": 0.3982, "step": 1420 }, { "epoch": 0.12122504692032077, "grad_norm": 1.604010149103148, "learning_rate": 9.783471792598247e-05, "loss": 0.4008, "step": 1421 }, { "epoch": 0.12131035659443781, "grad_norm": 1.8727600705455663, "learning_rate": 9.783069455308749e-05, "loss": 0.4557, "step": 1422 }, { "epoch": 0.12139566626855486, "grad_norm": 1.6510737162306268, "learning_rate": 9.782666752857317e-05, "loss": 0.4079, "step": 1423 }, { "epoch": 0.1214809759426719, "grad_norm": 1.5009278765434824, "learning_rate": 9.782263685274692e-05, "loss": 0.3328, "step": 1424 }, { "epoch": 0.12156628561678895, "grad_norm": 1.5649615924741378, "learning_rate": 9.781860252591648e-05, "loss": 0.4275, "step": 1425 }, { "epoch": 0.121651595290906, "grad_norm": 1.5671154519099313, "learning_rate": 9.781456454838986e-05, "loss": 0.3866, "step": 1426 }, { "epoch": 0.12173690496502303, "grad_norm": 1.6897110758639955, "learning_rate": 9.78105229204753e-05, "loss": 0.4132, "step": 1427 }, { "epoch": 0.12182221463914007, "grad_norm": 1.4904269872657692, "learning_rate": 9.780647764248139e-05, "loss": 0.3743, "step": 1428 }, { "epoch": 0.12190752431325712, "grad_norm": 1.5501675484453845, "learning_rate": 9.780242871471696e-05, "loss": 0.4233, "step": 1429 }, { "epoch": 0.12199283398737416, "grad_norm": 1.4752503069547778, "learning_rate": 9.779837613749111e-05, "loss": 0.3641, "step": 1430 }, { "epoch": 0.12207814366149121, "grad_norm": 1.6140711146195872, "learning_rate": 9.779431991111326e-05, "loss": 0.3929, "step": 1431 }, { "epoch": 0.12216345333560825, "grad_norm": 2.040490012501748, "learning_rate": 9.779026003589304e-05, "loss": 0.4819, "step": 1432 }, { "epoch": 0.1222487630097253, "grad_norm": 1.6620806860773185, "learning_rate": 9.778619651214042e-05, "loss": 0.4326, "step": 1433 }, { "epoch": 0.12233407268384235, "grad_norm": 1.6001500297777866, "learning_rate": 9.778212934016566e-05, "loss": 0.3999, "step": 1434 }, { "epoch": 0.12241938235795939, "grad_norm": 1.5982144227145807, "learning_rate": 9.777805852027922e-05, "loss": 0.3907, "step": 1435 }, { "epoch": 0.12250469203207644, "grad_norm": 1.3395235885815728, "learning_rate": 9.777398405279192e-05, "loss": 0.3718, "step": 1436 }, { "epoch": 0.12259000170619348, "grad_norm": 1.4839040711904956, "learning_rate": 9.77699059380148e-05, "loss": 0.351, "step": 1437 }, { "epoch": 0.12267531138031053, "grad_norm": 1.3512848585433523, "learning_rate": 9.77658241762592e-05, "loss": 0.4055, "step": 1438 }, { "epoch": 0.12276062105442757, "grad_norm": 1.865034578018158, "learning_rate": 9.776173876783677e-05, "loss": 0.4125, "step": 1439 }, { "epoch": 0.12284593072854462, "grad_norm": 1.5832017877376847, "learning_rate": 9.775764971305936e-05, "loss": 0.4029, "step": 1440 }, { "epoch": 0.12293124040266167, "grad_norm": 1.447282098632627, "learning_rate": 9.77535570122392e-05, "loss": 0.4195, "step": 1441 }, { "epoch": 0.12301655007677871, "grad_norm": 1.7733736404819216, "learning_rate": 9.774946066568873e-05, "loss": 0.3934, "step": 1442 }, { "epoch": 0.12310185975089576, "grad_norm": 1.5119936986267137, "learning_rate": 9.774536067372066e-05, "loss": 0.3577, "step": 1443 }, { "epoch": 0.1231871694250128, "grad_norm": 1.7515478781805298, "learning_rate": 9.774125703664805e-05, "loss": 0.3861, "step": 1444 }, { "epoch": 0.12327247909912985, "grad_norm": 1.9438946317483903, "learning_rate": 9.773714975478414e-05, "loss": 0.4221, "step": 1445 }, { "epoch": 0.1233577887732469, "grad_norm": 1.6469404464544188, "learning_rate": 9.773303882844253e-05, "loss": 0.4408, "step": 1446 }, { "epoch": 0.12344309844736392, "grad_norm": 1.746491866841178, "learning_rate": 9.772892425793705e-05, "loss": 0.393, "step": 1447 }, { "epoch": 0.12352840812148097, "grad_norm": 1.4388684932060216, "learning_rate": 9.772480604358183e-05, "loss": 0.349, "step": 1448 }, { "epoch": 0.12361371779559802, "grad_norm": 1.2782041119479068, "learning_rate": 9.772068418569129e-05, "loss": 0.3875, "step": 1449 }, { "epoch": 0.12369902746971506, "grad_norm": 1.2713470619229221, "learning_rate": 9.77165586845801e-05, "loss": 0.3234, "step": 1450 }, { "epoch": 0.12378433714383211, "grad_norm": 1.429246344537149, "learning_rate": 9.771242954056321e-05, "loss": 0.4058, "step": 1451 }, { "epoch": 0.12386964681794915, "grad_norm": 1.6173095657057255, "learning_rate": 9.770829675395587e-05, "loss": 0.4018, "step": 1452 }, { "epoch": 0.1239549564920662, "grad_norm": 1.5435682460030622, "learning_rate": 9.770416032507361e-05, "loss": 0.4136, "step": 1453 }, { "epoch": 0.12404026616618324, "grad_norm": 1.596134421660813, "learning_rate": 9.77000202542322e-05, "loss": 0.3907, "step": 1454 }, { "epoch": 0.12412557584030029, "grad_norm": 1.1978802993340332, "learning_rate": 9.769587654174772e-05, "loss": 0.3193, "step": 1455 }, { "epoch": 0.12421088551441734, "grad_norm": 1.6306147774565631, "learning_rate": 9.769172918793652e-05, "loss": 0.3818, "step": 1456 }, { "epoch": 0.12429619518853438, "grad_norm": 1.5478235284330377, "learning_rate": 9.768757819311523e-05, "loss": 0.3722, "step": 1457 }, { "epoch": 0.12438150486265143, "grad_norm": 1.5035739283804668, "learning_rate": 9.768342355760076e-05, "loss": 0.3329, "step": 1458 }, { "epoch": 0.12446681453676847, "grad_norm": 1.285413536591431, "learning_rate": 9.767926528171028e-05, "loss": 0.3727, "step": 1459 }, { "epoch": 0.12455212421088552, "grad_norm": 1.6558369535353157, "learning_rate": 9.767510336576127e-05, "loss": 0.4485, "step": 1460 }, { "epoch": 0.12463743388500256, "grad_norm": 1.5067199543725904, "learning_rate": 9.767093781007147e-05, "loss": 0.3998, "step": 1461 }, { "epoch": 0.12472274355911961, "grad_norm": 1.5887459378674527, "learning_rate": 9.766676861495888e-05, "loss": 0.3748, "step": 1462 }, { "epoch": 0.12480805323323665, "grad_norm": 1.5760560779034518, "learning_rate": 9.766259578074181e-05, "loss": 0.401, "step": 1463 }, { "epoch": 0.1248933629073537, "grad_norm": 1.6180061074461092, "learning_rate": 9.765841930773883e-05, "loss": 0.4405, "step": 1464 }, { "epoch": 0.12497867258147075, "grad_norm": 1.8237025574127537, "learning_rate": 9.76542391962688e-05, "loss": 0.4425, "step": 1465 }, { "epoch": 0.1250639822555878, "grad_norm": 1.7074011220829344, "learning_rate": 9.765005544665084e-05, "loss": 0.4195, "step": 1466 }, { "epoch": 0.12514929192970484, "grad_norm": 1.7611288702891998, "learning_rate": 9.764586805920434e-05, "loss": 0.3624, "step": 1467 }, { "epoch": 0.12523460160382188, "grad_norm": 1.4342176031051357, "learning_rate": 9.764167703424904e-05, "loss": 0.3708, "step": 1468 }, { "epoch": 0.12531991127793893, "grad_norm": 1.5893757949209082, "learning_rate": 9.763748237210484e-05, "loss": 0.4177, "step": 1469 }, { "epoch": 0.12540522095205597, "grad_norm": 1.2936634606060615, "learning_rate": 9.763328407309201e-05, "loss": 0.3636, "step": 1470 }, { "epoch": 0.12549053062617302, "grad_norm": 1.4438267675029803, "learning_rate": 9.762908213753107e-05, "loss": 0.3984, "step": 1471 }, { "epoch": 0.12557584030029006, "grad_norm": 1.3709420231609104, "learning_rate": 9.76248765657428e-05, "loss": 0.3751, "step": 1472 }, { "epoch": 0.1256611499744071, "grad_norm": 1.824257827764997, "learning_rate": 9.762066735804829e-05, "loss": 0.4407, "step": 1473 }, { "epoch": 0.12574645964852416, "grad_norm": 1.5246516158096788, "learning_rate": 9.761645451476889e-05, "loss": 0.41, "step": 1474 }, { "epoch": 0.12583176932264117, "grad_norm": 1.361712933613824, "learning_rate": 9.761223803622621e-05, "loss": 0.3395, "step": 1475 }, { "epoch": 0.12591707899675822, "grad_norm": 1.4738651241670113, "learning_rate": 9.760801792274217e-05, "loss": 0.3761, "step": 1476 }, { "epoch": 0.12600238867087526, "grad_norm": 1.6480749373342263, "learning_rate": 9.760379417463894e-05, "loss": 0.3703, "step": 1477 }, { "epoch": 0.1260876983449923, "grad_norm": 1.8161841472073759, "learning_rate": 9.759956679223901e-05, "loss": 0.4654, "step": 1478 }, { "epoch": 0.12617300801910936, "grad_norm": 1.6333503384905121, "learning_rate": 9.759533577586508e-05, "loss": 0.4029, "step": 1479 }, { "epoch": 0.1262583176932264, "grad_norm": 1.4446982238226294, "learning_rate": 9.75911011258402e-05, "loss": 0.3604, "step": 1480 }, { "epoch": 0.12634362736734345, "grad_norm": 1.5024968664863396, "learning_rate": 9.758686284248764e-05, "loss": 0.3595, "step": 1481 }, { "epoch": 0.1264289370414605, "grad_norm": 1.263981682418331, "learning_rate": 9.758262092613099e-05, "loss": 0.3151, "step": 1482 }, { "epoch": 0.12651424671557754, "grad_norm": 1.4856943792015616, "learning_rate": 9.757837537709407e-05, "loss": 0.4196, "step": 1483 }, { "epoch": 0.12659955638969458, "grad_norm": 1.4825089621313197, "learning_rate": 9.757412619570104e-05, "loss": 0.4018, "step": 1484 }, { "epoch": 0.12668486606381163, "grad_norm": 1.681969658077868, "learning_rate": 9.756987338227626e-05, "loss": 0.44, "step": 1485 }, { "epoch": 0.12677017573792868, "grad_norm": 1.6115365928119423, "learning_rate": 9.756561693714446e-05, "loss": 0.4438, "step": 1486 }, { "epoch": 0.12685548541204572, "grad_norm": 1.6005427393690164, "learning_rate": 9.756135686063055e-05, "loss": 0.357, "step": 1487 }, { "epoch": 0.12694079508616277, "grad_norm": 1.4131844676689043, "learning_rate": 9.755709315305978e-05, "loss": 0.3402, "step": 1488 }, { "epoch": 0.1270261047602798, "grad_norm": 1.3849568524163902, "learning_rate": 9.755282581475769e-05, "loss": 0.4218, "step": 1489 }, { "epoch": 0.12711141443439686, "grad_norm": 1.5737764480222753, "learning_rate": 9.754855484605003e-05, "loss": 0.3872, "step": 1490 }, { "epoch": 0.1271967241085139, "grad_norm": 1.7885521573125818, "learning_rate": 9.754428024726288e-05, "loss": 0.4186, "step": 1491 }, { "epoch": 0.12728203378263095, "grad_norm": 1.6499210569974947, "learning_rate": 9.754000201872258e-05, "loss": 0.376, "step": 1492 }, { "epoch": 0.127367343456748, "grad_norm": 1.5879211798211474, "learning_rate": 9.753572016075576e-05, "loss": 0.4299, "step": 1493 }, { "epoch": 0.12745265313086504, "grad_norm": 1.567251914194325, "learning_rate": 9.753143467368931e-05, "loss": 0.3762, "step": 1494 }, { "epoch": 0.12753796280498209, "grad_norm": 1.3961765616970476, "learning_rate": 9.75271455578504e-05, "loss": 0.3412, "step": 1495 }, { "epoch": 0.12762327247909913, "grad_norm": 1.4381261374126315, "learning_rate": 9.752285281356648e-05, "loss": 0.3891, "step": 1496 }, { "epoch": 0.12770858215321618, "grad_norm": 1.5721309627890916, "learning_rate": 9.75185564411653e-05, "loss": 0.3701, "step": 1497 }, { "epoch": 0.12779389182733322, "grad_norm": 1.5504362169301111, "learning_rate": 9.751425644097482e-05, "loss": 0.3562, "step": 1498 }, { "epoch": 0.12787920150145027, "grad_norm": 1.367048768770485, "learning_rate": 9.750995281332338e-05, "loss": 0.4052, "step": 1499 }, { "epoch": 0.1279645111755673, "grad_norm": 1.4070903386529137, "learning_rate": 9.750564555853951e-05, "loss": 0.3999, "step": 1500 }, { "epoch": 0.12804982084968436, "grad_norm": 1.3398583859014075, "learning_rate": 9.750133467695203e-05, "loss": 0.3647, "step": 1501 }, { "epoch": 0.1281351305238014, "grad_norm": 1.3337591157010007, "learning_rate": 9.749702016889008e-05, "loss": 0.4257, "step": 1502 }, { "epoch": 0.12822044019791845, "grad_norm": 1.9198342630259528, "learning_rate": 9.749270203468304e-05, "loss": 0.4227, "step": 1503 }, { "epoch": 0.1283057498720355, "grad_norm": 1.5473648990948863, "learning_rate": 9.748838027466057e-05, "loss": 0.3908, "step": 1504 }, { "epoch": 0.12839105954615254, "grad_norm": 1.5833449195973293, "learning_rate": 9.748405488915262e-05, "loss": 0.3693, "step": 1505 }, { "epoch": 0.1284763692202696, "grad_norm": 1.342533092836798, "learning_rate": 9.747972587848942e-05, "loss": 0.353, "step": 1506 }, { "epoch": 0.12856167889438663, "grad_norm": 1.5022365267970412, "learning_rate": 9.747539324300143e-05, "loss": 0.392, "step": 1507 }, { "epoch": 0.12864698856850368, "grad_norm": 1.5777930486563907, "learning_rate": 9.747105698301949e-05, "loss": 0.3399, "step": 1508 }, { "epoch": 0.12873229824262072, "grad_norm": 1.5824866122241452, "learning_rate": 9.746671709887458e-05, "loss": 0.3955, "step": 1509 }, { "epoch": 0.12881760791673777, "grad_norm": 1.5697293693400625, "learning_rate": 9.746237359089805e-05, "loss": 0.3539, "step": 1510 }, { "epoch": 0.12890291759085482, "grad_norm": 1.571500565997564, "learning_rate": 9.745802645942153e-05, "loss": 0.3563, "step": 1511 }, { "epoch": 0.12898822726497186, "grad_norm": 1.5906725323662974, "learning_rate": 9.745367570477688e-05, "loss": 0.4021, "step": 1512 }, { "epoch": 0.1290735369390889, "grad_norm": 1.5922063195610967, "learning_rate": 9.744932132729625e-05, "loss": 0.354, "step": 1513 }, { "epoch": 0.12915884661320592, "grad_norm": 1.5012171098979705, "learning_rate": 9.744496332731208e-05, "loss": 0.3795, "step": 1514 }, { "epoch": 0.12924415628732297, "grad_norm": 1.4660436979757423, "learning_rate": 9.74406017051571e-05, "loss": 0.3711, "step": 1515 }, { "epoch": 0.12932946596144002, "grad_norm": 1.4756413568658944, "learning_rate": 9.743623646116427e-05, "loss": 0.4036, "step": 1516 }, { "epoch": 0.12941477563555706, "grad_norm": 1.785309298776367, "learning_rate": 9.743186759566685e-05, "loss": 0.441, "step": 1517 }, { "epoch": 0.1295000853096741, "grad_norm": 1.7423199658861144, "learning_rate": 9.742749510899841e-05, "loss": 0.3893, "step": 1518 }, { "epoch": 0.12958539498379115, "grad_norm": 1.483659832985735, "learning_rate": 9.742311900149275e-05, "loss": 0.3683, "step": 1519 }, { "epoch": 0.1296707046579082, "grad_norm": 1.491168248253157, "learning_rate": 9.741873927348394e-05, "loss": 0.3911, "step": 1520 }, { "epoch": 0.12975601433202524, "grad_norm": 1.6730510105079273, "learning_rate": 9.741435592530638e-05, "loss": 0.4091, "step": 1521 }, { "epoch": 0.1298413240061423, "grad_norm": 1.8453110374296304, "learning_rate": 9.74099689572947e-05, "loss": 0.4428, "step": 1522 }, { "epoch": 0.12992663368025933, "grad_norm": 1.768646760225816, "learning_rate": 9.740557836978384e-05, "loss": 0.3531, "step": 1523 }, { "epoch": 0.13001194335437638, "grad_norm": 1.761215374879008, "learning_rate": 9.740118416310897e-05, "loss": 0.3612, "step": 1524 }, { "epoch": 0.13009725302849343, "grad_norm": 1.5741509737707076, "learning_rate": 9.739678633760559e-05, "loss": 0.3971, "step": 1525 }, { "epoch": 0.13018256270261047, "grad_norm": 1.2696393245765147, "learning_rate": 9.739238489360942e-05, "loss": 0.3709, "step": 1526 }, { "epoch": 0.13026787237672752, "grad_norm": 1.4781102540649655, "learning_rate": 9.738797983145654e-05, "loss": 0.3876, "step": 1527 }, { "epoch": 0.13035318205084456, "grad_norm": 1.5952274729518061, "learning_rate": 9.738357115148319e-05, "loss": 0.365, "step": 1528 }, { "epoch": 0.1304384917249616, "grad_norm": 1.7382878335549485, "learning_rate": 9.737915885402599e-05, "loss": 0.4067, "step": 1529 }, { "epoch": 0.13052380139907865, "grad_norm": 1.3814086684840199, "learning_rate": 9.737474293942177e-05, "loss": 0.3739, "step": 1530 }, { "epoch": 0.1306091110731957, "grad_norm": 1.7928153577026764, "learning_rate": 9.737032340800769e-05, "loss": 0.4148, "step": 1531 }, { "epoch": 0.13069442074731275, "grad_norm": 1.5636450577246201, "learning_rate": 9.736590026012114e-05, "loss": 0.3615, "step": 1532 }, { "epoch": 0.1307797304214298, "grad_norm": 1.5841369931677496, "learning_rate": 9.736147349609981e-05, "loss": 0.4126, "step": 1533 }, { "epoch": 0.13086504009554684, "grad_norm": 1.6114706080469992, "learning_rate": 9.735704311628166e-05, "loss": 0.3413, "step": 1534 }, { "epoch": 0.13095034976966388, "grad_norm": 1.47692398409785, "learning_rate": 9.735260912100492e-05, "loss": 0.3962, "step": 1535 }, { "epoch": 0.13103565944378093, "grad_norm": 1.4724535410470485, "learning_rate": 9.73481715106081e-05, "loss": 0.3407, "step": 1536 }, { "epoch": 0.13112096911789797, "grad_norm": 1.2301186684219814, "learning_rate": 9.734373028543001e-05, "loss": 0.3667, "step": 1537 }, { "epoch": 0.13120627879201502, "grad_norm": 1.5605454999896164, "learning_rate": 9.733928544580967e-05, "loss": 0.3916, "step": 1538 }, { "epoch": 0.13129158846613206, "grad_norm": 1.4132023508328218, "learning_rate": 9.733483699208645e-05, "loss": 0.3506, "step": 1539 }, { "epoch": 0.1313768981402491, "grad_norm": 1.6374237799923463, "learning_rate": 9.733038492459998e-05, "loss": 0.3884, "step": 1540 }, { "epoch": 0.13146220781436616, "grad_norm": 1.871951677028812, "learning_rate": 9.732592924369013e-05, "loss": 0.3579, "step": 1541 }, { "epoch": 0.1315475174884832, "grad_norm": 1.5069780165316182, "learning_rate": 9.732146994969706e-05, "loss": 0.4181, "step": 1542 }, { "epoch": 0.13163282716260025, "grad_norm": 1.829945497378669, "learning_rate": 9.731700704296126e-05, "loss": 0.3979, "step": 1543 }, { "epoch": 0.1317181368367173, "grad_norm": 1.4623005266360931, "learning_rate": 9.731254052382337e-05, "loss": 0.3763, "step": 1544 }, { "epoch": 0.13180344651083434, "grad_norm": 1.888701918953774, "learning_rate": 9.730807039262447e-05, "loss": 0.4246, "step": 1545 }, { "epoch": 0.13188875618495138, "grad_norm": 1.5003550427186978, "learning_rate": 9.730359664970576e-05, "loss": 0.3891, "step": 1546 }, { "epoch": 0.13197406585906843, "grad_norm": 1.5920284173049009, "learning_rate": 9.729911929540883e-05, "loss": 0.4094, "step": 1547 }, { "epoch": 0.13205937553318547, "grad_norm": 1.594850347308505, "learning_rate": 9.729463833007548e-05, "loss": 0.4157, "step": 1548 }, { "epoch": 0.13214468520730252, "grad_norm": 1.488308963868031, "learning_rate": 9.729015375404782e-05, "loss": 0.4131, "step": 1549 }, { "epoch": 0.13222999488141957, "grad_norm": 1.4786718731941308, "learning_rate": 9.728566556766823e-05, "loss": 0.4447, "step": 1550 }, { "epoch": 0.1323153045555366, "grad_norm": 1.534318302140307, "learning_rate": 9.728117377127933e-05, "loss": 0.3615, "step": 1551 }, { "epoch": 0.13240061422965363, "grad_norm": 1.2244956223801744, "learning_rate": 9.727667836522407e-05, "loss": 0.3359, "step": 1552 }, { "epoch": 0.13248592390377067, "grad_norm": 1.450340145598042, "learning_rate": 9.727217934984566e-05, "loss": 0.3769, "step": 1553 }, { "epoch": 0.13257123357788772, "grad_norm": 1.6672014173303975, "learning_rate": 9.726767672548755e-05, "loss": 0.4607, "step": 1554 }, { "epoch": 0.13265654325200477, "grad_norm": 1.4017748940755728, "learning_rate": 9.72631704924935e-05, "loss": 0.3915, "step": 1555 }, { "epoch": 0.1327418529261218, "grad_norm": 1.4735602480425447, "learning_rate": 9.725866065120755e-05, "loss": 0.3916, "step": 1556 }, { "epoch": 0.13282716260023886, "grad_norm": 1.6625291090403116, "learning_rate": 9.725414720197399e-05, "loss": 0.3961, "step": 1557 }, { "epoch": 0.1329124722743559, "grad_norm": 1.4465492524492045, "learning_rate": 9.72496301451374e-05, "loss": 0.4073, "step": 1558 }, { "epoch": 0.13299778194847295, "grad_norm": 1.498129234387553, "learning_rate": 9.724510948104262e-05, "loss": 0.3665, "step": 1559 }, { "epoch": 0.13308309162259, "grad_norm": 1.5714869116815533, "learning_rate": 9.72405852100348e-05, "loss": 0.4128, "step": 1560 }, { "epoch": 0.13316840129670704, "grad_norm": 1.573304774757937, "learning_rate": 9.723605733245933e-05, "loss": 0.3679, "step": 1561 }, { "epoch": 0.13325371097082409, "grad_norm": 1.6077316189692306, "learning_rate": 9.72315258486619e-05, "loss": 0.3689, "step": 1562 }, { "epoch": 0.13333902064494113, "grad_norm": 1.697039168760585, "learning_rate": 9.722699075898846e-05, "loss": 0.4172, "step": 1563 }, { "epoch": 0.13342433031905818, "grad_norm": 1.607776255148098, "learning_rate": 9.722245206378524e-05, "loss": 0.3814, "step": 1564 }, { "epoch": 0.13350963999317522, "grad_norm": 1.7161995431216364, "learning_rate": 9.721790976339874e-05, "loss": 0.4171, "step": 1565 }, { "epoch": 0.13359494966729227, "grad_norm": 1.4952024190118773, "learning_rate": 9.721336385817575e-05, "loss": 0.3882, "step": 1566 }, { "epoch": 0.1336802593414093, "grad_norm": 1.4891292529039406, "learning_rate": 9.720881434846332e-05, "loss": 0.3755, "step": 1567 }, { "epoch": 0.13376556901552636, "grad_norm": 1.7163639499207755, "learning_rate": 9.720426123460877e-05, "loss": 0.4413, "step": 1568 }, { "epoch": 0.1338508786896434, "grad_norm": 1.5465474408060744, "learning_rate": 9.719970451695973e-05, "loss": 0.3535, "step": 1569 }, { "epoch": 0.13393618836376045, "grad_norm": 1.947138163260366, "learning_rate": 9.719514419586406e-05, "loss": 0.4453, "step": 1570 }, { "epoch": 0.1340214980378775, "grad_norm": 1.515007758545602, "learning_rate": 9.719058027166994e-05, "loss": 0.3522, "step": 1571 }, { "epoch": 0.13410680771199454, "grad_norm": 1.7937304638586276, "learning_rate": 9.718601274472578e-05, "loss": 0.3663, "step": 1572 }, { "epoch": 0.1341921173861116, "grad_norm": 1.6592282562601417, "learning_rate": 9.71814416153803e-05, "loss": 0.4191, "step": 1573 }, { "epoch": 0.13427742706022863, "grad_norm": 1.561381206512026, "learning_rate": 9.717686688398246e-05, "loss": 0.3897, "step": 1574 }, { "epoch": 0.13436273673434568, "grad_norm": 1.6200932736094864, "learning_rate": 9.717228855088154e-05, "loss": 0.4082, "step": 1575 }, { "epoch": 0.13444804640846272, "grad_norm": 1.486219364553145, "learning_rate": 9.716770661642707e-05, "loss": 0.3711, "step": 1576 }, { "epoch": 0.13453335608257977, "grad_norm": 1.5782930643462578, "learning_rate": 9.716312108096884e-05, "loss": 0.4289, "step": 1577 }, { "epoch": 0.13461866575669681, "grad_norm": 1.7026244661652268, "learning_rate": 9.715853194485693e-05, "loss": 0.3886, "step": 1578 }, { "epoch": 0.13470397543081386, "grad_norm": 1.6934863684178536, "learning_rate": 9.715393920844171e-05, "loss": 0.428, "step": 1579 }, { "epoch": 0.1347892851049309, "grad_norm": 1.3443702774284125, "learning_rate": 9.714934287207382e-05, "loss": 0.4021, "step": 1580 }, { "epoch": 0.13487459477904795, "grad_norm": 1.5960870698778773, "learning_rate": 9.714474293610415e-05, "loss": 0.3655, "step": 1581 }, { "epoch": 0.134959904453165, "grad_norm": 1.4121312394302725, "learning_rate": 9.714013940088388e-05, "loss": 0.4129, "step": 1582 }, { "epoch": 0.13504521412728204, "grad_norm": 1.5280788048000664, "learning_rate": 9.713553226676446e-05, "loss": 0.4115, "step": 1583 }, { "epoch": 0.1351305238013991, "grad_norm": 1.581400879910264, "learning_rate": 9.713092153409765e-05, "loss": 0.4302, "step": 1584 }, { "epoch": 0.13521583347551613, "grad_norm": 1.5942429172980066, "learning_rate": 9.712630720323542e-05, "loss": 0.4381, "step": 1585 }, { "epoch": 0.13530114314963318, "grad_norm": 1.3782079295423924, "learning_rate": 9.712168927453007e-05, "loss": 0.3542, "step": 1586 }, { "epoch": 0.13538645282375023, "grad_norm": 1.5804443689371208, "learning_rate": 9.711706774833414e-05, "loss": 0.3953, "step": 1587 }, { "epoch": 0.13547176249786727, "grad_norm": 1.5370988275924657, "learning_rate": 9.711244262500048e-05, "loss": 0.3804, "step": 1588 }, { "epoch": 0.13555707217198432, "grad_norm": 1.4338352924974864, "learning_rate": 9.710781390488216e-05, "loss": 0.374, "step": 1589 }, { "epoch": 0.13564238184610133, "grad_norm": 1.5930805670317492, "learning_rate": 9.710318158833261e-05, "loss": 0.3501, "step": 1590 }, { "epoch": 0.13572769152021838, "grad_norm": 1.5877368660180724, "learning_rate": 9.709854567570542e-05, "loss": 0.4376, "step": 1591 }, { "epoch": 0.13581300119433543, "grad_norm": 1.0334568685603185, "learning_rate": 9.709390616735456e-05, "loss": 0.3566, "step": 1592 }, { "epoch": 0.13589831086845247, "grad_norm": 1.5402392231310809, "learning_rate": 9.708926306363422e-05, "loss": 0.3672, "step": 1593 }, { "epoch": 0.13598362054256952, "grad_norm": 1.413782334366891, "learning_rate": 9.708461636489889e-05, "loss": 0.3586, "step": 1594 }, { "epoch": 0.13606893021668656, "grad_norm": 1.677039041190965, "learning_rate": 9.70799660715033e-05, "loss": 0.4089, "step": 1595 }, { "epoch": 0.1361542398908036, "grad_norm": 1.4222034452278338, "learning_rate": 9.707531218380248e-05, "loss": 0.3866, "step": 1596 }, { "epoch": 0.13623954956492065, "grad_norm": 1.664371157393566, "learning_rate": 9.707065470215174e-05, "loss": 0.3903, "step": 1597 }, { "epoch": 0.1363248592390377, "grad_norm": 1.7475394935129018, "learning_rate": 9.706599362690663e-05, "loss": 0.3706, "step": 1598 }, { "epoch": 0.13641016891315474, "grad_norm": 1.612262099111962, "learning_rate": 9.706132895842304e-05, "loss": 0.4023, "step": 1599 }, { "epoch": 0.1364954785872718, "grad_norm": 1.5461674623039467, "learning_rate": 9.705666069705704e-05, "loss": 0.3791, "step": 1600 }, { "epoch": 0.13658078826138884, "grad_norm": 1.6907313456202153, "learning_rate": 9.705198884316507e-05, "loss": 0.3877, "step": 1601 }, { "epoch": 0.13666609793550588, "grad_norm": 1.525447438997784, "learning_rate": 9.70473133971038e-05, "loss": 0.4148, "step": 1602 }, { "epoch": 0.13675140760962293, "grad_norm": 1.5611736008312378, "learning_rate": 9.704263435923014e-05, "loss": 0.4306, "step": 1603 }, { "epoch": 0.13683671728373997, "grad_norm": 1.449287433487613, "learning_rate": 9.703795172990134e-05, "loss": 0.4418, "step": 1604 }, { "epoch": 0.13692202695785702, "grad_norm": 1.177079608652529, "learning_rate": 9.703326550947487e-05, "loss": 0.3532, "step": 1605 }, { "epoch": 0.13700733663197406, "grad_norm": 1.7926195257497117, "learning_rate": 9.702857569830852e-05, "loss": 0.4104, "step": 1606 }, { "epoch": 0.1370926463060911, "grad_norm": 1.4820402556343328, "learning_rate": 9.702388229676033e-05, "loss": 0.415, "step": 1607 }, { "epoch": 0.13717795598020815, "grad_norm": 1.5030486597038368, "learning_rate": 9.701918530518861e-05, "loss": 0.3911, "step": 1608 }, { "epoch": 0.1372632656543252, "grad_norm": 1.2380458475660985, "learning_rate": 9.701448472395197e-05, "loss": 0.3332, "step": 1609 }, { "epoch": 0.13734857532844225, "grad_norm": 1.405281326664139, "learning_rate": 9.700978055340923e-05, "loss": 0.3698, "step": 1610 }, { "epoch": 0.1374338850025593, "grad_norm": 1.557745910874254, "learning_rate": 9.700507279391956e-05, "loss": 0.4017, "step": 1611 }, { "epoch": 0.13751919467667634, "grad_norm": 1.4181388143486113, "learning_rate": 9.700036144584237e-05, "loss": 0.3906, "step": 1612 }, { "epoch": 0.13760450435079338, "grad_norm": 1.4680458166368218, "learning_rate": 9.699564650953734e-05, "loss": 0.3921, "step": 1613 }, { "epoch": 0.13768981402491043, "grad_norm": 1.5542180104846908, "learning_rate": 9.699092798536445e-05, "loss": 0.3987, "step": 1614 }, { "epoch": 0.13777512369902747, "grad_norm": 1.479456573158392, "learning_rate": 9.698620587368389e-05, "loss": 0.4284, "step": 1615 }, { "epoch": 0.13786043337314452, "grad_norm": 1.4117523206256366, "learning_rate": 9.698148017485621e-05, "loss": 0.3511, "step": 1616 }, { "epoch": 0.13794574304726157, "grad_norm": 1.7409387055836059, "learning_rate": 9.697675088924218e-05, "loss": 0.4472, "step": 1617 }, { "epoch": 0.1380310527213786, "grad_norm": 1.8808194450346787, "learning_rate": 9.697201801720286e-05, "loss": 0.4465, "step": 1618 }, { "epoch": 0.13811636239549566, "grad_norm": 1.775310048707822, "learning_rate": 9.696728155909956e-05, "loss": 0.3565, "step": 1619 }, { "epoch": 0.1382016720696127, "grad_norm": 1.4005306498433059, "learning_rate": 9.69625415152939e-05, "loss": 0.3694, "step": 1620 }, { "epoch": 0.13828698174372975, "grad_norm": 1.381158906858611, "learning_rate": 9.695779788614776e-05, "loss": 0.3608, "step": 1621 }, { "epoch": 0.1383722914178468, "grad_norm": 1.832833315835978, "learning_rate": 9.695305067202328e-05, "loss": 0.4196, "step": 1622 }, { "epoch": 0.13845760109196384, "grad_norm": 1.7449584957094701, "learning_rate": 9.694829987328288e-05, "loss": 0.3868, "step": 1623 }, { "epoch": 0.13854291076608088, "grad_norm": 1.4890825812635886, "learning_rate": 9.694354549028927e-05, "loss": 0.4003, "step": 1624 }, { "epoch": 0.13862822044019793, "grad_norm": 1.4389838148509788, "learning_rate": 9.693878752340544e-05, "loss": 0.3557, "step": 1625 }, { "epoch": 0.13871353011431498, "grad_norm": 1.67151331331575, "learning_rate": 9.69340259729946e-05, "loss": 0.3995, "step": 1626 }, { "epoch": 0.13879883978843202, "grad_norm": 1.2361005480216145, "learning_rate": 9.692926083942029e-05, "loss": 0.3371, "step": 1627 }, { "epoch": 0.13888414946254907, "grad_norm": 1.5116859452189984, "learning_rate": 9.692449212304629e-05, "loss": 0.3891, "step": 1628 }, { "epoch": 0.13896945913666608, "grad_norm": 1.138804511431061, "learning_rate": 9.691971982423669e-05, "loss": 0.3532, "step": 1629 }, { "epoch": 0.13905476881078313, "grad_norm": 1.5043253525203089, "learning_rate": 9.691494394335579e-05, "loss": 0.3904, "step": 1630 }, { "epoch": 0.13914007848490018, "grad_norm": 1.73904861485781, "learning_rate": 9.691016448076824e-05, "loss": 0.3932, "step": 1631 }, { "epoch": 0.13922538815901722, "grad_norm": 1.2704258979586254, "learning_rate": 9.690538143683891e-05, "loss": 0.3418, "step": 1632 }, { "epoch": 0.13931069783313427, "grad_norm": 1.6981197617962145, "learning_rate": 9.690059481193295e-05, "loss": 0.3788, "step": 1633 }, { "epoch": 0.1393960075072513, "grad_norm": 1.5150205842435813, "learning_rate": 9.689580460641581e-05, "loss": 0.3809, "step": 1634 }, { "epoch": 0.13948131718136836, "grad_norm": 1.3116340505348978, "learning_rate": 9.68910108206532e-05, "loss": 0.4094, "step": 1635 }, { "epoch": 0.1395666268554854, "grad_norm": 1.6424958234527764, "learning_rate": 9.688621345501109e-05, "loss": 0.383, "step": 1636 }, { "epoch": 0.13965193652960245, "grad_norm": 1.4290833577749193, "learning_rate": 9.688141250985574e-05, "loss": 0.3187, "step": 1637 }, { "epoch": 0.1397372462037195, "grad_norm": 1.640384765475006, "learning_rate": 9.687660798555367e-05, "loss": 0.3707, "step": 1638 }, { "epoch": 0.13982255587783654, "grad_norm": 1.5952529552380332, "learning_rate": 9.687179988247167e-05, "loss": 0.3988, "step": 1639 }, { "epoch": 0.1399078655519536, "grad_norm": 1.6646528316244216, "learning_rate": 9.686698820097684e-05, "loss": 0.3842, "step": 1640 }, { "epoch": 0.13999317522607063, "grad_norm": 1.3820627194924846, "learning_rate": 9.686217294143652e-05, "loss": 0.3952, "step": 1641 }, { "epoch": 0.14007848490018768, "grad_norm": 1.1956011355102751, "learning_rate": 9.68573541042183e-05, "loss": 0.3292, "step": 1642 }, { "epoch": 0.14016379457430472, "grad_norm": 1.8223178933713358, "learning_rate": 9.68525316896901e-05, "loss": 0.4261, "step": 1643 }, { "epoch": 0.14024910424842177, "grad_norm": 1.5863897120495658, "learning_rate": 9.684770569822008e-05, "loss": 0.3831, "step": 1644 }, { "epoch": 0.14033441392253881, "grad_norm": 1.6616413968280979, "learning_rate": 9.684287613017669e-05, "loss": 0.4377, "step": 1645 }, { "epoch": 0.14041972359665586, "grad_norm": 1.6190177401489059, "learning_rate": 9.683804298592862e-05, "loss": 0.3691, "step": 1646 }, { "epoch": 0.1405050332707729, "grad_norm": 1.5264711350711628, "learning_rate": 9.683320626584486e-05, "loss": 0.3523, "step": 1647 }, { "epoch": 0.14059034294488995, "grad_norm": 1.561883728564702, "learning_rate": 9.682836597029468e-05, "loss": 0.4053, "step": 1648 }, { "epoch": 0.140675652619007, "grad_norm": 1.4997118037250283, "learning_rate": 9.68235220996476e-05, "loss": 0.3447, "step": 1649 }, { "epoch": 0.14076096229312404, "grad_norm": 1.367225777226452, "learning_rate": 9.681867465427344e-05, "loss": 0.3324, "step": 1650 }, { "epoch": 0.1408462719672411, "grad_norm": 1.5050431351167353, "learning_rate": 9.681382363454224e-05, "loss": 0.3792, "step": 1651 }, { "epoch": 0.14093158164135813, "grad_norm": 1.5880713928855237, "learning_rate": 9.680896904082439e-05, "loss": 0.348, "step": 1652 }, { "epoch": 0.14101689131547518, "grad_norm": 1.5057708674642218, "learning_rate": 9.68041108734905e-05, "loss": 0.3379, "step": 1653 }, { "epoch": 0.14110220098959222, "grad_norm": 1.7301400520436687, "learning_rate": 9.679924913291145e-05, "loss": 0.3727, "step": 1654 }, { "epoch": 0.14118751066370927, "grad_norm": 1.3814422370081434, "learning_rate": 9.679438381945843e-05, "loss": 0.3718, "step": 1655 }, { "epoch": 0.14127282033782632, "grad_norm": 1.659390477110099, "learning_rate": 9.678951493350286e-05, "loss": 0.3357, "step": 1656 }, { "epoch": 0.14135813001194336, "grad_norm": 1.8907876063007454, "learning_rate": 9.678464247541648e-05, "loss": 0.4025, "step": 1657 }, { "epoch": 0.1414434396860604, "grad_norm": 1.5118949846553016, "learning_rate": 9.677976644557125e-05, "loss": 0.3498, "step": 1658 }, { "epoch": 0.14152874936017745, "grad_norm": 1.6497480055532063, "learning_rate": 9.677488684433944e-05, "loss": 0.381, "step": 1659 }, { "epoch": 0.1416140590342945, "grad_norm": 1.4297140984067351, "learning_rate": 9.677000367209356e-05, "loss": 0.3534, "step": 1660 }, { "epoch": 0.14169936870841154, "grad_norm": 1.3458052154926359, "learning_rate": 9.676511692920647e-05, "loss": 0.4, "step": 1661 }, { "epoch": 0.1417846783825286, "grad_norm": 1.646494366489924, "learning_rate": 9.67602266160512e-05, "loss": 0.3945, "step": 1662 }, { "epoch": 0.14186998805664564, "grad_norm": 1.6223821093456916, "learning_rate": 9.675533273300111e-05, "loss": 0.398, "step": 1663 }, { "epoch": 0.14195529773076268, "grad_norm": 1.5667024175474185, "learning_rate": 9.675043528042982e-05, "loss": 0.3729, "step": 1664 }, { "epoch": 0.14204060740487973, "grad_norm": 1.2793758921207807, "learning_rate": 9.674553425871123e-05, "loss": 0.3531, "step": 1665 }, { "epoch": 0.14212591707899677, "grad_norm": 1.6327573045956492, "learning_rate": 9.67406296682195e-05, "loss": 0.419, "step": 1666 }, { "epoch": 0.1422112267531138, "grad_norm": 1.6668238406774813, "learning_rate": 9.673572150932909e-05, "loss": 0.3788, "step": 1667 }, { "epoch": 0.14229653642723084, "grad_norm": 1.5758329308299346, "learning_rate": 9.673080978241468e-05, "loss": 0.3755, "step": 1668 }, { "epoch": 0.14238184610134788, "grad_norm": 1.6088291464708675, "learning_rate": 9.672589448785128e-05, "loss": 0.3768, "step": 1669 }, { "epoch": 0.14246715577546493, "grad_norm": 1.5091788470519045, "learning_rate": 9.672097562601414e-05, "loss": 0.3577, "step": 1670 }, { "epoch": 0.14255246544958197, "grad_norm": 1.5052430391985423, "learning_rate": 9.671605319727876e-05, "loss": 0.3385, "step": 1671 }, { "epoch": 0.14263777512369902, "grad_norm": 1.334734815912357, "learning_rate": 9.6711127202021e-05, "loss": 0.3836, "step": 1672 }, { "epoch": 0.14272308479781606, "grad_norm": 1.6075240667033628, "learning_rate": 9.670619764061688e-05, "loss": 0.3529, "step": 1673 }, { "epoch": 0.1428083944719331, "grad_norm": 1.7703707221642828, "learning_rate": 9.670126451344277e-05, "loss": 0.3997, "step": 1674 }, { "epoch": 0.14289370414605015, "grad_norm": 1.533964126177392, "learning_rate": 9.66963278208753e-05, "loss": 0.4162, "step": 1675 }, { "epoch": 0.1429790138201672, "grad_norm": 1.476583813710259, "learning_rate": 9.669138756329133e-05, "loss": 0.4067, "step": 1676 }, { "epoch": 0.14306432349428425, "grad_norm": 1.4193113922016716, "learning_rate": 9.668644374106805e-05, "loss": 0.3244, "step": 1677 }, { "epoch": 0.1431496331684013, "grad_norm": 1.6095376534348946, "learning_rate": 9.668149635458287e-05, "loss": 0.3656, "step": 1678 }, { "epoch": 0.14323494284251834, "grad_norm": 1.6445687260287902, "learning_rate": 9.667654540421351e-05, "loss": 0.4261, "step": 1679 }, { "epoch": 0.14332025251663538, "grad_norm": 1.6519379534580574, "learning_rate": 9.667159089033794e-05, "loss": 0.3484, "step": 1680 }, { "epoch": 0.14340556219075243, "grad_norm": 1.4769887157672892, "learning_rate": 9.666663281333443e-05, "loss": 0.3702, "step": 1681 }, { "epoch": 0.14349087186486947, "grad_norm": 1.550751654585159, "learning_rate": 9.666167117358149e-05, "loss": 0.3688, "step": 1682 }, { "epoch": 0.14357618153898652, "grad_norm": 1.7811706257585727, "learning_rate": 9.66567059714579e-05, "loss": 0.4493, "step": 1683 }, { "epoch": 0.14366149121310356, "grad_norm": 1.6598863090510807, "learning_rate": 9.665173720734277e-05, "loss": 0.3824, "step": 1684 }, { "epoch": 0.1437468008872206, "grad_norm": 1.6450085887278902, "learning_rate": 9.66467648816154e-05, "loss": 0.4062, "step": 1685 }, { "epoch": 0.14383211056133766, "grad_norm": 1.2081004718593613, "learning_rate": 9.66417889946554e-05, "loss": 0.3916, "step": 1686 }, { "epoch": 0.1439174202354547, "grad_norm": 1.517122060226807, "learning_rate": 9.663680954684268e-05, "loss": 0.381, "step": 1687 }, { "epoch": 0.14400272990957175, "grad_norm": 1.4202328096197847, "learning_rate": 9.663182653855737e-05, "loss": 0.3432, "step": 1688 }, { "epoch": 0.1440880395836888, "grad_norm": 1.353979562402305, "learning_rate": 9.662683997017991e-05, "loss": 0.3655, "step": 1689 }, { "epoch": 0.14417334925780584, "grad_norm": 1.2832574237732859, "learning_rate": 9.6621849842091e-05, "loss": 0.3568, "step": 1690 }, { "epoch": 0.14425865893192288, "grad_norm": 1.759166416521776, "learning_rate": 9.661685615467157e-05, "loss": 0.421, "step": 1691 }, { "epoch": 0.14434396860603993, "grad_norm": 1.4712657080421907, "learning_rate": 9.661185890830293e-05, "loss": 0.3836, "step": 1692 }, { "epoch": 0.14442927828015698, "grad_norm": 1.3180180764818294, "learning_rate": 9.660685810336654e-05, "loss": 0.3775, "step": 1693 }, { "epoch": 0.14451458795427402, "grad_norm": 1.6689204553992965, "learning_rate": 9.660185374024421e-05, "loss": 0.4319, "step": 1694 }, { "epoch": 0.14459989762839107, "grad_norm": 1.8767671522721343, "learning_rate": 9.659684581931798e-05, "loss": 0.4769, "step": 1695 }, { "epoch": 0.1446852073025081, "grad_norm": 1.1487441918492973, "learning_rate": 9.65918343409702e-05, "loss": 0.3641, "step": 1696 }, { "epoch": 0.14477051697662516, "grad_norm": 1.426492174134447, "learning_rate": 9.658681930558345e-05, "loss": 0.3907, "step": 1697 }, { "epoch": 0.1448558266507422, "grad_norm": 1.343007436997308, "learning_rate": 9.658180071354061e-05, "loss": 0.3415, "step": 1698 }, { "epoch": 0.14494113632485925, "grad_norm": 1.2960709066832559, "learning_rate": 9.657677856522483e-05, "loss": 0.3703, "step": 1699 }, { "epoch": 0.1450264459989763, "grad_norm": 1.4444001488536842, "learning_rate": 9.657175286101949e-05, "loss": 0.3942, "step": 1700 }, { "epoch": 0.14511175567309334, "grad_norm": 1.75717680562683, "learning_rate": 9.656672360130832e-05, "loss": 0.4542, "step": 1701 }, { "epoch": 0.14519706534721039, "grad_norm": 1.4568337314280622, "learning_rate": 9.656169078647526e-05, "loss": 0.3509, "step": 1702 }, { "epoch": 0.14528237502132743, "grad_norm": 1.3374519909950235, "learning_rate": 9.655665441690453e-05, "loss": 0.3435, "step": 1703 }, { "epoch": 0.14536768469544448, "grad_norm": 1.4683551663447267, "learning_rate": 9.655161449298062e-05, "loss": 0.3478, "step": 1704 }, { "epoch": 0.1454529943695615, "grad_norm": 1.5266726841592815, "learning_rate": 9.654657101508836e-05, "loss": 0.4242, "step": 1705 }, { "epoch": 0.14553830404367854, "grad_norm": 1.473775584975126, "learning_rate": 9.654152398361271e-05, "loss": 0.3427, "step": 1706 }, { "epoch": 0.14562361371779559, "grad_norm": 1.3406683605802416, "learning_rate": 9.653647339893905e-05, "loss": 0.4031, "step": 1707 }, { "epoch": 0.14570892339191263, "grad_norm": 1.667479753843185, "learning_rate": 9.653141926145292e-05, "loss": 0.3689, "step": 1708 }, { "epoch": 0.14579423306602968, "grad_norm": 2.777221333613233, "learning_rate": 9.652636157154022e-05, "loss": 0.4743, "step": 1709 }, { "epoch": 0.14587954274014672, "grad_norm": 1.8237932186744834, "learning_rate": 9.652130032958704e-05, "loss": 0.3778, "step": 1710 }, { "epoch": 0.14596485241426377, "grad_norm": 1.6145583448219605, "learning_rate": 9.651623553597981e-05, "loss": 0.4175, "step": 1711 }, { "epoch": 0.1460501620883808, "grad_norm": 1.5018988352712577, "learning_rate": 9.651116719110517e-05, "loss": 0.3816, "step": 1712 }, { "epoch": 0.14613547176249786, "grad_norm": 1.5178615153273747, "learning_rate": 9.650609529535008e-05, "loss": 0.4001, "step": 1713 }, { "epoch": 0.1462207814366149, "grad_norm": 1.2054417091051284, "learning_rate": 9.650101984910174e-05, "loss": 0.3503, "step": 1714 }, { "epoch": 0.14630609111073195, "grad_norm": 1.7486335324051907, "learning_rate": 9.649594085274764e-05, "loss": 0.3849, "step": 1715 }, { "epoch": 0.146391400784849, "grad_norm": 1.8563102018824313, "learning_rate": 9.649085830667555e-05, "loss": 0.4299, "step": 1716 }, { "epoch": 0.14647671045896604, "grad_norm": 1.5228097257974407, "learning_rate": 9.648577221127346e-05, "loss": 0.3526, "step": 1717 }, { "epoch": 0.1465620201330831, "grad_norm": 1.4413907843693086, "learning_rate": 9.64806825669297e-05, "loss": 0.4098, "step": 1718 }, { "epoch": 0.14664732980720013, "grad_norm": 1.3238663556163661, "learning_rate": 9.647558937403283e-05, "loss": 0.4141, "step": 1719 }, { "epoch": 0.14673263948131718, "grad_norm": 1.3132578160670438, "learning_rate": 9.647049263297168e-05, "loss": 0.416, "step": 1720 }, { "epoch": 0.14681794915543422, "grad_norm": 1.549874835959702, "learning_rate": 9.646539234413535e-05, "loss": 0.4165, "step": 1721 }, { "epoch": 0.14690325882955127, "grad_norm": 1.3768410062121659, "learning_rate": 9.646028850791325e-05, "loss": 0.3991, "step": 1722 }, { "epoch": 0.14698856850366832, "grad_norm": 1.4470703981749367, "learning_rate": 9.645518112469498e-05, "loss": 0.3556, "step": 1723 }, { "epoch": 0.14707387817778536, "grad_norm": 1.5076693926481615, "learning_rate": 9.645007019487052e-05, "loss": 0.3705, "step": 1724 }, { "epoch": 0.1471591878519024, "grad_norm": 1.508918791196701, "learning_rate": 9.644495571883003e-05, "loss": 0.3791, "step": 1725 }, { "epoch": 0.14724449752601945, "grad_norm": 1.3324806148986974, "learning_rate": 9.643983769696398e-05, "loss": 0.3285, "step": 1726 }, { "epoch": 0.1473298072001365, "grad_norm": 1.3555679504856217, "learning_rate": 9.64347161296631e-05, "loss": 0.3561, "step": 1727 }, { "epoch": 0.14741511687425354, "grad_norm": 1.6622513778756545, "learning_rate": 9.64295910173184e-05, "loss": 0.3974, "step": 1728 }, { "epoch": 0.1475004265483706, "grad_norm": 1.593313419159069, "learning_rate": 9.642446236032114e-05, "loss": 0.4114, "step": 1729 }, { "epoch": 0.14758573622248763, "grad_norm": 1.870824360418856, "learning_rate": 9.64193301590629e-05, "loss": 0.4599, "step": 1730 }, { "epoch": 0.14767104589660468, "grad_norm": 1.8072247524864626, "learning_rate": 9.641419441393546e-05, "loss": 0.385, "step": 1731 }, { "epoch": 0.14775635557072173, "grad_norm": 1.5420268213732446, "learning_rate": 9.640905512533091e-05, "loss": 0.4024, "step": 1732 }, { "epoch": 0.14784166524483877, "grad_norm": 1.6640824151078697, "learning_rate": 9.640391229364165e-05, "loss": 0.4239, "step": 1733 }, { "epoch": 0.14792697491895582, "grad_norm": 1.3745951489999428, "learning_rate": 9.639876591926026e-05, "loss": 0.3413, "step": 1734 }, { "epoch": 0.14801228459307286, "grad_norm": 1.4852889610205458, "learning_rate": 9.639361600257966e-05, "loss": 0.3701, "step": 1735 }, { "epoch": 0.1480975942671899, "grad_norm": 1.6142375042440416, "learning_rate": 9.6388462543993e-05, "loss": 0.398, "step": 1736 }, { "epoch": 0.14818290394130695, "grad_norm": 1.520588641753117, "learning_rate": 9.638330554389374e-05, "loss": 0.3728, "step": 1737 }, { "epoch": 0.148268213615424, "grad_norm": 1.3499538131159847, "learning_rate": 9.637814500267559e-05, "loss": 0.3325, "step": 1738 }, { "epoch": 0.14835352328954104, "grad_norm": 1.737518074435624, "learning_rate": 9.63729809207325e-05, "loss": 0.4795, "step": 1739 }, { "epoch": 0.1484388329636581, "grad_norm": 1.4407734295887524, "learning_rate": 9.636781329845877e-05, "loss": 0.4074, "step": 1740 }, { "epoch": 0.14852414263777514, "grad_norm": 1.6469455855876347, "learning_rate": 9.636264213624889e-05, "loss": 0.3681, "step": 1741 }, { "epoch": 0.14860945231189218, "grad_norm": 1.6471412950091706, "learning_rate": 9.635746743449763e-05, "loss": 0.3806, "step": 1742 }, { "epoch": 0.1486947619860092, "grad_norm": 1.4311581744592927, "learning_rate": 9.635228919360009e-05, "loss": 0.3198, "step": 1743 }, { "epoch": 0.14878007166012625, "grad_norm": 1.4545755017493271, "learning_rate": 9.634710741395158e-05, "loss": 0.3289, "step": 1744 }, { "epoch": 0.1488653813342433, "grad_norm": 1.5985590615385246, "learning_rate": 9.634192209594773e-05, "loss": 0.3824, "step": 1745 }, { "epoch": 0.14895069100836034, "grad_norm": 1.3794190315163781, "learning_rate": 9.633673323998436e-05, "loss": 0.3005, "step": 1746 }, { "epoch": 0.14903600068247738, "grad_norm": 1.8436476468916945, "learning_rate": 9.633154084645766e-05, "loss": 0.4571, "step": 1747 }, { "epoch": 0.14912131035659443, "grad_norm": 1.651139490095605, "learning_rate": 9.6326344915764e-05, "loss": 0.4404, "step": 1748 }, { "epoch": 0.14920662003071147, "grad_norm": 1.6451433723373943, "learning_rate": 9.632114544830011e-05, "loss": 0.3688, "step": 1749 }, { "epoch": 0.14929192970482852, "grad_norm": 1.4586588995695942, "learning_rate": 9.631594244446289e-05, "loss": 0.4106, "step": 1750 }, { "epoch": 0.14937723937894556, "grad_norm": 1.5093414776793508, "learning_rate": 9.63107359046496e-05, "loss": 0.3227, "step": 1751 }, { "epoch": 0.1494625490530626, "grad_norm": 1.5598998751664908, "learning_rate": 9.630552582925772e-05, "loss": 0.3802, "step": 1752 }, { "epoch": 0.14954785872717966, "grad_norm": 1.4887288539355181, "learning_rate": 9.630031221868501e-05, "loss": 0.3677, "step": 1753 }, { "epoch": 0.1496331684012967, "grad_norm": 1.2235965221043843, "learning_rate": 9.62950950733295e-05, "loss": 0.3674, "step": 1754 }, { "epoch": 0.14971847807541375, "grad_norm": 1.515255833921545, "learning_rate": 9.62898743935895e-05, "loss": 0.4021, "step": 1755 }, { "epoch": 0.1498037877495308, "grad_norm": 1.4305101539707445, "learning_rate": 9.628465017986356e-05, "loss": 0.3064, "step": 1756 }, { "epoch": 0.14988909742364784, "grad_norm": 1.6500262518441646, "learning_rate": 9.627942243255055e-05, "loss": 0.3701, "step": 1757 }, { "epoch": 0.14997440709776488, "grad_norm": 1.4100404728836626, "learning_rate": 9.627419115204956e-05, "loss": 0.4161, "step": 1758 }, { "epoch": 0.15005971677188193, "grad_norm": 1.4655644385995943, "learning_rate": 9.626895633875997e-05, "loss": 0.4098, "step": 1759 }, { "epoch": 0.15014502644599897, "grad_norm": 1.713293743653325, "learning_rate": 9.626371799308144e-05, "loss": 0.3595, "step": 1760 }, { "epoch": 0.15023033612011602, "grad_norm": 1.6830835830686528, "learning_rate": 9.625847611541388e-05, "loss": 0.3947, "step": 1761 }, { "epoch": 0.15031564579423307, "grad_norm": 2.0044790419722056, "learning_rate": 9.625323070615751e-05, "loss": 0.3541, "step": 1762 }, { "epoch": 0.1504009554683501, "grad_norm": 1.3750529712530464, "learning_rate": 9.624798176571274e-05, "loss": 0.3518, "step": 1763 }, { "epoch": 0.15048626514246716, "grad_norm": 1.7924677662099286, "learning_rate": 9.624272929448033e-05, "loss": 0.359, "step": 1764 }, { "epoch": 0.1505715748165842, "grad_norm": 1.2978652023390904, "learning_rate": 9.623747329286126e-05, "loss": 0.3828, "step": 1765 }, { "epoch": 0.15065688449070125, "grad_norm": 1.5247784312897292, "learning_rate": 9.623221376125683e-05, "loss": 0.3843, "step": 1766 }, { "epoch": 0.1507421941648183, "grad_norm": 1.70749558477543, "learning_rate": 9.622695070006855e-05, "loss": 0.3822, "step": 1767 }, { "epoch": 0.15082750383893534, "grad_norm": 1.198449002724245, "learning_rate": 9.622168410969824e-05, "loss": 0.3519, "step": 1768 }, { "epoch": 0.15091281351305239, "grad_norm": 1.3043516720493946, "learning_rate": 9.621641399054797e-05, "loss": 0.4024, "step": 1769 }, { "epoch": 0.15099812318716943, "grad_norm": 1.716999063748175, "learning_rate": 9.621114034302007e-05, "loss": 0.479, "step": 1770 }, { "epoch": 0.15108343286128648, "grad_norm": 1.6873057218262109, "learning_rate": 9.620586316751719e-05, "loss": 0.3614, "step": 1771 }, { "epoch": 0.15116874253540352, "grad_norm": 1.200797964065371, "learning_rate": 9.620058246444218e-05, "loss": 0.3291, "step": 1772 }, { "epoch": 0.15125405220952057, "grad_norm": 1.4484056520715738, "learning_rate": 9.619529823419821e-05, "loss": 0.3586, "step": 1773 }, { "epoch": 0.1513393618836376, "grad_norm": 1.2269084466853473, "learning_rate": 9.619001047718871e-05, "loss": 0.3339, "step": 1774 }, { "epoch": 0.15142467155775466, "grad_norm": 1.3722119241311757, "learning_rate": 9.618471919381735e-05, "loss": 0.3553, "step": 1775 }, { "epoch": 0.1515099812318717, "grad_norm": 1.6037306089708154, "learning_rate": 9.617942438448812e-05, "loss": 0.3295, "step": 1776 }, { "epoch": 0.15159529090598875, "grad_norm": 1.4003300992902172, "learning_rate": 9.617412604960523e-05, "loss": 0.3681, "step": 1777 }, { "epoch": 0.1516806005801058, "grad_norm": 1.7640789318417542, "learning_rate": 9.616882418957318e-05, "loss": 0.4051, "step": 1778 }, { "epoch": 0.15176591025422284, "grad_norm": 1.169260898652667, "learning_rate": 9.616351880479675e-05, "loss": 0.324, "step": 1779 }, { "epoch": 0.1518512199283399, "grad_norm": 1.2750751753691671, "learning_rate": 9.615820989568098e-05, "loss": 0.3355, "step": 1780 }, { "epoch": 0.15193652960245693, "grad_norm": 1.4719317447552593, "learning_rate": 9.615289746263116e-05, "loss": 0.3695, "step": 1781 }, { "epoch": 0.15202183927657395, "grad_norm": 1.4220472797361745, "learning_rate": 9.614758150605286e-05, "loss": 0.3708, "step": 1782 }, { "epoch": 0.152107148950691, "grad_norm": 1.5917697646751967, "learning_rate": 9.614226202635195e-05, "loss": 0.3793, "step": 1783 }, { "epoch": 0.15219245862480804, "grad_norm": 1.5759938268298548, "learning_rate": 9.613693902393455e-05, "loss": 0.4172, "step": 1784 }, { "epoch": 0.1522777682989251, "grad_norm": 1.5676166680828407, "learning_rate": 9.613161249920701e-05, "loss": 0.3738, "step": 1785 }, { "epoch": 0.15236307797304213, "grad_norm": 1.3826927521144667, "learning_rate": 9.6126282452576e-05, "loss": 0.3848, "step": 1786 }, { "epoch": 0.15244838764715918, "grad_norm": 1.7691077258150025, "learning_rate": 9.612094888444845e-05, "loss": 0.4031, "step": 1787 }, { "epoch": 0.15253369732127622, "grad_norm": 1.1394484735968708, "learning_rate": 9.611561179523152e-05, "loss": 0.3697, "step": 1788 }, { "epoch": 0.15261900699539327, "grad_norm": 1.4672035029536938, "learning_rate": 9.611027118533271e-05, "loss": 0.3741, "step": 1789 }, { "epoch": 0.15270431666951031, "grad_norm": 1.5877232011693645, "learning_rate": 9.610492705515972e-05, "loss": 0.3742, "step": 1790 }, { "epoch": 0.15278962634362736, "grad_norm": 1.3683755127186337, "learning_rate": 9.609957940512054e-05, "loss": 0.3988, "step": 1791 }, { "epoch": 0.1528749360177444, "grad_norm": 1.472683124651167, "learning_rate": 9.609422823562345e-05, "loss": 0.4032, "step": 1792 }, { "epoch": 0.15296024569186145, "grad_norm": 1.3209454525955682, "learning_rate": 9.608887354707699e-05, "loss": 0.3413, "step": 1793 }, { "epoch": 0.1530455553659785, "grad_norm": 1.792818549353607, "learning_rate": 9.608351533988992e-05, "loss": 0.3989, "step": 1794 }, { "epoch": 0.15313086504009554, "grad_norm": 1.6625799461015265, "learning_rate": 9.607815361447136e-05, "loss": 0.3779, "step": 1795 }, { "epoch": 0.1532161747142126, "grad_norm": 1.7639910133548111, "learning_rate": 9.607278837123064e-05, "loss": 0.4184, "step": 1796 }, { "epoch": 0.15330148438832963, "grad_norm": 1.6711128137472173, "learning_rate": 9.606741961057736e-05, "loss": 0.3744, "step": 1797 }, { "epoch": 0.15338679406244668, "grad_norm": 1.5126975978688757, "learning_rate": 9.606204733292139e-05, "loss": 0.3529, "step": 1798 }, { "epoch": 0.15347210373656373, "grad_norm": 1.7457346024132616, "learning_rate": 9.605667153867286e-05, "loss": 0.3636, "step": 1799 }, { "epoch": 0.15355741341068077, "grad_norm": 1.2638887811784152, "learning_rate": 9.605129222824223e-05, "loss": 0.3727, "step": 1800 }, { "epoch": 0.15364272308479782, "grad_norm": 1.4243685477739585, "learning_rate": 9.604590940204013e-05, "loss": 0.3613, "step": 1801 }, { "epoch": 0.15372803275891486, "grad_norm": 1.3146758436407582, "learning_rate": 9.604052306047755e-05, "loss": 0.3022, "step": 1802 }, { "epoch": 0.1538133424330319, "grad_norm": 1.4733512715647232, "learning_rate": 9.603513320396569e-05, "loss": 0.3432, "step": 1803 }, { "epoch": 0.15389865210714895, "grad_norm": 1.7525916663560606, "learning_rate": 9.602973983291604e-05, "loss": 0.341, "step": 1804 }, { "epoch": 0.153983961781266, "grad_norm": 1.5942813511123175, "learning_rate": 9.602434294774037e-05, "loss": 0.3396, "step": 1805 }, { "epoch": 0.15406927145538304, "grad_norm": 1.7481253664945806, "learning_rate": 9.601894254885067e-05, "loss": 0.3472, "step": 1806 }, { "epoch": 0.1541545811295001, "grad_norm": 1.5993125928991831, "learning_rate": 9.601353863665925e-05, "loss": 0.417, "step": 1807 }, { "epoch": 0.15423989080361714, "grad_norm": 1.4397825280853813, "learning_rate": 9.600813121157868e-05, "loss": 0.3458, "step": 1808 }, { "epoch": 0.15432520047773418, "grad_norm": 1.3972929207746092, "learning_rate": 9.600272027402178e-05, "loss": 0.4074, "step": 1809 }, { "epoch": 0.15441051015185123, "grad_norm": 1.2668024872859318, "learning_rate": 9.599730582440163e-05, "loss": 0.3325, "step": 1810 }, { "epoch": 0.15449581982596827, "grad_norm": 1.6039052808101828, "learning_rate": 9.599188786313162e-05, "loss": 0.3699, "step": 1811 }, { "epoch": 0.15458112950008532, "grad_norm": 1.5346076120856722, "learning_rate": 9.598646639062538e-05, "loss": 0.3955, "step": 1812 }, { "epoch": 0.15466643917420236, "grad_norm": 1.6340080885109143, "learning_rate": 9.59810414072968e-05, "loss": 0.4158, "step": 1813 }, { "epoch": 0.1547517488483194, "grad_norm": 1.5695878059979012, "learning_rate": 9.597561291356004e-05, "loss": 0.3773, "step": 1814 }, { "epoch": 0.15483705852243645, "grad_norm": 1.466002552717206, "learning_rate": 9.597018090982956e-05, "loss": 0.3832, "step": 1815 }, { "epoch": 0.1549223681965535, "grad_norm": 1.254091433341745, "learning_rate": 9.596474539652005e-05, "loss": 0.359, "step": 1816 }, { "epoch": 0.15500767787067055, "grad_norm": 1.4213354742941797, "learning_rate": 9.595930637404649e-05, "loss": 0.3661, "step": 1817 }, { "epoch": 0.1550929875447876, "grad_norm": 1.330105819077737, "learning_rate": 9.59538638428241e-05, "loss": 0.3946, "step": 1818 }, { "epoch": 0.15517829721890464, "grad_norm": 1.4036141169638505, "learning_rate": 9.594841780326842e-05, "loss": 0.3244, "step": 1819 }, { "epoch": 0.15526360689302166, "grad_norm": 1.303698776131246, "learning_rate": 9.59429682557952e-05, "loss": 0.3521, "step": 1820 }, { "epoch": 0.1553489165671387, "grad_norm": 1.4410373453440648, "learning_rate": 9.59375152008205e-05, "loss": 0.3277, "step": 1821 }, { "epoch": 0.15543422624125575, "grad_norm": 1.3475386858537444, "learning_rate": 9.593205863876062e-05, "loss": 0.4049, "step": 1822 }, { "epoch": 0.1555195359153728, "grad_norm": 1.4593369799609488, "learning_rate": 9.592659857003214e-05, "loss": 0.3505, "step": 1823 }, { "epoch": 0.15560484558948984, "grad_norm": 1.5656568299368185, "learning_rate": 9.592113499505193e-05, "loss": 0.3679, "step": 1824 }, { "epoch": 0.15569015526360688, "grad_norm": 1.679516592867654, "learning_rate": 9.591566791423708e-05, "loss": 0.3855, "step": 1825 }, { "epoch": 0.15577546493772393, "grad_norm": 1.6455988737090415, "learning_rate": 9.591019732800499e-05, "loss": 0.3675, "step": 1826 }, { "epoch": 0.15586077461184097, "grad_norm": 1.4259535593921322, "learning_rate": 9.590472323677328e-05, "loss": 0.3635, "step": 1827 }, { "epoch": 0.15594608428595802, "grad_norm": 1.5206448512226005, "learning_rate": 9.589924564095991e-05, "loss": 0.3685, "step": 1828 }, { "epoch": 0.15603139396007507, "grad_norm": 1.6547382581092218, "learning_rate": 9.589376454098304e-05, "loss": 0.3603, "step": 1829 }, { "epoch": 0.1561167036341921, "grad_norm": 1.4780365383138192, "learning_rate": 9.58882799372611e-05, "loss": 0.3628, "step": 1830 }, { "epoch": 0.15620201330830916, "grad_norm": 1.5656247746206642, "learning_rate": 9.588279183021288e-05, "loss": 0.4144, "step": 1831 }, { "epoch": 0.1562873229824262, "grad_norm": 1.895526302194404, "learning_rate": 9.58773002202573e-05, "loss": 0.4114, "step": 1832 }, { "epoch": 0.15637263265654325, "grad_norm": 1.5216362893260724, "learning_rate": 9.587180510781363e-05, "loss": 0.3848, "step": 1833 }, { "epoch": 0.1564579423306603, "grad_norm": 1.275557549434821, "learning_rate": 9.586630649330142e-05, "loss": 0.3665, "step": 1834 }, { "epoch": 0.15654325200477734, "grad_norm": 1.657339637329028, "learning_rate": 9.586080437714044e-05, "loss": 0.435, "step": 1835 }, { "epoch": 0.15662856167889438, "grad_norm": 1.7698956718517342, "learning_rate": 9.585529875975074e-05, "loss": 0.4315, "step": 1836 }, { "epoch": 0.15671387135301143, "grad_norm": 1.5718661541244323, "learning_rate": 9.584978964155266e-05, "loss": 0.3674, "step": 1837 }, { "epoch": 0.15679918102712848, "grad_norm": 1.488605774268017, "learning_rate": 9.58442770229668e-05, "loss": 0.3896, "step": 1838 }, { "epoch": 0.15688449070124552, "grad_norm": 1.6015441893484523, "learning_rate": 9.583876090441398e-05, "loss": 0.4582, "step": 1839 }, { "epoch": 0.15696980037536257, "grad_norm": 1.414752433606239, "learning_rate": 9.583324128631537e-05, "loss": 0.419, "step": 1840 }, { "epoch": 0.1570551100494796, "grad_norm": 1.34897873714535, "learning_rate": 9.582771816909234e-05, "loss": 0.3896, "step": 1841 }, { "epoch": 0.15714041972359666, "grad_norm": 1.3296950540653014, "learning_rate": 9.582219155316656e-05, "loss": 0.4392, "step": 1842 }, { "epoch": 0.1572257293977137, "grad_norm": 1.1926690085423441, "learning_rate": 9.581666143895994e-05, "loss": 0.3672, "step": 1843 }, { "epoch": 0.15731103907183075, "grad_norm": 1.295380524067418, "learning_rate": 9.58111278268947e-05, "loss": 0.3516, "step": 1844 }, { "epoch": 0.1573963487459478, "grad_norm": 1.4439316210103457, "learning_rate": 9.580559071739329e-05, "loss": 0.3323, "step": 1845 }, { "epoch": 0.15748165842006484, "grad_norm": 1.4219762745097242, "learning_rate": 9.580005011087844e-05, "loss": 0.3759, "step": 1846 }, { "epoch": 0.1575669680941819, "grad_norm": 1.614302415918115, "learning_rate": 9.579450600777314e-05, "loss": 0.4064, "step": 1847 }, { "epoch": 0.15765227776829893, "grad_norm": 1.7286542416611643, "learning_rate": 9.578895840850066e-05, "loss": 0.4124, "step": 1848 }, { "epoch": 0.15773758744241598, "grad_norm": 1.4105163283567599, "learning_rate": 9.578340731348454e-05, "loss": 0.3605, "step": 1849 }, { "epoch": 0.15782289711653302, "grad_norm": 1.4377166750605601, "learning_rate": 9.577785272314854e-05, "loss": 0.4112, "step": 1850 }, { "epoch": 0.15790820679065007, "grad_norm": 1.5296013968591018, "learning_rate": 9.577229463791677e-05, "loss": 0.4608, "step": 1851 }, { "epoch": 0.15799351646476711, "grad_norm": 1.6306203335760683, "learning_rate": 9.576673305821353e-05, "loss": 0.4, "step": 1852 }, { "epoch": 0.15807882613888416, "grad_norm": 1.5714337345756821, "learning_rate": 9.576116798446342e-05, "loss": 0.3796, "step": 1853 }, { "epoch": 0.1581641358130012, "grad_norm": 1.4518948341883604, "learning_rate": 9.575559941709131e-05, "loss": 0.4089, "step": 1854 }, { "epoch": 0.15824944548711825, "grad_norm": 1.3384995460067737, "learning_rate": 9.575002735652234e-05, "loss": 0.3748, "step": 1855 }, { "epoch": 0.1583347551612353, "grad_norm": 1.2148542495715386, "learning_rate": 9.57444518031819e-05, "loss": 0.3273, "step": 1856 }, { "epoch": 0.15842006483535234, "grad_norm": 1.7143298098024615, "learning_rate": 9.573887275749564e-05, "loss": 0.4324, "step": 1857 }, { "epoch": 0.15850537450946936, "grad_norm": 1.276832408222686, "learning_rate": 9.573329021988949e-05, "loss": 0.3495, "step": 1858 }, { "epoch": 0.1585906841835864, "grad_norm": 1.3173591040421309, "learning_rate": 9.572770419078966e-05, "loss": 0.3829, "step": 1859 }, { "epoch": 0.15867599385770345, "grad_norm": 1.4148952497928433, "learning_rate": 9.572211467062264e-05, "loss": 0.3513, "step": 1860 }, { "epoch": 0.1587613035318205, "grad_norm": 1.5818611712927055, "learning_rate": 9.57165216598151e-05, "loss": 0.3626, "step": 1861 }, { "epoch": 0.15884661320593754, "grad_norm": 1.4571290993241361, "learning_rate": 9.57109251587941e-05, "loss": 0.3605, "step": 1862 }, { "epoch": 0.1589319228800546, "grad_norm": 1.5544672671037723, "learning_rate": 9.570532516798685e-05, "loss": 0.3663, "step": 1863 }, { "epoch": 0.15901723255417163, "grad_norm": 1.5049698672652363, "learning_rate": 9.56997216878209e-05, "loss": 0.3807, "step": 1864 }, { "epoch": 0.15910254222828868, "grad_norm": 1.41633069972578, "learning_rate": 9.569411471872404e-05, "loss": 0.3576, "step": 1865 }, { "epoch": 0.15918785190240572, "grad_norm": 1.587760065958907, "learning_rate": 9.568850426112436e-05, "loss": 0.3111, "step": 1866 }, { "epoch": 0.15927316157652277, "grad_norm": 1.3097719269147807, "learning_rate": 9.568289031545017e-05, "loss": 0.3019, "step": 1867 }, { "epoch": 0.15935847125063982, "grad_norm": 1.6203138566632234, "learning_rate": 9.567727288213005e-05, "loss": 0.3679, "step": 1868 }, { "epoch": 0.15944378092475686, "grad_norm": 1.2860025326865545, "learning_rate": 9.567165196159288e-05, "loss": 0.353, "step": 1869 }, { "epoch": 0.1595290905988739, "grad_norm": 1.5200721542900675, "learning_rate": 9.566602755426776e-05, "loss": 0.4107, "step": 1870 }, { "epoch": 0.15961440027299095, "grad_norm": 1.476225153417582, "learning_rate": 9.566039966058414e-05, "loss": 0.3631, "step": 1871 }, { "epoch": 0.159699709947108, "grad_norm": 1.607185652329586, "learning_rate": 9.565476828097163e-05, "loss": 0.4038, "step": 1872 }, { "epoch": 0.15978501962122504, "grad_norm": 1.5163932504936912, "learning_rate": 9.564913341586017e-05, "loss": 0.385, "step": 1873 }, { "epoch": 0.1598703292953421, "grad_norm": 1.4986182842646667, "learning_rate": 9.564349506567996e-05, "loss": 0.3332, "step": 1874 }, { "epoch": 0.15995563896945914, "grad_norm": 2.3769041760058527, "learning_rate": 9.563785323086143e-05, "loss": 0.312, "step": 1875 }, { "epoch": 0.16004094864357618, "grad_norm": 1.6666128467771033, "learning_rate": 9.563220791183535e-05, "loss": 0.3998, "step": 1876 }, { "epoch": 0.16012625831769323, "grad_norm": 1.742512899211497, "learning_rate": 9.562655910903267e-05, "loss": 0.4209, "step": 1877 }, { "epoch": 0.16021156799181027, "grad_norm": 1.3649889462061866, "learning_rate": 9.562090682288467e-05, "loss": 0.3736, "step": 1878 }, { "epoch": 0.16029687766592732, "grad_norm": 1.6153965122532383, "learning_rate": 9.561525105382286e-05, "loss": 0.3388, "step": 1879 }, { "epoch": 0.16038218734004436, "grad_norm": 1.4602591015558704, "learning_rate": 9.560959180227902e-05, "loss": 0.3718, "step": 1880 }, { "epoch": 0.1604674970141614, "grad_norm": 1.3068630331431983, "learning_rate": 9.560392906868522e-05, "loss": 0.3473, "step": 1881 }, { "epoch": 0.16055280668827845, "grad_norm": 1.4672763005400398, "learning_rate": 9.55982628534738e-05, "loss": 0.3722, "step": 1882 }, { "epoch": 0.1606381163623955, "grad_norm": 1.3205055682531819, "learning_rate": 9.559259315707729e-05, "loss": 0.3526, "step": 1883 }, { "epoch": 0.16072342603651255, "grad_norm": 1.5156657419922688, "learning_rate": 9.558691997992858e-05, "loss": 0.4008, "step": 1884 }, { "epoch": 0.1608087357106296, "grad_norm": 1.5963796724539772, "learning_rate": 9.558124332246078e-05, "loss": 0.3475, "step": 1885 }, { "epoch": 0.16089404538474664, "grad_norm": 1.3398050463900162, "learning_rate": 9.557556318510728e-05, "loss": 0.386, "step": 1886 }, { "epoch": 0.16097935505886368, "grad_norm": 1.4352413345391606, "learning_rate": 9.55698795683017e-05, "loss": 0.3612, "step": 1887 }, { "epoch": 0.16106466473298073, "grad_norm": 1.6608449427455243, "learning_rate": 9.556419247247799e-05, "loss": 0.3772, "step": 1888 }, { "epoch": 0.16114997440709777, "grad_norm": 1.3761962541970234, "learning_rate": 9.55585018980703e-05, "loss": 0.3545, "step": 1889 }, { "epoch": 0.16123528408121482, "grad_norm": 1.3104686457883066, "learning_rate": 9.555280784551308e-05, "loss": 0.3422, "step": 1890 }, { "epoch": 0.16132059375533186, "grad_norm": 1.438931871547079, "learning_rate": 9.554711031524107e-05, "loss": 0.368, "step": 1891 }, { "epoch": 0.1614059034294489, "grad_norm": 1.830286948423147, "learning_rate": 9.554140930768922e-05, "loss": 0.3997, "step": 1892 }, { "epoch": 0.16149121310356596, "grad_norm": 1.470565485497824, "learning_rate": 9.553570482329277e-05, "loss": 0.3638, "step": 1893 }, { "epoch": 0.161576522777683, "grad_norm": 1.4154990470718631, "learning_rate": 9.552999686248722e-05, "loss": 0.3842, "step": 1894 }, { "epoch": 0.16166183245180005, "grad_norm": 1.922638098093566, "learning_rate": 9.552428542570838e-05, "loss": 0.4605, "step": 1895 }, { "epoch": 0.1617471421259171, "grad_norm": 1.6114139418435518, "learning_rate": 9.551857051339225e-05, "loss": 0.3604, "step": 1896 }, { "epoch": 0.1618324518000341, "grad_norm": 1.6308829117407115, "learning_rate": 9.551285212597516e-05, "loss": 0.4207, "step": 1897 }, { "epoch": 0.16191776147415116, "grad_norm": 1.6920064869839742, "learning_rate": 9.550713026389366e-05, "loss": 0.4286, "step": 1898 }, { "epoch": 0.1620030711482682, "grad_norm": 1.595518738868818, "learning_rate": 9.550140492758457e-05, "loss": 0.3979, "step": 1899 }, { "epoch": 0.16208838082238525, "grad_norm": 1.3995563996382685, "learning_rate": 9.549567611748503e-05, "loss": 0.3363, "step": 1900 }, { "epoch": 0.1621736904965023, "grad_norm": 1.8677251193442368, "learning_rate": 9.548994383403238e-05, "loss": 0.3814, "step": 1901 }, { "epoch": 0.16225900017061934, "grad_norm": 1.2499863623828815, "learning_rate": 9.548420807766425e-05, "loss": 0.3538, "step": 1902 }, { "epoch": 0.16234430984473638, "grad_norm": 1.5057369830591596, "learning_rate": 9.547846884881853e-05, "loss": 0.3974, "step": 1903 }, { "epoch": 0.16242961951885343, "grad_norm": 1.4486077761019904, "learning_rate": 9.547272614793339e-05, "loss": 0.3904, "step": 1904 }, { "epoch": 0.16251492919297048, "grad_norm": 1.2552633102006092, "learning_rate": 9.546697997544725e-05, "loss": 0.354, "step": 1905 }, { "epoch": 0.16260023886708752, "grad_norm": 1.3700055534361615, "learning_rate": 9.546123033179879e-05, "loss": 0.3211, "step": 1906 }, { "epoch": 0.16268554854120457, "grad_norm": 1.178256347781605, "learning_rate": 9.545547721742698e-05, "loss": 0.3716, "step": 1907 }, { "epoch": 0.1627708582153216, "grad_norm": 1.6359036767299517, "learning_rate": 9.544972063277104e-05, "loss": 0.4403, "step": 1908 }, { "epoch": 0.16285616788943866, "grad_norm": 1.671079927862571, "learning_rate": 9.544396057827045e-05, "loss": 0.4025, "step": 1909 }, { "epoch": 0.1629414775635557, "grad_norm": 1.7543583820291182, "learning_rate": 9.543819705436496e-05, "loss": 0.4178, "step": 1910 }, { "epoch": 0.16302678723767275, "grad_norm": 1.6320234518563321, "learning_rate": 9.543243006149459e-05, "loss": 0.3869, "step": 1911 }, { "epoch": 0.1631120969117898, "grad_norm": 1.3509923801927257, "learning_rate": 9.542665960009959e-05, "loss": 0.3269, "step": 1912 }, { "epoch": 0.16319740658590684, "grad_norm": 1.448202512047144, "learning_rate": 9.542088567062055e-05, "loss": 0.4043, "step": 1913 }, { "epoch": 0.16328271626002389, "grad_norm": 1.6246689679350357, "learning_rate": 9.541510827349823e-05, "loss": 0.3736, "step": 1914 }, { "epoch": 0.16336802593414093, "grad_norm": 1.6026162007495455, "learning_rate": 9.540932740917374e-05, "loss": 0.355, "step": 1915 }, { "epoch": 0.16345333560825798, "grad_norm": 1.5214137794266787, "learning_rate": 9.540354307808841e-05, "loss": 0.3675, "step": 1916 }, { "epoch": 0.16353864528237502, "grad_norm": 1.527359047737488, "learning_rate": 9.539775528068384e-05, "loss": 0.3929, "step": 1917 }, { "epoch": 0.16362395495649207, "grad_norm": 1.3638203164494591, "learning_rate": 9.53919640174019e-05, "loss": 0.3719, "step": 1918 }, { "epoch": 0.1637092646306091, "grad_norm": 1.7213555440353754, "learning_rate": 9.538616928868473e-05, "loss": 0.3115, "step": 1919 }, { "epoch": 0.16379457430472616, "grad_norm": 1.490046058045333, "learning_rate": 9.53803710949747e-05, "loss": 0.3493, "step": 1920 }, { "epoch": 0.1638798839788432, "grad_norm": 1.5242243373187732, "learning_rate": 9.53745694367145e-05, "loss": 0.3396, "step": 1921 }, { "epoch": 0.16396519365296025, "grad_norm": 1.6386968999504534, "learning_rate": 9.536876431434703e-05, "loss": 0.3548, "step": 1922 }, { "epoch": 0.1640505033270773, "grad_norm": 1.3515409787620016, "learning_rate": 9.53629557283155e-05, "loss": 0.3635, "step": 1923 }, { "epoch": 0.16413581300119434, "grad_norm": 1.269756966112304, "learning_rate": 9.535714367906336e-05, "loss": 0.3563, "step": 1924 }, { "epoch": 0.1642211226753114, "grad_norm": 1.8087833049794988, "learning_rate": 9.535132816703432e-05, "loss": 0.3984, "step": 1925 }, { "epoch": 0.16430643234942843, "grad_norm": 1.4961853796087667, "learning_rate": 9.534550919267238e-05, "loss": 0.4283, "step": 1926 }, { "epoch": 0.16439174202354548, "grad_norm": 1.3417772405484392, "learning_rate": 9.533968675642178e-05, "loss": 0.3747, "step": 1927 }, { "epoch": 0.16447705169766252, "grad_norm": 1.4152247252465662, "learning_rate": 9.533386085872703e-05, "loss": 0.3624, "step": 1928 }, { "epoch": 0.16456236137177957, "grad_norm": 1.56261695424111, "learning_rate": 9.53280315000329e-05, "loss": 0.3667, "step": 1929 }, { "epoch": 0.16464767104589662, "grad_norm": 1.4431763405173679, "learning_rate": 9.532219868078445e-05, "loss": 0.3983, "step": 1930 }, { "epoch": 0.16473298072001366, "grad_norm": 1.6622442063041163, "learning_rate": 9.531636240142696e-05, "loss": 0.3548, "step": 1931 }, { "epoch": 0.1648182903941307, "grad_norm": 1.3615859894371232, "learning_rate": 9.531052266240601e-05, "loss": 0.3663, "step": 1932 }, { "epoch": 0.16490360006824775, "grad_norm": 1.3166543517361726, "learning_rate": 9.530467946416745e-05, "loss": 0.3512, "step": 1933 }, { "epoch": 0.1649889097423648, "grad_norm": 1.239041453082941, "learning_rate": 9.529883280715735e-05, "loss": 0.3846, "step": 1934 }, { "epoch": 0.16507421941648182, "grad_norm": 1.1925324167288014, "learning_rate": 9.529298269182209e-05, "loss": 0.3299, "step": 1935 }, { "epoch": 0.16515952909059886, "grad_norm": 1.362618446451218, "learning_rate": 9.528712911860829e-05, "loss": 0.3666, "step": 1936 }, { "epoch": 0.1652448387647159, "grad_norm": 1.384878436444713, "learning_rate": 9.528127208796282e-05, "loss": 0.3809, "step": 1937 }, { "epoch": 0.16533014843883295, "grad_norm": 1.348572880920801, "learning_rate": 9.527541160033286e-05, "loss": 0.4209, "step": 1938 }, { "epoch": 0.16541545811295, "grad_norm": 1.5952522826902542, "learning_rate": 9.526954765616583e-05, "loss": 0.3868, "step": 1939 }, { "epoch": 0.16550076778706704, "grad_norm": 1.8113470685707431, "learning_rate": 9.526368025590938e-05, "loss": 0.3842, "step": 1940 }, { "epoch": 0.1655860774611841, "grad_norm": 1.5844576842272216, "learning_rate": 9.525780940001148e-05, "loss": 0.383, "step": 1941 }, { "epoch": 0.16567138713530113, "grad_norm": 1.3340186453978917, "learning_rate": 9.525193508892034e-05, "loss": 0.3531, "step": 1942 }, { "epoch": 0.16575669680941818, "grad_norm": 1.6727453087835131, "learning_rate": 9.524605732308442e-05, "loss": 0.4205, "step": 1943 }, { "epoch": 0.16584200648353523, "grad_norm": 1.4807083620409418, "learning_rate": 9.524017610295245e-05, "loss": 0.3581, "step": 1944 }, { "epoch": 0.16592731615765227, "grad_norm": 1.508919976243008, "learning_rate": 9.523429142897346e-05, "loss": 0.3935, "step": 1945 }, { "epoch": 0.16601262583176932, "grad_norm": 1.980347518671471, "learning_rate": 9.522840330159669e-05, "loss": 0.5012, "step": 1946 }, { "epoch": 0.16609793550588636, "grad_norm": 1.326389794851441, "learning_rate": 9.522251172127166e-05, "loss": 0.3495, "step": 1947 }, { "epoch": 0.1661832451800034, "grad_norm": 2.0305766236615024, "learning_rate": 9.521661668844817e-05, "loss": 0.3906, "step": 1948 }, { "epoch": 0.16626855485412045, "grad_norm": 1.4005438003936128, "learning_rate": 9.521071820357627e-05, "loss": 0.4327, "step": 1949 }, { "epoch": 0.1663538645282375, "grad_norm": 1.2269886029933654, "learning_rate": 9.520481626710631e-05, "loss": 0.3716, "step": 1950 }, { "epoch": 0.16643917420235455, "grad_norm": 1.2090205616997054, "learning_rate": 9.519891087948884e-05, "loss": 0.3712, "step": 1951 }, { "epoch": 0.1665244838764716, "grad_norm": 1.5279518731462065, "learning_rate": 9.519300204117469e-05, "loss": 0.3206, "step": 1952 }, { "epoch": 0.16660979355058864, "grad_norm": 1.4493951818616395, "learning_rate": 9.518708975261502e-05, "loss": 0.3715, "step": 1953 }, { "epoch": 0.16669510322470568, "grad_norm": 1.867100988961649, "learning_rate": 9.518117401426115e-05, "loss": 0.4197, "step": 1954 }, { "epoch": 0.16678041289882273, "grad_norm": 1.5852036305016588, "learning_rate": 9.517525482656475e-05, "loss": 0.3669, "step": 1955 }, { "epoch": 0.16686572257293977, "grad_norm": 1.3905643921406787, "learning_rate": 9.51693321899777e-05, "loss": 0.365, "step": 1956 }, { "epoch": 0.16695103224705682, "grad_norm": 1.4459994771225697, "learning_rate": 9.516340610495215e-05, "loss": 0.393, "step": 1957 }, { "epoch": 0.16703634192117386, "grad_norm": 1.5083783921032674, "learning_rate": 9.515747657194056e-05, "loss": 0.4657, "step": 1958 }, { "epoch": 0.1671216515952909, "grad_norm": 1.514685742861431, "learning_rate": 9.51515435913956e-05, "loss": 0.3848, "step": 1959 }, { "epoch": 0.16720696126940796, "grad_norm": 1.4146436477110251, "learning_rate": 9.514560716377023e-05, "loss": 0.362, "step": 1960 }, { "epoch": 0.167292270943525, "grad_norm": 1.3574481330362744, "learning_rate": 9.513966728951764e-05, "loss": 0.3546, "step": 1961 }, { "epoch": 0.16737758061764205, "grad_norm": 1.4527296123134235, "learning_rate": 9.513372396909133e-05, "loss": 0.3305, "step": 1962 }, { "epoch": 0.1674628902917591, "grad_norm": 1.5155710427029596, "learning_rate": 9.512777720294504e-05, "loss": 0.3812, "step": 1963 }, { "epoch": 0.16754819996587614, "grad_norm": 1.6072407616963769, "learning_rate": 9.512182699153276e-05, "loss": 0.4183, "step": 1964 }, { "epoch": 0.16763350963999318, "grad_norm": 1.4193646414549708, "learning_rate": 9.511587333530877e-05, "loss": 0.4076, "step": 1965 }, { "epoch": 0.16771881931411023, "grad_norm": 1.7561358105879774, "learning_rate": 9.51099162347276e-05, "loss": 0.3919, "step": 1966 }, { "epoch": 0.16780412898822727, "grad_norm": 1.5484298444110227, "learning_rate": 9.510395569024404e-05, "loss": 0.3444, "step": 1967 }, { "epoch": 0.16788943866234432, "grad_norm": 1.5954295265846423, "learning_rate": 9.509799170231314e-05, "loss": 0.4207, "step": 1968 }, { "epoch": 0.16797474833646137, "grad_norm": 1.416650585008485, "learning_rate": 9.509202427139023e-05, "loss": 0.349, "step": 1969 }, { "epoch": 0.1680600580105784, "grad_norm": 1.5646460482053284, "learning_rate": 9.508605339793087e-05, "loss": 0.3922, "step": 1970 }, { "epoch": 0.16814536768469546, "grad_norm": 1.56693842877801, "learning_rate": 9.508007908239094e-05, "loss": 0.4232, "step": 1971 }, { "epoch": 0.1682306773588125, "grad_norm": 1.4906920284019969, "learning_rate": 9.507410132522652e-05, "loss": 0.3683, "step": 1972 }, { "epoch": 0.16831598703292952, "grad_norm": 1.1634214543484371, "learning_rate": 9.506812012689399e-05, "loss": 0.3437, "step": 1973 }, { "epoch": 0.16840129670704657, "grad_norm": 1.416588566067115, "learning_rate": 9.506213548784996e-05, "loss": 0.4123, "step": 1974 }, { "epoch": 0.1684866063811636, "grad_norm": 1.4456044495041753, "learning_rate": 9.505614740855138e-05, "loss": 0.3667, "step": 1975 }, { "epoch": 0.16857191605528066, "grad_norm": 1.3769619988771018, "learning_rate": 9.505015588945534e-05, "loss": 0.3469, "step": 1976 }, { "epoch": 0.1686572257293977, "grad_norm": 1.610110438924628, "learning_rate": 9.50441609310193e-05, "loss": 0.4288, "step": 1977 }, { "epoch": 0.16874253540351475, "grad_norm": 1.4264110271680157, "learning_rate": 9.503816253370097e-05, "loss": 0.3654, "step": 1978 }, { "epoch": 0.1688278450776318, "grad_norm": 1.3930720968628294, "learning_rate": 9.503216069795824e-05, "loss": 0.3428, "step": 1979 }, { "epoch": 0.16891315475174884, "grad_norm": 1.6184226216841973, "learning_rate": 9.502615542424933e-05, "loss": 0.3701, "step": 1980 }, { "epoch": 0.16899846442586589, "grad_norm": 1.3913098481253559, "learning_rate": 9.502014671303275e-05, "loss": 0.3523, "step": 1981 }, { "epoch": 0.16908377409998293, "grad_norm": 1.396789602788483, "learning_rate": 9.501413456476717e-05, "loss": 0.3763, "step": 1982 }, { "epoch": 0.16916908377409998, "grad_norm": 1.6872634721931155, "learning_rate": 9.500811897991164e-05, "loss": 0.4412, "step": 1983 }, { "epoch": 0.16925439344821702, "grad_norm": 1.663980687955517, "learning_rate": 9.500209995892541e-05, "loss": 0.4963, "step": 1984 }, { "epoch": 0.16933970312233407, "grad_norm": 1.3861634768012483, "learning_rate": 9.499607750226797e-05, "loss": 0.3429, "step": 1985 }, { "epoch": 0.1694250127964511, "grad_norm": 1.5270449450960053, "learning_rate": 9.499005161039914e-05, "loss": 0.3507, "step": 1986 }, { "epoch": 0.16951032247056816, "grad_norm": 1.6017181320866514, "learning_rate": 9.498402228377892e-05, "loss": 0.3392, "step": 1987 }, { "epoch": 0.1695956321446852, "grad_norm": 1.4465822158429134, "learning_rate": 9.497798952286767e-05, "loss": 0.3808, "step": 1988 }, { "epoch": 0.16968094181880225, "grad_norm": 1.5496939203091256, "learning_rate": 9.497195332812592e-05, "loss": 0.3526, "step": 1989 }, { "epoch": 0.1697662514929193, "grad_norm": 1.4224963664459322, "learning_rate": 9.49659137000145e-05, "loss": 0.4149, "step": 1990 }, { "epoch": 0.16985156116703634, "grad_norm": 1.7796148189878567, "learning_rate": 9.495987063899454e-05, "loss": 0.3964, "step": 1991 }, { "epoch": 0.1699368708411534, "grad_norm": 1.436033911057561, "learning_rate": 9.495382414552737e-05, "loss": 0.3897, "step": 1992 }, { "epoch": 0.17002218051527043, "grad_norm": 1.6045434517544594, "learning_rate": 9.494777422007462e-05, "loss": 0.3629, "step": 1993 }, { "epoch": 0.17010749018938748, "grad_norm": 1.8689485170183626, "learning_rate": 9.494172086309813e-05, "loss": 0.3982, "step": 1994 }, { "epoch": 0.17019279986350452, "grad_norm": 1.6164578620853254, "learning_rate": 9.493566407506009e-05, "loss": 0.3853, "step": 1995 }, { "epoch": 0.17027810953762157, "grad_norm": 1.4874631380274121, "learning_rate": 9.492960385642288e-05, "loss": 0.332, "step": 1996 }, { "epoch": 0.17036341921173861, "grad_norm": 1.2198479426319129, "learning_rate": 9.492354020764919e-05, "loss": 0.33, "step": 1997 }, { "epoch": 0.17044872888585566, "grad_norm": 1.768381313544331, "learning_rate": 9.491747312920191e-05, "loss": 0.3724, "step": 1998 }, { "epoch": 0.1705340385599727, "grad_norm": 1.8577899748621323, "learning_rate": 9.491140262154426e-05, "loss": 0.3783, "step": 1999 }, { "epoch": 0.17061934823408975, "grad_norm": 1.3978784818403818, "learning_rate": 9.490532868513967e-05, "loss": 0.3611, "step": 2000 }, { "epoch": 0.1707046579082068, "grad_norm": 1.4197414446218812, "learning_rate": 9.489925132045185e-05, "loss": 0.3273, "step": 2001 }, { "epoch": 0.17078996758232384, "grad_norm": 1.3955491569059186, "learning_rate": 9.489317052794481e-05, "loss": 0.3433, "step": 2002 }, { "epoch": 0.1708752772564409, "grad_norm": 1.4724980681686812, "learning_rate": 9.488708630808275e-05, "loss": 0.3604, "step": 2003 }, { "epoch": 0.17096058693055793, "grad_norm": 1.6198405005973546, "learning_rate": 9.488099866133017e-05, "loss": 0.3746, "step": 2004 }, { "epoch": 0.17104589660467498, "grad_norm": 1.5827595859129449, "learning_rate": 9.487490758815186e-05, "loss": 0.3444, "step": 2005 }, { "epoch": 0.17113120627879203, "grad_norm": 1.623438011073894, "learning_rate": 9.486881308901281e-05, "loss": 0.3362, "step": 2006 }, { "epoch": 0.17121651595290907, "grad_norm": 1.186474407423192, "learning_rate": 9.486271516437832e-05, "loss": 0.3414, "step": 2007 }, { "epoch": 0.17130182562702612, "grad_norm": 1.5865338336548076, "learning_rate": 9.485661381471393e-05, "loss": 0.3705, "step": 2008 }, { "epoch": 0.17138713530114316, "grad_norm": 1.553833309895497, "learning_rate": 9.485050904048542e-05, "loss": 0.3343, "step": 2009 }, { "epoch": 0.1714724449752602, "grad_norm": 1.3298780652131086, "learning_rate": 9.48444008421589e-05, "loss": 0.2949, "step": 2010 }, { "epoch": 0.17155775464937725, "grad_norm": 1.5805011649774627, "learning_rate": 9.483828922020069e-05, "loss": 0.3688, "step": 2011 }, { "epoch": 0.17164306432349427, "grad_norm": 1.3255981192950144, "learning_rate": 9.483217417507734e-05, "loss": 0.3411, "step": 2012 }, { "epoch": 0.17172837399761132, "grad_norm": 1.3468772958970472, "learning_rate": 9.482605570725575e-05, "loss": 0.3769, "step": 2013 }, { "epoch": 0.17181368367172836, "grad_norm": 1.6095659457511904, "learning_rate": 9.4819933817203e-05, "loss": 0.3522, "step": 2014 }, { "epoch": 0.1718989933458454, "grad_norm": 1.5444136795270016, "learning_rate": 9.481380850538648e-05, "loss": 0.3537, "step": 2015 }, { "epoch": 0.17198430301996245, "grad_norm": 1.3514609767291486, "learning_rate": 9.480767977227383e-05, "loss": 0.3702, "step": 2016 }, { "epoch": 0.1720696126940795, "grad_norm": 1.5701079733585683, "learning_rate": 9.480154761833293e-05, "loss": 0.376, "step": 2017 }, { "epoch": 0.17215492236819654, "grad_norm": 1.3989171745208626, "learning_rate": 9.479541204403193e-05, "loss": 0.3647, "step": 2018 }, { "epoch": 0.1722402320423136, "grad_norm": 1.5828768507142232, "learning_rate": 9.47892730498393e-05, "loss": 0.3681, "step": 2019 }, { "epoch": 0.17232554171643064, "grad_norm": 1.5871240846598673, "learning_rate": 9.478313063622364e-05, "loss": 0.3513, "step": 2020 }, { "epoch": 0.17241085139054768, "grad_norm": 1.5942884545368605, "learning_rate": 9.477698480365395e-05, "loss": 0.3183, "step": 2021 }, { "epoch": 0.17249616106466473, "grad_norm": 1.6804868703350253, "learning_rate": 9.477083555259943e-05, "loss": 0.4235, "step": 2022 }, { "epoch": 0.17258147073878177, "grad_norm": 1.4513051111762538, "learning_rate": 9.476468288352951e-05, "loss": 0.3711, "step": 2023 }, { "epoch": 0.17266678041289882, "grad_norm": 1.8514665667311923, "learning_rate": 9.475852679691393e-05, "loss": 0.3978, "step": 2024 }, { "epoch": 0.17275209008701586, "grad_norm": 1.381366771405277, "learning_rate": 9.475236729322268e-05, "loss": 0.3465, "step": 2025 }, { "epoch": 0.1728373997611329, "grad_norm": 1.6029250141055684, "learning_rate": 9.4746204372926e-05, "loss": 0.3841, "step": 2026 }, { "epoch": 0.17292270943524995, "grad_norm": 1.8075624495780929, "learning_rate": 9.474003803649441e-05, "loss": 0.3854, "step": 2027 }, { "epoch": 0.173008019109367, "grad_norm": 1.2756690850272745, "learning_rate": 9.473386828439865e-05, "loss": 0.3345, "step": 2028 }, { "epoch": 0.17309332878348405, "grad_norm": 1.1411279588440137, "learning_rate": 9.472769511710976e-05, "loss": 0.3224, "step": 2029 }, { "epoch": 0.1731786384576011, "grad_norm": 1.5239623119470058, "learning_rate": 9.472151853509903e-05, "loss": 0.3448, "step": 2030 }, { "epoch": 0.17326394813171814, "grad_norm": 1.4543497810095276, "learning_rate": 9.471533853883803e-05, "loss": 0.3962, "step": 2031 }, { "epoch": 0.17334925780583518, "grad_norm": 1.0942399153895093, "learning_rate": 9.470915512879852e-05, "loss": 0.3495, "step": 2032 }, { "epoch": 0.17343456747995223, "grad_norm": 1.5380868675592425, "learning_rate": 9.470296830545263e-05, "loss": 0.367, "step": 2033 }, { "epoch": 0.17351987715406927, "grad_norm": 1.5500000492218995, "learning_rate": 9.469677806927264e-05, "loss": 0.4053, "step": 2034 }, { "epoch": 0.17360518682818632, "grad_norm": 1.3643432677803804, "learning_rate": 9.469058442073117e-05, "loss": 0.3702, "step": 2035 }, { "epoch": 0.17369049650230337, "grad_norm": 1.4953369138631982, "learning_rate": 9.468438736030107e-05, "loss": 0.3731, "step": 2036 }, { "epoch": 0.1737758061764204, "grad_norm": 1.6222766650802707, "learning_rate": 9.467818688845544e-05, "loss": 0.4022, "step": 2037 }, { "epoch": 0.17386111585053746, "grad_norm": 1.4310949516437115, "learning_rate": 9.467198300566766e-05, "loss": 0.3584, "step": 2038 }, { "epoch": 0.1739464255246545, "grad_norm": 1.5128947570413411, "learning_rate": 9.466577571241137e-05, "loss": 0.3676, "step": 2039 }, { "epoch": 0.17403173519877155, "grad_norm": 1.28269514134802, "learning_rate": 9.465956500916045e-05, "loss": 0.3512, "step": 2040 }, { "epoch": 0.1741170448728886, "grad_norm": 1.4903717977573707, "learning_rate": 9.465335089638907e-05, "loss": 0.4189, "step": 2041 }, { "epoch": 0.17420235454700564, "grad_norm": 1.4715974679722406, "learning_rate": 9.464713337457163e-05, "loss": 0.3491, "step": 2042 }, { "epoch": 0.17428766422112268, "grad_norm": 1.6515324902943336, "learning_rate": 9.464091244418282e-05, "loss": 0.3712, "step": 2043 }, { "epoch": 0.17437297389523973, "grad_norm": 1.5527998616546224, "learning_rate": 9.463468810569756e-05, "loss": 0.3701, "step": 2044 }, { "epoch": 0.17445828356935678, "grad_norm": 1.5692567075373618, "learning_rate": 9.462846035959105e-05, "loss": 0.332, "step": 2045 }, { "epoch": 0.17454359324347382, "grad_norm": 1.2344626866352923, "learning_rate": 9.462222920633875e-05, "loss": 0.293, "step": 2046 }, { "epoch": 0.17462890291759087, "grad_norm": 1.4235884853312164, "learning_rate": 9.461599464641638e-05, "loss": 0.3729, "step": 2047 }, { "epoch": 0.1747142125917079, "grad_norm": 1.539120126023015, "learning_rate": 9.46097566802999e-05, "loss": 0.3994, "step": 2048 }, { "epoch": 0.17479952226582496, "grad_norm": 1.5519772777122989, "learning_rate": 9.460351530846555e-05, "loss": 0.3629, "step": 2049 }, { "epoch": 0.17488483193994198, "grad_norm": 1.5066982757718506, "learning_rate": 9.459727053138983e-05, "loss": 0.4238, "step": 2050 }, { "epoch": 0.17497014161405902, "grad_norm": 1.4981909014960142, "learning_rate": 9.45910223495495e-05, "loss": 0.3609, "step": 2051 }, { "epoch": 0.17505545128817607, "grad_norm": 1.5607832060078062, "learning_rate": 9.458477076342157e-05, "loss": 0.3542, "step": 2052 }, { "epoch": 0.1751407609622931, "grad_norm": 1.4399378463789074, "learning_rate": 9.457851577348332e-05, "loss": 0.3303, "step": 2053 }, { "epoch": 0.17522607063641016, "grad_norm": 1.421849680245458, "learning_rate": 9.457225738021226e-05, "loss": 0.3257, "step": 2054 }, { "epoch": 0.1753113803105272, "grad_norm": 1.6233642855336392, "learning_rate": 9.456599558408623e-05, "loss": 0.3951, "step": 2055 }, { "epoch": 0.17539668998464425, "grad_norm": 1.6513918756932628, "learning_rate": 9.455973038558325e-05, "loss": 0.3689, "step": 2056 }, { "epoch": 0.1754819996587613, "grad_norm": 1.220989321918956, "learning_rate": 9.455346178518164e-05, "loss": 0.3397, "step": 2057 }, { "epoch": 0.17556730933287834, "grad_norm": 1.497401609547849, "learning_rate": 9.454718978336e-05, "loss": 0.3416, "step": 2058 }, { "epoch": 0.1756526190069954, "grad_norm": 1.4111848498190755, "learning_rate": 9.454091438059712e-05, "loss": 0.3689, "step": 2059 }, { "epoch": 0.17573792868111243, "grad_norm": 1.5393883994259243, "learning_rate": 9.453463557737212e-05, "loss": 0.383, "step": 2060 }, { "epoch": 0.17582323835522948, "grad_norm": 1.5863053219454233, "learning_rate": 9.452835337416436e-05, "loss": 0.3421, "step": 2061 }, { "epoch": 0.17590854802934652, "grad_norm": 1.4849595374557716, "learning_rate": 9.452206777145342e-05, "loss": 0.4045, "step": 2062 }, { "epoch": 0.17599385770346357, "grad_norm": 1.3534830813328118, "learning_rate": 9.451577876971923e-05, "loss": 0.3499, "step": 2063 }, { "epoch": 0.17607916737758061, "grad_norm": 1.320991386720471, "learning_rate": 9.450948636944189e-05, "loss": 0.3533, "step": 2064 }, { "epoch": 0.17616447705169766, "grad_norm": 1.3747770822330179, "learning_rate": 9.450319057110175e-05, "loss": 0.3804, "step": 2065 }, { "epoch": 0.1762497867258147, "grad_norm": 1.6433500637391831, "learning_rate": 9.449689137517952e-05, "loss": 0.4088, "step": 2066 }, { "epoch": 0.17633509639993175, "grad_norm": 1.6585678580118834, "learning_rate": 9.44905887821561e-05, "loss": 0.3656, "step": 2067 }, { "epoch": 0.1764204060740488, "grad_norm": 1.5215927300904266, "learning_rate": 9.448428279251263e-05, "loss": 0.3975, "step": 2068 }, { "epoch": 0.17650571574816584, "grad_norm": 1.4964576856224163, "learning_rate": 9.447797340673057e-05, "loss": 0.3704, "step": 2069 }, { "epoch": 0.1765910254222829, "grad_norm": 1.300121701486029, "learning_rate": 9.44716606252916e-05, "loss": 0.3597, "step": 2070 }, { "epoch": 0.17667633509639993, "grad_norm": 1.642412429054416, "learning_rate": 9.446534444867765e-05, "loss": 0.3929, "step": 2071 }, { "epoch": 0.17676164477051698, "grad_norm": 1.336617592261912, "learning_rate": 9.445902487737095e-05, "loss": 0.3536, "step": 2072 }, { "epoch": 0.17684695444463402, "grad_norm": 1.6734108688557747, "learning_rate": 9.445270191185395e-05, "loss": 0.3917, "step": 2073 }, { "epoch": 0.17693226411875107, "grad_norm": 1.4404381289477226, "learning_rate": 9.444637555260939e-05, "loss": 0.3902, "step": 2074 }, { "epoch": 0.17701757379286812, "grad_norm": 1.818853620480993, "learning_rate": 9.444004580012023e-05, "loss": 0.3898, "step": 2075 }, { "epoch": 0.17710288346698516, "grad_norm": 1.3661701886178708, "learning_rate": 9.443371265486975e-05, "loss": 0.3648, "step": 2076 }, { "epoch": 0.1771881931411022, "grad_norm": 1.494814173654145, "learning_rate": 9.442737611734141e-05, "loss": 0.3739, "step": 2077 }, { "epoch": 0.17727350281521925, "grad_norm": 1.5693050208188686, "learning_rate": 9.442103618801898e-05, "loss": 0.3588, "step": 2078 }, { "epoch": 0.1773588124893363, "grad_norm": 1.3477705312731325, "learning_rate": 9.44146928673865e-05, "loss": 0.3891, "step": 2079 }, { "epoch": 0.17744412216345334, "grad_norm": 1.2422893650345583, "learning_rate": 9.440834615592826e-05, "loss": 0.384, "step": 2080 }, { "epoch": 0.1775294318375704, "grad_norm": 1.59423880467545, "learning_rate": 9.440199605412876e-05, "loss": 0.3599, "step": 2081 }, { "epoch": 0.17761474151168744, "grad_norm": 1.4222475339372282, "learning_rate": 9.439564256247281e-05, "loss": 0.3146, "step": 2082 }, { "epoch": 0.17770005118580448, "grad_norm": 1.4042354989466956, "learning_rate": 9.438928568144547e-05, "loss": 0.3896, "step": 2083 }, { "epoch": 0.17778536085992153, "grad_norm": 1.3367744634846488, "learning_rate": 9.438292541153206e-05, "loss": 0.3764, "step": 2084 }, { "epoch": 0.17787067053403857, "grad_norm": 1.4161205173973368, "learning_rate": 9.437656175321814e-05, "loss": 0.3458, "step": 2085 }, { "epoch": 0.17795598020815562, "grad_norm": 1.5110771927268145, "learning_rate": 9.437019470698955e-05, "loss": 0.3885, "step": 2086 }, { "epoch": 0.17804128988227266, "grad_norm": 1.4148864874448854, "learning_rate": 9.436382427333237e-05, "loss": 0.3155, "step": 2087 }, { "epoch": 0.17812659955638968, "grad_norm": 1.4845637151317854, "learning_rate": 9.435745045273297e-05, "loss": 0.3409, "step": 2088 }, { "epoch": 0.17821190923050673, "grad_norm": 1.3240257730222846, "learning_rate": 9.435107324567793e-05, "loss": 0.3522, "step": 2089 }, { "epoch": 0.17829721890462377, "grad_norm": 1.722241548545989, "learning_rate": 9.434469265265414e-05, "loss": 0.45, "step": 2090 }, { "epoch": 0.17838252857874082, "grad_norm": 1.3724695676893526, "learning_rate": 9.43383086741487e-05, "loss": 0.3549, "step": 2091 }, { "epoch": 0.17846783825285786, "grad_norm": 1.394712206642833, "learning_rate": 9.4331921310649e-05, "loss": 0.3616, "step": 2092 }, { "epoch": 0.1785531479269749, "grad_norm": 1.2411428413349714, "learning_rate": 9.43255305626427e-05, "loss": 0.3517, "step": 2093 }, { "epoch": 0.17863845760109195, "grad_norm": 1.3951785511341113, "learning_rate": 9.431913643061769e-05, "loss": 0.3371, "step": 2094 }, { "epoch": 0.178723767275209, "grad_norm": 1.4440732849946718, "learning_rate": 9.431273891506213e-05, "loss": 0.3585, "step": 2095 }, { "epoch": 0.17880907694932605, "grad_norm": 1.8115916771387377, "learning_rate": 9.430633801646443e-05, "loss": 0.3651, "step": 2096 }, { "epoch": 0.1788943866234431, "grad_norm": 1.6581907597632306, "learning_rate": 9.429993373531326e-05, "loss": 0.4356, "step": 2097 }, { "epoch": 0.17897969629756014, "grad_norm": 1.5840583529994712, "learning_rate": 9.429352607209755e-05, "loss": 0.3336, "step": 2098 }, { "epoch": 0.17906500597167718, "grad_norm": 1.2813919733054788, "learning_rate": 9.42871150273065e-05, "loss": 0.3458, "step": 2099 }, { "epoch": 0.17915031564579423, "grad_norm": 1.3727324167743142, "learning_rate": 9.428070060142957e-05, "loss": 0.3531, "step": 2100 }, { "epoch": 0.17923562531991127, "grad_norm": 1.447708206356078, "learning_rate": 9.427428279495646e-05, "loss": 0.3894, "step": 2101 }, { "epoch": 0.17932093499402832, "grad_norm": 1.5490172993412459, "learning_rate": 9.426786160837713e-05, "loss": 0.3363, "step": 2102 }, { "epoch": 0.17940624466814536, "grad_norm": 1.1104377706405748, "learning_rate": 9.426143704218179e-05, "loss": 0.3146, "step": 2103 }, { "epoch": 0.1794915543422624, "grad_norm": 1.485189435288107, "learning_rate": 9.425500909686096e-05, "loss": 0.3475, "step": 2104 }, { "epoch": 0.17957686401637946, "grad_norm": 1.6547604466425663, "learning_rate": 9.424857777290535e-05, "loss": 0.4423, "step": 2105 }, { "epoch": 0.1796621736904965, "grad_norm": 1.471906800139159, "learning_rate": 9.424214307080594e-05, "loss": 0.3949, "step": 2106 }, { "epoch": 0.17974748336461355, "grad_norm": 1.5480482487909466, "learning_rate": 9.423570499105403e-05, "loss": 0.3809, "step": 2107 }, { "epoch": 0.1798327930387306, "grad_norm": 1.8391575920781056, "learning_rate": 9.42292635341411e-05, "loss": 0.3761, "step": 2108 }, { "epoch": 0.17991810271284764, "grad_norm": 1.3045157131010492, "learning_rate": 9.422281870055896e-05, "loss": 0.3591, "step": 2109 }, { "epoch": 0.18000341238696468, "grad_norm": 1.414797597097611, "learning_rate": 9.421637049079959e-05, "loss": 0.3394, "step": 2110 }, { "epoch": 0.18008872206108173, "grad_norm": 1.4198698220556956, "learning_rate": 9.42099189053553e-05, "loss": 0.3927, "step": 2111 }, { "epoch": 0.18017403173519878, "grad_norm": 1.3824440847888746, "learning_rate": 9.420346394471864e-05, "loss": 0.3225, "step": 2112 }, { "epoch": 0.18025934140931582, "grad_norm": 1.3125425967615674, "learning_rate": 9.41970056093824e-05, "loss": 0.337, "step": 2113 }, { "epoch": 0.18034465108343287, "grad_norm": 1.5574994751269586, "learning_rate": 9.419054389983964e-05, "loss": 0.3961, "step": 2114 }, { "epoch": 0.1804299607575499, "grad_norm": 1.3875609083811307, "learning_rate": 9.418407881658369e-05, "loss": 0.3983, "step": 2115 }, { "epoch": 0.18051527043166696, "grad_norm": 1.437856215417877, "learning_rate": 9.417761036010812e-05, "loss": 0.2842, "step": 2116 }, { "epoch": 0.180600580105784, "grad_norm": 1.598373438650538, "learning_rate": 9.417113853090675e-05, "loss": 0.4028, "step": 2117 }, { "epoch": 0.18068588977990105, "grad_norm": 1.4904622594898205, "learning_rate": 9.416466332947367e-05, "loss": 0.3647, "step": 2118 }, { "epoch": 0.1807711994540181, "grad_norm": 1.3115782907338411, "learning_rate": 9.415818475630325e-05, "loss": 0.3768, "step": 2119 }, { "epoch": 0.18085650912813514, "grad_norm": 1.445695651018829, "learning_rate": 9.415170281189008e-05, "loss": 0.3586, "step": 2120 }, { "epoch": 0.18094181880225219, "grad_norm": 1.4944029334699114, "learning_rate": 9.414521749672902e-05, "loss": 0.3716, "step": 2121 }, { "epoch": 0.18102712847636923, "grad_norm": 1.6235788439845715, "learning_rate": 9.413872881131518e-05, "loss": 0.4106, "step": 2122 }, { "epoch": 0.18111243815048628, "grad_norm": 1.6984889776295007, "learning_rate": 9.413223675614396e-05, "loss": 0.3243, "step": 2123 }, { "epoch": 0.18119774782460332, "grad_norm": 1.5669029000243948, "learning_rate": 9.412574133171098e-05, "loss": 0.4054, "step": 2124 }, { "epoch": 0.18128305749872037, "grad_norm": 1.3010638505659557, "learning_rate": 9.411924253851213e-05, "loss": 0.3081, "step": 2125 }, { "epoch": 0.1813683671728374, "grad_norm": 1.338344613895088, "learning_rate": 9.411274037704356e-05, "loss": 0.3428, "step": 2126 }, { "epoch": 0.18145367684695443, "grad_norm": 1.2930793830123053, "learning_rate": 9.410623484780168e-05, "loss": 0.3505, "step": 2127 }, { "epoch": 0.18153898652107148, "grad_norm": 1.2855123009287905, "learning_rate": 9.409972595128316e-05, "loss": 0.2899, "step": 2128 }, { "epoch": 0.18162429619518852, "grad_norm": 1.541247138943457, "learning_rate": 9.409321368798489e-05, "loss": 0.3499, "step": 2129 }, { "epoch": 0.18170960586930557, "grad_norm": 1.3737284676527821, "learning_rate": 9.408669805840408e-05, "loss": 0.3363, "step": 2130 }, { "epoch": 0.1817949155434226, "grad_norm": 1.157052560593279, "learning_rate": 9.408017906303815e-05, "loss": 0.3405, "step": 2131 }, { "epoch": 0.18188022521753966, "grad_norm": 1.3771218487330503, "learning_rate": 9.407365670238479e-05, "loss": 0.3623, "step": 2132 }, { "epoch": 0.1819655348916567, "grad_norm": 1.4806411360976093, "learning_rate": 9.406713097694194e-05, "loss": 0.3803, "step": 2133 }, { "epoch": 0.18205084456577375, "grad_norm": 1.4878778186185877, "learning_rate": 9.406060188720782e-05, "loss": 0.4114, "step": 2134 }, { "epoch": 0.1821361542398908, "grad_norm": 1.6643031846187255, "learning_rate": 9.405406943368088e-05, "loss": 0.3381, "step": 2135 }, { "epoch": 0.18222146391400784, "grad_norm": 1.4989149619749687, "learning_rate": 9.404753361685985e-05, "loss": 0.412, "step": 2136 }, { "epoch": 0.1823067735881249, "grad_norm": 1.6363975919947766, "learning_rate": 9.404099443724368e-05, "loss": 0.3683, "step": 2137 }, { "epoch": 0.18239208326224193, "grad_norm": 1.5354696250005457, "learning_rate": 9.403445189533163e-05, "loss": 0.3918, "step": 2138 }, { "epoch": 0.18247739293635898, "grad_norm": 1.4930830260517047, "learning_rate": 9.402790599162317e-05, "loss": 0.3395, "step": 2139 }, { "epoch": 0.18256270261047602, "grad_norm": 1.7124089571844419, "learning_rate": 9.402135672661807e-05, "loss": 0.3989, "step": 2140 }, { "epoch": 0.18264801228459307, "grad_norm": 1.393694201665393, "learning_rate": 9.401480410081628e-05, "loss": 0.3742, "step": 2141 }, { "epoch": 0.18273332195871012, "grad_norm": 1.5801664688442303, "learning_rate": 9.400824811471811e-05, "loss": 0.3799, "step": 2142 }, { "epoch": 0.18281863163282716, "grad_norm": 1.445702165192577, "learning_rate": 9.400168876882408e-05, "loss": 0.3071, "step": 2143 }, { "epoch": 0.1829039413069442, "grad_norm": 1.5127215546313584, "learning_rate": 9.39951260636349e-05, "loss": 0.3913, "step": 2144 }, { "epoch": 0.18298925098106125, "grad_norm": 1.3993782416881584, "learning_rate": 9.398855999965165e-05, "loss": 0.2814, "step": 2145 }, { "epoch": 0.1830745606551783, "grad_norm": 1.4639655570684424, "learning_rate": 9.39819905773756e-05, "loss": 0.3391, "step": 2146 }, { "epoch": 0.18315987032929534, "grad_norm": 1.5478789029278979, "learning_rate": 9.397541779730827e-05, "loss": 0.3648, "step": 2147 }, { "epoch": 0.1832451800034124, "grad_norm": 1.2935391461583383, "learning_rate": 9.39688416599515e-05, "loss": 0.346, "step": 2148 }, { "epoch": 0.18333048967752943, "grad_norm": 1.7089425218089342, "learning_rate": 9.396226216580733e-05, "loss": 0.3508, "step": 2149 }, { "epoch": 0.18341579935164648, "grad_norm": 1.7410165589458244, "learning_rate": 9.395567931537803e-05, "loss": 0.389, "step": 2150 }, { "epoch": 0.18350110902576353, "grad_norm": 1.5358552772967553, "learning_rate": 9.39490931091662e-05, "loss": 0.3672, "step": 2151 }, { "epoch": 0.18358641869988057, "grad_norm": 1.355734411584374, "learning_rate": 9.394250354767467e-05, "loss": 0.3654, "step": 2152 }, { "epoch": 0.18367172837399762, "grad_norm": 1.529648702595086, "learning_rate": 9.39359106314065e-05, "loss": 0.3595, "step": 2153 }, { "epoch": 0.18375703804811466, "grad_norm": 1.425578565430829, "learning_rate": 9.392931436086502e-05, "loss": 0.3452, "step": 2154 }, { "epoch": 0.1838423477222317, "grad_norm": 1.4587777232695847, "learning_rate": 9.392271473655384e-05, "loss": 0.3912, "step": 2155 }, { "epoch": 0.18392765739634875, "grad_norm": 1.567074982548948, "learning_rate": 9.391611175897677e-05, "loss": 0.3815, "step": 2156 }, { "epoch": 0.1840129670704658, "grad_norm": 1.431664522603879, "learning_rate": 9.390950542863797e-05, "loss": 0.3984, "step": 2157 }, { "epoch": 0.18409827674458284, "grad_norm": 1.6303692658017979, "learning_rate": 9.390289574604174e-05, "loss": 0.4146, "step": 2158 }, { "epoch": 0.1841835864186999, "grad_norm": 1.2965786606817444, "learning_rate": 9.389628271169273e-05, "loss": 0.3617, "step": 2159 }, { "epoch": 0.18426889609281694, "grad_norm": 1.3084062584317118, "learning_rate": 9.38896663260958e-05, "loss": 0.3291, "step": 2160 }, { "epoch": 0.18435420576693398, "grad_norm": 1.5872211992442011, "learning_rate": 9.388304658975608e-05, "loss": 0.3977, "step": 2161 }, { "epoch": 0.18443951544105103, "grad_norm": 1.5765956606747022, "learning_rate": 9.387642350317894e-05, "loss": 0.3735, "step": 2162 }, { "epoch": 0.18452482511516807, "grad_norm": 1.4109437386849026, "learning_rate": 9.386979706687002e-05, "loss": 0.3755, "step": 2163 }, { "epoch": 0.18461013478928512, "grad_norm": 1.4650148012110096, "learning_rate": 9.386316728133525e-05, "loss": 0.359, "step": 2164 }, { "epoch": 0.18469544446340214, "grad_norm": 1.7618344558646326, "learning_rate": 9.385653414708071e-05, "loss": 0.3785, "step": 2165 }, { "epoch": 0.18478075413751918, "grad_norm": 1.647283357954928, "learning_rate": 9.384989766461285e-05, "loss": 0.3604, "step": 2166 }, { "epoch": 0.18486606381163623, "grad_norm": 1.3735257828843872, "learning_rate": 9.384325783443832e-05, "loss": 0.3809, "step": 2167 }, { "epoch": 0.18495137348575327, "grad_norm": 1.4872361566255512, "learning_rate": 9.383661465706404e-05, "loss": 0.3398, "step": 2168 }, { "epoch": 0.18503668315987032, "grad_norm": 1.7257276216725197, "learning_rate": 9.382996813299718e-05, "loss": 0.4549, "step": 2169 }, { "epoch": 0.18512199283398736, "grad_norm": 1.3366115721068486, "learning_rate": 9.382331826274518e-05, "loss": 0.3456, "step": 2170 }, { "epoch": 0.1852073025081044, "grad_norm": 1.2593525053907115, "learning_rate": 9.381666504681568e-05, "loss": 0.3641, "step": 2171 }, { "epoch": 0.18529261218222146, "grad_norm": 1.4324761381708364, "learning_rate": 9.381000848571666e-05, "loss": 0.315, "step": 2172 }, { "epoch": 0.1853779218563385, "grad_norm": 1.495182805829612, "learning_rate": 9.380334857995629e-05, "loss": 0.3648, "step": 2173 }, { "epoch": 0.18546323153045555, "grad_norm": 1.41875366680999, "learning_rate": 9.379668533004305e-05, "loss": 0.3604, "step": 2174 }, { "epoch": 0.1855485412045726, "grad_norm": 1.373173367356437, "learning_rate": 9.379001873648558e-05, "loss": 0.3878, "step": 2175 }, { "epoch": 0.18563385087868964, "grad_norm": 2.003462654970958, "learning_rate": 9.378334879979292e-05, "loss": 0.3718, "step": 2176 }, { "epoch": 0.18571916055280668, "grad_norm": 1.3993321119911664, "learning_rate": 9.377667552047423e-05, "loss": 0.3185, "step": 2177 }, { "epoch": 0.18580447022692373, "grad_norm": 1.4873009203877523, "learning_rate": 9.3769998899039e-05, "loss": 0.3775, "step": 2178 }, { "epoch": 0.18588977990104077, "grad_norm": 1.3520451824916808, "learning_rate": 9.376331893599692e-05, "loss": 0.3273, "step": 2179 }, { "epoch": 0.18597508957515782, "grad_norm": 1.551514036837339, "learning_rate": 9.375663563185801e-05, "loss": 0.3918, "step": 2180 }, { "epoch": 0.18606039924927487, "grad_norm": 1.2727131567983454, "learning_rate": 9.37499489871325e-05, "loss": 0.3438, "step": 2181 }, { "epoch": 0.1861457089233919, "grad_norm": 1.303306808413153, "learning_rate": 9.374325900233088e-05, "loss": 0.3327, "step": 2182 }, { "epoch": 0.18623101859750896, "grad_norm": 1.5895341951400515, "learning_rate": 9.373656567796386e-05, "loss": 0.3733, "step": 2183 }, { "epoch": 0.186316328271626, "grad_norm": 1.5326550324075727, "learning_rate": 9.372986901454248e-05, "loss": 0.3716, "step": 2184 }, { "epoch": 0.18640163794574305, "grad_norm": 1.63110393586401, "learning_rate": 9.372316901257798e-05, "loss": 0.3583, "step": 2185 }, { "epoch": 0.1864869476198601, "grad_norm": 1.2894571336038494, "learning_rate": 9.371646567258187e-05, "loss": 0.3409, "step": 2186 }, { "epoch": 0.18657225729397714, "grad_norm": 1.408346605332417, "learning_rate": 9.370975899506593e-05, "loss": 0.3762, "step": 2187 }, { "epoch": 0.18665756696809419, "grad_norm": 1.199492780459368, "learning_rate": 9.370304898054214e-05, "loss": 0.3764, "step": 2188 }, { "epoch": 0.18674287664221123, "grad_norm": 1.5724477342613465, "learning_rate": 9.369633562952281e-05, "loss": 0.348, "step": 2189 }, { "epoch": 0.18682818631632828, "grad_norm": 1.6410030837602483, "learning_rate": 9.368961894252046e-05, "loss": 0.3808, "step": 2190 }, { "epoch": 0.18691349599044532, "grad_norm": 1.7254593721269822, "learning_rate": 9.368289892004787e-05, "loss": 0.3634, "step": 2191 }, { "epoch": 0.18699880566456237, "grad_norm": 1.7578556309812698, "learning_rate": 9.367617556261808e-05, "loss": 0.454, "step": 2192 }, { "epoch": 0.1870841153386794, "grad_norm": 1.4552788307992095, "learning_rate": 9.366944887074437e-05, "loss": 0.4177, "step": 2193 }, { "epoch": 0.18716942501279646, "grad_norm": 1.5195022227204193, "learning_rate": 9.36627188449403e-05, "loss": 0.3692, "step": 2194 }, { "epoch": 0.1872547346869135, "grad_norm": 1.4150306380227138, "learning_rate": 9.365598548571968e-05, "loss": 0.3718, "step": 2195 }, { "epoch": 0.18734004436103055, "grad_norm": 1.4432716601074742, "learning_rate": 9.364924879359653e-05, "loss": 0.4084, "step": 2196 }, { "epoch": 0.1874253540351476, "grad_norm": 1.6044435963902883, "learning_rate": 9.364250876908522e-05, "loss": 0.3872, "step": 2197 }, { "epoch": 0.18751066370926464, "grad_norm": 1.523986952072517, "learning_rate": 9.363576541270027e-05, "loss": 0.3796, "step": 2198 }, { "epoch": 0.1875959733833817, "grad_norm": 1.3611281041405867, "learning_rate": 9.36290187249565e-05, "loss": 0.3394, "step": 2199 }, { "epoch": 0.18768128305749873, "grad_norm": 1.373615261200611, "learning_rate": 9.362226870636901e-05, "loss": 0.3491, "step": 2200 }, { "epoch": 0.18776659273161578, "grad_norm": 1.7977803478817251, "learning_rate": 9.36155153574531e-05, "loss": 0.3543, "step": 2201 }, { "epoch": 0.18785190240573282, "grad_norm": 1.457056203838132, "learning_rate": 9.360875867872437e-05, "loss": 0.3634, "step": 2202 }, { "epoch": 0.18793721207984984, "grad_norm": 1.5420834862785329, "learning_rate": 9.360199867069866e-05, "loss": 0.3115, "step": 2203 }, { "epoch": 0.1880225217539669, "grad_norm": 1.6509241810423314, "learning_rate": 9.359523533389202e-05, "loss": 0.3631, "step": 2204 }, { "epoch": 0.18810783142808393, "grad_norm": 1.1786912665191398, "learning_rate": 9.358846866882087e-05, "loss": 0.3675, "step": 2205 }, { "epoch": 0.18819314110220098, "grad_norm": 1.5581130431351855, "learning_rate": 9.358169867600175e-05, "loss": 0.403, "step": 2206 }, { "epoch": 0.18827845077631802, "grad_norm": 1.9408307517746883, "learning_rate": 9.357492535595151e-05, "loss": 0.4274, "step": 2207 }, { "epoch": 0.18836376045043507, "grad_norm": 1.49519851234162, "learning_rate": 9.356814870918731e-05, "loss": 0.3581, "step": 2208 }, { "epoch": 0.18844907012455211, "grad_norm": 1.3354561814260906, "learning_rate": 9.356136873622646e-05, "loss": 0.3631, "step": 2209 }, { "epoch": 0.18853437979866916, "grad_norm": 1.4995080618035839, "learning_rate": 9.355458543758658e-05, "loss": 0.3268, "step": 2210 }, { "epoch": 0.1886196894727862, "grad_norm": 1.4457247584285118, "learning_rate": 9.354779881378558e-05, "loss": 0.378, "step": 2211 }, { "epoch": 0.18870499914690325, "grad_norm": 1.5184193443505163, "learning_rate": 9.354100886534152e-05, "loss": 0.3736, "step": 2212 }, { "epoch": 0.1887903088210203, "grad_norm": 1.3358071246861138, "learning_rate": 9.353421559277282e-05, "loss": 0.3429, "step": 2213 }, { "epoch": 0.18887561849513734, "grad_norm": 1.5624024932954723, "learning_rate": 9.352741899659812e-05, "loss": 0.4157, "step": 2214 }, { "epoch": 0.1889609281692544, "grad_norm": 1.505296099610858, "learning_rate": 9.352061907733626e-05, "loss": 0.342, "step": 2215 }, { "epoch": 0.18904623784337143, "grad_norm": 1.4332678317842633, "learning_rate": 9.351381583550641e-05, "loss": 0.3678, "step": 2216 }, { "epoch": 0.18913154751748848, "grad_norm": 1.5014155384435015, "learning_rate": 9.350700927162794e-05, "loss": 0.3531, "step": 2217 }, { "epoch": 0.18921685719160553, "grad_norm": 1.545740192962784, "learning_rate": 9.350019938622053e-05, "loss": 0.3215, "step": 2218 }, { "epoch": 0.18930216686572257, "grad_norm": 1.6665167105289307, "learning_rate": 9.349338617980406e-05, "loss": 0.3983, "step": 2219 }, { "epoch": 0.18938747653983962, "grad_norm": 1.399105828700447, "learning_rate": 9.348656965289866e-05, "loss": 0.3546, "step": 2220 }, { "epoch": 0.18947278621395666, "grad_norm": 1.6752595956536378, "learning_rate": 9.347974980602477e-05, "loss": 0.4113, "step": 2221 }, { "epoch": 0.1895580958880737, "grad_norm": 1.4692825812738146, "learning_rate": 9.347292663970301e-05, "loss": 0.3507, "step": 2222 }, { "epoch": 0.18964340556219075, "grad_norm": 1.4731086825218431, "learning_rate": 9.346610015445434e-05, "loss": 0.3737, "step": 2223 }, { "epoch": 0.1897287152363078, "grad_norm": 1.5145979085200825, "learning_rate": 9.34592703507999e-05, "loss": 0.3663, "step": 2224 }, { "epoch": 0.18981402491042484, "grad_norm": 1.4705448141419493, "learning_rate": 9.34524372292611e-05, "loss": 0.3163, "step": 2225 }, { "epoch": 0.1898993345845419, "grad_norm": 1.5000292457272657, "learning_rate": 9.344560079035962e-05, "loss": 0.3954, "step": 2226 }, { "epoch": 0.18998464425865894, "grad_norm": 1.5724212000863373, "learning_rate": 9.34387610346174e-05, "loss": 0.3823, "step": 2227 }, { "epoch": 0.19006995393277598, "grad_norm": 1.5927174533194512, "learning_rate": 9.343191796255659e-05, "loss": 0.3735, "step": 2228 }, { "epoch": 0.19015526360689303, "grad_norm": 1.6183968412835168, "learning_rate": 9.342507157469967e-05, "loss": 0.4035, "step": 2229 }, { "epoch": 0.19024057328101007, "grad_norm": 1.2012730997744432, "learning_rate": 9.341822187156927e-05, "loss": 0.3652, "step": 2230 }, { "epoch": 0.19032588295512712, "grad_norm": 1.3638431298391498, "learning_rate": 9.341136885368837e-05, "loss": 0.3878, "step": 2231 }, { "epoch": 0.19041119262924416, "grad_norm": 1.3892313799843345, "learning_rate": 9.340451252158015e-05, "loss": 0.3213, "step": 2232 }, { "epoch": 0.1904965023033612, "grad_norm": 1.432982935695377, "learning_rate": 9.339765287576803e-05, "loss": 0.4045, "step": 2233 }, { "epoch": 0.19058181197747825, "grad_norm": 1.2512502140193607, "learning_rate": 9.339078991677575e-05, "loss": 0.3411, "step": 2234 }, { "epoch": 0.1906671216515953, "grad_norm": 1.318841238617068, "learning_rate": 9.338392364512723e-05, "loss": 0.3582, "step": 2235 }, { "epoch": 0.19075243132571235, "grad_norm": 1.268469921448622, "learning_rate": 9.337705406134666e-05, "loss": 0.387, "step": 2236 }, { "epoch": 0.1908377409998294, "grad_norm": 1.504760499571868, "learning_rate": 9.337018116595855e-05, "loss": 0.3561, "step": 2237 }, { "epoch": 0.19092305067394644, "grad_norm": 1.5199935719705486, "learning_rate": 9.336330495948756e-05, "loss": 0.3202, "step": 2238 }, { "epoch": 0.19100836034806348, "grad_norm": 1.4702694926118651, "learning_rate": 9.335642544245868e-05, "loss": 0.3859, "step": 2239 }, { "epoch": 0.19109367002218053, "grad_norm": 1.6030246666041459, "learning_rate": 9.33495426153971e-05, "loss": 0.3624, "step": 2240 }, { "epoch": 0.19117897969629755, "grad_norm": 1.4297960584105134, "learning_rate": 9.334265647882832e-05, "loss": 0.3075, "step": 2241 }, { "epoch": 0.1912642893704146, "grad_norm": 1.9792376723017, "learning_rate": 9.333576703327803e-05, "loss": 0.458, "step": 2242 }, { "epoch": 0.19134959904453164, "grad_norm": 1.5524872973132797, "learning_rate": 9.33288742792722e-05, "loss": 0.3867, "step": 2243 }, { "epoch": 0.19143490871864868, "grad_norm": 1.554878874841924, "learning_rate": 9.332197821733709e-05, "loss": 0.3929, "step": 2244 }, { "epoch": 0.19152021839276573, "grad_norm": 1.3791080143530041, "learning_rate": 9.331507884799913e-05, "loss": 0.3306, "step": 2245 }, { "epoch": 0.19160552806688277, "grad_norm": 1.3852685356779497, "learning_rate": 9.33081761717851e-05, "loss": 0.3239, "step": 2246 }, { "epoch": 0.19169083774099982, "grad_norm": 1.6893479154605262, "learning_rate": 9.330127018922194e-05, "loss": 0.4364, "step": 2247 }, { "epoch": 0.19177614741511687, "grad_norm": 1.4136734738199892, "learning_rate": 9.32943609008369e-05, "loss": 0.5458, "step": 2248 }, { "epoch": 0.1918614570892339, "grad_norm": 1.4221519787316514, "learning_rate": 9.32874483071575e-05, "loss": 0.3603, "step": 2249 }, { "epoch": 0.19194676676335096, "grad_norm": 1.2785746858255433, "learning_rate": 9.328053240871143e-05, "loss": 0.3659, "step": 2250 }, { "epoch": 0.192032076437468, "grad_norm": 1.6306485525223582, "learning_rate": 9.32736132060267e-05, "loss": 0.4091, "step": 2251 }, { "epoch": 0.19211738611158505, "grad_norm": 1.618533693685873, "learning_rate": 9.326669069963156e-05, "loss": 0.3822, "step": 2252 }, { "epoch": 0.1922026957857021, "grad_norm": 1.4002077629694747, "learning_rate": 9.325976489005453e-05, "loss": 0.3517, "step": 2253 }, { "epoch": 0.19228800545981914, "grad_norm": 1.424235302718538, "learning_rate": 9.32528357778243e-05, "loss": 0.354, "step": 2254 }, { "epoch": 0.19237331513393618, "grad_norm": 1.5200246289416206, "learning_rate": 9.324590336346992e-05, "loss": 0.4086, "step": 2255 }, { "epoch": 0.19245862480805323, "grad_norm": 1.70091797502964, "learning_rate": 9.323896764752063e-05, "loss": 0.3881, "step": 2256 }, { "epoch": 0.19254393448217028, "grad_norm": 1.2797673648789567, "learning_rate": 9.32320286305059e-05, "loss": 0.3644, "step": 2257 }, { "epoch": 0.19262924415628732, "grad_norm": 1.2499744889517106, "learning_rate": 9.322508631295555e-05, "loss": 0.3437, "step": 2258 }, { "epoch": 0.19271455383040437, "grad_norm": 1.4458878480306283, "learning_rate": 9.321814069539956e-05, "loss": 0.3741, "step": 2259 }, { "epoch": 0.1927998635045214, "grad_norm": 1.563510034263625, "learning_rate": 9.321119177836818e-05, "loss": 0.4249, "step": 2260 }, { "epoch": 0.19288517317863846, "grad_norm": 1.6406023296424763, "learning_rate": 9.320423956239192e-05, "loss": 0.4165, "step": 2261 }, { "epoch": 0.1929704828527555, "grad_norm": 1.2329202119020621, "learning_rate": 9.319728404800157e-05, "loss": 0.3291, "step": 2262 }, { "epoch": 0.19305579252687255, "grad_norm": 1.4196721714005904, "learning_rate": 9.319032523572815e-05, "loss": 0.3694, "step": 2263 }, { "epoch": 0.1931411022009896, "grad_norm": 1.4222958120208644, "learning_rate": 9.318336312610288e-05, "loss": 0.3975, "step": 2264 }, { "epoch": 0.19322641187510664, "grad_norm": 1.6032017204264015, "learning_rate": 9.317639771965733e-05, "loss": 0.3825, "step": 2265 }, { "epoch": 0.1933117215492237, "grad_norm": 1.548322982479738, "learning_rate": 9.316942901692325e-05, "loss": 0.3468, "step": 2266 }, { "epoch": 0.19339703122334073, "grad_norm": 1.409380289816392, "learning_rate": 9.316245701843266e-05, "loss": 0.3585, "step": 2267 }, { "epoch": 0.19348234089745778, "grad_norm": 1.5559972105810271, "learning_rate": 9.315548172471784e-05, "loss": 0.3434, "step": 2268 }, { "epoch": 0.19356765057157482, "grad_norm": 1.419490953460277, "learning_rate": 9.314850313631132e-05, "loss": 0.3544, "step": 2269 }, { "epoch": 0.19365296024569187, "grad_norm": 1.4281344290323987, "learning_rate": 9.314152125374589e-05, "loss": 0.3498, "step": 2270 }, { "epoch": 0.19373826991980891, "grad_norm": 1.2336118246231662, "learning_rate": 9.313453607755456e-05, "loss": 0.3157, "step": 2271 }, { "epoch": 0.19382357959392596, "grad_norm": 1.6517035504382358, "learning_rate": 9.312754760827061e-05, "loss": 0.3758, "step": 2272 }, { "epoch": 0.193908889268043, "grad_norm": 1.5075425612987505, "learning_rate": 9.312055584642758e-05, "loss": 0.3606, "step": 2273 }, { "epoch": 0.19399419894216005, "grad_norm": 1.3461553646959397, "learning_rate": 9.311356079255927e-05, "loss": 0.3538, "step": 2274 }, { "epoch": 0.1940795086162771, "grad_norm": 1.6348539303500276, "learning_rate": 9.310656244719968e-05, "loss": 0.3733, "step": 2275 }, { "epoch": 0.19416481829039414, "grad_norm": 1.4791080391597355, "learning_rate": 9.309956081088311e-05, "loss": 0.2958, "step": 2276 }, { "epoch": 0.1942501279645112, "grad_norm": 1.1817183035547427, "learning_rate": 9.309255588414412e-05, "loss": 0.3963, "step": 2277 }, { "epoch": 0.19433543763862823, "grad_norm": 1.5003204798074032, "learning_rate": 9.308554766751746e-05, "loss": 0.3424, "step": 2278 }, { "epoch": 0.19442074731274528, "grad_norm": 1.2198601092979509, "learning_rate": 9.307853616153821e-05, "loss": 0.3674, "step": 2279 }, { "epoch": 0.1945060569868623, "grad_norm": 1.635872196325985, "learning_rate": 9.307152136674164e-05, "loss": 0.4394, "step": 2280 }, { "epoch": 0.19459136666097934, "grad_norm": 1.3336566840922235, "learning_rate": 9.30645032836633e-05, "loss": 0.3376, "step": 2281 }, { "epoch": 0.1946766763350964, "grad_norm": 1.6823028784536735, "learning_rate": 9.305748191283898e-05, "loss": 0.4106, "step": 2282 }, { "epoch": 0.19476198600921343, "grad_norm": 1.4543458465683683, "learning_rate": 9.305045725480472e-05, "loss": 0.3804, "step": 2283 }, { "epoch": 0.19484729568333048, "grad_norm": 1.3907681509339, "learning_rate": 9.304342931009681e-05, "loss": 0.3505, "step": 2284 }, { "epoch": 0.19493260535744752, "grad_norm": 1.2925809065326863, "learning_rate": 9.303639807925182e-05, "loss": 0.359, "step": 2285 }, { "epoch": 0.19501791503156457, "grad_norm": 1.2549704435751192, "learning_rate": 9.302936356280652e-05, "loss": 0.3676, "step": 2286 }, { "epoch": 0.19510322470568162, "grad_norm": 1.4138115928987298, "learning_rate": 9.302232576129797e-05, "loss": 0.3531, "step": 2287 }, { "epoch": 0.19518853437979866, "grad_norm": 1.4954502723866803, "learning_rate": 9.301528467526347e-05, "loss": 0.3227, "step": 2288 }, { "epoch": 0.1952738440539157, "grad_norm": 1.4703456246580011, "learning_rate": 9.300824030524056e-05, "loss": 0.3671, "step": 2289 }, { "epoch": 0.19535915372803275, "grad_norm": 1.3390425797430754, "learning_rate": 9.300119265176707e-05, "loss": 0.3272, "step": 2290 }, { "epoch": 0.1954444634021498, "grad_norm": 1.3558591780556613, "learning_rate": 9.2994141715381e-05, "loss": 0.3534, "step": 2291 }, { "epoch": 0.19552977307626684, "grad_norm": 1.467438457179841, "learning_rate": 9.29870874966207e-05, "loss": 0.3725, "step": 2292 }, { "epoch": 0.1956150827503839, "grad_norm": 1.2422625441045494, "learning_rate": 9.298002999602471e-05, "loss": 0.3343, "step": 2293 }, { "epoch": 0.19570039242450094, "grad_norm": 1.883607815120781, "learning_rate": 9.297296921413181e-05, "loss": 0.3307, "step": 2294 }, { "epoch": 0.19578570209861798, "grad_norm": 1.3451647297408802, "learning_rate": 9.296590515148109e-05, "loss": 0.3468, "step": 2295 }, { "epoch": 0.19587101177273503, "grad_norm": 1.498819999682742, "learning_rate": 9.295883780861181e-05, "loss": 0.3817, "step": 2296 }, { "epoch": 0.19595632144685207, "grad_norm": 1.5300277191343887, "learning_rate": 9.295176718606355e-05, "loss": 0.4012, "step": 2297 }, { "epoch": 0.19604163112096912, "grad_norm": 1.2790225135187772, "learning_rate": 9.294469328437611e-05, "loss": 0.3722, "step": 2298 }, { "epoch": 0.19612694079508616, "grad_norm": 1.6214818283130898, "learning_rate": 9.293761610408955e-05, "loss": 0.3161, "step": 2299 }, { "epoch": 0.1962122504692032, "grad_norm": 1.7537546750779216, "learning_rate": 9.293053564574417e-05, "loss": 0.3388, "step": 2300 }, { "epoch": 0.19629756014332025, "grad_norm": 1.3950341864468, "learning_rate": 9.292345190988052e-05, "loss": 0.3232, "step": 2301 }, { "epoch": 0.1963828698174373, "grad_norm": 1.7478235880000268, "learning_rate": 9.291636489703943e-05, "loss": 0.3552, "step": 2302 }, { "epoch": 0.19646817949155435, "grad_norm": 1.3931081653945223, "learning_rate": 9.290927460776192e-05, "loss": 0.3342, "step": 2303 }, { "epoch": 0.1965534891656714, "grad_norm": 1.4526469767232348, "learning_rate": 9.290218104258933e-05, "loss": 0.4003, "step": 2304 }, { "epoch": 0.19663879883978844, "grad_norm": 1.4881921776577116, "learning_rate": 9.289508420206318e-05, "loss": 0.3797, "step": 2305 }, { "epoch": 0.19672410851390548, "grad_norm": 1.5311839031045038, "learning_rate": 9.28879840867253e-05, "loss": 0.4065, "step": 2306 }, { "epoch": 0.19680941818802253, "grad_norm": 1.9519679189766073, "learning_rate": 9.288088069711774e-05, "loss": 0.3799, "step": 2307 }, { "epoch": 0.19689472786213957, "grad_norm": 1.2819079593790168, "learning_rate": 9.287377403378282e-05, "loss": 0.3385, "step": 2308 }, { "epoch": 0.19698003753625662, "grad_norm": 1.5458444572658565, "learning_rate": 9.286666409726306e-05, "loss": 0.3976, "step": 2309 }, { "epoch": 0.19706534721037366, "grad_norm": 1.7301012600497216, "learning_rate": 9.285955088810132e-05, "loss": 0.3706, "step": 2310 }, { "epoch": 0.1971506568844907, "grad_norm": 2.008388451522935, "learning_rate": 9.285243440684059e-05, "loss": 0.3606, "step": 2311 }, { "epoch": 0.19723596655860776, "grad_norm": 1.2754301617089234, "learning_rate": 9.284531465402424e-05, "loss": 0.3272, "step": 2312 }, { "epoch": 0.1973212762327248, "grad_norm": 1.3992524995466287, "learning_rate": 9.283819163019578e-05, "loss": 0.3242, "step": 2313 }, { "epoch": 0.19740658590684185, "grad_norm": 1.3961790377924257, "learning_rate": 9.283106533589905e-05, "loss": 0.3259, "step": 2314 }, { "epoch": 0.1974918955809589, "grad_norm": 1.4638170229897018, "learning_rate": 9.282393577167807e-05, "loss": 0.3479, "step": 2315 }, { "epoch": 0.19757720525507594, "grad_norm": 1.2925567431510063, "learning_rate": 9.281680293807717e-05, "loss": 0.331, "step": 2316 }, { "epoch": 0.19766251492919298, "grad_norm": 1.5788418491976428, "learning_rate": 9.280966683564088e-05, "loss": 0.353, "step": 2317 }, { "epoch": 0.19774782460331, "grad_norm": 1.5746486635258359, "learning_rate": 9.280252746491403e-05, "loss": 0.4146, "step": 2318 }, { "epoch": 0.19783313427742705, "grad_norm": 1.7053267919197717, "learning_rate": 9.279538482644165e-05, "loss": 0.3608, "step": 2319 }, { "epoch": 0.1979184439515441, "grad_norm": 1.26144817273652, "learning_rate": 9.278823892076907e-05, "loss": 0.347, "step": 2320 }, { "epoch": 0.19800375362566114, "grad_norm": 1.4518786592145927, "learning_rate": 9.27810897484418e-05, "loss": 0.3166, "step": 2321 }, { "epoch": 0.19808906329977818, "grad_norm": 1.4705256016800705, "learning_rate": 9.277393731000568e-05, "loss": 0.325, "step": 2322 }, { "epoch": 0.19817437297389523, "grad_norm": 1.956840216474636, "learning_rate": 9.276678160600674e-05, "loss": 0.37, "step": 2323 }, { "epoch": 0.19825968264801228, "grad_norm": 1.467770757311051, "learning_rate": 9.275962263699129e-05, "loss": 0.3457, "step": 2324 }, { "epoch": 0.19834499232212932, "grad_norm": 1.6781995703469232, "learning_rate": 9.27524604035059e-05, "loss": 0.3789, "step": 2325 }, { "epoch": 0.19843030199624637, "grad_norm": 1.3479296862972323, "learning_rate": 9.274529490609731e-05, "loss": 0.3201, "step": 2326 }, { "epoch": 0.1985156116703634, "grad_norm": 1.2808523724112388, "learning_rate": 9.273812614531261e-05, "loss": 0.3309, "step": 2327 }, { "epoch": 0.19860092134448046, "grad_norm": 1.3910286992483543, "learning_rate": 9.27309541216991e-05, "loss": 0.3577, "step": 2328 }, { "epoch": 0.1986862310185975, "grad_norm": 1.5129815083928717, "learning_rate": 9.27237788358043e-05, "loss": 0.3769, "step": 2329 }, { "epoch": 0.19877154069271455, "grad_norm": 1.5481838507433523, "learning_rate": 9.271660028817603e-05, "loss": 0.3169, "step": 2330 }, { "epoch": 0.1988568503668316, "grad_norm": 1.7907598181762547, "learning_rate": 9.270941847936232e-05, "loss": 0.3885, "step": 2331 }, { "epoch": 0.19894216004094864, "grad_norm": 1.697591489143861, "learning_rate": 9.270223340991147e-05, "loss": 0.3488, "step": 2332 }, { "epoch": 0.19902746971506569, "grad_norm": 1.5657103555220282, "learning_rate": 9.269504508037202e-05, "loss": 0.3835, "step": 2333 }, { "epoch": 0.19911277938918273, "grad_norm": 1.5097425688528965, "learning_rate": 9.268785349129277e-05, "loss": 0.3613, "step": 2334 }, { "epoch": 0.19919808906329978, "grad_norm": 1.402069234942463, "learning_rate": 9.268065864322273e-05, "loss": 0.3764, "step": 2335 }, { "epoch": 0.19928339873741682, "grad_norm": 1.317581240992137, "learning_rate": 9.267346053671121e-05, "loss": 0.3662, "step": 2336 }, { "epoch": 0.19936870841153387, "grad_norm": 1.6189338726937779, "learning_rate": 9.266625917230774e-05, "loss": 0.3905, "step": 2337 }, { "epoch": 0.1994540180856509, "grad_norm": 1.5180737877755566, "learning_rate": 9.265905455056211e-05, "loss": 0.4274, "step": 2338 }, { "epoch": 0.19953932775976796, "grad_norm": 1.256342152873683, "learning_rate": 9.265184667202438e-05, "loss": 0.3256, "step": 2339 }, { "epoch": 0.199624637433885, "grad_norm": 1.3220940482812145, "learning_rate": 9.264463553724478e-05, "loss": 0.3152, "step": 2340 }, { "epoch": 0.19970994710800205, "grad_norm": 1.401317592899641, "learning_rate": 9.26374211467739e-05, "loss": 0.312, "step": 2341 }, { "epoch": 0.1997952567821191, "grad_norm": 1.3401228722419425, "learning_rate": 9.263020350116247e-05, "loss": 0.3588, "step": 2342 }, { "epoch": 0.19988056645623614, "grad_norm": 1.5067110930844312, "learning_rate": 9.262298260096154e-05, "loss": 0.3576, "step": 2343 }, { "epoch": 0.1999658761303532, "grad_norm": 1.6814954387093624, "learning_rate": 9.261575844672238e-05, "loss": 0.3624, "step": 2344 }, { "epoch": 0.20005118580447023, "grad_norm": 1.6340045866604216, "learning_rate": 9.260853103899654e-05, "loss": 0.367, "step": 2345 }, { "epoch": 0.20013649547858728, "grad_norm": 1.794273948574453, "learning_rate": 9.260130037833576e-05, "loss": 0.4172, "step": 2346 }, { "epoch": 0.20022180515270432, "grad_norm": 1.8105131637907983, "learning_rate": 9.259406646529209e-05, "loss": 0.3642, "step": 2347 }, { "epoch": 0.20030711482682137, "grad_norm": 1.6027517973705752, "learning_rate": 9.258682930041778e-05, "loss": 0.3796, "step": 2348 }, { "epoch": 0.20039242450093842, "grad_norm": 1.5323204658476444, "learning_rate": 9.257958888426536e-05, "loss": 0.3905, "step": 2349 }, { "epoch": 0.20047773417505546, "grad_norm": 1.2793938752884548, "learning_rate": 9.257234521738762e-05, "loss": 0.3125, "step": 2350 }, { "epoch": 0.2005630438491725, "grad_norm": 1.2746416261446618, "learning_rate": 9.256509830033752e-05, "loss": 0.3758, "step": 2351 }, { "epoch": 0.20064835352328955, "grad_norm": 1.224199290263932, "learning_rate": 9.255784813366837e-05, "loss": 0.3486, "step": 2352 }, { "epoch": 0.2007336631974066, "grad_norm": 1.4243959987242454, "learning_rate": 9.255059471793369e-05, "loss": 0.357, "step": 2353 }, { "epoch": 0.20081897287152364, "grad_norm": 1.5610147665181908, "learning_rate": 9.254333805368717e-05, "loss": 0.3621, "step": 2354 }, { "epoch": 0.2009042825456407, "grad_norm": 1.3462698176491765, "learning_rate": 9.253607814148289e-05, "loss": 0.2918, "step": 2355 }, { "epoch": 0.2009895922197577, "grad_norm": 1.480816883231211, "learning_rate": 9.25288149818751e-05, "loss": 0.3488, "step": 2356 }, { "epoch": 0.20107490189387475, "grad_norm": 1.3245001749864451, "learning_rate": 9.252154857541825e-05, "loss": 0.3274, "step": 2357 }, { "epoch": 0.2011602115679918, "grad_norm": 1.1953710404565532, "learning_rate": 9.251427892266712e-05, "loss": 0.3374, "step": 2358 }, { "epoch": 0.20124552124210884, "grad_norm": 1.2888937174955077, "learning_rate": 9.250700602417675e-05, "loss": 0.3398, "step": 2359 }, { "epoch": 0.2013308309162259, "grad_norm": 1.3113029107109142, "learning_rate": 9.249972988050233e-05, "loss": 0.3122, "step": 2360 }, { "epoch": 0.20141614059034293, "grad_norm": 1.558359840834, "learning_rate": 9.249245049219939e-05, "loss": 0.3987, "step": 2361 }, { "epoch": 0.20150145026445998, "grad_norm": 1.3145612244275862, "learning_rate": 9.248516785982364e-05, "loss": 0.3118, "step": 2362 }, { "epoch": 0.20158675993857703, "grad_norm": 1.447267931319856, "learning_rate": 9.247788198393111e-05, "loss": 0.4013, "step": 2363 }, { "epoch": 0.20167206961269407, "grad_norm": 1.2464051051981606, "learning_rate": 9.2470592865078e-05, "loss": 0.3163, "step": 2364 }, { "epoch": 0.20175737928681112, "grad_norm": 1.2205837832287891, "learning_rate": 9.246330050382083e-05, "loss": 0.3386, "step": 2365 }, { "epoch": 0.20184268896092816, "grad_norm": 1.1774548757212486, "learning_rate": 9.24560049007163e-05, "loss": 0.3381, "step": 2366 }, { "epoch": 0.2019279986350452, "grad_norm": 1.5123659468585662, "learning_rate": 9.244870605632142e-05, "loss": 0.3401, "step": 2367 }, { "epoch": 0.20201330830916225, "grad_norm": 1.3951131851329, "learning_rate": 9.24414039711934e-05, "loss": 0.3295, "step": 2368 }, { "epoch": 0.2020986179832793, "grad_norm": 1.569914582352549, "learning_rate": 9.243409864588972e-05, "loss": 0.3742, "step": 2369 }, { "epoch": 0.20218392765739635, "grad_norm": 1.5652935233006344, "learning_rate": 9.242679008096811e-05, "loss": 0.3338, "step": 2370 }, { "epoch": 0.2022692373315134, "grad_norm": 1.6499858653301198, "learning_rate": 9.241947827698652e-05, "loss": 0.3592, "step": 2371 }, { "epoch": 0.20235454700563044, "grad_norm": 1.348653938188765, "learning_rate": 9.24121632345032e-05, "loss": 0.3276, "step": 2372 }, { "epoch": 0.20243985667974748, "grad_norm": 1.3848536023600049, "learning_rate": 9.240484495407657e-05, "loss": 0.3424, "step": 2373 }, { "epoch": 0.20252516635386453, "grad_norm": 1.4880061658813541, "learning_rate": 9.239752343626538e-05, "loss": 0.3835, "step": 2374 }, { "epoch": 0.20261047602798157, "grad_norm": 1.3965206968479016, "learning_rate": 9.239019868162856e-05, "loss": 0.3291, "step": 2375 }, { "epoch": 0.20269578570209862, "grad_norm": 1.7209146912980149, "learning_rate": 9.238287069072535e-05, "loss": 0.432, "step": 2376 }, { "epoch": 0.20278109537621566, "grad_norm": 1.440644184241334, "learning_rate": 9.237553946411519e-05, "loss": 0.3957, "step": 2377 }, { "epoch": 0.2028664050503327, "grad_norm": 1.3597275726157676, "learning_rate": 9.236820500235776e-05, "loss": 0.3441, "step": 2378 }, { "epoch": 0.20295171472444976, "grad_norm": 1.484846421722767, "learning_rate": 9.236086730601304e-05, "loss": 0.3628, "step": 2379 }, { "epoch": 0.2030370243985668, "grad_norm": 1.8558818156580406, "learning_rate": 9.235352637564118e-05, "loss": 0.395, "step": 2380 }, { "epoch": 0.20312233407268385, "grad_norm": 1.3348555617472397, "learning_rate": 9.234618221180267e-05, "loss": 0.3782, "step": 2381 }, { "epoch": 0.2032076437468009, "grad_norm": 1.3901689617462902, "learning_rate": 9.233883481505817e-05, "loss": 0.3259, "step": 2382 }, { "epoch": 0.20329295342091794, "grad_norm": 1.2862269338869552, "learning_rate": 9.233148418596862e-05, "loss": 0.362, "step": 2383 }, { "epoch": 0.20337826309503498, "grad_norm": 1.5459241931189127, "learning_rate": 9.23241303250952e-05, "loss": 0.4038, "step": 2384 }, { "epoch": 0.20346357276915203, "grad_norm": 1.4247934760632408, "learning_rate": 9.231677323299935e-05, "loss": 0.3988, "step": 2385 }, { "epoch": 0.20354888244326907, "grad_norm": 1.5112149288629535, "learning_rate": 9.230941291024273e-05, "loss": 0.3389, "step": 2386 }, { "epoch": 0.20363419211738612, "grad_norm": 1.6065335420537519, "learning_rate": 9.230204935738725e-05, "loss": 0.3707, "step": 2387 }, { "epoch": 0.20371950179150317, "grad_norm": 1.517922930533949, "learning_rate": 9.229468257499511e-05, "loss": 0.3365, "step": 2388 }, { "epoch": 0.2038048114656202, "grad_norm": 1.3760548793361493, "learning_rate": 9.22873125636287e-05, "loss": 0.359, "step": 2389 }, { "epoch": 0.20389012113973726, "grad_norm": 1.5267688030048883, "learning_rate": 9.227993932385069e-05, "loss": 0.4021, "step": 2390 }, { "epoch": 0.2039754308138543, "grad_norm": 1.3188660050845482, "learning_rate": 9.2272562856224e-05, "loss": 0.358, "step": 2391 }, { "epoch": 0.20406074048797135, "grad_norm": 1.4464959325696958, "learning_rate": 9.226518316131176e-05, "loss": 0.3016, "step": 2392 }, { "epoch": 0.2041460501620884, "grad_norm": 1.4035986595743148, "learning_rate": 9.22578002396774e-05, "loss": 0.3583, "step": 2393 }, { "epoch": 0.20423135983620544, "grad_norm": 1.347768408489516, "learning_rate": 9.225041409188453e-05, "loss": 0.3327, "step": 2394 }, { "epoch": 0.20431666951032246, "grad_norm": 1.3565403113388232, "learning_rate": 9.224302471849707e-05, "loss": 0.3608, "step": 2395 }, { "epoch": 0.2044019791844395, "grad_norm": 1.366081487973002, "learning_rate": 9.223563212007915e-05, "loss": 0.3553, "step": 2396 }, { "epoch": 0.20448728885855655, "grad_norm": 1.4130474683775431, "learning_rate": 9.222823629719516e-05, "loss": 0.3784, "step": 2397 }, { "epoch": 0.2045725985326736, "grad_norm": 1.5195680432412895, "learning_rate": 9.222083725040973e-05, "loss": 0.3737, "step": 2398 }, { "epoch": 0.20465790820679064, "grad_norm": 1.2980811703169588, "learning_rate": 9.221343498028774e-05, "loss": 0.3557, "step": 2399 }, { "epoch": 0.20474321788090769, "grad_norm": 1.618125606579481, "learning_rate": 9.22060294873943e-05, "loss": 0.3267, "step": 2400 }, { "epoch": 0.20482852755502473, "grad_norm": 1.9377799293353495, "learning_rate": 9.21986207722948e-05, "loss": 0.4057, "step": 2401 }, { "epoch": 0.20491383722914178, "grad_norm": 1.8917022662517073, "learning_rate": 9.219120883555486e-05, "loss": 0.3746, "step": 2402 }, { "epoch": 0.20499914690325882, "grad_norm": 1.961337956728868, "learning_rate": 9.21837936777403e-05, "loss": 0.3825, "step": 2403 }, { "epoch": 0.20508445657737587, "grad_norm": 1.9520093249035717, "learning_rate": 9.217637529941727e-05, "loss": 0.3644, "step": 2404 }, { "epoch": 0.2051697662514929, "grad_norm": 1.3094727709554865, "learning_rate": 9.216895370115211e-05, "loss": 0.3113, "step": 2405 }, { "epoch": 0.20525507592560996, "grad_norm": 1.2992544292016988, "learning_rate": 9.216152888351144e-05, "loss": 0.3544, "step": 2406 }, { "epoch": 0.205340385599727, "grad_norm": 1.6388924305908095, "learning_rate": 9.215410084706206e-05, "loss": 0.3792, "step": 2407 }, { "epoch": 0.20542569527384405, "grad_norm": 1.477758901797391, "learning_rate": 9.214666959237109e-05, "loss": 0.3704, "step": 2408 }, { "epoch": 0.2055110049479611, "grad_norm": 1.403383045214586, "learning_rate": 9.213923512000588e-05, "loss": 0.3781, "step": 2409 }, { "epoch": 0.20559631462207814, "grad_norm": 1.2863063130410417, "learning_rate": 9.213179743053399e-05, "loss": 0.3683, "step": 2410 }, { "epoch": 0.2056816242961952, "grad_norm": 1.5397853025417485, "learning_rate": 9.212435652452324e-05, "loss": 0.3352, "step": 2411 }, { "epoch": 0.20576693397031223, "grad_norm": 1.5513416882267979, "learning_rate": 9.211691240254173e-05, "loss": 0.3259, "step": 2412 }, { "epoch": 0.20585224364442928, "grad_norm": 1.1969862047131878, "learning_rate": 9.210946506515777e-05, "loss": 0.3188, "step": 2413 }, { "epoch": 0.20593755331854632, "grad_norm": 1.778951868972562, "learning_rate": 9.210201451293992e-05, "loss": 0.4078, "step": 2414 }, { "epoch": 0.20602286299266337, "grad_norm": 1.4799737832608402, "learning_rate": 9.209456074645699e-05, "loss": 0.3627, "step": 2415 }, { "epoch": 0.20610817266678041, "grad_norm": 1.3096378089982557, "learning_rate": 9.208710376627803e-05, "loss": 0.3501, "step": 2416 }, { "epoch": 0.20619348234089746, "grad_norm": 1.2827926281076356, "learning_rate": 9.207964357297235e-05, "loss": 0.3354, "step": 2417 }, { "epoch": 0.2062787920150145, "grad_norm": 1.3862663283216288, "learning_rate": 9.20721801671095e-05, "loss": 0.3067, "step": 2418 }, { "epoch": 0.20636410168913155, "grad_norm": 1.2931146913894684, "learning_rate": 9.206471354925928e-05, "loss": 0.3876, "step": 2419 }, { "epoch": 0.2064494113632486, "grad_norm": 1.3959073668627056, "learning_rate": 9.205724371999169e-05, "loss": 0.4102, "step": 2420 }, { "epoch": 0.20653472103736564, "grad_norm": 1.736274709834672, "learning_rate": 9.204977067987704e-05, "loss": 0.4128, "step": 2421 }, { "epoch": 0.2066200307114827, "grad_norm": 1.5333225471006633, "learning_rate": 9.204229442948585e-05, "loss": 0.39, "step": 2422 }, { "epoch": 0.20670534038559973, "grad_norm": 1.4427269155175517, "learning_rate": 9.203481496938888e-05, "loss": 0.3523, "step": 2423 }, { "epoch": 0.20679065005971678, "grad_norm": 1.4028196244665427, "learning_rate": 9.202733230015719e-05, "loss": 0.3261, "step": 2424 }, { "epoch": 0.20687595973383383, "grad_norm": 1.8118557936146182, "learning_rate": 9.201984642236198e-05, "loss": 0.3771, "step": 2425 }, { "epoch": 0.20696126940795087, "grad_norm": 1.2188137233409726, "learning_rate": 9.201235733657481e-05, "loss": 0.317, "step": 2426 }, { "epoch": 0.20704657908206792, "grad_norm": 1.6089512535662909, "learning_rate": 9.20048650433674e-05, "loss": 0.3577, "step": 2427 }, { "epoch": 0.20713188875618496, "grad_norm": 1.6002979031034716, "learning_rate": 9.199736954331177e-05, "loss": 0.3791, "step": 2428 }, { "epoch": 0.207217198430302, "grad_norm": 1.5744622659822474, "learning_rate": 9.198987083698014e-05, "loss": 0.4074, "step": 2429 }, { "epoch": 0.20730250810441905, "grad_norm": 1.7341199120239505, "learning_rate": 9.198236892494501e-05, "loss": 0.4313, "step": 2430 }, { "epoch": 0.2073878177785361, "grad_norm": 1.432783599528768, "learning_rate": 9.197486380777911e-05, "loss": 0.3215, "step": 2431 }, { "epoch": 0.20747312745265314, "grad_norm": 1.6953345793943118, "learning_rate": 9.196735548605541e-05, "loss": 0.3083, "step": 2432 }, { "epoch": 0.20755843712677016, "grad_norm": 1.770853521194574, "learning_rate": 9.195984396034713e-05, "loss": 0.3769, "step": 2433 }, { "epoch": 0.2076437468008872, "grad_norm": 1.4495754574112816, "learning_rate": 9.195232923122773e-05, "loss": 0.3703, "step": 2434 }, { "epoch": 0.20772905647500425, "grad_norm": 1.4033545036636987, "learning_rate": 9.194481129927093e-05, "loss": 0.3034, "step": 2435 }, { "epoch": 0.2078143661491213, "grad_norm": 1.5077834052406667, "learning_rate": 9.193729016505069e-05, "loss": 0.3283, "step": 2436 }, { "epoch": 0.20789967582323834, "grad_norm": 1.7840246040247043, "learning_rate": 9.19297658291412e-05, "loss": 0.3585, "step": 2437 }, { "epoch": 0.2079849854973554, "grad_norm": 1.3880865953432335, "learning_rate": 9.19222382921169e-05, "loss": 0.3418, "step": 2438 }, { "epoch": 0.20807029517147244, "grad_norm": 1.6402695043608033, "learning_rate": 9.191470755455248e-05, "loss": 0.4165, "step": 2439 }, { "epoch": 0.20815560484558948, "grad_norm": 1.635829347024894, "learning_rate": 9.190717361702288e-05, "loss": 0.3806, "step": 2440 }, { "epoch": 0.20824091451970653, "grad_norm": 1.5357012764009006, "learning_rate": 9.189963648010326e-05, "loss": 0.392, "step": 2441 }, { "epoch": 0.20832622419382357, "grad_norm": 1.346802902947275, "learning_rate": 9.189209614436906e-05, "loss": 0.3455, "step": 2442 }, { "epoch": 0.20841153386794062, "grad_norm": 1.4938792280536783, "learning_rate": 9.188455261039592e-05, "loss": 0.3607, "step": 2443 }, { "epoch": 0.20849684354205766, "grad_norm": 1.5108354535993598, "learning_rate": 9.187700587875977e-05, "loss": 0.3445, "step": 2444 }, { "epoch": 0.2085821532161747, "grad_norm": 1.2687204442081628, "learning_rate": 9.186945595003676e-05, "loss": 0.3389, "step": 2445 }, { "epoch": 0.20866746289029175, "grad_norm": 1.8302418768871445, "learning_rate": 9.186190282480327e-05, "loss": 0.4075, "step": 2446 }, { "epoch": 0.2087527725644088, "grad_norm": 1.3374360363362778, "learning_rate": 9.185434650363596e-05, "loss": 0.3885, "step": 2447 }, { "epoch": 0.20883808223852585, "grad_norm": 1.3834288349660078, "learning_rate": 9.184678698711171e-05, "loss": 0.3492, "step": 2448 }, { "epoch": 0.2089233919126429, "grad_norm": 1.3524652013338798, "learning_rate": 9.183922427580764e-05, "loss": 0.3192, "step": 2449 }, { "epoch": 0.20900870158675994, "grad_norm": 1.6392876396797145, "learning_rate": 9.183165837030114e-05, "loss": 0.3344, "step": 2450 }, { "epoch": 0.20909401126087698, "grad_norm": 1.2094839416642693, "learning_rate": 9.182408927116981e-05, "loss": 0.2738, "step": 2451 }, { "epoch": 0.20917932093499403, "grad_norm": 1.3544766095839507, "learning_rate": 9.181651697899152e-05, "loss": 0.3696, "step": 2452 }, { "epoch": 0.20926463060911107, "grad_norm": 1.5317115088212574, "learning_rate": 9.180894149434437e-05, "loss": 0.3525, "step": 2453 }, { "epoch": 0.20934994028322812, "grad_norm": 1.217603682346766, "learning_rate": 9.18013628178067e-05, "loss": 0.3335, "step": 2454 }, { "epoch": 0.20943524995734517, "grad_norm": 1.402362408952185, "learning_rate": 9.179378094995712e-05, "loss": 0.2805, "step": 2455 }, { "epoch": 0.2095205596314622, "grad_norm": 1.4270468751283107, "learning_rate": 9.178619589137447e-05, "loss": 0.3314, "step": 2456 }, { "epoch": 0.20960586930557926, "grad_norm": 1.3681844945303623, "learning_rate": 9.177860764263779e-05, "loss": 0.3345, "step": 2457 }, { "epoch": 0.2096911789796963, "grad_norm": 1.233459132884068, "learning_rate": 9.177101620432644e-05, "loss": 0.3139, "step": 2458 }, { "epoch": 0.20977648865381335, "grad_norm": 1.1838493448884542, "learning_rate": 9.176342157701998e-05, "loss": 0.275, "step": 2459 }, { "epoch": 0.2098617983279304, "grad_norm": 1.9551354398910248, "learning_rate": 9.17558237612982e-05, "loss": 0.3957, "step": 2460 }, { "epoch": 0.20994710800204744, "grad_norm": 1.4424489289602913, "learning_rate": 9.174822275774117e-05, "loss": 0.3392, "step": 2461 }, { "epoch": 0.21003241767616448, "grad_norm": 1.3569329787860698, "learning_rate": 9.174061856692919e-05, "loss": 0.3479, "step": 2462 }, { "epoch": 0.21011772735028153, "grad_norm": 1.4175475785009273, "learning_rate": 9.17330111894428e-05, "loss": 0.2839, "step": 2463 }, { "epoch": 0.21020303702439858, "grad_norm": 1.6319628973745028, "learning_rate": 9.172540062586276e-05, "loss": 0.351, "step": 2464 }, { "epoch": 0.21028834669851562, "grad_norm": 1.6420704604816077, "learning_rate": 9.171778687677011e-05, "loss": 0.3737, "step": 2465 }, { "epoch": 0.21037365637263267, "grad_norm": 1.3499283806912148, "learning_rate": 9.171016994274612e-05, "loss": 0.3417, "step": 2466 }, { "epoch": 0.2104589660467497, "grad_norm": 1.3960132577807627, "learning_rate": 9.17025498243723e-05, "loss": 0.3548, "step": 2467 }, { "epoch": 0.21054427572086676, "grad_norm": 1.2562467888178992, "learning_rate": 9.169492652223042e-05, "loss": 0.3924, "step": 2468 }, { "epoch": 0.2106295853949838, "grad_norm": 1.66581554933221, "learning_rate": 9.168730003690246e-05, "loss": 0.3975, "step": 2469 }, { "epoch": 0.21071489506910085, "grad_norm": 1.443450883537793, "learning_rate": 9.167967036897066e-05, "loss": 0.3249, "step": 2470 }, { "epoch": 0.21080020474321787, "grad_norm": 1.4607237797227726, "learning_rate": 9.167203751901751e-05, "loss": 0.3133, "step": 2471 }, { "epoch": 0.2108855144173349, "grad_norm": 1.5363266538239047, "learning_rate": 9.166440148762576e-05, "loss": 0.4093, "step": 2472 }, { "epoch": 0.21097082409145196, "grad_norm": 1.120903131847718, "learning_rate": 9.165676227537836e-05, "loss": 0.3392, "step": 2473 }, { "epoch": 0.211056133765569, "grad_norm": 1.5908269068310223, "learning_rate": 9.164911988285851e-05, "loss": 0.3959, "step": 2474 }, { "epoch": 0.21114144343968605, "grad_norm": 1.301595759338922, "learning_rate": 9.164147431064969e-05, "loss": 0.3281, "step": 2475 }, { "epoch": 0.2112267531138031, "grad_norm": 1.357941759846635, "learning_rate": 9.163382555933558e-05, "loss": 0.3694, "step": 2476 }, { "epoch": 0.21131206278792014, "grad_norm": 1.6576855753813486, "learning_rate": 9.162617362950015e-05, "loss": 0.3498, "step": 2477 }, { "epoch": 0.2113973724620372, "grad_norm": 1.8114653955913838, "learning_rate": 9.161851852172754e-05, "loss": 0.3919, "step": 2478 }, { "epoch": 0.21148268213615423, "grad_norm": 1.6120664442486878, "learning_rate": 9.161086023660222e-05, "loss": 0.3803, "step": 2479 }, { "epoch": 0.21156799181027128, "grad_norm": 1.8410586094102739, "learning_rate": 9.160319877470882e-05, "loss": 0.3454, "step": 2480 }, { "epoch": 0.21165330148438832, "grad_norm": 1.806276356857873, "learning_rate": 9.159553413663228e-05, "loss": 0.3742, "step": 2481 }, { "epoch": 0.21173861115850537, "grad_norm": 1.4551284269304494, "learning_rate": 9.158786632295776e-05, "loss": 0.3494, "step": 2482 }, { "epoch": 0.21182392083262241, "grad_norm": 1.3178371723299727, "learning_rate": 9.158019533427064e-05, "loss": 0.4202, "step": 2483 }, { "epoch": 0.21190923050673946, "grad_norm": 1.5383646975812602, "learning_rate": 9.157252117115656e-05, "loss": 0.3999, "step": 2484 }, { "epoch": 0.2119945401808565, "grad_norm": 1.3869244060337473, "learning_rate": 9.156484383420141e-05, "loss": 0.3403, "step": 2485 }, { "epoch": 0.21207984985497355, "grad_norm": 1.2462334151011, "learning_rate": 9.155716332399129e-05, "loss": 0.3171, "step": 2486 }, { "epoch": 0.2121651595290906, "grad_norm": 1.5203456706459728, "learning_rate": 9.15494796411126e-05, "loss": 0.3649, "step": 2487 }, { "epoch": 0.21225046920320764, "grad_norm": 1.366121061475942, "learning_rate": 9.154179278615194e-05, "loss": 0.3377, "step": 2488 }, { "epoch": 0.2123357788773247, "grad_norm": 1.3742542845624697, "learning_rate": 9.153410275969613e-05, "loss": 0.3626, "step": 2489 }, { "epoch": 0.21242108855144173, "grad_norm": 1.380569880893272, "learning_rate": 9.152640956233231e-05, "loss": 0.3308, "step": 2490 }, { "epoch": 0.21250639822555878, "grad_norm": 1.2961342316066142, "learning_rate": 9.151871319464778e-05, "loss": 0.3153, "step": 2491 }, { "epoch": 0.21259170789967582, "grad_norm": 1.4982626708570794, "learning_rate": 9.151101365723013e-05, "loss": 0.3609, "step": 2492 }, { "epoch": 0.21267701757379287, "grad_norm": 1.5771080744157806, "learning_rate": 9.150331095066717e-05, "loss": 0.3932, "step": 2493 }, { "epoch": 0.21276232724790992, "grad_norm": 1.4456983721329515, "learning_rate": 9.149560507554698e-05, "loss": 0.3726, "step": 2494 }, { "epoch": 0.21284763692202696, "grad_norm": 1.3615368719974474, "learning_rate": 9.148789603245784e-05, "loss": 0.3273, "step": 2495 }, { "epoch": 0.212932946596144, "grad_norm": 1.6606658512441705, "learning_rate": 9.148018382198831e-05, "loss": 0.3427, "step": 2496 }, { "epoch": 0.21301825627026105, "grad_norm": 1.2023470456559668, "learning_rate": 9.147246844472716e-05, "loss": 0.3466, "step": 2497 }, { "epoch": 0.2131035659443781, "grad_norm": 1.5191862602177038, "learning_rate": 9.146474990126343e-05, "loss": 0.3446, "step": 2498 }, { "epoch": 0.21318887561849514, "grad_norm": 1.5001357335031738, "learning_rate": 9.14570281921864e-05, "loss": 0.3165, "step": 2499 }, { "epoch": 0.2132741852926122, "grad_norm": 1.319026795034846, "learning_rate": 9.144930331808557e-05, "loss": 0.3172, "step": 2500 }, { "epoch": 0.21335949496672924, "grad_norm": 1.4017255690343615, "learning_rate": 9.144157527955069e-05, "loss": 0.3085, "step": 2501 }, { "epoch": 0.21344480464084628, "grad_norm": 1.6386155674641294, "learning_rate": 9.143384407717175e-05, "loss": 0.3967, "step": 2502 }, { "epoch": 0.21353011431496333, "grad_norm": 1.7829661885648391, "learning_rate": 9.1426109711539e-05, "loss": 0.3607, "step": 2503 }, { "epoch": 0.21361542398908037, "grad_norm": 1.5527215537255643, "learning_rate": 9.141837218324292e-05, "loss": 0.3833, "step": 2504 }, { "epoch": 0.21370073366319742, "grad_norm": 1.6036107805201332, "learning_rate": 9.141063149287421e-05, "loss": 0.3673, "step": 2505 }, { "epoch": 0.21378604333731446, "grad_norm": 1.4137258392347143, "learning_rate": 9.140288764102384e-05, "loss": 0.3845, "step": 2506 }, { "epoch": 0.2138713530114315, "grad_norm": 1.6888205694649085, "learning_rate": 9.1395140628283e-05, "loss": 0.4281, "step": 2507 }, { "epoch": 0.21395666268554855, "grad_norm": 1.474561273661873, "learning_rate": 9.138739045524318e-05, "loss": 0.3614, "step": 2508 }, { "epoch": 0.2140419723596656, "grad_norm": 1.4979423078624645, "learning_rate": 9.1379637122496e-05, "loss": 0.3948, "step": 2509 }, { "epoch": 0.21412728203378262, "grad_norm": 1.250603339500001, "learning_rate": 9.137188063063344e-05, "loss": 0.34, "step": 2510 }, { "epoch": 0.21421259170789966, "grad_norm": 1.6484105980291512, "learning_rate": 9.136412098024763e-05, "loss": 0.4213, "step": 2511 }, { "epoch": 0.2142979013820167, "grad_norm": 1.3052067437229489, "learning_rate": 9.1356358171931e-05, "loss": 0.3253, "step": 2512 }, { "epoch": 0.21438321105613375, "grad_norm": 1.5268745189389608, "learning_rate": 9.134859220627618e-05, "loss": 0.3097, "step": 2513 }, { "epoch": 0.2144685207302508, "grad_norm": 1.2927224653620677, "learning_rate": 9.134082308387608e-05, "loss": 0.3303, "step": 2514 }, { "epoch": 0.21455383040436785, "grad_norm": 1.4240128092564734, "learning_rate": 9.133305080532384e-05, "loss": 0.3232, "step": 2515 }, { "epoch": 0.2146391400784849, "grad_norm": 1.4428561395428527, "learning_rate": 9.132527537121278e-05, "loss": 0.319, "step": 2516 }, { "epoch": 0.21472444975260194, "grad_norm": 1.19223678906945, "learning_rate": 9.131749678213657e-05, "loss": 0.3215, "step": 2517 }, { "epoch": 0.21480975942671898, "grad_norm": 1.9645781138388863, "learning_rate": 9.130971503868904e-05, "loss": 0.3984, "step": 2518 }, { "epoch": 0.21489506910083603, "grad_norm": 1.2212422637538238, "learning_rate": 9.130193014146427e-05, "loss": 0.4047, "step": 2519 }, { "epoch": 0.21498037877495307, "grad_norm": 1.5302571561783445, "learning_rate": 9.129414209105664e-05, "loss": 0.3196, "step": 2520 }, { "epoch": 0.21506568844907012, "grad_norm": 1.4244108956648431, "learning_rate": 9.128635088806068e-05, "loss": 0.352, "step": 2521 }, { "epoch": 0.21515099812318716, "grad_norm": 1.5373189121693727, "learning_rate": 9.127855653307123e-05, "loss": 0.3442, "step": 2522 }, { "epoch": 0.2152363077973042, "grad_norm": 1.5374218866617808, "learning_rate": 9.127075902668333e-05, "loss": 0.3952, "step": 2523 }, { "epoch": 0.21532161747142126, "grad_norm": 1.6697366890648735, "learning_rate": 9.126295836949231e-05, "loss": 0.3763, "step": 2524 }, { "epoch": 0.2154069271455383, "grad_norm": 1.4681040378209198, "learning_rate": 9.125515456209367e-05, "loss": 0.3195, "step": 2525 }, { "epoch": 0.21549223681965535, "grad_norm": 1.3732274073810598, "learning_rate": 9.124734760508323e-05, "loss": 0.3744, "step": 2526 }, { "epoch": 0.2155775464937724, "grad_norm": 1.3732056180045116, "learning_rate": 9.123953749905697e-05, "loss": 0.3688, "step": 2527 }, { "epoch": 0.21566285616788944, "grad_norm": 1.3121776639129543, "learning_rate": 9.123172424461118e-05, "loss": 0.3737, "step": 2528 }, { "epoch": 0.21574816584200648, "grad_norm": 1.3532219550304436, "learning_rate": 9.122390784234233e-05, "loss": 0.3282, "step": 2529 }, { "epoch": 0.21583347551612353, "grad_norm": 1.4615922572393845, "learning_rate": 9.12160882928472e-05, "loss": 0.3452, "step": 2530 }, { "epoch": 0.21591878519024058, "grad_norm": 1.6051082902544624, "learning_rate": 9.120826559672275e-05, "loss": 0.3306, "step": 2531 }, { "epoch": 0.21600409486435762, "grad_norm": 1.498088254195459, "learning_rate": 9.12004397545662e-05, "loss": 0.3626, "step": 2532 }, { "epoch": 0.21608940453847467, "grad_norm": 1.637473139651779, "learning_rate": 9.1192610766975e-05, "loss": 0.3228, "step": 2533 }, { "epoch": 0.2161747142125917, "grad_norm": 1.416491366741507, "learning_rate": 9.118477863454688e-05, "loss": 0.3745, "step": 2534 }, { "epoch": 0.21626002388670876, "grad_norm": 1.7976119810619302, "learning_rate": 9.117694335787976e-05, "loss": 0.393, "step": 2535 }, { "epoch": 0.2163453335608258, "grad_norm": 1.2408565372443479, "learning_rate": 9.116910493757185e-05, "loss": 0.3264, "step": 2536 }, { "epoch": 0.21643064323494285, "grad_norm": 1.6390415361225625, "learning_rate": 9.116126337422152e-05, "loss": 0.3976, "step": 2537 }, { "epoch": 0.2165159529090599, "grad_norm": 1.6728152189298582, "learning_rate": 9.115341866842748e-05, "loss": 0.3962, "step": 2538 }, { "epoch": 0.21660126258317694, "grad_norm": 1.6217594112889395, "learning_rate": 9.114557082078861e-05, "loss": 0.3685, "step": 2539 }, { "epoch": 0.21668657225729399, "grad_norm": 1.6731055891345257, "learning_rate": 9.113771983190405e-05, "loss": 0.3546, "step": 2540 }, { "epoch": 0.21677188193141103, "grad_norm": 1.401661146215816, "learning_rate": 9.112986570237319e-05, "loss": 0.3652, "step": 2541 }, { "epoch": 0.21685719160552808, "grad_norm": 1.516290184425666, "learning_rate": 9.112200843279565e-05, "loss": 0.332, "step": 2542 }, { "epoch": 0.21694250127964512, "grad_norm": 1.6343599862162184, "learning_rate": 9.11141480237713e-05, "loss": 0.34, "step": 2543 }, { "epoch": 0.21702781095376217, "grad_norm": 1.6673515660965446, "learning_rate": 9.110628447590021e-05, "loss": 0.3597, "step": 2544 }, { "epoch": 0.2171131206278792, "grad_norm": 1.6222544731493502, "learning_rate": 9.109841778978274e-05, "loss": 0.4057, "step": 2545 }, { "epoch": 0.21719843030199626, "grad_norm": 1.4205904177791921, "learning_rate": 9.109054796601947e-05, "loss": 0.3241, "step": 2546 }, { "epoch": 0.2172837399761133, "grad_norm": 1.379005536619473, "learning_rate": 9.108267500521121e-05, "loss": 0.3693, "step": 2547 }, { "epoch": 0.21736904965023032, "grad_norm": 1.4418868424119837, "learning_rate": 9.107479890795905e-05, "loss": 0.3782, "step": 2548 }, { "epoch": 0.21745435932434737, "grad_norm": 1.3533461163219331, "learning_rate": 9.106691967486424e-05, "loss": 0.3232, "step": 2549 }, { "epoch": 0.2175396689984644, "grad_norm": 1.4196227964233161, "learning_rate": 9.105903730652836e-05, "loss": 0.3543, "step": 2550 }, { "epoch": 0.21762497867258146, "grad_norm": 1.4030589030782714, "learning_rate": 9.105115180355317e-05, "loss": 0.3267, "step": 2551 }, { "epoch": 0.2177102883466985, "grad_norm": 1.386911513134561, "learning_rate": 9.104326316654067e-05, "loss": 0.353, "step": 2552 }, { "epoch": 0.21779559802081555, "grad_norm": 1.38546141454188, "learning_rate": 9.103537139609314e-05, "loss": 0.3688, "step": 2553 }, { "epoch": 0.2178809076949326, "grad_norm": 1.4614512310367571, "learning_rate": 9.102747649281307e-05, "loss": 0.3672, "step": 2554 }, { "epoch": 0.21796621736904964, "grad_norm": 1.8703736769280128, "learning_rate": 9.101957845730318e-05, "loss": 0.4214, "step": 2555 }, { "epoch": 0.2180515270431667, "grad_norm": 1.4159336250503043, "learning_rate": 9.101167729016646e-05, "loss": 0.4039, "step": 2556 }, { "epoch": 0.21813683671728373, "grad_norm": 1.7940643885698397, "learning_rate": 9.10037729920061e-05, "loss": 0.3958, "step": 2557 }, { "epoch": 0.21822214639140078, "grad_norm": 1.4307290540912012, "learning_rate": 9.099586556342559e-05, "loss": 0.3498, "step": 2558 }, { "epoch": 0.21830745606551782, "grad_norm": 1.4647261508784708, "learning_rate": 9.098795500502858e-05, "loss": 0.3477, "step": 2559 }, { "epoch": 0.21839276573963487, "grad_norm": 1.4960457019709874, "learning_rate": 9.098004131741902e-05, "loss": 0.3921, "step": 2560 }, { "epoch": 0.21847807541375192, "grad_norm": 1.4307566329639663, "learning_rate": 9.097212450120108e-05, "loss": 0.3653, "step": 2561 }, { "epoch": 0.21856338508786896, "grad_norm": 1.399219145314189, "learning_rate": 9.096420455697914e-05, "loss": 0.3668, "step": 2562 }, { "epoch": 0.218648694761986, "grad_norm": 1.579731340865897, "learning_rate": 9.095628148535788e-05, "loss": 0.2983, "step": 2563 }, { "epoch": 0.21873400443610305, "grad_norm": 1.1677883353322214, "learning_rate": 9.094835528694217e-05, "loss": 0.3224, "step": 2564 }, { "epoch": 0.2188193141102201, "grad_norm": 1.5796430481524666, "learning_rate": 9.094042596233712e-05, "loss": 0.4247, "step": 2565 }, { "epoch": 0.21890462378433714, "grad_norm": 1.290689539738869, "learning_rate": 9.093249351214812e-05, "loss": 0.3567, "step": 2566 }, { "epoch": 0.2189899334584542, "grad_norm": 1.5078774077522656, "learning_rate": 9.092455793698075e-05, "loss": 0.3206, "step": 2567 }, { "epoch": 0.21907524313257123, "grad_norm": 1.59773250481797, "learning_rate": 9.091661923744086e-05, "loss": 0.3173, "step": 2568 }, { "epoch": 0.21916055280668828, "grad_norm": 1.6225535243159221, "learning_rate": 9.090867741413452e-05, "loss": 0.403, "step": 2569 }, { "epoch": 0.21924586248080533, "grad_norm": 1.3984306377903168, "learning_rate": 9.090073246766803e-05, "loss": 0.3583, "step": 2570 }, { "epoch": 0.21933117215492237, "grad_norm": 1.3326279542620254, "learning_rate": 9.089278439864797e-05, "loss": 0.3714, "step": 2571 }, { "epoch": 0.21941648182903942, "grad_norm": 1.5957929196066907, "learning_rate": 9.088483320768115e-05, "loss": 0.3993, "step": 2572 }, { "epoch": 0.21950179150315646, "grad_norm": 1.4248966346863219, "learning_rate": 9.087687889537454e-05, "loss": 0.3042, "step": 2573 }, { "epoch": 0.2195871011772735, "grad_norm": 1.470827783820874, "learning_rate": 9.086892146233548e-05, "loss": 0.3298, "step": 2574 }, { "epoch": 0.21967241085139055, "grad_norm": 1.4663594046880324, "learning_rate": 9.086096090917143e-05, "loss": 0.3443, "step": 2575 }, { "epoch": 0.2197577205255076, "grad_norm": 1.6038525096216387, "learning_rate": 9.085299723649014e-05, "loss": 0.3427, "step": 2576 }, { "epoch": 0.21984303019962464, "grad_norm": 1.4610858372741338, "learning_rate": 9.08450304448996e-05, "loss": 0.341, "step": 2577 }, { "epoch": 0.2199283398737417, "grad_norm": 1.4501142588354818, "learning_rate": 9.083706053500806e-05, "loss": 0.332, "step": 2578 }, { "epoch": 0.22001364954785874, "grad_norm": 1.2776010236715707, "learning_rate": 9.082908750742394e-05, "loss": 0.3217, "step": 2579 }, { "epoch": 0.22009895922197578, "grad_norm": 1.6841719977464191, "learning_rate": 9.082111136275596e-05, "loss": 0.3918, "step": 2580 }, { "epoch": 0.22018426889609283, "grad_norm": 1.3136430032207718, "learning_rate": 9.081313210161304e-05, "loss": 0.3278, "step": 2581 }, { "epoch": 0.22026957857020987, "grad_norm": 1.4144778827058635, "learning_rate": 9.080514972460439e-05, "loss": 0.3452, "step": 2582 }, { "epoch": 0.22035488824432692, "grad_norm": 1.5469222398730937, "learning_rate": 9.079716423233938e-05, "loss": 0.2896, "step": 2583 }, { "epoch": 0.22044019791844396, "grad_norm": 1.5061905591957319, "learning_rate": 9.078917562542765e-05, "loss": 0.4116, "step": 2584 }, { "epoch": 0.220525507592561, "grad_norm": 1.8053351226968692, "learning_rate": 9.078118390447917e-05, "loss": 0.4404, "step": 2585 }, { "epoch": 0.22061081726667803, "grad_norm": 1.6489615669467794, "learning_rate": 9.077318907010397e-05, "loss": 0.3979, "step": 2586 }, { "epoch": 0.22069612694079507, "grad_norm": 1.281600206476664, "learning_rate": 9.076519112291246e-05, "loss": 0.3582, "step": 2587 }, { "epoch": 0.22078143661491212, "grad_norm": 1.4848733165919927, "learning_rate": 9.075719006351524e-05, "loss": 0.3652, "step": 2588 }, { "epoch": 0.22086674628902916, "grad_norm": 1.4361305347820406, "learning_rate": 9.074918589252313e-05, "loss": 0.3631, "step": 2589 }, { "epoch": 0.2209520559631462, "grad_norm": 1.5541656998673092, "learning_rate": 9.074117861054723e-05, "loss": 0.315, "step": 2590 }, { "epoch": 0.22103736563726326, "grad_norm": 1.4558897072078754, "learning_rate": 9.073316821819884e-05, "loss": 0.3466, "step": 2591 }, { "epoch": 0.2211226753113803, "grad_norm": 1.5003269951757194, "learning_rate": 9.07251547160895e-05, "loss": 0.3752, "step": 2592 }, { "epoch": 0.22120798498549735, "grad_norm": 1.3510033658181257, "learning_rate": 9.071713810483103e-05, "loss": 0.3686, "step": 2593 }, { "epoch": 0.2212932946596144, "grad_norm": 1.4067570937652187, "learning_rate": 9.070911838503545e-05, "loss": 0.3401, "step": 2594 }, { "epoch": 0.22137860433373144, "grad_norm": 1.4687310075546476, "learning_rate": 9.070109555731497e-05, "loss": 0.3417, "step": 2595 }, { "epoch": 0.22146391400784848, "grad_norm": 1.493528073476427, "learning_rate": 9.069306962228215e-05, "loss": 0.3119, "step": 2596 }, { "epoch": 0.22154922368196553, "grad_norm": 1.4047646201140582, "learning_rate": 9.06850405805497e-05, "loss": 0.366, "step": 2597 }, { "epoch": 0.22163453335608257, "grad_norm": 1.487455203880448, "learning_rate": 9.067700843273061e-05, "loss": 0.3111, "step": 2598 }, { "epoch": 0.22171984303019962, "grad_norm": 1.3761406415545379, "learning_rate": 9.066897317943808e-05, "loss": 0.3547, "step": 2599 }, { "epoch": 0.22180515270431667, "grad_norm": 1.4691818089533368, "learning_rate": 9.066093482128557e-05, "loss": 0.3589, "step": 2600 }, { "epoch": 0.2218904623784337, "grad_norm": 1.3050931197226736, "learning_rate": 9.065289335888674e-05, "loss": 0.3349, "step": 2601 }, { "epoch": 0.22197577205255076, "grad_norm": 1.4988163410486075, "learning_rate": 9.064484879285555e-05, "loss": 0.3811, "step": 2602 }, { "epoch": 0.2220610817266678, "grad_norm": 1.5517485938209492, "learning_rate": 9.063680112380612e-05, "loss": 0.3932, "step": 2603 }, { "epoch": 0.22214639140078485, "grad_norm": 1.5619975998941853, "learning_rate": 9.062875035235288e-05, "loss": 0.3732, "step": 2604 }, { "epoch": 0.2222317010749019, "grad_norm": 1.3723162556208697, "learning_rate": 9.062069647911046e-05, "loss": 0.3612, "step": 2605 }, { "epoch": 0.22231701074901894, "grad_norm": 1.3567696520701982, "learning_rate": 9.061263950469371e-05, "loss": 0.3254, "step": 2606 }, { "epoch": 0.22240232042313599, "grad_norm": 1.3321132243150429, "learning_rate": 9.060457942971776e-05, "loss": 0.3171, "step": 2607 }, { "epoch": 0.22248763009725303, "grad_norm": 1.3192777472594854, "learning_rate": 9.059651625479793e-05, "loss": 0.323, "step": 2608 }, { "epoch": 0.22257293977137008, "grad_norm": 1.626219951753739, "learning_rate": 9.058844998054983e-05, "loss": 0.3207, "step": 2609 }, { "epoch": 0.22265824944548712, "grad_norm": 1.4863621310580928, "learning_rate": 9.058038060758925e-05, "loss": 0.3329, "step": 2610 }, { "epoch": 0.22274355911960417, "grad_norm": 1.2999416998481703, "learning_rate": 9.057230813653225e-05, "loss": 0.3292, "step": 2611 }, { "epoch": 0.2228288687937212, "grad_norm": 1.4219736756679437, "learning_rate": 9.056423256799513e-05, "loss": 0.3321, "step": 2612 }, { "epoch": 0.22291417846783826, "grad_norm": 1.8620015034080524, "learning_rate": 9.055615390259441e-05, "loss": 0.3987, "step": 2613 }, { "epoch": 0.2229994881419553, "grad_norm": 1.3601257618790756, "learning_rate": 9.054807214094685e-05, "loss": 0.4035, "step": 2614 }, { "epoch": 0.22308479781607235, "grad_norm": 1.4566702305963304, "learning_rate": 9.053998728366947e-05, "loss": 0.3549, "step": 2615 }, { "epoch": 0.2231701074901894, "grad_norm": 1.4915429765505306, "learning_rate": 9.053189933137949e-05, "loss": 0.3394, "step": 2616 }, { "epoch": 0.22325541716430644, "grad_norm": 1.4237901135722582, "learning_rate": 9.052380828469436e-05, "loss": 0.3581, "step": 2617 }, { "epoch": 0.2233407268384235, "grad_norm": 1.4600176848032884, "learning_rate": 9.051571414423182e-05, "loss": 0.3503, "step": 2618 }, { "epoch": 0.22342603651254053, "grad_norm": 1.5985501127576316, "learning_rate": 9.050761691060981e-05, "loss": 0.338, "step": 2619 }, { "epoch": 0.22351134618665758, "grad_norm": 1.523531084365152, "learning_rate": 9.049951658444651e-05, "loss": 0.394, "step": 2620 }, { "epoch": 0.22359665586077462, "grad_norm": 1.485711229106358, "learning_rate": 9.049141316636033e-05, "loss": 0.3811, "step": 2621 }, { "epoch": 0.22368196553489167, "grad_norm": 1.2742536304969254, "learning_rate": 9.048330665696993e-05, "loss": 0.3561, "step": 2622 }, { "epoch": 0.22376727520900871, "grad_norm": 1.224991150259632, "learning_rate": 9.047519705689418e-05, "loss": 0.3287, "step": 2623 }, { "epoch": 0.22385258488312576, "grad_norm": 1.5261191217167267, "learning_rate": 9.046708436675223e-05, "loss": 0.3359, "step": 2624 }, { "epoch": 0.22393789455724278, "grad_norm": 1.2317656932935943, "learning_rate": 9.045896858716343e-05, "loss": 0.3157, "step": 2625 }, { "epoch": 0.22402320423135982, "grad_norm": 1.480602731404406, "learning_rate": 9.045084971874738e-05, "loss": 0.3588, "step": 2626 }, { "epoch": 0.22410851390547687, "grad_norm": 1.0485102848536334, "learning_rate": 9.04427277621239e-05, "loss": 0.2901, "step": 2627 }, { "epoch": 0.22419382357959391, "grad_norm": 1.4834601544518111, "learning_rate": 9.043460271791308e-05, "loss": 0.3703, "step": 2628 }, { "epoch": 0.22427913325371096, "grad_norm": 1.4013712706822048, "learning_rate": 9.04264745867352e-05, "loss": 0.3743, "step": 2629 }, { "epoch": 0.224364442927828, "grad_norm": 1.579490750490103, "learning_rate": 9.041834336921082e-05, "loss": 0.3468, "step": 2630 }, { "epoch": 0.22444975260194505, "grad_norm": 1.8702961728969631, "learning_rate": 9.04102090659607e-05, "loss": 0.3833, "step": 2631 }, { "epoch": 0.2245350622760621, "grad_norm": 1.5330714078074665, "learning_rate": 9.040207167760586e-05, "loss": 0.3583, "step": 2632 }, { "epoch": 0.22462037195017914, "grad_norm": 1.5534414540810104, "learning_rate": 9.039393120476755e-05, "loss": 0.3792, "step": 2633 }, { "epoch": 0.2247056816242962, "grad_norm": 1.448815631581177, "learning_rate": 9.038578764806723e-05, "loss": 0.3805, "step": 2634 }, { "epoch": 0.22479099129841323, "grad_norm": 1.656712881220153, "learning_rate": 9.037764100812664e-05, "loss": 0.2951, "step": 2635 }, { "epoch": 0.22487630097253028, "grad_norm": 1.5066405173932909, "learning_rate": 9.036949128556773e-05, "loss": 0.3539, "step": 2636 }, { "epoch": 0.22496161064664733, "grad_norm": 1.4027989745768032, "learning_rate": 9.036133848101269e-05, "loss": 0.3736, "step": 2637 }, { "epoch": 0.22504692032076437, "grad_norm": 1.1997559179981596, "learning_rate": 9.035318259508393e-05, "loss": 0.2947, "step": 2638 }, { "epoch": 0.22513222999488142, "grad_norm": 1.2847604344959902, "learning_rate": 9.034502362840411e-05, "loss": 0.2952, "step": 2639 }, { "epoch": 0.22521753966899846, "grad_norm": 1.5368160370215846, "learning_rate": 9.033686158159613e-05, "loss": 0.3977, "step": 2640 }, { "epoch": 0.2253028493431155, "grad_norm": 1.4925980727084007, "learning_rate": 9.032869645528313e-05, "loss": 0.3496, "step": 2641 }, { "epoch": 0.22538815901723255, "grad_norm": 1.645535051703329, "learning_rate": 9.032052825008845e-05, "loss": 0.4262, "step": 2642 }, { "epoch": 0.2254734686913496, "grad_norm": 1.5867464605003152, "learning_rate": 9.031235696663572e-05, "loss": 0.3648, "step": 2643 }, { "epoch": 0.22555877836546664, "grad_norm": 1.4749810363067721, "learning_rate": 9.030418260554873e-05, "loss": 0.3746, "step": 2644 }, { "epoch": 0.2256440880395837, "grad_norm": 1.4835602783155635, "learning_rate": 9.029600516745158e-05, "loss": 0.3536, "step": 2645 }, { "epoch": 0.22572939771370074, "grad_norm": 1.6146868108589558, "learning_rate": 9.028782465296856e-05, "loss": 0.3259, "step": 2646 }, { "epoch": 0.22581470738781778, "grad_norm": 1.3523490248940309, "learning_rate": 9.027964106272423e-05, "loss": 0.3349, "step": 2647 }, { "epoch": 0.22590001706193483, "grad_norm": 1.5716495420481054, "learning_rate": 9.027145439734336e-05, "loss": 0.3514, "step": 2648 }, { "epoch": 0.22598532673605187, "grad_norm": 1.2784024671903156, "learning_rate": 9.026326465745094e-05, "loss": 0.3527, "step": 2649 }, { "epoch": 0.22607063641016892, "grad_norm": 1.440980016031087, "learning_rate": 9.025507184367223e-05, "loss": 0.2681, "step": 2650 }, { "epoch": 0.22615594608428596, "grad_norm": 1.3867481497213598, "learning_rate": 9.024687595663268e-05, "loss": 0.3088, "step": 2651 }, { "epoch": 0.226241255758403, "grad_norm": 1.1727748213666038, "learning_rate": 9.023867699695804e-05, "loss": 0.3279, "step": 2652 }, { "epoch": 0.22632656543252005, "grad_norm": 1.4456469174713813, "learning_rate": 9.023047496527423e-05, "loss": 0.3719, "step": 2653 }, { "epoch": 0.2264118751066371, "grad_norm": 1.4774973498480406, "learning_rate": 9.022226986220745e-05, "loss": 0.3485, "step": 2654 }, { "epoch": 0.22649718478075415, "grad_norm": 1.673203769179113, "learning_rate": 9.02140616883841e-05, "loss": 0.3896, "step": 2655 }, { "epoch": 0.2265824944548712, "grad_norm": 1.5874210428141582, "learning_rate": 9.020585044443084e-05, "loss": 0.4008, "step": 2656 }, { "epoch": 0.22666780412898824, "grad_norm": 1.2660533509817204, "learning_rate": 9.019763613097455e-05, "loss": 0.3116, "step": 2657 }, { "epoch": 0.22675311380310528, "grad_norm": 2.1602480767101, "learning_rate": 9.018941874864236e-05, "loss": 0.3553, "step": 2658 }, { "epoch": 0.22683842347722233, "grad_norm": 1.5319386704721214, "learning_rate": 9.01811982980616e-05, "loss": 0.3354, "step": 2659 }, { "epoch": 0.22692373315133937, "grad_norm": 1.4703732712091857, "learning_rate": 9.017297477985989e-05, "loss": 0.2938, "step": 2660 }, { "epoch": 0.22700904282545642, "grad_norm": 1.383482086655729, "learning_rate": 9.016474819466501e-05, "loss": 0.3392, "step": 2661 }, { "epoch": 0.22709435249957347, "grad_norm": 1.4619218090924933, "learning_rate": 9.015651854310506e-05, "loss": 0.353, "step": 2662 }, { "epoch": 0.22717966217369048, "grad_norm": 1.3514626085709576, "learning_rate": 9.01482858258083e-05, "loss": 0.3244, "step": 2663 }, { "epoch": 0.22726497184780753, "grad_norm": 1.3952592077041992, "learning_rate": 9.014005004340327e-05, "loss": 0.4092, "step": 2664 }, { "epoch": 0.22735028152192457, "grad_norm": 1.315071629288537, "learning_rate": 9.013181119651872e-05, "loss": 0.3269, "step": 2665 }, { "epoch": 0.22743559119604162, "grad_norm": 1.3673366792217243, "learning_rate": 9.012356928578365e-05, "loss": 0.34, "step": 2666 }, { "epoch": 0.22752090087015867, "grad_norm": 1.489605252120227, "learning_rate": 9.011532431182729e-05, "loss": 0.353, "step": 2667 }, { "epoch": 0.2276062105442757, "grad_norm": 1.4093912432426032, "learning_rate": 9.010707627527909e-05, "loss": 0.3931, "step": 2668 }, { "epoch": 0.22769152021839276, "grad_norm": 1.581719864706679, "learning_rate": 9.009882517676873e-05, "loss": 0.3643, "step": 2669 }, { "epoch": 0.2277768298925098, "grad_norm": 1.4843749196905818, "learning_rate": 9.009057101692615e-05, "loss": 0.3418, "step": 2670 }, { "epoch": 0.22786213956662685, "grad_norm": 1.464970534877853, "learning_rate": 9.008231379638152e-05, "loss": 0.355, "step": 2671 }, { "epoch": 0.2279474492407439, "grad_norm": 1.3721434658545917, "learning_rate": 9.007405351576524e-05, "loss": 0.3519, "step": 2672 }, { "epoch": 0.22803275891486094, "grad_norm": 1.6771262580006516, "learning_rate": 9.006579017570792e-05, "loss": 0.3798, "step": 2673 }, { "epoch": 0.22811806858897798, "grad_norm": 1.7863135395000977, "learning_rate": 9.005752377684043e-05, "loss": 0.3395, "step": 2674 }, { "epoch": 0.22820337826309503, "grad_norm": 1.3882929074254688, "learning_rate": 9.004925431979387e-05, "loss": 0.3357, "step": 2675 }, { "epoch": 0.22828868793721208, "grad_norm": 1.5060626534731294, "learning_rate": 9.004098180519956e-05, "loss": 0.375, "step": 2676 }, { "epoch": 0.22837399761132912, "grad_norm": 1.6867829671476047, "learning_rate": 9.003270623368905e-05, "loss": 0.3753, "step": 2677 }, { "epoch": 0.22845930728544617, "grad_norm": 1.7973146647060123, "learning_rate": 9.002442760589418e-05, "loss": 0.4216, "step": 2678 }, { "epoch": 0.2285446169595632, "grad_norm": 1.6065100937973524, "learning_rate": 9.001614592244694e-05, "loss": 0.3107, "step": 2679 }, { "epoch": 0.22862992663368026, "grad_norm": 1.7426270071671672, "learning_rate": 9.00078611839796e-05, "loss": 0.3528, "step": 2680 }, { "epoch": 0.2287152363077973, "grad_norm": 1.7216616849774904, "learning_rate": 8.999957339112466e-05, "loss": 0.3848, "step": 2681 }, { "epoch": 0.22880054598191435, "grad_norm": 1.5793928586601544, "learning_rate": 8.999128254451486e-05, "loss": 0.3739, "step": 2682 }, { "epoch": 0.2288858556560314, "grad_norm": 1.4292872134307235, "learning_rate": 8.998298864478314e-05, "loss": 0.3032, "step": 2683 }, { "epoch": 0.22897116533014844, "grad_norm": 1.5182901133566833, "learning_rate": 8.997469169256271e-05, "loss": 0.3415, "step": 2684 }, { "epoch": 0.2290564750042655, "grad_norm": 1.389615152996505, "learning_rate": 8.9966391688487e-05, "loss": 0.3224, "step": 2685 }, { "epoch": 0.22914178467838253, "grad_norm": 1.332539838871685, "learning_rate": 8.995808863318966e-05, "loss": 0.3987, "step": 2686 }, { "epoch": 0.22922709435249958, "grad_norm": 1.5201153649921217, "learning_rate": 8.99497825273046e-05, "loss": 0.3878, "step": 2687 }, { "epoch": 0.22931240402661662, "grad_norm": 1.6691087315764384, "learning_rate": 8.994147337146593e-05, "loss": 0.375, "step": 2688 }, { "epoch": 0.22939771370073367, "grad_norm": 1.2344138706702417, "learning_rate": 8.993316116630801e-05, "loss": 0.3587, "step": 2689 }, { "epoch": 0.22948302337485071, "grad_norm": 1.2521366931041409, "learning_rate": 8.992484591246544e-05, "loss": 0.322, "step": 2690 }, { "epoch": 0.22956833304896776, "grad_norm": 1.4304424856411295, "learning_rate": 8.991652761057305e-05, "loss": 0.3192, "step": 2691 }, { "epoch": 0.2296536427230848, "grad_norm": 1.6376150949812056, "learning_rate": 8.990820626126589e-05, "loss": 0.3273, "step": 2692 }, { "epoch": 0.22973895239720185, "grad_norm": 1.6854843534718262, "learning_rate": 8.989988186517926e-05, "loss": 0.335, "step": 2693 }, { "epoch": 0.2298242620713189, "grad_norm": 1.7343918395728457, "learning_rate": 8.989155442294867e-05, "loss": 0.3737, "step": 2694 }, { "epoch": 0.22990957174543594, "grad_norm": 1.2900623489807819, "learning_rate": 8.988322393520989e-05, "loss": 0.3331, "step": 2695 }, { "epoch": 0.229994881419553, "grad_norm": 1.420733570090468, "learning_rate": 8.98748904025989e-05, "loss": 0.3522, "step": 2696 }, { "epoch": 0.23008019109367003, "grad_norm": 1.328211388863755, "learning_rate": 8.986655382575192e-05, "loss": 0.3367, "step": 2697 }, { "epoch": 0.23016550076778708, "grad_norm": 1.4672407959184468, "learning_rate": 8.98582142053054e-05, "loss": 0.3468, "step": 2698 }, { "epoch": 0.23025081044190412, "grad_norm": 1.5269669558254164, "learning_rate": 8.984987154189604e-05, "loss": 0.2941, "step": 2699 }, { "epoch": 0.23033612011602117, "grad_norm": 1.5415876557319652, "learning_rate": 8.984152583616076e-05, "loss": 0.3007, "step": 2700 }, { "epoch": 0.2304214297901382, "grad_norm": 1.4090274932160813, "learning_rate": 8.983317708873669e-05, "loss": 0.3477, "step": 2701 }, { "epoch": 0.23050673946425523, "grad_norm": 1.4607573209412965, "learning_rate": 8.982482530026122e-05, "loss": 0.3475, "step": 2702 }, { "epoch": 0.23059204913837228, "grad_norm": 1.5709134750092304, "learning_rate": 8.981647047137197e-05, "loss": 0.3772, "step": 2703 }, { "epoch": 0.23067735881248932, "grad_norm": 1.4529788036021176, "learning_rate": 8.980811260270677e-05, "loss": 0.32, "step": 2704 }, { "epoch": 0.23076266848660637, "grad_norm": 1.2478990064251259, "learning_rate": 8.979975169490374e-05, "loss": 0.3992, "step": 2705 }, { "epoch": 0.23084797816072342, "grad_norm": 1.4305659029359055, "learning_rate": 8.979138774860114e-05, "loss": 0.4117, "step": 2706 }, { "epoch": 0.23093328783484046, "grad_norm": 1.6676751264720635, "learning_rate": 8.978302076443754e-05, "loss": 0.4303, "step": 2707 }, { "epoch": 0.2310185975089575, "grad_norm": 1.3917503090048342, "learning_rate": 8.977465074305173e-05, "loss": 0.3155, "step": 2708 }, { "epoch": 0.23110390718307455, "grad_norm": 1.2981843887577587, "learning_rate": 8.976627768508267e-05, "loss": 0.3024, "step": 2709 }, { "epoch": 0.2311892168571916, "grad_norm": 1.435594124567865, "learning_rate": 8.975790159116966e-05, "loss": 0.3221, "step": 2710 }, { "epoch": 0.23127452653130864, "grad_norm": 1.353180242384292, "learning_rate": 8.974952246195212e-05, "loss": 0.2757, "step": 2711 }, { "epoch": 0.2313598362054257, "grad_norm": 1.8127801612845869, "learning_rate": 8.974114029806976e-05, "loss": 0.3938, "step": 2712 }, { "epoch": 0.23144514587954274, "grad_norm": 1.3817187364028884, "learning_rate": 8.973275510016252e-05, "loss": 0.3063, "step": 2713 }, { "epoch": 0.23153045555365978, "grad_norm": 1.308175865748185, "learning_rate": 8.972436686887059e-05, "loss": 0.307, "step": 2714 }, { "epoch": 0.23161576522777683, "grad_norm": 1.6637860278922578, "learning_rate": 8.971597560483434e-05, "loss": 0.3693, "step": 2715 }, { "epoch": 0.23170107490189387, "grad_norm": 1.3477004776483583, "learning_rate": 8.970758130869439e-05, "loss": 0.3095, "step": 2716 }, { "epoch": 0.23178638457601092, "grad_norm": 1.577670041213197, "learning_rate": 8.969918398109162e-05, "loss": 0.3624, "step": 2717 }, { "epoch": 0.23187169425012796, "grad_norm": 1.6662988813231567, "learning_rate": 8.969078362266711e-05, "loss": 0.3417, "step": 2718 }, { "epoch": 0.231957003924245, "grad_norm": 1.5864797332315557, "learning_rate": 8.968238023406219e-05, "loss": 0.3658, "step": 2719 }, { "epoch": 0.23204231359836205, "grad_norm": 1.78492524970906, "learning_rate": 8.96739738159184e-05, "loss": 0.4017, "step": 2720 }, { "epoch": 0.2321276232724791, "grad_norm": 1.3413477427765317, "learning_rate": 8.966556436887755e-05, "loss": 0.3197, "step": 2721 }, { "epoch": 0.23221293294659615, "grad_norm": 1.2783923962976873, "learning_rate": 8.965715189358164e-05, "loss": 0.3208, "step": 2722 }, { "epoch": 0.2322982426207132, "grad_norm": 1.614911823286636, "learning_rate": 8.964873639067293e-05, "loss": 0.3694, "step": 2723 }, { "epoch": 0.23238355229483024, "grad_norm": 1.431193408097108, "learning_rate": 8.964031786079388e-05, "loss": 0.3782, "step": 2724 }, { "epoch": 0.23246886196894728, "grad_norm": 1.2991596917305084, "learning_rate": 8.963189630458721e-05, "loss": 0.3498, "step": 2725 }, { "epoch": 0.23255417164306433, "grad_norm": 1.389678418606113, "learning_rate": 8.962347172269585e-05, "loss": 0.3525, "step": 2726 }, { "epoch": 0.23263948131718137, "grad_norm": 1.5038241277294075, "learning_rate": 8.9615044115763e-05, "loss": 0.3172, "step": 2727 }, { "epoch": 0.23272479099129842, "grad_norm": 1.464795897655899, "learning_rate": 8.960661348443205e-05, "loss": 0.3223, "step": 2728 }, { "epoch": 0.23281010066541546, "grad_norm": 1.4999693231624662, "learning_rate": 8.959817982934662e-05, "loss": 0.3277, "step": 2729 }, { "epoch": 0.2328954103395325, "grad_norm": 1.4428203644846966, "learning_rate": 8.958974315115059e-05, "loss": 0.2994, "step": 2730 }, { "epoch": 0.23298072001364956, "grad_norm": 1.5365747009003967, "learning_rate": 8.958130345048803e-05, "loss": 0.405, "step": 2731 }, { "epoch": 0.2330660296877666, "grad_norm": 1.4377712325583099, "learning_rate": 8.95728607280033e-05, "loss": 0.3285, "step": 2732 }, { "epoch": 0.23315133936188365, "grad_norm": 1.526840244042859, "learning_rate": 8.956441498434096e-05, "loss": 0.3288, "step": 2733 }, { "epoch": 0.2332366490360007, "grad_norm": 1.6400101735799693, "learning_rate": 8.955596622014576e-05, "loss": 0.3335, "step": 2734 }, { "epoch": 0.23332195871011774, "grad_norm": 1.447820847814723, "learning_rate": 8.954751443606273e-05, "loss": 0.4249, "step": 2735 }, { "epoch": 0.23340726838423478, "grad_norm": 1.7168767951703208, "learning_rate": 8.953905963273714e-05, "loss": 0.3834, "step": 2736 }, { "epoch": 0.23349257805835183, "grad_norm": 1.4360768280675902, "learning_rate": 8.953060181081447e-05, "loss": 0.3311, "step": 2737 }, { "epoch": 0.23357788773246888, "grad_norm": 1.3973350228258166, "learning_rate": 8.952214097094041e-05, "loss": 0.3251, "step": 2738 }, { "epoch": 0.2336631974065859, "grad_norm": 1.5371855313388714, "learning_rate": 8.95136771137609e-05, "loss": 0.3675, "step": 2739 }, { "epoch": 0.23374850708070294, "grad_norm": 1.6965915759369317, "learning_rate": 8.950521023992212e-05, "loss": 0.3783, "step": 2740 }, { "epoch": 0.23383381675481998, "grad_norm": 1.403803498632058, "learning_rate": 8.949674035007047e-05, "loss": 0.3493, "step": 2741 }, { "epoch": 0.23391912642893703, "grad_norm": 1.6244818521591384, "learning_rate": 8.948826744485258e-05, "loss": 0.3933, "step": 2742 }, { "epoch": 0.23400443610305408, "grad_norm": 1.4269005131623194, "learning_rate": 8.947979152491533e-05, "loss": 0.3563, "step": 2743 }, { "epoch": 0.23408974577717112, "grad_norm": 1.2393678536644306, "learning_rate": 8.947131259090575e-05, "loss": 0.2926, "step": 2744 }, { "epoch": 0.23417505545128817, "grad_norm": 1.7737521930060025, "learning_rate": 8.946283064347125e-05, "loss": 0.3682, "step": 2745 }, { "epoch": 0.2342603651254052, "grad_norm": 1.3940420281027912, "learning_rate": 8.94543456832593e-05, "loss": 0.3256, "step": 2746 }, { "epoch": 0.23434567479952226, "grad_norm": 1.5613484526134822, "learning_rate": 8.944585771091773e-05, "loss": 0.3506, "step": 2747 }, { "epoch": 0.2344309844736393, "grad_norm": 1.9184690238672946, "learning_rate": 8.943736672709454e-05, "loss": 0.4104, "step": 2748 }, { "epoch": 0.23451629414775635, "grad_norm": 1.782931153530607, "learning_rate": 8.942887273243797e-05, "loss": 0.419, "step": 2749 }, { "epoch": 0.2346016038218734, "grad_norm": 1.3628578730961391, "learning_rate": 8.942037572759649e-05, "loss": 0.3346, "step": 2750 }, { "epoch": 0.23468691349599044, "grad_norm": 1.7471191672354478, "learning_rate": 8.94118757132188e-05, "loss": 0.3553, "step": 2751 }, { "epoch": 0.23477222317010749, "grad_norm": 1.6046972346915962, "learning_rate": 8.940337268995385e-05, "loss": 0.3636, "step": 2752 }, { "epoch": 0.23485753284422453, "grad_norm": 1.2998353193587227, "learning_rate": 8.939486665845077e-05, "loss": 0.3034, "step": 2753 }, { "epoch": 0.23494284251834158, "grad_norm": 1.4134639927609713, "learning_rate": 8.938635761935896e-05, "loss": 0.3081, "step": 2754 }, { "epoch": 0.23502815219245862, "grad_norm": 1.3255382254408197, "learning_rate": 8.937784557332807e-05, "loss": 0.3284, "step": 2755 }, { "epoch": 0.23511346186657567, "grad_norm": 1.4808846646873621, "learning_rate": 8.936933052100789e-05, "loss": 0.3038, "step": 2756 }, { "epoch": 0.2351987715406927, "grad_norm": 1.5342842712304223, "learning_rate": 8.936081246304855e-05, "loss": 0.3405, "step": 2757 }, { "epoch": 0.23528408121480976, "grad_norm": 1.545564963736208, "learning_rate": 8.935229140010035e-05, "loss": 0.3393, "step": 2758 }, { "epoch": 0.2353693908889268, "grad_norm": 1.2914486157529175, "learning_rate": 8.93437673328138e-05, "loss": 0.3531, "step": 2759 }, { "epoch": 0.23545470056304385, "grad_norm": 1.6298446330815153, "learning_rate": 8.93352402618397e-05, "loss": 0.3239, "step": 2760 }, { "epoch": 0.2355400102371609, "grad_norm": 1.5281122718078124, "learning_rate": 8.932671018782903e-05, "loss": 0.3794, "step": 2761 }, { "epoch": 0.23562531991127794, "grad_norm": 1.5005776564610271, "learning_rate": 8.931817711143302e-05, "loss": 0.3447, "step": 2762 }, { "epoch": 0.235710629585395, "grad_norm": 1.565143789931264, "learning_rate": 8.930964103330312e-05, "loss": 0.3672, "step": 2763 }, { "epoch": 0.23579593925951203, "grad_norm": 1.4256339219151548, "learning_rate": 8.930110195409102e-05, "loss": 0.3384, "step": 2764 }, { "epoch": 0.23588124893362908, "grad_norm": 1.4020330143250717, "learning_rate": 8.929255987444864e-05, "loss": 0.3673, "step": 2765 }, { "epoch": 0.23596655860774612, "grad_norm": 1.4266100003376376, "learning_rate": 8.92840147950281e-05, "loss": 0.3625, "step": 2766 }, { "epoch": 0.23605186828186317, "grad_norm": 1.7228142283137062, "learning_rate": 8.92754667164818e-05, "loss": 0.3641, "step": 2767 }, { "epoch": 0.23613717795598022, "grad_norm": 1.531645431841794, "learning_rate": 8.926691563946232e-05, "loss": 0.3568, "step": 2768 }, { "epoch": 0.23622248763009726, "grad_norm": 1.3032622633254047, "learning_rate": 8.92583615646225e-05, "loss": 0.369, "step": 2769 }, { "epoch": 0.2363077973042143, "grad_norm": 1.4240729143784416, "learning_rate": 8.924980449261539e-05, "loss": 0.3652, "step": 2770 }, { "epoch": 0.23639310697833135, "grad_norm": 1.4318811649104877, "learning_rate": 8.924124442409427e-05, "loss": 0.3724, "step": 2771 }, { "epoch": 0.2364784166524484, "grad_norm": 1.8294547697712211, "learning_rate": 8.923268135971267e-05, "loss": 0.396, "step": 2772 }, { "epoch": 0.23656372632656544, "grad_norm": 1.4834884405299675, "learning_rate": 8.922411530012433e-05, "loss": 0.3298, "step": 2773 }, { "epoch": 0.2366490360006825, "grad_norm": 1.430171196833511, "learning_rate": 8.921554624598323e-05, "loss": 0.3639, "step": 2774 }, { "epoch": 0.23673434567479953, "grad_norm": 1.462640840318615, "learning_rate": 8.920697419794357e-05, "loss": 0.3681, "step": 2775 }, { "epoch": 0.23681965534891658, "grad_norm": 1.2783030602767733, "learning_rate": 8.919839915665976e-05, "loss": 0.3564, "step": 2776 }, { "epoch": 0.23690496502303363, "grad_norm": 1.4196207810816441, "learning_rate": 8.91898211227865e-05, "loss": 0.375, "step": 2777 }, { "epoch": 0.23699027469715064, "grad_norm": 1.2173826910759706, "learning_rate": 8.91812400969786e-05, "loss": 0.2982, "step": 2778 }, { "epoch": 0.2370755843712677, "grad_norm": 1.42133513880884, "learning_rate": 8.917265607989128e-05, "loss": 0.3634, "step": 2779 }, { "epoch": 0.23716089404538473, "grad_norm": 1.5034061541686232, "learning_rate": 8.916406907217982e-05, "loss": 0.3869, "step": 2780 }, { "epoch": 0.23724620371950178, "grad_norm": 1.4358944426713214, "learning_rate": 8.915547907449979e-05, "loss": 0.3358, "step": 2781 }, { "epoch": 0.23733151339361883, "grad_norm": 1.6317326383466302, "learning_rate": 8.914688608750701e-05, "loss": 0.3784, "step": 2782 }, { "epoch": 0.23741682306773587, "grad_norm": 1.7405590435249767, "learning_rate": 8.91382901118575e-05, "loss": 0.3373, "step": 2783 }, { "epoch": 0.23750213274185292, "grad_norm": 1.5082282249261034, "learning_rate": 8.912969114820753e-05, "loss": 0.3458, "step": 2784 }, { "epoch": 0.23758744241596996, "grad_norm": 1.5618826600276565, "learning_rate": 8.912108919721358e-05, "loss": 0.3485, "step": 2785 }, { "epoch": 0.237672752090087, "grad_norm": 1.3575092973619043, "learning_rate": 8.911248425953236e-05, "loss": 0.3253, "step": 2786 }, { "epoch": 0.23775806176420405, "grad_norm": 1.4528782132508775, "learning_rate": 8.91038763358208e-05, "loss": 0.2941, "step": 2787 }, { "epoch": 0.2378433714383211, "grad_norm": 1.4867254821336264, "learning_rate": 8.909526542673608e-05, "loss": 0.3618, "step": 2788 }, { "epoch": 0.23792868111243815, "grad_norm": 1.4794473068429899, "learning_rate": 8.908665153293559e-05, "loss": 0.3196, "step": 2789 }, { "epoch": 0.2380139907865552, "grad_norm": 1.4234143821606113, "learning_rate": 8.907803465507697e-05, "loss": 0.3123, "step": 2790 }, { "epoch": 0.23809930046067224, "grad_norm": 1.3474870216155113, "learning_rate": 8.906941479381806e-05, "loss": 0.3273, "step": 2791 }, { "epoch": 0.23818461013478928, "grad_norm": 1.669837637221109, "learning_rate": 8.906079194981695e-05, "loss": 0.3665, "step": 2792 }, { "epoch": 0.23826991980890633, "grad_norm": 1.3918521481485033, "learning_rate": 8.905216612373195e-05, "loss": 0.2899, "step": 2793 }, { "epoch": 0.23835522948302337, "grad_norm": 1.3802208617979312, "learning_rate": 8.904353731622157e-05, "loss": 0.3509, "step": 2794 }, { "epoch": 0.23844053915714042, "grad_norm": 1.3520016700914892, "learning_rate": 8.90349055279446e-05, "loss": 0.3706, "step": 2795 }, { "epoch": 0.23852584883125746, "grad_norm": 1.3134803743555283, "learning_rate": 8.902627075956003e-05, "loss": 0.3076, "step": 2796 }, { "epoch": 0.2386111585053745, "grad_norm": 1.5599101155624426, "learning_rate": 8.901763301172708e-05, "loss": 0.3046, "step": 2797 }, { "epoch": 0.23869646817949156, "grad_norm": 1.5748404891363266, "learning_rate": 8.900899228510517e-05, "loss": 0.3512, "step": 2798 }, { "epoch": 0.2387817778536086, "grad_norm": 1.5515899471379184, "learning_rate": 8.900034858035402e-05, "loss": 0.3447, "step": 2799 }, { "epoch": 0.23886708752772565, "grad_norm": 1.4027574189390515, "learning_rate": 8.899170189813349e-05, "loss": 0.3609, "step": 2800 }, { "epoch": 0.2389523972018427, "grad_norm": 1.421335390422852, "learning_rate": 8.89830522391037e-05, "loss": 0.3429, "step": 2801 }, { "epoch": 0.23903770687595974, "grad_norm": 1.6691857215579633, "learning_rate": 8.897439960392507e-05, "loss": 0.3964, "step": 2802 }, { "epoch": 0.23912301655007678, "grad_norm": 1.2493858736146108, "learning_rate": 8.89657439932581e-05, "loss": 0.3915, "step": 2803 }, { "epoch": 0.23920832622419383, "grad_norm": 1.7652920434340178, "learning_rate": 8.895708540776366e-05, "loss": 0.379, "step": 2804 }, { "epoch": 0.23929363589831087, "grad_norm": 1.3952123010077795, "learning_rate": 8.894842384810276e-05, "loss": 0.3388, "step": 2805 }, { "epoch": 0.23937894557242792, "grad_norm": 1.3427977625637602, "learning_rate": 8.893975931493668e-05, "loss": 0.344, "step": 2806 }, { "epoch": 0.23946425524654497, "grad_norm": 1.5489585792662737, "learning_rate": 8.89310918089269e-05, "loss": 0.3861, "step": 2807 }, { "epoch": 0.239549564920662, "grad_norm": 1.2810010900857591, "learning_rate": 8.892242133073513e-05, "loss": 0.3089, "step": 2808 }, { "epoch": 0.23963487459477906, "grad_norm": 1.5128039819091084, "learning_rate": 8.891374788102333e-05, "loss": 0.3451, "step": 2809 }, { "epoch": 0.2397201842688961, "grad_norm": 1.3257895186733437, "learning_rate": 8.890507146045366e-05, "loss": 0.3257, "step": 2810 }, { "epoch": 0.23980549394301315, "grad_norm": 1.4061581899554663, "learning_rate": 8.889639206968853e-05, "loss": 0.3038, "step": 2811 }, { "epoch": 0.2398908036171302, "grad_norm": 1.7830746323417683, "learning_rate": 8.888770970939057e-05, "loss": 0.345, "step": 2812 }, { "epoch": 0.23997611329124724, "grad_norm": 1.5713955764279048, "learning_rate": 8.887902438022261e-05, "loss": 0.3455, "step": 2813 }, { "epoch": 0.24006142296536429, "grad_norm": 1.34143883427284, "learning_rate": 8.887033608284776e-05, "loss": 0.3876, "step": 2814 }, { "epoch": 0.24014673263948133, "grad_norm": 1.4630685858460801, "learning_rate": 8.886164481792929e-05, "loss": 0.3302, "step": 2815 }, { "epoch": 0.24023204231359835, "grad_norm": 1.6887661457772332, "learning_rate": 8.885295058613075e-05, "loss": 0.3447, "step": 2816 }, { "epoch": 0.2403173519877154, "grad_norm": 1.3578714847373463, "learning_rate": 8.88442533881159e-05, "loss": 0.3356, "step": 2817 }, { "epoch": 0.24040266166183244, "grad_norm": 1.3623389017600949, "learning_rate": 8.883555322454873e-05, "loss": 0.3308, "step": 2818 }, { "epoch": 0.24048797133594949, "grad_norm": 1.33523657184956, "learning_rate": 8.882685009609343e-05, "loss": 0.3519, "step": 2819 }, { "epoch": 0.24057328101006653, "grad_norm": 1.640628197076498, "learning_rate": 8.881814400341446e-05, "loss": 0.3781, "step": 2820 }, { "epoch": 0.24065859068418358, "grad_norm": 1.7813641695610551, "learning_rate": 8.880943494717646e-05, "loss": 0.3627, "step": 2821 }, { "epoch": 0.24074390035830062, "grad_norm": 1.547974167068566, "learning_rate": 8.880072292804435e-05, "loss": 0.3632, "step": 2822 }, { "epoch": 0.24082921003241767, "grad_norm": 1.680255598651746, "learning_rate": 8.879200794668323e-05, "loss": 0.4027, "step": 2823 }, { "epoch": 0.2409145197065347, "grad_norm": 1.5024628289015456, "learning_rate": 8.878329000375844e-05, "loss": 0.3722, "step": 2824 }, { "epoch": 0.24099982938065176, "grad_norm": 1.2390644953785015, "learning_rate": 8.877456909993555e-05, "loss": 0.3191, "step": 2825 }, { "epoch": 0.2410851390547688, "grad_norm": 1.5019351873827276, "learning_rate": 8.876584523588036e-05, "loss": 0.381, "step": 2826 }, { "epoch": 0.24117044872888585, "grad_norm": 1.5015177994980522, "learning_rate": 8.875711841225888e-05, "loss": 0.3806, "step": 2827 }, { "epoch": 0.2412557584030029, "grad_norm": 1.186816470534537, "learning_rate": 8.874838862973738e-05, "loss": 0.3008, "step": 2828 }, { "epoch": 0.24134106807711994, "grad_norm": 1.6382666940547252, "learning_rate": 8.87396558889823e-05, "loss": 0.3518, "step": 2829 }, { "epoch": 0.241426377751237, "grad_norm": 1.5275660204316355, "learning_rate": 8.873092019066038e-05, "loss": 0.3515, "step": 2830 }, { "epoch": 0.24151168742535403, "grad_norm": 1.4338368721595758, "learning_rate": 8.872218153543849e-05, "loss": 0.3055, "step": 2831 }, { "epoch": 0.24159699709947108, "grad_norm": 1.6269682921289952, "learning_rate": 8.871343992398383e-05, "loss": 0.423, "step": 2832 }, { "epoch": 0.24168230677358812, "grad_norm": 1.5130913390268406, "learning_rate": 8.870469535696375e-05, "loss": 0.384, "step": 2833 }, { "epoch": 0.24176761644770517, "grad_norm": 1.5214839832467002, "learning_rate": 8.869594783504585e-05, "loss": 0.4069, "step": 2834 }, { "epoch": 0.24185292612182221, "grad_norm": 1.4694438675124333, "learning_rate": 8.868719735889797e-05, "loss": 0.4163, "step": 2835 }, { "epoch": 0.24193823579593926, "grad_norm": 1.4148248124606182, "learning_rate": 8.867844392918816e-05, "loss": 0.3496, "step": 2836 }, { "epoch": 0.2420235454700563, "grad_norm": 1.67995205947049, "learning_rate": 8.866968754658466e-05, "loss": 0.3906, "step": 2837 }, { "epoch": 0.24210885514417335, "grad_norm": 1.6586998149578958, "learning_rate": 8.866092821175604e-05, "loss": 0.3716, "step": 2838 }, { "epoch": 0.2421941648182904, "grad_norm": 1.7206767986245142, "learning_rate": 8.8652165925371e-05, "loss": 0.3689, "step": 2839 }, { "epoch": 0.24227947449240744, "grad_norm": 1.4020758667851443, "learning_rate": 8.864340068809846e-05, "loss": 0.3645, "step": 2840 }, { "epoch": 0.2423647841665245, "grad_norm": 1.265152501575014, "learning_rate": 8.863463250060765e-05, "loss": 0.3102, "step": 2841 }, { "epoch": 0.24245009384064153, "grad_norm": 1.541583093319659, "learning_rate": 8.862586136356794e-05, "loss": 0.3838, "step": 2842 }, { "epoch": 0.24253540351475858, "grad_norm": 1.556237526349554, "learning_rate": 8.8617087277649e-05, "loss": 0.3537, "step": 2843 }, { "epoch": 0.24262071318887563, "grad_norm": 1.6601012456421056, "learning_rate": 8.860831024352063e-05, "loss": 0.3657, "step": 2844 }, { "epoch": 0.24270602286299267, "grad_norm": 1.4709075339875242, "learning_rate": 8.859953026185294e-05, "loss": 0.2901, "step": 2845 }, { "epoch": 0.24279133253710972, "grad_norm": 1.553180903881217, "learning_rate": 8.859074733331622e-05, "loss": 0.3243, "step": 2846 }, { "epoch": 0.24287664221122676, "grad_norm": 1.3910553941234542, "learning_rate": 8.858196145858104e-05, "loss": 0.3598, "step": 2847 }, { "epoch": 0.2429619518853438, "grad_norm": 1.3766097269606041, "learning_rate": 8.857317263831812e-05, "loss": 0.3341, "step": 2848 }, { "epoch": 0.24304726155946085, "grad_norm": 1.466148913997397, "learning_rate": 8.856438087319843e-05, "loss": 0.3119, "step": 2849 }, { "epoch": 0.2431325712335779, "grad_norm": 1.7051295811250635, "learning_rate": 8.855558616389322e-05, "loss": 0.377, "step": 2850 }, { "epoch": 0.24321788090769494, "grad_norm": 1.3469818639291884, "learning_rate": 8.854678851107385e-05, "loss": 0.3432, "step": 2851 }, { "epoch": 0.243303190581812, "grad_norm": 1.5981576594423355, "learning_rate": 8.853798791541204e-05, "loss": 0.3855, "step": 2852 }, { "epoch": 0.24338850025592904, "grad_norm": 1.5439770458224868, "learning_rate": 8.852918437757964e-05, "loss": 0.3306, "step": 2853 }, { "epoch": 0.24347380993004605, "grad_norm": 1.3315710799883727, "learning_rate": 8.852037789824876e-05, "loss": 0.3552, "step": 2854 }, { "epoch": 0.2435591196041631, "grad_norm": 1.4237438119380397, "learning_rate": 8.851156847809173e-05, "loss": 0.3544, "step": 2855 }, { "epoch": 0.24364442927828014, "grad_norm": 1.3711731281832291, "learning_rate": 8.85027561177811e-05, "loss": 0.3098, "step": 2856 }, { "epoch": 0.2437297389523972, "grad_norm": 1.5360139193301525, "learning_rate": 8.849394081798962e-05, "loss": 0.3562, "step": 2857 }, { "epoch": 0.24381504862651424, "grad_norm": 1.6946727758658136, "learning_rate": 8.848512257939033e-05, "loss": 0.3815, "step": 2858 }, { "epoch": 0.24390035830063128, "grad_norm": 1.5023372243341764, "learning_rate": 8.847630140265644e-05, "loss": 0.3581, "step": 2859 }, { "epoch": 0.24398566797474833, "grad_norm": 1.5503330303632832, "learning_rate": 8.84674772884614e-05, "loss": 0.378, "step": 2860 }, { "epoch": 0.24407097764886537, "grad_norm": 1.480412384006967, "learning_rate": 8.845865023747888e-05, "loss": 0.2993, "step": 2861 }, { "epoch": 0.24415628732298242, "grad_norm": 1.5027678701873688, "learning_rate": 8.844982025038279e-05, "loss": 0.3602, "step": 2862 }, { "epoch": 0.24424159699709946, "grad_norm": 1.661229834394719, "learning_rate": 8.844098732784723e-05, "loss": 0.4196, "step": 2863 }, { "epoch": 0.2443269066712165, "grad_norm": 1.2467395221276802, "learning_rate": 8.843215147054659e-05, "loss": 0.2829, "step": 2864 }, { "epoch": 0.24441221634533356, "grad_norm": 1.5225792447969337, "learning_rate": 8.84233126791554e-05, "loss": 0.4055, "step": 2865 }, { "epoch": 0.2444975260194506, "grad_norm": 1.3460909834006956, "learning_rate": 8.841447095434847e-05, "loss": 0.3186, "step": 2866 }, { "epoch": 0.24458283569356765, "grad_norm": 1.4410729164984273, "learning_rate": 8.84056262968008e-05, "loss": 0.303, "step": 2867 }, { "epoch": 0.2446681453676847, "grad_norm": 1.5378406147362598, "learning_rate": 8.839677870718768e-05, "loss": 0.3027, "step": 2868 }, { "epoch": 0.24475345504180174, "grad_norm": 1.441022951185548, "learning_rate": 8.838792818618452e-05, "loss": 0.3587, "step": 2869 }, { "epoch": 0.24483876471591878, "grad_norm": 1.4039118508220307, "learning_rate": 8.837907473446704e-05, "loss": 0.3447, "step": 2870 }, { "epoch": 0.24492407439003583, "grad_norm": 1.664961451178715, "learning_rate": 8.837021835271117e-05, "loss": 0.3755, "step": 2871 }, { "epoch": 0.24500938406415287, "grad_norm": 1.7242156774980057, "learning_rate": 8.836135904159302e-05, "loss": 0.3459, "step": 2872 }, { "epoch": 0.24509469373826992, "grad_norm": 1.5743768579487971, "learning_rate": 8.835249680178894e-05, "loss": 0.342, "step": 2873 }, { "epoch": 0.24518000341238697, "grad_norm": 1.3599921831439272, "learning_rate": 8.834363163397556e-05, "loss": 0.3047, "step": 2874 }, { "epoch": 0.245265313086504, "grad_norm": 1.4347115713936445, "learning_rate": 8.833476353882964e-05, "loss": 0.3547, "step": 2875 }, { "epoch": 0.24535062276062106, "grad_norm": 1.4009372809378227, "learning_rate": 8.832589251702825e-05, "loss": 0.4179, "step": 2876 }, { "epoch": 0.2454359324347381, "grad_norm": 1.3433809438950022, "learning_rate": 8.831701856924864e-05, "loss": 0.3627, "step": 2877 }, { "epoch": 0.24552124210885515, "grad_norm": 1.208709143152259, "learning_rate": 8.830814169616826e-05, "loss": 0.3021, "step": 2878 }, { "epoch": 0.2456065517829722, "grad_norm": 1.194493231159, "learning_rate": 8.829926189846482e-05, "loss": 0.3075, "step": 2879 }, { "epoch": 0.24569186145708924, "grad_norm": 1.5285512536427484, "learning_rate": 8.829037917681627e-05, "loss": 0.3514, "step": 2880 }, { "epoch": 0.24577717113120628, "grad_norm": 1.3458207609142505, "learning_rate": 8.828149353190075e-05, "loss": 0.2864, "step": 2881 }, { "epoch": 0.24586248080532333, "grad_norm": 1.637129629633785, "learning_rate": 8.827260496439662e-05, "loss": 0.3987, "step": 2882 }, { "epoch": 0.24594779047944038, "grad_norm": 1.5965299110274822, "learning_rate": 8.826371347498248e-05, "loss": 0.4408, "step": 2883 }, { "epoch": 0.24603310015355742, "grad_norm": 1.5896016154439903, "learning_rate": 8.825481906433716e-05, "loss": 0.3029, "step": 2884 }, { "epoch": 0.24611840982767447, "grad_norm": 1.355704427232778, "learning_rate": 8.824592173313968e-05, "loss": 0.3319, "step": 2885 }, { "epoch": 0.2462037195017915, "grad_norm": 1.653712217745607, "learning_rate": 8.823702148206931e-05, "loss": 0.3565, "step": 2886 }, { "epoch": 0.24628902917590856, "grad_norm": 1.565996606396814, "learning_rate": 8.822811831180555e-05, "loss": 0.3575, "step": 2887 }, { "epoch": 0.2463743388500256, "grad_norm": 1.5781864116541524, "learning_rate": 8.821921222302811e-05, "loss": 0.3539, "step": 2888 }, { "epoch": 0.24645964852414265, "grad_norm": 1.6358071932045792, "learning_rate": 8.82103032164169e-05, "loss": 0.3493, "step": 2889 }, { "epoch": 0.2465449581982597, "grad_norm": 1.3087102581938195, "learning_rate": 8.820139129265208e-05, "loss": 0.3133, "step": 2890 }, { "epoch": 0.24663026787237674, "grad_norm": 1.2769267950322039, "learning_rate": 8.819247645241406e-05, "loss": 0.2969, "step": 2891 }, { "epoch": 0.2467155775464938, "grad_norm": 1.7377477620164874, "learning_rate": 8.818355869638339e-05, "loss": 0.3923, "step": 2892 }, { "epoch": 0.2468008872206108, "grad_norm": 1.3605453013680158, "learning_rate": 8.817463802524096e-05, "loss": 0.3333, "step": 2893 }, { "epoch": 0.24688619689472785, "grad_norm": 1.2817598584493335, "learning_rate": 8.816571443966775e-05, "loss": 0.3299, "step": 2894 }, { "epoch": 0.2469715065688449, "grad_norm": 1.3422797831916606, "learning_rate": 8.815678794034505e-05, "loss": 0.321, "step": 2895 }, { "epoch": 0.24705681624296194, "grad_norm": 1.5767124766307308, "learning_rate": 8.814785852795436e-05, "loss": 0.3229, "step": 2896 }, { "epoch": 0.247142125917079, "grad_norm": 1.8209838938788991, "learning_rate": 8.81389262031774e-05, "loss": 0.3665, "step": 2897 }, { "epoch": 0.24722743559119603, "grad_norm": 1.5362844422641024, "learning_rate": 8.812999096669608e-05, "loss": 0.3867, "step": 2898 }, { "epoch": 0.24731274526531308, "grad_norm": 1.5760024498470262, "learning_rate": 8.812105281919259e-05, "loss": 0.282, "step": 2899 }, { "epoch": 0.24739805493943012, "grad_norm": 1.2003600812130093, "learning_rate": 8.811211176134927e-05, "loss": 0.2879, "step": 2900 }, { "epoch": 0.24748336461354717, "grad_norm": 1.698331825036055, "learning_rate": 8.810316779384877e-05, "loss": 0.3719, "step": 2901 }, { "epoch": 0.24756867428766421, "grad_norm": 1.3417796393429957, "learning_rate": 8.809422091737387e-05, "loss": 0.3314, "step": 2902 }, { "epoch": 0.24765398396178126, "grad_norm": 1.5562665578428583, "learning_rate": 8.808527113260764e-05, "loss": 0.3575, "step": 2903 }, { "epoch": 0.2477392936358983, "grad_norm": 1.6081946034792904, "learning_rate": 8.807631844023334e-05, "loss": 0.3323, "step": 2904 }, { "epoch": 0.24782460331001535, "grad_norm": 1.6152883979591828, "learning_rate": 8.806736284093446e-05, "loss": 0.313, "step": 2905 }, { "epoch": 0.2479099129841324, "grad_norm": 1.40753191693234, "learning_rate": 8.805840433539475e-05, "loss": 0.389, "step": 2906 }, { "epoch": 0.24799522265824944, "grad_norm": 1.4728943811841293, "learning_rate": 8.804944292429807e-05, "loss": 0.3502, "step": 2907 }, { "epoch": 0.2480805323323665, "grad_norm": 1.2477418052084752, "learning_rate": 8.804047860832863e-05, "loss": 0.322, "step": 2908 }, { "epoch": 0.24816584200648353, "grad_norm": 1.5956943843233298, "learning_rate": 8.803151138817081e-05, "loss": 0.342, "step": 2909 }, { "epoch": 0.24825115168060058, "grad_norm": 1.4164144441059345, "learning_rate": 8.802254126450917e-05, "loss": 0.3715, "step": 2910 }, { "epoch": 0.24833646135471762, "grad_norm": 1.6867464997065982, "learning_rate": 8.801356823802857e-05, "loss": 0.3397, "step": 2911 }, { "epoch": 0.24842177102883467, "grad_norm": 1.3499555350857595, "learning_rate": 8.800459230941405e-05, "loss": 0.375, "step": 2912 }, { "epoch": 0.24850708070295172, "grad_norm": 1.544877583366757, "learning_rate": 8.799561347935086e-05, "loss": 0.3323, "step": 2913 }, { "epoch": 0.24859239037706876, "grad_norm": 1.3430371722532766, "learning_rate": 8.798663174852447e-05, "loss": 0.3118, "step": 2914 }, { "epoch": 0.2486777000511858, "grad_norm": 1.2543817016848295, "learning_rate": 8.797764711762063e-05, "loss": 0.3243, "step": 2915 }, { "epoch": 0.24876300972530285, "grad_norm": 1.8350346069814543, "learning_rate": 8.796865958732522e-05, "loss": 0.3282, "step": 2916 }, { "epoch": 0.2488483193994199, "grad_norm": 1.292481022147596, "learning_rate": 8.795966915832443e-05, "loss": 0.278, "step": 2917 }, { "epoch": 0.24893362907353694, "grad_norm": 1.4161087321221584, "learning_rate": 8.795067583130461e-05, "loss": 0.3325, "step": 2918 }, { "epoch": 0.249018938747654, "grad_norm": 1.5317503442553766, "learning_rate": 8.794167960695238e-05, "loss": 0.3022, "step": 2919 }, { "epoch": 0.24910424842177104, "grad_norm": 1.476483539710446, "learning_rate": 8.793268048595452e-05, "loss": 0.3065, "step": 2920 }, { "epoch": 0.24918955809588808, "grad_norm": 1.3111899059042107, "learning_rate": 8.792367846899807e-05, "loss": 0.3013, "step": 2921 }, { "epoch": 0.24927486777000513, "grad_norm": 1.3704458591548796, "learning_rate": 8.791467355677031e-05, "loss": 0.275, "step": 2922 }, { "epoch": 0.24936017744412217, "grad_norm": 1.2294690142834548, "learning_rate": 8.79056657499587e-05, "loss": 0.3091, "step": 2923 }, { "epoch": 0.24944548711823922, "grad_norm": 1.4315869996564823, "learning_rate": 8.789665504925093e-05, "loss": 0.3336, "step": 2924 }, { "epoch": 0.24953079679235626, "grad_norm": 1.454507067970368, "learning_rate": 8.788764145533494e-05, "loss": 0.3369, "step": 2925 }, { "epoch": 0.2496161064664733, "grad_norm": 1.773087521608408, "learning_rate": 8.787862496889886e-05, "loss": 0.3832, "step": 2926 }, { "epoch": 0.24970141614059035, "grad_norm": 1.23894653739764, "learning_rate": 8.786960559063105e-05, "loss": 0.3659, "step": 2927 }, { "epoch": 0.2497867258147074, "grad_norm": 1.2565966113739249, "learning_rate": 8.786058332122009e-05, "loss": 0.299, "step": 2928 }, { "epoch": 0.24987203548882445, "grad_norm": 1.3678595934528963, "learning_rate": 8.785155816135478e-05, "loss": 0.3251, "step": 2929 }, { "epoch": 0.2499573451629415, "grad_norm": 1.274433892925217, "learning_rate": 8.784253011172415e-05, "loss": 0.3055, "step": 2930 }, { "epoch": 0.2500426548370585, "grad_norm": 1.5679821734193564, "learning_rate": 8.783349917301744e-05, "loss": 0.3521, "step": 2931 }, { "epoch": 0.2501279645111756, "grad_norm": 1.4418721260270346, "learning_rate": 8.782446534592413e-05, "loss": 0.3271, "step": 2932 }, { "epoch": 0.2502132741852926, "grad_norm": 1.4965856953462486, "learning_rate": 8.781542863113387e-05, "loss": 0.37, "step": 2933 }, { "epoch": 0.2502985838594097, "grad_norm": 1.3901603436782244, "learning_rate": 8.780638902933658e-05, "loss": 0.3344, "step": 2934 }, { "epoch": 0.2503838935335267, "grad_norm": 1.2444361840174525, "learning_rate": 8.779734654122241e-05, "loss": 0.2856, "step": 2935 }, { "epoch": 0.25046920320764376, "grad_norm": 1.4788224621235644, "learning_rate": 8.778830116748167e-05, "loss": 0.3696, "step": 2936 }, { "epoch": 0.2505545128817608, "grad_norm": 1.3758337354206966, "learning_rate": 8.777925290880496e-05, "loss": 0.328, "step": 2937 }, { "epoch": 0.25063982255587786, "grad_norm": 1.429081856275145, "learning_rate": 8.777020176588305e-05, "loss": 0.2976, "step": 2938 }, { "epoch": 0.2507251322299949, "grad_norm": 1.407789913085209, "learning_rate": 8.776114773940693e-05, "loss": 0.3335, "step": 2939 }, { "epoch": 0.25081044190411195, "grad_norm": 1.6631391945698732, "learning_rate": 8.775209083006784e-05, "loss": 0.3393, "step": 2940 }, { "epoch": 0.25089575157822896, "grad_norm": 1.7333464450829328, "learning_rate": 8.774303103855724e-05, "loss": 0.3949, "step": 2941 }, { "epoch": 0.25098106125234604, "grad_norm": 1.3625536181897637, "learning_rate": 8.773396836556679e-05, "loss": 0.3443, "step": 2942 }, { "epoch": 0.25106637092646306, "grad_norm": 1.6192565066625166, "learning_rate": 8.772490281178836e-05, "loss": 0.3714, "step": 2943 }, { "epoch": 0.25115168060058013, "grad_norm": 1.5957667736120338, "learning_rate": 8.771583437791409e-05, "loss": 0.3404, "step": 2944 }, { "epoch": 0.25123699027469715, "grad_norm": 1.5225747820147923, "learning_rate": 8.770676306463628e-05, "loss": 0.3375, "step": 2945 }, { "epoch": 0.2513222999488142, "grad_norm": 1.5682108465531113, "learning_rate": 8.769768887264747e-05, "loss": 0.3614, "step": 2946 }, { "epoch": 0.25140760962293124, "grad_norm": 1.4408117376017797, "learning_rate": 8.768861180264045e-05, "loss": 0.3574, "step": 2947 }, { "epoch": 0.2514929192970483, "grad_norm": 1.5730606249011059, "learning_rate": 8.767953185530819e-05, "loss": 0.3547, "step": 2948 }, { "epoch": 0.25157822897116533, "grad_norm": 1.4341419637363861, "learning_rate": 8.767044903134392e-05, "loss": 0.3254, "step": 2949 }, { "epoch": 0.25166353864528235, "grad_norm": 1.6345817799803488, "learning_rate": 8.766136333144102e-05, "loss": 0.36, "step": 2950 }, { "epoch": 0.2517488483193994, "grad_norm": 1.5922694341450117, "learning_rate": 8.765227475629319e-05, "loss": 0.3985, "step": 2951 }, { "epoch": 0.25183415799351644, "grad_norm": 1.2641527065913656, "learning_rate": 8.764318330659424e-05, "loss": 0.3085, "step": 2952 }, { "epoch": 0.2519194676676335, "grad_norm": 1.183162094316587, "learning_rate": 8.763408898303829e-05, "loss": 0.3401, "step": 2953 }, { "epoch": 0.25200477734175053, "grad_norm": 1.4299551291195, "learning_rate": 8.762499178631964e-05, "loss": 0.4044, "step": 2954 }, { "epoch": 0.2520900870158676, "grad_norm": 1.287494052021398, "learning_rate": 8.76158917171328e-05, "loss": 0.3356, "step": 2955 }, { "epoch": 0.2521753966899846, "grad_norm": 1.3035325289034032, "learning_rate": 8.760678877617253e-05, "loss": 0.3424, "step": 2956 }, { "epoch": 0.2522607063641017, "grad_norm": 1.6679880467176376, "learning_rate": 8.759768296413376e-05, "loss": 0.4003, "step": 2957 }, { "epoch": 0.2523460160382187, "grad_norm": 1.195886511511915, "learning_rate": 8.75885742817117e-05, "loss": 0.3427, "step": 2958 }, { "epoch": 0.2524313257123358, "grad_norm": 1.4806192366881175, "learning_rate": 8.757946272960173e-05, "loss": 0.3461, "step": 2959 }, { "epoch": 0.2525166353864528, "grad_norm": 1.4252182776603206, "learning_rate": 8.757034830849948e-05, "loss": 0.3465, "step": 2960 }, { "epoch": 0.2526019450605699, "grad_norm": 1.361531750023376, "learning_rate": 8.756123101910079e-05, "loss": 0.326, "step": 2961 }, { "epoch": 0.2526872547346869, "grad_norm": 1.5375429969295904, "learning_rate": 8.755211086210172e-05, "loss": 0.3337, "step": 2962 }, { "epoch": 0.25277256440880397, "grad_norm": 1.4024079289533713, "learning_rate": 8.754298783819853e-05, "loss": 0.3715, "step": 2963 }, { "epoch": 0.252857874082921, "grad_norm": 1.5666754817038286, "learning_rate": 8.753386194808772e-05, "loss": 0.3341, "step": 2964 }, { "epoch": 0.25294318375703806, "grad_norm": 1.3773603121345408, "learning_rate": 8.752473319246601e-05, "loss": 0.3475, "step": 2965 }, { "epoch": 0.2530284934311551, "grad_norm": 1.4492882560246265, "learning_rate": 8.751560157203031e-05, "loss": 0.3133, "step": 2966 }, { "epoch": 0.25311380310527215, "grad_norm": 1.3224733265600825, "learning_rate": 8.750646708747781e-05, "loss": 0.3331, "step": 2967 }, { "epoch": 0.25319911277938917, "grad_norm": 1.0387025367368699, "learning_rate": 8.749732973950585e-05, "loss": 0.2922, "step": 2968 }, { "epoch": 0.25328442245350624, "grad_norm": 1.716559522797388, "learning_rate": 8.748818952881201e-05, "loss": 0.3137, "step": 2969 }, { "epoch": 0.25336973212762326, "grad_norm": 1.5439899397112156, "learning_rate": 8.747904645609413e-05, "loss": 0.3284, "step": 2970 }, { "epoch": 0.25345504180174033, "grad_norm": 1.3161806541499235, "learning_rate": 8.746990052205022e-05, "loss": 0.2944, "step": 2971 }, { "epoch": 0.25354035147585735, "grad_norm": 1.4064768290368939, "learning_rate": 8.746075172737851e-05, "loss": 0.3407, "step": 2972 }, { "epoch": 0.2536256611499744, "grad_norm": 1.8126388365089399, "learning_rate": 8.745160007277747e-05, "loss": 0.3222, "step": 2973 }, { "epoch": 0.25371097082409144, "grad_norm": 1.7308583391514052, "learning_rate": 8.744244555894578e-05, "loss": 0.3812, "step": 2974 }, { "epoch": 0.2537962804982085, "grad_norm": 1.742381542672724, "learning_rate": 8.743328818658235e-05, "loss": 0.4196, "step": 2975 }, { "epoch": 0.25388159017232553, "grad_norm": 1.415126421372807, "learning_rate": 8.742412795638629e-05, "loss": 0.3248, "step": 2976 }, { "epoch": 0.2539668998464426, "grad_norm": 1.3813056710187774, "learning_rate": 8.741496486905691e-05, "loss": 0.3031, "step": 2977 }, { "epoch": 0.2540522095205596, "grad_norm": 1.7382750778945901, "learning_rate": 8.740579892529381e-05, "loss": 0.3431, "step": 2978 }, { "epoch": 0.2541375191946767, "grad_norm": 1.3389964636028788, "learning_rate": 8.739663012579673e-05, "loss": 0.2791, "step": 2979 }, { "epoch": 0.2542228288687937, "grad_norm": 1.4035392914243696, "learning_rate": 8.738745847126565e-05, "loss": 0.3322, "step": 2980 }, { "epoch": 0.2543081385429108, "grad_norm": 1.3510950856860076, "learning_rate": 8.737828396240081e-05, "loss": 0.3055, "step": 2981 }, { "epoch": 0.2543934482170278, "grad_norm": 1.480617465398191, "learning_rate": 8.736910659990261e-05, "loss": 0.3099, "step": 2982 }, { "epoch": 0.2544787578911449, "grad_norm": 1.3912564676276693, "learning_rate": 8.73599263844717e-05, "loss": 0.346, "step": 2983 }, { "epoch": 0.2545640675652619, "grad_norm": 1.375797777222852, "learning_rate": 8.735074331680895e-05, "loss": 0.2714, "step": 2984 }, { "epoch": 0.25464937723937897, "grad_norm": 1.4436947403277889, "learning_rate": 8.734155739761541e-05, "loss": 0.3389, "step": 2985 }, { "epoch": 0.254734686913496, "grad_norm": 1.4029711326734382, "learning_rate": 8.733236862759242e-05, "loss": 0.3218, "step": 2986 }, { "epoch": 0.25481999658761306, "grad_norm": 1.4231059017918672, "learning_rate": 8.732317700744146e-05, "loss": 0.3439, "step": 2987 }, { "epoch": 0.2549053062617301, "grad_norm": 1.587354205788295, "learning_rate": 8.731398253786426e-05, "loss": 0.3667, "step": 2988 }, { "epoch": 0.2549906159358471, "grad_norm": 1.5229312936034314, "learning_rate": 8.730478521956278e-05, "loss": 0.3371, "step": 2989 }, { "epoch": 0.25507592560996417, "grad_norm": 1.3621728976911653, "learning_rate": 8.729558505323921e-05, "loss": 0.3227, "step": 2990 }, { "epoch": 0.2551612352840812, "grad_norm": 2.9418740679397226, "learning_rate": 8.728638203959589e-05, "loss": 0.3588, "step": 2991 }, { "epoch": 0.25524654495819826, "grad_norm": 1.3370626964662562, "learning_rate": 8.727717617933544e-05, "loss": 0.3191, "step": 2992 }, { "epoch": 0.2553318546323153, "grad_norm": 1.489543229565816, "learning_rate": 8.726796747316068e-05, "loss": 0.3931, "step": 2993 }, { "epoch": 0.25541716430643235, "grad_norm": 1.440610671230726, "learning_rate": 8.725875592177464e-05, "loss": 0.386, "step": 2994 }, { "epoch": 0.25550247398054937, "grad_norm": 1.7543391519708895, "learning_rate": 8.724954152588058e-05, "loss": 0.3572, "step": 2995 }, { "epoch": 0.25558778365466644, "grad_norm": 1.4680441925818484, "learning_rate": 8.724032428618198e-05, "loss": 0.3483, "step": 2996 }, { "epoch": 0.25567309332878346, "grad_norm": 1.5835223670683074, "learning_rate": 8.723110420338251e-05, "loss": 0.3423, "step": 2997 }, { "epoch": 0.25575840300290054, "grad_norm": 1.375566452562154, "learning_rate": 8.722188127818608e-05, "loss": 0.3169, "step": 2998 }, { "epoch": 0.25584371267701755, "grad_norm": 1.6748230513297162, "learning_rate": 8.721265551129683e-05, "loss": 0.3919, "step": 2999 }, { "epoch": 0.2559290223511346, "grad_norm": 1.5081369317820192, "learning_rate": 8.720342690341905e-05, "loss": 0.3814, "step": 3000 }, { "epoch": 0.25601433202525165, "grad_norm": 1.4854127970355735, "learning_rate": 8.719419545525733e-05, "loss": 0.3585, "step": 3001 }, { "epoch": 0.2560996416993687, "grad_norm": 1.6072475853357586, "learning_rate": 8.718496116751644e-05, "loss": 0.3938, "step": 3002 }, { "epoch": 0.25618495137348574, "grad_norm": 1.1246439052314376, "learning_rate": 8.717572404090138e-05, "loss": 0.3383, "step": 3003 }, { "epoch": 0.2562702610476028, "grad_norm": 1.4529486108186798, "learning_rate": 8.716648407611732e-05, "loss": 0.3155, "step": 3004 }, { "epoch": 0.2563555707217198, "grad_norm": 1.4509602491216809, "learning_rate": 8.715724127386972e-05, "loss": 0.3952, "step": 3005 }, { "epoch": 0.2564408803958369, "grad_norm": 1.433089081704649, "learning_rate": 8.714799563486418e-05, "loss": 0.3737, "step": 3006 }, { "epoch": 0.2565261900699539, "grad_norm": 1.21161701456593, "learning_rate": 8.71387471598066e-05, "loss": 0.3329, "step": 3007 }, { "epoch": 0.256611499744071, "grad_norm": 1.4950534002150655, "learning_rate": 8.712949584940303e-05, "loss": 0.3396, "step": 3008 }, { "epoch": 0.256696809418188, "grad_norm": 1.4359610239121479, "learning_rate": 8.712024170435975e-05, "loss": 0.3431, "step": 3009 }, { "epoch": 0.2567821190923051, "grad_norm": 1.4548631342851304, "learning_rate": 8.711098472538326e-05, "loss": 0.3315, "step": 3010 }, { "epoch": 0.2568674287664221, "grad_norm": 1.6304926842622194, "learning_rate": 8.710172491318031e-05, "loss": 0.3788, "step": 3011 }, { "epoch": 0.2569527384405392, "grad_norm": 1.2717373035322808, "learning_rate": 8.709246226845782e-05, "loss": 0.4137, "step": 3012 }, { "epoch": 0.2570380481146562, "grad_norm": 1.200555708482756, "learning_rate": 8.708319679192293e-05, "loss": 0.2878, "step": 3013 }, { "epoch": 0.25712335778877327, "grad_norm": 1.4215377575357797, "learning_rate": 8.707392848428304e-05, "loss": 0.3098, "step": 3014 }, { "epoch": 0.2572086674628903, "grad_norm": 1.5464917873851471, "learning_rate": 8.706465734624572e-05, "loss": 0.3442, "step": 3015 }, { "epoch": 0.25729397713700736, "grad_norm": 1.4989399343089969, "learning_rate": 8.705538337851878e-05, "loss": 0.3236, "step": 3016 }, { "epoch": 0.2573792868111244, "grad_norm": 1.496350298764888, "learning_rate": 8.704610658181021e-05, "loss": 0.3619, "step": 3017 }, { "epoch": 0.25746459648524145, "grad_norm": 1.153222011217429, "learning_rate": 8.703682695682829e-05, "loss": 0.3525, "step": 3018 }, { "epoch": 0.25754990615935847, "grad_norm": 1.3203792216580077, "learning_rate": 8.702754450428143e-05, "loss": 0.2493, "step": 3019 }, { "epoch": 0.25763521583347554, "grad_norm": 1.4033412945054156, "learning_rate": 8.701825922487831e-05, "loss": 0.3644, "step": 3020 }, { "epoch": 0.25772052550759256, "grad_norm": 1.3546718486510934, "learning_rate": 8.700897111932782e-05, "loss": 0.3118, "step": 3021 }, { "epoch": 0.25780583518170963, "grad_norm": 1.2867497823154346, "learning_rate": 8.699968018833904e-05, "loss": 0.3547, "step": 3022 }, { "epoch": 0.25789114485582665, "grad_norm": 1.5657874047874336, "learning_rate": 8.699038643262131e-05, "loss": 0.3229, "step": 3023 }, { "epoch": 0.2579764545299437, "grad_norm": 1.4179022487042232, "learning_rate": 8.698108985288414e-05, "loss": 0.3435, "step": 3024 }, { "epoch": 0.25806176420406074, "grad_norm": 1.2923763335516532, "learning_rate": 8.697179044983725e-05, "loss": 0.2959, "step": 3025 }, { "epoch": 0.2581470738781778, "grad_norm": 1.7370396669785588, "learning_rate": 8.696248822419065e-05, "loss": 0.384, "step": 3026 }, { "epoch": 0.25823238355229483, "grad_norm": 1.579140742281224, "learning_rate": 8.69531831766545e-05, "loss": 0.3807, "step": 3027 }, { "epoch": 0.25831769322641185, "grad_norm": 1.4867039128984916, "learning_rate": 8.694387530793916e-05, "loss": 0.2839, "step": 3028 }, { "epoch": 0.2584030029005289, "grad_norm": 1.5743612598820613, "learning_rate": 8.693456461875529e-05, "loss": 0.3552, "step": 3029 }, { "epoch": 0.25848831257464594, "grad_norm": 1.4870145117661513, "learning_rate": 8.692525110981366e-05, "loss": 0.3646, "step": 3030 }, { "epoch": 0.258573622248763, "grad_norm": 1.7611658958567424, "learning_rate": 8.691593478182533e-05, "loss": 0.357, "step": 3031 }, { "epoch": 0.25865893192288003, "grad_norm": 1.3842878929960687, "learning_rate": 8.690661563550156e-05, "loss": 0.3401, "step": 3032 }, { "epoch": 0.2587442415969971, "grad_norm": 1.612009059482987, "learning_rate": 8.68972936715538e-05, "loss": 0.34, "step": 3033 }, { "epoch": 0.2588295512711141, "grad_norm": 1.358937072369264, "learning_rate": 8.688796889069373e-05, "loss": 0.3678, "step": 3034 }, { "epoch": 0.2589148609452312, "grad_norm": 1.3948585275811818, "learning_rate": 8.687864129363327e-05, "loss": 0.33, "step": 3035 }, { "epoch": 0.2590001706193482, "grad_norm": 1.484931921634675, "learning_rate": 8.686931088108452e-05, "loss": 0.3632, "step": 3036 }, { "epoch": 0.2590854802934653, "grad_norm": 1.2939246755429792, "learning_rate": 8.68599776537598e-05, "loss": 0.3153, "step": 3037 }, { "epoch": 0.2591707899675823, "grad_norm": 1.4603613058889888, "learning_rate": 8.685064161237167e-05, "loss": 0.3343, "step": 3038 }, { "epoch": 0.2592560996416994, "grad_norm": 1.2469640102500499, "learning_rate": 8.684130275763287e-05, "loss": 0.3114, "step": 3039 }, { "epoch": 0.2593414093158164, "grad_norm": 1.5469250911732655, "learning_rate": 8.68319610902564e-05, "loss": 0.369, "step": 3040 }, { "epoch": 0.25942671898993347, "grad_norm": 1.24776535560363, "learning_rate": 8.68226166109554e-05, "loss": 0.3444, "step": 3041 }, { "epoch": 0.2595120286640505, "grad_norm": 1.7105090157796172, "learning_rate": 8.68132693204433e-05, "loss": 0.3871, "step": 3042 }, { "epoch": 0.25959733833816756, "grad_norm": 1.1491593667771978, "learning_rate": 8.680391921943371e-05, "loss": 0.3316, "step": 3043 }, { "epoch": 0.2596826480122846, "grad_norm": 1.553277147543048, "learning_rate": 8.679456630864048e-05, "loss": 0.3564, "step": 3044 }, { "epoch": 0.25976795768640165, "grad_norm": 1.337150068051128, "learning_rate": 8.678521058877763e-05, "loss": 0.3361, "step": 3045 }, { "epoch": 0.25985326736051867, "grad_norm": 1.4604284858316048, "learning_rate": 8.677585206055943e-05, "loss": 0.3565, "step": 3046 }, { "epoch": 0.25993857703463574, "grad_norm": 1.617565534699742, "learning_rate": 8.676649072470034e-05, "loss": 0.3062, "step": 3047 }, { "epoch": 0.26002388670875276, "grad_norm": 1.3632805067350007, "learning_rate": 8.675712658191508e-05, "loss": 0.3213, "step": 3048 }, { "epoch": 0.26010919638286983, "grad_norm": 1.4626916156670635, "learning_rate": 8.674775963291853e-05, "loss": 0.3472, "step": 3049 }, { "epoch": 0.26019450605698685, "grad_norm": 1.5393306285613562, "learning_rate": 8.673838987842579e-05, "loss": 0.3481, "step": 3050 }, { "epoch": 0.2602798157311039, "grad_norm": 1.3719875975957447, "learning_rate": 8.672901731915222e-05, "loss": 0.3849, "step": 3051 }, { "epoch": 0.26036512540522094, "grad_norm": 1.31406676918815, "learning_rate": 8.671964195581336e-05, "loss": 0.3052, "step": 3052 }, { "epoch": 0.260450435079338, "grad_norm": 1.3424826567255939, "learning_rate": 8.671026378912497e-05, "loss": 0.3656, "step": 3053 }, { "epoch": 0.26053574475345503, "grad_norm": 1.4092939704557612, "learning_rate": 8.6700882819803e-05, "loss": 0.3429, "step": 3054 }, { "epoch": 0.2606210544275721, "grad_norm": 1.4377381500673827, "learning_rate": 8.669149904856366e-05, "loss": 0.3608, "step": 3055 }, { "epoch": 0.2607063641016891, "grad_norm": 1.839771309821584, "learning_rate": 8.668211247612335e-05, "loss": 0.3664, "step": 3056 }, { "epoch": 0.2607916737758062, "grad_norm": 1.502238113887016, "learning_rate": 8.66727231031987e-05, "loss": 0.346, "step": 3057 }, { "epoch": 0.2608769834499232, "grad_norm": 1.279663173271976, "learning_rate": 8.666333093050649e-05, "loss": 0.3516, "step": 3058 }, { "epoch": 0.2609622931240403, "grad_norm": 1.5066432075587148, "learning_rate": 8.665393595876379e-05, "loss": 0.3221, "step": 3059 }, { "epoch": 0.2610476027981573, "grad_norm": 1.4060456021430408, "learning_rate": 8.664453818868789e-05, "loss": 0.3009, "step": 3060 }, { "epoch": 0.2611329124722744, "grad_norm": 1.4341868491177756, "learning_rate": 8.66351376209962e-05, "loss": 0.2814, "step": 3061 }, { "epoch": 0.2612182221463914, "grad_norm": 1.3992343103000475, "learning_rate": 8.662573425640645e-05, "loss": 0.3256, "step": 3062 }, { "epoch": 0.2613035318205085, "grad_norm": 2.1943582110087303, "learning_rate": 8.661632809563651e-05, "loss": 0.2902, "step": 3063 }, { "epoch": 0.2613888414946255, "grad_norm": 1.3939872129283466, "learning_rate": 8.66069191394045e-05, "loss": 0.348, "step": 3064 }, { "epoch": 0.2614741511687425, "grad_norm": 1.4331196928861816, "learning_rate": 8.659750738842873e-05, "loss": 0.3189, "step": 3065 }, { "epoch": 0.2615594608428596, "grad_norm": 1.3862143875377115, "learning_rate": 8.658809284342778e-05, "loss": 0.2965, "step": 3066 }, { "epoch": 0.2616447705169766, "grad_norm": 1.526256750093447, "learning_rate": 8.657867550512033e-05, "loss": 0.3405, "step": 3067 }, { "epoch": 0.2617300801910937, "grad_norm": 1.5203796999155452, "learning_rate": 8.656925537422542e-05, "loss": 0.3879, "step": 3068 }, { "epoch": 0.2618153898652107, "grad_norm": 1.3143922698173816, "learning_rate": 8.655983245146217e-05, "loss": 0.2931, "step": 3069 }, { "epoch": 0.26190069953932776, "grad_norm": 1.2980081135153856, "learning_rate": 8.655040673754999e-05, "loss": 0.3403, "step": 3070 }, { "epoch": 0.2619860092134448, "grad_norm": 1.5824148207605895, "learning_rate": 8.65409782332085e-05, "loss": 0.3928, "step": 3071 }, { "epoch": 0.26207131888756185, "grad_norm": 1.4390277208066564, "learning_rate": 8.65315469391575e-05, "loss": 0.3682, "step": 3072 }, { "epoch": 0.2621566285616789, "grad_norm": 1.392915403978097, "learning_rate": 8.652211285611701e-05, "loss": 0.3519, "step": 3073 }, { "epoch": 0.26224193823579595, "grad_norm": 1.4755221137855792, "learning_rate": 8.651267598480728e-05, "loss": 0.3503, "step": 3074 }, { "epoch": 0.26232724790991296, "grad_norm": 1.3528538526954528, "learning_rate": 8.650323632594877e-05, "loss": 0.3307, "step": 3075 }, { "epoch": 0.26241255758403004, "grad_norm": 1.3000967613302523, "learning_rate": 8.649379388026215e-05, "loss": 0.3065, "step": 3076 }, { "epoch": 0.26249786725814706, "grad_norm": 1.577878573978356, "learning_rate": 8.64843486484683e-05, "loss": 0.3523, "step": 3077 }, { "epoch": 0.26258317693226413, "grad_norm": 1.588848580484164, "learning_rate": 8.64749006312883e-05, "loss": 0.3255, "step": 3078 }, { "epoch": 0.26266848660638115, "grad_norm": 1.4344220587962353, "learning_rate": 8.646544982944345e-05, "loss": 0.3563, "step": 3079 }, { "epoch": 0.2627537962804982, "grad_norm": 2.0039160060830983, "learning_rate": 8.64559962436553e-05, "loss": 0.3568, "step": 3080 }, { "epoch": 0.26283910595461524, "grad_norm": 1.3400232845389999, "learning_rate": 8.644653987464555e-05, "loss": 0.3153, "step": 3081 }, { "epoch": 0.2629244156287323, "grad_norm": 1.4568472329287525, "learning_rate": 8.643708072313618e-05, "loss": 0.3515, "step": 3082 }, { "epoch": 0.26300972530284933, "grad_norm": 1.3308277230491854, "learning_rate": 8.642761878984931e-05, "loss": 0.3528, "step": 3083 }, { "epoch": 0.2630950349769664, "grad_norm": 1.2670268548439259, "learning_rate": 8.64181540755073e-05, "loss": 0.2887, "step": 3084 }, { "epoch": 0.2631803446510834, "grad_norm": 1.5778104260847121, "learning_rate": 8.640868658083279e-05, "loss": 0.3234, "step": 3085 }, { "epoch": 0.2632656543252005, "grad_norm": 1.6607057626860051, "learning_rate": 8.639921630654852e-05, "loss": 0.3661, "step": 3086 }, { "epoch": 0.2633509639993175, "grad_norm": 1.3483412618761554, "learning_rate": 8.63897432533775e-05, "loss": 0.2855, "step": 3087 }, { "epoch": 0.2634362736734346, "grad_norm": 1.4059550505842304, "learning_rate": 8.638026742204298e-05, "loss": 0.3936, "step": 3088 }, { "epoch": 0.2635215833475516, "grad_norm": 1.7124120898549997, "learning_rate": 8.637078881326834e-05, "loss": 0.3146, "step": 3089 }, { "epoch": 0.2636068930216687, "grad_norm": 1.4982992703162394, "learning_rate": 8.636130742777725e-05, "loss": 0.345, "step": 3090 }, { "epoch": 0.2636922026957857, "grad_norm": 1.366912901413495, "learning_rate": 8.635182326629358e-05, "loss": 0.3618, "step": 3091 }, { "epoch": 0.26377751236990277, "grad_norm": 1.9961019915207185, "learning_rate": 8.634233632954139e-05, "loss": 0.4345, "step": 3092 }, { "epoch": 0.2638628220440198, "grad_norm": 1.518570387780574, "learning_rate": 8.633284661824492e-05, "loss": 0.3332, "step": 3093 }, { "epoch": 0.26394813171813686, "grad_norm": 1.723163832710155, "learning_rate": 8.632335413312869e-05, "loss": 0.34, "step": 3094 }, { "epoch": 0.2640334413922539, "grad_norm": 1.3493522255527375, "learning_rate": 8.631385887491739e-05, "loss": 0.2948, "step": 3095 }, { "epoch": 0.26411875106637095, "grad_norm": 1.5183218332825907, "learning_rate": 8.630436084433593e-05, "loss": 0.3949, "step": 3096 }, { "epoch": 0.26420406074048797, "grad_norm": 1.2929480974559047, "learning_rate": 8.629486004210945e-05, "loss": 0.3784, "step": 3097 }, { "epoch": 0.26428937041460504, "grad_norm": 1.38836335995013, "learning_rate": 8.628535646896328e-05, "loss": 0.339, "step": 3098 }, { "epoch": 0.26437468008872206, "grad_norm": 1.3295738844066054, "learning_rate": 8.627585012562296e-05, "loss": 0.2839, "step": 3099 }, { "epoch": 0.26445998976283913, "grad_norm": 1.372469393974321, "learning_rate": 8.626634101281425e-05, "loss": 0.3321, "step": 3100 }, { "epoch": 0.26454529943695615, "grad_norm": 1.3036830027584212, "learning_rate": 8.625682913126311e-05, "loss": 0.3286, "step": 3101 }, { "epoch": 0.2646306091110732, "grad_norm": 1.6531174023170605, "learning_rate": 8.624731448169576e-05, "loss": 0.3753, "step": 3102 }, { "epoch": 0.26471591878519024, "grad_norm": 1.589073500693624, "learning_rate": 8.623779706483855e-05, "loss": 0.3192, "step": 3103 }, { "epoch": 0.26480122845930726, "grad_norm": 1.4729251363133966, "learning_rate": 8.622827688141812e-05, "loss": 0.3649, "step": 3104 }, { "epoch": 0.26488653813342433, "grad_norm": 1.3576136611368028, "learning_rate": 8.621875393216126e-05, "loss": 0.2983, "step": 3105 }, { "epoch": 0.26497184780754135, "grad_norm": 1.4772027459274788, "learning_rate": 8.620922821779498e-05, "loss": 0.3316, "step": 3106 }, { "epoch": 0.2650571574816584, "grad_norm": 1.4904877733447646, "learning_rate": 8.619969973904655e-05, "loss": 0.3471, "step": 3107 }, { "epoch": 0.26514246715577544, "grad_norm": 1.4203615622864658, "learning_rate": 8.619016849664344e-05, "loss": 0.3057, "step": 3108 }, { "epoch": 0.2652277768298925, "grad_norm": 1.1494325232826128, "learning_rate": 8.618063449131327e-05, "loss": 0.3272, "step": 3109 }, { "epoch": 0.26531308650400953, "grad_norm": 1.349144111517021, "learning_rate": 8.617109772378388e-05, "loss": 0.2717, "step": 3110 }, { "epoch": 0.2653983961781266, "grad_norm": 1.6016958785924373, "learning_rate": 8.616155819478344e-05, "loss": 0.3741, "step": 3111 }, { "epoch": 0.2654837058522436, "grad_norm": 1.630586850358637, "learning_rate": 8.615201590504017e-05, "loss": 0.378, "step": 3112 }, { "epoch": 0.2655690155263607, "grad_norm": 1.5379313848441112, "learning_rate": 8.61424708552826e-05, "loss": 0.3426, "step": 3113 }, { "epoch": 0.2656543252004777, "grad_norm": 1.2761062086741253, "learning_rate": 8.613292304623945e-05, "loss": 0.3811, "step": 3114 }, { "epoch": 0.2657396348745948, "grad_norm": 1.3839508371275988, "learning_rate": 8.612337247863962e-05, "loss": 0.353, "step": 3115 }, { "epoch": 0.2658249445487118, "grad_norm": 1.4044281071874263, "learning_rate": 8.611381915321227e-05, "loss": 0.3375, "step": 3116 }, { "epoch": 0.2659102542228289, "grad_norm": 1.3726694991156794, "learning_rate": 8.610426307068674e-05, "loss": 0.3633, "step": 3117 }, { "epoch": 0.2659955638969459, "grad_norm": 1.5069201106593766, "learning_rate": 8.609470423179258e-05, "loss": 0.3398, "step": 3118 }, { "epoch": 0.26608087357106297, "grad_norm": 1.5158396303459571, "learning_rate": 8.608514263725955e-05, "loss": 0.3302, "step": 3119 }, { "epoch": 0.26616618324518, "grad_norm": 1.44565029836273, "learning_rate": 8.607557828781765e-05, "loss": 0.2858, "step": 3120 }, { "epoch": 0.26625149291929706, "grad_norm": 1.352140446393673, "learning_rate": 8.606601118419705e-05, "loss": 0.3167, "step": 3121 }, { "epoch": 0.2663368025934141, "grad_norm": 1.3671388017973236, "learning_rate": 8.605644132712814e-05, "loss": 0.3567, "step": 3122 }, { "epoch": 0.26642211226753115, "grad_norm": 1.528795727773433, "learning_rate": 8.604686871734156e-05, "loss": 0.3973, "step": 3123 }, { "epoch": 0.26650742194164817, "grad_norm": 1.3597987599706796, "learning_rate": 8.603729335556808e-05, "loss": 0.3197, "step": 3124 }, { "epoch": 0.26659273161576524, "grad_norm": 1.6537329062618593, "learning_rate": 8.602771524253876e-05, "loss": 0.337, "step": 3125 }, { "epoch": 0.26667804128988226, "grad_norm": 1.2231367343189616, "learning_rate": 8.601813437898484e-05, "loss": 0.3664, "step": 3126 }, { "epoch": 0.26676335096399933, "grad_norm": 1.290549928638571, "learning_rate": 8.600855076563776e-05, "loss": 0.2801, "step": 3127 }, { "epoch": 0.26684866063811635, "grad_norm": 1.4564960713881923, "learning_rate": 8.599896440322918e-05, "loss": 0.3151, "step": 3128 }, { "epoch": 0.2669339703122334, "grad_norm": 1.6629149012443534, "learning_rate": 8.598937529249096e-05, "loss": 0.3706, "step": 3129 }, { "epoch": 0.26701927998635044, "grad_norm": 1.7136640727220092, "learning_rate": 8.59797834341552e-05, "loss": 0.3638, "step": 3130 }, { "epoch": 0.2671045896604675, "grad_norm": 1.3129248612629463, "learning_rate": 8.597018882895416e-05, "loss": 0.2982, "step": 3131 }, { "epoch": 0.26718989933458454, "grad_norm": 1.4849943776136627, "learning_rate": 8.596059147762034e-05, "loss": 0.2722, "step": 3132 }, { "epoch": 0.2672752090087016, "grad_norm": 1.4533848683963906, "learning_rate": 8.595099138088644e-05, "loss": 0.3353, "step": 3133 }, { "epoch": 0.2673605186828186, "grad_norm": 1.540198047585879, "learning_rate": 8.594138853948544e-05, "loss": 0.3318, "step": 3134 }, { "epoch": 0.2674458283569357, "grad_norm": 1.5108750623329157, "learning_rate": 8.593178295415038e-05, "loss": 0.3123, "step": 3135 }, { "epoch": 0.2675311380310527, "grad_norm": 1.5197701156095704, "learning_rate": 8.592217462561465e-05, "loss": 0.3124, "step": 3136 }, { "epoch": 0.2676164477051698, "grad_norm": 1.4537504757382638, "learning_rate": 8.591256355461176e-05, "loss": 0.2847, "step": 3137 }, { "epoch": 0.2677017573792868, "grad_norm": 1.4979723895677204, "learning_rate": 8.59029497418755e-05, "loss": 0.3673, "step": 3138 }, { "epoch": 0.2677870670534039, "grad_norm": 1.4521686319102718, "learning_rate": 8.58933331881398e-05, "loss": 0.3196, "step": 3139 }, { "epoch": 0.2678723767275209, "grad_norm": 1.4784337050043108, "learning_rate": 8.588371389413885e-05, "loss": 0.358, "step": 3140 }, { "epoch": 0.267957686401638, "grad_norm": 1.502275013256262, "learning_rate": 8.587409186060704e-05, "loss": 0.2872, "step": 3141 }, { "epoch": 0.268042996075755, "grad_norm": 1.6381452439391564, "learning_rate": 8.586446708827896e-05, "loss": 0.3372, "step": 3142 }, { "epoch": 0.268128305749872, "grad_norm": 1.4410624934139102, "learning_rate": 8.585483957788938e-05, "loss": 0.329, "step": 3143 }, { "epoch": 0.2682136154239891, "grad_norm": 1.7071232192625228, "learning_rate": 8.584520933017333e-05, "loss": 0.3452, "step": 3144 }, { "epoch": 0.2682989250981061, "grad_norm": 1.4895139380164961, "learning_rate": 8.583557634586605e-05, "loss": 0.2927, "step": 3145 }, { "epoch": 0.2683842347722232, "grad_norm": 1.5561110530243278, "learning_rate": 8.582594062570292e-05, "loss": 0.3186, "step": 3146 }, { "epoch": 0.2684695444463402, "grad_norm": 1.1029807625940662, "learning_rate": 8.581630217041963e-05, "loss": 0.3069, "step": 3147 }, { "epoch": 0.26855485412045726, "grad_norm": 1.4363126412086953, "learning_rate": 8.580666098075197e-05, "loss": 0.3908, "step": 3148 }, { "epoch": 0.2686401637945743, "grad_norm": 1.357976347384424, "learning_rate": 8.579701705743604e-05, "loss": 0.2974, "step": 3149 }, { "epoch": 0.26872547346869136, "grad_norm": 1.5094642881280065, "learning_rate": 8.578737040120807e-05, "loss": 0.3313, "step": 3150 }, { "epoch": 0.2688107831428084, "grad_norm": 1.3019105974697402, "learning_rate": 8.577772101280456e-05, "loss": 0.3023, "step": 3151 }, { "epoch": 0.26889609281692545, "grad_norm": 1.5431179312524392, "learning_rate": 8.576806889296216e-05, "loss": 0.3756, "step": 3152 }, { "epoch": 0.26898140249104247, "grad_norm": 1.1551190593175797, "learning_rate": 8.575841404241777e-05, "loss": 0.2902, "step": 3153 }, { "epoch": 0.26906671216515954, "grad_norm": 1.3768153777787038, "learning_rate": 8.57487564619085e-05, "loss": 0.2905, "step": 3154 }, { "epoch": 0.26915202183927656, "grad_norm": 1.5264100638021838, "learning_rate": 8.573909615217163e-05, "loss": 0.2959, "step": 3155 }, { "epoch": 0.26923733151339363, "grad_norm": 1.7123821552131855, "learning_rate": 8.572943311394468e-05, "loss": 0.3287, "step": 3156 }, { "epoch": 0.26932264118751065, "grad_norm": 1.1276937659113369, "learning_rate": 8.57197673479654e-05, "loss": 0.3028, "step": 3157 }, { "epoch": 0.2694079508616277, "grad_norm": 1.3570570201546166, "learning_rate": 8.571009885497168e-05, "loss": 0.3072, "step": 3158 }, { "epoch": 0.26949326053574474, "grad_norm": 1.3137524396617823, "learning_rate": 8.570042763570168e-05, "loss": 0.2637, "step": 3159 }, { "epoch": 0.2695785702098618, "grad_norm": 1.637324692172651, "learning_rate": 8.569075369089374e-05, "loss": 0.4194, "step": 3160 }, { "epoch": 0.26966387988397883, "grad_norm": 1.640879584042099, "learning_rate": 8.568107702128642e-05, "loss": 0.3345, "step": 3161 }, { "epoch": 0.2697491895580959, "grad_norm": 1.4821518965033482, "learning_rate": 8.56713976276185e-05, "loss": 0.312, "step": 3162 }, { "epoch": 0.2698344992322129, "grad_norm": 1.844939155505761, "learning_rate": 8.566171551062889e-05, "loss": 0.3798, "step": 3163 }, { "epoch": 0.26991980890633, "grad_norm": 1.8194832284043798, "learning_rate": 8.565203067105683e-05, "loss": 0.3979, "step": 3164 }, { "epoch": 0.270005118580447, "grad_norm": 1.5768029746934769, "learning_rate": 8.564234310964168e-05, "loss": 0.3326, "step": 3165 }, { "epoch": 0.2700904282545641, "grad_norm": 1.2799261743892094, "learning_rate": 8.563265282712303e-05, "loss": 0.3643, "step": 3166 }, { "epoch": 0.2701757379286811, "grad_norm": 2.05112964395591, "learning_rate": 8.562295982424069e-05, "loss": 0.3452, "step": 3167 }, { "epoch": 0.2702610476027982, "grad_norm": 1.8945278521635123, "learning_rate": 8.561326410173467e-05, "loss": 0.3582, "step": 3168 }, { "epoch": 0.2703463572769152, "grad_norm": 1.5343513222788427, "learning_rate": 8.560356566034518e-05, "loss": 0.3327, "step": 3169 }, { "epoch": 0.27043166695103227, "grad_norm": 1.2093972531816906, "learning_rate": 8.559386450081266e-05, "loss": 0.2776, "step": 3170 }, { "epoch": 0.2705169766251493, "grad_norm": 1.3200251249609831, "learning_rate": 8.558416062387772e-05, "loss": 0.3178, "step": 3171 }, { "epoch": 0.27060228629926636, "grad_norm": 1.2846795679291543, "learning_rate": 8.557445403028122e-05, "loss": 0.33, "step": 3172 }, { "epoch": 0.2706875959733834, "grad_norm": 1.629879813765778, "learning_rate": 8.556474472076419e-05, "loss": 0.3885, "step": 3173 }, { "epoch": 0.27077290564750045, "grad_norm": 1.4548785386398495, "learning_rate": 8.555503269606789e-05, "loss": 0.2985, "step": 3174 }, { "epoch": 0.27085821532161747, "grad_norm": 1.4123131484953475, "learning_rate": 8.55453179569338e-05, "loss": 0.3551, "step": 3175 }, { "epoch": 0.27094352499573454, "grad_norm": 1.2164955317097301, "learning_rate": 8.553560050410354e-05, "loss": 0.353, "step": 3176 }, { "epoch": 0.27102883466985156, "grad_norm": 1.118863109646051, "learning_rate": 8.552588033831905e-05, "loss": 0.2678, "step": 3177 }, { "epoch": 0.27111414434396863, "grad_norm": 1.385781113532844, "learning_rate": 8.551615746032235e-05, "loss": 0.381, "step": 3178 }, { "epoch": 0.27119945401808565, "grad_norm": 1.8962208557906821, "learning_rate": 8.550643187085579e-05, "loss": 0.3426, "step": 3179 }, { "epoch": 0.27128476369220267, "grad_norm": 1.4719504529399194, "learning_rate": 8.549670357066182e-05, "loss": 0.2895, "step": 3180 }, { "epoch": 0.27137007336631974, "grad_norm": 1.413848860814623, "learning_rate": 8.548697256048317e-05, "loss": 0.2934, "step": 3181 }, { "epoch": 0.27145538304043676, "grad_norm": 1.683581571247279, "learning_rate": 8.547723884106274e-05, "loss": 0.3621, "step": 3182 }, { "epoch": 0.27154069271455383, "grad_norm": 1.5979216338888234, "learning_rate": 8.546750241314365e-05, "loss": 0.3156, "step": 3183 }, { "epoch": 0.27162600238867085, "grad_norm": 1.5017289687656301, "learning_rate": 8.545776327746922e-05, "loss": 0.4461, "step": 3184 }, { "epoch": 0.2717113120627879, "grad_norm": 1.617375607077749, "learning_rate": 8.544802143478298e-05, "loss": 0.3693, "step": 3185 }, { "epoch": 0.27179662173690494, "grad_norm": 1.2560446974734165, "learning_rate": 8.543827688582868e-05, "loss": 0.2902, "step": 3186 }, { "epoch": 0.271881931411022, "grad_norm": 1.4167020830701467, "learning_rate": 8.542852963135029e-05, "loss": 0.4055, "step": 3187 }, { "epoch": 0.27196724108513903, "grad_norm": 1.6542238041436705, "learning_rate": 8.541877967209189e-05, "loss": 0.3458, "step": 3188 }, { "epoch": 0.2720525507592561, "grad_norm": 1.4811492402778688, "learning_rate": 8.540902700879789e-05, "loss": 0.3442, "step": 3189 }, { "epoch": 0.2721378604333731, "grad_norm": 1.5041860344961626, "learning_rate": 8.539927164221285e-05, "loss": 0.3677, "step": 3190 }, { "epoch": 0.2722231701074902, "grad_norm": 1.3161097342589196, "learning_rate": 8.538951357308151e-05, "loss": 0.2872, "step": 3191 }, { "epoch": 0.2723084797816072, "grad_norm": 1.5178769086304127, "learning_rate": 8.537975280214889e-05, "loss": 0.3459, "step": 3192 }, { "epoch": 0.2723937894557243, "grad_norm": 1.4573571707360222, "learning_rate": 8.536998933016014e-05, "loss": 0.3812, "step": 3193 }, { "epoch": 0.2724790991298413, "grad_norm": 1.4872678174804583, "learning_rate": 8.536022315786065e-05, "loss": 0.3433, "step": 3194 }, { "epoch": 0.2725644088039584, "grad_norm": 1.4010468758222698, "learning_rate": 8.535045428599604e-05, "loss": 0.34, "step": 3195 }, { "epoch": 0.2726497184780754, "grad_norm": 1.5062027476643154, "learning_rate": 8.53406827153121e-05, "loss": 0.3594, "step": 3196 }, { "epoch": 0.27273502815219247, "grad_norm": 1.4687529218928717, "learning_rate": 8.533090844655482e-05, "loss": 0.295, "step": 3197 }, { "epoch": 0.2728203378263095, "grad_norm": 1.6992135584960206, "learning_rate": 8.532113148047045e-05, "loss": 0.3585, "step": 3198 }, { "epoch": 0.27290564750042656, "grad_norm": 1.3532578083787694, "learning_rate": 8.531135181780537e-05, "loss": 0.3418, "step": 3199 }, { "epoch": 0.2729909571745436, "grad_norm": 1.3757617747603412, "learning_rate": 8.530156945930624e-05, "loss": 0.3423, "step": 3200 }, { "epoch": 0.27307626684866065, "grad_norm": 1.5732192281116824, "learning_rate": 8.529178440571986e-05, "loss": 0.3433, "step": 3201 }, { "epoch": 0.27316157652277767, "grad_norm": 1.6741522850126527, "learning_rate": 8.528199665779328e-05, "loss": 0.38, "step": 3202 }, { "epoch": 0.27324688619689474, "grad_norm": 1.3464859898543406, "learning_rate": 8.527220621627375e-05, "loss": 0.3021, "step": 3203 }, { "epoch": 0.27333219587101176, "grad_norm": 1.4113209318542042, "learning_rate": 8.526241308190871e-05, "loss": 0.3137, "step": 3204 }, { "epoch": 0.27341750554512884, "grad_norm": 1.4601586861791642, "learning_rate": 8.525261725544582e-05, "loss": 0.3742, "step": 3205 }, { "epoch": 0.27350281521924585, "grad_norm": 1.7458513676236165, "learning_rate": 8.524281873763293e-05, "loss": 0.4056, "step": 3206 }, { "epoch": 0.2735881248933629, "grad_norm": 1.7115130566860386, "learning_rate": 8.523301752921811e-05, "loss": 0.3479, "step": 3207 }, { "epoch": 0.27367343456747995, "grad_norm": 1.4368004962841698, "learning_rate": 8.522321363094962e-05, "loss": 0.3289, "step": 3208 }, { "epoch": 0.273758744241597, "grad_norm": 1.3455194754890352, "learning_rate": 8.521340704357597e-05, "loss": 0.3303, "step": 3209 }, { "epoch": 0.27384405391571404, "grad_norm": 1.2842564549000774, "learning_rate": 8.520359776784579e-05, "loss": 0.3009, "step": 3210 }, { "epoch": 0.2739293635898311, "grad_norm": 1.219545935885109, "learning_rate": 8.5193785804508e-05, "loss": 0.3325, "step": 3211 }, { "epoch": 0.2740146732639481, "grad_norm": 1.1311997081052088, "learning_rate": 8.518397115431169e-05, "loss": 0.2972, "step": 3212 }, { "epoch": 0.2740999829380652, "grad_norm": 1.4741020896371484, "learning_rate": 8.517415381800615e-05, "loss": 0.3357, "step": 3213 }, { "epoch": 0.2741852926121822, "grad_norm": 1.758288238573877, "learning_rate": 8.516433379634088e-05, "loss": 0.4123, "step": 3214 }, { "epoch": 0.2742706022862993, "grad_norm": 1.5411729623517385, "learning_rate": 8.515451109006558e-05, "loss": 0.3495, "step": 3215 }, { "epoch": 0.2743559119604163, "grad_norm": 1.5394816336209887, "learning_rate": 8.514468569993017e-05, "loss": 0.3365, "step": 3216 }, { "epoch": 0.2744412216345334, "grad_norm": 1.406423897057721, "learning_rate": 8.513485762668476e-05, "loss": 0.3098, "step": 3217 }, { "epoch": 0.2745265313086504, "grad_norm": 1.257269463689414, "learning_rate": 8.51250268710797e-05, "loss": 0.3044, "step": 3218 }, { "epoch": 0.2746118409827674, "grad_norm": 1.477918456407827, "learning_rate": 8.511519343386547e-05, "loss": 0.3363, "step": 3219 }, { "epoch": 0.2746971506568845, "grad_norm": 1.4509210587998984, "learning_rate": 8.510535731579283e-05, "loss": 0.3551, "step": 3220 }, { "epoch": 0.2747824603310015, "grad_norm": 1.7272292010741404, "learning_rate": 8.50955185176127e-05, "loss": 0.3524, "step": 3221 }, { "epoch": 0.2748677700051186, "grad_norm": 1.4030196492293532, "learning_rate": 8.508567704007627e-05, "loss": 0.3777, "step": 3222 }, { "epoch": 0.2749530796792356, "grad_norm": 1.2421313579036137, "learning_rate": 8.507583288393479e-05, "loss": 0.2874, "step": 3223 }, { "epoch": 0.2750383893533527, "grad_norm": 1.5681339163619845, "learning_rate": 8.50659860499399e-05, "loss": 0.3275, "step": 3224 }, { "epoch": 0.2751236990274697, "grad_norm": 1.416380189465466, "learning_rate": 8.50561365388433e-05, "loss": 0.3016, "step": 3225 }, { "epoch": 0.27520900870158677, "grad_norm": 1.630040347998262, "learning_rate": 8.504628435139696e-05, "loss": 0.3452, "step": 3226 }, { "epoch": 0.2752943183757038, "grad_norm": 1.2900702034529115, "learning_rate": 8.503642948835305e-05, "loss": 0.3213, "step": 3227 }, { "epoch": 0.27537962804982086, "grad_norm": 1.3360982876217569, "learning_rate": 8.502657195046393e-05, "loss": 0.2945, "step": 3228 }, { "epoch": 0.2754649377239379, "grad_norm": 1.3543529040088138, "learning_rate": 8.501671173848217e-05, "loss": 0.3006, "step": 3229 }, { "epoch": 0.27555024739805495, "grad_norm": 1.7045797731817243, "learning_rate": 8.500684885316055e-05, "loss": 0.3483, "step": 3230 }, { "epoch": 0.27563555707217197, "grad_norm": 1.41713410500876, "learning_rate": 8.499698329525205e-05, "loss": 0.3199, "step": 3231 }, { "epoch": 0.27572086674628904, "grad_norm": 1.3588575935052503, "learning_rate": 8.498711506550983e-05, "loss": 0.3174, "step": 3232 }, { "epoch": 0.27580617642040606, "grad_norm": 1.6015048877309197, "learning_rate": 8.497724416468733e-05, "loss": 0.3384, "step": 3233 }, { "epoch": 0.27589148609452313, "grad_norm": 1.3242333785517817, "learning_rate": 8.496737059353809e-05, "loss": 0.3479, "step": 3234 }, { "epoch": 0.27597679576864015, "grad_norm": 1.5941146732565068, "learning_rate": 8.495749435281592e-05, "loss": 0.3803, "step": 3235 }, { "epoch": 0.2760621054427572, "grad_norm": 1.3594307723607364, "learning_rate": 8.49476154432748e-05, "loss": 0.2856, "step": 3236 }, { "epoch": 0.27614741511687424, "grad_norm": 1.7510454597258083, "learning_rate": 8.493773386566899e-05, "loss": 0.4265, "step": 3237 }, { "epoch": 0.2762327247909913, "grad_norm": 1.8905819974690472, "learning_rate": 8.492784962075284e-05, "loss": 0.3643, "step": 3238 }, { "epoch": 0.27631803446510833, "grad_norm": 1.4494697785241955, "learning_rate": 8.491796270928099e-05, "loss": 0.3121, "step": 3239 }, { "epoch": 0.2764033441392254, "grad_norm": 1.5866443580907725, "learning_rate": 8.490807313200822e-05, "loss": 0.3428, "step": 3240 }, { "epoch": 0.2764886538133424, "grad_norm": 1.1746328328070283, "learning_rate": 8.489818088968957e-05, "loss": 0.2844, "step": 3241 }, { "epoch": 0.2765739634874595, "grad_norm": 1.3423149963148238, "learning_rate": 8.488828598308028e-05, "loss": 0.323, "step": 3242 }, { "epoch": 0.2766592731615765, "grad_norm": 1.6295716461228122, "learning_rate": 8.487838841293572e-05, "loss": 0.3705, "step": 3243 }, { "epoch": 0.2767445828356936, "grad_norm": 1.4624881776511176, "learning_rate": 8.486848818001158e-05, "loss": 0.3229, "step": 3244 }, { "epoch": 0.2768298925098106, "grad_norm": 1.3448026659117036, "learning_rate": 8.485858528506363e-05, "loss": 0.3177, "step": 3245 }, { "epoch": 0.2769152021839277, "grad_norm": 1.2824414458859943, "learning_rate": 8.484867972884795e-05, "loss": 0.2893, "step": 3246 }, { "epoch": 0.2770005118580447, "grad_norm": 1.2550780148213752, "learning_rate": 8.483877151212077e-05, "loss": 0.3047, "step": 3247 }, { "epoch": 0.27708582153216177, "grad_norm": 1.3858524249090896, "learning_rate": 8.482886063563849e-05, "loss": 0.3544, "step": 3248 }, { "epoch": 0.2771711312062788, "grad_norm": 1.450294855984901, "learning_rate": 8.481894710015778e-05, "loss": 0.331, "step": 3249 }, { "epoch": 0.27725644088039586, "grad_norm": 1.372456799575954, "learning_rate": 8.48090309064355e-05, "loss": 0.3673, "step": 3250 }, { "epoch": 0.2773417505545129, "grad_norm": 1.2907416302536872, "learning_rate": 8.47991120552287e-05, "loss": 0.2632, "step": 3251 }, { "epoch": 0.27742706022862995, "grad_norm": 1.652896076353333, "learning_rate": 8.47891905472946e-05, "loss": 0.372, "step": 3252 }, { "epoch": 0.27751236990274697, "grad_norm": 1.183749349205217, "learning_rate": 8.477926638339067e-05, "loss": 0.2789, "step": 3253 }, { "epoch": 0.27759767957686404, "grad_norm": 1.234268425312323, "learning_rate": 8.476933956427458e-05, "loss": 0.2522, "step": 3254 }, { "epoch": 0.27768298925098106, "grad_norm": 1.6162967901102516, "learning_rate": 8.475941009070416e-05, "loss": 0.3773, "step": 3255 }, { "epoch": 0.27776829892509813, "grad_norm": 1.5316673021588478, "learning_rate": 8.47494779634375e-05, "loss": 0.3471, "step": 3256 }, { "epoch": 0.27785360859921515, "grad_norm": 1.5197990593350212, "learning_rate": 8.473954318323287e-05, "loss": 0.3384, "step": 3257 }, { "epoch": 0.27793891827333217, "grad_norm": 1.4461922931945335, "learning_rate": 8.47296057508487e-05, "loss": 0.319, "step": 3258 }, { "epoch": 0.27802422794744924, "grad_norm": 1.4810689952370024, "learning_rate": 8.471966566704369e-05, "loss": 0.3055, "step": 3259 }, { "epoch": 0.27810953762156626, "grad_norm": 1.5057654048429978, "learning_rate": 8.470972293257671e-05, "loss": 0.3238, "step": 3260 }, { "epoch": 0.27819484729568333, "grad_norm": 1.5801303322068891, "learning_rate": 8.469977754820683e-05, "loss": 0.3649, "step": 3261 }, { "epoch": 0.27828015696980035, "grad_norm": 1.276841557793486, "learning_rate": 8.468982951469333e-05, "loss": 0.3204, "step": 3262 }, { "epoch": 0.2783654666439174, "grad_norm": 1.6153413859371455, "learning_rate": 8.467987883279569e-05, "loss": 0.3447, "step": 3263 }, { "epoch": 0.27845077631803444, "grad_norm": 1.7843583408520036, "learning_rate": 8.46699255032736e-05, "loss": 0.3073, "step": 3264 }, { "epoch": 0.2785360859921515, "grad_norm": 1.529667016614047, "learning_rate": 8.46599695268869e-05, "loss": 0.3074, "step": 3265 }, { "epoch": 0.27862139566626853, "grad_norm": 1.2316795084072638, "learning_rate": 8.465001090439575e-05, "loss": 0.3052, "step": 3266 }, { "epoch": 0.2787067053403856, "grad_norm": 1.860755087674725, "learning_rate": 8.464004963656037e-05, "loss": 0.3576, "step": 3267 }, { "epoch": 0.2787920150145026, "grad_norm": 1.6383556838401239, "learning_rate": 8.463008572414128e-05, "loss": 0.3836, "step": 3268 }, { "epoch": 0.2788773246886197, "grad_norm": 1.4133854715309067, "learning_rate": 8.462011916789918e-05, "loss": 0.2895, "step": 3269 }, { "epoch": 0.2789626343627367, "grad_norm": 1.7390126264880421, "learning_rate": 8.461014996859495e-05, "loss": 0.3397, "step": 3270 }, { "epoch": 0.2790479440368538, "grad_norm": 1.7094982486346575, "learning_rate": 8.460017812698968e-05, "loss": 0.3826, "step": 3271 }, { "epoch": 0.2791332537109708, "grad_norm": 1.4436477558873608, "learning_rate": 8.45902036438447e-05, "loss": 0.3542, "step": 3272 }, { "epoch": 0.2792185633850879, "grad_norm": 1.2943433386938028, "learning_rate": 8.458022651992145e-05, "loss": 0.3137, "step": 3273 }, { "epoch": 0.2793038730592049, "grad_norm": 1.5120262602084855, "learning_rate": 8.457024675598168e-05, "loss": 0.3704, "step": 3274 }, { "epoch": 0.279389182733322, "grad_norm": 1.3155534738411057, "learning_rate": 8.456026435278728e-05, "loss": 0.3619, "step": 3275 }, { "epoch": 0.279474492407439, "grad_norm": 1.4975780962161398, "learning_rate": 8.455027931110034e-05, "loss": 0.4511, "step": 3276 }, { "epoch": 0.27955980208155606, "grad_norm": 1.2498883674365329, "learning_rate": 8.454029163168317e-05, "loss": 0.292, "step": 3277 }, { "epoch": 0.2796451117556731, "grad_norm": 1.281353457856169, "learning_rate": 8.45303013152983e-05, "loss": 0.2644, "step": 3278 }, { "epoch": 0.27973042142979015, "grad_norm": 1.9394641272779254, "learning_rate": 8.452030836270841e-05, "loss": 0.3641, "step": 3279 }, { "epoch": 0.2798157311039072, "grad_norm": 1.4612850652946614, "learning_rate": 8.451031277467641e-05, "loss": 0.2985, "step": 3280 }, { "epoch": 0.27990104077802425, "grad_norm": 1.6980550354664306, "learning_rate": 8.450031455196543e-05, "loss": 0.3462, "step": 3281 }, { "epoch": 0.27998635045214126, "grad_norm": 1.6438388408831757, "learning_rate": 8.449031369533876e-05, "loss": 0.3658, "step": 3282 }, { "epoch": 0.28007166012625834, "grad_norm": 1.1406117791232666, "learning_rate": 8.448031020555993e-05, "loss": 0.3098, "step": 3283 }, { "epoch": 0.28015696980037536, "grad_norm": 1.6280771610897802, "learning_rate": 8.447030408339263e-05, "loss": 0.3224, "step": 3284 }, { "epoch": 0.28024227947449243, "grad_norm": 1.3737965866256385, "learning_rate": 8.446029532960081e-05, "loss": 0.3044, "step": 3285 }, { "epoch": 0.28032758914860945, "grad_norm": 1.2651799679323588, "learning_rate": 8.445028394494853e-05, "loss": 0.3031, "step": 3286 }, { "epoch": 0.2804128988227265, "grad_norm": 1.4279276538281835, "learning_rate": 8.444026993020017e-05, "loss": 0.3407, "step": 3287 }, { "epoch": 0.28049820849684354, "grad_norm": 1.466022068398818, "learning_rate": 8.44302532861202e-05, "loss": 0.3466, "step": 3288 }, { "epoch": 0.2805835181709606, "grad_norm": 1.547630664935542, "learning_rate": 8.442023401347336e-05, "loss": 0.3853, "step": 3289 }, { "epoch": 0.28066882784507763, "grad_norm": 1.4845989660652854, "learning_rate": 8.441021211302456e-05, "loss": 0.3227, "step": 3290 }, { "epoch": 0.2807541375191947, "grad_norm": 1.3731081258425433, "learning_rate": 8.440018758553892e-05, "loss": 0.297, "step": 3291 }, { "epoch": 0.2808394471933117, "grad_norm": 1.3542310797214892, "learning_rate": 8.439016043178176e-05, "loss": 0.2916, "step": 3292 }, { "epoch": 0.2809247568674288, "grad_norm": 1.579044187655882, "learning_rate": 8.438013065251859e-05, "loss": 0.3447, "step": 3293 }, { "epoch": 0.2810100665415458, "grad_norm": 1.4147386146379841, "learning_rate": 8.437009824851517e-05, "loss": 0.283, "step": 3294 }, { "epoch": 0.28109537621566283, "grad_norm": 1.5982873691371215, "learning_rate": 8.436006322053737e-05, "loss": 0.332, "step": 3295 }, { "epoch": 0.2811806858897799, "grad_norm": 1.5977768980700873, "learning_rate": 8.435002556935131e-05, "loss": 0.366, "step": 3296 }, { "epoch": 0.2812659955638969, "grad_norm": 1.2661792812953432, "learning_rate": 8.433998529572338e-05, "loss": 0.3315, "step": 3297 }, { "epoch": 0.281351305238014, "grad_norm": 1.538122906921773, "learning_rate": 8.432994240042003e-05, "loss": 0.3527, "step": 3298 }, { "epoch": 0.281436614912131, "grad_norm": 1.4380478022486332, "learning_rate": 8.4319896884208e-05, "loss": 0.3252, "step": 3299 }, { "epoch": 0.2815219245862481, "grad_norm": 1.6481618063111143, "learning_rate": 8.430984874785423e-05, "loss": 0.3159, "step": 3300 }, { "epoch": 0.2816072342603651, "grad_norm": 1.6161662392327385, "learning_rate": 8.429979799212584e-05, "loss": 0.3148, "step": 3301 }, { "epoch": 0.2816925439344822, "grad_norm": 1.3649746234337483, "learning_rate": 8.428974461779014e-05, "loss": 0.3328, "step": 3302 }, { "epoch": 0.2817778536085992, "grad_norm": 1.5663459747209152, "learning_rate": 8.427968862561464e-05, "loss": 0.3795, "step": 3303 }, { "epoch": 0.28186316328271627, "grad_norm": 1.4929019356094069, "learning_rate": 8.426963001636707e-05, "loss": 0.3216, "step": 3304 }, { "epoch": 0.2819484729568333, "grad_norm": 1.4736413873280179, "learning_rate": 8.425956879081539e-05, "loss": 0.3554, "step": 3305 }, { "epoch": 0.28203378263095036, "grad_norm": 1.5627398497551328, "learning_rate": 8.424950494972766e-05, "loss": 0.3035, "step": 3306 }, { "epoch": 0.2821190923050674, "grad_norm": 1.2831018602428044, "learning_rate": 8.423943849387223e-05, "loss": 0.2818, "step": 3307 }, { "epoch": 0.28220440197918445, "grad_norm": 1.2368266229098361, "learning_rate": 8.422936942401762e-05, "loss": 0.2578, "step": 3308 }, { "epoch": 0.28228971165330147, "grad_norm": 1.3644264461028859, "learning_rate": 8.421929774093255e-05, "loss": 0.2559, "step": 3309 }, { "epoch": 0.28237502132741854, "grad_norm": 1.803418526649647, "learning_rate": 8.420922344538594e-05, "loss": 0.3125, "step": 3310 }, { "epoch": 0.28246033100153556, "grad_norm": 1.678787558129074, "learning_rate": 8.419914653814692e-05, "loss": 0.3005, "step": 3311 }, { "epoch": 0.28254564067565263, "grad_norm": 1.297847888444762, "learning_rate": 8.418906701998477e-05, "loss": 0.304, "step": 3312 }, { "epoch": 0.28263095034976965, "grad_norm": 1.37536256518312, "learning_rate": 8.417898489166905e-05, "loss": 0.3339, "step": 3313 }, { "epoch": 0.2827162600238867, "grad_norm": 1.3613046564736404, "learning_rate": 8.416890015396947e-05, "loss": 0.3115, "step": 3314 }, { "epoch": 0.28280156969800374, "grad_norm": 1.7593075646169247, "learning_rate": 8.415881280765591e-05, "loss": 0.3745, "step": 3315 }, { "epoch": 0.2828868793721208, "grad_norm": 1.4805887218972809, "learning_rate": 8.414872285349854e-05, "loss": 0.3282, "step": 3316 }, { "epoch": 0.28297218904623783, "grad_norm": 1.3195118931490555, "learning_rate": 8.413863029226762e-05, "loss": 0.3408, "step": 3317 }, { "epoch": 0.2830574987203549, "grad_norm": 1.5606287623227577, "learning_rate": 8.412853512473371e-05, "loss": 0.3294, "step": 3318 }, { "epoch": 0.2831428083944719, "grad_norm": 1.6637385953058046, "learning_rate": 8.41184373516675e-05, "loss": 0.3563, "step": 3319 }, { "epoch": 0.283228118068589, "grad_norm": 1.3956017752874732, "learning_rate": 8.41083369738399e-05, "loss": 0.3624, "step": 3320 }, { "epoch": 0.283313427742706, "grad_norm": 1.4039791846077232, "learning_rate": 8.409823399202203e-05, "loss": 0.3498, "step": 3321 }, { "epoch": 0.2833987374168231, "grad_norm": 1.7132339742376632, "learning_rate": 8.408812840698517e-05, "loss": 0.3058, "step": 3322 }, { "epoch": 0.2834840470909401, "grad_norm": 1.2873193688207303, "learning_rate": 8.407802021950087e-05, "loss": 0.2934, "step": 3323 }, { "epoch": 0.2835693567650572, "grad_norm": 1.5045713068860787, "learning_rate": 8.406790943034081e-05, "loss": 0.3401, "step": 3324 }, { "epoch": 0.2836546664391742, "grad_norm": 1.2864389714110895, "learning_rate": 8.405779604027691e-05, "loss": 0.3057, "step": 3325 }, { "epoch": 0.28373997611329127, "grad_norm": 1.3520312957075935, "learning_rate": 8.404768005008126e-05, "loss": 0.3271, "step": 3326 }, { "epoch": 0.2838252857874083, "grad_norm": 1.3726190413250257, "learning_rate": 8.403756146052617e-05, "loss": 0.3193, "step": 3327 }, { "epoch": 0.28391059546152536, "grad_norm": 1.6008759306297835, "learning_rate": 8.402744027238413e-05, "loss": 0.3256, "step": 3328 }, { "epoch": 0.2839959051356424, "grad_norm": 1.2991978169381775, "learning_rate": 8.401731648642785e-05, "loss": 0.2983, "step": 3329 }, { "epoch": 0.28408121480975945, "grad_norm": 1.6321092761330358, "learning_rate": 8.400719010343023e-05, "loss": 0.3341, "step": 3330 }, { "epoch": 0.28416652448387647, "grad_norm": 1.5683054077440493, "learning_rate": 8.399706112416434e-05, "loss": 0.3179, "step": 3331 }, { "epoch": 0.28425183415799354, "grad_norm": 1.5994376624869255, "learning_rate": 8.398692954940352e-05, "loss": 0.3695, "step": 3332 }, { "epoch": 0.28433714383211056, "grad_norm": 1.7691438431898998, "learning_rate": 8.397679537992122e-05, "loss": 0.3609, "step": 3333 }, { "epoch": 0.2844224535062276, "grad_norm": 1.616621720125096, "learning_rate": 8.396665861649115e-05, "loss": 0.3625, "step": 3334 }, { "epoch": 0.28450776318034465, "grad_norm": 1.3824351167543172, "learning_rate": 8.395651925988718e-05, "loss": 0.3545, "step": 3335 }, { "epoch": 0.28459307285446167, "grad_norm": 1.6542703565613621, "learning_rate": 8.394637731088344e-05, "loss": 0.3802, "step": 3336 }, { "epoch": 0.28467838252857874, "grad_norm": 1.6244279147868474, "learning_rate": 8.393623277025415e-05, "loss": 0.3207, "step": 3337 }, { "epoch": 0.28476369220269576, "grad_norm": 1.466107202521244, "learning_rate": 8.392608563877385e-05, "loss": 0.3175, "step": 3338 }, { "epoch": 0.28484900187681284, "grad_norm": 1.2220166273062838, "learning_rate": 8.391593591721718e-05, "loss": 0.2735, "step": 3339 }, { "epoch": 0.28493431155092985, "grad_norm": 1.3378855461525405, "learning_rate": 8.390578360635903e-05, "loss": 0.3411, "step": 3340 }, { "epoch": 0.2850196212250469, "grad_norm": 1.7236007211955897, "learning_rate": 8.38956287069745e-05, "loss": 0.3626, "step": 3341 }, { "epoch": 0.28510493089916394, "grad_norm": 1.4867577151285487, "learning_rate": 8.388547121983881e-05, "loss": 0.3431, "step": 3342 }, { "epoch": 0.285190240573281, "grad_norm": 1.4336379873326266, "learning_rate": 8.387531114572746e-05, "loss": 0.3678, "step": 3343 }, { "epoch": 0.28527555024739804, "grad_norm": 1.2235843925912289, "learning_rate": 8.386514848541614e-05, "loss": 0.3722, "step": 3344 }, { "epoch": 0.2853608599215151, "grad_norm": 1.684567340350639, "learning_rate": 8.385498323968069e-05, "loss": 0.3312, "step": 3345 }, { "epoch": 0.2854461695956321, "grad_norm": 1.52112485974302, "learning_rate": 8.384481540929715e-05, "loss": 0.3927, "step": 3346 }, { "epoch": 0.2855314792697492, "grad_norm": 1.600803846848578, "learning_rate": 8.383464499504183e-05, "loss": 0.3412, "step": 3347 }, { "epoch": 0.2856167889438662, "grad_norm": 1.5036568571117577, "learning_rate": 8.382447199769115e-05, "loss": 0.3219, "step": 3348 }, { "epoch": 0.2857020986179833, "grad_norm": 1.3092643272223479, "learning_rate": 8.381429641802177e-05, "loss": 0.3216, "step": 3349 }, { "epoch": 0.2857874082921003, "grad_norm": 1.3778779515557773, "learning_rate": 8.380411825681057e-05, "loss": 0.3375, "step": 3350 }, { "epoch": 0.2858727179662174, "grad_norm": 1.2699615218874094, "learning_rate": 8.379393751483455e-05, "loss": 0.2839, "step": 3351 }, { "epoch": 0.2859580276403344, "grad_norm": 1.4890123709119276, "learning_rate": 8.378375419287099e-05, "loss": 0.2948, "step": 3352 }, { "epoch": 0.2860433373144515, "grad_norm": 1.1685690469447738, "learning_rate": 8.377356829169734e-05, "loss": 0.3137, "step": 3353 }, { "epoch": 0.2861286469885685, "grad_norm": 1.5151454520826695, "learning_rate": 8.376337981209119e-05, "loss": 0.3137, "step": 3354 }, { "epoch": 0.28621395666268556, "grad_norm": 1.4712546886022593, "learning_rate": 8.375318875483045e-05, "loss": 0.3403, "step": 3355 }, { "epoch": 0.2862992663368026, "grad_norm": 1.4798004332872652, "learning_rate": 8.374299512069308e-05, "loss": 0.3558, "step": 3356 }, { "epoch": 0.28638457601091966, "grad_norm": 1.586625424471493, "learning_rate": 8.373279891045735e-05, "loss": 0.3342, "step": 3357 }, { "epoch": 0.2864698856850367, "grad_norm": 1.3431074691061493, "learning_rate": 8.37226001249017e-05, "loss": 0.3395, "step": 3358 }, { "epoch": 0.28655519535915375, "grad_norm": 1.7326124015555378, "learning_rate": 8.37123987648047e-05, "loss": 0.3645, "step": 3359 }, { "epoch": 0.28664050503327076, "grad_norm": 1.251188476147474, "learning_rate": 8.370219483094523e-05, "loss": 0.3375, "step": 3360 }, { "epoch": 0.28672581470738784, "grad_norm": 1.6836985402785538, "learning_rate": 8.369198832410227e-05, "loss": 0.3668, "step": 3361 }, { "epoch": 0.28681112438150486, "grad_norm": 1.2115508464324602, "learning_rate": 8.368177924505504e-05, "loss": 0.2959, "step": 3362 }, { "epoch": 0.28689643405562193, "grad_norm": 1.4138244934423247, "learning_rate": 8.367156759458294e-05, "loss": 0.3825, "step": 3363 }, { "epoch": 0.28698174372973895, "grad_norm": 1.413501101169218, "learning_rate": 8.366135337346559e-05, "loss": 0.3164, "step": 3364 }, { "epoch": 0.287067053403856, "grad_norm": 1.3896751159980854, "learning_rate": 8.365113658248278e-05, "loss": 0.3126, "step": 3365 }, { "epoch": 0.28715236307797304, "grad_norm": 1.3444054357966426, "learning_rate": 8.364091722241454e-05, "loss": 0.2941, "step": 3366 }, { "epoch": 0.2872376727520901, "grad_norm": 1.161540223842728, "learning_rate": 8.363069529404102e-05, "loss": 0.2831, "step": 3367 }, { "epoch": 0.28732298242620713, "grad_norm": 1.6164624344071115, "learning_rate": 8.362047079814262e-05, "loss": 0.3737, "step": 3368 }, { "epoch": 0.2874082921003242, "grad_norm": 1.2418218110380703, "learning_rate": 8.361024373549994e-05, "loss": 0.3186, "step": 3369 }, { "epoch": 0.2874936017744412, "grad_norm": 1.295068608826808, "learning_rate": 8.360001410689375e-05, "loss": 0.2692, "step": 3370 }, { "epoch": 0.2875789114485583, "grad_norm": 1.5378989841736879, "learning_rate": 8.358978191310505e-05, "loss": 0.3439, "step": 3371 }, { "epoch": 0.2876642211226753, "grad_norm": 1.5617989302447974, "learning_rate": 8.357954715491498e-05, "loss": 0.3198, "step": 3372 }, { "epoch": 0.28774953079679233, "grad_norm": 1.442891913714002, "learning_rate": 8.356930983310493e-05, "loss": 0.272, "step": 3373 }, { "epoch": 0.2878348404709094, "grad_norm": 1.4578643135438607, "learning_rate": 8.355906994845646e-05, "loss": 0.3367, "step": 3374 }, { "epoch": 0.2879201501450264, "grad_norm": 1.7245004345192918, "learning_rate": 8.354882750175133e-05, "loss": 0.3136, "step": 3375 }, { "epoch": 0.2880054598191435, "grad_norm": 1.4434569949121756, "learning_rate": 8.35385824937715e-05, "loss": 0.3484, "step": 3376 }, { "epoch": 0.2880907694932605, "grad_norm": 1.5121054930897035, "learning_rate": 8.352833492529914e-05, "loss": 0.3068, "step": 3377 }, { "epoch": 0.2881760791673776, "grad_norm": 1.3624304709936323, "learning_rate": 8.351808479711656e-05, "loss": 0.3296, "step": 3378 }, { "epoch": 0.2882613888414946, "grad_norm": 1.4692005521545286, "learning_rate": 8.350783211000632e-05, "loss": 0.3375, "step": 3379 }, { "epoch": 0.2883466985156117, "grad_norm": 1.4480846316753675, "learning_rate": 8.349757686475116e-05, "loss": 0.2853, "step": 3380 }, { "epoch": 0.2884320081897287, "grad_norm": 1.2944894474258046, "learning_rate": 8.348731906213402e-05, "loss": 0.2837, "step": 3381 }, { "epoch": 0.28851731786384577, "grad_norm": 1.3977743100007232, "learning_rate": 8.3477058702938e-05, "loss": 0.3569, "step": 3382 }, { "epoch": 0.2886026275379628, "grad_norm": 1.7837500177502381, "learning_rate": 8.346679578794647e-05, "loss": 0.2945, "step": 3383 }, { "epoch": 0.28868793721207986, "grad_norm": 1.5521486341150623, "learning_rate": 8.345653031794292e-05, "loss": 0.3451, "step": 3384 }, { "epoch": 0.2887732468861969, "grad_norm": 1.3642925894256581, "learning_rate": 8.344626229371107e-05, "loss": 0.332, "step": 3385 }, { "epoch": 0.28885855656031395, "grad_norm": 1.2056193071688754, "learning_rate": 8.34359917160348e-05, "loss": 0.2973, "step": 3386 }, { "epoch": 0.28894386623443097, "grad_norm": 1.5309818578002727, "learning_rate": 8.342571858569826e-05, "loss": 0.3312, "step": 3387 }, { "epoch": 0.28902917590854804, "grad_norm": 1.5877089355424894, "learning_rate": 8.341544290348572e-05, "loss": 0.3654, "step": 3388 }, { "epoch": 0.28911448558266506, "grad_norm": 1.2784254061495903, "learning_rate": 8.340516467018171e-05, "loss": 0.3216, "step": 3389 }, { "epoch": 0.28919979525678213, "grad_norm": 1.2509523578479016, "learning_rate": 8.339488388657089e-05, "loss": 0.3107, "step": 3390 }, { "epoch": 0.28928510493089915, "grad_norm": 1.2841026370064597, "learning_rate": 8.338460055343812e-05, "loss": 0.3082, "step": 3391 }, { "epoch": 0.2893704146050162, "grad_norm": 1.6480591687858084, "learning_rate": 8.337431467156851e-05, "loss": 0.3823, "step": 3392 }, { "epoch": 0.28945572427913324, "grad_norm": 1.721390793526585, "learning_rate": 8.336402624174734e-05, "loss": 0.3525, "step": 3393 }, { "epoch": 0.2895410339532503, "grad_norm": 1.3593824978325686, "learning_rate": 8.335373526476005e-05, "loss": 0.3284, "step": 3394 }, { "epoch": 0.28962634362736733, "grad_norm": 1.4867856177538812, "learning_rate": 8.334344174139233e-05, "loss": 0.3189, "step": 3395 }, { "epoch": 0.2897116533014844, "grad_norm": 1.4039438623687421, "learning_rate": 8.333314567243e-05, "loss": 0.3981, "step": 3396 }, { "epoch": 0.2897969629756014, "grad_norm": 1.490273251407635, "learning_rate": 8.332284705865914e-05, "loss": 0.2939, "step": 3397 }, { "epoch": 0.2898822726497185, "grad_norm": 1.677458095339819, "learning_rate": 8.331254590086597e-05, "loss": 0.3516, "step": 3398 }, { "epoch": 0.2899675823238355, "grad_norm": 1.4311995718122155, "learning_rate": 8.330224219983695e-05, "loss": 0.2987, "step": 3399 }, { "epoch": 0.2900528919979526, "grad_norm": 1.533300000671534, "learning_rate": 8.329193595635872e-05, "loss": 0.3707, "step": 3400 }, { "epoch": 0.2901382016720696, "grad_norm": 1.5251504511176304, "learning_rate": 8.32816271712181e-05, "loss": 0.3521, "step": 3401 }, { "epoch": 0.2902235113461867, "grad_norm": 1.4859472681600923, "learning_rate": 8.327131584520207e-05, "loss": 0.324, "step": 3402 }, { "epoch": 0.2903088210203037, "grad_norm": 1.363403314847807, "learning_rate": 8.32610019790979e-05, "loss": 0.3194, "step": 3403 }, { "epoch": 0.29039413069442077, "grad_norm": 1.4413873107823754, "learning_rate": 8.325068557369298e-05, "loss": 0.3487, "step": 3404 }, { "epoch": 0.2904794403685378, "grad_norm": 1.6214548467136018, "learning_rate": 8.32403666297749e-05, "loss": 0.2945, "step": 3405 }, { "epoch": 0.29056475004265486, "grad_norm": 1.3742030174825177, "learning_rate": 8.323004514813148e-05, "loss": 0.3227, "step": 3406 }, { "epoch": 0.2906500597167719, "grad_norm": 1.3900560222286302, "learning_rate": 8.321972112955068e-05, "loss": 0.2836, "step": 3407 }, { "epoch": 0.29073536939088895, "grad_norm": 1.4362718892234714, "learning_rate": 8.320939457482072e-05, "loss": 0.2883, "step": 3408 }, { "epoch": 0.29082067906500597, "grad_norm": 1.2848021416535824, "learning_rate": 8.319906548472993e-05, "loss": 0.2889, "step": 3409 }, { "epoch": 0.290905988739123, "grad_norm": 1.5149369398533319, "learning_rate": 8.318873386006693e-05, "loss": 0.352, "step": 3410 }, { "epoch": 0.29099129841324006, "grad_norm": 1.7339133945691558, "learning_rate": 8.317839970162047e-05, "loss": 0.3418, "step": 3411 }, { "epoch": 0.2910766080873571, "grad_norm": 0.9681992965175608, "learning_rate": 8.31680630101795e-05, "loss": 0.2801, "step": 3412 }, { "epoch": 0.29116191776147415, "grad_norm": 1.4678988323848137, "learning_rate": 8.315772378653317e-05, "loss": 0.3332, "step": 3413 }, { "epoch": 0.29124722743559117, "grad_norm": 1.5605346526430002, "learning_rate": 8.314738203147084e-05, "loss": 0.3591, "step": 3414 }, { "epoch": 0.29133253710970825, "grad_norm": 1.726231193688644, "learning_rate": 8.3137037745782e-05, "loss": 0.345, "step": 3415 }, { "epoch": 0.29141784678382526, "grad_norm": 1.2490377059968574, "learning_rate": 8.312669093025645e-05, "loss": 0.302, "step": 3416 }, { "epoch": 0.29150315645794234, "grad_norm": 1.4277102446180319, "learning_rate": 8.311634158568408e-05, "loss": 0.316, "step": 3417 }, { "epoch": 0.29158846613205935, "grad_norm": 1.351680992974035, "learning_rate": 8.3105989712855e-05, "loss": 0.2838, "step": 3418 }, { "epoch": 0.2916737758061764, "grad_norm": 1.52779902626443, "learning_rate": 8.309563531255955e-05, "loss": 0.4026, "step": 3419 }, { "epoch": 0.29175908548029345, "grad_norm": 1.2502719106571611, "learning_rate": 8.308527838558819e-05, "loss": 0.3148, "step": 3420 }, { "epoch": 0.2918443951544105, "grad_norm": 1.2251514652578621, "learning_rate": 8.307491893273165e-05, "loss": 0.3011, "step": 3421 }, { "epoch": 0.29192970482852754, "grad_norm": 1.8325679944422664, "learning_rate": 8.306455695478081e-05, "loss": 0.3796, "step": 3422 }, { "epoch": 0.2920150145026446, "grad_norm": 1.6559799262041828, "learning_rate": 8.305419245252676e-05, "loss": 0.3598, "step": 3423 }, { "epoch": 0.2921003241767616, "grad_norm": 1.4069419430058494, "learning_rate": 8.304382542676075e-05, "loss": 0.3313, "step": 3424 }, { "epoch": 0.2921856338508787, "grad_norm": 1.4625280361259636, "learning_rate": 8.303345587827427e-05, "loss": 0.3359, "step": 3425 }, { "epoch": 0.2922709435249957, "grad_norm": 1.373917326798478, "learning_rate": 8.302308380785898e-05, "loss": 0.2769, "step": 3426 }, { "epoch": 0.2923562531991128, "grad_norm": 1.9757294840760555, "learning_rate": 8.301270921630673e-05, "loss": 0.3485, "step": 3427 }, { "epoch": 0.2924415628732298, "grad_norm": 1.148250227210755, "learning_rate": 8.300233210440954e-05, "loss": 0.3037, "step": 3428 }, { "epoch": 0.2925268725473469, "grad_norm": 1.440160611123179, "learning_rate": 8.299195247295968e-05, "loss": 0.2948, "step": 3429 }, { "epoch": 0.2926121822214639, "grad_norm": 1.664592103453614, "learning_rate": 8.298157032274957e-05, "loss": 0.3025, "step": 3430 }, { "epoch": 0.292697491895581, "grad_norm": 1.3610333379380388, "learning_rate": 8.297118565457182e-05, "loss": 0.3207, "step": 3431 }, { "epoch": 0.292782801569698, "grad_norm": 1.4490486312508564, "learning_rate": 8.296079846921927e-05, "loss": 0.3186, "step": 3432 }, { "epoch": 0.29286811124381507, "grad_norm": 1.5323132307487544, "learning_rate": 8.295040876748489e-05, "loss": 0.3311, "step": 3433 }, { "epoch": 0.2929534209179321, "grad_norm": 1.3691165067586395, "learning_rate": 8.294001655016192e-05, "loss": 0.3053, "step": 3434 }, { "epoch": 0.29303873059204916, "grad_norm": 1.444070478267461, "learning_rate": 8.292962181804372e-05, "loss": 0.3009, "step": 3435 }, { "epoch": 0.2931240402661662, "grad_norm": 1.4773930227211731, "learning_rate": 8.291922457192387e-05, "loss": 0.319, "step": 3436 }, { "epoch": 0.29320934994028325, "grad_norm": 1.2958170240710825, "learning_rate": 8.290882481259618e-05, "loss": 0.3656, "step": 3437 }, { "epoch": 0.29329465961440027, "grad_norm": 1.5111941035860943, "learning_rate": 8.289842254085458e-05, "loss": 0.3429, "step": 3438 }, { "epoch": 0.29337996928851734, "grad_norm": 1.360203063264417, "learning_rate": 8.288801775749323e-05, "loss": 0.3142, "step": 3439 }, { "epoch": 0.29346527896263436, "grad_norm": 1.6028945965491568, "learning_rate": 8.28776104633065e-05, "loss": 0.356, "step": 3440 }, { "epoch": 0.29355058863675143, "grad_norm": 1.450765765985163, "learning_rate": 8.286720065908893e-05, "loss": 0.3469, "step": 3441 }, { "epoch": 0.29363589831086845, "grad_norm": 1.4420325926537712, "learning_rate": 8.285678834563524e-05, "loss": 0.317, "step": 3442 }, { "epoch": 0.2937212079849855, "grad_norm": 1.6868457938934722, "learning_rate": 8.284637352374037e-05, "loss": 0.3305, "step": 3443 }, { "epoch": 0.29380651765910254, "grad_norm": 1.8434097493332207, "learning_rate": 8.283595619419941e-05, "loss": 0.3622, "step": 3444 }, { "epoch": 0.2938918273332196, "grad_norm": 1.5724568315897054, "learning_rate": 8.28255363578077e-05, "loss": 0.3252, "step": 3445 }, { "epoch": 0.29397713700733663, "grad_norm": 1.5780334446098185, "learning_rate": 8.281511401536071e-05, "loss": 0.3603, "step": 3446 }, { "epoch": 0.2940624466814537, "grad_norm": 1.2046962238000847, "learning_rate": 8.280468916765415e-05, "loss": 0.2795, "step": 3447 }, { "epoch": 0.2941477563555707, "grad_norm": 1.6318954009494275, "learning_rate": 8.27942618154839e-05, "loss": 0.3597, "step": 3448 }, { "epoch": 0.29423306602968774, "grad_norm": 1.1910937290224175, "learning_rate": 8.278383195964601e-05, "loss": 0.2819, "step": 3449 }, { "epoch": 0.2943183757038048, "grad_norm": 1.4413839198918188, "learning_rate": 8.27733996009368e-05, "loss": 0.3331, "step": 3450 }, { "epoch": 0.29440368537792183, "grad_norm": 1.571867670910473, "learning_rate": 8.276296474015266e-05, "loss": 0.3538, "step": 3451 }, { "epoch": 0.2944889950520389, "grad_norm": 1.568930022104512, "learning_rate": 8.275252737809028e-05, "loss": 0.3814, "step": 3452 }, { "epoch": 0.2945743047261559, "grad_norm": 1.3237730193185386, "learning_rate": 8.274208751554646e-05, "loss": 0.3349, "step": 3453 }, { "epoch": 0.294659614400273, "grad_norm": 1.5378275917706472, "learning_rate": 8.273164515331826e-05, "loss": 0.344, "step": 3454 }, { "epoch": 0.29474492407439, "grad_norm": 1.4334352494016933, "learning_rate": 8.272120029220289e-05, "loss": 0.3028, "step": 3455 }, { "epoch": 0.2948302337485071, "grad_norm": 1.4441953723488363, "learning_rate": 8.271075293299777e-05, "loss": 0.3171, "step": 3456 }, { "epoch": 0.2949155434226241, "grad_norm": 1.4836027847183682, "learning_rate": 8.270030307650048e-05, "loss": 0.2913, "step": 3457 }, { "epoch": 0.2950008530967412, "grad_norm": 1.303984309763213, "learning_rate": 8.268985072350882e-05, "loss": 0.3008, "step": 3458 }, { "epoch": 0.2950861627708582, "grad_norm": 1.4594149075591922, "learning_rate": 8.267939587482077e-05, "loss": 0.3156, "step": 3459 }, { "epoch": 0.29517147244497527, "grad_norm": 1.6561400179064367, "learning_rate": 8.266893853123447e-05, "loss": 0.3401, "step": 3460 }, { "epoch": 0.2952567821190923, "grad_norm": 1.381332726396209, "learning_rate": 8.265847869354836e-05, "loss": 0.3072, "step": 3461 }, { "epoch": 0.29534209179320936, "grad_norm": 1.3960513850840974, "learning_rate": 8.264801636256094e-05, "loss": 0.2893, "step": 3462 }, { "epoch": 0.2954274014673264, "grad_norm": 1.429848083482857, "learning_rate": 8.263755153907095e-05, "loss": 0.3318, "step": 3463 }, { "epoch": 0.29551271114144345, "grad_norm": 1.4741545727400653, "learning_rate": 8.262708422387735e-05, "loss": 0.2935, "step": 3464 }, { "epoch": 0.29559802081556047, "grad_norm": 1.317463617130108, "learning_rate": 8.261661441777924e-05, "loss": 0.302, "step": 3465 }, { "epoch": 0.29568333048967754, "grad_norm": 1.463816941552411, "learning_rate": 8.260614212157593e-05, "loss": 0.3486, "step": 3466 }, { "epoch": 0.29576864016379456, "grad_norm": 1.3410305627957078, "learning_rate": 8.259566733606696e-05, "loss": 0.3385, "step": 3467 }, { "epoch": 0.29585394983791163, "grad_norm": 1.5092849262285268, "learning_rate": 8.2585190062052e-05, "loss": 0.2836, "step": 3468 }, { "epoch": 0.29593925951202865, "grad_norm": 1.6132805849678942, "learning_rate": 8.257471030033092e-05, "loss": 0.3442, "step": 3469 }, { "epoch": 0.2960245691861457, "grad_norm": 1.4804370242622267, "learning_rate": 8.256422805170383e-05, "loss": 0.3526, "step": 3470 }, { "epoch": 0.29610987886026274, "grad_norm": 1.3551549190909238, "learning_rate": 8.255374331697097e-05, "loss": 0.3233, "step": 3471 }, { "epoch": 0.2961951885343798, "grad_norm": 1.7387602071236237, "learning_rate": 8.25432560969328e-05, "loss": 0.3514, "step": 3472 }, { "epoch": 0.29628049820849683, "grad_norm": 1.5495223693808586, "learning_rate": 8.253276639238995e-05, "loss": 0.3899, "step": 3473 }, { "epoch": 0.2963658078826139, "grad_norm": 1.394170206813722, "learning_rate": 8.252227420414327e-05, "loss": 0.302, "step": 3474 }, { "epoch": 0.2964511175567309, "grad_norm": 1.6571335505120848, "learning_rate": 8.251177953299379e-05, "loss": 0.3432, "step": 3475 }, { "epoch": 0.296536427230848, "grad_norm": 1.5842558114329137, "learning_rate": 8.250128237974268e-05, "loss": 0.3228, "step": 3476 }, { "epoch": 0.296621736904965, "grad_norm": 1.43220057342309, "learning_rate": 8.24907827451914e-05, "loss": 0.3179, "step": 3477 }, { "epoch": 0.2967070465790821, "grad_norm": 1.468577435684217, "learning_rate": 8.24802806301415e-05, "loss": 0.3687, "step": 3478 }, { "epoch": 0.2967923562531991, "grad_norm": 1.3651802368781192, "learning_rate": 8.246977603539478e-05, "loss": 0.3039, "step": 3479 }, { "epoch": 0.2968776659273162, "grad_norm": 1.4460244564557727, "learning_rate": 8.245926896175321e-05, "loss": 0.3219, "step": 3480 }, { "epoch": 0.2969629756014332, "grad_norm": 1.9085470649046878, "learning_rate": 8.244875941001893e-05, "loss": 0.3727, "step": 3481 }, { "epoch": 0.2970482852755503, "grad_norm": 1.4724568603880168, "learning_rate": 8.243824738099431e-05, "loss": 0.3091, "step": 3482 }, { "epoch": 0.2971335949496673, "grad_norm": 1.393149110389477, "learning_rate": 8.242773287548187e-05, "loss": 0.3274, "step": 3483 }, { "epoch": 0.29721890462378436, "grad_norm": 1.3481992650345669, "learning_rate": 8.241721589428435e-05, "loss": 0.3543, "step": 3484 }, { "epoch": 0.2973042142979014, "grad_norm": 1.52685585912511, "learning_rate": 8.240669643820467e-05, "loss": 0.3171, "step": 3485 }, { "epoch": 0.2973895239720184, "grad_norm": 1.4154338615001154, "learning_rate": 8.239617450804591e-05, "loss": 0.3373, "step": 3486 }, { "epoch": 0.2974748336461355, "grad_norm": 1.50051664356025, "learning_rate": 8.238565010461138e-05, "loss": 0.3636, "step": 3487 }, { "epoch": 0.2975601433202525, "grad_norm": 1.3840391677402235, "learning_rate": 8.237512322870458e-05, "loss": 0.3733, "step": 3488 }, { "epoch": 0.29764545299436956, "grad_norm": 1.4038474858742958, "learning_rate": 8.236459388112916e-05, "loss": 0.3462, "step": 3489 }, { "epoch": 0.2977307626684866, "grad_norm": 1.4079130723193687, "learning_rate": 8.235406206268898e-05, "loss": 0.3442, "step": 3490 }, { "epoch": 0.29781607234260365, "grad_norm": 1.495159763998199, "learning_rate": 8.234352777418808e-05, "loss": 0.3102, "step": 3491 }, { "epoch": 0.2979013820167207, "grad_norm": 1.5226414875001886, "learning_rate": 8.23329910164307e-05, "loss": 0.3633, "step": 3492 }, { "epoch": 0.29798669169083775, "grad_norm": 1.5617544302745479, "learning_rate": 8.23224517902213e-05, "loss": 0.3509, "step": 3493 }, { "epoch": 0.29807200136495476, "grad_norm": 1.2572168871057035, "learning_rate": 8.231191009636446e-05, "loss": 0.3218, "step": 3494 }, { "epoch": 0.29815731103907184, "grad_norm": 1.5668859341897605, "learning_rate": 8.230136593566497e-05, "loss": 0.2813, "step": 3495 }, { "epoch": 0.29824262071318886, "grad_norm": 1.451052428472866, "learning_rate": 8.229081930892786e-05, "loss": 0.3299, "step": 3496 }, { "epoch": 0.29832793038730593, "grad_norm": 1.6496811645409282, "learning_rate": 8.228027021695827e-05, "loss": 0.3637, "step": 3497 }, { "epoch": 0.29841324006142295, "grad_norm": 1.1094940551488524, "learning_rate": 8.226971866056161e-05, "loss": 0.3031, "step": 3498 }, { "epoch": 0.29849854973554, "grad_norm": 1.6073377733257608, "learning_rate": 8.225916464054341e-05, "loss": 0.3301, "step": 3499 }, { "epoch": 0.29858385940965704, "grad_norm": 1.5302081553660531, "learning_rate": 8.22486081577094e-05, "loss": 0.3484, "step": 3500 }, { "epoch": 0.2986691690837741, "grad_norm": 1.3404344863505548, "learning_rate": 8.223804921286553e-05, "loss": 0.2839, "step": 3501 }, { "epoch": 0.29875447875789113, "grad_norm": 1.5702986835232504, "learning_rate": 8.22274878068179e-05, "loss": 0.3547, "step": 3502 }, { "epoch": 0.2988397884320082, "grad_norm": 1.3474028860565483, "learning_rate": 8.221692394037286e-05, "loss": 0.2716, "step": 3503 }, { "epoch": 0.2989250981061252, "grad_norm": 1.6138578035852078, "learning_rate": 8.220635761433687e-05, "loss": 0.2915, "step": 3504 }, { "epoch": 0.2990104077802423, "grad_norm": 1.44875029938294, "learning_rate": 8.219578882951662e-05, "loss": 0.2868, "step": 3505 }, { "epoch": 0.2990957174543593, "grad_norm": 1.364491709576698, "learning_rate": 8.218521758671897e-05, "loss": 0.2744, "step": 3506 }, { "epoch": 0.2991810271284764, "grad_norm": 1.4733440705306573, "learning_rate": 8.2174643886751e-05, "loss": 0.3361, "step": 3507 }, { "epoch": 0.2992663368025934, "grad_norm": 1.1923815628032712, "learning_rate": 8.216406773041994e-05, "loss": 0.2954, "step": 3508 }, { "epoch": 0.2993516464767105, "grad_norm": 1.5835538342512008, "learning_rate": 8.215348911853324e-05, "loss": 0.2817, "step": 3509 }, { "epoch": 0.2994369561508275, "grad_norm": 1.5747045330648788, "learning_rate": 8.21429080518985e-05, "loss": 0.3485, "step": 3510 }, { "epoch": 0.29952226582494457, "grad_norm": 1.197780131423429, "learning_rate": 8.213232453132353e-05, "loss": 0.2455, "step": 3511 }, { "epoch": 0.2996075754990616, "grad_norm": 1.2264207891738952, "learning_rate": 8.212173855761636e-05, "loss": 0.2959, "step": 3512 }, { "epoch": 0.29969288517317866, "grad_norm": 1.4052128676031928, "learning_rate": 8.211115013158512e-05, "loss": 0.3734, "step": 3513 }, { "epoch": 0.2997781948472957, "grad_norm": 1.5455395106556813, "learning_rate": 8.210055925403821e-05, "loss": 0.3531, "step": 3514 }, { "epoch": 0.29986350452141275, "grad_norm": 1.5549262548788518, "learning_rate": 8.208996592578417e-05, "loss": 0.3399, "step": 3515 }, { "epoch": 0.29994881419552977, "grad_norm": 1.2812181794471804, "learning_rate": 8.207937014763178e-05, "loss": 0.3094, "step": 3516 }, { "epoch": 0.30003412386964684, "grad_norm": 1.4697886913259532, "learning_rate": 8.206877192038995e-05, "loss": 0.3201, "step": 3517 }, { "epoch": 0.30011943354376386, "grad_norm": 1.6951736885585216, "learning_rate": 8.205817124486779e-05, "loss": 0.374, "step": 3518 }, { "epoch": 0.30020474321788093, "grad_norm": 1.1699261545857662, "learning_rate": 8.204756812187461e-05, "loss": 0.3483, "step": 3519 }, { "epoch": 0.30029005289199795, "grad_norm": 1.5324657051767014, "learning_rate": 8.203696255221991e-05, "loss": 0.2725, "step": 3520 }, { "epoch": 0.300375362566115, "grad_norm": 1.4442883136111926, "learning_rate": 8.202635453671335e-05, "loss": 0.3516, "step": 3521 }, { "epoch": 0.30046067224023204, "grad_norm": 1.3034174328002148, "learning_rate": 8.201574407616483e-05, "loss": 0.3164, "step": 3522 }, { "epoch": 0.3005459819143491, "grad_norm": 1.535124023720473, "learning_rate": 8.200513117138435e-05, "loss": 0.3036, "step": 3523 }, { "epoch": 0.30063129158846613, "grad_norm": 1.4373355439693412, "learning_rate": 8.199451582318221e-05, "loss": 0.2797, "step": 3524 }, { "epoch": 0.30071660126258315, "grad_norm": 1.5159483889045384, "learning_rate": 8.19838980323688e-05, "loss": 0.3032, "step": 3525 }, { "epoch": 0.3008019109367002, "grad_norm": 1.5149891099443988, "learning_rate": 8.197327779975473e-05, "loss": 0.3236, "step": 3526 }, { "epoch": 0.30088722061081724, "grad_norm": 1.4368220679517012, "learning_rate": 8.196265512615081e-05, "loss": 0.3333, "step": 3527 }, { "epoch": 0.3009725302849343, "grad_norm": 1.7625128400618977, "learning_rate": 8.195203001236802e-05, "loss": 0.3499, "step": 3528 }, { "epoch": 0.30105783995905133, "grad_norm": 1.3324641533873456, "learning_rate": 8.194140245921753e-05, "loss": 0.3002, "step": 3529 }, { "epoch": 0.3011431496331684, "grad_norm": 1.3410829202160706, "learning_rate": 8.19307724675107e-05, "loss": 0.36, "step": 3530 }, { "epoch": 0.3012284593072854, "grad_norm": 1.7145381298881435, "learning_rate": 8.192014003805907e-05, "loss": 0.3387, "step": 3531 }, { "epoch": 0.3013137689814025, "grad_norm": 1.632233001232514, "learning_rate": 8.190950517167437e-05, "loss": 0.2976, "step": 3532 }, { "epoch": 0.3013990786555195, "grad_norm": 1.6223231054897655, "learning_rate": 8.189886786916853e-05, "loss": 0.3178, "step": 3533 }, { "epoch": 0.3014843883296366, "grad_norm": 1.5235387523944401, "learning_rate": 8.188822813135362e-05, "loss": 0.3427, "step": 3534 }, { "epoch": 0.3015696980037536, "grad_norm": 1.7774107532657117, "learning_rate": 8.187758595904196e-05, "loss": 0.4158, "step": 3535 }, { "epoch": 0.3016550076778707, "grad_norm": 1.4625039907547714, "learning_rate": 8.1866941353046e-05, "loss": 0.2917, "step": 3536 }, { "epoch": 0.3017403173519877, "grad_norm": 1.2824923377152075, "learning_rate": 8.18562943141784e-05, "loss": 0.2818, "step": 3537 }, { "epoch": 0.30182562702610477, "grad_norm": 1.5409064695428558, "learning_rate": 8.184564484325204e-05, "loss": 0.2822, "step": 3538 }, { "epoch": 0.3019109367002218, "grad_norm": 1.3386962247247451, "learning_rate": 8.18349929410799e-05, "loss": 0.3424, "step": 3539 }, { "epoch": 0.30199624637433886, "grad_norm": 1.3812676545132523, "learning_rate": 8.182433860847524e-05, "loss": 0.2843, "step": 3540 }, { "epoch": 0.3020815560484559, "grad_norm": 1.2875284951028867, "learning_rate": 8.181368184625143e-05, "loss": 0.3509, "step": 3541 }, { "epoch": 0.30216686572257295, "grad_norm": 1.5214696450065213, "learning_rate": 8.180302265522206e-05, "loss": 0.2726, "step": 3542 }, { "epoch": 0.30225217539668997, "grad_norm": 1.3303073679901414, "learning_rate": 8.179236103620094e-05, "loss": 0.2794, "step": 3543 }, { "epoch": 0.30233748507080704, "grad_norm": 1.0721000168282195, "learning_rate": 8.178169699000198e-05, "loss": 0.3012, "step": 3544 }, { "epoch": 0.30242279474492406, "grad_norm": 1.3327092657324004, "learning_rate": 8.177103051743932e-05, "loss": 0.2695, "step": 3545 }, { "epoch": 0.30250810441904114, "grad_norm": 1.3417003880087615, "learning_rate": 8.176036161932734e-05, "loss": 0.3178, "step": 3546 }, { "epoch": 0.30259341409315815, "grad_norm": 1.5288138180853326, "learning_rate": 8.174969029648052e-05, "loss": 0.3115, "step": 3547 }, { "epoch": 0.3026787237672752, "grad_norm": 1.4146272153419892, "learning_rate": 8.173901654971357e-05, "loss": 0.3062, "step": 3548 }, { "epoch": 0.30276403344139224, "grad_norm": 1.9640668239257608, "learning_rate": 8.172834037984137e-05, "loss": 0.387, "step": 3549 }, { "epoch": 0.3028493431155093, "grad_norm": 1.5847944076967044, "learning_rate": 8.171766178767897e-05, "loss": 0.3292, "step": 3550 }, { "epoch": 0.30293465278962634, "grad_norm": 1.2315346596936414, "learning_rate": 8.170698077404165e-05, "loss": 0.3066, "step": 3551 }, { "epoch": 0.3030199624637434, "grad_norm": 1.6023618610610273, "learning_rate": 8.169629733974482e-05, "loss": 0.3601, "step": 3552 }, { "epoch": 0.3031052721378604, "grad_norm": 1.2986138877000324, "learning_rate": 8.168561148560414e-05, "loss": 0.3029, "step": 3553 }, { "epoch": 0.3031905818119775, "grad_norm": 1.3816782291850394, "learning_rate": 8.167492321243539e-05, "loss": 0.3233, "step": 3554 }, { "epoch": 0.3032758914860945, "grad_norm": 1.3255857540437934, "learning_rate": 8.166423252105458e-05, "loss": 0.3437, "step": 3555 }, { "epoch": 0.3033612011602116, "grad_norm": 1.2761829479660753, "learning_rate": 8.165353941227789e-05, "loss": 0.3667, "step": 3556 }, { "epoch": 0.3034465108343286, "grad_norm": 1.529998968909421, "learning_rate": 8.164284388692166e-05, "loss": 0.3511, "step": 3557 }, { "epoch": 0.3035318205084457, "grad_norm": 1.364590297807388, "learning_rate": 8.163214594580245e-05, "loss": 0.2867, "step": 3558 }, { "epoch": 0.3036171301825627, "grad_norm": 1.6363331925565703, "learning_rate": 8.1621445589737e-05, "loss": 0.3281, "step": 3559 }, { "epoch": 0.3037024398566798, "grad_norm": 1.5527255459900322, "learning_rate": 8.161074281954219e-05, "loss": 0.3211, "step": 3560 }, { "epoch": 0.3037877495307968, "grad_norm": 1.4530528881269154, "learning_rate": 8.160003763603516e-05, "loss": 0.3243, "step": 3561 }, { "epoch": 0.30387305920491386, "grad_norm": 1.4642926209316791, "learning_rate": 8.158933004003319e-05, "loss": 0.3228, "step": 3562 }, { "epoch": 0.3039583688790309, "grad_norm": 1.3902791482565307, "learning_rate": 8.157862003235373e-05, "loss": 0.2909, "step": 3563 }, { "epoch": 0.3040436785531479, "grad_norm": 1.2190863805337258, "learning_rate": 8.156790761381444e-05, "loss": 0.2788, "step": 3564 }, { "epoch": 0.304128988227265, "grad_norm": 1.2962757702494117, "learning_rate": 8.155719278523316e-05, "loss": 0.3269, "step": 3565 }, { "epoch": 0.304214297901382, "grad_norm": 1.583933197963333, "learning_rate": 8.154647554742789e-05, "loss": 0.2952, "step": 3566 }, { "epoch": 0.30429960757549906, "grad_norm": 1.764371714087253, "learning_rate": 8.153575590121686e-05, "loss": 0.3625, "step": 3567 }, { "epoch": 0.3043849172496161, "grad_norm": 1.583031910095734, "learning_rate": 8.152503384741846e-05, "loss": 0.369, "step": 3568 }, { "epoch": 0.30447022692373316, "grad_norm": 1.4602315900622067, "learning_rate": 8.151430938685123e-05, "loss": 0.3834, "step": 3569 }, { "epoch": 0.3045555365978502, "grad_norm": 1.439861762378643, "learning_rate": 8.150358252033394e-05, "loss": 0.306, "step": 3570 }, { "epoch": 0.30464084627196725, "grad_norm": 1.9153408219354495, "learning_rate": 8.149285324868554e-05, "loss": 0.3586, "step": 3571 }, { "epoch": 0.30472615594608427, "grad_norm": 1.477629906483165, "learning_rate": 8.148212157272517e-05, "loss": 0.2707, "step": 3572 }, { "epoch": 0.30481146562020134, "grad_norm": 1.2465691212793606, "learning_rate": 8.14713874932721e-05, "loss": 0.3307, "step": 3573 }, { "epoch": 0.30489677529431836, "grad_norm": 1.636057717935035, "learning_rate": 8.146065101114581e-05, "loss": 0.3345, "step": 3574 }, { "epoch": 0.30498208496843543, "grad_norm": 1.4130805805687379, "learning_rate": 8.144991212716603e-05, "loss": 0.3473, "step": 3575 }, { "epoch": 0.30506739464255245, "grad_norm": 1.4746202076899293, "learning_rate": 8.143917084215256e-05, "loss": 0.3235, "step": 3576 }, { "epoch": 0.3051527043166695, "grad_norm": 1.4596257976540583, "learning_rate": 8.142842715692548e-05, "loss": 0.3555, "step": 3577 }, { "epoch": 0.30523801399078654, "grad_norm": 1.385400623811846, "learning_rate": 8.141768107230498e-05, "loss": 0.3626, "step": 3578 }, { "epoch": 0.3053233236649036, "grad_norm": 1.5070231847314883, "learning_rate": 8.140693258911151e-05, "loss": 0.325, "step": 3579 }, { "epoch": 0.30540863333902063, "grad_norm": 1.2832228660516554, "learning_rate": 8.139618170816562e-05, "loss": 0.3216, "step": 3580 }, { "epoch": 0.3054939430131377, "grad_norm": 1.4572536922557924, "learning_rate": 8.13854284302881e-05, "loss": 0.3257, "step": 3581 }, { "epoch": 0.3055792526872547, "grad_norm": 1.4285053306002127, "learning_rate": 8.137467275629988e-05, "loss": 0.3353, "step": 3582 }, { "epoch": 0.3056645623613718, "grad_norm": 1.3397401819711119, "learning_rate": 8.136391468702214e-05, "loss": 0.2797, "step": 3583 }, { "epoch": 0.3057498720354888, "grad_norm": 1.5260889699155487, "learning_rate": 8.135315422327618e-05, "loss": 0.3344, "step": 3584 }, { "epoch": 0.3058351817096059, "grad_norm": 1.3658843014703155, "learning_rate": 8.13423913658835e-05, "loss": 0.3312, "step": 3585 }, { "epoch": 0.3059204913837229, "grad_norm": 1.3195056142626134, "learning_rate": 8.133162611566581e-05, "loss": 0.2964, "step": 3586 }, { "epoch": 0.30600580105784, "grad_norm": 1.5070951662802252, "learning_rate": 8.132085847344493e-05, "loss": 0.3243, "step": 3587 }, { "epoch": 0.306091110731957, "grad_norm": 1.664252758343018, "learning_rate": 8.131008844004295e-05, "loss": 0.3642, "step": 3588 }, { "epoch": 0.30617642040607407, "grad_norm": 1.335969444919145, "learning_rate": 8.129931601628212e-05, "loss": 0.3443, "step": 3589 }, { "epoch": 0.3062617300801911, "grad_norm": 1.4762130404504286, "learning_rate": 8.128854120298484e-05, "loss": 0.3358, "step": 3590 }, { "epoch": 0.30634703975430816, "grad_norm": 1.1969408898605605, "learning_rate": 8.127776400097369e-05, "loss": 0.291, "step": 3591 }, { "epoch": 0.3064323494284252, "grad_norm": 1.1840130154661104, "learning_rate": 8.126698441107146e-05, "loss": 0.3007, "step": 3592 }, { "epoch": 0.30651765910254225, "grad_norm": 1.4549758772800314, "learning_rate": 8.125620243410113e-05, "loss": 0.3426, "step": 3593 }, { "epoch": 0.30660296877665927, "grad_norm": 1.0477214067581129, "learning_rate": 8.124541807088587e-05, "loss": 0.2318, "step": 3594 }, { "epoch": 0.30668827845077634, "grad_norm": 1.4422035389976475, "learning_rate": 8.123463132224893e-05, "loss": 0.3122, "step": 3595 }, { "epoch": 0.30677358812489336, "grad_norm": 1.2631619351671666, "learning_rate": 8.122384218901389e-05, "loss": 0.304, "step": 3596 }, { "epoch": 0.30685889779901043, "grad_norm": 2.185165249230106, "learning_rate": 8.121305067200442e-05, "loss": 0.2589, "step": 3597 }, { "epoch": 0.30694420747312745, "grad_norm": 1.4079143423825364, "learning_rate": 8.120225677204441e-05, "loss": 0.2595, "step": 3598 }, { "epoch": 0.3070295171472445, "grad_norm": 1.7501583027765895, "learning_rate": 8.11914604899579e-05, "loss": 0.3522, "step": 3599 }, { "epoch": 0.30711482682136154, "grad_norm": 1.437414830214042, "learning_rate": 8.118066182656911e-05, "loss": 0.3001, "step": 3600 }, { "epoch": 0.30720013649547856, "grad_norm": 1.6969193960321889, "learning_rate": 8.116986078270252e-05, "loss": 0.2912, "step": 3601 }, { "epoch": 0.30728544616959563, "grad_norm": 1.5887636456927758, "learning_rate": 8.115905735918268e-05, "loss": 0.3516, "step": 3602 }, { "epoch": 0.30737075584371265, "grad_norm": 1.6084359401460373, "learning_rate": 8.114825155683437e-05, "loss": 0.3109, "step": 3603 }, { "epoch": 0.3074560655178297, "grad_norm": 1.279836665997689, "learning_rate": 8.113744337648259e-05, "loss": 0.3112, "step": 3604 }, { "epoch": 0.30754137519194674, "grad_norm": 1.0891060468814595, "learning_rate": 8.112663281895248e-05, "loss": 0.2937, "step": 3605 }, { "epoch": 0.3076266848660638, "grad_norm": 1.490159819051607, "learning_rate": 8.111581988506935e-05, "loss": 0.3102, "step": 3606 }, { "epoch": 0.30771199454018083, "grad_norm": 1.4211479256376478, "learning_rate": 8.110500457565873e-05, "loss": 0.3605, "step": 3607 }, { "epoch": 0.3077973042142979, "grad_norm": 1.4294538463334896, "learning_rate": 8.109418689154629e-05, "loss": 0.2959, "step": 3608 }, { "epoch": 0.3078826138884149, "grad_norm": 1.5354620165535078, "learning_rate": 8.108336683355792e-05, "loss": 0.3745, "step": 3609 }, { "epoch": 0.307967923562532, "grad_norm": 1.437142120262007, "learning_rate": 8.107254440251967e-05, "loss": 0.3126, "step": 3610 }, { "epoch": 0.308053233236649, "grad_norm": 1.5439487097725115, "learning_rate": 8.106171959925779e-05, "loss": 0.3361, "step": 3611 }, { "epoch": 0.3081385429107661, "grad_norm": 1.6168644531755365, "learning_rate": 8.105089242459866e-05, "loss": 0.2885, "step": 3612 }, { "epoch": 0.3082238525848831, "grad_norm": 1.376680301124599, "learning_rate": 8.104006287936892e-05, "loss": 0.3387, "step": 3613 }, { "epoch": 0.3083091622590002, "grad_norm": 1.3830855364024144, "learning_rate": 8.10292309643953e-05, "loss": 0.3121, "step": 3614 }, { "epoch": 0.3083944719331172, "grad_norm": 1.2391086064692225, "learning_rate": 8.10183966805048e-05, "loss": 0.2938, "step": 3615 }, { "epoch": 0.30847978160723427, "grad_norm": 1.3481845870486784, "learning_rate": 8.100756002852454e-05, "loss": 0.3263, "step": 3616 }, { "epoch": 0.3085650912813513, "grad_norm": 1.4338434402096012, "learning_rate": 8.099672100928184e-05, "loss": 0.2862, "step": 3617 }, { "epoch": 0.30865040095546836, "grad_norm": 1.4004728966119238, "learning_rate": 8.098587962360422e-05, "loss": 0.2738, "step": 3618 }, { "epoch": 0.3087357106295854, "grad_norm": 1.436710223638843, "learning_rate": 8.097503587231933e-05, "loss": 0.3221, "step": 3619 }, { "epoch": 0.30882102030370245, "grad_norm": 1.4794903342861743, "learning_rate": 8.096418975625508e-05, "loss": 0.2773, "step": 3620 }, { "epoch": 0.30890632997781947, "grad_norm": 1.1304414335903616, "learning_rate": 8.095334127623947e-05, "loss": 0.2963, "step": 3621 }, { "epoch": 0.30899163965193654, "grad_norm": 1.6499924399462813, "learning_rate": 8.094249043310073e-05, "loss": 0.3482, "step": 3622 }, { "epoch": 0.30907694932605356, "grad_norm": 1.7380816741558427, "learning_rate": 8.09316372276673e-05, "loss": 0.3273, "step": 3623 }, { "epoch": 0.30916225900017064, "grad_norm": 1.6273357037708396, "learning_rate": 8.09207816607677e-05, "loss": 0.3566, "step": 3624 }, { "epoch": 0.30924756867428765, "grad_norm": 1.8102239590338958, "learning_rate": 8.090992373323077e-05, "loss": 0.3145, "step": 3625 }, { "epoch": 0.3093328783484047, "grad_norm": 1.3716726358051288, "learning_rate": 8.08990634458854e-05, "loss": 0.2828, "step": 3626 }, { "epoch": 0.30941818802252175, "grad_norm": 1.4389022124879303, "learning_rate": 8.088820079956074e-05, "loss": 0.3365, "step": 3627 }, { "epoch": 0.3095034976966388, "grad_norm": 1.5979822847437128, "learning_rate": 8.087733579508609e-05, "loss": 0.3726, "step": 3628 }, { "epoch": 0.30958880737075584, "grad_norm": 1.52398890762097, "learning_rate": 8.086646843329093e-05, "loss": 0.3466, "step": 3629 }, { "epoch": 0.3096741170448729, "grad_norm": 1.6026156800602114, "learning_rate": 8.085559871500493e-05, "loss": 0.3564, "step": 3630 }, { "epoch": 0.3097594267189899, "grad_norm": 1.5661817280350046, "learning_rate": 8.084472664105794e-05, "loss": 0.3214, "step": 3631 }, { "epoch": 0.309844736393107, "grad_norm": 1.5492469250246994, "learning_rate": 8.083385221227997e-05, "loss": 0.3635, "step": 3632 }, { "epoch": 0.309930046067224, "grad_norm": 1.5025508172554627, "learning_rate": 8.082297542950123e-05, "loss": 0.3586, "step": 3633 }, { "epoch": 0.3100153557413411, "grad_norm": 1.5343509338107555, "learning_rate": 8.081209629355214e-05, "loss": 0.2943, "step": 3634 }, { "epoch": 0.3101006654154581, "grad_norm": 1.587395660095319, "learning_rate": 8.080121480526319e-05, "loss": 0.3294, "step": 3635 }, { "epoch": 0.3101859750895752, "grad_norm": 1.4899691724468391, "learning_rate": 8.07903309654652e-05, "loss": 0.331, "step": 3636 }, { "epoch": 0.3102712847636922, "grad_norm": 1.503876604060479, "learning_rate": 8.077944477498905e-05, "loss": 0.3194, "step": 3637 }, { "epoch": 0.3103565944378093, "grad_norm": 1.1481458851633592, "learning_rate": 8.076855623466584e-05, "loss": 0.3142, "step": 3638 }, { "epoch": 0.3104419041119263, "grad_norm": 1.2510355951107346, "learning_rate": 8.075766534532689e-05, "loss": 0.3097, "step": 3639 }, { "epoch": 0.3105272137860433, "grad_norm": 1.2975969717873541, "learning_rate": 8.074677210780361e-05, "loss": 0.3599, "step": 3640 }, { "epoch": 0.3106125234601604, "grad_norm": 1.328794243435732, "learning_rate": 8.073587652292769e-05, "loss": 0.2917, "step": 3641 }, { "epoch": 0.3106978331342774, "grad_norm": 1.4600153986236377, "learning_rate": 8.072497859153091e-05, "loss": 0.3104, "step": 3642 }, { "epoch": 0.3107831428083945, "grad_norm": 1.421829893633357, "learning_rate": 8.07140783144453e-05, "loss": 0.3339, "step": 3643 }, { "epoch": 0.3108684524825115, "grad_norm": 1.9012824197847922, "learning_rate": 8.070317569250302e-05, "loss": 0.3635, "step": 3644 }, { "epoch": 0.31095376215662857, "grad_norm": 1.5866569803781057, "learning_rate": 8.069227072653642e-05, "loss": 0.2937, "step": 3645 }, { "epoch": 0.3110390718307456, "grad_norm": 1.400903413703806, "learning_rate": 8.068136341737807e-05, "loss": 0.2742, "step": 3646 }, { "epoch": 0.31112438150486266, "grad_norm": 1.696432842163217, "learning_rate": 8.067045376586066e-05, "loss": 0.2881, "step": 3647 }, { "epoch": 0.3112096911789797, "grad_norm": 1.7414653975110013, "learning_rate": 8.065954177281708e-05, "loss": 0.3997, "step": 3648 }, { "epoch": 0.31129500085309675, "grad_norm": 1.4700571290417541, "learning_rate": 8.064862743908042e-05, "loss": 0.2867, "step": 3649 }, { "epoch": 0.31138031052721377, "grad_norm": 1.7032528750508593, "learning_rate": 8.063771076548391e-05, "loss": 0.299, "step": 3650 }, { "epoch": 0.31146562020133084, "grad_norm": 1.6136717981704622, "learning_rate": 8.0626791752861e-05, "loss": 0.2848, "step": 3651 }, { "epoch": 0.31155092987544786, "grad_norm": 1.7027294154487662, "learning_rate": 8.061587040204528e-05, "loss": 0.3387, "step": 3652 }, { "epoch": 0.31163623954956493, "grad_norm": 1.4041955988287023, "learning_rate": 8.060494671387055e-05, "loss": 0.3441, "step": 3653 }, { "epoch": 0.31172154922368195, "grad_norm": 1.3209589893406137, "learning_rate": 8.059402068917079e-05, "loss": 0.3296, "step": 3654 }, { "epoch": 0.311806858897799, "grad_norm": 1.6779348056072638, "learning_rate": 8.058309232878012e-05, "loss": 0.3287, "step": 3655 }, { "epoch": 0.31189216857191604, "grad_norm": 1.208837349229705, "learning_rate": 8.057216163353285e-05, "loss": 0.3232, "step": 3656 }, { "epoch": 0.3119774782460331, "grad_norm": 1.4624366616121516, "learning_rate": 8.056122860426352e-05, "loss": 0.3109, "step": 3657 }, { "epoch": 0.31206278792015013, "grad_norm": 1.4403485808733343, "learning_rate": 8.055029324180678e-05, "loss": 0.3215, "step": 3658 }, { "epoch": 0.3121480975942672, "grad_norm": 1.6590863538943188, "learning_rate": 8.053935554699749e-05, "loss": 0.2525, "step": 3659 }, { "epoch": 0.3122334072683842, "grad_norm": 1.648168315875182, "learning_rate": 8.05284155206707e-05, "loss": 0.3158, "step": 3660 }, { "epoch": 0.3123187169425013, "grad_norm": 1.5621157364406917, "learning_rate": 8.05174731636616e-05, "loss": 0.2664, "step": 3661 }, { "epoch": 0.3124040266166183, "grad_norm": 1.3914107342433049, "learning_rate": 8.050652847680562e-05, "loss": 0.2944, "step": 3662 }, { "epoch": 0.3124893362907354, "grad_norm": 1.4796217455649705, "learning_rate": 8.049558146093827e-05, "loss": 0.3127, "step": 3663 }, { "epoch": 0.3125746459648524, "grad_norm": 1.3318526520234757, "learning_rate": 8.048463211689535e-05, "loss": 0.2805, "step": 3664 }, { "epoch": 0.3126599556389695, "grad_norm": 1.4244491415910117, "learning_rate": 8.047368044551276e-05, "loss": 0.3035, "step": 3665 }, { "epoch": 0.3127452653130865, "grad_norm": 1.405863560455443, "learning_rate": 8.04627264476266e-05, "loss": 0.2507, "step": 3666 }, { "epoch": 0.31283057498720357, "grad_norm": 1.5191288196971153, "learning_rate": 8.045177012407316e-05, "loss": 0.341, "step": 3667 }, { "epoch": 0.3129158846613206, "grad_norm": 1.400010231525636, "learning_rate": 8.044081147568889e-05, "loss": 0.3306, "step": 3668 }, { "epoch": 0.31300119433543766, "grad_norm": 1.4407630043376214, "learning_rate": 8.042985050331042e-05, "loss": 0.4905, "step": 3669 }, { "epoch": 0.3130865040095547, "grad_norm": 1.324594899995326, "learning_rate": 8.041888720777457e-05, "loss": 0.274, "step": 3670 }, { "epoch": 0.31317181368367175, "grad_norm": 1.4454634304111627, "learning_rate": 8.040792158991833e-05, "loss": 0.2748, "step": 3671 }, { "epoch": 0.31325712335778877, "grad_norm": 1.2926237908115972, "learning_rate": 8.039695365057887e-05, "loss": 0.3224, "step": 3672 }, { "epoch": 0.31334243303190584, "grad_norm": 1.682648926898849, "learning_rate": 8.038598339059351e-05, "loss": 0.3048, "step": 3673 }, { "epoch": 0.31342774270602286, "grad_norm": 1.5212536931992198, "learning_rate": 8.03750108107998e-05, "loss": 0.342, "step": 3674 }, { "epoch": 0.31351305238013993, "grad_norm": 1.435235603476932, "learning_rate": 8.036403591203544e-05, "loss": 0.3614, "step": 3675 }, { "epoch": 0.31359836205425695, "grad_norm": 1.5057730050062628, "learning_rate": 8.035305869513828e-05, "loss": 0.3178, "step": 3676 }, { "epoch": 0.313683671728374, "grad_norm": 1.404223019669934, "learning_rate": 8.034207916094638e-05, "loss": 0.3093, "step": 3677 }, { "epoch": 0.31376898140249104, "grad_norm": 1.59684736672099, "learning_rate": 8.033109731029798e-05, "loss": 0.3532, "step": 3678 }, { "epoch": 0.31385429107660806, "grad_norm": 1.5252377559243568, "learning_rate": 8.032011314403147e-05, "loss": 0.3324, "step": 3679 }, { "epoch": 0.31393960075072513, "grad_norm": 1.4349047411891953, "learning_rate": 8.030912666298546e-05, "loss": 0.2669, "step": 3680 }, { "epoch": 0.31402491042484215, "grad_norm": 1.3972249661556237, "learning_rate": 8.029813786799868e-05, "loss": 0.3388, "step": 3681 }, { "epoch": 0.3141102200989592, "grad_norm": 1.2998634743526631, "learning_rate": 8.028714675991006e-05, "loss": 0.3223, "step": 3682 }, { "epoch": 0.31419552977307624, "grad_norm": 1.6053898924235062, "learning_rate": 8.027615333955877e-05, "loss": 0.3524, "step": 3683 }, { "epoch": 0.3142808394471933, "grad_norm": 1.5210035394330197, "learning_rate": 8.026515760778403e-05, "loss": 0.3018, "step": 3684 }, { "epoch": 0.31436614912131033, "grad_norm": 1.465829827610065, "learning_rate": 8.025415956542535e-05, "loss": 0.2978, "step": 3685 }, { "epoch": 0.3144514587954274, "grad_norm": 1.6196325863878902, "learning_rate": 8.024315921332236e-05, "loss": 0.3786, "step": 3686 }, { "epoch": 0.3145367684695444, "grad_norm": 1.202404400917446, "learning_rate": 8.023215655231488e-05, "loss": 0.2675, "step": 3687 }, { "epoch": 0.3146220781436615, "grad_norm": 1.685727990454968, "learning_rate": 8.022115158324288e-05, "loss": 0.3388, "step": 3688 }, { "epoch": 0.3147073878177785, "grad_norm": 1.4134731856165186, "learning_rate": 8.021014430694655e-05, "loss": 0.3765, "step": 3689 }, { "epoch": 0.3147926974918956, "grad_norm": 1.6145243808279397, "learning_rate": 8.019913472426626e-05, "loss": 0.3363, "step": 3690 }, { "epoch": 0.3148780071660126, "grad_norm": 1.585788888968316, "learning_rate": 8.018812283604251e-05, "loss": 0.3415, "step": 3691 }, { "epoch": 0.3149633168401297, "grad_norm": 1.4935242422439652, "learning_rate": 8.017710864311599e-05, "loss": 0.3075, "step": 3692 }, { "epoch": 0.3150486265142467, "grad_norm": 1.375005201850068, "learning_rate": 8.016609214632759e-05, "loss": 0.2806, "step": 3693 }, { "epoch": 0.3151339361883638, "grad_norm": 1.730468887776871, "learning_rate": 8.015507334651835e-05, "loss": 0.321, "step": 3694 }, { "epoch": 0.3152192458624808, "grad_norm": 1.454747104755306, "learning_rate": 8.014405224452953e-05, "loss": 0.3027, "step": 3695 }, { "epoch": 0.31530455553659786, "grad_norm": 1.517602397474206, "learning_rate": 8.013302884120247e-05, "loss": 0.3441, "step": 3696 }, { "epoch": 0.3153898652107149, "grad_norm": 1.3960691034652943, "learning_rate": 8.012200313737881e-05, "loss": 0.2935, "step": 3697 }, { "epoch": 0.31547517488483195, "grad_norm": 1.2506278368657016, "learning_rate": 8.011097513390027e-05, "loss": 0.3704, "step": 3698 }, { "epoch": 0.315560484558949, "grad_norm": 1.3285807556859188, "learning_rate": 8.009994483160879e-05, "loss": 0.3463, "step": 3699 }, { "epoch": 0.31564579423306605, "grad_norm": 1.7792854183091984, "learning_rate": 8.008891223134647e-05, "loss": 0.3238, "step": 3700 }, { "epoch": 0.31573110390718306, "grad_norm": 1.366916171806953, "learning_rate": 8.007787733395559e-05, "loss": 0.2807, "step": 3701 }, { "epoch": 0.31581641358130014, "grad_norm": 1.2892642239031378, "learning_rate": 8.006684014027862e-05, "loss": 0.2748, "step": 3702 }, { "epoch": 0.31590172325541716, "grad_norm": 1.4383925279361707, "learning_rate": 8.005580065115816e-05, "loss": 0.3277, "step": 3703 }, { "epoch": 0.31598703292953423, "grad_norm": 1.3984786725509817, "learning_rate": 8.004475886743705e-05, "loss": 0.3171, "step": 3704 }, { "epoch": 0.31607234260365125, "grad_norm": 1.6608500388543512, "learning_rate": 8.003371478995827e-05, "loss": 0.3562, "step": 3705 }, { "epoch": 0.3161576522777683, "grad_norm": 1.511993777469163, "learning_rate": 8.002266841956496e-05, "loss": 0.3351, "step": 3706 }, { "epoch": 0.31624296195188534, "grad_norm": 1.5121853524792144, "learning_rate": 8.001161975710045e-05, "loss": 0.319, "step": 3707 }, { "epoch": 0.3163282716260024, "grad_norm": 1.8368222965598509, "learning_rate": 8.000056880340824e-05, "loss": 0.3952, "step": 3708 }, { "epoch": 0.31641358130011943, "grad_norm": 1.4163404631446825, "learning_rate": 7.998951555933205e-05, "loss": 0.2821, "step": 3709 }, { "epoch": 0.3164988909742365, "grad_norm": 1.4023362693480839, "learning_rate": 7.99784600257157e-05, "loss": 0.295, "step": 3710 }, { "epoch": 0.3165842006483535, "grad_norm": 1.6437622592019234, "learning_rate": 7.996740220340323e-05, "loss": 0.3004, "step": 3711 }, { "epoch": 0.3166695103224706, "grad_norm": 1.3748687334692138, "learning_rate": 7.995634209323886e-05, "loss": 0.3057, "step": 3712 }, { "epoch": 0.3167548199965876, "grad_norm": 1.4855815601537656, "learning_rate": 7.994527969606695e-05, "loss": 0.2606, "step": 3713 }, { "epoch": 0.3168401296707047, "grad_norm": 1.4980598459092545, "learning_rate": 7.993421501273205e-05, "loss": 0.3182, "step": 3714 }, { "epoch": 0.3169254393448217, "grad_norm": 1.396268729252478, "learning_rate": 7.992314804407892e-05, "loss": 0.3177, "step": 3715 }, { "epoch": 0.3170107490189387, "grad_norm": 1.8721645532487037, "learning_rate": 7.991207879095244e-05, "loss": 0.3702, "step": 3716 }, { "epoch": 0.3170960586930558, "grad_norm": 1.6425411115869135, "learning_rate": 7.990100725419771e-05, "loss": 0.3374, "step": 3717 }, { "epoch": 0.3171813683671728, "grad_norm": 1.266463861208134, "learning_rate": 7.988993343465996e-05, "loss": 0.283, "step": 3718 }, { "epoch": 0.3172666780412899, "grad_norm": 1.451322688895287, "learning_rate": 7.987885733318463e-05, "loss": 0.375, "step": 3719 }, { "epoch": 0.3173519877154069, "grad_norm": 1.7448586825885326, "learning_rate": 7.986777895061732e-05, "loss": 0.359, "step": 3720 }, { "epoch": 0.317437297389524, "grad_norm": 1.6603259908629566, "learning_rate": 7.98566982878038e-05, "loss": 0.3952, "step": 3721 }, { "epoch": 0.317522607063641, "grad_norm": 1.250088879286478, "learning_rate": 7.984561534559003e-05, "loss": 0.324, "step": 3722 }, { "epoch": 0.31760791673775807, "grad_norm": 1.3534237608898174, "learning_rate": 7.983453012482214e-05, "loss": 0.2906, "step": 3723 }, { "epoch": 0.3176932264118751, "grad_norm": 1.4734954466605406, "learning_rate": 7.982344262634641e-05, "loss": 0.3456, "step": 3724 }, { "epoch": 0.31777853608599216, "grad_norm": 1.7367726841427111, "learning_rate": 7.981235285100929e-05, "loss": 0.3788, "step": 3725 }, { "epoch": 0.3178638457601092, "grad_norm": 1.2591413030251557, "learning_rate": 7.980126079965747e-05, "loss": 0.287, "step": 3726 }, { "epoch": 0.31794915543422625, "grad_norm": 1.3592019683507057, "learning_rate": 7.979016647313774e-05, "loss": 0.2926, "step": 3727 }, { "epoch": 0.31803446510834327, "grad_norm": 1.4899066849744786, "learning_rate": 7.977906987229713e-05, "loss": 0.346, "step": 3728 }, { "epoch": 0.31811977478246034, "grad_norm": 1.4836069629757775, "learning_rate": 7.976797099798277e-05, "loss": 0.3413, "step": 3729 }, { "epoch": 0.31820508445657736, "grad_norm": 1.8704785826965291, "learning_rate": 7.9756869851042e-05, "loss": 0.33, "step": 3730 }, { "epoch": 0.31829039413069443, "grad_norm": 1.726954074834335, "learning_rate": 7.974576643232236e-05, "loss": 0.3487, "step": 3731 }, { "epoch": 0.31837570380481145, "grad_norm": 1.9179034251103277, "learning_rate": 7.97346607426715e-05, "loss": 0.3067, "step": 3732 }, { "epoch": 0.3184610134789285, "grad_norm": 1.3166005248747719, "learning_rate": 7.972355278293733e-05, "loss": 0.3144, "step": 3733 }, { "epoch": 0.31854632315304554, "grad_norm": 2.5752093850345488, "learning_rate": 7.971244255396784e-05, "loss": 0.2722, "step": 3734 }, { "epoch": 0.3186316328271626, "grad_norm": 1.312104074615648, "learning_rate": 7.970133005661125e-05, "loss": 0.309, "step": 3735 }, { "epoch": 0.31871694250127963, "grad_norm": 1.2785692315033275, "learning_rate": 7.969021529171595e-05, "loss": 0.2749, "step": 3736 }, { "epoch": 0.3188022521753967, "grad_norm": 1.4141594447841506, "learning_rate": 7.96790982601305e-05, "loss": 0.3229, "step": 3737 }, { "epoch": 0.3188875618495137, "grad_norm": 1.3648118660078932, "learning_rate": 7.966797896270358e-05, "loss": 0.3346, "step": 3738 }, { "epoch": 0.3189728715236308, "grad_norm": 1.7154640692047571, "learning_rate": 7.965685740028415e-05, "loss": 0.3169, "step": 3739 }, { "epoch": 0.3190581811977478, "grad_norm": 1.4178473470540967, "learning_rate": 7.964573357372123e-05, "loss": 0.3275, "step": 3740 }, { "epoch": 0.3191434908718649, "grad_norm": 1.3119913659683933, "learning_rate": 7.96346074838641e-05, "loss": 0.2892, "step": 3741 }, { "epoch": 0.3192288005459819, "grad_norm": 1.420412506175007, "learning_rate": 7.962347913156218e-05, "loss": 0.3042, "step": 3742 }, { "epoch": 0.319314110220099, "grad_norm": 1.5046010975971755, "learning_rate": 7.961234851766503e-05, "loss": 0.3017, "step": 3743 }, { "epoch": 0.319399419894216, "grad_norm": 1.533383498546036, "learning_rate": 7.960121564302243e-05, "loss": 0.3686, "step": 3744 }, { "epoch": 0.31948472956833307, "grad_norm": 1.8396506562519799, "learning_rate": 7.959008050848433e-05, "loss": 0.3827, "step": 3745 }, { "epoch": 0.3195700392424501, "grad_norm": 1.3220520297789906, "learning_rate": 7.957894311490082e-05, "loss": 0.3057, "step": 3746 }, { "epoch": 0.31965534891656716, "grad_norm": 1.4268328407398738, "learning_rate": 7.956780346312218e-05, "loss": 0.2464, "step": 3747 }, { "epoch": 0.3197406585906842, "grad_norm": 1.4316325480192946, "learning_rate": 7.955666155399886e-05, "loss": 0.3147, "step": 3748 }, { "epoch": 0.31982596826480125, "grad_norm": 1.5499245994440478, "learning_rate": 7.954551738838151e-05, "loss": 0.3407, "step": 3749 }, { "epoch": 0.31991127793891827, "grad_norm": 1.617373101092765, "learning_rate": 7.953437096712091e-05, "loss": 0.3853, "step": 3750 }, { "epoch": 0.31999658761303534, "grad_norm": 1.2834483569893234, "learning_rate": 7.952322229106803e-05, "loss": 0.2933, "step": 3751 }, { "epoch": 0.32008189728715236, "grad_norm": 1.332581079388846, "learning_rate": 7.951207136107401e-05, "loss": 0.3042, "step": 3752 }, { "epoch": 0.32016720696126943, "grad_norm": 1.3606490380227523, "learning_rate": 7.950091817799018e-05, "loss": 0.3202, "step": 3753 }, { "epoch": 0.32025251663538645, "grad_norm": 1.6621387806318466, "learning_rate": 7.9489762742668e-05, "loss": 0.3581, "step": 3754 }, { "epoch": 0.32033782630950347, "grad_norm": 1.299428517100528, "learning_rate": 7.947860505595915e-05, "loss": 0.3449, "step": 3755 }, { "epoch": 0.32042313598362054, "grad_norm": 1.690886032062863, "learning_rate": 7.946744511871545e-05, "loss": 0.3548, "step": 3756 }, { "epoch": 0.32050844565773756, "grad_norm": 1.5002305330508288, "learning_rate": 7.945628293178891e-05, "loss": 0.306, "step": 3757 }, { "epoch": 0.32059375533185464, "grad_norm": 1.4168870230939932, "learning_rate": 7.944511849603171e-05, "loss": 0.3126, "step": 3758 }, { "epoch": 0.32067906500597165, "grad_norm": 1.8251324801501059, "learning_rate": 7.943395181229616e-05, "loss": 0.3035, "step": 3759 }, { "epoch": 0.3207643746800887, "grad_norm": 1.5456031425710681, "learning_rate": 7.942278288143482e-05, "loss": 0.3074, "step": 3760 }, { "epoch": 0.32084968435420574, "grad_norm": 1.2349125621128598, "learning_rate": 7.941161170430036e-05, "loss": 0.2725, "step": 3761 }, { "epoch": 0.3209349940283228, "grad_norm": 1.3508290853243348, "learning_rate": 7.940043828174562e-05, "loss": 0.3079, "step": 3762 }, { "epoch": 0.32102030370243984, "grad_norm": 1.5464957186482093, "learning_rate": 7.938926261462366e-05, "loss": 0.2939, "step": 3763 }, { "epoch": 0.3211056133765569, "grad_norm": 1.7123002151564868, "learning_rate": 7.937808470378767e-05, "loss": 0.3452, "step": 3764 }, { "epoch": 0.3211909230506739, "grad_norm": 1.6447713137577962, "learning_rate": 7.936690455009104e-05, "loss": 0.3442, "step": 3765 }, { "epoch": 0.321276232724791, "grad_norm": 1.6833803258448907, "learning_rate": 7.93557221543873e-05, "loss": 0.3165, "step": 3766 }, { "epoch": 0.321361542398908, "grad_norm": 1.4557089852541805, "learning_rate": 7.934453751753017e-05, "loss": 0.3585, "step": 3767 }, { "epoch": 0.3214468520730251, "grad_norm": 1.8283407621948506, "learning_rate": 7.933335064037353e-05, "loss": 0.3291, "step": 3768 }, { "epoch": 0.3215321617471421, "grad_norm": 1.2470640034467464, "learning_rate": 7.932216152377146e-05, "loss": 0.3389, "step": 3769 }, { "epoch": 0.3216174714212592, "grad_norm": 1.5557000778061887, "learning_rate": 7.931097016857816e-05, "loss": 0.3521, "step": 3770 }, { "epoch": 0.3217027810953762, "grad_norm": 1.346340432702658, "learning_rate": 7.929977657564804e-05, "loss": 0.3159, "step": 3771 }, { "epoch": 0.3217880907694933, "grad_norm": 1.37986000511937, "learning_rate": 7.928858074583569e-05, "loss": 0.2655, "step": 3772 }, { "epoch": 0.3218734004436103, "grad_norm": 1.5617603840318615, "learning_rate": 7.927738267999584e-05, "loss": 0.3314, "step": 3773 }, { "epoch": 0.32195871011772736, "grad_norm": 1.2420328871754232, "learning_rate": 7.92661823789834e-05, "loss": 0.271, "step": 3774 }, { "epoch": 0.3220440197918444, "grad_norm": 1.2373939006289472, "learning_rate": 7.925497984365345e-05, "loss": 0.3304, "step": 3775 }, { "epoch": 0.32212932946596146, "grad_norm": 1.3775815138609921, "learning_rate": 7.924377507486127e-05, "loss": 0.2818, "step": 3776 }, { "epoch": 0.3222146391400785, "grad_norm": 1.3104243441520063, "learning_rate": 7.923256807346224e-05, "loss": 0.3456, "step": 3777 }, { "epoch": 0.32229994881419555, "grad_norm": 1.3712184229842794, "learning_rate": 7.922135884031199e-05, "loss": 0.3139, "step": 3778 }, { "epoch": 0.32238525848831256, "grad_norm": 1.4927046114477418, "learning_rate": 7.921014737626627e-05, "loss": 0.289, "step": 3779 }, { "epoch": 0.32247056816242964, "grad_norm": 1.2321289488202913, "learning_rate": 7.919893368218103e-05, "loss": 0.294, "step": 3780 }, { "epoch": 0.32255587783654666, "grad_norm": 1.4059372448163163, "learning_rate": 7.918771775891236e-05, "loss": 0.2636, "step": 3781 }, { "epoch": 0.32264118751066373, "grad_norm": 1.2829837698679227, "learning_rate": 7.917649960731655e-05, "loss": 0.2785, "step": 3782 }, { "epoch": 0.32272649718478075, "grad_norm": 2.1073685887682303, "learning_rate": 7.916527922825002e-05, "loss": 0.36, "step": 3783 }, { "epoch": 0.3228118068588978, "grad_norm": 1.7938305481161296, "learning_rate": 7.915405662256942e-05, "loss": 0.3744, "step": 3784 }, { "epoch": 0.32289711653301484, "grad_norm": 1.2793136945011936, "learning_rate": 7.91428317911315e-05, "loss": 0.2672, "step": 3785 }, { "epoch": 0.3229824262071319, "grad_norm": 1.7357292730255116, "learning_rate": 7.913160473479327e-05, "loss": 0.3313, "step": 3786 }, { "epoch": 0.32306773588124893, "grad_norm": 1.8492607908059127, "learning_rate": 7.912037545441182e-05, "loss": 0.2921, "step": 3787 }, { "epoch": 0.323153045555366, "grad_norm": 1.5170113426895915, "learning_rate": 7.910914395084443e-05, "loss": 0.2977, "step": 3788 }, { "epoch": 0.323238355229483, "grad_norm": 1.5420005368084597, "learning_rate": 7.909791022494859e-05, "loss": 0.3183, "step": 3789 }, { "epoch": 0.3233236649036001, "grad_norm": 1.6596543215274444, "learning_rate": 7.908667427758194e-05, "loss": 0.2752, "step": 3790 }, { "epoch": 0.3234089745777171, "grad_norm": 1.372108366631985, "learning_rate": 7.907543610960226e-05, "loss": 0.3232, "step": 3791 }, { "epoch": 0.3234942842518342, "grad_norm": 1.535201288004261, "learning_rate": 7.906419572186756e-05, "loss": 0.3196, "step": 3792 }, { "epoch": 0.3235795939259512, "grad_norm": 1.2998091814406254, "learning_rate": 7.905295311523595e-05, "loss": 0.2927, "step": 3793 }, { "epoch": 0.3236649036000682, "grad_norm": 1.436117336563859, "learning_rate": 7.904170829056577e-05, "loss": 0.298, "step": 3794 }, { "epoch": 0.3237502132741853, "grad_norm": 1.5818350211057353, "learning_rate": 7.903046124871547e-05, "loss": 0.3212, "step": 3795 }, { "epoch": 0.3238355229483023, "grad_norm": 1.8546375862481768, "learning_rate": 7.901921199054373e-05, "loss": 0.3316, "step": 3796 }, { "epoch": 0.3239208326224194, "grad_norm": 1.4302718839073052, "learning_rate": 7.900796051690937e-05, "loss": 0.3077, "step": 3797 }, { "epoch": 0.3240061422965364, "grad_norm": 1.5075588507101447, "learning_rate": 7.899670682867136e-05, "loss": 0.3201, "step": 3798 }, { "epoch": 0.3240914519706535, "grad_norm": 1.529612775308138, "learning_rate": 7.898545092668887e-05, "loss": 0.3324, "step": 3799 }, { "epoch": 0.3241767616447705, "grad_norm": 1.1540387634761629, "learning_rate": 7.897419281182124e-05, "loss": 0.2957, "step": 3800 }, { "epoch": 0.32426207131888757, "grad_norm": 1.5245002365437414, "learning_rate": 7.896293248492797e-05, "loss": 0.3153, "step": 3801 }, { "epoch": 0.3243473809930046, "grad_norm": 1.587161038473519, "learning_rate": 7.895166994686869e-05, "loss": 0.2764, "step": 3802 }, { "epoch": 0.32443269066712166, "grad_norm": 1.367895019566022, "learning_rate": 7.894040519850328e-05, "loss": 0.2769, "step": 3803 }, { "epoch": 0.3245180003412387, "grad_norm": 1.5674627456010066, "learning_rate": 7.89291382406917e-05, "loss": 0.3157, "step": 3804 }, { "epoch": 0.32460331001535575, "grad_norm": 1.3924149555418988, "learning_rate": 7.891786907429415e-05, "loss": 0.3023, "step": 3805 }, { "epoch": 0.32468861968947277, "grad_norm": 1.5839031683146827, "learning_rate": 7.890659770017097e-05, "loss": 0.31, "step": 3806 }, { "epoch": 0.32477392936358984, "grad_norm": 1.8207061734815446, "learning_rate": 7.889532411918267e-05, "loss": 0.3393, "step": 3807 }, { "epoch": 0.32485923903770686, "grad_norm": 1.3647031173354027, "learning_rate": 7.888404833218992e-05, "loss": 0.3289, "step": 3808 }, { "epoch": 0.32494454871182393, "grad_norm": 1.5820254478819142, "learning_rate": 7.887277034005356e-05, "loss": 0.2813, "step": 3809 }, { "epoch": 0.32502985838594095, "grad_norm": 1.4139985128014498, "learning_rate": 7.886149014363463e-05, "loss": 0.3076, "step": 3810 }, { "epoch": 0.325115168060058, "grad_norm": 1.6299088500579013, "learning_rate": 7.885020774379429e-05, "loss": 0.3186, "step": 3811 }, { "epoch": 0.32520047773417504, "grad_norm": 1.6871878017277597, "learning_rate": 7.88389231413939e-05, "loss": 0.3755, "step": 3812 }, { "epoch": 0.3252857874082921, "grad_norm": 1.225114733324955, "learning_rate": 7.882763633729497e-05, "loss": 0.2921, "step": 3813 }, { "epoch": 0.32537109708240913, "grad_norm": 1.144441256060886, "learning_rate": 7.88163473323592e-05, "loss": 0.2987, "step": 3814 }, { "epoch": 0.3254564067565262, "grad_norm": 1.4204506919114988, "learning_rate": 7.880505612744843e-05, "loss": 0.3217, "step": 3815 }, { "epoch": 0.3255417164306432, "grad_norm": 1.5318441989748375, "learning_rate": 7.879376272342472e-05, "loss": 0.3605, "step": 3816 }, { "epoch": 0.3256270261047603, "grad_norm": 1.4908316479000474, "learning_rate": 7.878246712115022e-05, "loss": 0.2789, "step": 3817 }, { "epoch": 0.3257123357788773, "grad_norm": 1.535297416428123, "learning_rate": 7.877116932148731e-05, "loss": 0.3185, "step": 3818 }, { "epoch": 0.3257976454529944, "grad_norm": 1.4490028076513244, "learning_rate": 7.875986932529852e-05, "loss": 0.3051, "step": 3819 }, { "epoch": 0.3258829551271114, "grad_norm": 1.7818088658643094, "learning_rate": 7.874856713344651e-05, "loss": 0.3301, "step": 3820 }, { "epoch": 0.3259682648012285, "grad_norm": 1.3929015823233672, "learning_rate": 7.87372627467942e-05, "loss": 0.3502, "step": 3821 }, { "epoch": 0.3260535744753455, "grad_norm": 1.3688998588845624, "learning_rate": 7.872595616620458e-05, "loss": 0.3437, "step": 3822 }, { "epoch": 0.32613888414946257, "grad_norm": 1.2385951946852136, "learning_rate": 7.871464739254084e-05, "loss": 0.2948, "step": 3823 }, { "epoch": 0.3262241938235796, "grad_norm": 1.6524111325928978, "learning_rate": 7.870333642666639e-05, "loss": 0.3372, "step": 3824 }, { "epoch": 0.32630950349769666, "grad_norm": 1.26611262217371, "learning_rate": 7.86920232694447e-05, "loss": 0.2872, "step": 3825 }, { "epoch": 0.3263948131718137, "grad_norm": 1.3513903646057024, "learning_rate": 7.868070792173952e-05, "loss": 0.245, "step": 3826 }, { "epoch": 0.32648012284593075, "grad_norm": 1.7149790230405135, "learning_rate": 7.86693903844147e-05, "loss": 0.3336, "step": 3827 }, { "epoch": 0.32656543252004777, "grad_norm": 1.077285882337616, "learning_rate": 7.865807065833428e-05, "loss": 0.2396, "step": 3828 }, { "epoch": 0.32665074219416484, "grad_norm": 1.5015155765057044, "learning_rate": 7.864674874436244e-05, "loss": 0.3272, "step": 3829 }, { "epoch": 0.32673605186828186, "grad_norm": 1.4458644328588268, "learning_rate": 7.863542464336356e-05, "loss": 0.2954, "step": 3830 }, { "epoch": 0.3268213615423989, "grad_norm": 1.6378617040080945, "learning_rate": 7.86240983562022e-05, "loss": 0.3106, "step": 3831 }, { "epoch": 0.32690667121651595, "grad_norm": 1.61736860505115, "learning_rate": 7.861276988374302e-05, "loss": 0.3075, "step": 3832 }, { "epoch": 0.32699198089063297, "grad_norm": 1.6871673821318351, "learning_rate": 7.860143922685091e-05, "loss": 0.3401, "step": 3833 }, { "epoch": 0.32707729056475005, "grad_norm": 1.7040561615325664, "learning_rate": 7.85901063863909e-05, "loss": 0.3038, "step": 3834 }, { "epoch": 0.32716260023886706, "grad_norm": 1.3256987906299769, "learning_rate": 7.85787713632282e-05, "loss": 0.2649, "step": 3835 }, { "epoch": 0.32724790991298414, "grad_norm": 1.5276569328294873, "learning_rate": 7.856743415822816e-05, "loss": 0.3338, "step": 3836 }, { "epoch": 0.32733321958710115, "grad_norm": 1.6490282202550415, "learning_rate": 7.855609477225635e-05, "loss": 0.3409, "step": 3837 }, { "epoch": 0.3274185292612182, "grad_norm": 1.422271002654795, "learning_rate": 7.854475320617844e-05, "loss": 0.3541, "step": 3838 }, { "epoch": 0.32750383893533525, "grad_norm": 1.4235306207909502, "learning_rate": 7.853340946086032e-05, "loss": 0.2911, "step": 3839 }, { "epoch": 0.3275891486094523, "grad_norm": 1.3515122244419997, "learning_rate": 7.8522063537168e-05, "loss": 0.3569, "step": 3840 }, { "epoch": 0.32767445828356934, "grad_norm": 1.4840514784158867, "learning_rate": 7.851071543596769e-05, "loss": 0.312, "step": 3841 }, { "epoch": 0.3277597679576864, "grad_norm": 1.4020652388169839, "learning_rate": 7.849936515812578e-05, "loss": 0.3064, "step": 3842 }, { "epoch": 0.3278450776318034, "grad_norm": 1.67803050082361, "learning_rate": 7.848801270450879e-05, "loss": 0.3372, "step": 3843 }, { "epoch": 0.3279303873059205, "grad_norm": 1.4462945849141275, "learning_rate": 7.84766580759834e-05, "loss": 0.3206, "step": 3844 }, { "epoch": 0.3280156969800375, "grad_norm": 1.8351356865716122, "learning_rate": 7.84653012734165e-05, "loss": 0.3117, "step": 3845 }, { "epoch": 0.3281010066541546, "grad_norm": 1.200155631543102, "learning_rate": 7.845394229767509e-05, "loss": 0.3081, "step": 3846 }, { "epoch": 0.3281863163282716, "grad_norm": 1.296890350618629, "learning_rate": 7.844258114962642e-05, "loss": 0.3234, "step": 3847 }, { "epoch": 0.3282716260023887, "grad_norm": 1.3451189121164937, "learning_rate": 7.84312178301378e-05, "loss": 0.3205, "step": 3848 }, { "epoch": 0.3283569356765057, "grad_norm": 1.2196082492251616, "learning_rate": 7.84198523400768e-05, "loss": 0.3331, "step": 3849 }, { "epoch": 0.3284422453506228, "grad_norm": 1.6531756676334852, "learning_rate": 7.840848468031108e-05, "loss": 0.2929, "step": 3850 }, { "epoch": 0.3285275550247398, "grad_norm": 1.3540334000179826, "learning_rate": 7.839711485170854e-05, "loss": 0.2845, "step": 3851 }, { "epoch": 0.32861286469885687, "grad_norm": 1.2528961962106053, "learning_rate": 7.838574285513716e-05, "loss": 0.3352, "step": 3852 }, { "epoch": 0.3286981743729739, "grad_norm": 1.155394366425496, "learning_rate": 7.837436869146517e-05, "loss": 0.2776, "step": 3853 }, { "epoch": 0.32878348404709096, "grad_norm": 1.7033774512537787, "learning_rate": 7.836299236156089e-05, "loss": 0.3202, "step": 3854 }, { "epoch": 0.328868793721208, "grad_norm": 1.5285649015544482, "learning_rate": 7.835161386629288e-05, "loss": 0.3652, "step": 3855 }, { "epoch": 0.32895410339532505, "grad_norm": 1.456315424681467, "learning_rate": 7.834023320652981e-05, "loss": 0.324, "step": 3856 }, { "epoch": 0.32903941306944207, "grad_norm": 1.3320299970783147, "learning_rate": 7.832885038314053e-05, "loss": 0.2865, "step": 3857 }, { "epoch": 0.32912472274355914, "grad_norm": 1.591642744407166, "learning_rate": 7.831746539699407e-05, "loss": 0.3446, "step": 3858 }, { "epoch": 0.32921003241767616, "grad_norm": 1.4790171247655421, "learning_rate": 7.830607824895962e-05, "loss": 0.2956, "step": 3859 }, { "epoch": 0.32929534209179323, "grad_norm": 1.3273717651643897, "learning_rate": 7.829468893990649e-05, "loss": 0.3294, "step": 3860 }, { "epoch": 0.32938065176591025, "grad_norm": 1.4874832537053018, "learning_rate": 7.828329747070422e-05, "loss": 0.3096, "step": 3861 }, { "epoch": 0.3294659614400273, "grad_norm": 1.5186233750291533, "learning_rate": 7.82719038422225e-05, "loss": 0.3679, "step": 3862 }, { "epoch": 0.32955127111414434, "grad_norm": 1.5663936167371635, "learning_rate": 7.826050805533114e-05, "loss": 0.3291, "step": 3863 }, { "epoch": 0.3296365807882614, "grad_norm": 1.341058075141567, "learning_rate": 7.824911011090016e-05, "loss": 0.292, "step": 3864 }, { "epoch": 0.32972189046237843, "grad_norm": 1.4098199247798977, "learning_rate": 7.823771000979976e-05, "loss": 0.3069, "step": 3865 }, { "epoch": 0.3298072001364955, "grad_norm": 1.6752254391309909, "learning_rate": 7.822630775290025e-05, "loss": 0.3782, "step": 3866 }, { "epoch": 0.3298925098106125, "grad_norm": 1.371197905754974, "learning_rate": 7.821490334107216e-05, "loss": 0.2745, "step": 3867 }, { "epoch": 0.3299778194847296, "grad_norm": 1.5806230470668663, "learning_rate": 7.820349677518609e-05, "loss": 0.3524, "step": 3868 }, { "epoch": 0.3300631291588466, "grad_norm": 1.9117623346408965, "learning_rate": 7.819208805611294e-05, "loss": 0.3519, "step": 3869 }, { "epoch": 0.33014843883296363, "grad_norm": 1.3528724011760425, "learning_rate": 7.818067718472366e-05, "loss": 0.327, "step": 3870 }, { "epoch": 0.3302337485070807, "grad_norm": 1.377037705874116, "learning_rate": 7.816926416188946e-05, "loss": 0.2895, "step": 3871 }, { "epoch": 0.3303190581811977, "grad_norm": 1.657863335005311, "learning_rate": 7.815784898848163e-05, "loss": 0.3045, "step": 3872 }, { "epoch": 0.3304043678553148, "grad_norm": 1.307800781489184, "learning_rate": 7.814643166537163e-05, "loss": 0.2944, "step": 3873 }, { "epoch": 0.3304896775294318, "grad_norm": 1.6460861140700027, "learning_rate": 7.813501219343116e-05, "loss": 0.3342, "step": 3874 }, { "epoch": 0.3305749872035489, "grad_norm": 1.6215415979414065, "learning_rate": 7.812359057353201e-05, "loss": 0.3644, "step": 3875 }, { "epoch": 0.3306602968776659, "grad_norm": 1.4768473032623652, "learning_rate": 7.811216680654618e-05, "loss": 0.316, "step": 3876 }, { "epoch": 0.330745606551783, "grad_norm": 1.4916529471904851, "learning_rate": 7.810074089334581e-05, "loss": 0.3022, "step": 3877 }, { "epoch": 0.3308309162259, "grad_norm": 1.8200598283512759, "learning_rate": 7.808931283480316e-05, "loss": 0.3415, "step": 3878 }, { "epoch": 0.33091622590001707, "grad_norm": 1.419325838568809, "learning_rate": 7.807788263179078e-05, "loss": 0.3385, "step": 3879 }, { "epoch": 0.3310015355741341, "grad_norm": 1.4392432547671787, "learning_rate": 7.806645028518125e-05, "loss": 0.2819, "step": 3880 }, { "epoch": 0.33108684524825116, "grad_norm": 1.3756981724399662, "learning_rate": 7.80550157958474e-05, "loss": 0.264, "step": 3881 }, { "epoch": 0.3311721549223682, "grad_norm": 1.5679884836640077, "learning_rate": 7.804357916466216e-05, "loss": 0.2922, "step": 3882 }, { "epoch": 0.33125746459648525, "grad_norm": 1.3173560273407474, "learning_rate": 7.80321403924987e-05, "loss": 0.3424, "step": 3883 }, { "epoch": 0.33134277427060227, "grad_norm": 1.6359010533860923, "learning_rate": 7.802069948023025e-05, "loss": 0.3162, "step": 3884 }, { "epoch": 0.33142808394471934, "grad_norm": 1.5128935751084716, "learning_rate": 7.800925642873032e-05, "loss": 0.3031, "step": 3885 }, { "epoch": 0.33151339361883636, "grad_norm": 1.4274254920073917, "learning_rate": 7.799781123887248e-05, "loss": 0.3228, "step": 3886 }, { "epoch": 0.33159870329295343, "grad_norm": 1.221517209013403, "learning_rate": 7.798636391153056e-05, "loss": 0.2817, "step": 3887 }, { "epoch": 0.33168401296707045, "grad_norm": 1.374694356760319, "learning_rate": 7.797491444757847e-05, "loss": 0.3186, "step": 3888 }, { "epoch": 0.3317693226411875, "grad_norm": 1.7141356516053483, "learning_rate": 7.796346284789032e-05, "loss": 0.3307, "step": 3889 }, { "epoch": 0.33185463231530454, "grad_norm": 1.717576128897983, "learning_rate": 7.795200911334036e-05, "loss": 0.3578, "step": 3890 }, { "epoch": 0.3319399419894216, "grad_norm": 1.357723767856874, "learning_rate": 7.794055324480305e-05, "loss": 0.3463, "step": 3891 }, { "epoch": 0.33202525166353863, "grad_norm": 1.4858903879424816, "learning_rate": 7.792909524315298e-05, "loss": 0.3108, "step": 3892 }, { "epoch": 0.3321105613376557, "grad_norm": 1.4910848647948867, "learning_rate": 7.791763510926491e-05, "loss": 0.3626, "step": 3893 }, { "epoch": 0.3321958710117727, "grad_norm": 1.4624639685993273, "learning_rate": 7.790617284401374e-05, "loss": 0.329, "step": 3894 }, { "epoch": 0.3322811806858898, "grad_norm": 1.4378565470484657, "learning_rate": 7.789470844827458e-05, "loss": 0.3912, "step": 3895 }, { "epoch": 0.3323664903600068, "grad_norm": 1.2656727534574663, "learning_rate": 7.788324192292265e-05, "loss": 0.2707, "step": 3896 }, { "epoch": 0.3324518000341239, "grad_norm": 1.274390864252462, "learning_rate": 7.787177326883336e-05, "loss": 0.2841, "step": 3897 }, { "epoch": 0.3325371097082409, "grad_norm": 1.6385703163366503, "learning_rate": 7.786030248688232e-05, "loss": 0.3666, "step": 3898 }, { "epoch": 0.332622419382358, "grad_norm": 1.5431064978811737, "learning_rate": 7.784882957794522e-05, "loss": 0.3153, "step": 3899 }, { "epoch": 0.332707729056475, "grad_norm": 1.3081294360497975, "learning_rate": 7.783735454289798e-05, "loss": 0.3288, "step": 3900 }, { "epoch": 0.3327930387305921, "grad_norm": 1.3004968464016398, "learning_rate": 7.782587738261664e-05, "loss": 0.3005, "step": 3901 }, { "epoch": 0.3328783484047091, "grad_norm": 1.289547545758673, "learning_rate": 7.781439809797743e-05, "loss": 0.3066, "step": 3902 }, { "epoch": 0.33296365807882616, "grad_norm": 1.2751034563763408, "learning_rate": 7.780291668985672e-05, "loss": 0.2816, "step": 3903 }, { "epoch": 0.3330489677529432, "grad_norm": 1.5050924007907995, "learning_rate": 7.779143315913108e-05, "loss": 0.3331, "step": 3904 }, { "epoch": 0.33313427742706025, "grad_norm": 1.5260799086360861, "learning_rate": 7.777994750667719e-05, "loss": 0.3318, "step": 3905 }, { "epoch": 0.3332195871011773, "grad_norm": 1.5667362008321781, "learning_rate": 7.776845973337194e-05, "loss": 0.2851, "step": 3906 }, { "epoch": 0.33330489677529435, "grad_norm": 1.3730614608489142, "learning_rate": 7.775696984009236e-05, "loss": 0.2817, "step": 3907 }, { "epoch": 0.33339020644941136, "grad_norm": 1.6359909733847366, "learning_rate": 7.774547782771562e-05, "loss": 0.3526, "step": 3908 }, { "epoch": 0.3334755161235284, "grad_norm": 1.5374305709421578, "learning_rate": 7.773398369711908e-05, "loss": 0.2934, "step": 3909 }, { "epoch": 0.33356082579764545, "grad_norm": 1.7952009280921455, "learning_rate": 7.772248744918028e-05, "loss": 0.3123, "step": 3910 }, { "epoch": 0.3336461354717625, "grad_norm": 1.677343605016623, "learning_rate": 7.771098908477686e-05, "loss": 0.276, "step": 3911 }, { "epoch": 0.33373144514587955, "grad_norm": 1.6946688366227523, "learning_rate": 7.769948860478669e-05, "loss": 0.3086, "step": 3912 }, { "epoch": 0.33381675481999656, "grad_norm": 1.3611021798461471, "learning_rate": 7.768798601008776e-05, "loss": 0.289, "step": 3913 }, { "epoch": 0.33390206449411364, "grad_norm": 1.557445284646221, "learning_rate": 7.767648130155824e-05, "loss": 0.3428, "step": 3914 }, { "epoch": 0.33398737416823066, "grad_norm": 1.365411356749855, "learning_rate": 7.766497448007643e-05, "loss": 0.2924, "step": 3915 }, { "epoch": 0.33407268384234773, "grad_norm": 1.53315903910045, "learning_rate": 7.765346554652085e-05, "loss": 0.3562, "step": 3916 }, { "epoch": 0.33415799351646475, "grad_norm": 1.411693802039434, "learning_rate": 7.764195450177011e-05, "loss": 0.3196, "step": 3917 }, { "epoch": 0.3342433031905818, "grad_norm": 1.3387046843338153, "learning_rate": 7.763044134670303e-05, "loss": 0.3203, "step": 3918 }, { "epoch": 0.33432861286469884, "grad_norm": 1.6847990107440127, "learning_rate": 7.761892608219859e-05, "loss": 0.378, "step": 3919 }, { "epoch": 0.3344139225388159, "grad_norm": 1.6518413961444232, "learning_rate": 7.760740870913592e-05, "loss": 0.3585, "step": 3920 }, { "epoch": 0.33449923221293293, "grad_norm": 1.5476226541106852, "learning_rate": 7.759588922839427e-05, "loss": 0.3366, "step": 3921 }, { "epoch": 0.33458454188705, "grad_norm": 1.5953782683592101, "learning_rate": 7.758436764085315e-05, "loss": 0.3531, "step": 3922 }, { "epoch": 0.334669851561167, "grad_norm": 1.4659691315090346, "learning_rate": 7.757284394739212e-05, "loss": 0.3, "step": 3923 }, { "epoch": 0.3347551612352841, "grad_norm": 1.471764575355378, "learning_rate": 7.7561318148891e-05, "loss": 0.3562, "step": 3924 }, { "epoch": 0.3348404709094011, "grad_norm": 1.6986919419540636, "learning_rate": 7.75497902462297e-05, "loss": 0.2874, "step": 3925 }, { "epoch": 0.3349257805835182, "grad_norm": 1.6562345971884758, "learning_rate": 7.753826024028829e-05, "loss": 0.2983, "step": 3926 }, { "epoch": 0.3350110902576352, "grad_norm": 1.607939144444319, "learning_rate": 7.752672813194707e-05, "loss": 0.3025, "step": 3927 }, { "epoch": 0.3350963999317523, "grad_norm": 1.5438897197059671, "learning_rate": 7.751519392208644e-05, "loss": 0.3675, "step": 3928 }, { "epoch": 0.3351817096058693, "grad_norm": 1.352227989808941, "learning_rate": 7.750365761158695e-05, "loss": 0.2955, "step": 3929 }, { "epoch": 0.33526701927998637, "grad_norm": 1.2724110966111923, "learning_rate": 7.749211920132937e-05, "loss": 0.3622, "step": 3930 }, { "epoch": 0.3353523289541034, "grad_norm": 1.6049131002698322, "learning_rate": 7.748057869219456e-05, "loss": 0.2959, "step": 3931 }, { "epoch": 0.33543763862822046, "grad_norm": 1.9325837551191765, "learning_rate": 7.746903608506362e-05, "loss": 0.3679, "step": 3932 }, { "epoch": 0.3355229483023375, "grad_norm": 1.432186756337768, "learning_rate": 7.745749138081775e-05, "loss": 0.3093, "step": 3933 }, { "epoch": 0.33560825797645455, "grad_norm": 1.3338408050801318, "learning_rate": 7.74459445803383e-05, "loss": 0.2704, "step": 3934 }, { "epoch": 0.33569356765057157, "grad_norm": 1.3454729168426134, "learning_rate": 7.743439568450684e-05, "loss": 0.2911, "step": 3935 }, { "epoch": 0.33577887732468864, "grad_norm": 1.4344695947242927, "learning_rate": 7.742284469420505e-05, "loss": 0.3275, "step": 3936 }, { "epoch": 0.33586418699880566, "grad_norm": 1.5104039509555898, "learning_rate": 7.74112916103148e-05, "loss": 0.3271, "step": 3937 }, { "epoch": 0.33594949667292273, "grad_norm": 1.4183296185783634, "learning_rate": 7.739973643371809e-05, "loss": 0.3427, "step": 3938 }, { "epoch": 0.33603480634703975, "grad_norm": 1.492967172219617, "learning_rate": 7.738817916529713e-05, "loss": 0.3251, "step": 3939 }, { "epoch": 0.3361201160211568, "grad_norm": 1.6590168711958155, "learning_rate": 7.73766198059342e-05, "loss": 0.3637, "step": 3940 }, { "epoch": 0.33620542569527384, "grad_norm": 1.400285405631338, "learning_rate": 7.736505835651186e-05, "loss": 0.3194, "step": 3941 }, { "epoch": 0.3362907353693909, "grad_norm": 1.60398554914819, "learning_rate": 7.73534948179127e-05, "loss": 0.3217, "step": 3942 }, { "epoch": 0.33637604504350793, "grad_norm": 1.4053814218902507, "learning_rate": 7.734192919101958e-05, "loss": 0.271, "step": 3943 }, { "epoch": 0.336461354717625, "grad_norm": 1.4927330417899625, "learning_rate": 7.733036147671546e-05, "loss": 0.3725, "step": 3944 }, { "epoch": 0.336546664391742, "grad_norm": 1.1128744888073114, "learning_rate": 7.731879167588347e-05, "loss": 0.258, "step": 3945 }, { "epoch": 0.33663197406585904, "grad_norm": 1.6112674586117453, "learning_rate": 7.730721978940693e-05, "loss": 0.3299, "step": 3946 }, { "epoch": 0.3367172837399761, "grad_norm": 1.6447619641041809, "learning_rate": 7.729564581816923e-05, "loss": 0.3155, "step": 3947 }, { "epoch": 0.33680259341409313, "grad_norm": 1.4494298076918168, "learning_rate": 7.728406976305406e-05, "loss": 0.2965, "step": 3948 }, { "epoch": 0.3368879030882102, "grad_norm": 1.4890882651950985, "learning_rate": 7.727249162494513e-05, "loss": 0.3036, "step": 3949 }, { "epoch": 0.3369732127623272, "grad_norm": 1.5045267781809095, "learning_rate": 7.72609114047264e-05, "loss": 0.3369, "step": 3950 }, { "epoch": 0.3370585224364443, "grad_norm": 1.3385818366567765, "learning_rate": 7.724932910328194e-05, "loss": 0.2996, "step": 3951 }, { "epoch": 0.3371438321105613, "grad_norm": 1.457377783753285, "learning_rate": 7.723774472149601e-05, "loss": 0.3122, "step": 3952 }, { "epoch": 0.3372291417846784, "grad_norm": 1.3611762291821599, "learning_rate": 7.722615826025302e-05, "loss": 0.3144, "step": 3953 }, { "epoch": 0.3373144514587954, "grad_norm": 1.3323294565877535, "learning_rate": 7.72145697204375e-05, "loss": 0.3074, "step": 3954 }, { "epoch": 0.3373997611329125, "grad_norm": 1.5642561389667895, "learning_rate": 7.720297910293421e-05, "loss": 0.3361, "step": 3955 }, { "epoch": 0.3374850708070295, "grad_norm": 1.3707394481769732, "learning_rate": 7.719138640862804e-05, "loss": 0.3345, "step": 3956 }, { "epoch": 0.33757038048114657, "grad_norm": 1.5612302580074684, "learning_rate": 7.717979163840401e-05, "loss": 0.3191, "step": 3957 }, { "epoch": 0.3376556901552636, "grad_norm": 1.4997167319811038, "learning_rate": 7.71681947931473e-05, "loss": 0.2478, "step": 3958 }, { "epoch": 0.33774099982938066, "grad_norm": 1.9504935887008161, "learning_rate": 7.715659587374331e-05, "loss": 0.3124, "step": 3959 }, { "epoch": 0.3378263095034977, "grad_norm": 1.7537529077594167, "learning_rate": 7.714499488107751e-05, "loss": 0.3739, "step": 3960 }, { "epoch": 0.33791161917761475, "grad_norm": 1.7208946026231247, "learning_rate": 7.713339181603563e-05, "loss": 0.3229, "step": 3961 }, { "epoch": 0.33799692885173177, "grad_norm": 1.7871791471608633, "learning_rate": 7.712178667950346e-05, "loss": 0.308, "step": 3962 }, { "epoch": 0.33808223852584884, "grad_norm": 1.4734490079622509, "learning_rate": 7.711017947236699e-05, "loss": 0.2898, "step": 3963 }, { "epoch": 0.33816754819996586, "grad_norm": 1.4938563257147763, "learning_rate": 7.70985701955124e-05, "loss": 0.3183, "step": 3964 }, { "epoch": 0.33825285787408294, "grad_norm": 1.4020523576115806, "learning_rate": 7.708695884982597e-05, "loss": 0.3094, "step": 3965 }, { "epoch": 0.33833816754819995, "grad_norm": 1.546255507833282, "learning_rate": 7.707534543619417e-05, "loss": 0.3206, "step": 3966 }, { "epoch": 0.338423477222317, "grad_norm": 1.3295798019383003, "learning_rate": 7.706372995550361e-05, "loss": 0.3174, "step": 3967 }, { "epoch": 0.33850878689643404, "grad_norm": 1.1849542736132055, "learning_rate": 7.70521124086411e-05, "loss": 0.4057, "step": 3968 }, { "epoch": 0.3385940965705511, "grad_norm": 1.474100067908597, "learning_rate": 7.704049279649357e-05, "loss": 0.3148, "step": 3969 }, { "epoch": 0.33867940624466814, "grad_norm": 1.2666399147738971, "learning_rate": 7.70288711199481e-05, "loss": 0.2407, "step": 3970 }, { "epoch": 0.3387647159187852, "grad_norm": 1.4943570647206421, "learning_rate": 7.701724737989196e-05, "loss": 0.3593, "step": 3971 }, { "epoch": 0.3388500255929022, "grad_norm": 1.5874392159907516, "learning_rate": 7.700562157721254e-05, "loss": 0.323, "step": 3972 }, { "epoch": 0.3389353352670193, "grad_norm": 1.6138863894905935, "learning_rate": 7.69939937127974e-05, "loss": 0.2704, "step": 3973 }, { "epoch": 0.3390206449411363, "grad_norm": 1.6441049634927343, "learning_rate": 7.698236378753432e-05, "loss": 0.3605, "step": 3974 }, { "epoch": 0.3391059546152534, "grad_norm": 1.3948229743723883, "learning_rate": 7.697073180231114e-05, "loss": 0.306, "step": 3975 }, { "epoch": 0.3391912642893704, "grad_norm": 1.62369426205817, "learning_rate": 7.69590977580159e-05, "loss": 0.2803, "step": 3976 }, { "epoch": 0.3392765739634875, "grad_norm": 1.4747822034998963, "learning_rate": 7.694746165553682e-05, "loss": 0.3716, "step": 3977 }, { "epoch": 0.3393618836376045, "grad_norm": 1.66666735013312, "learning_rate": 7.693582349576223e-05, "loss": 0.3849, "step": 3978 }, { "epoch": 0.3394471933117216, "grad_norm": 1.9768652401398887, "learning_rate": 7.692418327958066e-05, "loss": 0.3125, "step": 3979 }, { "epoch": 0.3395325029858386, "grad_norm": 1.6253554615635086, "learning_rate": 7.691254100788077e-05, "loss": 0.278, "step": 3980 }, { "epoch": 0.33961781265995566, "grad_norm": 1.702115030666688, "learning_rate": 7.690089668155138e-05, "loss": 0.3687, "step": 3981 }, { "epoch": 0.3397031223340727, "grad_norm": 1.3544420646959163, "learning_rate": 7.688925030148149e-05, "loss": 0.313, "step": 3982 }, { "epoch": 0.33978843200818976, "grad_norm": 1.455446747036618, "learning_rate": 7.687760186856023e-05, "loss": 0.3375, "step": 3983 }, { "epoch": 0.3398737416823068, "grad_norm": 1.591502980441875, "learning_rate": 7.686595138367688e-05, "loss": 0.2941, "step": 3984 }, { "epoch": 0.3399590513564238, "grad_norm": 1.1763321102561513, "learning_rate": 7.685429884772092e-05, "loss": 0.3086, "step": 3985 }, { "epoch": 0.34004436103054086, "grad_norm": 1.5322828896394542, "learning_rate": 7.684264426158194e-05, "loss": 0.355, "step": 3986 }, { "epoch": 0.3401296707046579, "grad_norm": 1.3084801012478688, "learning_rate": 7.683098762614971e-05, "loss": 0.269, "step": 3987 }, { "epoch": 0.34021498037877496, "grad_norm": 1.6098025596582188, "learning_rate": 7.681932894231417e-05, "loss": 0.2988, "step": 3988 }, { "epoch": 0.340300290052892, "grad_norm": 1.3135492581242154, "learning_rate": 7.680766821096537e-05, "loss": 0.2996, "step": 3989 }, { "epoch": 0.34038559972700905, "grad_norm": 1.4024991345190414, "learning_rate": 7.679600543299356e-05, "loss": 0.3235, "step": 3990 }, { "epoch": 0.34047090940112607, "grad_norm": 1.6492285977082677, "learning_rate": 7.678434060928913e-05, "loss": 0.3595, "step": 3991 }, { "epoch": 0.34055621907524314, "grad_norm": 1.7840003480440494, "learning_rate": 7.677267374074262e-05, "loss": 0.3966, "step": 3992 }, { "epoch": 0.34064152874936016, "grad_norm": 1.8038688222935177, "learning_rate": 7.676100482824476e-05, "loss": 0.3421, "step": 3993 }, { "epoch": 0.34072683842347723, "grad_norm": 1.4072793372249686, "learning_rate": 7.674933387268637e-05, "loss": 0.2656, "step": 3994 }, { "epoch": 0.34081214809759425, "grad_norm": 1.474861254874631, "learning_rate": 7.673766087495848e-05, "loss": 0.3499, "step": 3995 }, { "epoch": 0.3408974577717113, "grad_norm": 1.4108494457594876, "learning_rate": 7.672598583595227e-05, "loss": 0.3364, "step": 3996 }, { "epoch": 0.34098276744582834, "grad_norm": 1.3219315573134145, "learning_rate": 7.671430875655907e-05, "loss": 0.2923, "step": 3997 }, { "epoch": 0.3410680771199454, "grad_norm": 1.3971494144853618, "learning_rate": 7.670262963767037e-05, "loss": 0.3407, "step": 3998 }, { "epoch": 0.34115338679406243, "grad_norm": 1.579002967073094, "learning_rate": 7.669094848017777e-05, "loss": 0.329, "step": 3999 }, { "epoch": 0.3412386964681795, "grad_norm": 1.660576693083846, "learning_rate": 7.667926528497311e-05, "loss": 0.3666, "step": 4000 }, { "epoch": 0.3413240061422965, "grad_norm": 1.352765027942841, "learning_rate": 7.66675800529483e-05, "loss": 0.2804, "step": 4001 }, { "epoch": 0.3414093158164136, "grad_norm": 1.5221032192804937, "learning_rate": 7.665589278499547e-05, "loss": 0.3208, "step": 4002 }, { "epoch": 0.3414946254905306, "grad_norm": 1.5937215765120147, "learning_rate": 7.664420348200689e-05, "loss": 0.3247, "step": 4003 }, { "epoch": 0.3415799351646477, "grad_norm": 1.2340778041343683, "learning_rate": 7.663251214487495e-05, "loss": 0.3269, "step": 4004 }, { "epoch": 0.3416652448387647, "grad_norm": 1.4674362637977554, "learning_rate": 7.662081877449221e-05, "loss": 0.3312, "step": 4005 }, { "epoch": 0.3417505545128818, "grad_norm": 1.3932202585476263, "learning_rate": 7.660912337175145e-05, "loss": 0.3091, "step": 4006 }, { "epoch": 0.3418358641869988, "grad_norm": 1.6014140734861553, "learning_rate": 7.659742593754551e-05, "loss": 0.2796, "step": 4007 }, { "epoch": 0.34192117386111587, "grad_norm": 1.4357149402044846, "learning_rate": 7.658572647276744e-05, "loss": 0.3127, "step": 4008 }, { "epoch": 0.3420064835352329, "grad_norm": 1.3885132260377775, "learning_rate": 7.657402497831044e-05, "loss": 0.3471, "step": 4009 }, { "epoch": 0.34209179320934996, "grad_norm": 1.4325739172061165, "learning_rate": 7.656232145506784e-05, "loss": 0.2874, "step": 4010 }, { "epoch": 0.342177102883467, "grad_norm": 1.4979959451903202, "learning_rate": 7.655061590393314e-05, "loss": 0.3023, "step": 4011 }, { "epoch": 0.34226241255758405, "grad_norm": 1.4185688020722746, "learning_rate": 7.653890832580002e-05, "loss": 0.3358, "step": 4012 }, { "epoch": 0.34234772223170107, "grad_norm": 1.2603940830361797, "learning_rate": 7.652719872156226e-05, "loss": 0.3126, "step": 4013 }, { "epoch": 0.34243303190581814, "grad_norm": 1.4568494422533163, "learning_rate": 7.651548709211384e-05, "loss": 0.3401, "step": 4014 }, { "epoch": 0.34251834157993516, "grad_norm": 1.5095048805099793, "learning_rate": 7.650377343834891e-05, "loss": 0.3185, "step": 4015 }, { "epoch": 0.34260365125405223, "grad_norm": 1.2824448387374394, "learning_rate": 7.64920577611617e-05, "loss": 0.3207, "step": 4016 }, { "epoch": 0.34268896092816925, "grad_norm": 1.4098760691078205, "learning_rate": 7.648034006144667e-05, "loss": 0.2471, "step": 4017 }, { "epoch": 0.3427742706022863, "grad_norm": 1.9111008132181517, "learning_rate": 7.646862034009837e-05, "loss": 0.3188, "step": 4018 }, { "epoch": 0.34285958027640334, "grad_norm": 1.322348114550975, "learning_rate": 7.645689859801157e-05, "loss": 0.2975, "step": 4019 }, { "epoch": 0.3429448899505204, "grad_norm": 1.3778233152515755, "learning_rate": 7.644517483608116e-05, "loss": 0.3052, "step": 4020 }, { "epoch": 0.34303019962463743, "grad_norm": 1.7301114576741412, "learning_rate": 7.64334490552022e-05, "loss": 0.2964, "step": 4021 }, { "epoch": 0.3431155092987545, "grad_norm": 1.58577002030938, "learning_rate": 7.642172125626986e-05, "loss": 0.304, "step": 4022 }, { "epoch": 0.3432008189728715, "grad_norm": 1.5041485479783492, "learning_rate": 7.64099914401795e-05, "loss": 0.3027, "step": 4023 }, { "epoch": 0.34328612864698854, "grad_norm": 1.5131414456204502, "learning_rate": 7.639825960782663e-05, "loss": 0.317, "step": 4024 }, { "epoch": 0.3433714383211056, "grad_norm": 1.3815066534980434, "learning_rate": 7.638652576010692e-05, "loss": 0.3254, "step": 4025 }, { "epoch": 0.34345674799522263, "grad_norm": 1.6041499380585325, "learning_rate": 7.637478989791618e-05, "loss": 0.3953, "step": 4026 }, { "epoch": 0.3435420576693397, "grad_norm": 1.8407583372980814, "learning_rate": 7.636305202215041e-05, "loss": 0.3873, "step": 4027 }, { "epoch": 0.3436273673434567, "grad_norm": 1.3562832454702665, "learning_rate": 7.63513121337057e-05, "loss": 0.2829, "step": 4028 }, { "epoch": 0.3437126770175738, "grad_norm": 1.535618836107564, "learning_rate": 7.633957023347833e-05, "loss": 0.3287, "step": 4029 }, { "epoch": 0.3437979866916908, "grad_norm": 1.8178830941762445, "learning_rate": 7.632782632236474e-05, "loss": 0.2884, "step": 4030 }, { "epoch": 0.3438832963658079, "grad_norm": 1.4759917594245437, "learning_rate": 7.631608040126154e-05, "loss": 0.274, "step": 4031 }, { "epoch": 0.3439686060399249, "grad_norm": 1.695344704880243, "learning_rate": 7.630433247106543e-05, "loss": 0.3329, "step": 4032 }, { "epoch": 0.344053915714042, "grad_norm": 1.3150633349233105, "learning_rate": 7.629258253267332e-05, "loss": 0.3568, "step": 4033 }, { "epoch": 0.344139225388159, "grad_norm": 1.5845326181595236, "learning_rate": 7.628083058698226e-05, "loss": 0.3103, "step": 4034 }, { "epoch": 0.34422453506227607, "grad_norm": 1.2611267779735986, "learning_rate": 7.626907663488943e-05, "loss": 0.2963, "step": 4035 }, { "epoch": 0.3443098447363931, "grad_norm": 1.8803749609436669, "learning_rate": 7.625732067729219e-05, "loss": 0.3988, "step": 4036 }, { "epoch": 0.34439515441051016, "grad_norm": 1.390685990956992, "learning_rate": 7.624556271508805e-05, "loss": 0.2852, "step": 4037 }, { "epoch": 0.3444804640846272, "grad_norm": 1.3016508477715498, "learning_rate": 7.623380274917467e-05, "loss": 0.3273, "step": 4038 }, { "epoch": 0.34456577375874425, "grad_norm": 1.633775490968363, "learning_rate": 7.622204078044985e-05, "loss": 0.3624, "step": 4039 }, { "epoch": 0.34465108343286127, "grad_norm": 1.3898443967186216, "learning_rate": 7.621027680981155e-05, "loss": 0.3227, "step": 4040 }, { "epoch": 0.34473639310697834, "grad_norm": 1.3124935967425415, "learning_rate": 7.619851083815793e-05, "loss": 0.335, "step": 4041 }, { "epoch": 0.34482170278109536, "grad_norm": 1.4801624152273494, "learning_rate": 7.61867428663872e-05, "loss": 0.2769, "step": 4042 }, { "epoch": 0.34490701245521244, "grad_norm": 1.2315905104972493, "learning_rate": 7.617497289539781e-05, "loss": 0.2637, "step": 4043 }, { "epoch": 0.34499232212932945, "grad_norm": 1.5178894744763447, "learning_rate": 7.616320092608834e-05, "loss": 0.284, "step": 4044 }, { "epoch": 0.3450776318034465, "grad_norm": 1.4841626467142526, "learning_rate": 7.615142695935751e-05, "loss": 0.2994, "step": 4045 }, { "epoch": 0.34516294147756355, "grad_norm": 1.361737883319037, "learning_rate": 7.613965099610419e-05, "loss": 0.3082, "step": 4046 }, { "epoch": 0.3452482511516806, "grad_norm": 1.4330508168082134, "learning_rate": 7.612787303722744e-05, "loss": 0.2881, "step": 4047 }, { "epoch": 0.34533356082579764, "grad_norm": 1.3656385093736563, "learning_rate": 7.61160930836264e-05, "loss": 0.3008, "step": 4048 }, { "epoch": 0.3454188704999147, "grad_norm": 1.4015089009900876, "learning_rate": 7.610431113620046e-05, "loss": 0.319, "step": 4049 }, { "epoch": 0.3455041801740317, "grad_norm": 1.170258589802889, "learning_rate": 7.609252719584907e-05, "loss": 0.2734, "step": 4050 }, { "epoch": 0.3455894898481488, "grad_norm": 1.8928108505284662, "learning_rate": 7.608074126347189e-05, "loss": 0.323, "step": 4051 }, { "epoch": 0.3456747995222658, "grad_norm": 1.5732550688582676, "learning_rate": 7.606895333996871e-05, "loss": 0.3416, "step": 4052 }, { "epoch": 0.3457601091963829, "grad_norm": 1.5893456430907877, "learning_rate": 7.605716342623948e-05, "loss": 0.3477, "step": 4053 }, { "epoch": 0.3458454188704999, "grad_norm": 1.4243331453100203, "learning_rate": 7.604537152318427e-05, "loss": 0.2566, "step": 4054 }, { "epoch": 0.345930728544617, "grad_norm": 1.5518401636030632, "learning_rate": 7.603357763170336e-05, "loss": 0.2926, "step": 4055 }, { "epoch": 0.346016038218734, "grad_norm": 1.78766707593202, "learning_rate": 7.602178175269713e-05, "loss": 0.3484, "step": 4056 }, { "epoch": 0.3461013478928511, "grad_norm": 1.377320455699284, "learning_rate": 7.600998388706615e-05, "loss": 0.32, "step": 4057 }, { "epoch": 0.3461866575669681, "grad_norm": 1.1929486406051621, "learning_rate": 7.599818403571112e-05, "loss": 0.2711, "step": 4058 }, { "epoch": 0.34627196724108517, "grad_norm": 1.4885353011717501, "learning_rate": 7.598638219953289e-05, "loss": 0.3159, "step": 4059 }, { "epoch": 0.3463572769152022, "grad_norm": 1.4836365318457616, "learning_rate": 7.597457837943247e-05, "loss": 0.3071, "step": 4060 }, { "epoch": 0.3464425865893192, "grad_norm": 1.1514592283569929, "learning_rate": 7.596277257631102e-05, "loss": 0.3206, "step": 4061 }, { "epoch": 0.3465278962634363, "grad_norm": 1.5907813454404929, "learning_rate": 7.595096479106985e-05, "loss": 0.3468, "step": 4062 }, { "epoch": 0.3466132059375533, "grad_norm": 1.6286431335615523, "learning_rate": 7.593915502461042e-05, "loss": 0.3476, "step": 4063 }, { "epoch": 0.34669851561167037, "grad_norm": 1.5639501327408734, "learning_rate": 7.592734327783435e-05, "loss": 0.3175, "step": 4064 }, { "epoch": 0.3467838252857874, "grad_norm": 1.3113974981562497, "learning_rate": 7.59155295516434e-05, "loss": 0.3072, "step": 4065 }, { "epoch": 0.34686913495990446, "grad_norm": 1.414231432523959, "learning_rate": 7.590371384693947e-05, "loss": 0.3068, "step": 4066 }, { "epoch": 0.3469544446340215, "grad_norm": 1.4930094906504658, "learning_rate": 7.589189616462465e-05, "loss": 0.3041, "step": 4067 }, { "epoch": 0.34703975430813855, "grad_norm": 1.5718571292172265, "learning_rate": 7.588007650560116e-05, "loss": 0.304, "step": 4068 }, { "epoch": 0.34712506398225557, "grad_norm": 1.6946339457844806, "learning_rate": 7.586825487077132e-05, "loss": 0.308, "step": 4069 }, { "epoch": 0.34721037365637264, "grad_norm": 1.3680662110629636, "learning_rate": 7.585643126103772e-05, "loss": 0.3071, "step": 4070 }, { "epoch": 0.34729568333048966, "grad_norm": 1.725300254121922, "learning_rate": 7.584460567730298e-05, "loss": 0.284, "step": 4071 }, { "epoch": 0.34738099300460673, "grad_norm": 1.7706657629575684, "learning_rate": 7.583277812046993e-05, "loss": 0.2896, "step": 4072 }, { "epoch": 0.34746630267872375, "grad_norm": 1.6346073780467865, "learning_rate": 7.582094859144155e-05, "loss": 0.3496, "step": 4073 }, { "epoch": 0.3475516123528408, "grad_norm": 1.3797566760510427, "learning_rate": 7.580911709112096e-05, "loss": 0.2955, "step": 4074 }, { "epoch": 0.34763692202695784, "grad_norm": 1.5427704116244934, "learning_rate": 7.579728362041142e-05, "loss": 0.3372, "step": 4075 }, { "epoch": 0.3477222317010749, "grad_norm": 1.4062231697066132, "learning_rate": 7.578544818021638e-05, "loss": 0.3148, "step": 4076 }, { "epoch": 0.34780754137519193, "grad_norm": 1.196500100964336, "learning_rate": 7.577361077143939e-05, "loss": 0.3191, "step": 4077 }, { "epoch": 0.347892851049309, "grad_norm": 1.4102298791319308, "learning_rate": 7.576177139498417e-05, "loss": 0.2405, "step": 4078 }, { "epoch": 0.347978160723426, "grad_norm": 1.5289611058050414, "learning_rate": 7.574993005175459e-05, "loss": 0.2994, "step": 4079 }, { "epoch": 0.3480634703975431, "grad_norm": 1.5648426042572565, "learning_rate": 7.57380867426547e-05, "loss": 0.333, "step": 4080 }, { "epoch": 0.3481487800716601, "grad_norm": 1.103932529792683, "learning_rate": 7.572624146858867e-05, "loss": 0.3406, "step": 4081 }, { "epoch": 0.3482340897457772, "grad_norm": 1.1219149781651019, "learning_rate": 7.571439423046079e-05, "loss": 0.2933, "step": 4082 }, { "epoch": 0.3483193994198942, "grad_norm": 1.434153434578593, "learning_rate": 7.570254502917557e-05, "loss": 0.3036, "step": 4083 }, { "epoch": 0.3484047090940113, "grad_norm": 1.4100616507883788, "learning_rate": 7.569069386563762e-05, "loss": 0.3406, "step": 4084 }, { "epoch": 0.3484900187681283, "grad_norm": 1.3114056339032942, "learning_rate": 7.567884074075173e-05, "loss": 0.2779, "step": 4085 }, { "epoch": 0.34857532844224537, "grad_norm": 1.3448661781586864, "learning_rate": 7.566698565542279e-05, "loss": 0.2579, "step": 4086 }, { "epoch": 0.3486606381163624, "grad_norm": 1.2339912977081804, "learning_rate": 7.565512861055589e-05, "loss": 0.296, "step": 4087 }, { "epoch": 0.34874594779047946, "grad_norm": 1.2580899234739025, "learning_rate": 7.564326960705624e-05, "loss": 0.2506, "step": 4088 }, { "epoch": 0.3488312574645965, "grad_norm": 1.2688520756893134, "learning_rate": 7.563140864582925e-05, "loss": 0.2509, "step": 4089 }, { "epoch": 0.34891656713871355, "grad_norm": 1.3385696358733423, "learning_rate": 7.561954572778038e-05, "loss": 0.274, "step": 4090 }, { "epoch": 0.34900187681283057, "grad_norm": 1.3440575802111436, "learning_rate": 7.560768085381535e-05, "loss": 0.2935, "step": 4091 }, { "epoch": 0.34908718648694764, "grad_norm": 1.5781488700043467, "learning_rate": 7.559581402483999e-05, "loss": 0.2973, "step": 4092 }, { "epoch": 0.34917249616106466, "grad_norm": 1.6693068414653833, "learning_rate": 7.558394524176023e-05, "loss": 0.4025, "step": 4093 }, { "epoch": 0.34925780583518173, "grad_norm": 1.4364815296774165, "learning_rate": 7.557207450548219e-05, "loss": 0.2991, "step": 4094 }, { "epoch": 0.34934311550929875, "grad_norm": 1.20372549156837, "learning_rate": 7.556020181691217e-05, "loss": 0.3148, "step": 4095 }, { "epoch": 0.3494284251834158, "grad_norm": 1.6545374671703224, "learning_rate": 7.554832717695656e-05, "loss": 0.3257, "step": 4096 }, { "epoch": 0.34951373485753284, "grad_norm": 1.641490598941576, "learning_rate": 7.553645058652193e-05, "loss": 0.3451, "step": 4097 }, { "epoch": 0.3495990445316499, "grad_norm": 1.4703905399055552, "learning_rate": 7.552457204651499e-05, "loss": 0.2587, "step": 4098 }, { "epoch": 0.34968435420576693, "grad_norm": 1.4368817202231285, "learning_rate": 7.551269155784264e-05, "loss": 0.3177, "step": 4099 }, { "epoch": 0.34976966387988395, "grad_norm": 1.310234157586576, "learning_rate": 7.550080912141184e-05, "loss": 0.3147, "step": 4100 }, { "epoch": 0.349854973554001, "grad_norm": 1.6547680108472942, "learning_rate": 7.548892473812979e-05, "loss": 0.327, "step": 4101 }, { "epoch": 0.34994028322811804, "grad_norm": 1.252587882059664, "learning_rate": 7.547703840890377e-05, "loss": 0.335, "step": 4102 }, { "epoch": 0.3500255929022351, "grad_norm": 1.4489425025731488, "learning_rate": 7.546515013464125e-05, "loss": 0.3166, "step": 4103 }, { "epoch": 0.35011090257635213, "grad_norm": 1.5939719849617098, "learning_rate": 7.545325991624986e-05, "loss": 0.3155, "step": 4104 }, { "epoch": 0.3501962122504692, "grad_norm": 1.495717133212102, "learning_rate": 7.54413677546373e-05, "loss": 0.3214, "step": 4105 }, { "epoch": 0.3502815219245862, "grad_norm": 1.2977284759870593, "learning_rate": 7.542947365071153e-05, "loss": 0.3029, "step": 4106 }, { "epoch": 0.3503668315987033, "grad_norm": 1.5921248583689127, "learning_rate": 7.54175776053806e-05, "loss": 0.3321, "step": 4107 }, { "epoch": 0.3504521412728203, "grad_norm": 1.163097416767449, "learning_rate": 7.540567961955267e-05, "loss": 0.2762, "step": 4108 }, { "epoch": 0.3505374509469374, "grad_norm": 1.5649523754003245, "learning_rate": 7.539377969413608e-05, "loss": 0.3308, "step": 4109 }, { "epoch": 0.3506227606210544, "grad_norm": 1.6431287280389015, "learning_rate": 7.538187783003939e-05, "loss": 0.2689, "step": 4110 }, { "epoch": 0.3507080702951715, "grad_norm": 1.4743762912073501, "learning_rate": 7.536997402817119e-05, "loss": 0.2915, "step": 4111 }, { "epoch": 0.3507933799692885, "grad_norm": 1.5916707556440681, "learning_rate": 7.535806828944028e-05, "loss": 0.2698, "step": 4112 }, { "epoch": 0.3508786896434056, "grad_norm": 1.9907851126339724, "learning_rate": 7.534616061475563e-05, "loss": 0.3693, "step": 4113 }, { "epoch": 0.3509639993175226, "grad_norm": 1.5237396723058503, "learning_rate": 7.533425100502629e-05, "loss": 0.3653, "step": 4114 }, { "epoch": 0.35104930899163966, "grad_norm": 1.3731086033366067, "learning_rate": 7.532233946116151e-05, "loss": 0.2875, "step": 4115 }, { "epoch": 0.3511346186657567, "grad_norm": 1.8031621654785872, "learning_rate": 7.53104259840707e-05, "loss": 0.4071, "step": 4116 }, { "epoch": 0.35121992833987375, "grad_norm": 1.3112265448931664, "learning_rate": 7.529851057466336e-05, "loss": 0.3067, "step": 4117 }, { "epoch": 0.3513052380139908, "grad_norm": 1.31155125069401, "learning_rate": 7.528659323384914e-05, "loss": 0.3249, "step": 4118 }, { "epoch": 0.35139054768810785, "grad_norm": 1.453463935291773, "learning_rate": 7.527467396253792e-05, "loss": 0.2815, "step": 4119 }, { "epoch": 0.35147585736222486, "grad_norm": 1.9938749459557044, "learning_rate": 7.526275276163966e-05, "loss": 0.3056, "step": 4120 }, { "epoch": 0.35156116703634194, "grad_norm": 1.639657961719472, "learning_rate": 7.525082963206446e-05, "loss": 0.3612, "step": 4121 }, { "epoch": 0.35164647671045896, "grad_norm": 1.547053163074845, "learning_rate": 7.523890457472261e-05, "loss": 0.3241, "step": 4122 }, { "epoch": 0.35173178638457603, "grad_norm": 1.3073050914951512, "learning_rate": 7.522697759052451e-05, "loss": 0.2788, "step": 4123 }, { "epoch": 0.35181709605869305, "grad_norm": 1.3616056008966724, "learning_rate": 7.521504868038073e-05, "loss": 0.3158, "step": 4124 }, { "epoch": 0.3519024057328101, "grad_norm": 1.4010233919258845, "learning_rate": 7.520311784520197e-05, "loss": 0.3454, "step": 4125 }, { "epoch": 0.35198771540692714, "grad_norm": 1.5169208139008945, "learning_rate": 7.519118508589911e-05, "loss": 0.2744, "step": 4126 }, { "epoch": 0.3520730250810442, "grad_norm": 1.6023258529995308, "learning_rate": 7.517925040338312e-05, "loss": 0.2746, "step": 4127 }, { "epoch": 0.35215833475516123, "grad_norm": 1.3444247658570578, "learning_rate": 7.516731379856517e-05, "loss": 0.2539, "step": 4128 }, { "epoch": 0.3522436444292783, "grad_norm": 1.3789300241122882, "learning_rate": 7.515537527235655e-05, "loss": 0.3573, "step": 4129 }, { "epoch": 0.3523289541033953, "grad_norm": 1.45730195586049, "learning_rate": 7.514343482566869e-05, "loss": 0.2889, "step": 4130 }, { "epoch": 0.3524142637775124, "grad_norm": 1.9558430793865038, "learning_rate": 7.51314924594132e-05, "loss": 0.2881, "step": 4131 }, { "epoch": 0.3524995734516294, "grad_norm": 2.055531607594331, "learning_rate": 7.511954817450181e-05, "loss": 0.3734, "step": 4132 }, { "epoch": 0.3525848831257465, "grad_norm": 1.4027894568298092, "learning_rate": 7.51076019718464e-05, "loss": 0.305, "step": 4133 }, { "epoch": 0.3526701927998635, "grad_norm": 1.6639732372717686, "learning_rate": 7.509565385235901e-05, "loss": 0.3072, "step": 4134 }, { "epoch": 0.3527555024739806, "grad_norm": 1.1673456271297786, "learning_rate": 7.50837038169518e-05, "loss": 0.2997, "step": 4135 }, { "epoch": 0.3528408121480976, "grad_norm": 1.5373506271870694, "learning_rate": 7.50717518665371e-05, "loss": 0.3025, "step": 4136 }, { "epoch": 0.35292612182221467, "grad_norm": 1.7573794361288875, "learning_rate": 7.505979800202739e-05, "loss": 0.3232, "step": 4137 }, { "epoch": 0.3530114314963317, "grad_norm": 1.622206340600284, "learning_rate": 7.504784222433525e-05, "loss": 0.2917, "step": 4138 }, { "epoch": 0.3530967411704487, "grad_norm": 1.5484647962087017, "learning_rate": 7.503588453437347e-05, "loss": 0.3148, "step": 4139 }, { "epoch": 0.3531820508445658, "grad_norm": 1.626406574379336, "learning_rate": 7.502392493305494e-05, "loss": 0.2785, "step": 4140 }, { "epoch": 0.3532673605186828, "grad_norm": 1.4869337013718502, "learning_rate": 7.501196342129273e-05, "loss": 0.3461, "step": 4141 }, { "epoch": 0.35335267019279987, "grad_norm": 1.4256208773782901, "learning_rate": 7.500000000000001e-05, "loss": 0.3281, "step": 4142 }, { "epoch": 0.3534379798669169, "grad_norm": 1.5872858638968617, "learning_rate": 7.498803467009013e-05, "loss": 0.3651, "step": 4143 }, { "epoch": 0.35352328954103396, "grad_norm": 1.5283387984355252, "learning_rate": 7.497606743247662e-05, "loss": 0.3007, "step": 4144 }, { "epoch": 0.353608599215151, "grad_norm": 1.5003152357099143, "learning_rate": 7.496409828807307e-05, "loss": 0.3449, "step": 4145 }, { "epoch": 0.35369390888926805, "grad_norm": 1.4233263594183307, "learning_rate": 7.495212723779327e-05, "loss": 0.2734, "step": 4146 }, { "epoch": 0.35377921856338507, "grad_norm": 1.4300046475208228, "learning_rate": 7.494015428255116e-05, "loss": 0.3114, "step": 4147 }, { "epoch": 0.35386452823750214, "grad_norm": 1.266357398341105, "learning_rate": 7.49281794232608e-05, "loss": 0.2705, "step": 4148 }, { "epoch": 0.35394983791161916, "grad_norm": 1.7008213247012014, "learning_rate": 7.491620266083641e-05, "loss": 0.3043, "step": 4149 }, { "epoch": 0.35403514758573623, "grad_norm": 1.389950792415877, "learning_rate": 7.490422399619235e-05, "loss": 0.3158, "step": 4150 }, { "epoch": 0.35412045725985325, "grad_norm": 1.3093855554706035, "learning_rate": 7.489224343024313e-05, "loss": 0.2677, "step": 4151 }, { "epoch": 0.3542057669339703, "grad_norm": 1.2056975171718771, "learning_rate": 7.488026096390339e-05, "loss": 0.2544, "step": 4152 }, { "epoch": 0.35429107660808734, "grad_norm": 1.3901506965237294, "learning_rate": 7.486827659808796e-05, "loss": 0.2786, "step": 4153 }, { "epoch": 0.3543763862822044, "grad_norm": 1.4110196500500811, "learning_rate": 7.485629033371175e-05, "loss": 0.3179, "step": 4154 }, { "epoch": 0.35446169595632143, "grad_norm": 1.1256501650287427, "learning_rate": 7.484430217168985e-05, "loss": 0.2729, "step": 4155 }, { "epoch": 0.3545470056304385, "grad_norm": 1.4109723379369126, "learning_rate": 7.483231211293751e-05, "loss": 0.2497, "step": 4156 }, { "epoch": 0.3546323153045555, "grad_norm": 1.3880243307311733, "learning_rate": 7.482032015837009e-05, "loss": 0.2828, "step": 4157 }, { "epoch": 0.3547176249786726, "grad_norm": 2.203051085618749, "learning_rate": 7.480832630890315e-05, "loss": 0.3539, "step": 4158 }, { "epoch": 0.3548029346527896, "grad_norm": 1.5888355254316853, "learning_rate": 7.47963305654523e-05, "loss": 0.2767, "step": 4159 }, { "epoch": 0.3548882443269067, "grad_norm": 1.4808956124700212, "learning_rate": 7.478433292893339e-05, "loss": 0.2981, "step": 4160 }, { "epoch": 0.3549735540010237, "grad_norm": 1.2384139510328742, "learning_rate": 7.477233340026235e-05, "loss": 0.338, "step": 4161 }, { "epoch": 0.3550588636751408, "grad_norm": 1.6233144968703728, "learning_rate": 7.476033198035531e-05, "loss": 0.263, "step": 4162 }, { "epoch": 0.3551441733492578, "grad_norm": 1.6678393847575337, "learning_rate": 7.47483286701285e-05, "loss": 0.3213, "step": 4163 }, { "epoch": 0.35522948302337487, "grad_norm": 1.444358882244643, "learning_rate": 7.473632347049831e-05, "loss": 0.2465, "step": 4164 }, { "epoch": 0.3553147926974919, "grad_norm": 1.4073903440510949, "learning_rate": 7.472431638238127e-05, "loss": 0.2983, "step": 4165 }, { "epoch": 0.35540010237160896, "grad_norm": 1.6340053162132264, "learning_rate": 7.471230740669405e-05, "loss": 0.2679, "step": 4166 }, { "epoch": 0.355485412045726, "grad_norm": 1.7243881688543232, "learning_rate": 7.470029654435349e-05, "loss": 0.3384, "step": 4167 }, { "epoch": 0.35557072171984305, "grad_norm": 1.563595502190453, "learning_rate": 7.468828379627653e-05, "loss": 0.2607, "step": 4168 }, { "epoch": 0.35565603139396007, "grad_norm": 1.5989946574538765, "learning_rate": 7.467626916338032e-05, "loss": 0.2704, "step": 4169 }, { "epoch": 0.35574134106807714, "grad_norm": 1.2950643745829644, "learning_rate": 7.466425264658208e-05, "loss": 0.2776, "step": 4170 }, { "epoch": 0.35582665074219416, "grad_norm": 1.5390310526312114, "learning_rate": 7.46522342467992e-05, "loss": 0.3212, "step": 4171 }, { "epoch": 0.35591196041631123, "grad_norm": 1.4328080604675804, "learning_rate": 7.464021396494925e-05, "loss": 0.3127, "step": 4172 }, { "epoch": 0.35599727009042825, "grad_norm": 1.3722706495758248, "learning_rate": 7.462819180194991e-05, "loss": 0.3046, "step": 4173 }, { "epoch": 0.3560825797645453, "grad_norm": 1.382616147686098, "learning_rate": 7.461616775871899e-05, "loss": 0.3287, "step": 4174 }, { "epoch": 0.35616788943866234, "grad_norm": 1.6908364690780078, "learning_rate": 7.460414183617447e-05, "loss": 0.3041, "step": 4175 }, { "epoch": 0.35625319911277936, "grad_norm": 1.3343316850622668, "learning_rate": 7.459211403523447e-05, "loss": 0.2607, "step": 4176 }, { "epoch": 0.35633850878689644, "grad_norm": 1.3024817912463575, "learning_rate": 7.458008435681725e-05, "loss": 0.3041, "step": 4177 }, { "epoch": 0.35642381846101345, "grad_norm": 1.5170511044951327, "learning_rate": 7.456805280184121e-05, "loss": 0.2952, "step": 4178 }, { "epoch": 0.3565091281351305, "grad_norm": 1.340921218975924, "learning_rate": 7.455601937122489e-05, "loss": 0.3189, "step": 4179 }, { "epoch": 0.35659443780924754, "grad_norm": 1.314331819965079, "learning_rate": 7.454398406588697e-05, "loss": 0.3223, "step": 4180 }, { "epoch": 0.3566797474833646, "grad_norm": 1.9313878895936387, "learning_rate": 7.45319468867463e-05, "loss": 0.336, "step": 4181 }, { "epoch": 0.35676505715748164, "grad_norm": 1.5041286551761526, "learning_rate": 7.451990783472186e-05, "loss": 0.3258, "step": 4182 }, { "epoch": 0.3568503668315987, "grad_norm": 1.6054708290550712, "learning_rate": 7.450786691073274e-05, "loss": 0.2921, "step": 4183 }, { "epoch": 0.3569356765057157, "grad_norm": 1.4826322263061211, "learning_rate": 7.449582411569822e-05, "loss": 0.2592, "step": 4184 }, { "epoch": 0.3570209861798328, "grad_norm": 1.2759791560010187, "learning_rate": 7.44837794505377e-05, "loss": 0.2797, "step": 4185 }, { "epoch": 0.3571062958539498, "grad_norm": 1.1812247056624468, "learning_rate": 7.447173291617072e-05, "loss": 0.2738, "step": 4186 }, { "epoch": 0.3571916055280669, "grad_norm": 1.435103367395486, "learning_rate": 7.4459684513517e-05, "loss": 0.2934, "step": 4187 }, { "epoch": 0.3572769152021839, "grad_norm": 1.5267378051430691, "learning_rate": 7.44476342434963e-05, "loss": 0.2553, "step": 4188 }, { "epoch": 0.357362224876301, "grad_norm": 1.4426187517285172, "learning_rate": 7.443558210702868e-05, "loss": 0.3066, "step": 4189 }, { "epoch": 0.357447534550418, "grad_norm": 1.4840466587926515, "learning_rate": 7.44235281050342e-05, "loss": 0.3073, "step": 4190 }, { "epoch": 0.3575328442245351, "grad_norm": 1.5241952429933074, "learning_rate": 7.441147223843315e-05, "loss": 0.3234, "step": 4191 }, { "epoch": 0.3576181538986521, "grad_norm": 1.575554973978516, "learning_rate": 7.439941450814591e-05, "loss": 0.322, "step": 4192 }, { "epoch": 0.35770346357276916, "grad_norm": 1.5873783125007348, "learning_rate": 7.4387354915093e-05, "loss": 0.3312, "step": 4193 }, { "epoch": 0.3577887732468862, "grad_norm": 1.8190744143464306, "learning_rate": 7.437529346019518e-05, "loss": 0.3544, "step": 4194 }, { "epoch": 0.35787408292100326, "grad_norm": 1.193727726753516, "learning_rate": 7.436323014437322e-05, "loss": 0.2359, "step": 4195 }, { "epoch": 0.3579593925951203, "grad_norm": 1.409258273888283, "learning_rate": 7.435116496854809e-05, "loss": 0.2997, "step": 4196 }, { "epoch": 0.35804470226923735, "grad_norm": 1.44090084322994, "learning_rate": 7.433909793364093e-05, "loss": 0.2593, "step": 4197 }, { "epoch": 0.35813001194335436, "grad_norm": 1.2452632803385872, "learning_rate": 7.432702904057299e-05, "loss": 0.308, "step": 4198 }, { "epoch": 0.35821532161747144, "grad_norm": 1.731010954841491, "learning_rate": 7.431495829026563e-05, "loss": 0.329, "step": 4199 }, { "epoch": 0.35830063129158846, "grad_norm": 1.7393054473710938, "learning_rate": 7.430288568364045e-05, "loss": 0.2728, "step": 4200 }, { "epoch": 0.35838594096570553, "grad_norm": 1.7898064278516481, "learning_rate": 7.429081122161908e-05, "loss": 0.3591, "step": 4201 }, { "epoch": 0.35847125063982255, "grad_norm": 1.4840130013207395, "learning_rate": 7.427873490512334e-05, "loss": 0.298, "step": 4202 }, { "epoch": 0.3585565603139396, "grad_norm": 1.563909581470781, "learning_rate": 7.426665673507521e-05, "loss": 0.327, "step": 4203 }, { "epoch": 0.35864186998805664, "grad_norm": 1.4059001487421992, "learning_rate": 7.425457671239679e-05, "loss": 0.3011, "step": 4204 }, { "epoch": 0.3587271796621737, "grad_norm": 1.3402321477753099, "learning_rate": 7.424249483801033e-05, "loss": 0.2902, "step": 4205 }, { "epoch": 0.35881248933629073, "grad_norm": 1.5545089849222813, "learning_rate": 7.423041111283822e-05, "loss": 0.2936, "step": 4206 }, { "epoch": 0.3588977990104078, "grad_norm": 1.3497369598278781, "learning_rate": 7.421832553780299e-05, "loss": 0.312, "step": 4207 }, { "epoch": 0.3589831086845248, "grad_norm": 1.1563493840575407, "learning_rate": 7.420623811382728e-05, "loss": 0.3064, "step": 4208 }, { "epoch": 0.3590684183586419, "grad_norm": 1.3791005805433056, "learning_rate": 7.419414884183393e-05, "loss": 0.2723, "step": 4209 }, { "epoch": 0.3591537280327589, "grad_norm": 1.8005694018555456, "learning_rate": 7.41820577227459e-05, "loss": 0.3714, "step": 4210 }, { "epoch": 0.359239037706876, "grad_norm": 1.663598224185657, "learning_rate": 7.416996475748625e-05, "loss": 0.3524, "step": 4211 }, { "epoch": 0.359324347380993, "grad_norm": 1.4613591365995722, "learning_rate": 7.415786994697823e-05, "loss": 0.3246, "step": 4212 }, { "epoch": 0.3594096570551101, "grad_norm": 1.1662786497009061, "learning_rate": 7.414577329214522e-05, "loss": 0.2725, "step": 4213 }, { "epoch": 0.3594949667292271, "grad_norm": 1.29366203230182, "learning_rate": 7.413367479391071e-05, "loss": 0.3246, "step": 4214 }, { "epoch": 0.3595802764033441, "grad_norm": 1.7716052299079519, "learning_rate": 7.412157445319839e-05, "loss": 0.3631, "step": 4215 }, { "epoch": 0.3596655860774612, "grad_norm": 1.3172090612345997, "learning_rate": 7.410947227093204e-05, "loss": 0.2626, "step": 4216 }, { "epoch": 0.3597508957515782, "grad_norm": 1.5265496963073362, "learning_rate": 7.40973682480356e-05, "loss": 0.3056, "step": 4217 }, { "epoch": 0.3598362054256953, "grad_norm": 1.4675115680401103, "learning_rate": 7.408526238543316e-05, "loss": 0.3173, "step": 4218 }, { "epoch": 0.3599215150998123, "grad_norm": 1.6291098341659236, "learning_rate": 7.407315468404892e-05, "loss": 0.3515, "step": 4219 }, { "epoch": 0.36000682477392937, "grad_norm": 1.3142001176365272, "learning_rate": 7.406104514480723e-05, "loss": 0.307, "step": 4220 }, { "epoch": 0.3600921344480464, "grad_norm": 1.3293194504624795, "learning_rate": 7.404893376863263e-05, "loss": 0.3062, "step": 4221 }, { "epoch": 0.36017744412216346, "grad_norm": 1.6527274477621239, "learning_rate": 7.40368205564497e-05, "loss": 0.3486, "step": 4222 }, { "epoch": 0.3602627537962805, "grad_norm": 1.4694359983230725, "learning_rate": 7.402470550918328e-05, "loss": 0.292, "step": 4223 }, { "epoch": 0.36034806347039755, "grad_norm": 1.4141440183631373, "learning_rate": 7.401258862775825e-05, "loss": 0.3063, "step": 4224 }, { "epoch": 0.36043337314451457, "grad_norm": 1.474940786951194, "learning_rate": 7.400046991309968e-05, "loss": 0.293, "step": 4225 }, { "epoch": 0.36051868281863164, "grad_norm": 1.555093836758891, "learning_rate": 7.398834936613277e-05, "loss": 0.2422, "step": 4226 }, { "epoch": 0.36060399249274866, "grad_norm": 1.8596147655121584, "learning_rate": 7.397622698778286e-05, "loss": 0.3099, "step": 4227 }, { "epoch": 0.36068930216686573, "grad_norm": 1.2104587716129196, "learning_rate": 7.396410277897543e-05, "loss": 0.3026, "step": 4228 }, { "epoch": 0.36077461184098275, "grad_norm": 1.8139240327046058, "learning_rate": 7.39519767406361e-05, "loss": 0.309, "step": 4229 }, { "epoch": 0.3608599215150998, "grad_norm": 2.003670542873419, "learning_rate": 7.39398488736906e-05, "loss": 0.3189, "step": 4230 }, { "epoch": 0.36094523118921684, "grad_norm": 1.7902239232150443, "learning_rate": 7.392771917906489e-05, "loss": 0.3611, "step": 4231 }, { "epoch": 0.3610305408633339, "grad_norm": 1.4257203807640462, "learning_rate": 7.391558765768496e-05, "loss": 0.2955, "step": 4232 }, { "epoch": 0.36111585053745093, "grad_norm": 1.5176082887928923, "learning_rate": 7.390345431047702e-05, "loss": 0.3002, "step": 4233 }, { "epoch": 0.361201160211568, "grad_norm": 1.1945877371206675, "learning_rate": 7.389131913836736e-05, "loss": 0.3048, "step": 4234 }, { "epoch": 0.361286469885685, "grad_norm": 1.3211375711438296, "learning_rate": 7.387918214228242e-05, "loss": 0.2615, "step": 4235 }, { "epoch": 0.3613717795598021, "grad_norm": 1.513981035715959, "learning_rate": 7.386704332314882e-05, "loss": 0.2664, "step": 4236 }, { "epoch": 0.3614570892339191, "grad_norm": 1.5980016059962905, "learning_rate": 7.38549026818933e-05, "loss": 0.2844, "step": 4237 }, { "epoch": 0.3615423989080362, "grad_norm": 1.4503613515006382, "learning_rate": 7.384276021944272e-05, "loss": 0.2928, "step": 4238 }, { "epoch": 0.3616277085821532, "grad_norm": 1.471855613759008, "learning_rate": 7.38306159367241e-05, "loss": 0.3485, "step": 4239 }, { "epoch": 0.3617130182562703, "grad_norm": 1.205229961406717, "learning_rate": 7.38184698346646e-05, "loss": 0.2093, "step": 4240 }, { "epoch": 0.3617983279303873, "grad_norm": 1.4829420702391927, "learning_rate": 7.380632191419147e-05, "loss": 0.3179, "step": 4241 }, { "epoch": 0.36188363760450437, "grad_norm": 2.0362400676187655, "learning_rate": 7.379417217623221e-05, "loss": 0.3203, "step": 4242 }, { "epoch": 0.3619689472786214, "grad_norm": 1.4047567280480766, "learning_rate": 7.378202062171432e-05, "loss": 0.2695, "step": 4243 }, { "epoch": 0.36205425695273846, "grad_norm": 1.6149096087501598, "learning_rate": 7.376986725156554e-05, "loss": 0.3107, "step": 4244 }, { "epoch": 0.3621395666268555, "grad_norm": 1.4874729955449901, "learning_rate": 7.375771206671372e-05, "loss": 0.2812, "step": 4245 }, { "epoch": 0.36222487630097255, "grad_norm": 1.8303704450702114, "learning_rate": 7.37455550680868e-05, "loss": 0.3165, "step": 4246 }, { "epoch": 0.36231018597508957, "grad_norm": 1.624906097046457, "learning_rate": 7.373339625661295e-05, "loss": 0.2678, "step": 4247 }, { "epoch": 0.36239549564920664, "grad_norm": 1.308576350309929, "learning_rate": 7.372123563322042e-05, "loss": 0.303, "step": 4248 }, { "epoch": 0.36248080532332366, "grad_norm": 1.2866710327416957, "learning_rate": 7.37090731988376e-05, "loss": 0.2701, "step": 4249 }, { "epoch": 0.36256611499744074, "grad_norm": 2.1745091114533084, "learning_rate": 7.369690895439303e-05, "loss": 0.338, "step": 4250 }, { "epoch": 0.36265142467155775, "grad_norm": 1.4140610668533398, "learning_rate": 7.368474290081539e-05, "loss": 0.3045, "step": 4251 }, { "epoch": 0.3627367343456748, "grad_norm": 1.5209786942223085, "learning_rate": 7.367257503903349e-05, "loss": 0.2641, "step": 4252 }, { "epoch": 0.36282204401979185, "grad_norm": 1.7939477047309853, "learning_rate": 7.366040536997628e-05, "loss": 0.3313, "step": 4253 }, { "epoch": 0.36290735369390886, "grad_norm": 1.4808094769933504, "learning_rate": 7.364823389457286e-05, "loss": 0.3143, "step": 4254 }, { "epoch": 0.36299266336802594, "grad_norm": 1.4487295636108986, "learning_rate": 7.363606061375245e-05, "loss": 0.2875, "step": 4255 }, { "epoch": 0.36307797304214295, "grad_norm": 1.4336142057783612, "learning_rate": 7.36238855284444e-05, "loss": 0.3615, "step": 4256 }, { "epoch": 0.36316328271626, "grad_norm": 1.3625060230087005, "learning_rate": 7.361170863957822e-05, "loss": 0.2954, "step": 4257 }, { "epoch": 0.36324859239037705, "grad_norm": 1.6519839930001325, "learning_rate": 7.359952994808358e-05, "loss": 0.271, "step": 4258 }, { "epoch": 0.3633339020644941, "grad_norm": 1.3618945745257973, "learning_rate": 7.358734945489022e-05, "loss": 0.2981, "step": 4259 }, { "epoch": 0.36341921173861114, "grad_norm": 1.3943882289175156, "learning_rate": 7.357516716092807e-05, "loss": 0.316, "step": 4260 }, { "epoch": 0.3635045214127282, "grad_norm": 1.3759598416245837, "learning_rate": 7.35629830671272e-05, "loss": 0.2531, "step": 4261 }, { "epoch": 0.3635898310868452, "grad_norm": 1.4937804342704875, "learning_rate": 7.355079717441777e-05, "loss": 0.2667, "step": 4262 }, { "epoch": 0.3636751407609623, "grad_norm": 1.6484140692758664, "learning_rate": 7.353860948373015e-05, "loss": 0.2491, "step": 4263 }, { "epoch": 0.3637604504350793, "grad_norm": 1.3654732556568299, "learning_rate": 7.352641999599477e-05, "loss": 0.3196, "step": 4264 }, { "epoch": 0.3638457601091964, "grad_norm": 1.3087475131210702, "learning_rate": 7.351422871214223e-05, "loss": 0.3158, "step": 4265 }, { "epoch": 0.3639310697833134, "grad_norm": 1.42468955690604, "learning_rate": 7.35020356331033e-05, "loss": 0.3073, "step": 4266 }, { "epoch": 0.3640163794574305, "grad_norm": 1.530576441015205, "learning_rate": 7.348984075980882e-05, "loss": 0.3116, "step": 4267 }, { "epoch": 0.3641016891315475, "grad_norm": 1.540077920156181, "learning_rate": 7.347764409318984e-05, "loss": 0.2555, "step": 4268 }, { "epoch": 0.3641869988056646, "grad_norm": 1.2610619314596752, "learning_rate": 7.346544563417747e-05, "loss": 0.3294, "step": 4269 }, { "epoch": 0.3642723084797816, "grad_norm": 1.9141447088531902, "learning_rate": 7.345324538370304e-05, "loss": 0.3334, "step": 4270 }, { "epoch": 0.36435761815389867, "grad_norm": 1.3899551235474363, "learning_rate": 7.344104334269795e-05, "loss": 0.2825, "step": 4271 }, { "epoch": 0.3644429278280157, "grad_norm": 1.7019880507035143, "learning_rate": 7.342883951209377e-05, "loss": 0.2592, "step": 4272 }, { "epoch": 0.36452823750213276, "grad_norm": 1.5462336125658036, "learning_rate": 7.341663389282219e-05, "loss": 0.3519, "step": 4273 }, { "epoch": 0.3646135471762498, "grad_norm": 1.594169262360319, "learning_rate": 7.340442648581505e-05, "loss": 0.327, "step": 4274 }, { "epoch": 0.36469885685036685, "grad_norm": 1.5767942048495174, "learning_rate": 7.339221729200431e-05, "loss": 0.3135, "step": 4275 }, { "epoch": 0.36478416652448387, "grad_norm": 1.3762780232222667, "learning_rate": 7.338000631232212e-05, "loss": 0.3148, "step": 4276 }, { "epoch": 0.36486947619860094, "grad_norm": 1.464915281456608, "learning_rate": 7.336779354770066e-05, "loss": 0.2507, "step": 4277 }, { "epoch": 0.36495478587271796, "grad_norm": 1.3172839942783907, "learning_rate": 7.335557899907232e-05, "loss": 0.3203, "step": 4278 }, { "epoch": 0.36504009554683503, "grad_norm": 1.4653195028204837, "learning_rate": 7.334336266736968e-05, "loss": 0.2962, "step": 4279 }, { "epoch": 0.36512540522095205, "grad_norm": 1.367423075407528, "learning_rate": 7.333114455352532e-05, "loss": 0.3354, "step": 4280 }, { "epoch": 0.3652107148950691, "grad_norm": 1.5490485439783876, "learning_rate": 7.331892465847206e-05, "loss": 0.3368, "step": 4281 }, { "epoch": 0.36529602456918614, "grad_norm": 1.3289569717227228, "learning_rate": 7.330670298314281e-05, "loss": 0.2604, "step": 4282 }, { "epoch": 0.3653813342433032, "grad_norm": 1.5952362909150917, "learning_rate": 7.329447952847064e-05, "loss": 0.2984, "step": 4283 }, { "epoch": 0.36546664391742023, "grad_norm": 1.3088995277366045, "learning_rate": 7.328225429538875e-05, "loss": 0.3066, "step": 4284 }, { "epoch": 0.3655519535915373, "grad_norm": 1.3614219512787074, "learning_rate": 7.327002728483048e-05, "loss": 0.3295, "step": 4285 }, { "epoch": 0.3656372632656543, "grad_norm": 1.4281239115170394, "learning_rate": 7.325779849772928e-05, "loss": 0.2683, "step": 4286 }, { "epoch": 0.3657225729397714, "grad_norm": 1.6948753531656995, "learning_rate": 7.324556793501875e-05, "loss": 0.3558, "step": 4287 }, { "epoch": 0.3658078826138884, "grad_norm": 1.5210414726627473, "learning_rate": 7.323333559763263e-05, "loss": 0.3502, "step": 4288 }, { "epoch": 0.3658931922880055, "grad_norm": 1.4985080292683581, "learning_rate": 7.32211014865048e-05, "loss": 0.2852, "step": 4289 }, { "epoch": 0.3659785019621225, "grad_norm": 1.3202008533110372, "learning_rate": 7.320886560256927e-05, "loss": 0.2652, "step": 4290 }, { "epoch": 0.3660638116362395, "grad_norm": 1.7107409359966237, "learning_rate": 7.319662794676018e-05, "loss": 0.3539, "step": 4291 }, { "epoch": 0.3661491213103566, "grad_norm": 1.2947343371162618, "learning_rate": 7.318438852001181e-05, "loss": 0.2999, "step": 4292 }, { "epoch": 0.3662344309844736, "grad_norm": 1.6847891049039134, "learning_rate": 7.317214732325858e-05, "loss": 0.2737, "step": 4293 }, { "epoch": 0.3663197406585907, "grad_norm": 1.5465773479113516, "learning_rate": 7.315990435743504e-05, "loss": 0.2986, "step": 4294 }, { "epoch": 0.3664050503327077, "grad_norm": 1.5373766035478993, "learning_rate": 7.31476596234759e-05, "loss": 0.3203, "step": 4295 }, { "epoch": 0.3664903600068248, "grad_norm": 1.1775635046548942, "learning_rate": 7.313541312231592e-05, "loss": 0.2476, "step": 4296 }, { "epoch": 0.3665756696809418, "grad_norm": 1.4924221952723868, "learning_rate": 7.31231648548901e-05, "loss": 0.2988, "step": 4297 }, { "epoch": 0.36666097935505887, "grad_norm": 1.460340571722467, "learning_rate": 7.311091482213353e-05, "loss": 0.3042, "step": 4298 }, { "epoch": 0.3667462890291759, "grad_norm": 1.5370674176045749, "learning_rate": 7.30986630249814e-05, "loss": 0.2681, "step": 4299 }, { "epoch": 0.36683159870329296, "grad_norm": 1.501679592747295, "learning_rate": 7.308640946436912e-05, "loss": 0.3395, "step": 4300 }, { "epoch": 0.36691690837741, "grad_norm": 1.347698222073416, "learning_rate": 7.307415414123215e-05, "loss": 0.247, "step": 4301 }, { "epoch": 0.36700221805152705, "grad_norm": 1.1703509784072004, "learning_rate": 7.306189705650613e-05, "loss": 0.2841, "step": 4302 }, { "epoch": 0.36708752772564407, "grad_norm": 1.3174073349216433, "learning_rate": 7.304963821112681e-05, "loss": 0.264, "step": 4303 }, { "epoch": 0.36717283739976114, "grad_norm": 1.493185139239735, "learning_rate": 7.30373776060301e-05, "loss": 0.2958, "step": 4304 }, { "epoch": 0.36725814707387816, "grad_norm": 1.3482652697852324, "learning_rate": 7.302511524215203e-05, "loss": 0.3237, "step": 4305 }, { "epoch": 0.36734345674799523, "grad_norm": 1.220414712022384, "learning_rate": 7.301285112042879e-05, "loss": 0.265, "step": 4306 }, { "epoch": 0.36742876642211225, "grad_norm": 1.6587330181572024, "learning_rate": 7.300058524179662e-05, "loss": 0.2716, "step": 4307 }, { "epoch": 0.3675140760962293, "grad_norm": 1.6527367523544116, "learning_rate": 7.298831760719202e-05, "loss": 0.3201, "step": 4308 }, { "epoch": 0.36759938577034634, "grad_norm": 1.5080392301399568, "learning_rate": 7.297604821755153e-05, "loss": 0.298, "step": 4309 }, { "epoch": 0.3676846954444634, "grad_norm": 1.3067331324105926, "learning_rate": 7.296377707381183e-05, "loss": 0.2569, "step": 4310 }, { "epoch": 0.36777000511858043, "grad_norm": 1.5563241596247046, "learning_rate": 7.29515041769098e-05, "loss": 0.3418, "step": 4311 }, { "epoch": 0.3678553147926975, "grad_norm": 1.714249840429153, "learning_rate": 7.293922952778239e-05, "loss": 0.3104, "step": 4312 }, { "epoch": 0.3679406244668145, "grad_norm": 1.5928775418873533, "learning_rate": 7.292695312736668e-05, "loss": 0.318, "step": 4313 }, { "epoch": 0.3680259341409316, "grad_norm": 1.441016664036712, "learning_rate": 7.291467497659996e-05, "loss": 0.3301, "step": 4314 }, { "epoch": 0.3681112438150486, "grad_norm": 1.6840717672608718, "learning_rate": 7.290239507641956e-05, "loss": 0.3281, "step": 4315 }, { "epoch": 0.3681965534891657, "grad_norm": 1.380684200760041, "learning_rate": 7.2890113427763e-05, "loss": 0.2874, "step": 4316 }, { "epoch": 0.3682818631632827, "grad_norm": 1.4871067809635161, "learning_rate": 7.287783003156793e-05, "loss": 0.2729, "step": 4317 }, { "epoch": 0.3683671728373998, "grad_norm": 1.274898042529129, "learning_rate": 7.286554488877211e-05, "loss": 0.2891, "step": 4318 }, { "epoch": 0.3684524825115168, "grad_norm": 1.6390937562186016, "learning_rate": 7.285325800031343e-05, "loss": 0.3554, "step": 4319 }, { "epoch": 0.3685377921856339, "grad_norm": 1.5170547191530506, "learning_rate": 7.284096936712994e-05, "loss": 0.2619, "step": 4320 }, { "epoch": 0.3686231018597509, "grad_norm": 1.5827702056265052, "learning_rate": 7.282867899015983e-05, "loss": 0.3088, "step": 4321 }, { "epoch": 0.36870841153386796, "grad_norm": 1.4567260422524593, "learning_rate": 7.281638687034139e-05, "loss": 0.2823, "step": 4322 }, { "epoch": 0.368793721207985, "grad_norm": 1.618799336981349, "learning_rate": 7.280409300861305e-05, "loss": 0.3355, "step": 4323 }, { "epoch": 0.36887903088210205, "grad_norm": 1.548574958497118, "learning_rate": 7.27917974059134e-05, "loss": 0.3537, "step": 4324 }, { "epoch": 0.3689643405562191, "grad_norm": 1.6706123932128372, "learning_rate": 7.277950006318113e-05, "loss": 0.353, "step": 4325 }, { "epoch": 0.36904965023033615, "grad_norm": 1.575077551491637, "learning_rate": 7.276720098135507e-05, "loss": 0.3554, "step": 4326 }, { "epoch": 0.36913495990445316, "grad_norm": 1.8081195778209982, "learning_rate": 7.275490016137421e-05, "loss": 0.3319, "step": 4327 }, { "epoch": 0.36922026957857024, "grad_norm": 1.3679158477749767, "learning_rate": 7.274259760417764e-05, "loss": 0.3083, "step": 4328 }, { "epoch": 0.36930557925268725, "grad_norm": 1.2424853465638062, "learning_rate": 7.27302933107046e-05, "loss": 0.2729, "step": 4329 }, { "epoch": 0.3693908889268043, "grad_norm": 1.291742486676809, "learning_rate": 7.271798728189445e-05, "loss": 0.3157, "step": 4330 }, { "epoch": 0.36947619860092135, "grad_norm": 1.3837875118529597, "learning_rate": 7.27056795186867e-05, "loss": 0.3027, "step": 4331 }, { "epoch": 0.36956150827503836, "grad_norm": 1.2864427243828913, "learning_rate": 7.269337002202096e-05, "loss": 0.2415, "step": 4332 }, { "epoch": 0.36964681794915544, "grad_norm": 1.390045302359493, "learning_rate": 7.268105879283703e-05, "loss": 0.3093, "step": 4333 }, { "epoch": 0.36973212762327246, "grad_norm": 1.431386386419961, "learning_rate": 7.266874583207479e-05, "loss": 0.2891, "step": 4334 }, { "epoch": 0.36981743729738953, "grad_norm": 1.3996492389236859, "learning_rate": 7.265643114067426e-05, "loss": 0.2714, "step": 4335 }, { "epoch": 0.36990274697150655, "grad_norm": 1.4052561744970984, "learning_rate": 7.26441147195756e-05, "loss": 0.2713, "step": 4336 }, { "epoch": 0.3699880566456236, "grad_norm": 1.849314552342862, "learning_rate": 7.263179656971911e-05, "loss": 0.3292, "step": 4337 }, { "epoch": 0.37007336631974064, "grad_norm": 1.3831832302572808, "learning_rate": 7.261947669204523e-05, "loss": 0.2672, "step": 4338 }, { "epoch": 0.3701586759938577, "grad_norm": 1.7834823406511973, "learning_rate": 7.26071550874945e-05, "loss": 0.3195, "step": 4339 }, { "epoch": 0.37024398566797473, "grad_norm": 1.3294427953595531, "learning_rate": 7.259483175700762e-05, "loss": 0.3001, "step": 4340 }, { "epoch": 0.3703292953420918, "grad_norm": 1.6326778666645747, "learning_rate": 7.25825067015254e-05, "loss": 0.3172, "step": 4341 }, { "epoch": 0.3704146050162088, "grad_norm": 1.7688233431110416, "learning_rate": 7.257017992198878e-05, "loss": 0.3069, "step": 4342 }, { "epoch": 0.3704999146903259, "grad_norm": 1.4269881483573605, "learning_rate": 7.255785141933888e-05, "loss": 0.2724, "step": 4343 }, { "epoch": 0.3705852243644429, "grad_norm": 1.3166053236582393, "learning_rate": 7.25455211945169e-05, "loss": 0.3606, "step": 4344 }, { "epoch": 0.37067053403856, "grad_norm": 1.5364867989009934, "learning_rate": 7.253318924846417e-05, "loss": 0.3846, "step": 4345 }, { "epoch": 0.370755843712677, "grad_norm": 1.4492346256342792, "learning_rate": 7.252085558212219e-05, "loss": 0.2961, "step": 4346 }, { "epoch": 0.3708411533867941, "grad_norm": 1.5382817800665272, "learning_rate": 7.250852019643256e-05, "loss": 0.2768, "step": 4347 }, { "epoch": 0.3709264630609111, "grad_norm": 1.6097036970445997, "learning_rate": 7.249618309233702e-05, "loss": 0.2351, "step": 4348 }, { "epoch": 0.37101177273502817, "grad_norm": 1.583355836541061, "learning_rate": 7.248384427077745e-05, "loss": 0.2932, "step": 4349 }, { "epoch": 0.3710970824091452, "grad_norm": 1.5387907611273086, "learning_rate": 7.247150373269586e-05, "loss": 0.2691, "step": 4350 }, { "epoch": 0.37118239208326226, "grad_norm": 1.4104370513900426, "learning_rate": 7.245916147903436e-05, "loss": 0.3084, "step": 4351 }, { "epoch": 0.3712677017573793, "grad_norm": 1.280952698244463, "learning_rate": 7.244681751073522e-05, "loss": 0.2908, "step": 4352 }, { "epoch": 0.37135301143149635, "grad_norm": 1.5568179767813544, "learning_rate": 7.243447182874086e-05, "loss": 0.3449, "step": 4353 }, { "epoch": 0.37143832110561337, "grad_norm": 1.506800969448036, "learning_rate": 7.242212443399378e-05, "loss": 0.3706, "step": 4354 }, { "epoch": 0.37152363077973044, "grad_norm": 1.6331521005057155, "learning_rate": 7.240977532743667e-05, "loss": 0.3032, "step": 4355 }, { "epoch": 0.37160894045384746, "grad_norm": 1.5960562232447135, "learning_rate": 7.239742451001228e-05, "loss": 0.3265, "step": 4356 }, { "epoch": 0.37169425012796453, "grad_norm": 1.4463413185297167, "learning_rate": 7.238507198266356e-05, "loss": 0.3393, "step": 4357 }, { "epoch": 0.37177955980208155, "grad_norm": 1.6653974309629103, "learning_rate": 7.237271774633354e-05, "loss": 0.3093, "step": 4358 }, { "epoch": 0.3718648694761986, "grad_norm": 1.1845964270069467, "learning_rate": 7.236036180196541e-05, "loss": 0.2531, "step": 4359 }, { "epoch": 0.37195017915031564, "grad_norm": 1.39197097961276, "learning_rate": 7.234800415050248e-05, "loss": 0.3113, "step": 4360 }, { "epoch": 0.3720354888244327, "grad_norm": 1.83333107196784, "learning_rate": 7.233564479288821e-05, "loss": 0.3093, "step": 4361 }, { "epoch": 0.37212079849854973, "grad_norm": 1.3568999460282598, "learning_rate": 7.232328373006613e-05, "loss": 0.2241, "step": 4362 }, { "epoch": 0.3722061081726668, "grad_norm": 1.4443224841168991, "learning_rate": 7.231092096297995e-05, "loss": 0.3187, "step": 4363 }, { "epoch": 0.3722914178467838, "grad_norm": 1.217793113470004, "learning_rate": 7.229855649257354e-05, "loss": 0.2313, "step": 4364 }, { "epoch": 0.3723767275209009, "grad_norm": 1.5464434551578699, "learning_rate": 7.228619031979083e-05, "loss": 0.3306, "step": 4365 }, { "epoch": 0.3724620371950179, "grad_norm": 1.6947881353521652, "learning_rate": 7.227382244557591e-05, "loss": 0.3548, "step": 4366 }, { "epoch": 0.372547346869135, "grad_norm": 1.3389232799040407, "learning_rate": 7.226145287087302e-05, "loss": 0.3083, "step": 4367 }, { "epoch": 0.372632656543252, "grad_norm": 1.514016310358315, "learning_rate": 7.224908159662649e-05, "loss": 0.2558, "step": 4368 }, { "epoch": 0.372717966217369, "grad_norm": 1.2655962717068876, "learning_rate": 7.223670862378082e-05, "loss": 0.2683, "step": 4369 }, { "epoch": 0.3728032758914861, "grad_norm": 1.5703997991028324, "learning_rate": 7.222433395328062e-05, "loss": 0.2754, "step": 4370 }, { "epoch": 0.3728885855656031, "grad_norm": 1.4558218265130378, "learning_rate": 7.22119575860706e-05, "loss": 0.2653, "step": 4371 }, { "epoch": 0.3729738952397202, "grad_norm": 1.875604277829665, "learning_rate": 7.219957952309567e-05, "loss": 0.2789, "step": 4372 }, { "epoch": 0.3730592049138372, "grad_norm": 1.786855477396208, "learning_rate": 7.21871997653008e-05, "loss": 0.2931, "step": 4373 }, { "epoch": 0.3731445145879543, "grad_norm": 1.5101406327499347, "learning_rate": 7.217481831363115e-05, "loss": 0.316, "step": 4374 }, { "epoch": 0.3732298242620713, "grad_norm": 1.6494376235643289, "learning_rate": 7.216243516903194e-05, "loss": 0.2911, "step": 4375 }, { "epoch": 0.37331513393618837, "grad_norm": 1.7378892778085713, "learning_rate": 7.215005033244857e-05, "loss": 0.3172, "step": 4376 }, { "epoch": 0.3734004436103054, "grad_norm": 1.988786254782528, "learning_rate": 7.213766380482658e-05, "loss": 0.259, "step": 4377 }, { "epoch": 0.37348575328442246, "grad_norm": 1.8848063568299482, "learning_rate": 7.21252755871116e-05, "loss": 0.3313, "step": 4378 }, { "epoch": 0.3735710629585395, "grad_norm": 1.770226954441182, "learning_rate": 7.211288568024937e-05, "loss": 0.3156, "step": 4379 }, { "epoch": 0.37365637263265655, "grad_norm": 1.6639303951927757, "learning_rate": 7.210049408518587e-05, "loss": 0.3007, "step": 4380 }, { "epoch": 0.37374168230677357, "grad_norm": 1.6907247178994895, "learning_rate": 7.208810080286706e-05, "loss": 0.3137, "step": 4381 }, { "epoch": 0.37382699198089064, "grad_norm": 1.7459211498865272, "learning_rate": 7.207570583423915e-05, "loss": 0.3002, "step": 4382 }, { "epoch": 0.37391230165500766, "grad_norm": 1.3160494991663427, "learning_rate": 7.20633091802484e-05, "loss": 0.2817, "step": 4383 }, { "epoch": 0.37399761132912474, "grad_norm": 1.7150094684306092, "learning_rate": 7.205091084184122e-05, "loss": 0.3077, "step": 4384 }, { "epoch": 0.37408292100324175, "grad_norm": 1.7399993292489349, "learning_rate": 7.20385108199642e-05, "loss": 0.3453, "step": 4385 }, { "epoch": 0.3741682306773588, "grad_norm": 1.5081557441546642, "learning_rate": 7.202610911556399e-05, "loss": 0.3255, "step": 4386 }, { "epoch": 0.37425354035147584, "grad_norm": 1.5607486064512994, "learning_rate": 7.201370572958738e-05, "loss": 0.3124, "step": 4387 }, { "epoch": 0.3743388500255929, "grad_norm": 1.5149100278822254, "learning_rate": 7.200130066298133e-05, "loss": 0.3131, "step": 4388 }, { "epoch": 0.37442415969970994, "grad_norm": 1.5569536571625238, "learning_rate": 7.198889391669288e-05, "loss": 0.3046, "step": 4389 }, { "epoch": 0.374509469373827, "grad_norm": 1.2370297337954357, "learning_rate": 7.197648549166924e-05, "loss": 0.2277, "step": 4390 }, { "epoch": 0.374594779047944, "grad_norm": 1.537432276777182, "learning_rate": 7.19640753888577e-05, "loss": 0.313, "step": 4391 }, { "epoch": 0.3746800887220611, "grad_norm": 1.5450121519234288, "learning_rate": 7.195166360920574e-05, "loss": 0.2979, "step": 4392 }, { "epoch": 0.3747653983961781, "grad_norm": 1.3605759675877236, "learning_rate": 7.19392501536609e-05, "loss": 0.2822, "step": 4393 }, { "epoch": 0.3748507080702952, "grad_norm": 1.5706050467447592, "learning_rate": 7.19268350231709e-05, "loss": 0.3367, "step": 4394 }, { "epoch": 0.3749360177444122, "grad_norm": 1.5403130577242727, "learning_rate": 7.191441821868358e-05, "loss": 0.2842, "step": 4395 }, { "epoch": 0.3750213274185293, "grad_norm": 1.540489967727062, "learning_rate": 7.190199974114686e-05, "loss": 0.3651, "step": 4396 }, { "epoch": 0.3751066370926463, "grad_norm": 1.4855013137960515, "learning_rate": 7.188957959150886e-05, "loss": 0.4192, "step": 4397 }, { "epoch": 0.3751919467667634, "grad_norm": 1.3459384308318671, "learning_rate": 7.187715777071777e-05, "loss": 0.2313, "step": 4398 }, { "epoch": 0.3752772564408804, "grad_norm": 1.7325026569648836, "learning_rate": 7.186473427972195e-05, "loss": 0.384, "step": 4399 }, { "epoch": 0.37536256611499746, "grad_norm": 1.4726878195700241, "learning_rate": 7.185230911946984e-05, "loss": 0.3774, "step": 4400 }, { "epoch": 0.3754478757891145, "grad_norm": 1.5450810520429672, "learning_rate": 7.183988229091006e-05, "loss": 0.2972, "step": 4401 }, { "epoch": 0.37553318546323156, "grad_norm": 1.7396280279147585, "learning_rate": 7.182745379499135e-05, "loss": 0.3225, "step": 4402 }, { "epoch": 0.3756184951373486, "grad_norm": 1.157491790379851, "learning_rate": 7.181502363266251e-05, "loss": 0.306, "step": 4403 }, { "epoch": 0.37570380481146565, "grad_norm": 1.5327775303762587, "learning_rate": 7.180259180487254e-05, "loss": 0.3475, "step": 4404 }, { "epoch": 0.37578911448558266, "grad_norm": 1.5953728883916565, "learning_rate": 7.179015831257053e-05, "loss": 0.3538, "step": 4405 }, { "epoch": 0.3758744241596997, "grad_norm": 1.3971353787448542, "learning_rate": 7.177772315670574e-05, "loss": 0.3102, "step": 4406 }, { "epoch": 0.37595973383381676, "grad_norm": 1.1452064995938662, "learning_rate": 7.176528633822752e-05, "loss": 0.253, "step": 4407 }, { "epoch": 0.3760450435079338, "grad_norm": 1.2610253002130467, "learning_rate": 7.175284785808534e-05, "loss": 0.2646, "step": 4408 }, { "epoch": 0.37613035318205085, "grad_norm": 1.703438826178883, "learning_rate": 7.174040771722884e-05, "loss": 0.3087, "step": 4409 }, { "epoch": 0.37621566285616787, "grad_norm": 1.2967114632688255, "learning_rate": 7.172796591660771e-05, "loss": 0.2569, "step": 4410 }, { "epoch": 0.37630097253028494, "grad_norm": 1.3635562739550307, "learning_rate": 7.171552245717186e-05, "loss": 0.324, "step": 4411 }, { "epoch": 0.37638628220440196, "grad_norm": 1.4931130459779185, "learning_rate": 7.170307733987127e-05, "loss": 0.3178, "step": 4412 }, { "epoch": 0.37647159187851903, "grad_norm": 1.27530631331439, "learning_rate": 7.169063056565605e-05, "loss": 0.289, "step": 4413 }, { "epoch": 0.37655690155263605, "grad_norm": 1.5193347951009653, "learning_rate": 7.167818213547646e-05, "loss": 0.3137, "step": 4414 }, { "epoch": 0.3766422112267531, "grad_norm": 1.5425241336758437, "learning_rate": 7.166573205028285e-05, "loss": 0.3323, "step": 4415 }, { "epoch": 0.37672752090087014, "grad_norm": 1.5152090229333743, "learning_rate": 7.165328031102572e-05, "loss": 0.2983, "step": 4416 }, { "epoch": 0.3768128305749872, "grad_norm": 1.228421594268837, "learning_rate": 7.164082691865573e-05, "loss": 0.3226, "step": 4417 }, { "epoch": 0.37689814024910423, "grad_norm": 1.69459166781825, "learning_rate": 7.162837187412356e-05, "loss": 0.3414, "step": 4418 }, { "epoch": 0.3769834499232213, "grad_norm": 1.6350273916300655, "learning_rate": 7.161591517838018e-05, "loss": 0.2798, "step": 4419 }, { "epoch": 0.3770687595973383, "grad_norm": 1.5543809137971947, "learning_rate": 7.160345683237652e-05, "loss": 0.3092, "step": 4420 }, { "epoch": 0.3771540692714554, "grad_norm": 1.39827586683754, "learning_rate": 7.159099683706372e-05, "loss": 0.3071, "step": 4421 }, { "epoch": 0.3772393789455724, "grad_norm": 1.34530425562088, "learning_rate": 7.157853519339306e-05, "loss": 0.2711, "step": 4422 }, { "epoch": 0.3773246886196895, "grad_norm": 1.864786176108398, "learning_rate": 7.156607190231591e-05, "loss": 0.2774, "step": 4423 }, { "epoch": 0.3774099982938065, "grad_norm": 1.5696979287993604, "learning_rate": 7.155360696478377e-05, "loss": 0.3174, "step": 4424 }, { "epoch": 0.3774953079679236, "grad_norm": 1.468408504312652, "learning_rate": 7.154114038174828e-05, "loss": 0.284, "step": 4425 }, { "epoch": 0.3775806176420406, "grad_norm": 1.3508476174831083, "learning_rate": 7.152867215416117e-05, "loss": 0.2679, "step": 4426 }, { "epoch": 0.37766592731615767, "grad_norm": 1.6541776828094772, "learning_rate": 7.151620228297436e-05, "loss": 0.3288, "step": 4427 }, { "epoch": 0.3777512369902747, "grad_norm": 1.3435179931491983, "learning_rate": 7.150373076913986e-05, "loss": 0.2736, "step": 4428 }, { "epoch": 0.37783654666439176, "grad_norm": 1.251651769304745, "learning_rate": 7.149125761360975e-05, "loss": 0.2562, "step": 4429 }, { "epoch": 0.3779218563385088, "grad_norm": 1.6051948852107758, "learning_rate": 7.147878281733634e-05, "loss": 0.3129, "step": 4430 }, { "epoch": 0.37800716601262585, "grad_norm": 1.461095954339367, "learning_rate": 7.1466306381272e-05, "loss": 0.3036, "step": 4431 }, { "epoch": 0.37809247568674287, "grad_norm": 1.481242196750609, "learning_rate": 7.145382830636924e-05, "loss": 0.277, "step": 4432 }, { "epoch": 0.37817778536085994, "grad_norm": 1.44148109469155, "learning_rate": 7.14413485935807e-05, "loss": 0.3121, "step": 4433 }, { "epoch": 0.37826309503497696, "grad_norm": 1.5877467767138853, "learning_rate": 7.142886724385913e-05, "loss": 0.3039, "step": 4434 }, { "epoch": 0.37834840470909403, "grad_norm": 1.3224369991706648, "learning_rate": 7.141638425815743e-05, "loss": 0.2214, "step": 4435 }, { "epoch": 0.37843371438321105, "grad_norm": 1.5261194341675064, "learning_rate": 7.140389963742859e-05, "loss": 0.3006, "step": 4436 }, { "epoch": 0.3785190240573281, "grad_norm": 1.4375679995168449, "learning_rate": 7.139141338262573e-05, "loss": 0.2783, "step": 4437 }, { "epoch": 0.37860433373144514, "grad_norm": 1.1509361851664233, "learning_rate": 7.137892549470218e-05, "loss": 0.277, "step": 4438 }, { "epoch": 0.3786896434055622, "grad_norm": 1.646106681175478, "learning_rate": 7.136643597461124e-05, "loss": 0.2612, "step": 4439 }, { "epoch": 0.37877495307967923, "grad_norm": 1.6403031533037298, "learning_rate": 7.135394482330646e-05, "loss": 0.3087, "step": 4440 }, { "epoch": 0.3788602627537963, "grad_norm": 1.5705839463381501, "learning_rate": 7.134145204174148e-05, "loss": 0.264, "step": 4441 }, { "epoch": 0.3789455724279133, "grad_norm": 1.5962531683762666, "learning_rate": 7.132895763087002e-05, "loss": 0.2907, "step": 4442 }, { "epoch": 0.3790308821020304, "grad_norm": 1.687729360746664, "learning_rate": 7.1316461591646e-05, "loss": 0.3036, "step": 4443 }, { "epoch": 0.3791161917761474, "grad_norm": 1.4084835540439686, "learning_rate": 7.130396392502342e-05, "loss": 0.3232, "step": 4444 }, { "epoch": 0.37920150145026443, "grad_norm": 1.3415673738467055, "learning_rate": 7.129146463195641e-05, "loss": 0.2935, "step": 4445 }, { "epoch": 0.3792868111243815, "grad_norm": 1.4353790392693846, "learning_rate": 7.127896371339921e-05, "loss": 0.2689, "step": 4446 }, { "epoch": 0.3793721207984985, "grad_norm": 1.350416193491896, "learning_rate": 7.126646117030619e-05, "loss": 0.2914, "step": 4447 }, { "epoch": 0.3794574304726156, "grad_norm": 1.4725265648199157, "learning_rate": 7.125395700363189e-05, "loss": 0.3062, "step": 4448 }, { "epoch": 0.3795427401467326, "grad_norm": 1.329915892444083, "learning_rate": 7.124145121433092e-05, "loss": 0.2889, "step": 4449 }, { "epoch": 0.3796280498208497, "grad_norm": 1.6152672170416078, "learning_rate": 7.122894380335799e-05, "loss": 0.2977, "step": 4450 }, { "epoch": 0.3797133594949667, "grad_norm": 1.5836934717412687, "learning_rate": 7.121643477166805e-05, "loss": 0.2538, "step": 4451 }, { "epoch": 0.3797986691690838, "grad_norm": 1.7819698953665164, "learning_rate": 7.120392412021605e-05, "loss": 0.3111, "step": 4452 }, { "epoch": 0.3798839788432008, "grad_norm": 1.949951767936282, "learning_rate": 7.11914118499571e-05, "loss": 0.3208, "step": 4453 }, { "epoch": 0.37996928851731787, "grad_norm": 1.32922452404095, "learning_rate": 7.11788979618465e-05, "loss": 0.34, "step": 4454 }, { "epoch": 0.3800545981914349, "grad_norm": 1.3900845795565544, "learning_rate": 7.116638245683957e-05, "loss": 0.2877, "step": 4455 }, { "epoch": 0.38013990786555196, "grad_norm": 1.8832624222338015, "learning_rate": 7.115386533589181e-05, "loss": 0.3596, "step": 4456 }, { "epoch": 0.380225217539669, "grad_norm": 1.335914031597822, "learning_rate": 7.114134659995886e-05, "loss": 0.2688, "step": 4457 }, { "epoch": 0.38031052721378605, "grad_norm": 1.3490306977214326, "learning_rate": 7.112882624999641e-05, "loss": 0.2631, "step": 4458 }, { "epoch": 0.38039583688790307, "grad_norm": 1.4811324189594515, "learning_rate": 7.111630428696039e-05, "loss": 0.3214, "step": 4459 }, { "epoch": 0.38048114656202014, "grad_norm": 1.4295243967542237, "learning_rate": 7.110378071180674e-05, "loss": 0.2306, "step": 4460 }, { "epoch": 0.38056645623613716, "grad_norm": 1.5315168498547969, "learning_rate": 7.109125552549157e-05, "loss": 0.3155, "step": 4461 }, { "epoch": 0.38065176591025424, "grad_norm": 1.4643071933991942, "learning_rate": 7.107872872897113e-05, "loss": 0.3178, "step": 4462 }, { "epoch": 0.38073707558437125, "grad_norm": 1.28925446901551, "learning_rate": 7.106620032320174e-05, "loss": 0.2684, "step": 4463 }, { "epoch": 0.3808223852584883, "grad_norm": 1.2889682619731742, "learning_rate": 7.105367030913993e-05, "loss": 0.2966, "step": 4464 }, { "epoch": 0.38090769493260535, "grad_norm": 1.3996739808315657, "learning_rate": 7.104113868774225e-05, "loss": 0.2486, "step": 4465 }, { "epoch": 0.3809930046067224, "grad_norm": 1.5988222078744647, "learning_rate": 7.102860545996546e-05, "loss": 0.1741, "step": 4466 }, { "epoch": 0.38107831428083944, "grad_norm": 1.4579466534183845, "learning_rate": 7.101607062676638e-05, "loss": 0.3599, "step": 4467 }, { "epoch": 0.3811636239549565, "grad_norm": 1.386083064987905, "learning_rate": 7.1003534189102e-05, "loss": 0.2516, "step": 4468 }, { "epoch": 0.3812489336290735, "grad_norm": 1.5083421162442658, "learning_rate": 7.099099614792937e-05, "loss": 0.2818, "step": 4469 }, { "epoch": 0.3813342433031906, "grad_norm": 2.0031560791270735, "learning_rate": 7.097845650420574e-05, "loss": 0.3832, "step": 4470 }, { "epoch": 0.3814195529773076, "grad_norm": 1.273001145568356, "learning_rate": 7.096591525888845e-05, "loss": 0.3189, "step": 4471 }, { "epoch": 0.3815048626514247, "grad_norm": 1.3187343939992253, "learning_rate": 7.095337241293493e-05, "loss": 0.2536, "step": 4472 }, { "epoch": 0.3815901723255417, "grad_norm": 1.5737800838876461, "learning_rate": 7.094082796730279e-05, "loss": 0.3236, "step": 4473 }, { "epoch": 0.3816754819996588, "grad_norm": 1.579303716719737, "learning_rate": 7.092828192294968e-05, "loss": 0.2776, "step": 4474 }, { "epoch": 0.3817607916737758, "grad_norm": 1.251548189802904, "learning_rate": 7.091573428083348e-05, "loss": 0.3397, "step": 4475 }, { "epoch": 0.3818461013478929, "grad_norm": 1.2982236904194173, "learning_rate": 7.090318504191211e-05, "loss": 0.2733, "step": 4476 }, { "epoch": 0.3819314110220099, "grad_norm": 1.3205868588313832, "learning_rate": 7.089063420714366e-05, "loss": 0.2733, "step": 4477 }, { "epoch": 0.38201672069612697, "grad_norm": 1.5384484629808992, "learning_rate": 7.087808177748628e-05, "loss": 0.3131, "step": 4478 }, { "epoch": 0.382102030370244, "grad_norm": 1.1459045503621361, "learning_rate": 7.08655277538983e-05, "loss": 0.2406, "step": 4479 }, { "epoch": 0.38218734004436106, "grad_norm": 1.695712978331773, "learning_rate": 7.085297213733816e-05, "loss": 0.3436, "step": 4480 }, { "epoch": 0.3822726497184781, "grad_norm": 1.415216638810336, "learning_rate": 7.084041492876442e-05, "loss": 0.3058, "step": 4481 }, { "epoch": 0.3823579593925951, "grad_norm": 1.4036794692652694, "learning_rate": 7.082785612913573e-05, "loss": 0.2747, "step": 4482 }, { "epoch": 0.38244326906671217, "grad_norm": 1.5035891508230086, "learning_rate": 7.081529573941091e-05, "loss": 0.2802, "step": 4483 }, { "epoch": 0.3825285787408292, "grad_norm": 1.5613064594306942, "learning_rate": 7.08027337605489e-05, "loss": 0.2845, "step": 4484 }, { "epoch": 0.38261388841494626, "grad_norm": 1.4793115285223695, "learning_rate": 7.079017019350866e-05, "loss": 0.2235, "step": 4485 }, { "epoch": 0.3826991980890633, "grad_norm": 1.4909646181172165, "learning_rate": 7.077760503924945e-05, "loss": 0.3162, "step": 4486 }, { "epoch": 0.38278450776318035, "grad_norm": 1.4548060220360375, "learning_rate": 7.076503829873048e-05, "loss": 0.2653, "step": 4487 }, { "epoch": 0.38286981743729737, "grad_norm": 1.6898984465474707, "learning_rate": 7.075246997291119e-05, "loss": 0.2657, "step": 4488 }, { "epoch": 0.38295512711141444, "grad_norm": 1.502906367844182, "learning_rate": 7.073990006275111e-05, "loss": 0.2632, "step": 4489 }, { "epoch": 0.38304043678553146, "grad_norm": 1.5942791826922043, "learning_rate": 7.072732856920983e-05, "loss": 0.4223, "step": 4490 }, { "epoch": 0.38312574645964853, "grad_norm": 1.9349089494126306, "learning_rate": 7.07147554932472e-05, "loss": 0.3131, "step": 4491 }, { "epoch": 0.38321105613376555, "grad_norm": 1.769459165069068, "learning_rate": 7.070218083582306e-05, "loss": 0.3588, "step": 4492 }, { "epoch": 0.3832963658078826, "grad_norm": 1.5582036268683386, "learning_rate": 7.068960459789741e-05, "loss": 0.3227, "step": 4493 }, { "epoch": 0.38338167548199964, "grad_norm": 1.3828058188756547, "learning_rate": 7.06770267804304e-05, "loss": 0.3273, "step": 4494 }, { "epoch": 0.3834669851561167, "grad_norm": 1.563419681731942, "learning_rate": 7.066444738438227e-05, "loss": 0.2464, "step": 4495 }, { "epoch": 0.38355229483023373, "grad_norm": 1.446627127257955, "learning_rate": 7.06518664107134e-05, "loss": 0.3167, "step": 4496 }, { "epoch": 0.3836376045043508, "grad_norm": 1.5659177403677336, "learning_rate": 7.063928386038428e-05, "loss": 0.344, "step": 4497 }, { "epoch": 0.3837229141784678, "grad_norm": 1.3272815494434178, "learning_rate": 7.06266997343555e-05, "loss": 0.3102, "step": 4498 }, { "epoch": 0.3838082238525849, "grad_norm": 1.485232617433028, "learning_rate": 7.061411403358781e-05, "loss": 0.2972, "step": 4499 }, { "epoch": 0.3838935335267019, "grad_norm": 1.3395280382640038, "learning_rate": 7.060152675904206e-05, "loss": 0.2937, "step": 4500 }, { "epoch": 0.383978843200819, "grad_norm": 1.7377499572112904, "learning_rate": 7.058893791167921e-05, "loss": 0.2748, "step": 4501 }, { "epoch": 0.384064152874936, "grad_norm": 1.423485901864935, "learning_rate": 7.057634749246037e-05, "loss": 0.2665, "step": 4502 }, { "epoch": 0.3841494625490531, "grad_norm": 1.62689186887609, "learning_rate": 7.056375550234673e-05, "loss": 0.319, "step": 4503 }, { "epoch": 0.3842347722231701, "grad_norm": 1.4314249456504933, "learning_rate": 7.055116194229964e-05, "loss": 0.2749, "step": 4504 }, { "epoch": 0.38432008189728717, "grad_norm": 1.5113308825434426, "learning_rate": 7.053856681328055e-05, "loss": 0.3101, "step": 4505 }, { "epoch": 0.3844053915714042, "grad_norm": 1.4752007719565419, "learning_rate": 7.052597011625101e-05, "loss": 0.2447, "step": 4506 }, { "epoch": 0.38449070124552126, "grad_norm": 1.3760972413423425, "learning_rate": 7.051337185217273e-05, "loss": 0.372, "step": 4507 }, { "epoch": 0.3845760109196383, "grad_norm": 2.0004767803283325, "learning_rate": 7.050077202200753e-05, "loss": 0.3133, "step": 4508 }, { "epoch": 0.38466132059375535, "grad_norm": 1.6176786552240165, "learning_rate": 7.048817062671732e-05, "loss": 0.3085, "step": 4509 }, { "epoch": 0.38474663026787237, "grad_norm": 1.5237066569121365, "learning_rate": 7.047556766726418e-05, "loss": 0.3332, "step": 4510 }, { "epoch": 0.38483193994198944, "grad_norm": 1.3421003284501412, "learning_rate": 7.046296314461021e-05, "loss": 0.264, "step": 4511 }, { "epoch": 0.38491724961610646, "grad_norm": 1.67283054033511, "learning_rate": 7.045035705971778e-05, "loss": 0.3565, "step": 4512 }, { "epoch": 0.38500255929022353, "grad_norm": 1.4272345673927316, "learning_rate": 7.043774941354925e-05, "loss": 0.3058, "step": 4513 }, { "epoch": 0.38508786896434055, "grad_norm": 1.3820827304185932, "learning_rate": 7.042514020706714e-05, "loss": 0.3155, "step": 4514 }, { "epoch": 0.3851731786384576, "grad_norm": 1.5793080946801872, "learning_rate": 7.041252944123413e-05, "loss": 0.3546, "step": 4515 }, { "epoch": 0.38525848831257464, "grad_norm": 1.6127377534400469, "learning_rate": 7.039991711701298e-05, "loss": 0.2929, "step": 4516 }, { "epoch": 0.3853437979866917, "grad_norm": 1.3065250271466937, "learning_rate": 7.038730323536655e-05, "loss": 0.3197, "step": 4517 }, { "epoch": 0.38542910766080873, "grad_norm": 1.5736861546710388, "learning_rate": 7.037468779725787e-05, "loss": 0.2698, "step": 4518 }, { "epoch": 0.3855144173349258, "grad_norm": 1.2892041215262011, "learning_rate": 7.036207080365003e-05, "loss": 0.2524, "step": 4519 }, { "epoch": 0.3855997270090428, "grad_norm": 1.5455092749365158, "learning_rate": 7.034945225550629e-05, "loss": 0.3421, "step": 4520 }, { "epoch": 0.38568503668315984, "grad_norm": 1.6513798925812262, "learning_rate": 7.033683215379002e-05, "loss": 0.3266, "step": 4521 }, { "epoch": 0.3857703463572769, "grad_norm": 1.3138183149148563, "learning_rate": 7.032421049946467e-05, "loss": 0.2431, "step": 4522 }, { "epoch": 0.38585565603139393, "grad_norm": 1.3271590815574126, "learning_rate": 7.031158729349386e-05, "loss": 0.2885, "step": 4523 }, { "epoch": 0.385940965705511, "grad_norm": 1.3315747952828012, "learning_rate": 7.02989625368413e-05, "loss": 0.3573, "step": 4524 }, { "epoch": 0.386026275379628, "grad_norm": 1.5156007745124165, "learning_rate": 7.02863362304708e-05, "loss": 0.3802, "step": 4525 }, { "epoch": 0.3861115850537451, "grad_norm": 1.5279067774940591, "learning_rate": 7.027370837534635e-05, "loss": 0.2431, "step": 4526 }, { "epoch": 0.3861968947278621, "grad_norm": 1.5311221633798902, "learning_rate": 7.0261078972432e-05, "loss": 0.287, "step": 4527 }, { "epoch": 0.3862822044019792, "grad_norm": 1.7197512397966825, "learning_rate": 7.024844802269193e-05, "loss": 0.3525, "step": 4528 }, { "epoch": 0.3863675140760962, "grad_norm": 1.377543091697106, "learning_rate": 7.023581552709045e-05, "loss": 0.3056, "step": 4529 }, { "epoch": 0.3864528237502133, "grad_norm": 1.5646905421247825, "learning_rate": 7.0223181486592e-05, "loss": 0.2837, "step": 4530 }, { "epoch": 0.3865381334243303, "grad_norm": 1.2348928210434789, "learning_rate": 7.02105459021611e-05, "loss": 0.2498, "step": 4531 }, { "epoch": 0.3866234430984474, "grad_norm": 1.7044107326095346, "learning_rate": 7.019790877476241e-05, "loss": 0.3191, "step": 4532 }, { "epoch": 0.3867087527725644, "grad_norm": 1.533493500443582, "learning_rate": 7.018527010536073e-05, "loss": 0.2377, "step": 4533 }, { "epoch": 0.38679406244668146, "grad_norm": 1.4421556793703096, "learning_rate": 7.017262989492095e-05, "loss": 0.2793, "step": 4534 }, { "epoch": 0.3868793721207985, "grad_norm": 1.316512785542691, "learning_rate": 7.015998814440804e-05, "loss": 0.25, "step": 4535 }, { "epoch": 0.38696468179491555, "grad_norm": 1.2895395956597897, "learning_rate": 7.01473448547872e-05, "loss": 0.2323, "step": 4536 }, { "epoch": 0.3870499914690326, "grad_norm": 1.6074258797318735, "learning_rate": 7.013470002702363e-05, "loss": 0.2545, "step": 4537 }, { "epoch": 0.38713530114314965, "grad_norm": 1.6384351375198285, "learning_rate": 7.012205366208272e-05, "loss": 0.2891, "step": 4538 }, { "epoch": 0.38722061081726666, "grad_norm": 1.887845984068671, "learning_rate": 7.010940576092993e-05, "loss": 0.3353, "step": 4539 }, { "epoch": 0.38730592049138374, "grad_norm": 1.3958496785511645, "learning_rate": 7.009675632453088e-05, "loss": 0.2721, "step": 4540 }, { "epoch": 0.38739123016550076, "grad_norm": 1.6703004642120742, "learning_rate": 7.008410535385126e-05, "loss": 0.283, "step": 4541 }, { "epoch": 0.38747653983961783, "grad_norm": 1.3193126255689345, "learning_rate": 7.007145284985694e-05, "loss": 0.2369, "step": 4542 }, { "epoch": 0.38756184951373485, "grad_norm": 1.4310684622029664, "learning_rate": 7.005879881351384e-05, "loss": 0.2585, "step": 4543 }, { "epoch": 0.3876471591878519, "grad_norm": 1.3851591984802982, "learning_rate": 7.004614324578806e-05, "loss": 0.3339, "step": 4544 }, { "epoch": 0.38773246886196894, "grad_norm": 1.512167063238164, "learning_rate": 7.003348614764575e-05, "loss": 0.2948, "step": 4545 }, { "epoch": 0.387817778536086, "grad_norm": 1.5777215725255826, "learning_rate": 7.002082752005324e-05, "loss": 0.2805, "step": 4546 }, { "epoch": 0.38790308821020303, "grad_norm": 1.3115208015784663, "learning_rate": 7.000816736397695e-05, "loss": 0.3047, "step": 4547 }, { "epoch": 0.3879883978843201, "grad_norm": 1.4947810776321049, "learning_rate": 6.999550568038339e-05, "loss": 0.2952, "step": 4548 }, { "epoch": 0.3880737075584371, "grad_norm": 1.2299786272750706, "learning_rate": 6.998284247023924e-05, "loss": 0.2762, "step": 4549 }, { "epoch": 0.3881590172325542, "grad_norm": 1.4945677302644735, "learning_rate": 6.997017773451124e-05, "loss": 0.281, "step": 4550 }, { "epoch": 0.3882443269066712, "grad_norm": 1.395592123038142, "learning_rate": 6.995751147416632e-05, "loss": 0.291, "step": 4551 }, { "epoch": 0.3883296365807883, "grad_norm": 1.702299845577068, "learning_rate": 6.994484369017143e-05, "loss": 0.2787, "step": 4552 }, { "epoch": 0.3884149462549053, "grad_norm": 1.5073641570014673, "learning_rate": 6.993217438349371e-05, "loss": 0.289, "step": 4553 }, { "epoch": 0.3885002559290224, "grad_norm": 1.8571922248524761, "learning_rate": 6.99195035551004e-05, "loss": 0.354, "step": 4554 }, { "epoch": 0.3885855656031394, "grad_norm": 1.8980786647674177, "learning_rate": 6.990683120595884e-05, "loss": 0.3151, "step": 4555 }, { "epoch": 0.38867087527725647, "grad_norm": 1.3290734887223816, "learning_rate": 6.98941573370365e-05, "loss": 0.2471, "step": 4556 }, { "epoch": 0.3887561849513735, "grad_norm": 2.0068219186706218, "learning_rate": 6.988148194930099e-05, "loss": 0.3674, "step": 4557 }, { "epoch": 0.38884149462549056, "grad_norm": 1.3917257688539573, "learning_rate": 6.986880504371996e-05, "loss": 0.3222, "step": 4558 }, { "epoch": 0.3889268042996076, "grad_norm": 1.5883562407120946, "learning_rate": 6.985612662126125e-05, "loss": 0.2701, "step": 4559 }, { "epoch": 0.3890121139737246, "grad_norm": 1.6173405229346307, "learning_rate": 6.98434466828928e-05, "loss": 0.3168, "step": 4560 }, { "epoch": 0.38909742364784167, "grad_norm": 1.4096693217974976, "learning_rate": 6.983076522958262e-05, "loss": 0.3096, "step": 4561 }, { "epoch": 0.3891827333219587, "grad_norm": 1.7074700321902099, "learning_rate": 6.981808226229892e-05, "loss": 0.316, "step": 4562 }, { "epoch": 0.38926804299607576, "grad_norm": 1.7294541391918639, "learning_rate": 6.980539778200995e-05, "loss": 0.3447, "step": 4563 }, { "epoch": 0.3893533526701928, "grad_norm": 2.016712930111886, "learning_rate": 6.979271178968409e-05, "loss": 0.3429, "step": 4564 }, { "epoch": 0.38943866234430985, "grad_norm": 1.665511597285209, "learning_rate": 6.978002428628987e-05, "loss": 0.2904, "step": 4565 }, { "epoch": 0.38952397201842687, "grad_norm": 1.4408264648173768, "learning_rate": 6.97673352727959e-05, "loss": 0.2827, "step": 4566 }, { "epoch": 0.38960928169254394, "grad_norm": 1.4380485483179404, "learning_rate": 6.975464475017093e-05, "loss": 0.2302, "step": 4567 }, { "epoch": 0.38969459136666096, "grad_norm": 1.301428556476657, "learning_rate": 6.974195271938383e-05, "loss": 0.3175, "step": 4568 }, { "epoch": 0.38977990104077803, "grad_norm": 1.3887090449985435, "learning_rate": 6.972925918140352e-05, "loss": 0.2562, "step": 4569 }, { "epoch": 0.38986521071489505, "grad_norm": 1.2998455909581152, "learning_rate": 6.971656413719912e-05, "loss": 0.293, "step": 4570 }, { "epoch": 0.3899505203890121, "grad_norm": 1.3520040507453412, "learning_rate": 6.970386758773983e-05, "loss": 0.2616, "step": 4571 }, { "epoch": 0.39003583006312914, "grad_norm": 1.2969086424359084, "learning_rate": 6.969116953399496e-05, "loss": 0.2985, "step": 4572 }, { "epoch": 0.3901211397372462, "grad_norm": 1.5802365519502544, "learning_rate": 6.967846997693392e-05, "loss": 0.336, "step": 4573 }, { "epoch": 0.39020644941136323, "grad_norm": 1.377324134137384, "learning_rate": 6.966576891752628e-05, "loss": 0.3164, "step": 4574 }, { "epoch": 0.3902917590854803, "grad_norm": 1.5819099285560505, "learning_rate": 6.965306635674168e-05, "loss": 0.2954, "step": 4575 }, { "epoch": 0.3903770687595973, "grad_norm": 1.153300156768468, "learning_rate": 6.964036229554991e-05, "loss": 0.2948, "step": 4576 }, { "epoch": 0.3904623784337144, "grad_norm": 1.5604367175509557, "learning_rate": 6.962765673492083e-05, "loss": 0.2654, "step": 4577 }, { "epoch": 0.3905476881078314, "grad_norm": 1.490088459435667, "learning_rate": 6.961494967582447e-05, "loss": 0.2871, "step": 4578 }, { "epoch": 0.3906329977819485, "grad_norm": 1.715694555321379, "learning_rate": 6.960224111923093e-05, "loss": 0.3044, "step": 4579 }, { "epoch": 0.3907183074560655, "grad_norm": 1.2394286895372637, "learning_rate": 6.958953106611045e-05, "loss": 0.2372, "step": 4580 }, { "epoch": 0.3908036171301826, "grad_norm": 1.585613649874231, "learning_rate": 6.957681951743338e-05, "loss": 0.3297, "step": 4581 }, { "epoch": 0.3908889268042996, "grad_norm": 1.506513757682751, "learning_rate": 6.956410647417017e-05, "loss": 0.2415, "step": 4582 }, { "epoch": 0.39097423647841667, "grad_norm": 1.2725742901473736, "learning_rate": 6.955139193729139e-05, "loss": 0.2718, "step": 4583 }, { "epoch": 0.3910595461525337, "grad_norm": 1.8934275764977166, "learning_rate": 6.953867590776773e-05, "loss": 0.3761, "step": 4584 }, { "epoch": 0.39114485582665076, "grad_norm": 1.4531075261203663, "learning_rate": 6.952595838656998e-05, "loss": 0.3444, "step": 4585 }, { "epoch": 0.3912301655007678, "grad_norm": 1.6509228090971697, "learning_rate": 6.951323937466909e-05, "loss": 0.3049, "step": 4586 }, { "epoch": 0.39131547517488485, "grad_norm": 1.272620565102642, "learning_rate": 6.950051887303606e-05, "loss": 0.257, "step": 4587 }, { "epoch": 0.39140078484900187, "grad_norm": 1.7904365959712494, "learning_rate": 6.948779688264203e-05, "loss": 0.2748, "step": 4588 }, { "epoch": 0.39148609452311894, "grad_norm": 1.4970478571273336, "learning_rate": 6.947507340445827e-05, "loss": 0.2717, "step": 4589 }, { "epoch": 0.39157140419723596, "grad_norm": 1.373259917182607, "learning_rate": 6.946234843945616e-05, "loss": 0.2315, "step": 4590 }, { "epoch": 0.39165671387135303, "grad_norm": 1.457366168517509, "learning_rate": 6.944962198860715e-05, "loss": 0.2091, "step": 4591 }, { "epoch": 0.39174202354547005, "grad_norm": 1.3971432285380783, "learning_rate": 6.943689405288288e-05, "loss": 0.1899, "step": 4592 }, { "epoch": 0.3918273332195871, "grad_norm": 1.5854819258878023, "learning_rate": 6.942416463325502e-05, "loss": 0.3365, "step": 4593 }, { "epoch": 0.39191264289370414, "grad_norm": 1.69847851995856, "learning_rate": 6.94114337306954e-05, "loss": 0.2851, "step": 4594 }, { "epoch": 0.3919979525678212, "grad_norm": 1.6070201645294004, "learning_rate": 6.939870134617599e-05, "loss": 0.2399, "step": 4595 }, { "epoch": 0.39208326224193824, "grad_norm": 1.56035711211305, "learning_rate": 6.93859674806688e-05, "loss": 0.3152, "step": 4596 }, { "epoch": 0.39216857191605525, "grad_norm": 1.5736167646491517, "learning_rate": 6.937323213514601e-05, "loss": 0.2808, "step": 4597 }, { "epoch": 0.3922538815901723, "grad_norm": 1.4950705433153098, "learning_rate": 6.93604953105799e-05, "loss": 0.3767, "step": 4598 }, { "epoch": 0.39233919126428934, "grad_norm": 1.8536279803069522, "learning_rate": 6.934775700794286e-05, "loss": 0.3353, "step": 4599 }, { "epoch": 0.3924245009384064, "grad_norm": 1.3575979434205159, "learning_rate": 6.933501722820739e-05, "loss": 0.2499, "step": 4600 }, { "epoch": 0.39250981061252344, "grad_norm": 1.3805691037620866, "learning_rate": 6.932227597234609e-05, "loss": 0.2846, "step": 4601 }, { "epoch": 0.3925951202866405, "grad_norm": 1.531683101517452, "learning_rate": 6.930953324133169e-05, "loss": 0.2805, "step": 4602 }, { "epoch": 0.3926804299607575, "grad_norm": 1.3539707580096503, "learning_rate": 6.929678903613705e-05, "loss": 0.2873, "step": 4603 }, { "epoch": 0.3927657396348746, "grad_norm": 1.5910443545715849, "learning_rate": 6.928404335773512e-05, "loss": 0.3399, "step": 4604 }, { "epoch": 0.3928510493089916, "grad_norm": 1.5362417639565131, "learning_rate": 6.927129620709895e-05, "loss": 0.2703, "step": 4605 }, { "epoch": 0.3929363589831087, "grad_norm": 1.4814984205831707, "learning_rate": 6.92585475852017e-05, "loss": 0.3081, "step": 4606 }, { "epoch": 0.3930216686572257, "grad_norm": 1.4779464452536302, "learning_rate": 6.924579749301671e-05, "loss": 0.3213, "step": 4607 }, { "epoch": 0.3931069783313428, "grad_norm": 1.4357899155180385, "learning_rate": 6.923304593151734e-05, "loss": 0.232, "step": 4608 }, { "epoch": 0.3931922880054598, "grad_norm": 1.0475815623355937, "learning_rate": 6.92202929016771e-05, "loss": 0.2211, "step": 4609 }, { "epoch": 0.3932775976795769, "grad_norm": 1.344976885288535, "learning_rate": 6.920753840446968e-05, "loss": 0.2761, "step": 4610 }, { "epoch": 0.3933629073536939, "grad_norm": 1.4498307819939664, "learning_rate": 6.919478244086873e-05, "loss": 0.3459, "step": 4611 }, { "epoch": 0.39344821702781096, "grad_norm": 1.4729667356507057, "learning_rate": 6.918202501184815e-05, "loss": 0.3048, "step": 4612 }, { "epoch": 0.393533526701928, "grad_norm": 1.2964531545623978, "learning_rate": 6.91692661183819e-05, "loss": 0.3007, "step": 4613 }, { "epoch": 0.39361883637604506, "grad_norm": 1.3283965338091153, "learning_rate": 6.915650576144405e-05, "loss": 0.3408, "step": 4614 }, { "epoch": 0.3937041460501621, "grad_norm": 1.463822153529901, "learning_rate": 6.914374394200878e-05, "loss": 0.2488, "step": 4615 }, { "epoch": 0.39378945572427915, "grad_norm": 1.4963961860801878, "learning_rate": 6.91309806610504e-05, "loss": 0.3365, "step": 4616 }, { "epoch": 0.39387476539839616, "grad_norm": 1.4169530672526711, "learning_rate": 6.911821591954328e-05, "loss": 0.2731, "step": 4617 }, { "epoch": 0.39396007507251324, "grad_norm": 1.565964025159736, "learning_rate": 6.910544971846198e-05, "loss": 0.3119, "step": 4618 }, { "epoch": 0.39404538474663026, "grad_norm": 2.190601003735815, "learning_rate": 6.909268205878114e-05, "loss": 0.3299, "step": 4619 }, { "epoch": 0.39413069442074733, "grad_norm": 1.4464501929307045, "learning_rate": 6.907991294147546e-05, "loss": 0.3051, "step": 4620 }, { "epoch": 0.39421600409486435, "grad_norm": 1.4319848951629957, "learning_rate": 6.906714236751983e-05, "loss": 0.219, "step": 4621 }, { "epoch": 0.3943013137689814, "grad_norm": 1.4275950143328886, "learning_rate": 6.90543703378892e-05, "loss": 0.3097, "step": 4622 }, { "epoch": 0.39438662344309844, "grad_norm": 1.742195710988218, "learning_rate": 6.904159685355865e-05, "loss": 0.2867, "step": 4623 }, { "epoch": 0.3944719331172155, "grad_norm": 1.393571924371653, "learning_rate": 6.902882191550337e-05, "loss": 0.2816, "step": 4624 }, { "epoch": 0.39455724279133253, "grad_norm": 1.742296634557364, "learning_rate": 6.901604552469865e-05, "loss": 0.2686, "step": 4625 }, { "epoch": 0.3946425524654496, "grad_norm": 1.476607549070974, "learning_rate": 6.900326768211991e-05, "loss": 0.2896, "step": 4626 }, { "epoch": 0.3947278621395666, "grad_norm": 1.7152135359059182, "learning_rate": 6.899048838874267e-05, "loss": 0.3569, "step": 4627 }, { "epoch": 0.3948131718136837, "grad_norm": 1.8387632011390922, "learning_rate": 6.897770764554255e-05, "loss": 0.317, "step": 4628 }, { "epoch": 0.3948984814878007, "grad_norm": 1.311800179785466, "learning_rate": 6.896492545349529e-05, "loss": 0.2702, "step": 4629 }, { "epoch": 0.3949837911619178, "grad_norm": 1.460611153938112, "learning_rate": 6.895214181357675e-05, "loss": 0.2842, "step": 4630 }, { "epoch": 0.3950691008360348, "grad_norm": 1.6120617115617952, "learning_rate": 6.89393567267629e-05, "loss": 0.2917, "step": 4631 }, { "epoch": 0.3951544105101519, "grad_norm": 1.1927707550283513, "learning_rate": 6.892657019402983e-05, "loss": 0.2976, "step": 4632 }, { "epoch": 0.3952397201842689, "grad_norm": 1.2825498732597815, "learning_rate": 6.891378221635367e-05, "loss": 0.2734, "step": 4633 }, { "epoch": 0.39532502985838597, "grad_norm": 1.5736170676689993, "learning_rate": 6.890099279471076e-05, "loss": 0.2702, "step": 4634 }, { "epoch": 0.395410339532503, "grad_norm": 1.5566537968976903, "learning_rate": 6.888820193007749e-05, "loss": 0.258, "step": 4635 }, { "epoch": 0.39549564920662, "grad_norm": 1.902375771254981, "learning_rate": 6.887540962343037e-05, "loss": 0.3284, "step": 4636 }, { "epoch": 0.3955809588807371, "grad_norm": 1.8539463583831013, "learning_rate": 6.886261587574604e-05, "loss": 0.3052, "step": 4637 }, { "epoch": 0.3956662685548541, "grad_norm": 1.5961864024422563, "learning_rate": 6.88498206880012e-05, "loss": 0.2782, "step": 4638 }, { "epoch": 0.39575157822897117, "grad_norm": 1.132478125320946, "learning_rate": 6.883702406117275e-05, "loss": 0.2434, "step": 4639 }, { "epoch": 0.3958368879030882, "grad_norm": 1.4613461662347391, "learning_rate": 6.88242259962376e-05, "loss": 0.3153, "step": 4640 }, { "epoch": 0.39592219757720526, "grad_norm": 1.5547258381932811, "learning_rate": 6.881142649417281e-05, "loss": 0.3, "step": 4641 }, { "epoch": 0.3960075072513223, "grad_norm": 1.686727276242056, "learning_rate": 6.879862555595559e-05, "loss": 0.2861, "step": 4642 }, { "epoch": 0.39609281692543935, "grad_norm": 1.479333447290605, "learning_rate": 6.87858231825632e-05, "loss": 0.2871, "step": 4643 }, { "epoch": 0.39617812659955637, "grad_norm": 1.6713693157051168, "learning_rate": 6.877301937497302e-05, "loss": 0.2323, "step": 4644 }, { "epoch": 0.39626343627367344, "grad_norm": 2.6435680353837383, "learning_rate": 6.87602141341626e-05, "loss": 0.2764, "step": 4645 }, { "epoch": 0.39634874594779046, "grad_norm": 1.5422445021958167, "learning_rate": 6.874740746110951e-05, "loss": 0.3511, "step": 4646 }, { "epoch": 0.39643405562190753, "grad_norm": 1.403750847997095, "learning_rate": 6.87345993567915e-05, "loss": 0.3002, "step": 4647 }, { "epoch": 0.39651936529602455, "grad_norm": 1.5491902912890552, "learning_rate": 6.872178982218635e-05, "loss": 0.2992, "step": 4648 }, { "epoch": 0.3966046749701416, "grad_norm": 1.4985863063779725, "learning_rate": 6.870897885827206e-05, "loss": 0.267, "step": 4649 }, { "epoch": 0.39668998464425864, "grad_norm": 1.6645671813461298, "learning_rate": 6.869616646602664e-05, "loss": 0.2988, "step": 4650 }, { "epoch": 0.3967752943183757, "grad_norm": 1.502275330666259, "learning_rate": 6.868335264642827e-05, "loss": 0.3, "step": 4651 }, { "epoch": 0.39686060399249273, "grad_norm": 1.2325238217417498, "learning_rate": 6.867053740045521e-05, "loss": 0.2508, "step": 4652 }, { "epoch": 0.3969459136666098, "grad_norm": 1.3103779486357645, "learning_rate": 6.865772072908583e-05, "loss": 0.3048, "step": 4653 }, { "epoch": 0.3970312233407268, "grad_norm": 1.6371840951224823, "learning_rate": 6.864490263329862e-05, "loss": 0.3025, "step": 4654 }, { "epoch": 0.3971165330148439, "grad_norm": 1.287458774684394, "learning_rate": 6.863208311407216e-05, "loss": 0.287, "step": 4655 }, { "epoch": 0.3972018426889609, "grad_norm": 1.7996126632378862, "learning_rate": 6.861926217238519e-05, "loss": 0.3064, "step": 4656 }, { "epoch": 0.397287152363078, "grad_norm": 1.3241550126392145, "learning_rate": 6.860643980921648e-05, "loss": 0.2847, "step": 4657 }, { "epoch": 0.397372462037195, "grad_norm": 1.554806191979148, "learning_rate": 6.859361602554499e-05, "loss": 0.2791, "step": 4658 }, { "epoch": 0.3974577717113121, "grad_norm": 1.275912261421778, "learning_rate": 6.858079082234969e-05, "loss": 0.2596, "step": 4659 }, { "epoch": 0.3975430813854291, "grad_norm": 1.6908249770262418, "learning_rate": 6.856796420060976e-05, "loss": 0.3321, "step": 4660 }, { "epoch": 0.39762839105954617, "grad_norm": 1.4943277079822164, "learning_rate": 6.855513616130445e-05, "loss": 0.2991, "step": 4661 }, { "epoch": 0.3977137007336632, "grad_norm": 1.5328483802881172, "learning_rate": 6.854230670541306e-05, "loss": 0.2956, "step": 4662 }, { "epoch": 0.39779901040778026, "grad_norm": 1.7092325433651747, "learning_rate": 6.852947583391511e-05, "loss": 0.3001, "step": 4663 }, { "epoch": 0.3978843200818973, "grad_norm": 1.3917726217507325, "learning_rate": 6.851664354779015e-05, "loss": 0.2748, "step": 4664 }, { "epoch": 0.39796962975601435, "grad_norm": 1.628888246724926, "learning_rate": 6.850380984801783e-05, "loss": 0.2603, "step": 4665 }, { "epoch": 0.39805493943013137, "grad_norm": 1.8033425079272147, "learning_rate": 6.849097473557798e-05, "loss": 0.2407, "step": 4666 }, { "epoch": 0.39814024910424844, "grad_norm": 1.8081164791092323, "learning_rate": 6.847813821145045e-05, "loss": 0.3652, "step": 4667 }, { "epoch": 0.39822555877836546, "grad_norm": 1.7124288669486774, "learning_rate": 6.84653002766153e-05, "loss": 0.2479, "step": 4668 }, { "epoch": 0.39831086845248254, "grad_norm": 1.5764689301472854, "learning_rate": 6.845246093205256e-05, "loss": 0.3081, "step": 4669 }, { "epoch": 0.39839617812659955, "grad_norm": 1.3440145631324723, "learning_rate": 6.84396201787425e-05, "loss": 0.2867, "step": 4670 }, { "epoch": 0.3984814878007166, "grad_norm": 1.3758896637138014, "learning_rate": 6.842677801766541e-05, "loss": 0.2784, "step": 4671 }, { "epoch": 0.39856679747483365, "grad_norm": 1.5340133178560547, "learning_rate": 6.841393444980177e-05, "loss": 0.334, "step": 4672 }, { "epoch": 0.3986521071489507, "grad_norm": 1.5273972914108909, "learning_rate": 6.840108947613205e-05, "loss": 0.2835, "step": 4673 }, { "epoch": 0.39873741682306774, "grad_norm": 1.2305077440652907, "learning_rate": 6.838824309763696e-05, "loss": 0.2722, "step": 4674 }, { "epoch": 0.39882272649718475, "grad_norm": 1.6475275791621005, "learning_rate": 6.83753953152972e-05, "loss": 0.2862, "step": 4675 }, { "epoch": 0.3989080361713018, "grad_norm": 1.746284218454933, "learning_rate": 6.836254613009367e-05, "loss": 0.2862, "step": 4676 }, { "epoch": 0.39899334584541885, "grad_norm": 1.5733628892681732, "learning_rate": 6.834969554300732e-05, "loss": 0.3365, "step": 4677 }, { "epoch": 0.3990786555195359, "grad_norm": 1.5689946048514956, "learning_rate": 6.833684355501923e-05, "loss": 0.3176, "step": 4678 }, { "epoch": 0.39916396519365294, "grad_norm": 1.3491703981218737, "learning_rate": 6.832399016711058e-05, "loss": 0.2719, "step": 4679 }, { "epoch": 0.39924927486777, "grad_norm": 1.7031189104723774, "learning_rate": 6.831113538026264e-05, "loss": 0.3225, "step": 4680 }, { "epoch": 0.399334584541887, "grad_norm": 1.7788293016868693, "learning_rate": 6.829827919545682e-05, "loss": 0.2748, "step": 4681 }, { "epoch": 0.3994198942160041, "grad_norm": 1.442093682673117, "learning_rate": 6.828542161367462e-05, "loss": 0.2874, "step": 4682 }, { "epoch": 0.3995052038901211, "grad_norm": 1.414656372132328, "learning_rate": 6.827256263589766e-05, "loss": 0.2037, "step": 4683 }, { "epoch": 0.3995905135642382, "grad_norm": 1.4901555791699153, "learning_rate": 6.825970226310762e-05, "loss": 0.2484, "step": 4684 }, { "epoch": 0.3996758232383552, "grad_norm": 1.6049473419345228, "learning_rate": 6.824684049628638e-05, "loss": 0.3102, "step": 4685 }, { "epoch": 0.3997611329124723, "grad_norm": 1.5065416109179481, "learning_rate": 6.82339773364158e-05, "loss": 0.2904, "step": 4686 }, { "epoch": 0.3998464425865893, "grad_norm": 1.72071497173756, "learning_rate": 6.822111278447796e-05, "loss": 0.2971, "step": 4687 }, { "epoch": 0.3999317522607064, "grad_norm": 1.4908493992704064, "learning_rate": 6.820824684145499e-05, "loss": 0.2582, "step": 4688 }, { "epoch": 0.4000170619348234, "grad_norm": 1.540378221165017, "learning_rate": 6.819537950832912e-05, "loss": 0.3105, "step": 4689 }, { "epoch": 0.40010237160894047, "grad_norm": 1.475722624067136, "learning_rate": 6.818251078608273e-05, "loss": 0.2859, "step": 4690 }, { "epoch": 0.4001876812830575, "grad_norm": 1.6429182464775187, "learning_rate": 6.816964067569825e-05, "loss": 0.3022, "step": 4691 }, { "epoch": 0.40027299095717456, "grad_norm": 1.8708256348222843, "learning_rate": 6.815676917815826e-05, "loss": 0.3018, "step": 4692 }, { "epoch": 0.4003583006312916, "grad_norm": 1.681261611387863, "learning_rate": 6.814389629444543e-05, "loss": 0.2971, "step": 4693 }, { "epoch": 0.40044361030540865, "grad_norm": 1.6500267575724208, "learning_rate": 6.813102202554254e-05, "loss": 0.2745, "step": 4694 }, { "epoch": 0.40052891997952567, "grad_norm": 1.38256385421221, "learning_rate": 6.811814637243246e-05, "loss": 0.2487, "step": 4695 }, { "epoch": 0.40061422965364274, "grad_norm": 1.6625298260759604, "learning_rate": 6.810526933609818e-05, "loss": 0.2893, "step": 4696 }, { "epoch": 0.40069953932775976, "grad_norm": 1.527707888228031, "learning_rate": 6.80923909175228e-05, "loss": 0.2719, "step": 4697 }, { "epoch": 0.40078484900187683, "grad_norm": 1.576015308644645, "learning_rate": 6.807951111768952e-05, "loss": 0.241, "step": 4698 }, { "epoch": 0.40087015867599385, "grad_norm": 1.6825287674460996, "learning_rate": 6.806662993758164e-05, "loss": 0.3669, "step": 4699 }, { "epoch": 0.4009554683501109, "grad_norm": 1.8161988498865154, "learning_rate": 6.805374737818257e-05, "loss": 0.3379, "step": 4700 }, { "epoch": 0.40104077802422794, "grad_norm": 1.7608824646006853, "learning_rate": 6.804086344047583e-05, "loss": 0.3706, "step": 4701 }, { "epoch": 0.401126087698345, "grad_norm": 1.6890095211052047, "learning_rate": 6.802797812544502e-05, "loss": 0.3486, "step": 4702 }, { "epoch": 0.40121139737246203, "grad_norm": 1.525512768572268, "learning_rate": 6.80150914340739e-05, "loss": 0.343, "step": 4703 }, { "epoch": 0.4012967070465791, "grad_norm": 1.6695286337975233, "learning_rate": 6.800220336734627e-05, "loss": 0.3034, "step": 4704 }, { "epoch": 0.4013820167206961, "grad_norm": 1.4851168736149685, "learning_rate": 6.798931392624608e-05, "loss": 0.3217, "step": 4705 }, { "epoch": 0.4014673263948132, "grad_norm": 1.5656454850187258, "learning_rate": 6.797642311175736e-05, "loss": 0.2683, "step": 4706 }, { "epoch": 0.4015526360689302, "grad_norm": 1.474514140912765, "learning_rate": 6.796353092486427e-05, "loss": 0.2983, "step": 4707 }, { "epoch": 0.4016379457430473, "grad_norm": 1.5058859737436858, "learning_rate": 6.795063736655104e-05, "loss": 0.3351, "step": 4708 }, { "epoch": 0.4017232554171643, "grad_norm": 1.4680075696651282, "learning_rate": 6.793774243780206e-05, "loss": 0.2918, "step": 4709 }, { "epoch": 0.4018085650912814, "grad_norm": 1.317443710514474, "learning_rate": 6.792484613960175e-05, "loss": 0.2931, "step": 4710 }, { "epoch": 0.4018938747653984, "grad_norm": 1.4555940878154447, "learning_rate": 6.79119484729347e-05, "loss": 0.2497, "step": 4711 }, { "epoch": 0.4019791844395154, "grad_norm": 1.6225142907687375, "learning_rate": 6.789904943878554e-05, "loss": 0.3167, "step": 4712 }, { "epoch": 0.4020644941136325, "grad_norm": 1.3311394085863255, "learning_rate": 6.78861490381391e-05, "loss": 0.2691, "step": 4713 }, { "epoch": 0.4021498037877495, "grad_norm": 1.5026474477615848, "learning_rate": 6.787324727198021e-05, "loss": 0.2736, "step": 4714 }, { "epoch": 0.4022351134618666, "grad_norm": 1.457659465775207, "learning_rate": 6.786034414129388e-05, "loss": 0.2258, "step": 4715 }, { "epoch": 0.4023204231359836, "grad_norm": 1.3390062567384504, "learning_rate": 6.784743964706518e-05, "loss": 0.2586, "step": 4716 }, { "epoch": 0.40240573281010067, "grad_norm": 1.5468754623874783, "learning_rate": 6.783453379027931e-05, "loss": 0.2841, "step": 4717 }, { "epoch": 0.4024910424842177, "grad_norm": 1.9170068632429835, "learning_rate": 6.782162657192154e-05, "loss": 0.3211, "step": 4718 }, { "epoch": 0.40257635215833476, "grad_norm": 1.6549958573027332, "learning_rate": 6.780871799297731e-05, "loss": 0.3336, "step": 4719 }, { "epoch": 0.4026616618324518, "grad_norm": 1.3920109732184394, "learning_rate": 6.779580805443208e-05, "loss": 0.2924, "step": 4720 }, { "epoch": 0.40274697150656885, "grad_norm": 1.2671315210209797, "learning_rate": 6.778289675727149e-05, "loss": 0.3115, "step": 4721 }, { "epoch": 0.40283228118068587, "grad_norm": 1.371751763233006, "learning_rate": 6.776998410248122e-05, "loss": 0.2671, "step": 4722 }, { "epoch": 0.40291759085480294, "grad_norm": 1.5866332383734976, "learning_rate": 6.775707009104708e-05, "loss": 0.2544, "step": 4723 }, { "epoch": 0.40300290052891996, "grad_norm": 1.5288381460983147, "learning_rate": 6.774415472395501e-05, "loss": 0.2396, "step": 4724 }, { "epoch": 0.40308821020303703, "grad_norm": 1.846050056266541, "learning_rate": 6.773123800219103e-05, "loss": 0.3256, "step": 4725 }, { "epoch": 0.40317351987715405, "grad_norm": 1.395396458405523, "learning_rate": 6.771831992674123e-05, "loss": 0.2704, "step": 4726 }, { "epoch": 0.4032588295512711, "grad_norm": 1.3338806648209176, "learning_rate": 6.770540049859188e-05, "loss": 0.2836, "step": 4727 }, { "epoch": 0.40334413922538814, "grad_norm": 1.3227004177337123, "learning_rate": 6.769247971872927e-05, "loss": 0.2709, "step": 4728 }, { "epoch": 0.4034294488995052, "grad_norm": 1.2198057248255307, "learning_rate": 6.767955758813986e-05, "loss": 0.2184, "step": 4729 }, { "epoch": 0.40351475857362223, "grad_norm": 1.604279311877894, "learning_rate": 6.766663410781019e-05, "loss": 0.2834, "step": 4730 }, { "epoch": 0.4036000682477393, "grad_norm": 1.4499869970856736, "learning_rate": 6.765370927872687e-05, "loss": 0.275, "step": 4731 }, { "epoch": 0.4036853779218563, "grad_norm": 1.4083872342751873, "learning_rate": 6.764078310187668e-05, "loss": 0.261, "step": 4732 }, { "epoch": 0.4037706875959734, "grad_norm": 1.468926926365788, "learning_rate": 6.76278555782464e-05, "loss": 0.3218, "step": 4733 }, { "epoch": 0.4038559972700904, "grad_norm": 1.5468175087465845, "learning_rate": 6.761492670882306e-05, "loss": 0.2597, "step": 4734 }, { "epoch": 0.4039413069442075, "grad_norm": 1.098861304364317, "learning_rate": 6.760199649459366e-05, "loss": 0.2649, "step": 4735 }, { "epoch": 0.4040266166183245, "grad_norm": 1.432545125094113, "learning_rate": 6.758906493654535e-05, "loss": 0.2889, "step": 4736 }, { "epoch": 0.4041119262924416, "grad_norm": 1.4892349991848974, "learning_rate": 6.757613203566542e-05, "loss": 0.2535, "step": 4737 }, { "epoch": 0.4041972359665586, "grad_norm": 1.6863720267605191, "learning_rate": 6.75631977929412e-05, "loss": 0.33, "step": 4738 }, { "epoch": 0.4042825456406757, "grad_norm": 1.4677454171009277, "learning_rate": 6.755026220936016e-05, "loss": 0.259, "step": 4739 }, { "epoch": 0.4043678553147927, "grad_norm": 1.7032873793721703, "learning_rate": 6.753732528590986e-05, "loss": 0.2225, "step": 4740 }, { "epoch": 0.40445316498890976, "grad_norm": 1.4061490340544598, "learning_rate": 6.752438702357797e-05, "loss": 0.2872, "step": 4741 }, { "epoch": 0.4045384746630268, "grad_norm": 1.6354133298155482, "learning_rate": 6.751144742335227e-05, "loss": 0.3345, "step": 4742 }, { "epoch": 0.40462378433714385, "grad_norm": 1.6193790053241155, "learning_rate": 6.749850648622061e-05, "loss": 0.2773, "step": 4743 }, { "epoch": 0.4047090940112609, "grad_norm": 1.6087996556251798, "learning_rate": 6.748556421317094e-05, "loss": 0.2972, "step": 4744 }, { "epoch": 0.40479440368537795, "grad_norm": 1.525580517699773, "learning_rate": 6.747262060519139e-05, "loss": 0.2971, "step": 4745 }, { "epoch": 0.40487971335949496, "grad_norm": 1.4003914336943053, "learning_rate": 6.745967566327009e-05, "loss": 0.2378, "step": 4746 }, { "epoch": 0.40496502303361204, "grad_norm": 1.6362124732065841, "learning_rate": 6.744672938839534e-05, "loss": 0.3152, "step": 4747 }, { "epoch": 0.40505033270772905, "grad_norm": 1.4131028939516943, "learning_rate": 6.743378178155551e-05, "loss": 0.2627, "step": 4748 }, { "epoch": 0.40513564238184613, "grad_norm": 1.6917404302791732, "learning_rate": 6.742083284373907e-05, "loss": 0.3055, "step": 4749 }, { "epoch": 0.40522095205596315, "grad_norm": 1.64134621889421, "learning_rate": 6.740788257593463e-05, "loss": 0.2539, "step": 4750 }, { "epoch": 0.40530626173008016, "grad_norm": 1.3712801466960194, "learning_rate": 6.739493097913088e-05, "loss": 0.2686, "step": 4751 }, { "epoch": 0.40539157140419724, "grad_norm": 1.81781679574932, "learning_rate": 6.738197805431657e-05, "loss": 0.3294, "step": 4752 }, { "epoch": 0.40547688107831426, "grad_norm": 1.3130989524585548, "learning_rate": 6.73690238024806e-05, "loss": 0.2972, "step": 4753 }, { "epoch": 0.40556219075243133, "grad_norm": 1.3554860754307458, "learning_rate": 6.735606822461195e-05, "loss": 0.2533, "step": 4754 }, { "epoch": 0.40564750042654835, "grad_norm": 1.3367435633609805, "learning_rate": 6.734311132169974e-05, "loss": 0.3049, "step": 4755 }, { "epoch": 0.4057328101006654, "grad_norm": 1.5774879302758598, "learning_rate": 6.733015309473313e-05, "loss": 0.2941, "step": 4756 }, { "epoch": 0.40581811977478244, "grad_norm": 1.3348080952903854, "learning_rate": 6.731719354470143e-05, "loss": 0.3004, "step": 4757 }, { "epoch": 0.4059034294488995, "grad_norm": 1.6472083838261977, "learning_rate": 6.730423267259402e-05, "loss": 0.2782, "step": 4758 }, { "epoch": 0.40598873912301653, "grad_norm": 1.3219518472324805, "learning_rate": 6.729127047940042e-05, "loss": 0.2912, "step": 4759 }, { "epoch": 0.4060740487971336, "grad_norm": 1.2290556528854621, "learning_rate": 6.727830696611018e-05, "loss": 0.2489, "step": 4760 }, { "epoch": 0.4061593584712506, "grad_norm": 1.4525354481506312, "learning_rate": 6.726534213371304e-05, "loss": 0.2752, "step": 4761 }, { "epoch": 0.4062446681453677, "grad_norm": 1.438487584420817, "learning_rate": 6.725237598319877e-05, "loss": 0.2765, "step": 4762 }, { "epoch": 0.4063299778194847, "grad_norm": 1.5139714295325781, "learning_rate": 6.723940851555726e-05, "loss": 0.2844, "step": 4763 }, { "epoch": 0.4064152874936018, "grad_norm": 1.7191551424578038, "learning_rate": 6.722643973177855e-05, "loss": 0.284, "step": 4764 }, { "epoch": 0.4065005971677188, "grad_norm": 1.586272706935678, "learning_rate": 6.721346963285266e-05, "loss": 0.2751, "step": 4765 }, { "epoch": 0.4065859068418359, "grad_norm": 1.4115030298041737, "learning_rate": 6.720049821976988e-05, "loss": 0.2557, "step": 4766 }, { "epoch": 0.4066712165159529, "grad_norm": 1.3294349941674626, "learning_rate": 6.718752549352045e-05, "loss": 0.2977, "step": 4767 }, { "epoch": 0.40675652619006997, "grad_norm": 1.2977928682234485, "learning_rate": 6.717455145509477e-05, "loss": 0.2393, "step": 4768 }, { "epoch": 0.406841835864187, "grad_norm": 1.5159516916454125, "learning_rate": 6.716157610548338e-05, "loss": 0.3351, "step": 4769 }, { "epoch": 0.40692714553830406, "grad_norm": 1.4790604871151443, "learning_rate": 6.714859944567681e-05, "loss": 0.2487, "step": 4770 }, { "epoch": 0.4070124552124211, "grad_norm": 1.4105449362746516, "learning_rate": 6.713562147666584e-05, "loss": 0.31, "step": 4771 }, { "epoch": 0.40709776488653815, "grad_norm": 2.0355809916212393, "learning_rate": 6.71226421994412e-05, "loss": 0.3558, "step": 4772 }, { "epoch": 0.40718307456065517, "grad_norm": 1.4466630553861157, "learning_rate": 6.710966161499384e-05, "loss": 0.2745, "step": 4773 }, { "epoch": 0.40726838423477224, "grad_norm": 1.6573021623657533, "learning_rate": 6.709667972431473e-05, "loss": 0.2535, "step": 4774 }, { "epoch": 0.40735369390888926, "grad_norm": 1.6361980474863502, "learning_rate": 6.708369652839497e-05, "loss": 0.2279, "step": 4775 }, { "epoch": 0.40743900358300633, "grad_norm": 1.479941563612484, "learning_rate": 6.707071202822575e-05, "loss": 0.2904, "step": 4776 }, { "epoch": 0.40752431325712335, "grad_norm": 1.390658603219355, "learning_rate": 6.70577262247984e-05, "loss": 0.3065, "step": 4777 }, { "epoch": 0.4076096229312404, "grad_norm": 1.517193484073549, "learning_rate": 6.704473911910428e-05, "loss": 0.2586, "step": 4778 }, { "epoch": 0.40769493260535744, "grad_norm": 1.559003656545759, "learning_rate": 6.703175071213493e-05, "loss": 0.2663, "step": 4779 }, { "epoch": 0.4077802422794745, "grad_norm": 1.339472683162747, "learning_rate": 6.701876100488189e-05, "loss": 0.2897, "step": 4780 }, { "epoch": 0.40786555195359153, "grad_norm": 1.6597126446620065, "learning_rate": 6.70057699983369e-05, "loss": 0.312, "step": 4781 }, { "epoch": 0.4079508616277086, "grad_norm": 1.6333853706841626, "learning_rate": 6.699277769349174e-05, "loss": 0.335, "step": 4782 }, { "epoch": 0.4080361713018256, "grad_norm": 1.3595979880360427, "learning_rate": 6.697978409133831e-05, "loss": 0.2774, "step": 4783 }, { "epoch": 0.4081214809759427, "grad_norm": 1.650871468645439, "learning_rate": 6.696678919286859e-05, "loss": 0.2893, "step": 4784 }, { "epoch": 0.4082067906500597, "grad_norm": 1.720100565787041, "learning_rate": 6.695379299907467e-05, "loss": 0.2992, "step": 4785 }, { "epoch": 0.4082921003241768, "grad_norm": 1.3034881287104796, "learning_rate": 6.694079551094873e-05, "loss": 0.3235, "step": 4786 }, { "epoch": 0.4083774099982938, "grad_norm": 1.4332083618035119, "learning_rate": 6.69277967294831e-05, "loss": 0.3069, "step": 4787 }, { "epoch": 0.4084627196724109, "grad_norm": 1.6453404553711097, "learning_rate": 6.691479665567015e-05, "loss": 0.2907, "step": 4788 }, { "epoch": 0.4085480293465279, "grad_norm": 1.4563808267791303, "learning_rate": 6.690179529050235e-05, "loss": 0.2546, "step": 4789 }, { "epoch": 0.4086333390206449, "grad_norm": 1.7629975881316617, "learning_rate": 6.688879263497229e-05, "loss": 0.2713, "step": 4790 }, { "epoch": 0.408718648694762, "grad_norm": 1.5129243050629935, "learning_rate": 6.687578869007267e-05, "loss": 0.3001, "step": 4791 }, { "epoch": 0.408803958368879, "grad_norm": 1.5852886808302782, "learning_rate": 6.686278345679625e-05, "loss": 0.2835, "step": 4792 }, { "epoch": 0.4088892680429961, "grad_norm": 1.4588586224514366, "learning_rate": 6.684977693613593e-05, "loss": 0.299, "step": 4793 }, { "epoch": 0.4089745777171131, "grad_norm": 1.231377160336562, "learning_rate": 6.683676912908469e-05, "loss": 0.2735, "step": 4794 }, { "epoch": 0.40905988739123017, "grad_norm": 1.669601249030669, "learning_rate": 6.682376003663559e-05, "loss": 0.2566, "step": 4795 }, { "epoch": 0.4091451970653472, "grad_norm": 1.4045588178748063, "learning_rate": 6.681074965978181e-05, "loss": 0.2497, "step": 4796 }, { "epoch": 0.40923050673946426, "grad_norm": 1.5610400436367815, "learning_rate": 6.679773799951662e-05, "loss": 0.2712, "step": 4797 }, { "epoch": 0.4093158164135813, "grad_norm": 1.3994277772411083, "learning_rate": 6.67847250568334e-05, "loss": 0.3067, "step": 4798 }, { "epoch": 0.40940112608769835, "grad_norm": 1.6934813705199043, "learning_rate": 6.677171083272562e-05, "loss": 0.2866, "step": 4799 }, { "epoch": 0.40948643576181537, "grad_norm": 1.5171451613768165, "learning_rate": 6.675869532818683e-05, "loss": 0.2775, "step": 4800 }, { "epoch": 0.40957174543593244, "grad_norm": 1.575198036943552, "learning_rate": 6.674567854421073e-05, "loss": 0.3057, "step": 4801 }, { "epoch": 0.40965705511004946, "grad_norm": 1.6551723933254163, "learning_rate": 6.673266048179103e-05, "loss": 0.3136, "step": 4802 }, { "epoch": 0.40974236478416654, "grad_norm": 1.7257018554992303, "learning_rate": 6.671964114192164e-05, "loss": 0.2594, "step": 4803 }, { "epoch": 0.40982767445828355, "grad_norm": 1.2906795647196054, "learning_rate": 6.670662052559649e-05, "loss": 0.331, "step": 4804 }, { "epoch": 0.4099129841324006, "grad_norm": 1.4877199731590542, "learning_rate": 6.669359863380964e-05, "loss": 0.2822, "step": 4805 }, { "epoch": 0.40999829380651764, "grad_norm": 1.6376371515509076, "learning_rate": 6.668057546755526e-05, "loss": 0.2949, "step": 4806 }, { "epoch": 0.4100836034806347, "grad_norm": 1.6680255674739966, "learning_rate": 6.666755102782758e-05, "loss": 0.2584, "step": 4807 }, { "epoch": 0.41016891315475174, "grad_norm": 1.2645501647235977, "learning_rate": 6.665452531562093e-05, "loss": 0.2581, "step": 4808 }, { "epoch": 0.4102542228288688, "grad_norm": 1.4702913840193408, "learning_rate": 6.66414983319298e-05, "loss": 0.2963, "step": 4809 }, { "epoch": 0.4103395325029858, "grad_norm": 1.575541279137308, "learning_rate": 6.662847007774869e-05, "loss": 0.3006, "step": 4810 }, { "epoch": 0.4104248421771029, "grad_norm": 1.4026066950640559, "learning_rate": 6.661544055407225e-05, "loss": 0.3007, "step": 4811 }, { "epoch": 0.4105101518512199, "grad_norm": 1.2258523233595118, "learning_rate": 6.660240976189523e-05, "loss": 0.2369, "step": 4812 }, { "epoch": 0.410595461525337, "grad_norm": 1.6342489686297144, "learning_rate": 6.658937770221242e-05, "loss": 0.3084, "step": 4813 }, { "epoch": 0.410680771199454, "grad_norm": 1.4188835618929705, "learning_rate": 6.657634437601881e-05, "loss": 0.2622, "step": 4814 }, { "epoch": 0.4107660808735711, "grad_norm": 1.348456634350364, "learning_rate": 6.656330978430939e-05, "loss": 0.2277, "step": 4815 }, { "epoch": 0.4108513905476881, "grad_norm": 1.2839683910056208, "learning_rate": 6.65502739280793e-05, "loss": 0.2366, "step": 4816 }, { "epoch": 0.4109367002218052, "grad_norm": 1.514936467717608, "learning_rate": 6.653723680832371e-05, "loss": 0.2776, "step": 4817 }, { "epoch": 0.4110220098959222, "grad_norm": 1.6281571163590254, "learning_rate": 6.652419842603797e-05, "loss": 0.2781, "step": 4818 }, { "epoch": 0.41110731957003926, "grad_norm": 1.88539794434864, "learning_rate": 6.651115878221752e-05, "loss": 0.3939, "step": 4819 }, { "epoch": 0.4111926292441563, "grad_norm": 1.6162540856447674, "learning_rate": 6.649811787785781e-05, "loss": 0.2993, "step": 4820 }, { "epoch": 0.41127793891827336, "grad_norm": 1.5074825419969742, "learning_rate": 6.648507571395449e-05, "loss": 0.3057, "step": 4821 }, { "epoch": 0.4113632485923904, "grad_norm": 2.197790301940499, "learning_rate": 6.647203229150322e-05, "loss": 0.2711, "step": 4822 }, { "epoch": 0.41144855826650745, "grad_norm": 1.6507823592045492, "learning_rate": 6.645898761149982e-05, "loss": 0.3134, "step": 4823 }, { "epoch": 0.41153386794062446, "grad_norm": 1.4879250087555818, "learning_rate": 6.644594167494019e-05, "loss": 0.2409, "step": 4824 }, { "epoch": 0.41161917761474154, "grad_norm": 2.0705843764953813, "learning_rate": 6.643289448282031e-05, "loss": 0.2797, "step": 4825 }, { "epoch": 0.41170448728885856, "grad_norm": 1.626970563522614, "learning_rate": 6.641984603613625e-05, "loss": 0.2932, "step": 4826 }, { "epoch": 0.4117897969629756, "grad_norm": 1.6510548716437219, "learning_rate": 6.640679633588421e-05, "loss": 0.3052, "step": 4827 }, { "epoch": 0.41187510663709265, "grad_norm": 1.436554473396467, "learning_rate": 6.639374538306046e-05, "loss": 0.2885, "step": 4828 }, { "epoch": 0.41196041631120967, "grad_norm": 1.604262221161558, "learning_rate": 6.638069317866135e-05, "loss": 0.2546, "step": 4829 }, { "epoch": 0.41204572598532674, "grad_norm": 1.6879276157696694, "learning_rate": 6.636763972368337e-05, "loss": 0.2601, "step": 4830 }, { "epoch": 0.41213103565944376, "grad_norm": 1.2485497645007244, "learning_rate": 6.635458501912307e-05, "loss": 0.2158, "step": 4831 }, { "epoch": 0.41221634533356083, "grad_norm": 1.522920804555188, "learning_rate": 6.63415290659771e-05, "loss": 0.2949, "step": 4832 }, { "epoch": 0.41230165500767785, "grad_norm": 1.7777541808045803, "learning_rate": 6.632847186524225e-05, "loss": 0.3476, "step": 4833 }, { "epoch": 0.4123869646817949, "grad_norm": 1.3738818390524794, "learning_rate": 6.631541341791533e-05, "loss": 0.2571, "step": 4834 }, { "epoch": 0.41247227435591194, "grad_norm": 1.4469817548549313, "learning_rate": 6.63023537249933e-05, "loss": 0.3236, "step": 4835 }, { "epoch": 0.412557584030029, "grad_norm": 1.354891959383992, "learning_rate": 6.62892927874732e-05, "loss": 0.2897, "step": 4836 }, { "epoch": 0.41264289370414603, "grad_norm": 1.6598351022099516, "learning_rate": 6.627623060635214e-05, "loss": 0.2653, "step": 4837 }, { "epoch": 0.4127282033782631, "grad_norm": 1.5061981572136998, "learning_rate": 6.626316718262737e-05, "loss": 0.2547, "step": 4838 }, { "epoch": 0.4128135130523801, "grad_norm": 1.705779710171263, "learning_rate": 6.62501025172962e-05, "loss": 0.3946, "step": 4839 }, { "epoch": 0.4128988227264972, "grad_norm": 1.5386128809655613, "learning_rate": 6.623703661135609e-05, "loss": 0.3108, "step": 4840 }, { "epoch": 0.4129841324006142, "grad_norm": 1.437040836144225, "learning_rate": 6.622396946580449e-05, "loss": 0.282, "step": 4841 }, { "epoch": 0.4130694420747313, "grad_norm": 1.69877004728074, "learning_rate": 6.621090108163904e-05, "loss": 0.369, "step": 4842 }, { "epoch": 0.4131547517488483, "grad_norm": 1.9205416884819932, "learning_rate": 6.619783145985743e-05, "loss": 0.292, "step": 4843 }, { "epoch": 0.4132400614229654, "grad_norm": 1.5724037630989578, "learning_rate": 6.618476060145747e-05, "loss": 0.2963, "step": 4844 }, { "epoch": 0.4133253710970824, "grad_norm": 1.8058168914966501, "learning_rate": 6.617168850743704e-05, "loss": 0.3426, "step": 4845 }, { "epoch": 0.41341068077119947, "grad_norm": 1.2368497546579917, "learning_rate": 6.615861517879414e-05, "loss": 0.304, "step": 4846 }, { "epoch": 0.4134959904453165, "grad_norm": 1.578098183583461, "learning_rate": 6.614554061652683e-05, "loss": 0.2585, "step": 4847 }, { "epoch": 0.41358130011943356, "grad_norm": 1.34293562585836, "learning_rate": 6.61324648216333e-05, "loss": 0.3155, "step": 4848 }, { "epoch": 0.4136666097935506, "grad_norm": 1.4355705414981563, "learning_rate": 6.61193877951118e-05, "loss": 0.3067, "step": 4849 }, { "epoch": 0.41375191946766765, "grad_norm": 1.2571726053916839, "learning_rate": 6.61063095379607e-05, "loss": 0.2296, "step": 4850 }, { "epoch": 0.41383722914178467, "grad_norm": 1.242346459540386, "learning_rate": 6.609323005117846e-05, "loss": 0.2694, "step": 4851 }, { "epoch": 0.41392253881590174, "grad_norm": 1.4193528831166418, "learning_rate": 6.608014933576362e-05, "loss": 0.2843, "step": 4852 }, { "epoch": 0.41400784849001876, "grad_norm": 1.235074147071974, "learning_rate": 6.606706739271482e-05, "loss": 0.245, "step": 4853 }, { "epoch": 0.41409315816413583, "grad_norm": 1.4270393568999271, "learning_rate": 6.605398422303082e-05, "loss": 0.2657, "step": 4854 }, { "epoch": 0.41417846783825285, "grad_norm": 1.6997278612762343, "learning_rate": 6.604089982771043e-05, "loss": 0.2971, "step": 4855 }, { "epoch": 0.4142637775123699, "grad_norm": 1.380924984076075, "learning_rate": 6.602781420775258e-05, "loss": 0.249, "step": 4856 }, { "epoch": 0.41434908718648694, "grad_norm": 1.3078091674908072, "learning_rate": 6.601472736415629e-05, "loss": 0.2564, "step": 4857 }, { "epoch": 0.414434396860604, "grad_norm": 1.5479485995805413, "learning_rate": 6.600163929792067e-05, "loss": 0.2998, "step": 4858 }, { "epoch": 0.41451970653472103, "grad_norm": 1.50136583451505, "learning_rate": 6.598855001004492e-05, "loss": 0.2487, "step": 4859 }, { "epoch": 0.4146050162088381, "grad_norm": 1.5002971990528542, "learning_rate": 6.597545950152833e-05, "loss": 0.302, "step": 4860 }, { "epoch": 0.4146903258829551, "grad_norm": 1.1430056041269763, "learning_rate": 6.59623677733703e-05, "loss": 0.2417, "step": 4861 }, { "epoch": 0.4147756355570722, "grad_norm": 1.8097241949209082, "learning_rate": 6.594927482657033e-05, "loss": 0.3001, "step": 4862 }, { "epoch": 0.4148609452311892, "grad_norm": 1.677204372547872, "learning_rate": 6.593618066212797e-05, "loss": 0.3559, "step": 4863 }, { "epoch": 0.4149462549053063, "grad_norm": 1.52064218582746, "learning_rate": 6.59230852810429e-05, "loss": 0.2676, "step": 4864 }, { "epoch": 0.4150315645794233, "grad_norm": 1.3643279770957948, "learning_rate": 6.59099886843149e-05, "loss": 0.2371, "step": 4865 }, { "epoch": 0.4151168742535403, "grad_norm": 1.8213034621582234, "learning_rate": 6.589689087294378e-05, "loss": 0.3238, "step": 4866 }, { "epoch": 0.4152021839276574, "grad_norm": 1.551464631680959, "learning_rate": 6.588379184792954e-05, "loss": 0.3016, "step": 4867 }, { "epoch": 0.4152874936017744, "grad_norm": 1.7703638539016255, "learning_rate": 6.587069161027219e-05, "loss": 0.274, "step": 4868 }, { "epoch": 0.4153728032758915, "grad_norm": 1.4515022322808058, "learning_rate": 6.585759016097188e-05, "loss": 0.3483, "step": 4869 }, { "epoch": 0.4154581129500085, "grad_norm": 1.242068446950036, "learning_rate": 6.584448750102883e-05, "loss": 0.2549, "step": 4870 }, { "epoch": 0.4155434226241256, "grad_norm": 1.2866226225234039, "learning_rate": 6.583138363144334e-05, "loss": 0.2698, "step": 4871 }, { "epoch": 0.4156287322982426, "grad_norm": 1.4957710097283277, "learning_rate": 6.581827855321587e-05, "loss": 0.2565, "step": 4872 }, { "epoch": 0.41571404197235967, "grad_norm": 1.4714851883697895, "learning_rate": 6.580517226734686e-05, "loss": 0.2576, "step": 4873 }, { "epoch": 0.4157993516464767, "grad_norm": 1.6357711926066456, "learning_rate": 6.579206477483695e-05, "loss": 0.2418, "step": 4874 }, { "epoch": 0.41588466132059376, "grad_norm": 1.2188766120287349, "learning_rate": 6.57789560766868e-05, "loss": 0.285, "step": 4875 }, { "epoch": 0.4159699709947108, "grad_norm": 1.964607725130225, "learning_rate": 6.57658461738972e-05, "loss": 0.346, "step": 4876 }, { "epoch": 0.41605528066882785, "grad_norm": 1.2505544386541019, "learning_rate": 6.575273506746905e-05, "loss": 0.3054, "step": 4877 }, { "epoch": 0.41614059034294487, "grad_norm": 1.7286863770710394, "learning_rate": 6.573962275840328e-05, "loss": 0.2767, "step": 4878 }, { "epoch": 0.41622590001706194, "grad_norm": 1.4914710436934489, "learning_rate": 6.572650924770093e-05, "loss": 0.295, "step": 4879 }, { "epoch": 0.41631120969117896, "grad_norm": 1.4868991471743462, "learning_rate": 6.57133945363632e-05, "loss": 0.2293, "step": 4880 }, { "epoch": 0.41639651936529604, "grad_norm": 1.5796469723778568, "learning_rate": 6.570027862539128e-05, "loss": 0.3075, "step": 4881 }, { "epoch": 0.41648182903941305, "grad_norm": 1.4883498929431387, "learning_rate": 6.568716151578653e-05, "loss": 0.2743, "step": 4882 }, { "epoch": 0.4165671387135301, "grad_norm": 1.5247221396534563, "learning_rate": 6.567404320855035e-05, "loss": 0.3543, "step": 4883 }, { "epoch": 0.41665244838764715, "grad_norm": 1.6760623580512068, "learning_rate": 6.566092370468427e-05, "loss": 0.3127, "step": 4884 }, { "epoch": 0.4167377580617642, "grad_norm": 1.3410004274622203, "learning_rate": 6.564780300518987e-05, "loss": 0.3151, "step": 4885 }, { "epoch": 0.41682306773588124, "grad_norm": 1.5382114904113149, "learning_rate": 6.563468111106889e-05, "loss": 0.2797, "step": 4886 }, { "epoch": 0.4169083774099983, "grad_norm": 1.4165280592186344, "learning_rate": 6.562155802332307e-05, "loss": 0.2531, "step": 4887 }, { "epoch": 0.4169936870841153, "grad_norm": 1.3878260530094775, "learning_rate": 6.56084337429543e-05, "loss": 0.2573, "step": 4888 }, { "epoch": 0.4170789967582324, "grad_norm": 1.6602662532489318, "learning_rate": 6.559530827096457e-05, "loss": 0.3202, "step": 4889 }, { "epoch": 0.4171643064323494, "grad_norm": 1.7286308638095118, "learning_rate": 6.558218160835594e-05, "loss": 0.2659, "step": 4890 }, { "epoch": 0.4172496161064665, "grad_norm": 1.6611686940385408, "learning_rate": 6.556905375613054e-05, "loss": 0.2434, "step": 4891 }, { "epoch": 0.4173349257805835, "grad_norm": 1.450972901536218, "learning_rate": 6.555592471529059e-05, "loss": 0.2797, "step": 4892 }, { "epoch": 0.4174202354547006, "grad_norm": 1.334855025917071, "learning_rate": 6.554279448683849e-05, "loss": 0.2391, "step": 4893 }, { "epoch": 0.4175055451288176, "grad_norm": 1.7012804874824794, "learning_rate": 6.552966307177662e-05, "loss": 0.2751, "step": 4894 }, { "epoch": 0.4175908548029347, "grad_norm": 1.4660280043671141, "learning_rate": 6.551653047110747e-05, "loss": 0.2839, "step": 4895 }, { "epoch": 0.4176761644770517, "grad_norm": 1.5146605579080659, "learning_rate": 6.550339668583369e-05, "loss": 0.3005, "step": 4896 }, { "epoch": 0.41776147415116877, "grad_norm": 1.9936533004605224, "learning_rate": 6.549026171695799e-05, "loss": 0.294, "step": 4897 }, { "epoch": 0.4178467838252858, "grad_norm": 1.6270256621641148, "learning_rate": 6.547712556548307e-05, "loss": 0.2347, "step": 4898 }, { "epoch": 0.41793209349940286, "grad_norm": 1.4227355197564429, "learning_rate": 6.546398823241188e-05, "loss": 0.3289, "step": 4899 }, { "epoch": 0.4180174031735199, "grad_norm": 1.3577796958411017, "learning_rate": 6.545084971874738e-05, "loss": 0.2786, "step": 4900 }, { "epoch": 0.41810271284763695, "grad_norm": 1.5878272612857258, "learning_rate": 6.543771002549259e-05, "loss": 0.2828, "step": 4901 }, { "epoch": 0.41818802252175397, "grad_norm": 1.7927204699847095, "learning_rate": 6.54245691536507e-05, "loss": 0.2872, "step": 4902 }, { "epoch": 0.41827333219587104, "grad_norm": 1.66460750054619, "learning_rate": 6.541142710422489e-05, "loss": 0.2994, "step": 4903 }, { "epoch": 0.41835864186998806, "grad_norm": 1.479481632207603, "learning_rate": 6.539828387821854e-05, "loss": 0.229, "step": 4904 }, { "epoch": 0.4184439515441051, "grad_norm": 1.4846065139923608, "learning_rate": 6.538513947663503e-05, "loss": 0.2517, "step": 4905 }, { "epoch": 0.41852926121822215, "grad_norm": 1.402261290609683, "learning_rate": 6.537199390047786e-05, "loss": 0.2773, "step": 4906 }, { "epoch": 0.41861457089233917, "grad_norm": 1.3888321366901035, "learning_rate": 6.535884715075067e-05, "loss": 0.3197, "step": 4907 }, { "epoch": 0.41869988056645624, "grad_norm": 1.3495215733808272, "learning_rate": 6.53456992284571e-05, "loss": 0.2077, "step": 4908 }, { "epoch": 0.41878519024057326, "grad_norm": 1.5438421553931745, "learning_rate": 6.533255013460095e-05, "loss": 0.3054, "step": 4909 }, { "epoch": 0.41887049991469033, "grad_norm": 1.4245710697782408, "learning_rate": 6.531939987018608e-05, "loss": 0.216, "step": 4910 }, { "epoch": 0.41895580958880735, "grad_norm": 1.4661093165817223, "learning_rate": 6.530624843621644e-05, "loss": 0.2576, "step": 4911 }, { "epoch": 0.4190411192629244, "grad_norm": 1.3902580548611994, "learning_rate": 6.529309583369605e-05, "loss": 0.276, "step": 4912 }, { "epoch": 0.41912642893704144, "grad_norm": 1.4022539795431253, "learning_rate": 6.527994206362907e-05, "loss": 0.2757, "step": 4913 }, { "epoch": 0.4192117386111585, "grad_norm": 1.5297317763438636, "learning_rate": 6.526678712701973e-05, "loss": 0.3004, "step": 4914 }, { "epoch": 0.41929704828527553, "grad_norm": 1.5414544896012865, "learning_rate": 6.52536310248723e-05, "loss": 0.2944, "step": 4915 }, { "epoch": 0.4193823579593926, "grad_norm": 1.6844490162399595, "learning_rate": 6.524047375819118e-05, "loss": 0.2778, "step": 4916 }, { "epoch": 0.4194676676335096, "grad_norm": 1.561239726122121, "learning_rate": 6.522731532798091e-05, "loss": 0.3231, "step": 4917 }, { "epoch": 0.4195529773076267, "grad_norm": 1.249434009206129, "learning_rate": 6.521415573524603e-05, "loss": 0.258, "step": 4918 }, { "epoch": 0.4196382869817437, "grad_norm": 1.4933090389069346, "learning_rate": 6.520099498099118e-05, "loss": 0.3221, "step": 4919 }, { "epoch": 0.4197235966558608, "grad_norm": 1.562441786635312, "learning_rate": 6.518783306622116e-05, "loss": 0.3249, "step": 4920 }, { "epoch": 0.4198089063299778, "grad_norm": 1.5202551835832265, "learning_rate": 6.517466999194079e-05, "loss": 0.2882, "step": 4921 }, { "epoch": 0.4198942160040949, "grad_norm": 1.5399782970682685, "learning_rate": 6.516150575915502e-05, "loss": 0.2242, "step": 4922 }, { "epoch": 0.4199795256782119, "grad_norm": 1.8064962463424374, "learning_rate": 6.514834036886884e-05, "loss": 0.3279, "step": 4923 }, { "epoch": 0.42006483535232897, "grad_norm": 1.5646207531527534, "learning_rate": 6.513517382208737e-05, "loss": 0.2641, "step": 4924 }, { "epoch": 0.420150145026446, "grad_norm": 1.5347325174591449, "learning_rate": 6.51220061198158e-05, "loss": 0.2491, "step": 4925 }, { "epoch": 0.42023545470056306, "grad_norm": 1.5170877221531271, "learning_rate": 6.510883726305943e-05, "loss": 0.2768, "step": 4926 }, { "epoch": 0.4203207643746801, "grad_norm": 1.5500417334568006, "learning_rate": 6.509566725282362e-05, "loss": 0.3113, "step": 4927 }, { "epoch": 0.42040607404879715, "grad_norm": 1.3096807719162282, "learning_rate": 6.508249609011384e-05, "loss": 0.2495, "step": 4928 }, { "epoch": 0.42049138372291417, "grad_norm": 1.642535595791203, "learning_rate": 6.506932377593562e-05, "loss": 0.2764, "step": 4929 }, { "epoch": 0.42057669339703124, "grad_norm": 1.659510372408742, "learning_rate": 6.505615031129462e-05, "loss": 0.243, "step": 4930 }, { "epoch": 0.42066200307114826, "grad_norm": 1.5249779371322834, "learning_rate": 6.504297569719654e-05, "loss": 0.2424, "step": 4931 }, { "epoch": 0.42074731274526533, "grad_norm": 1.486710808673122, "learning_rate": 6.502979993464723e-05, "loss": 0.3179, "step": 4932 }, { "epoch": 0.42083262241938235, "grad_norm": 1.903080165190847, "learning_rate": 6.501662302465254e-05, "loss": 0.2834, "step": 4933 }, { "epoch": 0.4209179320934994, "grad_norm": 1.5632935606906888, "learning_rate": 6.50034449682185e-05, "loss": 0.2689, "step": 4934 }, { "epoch": 0.42100324176761644, "grad_norm": 1.6050322373144421, "learning_rate": 6.499026576635115e-05, "loss": 0.3088, "step": 4935 }, { "epoch": 0.4210885514417335, "grad_norm": 1.740569111390786, "learning_rate": 6.497708542005666e-05, "loss": 0.2892, "step": 4936 }, { "epoch": 0.42117386111585053, "grad_norm": 1.3154297781238369, "learning_rate": 6.496390393034129e-05, "loss": 0.2344, "step": 4937 }, { "epoch": 0.4212591707899676, "grad_norm": 1.532265229281269, "learning_rate": 6.495072129821136e-05, "loss": 0.3421, "step": 4938 }, { "epoch": 0.4213444804640846, "grad_norm": 1.8399028404125592, "learning_rate": 6.493753752467334e-05, "loss": 0.3379, "step": 4939 }, { "epoch": 0.4214297901382017, "grad_norm": 1.7988961172766964, "learning_rate": 6.492435261073368e-05, "loss": 0.3361, "step": 4940 }, { "epoch": 0.4215150998123187, "grad_norm": 1.8284413642692512, "learning_rate": 6.491116655739902e-05, "loss": 0.2747, "step": 4941 }, { "epoch": 0.42160040948643573, "grad_norm": 1.5882839640056128, "learning_rate": 6.489797936567603e-05, "loss": 0.2795, "step": 4942 }, { "epoch": 0.4216857191605528, "grad_norm": 1.8040326404198443, "learning_rate": 6.488479103657149e-05, "loss": 0.3335, "step": 4943 }, { "epoch": 0.4217710288346698, "grad_norm": 1.3903690059648923, "learning_rate": 6.487160157109224e-05, "loss": 0.309, "step": 4944 }, { "epoch": 0.4218563385087869, "grad_norm": 1.9079833358020941, "learning_rate": 6.485841097024524e-05, "loss": 0.3351, "step": 4945 }, { "epoch": 0.4219416481829039, "grad_norm": 1.5300247584315667, "learning_rate": 6.484521923503752e-05, "loss": 0.3226, "step": 4946 }, { "epoch": 0.422026957857021, "grad_norm": 1.3514725318908878, "learning_rate": 6.48320263664762e-05, "loss": 0.2983, "step": 4947 }, { "epoch": 0.422112267531138, "grad_norm": 1.5308135052793193, "learning_rate": 6.481883236556848e-05, "loss": 0.3232, "step": 4948 }, { "epoch": 0.4221975772052551, "grad_norm": 1.4037938178570202, "learning_rate": 6.480563723332167e-05, "loss": 0.324, "step": 4949 }, { "epoch": 0.4222828868793721, "grad_norm": 1.5944347032758206, "learning_rate": 6.479244097074313e-05, "loss": 0.3028, "step": 4950 }, { "epoch": 0.4223681965534892, "grad_norm": 1.4885448312514427, "learning_rate": 6.477924357884031e-05, "loss": 0.3116, "step": 4951 }, { "epoch": 0.4224535062276062, "grad_norm": 1.5937077385778133, "learning_rate": 6.47660450586208e-05, "loss": 0.3033, "step": 4952 }, { "epoch": 0.42253881590172326, "grad_norm": 1.649411243822525, "learning_rate": 6.475284541109221e-05, "loss": 0.2855, "step": 4953 }, { "epoch": 0.4226241255758403, "grad_norm": 1.52705915292259, "learning_rate": 6.473964463726228e-05, "loss": 0.3377, "step": 4954 }, { "epoch": 0.42270943524995735, "grad_norm": 1.3779801238499283, "learning_rate": 6.47264427381388e-05, "loss": 0.2899, "step": 4955 }, { "epoch": 0.4227947449240744, "grad_norm": 1.5592318115543584, "learning_rate": 6.471323971472966e-05, "loss": 0.2282, "step": 4956 }, { "epoch": 0.42288005459819145, "grad_norm": 1.5602426626298498, "learning_rate": 6.470003556804286e-05, "loss": 0.2855, "step": 4957 }, { "epoch": 0.42296536427230846, "grad_norm": 1.1980278737514418, "learning_rate": 6.468683029908647e-05, "loss": 0.2804, "step": 4958 }, { "epoch": 0.42305067394642554, "grad_norm": 1.5849367940195556, "learning_rate": 6.467362390886862e-05, "loss": 0.3368, "step": 4959 }, { "epoch": 0.42313598362054256, "grad_norm": 1.4678125839444536, "learning_rate": 6.466041639839757e-05, "loss": 0.2736, "step": 4960 }, { "epoch": 0.42322129329465963, "grad_norm": 1.4901521392570372, "learning_rate": 6.464720776868163e-05, "loss": 0.2968, "step": 4961 }, { "epoch": 0.42330660296877665, "grad_norm": 1.3432612306461535, "learning_rate": 6.46339980207292e-05, "loss": 0.2845, "step": 4962 }, { "epoch": 0.4233919126428937, "grad_norm": 1.3043811918220463, "learning_rate": 6.46207871555488e-05, "loss": 0.2664, "step": 4963 }, { "epoch": 0.42347722231701074, "grad_norm": 1.4137524006601419, "learning_rate": 6.4607575174149e-05, "loss": 0.2509, "step": 4964 }, { "epoch": 0.4235625319911278, "grad_norm": 1.6137478871704738, "learning_rate": 6.459436207753846e-05, "loss": 0.3325, "step": 4965 }, { "epoch": 0.42364784166524483, "grad_norm": 1.351761114267749, "learning_rate": 6.458114786672593e-05, "loss": 0.2688, "step": 4966 }, { "epoch": 0.4237331513393619, "grad_norm": 1.6542707889306185, "learning_rate": 6.456793254272023e-05, "loss": 0.3113, "step": 4967 }, { "epoch": 0.4238184610134789, "grad_norm": 1.5603414502963964, "learning_rate": 6.455471610653031e-05, "loss": 0.2442, "step": 4968 }, { "epoch": 0.423903770687596, "grad_norm": 1.839023804237256, "learning_rate": 6.454149855916513e-05, "loss": 0.27, "step": 4969 }, { "epoch": 0.423989080361713, "grad_norm": 1.5564121667408446, "learning_rate": 6.452827990163384e-05, "loss": 0.2392, "step": 4970 }, { "epoch": 0.4240743900358301, "grad_norm": 1.4868400584376922, "learning_rate": 6.451506013494558e-05, "loss": 0.2621, "step": 4971 }, { "epoch": 0.4241596997099471, "grad_norm": 1.500700469178763, "learning_rate": 6.45018392601096e-05, "loss": 0.2775, "step": 4972 }, { "epoch": 0.4242450093840642, "grad_norm": 1.6701020439174272, "learning_rate": 6.448861727813526e-05, "loss": 0.2647, "step": 4973 }, { "epoch": 0.4243303190581812, "grad_norm": 1.4226182944080512, "learning_rate": 6.447539419003198e-05, "loss": 0.2315, "step": 4974 }, { "epoch": 0.42441562873229827, "grad_norm": 1.4236864561079365, "learning_rate": 6.446216999680928e-05, "loss": 0.2487, "step": 4975 }, { "epoch": 0.4245009384064153, "grad_norm": 1.4202436373795913, "learning_rate": 6.444894469947677e-05, "loss": 0.2817, "step": 4976 }, { "epoch": 0.42458624808053236, "grad_norm": 1.4675060442356496, "learning_rate": 6.443571829904408e-05, "loss": 0.2724, "step": 4977 }, { "epoch": 0.4246715577546494, "grad_norm": 1.592299156279491, "learning_rate": 6.442249079652103e-05, "loss": 0.3438, "step": 4978 }, { "epoch": 0.42475686742876645, "grad_norm": 1.196091491577488, "learning_rate": 6.440926219291744e-05, "loss": 0.2619, "step": 4979 }, { "epoch": 0.42484217710288347, "grad_norm": 1.2954769642120532, "learning_rate": 6.439603248924325e-05, "loss": 0.2652, "step": 4980 }, { "epoch": 0.4249274867770005, "grad_norm": 1.403539631163416, "learning_rate": 6.438280168650849e-05, "loss": 0.2667, "step": 4981 }, { "epoch": 0.42501279645111756, "grad_norm": 1.8838834763632937, "learning_rate": 6.436956978572324e-05, "loss": 0.2729, "step": 4982 }, { "epoch": 0.4250981061252346, "grad_norm": 1.5993999488497601, "learning_rate": 6.435633678789769e-05, "loss": 0.2804, "step": 4983 }, { "epoch": 0.42518341579935165, "grad_norm": 1.363487162519452, "learning_rate": 6.434310269404214e-05, "loss": 0.2936, "step": 4984 }, { "epoch": 0.42526872547346867, "grad_norm": 1.62078486797503, "learning_rate": 6.432986750516692e-05, "loss": 0.2939, "step": 4985 }, { "epoch": 0.42535403514758574, "grad_norm": 1.476602705150794, "learning_rate": 6.431663122228245e-05, "loss": 0.2471, "step": 4986 }, { "epoch": 0.42543934482170276, "grad_norm": 1.8929551952599397, "learning_rate": 6.430339384639927e-05, "loss": 0.2936, "step": 4987 }, { "epoch": 0.42552465449581983, "grad_norm": 1.8429736426378662, "learning_rate": 6.429015537852797e-05, "loss": 0.2385, "step": 4988 }, { "epoch": 0.42560996416993685, "grad_norm": 1.550562230553712, "learning_rate": 6.427691581967925e-05, "loss": 0.3116, "step": 4989 }, { "epoch": 0.4256952738440539, "grad_norm": 1.7543326966037327, "learning_rate": 6.426367517086387e-05, "loss": 0.3164, "step": 4990 }, { "epoch": 0.42578058351817094, "grad_norm": 1.4051427084508785, "learning_rate": 6.42504334330927e-05, "loss": 0.2847, "step": 4991 }, { "epoch": 0.425865893192288, "grad_norm": 1.750581848963728, "learning_rate": 6.423719060737665e-05, "loss": 0.2097, "step": 4992 }, { "epoch": 0.42595120286640503, "grad_norm": 1.665060397819686, "learning_rate": 6.422394669472676e-05, "loss": 0.3133, "step": 4993 }, { "epoch": 0.4260365125405221, "grad_norm": 1.388764154343188, "learning_rate": 6.421070169615411e-05, "loss": 0.2923, "step": 4994 }, { "epoch": 0.4261218222146391, "grad_norm": 1.7784467355786293, "learning_rate": 6.419745561266993e-05, "loss": 0.3088, "step": 4995 }, { "epoch": 0.4262071318887562, "grad_norm": 1.701641871875404, "learning_rate": 6.418420844528545e-05, "loss": 0.3192, "step": 4996 }, { "epoch": 0.4262924415628732, "grad_norm": 1.455558625833575, "learning_rate": 6.417096019501203e-05, "loss": 0.2874, "step": 4997 }, { "epoch": 0.4263777512369903, "grad_norm": 1.442698739177142, "learning_rate": 6.415771086286109e-05, "loss": 0.2427, "step": 4998 }, { "epoch": 0.4264630609111073, "grad_norm": 1.6258838890632172, "learning_rate": 6.414446044984417e-05, "loss": 0.2999, "step": 4999 }, { "epoch": 0.4265483705852244, "grad_norm": 1.510672747070879, "learning_rate": 6.413120895697287e-05, "loss": 0.2629, "step": 5000 }, { "epoch": 0.4266336802593414, "grad_norm": 1.6736754233146347, "learning_rate": 6.411795638525883e-05, "loss": 0.3318, "step": 5001 }, { "epoch": 0.42671898993345847, "grad_norm": 1.3971222814373248, "learning_rate": 6.410470273571387e-05, "loss": 0.2411, "step": 5002 }, { "epoch": 0.4268042996075755, "grad_norm": 1.5315214422545076, "learning_rate": 6.409144800934979e-05, "loss": 0.3034, "step": 5003 }, { "epoch": 0.42688960928169256, "grad_norm": 1.5365076692925421, "learning_rate": 6.407819220717855e-05, "loss": 0.2953, "step": 5004 }, { "epoch": 0.4269749189558096, "grad_norm": 1.5956503813861982, "learning_rate": 6.406493533021213e-05, "loss": 0.2752, "step": 5005 }, { "epoch": 0.42706022862992665, "grad_norm": 1.5986825495994885, "learning_rate": 6.405167737946265e-05, "loss": 0.2617, "step": 5006 }, { "epoch": 0.42714553830404367, "grad_norm": 1.4907225762825478, "learning_rate": 6.403841835594228e-05, "loss": 0.282, "step": 5007 }, { "epoch": 0.42723084797816074, "grad_norm": 1.3242027709356947, "learning_rate": 6.402515826066327e-05, "loss": 0.2783, "step": 5008 }, { "epoch": 0.42731615765227776, "grad_norm": 1.4581983049641574, "learning_rate": 6.401189709463794e-05, "loss": 0.3307, "step": 5009 }, { "epoch": 0.42740146732639483, "grad_norm": 1.4673970260754279, "learning_rate": 6.399863485887873e-05, "loss": 0.3069, "step": 5010 }, { "epoch": 0.42748677700051185, "grad_norm": 1.2355954860467726, "learning_rate": 6.398537155439812e-05, "loss": 0.3739, "step": 5011 }, { "epoch": 0.4275720866746289, "grad_norm": 1.3512115904989064, "learning_rate": 6.397210718220874e-05, "loss": 0.2415, "step": 5012 }, { "epoch": 0.42765739634874594, "grad_norm": 1.4644537241179756, "learning_rate": 6.395884174332322e-05, "loss": 0.1983, "step": 5013 }, { "epoch": 0.427742706022863, "grad_norm": 1.5087787911766077, "learning_rate": 6.394557523875428e-05, "loss": 0.2475, "step": 5014 }, { "epoch": 0.42782801569698004, "grad_norm": 1.3224143729225564, "learning_rate": 6.393230766951481e-05, "loss": 0.2179, "step": 5015 }, { "epoch": 0.4279133253710971, "grad_norm": 1.2498365772230822, "learning_rate": 6.391903903661768e-05, "loss": 0.2203, "step": 5016 }, { "epoch": 0.4279986350452141, "grad_norm": 1.4537432596143887, "learning_rate": 6.390576934107589e-05, "loss": 0.2929, "step": 5017 }, { "epoch": 0.4280839447193312, "grad_norm": 1.4774945259315564, "learning_rate": 6.389249858390251e-05, "loss": 0.2558, "step": 5018 }, { "epoch": 0.4281692543934482, "grad_norm": 1.406540734648776, "learning_rate": 6.387922676611065e-05, "loss": 0.2555, "step": 5019 }, { "epoch": 0.42825456406756524, "grad_norm": 1.9842722107705277, "learning_rate": 6.386595388871361e-05, "loss": 0.3068, "step": 5020 }, { "epoch": 0.4283398737416823, "grad_norm": 1.4244747498866293, "learning_rate": 6.385267995272468e-05, "loss": 0.2441, "step": 5021 }, { "epoch": 0.4284251834157993, "grad_norm": 1.707643796191413, "learning_rate": 6.383940495915723e-05, "loss": 0.2738, "step": 5022 }, { "epoch": 0.4285104930899164, "grad_norm": 1.5359239672727285, "learning_rate": 6.382612890902478e-05, "loss": 0.2605, "step": 5023 }, { "epoch": 0.4285958027640334, "grad_norm": 1.567758409412175, "learning_rate": 6.381285180334084e-05, "loss": 0.2985, "step": 5024 }, { "epoch": 0.4286811124381505, "grad_norm": 1.9627545209926593, "learning_rate": 6.379957364311905e-05, "loss": 0.2608, "step": 5025 }, { "epoch": 0.4287664221122675, "grad_norm": 2.0602531910822868, "learning_rate": 6.378629442937318e-05, "loss": 0.279, "step": 5026 }, { "epoch": 0.4288517317863846, "grad_norm": 1.2433014679132959, "learning_rate": 6.377301416311696e-05, "loss": 0.2331, "step": 5027 }, { "epoch": 0.4289370414605016, "grad_norm": 1.5937912131571954, "learning_rate": 6.375973284536432e-05, "loss": 0.2771, "step": 5028 }, { "epoch": 0.4290223511346187, "grad_norm": 1.649112219828119, "learning_rate": 6.374645047712919e-05, "loss": 0.2727, "step": 5029 }, { "epoch": 0.4291076608087357, "grad_norm": 1.4039206816664418, "learning_rate": 6.37331670594256e-05, "loss": 0.2353, "step": 5030 }, { "epoch": 0.42919297048285276, "grad_norm": 1.46114572275325, "learning_rate": 6.371988259326771e-05, "loss": 0.3018, "step": 5031 }, { "epoch": 0.4292782801569698, "grad_norm": 1.5280234929171135, "learning_rate": 6.370659707966967e-05, "loss": 0.3462, "step": 5032 }, { "epoch": 0.42936358983108686, "grad_norm": 1.4756458000193908, "learning_rate": 6.369331051964579e-05, "loss": 0.2652, "step": 5033 }, { "epoch": 0.4294488995052039, "grad_norm": 1.4364643512661552, "learning_rate": 6.368002291421042e-05, "loss": 0.2726, "step": 5034 }, { "epoch": 0.42953420917932095, "grad_norm": 1.4852242700530909, "learning_rate": 6.366673426437797e-05, "loss": 0.2802, "step": 5035 }, { "epoch": 0.42961951885343796, "grad_norm": 1.5808239509868491, "learning_rate": 6.365344457116301e-05, "loss": 0.2715, "step": 5036 }, { "epoch": 0.42970482852755504, "grad_norm": 1.4000692810218613, "learning_rate": 6.36401538355801e-05, "loss": 0.2791, "step": 5037 }, { "epoch": 0.42979013820167206, "grad_norm": 1.4924541454843063, "learning_rate": 6.362686205864394e-05, "loss": 0.343, "step": 5038 }, { "epoch": 0.42987544787578913, "grad_norm": 1.6734033889214617, "learning_rate": 6.361356924136928e-05, "loss": 0.2703, "step": 5039 }, { "epoch": 0.42996075754990615, "grad_norm": 1.3009234284834132, "learning_rate": 6.360027538477094e-05, "loss": 0.247, "step": 5040 }, { "epoch": 0.4300460672240232, "grad_norm": 1.4939258296382887, "learning_rate": 6.358698048986384e-05, "loss": 0.287, "step": 5041 }, { "epoch": 0.43013137689814024, "grad_norm": 1.497857630189252, "learning_rate": 6.357368455766299e-05, "loss": 0.3244, "step": 5042 }, { "epoch": 0.4302166865722573, "grad_norm": 1.4444708770795052, "learning_rate": 6.356038758918344e-05, "loss": 0.2579, "step": 5043 }, { "epoch": 0.43030199624637433, "grad_norm": 1.519471939616116, "learning_rate": 6.354708958544038e-05, "loss": 0.2819, "step": 5044 }, { "epoch": 0.4303873059204914, "grad_norm": 1.634957980161359, "learning_rate": 6.353379054744901e-05, "loss": 0.2955, "step": 5045 }, { "epoch": 0.4304726155946084, "grad_norm": 1.5117636977026703, "learning_rate": 6.352049047622463e-05, "loss": 0.3025, "step": 5046 }, { "epoch": 0.4305579252687255, "grad_norm": 1.636549619981285, "learning_rate": 6.350718937278269e-05, "loss": 0.3128, "step": 5047 }, { "epoch": 0.4306432349428425, "grad_norm": 1.2979218268049522, "learning_rate": 6.349388723813859e-05, "loss": 0.2777, "step": 5048 }, { "epoch": 0.4307285446169596, "grad_norm": 1.2476425829421292, "learning_rate": 6.348058407330792e-05, "loss": 0.246, "step": 5049 }, { "epoch": 0.4308138542910766, "grad_norm": 1.4239605709896446, "learning_rate": 6.346727987930628e-05, "loss": 0.3025, "step": 5050 }, { "epoch": 0.4308991639651937, "grad_norm": 1.70583820334247, "learning_rate": 6.345397465714939e-05, "loss": 0.3105, "step": 5051 }, { "epoch": 0.4309844736393107, "grad_norm": 1.3631100260153917, "learning_rate": 6.344066840785302e-05, "loss": 0.247, "step": 5052 }, { "epoch": 0.43106978331342777, "grad_norm": 1.5509798705850315, "learning_rate": 6.342736113243305e-05, "loss": 0.3193, "step": 5053 }, { "epoch": 0.4311550929875448, "grad_norm": 1.5376578955481142, "learning_rate": 6.341405283190541e-05, "loss": 0.3019, "step": 5054 }, { "epoch": 0.43124040266166186, "grad_norm": 1.3880101168316028, "learning_rate": 6.340074350728612e-05, "loss": 0.2905, "step": 5055 }, { "epoch": 0.4313257123357789, "grad_norm": 1.477145286878499, "learning_rate": 6.338743315959127e-05, "loss": 0.2689, "step": 5056 }, { "epoch": 0.4314110220098959, "grad_norm": 1.3504475310950526, "learning_rate": 6.337412178983704e-05, "loss": 0.24, "step": 5057 }, { "epoch": 0.43149633168401297, "grad_norm": 1.241861936503385, "learning_rate": 6.336080939903968e-05, "loss": 0.2697, "step": 5058 }, { "epoch": 0.43158164135813, "grad_norm": 1.548566721615955, "learning_rate": 6.334749598821555e-05, "loss": 0.261, "step": 5059 }, { "epoch": 0.43166695103224706, "grad_norm": 1.3921929852755426, "learning_rate": 6.3334181558381e-05, "loss": 0.2484, "step": 5060 }, { "epoch": 0.4317522607063641, "grad_norm": 1.2917589134173089, "learning_rate": 6.332086611055255e-05, "loss": 0.2546, "step": 5061 }, { "epoch": 0.43183757038048115, "grad_norm": 1.6515259939903106, "learning_rate": 6.330754964574676e-05, "loss": 0.3036, "step": 5062 }, { "epoch": 0.43192288005459817, "grad_norm": 1.6574933405493533, "learning_rate": 6.329423216498027e-05, "loss": 0.2998, "step": 5063 }, { "epoch": 0.43200818972871524, "grad_norm": 1.4420528460592685, "learning_rate": 6.328091366926979e-05, "loss": 0.2514, "step": 5064 }, { "epoch": 0.43209349940283226, "grad_norm": 1.7987987006449317, "learning_rate": 6.326759415963216e-05, "loss": 0.277, "step": 5065 }, { "epoch": 0.43217880907694933, "grad_norm": 1.9954484168879056, "learning_rate": 6.325427363708418e-05, "loss": 0.3327, "step": 5066 }, { "epoch": 0.43226411875106635, "grad_norm": 1.4983881396297263, "learning_rate": 6.324095210264286e-05, "loss": 0.2754, "step": 5067 }, { "epoch": 0.4323494284251834, "grad_norm": 1.3265107161588363, "learning_rate": 6.322762955732521e-05, "loss": 0.2447, "step": 5068 }, { "epoch": 0.43243473809930044, "grad_norm": 1.3503106995958802, "learning_rate": 6.321430600214832e-05, "loss": 0.3108, "step": 5069 }, { "epoch": 0.4325200477734175, "grad_norm": 1.7680134779817702, "learning_rate": 6.320098143812942e-05, "loss": 0.3397, "step": 5070 }, { "epoch": 0.43260535744753453, "grad_norm": 1.7289890826240784, "learning_rate": 6.318765586628572e-05, "loss": 0.2652, "step": 5071 }, { "epoch": 0.4326906671216516, "grad_norm": 1.2743224830900588, "learning_rate": 6.317432928763456e-05, "loss": 0.2929, "step": 5072 }, { "epoch": 0.4327759767957686, "grad_norm": 1.4830763104818316, "learning_rate": 6.316100170319337e-05, "loss": 0.2783, "step": 5073 }, { "epoch": 0.4328612864698857, "grad_norm": 1.4062855610059903, "learning_rate": 6.314767311397966e-05, "loss": 0.2222, "step": 5074 }, { "epoch": 0.4329465961440027, "grad_norm": 1.970494662885739, "learning_rate": 6.313434352101095e-05, "loss": 0.2926, "step": 5075 }, { "epoch": 0.4330319058181198, "grad_norm": 1.276004474123821, "learning_rate": 6.312101292530492e-05, "loss": 0.2947, "step": 5076 }, { "epoch": 0.4331172154922368, "grad_norm": 1.644434257782209, "learning_rate": 6.310768132787928e-05, "loss": 0.2534, "step": 5077 }, { "epoch": 0.4332025251663539, "grad_norm": 1.8482645658468353, "learning_rate": 6.309434872975181e-05, "loss": 0.2576, "step": 5078 }, { "epoch": 0.4332878348404709, "grad_norm": 1.6113810952467613, "learning_rate": 6.308101513194041e-05, "loss": 0.2466, "step": 5079 }, { "epoch": 0.43337314451458797, "grad_norm": 1.6173753122561878, "learning_rate": 6.306768053546302e-05, "loss": 0.2702, "step": 5080 }, { "epoch": 0.433458454188705, "grad_norm": 1.2983841904623148, "learning_rate": 6.305434494133766e-05, "loss": 0.2853, "step": 5081 }, { "epoch": 0.43354376386282206, "grad_norm": 1.4637733719521049, "learning_rate": 6.304100835058244e-05, "loss": 0.2471, "step": 5082 }, { "epoch": 0.4336290735369391, "grad_norm": 1.6074859495127352, "learning_rate": 6.302767076421552e-05, "loss": 0.3184, "step": 5083 }, { "epoch": 0.43371438321105615, "grad_norm": 1.5560416453655141, "learning_rate": 6.301433218325518e-05, "loss": 0.2535, "step": 5084 }, { "epoch": 0.43379969288517317, "grad_norm": 1.6192441384941332, "learning_rate": 6.300099260871972e-05, "loss": 0.3169, "step": 5085 }, { "epoch": 0.43388500255929024, "grad_norm": 1.521379068082254, "learning_rate": 6.298765204162757e-05, "loss": 0.3261, "step": 5086 }, { "epoch": 0.43397031223340726, "grad_norm": 1.9326284137417122, "learning_rate": 6.29743104829972e-05, "loss": 0.266, "step": 5087 }, { "epoch": 0.43405562190752434, "grad_norm": 1.454326993890119, "learning_rate": 6.296096793384716e-05, "loss": 0.2344, "step": 5088 }, { "epoch": 0.43414093158164135, "grad_norm": 1.6085421434881086, "learning_rate": 6.29476243951961e-05, "loss": 0.3151, "step": 5089 }, { "epoch": 0.4342262412557584, "grad_norm": 1.4742993970037481, "learning_rate": 6.293427986806274e-05, "loss": 0.2391, "step": 5090 }, { "epoch": 0.43431155092987545, "grad_norm": 1.4140353964215566, "learning_rate": 6.292093435346583e-05, "loss": 0.2673, "step": 5091 }, { "epoch": 0.4343968606039925, "grad_norm": 1.461170687909295, "learning_rate": 6.290758785242425e-05, "loss": 0.2949, "step": 5092 }, { "epoch": 0.43448217027810954, "grad_norm": 1.435889627449407, "learning_rate": 6.289424036595693e-05, "loss": 0.2601, "step": 5093 }, { "epoch": 0.4345674799522266, "grad_norm": 1.3765646961554026, "learning_rate": 6.288089189508286e-05, "loss": 0.2792, "step": 5094 }, { "epoch": 0.4346527896263436, "grad_norm": 1.4944249499694886, "learning_rate": 6.286754244082115e-05, "loss": 0.2523, "step": 5095 }, { "epoch": 0.43473809930046065, "grad_norm": 1.639685225400661, "learning_rate": 6.285419200419095e-05, "loss": 0.2736, "step": 5096 }, { "epoch": 0.4348234089745777, "grad_norm": 1.2794456337204412, "learning_rate": 6.28408405862115e-05, "loss": 0.215, "step": 5097 }, { "epoch": 0.43490871864869474, "grad_norm": 1.3889299842264815, "learning_rate": 6.282748818790212e-05, "loss": 0.2298, "step": 5098 }, { "epoch": 0.4349940283228118, "grad_norm": 1.500298152537951, "learning_rate": 6.281413481028217e-05, "loss": 0.2499, "step": 5099 }, { "epoch": 0.4350793379969288, "grad_norm": 1.6133842527745306, "learning_rate": 6.280078045437113e-05, "loss": 0.3081, "step": 5100 }, { "epoch": 0.4351646476710459, "grad_norm": 1.6147952607233418, "learning_rate": 6.278742512118852e-05, "loss": 0.3168, "step": 5101 }, { "epoch": 0.4352499573451629, "grad_norm": 1.7215792864141926, "learning_rate": 6.277406881175395e-05, "loss": 0.2917, "step": 5102 }, { "epoch": 0.43533526701928, "grad_norm": 1.6930448070529511, "learning_rate": 6.276071152708712e-05, "loss": 0.3192, "step": 5103 }, { "epoch": 0.435420576693397, "grad_norm": 1.5554088663388788, "learning_rate": 6.274735326820775e-05, "loss": 0.2883, "step": 5104 }, { "epoch": 0.4355058863675141, "grad_norm": 1.820750826341912, "learning_rate": 6.273399403613572e-05, "loss": 0.3228, "step": 5105 }, { "epoch": 0.4355911960416311, "grad_norm": 1.3727531715874688, "learning_rate": 6.272063383189091e-05, "loss": 0.2265, "step": 5106 }, { "epoch": 0.4356765057157482, "grad_norm": 1.7821076319478921, "learning_rate": 6.27072726564933e-05, "loss": 0.3114, "step": 5107 }, { "epoch": 0.4357618153898652, "grad_norm": 1.5560485403045548, "learning_rate": 6.269391051096295e-05, "loss": 0.2492, "step": 5108 }, { "epoch": 0.43584712506398227, "grad_norm": 2.3669489875714738, "learning_rate": 6.268054739631999e-05, "loss": 0.3562, "step": 5109 }, { "epoch": 0.4359324347380993, "grad_norm": 1.6685066874424086, "learning_rate": 6.266718331358461e-05, "loss": 0.2433, "step": 5110 }, { "epoch": 0.43601774441221636, "grad_norm": 1.5237833266333765, "learning_rate": 6.265381826377711e-05, "loss": 0.2095, "step": 5111 }, { "epoch": 0.4361030540863334, "grad_norm": 1.3610898305998322, "learning_rate": 6.264045224791784e-05, "loss": 0.2883, "step": 5112 }, { "epoch": 0.43618836376045045, "grad_norm": 1.4754608727402108, "learning_rate": 6.26270852670272e-05, "loss": 0.2756, "step": 5113 }, { "epoch": 0.43627367343456747, "grad_norm": 1.6672345942215778, "learning_rate": 6.26137173221257e-05, "loss": 0.2749, "step": 5114 }, { "epoch": 0.43635898310868454, "grad_norm": 1.8929589737699923, "learning_rate": 6.26003484142339e-05, "loss": 0.2851, "step": 5115 }, { "epoch": 0.43644429278280156, "grad_norm": 1.422836230319117, "learning_rate": 6.258697854437247e-05, "loss": 0.2843, "step": 5116 }, { "epoch": 0.43652960245691863, "grad_norm": 1.5511477251173464, "learning_rate": 6.25736077135621e-05, "loss": 0.2994, "step": 5117 }, { "epoch": 0.43661491213103565, "grad_norm": 1.5758007799185605, "learning_rate": 6.25602359228236e-05, "loss": 0.2504, "step": 5118 }, { "epoch": 0.4367002218051527, "grad_norm": 1.3434544615330335, "learning_rate": 6.254686317317785e-05, "loss": 0.2683, "step": 5119 }, { "epoch": 0.43678553147926974, "grad_norm": 1.3835542058220913, "learning_rate": 6.253348946564575e-05, "loss": 0.269, "step": 5120 }, { "epoch": 0.4368708411533868, "grad_norm": 1.400441444145243, "learning_rate": 6.252011480124835e-05, "loss": 0.3492, "step": 5121 }, { "epoch": 0.43695615082750383, "grad_norm": 1.4901112596844583, "learning_rate": 6.250673918100671e-05, "loss": 0.2602, "step": 5122 }, { "epoch": 0.4370414605016209, "grad_norm": 1.6346843156909356, "learning_rate": 6.2493362605942e-05, "loss": 0.3288, "step": 5123 }, { "epoch": 0.4371267701757379, "grad_norm": 1.4179501703059176, "learning_rate": 6.247998507707545e-05, "loss": 0.2642, "step": 5124 }, { "epoch": 0.437212079849855, "grad_norm": 1.7672789262483783, "learning_rate": 6.246660659542833e-05, "loss": 0.2853, "step": 5125 }, { "epoch": 0.437297389523972, "grad_norm": 1.5395747073099206, "learning_rate": 6.245322716202207e-05, "loss": 0.2801, "step": 5126 }, { "epoch": 0.4373826991980891, "grad_norm": 1.4990915090407348, "learning_rate": 6.243984677787808e-05, "loss": 0.2852, "step": 5127 }, { "epoch": 0.4374680088722061, "grad_norm": 1.6057709272631342, "learning_rate": 6.242646544401788e-05, "loss": 0.3041, "step": 5128 }, { "epoch": 0.4375533185463232, "grad_norm": 1.706364900961085, "learning_rate": 6.241308316146311e-05, "loss": 0.2983, "step": 5129 }, { "epoch": 0.4376386282204402, "grad_norm": 1.6541272361277293, "learning_rate": 6.239969993123539e-05, "loss": 0.3353, "step": 5130 }, { "epoch": 0.43772393789455727, "grad_norm": 1.949821669420438, "learning_rate": 6.238631575435647e-05, "loss": 0.319, "step": 5131 }, { "epoch": 0.4378092475686743, "grad_norm": 1.9016036644322158, "learning_rate": 6.237293063184816e-05, "loss": 0.3027, "step": 5132 }, { "epoch": 0.43789455724279136, "grad_norm": 1.7997454728203253, "learning_rate": 6.235954456473235e-05, "loss": 0.2878, "step": 5133 }, { "epoch": 0.4379798669169084, "grad_norm": 1.4868105532776832, "learning_rate": 6.234615755403101e-05, "loss": 0.3155, "step": 5134 }, { "epoch": 0.4380651765910254, "grad_norm": 1.4993764852983147, "learning_rate": 6.233276960076612e-05, "loss": 0.3125, "step": 5135 }, { "epoch": 0.43815048626514247, "grad_norm": 1.3794388647466354, "learning_rate": 6.231938070595981e-05, "loss": 0.2524, "step": 5136 }, { "epoch": 0.4382357959392595, "grad_norm": 2.153658650431996, "learning_rate": 6.230599087063426e-05, "loss": 0.3569, "step": 5137 }, { "epoch": 0.43832110561337656, "grad_norm": 1.2546178396989243, "learning_rate": 6.22926000958117e-05, "loss": 0.2758, "step": 5138 }, { "epoch": 0.4384064152874936, "grad_norm": 1.335148762547462, "learning_rate": 6.227920838251443e-05, "loss": 0.2539, "step": 5139 }, { "epoch": 0.43849172496161065, "grad_norm": 1.5401026894651864, "learning_rate": 6.226581573176487e-05, "loss": 0.3029, "step": 5140 }, { "epoch": 0.43857703463572767, "grad_norm": 1.3128806879086894, "learning_rate": 6.225242214458544e-05, "loss": 0.2847, "step": 5141 }, { "epoch": 0.43866234430984474, "grad_norm": 1.4588859964112695, "learning_rate": 6.223902762199871e-05, "loss": 0.2632, "step": 5142 }, { "epoch": 0.43874765398396176, "grad_norm": 1.5169085543787444, "learning_rate": 6.222563216502724e-05, "loss": 0.3082, "step": 5143 }, { "epoch": 0.43883296365807883, "grad_norm": 1.3512414098814172, "learning_rate": 6.221223577469373e-05, "loss": 0.2355, "step": 5144 }, { "epoch": 0.43891827333219585, "grad_norm": 1.4488942073377913, "learning_rate": 6.219883845202092e-05, "loss": 0.2334, "step": 5145 }, { "epoch": 0.4390035830063129, "grad_norm": 1.4788356822609379, "learning_rate": 6.21854401980316e-05, "loss": 0.298, "step": 5146 }, { "epoch": 0.43908889268042994, "grad_norm": 1.400425440995251, "learning_rate": 6.21720410137487e-05, "loss": 0.2718, "step": 5147 }, { "epoch": 0.439174202354547, "grad_norm": 1.639958418807465, "learning_rate": 6.215864090019515e-05, "loss": 0.2793, "step": 5148 }, { "epoch": 0.43925951202866403, "grad_norm": 1.7826025662867973, "learning_rate": 6.214523985839395e-05, "loss": 0.3227, "step": 5149 }, { "epoch": 0.4393448217027811, "grad_norm": 1.5873346047335588, "learning_rate": 6.213183788936825e-05, "loss": 0.3121, "step": 5150 }, { "epoch": 0.4394301313768981, "grad_norm": 1.4809368269259358, "learning_rate": 6.211843499414119e-05, "loss": 0.2202, "step": 5151 }, { "epoch": 0.4395154410510152, "grad_norm": 1.6675245779076318, "learning_rate": 6.2105031173736e-05, "loss": 0.288, "step": 5152 }, { "epoch": 0.4396007507251322, "grad_norm": 1.3764440582968591, "learning_rate": 6.209162642917603e-05, "loss": 0.2348, "step": 5153 }, { "epoch": 0.4396860603992493, "grad_norm": 1.7225934144754642, "learning_rate": 6.207822076148462e-05, "loss": 0.2331, "step": 5154 }, { "epoch": 0.4397713700733663, "grad_norm": 1.5540717357588882, "learning_rate": 6.206481417168526e-05, "loss": 0.3335, "step": 5155 }, { "epoch": 0.4398566797474834, "grad_norm": 1.5373201528669584, "learning_rate": 6.205140666080143e-05, "loss": 0.2839, "step": 5156 }, { "epoch": 0.4399419894216004, "grad_norm": 1.5838442362965663, "learning_rate": 6.203799822985676e-05, "loss": 0.2013, "step": 5157 }, { "epoch": 0.4400272990957175, "grad_norm": 1.6029133379969167, "learning_rate": 6.202458887987488e-05, "loss": 0.2862, "step": 5158 }, { "epoch": 0.4401126087698345, "grad_norm": 1.5283134484670142, "learning_rate": 6.201117861187955e-05, "loss": 0.2927, "step": 5159 }, { "epoch": 0.44019791844395156, "grad_norm": 1.5045056226225304, "learning_rate": 6.199776742689454e-05, "loss": 0.3805, "step": 5160 }, { "epoch": 0.4402832281180686, "grad_norm": 1.7948939141720086, "learning_rate": 6.198435532594375e-05, "loss": 0.2245, "step": 5161 }, { "epoch": 0.44036853779218565, "grad_norm": 1.3969265751203481, "learning_rate": 6.197094231005112e-05, "loss": 0.2361, "step": 5162 }, { "epoch": 0.4404538474663027, "grad_norm": 1.5890756762173044, "learning_rate": 6.195752838024066e-05, "loss": 0.2551, "step": 5163 }, { "epoch": 0.44053915714041975, "grad_norm": 1.6392128816317217, "learning_rate": 6.194411353753646e-05, "loss": 0.2553, "step": 5164 }, { "epoch": 0.44062446681453676, "grad_norm": 1.4795481856925143, "learning_rate": 6.193069778296265e-05, "loss": 0.2202, "step": 5165 }, { "epoch": 0.44070977648865384, "grad_norm": 2.1887993359628863, "learning_rate": 6.191728111754347e-05, "loss": 0.2988, "step": 5166 }, { "epoch": 0.44079508616277085, "grad_norm": 1.9438953063229973, "learning_rate": 6.19038635423032e-05, "loss": 0.3341, "step": 5167 }, { "epoch": 0.44088039583688793, "grad_norm": 1.5584100218209778, "learning_rate": 6.18904450582662e-05, "loss": 0.3063, "step": 5168 }, { "epoch": 0.44096570551100495, "grad_norm": 1.4458368123623695, "learning_rate": 6.18770256664569e-05, "loss": 0.2736, "step": 5169 }, { "epoch": 0.441051015185122, "grad_norm": 1.240801968191985, "learning_rate": 6.186360536789981e-05, "loss": 0.2749, "step": 5170 }, { "epoch": 0.44113632485923904, "grad_norm": 1.2328721567473298, "learning_rate": 6.185018416361951e-05, "loss": 0.2096, "step": 5171 }, { "epoch": 0.44122163453335606, "grad_norm": 1.588641787849778, "learning_rate": 6.183676205464062e-05, "loss": 0.3315, "step": 5172 }, { "epoch": 0.44130694420747313, "grad_norm": 1.8023450437226136, "learning_rate": 6.182333904198782e-05, "loss": 0.2943, "step": 5173 }, { "epoch": 0.44139225388159015, "grad_norm": 1.306969480086682, "learning_rate": 6.180991512668594e-05, "loss": 0.2512, "step": 5174 }, { "epoch": 0.4414775635557072, "grad_norm": 1.3164907366075025, "learning_rate": 6.17964903097598e-05, "loss": 0.2462, "step": 5175 }, { "epoch": 0.44156287322982424, "grad_norm": 1.5136432866503924, "learning_rate": 6.17830645922343e-05, "loss": 0.2548, "step": 5176 }, { "epoch": 0.4416481829039413, "grad_norm": 2.2202351931832167, "learning_rate": 6.176963797513443e-05, "loss": 0.2466, "step": 5177 }, { "epoch": 0.44173349257805833, "grad_norm": 1.4381161903267352, "learning_rate": 6.175621045948524e-05, "loss": 0.2978, "step": 5178 }, { "epoch": 0.4418188022521754, "grad_norm": 1.8296350616576644, "learning_rate": 6.174278204631187e-05, "loss": 0.3037, "step": 5179 }, { "epoch": 0.4419041119262924, "grad_norm": 1.4221353816468, "learning_rate": 6.172935273663949e-05, "loss": 0.2781, "step": 5180 }, { "epoch": 0.4419894216004095, "grad_norm": 1.4896032514329738, "learning_rate": 6.171592253149334e-05, "loss": 0.2795, "step": 5181 }, { "epoch": 0.4420747312745265, "grad_norm": 1.7798755764068521, "learning_rate": 6.170249143189878e-05, "loss": 0.3044, "step": 5182 }, { "epoch": 0.4421600409486436, "grad_norm": 1.8107979115888055, "learning_rate": 6.168905943888118e-05, "loss": 0.2773, "step": 5183 }, { "epoch": 0.4422453506227606, "grad_norm": 1.4896003704386085, "learning_rate": 6.1675626553466e-05, "loss": 0.317, "step": 5184 }, { "epoch": 0.4423306602968777, "grad_norm": 1.3969656588430133, "learning_rate": 6.16621927766788e-05, "loss": 0.2334, "step": 5185 }, { "epoch": 0.4424159699709947, "grad_norm": 1.7278999223427496, "learning_rate": 6.164875810954514e-05, "loss": 0.2922, "step": 5186 }, { "epoch": 0.44250127964511177, "grad_norm": 1.4718029676819824, "learning_rate": 6.16353225530907e-05, "loss": 0.2702, "step": 5187 }, { "epoch": 0.4425865893192288, "grad_norm": 1.5142939120068197, "learning_rate": 6.162188610834121e-05, "loss": 0.2681, "step": 5188 }, { "epoch": 0.44267189899334586, "grad_norm": 1.6394124227565277, "learning_rate": 6.160844877632248e-05, "loss": 0.3187, "step": 5189 }, { "epoch": 0.4427572086674629, "grad_norm": 1.2255543252583894, "learning_rate": 6.159501055806038e-05, "loss": 0.2619, "step": 5190 }, { "epoch": 0.44284251834157995, "grad_norm": 1.673456745054852, "learning_rate": 6.158157145458082e-05, "loss": 0.2188, "step": 5191 }, { "epoch": 0.44292782801569697, "grad_norm": 1.4724191328035214, "learning_rate": 6.156813146690983e-05, "loss": 0.2091, "step": 5192 }, { "epoch": 0.44301313768981404, "grad_norm": 1.7781437341544537, "learning_rate": 6.155469059607348e-05, "loss": 0.3387, "step": 5193 }, { "epoch": 0.44309844736393106, "grad_norm": 1.8527349309178482, "learning_rate": 6.15412488430979e-05, "loss": 0.2854, "step": 5194 }, { "epoch": 0.44318375703804813, "grad_norm": 1.7062141330847755, "learning_rate": 6.152780620900931e-05, "loss": 0.2648, "step": 5195 }, { "epoch": 0.44326906671216515, "grad_norm": 1.5507985457601707, "learning_rate": 6.151436269483397e-05, "loss": 0.2621, "step": 5196 }, { "epoch": 0.4433543763862822, "grad_norm": 1.2461117829838635, "learning_rate": 6.150091830159823e-05, "loss": 0.2276, "step": 5197 }, { "epoch": 0.44343968606039924, "grad_norm": 1.4133148324328326, "learning_rate": 6.148747303032849e-05, "loss": 0.289, "step": 5198 }, { "epoch": 0.4435249957345163, "grad_norm": 1.2218338513092024, "learning_rate": 6.147402688205122e-05, "loss": 0.2112, "step": 5199 }, { "epoch": 0.44361030540863333, "grad_norm": 1.2490177109671305, "learning_rate": 6.146057985779299e-05, "loss": 0.2842, "step": 5200 }, { "epoch": 0.4436956150827504, "grad_norm": 1.4386268842769159, "learning_rate": 6.144713195858037e-05, "loss": 0.3061, "step": 5201 }, { "epoch": 0.4437809247568674, "grad_norm": 1.4714355266210903, "learning_rate": 6.143368318544006e-05, "loss": 0.2758, "step": 5202 }, { "epoch": 0.4438662344309845, "grad_norm": 1.3903686201379104, "learning_rate": 6.14202335393988e-05, "loss": 0.2147, "step": 5203 }, { "epoch": 0.4439515441051015, "grad_norm": 1.6431117512084286, "learning_rate": 6.140678302148339e-05, "loss": 0.2642, "step": 5204 }, { "epoch": 0.4440368537792186, "grad_norm": 1.2858744646384437, "learning_rate": 6.139333163272072e-05, "loss": 0.2456, "step": 5205 }, { "epoch": 0.4441221634533356, "grad_norm": 1.5328748217870887, "learning_rate": 6.137987937413771e-05, "loss": 0.2651, "step": 5206 }, { "epoch": 0.4442074731274527, "grad_norm": 1.5326018302403768, "learning_rate": 6.13664262467614e-05, "loss": 0.2872, "step": 5207 }, { "epoch": 0.4442927828015697, "grad_norm": 1.5295784058989852, "learning_rate": 6.135297225161886e-05, "loss": 0.2464, "step": 5208 }, { "epoch": 0.44437809247568677, "grad_norm": 1.9544282761141707, "learning_rate": 6.13395173897372e-05, "loss": 0.3742, "step": 5209 }, { "epoch": 0.4444634021498038, "grad_norm": 1.4048587274628073, "learning_rate": 6.132606166214363e-05, "loss": 0.3307, "step": 5210 }, { "epoch": 0.4445487118239208, "grad_norm": 1.6571629725019927, "learning_rate": 6.131260506986545e-05, "loss": 0.264, "step": 5211 }, { "epoch": 0.4446340214980379, "grad_norm": 1.557589941474087, "learning_rate": 6.129914761393001e-05, "loss": 0.2593, "step": 5212 }, { "epoch": 0.4447193311721549, "grad_norm": 1.4912932747303944, "learning_rate": 6.128568929536466e-05, "loss": 0.2584, "step": 5213 }, { "epoch": 0.44480464084627197, "grad_norm": 2.059533963124588, "learning_rate": 6.127223011519692e-05, "loss": 0.2803, "step": 5214 }, { "epoch": 0.444889950520389, "grad_norm": 1.6256529889892681, "learning_rate": 6.12587700744543e-05, "loss": 0.3207, "step": 5215 }, { "epoch": 0.44497526019450606, "grad_norm": 1.906697517770092, "learning_rate": 6.124530917416443e-05, "loss": 0.2884, "step": 5216 }, { "epoch": 0.4450605698686231, "grad_norm": 1.4580011080200004, "learning_rate": 6.123184741535495e-05, "loss": 0.2764, "step": 5217 }, { "epoch": 0.44514587954274015, "grad_norm": 2.0293637247245213, "learning_rate": 6.121838479905363e-05, "loss": 0.3249, "step": 5218 }, { "epoch": 0.44523118921685717, "grad_norm": 2.0002829828335726, "learning_rate": 6.120492132628823e-05, "loss": 0.2556, "step": 5219 }, { "epoch": 0.44531649889097424, "grad_norm": 1.1030083224483207, "learning_rate": 6.119145699808662e-05, "loss": 0.2096, "step": 5220 }, { "epoch": 0.44540180856509126, "grad_norm": 1.6598835081567125, "learning_rate": 6.117799181547674e-05, "loss": 0.2611, "step": 5221 }, { "epoch": 0.44548711823920834, "grad_norm": 1.7112401407207194, "learning_rate": 6.11645257794866e-05, "loss": 0.2944, "step": 5222 }, { "epoch": 0.44557242791332535, "grad_norm": 1.3723354097078366, "learning_rate": 6.115105889114422e-05, "loss": 0.2543, "step": 5223 }, { "epoch": 0.4456577375874424, "grad_norm": 1.3198701918960858, "learning_rate": 6.113759115147778e-05, "loss": 0.2742, "step": 5224 }, { "epoch": 0.44574304726155944, "grad_norm": 1.761535296647109, "learning_rate": 6.112412256151543e-05, "loss": 0.235, "step": 5225 }, { "epoch": 0.4458283569356765, "grad_norm": 1.9402292778757666, "learning_rate": 6.111065312228542e-05, "loss": 0.3108, "step": 5226 }, { "epoch": 0.44591366660979354, "grad_norm": 1.3374754324109495, "learning_rate": 6.109718283481611e-05, "loss": 0.2744, "step": 5227 }, { "epoch": 0.4459989762839106, "grad_norm": 1.7133194181171574, "learning_rate": 6.108371170013585e-05, "loss": 0.2312, "step": 5228 }, { "epoch": 0.4460842859580276, "grad_norm": 1.432666031434522, "learning_rate": 6.10702397192731e-05, "loss": 0.3297, "step": 5229 }, { "epoch": 0.4461695956321447, "grad_norm": 1.7231450155186696, "learning_rate": 6.105676689325638e-05, "loss": 0.323, "step": 5230 }, { "epoch": 0.4462549053062617, "grad_norm": 1.6949707249276655, "learning_rate": 6.104329322311425e-05, "loss": 0.2609, "step": 5231 }, { "epoch": 0.4463402149803788, "grad_norm": 1.5347356244284058, "learning_rate": 6.1029818709875374e-05, "loss": 0.2297, "step": 5232 }, { "epoch": 0.4464255246544958, "grad_norm": 1.4104101739656034, "learning_rate": 6.1016343354568464e-05, "loss": 0.2461, "step": 5233 }, { "epoch": 0.4465108343286129, "grad_norm": 1.6508820112594653, "learning_rate": 6.100286715822225e-05, "loss": 0.2898, "step": 5234 }, { "epoch": 0.4465961440027299, "grad_norm": 1.495578448044967, "learning_rate": 6.0989390121865634e-05, "loss": 0.278, "step": 5235 }, { "epoch": 0.446681453676847, "grad_norm": 1.970429143394914, "learning_rate": 6.0975912246527455e-05, "loss": 0.2939, "step": 5236 }, { "epoch": 0.446766763350964, "grad_norm": 1.4498760861033335, "learning_rate": 6.0962433533236705e-05, "loss": 0.2782, "step": 5237 }, { "epoch": 0.44685207302508106, "grad_norm": 1.5991839354303115, "learning_rate": 6.094895398302241e-05, "loss": 0.2813, "step": 5238 }, { "epoch": 0.4469373826991981, "grad_norm": 1.4452465145410207, "learning_rate": 6.093547359691367e-05, "loss": 0.3788, "step": 5239 }, { "epoch": 0.44702269237331516, "grad_norm": 1.5115167053995482, "learning_rate": 6.092199237593963e-05, "loss": 0.2613, "step": 5240 }, { "epoch": 0.4471080020474322, "grad_norm": 1.4694979771888623, "learning_rate": 6.090851032112951e-05, "loss": 0.2755, "step": 5241 }, { "epoch": 0.44719331172154925, "grad_norm": 1.6402394886305067, "learning_rate": 6.089502743351259e-05, "loss": 0.3729, "step": 5242 }, { "epoch": 0.44727862139566626, "grad_norm": 1.3387551292086095, "learning_rate": 6.088154371411822e-05, "loss": 0.2339, "step": 5243 }, { "epoch": 0.44736393106978334, "grad_norm": 1.3734613826615132, "learning_rate": 6.086805916397581e-05, "loss": 0.289, "step": 5244 }, { "epoch": 0.44744924074390036, "grad_norm": 1.821416822764228, "learning_rate": 6.085457378411484e-05, "loss": 0.2874, "step": 5245 }, { "epoch": 0.44753455041801743, "grad_norm": 1.9600965664878032, "learning_rate": 6.084108757556485e-05, "loss": 0.2595, "step": 5246 }, { "epoch": 0.44761986009213445, "grad_norm": 1.5326645214889356, "learning_rate": 6.082760053935541e-05, "loss": 0.2789, "step": 5247 }, { "epoch": 0.4477051697662515, "grad_norm": 1.3018390375482094, "learning_rate": 6.0814112676516234e-05, "loss": 0.3012, "step": 5248 }, { "epoch": 0.44779047944036854, "grad_norm": 1.5963610036515838, "learning_rate": 6.080062398807701e-05, "loss": 0.2554, "step": 5249 }, { "epoch": 0.44787578911448556, "grad_norm": 1.6259060314563756, "learning_rate": 6.078713447506754e-05, "loss": 0.2914, "step": 5250 }, { "epoch": 0.44796109878860263, "grad_norm": 1.343912514019017, "learning_rate": 6.077364413851768e-05, "loss": 0.2828, "step": 5251 }, { "epoch": 0.44804640846271965, "grad_norm": 1.4950345027093461, "learning_rate": 6.076015297945733e-05, "loss": 0.2783, "step": 5252 }, { "epoch": 0.4481317181368367, "grad_norm": 1.7488813912342565, "learning_rate": 6.0746660998916495e-05, "loss": 0.2882, "step": 5253 }, { "epoch": 0.44821702781095374, "grad_norm": 1.6529743984106795, "learning_rate": 6.0733168197925195e-05, "loss": 0.2958, "step": 5254 }, { "epoch": 0.4483023374850708, "grad_norm": 1.6597367777733705, "learning_rate": 6.071967457751352e-05, "loss": 0.3164, "step": 5255 }, { "epoch": 0.44838764715918783, "grad_norm": 1.4902570930239203, "learning_rate": 6.070618013871168e-05, "loss": 0.2731, "step": 5256 }, { "epoch": 0.4484729568333049, "grad_norm": 1.5891414656472078, "learning_rate": 6.0692684882549864e-05, "loss": 0.2534, "step": 5257 }, { "epoch": 0.4485582665074219, "grad_norm": 1.8003599124936547, "learning_rate": 6.067918881005839e-05, "loss": 0.2366, "step": 5258 }, { "epoch": 0.448643576181539, "grad_norm": 1.68585131604341, "learning_rate": 6.0665691922267586e-05, "loss": 0.2657, "step": 5259 }, { "epoch": 0.448728885855656, "grad_norm": 1.5923699780717449, "learning_rate": 6.065219422020789e-05, "loss": 0.3073, "step": 5260 }, { "epoch": 0.4488141955297731, "grad_norm": 1.4205680961305651, "learning_rate": 6.0638695704909776e-05, "loss": 0.2009, "step": 5261 }, { "epoch": 0.4488995052038901, "grad_norm": 1.3684904592736875, "learning_rate": 6.062519637740378e-05, "loss": 0.3257, "step": 5262 }, { "epoch": 0.4489848148780072, "grad_norm": 1.3325976289121466, "learning_rate": 6.0611696238720485e-05, "loss": 0.3054, "step": 5263 }, { "epoch": 0.4490701245521242, "grad_norm": 1.5370350762796896, "learning_rate": 6.059819528989058e-05, "loss": 0.2203, "step": 5264 }, { "epoch": 0.44915543422624127, "grad_norm": 1.5802576743435792, "learning_rate": 6.058469353194479e-05, "loss": 0.2687, "step": 5265 }, { "epoch": 0.4492407439003583, "grad_norm": 1.476797741109375, "learning_rate": 6.057119096591388e-05, "loss": 0.2844, "step": 5266 }, { "epoch": 0.44932605357447536, "grad_norm": 1.5180398638721757, "learning_rate": 6.055768759282874e-05, "loss": 0.3001, "step": 5267 }, { "epoch": 0.4494113632485924, "grad_norm": 1.61176004805287, "learning_rate": 6.0544183413720235e-05, "loss": 0.2851, "step": 5268 }, { "epoch": 0.44949667292270945, "grad_norm": 1.367968004053284, "learning_rate": 6.053067842961937e-05, "loss": 0.1934, "step": 5269 }, { "epoch": 0.44958198259682647, "grad_norm": 1.8513630304854296, "learning_rate": 6.051717264155716e-05, "loss": 0.3231, "step": 5270 }, { "epoch": 0.44966729227094354, "grad_norm": 1.5373362818444576, "learning_rate": 6.050366605056471e-05, "loss": 0.2526, "step": 5271 }, { "epoch": 0.44975260194506056, "grad_norm": 1.4209011222053676, "learning_rate": 6.049015865767318e-05, "loss": 0.2726, "step": 5272 }, { "epoch": 0.44983791161917763, "grad_norm": 1.7456990933152985, "learning_rate": 6.047665046391378e-05, "loss": 0.2953, "step": 5273 }, { "epoch": 0.44992322129329465, "grad_norm": 1.6697849507552758, "learning_rate": 6.0463141470317774e-05, "loss": 0.2693, "step": 5274 }, { "epoch": 0.4500085309674117, "grad_norm": 1.4548469103734905, "learning_rate": 6.044963167791653e-05, "loss": 0.3193, "step": 5275 }, { "epoch": 0.45009384064152874, "grad_norm": 1.837033597959754, "learning_rate": 6.0436121087741425e-05, "loss": 0.2306, "step": 5276 }, { "epoch": 0.4501791503156458, "grad_norm": 1.4547656242133378, "learning_rate": 6.042260970082395e-05, "loss": 0.2619, "step": 5277 }, { "epoch": 0.45026445998976283, "grad_norm": 1.8460849266148116, "learning_rate": 6.04090975181956e-05, "loss": 0.2537, "step": 5278 }, { "epoch": 0.4503497696638799, "grad_norm": 1.277109013724992, "learning_rate": 6.0395584540887963e-05, "loss": 0.2636, "step": 5279 }, { "epoch": 0.4504350793379969, "grad_norm": 1.5010143664928624, "learning_rate": 6.03820707699327e-05, "loss": 0.3206, "step": 5280 }, { "epoch": 0.450520389012114, "grad_norm": 1.71959138563172, "learning_rate": 6.03685562063615e-05, "loss": 0.3005, "step": 5281 }, { "epoch": 0.450605698686231, "grad_norm": 1.811825659699528, "learning_rate": 6.035504085120613e-05, "loss": 0.3028, "step": 5282 }, { "epoch": 0.4506910083603481, "grad_norm": 1.2813216398726388, "learning_rate": 6.034152470549843e-05, "loss": 0.2441, "step": 5283 }, { "epoch": 0.4507763180344651, "grad_norm": 1.492211067053547, "learning_rate": 6.0328007770270256e-05, "loss": 0.2256, "step": 5284 }, { "epoch": 0.4508616277085822, "grad_norm": 1.6621591491060015, "learning_rate": 6.031449004655359e-05, "loss": 0.3062, "step": 5285 }, { "epoch": 0.4509469373826992, "grad_norm": 1.5886805072415773, "learning_rate": 6.03009715353804e-05, "loss": 0.3067, "step": 5286 }, { "epoch": 0.4510322470568162, "grad_norm": 1.221163292171883, "learning_rate": 6.028745223778278e-05, "loss": 0.2106, "step": 5287 }, { "epoch": 0.4511175567309333, "grad_norm": 1.6048521913817675, "learning_rate": 6.027393215479286e-05, "loss": 0.2118, "step": 5288 }, { "epoch": 0.4512028664050503, "grad_norm": 1.6170851817911316, "learning_rate": 6.0260411287442786e-05, "loss": 0.2375, "step": 5289 }, { "epoch": 0.4512881760791674, "grad_norm": 1.5302787347347688, "learning_rate": 6.0246889636764856e-05, "loss": 0.2316, "step": 5290 }, { "epoch": 0.4513734857532844, "grad_norm": 1.3690412760127562, "learning_rate": 6.023336720379136e-05, "loss": 0.2289, "step": 5291 }, { "epoch": 0.45145879542740147, "grad_norm": 1.496696330897282, "learning_rate": 6.021984398955466e-05, "loss": 0.2546, "step": 5292 }, { "epoch": 0.4515441051015185, "grad_norm": 1.484466549911171, "learning_rate": 6.020631999508717e-05, "loss": 0.3118, "step": 5293 }, { "epoch": 0.45162941477563556, "grad_norm": 1.4509230306652785, "learning_rate": 6.019279522142138e-05, "loss": 0.2319, "step": 5294 }, { "epoch": 0.4517147244497526, "grad_norm": 1.1941430348580064, "learning_rate": 6.017926966958984e-05, "loss": 0.2623, "step": 5295 }, { "epoch": 0.45180003412386965, "grad_norm": 1.6544845816849807, "learning_rate": 6.0165743340625155e-05, "loss": 0.2112, "step": 5296 }, { "epoch": 0.45188534379798667, "grad_norm": 1.6726692666015448, "learning_rate": 6.015221623555999e-05, "loss": 0.2848, "step": 5297 }, { "epoch": 0.45197065347210374, "grad_norm": 2.0145557731734107, "learning_rate": 6.013868835542707e-05, "loss": 0.2803, "step": 5298 }, { "epoch": 0.45205596314622076, "grad_norm": 1.3349221369721378, "learning_rate": 6.012515970125916e-05, "loss": 0.2705, "step": 5299 }, { "epoch": 0.45214127282033784, "grad_norm": 1.703491451491664, "learning_rate": 6.0111630274089105e-05, "loss": 0.3095, "step": 5300 }, { "epoch": 0.45222658249445485, "grad_norm": 1.2740621146565712, "learning_rate": 6.0098100074949825e-05, "loss": 0.2505, "step": 5301 }, { "epoch": 0.4523118921685719, "grad_norm": 1.51120214974981, "learning_rate": 6.008456910487428e-05, "loss": 0.2783, "step": 5302 }, { "epoch": 0.45239720184268895, "grad_norm": 1.3804369419235114, "learning_rate": 6.0071037364895454e-05, "loss": 0.2556, "step": 5303 }, { "epoch": 0.452482511516806, "grad_norm": 1.3680550139028613, "learning_rate": 6.0057504856046445e-05, "loss": 0.2649, "step": 5304 }, { "epoch": 0.45256782119092304, "grad_norm": 1.4577224360179035, "learning_rate": 6.004397157936038e-05, "loss": 0.2389, "step": 5305 }, { "epoch": 0.4526531308650401, "grad_norm": 1.2958580533988728, "learning_rate": 6.003043753587046e-05, "loss": 0.2432, "step": 5306 }, { "epoch": 0.4527384405391571, "grad_norm": 1.962129754779501, "learning_rate": 6.0016902726609945e-05, "loss": 0.2696, "step": 5307 }, { "epoch": 0.4528237502132742, "grad_norm": 1.4628625925635257, "learning_rate": 6.000336715261212e-05, "loss": 0.2325, "step": 5308 }, { "epoch": 0.4529090598873912, "grad_norm": 1.657408111464771, "learning_rate": 5.9989830814910397e-05, "loss": 0.2758, "step": 5309 }, { "epoch": 0.4529943695615083, "grad_norm": 1.4086160887577575, "learning_rate": 5.997629371453817e-05, "loss": 0.2591, "step": 5310 }, { "epoch": 0.4530796792356253, "grad_norm": 1.6527619971083838, "learning_rate": 5.996275585252891e-05, "loss": 0.2363, "step": 5311 }, { "epoch": 0.4531649889097424, "grad_norm": 1.5573770080793505, "learning_rate": 5.99492172299162e-05, "loss": 0.2787, "step": 5312 }, { "epoch": 0.4532502985838594, "grad_norm": 1.642126722145731, "learning_rate": 5.993567784773362e-05, "loss": 0.2442, "step": 5313 }, { "epoch": 0.4533356082579765, "grad_norm": 1.5240442877114042, "learning_rate": 5.9922137707014845e-05, "loss": 0.2925, "step": 5314 }, { "epoch": 0.4534209179320935, "grad_norm": 1.5874153354926295, "learning_rate": 5.990859680879357e-05, "loss": 0.1988, "step": 5315 }, { "epoch": 0.45350622760621057, "grad_norm": 1.4534818149684887, "learning_rate": 5.989505515410358e-05, "loss": 0.2931, "step": 5316 }, { "epoch": 0.4535915372803276, "grad_norm": 1.5701146546794318, "learning_rate": 5.988151274397873e-05, "loss": 0.192, "step": 5317 }, { "epoch": 0.45367684695444466, "grad_norm": 1.4538247669309718, "learning_rate": 5.986796957945287e-05, "loss": 0.2569, "step": 5318 }, { "epoch": 0.4537621566285617, "grad_norm": 1.6527535581906754, "learning_rate": 5.9854425661559975e-05, "loss": 0.2959, "step": 5319 }, { "epoch": 0.45384746630267875, "grad_norm": 1.7895593078326884, "learning_rate": 5.984088099133406e-05, "loss": 0.3015, "step": 5320 }, { "epoch": 0.45393277597679577, "grad_norm": 1.6689435982290786, "learning_rate": 5.9827335569809165e-05, "loss": 0.2411, "step": 5321 }, { "epoch": 0.45401808565091284, "grad_norm": 1.5399256575586677, "learning_rate": 5.981378939801942e-05, "loss": 0.2756, "step": 5322 }, { "epoch": 0.45410339532502986, "grad_norm": 1.6328674745203033, "learning_rate": 5.980024247699903e-05, "loss": 0.2545, "step": 5323 }, { "epoch": 0.45418870499914693, "grad_norm": 1.5046819098538098, "learning_rate": 5.978669480778217e-05, "loss": 0.2481, "step": 5324 }, { "epoch": 0.45427401467326395, "grad_norm": 1.6185432685015617, "learning_rate": 5.977314639140319e-05, "loss": 0.2854, "step": 5325 }, { "epoch": 0.45435932434738097, "grad_norm": 1.6421023302286935, "learning_rate": 5.975959722889641e-05, "loss": 0.2589, "step": 5326 }, { "epoch": 0.45444463402149804, "grad_norm": 1.5051066574374226, "learning_rate": 5.974604732129625e-05, "loss": 0.2897, "step": 5327 }, { "epoch": 0.45452994369561506, "grad_norm": 1.1847907330638054, "learning_rate": 5.9732496669637164e-05, "loss": 0.251, "step": 5328 }, { "epoch": 0.45461525336973213, "grad_norm": 1.464841796873698, "learning_rate": 5.971894527495366e-05, "loss": 0.2877, "step": 5329 }, { "epoch": 0.45470056304384915, "grad_norm": 1.3439864904201548, "learning_rate": 5.970539313828035e-05, "loss": 0.244, "step": 5330 }, { "epoch": 0.4547858727179662, "grad_norm": 1.5264138905902824, "learning_rate": 5.9691840260651844e-05, "loss": 0.2699, "step": 5331 }, { "epoch": 0.45487118239208324, "grad_norm": 1.2784330057546784, "learning_rate": 5.967828664310283e-05, "loss": 0.2309, "step": 5332 }, { "epoch": 0.4549564920662003, "grad_norm": 1.7694254795151372, "learning_rate": 5.966473228666807e-05, "loss": 0.3093, "step": 5333 }, { "epoch": 0.45504180174031733, "grad_norm": 1.6231067338869718, "learning_rate": 5.965117719238236e-05, "loss": 0.2976, "step": 5334 }, { "epoch": 0.4551271114144344, "grad_norm": 1.6549281477529028, "learning_rate": 5.963762136128055e-05, "loss": 0.2469, "step": 5335 }, { "epoch": 0.4552124210885514, "grad_norm": 2.0208797121788797, "learning_rate": 5.962406479439757e-05, "loss": 0.3341, "step": 5336 }, { "epoch": 0.4552977307626685, "grad_norm": 1.5389326786340187, "learning_rate": 5.961050749276838e-05, "loss": 0.2703, "step": 5337 }, { "epoch": 0.4553830404367855, "grad_norm": 1.4307672144447299, "learning_rate": 5.9596949457428006e-05, "loss": 0.3058, "step": 5338 }, { "epoch": 0.4554683501109026, "grad_norm": 1.9040161551904107, "learning_rate": 5.9583390689411556e-05, "loss": 0.2982, "step": 5339 }, { "epoch": 0.4555536597850196, "grad_norm": 1.3588020608117684, "learning_rate": 5.9569831189754135e-05, "loss": 0.2363, "step": 5340 }, { "epoch": 0.4556389694591367, "grad_norm": 1.5614687759155415, "learning_rate": 5.9556270959490966e-05, "loss": 0.2472, "step": 5341 }, { "epoch": 0.4557242791332537, "grad_norm": 1.5975651423585142, "learning_rate": 5.9542709999657286e-05, "loss": 0.2735, "step": 5342 }, { "epoch": 0.45580958880737077, "grad_norm": 1.5709675044360072, "learning_rate": 5.952914831128842e-05, "loss": 0.3091, "step": 5343 }, { "epoch": 0.4558948984814878, "grad_norm": 1.413178014490511, "learning_rate": 5.951558589541971e-05, "loss": 0.2845, "step": 5344 }, { "epoch": 0.45598020815560486, "grad_norm": 1.4760499094766792, "learning_rate": 5.9502022753086586e-05, "loss": 0.2666, "step": 5345 }, { "epoch": 0.4560655178297219, "grad_norm": 1.8204238268163606, "learning_rate": 5.948845888532452e-05, "loss": 0.3144, "step": 5346 }, { "epoch": 0.45615082750383895, "grad_norm": 1.3334860366559373, "learning_rate": 5.947489429316904e-05, "loss": 0.2639, "step": 5347 }, { "epoch": 0.45623613717795597, "grad_norm": 1.2121439676514931, "learning_rate": 5.946132897765572e-05, "loss": 0.2443, "step": 5348 }, { "epoch": 0.45632144685207304, "grad_norm": 1.4506355208344517, "learning_rate": 5.9447762939820216e-05, "loss": 0.2137, "step": 5349 }, { "epoch": 0.45640675652619006, "grad_norm": 1.6910189918701324, "learning_rate": 5.943419618069821e-05, "loss": 0.2728, "step": 5350 }, { "epoch": 0.45649206620030713, "grad_norm": 1.541959176365496, "learning_rate": 5.942062870132547e-05, "loss": 0.2322, "step": 5351 }, { "epoch": 0.45657737587442415, "grad_norm": 1.3619064788191444, "learning_rate": 5.940706050273779e-05, "loss": 0.2304, "step": 5352 }, { "epoch": 0.4566626855485412, "grad_norm": 1.2254400280426216, "learning_rate": 5.939349158597102e-05, "loss": 0.2119, "step": 5353 }, { "epoch": 0.45674799522265824, "grad_norm": 1.3419395493083146, "learning_rate": 5.937992195206109e-05, "loss": 0.2467, "step": 5354 }, { "epoch": 0.4568333048967753, "grad_norm": 1.3690712730267685, "learning_rate": 5.9366351602043955e-05, "loss": 0.2027, "step": 5355 }, { "epoch": 0.45691861457089233, "grad_norm": 1.4268360155672495, "learning_rate": 5.935278053695566e-05, "loss": 0.2696, "step": 5356 }, { "epoch": 0.4570039242450094, "grad_norm": 1.6428461933363443, "learning_rate": 5.933920875783228e-05, "loss": 0.2558, "step": 5357 }, { "epoch": 0.4570892339191264, "grad_norm": 1.2970147115068642, "learning_rate": 5.932563626570992e-05, "loss": 0.2891, "step": 5358 }, { "epoch": 0.4571745435932435, "grad_norm": 1.326176684094166, "learning_rate": 5.93120630616248e-05, "loss": 0.2736, "step": 5359 }, { "epoch": 0.4572598532673605, "grad_norm": 1.5262522199584911, "learning_rate": 5.929848914661315e-05, "loss": 0.2858, "step": 5360 }, { "epoch": 0.4573451629414776, "grad_norm": 1.5431782640586555, "learning_rate": 5.9284914521711245e-05, "loss": 0.3077, "step": 5361 }, { "epoch": 0.4574304726155946, "grad_norm": 1.6703026053097216, "learning_rate": 5.9271339187955475e-05, "loss": 0.253, "step": 5362 }, { "epoch": 0.4575157822897117, "grad_norm": 1.5347071954251394, "learning_rate": 5.925776314638223e-05, "loss": 0.271, "step": 5363 }, { "epoch": 0.4576010919638287, "grad_norm": 1.6203654296234535, "learning_rate": 5.9244186398027944e-05, "loss": 0.2734, "step": 5364 }, { "epoch": 0.4576864016379457, "grad_norm": 1.737961025959943, "learning_rate": 5.923060894392917e-05, "loss": 0.3219, "step": 5365 }, { "epoch": 0.4577717113120628, "grad_norm": 2.55685305022537, "learning_rate": 5.921703078512245e-05, "loss": 0.3116, "step": 5366 }, { "epoch": 0.4578570209861798, "grad_norm": 1.7406012322876565, "learning_rate": 5.92034519226444e-05, "loss": 0.2766, "step": 5367 }, { "epoch": 0.4579423306602969, "grad_norm": 1.2317264970968647, "learning_rate": 5.918987235753172e-05, "loss": 0.2977, "step": 5368 }, { "epoch": 0.4580276403344139, "grad_norm": 1.1535338899307688, "learning_rate": 5.9176292090821105e-05, "loss": 0.2291, "step": 5369 }, { "epoch": 0.458112950008531, "grad_norm": 1.5408208262195715, "learning_rate": 5.916271112354935e-05, "loss": 0.2954, "step": 5370 }, { "epoch": 0.458198259682648, "grad_norm": 1.5150781805179774, "learning_rate": 5.9149129456753306e-05, "loss": 0.2831, "step": 5371 }, { "epoch": 0.45828356935676506, "grad_norm": 1.1005328621713184, "learning_rate": 5.913554709146983e-05, "loss": 0.231, "step": 5372 }, { "epoch": 0.4583688790308821, "grad_norm": 1.5670080384545502, "learning_rate": 5.9121964028735886e-05, "loss": 0.227, "step": 5373 }, { "epoch": 0.45845418870499915, "grad_norm": 1.3253200756199186, "learning_rate": 5.910838026958846e-05, "loss": 0.2764, "step": 5374 }, { "epoch": 0.4585394983791162, "grad_norm": 1.6315171063269012, "learning_rate": 5.9094795815064604e-05, "loss": 0.289, "step": 5375 }, { "epoch": 0.45862480805323325, "grad_norm": 1.5335947884989958, "learning_rate": 5.9081210666201435e-05, "loss": 0.2581, "step": 5376 }, { "epoch": 0.45871011772735026, "grad_norm": 1.9397958258717167, "learning_rate": 5.906762482403607e-05, "loss": 0.3092, "step": 5377 }, { "epoch": 0.45879542740146734, "grad_norm": 1.1796646621211355, "learning_rate": 5.905403828960575e-05, "loss": 0.2353, "step": 5378 }, { "epoch": 0.45888073707558436, "grad_norm": 1.6840933569585097, "learning_rate": 5.904045106394771e-05, "loss": 0.2969, "step": 5379 }, { "epoch": 0.45896604674970143, "grad_norm": 1.4764278289900623, "learning_rate": 5.902686314809927e-05, "loss": 0.2482, "step": 5380 }, { "epoch": 0.45905135642381845, "grad_norm": 1.8364882272186687, "learning_rate": 5.9013274543097795e-05, "loss": 0.2727, "step": 5381 }, { "epoch": 0.4591366660979355, "grad_norm": 1.4810572438429903, "learning_rate": 5.8999685249980696e-05, "loss": 0.2916, "step": 5382 }, { "epoch": 0.45922197577205254, "grad_norm": 1.6133885382568014, "learning_rate": 5.898609526978547e-05, "loss": 0.2735, "step": 5383 }, { "epoch": 0.4593072854461696, "grad_norm": 1.5695431471013446, "learning_rate": 5.8972504603549616e-05, "loss": 0.2617, "step": 5384 }, { "epoch": 0.45939259512028663, "grad_norm": 1.6759603622501758, "learning_rate": 5.895891325231071e-05, "loss": 0.3303, "step": 5385 }, { "epoch": 0.4594779047944037, "grad_norm": 1.2543558996064483, "learning_rate": 5.89453212171064e-05, "loss": 0.2157, "step": 5386 }, { "epoch": 0.4595632144685207, "grad_norm": 1.3786342449827689, "learning_rate": 5.8931728498974336e-05, "loss": 0.231, "step": 5387 }, { "epoch": 0.4596485241426378, "grad_norm": 1.4288145641780716, "learning_rate": 5.8918135098952276e-05, "loss": 0.2905, "step": 5388 }, { "epoch": 0.4597338338167548, "grad_norm": 1.3393375341290348, "learning_rate": 5.8904541018077984e-05, "loss": 0.2694, "step": 5389 }, { "epoch": 0.4598191434908719, "grad_norm": 1.9154074167267918, "learning_rate": 5.88909462573893e-05, "loss": 0.2916, "step": 5390 }, { "epoch": 0.4599044531649889, "grad_norm": 1.6536045179437302, "learning_rate": 5.887735081792413e-05, "loss": 0.3444, "step": 5391 }, { "epoch": 0.459989762839106, "grad_norm": 1.6799751922501498, "learning_rate": 5.88637547007204e-05, "loss": 0.3078, "step": 5392 }, { "epoch": 0.460075072513223, "grad_norm": 1.7124748116750033, "learning_rate": 5.8850157906816075e-05, "loss": 0.3361, "step": 5393 }, { "epoch": 0.46016038218734007, "grad_norm": 1.829205576744229, "learning_rate": 5.8836560437249245e-05, "loss": 0.3305, "step": 5394 }, { "epoch": 0.4602456918614571, "grad_norm": 1.5631487453263233, "learning_rate": 5.882296229305797e-05, "loss": 0.278, "step": 5395 }, { "epoch": 0.46033100153557416, "grad_norm": 1.3210646163101234, "learning_rate": 5.8809363475280424e-05, "loss": 0.2602, "step": 5396 }, { "epoch": 0.4604163112096912, "grad_norm": 1.648904309459142, "learning_rate": 5.8795763984954776e-05, "loss": 0.208, "step": 5397 }, { "epoch": 0.46050162088380825, "grad_norm": 1.453787375811822, "learning_rate": 5.878216382311931e-05, "loss": 0.2521, "step": 5398 }, { "epoch": 0.46058693055792527, "grad_norm": 1.3665923648888358, "learning_rate": 5.876856299081228e-05, "loss": 0.2976, "step": 5399 }, { "epoch": 0.46067224023204234, "grad_norm": 1.5151670098234948, "learning_rate": 5.875496148907208e-05, "loss": 0.302, "step": 5400 }, { "epoch": 0.46075754990615936, "grad_norm": 1.5835401333058419, "learning_rate": 5.874135931893707e-05, "loss": 0.2798, "step": 5401 }, { "epoch": 0.4608428595802764, "grad_norm": 1.466791023415425, "learning_rate": 5.872775648144575e-05, "loss": 0.2688, "step": 5402 }, { "epoch": 0.46092816925439345, "grad_norm": 1.40377738586235, "learning_rate": 5.8714152977636595e-05, "loss": 0.2637, "step": 5403 }, { "epoch": 0.46101347892851047, "grad_norm": 1.4095740133382033, "learning_rate": 5.8700548808548164e-05, "loss": 0.1857, "step": 5404 }, { "epoch": 0.46109878860262754, "grad_norm": 1.4000045265397054, "learning_rate": 5.868694397521908e-05, "loss": 0.2046, "step": 5405 }, { "epoch": 0.46118409827674456, "grad_norm": 1.8682833851317695, "learning_rate": 5.8673338478687955e-05, "loss": 0.3033, "step": 5406 }, { "epoch": 0.46126940795086163, "grad_norm": 1.4118345212094177, "learning_rate": 5.8659732319993555e-05, "loss": 0.27, "step": 5407 }, { "epoch": 0.46135471762497865, "grad_norm": 1.4711158041736723, "learning_rate": 5.864612550017461e-05, "loss": 0.2418, "step": 5408 }, { "epoch": 0.4614400272990957, "grad_norm": 1.6069681632752253, "learning_rate": 5.863251802026992e-05, "loss": 0.2692, "step": 5409 }, { "epoch": 0.46152533697321274, "grad_norm": 1.5200493328973006, "learning_rate": 5.8618909881318354e-05, "loss": 0.2936, "step": 5410 }, { "epoch": 0.4616106466473298, "grad_norm": 1.4816853298017372, "learning_rate": 5.860530108435881e-05, "loss": 0.1941, "step": 5411 }, { "epoch": 0.46169595632144683, "grad_norm": 1.5875324065737988, "learning_rate": 5.859169163043027e-05, "loss": 0.2353, "step": 5412 }, { "epoch": 0.4617812659955639, "grad_norm": 1.5655874647563597, "learning_rate": 5.857808152057173e-05, "loss": 0.2544, "step": 5413 }, { "epoch": 0.4618665756696809, "grad_norm": 1.981307414573751, "learning_rate": 5.856447075582223e-05, "loss": 0.2772, "step": 5414 }, { "epoch": 0.461951885343798, "grad_norm": 1.7142459461731119, "learning_rate": 5.855085933722092e-05, "loss": 0.2651, "step": 5415 }, { "epoch": 0.462037195017915, "grad_norm": 1.4516726383271807, "learning_rate": 5.8537247265806936e-05, "loss": 0.2832, "step": 5416 }, { "epoch": 0.4621225046920321, "grad_norm": 1.7096340143605215, "learning_rate": 5.852363454261949e-05, "loss": 0.269, "step": 5417 }, { "epoch": 0.4622078143661491, "grad_norm": 1.8842543150558957, "learning_rate": 5.851002116869784e-05, "loss": 0.2635, "step": 5418 }, { "epoch": 0.4622931240402662, "grad_norm": 1.4902640523492765, "learning_rate": 5.849640714508129e-05, "loss": 0.2612, "step": 5419 }, { "epoch": 0.4623784337143832, "grad_norm": 1.838838857634103, "learning_rate": 5.848279247280921e-05, "loss": 0.2608, "step": 5420 }, { "epoch": 0.46246374338850027, "grad_norm": 1.4526937521629404, "learning_rate": 5.846917715292101e-05, "loss": 0.219, "step": 5421 }, { "epoch": 0.4625490530626173, "grad_norm": 1.7165822316981387, "learning_rate": 5.845556118645612e-05, "loss": 0.3131, "step": 5422 }, { "epoch": 0.46263436273673436, "grad_norm": 1.2505016274058396, "learning_rate": 5.844194457445408e-05, "loss": 0.2719, "step": 5423 }, { "epoch": 0.4627196724108514, "grad_norm": 1.6360051094665577, "learning_rate": 5.8428327317954435e-05, "loss": 0.2784, "step": 5424 }, { "epoch": 0.46280498208496845, "grad_norm": 2.0019088219183248, "learning_rate": 5.841470941799677e-05, "loss": 0.2911, "step": 5425 }, { "epoch": 0.46289029175908547, "grad_norm": 1.4692396605749176, "learning_rate": 5.840109087562078e-05, "loss": 0.2771, "step": 5426 }, { "epoch": 0.46297560143320254, "grad_norm": 1.4156246092671578, "learning_rate": 5.838747169186611e-05, "loss": 0.2223, "step": 5427 }, { "epoch": 0.46306091110731956, "grad_norm": 1.4555886006830627, "learning_rate": 5.8373851867772576e-05, "loss": 0.2517, "step": 5428 }, { "epoch": 0.46314622078143663, "grad_norm": 1.827782362430057, "learning_rate": 5.836023140437995e-05, "loss": 0.2793, "step": 5429 }, { "epoch": 0.46323153045555365, "grad_norm": 1.4818313487947108, "learning_rate": 5.834661030272809e-05, "loss": 0.2863, "step": 5430 }, { "epoch": 0.4633168401296707, "grad_norm": 1.3726803112580832, "learning_rate": 5.833298856385687e-05, "loss": 0.2786, "step": 5431 }, { "epoch": 0.46340214980378774, "grad_norm": 1.7835861413085765, "learning_rate": 5.8319366188806256e-05, "loss": 0.3351, "step": 5432 }, { "epoch": 0.4634874594779048, "grad_norm": 1.5317716683034515, "learning_rate": 5.830574317861625e-05, "loss": 0.2793, "step": 5433 }, { "epoch": 0.46357276915202184, "grad_norm": 1.4979813821295194, "learning_rate": 5.8292119534326885e-05, "loss": 0.2564, "step": 5434 }, { "epoch": 0.4636580788261389, "grad_norm": 1.8039977501899578, "learning_rate": 5.827849525697825e-05, "loss": 0.2659, "step": 5435 }, { "epoch": 0.4637433885002559, "grad_norm": 1.8142500354125015, "learning_rate": 5.82648703476105e-05, "loss": 0.2749, "step": 5436 }, { "epoch": 0.463828698174373, "grad_norm": 1.439964089873043, "learning_rate": 5.8251244807263825e-05, "loss": 0.2139, "step": 5437 }, { "epoch": 0.46391400784849, "grad_norm": 1.259219502203736, "learning_rate": 5.823761863697844e-05, "loss": 0.2707, "step": 5438 }, { "epoch": 0.4639993175226071, "grad_norm": 1.5531792920944811, "learning_rate": 5.822399183779467e-05, "loss": 0.2297, "step": 5439 }, { "epoch": 0.4640846271967241, "grad_norm": 1.6363761015137896, "learning_rate": 5.8210364410752814e-05, "loss": 0.283, "step": 5440 }, { "epoch": 0.4641699368708411, "grad_norm": 2.0549573829854064, "learning_rate": 5.819673635689327e-05, "loss": 0.3438, "step": 5441 }, { "epoch": 0.4642552465449582, "grad_norm": 1.3829070236744894, "learning_rate": 5.8183107677256456e-05, "loss": 0.2634, "step": 5442 }, { "epoch": 0.4643405562190752, "grad_norm": 1.3039322911628248, "learning_rate": 5.816947837288285e-05, "loss": 0.2375, "step": 5443 }, { "epoch": 0.4644258658931923, "grad_norm": 1.5729408851763238, "learning_rate": 5.815584844481299e-05, "loss": 0.2438, "step": 5444 }, { "epoch": 0.4645111755673093, "grad_norm": 1.4004359537234332, "learning_rate": 5.814221789408745e-05, "loss": 0.2819, "step": 5445 }, { "epoch": 0.4645964852414264, "grad_norm": 1.7188456422030982, "learning_rate": 5.812858672174681e-05, "loss": 0.3186, "step": 5446 }, { "epoch": 0.4646817949155434, "grad_norm": 1.4846374279740842, "learning_rate": 5.81149549288318e-05, "loss": 0.2642, "step": 5447 }, { "epoch": 0.4647671045896605, "grad_norm": 1.6762048146005644, "learning_rate": 5.810132251638309e-05, "loss": 0.2965, "step": 5448 }, { "epoch": 0.4648524142637775, "grad_norm": 1.5193803020904184, "learning_rate": 5.8087689485441466e-05, "loss": 0.286, "step": 5449 }, { "epoch": 0.46493772393789456, "grad_norm": 1.2733667272080142, "learning_rate": 5.807405583704773e-05, "loss": 0.3105, "step": 5450 }, { "epoch": 0.4650230336120116, "grad_norm": 1.5356869156511759, "learning_rate": 5.806042157224273e-05, "loss": 0.3142, "step": 5451 }, { "epoch": 0.46510834328612866, "grad_norm": 1.4014740693806294, "learning_rate": 5.804678669206738e-05, "loss": 0.2711, "step": 5452 }, { "epoch": 0.4651936529602457, "grad_norm": 1.537089908772267, "learning_rate": 5.803315119756262e-05, "loss": 0.2632, "step": 5453 }, { "epoch": 0.46527896263436275, "grad_norm": 1.3686215100879302, "learning_rate": 5.801951508976945e-05, "loss": 0.2363, "step": 5454 }, { "epoch": 0.46536427230847976, "grad_norm": 1.5502805886562772, "learning_rate": 5.8005878369728926e-05, "loss": 0.3233, "step": 5455 }, { "epoch": 0.46544958198259684, "grad_norm": 1.5136210771466203, "learning_rate": 5.799224103848213e-05, "loss": 0.2838, "step": 5456 }, { "epoch": 0.46553489165671386, "grad_norm": 1.6200497862147487, "learning_rate": 5.797860309707021e-05, "loss": 0.2719, "step": 5457 }, { "epoch": 0.46562020133083093, "grad_norm": 1.4447612639154932, "learning_rate": 5.796496454653433e-05, "loss": 0.3126, "step": 5458 }, { "epoch": 0.46570551100494795, "grad_norm": 1.5392195723108084, "learning_rate": 5.795132538791572e-05, "loss": 0.302, "step": 5459 }, { "epoch": 0.465790820679065, "grad_norm": 1.6199310177374169, "learning_rate": 5.79376856222557e-05, "loss": 0.2991, "step": 5460 }, { "epoch": 0.46587613035318204, "grad_norm": 1.4000212156868084, "learning_rate": 5.792404525059555e-05, "loss": 0.2976, "step": 5461 }, { "epoch": 0.4659614400272991, "grad_norm": 1.7667718724048935, "learning_rate": 5.791040427397666e-05, "loss": 0.3372, "step": 5462 }, { "epoch": 0.46604674970141613, "grad_norm": 1.5939362361029261, "learning_rate": 5.789676269344043e-05, "loss": 0.2939, "step": 5463 }, { "epoch": 0.4661320593755332, "grad_norm": 1.514851323502777, "learning_rate": 5.7883120510028336e-05, "loss": 0.2789, "step": 5464 }, { "epoch": 0.4662173690496502, "grad_norm": 1.6565233400901502, "learning_rate": 5.786947772478187e-05, "loss": 0.2439, "step": 5465 }, { "epoch": 0.4663026787237673, "grad_norm": 1.7896670190549464, "learning_rate": 5.785583433874262e-05, "loss": 0.2775, "step": 5466 }, { "epoch": 0.4663879883978843, "grad_norm": 1.4308080398969287, "learning_rate": 5.7842190352952143e-05, "loss": 0.2953, "step": 5467 }, { "epoch": 0.4664732980720014, "grad_norm": 1.5802379098268884, "learning_rate": 5.7828545768452115e-05, "loss": 0.2375, "step": 5468 }, { "epoch": 0.4665586077461184, "grad_norm": 1.5243635444163752, "learning_rate": 5.781490058628422e-05, "loss": 0.2759, "step": 5469 }, { "epoch": 0.4666439174202355, "grad_norm": 1.6226228786778187, "learning_rate": 5.780125480749019e-05, "loss": 0.2722, "step": 5470 }, { "epoch": 0.4667292270943525, "grad_norm": 1.4631047620518394, "learning_rate": 5.7787608433111816e-05, "loss": 0.254, "step": 5471 }, { "epoch": 0.46681453676846957, "grad_norm": 1.4636440399675126, "learning_rate": 5.777396146419093e-05, "loss": 0.1882, "step": 5472 }, { "epoch": 0.4668998464425866, "grad_norm": 1.4378625992932743, "learning_rate": 5.776031390176938e-05, "loss": 0.1874, "step": 5473 }, { "epoch": 0.46698515611670366, "grad_norm": 1.619372674490563, "learning_rate": 5.7746665746889114e-05, "loss": 0.2714, "step": 5474 }, { "epoch": 0.4670704657908207, "grad_norm": 1.4560836697002424, "learning_rate": 5.7733017000592074e-05, "loss": 0.2579, "step": 5475 }, { "epoch": 0.46715577546493775, "grad_norm": 1.6027304507753033, "learning_rate": 5.7719367663920285e-05, "loss": 0.2829, "step": 5476 }, { "epoch": 0.46724108513905477, "grad_norm": 1.4885250502616163, "learning_rate": 5.770571773791579e-05, "loss": 0.2457, "step": 5477 }, { "epoch": 0.4673263948131718, "grad_norm": 1.51157916703577, "learning_rate": 5.7692067223620695e-05, "loss": 0.2828, "step": 5478 }, { "epoch": 0.46741170448728886, "grad_norm": 1.710810617419037, "learning_rate": 5.767841612207715e-05, "loss": 0.2639, "step": 5479 }, { "epoch": 0.4674970141614059, "grad_norm": 1.2896776061144168, "learning_rate": 5.766476443432732e-05, "loss": 0.2864, "step": 5480 }, { "epoch": 0.46758232383552295, "grad_norm": 1.2478248267325602, "learning_rate": 5.765111216141348e-05, "loss": 0.2607, "step": 5481 }, { "epoch": 0.46766763350963997, "grad_norm": 1.6971529625834443, "learning_rate": 5.7637459304377874e-05, "loss": 0.2755, "step": 5482 }, { "epoch": 0.46775294318375704, "grad_norm": 1.4161509061186794, "learning_rate": 5.762380586426283e-05, "loss": 0.2534, "step": 5483 }, { "epoch": 0.46783825285787406, "grad_norm": 1.4344118367038112, "learning_rate": 5.7610151842110736e-05, "loss": 0.2461, "step": 5484 }, { "epoch": 0.46792356253199113, "grad_norm": 1.4101807652928762, "learning_rate": 5.7596497238963975e-05, "loss": 0.3041, "step": 5485 }, { "epoch": 0.46800887220610815, "grad_norm": 1.7017013479142826, "learning_rate": 5.758284205586503e-05, "loss": 0.2726, "step": 5486 }, { "epoch": 0.4680941818802252, "grad_norm": 1.4376705731902604, "learning_rate": 5.756918629385638e-05, "loss": 0.1983, "step": 5487 }, { "epoch": 0.46817949155434224, "grad_norm": 1.5671559962931352, "learning_rate": 5.755552995398057e-05, "loss": 0.2842, "step": 5488 }, { "epoch": 0.4682648012284593, "grad_norm": 1.6664266492951554, "learning_rate": 5.7541873037280215e-05, "loss": 0.2886, "step": 5489 }, { "epoch": 0.46835011090257633, "grad_norm": 1.2940372074532387, "learning_rate": 5.752821554479793e-05, "loss": 0.2454, "step": 5490 }, { "epoch": 0.4684354205766934, "grad_norm": 1.8694980004650115, "learning_rate": 5.751455747757637e-05, "loss": 0.2412, "step": 5491 }, { "epoch": 0.4685207302508104, "grad_norm": 1.8447335416258928, "learning_rate": 5.75008988366583e-05, "loss": 0.2751, "step": 5492 }, { "epoch": 0.4686060399249275, "grad_norm": 1.8096520641653777, "learning_rate": 5.748723962308646e-05, "loss": 0.2034, "step": 5493 }, { "epoch": 0.4686913495990445, "grad_norm": 1.5391281811016677, "learning_rate": 5.747357983790367e-05, "loss": 0.2231, "step": 5494 }, { "epoch": 0.4687766592731616, "grad_norm": 2.0386069070171158, "learning_rate": 5.745991948215277e-05, "loss": 0.28, "step": 5495 }, { "epoch": 0.4688619689472786, "grad_norm": 1.7441397364700635, "learning_rate": 5.7446258556876645e-05, "loss": 0.2599, "step": 5496 }, { "epoch": 0.4689472786213957, "grad_norm": 1.8778434809012778, "learning_rate": 5.743259706311827e-05, "loss": 0.2878, "step": 5497 }, { "epoch": 0.4690325882955127, "grad_norm": 1.6260501695989378, "learning_rate": 5.741893500192059e-05, "loss": 0.2979, "step": 5498 }, { "epoch": 0.46911789796962977, "grad_norm": 1.625873330994743, "learning_rate": 5.740527237432665e-05, "loss": 0.3123, "step": 5499 }, { "epoch": 0.4692032076437468, "grad_norm": 1.7174765290679141, "learning_rate": 5.7391609181379514e-05, "loss": 0.2889, "step": 5500 }, { "epoch": 0.46928851731786386, "grad_norm": 1.7373163524148156, "learning_rate": 5.737794542412229e-05, "loss": 0.2389, "step": 5501 }, { "epoch": 0.4693738269919809, "grad_norm": 1.5642556055086503, "learning_rate": 5.736428110359815e-05, "loss": 0.2569, "step": 5502 }, { "epoch": 0.46945913666609795, "grad_norm": 1.6825369861726271, "learning_rate": 5.7350616220850285e-05, "loss": 0.264, "step": 5503 }, { "epoch": 0.46954444634021497, "grad_norm": 1.8970185930932526, "learning_rate": 5.733695077692193e-05, "loss": 0.2996, "step": 5504 }, { "epoch": 0.46962975601433204, "grad_norm": 1.35567110073833, "learning_rate": 5.732328477285638e-05, "loss": 0.2803, "step": 5505 }, { "epoch": 0.46971506568844906, "grad_norm": 1.6020239188591037, "learning_rate": 5.730961820969694e-05, "loss": 0.3005, "step": 5506 }, { "epoch": 0.46980037536256614, "grad_norm": 1.242826189632858, "learning_rate": 5.7295951088486985e-05, "loss": 0.2917, "step": 5507 }, { "epoch": 0.46988568503668315, "grad_norm": 1.9112328613790208, "learning_rate": 5.7282283410269955e-05, "loss": 0.3362, "step": 5508 }, { "epoch": 0.4699709947108002, "grad_norm": 1.4411203984850425, "learning_rate": 5.726861517608927e-05, "loss": 0.2726, "step": 5509 }, { "epoch": 0.47005630438491725, "grad_norm": 1.3794836721058898, "learning_rate": 5.725494638698845e-05, "loss": 0.224, "step": 5510 }, { "epoch": 0.4701416140590343, "grad_norm": 1.5968967861772172, "learning_rate": 5.724127704401102e-05, "loss": 0.2364, "step": 5511 }, { "epoch": 0.47022692373315134, "grad_norm": 1.8172455848076587, "learning_rate": 5.722760714820057e-05, "loss": 0.2605, "step": 5512 }, { "epoch": 0.4703122334072684, "grad_norm": 1.394992527685354, "learning_rate": 5.721393670060072e-05, "loss": 0.2819, "step": 5513 }, { "epoch": 0.4703975430813854, "grad_norm": 1.7132559618320824, "learning_rate": 5.720026570225514e-05, "loss": 0.3039, "step": 5514 }, { "epoch": 0.4704828527555025, "grad_norm": 1.6963064909870345, "learning_rate": 5.718659415420754e-05, "loss": 0.2493, "step": 5515 }, { "epoch": 0.4705681624296195, "grad_norm": 1.7396528340540323, "learning_rate": 5.717292205750167e-05, "loss": 0.2663, "step": 5516 }, { "epoch": 0.47065347210373654, "grad_norm": 1.682790755022504, "learning_rate": 5.7159249413181303e-05, "loss": 0.2976, "step": 5517 }, { "epoch": 0.4707387817778536, "grad_norm": 1.176481323684068, "learning_rate": 5.71455762222903e-05, "loss": 0.1753, "step": 5518 }, { "epoch": 0.4708240914519706, "grad_norm": 1.5359755797531094, "learning_rate": 5.713190248587251e-05, "loss": 0.2894, "step": 5519 }, { "epoch": 0.4709094011260877, "grad_norm": 1.6019053022515382, "learning_rate": 5.711822820497187e-05, "loss": 0.2652, "step": 5520 }, { "epoch": 0.4709947108002047, "grad_norm": 1.6396418941110502, "learning_rate": 5.710455338063234e-05, "loss": 0.3444, "step": 5521 }, { "epoch": 0.4710800204743218, "grad_norm": 1.4436629496171078, "learning_rate": 5.70908780138979e-05, "loss": 0.2404, "step": 5522 }, { "epoch": 0.4711653301484388, "grad_norm": 1.6983696580969743, "learning_rate": 5.707720210581261e-05, "loss": 0.3222, "step": 5523 }, { "epoch": 0.4712506398225559, "grad_norm": 1.7352601962705727, "learning_rate": 5.706352565742056e-05, "loss": 0.2728, "step": 5524 }, { "epoch": 0.4713359494966729, "grad_norm": 1.4806724549108385, "learning_rate": 5.7049848669765846e-05, "loss": 0.28, "step": 5525 }, { "epoch": 0.47142125917079, "grad_norm": 1.5231717391389656, "learning_rate": 5.703617114389266e-05, "loss": 0.2253, "step": 5526 }, { "epoch": 0.471506568844907, "grad_norm": 1.6187677448728772, "learning_rate": 5.7022493080845194e-05, "loss": 0.2269, "step": 5527 }, { "epoch": 0.47159187851902407, "grad_norm": 1.6212065508100455, "learning_rate": 5.700881448166769e-05, "loss": 0.302, "step": 5528 }, { "epoch": 0.4716771881931411, "grad_norm": 1.4671186762095996, "learning_rate": 5.699513534740446e-05, "loss": 0.218, "step": 5529 }, { "epoch": 0.47176249786725816, "grad_norm": 1.36499999385177, "learning_rate": 5.69814556790998e-05, "loss": 0.2823, "step": 5530 }, { "epoch": 0.4718478075413752, "grad_norm": 1.5973394028437404, "learning_rate": 5.696777547779811e-05, "loss": 0.3133, "step": 5531 }, { "epoch": 0.47193311721549225, "grad_norm": 1.4546100862188767, "learning_rate": 5.69540947445438e-05, "loss": 0.2549, "step": 5532 }, { "epoch": 0.47201842688960927, "grad_norm": 1.529292509771867, "learning_rate": 5.694041348038128e-05, "loss": 0.2456, "step": 5533 }, { "epoch": 0.47210373656372634, "grad_norm": 1.6732938218612936, "learning_rate": 5.6926731686355096e-05, "loss": 0.3219, "step": 5534 }, { "epoch": 0.47218904623784336, "grad_norm": 1.5122198018713262, "learning_rate": 5.691304936350975e-05, "loss": 0.2175, "step": 5535 }, { "epoch": 0.47227435591196043, "grad_norm": 1.6295584783932038, "learning_rate": 5.689936651288983e-05, "loss": 0.309, "step": 5536 }, { "epoch": 0.47235966558607745, "grad_norm": 1.7800429420434773, "learning_rate": 5.688568313553994e-05, "loss": 0.2896, "step": 5537 }, { "epoch": 0.4724449752601945, "grad_norm": 1.4346615507171723, "learning_rate": 5.68719992325047e-05, "loss": 0.2442, "step": 5538 }, { "epoch": 0.47253028493431154, "grad_norm": 1.510186732537592, "learning_rate": 5.685831480482887e-05, "loss": 0.2695, "step": 5539 }, { "epoch": 0.4726155946084286, "grad_norm": 1.305405196646716, "learning_rate": 5.684462985355714e-05, "loss": 0.2733, "step": 5540 }, { "epoch": 0.47270090428254563, "grad_norm": 1.2668324115296299, "learning_rate": 5.683094437973429e-05, "loss": 0.2396, "step": 5541 }, { "epoch": 0.4727862139566627, "grad_norm": 1.511800916543242, "learning_rate": 5.681725838440515e-05, "loss": 0.275, "step": 5542 }, { "epoch": 0.4728715236307797, "grad_norm": 1.706030999965243, "learning_rate": 5.680357186861455e-05, "loss": 0.2882, "step": 5543 }, { "epoch": 0.4729568333048968, "grad_norm": 1.5852187458802143, "learning_rate": 5.678988483340738e-05, "loss": 0.2169, "step": 5544 }, { "epoch": 0.4730421429790138, "grad_norm": 1.5858218897882548, "learning_rate": 5.677619727982859e-05, "loss": 0.2763, "step": 5545 }, { "epoch": 0.4731274526531309, "grad_norm": 1.368055972418208, "learning_rate": 5.6762509208923165e-05, "loss": 0.2585, "step": 5546 }, { "epoch": 0.4732127623272479, "grad_norm": 1.6164504135979463, "learning_rate": 5.6748820621736084e-05, "loss": 0.2801, "step": 5547 }, { "epoch": 0.473298072001365, "grad_norm": 1.5914901718761396, "learning_rate": 5.673513151931241e-05, "loss": 0.2588, "step": 5548 }, { "epoch": 0.473383381675482, "grad_norm": 1.3426949550927942, "learning_rate": 5.6721441902697236e-05, "loss": 0.2651, "step": 5549 }, { "epoch": 0.47346869134959907, "grad_norm": 1.3252798684565943, "learning_rate": 5.670775177293569e-05, "loss": 0.2901, "step": 5550 }, { "epoch": 0.4735540010237161, "grad_norm": 1.6285862596931213, "learning_rate": 5.669406113107295e-05, "loss": 0.25, "step": 5551 }, { "epoch": 0.47363931069783316, "grad_norm": 1.4888622521552974, "learning_rate": 5.66803699781542e-05, "loss": 0.2377, "step": 5552 }, { "epoch": 0.4737246203719502, "grad_norm": 1.6937511655233393, "learning_rate": 5.666667831522471e-05, "loss": 0.243, "step": 5553 }, { "epoch": 0.47380993004606725, "grad_norm": 1.5452733851651954, "learning_rate": 5.665298614332975e-05, "loss": 0.2474, "step": 5554 }, { "epoch": 0.47389523972018427, "grad_norm": 1.7047675375326532, "learning_rate": 5.663929346351466e-05, "loss": 0.2946, "step": 5555 }, { "epoch": 0.4739805493943013, "grad_norm": 1.5628392423474862, "learning_rate": 5.6625600276824796e-05, "loss": 0.2503, "step": 5556 }, { "epoch": 0.47406585906841836, "grad_norm": 1.840042395103327, "learning_rate": 5.661190658430556e-05, "loss": 0.2536, "step": 5557 }, { "epoch": 0.4741511687425354, "grad_norm": 1.8643318589194051, "learning_rate": 5.659821238700239e-05, "loss": 0.3101, "step": 5558 }, { "epoch": 0.47423647841665245, "grad_norm": 1.5277153792306157, "learning_rate": 5.658451768596077e-05, "loss": 0.2187, "step": 5559 }, { "epoch": 0.47432178809076947, "grad_norm": 1.569001974713631, "learning_rate": 5.65708224822262e-05, "loss": 0.2482, "step": 5560 }, { "epoch": 0.47440709776488654, "grad_norm": 1.5192380489891832, "learning_rate": 5.655712677684426e-05, "loss": 0.2747, "step": 5561 }, { "epoch": 0.47449240743900356, "grad_norm": 1.5500718038601284, "learning_rate": 5.654343057086053e-05, "loss": 0.3039, "step": 5562 }, { "epoch": 0.47457771711312063, "grad_norm": 1.261599980955783, "learning_rate": 5.652973386532066e-05, "loss": 0.2765, "step": 5563 }, { "epoch": 0.47466302678723765, "grad_norm": 1.605151216983099, "learning_rate": 5.651603666127031e-05, "loss": 0.3013, "step": 5564 }, { "epoch": 0.4747483364613547, "grad_norm": 1.5922455512218403, "learning_rate": 5.6502338959755164e-05, "loss": 0.251, "step": 5565 }, { "epoch": 0.47483364613547174, "grad_norm": 1.5450241884480012, "learning_rate": 5.648864076182101e-05, "loss": 0.2289, "step": 5566 }, { "epoch": 0.4749189558095888, "grad_norm": 1.5300672205949595, "learning_rate": 5.647494206851363e-05, "loss": 0.2693, "step": 5567 }, { "epoch": 0.47500426548370583, "grad_norm": 1.5654443084548026, "learning_rate": 5.646124288087881e-05, "loss": 0.2481, "step": 5568 }, { "epoch": 0.4750895751578229, "grad_norm": 1.5116847623410012, "learning_rate": 5.644754319996244e-05, "loss": 0.2555, "step": 5569 }, { "epoch": 0.4751748848319399, "grad_norm": 1.8098382808033244, "learning_rate": 5.643384302681039e-05, "loss": 0.2488, "step": 5570 }, { "epoch": 0.475260194506057, "grad_norm": 1.405884250125079, "learning_rate": 5.6420142362468634e-05, "loss": 0.2641, "step": 5571 }, { "epoch": 0.475345504180174, "grad_norm": 1.8053587618792666, "learning_rate": 5.640644120798312e-05, "loss": 0.2174, "step": 5572 }, { "epoch": 0.4754308138542911, "grad_norm": 1.3934592178886849, "learning_rate": 5.6392739564399845e-05, "loss": 0.2471, "step": 5573 }, { "epoch": 0.4755161235284081, "grad_norm": 1.8380237170361016, "learning_rate": 5.637903743276489e-05, "loss": 0.2468, "step": 5574 }, { "epoch": 0.4756014332025252, "grad_norm": 1.3564453125, "learning_rate": 5.636533481412433e-05, "loss": 0.2264, "step": 5575 }, { "epoch": 0.4756867428766422, "grad_norm": 1.253481118454482, "learning_rate": 5.635163170952428e-05, "loss": 0.1879, "step": 5576 }, { "epoch": 0.4757720525507593, "grad_norm": 1.5768013114509567, "learning_rate": 5.6337928120010906e-05, "loss": 0.2881, "step": 5577 }, { "epoch": 0.4758573622248763, "grad_norm": 1.3396850580932202, "learning_rate": 5.6324224046630395e-05, "loss": 0.2252, "step": 5578 }, { "epoch": 0.47594267189899336, "grad_norm": 1.6384632946299809, "learning_rate": 5.631051949042898e-05, "loss": 0.2397, "step": 5579 }, { "epoch": 0.4760279815731104, "grad_norm": 1.6591269499877148, "learning_rate": 5.629681445245295e-05, "loss": 0.2899, "step": 5580 }, { "epoch": 0.47611329124722745, "grad_norm": 1.4603688158294363, "learning_rate": 5.628310893374859e-05, "loss": 0.2552, "step": 5581 }, { "epoch": 0.4761986009213445, "grad_norm": 1.6287639015992188, "learning_rate": 5.626940293536225e-05, "loss": 0.2949, "step": 5582 }, { "epoch": 0.47628391059546155, "grad_norm": 1.8493938380884087, "learning_rate": 5.62556964583403e-05, "loss": 0.2585, "step": 5583 }, { "epoch": 0.47636922026957856, "grad_norm": 1.5574783502485419, "learning_rate": 5.624198950372918e-05, "loss": 0.2455, "step": 5584 }, { "epoch": 0.47645452994369564, "grad_norm": 1.2522607386747948, "learning_rate": 5.622828207257533e-05, "loss": 0.3167, "step": 5585 }, { "epoch": 0.47653983961781265, "grad_norm": 1.7537191924191784, "learning_rate": 5.621457416592524e-05, "loss": 0.218, "step": 5586 }, { "epoch": 0.47662514929192973, "grad_norm": 1.5087280656157234, "learning_rate": 5.620086578482544e-05, "loss": 0.2952, "step": 5587 }, { "epoch": 0.47671045896604675, "grad_norm": 1.1528673954232784, "learning_rate": 5.618715693032248e-05, "loss": 0.1821, "step": 5588 }, { "epoch": 0.4767957686401638, "grad_norm": 1.512739127936292, "learning_rate": 5.617344760346298e-05, "loss": 0.3048, "step": 5589 }, { "epoch": 0.47688107831428084, "grad_norm": 1.411693802039434, "learning_rate": 5.615973780529357e-05, "loss": 0.2603, "step": 5590 }, { "epoch": 0.4769663879883979, "grad_norm": 1.758329120588755, "learning_rate": 5.614602753686088e-05, "loss": 0.264, "step": 5591 }, { "epoch": 0.47705169766251493, "grad_norm": 1.3446274044181687, "learning_rate": 5.613231679921167e-05, "loss": 0.2235, "step": 5592 }, { "epoch": 0.47713700733663195, "grad_norm": 1.654104750825316, "learning_rate": 5.611860559339265e-05, "loss": 0.2546, "step": 5593 }, { "epoch": 0.477222317010749, "grad_norm": 1.7154719216681986, "learning_rate": 5.6104893920450605e-05, "loss": 0.2734, "step": 5594 }, { "epoch": 0.47730762668486604, "grad_norm": 1.2801473571475914, "learning_rate": 5.609118178143236e-05, "loss": 0.2455, "step": 5595 }, { "epoch": 0.4773929363589831, "grad_norm": 1.568349949866605, "learning_rate": 5.6077469177384754e-05, "loss": 0.263, "step": 5596 }, { "epoch": 0.47747824603310013, "grad_norm": 1.7075457114137187, "learning_rate": 5.606375610935466e-05, "loss": 0.239, "step": 5597 }, { "epoch": 0.4775635557072172, "grad_norm": 1.821269295511029, "learning_rate": 5.6050042578389016e-05, "loss": 0.2129, "step": 5598 }, { "epoch": 0.4776488653813342, "grad_norm": 1.243244992578802, "learning_rate": 5.603632858553478e-05, "loss": 0.2553, "step": 5599 }, { "epoch": 0.4777341750554513, "grad_norm": 1.4662316016614474, "learning_rate": 5.602261413183892e-05, "loss": 0.2604, "step": 5600 }, { "epoch": 0.4778194847295683, "grad_norm": 1.73531013915899, "learning_rate": 5.60088992183485e-05, "loss": 0.3607, "step": 5601 }, { "epoch": 0.4779047944036854, "grad_norm": 1.7108369562575525, "learning_rate": 5.5995183846110524e-05, "loss": 0.2043, "step": 5602 }, { "epoch": 0.4779901040778024, "grad_norm": 1.8648437730128362, "learning_rate": 5.5981468016172134e-05, "loss": 0.2397, "step": 5603 }, { "epoch": 0.4780754137519195, "grad_norm": 1.9770667597642466, "learning_rate": 5.5967751729580454e-05, "loss": 0.3257, "step": 5604 }, { "epoch": 0.4781607234260365, "grad_norm": 1.5253150520701944, "learning_rate": 5.595403498738262e-05, "loss": 0.2791, "step": 5605 }, { "epoch": 0.47824603310015357, "grad_norm": 1.5547120365526095, "learning_rate": 5.5940317790625876e-05, "loss": 0.2613, "step": 5606 }, { "epoch": 0.4783313427742706, "grad_norm": 1.5081721059854485, "learning_rate": 5.592660014035742e-05, "loss": 0.2976, "step": 5607 }, { "epoch": 0.47841665244838766, "grad_norm": 1.59685990835923, "learning_rate": 5.591288203762455e-05, "loss": 0.2434, "step": 5608 }, { "epoch": 0.4785019621225047, "grad_norm": 1.499400336881134, "learning_rate": 5.589916348347455e-05, "loss": 0.261, "step": 5609 }, { "epoch": 0.47858727179662175, "grad_norm": 1.3891813521284981, "learning_rate": 5.588544447895477e-05, "loss": 0.2042, "step": 5610 }, { "epoch": 0.47867258147073877, "grad_norm": 1.4097810283019168, "learning_rate": 5.5871725025112586e-05, "loss": 0.2178, "step": 5611 }, { "epoch": 0.47875789114485584, "grad_norm": 1.454632705000654, "learning_rate": 5.585800512299539e-05, "loss": 0.2745, "step": 5612 }, { "epoch": 0.47884320081897286, "grad_norm": 1.796073535807098, "learning_rate": 5.584428477365063e-05, "loss": 0.2708, "step": 5613 }, { "epoch": 0.47892851049308993, "grad_norm": 1.485017657447927, "learning_rate": 5.583056397812578e-05, "loss": 0.2349, "step": 5614 }, { "epoch": 0.47901382016720695, "grad_norm": 1.512636206933163, "learning_rate": 5.5816842737468353e-05, "loss": 0.2314, "step": 5615 }, { "epoch": 0.479099129841324, "grad_norm": 1.783726961004454, "learning_rate": 5.5803121052725916e-05, "loss": 0.2887, "step": 5616 }, { "epoch": 0.47918443951544104, "grad_norm": 1.6593847299638964, "learning_rate": 5.578939892494601e-05, "loss": 0.2363, "step": 5617 }, { "epoch": 0.4792697491895581, "grad_norm": 1.5661765522339217, "learning_rate": 5.577567635517625e-05, "loss": 0.2417, "step": 5618 }, { "epoch": 0.47935505886367513, "grad_norm": 1.5610627239657981, "learning_rate": 5.57619533444643e-05, "loss": 0.2737, "step": 5619 }, { "epoch": 0.4794403685377922, "grad_norm": 1.4843165335936315, "learning_rate": 5.574822989385784e-05, "loss": 0.2566, "step": 5620 }, { "epoch": 0.4795256782119092, "grad_norm": 1.6623557924525916, "learning_rate": 5.5734506004404574e-05, "loss": 0.2629, "step": 5621 }, { "epoch": 0.4796109878860263, "grad_norm": 1.4712305427762022, "learning_rate": 5.572078167715225e-05, "loss": 0.2708, "step": 5622 }, { "epoch": 0.4796962975601433, "grad_norm": 1.479334253121483, "learning_rate": 5.5707056913148626e-05, "loss": 0.262, "step": 5623 }, { "epoch": 0.4797816072342604, "grad_norm": 1.4967822689249888, "learning_rate": 5.569333171344154e-05, "loss": 0.2227, "step": 5624 }, { "epoch": 0.4798669169083774, "grad_norm": 2.0454733905502107, "learning_rate": 5.567960607907885e-05, "loss": 0.2535, "step": 5625 }, { "epoch": 0.4799522265824945, "grad_norm": 1.433190395561741, "learning_rate": 5.5665880011108394e-05, "loss": 0.1972, "step": 5626 }, { "epoch": 0.4800375362566115, "grad_norm": 1.1983741992212837, "learning_rate": 5.565215351057812e-05, "loss": 0.2702, "step": 5627 }, { "epoch": 0.48012284593072857, "grad_norm": 1.305565772568966, "learning_rate": 5.5638426578535955e-05, "loss": 0.208, "step": 5628 }, { "epoch": 0.4802081556048456, "grad_norm": 1.3223022275560743, "learning_rate": 5.5624699216029885e-05, "loss": 0.2351, "step": 5629 }, { "epoch": 0.48029346527896266, "grad_norm": 1.549720074387617, "learning_rate": 5.561097142410791e-05, "loss": 0.2313, "step": 5630 }, { "epoch": 0.4803787749530797, "grad_norm": 1.6540403200929426, "learning_rate": 5.5597243203818104e-05, "loss": 0.3239, "step": 5631 }, { "epoch": 0.4804640846271967, "grad_norm": 1.5565671053842982, "learning_rate": 5.5583514556208514e-05, "loss": 0.2515, "step": 5632 }, { "epoch": 0.48054939430131377, "grad_norm": 1.9285609860616875, "learning_rate": 5.556978548232726e-05, "loss": 0.2552, "step": 5633 }, { "epoch": 0.4806347039754308, "grad_norm": 1.4753256163592798, "learning_rate": 5.5556055983222474e-05, "loss": 0.2351, "step": 5634 }, { "epoch": 0.48072001364954786, "grad_norm": 1.5586192958334895, "learning_rate": 5.554232605994235e-05, "loss": 0.2666, "step": 5635 }, { "epoch": 0.4808053233236649, "grad_norm": 1.748277565848846, "learning_rate": 5.552859571353507e-05, "loss": 0.2438, "step": 5636 }, { "epoch": 0.48089063299778195, "grad_norm": 1.3815650269994986, "learning_rate": 5.5514864945048904e-05, "loss": 0.2311, "step": 5637 }, { "epoch": 0.48097594267189897, "grad_norm": 1.8124323865183727, "learning_rate": 5.55011337555321e-05, "loss": 0.3055, "step": 5638 }, { "epoch": 0.48106125234601604, "grad_norm": 1.380511422137322, "learning_rate": 5.548740214603295e-05, "loss": 0.23, "step": 5639 }, { "epoch": 0.48114656202013306, "grad_norm": 1.6308150782251356, "learning_rate": 5.547367011759982e-05, "loss": 0.2714, "step": 5640 }, { "epoch": 0.48123187169425014, "grad_norm": 1.7181407889164177, "learning_rate": 5.545993767128107e-05, "loss": 0.2519, "step": 5641 }, { "epoch": 0.48131718136836715, "grad_norm": 1.4727885948230635, "learning_rate": 5.544620480812508e-05, "loss": 0.2735, "step": 5642 }, { "epoch": 0.4814024910424842, "grad_norm": 1.5702847150935078, "learning_rate": 5.5432471529180306e-05, "loss": 0.2382, "step": 5643 }, { "epoch": 0.48148780071660124, "grad_norm": 1.5081236523188652, "learning_rate": 5.541873783549518e-05, "loss": 0.2758, "step": 5644 }, { "epoch": 0.4815731103907183, "grad_norm": 1.7802512062346159, "learning_rate": 5.540500372811823e-05, "loss": 0.2726, "step": 5645 }, { "epoch": 0.48165842006483534, "grad_norm": 1.4389718854542244, "learning_rate": 5.539126920809797e-05, "loss": 0.2778, "step": 5646 }, { "epoch": 0.4817437297389524, "grad_norm": 1.6506322920788763, "learning_rate": 5.537753427648295e-05, "loss": 0.2461, "step": 5647 }, { "epoch": 0.4818290394130694, "grad_norm": 1.8618351665024866, "learning_rate": 5.536379893432177e-05, "loss": 0.2878, "step": 5648 }, { "epoch": 0.4819143490871865, "grad_norm": 1.5500855699114973, "learning_rate": 5.535006318266304e-05, "loss": 0.2383, "step": 5649 }, { "epoch": 0.4819996587613035, "grad_norm": 1.2447589194274535, "learning_rate": 5.53363270225554e-05, "loss": 0.1982, "step": 5650 }, { "epoch": 0.4820849684354206, "grad_norm": 1.8089464803510396, "learning_rate": 5.5322590455047564e-05, "loss": 0.2187, "step": 5651 }, { "epoch": 0.4821702781095376, "grad_norm": 2.0706778725719817, "learning_rate": 5.5308853481188236e-05, "loss": 0.3068, "step": 5652 }, { "epoch": 0.4822555877836547, "grad_norm": 1.7008415803735837, "learning_rate": 5.529511610202616e-05, "loss": 0.2567, "step": 5653 }, { "epoch": 0.4823408974577717, "grad_norm": 1.5031201971219232, "learning_rate": 5.52813783186101e-05, "loss": 0.2274, "step": 5654 }, { "epoch": 0.4824262071318888, "grad_norm": 1.4006530430721453, "learning_rate": 5.5267640131988864e-05, "loss": 0.2383, "step": 5655 }, { "epoch": 0.4825115168060058, "grad_norm": 1.735655372142338, "learning_rate": 5.5253901543211295e-05, "loss": 0.2509, "step": 5656 }, { "epoch": 0.48259682648012286, "grad_norm": 1.6988853390572514, "learning_rate": 5.524016255332627e-05, "loss": 0.2275, "step": 5657 }, { "epoch": 0.4826821361542399, "grad_norm": 1.9045664122147006, "learning_rate": 5.522642316338268e-05, "loss": 0.2844, "step": 5658 }, { "epoch": 0.48276744582835696, "grad_norm": 1.4972134774887889, "learning_rate": 5.521268337442945e-05, "loss": 0.2206, "step": 5659 }, { "epoch": 0.482852755502474, "grad_norm": 1.58512068135612, "learning_rate": 5.519894318751554e-05, "loss": 0.2574, "step": 5660 }, { "epoch": 0.48293806517659105, "grad_norm": 1.347165446816573, "learning_rate": 5.518520260368996e-05, "loss": 0.2104, "step": 5661 }, { "epoch": 0.48302337485070806, "grad_norm": 1.5991833390795898, "learning_rate": 5.517146162400171e-05, "loss": 0.2723, "step": 5662 }, { "epoch": 0.48310868452482514, "grad_norm": 1.9771909656979638, "learning_rate": 5.5157720249499847e-05, "loss": 0.3039, "step": 5663 }, { "epoch": 0.48319399419894216, "grad_norm": 1.7719119732479183, "learning_rate": 5.514397848123345e-05, "loss": 0.2525, "step": 5664 }, { "epoch": 0.48327930387305923, "grad_norm": 1.6739399359625198, "learning_rate": 5.5130236320251625e-05, "loss": 0.27, "step": 5665 }, { "epoch": 0.48336461354717625, "grad_norm": 1.8147219488527573, "learning_rate": 5.5116493767603524e-05, "loss": 0.238, "step": 5666 }, { "epoch": 0.4834499232212933, "grad_norm": 1.528729758701773, "learning_rate": 5.510275082433831e-05, "loss": 0.286, "step": 5667 }, { "epoch": 0.48353523289541034, "grad_norm": 1.580056170298068, "learning_rate": 5.5089007491505186e-05, "loss": 0.2811, "step": 5668 }, { "epoch": 0.4836205425695274, "grad_norm": 1.7040249607828006, "learning_rate": 5.507526377015339e-05, "loss": 0.2175, "step": 5669 }, { "epoch": 0.48370585224364443, "grad_norm": 1.5282022936410995, "learning_rate": 5.5061519661332175e-05, "loss": 0.2227, "step": 5670 }, { "epoch": 0.48379116191776145, "grad_norm": 1.6785671312340917, "learning_rate": 5.504777516609082e-05, "loss": 0.2594, "step": 5671 }, { "epoch": 0.4838764715918785, "grad_norm": 1.325143316903214, "learning_rate": 5.503403028547867e-05, "loss": 0.2314, "step": 5672 }, { "epoch": 0.48396178126599554, "grad_norm": 1.5712248775443345, "learning_rate": 5.5020285020545046e-05, "loss": 0.3149, "step": 5673 }, { "epoch": 0.4840470909401126, "grad_norm": 1.7620532612712478, "learning_rate": 5.500653937233935e-05, "loss": 0.2539, "step": 5674 }, { "epoch": 0.48413240061422963, "grad_norm": 1.5699642421513653, "learning_rate": 5.499279334191096e-05, "loss": 0.2768, "step": 5675 }, { "epoch": 0.4842177102883467, "grad_norm": 1.5814277910842771, "learning_rate": 5.497904693030934e-05, "loss": 0.2763, "step": 5676 }, { "epoch": 0.4843030199624637, "grad_norm": 1.6796111333475783, "learning_rate": 5.4965300138583955e-05, "loss": 0.2967, "step": 5677 }, { "epoch": 0.4843883296365808, "grad_norm": 1.4685107097085839, "learning_rate": 5.495155296778428e-05, "loss": 0.2271, "step": 5678 }, { "epoch": 0.4844736393106978, "grad_norm": 1.7699973058949459, "learning_rate": 5.4937805418959844e-05, "loss": 0.2771, "step": 5679 }, { "epoch": 0.4845589489848149, "grad_norm": 1.6935787212860438, "learning_rate": 5.492405749316021e-05, "loss": 0.3059, "step": 5680 }, { "epoch": 0.4846442586589319, "grad_norm": 1.3703309026371964, "learning_rate": 5.491030919143494e-05, "loss": 0.2693, "step": 5681 }, { "epoch": 0.484729568333049, "grad_norm": 1.5712383823981926, "learning_rate": 5.4896560514833675e-05, "loss": 0.2063, "step": 5682 }, { "epoch": 0.484814878007166, "grad_norm": 1.599912658930116, "learning_rate": 5.4882811464406026e-05, "loss": 0.2361, "step": 5683 }, { "epoch": 0.48490018768128307, "grad_norm": 1.7740704587428957, "learning_rate": 5.486906204120168e-05, "loss": 0.3197, "step": 5684 }, { "epoch": 0.4849854973554001, "grad_norm": 1.4158921181320978, "learning_rate": 5.4855312246270306e-05, "loss": 0.2786, "step": 5685 }, { "epoch": 0.48507080702951716, "grad_norm": 1.5856724409525247, "learning_rate": 5.4841562080661656e-05, "loss": 0.2273, "step": 5686 }, { "epoch": 0.4851561167036342, "grad_norm": 1.4328558163350817, "learning_rate": 5.4827811545425454e-05, "loss": 0.2699, "step": 5687 }, { "epoch": 0.48524142637775125, "grad_norm": 1.5479991950740248, "learning_rate": 5.481406064161151e-05, "loss": 0.287, "step": 5688 }, { "epoch": 0.48532673605186827, "grad_norm": 1.8675872502505964, "learning_rate": 5.4800309370269607e-05, "loss": 0.3337, "step": 5689 }, { "epoch": 0.48541204572598534, "grad_norm": 1.1728641150419816, "learning_rate": 5.4786557732449594e-05, "loss": 0.2716, "step": 5690 }, { "epoch": 0.48549735540010236, "grad_norm": 1.6281048383581012, "learning_rate": 5.4772805729201346e-05, "loss": 0.2218, "step": 5691 }, { "epoch": 0.48558266507421943, "grad_norm": 1.9505223137137309, "learning_rate": 5.475905336157473e-05, "loss": 0.2504, "step": 5692 }, { "epoch": 0.48566797474833645, "grad_norm": 1.4840665798006671, "learning_rate": 5.474530063061968e-05, "loss": 0.2431, "step": 5693 }, { "epoch": 0.4857532844224535, "grad_norm": 1.5397172493651403, "learning_rate": 5.473154753738616e-05, "loss": 0.2317, "step": 5694 }, { "epoch": 0.48583859409657054, "grad_norm": 1.3791150159278205, "learning_rate": 5.471779408292411e-05, "loss": 0.2463, "step": 5695 }, { "epoch": 0.4859239037706876, "grad_norm": 1.746608649678349, "learning_rate": 5.4704040268283564e-05, "loss": 0.2391, "step": 5696 }, { "epoch": 0.48600921344480463, "grad_norm": 1.367807433040752, "learning_rate": 5.4690286094514534e-05, "loss": 0.1893, "step": 5697 }, { "epoch": 0.4860945231189217, "grad_norm": 1.7683232038509216, "learning_rate": 5.4676531562667076e-05, "loss": 0.3157, "step": 5698 }, { "epoch": 0.4861798327930387, "grad_norm": 2.157574233173745, "learning_rate": 5.4662776673791307e-05, "loss": 0.2823, "step": 5699 }, { "epoch": 0.4862651424671558, "grad_norm": 1.7209687217427982, "learning_rate": 5.46490214289373e-05, "loss": 0.2762, "step": 5700 }, { "epoch": 0.4863504521412728, "grad_norm": 1.2170990741621217, "learning_rate": 5.463526582915521e-05, "loss": 0.241, "step": 5701 }, { "epoch": 0.4864357618153899, "grad_norm": 1.6927566599671913, "learning_rate": 5.4621509875495216e-05, "loss": 0.2761, "step": 5702 }, { "epoch": 0.4865210714895069, "grad_norm": 1.611271749724031, "learning_rate": 5.460775356900749e-05, "loss": 0.2925, "step": 5703 }, { "epoch": 0.486606381163624, "grad_norm": 1.9202289510357227, "learning_rate": 5.4593996910742275e-05, "loss": 0.2688, "step": 5704 }, { "epoch": 0.486691690837741, "grad_norm": 1.4995023378695498, "learning_rate": 5.458023990174981e-05, "loss": 0.2565, "step": 5705 }, { "epoch": 0.48677700051185807, "grad_norm": 1.7232781150624026, "learning_rate": 5.456648254308037e-05, "loss": 0.3275, "step": 5706 }, { "epoch": 0.4868623101859751, "grad_norm": 1.5139997754720422, "learning_rate": 5.4552724835784244e-05, "loss": 0.2853, "step": 5707 }, { "epoch": 0.4869476198600921, "grad_norm": 1.6474067394158105, "learning_rate": 5.4538966780911774e-05, "loss": 0.2274, "step": 5708 }, { "epoch": 0.4870329295342092, "grad_norm": 1.5242384150188224, "learning_rate": 5.45252083795133e-05, "loss": 0.2756, "step": 5709 }, { "epoch": 0.4871182392083262, "grad_norm": 1.4986092159780533, "learning_rate": 5.451144963263922e-05, "loss": 0.3001, "step": 5710 }, { "epoch": 0.48720354888244327, "grad_norm": 1.6196593039382512, "learning_rate": 5.449769054133994e-05, "loss": 0.2335, "step": 5711 }, { "epoch": 0.4872888585565603, "grad_norm": 1.5543432573452567, "learning_rate": 5.448393110666588e-05, "loss": 0.3022, "step": 5712 }, { "epoch": 0.48737416823067736, "grad_norm": 1.5800794830127818, "learning_rate": 5.4470171329667506e-05, "loss": 0.2472, "step": 5713 }, { "epoch": 0.4874594779047944, "grad_norm": 1.4116277229419965, "learning_rate": 5.445641121139532e-05, "loss": 0.2168, "step": 5714 }, { "epoch": 0.48754478757891145, "grad_norm": 1.6505376806710417, "learning_rate": 5.4442650752899814e-05, "loss": 0.2412, "step": 5715 }, { "epoch": 0.48763009725302847, "grad_norm": 1.444314230445612, "learning_rate": 5.442888995523153e-05, "loss": 0.2832, "step": 5716 }, { "epoch": 0.48771540692714554, "grad_norm": 1.6026988394412374, "learning_rate": 5.441512881944104e-05, "loss": 0.2675, "step": 5717 }, { "epoch": 0.48780071660126256, "grad_norm": 1.8572372842194989, "learning_rate": 5.440136734657891e-05, "loss": 0.2853, "step": 5718 }, { "epoch": 0.48788602627537964, "grad_norm": 1.9609094746931293, "learning_rate": 5.4387605537695784e-05, "loss": 0.3093, "step": 5719 }, { "epoch": 0.48797133594949665, "grad_norm": 1.598032713468097, "learning_rate": 5.437384339384228e-05, "loss": 0.2204, "step": 5720 }, { "epoch": 0.4880566456236137, "grad_norm": 1.763386548049981, "learning_rate": 5.436008091606908e-05, "loss": 0.2617, "step": 5721 }, { "epoch": 0.48814195529773075, "grad_norm": 1.52629017894949, "learning_rate": 5.434631810542687e-05, "loss": 0.3007, "step": 5722 }, { "epoch": 0.4882272649718478, "grad_norm": 1.3378955256240082, "learning_rate": 5.4332554962966384e-05, "loss": 0.2499, "step": 5723 }, { "epoch": 0.48831257464596484, "grad_norm": 1.5072328708502332, "learning_rate": 5.431879148973833e-05, "loss": 0.2897, "step": 5724 }, { "epoch": 0.4883978843200819, "grad_norm": 1.823687915228946, "learning_rate": 5.430502768679351e-05, "loss": 0.2847, "step": 5725 }, { "epoch": 0.4884831939941989, "grad_norm": 1.3723158212847322, "learning_rate": 5.42912635551827e-05, "loss": 0.2816, "step": 5726 }, { "epoch": 0.488568503668316, "grad_norm": 1.584948903412417, "learning_rate": 5.427749909595672e-05, "loss": 0.2484, "step": 5727 }, { "epoch": 0.488653813342433, "grad_norm": 1.4621695153242593, "learning_rate": 5.4263734310166416e-05, "loss": 0.2682, "step": 5728 }, { "epoch": 0.4887391230165501, "grad_norm": 1.2726704446224637, "learning_rate": 5.424996919886265e-05, "loss": 0.2675, "step": 5729 }, { "epoch": 0.4888244326906671, "grad_norm": 1.669141013522363, "learning_rate": 5.423620376309633e-05, "loss": 0.2313, "step": 5730 }, { "epoch": 0.4889097423647842, "grad_norm": 1.570228308702918, "learning_rate": 5.422243800391835e-05, "loss": 0.2547, "step": 5731 }, { "epoch": 0.4889950520389012, "grad_norm": 1.6650577488183478, "learning_rate": 5.420867192237966e-05, "loss": 0.343, "step": 5732 }, { "epoch": 0.4890803617130183, "grad_norm": 1.389359058695369, "learning_rate": 5.4194905519531255e-05, "loss": 0.2226, "step": 5733 }, { "epoch": 0.4891656713871353, "grad_norm": 1.4736782746984212, "learning_rate": 5.418113879642409e-05, "loss": 0.2482, "step": 5734 }, { "epoch": 0.48925098106125237, "grad_norm": 1.5026991718830414, "learning_rate": 5.416737175410921e-05, "loss": 0.2383, "step": 5735 }, { "epoch": 0.4893362907353694, "grad_norm": 1.5280319965667402, "learning_rate": 5.415360439363764e-05, "loss": 0.2619, "step": 5736 }, { "epoch": 0.48942160040948646, "grad_norm": 1.371764190300094, "learning_rate": 5.4139836716060435e-05, "loss": 0.2124, "step": 5737 }, { "epoch": 0.4895069100836035, "grad_norm": 1.9054448663107009, "learning_rate": 5.4126068722428704e-05, "loss": 0.2554, "step": 5738 }, { "epoch": 0.48959221975772055, "grad_norm": 1.6391061927928376, "learning_rate": 5.411230041379356e-05, "loss": 0.2114, "step": 5739 }, { "epoch": 0.48967752943183757, "grad_norm": 1.7722357488665403, "learning_rate": 5.4098531791206105e-05, "loss": 0.2897, "step": 5740 }, { "epoch": 0.48976283910595464, "grad_norm": 1.366874833458017, "learning_rate": 5.408476285571755e-05, "loss": 0.2788, "step": 5741 }, { "epoch": 0.48984814878007166, "grad_norm": 2.07059531529384, "learning_rate": 5.407099360837905e-05, "loss": 0.2902, "step": 5742 }, { "epoch": 0.48993345845418873, "grad_norm": 2.012440139164567, "learning_rate": 5.405722405024183e-05, "loss": 0.3213, "step": 5743 }, { "epoch": 0.49001876812830575, "grad_norm": 1.9891171599951407, "learning_rate": 5.40434541823571e-05, "loss": 0.2998, "step": 5744 }, { "epoch": 0.4901040778024228, "grad_norm": 1.7482673378129276, "learning_rate": 5.402968400577614e-05, "loss": 0.2137, "step": 5745 }, { "epoch": 0.49018938747653984, "grad_norm": 1.2642693971515382, "learning_rate": 5.401591352155021e-05, "loss": 0.2852, "step": 5746 }, { "epoch": 0.49027469715065686, "grad_norm": 1.4408727139146806, "learning_rate": 5.400214273073065e-05, "loss": 0.2563, "step": 5747 }, { "epoch": 0.49036000682477393, "grad_norm": 1.4284707817318052, "learning_rate": 5.398837163436873e-05, "loss": 0.2212, "step": 5748 }, { "epoch": 0.49044531649889095, "grad_norm": 1.6549439949171258, "learning_rate": 5.397460023351585e-05, "loss": 0.2889, "step": 5749 }, { "epoch": 0.490530626173008, "grad_norm": 1.7485694487744832, "learning_rate": 5.396082852922334e-05, "loss": 0.2751, "step": 5750 }, { "epoch": 0.49061593584712504, "grad_norm": 1.4793485968376676, "learning_rate": 5.394705652254264e-05, "loss": 0.2066, "step": 5751 }, { "epoch": 0.4907012455212421, "grad_norm": 1.432783266724, "learning_rate": 5.393328421452514e-05, "loss": 0.2389, "step": 5752 }, { "epoch": 0.49078655519535913, "grad_norm": 1.7292849795145842, "learning_rate": 5.391951160622228e-05, "loss": 0.3044, "step": 5753 }, { "epoch": 0.4908718648694762, "grad_norm": 1.0897995587758813, "learning_rate": 5.390573869868556e-05, "loss": 0.1819, "step": 5754 }, { "epoch": 0.4909571745435932, "grad_norm": 1.4903129266957051, "learning_rate": 5.389196549296644e-05, "loss": 0.2811, "step": 5755 }, { "epoch": 0.4910424842177103, "grad_norm": 1.4548832910142353, "learning_rate": 5.387819199011642e-05, "loss": 0.2864, "step": 5756 }, { "epoch": 0.4911277938918273, "grad_norm": 1.5708548145528698, "learning_rate": 5.3864418191187074e-05, "loss": 0.2944, "step": 5757 }, { "epoch": 0.4912131035659444, "grad_norm": 1.6550879810607895, "learning_rate": 5.385064409722992e-05, "loss": 0.2642, "step": 5758 }, { "epoch": 0.4912984132400614, "grad_norm": 1.6626854871945544, "learning_rate": 5.383686970929657e-05, "loss": 0.2833, "step": 5759 }, { "epoch": 0.4913837229141785, "grad_norm": 1.595901010072619, "learning_rate": 5.3823095028438585e-05, "loss": 0.2842, "step": 5760 }, { "epoch": 0.4914690325882955, "grad_norm": 1.4249305172761728, "learning_rate": 5.380932005570761e-05, "loss": 0.2405, "step": 5761 }, { "epoch": 0.49155434226241257, "grad_norm": 1.6056317991452056, "learning_rate": 5.3795544792155306e-05, "loss": 0.2679, "step": 5762 }, { "epoch": 0.4916396519365296, "grad_norm": 1.680322571394912, "learning_rate": 5.3781769238833315e-05, "loss": 0.2418, "step": 5763 }, { "epoch": 0.49172496161064666, "grad_norm": 1.5647632229635284, "learning_rate": 5.3767993396793335e-05, "loss": 0.2336, "step": 5764 }, { "epoch": 0.4918102712847637, "grad_norm": 1.3745623672395788, "learning_rate": 5.37542172670871e-05, "loss": 0.2986, "step": 5765 }, { "epoch": 0.49189558095888075, "grad_norm": 1.565186594116451, "learning_rate": 5.37404408507663e-05, "loss": 0.2599, "step": 5766 }, { "epoch": 0.49198089063299777, "grad_norm": 1.5078433335071981, "learning_rate": 5.372666414888274e-05, "loss": 0.306, "step": 5767 }, { "epoch": 0.49206620030711484, "grad_norm": 1.7437483291891427, "learning_rate": 5.371288716248819e-05, "loss": 0.3229, "step": 5768 }, { "epoch": 0.49215150998123186, "grad_norm": 1.4101122904765173, "learning_rate": 5.36991098926344e-05, "loss": 0.2186, "step": 5769 }, { "epoch": 0.49223681965534893, "grad_norm": 1.885535143366659, "learning_rate": 5.368533234037325e-05, "loss": 0.2467, "step": 5770 }, { "epoch": 0.49232212932946595, "grad_norm": 1.5550256102682323, "learning_rate": 5.3671554506756546e-05, "loss": 0.2411, "step": 5771 }, { "epoch": 0.492407439003583, "grad_norm": 1.9742235057608986, "learning_rate": 5.3657776392836175e-05, "loss": 0.2796, "step": 5772 }, { "epoch": 0.49249274867770004, "grad_norm": 1.4805098153007648, "learning_rate": 5.364399799966402e-05, "loss": 0.3091, "step": 5773 }, { "epoch": 0.4925780583518171, "grad_norm": 1.3829703375613611, "learning_rate": 5.3630219328291965e-05, "loss": 0.2466, "step": 5774 }, { "epoch": 0.49266336802593413, "grad_norm": 1.4342473590381677, "learning_rate": 5.361644037977196e-05, "loss": 0.2615, "step": 5775 }, { "epoch": 0.4927486777000512, "grad_norm": 1.599843661299624, "learning_rate": 5.3602661155155966e-05, "loss": 0.2754, "step": 5776 }, { "epoch": 0.4928339873741682, "grad_norm": 1.6288949064962115, "learning_rate": 5.3588881655495914e-05, "loss": 0.1851, "step": 5777 }, { "epoch": 0.4929192970482853, "grad_norm": 1.504111694571282, "learning_rate": 5.3575101881843824e-05, "loss": 0.2367, "step": 5778 }, { "epoch": 0.4930046067224023, "grad_norm": 1.853648173956069, "learning_rate": 5.3561321835251724e-05, "loss": 0.2741, "step": 5779 }, { "epoch": 0.4930899163965194, "grad_norm": 1.6264504048677995, "learning_rate": 5.3547541516771603e-05, "loss": 0.2947, "step": 5780 }, { "epoch": 0.4931752260706364, "grad_norm": 1.2634134160948929, "learning_rate": 5.353376092745556e-05, "loss": 0.2316, "step": 5781 }, { "epoch": 0.4932605357447535, "grad_norm": 1.7786081365013373, "learning_rate": 5.351998006835562e-05, "loss": 0.3295, "step": 5782 }, { "epoch": 0.4933458454188705, "grad_norm": 1.5509761044096022, "learning_rate": 5.350619894052393e-05, "loss": 0.2267, "step": 5783 }, { "epoch": 0.4934311550929876, "grad_norm": 1.284527935195022, "learning_rate": 5.349241754501257e-05, "loss": 0.1828, "step": 5784 }, { "epoch": 0.4935164647671046, "grad_norm": 1.5697577716059679, "learning_rate": 5.34786358828737e-05, "loss": 0.2593, "step": 5785 }, { "epoch": 0.4936017744412216, "grad_norm": 1.4441979312025572, "learning_rate": 5.3464853955159456e-05, "loss": 0.2556, "step": 5786 }, { "epoch": 0.4936870841153387, "grad_norm": 1.4285845228003802, "learning_rate": 5.345107176292202e-05, "loss": 0.2454, "step": 5787 }, { "epoch": 0.4937723937894557, "grad_norm": 1.632375558313376, "learning_rate": 5.343728930721361e-05, "loss": 0.2439, "step": 5788 }, { "epoch": 0.4938577034635728, "grad_norm": 1.273291924703737, "learning_rate": 5.3423506589086435e-05, "loss": 0.2199, "step": 5789 }, { "epoch": 0.4939430131376898, "grad_norm": 1.4093390974052038, "learning_rate": 5.34097236095927e-05, "loss": 0.2595, "step": 5790 }, { "epoch": 0.49402832281180686, "grad_norm": 1.3539540735285254, "learning_rate": 5.3395940369784706e-05, "loss": 0.2256, "step": 5791 }, { "epoch": 0.4941136324859239, "grad_norm": 1.440170461314077, "learning_rate": 5.338215687071469e-05, "loss": 0.2939, "step": 5792 }, { "epoch": 0.49419894216004095, "grad_norm": 1.4814564170748117, "learning_rate": 5.336837311343498e-05, "loss": 0.2195, "step": 5793 }, { "epoch": 0.494284251834158, "grad_norm": 2.1083314928290533, "learning_rate": 5.3354589098997886e-05, "loss": 0.2644, "step": 5794 }, { "epoch": 0.49436956150827505, "grad_norm": 1.7025054364670766, "learning_rate": 5.3340804828455726e-05, "loss": 0.28, "step": 5795 }, { "epoch": 0.49445487118239206, "grad_norm": 1.4579582640295639, "learning_rate": 5.332702030286089e-05, "loss": 0.278, "step": 5796 }, { "epoch": 0.49454018085650914, "grad_norm": 2.240157108587428, "learning_rate": 5.331323552326573e-05, "loss": 0.2523, "step": 5797 }, { "epoch": 0.49462549053062616, "grad_norm": 1.393940220668922, "learning_rate": 5.329945049072263e-05, "loss": 0.2861, "step": 5798 }, { "epoch": 0.49471080020474323, "grad_norm": 2.0239849749146903, "learning_rate": 5.328566520628403e-05, "loss": 0.305, "step": 5799 }, { "epoch": 0.49479610987886025, "grad_norm": 1.4276718355563696, "learning_rate": 5.327187967100237e-05, "loss": 0.2454, "step": 5800 }, { "epoch": 0.4948814195529773, "grad_norm": 2.0104950673788258, "learning_rate": 5.325809388593005e-05, "loss": 0.2343, "step": 5801 }, { "epoch": 0.49496672922709434, "grad_norm": 1.6559796382555234, "learning_rate": 5.324430785211959e-05, "loss": 0.3067, "step": 5802 }, { "epoch": 0.4950520389012114, "grad_norm": 1.4715160539628211, "learning_rate": 5.323052157062346e-05, "loss": 0.2458, "step": 5803 }, { "epoch": 0.49513734857532843, "grad_norm": 1.7226136217443933, "learning_rate": 5.321673504249418e-05, "loss": 0.1927, "step": 5804 }, { "epoch": 0.4952226582494455, "grad_norm": 1.700790765514048, "learning_rate": 5.320294826878428e-05, "loss": 0.2284, "step": 5805 }, { "epoch": 0.4953079679235625, "grad_norm": 1.148330060643228, "learning_rate": 5.318916125054628e-05, "loss": 0.2253, "step": 5806 }, { "epoch": 0.4953932775976796, "grad_norm": 1.3760498980329623, "learning_rate": 5.317537398883279e-05, "loss": 0.2271, "step": 5807 }, { "epoch": 0.4954785872717966, "grad_norm": 1.5268583575234504, "learning_rate": 5.3161586484696347e-05, "loss": 0.2703, "step": 5808 }, { "epoch": 0.4955638969459137, "grad_norm": 1.5459467096557562, "learning_rate": 5.3147798739189594e-05, "loss": 0.2962, "step": 5809 }, { "epoch": 0.4956492066200307, "grad_norm": 1.3504202541800756, "learning_rate": 5.313401075336513e-05, "loss": 0.2832, "step": 5810 }, { "epoch": 0.4957345162941478, "grad_norm": 1.4477893123832444, "learning_rate": 5.31202225282756e-05, "loss": 0.2384, "step": 5811 }, { "epoch": 0.4958198259682648, "grad_norm": 1.3453299526806017, "learning_rate": 5.3106434064973665e-05, "loss": 0.2529, "step": 5812 }, { "epoch": 0.49590513564238187, "grad_norm": 1.4590746584818914, "learning_rate": 5.309264536451199e-05, "loss": 0.2667, "step": 5813 }, { "epoch": 0.4959904453164989, "grad_norm": 1.3362922420551417, "learning_rate": 5.307885642794327e-05, "loss": 0.2438, "step": 5814 }, { "epoch": 0.49607575499061596, "grad_norm": 1.2951646119882239, "learning_rate": 5.306506725632023e-05, "loss": 0.2511, "step": 5815 }, { "epoch": 0.496161064664733, "grad_norm": 1.7635808944470182, "learning_rate": 5.305127785069558e-05, "loss": 0.3, "step": 5816 }, { "epoch": 0.49624637433885005, "grad_norm": 1.5342427804698409, "learning_rate": 5.30374882121221e-05, "loss": 0.3045, "step": 5817 }, { "epoch": 0.49633168401296707, "grad_norm": 1.7214396845532902, "learning_rate": 5.302369834165253e-05, "loss": 0.2954, "step": 5818 }, { "epoch": 0.49641699368708414, "grad_norm": 1.383938605628136, "learning_rate": 5.3009908240339647e-05, "loss": 0.2127, "step": 5819 }, { "epoch": 0.49650230336120116, "grad_norm": 1.6940828425655505, "learning_rate": 5.299611790923629e-05, "loss": 0.2663, "step": 5820 }, { "epoch": 0.49658761303531823, "grad_norm": 1.5419863893425267, "learning_rate": 5.2982327349395246e-05, "loss": 0.2218, "step": 5821 }, { "epoch": 0.49667292270943525, "grad_norm": 1.6894450810411825, "learning_rate": 5.296853656186934e-05, "loss": 0.2657, "step": 5822 }, { "epoch": 0.49675823238355227, "grad_norm": 1.6107474095609238, "learning_rate": 5.2954745547711446e-05, "loss": 0.2858, "step": 5823 }, { "epoch": 0.49684354205766934, "grad_norm": 1.9403288094610311, "learning_rate": 5.294095430797443e-05, "loss": 0.2969, "step": 5824 }, { "epoch": 0.49692885173178636, "grad_norm": 1.7595877266402264, "learning_rate": 5.2927162843711196e-05, "loss": 0.2922, "step": 5825 }, { "epoch": 0.49701416140590343, "grad_norm": 1.6874260356376158, "learning_rate": 5.291337115597462e-05, "loss": 0.2453, "step": 5826 }, { "epoch": 0.49709947108002045, "grad_norm": 1.210704461100857, "learning_rate": 5.289957924581764e-05, "loss": 0.2535, "step": 5827 }, { "epoch": 0.4971847807541375, "grad_norm": 1.991453327207648, "learning_rate": 5.28857871142932e-05, "loss": 0.2755, "step": 5828 }, { "epoch": 0.49727009042825454, "grad_norm": 1.5219985817740973, "learning_rate": 5.287199476245425e-05, "loss": 0.2813, "step": 5829 }, { "epoch": 0.4973554001023716, "grad_norm": 1.5116917807369032, "learning_rate": 5.285820219135374e-05, "loss": 0.2827, "step": 5830 }, { "epoch": 0.49744070977648863, "grad_norm": 1.79012923109813, "learning_rate": 5.2844409402044707e-05, "loss": 0.2854, "step": 5831 }, { "epoch": 0.4975260194506057, "grad_norm": 1.7359221147873507, "learning_rate": 5.283061639558011e-05, "loss": 0.3226, "step": 5832 }, { "epoch": 0.4976113291247227, "grad_norm": 1.70803677690832, "learning_rate": 5.281682317301302e-05, "loss": 0.2867, "step": 5833 }, { "epoch": 0.4976966387988398, "grad_norm": 2.02566547055586, "learning_rate": 5.280302973539644e-05, "loss": 0.2253, "step": 5834 }, { "epoch": 0.4977819484729568, "grad_norm": 2.116644645436194, "learning_rate": 5.278923608378341e-05, "loss": 0.3218, "step": 5835 }, { "epoch": 0.4978672581470739, "grad_norm": 1.594813422169497, "learning_rate": 5.277544221922705e-05, "loss": 0.2765, "step": 5836 }, { "epoch": 0.4979525678211909, "grad_norm": 1.4478661324809863, "learning_rate": 5.276164814278043e-05, "loss": 0.2041, "step": 5837 }, { "epoch": 0.498037877495308, "grad_norm": 1.772720057354984, "learning_rate": 5.274785385549663e-05, "loss": 0.1893, "step": 5838 }, { "epoch": 0.498123187169425, "grad_norm": 1.4368848728472936, "learning_rate": 5.27340593584288e-05, "loss": 0.2824, "step": 5839 }, { "epoch": 0.49820849684354207, "grad_norm": 1.4924151661339713, "learning_rate": 5.2720264652630055e-05, "loss": 0.267, "step": 5840 }, { "epoch": 0.4982938065176591, "grad_norm": 2.0172682576815872, "learning_rate": 5.2706469739153574e-05, "loss": 0.2994, "step": 5841 }, { "epoch": 0.49837911619177616, "grad_norm": 1.4828115159766064, "learning_rate": 5.269267461905253e-05, "loss": 0.2381, "step": 5842 }, { "epoch": 0.4984644258658932, "grad_norm": 1.4644034982953646, "learning_rate": 5.267887929338006e-05, "loss": 0.2176, "step": 5843 }, { "epoch": 0.49854973554001025, "grad_norm": 1.728912825125884, "learning_rate": 5.2665083763189396e-05, "loss": 0.2423, "step": 5844 }, { "epoch": 0.49863504521412727, "grad_norm": 1.4746109918244448, "learning_rate": 5.265128802953375e-05, "loss": 0.2248, "step": 5845 }, { "epoch": 0.49872035488824434, "grad_norm": 1.4306673954739333, "learning_rate": 5.263749209346634e-05, "loss": 0.2119, "step": 5846 }, { "epoch": 0.49880566456236136, "grad_norm": 1.2311304644346124, "learning_rate": 5.2623695956040445e-05, "loss": 0.2139, "step": 5847 }, { "epoch": 0.49889097423647843, "grad_norm": 1.565707157743297, "learning_rate": 5.260989961830929e-05, "loss": 0.2014, "step": 5848 }, { "epoch": 0.49897628391059545, "grad_norm": 1.820710560248006, "learning_rate": 5.259610308132618e-05, "loss": 0.3001, "step": 5849 }, { "epoch": 0.4990615935847125, "grad_norm": 1.3703884473435073, "learning_rate": 5.2582306346144394e-05, "loss": 0.2779, "step": 5850 }, { "epoch": 0.49914690325882954, "grad_norm": 2.267545635769351, "learning_rate": 5.2568509413817236e-05, "loss": 0.2923, "step": 5851 }, { "epoch": 0.4992322129329466, "grad_norm": 1.6061376955425855, "learning_rate": 5.2554712285398034e-05, "loss": 0.224, "step": 5852 }, { "epoch": 0.49931752260706364, "grad_norm": 1.8379992656644033, "learning_rate": 5.2540914961940124e-05, "loss": 0.2576, "step": 5853 }, { "epoch": 0.4994028322811807, "grad_norm": 1.2964594071694537, "learning_rate": 5.2527117444496864e-05, "loss": 0.3014, "step": 5854 }, { "epoch": 0.4994881419552977, "grad_norm": 1.5114715926125302, "learning_rate": 5.251331973412162e-05, "loss": 0.2937, "step": 5855 }, { "epoch": 0.4995734516294148, "grad_norm": 1.8027539484649742, "learning_rate": 5.249952183186776e-05, "loss": 0.2566, "step": 5856 }, { "epoch": 0.4996587613035318, "grad_norm": 1.4205643198771252, "learning_rate": 5.24857237387887e-05, "loss": 0.262, "step": 5857 }, { "epoch": 0.4997440709776489, "grad_norm": 2.055350889149309, "learning_rate": 5.247192545593783e-05, "loss": 0.28, "step": 5858 }, { "epoch": 0.4998293806517659, "grad_norm": 1.4797012634603999, "learning_rate": 5.245812698436858e-05, "loss": 0.2711, "step": 5859 }, { "epoch": 0.499914690325883, "grad_norm": 1.7482080822139743, "learning_rate": 5.2444328325134415e-05, "loss": 0.295, "step": 5860 }, { "epoch": 0.5, "grad_norm": 1.6863375297886776, "learning_rate": 5.243052947928876e-05, "loss": 0.2783, "step": 5861 }, { "epoch": 0.500085309674117, "grad_norm": 1.5120959538283225, "learning_rate": 5.241673044788511e-05, "loss": 0.2329, "step": 5862 }, { "epoch": 0.500170619348234, "grad_norm": 1.891059951562145, "learning_rate": 5.240293123197694e-05, "loss": 0.2778, "step": 5863 }, { "epoch": 0.5002559290223512, "grad_norm": 1.3542357891732193, "learning_rate": 5.238913183261771e-05, "loss": 0.3032, "step": 5864 }, { "epoch": 0.5003412386964682, "grad_norm": 1.5950554381873732, "learning_rate": 5.237533225086098e-05, "loss": 0.3291, "step": 5865 }, { "epoch": 0.5004265483705852, "grad_norm": 1.8420390579688488, "learning_rate": 5.236153248776025e-05, "loss": 0.2602, "step": 5866 }, { "epoch": 0.5005118580447022, "grad_norm": 1.4708248660498557, "learning_rate": 5.2347732544369055e-05, "loss": 0.2653, "step": 5867 }, { "epoch": 0.5005971677188193, "grad_norm": 1.442753108278878, "learning_rate": 5.2333932421740975e-05, "loss": 0.2493, "step": 5868 }, { "epoch": 0.5006824773929364, "grad_norm": 1.3726463113423903, "learning_rate": 5.2320132120929533e-05, "loss": 0.2928, "step": 5869 }, { "epoch": 0.5007677870670534, "grad_norm": 1.9623386802702012, "learning_rate": 5.230633164298835e-05, "loss": 0.2535, "step": 5870 }, { "epoch": 0.5008530967411704, "grad_norm": 1.6361072645302275, "learning_rate": 5.2292530988971e-05, "loss": 0.2711, "step": 5871 }, { "epoch": 0.5009384064152875, "grad_norm": 1.5438128130408117, "learning_rate": 5.2278730159931076e-05, "loss": 0.2128, "step": 5872 }, { "epoch": 0.5010237160894045, "grad_norm": 1.8804235854705895, "learning_rate": 5.226492915692224e-05, "loss": 0.2337, "step": 5873 }, { "epoch": 0.5011090257635216, "grad_norm": 1.462496817821725, "learning_rate": 5.225112798099809e-05, "loss": 0.2667, "step": 5874 }, { "epoch": 0.5011943354376386, "grad_norm": 1.5650795052902708, "learning_rate": 5.2237326633212266e-05, "loss": 0.2266, "step": 5875 }, { "epoch": 0.5012796451117557, "grad_norm": 1.1434658156082054, "learning_rate": 5.2223525114618466e-05, "loss": 0.2393, "step": 5876 }, { "epoch": 0.5013649547858727, "grad_norm": 1.993498187770168, "learning_rate": 5.220972342627032e-05, "loss": 0.2419, "step": 5877 }, { "epoch": 0.5014502644599897, "grad_norm": 1.712421348603371, "learning_rate": 5.219592156922154e-05, "loss": 0.2603, "step": 5878 }, { "epoch": 0.5015355741341068, "grad_norm": 1.3861098121088262, "learning_rate": 5.218211954452582e-05, "loss": 0.2577, "step": 5879 }, { "epoch": 0.5016208838082239, "grad_norm": 1.7382686314499862, "learning_rate": 5.216831735323685e-05, "loss": 0.2854, "step": 5880 }, { "epoch": 0.5017061934823409, "grad_norm": 1.5093608278987387, "learning_rate": 5.21545149964084e-05, "loss": 0.2492, "step": 5881 }, { "epoch": 0.5017915031564579, "grad_norm": 1.757952604008616, "learning_rate": 5.2140712475094166e-05, "loss": 0.2855, "step": 5882 }, { "epoch": 0.501876812830575, "grad_norm": 1.4441749013558491, "learning_rate": 5.21269097903479e-05, "loss": 0.2831, "step": 5883 }, { "epoch": 0.5019621225046921, "grad_norm": 1.673840659594257, "learning_rate": 5.21131069432234e-05, "loss": 0.2869, "step": 5884 }, { "epoch": 0.5020474321788091, "grad_norm": 1.622677757623885, "learning_rate": 5.209930393477439e-05, "loss": 0.2872, "step": 5885 }, { "epoch": 0.5021327418529261, "grad_norm": 1.9520603787902224, "learning_rate": 5.2085500766054695e-05, "loss": 0.2975, "step": 5886 }, { "epoch": 0.5022180515270431, "grad_norm": 1.4128255755843133, "learning_rate": 5.207169743811809e-05, "loss": 0.2131, "step": 5887 }, { "epoch": 0.5023033612011603, "grad_norm": 1.5006246855456218, "learning_rate": 5.20578939520184e-05, "loss": 0.3082, "step": 5888 }, { "epoch": 0.5023886708752773, "grad_norm": 1.5862545203947884, "learning_rate": 5.204409030880945e-05, "loss": 0.2845, "step": 5889 }, { "epoch": 0.5024739805493943, "grad_norm": 1.8511905216035383, "learning_rate": 5.2030286509545054e-05, "loss": 0.2454, "step": 5890 }, { "epoch": 0.5025592902235113, "grad_norm": 1.587594505560823, "learning_rate": 5.2016482555279065e-05, "loss": 0.2655, "step": 5891 }, { "epoch": 0.5026445998976284, "grad_norm": 1.5883336499038065, "learning_rate": 5.200267844706537e-05, "loss": 0.3391, "step": 5892 }, { "epoch": 0.5027299095717455, "grad_norm": 1.7045823607639747, "learning_rate": 5.198887418595779e-05, "loss": 0.219, "step": 5893 }, { "epoch": 0.5028152192458625, "grad_norm": 1.376404781594101, "learning_rate": 5.1975069773010255e-05, "loss": 0.2327, "step": 5894 }, { "epoch": 0.5029005289199795, "grad_norm": 1.3447535116671432, "learning_rate": 5.196126520927666e-05, "loss": 0.2102, "step": 5895 }, { "epoch": 0.5029858385940966, "grad_norm": 1.2838632404040276, "learning_rate": 5.194746049581084e-05, "loss": 0.2541, "step": 5896 }, { "epoch": 0.5030711482682136, "grad_norm": 1.4389664177809935, "learning_rate": 5.193365563366679e-05, "loss": 0.2756, "step": 5897 }, { "epoch": 0.5031564579423307, "grad_norm": 1.4791343936175387, "learning_rate": 5.1919850623898395e-05, "loss": 0.266, "step": 5898 }, { "epoch": 0.5032417676164477, "grad_norm": 1.4810329358690528, "learning_rate": 5.1906045467559616e-05, "loss": 0.2263, "step": 5899 }, { "epoch": 0.5033270772905647, "grad_norm": 1.7743239015085717, "learning_rate": 5.18922401657044e-05, "loss": 0.2794, "step": 5900 }, { "epoch": 0.5034123869646818, "grad_norm": 1.5283326364818608, "learning_rate": 5.187843471938668e-05, "loss": 0.2431, "step": 5901 }, { "epoch": 0.5034976966387988, "grad_norm": 1.5806899424613952, "learning_rate": 5.186462912966047e-05, "loss": 0.2224, "step": 5902 }, { "epoch": 0.5035830063129159, "grad_norm": 1.7972482044923637, "learning_rate": 5.1850823397579726e-05, "loss": 0.2937, "step": 5903 }, { "epoch": 0.5036683159870329, "grad_norm": 1.5520907237583155, "learning_rate": 5.183701752419845e-05, "loss": 0.2858, "step": 5904 }, { "epoch": 0.50375362566115, "grad_norm": 1.4247549883702764, "learning_rate": 5.1823211510570656e-05, "loss": 0.2476, "step": 5905 }, { "epoch": 0.503838935335267, "grad_norm": 1.5395827600102094, "learning_rate": 5.180940535775035e-05, "loss": 0.1959, "step": 5906 }, { "epoch": 0.503924245009384, "grad_norm": 1.5977613046145027, "learning_rate": 5.179559906679157e-05, "loss": 0.1785, "step": 5907 }, { "epoch": 0.5040095546835011, "grad_norm": 1.695439276811284, "learning_rate": 5.178179263874833e-05, "loss": 0.2612, "step": 5908 }, { "epoch": 0.5040948643576182, "grad_norm": 1.4875978517804556, "learning_rate": 5.176798607467468e-05, "loss": 0.2477, "step": 5909 }, { "epoch": 0.5041801740317352, "grad_norm": 1.7229606376796687, "learning_rate": 5.17541793756247e-05, "loss": 0.2545, "step": 5910 }, { "epoch": 0.5042654837058522, "grad_norm": 1.6379138161572897, "learning_rate": 5.174037254265245e-05, "loss": 0.1659, "step": 5911 }, { "epoch": 0.5043507933799692, "grad_norm": 1.718331442066315, "learning_rate": 5.172656557681199e-05, "loss": 0.2619, "step": 5912 }, { "epoch": 0.5044361030540864, "grad_norm": 1.6036472057482438, "learning_rate": 5.171275847915744e-05, "loss": 0.2545, "step": 5913 }, { "epoch": 0.5045214127282034, "grad_norm": 1.2877060660479243, "learning_rate": 5.169895125074287e-05, "loss": 0.2508, "step": 5914 }, { "epoch": 0.5046067224023204, "grad_norm": 1.4333168198868853, "learning_rate": 5.168514389262241e-05, "loss": 0.249, "step": 5915 }, { "epoch": 0.5046920320764374, "grad_norm": 2.101871964029643, "learning_rate": 5.167133640585018e-05, "loss": 0.2691, "step": 5916 }, { "epoch": 0.5047773417505546, "grad_norm": 1.5586309213436342, "learning_rate": 5.165752879148027e-05, "loss": 0.2566, "step": 5917 }, { "epoch": 0.5048626514246716, "grad_norm": 1.8421706854883606, "learning_rate": 5.164372105056686e-05, "loss": 0.3007, "step": 5918 }, { "epoch": 0.5049479610987886, "grad_norm": 1.38764759172269, "learning_rate": 5.162991318416408e-05, "loss": 0.2075, "step": 5919 }, { "epoch": 0.5050332707729056, "grad_norm": 1.9937325264574592, "learning_rate": 5.1616105193326084e-05, "loss": 0.2596, "step": 5920 }, { "epoch": 0.5051185804470227, "grad_norm": 1.292742245475485, "learning_rate": 5.1602297079107054e-05, "loss": 0.2384, "step": 5921 }, { "epoch": 0.5052038901211398, "grad_norm": 1.5361952044505145, "learning_rate": 5.1588488842561145e-05, "loss": 0.2387, "step": 5922 }, { "epoch": 0.5052891997952568, "grad_norm": 1.530123315591308, "learning_rate": 5.157468048474257e-05, "loss": 0.2765, "step": 5923 }, { "epoch": 0.5053745094693738, "grad_norm": 1.5117988663767181, "learning_rate": 5.15608720067055e-05, "loss": 0.273, "step": 5924 }, { "epoch": 0.5054598191434909, "grad_norm": 1.6562986546693421, "learning_rate": 5.1547063409504135e-05, "loss": 0.2415, "step": 5925 }, { "epoch": 0.5055451288176079, "grad_norm": 1.5464752913880582, "learning_rate": 5.1533254694192714e-05, "loss": 0.2234, "step": 5926 }, { "epoch": 0.505630438491725, "grad_norm": 1.7044107326095346, "learning_rate": 5.151944586182545e-05, "loss": 0.264, "step": 5927 }, { "epoch": 0.505715748165842, "grad_norm": 1.6801880556393776, "learning_rate": 5.1505636913456555e-05, "loss": 0.2711, "step": 5928 }, { "epoch": 0.5058010578399591, "grad_norm": 1.7662622432729855, "learning_rate": 5.149182785014029e-05, "loss": 0.2745, "step": 5929 }, { "epoch": 0.5058863675140761, "grad_norm": 1.447829657851678, "learning_rate": 5.147801867293088e-05, "loss": 0.2586, "step": 5930 }, { "epoch": 0.5059716771881931, "grad_norm": 1.4287403074988725, "learning_rate": 5.146420938288262e-05, "loss": 0.2784, "step": 5931 }, { "epoch": 0.5060569868623102, "grad_norm": 1.5649817784378997, "learning_rate": 5.145039998104974e-05, "loss": 0.2424, "step": 5932 }, { "epoch": 0.5061422965364273, "grad_norm": 1.4042859242825378, "learning_rate": 5.143659046848653e-05, "loss": 0.208, "step": 5933 }, { "epoch": 0.5062276062105443, "grad_norm": 1.5361281562826763, "learning_rate": 5.1422780846247284e-05, "loss": 0.3082, "step": 5934 }, { "epoch": 0.5063129158846613, "grad_norm": 1.5918382884998115, "learning_rate": 5.1408971115386287e-05, "loss": 0.2759, "step": 5935 }, { "epoch": 0.5063982255587783, "grad_norm": 1.704442275999321, "learning_rate": 5.1395161276957804e-05, "loss": 0.3432, "step": 5936 }, { "epoch": 0.5064835352328955, "grad_norm": 1.4653173062666098, "learning_rate": 5.13813513320162e-05, "loss": 0.2609, "step": 5937 }, { "epoch": 0.5065688449070125, "grad_norm": 1.4745662859753812, "learning_rate": 5.136754128161575e-05, "loss": 0.244, "step": 5938 }, { "epoch": 0.5066541545811295, "grad_norm": 1.3539920205200915, "learning_rate": 5.135373112681079e-05, "loss": 0.2859, "step": 5939 }, { "epoch": 0.5067394642552465, "grad_norm": 1.6660498272335529, "learning_rate": 5.133992086865565e-05, "loss": 0.246, "step": 5940 }, { "epoch": 0.5068247739293635, "grad_norm": 1.786360586919571, "learning_rate": 5.1326110508204675e-05, "loss": 0.2163, "step": 5941 }, { "epoch": 0.5069100836034807, "grad_norm": 1.5340262954925026, "learning_rate": 5.1312300046512205e-05, "loss": 0.2207, "step": 5942 }, { "epoch": 0.5069953932775977, "grad_norm": 1.4919979114824664, "learning_rate": 5.1298489484632605e-05, "loss": 0.2705, "step": 5943 }, { "epoch": 0.5070807029517147, "grad_norm": 1.3960971535511508, "learning_rate": 5.1284678823620225e-05, "loss": 0.2755, "step": 5944 }, { "epoch": 0.5071660126258317, "grad_norm": 1.4268576543341005, "learning_rate": 5.127086806452945e-05, "loss": 0.2358, "step": 5945 }, { "epoch": 0.5072513222999488, "grad_norm": 1.9368955838444324, "learning_rate": 5.125705720841465e-05, "loss": 0.3159, "step": 5946 }, { "epoch": 0.5073366319740659, "grad_norm": 1.3739654810760265, "learning_rate": 5.124324625633021e-05, "loss": 0.2272, "step": 5947 }, { "epoch": 0.5074219416481829, "grad_norm": 1.3135692237587173, "learning_rate": 5.122943520933054e-05, "loss": 0.2537, "step": 5948 }, { "epoch": 0.5075072513222999, "grad_norm": 1.843229058588964, "learning_rate": 5.1215624068470014e-05, "loss": 0.2673, "step": 5949 }, { "epoch": 0.507592560996417, "grad_norm": 1.71033428828564, "learning_rate": 5.120181283480305e-05, "loss": 0.2342, "step": 5950 }, { "epoch": 0.507677870670534, "grad_norm": 1.270643058457138, "learning_rate": 5.118800150938407e-05, "loss": 0.2001, "step": 5951 }, { "epoch": 0.5077631803446511, "grad_norm": 1.9015421030146435, "learning_rate": 5.117419009326747e-05, "loss": 0.184, "step": 5952 }, { "epoch": 0.5078484900187681, "grad_norm": 1.7380986835746681, "learning_rate": 5.1160378587507716e-05, "loss": 0.233, "step": 5953 }, { "epoch": 0.5079337996928852, "grad_norm": 1.4798515059749502, "learning_rate": 5.1146566993159205e-05, "loss": 0.2688, "step": 5954 }, { "epoch": 0.5080191093670022, "grad_norm": 1.7938274247210617, "learning_rate": 5.1132755311276405e-05, "loss": 0.2966, "step": 5955 }, { "epoch": 0.5081044190411192, "grad_norm": 1.473903947058223, "learning_rate": 5.111894354291376e-05, "loss": 0.2553, "step": 5956 }, { "epoch": 0.5081897287152363, "grad_norm": 1.696379505968058, "learning_rate": 5.110513168912571e-05, "loss": 0.2746, "step": 5957 }, { "epoch": 0.5082750383893534, "grad_norm": 1.9283049414458777, "learning_rate": 5.109131975096675e-05, "loss": 0.2792, "step": 5958 }, { "epoch": 0.5083603480634704, "grad_norm": 1.9694061396929539, "learning_rate": 5.10775077294913e-05, "loss": 0.2454, "step": 5959 }, { "epoch": 0.5084456577375874, "grad_norm": 1.562864032300786, "learning_rate": 5.106369562575388e-05, "loss": 0.1917, "step": 5960 }, { "epoch": 0.5085309674117044, "grad_norm": 1.5029148549716833, "learning_rate": 5.104988344080896e-05, "loss": 0.2809, "step": 5961 }, { "epoch": 0.5086162770858216, "grad_norm": 2.008763425790468, "learning_rate": 5.1036071175710986e-05, "loss": 0.3273, "step": 5962 }, { "epoch": 0.5087015867599386, "grad_norm": 2.5782060090255756, "learning_rate": 5.1022258831514504e-05, "loss": 0.3289, "step": 5963 }, { "epoch": 0.5087868964340556, "grad_norm": 1.4236062378011682, "learning_rate": 5.100844640927399e-05, "loss": 0.2444, "step": 5964 }, { "epoch": 0.5088722061081726, "grad_norm": 1.266446259211943, "learning_rate": 5.099463391004394e-05, "loss": 0.3101, "step": 5965 }, { "epoch": 0.5089575157822898, "grad_norm": 1.6851502058958021, "learning_rate": 5.098082133487889e-05, "loss": 0.2374, "step": 5966 }, { "epoch": 0.5090428254564068, "grad_norm": 1.365865318749289, "learning_rate": 5.096700868483334e-05, "loss": 0.2127, "step": 5967 }, { "epoch": 0.5091281351305238, "grad_norm": 1.5783890607127062, "learning_rate": 5.095319596096182e-05, "loss": 0.2597, "step": 5968 }, { "epoch": 0.5092134448046408, "grad_norm": 1.3119395739840773, "learning_rate": 5.0939383164318865e-05, "loss": 0.2747, "step": 5969 }, { "epoch": 0.5092987544787579, "grad_norm": 1.633303627730143, "learning_rate": 5.092557029595897e-05, "loss": 0.2504, "step": 5970 }, { "epoch": 0.509384064152875, "grad_norm": 1.6510405756067308, "learning_rate": 5.091175735693672e-05, "loss": 0.2689, "step": 5971 }, { "epoch": 0.509469373826992, "grad_norm": 1.4744206794108212, "learning_rate": 5.0897944348306636e-05, "loss": 0.2119, "step": 5972 }, { "epoch": 0.509554683501109, "grad_norm": 1.1878273161476167, "learning_rate": 5.088413127112326e-05, "loss": 0.237, "step": 5973 }, { "epoch": 0.5096399931752261, "grad_norm": 1.2452400176853045, "learning_rate": 5.087031812644118e-05, "loss": 0.1993, "step": 5974 }, { "epoch": 0.5097253028493431, "grad_norm": 1.6236871404425337, "learning_rate": 5.085650491531492e-05, "loss": 0.3029, "step": 5975 }, { "epoch": 0.5098106125234602, "grad_norm": 1.465211867808174, "learning_rate": 5.084269163879907e-05, "loss": 0.296, "step": 5976 }, { "epoch": 0.5098959221975772, "grad_norm": 1.6675938490919109, "learning_rate": 5.082887829794819e-05, "loss": 0.2844, "step": 5977 }, { "epoch": 0.5099812318716942, "grad_norm": 1.5568430923261134, "learning_rate": 5.081506489381684e-05, "loss": 0.2659, "step": 5978 }, { "epoch": 0.5100665415458113, "grad_norm": 1.4907446471365606, "learning_rate": 5.080125142745965e-05, "loss": 0.2201, "step": 5979 }, { "epoch": 0.5101518512199283, "grad_norm": 1.3569559079669398, "learning_rate": 5.078743789993115e-05, "loss": 0.257, "step": 5980 }, { "epoch": 0.5102371608940454, "grad_norm": 1.3181168396698528, "learning_rate": 5.077362431228596e-05, "loss": 0.2129, "step": 5981 }, { "epoch": 0.5103224705681624, "grad_norm": 1.3821497476602438, "learning_rate": 5.075981066557866e-05, "loss": 0.2863, "step": 5982 }, { "epoch": 0.5104077802422795, "grad_norm": 1.683326788533899, "learning_rate": 5.074599696086384e-05, "loss": 0.2579, "step": 5983 }, { "epoch": 0.5104930899163965, "grad_norm": 2.1101607378079015, "learning_rate": 5.073218319919614e-05, "loss": 0.2585, "step": 5984 }, { "epoch": 0.5105783995905135, "grad_norm": 2.2139636785599963, "learning_rate": 5.0718369381630126e-05, "loss": 0.2129, "step": 5985 }, { "epoch": 0.5106637092646306, "grad_norm": 1.7002194319081827, "learning_rate": 5.070455550922043e-05, "loss": 0.2805, "step": 5986 }, { "epoch": 0.5107490189387477, "grad_norm": 1.7149218843425413, "learning_rate": 5.069074158302167e-05, "loss": 0.2792, "step": 5987 }, { "epoch": 0.5108343286128647, "grad_norm": 1.9170668087552287, "learning_rate": 5.0676927604088465e-05, "loss": 0.308, "step": 5988 }, { "epoch": 0.5109196382869817, "grad_norm": 1.3842754922413765, "learning_rate": 5.066311357347542e-05, "loss": 0.2242, "step": 5989 }, { "epoch": 0.5110049479610987, "grad_norm": 1.423947092536067, "learning_rate": 5.06492994922372e-05, "loss": 0.2597, "step": 5990 }, { "epoch": 0.5110902576352159, "grad_norm": 1.7831266455738004, "learning_rate": 5.0635485361428395e-05, "loss": 0.2708, "step": 5991 }, { "epoch": 0.5111755673093329, "grad_norm": 1.5620136266937004, "learning_rate": 5.062167118210367e-05, "loss": 0.2579, "step": 5992 }, { "epoch": 0.5112608769834499, "grad_norm": 1.7714116292903612, "learning_rate": 5.0607856955317646e-05, "loss": 0.2822, "step": 5993 }, { "epoch": 0.5113461866575669, "grad_norm": 1.5552364127947291, "learning_rate": 5.0594042682124976e-05, "loss": 0.2151, "step": 5994 }, { "epoch": 0.511431496331684, "grad_norm": 1.6104401702284303, "learning_rate": 5.0580228363580304e-05, "loss": 0.2601, "step": 5995 }, { "epoch": 0.5115168060058011, "grad_norm": 1.8607368931235033, "learning_rate": 5.056641400073827e-05, "loss": 0.2188, "step": 5996 }, { "epoch": 0.5116021156799181, "grad_norm": 1.802755138736395, "learning_rate": 5.055259959465355e-05, "loss": 0.268, "step": 5997 }, { "epoch": 0.5116874253540351, "grad_norm": 1.4884024342938114, "learning_rate": 5.053878514638078e-05, "loss": 0.2232, "step": 5998 }, { "epoch": 0.5117727350281522, "grad_norm": 1.4516060386345004, "learning_rate": 5.052497065697464e-05, "loss": 0.2614, "step": 5999 }, { "epoch": 0.5118580447022693, "grad_norm": 1.6460958182996765, "learning_rate": 5.0511156127489766e-05, "loss": 0.2786, "step": 6000 }, { "epoch": 0.5119433543763863, "grad_norm": 1.8768283195165538, "learning_rate": 5.049734155898086e-05, "loss": 0.2346, "step": 6001 }, { "epoch": 0.5120286640505033, "grad_norm": 1.456279570971012, "learning_rate": 5.0483526952502545e-05, "loss": 0.2309, "step": 6002 }, { "epoch": 0.5121139737246204, "grad_norm": 1.6673399836730316, "learning_rate": 5.046971230910953e-05, "loss": 0.2704, "step": 6003 }, { "epoch": 0.5121992833987374, "grad_norm": 1.7884110509878686, "learning_rate": 5.045589762985646e-05, "loss": 0.2706, "step": 6004 }, { "epoch": 0.5122845930728545, "grad_norm": 2.0026248158620947, "learning_rate": 5.0442082915798037e-05, "loss": 0.2263, "step": 6005 }, { "epoch": 0.5123699027469715, "grad_norm": 1.2927887667403968, "learning_rate": 5.0428268167988946e-05, "loss": 0.2263, "step": 6006 }, { "epoch": 0.5124552124210886, "grad_norm": 1.7573451798511983, "learning_rate": 5.041445338748383e-05, "loss": 0.2933, "step": 6007 }, { "epoch": 0.5125405220952056, "grad_norm": 1.6121724823712378, "learning_rate": 5.040063857533742e-05, "loss": 0.2813, "step": 6008 }, { "epoch": 0.5126258317693226, "grad_norm": 1.4268003401270088, "learning_rate": 5.038682373260438e-05, "loss": 0.2399, "step": 6009 }, { "epoch": 0.5127111414434397, "grad_norm": 1.6980126320646507, "learning_rate": 5.03730088603394e-05, "loss": 0.2638, "step": 6010 }, { "epoch": 0.5127964511175568, "grad_norm": 1.428524774402654, "learning_rate": 5.035919395959719e-05, "loss": 0.273, "step": 6011 }, { "epoch": 0.5128817607916738, "grad_norm": 1.4359116279006374, "learning_rate": 5.0345379031432414e-05, "loss": 0.1872, "step": 6012 }, { "epoch": 0.5129670704657908, "grad_norm": 1.5418454487554518, "learning_rate": 5.033156407689978e-05, "loss": 0.2958, "step": 6013 }, { "epoch": 0.5130523801399078, "grad_norm": 1.3424523763893954, "learning_rate": 5.031774909705401e-05, "loss": 0.2151, "step": 6014 }, { "epoch": 0.5131376898140249, "grad_norm": 1.806661871707836, "learning_rate": 5.030393409294977e-05, "loss": 0.2766, "step": 6015 }, { "epoch": 0.513222999488142, "grad_norm": 1.2867437604593919, "learning_rate": 5.029011906564178e-05, "loss": 0.243, "step": 6016 }, { "epoch": 0.513308309162259, "grad_norm": 1.8251861686334625, "learning_rate": 5.027630401618475e-05, "loss": 0.2373, "step": 6017 }, { "epoch": 0.513393618836376, "grad_norm": 1.4270801219300755, "learning_rate": 5.026248894563336e-05, "loss": 0.1937, "step": 6018 }, { "epoch": 0.513478928510493, "grad_norm": 1.242225262529758, "learning_rate": 5.024867385504234e-05, "loss": 0.189, "step": 6019 }, { "epoch": 0.5135642381846102, "grad_norm": 1.3838365286881662, "learning_rate": 5.023485874546639e-05, "loss": 0.2162, "step": 6020 }, { "epoch": 0.5136495478587272, "grad_norm": 1.8096984389720019, "learning_rate": 5.022104361796023e-05, "loss": 0.2741, "step": 6021 }, { "epoch": 0.5137348575328442, "grad_norm": 1.7275098040201797, "learning_rate": 5.020722847357858e-05, "loss": 0.2349, "step": 6022 }, { "epoch": 0.5138201672069612, "grad_norm": 1.5447280318498828, "learning_rate": 5.019341331337612e-05, "loss": 0.2582, "step": 6023 }, { "epoch": 0.5139054768810783, "grad_norm": 1.613788294142059, "learning_rate": 5.0179598138407566e-05, "loss": 0.2469, "step": 6024 }, { "epoch": 0.5139907865551954, "grad_norm": 1.4098013222504064, "learning_rate": 5.016578294972768e-05, "loss": 0.2365, "step": 6025 }, { "epoch": 0.5140760962293124, "grad_norm": 1.6382471200192055, "learning_rate": 5.0151967748391116e-05, "loss": 0.2289, "step": 6026 }, { "epoch": 0.5141614059034294, "grad_norm": 2.018735865493281, "learning_rate": 5.0138152535452646e-05, "loss": 0.2338, "step": 6027 }, { "epoch": 0.5142467155775465, "grad_norm": 1.46197921414087, "learning_rate": 5.012433731196694e-05, "loss": 0.2473, "step": 6028 }, { "epoch": 0.5143320252516635, "grad_norm": 1.6489933758030633, "learning_rate": 5.0110522078988764e-05, "loss": 0.2537, "step": 6029 }, { "epoch": 0.5144173349257806, "grad_norm": 1.6875360273118147, "learning_rate": 5.00967068375728e-05, "loss": 0.2209, "step": 6030 }, { "epoch": 0.5145026445998976, "grad_norm": 1.2821486508245756, "learning_rate": 5.0082891588773784e-05, "loss": 0.2369, "step": 6031 }, { "epoch": 0.5145879542740147, "grad_norm": 1.565641221135638, "learning_rate": 5.006907633364646e-05, "loss": 0.2631, "step": 6032 }, { "epoch": 0.5146732639481317, "grad_norm": 1.7750141008918403, "learning_rate": 5.005526107324551e-05, "loss": 0.2201, "step": 6033 }, { "epoch": 0.5147585736222487, "grad_norm": 2.004698955357469, "learning_rate": 5.0041445808625656e-05, "loss": 0.255, "step": 6034 }, { "epoch": 0.5148438832963658, "grad_norm": 1.4982769129267954, "learning_rate": 5.002763054084164e-05, "loss": 0.2433, "step": 6035 }, { "epoch": 0.5149291929704829, "grad_norm": 1.4779057926924757, "learning_rate": 5.001381527094818e-05, "loss": 0.2237, "step": 6036 }, { "epoch": 0.5150145026445999, "grad_norm": 1.4681732993465255, "learning_rate": 5e-05, "loss": 0.2543, "step": 6037 }, { "epoch": 0.5150998123187169, "grad_norm": 1.96045450896313, "learning_rate": 4.9986184729051824e-05, "loss": 0.2304, "step": 6038 }, { "epoch": 0.515185121992834, "grad_norm": 1.27931681610174, "learning_rate": 4.997236945915838e-05, "loss": 0.2173, "step": 6039 }, { "epoch": 0.5152704316669511, "grad_norm": 1.4791125524947004, "learning_rate": 4.9958554191374356e-05, "loss": 0.2329, "step": 6040 }, { "epoch": 0.5153557413410681, "grad_norm": 1.7981394423831958, "learning_rate": 4.994473892675451e-05, "loss": 0.2309, "step": 6041 }, { "epoch": 0.5154410510151851, "grad_norm": 1.6179389866145597, "learning_rate": 4.9930923666353565e-05, "loss": 0.2138, "step": 6042 }, { "epoch": 0.5155263606893021, "grad_norm": 1.9957482921770318, "learning_rate": 4.991710841122623e-05, "loss": 0.2723, "step": 6043 }, { "epoch": 0.5156116703634193, "grad_norm": 1.4708183010439015, "learning_rate": 4.990329316242721e-05, "loss": 0.2407, "step": 6044 }, { "epoch": 0.5156969800375363, "grad_norm": 1.6360246374611487, "learning_rate": 4.988947792101124e-05, "loss": 0.2709, "step": 6045 }, { "epoch": 0.5157822897116533, "grad_norm": 1.807597600747306, "learning_rate": 4.987566268803307e-05, "loss": 0.2561, "step": 6046 }, { "epoch": 0.5158675993857703, "grad_norm": 1.855492778421718, "learning_rate": 4.9861847464547366e-05, "loss": 0.2188, "step": 6047 }, { "epoch": 0.5159529090598874, "grad_norm": 1.4477711153857042, "learning_rate": 4.984803225160888e-05, "loss": 0.2975, "step": 6048 }, { "epoch": 0.5160382187340045, "grad_norm": 1.4043607100407822, "learning_rate": 4.9834217050272345e-05, "loss": 0.2503, "step": 6049 }, { "epoch": 0.5161235284081215, "grad_norm": 1.6351987203522096, "learning_rate": 4.982040186159244e-05, "loss": 0.2299, "step": 6050 }, { "epoch": 0.5162088380822385, "grad_norm": 1.4997087036846832, "learning_rate": 4.980658668662389e-05, "loss": 0.2832, "step": 6051 }, { "epoch": 0.5162941477563556, "grad_norm": 1.5390939467262437, "learning_rate": 4.9792771526421445e-05, "loss": 0.2647, "step": 6052 }, { "epoch": 0.5163794574304726, "grad_norm": 1.6529288912950049, "learning_rate": 4.977895638203978e-05, "loss": 0.2534, "step": 6053 }, { "epoch": 0.5164647671045897, "grad_norm": 1.3303350572956423, "learning_rate": 4.976514125453361e-05, "loss": 0.2706, "step": 6054 }, { "epoch": 0.5165500767787067, "grad_norm": 1.4952309613134314, "learning_rate": 4.975132614495768e-05, "loss": 0.1865, "step": 6055 }, { "epoch": 0.5166353864528237, "grad_norm": 2.0743932273457015, "learning_rate": 4.973751105436665e-05, "loss": 0.1982, "step": 6056 }, { "epoch": 0.5167206961269408, "grad_norm": 1.5626402219795792, "learning_rate": 4.972369598381527e-05, "loss": 0.2272, "step": 6057 }, { "epoch": 0.5168060058010578, "grad_norm": 1.657537571638001, "learning_rate": 4.9709880934358244e-05, "loss": 0.2566, "step": 6058 }, { "epoch": 0.5168913154751749, "grad_norm": 1.6371085856642171, "learning_rate": 4.9696065907050244e-05, "loss": 0.2871, "step": 6059 }, { "epoch": 0.5169766251492919, "grad_norm": 1.5293123870491623, "learning_rate": 4.9682250902946e-05, "loss": 0.2215, "step": 6060 }, { "epoch": 0.517061934823409, "grad_norm": 1.407426299745421, "learning_rate": 4.966843592310021e-05, "loss": 0.256, "step": 6061 }, { "epoch": 0.517147244497526, "grad_norm": 1.3980684272872053, "learning_rate": 4.96546209685676e-05, "loss": 0.235, "step": 6062 }, { "epoch": 0.517232554171643, "grad_norm": 1.5414158213222064, "learning_rate": 4.964080604040282e-05, "loss": 0.2119, "step": 6063 }, { "epoch": 0.5173178638457601, "grad_norm": 1.3302514499655687, "learning_rate": 4.96269911396606e-05, "loss": 0.2404, "step": 6064 }, { "epoch": 0.5174031735198772, "grad_norm": 1.781857035749392, "learning_rate": 4.961317626739564e-05, "loss": 0.2565, "step": 6065 }, { "epoch": 0.5174884831939942, "grad_norm": 1.5615947389814995, "learning_rate": 4.959936142466259e-05, "loss": 0.2289, "step": 6066 }, { "epoch": 0.5175737928681112, "grad_norm": 1.779641077308621, "learning_rate": 4.958554661251616e-05, "loss": 0.281, "step": 6067 }, { "epoch": 0.5176591025422282, "grad_norm": 1.709941696154593, "learning_rate": 4.957173183201108e-05, "loss": 0.287, "step": 6068 }, { "epoch": 0.5177444122163454, "grad_norm": 1.6856679508223698, "learning_rate": 4.955791708420197e-05, "loss": 0.2451, "step": 6069 }, { "epoch": 0.5178297218904624, "grad_norm": 1.407234864273309, "learning_rate": 4.954410237014354e-05, "loss": 0.1872, "step": 6070 }, { "epoch": 0.5179150315645794, "grad_norm": 1.667311742241396, "learning_rate": 4.95302876908905e-05, "loss": 0.2788, "step": 6071 }, { "epoch": 0.5180003412386964, "grad_norm": 1.4560974237629203, "learning_rate": 4.9516473047497466e-05, "loss": 0.2166, "step": 6072 }, { "epoch": 0.5180856509128136, "grad_norm": 1.5240722898724588, "learning_rate": 4.950265844101915e-05, "loss": 0.2946, "step": 6073 }, { "epoch": 0.5181709605869306, "grad_norm": 1.4415332769230693, "learning_rate": 4.9488843872510245e-05, "loss": 0.255, "step": 6074 }, { "epoch": 0.5182562702610476, "grad_norm": 1.3874215078060725, "learning_rate": 4.9475029343025375e-05, "loss": 0.2145, "step": 6075 }, { "epoch": 0.5183415799351646, "grad_norm": 1.5309790546721163, "learning_rate": 4.946121485361922e-05, "loss": 0.2363, "step": 6076 }, { "epoch": 0.5184268896092817, "grad_norm": 1.7659092227073898, "learning_rate": 4.944740040534645e-05, "loss": 0.2454, "step": 6077 }, { "epoch": 0.5185121992833988, "grad_norm": 1.5537326506722495, "learning_rate": 4.943358599926174e-05, "loss": 0.2111, "step": 6078 }, { "epoch": 0.5185975089575158, "grad_norm": 1.9903379344108647, "learning_rate": 4.941977163641971e-05, "loss": 0.2433, "step": 6079 }, { "epoch": 0.5186828186316328, "grad_norm": 2.047578181263473, "learning_rate": 4.940595731787503e-05, "loss": 0.3465, "step": 6080 }, { "epoch": 0.5187681283057499, "grad_norm": 1.7207797463165506, "learning_rate": 4.939214304468237e-05, "loss": 0.1874, "step": 6081 }, { "epoch": 0.5188534379798669, "grad_norm": 1.8663536987315457, "learning_rate": 4.9378328817896347e-05, "loss": 0.2617, "step": 6082 }, { "epoch": 0.518938747653984, "grad_norm": 1.5096833476832872, "learning_rate": 4.936451463857161e-05, "loss": 0.2837, "step": 6083 }, { "epoch": 0.519024057328101, "grad_norm": 2.0721623056094263, "learning_rate": 4.935070050776282e-05, "loss": 0.2781, "step": 6084 }, { "epoch": 0.5191093670022181, "grad_norm": 1.6137317831959943, "learning_rate": 4.933688642652459e-05, "loss": 0.2299, "step": 6085 }, { "epoch": 0.5191946766763351, "grad_norm": 1.3147303840057354, "learning_rate": 4.932307239591155e-05, "loss": 0.1715, "step": 6086 }, { "epoch": 0.5192799863504521, "grad_norm": 1.7054377260763331, "learning_rate": 4.9309258416978346e-05, "loss": 0.2944, "step": 6087 }, { "epoch": 0.5193652960245692, "grad_norm": 1.9656520871963, "learning_rate": 4.929544449077958e-05, "loss": 0.2616, "step": 6088 }, { "epoch": 0.5194506056986863, "grad_norm": 1.2889666434974225, "learning_rate": 4.928163061836988e-05, "loss": 0.2557, "step": 6089 }, { "epoch": 0.5195359153728033, "grad_norm": 1.371358601347691, "learning_rate": 4.926781680080386e-05, "loss": 0.2045, "step": 6090 }, { "epoch": 0.5196212250469203, "grad_norm": 1.7430685236204855, "learning_rate": 4.925400303913616e-05, "loss": 0.2495, "step": 6091 }, { "epoch": 0.5197065347210373, "grad_norm": 1.3636359597696806, "learning_rate": 4.924018933442135e-05, "loss": 0.286, "step": 6092 }, { "epoch": 0.5197918443951544, "grad_norm": 1.614711026300313, "learning_rate": 4.9226375687714044e-05, "loss": 0.1984, "step": 6093 }, { "epoch": 0.5198771540692715, "grad_norm": 1.694520545598538, "learning_rate": 4.921256210006886e-05, "loss": 0.2353, "step": 6094 }, { "epoch": 0.5199624637433885, "grad_norm": 1.297880449396946, "learning_rate": 4.919874857254036e-05, "loss": 0.1816, "step": 6095 }, { "epoch": 0.5200477734175055, "grad_norm": 1.4235377388424557, "learning_rate": 4.9184935106183156e-05, "loss": 0.2687, "step": 6096 }, { "epoch": 0.5201330830916225, "grad_norm": 1.4177748702573179, "learning_rate": 4.917112170205183e-05, "loss": 0.2374, "step": 6097 }, { "epoch": 0.5202183927657397, "grad_norm": 1.4638264696847127, "learning_rate": 4.9157308361200944e-05, "loss": 0.2804, "step": 6098 }, { "epoch": 0.5203037024398567, "grad_norm": 1.818011749506585, "learning_rate": 4.914349508468508e-05, "loss": 0.2532, "step": 6099 }, { "epoch": 0.5203890121139737, "grad_norm": 1.6093887032231007, "learning_rate": 4.912968187355885e-05, "loss": 0.2231, "step": 6100 }, { "epoch": 0.5204743217880907, "grad_norm": 2.00212353981233, "learning_rate": 4.9115868728876745e-05, "loss": 0.3009, "step": 6101 }, { "epoch": 0.5205596314622079, "grad_norm": 1.6235488868075405, "learning_rate": 4.9102055651693376e-05, "loss": 0.2745, "step": 6102 }, { "epoch": 0.5206449411363249, "grad_norm": 1.5797363967949731, "learning_rate": 4.9088242643063304e-05, "loss": 0.2927, "step": 6103 }, { "epoch": 0.5207302508104419, "grad_norm": 1.6353844641493493, "learning_rate": 4.907442970404104e-05, "loss": 0.235, "step": 6104 }, { "epoch": 0.5208155604845589, "grad_norm": 1.4678150204106328, "learning_rate": 4.906061683568115e-05, "loss": 0.2524, "step": 6105 }, { "epoch": 0.520900870158676, "grad_norm": 1.4527966528290996, "learning_rate": 4.904680403903818e-05, "loss": 0.2632, "step": 6106 }, { "epoch": 0.520986179832793, "grad_norm": 1.5271002143596424, "learning_rate": 4.9032991315166674e-05, "loss": 0.2146, "step": 6107 }, { "epoch": 0.5210714895069101, "grad_norm": 1.4095778613200898, "learning_rate": 4.9019178665121115e-05, "loss": 0.2099, "step": 6108 }, { "epoch": 0.5211567991810271, "grad_norm": 1.5981689973398394, "learning_rate": 4.900536608995605e-05, "loss": 0.2745, "step": 6109 }, { "epoch": 0.5212421088551442, "grad_norm": 1.2812221803203567, "learning_rate": 4.899155359072603e-05, "loss": 0.219, "step": 6110 }, { "epoch": 0.5213274185292612, "grad_norm": 1.5020161430720618, "learning_rate": 4.897774116848551e-05, "loss": 0.2128, "step": 6111 }, { "epoch": 0.5214127282033783, "grad_norm": 1.4325700893864282, "learning_rate": 4.896392882428901e-05, "loss": 0.2502, "step": 6112 }, { "epoch": 0.5214980378774953, "grad_norm": 1.4808836987026537, "learning_rate": 4.8950116559191075e-05, "loss": 0.2234, "step": 6113 }, { "epoch": 0.5215833475516124, "grad_norm": 1.8308116375148855, "learning_rate": 4.893630437424613e-05, "loss": 0.2669, "step": 6114 }, { "epoch": 0.5216686572257294, "grad_norm": 1.9278997255659223, "learning_rate": 4.892249227050869e-05, "loss": 0.2558, "step": 6115 }, { "epoch": 0.5217539668998464, "grad_norm": 1.3183263703912695, "learning_rate": 4.890868024903327e-05, "loss": 0.2361, "step": 6116 }, { "epoch": 0.5218392765739635, "grad_norm": 1.7238350984565003, "learning_rate": 4.8894868310874296e-05, "loss": 0.2342, "step": 6117 }, { "epoch": 0.5219245862480806, "grad_norm": 1.6200517729739654, "learning_rate": 4.888105645708625e-05, "loss": 0.2242, "step": 6118 }, { "epoch": 0.5220098959221976, "grad_norm": 1.6265154887651305, "learning_rate": 4.886724468872362e-05, "loss": 0.2842, "step": 6119 }, { "epoch": 0.5220952055963146, "grad_norm": 1.4605549194081755, "learning_rate": 4.8853433006840807e-05, "loss": 0.3254, "step": 6120 }, { "epoch": 0.5221805152704316, "grad_norm": 1.5671535621383752, "learning_rate": 4.8839621412492296e-05, "loss": 0.2741, "step": 6121 }, { "epoch": 0.5222658249445488, "grad_norm": 1.716676536395888, "learning_rate": 4.882580990673253e-05, "loss": 0.2982, "step": 6122 }, { "epoch": 0.5223511346186658, "grad_norm": 1.704314769971804, "learning_rate": 4.881199849061595e-05, "loss": 0.2652, "step": 6123 }, { "epoch": 0.5224364442927828, "grad_norm": 1.9538519155092737, "learning_rate": 4.879818716519696e-05, "loss": 0.2323, "step": 6124 }, { "epoch": 0.5225217539668998, "grad_norm": 1.235008270452197, "learning_rate": 4.8784375931529984e-05, "loss": 0.2406, "step": 6125 }, { "epoch": 0.522607063641017, "grad_norm": 1.4620013112220154, "learning_rate": 4.877056479066947e-05, "loss": 0.2396, "step": 6126 }, { "epoch": 0.522692373315134, "grad_norm": 1.7483489558731362, "learning_rate": 4.875675374366979e-05, "loss": 0.3093, "step": 6127 }, { "epoch": 0.522777682989251, "grad_norm": 1.731339556309138, "learning_rate": 4.874294279158535e-05, "loss": 0.3176, "step": 6128 }, { "epoch": 0.522862992663368, "grad_norm": 2.0483923958730914, "learning_rate": 4.8729131935470565e-05, "loss": 0.2167, "step": 6129 }, { "epoch": 0.522948302337485, "grad_norm": 1.5851907710362856, "learning_rate": 4.8715321176379787e-05, "loss": 0.2411, "step": 6130 }, { "epoch": 0.5230336120116021, "grad_norm": 1.6390909925226234, "learning_rate": 4.870151051536741e-05, "loss": 0.2166, "step": 6131 }, { "epoch": 0.5231189216857192, "grad_norm": 1.5628301653123897, "learning_rate": 4.868769995348781e-05, "loss": 0.2269, "step": 6132 }, { "epoch": 0.5232042313598362, "grad_norm": 1.1469904288014818, "learning_rate": 4.8673889491795344e-05, "loss": 0.1479, "step": 6133 }, { "epoch": 0.5232895410339532, "grad_norm": 1.788229736012791, "learning_rate": 4.866007913134435e-05, "loss": 0.2768, "step": 6134 }, { "epoch": 0.5233748507080703, "grad_norm": 1.465920340055998, "learning_rate": 4.864626887318921e-05, "loss": 0.2374, "step": 6135 }, { "epoch": 0.5234601603821873, "grad_norm": 1.3362558442889911, "learning_rate": 4.8632458718384266e-05, "loss": 0.1952, "step": 6136 }, { "epoch": 0.5235454700563044, "grad_norm": 1.5185019333682346, "learning_rate": 4.861864866798381e-05, "loss": 0.2708, "step": 6137 }, { "epoch": 0.5236307797304214, "grad_norm": 1.7427016384849698, "learning_rate": 4.8604838723042194e-05, "loss": 0.2483, "step": 6138 }, { "epoch": 0.5237160894045385, "grad_norm": 1.6988626041101862, "learning_rate": 4.8591028884613745e-05, "loss": 0.293, "step": 6139 }, { "epoch": 0.5238013990786555, "grad_norm": 1.4086416886942472, "learning_rate": 4.857721915375272e-05, "loss": 0.2782, "step": 6140 }, { "epoch": 0.5238867087527725, "grad_norm": 1.5293428650392416, "learning_rate": 4.856340953151346e-05, "loss": 0.343, "step": 6141 }, { "epoch": 0.5239720184268896, "grad_norm": 2.050589067445999, "learning_rate": 4.854960001895027e-05, "loss": 0.2627, "step": 6142 }, { "epoch": 0.5240573281010067, "grad_norm": 1.7553564248778608, "learning_rate": 4.853579061711739e-05, "loss": 0.2906, "step": 6143 }, { "epoch": 0.5241426377751237, "grad_norm": 1.6601010302167556, "learning_rate": 4.8521981327069117e-05, "loss": 0.2795, "step": 6144 }, { "epoch": 0.5242279474492407, "grad_norm": 1.538787817285007, "learning_rate": 4.850817214985973e-05, "loss": 0.2595, "step": 6145 }, { "epoch": 0.5243132571233577, "grad_norm": 1.254760403730026, "learning_rate": 4.849436308654346e-05, "loss": 0.1885, "step": 6146 }, { "epoch": 0.5243985667974749, "grad_norm": 1.559667690535751, "learning_rate": 4.848055413817456e-05, "loss": 0.2116, "step": 6147 }, { "epoch": 0.5244838764715919, "grad_norm": 1.4644844937062012, "learning_rate": 4.84667453058073e-05, "loss": 0.1948, "step": 6148 }, { "epoch": 0.5245691861457089, "grad_norm": 1.4498087460896478, "learning_rate": 4.845293659049588e-05, "loss": 0.2789, "step": 6149 }, { "epoch": 0.5246544958198259, "grad_norm": 1.7339567762749295, "learning_rate": 4.843912799329451e-05, "loss": 0.3137, "step": 6150 }, { "epoch": 0.5247398054939431, "grad_norm": 1.409375214841027, "learning_rate": 4.842531951525744e-05, "loss": 0.2047, "step": 6151 }, { "epoch": 0.5248251151680601, "grad_norm": 1.7817036235153505, "learning_rate": 4.841151115743887e-05, "loss": 0.2818, "step": 6152 }, { "epoch": 0.5249104248421771, "grad_norm": 1.881557632082661, "learning_rate": 4.839770292089296e-05, "loss": 0.2267, "step": 6153 }, { "epoch": 0.5249957345162941, "grad_norm": 1.511935354087057, "learning_rate": 4.8383894806673914e-05, "loss": 0.2681, "step": 6154 }, { "epoch": 0.5250810441904112, "grad_norm": 1.4361478001925159, "learning_rate": 4.837008681583593e-05, "loss": 0.2836, "step": 6155 }, { "epoch": 0.5251663538645283, "grad_norm": 1.5417778731094067, "learning_rate": 4.835627894943315e-05, "loss": 0.2953, "step": 6156 }, { "epoch": 0.5252516635386453, "grad_norm": 1.7179816175727467, "learning_rate": 4.8342471208519726e-05, "loss": 0.2848, "step": 6157 }, { "epoch": 0.5253369732127623, "grad_norm": 1.7012950620492249, "learning_rate": 4.832866359414984e-05, "loss": 0.2535, "step": 6158 }, { "epoch": 0.5254222828868794, "grad_norm": 1.7514815190130018, "learning_rate": 4.83148561073776e-05, "loss": 0.215, "step": 6159 }, { "epoch": 0.5255075925609964, "grad_norm": 1.5333510017759198, "learning_rate": 4.830104874925713e-05, "loss": 0.2679, "step": 6160 }, { "epoch": 0.5255929022351135, "grad_norm": 1.3580063693553115, "learning_rate": 4.828724152084258e-05, "loss": 0.2382, "step": 6161 }, { "epoch": 0.5256782119092305, "grad_norm": 1.3755511999720356, "learning_rate": 4.827343442318801e-05, "loss": 0.2668, "step": 6162 }, { "epoch": 0.5257635215833476, "grad_norm": 1.273802722442492, "learning_rate": 4.8259627457347554e-05, "loss": 0.2292, "step": 6163 }, { "epoch": 0.5258488312574646, "grad_norm": 1.3262034259352071, "learning_rate": 4.8245820624375314e-05, "loss": 0.2325, "step": 6164 }, { "epoch": 0.5259341409315816, "grad_norm": 1.7470967187906226, "learning_rate": 4.823201392532533e-05, "loss": 0.232, "step": 6165 }, { "epoch": 0.5260194506056987, "grad_norm": 1.464671051017671, "learning_rate": 4.821820736125168e-05, "loss": 0.2149, "step": 6166 }, { "epoch": 0.5261047602798158, "grad_norm": 1.7502093189940393, "learning_rate": 4.8204400933208435e-05, "loss": 0.2707, "step": 6167 }, { "epoch": 0.5261900699539328, "grad_norm": 1.3110923939292023, "learning_rate": 4.819059464224966e-05, "loss": 0.2165, "step": 6168 }, { "epoch": 0.5262753796280498, "grad_norm": 1.528862863586825, "learning_rate": 4.8176788489429355e-05, "loss": 0.2906, "step": 6169 }, { "epoch": 0.5263606893021668, "grad_norm": 1.4260049722806853, "learning_rate": 4.816298247580155e-05, "loss": 0.2314, "step": 6170 }, { "epoch": 0.5264459989762839, "grad_norm": 1.7609295822006166, "learning_rate": 4.814917660242029e-05, "loss": 0.2454, "step": 6171 }, { "epoch": 0.526531308650401, "grad_norm": 1.2983806097280934, "learning_rate": 4.8135370870339545e-05, "loss": 0.1975, "step": 6172 }, { "epoch": 0.526616618324518, "grad_norm": 1.3147316080777425, "learning_rate": 4.8121565280613316e-05, "loss": 0.201, "step": 6173 }, { "epoch": 0.526701927998635, "grad_norm": 1.7784907067168425, "learning_rate": 4.8107759834295627e-05, "loss": 0.2465, "step": 6174 }, { "epoch": 0.526787237672752, "grad_norm": 1.4576823643600176, "learning_rate": 4.8093954532440396e-05, "loss": 0.2623, "step": 6175 }, { "epoch": 0.5268725473468692, "grad_norm": 1.3817990139722378, "learning_rate": 4.808014937610161e-05, "loss": 0.2435, "step": 6176 }, { "epoch": 0.5269578570209862, "grad_norm": 1.6396155748828558, "learning_rate": 4.806634436633323e-05, "loss": 0.2507, "step": 6177 }, { "epoch": 0.5270431666951032, "grad_norm": 1.345288659924601, "learning_rate": 4.805253950418917e-05, "loss": 0.2003, "step": 6178 }, { "epoch": 0.5271284763692202, "grad_norm": 1.5540889948986003, "learning_rate": 4.803873479072336e-05, "loss": 0.2129, "step": 6179 }, { "epoch": 0.5272137860433374, "grad_norm": 1.5090409250745338, "learning_rate": 4.802493022698974e-05, "loss": 0.2262, "step": 6180 }, { "epoch": 0.5272990957174544, "grad_norm": 1.5804084649423393, "learning_rate": 4.801112581404222e-05, "loss": 0.2878, "step": 6181 }, { "epoch": 0.5273844053915714, "grad_norm": 1.2522109505628325, "learning_rate": 4.799732155293465e-05, "loss": 0.2029, "step": 6182 }, { "epoch": 0.5274697150656884, "grad_norm": 1.8471575138482876, "learning_rate": 4.798351744472093e-05, "loss": 0.2574, "step": 6183 }, { "epoch": 0.5275550247398055, "grad_norm": 1.681705770438035, "learning_rate": 4.796971349045497e-05, "loss": 0.2827, "step": 6184 }, { "epoch": 0.5276403344139226, "grad_norm": 1.5065144698457478, "learning_rate": 4.795590969119057e-05, "loss": 0.2782, "step": 6185 }, { "epoch": 0.5277256440880396, "grad_norm": 1.7099450424910865, "learning_rate": 4.79421060479816e-05, "loss": 0.3372, "step": 6186 }, { "epoch": 0.5278109537621566, "grad_norm": 1.7156755867259197, "learning_rate": 4.792830256188192e-05, "loss": 0.2366, "step": 6187 }, { "epoch": 0.5278962634362737, "grad_norm": 1.8703456330770551, "learning_rate": 4.791449923394532e-05, "loss": 0.2927, "step": 6188 }, { "epoch": 0.5279815731103907, "grad_norm": 1.6000552644722101, "learning_rate": 4.7900696065225605e-05, "loss": 0.2647, "step": 6189 }, { "epoch": 0.5280668827845078, "grad_norm": 1.4176186373680277, "learning_rate": 4.7886893056776616e-05, "loss": 0.2473, "step": 6190 }, { "epoch": 0.5281521924586248, "grad_norm": 1.521026424883168, "learning_rate": 4.7873090209652106e-05, "loss": 0.2469, "step": 6191 }, { "epoch": 0.5282375021327419, "grad_norm": 1.4574883692491623, "learning_rate": 4.7859287524905845e-05, "loss": 0.2853, "step": 6192 }, { "epoch": 0.5283228118068589, "grad_norm": 1.7159452268044053, "learning_rate": 4.784548500359162e-05, "loss": 0.2637, "step": 6193 }, { "epoch": 0.5284081214809759, "grad_norm": 2.1023906224815336, "learning_rate": 4.7831682646763156e-05, "loss": 0.2468, "step": 6194 }, { "epoch": 0.528493431155093, "grad_norm": 2.118998748670913, "learning_rate": 4.781788045547419e-05, "loss": 0.313, "step": 6195 }, { "epoch": 0.5285787408292101, "grad_norm": 1.657717792131951, "learning_rate": 4.780407843077846e-05, "loss": 0.2301, "step": 6196 }, { "epoch": 0.5286640505033271, "grad_norm": 1.2867503381776486, "learning_rate": 4.77902765737297e-05, "loss": 0.2585, "step": 6197 }, { "epoch": 0.5287493601774441, "grad_norm": 1.5041204919182418, "learning_rate": 4.777647488538155e-05, "loss": 0.2604, "step": 6198 }, { "epoch": 0.5288346698515611, "grad_norm": 1.742302176631489, "learning_rate": 4.7762673366787725e-05, "loss": 0.2333, "step": 6199 }, { "epoch": 0.5289199795256783, "grad_norm": 1.9217684336324456, "learning_rate": 4.774887201900193e-05, "loss": 0.3188, "step": 6200 }, { "epoch": 0.5290052891997953, "grad_norm": 1.5539420943156028, "learning_rate": 4.773507084307778e-05, "loss": 0.2298, "step": 6201 }, { "epoch": 0.5290905988739123, "grad_norm": 1.4758922529099836, "learning_rate": 4.772126984006892e-05, "loss": 0.1886, "step": 6202 }, { "epoch": 0.5291759085480293, "grad_norm": 1.1294457225684957, "learning_rate": 4.770746901102902e-05, "loss": 0.2775, "step": 6203 }, { "epoch": 0.5292612182221464, "grad_norm": 1.5850396080964726, "learning_rate": 4.769366835701166e-05, "loss": 0.1845, "step": 6204 }, { "epoch": 0.5293465278962635, "grad_norm": 1.6588566259568538, "learning_rate": 4.7679867879070465e-05, "loss": 0.2534, "step": 6205 }, { "epoch": 0.5294318375703805, "grad_norm": 1.5553106084853878, "learning_rate": 4.7666067578259057e-05, "loss": 0.2239, "step": 6206 }, { "epoch": 0.5295171472444975, "grad_norm": 1.355576656564696, "learning_rate": 4.7652267455630956e-05, "loss": 0.297, "step": 6207 }, { "epoch": 0.5296024569186145, "grad_norm": 1.832410565701018, "learning_rate": 4.763846751223976e-05, "loss": 0.2916, "step": 6208 }, { "epoch": 0.5296877665927316, "grad_norm": 1.5135882348363474, "learning_rate": 4.762466774913905e-05, "loss": 0.2159, "step": 6209 }, { "epoch": 0.5297730762668487, "grad_norm": 1.7183779920786575, "learning_rate": 4.76108681673823e-05, "loss": 0.2179, "step": 6210 }, { "epoch": 0.5298583859409657, "grad_norm": 2.0042514912610563, "learning_rate": 4.759706876802308e-05, "loss": 0.2934, "step": 6211 }, { "epoch": 0.5299436956150827, "grad_norm": 1.7089221528851402, "learning_rate": 4.75832695521149e-05, "loss": 0.1611, "step": 6212 }, { "epoch": 0.5300290052891998, "grad_norm": 2.0337000229596867, "learning_rate": 4.756947052071125e-05, "loss": 0.3227, "step": 6213 }, { "epoch": 0.5301143149633168, "grad_norm": 1.8898227975920991, "learning_rate": 4.755567167486559e-05, "loss": 0.2398, "step": 6214 }, { "epoch": 0.5301996246374339, "grad_norm": 1.6778240424143012, "learning_rate": 4.7541873015631416e-05, "loss": 0.2282, "step": 6215 }, { "epoch": 0.5302849343115509, "grad_norm": 1.2823988718089747, "learning_rate": 4.752807454406219e-05, "loss": 0.1691, "step": 6216 }, { "epoch": 0.530370243985668, "grad_norm": 1.6766422078577665, "learning_rate": 4.751427626121132e-05, "loss": 0.2339, "step": 6217 }, { "epoch": 0.530455553659785, "grad_norm": 1.520322618127382, "learning_rate": 4.750047816813224e-05, "loss": 0.2223, "step": 6218 }, { "epoch": 0.530540863333902, "grad_norm": 1.5628704394863053, "learning_rate": 4.74866802658784e-05, "loss": 0.2294, "step": 6219 }, { "epoch": 0.5306261730080191, "grad_norm": 1.4412295845749565, "learning_rate": 4.747288255550315e-05, "loss": 0.293, "step": 6220 }, { "epoch": 0.5307114826821362, "grad_norm": 1.5927933458385641, "learning_rate": 4.7459085038059874e-05, "loss": 0.2512, "step": 6221 }, { "epoch": 0.5307967923562532, "grad_norm": 1.8644891498441267, "learning_rate": 4.744528771460198e-05, "loss": 0.2946, "step": 6222 }, { "epoch": 0.5308821020303702, "grad_norm": 1.2814571864388054, "learning_rate": 4.743149058618278e-05, "loss": 0.2113, "step": 6223 }, { "epoch": 0.5309674117044872, "grad_norm": 1.8095336844478378, "learning_rate": 4.741769365385562e-05, "loss": 0.2737, "step": 6224 }, { "epoch": 0.5310527213786044, "grad_norm": 1.8309208284404572, "learning_rate": 4.740389691867382e-05, "loss": 0.2749, "step": 6225 }, { "epoch": 0.5311380310527214, "grad_norm": 1.7989752395562928, "learning_rate": 4.7390100381690724e-05, "loss": 0.2696, "step": 6226 }, { "epoch": 0.5312233407268384, "grad_norm": 1.2526231421965155, "learning_rate": 4.737630404395956e-05, "loss": 0.2179, "step": 6227 }, { "epoch": 0.5313086504009554, "grad_norm": 1.2459008716320867, "learning_rate": 4.7362507906533656e-05, "loss": 0.2064, "step": 6228 }, { "epoch": 0.5313939600750726, "grad_norm": 1.3112692284787892, "learning_rate": 4.734871197046627e-05, "loss": 0.2477, "step": 6229 }, { "epoch": 0.5314792697491896, "grad_norm": 1.5380694288548569, "learning_rate": 4.7334916236810615e-05, "loss": 0.1747, "step": 6230 }, { "epoch": 0.5315645794233066, "grad_norm": 1.4903711578670276, "learning_rate": 4.732112070661994e-05, "loss": 0.2048, "step": 6231 }, { "epoch": 0.5316498890974236, "grad_norm": 1.5335750444605531, "learning_rate": 4.730732538094749e-05, "loss": 0.2614, "step": 6232 }, { "epoch": 0.5317351987715407, "grad_norm": 1.768784658139627, "learning_rate": 4.729353026084643e-05, "loss": 0.2791, "step": 6233 }, { "epoch": 0.5318205084456578, "grad_norm": 1.5811122909711872, "learning_rate": 4.727973534736994e-05, "loss": 0.2223, "step": 6234 }, { "epoch": 0.5319058181197748, "grad_norm": 1.9097113530013448, "learning_rate": 4.726594064157122e-05, "loss": 0.1983, "step": 6235 }, { "epoch": 0.5319911277938918, "grad_norm": 1.386047415691551, "learning_rate": 4.725214614450339e-05, "loss": 0.2765, "step": 6236 }, { "epoch": 0.5320764374680089, "grad_norm": 1.4857691592258995, "learning_rate": 4.7238351857219587e-05, "loss": 0.1964, "step": 6237 }, { "epoch": 0.5321617471421259, "grad_norm": 1.4774770175288852, "learning_rate": 4.722455778077297e-05, "loss": 0.2354, "step": 6238 }, { "epoch": 0.532247056816243, "grad_norm": 1.6916631983578962, "learning_rate": 4.72107639162166e-05, "loss": 0.2093, "step": 6239 }, { "epoch": 0.53233236649036, "grad_norm": 1.4168518543750768, "learning_rate": 4.719697026460358e-05, "loss": 0.1993, "step": 6240 }, { "epoch": 0.5324176761644771, "grad_norm": 1.5536269207769968, "learning_rate": 4.7183176826986984e-05, "loss": 0.2929, "step": 6241 }, { "epoch": 0.5325029858385941, "grad_norm": 1.5655713222537888, "learning_rate": 4.716938360441989e-05, "loss": 0.2603, "step": 6242 }, { "epoch": 0.5325882955127111, "grad_norm": 1.7088162584720763, "learning_rate": 4.715559059795531e-05, "loss": 0.2414, "step": 6243 }, { "epoch": 0.5326736051868282, "grad_norm": 2.178908998659301, "learning_rate": 4.714179780864626e-05, "loss": 0.2228, "step": 6244 }, { "epoch": 0.5327589148609452, "grad_norm": 1.824283051429629, "learning_rate": 4.712800523754578e-05, "loss": 0.2427, "step": 6245 }, { "epoch": 0.5328442245350623, "grad_norm": 1.4670554592204301, "learning_rate": 4.7114212885706815e-05, "loss": 0.2167, "step": 6246 }, { "epoch": 0.5329295342091793, "grad_norm": 1.792725058223975, "learning_rate": 4.710042075418236e-05, "loss": 0.1961, "step": 6247 }, { "epoch": 0.5330148438832963, "grad_norm": 1.3964312349892671, "learning_rate": 4.70866288440254e-05, "loss": 0.2137, "step": 6248 }, { "epoch": 0.5331001535574134, "grad_norm": 1.4480511262150555, "learning_rate": 4.7072837156288816e-05, "loss": 0.2536, "step": 6249 }, { "epoch": 0.5331854632315305, "grad_norm": 1.9367513132993224, "learning_rate": 4.7059045692025567e-05, "loss": 0.2101, "step": 6250 }, { "epoch": 0.5332707729056475, "grad_norm": 1.865933879285937, "learning_rate": 4.7045254452288566e-05, "loss": 0.194, "step": 6251 }, { "epoch": 0.5333560825797645, "grad_norm": 1.5941369578013902, "learning_rate": 4.7031463438130676e-05, "loss": 0.211, "step": 6252 }, { "epoch": 0.5334413922538815, "grad_norm": 1.8773865452403864, "learning_rate": 4.7017672650604766e-05, "loss": 0.262, "step": 6253 }, { "epoch": 0.5335267019279987, "grad_norm": 1.6904972444166027, "learning_rate": 4.700388209076373e-05, "loss": 0.2152, "step": 6254 }, { "epoch": 0.5336120116021157, "grad_norm": 1.9724146931118196, "learning_rate": 4.699009175966036e-05, "loss": 0.273, "step": 6255 }, { "epoch": 0.5336973212762327, "grad_norm": 1.7826604779996043, "learning_rate": 4.6976301658347484e-05, "loss": 0.2203, "step": 6256 }, { "epoch": 0.5337826309503497, "grad_norm": 1.4576760672850535, "learning_rate": 4.69625117878779e-05, "loss": 0.3006, "step": 6257 }, { "epoch": 0.5338679406244669, "grad_norm": 1.6257986893605296, "learning_rate": 4.6948722149304424e-05, "loss": 0.2073, "step": 6258 }, { "epoch": 0.5339532502985839, "grad_norm": 1.6046737596022, "learning_rate": 4.693493274367978e-05, "loss": 0.1792, "step": 6259 }, { "epoch": 0.5340385599727009, "grad_norm": 1.8200784295338872, "learning_rate": 4.692114357205673e-05, "loss": 0.2029, "step": 6260 }, { "epoch": 0.5341238696468179, "grad_norm": 1.737913628612647, "learning_rate": 4.690735463548803e-05, "loss": 0.1846, "step": 6261 }, { "epoch": 0.534209179320935, "grad_norm": 1.8298027621220965, "learning_rate": 4.6893565935026354e-05, "loss": 0.2577, "step": 6262 }, { "epoch": 0.534294488995052, "grad_norm": 2.2139526942987677, "learning_rate": 4.68797774717244e-05, "loss": 0.3097, "step": 6263 }, { "epoch": 0.5343797986691691, "grad_norm": 1.7098212236790604, "learning_rate": 4.6865989246634884e-05, "loss": 0.2876, "step": 6264 }, { "epoch": 0.5344651083432861, "grad_norm": 1.8093374877625243, "learning_rate": 4.685220126081042e-05, "loss": 0.2715, "step": 6265 }, { "epoch": 0.5345504180174032, "grad_norm": 1.67488333737066, "learning_rate": 4.683841351530365e-05, "loss": 0.2547, "step": 6266 }, { "epoch": 0.5346357276915202, "grad_norm": 1.4824205883558317, "learning_rate": 4.682462601116724e-05, "loss": 0.3152, "step": 6267 }, { "epoch": 0.5347210373656373, "grad_norm": 1.6720191679499201, "learning_rate": 4.681083874945373e-05, "loss": 0.3191, "step": 6268 }, { "epoch": 0.5348063470397543, "grad_norm": 1.270265290719166, "learning_rate": 4.679705173121573e-05, "loss": 0.2237, "step": 6269 }, { "epoch": 0.5348916567138714, "grad_norm": 1.7475462468348757, "learning_rate": 4.678326495750584e-05, "loss": 0.296, "step": 6270 }, { "epoch": 0.5349769663879884, "grad_norm": 1.461782935000973, "learning_rate": 4.676947842937655e-05, "loss": 0.2018, "step": 6271 }, { "epoch": 0.5350622760621054, "grad_norm": 1.7935471609448481, "learning_rate": 4.6755692147880414e-05, "loss": 0.2631, "step": 6272 }, { "epoch": 0.5351475857362225, "grad_norm": 1.6665629195665754, "learning_rate": 4.674190611406995e-05, "loss": 0.2385, "step": 6273 }, { "epoch": 0.5352328954103396, "grad_norm": 1.7529831392007231, "learning_rate": 4.672812032899765e-05, "loss": 0.2157, "step": 6274 }, { "epoch": 0.5353182050844566, "grad_norm": 1.7111141165931256, "learning_rate": 4.671433479371598e-05, "loss": 0.2555, "step": 6275 }, { "epoch": 0.5354035147585736, "grad_norm": 1.2568841197300917, "learning_rate": 4.670054950927737e-05, "loss": 0.2597, "step": 6276 }, { "epoch": 0.5354888244326906, "grad_norm": 1.3885449916211225, "learning_rate": 4.668676447673429e-05, "loss": 0.2156, "step": 6277 }, { "epoch": 0.5355741341068078, "grad_norm": 1.864357243759304, "learning_rate": 4.667297969713912e-05, "loss": 0.2103, "step": 6278 }, { "epoch": 0.5356594437809248, "grad_norm": 1.6149768554871475, "learning_rate": 4.6659195171544265e-05, "loss": 0.2486, "step": 6279 }, { "epoch": 0.5357447534550418, "grad_norm": 1.5201743365997915, "learning_rate": 4.664541090100213e-05, "loss": 0.2181, "step": 6280 }, { "epoch": 0.5358300631291588, "grad_norm": 1.5358822879588854, "learning_rate": 4.6631626886565026e-05, "loss": 0.2447, "step": 6281 }, { "epoch": 0.535915372803276, "grad_norm": 1.591446052693981, "learning_rate": 4.661784312928531e-05, "loss": 0.2137, "step": 6282 }, { "epoch": 0.536000682477393, "grad_norm": 2.1280831802001035, "learning_rate": 4.6604059630215326e-05, "loss": 0.271, "step": 6283 }, { "epoch": 0.53608599215151, "grad_norm": 1.5153869756615432, "learning_rate": 4.659027639040731e-05, "loss": 0.2874, "step": 6284 }, { "epoch": 0.536171301825627, "grad_norm": 1.360139653664992, "learning_rate": 4.657649341091358e-05, "loss": 0.2464, "step": 6285 }, { "epoch": 0.536256611499744, "grad_norm": 1.4019159711207063, "learning_rate": 4.656271069278639e-05, "loss": 0.2242, "step": 6286 }, { "epoch": 0.5363419211738611, "grad_norm": 1.5844137454138973, "learning_rate": 4.654892823707799e-05, "loss": 0.2191, "step": 6287 }, { "epoch": 0.5364272308479782, "grad_norm": 1.5989555168334144, "learning_rate": 4.6535146044840556e-05, "loss": 0.2131, "step": 6288 }, { "epoch": 0.5365125405220952, "grad_norm": 1.841499960015592, "learning_rate": 4.6521364117126306e-05, "loss": 0.2326, "step": 6289 }, { "epoch": 0.5365978501962122, "grad_norm": 1.692845531674437, "learning_rate": 4.650758245498744e-05, "loss": 0.2653, "step": 6290 }, { "epoch": 0.5366831598703293, "grad_norm": 1.742069804916036, "learning_rate": 4.649380105947608e-05, "loss": 0.253, "step": 6291 }, { "epoch": 0.5367684695444463, "grad_norm": 1.7310783053205916, "learning_rate": 4.648001993164438e-05, "loss": 0.2779, "step": 6292 }, { "epoch": 0.5368537792185634, "grad_norm": 1.6049582604896049, "learning_rate": 4.646623907254447e-05, "loss": 0.1668, "step": 6293 }, { "epoch": 0.5369390888926804, "grad_norm": 1.6332005673899193, "learning_rate": 4.64524584832284e-05, "loss": 0.2817, "step": 6294 }, { "epoch": 0.5370243985667975, "grad_norm": 1.4845704602498775, "learning_rate": 4.643867816474828e-05, "loss": 0.1675, "step": 6295 }, { "epoch": 0.5371097082409145, "grad_norm": 1.6598368258880807, "learning_rate": 4.642489811815618e-05, "loss": 0.2148, "step": 6296 }, { "epoch": 0.5371950179150315, "grad_norm": 1.5011657317663951, "learning_rate": 4.64111183445041e-05, "loss": 0.1912, "step": 6297 }, { "epoch": 0.5372803275891486, "grad_norm": 1.7215355235569227, "learning_rate": 4.639733884484405e-05, "loss": 0.1995, "step": 6298 }, { "epoch": 0.5373656372632657, "grad_norm": 1.4846571804669126, "learning_rate": 4.638355962022805e-05, "loss": 0.2907, "step": 6299 }, { "epoch": 0.5374509469373827, "grad_norm": 1.7477090690315067, "learning_rate": 4.636978067170805e-05, "loss": 0.2691, "step": 6300 }, { "epoch": 0.5375362566114997, "grad_norm": 1.512871827378863, "learning_rate": 4.6356002000335994e-05, "loss": 0.2653, "step": 6301 }, { "epoch": 0.5376215662856167, "grad_norm": 2.3946552088951187, "learning_rate": 4.634222360716382e-05, "loss": 0.2477, "step": 6302 }, { "epoch": 0.5377068759597339, "grad_norm": 1.6605983010968037, "learning_rate": 4.632844549324346e-05, "loss": 0.2689, "step": 6303 }, { "epoch": 0.5377921856338509, "grad_norm": 1.9297743557676548, "learning_rate": 4.6314667659626754e-05, "loss": 0.2862, "step": 6304 }, { "epoch": 0.5378774953079679, "grad_norm": 1.4427813661821451, "learning_rate": 4.630089010736559e-05, "loss": 0.236, "step": 6305 }, { "epoch": 0.5379628049820849, "grad_norm": 1.5330594329560696, "learning_rate": 4.628711283751183e-05, "loss": 0.2726, "step": 6306 }, { "epoch": 0.5380481146562021, "grad_norm": 1.54397534721834, "learning_rate": 4.6273335851117264e-05, "loss": 0.2237, "step": 6307 }, { "epoch": 0.5381334243303191, "grad_norm": 2.1682408067615455, "learning_rate": 4.6259559149233695e-05, "loss": 0.2704, "step": 6308 }, { "epoch": 0.5382187340044361, "grad_norm": 1.484688414563552, "learning_rate": 4.6245782732912924e-05, "loss": 0.2534, "step": 6309 }, { "epoch": 0.5383040436785531, "grad_norm": 1.520986139990504, "learning_rate": 4.623200660320667e-05, "loss": 0.2755, "step": 6310 }, { "epoch": 0.5383893533526702, "grad_norm": 1.9342681017104204, "learning_rate": 4.621823076116669e-05, "loss": 0.2547, "step": 6311 }, { "epoch": 0.5384746630267873, "grad_norm": 1.5629241367704976, "learning_rate": 4.620445520784472e-05, "loss": 0.2529, "step": 6312 }, { "epoch": 0.5385599727009043, "grad_norm": 1.2065422277320923, "learning_rate": 4.6190679944292395e-05, "loss": 0.1982, "step": 6313 }, { "epoch": 0.5386452823750213, "grad_norm": 1.6226534406759805, "learning_rate": 4.6176904971561426e-05, "loss": 0.2558, "step": 6314 }, { "epoch": 0.5387305920491384, "grad_norm": 1.9002863668167027, "learning_rate": 4.616313029070346e-05, "loss": 0.2732, "step": 6315 }, { "epoch": 0.5388159017232554, "grad_norm": 1.4971052689524118, "learning_rate": 4.6149355902770086e-05, "loss": 0.2719, "step": 6316 }, { "epoch": 0.5389012113973725, "grad_norm": 1.6939282369264317, "learning_rate": 4.613558180881294e-05, "loss": 0.2126, "step": 6317 }, { "epoch": 0.5389865210714895, "grad_norm": 1.4897957052436508, "learning_rate": 4.612180800988358e-05, "loss": 0.2453, "step": 6318 }, { "epoch": 0.5390718307456066, "grad_norm": 1.28020267014561, "learning_rate": 4.610803450703358e-05, "loss": 0.2544, "step": 6319 }, { "epoch": 0.5391571404197236, "grad_norm": 1.38703033815083, "learning_rate": 4.609426130131445e-05, "loss": 0.241, "step": 6320 }, { "epoch": 0.5392424500938406, "grad_norm": 1.5110385359994598, "learning_rate": 4.608048839377772e-05, "loss": 0.2619, "step": 6321 }, { "epoch": 0.5393277597679577, "grad_norm": 1.6165206935040626, "learning_rate": 4.606671578547488e-05, "loss": 0.2704, "step": 6322 }, { "epoch": 0.5394130694420747, "grad_norm": 1.4679895420845326, "learning_rate": 4.605294347745738e-05, "loss": 0.2894, "step": 6323 }, { "epoch": 0.5394983791161918, "grad_norm": 1.6049670249902808, "learning_rate": 4.603917147077666e-05, "loss": 0.218, "step": 6324 }, { "epoch": 0.5395836887903088, "grad_norm": 1.481889269698551, "learning_rate": 4.602539976648418e-05, "loss": 0.2236, "step": 6325 }, { "epoch": 0.5396689984644258, "grad_norm": 1.4290856934380993, "learning_rate": 4.601162836563128e-05, "loss": 0.2493, "step": 6326 }, { "epoch": 0.5397543081385429, "grad_norm": 1.4661275298995797, "learning_rate": 4.599785726926936e-05, "loss": 0.1299, "step": 6327 }, { "epoch": 0.53983961781266, "grad_norm": 1.3267952938280432, "learning_rate": 4.598408647844979e-05, "loss": 0.2335, "step": 6328 }, { "epoch": 0.539924927486777, "grad_norm": 1.9704784495857808, "learning_rate": 4.597031599422388e-05, "loss": 0.239, "step": 6329 }, { "epoch": 0.540010237160894, "grad_norm": 1.4636307640603994, "learning_rate": 4.5956545817642906e-05, "loss": 0.2218, "step": 6330 }, { "epoch": 0.540095546835011, "grad_norm": 1.6621775092068347, "learning_rate": 4.594277594975818e-05, "loss": 0.2233, "step": 6331 }, { "epoch": 0.5401808565091282, "grad_norm": 1.4085520234065982, "learning_rate": 4.5929006391620963e-05, "loss": 0.2342, "step": 6332 }, { "epoch": 0.5402661661832452, "grad_norm": 1.5713107603672012, "learning_rate": 4.591523714428246e-05, "loss": 0.2669, "step": 6333 }, { "epoch": 0.5403514758573622, "grad_norm": 1.8720805009177635, "learning_rate": 4.590146820879389e-05, "loss": 0.1931, "step": 6334 }, { "epoch": 0.5404367855314792, "grad_norm": 1.6595336464657267, "learning_rate": 4.5887699586206466e-05, "loss": 0.3055, "step": 6335 }, { "epoch": 0.5405220952055964, "grad_norm": 2.2043981458218327, "learning_rate": 4.58739312775713e-05, "loss": 0.3112, "step": 6336 }, { "epoch": 0.5406074048797134, "grad_norm": 1.697155140045162, "learning_rate": 4.586016328393956e-05, "loss": 0.2377, "step": 6337 }, { "epoch": 0.5406927145538304, "grad_norm": 1.4914078997707285, "learning_rate": 4.584639560636238e-05, "loss": 0.189, "step": 6338 }, { "epoch": 0.5407780242279474, "grad_norm": 1.8696059521720332, "learning_rate": 4.583262824589081e-05, "loss": 0.2283, "step": 6339 }, { "epoch": 0.5408633339020645, "grad_norm": 1.631575412374012, "learning_rate": 4.5818861203575915e-05, "loss": 0.2611, "step": 6340 }, { "epoch": 0.5409486435761816, "grad_norm": 1.8819384306951943, "learning_rate": 4.580509448046877e-05, "loss": 0.2961, "step": 6341 }, { "epoch": 0.5410339532502986, "grad_norm": 1.663694385771265, "learning_rate": 4.5791328077620344e-05, "loss": 0.1938, "step": 6342 }, { "epoch": 0.5411192629244156, "grad_norm": 1.3941684111960275, "learning_rate": 4.5777561996081656e-05, "loss": 0.2107, "step": 6343 }, { "epoch": 0.5412045725985327, "grad_norm": 1.7595766158641537, "learning_rate": 4.57637962369037e-05, "loss": 0.2122, "step": 6344 }, { "epoch": 0.5412898822726497, "grad_norm": 1.4014023620231695, "learning_rate": 4.5750030801137364e-05, "loss": 0.1789, "step": 6345 }, { "epoch": 0.5413751919467668, "grad_norm": 1.703718029682232, "learning_rate": 4.573626568983359e-05, "loss": 0.2113, "step": 6346 }, { "epoch": 0.5414605016208838, "grad_norm": 1.5573812945886498, "learning_rate": 4.572250090404328e-05, "loss": 0.2221, "step": 6347 }, { "epoch": 0.5415458112950009, "grad_norm": 1.5731857355977883, "learning_rate": 4.5708736444817316e-05, "loss": 0.203, "step": 6348 }, { "epoch": 0.5416311209691179, "grad_norm": 1.7151441723694656, "learning_rate": 4.5694972313206504e-05, "loss": 0.3025, "step": 6349 }, { "epoch": 0.5417164306432349, "grad_norm": 1.7873205508342163, "learning_rate": 4.568120851026167e-05, "loss": 0.2401, "step": 6350 }, { "epoch": 0.541801740317352, "grad_norm": 1.5605995064311877, "learning_rate": 4.5667445037033635e-05, "loss": 0.2862, "step": 6351 }, { "epoch": 0.5418870499914691, "grad_norm": 1.72401136180382, "learning_rate": 4.565368189457313e-05, "loss": 0.232, "step": 6352 }, { "epoch": 0.5419723596655861, "grad_norm": 1.953300529221885, "learning_rate": 4.563991908393092e-05, "loss": 0.3165, "step": 6353 }, { "epoch": 0.5420576693397031, "grad_norm": 1.7326669616395631, "learning_rate": 4.5626156606157736e-05, "loss": 0.2707, "step": 6354 }, { "epoch": 0.5421429790138201, "grad_norm": 1.2962120383873883, "learning_rate": 4.5612394462304234e-05, "loss": 0.2499, "step": 6355 }, { "epoch": 0.5422282886879373, "grad_norm": 1.4851168736149685, "learning_rate": 4.559863265342109e-05, "loss": 0.2267, "step": 6356 }, { "epoch": 0.5423135983620543, "grad_norm": 1.3558058086024927, "learning_rate": 4.558487118055898e-05, "loss": 0.2991, "step": 6357 }, { "epoch": 0.5423989080361713, "grad_norm": 2.0012888331937413, "learning_rate": 4.557111004476848e-05, "loss": 0.3495, "step": 6358 }, { "epoch": 0.5424842177102883, "grad_norm": 1.2752671934164177, "learning_rate": 4.55573492471002e-05, "loss": 0.1808, "step": 6359 }, { "epoch": 0.5425695273844053, "grad_norm": 2.07768348076415, "learning_rate": 4.554358878860469e-05, "loss": 0.259, "step": 6360 }, { "epoch": 0.5426548370585225, "grad_norm": 1.4722661885508175, "learning_rate": 4.55298286703325e-05, "loss": 0.2036, "step": 6361 }, { "epoch": 0.5427401467326395, "grad_norm": 1.716712993009085, "learning_rate": 4.551606889333412e-05, "loss": 0.2514, "step": 6362 }, { "epoch": 0.5428254564067565, "grad_norm": 2.0167256979801578, "learning_rate": 4.550230945866006e-05, "loss": 0.2848, "step": 6363 }, { "epoch": 0.5429107660808735, "grad_norm": 1.7453001126060652, "learning_rate": 4.548855036736079e-05, "loss": 0.2329, "step": 6364 }, { "epoch": 0.5429960757549906, "grad_norm": 1.5781272661551644, "learning_rate": 4.5474791620486703e-05, "loss": 0.1726, "step": 6365 }, { "epoch": 0.5430813854291077, "grad_norm": 1.2287020640370407, "learning_rate": 4.546103321908823e-05, "loss": 0.2235, "step": 6366 }, { "epoch": 0.5431666951032247, "grad_norm": 1.7332037330129442, "learning_rate": 4.5447275164215774e-05, "loss": 0.2713, "step": 6367 }, { "epoch": 0.5432520047773417, "grad_norm": 1.4969928320774892, "learning_rate": 4.543351745691964e-05, "loss": 0.1988, "step": 6368 }, { "epoch": 0.5433373144514588, "grad_norm": 1.366330690295891, "learning_rate": 4.541976009825019e-05, "loss": 0.2641, "step": 6369 }, { "epoch": 0.5434226241255758, "grad_norm": 1.7539509722948676, "learning_rate": 4.540600308925774e-05, "loss": 0.2, "step": 6370 }, { "epoch": 0.5435079337996929, "grad_norm": 1.6561739922123124, "learning_rate": 4.5392246430992517e-05, "loss": 0.1823, "step": 6371 }, { "epoch": 0.5435932434738099, "grad_norm": 1.5402689431413628, "learning_rate": 4.5378490124504796e-05, "loss": 0.172, "step": 6372 }, { "epoch": 0.543678553147927, "grad_norm": 1.7943191263447948, "learning_rate": 4.5364734170844807e-05, "loss": 0.268, "step": 6373 }, { "epoch": 0.543763862822044, "grad_norm": 1.867140765383357, "learning_rate": 4.535097857106272e-05, "loss": 0.2766, "step": 6374 }, { "epoch": 0.543849172496161, "grad_norm": 1.8762811734165883, "learning_rate": 4.5337223326208705e-05, "loss": 0.2217, "step": 6375 }, { "epoch": 0.5439344821702781, "grad_norm": 1.490031817307723, "learning_rate": 4.5323468437332916e-05, "loss": 0.2432, "step": 6376 }, { "epoch": 0.5440197918443952, "grad_norm": 1.3045804099715521, "learning_rate": 4.5309713905485485e-05, "loss": 0.2125, "step": 6377 }, { "epoch": 0.5441051015185122, "grad_norm": 1.392369194461806, "learning_rate": 4.529595973171645e-05, "loss": 0.2358, "step": 6378 }, { "epoch": 0.5441904111926292, "grad_norm": 1.7743185266400934, "learning_rate": 4.528220591707589e-05, "loss": 0.2395, "step": 6379 }, { "epoch": 0.5442757208667462, "grad_norm": 1.2185934895499144, "learning_rate": 4.526845246261386e-05, "loss": 0.1921, "step": 6380 }, { "epoch": 0.5443610305408634, "grad_norm": 1.1537592583116425, "learning_rate": 4.5254699369380324e-05, "loss": 0.209, "step": 6381 }, { "epoch": 0.5444463402149804, "grad_norm": 1.749133235992973, "learning_rate": 4.5240946638425275e-05, "loss": 0.248, "step": 6382 }, { "epoch": 0.5445316498890974, "grad_norm": 1.4408096691605667, "learning_rate": 4.522719427079868e-05, "loss": 0.2952, "step": 6383 }, { "epoch": 0.5446169595632144, "grad_norm": 1.6229374340264644, "learning_rate": 4.521344226755041e-05, "loss": 0.2808, "step": 6384 }, { "epoch": 0.5447022692373316, "grad_norm": 1.5649661629038296, "learning_rate": 4.51996906297304e-05, "loss": 0.2535, "step": 6385 }, { "epoch": 0.5447875789114486, "grad_norm": 1.7335534426733825, "learning_rate": 4.5185939358388514e-05, "loss": 0.279, "step": 6386 }, { "epoch": 0.5448728885855656, "grad_norm": 1.4145945185732691, "learning_rate": 4.517218845457456e-05, "loss": 0.2126, "step": 6387 }, { "epoch": 0.5449581982596826, "grad_norm": 1.6005938322048285, "learning_rate": 4.5158437919338355e-05, "loss": 0.2262, "step": 6388 }, { "epoch": 0.5450435079337997, "grad_norm": 1.4086301793588771, "learning_rate": 4.5144687753729706e-05, "loss": 0.2341, "step": 6389 }, { "epoch": 0.5451288176079168, "grad_norm": 1.5484246862595366, "learning_rate": 4.5130937958798334e-05, "loss": 0.26, "step": 6390 }, { "epoch": 0.5452141272820338, "grad_norm": 1.4076180479370723, "learning_rate": 4.5117188535593985e-05, "loss": 0.236, "step": 6391 }, { "epoch": 0.5452994369561508, "grad_norm": 1.6651566181467026, "learning_rate": 4.510343948516633e-05, "loss": 0.2366, "step": 6392 }, { "epoch": 0.5453847466302679, "grad_norm": 1.7032838799799523, "learning_rate": 4.508969080856507e-05, "loss": 0.2057, "step": 6393 }, { "epoch": 0.5454700563043849, "grad_norm": 1.465766555034195, "learning_rate": 4.5075942506839804e-05, "loss": 0.3168, "step": 6394 }, { "epoch": 0.545555365978502, "grad_norm": 1.362688345533869, "learning_rate": 4.506219458104016e-05, "loss": 0.208, "step": 6395 }, { "epoch": 0.545640675652619, "grad_norm": 1.7172260811162203, "learning_rate": 4.504844703221575e-05, "loss": 0.2247, "step": 6396 }, { "epoch": 0.5457259853267361, "grad_norm": 1.4513080681906119, "learning_rate": 4.503469986141606e-05, "loss": 0.2007, "step": 6397 }, { "epoch": 0.5458112950008531, "grad_norm": 1.509710194901363, "learning_rate": 4.502095306969066e-05, "loss": 0.265, "step": 6398 }, { "epoch": 0.5458966046749701, "grad_norm": 1.8048313628484887, "learning_rate": 4.500720665808905e-05, "loss": 0.2342, "step": 6399 }, { "epoch": 0.5459819143490872, "grad_norm": 1.7591765811893165, "learning_rate": 4.499346062766067e-05, "loss": 0.264, "step": 6400 }, { "epoch": 0.5460672240232042, "grad_norm": 1.3515285421526606, "learning_rate": 4.497971497945496e-05, "loss": 0.1841, "step": 6401 }, { "epoch": 0.5461525336973213, "grad_norm": 1.4827661730138804, "learning_rate": 4.496596971452135e-05, "loss": 0.1859, "step": 6402 }, { "epoch": 0.5462378433714383, "grad_norm": 1.4708754399281863, "learning_rate": 4.4952224833909194e-05, "loss": 0.2755, "step": 6403 }, { "epoch": 0.5463231530455553, "grad_norm": 1.4027721207675659, "learning_rate": 4.493848033866784e-05, "loss": 0.2293, "step": 6404 }, { "epoch": 0.5464084627196724, "grad_norm": 1.6834781899145497, "learning_rate": 4.492473622984663e-05, "loss": 0.279, "step": 6405 }, { "epoch": 0.5464937723937895, "grad_norm": 1.7655287859872975, "learning_rate": 4.4910992508494826e-05, "loss": 0.2061, "step": 6406 }, { "epoch": 0.5465790820679065, "grad_norm": 1.705470298944627, "learning_rate": 4.489724917566169e-05, "loss": 0.2462, "step": 6407 }, { "epoch": 0.5466643917420235, "grad_norm": 1.5215447821518853, "learning_rate": 4.488350623239648e-05, "loss": 0.2016, "step": 6408 }, { "epoch": 0.5467497014161405, "grad_norm": 1.9055124325533104, "learning_rate": 4.4869763679748386e-05, "loss": 0.2043, "step": 6409 }, { "epoch": 0.5468350110902577, "grad_norm": 1.8687956839498558, "learning_rate": 4.485602151876656e-05, "loss": 0.2408, "step": 6410 }, { "epoch": 0.5469203207643747, "grad_norm": 1.4749427267043644, "learning_rate": 4.484227975050015e-05, "loss": 0.2663, "step": 6411 }, { "epoch": 0.5470056304384917, "grad_norm": 1.9807952671889613, "learning_rate": 4.48285383759983e-05, "loss": 0.2135, "step": 6412 }, { "epoch": 0.5470909401126087, "grad_norm": 1.7787000909426207, "learning_rate": 4.4814797396310055e-05, "loss": 0.2516, "step": 6413 }, { "epoch": 0.5471762497867259, "grad_norm": 1.6882666506438335, "learning_rate": 4.480105681248446e-05, "loss": 0.2693, "step": 6414 }, { "epoch": 0.5472615594608429, "grad_norm": 1.709645798344891, "learning_rate": 4.478731662557057e-05, "loss": 0.2933, "step": 6415 }, { "epoch": 0.5473468691349599, "grad_norm": 1.3683289041265496, "learning_rate": 4.477357683661734e-05, "loss": 0.2672, "step": 6416 }, { "epoch": 0.5474321788090769, "grad_norm": 1.55006526686555, "learning_rate": 4.475983744667374e-05, "loss": 0.2006, "step": 6417 }, { "epoch": 0.547517488483194, "grad_norm": 1.6823220107547405, "learning_rate": 4.4746098456788724e-05, "loss": 0.2672, "step": 6418 }, { "epoch": 0.547602798157311, "grad_norm": 1.3493194489487208, "learning_rate": 4.4732359868011155e-05, "loss": 0.2138, "step": 6419 }, { "epoch": 0.5476881078314281, "grad_norm": 1.3104862932509382, "learning_rate": 4.4718621681389915e-05, "loss": 0.2144, "step": 6420 }, { "epoch": 0.5477734175055451, "grad_norm": 1.8474736516619676, "learning_rate": 4.470488389797385e-05, "loss": 0.1759, "step": 6421 }, { "epoch": 0.5478587271796622, "grad_norm": 1.421427645891796, "learning_rate": 4.4691146518811775e-05, "loss": 0.246, "step": 6422 }, { "epoch": 0.5479440368537792, "grad_norm": 1.6404305479072634, "learning_rate": 4.467740954495244e-05, "loss": 0.1931, "step": 6423 }, { "epoch": 0.5480293465278963, "grad_norm": 1.184127586510917, "learning_rate": 4.46636729774446e-05, "loss": 0.1767, "step": 6424 }, { "epoch": 0.5481146562020133, "grad_norm": 1.3842893139087835, "learning_rate": 4.464993681733699e-05, "loss": 0.2617, "step": 6425 }, { "epoch": 0.5481999658761304, "grad_norm": 1.7072555429556047, "learning_rate": 4.463620106567825e-05, "loss": 0.2277, "step": 6426 }, { "epoch": 0.5482852755502474, "grad_norm": 1.5382781377928458, "learning_rate": 4.462246572351706e-05, "loss": 0.2768, "step": 6427 }, { "epoch": 0.5483705852243644, "grad_norm": 1.5917463984999154, "learning_rate": 4.460873079190205e-05, "loss": 0.2724, "step": 6428 }, { "epoch": 0.5484558948984815, "grad_norm": 1.498931186086459, "learning_rate": 4.459499627188178e-05, "loss": 0.2128, "step": 6429 }, { "epoch": 0.5485412045725986, "grad_norm": 1.244322806872699, "learning_rate": 4.458126216450482e-05, "loss": 0.2222, "step": 6430 }, { "epoch": 0.5486265142467156, "grad_norm": 1.8713850778103618, "learning_rate": 4.456752847081971e-05, "loss": 0.2379, "step": 6431 }, { "epoch": 0.5487118239208326, "grad_norm": 1.6481302707256567, "learning_rate": 4.4553795191874924e-05, "loss": 0.2547, "step": 6432 }, { "epoch": 0.5487971335949496, "grad_norm": 1.6118771259342652, "learning_rate": 4.4540062328718945e-05, "loss": 0.2149, "step": 6433 }, { "epoch": 0.5488824432690668, "grad_norm": 1.6529750474729978, "learning_rate": 4.452632988240019e-05, "loss": 0.2446, "step": 6434 }, { "epoch": 0.5489677529431838, "grad_norm": 1.5983601630660296, "learning_rate": 4.451259785396707e-05, "loss": 0.2347, "step": 6435 }, { "epoch": 0.5490530626173008, "grad_norm": 1.4383476080394246, "learning_rate": 4.449886624446792e-05, "loss": 0.2342, "step": 6436 }, { "epoch": 0.5491383722914178, "grad_norm": 1.7961445526199304, "learning_rate": 4.44851350549511e-05, "loss": 0.2861, "step": 6437 }, { "epoch": 0.5492236819655348, "grad_norm": 1.831408038822339, "learning_rate": 4.447140428646494e-05, "loss": 0.1988, "step": 6438 }, { "epoch": 0.549308991639652, "grad_norm": 1.9592958546887607, "learning_rate": 4.445767394005766e-05, "loss": 0.2891, "step": 6439 }, { "epoch": 0.549394301313769, "grad_norm": 1.6071292059182363, "learning_rate": 4.4443944016777524e-05, "loss": 0.1929, "step": 6440 }, { "epoch": 0.549479610987886, "grad_norm": 1.832221893607139, "learning_rate": 4.443021451767275e-05, "loss": 0.2362, "step": 6441 }, { "epoch": 0.549564920662003, "grad_norm": 1.516056038824771, "learning_rate": 4.441648544379149e-05, "loss": 0.1741, "step": 6442 }, { "epoch": 0.5496502303361201, "grad_norm": 1.5319326008146443, "learning_rate": 4.4402756796181894e-05, "loss": 0.2515, "step": 6443 }, { "epoch": 0.5497355400102372, "grad_norm": 1.592421108549411, "learning_rate": 4.438902857589209e-05, "loss": 0.238, "step": 6444 }, { "epoch": 0.5498208496843542, "grad_norm": 1.5311494132499939, "learning_rate": 4.437530078397013e-05, "loss": 0.2426, "step": 6445 }, { "epoch": 0.5499061593584712, "grad_norm": 1.6467144534993083, "learning_rate": 4.436157342146405e-05, "loss": 0.2329, "step": 6446 }, { "epoch": 0.5499914690325883, "grad_norm": 1.458740213944822, "learning_rate": 4.434784648942191e-05, "loss": 0.2109, "step": 6447 }, { "epoch": 0.5500767787067053, "grad_norm": 1.767570301691941, "learning_rate": 4.433411998889162e-05, "loss": 0.2951, "step": 6448 }, { "epoch": 0.5501620883808224, "grad_norm": 1.930806812186006, "learning_rate": 4.432039392092117e-05, "loss": 0.244, "step": 6449 }, { "epoch": 0.5502473980549394, "grad_norm": 1.378122166221975, "learning_rate": 4.4306668286558476e-05, "loss": 0.2652, "step": 6450 }, { "epoch": 0.5503327077290565, "grad_norm": 1.6440294093543526, "learning_rate": 4.429294308685139e-05, "loss": 0.163, "step": 6451 }, { "epoch": 0.5504180174031735, "grad_norm": 1.8561077744170635, "learning_rate": 4.4279218322847764e-05, "loss": 0.2909, "step": 6452 }, { "epoch": 0.5505033270772905, "grad_norm": 1.9906828939262355, "learning_rate": 4.4265493995595424e-05, "loss": 0.2275, "step": 6453 }, { "epoch": 0.5505886367514076, "grad_norm": 1.6591203397180867, "learning_rate": 4.4251770106142166e-05, "loss": 0.2396, "step": 6454 }, { "epoch": 0.5506739464255247, "grad_norm": 1.2664764742528047, "learning_rate": 4.42380466555357e-05, "loss": 0.2538, "step": 6455 }, { "epoch": 0.5507592560996417, "grad_norm": 1.4851841377637944, "learning_rate": 4.422432364482375e-05, "loss": 0.195, "step": 6456 }, { "epoch": 0.5508445657737587, "grad_norm": 1.3442972532639708, "learning_rate": 4.421060107505401e-05, "loss": 0.2162, "step": 6457 }, { "epoch": 0.5509298754478757, "grad_norm": 1.598705068736823, "learning_rate": 4.41968789472741e-05, "loss": 0.2703, "step": 6458 }, { "epoch": 0.5510151851219929, "grad_norm": 1.6287973491025087, "learning_rate": 4.418315726253164e-05, "loss": 0.2591, "step": 6459 }, { "epoch": 0.5511004947961099, "grad_norm": 1.7217854831363182, "learning_rate": 4.4169436021874236e-05, "loss": 0.2589, "step": 6460 }, { "epoch": 0.5511858044702269, "grad_norm": 1.4930641834492742, "learning_rate": 4.415571522634938e-05, "loss": 0.1741, "step": 6461 }, { "epoch": 0.5512711141443439, "grad_norm": 1.7241228223007123, "learning_rate": 4.4141994877004614e-05, "loss": 0.2301, "step": 6462 }, { "epoch": 0.5513564238184611, "grad_norm": 1.824035504631355, "learning_rate": 4.412827497488744e-05, "loss": 0.2644, "step": 6463 }, { "epoch": 0.5514417334925781, "grad_norm": 1.4820509535747073, "learning_rate": 4.411455552104524e-05, "loss": 0.1826, "step": 6464 }, { "epoch": 0.5515270431666951, "grad_norm": 1.7170641173586756, "learning_rate": 4.4100836516525456e-05, "loss": 0.2891, "step": 6465 }, { "epoch": 0.5516123528408121, "grad_norm": 1.6732859851906587, "learning_rate": 4.408711796237545e-05, "loss": 0.2246, "step": 6466 }, { "epoch": 0.5516976625149292, "grad_norm": 1.5828604326587778, "learning_rate": 4.407339985964259e-05, "loss": 0.2292, "step": 6467 }, { "epoch": 0.5517829721890463, "grad_norm": 1.5350256323149831, "learning_rate": 4.4059682209374136e-05, "loss": 0.1987, "step": 6468 }, { "epoch": 0.5518682818631633, "grad_norm": 1.5990046474778112, "learning_rate": 4.404596501261737e-05, "loss": 0.2047, "step": 6469 }, { "epoch": 0.5519535915372803, "grad_norm": 1.5062644008090522, "learning_rate": 4.403224827041957e-05, "loss": 0.2387, "step": 6470 }, { "epoch": 0.5520389012113974, "grad_norm": 1.5780083641536464, "learning_rate": 4.401853198382788e-05, "loss": 0.2687, "step": 6471 }, { "epoch": 0.5521242108855144, "grad_norm": 1.8730867797292154, "learning_rate": 4.400481615388948e-05, "loss": 0.2964, "step": 6472 }, { "epoch": 0.5522095205596315, "grad_norm": 1.7258313732544388, "learning_rate": 4.399110078165153e-05, "loss": 0.273, "step": 6473 }, { "epoch": 0.5522948302337485, "grad_norm": 1.794211827282804, "learning_rate": 4.397738586816108e-05, "loss": 0.2476, "step": 6474 }, { "epoch": 0.5523801399078655, "grad_norm": 1.4167683882751, "learning_rate": 4.3963671414465216e-05, "loss": 0.2432, "step": 6475 }, { "epoch": 0.5524654495819826, "grad_norm": 1.6309317384301223, "learning_rate": 4.3949957421610995e-05, "loss": 0.229, "step": 6476 }, { "epoch": 0.5525507592560996, "grad_norm": 1.5164910427876666, "learning_rate": 4.393624389064535e-05, "loss": 0.2118, "step": 6477 }, { "epoch": 0.5526360689302167, "grad_norm": 1.754709174683864, "learning_rate": 4.392253082261526e-05, "loss": 0.2746, "step": 6478 }, { "epoch": 0.5527213786043337, "grad_norm": 1.4863040636398346, "learning_rate": 4.390881821856767e-05, "loss": 0.2486, "step": 6479 }, { "epoch": 0.5528066882784508, "grad_norm": 1.5870077346151763, "learning_rate": 4.3895106079549407e-05, "loss": 0.2427, "step": 6480 }, { "epoch": 0.5528919979525678, "grad_norm": 1.5663520632382562, "learning_rate": 4.388139440660736e-05, "loss": 0.2406, "step": 6481 }, { "epoch": 0.5529773076266848, "grad_norm": 1.566096401248512, "learning_rate": 4.3867683200788334e-05, "loss": 0.2265, "step": 6482 }, { "epoch": 0.5530626173008019, "grad_norm": 1.7113344611647283, "learning_rate": 4.385397246313913e-05, "loss": 0.2191, "step": 6483 }, { "epoch": 0.553147926974919, "grad_norm": 1.6166200978496723, "learning_rate": 4.384026219470645e-05, "loss": 0.1826, "step": 6484 }, { "epoch": 0.553233236649036, "grad_norm": 1.9530638418159147, "learning_rate": 4.382655239653702e-05, "loss": 0.2343, "step": 6485 }, { "epoch": 0.553318546323153, "grad_norm": 1.4000462916077863, "learning_rate": 4.3812843069677526e-05, "loss": 0.2225, "step": 6486 }, { "epoch": 0.55340385599727, "grad_norm": 1.6036269117946529, "learning_rate": 4.379913421517458e-05, "loss": 0.2678, "step": 6487 }, { "epoch": 0.5534891656713872, "grad_norm": 1.6772490077985966, "learning_rate": 4.3785425834074764e-05, "loss": 0.2424, "step": 6488 }, { "epoch": 0.5535744753455042, "grad_norm": 1.6677999299354598, "learning_rate": 4.377171792742469e-05, "loss": 0.2615, "step": 6489 }, { "epoch": 0.5536597850196212, "grad_norm": 1.3811355163632337, "learning_rate": 4.375801049627083e-05, "loss": 0.2529, "step": 6490 }, { "epoch": 0.5537450946937382, "grad_norm": 2.0560388798729745, "learning_rate": 4.37443035416597e-05, "loss": 0.273, "step": 6491 }, { "epoch": 0.5538304043678554, "grad_norm": 1.454810037174165, "learning_rate": 4.373059706463778e-05, "loss": 0.2094, "step": 6492 }, { "epoch": 0.5539157140419724, "grad_norm": 1.5253947669448007, "learning_rate": 4.371689106625143e-05, "loss": 0.2046, "step": 6493 }, { "epoch": 0.5540010237160894, "grad_norm": 1.5771003644956725, "learning_rate": 4.370318554754706e-05, "loss": 0.243, "step": 6494 }, { "epoch": 0.5540863333902064, "grad_norm": 1.942237967512923, "learning_rate": 4.368948050957104e-05, "loss": 0.2966, "step": 6495 }, { "epoch": 0.5541716430643235, "grad_norm": 1.4591314402535926, "learning_rate": 4.367577595336961e-05, "loss": 0.2062, "step": 6496 }, { "epoch": 0.5542569527384406, "grad_norm": 1.8242062684638911, "learning_rate": 4.3662071879989106e-05, "loss": 0.2886, "step": 6497 }, { "epoch": 0.5543422624125576, "grad_norm": 1.615214152826481, "learning_rate": 4.364836829047572e-05, "loss": 0.2348, "step": 6498 }, { "epoch": 0.5544275720866746, "grad_norm": 1.7034862730090672, "learning_rate": 4.363466518587568e-05, "loss": 0.2181, "step": 6499 }, { "epoch": 0.5545128817607917, "grad_norm": 1.4745980571792492, "learning_rate": 4.362096256723511e-05, "loss": 0.3018, "step": 6500 }, { "epoch": 0.5545981914349087, "grad_norm": 1.8497291211979827, "learning_rate": 4.360726043560015e-05, "loss": 0.245, "step": 6501 }, { "epoch": 0.5546835011090258, "grad_norm": 1.5128600078294736, "learning_rate": 4.359355879201691e-05, "loss": 0.1686, "step": 6502 }, { "epoch": 0.5547688107831428, "grad_norm": 1.4899489303131856, "learning_rate": 4.3579857637531384e-05, "loss": 0.237, "step": 6503 }, { "epoch": 0.5548541204572599, "grad_norm": 1.4996754772088288, "learning_rate": 4.356615697318962e-05, "loss": 0.2533, "step": 6504 }, { "epoch": 0.5549394301313769, "grad_norm": 1.3643888330042313, "learning_rate": 4.355245680003759e-05, "loss": 0.1919, "step": 6505 }, { "epoch": 0.5550247398054939, "grad_norm": 1.9141063451610878, "learning_rate": 4.3538757119121204e-05, "loss": 0.2312, "step": 6506 }, { "epoch": 0.555110049479611, "grad_norm": 1.6664194241560526, "learning_rate": 4.352505793148639e-05, "loss": 0.2505, "step": 6507 }, { "epoch": 0.5551953591537281, "grad_norm": 1.5022384313048098, "learning_rate": 4.3511359238178996e-05, "loss": 0.2721, "step": 6508 }, { "epoch": 0.5552806688278451, "grad_norm": 1.894022639323581, "learning_rate": 4.349766104024484e-05, "loss": 0.3145, "step": 6509 }, { "epoch": 0.5553659785019621, "grad_norm": 1.4146600798891866, "learning_rate": 4.348396333872971e-05, "loss": 0.2204, "step": 6510 }, { "epoch": 0.5554512881760791, "grad_norm": 1.367456638548344, "learning_rate": 4.347026613467934e-05, "loss": 0.2196, "step": 6511 }, { "epoch": 0.5555365978501963, "grad_norm": 1.5560212668115354, "learning_rate": 4.345656942913947e-05, "loss": 0.276, "step": 6512 }, { "epoch": 0.5556219075243133, "grad_norm": 1.4547705408440084, "learning_rate": 4.3442873223155746e-05, "loss": 0.181, "step": 6513 }, { "epoch": 0.5557072171984303, "grad_norm": 1.5840459357739112, "learning_rate": 4.34291775177738e-05, "loss": 0.216, "step": 6514 }, { "epoch": 0.5557925268725473, "grad_norm": 1.9306672732491006, "learning_rate": 4.341548231403925e-05, "loss": 0.3353, "step": 6515 }, { "epoch": 0.5558778365466643, "grad_norm": 1.4981915380464783, "learning_rate": 4.340178761299762e-05, "loss": 0.2667, "step": 6516 }, { "epoch": 0.5559631462207815, "grad_norm": 1.4963628063844305, "learning_rate": 4.338809341569444e-05, "loss": 0.2572, "step": 6517 }, { "epoch": 0.5560484558948985, "grad_norm": 1.5240165979177391, "learning_rate": 4.3374399723175216e-05, "loss": 0.2273, "step": 6518 }, { "epoch": 0.5561337655690155, "grad_norm": 1.328586273872187, "learning_rate": 4.336070653648535e-05, "loss": 0.2418, "step": 6519 }, { "epoch": 0.5562190752431325, "grad_norm": 2.0690797986608245, "learning_rate": 4.334701385667026e-05, "loss": 0.2756, "step": 6520 }, { "epoch": 0.5563043849172497, "grad_norm": 1.8275223741322846, "learning_rate": 4.3333321684775314e-05, "loss": 0.2481, "step": 6521 }, { "epoch": 0.5563896945913667, "grad_norm": 1.5992186724761064, "learning_rate": 4.331963002184581e-05, "loss": 0.2484, "step": 6522 }, { "epoch": 0.5564750042654837, "grad_norm": 1.3997764783624673, "learning_rate": 4.330593886892707e-05, "loss": 0.1928, "step": 6523 }, { "epoch": 0.5565603139396007, "grad_norm": 1.4967801981871556, "learning_rate": 4.329224822706433e-05, "loss": 0.2174, "step": 6524 }, { "epoch": 0.5566456236137178, "grad_norm": 1.5475646514205919, "learning_rate": 4.327855809730278e-05, "loss": 0.2425, "step": 6525 }, { "epoch": 0.5567309332878349, "grad_norm": 1.613089266015076, "learning_rate": 4.32648684806876e-05, "loss": 0.2417, "step": 6526 }, { "epoch": 0.5568162429619519, "grad_norm": 1.9223822567802356, "learning_rate": 4.325117937826392e-05, "loss": 0.2586, "step": 6527 }, { "epoch": 0.5569015526360689, "grad_norm": 1.924041767166648, "learning_rate": 4.323749079107685e-05, "loss": 0.2406, "step": 6528 }, { "epoch": 0.556986862310186, "grad_norm": 1.405388377395284, "learning_rate": 4.3223802720171417e-05, "loss": 0.2182, "step": 6529 }, { "epoch": 0.557072171984303, "grad_norm": 1.4223348691324245, "learning_rate": 4.321011516659263e-05, "loss": 0.2547, "step": 6530 }, { "epoch": 0.55715748165842, "grad_norm": 1.9687902052876338, "learning_rate": 4.319642813138548e-05, "loss": 0.275, "step": 6531 }, { "epoch": 0.5572427913325371, "grad_norm": 1.2855532882191996, "learning_rate": 4.318274161559487e-05, "loss": 0.2383, "step": 6532 }, { "epoch": 0.5573281010066542, "grad_norm": 1.4385478305707613, "learning_rate": 4.316905562026571e-05, "loss": 0.1975, "step": 6533 }, { "epoch": 0.5574134106807712, "grad_norm": 1.657946239833076, "learning_rate": 4.315537014644288e-05, "loss": 0.2165, "step": 6534 }, { "epoch": 0.5574987203548882, "grad_norm": 1.6466316347346888, "learning_rate": 4.3141685195171136e-05, "loss": 0.1979, "step": 6535 }, { "epoch": 0.5575840300290053, "grad_norm": 1.729095396235334, "learning_rate": 4.312800076749529e-05, "loss": 0.2523, "step": 6536 }, { "epoch": 0.5576693397031224, "grad_norm": 1.7152120068802406, "learning_rate": 4.311431686446009e-05, "loss": 0.2761, "step": 6537 }, { "epoch": 0.5577546493772394, "grad_norm": 1.5649599928205924, "learning_rate": 4.310063348711018e-05, "loss": 0.2385, "step": 6538 }, { "epoch": 0.5578399590513564, "grad_norm": 1.8193536287636225, "learning_rate": 4.3086950636490256e-05, "loss": 0.2126, "step": 6539 }, { "epoch": 0.5579252687254734, "grad_norm": 1.793043479941151, "learning_rate": 4.3073268313644915e-05, "loss": 0.2948, "step": 6540 }, { "epoch": 0.5580105783995906, "grad_norm": 1.336434835027676, "learning_rate": 4.305958651961873e-05, "loss": 0.2522, "step": 6541 }, { "epoch": 0.5580958880737076, "grad_norm": 1.523388671092473, "learning_rate": 4.304590525545622e-05, "loss": 0.2081, "step": 6542 }, { "epoch": 0.5581811977478246, "grad_norm": 1.883036873138863, "learning_rate": 4.303222452220189e-05, "loss": 0.288, "step": 6543 }, { "epoch": 0.5582665074219416, "grad_norm": 1.3017664205805974, "learning_rate": 4.301854432090021e-05, "loss": 0.2235, "step": 6544 }, { "epoch": 0.5583518170960587, "grad_norm": 1.6743095711529339, "learning_rate": 4.300486465259555e-05, "loss": 0.3225, "step": 6545 }, { "epoch": 0.5584371267701758, "grad_norm": 1.7524651467072274, "learning_rate": 4.299118551833231e-05, "loss": 0.2282, "step": 6546 }, { "epoch": 0.5585224364442928, "grad_norm": 1.6314690278433899, "learning_rate": 4.297750691915482e-05, "loss": 0.204, "step": 6547 }, { "epoch": 0.5586077461184098, "grad_norm": 2.240907948609908, "learning_rate": 4.296382885610735e-05, "loss": 0.2059, "step": 6548 }, { "epoch": 0.5586930557925269, "grad_norm": 1.3983578792474296, "learning_rate": 4.2950151330234145e-05, "loss": 0.2147, "step": 6549 }, { "epoch": 0.558778365466644, "grad_norm": 1.285259532546595, "learning_rate": 4.2936474342579453e-05, "loss": 0.2209, "step": 6550 }, { "epoch": 0.558863675140761, "grad_norm": 1.3162093199179248, "learning_rate": 4.2922797894187394e-05, "loss": 0.2654, "step": 6551 }, { "epoch": 0.558948984814878, "grad_norm": 1.5144614246250203, "learning_rate": 4.29091219861021e-05, "loss": 0.1777, "step": 6552 }, { "epoch": 0.559034294488995, "grad_norm": 1.7758117109271512, "learning_rate": 4.2895446619367684e-05, "loss": 0.2575, "step": 6553 }, { "epoch": 0.5591196041631121, "grad_norm": 2.025618155049144, "learning_rate": 4.288177179502814e-05, "loss": 0.202, "step": 6554 }, { "epoch": 0.5592049138372291, "grad_norm": 1.5904337470110448, "learning_rate": 4.286809751412749e-05, "loss": 0.2253, "step": 6555 }, { "epoch": 0.5592902235113462, "grad_norm": 1.7688734166521904, "learning_rate": 4.285442377770971e-05, "loss": 0.2689, "step": 6556 }, { "epoch": 0.5593755331854632, "grad_norm": 1.63915935626563, "learning_rate": 4.2840750586818715e-05, "loss": 0.1693, "step": 6557 }, { "epoch": 0.5594608428595803, "grad_norm": 1.3075719687383085, "learning_rate": 4.2827077942498343e-05, "loss": 0.2462, "step": 6558 }, { "epoch": 0.5595461525336973, "grad_norm": 1.6719558553663136, "learning_rate": 4.281340584579246e-05, "loss": 0.2445, "step": 6559 }, { "epoch": 0.5596314622078143, "grad_norm": 1.4364799529235928, "learning_rate": 4.2799734297744864e-05, "loss": 0.2783, "step": 6560 }, { "epoch": 0.5597167718819314, "grad_norm": 1.8270007858274677, "learning_rate": 4.278606329939929e-05, "loss": 0.2346, "step": 6561 }, { "epoch": 0.5598020815560485, "grad_norm": 2.196804313553467, "learning_rate": 4.2772392851799434e-05, "loss": 0.277, "step": 6562 }, { "epoch": 0.5598873912301655, "grad_norm": 1.7058755903999814, "learning_rate": 4.2758722955989e-05, "loss": 0.3142, "step": 6563 }, { "epoch": 0.5599727009042825, "grad_norm": 1.721214677369279, "learning_rate": 4.2745053613011564e-05, "loss": 0.2184, "step": 6564 }, { "epoch": 0.5600580105783995, "grad_norm": 1.9020727696983697, "learning_rate": 4.2731384823910735e-05, "loss": 0.2429, "step": 6565 }, { "epoch": 0.5601433202525167, "grad_norm": 1.710021030452861, "learning_rate": 4.271771658973007e-05, "loss": 0.2691, "step": 6566 }, { "epoch": 0.5602286299266337, "grad_norm": 1.8730767877157208, "learning_rate": 4.270404891151302e-05, "loss": 0.2575, "step": 6567 }, { "epoch": 0.5603139396007507, "grad_norm": 1.5748684207510268, "learning_rate": 4.2690381790303066e-05, "loss": 0.2652, "step": 6568 }, { "epoch": 0.5603992492748677, "grad_norm": 1.958786594070947, "learning_rate": 4.267671522714365e-05, "loss": 0.2633, "step": 6569 }, { "epoch": 0.5604845589489849, "grad_norm": 1.631426501316936, "learning_rate": 4.2663049223078075e-05, "loss": 0.2155, "step": 6570 }, { "epoch": 0.5605698686231019, "grad_norm": 1.4280764147885154, "learning_rate": 4.264938377914973e-05, "loss": 0.2772, "step": 6571 }, { "epoch": 0.5606551782972189, "grad_norm": 1.466964773024284, "learning_rate": 4.263571889640184e-05, "loss": 0.2238, "step": 6572 }, { "epoch": 0.5607404879713359, "grad_norm": 1.309594708129407, "learning_rate": 4.262205457587772e-05, "loss": 0.2033, "step": 6573 }, { "epoch": 0.560825797645453, "grad_norm": 1.3939448387227504, "learning_rate": 4.26083908186205e-05, "loss": 0.2555, "step": 6574 }, { "epoch": 0.5609111073195701, "grad_norm": 1.4768640926699292, "learning_rate": 4.2594727625673356e-05, "loss": 0.1966, "step": 6575 }, { "epoch": 0.5609964169936871, "grad_norm": 1.7647437538028867, "learning_rate": 4.258106499807943e-05, "loss": 0.2906, "step": 6576 }, { "epoch": 0.5610817266678041, "grad_norm": 1.6598995952804958, "learning_rate": 4.256740293688175e-05, "loss": 0.1967, "step": 6577 }, { "epoch": 0.5611670363419212, "grad_norm": 1.7780819879300818, "learning_rate": 4.255374144312335e-05, "loss": 0.2098, "step": 6578 }, { "epoch": 0.5612523460160382, "grad_norm": 1.8010049213509514, "learning_rate": 4.2540080517847255e-05, "loss": 0.2471, "step": 6579 }, { "epoch": 0.5613376556901553, "grad_norm": 1.8643070492204854, "learning_rate": 4.2526420162096344e-05, "loss": 0.2519, "step": 6580 }, { "epoch": 0.5614229653642723, "grad_norm": 1.591191425908551, "learning_rate": 4.251276037691355e-05, "loss": 0.2422, "step": 6581 }, { "epoch": 0.5615082750383894, "grad_norm": 1.620600834558567, "learning_rate": 4.249910116334171e-05, "loss": 0.1633, "step": 6582 }, { "epoch": 0.5615935847125064, "grad_norm": 2.136684605745816, "learning_rate": 4.2485442522423636e-05, "loss": 0.2485, "step": 6583 }, { "epoch": 0.5616788943866234, "grad_norm": 1.9860327096802, "learning_rate": 4.247178445520209e-05, "loss": 0.2624, "step": 6584 }, { "epoch": 0.5617642040607405, "grad_norm": 1.911135868933587, "learning_rate": 4.245812696271981e-05, "loss": 0.2113, "step": 6585 }, { "epoch": 0.5618495137348576, "grad_norm": 1.4476807033822452, "learning_rate": 4.2444470046019444e-05, "loss": 0.2345, "step": 6586 }, { "epoch": 0.5619348234089746, "grad_norm": 1.9050424221651925, "learning_rate": 4.2430813706143636e-05, "loss": 0.2596, "step": 6587 }, { "epoch": 0.5620201330830916, "grad_norm": 1.678368835919744, "learning_rate": 4.2417157944134975e-05, "loss": 0.2141, "step": 6588 }, { "epoch": 0.5621054427572086, "grad_norm": 1.5524024464645707, "learning_rate": 4.240350276103604e-05, "loss": 0.1908, "step": 6589 }, { "epoch": 0.5621907524313257, "grad_norm": 1.4243598437036438, "learning_rate": 4.2389848157889276e-05, "loss": 0.1956, "step": 6590 }, { "epoch": 0.5622760621054428, "grad_norm": 1.314815930294903, "learning_rate": 4.2376194135737165e-05, "loss": 0.2629, "step": 6591 }, { "epoch": 0.5623613717795598, "grad_norm": 1.7280500399178853, "learning_rate": 4.236254069562213e-05, "loss": 0.1994, "step": 6592 }, { "epoch": 0.5624466814536768, "grad_norm": 1.8638801172122395, "learning_rate": 4.234888783858653e-05, "loss": 0.2279, "step": 6593 }, { "epoch": 0.5625319911277938, "grad_norm": 1.7340163126335484, "learning_rate": 4.233523556567267e-05, "loss": 0.182, "step": 6594 }, { "epoch": 0.562617300801911, "grad_norm": 2.0344356498525857, "learning_rate": 4.232158387792287e-05, "loss": 0.298, "step": 6595 }, { "epoch": 0.562702610476028, "grad_norm": 1.5330984675167785, "learning_rate": 4.230793277637931e-05, "loss": 0.2722, "step": 6596 }, { "epoch": 0.562787920150145, "grad_norm": 1.4347775428416165, "learning_rate": 4.2294282262084215e-05, "loss": 0.1801, "step": 6597 }, { "epoch": 0.562873229824262, "grad_norm": 2.0618060707487658, "learning_rate": 4.228063233607974e-05, "loss": 0.3041, "step": 6598 }, { "epoch": 0.5629585394983792, "grad_norm": 1.6079286168229283, "learning_rate": 4.226698299940794e-05, "loss": 0.2045, "step": 6599 }, { "epoch": 0.5630438491724962, "grad_norm": 1.8413961870697808, "learning_rate": 4.225333425311089e-05, "loss": 0.2433, "step": 6600 }, { "epoch": 0.5631291588466132, "grad_norm": 2.2436739682887445, "learning_rate": 4.223968609823061e-05, "loss": 0.2562, "step": 6601 }, { "epoch": 0.5632144685207302, "grad_norm": 1.801913155989632, "learning_rate": 4.2226038535809084e-05, "loss": 0.1995, "step": 6602 }, { "epoch": 0.5632997781948473, "grad_norm": 1.659699285110179, "learning_rate": 4.2212391566888196e-05, "loss": 0.2187, "step": 6603 }, { "epoch": 0.5633850878689644, "grad_norm": 2.1472135629461504, "learning_rate": 4.219874519250981e-05, "loss": 0.276, "step": 6604 }, { "epoch": 0.5634703975430814, "grad_norm": 1.8449105958030454, "learning_rate": 4.2185099413715795e-05, "loss": 0.2685, "step": 6605 }, { "epoch": 0.5635557072171984, "grad_norm": 1.4184978748422596, "learning_rate": 4.217145423154789e-05, "loss": 0.2064, "step": 6606 }, { "epoch": 0.5636410168913155, "grad_norm": 1.591308968313664, "learning_rate": 4.2157809647047855e-05, "loss": 0.2301, "step": 6607 }, { "epoch": 0.5637263265654325, "grad_norm": 1.608177406118355, "learning_rate": 4.2144165661257405e-05, "loss": 0.2514, "step": 6608 }, { "epoch": 0.5638116362395496, "grad_norm": 1.5261417742324508, "learning_rate": 4.2130522275218134e-05, "loss": 0.2218, "step": 6609 }, { "epoch": 0.5638969459136666, "grad_norm": 1.4789949595047573, "learning_rate": 4.211687948997167e-05, "loss": 0.2032, "step": 6610 }, { "epoch": 0.5639822555877837, "grad_norm": 1.6309913079353515, "learning_rate": 4.210323730655959e-05, "loss": 0.2465, "step": 6611 }, { "epoch": 0.5640675652619007, "grad_norm": 1.60266373153477, "learning_rate": 4.208959572602336e-05, "loss": 0.25, "step": 6612 }, { "epoch": 0.5641528749360177, "grad_norm": 2.0399558496372507, "learning_rate": 4.207595474940446e-05, "loss": 0.2463, "step": 6613 }, { "epoch": 0.5642381846101348, "grad_norm": 1.6960664118773578, "learning_rate": 4.2062314377744315e-05, "loss": 0.2195, "step": 6614 }, { "epoch": 0.5643234942842519, "grad_norm": 1.4328794440851396, "learning_rate": 4.204867461208428e-05, "loss": 0.2522, "step": 6615 }, { "epoch": 0.5644088039583689, "grad_norm": 1.7323438418252348, "learning_rate": 4.2035035453465684e-05, "loss": 0.1914, "step": 6616 }, { "epoch": 0.5644941136324859, "grad_norm": 1.410159969589104, "learning_rate": 4.2021396902929796e-05, "loss": 0.2889, "step": 6617 }, { "epoch": 0.5645794233066029, "grad_norm": 1.5155306127987247, "learning_rate": 4.2007758961517886e-05, "loss": 0.2466, "step": 6618 }, { "epoch": 0.5646647329807201, "grad_norm": 1.6453469760974218, "learning_rate": 4.1994121630271086e-05, "loss": 0.2164, "step": 6619 }, { "epoch": 0.5647500426548371, "grad_norm": 1.7273792389148885, "learning_rate": 4.198048491023055e-05, "loss": 0.2412, "step": 6620 }, { "epoch": 0.5648353523289541, "grad_norm": 1.3577817151757767, "learning_rate": 4.19668488024374e-05, "loss": 0.1811, "step": 6621 }, { "epoch": 0.5649206620030711, "grad_norm": 1.460599727648663, "learning_rate": 4.195321330793264e-05, "loss": 0.2262, "step": 6622 }, { "epoch": 0.5650059716771882, "grad_norm": 2.217470794278974, "learning_rate": 4.1939578427757284e-05, "loss": 0.209, "step": 6623 }, { "epoch": 0.5650912813513053, "grad_norm": 1.3388394075345558, "learning_rate": 4.1925944162952285e-05, "loss": 0.1812, "step": 6624 }, { "epoch": 0.5651765910254223, "grad_norm": 1.7849358687647652, "learning_rate": 4.1912310514558545e-05, "loss": 0.2588, "step": 6625 }, { "epoch": 0.5652619006995393, "grad_norm": 1.4854313354338593, "learning_rate": 4.189867748361691e-05, "loss": 0.2273, "step": 6626 }, { "epoch": 0.5653472103736564, "grad_norm": 1.9509119542294875, "learning_rate": 4.1885045071168216e-05, "loss": 0.2011, "step": 6627 }, { "epoch": 0.5654325200477734, "grad_norm": 2.063139036189201, "learning_rate": 4.187141327825319e-05, "loss": 0.2572, "step": 6628 }, { "epoch": 0.5655178297218905, "grad_norm": 1.5805611267095945, "learning_rate": 4.185778210591257e-05, "loss": 0.1948, "step": 6629 }, { "epoch": 0.5656031393960075, "grad_norm": 1.4876114747407245, "learning_rate": 4.1844151555187035e-05, "loss": 0.2906, "step": 6630 }, { "epoch": 0.5656884490701245, "grad_norm": 1.441323792288082, "learning_rate": 4.183052162711716e-05, "loss": 0.2263, "step": 6631 }, { "epoch": 0.5657737587442416, "grad_norm": 1.2925217884521936, "learning_rate": 4.1816892322743555e-05, "loss": 0.1392, "step": 6632 }, { "epoch": 0.5658590684183586, "grad_norm": 1.2502438307413328, "learning_rate": 4.1803263643106735e-05, "loss": 0.2217, "step": 6633 }, { "epoch": 0.5659443780924757, "grad_norm": 2.0751768151749146, "learning_rate": 4.17896355892472e-05, "loss": 0.4398, "step": 6634 }, { "epoch": 0.5660296877665927, "grad_norm": 1.7830412040138561, "learning_rate": 4.177600816220535e-05, "loss": 0.2324, "step": 6635 }, { "epoch": 0.5661149974407098, "grad_norm": 1.7778514095131408, "learning_rate": 4.1762381363021557e-05, "loss": 0.2433, "step": 6636 }, { "epoch": 0.5662003071148268, "grad_norm": 1.4990765590139672, "learning_rate": 4.1748755192736194e-05, "loss": 0.1814, "step": 6637 }, { "epoch": 0.5662856167889438, "grad_norm": 1.307696088671073, "learning_rate": 4.173512965238951e-05, "loss": 0.2035, "step": 6638 }, { "epoch": 0.5663709264630609, "grad_norm": 1.557858248146981, "learning_rate": 4.172150474302175e-05, "loss": 0.2287, "step": 6639 }, { "epoch": 0.566456236137178, "grad_norm": 2.457588751151746, "learning_rate": 4.170788046567314e-05, "loss": 0.3438, "step": 6640 }, { "epoch": 0.566541545811295, "grad_norm": 1.878373607868276, "learning_rate": 4.1694256821383764e-05, "loss": 0.3037, "step": 6641 }, { "epoch": 0.566626855485412, "grad_norm": 1.675778048852384, "learning_rate": 4.168063381119375e-05, "loss": 0.2579, "step": 6642 }, { "epoch": 0.566712165159529, "grad_norm": 1.4861803821014832, "learning_rate": 4.166701143614315e-05, "loss": 0.2415, "step": 6643 }, { "epoch": 0.5667974748336462, "grad_norm": 1.4904076311781198, "learning_rate": 4.1653389697271925e-05, "loss": 0.2386, "step": 6644 }, { "epoch": 0.5668827845077632, "grad_norm": 1.5475410029536387, "learning_rate": 4.1639768595620056e-05, "loss": 0.2127, "step": 6645 }, { "epoch": 0.5669680941818802, "grad_norm": 1.3562211029208824, "learning_rate": 4.162614813222743e-05, "loss": 0.2648, "step": 6646 }, { "epoch": 0.5670534038559972, "grad_norm": 1.308790778514908, "learning_rate": 4.1612528308133895e-05, "loss": 0.2298, "step": 6647 }, { "epoch": 0.5671387135301144, "grad_norm": 1.2879699701101883, "learning_rate": 4.1598909124379237e-05, "loss": 0.2574, "step": 6648 }, { "epoch": 0.5672240232042314, "grad_norm": 1.6166481187418438, "learning_rate": 4.1585290582003225e-05, "loss": 0.215, "step": 6649 }, { "epoch": 0.5673093328783484, "grad_norm": 1.4679166986597372, "learning_rate": 4.157167268204559e-05, "loss": 0.2321, "step": 6650 }, { "epoch": 0.5673946425524654, "grad_norm": 1.6072346797325099, "learning_rate": 4.1558055425545925e-05, "loss": 0.2495, "step": 6651 }, { "epoch": 0.5674799522265825, "grad_norm": 1.8409710647820476, "learning_rate": 4.154443881354388e-05, "loss": 0.2533, "step": 6652 }, { "epoch": 0.5675652619006996, "grad_norm": 1.480759161896823, "learning_rate": 4.153082284707902e-05, "loss": 0.215, "step": 6653 }, { "epoch": 0.5676505715748166, "grad_norm": 1.572369495064697, "learning_rate": 4.15172075271908e-05, "loss": 0.2586, "step": 6654 }, { "epoch": 0.5677358812489336, "grad_norm": 1.5291623267061076, "learning_rate": 4.1503592854918714e-05, "loss": 0.175, "step": 6655 }, { "epoch": 0.5678211909230507, "grad_norm": 1.9971741740260123, "learning_rate": 4.148997883130218e-05, "loss": 0.2793, "step": 6656 }, { "epoch": 0.5679065005971677, "grad_norm": 1.6046388434858736, "learning_rate": 4.147636545738053e-05, "loss": 0.2008, "step": 6657 }, { "epoch": 0.5679918102712848, "grad_norm": 1.4509703546304726, "learning_rate": 4.146275273419307e-05, "loss": 0.1956, "step": 6658 }, { "epoch": 0.5680771199454018, "grad_norm": 1.38621185064494, "learning_rate": 4.14491406627791e-05, "loss": 0.2296, "step": 6659 }, { "epoch": 0.5681624296195189, "grad_norm": 1.8214516411183188, "learning_rate": 4.143552924417777e-05, "loss": 0.161, "step": 6660 }, { "epoch": 0.5682477392936359, "grad_norm": 1.936875150273709, "learning_rate": 4.1421918479428285e-05, "loss": 0.2956, "step": 6661 }, { "epoch": 0.5683330489677529, "grad_norm": 1.720985346153774, "learning_rate": 4.1408308369569734e-05, "loss": 0.2206, "step": 6662 }, { "epoch": 0.56841835864187, "grad_norm": 1.9192549671637023, "learning_rate": 4.13946989156412e-05, "loss": 0.2699, "step": 6663 }, { "epoch": 0.5685036683159871, "grad_norm": 1.6278080520085776, "learning_rate": 4.138109011868165e-05, "loss": 0.2729, "step": 6664 }, { "epoch": 0.5685889779901041, "grad_norm": 1.4604244045137833, "learning_rate": 4.1367481979730086e-05, "loss": 0.1972, "step": 6665 }, { "epoch": 0.5686742876642211, "grad_norm": 1.8348529615400826, "learning_rate": 4.135387449982541e-05, "loss": 0.2117, "step": 6666 }, { "epoch": 0.5687595973383381, "grad_norm": 1.7252313652231706, "learning_rate": 4.134026768000646e-05, "loss": 0.2401, "step": 6667 }, { "epoch": 0.5688449070124552, "grad_norm": 1.582166877866853, "learning_rate": 4.132666152131204e-05, "loss": 0.3159, "step": 6668 }, { "epoch": 0.5689302166865723, "grad_norm": 1.3378241083915297, "learning_rate": 4.131305602478095e-05, "loss": 0.1981, "step": 6669 }, { "epoch": 0.5690155263606893, "grad_norm": 1.5601287396201773, "learning_rate": 4.129945119145184e-05, "loss": 0.1995, "step": 6670 }, { "epoch": 0.5691008360348063, "grad_norm": 1.4973297193001815, "learning_rate": 4.128584702236341e-05, "loss": 0.1857, "step": 6671 }, { "epoch": 0.5691861457089233, "grad_norm": 1.580166695167121, "learning_rate": 4.1272243518554274e-05, "loss": 0.2231, "step": 6672 }, { "epoch": 0.5692714553830405, "grad_norm": 2.0404791460006817, "learning_rate": 4.1258640681062934e-05, "loss": 0.2604, "step": 6673 }, { "epoch": 0.5693567650571575, "grad_norm": 1.8052554207141838, "learning_rate": 4.124503851092793e-05, "loss": 0.2648, "step": 6674 }, { "epoch": 0.5694420747312745, "grad_norm": 1.646124061627656, "learning_rate": 4.123143700918773e-05, "loss": 0.232, "step": 6675 }, { "epoch": 0.5695273844053915, "grad_norm": 1.6388947581960176, "learning_rate": 4.121783617688071e-05, "loss": 0.2433, "step": 6676 }, { "epoch": 0.5696126940795087, "grad_norm": 1.5959586005518103, "learning_rate": 4.120423601504523e-05, "loss": 0.2318, "step": 6677 }, { "epoch": 0.5696980037536257, "grad_norm": 1.5151361679976036, "learning_rate": 4.119063652471958e-05, "loss": 0.2213, "step": 6678 }, { "epoch": 0.5697833134277427, "grad_norm": 1.862125766202671, "learning_rate": 4.117703770694204e-05, "loss": 0.2299, "step": 6679 }, { "epoch": 0.5698686231018597, "grad_norm": 1.4390027028602521, "learning_rate": 4.1163439562750767e-05, "loss": 0.2234, "step": 6680 }, { "epoch": 0.5699539327759768, "grad_norm": 1.2648624901799923, "learning_rate": 4.114984209318392e-05, "loss": 0.1948, "step": 6681 }, { "epoch": 0.5700392424500939, "grad_norm": 1.6212959623034875, "learning_rate": 4.113624529927963e-05, "loss": 0.2629, "step": 6682 }, { "epoch": 0.5701245521242109, "grad_norm": 2.1427445881665848, "learning_rate": 4.112264918207588e-05, "loss": 0.2563, "step": 6683 }, { "epoch": 0.5702098617983279, "grad_norm": 2.1934666594629464, "learning_rate": 4.110905374261069e-05, "loss": 0.2642, "step": 6684 }, { "epoch": 0.570295171472445, "grad_norm": 1.648089186764265, "learning_rate": 4.109545898192203e-05, "loss": 0.2397, "step": 6685 }, { "epoch": 0.570380481146562, "grad_norm": 1.4700786992499841, "learning_rate": 4.1081864901047736e-05, "loss": 0.1947, "step": 6686 }, { "epoch": 0.570465790820679, "grad_norm": 1.6132479980588745, "learning_rate": 4.106827150102567e-05, "loss": 0.1979, "step": 6687 }, { "epoch": 0.5705511004947961, "grad_norm": 1.505178651440039, "learning_rate": 4.105467878289361e-05, "loss": 0.2215, "step": 6688 }, { "epoch": 0.5706364101689132, "grad_norm": 2.359894145919229, "learning_rate": 4.10410867476893e-05, "loss": 0.2743, "step": 6689 }, { "epoch": 0.5707217198430302, "grad_norm": 1.8526680136821325, "learning_rate": 4.102749539645039e-05, "loss": 0.2338, "step": 6690 }, { "epoch": 0.5708070295171472, "grad_norm": 1.4088009902404737, "learning_rate": 4.1013904730214556e-05, "loss": 0.1986, "step": 6691 }, { "epoch": 0.5708923391912643, "grad_norm": 1.4795497165485902, "learning_rate": 4.1000314750019316e-05, "loss": 0.2502, "step": 6692 }, { "epoch": 0.5709776488653814, "grad_norm": 1.9634545574209266, "learning_rate": 4.0986725456902216e-05, "loss": 0.1892, "step": 6693 }, { "epoch": 0.5710629585394984, "grad_norm": 1.5921004491486845, "learning_rate": 4.097313685190074e-05, "loss": 0.2475, "step": 6694 }, { "epoch": 0.5711482682136154, "grad_norm": 1.9347870814556383, "learning_rate": 4.095954893605232e-05, "loss": 0.2455, "step": 6695 }, { "epoch": 0.5712335778877324, "grad_norm": 1.7554955025889372, "learning_rate": 4.0945961710394265e-05, "loss": 0.2494, "step": 6696 }, { "epoch": 0.5713188875618496, "grad_norm": 1.5569129236626162, "learning_rate": 4.093237517596394e-05, "loss": 0.2032, "step": 6697 }, { "epoch": 0.5714041972359666, "grad_norm": 1.9568379015407724, "learning_rate": 4.0918789333798576e-05, "loss": 0.1965, "step": 6698 }, { "epoch": 0.5714895069100836, "grad_norm": 1.7524504535108827, "learning_rate": 4.09052041849354e-05, "loss": 0.3027, "step": 6699 }, { "epoch": 0.5715748165842006, "grad_norm": 1.638088772754119, "learning_rate": 4.089161973041153e-05, "loss": 0.1774, "step": 6700 }, { "epoch": 0.5716601262583177, "grad_norm": 1.6970039051755033, "learning_rate": 4.0878035971264125e-05, "loss": 0.2291, "step": 6701 }, { "epoch": 0.5717454359324348, "grad_norm": 1.7172736329404397, "learning_rate": 4.086445290853018e-05, "loss": 0.2439, "step": 6702 }, { "epoch": 0.5718307456065518, "grad_norm": 1.7397452029756701, "learning_rate": 4.08508705432467e-05, "loss": 0.2682, "step": 6703 }, { "epoch": 0.5719160552806688, "grad_norm": 1.4457055459547246, "learning_rate": 4.083728887645066e-05, "loss": 0.2134, "step": 6704 }, { "epoch": 0.5720013649547858, "grad_norm": 1.4757551781616665, "learning_rate": 4.082370790917891e-05, "loss": 0.2265, "step": 6705 }, { "epoch": 0.572086674628903, "grad_norm": 1.9900851058013875, "learning_rate": 4.081012764246829e-05, "loss": 0.1931, "step": 6706 }, { "epoch": 0.57217198430302, "grad_norm": 1.400217979355169, "learning_rate": 4.079654807735559e-05, "loss": 0.2031, "step": 6707 }, { "epoch": 0.572257293977137, "grad_norm": 1.7720749784240208, "learning_rate": 4.078296921487756e-05, "loss": 0.3002, "step": 6708 }, { "epoch": 0.572342603651254, "grad_norm": 1.5960616755985506, "learning_rate": 4.076939105607084e-05, "loss": 0.1998, "step": 6709 }, { "epoch": 0.5724279133253711, "grad_norm": 1.9565672790829671, "learning_rate": 4.0755813601972054e-05, "loss": 0.3191, "step": 6710 }, { "epoch": 0.5725132229994881, "grad_norm": 2.140668214236476, "learning_rate": 4.074223685361779e-05, "loss": 0.2245, "step": 6711 }, { "epoch": 0.5725985326736052, "grad_norm": 1.3220862938941647, "learning_rate": 4.0728660812044536e-05, "loss": 0.2039, "step": 6712 }, { "epoch": 0.5726838423477222, "grad_norm": 2.065743064103295, "learning_rate": 4.071508547828875e-05, "loss": 0.1736, "step": 6713 }, { "epoch": 0.5727691520218393, "grad_norm": 1.38359207334153, "learning_rate": 4.070151085338688e-05, "loss": 0.2477, "step": 6714 }, { "epoch": 0.5728544616959563, "grad_norm": 1.2478580242424944, "learning_rate": 4.068793693837522e-05, "loss": 0.221, "step": 6715 }, { "epoch": 0.5729397713700733, "grad_norm": 2.1287651365215767, "learning_rate": 4.067436373429008e-05, "loss": 0.2873, "step": 6716 }, { "epoch": 0.5730250810441904, "grad_norm": 1.4166722952039017, "learning_rate": 4.066079124216775e-05, "loss": 0.2172, "step": 6717 }, { "epoch": 0.5731103907183075, "grad_norm": 1.9116250224456757, "learning_rate": 4.064721946304434e-05, "loss": 0.2527, "step": 6718 }, { "epoch": 0.5731957003924245, "grad_norm": 1.5102257268073023, "learning_rate": 4.063364839795605e-05, "loss": 0.2356, "step": 6719 }, { "epoch": 0.5732810100665415, "grad_norm": 1.8623731787598616, "learning_rate": 4.062007804793893e-05, "loss": 0.2218, "step": 6720 }, { "epoch": 0.5733663197406585, "grad_norm": 1.8154333647084868, "learning_rate": 4.0606508414029e-05, "loss": 0.247, "step": 6721 }, { "epoch": 0.5734516294147757, "grad_norm": 1.5479530662203385, "learning_rate": 4.059293949726222e-05, "loss": 0.2413, "step": 6722 }, { "epoch": 0.5735369390888927, "grad_norm": 1.5991250447231726, "learning_rate": 4.057937129867453e-05, "loss": 0.203, "step": 6723 }, { "epoch": 0.5736222487630097, "grad_norm": 1.5881137292335143, "learning_rate": 4.05658038193018e-05, "loss": 0.2884, "step": 6724 }, { "epoch": 0.5737075584371267, "grad_norm": 1.607017345674307, "learning_rate": 4.0552237060179796e-05, "loss": 0.1972, "step": 6725 }, { "epoch": 0.5737928681112439, "grad_norm": 1.521494090449565, "learning_rate": 4.053867102234428e-05, "loss": 0.2102, "step": 6726 }, { "epoch": 0.5738781777853609, "grad_norm": 1.5134234612188122, "learning_rate": 4.052510570683098e-05, "loss": 0.2383, "step": 6727 }, { "epoch": 0.5739634874594779, "grad_norm": 1.4997400217463555, "learning_rate": 4.0511541114675487e-05, "loss": 0.2264, "step": 6728 }, { "epoch": 0.5740487971335949, "grad_norm": 1.6026179859688352, "learning_rate": 4.049797724691342e-05, "loss": 0.2394, "step": 6729 }, { "epoch": 0.574134106807712, "grad_norm": 1.6428361796587634, "learning_rate": 4.04844141045803e-05, "loss": 0.2097, "step": 6730 }, { "epoch": 0.5742194164818291, "grad_norm": 1.4995552039292677, "learning_rate": 4.047085168871159e-05, "loss": 0.2037, "step": 6731 }, { "epoch": 0.5743047261559461, "grad_norm": 1.6508455450821713, "learning_rate": 4.045729000034271e-05, "loss": 0.2694, "step": 6732 }, { "epoch": 0.5743900358300631, "grad_norm": 1.6221736390460992, "learning_rate": 4.0443729040509045e-05, "loss": 0.1993, "step": 6733 }, { "epoch": 0.5744753455041802, "grad_norm": 1.5568704279920902, "learning_rate": 4.043016881024587e-05, "loss": 0.2386, "step": 6734 }, { "epoch": 0.5745606551782972, "grad_norm": 1.7381258434305953, "learning_rate": 4.0416609310588455e-05, "loss": 0.2249, "step": 6735 }, { "epoch": 0.5746459648524143, "grad_norm": 1.8992604799130877, "learning_rate": 4.0403050542572005e-05, "loss": 0.2348, "step": 6736 }, { "epoch": 0.5747312745265313, "grad_norm": 1.545476107523433, "learning_rate": 4.0389492507231635e-05, "loss": 0.2355, "step": 6737 }, { "epoch": 0.5748165842006484, "grad_norm": 1.9715923802023265, "learning_rate": 4.037593520560244e-05, "loss": 0.3487, "step": 6738 }, { "epoch": 0.5749018938747654, "grad_norm": 1.6894666726335423, "learning_rate": 4.036237863871944e-05, "loss": 0.2952, "step": 6739 }, { "epoch": 0.5749872035488824, "grad_norm": 1.4593197438400374, "learning_rate": 4.0348822807617654e-05, "loss": 0.1783, "step": 6740 }, { "epoch": 0.5750725132229995, "grad_norm": 1.5596900850953692, "learning_rate": 4.0335267713331944e-05, "loss": 0.2588, "step": 6741 }, { "epoch": 0.5751578228971166, "grad_norm": 1.404554998579199, "learning_rate": 4.0321713356897176e-05, "loss": 0.2411, "step": 6742 }, { "epoch": 0.5752431325712336, "grad_norm": 1.372821338954217, "learning_rate": 4.0308159739348174e-05, "loss": 0.239, "step": 6743 }, { "epoch": 0.5753284422453506, "grad_norm": 2.1047970296542275, "learning_rate": 4.029460686171967e-05, "loss": 0.2622, "step": 6744 }, { "epoch": 0.5754137519194676, "grad_norm": 2.1247116341996666, "learning_rate": 4.028105472504634e-05, "loss": 0.2709, "step": 6745 }, { "epoch": 0.5754990615935847, "grad_norm": 1.7729467981858305, "learning_rate": 4.026750333036286e-05, "loss": 0.2662, "step": 6746 }, { "epoch": 0.5755843712677018, "grad_norm": 1.5440849033591832, "learning_rate": 4.025395267870376e-05, "loss": 0.1853, "step": 6747 }, { "epoch": 0.5756696809418188, "grad_norm": 1.3762169134613313, "learning_rate": 4.024040277110359e-05, "loss": 0.2291, "step": 6748 }, { "epoch": 0.5757549906159358, "grad_norm": 1.8539336911825837, "learning_rate": 4.022685360859683e-05, "loss": 0.1728, "step": 6749 }, { "epoch": 0.5758403002900528, "grad_norm": 1.6049584090408626, "learning_rate": 4.0213305192217834e-05, "loss": 0.2073, "step": 6750 }, { "epoch": 0.57592560996417, "grad_norm": 1.8401960614693254, "learning_rate": 4.019975752300099e-05, "loss": 0.2392, "step": 6751 }, { "epoch": 0.576010919638287, "grad_norm": 1.941942167860835, "learning_rate": 4.0186210601980575e-05, "loss": 0.2249, "step": 6752 }, { "epoch": 0.576096229312404, "grad_norm": 1.6010302892151207, "learning_rate": 4.0172664430190847e-05, "loss": 0.2517, "step": 6753 }, { "epoch": 0.576181538986521, "grad_norm": 1.4075128606882885, "learning_rate": 4.0159119008665945e-05, "loss": 0.2006, "step": 6754 }, { "epoch": 0.5762668486606382, "grad_norm": 2.3130848119337273, "learning_rate": 4.0145574338440016e-05, "loss": 0.3146, "step": 6755 }, { "epoch": 0.5763521583347552, "grad_norm": 1.5406901471521977, "learning_rate": 4.0132030420547145e-05, "loss": 0.1855, "step": 6756 }, { "epoch": 0.5764374680088722, "grad_norm": 1.9924820027800973, "learning_rate": 4.011848725602129e-05, "loss": 0.1914, "step": 6757 }, { "epoch": 0.5765227776829892, "grad_norm": 1.7331109466698307, "learning_rate": 4.010494484589642e-05, "loss": 0.2164, "step": 6758 }, { "epoch": 0.5766080873571063, "grad_norm": 1.3969024245879094, "learning_rate": 4.009140319120645e-05, "loss": 0.22, "step": 6759 }, { "epoch": 0.5766933970312234, "grad_norm": 1.8881136395675502, "learning_rate": 4.007786229298517e-05, "loss": 0.2937, "step": 6760 }, { "epoch": 0.5767787067053404, "grad_norm": 1.5199752198257304, "learning_rate": 4.0064322152266385e-05, "loss": 0.2627, "step": 6761 }, { "epoch": 0.5768640163794574, "grad_norm": 1.6501391698926604, "learning_rate": 4.005078277008381e-05, "loss": 0.2377, "step": 6762 }, { "epoch": 0.5769493260535745, "grad_norm": 1.4624464432781075, "learning_rate": 4.00372441474711e-05, "loss": 0.2381, "step": 6763 }, { "epoch": 0.5770346357276915, "grad_norm": 2.174264923932256, "learning_rate": 4.002370628546185e-05, "loss": 0.2189, "step": 6764 }, { "epoch": 0.5771199454018086, "grad_norm": 2.014644413850199, "learning_rate": 4.001016918508963e-05, "loss": 0.1533, "step": 6765 }, { "epoch": 0.5772052550759256, "grad_norm": 1.4012264380400965, "learning_rate": 3.9996632847387885e-05, "loss": 0.2578, "step": 6766 }, { "epoch": 0.5772905647500427, "grad_norm": 1.4388151372487104, "learning_rate": 3.998309727339006e-05, "loss": 0.2521, "step": 6767 }, { "epoch": 0.5773758744241597, "grad_norm": 1.7240648803362613, "learning_rate": 3.9969562464129535e-05, "loss": 0.2478, "step": 6768 }, { "epoch": 0.5774611840982767, "grad_norm": 1.554253905901481, "learning_rate": 3.9956028420639636e-05, "loss": 0.2434, "step": 6769 }, { "epoch": 0.5775464937723938, "grad_norm": 1.668143913130381, "learning_rate": 3.994249514395356e-05, "loss": 0.2221, "step": 6770 }, { "epoch": 0.5776318034465109, "grad_norm": 1.4402014186183076, "learning_rate": 3.992896263510456e-05, "loss": 0.2106, "step": 6771 }, { "epoch": 0.5777171131206279, "grad_norm": 1.6117703287633398, "learning_rate": 3.991543089512574e-05, "loss": 0.2321, "step": 6772 }, { "epoch": 0.5778024227947449, "grad_norm": 1.529617529292597, "learning_rate": 3.990189992505018e-05, "loss": 0.2461, "step": 6773 }, { "epoch": 0.5778877324688619, "grad_norm": 1.9477614342617433, "learning_rate": 3.988836972591089e-05, "loss": 0.2051, "step": 6774 }, { "epoch": 0.5779730421429791, "grad_norm": 1.4997448704233276, "learning_rate": 3.987484029874086e-05, "loss": 0.1822, "step": 6775 }, { "epoch": 0.5780583518170961, "grad_norm": 1.9168036315189407, "learning_rate": 3.9861311644572944e-05, "loss": 0.2342, "step": 6776 }, { "epoch": 0.5781436614912131, "grad_norm": 1.688157695183893, "learning_rate": 3.984778376444001e-05, "loss": 0.2013, "step": 6777 }, { "epoch": 0.5782289711653301, "grad_norm": 1.756393334455185, "learning_rate": 3.9834256659374856e-05, "loss": 0.2617, "step": 6778 }, { "epoch": 0.5783142808394472, "grad_norm": 1.5035922428628186, "learning_rate": 3.982073033041017e-05, "loss": 0.2742, "step": 6779 }, { "epoch": 0.5783995905135643, "grad_norm": 1.6851197162288118, "learning_rate": 3.980720477857863e-05, "loss": 0.2969, "step": 6780 }, { "epoch": 0.5784849001876813, "grad_norm": 1.8430372250503089, "learning_rate": 3.9793680004912856e-05, "loss": 0.2409, "step": 6781 }, { "epoch": 0.5785702098617983, "grad_norm": 1.2594589924786561, "learning_rate": 3.9780156010445355e-05, "loss": 0.1962, "step": 6782 }, { "epoch": 0.5786555195359153, "grad_norm": 1.2585123141159542, "learning_rate": 3.976663279620865e-05, "loss": 0.1763, "step": 6783 }, { "epoch": 0.5787408292100324, "grad_norm": 1.5356103744710856, "learning_rate": 3.9753110363235136e-05, "loss": 0.2064, "step": 6784 }, { "epoch": 0.5788261388841495, "grad_norm": 1.4158304869541043, "learning_rate": 3.973958871255722e-05, "loss": 0.242, "step": 6785 }, { "epoch": 0.5789114485582665, "grad_norm": 1.6754872880098837, "learning_rate": 3.972606784520716e-05, "loss": 0.2027, "step": 6786 }, { "epoch": 0.5789967582323835, "grad_norm": 1.2912282148297882, "learning_rate": 3.9712547762217226e-05, "loss": 0.175, "step": 6787 }, { "epoch": 0.5790820679065006, "grad_norm": 1.6931138086072313, "learning_rate": 3.969902846461961e-05, "loss": 0.1827, "step": 6788 }, { "epoch": 0.5791673775806176, "grad_norm": 1.809250912010234, "learning_rate": 3.9685509953446424e-05, "loss": 0.2253, "step": 6789 }, { "epoch": 0.5792526872547347, "grad_norm": 1.7551175630758429, "learning_rate": 3.967199222972974e-05, "loss": 0.2111, "step": 6790 }, { "epoch": 0.5793379969288517, "grad_norm": 1.4464674999912317, "learning_rate": 3.965847529450159e-05, "loss": 0.192, "step": 6791 }, { "epoch": 0.5794233066029688, "grad_norm": 1.5825507170639013, "learning_rate": 3.964495914879387e-05, "loss": 0.2656, "step": 6792 }, { "epoch": 0.5795086162770858, "grad_norm": 1.4891369379737518, "learning_rate": 3.96314437936385e-05, "loss": 0.1906, "step": 6793 }, { "epoch": 0.5795939259512028, "grad_norm": 1.9043609138871724, "learning_rate": 3.9617929230067306e-05, "loss": 0.2226, "step": 6794 }, { "epoch": 0.5796792356253199, "grad_norm": 1.7684473082005339, "learning_rate": 3.960441545911204e-05, "loss": 0.2493, "step": 6795 }, { "epoch": 0.579764545299437, "grad_norm": 1.8005927063715106, "learning_rate": 3.959090248180441e-05, "loss": 0.2846, "step": 6796 }, { "epoch": 0.579849854973554, "grad_norm": 1.5439967340523764, "learning_rate": 3.957739029917605e-05, "loss": 0.2128, "step": 6797 }, { "epoch": 0.579935164647671, "grad_norm": 1.916114983427395, "learning_rate": 3.9563878912258586e-05, "loss": 0.2838, "step": 6798 }, { "epoch": 0.580020474321788, "grad_norm": 1.763959723858345, "learning_rate": 3.955036832208348e-05, "loss": 0.2514, "step": 6799 }, { "epoch": 0.5801057839959052, "grad_norm": 1.287097334624545, "learning_rate": 3.9536858529682224e-05, "loss": 0.2088, "step": 6800 }, { "epoch": 0.5801910936700222, "grad_norm": 1.4774201339591198, "learning_rate": 3.9523349536086247e-05, "loss": 0.2673, "step": 6801 }, { "epoch": 0.5802764033441392, "grad_norm": 1.7668963555906543, "learning_rate": 3.950984134232683e-05, "loss": 0.2278, "step": 6802 }, { "epoch": 0.5803617130182562, "grad_norm": 1.6450727934804616, "learning_rate": 3.94963339494353e-05, "loss": 0.2419, "step": 6803 }, { "epoch": 0.5804470226923734, "grad_norm": 1.6381992390506632, "learning_rate": 3.948282735844285e-05, "loss": 0.1786, "step": 6804 }, { "epoch": 0.5805323323664904, "grad_norm": 1.6060196054480456, "learning_rate": 3.9469321570380646e-05, "loss": 0.2068, "step": 6805 }, { "epoch": 0.5806176420406074, "grad_norm": 1.8207343926452868, "learning_rate": 3.945581658627977e-05, "loss": 0.2309, "step": 6806 }, { "epoch": 0.5807029517147244, "grad_norm": 1.748945736077635, "learning_rate": 3.9442312407171285e-05, "loss": 0.2013, "step": 6807 }, { "epoch": 0.5807882613888415, "grad_norm": 1.9641229661178279, "learning_rate": 3.9428809034086126e-05, "loss": 0.2151, "step": 6808 }, { "epoch": 0.5808735710629586, "grad_norm": 2.3071226541747296, "learning_rate": 3.941530646805522e-05, "loss": 0.2541, "step": 6809 }, { "epoch": 0.5809588807370756, "grad_norm": 1.6634398535726675, "learning_rate": 3.940180471010944e-05, "loss": 0.2219, "step": 6810 }, { "epoch": 0.5810441904111926, "grad_norm": 1.6434017842385582, "learning_rate": 3.938830376127953e-05, "loss": 0.2279, "step": 6811 }, { "epoch": 0.5811295000853097, "grad_norm": 1.5295427107744497, "learning_rate": 3.9374803622596236e-05, "loss": 0.1291, "step": 6812 }, { "epoch": 0.5812148097594267, "grad_norm": 1.4377196600227877, "learning_rate": 3.9361304295090236e-05, "loss": 0.1689, "step": 6813 }, { "epoch": 0.5813001194335438, "grad_norm": 1.599304990022491, "learning_rate": 3.934780577979212e-05, "loss": 0.1874, "step": 6814 }, { "epoch": 0.5813854291076608, "grad_norm": 1.8808830156751897, "learning_rate": 3.9334308077732426e-05, "loss": 0.3407, "step": 6815 }, { "epoch": 0.5814707387817779, "grad_norm": 1.9049862283924759, "learning_rate": 3.932081118994162e-05, "loss": 0.2488, "step": 6816 }, { "epoch": 0.5815560484558949, "grad_norm": 1.9567393315527513, "learning_rate": 3.930731511745015e-05, "loss": 0.2445, "step": 6817 }, { "epoch": 0.5816413581300119, "grad_norm": 1.4626432038972246, "learning_rate": 3.9293819861288334e-05, "loss": 0.229, "step": 6818 }, { "epoch": 0.581726667804129, "grad_norm": 2.018270958908694, "learning_rate": 3.9280325422486474e-05, "loss": 0.2907, "step": 6819 }, { "epoch": 0.581811977478246, "grad_norm": 1.4957888618737003, "learning_rate": 3.926683180207483e-05, "loss": 0.2257, "step": 6820 }, { "epoch": 0.5818972871523631, "grad_norm": 1.5780888921553784, "learning_rate": 3.9253339001083524e-05, "loss": 0.2761, "step": 6821 }, { "epoch": 0.5819825968264801, "grad_norm": 1.6171763691542138, "learning_rate": 3.923984702054266e-05, "loss": 0.2221, "step": 6822 }, { "epoch": 0.5820679065005971, "grad_norm": 1.8562498355955315, "learning_rate": 3.922635586148234e-05, "loss": 0.2411, "step": 6823 }, { "epoch": 0.5821532161747142, "grad_norm": 1.4497814474153596, "learning_rate": 3.921286552493246e-05, "loss": 0.2027, "step": 6824 }, { "epoch": 0.5822385258488313, "grad_norm": 1.3132383903485512, "learning_rate": 3.9199376011922995e-05, "loss": 0.1673, "step": 6825 }, { "epoch": 0.5823238355229483, "grad_norm": 1.7271511403551947, "learning_rate": 3.918588732348378e-05, "loss": 0.2184, "step": 6826 }, { "epoch": 0.5824091451970653, "grad_norm": 1.753849224021608, "learning_rate": 3.917239946064459e-05, "loss": 0.2802, "step": 6827 }, { "epoch": 0.5824944548711823, "grad_norm": 1.8859722801105043, "learning_rate": 3.915891242443516e-05, "loss": 0.1754, "step": 6828 }, { "epoch": 0.5825797645452995, "grad_norm": 1.278450675827457, "learning_rate": 3.914542621588516e-05, "loss": 0.1978, "step": 6829 }, { "epoch": 0.5826650742194165, "grad_norm": 1.76836499987888, "learning_rate": 3.913194083602421e-05, "loss": 0.2127, "step": 6830 }, { "epoch": 0.5827503838935335, "grad_norm": 1.6174905833894415, "learning_rate": 3.9118456285881786e-05, "loss": 0.2463, "step": 6831 }, { "epoch": 0.5828356935676505, "grad_norm": 1.7549285967611972, "learning_rate": 3.910497256648742e-05, "loss": 0.1865, "step": 6832 }, { "epoch": 0.5829210032417677, "grad_norm": 1.7535776624882302, "learning_rate": 3.9091489678870514e-05, "loss": 0.3496, "step": 6833 }, { "epoch": 0.5830063129158847, "grad_norm": 2.103143373950431, "learning_rate": 3.907800762406038e-05, "loss": 0.2604, "step": 6834 }, { "epoch": 0.5830916225900017, "grad_norm": 1.875993465444738, "learning_rate": 3.906452640308634e-05, "loss": 0.2116, "step": 6835 }, { "epoch": 0.5831769322641187, "grad_norm": 1.2715850649575495, "learning_rate": 3.90510460169776e-05, "loss": 0.252, "step": 6836 }, { "epoch": 0.5832622419382358, "grad_norm": 1.4552057607086895, "learning_rate": 3.903756646676331e-05, "loss": 0.225, "step": 6837 }, { "epoch": 0.5833475516123529, "grad_norm": 1.7436705977980727, "learning_rate": 3.902408775347256e-05, "loss": 0.25, "step": 6838 }, { "epoch": 0.5834328612864699, "grad_norm": 1.9616526481280927, "learning_rate": 3.90106098781344e-05, "loss": 0.2734, "step": 6839 }, { "epoch": 0.5835181709605869, "grad_norm": 1.3156676441811086, "learning_rate": 3.899713284177775e-05, "loss": 0.1822, "step": 6840 }, { "epoch": 0.583603480634704, "grad_norm": 1.4631063915896039, "learning_rate": 3.898365664543155e-05, "loss": 0.2406, "step": 6841 }, { "epoch": 0.583688790308821, "grad_norm": 1.5631642264927945, "learning_rate": 3.897018129012462e-05, "loss": 0.2094, "step": 6842 }, { "epoch": 0.583774099982938, "grad_norm": 1.4374498275625636, "learning_rate": 3.895670677688576e-05, "loss": 0.2218, "step": 6843 }, { "epoch": 0.5838594096570551, "grad_norm": 1.7863526456690237, "learning_rate": 3.8943233106743634e-05, "loss": 0.1769, "step": 6844 }, { "epoch": 0.5839447193311722, "grad_norm": 1.570579240453411, "learning_rate": 3.8929760280726904e-05, "loss": 0.2526, "step": 6845 }, { "epoch": 0.5840300290052892, "grad_norm": 1.5368891052864686, "learning_rate": 3.8916288299864164e-05, "loss": 0.2131, "step": 6846 }, { "epoch": 0.5841153386794062, "grad_norm": 1.6538167389589424, "learning_rate": 3.89028171651839e-05, "loss": 0.2102, "step": 6847 }, { "epoch": 0.5842006483535233, "grad_norm": 1.5661242604606733, "learning_rate": 3.888934687771458e-05, "loss": 0.2629, "step": 6848 }, { "epoch": 0.5842859580276404, "grad_norm": 1.7180218628101769, "learning_rate": 3.887587743848459e-05, "loss": 0.2458, "step": 6849 }, { "epoch": 0.5843712677017574, "grad_norm": 1.5843525751396639, "learning_rate": 3.886240884852223e-05, "loss": 0.2537, "step": 6850 }, { "epoch": 0.5844565773758744, "grad_norm": 1.6623334901645286, "learning_rate": 3.8848941108855774e-05, "loss": 0.2353, "step": 6851 }, { "epoch": 0.5845418870499914, "grad_norm": 1.5086527642471872, "learning_rate": 3.883547422051343e-05, "loss": 0.2302, "step": 6852 }, { "epoch": 0.5846271967241086, "grad_norm": 1.6409812903677221, "learning_rate": 3.8822008184523265e-05, "loss": 0.1962, "step": 6853 }, { "epoch": 0.5847125063982256, "grad_norm": 1.5552049858668746, "learning_rate": 3.8808543001913384e-05, "loss": 0.2739, "step": 6854 }, { "epoch": 0.5847978160723426, "grad_norm": 1.44139458876444, "learning_rate": 3.8795078673711795e-05, "loss": 0.2383, "step": 6855 }, { "epoch": 0.5848831257464596, "grad_norm": 1.6460153583676853, "learning_rate": 3.8781615200946386e-05, "loss": 0.2483, "step": 6856 }, { "epoch": 0.5849684354205766, "grad_norm": 1.684968957586758, "learning_rate": 3.8768152584645045e-05, "loss": 0.24, "step": 6857 }, { "epoch": 0.5850537450946938, "grad_norm": 1.784240086895008, "learning_rate": 3.8754690825835564e-05, "loss": 0.1809, "step": 6858 }, { "epoch": 0.5851390547688108, "grad_norm": 1.9421845071636044, "learning_rate": 3.8741229925545705e-05, "loss": 0.2169, "step": 6859 }, { "epoch": 0.5852243644429278, "grad_norm": 1.7489124051990026, "learning_rate": 3.872776988480309e-05, "loss": 0.2547, "step": 6860 }, { "epoch": 0.5853096741170448, "grad_norm": 1.4305236539195072, "learning_rate": 3.8714310704635335e-05, "loss": 0.2883, "step": 6861 }, { "epoch": 0.585394983791162, "grad_norm": 1.6043517596319186, "learning_rate": 3.870085238607002e-05, "loss": 0.1789, "step": 6862 }, { "epoch": 0.585480293465279, "grad_norm": 1.4734550758195426, "learning_rate": 3.868739493013455e-05, "loss": 0.2005, "step": 6863 }, { "epoch": 0.585565603139396, "grad_norm": 1.7234789901262906, "learning_rate": 3.867393833785637e-05, "loss": 0.2342, "step": 6864 }, { "epoch": 0.585650912813513, "grad_norm": 1.360057002023015, "learning_rate": 3.866048261026283e-05, "loss": 0.2298, "step": 6865 }, { "epoch": 0.5857362224876301, "grad_norm": 1.1615437132696744, "learning_rate": 3.864702774838116e-05, "loss": 0.1957, "step": 6866 }, { "epoch": 0.5858215321617471, "grad_norm": 1.6104118192467354, "learning_rate": 3.86335737532386e-05, "loss": 0.2339, "step": 6867 }, { "epoch": 0.5859068418358642, "grad_norm": 2.204837647427509, "learning_rate": 3.8620120625862285e-05, "loss": 0.2362, "step": 6868 }, { "epoch": 0.5859921515099812, "grad_norm": 1.4271412672920158, "learning_rate": 3.860666836727929e-05, "loss": 0.2931, "step": 6869 }, { "epoch": 0.5860774611840983, "grad_norm": 1.3164028088442596, "learning_rate": 3.859321697851661e-05, "loss": 0.2415, "step": 6870 }, { "epoch": 0.5861627708582153, "grad_norm": 1.675022620432686, "learning_rate": 3.857976646060122e-05, "loss": 0.237, "step": 6871 }, { "epoch": 0.5862480805323323, "grad_norm": 1.5715570025784988, "learning_rate": 3.8566316814559955e-05, "loss": 0.257, "step": 6872 }, { "epoch": 0.5863333902064494, "grad_norm": 1.426038410562941, "learning_rate": 3.855286804141964e-05, "loss": 0.2203, "step": 6873 }, { "epoch": 0.5864186998805665, "grad_norm": 1.622267332748958, "learning_rate": 3.8539420142207013e-05, "loss": 0.264, "step": 6874 }, { "epoch": 0.5865040095546835, "grad_norm": 1.441007564167445, "learning_rate": 3.8525973117948785e-05, "loss": 0.2455, "step": 6875 }, { "epoch": 0.5865893192288005, "grad_norm": 1.2731090660390263, "learning_rate": 3.851252696967152e-05, "loss": 0.2381, "step": 6876 }, { "epoch": 0.5866746289029175, "grad_norm": 1.6056576360145387, "learning_rate": 3.8499081698401786e-05, "loss": 0.2134, "step": 6877 }, { "epoch": 0.5867599385770347, "grad_norm": 1.5477066116573328, "learning_rate": 3.848563730516604e-05, "loss": 0.2248, "step": 6878 }, { "epoch": 0.5868452482511517, "grad_norm": 2.050905409187982, "learning_rate": 3.84721937909907e-05, "loss": 0.2515, "step": 6879 }, { "epoch": 0.5869305579252687, "grad_norm": 2.0658315858247525, "learning_rate": 3.8458751156902104e-05, "loss": 0.2305, "step": 6880 }, { "epoch": 0.5870158675993857, "grad_norm": 1.4688009699641662, "learning_rate": 3.844530940392654e-05, "loss": 0.1478, "step": 6881 }, { "epoch": 0.5871011772735029, "grad_norm": 1.346838484620931, "learning_rate": 3.843186853309018e-05, "loss": 0.2003, "step": 6882 }, { "epoch": 0.5871864869476199, "grad_norm": 2.1089171866468055, "learning_rate": 3.841842854541919e-05, "loss": 0.3018, "step": 6883 }, { "epoch": 0.5872717966217369, "grad_norm": 1.442689732547575, "learning_rate": 3.8404989441939656e-05, "loss": 0.2644, "step": 6884 }, { "epoch": 0.5873571062958539, "grad_norm": 1.5882378793209204, "learning_rate": 3.8391551223677535e-05, "loss": 0.2299, "step": 6885 }, { "epoch": 0.587442415969971, "grad_norm": 1.9275251002477143, "learning_rate": 3.83781138916588e-05, "loss": 0.1967, "step": 6886 }, { "epoch": 0.5875277256440881, "grad_norm": 1.8473377558144632, "learning_rate": 3.836467744690931e-05, "loss": 0.2774, "step": 6887 }, { "epoch": 0.5876130353182051, "grad_norm": 2.187518092489354, "learning_rate": 3.835124189045487e-05, "loss": 0.2402, "step": 6888 }, { "epoch": 0.5876983449923221, "grad_norm": 1.5814042721002801, "learning_rate": 3.8337807223321216e-05, "loss": 0.2357, "step": 6889 }, { "epoch": 0.5877836546664392, "grad_norm": 2.024703877779191, "learning_rate": 3.832437344653399e-05, "loss": 0.187, "step": 6890 }, { "epoch": 0.5878689643405562, "grad_norm": 1.6442350360919675, "learning_rate": 3.8310940561118835e-05, "loss": 0.2099, "step": 6891 }, { "epoch": 0.5879542740146733, "grad_norm": 2.0039406340328005, "learning_rate": 3.829750856810123e-05, "loss": 0.2597, "step": 6892 }, { "epoch": 0.5880395836887903, "grad_norm": 1.380170291400362, "learning_rate": 3.8284077468506654e-05, "loss": 0.2435, "step": 6893 }, { "epoch": 0.5881248933629074, "grad_norm": 1.3394436697683192, "learning_rate": 3.8270647263360535e-05, "loss": 0.2362, "step": 6894 }, { "epoch": 0.5882102030370244, "grad_norm": 1.8609602125240765, "learning_rate": 3.825721795368814e-05, "loss": 0.251, "step": 6895 }, { "epoch": 0.5882955127111414, "grad_norm": 1.859349034232267, "learning_rate": 3.8243789540514754e-05, "loss": 0.2231, "step": 6896 }, { "epoch": 0.5883808223852585, "grad_norm": 1.821829102659433, "learning_rate": 3.823036202486559e-05, "loss": 0.2342, "step": 6897 }, { "epoch": 0.5884661320593755, "grad_norm": 1.7128233945347309, "learning_rate": 3.821693540776571e-05, "loss": 0.2329, "step": 6898 }, { "epoch": 0.5885514417334926, "grad_norm": 1.3942518194719236, "learning_rate": 3.8203509690240216e-05, "loss": 0.2289, "step": 6899 }, { "epoch": 0.5886367514076096, "grad_norm": 1.954204535642691, "learning_rate": 3.819008487331407e-05, "loss": 0.2267, "step": 6900 }, { "epoch": 0.5887220610817266, "grad_norm": 1.495930316653595, "learning_rate": 3.817666095801219e-05, "loss": 0.2268, "step": 6901 }, { "epoch": 0.5888073707558437, "grad_norm": 1.6899231361651452, "learning_rate": 3.81632379453594e-05, "loss": 0.1817, "step": 6902 }, { "epoch": 0.5888926804299608, "grad_norm": 1.5573572593662577, "learning_rate": 3.8149815836380484e-05, "loss": 0.2217, "step": 6903 }, { "epoch": 0.5889779901040778, "grad_norm": 1.4057958929340817, "learning_rate": 3.8136394632100193e-05, "loss": 0.2277, "step": 6904 }, { "epoch": 0.5890632997781948, "grad_norm": 1.7234616289153013, "learning_rate": 3.8122974333543106e-05, "loss": 0.2791, "step": 6905 }, { "epoch": 0.5891486094523118, "grad_norm": 1.9275649903617988, "learning_rate": 3.8109554941733805e-05, "loss": 0.2227, "step": 6906 }, { "epoch": 0.589233919126429, "grad_norm": 1.7769273207210812, "learning_rate": 3.809613645769682e-05, "loss": 0.2244, "step": 6907 }, { "epoch": 0.589319228800546, "grad_norm": 1.3406090510915747, "learning_rate": 3.8082718882456546e-05, "loss": 0.2627, "step": 6908 }, { "epoch": 0.589404538474663, "grad_norm": 1.2753411790221258, "learning_rate": 3.806930221703737e-05, "loss": 0.2278, "step": 6909 }, { "epoch": 0.58948984814878, "grad_norm": 1.5196992050746156, "learning_rate": 3.805588646246356e-05, "loss": 0.2395, "step": 6910 }, { "epoch": 0.5895751578228972, "grad_norm": 2.090709243240319, "learning_rate": 3.804247161975935e-05, "loss": 0.2183, "step": 6911 }, { "epoch": 0.5896604674970142, "grad_norm": 1.5769916657280199, "learning_rate": 3.8029057689948885e-05, "loss": 0.2335, "step": 6912 }, { "epoch": 0.5897457771711312, "grad_norm": 1.9649894766489373, "learning_rate": 3.8015644674056266e-05, "loss": 0.2132, "step": 6913 }, { "epoch": 0.5898310868452482, "grad_norm": 1.5967762955769524, "learning_rate": 3.8002232573105475e-05, "loss": 0.2167, "step": 6914 }, { "epoch": 0.5899163965193653, "grad_norm": 1.7216494985477182, "learning_rate": 3.7988821388120466e-05, "loss": 0.2355, "step": 6915 }, { "epoch": 0.5900017061934824, "grad_norm": 1.7933388454796881, "learning_rate": 3.797541112012514e-05, "loss": 0.2415, "step": 6916 }, { "epoch": 0.5900870158675994, "grad_norm": 1.520637795754691, "learning_rate": 3.796200177014326e-05, "loss": 0.2828, "step": 6917 }, { "epoch": 0.5901723255417164, "grad_norm": 1.4733795898966675, "learning_rate": 3.794859333919857e-05, "loss": 0.2372, "step": 6918 }, { "epoch": 0.5902576352158335, "grad_norm": 1.6763312586490042, "learning_rate": 3.7935185828314754e-05, "loss": 0.1954, "step": 6919 }, { "epoch": 0.5903429448899505, "grad_norm": 1.7589104508316504, "learning_rate": 3.792177923851537e-05, "loss": 0.2184, "step": 6920 }, { "epoch": 0.5904282545640676, "grad_norm": 1.5660582652939226, "learning_rate": 3.790837357082398e-05, "loss": 0.2942, "step": 6921 }, { "epoch": 0.5905135642381846, "grad_norm": 1.6589404153085756, "learning_rate": 3.789496882626399e-05, "loss": 0.2393, "step": 6922 }, { "epoch": 0.5905988739123017, "grad_norm": 1.2189395463706552, "learning_rate": 3.788156500585883e-05, "loss": 0.1419, "step": 6923 }, { "epoch": 0.5906841835864187, "grad_norm": 1.6070416766817346, "learning_rate": 3.786816211063176e-05, "loss": 0.2502, "step": 6924 }, { "epoch": 0.5907694932605357, "grad_norm": 1.5107516564021293, "learning_rate": 3.7854760141606046e-05, "loss": 0.2804, "step": 6925 }, { "epoch": 0.5908548029346528, "grad_norm": 1.789451723362595, "learning_rate": 3.784135909980488e-05, "loss": 0.1783, "step": 6926 }, { "epoch": 0.5909401126087699, "grad_norm": 1.4574930313231658, "learning_rate": 3.782795898625131e-05, "loss": 0.23, "step": 6927 }, { "epoch": 0.5910254222828869, "grad_norm": 1.5111202662009864, "learning_rate": 3.781455980196839e-05, "loss": 0.2365, "step": 6928 }, { "epoch": 0.5911107319570039, "grad_norm": 1.9085384453011112, "learning_rate": 3.7801161547979095e-05, "loss": 0.2606, "step": 6929 }, { "epoch": 0.5911960416311209, "grad_norm": 1.6300753781962614, "learning_rate": 3.778776422530628e-05, "loss": 0.2196, "step": 6930 }, { "epoch": 0.5912813513052381, "grad_norm": 1.5008752176871276, "learning_rate": 3.777436783497277e-05, "loss": 0.2224, "step": 6931 }, { "epoch": 0.5913666609793551, "grad_norm": 1.4715395470249013, "learning_rate": 3.7760972378001304e-05, "loss": 0.2214, "step": 6932 }, { "epoch": 0.5914519706534721, "grad_norm": 1.506030833654386, "learning_rate": 3.774757785541457e-05, "loss": 0.2606, "step": 6933 }, { "epoch": 0.5915372803275891, "grad_norm": 1.414386522462766, "learning_rate": 3.7734184268235146e-05, "loss": 0.1729, "step": 6934 }, { "epoch": 0.5916225900017061, "grad_norm": 1.8142462900982474, "learning_rate": 3.772079161748557e-05, "loss": 0.2598, "step": 6935 }, { "epoch": 0.5917078996758233, "grad_norm": 1.5565464273667187, "learning_rate": 3.770739990418832e-05, "loss": 0.2701, "step": 6936 }, { "epoch": 0.5917932093499403, "grad_norm": 1.5744870243597815, "learning_rate": 3.7694009129365756e-05, "loss": 0.2482, "step": 6937 }, { "epoch": 0.5918785190240573, "grad_norm": 1.5139657602809988, "learning_rate": 3.768061929404019e-05, "loss": 0.2497, "step": 6938 }, { "epoch": 0.5919638286981743, "grad_norm": 1.2590832658704099, "learning_rate": 3.76672303992339e-05, "loss": 0.2116, "step": 6939 }, { "epoch": 0.5920491383722915, "grad_norm": 1.448806416134711, "learning_rate": 3.765384244596901e-05, "loss": 0.2395, "step": 6940 }, { "epoch": 0.5921344480464085, "grad_norm": 1.6416096230421902, "learning_rate": 3.764045543526765e-05, "loss": 0.2997, "step": 6941 }, { "epoch": 0.5922197577205255, "grad_norm": 1.7883808553211475, "learning_rate": 3.7627069368151845e-05, "loss": 0.2492, "step": 6942 }, { "epoch": 0.5923050673946425, "grad_norm": 1.8744765504686465, "learning_rate": 3.7613684245643544e-05, "loss": 0.3074, "step": 6943 }, { "epoch": 0.5923903770687596, "grad_norm": 1.9771734808685117, "learning_rate": 3.7600300068764615e-05, "loss": 0.2443, "step": 6944 }, { "epoch": 0.5924756867428767, "grad_norm": 2.065753566878747, "learning_rate": 3.758691683853691e-05, "loss": 0.2389, "step": 6945 }, { "epoch": 0.5925609964169937, "grad_norm": 1.5999628777965909, "learning_rate": 3.7573534555982115e-05, "loss": 0.2588, "step": 6946 }, { "epoch": 0.5926463060911107, "grad_norm": 1.444237303960806, "learning_rate": 3.756015322212193e-05, "loss": 0.2465, "step": 6947 }, { "epoch": 0.5927316157652278, "grad_norm": 1.6647606759577582, "learning_rate": 3.754677283797793e-05, "loss": 0.2443, "step": 6948 }, { "epoch": 0.5928169254393448, "grad_norm": 1.753003268171921, "learning_rate": 3.753339340457168e-05, "loss": 0.2565, "step": 6949 }, { "epoch": 0.5929022351134619, "grad_norm": 1.5855780133001929, "learning_rate": 3.752001492292456e-05, "loss": 0.1793, "step": 6950 }, { "epoch": 0.5929875447875789, "grad_norm": 2.189921210800832, "learning_rate": 3.750663739405801e-05, "loss": 0.2517, "step": 6951 }, { "epoch": 0.593072854461696, "grad_norm": 1.5526000150693071, "learning_rate": 3.749326081899329e-05, "loss": 0.2326, "step": 6952 }, { "epoch": 0.593158164135813, "grad_norm": 1.525653811522229, "learning_rate": 3.747988519875166e-05, "loss": 0.2487, "step": 6953 }, { "epoch": 0.59324347380993, "grad_norm": 1.3536613793218575, "learning_rate": 3.746651053435424e-05, "loss": 0.2328, "step": 6954 }, { "epoch": 0.593328783484047, "grad_norm": 1.4280729922918773, "learning_rate": 3.7453136826822166e-05, "loss": 0.1996, "step": 6955 }, { "epoch": 0.5934140931581642, "grad_norm": 1.576897323033087, "learning_rate": 3.74397640771764e-05, "loss": 0.2116, "step": 6956 }, { "epoch": 0.5934994028322812, "grad_norm": 1.6659317621587726, "learning_rate": 3.742639228643791e-05, "loss": 0.2059, "step": 6957 }, { "epoch": 0.5935847125063982, "grad_norm": 1.1791989660164066, "learning_rate": 3.7413021455627554e-05, "loss": 0.196, "step": 6958 }, { "epoch": 0.5936700221805152, "grad_norm": 1.7966236768025081, "learning_rate": 3.7399651585766115e-05, "loss": 0.1803, "step": 6959 }, { "epoch": 0.5937553318546324, "grad_norm": 1.5577633588839783, "learning_rate": 3.738628267787432e-05, "loss": 0.231, "step": 6960 }, { "epoch": 0.5938406415287494, "grad_norm": 1.5849426606934893, "learning_rate": 3.737291473297283e-05, "loss": 0.2507, "step": 6961 }, { "epoch": 0.5939259512028664, "grad_norm": 1.6552846722516965, "learning_rate": 3.735954775208218e-05, "loss": 0.2341, "step": 6962 }, { "epoch": 0.5940112608769834, "grad_norm": 1.5105632140876737, "learning_rate": 3.73461817362229e-05, "loss": 0.2604, "step": 6963 }, { "epoch": 0.5940965705511005, "grad_norm": 1.7831742450426973, "learning_rate": 3.733281668641538e-05, "loss": 0.1897, "step": 6964 }, { "epoch": 0.5941818802252176, "grad_norm": 2.239564857110984, "learning_rate": 3.7319452603680025e-05, "loss": 0.2937, "step": 6965 }, { "epoch": 0.5942671898993346, "grad_norm": 1.6999741552295689, "learning_rate": 3.7306089489037056e-05, "loss": 0.2366, "step": 6966 }, { "epoch": 0.5943524995734516, "grad_norm": 2.014831505065605, "learning_rate": 3.729272734350671e-05, "loss": 0.1926, "step": 6967 }, { "epoch": 0.5944378092475687, "grad_norm": 1.655623839433032, "learning_rate": 3.7279366168109106e-05, "loss": 0.2458, "step": 6968 }, { "epoch": 0.5945231189216857, "grad_norm": 1.5765829578353978, "learning_rate": 3.7266005963864296e-05, "loss": 0.1868, "step": 6969 }, { "epoch": 0.5946084285958028, "grad_norm": 1.446804616711596, "learning_rate": 3.725264673179225e-05, "loss": 0.2522, "step": 6970 }, { "epoch": 0.5946937382699198, "grad_norm": 1.6679088572842569, "learning_rate": 3.72392884729129e-05, "loss": 0.2284, "step": 6971 }, { "epoch": 0.5947790479440368, "grad_norm": 1.5310794968948436, "learning_rate": 3.722593118824606e-05, "loss": 0.2068, "step": 6972 }, { "epoch": 0.5948643576181539, "grad_norm": 1.4259645945098118, "learning_rate": 3.7212574878811495e-05, "loss": 0.1936, "step": 6973 }, { "epoch": 0.594949667292271, "grad_norm": 1.6356446012168622, "learning_rate": 3.71992195456289e-05, "loss": 0.212, "step": 6974 }, { "epoch": 0.595034976966388, "grad_norm": 1.5352300184546317, "learning_rate": 3.718586518971785e-05, "loss": 0.1861, "step": 6975 }, { "epoch": 0.595120286640505, "grad_norm": 1.6000230042472774, "learning_rate": 3.7172511812097894e-05, "loss": 0.2373, "step": 6976 }, { "epoch": 0.5952055963146221, "grad_norm": 1.664394148610085, "learning_rate": 3.715915941378849e-05, "loss": 0.166, "step": 6977 }, { "epoch": 0.5952909059887391, "grad_norm": 1.7085256778393902, "learning_rate": 3.7145807995809065e-05, "loss": 0.2122, "step": 6978 }, { "epoch": 0.5953762156628561, "grad_norm": 2.0118948553835145, "learning_rate": 3.7132457559178856e-05, "loss": 0.222, "step": 6979 }, { "epoch": 0.5954615253369732, "grad_norm": 1.410406285993878, "learning_rate": 3.711910810491714e-05, "loss": 0.2036, "step": 6980 }, { "epoch": 0.5955468350110903, "grad_norm": 1.2213750592080355, "learning_rate": 3.7105759634043105e-05, "loss": 0.2229, "step": 6981 }, { "epoch": 0.5956321446852073, "grad_norm": 1.5645238357927538, "learning_rate": 3.709241214757576e-05, "loss": 0.2417, "step": 6982 }, { "epoch": 0.5957174543593243, "grad_norm": 1.8981716142494425, "learning_rate": 3.7079065646534184e-05, "loss": 0.1731, "step": 6983 }, { "epoch": 0.5958027640334413, "grad_norm": 1.6817883505272708, "learning_rate": 3.7065720131937275e-05, "loss": 0.2461, "step": 6984 }, { "epoch": 0.5958880737075585, "grad_norm": 1.5112594182657308, "learning_rate": 3.70523756048039e-05, "loss": 0.2044, "step": 6985 }, { "epoch": 0.5959733833816755, "grad_norm": 1.6502353933791996, "learning_rate": 3.703903206615284e-05, "loss": 0.2404, "step": 6986 }, { "epoch": 0.5960586930557925, "grad_norm": 1.3066716896093582, "learning_rate": 3.7025689517002826e-05, "loss": 0.155, "step": 6987 }, { "epoch": 0.5961440027299095, "grad_norm": 1.8159825640988303, "learning_rate": 3.701234795837245e-05, "loss": 0.2419, "step": 6988 }, { "epoch": 0.5962293124040267, "grad_norm": 2.1458500488025254, "learning_rate": 3.6999007391280285e-05, "loss": 0.2239, "step": 6989 }, { "epoch": 0.5963146220781437, "grad_norm": 1.549102104747333, "learning_rate": 3.698566781674485e-05, "loss": 0.1992, "step": 6990 }, { "epoch": 0.5963999317522607, "grad_norm": 1.3138619123534143, "learning_rate": 3.6972329235784493e-05, "loss": 0.2048, "step": 6991 }, { "epoch": 0.5964852414263777, "grad_norm": 1.6191166230598102, "learning_rate": 3.695899164941757e-05, "loss": 0.2664, "step": 6992 }, { "epoch": 0.5965705511004948, "grad_norm": 2.197352211835619, "learning_rate": 3.694565505866234e-05, "loss": 0.2568, "step": 6993 }, { "epoch": 0.5966558607746119, "grad_norm": 1.7857934661749124, "learning_rate": 3.693231946453699e-05, "loss": 0.2779, "step": 6994 }, { "epoch": 0.5967411704487289, "grad_norm": 1.7701621036330055, "learning_rate": 3.69189848680596e-05, "loss": 0.2574, "step": 6995 }, { "epoch": 0.5968264801228459, "grad_norm": 1.6319693985061492, "learning_rate": 3.690565127024819e-05, "loss": 0.2093, "step": 6996 }, { "epoch": 0.596911789796963, "grad_norm": 1.9540428751416499, "learning_rate": 3.689231867212074e-05, "loss": 0.2183, "step": 6997 }, { "epoch": 0.59699709947108, "grad_norm": 1.2833386586231534, "learning_rate": 3.6878987074695094e-05, "loss": 0.1867, "step": 6998 }, { "epoch": 0.5970824091451971, "grad_norm": 2.2997843682754184, "learning_rate": 3.686565647898905e-05, "loss": 0.2329, "step": 6999 }, { "epoch": 0.5971677188193141, "grad_norm": 1.376654886183008, "learning_rate": 3.685232688602036e-05, "loss": 0.172, "step": 7000 }, { "epoch": 0.5972530284934312, "grad_norm": 1.710419389147185, "learning_rate": 3.683899829680663e-05, "loss": 0.2371, "step": 7001 }, { "epoch": 0.5973383381675482, "grad_norm": 2.233588447044138, "learning_rate": 3.682567071236544e-05, "loss": 0.2706, "step": 7002 }, { "epoch": 0.5974236478416652, "grad_norm": 1.3089344549037556, "learning_rate": 3.68123441337143e-05, "loss": 0.1776, "step": 7003 }, { "epoch": 0.5975089575157823, "grad_norm": 1.568909734975293, "learning_rate": 3.679901856187059e-05, "loss": 0.1783, "step": 7004 }, { "epoch": 0.5975942671898994, "grad_norm": 1.633406024729591, "learning_rate": 3.678569399785168e-05, "loss": 0.2761, "step": 7005 }, { "epoch": 0.5976795768640164, "grad_norm": 1.440026012689031, "learning_rate": 3.6772370442674806e-05, "loss": 0.227, "step": 7006 }, { "epoch": 0.5977648865381334, "grad_norm": 1.677634399419565, "learning_rate": 3.675904789735716e-05, "loss": 0.2341, "step": 7007 }, { "epoch": 0.5978501962122504, "grad_norm": 1.7026779568018309, "learning_rate": 3.674572636291582e-05, "loss": 0.1721, "step": 7008 }, { "epoch": 0.5979355058863676, "grad_norm": 2.0828539741891197, "learning_rate": 3.6732405840367856e-05, "loss": 0.2471, "step": 7009 }, { "epoch": 0.5980208155604846, "grad_norm": 2.2870326497591704, "learning_rate": 3.6719086330730215e-05, "loss": 0.2901, "step": 7010 }, { "epoch": 0.5981061252346016, "grad_norm": 1.8748418105470113, "learning_rate": 3.6705767835019736e-05, "loss": 0.2698, "step": 7011 }, { "epoch": 0.5981914349087186, "grad_norm": 1.8644374243336732, "learning_rate": 3.6692450354253246e-05, "loss": 0.2637, "step": 7012 }, { "epoch": 0.5982767445828356, "grad_norm": 1.9188136708384707, "learning_rate": 3.667913388944747e-05, "loss": 0.2205, "step": 7013 }, { "epoch": 0.5983620542569528, "grad_norm": 1.7738385524245854, "learning_rate": 3.666581844161902e-05, "loss": 0.2387, "step": 7014 }, { "epoch": 0.5984473639310698, "grad_norm": 1.5847647704774932, "learning_rate": 3.665250401178447e-05, "loss": 0.2627, "step": 7015 }, { "epoch": 0.5985326736051868, "grad_norm": 1.6579841316393298, "learning_rate": 3.6639190600960314e-05, "loss": 0.2253, "step": 7016 }, { "epoch": 0.5986179832793038, "grad_norm": 1.5453969656575695, "learning_rate": 3.6625878210162966e-05, "loss": 0.16, "step": 7017 }, { "epoch": 0.598703292953421, "grad_norm": 1.7878871205091145, "learning_rate": 3.661256684040873e-05, "loss": 0.1822, "step": 7018 }, { "epoch": 0.598788602627538, "grad_norm": 1.6587402049093802, "learning_rate": 3.6599256492713895e-05, "loss": 0.1819, "step": 7019 }, { "epoch": 0.598873912301655, "grad_norm": 1.9627415235076666, "learning_rate": 3.65859471680946e-05, "loss": 0.2366, "step": 7020 }, { "epoch": 0.598959221975772, "grad_norm": 1.414289424665684, "learning_rate": 3.657263886756696e-05, "loss": 0.2516, "step": 7021 }, { "epoch": 0.5990445316498891, "grad_norm": 1.8986371096313308, "learning_rate": 3.6559331592147e-05, "loss": 0.2457, "step": 7022 }, { "epoch": 0.5991298413240062, "grad_norm": 1.3296007820657296, "learning_rate": 3.654602534285063e-05, "loss": 0.2813, "step": 7023 }, { "epoch": 0.5992151509981232, "grad_norm": 1.9120775790975504, "learning_rate": 3.653272012069373e-05, "loss": 0.2179, "step": 7024 }, { "epoch": 0.5993004606722402, "grad_norm": 1.630417376715589, "learning_rate": 3.65194159266921e-05, "loss": 0.2577, "step": 7025 }, { "epoch": 0.5993857703463573, "grad_norm": 1.616117409019632, "learning_rate": 3.6506112761861425e-05, "loss": 0.2144, "step": 7026 }, { "epoch": 0.5994710800204743, "grad_norm": 1.750413300891532, "learning_rate": 3.649281062721733e-05, "loss": 0.248, "step": 7027 }, { "epoch": 0.5995563896945914, "grad_norm": 1.761250232726299, "learning_rate": 3.6479509523775366e-05, "loss": 0.2323, "step": 7028 }, { "epoch": 0.5996416993687084, "grad_norm": 1.6335969345667414, "learning_rate": 3.646620945255101e-05, "loss": 0.1805, "step": 7029 }, { "epoch": 0.5997270090428255, "grad_norm": 1.8956086860555936, "learning_rate": 3.645291041455964e-05, "loss": 0.285, "step": 7030 }, { "epoch": 0.5998123187169425, "grad_norm": 1.6221249161818951, "learning_rate": 3.643961241081656e-05, "loss": 0.1766, "step": 7031 }, { "epoch": 0.5998976283910595, "grad_norm": 1.660747683023734, "learning_rate": 3.642631544233703e-05, "loss": 0.1908, "step": 7032 }, { "epoch": 0.5999829380651766, "grad_norm": 1.7700075430628508, "learning_rate": 3.641301951013617e-05, "loss": 0.2214, "step": 7033 }, { "epoch": 0.6000682477392937, "grad_norm": 1.487498801094662, "learning_rate": 3.639972461522907e-05, "loss": 0.2182, "step": 7034 }, { "epoch": 0.6001535574134107, "grad_norm": 1.4854148836200958, "learning_rate": 3.638643075863074e-05, "loss": 0.2189, "step": 7035 }, { "epoch": 0.6002388670875277, "grad_norm": 1.5677970362648272, "learning_rate": 3.637313794135606e-05, "loss": 0.2444, "step": 7036 }, { "epoch": 0.6003241767616447, "grad_norm": 1.7894250760547983, "learning_rate": 3.63598461644199e-05, "loss": 0.2032, "step": 7037 }, { "epoch": 0.6004094864357619, "grad_norm": 1.4092417364279162, "learning_rate": 3.6346555428836985e-05, "loss": 0.2404, "step": 7038 }, { "epoch": 0.6004947961098789, "grad_norm": 1.7412870671803629, "learning_rate": 3.633326573562204e-05, "loss": 0.2526, "step": 7039 }, { "epoch": 0.6005801057839959, "grad_norm": 1.6369826802786358, "learning_rate": 3.6319977085789606e-05, "loss": 0.2261, "step": 7040 }, { "epoch": 0.6006654154581129, "grad_norm": 1.6349632298712427, "learning_rate": 3.630668948035422e-05, "loss": 0.22, "step": 7041 }, { "epoch": 0.60075072513223, "grad_norm": 1.457582098609966, "learning_rate": 3.6293402920330346e-05, "loss": 0.2345, "step": 7042 }, { "epoch": 0.6008360348063471, "grad_norm": 1.6675376603021597, "learning_rate": 3.6280117406732304e-05, "loss": 0.1911, "step": 7043 }, { "epoch": 0.6009213444804641, "grad_norm": 1.3171924993707487, "learning_rate": 3.626683294057439e-05, "loss": 0.2001, "step": 7044 }, { "epoch": 0.6010066541545811, "grad_norm": 1.6106551921058914, "learning_rate": 3.6253549522870825e-05, "loss": 0.2981, "step": 7045 }, { "epoch": 0.6010919638286982, "grad_norm": 1.41811645411089, "learning_rate": 3.6240267154635686e-05, "loss": 0.2353, "step": 7046 }, { "epoch": 0.6011772735028152, "grad_norm": 2.3314757332419407, "learning_rate": 3.622698583688304e-05, "loss": 0.2428, "step": 7047 }, { "epoch": 0.6012625831769323, "grad_norm": 2.105012805383292, "learning_rate": 3.621370557062684e-05, "loss": 0.2866, "step": 7048 }, { "epoch": 0.6013478928510493, "grad_norm": 2.1381411656022546, "learning_rate": 3.620042635688096e-05, "loss": 0.2406, "step": 7049 }, { "epoch": 0.6014332025251663, "grad_norm": 1.59519451725074, "learning_rate": 3.618714819665917e-05, "loss": 0.2646, "step": 7050 }, { "epoch": 0.6015185121992834, "grad_norm": 1.4188519714277246, "learning_rate": 3.617387109097525e-05, "loss": 0.2446, "step": 7051 }, { "epoch": 0.6016038218734004, "grad_norm": 1.6877996390604393, "learning_rate": 3.616059504084278e-05, "loss": 0.2076, "step": 7052 }, { "epoch": 0.6016891315475175, "grad_norm": 1.5244727896229107, "learning_rate": 3.614732004727533e-05, "loss": 0.3077, "step": 7053 }, { "epoch": 0.6017744412216345, "grad_norm": 1.6994652580263374, "learning_rate": 3.6134046111286376e-05, "loss": 0.2511, "step": 7054 }, { "epoch": 0.6018597508957516, "grad_norm": 1.7427225018472747, "learning_rate": 3.612077323388935e-05, "loss": 0.189, "step": 7055 }, { "epoch": 0.6019450605698686, "grad_norm": 1.8786770686792735, "learning_rate": 3.610750141609751e-05, "loss": 0.2442, "step": 7056 }, { "epoch": 0.6020303702439856, "grad_norm": 1.6092330721891845, "learning_rate": 3.609423065892412e-05, "loss": 0.2983, "step": 7057 }, { "epoch": 0.6021156799181027, "grad_norm": 1.3795087756893682, "learning_rate": 3.608096096338233e-05, "loss": 0.1819, "step": 7058 }, { "epoch": 0.6022009895922198, "grad_norm": 1.6727915595539953, "learning_rate": 3.606769233048519e-05, "loss": 0.2183, "step": 7059 }, { "epoch": 0.6022862992663368, "grad_norm": 1.348925537339124, "learning_rate": 3.605442476124571e-05, "loss": 0.1851, "step": 7060 }, { "epoch": 0.6023716089404538, "grad_norm": 1.6434569848271579, "learning_rate": 3.604115825667681e-05, "loss": 0.2201, "step": 7061 }, { "epoch": 0.6024569186145708, "grad_norm": 2.301467145613612, "learning_rate": 3.6027892817791275e-05, "loss": 0.2638, "step": 7062 }, { "epoch": 0.602542228288688, "grad_norm": 1.9812967649891373, "learning_rate": 3.601462844560187e-05, "loss": 0.1669, "step": 7063 }, { "epoch": 0.602627537962805, "grad_norm": 2.0455087076587843, "learning_rate": 3.6001365141121295e-05, "loss": 0.2566, "step": 7064 }, { "epoch": 0.602712847636922, "grad_norm": 1.5699230109611932, "learning_rate": 3.598810290536208e-05, "loss": 0.2544, "step": 7065 }, { "epoch": 0.602798157311039, "grad_norm": 1.4398469425027345, "learning_rate": 3.597484173933675e-05, "loss": 0.204, "step": 7066 }, { "epoch": 0.6028834669851562, "grad_norm": 1.7419479958165784, "learning_rate": 3.5961581644057744e-05, "loss": 0.2165, "step": 7067 }, { "epoch": 0.6029687766592732, "grad_norm": 1.5921926930710668, "learning_rate": 3.5948322620537357e-05, "loss": 0.2034, "step": 7068 }, { "epoch": 0.6030540863333902, "grad_norm": 1.6229011479404185, "learning_rate": 3.593506466978788e-05, "loss": 0.1961, "step": 7069 }, { "epoch": 0.6031393960075072, "grad_norm": 1.6585755486012963, "learning_rate": 3.592180779282146e-05, "loss": 0.219, "step": 7070 }, { "epoch": 0.6032247056816243, "grad_norm": 1.6071379585796968, "learning_rate": 3.590855199065023e-05, "loss": 0.1814, "step": 7071 }, { "epoch": 0.6033100153557414, "grad_norm": 1.5247290198538148, "learning_rate": 3.589529726428615e-05, "loss": 0.2566, "step": 7072 }, { "epoch": 0.6033953250298584, "grad_norm": 1.336587847972553, "learning_rate": 3.5882043614741165e-05, "loss": 0.1638, "step": 7073 }, { "epoch": 0.6034806347039754, "grad_norm": 1.938179881263922, "learning_rate": 3.586879104302716e-05, "loss": 0.2408, "step": 7074 }, { "epoch": 0.6035659443780925, "grad_norm": 1.4704557213263334, "learning_rate": 3.585553955015584e-05, "loss": 0.2207, "step": 7075 }, { "epoch": 0.6036512540522095, "grad_norm": 1.5512648436199634, "learning_rate": 3.584228913713891e-05, "loss": 0.1804, "step": 7076 }, { "epoch": 0.6037365637263266, "grad_norm": 1.5516296679507142, "learning_rate": 3.5829039804988e-05, "loss": 0.2402, "step": 7077 }, { "epoch": 0.6038218734004436, "grad_norm": 1.3954844062395901, "learning_rate": 3.5815791554714564e-05, "loss": 0.1645, "step": 7078 }, { "epoch": 0.6039071830745607, "grad_norm": 1.7409154242185996, "learning_rate": 3.580254438733008e-05, "loss": 0.2944, "step": 7079 }, { "epoch": 0.6039924927486777, "grad_norm": 1.6267845918299484, "learning_rate": 3.5789298303845884e-05, "loss": 0.2781, "step": 7080 }, { "epoch": 0.6040778024227947, "grad_norm": 1.5661864471328493, "learning_rate": 3.577605330527326e-05, "loss": 0.1924, "step": 7081 }, { "epoch": 0.6041631120969118, "grad_norm": 1.565787861589954, "learning_rate": 3.576280939262336e-05, "loss": 0.2374, "step": 7082 }, { "epoch": 0.6042484217710289, "grad_norm": 1.5741022791028874, "learning_rate": 3.574956656690731e-05, "loss": 0.2417, "step": 7083 }, { "epoch": 0.6043337314451459, "grad_norm": 1.5934679118319917, "learning_rate": 3.573632482913615e-05, "loss": 0.1543, "step": 7084 }, { "epoch": 0.6044190411192629, "grad_norm": 1.4257809155607484, "learning_rate": 3.572308418032076e-05, "loss": 0.2078, "step": 7085 }, { "epoch": 0.6045043507933799, "grad_norm": 1.7695971344057166, "learning_rate": 3.570984462147203e-05, "loss": 0.1952, "step": 7086 }, { "epoch": 0.604589660467497, "grad_norm": 1.746262032338374, "learning_rate": 3.5696606153600754e-05, "loss": 0.2312, "step": 7087 }, { "epoch": 0.6046749701416141, "grad_norm": 1.4095738441959014, "learning_rate": 3.568336877771756e-05, "loss": 0.1543, "step": 7088 }, { "epoch": 0.6047602798157311, "grad_norm": 1.9393923962200412, "learning_rate": 3.56701324948331e-05, "loss": 0.2324, "step": 7089 }, { "epoch": 0.6048455894898481, "grad_norm": 2.017967459825553, "learning_rate": 3.565689730595787e-05, "loss": 0.2491, "step": 7090 }, { "epoch": 0.6049308991639651, "grad_norm": 1.9734729851245387, "learning_rate": 3.5643663212102306e-05, "loss": 0.2337, "step": 7091 }, { "epoch": 0.6050162088380823, "grad_norm": 1.4191283641551193, "learning_rate": 3.563043021427677e-05, "loss": 0.1558, "step": 7092 }, { "epoch": 0.6051015185121993, "grad_norm": 1.762203851804246, "learning_rate": 3.561719831349153e-05, "loss": 0.2161, "step": 7093 }, { "epoch": 0.6051868281863163, "grad_norm": 1.8089091806551933, "learning_rate": 3.5603967510756764e-05, "loss": 0.2279, "step": 7094 }, { "epoch": 0.6052721378604333, "grad_norm": 1.5634734363989282, "learning_rate": 3.559073780708257e-05, "loss": 0.2328, "step": 7095 }, { "epoch": 0.6053574475345505, "grad_norm": 1.2811571413211014, "learning_rate": 3.5577509203479e-05, "loss": 0.2, "step": 7096 }, { "epoch": 0.6054427572086675, "grad_norm": 2.0435055087775753, "learning_rate": 3.556428170095593e-05, "loss": 0.2183, "step": 7097 }, { "epoch": 0.6055280668827845, "grad_norm": 1.8061970924099753, "learning_rate": 3.5551055300523254e-05, "loss": 0.2197, "step": 7098 }, { "epoch": 0.6056133765569015, "grad_norm": 2.023496413385838, "learning_rate": 3.553783000319072e-05, "loss": 0.276, "step": 7099 }, { "epoch": 0.6056986862310186, "grad_norm": 1.6269567885706802, "learning_rate": 3.552460580996803e-05, "loss": 0.2205, "step": 7100 }, { "epoch": 0.6057839959051357, "grad_norm": 1.5224947628939651, "learning_rate": 3.551138272186475e-05, "loss": 0.2149, "step": 7101 }, { "epoch": 0.6058693055792527, "grad_norm": 1.71676875276935, "learning_rate": 3.5498160739890404e-05, "loss": 0.2195, "step": 7102 }, { "epoch": 0.6059546152533697, "grad_norm": 1.7924720225684623, "learning_rate": 3.548493986505444e-05, "loss": 0.2396, "step": 7103 }, { "epoch": 0.6060399249274868, "grad_norm": 1.407848297578525, "learning_rate": 3.547172009836617e-05, "loss": 0.2133, "step": 7104 }, { "epoch": 0.6061252346016038, "grad_norm": 1.664859276515212, "learning_rate": 3.545850144083486e-05, "loss": 0.2555, "step": 7105 }, { "epoch": 0.6062105442757209, "grad_norm": 2.084874028487248, "learning_rate": 3.544528389346972e-05, "loss": 0.3101, "step": 7106 }, { "epoch": 0.6062958539498379, "grad_norm": 1.6386228424484084, "learning_rate": 3.5432067457279775e-05, "loss": 0.2414, "step": 7107 }, { "epoch": 0.606381163623955, "grad_norm": 1.7896026728706027, "learning_rate": 3.5418852133274084e-05, "loss": 0.288, "step": 7108 }, { "epoch": 0.606466473298072, "grad_norm": 1.834194703477827, "learning_rate": 3.5405637922461556e-05, "loss": 0.2039, "step": 7109 }, { "epoch": 0.606551782972189, "grad_norm": 1.63814058659549, "learning_rate": 3.5392424825851e-05, "loss": 0.1994, "step": 7110 }, { "epoch": 0.606637092646306, "grad_norm": 1.7467805676535282, "learning_rate": 3.5379212844451206e-05, "loss": 0.2101, "step": 7111 }, { "epoch": 0.6067224023204232, "grad_norm": 1.5889274337216217, "learning_rate": 3.5366001979270805e-05, "loss": 0.1476, "step": 7112 }, { "epoch": 0.6068077119945402, "grad_norm": 1.6555849575501282, "learning_rate": 3.5352792231318385e-05, "loss": 0.2265, "step": 7113 }, { "epoch": 0.6068930216686572, "grad_norm": 1.3866957111862586, "learning_rate": 3.5339583601602443e-05, "loss": 0.1816, "step": 7114 }, { "epoch": 0.6069783313427742, "grad_norm": 1.7439436336735257, "learning_rate": 3.532637609113138e-05, "loss": 0.2292, "step": 7115 }, { "epoch": 0.6070636410168914, "grad_norm": 1.3485232011031825, "learning_rate": 3.531316970091355e-05, "loss": 0.227, "step": 7116 }, { "epoch": 0.6071489506910084, "grad_norm": 1.885627636425589, "learning_rate": 3.529996443195714e-05, "loss": 0.2473, "step": 7117 }, { "epoch": 0.6072342603651254, "grad_norm": 1.643357753004793, "learning_rate": 3.528676028527035e-05, "loss": 0.2142, "step": 7118 }, { "epoch": 0.6073195700392424, "grad_norm": 1.6807220821351485, "learning_rate": 3.527355726186123e-05, "loss": 0.1983, "step": 7119 }, { "epoch": 0.6074048797133595, "grad_norm": 1.313973598670782, "learning_rate": 3.526035536273774e-05, "loss": 0.1865, "step": 7120 }, { "epoch": 0.6074901893874766, "grad_norm": 2.0417167371980947, "learning_rate": 3.52471545889078e-05, "loss": 0.1977, "step": 7121 }, { "epoch": 0.6075754990615936, "grad_norm": 1.74823460769587, "learning_rate": 3.523395494137921e-05, "loss": 0.2065, "step": 7122 }, { "epoch": 0.6076608087357106, "grad_norm": 1.747512684613689, "learning_rate": 3.5220756421159696e-05, "loss": 0.1981, "step": 7123 }, { "epoch": 0.6077461184098277, "grad_norm": 1.7192107536714487, "learning_rate": 3.520755902925689e-05, "loss": 0.1823, "step": 7124 }, { "epoch": 0.6078314280839447, "grad_norm": 2.0910822255748314, "learning_rate": 3.519436276667836e-05, "loss": 0.2179, "step": 7125 }, { "epoch": 0.6079167377580618, "grad_norm": 1.458671730380245, "learning_rate": 3.518116763443153e-05, "loss": 0.2156, "step": 7126 }, { "epoch": 0.6080020474321788, "grad_norm": 1.635168028313819, "learning_rate": 3.5167973633523804e-05, "loss": 0.2495, "step": 7127 }, { "epoch": 0.6080873571062958, "grad_norm": 2.098905450676935, "learning_rate": 3.515478076496248e-05, "loss": 0.2773, "step": 7128 }, { "epoch": 0.6081726667804129, "grad_norm": 1.6193837166260454, "learning_rate": 3.5141589029754776e-05, "loss": 0.2201, "step": 7129 }, { "epoch": 0.60825797645453, "grad_norm": 1.692737927300396, "learning_rate": 3.5128398428907766e-05, "loss": 0.2096, "step": 7130 }, { "epoch": 0.608343286128647, "grad_norm": 1.913850235360299, "learning_rate": 3.511520896342852e-05, "loss": 0.1975, "step": 7131 }, { "epoch": 0.608428595802764, "grad_norm": 1.3840669448515885, "learning_rate": 3.510202063432397e-05, "loss": 0.2497, "step": 7132 }, { "epoch": 0.6085139054768811, "grad_norm": 1.7355641592789104, "learning_rate": 3.5088833442600985e-05, "loss": 0.2138, "step": 7133 }, { "epoch": 0.6085992151509981, "grad_norm": 1.753430614250558, "learning_rate": 3.507564738926632e-05, "loss": 0.2318, "step": 7134 }, { "epoch": 0.6086845248251151, "grad_norm": 2.00515880440979, "learning_rate": 3.506246247532669e-05, "loss": 0.1665, "step": 7135 }, { "epoch": 0.6087698344992322, "grad_norm": 1.6161772295153707, "learning_rate": 3.504927870178863e-05, "loss": 0.1968, "step": 7136 }, { "epoch": 0.6088551441733493, "grad_norm": 1.331815551118445, "learning_rate": 3.503609606965872e-05, "loss": 0.1479, "step": 7137 }, { "epoch": 0.6089404538474663, "grad_norm": 2.051978003675863, "learning_rate": 3.5022914579943365e-05, "loss": 0.1681, "step": 7138 }, { "epoch": 0.6090257635215833, "grad_norm": 1.9596234664468128, "learning_rate": 3.500973423364887e-05, "loss": 0.2019, "step": 7139 }, { "epoch": 0.6091110731957003, "grad_norm": 1.3282696364688995, "learning_rate": 3.4996555031781516e-05, "loss": 0.2212, "step": 7140 }, { "epoch": 0.6091963828698175, "grad_norm": 1.7005447300336487, "learning_rate": 3.498337697534747e-05, "loss": 0.2209, "step": 7141 }, { "epoch": 0.6092816925439345, "grad_norm": 1.8502125385988177, "learning_rate": 3.497020006535278e-05, "loss": 0.2228, "step": 7142 }, { "epoch": 0.6093670022180515, "grad_norm": 1.9209601853438159, "learning_rate": 3.4957024302803453e-05, "loss": 0.2204, "step": 7143 }, { "epoch": 0.6094523118921685, "grad_norm": 1.7912093029736436, "learning_rate": 3.4943849688705376e-05, "loss": 0.2074, "step": 7144 }, { "epoch": 0.6095376215662857, "grad_norm": 1.818889470995856, "learning_rate": 3.493067622406439e-05, "loss": 0.2391, "step": 7145 }, { "epoch": 0.6096229312404027, "grad_norm": 1.5096396014875189, "learning_rate": 3.491750390988616e-05, "loss": 0.2398, "step": 7146 }, { "epoch": 0.6097082409145197, "grad_norm": 1.905752586129796, "learning_rate": 3.4904332747176373e-05, "loss": 0.2186, "step": 7147 }, { "epoch": 0.6097935505886367, "grad_norm": 2.135693515106114, "learning_rate": 3.4891162736940584e-05, "loss": 0.23, "step": 7148 }, { "epoch": 0.6098788602627538, "grad_norm": 1.686663526273274, "learning_rate": 3.48779938801842e-05, "loss": 0.2791, "step": 7149 }, { "epoch": 0.6099641699368709, "grad_norm": 1.300933187506458, "learning_rate": 3.486482617791263e-05, "loss": 0.1628, "step": 7150 }, { "epoch": 0.6100494796109879, "grad_norm": 1.9635977159455367, "learning_rate": 3.485165963113118e-05, "loss": 0.2974, "step": 7151 }, { "epoch": 0.6101347892851049, "grad_norm": 1.7048553635896795, "learning_rate": 3.483849424084499e-05, "loss": 0.2186, "step": 7152 }, { "epoch": 0.610220098959222, "grad_norm": 1.8182974865273611, "learning_rate": 3.482533000805921e-05, "loss": 0.2495, "step": 7153 }, { "epoch": 0.610305408633339, "grad_norm": 1.6146565410465576, "learning_rate": 3.4812166933778844e-05, "loss": 0.2592, "step": 7154 }, { "epoch": 0.6103907183074561, "grad_norm": 1.9093367802217651, "learning_rate": 3.4799005019008826e-05, "loss": 0.2547, "step": 7155 }, { "epoch": 0.6104760279815731, "grad_norm": 1.571203178491116, "learning_rate": 3.478584426475399e-05, "loss": 0.2474, "step": 7156 }, { "epoch": 0.6105613376556902, "grad_norm": 1.373108429702421, "learning_rate": 3.477268467201911e-05, "loss": 0.2715, "step": 7157 }, { "epoch": 0.6106466473298072, "grad_norm": 1.4849608219000678, "learning_rate": 3.475952624180882e-05, "loss": 0.1946, "step": 7158 }, { "epoch": 0.6107319570039242, "grad_norm": 1.4727268353856526, "learning_rate": 3.4746368975127716e-05, "loss": 0.1999, "step": 7159 }, { "epoch": 0.6108172666780413, "grad_norm": 1.6498326187777081, "learning_rate": 3.473321287298028e-05, "loss": 0.2455, "step": 7160 }, { "epoch": 0.6109025763521584, "grad_norm": 1.4369972842674636, "learning_rate": 3.472005793637094e-05, "loss": 0.2541, "step": 7161 }, { "epoch": 0.6109878860262754, "grad_norm": 2.182480202286844, "learning_rate": 3.470690416630395e-05, "loss": 0.2699, "step": 7162 }, { "epoch": 0.6110731957003924, "grad_norm": 1.6192267640027878, "learning_rate": 3.4693751563783574e-05, "loss": 0.2887, "step": 7163 }, { "epoch": 0.6111585053745094, "grad_norm": 2.00179150929201, "learning_rate": 3.4680600129813926e-05, "loss": 0.2724, "step": 7164 }, { "epoch": 0.6112438150486265, "grad_norm": 1.743451399856299, "learning_rate": 3.4667449865399054e-05, "loss": 0.1754, "step": 7165 }, { "epoch": 0.6113291247227436, "grad_norm": 1.5576897391998183, "learning_rate": 3.4654300771542896e-05, "loss": 0.2411, "step": 7166 }, { "epoch": 0.6114144343968606, "grad_norm": 1.181593208307416, "learning_rate": 3.4641152849249346e-05, "loss": 0.1891, "step": 7167 }, { "epoch": 0.6114997440709776, "grad_norm": 1.6116660393783049, "learning_rate": 3.462800609952214e-05, "loss": 0.2188, "step": 7168 }, { "epoch": 0.6115850537450946, "grad_norm": 1.6132753385938912, "learning_rate": 3.461486052336499e-05, "loss": 0.1824, "step": 7169 }, { "epoch": 0.6116703634192118, "grad_norm": 1.5642187202475712, "learning_rate": 3.460171612178149e-05, "loss": 0.2173, "step": 7170 }, { "epoch": 0.6117556730933288, "grad_norm": 1.7516839236284438, "learning_rate": 3.4588572895775126e-05, "loss": 0.1875, "step": 7171 }, { "epoch": 0.6118409827674458, "grad_norm": 1.3006140799112091, "learning_rate": 3.457543084634932e-05, "loss": 0.255, "step": 7172 }, { "epoch": 0.6119262924415628, "grad_norm": 1.4092831431564201, "learning_rate": 3.456228997450741e-05, "loss": 0.2207, "step": 7173 }, { "epoch": 0.61201160211568, "grad_norm": 2.1094040480132437, "learning_rate": 3.4549150281252636e-05, "loss": 0.1745, "step": 7174 }, { "epoch": 0.612096911789797, "grad_norm": 1.5917731347683282, "learning_rate": 3.453601176758813e-05, "loss": 0.2537, "step": 7175 }, { "epoch": 0.612182221463914, "grad_norm": 1.3495549634020705, "learning_rate": 3.452287443451693e-05, "loss": 0.2204, "step": 7176 }, { "epoch": 0.612267531138031, "grad_norm": 1.3748388629474873, "learning_rate": 3.4509738283042046e-05, "loss": 0.1917, "step": 7177 }, { "epoch": 0.6123528408121481, "grad_norm": 1.6697302635866689, "learning_rate": 3.449660331416631e-05, "loss": 0.2076, "step": 7178 }, { "epoch": 0.6124381504862652, "grad_norm": 1.3134305470854604, "learning_rate": 3.448346952889253e-05, "loss": 0.1822, "step": 7179 }, { "epoch": 0.6125234601603822, "grad_norm": 1.3962074271928706, "learning_rate": 3.447033692822341e-05, "loss": 0.2196, "step": 7180 }, { "epoch": 0.6126087698344992, "grad_norm": 1.4460300623194497, "learning_rate": 3.445720551316152e-05, "loss": 0.21, "step": 7181 }, { "epoch": 0.6126940795086163, "grad_norm": 1.5883741780353466, "learning_rate": 3.44440752847094e-05, "loss": 0.2454, "step": 7182 }, { "epoch": 0.6127793891827333, "grad_norm": 1.898834940433729, "learning_rate": 3.443094624386949e-05, "loss": 0.1908, "step": 7183 }, { "epoch": 0.6128646988568504, "grad_norm": 1.5895064462421231, "learning_rate": 3.441781839164408e-05, "loss": 0.174, "step": 7184 }, { "epoch": 0.6129500085309674, "grad_norm": 1.652462353068325, "learning_rate": 3.440469172903543e-05, "loss": 0.1851, "step": 7185 }, { "epoch": 0.6130353182050845, "grad_norm": 1.3666780230972981, "learning_rate": 3.4391566257045705e-05, "loss": 0.196, "step": 7186 }, { "epoch": 0.6131206278792015, "grad_norm": 1.2725189266033896, "learning_rate": 3.437844197667696e-05, "loss": 0.1838, "step": 7187 }, { "epoch": 0.6132059375533185, "grad_norm": 2.490964583504634, "learning_rate": 3.436531888893113e-05, "loss": 0.2621, "step": 7188 }, { "epoch": 0.6132912472274356, "grad_norm": 1.9686380990333954, "learning_rate": 3.4352196994810125e-05, "loss": 0.2545, "step": 7189 }, { "epoch": 0.6133765569015527, "grad_norm": 1.6232093334946784, "learning_rate": 3.433907629531575e-05, "loss": 0.2466, "step": 7190 }, { "epoch": 0.6134618665756697, "grad_norm": 1.2580797373574206, "learning_rate": 3.432595679144966e-05, "loss": 0.2248, "step": 7191 }, { "epoch": 0.6135471762497867, "grad_norm": 1.6790548863212593, "learning_rate": 3.431283848421347e-05, "loss": 0.3304, "step": 7192 }, { "epoch": 0.6136324859239037, "grad_norm": 1.5179645532382156, "learning_rate": 3.429972137460873e-05, "loss": 0.2357, "step": 7193 }, { "epoch": 0.6137177955980209, "grad_norm": 1.8916238715530747, "learning_rate": 3.4286605463636804e-05, "loss": 0.2377, "step": 7194 }, { "epoch": 0.6138031052721379, "grad_norm": 1.7200682699715837, "learning_rate": 3.4273490752299064e-05, "loss": 0.2347, "step": 7195 }, { "epoch": 0.6138884149462549, "grad_norm": 1.3801869612664535, "learning_rate": 3.426037724159673e-05, "loss": 0.3335, "step": 7196 }, { "epoch": 0.6139737246203719, "grad_norm": 1.4794032306177403, "learning_rate": 3.4247264932530964e-05, "loss": 0.1827, "step": 7197 }, { "epoch": 0.614059034294489, "grad_norm": 1.5606480877153488, "learning_rate": 3.4234153826102787e-05, "loss": 0.2431, "step": 7198 }, { "epoch": 0.6141443439686061, "grad_norm": 1.4596830479940488, "learning_rate": 3.422104392331322e-05, "loss": 0.2144, "step": 7199 }, { "epoch": 0.6142296536427231, "grad_norm": 1.3923787406297856, "learning_rate": 3.4207935225163066e-05, "loss": 0.1947, "step": 7200 }, { "epoch": 0.6143149633168401, "grad_norm": 1.6280573913243357, "learning_rate": 3.419482773265314e-05, "loss": 0.1929, "step": 7201 }, { "epoch": 0.6144002729909571, "grad_norm": 2.2776006485005866, "learning_rate": 3.418172144678416e-05, "loss": 0.1785, "step": 7202 }, { "epoch": 0.6144855826650742, "grad_norm": 1.895356869473053, "learning_rate": 3.416861636855666e-05, "loss": 0.2472, "step": 7203 }, { "epoch": 0.6145708923391913, "grad_norm": 1.7025029157481943, "learning_rate": 3.415551249897117e-05, "loss": 0.2363, "step": 7204 }, { "epoch": 0.6146562020133083, "grad_norm": 1.7534188525782721, "learning_rate": 3.4142409839028125e-05, "loss": 0.2463, "step": 7205 }, { "epoch": 0.6147415116874253, "grad_norm": 1.3906244856587004, "learning_rate": 3.412930838972781e-05, "loss": 0.2101, "step": 7206 }, { "epoch": 0.6148268213615424, "grad_norm": 1.9100157641214652, "learning_rate": 3.4116208152070473e-05, "loss": 0.2768, "step": 7207 }, { "epoch": 0.6149121310356594, "grad_norm": 1.6158735309156527, "learning_rate": 3.410310912705622e-05, "loss": 0.1868, "step": 7208 }, { "epoch": 0.6149974407097765, "grad_norm": 1.5060776924569836, "learning_rate": 3.409001131568513e-05, "loss": 0.1964, "step": 7209 }, { "epoch": 0.6150827503838935, "grad_norm": 2.0841077001202897, "learning_rate": 3.407691471895711e-05, "loss": 0.3119, "step": 7210 }, { "epoch": 0.6151680600580106, "grad_norm": 1.7964384129088622, "learning_rate": 3.406381933787204e-05, "loss": 0.1815, "step": 7211 }, { "epoch": 0.6152533697321276, "grad_norm": 1.6138824746549207, "learning_rate": 3.4050725173429695e-05, "loss": 0.265, "step": 7212 }, { "epoch": 0.6153386794062446, "grad_norm": 1.3716476495350756, "learning_rate": 3.4037632226629704e-05, "loss": 0.1882, "step": 7213 }, { "epoch": 0.6154239890803617, "grad_norm": 1.3882455077370852, "learning_rate": 3.402454049847168e-05, "loss": 0.1949, "step": 7214 }, { "epoch": 0.6155092987544788, "grad_norm": 1.9908821167470472, "learning_rate": 3.4011449989955105e-05, "loss": 0.2893, "step": 7215 }, { "epoch": 0.6155946084285958, "grad_norm": 1.7277790461403026, "learning_rate": 3.3998360702079345e-05, "loss": 0.2117, "step": 7216 }, { "epoch": 0.6156799181027128, "grad_norm": 1.6426459807578673, "learning_rate": 3.3985272635843725e-05, "loss": 0.2418, "step": 7217 }, { "epoch": 0.6157652277768298, "grad_norm": 1.8886473850370598, "learning_rate": 3.3972185792247424e-05, "loss": 0.2534, "step": 7218 }, { "epoch": 0.615850537450947, "grad_norm": 1.7902697358872732, "learning_rate": 3.395910017228958e-05, "loss": 0.2375, "step": 7219 }, { "epoch": 0.615935847125064, "grad_norm": 1.3416264188349003, "learning_rate": 3.3946015776969185e-05, "loss": 0.2053, "step": 7220 }, { "epoch": 0.616021156799181, "grad_norm": 1.4775357545763945, "learning_rate": 3.393293260728517e-05, "loss": 0.2091, "step": 7221 }, { "epoch": 0.616106466473298, "grad_norm": 1.579350967858409, "learning_rate": 3.39198506642364e-05, "loss": 0.2704, "step": 7222 }, { "epoch": 0.6161917761474152, "grad_norm": 1.7573310023052595, "learning_rate": 3.390676994882155e-05, "loss": 0.2486, "step": 7223 }, { "epoch": 0.6162770858215322, "grad_norm": 1.9663871027636037, "learning_rate": 3.3893690462039305e-05, "loss": 0.265, "step": 7224 }, { "epoch": 0.6163623954956492, "grad_norm": 1.3168674645844227, "learning_rate": 3.388061220488822e-05, "loss": 0.1711, "step": 7225 }, { "epoch": 0.6164477051697662, "grad_norm": 1.5410069609230457, "learning_rate": 3.386753517836671e-05, "loss": 0.1736, "step": 7226 }, { "epoch": 0.6165330148438833, "grad_norm": 2.125058341627887, "learning_rate": 3.3854459383473174e-05, "loss": 0.2477, "step": 7227 }, { "epoch": 0.6166183245180004, "grad_norm": 1.6714551166666876, "learning_rate": 3.384138482120587e-05, "loss": 0.2317, "step": 7228 }, { "epoch": 0.6167036341921174, "grad_norm": 1.5900592329880043, "learning_rate": 3.382831149256297e-05, "loss": 0.213, "step": 7229 }, { "epoch": 0.6167889438662344, "grad_norm": 1.8174730675130386, "learning_rate": 3.381523939854253e-05, "loss": 0.2113, "step": 7230 }, { "epoch": 0.6168742535403515, "grad_norm": 1.6989470165943086, "learning_rate": 3.380216854014259e-05, "loss": 0.2781, "step": 7231 }, { "epoch": 0.6169595632144685, "grad_norm": 1.9035046929997237, "learning_rate": 3.378909891836098e-05, "loss": 0.2426, "step": 7232 }, { "epoch": 0.6170448728885856, "grad_norm": 1.5955516131596457, "learning_rate": 3.377603053419552e-05, "loss": 0.2282, "step": 7233 }, { "epoch": 0.6171301825627026, "grad_norm": 2.236439851193815, "learning_rate": 3.376296338864392e-05, "loss": 0.2979, "step": 7234 }, { "epoch": 0.6172154922368197, "grad_norm": 1.485475313046355, "learning_rate": 3.3749897482703794e-05, "loss": 0.1908, "step": 7235 }, { "epoch": 0.6173008019109367, "grad_norm": 2.0619048213507747, "learning_rate": 3.373683281737263e-05, "loss": 0.2248, "step": 7236 }, { "epoch": 0.6173861115850537, "grad_norm": 1.5371940618546733, "learning_rate": 3.372376939364787e-05, "loss": 0.2292, "step": 7237 }, { "epoch": 0.6174714212591708, "grad_norm": 1.3527782462813558, "learning_rate": 3.371070721252682e-05, "loss": 0.2191, "step": 7238 }, { "epoch": 0.6175567309332879, "grad_norm": 1.4521734752433393, "learning_rate": 3.369764627500671e-05, "loss": 0.2569, "step": 7239 }, { "epoch": 0.6176420406074049, "grad_norm": 1.8582884875760979, "learning_rate": 3.3684586582084674e-05, "loss": 0.2877, "step": 7240 }, { "epoch": 0.6177273502815219, "grad_norm": 1.8145007567626181, "learning_rate": 3.3671528134757766e-05, "loss": 0.2675, "step": 7241 }, { "epoch": 0.6178126599556389, "grad_norm": 2.0562251034420793, "learning_rate": 3.36584709340229e-05, "loss": 0.2649, "step": 7242 }, { "epoch": 0.617897969629756, "grad_norm": 1.5985825518500274, "learning_rate": 3.3645414980876946e-05, "loss": 0.2313, "step": 7243 }, { "epoch": 0.6179832793038731, "grad_norm": 1.6103500079349304, "learning_rate": 3.363236027631665e-05, "loss": 0.2582, "step": 7244 }, { "epoch": 0.6180685889779901, "grad_norm": 1.3249067021757475, "learning_rate": 3.361930682133867e-05, "loss": 0.2316, "step": 7245 }, { "epoch": 0.6181538986521071, "grad_norm": 1.3371309894371635, "learning_rate": 3.3606254616939555e-05, "loss": 0.2132, "step": 7246 }, { "epoch": 0.6182392083262241, "grad_norm": 1.7516128736971985, "learning_rate": 3.3593203664115807e-05, "loss": 0.2312, "step": 7247 }, { "epoch": 0.6183245180003413, "grad_norm": 1.5077771592816331, "learning_rate": 3.358015396386376e-05, "loss": 0.2624, "step": 7248 }, { "epoch": 0.6184098276744583, "grad_norm": 1.6638659867140064, "learning_rate": 3.3567105517179696e-05, "loss": 0.2759, "step": 7249 }, { "epoch": 0.6184951373485753, "grad_norm": 1.7326347624534884, "learning_rate": 3.3554058325059806e-05, "loss": 0.2613, "step": 7250 }, { "epoch": 0.6185804470226923, "grad_norm": 1.6443742328266484, "learning_rate": 3.354101238850019e-05, "loss": 0.1903, "step": 7251 }, { "epoch": 0.6186657566968095, "grad_norm": 1.5237373252635662, "learning_rate": 3.352796770849679e-05, "loss": 0.2168, "step": 7252 }, { "epoch": 0.6187510663709265, "grad_norm": 1.3758920463558915, "learning_rate": 3.3514924286045526e-05, "loss": 0.2074, "step": 7253 }, { "epoch": 0.6188363760450435, "grad_norm": 1.6842604730910955, "learning_rate": 3.35018821221422e-05, "loss": 0.2213, "step": 7254 }, { "epoch": 0.6189216857191605, "grad_norm": 2.090187115848783, "learning_rate": 3.3488841217782493e-05, "loss": 0.2899, "step": 7255 }, { "epoch": 0.6190069953932776, "grad_norm": 1.768612385171475, "learning_rate": 3.347580157396202e-05, "loss": 0.1754, "step": 7256 }, { "epoch": 0.6190923050673947, "grad_norm": 1.335537900891743, "learning_rate": 3.3462763191676305e-05, "loss": 0.2343, "step": 7257 }, { "epoch": 0.6191776147415117, "grad_norm": 1.5574406921459327, "learning_rate": 3.3449726071920724e-05, "loss": 0.2654, "step": 7258 }, { "epoch": 0.6192629244156287, "grad_norm": 1.7317669478829376, "learning_rate": 3.3436690215690615e-05, "loss": 0.2174, "step": 7259 }, { "epoch": 0.6193482340897458, "grad_norm": 2.2773332808239206, "learning_rate": 3.34236556239812e-05, "loss": 0.2003, "step": 7260 }, { "epoch": 0.6194335437638628, "grad_norm": 1.4638111594931698, "learning_rate": 3.3410622297787574e-05, "loss": 0.1942, "step": 7261 }, { "epoch": 0.6195188534379799, "grad_norm": 1.364514642787154, "learning_rate": 3.339759023810478e-05, "loss": 0.2241, "step": 7262 }, { "epoch": 0.6196041631120969, "grad_norm": 1.9476489394777938, "learning_rate": 3.3384559445927746e-05, "loss": 0.2186, "step": 7263 }, { "epoch": 0.619689472786214, "grad_norm": 1.816640531136101, "learning_rate": 3.337152992225133e-05, "loss": 0.2251, "step": 7264 }, { "epoch": 0.619774782460331, "grad_norm": 1.2833333638839388, "learning_rate": 3.335850166807021e-05, "loss": 0.1431, "step": 7265 }, { "epoch": 0.619860092134448, "grad_norm": 1.656017503074621, "learning_rate": 3.3345474684379064e-05, "loss": 0.2711, "step": 7266 }, { "epoch": 0.619945401808565, "grad_norm": 1.968574576661078, "learning_rate": 3.3332448972172434e-05, "loss": 0.2512, "step": 7267 }, { "epoch": 0.6200307114826822, "grad_norm": 1.9476366980864666, "learning_rate": 3.3319424532444745e-05, "loss": 0.3656, "step": 7268 }, { "epoch": 0.6201160211567992, "grad_norm": 1.6894908746459938, "learning_rate": 3.3306401366190354e-05, "loss": 0.2212, "step": 7269 }, { "epoch": 0.6202013308309162, "grad_norm": 2.005977044001753, "learning_rate": 3.3293379474403514e-05, "loss": 0.2255, "step": 7270 }, { "epoch": 0.6202866405050332, "grad_norm": 1.7534986672332458, "learning_rate": 3.328035885807837e-05, "loss": 0.1669, "step": 7271 }, { "epoch": 0.6203719501791504, "grad_norm": 1.892159713931708, "learning_rate": 3.3267339518208976e-05, "loss": 0.2468, "step": 7272 }, { "epoch": 0.6204572598532674, "grad_norm": 1.832111348757885, "learning_rate": 3.32543214557893e-05, "loss": 0.199, "step": 7273 }, { "epoch": 0.6205425695273844, "grad_norm": 1.6545297578110894, "learning_rate": 3.324130467181318e-05, "loss": 0.2321, "step": 7274 }, { "epoch": 0.6206278792015014, "grad_norm": 1.456349885977598, "learning_rate": 3.322828916727439e-05, "loss": 0.238, "step": 7275 }, { "epoch": 0.6207131888756185, "grad_norm": 1.9093338457782598, "learning_rate": 3.321527494316662e-05, "loss": 0.2675, "step": 7276 }, { "epoch": 0.6207984985497356, "grad_norm": 1.6494149297844078, "learning_rate": 3.32022620004834e-05, "loss": 0.2574, "step": 7277 }, { "epoch": 0.6208838082238526, "grad_norm": 1.5723092967537282, "learning_rate": 3.3189250340218204e-05, "loss": 0.2476, "step": 7278 }, { "epoch": 0.6209691178979696, "grad_norm": 1.7892261413629422, "learning_rate": 3.317623996336443e-05, "loss": 0.2521, "step": 7279 }, { "epoch": 0.6210544275720866, "grad_norm": 2.072755919187038, "learning_rate": 3.316323087091532e-05, "loss": 0.1646, "step": 7280 }, { "epoch": 0.6211397372462037, "grad_norm": 1.6106101916235263, "learning_rate": 3.315022306386407e-05, "loss": 0.2247, "step": 7281 }, { "epoch": 0.6212250469203208, "grad_norm": 1.6412722809347615, "learning_rate": 3.313721654320375e-05, "loss": 0.1841, "step": 7282 }, { "epoch": 0.6213103565944378, "grad_norm": 2.1386749964303706, "learning_rate": 3.312421130992734e-05, "loss": 0.259, "step": 7283 }, { "epoch": 0.6213956662685548, "grad_norm": 1.6468775450598145, "learning_rate": 3.311120736502771e-05, "loss": 0.2095, "step": 7284 }, { "epoch": 0.6214809759426719, "grad_norm": 1.717814520365745, "learning_rate": 3.309820470949765e-05, "loss": 0.19, "step": 7285 }, { "epoch": 0.621566285616789, "grad_norm": 1.7537786696490778, "learning_rate": 3.3085203344329865e-05, "loss": 0.222, "step": 7286 }, { "epoch": 0.621651595290906, "grad_norm": 1.7186069775809396, "learning_rate": 3.30722032705169e-05, "loss": 0.1904, "step": 7287 }, { "epoch": 0.621736904965023, "grad_norm": 1.5496796123029282, "learning_rate": 3.305920448905125e-05, "loss": 0.2307, "step": 7288 }, { "epoch": 0.6218222146391401, "grad_norm": 1.5217940636013563, "learning_rate": 3.304620700092535e-05, "loss": 0.1994, "step": 7289 }, { "epoch": 0.6219075243132571, "grad_norm": 1.8265856921679091, "learning_rate": 3.303321080713143e-05, "loss": 0.2177, "step": 7290 }, { "epoch": 0.6219928339873741, "grad_norm": 1.6684953273791214, "learning_rate": 3.30202159086617e-05, "loss": 0.2564, "step": 7291 }, { "epoch": 0.6220781436614912, "grad_norm": 1.6800647400688922, "learning_rate": 3.300722230650827e-05, "loss": 0.2252, "step": 7292 }, { "epoch": 0.6221634533356083, "grad_norm": 1.9010925564856633, "learning_rate": 3.2994230001663104e-05, "loss": 0.2379, "step": 7293 }, { "epoch": 0.6222487630097253, "grad_norm": 1.7113851022715423, "learning_rate": 3.298123899511811e-05, "loss": 0.2771, "step": 7294 }, { "epoch": 0.6223340726838423, "grad_norm": 1.465620074233377, "learning_rate": 3.2968249287865084e-05, "loss": 0.2149, "step": 7295 }, { "epoch": 0.6224193823579593, "grad_norm": 1.6729942211956539, "learning_rate": 3.295526088089573e-05, "loss": 0.2176, "step": 7296 }, { "epoch": 0.6225046920320765, "grad_norm": 1.4190795582895015, "learning_rate": 3.294227377520161e-05, "loss": 0.2637, "step": 7297 }, { "epoch": 0.6225900017061935, "grad_norm": 1.55900411533598, "learning_rate": 3.292928797177425e-05, "loss": 0.1942, "step": 7298 }, { "epoch": 0.6226753113803105, "grad_norm": 1.6988300448907006, "learning_rate": 3.291630347160505e-05, "loss": 0.2314, "step": 7299 }, { "epoch": 0.6227606210544275, "grad_norm": 1.8949676325708602, "learning_rate": 3.290332027568529e-05, "loss": 0.2772, "step": 7300 }, { "epoch": 0.6228459307285447, "grad_norm": 2.073372821043409, "learning_rate": 3.289033838500618e-05, "loss": 0.2753, "step": 7301 }, { "epoch": 0.6229312404026617, "grad_norm": 1.6932741207498267, "learning_rate": 3.2877357800558804e-05, "loss": 0.1959, "step": 7302 }, { "epoch": 0.6230165500767787, "grad_norm": 1.4627247857694963, "learning_rate": 3.286437852333418e-05, "loss": 0.244, "step": 7303 }, { "epoch": 0.6231018597508957, "grad_norm": 1.8670714912927202, "learning_rate": 3.2851400554323184e-05, "loss": 0.2094, "step": 7304 }, { "epoch": 0.6231871694250128, "grad_norm": 1.8008607528248994, "learning_rate": 3.2838423894516656e-05, "loss": 0.2488, "step": 7305 }, { "epoch": 0.6232724790991299, "grad_norm": 1.6819154379699257, "learning_rate": 3.282544854490524e-05, "loss": 0.229, "step": 7306 }, { "epoch": 0.6233577887732469, "grad_norm": 1.198909508412235, "learning_rate": 3.281247450647956e-05, "loss": 0.193, "step": 7307 }, { "epoch": 0.6234430984473639, "grad_norm": 1.359783418183372, "learning_rate": 3.279950178023012e-05, "loss": 0.2173, "step": 7308 }, { "epoch": 0.623528408121481, "grad_norm": 1.7953876476313781, "learning_rate": 3.2786530367147336e-05, "loss": 0.1852, "step": 7309 }, { "epoch": 0.623613717795598, "grad_norm": 1.7192822412549384, "learning_rate": 3.277356026822147e-05, "loss": 0.1996, "step": 7310 }, { "epoch": 0.6236990274697151, "grad_norm": 1.7494187070804705, "learning_rate": 3.2760591484442735e-05, "loss": 0.2583, "step": 7311 }, { "epoch": 0.6237843371438321, "grad_norm": 1.427284848355554, "learning_rate": 3.274762401680124e-05, "loss": 0.208, "step": 7312 }, { "epoch": 0.6238696468179492, "grad_norm": 1.8344957900041883, "learning_rate": 3.2734657866286974e-05, "loss": 0.1969, "step": 7313 }, { "epoch": 0.6239549564920662, "grad_norm": 1.8254309472027068, "learning_rate": 3.272169303388982e-05, "loss": 0.2795, "step": 7314 }, { "epoch": 0.6240402661661832, "grad_norm": 1.5508810246654114, "learning_rate": 3.27087295205996e-05, "loss": 0.193, "step": 7315 }, { "epoch": 0.6241255758403003, "grad_norm": 1.629937373871438, "learning_rate": 3.269576732740598e-05, "loss": 0.2295, "step": 7316 }, { "epoch": 0.6242108855144173, "grad_norm": 1.8606582188612102, "learning_rate": 3.268280645529857e-05, "loss": 0.1989, "step": 7317 }, { "epoch": 0.6242961951885344, "grad_norm": 1.9763832463338582, "learning_rate": 3.2669846905266885e-05, "loss": 0.2028, "step": 7318 }, { "epoch": 0.6243815048626514, "grad_norm": 1.4663382675516843, "learning_rate": 3.265688867830027e-05, "loss": 0.2114, "step": 7319 }, { "epoch": 0.6244668145367684, "grad_norm": 1.541877147980776, "learning_rate": 3.264393177538805e-05, "loss": 0.1623, "step": 7320 }, { "epoch": 0.6245521242108855, "grad_norm": 1.8274228956705592, "learning_rate": 3.2630976197519424e-05, "loss": 0.2256, "step": 7321 }, { "epoch": 0.6246374338850026, "grad_norm": 1.538087332588653, "learning_rate": 3.2618021945683455e-05, "loss": 0.2504, "step": 7322 }, { "epoch": 0.6247227435591196, "grad_norm": 1.7485308611150472, "learning_rate": 3.2605069020869136e-05, "loss": 0.1968, "step": 7323 }, { "epoch": 0.6248080532332366, "grad_norm": 1.7227487690619085, "learning_rate": 3.259211742406537e-05, "loss": 0.2144, "step": 7324 }, { "epoch": 0.6248933629073536, "grad_norm": 1.7776134244459987, "learning_rate": 3.2579167156260934e-05, "loss": 0.2224, "step": 7325 }, { "epoch": 0.6249786725814708, "grad_norm": 1.3701860950241609, "learning_rate": 3.256621821844451e-05, "loss": 0.2806, "step": 7326 }, { "epoch": 0.6250639822555878, "grad_norm": 1.8186274247960568, "learning_rate": 3.255327061160467e-05, "loss": 0.3253, "step": 7327 }, { "epoch": 0.6251492919297048, "grad_norm": 1.2729164414892087, "learning_rate": 3.2540324336729935e-05, "loss": 0.2412, "step": 7328 }, { "epoch": 0.6252346016038218, "grad_norm": 1.5450233397217772, "learning_rate": 3.2527379394808635e-05, "loss": 0.2318, "step": 7329 }, { "epoch": 0.625319911277939, "grad_norm": 1.659041158136859, "learning_rate": 3.251443578682906e-05, "loss": 0.2945, "step": 7330 }, { "epoch": 0.625405220952056, "grad_norm": 1.3726965944443694, "learning_rate": 3.250149351377942e-05, "loss": 0.1533, "step": 7331 }, { "epoch": 0.625490530626173, "grad_norm": 1.4538888871818065, "learning_rate": 3.2488552576647746e-05, "loss": 0.1711, "step": 7332 }, { "epoch": 0.62557584030029, "grad_norm": 1.7472629941958453, "learning_rate": 3.247561297642203e-05, "loss": 0.2398, "step": 7333 }, { "epoch": 0.6256611499744071, "grad_norm": 2.1344354623745345, "learning_rate": 3.246267471409015e-05, "loss": 0.2225, "step": 7334 }, { "epoch": 0.6257464596485242, "grad_norm": 1.9948564191887874, "learning_rate": 3.2449737790639855e-05, "loss": 0.2535, "step": 7335 }, { "epoch": 0.6258317693226412, "grad_norm": 1.5907585642558766, "learning_rate": 3.24368022070588e-05, "loss": 0.2672, "step": 7336 }, { "epoch": 0.6259170789967582, "grad_norm": 1.5165649331036175, "learning_rate": 3.24238679643346e-05, "loss": 0.2595, "step": 7337 }, { "epoch": 0.6260023886708753, "grad_norm": 1.4571082373521527, "learning_rate": 3.2410935063454654e-05, "loss": 0.2339, "step": 7338 }, { "epoch": 0.6260876983449923, "grad_norm": 1.287962565612569, "learning_rate": 3.239800350540635e-05, "loss": 0.2464, "step": 7339 }, { "epoch": 0.6261730080191094, "grad_norm": 1.4129414619663274, "learning_rate": 3.238507329117694e-05, "loss": 0.2183, "step": 7340 }, { "epoch": 0.6262583176932264, "grad_norm": 1.4540257277226123, "learning_rate": 3.23721444217536e-05, "loss": 0.2002, "step": 7341 }, { "epoch": 0.6263436273673435, "grad_norm": 1.429953628535115, "learning_rate": 3.235921689812334e-05, "loss": 0.1639, "step": 7342 }, { "epoch": 0.6264289370414605, "grad_norm": 1.623704540623601, "learning_rate": 3.234629072127314e-05, "loss": 0.2165, "step": 7343 }, { "epoch": 0.6265142467155775, "grad_norm": 1.6791209130249856, "learning_rate": 3.233336589218983e-05, "loss": 0.1505, "step": 7344 }, { "epoch": 0.6265995563896946, "grad_norm": 1.613420900669154, "learning_rate": 3.232044241186014e-05, "loss": 0.252, "step": 7345 }, { "epoch": 0.6266848660638117, "grad_norm": 1.7827939486395123, "learning_rate": 3.230752028127073e-05, "loss": 0.2235, "step": 7346 }, { "epoch": 0.6267701757379287, "grad_norm": 1.4370677132152552, "learning_rate": 3.229459950140814e-05, "loss": 0.1816, "step": 7347 }, { "epoch": 0.6268554854120457, "grad_norm": 1.9363044003158112, "learning_rate": 3.228168007325877e-05, "loss": 0.1997, "step": 7348 }, { "epoch": 0.6269407950861627, "grad_norm": 1.403156990417243, "learning_rate": 3.2268761997808984e-05, "loss": 0.2009, "step": 7349 }, { "epoch": 0.6270261047602799, "grad_norm": 1.25415683036994, "learning_rate": 3.2255845276045e-05, "loss": 0.1745, "step": 7350 }, { "epoch": 0.6271114144343969, "grad_norm": 1.729939468074506, "learning_rate": 3.224292990895292e-05, "loss": 0.1893, "step": 7351 }, { "epoch": 0.6271967241085139, "grad_norm": 1.7382117096781835, "learning_rate": 3.22300158975188e-05, "loss": 0.1337, "step": 7352 }, { "epoch": 0.6272820337826309, "grad_norm": 1.461499111325349, "learning_rate": 3.221710324272852e-05, "loss": 0.1987, "step": 7353 }, { "epoch": 0.627367343456748, "grad_norm": 1.7333157715763627, "learning_rate": 3.2204191945567925e-05, "loss": 0.2107, "step": 7354 }, { "epoch": 0.6274526531308651, "grad_norm": 1.6157339446710148, "learning_rate": 3.2191282007022705e-05, "loss": 0.1685, "step": 7355 }, { "epoch": 0.6275379628049821, "grad_norm": 1.9037738414331025, "learning_rate": 3.2178373428078454e-05, "loss": 0.1999, "step": 7356 }, { "epoch": 0.6276232724790991, "grad_norm": 1.920682956107879, "learning_rate": 3.2165466209720705e-05, "loss": 0.2187, "step": 7357 }, { "epoch": 0.6277085821532161, "grad_norm": 2.8967793700926103, "learning_rate": 3.215256035293483e-05, "loss": 0.2352, "step": 7358 }, { "epoch": 0.6277938918273333, "grad_norm": 2.0340371597031415, "learning_rate": 3.213965585870612e-05, "loss": 0.2244, "step": 7359 }, { "epoch": 0.6278792015014503, "grad_norm": 1.8451168360325694, "learning_rate": 3.2126752728019805e-05, "loss": 0.2156, "step": 7360 }, { "epoch": 0.6279645111755673, "grad_norm": 1.8174971391322146, "learning_rate": 3.2113850961860915e-05, "loss": 0.2265, "step": 7361 }, { "epoch": 0.6280498208496843, "grad_norm": 1.7789303583212808, "learning_rate": 3.210095056121446e-05, "loss": 0.1895, "step": 7362 }, { "epoch": 0.6281351305238014, "grad_norm": 2.1397557407983605, "learning_rate": 3.208805152706533e-05, "loss": 0.19, "step": 7363 }, { "epoch": 0.6282204401979185, "grad_norm": 2.5494495321284782, "learning_rate": 3.2075153860398265e-05, "loss": 0.2435, "step": 7364 }, { "epoch": 0.6283057498720355, "grad_norm": 1.5521489413260476, "learning_rate": 3.206225756219796e-05, "loss": 0.2443, "step": 7365 }, { "epoch": 0.6283910595461525, "grad_norm": 2.1543446844196286, "learning_rate": 3.204936263344896e-05, "loss": 0.2271, "step": 7366 }, { "epoch": 0.6284763692202696, "grad_norm": 2.0614038502293126, "learning_rate": 3.203646907513575e-05, "loss": 0.2245, "step": 7367 }, { "epoch": 0.6285616788943866, "grad_norm": 1.8436914208368314, "learning_rate": 3.202357688824265e-05, "loss": 0.2749, "step": 7368 }, { "epoch": 0.6286469885685037, "grad_norm": 2.040995299315658, "learning_rate": 3.201068607375393e-05, "loss": 0.2628, "step": 7369 }, { "epoch": 0.6287322982426207, "grad_norm": 1.4001131727251903, "learning_rate": 3.199779663265375e-05, "loss": 0.2335, "step": 7370 }, { "epoch": 0.6288176079167378, "grad_norm": 1.9183882431315873, "learning_rate": 3.198490856592611e-05, "loss": 0.229, "step": 7371 }, { "epoch": 0.6289029175908548, "grad_norm": 1.8551857180308444, "learning_rate": 3.197202187455498e-05, "loss": 0.1853, "step": 7372 }, { "epoch": 0.6289882272649718, "grad_norm": 1.764223876275377, "learning_rate": 3.195913655952419e-05, "loss": 0.2611, "step": 7373 }, { "epoch": 0.6290735369390889, "grad_norm": 1.676706054457429, "learning_rate": 3.194625262181744e-05, "loss": 0.2316, "step": 7374 }, { "epoch": 0.629158846613206, "grad_norm": 1.6495497869613716, "learning_rate": 3.1933370062418366e-05, "loss": 0.1833, "step": 7375 }, { "epoch": 0.629244156287323, "grad_norm": 1.5099447718998276, "learning_rate": 3.1920488882310495e-05, "loss": 0.2082, "step": 7376 }, { "epoch": 0.62932946596144, "grad_norm": 1.8422519611084711, "learning_rate": 3.1907609082477215e-05, "loss": 0.2487, "step": 7377 }, { "epoch": 0.629414775635557, "grad_norm": 1.4446501992881262, "learning_rate": 3.189473066390183e-05, "loss": 0.2101, "step": 7378 }, { "epoch": 0.6295000853096742, "grad_norm": 1.4739823985297575, "learning_rate": 3.1881853627567566e-05, "loss": 0.18, "step": 7379 }, { "epoch": 0.6295853949837912, "grad_norm": 1.6976881126928218, "learning_rate": 3.186897797445748e-05, "loss": 0.1974, "step": 7380 }, { "epoch": 0.6296707046579082, "grad_norm": 1.715173502784287, "learning_rate": 3.1856103705554575e-05, "loss": 0.1959, "step": 7381 }, { "epoch": 0.6297560143320252, "grad_norm": 1.7126698541984033, "learning_rate": 3.184323082184176e-05, "loss": 0.1577, "step": 7382 }, { "epoch": 0.6298413240061423, "grad_norm": 1.5270730483613868, "learning_rate": 3.1830359324301764e-05, "loss": 0.1984, "step": 7383 }, { "epoch": 0.6299266336802594, "grad_norm": 1.8768821806108154, "learning_rate": 3.181748921391728e-05, "loss": 0.2555, "step": 7384 }, { "epoch": 0.6300119433543764, "grad_norm": 2.003794170613847, "learning_rate": 3.1804620491670884e-05, "loss": 0.209, "step": 7385 }, { "epoch": 0.6300972530284934, "grad_norm": 2.07071644423065, "learning_rate": 3.1791753158545026e-05, "loss": 0.2082, "step": 7386 }, { "epoch": 0.6301825627026105, "grad_norm": 1.3825935147270625, "learning_rate": 3.1778887215522044e-05, "loss": 0.2489, "step": 7387 }, { "epoch": 0.6302678723767275, "grad_norm": 1.7545375579875502, "learning_rate": 3.17660226635842e-05, "loss": 0.2533, "step": 7388 }, { "epoch": 0.6303531820508446, "grad_norm": 1.8808457482023568, "learning_rate": 3.175315950371365e-05, "loss": 0.3098, "step": 7389 }, { "epoch": 0.6304384917249616, "grad_norm": 1.5033993349720116, "learning_rate": 3.1740297736892376e-05, "loss": 0.222, "step": 7390 }, { "epoch": 0.6305238013990787, "grad_norm": 1.7086680789891564, "learning_rate": 3.172743736410235e-05, "loss": 0.1818, "step": 7391 }, { "epoch": 0.6306091110731957, "grad_norm": 1.6174868983780242, "learning_rate": 3.171457838632539e-05, "loss": 0.2222, "step": 7392 }, { "epoch": 0.6306944207473127, "grad_norm": 1.7250716733554872, "learning_rate": 3.170172080454319e-05, "loss": 0.2171, "step": 7393 }, { "epoch": 0.6307797304214298, "grad_norm": 1.417790173091403, "learning_rate": 3.168886461973737e-05, "loss": 0.1984, "step": 7394 }, { "epoch": 0.6308650400955468, "grad_norm": 1.8671468945800547, "learning_rate": 3.167600983288944e-05, "loss": 0.1693, "step": 7395 }, { "epoch": 0.6309503497696639, "grad_norm": 1.7411153599153628, "learning_rate": 3.166315644498078e-05, "loss": 0.1773, "step": 7396 }, { "epoch": 0.6310356594437809, "grad_norm": 1.5785872282927864, "learning_rate": 3.165030445699269e-05, "loss": 0.2011, "step": 7397 }, { "epoch": 0.631120969117898, "grad_norm": 1.6362642736081932, "learning_rate": 3.163745386990634e-05, "loss": 0.2925, "step": 7398 }, { "epoch": 0.631206278792015, "grad_norm": 2.0517795421787812, "learning_rate": 3.162460468470281e-05, "loss": 0.1654, "step": 7399 }, { "epoch": 0.6312915884661321, "grad_norm": 1.4525852636632626, "learning_rate": 3.161175690236305e-05, "loss": 0.225, "step": 7400 }, { "epoch": 0.6313768981402491, "grad_norm": 1.6062568159738047, "learning_rate": 3.159891052386795e-05, "loss": 0.1709, "step": 7401 }, { "epoch": 0.6314622078143661, "grad_norm": 1.5372830865946838, "learning_rate": 3.158606555019826e-05, "loss": 0.2332, "step": 7402 }, { "epoch": 0.6315475174884831, "grad_norm": 1.7662024439884105, "learning_rate": 3.157322198233459e-05, "loss": 0.2118, "step": 7403 }, { "epoch": 0.6316328271626003, "grad_norm": 1.4495660000815862, "learning_rate": 3.156037982125751e-05, "loss": 0.2089, "step": 7404 }, { "epoch": 0.6317181368367173, "grad_norm": 2.1890392882200844, "learning_rate": 3.1547539067947454e-05, "loss": 0.1866, "step": 7405 }, { "epoch": 0.6318034465108343, "grad_norm": 1.910734311496724, "learning_rate": 3.153469972338472e-05, "loss": 0.2102, "step": 7406 }, { "epoch": 0.6318887561849513, "grad_norm": 1.7654129171012776, "learning_rate": 3.1521861788549544e-05, "loss": 0.2667, "step": 7407 }, { "epoch": 0.6319740658590685, "grad_norm": 1.8283228318855491, "learning_rate": 3.150902526442203e-05, "loss": 0.2506, "step": 7408 }, { "epoch": 0.6320593755331855, "grad_norm": 1.7937067378871463, "learning_rate": 3.149619015198218e-05, "loss": 0.1987, "step": 7409 }, { "epoch": 0.6321446852073025, "grad_norm": 1.6316382461322285, "learning_rate": 3.148335645220987e-05, "loss": 0.1828, "step": 7410 }, { "epoch": 0.6322299948814195, "grad_norm": 1.6051993410855805, "learning_rate": 3.147052416608491e-05, "loss": 0.2311, "step": 7411 }, { "epoch": 0.6323153045555366, "grad_norm": 1.6445155924507, "learning_rate": 3.145769329458695e-05, "loss": 0.178, "step": 7412 }, { "epoch": 0.6324006142296537, "grad_norm": 1.8499686547794214, "learning_rate": 3.144486383869557e-05, "loss": 0.2365, "step": 7413 }, { "epoch": 0.6324859239037707, "grad_norm": 1.9597875255131256, "learning_rate": 3.143203579939023e-05, "loss": 0.1847, "step": 7414 }, { "epoch": 0.6325712335778877, "grad_norm": 1.740250335859922, "learning_rate": 3.1419209177650324e-05, "loss": 0.2315, "step": 7415 }, { "epoch": 0.6326565432520048, "grad_norm": 1.5553690121075652, "learning_rate": 3.1406383974455025e-05, "loss": 0.1825, "step": 7416 }, { "epoch": 0.6327418529261218, "grad_norm": 1.288361052198072, "learning_rate": 3.139356019078352e-05, "loss": 0.1649, "step": 7417 }, { "epoch": 0.6328271626002389, "grad_norm": 1.7821414791334467, "learning_rate": 3.1380737827614816e-05, "loss": 0.2663, "step": 7418 }, { "epoch": 0.6329124722743559, "grad_norm": 2.059436256528021, "learning_rate": 3.136791688592784e-05, "loss": 0.234, "step": 7419 }, { "epoch": 0.632997781948473, "grad_norm": 1.4816839620627058, "learning_rate": 3.1355097366701385e-05, "loss": 0.2278, "step": 7420 }, { "epoch": 0.63308309162259, "grad_norm": 1.5703027070152895, "learning_rate": 3.134227927091419e-05, "loss": 0.2537, "step": 7421 }, { "epoch": 0.633168401296707, "grad_norm": 1.6227277862105056, "learning_rate": 3.13294625995448e-05, "loss": 0.1751, "step": 7422 }, { "epoch": 0.6332537109708241, "grad_norm": 1.8103025203609533, "learning_rate": 3.131664735357174e-05, "loss": 0.2232, "step": 7423 }, { "epoch": 0.6333390206449412, "grad_norm": 1.9018056370838468, "learning_rate": 3.130383353397338e-05, "loss": 0.1933, "step": 7424 }, { "epoch": 0.6334243303190582, "grad_norm": 1.5240543779348774, "learning_rate": 3.129102114172796e-05, "loss": 0.1951, "step": 7425 }, { "epoch": 0.6335096399931752, "grad_norm": 1.5879400229189828, "learning_rate": 3.127821017781365e-05, "loss": 0.3022, "step": 7426 }, { "epoch": 0.6335949496672922, "grad_norm": 1.9567124036591441, "learning_rate": 3.126540064320853e-05, "loss": 0.2464, "step": 7427 }, { "epoch": 0.6336802593414094, "grad_norm": 1.896986418567356, "learning_rate": 3.12525925388905e-05, "loss": 0.213, "step": 7428 }, { "epoch": 0.6337655690155264, "grad_norm": 1.4230031997633965, "learning_rate": 3.1239785865837415e-05, "loss": 0.1551, "step": 7429 }, { "epoch": 0.6338508786896434, "grad_norm": 1.4823786109772898, "learning_rate": 3.122698062502697e-05, "loss": 0.2392, "step": 7430 }, { "epoch": 0.6339361883637604, "grad_norm": 1.2971757287337056, "learning_rate": 3.121417681743682e-05, "loss": 0.219, "step": 7431 }, { "epoch": 0.6340214980378774, "grad_norm": 1.5832953030218477, "learning_rate": 3.120137444404442e-05, "loss": 0.264, "step": 7432 }, { "epoch": 0.6341068077119946, "grad_norm": 1.5450864528183332, "learning_rate": 3.118857350582719e-05, "loss": 0.2229, "step": 7433 }, { "epoch": 0.6341921173861116, "grad_norm": 1.7335949767539458, "learning_rate": 3.117577400376243e-05, "loss": 0.2123, "step": 7434 }, { "epoch": 0.6342774270602286, "grad_norm": 1.8352155848785061, "learning_rate": 3.116297593882727e-05, "loss": 0.1766, "step": 7435 }, { "epoch": 0.6343627367343456, "grad_norm": 2.1294013051478764, "learning_rate": 3.115017931199879e-05, "loss": 0.1953, "step": 7436 }, { "epoch": 0.6344480464084628, "grad_norm": 1.2083759903229427, "learning_rate": 3.1137384124253974e-05, "loss": 0.1302, "step": 7437 }, { "epoch": 0.6345333560825798, "grad_norm": 1.368385052038495, "learning_rate": 3.112459037656963e-05, "loss": 0.1869, "step": 7438 }, { "epoch": 0.6346186657566968, "grad_norm": 1.8000575533248881, "learning_rate": 3.111179806992251e-05, "loss": 0.3073, "step": 7439 }, { "epoch": 0.6347039754308138, "grad_norm": 1.624405091767643, "learning_rate": 3.109900720528925e-05, "loss": 0.229, "step": 7440 }, { "epoch": 0.6347892851049309, "grad_norm": 2.021503835390931, "learning_rate": 3.108621778364634e-05, "loss": 0.2519, "step": 7441 }, { "epoch": 0.634874594779048, "grad_norm": 1.712067252843555, "learning_rate": 3.107342980597019e-05, "loss": 0.1995, "step": 7442 }, { "epoch": 0.634959904453165, "grad_norm": 1.927098378990481, "learning_rate": 3.106064327323711e-05, "loss": 0.1793, "step": 7443 }, { "epoch": 0.635045214127282, "grad_norm": 1.465881143079058, "learning_rate": 3.1047858186423254e-05, "loss": 0.1647, "step": 7444 }, { "epoch": 0.6351305238013991, "grad_norm": 1.7056924209325257, "learning_rate": 3.1035074546504715e-05, "loss": 0.2697, "step": 7445 }, { "epoch": 0.6352158334755161, "grad_norm": 1.5066823726583742, "learning_rate": 3.102229235445746e-05, "loss": 0.2148, "step": 7446 }, { "epoch": 0.6353011431496332, "grad_norm": 1.557749890262096, "learning_rate": 3.1009511611257354e-05, "loss": 0.2246, "step": 7447 }, { "epoch": 0.6353864528237502, "grad_norm": 1.6648047139654187, "learning_rate": 3.0996732317880096e-05, "loss": 0.2163, "step": 7448 }, { "epoch": 0.6354717624978673, "grad_norm": 1.6199651627268057, "learning_rate": 3.098395447530136e-05, "loss": 0.2271, "step": 7449 }, { "epoch": 0.6355570721719843, "grad_norm": 1.5146687430638421, "learning_rate": 3.0971178084496645e-05, "loss": 0.2365, "step": 7450 }, { "epoch": 0.6356423818461013, "grad_norm": 1.8868891479123513, "learning_rate": 3.0958403146441364e-05, "loss": 0.2393, "step": 7451 }, { "epoch": 0.6357276915202184, "grad_norm": 1.8075619219755994, "learning_rate": 3.0945629662110805e-05, "loss": 0.2627, "step": 7452 }, { "epoch": 0.6358130011943355, "grad_norm": 1.5268747531611164, "learning_rate": 3.0932857632480185e-05, "loss": 0.2107, "step": 7453 }, { "epoch": 0.6358983108684525, "grad_norm": 1.5702953432578768, "learning_rate": 3.092008705852455e-05, "loss": 0.2523, "step": 7454 }, { "epoch": 0.6359836205425695, "grad_norm": 1.8204565687020038, "learning_rate": 3.090731794121887e-05, "loss": 0.1943, "step": 7455 }, { "epoch": 0.6360689302166865, "grad_norm": 1.7750648728456853, "learning_rate": 3.089455028153803e-05, "loss": 0.3252, "step": 7456 }, { "epoch": 0.6361542398908037, "grad_norm": 1.3674516695119165, "learning_rate": 3.088178408045672e-05, "loss": 0.2329, "step": 7457 }, { "epoch": 0.6362395495649207, "grad_norm": 1.668632571771394, "learning_rate": 3.086901933894961e-05, "loss": 0.1809, "step": 7458 }, { "epoch": 0.6363248592390377, "grad_norm": 2.023807212016177, "learning_rate": 3.085625605799123e-05, "loss": 0.2587, "step": 7459 }, { "epoch": 0.6364101689131547, "grad_norm": 1.8055630496268964, "learning_rate": 3.084349423855596e-05, "loss": 0.2417, "step": 7460 }, { "epoch": 0.6364954785872718, "grad_norm": 2.2872428040711794, "learning_rate": 3.083073388161811e-05, "loss": 0.2238, "step": 7461 }, { "epoch": 0.6365807882613889, "grad_norm": 1.6232510471572683, "learning_rate": 3.0817974988151846e-05, "loss": 0.2498, "step": 7462 }, { "epoch": 0.6366660979355059, "grad_norm": 1.8858640642256783, "learning_rate": 3.080521755913128e-05, "loss": 0.1894, "step": 7463 }, { "epoch": 0.6367514076096229, "grad_norm": 2.187759384035759, "learning_rate": 3.079246159553034e-05, "loss": 0.2255, "step": 7464 }, { "epoch": 0.63683671728374, "grad_norm": 1.5084118220244092, "learning_rate": 3.0779707098322885e-05, "loss": 0.1992, "step": 7465 }, { "epoch": 0.636922026957857, "grad_norm": 1.7514218957089691, "learning_rate": 3.076695406848268e-05, "loss": 0.2008, "step": 7466 }, { "epoch": 0.6370073366319741, "grad_norm": 1.9266843090547585, "learning_rate": 3.075420250698331e-05, "loss": 0.2172, "step": 7467 }, { "epoch": 0.6370926463060911, "grad_norm": 1.7651981073998237, "learning_rate": 3.0741452414798295e-05, "loss": 0.2662, "step": 7468 }, { "epoch": 0.6371779559802082, "grad_norm": 1.56144037556602, "learning_rate": 3.072870379290107e-05, "loss": 0.2018, "step": 7469 }, { "epoch": 0.6372632656543252, "grad_norm": 1.804039512658307, "learning_rate": 3.071595664226489e-05, "loss": 0.2533, "step": 7470 }, { "epoch": 0.6373485753284422, "grad_norm": 2.0281328198204287, "learning_rate": 3.070321096386295e-05, "loss": 0.2744, "step": 7471 }, { "epoch": 0.6374338850025593, "grad_norm": 2.1098419802825745, "learning_rate": 3.069046675866831e-05, "loss": 0.1795, "step": 7472 }, { "epoch": 0.6375191946766763, "grad_norm": 1.3628779035976577, "learning_rate": 3.067772402765393e-05, "loss": 0.2334, "step": 7473 }, { "epoch": 0.6376045043507934, "grad_norm": 1.6674452949712628, "learning_rate": 3.0664982771792626e-05, "loss": 0.2008, "step": 7474 }, { "epoch": 0.6376898140249104, "grad_norm": 1.8113758942189417, "learning_rate": 3.065224299205714e-05, "loss": 0.2128, "step": 7475 }, { "epoch": 0.6377751236990274, "grad_norm": 1.8836279405159173, "learning_rate": 3.063950468942011e-05, "loss": 0.1845, "step": 7476 }, { "epoch": 0.6378604333731445, "grad_norm": 1.802657335480592, "learning_rate": 3.062676786485399e-05, "loss": 0.266, "step": 7477 }, { "epoch": 0.6379457430472616, "grad_norm": 1.3431778510316321, "learning_rate": 3.0614032519331204e-05, "loss": 0.1237, "step": 7478 }, { "epoch": 0.6380310527213786, "grad_norm": 1.8744550548831784, "learning_rate": 3.060129865382403e-05, "loss": 0.2302, "step": 7479 }, { "epoch": 0.6381163623954956, "grad_norm": 1.3550814644229985, "learning_rate": 3.05885662693046e-05, "loss": 0.2366, "step": 7480 }, { "epoch": 0.6382016720696126, "grad_norm": 1.5294685900712464, "learning_rate": 3.0575835366745e-05, "loss": 0.2757, "step": 7481 }, { "epoch": 0.6382869817437298, "grad_norm": 1.667596637035995, "learning_rate": 3.0563105947117147e-05, "loss": 0.2128, "step": 7482 }, { "epoch": 0.6383722914178468, "grad_norm": 1.7153471813841346, "learning_rate": 3.055037801139286e-05, "loss": 0.1929, "step": 7483 }, { "epoch": 0.6384576010919638, "grad_norm": 1.7585430746348152, "learning_rate": 3.053765156054385e-05, "loss": 0.289, "step": 7484 }, { "epoch": 0.6385429107660808, "grad_norm": 1.4100313844637393, "learning_rate": 3.0524926595541744e-05, "loss": 0.2331, "step": 7485 }, { "epoch": 0.638628220440198, "grad_norm": 1.6849054240956591, "learning_rate": 3.051220311735798e-05, "loss": 0.1936, "step": 7486 }, { "epoch": 0.638713530114315, "grad_norm": 1.8315807187108741, "learning_rate": 3.0499481126963947e-05, "loss": 0.2103, "step": 7487 }, { "epoch": 0.638798839788432, "grad_norm": 1.5187182764578393, "learning_rate": 3.0486760625330935e-05, "loss": 0.2467, "step": 7488 }, { "epoch": 0.638884149462549, "grad_norm": 1.4364720691285133, "learning_rate": 3.0474041613430028e-05, "loss": 0.2233, "step": 7489 }, { "epoch": 0.6389694591366661, "grad_norm": 1.8854279772452593, "learning_rate": 3.046132409223228e-05, "loss": 0.2806, "step": 7490 }, { "epoch": 0.6390547688107832, "grad_norm": 2.3132048898633673, "learning_rate": 3.0448608062708628e-05, "loss": 0.2391, "step": 7491 }, { "epoch": 0.6391400784849002, "grad_norm": 1.6685950646643983, "learning_rate": 3.0435893525829846e-05, "loss": 0.2472, "step": 7492 }, { "epoch": 0.6392253881590172, "grad_norm": 1.9597136793424266, "learning_rate": 3.0423180482566633e-05, "loss": 0.1723, "step": 7493 }, { "epoch": 0.6393106978331343, "grad_norm": 1.8212349318572856, "learning_rate": 3.0410468933889548e-05, "loss": 0.1826, "step": 7494 }, { "epoch": 0.6393960075072513, "grad_norm": 1.4935499432402255, "learning_rate": 3.0397758880769084e-05, "loss": 0.1515, "step": 7495 }, { "epoch": 0.6394813171813684, "grad_norm": 1.7721077391074496, "learning_rate": 3.038505032417554e-05, "loss": 0.1988, "step": 7496 }, { "epoch": 0.6395666268554854, "grad_norm": 1.8369522212444083, "learning_rate": 3.0372343265079172e-05, "loss": 0.1952, "step": 7497 }, { "epoch": 0.6396519365296025, "grad_norm": 1.5446480798987396, "learning_rate": 3.0359637704450117e-05, "loss": 0.1999, "step": 7498 }, { "epoch": 0.6397372462037195, "grad_norm": 1.536029130719037, "learning_rate": 3.034693364325833e-05, "loss": 0.1932, "step": 7499 }, { "epoch": 0.6398225558778365, "grad_norm": 1.539195098586814, "learning_rate": 3.0334231082473724e-05, "loss": 0.2254, "step": 7500 }, { "epoch": 0.6399078655519536, "grad_norm": 1.7638592963685806, "learning_rate": 3.0321530023066093e-05, "loss": 0.242, "step": 7501 }, { "epoch": 0.6399931752260707, "grad_norm": 2.4160049672612542, "learning_rate": 3.030883046600505e-05, "loss": 0.2425, "step": 7502 }, { "epoch": 0.6400784849001877, "grad_norm": 1.5439377458207215, "learning_rate": 3.0296132412260175e-05, "loss": 0.1932, "step": 7503 }, { "epoch": 0.6401637945743047, "grad_norm": 1.8001256316105547, "learning_rate": 3.0283435862800873e-05, "loss": 0.2181, "step": 7504 }, { "epoch": 0.6402491042484217, "grad_norm": 1.9301027167860707, "learning_rate": 3.0270740818596487e-05, "loss": 0.2003, "step": 7505 }, { "epoch": 0.6403344139225389, "grad_norm": 1.591567171525494, "learning_rate": 3.0258047280616187e-05, "loss": 0.2368, "step": 7506 }, { "epoch": 0.6404197235966559, "grad_norm": 2.086718466646953, "learning_rate": 3.0245355249829055e-05, "loss": 0.2412, "step": 7507 }, { "epoch": 0.6405050332707729, "grad_norm": 2.1311243104089486, "learning_rate": 3.023266472720411e-05, "loss": 0.2007, "step": 7508 }, { "epoch": 0.6405903429448899, "grad_norm": 1.679946311705893, "learning_rate": 3.0219975713710134e-05, "loss": 0.1489, "step": 7509 }, { "epoch": 0.6406756526190069, "grad_norm": 1.2436168771935352, "learning_rate": 3.020728821031591e-05, "loss": 0.2155, "step": 7510 }, { "epoch": 0.6407609622931241, "grad_norm": 1.702545907498142, "learning_rate": 3.019460221799007e-05, "loss": 0.1938, "step": 7511 }, { "epoch": 0.6408462719672411, "grad_norm": 1.6672887197583894, "learning_rate": 3.018191773770108e-05, "loss": 0.1521, "step": 7512 }, { "epoch": 0.6409315816413581, "grad_norm": 1.5300445482836982, "learning_rate": 3.0169234770417376e-05, "loss": 0.1798, "step": 7513 }, { "epoch": 0.6410168913154751, "grad_norm": 1.6576255987805033, "learning_rate": 3.0156553317107218e-05, "loss": 0.2087, "step": 7514 }, { "epoch": 0.6411022009895923, "grad_norm": 1.3771493758362194, "learning_rate": 3.0143873378738762e-05, "loss": 0.1564, "step": 7515 }, { "epoch": 0.6411875106637093, "grad_norm": 1.3452175022659756, "learning_rate": 3.0131194956280052e-05, "loss": 0.2112, "step": 7516 }, { "epoch": 0.6412728203378263, "grad_norm": 1.5729548300182787, "learning_rate": 3.011851805069904e-05, "loss": 0.1392, "step": 7517 }, { "epoch": 0.6413581300119433, "grad_norm": 1.881795774861702, "learning_rate": 3.0105842662963503e-05, "loss": 0.1762, "step": 7518 }, { "epoch": 0.6414434396860604, "grad_norm": 1.3783793969319902, "learning_rate": 3.009316879404116e-05, "loss": 0.2375, "step": 7519 }, { "epoch": 0.6415287493601775, "grad_norm": 1.485131964166407, "learning_rate": 3.0080496444899597e-05, "loss": 0.2047, "step": 7520 }, { "epoch": 0.6416140590342945, "grad_norm": 1.4213846220409219, "learning_rate": 3.0067825616506306e-05, "loss": 0.2593, "step": 7521 }, { "epoch": 0.6416993687084115, "grad_norm": 1.7560228519907897, "learning_rate": 3.005515630982858e-05, "loss": 0.2343, "step": 7522 }, { "epoch": 0.6417846783825286, "grad_norm": 1.2590847807404246, "learning_rate": 3.00424885258337e-05, "loss": 0.1842, "step": 7523 }, { "epoch": 0.6418699880566456, "grad_norm": 1.7169103313708662, "learning_rate": 3.002982226548876e-05, "loss": 0.2007, "step": 7524 }, { "epoch": 0.6419552977307627, "grad_norm": 1.4544987081742935, "learning_rate": 3.0017157529760775e-05, "loss": 0.1838, "step": 7525 }, { "epoch": 0.6420406074048797, "grad_norm": 1.6420274099322678, "learning_rate": 3.0004494319616604e-05, "loss": 0.1678, "step": 7526 }, { "epoch": 0.6421259170789968, "grad_norm": 1.8150384162221087, "learning_rate": 2.9991832636023065e-05, "loss": 0.2547, "step": 7527 }, { "epoch": 0.6422112267531138, "grad_norm": 1.6807591058432496, "learning_rate": 2.997917247994676e-05, "loss": 0.1544, "step": 7528 }, { "epoch": 0.6422965364272308, "grad_norm": 1.8952640335414845, "learning_rate": 2.9966513852354243e-05, "loss": 0.1922, "step": 7529 }, { "epoch": 0.6423818461013479, "grad_norm": 2.0630917711653063, "learning_rate": 2.995385675421196e-05, "loss": 0.2631, "step": 7530 }, { "epoch": 0.642467155775465, "grad_norm": 1.6390545549372872, "learning_rate": 2.994120118648617e-05, "loss": 0.187, "step": 7531 }, { "epoch": 0.642552465449582, "grad_norm": 1.8281698628977072, "learning_rate": 2.9928547150143066e-05, "loss": 0.2414, "step": 7532 }, { "epoch": 0.642637775123699, "grad_norm": 1.7693841801509314, "learning_rate": 2.9915894646148756e-05, "loss": 0.1793, "step": 7533 }, { "epoch": 0.642723084797816, "grad_norm": 1.5245047718847953, "learning_rate": 2.990324367546914e-05, "loss": 0.2128, "step": 7534 }, { "epoch": 0.6428083944719332, "grad_norm": 1.4024379772948952, "learning_rate": 2.9890594239070084e-05, "loss": 0.1615, "step": 7535 }, { "epoch": 0.6428937041460502, "grad_norm": 1.72873782006729, "learning_rate": 2.9877946337917296e-05, "loss": 0.1603, "step": 7536 }, { "epoch": 0.6429790138201672, "grad_norm": 1.6192058554694257, "learning_rate": 2.9865299972976386e-05, "loss": 0.2249, "step": 7537 }, { "epoch": 0.6430643234942842, "grad_norm": 1.4199111287420036, "learning_rate": 2.985265514521281e-05, "loss": 0.1739, "step": 7538 }, { "epoch": 0.6431496331684013, "grad_norm": 1.8327804367499934, "learning_rate": 2.9840011855591953e-05, "loss": 0.1659, "step": 7539 }, { "epoch": 0.6432349428425184, "grad_norm": 1.7140645753053327, "learning_rate": 2.982737010507908e-05, "loss": 0.1967, "step": 7540 }, { "epoch": 0.6433202525166354, "grad_norm": 1.4870008833368855, "learning_rate": 2.9814729894639282e-05, "loss": 0.1801, "step": 7541 }, { "epoch": 0.6434055621907524, "grad_norm": 2.0591963694874638, "learning_rate": 2.980209122523759e-05, "loss": 0.2475, "step": 7542 }, { "epoch": 0.6434908718648695, "grad_norm": 2.092762728551112, "learning_rate": 2.978945409783892e-05, "loss": 0.2058, "step": 7543 }, { "epoch": 0.6435761815389865, "grad_norm": 1.7208461810256634, "learning_rate": 2.9776818513408016e-05, "loss": 0.2066, "step": 7544 }, { "epoch": 0.6436614912131036, "grad_norm": 1.3407506959405844, "learning_rate": 2.9764184472909562e-05, "loss": 0.2458, "step": 7545 }, { "epoch": 0.6437468008872206, "grad_norm": 1.4651433612866402, "learning_rate": 2.975155197730809e-05, "loss": 0.1879, "step": 7546 }, { "epoch": 0.6438321105613376, "grad_norm": 1.5368459783655166, "learning_rate": 2.9738921027568024e-05, "loss": 0.1567, "step": 7547 }, { "epoch": 0.6439174202354547, "grad_norm": 1.5641426607029827, "learning_rate": 2.9726291624653658e-05, "loss": 0.2503, "step": 7548 }, { "epoch": 0.6440027299095717, "grad_norm": 1.5580215359767602, "learning_rate": 2.9713663769529194e-05, "loss": 0.2151, "step": 7549 }, { "epoch": 0.6440880395836888, "grad_norm": 1.5045538758773247, "learning_rate": 2.970103746315872e-05, "loss": 0.2442, "step": 7550 }, { "epoch": 0.6441733492578058, "grad_norm": 1.7828508511696877, "learning_rate": 2.9688412706506147e-05, "loss": 0.2513, "step": 7551 }, { "epoch": 0.6442586589319229, "grad_norm": 1.7571962081527872, "learning_rate": 2.9675789500535328e-05, "loss": 0.2147, "step": 7552 }, { "epoch": 0.6443439686060399, "grad_norm": 2.5015205527511024, "learning_rate": 2.9663167846209998e-05, "loss": 0.2576, "step": 7553 }, { "epoch": 0.644429278280157, "grad_norm": 1.5006685356615432, "learning_rate": 2.9650547744493712e-05, "loss": 0.1971, "step": 7554 }, { "epoch": 0.644514587954274, "grad_norm": 1.5566861135233323, "learning_rate": 2.9637929196349978e-05, "loss": 0.1856, "step": 7555 }, { "epoch": 0.6445998976283911, "grad_norm": 1.5083779969461046, "learning_rate": 2.962531220274215e-05, "loss": 0.1733, "step": 7556 }, { "epoch": 0.6446852073025081, "grad_norm": 1.4123941348078055, "learning_rate": 2.9612696764633465e-05, "loss": 0.1264, "step": 7557 }, { "epoch": 0.6447705169766251, "grad_norm": 1.7413611398076443, "learning_rate": 2.9600082882987028e-05, "loss": 0.2804, "step": 7558 }, { "epoch": 0.6448558266507421, "grad_norm": 1.3106227346827295, "learning_rate": 2.9587470558765886e-05, "loss": 0.2146, "step": 7559 }, { "epoch": 0.6449411363248593, "grad_norm": 2.104491281177299, "learning_rate": 2.9574859792932863e-05, "loss": 0.246, "step": 7560 }, { "epoch": 0.6450264459989763, "grad_norm": 1.7646032432084477, "learning_rate": 2.956225058645077e-05, "loss": 0.2338, "step": 7561 }, { "epoch": 0.6451117556730933, "grad_norm": 1.5126513381782114, "learning_rate": 2.9549642940282246e-05, "loss": 0.1753, "step": 7562 }, { "epoch": 0.6451970653472103, "grad_norm": 1.598955815051288, "learning_rate": 2.95370368553898e-05, "loss": 0.2527, "step": 7563 }, { "epoch": 0.6452823750213275, "grad_norm": 1.7256645525827932, "learning_rate": 2.9524432332735842e-05, "loss": 0.2428, "step": 7564 }, { "epoch": 0.6453676846954445, "grad_norm": 2.0358463808574054, "learning_rate": 2.9511829373282683e-05, "loss": 0.2032, "step": 7565 }, { "epoch": 0.6454529943695615, "grad_norm": 1.5909009413076953, "learning_rate": 2.949922797799247e-05, "loss": 0.2491, "step": 7566 }, { "epoch": 0.6455383040436785, "grad_norm": 1.9233941997547321, "learning_rate": 2.9486628147827273e-05, "loss": 0.2407, "step": 7567 }, { "epoch": 0.6456236137177956, "grad_norm": 1.7766476116745058, "learning_rate": 2.947402988374899e-05, "loss": 0.1916, "step": 7568 }, { "epoch": 0.6457089233919127, "grad_norm": 1.862579597462002, "learning_rate": 2.946143318671947e-05, "loss": 0.1943, "step": 7569 }, { "epoch": 0.6457942330660297, "grad_norm": 1.6303043357523317, "learning_rate": 2.9448838057700368e-05, "loss": 0.1674, "step": 7570 }, { "epoch": 0.6458795427401467, "grad_norm": 1.6272072107078404, "learning_rate": 2.9436244497653274e-05, "loss": 0.2431, "step": 7571 }, { "epoch": 0.6459648524142638, "grad_norm": 1.6750930760272673, "learning_rate": 2.9423652507539655e-05, "loss": 0.2305, "step": 7572 }, { "epoch": 0.6460501620883808, "grad_norm": 1.5723786686603456, "learning_rate": 2.94110620883208e-05, "loss": 0.1625, "step": 7573 }, { "epoch": 0.6461354717624979, "grad_norm": 1.8479891390460677, "learning_rate": 2.9398473240957945e-05, "loss": 0.2023, "step": 7574 }, { "epoch": 0.6462207814366149, "grad_norm": 1.3971324350610246, "learning_rate": 2.9385885966412207e-05, "loss": 0.2796, "step": 7575 }, { "epoch": 0.646306091110732, "grad_norm": 1.5677812966976301, "learning_rate": 2.937330026564451e-05, "loss": 0.1712, "step": 7576 }, { "epoch": 0.646391400784849, "grad_norm": 1.5649328746354805, "learning_rate": 2.936071613961574e-05, "loss": 0.2386, "step": 7577 }, { "epoch": 0.646476710458966, "grad_norm": 1.6056655057829392, "learning_rate": 2.934813358928661e-05, "loss": 0.2276, "step": 7578 }, { "epoch": 0.6465620201330831, "grad_norm": 1.5927256115645032, "learning_rate": 2.9335552615617745e-05, "loss": 0.2077, "step": 7579 }, { "epoch": 0.6466473298072002, "grad_norm": 1.53046568421879, "learning_rate": 2.9322973219569605e-05, "loss": 0.1813, "step": 7580 }, { "epoch": 0.6467326394813172, "grad_norm": 1.4725310173718558, "learning_rate": 2.9310395402102592e-05, "loss": 0.1843, "step": 7581 }, { "epoch": 0.6468179491554342, "grad_norm": 1.871191989812386, "learning_rate": 2.9297819164176965e-05, "loss": 0.1746, "step": 7582 }, { "epoch": 0.6469032588295512, "grad_norm": 1.8777732049947784, "learning_rate": 2.9285244506752808e-05, "loss": 0.2491, "step": 7583 }, { "epoch": 0.6469885685036684, "grad_norm": 2.19582352850195, "learning_rate": 2.9272671430790155e-05, "loss": 0.2587, "step": 7584 }, { "epoch": 0.6470738781777854, "grad_norm": 1.739084671029986, "learning_rate": 2.926009993724892e-05, "loss": 0.2218, "step": 7585 }, { "epoch": 0.6471591878519024, "grad_norm": 1.3369985906212625, "learning_rate": 2.924753002708882e-05, "loss": 0.2458, "step": 7586 }, { "epoch": 0.6472444975260194, "grad_norm": 1.8691016564846223, "learning_rate": 2.923496170126953e-05, "loss": 0.2505, "step": 7587 }, { "epoch": 0.6473298072001364, "grad_norm": 1.486115409076374, "learning_rate": 2.9222394960750577e-05, "loss": 0.1513, "step": 7588 }, { "epoch": 0.6474151168742536, "grad_norm": 1.7997307682019261, "learning_rate": 2.920982980649135e-05, "loss": 0.2027, "step": 7589 }, { "epoch": 0.6475004265483706, "grad_norm": 2.304242003751565, "learning_rate": 2.9197266239451128e-05, "loss": 0.2063, "step": 7590 }, { "epoch": 0.6475857362224876, "grad_norm": 1.5365577105264612, "learning_rate": 2.9184704260589096e-05, "loss": 0.1977, "step": 7591 }, { "epoch": 0.6476710458966046, "grad_norm": 1.4127172319928643, "learning_rate": 2.9172143870864276e-05, "loss": 0.2325, "step": 7592 }, { "epoch": 0.6477563555707218, "grad_norm": 1.7507223273409038, "learning_rate": 2.91595850712356e-05, "loss": 0.2406, "step": 7593 }, { "epoch": 0.6478416652448388, "grad_norm": 1.2391920620886292, "learning_rate": 2.9147027862661835e-05, "loss": 0.2094, "step": 7594 }, { "epoch": 0.6479269749189558, "grad_norm": 2.1810270179079025, "learning_rate": 2.913447224610172e-05, "loss": 0.2191, "step": 7595 }, { "epoch": 0.6480122845930728, "grad_norm": 1.6505107407057176, "learning_rate": 2.9121918222513735e-05, "loss": 0.179, "step": 7596 }, { "epoch": 0.6480975942671899, "grad_norm": 2.334170077928717, "learning_rate": 2.9109365792856358e-05, "loss": 0.2512, "step": 7597 }, { "epoch": 0.648182903941307, "grad_norm": 1.8676600157627083, "learning_rate": 2.909681495808789e-05, "loss": 0.2249, "step": 7598 }, { "epoch": 0.648268213615424, "grad_norm": 1.954363498928386, "learning_rate": 2.9084265719166527e-05, "loss": 0.1928, "step": 7599 }, { "epoch": 0.648353523289541, "grad_norm": 1.6824304230178133, "learning_rate": 2.907171807705031e-05, "loss": 0.1711, "step": 7600 }, { "epoch": 0.6484388329636581, "grad_norm": 1.700042875534616, "learning_rate": 2.905917203269724e-05, "loss": 0.2824, "step": 7601 }, { "epoch": 0.6485241426377751, "grad_norm": 1.4284398205625002, "learning_rate": 2.9046627587065066e-05, "loss": 0.1564, "step": 7602 }, { "epoch": 0.6486094523118922, "grad_norm": 1.6694918608671283, "learning_rate": 2.9034084741111555e-05, "loss": 0.2092, "step": 7603 }, { "epoch": 0.6486947619860092, "grad_norm": 1.577888244394417, "learning_rate": 2.9021543495794263e-05, "loss": 0.2044, "step": 7604 }, { "epoch": 0.6487800716601263, "grad_norm": 1.7294333225530791, "learning_rate": 2.9009003852070636e-05, "loss": 0.2238, "step": 7605 }, { "epoch": 0.6488653813342433, "grad_norm": 1.424599018901298, "learning_rate": 2.8996465810898027e-05, "loss": 0.2098, "step": 7606 }, { "epoch": 0.6489506910083603, "grad_norm": 2.1116765781923688, "learning_rate": 2.898392937323364e-05, "loss": 0.2418, "step": 7607 }, { "epoch": 0.6490360006824774, "grad_norm": 1.6803723735475236, "learning_rate": 2.8971394540034562e-05, "loss": 0.2115, "step": 7608 }, { "epoch": 0.6491213103565945, "grad_norm": 1.5838462684716246, "learning_rate": 2.8958861312257745e-05, "loss": 0.1879, "step": 7609 }, { "epoch": 0.6492066200307115, "grad_norm": 1.223470106214108, "learning_rate": 2.894632969086008e-05, "loss": 0.1971, "step": 7610 }, { "epoch": 0.6492919297048285, "grad_norm": 1.7480165958446003, "learning_rate": 2.8933799676798256e-05, "loss": 0.2097, "step": 7611 }, { "epoch": 0.6493772393789455, "grad_norm": 1.8753838464113461, "learning_rate": 2.8921271271028894e-05, "loss": 0.2435, "step": 7612 }, { "epoch": 0.6494625490530627, "grad_norm": 1.5554712514890734, "learning_rate": 2.8908744474508443e-05, "loss": 0.1936, "step": 7613 }, { "epoch": 0.6495478587271797, "grad_norm": 1.8410864518736498, "learning_rate": 2.8896219288193282e-05, "loss": 0.2028, "step": 7614 }, { "epoch": 0.6496331684012967, "grad_norm": 1.797067648470338, "learning_rate": 2.8883695713039625e-05, "loss": 0.197, "step": 7615 }, { "epoch": 0.6497184780754137, "grad_norm": 1.9077191554172865, "learning_rate": 2.8871173750003577e-05, "loss": 0.2033, "step": 7616 }, { "epoch": 0.6498037877495308, "grad_norm": 1.5685506017643838, "learning_rate": 2.8858653400041176e-05, "loss": 0.1908, "step": 7617 }, { "epoch": 0.6498890974236479, "grad_norm": 1.5963383020937152, "learning_rate": 2.8846134664108194e-05, "loss": 0.2858, "step": 7618 }, { "epoch": 0.6499744070977649, "grad_norm": 1.4876850365705399, "learning_rate": 2.8833617543160448e-05, "loss": 0.215, "step": 7619 }, { "epoch": 0.6500597167718819, "grad_norm": 2.3474253002952636, "learning_rate": 2.8821102038153515e-05, "loss": 0.2035, "step": 7620 }, { "epoch": 0.650145026445999, "grad_norm": 1.5568581767725724, "learning_rate": 2.8808588150042902e-05, "loss": 0.2038, "step": 7621 }, { "epoch": 0.650230336120116, "grad_norm": 1.8331983545494477, "learning_rate": 2.8796075879783956e-05, "loss": 0.212, "step": 7622 }, { "epoch": 0.6503156457942331, "grad_norm": 1.9737362162112955, "learning_rate": 2.8783565228331976e-05, "loss": 0.3025, "step": 7623 }, { "epoch": 0.6504009554683501, "grad_norm": 1.5400022164861409, "learning_rate": 2.8771056196641998e-05, "loss": 0.1625, "step": 7624 }, { "epoch": 0.6504862651424671, "grad_norm": 1.6229618201709695, "learning_rate": 2.8758548785669104e-05, "loss": 0.1777, "step": 7625 }, { "epoch": 0.6505715748165842, "grad_norm": 1.6702660635335747, "learning_rate": 2.874604299636813e-05, "loss": 0.2067, "step": 7626 }, { "epoch": 0.6506568844907012, "grad_norm": 1.9915823222966142, "learning_rate": 2.873353882969382e-05, "loss": 0.2519, "step": 7627 }, { "epoch": 0.6507421941648183, "grad_norm": 1.1129125151752566, "learning_rate": 2.8721036286600818e-05, "loss": 0.1847, "step": 7628 }, { "epoch": 0.6508275038389353, "grad_norm": 1.6286991271288276, "learning_rate": 2.87085353680436e-05, "loss": 0.1513, "step": 7629 }, { "epoch": 0.6509128135130524, "grad_norm": 1.4372641328444244, "learning_rate": 2.8696036074976595e-05, "loss": 0.2337, "step": 7630 }, { "epoch": 0.6509981231871694, "grad_norm": 1.8451519824442297, "learning_rate": 2.8683538408353992e-05, "loss": 0.1954, "step": 7631 }, { "epoch": 0.6510834328612864, "grad_norm": 2.3594947430874837, "learning_rate": 2.8671042369129984e-05, "loss": 0.2572, "step": 7632 }, { "epoch": 0.6511687425354035, "grad_norm": 1.5455069609540495, "learning_rate": 2.8658547958258543e-05, "loss": 0.2353, "step": 7633 }, { "epoch": 0.6512540522095206, "grad_norm": 1.5575893291983665, "learning_rate": 2.8646055176693553e-05, "loss": 0.2089, "step": 7634 }, { "epoch": 0.6513393618836376, "grad_norm": 2.166011442244045, "learning_rate": 2.863356402538878e-05, "loss": 0.1964, "step": 7635 }, { "epoch": 0.6514246715577546, "grad_norm": 1.3243892413638751, "learning_rate": 2.8621074505297852e-05, "loss": 0.1473, "step": 7636 }, { "epoch": 0.6515099812318716, "grad_norm": 2.2775132392640867, "learning_rate": 2.860858661737428e-05, "loss": 0.2332, "step": 7637 }, { "epoch": 0.6515952909059888, "grad_norm": 1.642878773647567, "learning_rate": 2.8596100362571422e-05, "loss": 0.191, "step": 7638 }, { "epoch": 0.6516806005801058, "grad_norm": 1.6827598683973661, "learning_rate": 2.8583615741842585e-05, "loss": 0.2087, "step": 7639 }, { "epoch": 0.6517659102542228, "grad_norm": 1.6077685440030898, "learning_rate": 2.8571132756140873e-05, "loss": 0.2249, "step": 7640 }, { "epoch": 0.6518512199283398, "grad_norm": 1.5186218050631812, "learning_rate": 2.8558651406419308e-05, "loss": 0.2022, "step": 7641 }, { "epoch": 0.651936529602457, "grad_norm": 1.3829893009933183, "learning_rate": 2.8546171693630746e-05, "loss": 0.1845, "step": 7642 }, { "epoch": 0.652021839276574, "grad_norm": 2.0866138061961768, "learning_rate": 2.853369361872801e-05, "loss": 0.2787, "step": 7643 }, { "epoch": 0.652107148950691, "grad_norm": 1.5156183930872427, "learning_rate": 2.8521217182663655e-05, "loss": 0.2358, "step": 7644 }, { "epoch": 0.652192458624808, "grad_norm": 2.0805964867244366, "learning_rate": 2.8508742386390252e-05, "loss": 0.2475, "step": 7645 }, { "epoch": 0.6522777682989251, "grad_norm": 1.8662528408103012, "learning_rate": 2.8496269230860163e-05, "loss": 0.1953, "step": 7646 }, { "epoch": 0.6523630779730422, "grad_norm": 1.644450206073485, "learning_rate": 2.8483797717025646e-05, "loss": 0.238, "step": 7647 }, { "epoch": 0.6524483876471592, "grad_norm": 1.5678070730077784, "learning_rate": 2.8471327845838834e-05, "loss": 0.2204, "step": 7648 }, { "epoch": 0.6525336973212762, "grad_norm": 1.6837856953326948, "learning_rate": 2.8458859618251744e-05, "loss": 0.2653, "step": 7649 }, { "epoch": 0.6526190069953933, "grad_norm": 1.9193255875194761, "learning_rate": 2.8446393035216245e-05, "loss": 0.1676, "step": 7650 }, { "epoch": 0.6527043166695103, "grad_norm": 1.6784471767319178, "learning_rate": 2.8433928097684087e-05, "loss": 0.2447, "step": 7651 }, { "epoch": 0.6527896263436274, "grad_norm": 1.8718022735172668, "learning_rate": 2.8421464806606955e-05, "loss": 0.178, "step": 7652 }, { "epoch": 0.6528749360177444, "grad_norm": 1.5811003783835669, "learning_rate": 2.8409003162936278e-05, "loss": 0.2268, "step": 7653 }, { "epoch": 0.6529602456918615, "grad_norm": 1.7462339750304188, "learning_rate": 2.839654316762349e-05, "loss": 0.2072, "step": 7654 }, { "epoch": 0.6530455553659785, "grad_norm": 1.8060732058688196, "learning_rate": 2.838408482161984e-05, "loss": 0.2282, "step": 7655 }, { "epoch": 0.6531308650400955, "grad_norm": 1.8758115919297356, "learning_rate": 2.8371628125876443e-05, "loss": 0.2665, "step": 7656 }, { "epoch": 0.6532161747142126, "grad_norm": 1.7817432322999174, "learning_rate": 2.8359173081344305e-05, "loss": 0.2775, "step": 7657 }, { "epoch": 0.6533014843883297, "grad_norm": 1.7812235646210317, "learning_rate": 2.8346719688974278e-05, "loss": 0.247, "step": 7658 }, { "epoch": 0.6533867940624467, "grad_norm": 1.6238835240673972, "learning_rate": 2.8334267949717187e-05, "loss": 0.2461, "step": 7659 }, { "epoch": 0.6534721037365637, "grad_norm": 1.5356891668046029, "learning_rate": 2.8321817864523558e-05, "loss": 0.2306, "step": 7660 }, { "epoch": 0.6535574134106807, "grad_norm": 1.8725435059895752, "learning_rate": 2.8309369434343963e-05, "loss": 0.2566, "step": 7661 }, { "epoch": 0.6536427230847978, "grad_norm": 1.7629047470152635, "learning_rate": 2.8296922660128744e-05, "loss": 0.2265, "step": 7662 }, { "epoch": 0.6537280327589149, "grad_norm": 1.8353644592734866, "learning_rate": 2.8284477542828153e-05, "loss": 0.2319, "step": 7663 }, { "epoch": 0.6538133424330319, "grad_norm": 1.5023435722585048, "learning_rate": 2.8272034083392284e-05, "loss": 0.1996, "step": 7664 }, { "epoch": 0.6538986521071489, "grad_norm": 1.495360192123293, "learning_rate": 2.8259592282771186e-05, "loss": 0.1579, "step": 7665 }, { "epoch": 0.6539839617812659, "grad_norm": 1.617936407822347, "learning_rate": 2.8247152141914656e-05, "loss": 0.1926, "step": 7666 }, { "epoch": 0.6540692714553831, "grad_norm": 1.7945533681327674, "learning_rate": 2.8234713661772484e-05, "loss": 0.2346, "step": 7667 }, { "epoch": 0.6541545811295001, "grad_norm": 1.5442124388420182, "learning_rate": 2.822227684329426e-05, "loss": 0.1917, "step": 7668 }, { "epoch": 0.6542398908036171, "grad_norm": 1.9872271848370215, "learning_rate": 2.820984168742947e-05, "loss": 0.1878, "step": 7669 }, { "epoch": 0.6543252004777341, "grad_norm": 1.4857531925624776, "learning_rate": 2.8197408195127484e-05, "loss": 0.1901, "step": 7670 }, { "epoch": 0.6544105101518513, "grad_norm": 2.1774755486605697, "learning_rate": 2.81849763673375e-05, "loss": 0.3262, "step": 7671 }, { "epoch": 0.6544958198259683, "grad_norm": 2.098366956171266, "learning_rate": 2.8172546205008683e-05, "loss": 0.2229, "step": 7672 }, { "epoch": 0.6545811295000853, "grad_norm": 1.7555027685531996, "learning_rate": 2.8160117709089927e-05, "loss": 0.1882, "step": 7673 }, { "epoch": 0.6546664391742023, "grad_norm": 1.4020919786312356, "learning_rate": 2.8147690880530154e-05, "loss": 0.1675, "step": 7674 }, { "epoch": 0.6547517488483194, "grad_norm": 1.6988437282593902, "learning_rate": 2.813526572027806e-05, "loss": 0.2205, "step": 7675 }, { "epoch": 0.6548370585224365, "grad_norm": 1.535812121345027, "learning_rate": 2.8122842229282237e-05, "loss": 0.2065, "step": 7676 }, { "epoch": 0.6549223681965535, "grad_norm": 2.210401058831032, "learning_rate": 2.8110420408491155e-05, "loss": 0.2192, "step": 7677 }, { "epoch": 0.6550076778706705, "grad_norm": 1.85429219738933, "learning_rate": 2.809800025885315e-05, "loss": 0.1873, "step": 7678 }, { "epoch": 0.6550929875447876, "grad_norm": 1.8435873186141687, "learning_rate": 2.8085581781316444e-05, "loss": 0.2175, "step": 7679 }, { "epoch": 0.6551782972189046, "grad_norm": 1.1869072187489105, "learning_rate": 2.807316497682909e-05, "loss": 0.1893, "step": 7680 }, { "epoch": 0.6552636068930217, "grad_norm": 2.191140225016933, "learning_rate": 2.8060749846339117e-05, "loss": 0.206, "step": 7681 }, { "epoch": 0.6553489165671387, "grad_norm": 1.6546755810993656, "learning_rate": 2.8048336390794272e-05, "loss": 0.2288, "step": 7682 }, { "epoch": 0.6554342262412558, "grad_norm": 2.439555450810025, "learning_rate": 2.8035924611142304e-05, "loss": 0.2208, "step": 7683 }, { "epoch": 0.6555195359153728, "grad_norm": 1.4958352445769076, "learning_rate": 2.8023514508330755e-05, "loss": 0.1717, "step": 7684 }, { "epoch": 0.6556048455894898, "grad_norm": 1.8011081092747225, "learning_rate": 2.8011106083307137e-05, "loss": 0.1896, "step": 7685 }, { "epoch": 0.6556901552636069, "grad_norm": 1.7572804641894182, "learning_rate": 2.7998699337018676e-05, "loss": 0.2335, "step": 7686 }, { "epoch": 0.655775464937724, "grad_norm": 1.570052851376578, "learning_rate": 2.7986294270412623e-05, "loss": 0.2456, "step": 7687 }, { "epoch": 0.655860774611841, "grad_norm": 1.4260593926846923, "learning_rate": 2.7973890884436027e-05, "loss": 0.1707, "step": 7688 }, { "epoch": 0.655946084285958, "grad_norm": 1.2141080634209198, "learning_rate": 2.796148918003581e-05, "loss": 0.1849, "step": 7689 }, { "epoch": 0.656031393960075, "grad_norm": 1.8882623206733764, "learning_rate": 2.7949089158158788e-05, "loss": 0.1789, "step": 7690 }, { "epoch": 0.6561167036341922, "grad_norm": 1.7067940763261202, "learning_rate": 2.7936690819751628e-05, "loss": 0.2086, "step": 7691 }, { "epoch": 0.6562020133083092, "grad_norm": 1.818951798088204, "learning_rate": 2.7924294165760878e-05, "loss": 0.2685, "step": 7692 }, { "epoch": 0.6562873229824262, "grad_norm": 1.3469261513752735, "learning_rate": 2.791189919713294e-05, "loss": 0.2245, "step": 7693 }, { "epoch": 0.6563726326565432, "grad_norm": 1.756955221726941, "learning_rate": 2.789950591481416e-05, "loss": 0.2487, "step": 7694 }, { "epoch": 0.6564579423306603, "grad_norm": 1.680786766702085, "learning_rate": 2.788711431975062e-05, "loss": 0.1812, "step": 7695 }, { "epoch": 0.6565432520047774, "grad_norm": 1.9716259371349516, "learning_rate": 2.787472441288842e-05, "loss": 0.2232, "step": 7696 }, { "epoch": 0.6566285616788944, "grad_norm": 1.6622505172887034, "learning_rate": 2.7862336195173434e-05, "loss": 0.2221, "step": 7697 }, { "epoch": 0.6567138713530114, "grad_norm": 1.5655838098646937, "learning_rate": 2.784994966755144e-05, "loss": 0.1914, "step": 7698 }, { "epoch": 0.6567991810271285, "grad_norm": 1.5564197493665588, "learning_rate": 2.7837564830968084e-05, "loss": 0.2839, "step": 7699 }, { "epoch": 0.6568844907012455, "grad_norm": 1.5926686526691056, "learning_rate": 2.7825181686368863e-05, "loss": 0.154, "step": 7700 }, { "epoch": 0.6569698003753626, "grad_norm": 1.6998285010728853, "learning_rate": 2.7812800234699222e-05, "loss": 0.2425, "step": 7701 }, { "epoch": 0.6570551100494796, "grad_norm": 1.6888941904174404, "learning_rate": 2.7800420476904337e-05, "loss": 0.3112, "step": 7702 }, { "epoch": 0.6571404197235966, "grad_norm": 1.686555244751091, "learning_rate": 2.7788042413929406e-05, "loss": 0.2113, "step": 7703 }, { "epoch": 0.6572257293977137, "grad_norm": 1.9056264138771686, "learning_rate": 2.77756660467194e-05, "loss": 0.1194, "step": 7704 }, { "epoch": 0.6573110390718307, "grad_norm": 1.2269012080700339, "learning_rate": 2.776329137621919e-05, "loss": 0.1363, "step": 7705 }, { "epoch": 0.6573963487459478, "grad_norm": 1.9860480156978453, "learning_rate": 2.7750918403373506e-05, "loss": 0.2114, "step": 7706 }, { "epoch": 0.6574816584200648, "grad_norm": 1.8162270078102947, "learning_rate": 2.7738547129127002e-05, "loss": 0.1547, "step": 7707 }, { "epoch": 0.6575669680941819, "grad_norm": 1.7752739224827847, "learning_rate": 2.7726177554424087e-05, "loss": 0.2742, "step": 7708 }, { "epoch": 0.6576522777682989, "grad_norm": 1.3692426221787022, "learning_rate": 2.7713809680209175e-05, "loss": 0.1711, "step": 7709 }, { "epoch": 0.657737587442416, "grad_norm": 1.7492047273960605, "learning_rate": 2.7701443507426468e-05, "loss": 0.1648, "step": 7710 }, { "epoch": 0.657822897116533, "grad_norm": 1.996233791507662, "learning_rate": 2.768907903702005e-05, "loss": 0.2595, "step": 7711 }, { "epoch": 0.6579082067906501, "grad_norm": 1.6317628836131193, "learning_rate": 2.767671626993389e-05, "loss": 0.2146, "step": 7712 }, { "epoch": 0.6579935164647671, "grad_norm": 2.206250510067111, "learning_rate": 2.7664355207111813e-05, "loss": 0.2669, "step": 7713 }, { "epoch": 0.6580788261388841, "grad_norm": 2.152835741254033, "learning_rate": 2.765199584949753e-05, "loss": 0.1813, "step": 7714 }, { "epoch": 0.6581641358130011, "grad_norm": 1.7526079227885278, "learning_rate": 2.763963819803459e-05, "loss": 0.2183, "step": 7715 }, { "epoch": 0.6582494454871183, "grad_norm": 2.022602159256463, "learning_rate": 2.7627282253666465e-05, "loss": 0.1899, "step": 7716 }, { "epoch": 0.6583347551612353, "grad_norm": 1.4713272857461068, "learning_rate": 2.761492801733645e-05, "loss": 0.2276, "step": 7717 }, { "epoch": 0.6584200648353523, "grad_norm": 1.9996480632123705, "learning_rate": 2.7602575489987727e-05, "loss": 0.1849, "step": 7718 }, { "epoch": 0.6585053745094693, "grad_norm": 1.9821616016026957, "learning_rate": 2.759022467256335e-05, "loss": 0.1853, "step": 7719 }, { "epoch": 0.6585906841835865, "grad_norm": 1.8480897681161554, "learning_rate": 2.7577875566006227e-05, "loss": 0.2397, "step": 7720 }, { "epoch": 0.6586759938577035, "grad_norm": 1.466419156098515, "learning_rate": 2.7565528171259158e-05, "loss": 0.2421, "step": 7721 }, { "epoch": 0.6587613035318205, "grad_norm": 1.371909830611417, "learning_rate": 2.7553182489264777e-05, "loss": 0.1794, "step": 7722 }, { "epoch": 0.6588466132059375, "grad_norm": 1.5744047220717536, "learning_rate": 2.7540838520965672e-05, "loss": 0.219, "step": 7723 }, { "epoch": 0.6589319228800546, "grad_norm": 1.648103436071104, "learning_rate": 2.7528496267304155e-05, "loss": 0.2051, "step": 7724 }, { "epoch": 0.6590172325541717, "grad_norm": 1.7361498849565575, "learning_rate": 2.7516155729222553e-05, "loss": 0.2039, "step": 7725 }, { "epoch": 0.6591025422282887, "grad_norm": 2.026156803837697, "learning_rate": 2.7503816907662982e-05, "loss": 0.2838, "step": 7726 }, { "epoch": 0.6591878519024057, "grad_norm": 1.9698889934421115, "learning_rate": 2.7491479803567453e-05, "loss": 0.2156, "step": 7727 }, { "epoch": 0.6592731615765228, "grad_norm": 1.8779265135422345, "learning_rate": 2.74791444178778e-05, "loss": 0.2648, "step": 7728 }, { "epoch": 0.6593584712506398, "grad_norm": 1.8021327840675705, "learning_rate": 2.746681075153582e-05, "loss": 0.2456, "step": 7729 }, { "epoch": 0.6594437809247569, "grad_norm": 1.585249878593841, "learning_rate": 2.7454478805483104e-05, "loss": 0.2274, "step": 7730 }, { "epoch": 0.6595290905988739, "grad_norm": 1.641171173736209, "learning_rate": 2.744214858066112e-05, "loss": 0.2528, "step": 7731 }, { "epoch": 0.659614400272991, "grad_norm": 1.9828643455370627, "learning_rate": 2.7429820078011214e-05, "loss": 0.1651, "step": 7732 }, { "epoch": 0.659699709947108, "grad_norm": 1.8156236500825311, "learning_rate": 2.7417493298474618e-05, "loss": 0.2119, "step": 7733 }, { "epoch": 0.659785019621225, "grad_norm": 1.4940413697628951, "learning_rate": 2.7405168242992396e-05, "loss": 0.1807, "step": 7734 }, { "epoch": 0.6598703292953421, "grad_norm": 1.3152850438988888, "learning_rate": 2.7392844912505494e-05, "loss": 0.168, "step": 7735 }, { "epoch": 0.6599556389694592, "grad_norm": 1.9336686553317948, "learning_rate": 2.7380523307954785e-05, "loss": 0.2246, "step": 7736 }, { "epoch": 0.6600409486435762, "grad_norm": 1.6171061177439536, "learning_rate": 2.7368203430280887e-05, "loss": 0.2099, "step": 7737 }, { "epoch": 0.6601262583176932, "grad_norm": 1.4903378831905865, "learning_rate": 2.735588528042441e-05, "loss": 0.2006, "step": 7738 }, { "epoch": 0.6602115679918102, "grad_norm": 1.451553643633841, "learning_rate": 2.7343568859325763e-05, "loss": 0.2175, "step": 7739 }, { "epoch": 0.6602968776659273, "grad_norm": 1.832577036426203, "learning_rate": 2.7331254167925235e-05, "loss": 0.2077, "step": 7740 }, { "epoch": 0.6603821873400444, "grad_norm": 1.8377777616282263, "learning_rate": 2.7318941207162984e-05, "loss": 0.2626, "step": 7741 }, { "epoch": 0.6604674970141614, "grad_norm": 1.6265423131270893, "learning_rate": 2.7306629977979047e-05, "loss": 0.228, "step": 7742 }, { "epoch": 0.6605528066882784, "grad_norm": 1.862424001487287, "learning_rate": 2.7294320481313328e-05, "loss": 0.189, "step": 7743 }, { "epoch": 0.6606381163623954, "grad_norm": 1.5469129153142132, "learning_rate": 2.7282012718105554e-05, "loss": 0.1735, "step": 7744 }, { "epoch": 0.6607234260365126, "grad_norm": 1.8272988824771113, "learning_rate": 2.726970668929541e-05, "loss": 0.1795, "step": 7745 }, { "epoch": 0.6608087357106296, "grad_norm": 2.063639355009185, "learning_rate": 2.7257402395822372e-05, "loss": 0.2128, "step": 7746 }, { "epoch": 0.6608940453847466, "grad_norm": 2.104357594303922, "learning_rate": 2.7245099838625805e-05, "loss": 0.282, "step": 7747 }, { "epoch": 0.6609793550588636, "grad_norm": 1.4727150983713955, "learning_rate": 2.723279901864493e-05, "loss": 0.2183, "step": 7748 }, { "epoch": 0.6610646647329808, "grad_norm": 1.3413468096121213, "learning_rate": 2.7220499936818896e-05, "loss": 0.1784, "step": 7749 }, { "epoch": 0.6611499744070978, "grad_norm": 1.5493714196278028, "learning_rate": 2.7208202594086605e-05, "loss": 0.1641, "step": 7750 }, { "epoch": 0.6612352840812148, "grad_norm": 1.9132399679569192, "learning_rate": 2.7195906991386953e-05, "loss": 0.1611, "step": 7751 }, { "epoch": 0.6613205937553318, "grad_norm": 1.7674922015977157, "learning_rate": 2.718361312965862e-05, "loss": 0.2374, "step": 7752 }, { "epoch": 0.6614059034294489, "grad_norm": 2.5542990479589127, "learning_rate": 2.7171321009840178e-05, "loss": 0.273, "step": 7753 }, { "epoch": 0.661491213103566, "grad_norm": 1.801595574081161, "learning_rate": 2.7159030632870063e-05, "loss": 0.2499, "step": 7754 }, { "epoch": 0.661576522777683, "grad_norm": 1.6379621421435415, "learning_rate": 2.7146741999686588e-05, "loss": 0.2202, "step": 7755 }, { "epoch": 0.6616618324518, "grad_norm": 1.3295934300929415, "learning_rate": 2.7134455111227917e-05, "loss": 0.1619, "step": 7756 }, { "epoch": 0.6617471421259171, "grad_norm": 1.7956864780055204, "learning_rate": 2.7122169968432075e-05, "loss": 0.1987, "step": 7757 }, { "epoch": 0.6618324518000341, "grad_norm": 1.6438338370754462, "learning_rate": 2.710988657223702e-05, "loss": 0.1835, "step": 7758 }, { "epoch": 0.6619177614741512, "grad_norm": 1.4315816703200648, "learning_rate": 2.7097604923580443e-05, "loss": 0.1799, "step": 7759 }, { "epoch": 0.6620030711482682, "grad_norm": 1.66975068224296, "learning_rate": 2.7085325023400056e-05, "loss": 0.2805, "step": 7760 }, { "epoch": 0.6620883808223853, "grad_norm": 1.9071114266562563, "learning_rate": 2.7073046872633324e-05, "loss": 0.1815, "step": 7761 }, { "epoch": 0.6621736904965023, "grad_norm": 1.4154193754098192, "learning_rate": 2.7060770472217634e-05, "loss": 0.2122, "step": 7762 }, { "epoch": 0.6622590001706193, "grad_norm": 1.6822860135041513, "learning_rate": 2.7048495823090226e-05, "loss": 0.2249, "step": 7763 }, { "epoch": 0.6623443098447364, "grad_norm": 1.9270813057003866, "learning_rate": 2.703622292618817e-05, "loss": 0.2226, "step": 7764 }, { "epoch": 0.6624296195188535, "grad_norm": 1.334882799497456, "learning_rate": 2.7023951782448505e-05, "loss": 0.2049, "step": 7765 }, { "epoch": 0.6625149291929705, "grad_norm": 1.8440890242959553, "learning_rate": 2.701168239280799e-05, "loss": 0.2383, "step": 7766 }, { "epoch": 0.6626002388670875, "grad_norm": 1.921828788884059, "learning_rate": 2.6999414758203378e-05, "loss": 0.2528, "step": 7767 }, { "epoch": 0.6626855485412045, "grad_norm": 1.9771286225298685, "learning_rate": 2.6987148879571233e-05, "loss": 0.2213, "step": 7768 }, { "epoch": 0.6627708582153217, "grad_norm": 1.9368713343420358, "learning_rate": 2.6974884757847975e-05, "loss": 0.1866, "step": 7769 }, { "epoch": 0.6628561678894387, "grad_norm": 1.234625296023871, "learning_rate": 2.6962622393969893e-05, "loss": 0.1405, "step": 7770 }, { "epoch": 0.6629414775635557, "grad_norm": 1.3033506660008112, "learning_rate": 2.6950361788873207e-05, "loss": 0.2008, "step": 7771 }, { "epoch": 0.6630267872376727, "grad_norm": 1.756957392923373, "learning_rate": 2.693810294349388e-05, "loss": 0.1485, "step": 7772 }, { "epoch": 0.6631120969117899, "grad_norm": 1.704147172115283, "learning_rate": 2.6925845858767856e-05, "loss": 0.2587, "step": 7773 }, { "epoch": 0.6631974065859069, "grad_norm": 1.1593446300878254, "learning_rate": 2.6913590535630885e-05, "loss": 0.1789, "step": 7774 }, { "epoch": 0.6632827162600239, "grad_norm": 1.6631662166839631, "learning_rate": 2.6901336975018597e-05, "loss": 0.2, "step": 7775 }, { "epoch": 0.6633680259341409, "grad_norm": 1.576896869448805, "learning_rate": 2.6889085177866492e-05, "loss": 0.1952, "step": 7776 }, { "epoch": 0.6634533356082579, "grad_norm": 1.8390423432495764, "learning_rate": 2.6876835145109892e-05, "loss": 0.2372, "step": 7777 }, { "epoch": 0.663538645282375, "grad_norm": 1.571044751147281, "learning_rate": 2.6864586877684093e-05, "loss": 0.1931, "step": 7778 }, { "epoch": 0.6636239549564921, "grad_norm": 1.5904036901935847, "learning_rate": 2.685234037652411e-05, "loss": 0.1572, "step": 7779 }, { "epoch": 0.6637092646306091, "grad_norm": 1.6159523195182148, "learning_rate": 2.684009564256495e-05, "loss": 0.2089, "step": 7780 }, { "epoch": 0.6637945743047261, "grad_norm": 1.5787942807956323, "learning_rate": 2.6827852676741415e-05, "loss": 0.2846, "step": 7781 }, { "epoch": 0.6638798839788432, "grad_norm": 2.255746919545511, "learning_rate": 2.681561147998819e-05, "loss": 0.2199, "step": 7782 }, { "epoch": 0.6639651936529603, "grad_norm": 1.7266030975678115, "learning_rate": 2.6803372053239834e-05, "loss": 0.1797, "step": 7783 }, { "epoch": 0.6640505033270773, "grad_norm": 2.190091478187797, "learning_rate": 2.679113439743075e-05, "loss": 0.1901, "step": 7784 }, { "epoch": 0.6641358130011943, "grad_norm": 1.9191329126651466, "learning_rate": 2.677889851349522e-05, "loss": 0.214, "step": 7785 }, { "epoch": 0.6642211226753114, "grad_norm": 1.771907196559745, "learning_rate": 2.676666440236738e-05, "loss": 0.1757, "step": 7786 }, { "epoch": 0.6643064323494284, "grad_norm": 1.9592598961904755, "learning_rate": 2.6754432064981285e-05, "loss": 0.2287, "step": 7787 }, { "epoch": 0.6643917420235455, "grad_norm": 2.104866465412768, "learning_rate": 2.6742201502270736e-05, "loss": 0.1854, "step": 7788 }, { "epoch": 0.6644770516976625, "grad_norm": 1.9150264259088754, "learning_rate": 2.6729972715169528e-05, "loss": 0.222, "step": 7789 }, { "epoch": 0.6645623613717796, "grad_norm": 1.8390112287169222, "learning_rate": 2.671774570461123e-05, "loss": 0.2277, "step": 7790 }, { "epoch": 0.6646476710458966, "grad_norm": 1.527721309581612, "learning_rate": 2.6705520471529366e-05, "loss": 0.175, "step": 7791 }, { "epoch": 0.6647329807200136, "grad_norm": 1.918222569842617, "learning_rate": 2.6693297016857188e-05, "loss": 0.1691, "step": 7792 }, { "epoch": 0.6648182903941307, "grad_norm": 2.3262106173962014, "learning_rate": 2.668107534152795e-05, "loss": 0.2487, "step": 7793 }, { "epoch": 0.6649036000682478, "grad_norm": 1.672598067460568, "learning_rate": 2.6668855446474693e-05, "loss": 0.1854, "step": 7794 }, { "epoch": 0.6649889097423648, "grad_norm": 1.2826743233869222, "learning_rate": 2.665663733263034e-05, "loss": 0.2332, "step": 7795 }, { "epoch": 0.6650742194164818, "grad_norm": 2.1549357956529076, "learning_rate": 2.6644421000927677e-05, "loss": 0.2067, "step": 7796 }, { "epoch": 0.6651595290905988, "grad_norm": 1.4948513360248188, "learning_rate": 2.6632206452299363e-05, "loss": 0.231, "step": 7797 }, { "epoch": 0.665244838764716, "grad_norm": 1.4660303624848352, "learning_rate": 2.661999368767791e-05, "loss": 0.1672, "step": 7798 }, { "epoch": 0.665330148438833, "grad_norm": 1.3835458481184586, "learning_rate": 2.6607782707995678e-05, "loss": 0.1749, "step": 7799 }, { "epoch": 0.66541545811295, "grad_norm": 1.4837033006860074, "learning_rate": 2.6595573514184967e-05, "loss": 0.2056, "step": 7800 }, { "epoch": 0.665500767787067, "grad_norm": 1.6918490140883014, "learning_rate": 2.658336610717781e-05, "loss": 0.2121, "step": 7801 }, { "epoch": 0.6655860774611841, "grad_norm": 1.2432373217072987, "learning_rate": 2.657116048790624e-05, "loss": 0.21, "step": 7802 }, { "epoch": 0.6656713871353012, "grad_norm": 1.20849609375, "learning_rate": 2.655895665730206e-05, "loss": 0.1577, "step": 7803 }, { "epoch": 0.6657566968094182, "grad_norm": 1.7007065146361324, "learning_rate": 2.6546754616296977e-05, "loss": 0.1656, "step": 7804 }, { "epoch": 0.6658420064835352, "grad_norm": 1.5099428771121521, "learning_rate": 2.6534554365822538e-05, "loss": 0.2051, "step": 7805 }, { "epoch": 0.6659273161576523, "grad_norm": 1.8198534341631272, "learning_rate": 2.652235590681017e-05, "loss": 0.2152, "step": 7806 }, { "epoch": 0.6660126258317693, "grad_norm": 1.584713317745035, "learning_rate": 2.6510159240191202e-05, "loss": 0.2197, "step": 7807 }, { "epoch": 0.6660979355058864, "grad_norm": 1.8517140515591102, "learning_rate": 2.6497964366896716e-05, "loss": 0.2233, "step": 7808 }, { "epoch": 0.6661832451800034, "grad_norm": 1.5532360873800377, "learning_rate": 2.6485771287857774e-05, "loss": 0.1646, "step": 7809 }, { "epoch": 0.6662685548541205, "grad_norm": 1.4279075340125134, "learning_rate": 2.6473580004005248e-05, "loss": 0.1779, "step": 7810 }, { "epoch": 0.6663538645282375, "grad_norm": 1.6736017027291923, "learning_rate": 2.6461390516269868e-05, "loss": 0.1853, "step": 7811 }, { "epoch": 0.6664391742023545, "grad_norm": 2.1259678151376087, "learning_rate": 2.6449202825582214e-05, "loss": 0.255, "step": 7812 }, { "epoch": 0.6665244838764716, "grad_norm": 1.9569891584366028, "learning_rate": 2.6437016932872816e-05, "loss": 0.2215, "step": 7813 }, { "epoch": 0.6666097935505887, "grad_norm": 1.681744969904423, "learning_rate": 2.642483283907192e-05, "loss": 0.2012, "step": 7814 }, { "epoch": 0.6666951032247057, "grad_norm": 1.7398470908973522, "learning_rate": 2.6412650545109787e-05, "loss": 0.188, "step": 7815 }, { "epoch": 0.6667804128988227, "grad_norm": 1.505811876074807, "learning_rate": 2.6400470051916432e-05, "loss": 0.1779, "step": 7816 }, { "epoch": 0.6668657225729397, "grad_norm": 1.5570311397508427, "learning_rate": 2.6388291360421784e-05, "loss": 0.1788, "step": 7817 }, { "epoch": 0.6669510322470568, "grad_norm": 2.1612233842365867, "learning_rate": 2.6376114471555623e-05, "loss": 0.1925, "step": 7818 }, { "epoch": 0.6670363419211739, "grad_norm": 1.9968042471563887, "learning_rate": 2.6363939386247576e-05, "loss": 0.1951, "step": 7819 }, { "epoch": 0.6671216515952909, "grad_norm": 1.7889484219224512, "learning_rate": 2.6351766105427163e-05, "loss": 0.201, "step": 7820 }, { "epoch": 0.6672069612694079, "grad_norm": 1.7626221364225756, "learning_rate": 2.6339594630023717e-05, "loss": 0.2004, "step": 7821 }, { "epoch": 0.667292270943525, "grad_norm": 1.6432771590086621, "learning_rate": 2.6327424960966506e-05, "loss": 0.1961, "step": 7822 }, { "epoch": 0.6673775806176421, "grad_norm": 1.5191296044183136, "learning_rate": 2.6315257099184608e-05, "loss": 0.196, "step": 7823 }, { "epoch": 0.6674628902917591, "grad_norm": 1.5933786595386985, "learning_rate": 2.6303091045606976e-05, "loss": 0.2162, "step": 7824 }, { "epoch": 0.6675481999658761, "grad_norm": 1.5631390599865749, "learning_rate": 2.6290926801162407e-05, "loss": 0.1938, "step": 7825 }, { "epoch": 0.6676335096399931, "grad_norm": 1.6771887357141113, "learning_rate": 2.6278764366779596e-05, "loss": 0.1779, "step": 7826 }, { "epoch": 0.6677188193141103, "grad_norm": 1.3557261901346018, "learning_rate": 2.6266603743387063e-05, "loss": 0.1842, "step": 7827 }, { "epoch": 0.6678041289882273, "grad_norm": 1.9467581773612888, "learning_rate": 2.62544449319132e-05, "loss": 0.2851, "step": 7828 }, { "epoch": 0.6678894386623443, "grad_norm": 1.878773706404611, "learning_rate": 2.6242287933286318e-05, "loss": 0.2073, "step": 7829 }, { "epoch": 0.6679747483364613, "grad_norm": 1.7558137507976848, "learning_rate": 2.6230132748434466e-05, "loss": 0.1458, "step": 7830 }, { "epoch": 0.6680600580105784, "grad_norm": 1.6680874570107078, "learning_rate": 2.621797937828569e-05, "loss": 0.2148, "step": 7831 }, { "epoch": 0.6681453676846955, "grad_norm": 1.94359994290688, "learning_rate": 2.6205827823767808e-05, "loss": 0.1499, "step": 7832 }, { "epoch": 0.6682306773588125, "grad_norm": 1.5254000029721329, "learning_rate": 2.6193678085808526e-05, "loss": 0.212, "step": 7833 }, { "epoch": 0.6683159870329295, "grad_norm": 1.4242394877402889, "learning_rate": 2.61815301653354e-05, "loss": 0.2174, "step": 7834 }, { "epoch": 0.6684012967070466, "grad_norm": 1.6824604654414432, "learning_rate": 2.6169384063275892e-05, "loss": 0.203, "step": 7835 }, { "epoch": 0.6684866063811636, "grad_norm": 1.8473958321861639, "learning_rate": 2.615723978055728e-05, "loss": 0.2617, "step": 7836 }, { "epoch": 0.6685719160552807, "grad_norm": 1.904018346517791, "learning_rate": 2.6145097318106703e-05, "loss": 0.2332, "step": 7837 }, { "epoch": 0.6686572257293977, "grad_norm": 1.7758113081503173, "learning_rate": 2.613295667685119e-05, "loss": 0.2595, "step": 7838 }, { "epoch": 0.6687425354035148, "grad_norm": 2.038206541973939, "learning_rate": 2.61208178577176e-05, "loss": 0.2012, "step": 7839 }, { "epoch": 0.6688278450776318, "grad_norm": 1.944903472427186, "learning_rate": 2.6108680861632673e-05, "loss": 0.2207, "step": 7840 }, { "epoch": 0.6689131547517488, "grad_norm": 1.7907343220251455, "learning_rate": 2.6096545689522983e-05, "loss": 0.2285, "step": 7841 }, { "epoch": 0.6689984644258659, "grad_norm": 1.6904758070235966, "learning_rate": 2.6084412342315047e-05, "loss": 0.1804, "step": 7842 }, { "epoch": 0.669083774099983, "grad_norm": 1.7263340173118304, "learning_rate": 2.6072280820935103e-05, "loss": 0.2026, "step": 7843 }, { "epoch": 0.6691690837741, "grad_norm": 1.411439517788445, "learning_rate": 2.6060151126309385e-05, "loss": 0.2096, "step": 7844 }, { "epoch": 0.669254393448217, "grad_norm": 1.497203604480788, "learning_rate": 2.6048023259363913e-05, "loss": 0.2271, "step": 7845 }, { "epoch": 0.669339703122334, "grad_norm": 1.7908508158392997, "learning_rate": 2.6035897221024585e-05, "loss": 0.2486, "step": 7846 }, { "epoch": 0.6694250127964512, "grad_norm": 1.7572565852720352, "learning_rate": 2.6023773012217155e-05, "loss": 0.2201, "step": 7847 }, { "epoch": 0.6695103224705682, "grad_norm": 1.792436840801463, "learning_rate": 2.601165063386725e-05, "loss": 0.2181, "step": 7848 }, { "epoch": 0.6695956321446852, "grad_norm": 1.6015687523696553, "learning_rate": 2.599953008690035e-05, "loss": 0.2638, "step": 7849 }, { "epoch": 0.6696809418188022, "grad_norm": 1.6283454203994883, "learning_rate": 2.598741137224176e-05, "loss": 0.1772, "step": 7850 }, { "epoch": 0.6697662514929194, "grad_norm": 1.5859777135401585, "learning_rate": 2.5975294490816737e-05, "loss": 0.21, "step": 7851 }, { "epoch": 0.6698515611670364, "grad_norm": 2.2230676897033366, "learning_rate": 2.5963179443550302e-05, "loss": 0.3174, "step": 7852 }, { "epoch": 0.6699368708411534, "grad_norm": 1.841729042527393, "learning_rate": 2.5951066231367395e-05, "loss": 0.2173, "step": 7853 }, { "epoch": 0.6700221805152704, "grad_norm": 1.4538810157943318, "learning_rate": 2.5938954855192766e-05, "loss": 0.2189, "step": 7854 }, { "epoch": 0.6701074901893874, "grad_norm": 1.6349206484049341, "learning_rate": 2.5926845315951103e-05, "loss": 0.2356, "step": 7855 }, { "epoch": 0.6701927998635046, "grad_norm": 1.7395973966414566, "learning_rate": 2.591473761456684e-05, "loss": 0.2777, "step": 7856 }, { "epoch": 0.6702781095376216, "grad_norm": 1.7146972037503638, "learning_rate": 2.5902631751964395e-05, "loss": 0.1338, "step": 7857 }, { "epoch": 0.6703634192117386, "grad_norm": 1.4837322248825862, "learning_rate": 2.589052772906796e-05, "loss": 0.2195, "step": 7858 }, { "epoch": 0.6704487288858556, "grad_norm": 1.7482203562659084, "learning_rate": 2.5878425546801622e-05, "loss": 0.2901, "step": 7859 }, { "epoch": 0.6705340385599727, "grad_norm": 1.999462413063487, "learning_rate": 2.5866325206089305e-05, "loss": 0.2296, "step": 7860 }, { "epoch": 0.6706193482340898, "grad_norm": 1.371397240243944, "learning_rate": 2.585422670785481e-05, "loss": 0.1374, "step": 7861 }, { "epoch": 0.6707046579082068, "grad_norm": 1.525450252290695, "learning_rate": 2.5842130053021796e-05, "loss": 0.2381, "step": 7862 }, { "epoch": 0.6707899675823238, "grad_norm": 1.8796062474117945, "learning_rate": 2.583003524251376e-05, "loss": 0.2842, "step": 7863 }, { "epoch": 0.6708752772564409, "grad_norm": 1.5431342313852667, "learning_rate": 2.581794227725414e-05, "loss": 0.1807, "step": 7864 }, { "epoch": 0.6709605869305579, "grad_norm": 1.282976429509427, "learning_rate": 2.580585115816607e-05, "loss": 0.2219, "step": 7865 }, { "epoch": 0.671045896604675, "grad_norm": 1.6075755308925346, "learning_rate": 2.579376188617273e-05, "loss": 0.2336, "step": 7866 }, { "epoch": 0.671131206278792, "grad_norm": 1.501400849754141, "learning_rate": 2.5781674462197026e-05, "loss": 0.2262, "step": 7867 }, { "epoch": 0.6712165159529091, "grad_norm": 1.5335046945406137, "learning_rate": 2.576958888716179e-05, "loss": 0.2931, "step": 7868 }, { "epoch": 0.6713018256270261, "grad_norm": 1.9661412590596796, "learning_rate": 2.575750516198968e-05, "loss": 0.2988, "step": 7869 }, { "epoch": 0.6713871353011431, "grad_norm": 1.5522341900249372, "learning_rate": 2.5745423287603206e-05, "loss": 0.2215, "step": 7870 }, { "epoch": 0.6714724449752602, "grad_norm": 2.1489763173768304, "learning_rate": 2.5733343264924815e-05, "loss": 0.2044, "step": 7871 }, { "epoch": 0.6715577546493773, "grad_norm": 1.5659975198796732, "learning_rate": 2.5721265094876667e-05, "loss": 0.2329, "step": 7872 }, { "epoch": 0.6716430643234943, "grad_norm": 1.783208339386886, "learning_rate": 2.5709188778380942e-05, "loss": 0.1535, "step": 7873 }, { "epoch": 0.6717283739976113, "grad_norm": 1.8765359944957904, "learning_rate": 2.5697114316359572e-05, "loss": 0.2287, "step": 7874 }, { "epoch": 0.6718136836717283, "grad_norm": 1.8364803729048391, "learning_rate": 2.568504170973437e-05, "loss": 0.2216, "step": 7875 }, { "epoch": 0.6718989933458455, "grad_norm": 1.8531816058977777, "learning_rate": 2.567297095942701e-05, "loss": 0.2117, "step": 7876 }, { "epoch": 0.6719843030199625, "grad_norm": 1.4861991515576976, "learning_rate": 2.5660902066359084e-05, "loss": 0.2193, "step": 7877 }, { "epoch": 0.6720696126940795, "grad_norm": 2.1260227659196165, "learning_rate": 2.5648835031451902e-05, "loss": 0.1811, "step": 7878 }, { "epoch": 0.6721549223681965, "grad_norm": 1.391190446057125, "learning_rate": 2.563676985562679e-05, "loss": 0.2446, "step": 7879 }, { "epoch": 0.6722402320423136, "grad_norm": 1.5071227714785966, "learning_rate": 2.5624706539804833e-05, "loss": 0.2533, "step": 7880 }, { "epoch": 0.6723255417164307, "grad_norm": 1.6889080954527587, "learning_rate": 2.5612645084906995e-05, "loss": 0.2538, "step": 7881 }, { "epoch": 0.6724108513905477, "grad_norm": 1.3206627736644825, "learning_rate": 2.560058549185412e-05, "loss": 0.1946, "step": 7882 }, { "epoch": 0.6724961610646647, "grad_norm": 1.7538704985307367, "learning_rate": 2.5588527761566857e-05, "loss": 0.1781, "step": 7883 }, { "epoch": 0.6725814707387818, "grad_norm": 1.7959976044449835, "learning_rate": 2.5576471894965815e-05, "loss": 0.2309, "step": 7884 }, { "epoch": 0.6726667804128988, "grad_norm": 1.7320669126646089, "learning_rate": 2.5564417892971327e-05, "loss": 0.1925, "step": 7885 }, { "epoch": 0.6727520900870159, "grad_norm": 1.6096698527923057, "learning_rate": 2.5552365756503693e-05, "loss": 0.198, "step": 7886 }, { "epoch": 0.6728373997611329, "grad_norm": 2.1586988613342486, "learning_rate": 2.5540315486483024e-05, "loss": 0.2132, "step": 7887 }, { "epoch": 0.67292270943525, "grad_norm": 1.7442339181244768, "learning_rate": 2.552826708382929e-05, "loss": 0.2044, "step": 7888 }, { "epoch": 0.673008019109367, "grad_norm": 1.7331564806924278, "learning_rate": 2.5516220549462317e-05, "loss": 0.2072, "step": 7889 }, { "epoch": 0.673093328783484, "grad_norm": 1.7880418023038187, "learning_rate": 2.55041758843018e-05, "loss": 0.2558, "step": 7890 }, { "epoch": 0.6731786384576011, "grad_norm": 1.422245354679519, "learning_rate": 2.5492133089267284e-05, "loss": 0.2326, "step": 7891 }, { "epoch": 0.6732639481317181, "grad_norm": 2.101250267233029, "learning_rate": 2.5480092165278153e-05, "loss": 0.2061, "step": 7892 }, { "epoch": 0.6733492578058352, "grad_norm": 2.008421334307997, "learning_rate": 2.5468053113253722e-05, "loss": 0.2287, "step": 7893 }, { "epoch": 0.6734345674799522, "grad_norm": 1.6956302327498203, "learning_rate": 2.5456015934113043e-05, "loss": 0.2266, "step": 7894 }, { "epoch": 0.6735198771540692, "grad_norm": 1.681650832939401, "learning_rate": 2.5443980628775133e-05, "loss": 0.2222, "step": 7895 }, { "epoch": 0.6736051868281863, "grad_norm": 1.364988727872369, "learning_rate": 2.543194719815879e-05, "loss": 0.2609, "step": 7896 }, { "epoch": 0.6736904965023034, "grad_norm": 1.8557097268475942, "learning_rate": 2.5419915643182767e-05, "loss": 0.1836, "step": 7897 }, { "epoch": 0.6737758061764204, "grad_norm": 1.6871264715403793, "learning_rate": 2.5407885964765526e-05, "loss": 0.1874, "step": 7898 }, { "epoch": 0.6738611158505374, "grad_norm": 2.2445177470198385, "learning_rate": 2.539585816382553e-05, "loss": 0.1742, "step": 7899 }, { "epoch": 0.6739464255246544, "grad_norm": 1.628748604809741, "learning_rate": 2.5383832241281015e-05, "loss": 0.1704, "step": 7900 }, { "epoch": 0.6740317351987716, "grad_norm": 1.8069845666062694, "learning_rate": 2.53718081980501e-05, "loss": 0.22, "step": 7901 }, { "epoch": 0.6741170448728886, "grad_norm": 1.742838990063349, "learning_rate": 2.5359786035050758e-05, "loss": 0.2098, "step": 7902 }, { "epoch": 0.6742023545470056, "grad_norm": 1.7647523326931316, "learning_rate": 2.5347765753200808e-05, "loss": 0.1864, "step": 7903 }, { "epoch": 0.6742876642211226, "grad_norm": 1.5048016941553828, "learning_rate": 2.5335747353417942e-05, "loss": 0.2174, "step": 7904 }, { "epoch": 0.6743729738952398, "grad_norm": 2.096965882197966, "learning_rate": 2.5323730836619685e-05, "loss": 0.2313, "step": 7905 }, { "epoch": 0.6744582835693568, "grad_norm": 1.769657223156361, "learning_rate": 2.5311716203723483e-05, "loss": 0.189, "step": 7906 }, { "epoch": 0.6745435932434738, "grad_norm": 1.771487335909185, "learning_rate": 2.5299703455646516e-05, "loss": 0.1904, "step": 7907 }, { "epoch": 0.6746289029175908, "grad_norm": 1.456466606304711, "learning_rate": 2.5287692593305956e-05, "loss": 0.187, "step": 7908 }, { "epoch": 0.6747142125917079, "grad_norm": 1.5445789289278773, "learning_rate": 2.5275683617618752e-05, "loss": 0.2503, "step": 7909 }, { "epoch": 0.674799522265825, "grad_norm": 2.268302860807429, "learning_rate": 2.5263676529501706e-05, "loss": 0.2104, "step": 7910 }, { "epoch": 0.674884831939942, "grad_norm": 1.5690065333739116, "learning_rate": 2.5251671329871518e-05, "loss": 0.164, "step": 7911 }, { "epoch": 0.674970141614059, "grad_norm": 1.6213083148195104, "learning_rate": 2.523966801964468e-05, "loss": 0.2487, "step": 7912 }, { "epoch": 0.6750554512881761, "grad_norm": 1.7383409811322863, "learning_rate": 2.5227666599737666e-05, "loss": 0.2406, "step": 7913 }, { "epoch": 0.6751407609622931, "grad_norm": 1.4520441773369819, "learning_rate": 2.5215667071066616e-05, "loss": 0.1969, "step": 7914 }, { "epoch": 0.6752260706364102, "grad_norm": 1.9314401674930208, "learning_rate": 2.5203669434547705e-05, "loss": 0.2339, "step": 7915 }, { "epoch": 0.6753113803105272, "grad_norm": 1.550341334752429, "learning_rate": 2.5191673691096866e-05, "loss": 0.1623, "step": 7916 }, { "epoch": 0.6753966899846443, "grad_norm": 1.9079313523920807, "learning_rate": 2.5179679841629912e-05, "loss": 0.205, "step": 7917 }, { "epoch": 0.6754819996587613, "grad_norm": 1.7726430583948891, "learning_rate": 2.5167687887062485e-05, "loss": 0.1833, "step": 7918 }, { "epoch": 0.6755673093328783, "grad_norm": 1.4600893710491853, "learning_rate": 2.5155697828310164e-05, "loss": 0.1735, "step": 7919 }, { "epoch": 0.6756526190069954, "grad_norm": 1.421757536008049, "learning_rate": 2.5143709666288262e-05, "loss": 0.1877, "step": 7920 }, { "epoch": 0.6757379286811125, "grad_norm": 1.613218883574328, "learning_rate": 2.5131723401912056e-05, "loss": 0.197, "step": 7921 }, { "epoch": 0.6758232383552295, "grad_norm": 1.8308240740192776, "learning_rate": 2.5119739036096613e-05, "loss": 0.248, "step": 7922 }, { "epoch": 0.6759085480293465, "grad_norm": 1.4004793232130615, "learning_rate": 2.510775656975689e-05, "loss": 0.1474, "step": 7923 }, { "epoch": 0.6759938577034635, "grad_norm": 1.7931030489088873, "learning_rate": 2.509577600380767e-05, "loss": 0.1485, "step": 7924 }, { "epoch": 0.6760791673775807, "grad_norm": 1.697248978966667, "learning_rate": 2.508379733916359e-05, "loss": 0.1836, "step": 7925 }, { "epoch": 0.6761644770516977, "grad_norm": 1.6598407041573267, "learning_rate": 2.507182057673922e-05, "loss": 0.2326, "step": 7926 }, { "epoch": 0.6762497867258147, "grad_norm": 1.655518712239003, "learning_rate": 2.5059845717448843e-05, "loss": 0.2142, "step": 7927 }, { "epoch": 0.6763350963999317, "grad_norm": 2.358667532849337, "learning_rate": 2.5047872762206732e-05, "loss": 0.2384, "step": 7928 }, { "epoch": 0.6764204060740489, "grad_norm": 1.5639990672720936, "learning_rate": 2.503590171192694e-05, "loss": 0.2013, "step": 7929 }, { "epoch": 0.6765057157481659, "grad_norm": 2.2087289947564486, "learning_rate": 2.5023932567523388e-05, "loss": 0.2844, "step": 7930 }, { "epoch": 0.6765910254222829, "grad_norm": 1.9076486678077853, "learning_rate": 2.501196532990987e-05, "loss": 0.1573, "step": 7931 }, { "epoch": 0.6766763350963999, "grad_norm": 1.6459981940388186, "learning_rate": 2.500000000000001e-05, "loss": 0.19, "step": 7932 }, { "epoch": 0.6767616447705169, "grad_norm": 1.3207015869348298, "learning_rate": 2.4988036578707303e-05, "loss": 0.1515, "step": 7933 }, { "epoch": 0.676846954444634, "grad_norm": 1.5193868141860507, "learning_rate": 2.4976075066945064e-05, "loss": 0.1973, "step": 7934 }, { "epoch": 0.6769322641187511, "grad_norm": 1.7024786186276042, "learning_rate": 2.496411546562656e-05, "loss": 0.1811, "step": 7935 }, { "epoch": 0.6770175737928681, "grad_norm": 1.5145476926924228, "learning_rate": 2.4952157775664757e-05, "loss": 0.2033, "step": 7936 }, { "epoch": 0.6771028834669851, "grad_norm": 1.6388518424435787, "learning_rate": 2.4940201997972628e-05, "loss": 0.2689, "step": 7937 }, { "epoch": 0.6771881931411022, "grad_norm": 1.6249715362403456, "learning_rate": 2.4928248133462907e-05, "loss": 0.204, "step": 7938 }, { "epoch": 0.6772735028152193, "grad_norm": 1.780526934648433, "learning_rate": 2.491629618304821e-05, "loss": 0.1744, "step": 7939 }, { "epoch": 0.6773588124893363, "grad_norm": 1.8222834840739615, "learning_rate": 2.4904346147640984e-05, "loss": 0.1938, "step": 7940 }, { "epoch": 0.6774441221634533, "grad_norm": 1.8660663127503114, "learning_rate": 2.489239802815359e-05, "loss": 0.1707, "step": 7941 }, { "epoch": 0.6775294318375704, "grad_norm": 1.706423583650431, "learning_rate": 2.488045182549819e-05, "loss": 0.2, "step": 7942 }, { "epoch": 0.6776147415116874, "grad_norm": 2.0446828503900547, "learning_rate": 2.4868507540586805e-05, "loss": 0.2177, "step": 7943 }, { "epoch": 0.6777000511858045, "grad_norm": 1.7311497848645907, "learning_rate": 2.485656517433132e-05, "loss": 0.1804, "step": 7944 }, { "epoch": 0.6777853608599215, "grad_norm": 1.6750589872762995, "learning_rate": 2.4844624727643472e-05, "loss": 0.1647, "step": 7945 }, { "epoch": 0.6778706705340386, "grad_norm": 2.0407824515431248, "learning_rate": 2.4832686201434852e-05, "loss": 0.1679, "step": 7946 }, { "epoch": 0.6779559802081556, "grad_norm": 1.5472952531935913, "learning_rate": 2.482074959661688e-05, "loss": 0.2069, "step": 7947 }, { "epoch": 0.6780412898822726, "grad_norm": 1.5025433594704092, "learning_rate": 2.4808814914100915e-05, "loss": 0.168, "step": 7948 }, { "epoch": 0.6781265995563897, "grad_norm": 1.9199799910138529, "learning_rate": 2.4796882154798023e-05, "loss": 0.1902, "step": 7949 }, { "epoch": 0.6782119092305068, "grad_norm": 1.5145996400706518, "learning_rate": 2.478495131961927e-05, "loss": 0.1873, "step": 7950 }, { "epoch": 0.6782972189046238, "grad_norm": 1.895969875015891, "learning_rate": 2.4773022409475495e-05, "loss": 0.174, "step": 7951 }, { "epoch": 0.6783825285787408, "grad_norm": 1.6462746121153309, "learning_rate": 2.4761095425277396e-05, "loss": 0.1606, "step": 7952 }, { "epoch": 0.6784678382528578, "grad_norm": 1.7998355525449634, "learning_rate": 2.474917036793555e-05, "loss": 0.2127, "step": 7953 }, { "epoch": 0.678553147926975, "grad_norm": 1.894048192685017, "learning_rate": 2.4737247238360356e-05, "loss": 0.1949, "step": 7954 }, { "epoch": 0.678638457601092, "grad_norm": 1.517732079747132, "learning_rate": 2.4725326037462092e-05, "loss": 0.199, "step": 7955 }, { "epoch": 0.678723767275209, "grad_norm": 1.6094029988936582, "learning_rate": 2.4713406766150858e-05, "loss": 0.1728, "step": 7956 }, { "epoch": 0.678809076949326, "grad_norm": 1.7380610980234976, "learning_rate": 2.4701489425336667e-05, "loss": 0.1739, "step": 7957 }, { "epoch": 0.6788943866234431, "grad_norm": 1.6494471635314611, "learning_rate": 2.468957401592932e-05, "loss": 0.2291, "step": 7958 }, { "epoch": 0.6789796962975602, "grad_norm": 1.6529352378459041, "learning_rate": 2.467766053883849e-05, "loss": 0.1608, "step": 7959 }, { "epoch": 0.6790650059716772, "grad_norm": 1.581694164392169, "learning_rate": 2.4665748994973704e-05, "loss": 0.1577, "step": 7960 }, { "epoch": 0.6791503156457942, "grad_norm": 1.3861613698132467, "learning_rate": 2.4653839385244392e-05, "loss": 0.1751, "step": 7961 }, { "epoch": 0.6792356253199113, "grad_norm": 1.9870778097229385, "learning_rate": 2.4641931710559717e-05, "loss": 0.2489, "step": 7962 }, { "epoch": 0.6793209349940283, "grad_norm": 1.8112296552548788, "learning_rate": 2.463002597182882e-05, "loss": 0.2135, "step": 7963 }, { "epoch": 0.6794062446681454, "grad_norm": 1.7992161633584316, "learning_rate": 2.461812216996062e-05, "loss": 0.2286, "step": 7964 }, { "epoch": 0.6794915543422624, "grad_norm": 2.2493961371604954, "learning_rate": 2.460622030586392e-05, "loss": 0.2121, "step": 7965 }, { "epoch": 0.6795768640163795, "grad_norm": 1.9020124769714135, "learning_rate": 2.4594320380447356e-05, "loss": 0.1923, "step": 7966 }, { "epoch": 0.6796621736904965, "grad_norm": 1.9147406174812989, "learning_rate": 2.4582422394619427e-05, "loss": 0.2047, "step": 7967 }, { "epoch": 0.6797474833646135, "grad_norm": 1.6782063896005734, "learning_rate": 2.4570526349288475e-05, "loss": 0.1887, "step": 7968 }, { "epoch": 0.6798327930387306, "grad_norm": 1.999058859641282, "learning_rate": 2.455863224536269e-05, "loss": 0.28, "step": 7969 }, { "epoch": 0.6799181027128476, "grad_norm": 1.4101267465479677, "learning_rate": 2.454674008375015e-05, "loss": 0.1758, "step": 7970 }, { "epoch": 0.6800034123869647, "grad_norm": 1.673564022064799, "learning_rate": 2.453484986535875e-05, "loss": 0.1702, "step": 7971 }, { "epoch": 0.6800887220610817, "grad_norm": 1.5995758388483163, "learning_rate": 2.4522961591096246e-05, "loss": 0.1603, "step": 7972 }, { "epoch": 0.6801740317351987, "grad_norm": 1.8071031133114381, "learning_rate": 2.4511075261870232e-05, "loss": 0.2075, "step": 7973 }, { "epoch": 0.6802593414093158, "grad_norm": 1.820296324301956, "learning_rate": 2.449919087858818e-05, "loss": 0.2286, "step": 7974 }, { "epoch": 0.6803446510834329, "grad_norm": 1.7394173670497928, "learning_rate": 2.4487308442157386e-05, "loss": 0.1911, "step": 7975 }, { "epoch": 0.6804299607575499, "grad_norm": 1.5655836575773556, "learning_rate": 2.4475427953485002e-05, "loss": 0.1605, "step": 7976 }, { "epoch": 0.6805152704316669, "grad_norm": 1.9317307253537899, "learning_rate": 2.4463549413478098e-05, "loss": 0.1692, "step": 7977 }, { "epoch": 0.680600580105784, "grad_norm": 1.5708295435906119, "learning_rate": 2.4451672823043455e-05, "loss": 0.1481, "step": 7978 }, { "epoch": 0.6806858897799011, "grad_norm": 2.225874516791536, "learning_rate": 2.4439798183087846e-05, "loss": 0.1728, "step": 7979 }, { "epoch": 0.6807711994540181, "grad_norm": 1.640463684870651, "learning_rate": 2.4427925494517823e-05, "loss": 0.2328, "step": 7980 }, { "epoch": 0.6808565091281351, "grad_norm": 2.45572306098597, "learning_rate": 2.4416054758239794e-05, "loss": 0.1949, "step": 7981 }, { "epoch": 0.6809418188022521, "grad_norm": 1.8355607315777658, "learning_rate": 2.4404185975160014e-05, "loss": 0.2512, "step": 7982 }, { "epoch": 0.6810271284763693, "grad_norm": 1.4227803460536188, "learning_rate": 2.4392319146184655e-05, "loss": 0.1538, "step": 7983 }, { "epoch": 0.6811124381504863, "grad_norm": 2.0299225210803873, "learning_rate": 2.4380454272219617e-05, "loss": 0.2695, "step": 7984 }, { "epoch": 0.6811977478246033, "grad_norm": 1.4406464184143224, "learning_rate": 2.436859135417077e-05, "loss": 0.2292, "step": 7985 }, { "epoch": 0.6812830574987203, "grad_norm": 2.189835092266822, "learning_rate": 2.4356730392943767e-05, "loss": 0.2029, "step": 7986 }, { "epoch": 0.6813683671728374, "grad_norm": 1.8735821131234385, "learning_rate": 2.4344871389444128e-05, "loss": 0.3047, "step": 7987 }, { "epoch": 0.6814536768469545, "grad_norm": 1.286435496427437, "learning_rate": 2.4333014344577232e-05, "loss": 0.2189, "step": 7988 }, { "epoch": 0.6815389865210715, "grad_norm": 1.8831325274854154, "learning_rate": 2.4321159259248272e-05, "loss": 0.2172, "step": 7989 }, { "epoch": 0.6816242961951885, "grad_norm": 1.6375770230414912, "learning_rate": 2.430930613436239e-05, "loss": 0.2577, "step": 7990 }, { "epoch": 0.6817096058693056, "grad_norm": 1.497521259749216, "learning_rate": 2.4297454970824424e-05, "loss": 0.2587, "step": 7991 }, { "epoch": 0.6817949155434226, "grad_norm": 1.5139127674999928, "learning_rate": 2.4285605769539204e-05, "loss": 0.2078, "step": 7992 }, { "epoch": 0.6818802252175397, "grad_norm": 1.9287618664162014, "learning_rate": 2.427375853141134e-05, "loss": 0.2771, "step": 7993 }, { "epoch": 0.6819655348916567, "grad_norm": 1.962179330273258, "learning_rate": 2.4261913257345304e-05, "loss": 0.2337, "step": 7994 }, { "epoch": 0.6820508445657738, "grad_norm": 1.4041849020086103, "learning_rate": 2.4250069948245414e-05, "loss": 0.1352, "step": 7995 }, { "epoch": 0.6821361542398908, "grad_norm": 1.6525915514679592, "learning_rate": 2.423822860501585e-05, "loss": 0.2249, "step": 7996 }, { "epoch": 0.6822214639140078, "grad_norm": 1.7642372551612153, "learning_rate": 2.4226389228560635e-05, "loss": 0.2023, "step": 7997 }, { "epoch": 0.6823067735881249, "grad_norm": 1.227865219790259, "learning_rate": 2.4214551819783626e-05, "loss": 0.1745, "step": 7998 }, { "epoch": 0.682392083262242, "grad_norm": 1.9117658889128188, "learning_rate": 2.4202716379588598e-05, "loss": 0.163, "step": 7999 }, { "epoch": 0.682477392936359, "grad_norm": 1.7674607042965056, "learning_rate": 2.4190882908879048e-05, "loss": 0.1557, "step": 8000 }, { "epoch": 0.682562702610476, "grad_norm": 1.6295027338258437, "learning_rate": 2.4179051408558458e-05, "loss": 0.2191, "step": 8001 }, { "epoch": 0.682648012284593, "grad_norm": 1.7957677995919377, "learning_rate": 2.416722187953006e-05, "loss": 0.2164, "step": 8002 }, { "epoch": 0.6827333219587102, "grad_norm": 1.8449125342583916, "learning_rate": 2.4155394322697038e-05, "loss": 0.2105, "step": 8003 }, { "epoch": 0.6828186316328272, "grad_norm": 1.9347485725426155, "learning_rate": 2.414356873896228e-05, "loss": 0.2327, "step": 8004 }, { "epoch": 0.6829039413069442, "grad_norm": 1.5783875501935376, "learning_rate": 2.4131745129228674e-05, "loss": 0.1429, "step": 8005 }, { "epoch": 0.6829892509810612, "grad_norm": 1.5328455028033574, "learning_rate": 2.4119923494398856e-05, "loss": 0.1786, "step": 8006 }, { "epoch": 0.6830745606551782, "grad_norm": 1.6188460980615125, "learning_rate": 2.4108103835375357e-05, "loss": 0.1741, "step": 8007 }, { "epoch": 0.6831598703292954, "grad_norm": 1.8081846495414007, "learning_rate": 2.4096286153060538e-05, "loss": 0.2294, "step": 8008 }, { "epoch": 0.6832451800034124, "grad_norm": 1.3684628015493308, "learning_rate": 2.4084470448356617e-05, "loss": 0.1692, "step": 8009 }, { "epoch": 0.6833304896775294, "grad_norm": 1.2497908417235382, "learning_rate": 2.4072656722165665e-05, "loss": 0.1559, "step": 8010 }, { "epoch": 0.6834157993516464, "grad_norm": 2.139864709980578, "learning_rate": 2.4060844975389574e-05, "loss": 0.1758, "step": 8011 }, { "epoch": 0.6835011090257636, "grad_norm": 1.858586119948412, "learning_rate": 2.4049035208930172e-05, "loss": 0.1993, "step": 8012 }, { "epoch": 0.6835864186998806, "grad_norm": 1.6194855951773885, "learning_rate": 2.4037227423688985e-05, "loss": 0.1181, "step": 8013 }, { "epoch": 0.6836717283739976, "grad_norm": 1.7468124378445509, "learning_rate": 2.4025421620567535e-05, "loss": 0.1669, "step": 8014 }, { "epoch": 0.6837570380481146, "grad_norm": 1.973273454554534, "learning_rate": 2.401361780046712e-05, "loss": 0.2368, "step": 8015 }, { "epoch": 0.6838423477222317, "grad_norm": 2.193255563862613, "learning_rate": 2.4001815964288893e-05, "loss": 0.1672, "step": 8016 }, { "epoch": 0.6839276573963488, "grad_norm": 1.7465808709503066, "learning_rate": 2.3990016112933866e-05, "loss": 0.2465, "step": 8017 }, { "epoch": 0.6840129670704658, "grad_norm": 1.6062772993447105, "learning_rate": 2.397821824730287e-05, "loss": 0.2362, "step": 8018 }, { "epoch": 0.6840982767445828, "grad_norm": 1.792268903192291, "learning_rate": 2.396642236829667e-05, "loss": 0.2383, "step": 8019 }, { "epoch": 0.6841835864186999, "grad_norm": 1.4881577327866116, "learning_rate": 2.3954628476815736e-05, "loss": 0.2426, "step": 8020 }, { "epoch": 0.6842688960928169, "grad_norm": 1.7321746202458352, "learning_rate": 2.394283657376054e-05, "loss": 0.1975, "step": 8021 }, { "epoch": 0.684354205766934, "grad_norm": 1.5110995185086376, "learning_rate": 2.3931046660031302e-05, "loss": 0.2144, "step": 8022 }, { "epoch": 0.684439515441051, "grad_norm": 1.7730490641423888, "learning_rate": 2.3919258736528123e-05, "loss": 0.2283, "step": 8023 }, { "epoch": 0.6845248251151681, "grad_norm": 1.7123073163191296, "learning_rate": 2.390747280415092e-05, "loss": 0.2051, "step": 8024 }, { "epoch": 0.6846101347892851, "grad_norm": 1.3989452101072286, "learning_rate": 2.389568886379956e-05, "loss": 0.1429, "step": 8025 }, { "epoch": 0.6846954444634021, "grad_norm": 1.7189520977152044, "learning_rate": 2.3883906916373595e-05, "loss": 0.2029, "step": 8026 }, { "epoch": 0.6847807541375192, "grad_norm": 1.9333335624343913, "learning_rate": 2.3872126962772572e-05, "loss": 0.1976, "step": 8027 }, { "epoch": 0.6848660638116363, "grad_norm": 2.1279963517480427, "learning_rate": 2.3860349003895816e-05, "loss": 0.1948, "step": 8028 }, { "epoch": 0.6849513734857533, "grad_norm": 1.6478187882411752, "learning_rate": 2.3848573040642508e-05, "loss": 0.2159, "step": 8029 }, { "epoch": 0.6850366831598703, "grad_norm": 1.7632955527750183, "learning_rate": 2.383679907391168e-05, "loss": 0.2629, "step": 8030 }, { "epoch": 0.6851219928339873, "grad_norm": 1.5586036930388512, "learning_rate": 2.382502710460219e-05, "loss": 0.1653, "step": 8031 }, { "epoch": 0.6852073025081045, "grad_norm": 1.6767116711354841, "learning_rate": 2.3813257133612827e-05, "loss": 0.1916, "step": 8032 }, { "epoch": 0.6852926121822215, "grad_norm": 1.7529768828517802, "learning_rate": 2.3801489161842083e-05, "loss": 0.207, "step": 8033 }, { "epoch": 0.6853779218563385, "grad_norm": 1.893708292183944, "learning_rate": 2.3789723190188444e-05, "loss": 0.1893, "step": 8034 }, { "epoch": 0.6854632315304555, "grad_norm": 1.7911330322920394, "learning_rate": 2.377795921955016e-05, "loss": 0.2344, "step": 8035 }, { "epoch": 0.6855485412045726, "grad_norm": 1.4608946605256046, "learning_rate": 2.376619725082535e-05, "loss": 0.2189, "step": 8036 }, { "epoch": 0.6856338508786897, "grad_norm": 1.7661750409309718, "learning_rate": 2.3754437284911968e-05, "loss": 0.2582, "step": 8037 }, { "epoch": 0.6857191605528067, "grad_norm": 1.696271633833895, "learning_rate": 2.3742679322707835e-05, "loss": 0.1871, "step": 8038 }, { "epoch": 0.6858044702269237, "grad_norm": 1.5756351734212746, "learning_rate": 2.3730923365110597e-05, "loss": 0.2345, "step": 8039 }, { "epoch": 0.6858897799010408, "grad_norm": 2.0855011787274034, "learning_rate": 2.371916941301775e-05, "loss": 0.2372, "step": 8040 }, { "epoch": 0.6859750895751578, "grad_norm": 1.7101973931672891, "learning_rate": 2.3707417467326704e-05, "loss": 0.2058, "step": 8041 }, { "epoch": 0.6860603992492749, "grad_norm": 2.091712073736264, "learning_rate": 2.3695667528934573e-05, "loss": 0.2137, "step": 8042 }, { "epoch": 0.6861457089233919, "grad_norm": 1.883416549847122, "learning_rate": 2.368391959873847e-05, "loss": 0.2279, "step": 8043 }, { "epoch": 0.686231018597509, "grad_norm": 1.6932637716758634, "learning_rate": 2.3672173677635258e-05, "loss": 0.2164, "step": 8044 }, { "epoch": 0.686316328271626, "grad_norm": 1.4493515077227581, "learning_rate": 2.366042976652168e-05, "loss": 0.1769, "step": 8045 }, { "epoch": 0.686401637945743, "grad_norm": 1.8995432204239378, "learning_rate": 2.36486878662943e-05, "loss": 0.1874, "step": 8046 }, { "epoch": 0.6864869476198601, "grad_norm": 1.453689301552852, "learning_rate": 2.3636947977849592e-05, "loss": 0.1574, "step": 8047 }, { "epoch": 0.6865722572939771, "grad_norm": 2.3516381337115346, "learning_rate": 2.3625210102083817e-05, "loss": 0.1715, "step": 8048 }, { "epoch": 0.6866575669680942, "grad_norm": 1.714439257489413, "learning_rate": 2.3613474239893092e-05, "loss": 0.1835, "step": 8049 }, { "epoch": 0.6867428766422112, "grad_norm": 1.7246534745830413, "learning_rate": 2.360174039217339e-05, "loss": 0.2236, "step": 8050 }, { "epoch": 0.6868281863163282, "grad_norm": 1.6857931193698406, "learning_rate": 2.3590008559820526e-05, "loss": 0.1412, "step": 8051 }, { "epoch": 0.6869134959904453, "grad_norm": 1.4529714195554302, "learning_rate": 2.357827874373017e-05, "loss": 0.19, "step": 8052 }, { "epoch": 0.6869988056645624, "grad_norm": 1.5684086280015321, "learning_rate": 2.3566550944797804e-05, "loss": 0.1881, "step": 8053 }, { "epoch": 0.6870841153386794, "grad_norm": 1.6829417796227693, "learning_rate": 2.3554825163918848e-05, "loss": 0.2132, "step": 8054 }, { "epoch": 0.6871694250127964, "grad_norm": 1.6471004035052919, "learning_rate": 2.354310140198842e-05, "loss": 0.202, "step": 8055 }, { "epoch": 0.6872547346869134, "grad_norm": 1.590444240529251, "learning_rate": 2.353137965990163e-05, "loss": 0.1666, "step": 8056 }, { "epoch": 0.6873400443610306, "grad_norm": 1.7507546024090428, "learning_rate": 2.3519659938553352e-05, "loss": 0.1588, "step": 8057 }, { "epoch": 0.6874253540351476, "grad_norm": 1.9958610622998993, "learning_rate": 2.3507942238838314e-05, "loss": 0.2246, "step": 8058 }, { "epoch": 0.6875106637092646, "grad_norm": 2.1211031918816095, "learning_rate": 2.3496226561651113e-05, "loss": 0.1753, "step": 8059 }, { "epoch": 0.6875959733833816, "grad_norm": 1.5938841071459144, "learning_rate": 2.3484512907886154e-05, "loss": 0.1719, "step": 8060 }, { "epoch": 0.6876812830574988, "grad_norm": 2.134332806685406, "learning_rate": 2.3472801278437768e-05, "loss": 0.2304, "step": 8061 }, { "epoch": 0.6877665927316158, "grad_norm": 1.3304819622716562, "learning_rate": 2.3461091674199998e-05, "loss": 0.1747, "step": 8062 }, { "epoch": 0.6878519024057328, "grad_norm": 1.4677825338623653, "learning_rate": 2.3449384096066874e-05, "loss": 0.1469, "step": 8063 }, { "epoch": 0.6879372120798498, "grad_norm": 1.966370613099556, "learning_rate": 2.343767854493218e-05, "loss": 0.1316, "step": 8064 }, { "epoch": 0.6880225217539669, "grad_norm": 1.8086199830626513, "learning_rate": 2.3425975021689584e-05, "loss": 0.213, "step": 8065 }, { "epoch": 0.688107831428084, "grad_norm": 1.56936981601383, "learning_rate": 2.3414273527232554e-05, "loss": 0.2092, "step": 8066 }, { "epoch": 0.688193141102201, "grad_norm": 1.8522596374747036, "learning_rate": 2.3402574062454508e-05, "loss": 0.2194, "step": 8067 }, { "epoch": 0.688278450776318, "grad_norm": 1.7683973574156346, "learning_rate": 2.3390876628248553e-05, "loss": 0.2325, "step": 8068 }, { "epoch": 0.6883637604504351, "grad_norm": 1.6086957664803487, "learning_rate": 2.3379181225507783e-05, "loss": 0.2058, "step": 8069 }, { "epoch": 0.6884490701245521, "grad_norm": 1.8736987366894342, "learning_rate": 2.336748785512507e-05, "loss": 0.21, "step": 8070 }, { "epoch": 0.6885343797986692, "grad_norm": 1.7392717261296013, "learning_rate": 2.335579651799313e-05, "loss": 0.1489, "step": 8071 }, { "epoch": 0.6886196894727862, "grad_norm": 1.4492852948891932, "learning_rate": 2.334410721500454e-05, "loss": 0.1498, "step": 8072 }, { "epoch": 0.6887049991469033, "grad_norm": 2.2056476408309034, "learning_rate": 2.3332419947051715e-05, "loss": 0.1843, "step": 8073 }, { "epoch": 0.6887903088210203, "grad_norm": 1.3918223423411753, "learning_rate": 2.3320734715026916e-05, "loss": 0.1901, "step": 8074 }, { "epoch": 0.6888756184951373, "grad_norm": 1.6434895530301339, "learning_rate": 2.330905151982223e-05, "loss": 0.1019, "step": 8075 }, { "epoch": 0.6889609281692544, "grad_norm": 1.7151436163378, "learning_rate": 2.329737036232964e-05, "loss": 0.1492, "step": 8076 }, { "epoch": 0.6890462378433715, "grad_norm": 1.6828598936698242, "learning_rate": 2.3285691243440927e-05, "loss": 0.2131, "step": 8077 }, { "epoch": 0.6891315475174885, "grad_norm": 1.5325668278073739, "learning_rate": 2.327401416404773e-05, "loss": 0.1164, "step": 8078 }, { "epoch": 0.6892168571916055, "grad_norm": 1.7163707564533948, "learning_rate": 2.3262339125041527e-05, "loss": 0.2194, "step": 8079 }, { "epoch": 0.6893021668657225, "grad_norm": 1.674064842053004, "learning_rate": 2.3250666127313647e-05, "loss": 0.1849, "step": 8080 }, { "epoch": 0.6893874765398397, "grad_norm": 2.1492668631800513, "learning_rate": 2.3238995171755268e-05, "loss": 0.1837, "step": 8081 }, { "epoch": 0.6894727862139567, "grad_norm": 1.7343316974904674, "learning_rate": 2.3227326259257376e-05, "loss": 0.1881, "step": 8082 }, { "epoch": 0.6895580958880737, "grad_norm": 2.0464766020025604, "learning_rate": 2.321565939071089e-05, "loss": 0.1821, "step": 8083 }, { "epoch": 0.6896434055621907, "grad_norm": 1.3625645543475744, "learning_rate": 2.3203994567006447e-05, "loss": 0.1791, "step": 8084 }, { "epoch": 0.6897287152363077, "grad_norm": 1.9085091508501562, "learning_rate": 2.319233178903464e-05, "loss": 0.2508, "step": 8085 }, { "epoch": 0.6898140249104249, "grad_norm": 1.667718658637687, "learning_rate": 2.3180671057685844e-05, "loss": 0.2228, "step": 8086 }, { "epoch": 0.6898993345845419, "grad_norm": 1.4034266209270947, "learning_rate": 2.3169012373850298e-05, "loss": 0.1881, "step": 8087 }, { "epoch": 0.6899846442586589, "grad_norm": 1.4821066942251548, "learning_rate": 2.3157355738418058e-05, "loss": 0.1834, "step": 8088 }, { "epoch": 0.6900699539327759, "grad_norm": 1.775391296453737, "learning_rate": 2.3145701152279103e-05, "loss": 0.1808, "step": 8089 }, { "epoch": 0.690155263606893, "grad_norm": 1.570099773516832, "learning_rate": 2.3134048616323125e-05, "loss": 0.1935, "step": 8090 }, { "epoch": 0.6902405732810101, "grad_norm": 1.8055400733610016, "learning_rate": 2.3122398131439783e-05, "loss": 0.2128, "step": 8091 }, { "epoch": 0.6903258829551271, "grad_norm": 1.2697624113464339, "learning_rate": 2.311074969851852e-05, "loss": 0.201, "step": 8092 }, { "epoch": 0.6904111926292441, "grad_norm": 1.666881841438823, "learning_rate": 2.309910331844863e-05, "loss": 0.2379, "step": 8093 }, { "epoch": 0.6904965023033612, "grad_norm": 1.8947041540783969, "learning_rate": 2.308745899211925e-05, "loss": 0.21, "step": 8094 }, { "epoch": 0.6905818119774783, "grad_norm": 1.3642027180933052, "learning_rate": 2.307581672041934e-05, "loss": 0.1696, "step": 8095 }, { "epoch": 0.6906671216515953, "grad_norm": 1.9053206132437945, "learning_rate": 2.3064176504237788e-05, "loss": 0.2358, "step": 8096 }, { "epoch": 0.6907524313257123, "grad_norm": 1.693696970654952, "learning_rate": 2.3052538344463187e-05, "loss": 0.1812, "step": 8097 }, { "epoch": 0.6908377409998294, "grad_norm": 1.527750804932747, "learning_rate": 2.3040902241984103e-05, "loss": 0.2264, "step": 8098 }, { "epoch": 0.6909230506739464, "grad_norm": 1.5441604068723236, "learning_rate": 2.302926819768887e-05, "loss": 0.1721, "step": 8099 }, { "epoch": 0.6910083603480635, "grad_norm": 1.5223528791230432, "learning_rate": 2.3017636212465692e-05, "loss": 0.219, "step": 8100 }, { "epoch": 0.6910936700221805, "grad_norm": 1.828712564581942, "learning_rate": 2.3006006287202604e-05, "loss": 0.2176, "step": 8101 }, { "epoch": 0.6911789796962976, "grad_norm": 1.6746428208495971, "learning_rate": 2.2994378422787488e-05, "loss": 0.2144, "step": 8102 }, { "epoch": 0.6912642893704146, "grad_norm": 1.7248250236776737, "learning_rate": 2.2982752620108072e-05, "loss": 0.1674, "step": 8103 }, { "epoch": 0.6913495990445316, "grad_norm": 2.138558162644787, "learning_rate": 2.2971128880051905e-05, "loss": 0.1998, "step": 8104 }, { "epoch": 0.6914349087186487, "grad_norm": 2.198855544566669, "learning_rate": 2.2959507203506437e-05, "loss": 0.1613, "step": 8105 }, { "epoch": 0.6915202183927658, "grad_norm": 1.6519633547433912, "learning_rate": 2.2947887591358897e-05, "loss": 0.0983, "step": 8106 }, { "epoch": 0.6916055280668828, "grad_norm": 2.2159253259877696, "learning_rate": 2.293627004449639e-05, "loss": 0.1773, "step": 8107 }, { "epoch": 0.6916908377409998, "grad_norm": 2.5062625646870793, "learning_rate": 2.2924654563805826e-05, "loss": 0.2597, "step": 8108 }, { "epoch": 0.6917761474151168, "grad_norm": 1.8052867868687839, "learning_rate": 2.2913041150174047e-05, "loss": 0.2098, "step": 8109 }, { "epoch": 0.691861457089234, "grad_norm": 1.6551210406431571, "learning_rate": 2.29014298044876e-05, "loss": 0.1843, "step": 8110 }, { "epoch": 0.691946766763351, "grad_norm": 1.5019596332860454, "learning_rate": 2.2889820527633005e-05, "loss": 0.2166, "step": 8111 }, { "epoch": 0.692032076437468, "grad_norm": 2.0363966077993236, "learning_rate": 2.2878213320496545e-05, "loss": 0.2242, "step": 8112 }, { "epoch": 0.692117386111585, "grad_norm": 1.838105305731888, "learning_rate": 2.2866608183964376e-05, "loss": 0.1659, "step": 8113 }, { "epoch": 0.6922026957857021, "grad_norm": 1.840211738345172, "learning_rate": 2.2855005118922485e-05, "loss": 0.2317, "step": 8114 }, { "epoch": 0.6922880054598192, "grad_norm": 1.41577466295691, "learning_rate": 2.2843404126256708e-05, "loss": 0.1799, "step": 8115 }, { "epoch": 0.6923733151339362, "grad_norm": 1.9331165071554317, "learning_rate": 2.2831805206852714e-05, "loss": 0.2399, "step": 8116 }, { "epoch": 0.6924586248080532, "grad_norm": 1.7511260633158379, "learning_rate": 2.2820208361596e-05, "loss": 0.2808, "step": 8117 }, { "epoch": 0.6925439344821703, "grad_norm": 1.8576493149065707, "learning_rate": 2.280861359137198e-05, "loss": 0.1662, "step": 8118 }, { "epoch": 0.6926292441562873, "grad_norm": 1.7308765215100828, "learning_rate": 2.2797020897065784e-05, "loss": 0.1926, "step": 8119 }, { "epoch": 0.6927145538304044, "grad_norm": 1.3843919602295947, "learning_rate": 2.27854302795625e-05, "loss": 0.1993, "step": 8120 }, { "epoch": 0.6927998635045214, "grad_norm": 1.6530874035315253, "learning_rate": 2.2773841739747003e-05, "loss": 0.2418, "step": 8121 }, { "epoch": 0.6928851731786384, "grad_norm": 2.0714470975263506, "learning_rate": 2.2762255278504007e-05, "loss": 0.2681, "step": 8122 }, { "epoch": 0.6929704828527555, "grad_norm": 1.6142999051624123, "learning_rate": 2.275067089671808e-05, "loss": 0.2136, "step": 8123 }, { "epoch": 0.6930557925268725, "grad_norm": 1.7514941104329256, "learning_rate": 2.2739088595273604e-05, "loss": 0.2019, "step": 8124 }, { "epoch": 0.6931411022009896, "grad_norm": 1.2581320883811327, "learning_rate": 2.272750837505489e-05, "loss": 0.1892, "step": 8125 }, { "epoch": 0.6932264118751066, "grad_norm": 1.7928995355947075, "learning_rate": 2.2715930236945947e-05, "loss": 0.243, "step": 8126 }, { "epoch": 0.6933117215492237, "grad_norm": 1.3703282058494313, "learning_rate": 2.2704354181830767e-05, "loss": 0.1097, "step": 8127 }, { "epoch": 0.6933970312233407, "grad_norm": 1.6339561436252723, "learning_rate": 2.269278021059309e-05, "loss": 0.1921, "step": 8128 }, { "epoch": 0.6934823408974577, "grad_norm": 1.6449988780843974, "learning_rate": 2.2681208324116538e-05, "loss": 0.2106, "step": 8129 }, { "epoch": 0.6935676505715748, "grad_norm": 1.8447465951052973, "learning_rate": 2.2669638523284535e-05, "loss": 0.2677, "step": 8130 }, { "epoch": 0.6936529602456919, "grad_norm": 1.635147104907537, "learning_rate": 2.2658070808980436e-05, "loss": 0.2723, "step": 8131 }, { "epoch": 0.6937382699198089, "grad_norm": 1.8870832197824237, "learning_rate": 2.26465051820873e-05, "loss": 0.2672, "step": 8132 }, { "epoch": 0.6938235795939259, "grad_norm": 1.3952455374272634, "learning_rate": 2.2634941643488156e-05, "loss": 0.22, "step": 8133 }, { "epoch": 0.693908889268043, "grad_norm": 1.6972317708702938, "learning_rate": 2.2623380194065802e-05, "loss": 0.2083, "step": 8134 }, { "epoch": 0.6939941989421601, "grad_norm": 1.6605630533295783, "learning_rate": 2.2611820834702886e-05, "loss": 0.1917, "step": 8135 }, { "epoch": 0.6940795086162771, "grad_norm": 1.9115890402804385, "learning_rate": 2.2600263566281908e-05, "loss": 0.2758, "step": 8136 }, { "epoch": 0.6941648182903941, "grad_norm": 1.9631742813351734, "learning_rate": 2.2588708389685193e-05, "loss": 0.2665, "step": 8137 }, { "epoch": 0.6942501279645111, "grad_norm": 1.467931966031455, "learning_rate": 2.2577155305794962e-05, "loss": 0.1839, "step": 8138 }, { "epoch": 0.6943354376386283, "grad_norm": 1.5212016595888076, "learning_rate": 2.256560431549316e-05, "loss": 0.2073, "step": 8139 }, { "epoch": 0.6944207473127453, "grad_norm": 1.7286693439990073, "learning_rate": 2.2554055419661703e-05, "loss": 0.1473, "step": 8140 }, { "epoch": 0.6945060569868623, "grad_norm": 1.6799622776701062, "learning_rate": 2.254250861918227e-05, "loss": 0.1854, "step": 8141 }, { "epoch": 0.6945913666609793, "grad_norm": 2.4682251577565455, "learning_rate": 2.2530963914936387e-05, "loss": 0.214, "step": 8142 }, { "epoch": 0.6946766763350964, "grad_norm": 1.8159143581698107, "learning_rate": 2.2519421307805445e-05, "loss": 0.1638, "step": 8143 }, { "epoch": 0.6947619860092135, "grad_norm": 1.4018068693819152, "learning_rate": 2.2507880798670656e-05, "loss": 0.1663, "step": 8144 }, { "epoch": 0.6948472956833305, "grad_norm": 1.701674237186182, "learning_rate": 2.2496342388413072e-05, "loss": 0.1935, "step": 8145 }, { "epoch": 0.6949326053574475, "grad_norm": 1.9668209979823401, "learning_rate": 2.2484806077913572e-05, "loss": 0.2265, "step": 8146 }, { "epoch": 0.6950179150315646, "grad_norm": 1.7058524594386242, "learning_rate": 2.247327186805295e-05, "loss": 0.2272, "step": 8147 }, { "epoch": 0.6951032247056816, "grad_norm": 1.6039070645946696, "learning_rate": 2.246173975971171e-05, "loss": 0.1993, "step": 8148 }, { "epoch": 0.6951885343797987, "grad_norm": 1.412217596399825, "learning_rate": 2.245020975377032e-05, "loss": 0.1711, "step": 8149 }, { "epoch": 0.6952738440539157, "grad_norm": 1.6039462330289866, "learning_rate": 2.2438681851109013e-05, "loss": 0.2427, "step": 8150 }, { "epoch": 0.6953591537280328, "grad_norm": 1.591998465474985, "learning_rate": 2.2427156052607885e-05, "loss": 0.1847, "step": 8151 }, { "epoch": 0.6954444634021498, "grad_norm": 1.7293526729442532, "learning_rate": 2.2415632359146856e-05, "loss": 0.2007, "step": 8152 }, { "epoch": 0.6955297730762668, "grad_norm": 1.866183469880605, "learning_rate": 2.2404110771605726e-05, "loss": 0.1814, "step": 8153 }, { "epoch": 0.6956150827503839, "grad_norm": 1.6591164597649521, "learning_rate": 2.2392591290864096e-05, "loss": 0.2337, "step": 8154 }, { "epoch": 0.695700392424501, "grad_norm": 2.1054166599829567, "learning_rate": 2.2381073917801416e-05, "loss": 0.1918, "step": 8155 }, { "epoch": 0.695785702098618, "grad_norm": 2.040585822157233, "learning_rate": 2.2369558653296978e-05, "loss": 0.2405, "step": 8156 }, { "epoch": 0.695871011772735, "grad_norm": 1.7157702191704067, "learning_rate": 2.2358045498229907e-05, "loss": 0.2136, "step": 8157 }, { "epoch": 0.695956321446852, "grad_norm": 1.4294073945691788, "learning_rate": 2.2346534453479173e-05, "loss": 0.1992, "step": 8158 }, { "epoch": 0.6960416311209692, "grad_norm": 1.6006513283145725, "learning_rate": 2.2335025519923565e-05, "loss": 0.1993, "step": 8159 }, { "epoch": 0.6961269407950862, "grad_norm": 1.4247947310792035, "learning_rate": 2.2323518698441786e-05, "loss": 0.197, "step": 8160 }, { "epoch": 0.6962122504692032, "grad_norm": 1.391837545095643, "learning_rate": 2.2312013989912238e-05, "loss": 0.214, "step": 8161 }, { "epoch": 0.6962975601433202, "grad_norm": 1.7357217869268533, "learning_rate": 2.2300511395213313e-05, "loss": 0.1574, "step": 8162 }, { "epoch": 0.6963828698174372, "grad_norm": 2.1060819564478943, "learning_rate": 2.2289010915223145e-05, "loss": 0.2218, "step": 8163 }, { "epoch": 0.6964681794915544, "grad_norm": 1.79032300496228, "learning_rate": 2.227751255081974e-05, "loss": 0.2162, "step": 8164 }, { "epoch": 0.6965534891656714, "grad_norm": 1.7957777570841373, "learning_rate": 2.2266016302880934e-05, "loss": 0.2796, "step": 8165 }, { "epoch": 0.6966387988397884, "grad_norm": 1.9750489482367033, "learning_rate": 2.2254522172284386e-05, "loss": 0.2557, "step": 8166 }, { "epoch": 0.6967241085139054, "grad_norm": 1.7348004068565464, "learning_rate": 2.224303015990767e-05, "loss": 0.1907, "step": 8167 }, { "epoch": 0.6968094181880226, "grad_norm": 1.9119047497682198, "learning_rate": 2.223154026662806e-05, "loss": 0.2355, "step": 8168 }, { "epoch": 0.6968947278621396, "grad_norm": 1.5377592969131808, "learning_rate": 2.2220052493322806e-05, "loss": 0.2145, "step": 8169 }, { "epoch": 0.6969800375362566, "grad_norm": 1.6271916795020862, "learning_rate": 2.220856684086893e-05, "loss": 0.2075, "step": 8170 }, { "epoch": 0.6970653472103736, "grad_norm": 1.418599390446634, "learning_rate": 2.2197083310143284e-05, "loss": 0.2357, "step": 8171 }, { "epoch": 0.6971506568844907, "grad_norm": 1.771104260831184, "learning_rate": 2.218560190202257e-05, "loss": 0.1735, "step": 8172 }, { "epoch": 0.6972359665586078, "grad_norm": 1.7889015092003082, "learning_rate": 2.217412261738338e-05, "loss": 0.2123, "step": 8173 }, { "epoch": 0.6973212762327248, "grad_norm": 1.499832461855408, "learning_rate": 2.216264545710202e-05, "loss": 0.1251, "step": 8174 }, { "epoch": 0.6974065859068418, "grad_norm": 1.861001465384323, "learning_rate": 2.215117042205478e-05, "loss": 0.2511, "step": 8175 }, { "epoch": 0.6974918955809589, "grad_norm": 1.7244983607124287, "learning_rate": 2.213969751311768e-05, "loss": 0.196, "step": 8176 }, { "epoch": 0.6975772052550759, "grad_norm": 2.121686708525965, "learning_rate": 2.2128226731166633e-05, "loss": 0.178, "step": 8177 }, { "epoch": 0.697662514929193, "grad_norm": 1.8117926960437554, "learning_rate": 2.211675807707736e-05, "loss": 0.1347, "step": 8178 }, { "epoch": 0.69774782460331, "grad_norm": 2.0391033066214335, "learning_rate": 2.210529155172544e-05, "loss": 0.2135, "step": 8179 }, { "epoch": 0.6978331342774271, "grad_norm": 1.8828818280764108, "learning_rate": 2.2093827155986273e-05, "loss": 0.2649, "step": 8180 }, { "epoch": 0.6979184439515441, "grad_norm": 1.5272060634670515, "learning_rate": 2.2082364890735096e-05, "loss": 0.2043, "step": 8181 }, { "epoch": 0.6980037536256611, "grad_norm": 2.0316570167635697, "learning_rate": 2.2070904756847022e-05, "loss": 0.2125, "step": 8182 }, { "epoch": 0.6980890632997782, "grad_norm": 1.4365385406135864, "learning_rate": 2.205944675519695e-05, "loss": 0.2263, "step": 8183 }, { "epoch": 0.6981743729738953, "grad_norm": 1.5857829502512686, "learning_rate": 2.2047990886659648e-05, "loss": 0.1925, "step": 8184 }, { "epoch": 0.6982596826480123, "grad_norm": 1.4878235761538245, "learning_rate": 2.2036537152109705e-05, "loss": 0.1934, "step": 8185 }, { "epoch": 0.6983449923221293, "grad_norm": 1.6997212686356096, "learning_rate": 2.202508555242155e-05, "loss": 0.2006, "step": 8186 }, { "epoch": 0.6984303019962463, "grad_norm": 1.4301613611267308, "learning_rate": 2.2013636088469458e-05, "loss": 0.2472, "step": 8187 }, { "epoch": 0.6985156116703635, "grad_norm": 1.549211836975956, "learning_rate": 2.2002188761127507e-05, "loss": 0.183, "step": 8188 }, { "epoch": 0.6986009213444805, "grad_norm": 1.422151727262114, "learning_rate": 2.1990743571269706e-05, "loss": 0.1452, "step": 8189 }, { "epoch": 0.6986862310185975, "grad_norm": 2.530161117127818, "learning_rate": 2.1979300519769752e-05, "loss": 0.2526, "step": 8190 }, { "epoch": 0.6987715406927145, "grad_norm": 1.7805764112608313, "learning_rate": 2.1967859607501325e-05, "loss": 0.2019, "step": 8191 }, { "epoch": 0.6988568503668317, "grad_norm": 1.4645965774537817, "learning_rate": 2.1956420835337848e-05, "loss": 0.1862, "step": 8192 }, { "epoch": 0.6989421600409487, "grad_norm": 2.133663357102439, "learning_rate": 2.194498420415262e-05, "loss": 0.2865, "step": 8193 }, { "epoch": 0.6990274697150657, "grad_norm": 1.553561315519243, "learning_rate": 2.1933549714818748e-05, "loss": 0.1626, "step": 8194 }, { "epoch": 0.6991127793891827, "grad_norm": 2.552815442596656, "learning_rate": 2.1922117368209245e-05, "loss": 0.2185, "step": 8195 }, { "epoch": 0.6991980890632998, "grad_norm": 1.4022860289359131, "learning_rate": 2.1910687165196837e-05, "loss": 0.226, "step": 8196 }, { "epoch": 0.6992833987374169, "grad_norm": 1.4510082291225799, "learning_rate": 2.1899259106654215e-05, "loss": 0.2376, "step": 8197 }, { "epoch": 0.6993687084115339, "grad_norm": 1.4125725499653852, "learning_rate": 2.1887833193453832e-05, "loss": 0.1919, "step": 8198 }, { "epoch": 0.6994540180856509, "grad_norm": 1.3063695880229182, "learning_rate": 2.1876409426468005e-05, "loss": 0.1453, "step": 8199 }, { "epoch": 0.6995393277597679, "grad_norm": 1.7329365031501285, "learning_rate": 2.1864987806568858e-05, "loss": 0.2234, "step": 8200 }, { "epoch": 0.699624637433885, "grad_norm": 2.5020365049716387, "learning_rate": 2.185356833462837e-05, "loss": 0.2275, "step": 8201 }, { "epoch": 0.699709947108002, "grad_norm": 1.58778882140215, "learning_rate": 2.1842151011518413e-05, "loss": 0.2163, "step": 8202 }, { "epoch": 0.6997952567821191, "grad_norm": 1.7964315779513742, "learning_rate": 2.183073583811055e-05, "loss": 0.2578, "step": 8203 }, { "epoch": 0.6998805664562361, "grad_norm": 1.415491130638026, "learning_rate": 2.181932281527634e-05, "loss": 0.1623, "step": 8204 }, { "epoch": 0.6999658761303532, "grad_norm": 1.51061387802531, "learning_rate": 2.180791194388707e-05, "loss": 0.1563, "step": 8205 }, { "epoch": 0.7000511858044702, "grad_norm": 1.273257658259237, "learning_rate": 2.179650322481392e-05, "loss": 0.1715, "step": 8206 }, { "epoch": 0.7001364954785873, "grad_norm": 1.4414524804948585, "learning_rate": 2.1785096658927873e-05, "loss": 0.2003, "step": 8207 }, { "epoch": 0.7002218051527043, "grad_norm": 2.237966998077689, "learning_rate": 2.1773692247099764e-05, "loss": 0.2238, "step": 8208 }, { "epoch": 0.7003071148268214, "grad_norm": 1.659356712341112, "learning_rate": 2.176228999020025e-05, "loss": 0.1111, "step": 8209 }, { "epoch": 0.7003924245009384, "grad_norm": 1.5964693543371788, "learning_rate": 2.1750889889099828e-05, "loss": 0.1935, "step": 8210 }, { "epoch": 0.7004777341750554, "grad_norm": 1.468194897231326, "learning_rate": 2.1739491944668866e-05, "loss": 0.1879, "step": 8211 }, { "epoch": 0.7005630438491725, "grad_norm": 1.619549045198152, "learning_rate": 2.1728096157777517e-05, "loss": 0.2644, "step": 8212 }, { "epoch": 0.7006483535232896, "grad_norm": 2.440097500776036, "learning_rate": 2.171670252929579e-05, "loss": 0.226, "step": 8213 }, { "epoch": 0.7007336631974066, "grad_norm": 1.8538052781776768, "learning_rate": 2.170531106009351e-05, "loss": 0.2976, "step": 8214 }, { "epoch": 0.7008189728715236, "grad_norm": 1.370566633575996, "learning_rate": 2.1693921751040407e-05, "loss": 0.147, "step": 8215 }, { "epoch": 0.7009042825456406, "grad_norm": 1.6887809695235503, "learning_rate": 2.1682534603005927e-05, "loss": 0.1728, "step": 8216 }, { "epoch": 0.7009895922197578, "grad_norm": 1.757303053889288, "learning_rate": 2.1671149616859466e-05, "loss": 0.2024, "step": 8217 }, { "epoch": 0.7010749018938748, "grad_norm": 1.5952485463085404, "learning_rate": 2.1659766793470195e-05, "loss": 0.2376, "step": 8218 }, { "epoch": 0.7011602115679918, "grad_norm": 1.573521310492869, "learning_rate": 2.1648386133707128e-05, "loss": 0.1869, "step": 8219 }, { "epoch": 0.7012455212421088, "grad_norm": 2.0818580999178016, "learning_rate": 2.1637007638439116e-05, "loss": 0.264, "step": 8220 }, { "epoch": 0.701330830916226, "grad_norm": 1.7932870619712007, "learning_rate": 2.1625631308534854e-05, "loss": 0.1714, "step": 8221 }, { "epoch": 0.701416140590343, "grad_norm": 1.3391180714634794, "learning_rate": 2.161425714486286e-05, "loss": 0.1431, "step": 8222 }, { "epoch": 0.70150145026446, "grad_norm": 1.5115641039228023, "learning_rate": 2.1602885148291473e-05, "loss": 0.192, "step": 8223 }, { "epoch": 0.701586759938577, "grad_norm": 1.4965999533745395, "learning_rate": 2.1591515319688936e-05, "loss": 0.1859, "step": 8224 }, { "epoch": 0.7016720696126941, "grad_norm": 1.9081089148794625, "learning_rate": 2.1580147659923212e-05, "loss": 0.1903, "step": 8225 }, { "epoch": 0.7017573792868111, "grad_norm": 1.6199005515574205, "learning_rate": 2.1568782169862205e-05, "loss": 0.1843, "step": 8226 }, { "epoch": 0.7018426889609282, "grad_norm": 1.7430339860869186, "learning_rate": 2.1557418850373602e-05, "loss": 0.15, "step": 8227 }, { "epoch": 0.7019279986350452, "grad_norm": 1.5717579276623852, "learning_rate": 2.1546057702324916e-05, "loss": 0.1879, "step": 8228 }, { "epoch": 0.7020133083091623, "grad_norm": 1.6115024911575402, "learning_rate": 2.1534698726583524e-05, "loss": 0.1729, "step": 8229 }, { "epoch": 0.7020986179832793, "grad_norm": 1.3101450365163294, "learning_rate": 2.1523341924016604e-05, "loss": 0.2024, "step": 8230 }, { "epoch": 0.7021839276573963, "grad_norm": 1.445929483226996, "learning_rate": 2.1511987295491243e-05, "loss": 0.1664, "step": 8231 }, { "epoch": 0.7022692373315134, "grad_norm": 1.8720496807841982, "learning_rate": 2.1500634841874224e-05, "loss": 0.2093, "step": 8232 }, { "epoch": 0.7023545470056305, "grad_norm": 1.3472164597448135, "learning_rate": 2.1489284564032308e-05, "loss": 0.1506, "step": 8233 }, { "epoch": 0.7024398566797475, "grad_norm": 1.9912608665653528, "learning_rate": 2.147793646283201e-05, "loss": 0.2532, "step": 8234 }, { "epoch": 0.7025251663538645, "grad_norm": 1.4968479735417342, "learning_rate": 2.14665905391397e-05, "loss": 0.1729, "step": 8235 }, { "epoch": 0.7026104760279815, "grad_norm": 1.5452753137756325, "learning_rate": 2.1455246793821555e-05, "loss": 0.1614, "step": 8236 }, { "epoch": 0.7026957857020986, "grad_norm": 1.8156352057637943, "learning_rate": 2.144390522774367e-05, "loss": 0.1784, "step": 8237 }, { "epoch": 0.7027810953762157, "grad_norm": 2.082278811281668, "learning_rate": 2.1432565841771836e-05, "loss": 0.2356, "step": 8238 }, { "epoch": 0.7028664050503327, "grad_norm": 2.2343235676854745, "learning_rate": 2.142122863677181e-05, "loss": 0.2217, "step": 8239 }, { "epoch": 0.7029517147244497, "grad_norm": 1.5223495902695598, "learning_rate": 2.1409893613609113e-05, "loss": 0.1429, "step": 8240 }, { "epoch": 0.7030370243985667, "grad_norm": 1.5376192093202021, "learning_rate": 2.1398560773149105e-05, "loss": 0.1546, "step": 8241 }, { "epoch": 0.7031223340726839, "grad_norm": 1.4713228295514058, "learning_rate": 2.1387230116257e-05, "loss": 0.1903, "step": 8242 }, { "epoch": 0.7032076437468009, "grad_norm": 1.988251991281528, "learning_rate": 2.137590164379781e-05, "loss": 0.2152, "step": 8243 }, { "epoch": 0.7032929534209179, "grad_norm": 2.1559893685540557, "learning_rate": 2.136457535663645e-05, "loss": 0.2213, "step": 8244 }, { "epoch": 0.7033782630950349, "grad_norm": 1.5144908633682246, "learning_rate": 2.1353251255637563e-05, "loss": 0.1848, "step": 8245 }, { "epoch": 0.7034635727691521, "grad_norm": 1.7712173344757174, "learning_rate": 2.1341929341665727e-05, "loss": 0.1717, "step": 8246 }, { "epoch": 0.7035488824432691, "grad_norm": 1.4882876578766253, "learning_rate": 2.1330609615585308e-05, "loss": 0.1894, "step": 8247 }, { "epoch": 0.7036341921173861, "grad_norm": 1.6440143271144472, "learning_rate": 2.1319292078260483e-05, "loss": 0.244, "step": 8248 }, { "epoch": 0.7037195017915031, "grad_norm": 1.3181337516714804, "learning_rate": 2.1307976730555306e-05, "loss": 0.1958, "step": 8249 }, { "epoch": 0.7038048114656202, "grad_norm": 1.5806857945767583, "learning_rate": 2.1296663573333635e-05, "loss": 0.2274, "step": 8250 }, { "epoch": 0.7038901211397373, "grad_norm": 1.6873356774468469, "learning_rate": 2.1285352607459168e-05, "loss": 0.2272, "step": 8251 }, { "epoch": 0.7039754308138543, "grad_norm": 1.5428211769322064, "learning_rate": 2.1274043833795426e-05, "loss": 0.2217, "step": 8252 }, { "epoch": 0.7040607404879713, "grad_norm": 1.9532643382914379, "learning_rate": 2.1262737253205822e-05, "loss": 0.217, "step": 8253 }, { "epoch": 0.7041460501620884, "grad_norm": 1.5434959573251013, "learning_rate": 2.1251432866553484e-05, "loss": 0.195, "step": 8254 }, { "epoch": 0.7042313598362054, "grad_norm": 1.71072358488109, "learning_rate": 2.124013067470149e-05, "loss": 0.263, "step": 8255 }, { "epoch": 0.7043166695103225, "grad_norm": 1.8401453374393504, "learning_rate": 2.1228830678512677e-05, "loss": 0.2291, "step": 8256 }, { "epoch": 0.7044019791844395, "grad_norm": 1.6764813719049305, "learning_rate": 2.1217532878849787e-05, "loss": 0.2235, "step": 8257 }, { "epoch": 0.7044872888585566, "grad_norm": 1.5754774187480007, "learning_rate": 2.1206237276575276e-05, "loss": 0.196, "step": 8258 }, { "epoch": 0.7045725985326736, "grad_norm": 1.5155674245064445, "learning_rate": 2.1194943872551553e-05, "loss": 0.1824, "step": 8259 }, { "epoch": 0.7046579082067906, "grad_norm": 2.046783328733526, "learning_rate": 2.1183652667640802e-05, "loss": 0.1862, "step": 8260 }, { "epoch": 0.7047432178809077, "grad_norm": 1.831265873238053, "learning_rate": 2.1172363662705035e-05, "loss": 0.1583, "step": 8261 }, { "epoch": 0.7048285275550248, "grad_norm": 1.4849685285425138, "learning_rate": 2.1161076858606115e-05, "loss": 0.2208, "step": 8262 }, { "epoch": 0.7049138372291418, "grad_norm": 1.744015952991001, "learning_rate": 2.1149792256205725e-05, "loss": 0.1856, "step": 8263 }, { "epoch": 0.7049991469032588, "grad_norm": 1.255829996148029, "learning_rate": 2.1138509856365386e-05, "loss": 0.1732, "step": 8264 }, { "epoch": 0.7050844565773758, "grad_norm": 1.585282815436865, "learning_rate": 2.1127229659946435e-05, "loss": 0.1886, "step": 8265 }, { "epoch": 0.705169766251493, "grad_norm": 1.4389220957236168, "learning_rate": 2.11159516678101e-05, "loss": 0.2149, "step": 8266 }, { "epoch": 0.70525507592561, "grad_norm": 1.7964844652454994, "learning_rate": 2.1104675880817337e-05, "loss": 0.1712, "step": 8267 }, { "epoch": 0.705340385599727, "grad_norm": 1.836841831277727, "learning_rate": 2.1093402299829036e-05, "loss": 0.1861, "step": 8268 }, { "epoch": 0.705425695273844, "grad_norm": 1.8677275447279373, "learning_rate": 2.1082130925705857e-05, "loss": 0.236, "step": 8269 }, { "epoch": 0.7055110049479612, "grad_norm": 1.9984786088307835, "learning_rate": 2.1070861759308315e-05, "loss": 0.2036, "step": 8270 }, { "epoch": 0.7055963146220782, "grad_norm": 1.638085134080076, "learning_rate": 2.1059594801496745e-05, "loss": 0.179, "step": 8271 }, { "epoch": 0.7056816242961952, "grad_norm": 1.6917735487272934, "learning_rate": 2.104833005313131e-05, "loss": 0.2162, "step": 8272 }, { "epoch": 0.7057669339703122, "grad_norm": 2.038879386309181, "learning_rate": 2.1037067515072057e-05, "loss": 0.2533, "step": 8273 }, { "epoch": 0.7058522436444293, "grad_norm": 1.65335657989146, "learning_rate": 2.1025807188178758e-05, "loss": 0.171, "step": 8274 }, { "epoch": 0.7059375533185464, "grad_norm": 1.392125809284176, "learning_rate": 2.101454907331113e-05, "loss": 0.1582, "step": 8275 }, { "epoch": 0.7060228629926634, "grad_norm": 1.6494662433001972, "learning_rate": 2.1003293171328646e-05, "loss": 0.2322, "step": 8276 }, { "epoch": 0.7061081726667804, "grad_norm": 1.2766550523131692, "learning_rate": 2.0992039483090647e-05, "loss": 0.1631, "step": 8277 }, { "epoch": 0.7061934823408974, "grad_norm": 1.6941513092851068, "learning_rate": 2.0980788009456264e-05, "loss": 0.1393, "step": 8278 }, { "epoch": 0.7062787920150145, "grad_norm": 1.5834169700350167, "learning_rate": 2.0969538751284547e-05, "loss": 0.1879, "step": 8279 }, { "epoch": 0.7063641016891316, "grad_norm": 1.6809770468892724, "learning_rate": 2.095829170943424e-05, "loss": 0.2527, "step": 8280 }, { "epoch": 0.7064494113632486, "grad_norm": 1.8062986637938607, "learning_rate": 2.0947046884764054e-05, "loss": 0.1917, "step": 8281 }, { "epoch": 0.7065347210373656, "grad_norm": 1.7238691908117707, "learning_rate": 2.0935804278132448e-05, "loss": 0.2028, "step": 8282 }, { "epoch": 0.7066200307114827, "grad_norm": 1.791653019461295, "learning_rate": 2.0924563890397743e-05, "loss": 0.176, "step": 8283 }, { "epoch": 0.7067053403855997, "grad_norm": 1.541392619889132, "learning_rate": 2.0913325722418077e-05, "loss": 0.1665, "step": 8284 }, { "epoch": 0.7067906500597168, "grad_norm": 1.7358563944138892, "learning_rate": 2.090208977505142e-05, "loss": 0.2009, "step": 8285 }, { "epoch": 0.7068759597338338, "grad_norm": 1.5993125928991831, "learning_rate": 2.089085604915559e-05, "loss": 0.2088, "step": 8286 }, { "epoch": 0.7069612694079509, "grad_norm": 1.876585290216165, "learning_rate": 2.087962454558819e-05, "loss": 0.2364, "step": 8287 }, { "epoch": 0.7070465790820679, "grad_norm": 1.4111643645775125, "learning_rate": 2.0868395265206732e-05, "loss": 0.1672, "step": 8288 }, { "epoch": 0.7071318887561849, "grad_norm": 2.741069163538519, "learning_rate": 2.085716820886849e-05, "loss": 0.2268, "step": 8289 }, { "epoch": 0.707217198430302, "grad_norm": 2.5292711411275337, "learning_rate": 2.0845943377430587e-05, "loss": 0.2551, "step": 8290 }, { "epoch": 0.7073025081044191, "grad_norm": 1.4059061689716186, "learning_rate": 2.0834720771749987e-05, "loss": 0.2458, "step": 8291 }, { "epoch": 0.7073878177785361, "grad_norm": 1.803464532946029, "learning_rate": 2.0823500392683476e-05, "loss": 0.2171, "step": 8292 }, { "epoch": 0.7074731274526531, "grad_norm": 1.6959342695310524, "learning_rate": 2.0812282241087662e-05, "loss": 0.1823, "step": 8293 }, { "epoch": 0.7075584371267701, "grad_norm": 1.804463426294902, "learning_rate": 2.0801066317818973e-05, "loss": 0.1919, "step": 8294 }, { "epoch": 0.7076437468008873, "grad_norm": 1.9416330615670572, "learning_rate": 2.0789852623733745e-05, "loss": 0.1561, "step": 8295 }, { "epoch": 0.7077290564750043, "grad_norm": 1.7881178048344757, "learning_rate": 2.0778641159688018e-05, "loss": 0.1987, "step": 8296 }, { "epoch": 0.7078143661491213, "grad_norm": 1.7310853294527915, "learning_rate": 2.0767431926537766e-05, "loss": 0.1725, "step": 8297 }, { "epoch": 0.7078996758232383, "grad_norm": 2.116485817259507, "learning_rate": 2.0756224925138752e-05, "loss": 0.2512, "step": 8298 }, { "epoch": 0.7079849854973554, "grad_norm": 1.8124701398823342, "learning_rate": 2.0745020156346558e-05, "loss": 0.1862, "step": 8299 }, { "epoch": 0.7080702951714725, "grad_norm": 2.11824364026836, "learning_rate": 2.07338176210166e-05, "loss": 0.2124, "step": 8300 }, { "epoch": 0.7081556048455895, "grad_norm": 1.394800795890137, "learning_rate": 2.0722617320004162e-05, "loss": 0.2213, "step": 8301 }, { "epoch": 0.7082409145197065, "grad_norm": 1.32575570993994, "learning_rate": 2.071141925416431e-05, "loss": 0.2297, "step": 8302 }, { "epoch": 0.7083262241938236, "grad_norm": 1.7235510614908287, "learning_rate": 2.070022342435196e-05, "loss": 0.1654, "step": 8303 }, { "epoch": 0.7084115338679406, "grad_norm": 1.7614468451391692, "learning_rate": 2.0689029831421856e-05, "loss": 0.1918, "step": 8304 }, { "epoch": 0.7084968435420577, "grad_norm": 2.121787728718631, "learning_rate": 2.067783847622856e-05, "loss": 0.2354, "step": 8305 }, { "epoch": 0.7085821532161747, "grad_norm": 1.3662615446979791, "learning_rate": 2.066664935962649e-05, "loss": 0.225, "step": 8306 }, { "epoch": 0.7086674628902918, "grad_norm": 1.8396483234519287, "learning_rate": 2.0655462482469833e-05, "loss": 0.1953, "step": 8307 }, { "epoch": 0.7087527725644088, "grad_norm": 1.5089108908642033, "learning_rate": 2.064427784561272e-05, "loss": 0.2183, "step": 8308 }, { "epoch": 0.7088380822385258, "grad_norm": 1.6417005374080125, "learning_rate": 2.0633095449908964e-05, "loss": 0.1954, "step": 8309 }, { "epoch": 0.7089233919126429, "grad_norm": 1.811694130477329, "learning_rate": 2.062191529621233e-05, "loss": 0.2061, "step": 8310 }, { "epoch": 0.70900870158676, "grad_norm": 1.7481674408491645, "learning_rate": 2.061073738537635e-05, "loss": 0.2078, "step": 8311 }, { "epoch": 0.709094011260877, "grad_norm": 1.6528166687109167, "learning_rate": 2.0599561718254397e-05, "loss": 0.1788, "step": 8312 }, { "epoch": 0.709179320934994, "grad_norm": 1.3746910181540328, "learning_rate": 2.0588388295699668e-05, "loss": 0.1761, "step": 8313 }, { "epoch": 0.709264630609111, "grad_norm": 1.861633852080652, "learning_rate": 2.0577217118565202e-05, "loss": 0.1629, "step": 8314 }, { "epoch": 0.7093499402832281, "grad_norm": 1.8463617996323476, "learning_rate": 2.0566048187703857e-05, "loss": 0.1816, "step": 8315 }, { "epoch": 0.7094352499573452, "grad_norm": 1.560366891118849, "learning_rate": 2.05548815039683e-05, "loss": 0.2009, "step": 8316 }, { "epoch": 0.7095205596314622, "grad_norm": 2.417439096151101, "learning_rate": 2.0543717068211093e-05, "loss": 0.2598, "step": 8317 }, { "epoch": 0.7096058693055792, "grad_norm": 1.5659752916454939, "learning_rate": 2.0532554881284555e-05, "loss": 0.1897, "step": 8318 }, { "epoch": 0.7096911789796962, "grad_norm": 1.6956065401955882, "learning_rate": 2.0521394944040856e-05, "loss": 0.2199, "step": 8319 }, { "epoch": 0.7097764886538134, "grad_norm": 1.43210077111811, "learning_rate": 2.0510237257331994e-05, "loss": 0.2203, "step": 8320 }, { "epoch": 0.7098617983279304, "grad_norm": 1.7432353200158972, "learning_rate": 2.0499081822009842e-05, "loss": 0.2083, "step": 8321 }, { "epoch": 0.7099471080020474, "grad_norm": 1.8961748367530997, "learning_rate": 2.048792863892599e-05, "loss": 0.1942, "step": 8322 }, { "epoch": 0.7100324176761644, "grad_norm": 1.7660495618948437, "learning_rate": 2.0476777708931978e-05, "loss": 0.2129, "step": 8323 }, { "epoch": 0.7101177273502816, "grad_norm": 1.3049147173905564, "learning_rate": 2.0465629032879097e-05, "loss": 0.1832, "step": 8324 }, { "epoch": 0.7102030370243986, "grad_norm": 1.4515212859019717, "learning_rate": 2.0454482611618496e-05, "loss": 0.2125, "step": 8325 }, { "epoch": 0.7102883466985156, "grad_norm": 1.9194498034762946, "learning_rate": 2.044333844600114e-05, "loss": 0.2439, "step": 8326 }, { "epoch": 0.7103736563726326, "grad_norm": 1.4619402376633408, "learning_rate": 2.043219653687784e-05, "loss": 0.177, "step": 8327 }, { "epoch": 0.7104589660467497, "grad_norm": 1.720575230464435, "learning_rate": 2.0421056885099204e-05, "loss": 0.2056, "step": 8328 }, { "epoch": 0.7105442757208668, "grad_norm": 1.6777704699078548, "learning_rate": 2.0409919491515677e-05, "loss": 0.1603, "step": 8329 }, { "epoch": 0.7106295853949838, "grad_norm": 2.530101845391402, "learning_rate": 2.0398784356977585e-05, "loss": 0.2615, "step": 8330 }, { "epoch": 0.7107148950691008, "grad_norm": 1.7279839511585244, "learning_rate": 2.038765148233498e-05, "loss": 0.1957, "step": 8331 }, { "epoch": 0.7108002047432179, "grad_norm": 1.570614989643856, "learning_rate": 2.0376520868437838e-05, "loss": 0.1529, "step": 8332 }, { "epoch": 0.7108855144173349, "grad_norm": 1.8800261207551558, "learning_rate": 2.0365392516135906e-05, "loss": 0.192, "step": 8333 }, { "epoch": 0.710970824091452, "grad_norm": 1.809660034919842, "learning_rate": 2.035426642627878e-05, "loss": 0.2323, "step": 8334 }, { "epoch": 0.711056133765569, "grad_norm": 2.1551400244021903, "learning_rate": 2.0343142599715874e-05, "loss": 0.2594, "step": 8335 }, { "epoch": 0.7111414434396861, "grad_norm": 1.560284531638903, "learning_rate": 2.0332021037296418e-05, "loss": 0.1817, "step": 8336 }, { "epoch": 0.7112267531138031, "grad_norm": 1.541943095744232, "learning_rate": 2.0320901739869537e-05, "loss": 0.2149, "step": 8337 }, { "epoch": 0.7113120627879201, "grad_norm": 1.6906476511116533, "learning_rate": 2.0309784708284058e-05, "loss": 0.2216, "step": 8338 }, { "epoch": 0.7113973724620372, "grad_norm": 1.7409841718222612, "learning_rate": 2.0298669943388753e-05, "loss": 0.2068, "step": 8339 }, { "epoch": 0.7114826821361543, "grad_norm": 1.6481056783365962, "learning_rate": 2.0287557446032172e-05, "loss": 0.166, "step": 8340 }, { "epoch": 0.7115679918102713, "grad_norm": 1.3427414768923651, "learning_rate": 2.027644721706269e-05, "loss": 0.2084, "step": 8341 }, { "epoch": 0.7116533014843883, "grad_norm": 1.3996354411809666, "learning_rate": 2.026533925732849e-05, "loss": 0.1613, "step": 8342 }, { "epoch": 0.7117386111585053, "grad_norm": 1.9574768411915553, "learning_rate": 2.0254233567677666e-05, "loss": 0.1773, "step": 8343 }, { "epoch": 0.7118239208326225, "grad_norm": 1.6014718378969093, "learning_rate": 2.0243130148958006e-05, "loss": 0.1524, "step": 8344 }, { "epoch": 0.7119092305067395, "grad_norm": 2.1867037822705857, "learning_rate": 2.0232029002017244e-05, "loss": 0.2576, "step": 8345 }, { "epoch": 0.7119945401808565, "grad_norm": 1.8133470594830947, "learning_rate": 2.0220930127702885e-05, "loss": 0.1971, "step": 8346 }, { "epoch": 0.7120798498549735, "grad_norm": 1.9147701278845877, "learning_rate": 2.0209833526862267e-05, "loss": 0.2503, "step": 8347 }, { "epoch": 0.7121651595290907, "grad_norm": 1.4716542525900096, "learning_rate": 2.0198739200342547e-05, "loss": 0.1751, "step": 8348 }, { "epoch": 0.7122504692032077, "grad_norm": 1.586610322494136, "learning_rate": 2.0187647148990712e-05, "loss": 0.2376, "step": 8349 }, { "epoch": 0.7123357788773247, "grad_norm": 1.865630773794163, "learning_rate": 2.017655737365363e-05, "loss": 0.2384, "step": 8350 }, { "epoch": 0.7124210885514417, "grad_norm": 1.5033000566531127, "learning_rate": 2.0165469875177874e-05, "loss": 0.2632, "step": 8351 }, { "epoch": 0.7125063982255587, "grad_norm": 2.0779750454370784, "learning_rate": 2.0154384654409975e-05, "loss": 0.156, "step": 8352 }, { "epoch": 0.7125917078996759, "grad_norm": 1.3401338580158748, "learning_rate": 2.01433017121962e-05, "loss": 0.165, "step": 8353 }, { "epoch": 0.7126770175737929, "grad_norm": 1.91503109460989, "learning_rate": 2.013222104938269e-05, "loss": 0.2016, "step": 8354 }, { "epoch": 0.7127623272479099, "grad_norm": 2.0211556429946778, "learning_rate": 2.012114266681538e-05, "loss": 0.17, "step": 8355 }, { "epoch": 0.7128476369220269, "grad_norm": 2.2393265924483825, "learning_rate": 2.011006656534005e-05, "loss": 0.2324, "step": 8356 }, { "epoch": 0.712932946596144, "grad_norm": 1.2766409991068695, "learning_rate": 2.009899274580231e-05, "loss": 0.1539, "step": 8357 }, { "epoch": 0.713018256270261, "grad_norm": 1.7374002002623292, "learning_rate": 2.0087921209047554e-05, "loss": 0.185, "step": 8358 }, { "epoch": 0.7131035659443781, "grad_norm": 2.081341999758448, "learning_rate": 2.00768519559211e-05, "loss": 0.174, "step": 8359 }, { "epoch": 0.7131888756184951, "grad_norm": 1.341159808138533, "learning_rate": 2.0065784987267956e-05, "loss": 0.2153, "step": 8360 }, { "epoch": 0.7132741852926122, "grad_norm": 1.5191860248101665, "learning_rate": 2.005472030393307e-05, "loss": 0.1896, "step": 8361 }, { "epoch": 0.7133594949667292, "grad_norm": 1.7327728432055116, "learning_rate": 2.0043657906761142e-05, "loss": 0.1675, "step": 8362 }, { "epoch": 0.7134448046408463, "grad_norm": 2.1103543868921983, "learning_rate": 2.0032597796596788e-05, "loss": 0.2249, "step": 8363 }, { "epoch": 0.7135301143149633, "grad_norm": 1.9029441058499363, "learning_rate": 2.0021539974284304e-05, "loss": 0.2409, "step": 8364 }, { "epoch": 0.7136154239890804, "grad_norm": 2.3163778422320047, "learning_rate": 2.0010484440667953e-05, "loss": 0.2252, "step": 8365 }, { "epoch": 0.7137007336631974, "grad_norm": 2.554075861981892, "learning_rate": 1.9999431196591755e-05, "loss": 0.2251, "step": 8366 }, { "epoch": 0.7137860433373144, "grad_norm": 1.4968140464270432, "learning_rate": 1.998838024289956e-05, "loss": 0.2253, "step": 8367 }, { "epoch": 0.7138713530114315, "grad_norm": 1.7270572005420353, "learning_rate": 1.9977331580435054e-05, "loss": 0.2889, "step": 8368 }, { "epoch": 0.7139566626855486, "grad_norm": 2.0342272726821427, "learning_rate": 1.996628521004174e-05, "loss": 0.1825, "step": 8369 }, { "epoch": 0.7140419723596656, "grad_norm": 1.6445610424251735, "learning_rate": 1.995524113256295e-05, "loss": 0.1387, "step": 8370 }, { "epoch": 0.7141272820337826, "grad_norm": 1.29045907863693, "learning_rate": 1.994419934884183e-05, "loss": 0.1687, "step": 8371 }, { "epoch": 0.7142125917078996, "grad_norm": 1.829739631933053, "learning_rate": 1.9933159859721408e-05, "loss": 0.1784, "step": 8372 }, { "epoch": 0.7142979013820168, "grad_norm": 1.5224465301595405, "learning_rate": 1.9922122666044413e-05, "loss": 0.1936, "step": 8373 }, { "epoch": 0.7143832110561338, "grad_norm": 1.3846981317296652, "learning_rate": 1.991108776865354e-05, "loss": 0.1957, "step": 8374 }, { "epoch": 0.7144685207302508, "grad_norm": 1.5225804192111128, "learning_rate": 1.9900055168391224e-05, "loss": 0.1313, "step": 8375 }, { "epoch": 0.7145538304043678, "grad_norm": 1.8941522907472919, "learning_rate": 1.9889024866099748e-05, "loss": 0.2172, "step": 8376 }, { "epoch": 0.714639140078485, "grad_norm": 1.6621615875685722, "learning_rate": 1.9877996862621207e-05, "loss": 0.223, "step": 8377 }, { "epoch": 0.714724449752602, "grad_norm": 1.41172428605008, "learning_rate": 1.9866971158797528e-05, "loss": 0.2033, "step": 8378 }, { "epoch": 0.714809759426719, "grad_norm": 1.4109304315975053, "learning_rate": 1.9855947755470504e-05, "loss": 0.1752, "step": 8379 }, { "epoch": 0.714895069100836, "grad_norm": 1.6798470354833115, "learning_rate": 1.9844926653481648e-05, "loss": 0.2212, "step": 8380 }, { "epoch": 0.7149803787749531, "grad_norm": 1.703456321476564, "learning_rate": 1.9833907853672417e-05, "loss": 0.2188, "step": 8381 }, { "epoch": 0.7150656884490701, "grad_norm": 1.566052099513456, "learning_rate": 1.9822891356884022e-05, "loss": 0.2034, "step": 8382 }, { "epoch": 0.7151509981231872, "grad_norm": 1.8168123181018294, "learning_rate": 1.981187716395751e-05, "loss": 0.1411, "step": 8383 }, { "epoch": 0.7152363077973042, "grad_norm": 1.797142473299192, "learning_rate": 1.9800865275733737e-05, "loss": 0.2459, "step": 8384 }, { "epoch": 0.7153216174714213, "grad_norm": 1.8008729989930432, "learning_rate": 1.9789855693053456e-05, "loss": 0.2125, "step": 8385 }, { "epoch": 0.7154069271455383, "grad_norm": 1.603159931241022, "learning_rate": 1.9778848416757123e-05, "loss": 0.1676, "step": 8386 }, { "epoch": 0.7154922368196553, "grad_norm": 1.7498446123073526, "learning_rate": 1.9767843447685137e-05, "loss": 0.3673, "step": 8387 }, { "epoch": 0.7155775464937724, "grad_norm": 2.1290247332998367, "learning_rate": 1.9756840786677648e-05, "loss": 0.1928, "step": 8388 }, { "epoch": 0.7156628561678895, "grad_norm": 1.817167979666121, "learning_rate": 1.9745840434574654e-05, "loss": 0.1664, "step": 8389 }, { "epoch": 0.7157481658420065, "grad_norm": 1.503851872241345, "learning_rate": 1.9734842392215975e-05, "loss": 0.1753, "step": 8390 }, { "epoch": 0.7158334755161235, "grad_norm": 1.692863558963438, "learning_rate": 1.972384666044123e-05, "loss": 0.1768, "step": 8391 }, { "epoch": 0.7159187851902405, "grad_norm": 2.392028197323257, "learning_rate": 1.971285324008994e-05, "loss": 0.2321, "step": 8392 }, { "epoch": 0.7160040948643576, "grad_norm": 1.6815075616771449, "learning_rate": 1.9701862132001324e-05, "loss": 0.1981, "step": 8393 }, { "epoch": 0.7160894045384747, "grad_norm": 1.4093772025418887, "learning_rate": 1.969087333701455e-05, "loss": 0.2027, "step": 8394 }, { "epoch": 0.7161747142125917, "grad_norm": 1.640647088764993, "learning_rate": 1.967988685596853e-05, "loss": 0.2553, "step": 8395 }, { "epoch": 0.7162600238867087, "grad_norm": 1.6035123539286065, "learning_rate": 1.9668902689702035e-05, "loss": 0.2067, "step": 8396 }, { "epoch": 0.7163453335608257, "grad_norm": 1.9481579908184914, "learning_rate": 1.9657920839053634e-05, "loss": 0.1613, "step": 8397 }, { "epoch": 0.7164306432349429, "grad_norm": 1.784122426450973, "learning_rate": 1.9646941304861742e-05, "loss": 0.1903, "step": 8398 }, { "epoch": 0.7165159529090599, "grad_norm": 2.0217921810492916, "learning_rate": 1.9635964087964586e-05, "loss": 0.1486, "step": 8399 }, { "epoch": 0.7166012625831769, "grad_norm": 1.8542799182837368, "learning_rate": 1.96249891892002e-05, "loss": 0.1356, "step": 8400 }, { "epoch": 0.7166865722572939, "grad_norm": 1.5210420212768982, "learning_rate": 1.961401660940651e-05, "loss": 0.2213, "step": 8401 }, { "epoch": 0.7167718819314111, "grad_norm": 1.8119817847963697, "learning_rate": 1.9603046349421146e-05, "loss": 0.1775, "step": 8402 }, { "epoch": 0.7168571916055281, "grad_norm": 1.9213850435785265, "learning_rate": 1.9592078410081682e-05, "loss": 0.2218, "step": 8403 }, { "epoch": 0.7169425012796451, "grad_norm": 2.0395217319910253, "learning_rate": 1.958111279222544e-05, "loss": 0.2152, "step": 8404 }, { "epoch": 0.7170278109537621, "grad_norm": 2.033422394141366, "learning_rate": 1.95701494966896e-05, "loss": 0.2437, "step": 8405 }, { "epoch": 0.7171131206278792, "grad_norm": 1.7914591232861758, "learning_rate": 1.955918852431112e-05, "loss": 0.1746, "step": 8406 }, { "epoch": 0.7171984303019963, "grad_norm": 1.4704773667561046, "learning_rate": 1.954822987592685e-05, "loss": 0.221, "step": 8407 }, { "epoch": 0.7172837399761133, "grad_norm": 1.8455402282199842, "learning_rate": 1.953727355237341e-05, "loss": 0.2285, "step": 8408 }, { "epoch": 0.7173690496502303, "grad_norm": 2.2045376624673065, "learning_rate": 1.9526319554487247e-05, "loss": 0.1122, "step": 8409 }, { "epoch": 0.7174543593243474, "grad_norm": 1.7066803665405998, "learning_rate": 1.9515367883104658e-05, "loss": 0.1308, "step": 8410 }, { "epoch": 0.7175396689984644, "grad_norm": 1.7874068549369737, "learning_rate": 1.9504418539061737e-05, "loss": 0.1671, "step": 8411 }, { "epoch": 0.7176249786725815, "grad_norm": 1.537112322056816, "learning_rate": 1.9493471523194402e-05, "loss": 0.2272, "step": 8412 }, { "epoch": 0.7177102883466985, "grad_norm": 1.9381756988655212, "learning_rate": 1.9482526836338387e-05, "loss": 0.1638, "step": 8413 }, { "epoch": 0.7177955980208156, "grad_norm": 1.4954429386212365, "learning_rate": 1.947158447932932e-05, "loss": 0.1761, "step": 8414 }, { "epoch": 0.7178809076949326, "grad_norm": 1.7213200168040352, "learning_rate": 1.946064445300251e-05, "loss": 0.1793, "step": 8415 }, { "epoch": 0.7179662173690496, "grad_norm": 1.2991404681269028, "learning_rate": 1.9449706758193232e-05, "loss": 0.1704, "step": 8416 }, { "epoch": 0.7180515270431667, "grad_norm": 1.7960230922531297, "learning_rate": 1.9438771395736495e-05, "loss": 0.1862, "step": 8417 }, { "epoch": 0.7181368367172838, "grad_norm": 2.0366491308045274, "learning_rate": 1.942783836646716e-05, "loss": 0.1926, "step": 8418 }, { "epoch": 0.7182221463914008, "grad_norm": 1.4905108874005566, "learning_rate": 1.94169076712199e-05, "loss": 0.2271, "step": 8419 }, { "epoch": 0.7183074560655178, "grad_norm": 1.5265327505377524, "learning_rate": 1.9405979310829232e-05, "loss": 0.2033, "step": 8420 }, { "epoch": 0.7183927657396348, "grad_norm": 1.640669031914705, "learning_rate": 1.9395053286129462e-05, "loss": 0.1708, "step": 8421 }, { "epoch": 0.718478075413752, "grad_norm": 3.7451100574410616, "learning_rate": 1.938412959795472e-05, "loss": 0.2014, "step": 8422 }, { "epoch": 0.718563385087869, "grad_norm": 1.5917200362493422, "learning_rate": 1.9373208247139008e-05, "loss": 0.2106, "step": 8423 }, { "epoch": 0.718648694761986, "grad_norm": 1.9259735073355115, "learning_rate": 1.93622892345161e-05, "loss": 0.1976, "step": 8424 }, { "epoch": 0.718734004436103, "grad_norm": 2.067907008339979, "learning_rate": 1.9351372560919596e-05, "loss": 0.1881, "step": 8425 }, { "epoch": 0.7188193141102202, "grad_norm": 1.9142961009197246, "learning_rate": 1.9340458227182918e-05, "loss": 0.1895, "step": 8426 }, { "epoch": 0.7189046237843372, "grad_norm": 1.510765780737516, "learning_rate": 1.9329546234139356e-05, "loss": 0.2326, "step": 8427 }, { "epoch": 0.7189899334584542, "grad_norm": 1.5387224315463768, "learning_rate": 1.931863658262193e-05, "loss": 0.1623, "step": 8428 }, { "epoch": 0.7190752431325712, "grad_norm": 1.5526711121153316, "learning_rate": 1.930772927346357e-05, "loss": 0.2175, "step": 8429 }, { "epoch": 0.7191605528066882, "grad_norm": 1.5870890828965967, "learning_rate": 1.9296824307496992e-05, "loss": 0.2146, "step": 8430 }, { "epoch": 0.7192458624808054, "grad_norm": 1.5687296467102843, "learning_rate": 1.9285921685554713e-05, "loss": 0.1154, "step": 8431 }, { "epoch": 0.7193311721549224, "grad_norm": 1.8558105153995068, "learning_rate": 1.92750214084691e-05, "loss": 0.1843, "step": 8432 }, { "epoch": 0.7194164818290394, "grad_norm": 1.976137500048491, "learning_rate": 1.926412347707233e-05, "loss": 0.2379, "step": 8433 }, { "epoch": 0.7195017915031564, "grad_norm": 1.5675247270741899, "learning_rate": 1.9253227892196406e-05, "loss": 0.2305, "step": 8434 }, { "epoch": 0.7195871011772735, "grad_norm": 1.8597727117822076, "learning_rate": 1.9242334654673124e-05, "loss": 0.1512, "step": 8435 }, { "epoch": 0.7196724108513906, "grad_norm": 2.2351779162021166, "learning_rate": 1.923144376533416e-05, "loss": 0.2374, "step": 8436 }, { "epoch": 0.7197577205255076, "grad_norm": 1.7995061435324309, "learning_rate": 1.9220555225010966e-05, "loss": 0.1966, "step": 8437 }, { "epoch": 0.7198430301996246, "grad_norm": 1.9603800798245852, "learning_rate": 1.9209669034534816e-05, "loss": 0.1908, "step": 8438 }, { "epoch": 0.7199283398737417, "grad_norm": 2.001359715787019, "learning_rate": 1.9198785194736817e-05, "loss": 0.1996, "step": 8439 }, { "epoch": 0.7200136495478587, "grad_norm": 1.882583857539184, "learning_rate": 1.9187903706447892e-05, "loss": 0.214, "step": 8440 }, { "epoch": 0.7200989592219758, "grad_norm": 1.7331047561514215, "learning_rate": 1.917702457049878e-05, "loss": 0.2262, "step": 8441 }, { "epoch": 0.7201842688960928, "grad_norm": 2.438801149176999, "learning_rate": 1.9166147787720036e-05, "loss": 0.2331, "step": 8442 }, { "epoch": 0.7202695785702099, "grad_norm": 1.64352088752938, "learning_rate": 1.915527335894209e-05, "loss": 0.1968, "step": 8443 }, { "epoch": 0.7203548882443269, "grad_norm": 1.5873642690710248, "learning_rate": 1.9144401284995072e-05, "loss": 0.1822, "step": 8444 }, { "epoch": 0.7204401979184439, "grad_norm": 2.125134407729433, "learning_rate": 1.9133531566709078e-05, "loss": 0.1564, "step": 8445 }, { "epoch": 0.720525507592561, "grad_norm": 1.896279383854787, "learning_rate": 1.912266420491392e-05, "loss": 0.2006, "step": 8446 }, { "epoch": 0.7206108172666781, "grad_norm": 1.3179445419394746, "learning_rate": 1.9111799200439267e-05, "loss": 0.2056, "step": 8447 }, { "epoch": 0.7206961269407951, "grad_norm": 1.4403504016844542, "learning_rate": 1.910093655411459e-05, "loss": 0.1601, "step": 8448 }, { "epoch": 0.7207814366149121, "grad_norm": 1.8626176784129047, "learning_rate": 1.9090076266769245e-05, "loss": 0.2439, "step": 8449 }, { "epoch": 0.7208667462890291, "grad_norm": 1.2670333937931306, "learning_rate": 1.9079218339232285e-05, "loss": 0.1866, "step": 8450 }, { "epoch": 0.7209520559631463, "grad_norm": 1.630595330889205, "learning_rate": 1.906836277233271e-05, "loss": 0.1377, "step": 8451 }, { "epoch": 0.7210373656372633, "grad_norm": 1.736525911581053, "learning_rate": 1.9057509566899266e-05, "loss": 0.1324, "step": 8452 }, { "epoch": 0.7211226753113803, "grad_norm": 1.9593296830626903, "learning_rate": 1.904665872376054e-05, "loss": 0.2029, "step": 8453 }, { "epoch": 0.7212079849854973, "grad_norm": 1.6081731067493863, "learning_rate": 1.9035810243744934e-05, "loss": 0.1763, "step": 8454 }, { "epoch": 0.7212932946596144, "grad_norm": 2.6349950058544107, "learning_rate": 1.9024964127680655e-05, "loss": 0.2199, "step": 8455 }, { "epoch": 0.7213786043337315, "grad_norm": 1.491045529847921, "learning_rate": 1.90141203763958e-05, "loss": 0.1844, "step": 8456 }, { "epoch": 0.7214639140078485, "grad_norm": 1.4644066730742138, "learning_rate": 1.900327899071816e-05, "loss": 0.2029, "step": 8457 }, { "epoch": 0.7215492236819655, "grad_norm": 2.3743438818273868, "learning_rate": 1.8992439971475468e-05, "loss": 0.2262, "step": 8458 }, { "epoch": 0.7216345333560826, "grad_norm": 1.5692909675806381, "learning_rate": 1.8981603319495213e-05, "loss": 0.2282, "step": 8459 }, { "epoch": 0.7217198430301996, "grad_norm": 2.0270142039331587, "learning_rate": 1.8970769035604708e-05, "loss": 0.2754, "step": 8460 }, { "epoch": 0.7218051527043167, "grad_norm": 1.2512142006309694, "learning_rate": 1.89599371206311e-05, "loss": 0.2249, "step": 8461 }, { "epoch": 0.7218904623784337, "grad_norm": 1.416492040006528, "learning_rate": 1.8949107575401347e-05, "loss": 0.2319, "step": 8462 }, { "epoch": 0.7219757720525508, "grad_norm": 1.0246557308855242, "learning_rate": 1.893828040074223e-05, "loss": 0.1298, "step": 8463 }, { "epoch": 0.7220610817266678, "grad_norm": 1.4830677902048233, "learning_rate": 1.892745559748032e-05, "loss": 0.1618, "step": 8464 }, { "epoch": 0.7221463914007848, "grad_norm": 1.7371153618426531, "learning_rate": 1.8916633166442094e-05, "loss": 0.2054, "step": 8465 }, { "epoch": 0.7222317010749019, "grad_norm": 2.0275535401449445, "learning_rate": 1.8905813108453712e-05, "loss": 0.217, "step": 8466 }, { "epoch": 0.7223170107490189, "grad_norm": 2.3478376220106476, "learning_rate": 1.8894995424341285e-05, "loss": 0.2272, "step": 8467 }, { "epoch": 0.722402320423136, "grad_norm": 1.3857418864104016, "learning_rate": 1.8884180114930644e-05, "loss": 0.2157, "step": 8468 }, { "epoch": 0.722487630097253, "grad_norm": 1.324047156274955, "learning_rate": 1.8873367181047537e-05, "loss": 0.1888, "step": 8469 }, { "epoch": 0.72257293977137, "grad_norm": 1.599850218437822, "learning_rate": 1.8862556623517406e-05, "loss": 0.2087, "step": 8470 }, { "epoch": 0.7226582494454871, "grad_norm": 1.6589757694086757, "learning_rate": 1.8851748443165628e-05, "loss": 0.1557, "step": 8471 }, { "epoch": 0.7227435591196042, "grad_norm": 2.0806190611186457, "learning_rate": 1.8840942640817338e-05, "loss": 0.1627, "step": 8472 }, { "epoch": 0.7228288687937212, "grad_norm": 1.8470197875817096, "learning_rate": 1.8830139217297498e-05, "loss": 0.2358, "step": 8473 }, { "epoch": 0.7229141784678382, "grad_norm": 2.045271966472821, "learning_rate": 1.881933817343089e-05, "loss": 0.1621, "step": 8474 }, { "epoch": 0.7229994881419552, "grad_norm": 1.5505613848588298, "learning_rate": 1.8808539510042124e-05, "loss": 0.1588, "step": 8475 }, { "epoch": 0.7230847978160724, "grad_norm": 1.6996728750652774, "learning_rate": 1.879774322795561e-05, "loss": 0.1655, "step": 8476 }, { "epoch": 0.7231701074901894, "grad_norm": 1.6213040502710443, "learning_rate": 1.8786949327995574e-05, "loss": 0.1723, "step": 8477 }, { "epoch": 0.7232554171643064, "grad_norm": 1.8709574671671092, "learning_rate": 1.877615781098613e-05, "loss": 0.2313, "step": 8478 }, { "epoch": 0.7233407268384234, "grad_norm": 2.0868603668659524, "learning_rate": 1.8765368677751072e-05, "loss": 0.1776, "step": 8479 }, { "epoch": 0.7234260365125406, "grad_norm": 1.7037660284820713, "learning_rate": 1.8754581929114156e-05, "loss": 0.2476, "step": 8480 }, { "epoch": 0.7235113461866576, "grad_norm": 2.5547765313829656, "learning_rate": 1.8743797565898873e-05, "loss": 0.2266, "step": 8481 }, { "epoch": 0.7235966558607746, "grad_norm": 1.961481999034873, "learning_rate": 1.873301558892855e-05, "loss": 0.2393, "step": 8482 }, { "epoch": 0.7236819655348916, "grad_norm": 2.042068428940456, "learning_rate": 1.8722235999026332e-05, "loss": 0.1767, "step": 8483 }, { "epoch": 0.7237672752090087, "grad_norm": 2.1162999393634188, "learning_rate": 1.8711458797015174e-05, "loss": 0.21, "step": 8484 }, { "epoch": 0.7238525848831258, "grad_norm": 1.8659003382048671, "learning_rate": 1.8700683983717897e-05, "loss": 0.276, "step": 8485 }, { "epoch": 0.7239378945572428, "grad_norm": 2.3584876002167103, "learning_rate": 1.8689911559957048e-05, "loss": 0.1884, "step": 8486 }, { "epoch": 0.7240232042313598, "grad_norm": 2.7345332508614955, "learning_rate": 1.8679141526555078e-05, "loss": 0.2646, "step": 8487 }, { "epoch": 0.7241085139054769, "grad_norm": 1.439059034047214, "learning_rate": 1.8668373884334217e-05, "loss": 0.1579, "step": 8488 }, { "epoch": 0.7241938235795939, "grad_norm": 1.4297727132114713, "learning_rate": 1.8657608634116512e-05, "loss": 0.21, "step": 8489 }, { "epoch": 0.724279133253711, "grad_norm": 1.8241213786830912, "learning_rate": 1.864684577672382e-05, "loss": 0.1924, "step": 8490 }, { "epoch": 0.724364442927828, "grad_norm": 1.7980626700812046, "learning_rate": 1.863608531297788e-05, "loss": 0.2103, "step": 8491 }, { "epoch": 0.7244497526019451, "grad_norm": 1.6098212206838642, "learning_rate": 1.862532724370012e-05, "loss": 0.1787, "step": 8492 }, { "epoch": 0.7245350622760621, "grad_norm": 1.5295236158789351, "learning_rate": 1.8614571569711914e-05, "loss": 0.1579, "step": 8493 }, { "epoch": 0.7246203719501791, "grad_norm": 1.6515952865825274, "learning_rate": 1.860381829183439e-05, "loss": 0.1993, "step": 8494 }, { "epoch": 0.7247056816242962, "grad_norm": 1.7685370273224414, "learning_rate": 1.8593067410888503e-05, "loss": 0.1735, "step": 8495 }, { "epoch": 0.7247909912984133, "grad_norm": 1.8779478424353193, "learning_rate": 1.858231892769502e-05, "loss": 0.2168, "step": 8496 }, { "epoch": 0.7248763009725303, "grad_norm": 1.6908125683427684, "learning_rate": 1.857157284307452e-05, "loss": 0.2541, "step": 8497 }, { "epoch": 0.7249616106466473, "grad_norm": 1.7353830243437383, "learning_rate": 1.8560829157847452e-05, "loss": 0.1569, "step": 8498 }, { "epoch": 0.7250469203207643, "grad_norm": 1.6908840580315014, "learning_rate": 1.8550087872833976e-05, "loss": 0.2338, "step": 8499 }, { "epoch": 0.7251322299948815, "grad_norm": 1.2205743584428685, "learning_rate": 1.853934898885419e-05, "loss": 0.1696, "step": 8500 }, { "epoch": 0.7252175396689985, "grad_norm": 1.7890165231452364, "learning_rate": 1.852861250672792e-05, "loss": 0.191, "step": 8501 }, { "epoch": 0.7253028493431155, "grad_norm": 1.8452366153576332, "learning_rate": 1.8517878427274848e-05, "loss": 0.1911, "step": 8502 }, { "epoch": 0.7253881590172325, "grad_norm": 1.8924544129619503, "learning_rate": 1.8507146751314464e-05, "loss": 0.1814, "step": 8503 }, { "epoch": 0.7254734686913497, "grad_norm": 1.737351621151452, "learning_rate": 1.8496417479666072e-05, "loss": 0.207, "step": 8504 }, { "epoch": 0.7255587783654667, "grad_norm": 1.5273588916388998, "learning_rate": 1.848569061314879e-05, "loss": 0.1756, "step": 8505 }, { "epoch": 0.7256440880395837, "grad_norm": 1.4029448344695377, "learning_rate": 1.8474966152581556e-05, "loss": 0.1904, "step": 8506 }, { "epoch": 0.7257293977137007, "grad_norm": 1.7829656536839953, "learning_rate": 1.8464244098783163e-05, "loss": 0.2173, "step": 8507 }, { "epoch": 0.7258147073878177, "grad_norm": 1.5812404587046933, "learning_rate": 1.8453524452572114e-05, "loss": 0.2123, "step": 8508 }, { "epoch": 0.7259000170619349, "grad_norm": 1.7768117926026343, "learning_rate": 1.8442807214766855e-05, "loss": 0.2675, "step": 8509 }, { "epoch": 0.7259853267360519, "grad_norm": 2.2137521681083, "learning_rate": 1.8432092386185574e-05, "loss": 0.1647, "step": 8510 }, { "epoch": 0.7260706364101689, "grad_norm": 2.1066896653227745, "learning_rate": 1.842137996764628e-05, "loss": 0.1498, "step": 8511 }, { "epoch": 0.7261559460842859, "grad_norm": 1.6961690259593893, "learning_rate": 1.84106699599668e-05, "loss": 0.2666, "step": 8512 }, { "epoch": 0.726241255758403, "grad_norm": 1.5170952655951957, "learning_rate": 1.839996236396483e-05, "loss": 0.169, "step": 8513 }, { "epoch": 0.72632656543252, "grad_norm": 1.4924832994558643, "learning_rate": 1.8389257180457804e-05, "loss": 0.1495, "step": 8514 }, { "epoch": 0.7264118751066371, "grad_norm": 1.3971235186475217, "learning_rate": 1.8378554410263015e-05, "loss": 0.2293, "step": 8515 }, { "epoch": 0.7264971847807541, "grad_norm": 1.9863989413230554, "learning_rate": 1.8367854054197557e-05, "loss": 0.2181, "step": 8516 }, { "epoch": 0.7265824944548712, "grad_norm": 1.6128311822552397, "learning_rate": 1.8357156113078357e-05, "loss": 0.1602, "step": 8517 }, { "epoch": 0.7266678041289882, "grad_norm": 1.7387570533636802, "learning_rate": 1.834646058772213e-05, "loss": 0.1862, "step": 8518 }, { "epoch": 0.7267531138031053, "grad_norm": 1.9652166600453498, "learning_rate": 1.8335767478945413e-05, "loss": 0.2283, "step": 8519 }, { "epoch": 0.7268384234772223, "grad_norm": 1.6874756987905897, "learning_rate": 1.8325076787564627e-05, "loss": 0.2354, "step": 8520 }, { "epoch": 0.7269237331513394, "grad_norm": 1.586387157116766, "learning_rate": 1.8314388514395864e-05, "loss": 0.2282, "step": 8521 }, { "epoch": 0.7270090428254564, "grad_norm": 1.5857268696240243, "learning_rate": 1.8303702660255184e-05, "loss": 0.1948, "step": 8522 }, { "epoch": 0.7270943524995734, "grad_norm": 2.560554137754288, "learning_rate": 1.8293019225958376e-05, "loss": 0.1725, "step": 8523 }, { "epoch": 0.7271796621736905, "grad_norm": 1.758535482287807, "learning_rate": 1.828233821232105e-05, "loss": 0.1902, "step": 8524 }, { "epoch": 0.7272649718478076, "grad_norm": 1.7696541244590265, "learning_rate": 1.827165962015866e-05, "loss": 0.1721, "step": 8525 }, { "epoch": 0.7273502815219246, "grad_norm": 1.8224462356276907, "learning_rate": 1.8260983450286452e-05, "loss": 0.1747, "step": 8526 }, { "epoch": 0.7274355911960416, "grad_norm": 1.6387021376824167, "learning_rate": 1.8250309703519496e-05, "loss": 0.174, "step": 8527 }, { "epoch": 0.7275209008701586, "grad_norm": 1.9366472274912099, "learning_rate": 1.8239638380672657e-05, "loss": 0.1654, "step": 8528 }, { "epoch": 0.7276062105442758, "grad_norm": 1.8619638580190905, "learning_rate": 1.8228969482560677e-05, "loss": 0.2846, "step": 8529 }, { "epoch": 0.7276915202183928, "grad_norm": 2.0084939831735458, "learning_rate": 1.8218303009998038e-05, "loss": 0.2234, "step": 8530 }, { "epoch": 0.7277768298925098, "grad_norm": 1.834792149177223, "learning_rate": 1.8207638963799084e-05, "loss": 0.1457, "step": 8531 }, { "epoch": 0.7278621395666268, "grad_norm": 1.580134707877098, "learning_rate": 1.8196977344777933e-05, "loss": 0.2189, "step": 8532 }, { "epoch": 0.727947449240744, "grad_norm": 2.0820811768462675, "learning_rate": 1.8186318153748587e-05, "loss": 0.145, "step": 8533 }, { "epoch": 0.728032758914861, "grad_norm": 2.071722853009397, "learning_rate": 1.8175661391524767e-05, "loss": 0.1432, "step": 8534 }, { "epoch": 0.728118068588978, "grad_norm": 1.8569969549611176, "learning_rate": 1.81650070589201e-05, "loss": 0.2213, "step": 8535 }, { "epoch": 0.728203378263095, "grad_norm": 1.6347597912829006, "learning_rate": 1.815435515674797e-05, "loss": 0.1604, "step": 8536 }, { "epoch": 0.7282886879372121, "grad_norm": 1.4695989711799404, "learning_rate": 1.81437056858216e-05, "loss": 0.1698, "step": 8537 }, { "epoch": 0.7283739976113291, "grad_norm": 1.6449380765582209, "learning_rate": 1.813305864695401e-05, "loss": 0.1645, "step": 8538 }, { "epoch": 0.7284593072854462, "grad_norm": 1.3738350701827664, "learning_rate": 1.8122414040958057e-05, "loss": 0.212, "step": 8539 }, { "epoch": 0.7285446169595632, "grad_norm": 1.6371581732709461, "learning_rate": 1.8111771868646395e-05, "loss": 0.2252, "step": 8540 }, { "epoch": 0.7286299266336803, "grad_norm": 1.6557370237164242, "learning_rate": 1.810113213083148e-05, "loss": 0.1853, "step": 8541 }, { "epoch": 0.7287152363077973, "grad_norm": 1.1223065664575653, "learning_rate": 1.809049482832563e-05, "loss": 0.1663, "step": 8542 }, { "epoch": 0.7288005459819143, "grad_norm": 1.9167867153000313, "learning_rate": 1.8079859961940936e-05, "loss": 0.1796, "step": 8543 }, { "epoch": 0.7288858556560314, "grad_norm": 1.5310120689668862, "learning_rate": 1.8069227532489312e-05, "loss": 0.1575, "step": 8544 }, { "epoch": 0.7289711653301484, "grad_norm": 1.7441066554948919, "learning_rate": 1.8058597540782485e-05, "loss": 0.2149, "step": 8545 }, { "epoch": 0.7290564750042655, "grad_norm": 1.8772538943461778, "learning_rate": 1.8047969987631996e-05, "loss": 0.1977, "step": 8546 }, { "epoch": 0.7291417846783825, "grad_norm": 1.7037621102633844, "learning_rate": 1.803734487384921e-05, "loss": 0.2085, "step": 8547 }, { "epoch": 0.7292270943524995, "grad_norm": 1.490747206055017, "learning_rate": 1.8026722200245272e-05, "loss": 0.1994, "step": 8548 }, { "epoch": 0.7293124040266166, "grad_norm": 1.7751261867747286, "learning_rate": 1.8016101967631223e-05, "loss": 0.2339, "step": 8549 }, { "epoch": 0.7293977137007337, "grad_norm": 1.9527475221164246, "learning_rate": 1.8005484176817794e-05, "loss": 0.1844, "step": 8550 }, { "epoch": 0.7294830233748507, "grad_norm": 1.5656260690285693, "learning_rate": 1.7994868828615648e-05, "loss": 0.1722, "step": 8551 }, { "epoch": 0.7295683330489677, "grad_norm": 1.875937418253762, "learning_rate": 1.798425592383519e-05, "loss": 0.1933, "step": 8552 }, { "epoch": 0.7296536427230847, "grad_norm": 2.051663454220093, "learning_rate": 1.797364546328666e-05, "loss": 0.1572, "step": 8553 }, { "epoch": 0.7297389523972019, "grad_norm": 1.566832524847244, "learning_rate": 1.7963037447780097e-05, "loss": 0.2111, "step": 8554 }, { "epoch": 0.7298242620713189, "grad_norm": 2.147326150769445, "learning_rate": 1.795243187812541e-05, "loss": 0.1674, "step": 8555 }, { "epoch": 0.7299095717454359, "grad_norm": 2.359186025971492, "learning_rate": 1.794182875513222e-05, "loss": 0.2519, "step": 8556 }, { "epoch": 0.7299948814195529, "grad_norm": 1.5386506899682417, "learning_rate": 1.7931228079610057e-05, "loss": 0.1843, "step": 8557 }, { "epoch": 0.7300801910936701, "grad_norm": 1.6184697619263004, "learning_rate": 1.7920629852368227e-05, "loss": 0.1471, "step": 8558 }, { "epoch": 0.7301655007677871, "grad_norm": 1.8300382595544569, "learning_rate": 1.791003407421583e-05, "loss": 0.216, "step": 8559 }, { "epoch": 0.7302508104419041, "grad_norm": 1.9472225876990878, "learning_rate": 1.789944074596181e-05, "loss": 0.214, "step": 8560 }, { "epoch": 0.7303361201160211, "grad_norm": 1.6393754833015663, "learning_rate": 1.7888849868414886e-05, "loss": 0.2655, "step": 8561 }, { "epoch": 0.7304214297901382, "grad_norm": 1.7148593215409191, "learning_rate": 1.787826144238367e-05, "loss": 0.2203, "step": 8562 }, { "epoch": 0.7305067394642553, "grad_norm": 1.3320899570090803, "learning_rate": 1.786767546867647e-05, "loss": 0.1368, "step": 8563 }, { "epoch": 0.7305920491383723, "grad_norm": 2.0462784225363473, "learning_rate": 1.7857091948101506e-05, "loss": 0.1746, "step": 8564 }, { "epoch": 0.7306773588124893, "grad_norm": 1.8861974149923901, "learning_rate": 1.784651088146677e-05, "loss": 0.1922, "step": 8565 }, { "epoch": 0.7307626684866064, "grad_norm": 1.9143467283387483, "learning_rate": 1.7835932269580064e-05, "loss": 0.1497, "step": 8566 }, { "epoch": 0.7308479781607234, "grad_norm": 1.3928482628284247, "learning_rate": 1.782535611324901e-05, "loss": 0.1574, "step": 8567 }, { "epoch": 0.7309332878348405, "grad_norm": 1.6794426318151954, "learning_rate": 1.7814782413281038e-05, "loss": 0.1948, "step": 8568 }, { "epoch": 0.7310185975089575, "grad_norm": 1.9343961649832246, "learning_rate": 1.7804211170483397e-05, "loss": 0.2058, "step": 8569 }, { "epoch": 0.7311039071830746, "grad_norm": 2.0548636357341095, "learning_rate": 1.7793642385663134e-05, "loss": 0.1816, "step": 8570 }, { "epoch": 0.7311892168571916, "grad_norm": 2.079111653774242, "learning_rate": 1.7783076059627156e-05, "loss": 0.2369, "step": 8571 }, { "epoch": 0.7312745265313086, "grad_norm": 1.9633997318630962, "learning_rate": 1.7772512193182095e-05, "loss": 0.2072, "step": 8572 }, { "epoch": 0.7313598362054257, "grad_norm": 2.0738008872072298, "learning_rate": 1.7761950787134484e-05, "loss": 0.1929, "step": 8573 }, { "epoch": 0.7314451458795428, "grad_norm": 1.6867637440986696, "learning_rate": 1.77513918422906e-05, "loss": 0.1755, "step": 8574 }, { "epoch": 0.7315304555536598, "grad_norm": 1.4755616201824457, "learning_rate": 1.7740835359456616e-05, "loss": 0.2033, "step": 8575 }, { "epoch": 0.7316157652277768, "grad_norm": 1.641014270922659, "learning_rate": 1.7730281339438387e-05, "loss": 0.1472, "step": 8576 }, { "epoch": 0.7317010749018938, "grad_norm": 1.70616305901778, "learning_rate": 1.7719729783041717e-05, "loss": 0.1904, "step": 8577 }, { "epoch": 0.731786384576011, "grad_norm": 1.4353120157584436, "learning_rate": 1.770918069107214e-05, "loss": 0.2281, "step": 8578 }, { "epoch": 0.731871694250128, "grad_norm": 1.6471573618355497, "learning_rate": 1.769863406433503e-05, "loss": 0.2008, "step": 8579 }, { "epoch": 0.731957003924245, "grad_norm": 1.7671146016891293, "learning_rate": 1.768808990363556e-05, "loss": 0.1586, "step": 8580 }, { "epoch": 0.732042313598362, "grad_norm": 1.874650350394313, "learning_rate": 1.767754820977871e-05, "loss": 0.2521, "step": 8581 }, { "epoch": 0.732127623272479, "grad_norm": 2.126852013853001, "learning_rate": 1.76670089835693e-05, "loss": 0.2013, "step": 8582 }, { "epoch": 0.7322129329465962, "grad_norm": 1.665625254790267, "learning_rate": 1.7656472225811922e-05, "loss": 0.2077, "step": 8583 }, { "epoch": 0.7322982426207132, "grad_norm": 1.5023983219925485, "learning_rate": 1.7645937937311048e-05, "loss": 0.2117, "step": 8584 }, { "epoch": 0.7323835522948302, "grad_norm": 2.0615479555031833, "learning_rate": 1.7635406118870846e-05, "loss": 0.1999, "step": 8585 }, { "epoch": 0.7324688619689472, "grad_norm": 1.3612159015038345, "learning_rate": 1.7624876771295424e-05, "loss": 0.1576, "step": 8586 }, { "epoch": 0.7325541716430644, "grad_norm": 1.3008100260166358, "learning_rate": 1.7614349895388614e-05, "loss": 0.163, "step": 8587 }, { "epoch": 0.7326394813171814, "grad_norm": 1.4231194719786031, "learning_rate": 1.7603825491954097e-05, "loss": 0.2005, "step": 8588 }, { "epoch": 0.7327247909912984, "grad_norm": 1.4645643450978885, "learning_rate": 1.759330356179535e-05, "loss": 0.212, "step": 8589 }, { "epoch": 0.7328101006654154, "grad_norm": 1.8626300945546395, "learning_rate": 1.7582784105715644e-05, "loss": 0.2171, "step": 8590 }, { "epoch": 0.7328954103395325, "grad_norm": 1.8326383776284776, "learning_rate": 1.7572267124518144e-05, "loss": 0.1385, "step": 8591 }, { "epoch": 0.7329807200136496, "grad_norm": 1.7202245455924965, "learning_rate": 1.7561752619005695e-05, "loss": 0.2184, "step": 8592 }, { "epoch": 0.7330660296877666, "grad_norm": 2.014459080564325, "learning_rate": 1.755124058998108e-05, "loss": 0.25, "step": 8593 }, { "epoch": 0.7331513393618836, "grad_norm": 1.9845764778761505, "learning_rate": 1.7540731038246805e-05, "loss": 0.2061, "step": 8594 }, { "epoch": 0.7332366490360007, "grad_norm": 1.5067865706025338, "learning_rate": 1.753022396460523e-05, "loss": 0.2401, "step": 8595 }, { "epoch": 0.7333219587101177, "grad_norm": 1.6850425345828133, "learning_rate": 1.7519719369858488e-05, "loss": 0.1751, "step": 8596 }, { "epoch": 0.7334072683842348, "grad_norm": 1.4422103995740025, "learning_rate": 1.7509217254808613e-05, "loss": 0.1915, "step": 8597 }, { "epoch": 0.7334925780583518, "grad_norm": 1.5045232126582055, "learning_rate": 1.749871762025731e-05, "loss": 0.2117, "step": 8598 }, { "epoch": 0.7335778877324689, "grad_norm": 1.6682946201727276, "learning_rate": 1.7488220467006223e-05, "loss": 0.2041, "step": 8599 }, { "epoch": 0.7336631974065859, "grad_norm": 2.08601544981051, "learning_rate": 1.7477725795856737e-05, "loss": 0.2099, "step": 8600 }, { "epoch": 0.7337485070807029, "grad_norm": 1.5458103716328233, "learning_rate": 1.7467233607610057e-05, "loss": 0.1541, "step": 8601 }, { "epoch": 0.73383381675482, "grad_norm": 1.564633019780525, "learning_rate": 1.745674390306722e-05, "loss": 0.194, "step": 8602 }, { "epoch": 0.7339191264289371, "grad_norm": 1.8709957598868054, "learning_rate": 1.7446256683029028e-05, "loss": 0.1656, "step": 8603 }, { "epoch": 0.7340044361030541, "grad_norm": 2.106948247794491, "learning_rate": 1.743577194829618e-05, "loss": 0.2613, "step": 8604 }, { "epoch": 0.7340897457771711, "grad_norm": 1.8654615014284717, "learning_rate": 1.7425289699669073e-05, "loss": 0.1364, "step": 8605 }, { "epoch": 0.7341750554512881, "grad_norm": 1.6254121551151177, "learning_rate": 1.7414809937948008e-05, "loss": 0.2015, "step": 8606 }, { "epoch": 0.7342603651254053, "grad_norm": 1.5201742581816196, "learning_rate": 1.7404332663933043e-05, "loss": 0.2978, "step": 8607 }, { "epoch": 0.7343456747995223, "grad_norm": 1.5821643161148768, "learning_rate": 1.7393857878424068e-05, "loss": 0.2047, "step": 8608 }, { "epoch": 0.7344309844736393, "grad_norm": 1.8541441355275698, "learning_rate": 1.738338558222078e-05, "loss": 0.1831, "step": 8609 }, { "epoch": 0.7345162941477563, "grad_norm": 1.3061915881954806, "learning_rate": 1.737291577612267e-05, "loss": 0.1873, "step": 8610 }, { "epoch": 0.7346016038218735, "grad_norm": 1.7891375263590885, "learning_rate": 1.7362448460929065e-05, "loss": 0.165, "step": 8611 }, { "epoch": 0.7346869134959905, "grad_norm": 1.7379641811643807, "learning_rate": 1.735198363743907e-05, "loss": 0.1784, "step": 8612 }, { "epoch": 0.7347722231701075, "grad_norm": 1.7725503862157441, "learning_rate": 1.7341521306451662e-05, "loss": 0.2647, "step": 8613 }, { "epoch": 0.7348575328442245, "grad_norm": 2.3054852672826756, "learning_rate": 1.7331061468765523e-05, "loss": 0.2305, "step": 8614 }, { "epoch": 0.7349428425183416, "grad_norm": 1.6313944960865756, "learning_rate": 1.7320604125179258e-05, "loss": 0.1519, "step": 8615 }, { "epoch": 0.7350281521924587, "grad_norm": 1.4604122421190369, "learning_rate": 1.7310149276491205e-05, "loss": 0.2106, "step": 8616 }, { "epoch": 0.7351134618665757, "grad_norm": 1.7167341027702159, "learning_rate": 1.7299696923499543e-05, "loss": 0.2489, "step": 8617 }, { "epoch": 0.7351987715406927, "grad_norm": 2.6729864296217247, "learning_rate": 1.7289247067002233e-05, "loss": 0.277, "step": 8618 }, { "epoch": 0.7352840812148098, "grad_norm": 1.6280480189072053, "learning_rate": 1.7278799707797104e-05, "loss": 0.2217, "step": 8619 }, { "epoch": 0.7353693908889268, "grad_norm": 1.408284601613662, "learning_rate": 1.726835484668174e-05, "loss": 0.1168, "step": 8620 }, { "epoch": 0.7354547005630439, "grad_norm": 2.0492908693421708, "learning_rate": 1.725791248445354e-05, "loss": 0.1629, "step": 8621 }, { "epoch": 0.7355400102371609, "grad_norm": 1.8031569426829377, "learning_rate": 1.7247472621909737e-05, "loss": 0.1503, "step": 8622 }, { "epoch": 0.7356253199112779, "grad_norm": 1.9735493366268657, "learning_rate": 1.723703525984735e-05, "loss": 0.3072, "step": 8623 }, { "epoch": 0.735710629585395, "grad_norm": 1.6968975480128872, "learning_rate": 1.722660039906322e-05, "loss": 0.1884, "step": 8624 }, { "epoch": 0.735795939259512, "grad_norm": 1.474063433706907, "learning_rate": 1.7216168040353976e-05, "loss": 0.16, "step": 8625 }, { "epoch": 0.735881248933629, "grad_norm": 2.1379941937873586, "learning_rate": 1.7205738184516123e-05, "loss": 0.2131, "step": 8626 }, { "epoch": 0.7359665586077461, "grad_norm": 1.8406924742165518, "learning_rate": 1.7195310832345852e-05, "loss": 0.2077, "step": 8627 }, { "epoch": 0.7360518682818632, "grad_norm": 2.4214388546924246, "learning_rate": 1.71848859846393e-05, "loss": 0.2138, "step": 8628 }, { "epoch": 0.7361371779559802, "grad_norm": 1.452257779487187, "learning_rate": 1.717446364219232e-05, "loss": 0.1324, "step": 8629 }, { "epoch": 0.7362224876300972, "grad_norm": 1.3067650614560695, "learning_rate": 1.71640438058006e-05, "loss": 0.1892, "step": 8630 }, { "epoch": 0.7363077973042143, "grad_norm": 1.5575068993821262, "learning_rate": 1.7153626476259656e-05, "loss": 0.2478, "step": 8631 }, { "epoch": 0.7363931069783314, "grad_norm": 1.9540663624976506, "learning_rate": 1.7143211654364762e-05, "loss": 0.2073, "step": 8632 }, { "epoch": 0.7364784166524484, "grad_norm": 1.8382543351926284, "learning_rate": 1.7132799340911087e-05, "loss": 0.2238, "step": 8633 }, { "epoch": 0.7365637263265654, "grad_norm": 1.2376274043303432, "learning_rate": 1.71223895366935e-05, "loss": 0.1815, "step": 8634 }, { "epoch": 0.7366490360006824, "grad_norm": 2.093372026174976, "learning_rate": 1.7111982242506775e-05, "loss": 0.209, "step": 8635 }, { "epoch": 0.7367343456747996, "grad_norm": 1.7815888735015635, "learning_rate": 1.710157745914544e-05, "loss": 0.2006, "step": 8636 }, { "epoch": 0.7368196553489166, "grad_norm": 1.8697302833901854, "learning_rate": 1.7091175187403842e-05, "loss": 0.2602, "step": 8637 }, { "epoch": 0.7369049650230336, "grad_norm": 1.6502030305504725, "learning_rate": 1.7080775428076122e-05, "loss": 0.1842, "step": 8638 }, { "epoch": 0.7369902746971506, "grad_norm": 1.3034994232387167, "learning_rate": 1.7070378181956302e-05, "loss": 0.1455, "step": 8639 }, { "epoch": 0.7370755843712677, "grad_norm": 1.223497874985733, "learning_rate": 1.705998344983809e-05, "loss": 0.1429, "step": 8640 }, { "epoch": 0.7371608940453848, "grad_norm": 1.6785610946522693, "learning_rate": 1.704959123251511e-05, "loss": 0.242, "step": 8641 }, { "epoch": 0.7372462037195018, "grad_norm": 1.9248907727055382, "learning_rate": 1.7039201530780742e-05, "loss": 0.214, "step": 8642 }, { "epoch": 0.7373315133936188, "grad_norm": 1.921545295036543, "learning_rate": 1.7028814345428185e-05, "loss": 0.184, "step": 8643 }, { "epoch": 0.7374168230677359, "grad_norm": 1.5225196616621097, "learning_rate": 1.7018429677250447e-05, "loss": 0.2175, "step": 8644 }, { "epoch": 0.737502132741853, "grad_norm": 2.2092877490824616, "learning_rate": 1.700804752704033e-05, "loss": 0.2069, "step": 8645 }, { "epoch": 0.73758744241597, "grad_norm": 1.6708359386141562, "learning_rate": 1.6997667895590474e-05, "loss": 0.1857, "step": 8646 }, { "epoch": 0.737672752090087, "grad_norm": 2.0219155261829145, "learning_rate": 1.6987290783693282e-05, "loss": 0.2012, "step": 8647 }, { "epoch": 0.7377580617642041, "grad_norm": 1.9458190993687625, "learning_rate": 1.6976916192141022e-05, "loss": 0.1958, "step": 8648 }, { "epoch": 0.7378433714383211, "grad_norm": 1.7789161517672245, "learning_rate": 1.696654412172573e-05, "loss": 0.1519, "step": 8649 }, { "epoch": 0.7379286811124381, "grad_norm": 1.4207798020168685, "learning_rate": 1.695617457323925e-05, "loss": 0.1598, "step": 8650 }, { "epoch": 0.7380139907865552, "grad_norm": 2.1916993297081744, "learning_rate": 1.6945807547473253e-05, "loss": 0.2366, "step": 8651 }, { "epoch": 0.7380993004606723, "grad_norm": 2.032759120513006, "learning_rate": 1.6935443045219198e-05, "loss": 0.2398, "step": 8652 }, { "epoch": 0.7381846101347893, "grad_norm": 1.9857182199354884, "learning_rate": 1.692508106726836e-05, "loss": 0.1612, "step": 8653 }, { "epoch": 0.7382699198089063, "grad_norm": 1.860230962093235, "learning_rate": 1.691472161441181e-05, "loss": 0.1906, "step": 8654 }, { "epoch": 0.7383552294830233, "grad_norm": 1.8825145818137976, "learning_rate": 1.6904364687440476e-05, "loss": 0.186, "step": 8655 }, { "epoch": 0.7384405391571405, "grad_norm": 2.261732137637137, "learning_rate": 1.6894010287145e-05, "loss": 0.1602, "step": 8656 }, { "epoch": 0.7385258488312575, "grad_norm": 1.89091792083726, "learning_rate": 1.6883658414315928e-05, "loss": 0.1964, "step": 8657 }, { "epoch": 0.7386111585053745, "grad_norm": 2.069388244457484, "learning_rate": 1.687330906974356e-05, "loss": 0.2175, "step": 8658 }, { "epoch": 0.7386964681794915, "grad_norm": 2.3596205425702315, "learning_rate": 1.6862962254218e-05, "loss": 0.1973, "step": 8659 }, { "epoch": 0.7387817778536085, "grad_norm": 1.4629485624322653, "learning_rate": 1.6852617968529176e-05, "loss": 0.2055, "step": 8660 }, { "epoch": 0.7388670875277257, "grad_norm": 1.2587352708638537, "learning_rate": 1.6842276213466852e-05, "loss": 0.1611, "step": 8661 }, { "epoch": 0.7389523972018427, "grad_norm": 1.8097992868996011, "learning_rate": 1.6831936989820506e-05, "loss": 0.2008, "step": 8662 }, { "epoch": 0.7390377068759597, "grad_norm": 1.4240099629900151, "learning_rate": 1.6821600298379538e-05, "loss": 0.2027, "step": 8663 }, { "epoch": 0.7391230165500767, "grad_norm": 2.0317361103276372, "learning_rate": 1.6811266139933075e-05, "loss": 0.2331, "step": 8664 }, { "epoch": 0.7392083262241939, "grad_norm": 1.8178771923371715, "learning_rate": 1.6800934515270074e-05, "loss": 0.2211, "step": 8665 }, { "epoch": 0.7392936358983109, "grad_norm": 1.8886013708199936, "learning_rate": 1.6790605425179306e-05, "loss": 0.2188, "step": 8666 }, { "epoch": 0.7393789455724279, "grad_norm": 1.6877783086654659, "learning_rate": 1.6780278870449325e-05, "loss": 0.2148, "step": 8667 }, { "epoch": 0.7394642552465449, "grad_norm": 1.8316580386156731, "learning_rate": 1.6769954851868548e-05, "loss": 0.2458, "step": 8668 }, { "epoch": 0.739549564920662, "grad_norm": 1.6704977191542965, "learning_rate": 1.675963337022511e-05, "loss": 0.2193, "step": 8669 }, { "epoch": 0.7396348745947791, "grad_norm": 1.4675578697011, "learning_rate": 1.6749314426307035e-05, "loss": 0.1589, "step": 8670 }, { "epoch": 0.7397201842688961, "grad_norm": 1.951971583253356, "learning_rate": 1.6738998020902108e-05, "loss": 0.2206, "step": 8671 }, { "epoch": 0.7398054939430131, "grad_norm": 1.272356007041577, "learning_rate": 1.6728684154797942e-05, "loss": 0.2137, "step": 8672 }, { "epoch": 0.7398908036171302, "grad_norm": 1.708056598051665, "learning_rate": 1.671837282878193e-05, "loss": 0.1612, "step": 8673 }, { "epoch": 0.7399761132912472, "grad_norm": 1.6076340378902547, "learning_rate": 1.6708064043641297e-05, "loss": 0.1504, "step": 8674 }, { "epoch": 0.7400614229653643, "grad_norm": 2.168184726713864, "learning_rate": 1.669775780016306e-05, "loss": 0.2085, "step": 8675 }, { "epoch": 0.7401467326394813, "grad_norm": 1.3590154501181997, "learning_rate": 1.6687454099134032e-05, "loss": 0.0972, "step": 8676 }, { "epoch": 0.7402320423135984, "grad_norm": 1.205408282935934, "learning_rate": 1.6677152941340873e-05, "loss": 0.19, "step": 8677 }, { "epoch": 0.7403173519877154, "grad_norm": 1.7323348271843848, "learning_rate": 1.6666854327570015e-05, "loss": 0.1256, "step": 8678 }, { "epoch": 0.7404026616618324, "grad_norm": 1.7202502552104906, "learning_rate": 1.6656558258607698e-05, "loss": 0.2294, "step": 8679 }, { "epoch": 0.7404879713359495, "grad_norm": 1.736614396790741, "learning_rate": 1.6646264735239948e-05, "loss": 0.1779, "step": 8680 }, { "epoch": 0.7405732810100666, "grad_norm": 1.292677693870993, "learning_rate": 1.663597375825268e-05, "loss": 0.2262, "step": 8681 }, { "epoch": 0.7406585906841836, "grad_norm": 1.5625233457728576, "learning_rate": 1.662568532843149e-05, "loss": 0.1776, "step": 8682 }, { "epoch": 0.7407439003583006, "grad_norm": 1.8102132907617647, "learning_rate": 1.6615399446561886e-05, "loss": 0.2052, "step": 8683 }, { "epoch": 0.7408292100324176, "grad_norm": 1.4339686431063376, "learning_rate": 1.660511611342913e-05, "loss": 0.1402, "step": 8684 }, { "epoch": 0.7409145197065348, "grad_norm": 1.7295970228010769, "learning_rate": 1.6594835329818297e-05, "loss": 0.1824, "step": 8685 }, { "epoch": 0.7409998293806518, "grad_norm": 1.7475482932909274, "learning_rate": 1.6584557096514274e-05, "loss": 0.2133, "step": 8686 }, { "epoch": 0.7410851390547688, "grad_norm": 1.8660617131927464, "learning_rate": 1.6574281414301744e-05, "loss": 0.2131, "step": 8687 }, { "epoch": 0.7411704487288858, "grad_norm": 1.586338612611732, "learning_rate": 1.656400828396521e-05, "loss": 0.1615, "step": 8688 }, { "epoch": 0.741255758403003, "grad_norm": 1.8528390982281029, "learning_rate": 1.655373770628894e-05, "loss": 0.1587, "step": 8689 }, { "epoch": 0.74134106807712, "grad_norm": 2.314350315820485, "learning_rate": 1.6543469682057106e-05, "loss": 0.2387, "step": 8690 }, { "epoch": 0.741426377751237, "grad_norm": 1.9184850553318726, "learning_rate": 1.6533204212053533e-05, "loss": 0.2345, "step": 8691 }, { "epoch": 0.741511687425354, "grad_norm": 1.4393727915486443, "learning_rate": 1.6522941297061996e-05, "loss": 0.2013, "step": 8692 }, { "epoch": 0.7415969970994711, "grad_norm": 1.6141572287458705, "learning_rate": 1.6512680937865993e-05, "loss": 0.1695, "step": 8693 }, { "epoch": 0.7416823067735882, "grad_norm": 1.7715195018309653, "learning_rate": 1.650242313524885e-05, "loss": 0.1909, "step": 8694 }, { "epoch": 0.7417676164477052, "grad_norm": 1.840352565117436, "learning_rate": 1.6492167889993693e-05, "loss": 0.1867, "step": 8695 }, { "epoch": 0.7418529261218222, "grad_norm": 2.224226519266594, "learning_rate": 1.6481915202883442e-05, "loss": 0.1796, "step": 8696 }, { "epoch": 0.7419382357959392, "grad_norm": 2.081075309318286, "learning_rate": 1.647166507470088e-05, "loss": 0.1997, "step": 8697 }, { "epoch": 0.7420235454700563, "grad_norm": 1.6480094751715255, "learning_rate": 1.6461417506228493e-05, "loss": 0.2063, "step": 8698 }, { "epoch": 0.7421088551441734, "grad_norm": 1.4046890920729733, "learning_rate": 1.645117249824867e-05, "loss": 0.1813, "step": 8699 }, { "epoch": 0.7421941648182904, "grad_norm": 1.765015893707902, "learning_rate": 1.6440930051543546e-05, "loss": 0.1762, "step": 8700 }, { "epoch": 0.7422794744924074, "grad_norm": 1.443806538981669, "learning_rate": 1.6430690166895084e-05, "loss": 0.1916, "step": 8701 }, { "epoch": 0.7423647841665245, "grad_norm": 1.8598335405947795, "learning_rate": 1.642045284508502e-05, "loss": 0.1694, "step": 8702 }, { "epoch": 0.7424500938406415, "grad_norm": 1.855444207280118, "learning_rate": 1.6410218086894976e-05, "loss": 0.2305, "step": 8703 }, { "epoch": 0.7425354035147586, "grad_norm": 2.0211204901578936, "learning_rate": 1.6399985893106252e-05, "loss": 0.2629, "step": 8704 }, { "epoch": 0.7426207131888756, "grad_norm": 1.495475860569233, "learning_rate": 1.6389756264500068e-05, "loss": 0.1749, "step": 8705 }, { "epoch": 0.7427060228629927, "grad_norm": 1.7411259038318811, "learning_rate": 1.637952920185739e-05, "loss": 0.2478, "step": 8706 }, { "epoch": 0.7427913325371097, "grad_norm": 1.4568928097978666, "learning_rate": 1.6369304705959e-05, "loss": 0.2102, "step": 8707 }, { "epoch": 0.7428766422112267, "grad_norm": 1.8760964049028777, "learning_rate": 1.6359082777585483e-05, "loss": 0.1685, "step": 8708 }, { "epoch": 0.7429619518853438, "grad_norm": 2.3053161798121766, "learning_rate": 1.6348863417517208e-05, "loss": 0.2687, "step": 8709 }, { "epoch": 0.7430472615594609, "grad_norm": 2.0592135052028397, "learning_rate": 1.6338646626534427e-05, "loss": 0.1558, "step": 8710 }, { "epoch": 0.7431325712335779, "grad_norm": 1.7559016034879609, "learning_rate": 1.632843240541706e-05, "loss": 0.1868, "step": 8711 }, { "epoch": 0.7432178809076949, "grad_norm": 1.616583669963732, "learning_rate": 1.631822075494497e-05, "loss": 0.2156, "step": 8712 }, { "epoch": 0.7433031905818119, "grad_norm": 2.012324506796629, "learning_rate": 1.630801167589774e-05, "loss": 0.1878, "step": 8713 }, { "epoch": 0.7433885002559291, "grad_norm": 1.3864324569675777, "learning_rate": 1.629780516905478e-05, "loss": 0.1592, "step": 8714 }, { "epoch": 0.7434738099300461, "grad_norm": 1.80499819768673, "learning_rate": 1.62876012351953e-05, "loss": 0.1981, "step": 8715 }, { "epoch": 0.7435591196041631, "grad_norm": 1.8808934098726509, "learning_rate": 1.6277399875098322e-05, "loss": 0.1368, "step": 8716 }, { "epoch": 0.7436444292782801, "grad_norm": 2.267940942355306, "learning_rate": 1.6267201089542657e-05, "loss": 0.1876, "step": 8717 }, { "epoch": 0.7437297389523972, "grad_norm": 1.646732551450994, "learning_rate": 1.625700487930692e-05, "loss": 0.2068, "step": 8718 }, { "epoch": 0.7438150486265143, "grad_norm": 2.1507333037231504, "learning_rate": 1.624681124516958e-05, "loss": 0.299, "step": 8719 }, { "epoch": 0.7439003583006313, "grad_norm": 1.4431097617486848, "learning_rate": 1.623662018790881e-05, "loss": 0.202, "step": 8720 }, { "epoch": 0.7439856679747483, "grad_norm": 2.711518063341033, "learning_rate": 1.6226431708302682e-05, "loss": 0.2196, "step": 8721 }, { "epoch": 0.7440709776488654, "grad_norm": 1.4222484559298694, "learning_rate": 1.6216245807129004e-05, "loss": 0.1728, "step": 8722 }, { "epoch": 0.7441562873229824, "grad_norm": 1.470572051632411, "learning_rate": 1.6206062485165463e-05, "loss": 0.153, "step": 8723 }, { "epoch": 0.7442415969970995, "grad_norm": 2.036030352825255, "learning_rate": 1.619588174318944e-05, "loss": 0.2455, "step": 8724 }, { "epoch": 0.7443269066712165, "grad_norm": 1.9814232326309522, "learning_rate": 1.618570358197823e-05, "loss": 0.1249, "step": 8725 }, { "epoch": 0.7444122163453336, "grad_norm": 2.138926925431064, "learning_rate": 1.617552800230886e-05, "loss": 0.1598, "step": 8726 }, { "epoch": 0.7444975260194506, "grad_norm": 1.9025946418390822, "learning_rate": 1.616535500495818e-05, "loss": 0.1737, "step": 8727 }, { "epoch": 0.7445828356935676, "grad_norm": 2.057145537647136, "learning_rate": 1.6155184590702855e-05, "loss": 0.1759, "step": 8728 }, { "epoch": 0.7446681453676847, "grad_norm": 1.6802744704787305, "learning_rate": 1.6145016760319338e-05, "loss": 0.2468, "step": 8729 }, { "epoch": 0.7447534550418018, "grad_norm": 1.4860978418076702, "learning_rate": 1.6134851514583875e-05, "loss": 0.134, "step": 8730 }, { "epoch": 0.7448387647159188, "grad_norm": 1.851199086253502, "learning_rate": 1.612468885427253e-05, "loss": 0.1379, "step": 8731 }, { "epoch": 0.7449240743900358, "grad_norm": 1.9999852179934687, "learning_rate": 1.6114528780161213e-05, "loss": 0.2159, "step": 8732 }, { "epoch": 0.7450093840641528, "grad_norm": 1.6779286246595546, "learning_rate": 1.610437129302552e-05, "loss": 0.1844, "step": 8733 }, { "epoch": 0.74509469373827, "grad_norm": 3.0812123882481943, "learning_rate": 1.6094216393640977e-05, "loss": 0.2646, "step": 8734 }, { "epoch": 0.745180003412387, "grad_norm": 1.5062200013018607, "learning_rate": 1.608406408278283e-05, "loss": 0.2334, "step": 8735 }, { "epoch": 0.745265313086504, "grad_norm": 1.2376448382764258, "learning_rate": 1.6073914361226166e-05, "loss": 0.1831, "step": 8736 }, { "epoch": 0.745350622760621, "grad_norm": 2.2775908085864374, "learning_rate": 1.606376722974586e-05, "loss": 0.2066, "step": 8737 }, { "epoch": 0.745435932434738, "grad_norm": 1.7681817641573596, "learning_rate": 1.605362268911657e-05, "loss": 0.1447, "step": 8738 }, { "epoch": 0.7455212421088552, "grad_norm": 1.4711043784415494, "learning_rate": 1.6043480740112827e-05, "loss": 0.1733, "step": 8739 }, { "epoch": 0.7456065517829722, "grad_norm": 1.729468614190348, "learning_rate": 1.6033341383508854e-05, "loss": 0.2542, "step": 8740 }, { "epoch": 0.7456918614570892, "grad_norm": 1.419236806292454, "learning_rate": 1.6023204620078787e-05, "loss": 0.1768, "step": 8741 }, { "epoch": 0.7457771711312062, "grad_norm": 1.7593333129280515, "learning_rate": 1.6013070450596492e-05, "loss": 0.1815, "step": 8742 }, { "epoch": 0.7458624808053234, "grad_norm": 1.5254967490192533, "learning_rate": 1.6002938875835665e-05, "loss": 0.1611, "step": 8743 }, { "epoch": 0.7459477904794404, "grad_norm": 2.374675527794205, "learning_rate": 1.599280989656977e-05, "loss": 0.2588, "step": 8744 }, { "epoch": 0.7460331001535574, "grad_norm": 1.5522569222206055, "learning_rate": 1.5982683513572165e-05, "loss": 0.1766, "step": 8745 }, { "epoch": 0.7461184098276744, "grad_norm": 1.6317399440179077, "learning_rate": 1.5972559727615875e-05, "loss": 0.17, "step": 8746 }, { "epoch": 0.7462037195017915, "grad_norm": 1.647267654196973, "learning_rate": 1.596243853947384e-05, "loss": 0.1646, "step": 8747 }, { "epoch": 0.7462890291759086, "grad_norm": 1.710860995319724, "learning_rate": 1.5952319949918748e-05, "loss": 0.1674, "step": 8748 }, { "epoch": 0.7463743388500256, "grad_norm": 1.7238418063362733, "learning_rate": 1.59422039597231e-05, "loss": 0.177, "step": 8749 }, { "epoch": 0.7464596485241426, "grad_norm": 1.9793050081858752, "learning_rate": 1.5932090569659197e-05, "loss": 0.1951, "step": 8750 }, { "epoch": 0.7465449581982597, "grad_norm": 1.6851944184700478, "learning_rate": 1.592197978049914e-05, "loss": 0.2742, "step": 8751 }, { "epoch": 0.7466302678723767, "grad_norm": 1.4483127281002108, "learning_rate": 1.5911871593014837e-05, "loss": 0.1687, "step": 8752 }, { "epoch": 0.7467155775464938, "grad_norm": 1.918492325370496, "learning_rate": 1.590176600797798e-05, "loss": 0.183, "step": 8753 }, { "epoch": 0.7468008872206108, "grad_norm": 1.664490120975035, "learning_rate": 1.5891663026160102e-05, "loss": 0.2954, "step": 8754 }, { "epoch": 0.7468861968947279, "grad_norm": 1.7576756402885099, "learning_rate": 1.5881562648332503e-05, "loss": 0.1861, "step": 8755 }, { "epoch": 0.7469715065688449, "grad_norm": 1.4589138601234741, "learning_rate": 1.5871464875266294e-05, "loss": 0.1676, "step": 8756 }, { "epoch": 0.7470568162429619, "grad_norm": 1.8408574836681415, "learning_rate": 1.586136970773238e-05, "loss": 0.1738, "step": 8757 }, { "epoch": 0.747142125917079, "grad_norm": 1.5362658191484115, "learning_rate": 1.5851277146501476e-05, "loss": 0.1895, "step": 8758 }, { "epoch": 0.7472274355911961, "grad_norm": 1.5424243594376976, "learning_rate": 1.5841187192344097e-05, "loss": 0.1943, "step": 8759 }, { "epoch": 0.7473127452653131, "grad_norm": 2.0193876883209394, "learning_rate": 1.583109984603054e-05, "loss": 0.1374, "step": 8760 }, { "epoch": 0.7473980549394301, "grad_norm": 1.9971287501955872, "learning_rate": 1.5821015108330968e-05, "loss": 0.2232, "step": 8761 }, { "epoch": 0.7474833646135471, "grad_norm": 1.8752408190890142, "learning_rate": 1.581093298001523e-05, "loss": 0.1752, "step": 8762 }, { "epoch": 0.7475686742876643, "grad_norm": 1.896673568261612, "learning_rate": 1.5800853461853098e-05, "loss": 0.2593, "step": 8763 }, { "epoch": 0.7476539839617813, "grad_norm": 2.896584630608578, "learning_rate": 1.5790776554614066e-05, "loss": 0.2462, "step": 8764 }, { "epoch": 0.7477392936358983, "grad_norm": 1.691323575648899, "learning_rate": 1.5780702259067465e-05, "loss": 0.1647, "step": 8765 }, { "epoch": 0.7478246033100153, "grad_norm": 1.6113390003184134, "learning_rate": 1.577063057598238e-05, "loss": 0.1942, "step": 8766 }, { "epoch": 0.7479099129841325, "grad_norm": 2.253844050403674, "learning_rate": 1.576056150612778e-05, "loss": 0.1784, "step": 8767 }, { "epoch": 0.7479952226582495, "grad_norm": 1.8665935896579164, "learning_rate": 1.5750495050272358e-05, "loss": 0.1813, "step": 8768 }, { "epoch": 0.7480805323323665, "grad_norm": 2.31302090525797, "learning_rate": 1.5740431209184636e-05, "loss": 0.1861, "step": 8769 }, { "epoch": 0.7481658420064835, "grad_norm": 1.7306256707473617, "learning_rate": 1.5730369983632937e-05, "loss": 0.1778, "step": 8770 }, { "epoch": 0.7482511516806006, "grad_norm": 1.588852782087432, "learning_rate": 1.5720311374385377e-05, "loss": 0.2127, "step": 8771 }, { "epoch": 0.7483364613547177, "grad_norm": 1.9399021393387816, "learning_rate": 1.5710255382209887e-05, "loss": 0.2234, "step": 8772 }, { "epoch": 0.7484217710288347, "grad_norm": 2.2265137784211593, "learning_rate": 1.5700202007874165e-05, "loss": 0.2516, "step": 8773 }, { "epoch": 0.7485070807029517, "grad_norm": 1.845728313940222, "learning_rate": 1.569015125214578e-05, "loss": 0.1578, "step": 8774 }, { "epoch": 0.7485923903770687, "grad_norm": 1.5752516923855069, "learning_rate": 1.5680103115791993e-05, "loss": 0.2111, "step": 8775 }, { "epoch": 0.7486777000511858, "grad_norm": 1.620697340698659, "learning_rate": 1.567005759957998e-05, "loss": 0.173, "step": 8776 }, { "epoch": 0.7487630097253029, "grad_norm": 1.9495572883620191, "learning_rate": 1.5660014704276638e-05, "loss": 0.1971, "step": 8777 }, { "epoch": 0.7488483193994199, "grad_norm": 2.172470450249727, "learning_rate": 1.5649974430648684e-05, "loss": 0.2498, "step": 8778 }, { "epoch": 0.7489336290735369, "grad_norm": 1.8493072037357834, "learning_rate": 1.5639936779462657e-05, "loss": 0.144, "step": 8779 }, { "epoch": 0.749018938747654, "grad_norm": 1.8234072070641372, "learning_rate": 1.562990175148486e-05, "loss": 0.2171, "step": 8780 }, { "epoch": 0.749104248421771, "grad_norm": 2.080060931927184, "learning_rate": 1.561986934748142e-05, "loss": 0.2155, "step": 8781 }, { "epoch": 0.749189558095888, "grad_norm": 1.736635951077458, "learning_rate": 1.5609839568218247e-05, "loss": 0.1576, "step": 8782 }, { "epoch": 0.7492748677700051, "grad_norm": 1.8207304642496251, "learning_rate": 1.559981241446109e-05, "loss": 0.1745, "step": 8783 }, { "epoch": 0.7493601774441222, "grad_norm": 1.8462744418975412, "learning_rate": 1.5589787886975456e-05, "loss": 0.1585, "step": 8784 }, { "epoch": 0.7494454871182392, "grad_norm": 2.228387131860651, "learning_rate": 1.5579765986526657e-05, "loss": 0.2552, "step": 8785 }, { "epoch": 0.7495307967923562, "grad_norm": 1.6624133037247757, "learning_rate": 1.5569746713879802e-05, "loss": 0.1522, "step": 8786 }, { "epoch": 0.7496161064664733, "grad_norm": 1.7464680451867067, "learning_rate": 1.5559730069799857e-05, "loss": 0.2189, "step": 8787 }, { "epoch": 0.7497014161405904, "grad_norm": 1.6813786709128347, "learning_rate": 1.5549716055051468e-05, "loss": 0.1891, "step": 8788 }, { "epoch": 0.7497867258147074, "grad_norm": 1.5027472451603143, "learning_rate": 1.553970467039921e-05, "loss": 0.1687, "step": 8789 }, { "epoch": 0.7498720354888244, "grad_norm": 1.584491013712994, "learning_rate": 1.5529695916607374e-05, "loss": 0.1747, "step": 8790 }, { "epoch": 0.7499573451629414, "grad_norm": 1.6251112826463598, "learning_rate": 1.5519689794440085e-05, "loss": 0.2052, "step": 8791 }, { "epoch": 0.7500426548370586, "grad_norm": 1.7156396639393523, "learning_rate": 1.5509686304661247e-05, "loss": 0.1725, "step": 8792 }, { "epoch": 0.7501279645111756, "grad_norm": 1.9631037809870209, "learning_rate": 1.549968544803458e-05, "loss": 0.2014, "step": 8793 }, { "epoch": 0.7502132741852926, "grad_norm": 1.2775656597784577, "learning_rate": 1.5489687225323594e-05, "loss": 0.122, "step": 8794 }, { "epoch": 0.7502985838594096, "grad_norm": 1.8244809078724058, "learning_rate": 1.5479691637291586e-05, "loss": 0.2088, "step": 8795 }, { "epoch": 0.7503838935335267, "grad_norm": 1.5505746852794664, "learning_rate": 1.5469698684701715e-05, "loss": 0.1315, "step": 8796 }, { "epoch": 0.7504692032076438, "grad_norm": 1.5693759687596183, "learning_rate": 1.5459708368316823e-05, "loss": 0.1538, "step": 8797 }, { "epoch": 0.7505545128817608, "grad_norm": 1.6553170797620624, "learning_rate": 1.5449720688899665e-05, "loss": 0.2432, "step": 8798 }, { "epoch": 0.7506398225558778, "grad_norm": 1.524244515315132, "learning_rate": 1.543973564721273e-05, "loss": 0.1688, "step": 8799 }, { "epoch": 0.7507251322299949, "grad_norm": 2.124121091505132, "learning_rate": 1.542975324401833e-05, "loss": 0.2512, "step": 8800 }, { "epoch": 0.750810441904112, "grad_norm": 1.5841542257549208, "learning_rate": 1.5419773480078563e-05, "loss": 0.1811, "step": 8801 }, { "epoch": 0.750895751578229, "grad_norm": 1.4835945085199154, "learning_rate": 1.5409796356155316e-05, "loss": 0.1817, "step": 8802 }, { "epoch": 0.750981061252346, "grad_norm": 2.063477371295097, "learning_rate": 1.5399821873010335e-05, "loss": 0.1894, "step": 8803 }, { "epoch": 0.7510663709264631, "grad_norm": 1.6079913367042795, "learning_rate": 1.5389850031405057e-05, "loss": 0.2219, "step": 8804 }, { "epoch": 0.7511516806005801, "grad_norm": 1.295978833991156, "learning_rate": 1.5379880832100824e-05, "loss": 0.1411, "step": 8805 }, { "epoch": 0.7512369902746971, "grad_norm": 1.582536781450066, "learning_rate": 1.536991427585872e-05, "loss": 0.1692, "step": 8806 }, { "epoch": 0.7513222999488142, "grad_norm": 1.3715410508212393, "learning_rate": 1.535995036343964e-05, "loss": 0.1974, "step": 8807 }, { "epoch": 0.7514076096229313, "grad_norm": 1.9034053652350351, "learning_rate": 1.5349989095604257e-05, "loss": 0.1575, "step": 8808 }, { "epoch": 0.7514929192970483, "grad_norm": 1.7861575067063453, "learning_rate": 1.5340030473113103e-05, "loss": 0.2695, "step": 8809 }, { "epoch": 0.7515782289711653, "grad_norm": 1.5002699450142991, "learning_rate": 1.5330074496726415e-05, "loss": 0.1743, "step": 8810 }, { "epoch": 0.7516635386452823, "grad_norm": 2.30354099528169, "learning_rate": 1.5320121167204314e-05, "loss": 0.2357, "step": 8811 }, { "epoch": 0.7517488483193994, "grad_norm": 3.270816746242443, "learning_rate": 1.5310170485306672e-05, "loss": 0.1941, "step": 8812 }, { "epoch": 0.7518341579935165, "grad_norm": 1.9682540874479943, "learning_rate": 1.5300222451793178e-05, "loss": 0.1609, "step": 8813 }, { "epoch": 0.7519194676676335, "grad_norm": 1.6259761959224437, "learning_rate": 1.5290277067423303e-05, "loss": 0.2457, "step": 8814 }, { "epoch": 0.7520047773417505, "grad_norm": 2.181962770670658, "learning_rate": 1.528033433295631e-05, "loss": 0.1682, "step": 8815 }, { "epoch": 0.7520900870158675, "grad_norm": 1.8525529618614052, "learning_rate": 1.5270394249151322e-05, "loss": 0.2221, "step": 8816 }, { "epoch": 0.7521753966899847, "grad_norm": 2.798200410461941, "learning_rate": 1.526045681676715e-05, "loss": 0.1834, "step": 8817 }, { "epoch": 0.7522607063641017, "grad_norm": 2.40025871789726, "learning_rate": 1.5250522036562503e-05, "loss": 0.2156, "step": 8818 }, { "epoch": 0.7523460160382187, "grad_norm": 1.5156731352093729, "learning_rate": 1.524058990929585e-05, "loss": 0.1689, "step": 8819 }, { "epoch": 0.7524313257123357, "grad_norm": 1.9079327269716335, "learning_rate": 1.523066043572544e-05, "loss": 0.2264, "step": 8820 }, { "epoch": 0.7525166353864529, "grad_norm": 1.4099587595434417, "learning_rate": 1.5220733616609345e-05, "loss": 0.1192, "step": 8821 }, { "epoch": 0.7526019450605699, "grad_norm": 1.7608788088721246, "learning_rate": 1.5210809452705415e-05, "loss": 0.1414, "step": 8822 }, { "epoch": 0.7526872547346869, "grad_norm": 1.807875752311001, "learning_rate": 1.520088794477132e-05, "loss": 0.1682, "step": 8823 }, { "epoch": 0.7527725644088039, "grad_norm": 1.8084078666456846, "learning_rate": 1.5190969093564494e-05, "loss": 0.184, "step": 8824 }, { "epoch": 0.752857874082921, "grad_norm": 1.5766653732445044, "learning_rate": 1.5181052899842229e-05, "loss": 0.1766, "step": 8825 }, { "epoch": 0.7529431837570381, "grad_norm": 1.7052347956774991, "learning_rate": 1.517113936436152e-05, "loss": 0.1369, "step": 8826 }, { "epoch": 0.7530284934311551, "grad_norm": 1.8261444579325572, "learning_rate": 1.5161228487879253e-05, "loss": 0.2364, "step": 8827 }, { "epoch": 0.7531138031052721, "grad_norm": 2.241880814582254, "learning_rate": 1.5151320271152041e-05, "loss": 0.2178, "step": 8828 }, { "epoch": 0.7531991127793892, "grad_norm": 1.7458248059315635, "learning_rate": 1.5141414714936376e-05, "loss": 0.1519, "step": 8829 }, { "epoch": 0.7532844224535062, "grad_norm": 2.217119289260291, "learning_rate": 1.5131511819988426e-05, "loss": 0.2617, "step": 8830 }, { "epoch": 0.7533697321276233, "grad_norm": 1.891148014079603, "learning_rate": 1.5121611587064278e-05, "loss": 0.2398, "step": 8831 }, { "epoch": 0.7534550418017403, "grad_norm": 1.5721121576886743, "learning_rate": 1.5111714016919732e-05, "loss": 0.2165, "step": 8832 }, { "epoch": 0.7535403514758574, "grad_norm": 1.872947586817065, "learning_rate": 1.5101819110310433e-05, "loss": 0.2163, "step": 8833 }, { "epoch": 0.7536256611499744, "grad_norm": 1.9383919416353785, "learning_rate": 1.509192686799179e-05, "loss": 0.1663, "step": 8834 }, { "epoch": 0.7537109708240914, "grad_norm": 1.9455462702119224, "learning_rate": 1.5082037290719036e-05, "loss": 0.2336, "step": 8835 }, { "epoch": 0.7537962804982085, "grad_norm": 1.6719872267659541, "learning_rate": 1.5072150379247174e-05, "loss": 0.2172, "step": 8836 }, { "epoch": 0.7538815901723256, "grad_norm": 1.6695888968098063, "learning_rate": 1.5062266134331016e-05, "loss": 0.1876, "step": 8837 }, { "epoch": 0.7539668998464426, "grad_norm": 1.836428962021142, "learning_rate": 1.5052384556725201e-05, "loss": 0.2155, "step": 8838 }, { "epoch": 0.7540522095205596, "grad_norm": 1.7643096885795237, "learning_rate": 1.5042505647184091e-05, "loss": 0.1943, "step": 8839 }, { "epoch": 0.7541375191946766, "grad_norm": 1.8469820304620208, "learning_rate": 1.5032629406461923e-05, "loss": 0.2476, "step": 8840 }, { "epoch": 0.7542228288687938, "grad_norm": 1.658603076306651, "learning_rate": 1.5022755835312686e-05, "loss": 0.171, "step": 8841 }, { "epoch": 0.7543081385429108, "grad_norm": 1.5480517910713956, "learning_rate": 1.5012884934490167e-05, "loss": 0.179, "step": 8842 }, { "epoch": 0.7543934482170278, "grad_norm": 1.955989110935848, "learning_rate": 1.5003016704747969e-05, "loss": 0.2425, "step": 8843 }, { "epoch": 0.7544787578911448, "grad_norm": 1.8458131787625622, "learning_rate": 1.4993151146839451e-05, "loss": 0.1485, "step": 8844 }, { "epoch": 0.754564067565262, "grad_norm": 1.8182624766491988, "learning_rate": 1.4983288261517853e-05, "loss": 0.1634, "step": 8845 }, { "epoch": 0.754649377239379, "grad_norm": 1.7332732679279041, "learning_rate": 1.497342804953608e-05, "loss": 0.1578, "step": 8846 }, { "epoch": 0.754734686913496, "grad_norm": 1.840182587023028, "learning_rate": 1.4963570511646963e-05, "loss": 0.1793, "step": 8847 }, { "epoch": 0.754819996587613, "grad_norm": 1.3517795895554527, "learning_rate": 1.4953715648603057e-05, "loss": 0.1889, "step": 8848 }, { "epoch": 0.75490530626173, "grad_norm": 1.7629321332863288, "learning_rate": 1.494386346115672e-05, "loss": 0.1879, "step": 8849 }, { "epoch": 0.7549906159358472, "grad_norm": 2.1008563929433044, "learning_rate": 1.4934013950060104e-05, "loss": 0.1061, "step": 8850 }, { "epoch": 0.7550759256099642, "grad_norm": 1.621918250299221, "learning_rate": 1.4924167116065218e-05, "loss": 0.1958, "step": 8851 }, { "epoch": 0.7551612352840812, "grad_norm": 1.4280494519708238, "learning_rate": 1.491432295992375e-05, "loss": 0.1936, "step": 8852 }, { "epoch": 0.7552465449581982, "grad_norm": 1.5697937673222646, "learning_rate": 1.4904481482387289e-05, "loss": 0.13, "step": 8853 }, { "epoch": 0.7553318546323153, "grad_norm": 1.5378960386243457, "learning_rate": 1.4894642684207171e-05, "loss": 0.1556, "step": 8854 }, { "epoch": 0.7554171643064324, "grad_norm": 1.627512236987166, "learning_rate": 1.4884806566134535e-05, "loss": 0.2226, "step": 8855 }, { "epoch": 0.7555024739805494, "grad_norm": 1.8384886851975362, "learning_rate": 1.4874973128920317e-05, "loss": 0.2307, "step": 8856 }, { "epoch": 0.7555877836546664, "grad_norm": 1.6386506325910102, "learning_rate": 1.4865142373315227e-05, "loss": 0.2478, "step": 8857 }, { "epoch": 0.7556730933287835, "grad_norm": 1.619509444432693, "learning_rate": 1.4855314300069844e-05, "loss": 0.2102, "step": 8858 }, { "epoch": 0.7557584030029005, "grad_norm": 1.6506312087728372, "learning_rate": 1.484548890993443e-05, "loss": 0.2093, "step": 8859 }, { "epoch": 0.7558437126770176, "grad_norm": 1.4454373692363878, "learning_rate": 1.4835666203659133e-05, "loss": 0.2337, "step": 8860 }, { "epoch": 0.7559290223511346, "grad_norm": 1.7344779594034898, "learning_rate": 1.4825846181993864e-05, "loss": 0.2089, "step": 8861 }, { "epoch": 0.7560143320252517, "grad_norm": 1.2581232291302487, "learning_rate": 1.4816028845688323e-05, "loss": 0.1915, "step": 8862 }, { "epoch": 0.7560996416993687, "grad_norm": 1.7093096091195683, "learning_rate": 1.4806214195492008e-05, "loss": 0.2045, "step": 8863 }, { "epoch": 0.7561849513734857, "grad_norm": 1.6791109026917215, "learning_rate": 1.4796402232154228e-05, "loss": 0.2255, "step": 8864 }, { "epoch": 0.7562702610476028, "grad_norm": 1.7521086658849365, "learning_rate": 1.4786592956424055e-05, "loss": 0.2239, "step": 8865 }, { "epoch": 0.7563555707217199, "grad_norm": 1.561126563122813, "learning_rate": 1.4776786369050377e-05, "loss": 0.2091, "step": 8866 }, { "epoch": 0.7564408803958369, "grad_norm": 1.6336118940662039, "learning_rate": 1.4766982470781915e-05, "loss": 0.2354, "step": 8867 }, { "epoch": 0.7565261900699539, "grad_norm": 1.444119512658007, "learning_rate": 1.4757181262367081e-05, "loss": 0.1792, "step": 8868 }, { "epoch": 0.7566114997440709, "grad_norm": 1.8775984243304253, "learning_rate": 1.4747382744554195e-05, "loss": 0.1985, "step": 8869 }, { "epoch": 0.7566968094181881, "grad_norm": 1.8378570261690423, "learning_rate": 1.4737586918091301e-05, "loss": 0.2028, "step": 8870 }, { "epoch": 0.7567821190923051, "grad_norm": 1.7095682596311792, "learning_rate": 1.4727793783726263e-05, "loss": 0.1779, "step": 8871 }, { "epoch": 0.7568674287664221, "grad_norm": 2.0760766740505674, "learning_rate": 1.4718003342206722e-05, "loss": 0.2168, "step": 8872 }, { "epoch": 0.7569527384405391, "grad_norm": 1.6399525308717242, "learning_rate": 1.4708215594280144e-05, "loss": 0.2372, "step": 8873 }, { "epoch": 0.7570380481146562, "grad_norm": 1.9851003207360152, "learning_rate": 1.469843054069377e-05, "loss": 0.2, "step": 8874 }, { "epoch": 0.7571233577887733, "grad_norm": 2.4954837059569788, "learning_rate": 1.4688648182194637e-05, "loss": 0.224, "step": 8875 }, { "epoch": 0.7572086674628903, "grad_norm": 1.4716059736536526, "learning_rate": 1.4678868519529564e-05, "loss": 0.2142, "step": 8876 }, { "epoch": 0.7572939771370073, "grad_norm": 1.6548795968597048, "learning_rate": 1.4669091553445185e-05, "loss": 0.179, "step": 8877 }, { "epoch": 0.7573792868111244, "grad_norm": 2.0055252287178242, "learning_rate": 1.4659317284687918e-05, "loss": 0.2388, "step": 8878 }, { "epoch": 0.7574645964852414, "grad_norm": 1.6549939845226485, "learning_rate": 1.4649545714003959e-05, "loss": 0.156, "step": 8879 }, { "epoch": 0.7575499061593585, "grad_norm": 1.7186205034890099, "learning_rate": 1.4639776842139363e-05, "loss": 0.1481, "step": 8880 }, { "epoch": 0.7576352158334755, "grad_norm": 2.25203866787875, "learning_rate": 1.4630010669839871e-05, "loss": 0.1805, "step": 8881 }, { "epoch": 0.7577205255075926, "grad_norm": 1.9116664291598975, "learning_rate": 1.4620247197851128e-05, "loss": 0.2199, "step": 8882 }, { "epoch": 0.7578058351817096, "grad_norm": 1.7988086413592637, "learning_rate": 1.4610486426918502e-05, "loss": 0.1147, "step": 8883 }, { "epoch": 0.7578911448558266, "grad_norm": 1.9661244641757478, "learning_rate": 1.4600728357787175e-05, "loss": 0.19, "step": 8884 }, { "epoch": 0.7579764545299437, "grad_norm": 1.785712424686007, "learning_rate": 1.4590972991202129e-05, "loss": 0.1832, "step": 8885 }, { "epoch": 0.7580617642040608, "grad_norm": 1.5332345364019566, "learning_rate": 1.458122032790813e-05, "loss": 0.1663, "step": 8886 }, { "epoch": 0.7581470738781778, "grad_norm": 1.564434456207861, "learning_rate": 1.4571470368649742e-05, "loss": 0.1924, "step": 8887 }, { "epoch": 0.7582323835522948, "grad_norm": 1.5258267959851917, "learning_rate": 1.4561723114171311e-05, "loss": 0.1868, "step": 8888 }, { "epoch": 0.7583176932264118, "grad_norm": 1.9714843895120244, "learning_rate": 1.4551978565217017e-05, "loss": 0.2227, "step": 8889 }, { "epoch": 0.7584030029005289, "grad_norm": 1.9382986453104887, "learning_rate": 1.454223672253079e-05, "loss": 0.2346, "step": 8890 }, { "epoch": 0.758488312574646, "grad_norm": 1.731082712622558, "learning_rate": 1.4532497586856364e-05, "loss": 0.1732, "step": 8891 }, { "epoch": 0.758573622248763, "grad_norm": 1.801468393294702, "learning_rate": 1.452276115893726e-05, "loss": 0.2279, "step": 8892 }, { "epoch": 0.75865893192288, "grad_norm": 1.63711288185991, "learning_rate": 1.4513027439516847e-05, "loss": 0.1864, "step": 8893 }, { "epoch": 0.758744241596997, "grad_norm": 1.2793351728225002, "learning_rate": 1.4503296429338181e-05, "loss": 0.2297, "step": 8894 }, { "epoch": 0.7588295512711142, "grad_norm": 1.8432259542267087, "learning_rate": 1.4493568129144214e-05, "loss": 0.1459, "step": 8895 }, { "epoch": 0.7589148609452312, "grad_norm": 2.0779779138355114, "learning_rate": 1.4483842539677644e-05, "loss": 0.239, "step": 8896 }, { "epoch": 0.7590001706193482, "grad_norm": 2.034055679504428, "learning_rate": 1.4474119661680962e-05, "loss": 0.2081, "step": 8897 }, { "epoch": 0.7590854802934652, "grad_norm": 1.8844404977831108, "learning_rate": 1.4464399495896458e-05, "loss": 0.1485, "step": 8898 }, { "epoch": 0.7591707899675824, "grad_norm": 2.0827575905294893, "learning_rate": 1.445468204306622e-05, "loss": 0.1482, "step": 8899 }, { "epoch": 0.7592560996416994, "grad_norm": 1.8831769028685432, "learning_rate": 1.4444967303932116e-05, "loss": 0.1995, "step": 8900 }, { "epoch": 0.7593414093158164, "grad_norm": 1.7134892495282548, "learning_rate": 1.443525527923581e-05, "loss": 0.0857, "step": 8901 }, { "epoch": 0.7594267189899334, "grad_norm": 1.5661231186994222, "learning_rate": 1.4425545969718801e-05, "loss": 0.1883, "step": 8902 }, { "epoch": 0.7595120286640505, "grad_norm": 1.7140552558850168, "learning_rate": 1.4415839376122282e-05, "loss": 0.1967, "step": 8903 }, { "epoch": 0.7595973383381676, "grad_norm": 1.6121182071045468, "learning_rate": 1.4406135499187346e-05, "loss": 0.1909, "step": 8904 }, { "epoch": 0.7596826480122846, "grad_norm": 1.649615332529239, "learning_rate": 1.4396434339654824e-05, "loss": 0.1924, "step": 8905 }, { "epoch": 0.7597679576864016, "grad_norm": 1.805430469907701, "learning_rate": 1.4386735898265342e-05, "loss": 0.1784, "step": 8906 }, { "epoch": 0.7598532673605187, "grad_norm": 1.9044914263599648, "learning_rate": 1.437704017575932e-05, "loss": 0.2476, "step": 8907 }, { "epoch": 0.7599385770346357, "grad_norm": 1.626160720614669, "learning_rate": 1.4367347172876972e-05, "loss": 0.1862, "step": 8908 }, { "epoch": 0.7600238867087528, "grad_norm": 1.8487973066961527, "learning_rate": 1.435765689035834e-05, "loss": 0.1788, "step": 8909 }, { "epoch": 0.7601091963828698, "grad_norm": 1.457929482626734, "learning_rate": 1.4347969328943179e-05, "loss": 0.1608, "step": 8910 }, { "epoch": 0.7601945060569869, "grad_norm": 1.7702427795633546, "learning_rate": 1.4338284489371113e-05, "loss": 0.1645, "step": 8911 }, { "epoch": 0.7602798157311039, "grad_norm": 1.6183303996150216, "learning_rate": 1.4328602372381522e-05, "loss": 0.2018, "step": 8912 }, { "epoch": 0.7603651254052209, "grad_norm": 1.452833987393235, "learning_rate": 1.4318922978713583e-05, "loss": 0.1617, "step": 8913 }, { "epoch": 0.760450435079338, "grad_norm": 1.435335603075821, "learning_rate": 1.430924630910625e-05, "loss": 0.1974, "step": 8914 }, { "epoch": 0.7605357447534551, "grad_norm": 2.412565884654061, "learning_rate": 1.429957236429833e-05, "loss": 0.1948, "step": 8915 }, { "epoch": 0.7606210544275721, "grad_norm": 2.2631963933193884, "learning_rate": 1.4289901145028317e-05, "loss": 0.2065, "step": 8916 }, { "epoch": 0.7607063641016891, "grad_norm": 2.0676883981610112, "learning_rate": 1.428023265203461e-05, "loss": 0.2426, "step": 8917 }, { "epoch": 0.7607916737758061, "grad_norm": 2.0079257799543226, "learning_rate": 1.427056688605532e-05, "loss": 0.274, "step": 8918 }, { "epoch": 0.7608769834499233, "grad_norm": 1.8213977116553752, "learning_rate": 1.4260903847828383e-05, "loss": 0.2001, "step": 8919 }, { "epoch": 0.7609622931240403, "grad_norm": 2.1422529504127765, "learning_rate": 1.425124353809152e-05, "loss": 0.192, "step": 8920 }, { "epoch": 0.7610476027981573, "grad_norm": 1.8263904787482952, "learning_rate": 1.4241585957582226e-05, "loss": 0.2381, "step": 8921 }, { "epoch": 0.7611329124722743, "grad_norm": 1.5282604071725956, "learning_rate": 1.4231931107037861e-05, "loss": 0.1951, "step": 8922 }, { "epoch": 0.7612182221463915, "grad_norm": 1.960987895910713, "learning_rate": 1.4222278987195447e-05, "loss": 0.1745, "step": 8923 }, { "epoch": 0.7613035318205085, "grad_norm": 1.4998955690270774, "learning_rate": 1.4212629598791932e-05, "loss": 0.1068, "step": 8924 }, { "epoch": 0.7613888414946255, "grad_norm": 1.5927486639943804, "learning_rate": 1.420298294256397e-05, "loss": 0.2243, "step": 8925 }, { "epoch": 0.7614741511687425, "grad_norm": 1.5333510017759198, "learning_rate": 1.4193339019248036e-05, "loss": 0.1615, "step": 8926 }, { "epoch": 0.7615594608428595, "grad_norm": 2.3755452383491584, "learning_rate": 1.4183697829580389e-05, "loss": 0.2001, "step": 8927 }, { "epoch": 0.7616447705169767, "grad_norm": 1.6271298462507713, "learning_rate": 1.4174059374297088e-05, "loss": 0.2064, "step": 8928 }, { "epoch": 0.7617300801910937, "grad_norm": 1.6804782159557314, "learning_rate": 1.4164423654133974e-05, "loss": 0.1921, "step": 8929 }, { "epoch": 0.7618153898652107, "grad_norm": 1.652136823534661, "learning_rate": 1.4154790669826668e-05, "loss": 0.1823, "step": 8930 }, { "epoch": 0.7619006995393277, "grad_norm": 1.6652239117974854, "learning_rate": 1.4145160422110643e-05, "loss": 0.2076, "step": 8931 }, { "epoch": 0.7619860092134448, "grad_norm": 1.5011620788492819, "learning_rate": 1.4135532911721061e-05, "loss": 0.2002, "step": 8932 }, { "epoch": 0.7620713188875619, "grad_norm": 1.358649400006739, "learning_rate": 1.4125908139392968e-05, "loss": 0.165, "step": 8933 }, { "epoch": 0.7621566285616789, "grad_norm": 1.7430653092650548, "learning_rate": 1.4116286105861137e-05, "loss": 0.2356, "step": 8934 }, { "epoch": 0.7622419382357959, "grad_norm": 1.8531011312919243, "learning_rate": 1.4106666811860214e-05, "loss": 0.2131, "step": 8935 }, { "epoch": 0.762327247909913, "grad_norm": 1.515030577196813, "learning_rate": 1.4097050258124506e-05, "loss": 0.1966, "step": 8936 }, { "epoch": 0.76241255758403, "grad_norm": 1.6778167953052558, "learning_rate": 1.4087436445388242e-05, "loss": 0.2025, "step": 8937 }, { "epoch": 0.762497867258147, "grad_norm": 2.0301781981103932, "learning_rate": 1.4077825374385362e-05, "loss": 0.2365, "step": 8938 }, { "epoch": 0.7625831769322641, "grad_norm": 1.5811373975866225, "learning_rate": 1.406821704584963e-05, "loss": 0.1964, "step": 8939 }, { "epoch": 0.7626684866063812, "grad_norm": 2.232006605077015, "learning_rate": 1.4058611460514581e-05, "loss": 0.2049, "step": 8940 }, { "epoch": 0.7627537962804982, "grad_norm": 2.17968771876399, "learning_rate": 1.4049008619113558e-05, "loss": 0.2128, "step": 8941 }, { "epoch": 0.7628391059546152, "grad_norm": 1.5431054163416682, "learning_rate": 1.4039408522379683e-05, "loss": 0.1736, "step": 8942 }, { "epoch": 0.7629244156287323, "grad_norm": 2.195842963914786, "learning_rate": 1.4029811171045853e-05, "loss": 0.1951, "step": 8943 }, { "epoch": 0.7630097253028494, "grad_norm": 1.5027764374230863, "learning_rate": 1.402021656584483e-05, "loss": 0.1688, "step": 8944 }, { "epoch": 0.7630950349769664, "grad_norm": 1.7678183376572592, "learning_rate": 1.401062470750904e-05, "loss": 0.2178, "step": 8945 }, { "epoch": 0.7631803446510834, "grad_norm": 2.0779857158592168, "learning_rate": 1.4001035596770828e-05, "loss": 0.1701, "step": 8946 }, { "epoch": 0.7632656543252004, "grad_norm": 1.6303236395506673, "learning_rate": 1.3991449234362246e-05, "loss": 0.1448, "step": 8947 }, { "epoch": 0.7633509639993176, "grad_norm": 1.5554117787234356, "learning_rate": 1.3981865621015167e-05, "loss": 0.1746, "step": 8948 }, { "epoch": 0.7634362736734346, "grad_norm": 1.7935089427851598, "learning_rate": 1.3972284757461246e-05, "loss": 0.245, "step": 8949 }, { "epoch": 0.7635215833475516, "grad_norm": 1.6497922997807244, "learning_rate": 1.3962706644431922e-05, "loss": 0.1799, "step": 8950 }, { "epoch": 0.7636068930216686, "grad_norm": 1.6500826034676572, "learning_rate": 1.395313128265847e-05, "loss": 0.1864, "step": 8951 }, { "epoch": 0.7636922026957857, "grad_norm": 2.25884774837296, "learning_rate": 1.3943558672871858e-05, "loss": 0.1024, "step": 8952 }, { "epoch": 0.7637775123699028, "grad_norm": 1.6391424837770519, "learning_rate": 1.3933988815802962e-05, "loss": 0.1216, "step": 8953 }, { "epoch": 0.7638628220440198, "grad_norm": 1.918370595195056, "learning_rate": 1.3924421712182362e-05, "loss": 0.1831, "step": 8954 }, { "epoch": 0.7639481317181368, "grad_norm": 2.6378777111476808, "learning_rate": 1.3914857362740457e-05, "loss": 0.2424, "step": 8955 }, { "epoch": 0.7640334413922539, "grad_norm": 1.9740390274611335, "learning_rate": 1.3905295768207421e-05, "loss": 0.2248, "step": 8956 }, { "epoch": 0.764118751066371, "grad_norm": 2.378720431281859, "learning_rate": 1.3895736929313274e-05, "loss": 0.1481, "step": 8957 }, { "epoch": 0.764204060740488, "grad_norm": 1.8613017381556989, "learning_rate": 1.3886180846787727e-05, "loss": 0.2323, "step": 8958 }, { "epoch": 0.764289370414605, "grad_norm": 1.8229237438246177, "learning_rate": 1.387662752136038e-05, "loss": 0.1861, "step": 8959 }, { "epoch": 0.7643746800887221, "grad_norm": 2.101197278361552, "learning_rate": 1.386707695376056e-05, "loss": 0.2062, "step": 8960 }, { "epoch": 0.7644599897628391, "grad_norm": 1.609936218183016, "learning_rate": 1.3857529144717407e-05, "loss": 0.239, "step": 8961 }, { "epoch": 0.7645452994369561, "grad_norm": 1.5107690948082566, "learning_rate": 1.3847984094959842e-05, "loss": 0.1742, "step": 8962 }, { "epoch": 0.7646306091110732, "grad_norm": 2.1965576116247267, "learning_rate": 1.3838441805216562e-05, "loss": 0.1985, "step": 8963 }, { "epoch": 0.7647159187851902, "grad_norm": 2.0440712865801873, "learning_rate": 1.382890227621612e-05, "loss": 0.2232, "step": 8964 }, { "epoch": 0.7648012284593073, "grad_norm": 1.6744187157745472, "learning_rate": 1.3819365508686749e-05, "loss": 0.2235, "step": 8965 }, { "epoch": 0.7648865381334243, "grad_norm": 1.768012668874928, "learning_rate": 1.3809831503356569e-05, "loss": 0.1655, "step": 8966 }, { "epoch": 0.7649718478075413, "grad_norm": 2.2924190557701927, "learning_rate": 1.3800300260953442e-05, "loss": 0.2254, "step": 8967 }, { "epoch": 0.7650571574816584, "grad_norm": 1.8335069516364586, "learning_rate": 1.3790771782205026e-05, "loss": 0.2027, "step": 8968 }, { "epoch": 0.7651424671557755, "grad_norm": 2.3294921547486793, "learning_rate": 1.3781246067838766e-05, "loss": 0.2122, "step": 8969 }, { "epoch": 0.7652277768298925, "grad_norm": 1.6420466485128655, "learning_rate": 1.3771723118581897e-05, "loss": 0.2175, "step": 8970 }, { "epoch": 0.7653130865040095, "grad_norm": 1.9837947207279378, "learning_rate": 1.3762202935161461e-05, "loss": 0.2525, "step": 8971 }, { "epoch": 0.7653983961781265, "grad_norm": 2.7365518678600416, "learning_rate": 1.3752685518304243e-05, "loss": 0.2776, "step": 8972 }, { "epoch": 0.7654837058522437, "grad_norm": 1.4255246284201124, "learning_rate": 1.3743170868736898e-05, "loss": 0.1778, "step": 8973 }, { "epoch": 0.7655690155263607, "grad_norm": 1.7500643718324338, "learning_rate": 1.373365898718576e-05, "loss": 0.212, "step": 8974 }, { "epoch": 0.7656543252004777, "grad_norm": 1.541577448282354, "learning_rate": 1.3724149874377057e-05, "loss": 0.1494, "step": 8975 }, { "epoch": 0.7657396348745947, "grad_norm": 1.564410910352342, "learning_rate": 1.3714643531036735e-05, "loss": 0.1379, "step": 8976 }, { "epoch": 0.7658249445487119, "grad_norm": 1.1097433526376492, "learning_rate": 1.3705139957890561e-05, "loss": 0.1047, "step": 8977 }, { "epoch": 0.7659102542228289, "grad_norm": 1.2988892505000944, "learning_rate": 1.369563915566407e-05, "loss": 0.186, "step": 8978 }, { "epoch": 0.7659955638969459, "grad_norm": 1.9548140889624277, "learning_rate": 1.3686141125082619e-05, "loss": 0.1967, "step": 8979 }, { "epoch": 0.7660808735710629, "grad_norm": 1.2740823248190283, "learning_rate": 1.3676645866871323e-05, "loss": 0.2133, "step": 8980 }, { "epoch": 0.76616618324518, "grad_norm": 1.649937241256603, "learning_rate": 1.3667153381755093e-05, "loss": 0.1758, "step": 8981 }, { "epoch": 0.7662514929192971, "grad_norm": 1.5093050670021353, "learning_rate": 1.3657663670458631e-05, "loss": 0.1872, "step": 8982 }, { "epoch": 0.7663368025934141, "grad_norm": 1.6155202628741863, "learning_rate": 1.3648176733706419e-05, "loss": 0.2538, "step": 8983 }, { "epoch": 0.7664221122675311, "grad_norm": 1.670401521028725, "learning_rate": 1.3638692572222744e-05, "loss": 0.1948, "step": 8984 }, { "epoch": 0.7665074219416482, "grad_norm": 1.827192150023278, "learning_rate": 1.3629211186731656e-05, "loss": 0.2365, "step": 8985 }, { "epoch": 0.7665927316157652, "grad_norm": 2.64723723003576, "learning_rate": 1.3619732577957045e-05, "loss": 0.1757, "step": 8986 }, { "epoch": 0.7666780412898823, "grad_norm": 2.177838816511811, "learning_rate": 1.3610256746622501e-05, "loss": 0.2498, "step": 8987 }, { "epoch": 0.7667633509639993, "grad_norm": 2.389076279774477, "learning_rate": 1.3600783693451492e-05, "loss": 0.2203, "step": 8988 }, { "epoch": 0.7668486606381164, "grad_norm": 2.043917900444767, "learning_rate": 1.3591313419167222e-05, "loss": 0.144, "step": 8989 }, { "epoch": 0.7669339703122334, "grad_norm": 1.7189121516639352, "learning_rate": 1.3581845924492698e-05, "loss": 0.24, "step": 8990 }, { "epoch": 0.7670192799863504, "grad_norm": 2.248900038679391, "learning_rate": 1.357238121015071e-05, "loss": 0.1279, "step": 8991 }, { "epoch": 0.7671045896604675, "grad_norm": 1.4894340635971324, "learning_rate": 1.3562919276863844e-05, "loss": 0.1683, "step": 8992 }, { "epoch": 0.7671898993345846, "grad_norm": 1.5438856273795274, "learning_rate": 1.3553460125354461e-05, "loss": 0.1979, "step": 8993 }, { "epoch": 0.7672752090087016, "grad_norm": 1.598334208233978, "learning_rate": 1.3544003756344708e-05, "loss": 0.129, "step": 8994 }, { "epoch": 0.7673605186828186, "grad_norm": 1.4751280421915873, "learning_rate": 1.3534550170556554e-05, "loss": 0.205, "step": 8995 }, { "epoch": 0.7674458283569356, "grad_norm": 1.4004801318561488, "learning_rate": 1.3525099368711718e-05, "loss": 0.1898, "step": 8996 }, { "epoch": 0.7675311380310528, "grad_norm": 1.5392105883307496, "learning_rate": 1.3515651351531722e-05, "loss": 0.1854, "step": 8997 }, { "epoch": 0.7676164477051698, "grad_norm": 1.6283889790804136, "learning_rate": 1.3506206119737847e-05, "loss": 0.1781, "step": 8998 }, { "epoch": 0.7677017573792868, "grad_norm": 1.4965706406326313, "learning_rate": 1.3496763674051238e-05, "loss": 0.2106, "step": 8999 }, { "epoch": 0.7677870670534038, "grad_norm": 2.256875447579833, "learning_rate": 1.3487324015192721e-05, "loss": 0.1785, "step": 9000 }, { "epoch": 0.767872376727521, "grad_norm": 2.620045391155483, "learning_rate": 1.3477887143882994e-05, "loss": 0.2103, "step": 9001 }, { "epoch": 0.767957686401638, "grad_norm": 1.6329662487889447, "learning_rate": 1.3468453060842512e-05, "loss": 0.1942, "step": 9002 }, { "epoch": 0.768042996075755, "grad_norm": 1.847425450432741, "learning_rate": 1.3459021766791502e-05, "loss": 0.1351, "step": 9003 }, { "epoch": 0.768128305749872, "grad_norm": 1.6723926491490737, "learning_rate": 1.3449593262450011e-05, "loss": 0.2175, "step": 9004 }, { "epoch": 0.768213615423989, "grad_norm": 2.200308656714883, "learning_rate": 1.344016754853784e-05, "loss": 0.2131, "step": 9005 }, { "epoch": 0.7682989250981062, "grad_norm": 1.6098386226358765, "learning_rate": 1.34307446257746e-05, "loss": 0.1474, "step": 9006 }, { "epoch": 0.7683842347722232, "grad_norm": 1.8818176933793886, "learning_rate": 1.342132449487966e-05, "loss": 0.1443, "step": 9007 }, { "epoch": 0.7684695444463402, "grad_norm": 2.215104345098459, "learning_rate": 1.3411907156572235e-05, "loss": 0.2065, "step": 9008 }, { "epoch": 0.7685548541204572, "grad_norm": 1.3985455007778516, "learning_rate": 1.3402492611571272e-05, "loss": 0.1788, "step": 9009 }, { "epoch": 0.7686401637945743, "grad_norm": 2.0386501786933495, "learning_rate": 1.3393080860595514e-05, "loss": 0.2196, "step": 9010 }, { "epoch": 0.7687254734686914, "grad_norm": 1.3766250544178869, "learning_rate": 1.3383671904363504e-05, "loss": 0.1695, "step": 9011 }, { "epoch": 0.7688107831428084, "grad_norm": 2.01489717812789, "learning_rate": 1.3374265743593568e-05, "loss": 0.1564, "step": 9012 }, { "epoch": 0.7688960928169254, "grad_norm": 1.448338408305792, "learning_rate": 1.3364862379003812e-05, "loss": 0.1917, "step": 9013 }, { "epoch": 0.7689814024910425, "grad_norm": 1.947051592268192, "learning_rate": 1.335546181131212e-05, "loss": 0.2067, "step": 9014 }, { "epoch": 0.7690667121651595, "grad_norm": 1.62965957456749, "learning_rate": 1.3346064041236217e-05, "loss": 0.2096, "step": 9015 }, { "epoch": 0.7691520218392766, "grad_norm": 1.7590450458538278, "learning_rate": 1.3336669069493518e-05, "loss": 0.2656, "step": 9016 }, { "epoch": 0.7692373315133936, "grad_norm": 1.9411603745049866, "learning_rate": 1.3327276896801321e-05, "loss": 0.1168, "step": 9017 }, { "epoch": 0.7693226411875107, "grad_norm": 1.7072671338829104, "learning_rate": 1.3317887523876655e-05, "loss": 0.1218, "step": 9018 }, { "epoch": 0.7694079508616277, "grad_norm": 1.5318448993615748, "learning_rate": 1.3308500951436348e-05, "loss": 0.1859, "step": 9019 }, { "epoch": 0.7694932605357447, "grad_norm": 1.6648637159091009, "learning_rate": 1.3299117180197002e-05, "loss": 0.2265, "step": 9020 }, { "epoch": 0.7695785702098618, "grad_norm": 1.7994705030690015, "learning_rate": 1.3289736210875059e-05, "loss": 0.1937, "step": 9021 }, { "epoch": 0.7696638798839789, "grad_norm": 1.6758381582703157, "learning_rate": 1.3280358044186647e-05, "loss": 0.1867, "step": 9022 }, { "epoch": 0.7697491895580959, "grad_norm": 1.7998011108296663, "learning_rate": 1.3270982680847788e-05, "loss": 0.1899, "step": 9023 }, { "epoch": 0.7698344992322129, "grad_norm": 1.4959186023150448, "learning_rate": 1.326161012157422e-05, "loss": 0.1487, "step": 9024 }, { "epoch": 0.7699198089063299, "grad_norm": 2.1806734651191153, "learning_rate": 1.3252240367081492e-05, "loss": 0.2139, "step": 9025 }, { "epoch": 0.7700051185804471, "grad_norm": 1.7658218679135391, "learning_rate": 1.3242873418084939e-05, "loss": 0.2508, "step": 9026 }, { "epoch": 0.7700904282545641, "grad_norm": 1.9759720836474395, "learning_rate": 1.3233509275299654e-05, "loss": 0.1618, "step": 9027 }, { "epoch": 0.7701757379286811, "grad_norm": 1.867731884880383, "learning_rate": 1.322414793944059e-05, "loss": 0.1869, "step": 9028 }, { "epoch": 0.7702610476027981, "grad_norm": 1.6661552916251674, "learning_rate": 1.3214789411222372e-05, "loss": 0.1736, "step": 9029 }, { "epoch": 0.7703463572769153, "grad_norm": 1.8767931311272719, "learning_rate": 1.3205433691359526e-05, "loss": 0.1531, "step": 9030 }, { "epoch": 0.7704316669510323, "grad_norm": 1.7244773459955358, "learning_rate": 1.319608078056629e-05, "loss": 0.2273, "step": 9031 }, { "epoch": 0.7705169766251493, "grad_norm": 1.9311025273086313, "learning_rate": 1.3186730679556708e-05, "loss": 0.188, "step": 9032 }, { "epoch": 0.7706022862992663, "grad_norm": 2.008514162947633, "learning_rate": 1.3177383389044617e-05, "loss": 0.186, "step": 9033 }, { "epoch": 0.7706875959733834, "grad_norm": 1.5626555556112767, "learning_rate": 1.3168038909743623e-05, "loss": 0.1767, "step": 9034 }, { "epoch": 0.7707729056475005, "grad_norm": 1.892416113502278, "learning_rate": 1.3158697242367141e-05, "loss": 0.1923, "step": 9035 }, { "epoch": 0.7708582153216175, "grad_norm": 1.6778423731974572, "learning_rate": 1.314935838762833e-05, "loss": 0.1463, "step": 9036 }, { "epoch": 0.7709435249957345, "grad_norm": 1.9067316541178045, "learning_rate": 1.3140022346240216e-05, "loss": 0.2257, "step": 9037 }, { "epoch": 0.7710288346698516, "grad_norm": 1.182331028533731, "learning_rate": 1.313068911891549e-05, "loss": 0.176, "step": 9038 }, { "epoch": 0.7711141443439686, "grad_norm": 1.4743998195472148, "learning_rate": 1.3121358706366738e-05, "loss": 0.156, "step": 9039 }, { "epoch": 0.7711994540180857, "grad_norm": 1.6877137507943583, "learning_rate": 1.3112031109306267e-05, "loss": 0.2297, "step": 9040 }, { "epoch": 0.7712847636922027, "grad_norm": 1.4121184078365576, "learning_rate": 1.3102706328446223e-05, "loss": 0.1751, "step": 9041 }, { "epoch": 0.7713700733663197, "grad_norm": 2.3433117266002963, "learning_rate": 1.309338436449845e-05, "loss": 0.1391, "step": 9042 }, { "epoch": 0.7714553830404368, "grad_norm": 1.6493855141336755, "learning_rate": 1.3084065218174679e-05, "loss": 0.1825, "step": 9043 }, { "epoch": 0.7715406927145538, "grad_norm": 2.087561957073749, "learning_rate": 1.307474889018635e-05, "loss": 0.1812, "step": 9044 }, { "epoch": 0.7716260023886709, "grad_norm": 1.8231324059117247, "learning_rate": 1.3065435381244728e-05, "loss": 0.1556, "step": 9045 }, { "epoch": 0.7717113120627879, "grad_norm": 1.454679252691276, "learning_rate": 1.3056124692060845e-05, "loss": 0.1654, "step": 9046 }, { "epoch": 0.771796621736905, "grad_norm": 1.9010636489503001, "learning_rate": 1.304681682334552e-05, "loss": 0.2277, "step": 9047 }, { "epoch": 0.771881931411022, "grad_norm": 1.6527035729484647, "learning_rate": 1.3037511775809364e-05, "loss": 0.22, "step": 9048 }, { "epoch": 0.771967241085139, "grad_norm": 2.3604341870494085, "learning_rate": 1.3028209550162746e-05, "loss": 0.1806, "step": 9049 }, { "epoch": 0.772052550759256, "grad_norm": 1.7174111440181283, "learning_rate": 1.3018910147115893e-05, "loss": 0.1836, "step": 9050 }, { "epoch": 0.7721378604333732, "grad_norm": 1.7274372077272808, "learning_rate": 1.3009613567378703e-05, "loss": 0.1329, "step": 9051 }, { "epoch": 0.7722231701074902, "grad_norm": 1.5136508472581764, "learning_rate": 1.3000319811660967e-05, "loss": 0.1319, "step": 9052 }, { "epoch": 0.7723084797816072, "grad_norm": 2.5202128589891486, "learning_rate": 1.2991028880672196e-05, "loss": 0.1859, "step": 9053 }, { "epoch": 0.7723937894557242, "grad_norm": 2.1869863724565244, "learning_rate": 1.2981740775121704e-05, "loss": 0.2267, "step": 9054 }, { "epoch": 0.7724790991298414, "grad_norm": 2.112823718847611, "learning_rate": 1.2972455495718589e-05, "loss": 0.1994, "step": 9055 }, { "epoch": 0.7725644088039584, "grad_norm": 1.8799114750249981, "learning_rate": 1.2963173043171717e-05, "loss": 0.2074, "step": 9056 }, { "epoch": 0.7726497184780754, "grad_norm": 1.579194112789153, "learning_rate": 1.2953893418189806e-05, "loss": 0.2039, "step": 9057 }, { "epoch": 0.7727350281521924, "grad_norm": 1.3839489851821603, "learning_rate": 1.2944616621481231e-05, "loss": 0.173, "step": 9058 }, { "epoch": 0.7728203378263095, "grad_norm": 1.491314138102784, "learning_rate": 1.2935342653754284e-05, "loss": 0.1574, "step": 9059 }, { "epoch": 0.7729056475004266, "grad_norm": 1.4425754508875077, "learning_rate": 1.2926071515716964e-05, "loss": 0.1764, "step": 9060 }, { "epoch": 0.7729909571745436, "grad_norm": 1.6389619663706272, "learning_rate": 1.2916803208077072e-05, "loss": 0.1741, "step": 9061 }, { "epoch": 0.7730762668486606, "grad_norm": 1.8831094847912628, "learning_rate": 1.290753773154218e-05, "loss": 0.2272, "step": 9062 }, { "epoch": 0.7731615765227777, "grad_norm": 1.255247118127763, "learning_rate": 1.2898275086819706e-05, "loss": 0.0881, "step": 9063 }, { "epoch": 0.7732468861968947, "grad_norm": 1.4395341164566497, "learning_rate": 1.2889015274616739e-05, "loss": 0.176, "step": 9064 }, { "epoch": 0.7733321958710118, "grad_norm": 1.5188823971907672, "learning_rate": 1.287975829564026e-05, "loss": 0.2279, "step": 9065 }, { "epoch": 0.7734175055451288, "grad_norm": 1.7638502400420124, "learning_rate": 1.2870504150596985e-05, "loss": 0.2276, "step": 9066 }, { "epoch": 0.7735028152192459, "grad_norm": 1.6483732816652932, "learning_rate": 1.2861252840193406e-05, "loss": 0.1517, "step": 9067 }, { "epoch": 0.7735881248933629, "grad_norm": 2.2394962974018644, "learning_rate": 1.2852004365135823e-05, "loss": 0.1844, "step": 9068 }, { "epoch": 0.77367343456748, "grad_norm": 1.6651470250005445, "learning_rate": 1.2842758726130283e-05, "loss": 0.2081, "step": 9069 }, { "epoch": 0.773758744241597, "grad_norm": 1.3484481917186164, "learning_rate": 1.2833515923882694e-05, "loss": 0.171, "step": 9070 }, { "epoch": 0.7738440539157141, "grad_norm": 2.0813584949521013, "learning_rate": 1.2824275959098625e-05, "loss": 0.2387, "step": 9071 }, { "epoch": 0.7739293635898311, "grad_norm": 2.024582351189209, "learning_rate": 1.2815038832483556e-05, "loss": 0.1678, "step": 9072 }, { "epoch": 0.7740146732639481, "grad_norm": 1.6792557982207934, "learning_rate": 1.2805804544742672e-05, "loss": 0.1975, "step": 9073 }, { "epoch": 0.7740999829380651, "grad_norm": 1.7823721045025058, "learning_rate": 1.279657309658096e-05, "loss": 0.1883, "step": 9074 }, { "epoch": 0.7741852926121823, "grad_norm": 2.32188839812283, "learning_rate": 1.2787344488703195e-05, "loss": 0.193, "step": 9075 }, { "epoch": 0.7742706022862993, "grad_norm": 1.8367202713350383, "learning_rate": 1.2778118721813925e-05, "loss": 0.1995, "step": 9076 }, { "epoch": 0.7743559119604163, "grad_norm": 1.9077578350111706, "learning_rate": 1.2768895796617497e-05, "loss": 0.1569, "step": 9077 }, { "epoch": 0.7744412216345333, "grad_norm": 1.7646919418233122, "learning_rate": 1.2759675713818015e-05, "loss": 0.2266, "step": 9078 }, { "epoch": 0.7745265313086503, "grad_norm": 1.707340587809586, "learning_rate": 1.2750458474119426e-05, "loss": 0.1621, "step": 9079 }, { "epoch": 0.7746118409827675, "grad_norm": 1.902261407154948, "learning_rate": 1.2741244078225363e-05, "loss": 0.1718, "step": 9080 }, { "epoch": 0.7746971506568845, "grad_norm": 1.6701337356470256, "learning_rate": 1.2732032526839333e-05, "loss": 0.2175, "step": 9081 }, { "epoch": 0.7747824603310015, "grad_norm": 1.8453025100528038, "learning_rate": 1.2722823820664575e-05, "loss": 0.141, "step": 9082 }, { "epoch": 0.7748677700051185, "grad_norm": 1.885788966842743, "learning_rate": 1.2713617960404134e-05, "loss": 0.2469, "step": 9083 }, { "epoch": 0.7749530796792357, "grad_norm": 1.899313139992339, "learning_rate": 1.27044149467608e-05, "loss": 0.1929, "step": 9084 }, { "epoch": 0.7750383893533527, "grad_norm": 1.9223480882884834, "learning_rate": 1.2695214780437215e-05, "loss": 0.2131, "step": 9085 }, { "epoch": 0.7751236990274697, "grad_norm": 1.2457365761802437, "learning_rate": 1.2686017462135747e-05, "loss": 0.2002, "step": 9086 }, { "epoch": 0.7752090087015867, "grad_norm": 1.6169734204997122, "learning_rate": 1.2676822992558556e-05, "loss": 0.1181, "step": 9087 }, { "epoch": 0.7752943183757038, "grad_norm": 1.9881516210122268, "learning_rate": 1.2667631372407595e-05, "loss": 0.2197, "step": 9088 }, { "epoch": 0.7753796280498209, "grad_norm": 1.7700571790177253, "learning_rate": 1.2658442602384596e-05, "loss": 0.2109, "step": 9089 }, { "epoch": 0.7754649377239379, "grad_norm": 1.64031180162464, "learning_rate": 1.2649256683191068e-05, "loss": 0.1867, "step": 9090 }, { "epoch": 0.7755502473980549, "grad_norm": 1.2475915594653835, "learning_rate": 1.2640073615528298e-05, "loss": 0.1817, "step": 9091 }, { "epoch": 0.775635557072172, "grad_norm": 1.8739152949679565, "learning_rate": 1.2630893400097404e-05, "loss": 0.2145, "step": 9092 }, { "epoch": 0.775720866746289, "grad_norm": 2.2409042248258912, "learning_rate": 1.2621716037599196e-05, "loss": 0.1925, "step": 9093 }, { "epoch": 0.7758061764204061, "grad_norm": 1.8125130225404347, "learning_rate": 1.2612541528734351e-05, "loss": 0.2252, "step": 9094 }, { "epoch": 0.7758914860945231, "grad_norm": 1.3430835934412078, "learning_rate": 1.2603369874203286e-05, "loss": 0.1615, "step": 9095 }, { "epoch": 0.7759767957686402, "grad_norm": 1.873438502829238, "learning_rate": 1.2594201074706202e-05, "loss": 0.2365, "step": 9096 }, { "epoch": 0.7760621054427572, "grad_norm": 2.874981424022869, "learning_rate": 1.2585035130943096e-05, "loss": 0.2271, "step": 9097 }, { "epoch": 0.7761474151168742, "grad_norm": 1.989271475698659, "learning_rate": 1.2575872043613717e-05, "loss": 0.2332, "step": 9098 }, { "epoch": 0.7762327247909913, "grad_norm": 1.757339617382694, "learning_rate": 1.2566711813417665e-05, "loss": 0.1671, "step": 9099 }, { "epoch": 0.7763180344651084, "grad_norm": 1.3967492339250065, "learning_rate": 1.255755444105422e-05, "loss": 0.1621, "step": 9100 }, { "epoch": 0.7764033441392254, "grad_norm": 1.5090859525354718, "learning_rate": 1.2548399927222538e-05, "loss": 0.2127, "step": 9101 }, { "epoch": 0.7764886538133424, "grad_norm": 2.0125761408648173, "learning_rate": 1.2539248272621501e-05, "loss": 0.2318, "step": 9102 }, { "epoch": 0.7765739634874594, "grad_norm": 2.263786043146637, "learning_rate": 1.2530099477949792e-05, "loss": 0.1925, "step": 9103 }, { "epoch": 0.7766592731615766, "grad_norm": 1.6020626124648851, "learning_rate": 1.2520953543905862e-05, "loss": 0.1869, "step": 9104 }, { "epoch": 0.7767445828356936, "grad_norm": 1.3517326732955102, "learning_rate": 1.251181047118799e-05, "loss": 0.1883, "step": 9105 }, { "epoch": 0.7768298925098106, "grad_norm": 1.3148460764419545, "learning_rate": 1.2502670260494154e-05, "loss": 0.1731, "step": 9106 }, { "epoch": 0.7769152021839276, "grad_norm": 2.2405930010054895, "learning_rate": 1.2493532912522193e-05, "loss": 0.1845, "step": 9107 }, { "epoch": 0.7770005118580448, "grad_norm": 2.031655139134129, "learning_rate": 1.2484398427969685e-05, "loss": 0.1864, "step": 9108 }, { "epoch": 0.7770858215321618, "grad_norm": 2.077602578946152, "learning_rate": 1.2475266807534003e-05, "loss": 0.2359, "step": 9109 }, { "epoch": 0.7771711312062788, "grad_norm": 1.9963948420252173, "learning_rate": 1.2466138051912291e-05, "loss": 0.1679, "step": 9110 }, { "epoch": 0.7772564408803958, "grad_norm": 2.1087075767026042, "learning_rate": 1.2457012161801484e-05, "loss": 0.1993, "step": 9111 }, { "epoch": 0.7773417505545129, "grad_norm": 1.3717763565504209, "learning_rate": 1.2447889137898293e-05, "loss": 0.1435, "step": 9112 }, { "epoch": 0.77742706022863, "grad_norm": 1.7090672412945354, "learning_rate": 1.2438768980899208e-05, "loss": 0.1722, "step": 9113 }, { "epoch": 0.777512369902747, "grad_norm": 1.4323320787681912, "learning_rate": 1.2429651691500515e-05, "loss": 0.1215, "step": 9114 }, { "epoch": 0.777597679576864, "grad_norm": 1.5456622986201523, "learning_rate": 1.2420537270398276e-05, "loss": 0.2709, "step": 9115 }, { "epoch": 0.7776829892509811, "grad_norm": 1.6851409387931924, "learning_rate": 1.2411425718288311e-05, "loss": 0.1329, "step": 9116 }, { "epoch": 0.7777682989250981, "grad_norm": 1.8247847077350632, "learning_rate": 1.2402317035866251e-05, "loss": 0.1953, "step": 9117 }, { "epoch": 0.7778536085992152, "grad_norm": 1.558412393138611, "learning_rate": 1.2393211223827494e-05, "loss": 0.1205, "step": 9118 }, { "epoch": 0.7779389182733322, "grad_norm": 1.7793042447621112, "learning_rate": 1.2384108282867218e-05, "loss": 0.183, "step": 9119 }, { "epoch": 0.7780242279474492, "grad_norm": 1.6103246905211315, "learning_rate": 1.2375008213680367e-05, "loss": 0.1713, "step": 9120 }, { "epoch": 0.7781095376215663, "grad_norm": 1.8749092715882607, "learning_rate": 1.236591101696173e-05, "loss": 0.1657, "step": 9121 }, { "epoch": 0.7781948472956833, "grad_norm": 2.0985975941524897, "learning_rate": 1.2356816693405766e-05, "loss": 0.159, "step": 9122 }, { "epoch": 0.7782801569698004, "grad_norm": 2.1329702242146924, "learning_rate": 1.2347725243706831e-05, "loss": 0.2069, "step": 9123 }, { "epoch": 0.7783654666439174, "grad_norm": 1.4384081086796885, "learning_rate": 1.233863666855899e-05, "loss": 0.1579, "step": 9124 }, { "epoch": 0.7784507763180345, "grad_norm": 1.9208669112022723, "learning_rate": 1.23295509686561e-05, "loss": 0.2027, "step": 9125 }, { "epoch": 0.7785360859921515, "grad_norm": 1.6970742911249255, "learning_rate": 1.2320468144691805e-05, "loss": 0.1955, "step": 9126 }, { "epoch": 0.7786213956662685, "grad_norm": 2.2828386601614308, "learning_rate": 1.2311388197359569e-05, "loss": 0.1959, "step": 9127 }, { "epoch": 0.7787067053403856, "grad_norm": 1.9509224641566674, "learning_rate": 1.2302311127352534e-05, "loss": 0.1651, "step": 9128 }, { "epoch": 0.7787920150145027, "grad_norm": 2.1846707167081028, "learning_rate": 1.2293236935363733e-05, "loss": 0.1935, "step": 9129 }, { "epoch": 0.7788773246886197, "grad_norm": 1.5758136403618261, "learning_rate": 1.2284165622085924e-05, "loss": 0.1702, "step": 9130 }, { "epoch": 0.7789626343627367, "grad_norm": 1.652275931767968, "learning_rate": 1.2275097188211643e-05, "loss": 0.1771, "step": 9131 }, { "epoch": 0.7790479440368537, "grad_norm": 1.7402276618048826, "learning_rate": 1.2266031634433223e-05, "loss": 0.2439, "step": 9132 }, { "epoch": 0.7791332537109709, "grad_norm": 1.6198831841066093, "learning_rate": 1.2256968961442755e-05, "loss": 0.2053, "step": 9133 }, { "epoch": 0.7792185633850879, "grad_norm": 2.0642898191825116, "learning_rate": 1.224790916993217e-05, "loss": 0.2298, "step": 9134 }, { "epoch": 0.7793038730592049, "grad_norm": 1.9049725864470455, "learning_rate": 1.223885226059308e-05, "loss": 0.2051, "step": 9135 }, { "epoch": 0.7793891827333219, "grad_norm": 2.67538453662707, "learning_rate": 1.2229798234116968e-05, "loss": 0.2044, "step": 9136 }, { "epoch": 0.779474492407439, "grad_norm": 1.4788892870946346, "learning_rate": 1.222074709119505e-05, "loss": 0.1541, "step": 9137 }, { "epoch": 0.7795598020815561, "grad_norm": 1.3134929897032697, "learning_rate": 1.2211698832518331e-05, "loss": 0.1554, "step": 9138 }, { "epoch": 0.7796451117556731, "grad_norm": 1.947159713573798, "learning_rate": 1.2202653458777602e-05, "loss": 0.2795, "step": 9139 }, { "epoch": 0.7797304214297901, "grad_norm": 2.1639889040114255, "learning_rate": 1.2193610970663427e-05, "loss": 0.2363, "step": 9140 }, { "epoch": 0.7798157311039072, "grad_norm": 2.417774889050578, "learning_rate": 1.218457136886615e-05, "loss": 0.2241, "step": 9141 }, { "epoch": 0.7799010407780242, "grad_norm": 1.8593281972428257, "learning_rate": 1.2175534654075888e-05, "loss": 0.2403, "step": 9142 }, { "epoch": 0.7799863504521413, "grad_norm": 1.5946505377191957, "learning_rate": 1.2166500826982564e-05, "loss": 0.1711, "step": 9143 }, { "epoch": 0.7800716601262583, "grad_norm": 1.1894933133155836, "learning_rate": 1.2157469888275858e-05, "loss": 0.1521, "step": 9144 }, { "epoch": 0.7801569698003754, "grad_norm": 1.8492471245545685, "learning_rate": 1.214844183864523e-05, "loss": 0.1985, "step": 9145 }, { "epoch": 0.7802422794744924, "grad_norm": 2.0806744074548633, "learning_rate": 1.2139416678779913e-05, "loss": 0.2207, "step": 9146 }, { "epoch": 0.7803275891486094, "grad_norm": 1.8167249830609362, "learning_rate": 1.2130394409368967e-05, "loss": 0.2015, "step": 9147 }, { "epoch": 0.7804128988227265, "grad_norm": 1.3014857128705926, "learning_rate": 1.212137503110114e-05, "loss": 0.14, "step": 9148 }, { "epoch": 0.7804982084968436, "grad_norm": 1.5045825577026724, "learning_rate": 1.211235854466506e-05, "loss": 0.2446, "step": 9149 }, { "epoch": 0.7805835181709606, "grad_norm": 1.8562890739053195, "learning_rate": 1.2103344950749069e-05, "loss": 0.1318, "step": 9150 }, { "epoch": 0.7806688278450776, "grad_norm": 1.422694714029898, "learning_rate": 1.2094334250041312e-05, "loss": 0.1334, "step": 9151 }, { "epoch": 0.7807541375191946, "grad_norm": 1.7777865015584102, "learning_rate": 1.20853264432297e-05, "loss": 0.1897, "step": 9152 }, { "epoch": 0.7808394471933118, "grad_norm": 1.5893012393542143, "learning_rate": 1.2076321531001933e-05, "loss": 0.1353, "step": 9153 }, { "epoch": 0.7809247568674288, "grad_norm": 1.8624072954288953, "learning_rate": 1.2067319514045494e-05, "loss": 0.1965, "step": 9154 }, { "epoch": 0.7810100665415458, "grad_norm": 1.4985764424145223, "learning_rate": 1.2058320393047624e-05, "loss": 0.154, "step": 9155 }, { "epoch": 0.7810953762156628, "grad_norm": 1.7423614962466616, "learning_rate": 1.2049324168695398e-05, "loss": 0.2263, "step": 9156 }, { "epoch": 0.7811806858897798, "grad_norm": 2.100285138345152, "learning_rate": 1.2040330841675573e-05, "loss": 0.2094, "step": 9157 }, { "epoch": 0.781265995563897, "grad_norm": 1.893994190348141, "learning_rate": 1.2031340412674785e-05, "loss": 0.1682, "step": 9158 }, { "epoch": 0.781351305238014, "grad_norm": 1.7209180162974467, "learning_rate": 1.2022352882379389e-05, "loss": 0.1856, "step": 9159 }, { "epoch": 0.781436614912131, "grad_norm": 2.1832669308686374, "learning_rate": 1.2013368251475542e-05, "loss": 0.2822, "step": 9160 }, { "epoch": 0.781521924586248, "grad_norm": 1.4577224360179035, "learning_rate": 1.2004386520649164e-05, "loss": 0.1265, "step": 9161 }, { "epoch": 0.7816072342603652, "grad_norm": 1.7871889523992024, "learning_rate": 1.1995407690585953e-05, "loss": 0.1584, "step": 9162 }, { "epoch": 0.7816925439344822, "grad_norm": 2.085108903195129, "learning_rate": 1.198643176197144e-05, "loss": 0.1913, "step": 9163 }, { "epoch": 0.7817778536085992, "grad_norm": 1.5101027414598003, "learning_rate": 1.1977458735490826e-05, "loss": 0.2148, "step": 9164 }, { "epoch": 0.7818631632827162, "grad_norm": 2.0660484307032774, "learning_rate": 1.1968488611829204e-05, "loss": 0.2042, "step": 9165 }, { "epoch": 0.7819484729568333, "grad_norm": 1.5873504508157088, "learning_rate": 1.1959521391671374e-05, "loss": 0.1895, "step": 9166 }, { "epoch": 0.7820337826309504, "grad_norm": 2.1121471132858196, "learning_rate": 1.1950557075701941e-05, "loss": 0.2218, "step": 9167 }, { "epoch": 0.7821190923050674, "grad_norm": 1.9662655488676855, "learning_rate": 1.1941595664605265e-05, "loss": 0.2493, "step": 9168 }, { "epoch": 0.7822044019791844, "grad_norm": 1.627162374936558, "learning_rate": 1.1932637159065546e-05, "loss": 0.1567, "step": 9169 }, { "epoch": 0.7822897116533015, "grad_norm": 1.795398802380302, "learning_rate": 1.1923681559766663e-05, "loss": 0.185, "step": 9170 }, { "epoch": 0.7823750213274185, "grad_norm": 1.8003200511380029, "learning_rate": 1.1914728867392371e-05, "loss": 0.2341, "step": 9171 }, { "epoch": 0.7824603310015356, "grad_norm": 1.7714542272793004, "learning_rate": 1.190577908262614e-05, "loss": 0.2158, "step": 9172 }, { "epoch": 0.7825456406756526, "grad_norm": 1.208565684154802, "learning_rate": 1.1896832206151248e-05, "loss": 0.2242, "step": 9173 }, { "epoch": 0.7826309503497697, "grad_norm": 1.8205001144974904, "learning_rate": 1.1887888238650736e-05, "loss": 0.212, "step": 9174 }, { "epoch": 0.7827162600238867, "grad_norm": 2.4283970782261606, "learning_rate": 1.1878947180807415e-05, "loss": 0.1837, "step": 9175 }, { "epoch": 0.7828015696980037, "grad_norm": 1.5703862870374081, "learning_rate": 1.187000903330393e-05, "loss": 0.1844, "step": 9176 }, { "epoch": 0.7828868793721208, "grad_norm": 1.9617717533706611, "learning_rate": 1.1861073796822608e-05, "loss": 0.217, "step": 9177 }, { "epoch": 0.7829721890462379, "grad_norm": 1.8566407046166549, "learning_rate": 1.185214147204564e-05, "loss": 0.2195, "step": 9178 }, { "epoch": 0.7830574987203549, "grad_norm": 2.062627615015046, "learning_rate": 1.1843212059654957e-05, "loss": 0.1938, "step": 9179 }, { "epoch": 0.7831428083944719, "grad_norm": 1.7514843095517114, "learning_rate": 1.1834285560332264e-05, "loss": 0.1532, "step": 9180 }, { "epoch": 0.7832281180685889, "grad_norm": 1.9596609390641408, "learning_rate": 1.1825361974759063e-05, "loss": 0.2137, "step": 9181 }, { "epoch": 0.7833134277427061, "grad_norm": 1.2007018619619816, "learning_rate": 1.181644130361661e-05, "loss": 0.1476, "step": 9182 }, { "epoch": 0.7833987374168231, "grad_norm": 1.3921365559480146, "learning_rate": 1.1807523547585958e-05, "loss": 0.1726, "step": 9183 }, { "epoch": 0.7834840470909401, "grad_norm": 1.7919094305652372, "learning_rate": 1.1798608707347913e-05, "loss": 0.1618, "step": 9184 }, { "epoch": 0.7835693567650571, "grad_norm": 1.4911159642521625, "learning_rate": 1.1789696783583121e-05, "loss": 0.1885, "step": 9185 }, { "epoch": 0.7836546664391743, "grad_norm": 1.6940247879376549, "learning_rate": 1.1780787776971901e-05, "loss": 0.2407, "step": 9186 }, { "epoch": 0.7837399761132913, "grad_norm": 1.7217405484802661, "learning_rate": 1.1771881688194452e-05, "loss": 0.2106, "step": 9187 }, { "epoch": 0.7838252857874083, "grad_norm": 1.6937027421390236, "learning_rate": 1.1762978517930678e-05, "loss": 0.139, "step": 9188 }, { "epoch": 0.7839105954615253, "grad_norm": 1.7438009001710189, "learning_rate": 1.175407826686033e-05, "loss": 0.1824, "step": 9189 }, { "epoch": 0.7839959051356424, "grad_norm": 1.8799608724661756, "learning_rate": 1.1745180935662842e-05, "loss": 0.2377, "step": 9190 }, { "epoch": 0.7840812148097595, "grad_norm": 1.6469982064937423, "learning_rate": 1.173628652501752e-05, "loss": 0.1278, "step": 9191 }, { "epoch": 0.7841665244838765, "grad_norm": 2.532290021319674, "learning_rate": 1.1727395035603384e-05, "loss": 0.2031, "step": 9192 }, { "epoch": 0.7842518341579935, "grad_norm": 2.2779406228302213, "learning_rate": 1.1718506468099254e-05, "loss": 0.2133, "step": 9193 }, { "epoch": 0.7843371438321105, "grad_norm": 1.9681206560132376, "learning_rate": 1.1709620823183737e-05, "loss": 0.1715, "step": 9194 }, { "epoch": 0.7844224535062276, "grad_norm": 1.5828971847670128, "learning_rate": 1.1700738101535186e-05, "loss": 0.2234, "step": 9195 }, { "epoch": 0.7845077631803447, "grad_norm": 1.369821420015231, "learning_rate": 1.1691858303831766e-05, "loss": 0.1301, "step": 9196 }, { "epoch": 0.7845930728544617, "grad_norm": 1.5412496913590938, "learning_rate": 1.1682981430751378e-05, "loss": 0.1628, "step": 9197 }, { "epoch": 0.7846783825285787, "grad_norm": 1.544570439197995, "learning_rate": 1.1674107482971769e-05, "loss": 0.1949, "step": 9198 }, { "epoch": 0.7847636922026958, "grad_norm": 1.804734068430725, "learning_rate": 1.166523646117036e-05, "loss": 0.2184, "step": 9199 }, { "epoch": 0.7848490018768128, "grad_norm": 1.4111101300485627, "learning_rate": 1.1656368366024451e-05, "loss": 0.1872, "step": 9200 }, { "epoch": 0.7849343115509299, "grad_norm": 1.7455330104061282, "learning_rate": 1.1647503198211063e-05, "loss": 0.214, "step": 9201 }, { "epoch": 0.7850196212250469, "grad_norm": 1.6834825802098081, "learning_rate": 1.1638640958406999e-05, "loss": 0.2073, "step": 9202 }, { "epoch": 0.785104930899164, "grad_norm": 1.727443832610548, "learning_rate": 1.1629781647288846e-05, "loss": 0.2362, "step": 9203 }, { "epoch": 0.785190240573281, "grad_norm": 2.1731331014934727, "learning_rate": 1.1620925265532951e-05, "loss": 0.1939, "step": 9204 }, { "epoch": 0.785275550247398, "grad_norm": 1.3695661506623327, "learning_rate": 1.1612071813815496e-05, "loss": 0.2155, "step": 9205 }, { "epoch": 0.785360859921515, "grad_norm": 2.3678459032412946, "learning_rate": 1.1603221292812332e-05, "loss": 0.1862, "step": 9206 }, { "epoch": 0.7854461695956322, "grad_norm": 1.5727886964727467, "learning_rate": 1.1594373703199195e-05, "loss": 0.168, "step": 9207 }, { "epoch": 0.7855314792697492, "grad_norm": 1.807938722838003, "learning_rate": 1.1585529045651544e-05, "loss": 0.2472, "step": 9208 }, { "epoch": 0.7856167889438662, "grad_norm": 1.5098857955186655, "learning_rate": 1.1576687320844615e-05, "loss": 0.1413, "step": 9209 }, { "epoch": 0.7857020986179832, "grad_norm": 1.9930884624013856, "learning_rate": 1.1567848529453411e-05, "loss": 0.2053, "step": 9210 }, { "epoch": 0.7857874082921004, "grad_norm": 1.8506267774599783, "learning_rate": 1.1559012672152775e-05, "loss": 0.2028, "step": 9211 }, { "epoch": 0.7858727179662174, "grad_norm": 2.2407502675386675, "learning_rate": 1.1550179749617219e-05, "loss": 0.1572, "step": 9212 }, { "epoch": 0.7859580276403344, "grad_norm": 1.7873814443473015, "learning_rate": 1.1541349762521126e-05, "loss": 0.2369, "step": 9213 }, { "epoch": 0.7860433373144514, "grad_norm": 1.5270930325910037, "learning_rate": 1.1532522711538613e-05, "loss": 0.194, "step": 9214 }, { "epoch": 0.7861286469885685, "grad_norm": 1.9748818712277338, "learning_rate": 1.1523698597343575e-05, "loss": 0.1745, "step": 9215 }, { "epoch": 0.7862139566626856, "grad_norm": 1.9778248973595915, "learning_rate": 1.1514877420609688e-05, "loss": 0.2283, "step": 9216 }, { "epoch": 0.7862992663368026, "grad_norm": 1.4530411562778112, "learning_rate": 1.1506059182010393e-05, "loss": 0.1854, "step": 9217 }, { "epoch": 0.7863845760109196, "grad_norm": 1.3414005765978232, "learning_rate": 1.1497243882218928e-05, "loss": 0.1899, "step": 9218 }, { "epoch": 0.7864698856850367, "grad_norm": 1.3751415699991303, "learning_rate": 1.1488431521908278e-05, "loss": 0.2242, "step": 9219 }, { "epoch": 0.7865551953591537, "grad_norm": 2.124097969238322, "learning_rate": 1.1479622101751242e-05, "loss": 0.2217, "step": 9220 }, { "epoch": 0.7866405050332708, "grad_norm": 1.615070154774442, "learning_rate": 1.1470815622420362e-05, "loss": 0.1936, "step": 9221 }, { "epoch": 0.7867258147073878, "grad_norm": 1.8422918210893557, "learning_rate": 1.1462012084587964e-05, "loss": 0.1885, "step": 9222 }, { "epoch": 0.7868111243815049, "grad_norm": 1.6507713826683919, "learning_rate": 1.1453211488926153e-05, "loss": 0.1291, "step": 9223 }, { "epoch": 0.7868964340556219, "grad_norm": 1.5620492666536632, "learning_rate": 1.1444413836106804e-05, "loss": 0.1353, "step": 9224 }, { "epoch": 0.786981743729739, "grad_norm": 1.1494172776014402, "learning_rate": 1.1435619126801584e-05, "loss": 0.1081, "step": 9225 }, { "epoch": 0.787067053403856, "grad_norm": 1.7135730806232685, "learning_rate": 1.142682736168189e-05, "loss": 0.2041, "step": 9226 }, { "epoch": 0.7871523630779731, "grad_norm": 1.8221444665141522, "learning_rate": 1.141803854141898e-05, "loss": 0.2489, "step": 9227 }, { "epoch": 0.7872376727520901, "grad_norm": 1.9517491491899068, "learning_rate": 1.1409252666683778e-05, "loss": 0.2307, "step": 9228 }, { "epoch": 0.7873229824262071, "grad_norm": 1.6961572186276643, "learning_rate": 1.1400469738147074e-05, "loss": 0.1391, "step": 9229 }, { "epoch": 0.7874082921003241, "grad_norm": 2.510545233973324, "learning_rate": 1.139168975647939e-05, "loss": 0.1706, "step": 9230 }, { "epoch": 0.7874936017744413, "grad_norm": 1.8471342159840825, "learning_rate": 1.1382912722351024e-05, "loss": 0.2216, "step": 9231 }, { "epoch": 0.7875789114485583, "grad_norm": 1.634177554514302, "learning_rate": 1.1374138636432053e-05, "loss": 0.2731, "step": 9232 }, { "epoch": 0.7876642211226753, "grad_norm": 1.3745346582371982, "learning_rate": 1.136536749939235e-05, "loss": 0.1255, "step": 9233 }, { "epoch": 0.7877495307967923, "grad_norm": 1.6025183825695695, "learning_rate": 1.1356599311901534e-05, "loss": 0.1461, "step": 9234 }, { "epoch": 0.7878348404709093, "grad_norm": 1.6626668459125618, "learning_rate": 1.134783407462901e-05, "loss": 0.234, "step": 9235 }, { "epoch": 0.7879201501450265, "grad_norm": 2.1300140018629747, "learning_rate": 1.133907178824396e-05, "loss": 0.1967, "step": 9236 }, { "epoch": 0.7880054598191435, "grad_norm": 1.7838711108959937, "learning_rate": 1.1330312453415332e-05, "loss": 0.1651, "step": 9237 }, { "epoch": 0.7880907694932605, "grad_norm": 1.56677348334142, "learning_rate": 1.1321556070811861e-05, "loss": 0.2143, "step": 9238 }, { "epoch": 0.7881760791673775, "grad_norm": 2.081526646828601, "learning_rate": 1.1312802641102033e-05, "loss": 0.2223, "step": 9239 }, { "epoch": 0.7882613888414947, "grad_norm": 1.3159659806872097, "learning_rate": 1.1304052164954165e-05, "loss": 0.1598, "step": 9240 }, { "epoch": 0.7883466985156117, "grad_norm": 1.5609802484625126, "learning_rate": 1.1295304643036252e-05, "loss": 0.1437, "step": 9241 }, { "epoch": 0.7884320081897287, "grad_norm": 1.8440788105157844, "learning_rate": 1.1286560076016172e-05, "loss": 0.182, "step": 9242 }, { "epoch": 0.7885173178638457, "grad_norm": 1.7431657721674039, "learning_rate": 1.1277818464561507e-05, "loss": 0.1908, "step": 9243 }, { "epoch": 0.7886026275379628, "grad_norm": 1.8333195628024723, "learning_rate": 1.1269079809339633e-05, "loss": 0.1541, "step": 9244 }, { "epoch": 0.7886879372120799, "grad_norm": 1.7832998560796571, "learning_rate": 1.1260344111017701e-05, "loss": 0.1447, "step": 9245 }, { "epoch": 0.7887732468861969, "grad_norm": 1.3568073445958055, "learning_rate": 1.1251611370262632e-05, "loss": 0.1527, "step": 9246 }, { "epoch": 0.7888585565603139, "grad_norm": 1.5669760107797996, "learning_rate": 1.1242881587741127e-05, "loss": 0.126, "step": 9247 }, { "epoch": 0.788943866234431, "grad_norm": 1.6718342143280098, "learning_rate": 1.1234154764119642e-05, "loss": 0.1841, "step": 9248 }, { "epoch": 0.789029175908548, "grad_norm": 2.041819261829934, "learning_rate": 1.1225430900064455e-05, "loss": 0.1735, "step": 9249 }, { "epoch": 0.7891144855826651, "grad_norm": 2.0491644016960238, "learning_rate": 1.121670999624157e-05, "loss": 0.1742, "step": 9250 }, { "epoch": 0.7891997952567821, "grad_norm": 1.577655155755391, "learning_rate": 1.1207992053316778e-05, "loss": 0.2014, "step": 9251 }, { "epoch": 0.7892851049308992, "grad_norm": 1.8619575196899647, "learning_rate": 1.1199277071955649e-05, "loss": 0.1678, "step": 9252 }, { "epoch": 0.7893704146050162, "grad_norm": 1.8968934110096787, "learning_rate": 1.1190565052823548e-05, "loss": 0.2548, "step": 9253 }, { "epoch": 0.7894557242791332, "grad_norm": 1.2845796259652207, "learning_rate": 1.118185599658555e-05, "loss": 0.1506, "step": 9254 }, { "epoch": 0.7895410339532503, "grad_norm": 1.784842699423448, "learning_rate": 1.1173149903906577e-05, "loss": 0.1496, "step": 9255 }, { "epoch": 0.7896263436273674, "grad_norm": 2.7086657589081464, "learning_rate": 1.1164446775451282e-05, "loss": 0.2248, "step": 9256 }, { "epoch": 0.7897116533014844, "grad_norm": 1.5700543699144003, "learning_rate": 1.1155746611884105e-05, "loss": 0.1843, "step": 9257 }, { "epoch": 0.7897969629756014, "grad_norm": 2.2340822895095087, "learning_rate": 1.1147049413869259e-05, "loss": 0.1929, "step": 9258 }, { "epoch": 0.7898822726497184, "grad_norm": 2.0322174116054184, "learning_rate": 1.1138355182070725e-05, "loss": 0.2238, "step": 9259 }, { "epoch": 0.7899675823238356, "grad_norm": 1.8393741340388892, "learning_rate": 1.112966391715226e-05, "loss": 0.1658, "step": 9260 }, { "epoch": 0.7900528919979526, "grad_norm": 2.0841097592904103, "learning_rate": 1.1120975619777384e-05, "loss": 0.1385, "step": 9261 }, { "epoch": 0.7901382016720696, "grad_norm": 1.423557334235682, "learning_rate": 1.1112290290609445e-05, "loss": 0.1674, "step": 9262 }, { "epoch": 0.7902235113461866, "grad_norm": 1.4107245148258578, "learning_rate": 1.1103607930311466e-05, "loss": 0.1904, "step": 9263 }, { "epoch": 0.7903088210203038, "grad_norm": 1.6484503722931212, "learning_rate": 1.1094928539546346e-05, "loss": 0.1527, "step": 9264 }, { "epoch": 0.7903941306944208, "grad_norm": 1.7215011772442022, "learning_rate": 1.1086252118976682e-05, "loss": 0.2092, "step": 9265 }, { "epoch": 0.7904794403685378, "grad_norm": 2.16488422533549, "learning_rate": 1.1077578669264888e-05, "loss": 0.2073, "step": 9266 }, { "epoch": 0.7905647500426548, "grad_norm": 1.6514941617312184, "learning_rate": 1.1068908191073123e-05, "loss": 0.1672, "step": 9267 }, { "epoch": 0.7906500597167719, "grad_norm": 2.3119217433158927, "learning_rate": 1.1060240685063328e-05, "loss": 0.2206, "step": 9268 }, { "epoch": 0.790735369390889, "grad_norm": 1.704215164551531, "learning_rate": 1.1051576151897258e-05, "loss": 0.2451, "step": 9269 }, { "epoch": 0.790820679065006, "grad_norm": 1.7919002498980563, "learning_rate": 1.1042914592236347e-05, "loss": 0.1988, "step": 9270 }, { "epoch": 0.790905988739123, "grad_norm": 1.9708264012715515, "learning_rate": 1.1034256006741906e-05, "loss": 0.1453, "step": 9271 }, { "epoch": 0.79099129841324, "grad_norm": 2.1156629866059213, "learning_rate": 1.1025600396074954e-05, "loss": 0.1243, "step": 9272 }, { "epoch": 0.7910766080873571, "grad_norm": 2.0937930430073077, "learning_rate": 1.1016947760896301e-05, "loss": 0.1708, "step": 9273 }, { "epoch": 0.7911619177614742, "grad_norm": 1.5983183964630352, "learning_rate": 1.1008298101866515e-05, "loss": 0.1208, "step": 9274 }, { "epoch": 0.7912472274355912, "grad_norm": 1.7453088553722718, "learning_rate": 1.0999651419646e-05, "loss": 0.2056, "step": 9275 }, { "epoch": 0.7913325371097082, "grad_norm": 2.0011621913195334, "learning_rate": 1.0991007714894824e-05, "loss": 0.148, "step": 9276 }, { "epoch": 0.7914178467838253, "grad_norm": 1.7573061065291606, "learning_rate": 1.0982366988272924e-05, "loss": 0.2444, "step": 9277 }, { "epoch": 0.7915031564579423, "grad_norm": 1.7382786440026112, "learning_rate": 1.0973729240439967e-05, "loss": 0.1623, "step": 9278 }, { "epoch": 0.7915884661320594, "grad_norm": 1.641527926525538, "learning_rate": 1.0965094472055398e-05, "loss": 0.1906, "step": 9279 }, { "epoch": 0.7916737758061764, "grad_norm": 1.5293284445825566, "learning_rate": 1.0956462683778435e-05, "loss": 0.1854, "step": 9280 }, { "epoch": 0.7917590854802935, "grad_norm": 1.7999707961362483, "learning_rate": 1.0947833876268055e-05, "loss": 0.1701, "step": 9281 }, { "epoch": 0.7918443951544105, "grad_norm": 1.886098692673164, "learning_rate": 1.0939208050183064e-05, "loss": 0.1703, "step": 9282 }, { "epoch": 0.7919297048285275, "grad_norm": 1.5705368109527058, "learning_rate": 1.0930585206181942e-05, "loss": 0.1902, "step": 9283 }, { "epoch": 0.7920150145026446, "grad_norm": 1.6523900668098455, "learning_rate": 1.0921965344923035e-05, "loss": 0.1602, "step": 9284 }, { "epoch": 0.7921003241767617, "grad_norm": 1.4207288712986645, "learning_rate": 1.0913348467064417e-05, "loss": 0.129, "step": 9285 }, { "epoch": 0.7921856338508787, "grad_norm": 1.7793566361911048, "learning_rate": 1.0904734573263935e-05, "loss": 0.1989, "step": 9286 }, { "epoch": 0.7922709435249957, "grad_norm": 1.6187275358430866, "learning_rate": 1.089612366417922e-05, "loss": 0.2132, "step": 9287 }, { "epoch": 0.7923562531991127, "grad_norm": 1.7151375694817967, "learning_rate": 1.0887515740467662e-05, "loss": 0.216, "step": 9288 }, { "epoch": 0.7924415628732299, "grad_norm": 1.6836604483507285, "learning_rate": 1.0878910802786436e-05, "loss": 0.1351, "step": 9289 }, { "epoch": 0.7925268725473469, "grad_norm": 2.4216739694161866, "learning_rate": 1.0870308851792466e-05, "loss": 0.1516, "step": 9290 }, { "epoch": 0.7926121822214639, "grad_norm": 1.887581258881486, "learning_rate": 1.0861709888142507e-05, "loss": 0.201, "step": 9291 }, { "epoch": 0.7926974918955809, "grad_norm": 1.7442254433507314, "learning_rate": 1.085311391249299e-05, "loss": 0.1119, "step": 9292 }, { "epoch": 0.792782801569698, "grad_norm": 1.6139292304679478, "learning_rate": 1.0844520925500218e-05, "loss": 0.1975, "step": 9293 }, { "epoch": 0.7928681112438151, "grad_norm": 2.318429716050629, "learning_rate": 1.0835930927820181e-05, "loss": 0.2184, "step": 9294 }, { "epoch": 0.7929534209179321, "grad_norm": 2.034341191305333, "learning_rate": 1.0827343920108729e-05, "loss": 0.2019, "step": 9295 }, { "epoch": 0.7930387305920491, "grad_norm": 1.9472735834293864, "learning_rate": 1.0818759903021381e-05, "loss": 0.208, "step": 9296 }, { "epoch": 0.7931240402661662, "grad_norm": 1.5290394611443396, "learning_rate": 1.0810178877213517e-05, "loss": 0.157, "step": 9297 }, { "epoch": 0.7932093499402832, "grad_norm": 2.0605908429846966, "learning_rate": 1.0801600843340243e-05, "loss": 0.1896, "step": 9298 }, { "epoch": 0.7932946596144003, "grad_norm": 1.8259023212450072, "learning_rate": 1.0793025802056445e-05, "loss": 0.2022, "step": 9299 }, { "epoch": 0.7933799692885173, "grad_norm": 1.7978352510208582, "learning_rate": 1.0784453754016776e-05, "loss": 0.1856, "step": 9300 }, { "epoch": 0.7934652789626344, "grad_norm": 1.568549005771018, "learning_rate": 1.0775884699875676e-05, "loss": 0.1988, "step": 9301 }, { "epoch": 0.7935505886367514, "grad_norm": 1.5136884922222955, "learning_rate": 1.0767318640287343e-05, "loss": 0.1026, "step": 9302 }, { "epoch": 0.7936358983108684, "grad_norm": 1.6655020777592888, "learning_rate": 1.0758755575905732e-05, "loss": 0.1502, "step": 9303 }, { "epoch": 0.7937212079849855, "grad_norm": 1.4982677630092864, "learning_rate": 1.0750195507384637e-05, "loss": 0.1204, "step": 9304 }, { "epoch": 0.7938065176591026, "grad_norm": 2.069666462992501, "learning_rate": 1.074163843537751e-05, "loss": 0.2383, "step": 9305 }, { "epoch": 0.7938918273332196, "grad_norm": 1.91199397205456, "learning_rate": 1.0733084360537687e-05, "loss": 0.1273, "step": 9306 }, { "epoch": 0.7939771370073366, "grad_norm": 1.9702976743547223, "learning_rate": 1.0724533283518206e-05, "loss": 0.1889, "step": 9307 }, { "epoch": 0.7940624466814536, "grad_norm": 1.9253262181498403, "learning_rate": 1.0715985204971901e-05, "loss": 0.208, "step": 9308 }, { "epoch": 0.7941477563555707, "grad_norm": 1.7962464933420885, "learning_rate": 1.0707440125551372e-05, "loss": 0.1308, "step": 9309 }, { "epoch": 0.7942330660296878, "grad_norm": 1.6313741819388285, "learning_rate": 1.0698898045908972e-05, "loss": 0.1905, "step": 9310 }, { "epoch": 0.7943183757038048, "grad_norm": 1.4226751067552645, "learning_rate": 1.069035896669689e-05, "loss": 0.1663, "step": 9311 }, { "epoch": 0.7944036853779218, "grad_norm": 1.5916857347128481, "learning_rate": 1.0681822888566984e-05, "loss": 0.1904, "step": 9312 }, { "epoch": 0.7944889950520388, "grad_norm": 1.4581430401853137, "learning_rate": 1.0673289812170972e-05, "loss": 0.1686, "step": 9313 }, { "epoch": 0.794574304726156, "grad_norm": 2.196314789929627, "learning_rate": 1.0664759738160307e-05, "loss": 0.2055, "step": 9314 }, { "epoch": 0.794659614400273, "grad_norm": 1.4073841184135536, "learning_rate": 1.0656232667186206e-05, "loss": 0.1985, "step": 9315 }, { "epoch": 0.79474492407439, "grad_norm": 1.8874089654859718, "learning_rate": 1.0647708599899653e-05, "loss": 0.1521, "step": 9316 }, { "epoch": 0.794830233748507, "grad_norm": 2.608617432776124, "learning_rate": 1.0639187536951462e-05, "loss": 0.175, "step": 9317 }, { "epoch": 0.7949155434226242, "grad_norm": 2.0877562177198, "learning_rate": 1.0630669478992105e-05, "loss": 0.2127, "step": 9318 }, { "epoch": 0.7950008530967412, "grad_norm": 2.019685189672108, "learning_rate": 1.0622154426671948e-05, "loss": 0.1464, "step": 9319 }, { "epoch": 0.7950861627708582, "grad_norm": 1.863175683059986, "learning_rate": 1.0613642380641042e-05, "loss": 0.1406, "step": 9320 }, { "epoch": 0.7951714724449752, "grad_norm": 2.0171330451263554, "learning_rate": 1.0605133341549239e-05, "loss": 0.2295, "step": 9321 }, { "epoch": 0.7952567821190923, "grad_norm": 1.641148874111944, "learning_rate": 1.0596627310046165e-05, "loss": 0.1857, "step": 9322 }, { "epoch": 0.7953420917932094, "grad_norm": 1.7097791120848278, "learning_rate": 1.0588124286781204e-05, "loss": 0.1939, "step": 9323 }, { "epoch": 0.7954274014673264, "grad_norm": 1.8970925547292379, "learning_rate": 1.057962427240352e-05, "loss": 0.1501, "step": 9324 }, { "epoch": 0.7955127111414434, "grad_norm": 1.6293533406868423, "learning_rate": 1.0571127267562031e-05, "loss": 0.1188, "step": 9325 }, { "epoch": 0.7955980208155605, "grad_norm": 2.2539117505619624, "learning_rate": 1.0562633272905464e-05, "loss": 0.2544, "step": 9326 }, { "epoch": 0.7956833304896775, "grad_norm": 1.5693644988066813, "learning_rate": 1.0554142289082275e-05, "loss": 0.1893, "step": 9327 }, { "epoch": 0.7957686401637946, "grad_norm": 1.6891344420350451, "learning_rate": 1.0545654316740705e-05, "loss": 0.1934, "step": 9328 }, { "epoch": 0.7958539498379116, "grad_norm": 1.4358852273187104, "learning_rate": 1.0537169356528775e-05, "loss": 0.2321, "step": 9329 }, { "epoch": 0.7959392595120287, "grad_norm": 2.2824325631782734, "learning_rate": 1.0528687409094251e-05, "loss": 0.2267, "step": 9330 }, { "epoch": 0.7960245691861457, "grad_norm": 2.0071684401781815, "learning_rate": 1.0520208475084698e-05, "loss": 0.1956, "step": 9331 }, { "epoch": 0.7961098788602627, "grad_norm": 1.3542524261706392, "learning_rate": 1.0511732555147419e-05, "loss": 0.1408, "step": 9332 }, { "epoch": 0.7961951885343798, "grad_norm": 1.6611956764449676, "learning_rate": 1.0503259649929542e-05, "loss": 0.1694, "step": 9333 }, { "epoch": 0.7962804982084969, "grad_norm": 1.950580740307047, "learning_rate": 1.0494789760077883e-05, "loss": 0.1976, "step": 9334 }, { "epoch": 0.7963658078826139, "grad_norm": 1.3477814989352206, "learning_rate": 1.0486322886239109e-05, "loss": 0.2098, "step": 9335 }, { "epoch": 0.7964511175567309, "grad_norm": 2.30533934604588, "learning_rate": 1.0477859029059606e-05, "loss": 0.2072, "step": 9336 }, { "epoch": 0.7965364272308479, "grad_norm": 2.7122483835552194, "learning_rate": 1.0469398189185542e-05, "loss": 0.1714, "step": 9337 }, { "epoch": 0.7966217369049651, "grad_norm": 1.6184475914522118, "learning_rate": 1.046094036726285e-05, "loss": 0.2177, "step": 9338 }, { "epoch": 0.7967070465790821, "grad_norm": 1.9515491689771864, "learning_rate": 1.0452485563937265e-05, "loss": 0.2091, "step": 9339 }, { "epoch": 0.7967923562531991, "grad_norm": 1.4586984540630268, "learning_rate": 1.0444033779854251e-05, "loss": 0.1202, "step": 9340 }, { "epoch": 0.7968776659273161, "grad_norm": 1.8245904123198424, "learning_rate": 1.043558501565906e-05, "loss": 0.1853, "step": 9341 }, { "epoch": 0.7969629756014333, "grad_norm": 2.1429149324708145, "learning_rate": 1.0427139271996705e-05, "loss": 0.1677, "step": 9342 }, { "epoch": 0.7970482852755503, "grad_norm": 1.5499149852941627, "learning_rate": 1.041869654951198e-05, "loss": 0.1918, "step": 9343 }, { "epoch": 0.7971335949496673, "grad_norm": 1.5545170369458507, "learning_rate": 1.0410256848849437e-05, "loss": 0.144, "step": 9344 }, { "epoch": 0.7972189046237843, "grad_norm": 1.6664466076849411, "learning_rate": 1.0401820170653387e-05, "loss": 0.1828, "step": 9345 }, { "epoch": 0.7973042142979014, "grad_norm": 2.0600736969662936, "learning_rate": 1.0393386515567972e-05, "loss": 0.1885, "step": 9346 }, { "epoch": 0.7973895239720185, "grad_norm": 2.427843185036449, "learning_rate": 1.0384955884237003e-05, "loss": 0.1596, "step": 9347 }, { "epoch": 0.7974748336461355, "grad_norm": 1.7075550663615455, "learning_rate": 1.0376528277304148e-05, "loss": 0.1881, "step": 9348 }, { "epoch": 0.7975601433202525, "grad_norm": 1.2315077497367781, "learning_rate": 1.0368103695412801e-05, "loss": 0.1891, "step": 9349 }, { "epoch": 0.7976454529943695, "grad_norm": 1.4260909070843115, "learning_rate": 1.0359682139206134e-05, "loss": 0.2058, "step": 9350 }, { "epoch": 0.7977307626684866, "grad_norm": 1.665447822580326, "learning_rate": 1.0351263609327083e-05, "loss": 0.1847, "step": 9351 }, { "epoch": 0.7978160723426037, "grad_norm": 1.5490180689211597, "learning_rate": 1.0342848106418368e-05, "loss": 0.1449, "step": 9352 }, { "epoch": 0.7979013820167207, "grad_norm": 2.254112511887452, "learning_rate": 1.0334435631122458e-05, "loss": 0.121, "step": 9353 }, { "epoch": 0.7979866916908377, "grad_norm": 1.5695117787833337, "learning_rate": 1.0326026184081595e-05, "loss": 0.188, "step": 9354 }, { "epoch": 0.7980720013649548, "grad_norm": 1.9730273209356035, "learning_rate": 1.031761976593782e-05, "loss": 0.1868, "step": 9355 }, { "epoch": 0.7981573110390718, "grad_norm": 1.5524713257431322, "learning_rate": 1.0309216377332898e-05, "loss": 0.1493, "step": 9356 }, { "epoch": 0.7982426207131889, "grad_norm": 1.8607830198085593, "learning_rate": 1.0300816018908393e-05, "loss": 0.1823, "step": 9357 }, { "epoch": 0.7983279303873059, "grad_norm": 2.11007588363077, "learning_rate": 1.029241869130561e-05, "loss": 0.1752, "step": 9358 }, { "epoch": 0.798413240061423, "grad_norm": 1.625794289943125, "learning_rate": 1.0284024395165682e-05, "loss": 0.1665, "step": 9359 }, { "epoch": 0.79849854973554, "grad_norm": 1.9645140354182316, "learning_rate": 1.0275633131129413e-05, "loss": 0.2091, "step": 9360 }, { "epoch": 0.798583859409657, "grad_norm": 1.476936333375975, "learning_rate": 1.0267244899837475e-05, "loss": 0.1818, "step": 9361 }, { "epoch": 0.798669169083774, "grad_norm": 1.757693545237197, "learning_rate": 1.0258859701930246e-05, "loss": 0.2215, "step": 9362 }, { "epoch": 0.7987544787578912, "grad_norm": 1.9424749921173299, "learning_rate": 1.0250477538047893e-05, "loss": 0.1996, "step": 9363 }, { "epoch": 0.7988397884320082, "grad_norm": 2.1710447192618276, "learning_rate": 1.0242098408830353e-05, "loss": 0.183, "step": 9364 }, { "epoch": 0.7989250981061252, "grad_norm": 1.5186714936989913, "learning_rate": 1.0233722314917326e-05, "loss": 0.1482, "step": 9365 }, { "epoch": 0.7990104077802422, "grad_norm": 1.76611145880038, "learning_rate": 1.0225349256948286e-05, "loss": 0.2002, "step": 9366 }, { "epoch": 0.7990957174543594, "grad_norm": 1.4898713195842377, "learning_rate": 1.0216979235562451e-05, "loss": 0.1375, "step": 9367 }, { "epoch": 0.7991810271284764, "grad_norm": 1.7581876905365987, "learning_rate": 1.0208612251398874e-05, "loss": 0.1672, "step": 9368 }, { "epoch": 0.7992663368025934, "grad_norm": 1.9621168139113725, "learning_rate": 1.020024830509627e-05, "loss": 0.184, "step": 9369 }, { "epoch": 0.7993516464767104, "grad_norm": 1.349388490921294, "learning_rate": 1.0191887397293232e-05, "loss": 0.1155, "step": 9370 }, { "epoch": 0.7994369561508275, "grad_norm": 1.6986534844630086, "learning_rate": 1.0183529528628044e-05, "loss": 0.2208, "step": 9371 }, { "epoch": 0.7995222658249446, "grad_norm": 2.032481597779547, "learning_rate": 1.0175174699738793e-05, "loss": 0.2015, "step": 9372 }, { "epoch": 0.7996075754990616, "grad_norm": 1.773299963907237, "learning_rate": 1.016682291126333e-05, "loss": 0.2331, "step": 9373 }, { "epoch": 0.7996928851731786, "grad_norm": 1.9652888436636815, "learning_rate": 1.0158474163839249e-05, "loss": 0.2063, "step": 9374 }, { "epoch": 0.7997781948472957, "grad_norm": 1.5381713455949646, "learning_rate": 1.015012845810397e-05, "loss": 0.1615, "step": 9375 }, { "epoch": 0.7998635045214127, "grad_norm": 1.608016468452463, "learning_rate": 1.0141785794694597e-05, "loss": 0.1505, "step": 9376 }, { "epoch": 0.7999488141955298, "grad_norm": 1.7670254175142353, "learning_rate": 1.013344617424809e-05, "loss": 0.2118, "step": 9377 }, { "epoch": 0.8000341238696468, "grad_norm": 1.8736757052238115, "learning_rate": 1.0125109597401111e-05, "loss": 0.1538, "step": 9378 }, { "epoch": 0.8001194335437639, "grad_norm": 1.7436340895108262, "learning_rate": 1.0116776064790123e-05, "loss": 0.1407, "step": 9379 }, { "epoch": 0.8002047432178809, "grad_norm": 1.4341926674902887, "learning_rate": 1.0108445577051329e-05, "loss": 0.1695, "step": 9380 }, { "epoch": 0.800290052891998, "grad_norm": 1.3083532767183044, "learning_rate": 1.0100118134820758e-05, "loss": 0.2191, "step": 9381 }, { "epoch": 0.800375362566115, "grad_norm": 1.8600863210069822, "learning_rate": 1.0091793738734113e-05, "loss": 0.1454, "step": 9382 }, { "epoch": 0.8004606722402321, "grad_norm": 1.7868516079471295, "learning_rate": 1.0083472389426956e-05, "loss": 0.1553, "step": 9383 }, { "epoch": 0.8005459819143491, "grad_norm": 1.372017747408353, "learning_rate": 1.0075154087534566e-05, "loss": 0.1628, "step": 9384 }, { "epoch": 0.8006312915884661, "grad_norm": 1.197018621209035, "learning_rate": 1.0066838833692004e-05, "loss": 0.1499, "step": 9385 }, { "epoch": 0.8007166012625831, "grad_norm": 1.6948179587224133, "learning_rate": 1.0058526628534093e-05, "loss": 0.1854, "step": 9386 }, { "epoch": 0.8008019109367002, "grad_norm": 1.4673014863639942, "learning_rate": 1.0050217472695405e-05, "loss": 0.2084, "step": 9387 }, { "epoch": 0.8008872206108173, "grad_norm": 1.2070330277216885, "learning_rate": 1.0041911366810353e-05, "loss": 0.1088, "step": 9388 }, { "epoch": 0.8009725302849343, "grad_norm": 1.8645493772458772, "learning_rate": 1.0033608311513004e-05, "loss": 0.1826, "step": 9389 }, { "epoch": 0.8010578399590513, "grad_norm": 1.5512809044574427, "learning_rate": 1.0025308307437292e-05, "loss": 0.2182, "step": 9390 }, { "epoch": 0.8011431496331683, "grad_norm": 1.5415891249805114, "learning_rate": 1.0017011355216866e-05, "loss": 0.1721, "step": 9391 }, { "epoch": 0.8012284593072855, "grad_norm": 1.3277621951756124, "learning_rate": 1.000871745548515e-05, "loss": 0.1702, "step": 9392 }, { "epoch": 0.8013137689814025, "grad_norm": 2.4048660063344176, "learning_rate": 1.0000426608875346e-05, "loss": 0.2132, "step": 9393 }, { "epoch": 0.8013990786555195, "grad_norm": 1.420696902320456, "learning_rate": 9.992138816020412e-06, "loss": 0.1739, "step": 9394 }, { "epoch": 0.8014843883296365, "grad_norm": 1.6995126053605543, "learning_rate": 9.983854077553078e-06, "loss": 0.1554, "step": 9395 }, { "epoch": 0.8015696980037537, "grad_norm": 2.095202838665065, "learning_rate": 9.975572394105826e-06, "loss": 0.1807, "step": 9396 }, { "epoch": 0.8016550076778707, "grad_norm": 1.658000956186495, "learning_rate": 9.96729376631096e-06, "loss": 0.1666, "step": 9397 }, { "epoch": 0.8017403173519877, "grad_norm": 1.9148668739917885, "learning_rate": 9.95901819480045e-06, "loss": 0.1601, "step": 9398 }, { "epoch": 0.8018256270261047, "grad_norm": 1.6374345547942155, "learning_rate": 9.950745680206142e-06, "loss": 0.1742, "step": 9399 }, { "epoch": 0.8019109367002218, "grad_norm": 1.7878165758336853, "learning_rate": 9.942476223159569e-06, "loss": 0.1244, "step": 9400 }, { "epoch": 0.8019962463743389, "grad_norm": 2.083789648309272, "learning_rate": 9.93420982429209e-06, "loss": 0.231, "step": 9401 }, { "epoch": 0.8020815560484559, "grad_norm": 1.7218786720749042, "learning_rate": 9.92594648423476e-06, "loss": 0.2257, "step": 9402 }, { "epoch": 0.8021668657225729, "grad_norm": 1.7124778050007654, "learning_rate": 9.917686203618475e-06, "loss": 0.1896, "step": 9403 }, { "epoch": 0.80225217539669, "grad_norm": 2.3512866089651383, "learning_rate": 9.909428983073849e-06, "loss": 0.2241, "step": 9404 }, { "epoch": 0.802337485070807, "grad_norm": 1.6868585851039548, "learning_rate": 9.901174823231279e-06, "loss": 0.1847, "step": 9405 }, { "epoch": 0.8024227947449241, "grad_norm": 1.9192226063963587, "learning_rate": 9.892923724720932e-06, "loss": 0.1168, "step": 9406 }, { "epoch": 0.8025081044190411, "grad_norm": 1.6295235102392243, "learning_rate": 9.884675688172723e-06, "loss": 0.2176, "step": 9407 }, { "epoch": 0.8025934140931582, "grad_norm": 1.6528876381201743, "learning_rate": 9.876430714216356e-06, "loss": 0.1752, "step": 9408 }, { "epoch": 0.8026787237672752, "grad_norm": 1.8974381316380686, "learning_rate": 9.868188803481276e-06, "loss": 0.2139, "step": 9409 }, { "epoch": 0.8027640334413922, "grad_norm": 1.906871506729352, "learning_rate": 9.859949956596743e-06, "loss": 0.172, "step": 9410 }, { "epoch": 0.8028493431155093, "grad_norm": 1.8851200383261872, "learning_rate": 9.851714174191701e-06, "loss": 0.2626, "step": 9411 }, { "epoch": 0.8029346527896264, "grad_norm": 1.697846658897713, "learning_rate": 9.843481456894948e-06, "loss": 0.1811, "step": 9412 }, { "epoch": 0.8030199624637434, "grad_norm": 2.330658628508079, "learning_rate": 9.835251805334994e-06, "loss": 0.1898, "step": 9413 }, { "epoch": 0.8031052721378604, "grad_norm": 1.8980357054958577, "learning_rate": 9.827025220140129e-06, "loss": 0.23, "step": 9414 }, { "epoch": 0.8031905818119774, "grad_norm": 2.1988960964467488, "learning_rate": 9.818801701938413e-06, "loss": 0.1944, "step": 9415 }, { "epoch": 0.8032758914860946, "grad_norm": 2.440155343598254, "learning_rate": 9.810581251357647e-06, "loss": 0.1185, "step": 9416 }, { "epoch": 0.8033612011602116, "grad_norm": 1.684882005244667, "learning_rate": 9.802363869025467e-06, "loss": 0.2364, "step": 9417 }, { "epoch": 0.8034465108343286, "grad_norm": 1.497514572964228, "learning_rate": 9.794149555569165e-06, "loss": 0.1543, "step": 9418 }, { "epoch": 0.8035318205084456, "grad_norm": 1.5693594854234403, "learning_rate": 9.785938311615906e-06, "loss": 0.1501, "step": 9419 }, { "epoch": 0.8036171301825628, "grad_norm": 1.7200210033287646, "learning_rate": 9.777730137792557e-06, "loss": 0.2067, "step": 9420 }, { "epoch": 0.8037024398566798, "grad_norm": 1.7341947719649589, "learning_rate": 9.769525034725774e-06, "loss": 0.1533, "step": 9421 }, { "epoch": 0.8037877495307968, "grad_norm": 1.5830051768032114, "learning_rate": 9.761323003041955e-06, "loss": 0.14, "step": 9422 }, { "epoch": 0.8038730592049138, "grad_norm": 1.675033865059653, "learning_rate": 9.753124043367328e-06, "loss": 0.1513, "step": 9423 }, { "epoch": 0.8039583688790308, "grad_norm": 1.6239368188528691, "learning_rate": 9.744928156327776e-06, "loss": 0.1688, "step": 9424 }, { "epoch": 0.804043678553148, "grad_norm": 2.177824365783587, "learning_rate": 9.736735342549059e-06, "loss": 0.1931, "step": 9425 }, { "epoch": 0.804128988227265, "grad_norm": 1.5659463640189375, "learning_rate": 9.728545602656642e-06, "loss": 0.1644, "step": 9426 }, { "epoch": 0.804214297901382, "grad_norm": 1.7113805745857162, "learning_rate": 9.720358937275764e-06, "loss": 0.1944, "step": 9427 }, { "epoch": 0.804299607575499, "grad_norm": 1.822573064482853, "learning_rate": 9.712175347031433e-06, "loss": 0.2253, "step": 9428 }, { "epoch": 0.8043849172496161, "grad_norm": 1.4729552433442312, "learning_rate": 9.703994832548419e-06, "loss": 0.1296, "step": 9429 }, { "epoch": 0.8044702269237332, "grad_norm": 1.9256945855562924, "learning_rate": 9.695817394451285e-06, "loss": 0.1306, "step": 9430 }, { "epoch": 0.8045555365978502, "grad_norm": 2.362226468352868, "learning_rate": 9.687643033364297e-06, "loss": 0.22, "step": 9431 }, { "epoch": 0.8046408462719672, "grad_norm": 1.7496341595402807, "learning_rate": 9.67947174991155e-06, "loss": 0.2289, "step": 9432 }, { "epoch": 0.8047261559460843, "grad_norm": 1.6167851928967227, "learning_rate": 9.671303544716875e-06, "loss": 0.1629, "step": 9433 }, { "epoch": 0.8048114656202013, "grad_norm": 1.4018752396643301, "learning_rate": 9.663138418403872e-06, "loss": 0.2349, "step": 9434 }, { "epoch": 0.8048967752943184, "grad_norm": 1.7146123152083779, "learning_rate": 9.654976371595898e-06, "loss": 0.1702, "step": 9435 }, { "epoch": 0.8049820849684354, "grad_norm": 1.4991403341534093, "learning_rate": 9.646817404916081e-06, "loss": 0.2442, "step": 9436 }, { "epoch": 0.8050673946425525, "grad_norm": 1.439066075290542, "learning_rate": 9.638661518987324e-06, "loss": 0.159, "step": 9437 }, { "epoch": 0.8051527043166695, "grad_norm": 2.3781502560691776, "learning_rate": 9.630508714432268e-06, "loss": 0.1936, "step": 9438 }, { "epoch": 0.8052380139907865, "grad_norm": 1.2720007719421444, "learning_rate": 9.62235899187337e-06, "loss": 0.1484, "step": 9439 }, { "epoch": 0.8053233236649036, "grad_norm": 1.5809012439293135, "learning_rate": 9.614212351932772e-06, "loss": 0.1486, "step": 9440 }, { "epoch": 0.8054086333390207, "grad_norm": 1.4793670500657334, "learning_rate": 9.606068795232465e-06, "loss": 0.2311, "step": 9441 }, { "epoch": 0.8054939430131377, "grad_norm": 1.0152121071393405, "learning_rate": 9.59792832239415e-06, "loss": 0.1021, "step": 9442 }, { "epoch": 0.8055792526872547, "grad_norm": 1.3166702866001505, "learning_rate": 9.589790934039311e-06, "loss": 0.1242, "step": 9443 }, { "epoch": 0.8056645623613717, "grad_norm": 1.8264480463591606, "learning_rate": 9.581656630789181e-06, "loss": 0.1291, "step": 9444 }, { "epoch": 0.8057498720354889, "grad_norm": 2.2049541049947177, "learning_rate": 9.5735254132648e-06, "loss": 0.2454, "step": 9445 }, { "epoch": 0.8058351817096059, "grad_norm": 2.449142333181633, "learning_rate": 9.56539728208693e-06, "loss": 0.2225, "step": 9446 }, { "epoch": 0.8059204913837229, "grad_norm": 2.3233784671305444, "learning_rate": 9.557272237876102e-06, "loss": 0.2166, "step": 9447 }, { "epoch": 0.8060058010578399, "grad_norm": 1.897515720709411, "learning_rate": 9.549150281252633e-06, "loss": 0.2456, "step": 9448 }, { "epoch": 0.806091110731957, "grad_norm": 1.6609898527548534, "learning_rate": 9.541031412836581e-06, "loss": 0.1761, "step": 9449 }, { "epoch": 0.8061764204060741, "grad_norm": 1.3992721368518601, "learning_rate": 9.53291563324778e-06, "loss": 0.1186, "step": 9450 }, { "epoch": 0.8062617300801911, "grad_norm": 1.3409532229680328, "learning_rate": 9.52480294310582e-06, "loss": 0.1626, "step": 9451 }, { "epoch": 0.8063470397543081, "grad_norm": 1.6141674941885813, "learning_rate": 9.516693343030093e-06, "loss": 0.1981, "step": 9452 }, { "epoch": 0.8064323494284252, "grad_norm": 1.5347259928032235, "learning_rate": 9.508586833639677e-06, "loss": 0.1703, "step": 9453 }, { "epoch": 0.8065176591025423, "grad_norm": 1.6656111553826296, "learning_rate": 9.500483415553497e-06, "loss": 0.17, "step": 9454 }, { "epoch": 0.8066029687766593, "grad_norm": 1.8142719157780907, "learning_rate": 9.492383089390195e-06, "loss": 0.1391, "step": 9455 }, { "epoch": 0.8066882784507763, "grad_norm": 1.3612311833590025, "learning_rate": 9.484285855768182e-06, "loss": 0.165, "step": 9456 }, { "epoch": 0.8067735881248934, "grad_norm": 1.8595930340298987, "learning_rate": 9.476191715305649e-06, "loss": 0.1868, "step": 9457 }, { "epoch": 0.8068588977990104, "grad_norm": 1.8874027126073267, "learning_rate": 9.468100668620532e-06, "loss": 0.1813, "step": 9458 }, { "epoch": 0.8069442074731275, "grad_norm": 1.6540434912391646, "learning_rate": 9.460012716330546e-06, "loss": 0.1807, "step": 9459 }, { "epoch": 0.8070295171472445, "grad_norm": 1.861746229704513, "learning_rate": 9.451927859053145e-06, "loss": 0.1747, "step": 9460 }, { "epoch": 0.8071148268213616, "grad_norm": 2.0689628376340505, "learning_rate": 9.443846097405596e-06, "loss": 0.2412, "step": 9461 }, { "epoch": 0.8072001364954786, "grad_norm": 1.8794264360803525, "learning_rate": 9.435767432004877e-06, "loss": 0.1799, "step": 9462 }, { "epoch": 0.8072854461695956, "grad_norm": 1.5273334473557572, "learning_rate": 9.427691863467758e-06, "loss": 0.1437, "step": 9463 }, { "epoch": 0.8073707558437127, "grad_norm": 1.6727244278163615, "learning_rate": 9.419619392410756e-06, "loss": 0.2313, "step": 9464 }, { "epoch": 0.8074560655178297, "grad_norm": 1.3010218858805886, "learning_rate": 9.411550019450189e-06, "loss": 0.1618, "step": 9465 }, { "epoch": 0.8075413751919468, "grad_norm": 1.8246728630021105, "learning_rate": 9.403483745202068e-06, "loss": 0.1815, "step": 9466 }, { "epoch": 0.8076266848660638, "grad_norm": 1.7751611072335156, "learning_rate": 9.395420570282248e-06, "loss": 0.181, "step": 9467 }, { "epoch": 0.8077119945401808, "grad_norm": 2.3175855854119454, "learning_rate": 9.387360495306292e-06, "loss": 0.1967, "step": 9468 }, { "epoch": 0.8077973042142979, "grad_norm": 1.6295119515630638, "learning_rate": 9.379303520889548e-06, "loss": 0.1905, "step": 9469 }, { "epoch": 0.807882613888415, "grad_norm": 1.2825475960538437, "learning_rate": 9.371249647647124e-06, "loss": 0.1554, "step": 9470 }, { "epoch": 0.807967923562532, "grad_norm": 1.6588317614069812, "learning_rate": 9.363198876193884e-06, "loss": 0.2221, "step": 9471 }, { "epoch": 0.808053233236649, "grad_norm": 1.8009709653404375, "learning_rate": 9.35515120714447e-06, "loss": 0.2088, "step": 9472 }, { "epoch": 0.808138542910766, "grad_norm": 1.7699133184727514, "learning_rate": 9.347106641113263e-06, "loss": 0.1844, "step": 9473 }, { "epoch": 0.8082238525848832, "grad_norm": 1.5497249974589116, "learning_rate": 9.33906517871444e-06, "loss": 0.1605, "step": 9474 }, { "epoch": 0.8083091622590002, "grad_norm": 2.372524577617464, "learning_rate": 9.331026820561928e-06, "loss": 0.1727, "step": 9475 }, { "epoch": 0.8083944719331172, "grad_norm": 1.7979961049140605, "learning_rate": 9.322991567269395e-06, "loss": 0.1391, "step": 9476 }, { "epoch": 0.8084797816072342, "grad_norm": 1.727548171170871, "learning_rate": 9.314959419450303e-06, "loss": 0.1948, "step": 9477 }, { "epoch": 0.8085650912813513, "grad_norm": 2.1898398827672776, "learning_rate": 9.306930377717859e-06, "loss": 0.1301, "step": 9478 }, { "epoch": 0.8086504009554684, "grad_norm": 2.249471178428514, "learning_rate": 9.298904442685042e-06, "loss": 0.2404, "step": 9479 }, { "epoch": 0.8087357106295854, "grad_norm": 2.196277772731099, "learning_rate": 9.290881614964569e-06, "loss": 0.2351, "step": 9480 }, { "epoch": 0.8088210203037024, "grad_norm": 1.5864716180089466, "learning_rate": 9.282861895168981e-06, "loss": 0.1897, "step": 9481 }, { "epoch": 0.8089063299778195, "grad_norm": 1.5687187039722523, "learning_rate": 9.274845283910493e-06, "loss": 0.173, "step": 9482 }, { "epoch": 0.8089916396519365, "grad_norm": 1.6334012079084257, "learning_rate": 9.266831781801167e-06, "loss": 0.2913, "step": 9483 }, { "epoch": 0.8090769493260536, "grad_norm": 1.6637734175480672, "learning_rate": 9.258821389452777e-06, "loss": 0.2285, "step": 9484 }, { "epoch": 0.8091622590001706, "grad_norm": 1.536179063492373, "learning_rate": 9.250814107476875e-06, "loss": 0.1917, "step": 9485 }, { "epoch": 0.8092475686742877, "grad_norm": 1.9559066495520907, "learning_rate": 9.242809936484765e-06, "loss": 0.2574, "step": 9486 }, { "epoch": 0.8093328783484047, "grad_norm": 1.5995379048870746, "learning_rate": 9.234808877087554e-06, "loss": 0.1623, "step": 9487 }, { "epoch": 0.8094181880225217, "grad_norm": 1.2018665816894505, "learning_rate": 9.226810929896034e-06, "loss": 0.1529, "step": 9488 }, { "epoch": 0.8095034976966388, "grad_norm": 1.6790039800900058, "learning_rate": 9.218816095520848e-06, "loss": 0.177, "step": 9489 }, { "epoch": 0.8095888073707559, "grad_norm": 1.5665123349461152, "learning_rate": 9.21082437457234e-06, "loss": 0.2001, "step": 9490 }, { "epoch": 0.8096741170448729, "grad_norm": 1.809208347374001, "learning_rate": 9.20283576766064e-06, "loss": 0.1647, "step": 9491 }, { "epoch": 0.8097594267189899, "grad_norm": 2.424482553864164, "learning_rate": 9.194850275395633e-06, "loss": 0.1955, "step": 9492 }, { "epoch": 0.809844736393107, "grad_norm": 1.8980286711365293, "learning_rate": 9.186867898386952e-06, "loss": 0.1829, "step": 9493 }, { "epoch": 0.8099300460672241, "grad_norm": 1.5055523471555663, "learning_rate": 9.178888637244054e-06, "loss": 0.169, "step": 9494 }, { "epoch": 0.8100153557413411, "grad_norm": 1.369255986162814, "learning_rate": 9.170912492576061e-06, "loss": 0.1822, "step": 9495 }, { "epoch": 0.8101006654154581, "grad_norm": 1.7037995428773187, "learning_rate": 9.162939464991948e-06, "loss": 0.1804, "step": 9496 }, { "epoch": 0.8101859750895751, "grad_norm": 1.872062289081985, "learning_rate": 9.154969555100396e-06, "loss": 0.1565, "step": 9497 }, { "epoch": 0.8102712847636923, "grad_norm": 2.0533938668769993, "learning_rate": 9.147002763509865e-06, "loss": 0.1852, "step": 9498 }, { "epoch": 0.8103565944378093, "grad_norm": 1.7283805147804348, "learning_rate": 9.139039090828588e-06, "loss": 0.2533, "step": 9499 }, { "epoch": 0.8104419041119263, "grad_norm": 1.3754336800265017, "learning_rate": 9.131078537664539e-06, "loss": 0.1473, "step": 9500 }, { "epoch": 0.8105272137860433, "grad_norm": 1.8152526473653992, "learning_rate": 9.123121104625465e-06, "loss": 0.1806, "step": 9501 }, { "epoch": 0.8106125234601603, "grad_norm": 2.5699614775030404, "learning_rate": 9.115166792318858e-06, "loss": 0.2727, "step": 9502 }, { "epoch": 0.8106978331342775, "grad_norm": 1.5173764677844328, "learning_rate": 9.107215601352038e-06, "loss": 0.1589, "step": 9503 }, { "epoch": 0.8107831428083945, "grad_norm": 1.5361335885348484, "learning_rate": 9.099267532331973e-06, "loss": 0.1634, "step": 9504 }, { "epoch": 0.8108684524825115, "grad_norm": 2.410652987392121, "learning_rate": 9.091322585865497e-06, "loss": 0.1834, "step": 9505 }, { "epoch": 0.8109537621566285, "grad_norm": 1.6491779996390348, "learning_rate": 9.083380762559146e-06, "loss": 0.1653, "step": 9506 }, { "epoch": 0.8110390718307456, "grad_norm": 1.594821644440432, "learning_rate": 9.075442063019263e-06, "loss": 0.2156, "step": 9507 }, { "epoch": 0.8111243815048627, "grad_norm": 1.7464488647645968, "learning_rate": 9.067506487851884e-06, "loss": 0.1276, "step": 9508 }, { "epoch": 0.8112096911789797, "grad_norm": 1.5684050556907903, "learning_rate": 9.059574037662882e-06, "loss": 0.191, "step": 9509 }, { "epoch": 0.8112950008530967, "grad_norm": 2.2393236113179475, "learning_rate": 9.051644713057844e-06, "loss": 0.1575, "step": 9510 }, { "epoch": 0.8113803105272138, "grad_norm": 1.8956817594307147, "learning_rate": 9.04371851464213e-06, "loss": 0.1602, "step": 9511 }, { "epoch": 0.8114656202013308, "grad_norm": 1.5632618381986, "learning_rate": 9.035795443020873e-06, "loss": 0.152, "step": 9512 }, { "epoch": 0.8115509298754479, "grad_norm": 2.0695974590673725, "learning_rate": 9.027875498798943e-06, "loss": 0.1974, "step": 9513 }, { "epoch": 0.8116362395495649, "grad_norm": 1.9272779486734317, "learning_rate": 9.019958682580998e-06, "loss": 0.1975, "step": 9514 }, { "epoch": 0.811721549223682, "grad_norm": 2.229938718013612, "learning_rate": 9.012044994971425e-06, "loss": 0.2543, "step": 9515 }, { "epoch": 0.811806858897799, "grad_norm": 1.614717375410822, "learning_rate": 9.004134436574429e-06, "loss": 0.1726, "step": 9516 }, { "epoch": 0.811892168571916, "grad_norm": 1.8403100072889504, "learning_rate": 8.996227007993896e-06, "loss": 0.2188, "step": 9517 }, { "epoch": 0.8119774782460331, "grad_norm": 1.458809838330267, "learning_rate": 8.988322709833553e-06, "loss": 0.1807, "step": 9518 }, { "epoch": 0.8120627879201502, "grad_norm": 1.8271591373784781, "learning_rate": 8.980421542696832e-06, "loss": 0.1875, "step": 9519 }, { "epoch": 0.8121480975942672, "grad_norm": 1.4265502528054512, "learning_rate": 8.972523507186948e-06, "loss": 0.092, "step": 9520 }, { "epoch": 0.8122334072683842, "grad_norm": 2.3082803942764567, "learning_rate": 8.964628603906872e-06, "loss": 0.2279, "step": 9521 }, { "epoch": 0.8123187169425012, "grad_norm": 1.5982686480313337, "learning_rate": 8.956736833459328e-06, "loss": 0.139, "step": 9522 }, { "epoch": 0.8124040266166184, "grad_norm": 1.225066420642306, "learning_rate": 8.948848196446852e-06, "loss": 0.1474, "step": 9523 }, { "epoch": 0.8124893362907354, "grad_norm": 2.223692318970739, "learning_rate": 8.940962693471645e-06, "loss": 0.1919, "step": 9524 }, { "epoch": 0.8125746459648524, "grad_norm": 1.5631749030699957, "learning_rate": 8.933080325135756e-06, "loss": 0.2019, "step": 9525 }, { "epoch": 0.8126599556389694, "grad_norm": 3.169408882454987, "learning_rate": 8.925201092040958e-06, "loss": 0.1926, "step": 9526 }, { "epoch": 0.8127452653130866, "grad_norm": 2.357859547969136, "learning_rate": 8.91732499478879e-06, "loss": 0.1927, "step": 9527 }, { "epoch": 0.8128305749872036, "grad_norm": 1.6423772264926113, "learning_rate": 8.909452033980526e-06, "loss": 0.2174, "step": 9528 }, { "epoch": 0.8129158846613206, "grad_norm": 2.0572326909235357, "learning_rate": 8.901582210217274e-06, "loss": 0.2253, "step": 9529 }, { "epoch": 0.8130011943354376, "grad_norm": 2.0742223132546487, "learning_rate": 8.8937155240998e-06, "loss": 0.1048, "step": 9530 }, { "epoch": 0.8130865040095547, "grad_norm": 2.225097816974566, "learning_rate": 8.885851976228714e-06, "loss": 0.1901, "step": 9531 }, { "epoch": 0.8131718136836718, "grad_norm": 2.2362106359012057, "learning_rate": 8.877991567204352e-06, "loss": 0.1667, "step": 9532 }, { "epoch": 0.8132571233577888, "grad_norm": 1.8421351586332446, "learning_rate": 8.870134297626815e-06, "loss": 0.1858, "step": 9533 }, { "epoch": 0.8133424330319058, "grad_norm": 1.835893216512143, "learning_rate": 8.862280168095955e-06, "loss": 0.1281, "step": 9534 }, { "epoch": 0.8134277427060229, "grad_norm": 1.4614369563711975, "learning_rate": 8.854429179211388e-06, "loss": 0.1935, "step": 9535 }, { "epoch": 0.8135130523801399, "grad_norm": 1.82418489935302, "learning_rate": 8.846581331572528e-06, "loss": 0.2163, "step": 9536 }, { "epoch": 0.813598362054257, "grad_norm": 1.5964362749283876, "learning_rate": 8.838736625778476e-06, "loss": 0.1574, "step": 9537 }, { "epoch": 0.813683671728374, "grad_norm": 1.6897672328123852, "learning_rate": 8.830895062428163e-06, "loss": 0.1509, "step": 9538 }, { "epoch": 0.813768981402491, "grad_norm": 1.448228770404176, "learning_rate": 8.823056642120236e-06, "loss": 0.1879, "step": 9539 }, { "epoch": 0.8138542910766081, "grad_norm": 1.6340002093367518, "learning_rate": 8.81522136545312e-06, "loss": 0.1627, "step": 9540 }, { "epoch": 0.8139396007507251, "grad_norm": 2.047286130660162, "learning_rate": 8.807389233025e-06, "loss": 0.1882, "step": 9541 }, { "epoch": 0.8140249104248422, "grad_norm": 1.5689949847421425, "learning_rate": 8.799560245433814e-06, "loss": 0.1572, "step": 9542 }, { "epoch": 0.8141102200989592, "grad_norm": 2.088621206188004, "learning_rate": 8.791734403277262e-06, "loss": 0.2116, "step": 9543 }, { "epoch": 0.8141955297730763, "grad_norm": 2.3048064637596326, "learning_rate": 8.783911707152797e-06, "loss": 0.1587, "step": 9544 }, { "epoch": 0.8142808394471933, "grad_norm": 1.80134133588499, "learning_rate": 8.776092157657679e-06, "loss": 0.1708, "step": 9545 }, { "epoch": 0.8143661491213103, "grad_norm": 1.8494909101532513, "learning_rate": 8.768275755388833e-06, "loss": 0.2194, "step": 9546 }, { "epoch": 0.8144514587954274, "grad_norm": 2.216290682501567, "learning_rate": 8.76046250094304e-06, "loss": 0.2314, "step": 9547 }, { "epoch": 0.8145367684695445, "grad_norm": 2.3399863987714857, "learning_rate": 8.752652394916788e-06, "loss": 0.2216, "step": 9548 }, { "epoch": 0.8146220781436615, "grad_norm": 1.9449089275112257, "learning_rate": 8.74484543790634e-06, "loss": 0.2268, "step": 9549 }, { "epoch": 0.8147073878177785, "grad_norm": 1.9034763857411572, "learning_rate": 8.737041630507697e-06, "loss": 0.185, "step": 9550 }, { "epoch": 0.8147926974918955, "grad_norm": 2.000860625110951, "learning_rate": 8.729240973316671e-06, "loss": 0.1851, "step": 9551 }, { "epoch": 0.8148780071660127, "grad_norm": 2.0865261661690595, "learning_rate": 8.721443466928786e-06, "loss": 0.1978, "step": 9552 }, { "epoch": 0.8149633168401297, "grad_norm": 1.6327175861132317, "learning_rate": 8.713649111939332e-06, "loss": 0.1743, "step": 9553 }, { "epoch": 0.8150486265142467, "grad_norm": 2.801599359136886, "learning_rate": 8.705857908943376e-06, "loss": 0.1579, "step": 9554 }, { "epoch": 0.8151339361883637, "grad_norm": 1.825966367530904, "learning_rate": 8.698069858535728e-06, "loss": 0.1519, "step": 9555 }, { "epoch": 0.8152192458624808, "grad_norm": 2.5202124805788944, "learning_rate": 8.690284961310973e-06, "loss": 0.1701, "step": 9556 }, { "epoch": 0.8153045555365979, "grad_norm": 1.637622665586468, "learning_rate": 8.68250321786343e-06, "loss": 0.1136, "step": 9557 }, { "epoch": 0.8153898652107149, "grad_norm": 1.6881706176831088, "learning_rate": 8.674724628787228e-06, "loss": 0.167, "step": 9558 }, { "epoch": 0.8154751748848319, "grad_norm": 1.7567860637072075, "learning_rate": 8.666949194676171e-06, "loss": 0.2053, "step": 9559 }, { "epoch": 0.815560484558949, "grad_norm": 2.121267294062992, "learning_rate": 8.659176916123918e-06, "loss": 0.2048, "step": 9560 }, { "epoch": 0.815645794233066, "grad_norm": 1.7425048265431355, "learning_rate": 8.651407793723815e-06, "loss": 0.1378, "step": 9561 }, { "epoch": 0.8157311039071831, "grad_norm": 1.5923205678512107, "learning_rate": 8.643641828069005e-06, "loss": 0.182, "step": 9562 }, { "epoch": 0.8158164135813001, "grad_norm": 1.5482606942774393, "learning_rate": 8.635879019752374e-06, "loss": 0.1586, "step": 9563 }, { "epoch": 0.8159017232554172, "grad_norm": 2.1540868108570965, "learning_rate": 8.62811936936656e-06, "loss": 0.1388, "step": 9564 }, { "epoch": 0.8159870329295342, "grad_norm": 2.0110336174035353, "learning_rate": 8.620362877504006e-06, "loss": 0.1961, "step": 9565 }, { "epoch": 0.8160723426036512, "grad_norm": 2.160241013255596, "learning_rate": 8.612609544756828e-06, "loss": 0.1596, "step": 9566 }, { "epoch": 0.8161576522777683, "grad_norm": 1.6248135826581778, "learning_rate": 8.604859371716994e-06, "loss": 0.1788, "step": 9567 }, { "epoch": 0.8162429619518854, "grad_norm": 1.5956457494274898, "learning_rate": 8.597112358976172e-06, "loss": 0.1407, "step": 9568 }, { "epoch": 0.8163282716260024, "grad_norm": 1.572897837275628, "learning_rate": 8.589368507125805e-06, "loss": 0.1601, "step": 9569 }, { "epoch": 0.8164135813001194, "grad_norm": 1.722374717906727, "learning_rate": 8.581627816757088e-06, "loss": 0.1502, "step": 9570 }, { "epoch": 0.8164988909742364, "grad_norm": 1.9884308944177567, "learning_rate": 8.573890288461011e-06, "loss": 0.1559, "step": 9571 }, { "epoch": 0.8165842006483536, "grad_norm": 2.3773282334206494, "learning_rate": 8.56615592282825e-06, "loss": 0.2472, "step": 9572 }, { "epoch": 0.8166695103224706, "grad_norm": 1.8678133885172632, "learning_rate": 8.558424720449321e-06, "loss": 0.2052, "step": 9573 }, { "epoch": 0.8167548199965876, "grad_norm": 1.5452133656053333, "learning_rate": 8.550696681914438e-06, "loss": 0.1618, "step": 9574 }, { "epoch": 0.8168401296707046, "grad_norm": 2.1371103656636117, "learning_rate": 8.542971807813604e-06, "loss": 0.2495, "step": 9575 }, { "epoch": 0.8169254393448218, "grad_norm": 2.3857419876304666, "learning_rate": 8.535250098736575e-06, "loss": 0.16, "step": 9576 }, { "epoch": 0.8170107490189388, "grad_norm": 1.7992180847882306, "learning_rate": 8.527531555272849e-06, "loss": 0.1633, "step": 9577 }, { "epoch": 0.8170960586930558, "grad_norm": 1.8604722832386456, "learning_rate": 8.519816178011714e-06, "loss": 0.2043, "step": 9578 }, { "epoch": 0.8171813683671728, "grad_norm": 1.8296286113328342, "learning_rate": 8.512103967542167e-06, "loss": 0.205, "step": 9579 }, { "epoch": 0.8172666780412898, "grad_norm": 1.9184102406843277, "learning_rate": 8.504394924453029e-06, "loss": 0.2257, "step": 9580 }, { "epoch": 0.817351987715407, "grad_norm": 1.3152529591402178, "learning_rate": 8.496689049332835e-06, "loss": 0.1263, "step": 9581 }, { "epoch": 0.817437297389524, "grad_norm": 1.6557947648805276, "learning_rate": 8.488986342769883e-06, "loss": 0.149, "step": 9582 }, { "epoch": 0.817522607063641, "grad_norm": 1.585085484940125, "learning_rate": 8.481286805352234e-06, "loss": 0.1805, "step": 9583 }, { "epoch": 0.817607916737758, "grad_norm": 2.4168207404470605, "learning_rate": 8.473590437667706e-06, "loss": 0.181, "step": 9584 }, { "epoch": 0.8176932264118751, "grad_norm": 1.3342099089865107, "learning_rate": 8.465897240303877e-06, "loss": 0.1654, "step": 9585 }, { "epoch": 0.8177785360859922, "grad_norm": 1.8099030931359195, "learning_rate": 8.458207213848074e-06, "loss": 0.1794, "step": 9586 }, { "epoch": 0.8178638457601092, "grad_norm": 1.826555474939208, "learning_rate": 8.450520358887415e-06, "loss": 0.1479, "step": 9587 }, { "epoch": 0.8179491554342262, "grad_norm": 2.029449838962458, "learning_rate": 8.442836676008715e-06, "loss": 0.2055, "step": 9588 }, { "epoch": 0.8180344651083433, "grad_norm": 1.9069622772758577, "learning_rate": 8.435156165798608e-06, "loss": 0.2178, "step": 9589 }, { "epoch": 0.8181197747824603, "grad_norm": 1.7829283453492062, "learning_rate": 8.427478828843455e-06, "loss": 0.1722, "step": 9590 }, { "epoch": 0.8182050844565774, "grad_norm": 1.471147325768844, "learning_rate": 8.419804665729375e-06, "loss": 0.208, "step": 9591 }, { "epoch": 0.8182903941306944, "grad_norm": 2.8385041867269467, "learning_rate": 8.41213367704224e-06, "loss": 0.2002, "step": 9592 }, { "epoch": 0.8183757038048115, "grad_norm": 2.3325055788551987, "learning_rate": 8.404465863367727e-06, "loss": 0.1975, "step": 9593 }, { "epoch": 0.8184610134789285, "grad_norm": 1.6529199483864685, "learning_rate": 8.396801225291179e-06, "loss": 0.1888, "step": 9594 }, { "epoch": 0.8185463231530455, "grad_norm": 2.0688689183287585, "learning_rate": 8.389139763397796e-06, "loss": 0.1926, "step": 9595 }, { "epoch": 0.8186316328271626, "grad_norm": 1.4460175315353436, "learning_rate": 8.381481478272469e-06, "loss": 0.1732, "step": 9596 }, { "epoch": 0.8187169425012797, "grad_norm": 2.8761844682916444, "learning_rate": 8.37382637049987e-06, "loss": 0.2097, "step": 9597 }, { "epoch": 0.8188022521753967, "grad_norm": 2.38526904941777, "learning_rate": 8.366174440664425e-06, "loss": 0.2401, "step": 9598 }, { "epoch": 0.8188875618495137, "grad_norm": 1.5741755855880482, "learning_rate": 8.35852568935031e-06, "loss": 0.1445, "step": 9599 }, { "epoch": 0.8189728715236307, "grad_norm": 1.6635626816860365, "learning_rate": 8.350880117141503e-06, "loss": 0.1884, "step": 9600 }, { "epoch": 0.8190581811977479, "grad_norm": 2.1268266792648647, "learning_rate": 8.34323772462165e-06, "loss": 0.1975, "step": 9601 }, { "epoch": 0.8191434908718649, "grad_norm": 1.6821287645137821, "learning_rate": 8.335598512374243e-06, "loss": 0.146, "step": 9602 }, { "epoch": 0.8192288005459819, "grad_norm": 1.6186270825470381, "learning_rate": 8.327962480982482e-06, "loss": 0.227, "step": 9603 }, { "epoch": 0.8193141102200989, "grad_norm": 1.8359415257186582, "learning_rate": 8.320329631029344e-06, "loss": 0.1707, "step": 9604 }, { "epoch": 0.819399419894216, "grad_norm": 1.6333694603226776, "learning_rate": 8.312699963097554e-06, "loss": 0.2028, "step": 9605 }, { "epoch": 0.8194847295683331, "grad_norm": 1.749458160985968, "learning_rate": 8.305073477769599e-06, "loss": 0.2397, "step": 9606 }, { "epoch": 0.8195700392424501, "grad_norm": 2.1772265470816063, "learning_rate": 8.297450175627714e-06, "loss": 0.2226, "step": 9607 }, { "epoch": 0.8196553489165671, "grad_norm": 2.0784434597977812, "learning_rate": 8.289830057253883e-06, "loss": 0.1791, "step": 9608 }, { "epoch": 0.8197406585906842, "grad_norm": 1.4920719761093528, "learning_rate": 8.282213123229898e-06, "loss": 0.1447, "step": 9609 }, { "epoch": 0.8198259682648013, "grad_norm": 1.8919534351030272, "learning_rate": 8.274599374137254e-06, "loss": 0.1725, "step": 9610 }, { "epoch": 0.8199112779389183, "grad_norm": 2.0200584210549697, "learning_rate": 8.26698881055722e-06, "loss": 0.1563, "step": 9611 }, { "epoch": 0.8199965876130353, "grad_norm": 2.048808924145908, "learning_rate": 8.259381433070801e-06, "loss": 0.2407, "step": 9612 }, { "epoch": 0.8200818972871524, "grad_norm": 2.327014222879062, "learning_rate": 8.251777242258834e-06, "loss": 0.1171, "step": 9613 }, { "epoch": 0.8201672069612694, "grad_norm": 1.865564191328828, "learning_rate": 8.244176238701795e-06, "loss": 0.2048, "step": 9614 }, { "epoch": 0.8202525166353865, "grad_norm": 1.2513627729937888, "learning_rate": 8.236578422980024e-06, "loss": 0.1097, "step": 9615 }, { "epoch": 0.8203378263095035, "grad_norm": 1.619301708569612, "learning_rate": 8.228983795673562e-06, "loss": 0.1779, "step": 9616 }, { "epoch": 0.8204231359836205, "grad_norm": 1.7803054446776476, "learning_rate": 8.221392357362211e-06, "loss": 0.1331, "step": 9617 }, { "epoch": 0.8205084456577376, "grad_norm": 1.7311154915788622, "learning_rate": 8.213804108625545e-06, "loss": 0.1311, "step": 9618 }, { "epoch": 0.8205937553318546, "grad_norm": 1.3376557749651696, "learning_rate": 8.206219050042884e-06, "loss": 0.1469, "step": 9619 }, { "epoch": 0.8206790650059717, "grad_norm": 1.3472901662536323, "learning_rate": 8.198637182193303e-06, "loss": 0.1639, "step": 9620 }, { "epoch": 0.8207643746800887, "grad_norm": 1.6558559596770481, "learning_rate": 8.191058505655636e-06, "loss": 0.1319, "step": 9621 }, { "epoch": 0.8208496843542058, "grad_norm": 2.0518196310891827, "learning_rate": 8.183483021008498e-06, "loss": 0.2335, "step": 9622 }, { "epoch": 0.8209349940283228, "grad_norm": 1.489491048590688, "learning_rate": 8.175910728830194e-06, "loss": 0.2237, "step": 9623 }, { "epoch": 0.8210203037024398, "grad_norm": 1.8383261218351759, "learning_rate": 8.168341629698867e-06, "loss": 0.1957, "step": 9624 }, { "epoch": 0.8211056133765569, "grad_norm": 1.6564781463682978, "learning_rate": 8.160775724192365e-06, "loss": 0.1996, "step": 9625 }, { "epoch": 0.821190923050674, "grad_norm": 2.5022196452807175, "learning_rate": 8.153213012888305e-06, "loss": 0.1884, "step": 9626 }, { "epoch": 0.821276232724791, "grad_norm": 3.0713665344461925, "learning_rate": 8.145653496364054e-06, "loss": 0.1644, "step": 9627 }, { "epoch": 0.821361542398908, "grad_norm": 1.2797411431141892, "learning_rate": 8.13809717519673e-06, "loss": 0.1498, "step": 9628 }, { "epoch": 0.821446852073025, "grad_norm": 1.5511309712227639, "learning_rate": 8.130544049963263e-06, "loss": 0.1178, "step": 9629 }, { "epoch": 0.8215321617471422, "grad_norm": 1.6586036512930145, "learning_rate": 8.122994121240236e-06, "loss": 0.1757, "step": 9630 }, { "epoch": 0.8216174714212592, "grad_norm": 1.4192136233870407, "learning_rate": 8.115447389604085e-06, "loss": 0.1427, "step": 9631 }, { "epoch": 0.8217027810953762, "grad_norm": 1.5434354824648466, "learning_rate": 8.107903855630956e-06, "loss": 0.1352, "step": 9632 }, { "epoch": 0.8217880907694932, "grad_norm": 1.746521899409563, "learning_rate": 8.100363519896748e-06, "loss": 0.1373, "step": 9633 }, { "epoch": 0.8218734004436103, "grad_norm": 1.3676565183231557, "learning_rate": 8.09282638297712e-06, "loss": 0.1956, "step": 9634 }, { "epoch": 0.8219587101177274, "grad_norm": 1.3690828537010302, "learning_rate": 8.085292445447529e-06, "loss": 0.1797, "step": 9635 }, { "epoch": 0.8220440197918444, "grad_norm": 1.9317974956445272, "learning_rate": 8.077761707883102e-06, "loss": 0.1884, "step": 9636 }, { "epoch": 0.8221293294659614, "grad_norm": 1.5190124410555508, "learning_rate": 8.070234170858803e-06, "loss": 0.2198, "step": 9637 }, { "epoch": 0.8222146391400785, "grad_norm": 1.5462428641250985, "learning_rate": 8.062709834949312e-06, "loss": 0.2119, "step": 9638 }, { "epoch": 0.8222999488141955, "grad_norm": 1.5214501353807404, "learning_rate": 8.055188700729072e-06, "loss": 0.1448, "step": 9639 }, { "epoch": 0.8223852584883126, "grad_norm": 1.5312717202650774, "learning_rate": 8.04767076877228e-06, "loss": 0.2187, "step": 9640 }, { "epoch": 0.8224705681624296, "grad_norm": 1.3840958410688542, "learning_rate": 8.040156039652874e-06, "loss": 0.1891, "step": 9641 }, { "epoch": 0.8225558778365467, "grad_norm": 1.6118229886405657, "learning_rate": 8.032644513944609e-06, "loss": 0.1585, "step": 9642 }, { "epoch": 0.8226411875106637, "grad_norm": 1.5301737214240796, "learning_rate": 8.025136192220894e-06, "loss": 0.2561, "step": 9643 }, { "epoch": 0.8227264971847807, "grad_norm": 1.746438694277375, "learning_rate": 8.017631075054994e-06, "loss": 0.1854, "step": 9644 }, { "epoch": 0.8228118068588978, "grad_norm": 1.5454650772725245, "learning_rate": 8.010129163019864e-06, "loss": 0.1251, "step": 9645 }, { "epoch": 0.8228971165330149, "grad_norm": 1.2296433359671228, "learning_rate": 8.002630456688238e-06, "loss": 0.1589, "step": 9646 }, { "epoch": 0.8229824262071319, "grad_norm": 1.9834786140167893, "learning_rate": 7.995134956632599e-06, "loss": 0.1845, "step": 9647 }, { "epoch": 0.8230677358812489, "grad_norm": 1.663072891766581, "learning_rate": 7.9876426634252e-06, "loss": 0.1746, "step": 9648 }, { "epoch": 0.823153045555366, "grad_norm": 1.7623104615800205, "learning_rate": 7.980153577638022e-06, "loss": 0.2428, "step": 9649 }, { "epoch": 0.8232383552294831, "grad_norm": 1.3974964235973146, "learning_rate": 7.972667699842818e-06, "loss": 0.1574, "step": 9650 }, { "epoch": 0.8233236649036001, "grad_norm": 2.0009964606364843, "learning_rate": 7.965185030611127e-06, "loss": 0.2131, "step": 9651 }, { "epoch": 0.8234089745777171, "grad_norm": 1.4617862785721454, "learning_rate": 7.957705570514163e-06, "loss": 0.1372, "step": 9652 }, { "epoch": 0.8234942842518341, "grad_norm": 1.8799651209659674, "learning_rate": 7.95022932012297e-06, "loss": 0.2173, "step": 9653 }, { "epoch": 0.8235795939259511, "grad_norm": 1.454356830189986, "learning_rate": 7.942756280008324e-06, "loss": 0.1887, "step": 9654 }, { "epoch": 0.8236649036000683, "grad_norm": 1.5341614273509514, "learning_rate": 7.935286450740743e-06, "loss": 0.1259, "step": 9655 }, { "epoch": 0.8237502132741853, "grad_norm": 1.600781208297111, "learning_rate": 7.927819832890498e-06, "loss": 0.1533, "step": 9656 }, { "epoch": 0.8238355229483023, "grad_norm": 1.95002066527934, "learning_rate": 7.920356427027648e-06, "loss": 0.1801, "step": 9657 }, { "epoch": 0.8239208326224193, "grad_norm": 1.529806897312993, "learning_rate": 7.912896233721973e-06, "loss": 0.2243, "step": 9658 }, { "epoch": 0.8240061422965365, "grad_norm": 1.9274990629992947, "learning_rate": 7.905439253543023e-06, "loss": 0.1626, "step": 9659 }, { "epoch": 0.8240914519706535, "grad_norm": 1.7428130664656938, "learning_rate": 7.897985487060094e-06, "loss": 0.1914, "step": 9660 }, { "epoch": 0.8241767616447705, "grad_norm": 1.6875719832149692, "learning_rate": 7.890534934842242e-06, "loss": 0.1858, "step": 9661 }, { "epoch": 0.8242620713188875, "grad_norm": 1.7967070127827505, "learning_rate": 7.883087597458278e-06, "loss": 0.1754, "step": 9662 }, { "epoch": 0.8243473809930046, "grad_norm": 2.463866413371856, "learning_rate": 7.875643475476757e-06, "loss": 0.1504, "step": 9663 }, { "epoch": 0.8244326906671217, "grad_norm": 1.3222775254495867, "learning_rate": 7.868202569466031e-06, "loss": 0.2099, "step": 9664 }, { "epoch": 0.8245180003412387, "grad_norm": 2.3522826451296464, "learning_rate": 7.860764879994126e-06, "loss": 0.2405, "step": 9665 }, { "epoch": 0.8246033100153557, "grad_norm": 2.3183554671012767, "learning_rate": 7.853330407628912e-06, "loss": 0.2313, "step": 9666 }, { "epoch": 0.8246886196894728, "grad_norm": 2.19394584281519, "learning_rate": 7.845899152937946e-06, "loss": 0.1576, "step": 9667 }, { "epoch": 0.8247739293635898, "grad_norm": 1.8275917775373869, "learning_rate": 7.838471116488577e-06, "loss": 0.2026, "step": 9668 }, { "epoch": 0.8248592390377069, "grad_norm": 1.5294864386025593, "learning_rate": 7.831046298847894e-06, "loss": 0.1636, "step": 9669 }, { "epoch": 0.8249445487118239, "grad_norm": 1.5434438239657564, "learning_rate": 7.823624700582728e-06, "loss": 0.1647, "step": 9670 }, { "epoch": 0.825029858385941, "grad_norm": 1.6609210238711627, "learning_rate": 7.816206322259712e-06, "loss": 0.1464, "step": 9671 }, { "epoch": 0.825115168060058, "grad_norm": 1.788487104480848, "learning_rate": 7.808791164445156e-06, "loss": 0.2269, "step": 9672 }, { "epoch": 0.825200477734175, "grad_norm": 2.3144976261544556, "learning_rate": 7.801379227705203e-06, "loss": 0.2133, "step": 9673 }, { "epoch": 0.8252857874082921, "grad_norm": 1.5679561719024024, "learning_rate": 7.793970512605703e-06, "loss": 0.1809, "step": 9674 }, { "epoch": 0.8253710970824092, "grad_norm": 1.4580228565868196, "learning_rate": 7.786565019712271e-06, "loss": 0.2076, "step": 9675 }, { "epoch": 0.8254564067565262, "grad_norm": 1.6957642968424465, "learning_rate": 7.779162749590268e-06, "loss": 0.2063, "step": 9676 }, { "epoch": 0.8255417164306432, "grad_norm": 1.808689848157332, "learning_rate": 7.771763702804852e-06, "loss": 0.1486, "step": 9677 }, { "epoch": 0.8256270261047602, "grad_norm": 2.1927040323561093, "learning_rate": 7.764367879920852e-06, "loss": 0.1555, "step": 9678 }, { "epoch": 0.8257123357788774, "grad_norm": 1.6563124734864072, "learning_rate": 7.756975281502932e-06, "loss": 0.114, "step": 9679 }, { "epoch": 0.8257976454529944, "grad_norm": 1.9157873361825464, "learning_rate": 7.749585908115475e-06, "loss": 0.209, "step": 9680 }, { "epoch": 0.8258829551271114, "grad_norm": 1.8999966470789187, "learning_rate": 7.742199760322616e-06, "loss": 0.2039, "step": 9681 }, { "epoch": 0.8259682648012284, "grad_norm": 1.6892532142155807, "learning_rate": 7.734816838688248e-06, "loss": 0.1773, "step": 9682 }, { "epoch": 0.8260535744753456, "grad_norm": 1.5208111939930802, "learning_rate": 7.727437143776012e-06, "loss": 0.2208, "step": 9683 }, { "epoch": 0.8261388841494626, "grad_norm": 1.6644457165388253, "learning_rate": 7.720060676149315e-06, "loss": 0.1195, "step": 9684 }, { "epoch": 0.8262241938235796, "grad_norm": 1.6918907969159727, "learning_rate": 7.712687436371302e-06, "loss": 0.1475, "step": 9685 }, { "epoch": 0.8263095034976966, "grad_norm": 2.0810684354015625, "learning_rate": 7.705317425004899e-06, "loss": 0.1657, "step": 9686 }, { "epoch": 0.8263948131718137, "grad_norm": 1.8812560933670266, "learning_rate": 7.697950642612756e-06, "loss": 0.2208, "step": 9687 }, { "epoch": 0.8264801228459308, "grad_norm": 2.0331110718159437, "learning_rate": 7.69058708975729e-06, "loss": 0.2372, "step": 9688 }, { "epoch": 0.8265654325200478, "grad_norm": 1.5858351199924643, "learning_rate": 7.683226767000667e-06, "loss": 0.2685, "step": 9689 }, { "epoch": 0.8266507421941648, "grad_norm": 1.6968330560772715, "learning_rate": 7.675869674904806e-06, "loss": 0.248, "step": 9690 }, { "epoch": 0.8267360518682819, "grad_norm": 2.0482112803254795, "learning_rate": 7.66851581403139e-06, "loss": 0.1906, "step": 9691 }, { "epoch": 0.8268213615423989, "grad_norm": 1.4230721432968414, "learning_rate": 7.661165184941832e-06, "loss": 0.2132, "step": 9692 }, { "epoch": 0.826906671216516, "grad_norm": 1.9820392709223174, "learning_rate": 7.653817788197343e-06, "loss": 0.1924, "step": 9693 }, { "epoch": 0.826991980890633, "grad_norm": 2.3359802650441073, "learning_rate": 7.646473624358819e-06, "loss": 0.1942, "step": 9694 }, { "epoch": 0.82707729056475, "grad_norm": 2.141430000359958, "learning_rate": 7.639132693986972e-06, "loss": 0.157, "step": 9695 }, { "epoch": 0.8271626002388671, "grad_norm": 1.4063947391197877, "learning_rate": 7.631794997642245e-06, "loss": 0.1888, "step": 9696 }, { "epoch": 0.8272479099129841, "grad_norm": 1.5935663603654244, "learning_rate": 7.62446053588482e-06, "loss": 0.1682, "step": 9697 }, { "epoch": 0.8273332195871012, "grad_norm": 1.6569469353116286, "learning_rate": 7.617129309274645e-06, "loss": 0.1772, "step": 9698 }, { "epoch": 0.8274185292612182, "grad_norm": 2.147226887256884, "learning_rate": 7.6098013183714454e-06, "loss": 0.167, "step": 9699 }, { "epoch": 0.8275038389353353, "grad_norm": 1.6052036484194676, "learning_rate": 7.6024765637346286e-06, "loss": 0.2141, "step": 9700 }, { "epoch": 0.8275891486094523, "grad_norm": 1.656335216705138, "learning_rate": 7.595155045923435e-06, "loss": 0.1707, "step": 9701 }, { "epoch": 0.8276744582835693, "grad_norm": 2.235456725013745, "learning_rate": 7.587836765496819e-06, "loss": 0.2022, "step": 9702 }, { "epoch": 0.8277597679576864, "grad_norm": 1.638185849593268, "learning_rate": 7.58052172301349e-06, "loss": 0.1748, "step": 9703 }, { "epoch": 0.8278450776318035, "grad_norm": 1.8939521455046509, "learning_rate": 7.573209919031909e-06, "loss": 0.1472, "step": 9704 }, { "epoch": 0.8279303873059205, "grad_norm": 1.4853483522319024, "learning_rate": 7.565901354110283e-06, "loss": 0.1512, "step": 9705 }, { "epoch": 0.8280156969800375, "grad_norm": 1.5586735984598072, "learning_rate": 7.558596028806614e-06, "loss": 0.1175, "step": 9706 }, { "epoch": 0.8281010066541545, "grad_norm": 1.8573485158398793, "learning_rate": 7.551293943678583e-06, "loss": 0.2136, "step": 9707 }, { "epoch": 0.8281863163282717, "grad_norm": 1.986996338563031, "learning_rate": 7.543995099283702e-06, "loss": 0.2809, "step": 9708 }, { "epoch": 0.8282716260023887, "grad_norm": 1.4006368295628748, "learning_rate": 7.536699496179184e-06, "loss": 0.1511, "step": 9709 }, { "epoch": 0.8283569356765057, "grad_norm": 1.5785891917190356, "learning_rate": 7.5294071349220065e-06, "loss": 0.1526, "step": 9710 }, { "epoch": 0.8284422453506227, "grad_norm": 1.5606212765332628, "learning_rate": 7.5221180160689075e-06, "loss": 0.1784, "step": 9711 }, { "epoch": 0.8285275550247398, "grad_norm": 1.4522613091589116, "learning_rate": 7.51483214017637e-06, "loss": 0.21, "step": 9712 }, { "epoch": 0.8286128646988569, "grad_norm": 1.999708631272404, "learning_rate": 7.507549507800632e-06, "loss": 0.1475, "step": 9713 }, { "epoch": 0.8286981743729739, "grad_norm": 1.4607873523277448, "learning_rate": 7.500270119497671e-06, "loss": 0.1853, "step": 9714 }, { "epoch": 0.8287834840470909, "grad_norm": 1.5221287509934336, "learning_rate": 7.492993975823259e-06, "loss": 0.1533, "step": 9715 }, { "epoch": 0.828868793721208, "grad_norm": 1.7830874686531561, "learning_rate": 7.485721077332875e-06, "loss": 0.168, "step": 9716 }, { "epoch": 0.828954103395325, "grad_norm": 1.6923899972348213, "learning_rate": 7.478451424581761e-06, "loss": 0.1762, "step": 9717 }, { "epoch": 0.8290394130694421, "grad_norm": 2.352111144080238, "learning_rate": 7.4711850181249156e-06, "loss": 0.2047, "step": 9718 }, { "epoch": 0.8291247227435591, "grad_norm": 2.688606744033061, "learning_rate": 7.46392185851712e-06, "loss": 0.2855, "step": 9719 }, { "epoch": 0.8292100324176762, "grad_norm": 2.1401924754422668, "learning_rate": 7.456661946312826e-06, "loss": 0.1646, "step": 9720 }, { "epoch": 0.8292953420917932, "grad_norm": 2.084251837115664, "learning_rate": 7.44940528206633e-06, "loss": 0.2784, "step": 9721 }, { "epoch": 0.8293806517659102, "grad_norm": 1.4440992056881483, "learning_rate": 7.442151866331631e-06, "loss": 0.1987, "step": 9722 }, { "epoch": 0.8294659614400273, "grad_norm": 1.8175513810945487, "learning_rate": 7.434901699662477e-06, "loss": 0.182, "step": 9723 }, { "epoch": 0.8295512711141444, "grad_norm": 2.2867339597351943, "learning_rate": 7.427654782612398e-06, "loss": 0.1772, "step": 9724 }, { "epoch": 0.8296365807882614, "grad_norm": 2.3928041187910307, "learning_rate": 7.4204111157346375e-06, "loss": 0.1755, "step": 9725 }, { "epoch": 0.8297218904623784, "grad_norm": 1.8490324480129956, "learning_rate": 7.413170699582228e-06, "loss": 0.1916, "step": 9726 }, { "epoch": 0.8298072001364954, "grad_norm": 2.050082423716364, "learning_rate": 7.405933534707915e-06, "loss": 0.2357, "step": 9727 }, { "epoch": 0.8298925098106126, "grad_norm": 1.3691431499898703, "learning_rate": 7.398699621664251e-06, "loss": 0.1688, "step": 9728 }, { "epoch": 0.8299778194847296, "grad_norm": 1.4327010615784943, "learning_rate": 7.391468961003473e-06, "loss": 0.1852, "step": 9729 }, { "epoch": 0.8300631291588466, "grad_norm": 1.394773959027742, "learning_rate": 7.384241553277621e-06, "loss": 0.1693, "step": 9730 }, { "epoch": 0.8301484388329636, "grad_norm": 2.1633277499573085, "learning_rate": 7.377017399038471e-06, "loss": 0.2021, "step": 9731 }, { "epoch": 0.8302337485070806, "grad_norm": 1.8006376594011053, "learning_rate": 7.3697964988375444e-06, "loss": 0.239, "step": 9732 }, { "epoch": 0.8303190581811978, "grad_norm": 1.6599603514934547, "learning_rate": 7.362578853226121e-06, "loss": 0.1305, "step": 9733 }, { "epoch": 0.8304043678553148, "grad_norm": 1.6166722310681623, "learning_rate": 7.3553644627552095e-06, "loss": 0.1392, "step": 9734 }, { "epoch": 0.8304896775294318, "grad_norm": 1.5919821414904323, "learning_rate": 7.348153327975638e-06, "loss": 0.1976, "step": 9735 }, { "epoch": 0.8305749872035488, "grad_norm": 2.079792930859682, "learning_rate": 7.340945449437881e-06, "loss": 0.2415, "step": 9736 }, { "epoch": 0.830660296877666, "grad_norm": 1.8929317053532575, "learning_rate": 7.3337408276922594e-06, "loss": 0.2662, "step": 9737 }, { "epoch": 0.830745606551783, "grad_norm": 2.0516032578776957, "learning_rate": 7.326539463288801e-06, "loss": 0.1806, "step": 9738 }, { "epoch": 0.8308309162259, "grad_norm": 1.6386337548642669, "learning_rate": 7.3193413567772815e-06, "loss": 0.1782, "step": 9739 }, { "epoch": 0.830916225900017, "grad_norm": 1.7837023668137786, "learning_rate": 7.312146508707241e-06, "loss": 0.1976, "step": 9740 }, { "epoch": 0.8310015355741341, "grad_norm": 1.8072099767979473, "learning_rate": 7.304954919627993e-06, "loss": 0.2335, "step": 9741 }, { "epoch": 0.8310868452482512, "grad_norm": 1.4652196783245548, "learning_rate": 7.2977665900885285e-06, "loss": 0.1776, "step": 9742 }, { "epoch": 0.8311721549223682, "grad_norm": 1.4722237596963657, "learning_rate": 7.2905815206376794e-06, "loss": 0.2107, "step": 9743 }, { "epoch": 0.8312574645964852, "grad_norm": 1.5854928281171328, "learning_rate": 7.283399711823974e-06, "loss": 0.1418, "step": 9744 }, { "epoch": 0.8313427742706023, "grad_norm": 1.5429872922145362, "learning_rate": 7.276221164195701e-06, "loss": 0.1965, "step": 9745 }, { "epoch": 0.8314280839447193, "grad_norm": 1.8877815112440703, "learning_rate": 7.269045878300912e-06, "loss": 0.2241, "step": 9746 }, { "epoch": 0.8315133936188364, "grad_norm": 1.9027263405461272, "learning_rate": 7.26187385468739e-06, "loss": 0.1864, "step": 9747 }, { "epoch": 0.8315987032929534, "grad_norm": 1.6529169193258333, "learning_rate": 7.254705093902708e-06, "loss": 0.1892, "step": 9748 }, { "epoch": 0.8316840129670705, "grad_norm": 1.6253055872052298, "learning_rate": 7.247539596494118e-06, "loss": 0.1932, "step": 9749 }, { "epoch": 0.8317693226411875, "grad_norm": 2.0444169755601465, "learning_rate": 7.24037736300871e-06, "loss": 0.2215, "step": 9750 }, { "epoch": 0.8318546323153045, "grad_norm": 1.5288865670490255, "learning_rate": 7.233218393993263e-06, "loss": 0.1732, "step": 9751 }, { "epoch": 0.8319399419894216, "grad_norm": 1.7423032713600952, "learning_rate": 7.226062689994328e-06, "loss": 0.1962, "step": 9752 }, { "epoch": 0.8320252516635387, "grad_norm": 2.3451804310046778, "learning_rate": 7.218910251558209e-06, "loss": 0.1876, "step": 9753 }, { "epoch": 0.8321105613376557, "grad_norm": 2.3971340352803314, "learning_rate": 7.2117610792309555e-06, "loss": 0.1838, "step": 9754 }, { "epoch": 0.8321958710117727, "grad_norm": 2.0319737245482874, "learning_rate": 7.204615173558365e-06, "loss": 0.172, "step": 9755 }, { "epoch": 0.8322811806858897, "grad_norm": 1.5193479765760645, "learning_rate": 7.197472535085981e-06, "loss": 0.1994, "step": 9756 }, { "epoch": 0.8323664903600069, "grad_norm": 2.128661087338648, "learning_rate": 7.190333164359137e-06, "loss": 0.1552, "step": 9757 }, { "epoch": 0.8324518000341239, "grad_norm": 2.596114677829289, "learning_rate": 7.183197061922842e-06, "loss": 0.1702, "step": 9758 }, { "epoch": 0.8325371097082409, "grad_norm": 1.9965290110118945, "learning_rate": 7.176064228321938e-06, "loss": 0.2276, "step": 9759 }, { "epoch": 0.8326224193823579, "grad_norm": 1.550960732170149, "learning_rate": 7.1689346641009516e-06, "loss": 0.131, "step": 9760 }, { "epoch": 0.832707729056475, "grad_norm": 1.884303851706035, "learning_rate": 7.161808369804224e-06, "loss": 0.1968, "step": 9761 }, { "epoch": 0.8327930387305921, "grad_norm": 1.895548502167437, "learning_rate": 7.154685345975759e-06, "loss": 0.1874, "step": 9762 }, { "epoch": 0.8328783484047091, "grad_norm": 1.5208695115615638, "learning_rate": 7.1475655931594e-06, "loss": 0.1942, "step": 9763 }, { "epoch": 0.8329636580788261, "grad_norm": 1.5940179786529765, "learning_rate": 7.1404491118986895e-06, "loss": 0.2272, "step": 9764 }, { "epoch": 0.8330489677529432, "grad_norm": 1.4355505288006223, "learning_rate": 7.133335902736937e-06, "loss": 0.1562, "step": 9765 }, { "epoch": 0.8331342774270603, "grad_norm": 1.4183153301639937, "learning_rate": 7.126225966217193e-06, "loss": 0.1973, "step": 9766 }, { "epoch": 0.8332195871011773, "grad_norm": 2.1256268642544107, "learning_rate": 7.119119302882271e-06, "loss": 0.2305, "step": 9767 }, { "epoch": 0.8333048967752943, "grad_norm": 1.935046703947985, "learning_rate": 7.112015913274717e-06, "loss": 0.1607, "step": 9768 }, { "epoch": 0.8333902064494113, "grad_norm": 1.8732927815312037, "learning_rate": 7.1049157979368286e-06, "loss": 0.1475, "step": 9769 }, { "epoch": 0.8334755161235284, "grad_norm": 1.9022986310742878, "learning_rate": 7.097818957410696e-06, "loss": 0.1711, "step": 9770 }, { "epoch": 0.8335608257976455, "grad_norm": 1.8555941569462242, "learning_rate": 7.090725392238084e-06, "loss": 0.1917, "step": 9771 }, { "epoch": 0.8336461354717625, "grad_norm": 1.679317557825789, "learning_rate": 7.083635102960584e-06, "loss": 0.1492, "step": 9772 }, { "epoch": 0.8337314451458795, "grad_norm": 1.626009334199584, "learning_rate": 7.076548090119484e-06, "loss": 0.1595, "step": 9773 }, { "epoch": 0.8338167548199966, "grad_norm": 1.5354849970540183, "learning_rate": 7.0694643542558394e-06, "loss": 0.1297, "step": 9774 }, { "epoch": 0.8339020644941136, "grad_norm": 2.4165216161233496, "learning_rate": 7.062383895910463e-06, "loss": 0.1436, "step": 9775 }, { "epoch": 0.8339873741682307, "grad_norm": 1.8087784279445906, "learning_rate": 7.055306715623888e-06, "loss": 0.1365, "step": 9776 }, { "epoch": 0.8340726838423477, "grad_norm": 2.024312187461684, "learning_rate": 7.048232813936467e-06, "loss": 0.2704, "step": 9777 }, { "epoch": 0.8341579935164648, "grad_norm": 1.3297137462925874, "learning_rate": 7.0411621913882005e-06, "loss": 0.1116, "step": 9778 }, { "epoch": 0.8342433031905818, "grad_norm": 2.2454278904326483, "learning_rate": 7.034094848518924e-06, "loss": 0.2319, "step": 9779 }, { "epoch": 0.8343286128646988, "grad_norm": 1.6902808835159449, "learning_rate": 7.0270307858681935e-06, "loss": 0.2591, "step": 9780 }, { "epoch": 0.8344139225388159, "grad_norm": 2.0605813552358008, "learning_rate": 7.019970003975301e-06, "loss": 0.1653, "step": 9781 }, { "epoch": 0.834499232212933, "grad_norm": 1.669109017260114, "learning_rate": 7.012912503379287e-06, "loss": 0.1579, "step": 9782 }, { "epoch": 0.83458454188705, "grad_norm": 2.274690682479324, "learning_rate": 7.005858284618999e-06, "loss": 0.2369, "step": 9783 }, { "epoch": 0.834669851561167, "grad_norm": 2.0729927419635708, "learning_rate": 6.998807348232933e-06, "loss": 0.1984, "step": 9784 }, { "epoch": 0.834755161235284, "grad_norm": 1.6214021320480045, "learning_rate": 6.9917596947594364e-06, "loss": 0.1664, "step": 9785 }, { "epoch": 0.8348404709094012, "grad_norm": 1.7022896914326127, "learning_rate": 6.984715324736535e-06, "loss": 0.1752, "step": 9786 }, { "epoch": 0.8349257805835182, "grad_norm": 1.428134679448487, "learning_rate": 6.977674238702036e-06, "loss": 0.1823, "step": 9787 }, { "epoch": 0.8350110902576352, "grad_norm": 2.0672748665429888, "learning_rate": 6.970636437193489e-06, "loss": 0.2455, "step": 9788 }, { "epoch": 0.8350963999317522, "grad_norm": 2.120828910578205, "learning_rate": 6.963601920748198e-06, "loss": 0.1167, "step": 9789 }, { "epoch": 0.8351817096058693, "grad_norm": 1.4628062001047648, "learning_rate": 6.956570689903197e-06, "loss": 0.1828, "step": 9790 }, { "epoch": 0.8352670192799864, "grad_norm": 1.7097774387550222, "learning_rate": 6.9495427451952865e-06, "loss": 0.2655, "step": 9791 }, { "epoch": 0.8353523289541034, "grad_norm": 1.9366575686142036, "learning_rate": 6.942518087161026e-06, "loss": 0.1981, "step": 9792 }, { "epoch": 0.8354376386282204, "grad_norm": 1.7064787016242804, "learning_rate": 6.9354967163367035e-06, "loss": 0.2087, "step": 9793 }, { "epoch": 0.8355229483023375, "grad_norm": 1.8882331535833332, "learning_rate": 6.928478633258357e-06, "loss": 0.1844, "step": 9794 }, { "epoch": 0.8356082579764545, "grad_norm": 1.9430415368030776, "learning_rate": 6.921463838461789e-06, "loss": 0.1972, "step": 9795 }, { "epoch": 0.8356935676505716, "grad_norm": 2.3273076408379145, "learning_rate": 6.91445233248254e-06, "loss": 0.1776, "step": 9796 }, { "epoch": 0.8357788773246886, "grad_norm": 1.7222920765946843, "learning_rate": 6.907444115855899e-06, "loss": 0.2091, "step": 9797 }, { "epoch": 0.8358641869988057, "grad_norm": 2.090594290606152, "learning_rate": 6.900439189116892e-06, "loss": 0.1639, "step": 9798 }, { "epoch": 0.8359494966729227, "grad_norm": 1.8505360780305067, "learning_rate": 6.893437552800342e-06, "loss": 0.1293, "step": 9799 }, { "epoch": 0.8360348063470397, "grad_norm": 1.8073389961428818, "learning_rate": 6.886439207440748e-06, "loss": 0.1571, "step": 9800 }, { "epoch": 0.8361201160211568, "grad_norm": 1.834882067598855, "learning_rate": 6.879444153572428e-06, "loss": 0.12, "step": 9801 }, { "epoch": 0.8362054256952739, "grad_norm": 1.3911121672347682, "learning_rate": 6.8724523917294e-06, "loss": 0.1682, "step": 9802 }, { "epoch": 0.8362907353693909, "grad_norm": 1.5100511130467313, "learning_rate": 6.865463922445459e-06, "loss": 0.1837, "step": 9803 }, { "epoch": 0.8363760450435079, "grad_norm": 1.8841423941253304, "learning_rate": 6.858478746254115e-06, "loss": 0.2309, "step": 9804 }, { "epoch": 0.836461354717625, "grad_norm": 1.421596122473225, "learning_rate": 6.851496863688678e-06, "loss": 0.1409, "step": 9805 }, { "epoch": 0.8365466643917421, "grad_norm": 2.1456922713150535, "learning_rate": 6.844518275282163e-06, "loss": 0.2358, "step": 9806 }, { "epoch": 0.8366319740658591, "grad_norm": 1.8495928754543904, "learning_rate": 6.837542981567346e-06, "loss": 0.2009, "step": 9807 }, { "epoch": 0.8367172837399761, "grad_norm": 1.30649601203085, "learning_rate": 6.830570983076761e-06, "loss": 0.1437, "step": 9808 }, { "epoch": 0.8368025934140931, "grad_norm": 1.36998327774089, "learning_rate": 6.823602280342683e-06, "loss": 0.2153, "step": 9809 }, { "epoch": 0.8368879030882101, "grad_norm": 1.450499100125654, "learning_rate": 6.816636873897125e-06, "loss": 0.1478, "step": 9810 }, { "epoch": 0.8369732127623273, "grad_norm": 1.4456633270895918, "learning_rate": 6.809674764271862e-06, "loss": 0.2003, "step": 9811 }, { "epoch": 0.8370585224364443, "grad_norm": 2.5547127912010232, "learning_rate": 6.802715951998434e-06, "loss": 0.2643, "step": 9812 }, { "epoch": 0.8371438321105613, "grad_norm": 1.9176150615442804, "learning_rate": 6.795760437608073e-06, "loss": 0.1609, "step": 9813 }, { "epoch": 0.8372291417846783, "grad_norm": 1.782962310675086, "learning_rate": 6.788808221631826e-06, "loss": 0.1598, "step": 9814 }, { "epoch": 0.8373144514587955, "grad_norm": 1.6195753224950624, "learning_rate": 6.781859304600446e-06, "loss": 0.1556, "step": 9815 }, { "epoch": 0.8373997611329125, "grad_norm": 2.0948625711600077, "learning_rate": 6.774913687044448e-06, "loss": 0.2585, "step": 9816 }, { "epoch": 0.8374850708070295, "grad_norm": 1.6734122223641255, "learning_rate": 6.767971369494097e-06, "loss": 0.1944, "step": 9817 }, { "epoch": 0.8375703804811465, "grad_norm": 2.3494272264509313, "learning_rate": 6.761032352479391e-06, "loss": 0.2088, "step": 9818 }, { "epoch": 0.8376556901552636, "grad_norm": 1.6334503242218417, "learning_rate": 6.754096636530094e-06, "loss": 0.1459, "step": 9819 }, { "epoch": 0.8377409998293807, "grad_norm": 2.204462713925137, "learning_rate": 6.7471642221757005e-06, "loss": 0.1619, "step": 9820 }, { "epoch": 0.8378263095034977, "grad_norm": 2.0287383763984517, "learning_rate": 6.740235109945487e-06, "loss": 0.0874, "step": 9821 }, { "epoch": 0.8379116191776147, "grad_norm": 2.0737229381448796, "learning_rate": 6.733309300368435e-06, "loss": 0.1522, "step": 9822 }, { "epoch": 0.8379969288517318, "grad_norm": 1.908679352197963, "learning_rate": 6.726386793973305e-06, "loss": 0.2011, "step": 9823 }, { "epoch": 0.8380822385258488, "grad_norm": 1.7108030919621238, "learning_rate": 6.719467591288569e-06, "loss": 0.1784, "step": 9824 }, { "epoch": 0.8381675481999659, "grad_norm": 1.6523971368628663, "learning_rate": 6.712551692842517e-06, "loss": 0.2034, "step": 9825 }, { "epoch": 0.8382528578740829, "grad_norm": 2.064621037030838, "learning_rate": 6.705639099163091e-06, "loss": 0.1941, "step": 9826 }, { "epoch": 0.8383381675482, "grad_norm": 2.676451839869598, "learning_rate": 6.698729810778065e-06, "loss": 0.1251, "step": 9827 }, { "epoch": 0.838423477222317, "grad_norm": 2.039075361773971, "learning_rate": 6.691823828214916e-06, "loss": 0.1999, "step": 9828 }, { "epoch": 0.838508786896434, "grad_norm": 2.1345769828519736, "learning_rate": 6.684921152000878e-06, "loss": 0.2076, "step": 9829 }, { "epoch": 0.8385940965705511, "grad_norm": 1.9973975773836468, "learning_rate": 6.678021782662935e-06, "loss": 0.166, "step": 9830 }, { "epoch": 0.8386794062446682, "grad_norm": 2.92444501649116, "learning_rate": 6.671125720727817e-06, "loss": 0.2432, "step": 9831 }, { "epoch": 0.8387647159187852, "grad_norm": 1.6590341882522845, "learning_rate": 6.664232966721995e-06, "loss": 0.1378, "step": 9832 }, { "epoch": 0.8388500255929022, "grad_norm": 1.364270307829096, "learning_rate": 6.657343521171694e-06, "loss": 0.1942, "step": 9833 }, { "epoch": 0.8389353352670192, "grad_norm": 1.2491260811487928, "learning_rate": 6.650457384602915e-06, "loss": 0.1283, "step": 9834 }, { "epoch": 0.8390206449411364, "grad_norm": 1.7084958844267877, "learning_rate": 6.643574557541332e-06, "loss": 0.2287, "step": 9835 }, { "epoch": 0.8391059546152534, "grad_norm": 1.7769988343638052, "learning_rate": 6.6366950405124415e-06, "loss": 0.1989, "step": 9836 }, { "epoch": 0.8391912642893704, "grad_norm": 1.6100743079955833, "learning_rate": 6.629818834041457e-06, "loss": 0.131, "step": 9837 }, { "epoch": 0.8392765739634874, "grad_norm": 1.628403254360994, "learning_rate": 6.622945938653341e-06, "loss": 0.1774, "step": 9838 }, { "epoch": 0.8393618836376046, "grad_norm": 2.5138057029922263, "learning_rate": 6.616076354872791e-06, "loss": 0.2023, "step": 9839 }, { "epoch": 0.8394471933117216, "grad_norm": 1.4900922195007185, "learning_rate": 6.60921008322426e-06, "loss": 0.1729, "step": 9840 }, { "epoch": 0.8395325029858386, "grad_norm": 1.7913879872724987, "learning_rate": 6.6023471242319775e-06, "loss": 0.1616, "step": 9841 }, { "epoch": 0.8396178126599556, "grad_norm": 2.434598320449103, "learning_rate": 6.595487478419859e-06, "loss": 0.1849, "step": 9842 }, { "epoch": 0.8397031223340727, "grad_norm": 1.479854728167428, "learning_rate": 6.588631146311635e-06, "loss": 0.1386, "step": 9843 }, { "epoch": 0.8397884320081898, "grad_norm": 1.987963458072689, "learning_rate": 6.581778128430732e-06, "loss": 0.2208, "step": 9844 }, { "epoch": 0.8398737416823068, "grad_norm": 1.5621708332476816, "learning_rate": 6.574928425300347e-06, "loss": 0.1712, "step": 9845 }, { "epoch": 0.8399590513564238, "grad_norm": 1.491256903019266, "learning_rate": 6.568082037443401e-06, "loss": 0.1875, "step": 9846 }, { "epoch": 0.8400443610305408, "grad_norm": 2.107899693862297, "learning_rate": 6.5612389653826215e-06, "loss": 0.2178, "step": 9847 }, { "epoch": 0.8401296707046579, "grad_norm": 1.3467798894238956, "learning_rate": 6.5543992096403885e-06, "loss": 0.203, "step": 9848 }, { "epoch": 0.840214980378775, "grad_norm": 1.6702584981440922, "learning_rate": 6.5475627707389135e-06, "loss": 0.1936, "step": 9849 }, { "epoch": 0.840300290052892, "grad_norm": 2.3797816277345962, "learning_rate": 6.540729649200123e-06, "loss": 0.215, "step": 9850 }, { "epoch": 0.840385599727009, "grad_norm": 1.6400686138035196, "learning_rate": 6.533899845545677e-06, "loss": 0.1937, "step": 9851 }, { "epoch": 0.8404709094011261, "grad_norm": 1.463174260225604, "learning_rate": 6.527073360296998e-06, "loss": 0.1161, "step": 9852 }, { "epoch": 0.8405562190752431, "grad_norm": 1.6709226944286908, "learning_rate": 6.520250193975242e-06, "loss": 0.1671, "step": 9853 }, { "epoch": 0.8406415287493602, "grad_norm": 1.7310830569425515, "learning_rate": 6.513430347101357e-06, "loss": 0.1211, "step": 9854 }, { "epoch": 0.8407268384234772, "grad_norm": 1.5810898228511827, "learning_rate": 6.506613820195956e-06, "loss": 0.1897, "step": 9855 }, { "epoch": 0.8408121480975943, "grad_norm": 1.2117036426224672, "learning_rate": 6.499800613779472e-06, "loss": 0.2077, "step": 9856 }, { "epoch": 0.8408974577717113, "grad_norm": 1.8441402862748908, "learning_rate": 6.492990728372056e-06, "loss": 0.1262, "step": 9857 }, { "epoch": 0.8409827674458283, "grad_norm": 1.4980414637899335, "learning_rate": 6.486184164493603e-06, "loss": 0.161, "step": 9858 }, { "epoch": 0.8410680771199454, "grad_norm": 1.7374066499291458, "learning_rate": 6.479380922663752e-06, "loss": 0.1917, "step": 9859 }, { "epoch": 0.8411533867940625, "grad_norm": 1.8458501202212494, "learning_rate": 6.4725810034019064e-06, "loss": 0.2148, "step": 9860 }, { "epoch": 0.8412386964681795, "grad_norm": 1.4917918374734924, "learning_rate": 6.465784407227194e-06, "loss": 0.1546, "step": 9861 }, { "epoch": 0.8413240061422965, "grad_norm": 1.9208317848359304, "learning_rate": 6.458991134658487e-06, "loss": 0.2598, "step": 9862 }, { "epoch": 0.8414093158164135, "grad_norm": 1.7318806625378265, "learning_rate": 6.4522011862144515e-06, "loss": 0.1532, "step": 9863 }, { "epoch": 0.8414946254905307, "grad_norm": 2.099407012413737, "learning_rate": 6.445414562413427e-06, "loss": 0.1362, "step": 9864 }, { "epoch": 0.8415799351646477, "grad_norm": 1.4785877043918827, "learning_rate": 6.438631263773559e-06, "loss": 0.1796, "step": 9865 }, { "epoch": 0.8416652448387647, "grad_norm": 1.874439727998619, "learning_rate": 6.431851290812696e-06, "loss": 0.1905, "step": 9866 }, { "epoch": 0.8417505545128817, "grad_norm": 1.5606454142563284, "learning_rate": 6.42507464404849e-06, "loss": 0.1858, "step": 9867 }, { "epoch": 0.8418358641869989, "grad_norm": 1.6349910822167266, "learning_rate": 6.41830132399826e-06, "loss": 0.1419, "step": 9868 }, { "epoch": 0.8419211738611159, "grad_norm": 1.7125768602484097, "learning_rate": 6.411531331179138e-06, "loss": 0.1369, "step": 9869 }, { "epoch": 0.8420064835352329, "grad_norm": 2.089616367412393, "learning_rate": 6.404764666107971e-06, "loss": 0.2085, "step": 9870 }, { "epoch": 0.8420917932093499, "grad_norm": 1.8551253151529319, "learning_rate": 6.398001329301356e-06, "loss": 0.1753, "step": 9871 }, { "epoch": 0.842177102883467, "grad_norm": 2.409138518725062, "learning_rate": 6.391241321275637e-06, "loss": 0.2024, "step": 9872 }, { "epoch": 0.842262412557584, "grad_norm": 1.9407644765368752, "learning_rate": 6.384484642546912e-06, "loss": 0.141, "step": 9873 }, { "epoch": 0.8423477222317011, "grad_norm": 1.5837915493664192, "learning_rate": 6.377731293631006e-06, "loss": 0.1231, "step": 9874 }, { "epoch": 0.8424330319058181, "grad_norm": 1.9673278152563376, "learning_rate": 6.370981275043497e-06, "loss": 0.193, "step": 9875 }, { "epoch": 0.8425183415799352, "grad_norm": 1.9803921837085565, "learning_rate": 6.36423458729975e-06, "loss": 0.1422, "step": 9876 }, { "epoch": 0.8426036512540522, "grad_norm": 1.511058337834362, "learning_rate": 6.357491230914786e-06, "loss": 0.2077, "step": 9877 }, { "epoch": 0.8426889609281693, "grad_norm": 1.9290702543067284, "learning_rate": 6.350751206403466e-06, "loss": 0.1947, "step": 9878 }, { "epoch": 0.8427742706022863, "grad_norm": 1.5467877604226952, "learning_rate": 6.344014514280333e-06, "loss": 0.1319, "step": 9879 }, { "epoch": 0.8428595802764034, "grad_norm": 1.9250071364431498, "learning_rate": 6.33728115505971e-06, "loss": 0.2177, "step": 9880 }, { "epoch": 0.8429448899505204, "grad_norm": 2.4498553992436714, "learning_rate": 6.3305511292556405e-06, "loss": 0.2187, "step": 9881 }, { "epoch": 0.8430301996246374, "grad_norm": 1.8964799749321382, "learning_rate": 6.323824437381931e-06, "loss": 0.1923, "step": 9882 }, { "epoch": 0.8431155092987545, "grad_norm": 2.2886091701279514, "learning_rate": 6.317101079952148e-06, "loss": 0.1859, "step": 9883 }, { "epoch": 0.8432008189728715, "grad_norm": 1.6652564122251658, "learning_rate": 6.310381057479542e-06, "loss": 0.1709, "step": 9884 }, { "epoch": 0.8432861286469886, "grad_norm": 1.6461299999067562, "learning_rate": 6.303664370477192e-06, "loss": 0.1801, "step": 9885 }, { "epoch": 0.8433714383211056, "grad_norm": 1.9063834471972847, "learning_rate": 6.296951019457864e-06, "loss": 0.1389, "step": 9886 }, { "epoch": 0.8434567479952226, "grad_norm": 1.5285452485229956, "learning_rate": 6.290241004934083e-06, "loss": 0.1328, "step": 9887 }, { "epoch": 0.8435420576693397, "grad_norm": 1.9232474288179444, "learning_rate": 6.283534327418122e-06, "loss": 0.1902, "step": 9888 }, { "epoch": 0.8436273673434568, "grad_norm": 1.3759910306582765, "learning_rate": 6.276830987422028e-06, "loss": 0.1398, "step": 9889 }, { "epoch": 0.8437126770175738, "grad_norm": 2.316738677335029, "learning_rate": 6.270130985457523e-06, "loss": 0.203, "step": 9890 }, { "epoch": 0.8437979866916908, "grad_norm": 2.37603817384411, "learning_rate": 6.2634343220361436e-06, "loss": 0.2045, "step": 9891 }, { "epoch": 0.8438832963658078, "grad_norm": 1.9925369853723625, "learning_rate": 6.256740997669142e-06, "loss": 0.1987, "step": 9892 }, { "epoch": 0.843968606039925, "grad_norm": 1.7227424721105657, "learning_rate": 6.2500510128675085e-06, "loss": 0.1517, "step": 9893 }, { "epoch": 0.844053915714042, "grad_norm": 1.7142342633519079, "learning_rate": 6.243364368141996e-06, "loss": 0.1817, "step": 9894 }, { "epoch": 0.844139225388159, "grad_norm": 1.5026604582910195, "learning_rate": 6.2366810640030805e-06, "loss": 0.2267, "step": 9895 }, { "epoch": 0.844224535062276, "grad_norm": 1.7668388042918806, "learning_rate": 6.230001100961031e-06, "loss": 0.1825, "step": 9896 }, { "epoch": 0.8443098447363931, "grad_norm": 2.447575311330086, "learning_rate": 6.223324479525778e-06, "loss": 0.1755, "step": 9897 }, { "epoch": 0.8443951544105102, "grad_norm": 1.96906302249419, "learning_rate": 6.216651200207085e-06, "loss": 0.207, "step": 9898 }, { "epoch": 0.8444804640846272, "grad_norm": 1.7160495698256018, "learning_rate": 6.209981263514414e-06, "loss": 0.1626, "step": 9899 }, { "epoch": 0.8445657737587442, "grad_norm": 2.031798303400986, "learning_rate": 6.203314669956967e-06, "loss": 0.2415, "step": 9900 }, { "epoch": 0.8446510834328613, "grad_norm": 2.0017426766292217, "learning_rate": 6.1966514200437084e-06, "loss": 0.1783, "step": 9901 }, { "epoch": 0.8447363931069783, "grad_norm": 1.3574477378522023, "learning_rate": 6.18999151428335e-06, "loss": 0.1229, "step": 9902 }, { "epoch": 0.8448217027810954, "grad_norm": 1.6686308571791777, "learning_rate": 6.183334953184328e-06, "loss": 0.1463, "step": 9903 }, { "epoch": 0.8449070124552124, "grad_norm": 1.835318473183947, "learning_rate": 6.1766817372548305e-06, "loss": 0.1464, "step": 9904 }, { "epoch": 0.8449923221293295, "grad_norm": 1.680496446850331, "learning_rate": 6.170031867002829e-06, "loss": 0.1662, "step": 9905 }, { "epoch": 0.8450776318034465, "grad_norm": 1.5782744081985436, "learning_rate": 6.163385342935963e-06, "loss": 0.1032, "step": 9906 }, { "epoch": 0.8451629414775635, "grad_norm": 1.3237278571361641, "learning_rate": 6.1567421655616856e-06, "loss": 0.1555, "step": 9907 }, { "epoch": 0.8452482511516806, "grad_norm": 1.8094060074911513, "learning_rate": 6.150102335387159e-06, "loss": 0.1093, "step": 9908 }, { "epoch": 0.8453335608257977, "grad_norm": 1.960928989103169, "learning_rate": 6.143465852919306e-06, "loss": 0.2021, "step": 9909 }, { "epoch": 0.8454188704999147, "grad_norm": 1.8135702985080604, "learning_rate": 6.136832718664765e-06, "loss": 0.1543, "step": 9910 }, { "epoch": 0.8455041801740317, "grad_norm": 1.5067299232594764, "learning_rate": 6.130202933129975e-06, "loss": 0.1435, "step": 9911 }, { "epoch": 0.8455894898481487, "grad_norm": 1.6853300907766762, "learning_rate": 6.12357649682106e-06, "loss": 0.2028, "step": 9912 }, { "epoch": 0.8456747995222659, "grad_norm": 1.5828594535950453, "learning_rate": 6.116953410243925e-06, "loss": 0.1575, "step": 9913 }, { "epoch": 0.8457601091963829, "grad_norm": 1.864385953149134, "learning_rate": 6.1103336739042e-06, "loss": 0.194, "step": 9914 }, { "epoch": 0.8458454188704999, "grad_norm": 1.6321770558615594, "learning_rate": 6.103717288307275e-06, "loss": 0.1534, "step": 9915 }, { "epoch": 0.8459307285446169, "grad_norm": 1.747634310492982, "learning_rate": 6.097104253958263e-06, "loss": 0.1762, "step": 9916 }, { "epoch": 0.8460160382187341, "grad_norm": 1.4878392802317928, "learning_rate": 6.090494571362037e-06, "loss": 0.1736, "step": 9917 }, { "epoch": 0.8461013478928511, "grad_norm": 1.3120086749389004, "learning_rate": 6.083888241023234e-06, "loss": 0.1054, "step": 9918 }, { "epoch": 0.8461866575669681, "grad_norm": 1.6909668948668162, "learning_rate": 6.077285263446175e-06, "loss": 0.2116, "step": 9919 }, { "epoch": 0.8462719672410851, "grad_norm": 1.9233397817696423, "learning_rate": 6.070685639134988e-06, "loss": 0.1645, "step": 9920 }, { "epoch": 0.8463572769152022, "grad_norm": 1.861539014175367, "learning_rate": 6.064089368593517e-06, "loss": 0.1619, "step": 9921 }, { "epoch": 0.8464425865893193, "grad_norm": 2.1047295173240124, "learning_rate": 6.057496452325346e-06, "loss": 0.1564, "step": 9922 }, { "epoch": 0.8465278962634363, "grad_norm": 1.712675979371154, "learning_rate": 6.0509068908338075e-06, "loss": 0.1613, "step": 9923 }, { "epoch": 0.8466132059375533, "grad_norm": 2.005840238277446, "learning_rate": 6.044320684621985e-06, "loss": 0.1387, "step": 9924 }, { "epoch": 0.8466985156116703, "grad_norm": 1.7025477979899148, "learning_rate": 6.037737834192697e-06, "loss": 0.1592, "step": 9925 }, { "epoch": 0.8467838252857874, "grad_norm": 1.6385113861461316, "learning_rate": 6.031158340048504e-06, "loss": 0.1926, "step": 9926 }, { "epoch": 0.8468691349599045, "grad_norm": 1.6408640369482914, "learning_rate": 6.0245822026917256e-06, "loss": 0.179, "step": 9927 }, { "epoch": 0.8469544446340215, "grad_norm": 1.3421644570767355, "learning_rate": 6.018009422624415e-06, "loss": 0.1204, "step": 9928 }, { "epoch": 0.8470397543081385, "grad_norm": 1.8415327803102044, "learning_rate": 6.011440000348362e-06, "loss": 0.1803, "step": 9929 }, { "epoch": 0.8471250639822556, "grad_norm": 1.7217755823806287, "learning_rate": 6.004873936365102e-06, "loss": 0.1386, "step": 9930 }, { "epoch": 0.8472103736563726, "grad_norm": 2.3372792756828655, "learning_rate": 5.998311231175946e-06, "loss": 0.1815, "step": 9931 }, { "epoch": 0.8472956833304897, "grad_norm": 1.675146876485848, "learning_rate": 5.991751885281882e-06, "loss": 0.1428, "step": 9932 }, { "epoch": 0.8473809930046067, "grad_norm": 1.5640330613878524, "learning_rate": 5.985195899183715e-06, "loss": 0.2987, "step": 9933 }, { "epoch": 0.8474663026787238, "grad_norm": 1.987948646530711, "learning_rate": 5.978643273381945e-06, "loss": 0.1851, "step": 9934 }, { "epoch": 0.8475516123528408, "grad_norm": 2.4247706668601126, "learning_rate": 5.9720940083768325e-06, "loss": 0.2393, "step": 9935 }, { "epoch": 0.8476369220269578, "grad_norm": 1.798336330454924, "learning_rate": 5.965548104668378e-06, "loss": 0.1409, "step": 9936 }, { "epoch": 0.8477222317010749, "grad_norm": 1.6561968812881587, "learning_rate": 5.9590055627563256e-06, "loss": 0.2014, "step": 9937 }, { "epoch": 0.847807541375192, "grad_norm": 2.5166784888320364, "learning_rate": 5.95246638314017e-06, "loss": 0.1801, "step": 9938 }, { "epoch": 0.847892851049309, "grad_norm": 1.6351395228405659, "learning_rate": 5.945930566319119e-06, "loss": 0.2038, "step": 9939 }, { "epoch": 0.847978160723426, "grad_norm": 1.4910392137739767, "learning_rate": 5.939398112792183e-06, "loss": 0.1414, "step": 9940 }, { "epoch": 0.848063470397543, "grad_norm": 1.840682889225557, "learning_rate": 5.932869023058063e-06, "loss": 0.1481, "step": 9941 }, { "epoch": 0.8481487800716602, "grad_norm": 2.182903146465868, "learning_rate": 5.926343297615216e-06, "loss": 0.1655, "step": 9942 }, { "epoch": 0.8482340897457772, "grad_norm": 1.5596555377220465, "learning_rate": 5.919820936961856e-06, "loss": 0.1796, "step": 9943 }, { "epoch": 0.8483193994198942, "grad_norm": 1.475967044958645, "learning_rate": 5.913301941595922e-06, "loss": 0.1147, "step": 9944 }, { "epoch": 0.8484047090940112, "grad_norm": 2.388203510270791, "learning_rate": 5.906786312015111e-06, "loss": 0.1817, "step": 9945 }, { "epoch": 0.8484900187681284, "grad_norm": 1.6886080177193592, "learning_rate": 5.900274048716841e-06, "loss": 0.1923, "step": 9946 }, { "epoch": 0.8485753284422454, "grad_norm": 1.4922624339516986, "learning_rate": 5.893765152198327e-06, "loss": 0.1768, "step": 9947 }, { "epoch": 0.8486606381163624, "grad_norm": 2.1126306346702832, "learning_rate": 5.887259622956437e-06, "loss": 0.2657, "step": 9948 }, { "epoch": 0.8487459477904794, "grad_norm": 1.4008758427881283, "learning_rate": 5.8807574614878734e-06, "loss": 0.1354, "step": 9949 }, { "epoch": 0.8488312574645965, "grad_norm": 1.6012824581290055, "learning_rate": 5.874258668289029e-06, "loss": 0.1995, "step": 9950 }, { "epoch": 0.8489165671387136, "grad_norm": 2.5344542014732157, "learning_rate": 5.86776324385605e-06, "loss": 0.216, "step": 9951 }, { "epoch": 0.8490018768128306, "grad_norm": 2.109586351897177, "learning_rate": 5.8612711886848196e-06, "loss": 0.1783, "step": 9952 }, { "epoch": 0.8490871864869476, "grad_norm": 1.7497939260947895, "learning_rate": 5.8547825032710006e-06, "loss": 0.1965, "step": 9953 }, { "epoch": 0.8491724961610647, "grad_norm": 1.7256515654522788, "learning_rate": 5.84829718810993e-06, "loss": 0.2123, "step": 9954 }, { "epoch": 0.8492578058351817, "grad_norm": 1.6009551594713511, "learning_rate": 5.8418152436967575e-06, "loss": 0.2218, "step": 9955 }, { "epoch": 0.8493431155092988, "grad_norm": 2.1301290657448333, "learning_rate": 5.835336670526331e-06, "loss": 0.2127, "step": 9956 }, { "epoch": 0.8494284251834158, "grad_norm": 1.6441721037001247, "learning_rate": 5.828861469093266e-06, "loss": 0.1537, "step": 9957 }, { "epoch": 0.8495137348575329, "grad_norm": 1.8006664578803595, "learning_rate": 5.822389639891895e-06, "loss": 0.1478, "step": 9958 }, { "epoch": 0.8495990445316499, "grad_norm": 1.7546550961743912, "learning_rate": 5.8159211834163116e-06, "loss": 0.2498, "step": 9959 }, { "epoch": 0.8496843542057669, "grad_norm": 1.924483227413644, "learning_rate": 5.809456100160371e-06, "loss": 0.1741, "step": 9960 }, { "epoch": 0.849769663879884, "grad_norm": 1.540927280074179, "learning_rate": 5.802994390617605e-06, "loss": 0.2267, "step": 9961 }, { "epoch": 0.849854973554001, "grad_norm": 2.197605442472814, "learning_rate": 5.796536055281371e-06, "loss": 0.2301, "step": 9962 }, { "epoch": 0.8499402832281181, "grad_norm": 1.7717276241094715, "learning_rate": 5.790081094644706e-06, "loss": 0.2202, "step": 9963 }, { "epoch": 0.8500255929022351, "grad_norm": 2.184270600319457, "learning_rate": 5.783629509200423e-06, "loss": 0.1614, "step": 9964 }, { "epoch": 0.8501109025763521, "grad_norm": 1.6462157404759177, "learning_rate": 5.777181299441054e-06, "loss": 0.2439, "step": 9965 }, { "epoch": 0.8501962122504692, "grad_norm": 1.63454407497575, "learning_rate": 5.7707364658589e-06, "loss": 0.1991, "step": 9966 }, { "epoch": 0.8502815219245863, "grad_norm": 1.6087165152051948, "learning_rate": 5.7642950089459805e-06, "loss": 0.187, "step": 9967 }, { "epoch": 0.8503668315987033, "grad_norm": 1.4271271506309426, "learning_rate": 5.757856929194061e-06, "loss": 0.2182, "step": 9968 }, { "epoch": 0.8504521412728203, "grad_norm": 1.6044692294985257, "learning_rate": 5.751422227094677e-06, "loss": 0.177, "step": 9969 }, { "epoch": 0.8505374509469373, "grad_norm": 1.5041859552444676, "learning_rate": 5.744990903139053e-06, "loss": 0.196, "step": 9970 }, { "epoch": 0.8506227606210545, "grad_norm": 1.7588690401200906, "learning_rate": 5.738562957818217e-06, "loss": 0.1868, "step": 9971 }, { "epoch": 0.8507080702951715, "grad_norm": 1.7765625966255745, "learning_rate": 5.7321383916228764e-06, "loss": 0.1265, "step": 9972 }, { "epoch": 0.8507933799692885, "grad_norm": 1.9605513720661276, "learning_rate": 5.725717205043552e-06, "loss": 0.1562, "step": 9973 }, { "epoch": 0.8508786896434055, "grad_norm": 1.5504469042669762, "learning_rate": 5.719299398570427e-06, "loss": 0.1335, "step": 9974 }, { "epoch": 0.8509639993175226, "grad_norm": 1.799466462005358, "learning_rate": 5.7128849726935e-06, "loss": 0.1603, "step": 9975 }, { "epoch": 0.8510493089916397, "grad_norm": 2.071910313554163, "learning_rate": 5.706473927902456e-06, "loss": 0.1804, "step": 9976 }, { "epoch": 0.8511346186657567, "grad_norm": 1.5039287614995358, "learning_rate": 5.700066264686759e-06, "loss": 0.1745, "step": 9977 }, { "epoch": 0.8512199283398737, "grad_norm": 1.4473283885365877, "learning_rate": 5.693661983535587e-06, "loss": 0.1643, "step": 9978 }, { "epoch": 0.8513052380139908, "grad_norm": 1.7863348945107353, "learning_rate": 5.687261084937884e-06, "loss": 0.1816, "step": 9979 }, { "epoch": 0.8513905476881078, "grad_norm": 2.307457762194208, "learning_rate": 5.680863569382316e-06, "loss": 0.2016, "step": 9980 }, { "epoch": 0.8514758573622249, "grad_norm": 1.945861738870292, "learning_rate": 5.674469437357293e-06, "loss": 0.2569, "step": 9981 }, { "epoch": 0.8515611670363419, "grad_norm": 2.8052513809574657, "learning_rate": 5.668078689351009e-06, "loss": 0.1654, "step": 9982 }, { "epoch": 0.851646476710459, "grad_norm": 1.5331379676142909, "learning_rate": 5.66169132585131e-06, "loss": 0.1614, "step": 9983 }, { "epoch": 0.851731786384576, "grad_norm": 1.3693663755186196, "learning_rate": 5.655307347345879e-06, "loss": 0.1965, "step": 9984 }, { "epoch": 0.851817096058693, "grad_norm": 2.509207554381334, "learning_rate": 5.648926754322081e-06, "loss": 0.1975, "step": 9985 }, { "epoch": 0.8519024057328101, "grad_norm": 1.4866694335455586, "learning_rate": 5.642549547267045e-06, "loss": 0.1924, "step": 9986 }, { "epoch": 0.8519877154069272, "grad_norm": 1.8499814780055563, "learning_rate": 5.636175726667636e-06, "loss": 0.1914, "step": 9987 }, { "epoch": 0.8520730250810442, "grad_norm": 2.045885618620342, "learning_rate": 5.6298052930104536e-06, "loss": 0.1732, "step": 9988 }, { "epoch": 0.8521583347551612, "grad_norm": 1.8724331133601457, "learning_rate": 5.62343824678187e-06, "loss": 0.1839, "step": 9989 }, { "epoch": 0.8522436444292782, "grad_norm": 2.0916183780197617, "learning_rate": 5.617074588467941e-06, "loss": 0.1295, "step": 9990 }, { "epoch": 0.8523289541033954, "grad_norm": 1.286694380176783, "learning_rate": 5.610714318554527e-06, "loss": 0.1473, "step": 9991 }, { "epoch": 0.8524142637775124, "grad_norm": 2.0478056910247515, "learning_rate": 5.604357437527191e-06, "loss": 0.2612, "step": 9992 }, { "epoch": 0.8524995734516294, "grad_norm": 1.972624221390668, "learning_rate": 5.5980039458712494e-06, "loss": 0.1874, "step": 9993 }, { "epoch": 0.8525848831257464, "grad_norm": 1.4541796067034656, "learning_rate": 5.591653844071743e-06, "loss": 0.1413, "step": 9994 }, { "epoch": 0.8526701927998636, "grad_norm": 1.718931986104035, "learning_rate": 5.585307132613493e-06, "loss": 0.184, "step": 9995 }, { "epoch": 0.8527555024739806, "grad_norm": 2.893425651811962, "learning_rate": 5.578963811981014e-06, "loss": 0.1991, "step": 9996 }, { "epoch": 0.8528408121480976, "grad_norm": 1.6853802400793425, "learning_rate": 5.572623882658595e-06, "loss": 0.1775, "step": 9997 }, { "epoch": 0.8529261218222146, "grad_norm": 1.935130978274125, "learning_rate": 5.566287345130267e-06, "loss": 0.2265, "step": 9998 }, { "epoch": 0.8530114314963316, "grad_norm": 1.8030793923042858, "learning_rate": 5.559954199879769e-06, "loss": 0.1644, "step": 9999 }, { "epoch": 0.8530967411704488, "grad_norm": 1.8005580143119055, "learning_rate": 5.553624447390621e-06, "loss": 0.1726, "step": 10000 }, { "epoch": 0.8531820508445658, "grad_norm": 2.1740866179209375, "learning_rate": 5.547298088146047e-06, "loss": 0.1773, "step": 10001 }, { "epoch": 0.8532673605186828, "grad_norm": 2.4048210954643143, "learning_rate": 5.540975122629061e-06, "loss": 0.1539, "step": 10002 }, { "epoch": 0.8533526701927998, "grad_norm": 1.6736925887046836, "learning_rate": 5.5346555513223485e-06, "loss": 0.1848, "step": 10003 }, { "epoch": 0.8534379798669169, "grad_norm": 1.7702291767009188, "learning_rate": 5.5283393747084075e-06, "loss": 0.2065, "step": 10004 }, { "epoch": 0.853523289541034, "grad_norm": 2.2974858931314865, "learning_rate": 5.52202659326943e-06, "loss": 0.2522, "step": 10005 }, { "epoch": 0.853608599215151, "grad_norm": 1.3589045708128253, "learning_rate": 5.51571720748737e-06, "loss": 0.1767, "step": 10006 }, { "epoch": 0.853693908889268, "grad_norm": 1.7817038242377623, "learning_rate": 5.509411217843913e-06, "loss": 0.1981, "step": 10007 }, { "epoch": 0.8537792185633851, "grad_norm": 1.312122472326899, "learning_rate": 5.503108624820486e-06, "loss": 0.1761, "step": 10008 }, { "epoch": 0.8538645282375021, "grad_norm": 2.4207823226553065, "learning_rate": 5.4968094288982585e-06, "loss": 0.1649, "step": 10009 }, { "epoch": 0.8539498379116192, "grad_norm": 1.7038098979224963, "learning_rate": 5.49051363055813e-06, "loss": 0.1679, "step": 10010 }, { "epoch": 0.8540351475857362, "grad_norm": 1.849868321243088, "learning_rate": 5.484221230280784e-06, "loss": 0.1994, "step": 10011 }, { "epoch": 0.8541204572598533, "grad_norm": 1.662956694208281, "learning_rate": 5.477932228546573e-06, "loss": 0.1997, "step": 10012 }, { "epoch": 0.8542057669339703, "grad_norm": 1.7338028385001385, "learning_rate": 5.4716466258356525e-06, "loss": 0.2225, "step": 10013 }, { "epoch": 0.8542910766080873, "grad_norm": 2.0272531948556023, "learning_rate": 5.465364422627889e-06, "loss": 0.2125, "step": 10014 }, { "epoch": 0.8543763862822044, "grad_norm": 1.564180309877695, "learning_rate": 5.459085619402898e-06, "loss": 0.1229, "step": 10015 }, { "epoch": 0.8544616959563215, "grad_norm": 2.180496776568806, "learning_rate": 5.452810216640014e-06, "loss": 0.2893, "step": 10016 }, { "epoch": 0.8545470056304385, "grad_norm": 1.6503345728277365, "learning_rate": 5.4465382148183645e-06, "loss": 0.1788, "step": 10017 }, { "epoch": 0.8546323153045555, "grad_norm": 1.4965365479171262, "learning_rate": 5.44026961441676e-06, "loss": 0.1752, "step": 10018 }, { "epoch": 0.8547176249786725, "grad_norm": 1.7602801147710827, "learning_rate": 5.4340044159137796e-06, "loss": 0.173, "step": 10019 }, { "epoch": 0.8548029346527897, "grad_norm": 2.0104173913931325, "learning_rate": 5.42774261978774e-06, "loss": 0.137, "step": 10020 }, { "epoch": 0.8548882443269067, "grad_norm": 1.542733708116516, "learning_rate": 5.421484226516698e-06, "loss": 0.1637, "step": 10021 }, { "epoch": 0.8549735540010237, "grad_norm": 1.7572885368182114, "learning_rate": 5.41522923657844e-06, "loss": 0.2104, "step": 10022 }, { "epoch": 0.8550588636751407, "grad_norm": 1.8237728905499533, "learning_rate": 5.408977650450503e-06, "loss": 0.1981, "step": 10023 }, { "epoch": 0.8551441733492579, "grad_norm": 1.4996429654229955, "learning_rate": 5.402729468610179e-06, "loss": 0.1525, "step": 10024 }, { "epoch": 0.8552294830233749, "grad_norm": 2.110241521237344, "learning_rate": 5.396484691534454e-06, "loss": 0.1883, "step": 10025 }, { "epoch": 0.8553147926974919, "grad_norm": 1.802895849730873, "learning_rate": 5.3902433197001115e-06, "loss": 0.163, "step": 10026 }, { "epoch": 0.8554001023716089, "grad_norm": 1.8463603146499554, "learning_rate": 5.384005353583632e-06, "loss": 0.263, "step": 10027 }, { "epoch": 0.855485412045726, "grad_norm": 1.1643066662216264, "learning_rate": 5.377770793661257e-06, "loss": 0.1913, "step": 10028 }, { "epoch": 0.855570721719843, "grad_norm": 1.4130303425321857, "learning_rate": 5.371539640408957e-06, "loss": 0.149, "step": 10029 }, { "epoch": 0.8556560313939601, "grad_norm": 1.400785935708425, "learning_rate": 5.3653118943024546e-06, "loss": 0.1477, "step": 10030 }, { "epoch": 0.8557413410680771, "grad_norm": 1.5972417839437247, "learning_rate": 5.359087555817194e-06, "loss": 0.1556, "step": 10031 }, { "epoch": 0.8558266507421942, "grad_norm": 1.7909472669828677, "learning_rate": 5.352866625428371e-06, "loss": 0.1807, "step": 10032 }, { "epoch": 0.8559119604163112, "grad_norm": 1.774741949209053, "learning_rate": 5.34664910361094e-06, "loss": 0.1693, "step": 10033 }, { "epoch": 0.8559972700904283, "grad_norm": 1.4883152114317388, "learning_rate": 5.34043499083956e-06, "loss": 0.1662, "step": 10034 }, { "epoch": 0.8560825797645453, "grad_norm": 1.727039806276091, "learning_rate": 5.334224287588646e-06, "loss": 0.1665, "step": 10035 }, { "epoch": 0.8561678894386624, "grad_norm": 1.9539674696698706, "learning_rate": 5.328016994332341e-06, "loss": 0.1569, "step": 10036 }, { "epoch": 0.8562531991127794, "grad_norm": 1.761718208668154, "learning_rate": 5.321813111544577e-06, "loss": 0.2471, "step": 10037 }, { "epoch": 0.8563385087868964, "grad_norm": 1.5036507525714269, "learning_rate": 5.315612639698941e-06, "loss": 0.1887, "step": 10038 }, { "epoch": 0.8564238184610135, "grad_norm": 1.692569323969589, "learning_rate": 5.309415579268834e-06, "loss": 0.1642, "step": 10039 }, { "epoch": 0.8565091281351305, "grad_norm": 1.6029591495023023, "learning_rate": 5.303221930727364e-06, "loss": 0.1548, "step": 10040 }, { "epoch": 0.8565944378092476, "grad_norm": 1.7527313715290735, "learning_rate": 5.297031694547383e-06, "loss": 0.1896, "step": 10041 }, { "epoch": 0.8566797474833646, "grad_norm": 2.192400212208237, "learning_rate": 5.290844871201484e-06, "loss": 0.2116, "step": 10042 }, { "epoch": 0.8567650571574816, "grad_norm": 1.475167639952743, "learning_rate": 5.2846614611619885e-06, "loss": 0.1667, "step": 10043 }, { "epoch": 0.8568503668315987, "grad_norm": 1.7669449994821764, "learning_rate": 5.2784814649009754e-06, "loss": 0.1479, "step": 10044 }, { "epoch": 0.8569356765057158, "grad_norm": 1.5030899012030627, "learning_rate": 5.272304882890244e-06, "loss": 0.1508, "step": 10045 }, { "epoch": 0.8570209861798328, "grad_norm": 1.9301523119857638, "learning_rate": 5.266131715601358e-06, "loss": 0.2133, "step": 10046 }, { "epoch": 0.8571062958539498, "grad_norm": 1.7571763985785365, "learning_rate": 5.259961963505606e-06, "loss": 0.1736, "step": 10047 }, { "epoch": 0.8571916055280668, "grad_norm": 1.7140213160752842, "learning_rate": 5.253795627074004e-06, "loss": 0.151, "step": 10048 }, { "epoch": 0.857276915202184, "grad_norm": 1.9982972645414283, "learning_rate": 5.247632706777328e-06, "loss": 0.213, "step": 10049 }, { "epoch": 0.857362224876301, "grad_norm": 1.8438952114013047, "learning_rate": 5.241473203086084e-06, "loss": 0.1967, "step": 10050 }, { "epoch": 0.857447534550418, "grad_norm": 2.3282226951634, "learning_rate": 5.235317116470506e-06, "loss": 0.1771, "step": 10051 }, { "epoch": 0.857532844224535, "grad_norm": 1.5054619211419134, "learning_rate": 5.229164447400587e-06, "loss": 0.1391, "step": 10052 }, { "epoch": 0.8576181538986521, "grad_norm": 1.6698263576026933, "learning_rate": 5.223015196346065e-06, "loss": 0.1937, "step": 10053 }, { "epoch": 0.8577034635727692, "grad_norm": 1.7647263256927108, "learning_rate": 5.216869363776367e-06, "loss": 0.1674, "step": 10054 }, { "epoch": 0.8577887732468862, "grad_norm": 2.1641771857655137, "learning_rate": 5.210726950160727e-06, "loss": 0.1704, "step": 10055 }, { "epoch": 0.8578740829210032, "grad_norm": 1.9179643992201798, "learning_rate": 5.204587955968076e-06, "loss": 0.1791, "step": 10056 }, { "epoch": 0.8579593925951203, "grad_norm": 1.6197655809597766, "learning_rate": 5.19845238166709e-06, "loss": 0.1964, "step": 10057 }, { "epoch": 0.8580447022692373, "grad_norm": 2.1364112088719143, "learning_rate": 5.1923202277261775e-06, "loss": 0.1595, "step": 10058 }, { "epoch": 0.8581300119433544, "grad_norm": 1.880194906114589, "learning_rate": 5.186191494613529e-06, "loss": 0.304, "step": 10059 }, { "epoch": 0.8582153216174714, "grad_norm": 1.5268617928144936, "learning_rate": 5.180066182797006e-06, "loss": 0.1385, "step": 10060 }, { "epoch": 0.8583006312915885, "grad_norm": 1.7627310202408188, "learning_rate": 5.17394429274426e-06, "loss": 0.1881, "step": 10061 }, { "epoch": 0.8583859409657055, "grad_norm": 1.4930621873969518, "learning_rate": 5.1678258249226615e-06, "loss": 0.2018, "step": 10062 }, { "epoch": 0.8584712506398225, "grad_norm": 1.4803696249436114, "learning_rate": 5.161710779799328e-06, "loss": 0.2038, "step": 10063 }, { "epoch": 0.8585565603139396, "grad_norm": 2.0689055646508168, "learning_rate": 5.155599157841101e-06, "loss": 0.1616, "step": 10064 }, { "epoch": 0.8586418699880567, "grad_norm": 2.995573911368518, "learning_rate": 5.1494909595145695e-06, "loss": 0.2185, "step": 10065 }, { "epoch": 0.8587271796621737, "grad_norm": 2.5911385131937013, "learning_rate": 5.143386185286086e-06, "loss": 0.2022, "step": 10066 }, { "epoch": 0.8588124893362907, "grad_norm": 1.8250055025618699, "learning_rate": 5.137284835621681e-06, "loss": 0.2044, "step": 10067 }, { "epoch": 0.8588977990104077, "grad_norm": 2.1135301143756053, "learning_rate": 5.13118691098719e-06, "loss": 0.1839, "step": 10068 }, { "epoch": 0.8589831086845249, "grad_norm": 2.0483691171366214, "learning_rate": 5.1250924118481425e-06, "loss": 0.1536, "step": 10069 }, { "epoch": 0.8590684183586419, "grad_norm": 2.064250549944647, "learning_rate": 5.119001338669827e-06, "loss": 0.3044, "step": 10070 }, { "epoch": 0.8591537280327589, "grad_norm": 1.3510501750520287, "learning_rate": 5.112913691917259e-06, "loss": 0.1252, "step": 10071 }, { "epoch": 0.8592390377068759, "grad_norm": 2.310808955312858, "learning_rate": 5.106829472055202e-06, "loss": 0.2062, "step": 10072 }, { "epoch": 0.8593243473809931, "grad_norm": 1.6607672072034108, "learning_rate": 5.100748679548151e-06, "loss": 0.1773, "step": 10073 }, { "epoch": 0.8594096570551101, "grad_norm": 2.093505389599984, "learning_rate": 5.094671314860339e-06, "loss": 0.2441, "step": 10074 }, { "epoch": 0.8594949667292271, "grad_norm": 1.9743585776297836, "learning_rate": 5.088597378455762e-06, "loss": 0.1986, "step": 10075 }, { "epoch": 0.8595802764033441, "grad_norm": 1.922157328887797, "learning_rate": 5.082526870798093e-06, "loss": 0.162, "step": 10076 }, { "epoch": 0.8596655860774611, "grad_norm": 1.7388068958136078, "learning_rate": 5.0764597923508235e-06, "loss": 0.1956, "step": 10077 }, { "epoch": 0.8597508957515783, "grad_norm": 1.426112807927998, "learning_rate": 5.0703961435771105e-06, "loss": 0.1686, "step": 10078 }, { "epoch": 0.8598362054256953, "grad_norm": 1.745179212051798, "learning_rate": 5.064335924939917e-06, "loss": 0.1342, "step": 10079 }, { "epoch": 0.8599215150998123, "grad_norm": 1.4513295885025188, "learning_rate": 5.0582791369018665e-06, "loss": 0.2295, "step": 10080 }, { "epoch": 0.8600068247739293, "grad_norm": 2.149374240920589, "learning_rate": 5.0522257799253955e-06, "loss": 0.1961, "step": 10081 }, { "epoch": 0.8600921344480464, "grad_norm": 1.711746512274506, "learning_rate": 5.046175854472634e-06, "loss": 0.1699, "step": 10082 }, { "epoch": 0.8601774441221635, "grad_norm": 2.201905149566895, "learning_rate": 5.040129361005464e-06, "loss": 0.1396, "step": 10083 }, { "epoch": 0.8602627537962805, "grad_norm": 2.1687385360898492, "learning_rate": 5.034086299985497e-06, "loss": 0.2116, "step": 10084 }, { "epoch": 0.8603480634703975, "grad_norm": 1.67810360061953, "learning_rate": 5.028046671874093e-06, "loss": 0.1444, "step": 10085 }, { "epoch": 0.8604333731445146, "grad_norm": 2.161959952245537, "learning_rate": 5.02201047713235e-06, "loss": 0.2145, "step": 10086 }, { "epoch": 0.8605186828186316, "grad_norm": 1.5183956345060279, "learning_rate": 5.015977716221076e-06, "loss": 0.1972, "step": 10087 }, { "epoch": 0.8606039924927487, "grad_norm": 1.8239874029393797, "learning_rate": 5.009948389600883e-06, "loss": 0.2418, "step": 10088 }, { "epoch": 0.8606893021668657, "grad_norm": 1.9067122102290945, "learning_rate": 5.003922497732033e-06, "loss": 0.1911, "step": 10089 }, { "epoch": 0.8607746118409828, "grad_norm": 2.007644586926319, "learning_rate": 4.997900041074605e-06, "loss": 0.1919, "step": 10090 }, { "epoch": 0.8608599215150998, "grad_norm": 1.3854309979990613, "learning_rate": 4.991881020088362e-06, "loss": 0.1925, "step": 10091 }, { "epoch": 0.8609452311892168, "grad_norm": 1.7253186332429178, "learning_rate": 4.985865435232834e-06, "loss": 0.1212, "step": 10092 }, { "epoch": 0.8610305408633339, "grad_norm": 2.0324534445823184, "learning_rate": 4.979853286967273e-06, "loss": 0.1933, "step": 10093 }, { "epoch": 0.861115850537451, "grad_norm": 1.769753280082645, "learning_rate": 4.973844575750669e-06, "loss": 0.1558, "step": 10094 }, { "epoch": 0.861201160211568, "grad_norm": 2.2451970441702613, "learning_rate": 4.967839302041782e-06, "loss": 0.1389, "step": 10095 }, { "epoch": 0.861286469885685, "grad_norm": 1.6456992018241126, "learning_rate": 4.9618374662990406e-06, "loss": 0.1688, "step": 10096 }, { "epoch": 0.861371779559802, "grad_norm": 1.7583079000263766, "learning_rate": 4.955839068980689e-06, "loss": 0.153, "step": 10097 }, { "epoch": 0.8614570892339192, "grad_norm": 1.6388913395247304, "learning_rate": 4.9498441105446606e-06, "loss": 0.2042, "step": 10098 }, { "epoch": 0.8615423989080362, "grad_norm": 2.05405001305865, "learning_rate": 4.9438525914486385e-06, "loss": 0.1176, "step": 10099 }, { "epoch": 0.8616277085821532, "grad_norm": 1.6223915879718065, "learning_rate": 4.937864512150031e-06, "loss": 0.1607, "step": 10100 }, { "epoch": 0.8617130182562702, "grad_norm": 2.3951229092590975, "learning_rate": 4.931879873106027e-06, "loss": 0.1566, "step": 10101 }, { "epoch": 0.8617983279303874, "grad_norm": 1.9043977837847526, "learning_rate": 4.925898674773488e-06, "loss": 0.2082, "step": 10102 }, { "epoch": 0.8618836376045044, "grad_norm": 1.844970686971723, "learning_rate": 4.919920917609066e-06, "loss": 0.1874, "step": 10103 }, { "epoch": 0.8619689472786214, "grad_norm": 1.7604193452291783, "learning_rate": 4.9139466020691305e-06, "loss": 0.1834, "step": 10104 }, { "epoch": 0.8620542569527384, "grad_norm": 1.5030826047125139, "learning_rate": 4.907975728609782e-06, "loss": 0.1535, "step": 10105 }, { "epoch": 0.8621395666268555, "grad_norm": 1.9202819672804057, "learning_rate": 4.902008297686872e-06, "loss": 0.1624, "step": 10106 }, { "epoch": 0.8622248763009726, "grad_norm": 1.6524653108219394, "learning_rate": 4.896044309755965e-06, "loss": 0.2314, "step": 10107 }, { "epoch": 0.8623101859750896, "grad_norm": 1.7505127291920721, "learning_rate": 4.890083765272413e-06, "loss": 0.2528, "step": 10108 }, { "epoch": 0.8623954956492066, "grad_norm": 1.915916821404449, "learning_rate": 4.884126664691229e-06, "loss": 0.1995, "step": 10109 }, { "epoch": 0.8624808053233237, "grad_norm": 1.6205765600059814, "learning_rate": 4.878173008467241e-06, "loss": 0.2212, "step": 10110 }, { "epoch": 0.8625661149974407, "grad_norm": 1.7912210161588793, "learning_rate": 4.872222797054971e-06, "loss": 0.2058, "step": 10111 }, { "epoch": 0.8626514246715578, "grad_norm": 1.734728665420923, "learning_rate": 4.866276030908678e-06, "loss": 0.1228, "step": 10112 }, { "epoch": 0.8627367343456748, "grad_norm": 2.1315169539183376, "learning_rate": 4.8603327104823685e-06, "loss": 0.1673, "step": 10113 }, { "epoch": 0.8628220440197918, "grad_norm": 2.0166658774550394, "learning_rate": 4.854392836229788e-06, "loss": 0.1518, "step": 10114 }, { "epoch": 0.8629073536939089, "grad_norm": 2.139977795851125, "learning_rate": 4.848456408604407e-06, "loss": 0.1894, "step": 10115 }, { "epoch": 0.8629926633680259, "grad_norm": 1.77526828189059, "learning_rate": 4.842523428059437e-06, "loss": 0.2349, "step": 10116 }, { "epoch": 0.863077973042143, "grad_norm": 2.5046627431349204, "learning_rate": 4.836593895047853e-06, "loss": 0.1782, "step": 10117 }, { "epoch": 0.86316328271626, "grad_norm": 1.6507394636419617, "learning_rate": 4.83066781002231e-06, "loss": 0.1485, "step": 10118 }, { "epoch": 0.8632485923903771, "grad_norm": 1.6962457013339263, "learning_rate": 4.824745173435258e-06, "loss": 0.1575, "step": 10119 }, { "epoch": 0.8633339020644941, "grad_norm": 1.915046407869327, "learning_rate": 4.818825985738856e-06, "loss": 0.1256, "step": 10120 }, { "epoch": 0.8634192117386111, "grad_norm": 1.5143287857567325, "learning_rate": 4.812910247384994e-06, "loss": 0.1737, "step": 10121 }, { "epoch": 0.8635045214127282, "grad_norm": 1.6407940005497308, "learning_rate": 4.806997958825299e-06, "loss": 0.1442, "step": 10122 }, { "epoch": 0.8635898310868453, "grad_norm": 1.801944911102025, "learning_rate": 4.801089120511165e-06, "loss": 0.1734, "step": 10123 }, { "epoch": 0.8636751407609623, "grad_norm": 1.6985448443646907, "learning_rate": 4.795183732893694e-06, "loss": 0.2182, "step": 10124 }, { "epoch": 0.8637604504350793, "grad_norm": 1.8631062615395075, "learning_rate": 4.789281796423723e-06, "loss": 0.1988, "step": 10125 }, { "epoch": 0.8638457601091963, "grad_norm": 2.0116592546572454, "learning_rate": 4.783383311551837e-06, "loss": 0.1495, "step": 10126 }, { "epoch": 0.8639310697833135, "grad_norm": 1.4991030395446463, "learning_rate": 4.777488278728354e-06, "loss": 0.1553, "step": 10127 }, { "epoch": 0.8640163794574305, "grad_norm": 1.3993102179555572, "learning_rate": 4.771596698403336e-06, "loss": 0.1644, "step": 10128 }, { "epoch": 0.8641016891315475, "grad_norm": 1.4323655357963647, "learning_rate": 4.76570857102655e-06, "loss": 0.1625, "step": 10129 }, { "epoch": 0.8641869988056645, "grad_norm": 1.8465649081447029, "learning_rate": 4.75982389704756e-06, "loss": 0.1437, "step": 10130 }, { "epoch": 0.8642723084797816, "grad_norm": 1.7385120024377856, "learning_rate": 4.753942676915591e-06, "loss": 0.1421, "step": 10131 }, { "epoch": 0.8643576181538987, "grad_norm": 1.832193395926228, "learning_rate": 4.748064911079669e-06, "loss": 0.2708, "step": 10132 }, { "epoch": 0.8644429278280157, "grad_norm": 1.929689106187654, "learning_rate": 4.742190599988522e-06, "loss": 0.1707, "step": 10133 }, { "epoch": 0.8645282375021327, "grad_norm": 1.4665686459009166, "learning_rate": 4.736319744090628e-06, "loss": 0.2371, "step": 10134 }, { "epoch": 0.8646135471762498, "grad_norm": 1.4940602798301799, "learning_rate": 4.7304523438341855e-06, "loss": 0.1401, "step": 10135 }, { "epoch": 0.8646988568503668, "grad_norm": 1.77143693248472, "learning_rate": 4.7245883996671405e-06, "loss": 0.1764, "step": 10136 }, { "epoch": 0.8647841665244839, "grad_norm": 1.9167047441542548, "learning_rate": 4.7187279120371905e-06, "loss": 0.1866, "step": 10137 }, { "epoch": 0.8648694761986009, "grad_norm": 1.8887972864054763, "learning_rate": 4.712870881391723e-06, "loss": 0.1449, "step": 10138 }, { "epoch": 0.864954785872718, "grad_norm": 1.2266079353615726, "learning_rate": 4.707017308177919e-06, "loss": 0.1863, "step": 10139 }, { "epoch": 0.865040095546835, "grad_norm": 1.7956380816154756, "learning_rate": 4.701167192842659e-06, "loss": 0.2157, "step": 10140 }, { "epoch": 0.865125405220952, "grad_norm": 1.829588345154677, "learning_rate": 4.695320535832565e-06, "loss": 0.1639, "step": 10141 }, { "epoch": 0.8652107148950691, "grad_norm": 1.5248814708735965, "learning_rate": 4.6894773375939836e-06, "loss": 0.1728, "step": 10142 }, { "epoch": 0.8652960245691862, "grad_norm": 1.4572778242579465, "learning_rate": 4.683637598573054e-06, "loss": 0.1907, "step": 10143 }, { "epoch": 0.8653813342433032, "grad_norm": 1.4214445028350184, "learning_rate": 4.677801319215564e-06, "loss": 0.1604, "step": 10144 }, { "epoch": 0.8654666439174202, "grad_norm": 1.7067287011097851, "learning_rate": 4.6719684999671055e-06, "loss": 0.1589, "step": 10145 }, { "epoch": 0.8655519535915372, "grad_norm": 1.0729422458204407, "learning_rate": 4.66613914127298e-06, "loss": 0.1255, "step": 10146 }, { "epoch": 0.8656372632656544, "grad_norm": 1.4041453400453492, "learning_rate": 4.6603132435782295e-06, "loss": 0.1692, "step": 10147 }, { "epoch": 0.8657225729397714, "grad_norm": 1.5934250444223546, "learning_rate": 4.6544908073276276e-06, "loss": 0.1918, "step": 10148 }, { "epoch": 0.8658078826138884, "grad_norm": 1.5783295451638757, "learning_rate": 4.64867183296569e-06, "loss": 0.166, "step": 10149 }, { "epoch": 0.8658931922880054, "grad_norm": 2.0707592752007846, "learning_rate": 4.642856320936656e-06, "loss": 0.2075, "step": 10150 }, { "epoch": 0.8659785019621226, "grad_norm": 2.306890747049639, "learning_rate": 4.637044271684504e-06, "loss": 0.2279, "step": 10151 }, { "epoch": 0.8660638116362396, "grad_norm": 1.6860127429593723, "learning_rate": 4.631235685652979e-06, "loss": 0.184, "step": 10152 }, { "epoch": 0.8661491213103566, "grad_norm": 1.6193594238285953, "learning_rate": 4.625430563285515e-06, "loss": 0.1702, "step": 10153 }, { "epoch": 0.8662344309844736, "grad_norm": 1.3284824563484312, "learning_rate": 4.619628905025308e-06, "loss": 0.1684, "step": 10154 }, { "epoch": 0.8663197406585906, "grad_norm": 2.619213629493835, "learning_rate": 4.613830711315287e-06, "loss": 0.1717, "step": 10155 }, { "epoch": 0.8664050503327078, "grad_norm": 1.8641191753715192, "learning_rate": 4.608035982598108e-06, "loss": 0.1815, "step": 10156 }, { "epoch": 0.8664903600068248, "grad_norm": 1.426632812371652, "learning_rate": 4.6022447193161625e-06, "loss": 0.1747, "step": 10157 }, { "epoch": 0.8665756696809418, "grad_norm": 1.8780260463041836, "learning_rate": 4.596456921911585e-06, "loss": 0.1557, "step": 10158 }, { "epoch": 0.8666609793550588, "grad_norm": 1.6185361978739077, "learning_rate": 4.590672590826267e-06, "loss": 0.1608, "step": 10159 }, { "epoch": 0.8667462890291759, "grad_norm": 1.868156405172493, "learning_rate": 4.584891726501772e-06, "loss": 0.1608, "step": 10160 }, { "epoch": 0.866831598703293, "grad_norm": 1.574485131531207, "learning_rate": 4.579114329379469e-06, "loss": 0.1834, "step": 10161 }, { "epoch": 0.86691690837741, "grad_norm": 1.3843501103835318, "learning_rate": 4.573340399900417e-06, "loss": 0.1592, "step": 10162 }, { "epoch": 0.867002218051527, "grad_norm": 1.6498040053974854, "learning_rate": 4.567569938505434e-06, "loss": 0.204, "step": 10163 }, { "epoch": 0.8670875277256441, "grad_norm": 1.3385605520061556, "learning_rate": 4.561802945635046e-06, "loss": 0.1783, "step": 10164 }, { "epoch": 0.8671728373997611, "grad_norm": 1.9367793188745437, "learning_rate": 4.556039421729563e-06, "loss": 0.2275, "step": 10165 }, { "epoch": 0.8672581470738782, "grad_norm": 2.114023018501432, "learning_rate": 4.55027936722896e-06, "loss": 0.2121, "step": 10166 }, { "epoch": 0.8673434567479952, "grad_norm": 1.7816151027277407, "learning_rate": 4.544522782573019e-06, "loss": 0.1955, "step": 10167 }, { "epoch": 0.8674287664221123, "grad_norm": 1.8019017769381633, "learning_rate": 4.5387696682012145e-06, "loss": 0.2275, "step": 10168 }, { "epoch": 0.8675140760962293, "grad_norm": 2.3658522059633027, "learning_rate": 4.533020024552765e-06, "loss": 0.2069, "step": 10169 }, { "epoch": 0.8675993857703463, "grad_norm": 1.4856452726747065, "learning_rate": 4.527273852066627e-06, "loss": 0.1695, "step": 10170 }, { "epoch": 0.8676846954444634, "grad_norm": 2.0040166575335334, "learning_rate": 4.521531151181474e-06, "loss": 0.2214, "step": 10171 }, { "epoch": 0.8677700051185805, "grad_norm": 1.5995516923837334, "learning_rate": 4.515791922335772e-06, "loss": 0.1585, "step": 10172 }, { "epoch": 0.8678553147926975, "grad_norm": 1.6796572660342206, "learning_rate": 4.51005616596763e-06, "loss": 0.1512, "step": 10173 }, { "epoch": 0.8679406244668145, "grad_norm": 2.658289496639355, "learning_rate": 4.504323882514977e-06, "loss": 0.1641, "step": 10174 }, { "epoch": 0.8680259341409315, "grad_norm": 1.8984460398791976, "learning_rate": 4.498595072415429e-06, "loss": 0.2033, "step": 10175 }, { "epoch": 0.8681112438150487, "grad_norm": 1.9657127324403918, "learning_rate": 4.492869736106354e-06, "loss": 0.1559, "step": 10176 }, { "epoch": 0.8681965534891657, "grad_norm": 2.4608222086709124, "learning_rate": 4.487147874024855e-06, "loss": 0.2245, "step": 10177 }, { "epoch": 0.8682818631632827, "grad_norm": 1.9531293945263062, "learning_rate": 4.4814294866077525e-06, "loss": 0.2152, "step": 10178 }, { "epoch": 0.8683671728373997, "grad_norm": 1.6737489982072085, "learning_rate": 4.475714574291628e-06, "loss": 0.1736, "step": 10179 }, { "epoch": 0.8684524825115169, "grad_norm": 1.5014697662520935, "learning_rate": 4.470003137512774e-06, "loss": 0.2082, "step": 10180 }, { "epoch": 0.8685377921856339, "grad_norm": 1.6054181093271669, "learning_rate": 4.4642951767072395e-06, "loss": 0.1353, "step": 10181 }, { "epoch": 0.8686231018597509, "grad_norm": 1.5932788526459574, "learning_rate": 4.458590692310793e-06, "loss": 0.1629, "step": 10182 }, { "epoch": 0.8687084115338679, "grad_norm": 1.4488882834502153, "learning_rate": 4.452889684758938e-06, "loss": 0.1388, "step": 10183 }, { "epoch": 0.868793721207985, "grad_norm": 1.4892353994215075, "learning_rate": 4.447192154486912e-06, "loss": 0.1598, "step": 10184 }, { "epoch": 0.868879030882102, "grad_norm": 1.3745526106123132, "learning_rate": 4.441498101929714e-06, "loss": 0.1679, "step": 10185 }, { "epoch": 0.8689643405562191, "grad_norm": 2.121683224985256, "learning_rate": 4.435807527522024e-06, "loss": 0.1267, "step": 10186 }, { "epoch": 0.8690496502303361, "grad_norm": 2.0047286876036066, "learning_rate": 4.4301204316983035e-06, "loss": 0.1773, "step": 10187 }, { "epoch": 0.8691349599044532, "grad_norm": 2.190496299979109, "learning_rate": 4.424436814892735e-06, "loss": 0.2195, "step": 10188 }, { "epoch": 0.8692202695785702, "grad_norm": 1.529349880347383, "learning_rate": 4.4187566775392265e-06, "loss": 0.2137, "step": 10189 }, { "epoch": 0.8693055792526873, "grad_norm": 1.75881508951453, "learning_rate": 4.413080020071425e-06, "loss": 0.1541, "step": 10190 }, { "epoch": 0.8693908889268043, "grad_norm": 1.9369200176806392, "learning_rate": 4.4074068429227174e-06, "loss": 0.2109, "step": 10191 }, { "epoch": 0.8694761986009213, "grad_norm": 1.3925359219312867, "learning_rate": 4.401737146526219e-06, "loss": 0.1442, "step": 10192 }, { "epoch": 0.8695615082750384, "grad_norm": 1.6955771525318417, "learning_rate": 4.396070931314772e-06, "loss": 0.1594, "step": 10193 }, { "epoch": 0.8696468179491554, "grad_norm": 1.997848127964255, "learning_rate": 4.3904081977209855e-06, "loss": 0.1814, "step": 10194 }, { "epoch": 0.8697321276232725, "grad_norm": 1.6511610772145537, "learning_rate": 4.384748946177153e-06, "loss": 0.2336, "step": 10195 }, { "epoch": 0.8698174372973895, "grad_norm": 1.9272185063423282, "learning_rate": 4.37909317711534e-06, "loss": 0.1799, "step": 10196 }, { "epoch": 0.8699027469715066, "grad_norm": 1.864993350449231, "learning_rate": 4.37344089096734e-06, "loss": 0.1896, "step": 10197 }, { "epoch": 0.8699880566456236, "grad_norm": 1.3506179331356734, "learning_rate": 4.367792088164662e-06, "loss": 0.2418, "step": 10198 }, { "epoch": 0.8700733663197406, "grad_norm": 1.7148090611036235, "learning_rate": 4.362146769138575e-06, "loss": 0.1592, "step": 10199 }, { "epoch": 0.8701586759938577, "grad_norm": 1.7481828519477, "learning_rate": 4.356504934320049e-06, "loss": 0.2006, "step": 10200 }, { "epoch": 0.8702439856679748, "grad_norm": 2.172175983458245, "learning_rate": 4.350866584139845e-06, "loss": 0.2152, "step": 10201 }, { "epoch": 0.8703292953420918, "grad_norm": 1.8968620513524335, "learning_rate": 4.3452317190283755e-06, "loss": 0.1892, "step": 10202 }, { "epoch": 0.8704146050162088, "grad_norm": 1.6380174532695237, "learning_rate": 4.3396003394158665e-06, "loss": 0.1224, "step": 10203 }, { "epoch": 0.8704999146903258, "grad_norm": 1.6455317917160452, "learning_rate": 4.333972445732231e-06, "loss": 0.1865, "step": 10204 }, { "epoch": 0.870585224364443, "grad_norm": 1.4657458159991932, "learning_rate": 4.328348038407132e-06, "loss": 0.1924, "step": 10205 }, { "epoch": 0.87067053403856, "grad_norm": 1.858275914124799, "learning_rate": 4.322727117869951e-06, "loss": 0.198, "step": 10206 }, { "epoch": 0.870755843712677, "grad_norm": 1.8427241429466026, "learning_rate": 4.317109684549847e-06, "loss": 0.2105, "step": 10207 }, { "epoch": 0.870841153386794, "grad_norm": 2.0438981868671466, "learning_rate": 4.311495738875637e-06, "loss": 0.1589, "step": 10208 }, { "epoch": 0.8709264630609111, "grad_norm": 1.750715245811758, "learning_rate": 4.305885281275951e-06, "loss": 0.1923, "step": 10209 }, { "epoch": 0.8710117727350282, "grad_norm": 1.6532102797204562, "learning_rate": 4.300278312179107e-06, "loss": 0.2038, "step": 10210 }, { "epoch": 0.8710970824091452, "grad_norm": 1.6670619813669911, "learning_rate": 4.294674832013163e-06, "loss": 0.1458, "step": 10211 }, { "epoch": 0.8711823920832622, "grad_norm": 1.9043421969766976, "learning_rate": 4.289074841205914e-06, "loss": 0.1698, "step": 10212 }, { "epoch": 0.8712677017573793, "grad_norm": 2.4108086542298564, "learning_rate": 4.283478340184893e-06, "loss": 0.2283, "step": 10213 }, { "epoch": 0.8713530114314963, "grad_norm": 1.2929969623173008, "learning_rate": 4.277885329377373e-06, "loss": 0.1274, "step": 10214 }, { "epoch": 0.8714383211056134, "grad_norm": 1.9505895408137965, "learning_rate": 4.27229580921033e-06, "loss": 0.113, "step": 10215 }, { "epoch": 0.8715236307797304, "grad_norm": 1.4949645718870765, "learning_rate": 4.266709780110511e-06, "loss": 0.1292, "step": 10216 }, { "epoch": 0.8716089404538475, "grad_norm": 1.692265880174892, "learning_rate": 4.261127242504376e-06, "loss": 0.2185, "step": 10217 }, { "epoch": 0.8716942501279645, "grad_norm": 1.756530972740595, "learning_rate": 4.255548196818115e-06, "loss": 0.2274, "step": 10218 }, { "epoch": 0.8717795598020815, "grad_norm": 1.7216465904096832, "learning_rate": 4.249972643477668e-06, "loss": 0.1487, "step": 10219 }, { "epoch": 0.8718648694761986, "grad_norm": 1.755705592260196, "learning_rate": 4.244400582908692e-06, "loss": 0.1746, "step": 10220 }, { "epoch": 0.8719501791503157, "grad_norm": 1.4793727713255056, "learning_rate": 4.238832015536587e-06, "loss": 0.1499, "step": 10221 }, { "epoch": 0.8720354888244327, "grad_norm": 1.3025070518258826, "learning_rate": 4.2332669417864735e-06, "loss": 0.1486, "step": 10222 }, { "epoch": 0.8721207984985497, "grad_norm": 2.015079631509803, "learning_rate": 4.2277053620832455e-06, "loss": 0.1936, "step": 10223 }, { "epoch": 0.8722061081726667, "grad_norm": 1.5840658785433477, "learning_rate": 4.222147276851457e-06, "loss": 0.285, "step": 10224 }, { "epoch": 0.8722914178467839, "grad_norm": 1.6736359636177156, "learning_rate": 4.216592686515475e-06, "loss": 0.1729, "step": 10225 }, { "epoch": 0.8723767275209009, "grad_norm": 2.000531364425779, "learning_rate": 4.2110415914993376e-06, "loss": 0.1482, "step": 10226 }, { "epoch": 0.8724620371950179, "grad_norm": 1.7368584800173428, "learning_rate": 4.205493992226867e-06, "loss": 0.2376, "step": 10227 }, { "epoch": 0.8725473468691349, "grad_norm": 1.922924158557695, "learning_rate": 4.199949889121563e-06, "loss": 0.1515, "step": 10228 }, { "epoch": 0.872632656543252, "grad_norm": 1.781403652055765, "learning_rate": 4.194409282606715e-06, "loss": 0.2097, "step": 10229 }, { "epoch": 0.8727179662173691, "grad_norm": 1.6253331649807308, "learning_rate": 4.188872173105302e-06, "loss": 0.1677, "step": 10230 }, { "epoch": 0.8728032758914861, "grad_norm": 1.7545014117135482, "learning_rate": 4.183338561040062e-06, "loss": 0.1996, "step": 10231 }, { "epoch": 0.8728885855656031, "grad_norm": 1.6051685210317497, "learning_rate": 4.177808446833453e-06, "loss": 0.1792, "step": 10232 }, { "epoch": 0.8729738952397201, "grad_norm": 1.567704649590339, "learning_rate": 4.1722818309076706e-06, "loss": 0.2, "step": 10233 }, { "epoch": 0.8730592049138373, "grad_norm": 1.3726565591706485, "learning_rate": 4.166758713684643e-06, "loss": 0.1289, "step": 10234 }, { "epoch": 0.8731445145879543, "grad_norm": 1.6235590194439868, "learning_rate": 4.161239095586022e-06, "loss": 0.2111, "step": 10235 }, { "epoch": 0.8732298242620713, "grad_norm": 1.6890015456134417, "learning_rate": 4.155722977033222e-06, "loss": 0.1607, "step": 10236 }, { "epoch": 0.8733151339361883, "grad_norm": 1.4366362920914932, "learning_rate": 4.150210358447343e-06, "loss": 0.1875, "step": 10237 }, { "epoch": 0.8734004436103054, "grad_norm": 1.2113637358626224, "learning_rate": 4.1447012402492654e-06, "loss": 0.1377, "step": 10238 }, { "epoch": 0.8734857532844225, "grad_norm": 2.6906542343341395, "learning_rate": 4.13919562285957e-06, "loss": 0.2345, "step": 10239 }, { "epoch": 0.8735710629585395, "grad_norm": 1.9342526324736071, "learning_rate": 4.133693506698589e-06, "loss": 0.1748, "step": 10240 }, { "epoch": 0.8736563726326565, "grad_norm": 2.1098027678597826, "learning_rate": 4.128194892186376e-06, "loss": 0.1431, "step": 10241 }, { "epoch": 0.8737416823067736, "grad_norm": 1.8859961727342172, "learning_rate": 4.122699779742711e-06, "loss": 0.1201, "step": 10242 }, { "epoch": 0.8738269919808906, "grad_norm": 2.053319903779803, "learning_rate": 4.11720816978714e-06, "loss": 0.1911, "step": 10243 }, { "epoch": 0.8739123016550077, "grad_norm": 2.7753973255438997, "learning_rate": 4.111720062738894e-06, "loss": 0.1569, "step": 10244 }, { "epoch": 0.8739976113291247, "grad_norm": 2.2073637315244428, "learning_rate": 4.106235459016972e-06, "loss": 0.1882, "step": 10245 }, { "epoch": 0.8740829210032418, "grad_norm": 2.107442578204319, "learning_rate": 4.1007543590400984e-06, "loss": 0.203, "step": 10246 }, { "epoch": 0.8741682306773588, "grad_norm": 1.9552067762913006, "learning_rate": 4.095276763226719e-06, "loss": 0.1722, "step": 10247 }, { "epoch": 0.8742535403514758, "grad_norm": 1.5693793869412997, "learning_rate": 4.089802671995013e-06, "loss": 0.1715, "step": 10248 }, { "epoch": 0.8743388500255929, "grad_norm": 1.8143512873819827, "learning_rate": 4.084332085762926e-06, "loss": 0.1829, "step": 10249 }, { "epoch": 0.87442415969971, "grad_norm": 1.6368135047014285, "learning_rate": 4.0788650049480684e-06, "loss": 0.1856, "step": 10250 }, { "epoch": 0.874509469373827, "grad_norm": 1.6850177027058222, "learning_rate": 4.073401429967854e-06, "loss": 0.1827, "step": 10251 }, { "epoch": 0.874594779047944, "grad_norm": 2.1335698274816868, "learning_rate": 4.067941361239386e-06, "loss": 0.2177, "step": 10252 }, { "epoch": 0.874680088722061, "grad_norm": 2.045397509155368, "learning_rate": 4.062484799179511e-06, "loss": 0.1137, "step": 10253 }, { "epoch": 0.8747653983961782, "grad_norm": 2.1839255420372834, "learning_rate": 4.0570317442048086e-06, "loss": 0.1781, "step": 10254 }, { "epoch": 0.8748507080702952, "grad_norm": 1.6719572813517944, "learning_rate": 4.051582196731596e-06, "loss": 0.2054, "step": 10255 }, { "epoch": 0.8749360177444122, "grad_norm": 1.7902436334619036, "learning_rate": 4.046136157175912e-06, "loss": 0.1673, "step": 10256 }, { "epoch": 0.8750213274185292, "grad_norm": 1.8471539643311643, "learning_rate": 4.040693625953523e-06, "loss": 0.2056, "step": 10257 }, { "epoch": 0.8751066370926464, "grad_norm": 1.395462195556474, "learning_rate": 4.03525460347996e-06, "loss": 0.1299, "step": 10258 }, { "epoch": 0.8751919467667634, "grad_norm": 1.6132719395255704, "learning_rate": 4.0298190901704505e-06, "loss": 0.2011, "step": 10259 }, { "epoch": 0.8752772564408804, "grad_norm": 1.286789989062764, "learning_rate": 4.024387086439962e-06, "loss": 0.1357, "step": 10260 }, { "epoch": 0.8753625661149974, "grad_norm": 1.7184396636999215, "learning_rate": 4.018958592703215e-06, "loss": 0.1185, "step": 10261 }, { "epoch": 0.8754478757891145, "grad_norm": 1.862029032717498, "learning_rate": 4.013533609374631e-06, "loss": 0.1966, "step": 10262 }, { "epoch": 0.8755331854632316, "grad_norm": 1.5792300443923843, "learning_rate": 4.008112136868386e-06, "loss": 0.1719, "step": 10263 }, { "epoch": 0.8756184951373486, "grad_norm": 2.264175056308127, "learning_rate": 4.002694175598371e-06, "loss": 0.2439, "step": 10264 }, { "epoch": 0.8757038048114656, "grad_norm": 1.7646904556684702, "learning_rate": 3.9972797259782425e-06, "loss": 0.1471, "step": 10265 }, { "epoch": 0.8757891144855827, "grad_norm": 2.027659838075734, "learning_rate": 3.991868788421327e-06, "loss": 0.2055, "step": 10266 }, { "epoch": 0.8758744241596997, "grad_norm": 1.9014594746799358, "learning_rate": 3.986461363340754e-06, "loss": 0.1221, "step": 10267 }, { "epoch": 0.8759597338338168, "grad_norm": 1.553385203495528, "learning_rate": 3.981057451149344e-06, "loss": 0.0937, "step": 10268 }, { "epoch": 0.8760450435079338, "grad_norm": 2.035493496941842, "learning_rate": 3.9756570522596516e-06, "loss": 0.268, "step": 10269 }, { "epoch": 0.8761303531820508, "grad_norm": 2.400456774477569, "learning_rate": 3.970260167083961e-06, "loss": 0.2455, "step": 10270 }, { "epoch": 0.8762156628561679, "grad_norm": 1.7565364020376435, "learning_rate": 3.964866796034311e-06, "loss": 0.1718, "step": 10271 }, { "epoch": 0.8763009725302849, "grad_norm": 1.809404426295893, "learning_rate": 3.959476939522455e-06, "loss": 0.1737, "step": 10272 }, { "epoch": 0.876386282204402, "grad_norm": 1.5209123077093232, "learning_rate": 3.95409059795987e-06, "loss": 0.1809, "step": 10273 }, { "epoch": 0.876471591878519, "grad_norm": 1.7183066750033191, "learning_rate": 3.948707771757781e-06, "loss": 0.2034, "step": 10274 }, { "epoch": 0.8765569015526361, "grad_norm": 1.7417762168246096, "learning_rate": 3.943328461327145e-06, "loss": 0.1431, "step": 10275 }, { "epoch": 0.8766422112267531, "grad_norm": 1.3329790161152442, "learning_rate": 3.937952667078626e-06, "loss": 0.1738, "step": 10276 }, { "epoch": 0.8767275209008701, "grad_norm": 1.7514729431832867, "learning_rate": 3.932580389422647e-06, "loss": 0.1884, "step": 10277 }, { "epoch": 0.8768128305749872, "grad_norm": 1.48420352949271, "learning_rate": 3.927211628769367e-06, "loss": 0.1861, "step": 10278 }, { "epoch": 0.8768981402491043, "grad_norm": 1.6321380536460965, "learning_rate": 3.921846385528633e-06, "loss": 0.1984, "step": 10279 }, { "epoch": 0.8769834499232213, "grad_norm": 1.8669604558449986, "learning_rate": 3.916484660110076e-06, "loss": 0.1557, "step": 10280 }, { "epoch": 0.8770687595973383, "grad_norm": 1.939232697498577, "learning_rate": 3.911126452923025e-06, "loss": 0.1466, "step": 10281 }, { "epoch": 0.8771540692714553, "grad_norm": 1.5928249292335892, "learning_rate": 3.905771764376553e-06, "loss": 0.1626, "step": 10282 }, { "epoch": 0.8772393789455725, "grad_norm": 1.8789739457781112, "learning_rate": 3.900420594879467e-06, "loss": 0.2047, "step": 10283 }, { "epoch": 0.8773246886196895, "grad_norm": 2.0965068374547764, "learning_rate": 3.895072944840294e-06, "loss": 0.1808, "step": 10284 }, { "epoch": 0.8774099982938065, "grad_norm": 1.6802050835455227, "learning_rate": 3.889728814667298e-06, "loss": 0.143, "step": 10285 }, { "epoch": 0.8774953079679235, "grad_norm": 2.0091829484015897, "learning_rate": 3.8843882047684745e-06, "loss": 0.1816, "step": 10286 }, { "epoch": 0.8775806176420407, "grad_norm": 1.477254311906968, "learning_rate": 3.879051115551557e-06, "loss": 0.191, "step": 10287 }, { "epoch": 0.8776659273161577, "grad_norm": 1.6922251632880836, "learning_rate": 3.873717547423999e-06, "loss": 0.2308, "step": 10288 }, { "epoch": 0.8777512369902747, "grad_norm": 2.01378932429064, "learning_rate": 3.868387500792997e-06, "loss": 0.1574, "step": 10289 }, { "epoch": 0.8778365466643917, "grad_norm": 1.719439212289196, "learning_rate": 3.863060976065452e-06, "loss": 0.1589, "step": 10290 }, { "epoch": 0.8779218563385088, "grad_norm": 1.4409282274181578, "learning_rate": 3.857737973648051e-06, "loss": 0.1439, "step": 10291 }, { "epoch": 0.8780071660126259, "grad_norm": 1.3002808670930472, "learning_rate": 3.852418493947135e-06, "loss": 0.1324, "step": 10292 }, { "epoch": 0.8780924756867429, "grad_norm": 3.545237158844408, "learning_rate": 3.847102537368852e-06, "loss": 0.2491, "step": 10293 }, { "epoch": 0.8781777853608599, "grad_norm": 1.9846961294739498, "learning_rate": 3.8417901043190315e-06, "loss": 0.1313, "step": 10294 }, { "epoch": 0.878263095034977, "grad_norm": 1.574659489350162, "learning_rate": 3.836481195203251e-06, "loss": 0.1841, "step": 10295 }, { "epoch": 0.878348404709094, "grad_norm": 1.9559365749646445, "learning_rate": 3.831175810426823e-06, "loss": 0.2027, "step": 10296 }, { "epoch": 0.878433714383211, "grad_norm": 1.584596941148318, "learning_rate": 3.825873950394776e-06, "loss": 0.2084, "step": 10297 }, { "epoch": 0.8785190240573281, "grad_norm": 1.612175735871409, "learning_rate": 3.82057561551189e-06, "loss": 0.1516, "step": 10298 }, { "epoch": 0.8786043337314452, "grad_norm": 1.4840574225963201, "learning_rate": 3.815280806182647e-06, "loss": 0.1395, "step": 10299 }, { "epoch": 0.8786896434055622, "grad_norm": 1.9596788234698068, "learning_rate": 3.8099895228113024e-06, "loss": 0.1691, "step": 10300 }, { "epoch": 0.8787749530796792, "grad_norm": 1.8797313757997485, "learning_rate": 3.8047017658017935e-06, "loss": 0.1634, "step": 10301 }, { "epoch": 0.8788602627537963, "grad_norm": 2.1472664155589096, "learning_rate": 3.7994175355578256e-06, "loss": 0.2669, "step": 10302 }, { "epoch": 0.8789455724279134, "grad_norm": 2.1908045195895958, "learning_rate": 3.7941368324828253e-06, "loss": 0.1907, "step": 10303 }, { "epoch": 0.8790308821020304, "grad_norm": 1.6544691624377281, "learning_rate": 3.7888596569799316e-06, "loss": 0.1724, "step": 10304 }, { "epoch": 0.8791161917761474, "grad_norm": 2.063677365037416, "learning_rate": 3.7835860094520446e-06, "loss": 0.1693, "step": 10305 }, { "epoch": 0.8792015014502644, "grad_norm": 1.8177245247747331, "learning_rate": 3.7783158903017645e-06, "loss": 0.2372, "step": 10306 }, { "epoch": 0.8792868111243815, "grad_norm": 1.7041315026918542, "learning_rate": 3.7730492999314583e-06, "loss": 0.1228, "step": 10307 }, { "epoch": 0.8793721207984986, "grad_norm": 2.2660688294397198, "learning_rate": 3.7677862387431706e-06, "loss": 0.174, "step": 10308 }, { "epoch": 0.8794574304726156, "grad_norm": 1.9877039103000156, "learning_rate": 3.762526707138736e-06, "loss": 0.1437, "step": 10309 }, { "epoch": 0.8795427401467326, "grad_norm": 1.6444163520163275, "learning_rate": 3.7572707055196775e-06, "loss": 0.121, "step": 10310 }, { "epoch": 0.8796280498208496, "grad_norm": 1.8718571071374588, "learning_rate": 3.7520182342872743e-06, "loss": 0.155, "step": 10311 }, { "epoch": 0.8797133594949668, "grad_norm": 1.7217683125587333, "learning_rate": 3.7467692938425057e-06, "loss": 0.1894, "step": 10312 }, { "epoch": 0.8797986691690838, "grad_norm": 1.6696588677206208, "learning_rate": 3.741523884586129e-06, "loss": 0.2008, "step": 10313 }, { "epoch": 0.8798839788432008, "grad_norm": 1.6106834647921617, "learning_rate": 3.7362820069185677e-06, "loss": 0.1373, "step": 10314 }, { "epoch": 0.8799692885173178, "grad_norm": 1.8916950823068257, "learning_rate": 3.731043661240036e-06, "loss": 0.1612, "step": 10315 }, { "epoch": 0.880054598191435, "grad_norm": 1.453610820979611, "learning_rate": 3.7258088479504514e-06, "loss": 0.184, "step": 10316 }, { "epoch": 0.880139907865552, "grad_norm": 1.7436259536647574, "learning_rate": 3.7205775674494624e-06, "loss": 0.216, "step": 10317 }, { "epoch": 0.880225217539669, "grad_norm": 1.5594662014697886, "learning_rate": 3.715349820136449e-06, "loss": 0.1428, "step": 10318 }, { "epoch": 0.880310527213786, "grad_norm": 1.4579029901514802, "learning_rate": 3.7101256064105084e-06, "loss": 0.1465, "step": 10319 }, { "epoch": 0.8803958368879031, "grad_norm": 1.5432782212888392, "learning_rate": 3.7049049266705106e-06, "loss": 0.1331, "step": 10320 }, { "epoch": 0.8804811465620201, "grad_norm": 2.318999461612746, "learning_rate": 3.699687781314992e-06, "loss": 0.2231, "step": 10321 }, { "epoch": 0.8805664562361372, "grad_norm": 2.156688369197617, "learning_rate": 3.694474170742279e-06, "loss": 0.1955, "step": 10322 }, { "epoch": 0.8806517659102542, "grad_norm": 1.5594541235288435, "learning_rate": 3.6892640953503975e-06, "loss": 0.1438, "step": 10323 }, { "epoch": 0.8807370755843713, "grad_norm": 1.357021618568971, "learning_rate": 3.684057555537113e-06, "loss": 0.1335, "step": 10324 }, { "epoch": 0.8808223852584883, "grad_norm": 1.5984856054799224, "learning_rate": 3.6788545516999063e-06, "loss": 0.2127, "step": 10325 }, { "epoch": 0.8809076949326053, "grad_norm": 1.3462664971009384, "learning_rate": 3.6736550842359997e-06, "loss": 0.2185, "step": 10326 }, { "epoch": 0.8809930046067224, "grad_norm": 2.0124368219351068, "learning_rate": 3.6684591535423586e-06, "loss": 0.2004, "step": 10327 }, { "epoch": 0.8810783142808395, "grad_norm": 1.8550450533924956, "learning_rate": 3.6632667600156433e-06, "loss": 0.1903, "step": 10328 }, { "epoch": 0.8811636239549565, "grad_norm": 1.7031321394184504, "learning_rate": 3.658077904052293e-06, "loss": 0.1844, "step": 10329 }, { "epoch": 0.8812489336290735, "grad_norm": 1.593108552987769, "learning_rate": 3.6528925860484175e-06, "loss": 0.1344, "step": 10330 }, { "epoch": 0.8813342433031905, "grad_norm": 1.5445894252566752, "learning_rate": 3.647710806399912e-06, "loss": 0.1389, "step": 10331 }, { "epoch": 0.8814195529773077, "grad_norm": 1.7002783266996697, "learning_rate": 3.6425325655023656e-06, "loss": 0.2258, "step": 10332 }, { "epoch": 0.8815048626514247, "grad_norm": 2.5299750991018066, "learning_rate": 3.6373578637511283e-06, "loss": 0.201, "step": 10333 }, { "epoch": 0.8815901723255417, "grad_norm": 2.032646051581116, "learning_rate": 3.6321867015412346e-06, "loss": 0.1859, "step": 10334 }, { "epoch": 0.8816754819996587, "grad_norm": 1.4571434161909083, "learning_rate": 3.627019079267491e-06, "loss": 0.1965, "step": 10335 }, { "epoch": 0.8817607916737759, "grad_norm": 1.5678080614714118, "learning_rate": 3.6218549973244197e-06, "loss": 0.1684, "step": 10336 }, { "epoch": 0.8818461013478929, "grad_norm": 1.8088869059117285, "learning_rate": 3.6166944561062622e-06, "loss": 0.1543, "step": 10337 }, { "epoch": 0.8819314110220099, "grad_norm": 1.302963305351134, "learning_rate": 3.611537456007008e-06, "loss": 0.1353, "step": 10338 }, { "epoch": 0.8820167206961269, "grad_norm": 1.669234856164764, "learning_rate": 3.606383997420354e-06, "loss": 0.1863, "step": 10339 }, { "epoch": 0.882102030370244, "grad_norm": 1.5539996288353122, "learning_rate": 3.6012340807397515e-06, "loss": 0.1734, "step": 10340 }, { "epoch": 0.8821873400443611, "grad_norm": 1.2554638656587667, "learning_rate": 3.5960877063583532e-06, "loss": 0.1498, "step": 10341 }, { "epoch": 0.8822726497184781, "grad_norm": 1.7944957073646066, "learning_rate": 3.590944874669089e-06, "loss": 0.14, "step": 10342 }, { "epoch": 0.8823579593925951, "grad_norm": 1.9214493815226525, "learning_rate": 3.5858055860645445e-06, "loss": 0.2142, "step": 10343 }, { "epoch": 0.8824432690667121, "grad_norm": 2.323316895998253, "learning_rate": 3.580669840937112e-06, "loss": 0.2147, "step": 10344 }, { "epoch": 0.8825285787408292, "grad_norm": 1.581644496032093, "learning_rate": 3.575537639678861e-06, "loss": 0.1463, "step": 10345 }, { "epoch": 0.8826138884149463, "grad_norm": 2.8178707976168402, "learning_rate": 3.570408982681611e-06, "loss": 0.1961, "step": 10346 }, { "epoch": 0.8826991980890633, "grad_norm": 1.6624758324264746, "learning_rate": 3.565283870336911e-06, "loss": 0.1551, "step": 10347 }, { "epoch": 0.8827845077631803, "grad_norm": 1.279438226476473, "learning_rate": 3.5601623030360243e-06, "loss": 0.1671, "step": 10348 }, { "epoch": 0.8828698174372974, "grad_norm": 2.0315004634495177, "learning_rate": 3.5550442811699837e-06, "loss": 0.1927, "step": 10349 }, { "epoch": 0.8829551271114144, "grad_norm": 2.0555381029550865, "learning_rate": 3.5499298051294873e-06, "loss": 0.1928, "step": 10350 }, { "epoch": 0.8830404367855315, "grad_norm": 1.9607013089010883, "learning_rate": 3.5448188753050173e-06, "loss": 0.1419, "step": 10351 }, { "epoch": 0.8831257464596485, "grad_norm": 1.613790584086389, "learning_rate": 3.5397114920867725e-06, "loss": 0.17, "step": 10352 }, { "epoch": 0.8832110561337656, "grad_norm": 1.5673609081413344, "learning_rate": 3.534607655864658e-06, "loss": 0.1569, "step": 10353 }, { "epoch": 0.8832963658078826, "grad_norm": 1.6461764191307753, "learning_rate": 3.5295073670283286e-06, "loss": 0.1614, "step": 10354 }, { "epoch": 0.8833816754819996, "grad_norm": 1.8137601220908626, "learning_rate": 3.5244106259671837e-06, "loss": 0.1317, "step": 10355 }, { "epoch": 0.8834669851561167, "grad_norm": 1.7146228135277093, "learning_rate": 3.519317433070296e-06, "loss": 0.1368, "step": 10356 }, { "epoch": 0.8835522948302338, "grad_norm": 1.6947584521370849, "learning_rate": 3.514227788726537e-06, "loss": 0.2092, "step": 10357 }, { "epoch": 0.8836376045043508, "grad_norm": 1.8883619399343514, "learning_rate": 3.509141693324458e-06, "loss": 0.1721, "step": 10358 }, { "epoch": 0.8837229141784678, "grad_norm": 1.3974294598950927, "learning_rate": 3.504059147252359e-06, "loss": 0.1301, "step": 10359 }, { "epoch": 0.8838082238525848, "grad_norm": 1.5989335976674441, "learning_rate": 3.498980150898268e-06, "loss": 0.1955, "step": 10360 }, { "epoch": 0.883893533526702, "grad_norm": 1.7276096534431034, "learning_rate": 3.4939047046499252e-06, "loss": 0.1773, "step": 10361 }, { "epoch": 0.883978843200819, "grad_norm": 2.0207328247035106, "learning_rate": 3.488832808894843e-06, "loss": 0.1371, "step": 10362 }, { "epoch": 0.884064152874936, "grad_norm": 2.206280768074563, "learning_rate": 3.4837644640202003e-06, "loss": 0.1636, "step": 10363 }, { "epoch": 0.884149462549053, "grad_norm": 2.3366709294554626, "learning_rate": 3.4786996704129604e-06, "loss": 0.2203, "step": 10364 }, { "epoch": 0.8842347722231702, "grad_norm": 1.7056487396925317, "learning_rate": 3.4736384284597857e-06, "loss": 0.1236, "step": 10365 }, { "epoch": 0.8843200818972872, "grad_norm": 1.983665880271787, "learning_rate": 3.468580738547078e-06, "loss": 0.1844, "step": 10366 }, { "epoch": 0.8844053915714042, "grad_norm": 1.802121009506313, "learning_rate": 3.4635266010609624e-06, "loss": 0.1345, "step": 10367 }, { "epoch": 0.8844907012455212, "grad_norm": 1.5185593976012162, "learning_rate": 3.4584760163872963e-06, "loss": 0.1484, "step": 10368 }, { "epoch": 0.8845760109196383, "grad_norm": 1.6045422630366735, "learning_rate": 3.453428984911666e-06, "loss": 0.1897, "step": 10369 }, { "epoch": 0.8846613205937554, "grad_norm": 1.5929737070407792, "learning_rate": 3.448385507019375e-06, "loss": 0.1364, "step": 10370 }, { "epoch": 0.8847466302678724, "grad_norm": 1.7676798922922612, "learning_rate": 3.443345583095492e-06, "loss": 0.1749, "step": 10371 }, { "epoch": 0.8848319399419894, "grad_norm": 1.8307788854787048, "learning_rate": 3.4383092135247543e-06, "loss": 0.1977, "step": 10372 }, { "epoch": 0.8849172496161065, "grad_norm": 1.4702003298384807, "learning_rate": 3.433276398691687e-06, "loss": 0.1356, "step": 10373 }, { "epoch": 0.8850025592902235, "grad_norm": 1.636326271651642, "learning_rate": 3.428247138980517e-06, "loss": 0.1785, "step": 10374 }, { "epoch": 0.8850878689643406, "grad_norm": 2.1498825484730566, "learning_rate": 3.4232214347751924e-06, "loss": 0.2169, "step": 10375 }, { "epoch": 0.8851731786384576, "grad_norm": 1.4770534445998715, "learning_rate": 3.418199286459395e-06, "loss": 0.1815, "step": 10376 }, { "epoch": 0.8852584883125747, "grad_norm": 1.7369866854739895, "learning_rate": 3.413180694416551e-06, "loss": 0.1394, "step": 10377 }, { "epoch": 0.8853437979866917, "grad_norm": 2.3933032622436126, "learning_rate": 3.408165659029805e-06, "loss": 0.1479, "step": 10378 }, { "epoch": 0.8854291076608087, "grad_norm": 1.8850846252873181, "learning_rate": 3.4031541806820166e-06, "loss": 0.2194, "step": 10379 }, { "epoch": 0.8855144173349258, "grad_norm": 1.6098044850121427, "learning_rate": 3.398146259755797e-06, "loss": 0.1396, "step": 10380 }, { "epoch": 0.8855997270090429, "grad_norm": 1.505903705978469, "learning_rate": 3.3931418966334673e-06, "loss": 0.1991, "step": 10381 }, { "epoch": 0.8856850366831599, "grad_norm": 1.6841271212749986, "learning_rate": 3.388141091697078e-06, "loss": 0.1357, "step": 10382 }, { "epoch": 0.8857703463572769, "grad_norm": 1.947650898093266, "learning_rate": 3.383143845328424e-06, "loss": 0.1903, "step": 10383 }, { "epoch": 0.8858556560313939, "grad_norm": 2.213557009176302, "learning_rate": 3.3781501579090214e-06, "loss": 0.1733, "step": 10384 }, { "epoch": 0.885940965705511, "grad_norm": 1.7506042527178405, "learning_rate": 3.3731600298200993e-06, "loss": 0.1926, "step": 10385 }, { "epoch": 0.8860262753796281, "grad_norm": 1.9725083097372293, "learning_rate": 3.3681734614426365e-06, "loss": 0.1752, "step": 10386 }, { "epoch": 0.8861115850537451, "grad_norm": 1.4916817172408252, "learning_rate": 3.3631904531573277e-06, "loss": 0.1992, "step": 10387 }, { "epoch": 0.8861968947278621, "grad_norm": 1.5711080329507556, "learning_rate": 3.3582110053446025e-06, "loss": 0.1979, "step": 10388 }, { "epoch": 0.8862822044019791, "grad_norm": 2.255295138784479, "learning_rate": 3.3532351183846123e-06, "loss": 0.194, "step": 10389 }, { "epoch": 0.8863675140760963, "grad_norm": 1.6453337172601046, "learning_rate": 3.348262792657242e-06, "loss": 0.1731, "step": 10390 }, { "epoch": 0.8864528237502133, "grad_norm": 2.1783561332149604, "learning_rate": 3.3432940285420987e-06, "loss": 0.1712, "step": 10391 }, { "epoch": 0.8865381334243303, "grad_norm": 1.8321011332703545, "learning_rate": 3.338328826418513e-06, "loss": 0.1881, "step": 10392 }, { "epoch": 0.8866234430984473, "grad_norm": 1.8629105237112833, "learning_rate": 3.333367186665576e-06, "loss": 0.1727, "step": 10393 }, { "epoch": 0.8867087527725644, "grad_norm": 2.097467794068782, "learning_rate": 3.328409109662062e-06, "loss": 0.2058, "step": 10394 }, { "epoch": 0.8867940624466815, "grad_norm": 2.2305662330930334, "learning_rate": 3.3234545957865016e-06, "loss": 0.2393, "step": 10395 }, { "epoch": 0.8868793721207985, "grad_norm": 1.4802532592799404, "learning_rate": 3.318503645417137e-06, "loss": 0.1532, "step": 10396 }, { "epoch": 0.8869646817949155, "grad_norm": 1.5075999688298036, "learning_rate": 3.3135562589319656e-06, "loss": 0.2153, "step": 10397 }, { "epoch": 0.8870499914690326, "grad_norm": 1.441664427092178, "learning_rate": 3.308612436708669e-06, "loss": 0.2076, "step": 10398 }, { "epoch": 0.8871353011431496, "grad_norm": 2.6621923062903226, "learning_rate": 3.303672179124706e-06, "loss": 0.1182, "step": 10399 }, { "epoch": 0.8872206108172667, "grad_norm": 1.3766056136308764, "learning_rate": 3.298735486557225e-06, "loss": 0.131, "step": 10400 }, { "epoch": 0.8873059204913837, "grad_norm": 1.9289936254464242, "learning_rate": 3.2938023593831193e-06, "loss": 0.1715, "step": 10401 }, { "epoch": 0.8873912301655008, "grad_norm": 1.8067566210650652, "learning_rate": 3.288872797979009e-06, "loss": 0.1628, "step": 10402 }, { "epoch": 0.8874765398396178, "grad_norm": 1.4920996195694303, "learning_rate": 3.2839468027212326e-06, "loss": 0.1775, "step": 10403 }, { "epoch": 0.8875618495137348, "grad_norm": 1.9199502502393557, "learning_rate": 3.2790243739858782e-06, "loss": 0.1657, "step": 10404 }, { "epoch": 0.8876471591878519, "grad_norm": 1.6965129487127844, "learning_rate": 3.274105512148723e-06, "loss": 0.1983, "step": 10405 }, { "epoch": 0.887732468861969, "grad_norm": 1.3344992765136678, "learning_rate": 3.2691902175853272e-06, "loss": 0.1199, "step": 10406 }, { "epoch": 0.887817778536086, "grad_norm": 2.3836288304530946, "learning_rate": 3.264278490670919e-06, "loss": 0.2344, "step": 10407 }, { "epoch": 0.887903088210203, "grad_norm": 1.5363928398354796, "learning_rate": 3.259370331780498e-06, "loss": 0.1611, "step": 10408 }, { "epoch": 0.88798839788432, "grad_norm": 2.2629817934403773, "learning_rate": 3.2544657412887756e-06, "loss": 0.2066, "step": 10409 }, { "epoch": 0.8880737075584372, "grad_norm": 2.174620066815709, "learning_rate": 3.249564719570186e-06, "loss": 0.2184, "step": 10410 }, { "epoch": 0.8881590172325542, "grad_norm": 2.1930543410582586, "learning_rate": 3.244667266998902e-06, "loss": 0.1742, "step": 10411 }, { "epoch": 0.8882443269066712, "grad_norm": 1.494880682480142, "learning_rate": 3.239773383948802e-06, "loss": 0.1491, "step": 10412 }, { "epoch": 0.8883296365807882, "grad_norm": 2.7863988471886096, "learning_rate": 3.2348830707935427e-06, "loss": 0.1537, "step": 10413 }, { "epoch": 0.8884149462549054, "grad_norm": 2.173145828037546, "learning_rate": 3.2299963279064315e-06, "loss": 0.2027, "step": 10414 }, { "epoch": 0.8885002559290224, "grad_norm": 1.712890625, "learning_rate": 3.2251131556605695e-06, "loss": 0.1275, "step": 10415 }, { "epoch": 0.8885855656031394, "grad_norm": 1.5249759046808502, "learning_rate": 3.2202335544287643e-06, "loss": 0.2008, "step": 10416 }, { "epoch": 0.8886708752772564, "grad_norm": 2.4497182723014213, "learning_rate": 3.2153575245835344e-06, "loss": 0.1602, "step": 10417 }, { "epoch": 0.8887561849513735, "grad_norm": 1.2289014249926387, "learning_rate": 3.2104850664971374e-06, "loss": 0.1659, "step": 10418 }, { "epoch": 0.8888414946254906, "grad_norm": 2.2578472411970876, "learning_rate": 3.205616180541582e-06, "loss": 0.1983, "step": 10419 }, { "epoch": 0.8889268042996076, "grad_norm": 1.7062933612386042, "learning_rate": 3.200750867088553e-06, "loss": 0.2211, "step": 10420 }, { "epoch": 0.8890121139737246, "grad_norm": 1.7283830667303204, "learning_rate": 3.19588912650951e-06, "loss": 0.121, "step": 10421 }, { "epoch": 0.8890974236478416, "grad_norm": 1.2392894118588764, "learning_rate": 3.1910309591756172e-06, "loss": 0.1252, "step": 10422 }, { "epoch": 0.8891827333219587, "grad_norm": 2.1329560283951903, "learning_rate": 3.186176365457766e-06, "loss": 0.2277, "step": 10423 }, { "epoch": 0.8892680429960758, "grad_norm": 1.9449561225433274, "learning_rate": 3.181325345726582e-06, "loss": 0.1254, "step": 10424 }, { "epoch": 0.8893533526701928, "grad_norm": 2.7001139016321347, "learning_rate": 3.1764779003524037e-06, "loss": 0.2355, "step": 10425 }, { "epoch": 0.8894386623443098, "grad_norm": 2.2641218789727326, "learning_rate": 3.1716340297053336e-06, "loss": 0.1545, "step": 10426 }, { "epoch": 0.8895239720184269, "grad_norm": 1.3878511345374773, "learning_rate": 3.166793734155149e-06, "loss": 0.1492, "step": 10427 }, { "epoch": 0.8896092816925439, "grad_norm": 1.595795310076257, "learning_rate": 3.1619570140713927e-06, "loss": 0.1862, "step": 10428 }, { "epoch": 0.889694591366661, "grad_norm": 2.1022785767794874, "learning_rate": 3.1571238698233252e-06, "loss": 0.2421, "step": 10429 }, { "epoch": 0.889779901040778, "grad_norm": 1.4832815057651223, "learning_rate": 3.1522943017799232e-06, "loss": 0.1296, "step": 10430 }, { "epoch": 0.8898652107148951, "grad_norm": 1.5892275054093372, "learning_rate": 3.147468310309909e-06, "loss": 0.1519, "step": 10431 }, { "epoch": 0.8899505203890121, "grad_norm": 1.5301734098011572, "learning_rate": 3.142645895781715e-06, "loss": 0.1733, "step": 10432 }, { "epoch": 0.8900358300631291, "grad_norm": 1.8567424056849473, "learning_rate": 3.1378270585635026e-06, "loss": 0.2573, "step": 10433 }, { "epoch": 0.8901211397372462, "grad_norm": 2.36287384304722, "learning_rate": 3.1330117990231613e-06, "loss": 0.1673, "step": 10434 }, { "epoch": 0.8902064494113633, "grad_norm": 1.63332413683787, "learning_rate": 3.128200117528335e-06, "loss": 0.2335, "step": 10435 }, { "epoch": 0.8902917590854803, "grad_norm": 1.7922524078774398, "learning_rate": 3.1233920144463415e-06, "loss": 0.1968, "step": 10436 }, { "epoch": 0.8903770687595973, "grad_norm": 1.749378570847378, "learning_rate": 3.1185874901442703e-06, "loss": 0.2045, "step": 10437 }, { "epoch": 0.8904623784337143, "grad_norm": 1.660779553257392, "learning_rate": 3.113786544988906e-06, "loss": 0.1931, "step": 10438 }, { "epoch": 0.8905476881078315, "grad_norm": 2.347318450439965, "learning_rate": 3.108989179346805e-06, "loss": 0.2725, "step": 10439 }, { "epoch": 0.8906329977819485, "grad_norm": 1.6523855217597867, "learning_rate": 3.104195393584186e-06, "loss": 0.1356, "step": 10440 }, { "epoch": 0.8907183074560655, "grad_norm": 2.0533095696341603, "learning_rate": 3.0994051880670504e-06, "loss": 0.1998, "step": 10441 }, { "epoch": 0.8908036171301825, "grad_norm": 1.6705343985830896, "learning_rate": 3.0946185631611002e-06, "loss": 0.168, "step": 10442 }, { "epoch": 0.8908889268042997, "grad_norm": 1.4164267318094759, "learning_rate": 3.089835519231771e-06, "loss": 0.1416, "step": 10443 }, { "epoch": 0.8909742364784167, "grad_norm": 1.7399030684733865, "learning_rate": 3.0850560566442145e-06, "loss": 0.1613, "step": 10444 }, { "epoch": 0.8910595461525337, "grad_norm": 1.3733091795673766, "learning_rate": 3.080280175763328e-06, "loss": 0.1485, "step": 10445 }, { "epoch": 0.8911448558266507, "grad_norm": 1.6664922464020393, "learning_rate": 3.075507876953715e-06, "loss": 0.1337, "step": 10446 }, { "epoch": 0.8912301655007678, "grad_norm": 2.4467379281929262, "learning_rate": 3.070739160579711e-06, "loss": 0.2178, "step": 10447 }, { "epoch": 0.8913154751748849, "grad_norm": 1.6329643507440024, "learning_rate": 3.065974027005408e-06, "loss": 0.1941, "step": 10448 }, { "epoch": 0.8914007848490019, "grad_norm": 1.5735763867325292, "learning_rate": 3.0612124765945603e-06, "loss": 0.1721, "step": 10449 }, { "epoch": 0.8914860945231189, "grad_norm": 1.6134469822322912, "learning_rate": 3.056454509710721e-06, "loss": 0.1773, "step": 10450 }, { "epoch": 0.891571404197236, "grad_norm": 1.5323527510876433, "learning_rate": 3.051700126717122e-06, "loss": 0.1346, "step": 10451 }, { "epoch": 0.891656713871353, "grad_norm": 1.5041089998781352, "learning_rate": 3.0469493279767335e-06, "loss": 0.1654, "step": 10452 }, { "epoch": 0.89174202354547, "grad_norm": 1.503623083629841, "learning_rate": 3.042202113852255e-06, "loss": 0.1614, "step": 10453 }, { "epoch": 0.8918273332195871, "grad_norm": 1.576719507994643, "learning_rate": 3.037458484706102e-06, "loss": 0.1551, "step": 10454 }, { "epoch": 0.8919126428937042, "grad_norm": 1.303282569488527, "learning_rate": 3.032718440900456e-06, "loss": 0.1238, "step": 10455 }, { "epoch": 0.8919979525678212, "grad_norm": 2.0062909131022915, "learning_rate": 3.0279819827971513e-06, "loss": 0.1473, "step": 10456 }, { "epoch": 0.8920832622419382, "grad_norm": 2.214527679442634, "learning_rate": 3.0232491107578253e-06, "loss": 0.2257, "step": 10457 }, { "epoch": 0.8921685719160553, "grad_norm": 1.868266284816012, "learning_rate": 3.018519825143795e-06, "loss": 0.2214, "step": 10458 }, { "epoch": 0.8922538815901723, "grad_norm": 2.0095586049156604, "learning_rate": 3.0137941263161164e-06, "loss": 0.1437, "step": 10459 }, { "epoch": 0.8923391912642894, "grad_norm": 1.6162697957088574, "learning_rate": 3.0090720146355666e-06, "loss": 0.1799, "step": 10460 }, { "epoch": 0.8924245009384064, "grad_norm": 1.679664931034787, "learning_rate": 3.004353490462669e-06, "loss": 0.2663, "step": 10461 }, { "epoch": 0.8925098106125234, "grad_norm": 2.430323870076652, "learning_rate": 2.9996385541576353e-06, "loss": 0.2399, "step": 10462 }, { "epoch": 0.8925951202866405, "grad_norm": 1.9673142420364764, "learning_rate": 2.9949272060804445e-06, "loss": 0.1909, "step": 10463 }, { "epoch": 0.8926804299607576, "grad_norm": 1.4730415143846374, "learning_rate": 2.9902194465907807e-06, "loss": 0.1342, "step": 10464 }, { "epoch": 0.8927657396348746, "grad_norm": 2.0161896144009526, "learning_rate": 2.985515276048051e-06, "loss": 0.1629, "step": 10465 }, { "epoch": 0.8928510493089916, "grad_norm": 2.244352246059992, "learning_rate": 2.9808146948113958e-06, "loss": 0.217, "step": 10466 }, { "epoch": 0.8929363589831086, "grad_norm": 1.6065947581986981, "learning_rate": 2.976117703239667e-06, "loss": 0.1448, "step": 10467 }, { "epoch": 0.8930216686572258, "grad_norm": 2.01882302355894, "learning_rate": 2.9714243016914834e-06, "loss": 0.19, "step": 10468 }, { "epoch": 0.8931069783313428, "grad_norm": 2.111786093080852, "learning_rate": 2.9667344905251302e-06, "loss": 0.1908, "step": 10469 }, { "epoch": 0.8931922880054598, "grad_norm": 1.516348440020543, "learning_rate": 2.9620482700986774e-06, "loss": 0.1488, "step": 10470 }, { "epoch": 0.8932775976795768, "grad_norm": 1.6777431856397, "learning_rate": 2.9573656407698713e-06, "loss": 0.1425, "step": 10471 }, { "epoch": 0.893362907353694, "grad_norm": 1.5583931930115256, "learning_rate": 2.9526866028962206e-06, "loss": 0.2141, "step": 10472 }, { "epoch": 0.893448217027811, "grad_norm": 1.8180842698545303, "learning_rate": 2.9480111568349346e-06, "loss": 0.1734, "step": 10473 }, { "epoch": 0.893533526701928, "grad_norm": 2.0939973144222552, "learning_rate": 2.9433393029429657e-06, "loss": 0.2387, "step": 10474 }, { "epoch": 0.893618836376045, "grad_norm": 1.5273201786963904, "learning_rate": 2.938671041576979e-06, "loss": 0.1802, "step": 10475 }, { "epoch": 0.8937041460501621, "grad_norm": 1.774565820917315, "learning_rate": 2.9340063730933675e-06, "loss": 0.1625, "step": 10476 }, { "epoch": 0.8937894557242791, "grad_norm": 1.8775182978605747, "learning_rate": 2.9293452978482793e-06, "loss": 0.1703, "step": 10477 }, { "epoch": 0.8938747653983962, "grad_norm": 2.0663615980894625, "learning_rate": 2.9246878161975298e-06, "loss": 0.1979, "step": 10478 }, { "epoch": 0.8939600750725132, "grad_norm": 1.8573085297057736, "learning_rate": 2.9200339284967127e-06, "loss": 0.1886, "step": 10479 }, { "epoch": 0.8940453847466303, "grad_norm": 1.3854936803001126, "learning_rate": 2.915383635101121e-06, "loss": 0.1065, "step": 10480 }, { "epoch": 0.8941306944207473, "grad_norm": 2.02107743299789, "learning_rate": 2.910736936365782e-06, "loss": 0.2082, "step": 10481 }, { "epoch": 0.8942160040948643, "grad_norm": 2.003807258763536, "learning_rate": 2.90609383264544e-06, "loss": 0.2331, "step": 10482 }, { "epoch": 0.8943013137689814, "grad_norm": 2.413153319766929, "learning_rate": 2.9014543242945837e-06, "loss": 0.174, "step": 10483 }, { "epoch": 0.8943866234430985, "grad_norm": 1.3785938633124581, "learning_rate": 2.896818411667407e-06, "loss": 0.1646, "step": 10484 }, { "epoch": 0.8944719331172155, "grad_norm": 1.38346692133472, "learning_rate": 2.8921860951178435e-06, "loss": 0.13, "step": 10485 }, { "epoch": 0.8945572427913325, "grad_norm": 2.854448921744894, "learning_rate": 2.8875573749995335e-06, "loss": 0.1676, "step": 10486 }, { "epoch": 0.8946425524654495, "grad_norm": 1.7130211809745042, "learning_rate": 2.882932251665871e-06, "loss": 0.1559, "step": 10487 }, { "epoch": 0.8947278621395667, "grad_norm": 2.032023825333057, "learning_rate": 2.878310725469946e-06, "loss": 0.1459, "step": 10488 }, { "epoch": 0.8948131718136837, "grad_norm": 1.5012232243344112, "learning_rate": 2.873692796764582e-06, "loss": 0.1588, "step": 10489 }, { "epoch": 0.8948984814878007, "grad_norm": 1.870215032423232, "learning_rate": 2.869078465902364e-06, "loss": 0.2501, "step": 10490 }, { "epoch": 0.8949837911619177, "grad_norm": 2.1096939410932216, "learning_rate": 2.8644677332355374e-06, "loss": 0.2145, "step": 10491 }, { "epoch": 0.8950691008360349, "grad_norm": 1.806758798394396, "learning_rate": 2.8598605991161264e-06, "loss": 0.2183, "step": 10492 }, { "epoch": 0.8951544105101519, "grad_norm": 2.4077465803367453, "learning_rate": 2.855257063895861e-06, "loss": 0.2607, "step": 10493 }, { "epoch": 0.8952397201842689, "grad_norm": 1.719185999276056, "learning_rate": 2.8506571279261874e-06, "loss": 0.1774, "step": 10494 }, { "epoch": 0.8953250298583859, "grad_norm": 2.1273753410064282, "learning_rate": 2.8460607915582916e-06, "loss": 0.1504, "step": 10495 }, { "epoch": 0.895410339532503, "grad_norm": 2.131442122315891, "learning_rate": 2.8414680551430762e-06, "loss": 0.2177, "step": 10496 }, { "epoch": 0.8954956492066201, "grad_norm": 1.7430209915901826, "learning_rate": 2.8368789190311773e-06, "loss": 0.1876, "step": 10497 }, { "epoch": 0.8955809588807371, "grad_norm": 1.950038760362443, "learning_rate": 2.8322933835729426e-06, "loss": 0.2045, "step": 10498 }, { "epoch": 0.8956662685548541, "grad_norm": 1.6880785338903581, "learning_rate": 2.8277114491184643e-06, "loss": 0.1557, "step": 10499 }, { "epoch": 0.8957515782289711, "grad_norm": 1.9905600688411276, "learning_rate": 2.82313311601754e-06, "loss": 0.1608, "step": 10500 }, { "epoch": 0.8958368879030882, "grad_norm": 1.8851094144843785, "learning_rate": 2.818558384619713e-06, "loss": 0.168, "step": 10501 }, { "epoch": 0.8959221975772053, "grad_norm": 1.785841662087409, "learning_rate": 2.81398725527422e-06, "loss": 0.2549, "step": 10502 }, { "epoch": 0.8960075072513223, "grad_norm": 2.1300600057755052, "learning_rate": 2.8094197283300647e-06, "loss": 0.1734, "step": 10503 }, { "epoch": 0.8960928169254393, "grad_norm": 2.183879253577854, "learning_rate": 2.804855804135931e-06, "loss": 0.1799, "step": 10504 }, { "epoch": 0.8961781265995564, "grad_norm": 1.4967291456292207, "learning_rate": 2.8002954830402717e-06, "loss": 0.1479, "step": 10505 }, { "epoch": 0.8962634362736734, "grad_norm": 1.7773807731377445, "learning_rate": 2.7957387653912315e-06, "loss": 0.2044, "step": 10506 }, { "epoch": 0.8963487459477905, "grad_norm": 2.0735830133772555, "learning_rate": 2.791185651536693e-06, "loss": 0.1594, "step": 10507 }, { "epoch": 0.8964340556219075, "grad_norm": 2.1115970916647084, "learning_rate": 2.7866361418242616e-06, "loss": 0.1753, "step": 10508 }, { "epoch": 0.8965193652960246, "grad_norm": 1.6617454193084238, "learning_rate": 2.7820902366012703e-06, "loss": 0.1841, "step": 10509 }, { "epoch": 0.8966046749701416, "grad_norm": 1.6799834234691435, "learning_rate": 2.777547936214775e-06, "loss": 0.2259, "step": 10510 }, { "epoch": 0.8966899846442586, "grad_norm": 2.1432096373053375, "learning_rate": 2.7730092410115484e-06, "loss": 0.1473, "step": 10511 }, { "epoch": 0.8967752943183757, "grad_norm": 1.7226980468108275, "learning_rate": 2.7684741513381074e-06, "loss": 0.1778, "step": 10512 }, { "epoch": 0.8968606039924928, "grad_norm": 2.6704035940419235, "learning_rate": 2.7639426675406753e-06, "loss": 0.1506, "step": 10513 }, { "epoch": 0.8969459136666098, "grad_norm": 2.264081758215843, "learning_rate": 2.759414789965209e-06, "loss": 0.1716, "step": 10514 }, { "epoch": 0.8970312233407268, "grad_norm": 1.650777015373695, "learning_rate": 2.754890518957387e-06, "loss": 0.1594, "step": 10515 }, { "epoch": 0.8971165330148438, "grad_norm": 1.5992930638685905, "learning_rate": 2.7503698548626167e-06, "loss": 0.1479, "step": 10516 }, { "epoch": 0.897201842688961, "grad_norm": 1.6958277046928754, "learning_rate": 2.7458527980260216e-06, "loss": 0.1898, "step": 10517 }, { "epoch": 0.897287152363078, "grad_norm": 2.010331410657524, "learning_rate": 2.7413393487924543e-06, "loss": 0.1573, "step": 10518 }, { "epoch": 0.897372462037195, "grad_norm": 1.5973883593118763, "learning_rate": 2.736829507506505e-06, "loss": 0.1972, "step": 10519 }, { "epoch": 0.897457771711312, "grad_norm": 1.7921863585836806, "learning_rate": 2.732323274512455e-06, "loss": 0.1946, "step": 10520 }, { "epoch": 0.8975430813854292, "grad_norm": 1.293145942647463, "learning_rate": 2.7278206501543448e-06, "loss": 0.1596, "step": 10521 }, { "epoch": 0.8976283910595462, "grad_norm": 1.9004375455891884, "learning_rate": 2.7233216347759272e-06, "loss": 0.1842, "step": 10522 }, { "epoch": 0.8977137007336632, "grad_norm": 2.1753882664486004, "learning_rate": 2.7188262287206776e-06, "loss": 0.1376, "step": 10523 }, { "epoch": 0.8977990104077802, "grad_norm": 1.3421684539118581, "learning_rate": 2.714334432331783e-06, "loss": 0.1398, "step": 10524 }, { "epoch": 0.8978843200818973, "grad_norm": 1.7704862665884258, "learning_rate": 2.709846245952191e-06, "loss": 0.1894, "step": 10525 }, { "epoch": 0.8979696297560144, "grad_norm": 1.2460427587713037, "learning_rate": 2.7053616699245277e-06, "loss": 0.1712, "step": 10526 }, { "epoch": 0.8980549394301314, "grad_norm": 1.741190740668234, "learning_rate": 2.7008807045911855e-06, "loss": 0.194, "step": 10527 }, { "epoch": 0.8981402491042484, "grad_norm": 1.727555899702432, "learning_rate": 2.6964033502942523e-06, "loss": 0.1942, "step": 10528 }, { "epoch": 0.8982255587783655, "grad_norm": 1.652489333356403, "learning_rate": 2.691929607375554e-06, "loss": 0.1285, "step": 10529 }, { "epoch": 0.8983108684524825, "grad_norm": 1.5580467851442994, "learning_rate": 2.687459476176635e-06, "loss": 0.1712, "step": 10530 }, { "epoch": 0.8983961781265996, "grad_norm": 2.0175027306974287, "learning_rate": 2.6829929570387545e-06, "loss": 0.182, "step": 10531 }, { "epoch": 0.8984814878007166, "grad_norm": 1.7270804616477047, "learning_rate": 2.6785300503029407e-06, "loss": 0.2635, "step": 10532 }, { "epoch": 0.8985667974748337, "grad_norm": 1.7850329734257127, "learning_rate": 2.674070756309871e-06, "loss": 0.2032, "step": 10533 }, { "epoch": 0.8986521071489507, "grad_norm": 1.9727984386603434, "learning_rate": 2.6696150754000227e-06, "loss": 0.1815, "step": 10534 }, { "epoch": 0.8987374168230677, "grad_norm": 1.6653025130654109, "learning_rate": 2.665163007913546e-06, "loss": 0.2374, "step": 10535 }, { "epoch": 0.8988227264971848, "grad_norm": 2.0836344946305614, "learning_rate": 2.6607145541903354e-06, "loss": 0.1745, "step": 10536 }, { "epoch": 0.8989080361713018, "grad_norm": 1.4313238399731516, "learning_rate": 2.656269714570009e-06, "loss": 0.1839, "step": 10537 }, { "epoch": 0.8989933458454189, "grad_norm": 1.5465227747170442, "learning_rate": 2.6518284893919108e-06, "loss": 0.148, "step": 10538 }, { "epoch": 0.8990786555195359, "grad_norm": 1.3864058880400834, "learning_rate": 2.647390878995093e-06, "loss": 0.162, "step": 10539 }, { "epoch": 0.8991639651936529, "grad_norm": 1.7834781965201505, "learning_rate": 2.642956883718345e-06, "loss": 0.226, "step": 10540 }, { "epoch": 0.89924927486777, "grad_norm": 1.5048748909793133, "learning_rate": 2.6385265039002015e-06, "loss": 0.1384, "step": 10541 }, { "epoch": 0.8993345845418871, "grad_norm": 1.8364711554027917, "learning_rate": 2.6340997398788593e-06, "loss": 0.1838, "step": 10542 }, { "epoch": 0.8994198942160041, "grad_norm": 2.4610237227578793, "learning_rate": 2.629676591992314e-06, "loss": 0.1965, "step": 10543 }, { "epoch": 0.8995052038901211, "grad_norm": 1.5320080807425427, "learning_rate": 2.6252570605782234e-06, "loss": 0.1158, "step": 10544 }, { "epoch": 0.8995905135642381, "grad_norm": 1.6403734286753582, "learning_rate": 2.6208411459740235e-06, "loss": 0.1427, "step": 10545 }, { "epoch": 0.8996758232383553, "grad_norm": 1.7684483867444307, "learning_rate": 2.6164288485168164e-06, "loss": 0.1793, "step": 10546 }, { "epoch": 0.8997611329124723, "grad_norm": 1.4423553732791197, "learning_rate": 2.6120201685434776e-06, "loss": 0.1994, "step": 10547 }, { "epoch": 0.8998464425865893, "grad_norm": 1.9681565131931975, "learning_rate": 2.6076151063905764e-06, "loss": 0.2312, "step": 10548 }, { "epoch": 0.8999317522607063, "grad_norm": 2.205974062340875, "learning_rate": 2.6032136623944214e-06, "loss": 0.1726, "step": 10549 }, { "epoch": 0.9000170619348234, "grad_norm": 1.5435340328734017, "learning_rate": 2.598815836891033e-06, "loss": 0.1686, "step": 10550 }, { "epoch": 0.9001023716089405, "grad_norm": 1.42687143947777, "learning_rate": 2.5944216302161704e-06, "loss": 0.1606, "step": 10551 }, { "epoch": 0.9001876812830575, "grad_norm": 1.8744873617646194, "learning_rate": 2.5900310427053044e-06, "loss": 0.197, "step": 10552 }, { "epoch": 0.9002729909571745, "grad_norm": 1.4120500269826781, "learning_rate": 2.5856440746936216e-06, "loss": 0.2018, "step": 10553 }, { "epoch": 0.9003583006312916, "grad_norm": 1.501288575781354, "learning_rate": 2.5812607265160716e-06, "loss": 0.168, "step": 10554 }, { "epoch": 0.9004436103054086, "grad_norm": 1.6156271073052777, "learning_rate": 2.576880998507264e-06, "loss": 0.1809, "step": 10555 }, { "epoch": 0.9005289199795257, "grad_norm": 2.3627376214876596, "learning_rate": 2.5725048910015924e-06, "loss": 0.1671, "step": 10556 }, { "epoch": 0.9006142296536427, "grad_norm": 2.0578717407875504, "learning_rate": 2.5681324043331455e-06, "loss": 0.1742, "step": 10557 }, { "epoch": 0.9006995393277598, "grad_norm": 2.2944761452115436, "learning_rate": 2.5637635388357395e-06, "loss": 0.1784, "step": 10558 }, { "epoch": 0.9007848490018768, "grad_norm": 1.9985248370618334, "learning_rate": 2.5593982948429074e-06, "loss": 0.1971, "step": 10559 }, { "epoch": 0.9008701586759938, "grad_norm": 2.2466666643650193, "learning_rate": 2.5550366726879103e-06, "loss": 0.1328, "step": 10560 }, { "epoch": 0.9009554683501109, "grad_norm": 2.107862820600807, "learning_rate": 2.5506786727037545e-06, "loss": 0.2625, "step": 10561 }, { "epoch": 0.901040778024228, "grad_norm": 1.486819052106636, "learning_rate": 2.5463242952231235e-06, "loss": 0.1645, "step": 10562 }, { "epoch": 0.901126087698345, "grad_norm": 1.7392643923584128, "learning_rate": 2.541973540578474e-06, "loss": 0.1671, "step": 10563 }, { "epoch": 0.901211397372462, "grad_norm": 3.5097895362986264, "learning_rate": 2.5376264091019506e-06, "loss": 0.2236, "step": 10564 }, { "epoch": 0.901296707046579, "grad_norm": 1.7950209630735612, "learning_rate": 2.5332829011254334e-06, "loss": 0.2056, "step": 10565 }, { "epoch": 0.9013820167206962, "grad_norm": 1.6449071314143493, "learning_rate": 2.528943016980523e-06, "loss": 0.2016, "step": 10566 }, { "epoch": 0.9014673263948132, "grad_norm": 2.08566785348244, "learning_rate": 2.524606756998571e-06, "loss": 0.1654, "step": 10567 }, { "epoch": 0.9015526360689302, "grad_norm": 2.0137263382244344, "learning_rate": 2.520274121510591e-06, "loss": 0.1729, "step": 10568 }, { "epoch": 0.9016379457430472, "grad_norm": 1.3655144618360504, "learning_rate": 2.5159451108473843e-06, "loss": 0.16, "step": 10569 }, { "epoch": 0.9017232554171644, "grad_norm": 1.4959645028722306, "learning_rate": 2.511619725339431e-06, "loss": 0.1039, "step": 10570 }, { "epoch": 0.9018085650912814, "grad_norm": 1.36393285006297, "learning_rate": 2.507297965316968e-06, "loss": 0.1386, "step": 10571 }, { "epoch": 0.9018938747653984, "grad_norm": 1.7942748787164877, "learning_rate": 2.502979831109925e-06, "loss": 0.1384, "step": 10572 }, { "epoch": 0.9019791844395154, "grad_norm": 1.73686424534815, "learning_rate": 2.498665323047966e-06, "loss": 0.1875, "step": 10573 }, { "epoch": 0.9020644941136324, "grad_norm": 2.204059051117226, "learning_rate": 2.4943544414605e-06, "loss": 0.196, "step": 10574 }, { "epoch": 0.9021498037877496, "grad_norm": 1.7937276061251848, "learning_rate": 2.4900471866766194e-06, "loss": 0.216, "step": 10575 }, { "epoch": 0.9022351134618666, "grad_norm": 2.3830418210603965, "learning_rate": 2.485743559025172e-06, "loss": 0.182, "step": 10576 }, { "epoch": 0.9023204231359836, "grad_norm": 1.9185712376925055, "learning_rate": 2.481443558834712e-06, "loss": 0.1259, "step": 10577 }, { "epoch": 0.9024057328101006, "grad_norm": 2.124762016882703, "learning_rate": 2.477147186433526e-06, "loss": 0.1483, "step": 10578 }, { "epoch": 0.9024910424842177, "grad_norm": 1.9880815866641777, "learning_rate": 2.4728544421496137e-06, "loss": 0.1828, "step": 10579 }, { "epoch": 0.9025763521583348, "grad_norm": 2.3383634594231664, "learning_rate": 2.4685653263107067e-06, "loss": 0.1985, "step": 10580 }, { "epoch": 0.9026616618324518, "grad_norm": 1.541455417628065, "learning_rate": 2.4642798392442547e-06, "loss": 0.1817, "step": 10581 }, { "epoch": 0.9027469715065688, "grad_norm": 2.005275325564332, "learning_rate": 2.459997981277423e-06, "loss": 0.1306, "step": 10582 }, { "epoch": 0.9028322811806859, "grad_norm": 1.8729805561691883, "learning_rate": 2.4557197527371344e-06, "loss": 0.1497, "step": 10583 }, { "epoch": 0.9029175908548029, "grad_norm": 1.9235317873369684, "learning_rate": 2.4514451539499828e-06, "loss": 0.1401, "step": 10584 }, { "epoch": 0.90300290052892, "grad_norm": 1.4631228498192932, "learning_rate": 2.4471741852423237e-06, "loss": 0.1457, "step": 10585 }, { "epoch": 0.903088210203037, "grad_norm": 1.5457407328108679, "learning_rate": 2.4429068469402184e-06, "loss": 0.1969, "step": 10586 }, { "epoch": 0.9031735198771541, "grad_norm": 2.2592827784710137, "learning_rate": 2.438643139369462e-06, "loss": 0.2441, "step": 10587 }, { "epoch": 0.9032588295512711, "grad_norm": 1.5113886194256736, "learning_rate": 2.4343830628555496e-06, "loss": 0.1074, "step": 10588 }, { "epoch": 0.9033441392253881, "grad_norm": 2.3444981715999997, "learning_rate": 2.430126617723738e-06, "loss": 0.139, "step": 10589 }, { "epoch": 0.9034294488995052, "grad_norm": 1.7867951664097712, "learning_rate": 2.4258738042989728e-06, "loss": 0.1486, "step": 10590 }, { "epoch": 0.9035147585736223, "grad_norm": 1.519964867236028, "learning_rate": 2.4216246229059326e-06, "loss": 0.1782, "step": 10591 }, { "epoch": 0.9036000682477393, "grad_norm": 2.3794073068896964, "learning_rate": 2.4173790738690195e-06, "loss": 0.2083, "step": 10592 }, { "epoch": 0.9036853779218563, "grad_norm": 2.8158197096417648, "learning_rate": 2.413137157512363e-06, "loss": 0.2363, "step": 10593 }, { "epoch": 0.9037706875959733, "grad_norm": 1.4726968855778746, "learning_rate": 2.408898874159804e-06, "loss": 0.2488, "step": 10594 }, { "epoch": 0.9038559972700905, "grad_norm": 1.541473823377075, "learning_rate": 2.404664224134917e-06, "loss": 0.1671, "step": 10595 }, { "epoch": 0.9039413069442075, "grad_norm": 2.1053748737985862, "learning_rate": 2.4004332077610047e-06, "loss": 0.1767, "step": 10596 }, { "epoch": 0.9040266166183245, "grad_norm": 1.834124639995287, "learning_rate": 2.3962058253610587e-06, "loss": 0.1633, "step": 10597 }, { "epoch": 0.9041119262924415, "grad_norm": 2.141628837498739, "learning_rate": 2.391982077257837e-06, "loss": 0.169, "step": 10598 }, { "epoch": 0.9041972359665587, "grad_norm": 1.751007198952909, "learning_rate": 2.387761963773799e-06, "loss": 0.2075, "step": 10599 }, { "epoch": 0.9042825456406757, "grad_norm": 1.1925453618856723, "learning_rate": 2.3835454852311255e-06, "loss": 0.1589, "step": 10600 }, { "epoch": 0.9043678553147927, "grad_norm": 1.5879376956958946, "learning_rate": 2.3793326419517147e-06, "loss": 0.1869, "step": 10601 }, { "epoch": 0.9044531649889097, "grad_norm": 2.2634726988854657, "learning_rate": 2.375123434257198e-06, "loss": 0.2274, "step": 10602 }, { "epoch": 0.9045384746630268, "grad_norm": 1.435930639339823, "learning_rate": 2.370917862468941e-06, "loss": 0.1486, "step": 10603 }, { "epoch": 0.9046237843371439, "grad_norm": 1.9916421181138184, "learning_rate": 2.366715926907992e-06, "loss": 0.1433, "step": 10604 }, { "epoch": 0.9047090940112609, "grad_norm": 2.134515885580592, "learning_rate": 2.362517627895167e-06, "loss": 0.1895, "step": 10605 }, { "epoch": 0.9047944036853779, "grad_norm": 2.003473603240591, "learning_rate": 2.35832296575097e-06, "loss": 0.1782, "step": 10606 }, { "epoch": 0.904879713359495, "grad_norm": 1.7748703063004383, "learning_rate": 2.354131940795651e-06, "loss": 0.2, "step": 10607 }, { "epoch": 0.904965023033612, "grad_norm": 1.5280859819357275, "learning_rate": 2.3499445533491646e-06, "loss": 0.1563, "step": 10608 }, { "epoch": 0.905050332707729, "grad_norm": 1.7783134079054934, "learning_rate": 2.345760803731206e-06, "loss": 0.1243, "step": 10609 }, { "epoch": 0.9051356423818461, "grad_norm": 2.333620689499675, "learning_rate": 2.3415806922611695e-06, "loss": 0.184, "step": 10610 }, { "epoch": 0.9052209520559632, "grad_norm": 2.2649902506906905, "learning_rate": 2.3374042192581934e-06, "loss": 0.1544, "step": 10611 }, { "epoch": 0.9053062617300802, "grad_norm": 1.6221706260607487, "learning_rate": 2.3332313850411236e-06, "loss": 0.1179, "step": 10612 }, { "epoch": 0.9053915714041972, "grad_norm": 1.6463911181700592, "learning_rate": 2.3290621899285436e-06, "loss": 0.1725, "step": 10613 }, { "epoch": 0.9054768810783143, "grad_norm": 1.4939875904573885, "learning_rate": 2.3248966342387378e-06, "loss": 0.1309, "step": 10614 }, { "epoch": 0.9055621907524313, "grad_norm": 2.0156234622919884, "learning_rate": 2.3207347182897298e-06, "loss": 0.1557, "step": 10615 }, { "epoch": 0.9056475004265484, "grad_norm": 2.041257882228567, "learning_rate": 2.3165764423992543e-06, "loss": 0.1745, "step": 10616 }, { "epoch": 0.9057328101006654, "grad_norm": 1.890602048624249, "learning_rate": 2.312421806884779e-06, "loss": 0.1838, "step": 10617 }, { "epoch": 0.9058181197747824, "grad_norm": 1.8740193663647806, "learning_rate": 2.3082708120634898e-06, "loss": 0.1402, "step": 10618 }, { "epoch": 0.9059034294488995, "grad_norm": 1.769450339515936, "learning_rate": 2.3041234582522886e-06, "loss": 0.1726, "step": 10619 }, { "epoch": 0.9059887391230166, "grad_norm": 1.9190244546736495, "learning_rate": 2.299979745767811e-06, "loss": 0.1836, "step": 10620 }, { "epoch": 0.9060740487971336, "grad_norm": 1.5010995014104713, "learning_rate": 2.2958396749263976e-06, "loss": 0.1955, "step": 10621 }, { "epoch": 0.9061593584712506, "grad_norm": 1.6766105680516468, "learning_rate": 2.29170324604413e-06, "loss": 0.1312, "step": 10622 }, { "epoch": 0.9062446681453676, "grad_norm": 1.7216488753756956, "learning_rate": 2.287570459436794e-06, "loss": 0.2343, "step": 10623 }, { "epoch": 0.9063299778194848, "grad_norm": 2.156510599946563, "learning_rate": 2.283441315419904e-06, "loss": 0.1969, "step": 10624 }, { "epoch": 0.9064152874936018, "grad_norm": 1.569080989617024, "learning_rate": 2.279315814308719e-06, "loss": 0.1897, "step": 10625 }, { "epoch": 0.9065005971677188, "grad_norm": 1.9775121767076682, "learning_rate": 2.27519395641817e-06, "loss": 0.1846, "step": 10626 }, { "epoch": 0.9065859068418358, "grad_norm": 1.7200352804868233, "learning_rate": 2.2710757420629558e-06, "loss": 0.1901, "step": 10627 }, { "epoch": 0.906671216515953, "grad_norm": 2.172458268479211, "learning_rate": 2.26696117155748e-06, "loss": 0.159, "step": 10628 }, { "epoch": 0.90675652619007, "grad_norm": 1.5962757217856063, "learning_rate": 2.2628502452158695e-06, "loss": 0.2112, "step": 10629 }, { "epoch": 0.906841835864187, "grad_norm": 1.9475472111788479, "learning_rate": 2.2587429633519563e-06, "loss": 0.1676, "step": 10630 }, { "epoch": 0.906927145538304, "grad_norm": 2.30079700095415, "learning_rate": 2.2546393262793397e-06, "loss": 0.2076, "step": 10631 }, { "epoch": 0.9070124552124211, "grad_norm": 1.542957933605284, "learning_rate": 2.2505393343112745e-06, "loss": 0.1992, "step": 10632 }, { "epoch": 0.9070977648865381, "grad_norm": 1.7101736933069978, "learning_rate": 2.2464429877607995e-06, "loss": 0.1709, "step": 10633 }, { "epoch": 0.9071830745606552, "grad_norm": 1.4812164431605725, "learning_rate": 2.2423502869406366e-06, "loss": 0.1706, "step": 10634 }, { "epoch": 0.9072683842347722, "grad_norm": 1.4944605266709414, "learning_rate": 2.2382612321632468e-06, "loss": 0.14, "step": 10635 }, { "epoch": 0.9073536939088893, "grad_norm": 2.4160556897824956, "learning_rate": 2.2341758237408085e-06, "loss": 0.1481, "step": 10636 }, { "epoch": 0.9074390035830063, "grad_norm": 1.6358617027233726, "learning_rate": 2.2300940619852107e-06, "loss": 0.1494, "step": 10637 }, { "epoch": 0.9075243132571233, "grad_norm": 1.6436601447431998, "learning_rate": 2.2260159472080934e-06, "loss": 0.1763, "step": 10638 }, { "epoch": 0.9076096229312404, "grad_norm": 1.3617695731627453, "learning_rate": 2.2219414797207794e-06, "loss": 0.1564, "step": 10639 }, { "epoch": 0.9076949326053575, "grad_norm": 2.0816317921209673, "learning_rate": 2.2178706598343422e-06, "loss": 0.2183, "step": 10640 }, { "epoch": 0.9077802422794745, "grad_norm": 1.5530212521201192, "learning_rate": 2.2138034878595728e-06, "loss": 0.1907, "step": 10641 }, { "epoch": 0.9078655519535915, "grad_norm": 1.9174525543271477, "learning_rate": 2.209739964106966e-06, "loss": 0.1948, "step": 10642 }, { "epoch": 0.9079508616277085, "grad_norm": 2.020800429662929, "learning_rate": 2.205680088886758e-06, "loss": 0.1439, "step": 10643 }, { "epoch": 0.9080361713018257, "grad_norm": 1.3559648556838977, "learning_rate": 2.2016238625088946e-06, "loss": 0.1219, "step": 10644 }, { "epoch": 0.9081214809759427, "grad_norm": 1.605237883886415, "learning_rate": 2.197571285283051e-06, "loss": 0.117, "step": 10645 }, { "epoch": 0.9082067906500597, "grad_norm": 1.6341944782640456, "learning_rate": 2.1935223575186124e-06, "loss": 0.1487, "step": 10646 }, { "epoch": 0.9082921003241767, "grad_norm": 2.5672345036982422, "learning_rate": 2.1894770795247042e-06, "loss": 0.2073, "step": 10647 }, { "epoch": 0.9083774099982939, "grad_norm": 1.7043104333401644, "learning_rate": 2.185435451610157e-06, "loss": 0.2204, "step": 10648 }, { "epoch": 0.9084627196724109, "grad_norm": 1.7422377916954648, "learning_rate": 2.181397474083524e-06, "loss": 0.176, "step": 10649 }, { "epoch": 0.9085480293465279, "grad_norm": 1.4242595756735164, "learning_rate": 2.1773631472530807e-06, "loss": 0.1435, "step": 10650 }, { "epoch": 0.9086333390206449, "grad_norm": 1.524956361740362, "learning_rate": 2.1733324714268476e-06, "loss": 0.1636, "step": 10651 }, { "epoch": 0.9087186486947619, "grad_norm": 1.671009802437412, "learning_rate": 2.1693054469125118e-06, "loss": 0.2274, "step": 10652 }, { "epoch": 0.9088039583688791, "grad_norm": 1.9998764953626764, "learning_rate": 2.165282074017544e-06, "loss": 0.0943, "step": 10653 }, { "epoch": 0.9088892680429961, "grad_norm": 1.780483415517294, "learning_rate": 2.161262353049093e-06, "loss": 0.1885, "step": 10654 }, { "epoch": 0.9089745777171131, "grad_norm": 1.8932314474379817, "learning_rate": 2.157246284314046e-06, "loss": 0.1285, "step": 10655 }, { "epoch": 0.9090598873912301, "grad_norm": 1.5483961236470136, "learning_rate": 2.1532338681190145e-06, "loss": 0.1265, "step": 10656 }, { "epoch": 0.9091451970653472, "grad_norm": 2.1378643863015045, "learning_rate": 2.149225104770314e-06, "loss": 0.1879, "step": 10657 }, { "epoch": 0.9092305067394643, "grad_norm": 1.4984707984764731, "learning_rate": 2.145219994573999e-06, "loss": 0.1531, "step": 10658 }, { "epoch": 0.9093158164135813, "grad_norm": 1.7662753367513413, "learning_rate": 2.141218537835832e-06, "loss": 0.1597, "step": 10659 }, { "epoch": 0.9094011260876983, "grad_norm": 1.6363176751184438, "learning_rate": 2.1372207348613225e-06, "loss": 0.1744, "step": 10660 }, { "epoch": 0.9094864357618154, "grad_norm": 2.096061114250162, "learning_rate": 2.1332265859556556e-06, "loss": 0.195, "step": 10661 }, { "epoch": 0.9095717454359324, "grad_norm": 2.060514245520679, "learning_rate": 2.1292360914237753e-06, "loss": 0.2446, "step": 10662 }, { "epoch": 0.9096570551100495, "grad_norm": 1.879341186178652, "learning_rate": 2.1252492515703382e-06, "loss": 0.2225, "step": 10663 }, { "epoch": 0.9097423647841665, "grad_norm": 1.8060722818030202, "learning_rate": 2.1212660666997177e-06, "loss": 0.2038, "step": 10664 }, { "epoch": 0.9098276744582836, "grad_norm": 1.6979844795524202, "learning_rate": 2.1172865371160035e-06, "loss": 0.1426, "step": 10665 }, { "epoch": 0.9099129841324006, "grad_norm": 2.335407822178459, "learning_rate": 2.1133106631230027e-06, "loss": 0.1753, "step": 10666 }, { "epoch": 0.9099982938065176, "grad_norm": 1.814539124059415, "learning_rate": 2.109338445024284e-06, "loss": 0.1596, "step": 10667 }, { "epoch": 0.9100836034806347, "grad_norm": 1.6829807378037216, "learning_rate": 2.105369883123065e-06, "loss": 0.1387, "step": 10668 }, { "epoch": 0.9101689131547518, "grad_norm": 1.5308641998689994, "learning_rate": 2.1014049777223544e-06, "loss": 0.1839, "step": 10669 }, { "epoch": 0.9102542228288688, "grad_norm": 1.9653621770785823, "learning_rate": 2.097443729124837e-06, "loss": 0.1402, "step": 10670 }, { "epoch": 0.9103395325029858, "grad_norm": 1.4857852059509329, "learning_rate": 2.0934861376329385e-06, "loss": 0.1567, "step": 10671 }, { "epoch": 0.9104248421771028, "grad_norm": 2.1505728912027084, "learning_rate": 2.089532203548794e-06, "loss": 0.2604, "step": 10672 }, { "epoch": 0.91051015185122, "grad_norm": 1.5787831057885344, "learning_rate": 2.0855819271742793e-06, "loss": 0.1914, "step": 10673 }, { "epoch": 0.910595461525337, "grad_norm": 1.9542041086325683, "learning_rate": 2.0816353088109585e-06, "loss": 0.1932, "step": 10674 }, { "epoch": 0.910680771199454, "grad_norm": 1.947839038410429, "learning_rate": 2.0776923487601462e-06, "loss": 0.1488, "step": 10675 }, { "epoch": 0.910766080873571, "grad_norm": 2.0295294884425816, "learning_rate": 2.073753047322868e-06, "loss": 0.1476, "step": 10676 }, { "epoch": 0.9108513905476882, "grad_norm": 1.5530285442649654, "learning_rate": 2.0698174047998618e-06, "loss": 0.1685, "step": 10677 }, { "epoch": 0.9109367002218052, "grad_norm": 2.3656949918564374, "learning_rate": 2.065885421491598e-06, "loss": 0.1956, "step": 10678 }, { "epoch": 0.9110220098959222, "grad_norm": 1.7575372777162526, "learning_rate": 2.061957097698253e-06, "loss": 0.1769, "step": 10679 }, { "epoch": 0.9111073195700392, "grad_norm": 1.7274051870999814, "learning_rate": 2.058032433719759e-06, "loss": 0.1491, "step": 10680 }, { "epoch": 0.9111926292441563, "grad_norm": 1.6963173837004992, "learning_rate": 2.0541114298557042e-06, "loss": 0.2321, "step": 10681 }, { "epoch": 0.9112779389182734, "grad_norm": 2.1011059346643237, "learning_rate": 2.0501940864054715e-06, "loss": 0.2198, "step": 10682 }, { "epoch": 0.9113632485923904, "grad_norm": 2.3515426280046983, "learning_rate": 2.0462804036681103e-06, "loss": 0.1815, "step": 10683 }, { "epoch": 0.9114485582665074, "grad_norm": 1.7519875547498307, "learning_rate": 2.042370381942416e-06, "loss": 0.1954, "step": 10684 }, { "epoch": 0.9115338679406245, "grad_norm": 1.7804427074286988, "learning_rate": 2.038464021526898e-06, "loss": 0.1084, "step": 10685 }, { "epoch": 0.9116191776147415, "grad_norm": 1.923807676564867, "learning_rate": 2.0345613227197803e-06, "loss": 0.2011, "step": 10686 }, { "epoch": 0.9117044872888586, "grad_norm": 1.7468930320286762, "learning_rate": 2.030662285819024e-06, "loss": 0.1435, "step": 10687 }, { "epoch": 0.9117897969629756, "grad_norm": 1.4473461792802031, "learning_rate": 2.02676691112228e-06, "loss": 0.1631, "step": 10688 }, { "epoch": 0.9118751066370926, "grad_norm": 1.513587053445002, "learning_rate": 2.022875198926971e-06, "loss": 0.1397, "step": 10689 }, { "epoch": 0.9119604163112097, "grad_norm": 2.374851322539729, "learning_rate": 2.0189871495301714e-06, "loss": 0.2143, "step": 10690 }, { "epoch": 0.9120457259853267, "grad_norm": 2.0736430315662533, "learning_rate": 2.0151027632287433e-06, "loss": 0.2073, "step": 10691 }, { "epoch": 0.9121310356594438, "grad_norm": 1.553220123509606, "learning_rate": 2.0112220403192215e-06, "loss": 0.1772, "step": 10692 }, { "epoch": 0.9122163453335608, "grad_norm": 2.0195294791532974, "learning_rate": 2.0073449810978974e-06, "loss": 0.2253, "step": 10693 }, { "epoch": 0.9123016550076779, "grad_norm": 1.47527664952271, "learning_rate": 2.003471585860739e-06, "loss": 0.1076, "step": 10694 }, { "epoch": 0.9123869646817949, "grad_norm": 1.7549213963482353, "learning_rate": 1.9996018549034767e-06, "loss": 0.1661, "step": 10695 }, { "epoch": 0.9124722743559119, "grad_norm": 1.4480270050965947, "learning_rate": 1.995735788521541e-06, "loss": 0.2398, "step": 10696 }, { "epoch": 0.912557584030029, "grad_norm": 2.0984705759766435, "learning_rate": 1.9918733870100793e-06, "loss": 0.1649, "step": 10697 }, { "epoch": 0.9126428937041461, "grad_norm": 2.0848079294705313, "learning_rate": 1.988014650663972e-06, "loss": 0.1386, "step": 10698 }, { "epoch": 0.9127282033782631, "grad_norm": 1.5315458050938555, "learning_rate": 1.9841595797778113e-06, "loss": 0.1394, "step": 10699 }, { "epoch": 0.9128135130523801, "grad_norm": 2.3200952704369295, "learning_rate": 1.980308174645912e-06, "loss": 0.1711, "step": 10700 }, { "epoch": 0.9128988227264971, "grad_norm": 1.7737349878257895, "learning_rate": 1.9764604355622996e-06, "loss": 0.2024, "step": 10701 }, { "epoch": 0.9129841324006143, "grad_norm": 2.142774964073777, "learning_rate": 1.972616362820745e-06, "loss": 0.2291, "step": 10702 }, { "epoch": 0.9130694420747313, "grad_norm": 2.0847233013726623, "learning_rate": 1.968775956714708e-06, "loss": 0.1955, "step": 10703 }, { "epoch": 0.9131547517488483, "grad_norm": 2.3809767358533356, "learning_rate": 1.9649392175373927e-06, "loss": 0.1621, "step": 10704 }, { "epoch": 0.9132400614229653, "grad_norm": 1.6525965287586788, "learning_rate": 1.961106145581709e-06, "loss": 0.1944, "step": 10705 }, { "epoch": 0.9133253710970825, "grad_norm": 1.473544391787944, "learning_rate": 1.9572767411402904e-06, "loss": 0.1394, "step": 10706 }, { "epoch": 0.9134106807711995, "grad_norm": 2.4470604447482422, "learning_rate": 1.9534510045054967e-06, "loss": 0.1782, "step": 10707 }, { "epoch": 0.9134959904453165, "grad_norm": 1.6305588496729286, "learning_rate": 1.949628935969394e-06, "loss": 0.2074, "step": 10708 }, { "epoch": 0.9135813001194335, "grad_norm": 2.608640099016867, "learning_rate": 1.9458105358237945e-06, "loss": 0.2022, "step": 10709 }, { "epoch": 0.9136666097935506, "grad_norm": 1.6954660653734737, "learning_rate": 1.941995804360186e-06, "loss": 0.2042, "step": 10710 }, { "epoch": 0.9137519194676677, "grad_norm": 1.4787831234547544, "learning_rate": 1.9381847418698253e-06, "loss": 0.181, "step": 10711 }, { "epoch": 0.9138372291417847, "grad_norm": 2.066788463182478, "learning_rate": 1.934377348643662e-06, "loss": 0.148, "step": 10712 }, { "epoch": 0.9139225388159017, "grad_norm": 1.69206623125424, "learning_rate": 1.930573624972365e-06, "loss": 0.159, "step": 10713 }, { "epoch": 0.9140078484900188, "grad_norm": 1.6270097628806384, "learning_rate": 1.9267735711463286e-06, "loss": 0.1261, "step": 10714 }, { "epoch": 0.9140931581641358, "grad_norm": 1.7164692397924188, "learning_rate": 1.9229771874556766e-06, "loss": 0.1429, "step": 10715 }, { "epoch": 0.9141784678382529, "grad_norm": 1.7795840051581704, "learning_rate": 1.9191844741902275e-06, "loss": 0.1507, "step": 10716 }, { "epoch": 0.9142637775123699, "grad_norm": 2.1018420178890853, "learning_rate": 1.915395431639544e-06, "loss": 0.1002, "step": 10717 }, { "epoch": 0.914349087186487, "grad_norm": 2.0304736487757733, "learning_rate": 1.9116100600929057e-06, "loss": 0.1642, "step": 10718 }, { "epoch": 0.914434396860604, "grad_norm": 1.3891601991569633, "learning_rate": 1.907828359839292e-06, "loss": 0.1491, "step": 10719 }, { "epoch": 0.914519706534721, "grad_norm": 1.976479087547185, "learning_rate": 1.9040503311674229e-06, "loss": 0.1505, "step": 10720 }, { "epoch": 0.914605016208838, "grad_norm": 2.801851927172769, "learning_rate": 1.9002759743657284e-06, "loss": 0.143, "step": 10721 }, { "epoch": 0.9146903258829552, "grad_norm": 1.6429586616093919, "learning_rate": 1.8965052897223611e-06, "loss": 0.1624, "step": 10722 }, { "epoch": 0.9147756355570722, "grad_norm": 1.6029887477981954, "learning_rate": 1.8927382775251856e-06, "loss": 0.2007, "step": 10723 }, { "epoch": 0.9148609452311892, "grad_norm": 1.6457267276023015, "learning_rate": 1.8889749380618105e-06, "loss": 0.1076, "step": 10724 }, { "epoch": 0.9149462549053062, "grad_norm": 1.533646401634941, "learning_rate": 1.8852152716195336e-06, "loss": 0.1971, "step": 10725 }, { "epoch": 0.9150315645794234, "grad_norm": 1.3626270199984598, "learning_rate": 1.8814592784853924e-06, "loss": 0.1017, "step": 10726 }, { "epoch": 0.9151168742535404, "grad_norm": 1.5947880823588025, "learning_rate": 1.8777069589461348e-06, "loss": 0.176, "step": 10727 }, { "epoch": 0.9152021839276574, "grad_norm": 1.9949317132644864, "learning_rate": 1.8739583132882265e-06, "loss": 0.1176, "step": 10728 }, { "epoch": 0.9152874936017744, "grad_norm": 1.4390167030229286, "learning_rate": 1.8702133417978607e-06, "loss": 0.1746, "step": 10729 }, { "epoch": 0.9153728032758914, "grad_norm": 1.3755822249490668, "learning_rate": 1.8664720447609363e-06, "loss": 0.0834, "step": 10730 }, { "epoch": 0.9154581129500086, "grad_norm": 2.053469801099968, "learning_rate": 1.8627344224631082e-06, "loss": 0.2016, "step": 10731 }, { "epoch": 0.9155434226241256, "grad_norm": 2.556543078794666, "learning_rate": 1.8590004751896871e-06, "loss": 0.1976, "step": 10732 }, { "epoch": 0.9156287322982426, "grad_norm": 1.5353523109782634, "learning_rate": 1.8552702032257674e-06, "loss": 0.1916, "step": 10733 }, { "epoch": 0.9157140419723596, "grad_norm": 1.4976990854329155, "learning_rate": 1.8515436068561265e-06, "loss": 0.1517, "step": 10734 }, { "epoch": 0.9157993516464767, "grad_norm": 1.7322719986518378, "learning_rate": 1.8478206863652702e-06, "loss": 0.1955, "step": 10735 }, { "epoch": 0.9158846613205938, "grad_norm": 1.7914692378127226, "learning_rate": 1.8441014420374215e-06, "loss": 0.1811, "step": 10736 }, { "epoch": 0.9159699709947108, "grad_norm": 1.6871769207281637, "learning_rate": 1.8403858741565306e-06, "loss": 0.1409, "step": 10737 }, { "epoch": 0.9160552806688278, "grad_norm": 1.411577390972688, "learning_rate": 1.8366739830062597e-06, "loss": 0.1485, "step": 10738 }, { "epoch": 0.9161405903429449, "grad_norm": 1.572034053453062, "learning_rate": 1.8329657688699875e-06, "loss": 0.1474, "step": 10739 }, { "epoch": 0.916225900017062, "grad_norm": 1.921142314407189, "learning_rate": 1.8292612320308212e-06, "loss": 0.1788, "step": 10740 }, { "epoch": 0.916311209691179, "grad_norm": 1.8213158326827836, "learning_rate": 1.8255603727715786e-06, "loss": 0.195, "step": 10741 }, { "epoch": 0.916396519365296, "grad_norm": 1.8081526084398172, "learning_rate": 1.8218631913748062e-06, "loss": 0.1492, "step": 10742 }, { "epoch": 0.9164818290394131, "grad_norm": 2.4042377728062747, "learning_rate": 1.8181696881227562e-06, "loss": 0.1662, "step": 10743 }, { "epoch": 0.9165671387135301, "grad_norm": 2.160392872438078, "learning_rate": 1.8144798632974192e-06, "loss": 0.1589, "step": 10744 }, { "epoch": 0.9166524483876471, "grad_norm": 2.7550392796092655, "learning_rate": 1.8107937171804812e-06, "loss": 0.2502, "step": 10745 }, { "epoch": 0.9167377580617642, "grad_norm": 2.2498089391332234, "learning_rate": 1.807111250053367e-06, "loss": 0.2106, "step": 10746 }, { "epoch": 0.9168230677358813, "grad_norm": 1.681628148545471, "learning_rate": 1.8034324621972132e-06, "loss": 0.1576, "step": 10747 }, { "epoch": 0.9169083774099983, "grad_norm": 1.6906991938812397, "learning_rate": 1.799757353892878e-06, "loss": 0.1704, "step": 10748 }, { "epoch": 0.9169936870841153, "grad_norm": 1.6785005855974047, "learning_rate": 1.7960859254209262e-06, "loss": 0.1503, "step": 10749 }, { "epoch": 0.9170789967582323, "grad_norm": 1.6371567169749752, "learning_rate": 1.792418177061661e-06, "loss": 0.1718, "step": 10750 }, { "epoch": 0.9171643064323495, "grad_norm": 1.6463725820217634, "learning_rate": 1.7887541090950977e-06, "loss": 0.1559, "step": 10751 }, { "epoch": 0.9172496161064665, "grad_norm": 1.7705487490096479, "learning_rate": 1.7850937218009567e-06, "loss": 0.1815, "step": 10752 }, { "epoch": 0.9173349257805835, "grad_norm": 1.62557063720572, "learning_rate": 1.781437015458698e-06, "loss": 0.1559, "step": 10753 }, { "epoch": 0.9174202354547005, "grad_norm": 1.4989316632635503, "learning_rate": 1.7777839903474924e-06, "loss": 0.1804, "step": 10754 }, { "epoch": 0.9175055451288177, "grad_norm": 1.8028789887951813, "learning_rate": 1.7741346467462284e-06, "loss": 0.1731, "step": 10755 }, { "epoch": 0.9175908548029347, "grad_norm": 2.0975091694391583, "learning_rate": 1.770488984933505e-06, "loss": 0.1358, "step": 10756 }, { "epoch": 0.9176761644770517, "grad_norm": 1.670361270317432, "learning_rate": 1.7668470051876662e-06, "loss": 0.2243, "step": 10757 }, { "epoch": 0.9177614741511687, "grad_norm": 2.356219676503112, "learning_rate": 1.76320870778674e-06, "loss": 0.1263, "step": 10758 }, { "epoch": 0.9178467838252858, "grad_norm": 2.0815934227223027, "learning_rate": 1.759574093008498e-06, "loss": 0.1662, "step": 10759 }, { "epoch": 0.9179320934994029, "grad_norm": 2.0755284659047106, "learning_rate": 1.75594316113043e-06, "loss": 0.0971, "step": 10760 }, { "epoch": 0.9180174031735199, "grad_norm": 1.669979554011195, "learning_rate": 1.7523159124297306e-06, "loss": 0.2214, "step": 10761 }, { "epoch": 0.9181027128476369, "grad_norm": 2.303101902590995, "learning_rate": 1.7486923471833284e-06, "loss": 0.223, "step": 10762 }, { "epoch": 0.918188022521754, "grad_norm": 1.7475699855777416, "learning_rate": 1.7450724656678518e-06, "loss": 0.1562, "step": 10763 }, { "epoch": 0.918273332195871, "grad_norm": 1.6857905736614645, "learning_rate": 1.741456268159669e-06, "loss": 0.1626, "step": 10764 }, { "epoch": 0.9183586418699881, "grad_norm": 1.9254896705368207, "learning_rate": 1.737843754934848e-06, "loss": 0.1809, "step": 10765 }, { "epoch": 0.9184439515441051, "grad_norm": 1.855549507391407, "learning_rate": 1.734234926269207e-06, "loss": 0.1892, "step": 10766 }, { "epoch": 0.9185292612182221, "grad_norm": 1.5277539261006075, "learning_rate": 1.7306297824382312e-06, "loss": 0.1467, "step": 10767 }, { "epoch": 0.9186145708923392, "grad_norm": 1.4597120398204495, "learning_rate": 1.7270283237171725e-06, "loss": 0.1317, "step": 10768 }, { "epoch": 0.9186998805664562, "grad_norm": 1.7575174719863123, "learning_rate": 1.7234305503809778e-06, "loss": 0.1678, "step": 10769 }, { "epoch": 0.9187851902405733, "grad_norm": 1.7534404042528733, "learning_rate": 1.7198364627043218e-06, "loss": 0.208, "step": 10770 }, { "epoch": 0.9188704999146903, "grad_norm": 1.726541096136066, "learning_rate": 1.7162460609615905e-06, "loss": 0.1545, "step": 10771 }, { "epoch": 0.9189558095888074, "grad_norm": 1.7893450651286409, "learning_rate": 1.712659345426887e-06, "loss": 0.16, "step": 10772 }, { "epoch": 0.9190411192629244, "grad_norm": 1.580131237518961, "learning_rate": 1.7090763163740586e-06, "loss": 0.1966, "step": 10773 }, { "epoch": 0.9191264289370414, "grad_norm": 1.9248450055827788, "learning_rate": 1.7054969740766203e-06, "loss": 0.1919, "step": 10774 }, { "epoch": 0.9192117386111585, "grad_norm": 1.675771148580059, "learning_rate": 1.7019213188078587e-06, "loss": 0.1794, "step": 10775 }, { "epoch": 0.9192970482852756, "grad_norm": 1.7487859602005977, "learning_rate": 1.6983493508407443e-06, "loss": 0.1946, "step": 10776 }, { "epoch": 0.9193823579593926, "grad_norm": 1.807632685285232, "learning_rate": 1.6947810704479873e-06, "loss": 0.1999, "step": 10777 }, { "epoch": 0.9194676676335096, "grad_norm": 1.4101667324532483, "learning_rate": 1.6912164779019969e-06, "loss": 0.1594, "step": 10778 }, { "epoch": 0.9195529773076266, "grad_norm": 1.9625372573328839, "learning_rate": 1.687655573474922e-06, "loss": 0.2643, "step": 10779 }, { "epoch": 0.9196382869817438, "grad_norm": 1.615127652204698, "learning_rate": 1.6840983574386072e-06, "loss": 0.1793, "step": 10780 }, { "epoch": 0.9197235966558608, "grad_norm": 1.6512485779634138, "learning_rate": 1.6805448300646342e-06, "loss": 0.1234, "step": 10781 }, { "epoch": 0.9198089063299778, "grad_norm": 1.8935796176317876, "learning_rate": 1.6769949916242977e-06, "loss": 0.0869, "step": 10782 }, { "epoch": 0.9198942160040948, "grad_norm": 1.9977741968978304, "learning_rate": 1.673448842388603e-06, "loss": 0.1208, "step": 10783 }, { "epoch": 0.919979525678212, "grad_norm": 1.81797568496542, "learning_rate": 1.669906382628278e-06, "loss": 0.1526, "step": 10784 }, { "epoch": 0.920064835352329, "grad_norm": 1.7854260797894188, "learning_rate": 1.6663676126137784e-06, "loss": 0.1472, "step": 10785 }, { "epoch": 0.920150145026446, "grad_norm": 1.6557633026754441, "learning_rate": 1.6628325326152717e-06, "loss": 0.1206, "step": 10786 }, { "epoch": 0.920235454700563, "grad_norm": 1.9737629118503683, "learning_rate": 1.659301142902625e-06, "loss": 0.1312, "step": 10787 }, { "epoch": 0.9203207643746801, "grad_norm": 1.3463167029160639, "learning_rate": 1.655773443745462e-06, "loss": 0.1731, "step": 10788 }, { "epoch": 0.9204060740487972, "grad_norm": 1.7423670381144762, "learning_rate": 1.6522494354131002e-06, "loss": 0.1822, "step": 10789 }, { "epoch": 0.9204913837229142, "grad_norm": 1.6948937808713203, "learning_rate": 1.648729118174569e-06, "loss": 0.2115, "step": 10790 }, { "epoch": 0.9205766933970312, "grad_norm": 1.8283365241376122, "learning_rate": 1.6452124922986311e-06, "loss": 0.2144, "step": 10791 }, { "epoch": 0.9206620030711483, "grad_norm": 1.8926508114992577, "learning_rate": 1.6416995580537664e-06, "loss": 0.2292, "step": 10792 }, { "epoch": 0.9207473127452653, "grad_norm": 1.5067936909279465, "learning_rate": 1.6381903157081602e-06, "loss": 0.1747, "step": 10793 }, { "epoch": 0.9208326224193824, "grad_norm": 2.1131405598457675, "learning_rate": 1.634684765529726e-06, "loss": 0.1666, "step": 10794 }, { "epoch": 0.9209179320934994, "grad_norm": 2.0184235772460193, "learning_rate": 1.6311829077861051e-06, "loss": 0.2508, "step": 10795 }, { "epoch": 0.9210032417676165, "grad_norm": 1.7395869805151192, "learning_rate": 1.6276847427446284e-06, "loss": 0.1817, "step": 10796 }, { "epoch": 0.9210885514417335, "grad_norm": 1.5056440345381223, "learning_rate": 1.6241902706723766e-06, "loss": 0.1861, "step": 10797 }, { "epoch": 0.9211738611158505, "grad_norm": 1.6882671449170008, "learning_rate": 1.620699491836125e-06, "loss": 0.1729, "step": 10798 }, { "epoch": 0.9212591707899676, "grad_norm": 1.772351037183415, "learning_rate": 1.6172124065023886e-06, "loss": 0.2059, "step": 10799 }, { "epoch": 0.9213444804640847, "grad_norm": 1.5139945787562035, "learning_rate": 1.6137290149373708e-06, "loss": 0.1824, "step": 10800 }, { "epoch": 0.9214297901382017, "grad_norm": 1.5503278016509998, "learning_rate": 1.6102493174070255e-06, "loss": 0.1735, "step": 10801 }, { "epoch": 0.9215150998123187, "grad_norm": 2.4126242887273452, "learning_rate": 1.6067733141769958e-06, "loss": 0.1557, "step": 10802 }, { "epoch": 0.9216004094864357, "grad_norm": 1.9817192749321109, "learning_rate": 1.603301005512664e-06, "loss": 0.1668, "step": 10803 }, { "epoch": 0.9216857191605528, "grad_norm": 1.9829697326412445, "learning_rate": 1.5998323916791235e-06, "loss": 0.2066, "step": 10804 }, { "epoch": 0.9217710288346699, "grad_norm": 1.4234223382772555, "learning_rate": 1.5963674729411792e-06, "loss": 0.1767, "step": 10805 }, { "epoch": 0.9218563385087869, "grad_norm": 1.591862926399925, "learning_rate": 1.592906249563364e-06, "loss": 0.2041, "step": 10806 }, { "epoch": 0.9219416481829039, "grad_norm": 2.4247028208063095, "learning_rate": 1.5894487218099164e-06, "loss": 0.229, "step": 10807 }, { "epoch": 0.9220269578570209, "grad_norm": 1.6915395917597444, "learning_rate": 1.5859948899448141e-06, "loss": 0.2217, "step": 10808 }, { "epoch": 0.9221122675311381, "grad_norm": 1.9997308669206855, "learning_rate": 1.5825447542317184e-06, "loss": 0.2123, "step": 10809 }, { "epoch": 0.9221975772052551, "grad_norm": 1.5526281164506728, "learning_rate": 1.5790983149340466e-06, "loss": 0.2082, "step": 10810 }, { "epoch": 0.9222828868793721, "grad_norm": 1.8623332364995133, "learning_rate": 1.5756555723149103e-06, "loss": 0.22, "step": 10811 }, { "epoch": 0.9223681965534891, "grad_norm": 1.4398725253213962, "learning_rate": 1.5722165266371492e-06, "loss": 0.1096, "step": 10812 }, { "epoch": 0.9224535062276062, "grad_norm": 1.6312471031660176, "learning_rate": 1.5687811781633033e-06, "loss": 0.2266, "step": 10813 }, { "epoch": 0.9225388159017233, "grad_norm": 1.735174252667014, "learning_rate": 1.5653495271556517e-06, "loss": 0.1875, "step": 10814 }, { "epoch": 0.9226241255758403, "grad_norm": 1.5548215261992353, "learning_rate": 1.5619215738761906e-06, "loss": 0.1925, "step": 10815 }, { "epoch": 0.9227094352499573, "grad_norm": 1.632818194657758, "learning_rate": 1.5584973185866103e-06, "loss": 0.1744, "step": 10816 }, { "epoch": 0.9227947449240744, "grad_norm": 1.875218187988661, "learning_rate": 1.5550767615483408e-06, "loss": 0.1429, "step": 10817 }, { "epoch": 0.9228800545981914, "grad_norm": 1.7072019164363326, "learning_rate": 1.5516599030225288e-06, "loss": 0.1393, "step": 10818 }, { "epoch": 0.9229653642723085, "grad_norm": 1.7613803851542276, "learning_rate": 1.548246743270032e-06, "loss": 0.1765, "step": 10819 }, { "epoch": 0.9230506739464255, "grad_norm": 1.4604845619835345, "learning_rate": 1.5448372825514146e-06, "loss": 0.162, "step": 10820 }, { "epoch": 0.9231359836205426, "grad_norm": 1.646100453135453, "learning_rate": 1.5414315211269902e-06, "loss": 0.1551, "step": 10821 }, { "epoch": 0.9232212932946596, "grad_norm": 2.294691124536186, "learning_rate": 1.5380294592567513e-06, "loss": 0.226, "step": 10822 }, { "epoch": 0.9233066029687766, "grad_norm": 1.5559356126775747, "learning_rate": 1.5346310972004397e-06, "loss": 0.1808, "step": 10823 }, { "epoch": 0.9233919126428937, "grad_norm": 1.6120590494193126, "learning_rate": 1.5312364352175034e-06, "loss": 0.1629, "step": 10824 }, { "epoch": 0.9234772223170108, "grad_norm": 1.6397781366213262, "learning_rate": 1.5278454735670967e-06, "loss": 0.154, "step": 10825 }, { "epoch": 0.9235625319911278, "grad_norm": 1.900204687637475, "learning_rate": 1.524458212508112e-06, "loss": 0.2101, "step": 10826 }, { "epoch": 0.9236478416652448, "grad_norm": 2.6070675269813166, "learning_rate": 1.5210746522991425e-06, "loss": 0.1441, "step": 10827 }, { "epoch": 0.9237331513393618, "grad_norm": 1.676942791785876, "learning_rate": 1.5176947931985097e-06, "loss": 0.1812, "step": 10828 }, { "epoch": 0.923818461013479, "grad_norm": 1.8260634446210045, "learning_rate": 1.5143186354642346e-06, "loss": 0.2281, "step": 10829 }, { "epoch": 0.923903770687596, "grad_norm": 1.4663191625701362, "learning_rate": 1.5109461793540891e-06, "loss": 0.15, "step": 10830 }, { "epoch": 0.923989080361713, "grad_norm": 1.627392328336649, "learning_rate": 1.507577425125528e-06, "loss": 0.1768, "step": 10831 }, { "epoch": 0.92407439003583, "grad_norm": 1.804620452560167, "learning_rate": 1.504212373035746e-06, "loss": 0.0719, "step": 10832 }, { "epoch": 0.9241596997099472, "grad_norm": 2.091174919208006, "learning_rate": 1.5008510233416374e-06, "loss": 0.175, "step": 10833 }, { "epoch": 0.9242450093840642, "grad_norm": 1.6755667595878032, "learning_rate": 1.4974933762998356e-06, "loss": 0.1389, "step": 10834 }, { "epoch": 0.9243303190581812, "grad_norm": 1.9071890597311283, "learning_rate": 1.494139432166669e-06, "loss": 0.2107, "step": 10835 }, { "epoch": 0.9244156287322982, "grad_norm": 1.842730482739959, "learning_rate": 1.4907891911981886e-06, "loss": 0.231, "step": 10836 }, { "epoch": 0.9245009384064153, "grad_norm": 1.994873988060641, "learning_rate": 1.4874426536501895e-06, "loss": 0.2207, "step": 10837 }, { "epoch": 0.9245862480805324, "grad_norm": 1.328288932107668, "learning_rate": 1.4840998197781397e-06, "loss": 0.1828, "step": 10838 }, { "epoch": 0.9246715577546494, "grad_norm": 1.7749655438154786, "learning_rate": 1.4807606898372572e-06, "loss": 0.2159, "step": 10839 }, { "epoch": 0.9247568674287664, "grad_norm": 1.4331171974339052, "learning_rate": 1.477425264082466e-06, "loss": 0.1376, "step": 10840 }, { "epoch": 0.9248421771028834, "grad_norm": 1.6547660657693808, "learning_rate": 1.4740935427684066e-06, "loss": 0.2282, "step": 10841 }, { "epoch": 0.9249274867770005, "grad_norm": 1.6749363616696935, "learning_rate": 1.4707655261494368e-06, "loss": 0.1256, "step": 10842 }, { "epoch": 0.9250127964511176, "grad_norm": 1.3920831643342038, "learning_rate": 1.4674412144796368e-06, "loss": 0.1551, "step": 10843 }, { "epoch": 0.9250981061252346, "grad_norm": 1.6572628163388672, "learning_rate": 1.4641206080128034e-06, "loss": 0.1683, "step": 10844 }, { "epoch": 0.9251834157993516, "grad_norm": 1.9613674953804079, "learning_rate": 1.4608037070024338e-06, "loss": 0.1961, "step": 10845 }, { "epoch": 0.9252687254734687, "grad_norm": 1.6979720529833235, "learning_rate": 1.45749051170177e-06, "loss": 0.1833, "step": 10846 }, { "epoch": 0.9253540351475857, "grad_norm": 1.3814695485859791, "learning_rate": 1.4541810223637487e-06, "loss": 0.1018, "step": 10847 }, { "epoch": 0.9254393448217028, "grad_norm": 1.8951340178554483, "learning_rate": 1.4508752392410396e-06, "loss": 0.2762, "step": 10848 }, { "epoch": 0.9255246544958198, "grad_norm": 2.33319798712785, "learning_rate": 1.4475731625860023e-06, "loss": 0.2182, "step": 10849 }, { "epoch": 0.9256099641699369, "grad_norm": 1.3228468638750206, "learning_rate": 1.4442747926507626e-06, "loss": 0.1444, "step": 10850 }, { "epoch": 0.9256952738440539, "grad_norm": 1.5818078154091686, "learning_rate": 1.4409801296871083e-06, "loss": 0.1977, "step": 10851 }, { "epoch": 0.9257805835181709, "grad_norm": 2.075147747616655, "learning_rate": 1.4376891739465826e-06, "loss": 0.2232, "step": 10852 }, { "epoch": 0.925865893192288, "grad_norm": 1.487931818753642, "learning_rate": 1.4344019256804286e-06, "loss": 0.1373, "step": 10853 }, { "epoch": 0.9259512028664051, "grad_norm": 1.400193119353318, "learning_rate": 1.431118385139607e-06, "loss": 0.1481, "step": 10854 }, { "epoch": 0.9260365125405221, "grad_norm": 1.8147434950818517, "learning_rate": 1.427838552574806e-06, "loss": 0.1633, "step": 10855 }, { "epoch": 0.9261218222146391, "grad_norm": 1.7499049024628666, "learning_rate": 1.42456242823642e-06, "loss": 0.1384, "step": 10856 }, { "epoch": 0.9262071318887561, "grad_norm": 1.6397292098506915, "learning_rate": 1.42129001237456e-06, "loss": 0.2279, "step": 10857 }, { "epoch": 0.9262924415628733, "grad_norm": 1.953083312543393, "learning_rate": 1.4180213052390534e-06, "loss": 0.2126, "step": 10858 }, { "epoch": 0.9263777512369903, "grad_norm": 1.8869381099835265, "learning_rate": 1.4147563070794623e-06, "loss": 0.1232, "step": 10859 }, { "epoch": 0.9264630609111073, "grad_norm": 1.7109785379861844, "learning_rate": 1.4114950181450481e-06, "loss": 0.1687, "step": 10860 }, { "epoch": 0.9265483705852243, "grad_norm": 1.367001330098061, "learning_rate": 1.4082374386847897e-06, "loss": 0.1174, "step": 10861 }, { "epoch": 0.9266336802593415, "grad_norm": 1.704195998233985, "learning_rate": 1.4049835689473822e-06, "loss": 0.1252, "step": 10862 }, { "epoch": 0.9267189899334585, "grad_norm": 2.422199153741691, "learning_rate": 1.4017334091812551e-06, "loss": 0.1401, "step": 10863 }, { "epoch": 0.9268042996075755, "grad_norm": 1.2499114005160585, "learning_rate": 1.3984869596345207e-06, "loss": 0.1381, "step": 10864 }, { "epoch": 0.9268896092816925, "grad_norm": 1.6032987533906153, "learning_rate": 1.395244220555042e-06, "loss": 0.1819, "step": 10865 }, { "epoch": 0.9269749189558096, "grad_norm": 1.7377845311636622, "learning_rate": 1.392005192190382e-06, "loss": 0.2084, "step": 10866 }, { "epoch": 0.9270602286299267, "grad_norm": 2.6562392290682357, "learning_rate": 1.3887698747878263e-06, "loss": 0.1268, "step": 10867 }, { "epoch": 0.9271455383040437, "grad_norm": 1.3762927913682925, "learning_rate": 1.3855382685943662e-06, "loss": 0.1901, "step": 10868 }, { "epoch": 0.9272308479781607, "grad_norm": 1.7766370772759996, "learning_rate": 1.3823103738567267e-06, "loss": 0.2549, "step": 10869 }, { "epoch": 0.9273161576522778, "grad_norm": 1.7674209777778493, "learning_rate": 1.3790861908213326e-06, "loss": 0.1569, "step": 10870 }, { "epoch": 0.9274014673263948, "grad_norm": 1.5713120500911322, "learning_rate": 1.3758657197343372e-06, "loss": 0.1733, "step": 10871 }, { "epoch": 0.9274867770005119, "grad_norm": 2.3188447259437424, "learning_rate": 1.3726489608416104e-06, "loss": 0.1863, "step": 10872 }, { "epoch": 0.9275720866746289, "grad_norm": 1.7632476194974926, "learning_rate": 1.3694359143887225e-06, "loss": 0.2163, "step": 10873 }, { "epoch": 0.927657396348746, "grad_norm": 2.4014642857936455, "learning_rate": 1.366226580620983e-06, "loss": 0.2174, "step": 10874 }, { "epoch": 0.927742706022863, "grad_norm": 1.7630178732309514, "learning_rate": 1.3630209597834009e-06, "loss": 0.1199, "step": 10875 }, { "epoch": 0.92782801569698, "grad_norm": 1.7266603329933878, "learning_rate": 1.35981905212072e-06, "loss": 0.1461, "step": 10876 }, { "epoch": 0.927913325371097, "grad_norm": 2.9951827791766346, "learning_rate": 1.3566208578773775e-06, "loss": 0.196, "step": 10877 }, { "epoch": 0.9279986350452142, "grad_norm": 1.938544822278974, "learning_rate": 1.3534263772975342e-06, "loss": 0.1186, "step": 10878 }, { "epoch": 0.9280839447193312, "grad_norm": 2.4714823221650635, "learning_rate": 1.3502356106250947e-06, "loss": 0.1947, "step": 10879 }, { "epoch": 0.9281692543934482, "grad_norm": 2.1676047079628957, "learning_rate": 1.347048558103625e-06, "loss": 0.1458, "step": 10880 }, { "epoch": 0.9282545640675652, "grad_norm": 1.6507429299914473, "learning_rate": 1.3438652199764646e-06, "loss": 0.1885, "step": 10881 }, { "epoch": 0.9283398737416823, "grad_norm": 1.8960717299369025, "learning_rate": 1.3406855964866405e-06, "loss": 0.1615, "step": 10882 }, { "epoch": 0.9284251834157994, "grad_norm": 1.3992309024523741, "learning_rate": 1.3375096878768923e-06, "loss": 0.1696, "step": 10883 }, { "epoch": 0.9285104930899164, "grad_norm": 2.2359644431697445, "learning_rate": 1.3343374943896815e-06, "loss": 0.1895, "step": 10884 }, { "epoch": 0.9285958027640334, "grad_norm": 1.5625990264030225, "learning_rate": 1.331169016267203e-06, "loss": 0.177, "step": 10885 }, { "epoch": 0.9286811124381504, "grad_norm": 1.8161450269992863, "learning_rate": 1.328004253751336e-06, "loss": 0.1316, "step": 10886 }, { "epoch": 0.9287664221122676, "grad_norm": 1.815578214531744, "learning_rate": 1.3248432070837035e-06, "loss": 0.1823, "step": 10887 }, { "epoch": 0.9288517317863846, "grad_norm": 2.032868899267458, "learning_rate": 1.321685876505635e-06, "loss": 0.2967, "step": 10888 }, { "epoch": 0.9289370414605016, "grad_norm": 1.7713598777486754, "learning_rate": 1.3185322622581764e-06, "loss": 0.1931, "step": 10889 }, { "epoch": 0.9290223511346186, "grad_norm": 1.5294616532495957, "learning_rate": 1.315382364582085e-06, "loss": 0.1406, "step": 10890 }, { "epoch": 0.9291076608087357, "grad_norm": 1.633560520383277, "learning_rate": 1.31223618371783e-06, "loss": 0.156, "step": 10891 }, { "epoch": 0.9291929704828528, "grad_norm": 2.650668214725884, "learning_rate": 1.3090937199056352e-06, "loss": 0.1781, "step": 10892 }, { "epoch": 0.9292782801569698, "grad_norm": 1.838745826285972, "learning_rate": 1.305954973385376e-06, "loss": 0.2145, "step": 10893 }, { "epoch": 0.9293635898310868, "grad_norm": 1.3175880266638023, "learning_rate": 1.3028199443966994e-06, "loss": 0.1231, "step": 10894 }, { "epoch": 0.9294488995052039, "grad_norm": 2.4667993879267684, "learning_rate": 1.2996886331789416e-06, "loss": 0.3009, "step": 10895 }, { "epoch": 0.929534209179321, "grad_norm": 1.548239751327598, "learning_rate": 1.296561039971167e-06, "loss": 0.1606, "step": 10896 }, { "epoch": 0.929619518853438, "grad_norm": 1.570794026956453, "learning_rate": 1.2934371650121458e-06, "loss": 0.1287, "step": 10897 }, { "epoch": 0.929704828527555, "grad_norm": 1.5826039725764915, "learning_rate": 1.290317008540376e-06, "loss": 0.1615, "step": 10898 }, { "epoch": 0.9297901382016721, "grad_norm": 2.1935315493420613, "learning_rate": 1.2872005707940505e-06, "loss": 0.1903, "step": 10899 }, { "epoch": 0.9298754478757891, "grad_norm": 1.4045095906023548, "learning_rate": 1.284087852011101e-06, "loss": 0.1765, "step": 10900 }, { "epoch": 0.9299607575499061, "grad_norm": 2.1002582345861716, "learning_rate": 1.2809788524291821e-06, "loss": 0.1901, "step": 10901 }, { "epoch": 0.9300460672240232, "grad_norm": 1.5302201525303347, "learning_rate": 1.2778735722856205e-06, "loss": 0.1915, "step": 10902 }, { "epoch": 0.9301313768981403, "grad_norm": 2.220042752034703, "learning_rate": 1.2747720118175099e-06, "loss": 0.175, "step": 10903 }, { "epoch": 0.9302166865722573, "grad_norm": 1.5939642070355742, "learning_rate": 1.271674171261622e-06, "loss": 0.1735, "step": 10904 }, { "epoch": 0.9303019962463743, "grad_norm": 1.4593897489990955, "learning_rate": 1.2685800508544786e-06, "loss": 0.2101, "step": 10905 }, { "epoch": 0.9303873059204913, "grad_norm": 2.706402858443277, "learning_rate": 1.2654896508322856e-06, "loss": 0.2584, "step": 10906 }, { "epoch": 0.9304726155946085, "grad_norm": 1.8983180632519934, "learning_rate": 1.2624029714309872e-06, "loss": 0.1748, "step": 10907 }, { "epoch": 0.9305579252687255, "grad_norm": 1.5283261625037798, "learning_rate": 1.2593200128862225e-06, "loss": 0.2073, "step": 10908 }, { "epoch": 0.9306432349428425, "grad_norm": 2.1735767311678726, "learning_rate": 1.256240775433376e-06, "loss": 0.1715, "step": 10909 }, { "epoch": 0.9307285446169595, "grad_norm": 1.2212728162838242, "learning_rate": 1.2531652593075204e-06, "loss": 0.1259, "step": 10910 }, { "epoch": 0.9308138542910767, "grad_norm": 1.7815397595752698, "learning_rate": 1.2500934647434515e-06, "loss": 0.186, "step": 10911 }, { "epoch": 0.9308991639651937, "grad_norm": 2.1014124026106695, "learning_rate": 1.247025391975698e-06, "loss": 0.2447, "step": 10912 }, { "epoch": 0.9309844736393107, "grad_norm": 1.788043602401106, "learning_rate": 1.2439610412384727e-06, "loss": 0.151, "step": 10913 }, { "epoch": 0.9310697833134277, "grad_norm": 1.674008087134111, "learning_rate": 1.2409004127657441e-06, "loss": 0.1354, "step": 10914 }, { "epoch": 0.9311550929875448, "grad_norm": 1.7924533344190432, "learning_rate": 1.2378435067911532e-06, "loss": 0.1491, "step": 10915 }, { "epoch": 0.9312404026616619, "grad_norm": 1.8125420927224352, "learning_rate": 1.2347903235480906e-06, "loss": 0.199, "step": 10916 }, { "epoch": 0.9313257123357789, "grad_norm": 2.1335213290271025, "learning_rate": 1.2317408632696537e-06, "loss": 0.1712, "step": 10917 }, { "epoch": 0.9314110220098959, "grad_norm": 1.9843945239450769, "learning_rate": 1.2286951261886447e-06, "loss": 0.1548, "step": 10918 }, { "epoch": 0.9314963316840129, "grad_norm": 1.9672733400264089, "learning_rate": 1.2256531125375948e-06, "loss": 0.1997, "step": 10919 }, { "epoch": 0.93158164135813, "grad_norm": 1.4236146952679596, "learning_rate": 1.2226148225487343e-06, "loss": 0.1497, "step": 10920 }, { "epoch": 0.9316669510322471, "grad_norm": 1.551092697885617, "learning_rate": 1.2195802564540392e-06, "loss": 0.219, "step": 10921 }, { "epoch": 0.9317522607063641, "grad_norm": 2.4543478280922177, "learning_rate": 1.2165494144851686e-06, "loss": 0.2411, "step": 10922 }, { "epoch": 0.9318375703804811, "grad_norm": 2.5416858443073456, "learning_rate": 1.213522296873515e-06, "loss": 0.2321, "step": 10923 }, { "epoch": 0.9319228800545982, "grad_norm": 1.48876272508386, "learning_rate": 1.2104989038501825e-06, "loss": 0.227, "step": 10924 }, { "epoch": 0.9320081897287152, "grad_norm": 2.235784766140921, "learning_rate": 1.2074792356459973e-06, "loss": 0.1711, "step": 10925 }, { "epoch": 0.9320934994028323, "grad_norm": 2.3079442693636, "learning_rate": 1.2044632924914812e-06, "loss": 0.1712, "step": 10926 }, { "epoch": 0.9321788090769493, "grad_norm": 1.3028191077086981, "learning_rate": 1.2014510746168994e-06, "loss": 0.152, "step": 10927 }, { "epoch": 0.9322641187510664, "grad_norm": 1.7474593384559438, "learning_rate": 1.1984425822522072e-06, "loss": 0.1901, "step": 10928 }, { "epoch": 0.9323494284251834, "grad_norm": 2.4923342479203976, "learning_rate": 1.1954378156270984e-06, "loss": 0.2675, "step": 10929 }, { "epoch": 0.9324347380993004, "grad_norm": 2.1073487899311583, "learning_rate": 1.192436774970962e-06, "loss": 0.1899, "step": 10930 }, { "epoch": 0.9325200477734175, "grad_norm": 1.5607013267884438, "learning_rate": 1.18943946051292e-06, "loss": 0.1383, "step": 10931 }, { "epoch": 0.9326053574475346, "grad_norm": 2.482627782078035, "learning_rate": 1.1864458724817895e-06, "loss": 0.2476, "step": 10932 }, { "epoch": 0.9326906671216516, "grad_norm": 1.4667707865054695, "learning_rate": 1.1834560111061211e-06, "loss": 0.1506, "step": 10933 }, { "epoch": 0.9327759767957686, "grad_norm": 1.6566912225301587, "learning_rate": 1.180469876614182e-06, "loss": 0.1975, "step": 10934 }, { "epoch": 0.9328612864698856, "grad_norm": 1.9087893972471697, "learning_rate": 1.1774874692339343e-06, "loss": 0.2339, "step": 10935 }, { "epoch": 0.9329465961440028, "grad_norm": 1.8245921110244199, "learning_rate": 1.1745087891930794e-06, "loss": 0.1439, "step": 10936 }, { "epoch": 0.9330319058181198, "grad_norm": 1.9729350582287388, "learning_rate": 1.1715338367190188e-06, "loss": 0.2532, "step": 10937 }, { "epoch": 0.9331172154922368, "grad_norm": 1.861323577779237, "learning_rate": 1.1685626120388816e-06, "loss": 0.143, "step": 10938 }, { "epoch": 0.9332025251663538, "grad_norm": 2.701942229183476, "learning_rate": 1.165595115379492e-06, "loss": 0.2456, "step": 10939 }, { "epoch": 0.933287834840471, "grad_norm": 2.1447260200146157, "learning_rate": 1.1626313469674134e-06, "loss": 0.2581, "step": 10940 }, { "epoch": 0.933373144514588, "grad_norm": 3.253297160317526, "learning_rate": 1.159671307028909e-06, "loss": 0.2108, "step": 10941 }, { "epoch": 0.933458454188705, "grad_norm": 1.7795381183577133, "learning_rate": 1.1567149957899592e-06, "loss": 0.2155, "step": 10942 }, { "epoch": 0.933543763862822, "grad_norm": 1.9197626678489972, "learning_rate": 1.1537624134762726e-06, "loss": 0.2365, "step": 10943 }, { "epoch": 0.9336290735369391, "grad_norm": 1.7381404519654524, "learning_rate": 1.150813560313252e-06, "loss": 0.2029, "step": 10944 }, { "epoch": 0.9337143832110562, "grad_norm": 2.5219485965847754, "learning_rate": 1.1478684365260338e-06, "loss": 0.1767, "step": 10945 }, { "epoch": 0.9337996928851732, "grad_norm": 1.5528077690004845, "learning_rate": 1.1449270423394608e-06, "loss": 0.1922, "step": 10946 }, { "epoch": 0.9338850025592902, "grad_norm": 1.454304534293556, "learning_rate": 1.1419893779780922e-06, "loss": 0.1412, "step": 10947 }, { "epoch": 0.9339703122334073, "grad_norm": 2.567056466100129, "learning_rate": 1.1390554436661983e-06, "loss": 0.1745, "step": 10948 }, { "epoch": 0.9340556219075243, "grad_norm": 2.1078733397258254, "learning_rate": 1.1361252396277778e-06, "loss": 0.2142, "step": 10949 }, { "epoch": 0.9341409315816414, "grad_norm": 1.7293787293829925, "learning_rate": 1.133198766086535e-06, "loss": 0.1826, "step": 10950 }, { "epoch": 0.9342262412557584, "grad_norm": 1.7075934629985816, "learning_rate": 1.1302760232658916e-06, "loss": 0.1515, "step": 10951 }, { "epoch": 0.9343115509298755, "grad_norm": 2.0049983983972863, "learning_rate": 1.1273570113889798e-06, "loss": 0.1866, "step": 10952 }, { "epoch": 0.9343968606039925, "grad_norm": 1.80036838788407, "learning_rate": 1.1244417306786493e-06, "loss": 0.1731, "step": 10953 }, { "epoch": 0.9344821702781095, "grad_norm": 1.6164088194334867, "learning_rate": 1.1215301813574664e-06, "loss": 0.1682, "step": 10954 }, { "epoch": 0.9345674799522266, "grad_norm": 1.9495433468300811, "learning_rate": 1.1186223636477088e-06, "loss": 0.2011, "step": 10955 }, { "epoch": 0.9346527896263436, "grad_norm": 3.238624324502425, "learning_rate": 1.1157182777713936e-06, "loss": 0.2214, "step": 10956 }, { "epoch": 0.9347380993004607, "grad_norm": 1.7135274436228993, "learning_rate": 1.1128179239502046e-06, "loss": 0.0981, "step": 10957 }, { "epoch": 0.9348234089745777, "grad_norm": 1.4580647993352933, "learning_rate": 1.109921302405581e-06, "loss": 0.1336, "step": 10958 }, { "epoch": 0.9349087186486947, "grad_norm": 1.4608656921690784, "learning_rate": 1.1070284133586683e-06, "loss": 0.1664, "step": 10959 }, { "epoch": 0.9349940283228118, "grad_norm": 1.3177897261057696, "learning_rate": 1.1041392570303233e-06, "loss": 0.1516, "step": 10960 }, { "epoch": 0.9350793379969289, "grad_norm": 2.013634933887634, "learning_rate": 1.1012538336411083e-06, "loss": 0.1898, "step": 10961 }, { "epoch": 0.9351646476710459, "grad_norm": 1.6849259418546894, "learning_rate": 1.0983721434113192e-06, "loss": 0.1742, "step": 10962 }, { "epoch": 0.9352499573451629, "grad_norm": 1.9869873393346082, "learning_rate": 1.095494186560947e-06, "loss": 0.1653, "step": 10963 }, { "epoch": 0.9353352670192799, "grad_norm": 1.7813606227856433, "learning_rate": 1.0926199633097157e-06, "loss": 0.2204, "step": 10964 }, { "epoch": 0.9354205766933971, "grad_norm": 1.7929192164275367, "learning_rate": 1.0897494738770608e-06, "loss": 0.1653, "step": 10965 }, { "epoch": 0.9355058863675141, "grad_norm": 1.883350849351477, "learning_rate": 1.0868827184821296e-06, "loss": 0.2256, "step": 10966 }, { "epoch": 0.9355911960416311, "grad_norm": 1.5496179171289013, "learning_rate": 1.0840196973437744e-06, "loss": 0.0847, "step": 10967 }, { "epoch": 0.9356765057157481, "grad_norm": 1.8249481507343304, "learning_rate": 1.0811604106805705e-06, "loss": 0.1729, "step": 10968 }, { "epoch": 0.9357618153898652, "grad_norm": 1.946838638186531, "learning_rate": 1.0783048587108213e-06, "loss": 0.1839, "step": 10969 }, { "epoch": 0.9358471250639823, "grad_norm": 1.7191593029599839, "learning_rate": 1.0754530416525245e-06, "loss": 0.2553, "step": 10970 }, { "epoch": 0.9359324347380993, "grad_norm": 2.1055597911960984, "learning_rate": 1.0726049597234055e-06, "loss": 0.1692, "step": 10971 }, { "epoch": 0.9360177444122163, "grad_norm": 2.2775282089881603, "learning_rate": 1.0697606131408966e-06, "loss": 0.2178, "step": 10972 }, { "epoch": 0.9361030540863334, "grad_norm": 1.3687248959264093, "learning_rate": 1.066920002122146e-06, "loss": 0.1459, "step": 10973 }, { "epoch": 0.9361883637604504, "grad_norm": 1.3402980556941557, "learning_rate": 1.0640831268840302e-06, "loss": 0.167, "step": 10974 }, { "epoch": 0.9362736734345675, "grad_norm": 1.4091613726043666, "learning_rate": 1.0612499876431204e-06, "loss": 0.1768, "step": 10975 }, { "epoch": 0.9363589831086845, "grad_norm": 1.7006105532693225, "learning_rate": 1.0584205846157102e-06, "loss": 0.1557, "step": 10976 }, { "epoch": 0.9364442927828016, "grad_norm": 1.8272661979563813, "learning_rate": 1.0555949180178104e-06, "loss": 0.1599, "step": 10977 }, { "epoch": 0.9365296024569186, "grad_norm": 2.021866943720667, "learning_rate": 1.0527729880651537e-06, "loss": 0.2238, "step": 10978 }, { "epoch": 0.9366149121310356, "grad_norm": 1.459012073422443, "learning_rate": 1.0499547949731735e-06, "loss": 0.1864, "step": 10979 }, { "epoch": 0.9367002218051527, "grad_norm": 1.6304200088826775, "learning_rate": 1.0471403389570256e-06, "loss": 0.1563, "step": 10980 }, { "epoch": 0.9367855314792698, "grad_norm": 1.961059688089642, "learning_rate": 1.0443296202315767e-06, "loss": 0.1809, "step": 10981 }, { "epoch": 0.9368708411533868, "grad_norm": 2.136190680433033, "learning_rate": 1.0415226390114108e-06, "loss": 0.2235, "step": 10982 }, { "epoch": 0.9369561508275038, "grad_norm": 1.7293288219974432, "learning_rate": 1.0387193955108287e-06, "loss": 0.1702, "step": 10983 }, { "epoch": 0.9370414605016208, "grad_norm": 1.7017419781709462, "learning_rate": 1.0359198899438371e-06, "loss": 0.1534, "step": 10984 }, { "epoch": 0.937126770175738, "grad_norm": 2.671322146321917, "learning_rate": 1.0331241225241706e-06, "loss": 0.1575, "step": 10985 }, { "epoch": 0.937212079849855, "grad_norm": 1.3488271740319826, "learning_rate": 1.0303320934652637e-06, "loss": 0.1139, "step": 10986 }, { "epoch": 0.937297389523972, "grad_norm": 1.3284064948269232, "learning_rate": 1.0275438029802797e-06, "loss": 0.1036, "step": 10987 }, { "epoch": 0.937382699198089, "grad_norm": 1.5639136214001097, "learning_rate": 1.024759251282087e-06, "loss": 0.1547, "step": 10988 }, { "epoch": 0.9374680088722062, "grad_norm": 1.6550794099501591, "learning_rate": 1.021978438583271e-06, "loss": 0.0994, "step": 10989 }, { "epoch": 0.9375533185463232, "grad_norm": 2.203799313638817, "learning_rate": 1.0192013650961286e-06, "loss": 0.2423, "step": 10990 }, { "epoch": 0.9376386282204402, "grad_norm": 1.7832588780389504, "learning_rate": 1.0164280310326845e-06, "loss": 0.1883, "step": 10991 }, { "epoch": 0.9377239378945572, "grad_norm": 1.8059231709923258, "learning_rate": 1.0136584366046531e-06, "loss": 0.1781, "step": 10992 }, { "epoch": 0.9378092475686743, "grad_norm": 1.7436320384607742, "learning_rate": 1.0108925820234926e-06, "loss": 0.1443, "step": 10993 }, { "epoch": 0.9378945572427914, "grad_norm": 1.624802063844573, "learning_rate": 1.0081304675003566e-06, "loss": 0.1725, "step": 10994 }, { "epoch": 0.9379798669169084, "grad_norm": 1.5156378991276158, "learning_rate": 1.0053720932461152e-06, "loss": 0.1582, "step": 10995 }, { "epoch": 0.9380651765910254, "grad_norm": 1.5239777218499444, "learning_rate": 1.0026174594713612e-06, "loss": 0.1481, "step": 10996 }, { "epoch": 0.9381504862651424, "grad_norm": 1.4792448838019594, "learning_rate": 9.99866566386387e-07, "loss": 0.2255, "step": 10997 }, { "epoch": 0.9382357959392595, "grad_norm": 1.9864462308176598, "learning_rate": 9.971194142012197e-07, "loss": 0.113, "step": 10998 }, { "epoch": 0.9383211056133766, "grad_norm": 1.7823041507083812, "learning_rate": 9.943760031255744e-07, "loss": 0.2367, "step": 10999 }, { "epoch": 0.9384064152874936, "grad_norm": 2.1890884082031596, "learning_rate": 9.916363333689116e-07, "loss": 0.1918, "step": 11000 }, { "epoch": 0.9384917249616106, "grad_norm": 1.4703260042095376, "learning_rate": 9.88900405140386e-07, "loss": 0.102, "step": 11001 }, { "epoch": 0.9385770346357277, "grad_norm": 1.2115296915456526, "learning_rate": 9.861682186488697e-07, "loss": 0.1472, "step": 11002 }, { "epoch": 0.9386623443098447, "grad_norm": 1.7206632195996994, "learning_rate": 9.83439774102951e-07, "loss": 0.1916, "step": 11003 }, { "epoch": 0.9387476539839618, "grad_norm": 1.8790503940290642, "learning_rate": 9.807150717109303e-07, "loss": 0.1772, "step": 11004 }, { "epoch": 0.9388329636580788, "grad_norm": 1.5088309371717405, "learning_rate": 9.779941116808245e-07, "loss": 0.1608, "step": 11005 }, { "epoch": 0.9389182733321959, "grad_norm": 1.7675709761164136, "learning_rate": 9.75276894220367e-07, "loss": 0.2382, "step": 11006 }, { "epoch": 0.9390035830063129, "grad_norm": 1.797474835661355, "learning_rate": 9.725634195370036e-07, "loss": 0.1729, "step": 11007 }, { "epoch": 0.9390888926804299, "grad_norm": 2.0071824565873384, "learning_rate": 9.69853687837885e-07, "loss": 0.2642, "step": 11008 }, { "epoch": 0.939174202354547, "grad_norm": 1.554757504827994, "learning_rate": 9.671476993298956e-07, "loss": 0.1982, "step": 11009 }, { "epoch": 0.9392595120286641, "grad_norm": 1.82998862195256, "learning_rate": 9.644454542196146e-07, "loss": 0.1765, "step": 11010 }, { "epoch": 0.9393448217027811, "grad_norm": 1.840554911860942, "learning_rate": 9.617469527133605e-07, "loss": 0.1722, "step": 11011 }, { "epoch": 0.9394301313768981, "grad_norm": 1.49459947509103, "learning_rate": 9.590521950171293e-07, "loss": 0.1377, "step": 11012 }, { "epoch": 0.9395154410510151, "grad_norm": 2.1370359530532785, "learning_rate": 9.563611813366568e-07, "loss": 0.1065, "step": 11013 }, { "epoch": 0.9396007507251323, "grad_norm": 1.5537948729586752, "learning_rate": 9.536739118773951e-07, "loss": 0.1747, "step": 11014 }, { "epoch": 0.9396860603992493, "grad_norm": 1.417203502291304, "learning_rate": 9.50990386844497e-07, "loss": 0.1813, "step": 11015 }, { "epoch": 0.9397713700733663, "grad_norm": 1.529214713080669, "learning_rate": 9.483106064428426e-07, "loss": 0.1348, "step": 11016 }, { "epoch": 0.9398566797474833, "grad_norm": 2.021573183624394, "learning_rate": 9.456345708770076e-07, "loss": 0.1483, "step": 11017 }, { "epoch": 0.9399419894216005, "grad_norm": 1.990051081399568, "learning_rate": 9.429622803512949e-07, "loss": 0.2049, "step": 11018 }, { "epoch": 0.9400272990957175, "grad_norm": 1.3509504222229782, "learning_rate": 9.40293735069725e-07, "loss": 0.1586, "step": 11019 }, { "epoch": 0.9401126087698345, "grad_norm": 2.5304309791509, "learning_rate": 9.376289352360346e-07, "loss": 0.1595, "step": 11020 }, { "epoch": 0.9401979184439515, "grad_norm": 1.775418892982637, "learning_rate": 9.349678810536444e-07, "loss": 0.1512, "step": 11021 }, { "epoch": 0.9402832281180686, "grad_norm": 1.858729017866545, "learning_rate": 9.323105727257308e-07, "loss": 0.1877, "step": 11022 }, { "epoch": 0.9403685377921857, "grad_norm": 1.5393719821889509, "learning_rate": 9.29657010455165e-07, "loss": 0.1622, "step": 11023 }, { "epoch": 0.9404538474663027, "grad_norm": 1.556816292187633, "learning_rate": 9.27007194444518e-07, "loss": 0.1681, "step": 11024 }, { "epoch": 0.9405391571404197, "grad_norm": 1.694703867427199, "learning_rate": 9.243611248961059e-07, "loss": 0.2298, "step": 11025 }, { "epoch": 0.9406244668145368, "grad_norm": 1.5201761402166314, "learning_rate": 9.217188020119283e-07, "loss": 0.187, "step": 11026 }, { "epoch": 0.9407097764886538, "grad_norm": 1.7370487258305174, "learning_rate": 9.190802259937237e-07, "loss": 0.16, "step": 11027 }, { "epoch": 0.9407950861627709, "grad_norm": 1.2749750882875257, "learning_rate": 9.164453970429199e-07, "loss": 0.0831, "step": 11028 }, { "epoch": 0.9408803958368879, "grad_norm": 1.3380770142747282, "learning_rate": 9.138143153606893e-07, "loss": 0.1127, "step": 11029 }, { "epoch": 0.940965705511005, "grad_norm": 1.3527044422386079, "learning_rate": 9.111869811478879e-07, "loss": 0.1386, "step": 11030 }, { "epoch": 0.941051015185122, "grad_norm": 2.3853492116483133, "learning_rate": 9.085633946050998e-07, "loss": 0.1954, "step": 11031 }, { "epoch": 0.941136324859239, "grad_norm": 2.1819699823428405, "learning_rate": 9.059435559326257e-07, "loss": 0.2157, "step": 11032 }, { "epoch": 0.941221634533356, "grad_norm": 1.9419506391975971, "learning_rate": 9.033274653304836e-07, "loss": 0.234, "step": 11033 }, { "epoch": 0.9413069442074731, "grad_norm": 1.9083812868626027, "learning_rate": 9.007151229983801e-07, "loss": 0.2045, "step": 11034 }, { "epoch": 0.9413922538815902, "grad_norm": 2.258359532597257, "learning_rate": 8.981065291357671e-07, "loss": 0.1876, "step": 11035 }, { "epoch": 0.9414775635557072, "grad_norm": 1.5649434629715424, "learning_rate": 8.955016839418018e-07, "loss": 0.1729, "step": 11036 }, { "epoch": 0.9415628732298242, "grad_norm": 1.593707364577924, "learning_rate": 8.929005876153307e-07, "loss": 0.1401, "step": 11037 }, { "epoch": 0.9416481829039413, "grad_norm": 1.6991010954014505, "learning_rate": 8.903032403549505e-07, "loss": 0.166, "step": 11038 }, { "epoch": 0.9417334925780584, "grad_norm": 2.8192131457066694, "learning_rate": 8.877096423589415e-07, "loss": 0.1803, "step": 11039 }, { "epoch": 0.9418188022521754, "grad_norm": 2.5132259515780317, "learning_rate": 8.851197938253286e-07, "loss": 0.2198, "step": 11040 }, { "epoch": 0.9419041119262924, "grad_norm": 1.3032586502587207, "learning_rate": 8.825336949518204e-07, "loss": 0.1407, "step": 11041 }, { "epoch": 0.9419894216004094, "grad_norm": 1.766981565853176, "learning_rate": 8.799513459358533e-07, "loss": 0.2043, "step": 11042 }, { "epoch": 0.9420747312745266, "grad_norm": 1.665114307661043, "learning_rate": 8.773727469745751e-07, "loss": 0.2412, "step": 11043 }, { "epoch": 0.9421600409486436, "grad_norm": 1.9581489340898952, "learning_rate": 8.747978982648564e-07, "loss": 0.1513, "step": 11044 }, { "epoch": 0.9422453506227606, "grad_norm": 1.5060981135736016, "learning_rate": 8.722268000032618e-07, "loss": 0.1247, "step": 11045 }, { "epoch": 0.9423306602968776, "grad_norm": 1.7790115747445128, "learning_rate": 8.696594523860901e-07, "loss": 0.1933, "step": 11046 }, { "epoch": 0.9424159699709947, "grad_norm": 1.9890089221100231, "learning_rate": 8.670958556093401e-07, "loss": 0.2297, "step": 11047 }, { "epoch": 0.9425012796451118, "grad_norm": 1.8463681269352419, "learning_rate": 8.645360098687272e-07, "loss": 0.1678, "step": 11048 }, { "epoch": 0.9425865893192288, "grad_norm": 2.667273502047156, "learning_rate": 8.619799153596897e-07, "loss": 0.2418, "step": 11049 }, { "epoch": 0.9426718989933458, "grad_norm": 2.409162072100679, "learning_rate": 8.594275722773548e-07, "loss": 0.125, "step": 11050 }, { "epoch": 0.9427572086674629, "grad_norm": 1.5542098035101395, "learning_rate": 8.568789808166e-07, "loss": 0.1329, "step": 11051 }, { "epoch": 0.94284251834158, "grad_norm": 2.4069313967111987, "learning_rate": 8.543341411719918e-07, "loss": 0.2695, "step": 11052 }, { "epoch": 0.942927828015697, "grad_norm": 1.5980629997773317, "learning_rate": 8.517930535378083e-07, "loss": 0.1667, "step": 11053 }, { "epoch": 0.943013137689814, "grad_norm": 1.3773499694787656, "learning_rate": 8.492557181080496e-07, "loss": 0.1371, "step": 11054 }, { "epoch": 0.9430984473639311, "grad_norm": 1.789435735025535, "learning_rate": 8.467221350764332e-07, "loss": 0.1709, "step": 11055 }, { "epoch": 0.9431837570380481, "grad_norm": 1.6852677026782872, "learning_rate": 8.44192304636382e-07, "loss": 0.1688, "step": 11056 }, { "epoch": 0.9432690667121651, "grad_norm": 1.8761000902849212, "learning_rate": 8.41666226981036e-07, "loss": 0.1838, "step": 11057 }, { "epoch": 0.9433543763862822, "grad_norm": 1.459869728939203, "learning_rate": 8.39143902303241e-07, "loss": 0.1448, "step": 11058 }, { "epoch": 0.9434396860603993, "grad_norm": 1.5117977624397458, "learning_rate": 8.366253307955763e-07, "loss": 0.2356, "step": 11059 }, { "epoch": 0.9435249957345163, "grad_norm": 1.7568180238108388, "learning_rate": 8.341105126503102e-07, "loss": 0.1515, "step": 11060 }, { "epoch": 0.9436103054086333, "grad_norm": 1.7912492339724215, "learning_rate": 8.315994480594336e-07, "loss": 0.1338, "step": 11061 }, { "epoch": 0.9436956150827503, "grad_norm": 1.9167618382363112, "learning_rate": 8.290921372146654e-07, "loss": 0.2325, "step": 11062 }, { "epoch": 0.9437809247568675, "grad_norm": 1.7834346825627714, "learning_rate": 8.265885803074136e-07, "loss": 0.1506, "step": 11063 }, { "epoch": 0.9438662344309845, "grad_norm": 1.7508207848650106, "learning_rate": 8.240887775288197e-07, "loss": 0.1825, "step": 11064 }, { "epoch": 0.9439515441051015, "grad_norm": 1.3962023897173246, "learning_rate": 8.215927290697256e-07, "loss": 0.1496, "step": 11065 }, { "epoch": 0.9440368537792185, "grad_norm": 1.695378877948797, "learning_rate": 8.191004351206954e-07, "loss": 0.2223, "step": 11066 }, { "epoch": 0.9441221634533357, "grad_norm": 1.7045889346039182, "learning_rate": 8.166118958719992e-07, "loss": 0.1208, "step": 11067 }, { "epoch": 0.9442074731274527, "grad_norm": 2.05316825368251, "learning_rate": 8.141271115136184e-07, "loss": 0.2424, "step": 11068 }, { "epoch": 0.9442927828015697, "grad_norm": 1.8627353084334521, "learning_rate": 8.116460822352734e-07, "loss": 0.2336, "step": 11069 }, { "epoch": 0.9443780924756867, "grad_norm": 1.8683907051817275, "learning_rate": 8.091688082263515e-07, "loss": 0.1658, "step": 11070 }, { "epoch": 0.9444634021498037, "grad_norm": 1.748012776814779, "learning_rate": 8.066952896759905e-07, "loss": 0.2266, "step": 11071 }, { "epoch": 0.9445487118239209, "grad_norm": 2.494453954171129, "learning_rate": 8.042255267730392e-07, "loss": 0.2372, "step": 11072 }, { "epoch": 0.9446340214980379, "grad_norm": 1.9549599534638087, "learning_rate": 8.017595197060357e-07, "loss": 0.1546, "step": 11073 }, { "epoch": 0.9447193311721549, "grad_norm": 1.627481326744669, "learning_rate": 7.992972686632571e-07, "loss": 0.1496, "step": 11074 }, { "epoch": 0.9448046408462719, "grad_norm": 1.4325045155825187, "learning_rate": 7.96838773832681e-07, "loss": 0.1128, "step": 11075 }, { "epoch": 0.944889950520389, "grad_norm": 2.0757004210643193, "learning_rate": 7.94384035401996e-07, "loss": 0.1657, "step": 11076 }, { "epoch": 0.9449752601945061, "grad_norm": 2.127451660467742, "learning_rate": 7.919330535586134e-07, "loss": 0.2197, "step": 11077 }, { "epoch": 0.9450605698686231, "grad_norm": 1.5973508958079246, "learning_rate": 7.8948582848965e-07, "loss": 0.1195, "step": 11078 }, { "epoch": 0.9451458795427401, "grad_norm": 2.309124519278297, "learning_rate": 7.870423603819399e-07, "loss": 0.1738, "step": 11079 }, { "epoch": 0.9452311892168572, "grad_norm": 1.7350550512545095, "learning_rate": 7.846026494220282e-07, "loss": 0.1952, "step": 11080 }, { "epoch": 0.9453164988909742, "grad_norm": 2.3097548816314286, "learning_rate": 7.821666957961771e-07, "loss": 0.2577, "step": 11081 }, { "epoch": 0.9454018085650913, "grad_norm": 2.234101498810769, "learning_rate": 7.797344996903544e-07, "loss": 0.2094, "step": 11082 }, { "epoch": 0.9454871182392083, "grad_norm": 1.5672507732545835, "learning_rate": 7.773060612902395e-07, "loss": 0.1633, "step": 11083 }, { "epoch": 0.9455724279133254, "grad_norm": 1.9755638524856145, "learning_rate": 7.748813807812394e-07, "loss": 0.1288, "step": 11084 }, { "epoch": 0.9456577375874424, "grad_norm": 1.7527793883598224, "learning_rate": 7.724604583484674e-07, "loss": 0.1704, "step": 11085 }, { "epoch": 0.9457430472615594, "grad_norm": 2.6991035668712797, "learning_rate": 7.700432941767477e-07, "loss": 0.1776, "step": 11086 }, { "epoch": 0.9458283569356765, "grad_norm": 1.8854977781717162, "learning_rate": 7.676298884506106e-07, "loss": 0.169, "step": 11087 }, { "epoch": 0.9459136666097936, "grad_norm": 1.6955220317123492, "learning_rate": 7.652202413543141e-07, "loss": 0.1341, "step": 11088 }, { "epoch": 0.9459989762839106, "grad_norm": 1.5388550594358834, "learning_rate": 7.628143530718169e-07, "loss": 0.1746, "step": 11089 }, { "epoch": 0.9460842859580276, "grad_norm": 1.7849436159605163, "learning_rate": 7.604122237867939e-07, "loss": 0.1782, "step": 11090 }, { "epoch": 0.9461695956321446, "grad_norm": 1.819446930952371, "learning_rate": 7.580138536826431e-07, "loss": 0.1513, "step": 11091 }, { "epoch": 0.9462549053062618, "grad_norm": 1.6328358625720965, "learning_rate": 7.556192429424569e-07, "loss": 0.1904, "step": 11092 }, { "epoch": 0.9463402149803788, "grad_norm": 1.7544288451408776, "learning_rate": 7.532283917490668e-07, "loss": 0.2018, "step": 11093 }, { "epoch": 0.9464255246544958, "grad_norm": 1.6583398553665702, "learning_rate": 7.508413002849879e-07, "loss": 0.1623, "step": 11094 }, { "epoch": 0.9465108343286128, "grad_norm": 1.9290554231519739, "learning_rate": 7.484579687324633e-07, "loss": 0.1985, "step": 11095 }, { "epoch": 0.94659614400273, "grad_norm": 2.4441591900740303, "learning_rate": 7.460783972734476e-07, "loss": 0.2148, "step": 11096 }, { "epoch": 0.946681453676847, "grad_norm": 1.8394600696963233, "learning_rate": 7.43702586089623e-07, "loss": 0.1609, "step": 11097 }, { "epoch": 0.946766763350964, "grad_norm": 1.6441772514862287, "learning_rate": 7.413305353623445e-07, "loss": 0.161, "step": 11098 }, { "epoch": 0.946852073025081, "grad_norm": 1.3393507514784317, "learning_rate": 7.389622452727285e-07, "loss": 0.1951, "step": 11099 }, { "epoch": 0.9469373826991981, "grad_norm": 1.9161787517761868, "learning_rate": 7.365977160015692e-07, "loss": 0.1663, "step": 11100 }, { "epoch": 0.9470226923733152, "grad_norm": 2.1605495763915945, "learning_rate": 7.342369477293886e-07, "loss": 0.1751, "step": 11101 }, { "epoch": 0.9471080020474322, "grad_norm": 2.4085005107084796, "learning_rate": 7.318799406364208e-07, "loss": 0.1991, "step": 11102 }, { "epoch": 0.9471933117215492, "grad_norm": 1.8136771095819828, "learning_rate": 7.295266949026047e-07, "loss": 0.2126, "step": 11103 }, { "epoch": 0.9472786213956663, "grad_norm": 1.4853675334806644, "learning_rate": 7.271772107076136e-07, "loss": 0.1489, "step": 11104 }, { "epoch": 0.9473639310697833, "grad_norm": 2.137226386153649, "learning_rate": 7.248314882307928e-07, "loss": 0.1976, "step": 11105 }, { "epoch": 0.9474492407439004, "grad_norm": 1.532145646963206, "learning_rate": 7.224895276512489e-07, "loss": 0.1258, "step": 11106 }, { "epoch": 0.9475345504180174, "grad_norm": 2.193473942007321, "learning_rate": 7.201513291477669e-07, "loss": 0.225, "step": 11107 }, { "epoch": 0.9476198600921345, "grad_norm": 2.0221427398241176, "learning_rate": 7.178168928988593e-07, "loss": 0.1965, "step": 11108 }, { "epoch": 0.9477051697662515, "grad_norm": 1.881830109536253, "learning_rate": 7.154862190827449e-07, "loss": 0.1745, "step": 11109 }, { "epoch": 0.9477904794403685, "grad_norm": 2.17391608403864, "learning_rate": 7.13159307877359e-07, "loss": 0.2046, "step": 11110 }, { "epoch": 0.9478757891144856, "grad_norm": 1.8132935463990925, "learning_rate": 7.108361594603541e-07, "loss": 0.1421, "step": 11111 }, { "epoch": 0.9479610987886026, "grad_norm": 1.5510272160103082, "learning_rate": 7.085167740090771e-07, "loss": 0.1351, "step": 11112 }, { "epoch": 0.9480464084627197, "grad_norm": 2.0881574726063192, "learning_rate": 7.062011517006139e-07, "loss": 0.1746, "step": 11113 }, { "epoch": 0.9481317181368367, "grad_norm": 2.243992308387562, "learning_rate": 7.038892927117513e-07, "loss": 0.1475, "step": 11114 }, { "epoch": 0.9482170278109537, "grad_norm": 2.16484997465188, "learning_rate": 7.015811972189757e-07, "loss": 0.2312, "step": 11115 }, { "epoch": 0.9483023374850708, "grad_norm": 1.860679937902832, "learning_rate": 6.992768653985071e-07, "loss": 0.181, "step": 11116 }, { "epoch": 0.9483876471591879, "grad_norm": 1.4609352152597441, "learning_rate": 6.969762974262717e-07, "loss": 0.1437, "step": 11117 }, { "epoch": 0.9484729568333049, "grad_norm": 2.3285076671733775, "learning_rate": 6.9467949347789e-07, "loss": 0.1463, "step": 11118 }, { "epoch": 0.9485582665074219, "grad_norm": 2.363275903112495, "learning_rate": 6.923864537287217e-07, "loss": 0.1756, "step": 11119 }, { "epoch": 0.9486435761815389, "grad_norm": 1.9652198143428925, "learning_rate": 6.900971783538323e-07, "loss": 0.1896, "step": 11120 }, { "epoch": 0.9487288858556561, "grad_norm": 1.754858493371073, "learning_rate": 6.878116675279878e-07, "loss": 0.1982, "step": 11121 }, { "epoch": 0.9488141955297731, "grad_norm": 1.907020163122921, "learning_rate": 6.855299214256817e-07, "loss": 0.1702, "step": 11122 }, { "epoch": 0.9488995052038901, "grad_norm": 1.6689124553341124, "learning_rate": 6.832519402211079e-07, "loss": 0.1909, "step": 11123 }, { "epoch": 0.9489848148780071, "grad_norm": 2.7255960844938314, "learning_rate": 6.809777240881776e-07, "loss": 0.1757, "step": 11124 }, { "epoch": 0.9490701245521243, "grad_norm": 1.2664585429837396, "learning_rate": 6.787072732005129e-07, "loss": 0.171, "step": 11125 }, { "epoch": 0.9491554342262413, "grad_norm": 1.4496903384806463, "learning_rate": 6.764405877314639e-07, "loss": 0.1154, "step": 11126 }, { "epoch": 0.9492407439003583, "grad_norm": 1.446227242992194, "learning_rate": 6.741776678540645e-07, "loss": 0.1731, "step": 11127 }, { "epoch": 0.9493260535744753, "grad_norm": 1.6014384896253326, "learning_rate": 6.719185137410878e-07, "loss": 0.1956, "step": 11128 }, { "epoch": 0.9494113632485924, "grad_norm": 2.127427901986202, "learning_rate": 6.69663125565001e-07, "loss": 0.1549, "step": 11129 }, { "epoch": 0.9494966729227095, "grad_norm": 1.3739563709367453, "learning_rate": 6.674115034979945e-07, "loss": 0.1573, "step": 11130 }, { "epoch": 0.9495819825968265, "grad_norm": 2.485041882720392, "learning_rate": 6.651636477119639e-07, "loss": 0.2713, "step": 11131 }, { "epoch": 0.9496672922709435, "grad_norm": 1.9809291688136377, "learning_rate": 6.629195583785219e-07, "loss": 0.1797, "step": 11132 }, { "epoch": 0.9497526019450606, "grad_norm": 2.194239995851254, "learning_rate": 6.606792356690039e-07, "loss": 0.1488, "step": 11133 }, { "epoch": 0.9498379116191776, "grad_norm": 2.068996370978881, "learning_rate": 6.584426797544286e-07, "loss": 0.2251, "step": 11134 }, { "epoch": 0.9499232212932947, "grad_norm": 1.7401498416530197, "learning_rate": 6.562098908055536e-07, "loss": 0.1493, "step": 11135 }, { "epoch": 0.9500085309674117, "grad_norm": 1.7677501615821745, "learning_rate": 6.539808689928484e-07, "loss": 0.1434, "step": 11136 }, { "epoch": 0.9500938406415288, "grad_norm": 1.567500543183593, "learning_rate": 6.517556144864711e-07, "loss": 0.2085, "step": 11137 }, { "epoch": 0.9501791503156458, "grad_norm": 1.9548461654510345, "learning_rate": 6.495341274563193e-07, "loss": 0.1892, "step": 11138 }, { "epoch": 0.9502644599897628, "grad_norm": 1.2749115539241476, "learning_rate": 6.473164080719906e-07, "loss": 0.1529, "step": 11139 }, { "epoch": 0.9503497696638799, "grad_norm": 2.14969135109398, "learning_rate": 6.451024565027941e-07, "loss": 0.136, "step": 11140 }, { "epoch": 0.950435079337997, "grad_norm": 1.6748149371042054, "learning_rate": 6.4289227291775e-07, "loss": 0.1921, "step": 11141 }, { "epoch": 0.950520389012114, "grad_norm": 2.0509665559881123, "learning_rate": 6.406858574856067e-07, "loss": 0.2177, "step": 11142 }, { "epoch": 0.950605698686231, "grad_norm": 2.085379650944695, "learning_rate": 6.384832103747907e-07, "loss": 0.1507, "step": 11143 }, { "epoch": 0.950691008360348, "grad_norm": 1.9400849480071765, "learning_rate": 6.362843317534839e-07, "loss": 0.1702, "step": 11144 }, { "epoch": 0.9507763180344652, "grad_norm": 1.6623846200472028, "learning_rate": 6.340892217895411e-07, "loss": 0.1845, "step": 11145 }, { "epoch": 0.9508616277085822, "grad_norm": 1.7704054670846734, "learning_rate": 6.318978806505671e-07, "loss": 0.2263, "step": 11146 }, { "epoch": 0.9509469373826992, "grad_norm": 1.8577417846259843, "learning_rate": 6.297103085038391e-07, "loss": 0.1892, "step": 11147 }, { "epoch": 0.9510322470568162, "grad_norm": 1.5743823096638319, "learning_rate": 6.275265055163793e-07, "loss": 0.1707, "step": 11148 }, { "epoch": 0.9511175567309332, "grad_norm": 1.761106938872959, "learning_rate": 6.253464718549096e-07, "loss": 0.1656, "step": 11149 }, { "epoch": 0.9512028664050504, "grad_norm": 2.60243013594165, "learning_rate": 6.231702076858526e-07, "loss": 0.2207, "step": 11150 }, { "epoch": 0.9512881760791674, "grad_norm": 1.3713264811215882, "learning_rate": 6.209977131753697e-07, "loss": 0.0716, "step": 11151 }, { "epoch": 0.9513734857532844, "grad_norm": 1.494341588685849, "learning_rate": 6.188289884893062e-07, "loss": 0.1135, "step": 11152 }, { "epoch": 0.9514587954274014, "grad_norm": 1.3268834762554063, "learning_rate": 6.166640337932406e-07, "loss": 0.1345, "step": 11153 }, { "epoch": 0.9515441051015185, "grad_norm": 1.5127343209090378, "learning_rate": 6.145028492524463e-07, "loss": 0.1753, "step": 11154 }, { "epoch": 0.9516294147756356, "grad_norm": 1.6137110250835482, "learning_rate": 6.123454350319358e-07, "loss": 0.212, "step": 11155 }, { "epoch": 0.9517147244497526, "grad_norm": 1.7017651650033407, "learning_rate": 6.101917912963995e-07, "loss": 0.0786, "step": 11156 }, { "epoch": 0.9518000341238696, "grad_norm": 2.51400173723135, "learning_rate": 6.080419182102615e-07, "loss": 0.2283, "step": 11157 }, { "epoch": 0.9518853437979867, "grad_norm": 1.4985511458243963, "learning_rate": 6.058958159376571e-07, "loss": 0.1619, "step": 11158 }, { "epoch": 0.9519706534721037, "grad_norm": 1.9282537531717898, "learning_rate": 6.037534846424276e-07, "loss": 0.1628, "step": 11159 }, { "epoch": 0.9520559631462208, "grad_norm": 1.8127955491710814, "learning_rate": 6.016149244881253e-07, "loss": 0.1824, "step": 11160 }, { "epoch": 0.9521412728203378, "grad_norm": 2.077396924965521, "learning_rate": 5.994801356380253e-07, "loss": 0.2285, "step": 11161 }, { "epoch": 0.9522265824944549, "grad_norm": 1.6969942110735063, "learning_rate": 5.973491182551028e-07, "loss": 0.1927, "step": 11162 }, { "epoch": 0.9523118921685719, "grad_norm": 1.969960763801957, "learning_rate": 5.952218725020442e-07, "loss": 0.1592, "step": 11163 }, { "epoch": 0.952397201842689, "grad_norm": 2.361447264234943, "learning_rate": 5.930983985412641e-07, "loss": 0.1868, "step": 11164 }, { "epoch": 0.952482511516806, "grad_norm": 1.7289070332797312, "learning_rate": 5.909786965348718e-07, "loss": 0.231, "step": 11165 }, { "epoch": 0.9525678211909231, "grad_norm": 1.6845738507662116, "learning_rate": 5.888627666446988e-07, "loss": 0.1496, "step": 11166 }, { "epoch": 0.9526531308650401, "grad_norm": 1.8352508559511447, "learning_rate": 5.867506090322772e-07, "loss": 0.1935, "step": 11167 }, { "epoch": 0.9527384405391571, "grad_norm": 1.697133084336015, "learning_rate": 5.846422238588723e-07, "loss": 0.2013, "step": 11168 }, { "epoch": 0.9528237502132741, "grad_norm": 1.6911044303009053, "learning_rate": 5.82537611285433e-07, "loss": 0.1656, "step": 11169 }, { "epoch": 0.9529090598873913, "grad_norm": 1.7130214593348214, "learning_rate": 5.804367714726477e-07, "loss": 0.1433, "step": 11170 }, { "epoch": 0.9529943695615083, "grad_norm": 1.7131248670631531, "learning_rate": 5.783397045808992e-07, "loss": 0.1408, "step": 11171 }, { "epoch": 0.9530796792356253, "grad_norm": 2.3111941800512636, "learning_rate": 5.76246410770287e-07, "loss": 0.1655, "step": 11172 }, { "epoch": 0.9531649889097423, "grad_norm": 3.297168013582268, "learning_rate": 5.741568902006277e-07, "loss": 0.2369, "step": 11173 }, { "epoch": 0.9532502985838595, "grad_norm": 1.7411301487672566, "learning_rate": 5.720711430314329e-07, "loss": 0.2427, "step": 11174 }, { "epoch": 0.9533356082579765, "grad_norm": 1.7431127716981416, "learning_rate": 5.699891694219584e-07, "loss": 0.1412, "step": 11175 }, { "epoch": 0.9534209179320935, "grad_norm": 1.7214367760608025, "learning_rate": 5.679109695311269e-07, "loss": 0.137, "step": 11176 }, { "epoch": 0.9535062276062105, "grad_norm": 1.728744784752074, "learning_rate": 5.658365435176171e-07, "loss": 0.1381, "step": 11177 }, { "epoch": 0.9535915372803276, "grad_norm": 2.3499026095723834, "learning_rate": 5.63765891539797e-07, "loss": 0.1989, "step": 11178 }, { "epoch": 0.9536768469544447, "grad_norm": 1.676461106333575, "learning_rate": 5.616990137557454e-07, "loss": 0.1851, "step": 11179 }, { "epoch": 0.9537621566285617, "grad_norm": 1.6338933260449304, "learning_rate": 5.596359103232529e-07, "loss": 0.1626, "step": 11180 }, { "epoch": 0.9538474663026787, "grad_norm": 1.7264223343929126, "learning_rate": 5.575765813998435e-07, "loss": 0.2028, "step": 11181 }, { "epoch": 0.9539327759767958, "grad_norm": 2.2576951785113697, "learning_rate": 5.555210271427192e-07, "loss": 0.2158, "step": 11182 }, { "epoch": 0.9540180856509128, "grad_norm": 1.7667138447250212, "learning_rate": 5.534692477088155e-07, "loss": 0.1882, "step": 11183 }, { "epoch": 0.9541033953250299, "grad_norm": 1.4926677150839058, "learning_rate": 5.51421243254785e-07, "loss": 0.1741, "step": 11184 }, { "epoch": 0.9541887049991469, "grad_norm": 2.096663995123507, "learning_rate": 5.493770139369636e-07, "loss": 0.2255, "step": 11185 }, { "epoch": 0.9542740146732639, "grad_norm": 2.2850505576185403, "learning_rate": 5.473365599114266e-07, "loss": 0.1817, "step": 11186 }, { "epoch": 0.954359324347381, "grad_norm": 1.5637287648906648, "learning_rate": 5.452998813339605e-07, "loss": 0.1298, "step": 11187 }, { "epoch": 0.954444634021498, "grad_norm": 1.705030093221135, "learning_rate": 5.432669783600408e-07, "loss": 0.1804, "step": 11188 }, { "epoch": 0.9545299436956151, "grad_norm": 2.188057310772972, "learning_rate": 5.412378511448712e-07, "loss": 0.167, "step": 11189 }, { "epoch": 0.9546152533697321, "grad_norm": 1.380589697590695, "learning_rate": 5.392124998433723e-07, "loss": 0.1012, "step": 11190 }, { "epoch": 0.9547005630438492, "grad_norm": 1.7120275639338256, "learning_rate": 5.371909246101648e-07, "loss": 0.162, "step": 11191 }, { "epoch": 0.9547858727179662, "grad_norm": 2.550359670667662, "learning_rate": 5.351731255995862e-07, "loss": 0.1825, "step": 11192 }, { "epoch": 0.9548711823920832, "grad_norm": 1.6736147376197508, "learning_rate": 5.331591029656802e-07, "loss": 0.1639, "step": 11193 }, { "epoch": 0.9549564920662003, "grad_norm": 2.0805232614960056, "learning_rate": 5.311488568622125e-07, "loss": 0.1973, "step": 11194 }, { "epoch": 0.9550418017403174, "grad_norm": 1.4353153379392734, "learning_rate": 5.291423874426548e-07, "loss": 0.1605, "step": 11195 }, { "epoch": 0.9551271114144344, "grad_norm": 1.7800488353845125, "learning_rate": 5.271396948601792e-07, "loss": 0.1821, "step": 11196 }, { "epoch": 0.9552124210885514, "grad_norm": 2.5033124913018083, "learning_rate": 5.251407792677021e-07, "loss": 0.1658, "step": 11197 }, { "epoch": 0.9552977307626684, "grad_norm": 1.5950872757331527, "learning_rate": 5.23145640817807e-07, "loss": 0.2052, "step": 11198 }, { "epoch": 0.9553830404367856, "grad_norm": 1.5200786634217691, "learning_rate": 5.211542796628277e-07, "loss": 0.1879, "step": 11199 }, { "epoch": 0.9554683501109026, "grad_norm": 1.497371277538691, "learning_rate": 5.191666959547869e-07, "loss": 0.1349, "step": 11200 }, { "epoch": 0.9555536597850196, "grad_norm": 1.6497383950662485, "learning_rate": 5.1718288984543e-07, "loss": 0.1461, "step": 11201 }, { "epoch": 0.9556389694591366, "grad_norm": 2.3801270407742123, "learning_rate": 5.152028614862026e-07, "loss": 0.2022, "step": 11202 }, { "epoch": 0.9557242791332538, "grad_norm": 2.0218072753129315, "learning_rate": 5.132266110282835e-07, "loss": 0.1548, "step": 11203 }, { "epoch": 0.9558095888073708, "grad_norm": 1.4118302149802189, "learning_rate": 5.1125413862253e-07, "loss": 0.1732, "step": 11204 }, { "epoch": 0.9558948984814878, "grad_norm": 1.731088359461804, "learning_rate": 5.092854444195494e-07, "loss": 0.1757, "step": 11205 }, { "epoch": 0.9559802081556048, "grad_norm": 2.726589432285502, "learning_rate": 5.073205285696269e-07, "loss": 0.1959, "step": 11206 }, { "epoch": 0.9560655178297219, "grad_norm": 1.7607114496155731, "learning_rate": 5.05359391222776e-07, "loss": 0.1319, "step": 11207 }, { "epoch": 0.956150827503839, "grad_norm": 1.9188785919937112, "learning_rate": 5.034020325287269e-07, "loss": 0.2407, "step": 11208 }, { "epoch": 0.956236137177956, "grad_norm": 2.803535841281218, "learning_rate": 5.014484526369046e-07, "loss": 0.2062, "step": 11209 }, { "epoch": 0.956321446852073, "grad_norm": 2.2740676897856518, "learning_rate": 4.994986516964617e-07, "loss": 0.1375, "step": 11210 }, { "epoch": 0.9564067565261901, "grad_norm": 1.53968349263479, "learning_rate": 4.97552629856246e-07, "loss": 0.1549, "step": 11211 }, { "epoch": 0.9564920662003071, "grad_norm": 2.051113021898373, "learning_rate": 4.956103872648333e-07, "loss": 0.1536, "step": 11212 }, { "epoch": 0.9565773758744242, "grad_norm": 1.2229317059260971, "learning_rate": 4.936719240705045e-07, "loss": 0.1432, "step": 11213 }, { "epoch": 0.9566626855485412, "grad_norm": 1.2639590000553027, "learning_rate": 4.917372404212417e-07, "loss": 0.1547, "step": 11214 }, { "epoch": 0.9567479952226583, "grad_norm": 1.8723359573687248, "learning_rate": 4.898063364647598e-07, "loss": 0.1507, "step": 11215 }, { "epoch": 0.9568333048967753, "grad_norm": 1.7869169871683357, "learning_rate": 4.878792123484688e-07, "loss": 0.1952, "step": 11216 }, { "epoch": 0.9569186145708923, "grad_norm": 2.0011321677997507, "learning_rate": 4.859558682194898e-07, "loss": 0.0982, "step": 11217 }, { "epoch": 0.9570039242450094, "grad_norm": 1.6079295064837404, "learning_rate": 4.840363042246554e-07, "loss": 0.1337, "step": 11218 }, { "epoch": 0.9570892339191265, "grad_norm": 1.8285705805887231, "learning_rate": 4.821205205105317e-07, "loss": 0.1544, "step": 11219 }, { "epoch": 0.9571745435932435, "grad_norm": 2.35484930848144, "learning_rate": 4.802085172233628e-07, "loss": 0.1666, "step": 11220 }, { "epoch": 0.9572598532673605, "grad_norm": 1.8563062845552907, "learning_rate": 4.78300294509132e-07, "loss": 0.1547, "step": 11221 }, { "epoch": 0.9573451629414775, "grad_norm": 2.0207721137248282, "learning_rate": 4.7639585251350593e-07, "loss": 0.2199, "step": 11222 }, { "epoch": 0.9574304726155947, "grad_norm": 2.449993297022778, "learning_rate": 4.744951913819018e-07, "loss": 0.1436, "step": 11223 }, { "epoch": 0.9575157822897117, "grad_norm": 1.6835355460954904, "learning_rate": 4.725983112593979e-07, "loss": 0.1361, "step": 11224 }, { "epoch": 0.9576010919638287, "grad_norm": 1.8468755318629602, "learning_rate": 4.7070521229083396e-07, "loss": 0.2248, "step": 11225 }, { "epoch": 0.9576864016379457, "grad_norm": 2.001501949922106, "learning_rate": 4.6881589462072773e-07, "loss": 0.1619, "step": 11226 }, { "epoch": 0.9577717113120627, "grad_norm": 1.812555114960449, "learning_rate": 4.669303583933138e-07, "loss": 0.1988, "step": 11227 }, { "epoch": 0.9578570209861799, "grad_norm": 2.2885079086330413, "learning_rate": 4.6504860375255475e-07, "loss": 0.1987, "step": 11228 }, { "epoch": 0.9579423306602969, "grad_norm": 2.165672171823461, "learning_rate": 4.631706308421024e-07, "loss": 0.2011, "step": 11229 }, { "epoch": 0.9580276403344139, "grad_norm": 1.7811318073880178, "learning_rate": 4.6129643980533657e-07, "loss": 0.1464, "step": 11230 }, { "epoch": 0.9581129500085309, "grad_norm": 2.1405171833591665, "learning_rate": 4.5942603078533706e-07, "loss": 0.1812, "step": 11231 }, { "epoch": 0.958198259682648, "grad_norm": 1.3975300321951765, "learning_rate": 4.575594039249065e-07, "loss": 0.1147, "step": 11232 }, { "epoch": 0.9582835693567651, "grad_norm": 1.8900413558108762, "learning_rate": 4.5569655936654186e-07, "loss": 0.215, "step": 11233 }, { "epoch": 0.9583688790308821, "grad_norm": 1.9123189653470025, "learning_rate": 4.538374972524684e-07, "loss": 0.2007, "step": 11234 }, { "epoch": 0.9584541887049991, "grad_norm": 1.475993132438293, "learning_rate": 4.519822177246114e-07, "loss": 0.1359, "step": 11235 }, { "epoch": 0.9585394983791162, "grad_norm": 1.6121264890072484, "learning_rate": 4.501307209246186e-07, "loss": 0.1273, "step": 11236 }, { "epoch": 0.9586248080532332, "grad_norm": 1.322102028039009, "learning_rate": 4.4828300699383264e-07, "loss": 0.1862, "step": 11237 }, { "epoch": 0.9587101177273503, "grad_norm": 1.4300745872572587, "learning_rate": 4.4643907607332394e-07, "loss": 0.1817, "step": 11238 }, { "epoch": 0.9587954274014673, "grad_norm": 1.5802815121340796, "learning_rate": 4.4459892830386876e-07, "loss": 0.146, "step": 11239 }, { "epoch": 0.9588807370755844, "grad_norm": 2.0013551889065284, "learning_rate": 4.4276256382594364e-07, "loss": 0.1334, "step": 11240 }, { "epoch": 0.9589660467497014, "grad_norm": 1.8897101971294783, "learning_rate": 4.409299827797475e-07, "loss": 0.1547, "step": 11241 }, { "epoch": 0.9590513564238184, "grad_norm": 2.269605835626285, "learning_rate": 4.3910118530519626e-07, "loss": 0.2226, "step": 11242 }, { "epoch": 0.9591366660979355, "grad_norm": 1.9142439152906228, "learning_rate": 4.372761715419005e-07, "loss": 0.181, "step": 11243 }, { "epoch": 0.9592219757720526, "grad_norm": 1.2629625549504266, "learning_rate": 4.354549416291931e-07, "loss": 0.1172, "step": 11244 }, { "epoch": 0.9593072854461696, "grad_norm": 1.6551577727380038, "learning_rate": 4.3363749570611846e-07, "loss": 0.2064, "step": 11245 }, { "epoch": 0.9593925951202866, "grad_norm": 1.6411033296382664, "learning_rate": 4.318238339114211e-07, "loss": 0.1875, "step": 11246 }, { "epoch": 0.9594779047944036, "grad_norm": 1.694968755653508, "learning_rate": 4.300139563835681e-07, "loss": 0.1959, "step": 11247 }, { "epoch": 0.9595632144685208, "grad_norm": 1.6754929799223401, "learning_rate": 4.2820786326074334e-07, "loss": 0.151, "step": 11248 }, { "epoch": 0.9596485241426378, "grad_norm": 2.3952124966065425, "learning_rate": 4.264055546808143e-07, "loss": 0.1378, "step": 11249 }, { "epoch": 0.9597338338167548, "grad_norm": 1.4645528682544557, "learning_rate": 4.246070307813932e-07, "loss": 0.1312, "step": 11250 }, { "epoch": 0.9598191434908718, "grad_norm": 1.8633874507044612, "learning_rate": 4.2281229169977565e-07, "loss": 0.1324, "step": 11251 }, { "epoch": 0.959904453164989, "grad_norm": 1.818935151523885, "learning_rate": 4.2102133757299103e-07, "loss": 0.2278, "step": 11252 }, { "epoch": 0.959989762839106, "grad_norm": 1.3846397611721952, "learning_rate": 4.192341685377632e-07, "loss": 0.1219, "step": 11253 }, { "epoch": 0.960075072513223, "grad_norm": 1.6350981853763165, "learning_rate": 4.1745078473053866e-07, "loss": 0.1933, "step": 11254 }, { "epoch": 0.96016038218734, "grad_norm": 2.1635581850815, "learning_rate": 4.1567118628746406e-07, "loss": 0.1957, "step": 11255 }, { "epoch": 0.9602456918614571, "grad_norm": 1.770383919927536, "learning_rate": 4.1389537334440287e-07, "loss": 0.1166, "step": 11256 }, { "epoch": 0.9603310015355742, "grad_norm": 1.5605228885339772, "learning_rate": 4.1212334603693003e-07, "loss": 0.1496, "step": 11257 }, { "epoch": 0.9604163112096912, "grad_norm": 2.119825795591705, "learning_rate": 4.103551045003262e-07, "loss": 0.1356, "step": 11258 }, { "epoch": 0.9605016208838082, "grad_norm": 1.8312788925235142, "learning_rate": 4.085906488695945e-07, "loss": 0.1307, "step": 11259 }, { "epoch": 0.9605869305579253, "grad_norm": 2.2980419490229056, "learning_rate": 4.068299792794383e-07, "loss": 0.132, "step": 11260 }, { "epoch": 0.9606722402320423, "grad_norm": 1.718948075411795, "learning_rate": 4.0507309586427787e-07, "loss": 0.2156, "step": 11261 }, { "epoch": 0.9607575499061594, "grad_norm": 1.7683635168109215, "learning_rate": 4.033199987582337e-07, "loss": 0.1605, "step": 11262 }, { "epoch": 0.9608428595802764, "grad_norm": 1.6128002862998163, "learning_rate": 4.0157068809515417e-07, "loss": 0.1552, "step": 11263 }, { "epoch": 0.9609281692543934, "grad_norm": 1.9300131580912274, "learning_rate": 3.998251640085826e-07, "loss": 0.1945, "step": 11264 }, { "epoch": 0.9610134789285105, "grad_norm": 1.7711807209430195, "learning_rate": 3.980834266317901e-07, "loss": 0.2419, "step": 11265 }, { "epoch": 0.9610987886026275, "grad_norm": 1.739478840992292, "learning_rate": 3.9634547609774255e-07, "loss": 0.1884, "step": 11266 }, { "epoch": 0.9611840982767446, "grad_norm": 1.917199503045721, "learning_rate": 3.9461131253912266e-07, "loss": 0.1938, "step": 11267 }, { "epoch": 0.9612694079508616, "grad_norm": 1.926356479244102, "learning_rate": 3.928809360883301e-07, "loss": 0.2164, "step": 11268 }, { "epoch": 0.9613547176249787, "grad_norm": 1.431454593078201, "learning_rate": 3.9115434687746477e-07, "loss": 0.1542, "step": 11269 }, { "epoch": 0.9614400272990957, "grad_norm": 1.6036323384081308, "learning_rate": 3.8943154503834344e-07, "loss": 0.1722, "step": 11270 }, { "epoch": 0.9615253369732127, "grad_norm": 1.7677001236601502, "learning_rate": 3.8771253070249423e-07, "loss": 0.1553, "step": 11271 }, { "epoch": 0.9616106466473298, "grad_norm": 1.7806182544134606, "learning_rate": 3.8599730400115107e-07, "loss": 0.1387, "step": 11272 }, { "epoch": 0.9616959563214469, "grad_norm": 1.8657599063320478, "learning_rate": 3.842858650652648e-07, "loss": 0.2156, "step": 11273 }, { "epoch": 0.9617812659955639, "grad_norm": 1.5599299847956638, "learning_rate": 3.8257821402549745e-07, "loss": 0.1469, "step": 11274 }, { "epoch": 0.9618665756696809, "grad_norm": 2.371996837999832, "learning_rate": 3.80874351012217e-07, "loss": 0.1203, "step": 11275 }, { "epoch": 0.9619518853437979, "grad_norm": 1.6164504873455143, "learning_rate": 3.7917427615550283e-07, "loss": 0.193, "step": 11276 }, { "epoch": 0.9620371950179151, "grad_norm": 2.334654592905826, "learning_rate": 3.7747798958515103e-07, "loss": 0.2568, "step": 11277 }, { "epoch": 0.9621225046920321, "grad_norm": 1.491823881094132, "learning_rate": 3.75785491430658e-07, "loss": 0.1283, "step": 11278 }, { "epoch": 0.9622078143661491, "grad_norm": 1.7668262547562081, "learning_rate": 3.740967818212371e-07, "loss": 0.1664, "step": 11279 }, { "epoch": 0.9622931240402661, "grad_norm": 1.7004723846248082, "learning_rate": 3.724118608858185e-07, "loss": 0.1879, "step": 11280 }, { "epoch": 0.9623784337143833, "grad_norm": 1.545714280033036, "learning_rate": 3.7073072875303816e-07, "loss": 0.1759, "step": 11281 }, { "epoch": 0.9624637433885003, "grad_norm": 1.8916449199664465, "learning_rate": 3.690533855512268e-07, "loss": 0.1471, "step": 11282 }, { "epoch": 0.9625490530626173, "grad_norm": 1.7324774044044127, "learning_rate": 3.673798314084598e-07, "loss": 0.1831, "step": 11283 }, { "epoch": 0.9626343627367343, "grad_norm": 1.8084645565020132, "learning_rate": 3.657100664524904e-07, "loss": 0.1321, "step": 11284 }, { "epoch": 0.9627196724108514, "grad_norm": 1.6989494724225822, "learning_rate": 3.6404409081080004e-07, "loss": 0.19, "step": 11285 }, { "epoch": 0.9628049820849685, "grad_norm": 2.0718197500189546, "learning_rate": 3.623819046105814e-07, "loss": 0.1531, "step": 11286 }, { "epoch": 0.9628902917590855, "grad_norm": 1.592419386759159, "learning_rate": 3.60723507978733e-07, "loss": 0.1093, "step": 11287 }, { "epoch": 0.9629756014332025, "grad_norm": 1.7636278723616896, "learning_rate": 3.590689010418535e-07, "loss": 0.1898, "step": 11288 }, { "epoch": 0.9630609111073196, "grad_norm": 1.8230865689981712, "learning_rate": 3.5741808392628083e-07, "loss": 0.25, "step": 11289 }, { "epoch": 0.9631462207814366, "grad_norm": 2.2686076558334096, "learning_rate": 3.5577105675803634e-07, "loss": 0.196, "step": 11290 }, { "epoch": 0.9632315304555537, "grad_norm": 2.2110216104528804, "learning_rate": 3.541278196628528e-07, "loss": 0.1634, "step": 11291 }, { "epoch": 0.9633168401296707, "grad_norm": 1.6393110554099122, "learning_rate": 3.5248837276620205e-07, "loss": 0.1365, "step": 11292 }, { "epoch": 0.9634021498037878, "grad_norm": 2.407942337803183, "learning_rate": 3.50852716193234e-07, "loss": 0.1958, "step": 11293 }, { "epoch": 0.9634874594779048, "grad_norm": 1.5738362872380023, "learning_rate": 3.492208500688265e-07, "loss": 0.1705, "step": 11294 }, { "epoch": 0.9635727691520218, "grad_norm": 1.72506980754747, "learning_rate": 3.475927745175578e-07, "loss": 0.1557, "step": 11295 }, { "epoch": 0.9636580788261389, "grad_norm": 1.6463226202702275, "learning_rate": 3.4596848966373384e-07, "loss": 0.1445, "step": 11296 }, { "epoch": 0.963743388500256, "grad_norm": 1.4354588488105182, "learning_rate": 3.4434799563135556e-07, "loss": 0.1745, "step": 11297 }, { "epoch": 0.963828698174373, "grad_norm": 1.9198069416776258, "learning_rate": 3.4273129254413505e-07, "loss": 0.1442, "step": 11298 }, { "epoch": 0.96391400784849, "grad_norm": 1.5957610961394404, "learning_rate": 3.411183805255014e-07, "loss": 0.169, "step": 11299 }, { "epoch": 0.963999317522607, "grad_norm": 1.3136590653588127, "learning_rate": 3.3950925969859494e-07, "loss": 0.1199, "step": 11300 }, { "epoch": 0.964084627196724, "grad_norm": 1.632464869239503, "learning_rate": 3.379039301862619e-07, "loss": 0.2173, "step": 11301 }, { "epoch": 0.9641699368708412, "grad_norm": 1.8140048652844443, "learning_rate": 3.363023921110542e-07, "loss": 0.1612, "step": 11302 }, { "epoch": 0.9642552465449582, "grad_norm": 1.509305382933698, "learning_rate": 3.3470464559525184e-07, "loss": 0.1552, "step": 11303 }, { "epoch": 0.9643405562190752, "grad_norm": 2.0777207748676556, "learning_rate": 3.331106907608239e-07, "loss": 0.1532, "step": 11304 }, { "epoch": 0.9644258658931922, "grad_norm": 1.7704653937365455, "learning_rate": 3.315205277294675e-07, "loss": 0.1935, "step": 11305 }, { "epoch": 0.9645111755673094, "grad_norm": 1.7540771808890003, "learning_rate": 3.2993415662258555e-07, "loss": 0.2271, "step": 11306 }, { "epoch": 0.9645964852414264, "grad_norm": 2.3377551913570414, "learning_rate": 3.283515775612811e-07, "loss": 0.1881, "step": 11307 }, { "epoch": 0.9646817949155434, "grad_norm": 1.7160566554731522, "learning_rate": 3.2677279066637423e-07, "loss": 0.1298, "step": 11308 }, { "epoch": 0.9647671045896604, "grad_norm": 1.4986992600404854, "learning_rate": 3.2519779605840184e-07, "loss": 0.186, "step": 11309 }, { "epoch": 0.9648524142637775, "grad_norm": 1.2968791364121253, "learning_rate": 3.236265938576122e-07, "loss": 0.122, "step": 11310 }, { "epoch": 0.9649377239378946, "grad_norm": 1.3934148171583334, "learning_rate": 3.220591841839482e-07, "loss": 0.1424, "step": 11311 }, { "epoch": 0.9650230336120116, "grad_norm": 1.3060645870782241, "learning_rate": 3.2049556715708083e-07, "loss": 0.1152, "step": 11312 }, { "epoch": 0.9651083432861286, "grad_norm": 2.2698325188648365, "learning_rate": 3.189357428963757e-07, "loss": 0.2063, "step": 11313 }, { "epoch": 0.9651936529602457, "grad_norm": 1.8818986503156365, "learning_rate": 3.173797115209265e-07, "loss": 0.2084, "step": 11314 }, { "epoch": 0.9652789626343627, "grad_norm": 1.9970399886909285, "learning_rate": 3.158274731495159e-07, "loss": 0.2209, "step": 11315 }, { "epoch": 0.9653642723084798, "grad_norm": 1.8369639023342303, "learning_rate": 3.1427902790066575e-07, "loss": 0.1659, "step": 11316 }, { "epoch": 0.9654495819825968, "grad_norm": 2.2269111226524867, "learning_rate": 3.127343758925705e-07, "loss": 0.2031, "step": 11317 }, { "epoch": 0.9655348916567139, "grad_norm": 1.8650031300945453, "learning_rate": 3.111935172431746e-07, "loss": 0.1542, "step": 11318 }, { "epoch": 0.9656202013308309, "grad_norm": 1.4620199018655415, "learning_rate": 3.0965645207011175e-07, "loss": 0.1241, "step": 11319 }, { "epoch": 0.965705511004948, "grad_norm": 1.469415635585919, "learning_rate": 3.0812318049071586e-07, "loss": 0.1725, "step": 11320 }, { "epoch": 0.965790820679065, "grad_norm": 2.089749970515616, "learning_rate": 3.0659370262206e-07, "loss": 0.2035, "step": 11321 }, { "epoch": 0.9658761303531821, "grad_norm": 2.0216320334054885, "learning_rate": 3.0506801858090073e-07, "loss": 0.1317, "step": 11322 }, { "epoch": 0.9659614400272991, "grad_norm": 2.095206366232704, "learning_rate": 3.0354612848372265e-07, "loss": 0.1444, "step": 11323 }, { "epoch": 0.9660467497014161, "grad_norm": 1.7894044907378484, "learning_rate": 3.020280324467051e-07, "loss": 0.1302, "step": 11324 }, { "epoch": 0.9661320593755331, "grad_norm": 1.7388712021182455, "learning_rate": 3.0051373058576083e-07, "loss": 0.1709, "step": 11325 }, { "epoch": 0.9662173690496503, "grad_norm": 2.2331644260055414, "learning_rate": 2.990032230164863e-07, "loss": 0.1417, "step": 11326 }, { "epoch": 0.9663026787237673, "grad_norm": 1.7653647036809599, "learning_rate": 2.9749650985420043e-07, "loss": 0.1566, "step": 11327 }, { "epoch": 0.9663879883978843, "grad_norm": 2.249819854306352, "learning_rate": 2.9599359121393887e-07, "loss": 0.1571, "step": 11328 }, { "epoch": 0.9664732980720013, "grad_norm": 2.9311244848256504, "learning_rate": 2.944944672104488e-07, "loss": 0.1685, "step": 11329 }, { "epoch": 0.9665586077461185, "grad_norm": 1.631568544351414, "learning_rate": 2.929991379581609e-07, "loss": 0.1515, "step": 11330 }, { "epoch": 0.9666439174202355, "grad_norm": 1.6909576596419142, "learning_rate": 2.915076035712505e-07, "loss": 0.194, "step": 11331 }, { "epoch": 0.9667292270943525, "grad_norm": 1.5944995987258554, "learning_rate": 2.900198641635876e-07, "loss": 0.1756, "step": 11332 }, { "epoch": 0.9668145367684695, "grad_norm": 1.4587858133896572, "learning_rate": 2.885359198487425e-07, "loss": 0.198, "step": 11333 }, { "epoch": 0.9668998464425866, "grad_norm": 1.5187096421644821, "learning_rate": 2.87055770740019e-07, "loss": 0.1987, "step": 11334 }, { "epoch": 0.9669851561167037, "grad_norm": 1.5235090190043343, "learning_rate": 2.8557941695041003e-07, "loss": 0.203, "step": 11335 }, { "epoch": 0.9670704657908207, "grad_norm": 2.136194028707058, "learning_rate": 2.84106858592631e-07, "loss": 0.1741, "step": 11336 }, { "epoch": 0.9671557754649377, "grad_norm": 2.0866540256803914, "learning_rate": 2.826380957790975e-07, "loss": 0.1899, "step": 11337 }, { "epoch": 0.9672410851390548, "grad_norm": 2.2283741858462633, "learning_rate": 2.8117312862195876e-07, "loss": 0.1339, "step": 11338 }, { "epoch": 0.9673263948131718, "grad_norm": 2.193630891267166, "learning_rate": 2.7971195723303646e-07, "loss": 0.1557, "step": 11339 }, { "epoch": 0.9674117044872889, "grad_norm": 1.6029061240528857, "learning_rate": 2.7825458172389684e-07, "loss": 0.1312, "step": 11340 }, { "epoch": 0.9674970141614059, "grad_norm": 1.1727335010474738, "learning_rate": 2.7680100220580097e-07, "loss": 0.1101, "step": 11341 }, { "epoch": 0.9675823238355229, "grad_norm": 1.8105861161638819, "learning_rate": 2.753512187897211e-07, "loss": 0.2422, "step": 11342 }, { "epoch": 0.96766763350964, "grad_norm": 1.5330602883057, "learning_rate": 2.7390523158633554e-07, "loss": 0.2332, "step": 11343 }, { "epoch": 0.967752943183757, "grad_norm": 1.760142769658211, "learning_rate": 2.7246304070603913e-07, "loss": 0.2003, "step": 11344 }, { "epoch": 0.9678382528578741, "grad_norm": 1.8620043844013652, "learning_rate": 2.7102464625894387e-07, "loss": 0.1953, "step": 11345 }, { "epoch": 0.9679235625319911, "grad_norm": 1.797489625054608, "learning_rate": 2.695900483548508e-07, "loss": 0.2237, "step": 11346 }, { "epoch": 0.9680088722061082, "grad_norm": 1.4432242488972324, "learning_rate": 2.6815924710329456e-07, "loss": 0.1386, "step": 11347 }, { "epoch": 0.9680941818802252, "grad_norm": 1.8918511694222309, "learning_rate": 2.6673224261350436e-07, "loss": 0.1623, "step": 11348 }, { "epoch": 0.9681794915543422, "grad_norm": 1.8555845204425756, "learning_rate": 2.653090349944265e-07, "loss": 0.205, "step": 11349 }, { "epoch": 0.9682648012284593, "grad_norm": 2.3708222436450006, "learning_rate": 2.638896243547073e-07, "loss": 0.1914, "step": 11350 }, { "epoch": 0.9683501109025764, "grad_norm": 1.5176276121576562, "learning_rate": 2.624740108027268e-07, "loss": 0.1212, "step": 11351 }, { "epoch": 0.9684354205766934, "grad_norm": 1.4779144233892363, "learning_rate": 2.61062194446543e-07, "loss": 0.1484, "step": 11352 }, { "epoch": 0.9685207302508104, "grad_norm": 2.0577073333213574, "learning_rate": 2.59654175393953e-07, "loss": 0.2263, "step": 11353 }, { "epoch": 0.9686060399249274, "grad_norm": 2.600914676872748, "learning_rate": 2.5824995375244855e-07, "loss": 0.2035, "step": 11354 }, { "epoch": 0.9686913495990446, "grad_norm": 1.7742201636714838, "learning_rate": 2.568495296292273e-07, "loss": 0.1485, "step": 11355 }, { "epoch": 0.9687766592731616, "grad_norm": 2.050521049341241, "learning_rate": 2.5545290313121474e-07, "loss": 0.1364, "step": 11356 }, { "epoch": 0.9688619689472786, "grad_norm": 1.518268442927277, "learning_rate": 2.5406007436502566e-07, "loss": 0.0959, "step": 11357 }, { "epoch": 0.9689472786213956, "grad_norm": 1.6441120691735698, "learning_rate": 2.526710434370083e-07, "loss": 0.1395, "step": 11358 }, { "epoch": 0.9690325882955128, "grad_norm": 1.8291348007774832, "learning_rate": 2.51285810453189e-07, "loss": 0.1575, "step": 11359 }, { "epoch": 0.9691178979696298, "grad_norm": 1.2847582539955589, "learning_rate": 2.4990437551933863e-07, "loss": 0.1298, "step": 11360 }, { "epoch": 0.9692032076437468, "grad_norm": 2.020326085351051, "learning_rate": 2.485267387409229e-07, "loss": 0.2509, "step": 11361 }, { "epoch": 0.9692885173178638, "grad_norm": 1.6453501639986579, "learning_rate": 2.471529002231021e-07, "loss": 0.1027, "step": 11362 }, { "epoch": 0.9693738269919809, "grad_norm": 2.1478069697974487, "learning_rate": 2.457828600707812e-07, "loss": 0.1189, "step": 11363 }, { "epoch": 0.969459136666098, "grad_norm": 3.6382480442070957, "learning_rate": 2.444166183885377e-07, "loss": 0.3038, "step": 11364 }, { "epoch": 0.969544446340215, "grad_norm": 1.7493520627114327, "learning_rate": 2.430541752806881e-07, "loss": 0.2084, "step": 11365 }, { "epoch": 0.969629756014332, "grad_norm": 2.571522436623746, "learning_rate": 2.416955308512381e-07, "loss": 0.163, "step": 11366 }, { "epoch": 0.9697150656884491, "grad_norm": 1.5300191486631398, "learning_rate": 2.4034068520392693e-07, "loss": 0.1594, "step": 11367 }, { "epoch": 0.9698003753625661, "grad_norm": 1.8022470859224105, "learning_rate": 2.3898963844217746e-07, "loss": 0.1444, "step": 11368 }, { "epoch": 0.9698856850366832, "grad_norm": 1.9812691480030202, "learning_rate": 2.376423906691405e-07, "loss": 0.1816, "step": 11369 }, { "epoch": 0.9699709947108002, "grad_norm": 1.9426441199059659, "learning_rate": 2.362989419876671e-07, "loss": 0.2431, "step": 11370 }, { "epoch": 0.9700563043849173, "grad_norm": 1.5545089849222813, "learning_rate": 2.3495929250033078e-07, "loss": 0.1796, "step": 11371 }, { "epoch": 0.9701416140590343, "grad_norm": 2.9225316253334572, "learning_rate": 2.336234423093997e-07, "loss": 0.1831, "step": 11372 }, { "epoch": 0.9702269237331513, "grad_norm": 1.93288252880192, "learning_rate": 2.3229139151685896e-07, "loss": 0.179, "step": 11373 }, { "epoch": 0.9703122334072684, "grad_norm": 1.5457155139919223, "learning_rate": 2.3096314022440503e-07, "loss": 0.2155, "step": 11374 }, { "epoch": 0.9703975430813855, "grad_norm": 1.740128947445678, "learning_rate": 2.2963868853344562e-07, "loss": 0.1169, "step": 11375 }, { "epoch": 0.9704828527555025, "grad_norm": 2.009047785052539, "learning_rate": 2.2831803654508877e-07, "loss": 0.1708, "step": 11376 }, { "epoch": 0.9705681624296195, "grad_norm": 2.0126551550533542, "learning_rate": 2.2700118436016494e-07, "loss": 0.1995, "step": 11377 }, { "epoch": 0.9706534721037365, "grad_norm": 1.7963631605846884, "learning_rate": 2.2568813207921037e-07, "loss": 0.1931, "step": 11378 }, { "epoch": 0.9707387817778536, "grad_norm": 1.6304083103297573, "learning_rate": 2.2437887980246153e-07, "loss": 0.2242, "step": 11379 }, { "epoch": 0.9708240914519707, "grad_norm": 1.9979648368154532, "learning_rate": 2.2307342762988294e-07, "loss": 0.1614, "step": 11380 }, { "epoch": 0.9709094011260877, "grad_norm": 1.7279697396754756, "learning_rate": 2.217717756611337e-07, "loss": 0.2402, "step": 11381 }, { "epoch": 0.9709947108002047, "grad_norm": 2.051359200555122, "learning_rate": 2.2047392399558443e-07, "loss": 0.1824, "step": 11382 }, { "epoch": 0.9710800204743217, "grad_norm": 1.6463733785011765, "learning_rate": 2.1917987273232245e-07, "loss": 0.1513, "step": 11383 }, { "epoch": 0.9711653301484389, "grad_norm": 1.6231069542223553, "learning_rate": 2.1788962197014652e-07, "loss": 0.1657, "step": 11384 }, { "epoch": 0.9712506398225559, "grad_norm": 1.4152848666663178, "learning_rate": 2.1660317180755564e-07, "loss": 0.1564, "step": 11385 }, { "epoch": 0.9713359494966729, "grad_norm": 1.8620922845656747, "learning_rate": 2.1532052234276013e-07, "loss": 0.186, "step": 11386 }, { "epoch": 0.9714212591707899, "grad_norm": 1.5981771277562915, "learning_rate": 2.1404167367368721e-07, "loss": 0.1178, "step": 11387 }, { "epoch": 0.971506568844907, "grad_norm": 2.2431746911480888, "learning_rate": 2.1276662589797547e-07, "loss": 0.1653, "step": 11388 }, { "epoch": 0.9715918785190241, "grad_norm": 1.7591398525799644, "learning_rate": 2.1149537911295814e-07, "loss": 0.2155, "step": 11389 }, { "epoch": 0.9716771881931411, "grad_norm": 2.780462775099386, "learning_rate": 2.102279334156909e-07, "loss": 0.1881, "step": 11390 }, { "epoch": 0.9717624978672581, "grad_norm": 1.711117251634245, "learning_rate": 2.089642889029464e-07, "loss": 0.1881, "step": 11391 }, { "epoch": 0.9718478075413752, "grad_norm": 1.8852284866196152, "learning_rate": 2.0770444567118075e-07, "loss": 0.1207, "step": 11392 }, { "epoch": 0.9719331172154922, "grad_norm": 2.277282923414468, "learning_rate": 2.0644840381658926e-07, "loss": 0.2087, "step": 11393 }, { "epoch": 0.9720184268896093, "grad_norm": 2.022650370436426, "learning_rate": 2.0519616343505633e-07, "loss": 0.1859, "step": 11394 }, { "epoch": 0.9721037365637263, "grad_norm": 2.06236844654731, "learning_rate": 2.0394772462218882e-07, "loss": 0.1613, "step": 11395 }, { "epoch": 0.9721890462378434, "grad_norm": 1.6392409526603788, "learning_rate": 2.0270308747329936e-07, "loss": 0.1061, "step": 11396 }, { "epoch": 0.9722743559119604, "grad_norm": 1.780547823453564, "learning_rate": 2.014622520834064e-07, "loss": 0.2326, "step": 11397 }, { "epoch": 0.9723596655860774, "grad_norm": 1.4285256088946898, "learning_rate": 2.0022521854723975e-07, "loss": 0.2074, "step": 11398 }, { "epoch": 0.9724449752601945, "grad_norm": 1.7637285157969473, "learning_rate": 1.9899198695924048e-07, "loss": 0.1865, "step": 11399 }, { "epoch": 0.9725302849343116, "grad_norm": 1.4911081294885709, "learning_rate": 1.977625574135611e-07, "loss": 0.0873, "step": 11400 }, { "epoch": 0.9726155946084286, "grad_norm": 1.867127485388155, "learning_rate": 1.9653693000405982e-07, "loss": 0.1585, "step": 11401 }, { "epoch": 0.9727009042825456, "grad_norm": 1.719432487233947, "learning_rate": 1.9531510482431182e-07, "loss": 0.2221, "step": 11402 }, { "epoch": 0.9727862139566626, "grad_norm": 1.8743754936337178, "learning_rate": 1.9409708196759247e-07, "loss": 0.1572, "step": 11403 }, { "epoch": 0.9728715236307798, "grad_norm": 1.4902246957367098, "learning_rate": 1.9288286152689406e-07, "loss": 0.1915, "step": 11404 }, { "epoch": 0.9729568333048968, "grad_norm": 1.9929807392688363, "learning_rate": 1.9167244359491467e-07, "loss": 0.0915, "step": 11405 }, { "epoch": 0.9730421429790138, "grad_norm": 2.019770418094364, "learning_rate": 1.9046582826406368e-07, "loss": 0.1596, "step": 11406 }, { "epoch": 0.9731274526531308, "grad_norm": 1.8482501827463322, "learning_rate": 1.8926301562645632e-07, "loss": 0.2454, "step": 11407 }, { "epoch": 0.973212762327248, "grad_norm": 1.3659583533795676, "learning_rate": 1.880640057739247e-07, "loss": 0.1707, "step": 11408 }, { "epoch": 0.973298072001365, "grad_norm": 1.6358506260698096, "learning_rate": 1.8686879879800667e-07, "loss": 0.1277, "step": 11409 }, { "epoch": 0.973383381675482, "grad_norm": 2.169700247281965, "learning_rate": 1.8567739478994595e-07, "loss": 0.2012, "step": 11410 }, { "epoch": 0.973468691349599, "grad_norm": 1.4510540715454348, "learning_rate": 1.8448979384070863e-07, "loss": 0.1628, "step": 11411 }, { "epoch": 0.9735540010237161, "grad_norm": 1.6232683785751005, "learning_rate": 1.8330599604095e-07, "loss": 0.1974, "step": 11412 }, { "epoch": 0.9736393106978332, "grad_norm": 1.7716181493212342, "learning_rate": 1.8212600148105884e-07, "loss": 0.1138, "step": 11413 }, { "epoch": 0.9737246203719502, "grad_norm": 1.4498161462441217, "learning_rate": 1.8094981025110756e-07, "loss": 0.1401, "step": 11414 }, { "epoch": 0.9738099300460672, "grad_norm": 1.7599101795771974, "learning_rate": 1.7977742244090768e-07, "loss": 0.1802, "step": 11415 }, { "epoch": 0.9738952397201842, "grad_norm": 1.7363643749731164, "learning_rate": 1.7860883813995976e-07, "loss": 0.2101, "step": 11416 }, { "epoch": 0.9739805493943013, "grad_norm": 1.6365557386846112, "learning_rate": 1.774440574374703e-07, "loss": 0.1135, "step": 11417 }, { "epoch": 0.9740658590684184, "grad_norm": 1.8500642791097202, "learning_rate": 1.7628308042236807e-07, "loss": 0.1999, "step": 11418 }, { "epoch": 0.9741511687425354, "grad_norm": 2.1206598277089133, "learning_rate": 1.7512590718329336e-07, "loss": 0.162, "step": 11419 }, { "epoch": 0.9742364784166524, "grad_norm": 1.6588103459756982, "learning_rate": 1.7397253780858658e-07, "loss": 0.1939, "step": 11420 }, { "epoch": 0.9743217880907695, "grad_norm": 1.4117934004062362, "learning_rate": 1.7282297238629953e-07, "loss": 0.1682, "step": 11421 }, { "epoch": 0.9744070977648865, "grad_norm": 1.533196671587732, "learning_rate": 1.7167721100420087e-07, "loss": 0.1579, "step": 11422 }, { "epoch": 0.9744924074390036, "grad_norm": 1.533437917432151, "learning_rate": 1.7053525374975953e-07, "loss": 0.1424, "step": 11423 }, { "epoch": 0.9745777171131206, "grad_norm": 1.3991824249204163, "learning_rate": 1.693971007101558e-07, "loss": 0.1804, "step": 11424 }, { "epoch": 0.9746630267872377, "grad_norm": 1.4324156366508383, "learning_rate": 1.682627519722868e-07, "loss": 0.1625, "step": 11425 }, { "epoch": 0.9747483364613547, "grad_norm": 1.33933001308748, "learning_rate": 1.671322076227444e-07, "loss": 0.1592, "step": 11426 }, { "epoch": 0.9748336461354717, "grad_norm": 1.940650716172015, "learning_rate": 1.6600546774785398e-07, "loss": 0.1375, "step": 11427 }, { "epoch": 0.9749189558095888, "grad_norm": 1.4528674646041186, "learning_rate": 1.648825324336245e-07, "loss": 0.1515, "step": 11428 }, { "epoch": 0.9750042654837059, "grad_norm": 1.706204631048537, "learning_rate": 1.6376340176579297e-07, "loss": 0.146, "step": 11429 }, { "epoch": 0.9750895751578229, "grad_norm": 1.898935637248625, "learning_rate": 1.6264807582979103e-07, "loss": 0.1971, "step": 11430 }, { "epoch": 0.9751748848319399, "grad_norm": 1.7726406374126535, "learning_rate": 1.6153655471077832e-07, "loss": 0.1143, "step": 11431 }, { "epoch": 0.9752601945060569, "grad_norm": 1.5614884726287888, "learning_rate": 1.604288384936037e-07, "loss": 0.1278, "step": 11432 }, { "epoch": 0.9753455041801741, "grad_norm": 1.8245959004366246, "learning_rate": 1.5932492726284386e-07, "loss": 0.2335, "step": 11433 }, { "epoch": 0.9754308138542911, "grad_norm": 1.7316492329595856, "learning_rate": 1.5822482110277036e-07, "loss": 0.1481, "step": 11434 }, { "epoch": 0.9755161235284081, "grad_norm": 2.2988611055275445, "learning_rate": 1.5712852009737711e-07, "loss": 0.2111, "step": 11435 }, { "epoch": 0.9756014332025251, "grad_norm": 2.0033060405647176, "learning_rate": 1.5603602433035269e-07, "loss": 0.1313, "step": 11436 }, { "epoch": 0.9756867428766423, "grad_norm": 1.7201156739217112, "learning_rate": 1.5494733388510817e-07, "loss": 0.1673, "step": 11437 }, { "epoch": 0.9757720525507593, "grad_norm": 1.817136687390803, "learning_rate": 1.5386244884476043e-07, "loss": 0.1541, "step": 11438 }, { "epoch": 0.9758573622248763, "grad_norm": 1.5640460185917502, "learning_rate": 1.527813692921265e-07, "loss": 0.1165, "step": 11439 }, { "epoch": 0.9759426718989933, "grad_norm": 1.751611648673111, "learning_rate": 1.517040953097515e-07, "loss": 0.1695, "step": 11440 }, { "epoch": 0.9760279815731104, "grad_norm": 1.9408952435776534, "learning_rate": 1.5063062697987518e-07, "loss": 0.1743, "step": 11441 }, { "epoch": 0.9761132912472275, "grad_norm": 2.147967255887981, "learning_rate": 1.4956096438445423e-07, "loss": 0.133, "step": 11442 }, { "epoch": 0.9761986009213445, "grad_norm": 1.5609537484637845, "learning_rate": 1.4849510760513995e-07, "loss": 0.1601, "step": 11443 }, { "epoch": 0.9762839105954615, "grad_norm": 1.5650491900528738, "learning_rate": 1.4743305672332287e-07, "loss": 0.1562, "step": 11444 }, { "epoch": 0.9763692202695786, "grad_norm": 1.4754596608205248, "learning_rate": 1.463748118200714e-07, "loss": 0.1619, "step": 11445 }, { "epoch": 0.9764545299436956, "grad_norm": 2.4138254586292196, "learning_rate": 1.4532037297618205e-07, "loss": 0.1699, "step": 11446 }, { "epoch": 0.9765398396178127, "grad_norm": 1.507408760055267, "learning_rate": 1.4426974027215713e-07, "loss": 0.0899, "step": 11447 }, { "epoch": 0.9766251492919297, "grad_norm": 2.6729961519224337, "learning_rate": 1.4322291378819908e-07, "loss": 0.2042, "step": 11448 }, { "epoch": 0.9767104589660468, "grad_norm": 1.3725160356707014, "learning_rate": 1.4217989360423845e-07, "loss": 0.1463, "step": 11449 }, { "epoch": 0.9767957686401638, "grad_norm": 1.5737492545128375, "learning_rate": 1.4114067979989488e-07, "loss": 0.1622, "step": 11450 }, { "epoch": 0.9768810783142808, "grad_norm": 2.0715445828954153, "learning_rate": 1.4010527245451045e-07, "loss": 0.147, "step": 11451 }, { "epoch": 0.9769663879883979, "grad_norm": 1.7639975009563156, "learning_rate": 1.3907367164713303e-07, "loss": 0.2406, "step": 11452 }, { "epoch": 0.977051697662515, "grad_norm": 1.3049040289188782, "learning_rate": 1.3804587745652187e-07, "loss": 0.1332, "step": 11453 }, { "epoch": 0.977137007336632, "grad_norm": 2.467341443719584, "learning_rate": 1.3702188996114196e-07, "loss": 0.1479, "step": 11454 }, { "epoch": 0.977222317010749, "grad_norm": 1.9512790959771333, "learning_rate": 1.3600170923916966e-07, "loss": 0.2226, "step": 11455 }, { "epoch": 0.977307626684866, "grad_norm": 2.1812245408460615, "learning_rate": 1.349853353684871e-07, "loss": 0.1668, "step": 11456 }, { "epoch": 0.977392936358983, "grad_norm": 1.4734927768795596, "learning_rate": 1.3397276842669892e-07, "loss": 0.1478, "step": 11457 }, { "epoch": 0.9774782460331002, "grad_norm": 1.7135625758726392, "learning_rate": 1.3296400849109324e-07, "loss": 0.1644, "step": 11458 }, { "epoch": 0.9775635557072172, "grad_norm": 2.2728180451038975, "learning_rate": 1.3195905563869737e-07, "loss": 0.1811, "step": 11459 }, { "epoch": 0.9776488653813342, "grad_norm": 1.6749493150017127, "learning_rate": 1.309579099462277e-07, "loss": 0.1464, "step": 11460 }, { "epoch": 0.9777341750554512, "grad_norm": 1.3181455085501053, "learning_rate": 1.2996057149011752e-07, "loss": 0.1309, "step": 11461 }, { "epoch": 0.9778194847295684, "grad_norm": 1.7107577294790073, "learning_rate": 1.2896704034651152e-07, "loss": 0.2076, "step": 11462 }, { "epoch": 0.9779047944036854, "grad_norm": 1.8573768201110905, "learning_rate": 1.27977316591249e-07, "loss": 0.1585, "step": 11463 }, { "epoch": 0.9779901040778024, "grad_norm": 1.8965856994527435, "learning_rate": 1.2699140029990842e-07, "loss": 0.1509, "step": 11464 }, { "epoch": 0.9780754137519194, "grad_norm": 1.8243216050617468, "learning_rate": 1.2600929154774621e-07, "loss": 0.1404, "step": 11465 }, { "epoch": 0.9781607234260365, "grad_norm": 1.8126419274897347, "learning_rate": 1.250309904097413e-07, "loss": 0.1656, "step": 11466 }, { "epoch": 0.9782460331001536, "grad_norm": 1.9100822325175553, "learning_rate": 1.2405649696058953e-07, "loss": 0.1541, "step": 11467 }, { "epoch": 0.9783313427742706, "grad_norm": 2.152035782895874, "learning_rate": 1.2308581127468132e-07, "loss": 0.1441, "step": 11468 }, { "epoch": 0.9784166524483876, "grad_norm": 1.7346184362078532, "learning_rate": 1.2211893342612968e-07, "loss": 0.2017, "step": 11469 }, { "epoch": 0.9785019621225047, "grad_norm": 1.5382823225320823, "learning_rate": 1.211558634887422e-07, "loss": 0.2284, "step": 11470 }, { "epoch": 0.9785872717966217, "grad_norm": 1.538717163380137, "learning_rate": 1.2019660153604894e-07, "loss": 0.1715, "step": 11471 }, { "epoch": 0.9786725814707388, "grad_norm": 2.1833066802788257, "learning_rate": 1.192411476412858e-07, "loss": 0.1614, "step": 11472 }, { "epoch": 0.9787578911448558, "grad_norm": 2.046084884851571, "learning_rate": 1.182895018773944e-07, "loss": 0.1594, "step": 11473 }, { "epoch": 0.9788432008189729, "grad_norm": 1.7624732049298517, "learning_rate": 1.1734166431702776e-07, "loss": 0.162, "step": 11474 }, { "epoch": 0.9789285104930899, "grad_norm": 2.1251369880951594, "learning_rate": 1.1639763503255019e-07, "loss": 0.1842, "step": 11475 }, { "epoch": 0.979013820167207, "grad_norm": 2.0776577761308035, "learning_rate": 1.1545741409603184e-07, "loss": 0.2055, "step": 11476 }, { "epoch": 0.979099129841324, "grad_norm": 1.9634105392370589, "learning_rate": 1.1452100157925416e-07, "loss": 0.1259, "step": 11477 }, { "epoch": 0.9791844395154411, "grad_norm": 1.905153365582354, "learning_rate": 1.1358839755370443e-07, "loss": 0.1898, "step": 11478 }, { "epoch": 0.9792697491895581, "grad_norm": 2.1539180142341157, "learning_rate": 1.1265960209058679e-07, "loss": 0.1259, "step": 11479 }, { "epoch": 0.9793550588636751, "grad_norm": 1.7203987536518306, "learning_rate": 1.1173461526080565e-07, "loss": 0.188, "step": 11480 }, { "epoch": 0.9794403685377921, "grad_norm": 1.9986968563379381, "learning_rate": 1.1081343713498227e-07, "loss": 0.2157, "step": 11481 }, { "epoch": 0.9795256782119093, "grad_norm": 1.807247575410775, "learning_rate": 1.0989606778344375e-07, "loss": 0.2114, "step": 11482 }, { "epoch": 0.9796109878860263, "grad_norm": 2.0419232992176015, "learning_rate": 1.0898250727622294e-07, "loss": 0.1816, "step": 11483 }, { "epoch": 0.9796962975601433, "grad_norm": 1.7975341914589862, "learning_rate": 1.0807275568306407e-07, "loss": 0.1706, "step": 11484 }, { "epoch": 0.9797816072342603, "grad_norm": 2.1060852393844316, "learning_rate": 1.0716681307342825e-07, "loss": 0.1499, "step": 11485 }, { "epoch": 0.9798669169083775, "grad_norm": 1.2437447916813207, "learning_rate": 1.0626467951647678e-07, "loss": 0.113, "step": 11486 }, { "epoch": 0.9799522265824945, "grad_norm": 2.1938451023626078, "learning_rate": 1.0536635508107684e-07, "loss": 0.1387, "step": 11487 }, { "epoch": 0.9800375362566115, "grad_norm": 1.7235408942160422, "learning_rate": 1.0447183983582353e-07, "loss": 0.114, "step": 11488 }, { "epoch": 0.9801228459307285, "grad_norm": 1.9303166525995104, "learning_rate": 1.0358113384899559e-07, "loss": 0.1617, "step": 11489 }, { "epoch": 0.9802081556048456, "grad_norm": 1.951156784167274, "learning_rate": 1.0269423718859971e-07, "loss": 0.1437, "step": 11490 }, { "epoch": 0.9802934652789627, "grad_norm": 1.8460521872511648, "learning_rate": 1.0181114992234287e-07, "loss": 0.1871, "step": 11491 }, { "epoch": 0.9803787749530797, "grad_norm": 1.5359536155561517, "learning_rate": 1.0093187211764887e-07, "loss": 0.1702, "step": 11492 }, { "epoch": 0.9804640846271967, "grad_norm": 1.8832766645339774, "learning_rate": 1.0005640384164738e-07, "loss": 0.2384, "step": 11493 }, { "epoch": 0.9805493943013137, "grad_norm": 1.8489566283598726, "learning_rate": 9.918474516116272e-08, "loss": 0.1659, "step": 11494 }, { "epoch": 0.9806347039754308, "grad_norm": 1.9911462792959735, "learning_rate": 9.831689614275275e-08, "loss": 0.1726, "step": 11495 }, { "epoch": 0.9807200136495479, "grad_norm": 2.060545139402409, "learning_rate": 9.745285685267558e-08, "loss": 0.1768, "step": 11496 }, { "epoch": 0.9808053233236649, "grad_norm": 1.7638878167350227, "learning_rate": 9.659262735688401e-08, "loss": 0.1722, "step": 11497 }, { "epoch": 0.9808906329977819, "grad_norm": 1.6782840987215868, "learning_rate": 9.573620772106439e-08, "loss": 0.1676, "step": 11498 }, { "epoch": 0.980975942671899, "grad_norm": 1.9893035958311438, "learning_rate": 9.488359801059222e-08, "loss": 0.1549, "step": 11499 }, { "epoch": 0.981061252346016, "grad_norm": 1.4396642066921006, "learning_rate": 9.403479829055983e-08, "loss": 0.1512, "step": 11500 }, { "epoch": 0.9811465620201331, "grad_norm": 2.1363922371785242, "learning_rate": 9.318980862577098e-08, "loss": 0.1715, "step": 11501 }, { "epoch": 0.9812318716942501, "grad_norm": 1.7653373551471763, "learning_rate": 9.234862908074071e-08, "loss": 0.2269, "step": 11502 }, { "epoch": 0.9813171813683672, "grad_norm": 1.5498805276910756, "learning_rate": 9.151125971967878e-08, "loss": 0.1729, "step": 11503 }, { "epoch": 0.9814024910424842, "grad_norm": 1.6625945731203058, "learning_rate": 9.067770060651737e-08, "loss": 0.2036, "step": 11504 }, { "epoch": 0.9814878007166012, "grad_norm": 2.375889661699447, "learning_rate": 8.984795180490003e-08, "loss": 0.2257, "step": 11505 }, { "epoch": 0.9815731103907183, "grad_norm": 1.9426799564146997, "learning_rate": 8.902201337816496e-08, "loss": 0.2286, "step": 11506 }, { "epoch": 0.9816584200648354, "grad_norm": 1.6351726212208255, "learning_rate": 8.81998853893784e-08, "loss": 0.1715, "step": 11507 }, { "epoch": 0.9817437297389524, "grad_norm": 1.9371725236338713, "learning_rate": 8.73815679012957e-08, "loss": 0.138, "step": 11508 }, { "epoch": 0.9818290394130694, "grad_norm": 1.5722330978535612, "learning_rate": 8.656706097639467e-08, "loss": 0.1369, "step": 11509 }, { "epoch": 0.9819143490871864, "grad_norm": 1.4580328313848905, "learning_rate": 8.575636467685888e-08, "loss": 0.1775, "step": 11510 }, { "epoch": 0.9819996587613036, "grad_norm": 1.8453115542390808, "learning_rate": 8.494947906458328e-08, "loss": 0.1542, "step": 11511 }, { "epoch": 0.9820849684354206, "grad_norm": 1.9906508559339864, "learning_rate": 8.414640420116305e-08, "loss": 0.1556, "step": 11512 }, { "epoch": 0.9821702781095376, "grad_norm": 1.954559165833913, "learning_rate": 8.334714014791578e-08, "loss": 0.16, "step": 11513 }, { "epoch": 0.9822555877836546, "grad_norm": 1.4782132402659334, "learning_rate": 8.25516869658538e-08, "loss": 0.1741, "step": 11514 }, { "epoch": 0.9823408974577718, "grad_norm": 1.5207759201848452, "learning_rate": 8.176004471571186e-08, "loss": 0.1709, "step": 11515 }, { "epoch": 0.9824262071318888, "grad_norm": 2.0802263233958636, "learning_rate": 8.097221345792493e-08, "loss": 0.182, "step": 11516 }, { "epoch": 0.9825115168060058, "grad_norm": 1.3774107693331628, "learning_rate": 8.018819325263937e-08, "loss": 0.187, "step": 11517 }, { "epoch": 0.9825968264801228, "grad_norm": 2.036243111895578, "learning_rate": 7.940798415971284e-08, "loss": 0.2021, "step": 11518 }, { "epoch": 0.9826821361542399, "grad_norm": 2.0474272703637606, "learning_rate": 7.86315862387088e-08, "loss": 0.1822, "step": 11519 }, { "epoch": 0.982767445828357, "grad_norm": 2.147328815498483, "learning_rate": 7.785899954890208e-08, "loss": 0.2191, "step": 11520 }, { "epoch": 0.982852755502474, "grad_norm": 1.72345830882316, "learning_rate": 7.709022414927325e-08, "loss": 0.1085, "step": 11521 }, { "epoch": 0.982938065176591, "grad_norm": 1.335705028630083, "learning_rate": 7.632526009851981e-08, "loss": 0.1919, "step": 11522 }, { "epoch": 0.9830233748507081, "grad_norm": 1.5786957416890914, "learning_rate": 7.556410745503395e-08, "loss": 0.1273, "step": 11523 }, { "epoch": 0.9831086845248251, "grad_norm": 2.130767736378797, "learning_rate": 7.480676627693029e-08, "loss": 0.1968, "step": 11524 }, { "epoch": 0.9831939941989422, "grad_norm": 2.0036186621654544, "learning_rate": 7.405323662202924e-08, "loss": 0.2051, "step": 11525 }, { "epoch": 0.9832793038730592, "grad_norm": 1.8393649310381779, "learning_rate": 7.3303518547857e-08, "loss": 0.1829, "step": 11526 }, { "epoch": 0.9833646135471763, "grad_norm": 1.3685678979283158, "learning_rate": 7.255761211165113e-08, "loss": 0.1154, "step": 11527 }, { "epoch": 0.9834499232212933, "grad_norm": 1.6785204004224672, "learning_rate": 7.181551737035497e-08, "loss": 0.137, "step": 11528 }, { "epoch": 0.9835352328954103, "grad_norm": 2.470550074002115, "learning_rate": 7.107723438062874e-08, "loss": 0.1534, "step": 11529 }, { "epoch": 0.9836205425695274, "grad_norm": 1.9512679159452613, "learning_rate": 7.034276319883293e-08, "loss": 0.1846, "step": 11530 }, { "epoch": 0.9837058522436444, "grad_norm": 1.6943674573797272, "learning_rate": 6.961210388104488e-08, "loss": 0.1253, "step": 11531 }, { "epoch": 0.9837911619177615, "grad_norm": 1.7689330582532792, "learning_rate": 6.888525648303667e-08, "loss": 0.1866, "step": 11532 }, { "epoch": 0.9838764715918785, "grad_norm": 1.5412352276145942, "learning_rate": 6.816222106030834e-08, "loss": 0.1522, "step": 11533 }, { "epoch": 0.9839617812659955, "grad_norm": 1.7024981543801228, "learning_rate": 6.744299766806017e-08, "loss": 0.2162, "step": 11534 }, { "epoch": 0.9840470909401126, "grad_norm": 2.108368018647808, "learning_rate": 6.67275863611927e-08, "loss": 0.1559, "step": 11535 }, { "epoch": 0.9841324006142297, "grad_norm": 1.508027214614209, "learning_rate": 6.601598719432889e-08, "loss": 0.1383, "step": 11536 }, { "epoch": 0.9842177102883467, "grad_norm": 1.7924656380268718, "learning_rate": 6.530820022179751e-08, "loss": 0.2114, "step": 11537 }, { "epoch": 0.9843030199624637, "grad_norm": 1.904275465624843, "learning_rate": 6.460422549763312e-08, "loss": 0.1755, "step": 11538 }, { "epoch": 0.9843883296365807, "grad_norm": 2.2839072441333266, "learning_rate": 6.390406307558161e-08, "loss": 0.1199, "step": 11539 }, { "epoch": 0.9844736393106979, "grad_norm": 1.3404311513468938, "learning_rate": 6.320771300908912e-08, "loss": 0.1178, "step": 11540 }, { "epoch": 0.9845589489848149, "grad_norm": 1.400832783420749, "learning_rate": 6.251517535132979e-08, "loss": 0.1217, "step": 11541 }, { "epoch": 0.9846442586589319, "grad_norm": 1.7392305332381337, "learning_rate": 6.182645015516131e-08, "loss": 0.0919, "step": 11542 }, { "epoch": 0.9847295683330489, "grad_norm": 1.8364057877052418, "learning_rate": 6.114153747318052e-08, "loss": 0.1569, "step": 11543 }, { "epoch": 0.984814878007166, "grad_norm": 2.084180685019324, "learning_rate": 6.046043735766783e-08, "loss": 0.2082, "step": 11544 }, { "epoch": 0.9849001876812831, "grad_norm": 1.7784479421182693, "learning_rate": 5.978314986061495e-08, "loss": 0.16, "step": 11545 }, { "epoch": 0.9849854973554001, "grad_norm": 2.055895664098481, "learning_rate": 5.9109675033741654e-08, "loss": 0.2287, "step": 11546 }, { "epoch": 0.9850708070295171, "grad_norm": 1.6621899882221047, "learning_rate": 5.844001292846235e-08, "loss": 0.1697, "step": 11547 }, { "epoch": 0.9851561167036342, "grad_norm": 1.9205778132661802, "learning_rate": 5.7774163595891716e-08, "loss": 0.199, "step": 11548 }, { "epoch": 0.9852414263777513, "grad_norm": 1.7410237483947097, "learning_rate": 5.7112127086877965e-08, "loss": 0.1661, "step": 11549 }, { "epoch": 0.9853267360518683, "grad_norm": 1.9405715345306118, "learning_rate": 5.6453903451952894e-08, "loss": 0.1259, "step": 11550 }, { "epoch": 0.9854120457259853, "grad_norm": 1.6956504098272769, "learning_rate": 5.579949274137075e-08, "loss": 0.1369, "step": 11551 }, { "epoch": 0.9854973554001024, "grad_norm": 1.7658409054161608, "learning_rate": 5.514889500509712e-08, "loss": 0.2026, "step": 11552 }, { "epoch": 0.9855826650742194, "grad_norm": 2.3971132480826394, "learning_rate": 5.450211029279784e-08, "loss": 0.208, "step": 11553 }, { "epoch": 0.9856679747483365, "grad_norm": 2.070128465066224, "learning_rate": 5.385913865385561e-08, "loss": 0.1716, "step": 11554 }, { "epoch": 0.9857532844224535, "grad_norm": 1.7487297897861598, "learning_rate": 5.321998013735341e-08, "loss": 0.1331, "step": 11555 }, { "epoch": 0.9858385940965706, "grad_norm": 1.6739790324284627, "learning_rate": 5.258463479208553e-08, "loss": 0.1536, "step": 11556 }, { "epoch": 0.9859239037706876, "grad_norm": 1.8657188864746985, "learning_rate": 5.195310266656317e-08, "loss": 0.1946, "step": 11557 }, { "epoch": 0.9860092134448046, "grad_norm": 1.957188643714489, "learning_rate": 5.1325383808997764e-08, "loss": 0.1602, "step": 11558 }, { "epoch": 0.9860945231189217, "grad_norm": 3.460981863869866, "learning_rate": 5.070147826731209e-08, "loss": 0.2907, "step": 11559 }, { "epoch": 0.9861798327930388, "grad_norm": 1.7066398538529672, "learning_rate": 5.008138608913471e-08, "loss": 0.1909, "step": 11560 }, { "epoch": 0.9862651424671558, "grad_norm": 1.4811860211554648, "learning_rate": 4.946510732181664e-08, "loss": 0.1584, "step": 11561 }, { "epoch": 0.9863504521412728, "grad_norm": 1.7321404160719738, "learning_rate": 4.885264201239248e-08, "loss": 0.1433, "step": 11562 }, { "epoch": 0.9864357618153898, "grad_norm": 1.9863654539188444, "learning_rate": 4.824399020763593e-08, "loss": 0.1491, "step": 11563 }, { "epoch": 0.986521071489507, "grad_norm": 1.5541587965751518, "learning_rate": 4.7639151954004254e-08, "loss": 0.1806, "step": 11564 }, { "epoch": 0.986606381163624, "grad_norm": 1.8626827662356318, "learning_rate": 4.70381272976772e-08, "loss": 0.1384, "step": 11565 }, { "epoch": 0.986691690837741, "grad_norm": 1.6169468060191354, "learning_rate": 4.644091628454028e-08, "loss": 0.1872, "step": 11566 }, { "epoch": 0.986777000511858, "grad_norm": 1.2300409451314331, "learning_rate": 4.5847518960184796e-08, "loss": 0.173, "step": 11567 }, { "epoch": 0.9868623101859751, "grad_norm": 2.1645145976877855, "learning_rate": 4.525793536991896e-08, "loss": 0.1491, "step": 11568 }, { "epoch": 0.9869476198600922, "grad_norm": 1.83972050928303, "learning_rate": 4.467216555874565e-08, "loss": 0.1518, "step": 11569 }, { "epoch": 0.9870329295342092, "grad_norm": 1.7562361367109505, "learning_rate": 4.409020957139576e-08, "loss": 0.2019, "step": 11570 }, { "epoch": 0.9871182392083262, "grad_norm": 1.632955079492769, "learning_rate": 4.351206745228931e-08, "loss": 0.192, "step": 11571 }, { "epoch": 0.9872035488824432, "grad_norm": 1.8693997353996497, "learning_rate": 4.293773924556321e-08, "loss": 0.2104, "step": 11572 }, { "epoch": 0.9872888585565603, "grad_norm": 2.499808494862422, "learning_rate": 4.236722499507684e-08, "loss": 0.2131, "step": 11573 }, { "epoch": 0.9873741682306774, "grad_norm": 1.819907278381882, "learning_rate": 4.180052474437313e-08, "loss": 0.1204, "step": 11574 }, { "epoch": 0.9874594779047944, "grad_norm": 2.103250952657513, "learning_rate": 4.1237638536728573e-08, "loss": 0.1088, "step": 11575 }, { "epoch": 0.9875447875789114, "grad_norm": 2.0573007189072667, "learning_rate": 4.0678566415103256e-08, "loss": 0.1933, "step": 11576 }, { "epoch": 0.9876300972530285, "grad_norm": 2.1691081034487474, "learning_rate": 4.012330842219081e-08, "loss": 0.1602, "step": 11577 }, { "epoch": 0.9877154069271455, "grad_norm": 1.902233457403387, "learning_rate": 3.957186460037399e-08, "loss": 0.1485, "step": 11578 }, { "epoch": 0.9878007166012626, "grad_norm": 1.1004276788043303, "learning_rate": 3.9024234991758004e-08, "loss": 0.1151, "step": 11579 }, { "epoch": 0.9878860262753796, "grad_norm": 2.1359085136852207, "learning_rate": 3.848041963814275e-08, "loss": 0.1421, "step": 11580 }, { "epoch": 0.9879713359494967, "grad_norm": 1.7712285741399882, "learning_rate": 3.794041858106168e-08, "loss": 0.1659, "step": 11581 }, { "epoch": 0.9880566456236137, "grad_norm": 1.8324347663707297, "learning_rate": 3.7404231861726255e-08, "loss": 0.1757, "step": 11582 }, { "epoch": 0.9881419552977307, "grad_norm": 2.09747927467919, "learning_rate": 3.687185952107597e-08, "loss": 0.1998, "step": 11583 }, { "epoch": 0.9882272649718478, "grad_norm": 2.259068440049436, "learning_rate": 3.6343301599756074e-08, "loss": 0.1952, "step": 11584 }, { "epoch": 0.9883125746459649, "grad_norm": 1.850347385783097, "learning_rate": 3.5818558138123184e-08, "loss": 0.1774, "step": 11585 }, { "epoch": 0.9883978843200819, "grad_norm": 1.69016084345775, "learning_rate": 3.5297629176228587e-08, "loss": 0.2475, "step": 11586 }, { "epoch": 0.9884831939941989, "grad_norm": 1.3624037402385591, "learning_rate": 3.478051475385158e-08, "loss": 0.1727, "step": 11587 }, { "epoch": 0.988568503668316, "grad_norm": 2.1954911454573196, "learning_rate": 3.426721491046059e-08, "loss": 0.0997, "step": 11588 }, { "epoch": 0.9886538133424331, "grad_norm": 1.6427744998640454, "learning_rate": 3.375772968525759e-08, "loss": 0.1538, "step": 11589 }, { "epoch": 0.9887391230165501, "grad_norm": 1.7891315296965171, "learning_rate": 3.325205911712814e-08, "loss": 0.2257, "step": 11590 }, { "epoch": 0.9888244326906671, "grad_norm": 2.3947560638883005, "learning_rate": 3.275020324468026e-08, "loss": 0.1813, "step": 11591 }, { "epoch": 0.9889097423647841, "grad_norm": 1.6452476408981662, "learning_rate": 3.225216210623327e-08, "loss": 0.1342, "step": 11592 }, { "epoch": 0.9889950520389013, "grad_norm": 1.9131374069853404, "learning_rate": 3.175793573980124e-08, "loss": 0.1227, "step": 11593 }, { "epoch": 0.9890803617130183, "grad_norm": 2.2119922076772673, "learning_rate": 3.126752418312062e-08, "loss": 0.2218, "step": 11594 }, { "epoch": 0.9891656713871353, "grad_norm": 1.6993274873164295, "learning_rate": 3.07809274736337e-08, "loss": 0.1592, "step": 11595 }, { "epoch": 0.9892509810612523, "grad_norm": 1.5041210467041632, "learning_rate": 3.029814564848299e-08, "loss": 0.1184, "step": 11596 }, { "epoch": 0.9893362907353694, "grad_norm": 1.3812663167964985, "learning_rate": 2.981917874453344e-08, "loss": 0.1367, "step": 11597 }, { "epoch": 0.9894216004094865, "grad_norm": 2.008951895442169, "learning_rate": 2.9344026798344692e-08, "loss": 0.1598, "step": 11598 }, { "epoch": 0.9895069100836035, "grad_norm": 1.6321283394661854, "learning_rate": 2.887268984619884e-08, "loss": 0.1803, "step": 11599 }, { "epoch": 0.9895922197577205, "grad_norm": 2.3340362670748087, "learning_rate": 2.840516792407266e-08, "loss": 0.2107, "step": 11600 }, { "epoch": 0.9896775294318376, "grad_norm": 1.8092495942346598, "learning_rate": 2.7941461067665376e-08, "loss": 0.1536, "step": 11601 }, { "epoch": 0.9897628391059546, "grad_norm": 1.5108341911525294, "learning_rate": 2.7481569312381995e-08, "loss": 0.1672, "step": 11602 }, { "epoch": 0.9898481487800717, "grad_norm": 2.0720068565976906, "learning_rate": 2.702549269332222e-08, "loss": 0.2043, "step": 11603 }, { "epoch": 0.9899334584541887, "grad_norm": 1.4955443327112494, "learning_rate": 2.6573231245308196e-08, "loss": 0.163, "step": 11604 }, { "epoch": 0.9900187681283058, "grad_norm": 2.597211527221564, "learning_rate": 2.6124785002867857e-08, "loss": 0.2075, "step": 11605 }, { "epoch": 0.9901040778024228, "grad_norm": 2.0749610391658915, "learning_rate": 2.568015400024604e-08, "loss": 0.1993, "step": 11606 }, { "epoch": 0.9901893874765398, "grad_norm": 1.7538329790948712, "learning_rate": 2.52393382713767e-08, "loss": 0.1796, "step": 11607 }, { "epoch": 0.9902746971506569, "grad_norm": 1.8056805673343166, "learning_rate": 2.4802337849921807e-08, "loss": 0.1573, "step": 11608 }, { "epoch": 0.9903600068247739, "grad_norm": 1.9986327981850995, "learning_rate": 2.4369152769238014e-08, "loss": 0.2273, "step": 11609 }, { "epoch": 0.990445316498891, "grad_norm": 1.9571444236940034, "learning_rate": 2.3939783062398857e-08, "loss": 0.224, "step": 11610 }, { "epoch": 0.990530626173008, "grad_norm": 1.7863584514607092, "learning_rate": 2.3514228762183676e-08, "loss": 0.142, "step": 11611 }, { "epoch": 0.990615935847125, "grad_norm": 1.2654439066614835, "learning_rate": 2.3092489901083148e-08, "loss": 0.1522, "step": 11612 }, { "epoch": 0.9907012455212421, "grad_norm": 1.6006596695475477, "learning_rate": 2.2674566511293737e-08, "loss": 0.2552, "step": 11613 }, { "epoch": 0.9907865551953592, "grad_norm": 1.9880992154053605, "learning_rate": 2.2260458624723257e-08, "loss": 0.211, "step": 11614 }, { "epoch": 0.9908718648694762, "grad_norm": 1.4871736344767015, "learning_rate": 2.1850166272985306e-08, "loss": 0.1638, "step": 11615 }, { "epoch": 0.9909571745435932, "grad_norm": 2.4021611338111706, "learning_rate": 2.1443689487404827e-08, "loss": 0.2548, "step": 11616 }, { "epoch": 0.9910424842177102, "grad_norm": 2.1834910032498858, "learning_rate": 2.1041028299012555e-08, "loss": 0.1762, "step": 11617 }, { "epoch": 0.9911277938918274, "grad_norm": 1.7883565917669793, "learning_rate": 2.0642182738545013e-08, "loss": 0.1972, "step": 11618 }, { "epoch": 0.9912131035659444, "grad_norm": 1.5623726602163388, "learning_rate": 2.024715283646117e-08, "loss": 0.2217, "step": 11619 }, { "epoch": 0.9912984132400614, "grad_norm": 1.7589754453835216, "learning_rate": 1.9855938622914683e-08, "loss": 0.0995, "step": 11620 }, { "epoch": 0.9913837229141784, "grad_norm": 2.674551977982194, "learning_rate": 1.9468540127770552e-08, "loss": 0.1586, "step": 11621 }, { "epoch": 0.9914690325882956, "grad_norm": 1.8114773726454452, "learning_rate": 1.908495738061067e-08, "loss": 0.1475, "step": 11622 }, { "epoch": 0.9915543422624126, "grad_norm": 2.4733628262340592, "learning_rate": 1.8705190410717166e-08, "loss": 0.2581, "step": 11623 }, { "epoch": 0.9916396519365296, "grad_norm": 1.6309896268625836, "learning_rate": 1.8329239247077967e-08, "loss": 0.128, "step": 11624 }, { "epoch": 0.9917249616106466, "grad_norm": 1.519296819259557, "learning_rate": 1.7957103918397888e-08, "loss": 0.1884, "step": 11625 }, { "epoch": 0.9918102712847637, "grad_norm": 1.7733649688542399, "learning_rate": 1.7588784453093088e-08, "loss": 0.1916, "step": 11626 }, { "epoch": 0.9918955809588808, "grad_norm": 2.0625478565559385, "learning_rate": 1.7224280879279964e-08, "loss": 0.1872, "step": 11627 }, { "epoch": 0.9919808906329978, "grad_norm": 1.325056593064235, "learning_rate": 1.6863593224780704e-08, "loss": 0.1505, "step": 11628 }, { "epoch": 0.9920662003071148, "grad_norm": 1.7166602174705667, "learning_rate": 1.6506721517134394e-08, "loss": 0.2001, "step": 11629 }, { "epoch": 0.9921515099812319, "grad_norm": 1.6553938470217062, "learning_rate": 1.6153665783591453e-08, "loss": 0.1931, "step": 11630 }, { "epoch": 0.9922368196553489, "grad_norm": 2.1033394827453065, "learning_rate": 1.580442605110255e-08, "loss": 0.1877, "step": 11631 }, { "epoch": 0.992322129329466, "grad_norm": 1.3208461788928711, "learning_rate": 1.5459002346324135e-08, "loss": 0.1303, "step": 11632 }, { "epoch": 0.992407439003583, "grad_norm": 1.8873463726049167, "learning_rate": 1.5117394695640663e-08, "loss": 0.1729, "step": 11633 }, { "epoch": 0.9924927486777001, "grad_norm": 2.269635354049267, "learning_rate": 1.4779603125120166e-08, "loss": 0.1456, "step": 11634 }, { "epoch": 0.9925780583518171, "grad_norm": 1.6016914129710673, "learning_rate": 1.444562766055868e-08, "loss": 0.1983, "step": 11635 }, { "epoch": 0.9926633680259341, "grad_norm": 0.983047074068788, "learning_rate": 1.4115468327446923e-08, "loss": 0.1325, "step": 11636 }, { "epoch": 0.9927486777000512, "grad_norm": 1.54463796984957, "learning_rate": 1.3789125150998061e-08, "loss": 0.1538, "step": 11637 }, { "epoch": 0.9928339873741683, "grad_norm": 2.0759338070160283, "learning_rate": 1.3466598156125498e-08, "loss": 0.1789, "step": 11638 }, { "epoch": 0.9929192970482853, "grad_norm": 1.4314393530445857, "learning_rate": 1.314788736744288e-08, "loss": 0.2343, "step": 11639 }, { "epoch": 0.9930046067224023, "grad_norm": 1.4358899595341954, "learning_rate": 1.2832992809291843e-08, "loss": 0.216, "step": 11640 }, { "epoch": 0.9930899163965193, "grad_norm": 2.4578654172600243, "learning_rate": 1.2521914505714272e-08, "loss": 0.1395, "step": 11641 }, { "epoch": 0.9931752260706365, "grad_norm": 2.253770106813571, "learning_rate": 1.2214652480452282e-08, "loss": 0.2145, "step": 11642 }, { "epoch": 0.9932605357447535, "grad_norm": 2.3773733627977194, "learning_rate": 1.1911206756964888e-08, "loss": 0.1516, "step": 11643 }, { "epoch": 0.9933458454188705, "grad_norm": 1.9609383510876515, "learning_rate": 1.1611577358422442e-08, "loss": 0.2431, "step": 11644 }, { "epoch": 0.9934311550929875, "grad_norm": 2.28405547405691, "learning_rate": 1.1315764307695542e-08, "loss": 0.1521, "step": 11645 }, { "epoch": 0.9935164647671045, "grad_norm": 1.5206507307541852, "learning_rate": 1.1023767627377224e-08, "loss": 0.1114, "step": 11646 }, { "epoch": 0.9936017744412217, "grad_norm": 1.6323276511751603, "learning_rate": 1.0735587339749665e-08, "loss": 0.1816, "step": 11647 }, { "epoch": 0.9936870841153387, "grad_norm": 2.1703448497099713, "learning_rate": 1.0451223466811933e-08, "loss": 0.1911, "step": 11648 }, { "epoch": 0.9937723937894557, "grad_norm": 1.2818958236513223, "learning_rate": 1.0170676030285542e-08, "loss": 0.1656, "step": 11649 }, { "epoch": 0.9938577034635727, "grad_norm": 1.963498817468748, "learning_rate": 9.893945051581143e-09, "loss": 0.1732, "step": 11650 }, { "epoch": 0.9939430131376898, "grad_norm": 1.6978323355796916, "learning_rate": 9.621030551826282e-09, "loss": 0.2014, "step": 11651 }, { "epoch": 0.9940283228118069, "grad_norm": 2.175492930185784, "learning_rate": 9.351932551854292e-09, "loss": 0.1953, "step": 11652 }, { "epoch": 0.9941136324859239, "grad_norm": 1.7040671446201947, "learning_rate": 9.086651072215402e-09, "loss": 0.166, "step": 11653 }, { "epoch": 0.9941989421600409, "grad_norm": 1.3955154578905118, "learning_rate": 8.825186133160079e-09, "loss": 0.2272, "step": 11654 }, { "epoch": 0.994284251834158, "grad_norm": 3.232219917926278, "learning_rate": 8.567537754650135e-09, "loss": 0.1619, "step": 11655 }, { "epoch": 0.994369561508275, "grad_norm": 1.349040064551146, "learning_rate": 8.313705956347618e-09, "loss": 0.2098, "step": 11656 }, { "epoch": 0.9944548711823921, "grad_norm": 2.059851824792283, "learning_rate": 8.063690757642572e-09, "loss": 0.1801, "step": 11657 }, { "epoch": 0.9945401808565091, "grad_norm": 2.3465425258370076, "learning_rate": 7.817492177619735e-09, "loss": 0.1575, "step": 11658 }, { "epoch": 0.9946254905306262, "grad_norm": 2.3436098183989578, "learning_rate": 7.575110235069626e-09, "loss": 0.1327, "step": 11659 }, { "epoch": 0.9947108002047432, "grad_norm": 1.920350190949749, "learning_rate": 7.33654494850522e-09, "loss": 0.1829, "step": 11660 }, { "epoch": 0.9947961098788602, "grad_norm": 2.452699466413769, "learning_rate": 7.101796336128619e-09, "loss": 0.2022, "step": 11661 }, { "epoch": 0.9948814195529773, "grad_norm": 2.041436110992375, "learning_rate": 6.8708644158754775e-09, "loss": 0.1315, "step": 11662 }, { "epoch": 0.9949667292270944, "grad_norm": 1.914618648774791, "learning_rate": 6.6437492053594844e-09, "loss": 0.1492, "step": 11663 }, { "epoch": 0.9950520389012114, "grad_norm": 1.4422049441785947, "learning_rate": 6.420450721933424e-09, "loss": 0.1698, "step": 11664 }, { "epoch": 0.9951373485753284, "grad_norm": 1.9709286820598648, "learning_rate": 6.200968982644773e-09, "loss": 0.2145, "step": 11665 }, { "epoch": 0.9952226582494454, "grad_norm": 1.9790705272681024, "learning_rate": 5.985304004241243e-09, "loss": 0.1514, "step": 11666 }, { "epoch": 0.9953079679235626, "grad_norm": 2.672314077963239, "learning_rate": 5.773455803187444e-09, "loss": 0.2021, "step": 11667 }, { "epoch": 0.9953932775976796, "grad_norm": 1.792893285555301, "learning_rate": 5.565424395670427e-09, "loss": 0.1752, "step": 11668 }, { "epoch": 0.9954785872717966, "grad_norm": 1.481572285933442, "learning_rate": 5.3612097975552775e-09, "loss": 0.162, "step": 11669 }, { "epoch": 0.9955638969459136, "grad_norm": 1.6005244170509914, "learning_rate": 5.160812024446182e-09, "loss": 0.1852, "step": 11670 }, { "epoch": 0.9956492066200308, "grad_norm": 1.4178599586472582, "learning_rate": 4.964231091630911e-09, "loss": 0.1259, "step": 11671 }, { "epoch": 0.9957345162941478, "grad_norm": 1.4213665063456815, "learning_rate": 4.771467014125231e-09, "loss": 0.1248, "step": 11672 }, { "epoch": 0.9958198259682648, "grad_norm": 1.91365875345541, "learning_rate": 4.582519806645147e-09, "loss": 0.1964, "step": 11673 }, { "epoch": 0.9959051356423818, "grad_norm": 2.4015077702596916, "learning_rate": 4.397389483618009e-09, "loss": 0.1772, "step": 11674 }, { "epoch": 0.9959904453164989, "grad_norm": 2.007947390288891, "learning_rate": 4.2160760591658525e-09, "loss": 0.2135, "step": 11675 }, { "epoch": 0.996075754990616, "grad_norm": 1.212257404244205, "learning_rate": 4.038579547144261e-09, "loss": 0.1504, "step": 11676 }, { "epoch": 0.996161064664733, "grad_norm": 1.5354507590882212, "learning_rate": 3.864899961097956e-09, "loss": 0.1587, "step": 11677 }, { "epoch": 0.99624637433885, "grad_norm": 2.0113095950726843, "learning_rate": 3.695037314288552e-09, "loss": 0.2101, "step": 11678 }, { "epoch": 0.9963316840129671, "grad_norm": 2.4099391054239954, "learning_rate": 3.528991619683453e-09, "loss": 0.1972, "step": 11679 }, { "epoch": 0.9964169936870841, "grad_norm": 1.4553522248648914, "learning_rate": 3.3667628899558545e-09, "loss": 0.1725, "step": 11680 }, { "epoch": 0.9965023033612012, "grad_norm": 1.2124550447290372, "learning_rate": 3.2083511374958465e-09, "loss": 0.1729, "step": 11681 }, { "epoch": 0.9965876130353182, "grad_norm": 1.9204662711497062, "learning_rate": 3.053756374393757e-09, "loss": 0.201, "step": 11682 }, { "epoch": 0.9966729227094353, "grad_norm": 1.332700499717863, "learning_rate": 2.902978612456808e-09, "loss": 0.1604, "step": 11683 }, { "epoch": 0.9967582323835523, "grad_norm": 1.5188065004239284, "learning_rate": 2.7560178631869103e-09, "loss": 0.1731, "step": 11684 }, { "epoch": 0.9968435420576693, "grad_norm": 2.553911563731232, "learning_rate": 2.61287413781397e-09, "loss": 0.1451, "step": 11685 }, { "epoch": 0.9969288517317864, "grad_norm": 1.6532502269366882, "learning_rate": 2.4735474472625806e-09, "loss": 0.1852, "step": 11686 }, { "epoch": 0.9970141614059034, "grad_norm": 1.5948971378557855, "learning_rate": 2.338037802174231e-09, "loss": 0.1816, "step": 11687 }, { "epoch": 0.9970994710800205, "grad_norm": 3.0858148091709947, "learning_rate": 2.206345212879546e-09, "loss": 0.2044, "step": 11688 }, { "epoch": 0.9971847807541375, "grad_norm": 1.4604174662473106, "learning_rate": 2.078469689448248e-09, "loss": 0.1506, "step": 11689 }, { "epoch": 0.9972700904282545, "grad_norm": 2.832512923747357, "learning_rate": 1.954411241639198e-09, "loss": 0.2085, "step": 11690 }, { "epoch": 0.9973554001023716, "grad_norm": 1.5428265856184278, "learning_rate": 1.834169878917047e-09, "loss": 0.1626, "step": 11691 }, { "epoch": 0.9974407097764887, "grad_norm": 1.6077611294057041, "learning_rate": 1.7177456104688905e-09, "loss": 0.1439, "step": 11692 }, { "epoch": 0.9975260194506057, "grad_norm": 2.1330420960869945, "learning_rate": 1.6051384451765128e-09, "loss": 0.2151, "step": 11693 }, { "epoch": 0.9976113291247227, "grad_norm": 2.3823992261358864, "learning_rate": 1.4963483916441424e-09, "loss": 0.2375, "step": 11694 }, { "epoch": 0.9976966387988397, "grad_norm": 1.7635292510939848, "learning_rate": 1.3913754581762473e-09, "loss": 0.1426, "step": 11695 }, { "epoch": 0.9977819484729569, "grad_norm": 2.6698932994996056, "learning_rate": 1.2902196527775356e-09, "loss": 0.2231, "step": 11696 }, { "epoch": 0.9978672581470739, "grad_norm": 1.3860819899320624, "learning_rate": 1.1928809831807108e-09, "loss": 0.2046, "step": 11697 }, { "epoch": 0.9979525678211909, "grad_norm": 2.3290053373728514, "learning_rate": 1.099359456818716e-09, "loss": 0.1004, "step": 11698 }, { "epoch": 0.9980378774953079, "grad_norm": 1.7684337589367927, "learning_rate": 1.0096550808191828e-09, "loss": 0.1214, "step": 11699 }, { "epoch": 0.998123187169425, "grad_norm": 1.829174099373398, "learning_rate": 9.2376786204329e-10, "loss": 0.1827, "step": 11700 }, { "epoch": 0.9982084968435421, "grad_norm": 1.7715534167310052, "learning_rate": 8.416978070413529e-10, "loss": 0.1469, "step": 11701 }, { "epoch": 0.9982938065176591, "grad_norm": 1.5960598830459196, "learning_rate": 7.634449220805806e-10, "loss": 0.1689, "step": 11702 }, { "epoch": 0.9983791161917761, "grad_norm": 1.7408880339788042, "learning_rate": 6.890092131339732e-10, "loss": 0.1742, "step": 11703 }, { "epoch": 0.9984644258658932, "grad_norm": 1.674430534008567, "learning_rate": 6.183906858858723e-10, "loss": 0.1852, "step": 11704 }, { "epoch": 0.9985497355400103, "grad_norm": 1.5614855715780989, "learning_rate": 5.515893457264109e-10, "loss": 0.1297, "step": 11705 }, { "epoch": 0.9986350452141273, "grad_norm": 1.8135635938492884, "learning_rate": 4.886051977626149e-10, "loss": 0.1333, "step": 11706 }, { "epoch": 0.9987203548882443, "grad_norm": 1.7238130383379164, "learning_rate": 4.294382467906477e-10, "loss": 0.1669, "step": 11707 }, { "epoch": 0.9988056645623614, "grad_norm": 1.604323226753156, "learning_rate": 3.7408849733466813e-10, "loss": 0.1641, "step": 11708 }, { "epoch": 0.9988909742364784, "grad_norm": 1.9371573852817334, "learning_rate": 3.2255595361907475e-10, "loss": 0.1704, "step": 11709 }, { "epoch": 0.9989762839105955, "grad_norm": 1.9118266846461904, "learning_rate": 2.748406195796083e-10, "loss": 0.1763, "step": 11710 }, { "epoch": 0.9990615935847125, "grad_norm": 1.8683903223625404, "learning_rate": 2.3094249885780016e-10, "loss": 0.1331, "step": 11711 }, { "epoch": 0.9991469032588296, "grad_norm": 1.865484762112669, "learning_rate": 1.9086159480097287e-10, "loss": 0.1502, "step": 11712 }, { "epoch": 0.9992322129329466, "grad_norm": 1.4362576963054134, "learning_rate": 1.5459791047889305e-10, "loss": 0.1435, "step": 11713 }, { "epoch": 0.9993175226070636, "grad_norm": 1.1415880918727812, "learning_rate": 1.221514486504649e-10, "loss": 0.1056, "step": 11714 }, { "epoch": 0.9994028322811807, "grad_norm": 1.3717853073656952, "learning_rate": 9.35222117970369e-11, "loss": 0.1295, "step": 11715 }, { "epoch": 0.9994881419552978, "grad_norm": 1.3068893948477815, "learning_rate": 6.871020210574841e-11, "loss": 0.1355, "step": 11716 }, { "epoch": 0.9995734516294148, "grad_norm": 1.6665547651236217, "learning_rate": 4.771542146952967e-11, "loss": 0.2112, "step": 11717 }, { "epoch": 0.9996587613035318, "grad_norm": 1.9498945452223289, "learning_rate": 3.053787148710185e-11, "loss": 0.1962, "step": 11718 }, { "epoch": 0.9997440709776488, "grad_norm": 2.5822525737512154, "learning_rate": 1.717755347963035e-11, "loss": 0.2002, "step": 11719 }, { "epoch": 0.999829380651766, "grad_norm": 1.6951234773654764, "learning_rate": 7.634468457418109e-12, "loss": 0.1743, "step": 11720 }, { "epoch": 0.999914690325883, "grad_norm": 2.6265397778310553, "learning_rate": 1.908617147661218e-12, "loss": 0.1708, "step": 11721 }, { "epoch": 1.0, "grad_norm": 1.9556974026622904, "learning_rate": 0.0, "loss": 0.1204, "step": 11722 }, { "epoch": 1.0, "step": 11722, "total_flos": 2.204684751559092e+19, "train_loss": 0.2725867780617959, "train_runtime": 52680.0383, "train_samples_per_second": 3.56, "train_steps_per_second": 0.223 } ], "logging_steps": 1.0, "max_steps": 11722, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 2.204684751559092e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }