{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999619245852953, "eval_steps": 500, "global_step": 39394, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.076721960630021e-05, "grad_norm": 0.15864953162799908, "learning_rate": 2.538071065989848e-07, "loss": 1.3239, "step": 1 }, { "epoch": 0.00025383609803150106, "grad_norm": 0.14905968005880138, "learning_rate": 1.2690355329949238e-06, "loss": 1.329, "step": 5 }, { "epoch": 0.0005076721960630021, "grad_norm": 0.1636764111930458, "learning_rate": 2.5380710659898476e-06, "loss": 1.3501, "step": 10 }, { "epoch": 0.0007615082940945031, "grad_norm": 0.1541710477727859, "learning_rate": 3.807106598984772e-06, "loss": 1.3238, "step": 15 }, { "epoch": 0.0010153443921260042, "grad_norm": 0.14176909931295556, "learning_rate": 5.076142131979695e-06, "loss": 1.3062, "step": 20 }, { "epoch": 0.0012691804901575053, "grad_norm": 0.1439438019587985, "learning_rate": 6.345177664974619e-06, "loss": 1.3475, "step": 25 }, { "epoch": 0.0015230165881890063, "grad_norm": 0.14253456248928092, "learning_rate": 7.614213197969544e-06, "loss": 1.3078, "step": 30 }, { "epoch": 0.0017768526862205075, "grad_norm": 0.12204437848030636, "learning_rate": 8.883248730964468e-06, "loss": 1.2961, "step": 35 }, { "epoch": 0.0020306887842520085, "grad_norm": 0.11799978081069029, "learning_rate": 1.015228426395939e-05, "loss": 1.3221, "step": 40 }, { "epoch": 0.0022845248822835097, "grad_norm": 0.10930209965571382, "learning_rate": 1.1421319796954315e-05, "loss": 1.2811, "step": 45 }, { "epoch": 0.0025383609803150105, "grad_norm": 0.10308663751695503, "learning_rate": 1.2690355329949238e-05, "loss": 1.3237, "step": 50 }, { "epoch": 0.0027921970783465117, "grad_norm": 0.09409236832464776, "learning_rate": 1.3959390862944163e-05, "loss": 1.2774, "step": 55 }, { "epoch": 0.0030460331763780125, "grad_norm": 0.10483953760283637, "learning_rate": 1.5228426395939088e-05, "loss": 1.3049, "step": 60 }, { "epoch": 0.0032998692744095138, "grad_norm": 0.10993593164714496, "learning_rate": 1.6497461928934012e-05, "loss": 1.2047, "step": 65 }, { "epoch": 0.003553705372441015, "grad_norm": 0.09323304332570757, "learning_rate": 1.7766497461928935e-05, "loss": 1.2394, "step": 70 }, { "epoch": 0.0038075414704725158, "grad_norm": 0.0972520217773221, "learning_rate": 1.9035532994923858e-05, "loss": 1.2003, "step": 75 }, { "epoch": 0.004061377568504017, "grad_norm": 0.09137245494214241, "learning_rate": 2.030456852791878e-05, "loss": 1.2237, "step": 80 }, { "epoch": 0.004315213666535518, "grad_norm": 0.08933550138779918, "learning_rate": 2.1573604060913704e-05, "loss": 1.2082, "step": 85 }, { "epoch": 0.0045690497645670194, "grad_norm": 0.09208543321006701, "learning_rate": 2.284263959390863e-05, "loss": 1.19, "step": 90 }, { "epoch": 0.00482288586259852, "grad_norm": 0.08961688727512934, "learning_rate": 2.4111675126903553e-05, "loss": 1.2124, "step": 95 }, { "epoch": 0.005076721960630021, "grad_norm": 0.09273865800371339, "learning_rate": 2.5380710659898476e-05, "loss": 1.1408, "step": 100 }, { "epoch": 0.005330558058661523, "grad_norm": 0.092107871361099, "learning_rate": 2.6649746192893403e-05, "loss": 1.1855, "step": 105 }, { "epoch": 0.0055843941566930235, "grad_norm": 0.08602383660420931, "learning_rate": 2.7918781725888326e-05, "loss": 1.1873, "step": 110 }, { "epoch": 0.005838230254724524, "grad_norm": 0.07929380350508854, "learning_rate": 2.918781725888325e-05, "loss": 1.1993, "step": 115 }, { "epoch": 0.006092066352756025, "grad_norm": 0.08325591539156868, "learning_rate": 3.0456852791878175e-05, "loss": 1.1301, "step": 120 }, { "epoch": 0.006345902450787527, "grad_norm": 0.08227812313820977, "learning_rate": 3.17258883248731e-05, "loss": 1.1456, "step": 125 }, { "epoch": 0.0065997385488190275, "grad_norm": 0.0884047464153757, "learning_rate": 3.2994923857868024e-05, "loss": 1.1644, "step": 130 }, { "epoch": 0.006853574646850528, "grad_norm": 0.09724548627073343, "learning_rate": 3.4263959390862944e-05, "loss": 1.1114, "step": 135 }, { "epoch": 0.00710741074488203, "grad_norm": 0.09124965712938661, "learning_rate": 3.553299492385787e-05, "loss": 1.1665, "step": 140 }, { "epoch": 0.007361246842913531, "grad_norm": 0.08194520059165367, "learning_rate": 3.680203045685279e-05, "loss": 1.1483, "step": 145 }, { "epoch": 0.0076150829409450315, "grad_norm": 0.07766910265897527, "learning_rate": 3.8071065989847716e-05, "loss": 1.1598, "step": 150 }, { "epoch": 0.007868919038976532, "grad_norm": 0.07806961902430061, "learning_rate": 3.934010152284264e-05, "loss": 1.1198, "step": 155 }, { "epoch": 0.008122755137008034, "grad_norm": 0.07852352422152742, "learning_rate": 4.060913705583756e-05, "loss": 1.1422, "step": 160 }, { "epoch": 0.008376591235039536, "grad_norm": 0.08226162591912231, "learning_rate": 4.187817258883249e-05, "loss": 1.1221, "step": 165 }, { "epoch": 0.008630427333071036, "grad_norm": 0.0835786517073031, "learning_rate": 4.314720812182741e-05, "loss": 1.0941, "step": 170 }, { "epoch": 0.008884263431102537, "grad_norm": 0.09764129750405323, "learning_rate": 4.4416243654822335e-05, "loss": 1.1005, "step": 175 }, { "epoch": 0.009138099529134039, "grad_norm": 0.08226060265287585, "learning_rate": 4.568527918781726e-05, "loss": 1.1361, "step": 180 }, { "epoch": 0.009391935627165539, "grad_norm": 0.08031312286175114, "learning_rate": 4.695431472081219e-05, "loss": 1.108, "step": 185 }, { "epoch": 0.00964577172519704, "grad_norm": 0.08841791867770428, "learning_rate": 4.822335025380711e-05, "loss": 1.1259, "step": 190 }, { "epoch": 0.009899607823228542, "grad_norm": 0.08361442662348433, "learning_rate": 4.949238578680203e-05, "loss": 1.0744, "step": 195 }, { "epoch": 0.010153443921260042, "grad_norm": 0.0815282193734842, "learning_rate": 5.076142131979695e-05, "loss": 1.121, "step": 200 }, { "epoch": 0.010407280019291544, "grad_norm": 0.07467113655567367, "learning_rate": 5.2030456852791886e-05, "loss": 1.1022, "step": 205 }, { "epoch": 0.010661116117323045, "grad_norm": 0.08367624677037853, "learning_rate": 5.3299492385786806e-05, "loss": 1.0833, "step": 210 }, { "epoch": 0.010914952215354545, "grad_norm": 0.07524274626931184, "learning_rate": 5.4568527918781725e-05, "loss": 1.0565, "step": 215 }, { "epoch": 0.011168788313386047, "grad_norm": 0.08144443622976084, "learning_rate": 5.583756345177665e-05, "loss": 1.0954, "step": 220 }, { "epoch": 0.011422624411417547, "grad_norm": 0.09194259701159102, "learning_rate": 5.710659898477157e-05, "loss": 1.071, "step": 225 }, { "epoch": 0.011676460509449049, "grad_norm": 0.08155974427947957, "learning_rate": 5.83756345177665e-05, "loss": 1.1122, "step": 230 }, { "epoch": 0.01193029660748055, "grad_norm": 0.07658495606309469, "learning_rate": 5.9644670050761424e-05, "loss": 1.09, "step": 235 }, { "epoch": 0.01218413270551205, "grad_norm": 0.07434820552889032, "learning_rate": 6.091370558375635e-05, "loss": 1.0864, "step": 240 }, { "epoch": 0.012437968803543552, "grad_norm": 0.08079926865711588, "learning_rate": 6.218274111675127e-05, "loss": 1.0949, "step": 245 }, { "epoch": 0.012691804901575053, "grad_norm": 0.08262137560889006, "learning_rate": 6.34517766497462e-05, "loss": 1.105, "step": 250 }, { "epoch": 0.012945640999606553, "grad_norm": 0.0854636147439845, "learning_rate": 6.472081218274112e-05, "loss": 1.0643, "step": 255 }, { "epoch": 0.013199477097638055, "grad_norm": 0.08474175077392633, "learning_rate": 6.598984771573605e-05, "loss": 1.0722, "step": 260 }, { "epoch": 0.013453313195669557, "grad_norm": 0.08552599475386766, "learning_rate": 6.725888324873096e-05, "loss": 1.0786, "step": 265 }, { "epoch": 0.013707149293701057, "grad_norm": 0.08726991747767048, "learning_rate": 6.852791878172589e-05, "loss": 1.0849, "step": 270 }, { "epoch": 0.013960985391732558, "grad_norm": 0.08813340694786906, "learning_rate": 6.979695431472081e-05, "loss": 1.0643, "step": 275 }, { "epoch": 0.01421482148976406, "grad_norm": 0.09347732952876346, "learning_rate": 7.106598984771574e-05, "loss": 1.0523, "step": 280 }, { "epoch": 0.01446865758779556, "grad_norm": 0.08348338921493421, "learning_rate": 7.233502538071065e-05, "loss": 1.0793, "step": 285 }, { "epoch": 0.014722493685827061, "grad_norm": 0.07853436298403037, "learning_rate": 7.360406091370558e-05, "loss": 1.0541, "step": 290 }, { "epoch": 0.014976329783858563, "grad_norm": 0.0794279603998058, "learning_rate": 7.48730964467005e-05, "loss": 1.0579, "step": 295 }, { "epoch": 0.015230165881890063, "grad_norm": 0.08347965495576219, "learning_rate": 7.614213197969543e-05, "loss": 1.0461, "step": 300 }, { "epoch": 0.015484001979921565, "grad_norm": 0.07793629997426954, "learning_rate": 7.741116751269036e-05, "loss": 1.0778, "step": 305 }, { "epoch": 0.015737838077953065, "grad_norm": 0.08768562720374157, "learning_rate": 7.868020304568529e-05, "loss": 1.0419, "step": 310 }, { "epoch": 0.015991674175984568, "grad_norm": 0.07869048574095998, "learning_rate": 7.994923857868021e-05, "loss": 1.019, "step": 315 }, { "epoch": 0.016245510274016068, "grad_norm": 0.08027684071782347, "learning_rate": 8.121827411167512e-05, "loss": 1.0398, "step": 320 }, { "epoch": 0.016499346372047568, "grad_norm": 0.07806575846645758, "learning_rate": 8.248730964467005e-05, "loss": 1.0207, "step": 325 }, { "epoch": 0.01675318247007907, "grad_norm": 0.08512530192784429, "learning_rate": 8.375634517766498e-05, "loss": 1.0335, "step": 330 }, { "epoch": 0.01700701856811057, "grad_norm": 0.08983329667640572, "learning_rate": 8.50253807106599e-05, "loss": 1.0185, "step": 335 }, { "epoch": 0.01726085466614207, "grad_norm": 0.09217787868906796, "learning_rate": 8.629441624365482e-05, "loss": 1.0486, "step": 340 }, { "epoch": 0.017514690764173575, "grad_norm": 0.08363234609584456, "learning_rate": 8.756345177664974e-05, "loss": 1.056, "step": 345 }, { "epoch": 0.017768526862205074, "grad_norm": 0.11665488561146797, "learning_rate": 8.883248730964467e-05, "loss": 1.0563, "step": 350 }, { "epoch": 0.018022362960236574, "grad_norm": 0.08300608003336156, "learning_rate": 9.01015228426396e-05, "loss": 1.0402, "step": 355 }, { "epoch": 0.018276199058268078, "grad_norm": 0.08107046751772096, "learning_rate": 9.137055837563452e-05, "loss": 1.028, "step": 360 }, { "epoch": 0.018530035156299578, "grad_norm": 0.08935543618882195, "learning_rate": 9.263959390862945e-05, "loss": 1.0241, "step": 365 }, { "epoch": 0.018783871254331078, "grad_norm": 0.08744583159544143, "learning_rate": 9.390862944162437e-05, "loss": 1.0177, "step": 370 }, { "epoch": 0.01903770735236258, "grad_norm": 0.09523708384781124, "learning_rate": 9.517766497461929e-05, "loss": 1.0229, "step": 375 }, { "epoch": 0.01929154345039408, "grad_norm": 0.10030797789210355, "learning_rate": 9.644670050761421e-05, "loss": 1.025, "step": 380 }, { "epoch": 0.01954537954842558, "grad_norm": 0.0818398575898204, "learning_rate": 9.771573604060914e-05, "loss": 1.0136, "step": 385 }, { "epoch": 0.019799215646457084, "grad_norm": 0.08539798405352364, "learning_rate": 9.898477157360407e-05, "loss": 1.0241, "step": 390 }, { "epoch": 0.020053051744488584, "grad_norm": 0.08012374067519754, "learning_rate": 0.00010025380710659898, "loss": 1.047, "step": 395 }, { "epoch": 0.020306887842520084, "grad_norm": 0.08459683804384517, "learning_rate": 0.0001015228426395939, "loss": 1.0318, "step": 400 }, { "epoch": 0.020560723940551588, "grad_norm": 0.0973916631908539, "learning_rate": 0.00010279187817258883, "loss": 1.0208, "step": 405 }, { "epoch": 0.020814560038583087, "grad_norm": 0.0851948696729007, "learning_rate": 0.00010406091370558377, "loss": 1.0182, "step": 410 }, { "epoch": 0.021068396136614587, "grad_norm": 0.08207878483686948, "learning_rate": 0.00010532994923857868, "loss": 1.0691, "step": 415 }, { "epoch": 0.02132223223464609, "grad_norm": 0.07522771709986531, "learning_rate": 0.00010659898477157361, "loss": 1.0569, "step": 420 }, { "epoch": 0.02157606833267759, "grad_norm": 0.08659226604008653, "learning_rate": 0.00010786802030456854, "loss": 1.0239, "step": 425 }, { "epoch": 0.02182990443070909, "grad_norm": 0.09747634466651632, "learning_rate": 0.00010913705583756345, "loss": 1.0014, "step": 430 }, { "epoch": 0.02208374052874059, "grad_norm": 0.08613760359380251, "learning_rate": 0.00011040609137055838, "loss": 1.0157, "step": 435 }, { "epoch": 0.022337576626772094, "grad_norm": 0.09896120182756117, "learning_rate": 0.0001116751269035533, "loss": 1.0245, "step": 440 }, { "epoch": 0.022591412724803594, "grad_norm": 0.08446665451805722, "learning_rate": 0.00011294416243654823, "loss": 1.0125, "step": 445 }, { "epoch": 0.022845248822835094, "grad_norm": 0.13441195448149143, "learning_rate": 0.00011421319796954314, "loss": 1.0254, "step": 450 }, { "epoch": 0.023099084920866597, "grad_norm": 0.08771644201268806, "learning_rate": 0.00011548223350253807, "loss": 1.0063, "step": 455 }, { "epoch": 0.023352921018898097, "grad_norm": 0.09171173551952413, "learning_rate": 0.000116751269035533, "loss": 0.9906, "step": 460 }, { "epoch": 0.023606757116929597, "grad_norm": 0.13100999811186806, "learning_rate": 0.00011802030456852793, "loss": 0.9969, "step": 465 }, { "epoch": 0.0238605932149611, "grad_norm": 0.1147423089113576, "learning_rate": 0.00011928934010152285, "loss": 1.0127, "step": 470 }, { "epoch": 0.0241144293129926, "grad_norm": 0.11201089262629248, "learning_rate": 0.00012055837563451777, "loss": 0.9893, "step": 475 }, { "epoch": 0.0243682654110241, "grad_norm": 0.09622623862828067, "learning_rate": 0.0001218274111675127, "loss": 1.0286, "step": 480 }, { "epoch": 0.024622101509055604, "grad_norm": 0.08973629865886673, "learning_rate": 0.0001230964467005076, "loss": 1.0275, "step": 485 }, { "epoch": 0.024875937607087104, "grad_norm": 0.08668312593258047, "learning_rate": 0.00012436548223350254, "loss": 0.9835, "step": 490 }, { "epoch": 0.025129773705118603, "grad_norm": 0.10053938502442454, "learning_rate": 0.00012563451776649747, "loss": 1.0258, "step": 495 }, { "epoch": 0.025383609803150107, "grad_norm": 0.08396579155862274, "learning_rate": 0.0001269035532994924, "loss": 0.9858, "step": 500 }, { "epoch": 0.025637445901181607, "grad_norm": 0.10095225214307435, "learning_rate": 0.00012817258883248732, "loss": 1.0041, "step": 505 }, { "epoch": 0.025891281999213107, "grad_norm": 0.087012374837508, "learning_rate": 0.00012944162436548224, "loss": 0.983, "step": 510 }, { "epoch": 0.02614511809724461, "grad_norm": 0.08239464061368434, "learning_rate": 0.00013071065989847717, "loss": 1.0389, "step": 515 }, { "epoch": 0.02639895419527611, "grad_norm": 0.08920388024916706, "learning_rate": 0.0001319796954314721, "loss": 1.0074, "step": 520 }, { "epoch": 0.02665279029330761, "grad_norm": 0.1050216166751352, "learning_rate": 0.00013324873096446702, "loss": 1.0169, "step": 525 }, { "epoch": 0.026906626391339113, "grad_norm": 0.10270804073717273, "learning_rate": 0.00013451776649746192, "loss": 0.978, "step": 530 }, { "epoch": 0.027160462489370613, "grad_norm": 0.08639584568220142, "learning_rate": 0.00013578680203045685, "loss": 0.9985, "step": 535 }, { "epoch": 0.027414298587402113, "grad_norm": 0.08818798798492308, "learning_rate": 0.00013705583756345178, "loss": 0.9873, "step": 540 }, { "epoch": 0.027668134685433617, "grad_norm": 0.07921915519577051, "learning_rate": 0.0001383248730964467, "loss": 0.9838, "step": 545 }, { "epoch": 0.027921970783465117, "grad_norm": 0.08350487415814681, "learning_rate": 0.00013959390862944163, "loss": 0.996, "step": 550 }, { "epoch": 0.028175806881496616, "grad_norm": 0.10514776396511805, "learning_rate": 0.00014086294416243656, "loss": 0.9979, "step": 555 }, { "epoch": 0.02842964297952812, "grad_norm": 0.0975588156259503, "learning_rate": 0.00014213197969543148, "loss": 0.9708, "step": 560 }, { "epoch": 0.02868347907755962, "grad_norm": 0.0758629798387893, "learning_rate": 0.0001434010152284264, "loss": 0.9998, "step": 565 }, { "epoch": 0.02893731517559112, "grad_norm": 0.08341704765235529, "learning_rate": 0.0001446700507614213, "loss": 1.0117, "step": 570 }, { "epoch": 0.029191151273622623, "grad_norm": 0.08008927009486444, "learning_rate": 0.00014593908629441623, "loss": 0.9659, "step": 575 }, { "epoch": 0.029444987371654123, "grad_norm": 0.07446903090423089, "learning_rate": 0.00014720812182741116, "loss": 0.9771, "step": 580 }, { "epoch": 0.029698823469685623, "grad_norm": 0.07852372661727479, "learning_rate": 0.00014847715736040609, "loss": 0.961, "step": 585 }, { "epoch": 0.029952659567717126, "grad_norm": 0.0752111089570152, "learning_rate": 0.000149746192893401, "loss": 1.006, "step": 590 }, { "epoch": 0.030206495665748626, "grad_norm": 0.09068549576897399, "learning_rate": 0.00015101522842639594, "loss": 0.9759, "step": 595 }, { "epoch": 0.030460331763780126, "grad_norm": 0.09379398950939372, "learning_rate": 0.00015228426395939087, "loss": 1.0112, "step": 600 }, { "epoch": 0.03071416786181163, "grad_norm": 0.08701652114792356, "learning_rate": 0.0001535532994923858, "loss": 0.9693, "step": 605 }, { "epoch": 0.03096800395984313, "grad_norm": 0.10496362715714985, "learning_rate": 0.00015482233502538072, "loss": 0.955, "step": 610 }, { "epoch": 0.03122184005787463, "grad_norm": 0.09586275988802406, "learning_rate": 0.00015609137055837564, "loss": 0.9865, "step": 615 }, { "epoch": 0.03147567615590613, "grad_norm": 0.08696332142624916, "learning_rate": 0.00015736040609137057, "loss": 1.0057, "step": 620 }, { "epoch": 0.03172951225393763, "grad_norm": 0.09714601129807855, "learning_rate": 0.0001586294416243655, "loss": 0.9752, "step": 625 }, { "epoch": 0.031983348351969136, "grad_norm": 0.08500781093004785, "learning_rate": 0.00015989847715736042, "loss": 0.9656, "step": 630 }, { "epoch": 0.032237184450000636, "grad_norm": 0.10825366408314628, "learning_rate": 0.00016116751269035535, "loss": 0.9865, "step": 635 }, { "epoch": 0.032491020548032136, "grad_norm": 0.08900977370261648, "learning_rate": 0.00016243654822335025, "loss": 0.9635, "step": 640 }, { "epoch": 0.032744856646063636, "grad_norm": 0.0913416633493855, "learning_rate": 0.00016370558375634518, "loss": 0.9993, "step": 645 }, { "epoch": 0.032998692744095136, "grad_norm": 0.12119539558765004, "learning_rate": 0.0001649746192893401, "loss": 0.97, "step": 650 }, { "epoch": 0.033252528842126636, "grad_norm": 0.09430292477143645, "learning_rate": 0.00016624365482233503, "loss": 0.9737, "step": 655 }, { "epoch": 0.03350636494015814, "grad_norm": 0.11323094974971261, "learning_rate": 0.00016751269035532995, "loss": 0.9648, "step": 660 }, { "epoch": 0.03376020103818964, "grad_norm": 0.08056183023222455, "learning_rate": 0.00016878172588832488, "loss": 0.9635, "step": 665 }, { "epoch": 0.03401403713622114, "grad_norm": 0.08436698763451582, "learning_rate": 0.0001700507614213198, "loss": 0.9571, "step": 670 }, { "epoch": 0.03426787323425264, "grad_norm": 0.07687657151367193, "learning_rate": 0.0001713197969543147, "loss": 0.9192, "step": 675 }, { "epoch": 0.03452170933228414, "grad_norm": 0.07820013670299855, "learning_rate": 0.00017258883248730963, "loss": 0.958, "step": 680 }, { "epoch": 0.03477554543031564, "grad_norm": 0.10189399061770561, "learning_rate": 0.00017385786802030456, "loss": 0.9858, "step": 685 }, { "epoch": 0.03502938152834715, "grad_norm": 0.08328021474448963, "learning_rate": 0.00017512690355329949, "loss": 0.9404, "step": 690 }, { "epoch": 0.03528321762637865, "grad_norm": 0.07900613329182285, "learning_rate": 0.0001763959390862944, "loss": 0.9628, "step": 695 }, { "epoch": 0.03553705372441015, "grad_norm": 0.08316459785486312, "learning_rate": 0.00017766497461928934, "loss": 0.9613, "step": 700 }, { "epoch": 0.03579088982244165, "grad_norm": 0.08847648576021831, "learning_rate": 0.00017893401015228426, "loss": 0.9662, "step": 705 }, { "epoch": 0.03604472592047315, "grad_norm": 0.07758022392258437, "learning_rate": 0.0001802030456852792, "loss": 0.9166, "step": 710 }, { "epoch": 0.03629856201850465, "grad_norm": 0.12168835922961967, "learning_rate": 0.00018147208121827412, "loss": 0.9819, "step": 715 }, { "epoch": 0.036552398116536156, "grad_norm": 0.07773538987020438, "learning_rate": 0.00018274111675126904, "loss": 0.975, "step": 720 }, { "epoch": 0.036806234214567655, "grad_norm": 0.08485925528102167, "learning_rate": 0.00018401015228426397, "loss": 0.9623, "step": 725 }, { "epoch": 0.037060070312599155, "grad_norm": 0.09604382230628937, "learning_rate": 0.0001852791878172589, "loss": 0.9313, "step": 730 }, { "epoch": 0.037313906410630655, "grad_norm": 0.08951699416916307, "learning_rate": 0.00018654822335025382, "loss": 0.9364, "step": 735 }, { "epoch": 0.037567742508662155, "grad_norm": 0.08625284511648179, "learning_rate": 0.00018781725888324875, "loss": 0.959, "step": 740 }, { "epoch": 0.037821578606693655, "grad_norm": 0.09373307459150941, "learning_rate": 0.00018908629441624368, "loss": 0.9975, "step": 745 }, { "epoch": 0.03807541470472516, "grad_norm": 0.0990087997778577, "learning_rate": 0.00019035532994923857, "loss": 0.9222, "step": 750 }, { "epoch": 0.03832925080275666, "grad_norm": 0.09971443126144404, "learning_rate": 0.0001916243654822335, "loss": 0.9358, "step": 755 }, { "epoch": 0.03858308690078816, "grad_norm": 0.0898092943621418, "learning_rate": 0.00019289340101522843, "loss": 0.9667, "step": 760 }, { "epoch": 0.03883692299881966, "grad_norm": 0.083488814344157, "learning_rate": 0.00019416243654822335, "loss": 0.9818, "step": 765 }, { "epoch": 0.03909075909685116, "grad_norm": 0.08175959408602282, "learning_rate": 0.00019543147208121828, "loss": 0.9801, "step": 770 }, { "epoch": 0.03934459519488266, "grad_norm": 0.08853579804824435, "learning_rate": 0.0001967005076142132, "loss": 0.9481, "step": 775 }, { "epoch": 0.03959843129291417, "grad_norm": 0.09238490602473444, "learning_rate": 0.00019796954314720813, "loss": 0.9643, "step": 780 }, { "epoch": 0.03985226739094567, "grad_norm": 0.08970333657331545, "learning_rate": 0.00019923857868020303, "loss": 0.9572, "step": 785 }, { "epoch": 0.04010610348897717, "grad_norm": 0.1360057711160532, "learning_rate": 0.00020050761421319796, "loss": 0.8638, "step": 790 }, { "epoch": 0.04035993958700867, "grad_norm": 0.09964225259221349, "learning_rate": 0.00020177664974619288, "loss": 0.9408, "step": 795 }, { "epoch": 0.04061377568504017, "grad_norm": 0.10801368907357911, "learning_rate": 0.0002030456852791878, "loss": 0.9597, "step": 800 }, { "epoch": 0.04086761178307167, "grad_norm": 0.0953487385545848, "learning_rate": 0.00020431472081218274, "loss": 0.9333, "step": 805 }, { "epoch": 0.041121447881103175, "grad_norm": 0.09410620158866818, "learning_rate": 0.00020558375634517766, "loss": 0.9222, "step": 810 }, { "epoch": 0.041375283979134675, "grad_norm": 0.08051949744621571, "learning_rate": 0.0002068527918781726, "loss": 0.9376, "step": 815 }, { "epoch": 0.041629120077166175, "grad_norm": 0.07938855349388858, "learning_rate": 0.00020812182741116754, "loss": 0.9635, "step": 820 }, { "epoch": 0.041882956175197675, "grad_norm": 0.08376591824164321, "learning_rate": 0.00020939086294416244, "loss": 0.9408, "step": 825 }, { "epoch": 0.042136792273229175, "grad_norm": 0.08297219747430006, "learning_rate": 0.00021065989847715737, "loss": 0.923, "step": 830 }, { "epoch": 0.042390628371260675, "grad_norm": 0.08597633868735409, "learning_rate": 0.0002119289340101523, "loss": 0.9231, "step": 835 }, { "epoch": 0.04264446446929218, "grad_norm": 0.09452687222618845, "learning_rate": 0.00021319796954314722, "loss": 0.9596, "step": 840 }, { "epoch": 0.04289830056732368, "grad_norm": 0.08675084777558249, "learning_rate": 0.00021446700507614215, "loss": 0.941, "step": 845 }, { "epoch": 0.04315213666535518, "grad_norm": 0.07502595056970239, "learning_rate": 0.00021573604060913707, "loss": 0.9338, "step": 850 }, { "epoch": 0.04340597276338668, "grad_norm": 0.0792022473467377, "learning_rate": 0.000217005076142132, "loss": 0.9318, "step": 855 }, { "epoch": 0.04365980886141818, "grad_norm": 0.08238097472689676, "learning_rate": 0.0002182741116751269, "loss": 0.9628, "step": 860 }, { "epoch": 0.04391364495944968, "grad_norm": 0.07527517352766562, "learning_rate": 0.00021954314720812183, "loss": 0.9597, "step": 865 }, { "epoch": 0.04416748105748118, "grad_norm": 0.08949096082306084, "learning_rate": 0.00022081218274111675, "loss": 0.9109, "step": 870 }, { "epoch": 0.04442131715551269, "grad_norm": 0.08293554991957394, "learning_rate": 0.00022208121827411168, "loss": 0.9384, "step": 875 }, { "epoch": 0.04467515325354419, "grad_norm": 0.08262362006065073, "learning_rate": 0.0002233502538071066, "loss": 0.9568, "step": 880 }, { "epoch": 0.04492898935157569, "grad_norm": 0.08997872213327857, "learning_rate": 0.00022461928934010153, "loss": 0.9374, "step": 885 }, { "epoch": 0.04518282544960719, "grad_norm": 0.08852892957382927, "learning_rate": 0.00022588832487309646, "loss": 0.9174, "step": 890 }, { "epoch": 0.04543666154763869, "grad_norm": 0.09818785284686211, "learning_rate": 0.00022715736040609136, "loss": 0.9112, "step": 895 }, { "epoch": 0.04569049764567019, "grad_norm": 0.08355131879764591, "learning_rate": 0.00022842639593908628, "loss": 0.9554, "step": 900 }, { "epoch": 0.045944333743701694, "grad_norm": 0.07554262750779281, "learning_rate": 0.0002296954314720812, "loss": 0.9739, "step": 905 }, { "epoch": 0.046198169841733194, "grad_norm": 0.0716432379781988, "learning_rate": 0.00023096446700507614, "loss": 0.9328, "step": 910 }, { "epoch": 0.046452005939764694, "grad_norm": 0.07364052998091251, "learning_rate": 0.00023223350253807106, "loss": 0.9073, "step": 915 }, { "epoch": 0.046705842037796194, "grad_norm": 0.08540630983432985, "learning_rate": 0.000233502538071066, "loss": 0.9453, "step": 920 }, { "epoch": 0.046959678135827694, "grad_norm": 0.10141352757768125, "learning_rate": 0.00023477157360406092, "loss": 0.9059, "step": 925 }, { "epoch": 0.047213514233859194, "grad_norm": 0.09183323394694985, "learning_rate": 0.00023604060913705587, "loss": 0.9516, "step": 930 }, { "epoch": 0.0474673503318907, "grad_norm": 0.0897158429457693, "learning_rate": 0.00023730964467005077, "loss": 0.9317, "step": 935 }, { "epoch": 0.0477211864299222, "grad_norm": 0.09512447275335688, "learning_rate": 0.0002385786802030457, "loss": 0.9422, "step": 940 }, { "epoch": 0.0479750225279537, "grad_norm": 0.07582590263359679, "learning_rate": 0.00023984771573604062, "loss": 0.8977, "step": 945 }, { "epoch": 0.0482288586259852, "grad_norm": 0.08221361898015273, "learning_rate": 0.00024111675126903555, "loss": 0.9175, "step": 950 }, { "epoch": 0.0484826947240167, "grad_norm": 0.10261789936243684, "learning_rate": 0.00024238578680203047, "loss": 0.9691, "step": 955 }, { "epoch": 0.0487365308220482, "grad_norm": 0.09292518872378591, "learning_rate": 0.0002436548223350254, "loss": 0.9269, "step": 960 }, { "epoch": 0.04899036692007971, "grad_norm": 0.07393217596037531, "learning_rate": 0.0002449238578680203, "loss": 0.8924, "step": 965 }, { "epoch": 0.04924420301811121, "grad_norm": 0.08830580695862104, "learning_rate": 0.0002461928934010152, "loss": 0.9336, "step": 970 }, { "epoch": 0.04949803911614271, "grad_norm": 0.09913522836238056, "learning_rate": 0.00024746192893401015, "loss": 0.9203, "step": 975 }, { "epoch": 0.04975187521417421, "grad_norm": 0.10652710557885169, "learning_rate": 0.0002487309644670051, "loss": 0.9181, "step": 980 }, { "epoch": 0.05000571131220571, "grad_norm": 0.08621943376512357, "learning_rate": 0.00025, "loss": 0.9063, "step": 985 }, { "epoch": 0.05025954741023721, "grad_norm": 0.08123411480977653, "learning_rate": 0.00025126903553299493, "loss": 0.9029, "step": 990 }, { "epoch": 0.050513383508268714, "grad_norm": 0.0859668573243755, "learning_rate": 0.00025253807106598986, "loss": 0.9018, "step": 995 }, { "epoch": 0.050767219606300214, "grad_norm": 0.08284496549085277, "learning_rate": 0.0002538071065989848, "loss": 0.9202, "step": 1000 }, { "epoch": 0.051021055704331714, "grad_norm": 0.08397116291066005, "learning_rate": 0.0002550761421319797, "loss": 0.9259, "step": 1005 }, { "epoch": 0.051274891802363214, "grad_norm": 0.08225567785943506, "learning_rate": 0.00025634517766497464, "loss": 0.9405, "step": 1010 }, { "epoch": 0.051528727900394714, "grad_norm": 0.08654721992572187, "learning_rate": 0.00025761421319796956, "loss": 0.8572, "step": 1015 }, { "epoch": 0.05178256399842621, "grad_norm": 0.09134774355075698, "learning_rate": 0.0002588832487309645, "loss": 0.9011, "step": 1020 }, { "epoch": 0.05203640009645772, "grad_norm": 0.09311045894028665, "learning_rate": 0.00026015228426395936, "loss": 0.8877, "step": 1025 }, { "epoch": 0.05229023619448922, "grad_norm": 0.0791699147862921, "learning_rate": 0.00026142131979695434, "loss": 0.8855, "step": 1030 }, { "epoch": 0.05254407229252072, "grad_norm": 0.08897723913709184, "learning_rate": 0.0002626903553299492, "loss": 0.9072, "step": 1035 }, { "epoch": 0.05279790839055222, "grad_norm": 0.08166632749325431, "learning_rate": 0.0002639593908629442, "loss": 0.8773, "step": 1040 }, { "epoch": 0.05305174448858372, "grad_norm": 0.07144422983936562, "learning_rate": 0.00026522842639593907, "loss": 0.9069, "step": 1045 }, { "epoch": 0.05330558058661522, "grad_norm": 0.07874387233896914, "learning_rate": 0.00026649746192893405, "loss": 0.8843, "step": 1050 }, { "epoch": 0.05355941668464673, "grad_norm": 0.06988973128828328, "learning_rate": 0.0002677664974619289, "loss": 0.925, "step": 1055 }, { "epoch": 0.05381325278267823, "grad_norm": 0.07970819951929803, "learning_rate": 0.00026903553299492385, "loss": 0.9352, "step": 1060 }, { "epoch": 0.05406708888070973, "grad_norm": 0.08397127631961518, "learning_rate": 0.00027030456852791877, "loss": 0.9153, "step": 1065 }, { "epoch": 0.05432092497874123, "grad_norm": 0.08436884698380205, "learning_rate": 0.0002715736040609137, "loss": 0.9363, "step": 1070 }, { "epoch": 0.054574761076772726, "grad_norm": 0.08214340823603501, "learning_rate": 0.0002728426395939086, "loss": 0.9147, "step": 1075 }, { "epoch": 0.054828597174804226, "grad_norm": 0.1011005398112684, "learning_rate": 0.00027411167512690355, "loss": 0.9131, "step": 1080 }, { "epoch": 0.05508243327283573, "grad_norm": 0.0984022492010476, "learning_rate": 0.0002753807106598985, "loss": 0.8987, "step": 1085 }, { "epoch": 0.05533626937086723, "grad_norm": 0.08731055858134601, "learning_rate": 0.0002766497461928934, "loss": 0.9249, "step": 1090 }, { "epoch": 0.05559010546889873, "grad_norm": 0.07965003748835442, "learning_rate": 0.0002779187817258883, "loss": 0.9199, "step": 1095 }, { "epoch": 0.05584394156693023, "grad_norm": 0.08590235907041983, "learning_rate": 0.00027918781725888326, "loss": 0.8908, "step": 1100 }, { "epoch": 0.05609777766496173, "grad_norm": 0.08232225215515018, "learning_rate": 0.0002804568527918782, "loss": 0.9039, "step": 1105 }, { "epoch": 0.05635161376299323, "grad_norm": 0.08135125390443596, "learning_rate": 0.0002817258883248731, "loss": 0.8953, "step": 1110 }, { "epoch": 0.05660544986102474, "grad_norm": 0.08186786752489149, "learning_rate": 0.00028299492385786804, "loss": 0.9146, "step": 1115 }, { "epoch": 0.05685928595905624, "grad_norm": 0.09977499455748641, "learning_rate": 0.00028426395939086296, "loss": 0.9472, "step": 1120 }, { "epoch": 0.05711312205708774, "grad_norm": 0.0971155916054973, "learning_rate": 0.0002855329949238579, "loss": 0.9296, "step": 1125 }, { "epoch": 0.05736695815511924, "grad_norm": 0.0750409520031442, "learning_rate": 0.0002868020304568528, "loss": 0.8678, "step": 1130 }, { "epoch": 0.05762079425315074, "grad_norm": 0.0862624800506259, "learning_rate": 0.00028807106598984774, "loss": 0.8876, "step": 1135 }, { "epoch": 0.05787463035118224, "grad_norm": 0.09395502462504797, "learning_rate": 0.0002893401015228426, "loss": 0.9011, "step": 1140 }, { "epoch": 0.05812846644921374, "grad_norm": 0.09323642151630512, "learning_rate": 0.0002906091370558376, "loss": 0.925, "step": 1145 }, { "epoch": 0.058382302547245246, "grad_norm": 0.07861454462358265, "learning_rate": 0.00029187817258883247, "loss": 0.8639, "step": 1150 }, { "epoch": 0.058636138645276746, "grad_norm": 0.07754151445621327, "learning_rate": 0.00029314720812182745, "loss": 0.8696, "step": 1155 }, { "epoch": 0.058889974743308246, "grad_norm": 0.07967478526694753, "learning_rate": 0.0002944162436548223, "loss": 0.892, "step": 1160 }, { "epoch": 0.059143810841339746, "grad_norm": 0.09182746618299562, "learning_rate": 0.0002956852791878173, "loss": 0.8882, "step": 1165 }, { "epoch": 0.059397646939371246, "grad_norm": 0.09564269933761693, "learning_rate": 0.00029695431472081217, "loss": 0.8883, "step": 1170 }, { "epoch": 0.059651483037402746, "grad_norm": 0.07293175874301112, "learning_rate": 0.0002982233502538071, "loss": 0.9378, "step": 1175 }, { "epoch": 0.05990531913543425, "grad_norm": 0.12751451314577963, "learning_rate": 0.000299492385786802, "loss": 0.9097, "step": 1180 }, { "epoch": 0.06015915523346575, "grad_norm": 0.08192767845384191, "learning_rate": 0.00030076142131979695, "loss": 0.9011, "step": 1185 }, { "epoch": 0.06041299133149725, "grad_norm": 0.08127170621752784, "learning_rate": 0.0003020304568527919, "loss": 0.9027, "step": 1190 }, { "epoch": 0.06066682742952875, "grad_norm": 0.1049759620056876, "learning_rate": 0.0003032994923857868, "loss": 0.895, "step": 1195 }, { "epoch": 0.06092066352756025, "grad_norm": 0.0794315210872211, "learning_rate": 0.00030456852791878173, "loss": 0.9555, "step": 1200 }, { "epoch": 0.06117449962559175, "grad_norm": 0.09561787864528201, "learning_rate": 0.00030583756345177666, "loss": 0.8972, "step": 1205 }, { "epoch": 0.06142833572362326, "grad_norm": 0.07763522542374471, "learning_rate": 0.0003071065989847716, "loss": 0.902, "step": 1210 }, { "epoch": 0.06168217182165476, "grad_norm": 0.0791284247139649, "learning_rate": 0.0003083756345177665, "loss": 0.9079, "step": 1215 }, { "epoch": 0.06193600791968626, "grad_norm": 0.08000405280865566, "learning_rate": 0.00030964467005076144, "loss": 0.9329, "step": 1220 }, { "epoch": 0.06218984401771776, "grad_norm": 0.07698056421741807, "learning_rate": 0.00031091370558375636, "loss": 0.9125, "step": 1225 }, { "epoch": 0.06244368011574926, "grad_norm": 0.07995661200899572, "learning_rate": 0.0003121827411167513, "loss": 0.8968, "step": 1230 }, { "epoch": 0.06269751621378077, "grad_norm": 0.08504648228991155, "learning_rate": 0.0003134517766497462, "loss": 0.8493, "step": 1235 }, { "epoch": 0.06295135231181226, "grad_norm": 0.07971051634227837, "learning_rate": 0.00031472081218274114, "loss": 0.903, "step": 1240 }, { "epoch": 0.06320518840984377, "grad_norm": 0.0872225941591864, "learning_rate": 0.000315989847715736, "loss": 0.9074, "step": 1245 }, { "epoch": 0.06345902450787526, "grad_norm": 0.0804956240023229, "learning_rate": 0.000317258883248731, "loss": 0.8967, "step": 1250 }, { "epoch": 0.06371286060590677, "grad_norm": 0.07643308563742951, "learning_rate": 0.00031852791878172587, "loss": 0.8905, "step": 1255 }, { "epoch": 0.06396669670393827, "grad_norm": 0.08977888216089795, "learning_rate": 0.00031979695431472085, "loss": 0.8991, "step": 1260 }, { "epoch": 0.06422053280196977, "grad_norm": 0.10061768636286983, "learning_rate": 0.0003210659898477157, "loss": 0.9192, "step": 1265 }, { "epoch": 0.06447436890000127, "grad_norm": 0.09318233051281237, "learning_rate": 0.0003223350253807107, "loss": 0.876, "step": 1270 }, { "epoch": 0.06472820499803277, "grad_norm": 0.0753974802693296, "learning_rate": 0.00032360406091370557, "loss": 0.895, "step": 1275 }, { "epoch": 0.06498204109606427, "grad_norm": 0.08073064302884482, "learning_rate": 0.0003248730964467005, "loss": 0.9365, "step": 1280 }, { "epoch": 0.06523587719409578, "grad_norm": 0.08782053635245218, "learning_rate": 0.0003261421319796954, "loss": 0.8686, "step": 1285 }, { "epoch": 0.06548971329212727, "grad_norm": 0.07744311363269024, "learning_rate": 0.00032741116751269035, "loss": 0.8672, "step": 1290 }, { "epoch": 0.06574354939015878, "grad_norm": 0.07309093873704738, "learning_rate": 0.0003286802030456853, "loss": 0.913, "step": 1295 }, { "epoch": 0.06599738548819027, "grad_norm": 0.10526535461595737, "learning_rate": 0.0003299492385786802, "loss": 0.8782, "step": 1300 }, { "epoch": 0.06625122158622178, "grad_norm": 0.07449821225313276, "learning_rate": 0.00033121827411167513, "loss": 0.9103, "step": 1305 }, { "epoch": 0.06650505768425327, "grad_norm": 0.07981805229186914, "learning_rate": 0.00033248730964467006, "loss": 0.8943, "step": 1310 }, { "epoch": 0.06675889378228478, "grad_norm": 0.08817558960256314, "learning_rate": 0.00033375634517766493, "loss": 0.9194, "step": 1315 }, { "epoch": 0.06701272988031629, "grad_norm": 0.09134034586558505, "learning_rate": 0.0003350253807106599, "loss": 0.8917, "step": 1320 }, { "epoch": 0.06726656597834778, "grad_norm": 0.09205487137387972, "learning_rate": 0.00033629441624365484, "loss": 0.8672, "step": 1325 }, { "epoch": 0.06752040207637929, "grad_norm": 0.08126622254544977, "learning_rate": 0.00033756345177664976, "loss": 0.8914, "step": 1330 }, { "epoch": 0.06777423817441078, "grad_norm": 0.07635309140423502, "learning_rate": 0.0003388324873096447, "loss": 0.9015, "step": 1335 }, { "epoch": 0.06802807427244228, "grad_norm": 0.08570716145138096, "learning_rate": 0.0003401015228426396, "loss": 0.9098, "step": 1340 }, { "epoch": 0.06828191037047379, "grad_norm": 0.1255573143705362, "learning_rate": 0.00034137055837563454, "loss": 0.8782, "step": 1345 }, { "epoch": 0.06853574646850528, "grad_norm": 0.07488128144696982, "learning_rate": 0.0003426395939086294, "loss": 0.8461, "step": 1350 }, { "epoch": 0.06878958256653679, "grad_norm": 0.07305712056281854, "learning_rate": 0.0003439086294416244, "loss": 0.8825, "step": 1355 }, { "epoch": 0.06904341866456828, "grad_norm": 0.07951313608352217, "learning_rate": 0.00034517766497461927, "loss": 0.9055, "step": 1360 }, { "epoch": 0.06929725476259979, "grad_norm": 0.09817251752445418, "learning_rate": 0.00034644670050761425, "loss": 0.8683, "step": 1365 }, { "epoch": 0.06955109086063128, "grad_norm": 0.08015867832163759, "learning_rate": 0.0003477157360406091, "loss": 0.8663, "step": 1370 }, { "epoch": 0.06980492695866279, "grad_norm": 0.09776730575541814, "learning_rate": 0.0003489847715736041, "loss": 0.8888, "step": 1375 }, { "epoch": 0.0700587630566943, "grad_norm": 0.08497578062709465, "learning_rate": 0.00035025380710659897, "loss": 0.8791, "step": 1380 }, { "epoch": 0.07031259915472579, "grad_norm": 0.0800446372926376, "learning_rate": 0.00035152284263959395, "loss": 0.8467, "step": 1385 }, { "epoch": 0.0705664352527573, "grad_norm": 0.07645047344022432, "learning_rate": 0.0003527918781725888, "loss": 0.8851, "step": 1390 }, { "epoch": 0.07082027135078879, "grad_norm": 0.07862625884010477, "learning_rate": 0.00035406091370558375, "loss": 0.8584, "step": 1395 }, { "epoch": 0.0710741074488203, "grad_norm": 0.07768649863423993, "learning_rate": 0.0003553299492385787, "loss": 0.9078, "step": 1400 }, { "epoch": 0.0713279435468518, "grad_norm": 0.07380225356016663, "learning_rate": 0.0003565989847715736, "loss": 0.9115, "step": 1405 }, { "epoch": 0.0715817796448833, "grad_norm": 0.08239478810345703, "learning_rate": 0.00035786802030456853, "loss": 0.9242, "step": 1410 }, { "epoch": 0.0718356157429148, "grad_norm": 0.07477635143037639, "learning_rate": 0.00035913705583756346, "loss": 0.8656, "step": 1415 }, { "epoch": 0.0720894518409463, "grad_norm": 0.06964422395800907, "learning_rate": 0.0003604060913705584, "loss": 0.8938, "step": 1420 }, { "epoch": 0.0723432879389778, "grad_norm": 0.07825507198431318, "learning_rate": 0.0003616751269035533, "loss": 0.8905, "step": 1425 }, { "epoch": 0.0725971240370093, "grad_norm": 0.08226905192764356, "learning_rate": 0.00036294416243654823, "loss": 0.8707, "step": 1430 }, { "epoch": 0.0728509601350408, "grad_norm": 0.07760254788133139, "learning_rate": 0.00036421319796954316, "loss": 0.8841, "step": 1435 }, { "epoch": 0.07310479623307231, "grad_norm": 0.08379552310158517, "learning_rate": 0.0003654822335025381, "loss": 0.8805, "step": 1440 }, { "epoch": 0.0733586323311038, "grad_norm": 0.08308428941591223, "learning_rate": 0.000366751269035533, "loss": 0.8509, "step": 1445 }, { "epoch": 0.07361246842913531, "grad_norm": 0.0869493469981548, "learning_rate": 0.00036802030456852794, "loss": 0.8895, "step": 1450 }, { "epoch": 0.0738663045271668, "grad_norm": 0.08518362804761086, "learning_rate": 0.00036928934010152287, "loss": 0.8847, "step": 1455 }, { "epoch": 0.07412014062519831, "grad_norm": 0.07494755670173285, "learning_rate": 0.0003705583756345178, "loss": 0.8724, "step": 1460 }, { "epoch": 0.0743739767232298, "grad_norm": 0.0888364396099253, "learning_rate": 0.00037182741116751266, "loss": 0.8913, "step": 1465 }, { "epoch": 0.07462781282126131, "grad_norm": 0.0838590119788707, "learning_rate": 0.00037309644670050765, "loss": 0.8846, "step": 1470 }, { "epoch": 0.07488164891929282, "grad_norm": 0.09053276363075144, "learning_rate": 0.0003743654822335025, "loss": 0.9244, "step": 1475 }, { "epoch": 0.07513548501732431, "grad_norm": 0.3840256413171509, "learning_rate": 0.0003756345177664975, "loss": 0.9598, "step": 1480 }, { "epoch": 0.07538932111535582, "grad_norm": 0.09729150971666614, "learning_rate": 0.00037690355329949237, "loss": 0.9033, "step": 1485 }, { "epoch": 0.07564315721338731, "grad_norm": 0.09608610624829524, "learning_rate": 0.00037817258883248735, "loss": 0.9193, "step": 1490 }, { "epoch": 0.07589699331141882, "grad_norm": 0.08332636557378577, "learning_rate": 0.0003794416243654822, "loss": 0.908, "step": 1495 }, { "epoch": 0.07615082940945032, "grad_norm": 0.08301759763360658, "learning_rate": 0.00038071065989847715, "loss": 0.88, "step": 1500 }, { "epoch": 0.07640466550748182, "grad_norm": 0.08049522524582133, "learning_rate": 0.0003819796954314721, "loss": 0.8991, "step": 1505 }, { "epoch": 0.07665850160551332, "grad_norm": 0.08750965101066191, "learning_rate": 0.000383248730964467, "loss": 0.8907, "step": 1510 }, { "epoch": 0.07691233770354482, "grad_norm": 0.12806117563194677, "learning_rate": 0.00038451776649746193, "loss": 0.8771, "step": 1515 }, { "epoch": 0.07716617380157632, "grad_norm": 5.2373051670509065, "learning_rate": 0.00038578680203045685, "loss": 0.9285, "step": 1520 }, { "epoch": 0.07742000989960782, "grad_norm": 0.7145279614367116, "learning_rate": 0.0003870558375634518, "loss": 0.9521, "step": 1525 }, { "epoch": 0.07767384599763932, "grad_norm": 0.5704033034094532, "learning_rate": 0.0003883248730964467, "loss": 1.226, "step": 1530 }, { "epoch": 0.07792768209567083, "grad_norm": 0.3565155645196115, "learning_rate": 0.00038959390862944163, "loss": 1.0679, "step": 1535 }, { "epoch": 0.07818151819370232, "grad_norm": 0.11132601070292776, "learning_rate": 0.00039086294416243656, "loss": 0.9426, "step": 1540 }, { "epoch": 0.07843535429173383, "grad_norm": 0.13317095168062648, "learning_rate": 0.0003921319796954315, "loss": 0.9605, "step": 1545 }, { "epoch": 0.07868919038976532, "grad_norm": 0.11420919150760077, "learning_rate": 0.0003934010152284264, "loss": 0.9059, "step": 1550 }, { "epoch": 0.07894302648779683, "grad_norm": 0.09160524158556653, "learning_rate": 0.00039467005076142134, "loss": 0.8983, "step": 1555 }, { "epoch": 0.07919686258582834, "grad_norm": 0.09096086614955563, "learning_rate": 0.00039593908629441627, "loss": 0.9306, "step": 1560 }, { "epoch": 0.07945069868385983, "grad_norm": 0.11360234911811702, "learning_rate": 0.0003972081218274112, "loss": 0.9074, "step": 1565 }, { "epoch": 0.07970453478189134, "grad_norm": 0.10052699276891207, "learning_rate": 0.00039847715736040606, "loss": 0.9053, "step": 1570 }, { "epoch": 0.07995837087992283, "grad_norm": 0.0911530767106713, "learning_rate": 0.00039974619289340104, "loss": 0.8885, "step": 1575 }, { "epoch": 0.08021220697795434, "grad_norm": 0.09254495742885606, "learning_rate": 0.0004010152284263959, "loss": 0.915, "step": 1580 }, { "epoch": 0.08046604307598583, "grad_norm": 0.07477148260204751, "learning_rate": 0.0004022842639593909, "loss": 0.8857, "step": 1585 }, { "epoch": 0.08071987917401734, "grad_norm": 0.0950035481958698, "learning_rate": 0.00040355329949238577, "loss": 0.864, "step": 1590 }, { "epoch": 0.08097371527204884, "grad_norm": 0.08328153761752542, "learning_rate": 0.00040482233502538075, "loss": 0.8936, "step": 1595 }, { "epoch": 0.08122755137008034, "grad_norm": 0.07908633756440138, "learning_rate": 0.0004060913705583756, "loss": 0.8744, "step": 1600 }, { "epoch": 0.08148138746811184, "grad_norm": 0.08092538529333056, "learning_rate": 0.0004073604060913706, "loss": 0.893, "step": 1605 }, { "epoch": 0.08173522356614334, "grad_norm": 0.09271888321902413, "learning_rate": 0.0004086294416243655, "loss": 0.8907, "step": 1610 }, { "epoch": 0.08198905966417484, "grad_norm": 0.08557055493404682, "learning_rate": 0.0004098984771573604, "loss": 0.8959, "step": 1615 }, { "epoch": 0.08224289576220635, "grad_norm": 0.08651818502522003, "learning_rate": 0.00041116751269035533, "loss": 0.8619, "step": 1620 }, { "epoch": 0.08249673186023784, "grad_norm": 0.06661960649333494, "learning_rate": 0.00041243654822335025, "loss": 0.8623, "step": 1625 }, { "epoch": 0.08275056795826935, "grad_norm": 0.08246118778977325, "learning_rate": 0.0004137055837563452, "loss": 0.8979, "step": 1630 }, { "epoch": 0.08300440405630084, "grad_norm": 0.09409685127238218, "learning_rate": 0.0004149746192893401, "loss": 0.8903, "step": 1635 }, { "epoch": 0.08325824015433235, "grad_norm": 0.09065697306134007, "learning_rate": 0.0004162436548223351, "loss": 0.8844, "step": 1640 }, { "epoch": 0.08351207625236384, "grad_norm": 0.07851282286862668, "learning_rate": 0.00041751269035532996, "loss": 0.8819, "step": 1645 }, { "epoch": 0.08376591235039535, "grad_norm": 0.07084130877186355, "learning_rate": 0.0004187817258883249, "loss": 0.889, "step": 1650 }, { "epoch": 0.08401974844842686, "grad_norm": 0.07980664772738791, "learning_rate": 0.0004200507614213198, "loss": 0.8796, "step": 1655 }, { "epoch": 0.08427358454645835, "grad_norm": 0.0957173919613896, "learning_rate": 0.00042131979695431474, "loss": 0.8869, "step": 1660 }, { "epoch": 0.08452742064448986, "grad_norm": 0.24928025644224458, "learning_rate": 0.00042258883248730967, "loss": 0.8981, "step": 1665 }, { "epoch": 0.08478125674252135, "grad_norm": 0.12803314215597378, "learning_rate": 0.0004238578680203046, "loss": 0.8855, "step": 1670 }, { "epoch": 0.08503509284055286, "grad_norm": 0.08208033062580664, "learning_rate": 0.0004251269035532995, "loss": 0.8718, "step": 1675 }, { "epoch": 0.08528892893858436, "grad_norm": 0.0714981325673799, "learning_rate": 0.00042639593908629444, "loss": 0.9105, "step": 1680 }, { "epoch": 0.08554276503661586, "grad_norm": 0.1009598899720904, "learning_rate": 0.0004276649746192893, "loss": 0.8591, "step": 1685 }, { "epoch": 0.08579660113464736, "grad_norm": 0.089261292568091, "learning_rate": 0.0004289340101522843, "loss": 0.8865, "step": 1690 }, { "epoch": 0.08605043723267886, "grad_norm": 0.07028195296266151, "learning_rate": 0.00043020304568527917, "loss": 0.8721, "step": 1695 }, { "epoch": 0.08630427333071036, "grad_norm": 0.08325124400214844, "learning_rate": 0.00043147208121827415, "loss": 0.8931, "step": 1700 }, { "epoch": 0.08655810942874186, "grad_norm": 0.08227688282274721, "learning_rate": 0.000432741116751269, "loss": 0.8839, "step": 1705 }, { "epoch": 0.08681194552677336, "grad_norm": 0.09319642992092989, "learning_rate": 0.000434010152284264, "loss": 0.9486, "step": 1710 }, { "epoch": 0.08706578162480487, "grad_norm": 0.0761786499465775, "learning_rate": 0.0004352791878172589, "loss": 0.8744, "step": 1715 }, { "epoch": 0.08731961772283636, "grad_norm": 0.08349104483728616, "learning_rate": 0.0004365482233502538, "loss": 0.8702, "step": 1720 }, { "epoch": 0.08757345382086787, "grad_norm": 0.07006610397784793, "learning_rate": 0.00043781725888324873, "loss": 0.8792, "step": 1725 }, { "epoch": 0.08782728991889936, "grad_norm": 0.1219325504819667, "learning_rate": 0.00043908629441624365, "loss": 0.9107, "step": 1730 }, { "epoch": 0.08808112601693087, "grad_norm": 0.1217012456401295, "learning_rate": 0.0004403553299492386, "loss": 0.9004, "step": 1735 }, { "epoch": 0.08833496211496236, "grad_norm": 0.09316001229946146, "learning_rate": 0.0004416243654822335, "loss": 0.8592, "step": 1740 }, { "epoch": 0.08858879821299387, "grad_norm": 0.07014738205136709, "learning_rate": 0.00044289340101522843, "loss": 0.8785, "step": 1745 }, { "epoch": 0.08884263431102538, "grad_norm": 0.08303795022935344, "learning_rate": 0.00044416243654822336, "loss": 0.8769, "step": 1750 }, { "epoch": 0.08909647040905687, "grad_norm": 0.07941100314968964, "learning_rate": 0.0004454314720812183, "loss": 0.9135, "step": 1755 }, { "epoch": 0.08935030650708838, "grad_norm": 0.08290745931745216, "learning_rate": 0.0004467005076142132, "loss": 0.8725, "step": 1760 }, { "epoch": 0.08960414260511987, "grad_norm": 0.07881492097354668, "learning_rate": 0.00044796954314720814, "loss": 0.8732, "step": 1765 }, { "epoch": 0.08985797870315138, "grad_norm": 0.0760937396155423, "learning_rate": 0.00044923857868020306, "loss": 0.8641, "step": 1770 }, { "epoch": 0.09011181480118288, "grad_norm": 0.2170207202848839, "learning_rate": 0.000450507614213198, "loss": 0.9363, "step": 1775 }, { "epoch": 0.09036565089921438, "grad_norm": 0.11521462882043704, "learning_rate": 0.0004517766497461929, "loss": 0.8784, "step": 1780 }, { "epoch": 0.09061948699724588, "grad_norm": 0.09191443608562169, "learning_rate": 0.00045304568527918784, "loss": 0.9357, "step": 1785 }, { "epoch": 0.09087332309527738, "grad_norm": 0.6013193343866744, "learning_rate": 0.0004543147208121827, "loss": 0.9007, "step": 1790 }, { "epoch": 0.09112715919330888, "grad_norm": 0.10519905268837258, "learning_rate": 0.0004555837563451777, "loss": 0.8948, "step": 1795 }, { "epoch": 0.09138099529134038, "grad_norm": 0.07274487345190098, "learning_rate": 0.00045685279187817257, "loss": 0.9114, "step": 1800 }, { "epoch": 0.09163483138937188, "grad_norm": 0.07452371110153488, "learning_rate": 0.00045812182741116755, "loss": 0.8652, "step": 1805 }, { "epoch": 0.09188866748740339, "grad_norm": 0.06698807063354198, "learning_rate": 0.0004593908629441624, "loss": 0.8473, "step": 1810 }, { "epoch": 0.09214250358543488, "grad_norm": 0.0671491353342025, "learning_rate": 0.0004606598984771574, "loss": 0.8537, "step": 1815 }, { "epoch": 0.09239633968346639, "grad_norm": 0.08561681280468643, "learning_rate": 0.0004619289340101523, "loss": 0.8881, "step": 1820 }, { "epoch": 0.09265017578149788, "grad_norm": 0.06475421415148738, "learning_rate": 0.0004631979695431472, "loss": 0.863, "step": 1825 }, { "epoch": 0.09290401187952939, "grad_norm": 0.08192189581126416, "learning_rate": 0.0004644670050761421, "loss": 0.9188, "step": 1830 }, { "epoch": 0.0931578479775609, "grad_norm": 0.08079147200722692, "learning_rate": 0.00046573604060913705, "loss": 0.8528, "step": 1835 }, { "epoch": 0.09341168407559239, "grad_norm": 0.06439714169637319, "learning_rate": 0.000467005076142132, "loss": 0.8474, "step": 1840 }, { "epoch": 0.0936655201736239, "grad_norm": 0.07081309209791804, "learning_rate": 0.0004682741116751269, "loss": 0.8307, "step": 1845 }, { "epoch": 0.09391935627165539, "grad_norm": 0.07762307055677733, "learning_rate": 0.00046954314720812183, "loss": 0.8815, "step": 1850 }, { "epoch": 0.0941731923696869, "grad_norm": 0.08917995998584996, "learning_rate": 0.00047081218274111676, "loss": 0.8704, "step": 1855 }, { "epoch": 0.09442702846771839, "grad_norm": 0.07830266156397833, "learning_rate": 0.00047208121827411174, "loss": 0.8852, "step": 1860 }, { "epoch": 0.0946808645657499, "grad_norm": 0.07346058309555209, "learning_rate": 0.0004733502538071066, "loss": 0.8825, "step": 1865 }, { "epoch": 0.0949347006637814, "grad_norm": 0.09518194903720803, "learning_rate": 0.00047461928934010154, "loss": 0.8467, "step": 1870 }, { "epoch": 0.0951885367618129, "grad_norm": 0.06324516247668761, "learning_rate": 0.00047588832487309646, "loss": 0.8671, "step": 1875 }, { "epoch": 0.0954423728598444, "grad_norm": 0.08075000661667853, "learning_rate": 0.0004771573604060914, "loss": 0.8684, "step": 1880 }, { "epoch": 0.0956962089578759, "grad_norm": 0.09041872058875536, "learning_rate": 0.0004784263959390863, "loss": 0.8573, "step": 1885 }, { "epoch": 0.0959500450559074, "grad_norm": 0.06463632312898733, "learning_rate": 0.00047969543147208124, "loss": 0.8729, "step": 1890 }, { "epoch": 0.09620388115393891, "grad_norm": 0.07178745247904356, "learning_rate": 0.00048096446700507617, "loss": 0.8682, "step": 1895 }, { "epoch": 0.0964577172519704, "grad_norm": 0.07059652629460725, "learning_rate": 0.0004822335025380711, "loss": 0.8664, "step": 1900 }, { "epoch": 0.09671155335000191, "grad_norm": 0.06812152124794425, "learning_rate": 0.00048350253807106597, "loss": 0.8711, "step": 1905 }, { "epoch": 0.0969653894480334, "grad_norm": 0.06771093248328444, "learning_rate": 0.00048477157360406095, "loss": 0.8773, "step": 1910 }, { "epoch": 0.09721922554606491, "grad_norm": 0.06712279171541065, "learning_rate": 0.0004860406091370558, "loss": 0.8846, "step": 1915 }, { "epoch": 0.0974730616440964, "grad_norm": 0.07281797317774959, "learning_rate": 0.0004873096446700508, "loss": 0.8951, "step": 1920 }, { "epoch": 0.09772689774212791, "grad_norm": 0.07163311430441872, "learning_rate": 0.0004885786802030457, "loss": 0.8433, "step": 1925 }, { "epoch": 0.09798073384015941, "grad_norm": 0.23679641902268464, "learning_rate": 0.0004898477157360406, "loss": 0.8893, "step": 1930 }, { "epoch": 0.09823456993819091, "grad_norm": 0.06815937350191797, "learning_rate": 0.0004911167512690356, "loss": 0.8591, "step": 1935 }, { "epoch": 0.09848840603622241, "grad_norm": 0.07707444439181808, "learning_rate": 0.0004923857868020305, "loss": 0.866, "step": 1940 }, { "epoch": 0.09874224213425391, "grad_norm": 0.1325832425947022, "learning_rate": 0.0004936548223350254, "loss": 0.8484, "step": 1945 }, { "epoch": 0.09899607823228541, "grad_norm": 0.07427392824757344, "learning_rate": 0.0004949238578680203, "loss": 0.8701, "step": 1950 }, { "epoch": 0.09924991433031692, "grad_norm": 0.06577858823584966, "learning_rate": 0.0004961928934010153, "loss": 0.8558, "step": 1955 }, { "epoch": 0.09950375042834841, "grad_norm": 0.06653585315042584, "learning_rate": 0.0004974619289340102, "loss": 0.8541, "step": 1960 }, { "epoch": 0.09975758652637992, "grad_norm": 0.13128051961450932, "learning_rate": 0.0004987309644670051, "loss": 0.83, "step": 1965 }, { "epoch": 0.10001142262441141, "grad_norm": 0.09937866056904622, "learning_rate": 0.0005, "loss": 0.8446, "step": 1970 }, { "epoch": 0.10026525872244292, "grad_norm": 0.06884526018518473, "learning_rate": 0.000501269035532995, "loss": 0.8521, "step": 1975 }, { "epoch": 0.10051909482047441, "grad_norm": 0.11491994183310837, "learning_rate": 0.0005025380710659899, "loss": 0.8488, "step": 1980 }, { "epoch": 0.10077293091850592, "grad_norm": 0.06754150203226321, "learning_rate": 0.0005038071065989847, "loss": 0.8444, "step": 1985 }, { "epoch": 0.10102676701653743, "grad_norm": 0.07046092339889716, "learning_rate": 0.0005050761421319797, "loss": 0.8382, "step": 1990 }, { "epoch": 0.10128060311456892, "grad_norm": 0.06594835595271006, "learning_rate": 0.0005063451776649747, "loss": 0.8187, "step": 1995 }, { "epoch": 0.10153443921260043, "grad_norm": 0.06666876721282243, "learning_rate": 0.0005076142131979696, "loss": 0.874, "step": 2000 }, { "epoch": 0.10178827531063192, "grad_norm": 0.0657701244951683, "learning_rate": 0.0005088832487309644, "loss": 0.8519, "step": 2005 }, { "epoch": 0.10204211140866343, "grad_norm": 0.07274392757140892, "learning_rate": 0.0005101522842639594, "loss": 0.8517, "step": 2010 }, { "epoch": 0.10229594750669492, "grad_norm": 0.08239234127563405, "learning_rate": 0.0005114213197969543, "loss": 0.8393, "step": 2015 }, { "epoch": 0.10254978360472643, "grad_norm": 0.06704292783392311, "learning_rate": 0.0005126903553299493, "loss": 0.8675, "step": 2020 }, { "epoch": 0.10280361970275793, "grad_norm": 0.06541769795482981, "learning_rate": 0.0005139593908629441, "loss": 0.8696, "step": 2025 }, { "epoch": 0.10305745580078943, "grad_norm": 0.07134209980553383, "learning_rate": 0.0005152284263959391, "loss": 0.8576, "step": 2030 }, { "epoch": 0.10331129189882093, "grad_norm": 0.07645514278654998, "learning_rate": 0.000516497461928934, "loss": 0.8843, "step": 2035 }, { "epoch": 0.10356512799685243, "grad_norm": 0.07839958164629823, "learning_rate": 0.000517766497461929, "loss": 0.8396, "step": 2040 }, { "epoch": 0.10381896409488393, "grad_norm": 0.07275729117673069, "learning_rate": 0.0005190355329949239, "loss": 0.8318, "step": 2045 }, { "epoch": 0.10407280019291544, "grad_norm": 0.06695426940236447, "learning_rate": 0.0005203045685279187, "loss": 0.8863, "step": 2050 }, { "epoch": 0.10432663629094693, "grad_norm": 0.06380910467977541, "learning_rate": 0.0005215736040609137, "loss": 0.8158, "step": 2055 }, { "epoch": 0.10458047238897844, "grad_norm": 0.05987513967906045, "learning_rate": 0.0005228426395939087, "loss": 0.8373, "step": 2060 }, { "epoch": 0.10483430848700993, "grad_norm": 0.06904198468266956, "learning_rate": 0.0005241116751269036, "loss": 0.899, "step": 2065 }, { "epoch": 0.10508814458504144, "grad_norm": 0.07361279967785084, "learning_rate": 0.0005253807106598984, "loss": 0.8443, "step": 2070 }, { "epoch": 0.10534198068307293, "grad_norm": 0.13208821168204252, "learning_rate": 0.0005266497461928934, "loss": 0.8643, "step": 2075 }, { "epoch": 0.10559581678110444, "grad_norm": 0.0950182480674098, "learning_rate": 0.0005279187817258884, "loss": 0.8445, "step": 2080 }, { "epoch": 0.10584965287913595, "grad_norm": 0.06902725124731286, "learning_rate": 0.0005291878172588833, "loss": 0.8383, "step": 2085 }, { "epoch": 0.10610348897716744, "grad_norm": 0.07017113595077061, "learning_rate": 0.0005304568527918781, "loss": 0.9053, "step": 2090 }, { "epoch": 0.10635732507519895, "grad_norm": 0.0831524352166824, "learning_rate": 0.0005317258883248731, "loss": 0.8568, "step": 2095 }, { "epoch": 0.10661116117323044, "grad_norm": 0.0795491957704742, "learning_rate": 0.0005329949238578681, "loss": 0.834, "step": 2100 }, { "epoch": 0.10686499727126195, "grad_norm": 0.08394747717767122, "learning_rate": 0.000534263959390863, "loss": 0.8649, "step": 2105 }, { "epoch": 0.10711883336929345, "grad_norm": 0.07979016852479999, "learning_rate": 0.0005355329949238578, "loss": 0.8663, "step": 2110 }, { "epoch": 0.10737266946732495, "grad_norm": 0.08708619290331072, "learning_rate": 0.0005368020304568528, "loss": 0.8644, "step": 2115 }, { "epoch": 0.10762650556535645, "grad_norm": 0.07425127689040566, "learning_rate": 0.0005380710659898477, "loss": 0.8371, "step": 2120 }, { "epoch": 0.10788034166338795, "grad_norm": 0.07359510078606354, "learning_rate": 0.0005393401015228427, "loss": 0.8511, "step": 2125 }, { "epoch": 0.10813417776141945, "grad_norm": 0.1367496735230648, "learning_rate": 0.0005406091370558375, "loss": 0.8228, "step": 2130 }, { "epoch": 0.10838801385945095, "grad_norm": 0.06661242616150219, "learning_rate": 0.0005418781725888325, "loss": 0.8301, "step": 2135 }, { "epoch": 0.10864184995748245, "grad_norm": 0.070511143666762, "learning_rate": 0.0005431472081218274, "loss": 0.8418, "step": 2140 }, { "epoch": 0.10889568605551396, "grad_norm": 0.06824118647151758, "learning_rate": 0.0005444162436548224, "loss": 0.8174, "step": 2145 }, { "epoch": 0.10914952215354545, "grad_norm": 0.07927604024277334, "learning_rate": 0.0005456852791878173, "loss": 0.8312, "step": 2150 }, { "epoch": 0.10940335825157696, "grad_norm": 0.0790940687186082, "learning_rate": 0.0005469543147208121, "loss": 0.848, "step": 2155 }, { "epoch": 0.10965719434960845, "grad_norm": 0.06805742628026772, "learning_rate": 0.0005482233502538071, "loss": 0.8872, "step": 2160 }, { "epoch": 0.10991103044763996, "grad_norm": 0.07899081050079654, "learning_rate": 0.0005494923857868021, "loss": 0.8688, "step": 2165 }, { "epoch": 0.11016486654567147, "grad_norm": 0.07109865377855785, "learning_rate": 0.000550761421319797, "loss": 0.8465, "step": 2170 }, { "epoch": 0.11041870264370296, "grad_norm": 0.06818032617876316, "learning_rate": 0.0005520304568527918, "loss": 0.8317, "step": 2175 }, { "epoch": 0.11067253874173447, "grad_norm": 0.0688535834137314, "learning_rate": 0.0005532994923857868, "loss": 0.8287, "step": 2180 }, { "epoch": 0.11092637483976596, "grad_norm": 0.0674631679936993, "learning_rate": 0.0005545685279187818, "loss": 0.826, "step": 2185 }, { "epoch": 0.11118021093779747, "grad_norm": 0.06895591747231831, "learning_rate": 0.0005558375634517766, "loss": 0.8626, "step": 2190 }, { "epoch": 0.11143404703582896, "grad_norm": 0.07348950593127732, "learning_rate": 0.0005571065989847715, "loss": 0.8356, "step": 2195 }, { "epoch": 0.11168788313386047, "grad_norm": 0.06572200012755866, "learning_rate": 0.0005583756345177665, "loss": 0.8402, "step": 2200 }, { "epoch": 0.11194171923189197, "grad_norm": 0.07424978847138179, "learning_rate": 0.0005596446700507615, "loss": 0.8493, "step": 2205 }, { "epoch": 0.11219555532992347, "grad_norm": 0.06259214676314034, "learning_rate": 0.0005609137055837564, "loss": 0.8023, "step": 2210 }, { "epoch": 0.11244939142795497, "grad_norm": 0.09042211363509442, "learning_rate": 0.0005621827411167512, "loss": 0.8026, "step": 2215 }, { "epoch": 0.11270322752598647, "grad_norm": 0.07524964613180254, "learning_rate": 0.0005634517766497462, "loss": 0.8648, "step": 2220 }, { "epoch": 0.11295706362401797, "grad_norm": 0.0662901819189373, "learning_rate": 0.0005647208121827412, "loss": 0.845, "step": 2225 }, { "epoch": 0.11321089972204948, "grad_norm": 0.0689021233258092, "learning_rate": 0.0005659898477157361, "loss": 0.8512, "step": 2230 }, { "epoch": 0.11346473582008097, "grad_norm": 0.06176450091756391, "learning_rate": 0.0005672588832487309, "loss": 0.8321, "step": 2235 }, { "epoch": 0.11371857191811248, "grad_norm": 0.06767126269481663, "learning_rate": 0.0005685279187817259, "loss": 0.8285, "step": 2240 }, { "epoch": 0.11397240801614397, "grad_norm": 0.0687021600228753, "learning_rate": 0.0005697969543147208, "loss": 0.8045, "step": 2245 }, { "epoch": 0.11422624411417548, "grad_norm": 0.10921204499862404, "learning_rate": 0.0005710659898477158, "loss": 0.8121, "step": 2250 }, { "epoch": 0.11448008021220697, "grad_norm": 0.070108474156699, "learning_rate": 0.0005723350253807107, "loss": 0.8347, "step": 2255 }, { "epoch": 0.11473391631023848, "grad_norm": 0.06505258157561829, "learning_rate": 0.0005736040609137056, "loss": 0.8124, "step": 2260 }, { "epoch": 0.11498775240826999, "grad_norm": 0.06689236468610028, "learning_rate": 0.0005748730964467005, "loss": 0.8877, "step": 2265 }, { "epoch": 0.11524158850630148, "grad_norm": 0.06340626465858151, "learning_rate": 0.0005761421319796955, "loss": 0.855, "step": 2270 }, { "epoch": 0.11549542460433299, "grad_norm": 0.07397159417321542, "learning_rate": 0.0005774111675126904, "loss": 0.8505, "step": 2275 }, { "epoch": 0.11574926070236448, "grad_norm": 0.07032017119834777, "learning_rate": 0.0005786802030456852, "loss": 0.8187, "step": 2280 }, { "epoch": 0.11600309680039599, "grad_norm": 0.4351367338843396, "learning_rate": 0.0005799492385786802, "loss": 0.8546, "step": 2285 }, { "epoch": 0.11625693289842748, "grad_norm": 0.06897891157439649, "learning_rate": 0.0005812182741116752, "loss": 0.8012, "step": 2290 }, { "epoch": 0.11651076899645899, "grad_norm": 0.07195073472522845, "learning_rate": 0.0005824873096446702, "loss": 0.8431, "step": 2295 }, { "epoch": 0.11676460509449049, "grad_norm": 0.06836795957859078, "learning_rate": 0.0005837563451776649, "loss": 0.8259, "step": 2300 }, { "epoch": 0.11701844119252199, "grad_norm": 0.06674593711060259, "learning_rate": 0.0005850253807106599, "loss": 0.871, "step": 2305 }, { "epoch": 0.11727227729055349, "grad_norm": 0.07779919215251709, "learning_rate": 0.0005862944162436549, "loss": 0.818, "step": 2310 }, { "epoch": 0.11752611338858499, "grad_norm": 0.08460933927193902, "learning_rate": 0.0005875634517766498, "loss": 0.8403, "step": 2315 }, { "epoch": 0.11777994948661649, "grad_norm": 0.06846534074634021, "learning_rate": 0.0005888324873096446, "loss": 0.8229, "step": 2320 }, { "epoch": 0.118033785584648, "grad_norm": 0.07568888816619188, "learning_rate": 0.0005901015228426396, "loss": 0.8473, "step": 2325 }, { "epoch": 0.11828762168267949, "grad_norm": 0.07631610125544379, "learning_rate": 0.0005913705583756346, "loss": 0.8246, "step": 2330 }, { "epoch": 0.118541457780711, "grad_norm": 0.06459724431072238, "learning_rate": 0.0005926395939086295, "loss": 0.8463, "step": 2335 }, { "epoch": 0.11879529387874249, "grad_norm": 0.06665515833850005, "learning_rate": 0.0005939086294416243, "loss": 0.8434, "step": 2340 }, { "epoch": 0.119049129976774, "grad_norm": 0.06978954325798213, "learning_rate": 0.0005951776649746193, "loss": 0.8123, "step": 2345 }, { "epoch": 0.11930296607480549, "grad_norm": 0.07651462388225089, "learning_rate": 0.0005964467005076142, "loss": 0.8281, "step": 2350 }, { "epoch": 0.119556802172837, "grad_norm": 0.06350257177948194, "learning_rate": 0.0005977157360406092, "loss": 0.8395, "step": 2355 }, { "epoch": 0.1198106382708685, "grad_norm": 0.06598209146240407, "learning_rate": 0.000598984771573604, "loss": 0.8482, "step": 2360 }, { "epoch": 0.1200644743689, "grad_norm": 0.05947149153441356, "learning_rate": 0.000600253807106599, "loss": 0.8833, "step": 2365 }, { "epoch": 0.1203183104669315, "grad_norm": 0.06935766572738346, "learning_rate": 0.0006015228426395939, "loss": 0.8684, "step": 2370 }, { "epoch": 0.120572146564963, "grad_norm": 0.0677888459681303, "learning_rate": 0.0006027918781725889, "loss": 0.8398, "step": 2375 }, { "epoch": 0.1208259826629945, "grad_norm": 0.08869062429496338, "learning_rate": 0.0006040609137055838, "loss": 0.8487, "step": 2380 }, { "epoch": 0.12107981876102601, "grad_norm": 0.12012934371285497, "learning_rate": 0.0006053299492385786, "loss": 0.8238, "step": 2385 }, { "epoch": 0.1213336548590575, "grad_norm": 0.09183056822323755, "learning_rate": 0.0006065989847715736, "loss": 0.8258, "step": 2390 }, { "epoch": 0.12158749095708901, "grad_norm": 0.07657225870023741, "learning_rate": 0.0006078680203045686, "loss": 0.8204, "step": 2395 }, { "epoch": 0.1218413270551205, "grad_norm": 0.08875859089257833, "learning_rate": 0.0006091370558375635, "loss": 0.8565, "step": 2400 }, { "epoch": 0.12209516315315201, "grad_norm": 0.06870216708947767, "learning_rate": 0.0006104060913705583, "loss": 0.8234, "step": 2405 }, { "epoch": 0.1223489992511835, "grad_norm": 0.0707295164341986, "learning_rate": 0.0006116751269035533, "loss": 0.8434, "step": 2410 }, { "epoch": 0.12260283534921501, "grad_norm": 0.07381776924964528, "learning_rate": 0.0006129441624365483, "loss": 0.835, "step": 2415 }, { "epoch": 0.12285667144724652, "grad_norm": 0.0846497867271569, "learning_rate": 0.0006142131979695432, "loss": 0.8238, "step": 2420 }, { "epoch": 0.12311050754527801, "grad_norm": 0.07954963177809368, "learning_rate": 0.000615482233502538, "loss": 0.8221, "step": 2425 }, { "epoch": 0.12336434364330952, "grad_norm": 0.07615706087653497, "learning_rate": 0.000616751269035533, "loss": 0.829, "step": 2430 }, { "epoch": 0.12361817974134101, "grad_norm": 0.07816694217625653, "learning_rate": 0.000618020304568528, "loss": 0.8427, "step": 2435 }, { "epoch": 0.12387201583937252, "grad_norm": 0.07985187600453396, "learning_rate": 0.0006192893401015229, "loss": 0.8359, "step": 2440 }, { "epoch": 0.12412585193740402, "grad_norm": 0.10037522707451767, "learning_rate": 0.0006205583756345177, "loss": 0.8097, "step": 2445 }, { "epoch": 0.12437968803543552, "grad_norm": 0.07403913287649622, "learning_rate": 0.0006218274111675127, "loss": 0.8371, "step": 2450 }, { "epoch": 0.12463352413346702, "grad_norm": 0.07018699360030302, "learning_rate": 0.0006230964467005076, "loss": 0.8592, "step": 2455 }, { "epoch": 0.12488736023149852, "grad_norm": 0.06521875900532362, "learning_rate": 0.0006243654822335026, "loss": 0.7903, "step": 2460 }, { "epoch": 0.12514119632953002, "grad_norm": 0.06481818670156049, "learning_rate": 0.0006256345177664974, "loss": 0.837, "step": 2465 }, { "epoch": 0.12539503242756153, "grad_norm": 0.0717331412129577, "learning_rate": 0.0006269035532994924, "loss": 0.8312, "step": 2470 }, { "epoch": 0.12564886852559304, "grad_norm": 0.06075276742781533, "learning_rate": 0.0006281725888324873, "loss": 0.8173, "step": 2475 }, { "epoch": 0.12590270462362452, "grad_norm": 0.06910542884564103, "learning_rate": 0.0006294416243654823, "loss": 0.8277, "step": 2480 }, { "epoch": 0.12615654072165602, "grad_norm": 0.09602066908136982, "learning_rate": 0.0006307106598984772, "loss": 0.82, "step": 2485 }, { "epoch": 0.12641037681968753, "grad_norm": 0.07437855231303801, "learning_rate": 0.000631979695431472, "loss": 0.7939, "step": 2490 }, { "epoch": 0.12666421291771904, "grad_norm": 0.06874922897981554, "learning_rate": 0.000633248730964467, "loss": 0.8544, "step": 2495 }, { "epoch": 0.12691804901575052, "grad_norm": 0.07170017132935298, "learning_rate": 0.000634517766497462, "loss": 0.8092, "step": 2500 }, { "epoch": 0.12717188511378202, "grad_norm": 0.061561215538905575, "learning_rate": 0.0006357868020304569, "loss": 0.7932, "step": 2505 }, { "epoch": 0.12742572121181353, "grad_norm": 0.07393419995625253, "learning_rate": 0.0006370558375634517, "loss": 0.873, "step": 2510 }, { "epoch": 0.12767955730984504, "grad_norm": 0.07928390759573106, "learning_rate": 0.0006383248730964467, "loss": 0.8254, "step": 2515 }, { "epoch": 0.12793339340787654, "grad_norm": 0.0682331632568844, "learning_rate": 0.0006395939086294417, "loss": 0.7931, "step": 2520 }, { "epoch": 0.12818722950590802, "grad_norm": 0.07272623879554883, "learning_rate": 0.0006408629441624366, "loss": 0.7787, "step": 2525 }, { "epoch": 0.12844106560393953, "grad_norm": 0.07181436789790091, "learning_rate": 0.0006421319796954314, "loss": 0.8419, "step": 2530 }, { "epoch": 0.12869490170197104, "grad_norm": 0.06190481412742372, "learning_rate": 0.0006434010152284264, "loss": 0.8511, "step": 2535 }, { "epoch": 0.12894873780000254, "grad_norm": 0.07434050289375677, "learning_rate": 0.0006446700507614214, "loss": 0.8382, "step": 2540 }, { "epoch": 0.12920257389803405, "grad_norm": 0.06824780908649483, "learning_rate": 0.0006459390862944163, "loss": 0.8155, "step": 2545 }, { "epoch": 0.12945640999606553, "grad_norm": 0.07747861487316851, "learning_rate": 0.0006472081218274111, "loss": 0.8366, "step": 2550 }, { "epoch": 0.12971024609409704, "grad_norm": 0.07059487631665852, "learning_rate": 0.0006484771573604061, "loss": 0.846, "step": 2555 }, { "epoch": 0.12996408219212854, "grad_norm": 0.06859167360133805, "learning_rate": 0.000649746192893401, "loss": 0.8206, "step": 2560 }, { "epoch": 0.13021791829016005, "grad_norm": 0.06447685181747545, "learning_rate": 0.000651015228426396, "loss": 0.8962, "step": 2565 }, { "epoch": 0.13047175438819156, "grad_norm": 0.06662815064525365, "learning_rate": 0.0006522842639593908, "loss": 0.8522, "step": 2570 }, { "epoch": 0.13072559048622304, "grad_norm": 0.06754941254240418, "learning_rate": 0.0006535532994923858, "loss": 0.8151, "step": 2575 }, { "epoch": 0.13097942658425454, "grad_norm": 0.06342964865693185, "learning_rate": 0.0006548223350253807, "loss": 0.8425, "step": 2580 }, { "epoch": 0.13123326268228605, "grad_norm": 0.07083933929792709, "learning_rate": 0.0006560913705583757, "loss": 0.8056, "step": 2585 }, { "epoch": 0.13148709878031756, "grad_norm": 0.08066368358301058, "learning_rate": 0.0006573604060913706, "loss": 0.8408, "step": 2590 }, { "epoch": 0.13174093487834904, "grad_norm": 0.06537667178878147, "learning_rate": 0.0006586294416243654, "loss": 0.8028, "step": 2595 }, { "epoch": 0.13199477097638054, "grad_norm": 0.06600449331799242, "learning_rate": 0.0006598984771573604, "loss": 0.8581, "step": 2600 }, { "epoch": 0.13224860707441205, "grad_norm": 0.06992187348735852, "learning_rate": 0.0006611675126903554, "loss": 0.8073, "step": 2605 }, { "epoch": 0.13250244317244356, "grad_norm": 0.6280042590289823, "learning_rate": 0.0006624365482233503, "loss": 0.8283, "step": 2610 }, { "epoch": 0.13275627927047506, "grad_norm": 0.07626111454042304, "learning_rate": 0.0006637055837563451, "loss": 0.8287, "step": 2615 }, { "epoch": 0.13301011536850654, "grad_norm": 0.07065741892048714, "learning_rate": 0.0006649746192893401, "loss": 0.8116, "step": 2620 }, { "epoch": 0.13326395146653805, "grad_norm": 0.07332759534548636, "learning_rate": 0.0006662436548223351, "loss": 0.8345, "step": 2625 }, { "epoch": 0.13351778756456956, "grad_norm": 0.07164116478569996, "learning_rate": 0.0006675126903553299, "loss": 0.8304, "step": 2630 }, { "epoch": 0.13377162366260106, "grad_norm": 0.25139420765606685, "learning_rate": 0.0006687817258883248, "loss": 0.856, "step": 2635 }, { "epoch": 0.13402545976063257, "grad_norm": 0.10133393996070832, "learning_rate": 0.0006700507614213198, "loss": 0.8389, "step": 2640 }, { "epoch": 0.13427929585866405, "grad_norm": 0.08101056535343804, "learning_rate": 0.0006713197969543148, "loss": 0.8564, "step": 2645 }, { "epoch": 0.13453313195669556, "grad_norm": 0.09460574494123876, "learning_rate": 0.0006725888324873097, "loss": 0.8332, "step": 2650 }, { "epoch": 0.13478696805472706, "grad_norm": 0.06756827943270828, "learning_rate": 0.0006738578680203045, "loss": 0.8619, "step": 2655 }, { "epoch": 0.13504080415275857, "grad_norm": 0.0681568067845147, "learning_rate": 0.0006751269035532995, "loss": 0.8684, "step": 2660 }, { "epoch": 0.13529464025079008, "grad_norm": 0.0654671192532249, "learning_rate": 0.0006763959390862944, "loss": 0.8296, "step": 2665 }, { "epoch": 0.13554847634882156, "grad_norm": 0.10294586620689947, "learning_rate": 0.0006776649746192894, "loss": 0.849, "step": 2670 }, { "epoch": 0.13580231244685306, "grad_norm": 0.063207674214321, "learning_rate": 0.0006789340101522842, "loss": 0.8397, "step": 2675 }, { "epoch": 0.13605614854488457, "grad_norm": 0.06363693083610134, "learning_rate": 0.0006802030456852792, "loss": 0.8227, "step": 2680 }, { "epoch": 0.13630998464291608, "grad_norm": 0.06282667025656029, "learning_rate": 0.0006814720812182741, "loss": 0.8021, "step": 2685 }, { "epoch": 0.13656382074094758, "grad_norm": 0.06451957218809772, "learning_rate": 0.0006827411167512691, "loss": 0.8189, "step": 2690 }, { "epoch": 0.13681765683897906, "grad_norm": 0.06473961198490788, "learning_rate": 0.000684010152284264, "loss": 0.8463, "step": 2695 }, { "epoch": 0.13707149293701057, "grad_norm": 0.11265424839609507, "learning_rate": 0.0006852791878172588, "loss": 0.8718, "step": 2700 }, { "epoch": 0.13732532903504208, "grad_norm": 0.07159882729463306, "learning_rate": 0.0006865482233502538, "loss": 0.8614, "step": 2705 }, { "epoch": 0.13757916513307358, "grad_norm": 0.08990723841608071, "learning_rate": 0.0006878172588832488, "loss": 0.8377, "step": 2710 }, { "epoch": 0.13783300123110506, "grad_norm": 0.07586784501561175, "learning_rate": 0.0006890862944162437, "loss": 0.8029, "step": 2715 }, { "epoch": 0.13808683732913657, "grad_norm": 0.06941717775875096, "learning_rate": 0.0006903553299492385, "loss": 0.8579, "step": 2720 }, { "epoch": 0.13834067342716808, "grad_norm": 0.07525718408698982, "learning_rate": 0.0006916243654822335, "loss": 0.8056, "step": 2725 }, { "epoch": 0.13859450952519958, "grad_norm": 0.0813539832802519, "learning_rate": 0.0006928934010152285, "loss": 0.8223, "step": 2730 }, { "epoch": 0.1388483456232311, "grad_norm": 0.09504633124255094, "learning_rate": 0.0006941624365482235, "loss": 0.8036, "step": 2735 }, { "epoch": 0.13910218172126257, "grad_norm": 0.0686083730597281, "learning_rate": 0.0006954314720812182, "loss": 0.8635, "step": 2740 }, { "epoch": 0.13935601781929408, "grad_norm": 0.07532354720176097, "learning_rate": 0.0006967005076142132, "loss": 0.8885, "step": 2745 }, { "epoch": 0.13960985391732558, "grad_norm": 0.061919035281015104, "learning_rate": 0.0006979695431472082, "loss": 0.8384, "step": 2750 }, { "epoch": 0.1398636900153571, "grad_norm": 0.06473754118346722, "learning_rate": 0.0006992385786802031, "loss": 0.8464, "step": 2755 }, { "epoch": 0.1401175261133886, "grad_norm": 0.07035422358526826, "learning_rate": 0.0007005076142131979, "loss": 0.8633, "step": 2760 }, { "epoch": 0.14037136221142008, "grad_norm": 0.07107599161205821, "learning_rate": 0.0007017766497461929, "loss": 0.806, "step": 2765 }, { "epoch": 0.14062519830945158, "grad_norm": 0.07453254673469827, "learning_rate": 0.0007030456852791879, "loss": 0.7992, "step": 2770 }, { "epoch": 0.1408790344074831, "grad_norm": 0.0648465339476217, "learning_rate": 0.0007043147208121828, "loss": 0.8229, "step": 2775 }, { "epoch": 0.1411328705055146, "grad_norm": 0.06469175236502007, "learning_rate": 0.0007055837563451776, "loss": 0.8479, "step": 2780 }, { "epoch": 0.1413867066035461, "grad_norm": 0.06338359983700383, "learning_rate": 0.0007068527918781726, "loss": 0.8302, "step": 2785 }, { "epoch": 0.14164054270157758, "grad_norm": 0.06292782593662385, "learning_rate": 0.0007081218274111675, "loss": 0.8499, "step": 2790 }, { "epoch": 0.1418943787996091, "grad_norm": 0.06396984976110688, "learning_rate": 0.0007093908629441625, "loss": 0.8096, "step": 2795 }, { "epoch": 0.1421482148976406, "grad_norm": 0.08587959295629471, "learning_rate": 0.0007106598984771574, "loss": 0.8863, "step": 2800 }, { "epoch": 0.1424020509956721, "grad_norm": 0.0971500687509984, "learning_rate": 0.0007119289340101523, "loss": 0.8672, "step": 2805 }, { "epoch": 0.1426558870937036, "grad_norm": 0.08325850122779803, "learning_rate": 0.0007131979695431472, "loss": 0.86, "step": 2810 }, { "epoch": 0.1429097231917351, "grad_norm": 0.08457831955631115, "learning_rate": 0.0007144670050761422, "loss": 0.829, "step": 2815 }, { "epoch": 0.1431635592897666, "grad_norm": 0.3736240807558453, "learning_rate": 0.0007157360406091371, "loss": 0.8164, "step": 2820 }, { "epoch": 0.1434173953877981, "grad_norm": 0.06479040852202575, "learning_rate": 0.0007170050761421319, "loss": 0.8335, "step": 2825 }, { "epoch": 0.1436712314858296, "grad_norm": 0.06995534388978611, "learning_rate": 0.0007182741116751269, "loss": 0.8445, "step": 2830 }, { "epoch": 0.1439250675838611, "grad_norm": 0.08617492122713025, "learning_rate": 0.0007195431472081219, "loss": 0.96, "step": 2835 }, { "epoch": 0.1441789036818926, "grad_norm": 0.0862460216878859, "learning_rate": 0.0007208121827411168, "loss": 0.859, "step": 2840 }, { "epoch": 0.1444327397799241, "grad_norm": 0.07063432955302881, "learning_rate": 0.0007220812182741116, "loss": 0.891, "step": 2845 }, { "epoch": 0.1446865758779556, "grad_norm": 0.06445300427885335, "learning_rate": 0.0007233502538071066, "loss": 0.8148, "step": 2850 }, { "epoch": 0.14494041197598712, "grad_norm": 0.07119353945246298, "learning_rate": 0.0007246192893401016, "loss": 0.8522, "step": 2855 }, { "epoch": 0.1451942480740186, "grad_norm": 0.1036399712800794, "learning_rate": 0.0007258883248730965, "loss": 0.8211, "step": 2860 }, { "epoch": 0.1454480841720501, "grad_norm": 0.07386290231202053, "learning_rate": 0.0007271573604060913, "loss": 0.8422, "step": 2865 }, { "epoch": 0.1457019202700816, "grad_norm": 0.06840914328088271, "learning_rate": 0.0007284263959390863, "loss": 0.8191, "step": 2870 }, { "epoch": 0.14595575636811312, "grad_norm": 0.06152812446711413, "learning_rate": 0.0007296954314720813, "loss": 0.8582, "step": 2875 }, { "epoch": 0.14620959246614462, "grad_norm": 0.07617723648912415, "learning_rate": 0.0007309644670050762, "loss": 0.8342, "step": 2880 }, { "epoch": 0.1464634285641761, "grad_norm": 0.06830508316285046, "learning_rate": 0.000732233502538071, "loss": 0.8455, "step": 2885 }, { "epoch": 0.1467172646622076, "grad_norm": 0.07109533839063771, "learning_rate": 0.000733502538071066, "loss": 0.8371, "step": 2890 }, { "epoch": 0.14697110076023912, "grad_norm": 0.0633870522739658, "learning_rate": 0.0007347715736040609, "loss": 0.8295, "step": 2895 }, { "epoch": 0.14722493685827062, "grad_norm": 0.08016248717756944, "learning_rate": 0.0007360406091370559, "loss": 0.8353, "step": 2900 }, { "epoch": 0.14747877295630213, "grad_norm": 0.0690716992050178, "learning_rate": 0.0007373096446700508, "loss": 0.8176, "step": 2905 }, { "epoch": 0.1477326090543336, "grad_norm": 0.06780002353268787, "learning_rate": 0.0007385786802030457, "loss": 0.8163, "step": 2910 }, { "epoch": 0.14798644515236511, "grad_norm": 0.09537836147090958, "learning_rate": 0.0007398477157360406, "loss": 0.8185, "step": 2915 }, { "epoch": 0.14824028125039662, "grad_norm": 0.06259652417653161, "learning_rate": 0.0007411167512690356, "loss": 0.8103, "step": 2920 }, { "epoch": 0.14849411734842813, "grad_norm": 0.06437078353737871, "learning_rate": 0.0007423857868020305, "loss": 0.8113, "step": 2925 }, { "epoch": 0.1487479534464596, "grad_norm": 0.07112978235266368, "learning_rate": 0.0007436548223350253, "loss": 0.8456, "step": 2930 }, { "epoch": 0.14900178954449111, "grad_norm": 0.06516166956595945, "learning_rate": 0.0007449238578680203, "loss": 0.8484, "step": 2935 }, { "epoch": 0.14925562564252262, "grad_norm": 0.07121033941722137, "learning_rate": 0.0007461928934010153, "loss": 0.8118, "step": 2940 }, { "epoch": 0.14950946174055413, "grad_norm": 0.07365736789905115, "learning_rate": 0.0007474619289340102, "loss": 0.827, "step": 2945 }, { "epoch": 0.14976329783858564, "grad_norm": 0.05978590772936408, "learning_rate": 0.000748730964467005, "loss": 0.8236, "step": 2950 }, { "epoch": 0.15001713393661711, "grad_norm": 0.07299760032114484, "learning_rate": 0.00075, "loss": 0.8277, "step": 2955 }, { "epoch": 0.15027097003464862, "grad_norm": 0.06750804208350374, "learning_rate": 0.000751269035532995, "loss": 0.8525, "step": 2960 }, { "epoch": 0.15052480613268013, "grad_norm": 0.06625521121984493, "learning_rate": 0.0007525380710659899, "loss": 0.8694, "step": 2965 }, { "epoch": 0.15077864223071163, "grad_norm": 0.0630451000193105, "learning_rate": 0.0007538071065989847, "loss": 0.8685, "step": 2970 }, { "epoch": 0.15103247832874314, "grad_norm": 0.05851726940971779, "learning_rate": 0.0007550761421319797, "loss": 0.8069, "step": 2975 }, { "epoch": 0.15128631442677462, "grad_norm": 0.06834129589197491, "learning_rate": 0.0007563451776649747, "loss": 0.8756, "step": 2980 }, { "epoch": 0.15154015052480613, "grad_norm": 0.08143215094646693, "learning_rate": 0.0007576142131979696, "loss": 0.8615, "step": 2985 }, { "epoch": 0.15179398662283763, "grad_norm": 0.06318525141851603, "learning_rate": 0.0007588832487309644, "loss": 0.8647, "step": 2990 }, { "epoch": 0.15204782272086914, "grad_norm": 0.073459999733397, "learning_rate": 0.0007601522842639594, "loss": 0.8283, "step": 2995 }, { "epoch": 0.15230165881890065, "grad_norm": 0.056480010999302166, "learning_rate": 0.0007614213197969543, "loss": 0.8663, "step": 3000 }, { "epoch": 0.15255549491693213, "grad_norm": 0.07189641699298936, "learning_rate": 0.0007626903553299493, "loss": 0.8159, "step": 3005 }, { "epoch": 0.15280933101496363, "grad_norm": 0.06192810521293964, "learning_rate": 0.0007639593908629442, "loss": 0.834, "step": 3010 }, { "epoch": 0.15306316711299514, "grad_norm": 0.07201975435762609, "learning_rate": 0.0007652284263959391, "loss": 0.8271, "step": 3015 }, { "epoch": 0.15331700321102665, "grad_norm": 0.10917294835444948, "learning_rate": 0.000766497461928934, "loss": 0.8274, "step": 3020 }, { "epoch": 0.15357083930905815, "grad_norm": 0.09104900856919416, "learning_rate": 0.000767766497461929, "loss": 0.8467, "step": 3025 }, { "epoch": 0.15382467540708963, "grad_norm": 0.06968360209062774, "learning_rate": 0.0007690355329949239, "loss": 0.818, "step": 3030 }, { "epoch": 0.15407851150512114, "grad_norm": 0.07110838415793444, "learning_rate": 0.0007703045685279187, "loss": 0.8356, "step": 3035 }, { "epoch": 0.15433234760315265, "grad_norm": 0.060974273064899775, "learning_rate": 0.0007715736040609137, "loss": 0.8678, "step": 3040 }, { "epoch": 0.15458618370118415, "grad_norm": 0.062026465273006176, "learning_rate": 0.0007728426395939087, "loss": 0.862, "step": 3045 }, { "epoch": 0.15484001979921563, "grad_norm": 0.06377278875051696, "learning_rate": 0.0007741116751269036, "loss": 0.8215, "step": 3050 }, { "epoch": 0.15509385589724714, "grad_norm": 0.062421057088353764, "learning_rate": 0.0007753807106598984, "loss": 0.8051, "step": 3055 }, { "epoch": 0.15534769199527865, "grad_norm": 0.06286514955496539, "learning_rate": 0.0007766497461928934, "loss": 0.8165, "step": 3060 }, { "epoch": 0.15560152809331015, "grad_norm": 1.0116870940794527, "learning_rate": 0.0007779187817258884, "loss": 0.8045, "step": 3065 }, { "epoch": 0.15585536419134166, "grad_norm": 0.13898032503505145, "learning_rate": 0.0007791878172588833, "loss": 0.8078, "step": 3070 }, { "epoch": 0.15610920028937314, "grad_norm": 0.08530043259977693, "learning_rate": 0.0007804568527918781, "loss": 0.8559, "step": 3075 }, { "epoch": 0.15636303638740465, "grad_norm": 0.0834665449731907, "learning_rate": 0.0007817258883248731, "loss": 0.8762, "step": 3080 }, { "epoch": 0.15661687248543615, "grad_norm": 0.06143676408663517, "learning_rate": 0.0007829949238578681, "loss": 0.8297, "step": 3085 }, { "epoch": 0.15687070858346766, "grad_norm": 0.06938090756748962, "learning_rate": 0.000784263959390863, "loss": 0.8074, "step": 3090 }, { "epoch": 0.15712454468149917, "grad_norm": 0.06275908342841607, "learning_rate": 0.0007855329949238578, "loss": 0.8841, "step": 3095 }, { "epoch": 0.15737838077953065, "grad_norm": 0.05987360823103374, "learning_rate": 0.0007868020304568528, "loss": 0.8144, "step": 3100 }, { "epoch": 0.15763221687756215, "grad_norm": 0.23639314472820383, "learning_rate": 0.0007880710659898477, "loss": 0.87, "step": 3105 }, { "epoch": 0.15788605297559366, "grad_norm": 0.07609938425068064, "learning_rate": 0.0007893401015228427, "loss": 0.8317, "step": 3110 }, { "epoch": 0.15813988907362517, "grad_norm": 0.06460844561494195, "learning_rate": 0.0007906091370558376, "loss": 0.833, "step": 3115 }, { "epoch": 0.15839372517165667, "grad_norm": 0.07143255658475585, "learning_rate": 0.0007918781725888325, "loss": 0.8642, "step": 3120 }, { "epoch": 0.15864756126968815, "grad_norm": 0.06038494749511021, "learning_rate": 0.0007931472081218274, "loss": 0.8267, "step": 3125 }, { "epoch": 0.15890139736771966, "grad_norm": 0.0589933194311713, "learning_rate": 0.0007944162436548224, "loss": 0.8618, "step": 3130 }, { "epoch": 0.15915523346575117, "grad_norm": 0.0750439826415746, "learning_rate": 0.0007956852791878173, "loss": 0.7645, "step": 3135 }, { "epoch": 0.15940906956378267, "grad_norm": 0.06380484658961832, "learning_rate": 0.0007969543147208121, "loss": 0.8142, "step": 3140 }, { "epoch": 0.15966290566181415, "grad_norm": 0.07118689334114195, "learning_rate": 0.0007982233502538071, "loss": 0.8377, "step": 3145 }, { "epoch": 0.15991674175984566, "grad_norm": 0.06715728037704204, "learning_rate": 0.0007994923857868021, "loss": 0.8359, "step": 3150 }, { "epoch": 0.16017057785787717, "grad_norm": 0.06574570628853313, "learning_rate": 0.000800761421319797, "loss": 0.7984, "step": 3155 }, { "epoch": 0.16042441395590867, "grad_norm": 0.05672055814709804, "learning_rate": 0.0008020304568527918, "loss": 0.827, "step": 3160 }, { "epoch": 0.16067825005394018, "grad_norm": 0.06800324296509697, "learning_rate": 0.0008032994923857868, "loss": 0.8425, "step": 3165 }, { "epoch": 0.16093208615197166, "grad_norm": 0.05990596678493692, "learning_rate": 0.0008045685279187818, "loss": 0.8632, "step": 3170 }, { "epoch": 0.16118592225000317, "grad_norm": 0.054816477705645184, "learning_rate": 0.0008058375634517766, "loss": 0.8487, "step": 3175 }, { "epoch": 0.16143975834803467, "grad_norm": 0.05761443479140967, "learning_rate": 0.0008071065989847715, "loss": 0.8001, "step": 3180 }, { "epoch": 0.16169359444606618, "grad_norm": 0.06020905904879265, "learning_rate": 0.0008083756345177665, "loss": 0.8468, "step": 3185 }, { "epoch": 0.1619474305440977, "grad_norm": 0.06920728005625856, "learning_rate": 0.0008096446700507615, "loss": 0.8535, "step": 3190 }, { "epoch": 0.16220126664212917, "grad_norm": 0.09418683152305958, "learning_rate": 0.0008109137055837564, "loss": 0.8135, "step": 3195 }, { "epoch": 0.16245510274016067, "grad_norm": 1.4740166738000169, "learning_rate": 0.0008121827411167512, "loss": 0.856, "step": 3200 }, { "epoch": 0.16270893883819218, "grad_norm": 0.11568591356967266, "learning_rate": 0.0008134517766497462, "loss": 0.917, "step": 3205 }, { "epoch": 0.1629627749362237, "grad_norm": 0.09381989074111174, "learning_rate": 0.0008147208121827412, "loss": 0.8894, "step": 3210 }, { "epoch": 0.1632166110342552, "grad_norm": 0.15178450584403697, "learning_rate": 0.0008159898477157361, "loss": 0.9294, "step": 3215 }, { "epoch": 0.16347044713228667, "grad_norm": 0.08010540867716207, "learning_rate": 0.000817258883248731, "loss": 0.9055, "step": 3220 }, { "epoch": 0.16372428323031818, "grad_norm": 0.07726094020367896, "learning_rate": 0.0008185279187817259, "loss": 0.8115, "step": 3225 }, { "epoch": 0.1639781193283497, "grad_norm": 0.0744572188196858, "learning_rate": 0.0008197969543147208, "loss": 0.8357, "step": 3230 }, { "epoch": 0.1642319554263812, "grad_norm": 0.06912820826611779, "learning_rate": 0.0008210659898477158, "loss": 0.8522, "step": 3235 }, { "epoch": 0.1644857915244127, "grad_norm": 0.06152299983951946, "learning_rate": 0.0008223350253807107, "loss": 0.854, "step": 3240 }, { "epoch": 0.16473962762244418, "grad_norm": 0.06492061808881518, "learning_rate": 0.0008236040609137056, "loss": 0.8266, "step": 3245 }, { "epoch": 0.16499346372047569, "grad_norm": 0.06578875091664257, "learning_rate": 0.0008248730964467005, "loss": 0.8778, "step": 3250 }, { "epoch": 0.1652472998185072, "grad_norm": 0.06989485384876433, "learning_rate": 0.0008261421319796955, "loss": 0.8194, "step": 3255 }, { "epoch": 0.1655011359165387, "grad_norm": 0.060104050960036334, "learning_rate": 0.0008274111675126904, "loss": 0.8351, "step": 3260 }, { "epoch": 0.16575497201457018, "grad_norm": 0.21175148701050756, "learning_rate": 0.0008286802030456852, "loss": 0.8004, "step": 3265 }, { "epoch": 0.16600880811260169, "grad_norm": 0.07380919845435023, "learning_rate": 0.0008299492385786802, "loss": 0.809, "step": 3270 }, { "epoch": 0.1662626442106332, "grad_norm": 0.06730197618030688, "learning_rate": 0.0008312182741116752, "loss": 0.8388, "step": 3275 }, { "epoch": 0.1665164803086647, "grad_norm": 0.05565076466206833, "learning_rate": 0.0008324873096446702, "loss": 0.8379, "step": 3280 }, { "epoch": 0.1667703164066962, "grad_norm": 0.08244140854739943, "learning_rate": 0.0008337563451776649, "loss": 0.8432, "step": 3285 }, { "epoch": 0.16702415250472769, "grad_norm": 0.055577520450934156, "learning_rate": 0.0008350253807106599, "loss": 0.8635, "step": 3290 }, { "epoch": 0.1672779886027592, "grad_norm": 0.058091252590638694, "learning_rate": 0.0008362944162436549, "loss": 0.8197, "step": 3295 }, { "epoch": 0.1675318247007907, "grad_norm": 0.06054133750628262, "learning_rate": 0.0008375634517766498, "loss": 0.8498, "step": 3300 }, { "epoch": 0.1677856607988222, "grad_norm": 0.0682223953256468, "learning_rate": 0.0008388324873096446, "loss": 0.8459, "step": 3305 }, { "epoch": 0.1680394968968537, "grad_norm": 0.06412135649531706, "learning_rate": 0.0008401015228426396, "loss": 0.8039, "step": 3310 }, { "epoch": 0.1682933329948852, "grad_norm": 0.06465496710268755, "learning_rate": 0.0008413705583756346, "loss": 0.7855, "step": 3315 }, { "epoch": 0.1685471690929167, "grad_norm": 0.060290545605787976, "learning_rate": 0.0008426395939086295, "loss": 0.8396, "step": 3320 }, { "epoch": 0.1688010051909482, "grad_norm": 0.055084947870993696, "learning_rate": 0.0008439086294416243, "loss": 0.8472, "step": 3325 }, { "epoch": 0.1690548412889797, "grad_norm": 0.06040130584774053, "learning_rate": 0.0008451776649746193, "loss": 0.7638, "step": 3330 }, { "epoch": 0.16930867738701122, "grad_norm": 0.07689569615566774, "learning_rate": 0.0008464467005076142, "loss": 0.8287, "step": 3335 }, { "epoch": 0.1695625134850427, "grad_norm": 0.07248056241681557, "learning_rate": 0.0008477157360406092, "loss": 0.8461, "step": 3340 }, { "epoch": 0.1698163495830742, "grad_norm": 0.0737996581292565, "learning_rate": 0.0008489847715736041, "loss": 0.8456, "step": 3345 }, { "epoch": 0.1700701856811057, "grad_norm": 0.07286436277951984, "learning_rate": 0.000850253807106599, "loss": 0.8179, "step": 3350 }, { "epoch": 0.17032402177913722, "grad_norm": 0.07983958827566592, "learning_rate": 0.0008515228426395939, "loss": 0.8729, "step": 3355 }, { "epoch": 0.17057785787716873, "grad_norm": 0.07329136787602354, "learning_rate": 0.0008527918781725889, "loss": 0.8225, "step": 3360 }, { "epoch": 0.1708316939752002, "grad_norm": 0.059896966113481775, "learning_rate": 0.0008540609137055838, "loss": 0.833, "step": 3365 }, { "epoch": 0.1710855300732317, "grad_norm": 0.06815931331410045, "learning_rate": 0.0008553299492385786, "loss": 0.857, "step": 3370 }, { "epoch": 0.17133936617126322, "grad_norm": 0.7248091790028831, "learning_rate": 0.0008565989847715736, "loss": 0.8457, "step": 3375 }, { "epoch": 0.17159320226929473, "grad_norm": 0.10537813583801574, "learning_rate": 0.0008578680203045686, "loss": 0.8366, "step": 3380 }, { "epoch": 0.1718470383673262, "grad_norm": 0.09124897368931231, "learning_rate": 0.0008591370558375635, "loss": 0.881, "step": 3385 }, { "epoch": 0.1721008744653577, "grad_norm": 0.08801106262549162, "learning_rate": 0.0008604060913705583, "loss": 0.8513, "step": 3390 }, { "epoch": 0.17235471056338922, "grad_norm": 0.08653757075474132, "learning_rate": 0.0008616751269035533, "loss": 0.8313, "step": 3395 }, { "epoch": 0.17260854666142073, "grad_norm": 0.06864338839014776, "learning_rate": 0.0008629441624365483, "loss": 0.8232, "step": 3400 }, { "epoch": 0.17286238275945223, "grad_norm": 0.0639604817726062, "learning_rate": 0.0008642131979695432, "loss": 0.8082, "step": 3405 }, { "epoch": 0.1731162188574837, "grad_norm": 0.057403934358076655, "learning_rate": 0.000865482233502538, "loss": 0.8913, "step": 3410 }, { "epoch": 0.17337005495551522, "grad_norm": 0.056808236218498474, "learning_rate": 0.000866751269035533, "loss": 0.8718, "step": 3415 }, { "epoch": 0.17362389105354673, "grad_norm": 1.9167869377430031, "learning_rate": 0.000868020304568528, "loss": 0.8748, "step": 3420 }, { "epoch": 0.17387772715157823, "grad_norm": 0.10126765080327339, "learning_rate": 0.0008692893401015229, "loss": 0.8568, "step": 3425 }, { "epoch": 0.17413156324960974, "grad_norm": 0.10283785650468462, "learning_rate": 0.0008705583756345177, "loss": 0.8966, "step": 3430 }, { "epoch": 0.17438539934764122, "grad_norm": 0.07626198710740706, "learning_rate": 0.0008718274111675127, "loss": 0.8363, "step": 3435 }, { "epoch": 0.17463923544567272, "grad_norm": 0.060911310170177854, "learning_rate": 0.0008730964467005076, "loss": 0.7989, "step": 3440 }, { "epoch": 0.17489307154370423, "grad_norm": 0.06414901038043923, "learning_rate": 0.0008743654822335026, "loss": 0.8538, "step": 3445 }, { "epoch": 0.17514690764173574, "grad_norm": 0.05969416178900671, "learning_rate": 0.0008756345177664975, "loss": 0.82, "step": 3450 }, { "epoch": 0.17540074373976725, "grad_norm": 0.11244714548975475, "learning_rate": 0.0008769035532994924, "loss": 0.863, "step": 3455 }, { "epoch": 0.17565457983779872, "grad_norm": 0.05756776724027005, "learning_rate": 0.0008781725888324873, "loss": 0.794, "step": 3460 }, { "epoch": 0.17590841593583023, "grad_norm": 0.06048123396410719, "learning_rate": 0.0008794416243654823, "loss": 0.8231, "step": 3465 }, { "epoch": 0.17616225203386174, "grad_norm": 0.06377135644176697, "learning_rate": 0.0008807106598984772, "loss": 0.8489, "step": 3470 }, { "epoch": 0.17641608813189325, "grad_norm": 0.05781888669489183, "learning_rate": 0.000881979695431472, "loss": 0.8111, "step": 3475 }, { "epoch": 0.17666992422992472, "grad_norm": 0.05910693024136481, "learning_rate": 0.000883248730964467, "loss": 0.8156, "step": 3480 }, { "epoch": 0.17692376032795623, "grad_norm": 0.05578517189410424, "learning_rate": 0.000884517766497462, "loss": 0.8057, "step": 3485 }, { "epoch": 0.17717759642598774, "grad_norm": 0.062148723780881106, "learning_rate": 0.0008857868020304569, "loss": 0.8263, "step": 3490 }, { "epoch": 0.17743143252401924, "grad_norm": 0.062417094944652425, "learning_rate": 0.0008870558375634517, "loss": 0.8807, "step": 3495 }, { "epoch": 0.17768526862205075, "grad_norm": 0.07354576392355382, "learning_rate": 0.0008883248730964467, "loss": 0.8356, "step": 3500 }, { "epoch": 0.17793910472008223, "grad_norm": 0.07695421052258823, "learning_rate": 0.0008895939086294417, "loss": 0.8714, "step": 3505 }, { "epoch": 0.17819294081811374, "grad_norm": 0.059611425080563246, "learning_rate": 0.0008908629441624366, "loss": 0.8105, "step": 3510 }, { "epoch": 0.17844677691614524, "grad_norm": 0.06450870820780756, "learning_rate": 0.0008921319796954314, "loss": 0.8098, "step": 3515 }, { "epoch": 0.17870061301417675, "grad_norm": 0.06419354058401365, "learning_rate": 0.0008934010152284264, "loss": 0.8581, "step": 3520 }, { "epoch": 0.17895444911220826, "grad_norm": 0.07690254588743231, "learning_rate": 0.0008946700507614214, "loss": 0.8194, "step": 3525 }, { "epoch": 0.17920828521023974, "grad_norm": 0.06607495577395217, "learning_rate": 0.0008959390862944163, "loss": 0.8138, "step": 3530 }, { "epoch": 0.17946212130827124, "grad_norm": 0.06903853360626681, "learning_rate": 0.0008972081218274111, "loss": 0.862, "step": 3535 }, { "epoch": 0.17971595740630275, "grad_norm": 0.06405657627201167, "learning_rate": 0.0008984771573604061, "loss": 0.8207, "step": 3540 }, { "epoch": 0.17996979350433426, "grad_norm": 0.08987483598937311, "learning_rate": 0.000899746192893401, "loss": 0.8404, "step": 3545 }, { "epoch": 0.18022362960236576, "grad_norm": 0.055244411722830546, "learning_rate": 0.000901015228426396, "loss": 0.8638, "step": 3550 }, { "epoch": 0.18047746570039724, "grad_norm": 0.06485964435719, "learning_rate": 0.0009022842639593909, "loss": 0.8141, "step": 3555 }, { "epoch": 0.18073130179842875, "grad_norm": 0.05506599421424217, "learning_rate": 0.0009035532994923858, "loss": 0.8126, "step": 3560 }, { "epoch": 0.18098513789646026, "grad_norm": 0.057572197953585834, "learning_rate": 0.0009048223350253807, "loss": 0.8521, "step": 3565 }, { "epoch": 0.18123897399449176, "grad_norm": 0.05522804636264892, "learning_rate": 0.0009060913705583757, "loss": 0.8248, "step": 3570 }, { "epoch": 0.18149281009252327, "grad_norm": 0.057885461430674454, "learning_rate": 0.0009073604060913706, "loss": 0.8069, "step": 3575 }, { "epoch": 0.18174664619055475, "grad_norm": 0.052099889453456706, "learning_rate": 0.0009086294416243654, "loss": 0.8009, "step": 3580 }, { "epoch": 0.18200048228858626, "grad_norm": 0.05224867135171047, "learning_rate": 0.0009098984771573604, "loss": 0.8458, "step": 3585 }, { "epoch": 0.18225431838661776, "grad_norm": 0.057222274638652315, "learning_rate": 0.0009111675126903554, "loss": 0.8574, "step": 3590 }, { "epoch": 0.18250815448464927, "grad_norm": 0.05374257609956475, "learning_rate": 0.0009124365482233503, "loss": 0.8311, "step": 3595 }, { "epoch": 0.18276199058268075, "grad_norm": 0.06589549556169298, "learning_rate": 0.0009137055837563451, "loss": 0.8392, "step": 3600 }, { "epoch": 0.18301582668071226, "grad_norm": 0.05402788952501294, "learning_rate": 0.0009149746192893401, "loss": 0.8144, "step": 3605 }, { "epoch": 0.18326966277874376, "grad_norm": 0.05483522974233196, "learning_rate": 0.0009162436548223351, "loss": 0.8001, "step": 3610 }, { "epoch": 0.18352349887677527, "grad_norm": 0.054625714823284495, "learning_rate": 0.0009175126903553299, "loss": 0.8117, "step": 3615 }, { "epoch": 0.18377733497480678, "grad_norm": 0.05165385193820529, "learning_rate": 0.0009187817258883248, "loss": 0.8357, "step": 3620 }, { "epoch": 0.18403117107283826, "grad_norm": 0.05493907914770616, "learning_rate": 0.0009200507614213198, "loss": 0.7741, "step": 3625 }, { "epoch": 0.18428500717086976, "grad_norm": 0.06006083227958731, "learning_rate": 0.0009213197969543148, "loss": 0.8393, "step": 3630 }, { "epoch": 0.18453884326890127, "grad_norm": 0.05570585209378891, "learning_rate": 0.0009225888324873097, "loss": 0.8045, "step": 3635 }, { "epoch": 0.18479267936693278, "grad_norm": 0.06069269488782517, "learning_rate": 0.0009238578680203045, "loss": 0.7763, "step": 3640 }, { "epoch": 0.18504651546496428, "grad_norm": 0.05971581893638315, "learning_rate": 0.0009251269035532995, "loss": 0.7793, "step": 3645 }, { "epoch": 0.18530035156299576, "grad_norm": 0.06057376348923683, "learning_rate": 0.0009263959390862944, "loss": 0.7826, "step": 3650 }, { "epoch": 0.18555418766102727, "grad_norm": 0.059579601813629327, "learning_rate": 0.0009276649746192894, "loss": 0.8021, "step": 3655 }, { "epoch": 0.18580802375905878, "grad_norm": 0.06055660877775957, "learning_rate": 0.0009289340101522843, "loss": 0.7836, "step": 3660 }, { "epoch": 0.18606185985709028, "grad_norm": 0.06258150814365725, "learning_rate": 0.0009302030456852792, "loss": 0.8568, "step": 3665 }, { "epoch": 0.1863156959551218, "grad_norm": 0.06590479413219764, "learning_rate": 0.0009314720812182741, "loss": 0.8145, "step": 3670 }, { "epoch": 0.18656953205315327, "grad_norm": 0.055132193919511976, "learning_rate": 0.0009327411167512691, "loss": 0.8409, "step": 3675 }, { "epoch": 0.18682336815118478, "grad_norm": 0.06024829844772436, "learning_rate": 0.000934010152284264, "loss": 0.8294, "step": 3680 }, { "epoch": 0.18707720424921628, "grad_norm": 0.059354781508770764, "learning_rate": 0.0009352791878172588, "loss": 0.8264, "step": 3685 }, { "epoch": 0.1873310403472478, "grad_norm": 0.056255337870931454, "learning_rate": 0.0009365482233502538, "loss": 0.7943, "step": 3690 }, { "epoch": 0.18758487644527927, "grad_norm": 0.063479066177461, "learning_rate": 0.0009378172588832488, "loss": 0.7935, "step": 3695 }, { "epoch": 0.18783871254331078, "grad_norm": 0.05320549666322313, "learning_rate": 0.0009390862944162437, "loss": 0.7906, "step": 3700 }, { "epoch": 0.18809254864134228, "grad_norm": 0.055647818511846116, "learning_rate": 0.0009403553299492385, "loss": 0.8177, "step": 3705 }, { "epoch": 0.1883463847393738, "grad_norm": 0.05285949308579886, "learning_rate": 0.0009416243654822335, "loss": 0.8456, "step": 3710 }, { "epoch": 0.1886002208374053, "grad_norm": 0.054578564166880165, "learning_rate": 0.0009428934010152285, "loss": 0.8101, "step": 3715 }, { "epoch": 0.18885405693543678, "grad_norm": 0.0607465938248711, "learning_rate": 0.0009441624365482235, "loss": 0.8409, "step": 3720 }, { "epoch": 0.18910789303346828, "grad_norm": 0.05912400335227877, "learning_rate": 0.0009454314720812182, "loss": 0.8176, "step": 3725 }, { "epoch": 0.1893617291314998, "grad_norm": 0.05399627990725248, "learning_rate": 0.0009467005076142132, "loss": 0.7896, "step": 3730 }, { "epoch": 0.1896155652295313, "grad_norm": 0.06474601864013682, "learning_rate": 0.0009479695431472082, "loss": 0.8616, "step": 3735 }, { "epoch": 0.1898694013275628, "grad_norm": 0.05602306613301162, "learning_rate": 0.0009492385786802031, "loss": 0.7871, "step": 3740 }, { "epoch": 0.19012323742559428, "grad_norm": 0.26832831118121225, "learning_rate": 0.000950507614213198, "loss": 0.8126, "step": 3745 }, { "epoch": 0.1903770735236258, "grad_norm": 0.05952176423268163, "learning_rate": 0.0009517766497461929, "loss": 0.7946, "step": 3750 }, { "epoch": 0.1906309096216573, "grad_norm": 0.07002262941116393, "learning_rate": 0.0009530456852791879, "loss": 0.8248, "step": 3755 }, { "epoch": 0.1908847457196888, "grad_norm": 0.0568113805253638, "learning_rate": 0.0009543147208121828, "loss": 0.8183, "step": 3760 }, { "epoch": 0.1911385818177203, "grad_norm": 0.06044140646714903, "learning_rate": 0.0009555837563451777, "loss": 0.8215, "step": 3765 }, { "epoch": 0.1913924179157518, "grad_norm": 0.6223678359673156, "learning_rate": 0.0009568527918781726, "loss": 0.7512, "step": 3770 }, { "epoch": 0.1916462540137833, "grad_norm": 0.07853478469708762, "learning_rate": 0.0009581218274111675, "loss": 0.8174, "step": 3775 }, { "epoch": 0.1919000901118148, "grad_norm": 0.06141424817324027, "learning_rate": 0.0009593908629441625, "loss": 0.7755, "step": 3780 }, { "epoch": 0.1921539262098463, "grad_norm": 0.06034518145797146, "learning_rate": 0.0009606598984771574, "loss": 0.84, "step": 3785 }, { "epoch": 0.19240776230787782, "grad_norm": 0.06120196025659235, "learning_rate": 0.0009619289340101523, "loss": 0.8473, "step": 3790 }, { "epoch": 0.1926615984059093, "grad_norm": 0.0651774600956608, "learning_rate": 0.0009631979695431472, "loss": 0.7942, "step": 3795 }, { "epoch": 0.1929154345039408, "grad_norm": 0.05455937420480435, "learning_rate": 0.0009644670050761422, "loss": 0.806, "step": 3800 }, { "epoch": 0.1931692706019723, "grad_norm": 0.06538640718323678, "learning_rate": 0.0009657360406091371, "loss": 0.8121, "step": 3805 }, { "epoch": 0.19342310670000382, "grad_norm": 0.14228923343599192, "learning_rate": 0.0009670050761421319, "loss": 0.8152, "step": 3810 }, { "epoch": 0.1936769427980353, "grad_norm": 0.13536230744263394, "learning_rate": 0.0009682741116751269, "loss": 0.8819, "step": 3815 }, { "epoch": 0.1939307788960668, "grad_norm": 0.09247581889871288, "learning_rate": 0.0009695431472081219, "loss": 0.8418, "step": 3820 }, { "epoch": 0.1941846149940983, "grad_norm": 0.07601354906359477, "learning_rate": 0.0009708121827411168, "loss": 0.8867, "step": 3825 }, { "epoch": 0.19443845109212982, "grad_norm": 0.06204615923522037, "learning_rate": 0.0009720812182741116, "loss": 0.832, "step": 3830 }, { "epoch": 0.19469228719016132, "grad_norm": 0.07903256880012566, "learning_rate": 0.0009733502538071066, "loss": 0.8515, "step": 3835 }, { "epoch": 0.1949461232881928, "grad_norm": 0.07684998940661389, "learning_rate": 0.0009746192893401016, "loss": 0.8629, "step": 3840 }, { "epoch": 0.1951999593862243, "grad_norm": 0.06838515568859656, "learning_rate": 0.0009758883248730965, "loss": 0.8779, "step": 3845 }, { "epoch": 0.19545379548425582, "grad_norm": 0.0572985464748816, "learning_rate": 0.0009771573604060915, "loss": 0.8074, "step": 3850 }, { "epoch": 0.19570763158228732, "grad_norm": 0.05133516348433746, "learning_rate": 0.0009784263959390863, "loss": 0.8169, "step": 3855 }, { "epoch": 0.19596146768031883, "grad_norm": 0.06301344889171914, "learning_rate": 0.0009796954314720812, "loss": 0.8552, "step": 3860 }, { "epoch": 0.1962153037783503, "grad_norm": 0.054714062586220934, "learning_rate": 0.000980964467005076, "loss": 0.8068, "step": 3865 }, { "epoch": 0.19646913987638182, "grad_norm": 0.05375750789311051, "learning_rate": 0.0009822335025380712, "loss": 0.834, "step": 3870 }, { "epoch": 0.19672297597441332, "grad_norm": 0.06967043637997535, "learning_rate": 0.000983502538071066, "loss": 0.7818, "step": 3875 }, { "epoch": 0.19697681207244483, "grad_norm": 0.0582391495833418, "learning_rate": 0.000984771573604061, "loss": 0.8863, "step": 3880 }, { "epoch": 0.19723064817047634, "grad_norm": 0.05836735565743573, "learning_rate": 0.0009860406091370558, "loss": 0.7654, "step": 3885 }, { "epoch": 0.19748448426850782, "grad_norm": 0.05128727384531704, "learning_rate": 0.0009873096446700509, "loss": 0.8254, "step": 3890 }, { "epoch": 0.19773832036653932, "grad_norm": 0.05801247635607692, "learning_rate": 0.0009885786802030457, "loss": 0.8077, "step": 3895 }, { "epoch": 0.19799215646457083, "grad_norm": 0.05215966330249823, "learning_rate": 0.0009898477157360406, "loss": 0.8155, "step": 3900 }, { "epoch": 0.19824599256260234, "grad_norm": 0.07422426569966557, "learning_rate": 0.0009911167512690355, "loss": 0.8192, "step": 3905 }, { "epoch": 0.19849982866063384, "grad_norm": 0.062479331960389135, "learning_rate": 0.0009923857868020306, "loss": 0.7668, "step": 3910 }, { "epoch": 0.19875366475866532, "grad_norm": 0.05402004838942977, "learning_rate": 0.0009936548223350254, "loss": 0.8132, "step": 3915 }, { "epoch": 0.19900750085669683, "grad_norm": 0.05258665530390831, "learning_rate": 0.0009949238578680203, "loss": 0.8208, "step": 3920 }, { "epoch": 0.19926133695472834, "grad_norm": 0.04927178881230835, "learning_rate": 0.0009961928934010152, "loss": 0.8314, "step": 3925 }, { "epoch": 0.19951517305275984, "grad_norm": 0.051342554299230146, "learning_rate": 0.0009974619289340103, "loss": 0.8175, "step": 3930 }, { "epoch": 0.19976900915079132, "grad_norm": 0.05775696531176317, "learning_rate": 0.0009987309644670051, "loss": 0.8438, "step": 3935 }, { "epoch": 0.20002284524882283, "grad_norm": 0.05661725733627295, "learning_rate": 0.001, "loss": 0.8309, "step": 3940 }, { "epoch": 0.20027668134685433, "grad_norm": 0.05692004969211715, "learning_rate": 0.0009999999509262467, "loss": 0.7994, "step": 3945 }, { "epoch": 0.20053051744488584, "grad_norm": 0.06149863844144051, "learning_rate": 0.0009999998037049968, "loss": 0.8317, "step": 3950 }, { "epoch": 0.20078435354291735, "grad_norm": 0.10015413239219335, "learning_rate": 0.0009999995583362786, "loss": 0.8198, "step": 3955 }, { "epoch": 0.20103818964094883, "grad_norm": 0.05302506346539487, "learning_rate": 0.0009999992148201407, "loss": 0.8048, "step": 3960 }, { "epoch": 0.20129202573898033, "grad_norm": 0.05462035288154979, "learning_rate": 0.0009999987731566505, "loss": 0.7613, "step": 3965 }, { "epoch": 0.20154586183701184, "grad_norm": 0.05596164523808965, "learning_rate": 0.0009999982333458942, "loss": 0.8234, "step": 3970 }, { "epoch": 0.20179969793504335, "grad_norm": 0.1684760724028199, "learning_rate": 0.0009999975953879788, "loss": 0.8037, "step": 3975 }, { "epoch": 0.20205353403307486, "grad_norm": 0.10456485472858275, "learning_rate": 0.0009999968592830286, "loss": 0.8513, "step": 3980 }, { "epoch": 0.20230737013110633, "grad_norm": 0.0675902996947776, "learning_rate": 0.0009999960250311885, "loss": 0.8478, "step": 3985 }, { "epoch": 0.20256120622913784, "grad_norm": 0.06438512466349959, "learning_rate": 0.0009999950926326221, "loss": 0.8713, "step": 3990 }, { "epoch": 0.20281504232716935, "grad_norm": 0.06867183998410772, "learning_rate": 0.0009999940620875124, "loss": 0.8377, "step": 3995 }, { "epoch": 0.20306887842520085, "grad_norm": 0.08155282294143533, "learning_rate": 0.0009999929333960617, "loss": 0.8063, "step": 4000 }, { "epoch": 0.20332271452323236, "grad_norm": 0.05670322849502991, "learning_rate": 0.0009999917065584918, "loss": 0.8381, "step": 4005 }, { "epoch": 0.20357655062126384, "grad_norm": 0.07288571135849768, "learning_rate": 0.0009999903815750436, "loss": 0.8557, "step": 4010 }, { "epoch": 0.20383038671929535, "grad_norm": 0.05740679313162405, "learning_rate": 0.0009999889584459765, "loss": 0.8417, "step": 4015 }, { "epoch": 0.20408422281732685, "grad_norm": 0.06229990333100536, "learning_rate": 0.0009999874371715706, "loss": 0.8243, "step": 4020 }, { "epoch": 0.20433805891535836, "grad_norm": 0.058855168508387304, "learning_rate": 0.0009999858177521242, "loss": 0.7595, "step": 4025 }, { "epoch": 0.20459189501338984, "grad_norm": 0.05686482344115998, "learning_rate": 0.0009999841001879551, "loss": 0.8867, "step": 4030 }, { "epoch": 0.20484573111142135, "grad_norm": 0.4512516497907617, "learning_rate": 0.0009999822844794005, "loss": 0.8774, "step": 4035 }, { "epoch": 0.20509956720945285, "grad_norm": 0.09164844151686198, "learning_rate": 0.000999980370626817, "loss": 0.89, "step": 4040 }, { "epoch": 0.20535340330748436, "grad_norm": 0.11599472806060442, "learning_rate": 0.00099997835863058, "loss": 0.895, "step": 4045 }, { "epoch": 0.20560723940551587, "grad_norm": 0.30302413476279366, "learning_rate": 0.0009999762484910846, "loss": 0.8868, "step": 4050 }, { "epoch": 0.20586107550354735, "grad_norm": 0.07643017403307302, "learning_rate": 0.0009999740402087452, "loss": 0.8812, "step": 4055 }, { "epoch": 0.20611491160157885, "grad_norm": 0.07704003285509131, "learning_rate": 0.0009999717337839948, "loss": 0.8411, "step": 4060 }, { "epoch": 0.20636874769961036, "grad_norm": 0.0793020842654842, "learning_rate": 0.0009999693292172865, "loss": 0.8521, "step": 4065 }, { "epoch": 0.20662258379764187, "grad_norm": 0.0879326223253106, "learning_rate": 0.0009999668265090924, "loss": 0.8437, "step": 4070 }, { "epoch": 0.20687641989567337, "grad_norm": 0.05518579171172884, "learning_rate": 0.0009999642256599034, "loss": 0.8197, "step": 4075 }, { "epoch": 0.20713025599370485, "grad_norm": 0.055578451315583935, "learning_rate": 0.0009999615266702302, "loss": 0.8447, "step": 4080 }, { "epoch": 0.20738409209173636, "grad_norm": 0.05113768411631499, "learning_rate": 0.0009999587295406026, "loss": 0.8367, "step": 4085 }, { "epoch": 0.20763792818976787, "grad_norm": 0.06520864390296133, "learning_rate": 0.00099995583427157, "loss": 0.856, "step": 4090 }, { "epoch": 0.20789176428779937, "grad_norm": 0.05799727624608957, "learning_rate": 0.0009999528408637, "loss": 0.8176, "step": 4095 }, { "epoch": 0.20814560038583088, "grad_norm": 0.050546102718447276, "learning_rate": 0.0009999497493175808, "loss": 0.849, "step": 4100 }, { "epoch": 0.20839943648386236, "grad_norm": 0.0609601107937184, "learning_rate": 0.0009999465596338191, "loss": 0.7982, "step": 4105 }, { "epoch": 0.20865327258189387, "grad_norm": 0.0542568281101906, "learning_rate": 0.000999943271813041, "loss": 0.8109, "step": 4110 }, { "epoch": 0.20890710867992537, "grad_norm": 0.05227120501512879, "learning_rate": 0.0009999398858558917, "loss": 0.8309, "step": 4115 }, { "epoch": 0.20916094477795688, "grad_norm": 0.06268953926869833, "learning_rate": 0.0009999364017630361, "loss": 0.8687, "step": 4120 }, { "epoch": 0.2094147808759884, "grad_norm": 0.437680073628893, "learning_rate": 0.0009999328195351579, "loss": 0.8113, "step": 4125 }, { "epoch": 0.20966861697401987, "grad_norm": 0.07747071792075517, "learning_rate": 0.0009999291391729606, "loss": 0.8258, "step": 4130 }, { "epoch": 0.20992245307205137, "grad_norm": 0.07548174386681084, "learning_rate": 0.0009999253606771661, "loss": 0.8389, "step": 4135 }, { "epoch": 0.21017628917008288, "grad_norm": 0.07941685797470464, "learning_rate": 0.0009999214840485167, "loss": 0.8274, "step": 4140 }, { "epoch": 0.2104301252681144, "grad_norm": 0.0677639423179636, "learning_rate": 0.000999917509287773, "loss": 0.8452, "step": 4145 }, { "epoch": 0.21068396136614587, "grad_norm": 0.05523566480460326, "learning_rate": 0.0009999134363957152, "loss": 0.806, "step": 4150 }, { "epoch": 0.21093779746417737, "grad_norm": 0.05428054750299863, "learning_rate": 0.0009999092653731432, "loss": 0.8533, "step": 4155 }, { "epoch": 0.21119163356220888, "grad_norm": 0.05510695726313813, "learning_rate": 0.0009999049962208751, "loss": 0.8428, "step": 4160 }, { "epoch": 0.2114454696602404, "grad_norm": 0.054856484061472977, "learning_rate": 0.0009999006289397494, "loss": 0.8483, "step": 4165 }, { "epoch": 0.2116993057582719, "grad_norm": 0.061344528816819746, "learning_rate": 0.0009998961635306234, "loss": 0.8569, "step": 4170 }, { "epoch": 0.21195314185630337, "grad_norm": 0.05318640341091229, "learning_rate": 0.0009998915999943733, "loss": 0.7787, "step": 4175 }, { "epoch": 0.21220697795433488, "grad_norm": 0.06093726551366727, "learning_rate": 0.0009998869383318952, "loss": 0.8096, "step": 4180 }, { "epoch": 0.2124608140523664, "grad_norm": 0.053735769992154084, "learning_rate": 0.0009998821785441039, "loss": 0.8631, "step": 4185 }, { "epoch": 0.2127146501503979, "grad_norm": 0.0578379042085939, "learning_rate": 0.000999877320631934, "loss": 0.8089, "step": 4190 }, { "epoch": 0.2129684862484294, "grad_norm": 0.05922600227637529, "learning_rate": 0.0009998723645963388, "loss": 0.8584, "step": 4195 }, { "epoch": 0.21322232234646088, "grad_norm": 0.05453614318202486, "learning_rate": 0.0009998673104382912, "loss": 0.8077, "step": 4200 }, { "epoch": 0.2134761584444924, "grad_norm": 0.05016275431034205, "learning_rate": 0.0009998621581587836, "loss": 0.8651, "step": 4205 }, { "epoch": 0.2137299945425239, "grad_norm": 0.056020425242456026, "learning_rate": 0.000999856907758827, "loss": 0.8537, "step": 4210 }, { "epoch": 0.2139838306405554, "grad_norm": 0.05665598171444764, "learning_rate": 0.0009998515592394524, "loss": 0.8463, "step": 4215 }, { "epoch": 0.2142376667385869, "grad_norm": 0.05330863125920927, "learning_rate": 0.0009998461126017094, "loss": 0.8476, "step": 4220 }, { "epoch": 0.2144915028366184, "grad_norm": 0.050333411229293114, "learning_rate": 0.0009998405678466671, "loss": 0.8104, "step": 4225 }, { "epoch": 0.2147453389346499, "grad_norm": 0.04863826824922215, "learning_rate": 0.0009998349249754142, "loss": 0.8064, "step": 4230 }, { "epoch": 0.2149991750326814, "grad_norm": 0.06418726092636645, "learning_rate": 0.0009998291839890582, "loss": 0.8329, "step": 4235 }, { "epoch": 0.2152530111307129, "grad_norm": 0.05670762036201351, "learning_rate": 0.000999823344888726, "loss": 0.8679, "step": 4240 }, { "epoch": 0.21550684722874439, "grad_norm": 0.054640939199981746, "learning_rate": 0.0009998174076755637, "loss": 0.8213, "step": 4245 }, { "epoch": 0.2157606833267759, "grad_norm": 0.05467265195052574, "learning_rate": 0.000999811372350737, "loss": 0.815, "step": 4250 }, { "epoch": 0.2160145194248074, "grad_norm": 0.061516117648018086, "learning_rate": 0.0009998052389154303, "loss": 0.7857, "step": 4255 }, { "epoch": 0.2162683555228389, "grad_norm": 0.05921346140023558, "learning_rate": 0.0009997990073708479, "loss": 0.7904, "step": 4260 }, { "epoch": 0.2165221916208704, "grad_norm": 0.060765154856067795, "learning_rate": 0.0009997926777182127, "loss": 0.7995, "step": 4265 }, { "epoch": 0.2167760277189019, "grad_norm": 0.06421245801597604, "learning_rate": 0.0009997862499587673, "loss": 0.8609, "step": 4270 }, { "epoch": 0.2170298638169334, "grad_norm": 0.06907830969127508, "learning_rate": 0.0009997797240937736, "loss": 0.8051, "step": 4275 }, { "epoch": 0.2172836999149649, "grad_norm": 0.05628356598542042, "learning_rate": 0.0009997731001245124, "loss": 0.8153, "step": 4280 }, { "epoch": 0.2175375360129964, "grad_norm": 0.07047887628361475, "learning_rate": 0.0009997663780522842, "loss": 0.8108, "step": 4285 }, { "epoch": 0.21779137211102792, "grad_norm": 10.356816581233174, "learning_rate": 0.000999759557878408, "loss": 1.4005, "step": 4290 }, { "epoch": 0.2180452082090594, "grad_norm": 0.14387930081059194, "learning_rate": 0.0009997526396042231, "loss": 0.9904, "step": 4295 }, { "epoch": 0.2182990443070909, "grad_norm": 0.1303063878110208, "learning_rate": 0.000999745623231087, "loss": 0.9725, "step": 4300 }, { "epoch": 0.2185528804051224, "grad_norm": 0.10812803878797526, "learning_rate": 0.0009997385087603776, "loss": 0.8924, "step": 4305 }, { "epoch": 0.21880671650315392, "grad_norm": 0.0813015644863545, "learning_rate": 0.0009997312961934912, "loss": 0.8786, "step": 4310 }, { "epoch": 0.21906055260118543, "grad_norm": 0.07280573869808253, "learning_rate": 0.000999723985531843, "loss": 0.871, "step": 4315 }, { "epoch": 0.2193143886992169, "grad_norm": 0.06949756114018615, "learning_rate": 0.0009997165767768692, "loss": 0.8798, "step": 4320 }, { "epoch": 0.2195682247972484, "grad_norm": 0.06398973332467224, "learning_rate": 0.000999709069930023, "loss": 0.8565, "step": 4325 }, { "epoch": 0.21982206089527992, "grad_norm": 0.0669979833151966, "learning_rate": 0.0009997014649927786, "loss": 0.8308, "step": 4330 }, { "epoch": 0.22007589699331143, "grad_norm": 0.06833796137247712, "learning_rate": 0.0009996937619666287, "loss": 0.8739, "step": 4335 }, { "epoch": 0.22032973309134293, "grad_norm": 0.05800731589909742, "learning_rate": 0.0009996859608530852, "loss": 0.8438, "step": 4340 }, { "epoch": 0.2205835691893744, "grad_norm": 0.060269405184139445, "learning_rate": 0.0009996780616536795, "loss": 0.8956, "step": 4345 }, { "epoch": 0.22083740528740592, "grad_norm": 0.05773510539415019, "learning_rate": 0.0009996700643699623, "loss": 0.8257, "step": 4350 }, { "epoch": 0.22109124138543743, "grad_norm": 0.04745482432084205, "learning_rate": 0.0009996619690035033, "loss": 0.8006, "step": 4355 }, { "epoch": 0.22134507748346893, "grad_norm": 0.050914194132792856, "learning_rate": 0.0009996537755558915, "loss": 0.8128, "step": 4360 }, { "epoch": 0.2215989135815004, "grad_norm": 0.05082826418504723, "learning_rate": 0.0009996454840287355, "loss": 0.8441, "step": 4365 }, { "epoch": 0.22185274967953192, "grad_norm": 0.05312125527446652, "learning_rate": 0.0009996370944236625, "loss": 0.8382, "step": 4370 }, { "epoch": 0.22210658577756343, "grad_norm": 0.05318196469818948, "learning_rate": 0.0009996286067423196, "loss": 0.8295, "step": 4375 }, { "epoch": 0.22236042187559493, "grad_norm": 0.050640190304472595, "learning_rate": 0.000999620020986373, "loss": 0.7929, "step": 4380 }, { "epoch": 0.22261425797362644, "grad_norm": 0.052863448660504825, "learning_rate": 0.0009996113371575075, "loss": 0.8572, "step": 4385 }, { "epoch": 0.22286809407165792, "grad_norm": 0.11025364388386391, "learning_rate": 0.0009996025552574284, "loss": 0.7947, "step": 4390 }, { "epoch": 0.22312193016968943, "grad_norm": 0.09421997753616461, "learning_rate": 0.000999593675287859, "loss": 0.8332, "step": 4395 }, { "epoch": 0.22337576626772093, "grad_norm": 0.054671635666641655, "learning_rate": 0.0009995846972505429, "loss": 0.8511, "step": 4400 }, { "epoch": 0.22362960236575244, "grad_norm": 0.07626491010034327, "learning_rate": 0.000999575621147242, "loss": 0.878, "step": 4405 }, { "epoch": 0.22388343846378395, "grad_norm": 0.05196015130989795, "learning_rate": 0.000999566446979738, "loss": 0.8491, "step": 4410 }, { "epoch": 0.22413727456181542, "grad_norm": 0.054879421886632404, "learning_rate": 0.0009995571747498319, "loss": 0.7694, "step": 4415 }, { "epoch": 0.22439111065984693, "grad_norm": 0.055800853731799445, "learning_rate": 0.0009995478044593435, "loss": 0.8373, "step": 4420 }, { "epoch": 0.22464494675787844, "grad_norm": 0.05794861558321214, "learning_rate": 0.0009995383361101125, "loss": 0.8187, "step": 4425 }, { "epoch": 0.22489878285590995, "grad_norm": 0.06067432301348727, "learning_rate": 0.0009995287697039973, "loss": 0.8204, "step": 4430 }, { "epoch": 0.22515261895394145, "grad_norm": 0.052874425603535856, "learning_rate": 0.0009995191052428758, "loss": 0.847, "step": 4435 }, { "epoch": 0.22540645505197293, "grad_norm": 0.05054219751638529, "learning_rate": 0.0009995093427286447, "loss": 0.8115, "step": 4440 }, { "epoch": 0.22566029115000444, "grad_norm": 0.05018508186818898, "learning_rate": 0.000999499482163221, "loss": 0.8307, "step": 4445 }, { "epoch": 0.22591412724803595, "grad_norm": 0.05004378415471257, "learning_rate": 0.00099948952354854, "loss": 0.8457, "step": 4450 }, { "epoch": 0.22616796334606745, "grad_norm": 0.05009472143089015, "learning_rate": 0.0009994794668865563, "loss": 0.8477, "step": 4455 }, { "epoch": 0.22642179944409896, "grad_norm": 0.05241016805280678, "learning_rate": 0.0009994693121792443, "loss": 0.8425, "step": 4460 }, { "epoch": 0.22667563554213044, "grad_norm": 0.05267520880773833, "learning_rate": 0.000999459059428597, "loss": 0.8388, "step": 4465 }, { "epoch": 0.22692947164016194, "grad_norm": 0.05213472180338855, "learning_rate": 0.0009994487086366272, "loss": 0.8077, "step": 4470 }, { "epoch": 0.22718330773819345, "grad_norm": 0.05151297954111727, "learning_rate": 0.0009994382598053665, "loss": 0.8011, "step": 4475 }, { "epoch": 0.22743714383622496, "grad_norm": 0.056785613905643746, "learning_rate": 0.0009994277129368664, "loss": 0.8145, "step": 4480 }, { "epoch": 0.22769097993425644, "grad_norm": 0.05607377099812519, "learning_rate": 0.0009994170680331968, "loss": 0.8006, "step": 4485 }, { "epoch": 0.22794481603228794, "grad_norm": 0.04914183704957577, "learning_rate": 0.0009994063250964472, "loss": 0.7759, "step": 4490 }, { "epoch": 0.22819865213031945, "grad_norm": 0.045955343794523144, "learning_rate": 0.0009993954841287266, "loss": 0.793, "step": 4495 }, { "epoch": 0.22845248822835096, "grad_norm": 0.05701876655429152, "learning_rate": 0.000999384545132163, "loss": 0.8117, "step": 4500 }, { "epoch": 0.22870632432638247, "grad_norm": 0.050303367425416294, "learning_rate": 0.0009993735081089035, "loss": 0.81, "step": 4505 }, { "epoch": 0.22896016042441394, "grad_norm": 0.046333791246069976, "learning_rate": 0.0009993623730611147, "loss": 0.8026, "step": 4510 }, { "epoch": 0.22921399652244545, "grad_norm": 0.05091253314526701, "learning_rate": 0.0009993511399909825, "loss": 0.7862, "step": 4515 }, { "epoch": 0.22946783262047696, "grad_norm": 0.06080256864853197, "learning_rate": 0.0009993398089007117, "loss": 0.7816, "step": 4520 }, { "epoch": 0.22972166871850846, "grad_norm": 0.05638955585292827, "learning_rate": 0.0009993283797925267, "loss": 0.8044, "step": 4525 }, { "epoch": 0.22997550481653997, "grad_norm": 0.055768663794655934, "learning_rate": 0.0009993168526686708, "loss": 0.834, "step": 4530 }, { "epoch": 0.23022934091457145, "grad_norm": 0.05024038341809871, "learning_rate": 0.000999305227531407, "loss": 0.8116, "step": 4535 }, { "epoch": 0.23048317701260296, "grad_norm": 0.07082469398776259, "learning_rate": 0.000999293504383017, "loss": 0.7697, "step": 4540 }, { "epoch": 0.23073701311063446, "grad_norm": 0.09548957740232247, "learning_rate": 0.000999281683225802, "loss": 0.8489, "step": 4545 }, { "epoch": 0.23099084920866597, "grad_norm": 0.07524288871657674, "learning_rate": 0.0009992697640620824, "loss": 0.8489, "step": 4550 }, { "epoch": 0.23124468530669748, "grad_norm": 0.06124200365608783, "learning_rate": 0.000999257746894198, "loss": 0.8352, "step": 4555 }, { "epoch": 0.23149852140472896, "grad_norm": 0.07242209896032376, "learning_rate": 0.0009992456317245077, "loss": 0.8387, "step": 4560 }, { "epoch": 0.23175235750276046, "grad_norm": 0.052297684346679016, "learning_rate": 0.0009992334185553898, "loss": 0.8337, "step": 4565 }, { "epoch": 0.23200619360079197, "grad_norm": 0.05343847967260285, "learning_rate": 0.0009992211073892414, "loss": 0.8012, "step": 4570 }, { "epoch": 0.23226002969882348, "grad_norm": 0.05632814464342564, "learning_rate": 0.000999208698228479, "loss": 0.8047, "step": 4575 }, { "epoch": 0.23251386579685496, "grad_norm": 0.05687798932347652, "learning_rate": 0.0009991961910755392, "loss": 0.7886, "step": 4580 }, { "epoch": 0.23276770189488646, "grad_norm": 0.053550031598268134, "learning_rate": 0.0009991835859328763, "loss": 0.8198, "step": 4585 }, { "epoch": 0.23302153799291797, "grad_norm": 0.0751482032823479, "learning_rate": 0.0009991708828029648, "loss": 0.7649, "step": 4590 }, { "epoch": 0.23327537409094948, "grad_norm": 0.06424084057235761, "learning_rate": 0.0009991580816882983, "loss": 0.8248, "step": 4595 }, { "epoch": 0.23352921018898098, "grad_norm": 0.061669723921834604, "learning_rate": 0.00099914518259139, "loss": 0.8316, "step": 4600 }, { "epoch": 0.23378304628701246, "grad_norm": 0.05429866005701402, "learning_rate": 0.0009991321855147713, "loss": 0.8372, "step": 4605 }, { "epoch": 0.23403688238504397, "grad_norm": 0.059509250710846166, "learning_rate": 0.0009991190904609939, "loss": 0.8221, "step": 4610 }, { "epoch": 0.23429071848307548, "grad_norm": 0.0645725584584425, "learning_rate": 0.0009991058974326281, "loss": 0.7991, "step": 4615 }, { "epoch": 0.23454455458110698, "grad_norm": 0.055500554888825444, "learning_rate": 0.0009990926064322636, "loss": 0.8138, "step": 4620 }, { "epoch": 0.2347983906791385, "grad_norm": 0.05978106551283718, "learning_rate": 0.0009990792174625095, "loss": 0.8214, "step": 4625 }, { "epoch": 0.23505222677716997, "grad_norm": 0.050763773546228, "learning_rate": 0.000999065730525994, "loss": 0.8549, "step": 4630 }, { "epoch": 0.23530606287520148, "grad_norm": 0.05523771224426716, "learning_rate": 0.0009990521456253643, "loss": 0.7869, "step": 4635 }, { "epoch": 0.23555989897323298, "grad_norm": 0.05800155495890772, "learning_rate": 0.0009990384627632872, "loss": 0.813, "step": 4640 }, { "epoch": 0.2358137350712645, "grad_norm": 0.046631936987490345, "learning_rate": 0.0009990246819424487, "loss": 0.8136, "step": 4645 }, { "epoch": 0.236067571169296, "grad_norm": 0.055199204268512714, "learning_rate": 0.0009990108031655536, "loss": 0.8132, "step": 4650 }, { "epoch": 0.23632140726732748, "grad_norm": 0.0488911267390242, "learning_rate": 0.0009989968264353265, "loss": 0.7985, "step": 4655 }, { "epoch": 0.23657524336535898, "grad_norm": 0.06089740086670313, "learning_rate": 0.0009989827517545107, "loss": 0.8027, "step": 4660 }, { "epoch": 0.2368290794633905, "grad_norm": 0.05520665303518142, "learning_rate": 0.0009989685791258693, "loss": 0.8085, "step": 4665 }, { "epoch": 0.237082915561422, "grad_norm": 0.0541850033349203, "learning_rate": 0.0009989543085521843, "loss": 0.799, "step": 4670 }, { "epoch": 0.2373367516594535, "grad_norm": 0.04933332299514624, "learning_rate": 0.0009989399400362566, "loss": 0.8365, "step": 4675 }, { "epoch": 0.23759058775748498, "grad_norm": 0.04851770519083257, "learning_rate": 0.0009989254735809068, "loss": 0.7788, "step": 4680 }, { "epoch": 0.2378444238555165, "grad_norm": 0.04899194988524947, "learning_rate": 0.000998910909188975, "loss": 0.8397, "step": 4685 }, { "epoch": 0.238098259953548, "grad_norm": 0.04661413222705808, "learning_rate": 0.0009988962468633195, "loss": 0.7822, "step": 4690 }, { "epoch": 0.2383520960515795, "grad_norm": 0.05296467515534692, "learning_rate": 0.000998881486606819, "loss": 0.7905, "step": 4695 }, { "epoch": 0.23860593214961098, "grad_norm": 0.04544577249315294, "learning_rate": 0.0009988666284223703, "loss": 0.7669, "step": 4700 }, { "epoch": 0.2388597682476425, "grad_norm": 0.046658003500647464, "learning_rate": 0.0009988516723128905, "loss": 0.8042, "step": 4705 }, { "epoch": 0.239113604345674, "grad_norm": 0.04150698256355802, "learning_rate": 0.0009988366182813152, "loss": 0.8049, "step": 4710 }, { "epoch": 0.2393674404437055, "grad_norm": 0.05240547523980917, "learning_rate": 0.0009988214663305991, "loss": 0.7963, "step": 4715 }, { "epoch": 0.239621276541737, "grad_norm": 0.04985008578916666, "learning_rate": 0.000998806216463717, "loss": 0.786, "step": 4720 }, { "epoch": 0.2398751126397685, "grad_norm": 0.04392095668122405, "learning_rate": 0.0009987908686836622, "loss": 0.7938, "step": 4725 }, { "epoch": 0.2401289487378, "grad_norm": 0.05119774976039255, "learning_rate": 0.0009987754229934473, "loss": 0.7652, "step": 4730 }, { "epoch": 0.2403827848358315, "grad_norm": 0.0454149083492397, "learning_rate": 0.0009987598793961044, "loss": 0.829, "step": 4735 }, { "epoch": 0.240636620933863, "grad_norm": 0.04916590472044475, "learning_rate": 0.0009987442378946842, "loss": 0.7972, "step": 4740 }, { "epoch": 0.24089045703189452, "grad_norm": 0.0535217242886409, "learning_rate": 0.0009987284984922576, "loss": 0.7849, "step": 4745 }, { "epoch": 0.241144293129926, "grad_norm": 0.0470267475766678, "learning_rate": 0.0009987126611919136, "loss": 0.8053, "step": 4750 }, { "epoch": 0.2413981292279575, "grad_norm": 0.05552512990665631, "learning_rate": 0.0009986967259967617, "loss": 0.8292, "step": 4755 }, { "epoch": 0.241651965325989, "grad_norm": 0.05788318821585466, "learning_rate": 0.0009986806929099291, "loss": 0.8116, "step": 4760 }, { "epoch": 0.24190580142402052, "grad_norm": 0.05216608410249201, "learning_rate": 0.0009986645619345636, "loss": 0.8053, "step": 4765 }, { "epoch": 0.24215963752205202, "grad_norm": 0.05099954490034819, "learning_rate": 0.0009986483330738313, "loss": 0.8011, "step": 4770 }, { "epoch": 0.2424134736200835, "grad_norm": 0.04980682640323316, "learning_rate": 0.0009986320063309182, "loss": 0.8269, "step": 4775 }, { "epoch": 0.242667309718115, "grad_norm": 0.04969783611642078, "learning_rate": 0.0009986155817090288, "loss": 0.8031, "step": 4780 }, { "epoch": 0.24292114581614652, "grad_norm": 0.05626776384575341, "learning_rate": 0.0009985990592113873, "loss": 0.8042, "step": 4785 }, { "epoch": 0.24317498191417802, "grad_norm": 0.04903027349433728, "learning_rate": 0.000998582438841237, "loss": 0.8335, "step": 4790 }, { "epoch": 0.2434288180122095, "grad_norm": 0.04712612546094545, "learning_rate": 0.0009985657206018404, "loss": 0.7812, "step": 4795 }, { "epoch": 0.243682654110241, "grad_norm": 0.0487444740380213, "learning_rate": 0.0009985489044964792, "loss": 0.8161, "step": 4800 }, { "epoch": 0.24393649020827252, "grad_norm": 0.0452569442570038, "learning_rate": 0.0009985319905284542, "loss": 0.8347, "step": 4805 }, { "epoch": 0.24419032630630402, "grad_norm": 0.04579958307870436, "learning_rate": 0.0009985149787010857, "loss": 0.8071, "step": 4810 }, { "epoch": 0.24444416240433553, "grad_norm": 0.049065819060077974, "learning_rate": 0.000998497869017713, "loss": 0.7788, "step": 4815 }, { "epoch": 0.244697998502367, "grad_norm": 0.05902279470219423, "learning_rate": 0.0009984806614816944, "loss": 1.078, "step": 4820 }, { "epoch": 0.24495183460039852, "grad_norm": 0.06870877402829889, "learning_rate": 0.000998463356096408, "loss": 0.8402, "step": 4825 }, { "epoch": 0.24520567069843002, "grad_norm": 0.05447388591330835, "learning_rate": 0.0009984459528652508, "loss": 0.8135, "step": 4830 }, { "epoch": 0.24545950679646153, "grad_norm": 0.06855463799282786, "learning_rate": 0.0009984284517916386, "loss": 0.8098, "step": 4835 }, { "epoch": 0.24571334289449304, "grad_norm": 0.060260891295819796, "learning_rate": 0.000998410852879007, "loss": 0.8327, "step": 4840 }, { "epoch": 0.24596717899252452, "grad_norm": 0.17832302339198228, "learning_rate": 0.0009983931561308105, "loss": 0.7906, "step": 4845 }, { "epoch": 0.24622101509055602, "grad_norm": 0.08370160191982184, "learning_rate": 0.0009983753615505232, "loss": 0.8101, "step": 4850 }, { "epoch": 0.24647485118858753, "grad_norm": 0.06401357394992706, "learning_rate": 0.0009983574691416377, "loss": 0.8592, "step": 4855 }, { "epoch": 0.24672868728661904, "grad_norm": 0.05112360739065728, "learning_rate": 0.0009983394789076663, "loss": 0.8152, "step": 4860 }, { "epoch": 0.24698252338465054, "grad_norm": 0.046888181993093504, "learning_rate": 0.0009983213908521403, "loss": 0.8146, "step": 4865 }, { "epoch": 0.24723635948268202, "grad_norm": 0.05070681880853235, "learning_rate": 0.0009983032049786106, "loss": 0.8319, "step": 4870 }, { "epoch": 0.24749019558071353, "grad_norm": 0.047547171184542346, "learning_rate": 0.0009982849212906465, "loss": 0.7497, "step": 4875 }, { "epoch": 0.24774403167874504, "grad_norm": 0.04601455147802037, "learning_rate": 0.0009982665397918376, "loss": 0.7665, "step": 4880 }, { "epoch": 0.24799786777677654, "grad_norm": 0.04835333634647831, "learning_rate": 0.0009982480604857915, "loss": 0.8405, "step": 4885 }, { "epoch": 0.24825170387480805, "grad_norm": 0.1740441379809542, "learning_rate": 0.000998229483376136, "loss": 0.9003, "step": 4890 }, { "epoch": 0.24850553997283953, "grad_norm": 0.13259044998906824, "learning_rate": 0.0009982108084665177, "loss": 0.8506, "step": 4895 }, { "epoch": 0.24875937607087104, "grad_norm": 0.08635045761389715, "learning_rate": 0.0009981920357606023, "loss": 0.8258, "step": 4900 }, { "epoch": 0.24901321216890254, "grad_norm": 0.07437700469986665, "learning_rate": 0.0009981731652620746, "loss": 0.8101, "step": 4905 }, { "epoch": 0.24926704826693405, "grad_norm": 0.060252570621974255, "learning_rate": 0.0009981541969746389, "loss": 0.7951, "step": 4910 }, { "epoch": 0.24952088436496553, "grad_norm": 0.061649342108658606, "learning_rate": 0.0009981351309020189, "loss": 0.809, "step": 4915 }, { "epoch": 0.24977472046299704, "grad_norm": 0.05112451084853646, "learning_rate": 0.0009981159670479566, "loss": 0.8095, "step": 4920 }, { "epoch": 0.25002855656102857, "grad_norm": 0.04760565175655501, "learning_rate": 0.0009980967054162141, "loss": 0.7804, "step": 4925 }, { "epoch": 0.25028239265906005, "grad_norm": 0.048497421650966155, "learning_rate": 0.0009980773460105726, "loss": 0.7853, "step": 4930 }, { "epoch": 0.25053622875709153, "grad_norm": 0.047992684358013155, "learning_rate": 0.0009980578888348318, "loss": 0.7976, "step": 4935 }, { "epoch": 0.25079006485512306, "grad_norm": 0.0538667275263523, "learning_rate": 0.000998038333892811, "loss": 0.7944, "step": 4940 }, { "epoch": 0.25104390095315454, "grad_norm": 0.04745180335487135, "learning_rate": 0.0009980186811883495, "loss": 0.7751, "step": 4945 }, { "epoch": 0.2512977370511861, "grad_norm": 0.04612360594106859, "learning_rate": 0.000997998930725304, "loss": 0.8264, "step": 4950 }, { "epoch": 0.25155157314921756, "grad_norm": 0.0644175507796381, "learning_rate": 0.0009979790825075522, "loss": 0.7986, "step": 4955 }, { "epoch": 0.25180540924724903, "grad_norm": 0.04910928845336013, "learning_rate": 0.0009979591365389898, "loss": 0.8357, "step": 4960 }, { "epoch": 0.25205924534528057, "grad_norm": 0.05129441798806913, "learning_rate": 0.0009979390928235323, "loss": 0.822, "step": 4965 }, { "epoch": 0.25231308144331205, "grad_norm": 0.048475145104974394, "learning_rate": 0.000997918951365114, "loss": 0.7712, "step": 4970 }, { "epoch": 0.2525669175413435, "grad_norm": 0.04859159339871082, "learning_rate": 0.0009978987121676889, "loss": 0.7667, "step": 4975 }, { "epoch": 0.25282075363937506, "grad_norm": 0.051850843968313944, "learning_rate": 0.0009978783752352294, "loss": 0.788, "step": 4980 }, { "epoch": 0.25307458973740654, "grad_norm": 0.05376553829505734, "learning_rate": 0.0009978579405717277, "loss": 0.8074, "step": 4985 }, { "epoch": 0.2533284258354381, "grad_norm": 0.04574026462298968, "learning_rate": 0.0009978374081811951, "loss": 0.7681, "step": 4990 }, { "epoch": 0.25358226193346955, "grad_norm": 0.057370034492459865, "learning_rate": 0.000997816778067662, "loss": 0.7651, "step": 4995 }, { "epoch": 0.25383609803150103, "grad_norm": 0.07664863769972023, "learning_rate": 0.0009977960502351782, "loss": 0.7755, "step": 5000 }, { "epoch": 0.25408993412953257, "grad_norm": 0.05268074518232119, "learning_rate": 0.000997775224687812, "loss": 0.8248, "step": 5005 }, { "epoch": 0.25434377022756405, "grad_norm": 0.14654966899380362, "learning_rate": 0.0009977543014296516, "loss": 0.7717, "step": 5010 }, { "epoch": 0.2545976063255956, "grad_norm": 0.08081329511957558, "learning_rate": 0.0009977332804648044, "loss": 0.7632, "step": 5015 }, { "epoch": 0.25485144242362706, "grad_norm": 0.054121114008363495, "learning_rate": 0.000997712161797396, "loss": 0.7496, "step": 5020 }, { "epoch": 0.25510527852165854, "grad_norm": 0.07542446141839279, "learning_rate": 0.0009976909454315727, "loss": 0.7871, "step": 5025 }, { "epoch": 0.2553591146196901, "grad_norm": 0.0644371122615925, "learning_rate": 0.0009976696313714986, "loss": 0.7502, "step": 5030 }, { "epoch": 0.25561295071772155, "grad_norm": 0.057060160462481226, "learning_rate": 0.0009976482196213578, "loss": 0.7948, "step": 5035 }, { "epoch": 0.2558667868157531, "grad_norm": 1.6725407694728647, "learning_rate": 0.0009976267101853534, "loss": 0.8022, "step": 5040 }, { "epoch": 0.25612062291378457, "grad_norm": 0.12121391606159579, "learning_rate": 0.000997605103067707, "loss": 0.853, "step": 5045 }, { "epoch": 0.25637445901181605, "grad_norm": 0.07529932662749647, "learning_rate": 0.000997583398272661, "loss": 0.8005, "step": 5050 }, { "epoch": 0.2566282951098476, "grad_norm": 0.07488267613841562, "learning_rate": 0.000997561595804475, "loss": 0.8285, "step": 5055 }, { "epoch": 0.25688213120787906, "grad_norm": 0.05550265297190501, "learning_rate": 0.0009975396956674292, "loss": 0.8137, "step": 5060 }, { "epoch": 0.2571359673059106, "grad_norm": 0.05319369053584789, "learning_rate": 0.0009975176978658223, "loss": 0.7792, "step": 5065 }, { "epoch": 0.2573898034039421, "grad_norm": 0.05492719240871532, "learning_rate": 0.0009974956024039723, "loss": 0.8075, "step": 5070 }, { "epoch": 0.25764363950197355, "grad_norm": 0.04756678047336931, "learning_rate": 0.0009974734092862167, "loss": 0.7675, "step": 5075 }, { "epoch": 0.2578974756000051, "grad_norm": 0.04096444536231544, "learning_rate": 0.0009974511185169119, "loss": 0.7471, "step": 5080 }, { "epoch": 0.25815131169803657, "grad_norm": 0.053169542096396835, "learning_rate": 0.0009974287301004333, "loss": 0.7923, "step": 5085 }, { "epoch": 0.2584051477960681, "grad_norm": 0.07832782627110797, "learning_rate": 0.0009974062440411754, "loss": 0.802, "step": 5090 }, { "epoch": 0.2586589838940996, "grad_norm": 0.045944359077796745, "learning_rate": 0.0009973836603435525, "loss": 0.8064, "step": 5095 }, { "epoch": 0.25891281999213106, "grad_norm": 0.05063775643729869, "learning_rate": 0.0009973609790119974, "loss": 0.7344, "step": 5100 }, { "epoch": 0.2591666560901626, "grad_norm": 0.058477611132640155, "learning_rate": 0.0009973382000509627, "loss": 0.7412, "step": 5105 }, { "epoch": 0.2594204921881941, "grad_norm": 0.0472038823914066, "learning_rate": 0.0009973153234649195, "loss": 0.812, "step": 5110 }, { "epoch": 0.2596743282862256, "grad_norm": 0.054595262717844886, "learning_rate": 0.0009972923492583582, "loss": 0.7977, "step": 5115 }, { "epoch": 0.2599281643842571, "grad_norm": 0.052608834365809855, "learning_rate": 0.0009972692774357888, "loss": 0.7841, "step": 5120 }, { "epoch": 0.26018200048228857, "grad_norm": 0.04735572111967861, "learning_rate": 0.0009972461080017404, "loss": 0.7556, "step": 5125 }, { "epoch": 0.2604358365803201, "grad_norm": 0.050437467240574414, "learning_rate": 0.0009972228409607605, "loss": 0.7921, "step": 5130 }, { "epoch": 0.2606896726783516, "grad_norm": 0.049473172743419266, "learning_rate": 0.0009971994763174165, "loss": 0.7713, "step": 5135 }, { "epoch": 0.2609435087763831, "grad_norm": 0.04891376443252543, "learning_rate": 0.0009971760140762948, "loss": 0.7815, "step": 5140 }, { "epoch": 0.2611973448744146, "grad_norm": 0.05729888951727908, "learning_rate": 0.0009971524542420013, "loss": 0.7726, "step": 5145 }, { "epoch": 0.2614511809724461, "grad_norm": 0.051839110122430636, "learning_rate": 0.00099712879681916, "loss": 0.7667, "step": 5150 }, { "epoch": 0.2617050170704776, "grad_norm": 0.05272606761137572, "learning_rate": 0.0009971050418124152, "loss": 0.7636, "step": 5155 }, { "epoch": 0.2619588531685091, "grad_norm": 0.05433132142481107, "learning_rate": 0.0009970811892264298, "loss": 0.7601, "step": 5160 }, { "epoch": 0.2622126892665406, "grad_norm": 0.05293076917047231, "learning_rate": 0.0009970572390658858, "loss": 0.7948, "step": 5165 }, { "epoch": 0.2624665253645721, "grad_norm": 0.055799272464040206, "learning_rate": 0.0009970331913354846, "loss": 0.7797, "step": 5170 }, { "epoch": 0.2627203614626036, "grad_norm": 0.05639239679478062, "learning_rate": 0.0009970090460399467, "loss": 0.7686, "step": 5175 }, { "epoch": 0.2629741975606351, "grad_norm": 0.06579519460905411, "learning_rate": 0.0009969848031840117, "loss": 0.8281, "step": 5180 }, { "epoch": 0.2632280336586666, "grad_norm": 0.04422763434531487, "learning_rate": 0.000996960462772438, "loss": 0.7577, "step": 5185 }, { "epoch": 0.2634818697566981, "grad_norm": 0.06614353501371721, "learning_rate": 0.000996936024810004, "loss": 0.7791, "step": 5190 }, { "epoch": 0.2637357058547296, "grad_norm": 0.09224438285353626, "learning_rate": 0.0009969114893015065, "loss": 0.8727, "step": 5195 }, { "epoch": 0.2639895419527611, "grad_norm": 0.13748180229054355, "learning_rate": 0.000996886856251762, "loss": 0.8125, "step": 5200 }, { "epoch": 0.2642433780507926, "grad_norm": 0.17134457227107716, "learning_rate": 0.0009968621256656051, "loss": 0.8029, "step": 5205 }, { "epoch": 0.2644972141488241, "grad_norm": 0.3052578498624514, "learning_rate": 0.0009968372975478913, "loss": 0.8305, "step": 5210 }, { "epoch": 0.2647510502468556, "grad_norm": 0.13032560601206136, "learning_rate": 0.0009968123719034934, "loss": 0.8282, "step": 5215 }, { "epoch": 0.2650048863448871, "grad_norm": 0.12945012490779378, "learning_rate": 0.0009967873487373045, "loss": 0.8283, "step": 5220 }, { "epoch": 0.2652587224429186, "grad_norm": 0.1020411529245916, "learning_rate": 0.0009967622280542365, "loss": 0.7958, "step": 5225 }, { "epoch": 0.2655125585409501, "grad_norm": 0.12412177991555699, "learning_rate": 0.0009967370098592206, "loss": 0.83, "step": 5230 }, { "epoch": 0.2657663946389816, "grad_norm": 0.2233048153677006, "learning_rate": 0.000996711694157207, "loss": 0.7997, "step": 5235 }, { "epoch": 0.2660202307370131, "grad_norm": 0.06852731948894285, "learning_rate": 0.0009966862809531647, "loss": 0.7986, "step": 5240 }, { "epoch": 0.2662740668350446, "grad_norm": 0.07947757055524335, "learning_rate": 0.0009966607702520825, "loss": 0.8001, "step": 5245 }, { "epoch": 0.2665279029330761, "grad_norm": 0.09823932454056501, "learning_rate": 0.0009966351620589679, "loss": 0.7807, "step": 5250 }, { "epoch": 0.26678173903110763, "grad_norm": 0.06957828914120649, "learning_rate": 0.0009966094563788478, "loss": 0.832, "step": 5255 }, { "epoch": 0.2670355751291391, "grad_norm": 0.06022297628389845, "learning_rate": 0.0009965836532167679, "loss": 0.8425, "step": 5260 }, { "epoch": 0.2672894112271706, "grad_norm": 0.06160719945750916, "learning_rate": 0.0009965577525777934, "loss": 0.8193, "step": 5265 }, { "epoch": 0.2675432473252021, "grad_norm": 0.10327869264526537, "learning_rate": 0.0009965317544670083, "loss": 0.7957, "step": 5270 }, { "epoch": 0.2677970834232336, "grad_norm": 0.054856607541083374, "learning_rate": 0.000996505658889516, "loss": 0.797, "step": 5275 }, { "epoch": 0.26805091952126514, "grad_norm": 0.06545017852343861, "learning_rate": 0.000996479465850439, "loss": 0.8133, "step": 5280 }, { "epoch": 0.2683047556192966, "grad_norm": 0.05176107810032039, "learning_rate": 0.000996453175354919, "loss": 0.8062, "step": 5285 }, { "epoch": 0.2685585917173281, "grad_norm": 0.05687973294967908, "learning_rate": 0.000996426787408116, "loss": 0.8456, "step": 5290 }, { "epoch": 0.26881242781535963, "grad_norm": 0.05109838848394831, "learning_rate": 0.0009964003020152107, "loss": 0.8284, "step": 5295 }, { "epoch": 0.2690662639133911, "grad_norm": 0.052473266588827526, "learning_rate": 0.0009963737191814015, "loss": 0.7569, "step": 5300 }, { "epoch": 0.26932010001142265, "grad_norm": 0.04694789045208128, "learning_rate": 0.0009963470389119068, "loss": 0.7778, "step": 5305 }, { "epoch": 0.2695739361094541, "grad_norm": 0.05247989945387942, "learning_rate": 0.0009963202612119635, "loss": 0.7739, "step": 5310 }, { "epoch": 0.2698277722074856, "grad_norm": 0.07029729155697463, "learning_rate": 0.000996293386086828, "loss": 0.787, "step": 5315 }, { "epoch": 0.27008160830551714, "grad_norm": 0.05481336987466997, "learning_rate": 0.0009962664135417761, "loss": 0.8117, "step": 5320 }, { "epoch": 0.2703354444035486, "grad_norm": 0.05263123045356416, "learning_rate": 0.0009962393435821017, "loss": 0.7916, "step": 5325 }, { "epoch": 0.27058928050158015, "grad_norm": 0.0535809095427182, "learning_rate": 0.0009962121762131192, "loss": 0.7673, "step": 5330 }, { "epoch": 0.27084311659961163, "grad_norm": 0.04854888204876916, "learning_rate": 0.0009961849114401612, "loss": 0.7303, "step": 5335 }, { "epoch": 0.2710969526976431, "grad_norm": 0.08342852925383373, "learning_rate": 0.0009961575492685793, "loss": 0.7785, "step": 5340 }, { "epoch": 0.27135078879567465, "grad_norm": 0.08513013825233282, "learning_rate": 0.0009961300897037449, "loss": 0.7679, "step": 5345 }, { "epoch": 0.2716046248937061, "grad_norm": 0.05126671650783441, "learning_rate": 0.000996102532751048, "loss": 0.7947, "step": 5350 }, { "epoch": 0.27185846099173766, "grad_norm": 0.06279459184110715, "learning_rate": 0.000996074878415898, "loss": 0.7989, "step": 5355 }, { "epoch": 0.27211229708976914, "grad_norm": 0.05619852646014014, "learning_rate": 0.0009960471267037234, "loss": 0.7713, "step": 5360 }, { "epoch": 0.2723661331878006, "grad_norm": 0.05710392403692135, "learning_rate": 0.0009960192776199716, "loss": 0.7481, "step": 5365 }, { "epoch": 0.27261996928583215, "grad_norm": 0.04616791728470313, "learning_rate": 0.0009959913311701092, "loss": 0.7545, "step": 5370 }, { "epoch": 0.27287380538386363, "grad_norm": 0.04845404451150098, "learning_rate": 0.000995963287359622, "loss": 0.7907, "step": 5375 }, { "epoch": 0.27312764148189517, "grad_norm": 0.05255615544376044, "learning_rate": 0.0009959351461940149, "loss": 0.7614, "step": 5380 }, { "epoch": 0.27338147757992665, "grad_norm": 0.056433505565118856, "learning_rate": 0.0009959069076788118, "loss": 0.8288, "step": 5385 }, { "epoch": 0.2736353136779581, "grad_norm": 0.08329553873217542, "learning_rate": 0.0009958785718195559, "loss": 0.814, "step": 5390 }, { "epoch": 0.27388914977598966, "grad_norm": 0.1210054217184166, "learning_rate": 0.000995850138621809, "loss": 0.7373, "step": 5395 }, { "epoch": 0.27414298587402114, "grad_norm": 0.05599233869910129, "learning_rate": 0.0009958216080911528, "loss": 0.7377, "step": 5400 }, { "epoch": 0.2743968219720526, "grad_norm": 0.047659838290619774, "learning_rate": 0.0009957929802331877, "loss": 0.7814, "step": 5405 }, { "epoch": 0.27465065807008415, "grad_norm": 0.049854624277816235, "learning_rate": 0.000995764255053533, "loss": 0.8396, "step": 5410 }, { "epoch": 0.27490449416811563, "grad_norm": 0.0458758424774534, "learning_rate": 0.0009957354325578276, "loss": 0.7652, "step": 5415 }, { "epoch": 0.27515833026614717, "grad_norm": 0.04708255191534916, "learning_rate": 0.000995706512751729, "loss": 0.7665, "step": 5420 }, { "epoch": 0.27541216636417865, "grad_norm": 0.04454163805013638, "learning_rate": 0.0009956774956409139, "loss": 0.7916, "step": 5425 }, { "epoch": 0.2756660024622101, "grad_norm": 0.048409604926487, "learning_rate": 0.0009956483812310782, "loss": 0.743, "step": 5430 }, { "epoch": 0.27591983856024166, "grad_norm": 0.04375943758273794, "learning_rate": 0.0009956191695279374, "loss": 0.7669, "step": 5435 }, { "epoch": 0.27617367465827314, "grad_norm": 0.04594086033790186, "learning_rate": 0.0009955898605372249, "loss": 0.7508, "step": 5440 }, { "epoch": 0.2764275107563047, "grad_norm": 0.053544535330216414, "learning_rate": 0.0009955604542646946, "loss": 0.8399, "step": 5445 }, { "epoch": 0.27668134685433615, "grad_norm": 0.06826233614354811, "learning_rate": 0.0009955309507161184, "loss": 0.7664, "step": 5450 }, { "epoch": 0.27693518295236763, "grad_norm": 0.04790718561471667, "learning_rate": 0.0009955013498972876, "loss": 0.8201, "step": 5455 }, { "epoch": 0.27718901905039917, "grad_norm": 0.0412471422235012, "learning_rate": 0.000995471651814013, "loss": 0.7592, "step": 5460 }, { "epoch": 0.27744285514843064, "grad_norm": 0.069630260332363, "learning_rate": 0.0009954418564721242, "loss": 0.7959, "step": 5465 }, { "epoch": 0.2776966912464622, "grad_norm": 0.04959249930291213, "learning_rate": 0.0009954119638774695, "loss": 0.7984, "step": 5470 }, { "epoch": 0.27795052734449366, "grad_norm": 0.04627611589063106, "learning_rate": 0.000995381974035917, "loss": 0.8052, "step": 5475 }, { "epoch": 0.27820436344252514, "grad_norm": 0.04684087970196605, "learning_rate": 0.0009953518869533536, "loss": 0.8077, "step": 5480 }, { "epoch": 0.2784581995405567, "grad_norm": 0.04352768138514883, "learning_rate": 0.0009953217026356848, "loss": 0.7865, "step": 5485 }, { "epoch": 0.27871203563858815, "grad_norm": 0.049468357685090024, "learning_rate": 0.0009952914210888363, "loss": 0.7978, "step": 5490 }, { "epoch": 0.2789658717366197, "grad_norm": 0.0525987349031302, "learning_rate": 0.0009952610423187517, "loss": 0.7724, "step": 5495 }, { "epoch": 0.27921970783465117, "grad_norm": 0.08623765388334195, "learning_rate": 0.0009952305663313943, "loss": 0.7965, "step": 5500 }, { "epoch": 0.27947354393268264, "grad_norm": 0.061361546729430956, "learning_rate": 0.0009951999931327464, "loss": 0.8059, "step": 5505 }, { "epoch": 0.2797273800307142, "grad_norm": 0.05047067162994942, "learning_rate": 0.0009951693227288096, "loss": 0.8552, "step": 5510 }, { "epoch": 0.27998121612874566, "grad_norm": 0.04950438055691523, "learning_rate": 0.0009951385551256041, "loss": 0.7916, "step": 5515 }, { "epoch": 0.2802350522267772, "grad_norm": 0.050560168359418206, "learning_rate": 0.0009951076903291693, "loss": 0.7857, "step": 5520 }, { "epoch": 0.28048888832480867, "grad_norm": 0.04699991524529279, "learning_rate": 0.000995076728345564, "loss": 0.8056, "step": 5525 }, { "epoch": 0.28074272442284015, "grad_norm": 0.049069750757152475, "learning_rate": 0.000995045669180866, "loss": 0.7661, "step": 5530 }, { "epoch": 0.2809965605208717, "grad_norm": 0.04718632368381139, "learning_rate": 0.000995014512841172, "loss": 0.8239, "step": 5535 }, { "epoch": 0.28125039661890316, "grad_norm": 0.06453238800197598, "learning_rate": 0.0009949832593325978, "loss": 0.7685, "step": 5540 }, { "epoch": 0.2815042327169347, "grad_norm": 0.052851172863381646, "learning_rate": 0.000994951908661278, "loss": 0.8044, "step": 5545 }, { "epoch": 0.2817580688149662, "grad_norm": 0.04733255701927816, "learning_rate": 0.0009949204608333672, "loss": 0.7895, "step": 5550 }, { "epoch": 0.28201190491299766, "grad_norm": 0.2466192838565957, "learning_rate": 0.0009948889158550376, "loss": 0.7871, "step": 5555 }, { "epoch": 0.2822657410110292, "grad_norm": 0.050482861266673326, "learning_rate": 0.0009948572737324822, "loss": 0.8089, "step": 5560 }, { "epoch": 0.28251957710906067, "grad_norm": 0.7461270272501932, "learning_rate": 0.0009948255344719118, "loss": 0.9002, "step": 5565 }, { "epoch": 0.2827734132070922, "grad_norm": 0.11601892480169733, "learning_rate": 0.0009947936980795565, "loss": 0.9091, "step": 5570 }, { "epoch": 0.2830272493051237, "grad_norm": 0.09302600510313656, "learning_rate": 0.000994761764561666, "loss": 0.8581, "step": 5575 }, { "epoch": 0.28328108540315516, "grad_norm": 0.08015278722453598, "learning_rate": 0.0009947297339245084, "loss": 0.8923, "step": 5580 }, { "epoch": 0.2835349215011867, "grad_norm": 0.06682981812211292, "learning_rate": 0.0009946976061743712, "loss": 0.8259, "step": 5585 }, { "epoch": 0.2837887575992182, "grad_norm": 0.06610227249847023, "learning_rate": 0.000994665381317561, "loss": 0.8115, "step": 5590 }, { "epoch": 0.2840425936972497, "grad_norm": 0.05130669651357426, "learning_rate": 0.0009946330593604033, "loss": 0.813, "step": 5595 }, { "epoch": 0.2842964297952812, "grad_norm": 0.05281847950074043, "learning_rate": 0.000994600640309243, "loss": 0.7861, "step": 5600 }, { "epoch": 0.28455026589331267, "grad_norm": 0.04222362428056991, "learning_rate": 0.0009945681241704434, "loss": 0.8171, "step": 5605 }, { "epoch": 0.2848041019913442, "grad_norm": 0.04664279116483628, "learning_rate": 0.0009945355109503872, "loss": 0.8207, "step": 5610 }, { "epoch": 0.2850579380893757, "grad_norm": 0.05753557066034402, "learning_rate": 0.0009945028006554768, "loss": 0.8185, "step": 5615 }, { "epoch": 0.2853117741874072, "grad_norm": 0.0444103909099358, "learning_rate": 0.0009944699932921326, "loss": 0.8289, "step": 5620 }, { "epoch": 0.2855656102854387, "grad_norm": 0.04797617224294301, "learning_rate": 0.0009944370888667947, "loss": 0.8142, "step": 5625 }, { "epoch": 0.2858194463834702, "grad_norm": 0.05376197137704638, "learning_rate": 0.0009944040873859218, "loss": 0.8164, "step": 5630 }, { "epoch": 0.2860732824815017, "grad_norm": 0.04318157041315873, "learning_rate": 0.0009943709888559922, "loss": 0.7818, "step": 5635 }, { "epoch": 0.2863271185795332, "grad_norm": 0.046621064503775046, "learning_rate": 0.000994337793283503, "loss": 0.821, "step": 5640 }, { "epoch": 0.28658095467756467, "grad_norm": 0.05299691334855593, "learning_rate": 0.0009943045006749703, "loss": 0.781, "step": 5645 }, { "epoch": 0.2868347907755962, "grad_norm": 0.05302364567630385, "learning_rate": 0.0009942711110369291, "loss": 0.7949, "step": 5650 }, { "epoch": 0.2870886268736277, "grad_norm": 0.040496226916699636, "learning_rate": 0.0009942376243759336, "loss": 0.7491, "step": 5655 }, { "epoch": 0.2873424629716592, "grad_norm": 0.051766978907945196, "learning_rate": 0.0009942040406985574, "loss": 0.7624, "step": 5660 }, { "epoch": 0.2875962990696907, "grad_norm": 0.047379671575220904, "learning_rate": 0.0009941703600113926, "loss": 0.8158, "step": 5665 }, { "epoch": 0.2878501351677222, "grad_norm": 0.04868404280163126, "learning_rate": 0.0009941365823210506, "loss": 0.8171, "step": 5670 }, { "epoch": 0.2881039712657537, "grad_norm": 0.039231330420717275, "learning_rate": 0.0009941027076341615, "loss": 0.7625, "step": 5675 }, { "epoch": 0.2883578073637852, "grad_norm": 0.0423166743894628, "learning_rate": 0.0009940687359573752, "loss": 0.8256, "step": 5680 }, { "epoch": 0.2886116434618167, "grad_norm": 0.04498731843118582, "learning_rate": 0.00099403466729736, "loss": 0.7681, "step": 5685 }, { "epoch": 0.2888654795598482, "grad_norm": 0.059318067248307675, "learning_rate": 0.000994000501660803, "loss": 0.8022, "step": 5690 }, { "epoch": 0.2891193156578797, "grad_norm": 0.043971305305251444, "learning_rate": 0.0009939662390544115, "loss": 0.7579, "step": 5695 }, { "epoch": 0.2893731517559112, "grad_norm": 0.04913776443792512, "learning_rate": 0.0009939318794849104, "loss": 0.7777, "step": 5700 }, { "epoch": 0.2896269878539427, "grad_norm": 0.04876843274650846, "learning_rate": 0.0009938974229590446, "loss": 0.7711, "step": 5705 }, { "epoch": 0.28988082395197423, "grad_norm": 0.0464759861865855, "learning_rate": 0.000993862869483578, "loss": 0.7934, "step": 5710 }, { "epoch": 0.2901346600500057, "grad_norm": 0.06561583383794845, "learning_rate": 0.0009938282190652928, "loss": 0.8123, "step": 5715 }, { "epoch": 0.2903884961480372, "grad_norm": 0.044699639509833654, "learning_rate": 0.0009937934717109912, "loss": 0.7726, "step": 5720 }, { "epoch": 0.2906423322460687, "grad_norm": 0.04581062928309336, "learning_rate": 0.0009937586274274932, "loss": 0.7709, "step": 5725 }, { "epoch": 0.2908961683441002, "grad_norm": 0.05873976589419157, "learning_rate": 0.0009937236862216391, "loss": 0.8049, "step": 5730 }, { "epoch": 0.29115000444213174, "grad_norm": 0.053504138706526, "learning_rate": 0.0009936886481002878, "loss": 0.8114, "step": 5735 }, { "epoch": 0.2914038405401632, "grad_norm": 0.054183192183460765, "learning_rate": 0.0009936535130703169, "loss": 0.8096, "step": 5740 }, { "epoch": 0.2916576766381947, "grad_norm": 0.04617968235816496, "learning_rate": 0.0009936182811386232, "loss": 0.7849, "step": 5745 }, { "epoch": 0.29191151273622623, "grad_norm": 0.04775085497230901, "learning_rate": 0.0009935829523121224, "loss": 0.8002, "step": 5750 }, { "epoch": 0.2921653488342577, "grad_norm": 0.04878357230042361, "learning_rate": 0.0009935475265977498, "loss": 0.7534, "step": 5755 }, { "epoch": 0.29241918493228924, "grad_norm": 0.0738047966528467, "learning_rate": 0.0009935120040024587, "loss": 0.7598, "step": 5760 }, { "epoch": 0.2926730210303207, "grad_norm": 0.05180126027864792, "learning_rate": 0.0009934763845332228, "loss": 0.782, "step": 5765 }, { "epoch": 0.2929268571283522, "grad_norm": 0.042272705131000766, "learning_rate": 0.0009934406681970332, "loss": 0.7543, "step": 5770 }, { "epoch": 0.29318069322638374, "grad_norm": 0.04162265866072997, "learning_rate": 0.0009934048550009015, "loss": 0.7381, "step": 5775 }, { "epoch": 0.2934345293244152, "grad_norm": 0.042488004024636386, "learning_rate": 0.0009933689449518573, "loss": 0.7862, "step": 5780 }, { "epoch": 0.29368836542244675, "grad_norm": 0.04922569841165056, "learning_rate": 0.0009933329380569494, "loss": 0.8044, "step": 5785 }, { "epoch": 0.29394220152047823, "grad_norm": 0.04495207754235997, "learning_rate": 0.0009932968343232462, "loss": 0.7685, "step": 5790 }, { "epoch": 0.2941960376185097, "grad_norm": 0.05048642476296767, "learning_rate": 0.0009932606337578346, "loss": 0.7765, "step": 5795 }, { "epoch": 0.29444987371654124, "grad_norm": 0.04579448400914848, "learning_rate": 0.0009932243363678203, "loss": 0.7755, "step": 5800 }, { "epoch": 0.2947037098145727, "grad_norm": 0.04471929566594168, "learning_rate": 0.0009931879421603285, "loss": 0.7984, "step": 5805 }, { "epoch": 0.29495754591260426, "grad_norm": 0.044738778413603277, "learning_rate": 0.0009931514511425032, "loss": 0.7522, "step": 5810 }, { "epoch": 0.29521138201063574, "grad_norm": 0.04745750652861639, "learning_rate": 0.0009931148633215074, "loss": 0.7619, "step": 5815 }, { "epoch": 0.2954652181086672, "grad_norm": 0.05170861312855808, "learning_rate": 0.000993078178704523, "loss": 0.7615, "step": 5820 }, { "epoch": 0.29571905420669875, "grad_norm": 0.042799216248114655, "learning_rate": 0.0009930413972987513, "loss": 0.7758, "step": 5825 }, { "epoch": 0.29597289030473023, "grad_norm": 0.055795644780837254, "learning_rate": 0.000993004519111412, "loss": 0.8233, "step": 5830 }, { "epoch": 0.29622672640276176, "grad_norm": 0.04924971268218262, "learning_rate": 0.0009929675441497441, "loss": 0.7708, "step": 5835 }, { "epoch": 0.29648056250079324, "grad_norm": 0.04299840002419698, "learning_rate": 0.000992930472421006, "loss": 0.7666, "step": 5840 }, { "epoch": 0.2967343985988247, "grad_norm": 0.06772292163720561, "learning_rate": 0.0009928933039324741, "loss": 0.7192, "step": 5845 }, { "epoch": 0.29698823469685626, "grad_norm": 0.049319237040724044, "learning_rate": 0.0009928560386914447, "loss": 0.7747, "step": 5850 }, { "epoch": 0.29724207079488774, "grad_norm": 0.04801737067081464, "learning_rate": 0.000992818676705233, "loss": 0.7541, "step": 5855 }, { "epoch": 0.2974959068929192, "grad_norm": 0.08841772407677571, "learning_rate": 0.0009927812179811727, "loss": 0.8851, "step": 5860 }, { "epoch": 0.29774974299095075, "grad_norm": 0.05713099219055963, "learning_rate": 0.0009927436625266166, "loss": 0.8031, "step": 5865 }, { "epoch": 0.29800357908898223, "grad_norm": 0.12142251919462028, "learning_rate": 0.0009927060103489369, "loss": 0.7927, "step": 5870 }, { "epoch": 0.29825741518701376, "grad_norm": 0.05610057893443329, "learning_rate": 0.0009926682614555247, "loss": 0.8033, "step": 5875 }, { "epoch": 0.29851125128504524, "grad_norm": 0.04762651807039157, "learning_rate": 0.0009926304158537895, "loss": 0.8083, "step": 5880 }, { "epoch": 0.2987650873830767, "grad_norm": 0.04850250984967299, "learning_rate": 0.0009925924735511603, "loss": 0.8036, "step": 5885 }, { "epoch": 0.29901892348110826, "grad_norm": 0.051962094642149084, "learning_rate": 0.0009925544345550854, "loss": 0.822, "step": 5890 }, { "epoch": 0.29927275957913974, "grad_norm": 0.047376944336849296, "learning_rate": 0.0009925162988730313, "loss": 0.7952, "step": 5895 }, { "epoch": 0.29952659567717127, "grad_norm": 0.04456203914200253, "learning_rate": 0.0009924780665124839, "loss": 0.7906, "step": 5900 }, { "epoch": 0.29978043177520275, "grad_norm": 0.06459589971040404, "learning_rate": 0.000992439737480948, "loss": 0.7843, "step": 5905 }, { "epoch": 0.30003426787323423, "grad_norm": 0.053910978711879234, "learning_rate": 0.0009924013117859475, "loss": 0.824, "step": 5910 }, { "epoch": 0.30028810397126576, "grad_norm": 0.049416861058950476, "learning_rate": 0.0009923627894350248, "loss": 0.832, "step": 5915 }, { "epoch": 0.30054194006929724, "grad_norm": 0.046148272223847236, "learning_rate": 0.0009923241704357423, "loss": 0.822, "step": 5920 }, { "epoch": 0.3007957761673288, "grad_norm": 0.0481582443807183, "learning_rate": 0.0009922854547956802, "loss": 0.768, "step": 5925 }, { "epoch": 0.30104961226536026, "grad_norm": 0.045127875978670746, "learning_rate": 0.0009922466425224383, "loss": 0.8093, "step": 5930 }, { "epoch": 0.30130344836339173, "grad_norm": 0.05175081464635259, "learning_rate": 0.0009922077336236353, "loss": 0.748, "step": 5935 }, { "epoch": 0.30155728446142327, "grad_norm": 0.057353531839343726, "learning_rate": 0.000992168728106909, "loss": 0.7672, "step": 5940 }, { "epoch": 0.30181112055945475, "grad_norm": 0.04295872526269075, "learning_rate": 0.0009921296259799155, "loss": 0.8035, "step": 5945 }, { "epoch": 0.3020649566574863, "grad_norm": 0.04973955145036853, "learning_rate": 0.000992090427250331, "loss": 0.7729, "step": 5950 }, { "epoch": 0.30231879275551776, "grad_norm": 0.06516777686311773, "learning_rate": 0.0009920511319258495, "loss": 0.8267, "step": 5955 }, { "epoch": 0.30257262885354924, "grad_norm": 0.045671716649517656, "learning_rate": 0.0009920117400141848, "loss": 0.7895, "step": 5960 }, { "epoch": 0.3028264649515808, "grad_norm": 0.22397152648051658, "learning_rate": 0.0009919722515230691, "loss": 0.7739, "step": 5965 }, { "epoch": 0.30308030104961226, "grad_norm": 0.05013177937429277, "learning_rate": 0.0009919326664602538, "loss": 0.7568, "step": 5970 }, { "epoch": 0.3033341371476438, "grad_norm": 0.045267182817223306, "learning_rate": 0.0009918929848335095, "loss": 0.8267, "step": 5975 }, { "epoch": 0.30358797324567527, "grad_norm": 0.05002693315442928, "learning_rate": 0.0009918532066506252, "loss": 0.7874, "step": 5980 }, { "epoch": 0.30384180934370675, "grad_norm": 0.0541651681079727, "learning_rate": 0.0009918133319194093, "loss": 0.7582, "step": 5985 }, { "epoch": 0.3040956454417383, "grad_norm": 0.05371838571156568, "learning_rate": 0.000991773360647689, "loss": 0.7819, "step": 5990 }, { "epoch": 0.30434948153976976, "grad_norm": 0.0518330066902139, "learning_rate": 0.0009917332928433106, "loss": 0.7851, "step": 5995 }, { "epoch": 0.3046033176378013, "grad_norm": 0.06650514296715761, "learning_rate": 0.000991693128514139, "loss": 0.8225, "step": 6000 }, { "epoch": 0.3048571537358328, "grad_norm": 0.0460607125826587, "learning_rate": 0.0009916528676680585, "loss": 0.7548, "step": 6005 }, { "epoch": 0.30511098983386425, "grad_norm": 0.05107952414794769, "learning_rate": 0.0009916125103129718, "loss": 0.7323, "step": 6010 }, { "epoch": 0.3053648259318958, "grad_norm": 0.052204723845998835, "learning_rate": 0.000991572056456801, "loss": 0.7989, "step": 6015 }, { "epoch": 0.30561866202992727, "grad_norm": 0.044606636564516716, "learning_rate": 0.000991531506107487, "loss": 0.7823, "step": 6020 }, { "epoch": 0.3058724981279588, "grad_norm": 0.044541108925793195, "learning_rate": 0.0009914908592729896, "loss": 0.8286, "step": 6025 }, { "epoch": 0.3061263342259903, "grad_norm": 0.10641743918553237, "learning_rate": 0.0009914501159612877, "loss": 0.7381, "step": 6030 }, { "epoch": 0.30638017032402176, "grad_norm": 0.045154586269530575, "learning_rate": 0.0009914092761803789, "loss": 0.7505, "step": 6035 }, { "epoch": 0.3066340064220533, "grad_norm": 0.04316546416695957, "learning_rate": 0.0009913683399382796, "loss": 0.8041, "step": 6040 }, { "epoch": 0.3068878425200848, "grad_norm": 0.04245244459698869, "learning_rate": 0.0009913273072430257, "loss": 0.7868, "step": 6045 }, { "epoch": 0.3071416786181163, "grad_norm": 0.043572704154091306, "learning_rate": 0.0009912861781026718, "loss": 0.7617, "step": 6050 }, { "epoch": 0.3073955147161478, "grad_norm": 0.04306786537678596, "learning_rate": 0.0009912449525252911, "loss": 0.7746, "step": 6055 }, { "epoch": 0.30764935081417927, "grad_norm": 0.04034461126241467, "learning_rate": 0.000991203630518976, "loss": 0.773, "step": 6060 }, { "epoch": 0.3079031869122108, "grad_norm": 0.048067587818063195, "learning_rate": 0.0009911622120918379, "loss": 0.7399, "step": 6065 }, { "epoch": 0.3081570230102423, "grad_norm": 0.044443012828283016, "learning_rate": 0.0009911206972520068, "loss": 0.7724, "step": 6070 }, { "epoch": 0.30841085910827376, "grad_norm": 0.05184163064922888, "learning_rate": 0.0009910790860076324, "loss": 0.815, "step": 6075 }, { "epoch": 0.3086646952063053, "grad_norm": 0.6123921426904981, "learning_rate": 0.0009910373783668823, "loss": 0.7867, "step": 6080 }, { "epoch": 0.3089185313043368, "grad_norm": 0.08909535930878135, "learning_rate": 0.0009909955743379435, "loss": 0.8017, "step": 6085 }, { "epoch": 0.3091723674023683, "grad_norm": 0.10529892911895897, "learning_rate": 0.0009909536739290221, "loss": 0.8043, "step": 6090 }, { "epoch": 0.3094262035003998, "grad_norm": 0.06859152908755464, "learning_rate": 0.0009909116771483427, "loss": 0.7658, "step": 6095 }, { "epoch": 0.30968003959843127, "grad_norm": 0.29419101050103513, "learning_rate": 0.0009908695840041496, "loss": 0.8143, "step": 6100 }, { "epoch": 0.3099338756964628, "grad_norm": 0.06909936284547424, "learning_rate": 0.000990827394504705, "loss": 0.7819, "step": 6105 }, { "epoch": 0.3101877117944943, "grad_norm": 0.07833125056652004, "learning_rate": 0.0009907851086582906, "loss": 0.8153, "step": 6110 }, { "epoch": 0.3104415478925258, "grad_norm": 0.06865451580802749, "learning_rate": 0.0009907427264732069, "loss": 0.8158, "step": 6115 }, { "epoch": 0.3106953839905573, "grad_norm": 0.05199132431529247, "learning_rate": 0.0009907002479577734, "loss": 0.803, "step": 6120 }, { "epoch": 0.3109492200885888, "grad_norm": 0.0439424384994731, "learning_rate": 0.0009906576731203282, "loss": 0.7771, "step": 6125 }, { "epoch": 0.3112030561866203, "grad_norm": 0.0434271000203874, "learning_rate": 0.0009906150019692288, "loss": 0.7931, "step": 6130 }, { "epoch": 0.3114568922846518, "grad_norm": 0.049210429810876394, "learning_rate": 0.000990572234512851, "loss": 0.7914, "step": 6135 }, { "epoch": 0.3117107283826833, "grad_norm": 0.07725299582778054, "learning_rate": 0.0009905293707595903, "loss": 0.7782, "step": 6140 }, { "epoch": 0.3119645644807148, "grad_norm": 0.06308071274041319, "learning_rate": 0.0009904864107178602, "loss": 0.7676, "step": 6145 }, { "epoch": 0.3122184005787463, "grad_norm": 0.05279433013386825, "learning_rate": 0.000990443354396094, "loss": 0.7998, "step": 6150 }, { "epoch": 0.3124722366767778, "grad_norm": 0.049013135532763936, "learning_rate": 0.000990400201802743, "loss": 0.8454, "step": 6155 }, { "epoch": 0.3127260727748093, "grad_norm": 0.04601541791873265, "learning_rate": 0.0009903569529462778, "loss": 0.8318, "step": 6160 }, { "epoch": 0.31297990887284083, "grad_norm": 0.04702177710071727, "learning_rate": 0.0009903136078351885, "loss": 0.7762, "step": 6165 }, { "epoch": 0.3132337449708723, "grad_norm": 0.04805455479707287, "learning_rate": 0.0009902701664779828, "loss": 0.7721, "step": 6170 }, { "epoch": 0.3134875810689038, "grad_norm": 0.047075108521442595, "learning_rate": 0.0009902266288831887, "loss": 0.806, "step": 6175 }, { "epoch": 0.3137414171669353, "grad_norm": 0.17978970800462618, "learning_rate": 0.000990182995059352, "loss": 0.8331, "step": 6180 }, { "epoch": 0.3139952532649668, "grad_norm": 0.04435248288882701, "learning_rate": 0.0009901392650150378, "loss": 0.7603, "step": 6185 }, { "epoch": 0.31424908936299834, "grad_norm": 0.04165717779512257, "learning_rate": 0.0009900954387588303, "loss": 0.7359, "step": 6190 }, { "epoch": 0.3145029254610298, "grad_norm": 0.043231987467963424, "learning_rate": 0.0009900515162993325, "loss": 0.8004, "step": 6195 }, { "epoch": 0.3147567615590613, "grad_norm": 0.03812656332425266, "learning_rate": 0.0009900074976451655, "loss": 0.7794, "step": 6200 }, { "epoch": 0.31501059765709283, "grad_norm": 0.12581917664623707, "learning_rate": 0.0009899633828049706, "loss": 0.7683, "step": 6205 }, { "epoch": 0.3152644337551243, "grad_norm": 0.044222933350280935, "learning_rate": 0.0009899191717874071, "loss": 0.7676, "step": 6210 }, { "epoch": 0.31551826985315584, "grad_norm": 0.09668558296984253, "learning_rate": 0.0009898748646011534, "loss": 0.7768, "step": 6215 }, { "epoch": 0.3157721059511873, "grad_norm": 0.04749396100902809, "learning_rate": 0.0009898304612549068, "loss": 0.764, "step": 6220 }, { "epoch": 0.3160259420492188, "grad_norm": 0.044585355095928, "learning_rate": 0.0009897859617573833, "loss": 0.7757, "step": 6225 }, { "epoch": 0.31627977814725033, "grad_norm": 0.04603015830900462, "learning_rate": 0.0009897413661173182, "loss": 0.7868, "step": 6230 }, { "epoch": 0.3165336142452818, "grad_norm": 0.045682123295875884, "learning_rate": 0.0009896966743434654, "loss": 0.7972, "step": 6235 }, { "epoch": 0.31678745034331335, "grad_norm": 0.044782617165865984, "learning_rate": 0.0009896518864445974, "loss": 0.7674, "step": 6240 }, { "epoch": 0.3170412864413448, "grad_norm": 0.0716274864197189, "learning_rate": 0.0009896070024295058, "loss": 0.8218, "step": 6245 }, { "epoch": 0.3172951225393763, "grad_norm": 0.04723274888893319, "learning_rate": 0.0009895620223070013, "loss": 0.7483, "step": 6250 }, { "epoch": 0.31754895863740784, "grad_norm": 0.04877101101718428, "learning_rate": 0.0009895169460859136, "loss": 0.7891, "step": 6255 }, { "epoch": 0.3178027947354393, "grad_norm": 0.067217796172833, "learning_rate": 0.0009894717737750905, "loss": 0.766, "step": 6260 }, { "epoch": 0.31805663083347085, "grad_norm": 0.05636244822619028, "learning_rate": 0.000989426505383399, "loss": 0.7655, "step": 6265 }, { "epoch": 0.31831046693150233, "grad_norm": 0.05726851738745464, "learning_rate": 0.0009893811409197254, "loss": 0.803, "step": 6270 }, { "epoch": 0.3185643030295338, "grad_norm": 0.052266420738786276, "learning_rate": 0.0009893356803929742, "loss": 0.8334, "step": 6275 }, { "epoch": 0.31881813912756535, "grad_norm": 0.07547927213638692, "learning_rate": 0.0009892901238120694, "loss": 0.7495, "step": 6280 }, { "epoch": 0.3190719752255968, "grad_norm": 0.06689770543061269, "learning_rate": 0.0009892444711859536, "loss": 0.7462, "step": 6285 }, { "epoch": 0.3193258113236283, "grad_norm": 0.05645465924978959, "learning_rate": 0.0009891987225235876, "loss": 0.7907, "step": 6290 }, { "epoch": 0.31957964742165984, "grad_norm": 0.04419667243123653, "learning_rate": 0.0009891528778339523, "loss": 0.7579, "step": 6295 }, { "epoch": 0.3198334835196913, "grad_norm": 0.1079504156826421, "learning_rate": 0.0009891069371260463, "loss": 0.8024, "step": 6300 }, { "epoch": 0.32008731961772285, "grad_norm": 0.0635514860104287, "learning_rate": 0.0009890609004088878, "loss": 0.7656, "step": 6305 }, { "epoch": 0.32034115571575433, "grad_norm": 0.05809166659461275, "learning_rate": 0.0009890147676915133, "loss": 0.7845, "step": 6310 }, { "epoch": 0.3205949918137858, "grad_norm": 0.04588984313829065, "learning_rate": 0.0009889685389829787, "loss": 0.781, "step": 6315 }, { "epoch": 0.32084882791181735, "grad_norm": 0.049697753819730225, "learning_rate": 0.0009889222142923585, "loss": 0.8198, "step": 6320 }, { "epoch": 0.3211026640098488, "grad_norm": 0.04789859409457676, "learning_rate": 0.0009888757936287458, "loss": 0.7937, "step": 6325 }, { "epoch": 0.32135650010788036, "grad_norm": 0.055703038459331035, "learning_rate": 0.0009888292770012528, "loss": 0.7717, "step": 6330 }, { "epoch": 0.32161033620591184, "grad_norm": 0.04644925364510245, "learning_rate": 0.0009887826644190106, "loss": 0.7323, "step": 6335 }, { "epoch": 0.3218641723039433, "grad_norm": 0.7012287602927039, "learning_rate": 0.0009887359558911689, "loss": 0.7557, "step": 6340 }, { "epoch": 0.32211800840197485, "grad_norm": 0.08548419565478184, "learning_rate": 0.0009886891514268963, "loss": 0.7744, "step": 6345 }, { "epoch": 0.32237184450000633, "grad_norm": 0.06471807818492122, "learning_rate": 0.0009886422510353805, "loss": 0.7822, "step": 6350 }, { "epoch": 0.32262568059803787, "grad_norm": 0.04665217949391778, "learning_rate": 0.0009885952547258278, "loss": 0.7714, "step": 6355 }, { "epoch": 0.32287951669606935, "grad_norm": 0.0504950928737886, "learning_rate": 0.000988548162507463, "loss": 0.8259, "step": 6360 }, { "epoch": 0.3231333527941008, "grad_norm": 0.043012589131754916, "learning_rate": 0.0009885009743895302, "loss": 0.755, "step": 6365 }, { "epoch": 0.32338718889213236, "grad_norm": 0.042100894802394144, "learning_rate": 0.0009884536903812923, "loss": 0.7965, "step": 6370 }, { "epoch": 0.32364102499016384, "grad_norm": 0.04372390079188773, "learning_rate": 0.000988406310492031, "loss": 0.8094, "step": 6375 }, { "epoch": 0.3238948610881954, "grad_norm": 0.04668125971887181, "learning_rate": 0.0009883588347310466, "loss": 0.7661, "step": 6380 }, { "epoch": 0.32414869718622685, "grad_norm": 0.05093147746893981, "learning_rate": 0.0009883112631076585, "loss": 0.7491, "step": 6385 }, { "epoch": 0.32440253328425833, "grad_norm": 0.044015850472439104, "learning_rate": 0.0009882635956312046, "loss": 0.7673, "step": 6390 }, { "epoch": 0.32465636938228987, "grad_norm": 0.04293790742099696, "learning_rate": 0.0009882158323110417, "loss": 0.7477, "step": 6395 }, { "epoch": 0.32491020548032135, "grad_norm": 0.05299386627496661, "learning_rate": 0.0009881679731565457, "loss": 0.7715, "step": 6400 }, { "epoch": 0.3251640415783529, "grad_norm": 0.0570337062272721, "learning_rate": 0.000988120018177111, "loss": 0.7823, "step": 6405 }, { "epoch": 0.32541787767638436, "grad_norm": 0.04408437358106276, "learning_rate": 0.0009880719673821513, "loss": 0.7524, "step": 6410 }, { "epoch": 0.32567171377441584, "grad_norm": 0.046234613858549274, "learning_rate": 0.000988023820781098, "loss": 0.7267, "step": 6415 }, { "epoch": 0.3259255498724474, "grad_norm": 0.04695646360249946, "learning_rate": 0.000987975578383403, "loss": 0.7931, "step": 6420 }, { "epoch": 0.32617938597047885, "grad_norm": 0.057310194423496205, "learning_rate": 0.0009879272401985349, "loss": 0.7799, "step": 6425 }, { "epoch": 0.3264332220685104, "grad_norm": 0.05333851830200283, "learning_rate": 0.0009878788062359831, "loss": 0.793, "step": 6430 }, { "epoch": 0.32668705816654187, "grad_norm": 0.06427632824514147, "learning_rate": 0.0009878302765052548, "loss": 0.7554, "step": 6435 }, { "epoch": 0.32694089426457335, "grad_norm": 0.04694852123622723, "learning_rate": 0.0009877816510158756, "loss": 0.782, "step": 6440 }, { "epoch": 0.3271947303626049, "grad_norm": 0.051677267214603785, "learning_rate": 0.0009877329297773914, "loss": 0.7992, "step": 6445 }, { "epoch": 0.32744856646063636, "grad_norm": 0.04591131974159551, "learning_rate": 0.000987684112799365, "loss": 0.766, "step": 6450 }, { "epoch": 0.3277024025586679, "grad_norm": 0.03996551468115855, "learning_rate": 0.0009876352000913796, "loss": 0.754, "step": 6455 }, { "epoch": 0.3279562386566994, "grad_norm": 0.04598307107673091, "learning_rate": 0.000987586191663036, "loss": 0.7456, "step": 6460 }, { "epoch": 0.32821007475473085, "grad_norm": 0.04291704036678403, "learning_rate": 0.0009875370875239548, "loss": 0.7535, "step": 6465 }, { "epoch": 0.3284639108527624, "grad_norm": 0.04264259491792984, "learning_rate": 0.0009874878876837746, "loss": 0.7809, "step": 6470 }, { "epoch": 0.32871774695079387, "grad_norm": 0.0439634122139025, "learning_rate": 0.0009874385921521533, "loss": 0.8013, "step": 6475 }, { "epoch": 0.3289715830488254, "grad_norm": 0.04652602619349424, "learning_rate": 0.000987389200938767, "loss": 0.8121, "step": 6480 }, { "epoch": 0.3292254191468569, "grad_norm": 0.3929965100599694, "learning_rate": 0.0009873397140533111, "loss": 0.7816, "step": 6485 }, { "epoch": 0.32947925524488836, "grad_norm": 0.12155635641813951, "learning_rate": 0.0009872901315054999, "loss": 0.7731, "step": 6490 }, { "epoch": 0.3297330913429199, "grad_norm": 0.06189263200867016, "learning_rate": 0.000987240453305066, "loss": 0.8128, "step": 6495 }, { "epoch": 0.32998692744095137, "grad_norm": 0.07595236954333391, "learning_rate": 0.0009871906794617607, "loss": 0.8207, "step": 6500 }, { "epoch": 0.33024076353898285, "grad_norm": 0.1437031527139391, "learning_rate": 0.0009871408099853547, "loss": 0.7845, "step": 6505 }, { "epoch": 0.3304945996370144, "grad_norm": 0.057301551541187656, "learning_rate": 0.0009870908448856373, "loss": 0.7638, "step": 6510 }, { "epoch": 0.33074843573504586, "grad_norm": 0.048784323331682054, "learning_rate": 0.000987040784172416, "loss": 0.786, "step": 6515 }, { "epoch": 0.3310022718330774, "grad_norm": 0.044942456041808805, "learning_rate": 0.0009869906278555177, "loss": 0.7787, "step": 6520 }, { "epoch": 0.3312561079311089, "grad_norm": 0.04950809268722967, "learning_rate": 0.0009869403759447876, "loss": 0.8332, "step": 6525 }, { "epoch": 0.33150994402914036, "grad_norm": 0.0521570439409325, "learning_rate": 0.0009868900284500904, "loss": 0.7378, "step": 6530 }, { "epoch": 0.3317637801271719, "grad_norm": 0.0413002124993935, "learning_rate": 0.0009868395853813085, "loss": 0.7957, "step": 6535 }, { "epoch": 0.33201761622520337, "grad_norm": 0.05204223581896749, "learning_rate": 0.000986789046748344, "loss": 0.8001, "step": 6540 }, { "epoch": 0.3322714523232349, "grad_norm": 0.050885879207130495, "learning_rate": 0.000986738412561117, "loss": 0.8145, "step": 6545 }, { "epoch": 0.3325252884212664, "grad_norm": 0.042020240115834, "learning_rate": 0.0009866876828295672, "loss": 0.7409, "step": 6550 }, { "epoch": 0.33277912451929786, "grad_norm": 0.05541518389487471, "learning_rate": 0.0009866368575636522, "loss": 0.7912, "step": 6555 }, { "epoch": 0.3330329606173294, "grad_norm": 0.05226478541976068, "learning_rate": 0.0009865859367733489, "loss": 0.8007, "step": 6560 }, { "epoch": 0.3332867967153609, "grad_norm": 0.04433138648231094, "learning_rate": 0.0009865349204686532, "loss": 0.7479, "step": 6565 }, { "epoch": 0.3335406328133924, "grad_norm": 0.041975502005465334, "learning_rate": 0.0009864838086595783, "loss": 0.7664, "step": 6570 }, { "epoch": 0.3337944689114239, "grad_norm": 0.03893378390291279, "learning_rate": 0.0009864326013561584, "loss": 0.7758, "step": 6575 }, { "epoch": 0.33404830500945537, "grad_norm": 0.043703690039238255, "learning_rate": 0.0009863812985684446, "loss": 0.7851, "step": 6580 }, { "epoch": 0.3343021411074869, "grad_norm": 0.04028279156213922, "learning_rate": 0.0009863299003065073, "loss": 0.7915, "step": 6585 }, { "epoch": 0.3345559772055184, "grad_norm": 0.05108693012395576, "learning_rate": 0.000986278406580436, "loss": 0.8002, "step": 6590 }, { "epoch": 0.3348098133035499, "grad_norm": 0.04423110176239263, "learning_rate": 0.0009862268174003386, "loss": 0.7662, "step": 6595 }, { "epoch": 0.3350636494015814, "grad_norm": 0.0445523874016183, "learning_rate": 0.0009861751327763415, "loss": 0.788, "step": 6600 }, { "epoch": 0.3353174854996129, "grad_norm": 0.047717256722552774, "learning_rate": 0.0009861233527185907, "loss": 0.7772, "step": 6605 }, { "epoch": 0.3355713215976444, "grad_norm": 0.05131971177489671, "learning_rate": 0.00098607147723725, "loss": 0.7797, "step": 6610 }, { "epoch": 0.3358251576956759, "grad_norm": 0.045805912949863234, "learning_rate": 0.000986019506342502, "loss": 0.7511, "step": 6615 }, { "epoch": 0.3360789937937074, "grad_norm": 0.051270020008090625, "learning_rate": 0.0009859674400445491, "loss": 0.7524, "step": 6620 }, { "epoch": 0.3363328298917389, "grad_norm": 0.042304622051709484, "learning_rate": 0.0009859152783536112, "loss": 0.7737, "step": 6625 }, { "epoch": 0.3365866659897704, "grad_norm": 0.04507200201540175, "learning_rate": 0.0009858630212799273, "loss": 0.7237, "step": 6630 }, { "epoch": 0.3368405020878019, "grad_norm": 0.07083719564953414, "learning_rate": 0.0009858106688337552, "loss": 0.7548, "step": 6635 }, { "epoch": 0.3370943381858334, "grad_norm": 0.060463675939449193, "learning_rate": 0.0009857582210253718, "loss": 0.7867, "step": 6640 }, { "epoch": 0.33734817428386493, "grad_norm": 0.32812866794804274, "learning_rate": 0.000985705677865072, "loss": 0.7735, "step": 6645 }, { "epoch": 0.3376020103818964, "grad_norm": 0.06220889860909154, "learning_rate": 0.0009856530393631698, "loss": 0.7543, "step": 6650 }, { "epoch": 0.3378558464799279, "grad_norm": 0.05077057970687043, "learning_rate": 0.0009856003055299979, "loss": 0.8098, "step": 6655 }, { "epoch": 0.3381096825779594, "grad_norm": 0.052997309729697926, "learning_rate": 0.0009855474763759075, "loss": 0.7756, "step": 6660 }, { "epoch": 0.3383635186759909, "grad_norm": 0.04279424495252166, "learning_rate": 0.0009854945519112692, "loss": 0.7811, "step": 6665 }, { "epoch": 0.33861735477402244, "grad_norm": 0.27791658657234963, "learning_rate": 0.0009854415321464715, "loss": 0.7716, "step": 6670 }, { "epoch": 0.3388711908720539, "grad_norm": 0.10664527038003578, "learning_rate": 0.0009853884170919218, "loss": 0.7963, "step": 6675 }, { "epoch": 0.3391250269700854, "grad_norm": 0.06592440227848509, "learning_rate": 0.0009853352067580466, "loss": 0.8088, "step": 6680 }, { "epoch": 0.33937886306811693, "grad_norm": 0.0561906340714217, "learning_rate": 0.0009852819011552908, "loss": 0.7793, "step": 6685 }, { "epoch": 0.3396326991661484, "grad_norm": 0.054076904924019994, "learning_rate": 0.0009852285002941174, "loss": 0.8048, "step": 6690 }, { "epoch": 0.33988653526417995, "grad_norm": 0.04896324605839458, "learning_rate": 0.0009851750041850098, "loss": 0.7831, "step": 6695 }, { "epoch": 0.3401403713622114, "grad_norm": 0.2855360142136468, "learning_rate": 0.000985121412838468, "loss": 0.7625, "step": 6700 }, { "epoch": 0.3403942074602429, "grad_norm": 0.05749568202233009, "learning_rate": 0.0009850677262650124, "loss": 0.8235, "step": 6705 }, { "epoch": 0.34064804355827444, "grad_norm": 0.057572237934678215, "learning_rate": 0.000985013944475181, "loss": 0.7676, "step": 6710 }, { "epoch": 0.3409018796563059, "grad_norm": 0.04681055632705486, "learning_rate": 0.0009849600674795313, "loss": 0.8071, "step": 6715 }, { "epoch": 0.34115571575433745, "grad_norm": 0.046062386229406976, "learning_rate": 0.0009849060952886385, "loss": 0.7807, "step": 6720 }, { "epoch": 0.34140955185236893, "grad_norm": 0.04469066840132692, "learning_rate": 0.0009848520279130979, "loss": 0.7834, "step": 6725 }, { "epoch": 0.3416633879504004, "grad_norm": 0.037832556437690776, "learning_rate": 0.0009847978653635219, "loss": 0.7397, "step": 6730 }, { "epoch": 0.34191722404843194, "grad_norm": 0.04654991163672132, "learning_rate": 0.0009847436076505425, "loss": 0.8013, "step": 6735 }, { "epoch": 0.3421710601464634, "grad_norm": 0.04125523814838273, "learning_rate": 0.0009846892547848106, "loss": 0.7736, "step": 6740 }, { "epoch": 0.3424248962444949, "grad_norm": 0.08460799502443635, "learning_rate": 0.000984634806776995, "loss": 0.7385, "step": 6745 }, { "epoch": 0.34267873234252644, "grad_norm": 0.062464068684271706, "learning_rate": 0.0009845802636377834, "loss": 0.7594, "step": 6750 }, { "epoch": 0.3429325684405579, "grad_norm": 0.04972136770823271, "learning_rate": 0.000984525625377883, "loss": 0.8081, "step": 6755 }, { "epoch": 0.34318640453858945, "grad_norm": 0.04300633837256774, "learning_rate": 0.0009844708920080185, "loss": 0.8195, "step": 6760 }, { "epoch": 0.34344024063662093, "grad_norm": 0.055210166514715116, "learning_rate": 0.000984416063538934, "loss": 0.7774, "step": 6765 }, { "epoch": 0.3436940767346524, "grad_norm": 0.05100676049151723, "learning_rate": 0.0009843611399813921, "loss": 0.8358, "step": 6770 }, { "epoch": 0.34394791283268394, "grad_norm": 0.053634463335757644, "learning_rate": 0.0009843061213461739, "loss": 0.7976, "step": 6775 }, { "epoch": 0.3442017489307154, "grad_norm": 0.05536189976081038, "learning_rate": 0.0009842510076440792, "loss": 0.8306, "step": 6780 }, { "epoch": 0.34445558502874696, "grad_norm": 0.04423851491746859, "learning_rate": 0.0009841957988859268, "loss": 0.7727, "step": 6785 }, { "epoch": 0.34470942112677844, "grad_norm": 0.05314690043284146, "learning_rate": 0.0009841404950825536, "loss": 0.7895, "step": 6790 }, { "epoch": 0.3449632572248099, "grad_norm": 0.05253475482321265, "learning_rate": 0.0009840850962448157, "loss": 0.8552, "step": 6795 }, { "epoch": 0.34521709332284145, "grad_norm": 0.048332915790885694, "learning_rate": 0.0009840296023835877, "loss": 0.8419, "step": 6800 }, { "epoch": 0.34547092942087293, "grad_norm": 0.05608824236449981, "learning_rate": 0.0009839740135097624, "loss": 0.84, "step": 6805 }, { "epoch": 0.34572476551890446, "grad_norm": 0.05157331213965074, "learning_rate": 0.0009839183296342518, "loss": 0.7701, "step": 6810 }, { "epoch": 0.34597860161693594, "grad_norm": 0.050231023115094746, "learning_rate": 0.0009838625507679866, "loss": 0.8203, "step": 6815 }, { "epoch": 0.3462324377149674, "grad_norm": 0.07425533210990408, "learning_rate": 0.0009838066769219155, "loss": 0.8298, "step": 6820 }, { "epoch": 0.34648627381299896, "grad_norm": 0.10474027543109932, "learning_rate": 0.0009837507081070064, "loss": 0.812, "step": 6825 }, { "epoch": 0.34674010991103044, "grad_norm": 0.05068486849642127, "learning_rate": 0.000983694644334246, "loss": 0.795, "step": 6830 }, { "epoch": 0.34699394600906197, "grad_norm": 0.05989363934643183, "learning_rate": 0.000983638485614639, "loss": 0.7627, "step": 6835 }, { "epoch": 0.34724778210709345, "grad_norm": 0.04379313407496557, "learning_rate": 0.0009835822319592092, "loss": 0.8429, "step": 6840 }, { "epoch": 0.34750161820512493, "grad_norm": 2.815870101921845, "learning_rate": 0.0009835258833789987, "loss": 0.862, "step": 6845 }, { "epoch": 0.34775545430315646, "grad_norm": 0.0661431296225742, "learning_rate": 0.0009834694398850687, "loss": 0.7875, "step": 6850 }, { "epoch": 0.34800929040118794, "grad_norm": 0.06009619298856927, "learning_rate": 0.000983412901488499, "loss": 0.7944, "step": 6855 }, { "epoch": 0.3482631264992195, "grad_norm": 0.060685708086925534, "learning_rate": 0.0009833562682003871, "loss": 0.7814, "step": 6860 }, { "epoch": 0.34851696259725096, "grad_norm": 0.06487721169074993, "learning_rate": 0.0009832995400318506, "loss": 0.8141, "step": 6865 }, { "epoch": 0.34877079869528244, "grad_norm": 0.05403694661082728, "learning_rate": 0.0009832427169940243, "loss": 0.8106, "step": 6870 }, { "epoch": 0.34902463479331397, "grad_norm": 0.04255107543151733, "learning_rate": 0.0009831857990980628, "loss": 0.7664, "step": 6875 }, { "epoch": 0.34927847089134545, "grad_norm": 0.04362853121192566, "learning_rate": 0.0009831287863551386, "loss": 0.7813, "step": 6880 }, { "epoch": 0.349532306989377, "grad_norm": 0.04795801599710568, "learning_rate": 0.000983071678776443, "loss": 0.7464, "step": 6885 }, { "epoch": 0.34978614308740846, "grad_norm": 0.0589019475192563, "learning_rate": 0.0009830144763731856, "loss": 0.7662, "step": 6890 }, { "epoch": 0.35003997918543994, "grad_norm": 0.04302037204284794, "learning_rate": 0.0009829571791565956, "loss": 0.7698, "step": 6895 }, { "epoch": 0.3502938152834715, "grad_norm": 0.0513083563868518, "learning_rate": 0.0009828997871379197, "loss": 0.8019, "step": 6900 }, { "epoch": 0.35054765138150296, "grad_norm": 0.050010613355116654, "learning_rate": 0.0009828423003284239, "loss": 0.8109, "step": 6905 }, { "epoch": 0.3508014874795345, "grad_norm": 0.04360242063874711, "learning_rate": 0.0009827847187393924, "loss": 0.778, "step": 6910 }, { "epoch": 0.35105532357756597, "grad_norm": 0.12008808033232239, "learning_rate": 0.0009827270423821283, "loss": 0.8101, "step": 6915 }, { "epoch": 0.35130915967559745, "grad_norm": 0.045108267584957534, "learning_rate": 0.000982669271267953, "loss": 0.7501, "step": 6920 }, { "epoch": 0.351562995773629, "grad_norm": 0.04050165150546747, "learning_rate": 0.000982611405408207, "loss": 0.7658, "step": 6925 }, { "epoch": 0.35181683187166046, "grad_norm": 0.04225906288646799, "learning_rate": 0.0009825534448142487, "loss": 0.7891, "step": 6930 }, { "epoch": 0.352070667969692, "grad_norm": 0.04113099821074536, "learning_rate": 0.0009824953894974559, "loss": 0.7758, "step": 6935 }, { "epoch": 0.3523245040677235, "grad_norm": 0.0452371355133709, "learning_rate": 0.0009824372394692242, "loss": 0.7673, "step": 6940 }, { "epoch": 0.35257834016575496, "grad_norm": 0.04169425086905373, "learning_rate": 0.0009823789947409685, "loss": 0.7692, "step": 6945 }, { "epoch": 0.3528321762637865, "grad_norm": 0.046228146235384515, "learning_rate": 0.0009823206553241214, "loss": 0.811, "step": 6950 }, { "epoch": 0.35308601236181797, "grad_norm": 0.045370896405060965, "learning_rate": 0.0009822622212301354, "loss": 0.8139, "step": 6955 }, { "epoch": 0.35333984845984945, "grad_norm": 0.041839424401182865, "learning_rate": 0.0009822036924704803, "loss": 0.7811, "step": 6960 }, { "epoch": 0.353593684557881, "grad_norm": 0.050549112907227876, "learning_rate": 0.000982145069056645, "loss": 0.7955, "step": 6965 }, { "epoch": 0.35384752065591246, "grad_norm": 0.20610022674094772, "learning_rate": 0.000982086351000137, "loss": 0.7917, "step": 6970 }, { "epoch": 0.354101356753944, "grad_norm": 0.055409326404491664, "learning_rate": 0.0009820275383124826, "loss": 0.8084, "step": 6975 }, { "epoch": 0.3543551928519755, "grad_norm": 0.04156889539273328, "learning_rate": 0.0009819686310052263, "loss": 0.7809, "step": 6980 }, { "epoch": 0.35460902895000695, "grad_norm": 0.04222338720876728, "learning_rate": 0.0009819096290899312, "loss": 0.8157, "step": 6985 }, { "epoch": 0.3548628650480385, "grad_norm": 0.044446668613218124, "learning_rate": 0.0009818505325781793, "loss": 0.8063, "step": 6990 }, { "epoch": 0.35511670114606997, "grad_norm": 0.04595047954945954, "learning_rate": 0.000981791341481571, "loss": 0.8011, "step": 6995 }, { "epoch": 0.3553705372441015, "grad_norm": 0.04586645436317533, "learning_rate": 0.0009817320558117247, "loss": 0.7904, "step": 7000 }, { "epoch": 0.355624373342133, "grad_norm": 0.04169275021933223, "learning_rate": 0.0009816726755802784, "loss": 0.78, "step": 7005 }, { "epoch": 0.35587820944016446, "grad_norm": 0.31669945295533064, "learning_rate": 0.000981613200798888, "loss": 0.8216, "step": 7010 }, { "epoch": 0.356132045538196, "grad_norm": 0.05559711232428416, "learning_rate": 0.000981553631479228, "loss": 0.7641, "step": 7015 }, { "epoch": 0.3563858816362275, "grad_norm": 0.049170649916942075, "learning_rate": 0.0009814939676329917, "loss": 0.774, "step": 7020 }, { "epoch": 0.356639717734259, "grad_norm": 0.040763600267064465, "learning_rate": 0.0009814342092718908, "loss": 0.7887, "step": 7025 }, { "epoch": 0.3568935538322905, "grad_norm": 0.04424212604246813, "learning_rate": 0.0009813743564076557, "loss": 0.772, "step": 7030 }, { "epoch": 0.35714738993032197, "grad_norm": 0.03819958629047328, "learning_rate": 0.0009813144090520347, "loss": 0.7767, "step": 7035 }, { "epoch": 0.3574012260283535, "grad_norm": 0.03970112381363363, "learning_rate": 0.0009812543672167958, "loss": 0.7571, "step": 7040 }, { "epoch": 0.357655062126385, "grad_norm": 0.04327348065092311, "learning_rate": 0.0009811942309137242, "loss": 0.8077, "step": 7045 }, { "epoch": 0.3579088982244165, "grad_norm": 0.04112417412719965, "learning_rate": 0.0009811340001546253, "loss": 0.7842, "step": 7050 }, { "epoch": 0.358162734322448, "grad_norm": 0.04036592209910176, "learning_rate": 0.0009810736749513212, "loss": 0.7721, "step": 7055 }, { "epoch": 0.3584165704204795, "grad_norm": 0.041269469792781226, "learning_rate": 0.000981013255315654, "loss": 0.7984, "step": 7060 }, { "epoch": 0.358670406518511, "grad_norm": 0.045877175452812974, "learning_rate": 0.0009809527412594837, "loss": 0.7797, "step": 7065 }, { "epoch": 0.3589242426165425, "grad_norm": 0.039355481277176425, "learning_rate": 0.0009808921327946886, "loss": 0.7805, "step": 7070 }, { "epoch": 0.359178078714574, "grad_norm": 0.04241605702330681, "learning_rate": 0.000980831429933166, "loss": 0.799, "step": 7075 }, { "epoch": 0.3594319148126055, "grad_norm": 0.04744441371947993, "learning_rate": 0.0009807706326868317, "loss": 0.7866, "step": 7080 }, { "epoch": 0.359685750910637, "grad_norm": 0.04446943558269775, "learning_rate": 0.00098070974106762, "loss": 0.7745, "step": 7085 }, { "epoch": 0.3599395870086685, "grad_norm": 0.04024658381114387, "learning_rate": 0.0009806487550874832, "loss": 0.7666, "step": 7090 }, { "epoch": 0.3601934231067, "grad_norm": 0.04315952916716541, "learning_rate": 0.0009805876747583928, "loss": 0.7616, "step": 7095 }, { "epoch": 0.36044725920473153, "grad_norm": 0.04183380516534586, "learning_rate": 0.0009805265000923384, "loss": 0.83, "step": 7100 }, { "epoch": 0.360701095302763, "grad_norm": 0.03987053998860418, "learning_rate": 0.0009804652311013286, "loss": 0.745, "step": 7105 }, { "epoch": 0.3609549314007945, "grad_norm": 0.04248721086579615, "learning_rate": 0.00098040386779739, "loss": 0.8026, "step": 7110 }, { "epoch": 0.361208767498826, "grad_norm": 0.043610132473126194, "learning_rate": 0.0009803424101925678, "loss": 0.7531, "step": 7115 }, { "epoch": 0.3614626035968575, "grad_norm": 0.048609219910159686, "learning_rate": 0.000980280858298926, "loss": 0.7923, "step": 7120 }, { "epoch": 0.36171643969488904, "grad_norm": 0.05188054004597974, "learning_rate": 0.000980219212128547, "loss": 0.747, "step": 7125 }, { "epoch": 0.3619702757929205, "grad_norm": 0.44493177407308904, "learning_rate": 0.0009801574716935314, "loss": 0.7514, "step": 7130 }, { "epoch": 0.362224111890952, "grad_norm": 0.06707843748136667, "learning_rate": 0.0009800956370059986, "loss": 0.823, "step": 7135 }, { "epoch": 0.36247794798898353, "grad_norm": 0.04507603750134996, "learning_rate": 0.0009800337080780866, "loss": 0.7457, "step": 7140 }, { "epoch": 0.362731784087015, "grad_norm": 0.041832123043995466, "learning_rate": 0.0009799716849219515, "loss": 0.8229, "step": 7145 }, { "epoch": 0.36298562018504654, "grad_norm": 0.04282995874825187, "learning_rate": 0.0009799095675497684, "loss": 0.7823, "step": 7150 }, { "epoch": 0.363239456283078, "grad_norm": 0.04441842730134388, "learning_rate": 0.0009798473559737304, "loss": 0.75, "step": 7155 }, { "epoch": 0.3634932923811095, "grad_norm": 0.046194914120667015, "learning_rate": 0.0009797850502060495, "loss": 0.749, "step": 7160 }, { "epoch": 0.36374712847914104, "grad_norm": 0.037551897664154815, "learning_rate": 0.0009797226502589558, "loss": 0.7476, "step": 7165 }, { "epoch": 0.3640009645771725, "grad_norm": 0.0406530891958264, "learning_rate": 0.0009796601561446983, "loss": 0.7263, "step": 7170 }, { "epoch": 0.364254800675204, "grad_norm": 0.041183955815873266, "learning_rate": 0.0009795975678755441, "loss": 0.7395, "step": 7175 }, { "epoch": 0.36450863677323553, "grad_norm": 0.040751227788432196, "learning_rate": 0.0009795348854637793, "loss": 0.7604, "step": 7180 }, { "epoch": 0.364762472871267, "grad_norm": 0.0408805195945698, "learning_rate": 0.0009794721089217077, "loss": 0.7957, "step": 7185 }, { "epoch": 0.36501630896929854, "grad_norm": 0.04132344205876019, "learning_rate": 0.0009794092382616525, "loss": 0.7415, "step": 7190 }, { "epoch": 0.36527014506733, "grad_norm": 0.04985049327404315, "learning_rate": 0.0009793462734959545, "loss": 0.7823, "step": 7195 }, { "epoch": 0.3655239811653615, "grad_norm": 0.05025180192948144, "learning_rate": 0.0009792832146369734, "loss": 0.7562, "step": 7200 }, { "epoch": 0.36577781726339303, "grad_norm": 0.05370681523352302, "learning_rate": 0.0009792200616970876, "loss": 0.8186, "step": 7205 }, { "epoch": 0.3660316533614245, "grad_norm": 0.05055852296528146, "learning_rate": 0.0009791568146886936, "loss": 0.7916, "step": 7210 }, { "epoch": 0.36628548945945605, "grad_norm": 0.04151642502810757, "learning_rate": 0.0009790934736242064, "loss": 0.7549, "step": 7215 }, { "epoch": 0.3665393255574875, "grad_norm": 0.04125972749920668, "learning_rate": 0.0009790300385160594, "loss": 0.7772, "step": 7220 }, { "epoch": 0.366793161655519, "grad_norm": 0.04030243732261057, "learning_rate": 0.0009789665093767048, "loss": 0.7714, "step": 7225 }, { "epoch": 0.36704699775355054, "grad_norm": 0.039851211786167934, "learning_rate": 0.000978902886218613, "loss": 0.7921, "step": 7230 }, { "epoch": 0.367300833851582, "grad_norm": 0.044599928593395406, "learning_rate": 0.000978839169054273, "loss": 0.7844, "step": 7235 }, { "epoch": 0.36755466994961355, "grad_norm": 0.03903606299412914, "learning_rate": 0.0009787753578961922, "loss": 0.7156, "step": 7240 }, { "epoch": 0.36780850604764503, "grad_norm": 0.036310185912185766, "learning_rate": 0.0009787114527568962, "loss": 0.7277, "step": 7245 }, { "epoch": 0.3680623421456765, "grad_norm": 0.04238132916105585, "learning_rate": 0.0009786474536489292, "loss": 0.784, "step": 7250 }, { "epoch": 0.36831617824370805, "grad_norm": 0.05983801066758554, "learning_rate": 0.0009785833605848542, "loss": 0.7576, "step": 7255 }, { "epoch": 0.3685700143417395, "grad_norm": 0.05096767271715903, "learning_rate": 0.0009785191735772521, "loss": 0.8239, "step": 7260 }, { "epoch": 0.36882385043977106, "grad_norm": 0.04598774620750539, "learning_rate": 0.0009784548926387226, "loss": 0.7511, "step": 7265 }, { "epoch": 0.36907768653780254, "grad_norm": 0.04208392963392749, "learning_rate": 0.000978390517781884, "loss": 0.7666, "step": 7270 }, { "epoch": 0.369331522635834, "grad_norm": 0.04342215859150892, "learning_rate": 0.0009783260490193722, "loss": 0.7857, "step": 7275 }, { "epoch": 0.36958535873386555, "grad_norm": 0.0383828375481389, "learning_rate": 0.0009782614863638424, "loss": 0.7803, "step": 7280 }, { "epoch": 0.36983919483189703, "grad_norm": 0.03752304479515023, "learning_rate": 0.000978196829827968, "loss": 0.7287, "step": 7285 }, { "epoch": 0.37009303092992857, "grad_norm": 0.11710198834808906, "learning_rate": 0.0009781320794244408, "loss": 0.8249, "step": 7290 }, { "epoch": 0.37034686702796005, "grad_norm": 0.03986256937267469, "learning_rate": 0.0009780672351659707, "loss": 0.7184, "step": 7295 }, { "epoch": 0.3706007031259915, "grad_norm": 0.042136525954007104, "learning_rate": 0.0009780022970652864, "loss": 0.8082, "step": 7300 }, { "epoch": 0.37085453922402306, "grad_norm": 0.05890845957516485, "learning_rate": 0.000977937265135135, "loss": 0.7596, "step": 7305 }, { "epoch": 0.37110837532205454, "grad_norm": 0.04564523904830701, "learning_rate": 0.000977872139388282, "loss": 0.7828, "step": 7310 }, { "epoch": 0.3713622114200861, "grad_norm": 0.04777615212806934, "learning_rate": 0.0009778069198375112, "loss": 0.7572, "step": 7315 }, { "epoch": 0.37161604751811755, "grad_norm": 0.043333836455838916, "learning_rate": 0.0009777416064956248, "loss": 0.7802, "step": 7320 }, { "epoch": 0.37186988361614903, "grad_norm": 0.04510435040021453, "learning_rate": 0.0009776761993754435, "loss": 0.7808, "step": 7325 }, { "epoch": 0.37212371971418057, "grad_norm": 0.03607581417406834, "learning_rate": 0.0009776106984898066, "loss": 0.7553, "step": 7330 }, { "epoch": 0.37237755581221205, "grad_norm": 0.11574938639721821, "learning_rate": 0.0009775451038515712, "loss": 0.6929, "step": 7335 }, { "epoch": 0.3726313919102436, "grad_norm": 0.043046659851093245, "learning_rate": 0.0009774794154736135, "loss": 0.7692, "step": 7340 }, { "epoch": 0.37288522800827506, "grad_norm": 0.05679831373563431, "learning_rate": 0.0009774136333688278, "loss": 0.7628, "step": 7345 }, { "epoch": 0.37313906410630654, "grad_norm": 0.03875061673421009, "learning_rate": 0.0009773477575501265, "loss": 0.7457, "step": 7350 }, { "epoch": 0.3733929002043381, "grad_norm": 0.04224963436052486, "learning_rate": 0.0009772817880304412, "loss": 0.7601, "step": 7355 }, { "epoch": 0.37364673630236955, "grad_norm": 0.04190274534427654, "learning_rate": 0.0009772157248227212, "loss": 0.7671, "step": 7360 }, { "epoch": 0.3739005724004011, "grad_norm": 0.03981373455470988, "learning_rate": 0.000977149567939934, "loss": 0.7431, "step": 7365 }, { "epoch": 0.37415440849843257, "grad_norm": 0.040417837293279794, "learning_rate": 0.0009770833173950663, "loss": 0.7726, "step": 7370 }, { "epoch": 0.37440824459646405, "grad_norm": 0.0385611992250893, "learning_rate": 0.0009770169732011224, "loss": 0.766, "step": 7375 }, { "epoch": 0.3746620806944956, "grad_norm": 0.04091713852740643, "learning_rate": 0.000976950535371126, "loss": 0.7498, "step": 7380 }, { "epoch": 0.37491591679252706, "grad_norm": 0.04819179154305004, "learning_rate": 0.0009768840039181177, "loss": 0.7835, "step": 7385 }, { "epoch": 0.37516975289055854, "grad_norm": 0.04096304645089634, "learning_rate": 0.0009768173788551576, "loss": 0.7252, "step": 7390 }, { "epoch": 0.3754235889885901, "grad_norm": 0.04268968108225052, "learning_rate": 0.000976750660195324, "loss": 0.7539, "step": 7395 }, { "epoch": 0.37567742508662155, "grad_norm": 0.035198868931028075, "learning_rate": 0.0009766838479517133, "loss": 0.7458, "step": 7400 }, { "epoch": 0.3759312611846531, "grad_norm": 0.04361646845397231, "learning_rate": 0.0009766169421374406, "loss": 0.7908, "step": 7405 }, { "epoch": 0.37618509728268457, "grad_norm": 0.04053973708974407, "learning_rate": 0.000976549942765639, "loss": 0.7717, "step": 7410 }, { "epoch": 0.37643893338071605, "grad_norm": 0.038554304153090975, "learning_rate": 0.0009764828498494602, "loss": 0.7561, "step": 7415 }, { "epoch": 0.3766927694787476, "grad_norm": 0.04579676163256862, "learning_rate": 0.0009764156634020742, "loss": 0.7723, "step": 7420 }, { "epoch": 0.37694660557677906, "grad_norm": 0.041718265592932256, "learning_rate": 0.0009763483834366693, "loss": 0.7493, "step": 7425 }, { "epoch": 0.3772004416748106, "grad_norm": 0.04225398925941858, "learning_rate": 0.0009762810099664523, "loss": 0.779, "step": 7430 }, { "epoch": 0.3774542777728421, "grad_norm": 0.04357842603991317, "learning_rate": 0.0009762135430046483, "loss": 0.7746, "step": 7435 }, { "epoch": 0.37770811387087355, "grad_norm": 0.04470806202969352, "learning_rate": 0.0009761459825645006, "loss": 0.7638, "step": 7440 }, { "epoch": 0.3779619499689051, "grad_norm": 0.06579078512272377, "learning_rate": 0.0009760783286592711, "loss": 0.7474, "step": 7445 }, { "epoch": 0.37821578606693657, "grad_norm": 0.04436016795804297, "learning_rate": 0.0009760105813022399, "loss": 0.7847, "step": 7450 }, { "epoch": 0.3784696221649681, "grad_norm": 0.048799330986428864, "learning_rate": 0.0009759427405067054, "loss": 0.7487, "step": 7455 }, { "epoch": 0.3787234582629996, "grad_norm": 0.043814741442348336, "learning_rate": 0.0009758748062859844, "loss": 0.7327, "step": 7460 }, { "epoch": 0.37897729436103106, "grad_norm": 0.04021176603126496, "learning_rate": 0.0009758067786534123, "loss": 0.7695, "step": 7465 }, { "epoch": 0.3792311304590626, "grad_norm": 0.04310148829770906, "learning_rate": 0.0009757386576223423, "loss": 0.8091, "step": 7470 }, { "epoch": 0.37948496655709407, "grad_norm": 0.03865025720650538, "learning_rate": 0.0009756704432061463, "loss": 0.7666, "step": 7475 }, { "epoch": 0.3797388026551256, "grad_norm": 0.038470852267584364, "learning_rate": 0.0009756021354182145, "loss": 0.7412, "step": 7480 }, { "epoch": 0.3799926387531571, "grad_norm": 0.03855026481068089, "learning_rate": 0.0009755337342719552, "loss": 0.7662, "step": 7485 }, { "epoch": 0.38024647485118857, "grad_norm": 0.039390577754593266, "learning_rate": 0.0009754652397807955, "loss": 0.8265, "step": 7490 }, { "epoch": 0.3805003109492201, "grad_norm": 0.03647128991284946, "learning_rate": 0.0009753966519581803, "loss": 0.7068, "step": 7495 }, { "epoch": 0.3807541470472516, "grad_norm": 0.04112104791200402, "learning_rate": 0.0009753279708175731, "loss": 0.75, "step": 7500 }, { "epoch": 0.3810079831452831, "grad_norm": 0.04343308177019603, "learning_rate": 0.0009752591963724558, "loss": 0.7722, "step": 7505 }, { "epoch": 0.3812618192433146, "grad_norm": 0.0418935053457892, "learning_rate": 0.0009751903286363283, "loss": 0.7665, "step": 7510 }, { "epoch": 0.38151565534134607, "grad_norm": 0.10071949065052709, "learning_rate": 0.0009751213676227091, "loss": 0.7317, "step": 7515 }, { "epoch": 0.3817694914393776, "grad_norm": 0.04305885493873484, "learning_rate": 0.0009750523133451348, "loss": 0.7863, "step": 7520 }, { "epoch": 0.3820233275374091, "grad_norm": 0.0445468822745699, "learning_rate": 0.0009749831658171605, "loss": 0.7478, "step": 7525 }, { "epoch": 0.3822771636354406, "grad_norm": 0.04549670403518009, "learning_rate": 0.0009749139250523596, "loss": 0.7566, "step": 7530 }, { "epoch": 0.3825309997334721, "grad_norm": 0.04285537600064781, "learning_rate": 0.0009748445910643233, "loss": 0.7803, "step": 7535 }, { "epoch": 0.3827848358315036, "grad_norm": 0.04002724779194633, "learning_rate": 0.000974775163866662, "loss": 0.7688, "step": 7540 }, { "epoch": 0.3830386719295351, "grad_norm": 0.041226281558039785, "learning_rate": 0.0009747056434730037, "loss": 0.8364, "step": 7545 }, { "epoch": 0.3832925080275666, "grad_norm": 0.03669068368646744, "learning_rate": 0.0009746360298969951, "loss": 0.7622, "step": 7550 }, { "epoch": 0.3835463441255981, "grad_norm": 0.037162412634595826, "learning_rate": 0.0009745663231523008, "loss": 0.7455, "step": 7555 }, { "epoch": 0.3838001802236296, "grad_norm": 0.03522716953669594, "learning_rate": 0.0009744965232526037, "loss": 0.7631, "step": 7560 }, { "epoch": 0.3840540163216611, "grad_norm": 0.03834052324219249, "learning_rate": 0.0009744266302116056, "loss": 0.755, "step": 7565 }, { "epoch": 0.3843078524196926, "grad_norm": 0.044656976372995504, "learning_rate": 0.0009743566440430258, "loss": 0.7379, "step": 7570 }, { "epoch": 0.3845616885177241, "grad_norm": 0.04676252989001767, "learning_rate": 0.0009742865647606025, "loss": 0.7524, "step": 7575 }, { "epoch": 0.38481552461575563, "grad_norm": 0.04748915016650444, "learning_rate": 0.0009742163923780918, "loss": 0.7887, "step": 7580 }, { "epoch": 0.3850693607137871, "grad_norm": 0.043752757737883625, "learning_rate": 0.0009741461269092682, "loss": 0.7343, "step": 7585 }, { "epoch": 0.3853231968118186, "grad_norm": 0.042889849010097694, "learning_rate": 0.0009740757683679244, "loss": 0.7656, "step": 7590 }, { "epoch": 0.3855770329098501, "grad_norm": 0.04676868845427278, "learning_rate": 0.0009740053167678715, "loss": 0.7651, "step": 7595 }, { "epoch": 0.3858308690078816, "grad_norm": 0.04134064215996429, "learning_rate": 0.0009739347721229388, "loss": 0.8013, "step": 7600 }, { "epoch": 0.38608470510591314, "grad_norm": 0.04359199748438984, "learning_rate": 0.0009738641344469737, "loss": 0.7517, "step": 7605 }, { "epoch": 0.3863385412039446, "grad_norm": 0.041283000399977704, "learning_rate": 0.0009737934037538422, "loss": 0.7473, "step": 7610 }, { "epoch": 0.3865923773019761, "grad_norm": 0.042937356898500784, "learning_rate": 0.0009737225800574285, "loss": 0.7055, "step": 7615 }, { "epoch": 0.38684621340000763, "grad_norm": 0.04199755102728135, "learning_rate": 0.0009736516633716348, "loss": 0.7504, "step": 7620 }, { "epoch": 0.3871000494980391, "grad_norm": 0.040981834250087626, "learning_rate": 0.0009735806537103815, "loss": 0.7566, "step": 7625 }, { "epoch": 0.3873538855960706, "grad_norm": 0.038372467059910625, "learning_rate": 0.0009735095510876077, "loss": 0.7337, "step": 7630 }, { "epoch": 0.3876077216941021, "grad_norm": 0.03897587143495776, "learning_rate": 0.0009734383555172705, "loss": 0.7558, "step": 7635 }, { "epoch": 0.3878615577921336, "grad_norm": 0.04238600094326803, "learning_rate": 0.000973367067013345, "loss": 0.7364, "step": 7640 }, { "epoch": 0.38811539389016514, "grad_norm": 0.05129125518326557, "learning_rate": 0.000973295685589825, "loss": 0.7432, "step": 7645 }, { "epoch": 0.3883692299881966, "grad_norm": 0.045909820028298055, "learning_rate": 0.0009732242112607222, "loss": 0.7423, "step": 7650 }, { "epoch": 0.3886230660862281, "grad_norm": 0.05107458186847101, "learning_rate": 0.0009731526440400667, "loss": 0.7897, "step": 7655 }, { "epoch": 0.38887690218425963, "grad_norm": 0.0454950986705064, "learning_rate": 0.0009730809839419069, "loss": 0.8004, "step": 7660 }, { "epoch": 0.3891307382822911, "grad_norm": 0.04206275971494423, "learning_rate": 0.0009730092309803091, "loss": 0.7942, "step": 7665 }, { "epoch": 0.38938457438032265, "grad_norm": 0.036809635850043064, "learning_rate": 0.0009729373851693581, "loss": 0.7332, "step": 7670 }, { "epoch": 0.3896384104783541, "grad_norm": 0.04130076800736282, "learning_rate": 0.000972865446523157, "loss": 0.7474, "step": 7675 }, { "epoch": 0.3898922465763856, "grad_norm": 0.041338811067542065, "learning_rate": 0.000972793415055827, "loss": 0.7926, "step": 7680 }, { "epoch": 0.39014608267441714, "grad_norm": 0.0802132620340734, "learning_rate": 0.0009727212907815072, "loss": 0.7719, "step": 7685 }, { "epoch": 0.3903999187724486, "grad_norm": 0.04930863286397553, "learning_rate": 0.0009726490737143557, "loss": 0.8029, "step": 7690 }, { "epoch": 0.39065375487048015, "grad_norm": 0.04786762332796919, "learning_rate": 0.0009725767638685481, "loss": 0.8271, "step": 7695 }, { "epoch": 0.39090759096851163, "grad_norm": 0.07206500686432994, "learning_rate": 0.0009725043612582785, "loss": 0.7965, "step": 7700 }, { "epoch": 0.3911614270665431, "grad_norm": 0.03934640849835133, "learning_rate": 0.0009724318658977591, "loss": 0.7827, "step": 7705 }, { "epoch": 0.39141526316457464, "grad_norm": 0.04351545933537882, "learning_rate": 0.0009723592778012205, "loss": 0.7125, "step": 7710 }, { "epoch": 0.3916690992626061, "grad_norm": 0.04416678114867601, "learning_rate": 0.0009722865969829111, "loss": 0.7573, "step": 7715 }, { "epoch": 0.39192293536063766, "grad_norm": 0.03850975597315251, "learning_rate": 0.0009722138234570983, "loss": 0.7332, "step": 7720 }, { "epoch": 0.39217677145866914, "grad_norm": 0.03932954082994877, "learning_rate": 0.0009721409572380666, "loss": 0.7747, "step": 7725 }, { "epoch": 0.3924306075567006, "grad_norm": 0.04805335988872738, "learning_rate": 0.0009720679983401197, "loss": 0.7544, "step": 7730 }, { "epoch": 0.39268444365473215, "grad_norm": 0.04485568315812846, "learning_rate": 0.0009719949467775791, "loss": 0.7577, "step": 7735 }, { "epoch": 0.39293827975276363, "grad_norm": 0.04151403744555209, "learning_rate": 0.000971921802564784, "loss": 0.8063, "step": 7740 }, { "epoch": 0.39319211585079517, "grad_norm": 0.04063772236006216, "learning_rate": 0.0009718485657160927, "loss": 0.755, "step": 7745 }, { "epoch": 0.39344595194882664, "grad_norm": 0.038613899606246956, "learning_rate": 0.000971775236245881, "loss": 0.7478, "step": 7750 }, { "epoch": 0.3936997880468581, "grad_norm": 0.0376175412874609, "learning_rate": 0.0009717018141685432, "loss": 0.7745, "step": 7755 }, { "epoch": 0.39395362414488966, "grad_norm": 0.03675386991144821, "learning_rate": 0.0009716282994984915, "loss": 0.7553, "step": 7760 }, { "epoch": 0.39420746024292114, "grad_norm": 0.045640693974928453, "learning_rate": 0.0009715546922501568, "loss": 0.7163, "step": 7765 }, { "epoch": 0.39446129634095267, "grad_norm": 0.042231543793124396, "learning_rate": 0.0009714809924379875, "loss": 0.7576, "step": 7770 }, { "epoch": 0.39471513243898415, "grad_norm": 0.04418188612115558, "learning_rate": 0.0009714072000764508, "loss": 0.7842, "step": 7775 }, { "epoch": 0.39496896853701563, "grad_norm": 0.04226174112938385, "learning_rate": 0.0009713333151800315, "loss": 0.7509, "step": 7780 }, { "epoch": 0.39522280463504716, "grad_norm": 0.03951027517058403, "learning_rate": 0.0009712593377632331, "loss": 0.7598, "step": 7785 }, { "epoch": 0.39547664073307864, "grad_norm": 0.043490352077009624, "learning_rate": 0.0009711852678405768, "loss": 0.7772, "step": 7790 }, { "epoch": 0.3957304768311102, "grad_norm": 0.04038312000316044, "learning_rate": 0.0009711111054266022, "loss": 0.7908, "step": 7795 }, { "epoch": 0.39598431292914166, "grad_norm": 0.051934023194513255, "learning_rate": 0.000971036850535867, "loss": 0.7638, "step": 7800 }, { "epoch": 0.39623814902717314, "grad_norm": 0.048846639027517666, "learning_rate": 0.0009709625031829473, "loss": 0.7919, "step": 7805 }, { "epoch": 0.39649198512520467, "grad_norm": 0.05598968828019127, "learning_rate": 0.0009708880633824366, "loss": 0.7694, "step": 7810 }, { "epoch": 0.39674582122323615, "grad_norm": 0.04153306924511199, "learning_rate": 0.0009708135311489475, "loss": 0.8114, "step": 7815 }, { "epoch": 0.3969996573212677, "grad_norm": 0.042505288758596486, "learning_rate": 0.0009707389064971102, "loss": 0.7968, "step": 7820 }, { "epoch": 0.39725349341929916, "grad_norm": 0.043569174506564806, "learning_rate": 0.0009706641894415731, "loss": 0.812, "step": 7825 }, { "epoch": 0.39750732951733064, "grad_norm": 0.039011030940328054, "learning_rate": 0.0009705893799970029, "loss": 0.7252, "step": 7830 }, { "epoch": 0.3977611656153622, "grad_norm": 0.0416781953604398, "learning_rate": 0.0009705144781780842, "loss": 0.8218, "step": 7835 }, { "epoch": 0.39801500171339366, "grad_norm": 0.050828706294615604, "learning_rate": 0.0009704394839995198, "loss": 0.8051, "step": 7840 }, { "epoch": 0.39826883781142514, "grad_norm": 0.04061993398399232, "learning_rate": 0.0009703643974760307, "loss": 0.7846, "step": 7845 }, { "epoch": 0.39852267390945667, "grad_norm": 0.04243237309857493, "learning_rate": 0.0009702892186223564, "loss": 0.676, "step": 7850 }, { "epoch": 0.39877651000748815, "grad_norm": 0.036678979256146535, "learning_rate": 0.0009702139474532536, "loss": 0.7652, "step": 7855 }, { "epoch": 0.3990303461055197, "grad_norm": 0.03765363485837083, "learning_rate": 0.0009701385839834979, "loss": 0.7798, "step": 7860 }, { "epoch": 0.39928418220355116, "grad_norm": 0.044248620996851167, "learning_rate": 0.0009700631282278827, "loss": 0.7867, "step": 7865 }, { "epoch": 0.39953801830158264, "grad_norm": 0.040857221789648165, "learning_rate": 0.0009699875802012197, "loss": 0.7684, "step": 7870 }, { "epoch": 0.3997918543996142, "grad_norm": 0.039937846319251784, "learning_rate": 0.0009699119399183385, "loss": 0.772, "step": 7875 }, { "epoch": 0.40004569049764566, "grad_norm": 0.03733198862727119, "learning_rate": 0.0009698362073940869, "loss": 0.7364, "step": 7880 }, { "epoch": 0.4002995265956772, "grad_norm": 0.03767893580997815, "learning_rate": 0.0009697603826433308, "loss": 0.7453, "step": 7885 }, { "epoch": 0.40055336269370867, "grad_norm": 0.041829617418572065, "learning_rate": 0.0009696844656809545, "loss": 0.7812, "step": 7890 }, { "epoch": 0.40080719879174015, "grad_norm": 3.0641670139442803, "learning_rate": 0.0009696084565218597, "loss": 0.7729, "step": 7895 }, { "epoch": 0.4010610348897717, "grad_norm": 0.05078739124539211, "learning_rate": 0.0009695323551809669, "loss": 0.7413, "step": 7900 }, { "epoch": 0.40131487098780316, "grad_norm": 0.6102236007117402, "learning_rate": 0.0009694561616732143, "loss": 0.7851, "step": 7905 }, { "epoch": 0.4015687070858347, "grad_norm": 0.06414914921423537, "learning_rate": 0.0009693798760135584, "loss": 0.7365, "step": 7910 }, { "epoch": 0.4018225431838662, "grad_norm": 0.042222512641194775, "learning_rate": 0.0009693034982169735, "loss": 0.7723, "step": 7915 }, { "epoch": 0.40207637928189766, "grad_norm": 0.04490654973660664, "learning_rate": 0.0009692270282984525, "loss": 0.7907, "step": 7920 }, { "epoch": 0.4023302153799292, "grad_norm": 0.04925158744002114, "learning_rate": 0.0009691504662730058, "loss": 0.7619, "step": 7925 }, { "epoch": 0.40258405147796067, "grad_norm": 0.05092496190042868, "learning_rate": 0.0009690738121556621, "loss": 0.8164, "step": 7930 }, { "epoch": 0.4028378875759922, "grad_norm": 0.06562088213196457, "learning_rate": 0.0009689970659614684, "loss": 0.7925, "step": 7935 }, { "epoch": 0.4030917236740237, "grad_norm": 0.1440166563030397, "learning_rate": 0.0009689202277054896, "loss": 0.819, "step": 7940 }, { "epoch": 0.40334555977205516, "grad_norm": 0.04710466537111469, "learning_rate": 0.0009688432974028085, "loss": 0.8441, "step": 7945 }, { "epoch": 0.4035993958700867, "grad_norm": 0.046478997925972855, "learning_rate": 0.0009687662750685265, "loss": 0.7921, "step": 7950 }, { "epoch": 0.4038532319681182, "grad_norm": 0.04108330928096877, "learning_rate": 0.0009686891607177621, "loss": 0.7981, "step": 7955 }, { "epoch": 0.4041070680661497, "grad_norm": 0.04526022480993419, "learning_rate": 0.0009686119543656531, "loss": 0.7449, "step": 7960 }, { "epoch": 0.4043609041641812, "grad_norm": 0.3613587478090101, "learning_rate": 0.0009685346560273542, "loss": 0.791, "step": 7965 }, { "epoch": 0.40461474026221267, "grad_norm": 0.06245788619430221, "learning_rate": 0.000968457265718039, "loss": 0.8368, "step": 7970 }, { "epoch": 0.4048685763602442, "grad_norm": 0.04870420759963501, "learning_rate": 0.0009683797834528987, "loss": 0.784, "step": 7975 }, { "epoch": 0.4051224124582757, "grad_norm": 0.045736399108672686, "learning_rate": 0.0009683022092471427, "loss": 0.8209, "step": 7980 }, { "epoch": 0.4053762485563072, "grad_norm": 0.04525793943034907, "learning_rate": 0.0009682245431159984, "loss": 0.7533, "step": 7985 }, { "epoch": 0.4056300846543387, "grad_norm": 0.04018385279205223, "learning_rate": 0.0009681467850747114, "loss": 0.7753, "step": 7990 }, { "epoch": 0.4058839207523702, "grad_norm": 0.038739068133560914, "learning_rate": 0.0009680689351385453, "loss": 0.7663, "step": 7995 }, { "epoch": 0.4061377568504017, "grad_norm": 0.041823201395728475, "learning_rate": 0.0009679909933227811, "loss": 0.7771, "step": 8000 }, { "epoch": 0.4063915929484332, "grad_norm": 0.1788491007508449, "learning_rate": 0.0009679129596427189, "loss": 0.778, "step": 8005 }, { "epoch": 0.4066454290464647, "grad_norm": 0.06665212046288382, "learning_rate": 0.0009678348341136764, "loss": 0.8376, "step": 8010 }, { "epoch": 0.4068992651444962, "grad_norm": 0.06243196378836535, "learning_rate": 0.000967756616750989, "loss": 0.7826, "step": 8015 }, { "epoch": 0.4071531012425277, "grad_norm": 0.04354414928289361, "learning_rate": 0.0009676783075700103, "loss": 0.734, "step": 8020 }, { "epoch": 0.4074069373405592, "grad_norm": 0.04467789145349268, "learning_rate": 0.0009675999065861121, "loss": 0.79, "step": 8025 }, { "epoch": 0.4076607734385907, "grad_norm": 0.04517742992245351, "learning_rate": 0.0009675214138146844, "loss": 0.7802, "step": 8030 }, { "epoch": 0.40791460953662223, "grad_norm": 0.049717631072856976, "learning_rate": 0.0009674428292711346, "loss": 0.7609, "step": 8035 }, { "epoch": 0.4081684456346537, "grad_norm": 0.04316118937419229, "learning_rate": 0.0009673641529708884, "loss": 0.8028, "step": 8040 }, { "epoch": 0.4084222817326852, "grad_norm": 0.04194693386738023, "learning_rate": 0.0009672853849293899, "loss": 0.7731, "step": 8045 }, { "epoch": 0.4086761178307167, "grad_norm": 0.04016196663566101, "learning_rate": 0.0009672065251621005, "loss": 0.823, "step": 8050 }, { "epoch": 0.4089299539287482, "grad_norm": 0.04836425630963262, "learning_rate": 0.0009671275736845002, "loss": 0.7921, "step": 8055 }, { "epoch": 0.4091837900267797, "grad_norm": 0.041446934979265736, "learning_rate": 0.0009670485305120868, "loss": 0.7537, "step": 8060 }, { "epoch": 0.4094376261248112, "grad_norm": 0.11116261888798955, "learning_rate": 0.0009669693956603761, "loss": 0.8754, "step": 8065 }, { "epoch": 0.4096914622228427, "grad_norm": 0.06739485102459883, "learning_rate": 0.0009668901691449017, "loss": 0.8377, "step": 8070 }, { "epoch": 0.40994529832087423, "grad_norm": 0.08730391040648282, "learning_rate": 0.0009668108509812155, "loss": 0.8549, "step": 8075 }, { "epoch": 0.4101991344189057, "grad_norm": 0.1132816109437252, "learning_rate": 0.0009667314411848873, "loss": 0.7772, "step": 8080 }, { "epoch": 0.4104529705169372, "grad_norm": 0.45276514340982776, "learning_rate": 0.0009666519397715048, "loss": 0.8342, "step": 8085 }, { "epoch": 0.4107068066149687, "grad_norm": 0.05891226161699608, "learning_rate": 0.0009665723467566736, "loss": 0.7573, "step": 8090 }, { "epoch": 0.4109606427130002, "grad_norm": 0.1266450376779294, "learning_rate": 0.0009664926621560175, "loss": 0.7905, "step": 8095 }, { "epoch": 0.41121447881103174, "grad_norm": 0.05661005013871889, "learning_rate": 0.0009664128859851784, "loss": 0.8075, "step": 8100 }, { "epoch": 0.4114683149090632, "grad_norm": 0.044407875208863676, "learning_rate": 0.0009663330182598155, "loss": 0.7918, "step": 8105 }, { "epoch": 0.4117221510070947, "grad_norm": 0.08498318696271139, "learning_rate": 0.0009662530589956069, "loss": 0.7716, "step": 8110 }, { "epoch": 0.41197598710512623, "grad_norm": 0.040214304165686485, "learning_rate": 0.0009661730082082481, "loss": 0.7305, "step": 8115 }, { "epoch": 0.4122298232031577, "grad_norm": 0.04550962070114443, "learning_rate": 0.0009660928659134525, "loss": 0.8012, "step": 8120 }, { "epoch": 0.41248365930118924, "grad_norm": 0.04669488801372424, "learning_rate": 0.0009660126321269516, "loss": 0.7918, "step": 8125 }, { "epoch": 0.4127374953992207, "grad_norm": 0.05037338953254435, "learning_rate": 0.0009659323068644952, "loss": 0.7474, "step": 8130 }, { "epoch": 0.4129913314972522, "grad_norm": 0.04012686349350075, "learning_rate": 0.0009658518901418505, "loss": 0.777, "step": 8135 }, { "epoch": 0.41324516759528374, "grad_norm": 0.10913993310938727, "learning_rate": 0.0009657713819748028, "loss": 0.786, "step": 8140 }, { "epoch": 0.4134990036933152, "grad_norm": 0.05044609519043211, "learning_rate": 0.0009656907823791559, "loss": 0.7816, "step": 8145 }, { "epoch": 0.41375283979134675, "grad_norm": 0.03986239537135956, "learning_rate": 0.0009656100913707306, "loss": 0.7659, "step": 8150 }, { "epoch": 0.41400667588937823, "grad_norm": 0.0609406746066053, "learning_rate": 0.0009655293089653665, "loss": 0.7458, "step": 8155 }, { "epoch": 0.4142605119874097, "grad_norm": 0.04307659674891288, "learning_rate": 0.0009654484351789206, "loss": 0.7652, "step": 8160 }, { "epoch": 0.41451434808544124, "grad_norm": 0.04140170247928022, "learning_rate": 0.000965367470027268, "loss": 0.7645, "step": 8165 }, { "epoch": 0.4147681841834727, "grad_norm": 0.043622220755281035, "learning_rate": 0.0009652864135263018, "loss": 0.7688, "step": 8170 }, { "epoch": 0.41502202028150426, "grad_norm": 0.039566884564929745, "learning_rate": 0.0009652052656919331, "loss": 0.7772, "step": 8175 }, { "epoch": 0.41527585637953573, "grad_norm": 0.0386920133992877, "learning_rate": 0.0009651240265400907, "loss": 0.7423, "step": 8180 }, { "epoch": 0.4155296924775672, "grad_norm": 0.04098253075279437, "learning_rate": 0.0009650426960867215, "loss": 0.7859, "step": 8185 }, { "epoch": 0.41578352857559875, "grad_norm": 0.043391123593253034, "learning_rate": 0.00096496127434779, "loss": 0.7934, "step": 8190 }, { "epoch": 0.41603736467363023, "grad_norm": 0.0423729798491267, "learning_rate": 0.0009648797613392794, "loss": 0.7417, "step": 8195 }, { "epoch": 0.41629120077166176, "grad_norm": 0.03833717377161964, "learning_rate": 0.0009647981570771898, "loss": 0.7444, "step": 8200 }, { "epoch": 0.41654503686969324, "grad_norm": 0.05025261160590025, "learning_rate": 0.00096471646157754, "loss": 0.7458, "step": 8205 }, { "epoch": 0.4167988729677247, "grad_norm": 0.04013101839719977, "learning_rate": 0.0009646346748563663, "loss": 0.726, "step": 8210 }, { "epoch": 0.41705270906575626, "grad_norm": 0.04462507047413428, "learning_rate": 0.0009645527969297231, "loss": 0.7765, "step": 8215 }, { "epoch": 0.41730654516378773, "grad_norm": 0.08199126475818648, "learning_rate": 0.0009644708278136826, "loss": 0.7673, "step": 8220 }, { "epoch": 0.41756038126181927, "grad_norm": 0.040336414125756365, "learning_rate": 0.0009643887675243348, "loss": 0.7592, "step": 8225 }, { "epoch": 0.41781421735985075, "grad_norm": 0.043266632658642806, "learning_rate": 0.0009643066160777879, "loss": 0.7809, "step": 8230 }, { "epoch": 0.4180680534578822, "grad_norm": 0.07384699241812419, "learning_rate": 0.0009642243734901678, "loss": 0.82, "step": 8235 }, { "epoch": 0.41832188955591376, "grad_norm": 0.046928271794841166, "learning_rate": 0.0009641420397776181, "loss": 0.7952, "step": 8240 }, { "epoch": 0.41857572565394524, "grad_norm": 0.04286924385686193, "learning_rate": 0.0009640596149563008, "loss": 0.7489, "step": 8245 }, { "epoch": 0.4188295617519768, "grad_norm": 0.03848625655287249, "learning_rate": 0.0009639770990423954, "loss": 0.8036, "step": 8250 }, { "epoch": 0.41908339785000825, "grad_norm": 0.0439828840943445, "learning_rate": 0.0009638944920520992, "loss": 0.7515, "step": 8255 }, { "epoch": 0.41933723394803973, "grad_norm": 0.04018202949163455, "learning_rate": 0.0009638117940016278, "loss": 0.7809, "step": 8260 }, { "epoch": 0.41959107004607127, "grad_norm": 0.10549905773645388, "learning_rate": 0.000963729004907214, "loss": 0.7282, "step": 8265 }, { "epoch": 0.41984490614410275, "grad_norm": 0.13063875588050247, "learning_rate": 0.0009636461247851094, "loss": 0.7885, "step": 8270 }, { "epoch": 0.4200987422421342, "grad_norm": 0.0857064505792334, "learning_rate": 0.0009635631536515825, "loss": 0.762, "step": 8275 }, { "epoch": 0.42035257834016576, "grad_norm": 0.04099398533433659, "learning_rate": 0.0009634800915229205, "loss": 0.7931, "step": 8280 }, { "epoch": 0.42060641443819724, "grad_norm": 0.04855702564335019, "learning_rate": 0.0009633969384154279, "loss": 0.7303, "step": 8285 }, { "epoch": 0.4208602505362288, "grad_norm": 0.04440475666398699, "learning_rate": 0.0009633136943454271, "loss": 0.7496, "step": 8290 }, { "epoch": 0.42111408663426025, "grad_norm": 0.041150952024453694, "learning_rate": 0.0009632303593292589, "loss": 0.7351, "step": 8295 }, { "epoch": 0.42136792273229173, "grad_norm": 0.0545318567236716, "learning_rate": 0.0009631469333832809, "loss": 0.7972, "step": 8300 }, { "epoch": 0.42162175883032327, "grad_norm": 0.044394465547068385, "learning_rate": 0.0009630634165238699, "loss": 0.766, "step": 8305 }, { "epoch": 0.42187559492835475, "grad_norm": 0.04760615823640655, "learning_rate": 0.0009629798087674194, "loss": 0.7524, "step": 8310 }, { "epoch": 0.4221294310263863, "grad_norm": 0.054364498746250475, "learning_rate": 0.0009628961101303412, "loss": 0.7862, "step": 8315 }, { "epoch": 0.42238326712441776, "grad_norm": 0.08166492342576366, "learning_rate": 0.0009628123206290654, "loss": 0.7754, "step": 8320 }, { "epoch": 0.42263710322244924, "grad_norm": 0.07332038322047624, "learning_rate": 0.0009627284402800388, "loss": 0.808, "step": 8325 }, { "epoch": 0.4228909393204808, "grad_norm": 0.41756615495249827, "learning_rate": 0.0009626444690997272, "loss": 0.7282, "step": 8330 }, { "epoch": 0.42314477541851225, "grad_norm": 0.05299322459060185, "learning_rate": 0.0009625604071046133, "loss": 0.7648, "step": 8335 }, { "epoch": 0.4233986115165438, "grad_norm": 0.050191671170989494, "learning_rate": 0.0009624762543111985, "loss": 0.7612, "step": 8340 }, { "epoch": 0.42365244761457527, "grad_norm": 0.07578798010324866, "learning_rate": 0.0009623920107360011, "loss": 0.825, "step": 8345 }, { "epoch": 0.42390628371260675, "grad_norm": 0.04705402015711766, "learning_rate": 0.0009623076763955581, "loss": 0.8207, "step": 8350 }, { "epoch": 0.4241601198106383, "grad_norm": 0.42619520489007046, "learning_rate": 0.0009622232513064237, "loss": 0.7372, "step": 8355 }, { "epoch": 0.42441395590866976, "grad_norm": 0.14624065861260466, "learning_rate": 0.00096213873548517, "loss": 0.7984, "step": 8360 }, { "epoch": 0.4246677920067013, "grad_norm": 0.061867477425625525, "learning_rate": 0.0009620541289483875, "loss": 0.8051, "step": 8365 }, { "epoch": 0.4249216281047328, "grad_norm": 0.06383432605931177, "learning_rate": 0.0009619694317126837, "loss": 0.7836, "step": 8370 }, { "epoch": 0.42517546420276425, "grad_norm": 0.05050820940072691, "learning_rate": 0.0009618846437946842, "loss": 0.7774, "step": 8375 }, { "epoch": 0.4254293003007958, "grad_norm": 0.06599423379240986, "learning_rate": 0.0009617997652110326, "loss": 0.7826, "step": 8380 }, { "epoch": 0.42568313639882727, "grad_norm": 0.05614952099491721, "learning_rate": 0.00096171479597839, "loss": 0.7951, "step": 8385 }, { "epoch": 0.4259369724968588, "grad_norm": 0.04348127377412012, "learning_rate": 0.0009616297361134355, "loss": 0.751, "step": 8390 }, { "epoch": 0.4261908085948903, "grad_norm": 0.10664192669532255, "learning_rate": 0.000961544585632866, "loss": 0.7854, "step": 8395 }, { "epoch": 0.42644464469292176, "grad_norm": 0.04199150573753612, "learning_rate": 0.0009614593445533961, "loss": 0.8024, "step": 8400 }, { "epoch": 0.4266984807909533, "grad_norm": 0.06984833447119529, "learning_rate": 0.0009613740128917581, "loss": 0.7494, "step": 8405 }, { "epoch": 0.4269523168889848, "grad_norm": 0.04496497623791148, "learning_rate": 0.0009612885906647023, "loss": 0.7749, "step": 8410 }, { "epoch": 0.4272061529870163, "grad_norm": 0.04136209563128497, "learning_rate": 0.0009612030778889966, "loss": 0.7511, "step": 8415 }, { "epoch": 0.4274599890850478, "grad_norm": 0.06953527295899364, "learning_rate": 0.0009611174745814266, "loss": 0.7779, "step": 8420 }, { "epoch": 0.42771382518307927, "grad_norm": 0.03847616824731202, "learning_rate": 0.000961031780758796, "loss": 0.761, "step": 8425 }, { "epoch": 0.4279676612811108, "grad_norm": 0.04285393283146993, "learning_rate": 0.000960945996437926, "loss": 0.7811, "step": 8430 }, { "epoch": 0.4282214973791423, "grad_norm": 0.04422877939001848, "learning_rate": 0.0009608601216356557, "loss": 0.819, "step": 8435 }, { "epoch": 0.4284753334771738, "grad_norm": 0.038407821039527695, "learning_rate": 0.0009607741563688417, "loss": 0.7507, "step": 8440 }, { "epoch": 0.4287291695752053, "grad_norm": 0.040290162168262623, "learning_rate": 0.0009606881006543589, "loss": 0.7493, "step": 8445 }, { "epoch": 0.4289830056732368, "grad_norm": 0.049982310310563084, "learning_rate": 0.0009606019545090992, "loss": 0.7182, "step": 8450 }, { "epoch": 0.4292368417712683, "grad_norm": 0.03952191700833091, "learning_rate": 0.0009605157179499728, "loss": 0.7788, "step": 8455 }, { "epoch": 0.4294906778692998, "grad_norm": 0.04624025955524022, "learning_rate": 0.0009604293909939077, "loss": 0.757, "step": 8460 }, { "epoch": 0.4297445139673313, "grad_norm": 0.05097858894063726, "learning_rate": 0.0009603429736578493, "loss": 0.7493, "step": 8465 }, { "epoch": 0.4299983500653628, "grad_norm": 0.04175552334185394, "learning_rate": 0.0009602564659587608, "loss": 0.7969, "step": 8470 }, { "epoch": 0.4302521861633943, "grad_norm": 0.040848993200899165, "learning_rate": 0.0009601698679136233, "loss": 0.7486, "step": 8475 }, { "epoch": 0.4305060222614258, "grad_norm": 0.04091295450850086, "learning_rate": 0.0009600831795394358, "loss": 0.7407, "step": 8480 }, { "epoch": 0.4307598583594573, "grad_norm": 0.03749498780536396, "learning_rate": 0.0009599964008532144, "loss": 0.7705, "step": 8485 }, { "epoch": 0.43101369445748877, "grad_norm": 0.03926309920617154, "learning_rate": 0.0009599095318719935, "loss": 0.7449, "step": 8490 }, { "epoch": 0.4312675305555203, "grad_norm": 0.0672218722457922, "learning_rate": 0.0009598225726128251, "loss": 0.7583, "step": 8495 }, { "epoch": 0.4315213666535518, "grad_norm": 0.03859918704871626, "learning_rate": 0.0009597355230927789, "loss": 0.7637, "step": 8500 }, { "epoch": 0.4317752027515833, "grad_norm": 0.040432076361873126, "learning_rate": 0.0009596483833289422, "loss": 0.7521, "step": 8505 }, { "epoch": 0.4320290388496148, "grad_norm": 0.036232918888958576, "learning_rate": 0.0009595611533384201, "loss": 0.7483, "step": 8510 }, { "epoch": 0.4322828749476463, "grad_norm": 0.037750292933020166, "learning_rate": 0.0009594738331383355, "loss": 0.738, "step": 8515 }, { "epoch": 0.4325367110456778, "grad_norm": 0.04861396062245893, "learning_rate": 0.0009593864227458287, "loss": 0.7954, "step": 8520 }, { "epoch": 0.4327905471437093, "grad_norm": 0.056964103730651916, "learning_rate": 0.0009592989221780581, "loss": 0.7647, "step": 8525 }, { "epoch": 0.4330443832417408, "grad_norm": 0.03877517676261678, "learning_rate": 0.0009592113314521996, "loss": 0.7658, "step": 8530 }, { "epoch": 0.4332982193397723, "grad_norm": 0.04253509018985297, "learning_rate": 0.0009591236505854468, "loss": 0.7635, "step": 8535 }, { "epoch": 0.4335520554378038, "grad_norm": 0.041548675966554566, "learning_rate": 0.0009590358795950112, "loss": 0.7882, "step": 8540 }, { "epoch": 0.4338058915358353, "grad_norm": 0.04286942102636484, "learning_rate": 0.0009589480184981214, "loss": 0.7595, "step": 8545 }, { "epoch": 0.4340597276338668, "grad_norm": 0.03713327097674268, "learning_rate": 0.0009588600673120245, "loss": 0.7674, "step": 8550 }, { "epoch": 0.43431356373189833, "grad_norm": 0.040024477748535826, "learning_rate": 0.0009587720260539847, "loss": 0.7611, "step": 8555 }, { "epoch": 0.4345673998299298, "grad_norm": 0.036602978927485944, "learning_rate": 0.000958683894741284, "loss": 0.7773, "step": 8560 }, { "epoch": 0.4348212359279613, "grad_norm": 0.03695829708890826, "learning_rate": 0.0009585956733912224, "loss": 0.744, "step": 8565 }, { "epoch": 0.4350750720259928, "grad_norm": 0.041060044284110246, "learning_rate": 0.0009585073620211169, "loss": 0.7824, "step": 8570 }, { "epoch": 0.4353289081240243, "grad_norm": 0.03282186481927779, "learning_rate": 0.0009584189606483029, "loss": 0.7459, "step": 8575 }, { "epoch": 0.43558274422205584, "grad_norm": 0.03766692619987279, "learning_rate": 0.0009583304692901331, "loss": 0.7733, "step": 8580 }, { "epoch": 0.4358365803200873, "grad_norm": 0.03751630738382562, "learning_rate": 0.0009582418879639778, "loss": 0.8001, "step": 8585 }, { "epoch": 0.4360904164181188, "grad_norm": 0.03697860617176724, "learning_rate": 0.0009581532166872252, "loss": 0.7382, "step": 8590 }, { "epoch": 0.43634425251615033, "grad_norm": 0.03861908122314458, "learning_rate": 0.0009580644554772809, "loss": 0.7343, "step": 8595 }, { "epoch": 0.4365980886141818, "grad_norm": 0.042515259574637095, "learning_rate": 0.0009579756043515684, "loss": 0.7874, "step": 8600 }, { "epoch": 0.43685192471221335, "grad_norm": 0.037110177487440825, "learning_rate": 0.0009578866633275287, "loss": 0.8181, "step": 8605 }, { "epoch": 0.4371057608102448, "grad_norm": 0.03891153654761726, "learning_rate": 0.0009577976324226205, "loss": 0.7406, "step": 8610 }, { "epoch": 0.4373595969082763, "grad_norm": 0.049955912374361436, "learning_rate": 0.0009577085116543201, "loss": 0.7924, "step": 8615 }, { "epoch": 0.43761343300630784, "grad_norm": 0.04329421866977536, "learning_rate": 0.0009576193010401213, "loss": 0.7336, "step": 8620 }, { "epoch": 0.4378672691043393, "grad_norm": 0.0657917692765946, "learning_rate": 0.0009575300005975361, "loss": 0.7586, "step": 8625 }, { "epoch": 0.43812110520237085, "grad_norm": 0.04042047273306288, "learning_rate": 0.0009574406103440931, "loss": 0.7927, "step": 8630 }, { "epoch": 0.43837494130040233, "grad_norm": 0.04198470161556633, "learning_rate": 0.0009573511302973399, "loss": 0.7389, "step": 8635 }, { "epoch": 0.4386287773984338, "grad_norm": 0.03945689262463779, "learning_rate": 0.0009572615604748405, "loss": 0.7727, "step": 8640 }, { "epoch": 0.43888261349646535, "grad_norm": 0.03712653581206748, "learning_rate": 0.000957171900894177, "loss": 0.7334, "step": 8645 }, { "epoch": 0.4391364495944968, "grad_norm": 0.036020981363722074, "learning_rate": 0.0009570821515729496, "loss": 0.7711, "step": 8650 }, { "epoch": 0.43939028569252836, "grad_norm": 0.035935007316426994, "learning_rate": 0.0009569923125287749, "loss": 0.757, "step": 8655 }, { "epoch": 0.43964412179055984, "grad_norm": 0.037405081869109895, "learning_rate": 0.0009569023837792885, "loss": 0.756, "step": 8660 }, { "epoch": 0.4398979578885913, "grad_norm": 0.036387288085772324, "learning_rate": 0.0009568123653421427, "loss": 0.7636, "step": 8665 }, { "epoch": 0.44015179398662285, "grad_norm": 0.051950127420025645, "learning_rate": 0.0009567222572350078, "loss": 0.761, "step": 8670 }, { "epoch": 0.44040563008465433, "grad_norm": 0.043075422209229296, "learning_rate": 0.0009566320594755713, "loss": 0.7409, "step": 8675 }, { "epoch": 0.44065946618268587, "grad_norm": 0.038731792301034185, "learning_rate": 0.0009565417720815389, "loss": 0.7635, "step": 8680 }, { "epoch": 0.44091330228071735, "grad_norm": 0.040410544160498114, "learning_rate": 0.0009564513950706333, "loss": 0.8231, "step": 8685 }, { "epoch": 0.4411671383787488, "grad_norm": 0.03881473682056207, "learning_rate": 0.0009563609284605951, "loss": 0.7987, "step": 8690 }, { "epoch": 0.44142097447678036, "grad_norm": 0.038119251866453076, "learning_rate": 0.0009562703722691828, "loss": 0.748, "step": 8695 }, { "epoch": 0.44167481057481184, "grad_norm": 0.038756635348786275, "learning_rate": 0.0009561797265141717, "loss": 0.768, "step": 8700 }, { "epoch": 0.4419286466728434, "grad_norm": 0.041209498822817825, "learning_rate": 0.0009560889912133552, "loss": 0.7384, "step": 8705 }, { "epoch": 0.44218248277087485, "grad_norm": 0.03533836482654101, "learning_rate": 0.0009559981663845443, "loss": 0.7272, "step": 8710 }, { "epoch": 0.44243631886890633, "grad_norm": 0.040879166167599026, "learning_rate": 0.0009559072520455672, "loss": 0.763, "step": 8715 }, { "epoch": 0.44269015496693787, "grad_norm": 0.03493061283645192, "learning_rate": 0.0009558162482142703, "loss": 0.6999, "step": 8720 }, { "epoch": 0.44294399106496934, "grad_norm": 0.040861883386266146, "learning_rate": 0.000955725154908517, "loss": 0.7543, "step": 8725 }, { "epoch": 0.4431978271630008, "grad_norm": 0.04037745198069129, "learning_rate": 0.0009556339721461885, "loss": 0.7617, "step": 8730 }, { "epoch": 0.44345166326103236, "grad_norm": 0.0363292511254534, "learning_rate": 0.0009555426999451835, "loss": 0.723, "step": 8735 }, { "epoch": 0.44370549935906384, "grad_norm": 0.042723329053883104, "learning_rate": 0.0009554513383234184, "loss": 0.7678, "step": 8740 }, { "epoch": 0.44395933545709537, "grad_norm": 0.05365980228319146, "learning_rate": 0.0009553598872988268, "loss": 0.7602, "step": 8745 }, { "epoch": 0.44421317155512685, "grad_norm": 0.06833253027682988, "learning_rate": 0.0009552683468893601, "loss": 0.741, "step": 8750 }, { "epoch": 0.44446700765315833, "grad_norm": 0.041589338851688806, "learning_rate": 0.0009551767171129874, "loss": 0.78, "step": 8755 }, { "epoch": 0.44472084375118986, "grad_norm": 0.10917485218581564, "learning_rate": 0.0009550849979876952, "loss": 0.7699, "step": 8760 }, { "epoch": 0.44497467984922134, "grad_norm": 0.05392770910729491, "learning_rate": 0.0009549931895314874, "loss": 0.7487, "step": 8765 }, { "epoch": 0.4452285159472529, "grad_norm": 0.04501405236618846, "learning_rate": 0.0009549012917623854, "loss": 0.7938, "step": 8770 }, { "epoch": 0.44548235204528436, "grad_norm": 0.038534305188172344, "learning_rate": 0.0009548093046984285, "loss": 0.7113, "step": 8775 }, { "epoch": 0.44573618814331584, "grad_norm": 0.0419422424092853, "learning_rate": 0.0009547172283576733, "loss": 0.7504, "step": 8780 }, { "epoch": 0.44599002424134737, "grad_norm": 0.03687188831392871, "learning_rate": 0.0009546250627581936, "loss": 0.7384, "step": 8785 }, { "epoch": 0.44624386033937885, "grad_norm": 0.03920493229221664, "learning_rate": 0.0009545328079180815, "loss": 0.7628, "step": 8790 }, { "epoch": 0.4464976964374104, "grad_norm": 0.04469847118505954, "learning_rate": 0.0009544404638554459, "loss": 0.7459, "step": 8795 }, { "epoch": 0.44675153253544186, "grad_norm": 0.03815499740787775, "learning_rate": 0.0009543480305884136, "loss": 0.731, "step": 8800 }, { "epoch": 0.44700536863347334, "grad_norm": 0.03866404052863374, "learning_rate": 0.0009542555081351286, "loss": 0.7364, "step": 8805 }, { "epoch": 0.4472592047315049, "grad_norm": 0.03584944125443995, "learning_rate": 0.0009541628965137528, "loss": 0.7521, "step": 8810 }, { "epoch": 0.44751304082953636, "grad_norm": 0.03853289048787337, "learning_rate": 0.0009540701957424653, "loss": 0.7085, "step": 8815 }, { "epoch": 0.4477668769275679, "grad_norm": 0.044626624265024936, "learning_rate": 0.0009539774058394628, "loss": 0.7864, "step": 8820 }, { "epoch": 0.44802071302559937, "grad_norm": 0.04564146902526811, "learning_rate": 0.0009538845268229596, "loss": 0.764, "step": 8825 }, { "epoch": 0.44827454912363085, "grad_norm": 0.04295172367327605, "learning_rate": 0.0009537915587111872, "loss": 0.7466, "step": 8830 }, { "epoch": 0.4485283852216624, "grad_norm": 0.03720909165828223, "learning_rate": 0.0009536985015223949, "loss": 0.7955, "step": 8835 }, { "epoch": 0.44878222131969386, "grad_norm": 0.042273124046617185, "learning_rate": 0.0009536053552748494, "loss": 0.716, "step": 8840 }, { "epoch": 0.4490360574177254, "grad_norm": 0.03944803287830875, "learning_rate": 0.0009535121199868348, "loss": 0.7655, "step": 8845 }, { "epoch": 0.4492898935157569, "grad_norm": 0.050423821736293, "learning_rate": 0.0009534187956766526, "loss": 0.7571, "step": 8850 }, { "epoch": 0.44954372961378836, "grad_norm": 0.047995171204666946, "learning_rate": 0.000953325382362622, "loss": 0.7645, "step": 8855 }, { "epoch": 0.4497975657118199, "grad_norm": 0.043381619686384446, "learning_rate": 0.0009532318800630797, "loss": 0.7313, "step": 8860 }, { "epoch": 0.45005140180985137, "grad_norm": 0.05677747016214828, "learning_rate": 0.0009531382887963796, "loss": 0.7472, "step": 8865 }, { "epoch": 0.4503052379078829, "grad_norm": 0.03806620424090292, "learning_rate": 0.0009530446085808932, "loss": 0.7449, "step": 8870 }, { "epoch": 0.4505590740059144, "grad_norm": 0.03943245683516065, "learning_rate": 0.0009529508394350093, "loss": 0.7745, "step": 8875 }, { "epoch": 0.45081291010394586, "grad_norm": 0.0361155917417072, "learning_rate": 0.0009528569813771346, "loss": 0.7685, "step": 8880 }, { "epoch": 0.4510667462019774, "grad_norm": 0.043689010801907624, "learning_rate": 0.0009527630344256929, "loss": 0.7318, "step": 8885 }, { "epoch": 0.4513205823000089, "grad_norm": 0.034493945380992604, "learning_rate": 0.0009526689985991255, "loss": 0.7151, "step": 8890 }, { "epoch": 0.4515744183980404, "grad_norm": 0.0434835537445401, "learning_rate": 0.000952574873915891, "loss": 0.7331, "step": 8895 }, { "epoch": 0.4518282544960719, "grad_norm": 0.03698842233990922, "learning_rate": 0.0009524806603944658, "loss": 0.7245, "step": 8900 }, { "epoch": 0.45208209059410337, "grad_norm": 0.03613122245312194, "learning_rate": 0.0009523863580533434, "loss": 0.749, "step": 8905 }, { "epoch": 0.4523359266921349, "grad_norm": 0.03949972162072239, "learning_rate": 0.000952291966911035, "loss": 0.7402, "step": 8910 }, { "epoch": 0.4525897627901664, "grad_norm": 0.0435044027621711, "learning_rate": 0.0009521974869860691, "loss": 0.7475, "step": 8915 }, { "epoch": 0.4528435988881979, "grad_norm": 0.040268515149981234, "learning_rate": 0.0009521029182969915, "loss": 0.7516, "step": 8920 }, { "epoch": 0.4530974349862294, "grad_norm": 0.043366115560424905, "learning_rate": 0.000952008260862366, "loss": 0.7232, "step": 8925 }, { "epoch": 0.4533512710842609, "grad_norm": 0.0359400440905671, "learning_rate": 0.0009519135147007726, "loss": 0.7447, "step": 8930 }, { "epoch": 0.4536051071822924, "grad_norm": 0.03366212310295488, "learning_rate": 0.0009518186798308104, "loss": 0.7327, "step": 8935 }, { "epoch": 0.4538589432803239, "grad_norm": 0.036215906261239064, "learning_rate": 0.0009517237562710943, "loss": 0.7341, "step": 8940 }, { "epoch": 0.45411277937835537, "grad_norm": 0.038082555808146894, "learning_rate": 0.0009516287440402576, "loss": 0.7664, "step": 8945 }, { "epoch": 0.4543666154763869, "grad_norm": 0.03510939572665765, "learning_rate": 0.0009515336431569508, "loss": 0.7555, "step": 8950 }, { "epoch": 0.4546204515744184, "grad_norm": 0.035770358241769606, "learning_rate": 0.0009514384536398416, "loss": 0.6941, "step": 8955 }, { "epoch": 0.4548742876724499, "grad_norm": 0.03735405616839357, "learning_rate": 0.0009513431755076152, "loss": 0.7164, "step": 8960 }, { "epoch": 0.4551281237704814, "grad_norm": 0.037678897429287574, "learning_rate": 0.0009512478087789745, "loss": 0.8011, "step": 8965 }, { "epoch": 0.4553819598685129, "grad_norm": 0.03459179379931926, "learning_rate": 0.0009511523534726391, "loss": 0.7276, "step": 8970 }, { "epoch": 0.4556357959665444, "grad_norm": 0.04048310613608325, "learning_rate": 0.0009510568096073466, "loss": 0.7459, "step": 8975 }, { "epoch": 0.4558896320645759, "grad_norm": 0.03392319876858949, "learning_rate": 0.0009509611772018519, "loss": 0.7088, "step": 8980 }, { "epoch": 0.4561434681626074, "grad_norm": 0.042270778103544994, "learning_rate": 0.0009508654562749271, "loss": 0.7832, "step": 8985 }, { "epoch": 0.4563973042606389, "grad_norm": 0.041479368776093574, "learning_rate": 0.0009507696468453615, "loss": 0.7574, "step": 8990 }, { "epoch": 0.4566511403586704, "grad_norm": 0.04079889381681917, "learning_rate": 0.0009506737489319623, "loss": 0.7363, "step": 8995 }, { "epoch": 0.4569049764567019, "grad_norm": 0.0362115027275734, "learning_rate": 0.0009505777625535538, "loss": 0.7442, "step": 9000 }, { "epoch": 0.4571588125547334, "grad_norm": 0.0736526630242249, "learning_rate": 0.0009504816877289775, "loss": 0.7474, "step": 9005 }, { "epoch": 0.45741264865276493, "grad_norm": 0.03710777602251197, "learning_rate": 0.0009503855244770923, "loss": 0.7164, "step": 9010 }, { "epoch": 0.4576664847507964, "grad_norm": 0.04267708600295648, "learning_rate": 0.0009502892728167749, "loss": 0.7365, "step": 9015 }, { "epoch": 0.4579203208488279, "grad_norm": 0.040306149843800064, "learning_rate": 0.0009501929327669188, "loss": 0.7523, "step": 9020 }, { "epoch": 0.4581741569468594, "grad_norm": 0.03789194167894667, "learning_rate": 0.0009500965043464349, "loss": 0.6962, "step": 9025 }, { "epoch": 0.4584279930448909, "grad_norm": 0.036788380796352195, "learning_rate": 0.000949999987574252, "loss": 0.7198, "step": 9030 }, { "epoch": 0.45868182914292244, "grad_norm": 0.035960658298140435, "learning_rate": 0.0009499033824693158, "loss": 0.722, "step": 9035 }, { "epoch": 0.4589356652409539, "grad_norm": 0.034642960686471116, "learning_rate": 0.000949806689050589, "loss": 0.7493, "step": 9040 }, { "epoch": 0.4591895013389854, "grad_norm": 0.03665839566003731, "learning_rate": 0.0009497099073370526, "loss": 0.7095, "step": 9045 }, { "epoch": 0.45944333743701693, "grad_norm": 0.03496364750347408, "learning_rate": 0.0009496130373477039, "loss": 0.7286, "step": 9050 }, { "epoch": 0.4596971735350484, "grad_norm": 0.039939079306775815, "learning_rate": 0.0009495160791015583, "loss": 0.7329, "step": 9055 }, { "epoch": 0.45995100963307994, "grad_norm": 0.03436834890303996, "learning_rate": 0.0009494190326176479, "loss": 0.6998, "step": 9060 }, { "epoch": 0.4602048457311114, "grad_norm": 0.038244936902037355, "learning_rate": 0.0009493218979150229, "loss": 0.7692, "step": 9065 }, { "epoch": 0.4604586818291429, "grad_norm": 0.03585679837874039, "learning_rate": 0.00094922467501275, "loss": 0.7494, "step": 9070 }, { "epoch": 0.46071251792717444, "grad_norm": 0.04193277323014534, "learning_rate": 0.0009491273639299136, "loss": 0.7509, "step": 9075 }, { "epoch": 0.4609663540252059, "grad_norm": 0.03652435341381288, "learning_rate": 0.0009490299646856156, "loss": 0.7535, "step": 9080 }, { "epoch": 0.46122019012323745, "grad_norm": 0.03332228518619773, "learning_rate": 0.0009489324772989747, "loss": 0.6815, "step": 9085 }, { "epoch": 0.46147402622126893, "grad_norm": 0.05310434897200618, "learning_rate": 0.0009488349017891275, "loss": 0.7017, "step": 9090 }, { "epoch": 0.4617278623193004, "grad_norm": 0.034411897856016345, "learning_rate": 0.0009487372381752273, "loss": 0.7373, "step": 9095 }, { "epoch": 0.46198169841733194, "grad_norm": 0.03927418271565514, "learning_rate": 0.0009486394864764452, "loss": 0.7445, "step": 9100 }, { "epoch": 0.4622355345153634, "grad_norm": 0.04898520339353834, "learning_rate": 0.000948541646711969, "loss": 0.7205, "step": 9105 }, { "epoch": 0.46248937061339496, "grad_norm": 0.04004921048816867, "learning_rate": 0.0009484437189010047, "loss": 0.7544, "step": 9110 }, { "epoch": 0.46274320671142644, "grad_norm": 0.03794210700629752, "learning_rate": 0.0009483457030627746, "loss": 0.7942, "step": 9115 }, { "epoch": 0.4629970428094579, "grad_norm": 0.03848733041145906, "learning_rate": 0.000948247599216519, "loss": 0.7682, "step": 9120 }, { "epoch": 0.46325087890748945, "grad_norm": 0.040697049858129955, "learning_rate": 0.0009481494073814951, "loss": 0.749, "step": 9125 }, { "epoch": 0.46350471500552093, "grad_norm": 0.03571949657573161, "learning_rate": 0.0009480511275769773, "loss": 0.7463, "step": 9130 }, { "epoch": 0.46375855110355246, "grad_norm": 0.036477871153034795, "learning_rate": 0.0009479527598222577, "loss": 0.7509, "step": 9135 }, { "epoch": 0.46401238720158394, "grad_norm": 0.042962258709380106, "learning_rate": 0.0009478543041366452, "loss": 0.761, "step": 9140 }, { "epoch": 0.4642662232996154, "grad_norm": 0.049744633932601375, "learning_rate": 0.0009477557605394664, "loss": 0.7371, "step": 9145 }, { "epoch": 0.46452005939764696, "grad_norm": 0.040503989952301415, "learning_rate": 0.0009476571290500647, "loss": 0.7829, "step": 9150 }, { "epoch": 0.46477389549567844, "grad_norm": 0.034575340637869996, "learning_rate": 0.000947558409687801, "loss": 0.7702, "step": 9155 }, { "epoch": 0.4650277315937099, "grad_norm": 0.03399511886761603, "learning_rate": 0.0009474596024720534, "loss": 0.7235, "step": 9160 }, { "epoch": 0.46528156769174145, "grad_norm": 0.03615909631856192, "learning_rate": 0.0009473607074222172, "loss": 0.7531, "step": 9165 }, { "epoch": 0.46553540378977293, "grad_norm": 0.034113150183880496, "learning_rate": 0.0009472617245577053, "loss": 0.7701, "step": 9170 }, { "epoch": 0.46578923988780446, "grad_norm": 0.033722749095456785, "learning_rate": 0.0009471626538979474, "loss": 0.7419, "step": 9175 }, { "epoch": 0.46604307598583594, "grad_norm": 0.03727432157771975, "learning_rate": 0.0009470634954623905, "loss": 0.712, "step": 9180 }, { "epoch": 0.4662969120838674, "grad_norm": 0.036359918982329945, "learning_rate": 0.0009469642492704989, "loss": 0.7558, "step": 9185 }, { "epoch": 0.46655074818189896, "grad_norm": 0.03729916828524665, "learning_rate": 0.0009468649153417542, "loss": 0.7294, "step": 9190 }, { "epoch": 0.46680458427993043, "grad_norm": 0.03542217126876099, "learning_rate": 0.000946765493695655, "loss": 0.7407, "step": 9195 }, { "epoch": 0.46705842037796197, "grad_norm": 0.035674784978167715, "learning_rate": 0.0009466659843517176, "loss": 0.7493, "step": 9200 }, { "epoch": 0.46731225647599345, "grad_norm": 0.036033194037522046, "learning_rate": 0.0009465663873294747, "loss": 0.723, "step": 9205 }, { "epoch": 0.4675660925740249, "grad_norm": 0.038233373455248555, "learning_rate": 0.0009464667026484774, "loss": 0.7284, "step": 9210 }, { "epoch": 0.46781992867205646, "grad_norm": 0.03635648498369609, "learning_rate": 0.0009463669303282927, "loss": 0.7526, "step": 9215 }, { "epoch": 0.46807376477008794, "grad_norm": 0.03426538615895332, "learning_rate": 0.0009462670703885054, "loss": 0.7321, "step": 9220 }, { "epoch": 0.4683276008681195, "grad_norm": 0.03481526030309011, "learning_rate": 0.0009461671228487181, "loss": 0.7491, "step": 9225 }, { "epoch": 0.46858143696615095, "grad_norm": 0.032155811619332134, "learning_rate": 0.0009460670877285493, "loss": 0.7353, "step": 9230 }, { "epoch": 0.46883527306418243, "grad_norm": 0.03687349580884466, "learning_rate": 0.0009459669650476359, "loss": 0.7632, "step": 9235 }, { "epoch": 0.46908910916221397, "grad_norm": 0.034163407362945056, "learning_rate": 0.0009458667548256312, "loss": 0.7132, "step": 9240 }, { "epoch": 0.46934294526024545, "grad_norm": 0.0357244871347235, "learning_rate": 0.0009457664570822061, "loss": 0.7605, "step": 9245 }, { "epoch": 0.469596781358277, "grad_norm": 0.03879828685365166, "learning_rate": 0.0009456660718370484, "loss": 0.7185, "step": 9250 }, { "epoch": 0.46985061745630846, "grad_norm": 0.035396410717580595, "learning_rate": 0.0009455655991098635, "loss": 0.7277, "step": 9255 }, { "epoch": 0.47010445355433994, "grad_norm": 0.033668369019169586, "learning_rate": 0.0009454650389203735, "loss": 0.7342, "step": 9260 }, { "epoch": 0.4703582896523715, "grad_norm": 0.03901940536495328, "learning_rate": 0.0009453643912883179, "loss": 0.7112, "step": 9265 }, { "epoch": 0.47061212575040295, "grad_norm": 0.03212554372387254, "learning_rate": 0.0009452636562334532, "loss": 0.764, "step": 9270 }, { "epoch": 0.4708659618484345, "grad_norm": 0.033847849500836455, "learning_rate": 0.0009451628337755533, "loss": 0.7165, "step": 9275 }, { "epoch": 0.47111979794646597, "grad_norm": 0.04263196883750663, "learning_rate": 0.0009450619239344094, "loss": 0.7525, "step": 9280 }, { "epoch": 0.47137363404449745, "grad_norm": 0.044396133488013564, "learning_rate": 0.0009449609267298292, "loss": 0.7337, "step": 9285 }, { "epoch": 0.471627470142529, "grad_norm": 0.04104044512105369, "learning_rate": 0.000944859842181638, "loss": 0.7621, "step": 9290 }, { "epoch": 0.47188130624056046, "grad_norm": 0.03856859802588168, "learning_rate": 0.0009447586703096784, "loss": 0.732, "step": 9295 }, { "epoch": 0.472135142338592, "grad_norm": 0.04138116924944328, "learning_rate": 0.0009446574111338097, "loss": 0.753, "step": 9300 }, { "epoch": 0.4723889784366235, "grad_norm": 0.0350307257882755, "learning_rate": 0.0009445560646739088, "loss": 0.7274, "step": 9305 }, { "epoch": 0.47264281453465495, "grad_norm": 0.032533186195728536, "learning_rate": 0.0009444546309498693, "loss": 0.7053, "step": 9310 }, { "epoch": 0.4728966506326865, "grad_norm": 0.03533212458960782, "learning_rate": 0.0009443531099816025, "loss": 0.7382, "step": 9315 }, { "epoch": 0.47315048673071797, "grad_norm": 0.03662113777259537, "learning_rate": 0.0009442515017890361, "loss": 0.7223, "step": 9320 }, { "epoch": 0.4734043228287495, "grad_norm": 0.035705587255450796, "learning_rate": 0.0009441498063921152, "loss": 0.7351, "step": 9325 }, { "epoch": 0.473658158926781, "grad_norm": 0.0390246628736802, "learning_rate": 0.0009440480238108025, "loss": 0.7599, "step": 9330 }, { "epoch": 0.47391199502481246, "grad_norm": 0.03809423978916735, "learning_rate": 0.000943946154065077, "loss": 0.7171, "step": 9335 }, { "epoch": 0.474165831122844, "grad_norm": 0.035132381246055965, "learning_rate": 0.0009438441971749354, "loss": 0.7516, "step": 9340 }, { "epoch": 0.4744196672208755, "grad_norm": 0.032666413687763865, "learning_rate": 0.0009437421531603916, "loss": 0.7154, "step": 9345 }, { "epoch": 0.474673503318907, "grad_norm": 0.03339540004666216, "learning_rate": 0.0009436400220414758, "loss": 0.7094, "step": 9350 }, { "epoch": 0.4749273394169385, "grad_norm": 0.0371228780206185, "learning_rate": 0.0009435378038382363, "loss": 0.7473, "step": 9355 }, { "epoch": 0.47518117551496997, "grad_norm": 0.03444417672574159, "learning_rate": 0.0009434354985707376, "loss": 0.7048, "step": 9360 }, { "epoch": 0.4754350116130015, "grad_norm": 0.03572531844304398, "learning_rate": 0.0009433331062590621, "loss": 0.682, "step": 9365 }, { "epoch": 0.475688847711033, "grad_norm": 0.03564077182710878, "learning_rate": 0.0009432306269233087, "loss": 0.7221, "step": 9370 }, { "epoch": 0.47594268380906446, "grad_norm": 0.03628031962693484, "learning_rate": 0.0009431280605835937, "loss": 0.7148, "step": 9375 }, { "epoch": 0.476196519907096, "grad_norm": 0.0372323273091336, "learning_rate": 0.0009430254072600501, "loss": 0.7354, "step": 9380 }, { "epoch": 0.4764503560051275, "grad_norm": 0.03665374158236495, "learning_rate": 0.0009429226669728285, "loss": 0.7054, "step": 9385 }, { "epoch": 0.476704192103159, "grad_norm": 0.033837047200090585, "learning_rate": 0.0009428198397420964, "loss": 0.7016, "step": 9390 }, { "epoch": 0.4769580282011905, "grad_norm": 0.03434690269102241, "learning_rate": 0.0009427169255880379, "loss": 0.7516, "step": 9395 }, { "epoch": 0.47721186429922197, "grad_norm": 0.037771827240007966, "learning_rate": 0.0009426139245308548, "loss": 0.6949, "step": 9400 }, { "epoch": 0.4774657003972535, "grad_norm": 0.03715029321175476, "learning_rate": 0.0009425108365907658, "loss": 0.7182, "step": 9405 }, { "epoch": 0.477719536495285, "grad_norm": 0.033835354395939876, "learning_rate": 0.0009424076617880059, "loss": 0.7026, "step": 9410 }, { "epoch": 0.4779733725933165, "grad_norm": 0.037287803500656684, "learning_rate": 0.0009423044001428287, "loss": 0.693, "step": 9415 }, { "epoch": 0.478227208691348, "grad_norm": 0.0405017827301244, "learning_rate": 0.0009422010516755034, "loss": 0.7816, "step": 9420 }, { "epoch": 0.4784810447893795, "grad_norm": 0.03392897490807428, "learning_rate": 0.0009420976164063169, "loss": 0.7393, "step": 9425 }, { "epoch": 0.478734880887411, "grad_norm": 0.03479997814596096, "learning_rate": 0.0009419940943555731, "loss": 0.7331, "step": 9430 }, { "epoch": 0.4789887169854425, "grad_norm": 0.032143657572656026, "learning_rate": 0.0009418904855435927, "loss": 0.7404, "step": 9435 }, { "epoch": 0.479242553083474, "grad_norm": 0.03324751784679724, "learning_rate": 0.0009417867899907138, "loss": 0.7073, "step": 9440 }, { "epoch": 0.4794963891815055, "grad_norm": 0.034070579628319234, "learning_rate": 0.0009416830077172911, "loss": 0.7441, "step": 9445 }, { "epoch": 0.479750225279537, "grad_norm": 0.033716411910050516, "learning_rate": 0.0009415791387436968, "loss": 0.717, "step": 9450 }, { "epoch": 0.4800040613775685, "grad_norm": 0.03792448206260793, "learning_rate": 0.0009414751830903195, "loss": 0.7757, "step": 9455 }, { "epoch": 0.4802578974756, "grad_norm": 0.03375511934012138, "learning_rate": 0.0009413711407775655, "loss": 0.731, "step": 9460 }, { "epoch": 0.4805117335736315, "grad_norm": 0.034150458799890634, "learning_rate": 0.0009412670118258578, "loss": 0.7179, "step": 9465 }, { "epoch": 0.480765569671663, "grad_norm": 0.037842780054711075, "learning_rate": 0.0009411627962556359, "loss": 0.7292, "step": 9470 }, { "epoch": 0.4810194057696945, "grad_norm": 0.03350346751200308, "learning_rate": 0.0009410584940873574, "loss": 0.7388, "step": 9475 }, { "epoch": 0.481273241867726, "grad_norm": 0.04038948910309181, "learning_rate": 0.0009409541053414963, "loss": 0.698, "step": 9480 }, { "epoch": 0.4815270779657575, "grad_norm": 0.03935709723336501, "learning_rate": 0.000940849630038543, "loss": 0.7512, "step": 9485 }, { "epoch": 0.48178091406378903, "grad_norm": 0.041577219855036866, "learning_rate": 0.0009407450681990061, "loss": 0.7451, "step": 9490 }, { "epoch": 0.4820347501618205, "grad_norm": 0.0350073254441594, "learning_rate": 0.0009406404198434102, "loss": 0.7136, "step": 9495 }, { "epoch": 0.482288586259852, "grad_norm": 0.03961783599648961, "learning_rate": 0.0009405356849922972, "loss": 0.7566, "step": 9500 }, { "epoch": 0.4825424223578835, "grad_norm": 0.03459548882557077, "learning_rate": 0.0009404308636662264, "loss": 0.7516, "step": 9505 }, { "epoch": 0.482796258455915, "grad_norm": 0.042823314628822724, "learning_rate": 0.0009403259558857734, "loss": 0.7287, "step": 9510 }, { "epoch": 0.48305009455394654, "grad_norm": 0.03862761913495699, "learning_rate": 0.0009402209616715311, "loss": 0.7029, "step": 9515 }, { "epoch": 0.483303930651978, "grad_norm": 0.03935591067147662, "learning_rate": 0.0009401158810441095, "loss": 0.7492, "step": 9520 }, { "epoch": 0.4835577667500095, "grad_norm": 0.03330207199535938, "learning_rate": 0.0009400107140241354, "loss": 0.7354, "step": 9525 }, { "epoch": 0.48381160284804103, "grad_norm": 0.036577808771870564, "learning_rate": 0.0009399054606322524, "loss": 0.7201, "step": 9530 }, { "epoch": 0.4840654389460725, "grad_norm": 0.0392570659763892, "learning_rate": 0.0009398001208891212, "loss": 0.7088, "step": 9535 }, { "epoch": 0.48431927504410405, "grad_norm": 0.03412719106350502, "learning_rate": 0.0009396946948154194, "loss": 0.7245, "step": 9540 }, { "epoch": 0.4845731111421355, "grad_norm": 0.03958212205399778, "learning_rate": 0.0009395891824318421, "loss": 0.7226, "step": 9545 }, { "epoch": 0.484826947240167, "grad_norm": 0.037189389063379666, "learning_rate": 0.0009394835837591004, "loss": 0.7387, "step": 9550 }, { "epoch": 0.48508078333819854, "grad_norm": 0.03287653060423727, "learning_rate": 0.0009393778988179229, "loss": 0.7548, "step": 9555 }, { "epoch": 0.48533461943623, "grad_norm": 0.06675749034583879, "learning_rate": 0.0009392721276290549, "loss": 0.7326, "step": 9560 }, { "epoch": 0.48558845553426155, "grad_norm": 0.033254266684953376, "learning_rate": 0.0009391662702132591, "loss": 0.7563, "step": 9565 }, { "epoch": 0.48584229163229303, "grad_norm": 0.036481617387076976, "learning_rate": 0.0009390603265913145, "loss": 0.7533, "step": 9570 }, { "epoch": 0.4860961277303245, "grad_norm": 0.03430731294481015, "learning_rate": 0.0009389542967840173, "loss": 0.7081, "step": 9575 }, { "epoch": 0.48634996382835605, "grad_norm": 0.03821790639893066, "learning_rate": 0.0009388481808121807, "loss": 0.7123, "step": 9580 }, { "epoch": 0.4866037999263875, "grad_norm": 0.037506618732981785, "learning_rate": 0.0009387419786966348, "loss": 0.6942, "step": 9585 }, { "epoch": 0.486857636024419, "grad_norm": 0.03799291485736097, "learning_rate": 0.0009386356904582265, "loss": 0.7626, "step": 9590 }, { "epoch": 0.48711147212245054, "grad_norm": 0.04688274088898732, "learning_rate": 0.0009385293161178197, "loss": 0.7582, "step": 9595 }, { "epoch": 0.487365308220482, "grad_norm": 0.037020775485959834, "learning_rate": 0.0009384228556962949, "loss": 0.7392, "step": 9600 }, { "epoch": 0.48761914431851355, "grad_norm": 0.03629829722613318, "learning_rate": 0.0009383163092145501, "loss": 0.7492, "step": 9605 }, { "epoch": 0.48787298041654503, "grad_norm": 0.03533909347833369, "learning_rate": 0.0009382096766934996, "loss": 0.7776, "step": 9610 }, { "epoch": 0.4881268165145765, "grad_norm": 0.03374755758379666, "learning_rate": 0.000938102958154075, "loss": 0.7088, "step": 9615 }, { "epoch": 0.48838065261260805, "grad_norm": 0.040656181914022836, "learning_rate": 0.0009379961536172244, "loss": 0.7632, "step": 9620 }, { "epoch": 0.4886344887106395, "grad_norm": 0.037245195920728225, "learning_rate": 0.0009378892631039132, "loss": 0.7733, "step": 9625 }, { "epoch": 0.48888832480867106, "grad_norm": 0.04797647844584813, "learning_rate": 0.0009377822866351235, "loss": 0.7716, "step": 9630 }, { "epoch": 0.48914216090670254, "grad_norm": 0.04781234690192137, "learning_rate": 0.000937675224231854, "loss": 0.72, "step": 9635 }, { "epoch": 0.489395997004734, "grad_norm": 0.03723487084208058, "learning_rate": 0.0009375680759151206, "loss": 0.7404, "step": 9640 }, { "epoch": 0.48964983310276555, "grad_norm": 0.03561577681912482, "learning_rate": 0.0009374608417059562, "loss": 0.7251, "step": 9645 }, { "epoch": 0.48990366920079703, "grad_norm": 0.033198901675526235, "learning_rate": 0.0009373535216254101, "loss": 0.7276, "step": 9650 }, { "epoch": 0.49015750529882857, "grad_norm": 0.03415900918919849, "learning_rate": 0.0009372461156945489, "loss": 0.743, "step": 9655 }, { "epoch": 0.49041134139686005, "grad_norm": 0.03916092617202635, "learning_rate": 0.0009371386239344557, "loss": 0.758, "step": 9660 }, { "epoch": 0.4906651774948915, "grad_norm": 0.03566307194155841, "learning_rate": 0.0009370310463662306, "loss": 0.7601, "step": 9665 }, { "epoch": 0.49091901359292306, "grad_norm": 0.03251629766200096, "learning_rate": 0.0009369233830109905, "loss": 0.6926, "step": 9670 }, { "epoch": 0.49117284969095454, "grad_norm": 0.0356761989770266, "learning_rate": 0.0009368156338898694, "loss": 0.7443, "step": 9675 }, { "epoch": 0.4914266857889861, "grad_norm": 0.03377045781696197, "learning_rate": 0.0009367077990240176, "loss": 0.7077, "step": 9680 }, { "epoch": 0.49168052188701755, "grad_norm": 0.0372871940634085, "learning_rate": 0.0009365998784346028, "loss": 0.7165, "step": 9685 }, { "epoch": 0.49193435798504903, "grad_norm": 0.037454588297604896, "learning_rate": 0.0009364918721428093, "loss": 0.6974, "step": 9690 }, { "epoch": 0.49218819408308057, "grad_norm": 0.035194482881704535, "learning_rate": 0.0009363837801698379, "loss": 0.7302, "step": 9695 }, { "epoch": 0.49244203018111204, "grad_norm": 0.033383571551715514, "learning_rate": 0.0009362756025369067, "loss": 0.7385, "step": 9700 }, { "epoch": 0.4926958662791436, "grad_norm": 0.03174771930292897, "learning_rate": 0.0009361673392652505, "loss": 0.689, "step": 9705 }, { "epoch": 0.49294970237717506, "grad_norm": 0.03517549138849288, "learning_rate": 0.0009360589903761208, "loss": 0.6861, "step": 9710 }, { "epoch": 0.49320353847520654, "grad_norm": 0.03485741152469644, "learning_rate": 0.0009359505558907857, "loss": 0.7002, "step": 9715 }, { "epoch": 0.4934573745732381, "grad_norm": 0.03414726696081925, "learning_rate": 0.0009358420358305307, "loss": 0.7387, "step": 9720 }, { "epoch": 0.49371121067126955, "grad_norm": 0.03678676972007384, "learning_rate": 0.0009357334302166577, "loss": 0.7312, "step": 9725 }, { "epoch": 0.4939650467693011, "grad_norm": 0.036338841705367254, "learning_rate": 0.0009356247390704853, "loss": 0.7527, "step": 9730 }, { "epoch": 0.49421888286733257, "grad_norm": 0.03303651302349927, "learning_rate": 0.0009355159624133489, "loss": 0.6821, "step": 9735 }, { "epoch": 0.49447271896536404, "grad_norm": 0.032392667288223195, "learning_rate": 0.0009354071002666011, "loss": 0.7273, "step": 9740 }, { "epoch": 0.4947265550633956, "grad_norm": 0.03731725846263331, "learning_rate": 0.000935298152651611, "loss": 0.7179, "step": 9745 }, { "epoch": 0.49498039116142706, "grad_norm": 0.03608823341815174, "learning_rate": 0.0009351891195897644, "loss": 0.7457, "step": 9750 }, { "epoch": 0.4952342272594586, "grad_norm": 0.03294363192961696, "learning_rate": 0.0009350800011024636, "loss": 0.7391, "step": 9755 }, { "epoch": 0.49548806335749007, "grad_norm": 0.03793122431124517, "learning_rate": 0.0009349707972111285, "loss": 0.7275, "step": 9760 }, { "epoch": 0.49574189945552155, "grad_norm": 0.03336413369566877, "learning_rate": 0.0009348615079371952, "loss": 0.6919, "step": 9765 }, { "epoch": 0.4959957355535531, "grad_norm": 0.04311216591519663, "learning_rate": 0.0009347521333021165, "loss": 0.7333, "step": 9770 }, { "epoch": 0.49624957165158456, "grad_norm": 0.03520861707140794, "learning_rate": 0.000934642673327362, "loss": 0.7139, "step": 9775 }, { "epoch": 0.4965034077496161, "grad_norm": 0.03458693474732145, "learning_rate": 0.0009345331280344184, "loss": 0.7551, "step": 9780 }, { "epoch": 0.4967572438476476, "grad_norm": 0.03418559950535964, "learning_rate": 0.0009344234974447888, "loss": 0.7104, "step": 9785 }, { "epoch": 0.49701107994567906, "grad_norm": 0.03295554089954359, "learning_rate": 0.0009343137815799931, "loss": 0.7478, "step": 9790 }, { "epoch": 0.4972649160437106, "grad_norm": 0.03355322892719834, "learning_rate": 0.000934203980461568, "loss": 0.7019, "step": 9795 }, { "epoch": 0.49751875214174207, "grad_norm": 0.03691913548220339, "learning_rate": 0.0009340940941110669, "loss": 0.7407, "step": 9800 }, { "epoch": 0.4977725882397736, "grad_norm": 0.03525528539914667, "learning_rate": 0.00093398412255006, "loss": 0.6973, "step": 9805 }, { "epoch": 0.4980264243378051, "grad_norm": 0.03434877187632999, "learning_rate": 0.000933874065800134, "loss": 0.7445, "step": 9810 }, { "epoch": 0.49828026043583656, "grad_norm": 0.03621983904729332, "learning_rate": 0.0009337639238828927, "loss": 0.7115, "step": 9815 }, { "epoch": 0.4985340965338681, "grad_norm": 0.037519845088101744, "learning_rate": 0.0009336536968199562, "loss": 0.71, "step": 9820 }, { "epoch": 0.4987879326318996, "grad_norm": 0.03728341343385713, "learning_rate": 0.0009335433846329618, "loss": 0.6972, "step": 9825 }, { "epoch": 0.49904176872993106, "grad_norm": 0.6512398390529546, "learning_rate": 0.000933432987343563, "loss": 0.741, "step": 9830 }, { "epoch": 0.4992956048279626, "grad_norm": 0.056122143656495385, "learning_rate": 0.0009333225049734303, "loss": 0.721, "step": 9835 }, { "epoch": 0.49954944092599407, "grad_norm": 0.04104631246164432, "learning_rate": 0.0009332119375442509, "loss": 0.7409, "step": 9840 }, { "epoch": 0.4998032770240256, "grad_norm": 0.04408572317115493, "learning_rate": 0.0009331012850777286, "loss": 0.7325, "step": 9845 }, { "epoch": 0.5000571131220571, "grad_norm": 0.0426150750756381, "learning_rate": 0.0009329905475955838, "loss": 0.6958, "step": 9850 }, { "epoch": 0.5003109492200886, "grad_norm": 0.0366567001456073, "learning_rate": 0.0009328797251195539, "loss": 0.7229, "step": 9855 }, { "epoch": 0.5005647853181201, "grad_norm": 0.03613690702789905, "learning_rate": 0.0009327688176713927, "loss": 0.7614, "step": 9860 }, { "epoch": 0.5008186214161516, "grad_norm": 0.03482308726249597, "learning_rate": 0.0009326578252728708, "loss": 0.7163, "step": 9865 }, { "epoch": 0.5010724575141831, "grad_norm": 0.03603968595774972, "learning_rate": 0.0009325467479457754, "loss": 0.7473, "step": 9870 }, { "epoch": 0.5013262936122146, "grad_norm": 0.03608466164125366, "learning_rate": 0.0009324355857119106, "loss": 0.7156, "step": 9875 }, { "epoch": 0.5015801297102461, "grad_norm": 0.03467720132262962, "learning_rate": 0.0009323243385930968, "loss": 0.7132, "step": 9880 }, { "epoch": 0.5018339658082775, "grad_norm": 0.035820346442267004, "learning_rate": 0.0009322130066111713, "loss": 0.7687, "step": 9885 }, { "epoch": 0.5020878019063091, "grad_norm": 0.03881679461619406, "learning_rate": 0.0009321015897879883, "loss": 0.7349, "step": 9890 }, { "epoch": 0.5023416380043406, "grad_norm": 0.03459804260079071, "learning_rate": 0.0009319900881454179, "loss": 0.7645, "step": 9895 }, { "epoch": 0.5025954741023722, "grad_norm": 0.03763513353247618, "learning_rate": 0.0009318785017053475, "loss": 0.7118, "step": 9900 }, { "epoch": 0.5028493102004036, "grad_norm": 0.03561303893653155, "learning_rate": 0.0009317668304896811, "loss": 0.7469, "step": 9905 }, { "epoch": 0.5031031462984351, "grad_norm": 0.037404472748526395, "learning_rate": 0.000931655074520339, "loss": 0.7069, "step": 9910 }, { "epoch": 0.5033569823964666, "grad_norm": 0.0660560319394988, "learning_rate": 0.0009315432338192584, "loss": 0.7734, "step": 9915 }, { "epoch": 0.5036108184944981, "grad_norm": 0.062405696142591636, "learning_rate": 0.0009314313084083933, "loss": 0.7633, "step": 9920 }, { "epoch": 0.5038646545925296, "grad_norm": 0.0563309399560159, "learning_rate": 0.0009313192983097137, "loss": 0.7619, "step": 9925 }, { "epoch": 0.5041184906905611, "grad_norm": 0.0514878182124079, "learning_rate": 0.0009312072035452069, "loss": 0.8308, "step": 9930 }, { "epoch": 0.5043723267885926, "grad_norm": 0.05382718406172399, "learning_rate": 0.0009310950241368765, "loss": 0.8018, "step": 9935 }, { "epoch": 0.5046261628866241, "grad_norm": 0.05611166679378253, "learning_rate": 0.0009309827601067428, "loss": 0.7831, "step": 9940 }, { "epoch": 0.5048799989846556, "grad_norm": 0.04037622146824623, "learning_rate": 0.0009308704114768425, "loss": 0.7924, "step": 9945 }, { "epoch": 0.505133835082687, "grad_norm": 0.042734634816911504, "learning_rate": 0.0009307579782692291, "loss": 0.7508, "step": 9950 }, { "epoch": 0.5053876711807186, "grad_norm": 0.04448106473599028, "learning_rate": 0.0009306454605059729, "loss": 0.7813, "step": 9955 }, { "epoch": 0.5056415072787501, "grad_norm": 0.06147690582739835, "learning_rate": 0.0009305328582091603, "loss": 0.7788, "step": 9960 }, { "epoch": 0.5058953433767817, "grad_norm": 0.042480460429679213, "learning_rate": 0.0009304201714008948, "loss": 0.7643, "step": 9965 }, { "epoch": 0.5061491794748131, "grad_norm": 0.04215033671048825, "learning_rate": 0.0009303074001032961, "loss": 0.7574, "step": 9970 }, { "epoch": 0.5064030155728446, "grad_norm": 0.03770355724870482, "learning_rate": 0.0009301945443385007, "loss": 0.7851, "step": 9975 }, { "epoch": 0.5066568516708762, "grad_norm": 0.03531954013994903, "learning_rate": 0.0009300816041286617, "loss": 0.7609, "step": 9980 }, { "epoch": 0.5069106877689076, "grad_norm": 0.03670090565272101, "learning_rate": 0.0009299685794959485, "loss": 0.7279, "step": 9985 }, { "epoch": 0.5071645238669391, "grad_norm": 0.03595421091476454, "learning_rate": 0.0009298554704625474, "loss": 0.7499, "step": 9990 }, { "epoch": 0.5074183599649706, "grad_norm": 0.03427947226400229, "learning_rate": 0.0009297422770506613, "loss": 0.7457, "step": 9995 }, { "epoch": 0.5076721960630021, "grad_norm": 0.04311023944937397, "learning_rate": 0.0009296289992825091, "loss": 0.7962, "step": 10000 }, { "epoch": 0.5079260321610336, "grad_norm": 0.043697505372513476, "learning_rate": 0.0009295156371803271, "loss": 0.7306, "step": 10005 }, { "epoch": 0.5081798682590651, "grad_norm": 0.035008843450383055, "learning_rate": 0.0009294021907663674, "loss": 0.7502, "step": 10010 }, { "epoch": 0.5084337043570967, "grad_norm": 0.03855808546762251, "learning_rate": 0.0009292886600628991, "loss": 0.7349, "step": 10015 }, { "epoch": 0.5086875404551281, "grad_norm": 0.03337566615041127, "learning_rate": 0.0009291750450922078, "loss": 0.7496, "step": 10020 }, { "epoch": 0.5089413765531596, "grad_norm": 0.033393038452801775, "learning_rate": 0.0009290613458765953, "loss": 0.7274, "step": 10025 }, { "epoch": 0.5091952126511912, "grad_norm": 0.03909749973088013, "learning_rate": 0.0009289475624383804, "loss": 0.7303, "step": 10030 }, { "epoch": 0.5094490487492226, "grad_norm": 0.03491796374098105, "learning_rate": 0.0009288336947998981, "loss": 0.7377, "step": 10035 }, { "epoch": 0.5097028848472541, "grad_norm": 0.0375810838025445, "learning_rate": 0.0009287197429835002, "loss": 0.747, "step": 10040 }, { "epoch": 0.5099567209452857, "grad_norm": 0.04081939264484194, "learning_rate": 0.0009286057070115545, "loss": 0.7661, "step": 10045 }, { "epoch": 0.5102105570433171, "grad_norm": 0.04269462344977429, "learning_rate": 0.0009284915869064463, "loss": 0.748, "step": 10050 }, { "epoch": 0.5104643931413486, "grad_norm": 0.040394049979938465, "learning_rate": 0.0009283773826905764, "loss": 0.7441, "step": 10055 }, { "epoch": 0.5107182292393802, "grad_norm": 0.03399523859435354, "learning_rate": 0.0009282630943863625, "loss": 0.7351, "step": 10060 }, { "epoch": 0.5109720653374117, "grad_norm": 0.03803616038918416, "learning_rate": 0.0009281487220162388, "loss": 0.7543, "step": 10065 }, { "epoch": 0.5112259014354431, "grad_norm": 0.04027007802110935, "learning_rate": 0.0009280342656026564, "loss": 0.739, "step": 10070 }, { "epoch": 0.5114797375334746, "grad_norm": 0.036111955647564926, "learning_rate": 0.0009279197251680822, "loss": 0.7401, "step": 10075 }, { "epoch": 0.5117335736315062, "grad_norm": 0.050694889490105706, "learning_rate": 0.000927805100735, "loss": 0.7706, "step": 10080 }, { "epoch": 0.5119874097295376, "grad_norm": 0.03668528294463585, "learning_rate": 0.0009276903923259099, "loss": 0.7217, "step": 10085 }, { "epoch": 0.5122412458275691, "grad_norm": 0.039264399412929134, "learning_rate": 0.0009275755999633286, "loss": 0.7547, "step": 10090 }, { "epoch": 0.5124950819256007, "grad_norm": 0.041206037616367666, "learning_rate": 0.0009274607236697895, "loss": 0.7284, "step": 10095 }, { "epoch": 0.5127489180236321, "grad_norm": 0.03968458178798434, "learning_rate": 0.000927345763467842, "loss": 0.7333, "step": 10100 }, { "epoch": 0.5130027541216636, "grad_norm": 0.056267944217352775, "learning_rate": 0.0009272307193800524, "loss": 0.7312, "step": 10105 }, { "epoch": 0.5132565902196952, "grad_norm": 0.03751228760238703, "learning_rate": 0.000927115591429003, "loss": 0.7373, "step": 10110 }, { "epoch": 0.5135104263177267, "grad_norm": 0.03496657749022359, "learning_rate": 0.0009270003796372933, "loss": 0.7298, "step": 10115 }, { "epoch": 0.5137642624157581, "grad_norm": 0.03890904329330306, "learning_rate": 0.0009268850840275382, "loss": 0.7598, "step": 10120 }, { "epoch": 0.5140180985137897, "grad_norm": 0.03537644751180505, "learning_rate": 0.0009267697046223702, "loss": 0.7756, "step": 10125 }, { "epoch": 0.5142719346118212, "grad_norm": 0.03593111142158083, "learning_rate": 0.0009266542414444374, "loss": 0.6868, "step": 10130 }, { "epoch": 0.5145257707098526, "grad_norm": 0.03748755238198075, "learning_rate": 0.0009265386945164049, "loss": 0.7352, "step": 10135 }, { "epoch": 0.5147796068078841, "grad_norm": 0.03632593604591072, "learning_rate": 0.0009264230638609535, "loss": 0.7212, "step": 10140 }, { "epoch": 0.5150334429059157, "grad_norm": 0.03429099178154278, "learning_rate": 0.0009263073495007814, "loss": 0.7515, "step": 10145 }, { "epoch": 0.5152872790039471, "grad_norm": 0.03635394283749715, "learning_rate": 0.0009261915514586026, "loss": 0.7785, "step": 10150 }, { "epoch": 0.5155411151019786, "grad_norm": 0.05129749423718228, "learning_rate": 0.0009260756697571477, "loss": 0.8067, "step": 10155 }, { "epoch": 0.5157949512000102, "grad_norm": 0.05695722067575898, "learning_rate": 0.0009259597044191636, "loss": 0.7669, "step": 10160 }, { "epoch": 0.5160487872980416, "grad_norm": 0.05091189905451843, "learning_rate": 0.0009258436554674137, "loss": 0.7728, "step": 10165 }, { "epoch": 0.5163026233960731, "grad_norm": 0.040727169464617345, "learning_rate": 0.000925727522924678, "loss": 0.7268, "step": 10170 }, { "epoch": 0.5165564594941047, "grad_norm": 0.037361211430518425, "learning_rate": 0.0009256113068137526, "loss": 0.8022, "step": 10175 }, { "epoch": 0.5168102955921362, "grad_norm": 0.03566325499387283, "learning_rate": 0.0009254950071574502, "loss": 0.7546, "step": 10180 }, { "epoch": 0.5170641316901676, "grad_norm": 0.03462725372452473, "learning_rate": 0.0009253786239785999, "loss": 0.7299, "step": 10185 }, { "epoch": 0.5173179677881992, "grad_norm": 0.05208559710298344, "learning_rate": 0.0009252621573000472, "loss": 0.7815, "step": 10190 }, { "epoch": 0.5175718038862307, "grad_norm": 0.04439499036517105, "learning_rate": 0.0009251456071446536, "loss": 0.7173, "step": 10195 }, { "epoch": 0.5178256399842621, "grad_norm": 0.05014217283884164, "learning_rate": 0.0009250289735352975, "loss": 0.774, "step": 10200 }, { "epoch": 0.5180794760822937, "grad_norm": 0.05499684985638828, "learning_rate": 0.0009249122564948736, "loss": 0.7672, "step": 10205 }, { "epoch": 0.5183333121803252, "grad_norm": 0.046189800198077155, "learning_rate": 0.0009247954560462928, "loss": 0.7628, "step": 10210 }, { "epoch": 0.5185871482783566, "grad_norm": 0.03970254870567258, "learning_rate": 0.0009246785722124823, "loss": 0.7525, "step": 10215 }, { "epoch": 0.5188409843763881, "grad_norm": 0.043124584769727144, "learning_rate": 0.0009245616050163861, "loss": 0.7332, "step": 10220 }, { "epoch": 0.5190948204744197, "grad_norm": 0.03695871467906466, "learning_rate": 0.000924444554480964, "loss": 0.766, "step": 10225 }, { "epoch": 0.5193486565724512, "grad_norm": 0.03814642157495956, "learning_rate": 0.0009243274206291926, "loss": 0.7158, "step": 10230 }, { "epoch": 0.5196024926704826, "grad_norm": 0.04443698896413723, "learning_rate": 0.0009242102034840647, "loss": 0.7658, "step": 10235 }, { "epoch": 0.5198563287685142, "grad_norm": 0.035510520243865494, "learning_rate": 0.0009240929030685893, "loss": 0.794, "step": 10240 }, { "epoch": 0.5201101648665457, "grad_norm": 0.040812931537362374, "learning_rate": 0.0009239755194057921, "loss": 0.7766, "step": 10245 }, { "epoch": 0.5203640009645771, "grad_norm": 0.03199814983570617, "learning_rate": 0.0009238580525187146, "loss": 0.6949, "step": 10250 }, { "epoch": 0.5206178370626087, "grad_norm": 0.05608243909913665, "learning_rate": 0.0009237405024304153, "loss": 0.7404, "step": 10255 }, { "epoch": 0.5208716731606402, "grad_norm": 0.047953283069020844, "learning_rate": 0.0009236228691639686, "loss": 0.7902, "step": 10260 }, { "epoch": 0.5211255092586716, "grad_norm": 0.035787025445715825, "learning_rate": 0.0009235051527424652, "loss": 0.713, "step": 10265 }, { "epoch": 0.5213793453567032, "grad_norm": 0.046292052894171716, "learning_rate": 0.0009233873531890123, "loss": 0.7799, "step": 10270 }, { "epoch": 0.5216331814547347, "grad_norm": 0.04008628188565197, "learning_rate": 0.0009232694705267335, "loss": 0.7334, "step": 10275 }, { "epoch": 0.5218870175527662, "grad_norm": 0.04251716953251047, "learning_rate": 0.0009231515047787686, "loss": 0.7844, "step": 10280 }, { "epoch": 0.5221408536507977, "grad_norm": 0.03803555682004818, "learning_rate": 0.0009230334559682734, "loss": 0.7299, "step": 10285 }, { "epoch": 0.5223946897488292, "grad_norm": 0.0533898569633822, "learning_rate": 0.0009229153241184204, "loss": 0.7438, "step": 10290 }, { "epoch": 0.5226485258468607, "grad_norm": 0.0385382747369589, "learning_rate": 0.0009227971092523983, "loss": 0.7549, "step": 10295 }, { "epoch": 0.5229023619448921, "grad_norm": 0.0380725936667189, "learning_rate": 0.0009226788113934123, "loss": 0.7284, "step": 10300 }, { "epoch": 0.5231561980429237, "grad_norm": 0.03310921229423258, "learning_rate": 0.0009225604305646835, "loss": 0.7406, "step": 10305 }, { "epoch": 0.5234100341409552, "grad_norm": 0.033376507883610296, "learning_rate": 0.0009224419667894495, "loss": 0.7133, "step": 10310 }, { "epoch": 0.5236638702389866, "grad_norm": 0.035357948683628015, "learning_rate": 0.000922323420090964, "loss": 0.7319, "step": 10315 }, { "epoch": 0.5239177063370182, "grad_norm": 0.03301572995229905, "learning_rate": 0.0009222047904924975, "loss": 0.6991, "step": 10320 }, { "epoch": 0.5241715424350497, "grad_norm": 0.03497446617244138, "learning_rate": 0.000922086078017336, "loss": 0.7243, "step": 10325 }, { "epoch": 0.5244253785330812, "grad_norm": 0.0367280321867461, "learning_rate": 0.0009219672826887824, "loss": 0.7004, "step": 10330 }, { "epoch": 0.5246792146311127, "grad_norm": 0.03442735097417453, "learning_rate": 0.0009218484045301554, "loss": 0.7289, "step": 10335 }, { "epoch": 0.5249330507291442, "grad_norm": 0.0374239806964646, "learning_rate": 0.0009217294435647905, "loss": 0.7602, "step": 10340 }, { "epoch": 0.5251868868271757, "grad_norm": 0.04350508382851257, "learning_rate": 0.0009216103998160389, "loss": 0.7337, "step": 10345 }, { "epoch": 0.5254407229252072, "grad_norm": 0.03918882694541974, "learning_rate": 0.0009214912733072685, "loss": 0.7268, "step": 10350 }, { "epoch": 0.5256945590232387, "grad_norm": 0.042966253776470516, "learning_rate": 0.0009213720640618631, "loss": 0.7815, "step": 10355 }, { "epoch": 0.5259483951212702, "grad_norm": 0.03798046146210112, "learning_rate": 0.0009212527721032226, "loss": 0.7542, "step": 10360 }, { "epoch": 0.5262022312193017, "grad_norm": 0.03643727441098012, "learning_rate": 0.000921133397454764, "loss": 0.7037, "step": 10365 }, { "epoch": 0.5264560673173332, "grad_norm": 0.032106630942637875, "learning_rate": 0.0009210139401399197, "loss": 0.7547, "step": 10370 }, { "epoch": 0.5267099034153647, "grad_norm": 0.03728160589107629, "learning_rate": 0.0009208944001821384, "loss": 0.7203, "step": 10375 }, { "epoch": 0.5269637395133961, "grad_norm": 0.03766350757955131, "learning_rate": 0.0009207747776048855, "loss": 0.7587, "step": 10380 }, { "epoch": 0.5272175756114277, "grad_norm": 0.03486739393710542, "learning_rate": 0.000920655072431642, "loss": 0.7108, "step": 10385 }, { "epoch": 0.5274714117094592, "grad_norm": 0.034879765461550485, "learning_rate": 0.0009205352846859056, "loss": 0.732, "step": 10390 }, { "epoch": 0.5277252478074907, "grad_norm": 0.03632559594561831, "learning_rate": 0.0009204154143911903, "loss": 0.7443, "step": 10395 }, { "epoch": 0.5279790839055222, "grad_norm": 0.03416968713925031, "learning_rate": 0.0009202954615710256, "loss": 0.7097, "step": 10400 }, { "epoch": 0.5282329200035537, "grad_norm": 0.0422966120383042, "learning_rate": 0.0009201754262489575, "loss": 0.7522, "step": 10405 }, { "epoch": 0.5284867561015852, "grad_norm": 0.04209162953501601, "learning_rate": 0.0009200553084485491, "loss": 0.7469, "step": 10410 }, { "epoch": 0.5287405921996167, "grad_norm": 0.03358529965263969, "learning_rate": 0.0009199351081933781, "loss": 0.7069, "step": 10415 }, { "epoch": 0.5289944282976482, "grad_norm": 0.039061627946438006, "learning_rate": 0.0009198148255070398, "loss": 0.6982, "step": 10420 }, { "epoch": 0.5292482643956797, "grad_norm": 0.038792967773250205, "learning_rate": 0.0009196944604131448, "loss": 0.7114, "step": 10425 }, { "epoch": 0.5295021004937112, "grad_norm": 0.037852379292268395, "learning_rate": 0.0009195740129353202, "loss": 0.7009, "step": 10430 }, { "epoch": 0.5297559365917427, "grad_norm": 0.03389491795845682, "learning_rate": 0.0009194534830972092, "loss": 0.7537, "step": 10435 }, { "epoch": 0.5300097726897742, "grad_norm": 0.038007572799232225, "learning_rate": 0.0009193328709224714, "loss": 0.7251, "step": 10440 }, { "epoch": 0.5302636087878058, "grad_norm": 0.0352226574217384, "learning_rate": 0.0009192121764347822, "loss": 0.7095, "step": 10445 }, { "epoch": 0.5305174448858372, "grad_norm": 0.04116691719100545, "learning_rate": 0.0009190913996578334, "loss": 0.7287, "step": 10450 }, { "epoch": 0.5307712809838687, "grad_norm": 0.043473760998452696, "learning_rate": 0.000918970540615333, "loss": 0.7252, "step": 10455 }, { "epoch": 0.5310251170819003, "grad_norm": 0.03764355600108254, "learning_rate": 0.0009188495993310046, "loss": 0.7514, "step": 10460 }, { "epoch": 0.5312789531799317, "grad_norm": 0.03635808549747912, "learning_rate": 0.0009187285758285889, "loss": 0.7095, "step": 10465 }, { "epoch": 0.5315327892779632, "grad_norm": 0.03610853059951015, "learning_rate": 0.0009186074701318419, "loss": 0.702, "step": 10470 }, { "epoch": 0.5317866253759947, "grad_norm": 0.034962430686057616, "learning_rate": 0.0009184862822645359, "loss": 0.7416, "step": 10475 }, { "epoch": 0.5320404614740262, "grad_norm": 0.039910360369028564, "learning_rate": 0.0009183650122504598, "loss": 0.7449, "step": 10480 }, { "epoch": 0.5322942975720577, "grad_norm": 0.03566367198926247, "learning_rate": 0.0009182436601134184, "loss": 0.7142, "step": 10485 }, { "epoch": 0.5325481336700892, "grad_norm": 0.03276862929379303, "learning_rate": 0.0009181222258772319, "loss": 0.7164, "step": 10490 }, { "epoch": 0.5328019697681208, "grad_norm": 0.03320785192567572, "learning_rate": 0.0009180007095657379, "loss": 0.7027, "step": 10495 }, { "epoch": 0.5330558058661522, "grad_norm": 0.031468117088315296, "learning_rate": 0.0009178791112027891, "loss": 0.7142, "step": 10500 }, { "epoch": 0.5333096419641837, "grad_norm": 0.038279574945412745, "learning_rate": 0.0009177574308122547, "loss": 0.79, "step": 10505 }, { "epoch": 0.5335634780622153, "grad_norm": 0.035326107412672614, "learning_rate": 0.00091763566841802, "loss": 0.7482, "step": 10510 }, { "epoch": 0.5338173141602467, "grad_norm": 0.032587576541738085, "learning_rate": 0.0009175138240439864, "loss": 0.6957, "step": 10515 }, { "epoch": 0.5340711502582782, "grad_norm": 0.03482324428201422, "learning_rate": 0.0009173918977140713, "loss": 0.6801, "step": 10520 }, { "epoch": 0.5343249863563098, "grad_norm": 0.03684435488251196, "learning_rate": 0.0009172698894522082, "loss": 0.7228, "step": 10525 }, { "epoch": 0.5345788224543412, "grad_norm": 0.0377856637150958, "learning_rate": 0.0009171477992823467, "loss": 0.7447, "step": 10530 }, { "epoch": 0.5348326585523727, "grad_norm": 0.03848120594000335, "learning_rate": 0.0009170256272284525, "loss": 0.6925, "step": 10535 }, { "epoch": 0.5350864946504043, "grad_norm": 0.062322900891558126, "learning_rate": 0.0009169033733145074, "loss": 0.7254, "step": 10540 }, { "epoch": 0.5353403307484358, "grad_norm": 0.04148689743652893, "learning_rate": 0.0009167810375645091, "loss": 0.7547, "step": 10545 }, { "epoch": 0.5355941668464672, "grad_norm": 0.03605948490854419, "learning_rate": 0.0009166586200024717, "loss": 0.7007, "step": 10550 }, { "epoch": 0.5358480029444987, "grad_norm": 0.03804346335563033, "learning_rate": 0.000916536120652425, "loss": 0.736, "step": 10555 }, { "epoch": 0.5361018390425303, "grad_norm": 0.09246169339417708, "learning_rate": 0.0009164135395384151, "loss": 0.721, "step": 10560 }, { "epoch": 0.5363556751405617, "grad_norm": 0.03680821584524964, "learning_rate": 0.0009162908766845041, "loss": 0.7014, "step": 10565 }, { "epoch": 0.5366095112385932, "grad_norm": 0.042204421982086945, "learning_rate": 0.00091616813211477, "loss": 0.6807, "step": 10570 }, { "epoch": 0.5368633473366248, "grad_norm": 0.040437145778612446, "learning_rate": 0.0009160453058533071, "loss": 0.7447, "step": 10575 }, { "epoch": 0.5371171834346562, "grad_norm": 0.037372953125507986, "learning_rate": 0.0009159223979242253, "loss": 0.7467, "step": 10580 }, { "epoch": 0.5373710195326877, "grad_norm": 0.04002903761693803, "learning_rate": 0.0009157994083516511, "loss": 0.7856, "step": 10585 }, { "epoch": 0.5376248556307193, "grad_norm": 0.03403435579098075, "learning_rate": 0.0009156763371597266, "loss": 0.7337, "step": 10590 }, { "epoch": 0.5378786917287507, "grad_norm": 0.03856691446627195, "learning_rate": 0.0009155531843726101, "loss": 0.7324, "step": 10595 }, { "epoch": 0.5381325278267822, "grad_norm": 0.04571291575047048, "learning_rate": 0.0009154299500144758, "loss": 0.7348, "step": 10600 }, { "epoch": 0.5383863639248138, "grad_norm": 0.10055337339343262, "learning_rate": 0.0009153066341095142, "loss": 0.7756, "step": 10605 }, { "epoch": 0.5386402000228453, "grad_norm": 0.03926634626263224, "learning_rate": 0.0009151832366819314, "loss": 0.7441, "step": 10610 }, { "epoch": 0.5388940361208767, "grad_norm": 0.039160873090234305, "learning_rate": 0.0009150597577559496, "loss": 0.8006, "step": 10615 }, { "epoch": 0.5391478722189083, "grad_norm": 0.04042996020160884, "learning_rate": 0.0009149361973558075, "loss": 0.6958, "step": 10620 }, { "epoch": 0.5394017083169398, "grad_norm": 0.035886541047967485, "learning_rate": 0.000914812555505759, "loss": 0.7149, "step": 10625 }, { "epoch": 0.5396555444149712, "grad_norm": 0.03256754929572304, "learning_rate": 0.0009146888322300745, "loss": 0.7196, "step": 10630 }, { "epoch": 0.5399093805130027, "grad_norm": 0.03272956573908359, "learning_rate": 0.0009145650275530404, "loss": 0.7143, "step": 10635 }, { "epoch": 0.5401632166110343, "grad_norm": 0.03264281601198753, "learning_rate": 0.0009144411414989587, "loss": 0.702, "step": 10640 }, { "epoch": 0.5404170527090657, "grad_norm": 0.034229019473154564, "learning_rate": 0.0009143171740921479, "loss": 0.7284, "step": 10645 }, { "epoch": 0.5406708888070972, "grad_norm": 0.037501592055498946, "learning_rate": 0.0009141931253569418, "loss": 0.7358, "step": 10650 }, { "epoch": 0.5409247249051288, "grad_norm": 0.036299076714506155, "learning_rate": 0.000914068995317691, "loss": 0.7472, "step": 10655 }, { "epoch": 0.5411785610031603, "grad_norm": 0.03465771115723038, "learning_rate": 0.0009139447839987613, "loss": 0.7023, "step": 10660 }, { "epoch": 0.5414323971011917, "grad_norm": 0.03468814899511266, "learning_rate": 0.0009138204914245347, "loss": 0.6968, "step": 10665 }, { "epoch": 0.5416862331992233, "grad_norm": 0.03377793847931315, "learning_rate": 0.0009136961176194094, "loss": 0.7316, "step": 10670 }, { "epoch": 0.5419400692972548, "grad_norm": 0.03576295324955629, "learning_rate": 0.0009135716626077994, "loss": 0.7031, "step": 10675 }, { "epoch": 0.5421939053952862, "grad_norm": 0.03519911061498888, "learning_rate": 0.0009134471264141345, "loss": 0.7329, "step": 10680 }, { "epoch": 0.5424477414933178, "grad_norm": 0.035157368019404577, "learning_rate": 0.0009133225090628605, "loss": 0.7358, "step": 10685 }, { "epoch": 0.5427015775913493, "grad_norm": 0.03913198001516238, "learning_rate": 0.0009131978105784394, "loss": 0.7662, "step": 10690 }, { "epoch": 0.5429554136893807, "grad_norm": 0.037307273404429846, "learning_rate": 0.0009130730309853483, "loss": 0.7049, "step": 10695 }, { "epoch": 0.5432092497874123, "grad_norm": 0.037331620905052786, "learning_rate": 0.0009129481703080816, "loss": 0.7587, "step": 10700 }, { "epoch": 0.5434630858854438, "grad_norm": 0.039172959458168566, "learning_rate": 0.0009128232285711482, "loss": 0.7303, "step": 10705 }, { "epoch": 0.5437169219834753, "grad_norm": 0.040707811119023725, "learning_rate": 0.0009126982057990738, "loss": 0.7382, "step": 10710 }, { "epoch": 0.5439707580815067, "grad_norm": 0.04379207113099689, "learning_rate": 0.0009125731020163998, "loss": 0.6892, "step": 10715 }, { "epoch": 0.5442245941795383, "grad_norm": 0.0337190671674599, "learning_rate": 0.0009124479172476833, "loss": 0.7186, "step": 10720 }, { "epoch": 0.5444784302775698, "grad_norm": 0.03786871470376239, "learning_rate": 0.0009123226515174976, "loss": 0.6818, "step": 10725 }, { "epoch": 0.5447322663756012, "grad_norm": 0.03573360698653606, "learning_rate": 0.0009121973048504316, "loss": 0.742, "step": 10730 }, { "epoch": 0.5449861024736328, "grad_norm": 0.03349076154454063, "learning_rate": 0.0009120718772710903, "loss": 0.7243, "step": 10735 }, { "epoch": 0.5452399385716643, "grad_norm": 0.03466572992285185, "learning_rate": 0.0009119463688040945, "loss": 0.7112, "step": 10740 }, { "epoch": 0.5454937746696957, "grad_norm": 0.03033847588515325, "learning_rate": 0.0009118207794740809, "loss": 0.6877, "step": 10745 }, { "epoch": 0.5457476107677273, "grad_norm": 0.03336423897729239, "learning_rate": 0.000911695109305702, "loss": 0.7139, "step": 10750 }, { "epoch": 0.5460014468657588, "grad_norm": 0.03364043289085448, "learning_rate": 0.0009115693583236263, "loss": 0.7254, "step": 10755 }, { "epoch": 0.5462552829637903, "grad_norm": 0.036788238427299245, "learning_rate": 0.0009114435265525381, "loss": 0.6917, "step": 10760 }, { "epoch": 0.5465091190618218, "grad_norm": 0.0342284930737131, "learning_rate": 0.0009113176140171373, "loss": 0.7417, "step": 10765 }, { "epoch": 0.5467629551598533, "grad_norm": 0.035269077172184804, "learning_rate": 0.0009111916207421402, "loss": 0.7237, "step": 10770 }, { "epoch": 0.5470167912578848, "grad_norm": 0.04040023568667746, "learning_rate": 0.0009110655467522786, "loss": 0.7489, "step": 10775 }, { "epoch": 0.5472706273559163, "grad_norm": 0.03302390213165727, "learning_rate": 0.0009109393920723001, "loss": 0.7001, "step": 10780 }, { "epoch": 0.5475244634539478, "grad_norm": 0.03330559833593343, "learning_rate": 0.0009108131567269684, "loss": 0.7193, "step": 10785 }, { "epoch": 0.5477782995519793, "grad_norm": 0.037068870552010635, "learning_rate": 0.0009106868407410627, "loss": 0.7413, "step": 10790 }, { "epoch": 0.5480321356500107, "grad_norm": 0.03348583636866603, "learning_rate": 0.0009105604441393782, "loss": 0.7443, "step": 10795 }, { "epoch": 0.5482859717480423, "grad_norm": 0.042660125387794706, "learning_rate": 0.0009104339669467261, "loss": 0.7081, "step": 10800 }, { "epoch": 0.5485398078460738, "grad_norm": 0.03502634325832925, "learning_rate": 0.0009103074091879331, "loss": 0.7428, "step": 10805 }, { "epoch": 0.5487936439441052, "grad_norm": 0.03800223908309727, "learning_rate": 0.0009101807708878418, "loss": 0.7235, "step": 10810 }, { "epoch": 0.5490474800421368, "grad_norm": 0.034721513247652616, "learning_rate": 0.0009100540520713108, "loss": 0.72, "step": 10815 }, { "epoch": 0.5493013161401683, "grad_norm": 0.0407840766617128, "learning_rate": 0.0009099272527632142, "loss": 0.7237, "step": 10820 }, { "epoch": 0.5495551522381998, "grad_norm": 0.03707527432883739, "learning_rate": 0.0009098003729884423, "loss": 0.7211, "step": 10825 }, { "epoch": 0.5498089883362313, "grad_norm": 0.04927622635025202, "learning_rate": 0.0009096734127719007, "loss": 0.7141, "step": 10830 }, { "epoch": 0.5500628244342628, "grad_norm": 0.03515764252380142, "learning_rate": 0.0009095463721385113, "loss": 0.7296, "step": 10835 }, { "epoch": 0.5503166605322943, "grad_norm": 0.04953150387586727, "learning_rate": 0.0009094192511132116, "loss": 0.715, "step": 10840 }, { "epoch": 0.5505704966303258, "grad_norm": 0.03970125231079747, "learning_rate": 0.0009092920497209545, "loss": 0.706, "step": 10845 }, { "epoch": 0.5508243327283573, "grad_norm": 0.07599017119312387, "learning_rate": 0.0009091647679867092, "loss": 0.7345, "step": 10850 }, { "epoch": 0.5510781688263888, "grad_norm": 0.08976349742424448, "learning_rate": 0.0009090374059354605, "loss": 0.7858, "step": 10855 }, { "epoch": 0.5513320049244202, "grad_norm": 0.14264195623155482, "learning_rate": 0.0009089099635922089, "loss": 0.8207, "step": 10860 }, { "epoch": 0.5515858410224518, "grad_norm": 0.06484385166374915, "learning_rate": 0.0009087824409819706, "loss": 0.7949, "step": 10865 }, { "epoch": 0.5518396771204833, "grad_norm": 0.04481853015962915, "learning_rate": 0.0009086548381297778, "loss": 0.8211, "step": 10870 }, { "epoch": 0.5520935132185149, "grad_norm": 0.04333300088743705, "learning_rate": 0.0009085271550606782, "loss": 0.763, "step": 10875 }, { "epoch": 0.5523473493165463, "grad_norm": 0.041320945473904296, "learning_rate": 0.0009083993917997354, "loss": 0.7216, "step": 10880 }, { "epoch": 0.5526011854145778, "grad_norm": 0.04849825702330029, "learning_rate": 0.0009082715483720287, "loss": 0.7211, "step": 10885 }, { "epoch": 0.5528550215126093, "grad_norm": 0.03446435581655918, "learning_rate": 0.000908143624802653, "loss": 0.7882, "step": 10890 }, { "epoch": 0.5531088576106408, "grad_norm": 0.04067135614622352, "learning_rate": 0.0009080156211167192, "loss": 0.7051, "step": 10895 }, { "epoch": 0.5533626937086723, "grad_norm": 0.040213862829815504, "learning_rate": 0.0009078875373393538, "loss": 0.7647, "step": 10900 }, { "epoch": 0.5536165298067038, "grad_norm": 0.044625308130710604, "learning_rate": 0.0009077593734956988, "loss": 0.7407, "step": 10905 }, { "epoch": 0.5538703659047353, "grad_norm": 0.04112290174595482, "learning_rate": 0.0009076311296109125, "loss": 0.7896, "step": 10910 }, { "epoch": 0.5541242020027668, "grad_norm": 0.0355088918786764, "learning_rate": 0.0009075028057101682, "loss": 0.7392, "step": 10915 }, { "epoch": 0.5543780381007983, "grad_norm": 0.03559312892920717, "learning_rate": 0.0009073744018186554, "loss": 0.7423, "step": 10920 }, { "epoch": 0.5546318741988299, "grad_norm": 0.034391056694087804, "learning_rate": 0.0009072459179615789, "loss": 0.8049, "step": 10925 }, { "epoch": 0.5548857102968613, "grad_norm": 0.0372937041263195, "learning_rate": 0.0009071173541641598, "loss": 0.7024, "step": 10930 }, { "epoch": 0.5551395463948928, "grad_norm": 0.035148893827114204, "learning_rate": 0.0009069887104516344, "loss": 0.7501, "step": 10935 }, { "epoch": 0.5553933824929244, "grad_norm": 0.03615029924789864, "learning_rate": 0.0009068599868492549, "loss": 0.7342, "step": 10940 }, { "epoch": 0.5556472185909558, "grad_norm": 0.03948263716967377, "learning_rate": 0.0009067311833822887, "loss": 0.7502, "step": 10945 }, { "epoch": 0.5559010546889873, "grad_norm": 0.034158939690651015, "learning_rate": 0.0009066023000760198, "loss": 0.719, "step": 10950 }, { "epoch": 0.5561548907870189, "grad_norm": 0.0340666404317092, "learning_rate": 0.0009064733369557469, "loss": 0.7026, "step": 10955 }, { "epoch": 0.5564087268850503, "grad_norm": 0.03948750773905488, "learning_rate": 0.0009063442940467852, "loss": 0.728, "step": 10960 }, { "epoch": 0.5566625629830818, "grad_norm": 0.03436625710799207, "learning_rate": 0.0009062151713744649, "loss": 0.6965, "step": 10965 }, { "epoch": 0.5569163990811133, "grad_norm": 0.03448538307217493, "learning_rate": 0.0009060859689641323, "loss": 0.7512, "step": 10970 }, { "epoch": 0.5571702351791449, "grad_norm": 0.03159484338881019, "learning_rate": 0.0009059566868411492, "loss": 0.737, "step": 10975 }, { "epoch": 0.5574240712771763, "grad_norm": 0.03220625725553096, "learning_rate": 0.0009058273250308929, "loss": 0.7596, "step": 10980 }, { "epoch": 0.5576779073752078, "grad_norm": 0.03619855975481962, "learning_rate": 0.0009056978835587566, "loss": 0.7549, "step": 10985 }, { "epoch": 0.5579317434732394, "grad_norm": 0.031575206416220476, "learning_rate": 0.0009055683624501489, "loss": 0.7345, "step": 10990 }, { "epoch": 0.5581855795712708, "grad_norm": 0.03409608391049796, "learning_rate": 0.0009054387617304945, "loss": 0.7169, "step": 10995 }, { "epoch": 0.5584394156693023, "grad_norm": 0.03368295381640171, "learning_rate": 0.0009053090814252327, "loss": 0.7109, "step": 11000 }, { "epoch": 0.5586932517673339, "grad_norm": 0.03245432954328365, "learning_rate": 0.0009051793215598197, "loss": 0.7216, "step": 11005 }, { "epoch": 0.5589470878653653, "grad_norm": 0.03191551354434989, "learning_rate": 0.0009050494821597264, "loss": 0.7492, "step": 11010 }, { "epoch": 0.5592009239633968, "grad_norm": 0.033639532669209285, "learning_rate": 0.0009049195632504399, "loss": 0.7482, "step": 11015 }, { "epoch": 0.5594547600614284, "grad_norm": 0.035774288769859705, "learning_rate": 0.0009047895648574623, "loss": 0.7443, "step": 11020 }, { "epoch": 0.5597085961594598, "grad_norm": 0.03496111194453094, "learning_rate": 0.0009046594870063118, "loss": 0.7198, "step": 11025 }, { "epoch": 0.5599624322574913, "grad_norm": 0.03381488215464521, "learning_rate": 0.0009045293297225221, "loss": 0.6885, "step": 11030 }, { "epoch": 0.5602162683555229, "grad_norm": 0.03629614467339623, "learning_rate": 0.0009043990930316424, "loss": 0.7231, "step": 11035 }, { "epoch": 0.5604701044535544, "grad_norm": 0.03412953521748129, "learning_rate": 0.0009042687769592375, "loss": 0.7413, "step": 11040 }, { "epoch": 0.5607239405515858, "grad_norm": 0.033139772183159395, "learning_rate": 0.0009041383815308877, "loss": 0.7135, "step": 11045 }, { "epoch": 0.5609777766496173, "grad_norm": 0.03128812189744942, "learning_rate": 0.0009040079067721889, "loss": 0.6974, "step": 11050 }, { "epoch": 0.5612316127476489, "grad_norm": 0.03517651196222228, "learning_rate": 0.0009038773527087529, "loss": 0.6883, "step": 11055 }, { "epoch": 0.5614854488456803, "grad_norm": 0.04318105068415238, "learning_rate": 0.0009037467193662068, "loss": 0.7197, "step": 11060 }, { "epoch": 0.5617392849437118, "grad_norm": 0.039320371372748346, "learning_rate": 0.0009036160067701931, "loss": 0.6981, "step": 11065 }, { "epoch": 0.5619931210417434, "grad_norm": 0.033604356846894654, "learning_rate": 0.00090348521494637, "loss": 0.7537, "step": 11070 }, { "epoch": 0.5622469571397748, "grad_norm": 0.03203855164992912, "learning_rate": 0.0009033543439204114, "loss": 0.7243, "step": 11075 }, { "epoch": 0.5625007932378063, "grad_norm": 0.033160389142322846, "learning_rate": 0.0009032233937180067, "loss": 0.7136, "step": 11080 }, { "epoch": 0.5627546293358379, "grad_norm": 0.033784592965270435, "learning_rate": 0.0009030923643648607, "loss": 0.7142, "step": 11085 }, { "epoch": 0.5630084654338694, "grad_norm": 0.031310977781935, "learning_rate": 0.0009029612558866938, "loss": 0.6982, "step": 11090 }, { "epoch": 0.5632623015319008, "grad_norm": 0.03000718813519413, "learning_rate": 0.0009028300683092418, "loss": 0.7095, "step": 11095 }, { "epoch": 0.5635161376299324, "grad_norm": 0.030900059006542264, "learning_rate": 0.0009026988016582564, "loss": 0.7392, "step": 11100 }, { "epoch": 0.5637699737279639, "grad_norm": 0.030686185796535138, "learning_rate": 0.0009025674559595045, "loss": 0.7397, "step": 11105 }, { "epoch": 0.5640238098259953, "grad_norm": 0.03290532594461259, "learning_rate": 0.0009024360312387687, "loss": 0.7231, "step": 11110 }, { "epoch": 0.5642776459240268, "grad_norm": 0.0333214667151388, "learning_rate": 0.0009023045275218467, "loss": 0.7055, "step": 11115 }, { "epoch": 0.5645314820220584, "grad_norm": 0.034399721663477746, "learning_rate": 0.0009021729448345524, "loss": 0.7231, "step": 11120 }, { "epoch": 0.5647853181200898, "grad_norm": 0.0398413093906201, "learning_rate": 0.0009020412832027146, "loss": 0.7195, "step": 11125 }, { "epoch": 0.5650391542181213, "grad_norm": 0.03135209419910256, "learning_rate": 0.0009019095426521779, "loss": 0.7171, "step": 11130 }, { "epoch": 0.5652929903161529, "grad_norm": 0.032887361498149796, "learning_rate": 0.0009017777232088023, "loss": 0.7407, "step": 11135 }, { "epoch": 0.5655468264141844, "grad_norm": 0.032256225783887045, "learning_rate": 0.0009016458248984632, "loss": 0.7743, "step": 11140 }, { "epoch": 0.5658006625122158, "grad_norm": 0.03322035043253819, "learning_rate": 0.0009015138477470516, "loss": 0.7183, "step": 11145 }, { "epoch": 0.5660544986102474, "grad_norm": 0.032144170238580655, "learning_rate": 0.0009013817917804743, "loss": 0.7417, "step": 11150 }, { "epoch": 0.5663083347082789, "grad_norm": 0.032374828928639944, "learning_rate": 0.0009012496570246529, "loss": 0.7367, "step": 11155 }, { "epoch": 0.5665621708063103, "grad_norm": 0.029763369762551974, "learning_rate": 0.0009011174435055247, "loss": 0.6813, "step": 11160 }, { "epoch": 0.5668160069043419, "grad_norm": 0.031175217510649757, "learning_rate": 0.0009009851512490428, "loss": 0.7098, "step": 11165 }, { "epoch": 0.5670698430023734, "grad_norm": 0.03221470488708611, "learning_rate": 0.0009008527802811754, "loss": 0.6963, "step": 11170 }, { "epoch": 0.5673236791004048, "grad_norm": 0.03578699245771891, "learning_rate": 0.0009007203306279064, "loss": 0.7253, "step": 11175 }, { "epoch": 0.5675775151984364, "grad_norm": 0.03327146626826266, "learning_rate": 0.0009005878023152348, "loss": 0.7235, "step": 11180 }, { "epoch": 0.5678313512964679, "grad_norm": 0.03395503867865811, "learning_rate": 0.0009004551953691754, "loss": 0.7258, "step": 11185 }, { "epoch": 0.5680851873944994, "grad_norm": 0.03732510021778575, "learning_rate": 0.000900322509815758, "loss": 0.7198, "step": 11190 }, { "epoch": 0.5683390234925308, "grad_norm": 0.03563520162684654, "learning_rate": 0.0009001897456810286, "loss": 0.7232, "step": 11195 }, { "epoch": 0.5685928595905624, "grad_norm": 0.03134477322381021, "learning_rate": 0.0009000569029910477, "loss": 0.7569, "step": 11200 }, { "epoch": 0.5688466956885939, "grad_norm": 0.053746607667234055, "learning_rate": 0.0008999239817718918, "loss": 0.7336, "step": 11205 }, { "epoch": 0.5691005317866253, "grad_norm": 0.03655234039908247, "learning_rate": 0.0008997909820496528, "loss": 0.723, "step": 11210 }, { "epoch": 0.5693543678846569, "grad_norm": 0.03136033997205398, "learning_rate": 0.0008996579038504376, "loss": 0.7228, "step": 11215 }, { "epoch": 0.5696082039826884, "grad_norm": 0.035456500828193745, "learning_rate": 0.0008995247472003691, "loss": 0.7376, "step": 11220 }, { "epoch": 0.5698620400807198, "grad_norm": 0.03802644847369904, "learning_rate": 0.0008993915121255852, "loss": 0.7507, "step": 11225 }, { "epoch": 0.5701158761787514, "grad_norm": 0.03483813902231033, "learning_rate": 0.0008992581986522392, "loss": 0.7229, "step": 11230 }, { "epoch": 0.5703697122767829, "grad_norm": 0.03643633007050667, "learning_rate": 0.0008991248068064999, "loss": 0.6978, "step": 11235 }, { "epoch": 0.5706235483748144, "grad_norm": 0.030969706546308384, "learning_rate": 0.0008989913366145515, "loss": 0.698, "step": 11240 }, { "epoch": 0.5708773844728459, "grad_norm": 0.033131510784830544, "learning_rate": 0.0008988577881025935, "loss": 0.7179, "step": 11245 }, { "epoch": 0.5711312205708774, "grad_norm": 0.03554738763952332, "learning_rate": 0.0008987241612968406, "loss": 0.7393, "step": 11250 }, { "epoch": 0.5713850566689089, "grad_norm": 0.034447386700794586, "learning_rate": 0.0008985904562235234, "loss": 0.7169, "step": 11255 }, { "epoch": 0.5716388927669404, "grad_norm": 0.03751999950367249, "learning_rate": 0.0008984566729088874, "loss": 0.7013, "step": 11260 }, { "epoch": 0.5718927288649719, "grad_norm": 0.03231371442094389, "learning_rate": 0.0008983228113791937, "loss": 0.7343, "step": 11265 }, { "epoch": 0.5721465649630034, "grad_norm": 0.034946147171277434, "learning_rate": 0.0008981888716607184, "loss": 0.7105, "step": 11270 }, { "epoch": 0.5724004010610348, "grad_norm": 0.03513798457157326, "learning_rate": 0.0008980548537797535, "loss": 0.7134, "step": 11275 }, { "epoch": 0.5726542371590664, "grad_norm": 0.03469097895898645, "learning_rate": 0.0008979207577626058, "loss": 0.7663, "step": 11280 }, { "epoch": 0.5729080732570979, "grad_norm": 0.03185349439698724, "learning_rate": 0.0008977865836355979, "loss": 0.6968, "step": 11285 }, { "epoch": 0.5731619093551293, "grad_norm": 0.035042241245465046, "learning_rate": 0.0008976523314250672, "loss": 0.7146, "step": 11290 }, { "epoch": 0.5734157454531609, "grad_norm": 0.04459176279651248, "learning_rate": 0.0008975180011573669, "loss": 0.7019, "step": 11295 }, { "epoch": 0.5736695815511924, "grad_norm": 0.04130301497761747, "learning_rate": 0.0008973835928588656, "loss": 0.7122, "step": 11300 }, { "epoch": 0.5739234176492239, "grad_norm": 0.03383281613908766, "learning_rate": 0.0008972491065559467, "loss": 0.6867, "step": 11305 }, { "epoch": 0.5741772537472554, "grad_norm": 0.03296718445027936, "learning_rate": 0.0008971145422750094, "loss": 0.6578, "step": 11310 }, { "epoch": 0.5744310898452869, "grad_norm": 0.03306071959318663, "learning_rate": 0.0008969799000424676, "loss": 0.7618, "step": 11315 }, { "epoch": 0.5746849259433184, "grad_norm": 0.037882904853234214, "learning_rate": 0.0008968451798847513, "loss": 0.7019, "step": 11320 }, { "epoch": 0.5749387620413499, "grad_norm": 0.03301165795976233, "learning_rate": 0.0008967103818283051, "loss": 0.7322, "step": 11325 }, { "epoch": 0.5751925981393814, "grad_norm": 0.033831060555760954, "learning_rate": 0.0008965755058995896, "loss": 0.7358, "step": 11330 }, { "epoch": 0.5754464342374129, "grad_norm": 0.03551812457951173, "learning_rate": 0.0008964405521250798, "loss": 0.7054, "step": 11335 }, { "epoch": 0.5757002703354444, "grad_norm": 0.03422472541766102, "learning_rate": 0.0008963055205312667, "loss": 0.7149, "step": 11340 }, { "epoch": 0.5759541064334759, "grad_norm": 0.040683630332860744, "learning_rate": 0.0008961704111446564, "loss": 0.7094, "step": 11345 }, { "epoch": 0.5762079425315074, "grad_norm": 0.03587168023221342, "learning_rate": 0.00089603522399177, "loss": 0.6746, "step": 11350 }, { "epoch": 0.576461778629539, "grad_norm": 0.0328497490703075, "learning_rate": 0.0008958999590991441, "loss": 0.73, "step": 11355 }, { "epoch": 0.5767156147275704, "grad_norm": 0.039639078903490256, "learning_rate": 0.0008957646164933307, "loss": 0.7188, "step": 11360 }, { "epoch": 0.5769694508256019, "grad_norm": 0.03382589958924203, "learning_rate": 0.0008956291962008967, "loss": 0.7188, "step": 11365 }, { "epoch": 0.5772232869236334, "grad_norm": 0.036761155915138746, "learning_rate": 0.0008954936982484245, "loss": 0.6869, "step": 11370 }, { "epoch": 0.5774771230216649, "grad_norm": 0.03487880134430476, "learning_rate": 0.0008953581226625116, "loss": 0.7084, "step": 11375 }, { "epoch": 0.5777309591196964, "grad_norm": 0.04249535318746409, "learning_rate": 0.000895222469469771, "loss": 0.78, "step": 11380 }, { "epoch": 0.5779847952177279, "grad_norm": 0.03431845530450295, "learning_rate": 0.0008950867386968305, "loss": 0.6852, "step": 11385 }, { "epoch": 0.5782386313157594, "grad_norm": 0.03183811797316092, "learning_rate": 0.0008949509303703336, "loss": 0.7445, "step": 11390 }, { "epoch": 0.5784924674137909, "grad_norm": 0.033627428288341706, "learning_rate": 0.0008948150445169386, "loss": 0.7319, "step": 11395 }, { "epoch": 0.5787463035118224, "grad_norm": 0.03303488563414222, "learning_rate": 0.0008946790811633193, "loss": 0.6908, "step": 11400 }, { "epoch": 0.579000139609854, "grad_norm": 0.03626276302399094, "learning_rate": 0.0008945430403361647, "loss": 0.7515, "step": 11405 }, { "epoch": 0.5792539757078854, "grad_norm": 0.03072520788015523, "learning_rate": 0.0008944069220621788, "loss": 0.6644, "step": 11410 }, { "epoch": 0.5795078118059169, "grad_norm": 0.0318587004361506, "learning_rate": 0.000894270726368081, "loss": 0.7499, "step": 11415 }, { "epoch": 0.5797616479039485, "grad_norm": 0.03981006394854538, "learning_rate": 0.0008941344532806057, "loss": 0.7193, "step": 11420 }, { "epoch": 0.5800154840019799, "grad_norm": 0.03129119859919348, "learning_rate": 0.000893998102826503, "loss": 0.7354, "step": 11425 }, { "epoch": 0.5802693201000114, "grad_norm": 0.10844846974108842, "learning_rate": 0.0008938616750325375, "loss": 0.6861, "step": 11430 }, { "epoch": 0.580523156198043, "grad_norm": 0.03405416659519219, "learning_rate": 0.0008937251699254893, "loss": 0.7128, "step": 11435 }, { "epoch": 0.5807769922960744, "grad_norm": 0.03521638488754573, "learning_rate": 0.0008935885875321539, "loss": 0.7099, "step": 11440 }, { "epoch": 0.5810308283941059, "grad_norm": 0.03307958989219652, "learning_rate": 0.0008934519278793416, "loss": 0.7277, "step": 11445 }, { "epoch": 0.5812846644921374, "grad_norm": 0.03330598975628334, "learning_rate": 0.0008933151909938778, "loss": 0.6756, "step": 11450 }, { "epoch": 0.581538500590169, "grad_norm": 0.03683982986186212, "learning_rate": 0.0008931783769026036, "loss": 0.7215, "step": 11455 }, { "epoch": 0.5817923366882004, "grad_norm": 0.032106688804273094, "learning_rate": 0.0008930414856323747, "loss": 0.702, "step": 11460 }, { "epoch": 0.5820461727862319, "grad_norm": 0.031655159355476144, "learning_rate": 0.0008929045172100624, "loss": 0.7157, "step": 11465 }, { "epoch": 0.5823000088842635, "grad_norm": 0.03823078973996269, "learning_rate": 0.0008927674716625527, "loss": 0.7363, "step": 11470 }, { "epoch": 0.5825538449822949, "grad_norm": 0.029764104972562724, "learning_rate": 0.0008926303490167471, "loss": 0.7202, "step": 11475 }, { "epoch": 0.5828076810803264, "grad_norm": 0.0329286814745698, "learning_rate": 0.0008924931492995619, "loss": 0.7242, "step": 11480 }, { "epoch": 0.583061517178358, "grad_norm": 0.0351806907949456, "learning_rate": 0.000892355872537929, "loss": 0.7133, "step": 11485 }, { "epoch": 0.5833153532763894, "grad_norm": 0.03444304482449456, "learning_rate": 0.0008922185187587949, "loss": 0.7137, "step": 11490 }, { "epoch": 0.5835691893744209, "grad_norm": 0.03588276411210118, "learning_rate": 0.0008920810879891217, "loss": 0.7339, "step": 11495 }, { "epoch": 0.5838230254724525, "grad_norm": 0.03494206490675811, "learning_rate": 0.0008919435802558862, "loss": 0.6978, "step": 11500 }, { "epoch": 0.5840768615704839, "grad_norm": 0.038273809127194164, "learning_rate": 0.0008918059955860803, "loss": 0.7474, "step": 11505 }, { "epoch": 0.5843306976685154, "grad_norm": 0.0337320764854851, "learning_rate": 0.0008916683340067116, "loss": 0.728, "step": 11510 }, { "epoch": 0.584584533766547, "grad_norm": 0.03403558455649166, "learning_rate": 0.0008915305955448021, "loss": 0.7294, "step": 11515 }, { "epoch": 0.5848383698645785, "grad_norm": 0.03363313284704427, "learning_rate": 0.0008913927802273894, "loss": 0.705, "step": 11520 }, { "epoch": 0.5850922059626099, "grad_norm": 0.032020698674226994, "learning_rate": 0.0008912548880815256, "loss": 0.7084, "step": 11525 }, { "epoch": 0.5853460420606414, "grad_norm": 0.038334592522532275, "learning_rate": 0.0008911169191342785, "loss": 0.7091, "step": 11530 }, { "epoch": 0.585599878158673, "grad_norm": 0.03395444788379269, "learning_rate": 0.0008909788734127307, "loss": 0.7646, "step": 11535 }, { "epoch": 0.5858537142567044, "grad_norm": 0.03236337632515959, "learning_rate": 0.00089084075094398, "loss": 0.7415, "step": 11540 }, { "epoch": 0.5861075503547359, "grad_norm": 0.0361176626753951, "learning_rate": 0.0008907025517551388, "loss": 0.7355, "step": 11545 }, { "epoch": 0.5863613864527675, "grad_norm": 0.0329713551925033, "learning_rate": 0.0008905642758733352, "loss": 0.7051, "step": 11550 }, { "epoch": 0.5866152225507989, "grad_norm": 0.030299662958705953, "learning_rate": 0.000890425923325712, "loss": 0.7468, "step": 11555 }, { "epoch": 0.5868690586488304, "grad_norm": 0.035101333220567334, "learning_rate": 0.0008902874941394271, "loss": 0.729, "step": 11560 }, { "epoch": 0.587122894746862, "grad_norm": 0.03502371219367709, "learning_rate": 0.0008901489883416535, "loss": 0.714, "step": 11565 }, { "epoch": 0.5873767308448935, "grad_norm": 0.03796436650483986, "learning_rate": 0.0008900104059595791, "loss": 0.7069, "step": 11570 }, { "epoch": 0.5876305669429249, "grad_norm": 0.03521452229600496, "learning_rate": 0.000889871747020407, "loss": 0.6889, "step": 11575 }, { "epoch": 0.5878844030409565, "grad_norm": 0.03134645872618028, "learning_rate": 0.0008897330115513553, "loss": 0.7505, "step": 11580 }, { "epoch": 0.588138239138988, "grad_norm": 0.03487328507988493, "learning_rate": 0.0008895941995796569, "loss": 0.7323, "step": 11585 }, { "epoch": 0.5883920752370194, "grad_norm": 0.03433719088794085, "learning_rate": 0.0008894553111325601, "loss": 0.7487, "step": 11590 }, { "epoch": 0.588645911335051, "grad_norm": 0.034441578656597346, "learning_rate": 0.0008893163462373279, "loss": 0.7479, "step": 11595 }, { "epoch": 0.5888997474330825, "grad_norm": 0.03399889995340533, "learning_rate": 0.0008891773049212387, "loss": 0.7136, "step": 11600 }, { "epoch": 0.5891535835311139, "grad_norm": 0.029576420809220065, "learning_rate": 0.000889038187211585, "loss": 0.7257, "step": 11605 }, { "epoch": 0.5894074196291454, "grad_norm": 0.0320783430453033, "learning_rate": 0.0008888989931356754, "loss": 0.693, "step": 11610 }, { "epoch": 0.589661255727177, "grad_norm": 0.036210347888960326, "learning_rate": 0.0008887597227208331, "loss": 0.7252, "step": 11615 }, { "epoch": 0.5899150918252085, "grad_norm": 0.03256805442726564, "learning_rate": 0.0008886203759943957, "loss": 0.7249, "step": 11620 }, { "epoch": 0.5901689279232399, "grad_norm": 0.03304556521395128, "learning_rate": 0.0008884809529837167, "loss": 0.6657, "step": 11625 }, { "epoch": 0.5904227640212715, "grad_norm": 0.0321846326585193, "learning_rate": 0.0008883414537161638, "loss": 0.7279, "step": 11630 }, { "epoch": 0.590676600119303, "grad_norm": 0.034360828586935475, "learning_rate": 0.0008882018782191204, "loss": 0.7056, "step": 11635 }, { "epoch": 0.5909304362173344, "grad_norm": 0.03746487800157479, "learning_rate": 0.0008880622265199841, "loss": 0.7125, "step": 11640 }, { "epoch": 0.591184272315366, "grad_norm": 0.20178940923282512, "learning_rate": 0.0008879224986461681, "loss": 0.7701, "step": 11645 }, { "epoch": 0.5914381084133975, "grad_norm": 0.03553027243917922, "learning_rate": 0.0008877826946251002, "loss": 0.7554, "step": 11650 }, { "epoch": 0.5916919445114289, "grad_norm": 0.10817313347002717, "learning_rate": 0.0008876428144842231, "loss": 0.6791, "step": 11655 }, { "epoch": 0.5919457806094605, "grad_norm": 0.03716197708686335, "learning_rate": 0.0008875028582509948, "loss": 0.7037, "step": 11660 }, { "epoch": 0.592199616707492, "grad_norm": 0.034287086731817996, "learning_rate": 0.0008873628259528878, "loss": 0.7217, "step": 11665 }, { "epoch": 0.5924534528055235, "grad_norm": 0.11510406063883934, "learning_rate": 0.0008872227176173899, "loss": 0.7217, "step": 11670 }, { "epoch": 0.592707288903555, "grad_norm": 0.03373043520744403, "learning_rate": 0.0008870825332720036, "loss": 0.7067, "step": 11675 }, { "epoch": 0.5929611250015865, "grad_norm": 0.035054942678273866, "learning_rate": 0.0008869422729442465, "loss": 0.7084, "step": 11680 }, { "epoch": 0.593214961099618, "grad_norm": 0.03570582782695498, "learning_rate": 0.0008868019366616508, "loss": 0.7812, "step": 11685 }, { "epoch": 0.5934687971976494, "grad_norm": 0.03268150030325234, "learning_rate": 0.0008866615244517639, "loss": 0.6744, "step": 11690 }, { "epoch": 0.593722633295681, "grad_norm": 0.035010477442948856, "learning_rate": 0.000886521036342148, "loss": 0.7428, "step": 11695 }, { "epoch": 0.5939764693937125, "grad_norm": 0.03449024239843642, "learning_rate": 0.0008863804723603803, "loss": 0.7054, "step": 11700 }, { "epoch": 0.5942303054917439, "grad_norm": 0.03515940718680086, "learning_rate": 0.0008862398325340526, "loss": 0.7468, "step": 11705 }, { "epoch": 0.5944841415897755, "grad_norm": 0.03587024740838932, "learning_rate": 0.0008860991168907721, "loss": 0.7349, "step": 11710 }, { "epoch": 0.594737977687807, "grad_norm": 0.04467420951174001, "learning_rate": 0.0008859583254581605, "loss": 0.7721, "step": 11715 }, { "epoch": 0.5949918137858384, "grad_norm": 0.03602703755690845, "learning_rate": 0.0008858174582638543, "loss": 0.7202, "step": 11720 }, { "epoch": 0.59524564988387, "grad_norm": 0.03261527737536671, "learning_rate": 0.0008856765153355051, "loss": 0.7198, "step": 11725 }, { "epoch": 0.5954994859819015, "grad_norm": 0.03697350739040088, "learning_rate": 0.0008855354967007793, "loss": 0.6845, "step": 11730 }, { "epoch": 0.595753322079933, "grad_norm": 0.0515191368792027, "learning_rate": 0.0008853944023873581, "loss": 0.7352, "step": 11735 }, { "epoch": 0.5960071581779645, "grad_norm": 0.04022917723197039, "learning_rate": 0.0008852532324229379, "loss": 0.7607, "step": 11740 }, { "epoch": 0.596260994275996, "grad_norm": 0.03631784489980178, "learning_rate": 0.0008851119868352292, "loss": 0.7582, "step": 11745 }, { "epoch": 0.5965148303740275, "grad_norm": 0.03858924625769324, "learning_rate": 0.000884970665651958, "loss": 0.7822, "step": 11750 }, { "epoch": 0.596768666472059, "grad_norm": 0.03483632657213007, "learning_rate": 0.0008848292689008653, "loss": 0.7709, "step": 11755 }, { "epoch": 0.5970225025700905, "grad_norm": 0.03098400810635178, "learning_rate": 0.0008846877966097059, "loss": 0.7083, "step": 11760 }, { "epoch": 0.597276338668122, "grad_norm": 0.03431747521168104, "learning_rate": 0.0008845462488062506, "loss": 0.699, "step": 11765 }, { "epoch": 0.5975301747661534, "grad_norm": 0.036528093353988884, "learning_rate": 0.0008844046255182844, "loss": 0.7252, "step": 11770 }, { "epoch": 0.597784010864185, "grad_norm": 0.03283304795629409, "learning_rate": 0.0008842629267736072, "loss": 0.7119, "step": 11775 }, { "epoch": 0.5980378469622165, "grad_norm": 0.03150788761592889, "learning_rate": 0.0008841211526000339, "loss": 0.7497, "step": 11780 }, { "epoch": 0.598291683060248, "grad_norm": 0.030327322271371696, "learning_rate": 0.0008839793030253937, "loss": 0.7413, "step": 11785 }, { "epoch": 0.5985455191582795, "grad_norm": 0.03317783348337346, "learning_rate": 0.0008838373780775315, "loss": 0.7284, "step": 11790 }, { "epoch": 0.598799355256311, "grad_norm": 0.0337097411533264, "learning_rate": 0.000883695377784306, "loss": 0.7136, "step": 11795 }, { "epoch": 0.5990531913543425, "grad_norm": 0.03495336080893599, "learning_rate": 0.0008835533021735914, "loss": 0.709, "step": 11800 }, { "epoch": 0.599307027452374, "grad_norm": 0.03752041771088074, "learning_rate": 0.0008834111512732763, "loss": 0.7437, "step": 11805 }, { "epoch": 0.5995608635504055, "grad_norm": 0.03253791941249975, "learning_rate": 0.0008832689251112645, "loss": 0.7495, "step": 11810 }, { "epoch": 0.599814699648437, "grad_norm": 0.03292385981856066, "learning_rate": 0.0008831266237154738, "loss": 0.7646, "step": 11815 }, { "epoch": 0.6000685357464685, "grad_norm": 0.03439504525804222, "learning_rate": 0.0008829842471138376, "loss": 0.7161, "step": 11820 }, { "epoch": 0.6003223718445, "grad_norm": 0.03466985906223154, "learning_rate": 0.0008828417953343035, "loss": 0.6957, "step": 11825 }, { "epoch": 0.6005762079425315, "grad_norm": 0.032150915535208366, "learning_rate": 0.0008826992684048344, "loss": 0.6975, "step": 11830 }, { "epoch": 0.6008300440405631, "grad_norm": 0.03358189321784852, "learning_rate": 0.0008825566663534074, "loss": 0.6943, "step": 11835 }, { "epoch": 0.6010838801385945, "grad_norm": 0.03011447859496179, "learning_rate": 0.0008824139892080145, "loss": 0.7052, "step": 11840 }, { "epoch": 0.601337716236626, "grad_norm": 0.033920162463525395, "learning_rate": 0.0008822712369966628, "loss": 0.7157, "step": 11845 }, { "epoch": 0.6015915523346576, "grad_norm": 0.030182119266014225, "learning_rate": 0.0008821284097473734, "loss": 0.7093, "step": 11850 }, { "epoch": 0.601845388432689, "grad_norm": 0.03683502724416963, "learning_rate": 0.000881985507488183, "loss": 0.7047, "step": 11855 }, { "epoch": 0.6020992245307205, "grad_norm": 0.03406368675605946, "learning_rate": 0.0008818425302471424, "loss": 0.7146, "step": 11860 }, { "epoch": 0.602353060628752, "grad_norm": 0.06044432177804475, "learning_rate": 0.0008816994780523175, "loss": 0.6876, "step": 11865 }, { "epoch": 0.6026068967267835, "grad_norm": 0.03257299876006436, "learning_rate": 0.0008815563509317883, "loss": 0.7009, "step": 11870 }, { "epoch": 0.602860732824815, "grad_norm": 0.03129782635068647, "learning_rate": 0.0008814131489136506, "loss": 0.6831, "step": 11875 }, { "epoch": 0.6031145689228465, "grad_norm": 0.03486799099591141, "learning_rate": 0.0008812698720260135, "loss": 0.7341, "step": 11880 }, { "epoch": 0.6033684050208781, "grad_norm": 0.03169337068971384, "learning_rate": 0.000881126520297002, "loss": 0.6854, "step": 11885 }, { "epoch": 0.6036222411189095, "grad_norm": 0.03227991120930359, "learning_rate": 0.0008809830937547554, "loss": 0.7193, "step": 11890 }, { "epoch": 0.603876077216941, "grad_norm": 0.03404871647813845, "learning_rate": 0.0008808395924274274, "loss": 0.7128, "step": 11895 }, { "epoch": 0.6041299133149726, "grad_norm": 0.038573489213405115, "learning_rate": 0.0008806960163431866, "loss": 0.7199, "step": 11900 }, { "epoch": 0.604383749413004, "grad_norm": 0.030336828596648855, "learning_rate": 0.0008805523655302164, "loss": 0.6992, "step": 11905 }, { "epoch": 0.6046375855110355, "grad_norm": 0.03150338459765487, "learning_rate": 0.0008804086400167146, "loss": 0.7174, "step": 11910 }, { "epoch": 0.6048914216090671, "grad_norm": 0.03648731887813566, "learning_rate": 0.0008802648398308939, "loss": 0.7084, "step": 11915 }, { "epoch": 0.6051452577070985, "grad_norm": 0.031541691492384276, "learning_rate": 0.0008801209650009813, "loss": 0.7483, "step": 11920 }, { "epoch": 0.60539909380513, "grad_norm": 0.028663350481755875, "learning_rate": 0.0008799770155552192, "loss": 0.6485, "step": 11925 }, { "epoch": 0.6056529299031616, "grad_norm": 0.034607675452196285, "learning_rate": 0.0008798329915218638, "loss": 0.7258, "step": 11930 }, { "epoch": 0.605906766001193, "grad_norm": 0.03518071400678741, "learning_rate": 0.0008796888929291864, "loss": 0.7098, "step": 11935 }, { "epoch": 0.6061606020992245, "grad_norm": 0.03361788723206324, "learning_rate": 0.0008795447198054729, "loss": 0.7221, "step": 11940 }, { "epoch": 0.606414438197256, "grad_norm": 0.03504320688662401, "learning_rate": 0.0008794004721790235, "loss": 0.703, "step": 11945 }, { "epoch": 0.6066682742952876, "grad_norm": 0.03206296030277103, "learning_rate": 0.0008792561500781535, "loss": 0.6773, "step": 11950 }, { "epoch": 0.606922110393319, "grad_norm": 0.03605247432466843, "learning_rate": 0.0008791117535311928, "loss": 0.7285, "step": 11955 }, { "epoch": 0.6071759464913505, "grad_norm": 0.03324138683205842, "learning_rate": 0.0008789672825664854, "loss": 0.6661, "step": 11960 }, { "epoch": 0.6074297825893821, "grad_norm": 0.04022055855241283, "learning_rate": 0.0008788227372123902, "loss": 0.7589, "step": 11965 }, { "epoch": 0.6076836186874135, "grad_norm": 0.03888887289363954, "learning_rate": 0.0008786781174972811, "loss": 0.7269, "step": 11970 }, { "epoch": 0.607937454785445, "grad_norm": 0.03453261761524415, "learning_rate": 0.0008785334234495459, "loss": 0.7133, "step": 11975 }, { "epoch": 0.6081912908834766, "grad_norm": 0.03125071533624802, "learning_rate": 0.0008783886550975872, "loss": 0.654, "step": 11980 }, { "epoch": 0.608445126981508, "grad_norm": 0.03293160793531822, "learning_rate": 0.0008782438124698229, "loss": 0.7352, "step": 11985 }, { "epoch": 0.6086989630795395, "grad_norm": 0.03418622417305084, "learning_rate": 0.0008780988955946843, "loss": 0.6811, "step": 11990 }, { "epoch": 0.6089527991775711, "grad_norm": 0.03567791523316482, "learning_rate": 0.0008779539045006182, "loss": 0.7014, "step": 11995 }, { "epoch": 0.6092066352756026, "grad_norm": 0.03478386041096042, "learning_rate": 0.0008778088392160853, "loss": 0.7207, "step": 12000 }, { "epoch": 0.609460471373634, "grad_norm": 0.03417762549974565, "learning_rate": 0.0008776636997695615, "loss": 0.7194, "step": 12005 }, { "epoch": 0.6097143074716656, "grad_norm": 0.03340598928053876, "learning_rate": 0.0008775184861895369, "loss": 0.6504, "step": 12010 }, { "epoch": 0.6099681435696971, "grad_norm": 0.04876993418862145, "learning_rate": 0.0008773731985045162, "loss": 0.6956, "step": 12015 }, { "epoch": 0.6102219796677285, "grad_norm": 0.0333458906522881, "learning_rate": 0.0008772278367430185, "loss": 0.7087, "step": 12020 }, { "epoch": 0.61047581576576, "grad_norm": 0.03166774819067721, "learning_rate": 0.0008770824009335775, "loss": 0.6851, "step": 12025 }, { "epoch": 0.6107296518637916, "grad_norm": 0.03531447996435205, "learning_rate": 0.000876936891104742, "loss": 0.6927, "step": 12030 }, { "epoch": 0.610983487961823, "grad_norm": 0.0315993378287019, "learning_rate": 0.0008767913072850743, "loss": 0.6871, "step": 12035 }, { "epoch": 0.6112373240598545, "grad_norm": 0.029848654085361498, "learning_rate": 0.0008766456495031521, "loss": 0.7267, "step": 12040 }, { "epoch": 0.6114911601578861, "grad_norm": 0.033008132130975296, "learning_rate": 0.0008764999177875673, "loss": 0.7078, "step": 12045 }, { "epoch": 0.6117449962559176, "grad_norm": 0.030890425990117175, "learning_rate": 0.0008763541121669263, "loss": 0.6927, "step": 12050 }, { "epoch": 0.611998832353949, "grad_norm": 0.032269692852354315, "learning_rate": 0.0008762082326698498, "loss": 0.7103, "step": 12055 }, { "epoch": 0.6122526684519806, "grad_norm": 0.03230375613214236, "learning_rate": 0.0008760622793249735, "loss": 0.7457, "step": 12060 }, { "epoch": 0.6125065045500121, "grad_norm": 0.032435821206896025, "learning_rate": 0.0008759162521609472, "loss": 0.7036, "step": 12065 }, { "epoch": 0.6127603406480435, "grad_norm": 0.03485982385939947, "learning_rate": 0.0008757701512064351, "loss": 0.7033, "step": 12070 }, { "epoch": 0.6130141767460751, "grad_norm": 0.03191889176310094, "learning_rate": 0.0008756239764901165, "loss": 0.7102, "step": 12075 }, { "epoch": 0.6132680128441066, "grad_norm": 0.02940390838742497, "learning_rate": 0.0008754777280406845, "loss": 0.6456, "step": 12080 }, { "epoch": 0.613521848942138, "grad_norm": 0.031710442763311086, "learning_rate": 0.0008753314058868469, "loss": 0.7256, "step": 12085 }, { "epoch": 0.6137756850401695, "grad_norm": 0.03251919958552717, "learning_rate": 0.0008751850100573262, "loss": 0.6903, "step": 12090 }, { "epoch": 0.6140295211382011, "grad_norm": 0.03230466613804624, "learning_rate": 0.000875038540580859, "loss": 0.6854, "step": 12095 }, { "epoch": 0.6142833572362326, "grad_norm": 0.03523143057301895, "learning_rate": 0.0008748919974861967, "loss": 0.6804, "step": 12100 }, { "epoch": 0.614537193334264, "grad_norm": 0.03177491605918997, "learning_rate": 0.0008747453808021047, "loss": 0.7284, "step": 12105 }, { "epoch": 0.6147910294322956, "grad_norm": 0.03175514012482191, "learning_rate": 0.0008745986905573634, "loss": 0.6929, "step": 12110 }, { "epoch": 0.6150448655303271, "grad_norm": 0.029912284681016307, "learning_rate": 0.0008744519267807673, "loss": 0.6947, "step": 12115 }, { "epoch": 0.6152987016283585, "grad_norm": 0.0383303371432181, "learning_rate": 0.0008743050895011253, "loss": 0.7094, "step": 12120 }, { "epoch": 0.6155525377263901, "grad_norm": 0.0328702760330654, "learning_rate": 0.000874158178747261, "loss": 0.6879, "step": 12125 }, { "epoch": 0.6158063738244216, "grad_norm": 0.03523984637462038, "learning_rate": 0.000874011194548012, "loss": 0.6956, "step": 12130 }, { "epoch": 0.616060209922453, "grad_norm": 0.034056215362617444, "learning_rate": 0.0008738641369322308, "loss": 0.7003, "step": 12135 }, { "epoch": 0.6163140460204846, "grad_norm": 0.04047009176863493, "learning_rate": 0.0008737170059287838, "loss": 0.7058, "step": 12140 }, { "epoch": 0.6165678821185161, "grad_norm": 0.03699816114359975, "learning_rate": 0.0008735698015665525, "loss": 0.7162, "step": 12145 }, { "epoch": 0.6168217182165475, "grad_norm": 0.03792086807838734, "learning_rate": 0.000873422523874432, "loss": 0.6953, "step": 12150 }, { "epoch": 0.6170755543145791, "grad_norm": 0.034366550051675715, "learning_rate": 0.0008732751728813324, "loss": 0.7249, "step": 12155 }, { "epoch": 0.6173293904126106, "grad_norm": 0.040681680166345394, "learning_rate": 0.0008731277486161777, "loss": 0.6719, "step": 12160 }, { "epoch": 0.6175832265106421, "grad_norm": 0.03802588364990038, "learning_rate": 0.000872980251107907, "loss": 0.6762, "step": 12165 }, { "epoch": 0.6178370626086735, "grad_norm": 0.035591671966076086, "learning_rate": 0.0008728326803854728, "loss": 0.7258, "step": 12170 }, { "epoch": 0.6180908987067051, "grad_norm": 0.03214583124864738, "learning_rate": 0.0008726850364778429, "loss": 0.724, "step": 12175 }, { "epoch": 0.6183447348047366, "grad_norm": 0.02902032326619006, "learning_rate": 0.000872537319413999, "loss": 0.6991, "step": 12180 }, { "epoch": 0.618598570902768, "grad_norm": 0.04463526609355198, "learning_rate": 0.000872389529222937, "loss": 0.6926, "step": 12185 }, { "epoch": 0.6188524070007996, "grad_norm": 0.03785577003402824, "learning_rate": 0.0008722416659336676, "loss": 0.7029, "step": 12190 }, { "epoch": 0.6191062430988311, "grad_norm": 0.031575593128806645, "learning_rate": 0.0008720937295752153, "loss": 0.6992, "step": 12195 }, { "epoch": 0.6193600791968625, "grad_norm": 0.030687773821850478, "learning_rate": 0.0008719457201766199, "loss": 0.71, "step": 12200 }, { "epoch": 0.6196139152948941, "grad_norm": 0.030841960574893093, "learning_rate": 0.0008717976377669343, "loss": 0.6753, "step": 12205 }, { "epoch": 0.6198677513929256, "grad_norm": 0.03303180780651345, "learning_rate": 0.0008716494823752265, "loss": 0.6956, "step": 12210 }, { "epoch": 0.6201215874909571, "grad_norm": 0.032015477816498925, "learning_rate": 0.0008715012540305789, "loss": 0.7123, "step": 12215 }, { "epoch": 0.6203754235889886, "grad_norm": 0.032776864070079084, "learning_rate": 0.0008713529527620876, "loss": 0.7446, "step": 12220 }, { "epoch": 0.6206292596870201, "grad_norm": 0.03143185914816319, "learning_rate": 0.0008712045785988638, "loss": 0.6755, "step": 12225 }, { "epoch": 0.6208830957850516, "grad_norm": 0.03266992713979169, "learning_rate": 0.0008710561315700323, "loss": 0.7554, "step": 12230 }, { "epoch": 0.621136931883083, "grad_norm": 0.03321949988881074, "learning_rate": 0.0008709076117047326, "loss": 0.6781, "step": 12235 }, { "epoch": 0.6213907679811146, "grad_norm": 0.032413671710506346, "learning_rate": 0.0008707590190321186, "loss": 0.7134, "step": 12240 }, { "epoch": 0.6216446040791461, "grad_norm": 3.642778302866271, "learning_rate": 0.000870610353581358, "loss": 0.8436, "step": 12245 }, { "epoch": 0.6218984401771775, "grad_norm": 0.1783121395649107, "learning_rate": 0.0008704616153816332, "loss": 0.8175, "step": 12250 }, { "epoch": 0.6221522762752091, "grad_norm": 0.09678248775223909, "learning_rate": 0.0008703128044621409, "loss": 0.7478, "step": 12255 }, { "epoch": 0.6224061123732406, "grad_norm": 0.06717517670507876, "learning_rate": 0.0008701639208520917, "loss": 0.7436, "step": 12260 }, { "epoch": 0.6226599484712722, "grad_norm": 0.06181055501987306, "learning_rate": 0.000870014964580711, "loss": 0.7654, "step": 12265 }, { "epoch": 0.6229137845693036, "grad_norm": 0.03816047676585346, "learning_rate": 0.000869865935677238, "loss": 0.7225, "step": 12270 }, { "epoch": 0.6231676206673351, "grad_norm": 0.05497298555045803, "learning_rate": 0.0008697168341709263, "loss": 0.7462, "step": 12275 }, { "epoch": 0.6234214567653666, "grad_norm": 0.04076237707128787, "learning_rate": 0.0008695676600910437, "loss": 0.7458, "step": 12280 }, { "epoch": 0.6236752928633981, "grad_norm": 0.0373766524049183, "learning_rate": 0.0008694184134668726, "loss": 0.726, "step": 12285 }, { "epoch": 0.6239291289614296, "grad_norm": 0.037761769602763826, "learning_rate": 0.0008692690943277092, "loss": 0.7537, "step": 12290 }, { "epoch": 0.6241829650594611, "grad_norm": 0.03156678131012448, "learning_rate": 0.0008691197027028641, "loss": 0.7321, "step": 12295 }, { "epoch": 0.6244368011574926, "grad_norm": 0.0326588086689253, "learning_rate": 0.0008689702386216622, "loss": 0.6988, "step": 12300 }, { "epoch": 0.6246906372555241, "grad_norm": 0.0381126844682047, "learning_rate": 0.0008688207021134424, "loss": 0.7904, "step": 12305 }, { "epoch": 0.6249444733535556, "grad_norm": 0.07619618149242248, "learning_rate": 0.0008686710932075582, "loss": 0.7045, "step": 12310 }, { "epoch": 0.6251983094515872, "grad_norm": 0.03169647690192619, "learning_rate": 0.000868521411933377, "loss": 0.6953, "step": 12315 }, { "epoch": 0.6254521455496186, "grad_norm": 0.03402867320150148, "learning_rate": 0.0008683716583202803, "loss": 0.7159, "step": 12320 }, { "epoch": 0.6257059816476501, "grad_norm": 0.034762207257623946, "learning_rate": 0.0008682218323976643, "loss": 0.7324, "step": 12325 }, { "epoch": 0.6259598177456817, "grad_norm": 0.033935605522055684, "learning_rate": 0.0008680719341949388, "loss": 0.7252, "step": 12330 }, { "epoch": 0.6262136538437131, "grad_norm": 0.0327528872971295, "learning_rate": 0.0008679219637415281, "loss": 0.6958, "step": 12335 }, { "epoch": 0.6264674899417446, "grad_norm": 0.03539256953487578, "learning_rate": 0.0008677719210668708, "loss": 0.7455, "step": 12340 }, { "epoch": 0.6267213260397761, "grad_norm": 0.0318699094756521, "learning_rate": 0.0008676218062004196, "loss": 0.7416, "step": 12345 }, { "epoch": 0.6269751621378076, "grad_norm": 0.031250071110785475, "learning_rate": 0.0008674716191716412, "loss": 0.6917, "step": 12350 }, { "epoch": 0.6272289982358391, "grad_norm": 0.03153788084621052, "learning_rate": 0.0008673213600100165, "loss": 0.7065, "step": 12355 }, { "epoch": 0.6274828343338706, "grad_norm": 0.037432879033764445, "learning_rate": 0.0008671710287450406, "loss": 0.676, "step": 12360 }, { "epoch": 0.6277366704319021, "grad_norm": 0.03366206265082616, "learning_rate": 0.0008670206254062227, "loss": 0.7315, "step": 12365 }, { "epoch": 0.6279905065299336, "grad_norm": 0.0326361233847686, "learning_rate": 0.0008668701500230865, "loss": 0.7326, "step": 12370 }, { "epoch": 0.6282443426279651, "grad_norm": 0.03507618967967259, "learning_rate": 0.0008667196026251694, "loss": 0.7485, "step": 12375 }, { "epoch": 0.6284981787259967, "grad_norm": 0.034166440434796004, "learning_rate": 0.0008665689832420231, "loss": 0.7827, "step": 12380 }, { "epoch": 0.6287520148240281, "grad_norm": 0.03907791706956565, "learning_rate": 0.0008664182919032135, "loss": 0.6998, "step": 12385 }, { "epoch": 0.6290058509220596, "grad_norm": 0.034822372930120526, "learning_rate": 0.0008662675286383206, "loss": 0.6911, "step": 12390 }, { "epoch": 0.6292596870200912, "grad_norm": 0.043140165700480855, "learning_rate": 0.0008661166934769384, "loss": 0.7418, "step": 12395 }, { "epoch": 0.6295135231181226, "grad_norm": 0.03942948937069346, "learning_rate": 0.000865965786448675, "loss": 0.7358, "step": 12400 }, { "epoch": 0.6297673592161541, "grad_norm": 0.03625238657515462, "learning_rate": 0.0008658148075831529, "loss": 0.7304, "step": 12405 }, { "epoch": 0.6300211953141857, "grad_norm": 0.24341815354254734, "learning_rate": 0.0008656637569100083, "loss": 0.7542, "step": 12410 }, { "epoch": 0.6302750314122171, "grad_norm": 0.046282687239533465, "learning_rate": 0.0008655126344588917, "loss": 0.7431, "step": 12415 }, { "epoch": 0.6305288675102486, "grad_norm": 0.04197771457094748, "learning_rate": 0.0008653614402594679, "loss": 0.7127, "step": 12420 }, { "epoch": 0.6307827036082801, "grad_norm": 0.06988696129681676, "learning_rate": 0.0008652101743414154, "loss": 0.7458, "step": 12425 }, { "epoch": 0.6310365397063117, "grad_norm": 0.08592295800051836, "learning_rate": 0.000865058836734427, "loss": 0.7225, "step": 12430 }, { "epoch": 0.6312903758043431, "grad_norm": 0.10255168214788882, "learning_rate": 0.0008649074274682094, "loss": 0.7093, "step": 12435 }, { "epoch": 0.6315442119023746, "grad_norm": 0.07332133787724528, "learning_rate": 0.0008647559465724837, "loss": 0.7889, "step": 12440 }, { "epoch": 0.6317980480004062, "grad_norm": 0.04547942190501729, "learning_rate": 0.0008646043940769846, "loss": 0.7557, "step": 12445 }, { "epoch": 0.6320518840984376, "grad_norm": 0.06259519289212644, "learning_rate": 0.0008644527700114613, "loss": 0.7468, "step": 12450 }, { "epoch": 0.6323057201964691, "grad_norm": 0.04301737637804549, "learning_rate": 0.0008643010744056768, "loss": 0.7223, "step": 12455 }, { "epoch": 0.6325595562945007, "grad_norm": 0.03870984927903649, "learning_rate": 0.0008641493072894081, "loss": 0.7315, "step": 12460 }, { "epoch": 0.6328133923925321, "grad_norm": 0.034552216721820586, "learning_rate": 0.0008639974686924463, "loss": 0.7289, "step": 12465 }, { "epoch": 0.6330672284905636, "grad_norm": 0.03566962430710737, "learning_rate": 0.0008638455586445967, "loss": 0.7276, "step": 12470 }, { "epoch": 0.6333210645885952, "grad_norm": 0.032630994808016185, "learning_rate": 0.0008636935771756787, "loss": 0.7306, "step": 12475 }, { "epoch": 0.6335749006866267, "grad_norm": 0.032924162390368585, "learning_rate": 0.000863541524315525, "loss": 0.7128, "step": 12480 }, { "epoch": 0.6338287367846581, "grad_norm": 0.03631028243473692, "learning_rate": 0.000863389400093983, "loss": 0.7401, "step": 12485 }, { "epoch": 0.6340825728826897, "grad_norm": 0.031974874271371904, "learning_rate": 0.0008632372045409141, "loss": 0.7152, "step": 12490 }, { "epoch": 0.6343364089807212, "grad_norm": 0.16762330557531668, "learning_rate": 0.0008630849376861933, "loss": 0.7317, "step": 12495 }, { "epoch": 0.6345902450787526, "grad_norm": 0.05570139072656796, "learning_rate": 0.0008629325995597101, "loss": 0.745, "step": 12500 }, { "epoch": 0.6348440811767841, "grad_norm": 0.04352116086288314, "learning_rate": 0.0008627801901913675, "loss": 0.7419, "step": 12505 }, { "epoch": 0.6350979172748157, "grad_norm": 0.03772514023634145, "learning_rate": 0.0008626277096110826, "loss": 0.7552, "step": 12510 }, { "epoch": 0.6353517533728471, "grad_norm": 0.034836506092597266, "learning_rate": 0.0008624751578487868, "loss": 0.7103, "step": 12515 }, { "epoch": 0.6356055894708786, "grad_norm": 0.041278098881065615, "learning_rate": 0.0008623225349344252, "loss": 0.7184, "step": 12520 }, { "epoch": 0.6358594255689102, "grad_norm": 0.04048326544930654, "learning_rate": 0.000862169840897957, "loss": 0.7466, "step": 12525 }, { "epoch": 0.6361132616669417, "grad_norm": 0.03156493247056098, "learning_rate": 0.0008620170757693551, "loss": 0.7214, "step": 12530 }, { "epoch": 0.6363670977649731, "grad_norm": 0.03379743401671446, "learning_rate": 0.0008618642395786065, "loss": 0.688, "step": 12535 }, { "epoch": 0.6366209338630047, "grad_norm": 0.037029964607045864, "learning_rate": 0.0008617113323557124, "loss": 0.7349, "step": 12540 }, { "epoch": 0.6368747699610362, "grad_norm": 0.03595249749836293, "learning_rate": 0.0008615583541306875, "loss": 0.7171, "step": 12545 }, { "epoch": 0.6371286060590676, "grad_norm": 0.03445126023332132, "learning_rate": 0.0008614053049335608, "loss": 0.716, "step": 12550 }, { "epoch": 0.6373824421570992, "grad_norm": 0.03214830581906638, "learning_rate": 0.0008612521847943751, "loss": 0.718, "step": 12555 }, { "epoch": 0.6376362782551307, "grad_norm": 0.03275756780945875, "learning_rate": 0.0008610989937431872, "loss": 0.6956, "step": 12560 }, { "epoch": 0.6378901143531621, "grad_norm": 0.03368263596378576, "learning_rate": 0.0008609457318100674, "loss": 0.7382, "step": 12565 }, { "epoch": 0.6381439504511937, "grad_norm": 0.03355116567900129, "learning_rate": 0.0008607923990251005, "loss": 0.7063, "step": 12570 }, { "epoch": 0.6383977865492252, "grad_norm": 0.0317546283221897, "learning_rate": 0.0008606389954183851, "loss": 0.7499, "step": 12575 }, { "epoch": 0.6386516226472566, "grad_norm": 0.03366305973597602, "learning_rate": 0.0008604855210200333, "loss": 0.6983, "step": 12580 }, { "epoch": 0.6389054587452881, "grad_norm": 0.03578803982185422, "learning_rate": 0.0008603319758601715, "loss": 0.7214, "step": 12585 }, { "epoch": 0.6391592948433197, "grad_norm": 0.03274955399709783, "learning_rate": 0.0008601783599689399, "loss": 0.6935, "step": 12590 }, { "epoch": 0.6394131309413512, "grad_norm": 0.034702469820594765, "learning_rate": 0.0008600246733764923, "loss": 0.7078, "step": 12595 }, { "epoch": 0.6396669670393826, "grad_norm": 0.032241151854744854, "learning_rate": 0.0008598709161129969, "loss": 0.6946, "step": 12600 }, { "epoch": 0.6399208031374142, "grad_norm": 0.03400037953724541, "learning_rate": 0.0008597170882086351, "loss": 0.6971, "step": 12605 }, { "epoch": 0.6401746392354457, "grad_norm": 0.03241744103216775, "learning_rate": 0.000859563189693603, "loss": 0.7312, "step": 12610 }, { "epoch": 0.6404284753334771, "grad_norm": 0.03699997433374391, "learning_rate": 0.0008594092205981099, "loss": 0.6899, "step": 12615 }, { "epoch": 0.6406823114315087, "grad_norm": 0.037593294233397564, "learning_rate": 0.0008592551809523791, "loss": 0.7454, "step": 12620 }, { "epoch": 0.6409361475295402, "grad_norm": 0.03337313763218288, "learning_rate": 0.0008591010707866478, "loss": 0.7559, "step": 12625 }, { "epoch": 0.6411899836275716, "grad_norm": 0.03255083382745712, "learning_rate": 0.0008589468901311672, "loss": 0.7262, "step": 12630 }, { "epoch": 0.6414438197256032, "grad_norm": 0.05603855692288778, "learning_rate": 0.0008587926390162022, "loss": 0.7011, "step": 12635 }, { "epoch": 0.6416976558236347, "grad_norm": 0.0361520637802246, "learning_rate": 0.0008586383174720315, "loss": 0.7421, "step": 12640 }, { "epoch": 0.6419514919216662, "grad_norm": 0.037128608737878155, "learning_rate": 0.0008584839255289475, "loss": 0.733, "step": 12645 }, { "epoch": 0.6422053280196977, "grad_norm": 0.03179221263948371, "learning_rate": 0.0008583294632172567, "loss": 0.7127, "step": 12650 }, { "epoch": 0.6424591641177292, "grad_norm": 0.030611097002427232, "learning_rate": 0.0008581749305672792, "loss": 0.7143, "step": 12655 }, { "epoch": 0.6427130002157607, "grad_norm": 0.032940837543091975, "learning_rate": 0.0008580203276093492, "loss": 0.7054, "step": 12660 }, { "epoch": 0.6429668363137921, "grad_norm": 0.031833597851116245, "learning_rate": 0.0008578656543738141, "loss": 0.7017, "step": 12665 }, { "epoch": 0.6432206724118237, "grad_norm": 0.03668713316574106, "learning_rate": 0.0008577109108910359, "loss": 0.6985, "step": 12670 }, { "epoch": 0.6434745085098552, "grad_norm": 0.03626851766755237, "learning_rate": 0.0008575560971913898, "loss": 0.7075, "step": 12675 }, { "epoch": 0.6437283446078866, "grad_norm": 0.030835606886262808, "learning_rate": 0.0008574012133052649, "loss": 0.6768, "step": 12680 }, { "epoch": 0.6439821807059182, "grad_norm": 0.09626643462313413, "learning_rate": 0.0008572462592630641, "loss": 0.6826, "step": 12685 }, { "epoch": 0.6442360168039497, "grad_norm": 0.031580584186374626, "learning_rate": 0.0008570912350952044, "loss": 0.6986, "step": 12690 }, { "epoch": 0.6444898529019812, "grad_norm": 0.03336343133286166, "learning_rate": 0.0008569361408321159, "loss": 0.7237, "step": 12695 }, { "epoch": 0.6447436890000127, "grad_norm": 0.03208188895964099, "learning_rate": 0.000856780976504243, "loss": 0.6886, "step": 12700 }, { "epoch": 0.6449975250980442, "grad_norm": 0.03436019727002221, "learning_rate": 0.0008566257421420439, "loss": 0.7158, "step": 12705 }, { "epoch": 0.6452513611960757, "grad_norm": 0.03152841977477264, "learning_rate": 0.0008564704377759897, "loss": 0.7056, "step": 12710 }, { "epoch": 0.6455051972941072, "grad_norm": 0.03502147342812692, "learning_rate": 0.0008563150634365666, "loss": 0.7076, "step": 12715 }, { "epoch": 0.6457590333921387, "grad_norm": 0.035690239173237784, "learning_rate": 0.0008561596191542733, "loss": 0.6923, "step": 12720 }, { "epoch": 0.6460128694901702, "grad_norm": 0.032733393191899406, "learning_rate": 0.000856004104959623, "loss": 0.7061, "step": 12725 }, { "epoch": 0.6462667055882017, "grad_norm": 0.03376177380584733, "learning_rate": 0.0008558485208831424, "loss": 0.7284, "step": 12730 }, { "epoch": 0.6465205416862332, "grad_norm": 0.03333992837019415, "learning_rate": 0.0008556928669553717, "loss": 0.7084, "step": 12735 }, { "epoch": 0.6467743777842647, "grad_norm": 0.03373310336584765, "learning_rate": 0.000855537143206865, "loss": 0.6966, "step": 12740 }, { "epoch": 0.6470282138822963, "grad_norm": 0.032297190070148904, "learning_rate": 0.00085538134966819, "loss": 0.7446, "step": 12745 }, { "epoch": 0.6472820499803277, "grad_norm": 0.03247161568583564, "learning_rate": 0.0008552254863699286, "loss": 0.6955, "step": 12750 }, { "epoch": 0.6475358860783592, "grad_norm": 0.03224700833943323, "learning_rate": 0.0008550695533426756, "loss": 0.7135, "step": 12755 }, { "epoch": 0.6477897221763907, "grad_norm": 0.043597890883488775, "learning_rate": 0.00085491355061704, "loss": 0.7216, "step": 12760 }, { "epoch": 0.6480435582744222, "grad_norm": 0.03212480268416523, "learning_rate": 0.0008547574782236444, "loss": 0.7124, "step": 12765 }, { "epoch": 0.6482973943724537, "grad_norm": 0.031885752667819116, "learning_rate": 0.0008546013361931251, "loss": 0.7042, "step": 12770 }, { "epoch": 0.6485512304704852, "grad_norm": 0.03221527437243138, "learning_rate": 0.0008544451245561318, "loss": 0.6831, "step": 12775 }, { "epoch": 0.6488050665685167, "grad_norm": 0.03344610596952861, "learning_rate": 0.0008542888433433283, "loss": 0.6779, "step": 12780 }, { "epoch": 0.6490589026665482, "grad_norm": 0.030660394965704486, "learning_rate": 0.0008541324925853915, "loss": 0.6837, "step": 12785 }, { "epoch": 0.6493127387645797, "grad_norm": 0.030432538961177084, "learning_rate": 0.0008539760723130125, "loss": 0.673, "step": 12790 }, { "epoch": 0.6495665748626112, "grad_norm": 0.03324200852629998, "learning_rate": 0.0008538195825568958, "loss": 0.7183, "step": 12795 }, { "epoch": 0.6498204109606427, "grad_norm": 0.03223102102927785, "learning_rate": 0.0008536630233477594, "loss": 0.7017, "step": 12800 }, { "epoch": 0.6500742470586742, "grad_norm": 0.03378577976761442, "learning_rate": 0.0008535063947163355, "loss": 0.7148, "step": 12805 }, { "epoch": 0.6503280831567058, "grad_norm": 0.03724924470739189, "learning_rate": 0.0008533496966933691, "loss": 0.7006, "step": 12810 }, { "epoch": 0.6505819192547372, "grad_norm": 0.034813167070929324, "learning_rate": 0.0008531929293096194, "loss": 0.7318, "step": 12815 }, { "epoch": 0.6508357553527687, "grad_norm": 0.03283519031633051, "learning_rate": 0.0008530360925958591, "loss": 0.6878, "step": 12820 }, { "epoch": 0.6510895914508003, "grad_norm": 0.030676143847227264, "learning_rate": 0.0008528791865828742, "loss": 0.6898, "step": 12825 }, { "epoch": 0.6513434275488317, "grad_norm": 0.03462254558445232, "learning_rate": 0.000852722211301465, "loss": 0.7311, "step": 12830 }, { "epoch": 0.6515972636468632, "grad_norm": 0.03556729399337002, "learning_rate": 0.0008525651667824447, "loss": 0.6766, "step": 12835 }, { "epoch": 0.6518510997448947, "grad_norm": 0.033050342208278856, "learning_rate": 0.0008524080530566405, "loss": 0.7462, "step": 12840 }, { "epoch": 0.6521049358429262, "grad_norm": 0.033302936802276466, "learning_rate": 0.0008522508701548927, "loss": 0.6829, "step": 12845 }, { "epoch": 0.6523587719409577, "grad_norm": 0.030214242796319214, "learning_rate": 0.0008520936181080561, "loss": 0.7033, "step": 12850 }, { "epoch": 0.6526126080389892, "grad_norm": 0.03059908594584137, "learning_rate": 0.0008519362969469979, "loss": 0.7017, "step": 12855 }, { "epoch": 0.6528664441370208, "grad_norm": 0.031771365525568626, "learning_rate": 0.0008517789067025997, "loss": 0.7476, "step": 12860 }, { "epoch": 0.6531202802350522, "grad_norm": 0.03299810923697541, "learning_rate": 0.0008516214474057565, "loss": 0.6717, "step": 12865 }, { "epoch": 0.6533741163330837, "grad_norm": 0.032011672439250106, "learning_rate": 0.0008514639190873767, "loss": 0.6825, "step": 12870 }, { "epoch": 0.6536279524311153, "grad_norm": 0.03329054663186923, "learning_rate": 0.0008513063217783824, "loss": 0.7139, "step": 12875 }, { "epoch": 0.6538817885291467, "grad_norm": 0.03183890785739647, "learning_rate": 0.000851148655509709, "loss": 0.7364, "step": 12880 }, { "epoch": 0.6541356246271782, "grad_norm": 0.03108408138401049, "learning_rate": 0.0008509909203123057, "loss": 0.7158, "step": 12885 }, { "epoch": 0.6543894607252098, "grad_norm": 0.030379085147328806, "learning_rate": 0.0008508331162171353, "loss": 0.7168, "step": 12890 }, { "epoch": 0.6546432968232412, "grad_norm": 0.0335842745662126, "learning_rate": 0.0008506752432551736, "loss": 0.7016, "step": 12895 }, { "epoch": 0.6548971329212727, "grad_norm": 0.033277815579426835, "learning_rate": 0.0008505173014574104, "loss": 0.7043, "step": 12900 }, { "epoch": 0.6551509690193043, "grad_norm": 0.03795988889597526, "learning_rate": 0.0008503592908548492, "loss": 0.7203, "step": 12905 }, { "epoch": 0.6554048051173358, "grad_norm": 0.02963749881251388, "learning_rate": 0.0008502012114785062, "loss": 0.716, "step": 12910 }, { "epoch": 0.6556586412153672, "grad_norm": 0.031695367208012436, "learning_rate": 0.0008500430633594121, "loss": 0.7032, "step": 12915 }, { "epoch": 0.6559124773133987, "grad_norm": 0.03302927214023767, "learning_rate": 0.0008498848465286101, "loss": 0.7487, "step": 12920 }, { "epoch": 0.6561663134114303, "grad_norm": 0.03225617666187677, "learning_rate": 0.0008497265610171576, "loss": 0.6723, "step": 12925 }, { "epoch": 0.6564201495094617, "grad_norm": 0.03714367055751647, "learning_rate": 0.0008495682068561254, "loss": 0.6802, "step": 12930 }, { "epoch": 0.6566739856074932, "grad_norm": 0.030325563288082297, "learning_rate": 0.0008494097840765975, "loss": 0.6937, "step": 12935 }, { "epoch": 0.6569278217055248, "grad_norm": 0.03395194655026448, "learning_rate": 0.0008492512927096714, "loss": 0.7269, "step": 12940 }, { "epoch": 0.6571816578035562, "grad_norm": 0.03241940856117295, "learning_rate": 0.0008490927327864581, "loss": 0.6928, "step": 12945 }, { "epoch": 0.6574354939015877, "grad_norm": 0.038162239708954646, "learning_rate": 0.0008489341043380825, "loss": 0.7819, "step": 12950 }, { "epoch": 0.6576893299996193, "grad_norm": 0.029667680854464407, "learning_rate": 0.0008487754073956823, "loss": 0.7093, "step": 12955 }, { "epoch": 0.6579431660976508, "grad_norm": 0.03369596534298732, "learning_rate": 0.0008486166419904089, "loss": 0.6747, "step": 12960 }, { "epoch": 0.6581970021956822, "grad_norm": 0.03509794431593952, "learning_rate": 0.0008484578081534274, "loss": 0.7364, "step": 12965 }, { "epoch": 0.6584508382937138, "grad_norm": 0.03423822381085619, "learning_rate": 0.0008482989059159158, "loss": 0.7041, "step": 12970 }, { "epoch": 0.6587046743917453, "grad_norm": 0.03501586537026986, "learning_rate": 0.0008481399353090659, "loss": 0.7234, "step": 12975 }, { "epoch": 0.6589585104897767, "grad_norm": 0.031778835934952065, "learning_rate": 0.0008479808963640828, "loss": 0.7613, "step": 12980 }, { "epoch": 0.6592123465878083, "grad_norm": 0.03419372064139252, "learning_rate": 0.0008478217891121853, "loss": 0.6989, "step": 12985 }, { "epoch": 0.6594661826858398, "grad_norm": 0.03162599230280375, "learning_rate": 0.0008476626135846051, "loss": 0.7054, "step": 12990 }, { "epoch": 0.6597200187838712, "grad_norm": 0.033908029220928886, "learning_rate": 0.0008475033698125876, "loss": 0.7535, "step": 12995 }, { "epoch": 0.6599738548819027, "grad_norm": 0.032697264477281725, "learning_rate": 0.0008473440578273916, "loss": 0.7002, "step": 13000 }, { "epoch": 0.6602276909799343, "grad_norm": 0.030881064961690404, "learning_rate": 0.0008471846776602894, "loss": 0.6465, "step": 13005 }, { "epoch": 0.6604815270779657, "grad_norm": 0.037891332912686274, "learning_rate": 0.0008470252293425662, "loss": 0.7025, "step": 13010 }, { "epoch": 0.6607353631759972, "grad_norm": 0.047466830004858775, "learning_rate": 0.0008468657129055213, "loss": 0.6592, "step": 13015 }, { "epoch": 0.6609891992740288, "grad_norm": 0.034254161301697665, "learning_rate": 0.0008467061283804665, "loss": 0.7108, "step": 13020 }, { "epoch": 0.6612430353720603, "grad_norm": 0.03086874550134177, "learning_rate": 0.000846546475798728, "loss": 0.695, "step": 13025 }, { "epoch": 0.6614968714700917, "grad_norm": 0.037058475663069634, "learning_rate": 0.0008463867551916443, "loss": 0.7719, "step": 13030 }, { "epoch": 0.6617507075681233, "grad_norm": 0.03452026776967621, "learning_rate": 0.0008462269665905682, "loss": 0.7114, "step": 13035 }, { "epoch": 0.6620045436661548, "grad_norm": 0.9108657322478945, "learning_rate": 0.0008460671100268649, "loss": 0.7932, "step": 13040 }, { "epoch": 0.6622583797641862, "grad_norm": 0.055694962728493044, "learning_rate": 0.0008459071855319141, "loss": 0.7504, "step": 13045 }, { "epoch": 0.6625122158622178, "grad_norm": 0.0353324403564373, "learning_rate": 0.0008457471931371074, "loss": 0.6966, "step": 13050 }, { "epoch": 0.6627660519602493, "grad_norm": 0.05174172287063251, "learning_rate": 0.0008455871328738512, "loss": 0.7054, "step": 13055 }, { "epoch": 0.6630198880582807, "grad_norm": 0.0354046520574372, "learning_rate": 0.0008454270047735643, "loss": 0.6922, "step": 13060 }, { "epoch": 0.6632737241563122, "grad_norm": 0.03656689064449501, "learning_rate": 0.0008452668088676789, "loss": 0.7226, "step": 13065 }, { "epoch": 0.6635275602543438, "grad_norm": 0.040380411767478044, "learning_rate": 0.0008451065451876408, "loss": 0.7036, "step": 13070 }, { "epoch": 0.6637813963523753, "grad_norm": 0.03519467314725165, "learning_rate": 0.0008449462137649087, "loss": 0.7075, "step": 13075 }, { "epoch": 0.6640352324504067, "grad_norm": 0.032517938663745666, "learning_rate": 0.0008447858146309554, "loss": 0.7028, "step": 13080 }, { "epoch": 0.6642890685484383, "grad_norm": 0.02905945102407453, "learning_rate": 0.000844625347817266, "loss": 0.6947, "step": 13085 }, { "epoch": 0.6645429046464698, "grad_norm": 0.03470635953149482, "learning_rate": 0.0008444648133553394, "loss": 0.7226, "step": 13090 }, { "epoch": 0.6647967407445012, "grad_norm": 0.033414720095642746, "learning_rate": 0.0008443042112766879, "loss": 0.7083, "step": 13095 }, { "epoch": 0.6650505768425328, "grad_norm": 0.03505571024612612, "learning_rate": 0.0008441435416128367, "loss": 0.7315, "step": 13100 }, { "epoch": 0.6653044129405643, "grad_norm": 0.0368878024383197, "learning_rate": 0.0008439828043953246, "loss": 0.7235, "step": 13105 }, { "epoch": 0.6655582490385957, "grad_norm": 0.038824109504116903, "learning_rate": 0.0008438219996557033, "loss": 0.6955, "step": 13110 }, { "epoch": 0.6658120851366273, "grad_norm": 0.032078218638261824, "learning_rate": 0.0008436611274255382, "loss": 0.6903, "step": 13115 }, { "epoch": 0.6660659212346588, "grad_norm": 0.03147444723314519, "learning_rate": 0.0008435001877364076, "loss": 0.7188, "step": 13120 }, { "epoch": 0.6663197573326903, "grad_norm": 0.03243789326611996, "learning_rate": 0.0008433391806199033, "loss": 0.7422, "step": 13125 }, { "epoch": 0.6665735934307218, "grad_norm": 0.030005435088067953, "learning_rate": 0.0008431781061076298, "loss": 0.7158, "step": 13130 }, { "epoch": 0.6668274295287533, "grad_norm": 0.04267666200623939, "learning_rate": 0.0008430169642312058, "loss": 0.7531, "step": 13135 }, { "epoch": 0.6670812656267848, "grad_norm": 0.032424656537864006, "learning_rate": 0.0008428557550222622, "loss": 0.7322, "step": 13140 }, { "epoch": 0.6673351017248162, "grad_norm": 0.0339955107454086, "learning_rate": 0.0008426944785124437, "loss": 0.7057, "step": 13145 }, { "epoch": 0.6675889378228478, "grad_norm": 0.030218459107232358, "learning_rate": 0.000842533134733408, "loss": 0.6563, "step": 13150 }, { "epoch": 0.6678427739208793, "grad_norm": 0.03372239879565664, "learning_rate": 0.0008423717237168263, "loss": 0.6887, "step": 13155 }, { "epoch": 0.6680966100189107, "grad_norm": 0.031736596233868104, "learning_rate": 0.0008422102454943827, "loss": 0.712, "step": 13160 }, { "epoch": 0.6683504461169423, "grad_norm": 0.03533445383113989, "learning_rate": 0.0008420487000977743, "loss": 0.7034, "step": 13165 }, { "epoch": 0.6686042822149738, "grad_norm": 0.03146397835287484, "learning_rate": 0.0008418870875587121, "loss": 0.7092, "step": 13170 }, { "epoch": 0.6688581183130053, "grad_norm": 0.03585022511635492, "learning_rate": 0.0008417254079089194, "loss": 0.7143, "step": 13175 }, { "epoch": 0.6691119544110368, "grad_norm": 0.035080136093066525, "learning_rate": 0.0008415636611801334, "loss": 0.7288, "step": 13180 }, { "epoch": 0.6693657905090683, "grad_norm": 0.029927561077670438, "learning_rate": 0.0008414018474041041, "loss": 0.706, "step": 13185 }, { "epoch": 0.6696196266070998, "grad_norm": 0.03022578439577829, "learning_rate": 0.0008412399666125945, "loss": 0.6885, "step": 13190 }, { "epoch": 0.6698734627051313, "grad_norm": 0.03166950558552259, "learning_rate": 0.0008410780188373814, "loss": 0.7138, "step": 13195 }, { "epoch": 0.6701272988031628, "grad_norm": 0.03180995887906844, "learning_rate": 0.0008409160041102543, "loss": 0.6799, "step": 13200 }, { "epoch": 0.6703811349011943, "grad_norm": 0.03204269025860114, "learning_rate": 0.0008407539224630157, "loss": 0.6712, "step": 13205 }, { "epoch": 0.6706349709992258, "grad_norm": 0.036274606978695334, "learning_rate": 0.0008405917739274813, "loss": 0.7294, "step": 13210 }, { "epoch": 0.6708888070972573, "grad_norm": 0.029703893169128387, "learning_rate": 0.0008404295585354802, "loss": 0.6961, "step": 13215 }, { "epoch": 0.6711426431952888, "grad_norm": 0.02933344348529189, "learning_rate": 0.0008402672763188545, "loss": 0.6856, "step": 13220 }, { "epoch": 0.6713964792933204, "grad_norm": 0.038295110308150616, "learning_rate": 0.0008401049273094594, "loss": 0.6835, "step": 13225 }, { "epoch": 0.6716503153913518, "grad_norm": 0.03152326551915809, "learning_rate": 0.0008399425115391632, "loss": 0.7036, "step": 13230 }, { "epoch": 0.6719041514893833, "grad_norm": 0.03038985168307482, "learning_rate": 0.0008397800290398473, "loss": 0.6611, "step": 13235 }, { "epoch": 0.6721579875874149, "grad_norm": 0.029623769034576704, "learning_rate": 0.0008396174798434062, "loss": 0.6853, "step": 13240 }, { "epoch": 0.6724118236854463, "grad_norm": 0.029083245070892393, "learning_rate": 0.0008394548639817474, "loss": 0.6845, "step": 13245 }, { "epoch": 0.6726656597834778, "grad_norm": 0.03202246436913789, "learning_rate": 0.0008392921814867916, "loss": 0.7331, "step": 13250 }, { "epoch": 0.6729194958815093, "grad_norm": 0.0343472886869884, "learning_rate": 0.0008391294323904726, "loss": 0.6876, "step": 13255 }, { "epoch": 0.6731733319795408, "grad_norm": 0.031334335295312744, "learning_rate": 0.0008389666167247374, "loss": 0.7279, "step": 13260 }, { "epoch": 0.6734271680775723, "grad_norm": 0.030236648578287277, "learning_rate": 0.0008388037345215457, "loss": 0.709, "step": 13265 }, { "epoch": 0.6736810041756038, "grad_norm": 0.03398046654705604, "learning_rate": 0.0008386407858128706, "loss": 0.7229, "step": 13270 }, { "epoch": 0.6739348402736353, "grad_norm": 0.03410140598199888, "learning_rate": 0.0008384777706306979, "loss": 0.7063, "step": 13275 }, { "epoch": 0.6741886763716668, "grad_norm": 0.03302985904579469, "learning_rate": 0.0008383146890070269, "loss": 0.7074, "step": 13280 }, { "epoch": 0.6744425124696983, "grad_norm": 0.031228020749320233, "learning_rate": 0.0008381515409738696, "loss": 0.6904, "step": 13285 }, { "epoch": 0.6746963485677299, "grad_norm": 0.03290044547974289, "learning_rate": 0.0008379883265632512, "loss": 0.7135, "step": 13290 }, { "epoch": 0.6749501846657613, "grad_norm": 0.030876605280336596, "learning_rate": 0.0008378250458072099, "loss": 0.6753, "step": 13295 }, { "epoch": 0.6752040207637928, "grad_norm": 0.03099959231489005, "learning_rate": 0.0008376616987377968, "loss": 0.7572, "step": 13300 }, { "epoch": 0.6754578568618244, "grad_norm": 0.0359504880779347, "learning_rate": 0.0008374982853870761, "loss": 0.7567, "step": 13305 }, { "epoch": 0.6757116929598558, "grad_norm": 0.03588289679351287, "learning_rate": 0.000837334805787125, "loss": 0.7135, "step": 13310 }, { "epoch": 0.6759655290578873, "grad_norm": 0.033555463806716014, "learning_rate": 0.0008371712599700338, "loss": 0.7211, "step": 13315 }, { "epoch": 0.6762193651559189, "grad_norm": 0.030548897338960247, "learning_rate": 0.0008370076479679059, "loss": 0.7064, "step": 13320 }, { "epoch": 0.6764732012539503, "grad_norm": 0.03120790099916535, "learning_rate": 0.0008368439698128574, "loss": 0.6995, "step": 13325 }, { "epoch": 0.6767270373519818, "grad_norm": 0.036565715896749756, "learning_rate": 0.0008366802255370174, "loss": 0.6954, "step": 13330 }, { "epoch": 0.6769808734500133, "grad_norm": 0.03276196659557926, "learning_rate": 0.000836516415172528, "loss": 0.6845, "step": 13335 }, { "epoch": 0.6772347095480449, "grad_norm": 0.0328065387659729, "learning_rate": 0.0008363525387515446, "loss": 0.7081, "step": 13340 }, { "epoch": 0.6774885456460763, "grad_norm": 0.03214715773923613, "learning_rate": 0.0008361885963062353, "loss": 0.6705, "step": 13345 }, { "epoch": 0.6777423817441078, "grad_norm": 0.03483992252982476, "learning_rate": 0.000836024587868781, "loss": 0.7344, "step": 13350 }, { "epoch": 0.6779962178421394, "grad_norm": 0.032706172069922525, "learning_rate": 0.0008358605134713759, "loss": 0.7079, "step": 13355 }, { "epoch": 0.6782500539401708, "grad_norm": 0.030494158756850027, "learning_rate": 0.0008356963731462271, "loss": 0.6745, "step": 13360 }, { "epoch": 0.6785038900382023, "grad_norm": 0.02894960544398778, "learning_rate": 0.0008355321669255542, "loss": 0.7031, "step": 13365 }, { "epoch": 0.6787577261362339, "grad_norm": 0.030914116069904643, "learning_rate": 0.0008353678948415901, "loss": 0.6749, "step": 13370 }, { "epoch": 0.6790115622342653, "grad_norm": 0.030810384518647555, "learning_rate": 0.0008352035569265809, "loss": 0.6805, "step": 13375 }, { "epoch": 0.6792653983322968, "grad_norm": 0.031492822485684885, "learning_rate": 0.0008350391532127851, "loss": 0.7371, "step": 13380 }, { "epoch": 0.6795192344303284, "grad_norm": 0.032149162428489614, "learning_rate": 0.0008348746837324743, "loss": 0.7075, "step": 13385 }, { "epoch": 0.6797730705283599, "grad_norm": 0.034235234415454806, "learning_rate": 0.0008347101485179332, "loss": 0.6655, "step": 13390 }, { "epoch": 0.6800269066263913, "grad_norm": 0.030717632668448742, "learning_rate": 0.0008345455476014592, "loss": 0.6559, "step": 13395 }, { "epoch": 0.6802807427244228, "grad_norm": 0.032881766346810566, "learning_rate": 0.0008343808810153624, "loss": 0.6914, "step": 13400 }, { "epoch": 0.6805345788224544, "grad_norm": 0.03370064600558758, "learning_rate": 0.0008342161487919664, "loss": 0.7017, "step": 13405 }, { "epoch": 0.6807884149204858, "grad_norm": 0.03060247048717861, "learning_rate": 0.000834051350963607, "loss": 0.7091, "step": 13410 }, { "epoch": 0.6810422510185173, "grad_norm": 0.03420554455902038, "learning_rate": 0.0008338864875626333, "loss": 0.7094, "step": 13415 }, { "epoch": 0.6812960871165489, "grad_norm": 0.03248474357734969, "learning_rate": 0.0008337215586214073, "loss": 0.7201, "step": 13420 }, { "epoch": 0.6815499232145803, "grad_norm": 0.033924728064286334, "learning_rate": 0.0008335565641723035, "loss": 0.702, "step": 13425 }, { "epoch": 0.6818037593126118, "grad_norm": 0.031938924104812715, "learning_rate": 0.0008333915042477096, "loss": 0.6722, "step": 13430 }, { "epoch": 0.6820575954106434, "grad_norm": 0.031164999679287764, "learning_rate": 0.000833226378880026, "loss": 0.7362, "step": 13435 }, { "epoch": 0.6823114315086749, "grad_norm": 0.03357236850030476, "learning_rate": 0.000833061188101666, "loss": 0.7408, "step": 13440 }, { "epoch": 0.6825652676067063, "grad_norm": 0.03191294049882267, "learning_rate": 0.000832895931945056, "loss": 0.6791, "step": 13445 }, { "epoch": 0.6828191037047379, "grad_norm": 0.03344190477659921, "learning_rate": 0.0008327306104426345, "loss": 0.7011, "step": 13450 }, { "epoch": 0.6830729398027694, "grad_norm": 0.029974692986517254, "learning_rate": 0.0008325652236268536, "loss": 0.7148, "step": 13455 }, { "epoch": 0.6833267759008008, "grad_norm": 0.031372912156577296, "learning_rate": 0.0008323997715301777, "loss": 0.6848, "step": 13460 }, { "epoch": 0.6835806119988324, "grad_norm": 0.03314687921570143, "learning_rate": 0.0008322342541850844, "loss": 0.6825, "step": 13465 }, { "epoch": 0.6838344480968639, "grad_norm": 0.030075047258561423, "learning_rate": 0.0008320686716240637, "loss": 0.6738, "step": 13470 }, { "epoch": 0.6840882841948953, "grad_norm": 0.0313011460296834, "learning_rate": 0.000831903023879619, "loss": 0.6888, "step": 13475 }, { "epoch": 0.6843421202929268, "grad_norm": 0.03390972933936562, "learning_rate": 0.0008317373109842658, "loss": 0.6837, "step": 13480 }, { "epoch": 0.6845959563909584, "grad_norm": 0.03289173744145311, "learning_rate": 0.0008315715329705329, "loss": 0.6855, "step": 13485 }, { "epoch": 0.6848497924889898, "grad_norm": 0.029416472177282986, "learning_rate": 0.0008314056898709615, "loss": 0.7119, "step": 13490 }, { "epoch": 0.6851036285870213, "grad_norm": 0.03166354936155896, "learning_rate": 0.0008312397817181059, "loss": 0.7184, "step": 13495 }, { "epoch": 0.6853574646850529, "grad_norm": 0.032837062900415576, "learning_rate": 0.0008310738085445332, "loss": 0.7101, "step": 13500 }, { "epoch": 0.6856113007830844, "grad_norm": 0.03496817422026648, "learning_rate": 0.0008309077703828228, "loss": 0.7198, "step": 13505 }, { "epoch": 0.6858651368811158, "grad_norm": 0.030254496917259167, "learning_rate": 0.0008307416672655674, "loss": 0.7139, "step": 13510 }, { "epoch": 0.6861189729791474, "grad_norm": 0.027817349440322405, "learning_rate": 0.000830575499225372, "loss": 0.6999, "step": 13515 }, { "epoch": 0.6863728090771789, "grad_norm": 0.02891726137203717, "learning_rate": 0.0008304092662948548, "loss": 0.7111, "step": 13520 }, { "epoch": 0.6866266451752103, "grad_norm": 0.030055942256152688, "learning_rate": 0.0008302429685066462, "loss": 0.6717, "step": 13525 }, { "epoch": 0.6868804812732419, "grad_norm": 0.030597822518838667, "learning_rate": 0.0008300766058933899, "loss": 0.6693, "step": 13530 }, { "epoch": 0.6871343173712734, "grad_norm": 0.03285644704523922, "learning_rate": 0.0008299101784877421, "loss": 0.7006, "step": 13535 }, { "epoch": 0.6873881534693048, "grad_norm": 0.030013033710979783, "learning_rate": 0.0008297436863223715, "loss": 0.6907, "step": 13540 }, { "epoch": 0.6876419895673364, "grad_norm": 0.03462007742986417, "learning_rate": 0.0008295771294299596, "loss": 0.6754, "step": 13545 }, { "epoch": 0.6878958256653679, "grad_norm": 0.03433770694039393, "learning_rate": 0.0008294105078432007, "loss": 0.6821, "step": 13550 }, { "epoch": 0.6881496617633994, "grad_norm": 0.03104052468557108, "learning_rate": 0.000829243821594802, "loss": 0.7281, "step": 13555 }, { "epoch": 0.6884034978614308, "grad_norm": 0.037366356991054034, "learning_rate": 0.0008290770707174831, "loss": 0.7063, "step": 13560 }, { "epoch": 0.6886573339594624, "grad_norm": 0.029375185318941986, "learning_rate": 0.0008289102552439762, "loss": 0.6521, "step": 13565 }, { "epoch": 0.6889111700574939, "grad_norm": 0.031061524540865287, "learning_rate": 0.0008287433752070265, "loss": 0.6825, "step": 13570 }, { "epoch": 0.6891650061555253, "grad_norm": 0.028105568940834207, "learning_rate": 0.0008285764306393917, "loss": 0.6731, "step": 13575 }, { "epoch": 0.6894188422535569, "grad_norm": 0.033117960568009634, "learning_rate": 0.0008284094215738422, "loss": 0.6772, "step": 13580 }, { "epoch": 0.6896726783515884, "grad_norm": 0.04589625632379751, "learning_rate": 0.000828242348043161, "loss": 0.7157, "step": 13585 }, { "epoch": 0.6899265144496198, "grad_norm": 0.02822867593487086, "learning_rate": 0.0008280752100801439, "loss": 0.6861, "step": 13590 }, { "epoch": 0.6901803505476514, "grad_norm": 0.029173344778861757, "learning_rate": 0.0008279080077175992, "loss": 0.695, "step": 13595 }, { "epoch": 0.6904341866456829, "grad_norm": 0.03191872182819215, "learning_rate": 0.0008277407409883476, "loss": 0.703, "step": 13600 }, { "epoch": 0.6906880227437144, "grad_norm": 0.02902835574741874, "learning_rate": 0.0008275734099252233, "loss": 0.6899, "step": 13605 }, { "epoch": 0.6909418588417459, "grad_norm": 0.028708845973355997, "learning_rate": 0.0008274060145610719, "loss": 0.6577, "step": 13610 }, { "epoch": 0.6911956949397774, "grad_norm": 0.036743792351248875, "learning_rate": 0.0008272385549287529, "loss": 0.7518, "step": 13615 }, { "epoch": 0.6914495310378089, "grad_norm": 0.03232316532353019, "learning_rate": 0.0008270710310611374, "loss": 0.7109, "step": 13620 }, { "epoch": 0.6917033671358404, "grad_norm": 0.031346034075670486, "learning_rate": 0.0008269034429911095, "loss": 0.7163, "step": 13625 }, { "epoch": 0.6919572032338719, "grad_norm": 0.032510516228003064, "learning_rate": 0.0008267357907515661, "loss": 0.6825, "step": 13630 }, { "epoch": 0.6922110393319034, "grad_norm": 0.03286382358046037, "learning_rate": 0.0008265680743754165, "loss": 0.711, "step": 13635 }, { "epoch": 0.6924648754299348, "grad_norm": 0.029848006979396175, "learning_rate": 0.0008264002938955823, "loss": 0.6729, "step": 13640 }, { "epoch": 0.6927187115279664, "grad_norm": 0.036103618484770186, "learning_rate": 0.0008262324493449982, "loss": 0.7105, "step": 13645 }, { "epoch": 0.6929725476259979, "grad_norm": 0.031415473486148704, "learning_rate": 0.0008260645407566114, "loss": 0.7385, "step": 13650 }, { "epoch": 0.6932263837240294, "grad_norm": 0.030590968307845696, "learning_rate": 0.0008258965681633813, "loss": 0.7045, "step": 13655 }, { "epoch": 0.6934802198220609, "grad_norm": 0.030745453565581546, "learning_rate": 0.0008257285315982799, "loss": 0.6706, "step": 13660 }, { "epoch": 0.6937340559200924, "grad_norm": 0.02880526153127152, "learning_rate": 0.0008255604310942922, "loss": 0.6685, "step": 13665 }, { "epoch": 0.6939878920181239, "grad_norm": 0.026614927081638626, "learning_rate": 0.0008253922666844155, "loss": 0.6686, "step": 13670 }, { "epoch": 0.6942417281161554, "grad_norm": 0.02848671680412253, "learning_rate": 0.0008252240384016596, "loss": 0.69, "step": 13675 }, { "epoch": 0.6944955642141869, "grad_norm": 0.03111734954210192, "learning_rate": 0.0008250557462790469, "loss": 0.7233, "step": 13680 }, { "epoch": 0.6947494003122184, "grad_norm": 0.030541535604915366, "learning_rate": 0.0008248873903496123, "loss": 0.6705, "step": 13685 }, { "epoch": 0.6950032364102499, "grad_norm": 0.029523904349735636, "learning_rate": 0.000824718970646403, "loss": 0.7068, "step": 13690 }, { "epoch": 0.6952570725082814, "grad_norm": 0.0364716566242194, "learning_rate": 0.0008245504872024793, "loss": 0.6898, "step": 13695 }, { "epoch": 0.6955109086063129, "grad_norm": 0.031439852520409046, "learning_rate": 0.0008243819400509133, "loss": 0.6329, "step": 13700 }, { "epoch": 0.6957647447043444, "grad_norm": 0.03066946754650396, "learning_rate": 0.0008242133292247902, "loss": 0.6837, "step": 13705 }, { "epoch": 0.6960185808023759, "grad_norm": 0.03401947830541861, "learning_rate": 0.0008240446547572076, "loss": 0.6929, "step": 13710 }, { "epoch": 0.6962724169004074, "grad_norm": 0.033828054241248864, "learning_rate": 0.0008238759166812751, "loss": 0.7009, "step": 13715 }, { "epoch": 0.696526252998439, "grad_norm": 0.031515792354346185, "learning_rate": 0.0008237071150301154, "loss": 0.717, "step": 13720 }, { "epoch": 0.6967800890964704, "grad_norm": 0.030011429821364864, "learning_rate": 0.0008235382498368634, "loss": 0.7036, "step": 13725 }, { "epoch": 0.6970339251945019, "grad_norm": 0.032157714314188096, "learning_rate": 0.0008233693211346663, "loss": 0.6619, "step": 13730 }, { "epoch": 0.6972877612925334, "grad_norm": 0.033427765785913215, "learning_rate": 0.0008232003289566843, "loss": 0.6962, "step": 13735 }, { "epoch": 0.6975415973905649, "grad_norm": 0.028559458848460558, "learning_rate": 0.0008230312733360894, "loss": 0.6689, "step": 13740 }, { "epoch": 0.6977954334885964, "grad_norm": 0.03048314759343779, "learning_rate": 0.0008228621543060665, "loss": 0.661, "step": 13745 }, { "epoch": 0.6980492695866279, "grad_norm": 0.03090382408026085, "learning_rate": 0.0008226929718998129, "loss": 0.7019, "step": 13750 }, { "epoch": 0.6983031056846594, "grad_norm": 0.03406562492950645, "learning_rate": 0.0008225237261505381, "loss": 0.6624, "step": 13755 }, { "epoch": 0.6985569417826909, "grad_norm": 0.030505970231737647, "learning_rate": 0.0008223544170914641, "loss": 0.7259, "step": 13760 }, { "epoch": 0.6988107778807224, "grad_norm": 0.030532436693098776, "learning_rate": 0.0008221850447558259, "loss": 0.7133, "step": 13765 }, { "epoch": 0.699064613978754, "grad_norm": 0.030218958465312185, "learning_rate": 0.00082201560917687, "loss": 0.6796, "step": 13770 }, { "epoch": 0.6993184500767854, "grad_norm": 0.035699523998287866, "learning_rate": 0.000821846110387856, "loss": 0.6986, "step": 13775 }, { "epoch": 0.6995722861748169, "grad_norm": 0.03269284269039984, "learning_rate": 0.0008216765484220554, "loss": 0.708, "step": 13780 }, { "epoch": 0.6998261222728485, "grad_norm": 0.030397785331039935, "learning_rate": 0.0008215069233127528, "loss": 0.6991, "step": 13785 }, { "epoch": 0.7000799583708799, "grad_norm": 0.030077315023339025, "learning_rate": 0.0008213372350932444, "loss": 0.6846, "step": 13790 }, { "epoch": 0.7003337944689114, "grad_norm": 0.031010165459948214, "learning_rate": 0.0008211674837968391, "loss": 0.6973, "step": 13795 }, { "epoch": 0.700587630566943, "grad_norm": 0.033054770122273834, "learning_rate": 0.0008209976694568586, "loss": 0.6869, "step": 13800 }, { "epoch": 0.7008414666649744, "grad_norm": 0.03060053266171417, "learning_rate": 0.0008208277921066362, "loss": 0.6866, "step": 13805 }, { "epoch": 0.7010953027630059, "grad_norm": 0.03258495388790319, "learning_rate": 0.0008206578517795185, "loss": 0.6878, "step": 13810 }, { "epoch": 0.7013491388610374, "grad_norm": 0.03912431840883626, "learning_rate": 0.0008204878485088634, "loss": 0.7238, "step": 13815 }, { "epoch": 0.701602974959069, "grad_norm": 0.03746649632868715, "learning_rate": 0.0008203177823280419, "loss": 0.719, "step": 13820 }, { "epoch": 0.7018568110571004, "grad_norm": 0.035050783695621306, "learning_rate": 0.000820147653270437, "loss": 0.6813, "step": 13825 }, { "epoch": 0.7021106471551319, "grad_norm": 0.035090115584464746, "learning_rate": 0.0008199774613694447, "loss": 0.6625, "step": 13830 }, { "epoch": 0.7023644832531635, "grad_norm": 0.03205783174762401, "learning_rate": 0.0008198072066584721, "loss": 0.6677, "step": 13835 }, { "epoch": 0.7026183193511949, "grad_norm": 0.03276220450787828, "learning_rate": 0.0008196368891709399, "loss": 0.6937, "step": 13840 }, { "epoch": 0.7028721554492264, "grad_norm": 0.031710196757383076, "learning_rate": 0.0008194665089402804, "loss": 0.6637, "step": 13845 }, { "epoch": 0.703125991547258, "grad_norm": 0.031847190282380644, "learning_rate": 0.0008192960659999383, "loss": 0.7237, "step": 13850 }, { "epoch": 0.7033798276452894, "grad_norm": 0.032530461852990494, "learning_rate": 0.0008191255603833708, "loss": 0.6591, "step": 13855 }, { "epoch": 0.7036336637433209, "grad_norm": 0.03275860755282946, "learning_rate": 0.0008189549921240472, "loss": 0.6691, "step": 13860 }, { "epoch": 0.7038874998413525, "grad_norm": 0.03346088475083543, "learning_rate": 0.0008187843612554493, "loss": 0.7039, "step": 13865 }, { "epoch": 0.704141335939384, "grad_norm": 0.05461869294966491, "learning_rate": 0.0008186136678110711, "loss": 0.7042, "step": 13870 }, { "epoch": 0.7043951720374154, "grad_norm": 0.03130891088245813, "learning_rate": 0.000818442911824419, "loss": 0.7256, "step": 13875 }, { "epoch": 0.704649008135447, "grad_norm": 0.031447358575939756, "learning_rate": 0.0008182720933290111, "loss": 0.7141, "step": 13880 }, { "epoch": 0.7049028442334785, "grad_norm": 0.03158139297886828, "learning_rate": 0.0008181012123583786, "loss": 0.7232, "step": 13885 }, { "epoch": 0.7051566803315099, "grad_norm": 0.036777719800232624, "learning_rate": 0.0008179302689460646, "loss": 0.7024, "step": 13890 }, { "epoch": 0.7054105164295414, "grad_norm": 0.034655018636694485, "learning_rate": 0.0008177592631256241, "loss": 0.6723, "step": 13895 }, { "epoch": 0.705664352527573, "grad_norm": 0.03148206717604568, "learning_rate": 0.0008175881949306252, "loss": 0.6957, "step": 13900 }, { "epoch": 0.7059181886256044, "grad_norm": 0.030571486419705005, "learning_rate": 0.0008174170643946472, "loss": 0.7139, "step": 13905 }, { "epoch": 0.7061720247236359, "grad_norm": 0.03058577868337547, "learning_rate": 0.0008172458715512825, "loss": 0.6634, "step": 13910 }, { "epoch": 0.7064258608216675, "grad_norm": 0.03060209580339718, "learning_rate": 0.0008170746164341352, "loss": 0.706, "step": 13915 }, { "epoch": 0.7066796969196989, "grad_norm": 0.028915128158662703, "learning_rate": 0.0008169032990768221, "loss": 0.7335, "step": 13920 }, { "epoch": 0.7069335330177304, "grad_norm": 0.03044457293370702, "learning_rate": 0.0008167319195129717, "loss": 0.7113, "step": 13925 }, { "epoch": 0.707187369115762, "grad_norm": 0.032035623303750885, "learning_rate": 0.0008165604777762251, "loss": 0.706, "step": 13930 }, { "epoch": 0.7074412052137935, "grad_norm": 0.030056310355216005, "learning_rate": 0.0008163889739002354, "loss": 0.6658, "step": 13935 }, { "epoch": 0.7076950413118249, "grad_norm": 0.028299238494285953, "learning_rate": 0.000816217407918668, "loss": 0.6672, "step": 13940 }, { "epoch": 0.7079488774098565, "grad_norm": 0.0315866390447351, "learning_rate": 0.0008160457798652002, "loss": 0.6733, "step": 13945 }, { "epoch": 0.708202713507888, "grad_norm": 0.029691142772494464, "learning_rate": 0.0008158740897735221, "loss": 0.686, "step": 13950 }, { "epoch": 0.7084565496059194, "grad_norm": 0.12593657640231903, "learning_rate": 0.0008157023376773354, "loss": 0.7134, "step": 13955 }, { "epoch": 0.708710385703951, "grad_norm": 0.033762684458144857, "learning_rate": 0.0008155305236103543, "loss": 0.7127, "step": 13960 }, { "epoch": 0.7089642218019825, "grad_norm": 0.031619079945510144, "learning_rate": 0.0008153586476063048, "loss": 0.6524, "step": 13965 }, { "epoch": 0.7092180579000139, "grad_norm": 0.029474573098958772, "learning_rate": 0.0008151867096989256, "loss": 0.6749, "step": 13970 }, { "epoch": 0.7094718939980454, "grad_norm": 0.0317621949088544, "learning_rate": 0.0008150147099219669, "loss": 0.7271, "step": 13975 }, { "epoch": 0.709725730096077, "grad_norm": 0.031109941636575322, "learning_rate": 0.0008148426483091919, "loss": 0.7084, "step": 13980 }, { "epoch": 0.7099795661941085, "grad_norm": 0.03057791523146806, "learning_rate": 0.000814670524894375, "loss": 0.7427, "step": 13985 }, { "epoch": 0.7102334022921399, "grad_norm": 0.030528038211069666, "learning_rate": 0.0008144983397113032, "loss": 0.7065, "step": 13990 }, { "epoch": 0.7104872383901715, "grad_norm": 0.02921732678020078, "learning_rate": 0.000814326092793776, "loss": 0.6556, "step": 13995 }, { "epoch": 0.710741074488203, "grad_norm": 0.028147182821360743, "learning_rate": 0.0008141537841756043, "loss": 0.6639, "step": 14000 }, { "epoch": 0.7109949105862344, "grad_norm": 0.0296272064889346, "learning_rate": 0.0008139814138906112, "loss": 0.7016, "step": 14005 }, { "epoch": 0.711248746684266, "grad_norm": 0.030227320840687646, "learning_rate": 0.0008138089819726326, "loss": 0.691, "step": 14010 }, { "epoch": 0.7115025827822975, "grad_norm": 0.031190136093616178, "learning_rate": 0.0008136364884555158, "loss": 0.6565, "step": 14015 }, { "epoch": 0.7117564188803289, "grad_norm": 0.036023317940766963, "learning_rate": 0.0008134639333731202, "loss": 0.6942, "step": 14020 }, { "epoch": 0.7120102549783605, "grad_norm": 0.0284679012772105, "learning_rate": 0.0008132913167593179, "loss": 0.663, "step": 14025 }, { "epoch": 0.712264091076392, "grad_norm": 0.028659869505251147, "learning_rate": 0.0008131186386479925, "loss": 0.674, "step": 14030 }, { "epoch": 0.7125179271744235, "grad_norm": 0.03693014994276857, "learning_rate": 0.0008129458990730398, "loss": 0.6708, "step": 14035 }, { "epoch": 0.712771763272455, "grad_norm": 0.03453918109305372, "learning_rate": 0.0008127730980683677, "loss": 0.7284, "step": 14040 }, { "epoch": 0.7130255993704865, "grad_norm": 0.03138320811265545, "learning_rate": 0.0008126002356678965, "loss": 0.7268, "step": 14045 }, { "epoch": 0.713279435468518, "grad_norm": 0.036251626656049786, "learning_rate": 0.0008124273119055577, "loss": 0.7363, "step": 14050 }, { "epoch": 0.7135332715665494, "grad_norm": 0.031291461039019475, "learning_rate": 0.0008122543268152957, "loss": 0.7166, "step": 14055 }, { "epoch": 0.713787107664581, "grad_norm": 0.03256457483463647, "learning_rate": 0.0008120812804310667, "loss": 0.6925, "step": 14060 }, { "epoch": 0.7140409437626125, "grad_norm": 0.031745059585743945, "learning_rate": 0.0008119081727868386, "loss": 0.7344, "step": 14065 }, { "epoch": 0.7142947798606439, "grad_norm": 0.02918574126823896, "learning_rate": 0.0008117350039165916, "loss": 0.6853, "step": 14070 }, { "epoch": 0.7145486159586755, "grad_norm": 0.03160200045359093, "learning_rate": 0.0008115617738543182, "loss": 0.7219, "step": 14075 }, { "epoch": 0.714802452056707, "grad_norm": 0.03567519536570089, "learning_rate": 0.0008113884826340221, "loss": 0.7378, "step": 14080 }, { "epoch": 0.7150562881547385, "grad_norm": 0.03213540794441203, "learning_rate": 0.0008112151302897198, "loss": 0.7136, "step": 14085 }, { "epoch": 0.71531012425277, "grad_norm": 0.02879515717017982, "learning_rate": 0.0008110417168554396, "loss": 0.7067, "step": 14090 }, { "epoch": 0.7155639603508015, "grad_norm": 0.0311172821483428, "learning_rate": 0.0008108682423652213, "loss": 0.6853, "step": 14095 }, { "epoch": 0.715817796448833, "grad_norm": 0.03153304542775588, "learning_rate": 0.0008106947068531174, "loss": 0.6566, "step": 14100 }, { "epoch": 0.7160716325468645, "grad_norm": 0.033125573582802124, "learning_rate": 0.000810521110353192, "loss": 0.7416, "step": 14105 }, { "epoch": 0.716325468644896, "grad_norm": 0.030883193509811613, "learning_rate": 0.0008103474528995213, "loss": 0.6969, "step": 14110 }, { "epoch": 0.7165793047429275, "grad_norm": 0.029370058618705865, "learning_rate": 0.0008101737345261932, "loss": 0.6726, "step": 14115 }, { "epoch": 0.716833140840959, "grad_norm": 0.03042232061002849, "learning_rate": 0.0008099999552673079, "loss": 0.7345, "step": 14120 }, { "epoch": 0.7170869769389905, "grad_norm": 0.02964227151621386, "learning_rate": 0.0008098261151569772, "loss": 0.7038, "step": 14125 }, { "epoch": 0.717340813037022, "grad_norm": 0.03238653472532883, "learning_rate": 0.0008096522142293255, "loss": 0.7111, "step": 14130 }, { "epoch": 0.7175946491350534, "grad_norm": 0.02797186317733887, "learning_rate": 0.0008094782525184881, "loss": 0.717, "step": 14135 }, { "epoch": 0.717848485233085, "grad_norm": 0.030864464665490785, "learning_rate": 0.0008093042300586132, "loss": 0.6676, "step": 14140 }, { "epoch": 0.7181023213311165, "grad_norm": 0.030060655112191113, "learning_rate": 0.0008091301468838604, "loss": 0.7216, "step": 14145 }, { "epoch": 0.718356157429148, "grad_norm": 0.029651244848719564, "learning_rate": 0.0008089560030284014, "loss": 0.6971, "step": 14150 }, { "epoch": 0.7186099935271795, "grad_norm": 0.03055119338740579, "learning_rate": 0.0008087817985264197, "loss": 0.6873, "step": 14155 }, { "epoch": 0.718863829625211, "grad_norm": 0.029884620206113002, "learning_rate": 0.0008086075334121111, "loss": 0.7081, "step": 14160 }, { "epoch": 0.7191176657232425, "grad_norm": 0.029728731962047776, "learning_rate": 0.0008084332077196824, "loss": 0.6691, "step": 14165 }, { "epoch": 0.719371501821274, "grad_norm": 0.030654879592388676, "learning_rate": 0.0008082588214833534, "loss": 0.7578, "step": 14170 }, { "epoch": 0.7196253379193055, "grad_norm": 0.030581227721383775, "learning_rate": 0.000808084374737355, "loss": 0.6778, "step": 14175 }, { "epoch": 0.719879174017337, "grad_norm": 0.03264491700154935, "learning_rate": 0.0008079098675159302, "loss": 0.6746, "step": 14180 }, { "epoch": 0.7201330101153685, "grad_norm": 0.030653546604704297, "learning_rate": 0.0008077352998533339, "loss": 0.6671, "step": 14185 }, { "epoch": 0.7203868462134, "grad_norm": 0.08236691479578137, "learning_rate": 0.0008075606717838329, "loss": 0.6816, "step": 14190 }, { "epoch": 0.7206406823114315, "grad_norm": 0.036901581572874056, "learning_rate": 0.0008073859833417059, "loss": 0.6894, "step": 14195 }, { "epoch": 0.7208945184094631, "grad_norm": 0.02989453788779239, "learning_rate": 0.0008072112345612433, "loss": 0.7112, "step": 14200 }, { "epoch": 0.7211483545074945, "grad_norm": 0.03464464381020856, "learning_rate": 0.0008070364254767475, "loss": 0.6866, "step": 14205 }, { "epoch": 0.721402190605526, "grad_norm": 0.05084870852030982, "learning_rate": 0.0008068615561225324, "loss": 0.7306, "step": 14210 }, { "epoch": 0.7216560267035576, "grad_norm": 0.04203676046099435, "learning_rate": 0.0008066866265329242, "loss": 0.7019, "step": 14215 }, { "epoch": 0.721909862801589, "grad_norm": 0.0397076925085868, "learning_rate": 0.0008065116367422607, "loss": 0.7051, "step": 14220 }, { "epoch": 0.7221636988996205, "grad_norm": 0.04002414245062398, "learning_rate": 0.0008063365867848916, "loss": 0.7338, "step": 14225 }, { "epoch": 0.722417534997652, "grad_norm": 0.0340611865399674, "learning_rate": 0.0008061614766951779, "loss": 0.728, "step": 14230 }, { "epoch": 0.7226713710956835, "grad_norm": 0.03477186577588508, "learning_rate": 0.0008059863065074934, "loss": 0.69, "step": 14235 }, { "epoch": 0.722925207193715, "grad_norm": 0.035752139849573435, "learning_rate": 0.0008058110762562227, "loss": 0.7734, "step": 14240 }, { "epoch": 0.7231790432917465, "grad_norm": 0.031771166187393396, "learning_rate": 0.0008056357859757631, "loss": 0.7662, "step": 14245 }, { "epoch": 0.7234328793897781, "grad_norm": 0.031398345468514575, "learning_rate": 0.0008054604357005227, "loss": 0.668, "step": 14250 }, { "epoch": 0.7236867154878095, "grad_norm": 0.033535017151902546, "learning_rate": 0.000805285025464922, "loss": 0.6833, "step": 14255 }, { "epoch": 0.723940551585841, "grad_norm": 0.03174930899198339, "learning_rate": 0.0008051095553033935, "loss": 0.6658, "step": 14260 }, { "epoch": 0.7241943876838726, "grad_norm": 0.03982895189852736, "learning_rate": 0.0008049340252503808, "loss": 0.7017, "step": 14265 }, { "epoch": 0.724448223781904, "grad_norm": 0.03275077380420157, "learning_rate": 0.0008047584353403396, "loss": 0.7496, "step": 14270 }, { "epoch": 0.7247020598799355, "grad_norm": 0.035739297592391056, "learning_rate": 0.0008045827856077373, "loss": 0.6904, "step": 14275 }, { "epoch": 0.7249558959779671, "grad_norm": 0.030797966079487003, "learning_rate": 0.0008044070760870533, "loss": 0.6784, "step": 14280 }, { "epoch": 0.7252097320759985, "grad_norm": 0.03381612862022019, "learning_rate": 0.0008042313068127781, "loss": 0.7508, "step": 14285 }, { "epoch": 0.72546356817403, "grad_norm": 0.030663759246623363, "learning_rate": 0.0008040554778194148, "loss": 0.6483, "step": 14290 }, { "epoch": 0.7257174042720616, "grad_norm": 0.03009848143052187, "learning_rate": 0.0008038795891414774, "loss": 0.7225, "step": 14295 }, { "epoch": 0.7259712403700931, "grad_norm": 0.03243877786486399, "learning_rate": 0.0008037036408134921, "loss": 0.7289, "step": 14300 }, { "epoch": 0.7262250764681245, "grad_norm": 0.03745364166030434, "learning_rate": 0.0008035276328699967, "loss": 0.7201, "step": 14305 }, { "epoch": 0.726478912566156, "grad_norm": 0.032519358127602274, "learning_rate": 0.0008033515653455408, "loss": 0.6839, "step": 14310 }, { "epoch": 0.7267327486641876, "grad_norm": 0.03505356864686156, "learning_rate": 0.0008031754382746854, "loss": 0.6854, "step": 14315 }, { "epoch": 0.726986584762219, "grad_norm": 0.03138981559183381, "learning_rate": 0.0008029992516920033, "loss": 0.7082, "step": 14320 }, { "epoch": 0.7272404208602505, "grad_norm": 0.03563665367207425, "learning_rate": 0.0008028230056320791, "loss": 0.6819, "step": 14325 }, { "epoch": 0.7274942569582821, "grad_norm": 0.030599421379495055, "learning_rate": 0.0008026467001295092, "loss": 0.6835, "step": 14330 }, { "epoch": 0.7277480930563135, "grad_norm": 0.031293062053328054, "learning_rate": 0.0008024703352189011, "loss": 0.7242, "step": 14335 }, { "epoch": 0.728001929154345, "grad_norm": 0.033996200385986906, "learning_rate": 0.0008022939109348749, "loss": 0.7079, "step": 14340 }, { "epoch": 0.7282557652523766, "grad_norm": 0.043394755181515324, "learning_rate": 0.0008021174273120615, "loss": 0.6915, "step": 14345 }, { "epoch": 0.728509601350408, "grad_norm": 0.06002247369021229, "learning_rate": 0.0008019408843851037, "loss": 0.7943, "step": 14350 }, { "epoch": 0.7287634374484395, "grad_norm": 0.05352630005303392, "learning_rate": 0.0008017642821886562, "loss": 0.7405, "step": 14355 }, { "epoch": 0.7290172735464711, "grad_norm": 0.04677596389744062, "learning_rate": 0.0008015876207573848, "loss": 0.741, "step": 14360 }, { "epoch": 0.7292711096445026, "grad_norm": 0.045601050048032346, "learning_rate": 0.0008014109001259675, "loss": 0.7418, "step": 14365 }, { "epoch": 0.729524945742534, "grad_norm": 0.03870388719632002, "learning_rate": 0.0008012341203290936, "loss": 0.7115, "step": 14370 }, { "epoch": 0.7297787818405655, "grad_norm": 0.04965049739358654, "learning_rate": 0.0008010572814014643, "loss": 0.7248, "step": 14375 }, { "epoch": 0.7300326179385971, "grad_norm": 0.04334002393405453, "learning_rate": 0.0008008803833777919, "loss": 0.693, "step": 14380 }, { "epoch": 0.7302864540366285, "grad_norm": 0.03543056526801928, "learning_rate": 0.0008007034262928008, "loss": 0.712, "step": 14385 }, { "epoch": 0.73054029013466, "grad_norm": 0.03888569526239189, "learning_rate": 0.0008005264101812267, "loss": 0.7157, "step": 14390 }, { "epoch": 0.7307941262326916, "grad_norm": 0.03741637253895826, "learning_rate": 0.000800349335077817, "loss": 0.6783, "step": 14395 }, { "epoch": 0.731047962330723, "grad_norm": 0.028870429362687137, "learning_rate": 0.0008001722010173306, "loss": 0.7238, "step": 14400 }, { "epoch": 0.7313017984287545, "grad_norm": 0.030370518247641327, "learning_rate": 0.0007999950080345382, "loss": 0.714, "step": 14405 }, { "epoch": 0.7315556345267861, "grad_norm": 0.033676590221651766, "learning_rate": 0.0007998177561642218, "loss": 0.7224, "step": 14410 }, { "epoch": 0.7318094706248176, "grad_norm": 0.03367667966821678, "learning_rate": 0.000799640445441175, "loss": 0.7162, "step": 14415 }, { "epoch": 0.732063306722849, "grad_norm": 0.03348048867058878, "learning_rate": 0.000799463075900203, "loss": 0.7169, "step": 14420 }, { "epoch": 0.7323171428208806, "grad_norm": 0.042109187277969924, "learning_rate": 0.0007992856475761228, "loss": 0.6974, "step": 14425 }, { "epoch": 0.7325709789189121, "grad_norm": 0.03046184467766764, "learning_rate": 0.0007991081605037624, "loss": 0.6718, "step": 14430 }, { "epoch": 0.7328248150169435, "grad_norm": 0.0317184297416499, "learning_rate": 0.0007989306147179618, "loss": 0.7082, "step": 14435 }, { "epoch": 0.733078651114975, "grad_norm": 0.033759077276635144, "learning_rate": 0.0007987530102535723, "loss": 0.7037, "step": 14440 }, { "epoch": 0.7333324872130066, "grad_norm": 0.038445707478062426, "learning_rate": 0.0007985753471454566, "loss": 0.6942, "step": 14445 }, { "epoch": 0.733586323311038, "grad_norm": 0.04764624493816826, "learning_rate": 0.0007983976254284894, "loss": 0.6543, "step": 14450 }, { "epoch": 0.7338401594090695, "grad_norm": 0.03581578143023923, "learning_rate": 0.0007982198451375564, "loss": 0.7186, "step": 14455 }, { "epoch": 0.7340939955071011, "grad_norm": 0.03371997174743705, "learning_rate": 0.0007980420063075551, "loss": 0.7667, "step": 14460 }, { "epoch": 0.7343478316051326, "grad_norm": 0.03646309005952903, "learning_rate": 0.0007978641089733941, "loss": 0.6961, "step": 14465 }, { "epoch": 0.734601667703164, "grad_norm": 0.033528345021001174, "learning_rate": 0.0007976861531699942, "loss": 0.6972, "step": 14470 }, { "epoch": 0.7348555038011956, "grad_norm": 0.03796203439924517, "learning_rate": 0.0007975081389322868, "loss": 0.735, "step": 14475 }, { "epoch": 0.7351093398992271, "grad_norm": 0.03233445683582897, "learning_rate": 0.0007973300662952155, "loss": 0.7013, "step": 14480 }, { "epoch": 0.7353631759972585, "grad_norm": 0.0316092055769806, "learning_rate": 0.0007971519352937349, "loss": 0.6714, "step": 14485 }, { "epoch": 0.7356170120952901, "grad_norm": 0.031277573753963, "learning_rate": 0.0007969737459628112, "loss": 0.704, "step": 14490 }, { "epoch": 0.7358708481933216, "grad_norm": 0.04488652795121502, "learning_rate": 0.0007967954983374224, "loss": 0.7283, "step": 14495 }, { "epoch": 0.736124684291353, "grad_norm": 0.03627122379556517, "learning_rate": 0.0007966171924525573, "loss": 0.6925, "step": 14500 }, { "epoch": 0.7363785203893846, "grad_norm": 0.03607195277452343, "learning_rate": 0.0007964388283432165, "loss": 0.6945, "step": 14505 }, { "epoch": 0.7366323564874161, "grad_norm": 0.02970610068624744, "learning_rate": 0.0007962604060444121, "loss": 0.6966, "step": 14510 }, { "epoch": 0.7368861925854476, "grad_norm": 0.03347373204482646, "learning_rate": 0.0007960819255911673, "loss": 0.6952, "step": 14515 }, { "epoch": 0.737140028683479, "grad_norm": 0.03942401391081267, "learning_rate": 0.0007959033870185173, "loss": 0.7003, "step": 14520 }, { "epoch": 0.7373938647815106, "grad_norm": 0.03377907435885641, "learning_rate": 0.0007957247903615079, "loss": 0.6803, "step": 14525 }, { "epoch": 0.7376477008795421, "grad_norm": 0.03207930793787687, "learning_rate": 0.0007955461356551971, "loss": 0.6924, "step": 14530 }, { "epoch": 0.7379015369775735, "grad_norm": 0.034007855404817086, "learning_rate": 0.0007953674229346537, "loss": 0.7, "step": 14535 }, { "epoch": 0.7381553730756051, "grad_norm": 0.030740290529416694, "learning_rate": 0.000795188652234958, "loss": 0.6843, "step": 14540 }, { "epoch": 0.7384092091736366, "grad_norm": 0.05742415226351171, "learning_rate": 0.0007950098235912021, "loss": 0.9151, "step": 14545 }, { "epoch": 0.738663045271668, "grad_norm": 0.05057725807747636, "learning_rate": 0.0007948309370384891, "loss": 0.7099, "step": 14550 }, { "epoch": 0.7389168813696996, "grad_norm": 0.09037465491792765, "learning_rate": 0.0007946519926119335, "loss": 0.7408, "step": 14555 }, { "epoch": 0.7391707174677311, "grad_norm": 0.07205721919950031, "learning_rate": 0.000794472990346661, "loss": 0.7046, "step": 14560 }, { "epoch": 0.7394245535657625, "grad_norm": 0.06229645215955686, "learning_rate": 0.0007942939302778092, "loss": 0.7057, "step": 14565 }, { "epoch": 0.7396783896637941, "grad_norm": 0.04893262637599946, "learning_rate": 0.0007941148124405264, "loss": 0.7602, "step": 14570 }, { "epoch": 0.7399322257618256, "grad_norm": 0.04359886470380064, "learning_rate": 0.0007939356368699727, "loss": 0.7366, "step": 14575 }, { "epoch": 0.7401860618598571, "grad_norm": 0.036040229227501434, "learning_rate": 0.0007937564036013194, "loss": 0.6776, "step": 14580 }, { "epoch": 0.7404398979578886, "grad_norm": 0.052753227808771126, "learning_rate": 0.000793577112669749, "loss": 0.7175, "step": 14585 }, { "epoch": 0.7406937340559201, "grad_norm": 0.04026945935590256, "learning_rate": 0.0007933977641104555, "loss": 0.715, "step": 14590 }, { "epoch": 0.7409475701539516, "grad_norm": 0.03330550921546235, "learning_rate": 0.000793218357958644, "loss": 0.6746, "step": 14595 }, { "epoch": 0.741201406251983, "grad_norm": 0.03600810295199625, "learning_rate": 0.0007930388942495312, "loss": 0.7142, "step": 14600 }, { "epoch": 0.7414552423500146, "grad_norm": 0.03297552749570841, "learning_rate": 0.0007928593730183447, "loss": 0.6646, "step": 14605 }, { "epoch": 0.7417090784480461, "grad_norm": 0.0335314084221009, "learning_rate": 0.0007926797943003239, "loss": 0.7051, "step": 14610 }, { "epoch": 0.7419629145460775, "grad_norm": 0.03460033436725832, "learning_rate": 0.0007925001581307189, "loss": 0.7163, "step": 14615 }, { "epoch": 0.7422167506441091, "grad_norm": 0.03361663190658547, "learning_rate": 0.0007923204645447916, "loss": 0.6948, "step": 14620 }, { "epoch": 0.7424705867421406, "grad_norm": 0.03582525169641109, "learning_rate": 0.0007921407135778151, "loss": 0.6775, "step": 14625 }, { "epoch": 0.7427244228401721, "grad_norm": 0.03132408860122156, "learning_rate": 0.0007919609052650734, "loss": 0.7248, "step": 14630 }, { "epoch": 0.7429782589382036, "grad_norm": 0.031543406363358145, "learning_rate": 0.0007917810396418618, "loss": 0.7062, "step": 14635 }, { "epoch": 0.7432320950362351, "grad_norm": 0.031904350822122014, "learning_rate": 0.0007916011167434873, "loss": 0.7055, "step": 14640 }, { "epoch": 0.7434859311342666, "grad_norm": 0.031027452530548248, "learning_rate": 0.000791421136605268, "loss": 0.7091, "step": 14645 }, { "epoch": 0.7437397672322981, "grad_norm": 0.03300513623003071, "learning_rate": 0.0007912410992625326, "loss": 0.729, "step": 14650 }, { "epoch": 0.7439936033303296, "grad_norm": 0.034241615248767336, "learning_rate": 0.0007910610047506219, "loss": 0.6742, "step": 14655 }, { "epoch": 0.7442474394283611, "grad_norm": 0.034985662748825634, "learning_rate": 0.0007908808531048876, "loss": 0.7124, "step": 14660 }, { "epoch": 0.7445012755263926, "grad_norm": 0.03379879311456872, "learning_rate": 0.0007907006443606924, "loss": 0.7121, "step": 14665 }, { "epoch": 0.7447551116244241, "grad_norm": 0.03160633045990129, "learning_rate": 0.0007905203785534104, "loss": 0.7123, "step": 14670 }, { "epoch": 0.7450089477224556, "grad_norm": 0.03584404577534061, "learning_rate": 0.000790340055718427, "loss": 0.6948, "step": 14675 }, { "epoch": 0.7452627838204872, "grad_norm": 0.03192437589306027, "learning_rate": 0.0007901596758911384, "loss": 0.7038, "step": 14680 }, { "epoch": 0.7455166199185186, "grad_norm": 0.035106027378827176, "learning_rate": 0.0007899792391069527, "loss": 0.701, "step": 14685 }, { "epoch": 0.7457704560165501, "grad_norm": 0.055247683989146895, "learning_rate": 0.0007897987454012885, "loss": 0.6937, "step": 14690 }, { "epoch": 0.7460242921145817, "grad_norm": 0.031014855020389323, "learning_rate": 0.0007896181948095755, "loss": 0.7168, "step": 14695 }, { "epoch": 0.7462781282126131, "grad_norm": 0.02984070756545962, "learning_rate": 0.0007894375873672555, "loss": 0.6758, "step": 14700 }, { "epoch": 0.7465319643106446, "grad_norm": 0.032747378151814825, "learning_rate": 0.0007892569231097804, "loss": 0.7207, "step": 14705 }, { "epoch": 0.7467858004086761, "grad_norm": 0.03453661137080221, "learning_rate": 0.0007890762020726136, "loss": 0.6893, "step": 14710 }, { "epoch": 0.7470396365067076, "grad_norm": 0.03358076804616059, "learning_rate": 0.0007888954242912303, "loss": 0.6683, "step": 14715 }, { "epoch": 0.7472934726047391, "grad_norm": 0.0321398176341551, "learning_rate": 0.0007887145898011158, "loss": 0.6965, "step": 14720 }, { "epoch": 0.7475473087027706, "grad_norm": 0.04666123616700847, "learning_rate": 0.0007885336986377671, "loss": 0.6483, "step": 14725 }, { "epoch": 0.7478011448008022, "grad_norm": 0.03547491096637454, "learning_rate": 0.0007883527508366923, "loss": 0.7064, "step": 14730 }, { "epoch": 0.7480549808988336, "grad_norm": 0.03229871704658294, "learning_rate": 0.0007881717464334104, "loss": 0.6933, "step": 14735 }, { "epoch": 0.7483088169968651, "grad_norm": 0.032491419232776146, "learning_rate": 0.000787990685463452, "loss": 0.6679, "step": 14740 }, { "epoch": 0.7485626530948967, "grad_norm": 0.03430856979114074, "learning_rate": 0.000787809567962358, "loss": 0.6789, "step": 14745 }, { "epoch": 0.7488164891929281, "grad_norm": 0.03280782833969156, "learning_rate": 0.0007876283939656814, "loss": 0.6954, "step": 14750 }, { "epoch": 0.7490703252909596, "grad_norm": 0.036666919649989214, "learning_rate": 0.0007874471635089853, "loss": 0.6671, "step": 14755 }, { "epoch": 0.7493241613889912, "grad_norm": 0.03378835624451928, "learning_rate": 0.0007872658766278444, "loss": 0.6664, "step": 14760 }, { "epoch": 0.7495779974870226, "grad_norm": 0.029735915272109788, "learning_rate": 0.0007870845333578447, "loss": 0.6492, "step": 14765 }, { "epoch": 0.7498318335850541, "grad_norm": 0.03249920118951511, "learning_rate": 0.0007869031337345828, "loss": 0.6598, "step": 14770 }, { "epoch": 0.7500856696830857, "grad_norm": 0.03219814379113128, "learning_rate": 0.0007867216777936665, "loss": 0.7111, "step": 14775 }, { "epoch": 0.7503395057811171, "grad_norm": 0.03421087576596425, "learning_rate": 0.0007865401655707148, "loss": 0.736, "step": 14780 }, { "epoch": 0.7505933418791486, "grad_norm": 0.03231322772309719, "learning_rate": 0.0007863585971013574, "loss": 0.728, "step": 14785 }, { "epoch": 0.7508471779771801, "grad_norm": 0.03237315178170715, "learning_rate": 0.0007861769724212353, "loss": 0.6854, "step": 14790 }, { "epoch": 0.7511010140752117, "grad_norm": 0.03493862393511305, "learning_rate": 0.0007859952915660009, "loss": 0.7343, "step": 14795 }, { "epoch": 0.7513548501732431, "grad_norm": 0.030106499817806085, "learning_rate": 0.000785813554571317, "loss": 0.6885, "step": 14800 }, { "epoch": 0.7516086862712746, "grad_norm": 0.0299564686608778, "learning_rate": 0.0007856317614728578, "loss": 0.7342, "step": 14805 }, { "epoch": 0.7518625223693062, "grad_norm": 0.03087414123799912, "learning_rate": 0.0007854499123063081, "loss": 0.6789, "step": 14810 }, { "epoch": 0.7521163584673376, "grad_norm": 0.034093290113439964, "learning_rate": 0.0007852680071073644, "loss": 0.7101, "step": 14815 }, { "epoch": 0.7523701945653691, "grad_norm": 0.0306649755431816, "learning_rate": 0.0007850860459117332, "loss": 0.6796, "step": 14820 }, { "epoch": 0.7526240306634007, "grad_norm": 0.03314271974990694, "learning_rate": 0.0007849040287551332, "loss": 0.6848, "step": 14825 }, { "epoch": 0.7528778667614321, "grad_norm": 0.0301371425824357, "learning_rate": 0.0007847219556732929, "loss": 0.6938, "step": 14830 }, { "epoch": 0.7531317028594636, "grad_norm": 0.03221828477908601, "learning_rate": 0.0007845398267019528, "loss": 0.7672, "step": 14835 }, { "epoch": 0.7533855389574952, "grad_norm": 0.0338922386450304, "learning_rate": 0.0007843576418768637, "loss": 0.705, "step": 14840 }, { "epoch": 0.7536393750555267, "grad_norm": 0.032954284376615885, "learning_rate": 0.0007841754012337876, "loss": 0.6834, "step": 14845 }, { "epoch": 0.7538932111535581, "grad_norm": 0.034100584493029, "learning_rate": 0.0007839931048084971, "loss": 0.6887, "step": 14850 }, { "epoch": 0.7541470472515897, "grad_norm": 0.032646337240582225, "learning_rate": 0.0007838107526367768, "loss": 0.7216, "step": 14855 }, { "epoch": 0.7544008833496212, "grad_norm": 0.03664315474875518, "learning_rate": 0.0007836283447544211, "loss": 0.6785, "step": 14860 }, { "epoch": 0.7546547194476526, "grad_norm": 0.03475921572884608, "learning_rate": 0.0007834458811972356, "loss": 0.6793, "step": 14865 }, { "epoch": 0.7549085555456841, "grad_norm": 0.030869249522562314, "learning_rate": 0.0007832633620010372, "loss": 0.7039, "step": 14870 }, { "epoch": 0.7551623916437157, "grad_norm": 0.032334026538272324, "learning_rate": 0.0007830807872016536, "loss": 0.7244, "step": 14875 }, { "epoch": 0.7554162277417471, "grad_norm": 0.0354954354984085, "learning_rate": 0.000782898156834923, "loss": 0.7524, "step": 14880 }, { "epoch": 0.7556700638397786, "grad_norm": 0.035072622327381425, "learning_rate": 0.000782715470936695, "loss": 0.6909, "step": 14885 }, { "epoch": 0.7559238999378102, "grad_norm": 0.03144811741332261, "learning_rate": 0.0007825327295428302, "loss": 0.6806, "step": 14890 }, { "epoch": 0.7561777360358417, "grad_norm": 0.033667800452331025, "learning_rate": 0.0007823499326891994, "loss": 0.7174, "step": 14895 }, { "epoch": 0.7564315721338731, "grad_norm": 0.036040768269752554, "learning_rate": 0.000782167080411685, "loss": 0.684, "step": 14900 }, { "epoch": 0.7566854082319047, "grad_norm": 0.03528594949820456, "learning_rate": 0.0007819841727461798, "loss": 0.7184, "step": 14905 }, { "epoch": 0.7569392443299362, "grad_norm": 0.03121652954116864, "learning_rate": 0.0007818012097285876, "loss": 0.7343, "step": 14910 }, { "epoch": 0.7571930804279676, "grad_norm": 0.0345641664761462, "learning_rate": 0.0007816181913948235, "loss": 0.7152, "step": 14915 }, { "epoch": 0.7574469165259992, "grad_norm": 0.03123172435955336, "learning_rate": 0.0007814351177808128, "loss": 0.7224, "step": 14920 }, { "epoch": 0.7577007526240307, "grad_norm": 0.14672083158687915, "learning_rate": 0.000781251988922492, "loss": 0.8115, "step": 14925 }, { "epoch": 0.7579545887220621, "grad_norm": 0.1243586132248442, "learning_rate": 0.0007810688048558083, "loss": 0.7397, "step": 14930 }, { "epoch": 0.7582084248200937, "grad_norm": 0.051047443926543284, "learning_rate": 0.00078088556561672, "loss": 0.7617, "step": 14935 }, { "epoch": 0.7584622609181252, "grad_norm": 0.0361217265213863, "learning_rate": 0.0007807022712411957, "loss": 0.705, "step": 14940 }, { "epoch": 0.7587160970161567, "grad_norm": 0.039629264284229054, "learning_rate": 0.0007805189217652158, "loss": 0.7305, "step": 14945 }, { "epoch": 0.7589699331141881, "grad_norm": 0.03476543343604447, "learning_rate": 0.0007803355172247702, "loss": 0.7091, "step": 14950 }, { "epoch": 0.7592237692122197, "grad_norm": 0.03572852103141015, "learning_rate": 0.0007801520576558608, "loss": 0.7249, "step": 14955 }, { "epoch": 0.7594776053102512, "grad_norm": 0.03248497178985213, "learning_rate": 0.0007799685430944995, "loss": 0.6864, "step": 14960 }, { "epoch": 0.7597314414082826, "grad_norm": 0.03472557253538441, "learning_rate": 0.0007797849735767094, "loss": 0.6828, "step": 14965 }, { "epoch": 0.7599852775063142, "grad_norm": 0.029965470956712346, "learning_rate": 0.0007796013491385243, "loss": 0.7044, "step": 14970 }, { "epoch": 0.7602391136043457, "grad_norm": 0.030852814768581177, "learning_rate": 0.0007794176698159887, "loss": 0.6873, "step": 14975 }, { "epoch": 0.7604929497023771, "grad_norm": 0.03526374166940503, "learning_rate": 0.000779233935645158, "loss": 0.7319, "step": 14980 }, { "epoch": 0.7607467858004087, "grad_norm": 0.03485321630519142, "learning_rate": 0.0007790501466620983, "loss": 0.7091, "step": 14985 }, { "epoch": 0.7610006218984402, "grad_norm": 0.5609451377331782, "learning_rate": 0.0007788663029028863, "loss": 0.6583, "step": 14990 }, { "epoch": 0.7612544579964716, "grad_norm": 0.04767240927137768, "learning_rate": 0.0007786824044036098, "loss": 0.6804, "step": 14995 }, { "epoch": 0.7615082940945032, "grad_norm": 0.046137997818592937, "learning_rate": 0.0007784984512003671, "loss": 0.7521, "step": 15000 }, { "epoch": 0.7617621301925347, "grad_norm": 0.03978686635772606, "learning_rate": 0.0007783144433292673, "loss": 0.7492, "step": 15005 }, { "epoch": 0.7620159662905662, "grad_norm": 0.031157989492179496, "learning_rate": 0.0007781303808264303, "loss": 0.6409, "step": 15010 }, { "epoch": 0.7622698023885977, "grad_norm": 0.0378632016874794, "learning_rate": 0.0007779462637279865, "loss": 0.7158, "step": 15015 }, { "epoch": 0.7625236384866292, "grad_norm": 0.47749751034085863, "learning_rate": 0.0007777620920700773, "loss": 0.7234, "step": 15020 }, { "epoch": 0.7627774745846607, "grad_norm": 0.08137750738848192, "learning_rate": 0.0007775778658888546, "loss": 0.7628, "step": 15025 }, { "epoch": 0.7630313106826921, "grad_norm": 0.06806606092440419, "learning_rate": 0.000777393585220481, "loss": 0.7433, "step": 15030 }, { "epoch": 0.7632851467807237, "grad_norm": 0.0976434287611991, "learning_rate": 0.0007772092501011301, "loss": 0.7623, "step": 15035 }, { "epoch": 0.7635389828787552, "grad_norm": 0.045634646009329, "learning_rate": 0.0007770248605669858, "loss": 0.7129, "step": 15040 }, { "epoch": 0.7637928189767866, "grad_norm": 0.04040902105411713, "learning_rate": 0.0007768404166542431, "loss": 0.7263, "step": 15045 }, { "epoch": 0.7640466550748182, "grad_norm": 0.04435253676792917, "learning_rate": 0.000776655918399107, "loss": 0.7077, "step": 15050 }, { "epoch": 0.7643004911728497, "grad_norm": 0.07147071042623866, "learning_rate": 0.0007764713658377938, "loss": 0.7169, "step": 15055 }, { "epoch": 0.7645543272708812, "grad_norm": 0.04458784347386294, "learning_rate": 0.0007762867590065302, "loss": 0.7034, "step": 15060 }, { "epoch": 0.7648081633689127, "grad_norm": 0.04028762825743298, "learning_rate": 0.0007761020979415537, "loss": 0.7292, "step": 15065 }, { "epoch": 0.7650619994669442, "grad_norm": 0.03706345356596311, "learning_rate": 0.0007759173826791123, "loss": 0.7207, "step": 15070 }, { "epoch": 0.7653158355649757, "grad_norm": 0.033574876558876574, "learning_rate": 0.0007757326132554648, "loss": 0.7123, "step": 15075 }, { "epoch": 0.7655696716630072, "grad_norm": 0.03223408958333217, "learning_rate": 0.0007755477897068803, "loss": 0.7364, "step": 15080 }, { "epoch": 0.7658235077610387, "grad_norm": 0.03263629351932663, "learning_rate": 0.0007753629120696388, "loss": 0.7252, "step": 15085 }, { "epoch": 0.7660773438590702, "grad_norm": 0.035423455843432976, "learning_rate": 0.000775177980380031, "loss": 0.7012, "step": 15090 }, { "epoch": 0.7663311799571016, "grad_norm": 0.03260865608870414, "learning_rate": 0.0007749929946743578, "loss": 0.729, "step": 15095 }, { "epoch": 0.7665850160551332, "grad_norm": 0.030764093824245012, "learning_rate": 0.0007748079549889312, "loss": 0.692, "step": 15100 }, { "epoch": 0.7668388521531647, "grad_norm": 0.03215724279682096, "learning_rate": 0.0007746228613600735, "loss": 0.6815, "step": 15105 }, { "epoch": 0.7670926882511963, "grad_norm": 0.03534146592555897, "learning_rate": 0.0007744377138241177, "loss": 0.6941, "step": 15110 }, { "epoch": 0.7673465243492277, "grad_norm": 0.03224225474996451, "learning_rate": 0.0007742525124174073, "loss": 0.729, "step": 15115 }, { "epoch": 0.7676003604472592, "grad_norm": 0.033990250219169245, "learning_rate": 0.0007740672571762963, "loss": 0.7222, "step": 15120 }, { "epoch": 0.7678541965452907, "grad_norm": 0.032760058551244926, "learning_rate": 0.0007738819481371495, "loss": 0.6669, "step": 15125 }, { "epoch": 0.7681080326433222, "grad_norm": 0.03740747663582317, "learning_rate": 0.0007736965853363423, "loss": 0.7071, "step": 15130 }, { "epoch": 0.7683618687413537, "grad_norm": 0.036548460407979, "learning_rate": 0.0007735111688102602, "loss": 0.7742, "step": 15135 }, { "epoch": 0.7686157048393852, "grad_norm": 0.032887535800904706, "learning_rate": 0.0007733256985952997, "loss": 0.7133, "step": 15140 }, { "epoch": 0.7688695409374167, "grad_norm": 0.08252154006464, "learning_rate": 0.0007731401747278676, "loss": 0.6888, "step": 15145 }, { "epoch": 0.7691233770354482, "grad_norm": 0.2579200405316775, "learning_rate": 0.0007729545972443812, "loss": 0.7212, "step": 15150 }, { "epoch": 0.7693772131334797, "grad_norm": 0.03024651397899259, "learning_rate": 0.000772768966181269, "loss": 0.6655, "step": 15155 }, { "epoch": 0.7696310492315113, "grad_norm": 0.03953351219846279, "learning_rate": 0.0007725832815749686, "loss": 0.7429, "step": 15160 }, { "epoch": 0.7698848853295427, "grad_norm": 0.03139967324343511, "learning_rate": 0.0007723975434619296, "loss": 0.6602, "step": 15165 }, { "epoch": 0.7701387214275742, "grad_norm": 0.03166089816956448, "learning_rate": 0.0007722117518786112, "loss": 0.6613, "step": 15170 }, { "epoch": 0.7703925575256058, "grad_norm": 0.032709031822673706, "learning_rate": 0.0007720259068614836, "loss": 0.7182, "step": 15175 }, { "epoch": 0.7706463936236372, "grad_norm": 0.03094548515669175, "learning_rate": 0.0007718400084470267, "loss": 0.7136, "step": 15180 }, { "epoch": 0.7709002297216687, "grad_norm": 0.03604006335510385, "learning_rate": 0.0007716540566717321, "loss": 0.7123, "step": 15185 }, { "epoch": 0.7711540658197003, "grad_norm": 0.032007461797167604, "learning_rate": 0.0007714680515721008, "loss": 0.7094, "step": 15190 }, { "epoch": 0.7714079019177317, "grad_norm": 0.03120356307554995, "learning_rate": 0.0007712819931846448, "loss": 0.7276, "step": 15195 }, { "epoch": 0.7716617380157632, "grad_norm": 0.03149152678682684, "learning_rate": 0.0007710958815458866, "loss": 0.6825, "step": 15200 }, { "epoch": 0.7719155741137947, "grad_norm": 0.032617122676294644, "learning_rate": 0.0007709097166923586, "loss": 0.6486, "step": 15205 }, { "epoch": 0.7721694102118263, "grad_norm": 0.032137431771119725, "learning_rate": 0.0007707234986606043, "loss": 0.7225, "step": 15210 }, { "epoch": 0.7724232463098577, "grad_norm": 0.03286038757700011, "learning_rate": 0.0007705372274871774, "loss": 0.7234, "step": 15215 }, { "epoch": 0.7726770824078892, "grad_norm": 0.031298125731407105, "learning_rate": 0.0007703509032086417, "loss": 0.6681, "step": 15220 }, { "epoch": 0.7729309185059208, "grad_norm": 0.03507192701717385, "learning_rate": 0.0007701645258615721, "loss": 0.6981, "step": 15225 }, { "epoch": 0.7731847546039522, "grad_norm": 0.0291327197979621, "learning_rate": 0.0007699780954825534, "loss": 0.7125, "step": 15230 }, { "epoch": 0.7734385907019837, "grad_norm": 0.028441106574990126, "learning_rate": 0.0007697916121081809, "loss": 0.6648, "step": 15235 }, { "epoch": 0.7736924268000153, "grad_norm": 0.029831197355549992, "learning_rate": 0.0007696050757750603, "loss": 0.6713, "step": 15240 }, { "epoch": 0.7739462628980467, "grad_norm": 0.026858687909649112, "learning_rate": 0.000769418486519808, "loss": 0.7096, "step": 15245 }, { "epoch": 0.7742000989960782, "grad_norm": 0.03092921077949159, "learning_rate": 0.0007692318443790503, "loss": 0.6649, "step": 15250 }, { "epoch": 0.7744539350941098, "grad_norm": 0.030617349399658194, "learning_rate": 0.0007690451493894241, "loss": 0.7191, "step": 15255 }, { "epoch": 0.7747077711921412, "grad_norm": 0.029635943063400977, "learning_rate": 0.0007688584015875769, "loss": 0.7131, "step": 15260 }, { "epoch": 0.7749616072901727, "grad_norm": 0.03437490948917494, "learning_rate": 0.0007686716010101663, "loss": 0.7647, "step": 15265 }, { "epoch": 0.7752154433882043, "grad_norm": 0.038595533643138946, "learning_rate": 0.0007684847476938601, "loss": 0.6923, "step": 15270 }, { "epoch": 0.7754692794862358, "grad_norm": 0.03415039860263324, "learning_rate": 0.0007682978416753371, "loss": 0.693, "step": 15275 }, { "epoch": 0.7757231155842672, "grad_norm": 0.03191064663670735, "learning_rate": 0.0007681108829912857, "loss": 0.6863, "step": 15280 }, { "epoch": 0.7759769516822987, "grad_norm": 0.039912944770506574, "learning_rate": 0.0007679238716784049, "loss": 0.8987, "step": 15285 }, { "epoch": 0.7762307877803303, "grad_norm": 0.05389366837900137, "learning_rate": 0.0007677368077734045, "loss": 0.6973, "step": 15290 }, { "epoch": 0.7764846238783617, "grad_norm": 0.11935936961686032, "learning_rate": 0.0007675496913130038, "loss": 0.8413, "step": 15295 }, { "epoch": 0.7767384599763932, "grad_norm": 0.34098267757563056, "learning_rate": 0.0007673625223339329, "loss": 0.7565, "step": 15300 }, { "epoch": 0.7769922960744248, "grad_norm": 0.06665299394736711, "learning_rate": 0.0007671753008729323, "loss": 0.7651, "step": 15305 }, { "epoch": 0.7772461321724562, "grad_norm": 0.07346588100711213, "learning_rate": 0.0007669880269667524, "loss": 0.689, "step": 15310 }, { "epoch": 0.7774999682704877, "grad_norm": 0.09666100298738436, "learning_rate": 0.0007668007006521544, "loss": 0.7307, "step": 15315 }, { "epoch": 0.7777538043685193, "grad_norm": 0.05676581531996185, "learning_rate": 0.0007666133219659094, "loss": 0.7177, "step": 15320 }, { "epoch": 0.7780076404665508, "grad_norm": 0.04329111256015885, "learning_rate": 0.0007664258909447989, "loss": 0.764, "step": 15325 }, { "epoch": 0.7782614765645822, "grad_norm": 0.04435698479637442, "learning_rate": 0.0007662384076256146, "loss": 0.7322, "step": 15330 }, { "epoch": 0.7785153126626138, "grad_norm": 0.03940055407533615, "learning_rate": 0.0007660508720451585, "loss": 0.7544, "step": 15335 }, { "epoch": 0.7787691487606453, "grad_norm": 0.03907003722227113, "learning_rate": 0.0007658632842402432, "loss": 0.7063, "step": 15340 }, { "epoch": 0.7790229848586767, "grad_norm": 0.03560523564815547, "learning_rate": 0.0007656756442476911, "loss": 0.7081, "step": 15345 }, { "epoch": 0.7792768209567082, "grad_norm": 0.03627025603006745, "learning_rate": 0.0007654879521043347, "loss": 0.6918, "step": 15350 }, { "epoch": 0.7795306570547398, "grad_norm": 0.037756388424523175, "learning_rate": 0.0007653002078470175, "loss": 0.7287, "step": 15355 }, { "epoch": 0.7797844931527712, "grad_norm": 0.030960524507734573, "learning_rate": 0.0007651124115125924, "loss": 0.7028, "step": 15360 }, { "epoch": 0.7800383292508027, "grad_norm": 0.03377339777562192, "learning_rate": 0.0007649245631379232, "loss": 0.7117, "step": 15365 }, { "epoch": 0.7802921653488343, "grad_norm": 0.03019909542905132, "learning_rate": 0.0007647366627598835, "loss": 0.6757, "step": 15370 }, { "epoch": 0.7805460014468658, "grad_norm": 0.031368354687971105, "learning_rate": 0.0007645487104153568, "loss": 0.7285, "step": 15375 }, { "epoch": 0.7807998375448972, "grad_norm": 0.029907555830791934, "learning_rate": 0.0007643607061412379, "loss": 0.6813, "step": 15380 }, { "epoch": 0.7810536736429288, "grad_norm": 0.029693609767534216, "learning_rate": 0.0007641726499744306, "loss": 0.6918, "step": 15385 }, { "epoch": 0.7813075097409603, "grad_norm": 0.031674920539778245, "learning_rate": 0.0007639845419518494, "loss": 0.7184, "step": 15390 }, { "epoch": 0.7815613458389917, "grad_norm": 0.032679843894852255, "learning_rate": 0.0007637963821104192, "loss": 0.6822, "step": 15395 }, { "epoch": 0.7818151819370233, "grad_norm": 0.03161491285281414, "learning_rate": 0.0007636081704870749, "loss": 0.6868, "step": 15400 }, { "epoch": 0.7820690180350548, "grad_norm": 0.029885507002612763, "learning_rate": 0.0007634199071187613, "loss": 0.7127, "step": 15405 }, { "epoch": 0.7823228541330862, "grad_norm": 0.03458954334378118, "learning_rate": 0.0007632315920424335, "loss": 0.6906, "step": 15410 }, { "epoch": 0.7825766902311178, "grad_norm": 0.03141667598870458, "learning_rate": 0.000763043225295057, "loss": 0.6641, "step": 15415 }, { "epoch": 0.7828305263291493, "grad_norm": 0.029599371955797055, "learning_rate": 0.0007628548069136071, "loss": 0.7026, "step": 15420 }, { "epoch": 0.7830843624271808, "grad_norm": 0.032456764371258044, "learning_rate": 0.0007626663369350695, "loss": 0.6426, "step": 15425 }, { "epoch": 0.7833381985252122, "grad_norm": 0.03692675101535406, "learning_rate": 0.0007624778153964398, "loss": 0.6872, "step": 15430 }, { "epoch": 0.7835920346232438, "grad_norm": 0.03242777123650107, "learning_rate": 0.0007622892423347241, "loss": 0.7155, "step": 15435 }, { "epoch": 0.7838458707212753, "grad_norm": 0.037613190811190134, "learning_rate": 0.000762100617786938, "loss": 0.7273, "step": 15440 }, { "epoch": 0.7840997068193067, "grad_norm": 0.033786859948227964, "learning_rate": 0.0007619119417901077, "loss": 0.7171, "step": 15445 }, { "epoch": 0.7843535429173383, "grad_norm": 0.03281023423760598, "learning_rate": 0.0007617232143812693, "loss": 0.6884, "step": 15450 }, { "epoch": 0.7846073790153698, "grad_norm": 0.03486810887020331, "learning_rate": 0.0007615344355974694, "loss": 0.7108, "step": 15455 }, { "epoch": 0.7848612151134012, "grad_norm": 0.0323800363466372, "learning_rate": 0.0007613456054757639, "loss": 0.7261, "step": 15460 }, { "epoch": 0.7851150512114328, "grad_norm": 0.02930073659133771, "learning_rate": 0.0007611567240532193, "loss": 0.6987, "step": 15465 }, { "epoch": 0.7853688873094643, "grad_norm": 0.032696073917996615, "learning_rate": 0.0007609677913669124, "loss": 0.7106, "step": 15470 }, { "epoch": 0.7856227234074957, "grad_norm": 0.033159447073239985, "learning_rate": 0.0007607788074539293, "loss": 0.7096, "step": 15475 }, { "epoch": 0.7858765595055273, "grad_norm": 0.03029900238539528, "learning_rate": 0.0007605897723513669, "loss": 0.716, "step": 15480 }, { "epoch": 0.7861303956035588, "grad_norm": 0.031032182997006517, "learning_rate": 0.0007604006860963315, "loss": 0.6914, "step": 15485 }, { "epoch": 0.7863842317015903, "grad_norm": 0.6776020441271128, "learning_rate": 0.0007602115487259403, "loss": 0.7213, "step": 15490 }, { "epoch": 0.7866380677996218, "grad_norm": 0.05194333925030968, "learning_rate": 0.0007600223602773198, "loss": 0.7369, "step": 15495 }, { "epoch": 0.7868919038976533, "grad_norm": 0.04124286470016046, "learning_rate": 0.0007598331207876066, "loss": 0.7032, "step": 15500 }, { "epoch": 0.7871457399956848, "grad_norm": 0.050992994476708735, "learning_rate": 0.0007596438302939475, "loss": 0.7116, "step": 15505 }, { "epoch": 0.7873995760937162, "grad_norm": 0.03910949841919257, "learning_rate": 0.0007594544888334994, "loss": 0.7059, "step": 15510 }, { "epoch": 0.7876534121917478, "grad_norm": 0.03658228139599529, "learning_rate": 0.0007592650964434292, "loss": 0.7327, "step": 15515 }, { "epoch": 0.7879072482897793, "grad_norm": 0.6543273906423895, "learning_rate": 0.0007590756531609133, "loss": 0.7363, "step": 15520 }, { "epoch": 0.7881610843878107, "grad_norm": 0.04574297982443633, "learning_rate": 0.0007588861590231388, "loss": 0.7071, "step": 15525 }, { "epoch": 0.7884149204858423, "grad_norm": 0.05387585614624529, "learning_rate": 0.0007586966140673024, "loss": 0.7045, "step": 15530 }, { "epoch": 0.7886687565838738, "grad_norm": 0.035174637025802445, "learning_rate": 0.0007585070183306106, "loss": 0.677, "step": 15535 }, { "epoch": 0.7889225926819053, "grad_norm": 0.03920750746735018, "learning_rate": 0.0007583173718502803, "loss": 0.6893, "step": 15540 }, { "epoch": 0.7891764287799368, "grad_norm": 0.03853363117374574, "learning_rate": 0.0007581276746635383, "loss": 0.7116, "step": 15545 }, { "epoch": 0.7894302648779683, "grad_norm": 0.03808389224950803, "learning_rate": 0.000757937926807621, "loss": 0.6563, "step": 15550 }, { "epoch": 0.7896841009759998, "grad_norm": 0.07074135304538541, "learning_rate": 0.0007577481283197749, "loss": 0.7184, "step": 15555 }, { "epoch": 0.7899379370740313, "grad_norm": 18.700588748062753, "learning_rate": 0.0007575582792372567, "loss": 0.7214, "step": 15560 }, { "epoch": 0.7901917731720628, "grad_norm": 0.05072493716261705, "learning_rate": 0.0007573683795973328, "loss": 0.6784, "step": 15565 }, { "epoch": 0.7904456092700943, "grad_norm": 0.03401302673509027, "learning_rate": 0.0007571784294372792, "loss": 0.7286, "step": 15570 }, { "epoch": 0.7906994453681258, "grad_norm": 0.05729898795237374, "learning_rate": 0.0007569884287943826, "loss": 0.7142, "step": 15575 }, { "epoch": 0.7909532814661573, "grad_norm": 0.04250763961753953, "learning_rate": 0.000756798377705939, "loss": 0.7657, "step": 15580 }, { "epoch": 0.7912071175641888, "grad_norm": 0.08365735194859497, "learning_rate": 0.0007566082762092546, "loss": 0.7378, "step": 15585 }, { "epoch": 0.7914609536622204, "grad_norm": 0.061569568351706024, "learning_rate": 0.0007564181243416453, "loss": 0.6978, "step": 15590 }, { "epoch": 0.7917147897602518, "grad_norm": 0.04486522049902785, "learning_rate": 0.0007562279221404368, "loss": 0.7099, "step": 15595 }, { "epoch": 0.7919686258582833, "grad_norm": 0.09833794754094159, "learning_rate": 0.0007560376696429651, "loss": 0.6957, "step": 15600 }, { "epoch": 0.7922224619563148, "grad_norm": 0.05415479761857598, "learning_rate": 0.0007558473668865755, "loss": 0.7092, "step": 15605 }, { "epoch": 0.7924762980543463, "grad_norm": 0.0782705606169497, "learning_rate": 0.0007556570139086239, "loss": 0.7418, "step": 15610 }, { "epoch": 0.7927301341523778, "grad_norm": 0.0388420089518164, "learning_rate": 0.0007554666107464754, "loss": 0.702, "step": 15615 }, { "epoch": 0.7929839702504093, "grad_norm": 0.0376731311639368, "learning_rate": 0.0007552761574375052, "loss": 0.726, "step": 15620 }, { "epoch": 0.7932378063484408, "grad_norm": 0.03674993656828158, "learning_rate": 0.0007550856540190985, "loss": 0.6981, "step": 15625 }, { "epoch": 0.7934916424464723, "grad_norm": 0.03211659683392685, "learning_rate": 0.0007548951005286498, "loss": 0.7171, "step": 15630 }, { "epoch": 0.7937454785445038, "grad_norm": 0.03284808010037892, "learning_rate": 0.0007547044970035641, "loss": 0.7005, "step": 15635 }, { "epoch": 0.7939993146425354, "grad_norm": 0.035179245400094775, "learning_rate": 0.0007545138434812559, "loss": 0.6686, "step": 15640 }, { "epoch": 0.7942531507405668, "grad_norm": 0.03299057254893175, "learning_rate": 0.0007543231399991495, "loss": 0.7073, "step": 15645 }, { "epoch": 0.7945069868385983, "grad_norm": 0.03127092304397352, "learning_rate": 0.0007541323865946789, "loss": 0.6919, "step": 15650 }, { "epoch": 0.7947608229366299, "grad_norm": 0.030391791487195393, "learning_rate": 0.0007539415833052882, "loss": 0.7228, "step": 15655 }, { "epoch": 0.7950146590346613, "grad_norm": 0.7374223159037425, "learning_rate": 0.0007537507301684312, "loss": 0.6605, "step": 15660 }, { "epoch": 0.7952684951326928, "grad_norm": 0.0744213668960736, "learning_rate": 0.0007535598272215712, "loss": 0.7182, "step": 15665 }, { "epoch": 0.7955223312307244, "grad_norm": 0.04568245542298855, "learning_rate": 0.0007533688745021817, "loss": 0.6661, "step": 15670 }, { "epoch": 0.7957761673287558, "grad_norm": 0.03792821239763951, "learning_rate": 0.0007531778720477457, "loss": 0.7106, "step": 15675 }, { "epoch": 0.7960300034267873, "grad_norm": 0.09694762846919533, "learning_rate": 0.000752986819895756, "loss": 0.7033, "step": 15680 }, { "epoch": 0.7962838395248188, "grad_norm": 0.03489728358196131, "learning_rate": 0.0007527957180837152, "loss": 0.7354, "step": 15685 }, { "epoch": 0.7965376756228503, "grad_norm": 0.046583405268994404, "learning_rate": 0.0007526045666491355, "loss": 0.7071, "step": 15690 }, { "epoch": 0.7967915117208818, "grad_norm": 0.05012892420373314, "learning_rate": 0.0007524133656295392, "loss": 0.6866, "step": 15695 }, { "epoch": 0.7970453478189133, "grad_norm": 0.03449884957696478, "learning_rate": 0.0007522221150624579, "loss": 0.7631, "step": 15700 }, { "epoch": 0.7972991839169449, "grad_norm": 0.031809424072869706, "learning_rate": 0.0007520308149854336, "loss": 0.6811, "step": 15705 }, { "epoch": 0.7975530200149763, "grad_norm": 0.04620580961635948, "learning_rate": 0.0007518394654360169, "loss": 0.6905, "step": 15710 }, { "epoch": 0.7978068561130078, "grad_norm": 0.03449206822934349, "learning_rate": 0.000751648066451769, "loss": 0.7172, "step": 15715 }, { "epoch": 0.7980606922110394, "grad_norm": 0.02844818751780594, "learning_rate": 0.0007514566180702609, "loss": 0.6789, "step": 15720 }, { "epoch": 0.7983145283090708, "grad_norm": 0.1509876771917359, "learning_rate": 0.0007512651203290723, "loss": 0.7282, "step": 15725 }, { "epoch": 0.7985683644071023, "grad_norm": 0.049553458260520586, "learning_rate": 0.000751073573265794, "loss": 0.6834, "step": 15730 }, { "epoch": 0.7988222005051339, "grad_norm": 0.03288775248615093, "learning_rate": 0.0007508819769180252, "loss": 0.6813, "step": 15735 }, { "epoch": 0.7990760366031653, "grad_norm": 0.0400792541166591, "learning_rate": 0.0007506903313233755, "loss": 0.6955, "step": 15740 }, { "epoch": 0.7993298727011968, "grad_norm": 0.03275957948431163, "learning_rate": 0.0007504986365194639, "loss": 0.646, "step": 15745 }, { "epoch": 0.7995837087992284, "grad_norm": 0.03220440774274188, "learning_rate": 0.0007503068925439194, "loss": 0.6643, "step": 15750 }, { "epoch": 0.7998375448972599, "grad_norm": 0.029878775176747783, "learning_rate": 0.00075011509943438, "loss": 0.6498, "step": 15755 }, { "epoch": 0.8000913809952913, "grad_norm": 0.046473858473473365, "learning_rate": 0.0007499232572284938, "loss": 0.7047, "step": 15760 }, { "epoch": 0.8003452170933228, "grad_norm": 0.031127884295178144, "learning_rate": 0.0007497313659639188, "loss": 0.6978, "step": 15765 }, { "epoch": 0.8005990531913544, "grad_norm": 0.030940840561540103, "learning_rate": 0.0007495394256783219, "loss": 0.6818, "step": 15770 }, { "epoch": 0.8008528892893858, "grad_norm": 0.07841291430899305, "learning_rate": 0.0007493474364093803, "loss": 0.7085, "step": 15775 }, { "epoch": 0.8011067253874173, "grad_norm": 0.08580984735660796, "learning_rate": 0.0007491553981947804, "loss": 0.659, "step": 15780 }, { "epoch": 0.8013605614854489, "grad_norm": 0.05360337968989419, "learning_rate": 0.0007489633110722183, "loss": 0.6714, "step": 15785 }, { "epoch": 0.8016143975834803, "grad_norm": 0.033327988654252315, "learning_rate": 0.0007487711750793998, "loss": 0.7149, "step": 15790 }, { "epoch": 0.8018682336815118, "grad_norm": 0.03703672227639531, "learning_rate": 0.0007485789902540403, "loss": 0.6669, "step": 15795 }, { "epoch": 0.8021220697795434, "grad_norm": 0.033719078493257795, "learning_rate": 0.0007483867566338647, "loss": 0.7057, "step": 15800 }, { "epoch": 0.8023759058775749, "grad_norm": 0.031363129488011655, "learning_rate": 0.0007481944742566076, "loss": 0.6807, "step": 15805 }, { "epoch": 0.8026297419756063, "grad_norm": 0.03251645216589744, "learning_rate": 0.0007480021431600128, "loss": 0.7355, "step": 15810 }, { "epoch": 0.8028835780736379, "grad_norm": 0.03324224096073844, "learning_rate": 0.000747809763381834, "loss": 0.6832, "step": 15815 }, { "epoch": 0.8031374141716694, "grad_norm": 0.030793782942647254, "learning_rate": 0.0007476173349598345, "loss": 0.7025, "step": 15820 }, { "epoch": 0.8033912502697008, "grad_norm": 0.032934303058910666, "learning_rate": 0.000747424857931787, "loss": 0.6524, "step": 15825 }, { "epoch": 0.8036450863677324, "grad_norm": 0.052126732213258156, "learning_rate": 0.0007472323323354739, "loss": 0.6737, "step": 15830 }, { "epoch": 0.8038989224657639, "grad_norm": 0.03145708022321536, "learning_rate": 0.0007470397582086869, "loss": 0.7104, "step": 15835 }, { "epoch": 0.8041527585637953, "grad_norm": 0.03744236642537962, "learning_rate": 0.0007468471355892275, "loss": 0.6716, "step": 15840 }, { "epoch": 0.8044065946618268, "grad_norm": 0.03635412354324207, "learning_rate": 0.0007466544645149061, "loss": 0.7084, "step": 15845 }, { "epoch": 0.8046604307598584, "grad_norm": 0.03163022056654914, "learning_rate": 0.0007464617450235434, "loss": 0.6922, "step": 15850 }, { "epoch": 0.8049142668578899, "grad_norm": 0.028165557245614006, "learning_rate": 0.0007462689771529695, "loss": 0.6637, "step": 15855 }, { "epoch": 0.8051681029559213, "grad_norm": 0.03508001348369766, "learning_rate": 0.0007460761609410233, "loss": 0.7125, "step": 15860 }, { "epoch": 0.8054219390539529, "grad_norm": 0.03230692933829848, "learning_rate": 0.000745883296425554, "loss": 0.6811, "step": 15865 }, { "epoch": 0.8056757751519844, "grad_norm": 0.028718485779281735, "learning_rate": 0.00074569038364442, "loss": 0.6652, "step": 15870 }, { "epoch": 0.8059296112500158, "grad_norm": 0.03449470368567134, "learning_rate": 0.0007454974226354887, "loss": 0.691, "step": 15875 }, { "epoch": 0.8061834473480474, "grad_norm": 0.028684217897395115, "learning_rate": 0.0007453044134366377, "loss": 0.6702, "step": 15880 }, { "epoch": 0.8064372834460789, "grad_norm": 0.03314145674842895, "learning_rate": 0.0007451113560857537, "loss": 0.6729, "step": 15885 }, { "epoch": 0.8066911195441103, "grad_norm": 0.03016641130760411, "learning_rate": 0.0007449182506207328, "loss": 0.66, "step": 15890 }, { "epoch": 0.8069449556421419, "grad_norm": 0.032422959643889, "learning_rate": 0.0007447250970794807, "loss": 0.6982, "step": 15895 }, { "epoch": 0.8071987917401734, "grad_norm": 0.028251788293890783, "learning_rate": 0.0007445318954999126, "loss": 0.6955, "step": 15900 }, { "epoch": 0.8074526278382048, "grad_norm": 0.030398602187150733, "learning_rate": 0.0007443386459199528, "loss": 0.6897, "step": 15905 }, { "epoch": 0.8077064639362364, "grad_norm": 0.03199625315762041, "learning_rate": 0.0007441453483775354, "loss": 0.7256, "step": 15910 }, { "epoch": 0.8079603000342679, "grad_norm": 0.03185669581509626, "learning_rate": 0.0007439520029106035, "loss": 0.7029, "step": 15915 }, { "epoch": 0.8082141361322994, "grad_norm": 0.037449419030400624, "learning_rate": 0.0007437586095571102, "loss": 0.6953, "step": 15920 }, { "epoch": 0.8084679722303308, "grad_norm": 0.0501965824158008, "learning_rate": 0.0007435651683550173, "loss": 0.6783, "step": 15925 }, { "epoch": 0.8087218083283624, "grad_norm": 0.031331316083916684, "learning_rate": 0.0007433716793422967, "loss": 0.7069, "step": 15930 }, { "epoch": 0.8089756444263939, "grad_norm": 0.03307089690578495, "learning_rate": 0.0007431781425569289, "loss": 0.6655, "step": 15935 }, { "epoch": 0.8092294805244253, "grad_norm": 0.030436938934051447, "learning_rate": 0.0007429845580369046, "loss": 0.675, "step": 15940 }, { "epoch": 0.8094833166224569, "grad_norm": 0.03059832578743004, "learning_rate": 0.0007427909258202232, "loss": 0.6985, "step": 15945 }, { "epoch": 0.8097371527204884, "grad_norm": 0.03361313960602132, "learning_rate": 0.0007425972459448941, "loss": 0.6758, "step": 15950 }, { "epoch": 0.8099909888185198, "grad_norm": 0.03204603509372673, "learning_rate": 0.0007424035184489352, "loss": 0.7085, "step": 15955 }, { "epoch": 0.8102448249165514, "grad_norm": 0.033963457702220985, "learning_rate": 0.0007422097433703748, "loss": 0.6955, "step": 15960 }, { "epoch": 0.8104986610145829, "grad_norm": 0.03302877796264428, "learning_rate": 0.0007420159207472494, "loss": 0.7004, "step": 15965 }, { "epoch": 0.8107524971126144, "grad_norm": 0.031137965254228756, "learning_rate": 0.0007418220506176058, "loss": 0.7094, "step": 15970 }, { "epoch": 0.8110063332106459, "grad_norm": 0.031090576960818073, "learning_rate": 0.0007416281330194996, "loss": 0.7005, "step": 15975 }, { "epoch": 0.8112601693086774, "grad_norm": 0.03187377456776277, "learning_rate": 0.0007414341679909958, "loss": 0.7282, "step": 15980 }, { "epoch": 0.8115140054067089, "grad_norm": 0.033503968314961774, "learning_rate": 0.0007412401555701689, "loss": 0.698, "step": 15985 }, { "epoch": 0.8117678415047404, "grad_norm": 0.03489114079994072, "learning_rate": 0.0007410460957951026, "loss": 0.6559, "step": 15990 }, { "epoch": 0.8120216776027719, "grad_norm": 0.03362445911837516, "learning_rate": 0.0007408519887038898, "loss": 0.6802, "step": 15995 }, { "epoch": 0.8122755137008034, "grad_norm": 0.03164548295177179, "learning_rate": 0.0007406578343346327, "loss": 0.7144, "step": 16000 }, { "epoch": 0.8125293497988348, "grad_norm": 0.033435582819062726, "learning_rate": 0.0007404636327254428, "loss": 0.6976, "step": 16005 }, { "epoch": 0.8127831858968664, "grad_norm": 0.20417929626451017, "learning_rate": 0.000740269383914441, "loss": 0.6935, "step": 16010 }, { "epoch": 0.8130370219948979, "grad_norm": 0.04447233575633374, "learning_rate": 0.0007400750879397576, "loss": 0.6706, "step": 16015 }, { "epoch": 0.8132908580929294, "grad_norm": 0.0357885957023717, "learning_rate": 0.0007398807448395314, "loss": 0.6904, "step": 16020 }, { "epoch": 0.8135446941909609, "grad_norm": 0.03596258977883425, "learning_rate": 0.0007396863546519113, "loss": 0.693, "step": 16025 }, { "epoch": 0.8137985302889924, "grad_norm": 0.0329837497196655, "learning_rate": 0.0007394919174150552, "loss": 0.7105, "step": 16030 }, { "epoch": 0.8140523663870239, "grad_norm": 0.030542957276074145, "learning_rate": 0.0007392974331671301, "loss": 0.7018, "step": 16035 }, { "epoch": 0.8143062024850554, "grad_norm": 0.03734152872653899, "learning_rate": 0.0007391029019463121, "loss": 0.6922, "step": 16040 }, { "epoch": 0.8145600385830869, "grad_norm": 0.03456516486670086, "learning_rate": 0.0007389083237907869, "loss": 0.6494, "step": 16045 }, { "epoch": 0.8148138746811184, "grad_norm": 0.03316694835336155, "learning_rate": 0.0007387136987387493, "loss": 0.6612, "step": 16050 }, { "epoch": 0.8150677107791499, "grad_norm": 0.037648816514548086, "learning_rate": 0.0007385190268284028, "loss": 0.6759, "step": 16055 }, { "epoch": 0.8153215468771814, "grad_norm": 0.02893017784941368, "learning_rate": 0.000738324308097961, "loss": 0.6906, "step": 16060 }, { "epoch": 0.8155753829752129, "grad_norm": 0.02934217608091606, "learning_rate": 0.0007381295425856461, "loss": 0.6945, "step": 16065 }, { "epoch": 0.8158292190732445, "grad_norm": 0.034100778905957044, "learning_rate": 0.0007379347303296895, "loss": 0.6829, "step": 16070 }, { "epoch": 0.8160830551712759, "grad_norm": 0.029662560006883356, "learning_rate": 0.0007377398713683319, "loss": 0.6786, "step": 16075 }, { "epoch": 0.8163368912693074, "grad_norm": 0.03413632599225428, "learning_rate": 0.0007375449657398232, "loss": 0.7304, "step": 16080 }, { "epoch": 0.816590727367339, "grad_norm": 0.028992296505568917, "learning_rate": 0.0007373500134824224, "loss": 0.6554, "step": 16085 }, { "epoch": 0.8168445634653704, "grad_norm": 0.03248645116573712, "learning_rate": 0.0007371550146343976, "loss": 0.6951, "step": 16090 }, { "epoch": 0.8170983995634019, "grad_norm": 0.02853802971719601, "learning_rate": 0.0007369599692340261, "loss": 0.6257, "step": 16095 }, { "epoch": 0.8173522356614334, "grad_norm": 0.03589114134736362, "learning_rate": 0.0007367648773195942, "loss": 0.6803, "step": 16100 }, { "epoch": 0.8176060717594649, "grad_norm": 0.03216437847312991, "learning_rate": 0.000736569738929398, "loss": 0.6595, "step": 16105 }, { "epoch": 0.8178599078574964, "grad_norm": 0.03126663004686734, "learning_rate": 0.0007363745541017415, "loss": 0.6748, "step": 16110 }, { "epoch": 0.8181137439555279, "grad_norm": 0.029820091383118273, "learning_rate": 0.0007361793228749387, "loss": 0.7184, "step": 16115 }, { "epoch": 0.8183675800535594, "grad_norm": 0.031800800947982946, "learning_rate": 0.0007359840452873129, "loss": 0.6855, "step": 16120 }, { "epoch": 0.8186214161515909, "grad_norm": 0.03201885403939605, "learning_rate": 0.0007357887213771958, "loss": 0.6817, "step": 16125 }, { "epoch": 0.8188752522496224, "grad_norm": 0.03558457401839023, "learning_rate": 0.0007355933511829286, "loss": 0.6995, "step": 16130 }, { "epoch": 0.819129088347654, "grad_norm": 0.03130940333239788, "learning_rate": 0.0007353979347428614, "loss": 0.6464, "step": 16135 }, { "epoch": 0.8193829244456854, "grad_norm": 0.030344268825989663, "learning_rate": 0.0007352024720953536, "loss": 0.6489, "step": 16140 }, { "epoch": 0.8196367605437169, "grad_norm": 0.027787337130435893, "learning_rate": 0.0007350069632787734, "loss": 0.6302, "step": 16145 }, { "epoch": 0.8198905966417485, "grad_norm": 0.034075067025413984, "learning_rate": 0.0007348114083314984, "loss": 0.6841, "step": 16150 }, { "epoch": 0.8201444327397799, "grad_norm": 0.03037755602912441, "learning_rate": 0.0007346158072919149, "loss": 0.6928, "step": 16155 }, { "epoch": 0.8203982688378114, "grad_norm": 0.030829309195356074, "learning_rate": 0.0007344201601984185, "loss": 0.6958, "step": 16160 }, { "epoch": 0.820652104935843, "grad_norm": 0.03307063510465378, "learning_rate": 0.0007342244670894136, "loss": 0.6625, "step": 16165 }, { "epoch": 0.8209059410338744, "grad_norm": 0.03172654512214787, "learning_rate": 0.000734028728003314, "loss": 0.6754, "step": 16170 }, { "epoch": 0.8211597771319059, "grad_norm": 0.029464224343398213, "learning_rate": 0.000733832942978542, "loss": 0.6746, "step": 16175 }, { "epoch": 0.8214136132299374, "grad_norm": 0.02813963689443119, "learning_rate": 0.0007336371120535295, "loss": 0.6852, "step": 16180 }, { "epoch": 0.821667449327969, "grad_norm": 0.028665055731721584, "learning_rate": 0.0007334412352667173, "loss": 0.7455, "step": 16185 }, { "epoch": 0.8219212854260004, "grad_norm": 0.1981540515064532, "learning_rate": 0.0007332453126565545, "loss": 0.6665, "step": 16190 }, { "epoch": 0.8221751215240319, "grad_norm": 0.029341358626818397, "learning_rate": 0.0007330493442615, "loss": 0.6569, "step": 16195 }, { "epoch": 0.8224289576220635, "grad_norm": 0.029653228031913177, "learning_rate": 0.0007328533301200216, "loss": 0.6608, "step": 16200 }, { "epoch": 0.8226827937200949, "grad_norm": 0.03003799557657544, "learning_rate": 0.0007326572702705958, "loss": 0.6777, "step": 16205 }, { "epoch": 0.8229366298181264, "grad_norm": 0.03028550883959852, "learning_rate": 0.0007324611647517078, "loss": 0.6647, "step": 16210 }, { "epoch": 0.823190465916158, "grad_norm": 0.03096525080337996, "learning_rate": 0.0007322650136018527, "loss": 0.6931, "step": 16215 }, { "epoch": 0.8234443020141894, "grad_norm": 0.03432626327374171, "learning_rate": 0.0007320688168595338, "loss": 0.7155, "step": 16220 }, { "epoch": 0.8236981381122209, "grad_norm": 0.05945096622907068, "learning_rate": 0.0007318725745632632, "loss": 0.6585, "step": 16225 }, { "epoch": 0.8239519742102525, "grad_norm": 0.028010810625557787, "learning_rate": 0.0007316762867515627, "loss": 0.6712, "step": 16230 }, { "epoch": 0.824205810308284, "grad_norm": 0.03507288890845984, "learning_rate": 0.0007314799534629625, "loss": 0.741, "step": 16235 }, { "epoch": 0.8244596464063154, "grad_norm": 0.02823296119210229, "learning_rate": 0.0007312835747360018, "loss": 0.6653, "step": 16240 }, { "epoch": 0.824713482504347, "grad_norm": 0.029300031364511633, "learning_rate": 0.0007310871506092287, "loss": 0.646, "step": 16245 }, { "epoch": 0.8249673186023785, "grad_norm": 0.028869173069306656, "learning_rate": 0.0007308906811212004, "loss": 0.7054, "step": 16250 }, { "epoch": 0.8252211547004099, "grad_norm": 0.030167458947338896, "learning_rate": 0.000730694166310483, "loss": 0.6682, "step": 16255 }, { "epoch": 0.8254749907984414, "grad_norm": 0.03491054244530242, "learning_rate": 0.0007304976062156512, "loss": 0.6868, "step": 16260 }, { "epoch": 0.825728826896473, "grad_norm": 0.0330692684271076, "learning_rate": 0.0007303010008752886, "loss": 0.6757, "step": 16265 }, { "epoch": 0.8259826629945044, "grad_norm": 0.03127104168737178, "learning_rate": 0.0007301043503279881, "loss": 0.7088, "step": 16270 }, { "epoch": 0.8262364990925359, "grad_norm": 0.03171828059579301, "learning_rate": 0.0007299076546123512, "loss": 0.6781, "step": 16275 }, { "epoch": 0.8264903351905675, "grad_norm": 0.03337434010350327, "learning_rate": 0.0007297109137669882, "loss": 0.6695, "step": 16280 }, { "epoch": 0.826744171288599, "grad_norm": 0.028704116315093828, "learning_rate": 0.0007295141278305185, "loss": 0.6685, "step": 16285 }, { "epoch": 0.8269980073866304, "grad_norm": 0.030912775279227672, "learning_rate": 0.0007293172968415701, "loss": 0.6905, "step": 16290 }, { "epoch": 0.827251843484662, "grad_norm": 0.031434387284452026, "learning_rate": 0.0007291204208387798, "loss": 0.689, "step": 16295 }, { "epoch": 0.8275056795826935, "grad_norm": 0.02863628156385919, "learning_rate": 0.0007289234998607935, "loss": 0.6441, "step": 16300 }, { "epoch": 0.8277595156807249, "grad_norm": 0.03861924185488775, "learning_rate": 0.000728726533946266, "loss": 0.6884, "step": 16305 }, { "epoch": 0.8280133517787565, "grad_norm": 0.03162049270355201, "learning_rate": 0.0007285295231338605, "loss": 0.6656, "step": 16310 }, { "epoch": 0.828267187876788, "grad_norm": 0.03370242971869153, "learning_rate": 0.0007283324674622491, "loss": 0.706, "step": 16315 }, { "epoch": 0.8285210239748194, "grad_norm": 0.03358516107993129, "learning_rate": 0.0007281353669701131, "loss": 0.7083, "step": 16320 }, { "epoch": 0.828774860072851, "grad_norm": 0.031938319347864425, "learning_rate": 0.0007279382216961426, "loss": 0.6748, "step": 16325 }, { "epoch": 0.8290286961708825, "grad_norm": 0.03197962851123186, "learning_rate": 0.0007277410316790355, "loss": 0.6886, "step": 16330 }, { "epoch": 0.8292825322689139, "grad_norm": 0.03123318696204382, "learning_rate": 0.0007275437969574999, "loss": 0.6782, "step": 16335 }, { "epoch": 0.8295363683669454, "grad_norm": 0.029452111083884572, "learning_rate": 0.0007273465175702515, "loss": 0.6576, "step": 16340 }, { "epoch": 0.829790204464977, "grad_norm": 0.03408922013508775, "learning_rate": 0.0007271491935560155, "loss": 0.7194, "step": 16345 }, { "epoch": 0.8300440405630085, "grad_norm": 0.028991293911504023, "learning_rate": 0.0007269518249535256, "loss": 0.641, "step": 16350 }, { "epoch": 0.8302978766610399, "grad_norm": 0.031120891632061186, "learning_rate": 0.0007267544118015243, "loss": 0.6761, "step": 16355 }, { "epoch": 0.8305517127590715, "grad_norm": 0.03083518373057741, "learning_rate": 0.0007265569541387628, "loss": 0.692, "step": 16360 }, { "epoch": 0.830805548857103, "grad_norm": 0.027851804571404636, "learning_rate": 0.0007263594520040011, "loss": 0.6712, "step": 16365 }, { "epoch": 0.8310593849551344, "grad_norm": 0.03128752193439832, "learning_rate": 0.0007261619054360078, "loss": 0.6683, "step": 16370 }, { "epoch": 0.831313221053166, "grad_norm": 0.038714624206284236, "learning_rate": 0.0007259643144735603, "loss": 0.676, "step": 16375 }, { "epoch": 0.8315670571511975, "grad_norm": 0.03613242701148452, "learning_rate": 0.0007257666791554447, "loss": 0.6979, "step": 16380 }, { "epoch": 0.8318208932492289, "grad_norm": 0.033054393450377466, "learning_rate": 0.0007255689995204559, "loss": 0.6879, "step": 16385 }, { "epoch": 0.8320747293472605, "grad_norm": 0.03213378286027016, "learning_rate": 0.0007253712756073973, "loss": 0.7084, "step": 16390 }, { "epoch": 0.832328565445292, "grad_norm": 0.02921282672802365, "learning_rate": 0.0007251735074550815, "loss": 0.6415, "step": 16395 }, { "epoch": 0.8325824015433235, "grad_norm": 0.03235225144120135, "learning_rate": 0.000724975695102329, "loss": 0.6855, "step": 16400 }, { "epoch": 0.832836237641355, "grad_norm": 0.03203915105239164, "learning_rate": 0.0007247778385879695, "loss": 0.6754, "step": 16405 }, { "epoch": 0.8330900737393865, "grad_norm": 0.02906859145474365, "learning_rate": 0.0007245799379508412, "loss": 0.6665, "step": 16410 }, { "epoch": 0.833343909837418, "grad_norm": 0.029026529775904347, "learning_rate": 0.000724381993229791, "loss": 0.6457, "step": 16415 }, { "epoch": 0.8335977459354494, "grad_norm": 0.030452424299970846, "learning_rate": 0.0007241840044636747, "loss": 0.7143, "step": 16420 }, { "epoch": 0.833851582033481, "grad_norm": 0.030534920710923563, "learning_rate": 0.0007239859716913562, "loss": 0.6694, "step": 16425 }, { "epoch": 0.8341054181315125, "grad_norm": 0.0331051482801569, "learning_rate": 0.0007237878949517085, "loss": 0.685, "step": 16430 }, { "epoch": 0.8343592542295439, "grad_norm": 0.028588957509579786, "learning_rate": 0.0007235897742836131, "loss": 0.7085, "step": 16435 }, { "epoch": 0.8346130903275755, "grad_norm": 0.03128835620801525, "learning_rate": 0.00072339160972596, "loss": 0.6731, "step": 16440 }, { "epoch": 0.834866926425607, "grad_norm": 0.03299836734544922, "learning_rate": 0.000723193401317648, "loss": 0.7085, "step": 16445 }, { "epoch": 0.8351207625236385, "grad_norm": 0.029686298190558954, "learning_rate": 0.0007229951490975844, "loss": 0.6633, "step": 16450 }, { "epoch": 0.83537459862167, "grad_norm": 0.032208152381701445, "learning_rate": 0.000722796853104685, "loss": 0.6582, "step": 16455 }, { "epoch": 0.8356284347197015, "grad_norm": 0.031854167249358085, "learning_rate": 0.0007225985133778745, "loss": 0.6661, "step": 16460 }, { "epoch": 0.835882270817733, "grad_norm": 0.028599616106104963, "learning_rate": 0.0007224001299560859, "loss": 0.6847, "step": 16465 }, { "epoch": 0.8361361069157645, "grad_norm": 0.027728978272126014, "learning_rate": 0.000722201702878261, "loss": 0.6376, "step": 16470 }, { "epoch": 0.836389943013796, "grad_norm": 0.03186716426610514, "learning_rate": 0.0007220032321833498, "loss": 0.6894, "step": 16475 }, { "epoch": 0.8366437791118275, "grad_norm": 0.06405925267761614, "learning_rate": 0.0007218047179103112, "loss": 0.68, "step": 16480 }, { "epoch": 0.836897615209859, "grad_norm": 0.03794995451618184, "learning_rate": 0.0007216061600981128, "loss": 0.6801, "step": 16485 }, { "epoch": 0.8371514513078905, "grad_norm": 0.034780794758989306, "learning_rate": 0.0007214075587857302, "loss": 0.6302, "step": 16490 }, { "epoch": 0.837405287405922, "grad_norm": 0.0978104675753311, "learning_rate": 0.0007212089140121481, "loss": 0.6379, "step": 16495 }, { "epoch": 0.8376591235039536, "grad_norm": 0.034205788936002485, "learning_rate": 0.0007210102258163592, "loss": 0.6557, "step": 16500 }, { "epoch": 0.837912959601985, "grad_norm": 0.037401578356713115, "learning_rate": 0.0007208114942373651, "loss": 0.6703, "step": 16505 }, { "epoch": 0.8381667957000165, "grad_norm": 0.029741772819327196, "learning_rate": 0.0007206127193141761, "loss": 0.6422, "step": 16510 }, { "epoch": 0.838420631798048, "grad_norm": 0.04207207973848489, "learning_rate": 0.0007204139010858103, "loss": 0.7033, "step": 16515 }, { "epoch": 0.8386744678960795, "grad_norm": 0.034569624187278714, "learning_rate": 0.0007202150395912949, "loss": 0.679, "step": 16520 }, { "epoch": 0.838928303994111, "grad_norm": 0.03442285241824819, "learning_rate": 0.0007200161348696655, "loss": 0.6711, "step": 16525 }, { "epoch": 0.8391821400921425, "grad_norm": 0.03475072337304419, "learning_rate": 0.0007198171869599662, "loss": 0.6995, "step": 16530 }, { "epoch": 0.839435976190174, "grad_norm": 0.034018730390661854, "learning_rate": 0.0007196181959012491, "loss": 0.7045, "step": 16535 }, { "epoch": 0.8396898122882055, "grad_norm": 0.029012298866997355, "learning_rate": 0.0007194191617325755, "loss": 0.7043, "step": 16540 }, { "epoch": 0.839943648386237, "grad_norm": 0.043587266643652506, "learning_rate": 0.0007192200844930147, "loss": 0.6028, "step": 16545 }, { "epoch": 0.8401974844842685, "grad_norm": 0.029555288217304864, "learning_rate": 0.0007190209642216445, "loss": 0.7068, "step": 16550 }, { "epoch": 0.8404513205823, "grad_norm": 0.030530875257094955, "learning_rate": 0.0007188218009575514, "loss": 0.6826, "step": 16555 }, { "epoch": 0.8407051566803315, "grad_norm": 0.031618717696415666, "learning_rate": 0.0007186225947398298, "loss": 0.6915, "step": 16560 }, { "epoch": 0.8409589927783631, "grad_norm": 0.04095810139821996, "learning_rate": 0.0007184233456075833, "loss": 0.7699, "step": 16565 }, { "epoch": 0.8412128288763945, "grad_norm": 0.04105325135171553, "learning_rate": 0.0007182240535999232, "loss": 0.679, "step": 16570 }, { "epoch": 0.841466664974426, "grad_norm": 0.035991864396086794, "learning_rate": 0.0007180247187559697, "loss": 0.6794, "step": 16575 }, { "epoch": 0.8417205010724575, "grad_norm": 0.033842128599381655, "learning_rate": 0.0007178253411148513, "loss": 0.7518, "step": 16580 }, { "epoch": 0.841974337170489, "grad_norm": 0.03670954903656268, "learning_rate": 0.0007176259207157048, "loss": 0.6644, "step": 16585 }, { "epoch": 0.8422281732685205, "grad_norm": 0.03177517213140897, "learning_rate": 0.0007174264575976752, "loss": 0.6595, "step": 16590 }, { "epoch": 0.842482009366552, "grad_norm": 0.03251075378797236, "learning_rate": 0.0007172269517999163, "loss": 0.6824, "step": 16595 }, { "epoch": 0.8427358454645835, "grad_norm": 0.030917853363449262, "learning_rate": 0.00071702740336159, "loss": 0.6793, "step": 16600 }, { "epoch": 0.842989681562615, "grad_norm": 0.036168514127805945, "learning_rate": 0.0007168278123218667, "loss": 0.6744, "step": 16605 }, { "epoch": 0.8432435176606465, "grad_norm": 0.032269262957677354, "learning_rate": 0.0007166281787199253, "loss": 0.6708, "step": 16610 }, { "epoch": 0.8434973537586781, "grad_norm": 0.03207943368479865, "learning_rate": 0.0007164285025949528, "loss": 0.719, "step": 16615 }, { "epoch": 0.8437511898567095, "grad_norm": 0.03181280183265756, "learning_rate": 0.0007162287839861445, "loss": 0.6806, "step": 16620 }, { "epoch": 0.844005025954741, "grad_norm": 0.029778928801827444, "learning_rate": 0.0007160290229327042, "loss": 0.6692, "step": 16625 }, { "epoch": 0.8442588620527726, "grad_norm": 0.03245500853418771, "learning_rate": 0.000715829219473844, "loss": 0.6909, "step": 16630 }, { "epoch": 0.844512698150804, "grad_norm": 0.02906823219374587, "learning_rate": 0.0007156293736487844, "loss": 0.6465, "step": 16635 }, { "epoch": 0.8447665342488355, "grad_norm": 0.029992343015578748, "learning_rate": 0.0007154294854967541, "loss": 0.6905, "step": 16640 }, { "epoch": 0.8450203703468671, "grad_norm": 0.03498100067718652, "learning_rate": 0.0007152295550569902, "loss": 0.6604, "step": 16645 }, { "epoch": 0.8452742064448985, "grad_norm": 0.03184440339114562, "learning_rate": 0.0007150295823687379, "loss": 0.6896, "step": 16650 }, { "epoch": 0.84552804254293, "grad_norm": 0.02939561871814488, "learning_rate": 0.000714829567471251, "loss": 0.6765, "step": 16655 }, { "epoch": 0.8457818786409615, "grad_norm": 0.030784532302951844, "learning_rate": 0.0007146295104037914, "loss": 0.685, "step": 16660 }, { "epoch": 0.8460357147389931, "grad_norm": 0.03081945881663194, "learning_rate": 0.0007144294112056292, "loss": 0.6977, "step": 16665 }, { "epoch": 0.8462895508370245, "grad_norm": 0.034660075847437265, "learning_rate": 0.000714229269916043, "loss": 0.6857, "step": 16670 }, { "epoch": 0.846543386935056, "grad_norm": 0.16488743222225918, "learning_rate": 0.0007140290865743194, "loss": 0.7073, "step": 16675 }, { "epoch": 0.8467972230330876, "grad_norm": 0.1760267766432886, "learning_rate": 0.0007138288612197534, "loss": 0.6771, "step": 16680 }, { "epoch": 0.847051059131119, "grad_norm": 0.10468181367541479, "learning_rate": 0.0007136285938916484, "loss": 0.6557, "step": 16685 }, { "epoch": 0.8473048952291505, "grad_norm": 0.03634231903491674, "learning_rate": 0.0007134282846293157, "loss": 0.6743, "step": 16690 }, { "epoch": 0.8475587313271821, "grad_norm": 0.04165911017662932, "learning_rate": 0.0007132279334720751, "loss": 0.7089, "step": 16695 }, { "epoch": 0.8478125674252135, "grad_norm": 0.03265728139223375, "learning_rate": 0.0007130275404592547, "loss": 0.6937, "step": 16700 }, { "epoch": 0.848066403523245, "grad_norm": 0.03070692342990863, "learning_rate": 0.0007128271056301902, "loss": 0.6604, "step": 16705 }, { "epoch": 0.8483202396212766, "grad_norm": 0.02843916937833062, "learning_rate": 0.0007126266290242264, "loss": 0.6583, "step": 16710 }, { "epoch": 0.8485740757193081, "grad_norm": 0.030909940966102837, "learning_rate": 0.0007124261106807158, "loss": 0.6488, "step": 16715 }, { "epoch": 0.8488279118173395, "grad_norm": 0.03067845134344279, "learning_rate": 0.0007122255506390188, "loss": 0.6648, "step": 16720 }, { "epoch": 0.849081747915371, "grad_norm": 0.03263411886510651, "learning_rate": 0.0007120249489385048, "loss": 0.6988, "step": 16725 }, { "epoch": 0.8493355840134026, "grad_norm": 0.03133327104199551, "learning_rate": 0.0007118243056185505, "loss": 0.6895, "step": 16730 }, { "epoch": 0.849589420111434, "grad_norm": 0.03697827924050908, "learning_rate": 0.0007116236207185414, "loss": 0.6837, "step": 16735 }, { "epoch": 0.8498432562094655, "grad_norm": 0.03446930552836637, "learning_rate": 0.0007114228942778711, "loss": 0.6527, "step": 16740 }, { "epoch": 0.8500970923074971, "grad_norm": 0.030478097471144466, "learning_rate": 0.0007112221263359408, "loss": 0.6572, "step": 16745 }, { "epoch": 0.8503509284055285, "grad_norm": 0.047020986963279644, "learning_rate": 0.0007110213169321606, "loss": 0.7437, "step": 16750 }, { "epoch": 0.85060476450356, "grad_norm": 0.037999446153579954, "learning_rate": 0.0007108204661059482, "loss": 0.6987, "step": 16755 }, { "epoch": 0.8508586006015916, "grad_norm": 0.029685152674449145, "learning_rate": 0.0007106195738967296, "loss": 0.6775, "step": 16760 }, { "epoch": 0.851112436699623, "grad_norm": 0.08680041337309971, "learning_rate": 0.0007104186403439391, "loss": 0.7133, "step": 16765 }, { "epoch": 0.8513662727976545, "grad_norm": 0.07123868403631593, "learning_rate": 0.0007102176654870189, "loss": 0.6891, "step": 16770 }, { "epoch": 0.8516201088956861, "grad_norm": 0.11482058174956565, "learning_rate": 0.0007100166493654192, "loss": 0.7115, "step": 16775 }, { "epoch": 0.8518739449937176, "grad_norm": 0.05355578326306851, "learning_rate": 0.0007098155920185987, "loss": 0.6941, "step": 16780 }, { "epoch": 0.852127781091749, "grad_norm": 0.04528290439600834, "learning_rate": 0.0007096144934860237, "loss": 0.6877, "step": 16785 }, { "epoch": 0.8523816171897806, "grad_norm": 0.03581830944527217, "learning_rate": 0.0007094133538071691, "loss": 0.6583, "step": 16790 }, { "epoch": 0.8526354532878121, "grad_norm": 0.03860688913646833, "learning_rate": 0.0007092121730215174, "loss": 0.7281, "step": 16795 }, { "epoch": 0.8528892893858435, "grad_norm": 0.03682279289808468, "learning_rate": 0.0007090109511685595, "loss": 0.7026, "step": 16800 }, { "epoch": 0.853143125483875, "grad_norm": 0.06403486849547414, "learning_rate": 0.0007088096882877942, "loss": 0.7403, "step": 16805 }, { "epoch": 0.8533969615819066, "grad_norm": 0.031005257467468673, "learning_rate": 0.0007086083844187284, "loss": 0.6533, "step": 16810 }, { "epoch": 0.853650797679938, "grad_norm": 0.039977649085635746, "learning_rate": 0.0007084070396008771, "loss": 0.7149, "step": 16815 }, { "epoch": 0.8539046337779695, "grad_norm": 0.03204867903899664, "learning_rate": 0.0007082056538737633, "loss": 0.6487, "step": 16820 }, { "epoch": 0.8541584698760011, "grad_norm": 0.031458640994534916, "learning_rate": 0.0007080042272769179, "loss": 0.683, "step": 16825 }, { "epoch": 0.8544123059740326, "grad_norm": 0.03018359973064647, "learning_rate": 0.0007078027598498801, "loss": 0.6816, "step": 16830 }, { "epoch": 0.854666142072064, "grad_norm": 0.03153228816515555, "learning_rate": 0.0007076012516321968, "loss": 0.719, "step": 16835 }, { "epoch": 0.8549199781700956, "grad_norm": 0.39012589725249674, "learning_rate": 0.0007073997026634229, "loss": 0.7037, "step": 16840 }, { "epoch": 0.8551738142681271, "grad_norm": 0.032764417315952886, "learning_rate": 0.000707198112983122, "loss": 0.6357, "step": 16845 }, { "epoch": 0.8554276503661585, "grad_norm": 0.030337370911392353, "learning_rate": 0.0007069964826308646, "loss": 0.679, "step": 16850 }, { "epoch": 0.8556814864641901, "grad_norm": 0.033710480472829964, "learning_rate": 0.00070679481164623, "loss": 0.6713, "step": 16855 }, { "epoch": 0.8559353225622216, "grad_norm": 0.03784966233528648, "learning_rate": 0.0007065931000688053, "loss": 0.6308, "step": 16860 }, { "epoch": 0.856189158660253, "grad_norm": 0.033804800402821356, "learning_rate": 0.0007063913479381851, "loss": 0.6991, "step": 16865 }, { "epoch": 0.8564429947582846, "grad_norm": 0.03503773170185298, "learning_rate": 0.0007061895552939727, "loss": 0.6839, "step": 16870 }, { "epoch": 0.8566968308563161, "grad_norm": 0.02919657998421449, "learning_rate": 0.0007059877221757789, "loss": 0.6783, "step": 16875 }, { "epoch": 0.8569506669543476, "grad_norm": 0.029494304438265524, "learning_rate": 0.0007057858486232224, "loss": 0.6466, "step": 16880 }, { "epoch": 0.857204503052379, "grad_norm": 0.02833169659662335, "learning_rate": 0.00070558393467593, "loss": 0.6368, "step": 16885 }, { "epoch": 0.8574583391504106, "grad_norm": 0.029924744695976047, "learning_rate": 0.0007053819803735367, "loss": 0.6446, "step": 16890 }, { "epoch": 0.8577121752484421, "grad_norm": 0.029456925138482137, "learning_rate": 0.0007051799857556848, "loss": 0.6453, "step": 16895 }, { "epoch": 0.8579660113464735, "grad_norm": 0.0300366762551071, "learning_rate": 0.0007049779508620248, "loss": 0.6901, "step": 16900 }, { "epoch": 0.8582198474445051, "grad_norm": 0.028805481235224542, "learning_rate": 0.0007047758757322155, "loss": 0.6734, "step": 16905 }, { "epoch": 0.8584736835425366, "grad_norm": 0.02943461469974667, "learning_rate": 0.0007045737604059228, "loss": 0.7041, "step": 16910 }, { "epoch": 0.858727519640568, "grad_norm": 0.03134457133311643, "learning_rate": 0.0007043716049228212, "loss": 0.6885, "step": 16915 }, { "epoch": 0.8589813557385996, "grad_norm": 0.0308510601891844, "learning_rate": 0.0007041694093225929, "loss": 0.6657, "step": 16920 }, { "epoch": 0.8592351918366311, "grad_norm": 0.028792970227950025, "learning_rate": 0.0007039671736449275, "loss": 0.6603, "step": 16925 }, { "epoch": 0.8594890279346626, "grad_norm": 0.030583668293161156, "learning_rate": 0.0007037648979295232, "loss": 0.6436, "step": 16930 }, { "epoch": 0.8597428640326941, "grad_norm": 0.02863976978307729, "learning_rate": 0.0007035625822160856, "loss": 0.6566, "step": 16935 }, { "epoch": 0.8599967001307256, "grad_norm": 0.032920490961818955, "learning_rate": 0.0007033602265443284, "loss": 0.6778, "step": 16940 }, { "epoch": 0.8602505362287571, "grad_norm": 0.027545825740431844, "learning_rate": 0.0007031578309539728, "loss": 0.7115, "step": 16945 }, { "epoch": 0.8605043723267886, "grad_norm": 0.031086335328479372, "learning_rate": 0.000702955395484748, "loss": 0.7094, "step": 16950 }, { "epoch": 0.8607582084248201, "grad_norm": 0.0313125866069602, "learning_rate": 0.0007027529201763913, "loss": 0.7056, "step": 16955 }, { "epoch": 0.8610120445228516, "grad_norm": 0.029787273019815826, "learning_rate": 0.0007025504050686475, "loss": 0.6913, "step": 16960 }, { "epoch": 0.861265880620883, "grad_norm": 0.032568804305412855, "learning_rate": 0.0007023478502012694, "loss": 0.6983, "step": 16965 }, { "epoch": 0.8615197167189146, "grad_norm": 0.030245483537861925, "learning_rate": 0.0007021452556140173, "loss": 0.682, "step": 16970 }, { "epoch": 0.8617735528169461, "grad_norm": 0.029642315429883367, "learning_rate": 0.0007019426213466597, "loss": 0.6739, "step": 16975 }, { "epoch": 0.8620273889149775, "grad_norm": 0.032832376067387654, "learning_rate": 0.0007017399474389725, "loss": 0.6509, "step": 16980 }, { "epoch": 0.8622812250130091, "grad_norm": 0.029590067456531883, "learning_rate": 0.0007015372339307398, "loss": 0.6446, "step": 16985 }, { "epoch": 0.8625350611110406, "grad_norm": 0.43928065056253346, "learning_rate": 0.000701334480861753, "loss": 0.6766, "step": 16990 }, { "epoch": 0.8627888972090721, "grad_norm": 0.033700414095634845, "learning_rate": 0.0007011316882718119, "loss": 0.6427, "step": 16995 }, { "epoch": 0.8630427333071036, "grad_norm": 0.027598585795965462, "learning_rate": 0.0007009288562007232, "loss": 0.639, "step": 17000 }, { "epoch": 0.8632965694051351, "grad_norm": 0.029569627120860153, "learning_rate": 0.0007007259846883022, "loss": 0.6646, "step": 17005 }, { "epoch": 0.8635504055031666, "grad_norm": 0.0360095720473622, "learning_rate": 0.0007005230737743714, "loss": 0.6632, "step": 17010 }, { "epoch": 0.8638042416011981, "grad_norm": 0.03421495475956032, "learning_rate": 0.0007003201234987612, "loss": 0.6858, "step": 17015 }, { "epoch": 0.8640580776992296, "grad_norm": 0.03614113803350267, "learning_rate": 0.0007001171339013097, "loss": 0.6484, "step": 17020 }, { "epoch": 0.8643119137972611, "grad_norm": 0.035442514085163176, "learning_rate": 0.0006999141050218628, "loss": 0.6928, "step": 17025 }, { "epoch": 0.8645657498952926, "grad_norm": 0.03186791928669632, "learning_rate": 0.0006997110369002742, "loss": 0.6688, "step": 17030 }, { "epoch": 0.8648195859933241, "grad_norm": 0.029045683177750633, "learning_rate": 0.0006995079295764048, "loss": 0.6656, "step": 17035 }, { "epoch": 0.8650734220913556, "grad_norm": 1.3421404993875752, "learning_rate": 0.000699304783090124, "loss": 0.6779, "step": 17040 }, { "epoch": 0.8653272581893872, "grad_norm": 0.06988566429965536, "learning_rate": 0.0006991015974813081, "loss": 0.704, "step": 17045 }, { "epoch": 0.8655810942874186, "grad_norm": 0.04260580244778527, "learning_rate": 0.0006988983727898414, "loss": 0.6604, "step": 17050 }, { "epoch": 0.8658349303854501, "grad_norm": 0.03439477419752089, "learning_rate": 0.0006986951090556161, "loss": 0.6626, "step": 17055 }, { "epoch": 0.8660887664834817, "grad_norm": 0.03366322744732173, "learning_rate": 0.0006984918063185319, "loss": 0.6997, "step": 17060 }, { "epoch": 0.8663426025815131, "grad_norm": 0.030920534566228515, "learning_rate": 0.0006982884646184959, "loss": 0.6576, "step": 17065 }, { "epoch": 0.8665964386795446, "grad_norm": 0.03392787098324168, "learning_rate": 0.0006980850839954232, "loss": 0.6807, "step": 17070 }, { "epoch": 0.8668502747775761, "grad_norm": 0.03443914185054174, "learning_rate": 0.0006978816644892364, "loss": 0.6795, "step": 17075 }, { "epoch": 0.8671041108756076, "grad_norm": 0.03174976687616872, "learning_rate": 0.0006976782061398657, "loss": 0.7016, "step": 17080 }, { "epoch": 0.8673579469736391, "grad_norm": 0.033654615542026936, "learning_rate": 0.0006974747089872488, "loss": 0.7012, "step": 17085 }, { "epoch": 0.8676117830716706, "grad_norm": 0.03245855937871226, "learning_rate": 0.0006972711730713315, "loss": 0.689, "step": 17090 }, { "epoch": 0.8678656191697022, "grad_norm": 0.029959742253799267, "learning_rate": 0.0006970675984320667, "loss": 0.6679, "step": 17095 }, { "epoch": 0.8681194552677336, "grad_norm": 0.03001308257432217, "learning_rate": 0.000696863985109415, "loss": 0.677, "step": 17100 }, { "epoch": 0.8683732913657651, "grad_norm": 0.03288048589173484, "learning_rate": 0.0006966603331433447, "loss": 0.72, "step": 17105 }, { "epoch": 0.8686271274637967, "grad_norm": 0.027978606085364625, "learning_rate": 0.0006964566425738321, "loss": 0.6488, "step": 17110 }, { "epoch": 0.8688809635618281, "grad_norm": 0.02982272783435597, "learning_rate": 0.0006962529134408599, "loss": 0.7038, "step": 17115 }, { "epoch": 0.8691347996598596, "grad_norm": 0.03108258782121802, "learning_rate": 0.0006960491457844198, "loss": 0.6624, "step": 17120 }, { "epoch": 0.8693886357578912, "grad_norm": 0.03123821785681963, "learning_rate": 0.00069584533964451, "loss": 0.676, "step": 17125 }, { "epoch": 0.8696424718559226, "grad_norm": 0.031570728981819264, "learning_rate": 0.0006956414950611366, "loss": 0.651, "step": 17130 }, { "epoch": 0.8698963079539541, "grad_norm": 0.03085231579433253, "learning_rate": 0.0006954376120743136, "loss": 0.7118, "step": 17135 }, { "epoch": 0.8701501440519857, "grad_norm": 0.02919233964311636, "learning_rate": 0.0006952336907240616, "loss": 0.6421, "step": 17140 }, { "epoch": 0.8704039801500172, "grad_norm": 0.034461636439042785, "learning_rate": 0.00069502973105041, "loss": 0.6642, "step": 17145 }, { "epoch": 0.8706578162480486, "grad_norm": 0.031569263629491175, "learning_rate": 0.0006948257330933948, "loss": 0.6943, "step": 17150 }, { "epoch": 0.8709116523460801, "grad_norm": 0.030591200076039493, "learning_rate": 0.0006946216968930598, "loss": 0.6921, "step": 17155 }, { "epoch": 0.8711654884441117, "grad_norm": 0.03509054057608262, "learning_rate": 0.0006944176224894563, "loss": 0.6985, "step": 17160 }, { "epoch": 0.8714193245421431, "grad_norm": 0.028518552469373848, "learning_rate": 0.000694213509922643, "loss": 0.6997, "step": 17165 }, { "epoch": 0.8716731606401746, "grad_norm": 0.029845359560581437, "learning_rate": 0.0006940093592326861, "loss": 0.6842, "step": 17170 }, { "epoch": 0.8719269967382062, "grad_norm": 0.031067264634425062, "learning_rate": 0.0006938051704596598, "loss": 0.7115, "step": 17175 }, { "epoch": 0.8721808328362376, "grad_norm": 0.030396262379911845, "learning_rate": 0.0006936009436436448, "loss": 0.6632, "step": 17180 }, { "epoch": 0.8724346689342691, "grad_norm": 0.03174945729618797, "learning_rate": 0.0006933966788247302, "loss": 0.6775, "step": 17185 }, { "epoch": 0.8726885050323007, "grad_norm": 0.03019811021428771, "learning_rate": 0.000693192376043012, "loss": 0.6755, "step": 17190 }, { "epoch": 0.8729423411303322, "grad_norm": 0.037576502980376576, "learning_rate": 0.0006929880353385938, "loss": 0.652, "step": 17195 }, { "epoch": 0.8731961772283636, "grad_norm": 0.03641602277230229, "learning_rate": 0.0006927836567515866, "loss": 0.7098, "step": 17200 }, { "epoch": 0.8734500133263952, "grad_norm": 0.033566751921166926, "learning_rate": 0.0006925792403221091, "loss": 0.682, "step": 17205 }, { "epoch": 0.8737038494244267, "grad_norm": 0.04188905925661803, "learning_rate": 0.0006923747860902871, "loss": 0.6953, "step": 17210 }, { "epoch": 0.8739576855224581, "grad_norm": 0.04311685985170262, "learning_rate": 0.000692170294096254, "loss": 0.6993, "step": 17215 }, { "epoch": 0.8742115216204897, "grad_norm": 0.03162642365555913, "learning_rate": 0.0006919657643801504, "loss": 0.6064, "step": 17220 }, { "epoch": 0.8744653577185212, "grad_norm": 0.030046268274625105, "learning_rate": 0.0006917611969821248, "loss": 0.6373, "step": 17225 }, { "epoch": 0.8747191938165526, "grad_norm": 0.02869410344770003, "learning_rate": 0.0006915565919423324, "loss": 0.7032, "step": 17230 }, { "epoch": 0.8749730299145841, "grad_norm": 0.03436583755872421, "learning_rate": 0.0006913519493009363, "loss": 0.6972, "step": 17235 }, { "epoch": 0.8752268660126157, "grad_norm": 0.031251303826750794, "learning_rate": 0.0006911472690981069, "loss": 0.7194, "step": 17240 }, { "epoch": 0.8754807021106471, "grad_norm": 0.029626857870476173, "learning_rate": 0.0006909425513740217, "loss": 0.7111, "step": 17245 }, { "epoch": 0.8757345382086786, "grad_norm": 0.028907838044135804, "learning_rate": 0.000690737796168866, "loss": 0.7031, "step": 17250 }, { "epoch": 0.8759883743067102, "grad_norm": 0.02674155684675277, "learning_rate": 0.0006905330035228321, "loss": 0.6436, "step": 17255 }, { "epoch": 0.8762422104047417, "grad_norm": 0.03656491401880869, "learning_rate": 0.0006903281734761197, "loss": 0.644, "step": 17260 }, { "epoch": 0.8764960465027731, "grad_norm": 0.03012185405116772, "learning_rate": 0.000690123306068936, "loss": 0.6679, "step": 17265 }, { "epoch": 0.8767498826008047, "grad_norm": 0.029954571450951716, "learning_rate": 0.0006899184013414955, "loss": 0.6641, "step": 17270 }, { "epoch": 0.8770037186988362, "grad_norm": 0.02764382657700367, "learning_rate": 0.00068971345933402, "loss": 0.6314, "step": 17275 }, { "epoch": 0.8772575547968676, "grad_norm": 0.03362005819543559, "learning_rate": 0.0006895084800867386, "loss": 0.6631, "step": 17280 }, { "epoch": 0.8775113908948992, "grad_norm": 0.032680705407298144, "learning_rate": 0.0006893034636398875, "loss": 0.6824, "step": 17285 }, { "epoch": 0.8777652269929307, "grad_norm": 0.028105324644329858, "learning_rate": 0.0006890984100337105, "loss": 0.6529, "step": 17290 }, { "epoch": 0.8780190630909621, "grad_norm": 0.030456876573769005, "learning_rate": 0.0006888933193084588, "loss": 0.6862, "step": 17295 }, { "epoch": 0.8782728991889936, "grad_norm": 0.029770525835205643, "learning_rate": 0.0006886881915043905, "loss": 0.6545, "step": 17300 }, { "epoch": 0.8785267352870252, "grad_norm": 0.028027013453591734, "learning_rate": 0.0006884830266617711, "loss": 0.6563, "step": 17305 }, { "epoch": 0.8787805713850567, "grad_norm": 0.03058433541245794, "learning_rate": 0.0006882778248208737, "loss": 0.6295, "step": 17310 }, { "epoch": 0.8790344074830881, "grad_norm": 0.028243903376601872, "learning_rate": 0.000688072586021978, "loss": 0.6961, "step": 17315 }, { "epoch": 0.8792882435811197, "grad_norm": 0.028743402064962766, "learning_rate": 0.0006878673103053717, "loss": 0.6354, "step": 17320 }, { "epoch": 0.8795420796791512, "grad_norm": 0.03091919376530688, "learning_rate": 0.0006876619977113492, "loss": 0.6791, "step": 17325 }, { "epoch": 0.8797959157771826, "grad_norm": 0.030853848899843862, "learning_rate": 0.0006874566482802125, "loss": 0.6745, "step": 17330 }, { "epoch": 0.8800497518752142, "grad_norm": 0.05032398387545476, "learning_rate": 0.0006872512620522707, "loss": 0.6843, "step": 17335 }, { "epoch": 0.8803035879732457, "grad_norm": 0.032868446220507344, "learning_rate": 0.0006870458390678397, "loss": 0.6534, "step": 17340 }, { "epoch": 0.8805574240712771, "grad_norm": 0.02914579637554954, "learning_rate": 0.0006868403793672435, "loss": 0.6863, "step": 17345 }, { "epoch": 0.8808112601693087, "grad_norm": 0.029303598963988122, "learning_rate": 0.0006866348829908125, "loss": 0.6658, "step": 17350 }, { "epoch": 0.8810650962673402, "grad_norm": 0.03221528772302487, "learning_rate": 0.0006864293499788849, "loss": 0.6923, "step": 17355 }, { "epoch": 0.8813189323653717, "grad_norm": 0.029186163370936394, "learning_rate": 0.0006862237803718054, "loss": 0.6814, "step": 17360 }, { "epoch": 0.8815727684634032, "grad_norm": 0.026416794407174236, "learning_rate": 0.0006860181742099266, "loss": 0.654, "step": 17365 }, { "epoch": 0.8818266045614347, "grad_norm": 0.03168227378372621, "learning_rate": 0.0006858125315336079, "loss": 0.6647, "step": 17370 }, { "epoch": 0.8820804406594662, "grad_norm": 0.028830088635083663, "learning_rate": 0.0006856068523832158, "loss": 0.6767, "step": 17375 }, { "epoch": 0.8823342767574976, "grad_norm": 0.0320706462186201, "learning_rate": 0.0006854011367991243, "loss": 0.625, "step": 17380 }, { "epoch": 0.8825881128555292, "grad_norm": 0.030160379253971054, "learning_rate": 0.0006851953848217142, "loss": 0.6761, "step": 17385 }, { "epoch": 0.8828419489535607, "grad_norm": 0.032546851587338915, "learning_rate": 0.0006849895964913737, "loss": 0.6531, "step": 17390 }, { "epoch": 0.8830957850515921, "grad_norm": 0.02879779589968242, "learning_rate": 0.0006847837718484977, "loss": 0.6226, "step": 17395 }, { "epoch": 0.8833496211496237, "grad_norm": 0.02778938944120821, "learning_rate": 0.0006845779109334891, "loss": 0.6534, "step": 17400 }, { "epoch": 0.8836034572476552, "grad_norm": 0.03129437042907462, "learning_rate": 0.0006843720137867569, "loss": 0.6796, "step": 17405 }, { "epoch": 0.8838572933456867, "grad_norm": 0.05369700760265257, "learning_rate": 0.0006841660804487179, "loss": 0.6855, "step": 17410 }, { "epoch": 0.8841111294437182, "grad_norm": 0.028423121772175302, "learning_rate": 0.0006839601109597957, "loss": 0.6462, "step": 17415 }, { "epoch": 0.8843649655417497, "grad_norm": 0.0301807880231965, "learning_rate": 0.0006837541053604213, "loss": 0.7065, "step": 17420 }, { "epoch": 0.8846188016397812, "grad_norm": 0.053648648196981855, "learning_rate": 0.0006835480636910321, "loss": 0.6398, "step": 17425 }, { "epoch": 0.8848726377378127, "grad_norm": 0.04788444668418925, "learning_rate": 0.0006833419859920736, "loss": 0.6735, "step": 17430 }, { "epoch": 0.8851264738358442, "grad_norm": 0.03258481946007635, "learning_rate": 0.0006831358723039976, "loss": 0.6901, "step": 17435 }, { "epoch": 0.8853803099338757, "grad_norm": 0.03119904821569732, "learning_rate": 0.000682929722667263, "loss": 0.6526, "step": 17440 }, { "epoch": 0.8856341460319072, "grad_norm": 0.04110217455509604, "learning_rate": 0.0006827235371223362, "loss": 0.6863, "step": 17445 }, { "epoch": 0.8858879821299387, "grad_norm": 0.03179544082735434, "learning_rate": 0.0006825173157096903, "loss": 0.6724, "step": 17450 }, { "epoch": 0.8861418182279702, "grad_norm": 0.033084037488709986, "learning_rate": 0.0006823110584698055, "loss": 0.6633, "step": 17455 }, { "epoch": 0.8863956543260016, "grad_norm": 0.03102976533067641, "learning_rate": 0.0006821047654431691, "loss": 0.6836, "step": 17460 }, { "epoch": 0.8866494904240332, "grad_norm": 0.028385690258454492, "learning_rate": 0.0006818984366702754, "loss": 0.6407, "step": 17465 }, { "epoch": 0.8869033265220647, "grad_norm": 0.02799403591258303, "learning_rate": 0.0006816920721916259, "loss": 0.6788, "step": 17470 }, { "epoch": 0.8871571626200963, "grad_norm": 0.03058641050635339, "learning_rate": 0.0006814856720477285, "loss": 0.6806, "step": 17475 }, { "epoch": 0.8874109987181277, "grad_norm": 0.03195947794218517, "learning_rate": 0.0006812792362790987, "loss": 0.6785, "step": 17480 }, { "epoch": 0.8876648348161592, "grad_norm": 0.034025506615933725, "learning_rate": 0.0006810727649262591, "loss": 0.6272, "step": 17485 }, { "epoch": 0.8879186709141907, "grad_norm": 0.029634695847424328, "learning_rate": 0.0006808662580297385, "loss": 0.6893, "step": 17490 }, { "epoch": 0.8881725070122222, "grad_norm": 0.03241331661663449, "learning_rate": 0.0006806597156300736, "loss": 0.7019, "step": 17495 }, { "epoch": 0.8884263431102537, "grad_norm": 0.028591858459862486, "learning_rate": 0.0006804531377678074, "loss": 0.6615, "step": 17500 }, { "epoch": 0.8886801792082852, "grad_norm": 0.029526124006159105, "learning_rate": 0.0006802465244834901, "loss": 0.6281, "step": 17505 }, { "epoch": 0.8889340153063167, "grad_norm": 0.03540646931202244, "learning_rate": 0.000680039875817679, "loss": 0.6926, "step": 17510 }, { "epoch": 0.8891878514043482, "grad_norm": 0.044899171024895705, "learning_rate": 0.0006798331918109381, "loss": 0.6316, "step": 17515 }, { "epoch": 0.8894416875023797, "grad_norm": 0.030721088931477493, "learning_rate": 0.0006796264725038387, "loss": 0.6936, "step": 17520 }, { "epoch": 0.8896955236004113, "grad_norm": 0.03155980335361958, "learning_rate": 0.0006794197179369584, "loss": 0.6657, "step": 17525 }, { "epoch": 0.8899493596984427, "grad_norm": 0.04076933899648426, "learning_rate": 0.0006792129281508821, "loss": 0.6676, "step": 17530 }, { "epoch": 0.8902031957964742, "grad_norm": 0.03555958083126104, "learning_rate": 0.0006790061031862018, "loss": 0.6614, "step": 17535 }, { "epoch": 0.8904570318945058, "grad_norm": 0.04163923041025127, "learning_rate": 0.0006787992430835161, "loss": 0.6951, "step": 17540 }, { "epoch": 0.8907108679925372, "grad_norm": 0.029722989745581584, "learning_rate": 0.0006785923478834308, "loss": 0.6664, "step": 17545 }, { "epoch": 0.8909647040905687, "grad_norm": 0.03025093717829298, "learning_rate": 0.0006783854176265582, "loss": 0.6754, "step": 17550 }, { "epoch": 0.8912185401886003, "grad_norm": 0.030234049682949227, "learning_rate": 0.0006781784523535177, "loss": 0.7113, "step": 17555 }, { "epoch": 0.8914723762866317, "grad_norm": 0.04114343462159167, "learning_rate": 0.0006779714521049356, "loss": 0.6499, "step": 17560 }, { "epoch": 0.8917262123846632, "grad_norm": 0.030410307456549154, "learning_rate": 0.000677764416921445, "loss": 0.6817, "step": 17565 }, { "epoch": 0.8919800484826947, "grad_norm": 0.027858252565586526, "learning_rate": 0.000677557346843686, "loss": 0.6619, "step": 17570 }, { "epoch": 0.8922338845807263, "grad_norm": 0.030078953003380336, "learning_rate": 0.0006773502419123051, "loss": 0.7041, "step": 17575 }, { "epoch": 0.8924877206787577, "grad_norm": 0.02737669375101074, "learning_rate": 0.0006771431021679561, "loss": 0.6783, "step": 17580 }, { "epoch": 0.8927415567767892, "grad_norm": 0.031242378970549798, "learning_rate": 0.0006769359276512998, "loss": 0.6398, "step": 17585 }, { "epoch": 0.8929953928748208, "grad_norm": 0.03038747688268042, "learning_rate": 0.0006767287184030031, "loss": 0.7002, "step": 17590 }, { "epoch": 0.8932492289728522, "grad_norm": 0.02894465234815313, "learning_rate": 0.0006765214744637402, "loss": 0.6852, "step": 17595 }, { "epoch": 0.8935030650708837, "grad_norm": 0.027912370768144203, "learning_rate": 0.0006763141958741924, "loss": 0.6699, "step": 17600 }, { "epoch": 0.8937569011689153, "grad_norm": 0.026491723797902312, "learning_rate": 0.0006761068826750472, "loss": 0.6519, "step": 17605 }, { "epoch": 0.8940107372669467, "grad_norm": 0.029456320197903556, "learning_rate": 0.0006758995349069992, "loss": 0.6772, "step": 17610 }, { "epoch": 0.8942645733649782, "grad_norm": 0.042997581207685695, "learning_rate": 0.0006756921526107495, "loss": 0.6847, "step": 17615 }, { "epoch": 0.8945184094630098, "grad_norm": 0.029862217496103766, "learning_rate": 0.0006754847358270066, "loss": 0.6931, "step": 17620 }, { "epoch": 0.8947722455610413, "grad_norm": 0.030271794935833398, "learning_rate": 0.0006752772845964852, "loss": 0.6936, "step": 17625 }, { "epoch": 0.8950260816590727, "grad_norm": 0.03113774890255375, "learning_rate": 0.0006750697989599068, "loss": 0.6805, "step": 17630 }, { "epoch": 0.8952799177571042, "grad_norm": 0.02862403374533774, "learning_rate": 0.0006748622789580001, "loss": 0.6567, "step": 17635 }, { "epoch": 0.8955337538551358, "grad_norm": 0.030003173611019028, "learning_rate": 0.0006746547246315, "loss": 0.7006, "step": 17640 }, { "epoch": 0.8957875899531672, "grad_norm": 0.029988993842717398, "learning_rate": 0.0006744471360211484, "loss": 0.6718, "step": 17645 }, { "epoch": 0.8960414260511987, "grad_norm": 0.029085614614139794, "learning_rate": 0.0006742395131676942, "loss": 0.6527, "step": 17650 }, { "epoch": 0.8962952621492303, "grad_norm": 0.03188485952387949, "learning_rate": 0.0006740318561118922, "loss": 0.6759, "step": 17655 }, { "epoch": 0.8965490982472617, "grad_norm": 0.03342336991786555, "learning_rate": 0.0006738241648945049, "loss": 0.6467, "step": 17660 }, { "epoch": 0.8968029343452932, "grad_norm": 0.04888176188276012, "learning_rate": 0.0006736164395563009, "loss": 0.703, "step": 17665 }, { "epoch": 0.8970567704433248, "grad_norm": 0.03384923081712749, "learning_rate": 0.0006734086801380556, "loss": 0.653, "step": 17670 }, { "epoch": 0.8973106065413562, "grad_norm": 0.027198750905539856, "learning_rate": 0.0006732008866805512, "loss": 0.6574, "step": 17675 }, { "epoch": 0.8975644426393877, "grad_norm": 0.029744446731744173, "learning_rate": 0.0006729930592245764, "loss": 0.6746, "step": 17680 }, { "epoch": 0.8978182787374193, "grad_norm": 0.02813790571941267, "learning_rate": 0.000672785197810927, "loss": 0.6477, "step": 17685 }, { "epoch": 0.8980721148354508, "grad_norm": 0.028241549972578342, "learning_rate": 0.0006725773024804047, "loss": 0.677, "step": 17690 }, { "epoch": 0.8983259509334822, "grad_norm": 0.033662543104628476, "learning_rate": 0.0006723693732738188, "loss": 0.7017, "step": 17695 }, { "epoch": 0.8985797870315138, "grad_norm": 0.029027332872772912, "learning_rate": 0.0006721614102319845, "loss": 0.7049, "step": 17700 }, { "epoch": 0.8988336231295453, "grad_norm": 0.03055069157933205, "learning_rate": 0.0006719534133957237, "loss": 0.7002, "step": 17705 }, { "epoch": 0.8990874592275767, "grad_norm": 0.030225910770733447, "learning_rate": 0.0006717453828058655, "loss": 0.6882, "step": 17710 }, { "epoch": 0.8993412953256082, "grad_norm": 0.028800808206352144, "learning_rate": 0.0006715373185032452, "loss": 0.6723, "step": 17715 }, { "epoch": 0.8995951314236398, "grad_norm": 0.029081882775121234, "learning_rate": 0.0006713292205287047, "loss": 0.6235, "step": 17720 }, { "epoch": 0.8998489675216712, "grad_norm": 0.02964651300767994, "learning_rate": 0.0006711210889230926, "loss": 0.6438, "step": 17725 }, { "epoch": 0.9001028036197027, "grad_norm": 0.02783734000673687, "learning_rate": 0.0006709129237272642, "loss": 0.703, "step": 17730 }, { "epoch": 0.9003566397177343, "grad_norm": 0.029326433250848345, "learning_rate": 0.0006707047249820813, "loss": 0.6686, "step": 17735 }, { "epoch": 0.9006104758157658, "grad_norm": 0.03479372510351558, "learning_rate": 0.0006704964927284119, "loss": 0.6978, "step": 17740 }, { "epoch": 0.9008643119137972, "grad_norm": 0.02772303151803179, "learning_rate": 0.0006702882270071313, "loss": 0.6676, "step": 17745 }, { "epoch": 0.9011181480118288, "grad_norm": 0.02980954979694452, "learning_rate": 0.0006700799278591212, "loss": 0.7099, "step": 17750 }, { "epoch": 0.9013719841098603, "grad_norm": 0.03408603092760559, "learning_rate": 0.0006698715953252693, "loss": 0.6895, "step": 17755 }, { "epoch": 0.9016258202078917, "grad_norm": 0.030943350578237754, "learning_rate": 0.0006696632294464704, "loss": 0.7181, "step": 17760 }, { "epoch": 0.9018796563059233, "grad_norm": 0.03649721437473434, "learning_rate": 0.0006694548302636256, "loss": 0.7162, "step": 17765 }, { "epoch": 0.9021334924039548, "grad_norm": 0.03427040027693339, "learning_rate": 0.0006692463978176428, "loss": 0.6994, "step": 17770 }, { "epoch": 0.9023873285019862, "grad_norm": 0.032735783265172604, "learning_rate": 0.0006690379321494361, "loss": 0.6674, "step": 17775 }, { "epoch": 0.9026411646000178, "grad_norm": 0.029035188700660457, "learning_rate": 0.0006688294332999263, "loss": 0.6765, "step": 17780 }, { "epoch": 0.9028950006980493, "grad_norm": 0.03806119097187777, "learning_rate": 0.0006686209013100407, "loss": 0.6864, "step": 17785 }, { "epoch": 0.9031488367960808, "grad_norm": 0.030484178037981945, "learning_rate": 0.0006684123362207131, "loss": 0.6548, "step": 17790 }, { "epoch": 0.9034026728941122, "grad_norm": 0.031830220251236746, "learning_rate": 0.0006682037380728839, "loss": 0.6489, "step": 17795 }, { "epoch": 0.9036565089921438, "grad_norm": 0.02830462117981865, "learning_rate": 0.0006679951069074995, "loss": 0.6608, "step": 17800 }, { "epoch": 0.9039103450901753, "grad_norm": 0.029150601210220554, "learning_rate": 0.0006677864427655135, "loss": 0.6925, "step": 17805 }, { "epoch": 0.9041641811882067, "grad_norm": 0.06212769780879453, "learning_rate": 0.0006675777456878855, "loss": 0.6544, "step": 17810 }, { "epoch": 0.9044180172862383, "grad_norm": 0.034348558634371244, "learning_rate": 0.0006673690157155818, "loss": 0.6778, "step": 17815 }, { "epoch": 0.9046718533842698, "grad_norm": 0.03050998302854449, "learning_rate": 0.000667160252889575, "loss": 0.6804, "step": 17820 }, { "epoch": 0.9049256894823012, "grad_norm": 0.029863386387110006, "learning_rate": 0.0006669514572508441, "loss": 0.6382, "step": 17825 }, { "epoch": 0.9051795255803328, "grad_norm": 0.031085983351490515, "learning_rate": 0.0006667426288403749, "loss": 0.6653, "step": 17830 }, { "epoch": 0.9054333616783643, "grad_norm": 0.03955437354462771, "learning_rate": 0.000666533767699159, "loss": 0.6675, "step": 17835 }, { "epoch": 0.9056871977763958, "grad_norm": 0.02944406557204913, "learning_rate": 0.0006663248738681951, "loss": 0.6915, "step": 17840 }, { "epoch": 0.9059410338744273, "grad_norm": 0.03027143615272517, "learning_rate": 0.0006661159473884879, "loss": 0.6157, "step": 17845 }, { "epoch": 0.9061948699724588, "grad_norm": 0.032384274234686525, "learning_rate": 0.0006659069883010487, "loss": 0.6738, "step": 17850 }, { "epoch": 0.9064487060704903, "grad_norm": 0.030957990968300705, "learning_rate": 0.0006656979966468949, "loss": 0.7256, "step": 17855 }, { "epoch": 0.9067025421685218, "grad_norm": 0.02796657295947347, "learning_rate": 0.0006654889724670509, "loss": 0.687, "step": 17860 }, { "epoch": 0.9069563782665533, "grad_norm": 0.028522945790756697, "learning_rate": 0.0006652799158025466, "loss": 0.6629, "step": 17865 }, { "epoch": 0.9072102143645848, "grad_norm": 0.030640697011519037, "learning_rate": 0.0006650708266944194, "loss": 0.6394, "step": 17870 }, { "epoch": 0.9074640504626162, "grad_norm": 0.06655448396685146, "learning_rate": 0.000664861705183712, "loss": 0.6584, "step": 17875 }, { "epoch": 0.9077178865606478, "grad_norm": 0.16251739746284777, "learning_rate": 0.0006646525513114741, "loss": 0.7033, "step": 17880 }, { "epoch": 0.9079717226586793, "grad_norm": 0.044132426849415866, "learning_rate": 0.0006644433651187613, "loss": 0.7613, "step": 17885 }, { "epoch": 0.9082255587567107, "grad_norm": 0.20898347851802201, "learning_rate": 0.0006642341466466363, "loss": 0.6921, "step": 17890 }, { "epoch": 0.9084793948547423, "grad_norm": 0.0328726064751086, "learning_rate": 0.0006640248959361671, "loss": 0.6932, "step": 17895 }, { "epoch": 0.9087332309527738, "grad_norm": 0.034535862625027405, "learning_rate": 0.000663815613028429, "loss": 0.6913, "step": 17900 }, { "epoch": 0.9089870670508053, "grad_norm": 0.031702473354727194, "learning_rate": 0.0006636062979645029, "loss": 0.7225, "step": 17905 }, { "epoch": 0.9092409031488368, "grad_norm": 0.029217713709617774, "learning_rate": 0.0006633969507854764, "loss": 0.6919, "step": 17910 }, { "epoch": 0.9094947392468683, "grad_norm": 0.03511694339277816, "learning_rate": 0.0006631875715324433, "loss": 0.6618, "step": 17915 }, { "epoch": 0.9097485753448998, "grad_norm": 0.03233689371798976, "learning_rate": 0.0006629781602465039, "loss": 0.6691, "step": 17920 }, { "epoch": 0.9100024114429313, "grad_norm": 0.037448450509427404, "learning_rate": 0.0006627687169687643, "loss": 0.6724, "step": 17925 }, { "epoch": 0.9102562475409628, "grad_norm": 0.03184955799467332, "learning_rate": 0.0006625592417403372, "loss": 0.6908, "step": 17930 }, { "epoch": 0.9105100836389943, "grad_norm": 0.036404280363254556, "learning_rate": 0.0006623497346023419, "loss": 0.7263, "step": 17935 }, { "epoch": 0.9107639197370258, "grad_norm": 0.035123035417043626, "learning_rate": 0.0006621401955959029, "loss": 0.6902, "step": 17940 }, { "epoch": 0.9110177558350573, "grad_norm": 0.031738381947037556, "learning_rate": 0.0006619306247621525, "loss": 0.6645, "step": 17945 }, { "epoch": 0.9112715919330888, "grad_norm": 0.032775716934275174, "learning_rate": 0.0006617210221422278, "loss": 0.6592, "step": 17950 }, { "epoch": 0.9115254280311204, "grad_norm": 0.02910466891277214, "learning_rate": 0.0006615113877772729, "loss": 0.6685, "step": 17955 }, { "epoch": 0.9117792641291518, "grad_norm": 0.03422071883897591, "learning_rate": 0.0006613017217084382, "loss": 0.6873, "step": 17960 }, { "epoch": 0.9120331002271833, "grad_norm": 0.031016118001051196, "learning_rate": 0.00066109202397688, "loss": 0.6764, "step": 17965 }, { "epoch": 0.9122869363252148, "grad_norm": 0.028621860816592543, "learning_rate": 0.0006608822946237607, "loss": 0.6615, "step": 17970 }, { "epoch": 0.9125407724232463, "grad_norm": 0.03199328552784388, "learning_rate": 0.0006606725336902493, "loss": 0.6554, "step": 17975 }, { "epoch": 0.9127946085212778, "grad_norm": 0.02803903679829704, "learning_rate": 0.0006604627412175209, "loss": 0.7059, "step": 17980 }, { "epoch": 0.9130484446193093, "grad_norm": 0.03170914693218245, "learning_rate": 0.0006602529172467564, "loss": 0.6595, "step": 17985 }, { "epoch": 0.9133022807173408, "grad_norm": 0.03127295086124657, "learning_rate": 0.0006600430618191436, "loss": 0.6669, "step": 17990 }, { "epoch": 0.9135561168153723, "grad_norm": 0.02669496068565935, "learning_rate": 0.0006598331749758759, "loss": 0.705, "step": 17995 }, { "epoch": 0.9138099529134038, "grad_norm": 0.02808305594642916, "learning_rate": 0.0006596232567581531, "loss": 0.6564, "step": 18000 }, { "epoch": 0.9140637890114354, "grad_norm": 0.02806079529054275, "learning_rate": 0.0006594133072071809, "loss": 0.6684, "step": 18005 }, { "epoch": 0.9143176251094668, "grad_norm": 0.03104302348422221, "learning_rate": 0.0006592033263641715, "loss": 0.6395, "step": 18010 }, { "epoch": 0.9145714612074983, "grad_norm": 0.028605786379722227, "learning_rate": 0.000658993314270343, "loss": 0.6818, "step": 18015 }, { "epoch": 0.9148252973055299, "grad_norm": 0.02995267117085593, "learning_rate": 0.00065878327096692, "loss": 0.678, "step": 18020 }, { "epoch": 0.9150791334035613, "grad_norm": 0.02919787073310821, "learning_rate": 0.0006585731964951327, "loss": 0.6774, "step": 18025 }, { "epoch": 0.9153329695015928, "grad_norm": 0.03164271207608264, "learning_rate": 0.0006583630908962178, "loss": 0.7251, "step": 18030 }, { "epoch": 0.9155868055996244, "grad_norm": 0.02987152068874936, "learning_rate": 0.0006581529542114178, "loss": 0.6737, "step": 18035 }, { "epoch": 0.9158406416976558, "grad_norm": 0.029375449127649965, "learning_rate": 0.0006579427864819817, "loss": 0.6998, "step": 18040 }, { "epoch": 0.9160944777956873, "grad_norm": 0.02973213049724198, "learning_rate": 0.0006577325877491641, "loss": 0.6434, "step": 18045 }, { "epoch": 0.9163483138937188, "grad_norm": 0.0344335321254048, "learning_rate": 0.0006575223580542263, "loss": 0.6779, "step": 18050 }, { "epoch": 0.9166021499917504, "grad_norm": 0.028239231746237634, "learning_rate": 0.0006573120974384351, "loss": 0.6783, "step": 18055 }, { "epoch": 0.9168559860897818, "grad_norm": 0.025845418902121893, "learning_rate": 0.0006571018059430638, "loss": 0.6666, "step": 18060 }, { "epoch": 0.9171098221878133, "grad_norm": 0.03889089271262204, "learning_rate": 0.0006568914836093913, "loss": 0.6729, "step": 18065 }, { "epoch": 0.9173636582858449, "grad_norm": 0.03066048169988323, "learning_rate": 0.000656681130478703, "loss": 0.7011, "step": 18070 }, { "epoch": 0.9176174943838763, "grad_norm": 0.02826957321232297, "learning_rate": 0.0006564707465922901, "loss": 0.6653, "step": 18075 }, { "epoch": 0.9178713304819078, "grad_norm": 0.03038010699042897, "learning_rate": 0.0006562603319914502, "loss": 0.685, "step": 18080 }, { "epoch": 0.9181251665799394, "grad_norm": 0.027701622580431007, "learning_rate": 0.0006560498867174862, "loss": 0.6631, "step": 18085 }, { "epoch": 0.9183790026779708, "grad_norm": 0.028991488670192436, "learning_rate": 0.0006558394108117078, "loss": 0.6937, "step": 18090 }, { "epoch": 0.9186328387760023, "grad_norm": 0.029020094625573003, "learning_rate": 0.00065562890431543, "loss": 0.6495, "step": 18095 }, { "epoch": 0.9188866748740339, "grad_norm": 0.033170333008103785, "learning_rate": 0.0006554183672699747, "loss": 0.6794, "step": 18100 }, { "epoch": 0.9191405109720653, "grad_norm": 0.031842163611523616, "learning_rate": 0.0006552077997166686, "loss": 0.6937, "step": 18105 }, { "epoch": 0.9193943470700968, "grad_norm": 0.029654628534773943, "learning_rate": 0.0006549972016968457, "loss": 0.6772, "step": 18110 }, { "epoch": 0.9196481831681284, "grad_norm": 0.030824347128709993, "learning_rate": 0.0006547865732518451, "loss": 0.665, "step": 18115 }, { "epoch": 0.9199020192661599, "grad_norm": 0.026610959539443098, "learning_rate": 0.0006545759144230122, "loss": 0.6277, "step": 18120 }, { "epoch": 0.9201558553641913, "grad_norm": 0.027051688534596992, "learning_rate": 0.0006543652252516978, "loss": 0.6567, "step": 18125 }, { "epoch": 0.9204096914622228, "grad_norm": 0.03058042544551771, "learning_rate": 0.0006541545057792597, "loss": 0.6679, "step": 18130 }, { "epoch": 0.9206635275602544, "grad_norm": 0.027833295885854932, "learning_rate": 0.0006539437560470609, "loss": 0.6902, "step": 18135 }, { "epoch": 0.9209173636582858, "grad_norm": 0.029151592155417325, "learning_rate": 0.0006537329760964705, "loss": 0.658, "step": 18140 }, { "epoch": 0.9211711997563173, "grad_norm": 0.0299289720340803, "learning_rate": 0.0006535221659688636, "loss": 0.6825, "step": 18145 }, { "epoch": 0.9214250358543489, "grad_norm": 0.03034403685040283, "learning_rate": 0.0006533113257056212, "loss": 0.6827, "step": 18150 }, { "epoch": 0.9216788719523803, "grad_norm": 0.030858722956688674, "learning_rate": 0.0006531004553481299, "loss": 0.662, "step": 18155 }, { "epoch": 0.9219327080504118, "grad_norm": 0.03408253340105829, "learning_rate": 0.0006528895549377829, "loss": 0.6556, "step": 18160 }, { "epoch": 0.9221865441484434, "grad_norm": 0.029734577304705943, "learning_rate": 0.0006526786245159785, "loss": 0.6688, "step": 18165 }, { "epoch": 0.9224403802464749, "grad_norm": 0.029469231446308176, "learning_rate": 0.0006524676641241216, "loss": 0.6805, "step": 18170 }, { "epoch": 0.9226942163445063, "grad_norm": 0.028462763105278656, "learning_rate": 0.0006522566738036227, "loss": 0.6654, "step": 18175 }, { "epoch": 0.9229480524425379, "grad_norm": 0.033208199490354126, "learning_rate": 0.0006520456535958981, "loss": 0.6503, "step": 18180 }, { "epoch": 0.9232018885405694, "grad_norm": 0.033449472129535904, "learning_rate": 0.0006518346035423697, "loss": 0.6715, "step": 18185 }, { "epoch": 0.9234557246386008, "grad_norm": 0.03627313036213827, "learning_rate": 0.0006516235236844661, "loss": 0.6908, "step": 18190 }, { "epoch": 0.9237095607366324, "grad_norm": 0.031774741933190735, "learning_rate": 0.0006514124140636206, "loss": 0.6836, "step": 18195 }, { "epoch": 0.9239633968346639, "grad_norm": 0.027716480002934557, "learning_rate": 0.0006512012747212736, "loss": 0.6653, "step": 18200 }, { "epoch": 0.9242172329326953, "grad_norm": 0.030020718982286, "learning_rate": 0.0006509901056988703, "loss": 0.6477, "step": 18205 }, { "epoch": 0.9244710690307268, "grad_norm": 0.030841739467350842, "learning_rate": 0.0006507789070378623, "loss": 0.6985, "step": 18210 }, { "epoch": 0.9247249051287584, "grad_norm": 0.03178581808319531, "learning_rate": 0.0006505676787797068, "loss": 0.6668, "step": 18215 }, { "epoch": 0.9249787412267899, "grad_norm": 0.028562565227742677, "learning_rate": 0.0006503564209658668, "loss": 0.6765, "step": 18220 }, { "epoch": 0.9252325773248213, "grad_norm": 0.03263476585800813, "learning_rate": 0.0006501451336378111, "loss": 0.6963, "step": 18225 }, { "epoch": 0.9254864134228529, "grad_norm": 0.03333412639200544, "learning_rate": 0.0006499338168370145, "loss": 0.661, "step": 18230 }, { "epoch": 0.9257402495208844, "grad_norm": 0.030669248023670964, "learning_rate": 0.0006497224706049574, "loss": 0.6623, "step": 18235 }, { "epoch": 0.9259940856189158, "grad_norm": 0.029714266082097544, "learning_rate": 0.000649511094983126, "loss": 0.6552, "step": 18240 }, { "epoch": 0.9262479217169474, "grad_norm": 0.03115826569042706, "learning_rate": 0.0006492996900130122, "loss": 0.6637, "step": 18245 }, { "epoch": 0.9265017578149789, "grad_norm": 0.031908537946570774, "learning_rate": 0.0006490882557361138, "loss": 0.6794, "step": 18250 }, { "epoch": 0.9267555939130103, "grad_norm": 0.03280618822259159, "learning_rate": 0.0006488767921939344, "loss": 0.6668, "step": 18255 }, { "epoch": 0.9270094300110419, "grad_norm": 0.029233099170146954, "learning_rate": 0.0006486652994279832, "loss": 0.6268, "step": 18260 }, { "epoch": 0.9272632661090734, "grad_norm": 0.031489024461510515, "learning_rate": 0.000648453777479775, "loss": 0.6708, "step": 18265 }, { "epoch": 0.9275171022071049, "grad_norm": 0.029278657488660955, "learning_rate": 0.0006482422263908305, "loss": 0.6894, "step": 18270 }, { "epoch": 0.9277709383051364, "grad_norm": 0.053159726951652615, "learning_rate": 0.0006480306462026765, "loss": 0.6616, "step": 18275 }, { "epoch": 0.9280247744031679, "grad_norm": 0.030389770468811476, "learning_rate": 0.0006478190369568447, "loss": 0.6842, "step": 18280 }, { "epoch": 0.9282786105011994, "grad_norm": 0.027117555810864714, "learning_rate": 0.0006476073986948731, "loss": 0.6577, "step": 18285 }, { "epoch": 0.9285324465992308, "grad_norm": 0.03165829960533959, "learning_rate": 0.0006473957314583053, "loss": 0.6772, "step": 18290 }, { "epoch": 0.9287862826972624, "grad_norm": 0.03072197608256708, "learning_rate": 0.0006471840352886906, "loss": 0.6689, "step": 18295 }, { "epoch": 0.9290401187952939, "grad_norm": 0.028788947524522056, "learning_rate": 0.0006469723102275835, "loss": 0.6571, "step": 18300 }, { "epoch": 0.9292939548933253, "grad_norm": 0.030149676019451434, "learning_rate": 0.000646760556316545, "loss": 0.6823, "step": 18305 }, { "epoch": 0.9295477909913569, "grad_norm": 0.03108709555407851, "learning_rate": 0.0006465487735971414, "loss": 0.6984, "step": 18310 }, { "epoch": 0.9298016270893884, "grad_norm": 0.031691518757790305, "learning_rate": 0.000646336962110944, "loss": 0.6483, "step": 18315 }, { "epoch": 0.9300554631874198, "grad_norm": 0.0328333410330306, "learning_rate": 0.0006461251218995309, "loss": 0.6994, "step": 18320 }, { "epoch": 0.9303092992854514, "grad_norm": 0.0343573006713216, "learning_rate": 0.0006459132530044851, "loss": 0.6514, "step": 18325 }, { "epoch": 0.9305631353834829, "grad_norm": 0.02783084902734074, "learning_rate": 0.0006457013554673954, "loss": 0.6276, "step": 18330 }, { "epoch": 0.9308169714815144, "grad_norm": 0.030118143936472414, "learning_rate": 0.0006454894293298563, "loss": 0.7147, "step": 18335 }, { "epoch": 0.9310708075795459, "grad_norm": 0.02818185014623596, "learning_rate": 0.0006452774746334677, "loss": 0.7035, "step": 18340 }, { "epoch": 0.9313246436775774, "grad_norm": 0.029094260401746935, "learning_rate": 0.0006450654914198354, "loss": 0.647, "step": 18345 }, { "epoch": 0.9315784797756089, "grad_norm": 0.028524528026121994, "learning_rate": 0.0006448534797305704, "loss": 0.6779, "step": 18350 }, { "epoch": 0.9318323158736403, "grad_norm": 0.029410003973663973, "learning_rate": 0.0006446414396072899, "loss": 0.6461, "step": 18355 }, { "epoch": 0.9320861519716719, "grad_norm": 0.03647738377341709, "learning_rate": 0.0006444293710916161, "loss": 0.6607, "step": 18360 }, { "epoch": 0.9323399880697034, "grad_norm": 0.04375511356998904, "learning_rate": 0.000644217274225177, "loss": 0.6972, "step": 18365 }, { "epoch": 0.9325938241677348, "grad_norm": 0.05064804331088906, "learning_rate": 0.000644005149049606, "loss": 0.6775, "step": 18370 }, { "epoch": 0.9328476602657664, "grad_norm": 0.037731360490726264, "learning_rate": 0.0006437929956065426, "loss": 0.6853, "step": 18375 }, { "epoch": 0.9331014963637979, "grad_norm": 0.02990084086815968, "learning_rate": 0.0006435808139376313, "loss": 0.6775, "step": 18380 }, { "epoch": 0.9333553324618294, "grad_norm": 0.03203163633301937, "learning_rate": 0.0006433686040845222, "loss": 0.683, "step": 18385 }, { "epoch": 0.9336091685598609, "grad_norm": 0.03231826394403643, "learning_rate": 0.0006431563660888711, "loss": 0.6505, "step": 18390 }, { "epoch": 0.9338630046578924, "grad_norm": 0.031917952305757356, "learning_rate": 0.0006429440999923392, "loss": 0.6677, "step": 18395 }, { "epoch": 0.9341168407559239, "grad_norm": 0.030331504681163936, "learning_rate": 0.0006427318058365934, "loss": 0.7033, "step": 18400 }, { "epoch": 0.9343706768539554, "grad_norm": 0.030709078159259676, "learning_rate": 0.0006425194836633058, "loss": 0.704, "step": 18405 }, { "epoch": 0.9346245129519869, "grad_norm": 0.027830394018377305, "learning_rate": 0.0006423071335141543, "loss": 0.672, "step": 18410 }, { "epoch": 0.9348783490500184, "grad_norm": 0.033526564193566644, "learning_rate": 0.0006420947554308223, "loss": 0.6779, "step": 18415 }, { "epoch": 0.9351321851480499, "grad_norm": 0.028806894520424443, "learning_rate": 0.0006418823494549983, "loss": 0.6885, "step": 18420 }, { "epoch": 0.9353860212460814, "grad_norm": 0.029364072594323685, "learning_rate": 0.0006416699156283768, "loss": 0.6259, "step": 18425 }, { "epoch": 0.9356398573441129, "grad_norm": 0.031175970536306788, "learning_rate": 0.0006414574539926574, "loss": 0.6791, "step": 18430 }, { "epoch": 0.9358936934421445, "grad_norm": 0.03278017056535612, "learning_rate": 0.0006412449645895452, "loss": 0.6473, "step": 18435 }, { "epoch": 0.9361475295401759, "grad_norm": 0.028490870583956156, "learning_rate": 0.0006410324474607507, "loss": 0.7018, "step": 18440 }, { "epoch": 0.9364013656382074, "grad_norm": 0.03272592499338899, "learning_rate": 0.0006408199026479901, "loss": 0.6958, "step": 18445 }, { "epoch": 0.936655201736239, "grad_norm": 0.034201323674787, "learning_rate": 0.000640607330192985, "loss": 0.7055, "step": 18450 }, { "epoch": 0.9369090378342704, "grad_norm": 0.02755407622893804, "learning_rate": 0.0006403947301374622, "loss": 0.6251, "step": 18455 }, { "epoch": 0.9371628739323019, "grad_norm": 0.02733769798943959, "learning_rate": 0.000640182102523154, "loss": 0.6442, "step": 18460 }, { "epoch": 0.9374167100303334, "grad_norm": 0.02770684867668132, "learning_rate": 0.0006399694473917981, "loss": 0.6583, "step": 18465 }, { "epoch": 0.9376705461283649, "grad_norm": 0.030259272284709284, "learning_rate": 0.0006397567647851377, "loss": 0.6758, "step": 18470 }, { "epoch": 0.9379243822263964, "grad_norm": 0.02850716178070247, "learning_rate": 0.0006395440547449214, "loss": 0.6567, "step": 18475 }, { "epoch": 0.9381782183244279, "grad_norm": 0.032310828531999694, "learning_rate": 0.000639331317312903, "loss": 0.6661, "step": 18480 }, { "epoch": 0.9384320544224595, "grad_norm": 0.029014433575197385, "learning_rate": 0.0006391185525308419, "loss": 0.6706, "step": 18485 }, { "epoch": 0.9386858905204909, "grad_norm": 0.032109943294983616, "learning_rate": 0.0006389057604405027, "loss": 0.6805, "step": 18490 }, { "epoch": 0.9389397266185224, "grad_norm": 0.029166106410957893, "learning_rate": 0.0006386929410836555, "loss": 0.6692, "step": 18495 }, { "epoch": 0.939193562716554, "grad_norm": 0.02930770976229482, "learning_rate": 0.0006384800945020755, "loss": 0.6425, "step": 18500 }, { "epoch": 0.9394473988145854, "grad_norm": 0.025069635107060277, "learning_rate": 0.0006382672207375438, "loss": 0.6181, "step": 18505 }, { "epoch": 0.9397012349126169, "grad_norm": 0.03373564366152573, "learning_rate": 0.000638054319831846, "loss": 0.6624, "step": 18510 }, { "epoch": 0.9399550710106485, "grad_norm": 0.02909522029785918, "learning_rate": 0.0006378413918267737, "loss": 0.6549, "step": 18515 }, { "epoch": 0.9402089071086799, "grad_norm": 0.03420480061469397, "learning_rate": 0.0006376284367641237, "loss": 0.6432, "step": 18520 }, { "epoch": 0.9404627432067114, "grad_norm": 0.030232420398668473, "learning_rate": 0.0006374154546856978, "loss": 0.6825, "step": 18525 }, { "epoch": 0.940716579304743, "grad_norm": 0.034968428026122905, "learning_rate": 0.0006372024456333034, "loss": 0.6873, "step": 18530 }, { "epoch": 0.9409704154027744, "grad_norm": 0.03194047053748897, "learning_rate": 0.0006369894096487533, "loss": 0.6572, "step": 18535 }, { "epoch": 0.9412242515008059, "grad_norm": 0.05240255632117546, "learning_rate": 0.0006367763467738652, "loss": 0.6545, "step": 18540 }, { "epoch": 0.9414780875988374, "grad_norm": 0.030403388528951455, "learning_rate": 0.0006365632570504622, "loss": 0.6599, "step": 18545 }, { "epoch": 0.941731923696869, "grad_norm": 0.033737765789903174, "learning_rate": 0.000636350140520373, "loss": 0.6717, "step": 18550 }, { "epoch": 0.9419857597949004, "grad_norm": 0.032480313560277106, "learning_rate": 0.0006361369972254313, "loss": 0.6684, "step": 18555 }, { "epoch": 0.9422395958929319, "grad_norm": 0.029117171388029033, "learning_rate": 0.0006359238272074757, "loss": 0.6462, "step": 18560 }, { "epoch": 0.9424934319909635, "grad_norm": 0.031854133232489125, "learning_rate": 0.0006357106305083509, "loss": 0.7005, "step": 18565 }, { "epoch": 0.9427472680889949, "grad_norm": 0.03253595983284283, "learning_rate": 0.000635497407169906, "loss": 0.6772, "step": 18570 }, { "epoch": 0.9430011041870264, "grad_norm": 0.028982043961989234, "learning_rate": 0.0006352841572339957, "loss": 0.6179, "step": 18575 }, { "epoch": 0.943254940285058, "grad_norm": 0.03064434267968507, "learning_rate": 0.0006350708807424803, "loss": 0.6209, "step": 18580 }, { "epoch": 0.9435087763830894, "grad_norm": 0.030729974329013327, "learning_rate": 0.0006348575777372244, "loss": 0.6669, "step": 18585 }, { "epoch": 0.9437626124811209, "grad_norm": 0.029720194815102324, "learning_rate": 0.0006346442482600986, "loss": 0.6423, "step": 18590 }, { "epoch": 0.9440164485791525, "grad_norm": 0.028536523023094958, "learning_rate": 0.0006344308923529784, "loss": 0.6438, "step": 18595 }, { "epoch": 0.944270284677184, "grad_norm": 0.0331175402337783, "learning_rate": 0.0006342175100577443, "loss": 0.6502, "step": 18600 }, { "epoch": 0.9445241207752154, "grad_norm": 0.03008233134820693, "learning_rate": 0.0006340041014162822, "loss": 0.6299, "step": 18605 }, { "epoch": 0.944777956873247, "grad_norm": 0.03032471742377659, "learning_rate": 0.0006337906664704836, "loss": 0.6519, "step": 18610 }, { "epoch": 0.9450317929712785, "grad_norm": 0.02860961048116484, "learning_rate": 0.0006335772052622441, "loss": 0.6438, "step": 18615 }, { "epoch": 0.9452856290693099, "grad_norm": 0.024833320743150453, "learning_rate": 0.0006333637178334655, "loss": 0.6432, "step": 18620 }, { "epoch": 0.9455394651673414, "grad_norm": 0.027859881004254048, "learning_rate": 0.0006331502042260541, "loss": 0.6293, "step": 18625 }, { "epoch": 0.945793301265373, "grad_norm": 0.028652852285721123, "learning_rate": 0.0006329366644819217, "loss": 0.6727, "step": 18630 }, { "epoch": 0.9460471373634044, "grad_norm": 0.0269938950460306, "learning_rate": 0.0006327230986429849, "loss": 0.6819, "step": 18635 }, { "epoch": 0.9463009734614359, "grad_norm": 0.02755755712778954, "learning_rate": 0.0006325095067511658, "loss": 0.6432, "step": 18640 }, { "epoch": 0.9465548095594675, "grad_norm": 0.03369567226244427, "learning_rate": 0.0006322958888483914, "loss": 0.6669, "step": 18645 }, { "epoch": 0.946808645657499, "grad_norm": 0.03157729039392671, "learning_rate": 0.0006320822449765937, "loss": 0.6574, "step": 18650 }, { "epoch": 0.9470624817555304, "grad_norm": 0.03130838161838653, "learning_rate": 0.00063186857517771, "loss": 0.6557, "step": 18655 }, { "epoch": 0.947316317853562, "grad_norm": 0.03163478236802311, "learning_rate": 0.0006316548794936827, "loss": 0.6874, "step": 18660 }, { "epoch": 0.9475701539515935, "grad_norm": 0.030863850495401215, "learning_rate": 0.0006314411579664591, "loss": 0.6668, "step": 18665 }, { "epoch": 0.9478239900496249, "grad_norm": 0.029981532549963066, "learning_rate": 0.0006312274106379916, "loss": 0.667, "step": 18670 }, { "epoch": 0.9480778261476565, "grad_norm": 0.02773851572149058, "learning_rate": 0.0006310136375502379, "loss": 0.6234, "step": 18675 }, { "epoch": 0.948331662245688, "grad_norm": 0.02695767898516691, "learning_rate": 0.0006307998387451604, "loss": 0.647, "step": 18680 }, { "epoch": 0.9485854983437194, "grad_norm": 0.027083360274156507, "learning_rate": 0.0006305860142647269, "loss": 0.6025, "step": 18685 }, { "epoch": 0.948839334441751, "grad_norm": 0.03881343069341882, "learning_rate": 0.0006303721641509101, "loss": 0.6447, "step": 18690 }, { "epoch": 0.9490931705397825, "grad_norm": 0.02874382592437549, "learning_rate": 0.0006301582884456877, "loss": 0.6508, "step": 18695 }, { "epoch": 0.949347006637814, "grad_norm": 0.031006065920472545, "learning_rate": 0.0006299443871910423, "loss": 0.6955, "step": 18700 }, { "epoch": 0.9496008427358454, "grad_norm": 0.028020095838662544, "learning_rate": 0.0006297304604289618, "loss": 0.6894, "step": 18705 }, { "epoch": 0.949854678833877, "grad_norm": 0.02740863842733769, "learning_rate": 0.0006295165082014387, "loss": 0.6313, "step": 18710 }, { "epoch": 0.9501085149319085, "grad_norm": 0.03469015706048846, "learning_rate": 0.0006293025305504712, "loss": 0.6585, "step": 18715 }, { "epoch": 0.9503623510299399, "grad_norm": 0.3660199461098182, "learning_rate": 0.0006290885275180615, "loss": 0.6151, "step": 18720 }, { "epoch": 0.9506161871279715, "grad_norm": 0.029730055285483687, "learning_rate": 0.0006288744991462177, "loss": 0.6633, "step": 18725 }, { "epoch": 0.950870023226003, "grad_norm": 0.031579978630651456, "learning_rate": 0.0006286604454769526, "loss": 0.6619, "step": 18730 }, { "epoch": 0.9511238593240344, "grad_norm": 0.029439720868024228, "learning_rate": 0.0006284463665522835, "loss": 0.7035, "step": 18735 }, { "epoch": 0.951377695422066, "grad_norm": 0.028196710870183713, "learning_rate": 0.0006282322624142332, "loss": 0.683, "step": 18740 }, { "epoch": 0.9516315315200975, "grad_norm": 0.027699310126035753, "learning_rate": 0.0006280181331048293, "loss": 0.7007, "step": 18745 }, { "epoch": 0.9518853676181289, "grad_norm": 0.029223415027408266, "learning_rate": 0.0006278039786661042, "loss": 0.692, "step": 18750 }, { "epoch": 0.9521392037161605, "grad_norm": 0.029143187780206255, "learning_rate": 0.0006275897991400956, "loss": 0.6901, "step": 18755 }, { "epoch": 0.952393039814192, "grad_norm": 0.034823367757112866, "learning_rate": 0.0006273755945688458, "loss": 0.6808, "step": 18760 }, { "epoch": 0.9526468759122235, "grad_norm": 0.028207545572889833, "learning_rate": 0.0006271613649944019, "loss": 0.6573, "step": 18765 }, { "epoch": 0.952900712010255, "grad_norm": 0.030145569663023477, "learning_rate": 0.000626947110458816, "loss": 0.7021, "step": 18770 }, { "epoch": 0.9531545481082865, "grad_norm": 0.02720336240003592, "learning_rate": 0.0006267328310041457, "loss": 0.6751, "step": 18775 }, { "epoch": 0.953408384206318, "grad_norm": 0.027891335051743977, "learning_rate": 0.0006265185266724526, "loss": 0.6832, "step": 18780 }, { "epoch": 0.9536622203043494, "grad_norm": 0.027044072343316627, "learning_rate": 0.0006263041975058035, "loss": 0.6724, "step": 18785 }, { "epoch": 0.953916056402381, "grad_norm": 0.029329792592871138, "learning_rate": 0.0006260898435462705, "loss": 0.66, "step": 18790 }, { "epoch": 0.9541698925004125, "grad_norm": 0.030593052636187314, "learning_rate": 0.0006258754648359301, "loss": 0.6529, "step": 18795 }, { "epoch": 0.9544237285984439, "grad_norm": 0.03016352148485823, "learning_rate": 0.0006256610614168634, "loss": 0.6696, "step": 18800 }, { "epoch": 0.9546775646964755, "grad_norm": 0.03091153480051312, "learning_rate": 0.0006254466333311573, "loss": 0.6878, "step": 18805 }, { "epoch": 0.954931400794507, "grad_norm": 0.02982302597361866, "learning_rate": 0.0006252321806209024, "loss": 0.7201, "step": 18810 }, { "epoch": 0.9551852368925385, "grad_norm": 0.026503953482421638, "learning_rate": 0.0006250177033281952, "loss": 0.6994, "step": 18815 }, { "epoch": 0.95543907299057, "grad_norm": 0.03565564947361242, "learning_rate": 0.0006248032014951363, "loss": 0.6312, "step": 18820 }, { "epoch": 0.9556929090886015, "grad_norm": 0.02640422060510701, "learning_rate": 0.0006245886751638312, "loss": 0.624, "step": 18825 }, { "epoch": 0.955946745186633, "grad_norm": 0.029112252779114733, "learning_rate": 0.0006243741243763906, "loss": 0.6583, "step": 18830 }, { "epoch": 0.9562005812846645, "grad_norm": 0.02945371576047327, "learning_rate": 0.0006241595491749297, "loss": 0.6426, "step": 18835 }, { "epoch": 0.956454417382696, "grad_norm": 0.030537211235564462, "learning_rate": 0.0006239449496015684, "loss": 0.6866, "step": 18840 }, { "epoch": 0.9567082534807275, "grad_norm": 0.028212687200863677, "learning_rate": 0.0006237303256984315, "loss": 0.649, "step": 18845 }, { "epoch": 0.956962089578759, "grad_norm": 0.03255222223721001, "learning_rate": 0.0006235156775076488, "loss": 0.6795, "step": 18850 }, { "epoch": 0.9572159256767905, "grad_norm": 0.029725811792114403, "learning_rate": 0.0006233010050713546, "loss": 0.654, "step": 18855 }, { "epoch": 0.957469761774822, "grad_norm": 0.031704856469196845, "learning_rate": 0.0006230863084316879, "loss": 0.6562, "step": 18860 }, { "epoch": 0.9577235978728535, "grad_norm": 0.030116253579622717, "learning_rate": 0.0006228715876307928, "loss": 0.6366, "step": 18865 }, { "epoch": 0.957977433970885, "grad_norm": 0.031103542017359067, "learning_rate": 0.0006226568427108177, "loss": 0.6339, "step": 18870 }, { "epoch": 0.9582312700689165, "grad_norm": 0.029460363076975957, "learning_rate": 0.0006224420737139161, "loss": 0.6788, "step": 18875 }, { "epoch": 0.958485106166948, "grad_norm": 0.029837158992258406, "learning_rate": 0.0006222272806822463, "loss": 0.6649, "step": 18880 }, { "epoch": 0.9587389422649795, "grad_norm": 0.03178737186434546, "learning_rate": 0.0006220124636579704, "loss": 0.6545, "step": 18885 }, { "epoch": 0.958992778363011, "grad_norm": 0.033635381113275326, "learning_rate": 0.0006217976226832565, "loss": 0.6884, "step": 18890 }, { "epoch": 0.9592466144610425, "grad_norm": 0.04927546290115523, "learning_rate": 0.0006215827578002768, "loss": 0.6761, "step": 18895 }, { "epoch": 0.959500450559074, "grad_norm": 0.037729600990650904, "learning_rate": 0.0006213678690512081, "loss": 0.6816, "step": 18900 }, { "epoch": 0.9597542866571055, "grad_norm": 0.043299426149679326, "learning_rate": 0.0006211529564782319, "loss": 0.6619, "step": 18905 }, { "epoch": 0.960008122755137, "grad_norm": 0.036219165492952204, "learning_rate": 0.0006209380201235345, "loss": 0.6758, "step": 18910 }, { "epoch": 0.9602619588531686, "grad_norm": 0.03530180311732388, "learning_rate": 0.000620723060029307, "loss": 0.6804, "step": 18915 }, { "epoch": 0.9605157949512, "grad_norm": 0.030871134459106553, "learning_rate": 0.0006205080762377446, "loss": 0.6789, "step": 18920 }, { "epoch": 0.9607696310492315, "grad_norm": 0.02770556148426203, "learning_rate": 0.000620293068791048, "loss": 0.6509, "step": 18925 }, { "epoch": 0.961023467147263, "grad_norm": 0.030384194051429718, "learning_rate": 0.0006200780377314219, "loss": 0.673, "step": 18930 }, { "epoch": 0.9612773032452945, "grad_norm": 0.02731959576299872, "learning_rate": 0.0006198629831010758, "loss": 0.696, "step": 18935 }, { "epoch": 0.961531139343326, "grad_norm": 0.033285897320577594, "learning_rate": 0.0006196479049422239, "loss": 0.6579, "step": 18940 }, { "epoch": 0.9617849754413575, "grad_norm": 0.03655407008230032, "learning_rate": 0.0006194328032970848, "loss": 0.6555, "step": 18945 }, { "epoch": 0.962038811539389, "grad_norm": 0.03284695840131725, "learning_rate": 0.0006192176782078822, "loss": 0.637, "step": 18950 }, { "epoch": 0.9622926476374205, "grad_norm": 0.03154787998794475, "learning_rate": 0.0006190025297168437, "loss": 0.6502, "step": 18955 }, { "epoch": 0.962546483735452, "grad_norm": 0.09175318198474769, "learning_rate": 0.0006187873578662024, "loss": 0.6331, "step": 18960 }, { "epoch": 0.9628003198334835, "grad_norm": 0.03125651662402041, "learning_rate": 0.0006185721626981949, "loss": 0.6696, "step": 18965 }, { "epoch": 0.963054155931515, "grad_norm": 0.030665227349976752, "learning_rate": 0.0006183569442550633, "loss": 0.7279, "step": 18970 }, { "epoch": 0.9633079920295465, "grad_norm": 0.031251072169071616, "learning_rate": 0.0006181417025790536, "loss": 0.6376, "step": 18975 }, { "epoch": 0.9635618281275781, "grad_norm": 0.028868424251521273, "learning_rate": 0.000617926437712417, "loss": 0.6655, "step": 18980 }, { "epoch": 0.9638156642256095, "grad_norm": 0.03439417543491047, "learning_rate": 0.0006177111496974087, "loss": 0.6539, "step": 18985 }, { "epoch": 0.964069500323641, "grad_norm": 0.029400419362560944, "learning_rate": 0.0006174958385762888, "loss": 0.6826, "step": 18990 }, { "epoch": 0.9643233364216726, "grad_norm": 0.02954084629059326, "learning_rate": 0.0006172805043913218, "loss": 0.6713, "step": 18995 }, { "epoch": 0.964577172519704, "grad_norm": 0.029438462486060087, "learning_rate": 0.0006170651471847766, "loss": 0.6661, "step": 19000 }, { "epoch": 0.9648310086177355, "grad_norm": 0.029621237819513733, "learning_rate": 0.0006168497669989268, "loss": 0.6679, "step": 19005 }, { "epoch": 0.965084844715767, "grad_norm": 0.029892514732713746, "learning_rate": 0.0006166343638760504, "loss": 0.6718, "step": 19010 }, { "epoch": 0.9653386808137985, "grad_norm": 0.026507026667463657, "learning_rate": 0.0006164189378584301, "loss": 0.6667, "step": 19015 }, { "epoch": 0.96559251691183, "grad_norm": 0.027927241926225792, "learning_rate": 0.0006162034889883529, "loss": 0.6512, "step": 19020 }, { "epoch": 0.9658463530098615, "grad_norm": 0.026877310205350734, "learning_rate": 0.0006159880173081103, "loss": 0.643, "step": 19025 }, { "epoch": 0.9661001891078931, "grad_norm": 0.03210699487714612, "learning_rate": 0.0006157725228599982, "loss": 0.6608, "step": 19030 }, { "epoch": 0.9663540252059245, "grad_norm": 0.02798509437005897, "learning_rate": 0.0006155570056863175, "loss": 0.624, "step": 19035 }, { "epoch": 0.966607861303956, "grad_norm": 0.03039103941027961, "learning_rate": 0.0006153414658293725, "loss": 0.6795, "step": 19040 }, { "epoch": 0.9668616974019876, "grad_norm": 0.02957017010726274, "learning_rate": 0.0006151259033314733, "loss": 0.6467, "step": 19045 }, { "epoch": 0.967115533500019, "grad_norm": 0.028289680383955067, "learning_rate": 0.0006149103182349333, "loss": 0.678, "step": 19050 }, { "epoch": 0.9673693695980505, "grad_norm": 0.032003878421903595, "learning_rate": 0.0006146947105820709, "loss": 0.6916, "step": 19055 }, { "epoch": 0.9676232056960821, "grad_norm": 0.02855038124411541, "learning_rate": 0.0006144790804152088, "loss": 0.6869, "step": 19060 }, { "epoch": 0.9678770417941135, "grad_norm": 0.033567071123226766, "learning_rate": 0.0006142634277766741, "loss": 0.6642, "step": 19065 }, { "epoch": 0.968130877892145, "grad_norm": 0.028798989072348152, "learning_rate": 0.0006140477527087983, "loss": 0.6697, "step": 19070 }, { "epoch": 0.9683847139901766, "grad_norm": 0.030305919202102384, "learning_rate": 0.0006138320552539175, "loss": 0.6933, "step": 19075 }, { "epoch": 0.9686385500882081, "grad_norm": 0.03093272775099532, "learning_rate": 0.000613616335454372, "loss": 0.6676, "step": 19080 }, { "epoch": 0.9688923861862395, "grad_norm": 0.028262918319958315, "learning_rate": 0.0006134005933525062, "loss": 0.679, "step": 19085 }, { "epoch": 0.969146222284271, "grad_norm": 0.03102475372065114, "learning_rate": 0.0006131848289906696, "loss": 0.6377, "step": 19090 }, { "epoch": 0.9694000583823026, "grad_norm": 0.03037810347565391, "learning_rate": 0.0006129690424112156, "loss": 0.709, "step": 19095 }, { "epoch": 0.969653894480334, "grad_norm": 0.02844206207538225, "learning_rate": 0.0006127532336565018, "loss": 0.6779, "step": 19100 }, { "epoch": 0.9699077305783655, "grad_norm": 0.027988538761619792, "learning_rate": 0.0006125374027688905, "loss": 0.6598, "step": 19105 }, { "epoch": 0.9701615666763971, "grad_norm": 0.028584240164617997, "learning_rate": 0.0006123215497907484, "loss": 0.6784, "step": 19110 }, { "epoch": 0.9704154027744285, "grad_norm": 0.030242360415607406, "learning_rate": 0.0006121056747644461, "loss": 0.6746, "step": 19115 }, { "epoch": 0.97066923887246, "grad_norm": 0.04283975243406091, "learning_rate": 0.000611889777732359, "loss": 0.6627, "step": 19120 }, { "epoch": 0.9709230749704916, "grad_norm": 0.04328988747145708, "learning_rate": 0.0006116738587368665, "loss": 0.6738, "step": 19125 }, { "epoch": 0.9711769110685231, "grad_norm": 0.029956364470167997, "learning_rate": 0.0006114579178203524, "loss": 0.6558, "step": 19130 }, { "epoch": 0.9714307471665545, "grad_norm": 0.027483631701609626, "learning_rate": 0.000611241955025205, "loss": 0.6549, "step": 19135 }, { "epoch": 0.9716845832645861, "grad_norm": 0.02942360182069158, "learning_rate": 0.0006110259703938165, "loss": 0.6873, "step": 19140 }, { "epoch": 0.9719384193626176, "grad_norm": 0.038337209505038904, "learning_rate": 0.0006108099639685837, "loss": 0.6605, "step": 19145 }, { "epoch": 0.972192255460649, "grad_norm": 0.03086482760421007, "learning_rate": 0.0006105939357919076, "loss": 0.6554, "step": 19150 }, { "epoch": 0.9724460915586806, "grad_norm": 0.029252378091061453, "learning_rate": 0.0006103778859061935, "loss": 0.65, "step": 19155 }, { "epoch": 0.9726999276567121, "grad_norm": 0.029575325876365723, "learning_rate": 0.0006101618143538508, "loss": 0.6703, "step": 19160 }, { "epoch": 0.9729537637547435, "grad_norm": 0.036771267879823016, "learning_rate": 0.0006099457211772933, "loss": 0.6639, "step": 19165 }, { "epoch": 0.973207599852775, "grad_norm": 0.031004310337380322, "learning_rate": 0.0006097296064189391, "loss": 0.6676, "step": 19170 }, { "epoch": 0.9734614359508066, "grad_norm": 0.03162306911543833, "learning_rate": 0.0006095134701212102, "loss": 0.7101, "step": 19175 }, { "epoch": 0.973715272048838, "grad_norm": 0.03029112624989704, "learning_rate": 0.0006092973123265334, "loss": 0.642, "step": 19180 }, { "epoch": 0.9739691081468695, "grad_norm": 0.027045076860072504, "learning_rate": 0.0006090811330773392, "loss": 0.6465, "step": 19185 }, { "epoch": 0.9742229442449011, "grad_norm": 0.0259845959342946, "learning_rate": 0.0006088649324160626, "loss": 0.6007, "step": 19190 }, { "epoch": 0.9744767803429326, "grad_norm": 0.031453505396735484, "learning_rate": 0.0006086487103851426, "loss": 0.6991, "step": 19195 }, { "epoch": 0.974730616440964, "grad_norm": 0.0339826785057557, "learning_rate": 0.0006084324670270227, "loss": 0.6886, "step": 19200 }, { "epoch": 0.9749844525389956, "grad_norm": 0.031238946443663863, "learning_rate": 0.0006082162023841502, "loss": 0.6683, "step": 19205 }, { "epoch": 0.9752382886370271, "grad_norm": 0.02974583462893673, "learning_rate": 0.0006079999164989769, "loss": 0.6591, "step": 19210 }, { "epoch": 0.9754921247350585, "grad_norm": 0.02650061690934559, "learning_rate": 0.0006077836094139586, "loss": 0.6687, "step": 19215 }, { "epoch": 0.9757459608330901, "grad_norm": 0.031530486057886865, "learning_rate": 0.0006075672811715553, "loss": 0.665, "step": 19220 }, { "epoch": 0.9759997969311216, "grad_norm": 0.030282188912183088, "learning_rate": 0.0006073509318142308, "loss": 0.6641, "step": 19225 }, { "epoch": 0.976253633029153, "grad_norm": 0.030905437835523873, "learning_rate": 0.0006071345613844541, "loss": 0.685, "step": 19230 }, { "epoch": 0.9765074691271846, "grad_norm": 0.030039958243523047, "learning_rate": 0.0006069181699246973, "loss": 0.6908, "step": 19235 }, { "epoch": 0.9767613052252161, "grad_norm": 0.030878639431641493, "learning_rate": 0.0006067017574774369, "loss": 0.6858, "step": 19240 }, { "epoch": 0.9770151413232476, "grad_norm": 0.02866787693516888, "learning_rate": 0.0006064853240851536, "loss": 0.6474, "step": 19245 }, { "epoch": 0.977268977421279, "grad_norm": 0.029124451454928257, "learning_rate": 0.0006062688697903322, "loss": 0.6469, "step": 19250 }, { "epoch": 0.9775228135193106, "grad_norm": 0.026814770177414387, "learning_rate": 0.0006060523946354615, "loss": 0.6377, "step": 19255 }, { "epoch": 0.9777766496173421, "grad_norm": 0.028880869869678118, "learning_rate": 0.0006058358986630347, "loss": 0.632, "step": 19260 }, { "epoch": 0.9780304857153735, "grad_norm": 0.03121013191256417, "learning_rate": 0.0006056193819155488, "loss": 0.6934, "step": 19265 }, { "epoch": 0.9782843218134051, "grad_norm": 0.030083838468656594, "learning_rate": 0.0006054028444355051, "loss": 0.6587, "step": 19270 }, { "epoch": 0.9785381579114366, "grad_norm": 0.029231995403348424, "learning_rate": 0.0006051862862654085, "loss": 0.6442, "step": 19275 }, { "epoch": 0.978791994009468, "grad_norm": 0.0275269919545958, "learning_rate": 0.0006049697074477686, "loss": 0.6608, "step": 19280 }, { "epoch": 0.9790458301074996, "grad_norm": 0.02871555576988694, "learning_rate": 0.0006047531080250985, "loss": 0.6516, "step": 19285 }, { "epoch": 0.9792996662055311, "grad_norm": 0.030409657345460084, "learning_rate": 0.0006045364880399158, "loss": 0.6776, "step": 19290 }, { "epoch": 0.9795535023035626, "grad_norm": 0.03147496610857147, "learning_rate": 0.0006043198475347418, "loss": 0.7055, "step": 19295 }, { "epoch": 0.9798073384015941, "grad_norm": 0.03844042972406689, "learning_rate": 0.0006041031865521019, "loss": 0.6484, "step": 19300 }, { "epoch": 0.9800611744996256, "grad_norm": 0.035433734777461534, "learning_rate": 0.0006038865051345257, "loss": 0.6945, "step": 19305 }, { "epoch": 0.9803150105976571, "grad_norm": 0.028465948106315378, "learning_rate": 0.0006036698033245466, "loss": 0.651, "step": 19310 }, { "epoch": 0.9805688466956886, "grad_norm": 0.0294812846049766, "learning_rate": 0.000603453081164702, "loss": 0.6303, "step": 19315 }, { "epoch": 0.9808226827937201, "grad_norm": 0.0269211149697901, "learning_rate": 0.0006032363386975337, "loss": 0.6711, "step": 19320 }, { "epoch": 0.9810765188917516, "grad_norm": 0.028358742806949666, "learning_rate": 0.0006030195759655867, "loss": 0.6555, "step": 19325 }, { "epoch": 0.981330354989783, "grad_norm": 0.026287572424772882, "learning_rate": 0.0006028027930114109, "loss": 0.6548, "step": 19330 }, { "epoch": 0.9815841910878146, "grad_norm": 0.028014656618407234, "learning_rate": 0.0006025859898775596, "loss": 0.6516, "step": 19335 }, { "epoch": 0.9818380271858461, "grad_norm": 0.030102661907225187, "learning_rate": 0.0006023691666065899, "loss": 0.6815, "step": 19340 }, { "epoch": 0.9820918632838777, "grad_norm": 0.02714236235007249, "learning_rate": 0.0006021523232410633, "loss": 0.6384, "step": 19345 }, { "epoch": 0.9823456993819091, "grad_norm": 0.029237330679810478, "learning_rate": 0.0006019354598235451, "loss": 0.6477, "step": 19350 }, { "epoch": 0.9825995354799406, "grad_norm": 0.02814614428540533, "learning_rate": 0.0006017185763966044, "loss": 0.6306, "step": 19355 }, { "epoch": 0.9828533715779721, "grad_norm": 0.027669672846072805, "learning_rate": 0.0006015016730028147, "loss": 0.6371, "step": 19360 }, { "epoch": 0.9831072076760036, "grad_norm": 0.030244192306541667, "learning_rate": 0.0006012847496847525, "loss": 0.6691, "step": 19365 }, { "epoch": 0.9833610437740351, "grad_norm": 0.03227069147483829, "learning_rate": 0.0006010678064849993, "loss": 0.66, "step": 19370 }, { "epoch": 0.9836148798720666, "grad_norm": 0.028341615344368947, "learning_rate": 0.0006008508434461394, "loss": 0.6553, "step": 19375 }, { "epoch": 0.9838687159700981, "grad_norm": 0.029246441719997116, "learning_rate": 0.0006006338606107621, "loss": 0.6881, "step": 19380 }, { "epoch": 0.9841225520681296, "grad_norm": 0.029088483307599902, "learning_rate": 0.0006004168580214598, "loss": 0.6232, "step": 19385 }, { "epoch": 0.9843763881661611, "grad_norm": 0.031914525054495664, "learning_rate": 0.000600199835720829, "loss": 0.6163, "step": 19390 }, { "epoch": 0.9846302242641927, "grad_norm": 0.030647304349424837, "learning_rate": 0.0005999827937514701, "loss": 0.6694, "step": 19395 }, { "epoch": 0.9848840603622241, "grad_norm": 0.032270344577208517, "learning_rate": 0.0005997657321559875, "loss": 0.6746, "step": 19400 }, { "epoch": 0.9851378964602556, "grad_norm": 0.03206169640890802, "learning_rate": 0.0005995486509769892, "loss": 0.6571, "step": 19405 }, { "epoch": 0.9853917325582872, "grad_norm": 0.027089938981335555, "learning_rate": 0.0005993315502570871, "loss": 0.6355, "step": 19410 }, { "epoch": 0.9856455686563186, "grad_norm": 0.026596468178423364, "learning_rate": 0.000599114430038897, "loss": 0.6488, "step": 19415 }, { "epoch": 0.9858994047543501, "grad_norm": 0.026794570980087546, "learning_rate": 0.0005988972903650388, "loss": 0.6708, "step": 19420 }, { "epoch": 0.9861532408523817, "grad_norm": 0.030908936762704065, "learning_rate": 0.0005986801312781356, "loss": 0.7002, "step": 19425 }, { "epoch": 0.9864070769504131, "grad_norm": 0.027798291115401472, "learning_rate": 0.0005984629528208147, "loss": 0.6489, "step": 19430 }, { "epoch": 0.9866609130484446, "grad_norm": 0.029355145337792682, "learning_rate": 0.000598245755035707, "loss": 0.654, "step": 19435 }, { "epoch": 0.9869147491464761, "grad_norm": 0.030382389985837328, "learning_rate": 0.0005980285379654478, "loss": 0.7032, "step": 19440 }, { "epoch": 0.9871685852445076, "grad_norm": 0.029655629368567735, "learning_rate": 0.0005978113016526753, "loss": 0.6694, "step": 19445 }, { "epoch": 0.9874224213425391, "grad_norm": 0.03049348280989871, "learning_rate": 0.0005975940461400322, "loss": 0.6858, "step": 19450 }, { "epoch": 0.9876762574405706, "grad_norm": 0.03373968632985081, "learning_rate": 0.0005973767714701646, "loss": 0.6511, "step": 19455 }, { "epoch": 0.9879300935386022, "grad_norm": 0.031360560141867326, "learning_rate": 0.0005971594776857224, "loss": 0.671, "step": 19460 }, { "epoch": 0.9881839296366336, "grad_norm": 0.0286117804183375, "learning_rate": 0.000596942164829359, "loss": 0.6419, "step": 19465 }, { "epoch": 0.9884377657346651, "grad_norm": 0.030834064699760726, "learning_rate": 0.0005967248329437322, "loss": 0.6732, "step": 19470 }, { "epoch": 0.9886916018326967, "grad_norm": 0.031509589510047585, "learning_rate": 0.0005965074820715031, "loss": 0.6294, "step": 19475 }, { "epoch": 0.9889454379307281, "grad_norm": 0.03403401847852691, "learning_rate": 0.0005962901122553366, "loss": 0.6306, "step": 19480 }, { "epoch": 0.9891992740287596, "grad_norm": 0.02964126755557149, "learning_rate": 0.000596072723537901, "loss": 0.6476, "step": 19485 }, { "epoch": 0.9894531101267912, "grad_norm": 0.02683861243265167, "learning_rate": 0.0005958553159618693, "loss": 0.6276, "step": 19490 }, { "epoch": 0.9897069462248226, "grad_norm": 0.028581167288995957, "learning_rate": 0.0005956378895699169, "loss": 0.6207, "step": 19495 }, { "epoch": 0.9899607823228541, "grad_norm": 0.029387830920785247, "learning_rate": 0.0005954204444047237, "loss": 0.6913, "step": 19500 }, { "epoch": 0.9902146184208857, "grad_norm": 0.04081666687506028, "learning_rate": 0.000595202980508973, "loss": 0.6531, "step": 19505 }, { "epoch": 0.9904684545189172, "grad_norm": 0.02899883877244315, "learning_rate": 0.0005949854979253521, "loss": 0.6193, "step": 19510 }, { "epoch": 0.9907222906169486, "grad_norm": 0.03126282136869641, "learning_rate": 0.0005947679966965517, "loss": 0.6787, "step": 19515 }, { "epoch": 0.9909761267149801, "grad_norm": 0.0299212857268955, "learning_rate": 0.0005945504768652664, "loss": 0.6795, "step": 19520 }, { "epoch": 0.9912299628130117, "grad_norm": 0.03169140717820167, "learning_rate": 0.0005943329384741937, "loss": 0.6957, "step": 19525 }, { "epoch": 0.9914837989110431, "grad_norm": 0.027094679850642577, "learning_rate": 0.0005941153815660357, "loss": 0.66, "step": 19530 }, { "epoch": 0.9917376350090746, "grad_norm": 0.029708966912979723, "learning_rate": 0.0005938978061834977, "loss": 0.7, "step": 19535 }, { "epoch": 0.9919914711071062, "grad_norm": 0.030663839888747067, "learning_rate": 0.0005936802123692885, "loss": 0.6906, "step": 19540 }, { "epoch": 0.9922453072051376, "grad_norm": 0.032498086035867, "learning_rate": 0.0005934626001661209, "loss": 0.6384, "step": 19545 }, { "epoch": 0.9924991433031691, "grad_norm": 0.027515062654933035, "learning_rate": 0.000593244969616711, "loss": 0.6632, "step": 19550 }, { "epoch": 0.9927529794012007, "grad_norm": 0.027046769543235413, "learning_rate": 0.0005930273207637783, "loss": 0.6312, "step": 19555 }, { "epoch": 0.9930068154992322, "grad_norm": 0.03462754472702289, "learning_rate": 0.0005928096536500467, "loss": 0.6508, "step": 19560 }, { "epoch": 0.9932606515972636, "grad_norm": 0.03441275083677409, "learning_rate": 0.0005925919683182429, "loss": 0.6352, "step": 19565 }, { "epoch": 0.9935144876952952, "grad_norm": 0.03471746160837888, "learning_rate": 0.0005923742648110974, "loss": 0.6129, "step": 19570 }, { "epoch": 0.9937683237933267, "grad_norm": 0.028986653989202596, "learning_rate": 0.0005921565431713445, "loss": 0.6924, "step": 19575 }, { "epoch": 0.9940221598913581, "grad_norm": 0.03155164305365954, "learning_rate": 0.0005919388034417218, "loss": 0.6399, "step": 19580 }, { "epoch": 0.9942759959893896, "grad_norm": 0.03342584588715975, "learning_rate": 0.0005917210456649703, "loss": 0.6548, "step": 19585 }, { "epoch": 0.9945298320874212, "grad_norm": 0.028631419820933483, "learning_rate": 0.0005915032698838351, "loss": 0.6468, "step": 19590 }, { "epoch": 0.9947836681854526, "grad_norm": 0.033225165915423335, "learning_rate": 0.0005912854761410642, "loss": 0.6812, "step": 19595 }, { "epoch": 0.9950375042834841, "grad_norm": 0.0304127338120677, "learning_rate": 0.0005910676644794098, "loss": 0.6794, "step": 19600 }, { "epoch": 0.9952913403815157, "grad_norm": 0.02789576197403392, "learning_rate": 0.0005908498349416269, "loss": 0.6745, "step": 19605 }, { "epoch": 0.9955451764795472, "grad_norm": 0.028166376337279767, "learning_rate": 0.0005906319875704744, "loss": 0.6653, "step": 19610 }, { "epoch": 0.9957990125775786, "grad_norm": 0.027668076965456402, "learning_rate": 0.0005904141224087147, "loss": 0.6002, "step": 19615 }, { "epoch": 0.9960528486756102, "grad_norm": 0.03342549830581369, "learning_rate": 0.0005901962394991139, "loss": 0.7034, "step": 19620 }, { "epoch": 0.9963066847736417, "grad_norm": 0.03524287470416653, "learning_rate": 0.0005899783388844408, "loss": 0.6712, "step": 19625 }, { "epoch": 0.9965605208716731, "grad_norm": 0.03118580057372306, "learning_rate": 0.0005897604206074687, "loss": 0.6838, "step": 19630 }, { "epoch": 0.9968143569697047, "grad_norm": 0.030595694021722037, "learning_rate": 0.0005895424847109736, "loss": 0.6473, "step": 19635 }, { "epoch": 0.9970681930677362, "grad_norm": 0.02771418453090281, "learning_rate": 0.0005893245312377353, "loss": 0.6263, "step": 19640 }, { "epoch": 0.9973220291657676, "grad_norm": 0.028582519056453454, "learning_rate": 0.0005891065602305369, "loss": 0.6746, "step": 19645 }, { "epoch": 0.9975758652637992, "grad_norm": 0.029373753276386798, "learning_rate": 0.0005888885717321653, "loss": 0.6655, "step": 19650 }, { "epoch": 0.9978297013618307, "grad_norm": 0.02819343802167969, "learning_rate": 0.0005886705657854101, "loss": 0.6393, "step": 19655 }, { "epoch": 0.9980835374598621, "grad_norm": 0.031077071845706215, "learning_rate": 0.0005884525424330652, "loss": 0.6602, "step": 19660 }, { "epoch": 0.9983373735578936, "grad_norm": 0.03214189483509175, "learning_rate": 0.0005882345017179274, "loss": 0.6539, "step": 19665 }, { "epoch": 0.9985912096559252, "grad_norm": 0.028335944858992497, "learning_rate": 0.0005880164436827968, "loss": 0.6411, "step": 19670 }, { "epoch": 0.9988450457539567, "grad_norm": 0.0279279275616532, "learning_rate": 0.0005877983683704772, "loss": 0.6605, "step": 19675 }, { "epoch": 0.9990988818519881, "grad_norm": 0.02865424188837183, "learning_rate": 0.0005875802758237758, "loss": 0.6133, "step": 19680 }, { "epoch": 0.9993527179500197, "grad_norm": 0.028472312184067313, "learning_rate": 0.0005873621660855031, "loss": 0.6859, "step": 19685 }, { "epoch": 0.9996065540480512, "grad_norm": 0.10017764281991437, "learning_rate": 0.0005871440391984729, "loss": 0.6385, "step": 19690 }, { "epoch": 0.9998603901460826, "grad_norm": 0.035962742849865816, "learning_rate": 0.0005869258952055023, "loss": 0.6697, "step": 19695 }, { "epoch": 1.000152301658819, "grad_norm": 0.02843859155188421, "learning_rate": 0.000586707734149412, "loss": 0.6009, "step": 19700 }, { "epoch": 1.0004061377568505, "grad_norm": 0.02854427457589784, "learning_rate": 0.0005864895560730257, "loss": 0.6141, "step": 19705 }, { "epoch": 1.0006599738548818, "grad_norm": 0.02932649707711216, "learning_rate": 0.000586271361019171, "loss": 0.6047, "step": 19710 }, { "epoch": 1.0009138099529133, "grad_norm": 0.026716144835882326, "learning_rate": 0.0005860531490306784, "loss": 0.6153, "step": 19715 }, { "epoch": 1.0011676460509449, "grad_norm": 0.02729000381253775, "learning_rate": 0.0005858349201503819, "loss": 0.6104, "step": 19720 }, { "epoch": 1.0014214821489764, "grad_norm": 0.02734286141418305, "learning_rate": 0.0005856166744211185, "loss": 0.6497, "step": 19725 }, { "epoch": 1.001675318247008, "grad_norm": 0.034945698035974734, "learning_rate": 0.000585398411885729, "loss": 0.6511, "step": 19730 }, { "epoch": 1.0019291543450395, "grad_norm": 0.02785183296847619, "learning_rate": 0.0005851801325870569, "loss": 0.6449, "step": 19735 }, { "epoch": 1.002182990443071, "grad_norm": 0.026663078387867196, "learning_rate": 0.0005849618365679497, "loss": 0.6043, "step": 19740 }, { "epoch": 1.0024368265411023, "grad_norm": 0.030130013835984185, "learning_rate": 0.0005847435238712578, "loss": 0.6033, "step": 19745 }, { "epoch": 1.0026906626391339, "grad_norm": 0.027530055516066926, "learning_rate": 0.0005845251945398347, "loss": 0.6242, "step": 19750 }, { "epoch": 1.0029444987371654, "grad_norm": 0.030169017696521677, "learning_rate": 0.0005843068486165374, "loss": 0.6299, "step": 19755 }, { "epoch": 1.003198334835197, "grad_norm": 0.03328181457031382, "learning_rate": 0.0005840884861442262, "loss": 0.6246, "step": 19760 }, { "epoch": 1.0034521709332285, "grad_norm": 0.029510401473095547, "learning_rate": 0.0005838701071657643, "loss": 0.6271, "step": 19765 }, { "epoch": 1.00370600703126, "grad_norm": 0.029443007080166932, "learning_rate": 0.0005836517117240188, "loss": 0.6285, "step": 19770 }, { "epoch": 1.0039598431292913, "grad_norm": 0.03137109304096966, "learning_rate": 0.0005834332998618596, "loss": 0.6109, "step": 19775 }, { "epoch": 1.0042136792273229, "grad_norm": 0.030268237241694893, "learning_rate": 0.0005832148716221595, "loss": 0.6219, "step": 19780 }, { "epoch": 1.0044675153253544, "grad_norm": 0.029012330690896475, "learning_rate": 0.0005829964270477953, "loss": 0.6329, "step": 19785 }, { "epoch": 1.004721351423386, "grad_norm": 0.031002786294132348, "learning_rate": 0.0005827779661816461, "loss": 0.6045, "step": 19790 }, { "epoch": 1.0049751875214175, "grad_norm": 0.028517009258261526, "learning_rate": 0.000582559489066595, "loss": 0.5942, "step": 19795 }, { "epoch": 1.005229023619449, "grad_norm": 0.02915390049663092, "learning_rate": 0.0005823409957455281, "loss": 0.647, "step": 19800 }, { "epoch": 1.0054828597174805, "grad_norm": 0.030459634575222685, "learning_rate": 0.0005821224862613343, "loss": 0.6402, "step": 19805 }, { "epoch": 1.0057366958155118, "grad_norm": 0.042383559013911574, "learning_rate": 0.000581903960656906, "loss": 0.6358, "step": 19810 }, { "epoch": 1.0059905319135434, "grad_norm": 0.030868989504289707, "learning_rate": 0.0005816854189751386, "loss": 0.622, "step": 19815 }, { "epoch": 1.006244368011575, "grad_norm": 0.03091750780903903, "learning_rate": 0.0005814668612589309, "loss": 0.642, "step": 19820 }, { "epoch": 1.0064982041096064, "grad_norm": 0.031459631035907265, "learning_rate": 0.0005812482875511845, "loss": 0.6587, "step": 19825 }, { "epoch": 1.006752040207638, "grad_norm": 0.03079305069452767, "learning_rate": 0.0005810296978948045, "loss": 0.6555, "step": 19830 }, { "epoch": 1.0070058763056695, "grad_norm": 0.027391973704407487, "learning_rate": 0.0005808110923326989, "loss": 0.6495, "step": 19835 }, { "epoch": 1.0072597124037008, "grad_norm": 0.03152807640277719, "learning_rate": 0.000580592470907779, "loss": 0.6365, "step": 19840 }, { "epoch": 1.0075135485017324, "grad_norm": 0.02931147770363895, "learning_rate": 0.0005803738336629588, "loss": 0.6481, "step": 19845 }, { "epoch": 1.007767384599764, "grad_norm": 0.06174272991062351, "learning_rate": 0.0005801551806411561, "loss": 0.6671, "step": 19850 }, { "epoch": 1.0080212206977954, "grad_norm": 0.05019352239396665, "learning_rate": 0.000579936511885291, "loss": 0.6641, "step": 19855 }, { "epoch": 1.008275056795827, "grad_norm": 0.04538201981263293, "learning_rate": 0.0005797178274382873, "loss": 0.6518, "step": 19860 }, { "epoch": 1.0085288928938585, "grad_norm": 0.03377835700205759, "learning_rate": 0.0005794991273430716, "loss": 0.6599, "step": 19865 }, { "epoch": 1.00878272899189, "grad_norm": 0.030713316837419662, "learning_rate": 0.0005792804116425736, "loss": 0.6425, "step": 19870 }, { "epoch": 1.0090365650899213, "grad_norm": 0.029363677927011077, "learning_rate": 0.0005790616803797263, "loss": 0.6419, "step": 19875 }, { "epoch": 1.0092904011879529, "grad_norm": 0.027844119919916113, "learning_rate": 0.0005788429335974653, "loss": 0.6149, "step": 19880 }, { "epoch": 1.0095442372859844, "grad_norm": 0.02857994465549951, "learning_rate": 0.0005786241713387297, "loss": 0.6269, "step": 19885 }, { "epoch": 1.009798073384016, "grad_norm": 0.030753037875065618, "learning_rate": 0.0005784053936464613, "loss": 0.6392, "step": 19890 }, { "epoch": 1.0100519094820475, "grad_norm": 0.028876729886118224, "learning_rate": 0.0005781866005636052, "loss": 0.6314, "step": 19895 }, { "epoch": 1.010305745580079, "grad_norm": 0.03012710735434138, "learning_rate": 0.0005779677921331093, "loss": 0.6446, "step": 19900 }, { "epoch": 1.0105595816781106, "grad_norm": 0.029285077507710117, "learning_rate": 0.0005777489683979247, "loss": 0.6148, "step": 19905 }, { "epoch": 1.0108134177761419, "grad_norm": 0.03300999391546842, "learning_rate": 0.0005775301294010052, "loss": 0.6831, "step": 19910 }, { "epoch": 1.0110672538741734, "grad_norm": 0.037550635328480096, "learning_rate": 0.000577311275185308, "loss": 0.6398, "step": 19915 }, { "epoch": 1.011321089972205, "grad_norm": 0.03106004494644575, "learning_rate": 0.000577092405793793, "loss": 0.6331, "step": 19920 }, { "epoch": 1.0115749260702365, "grad_norm": 0.029177953895789095, "learning_rate": 0.0005768735212694232, "loss": 0.6469, "step": 19925 }, { "epoch": 1.011828762168268, "grad_norm": 0.029923389596699445, "learning_rate": 0.0005766546216551646, "loss": 0.6239, "step": 19930 }, { "epoch": 1.0120825982662995, "grad_norm": 0.028183272221578157, "learning_rate": 0.0005764357069939861, "loss": 0.6139, "step": 19935 }, { "epoch": 1.0123364343643309, "grad_norm": 0.03358236258051726, "learning_rate": 0.0005762167773288594, "loss": 0.6691, "step": 19940 }, { "epoch": 1.0125902704623624, "grad_norm": 0.03742396680100745, "learning_rate": 0.0005759978327027594, "loss": 0.6278, "step": 19945 }, { "epoch": 1.012844106560394, "grad_norm": 0.029714501673771827, "learning_rate": 0.000575778873158664, "loss": 0.6524, "step": 19950 }, { "epoch": 1.0130979426584255, "grad_norm": 0.031649442502146206, "learning_rate": 0.0005755598987395535, "loss": 0.6773, "step": 19955 }, { "epoch": 1.013351778756457, "grad_norm": 0.03025993098945578, "learning_rate": 0.0005753409094884118, "loss": 0.6438, "step": 19960 }, { "epoch": 1.0136056148544885, "grad_norm": 0.026442363577419466, "learning_rate": 0.0005751219054482252, "loss": 0.6624, "step": 19965 }, { "epoch": 1.01385945095252, "grad_norm": 0.02816401234192548, "learning_rate": 0.0005749028866619833, "loss": 0.6504, "step": 19970 }, { "epoch": 1.0141132870505514, "grad_norm": 0.028121608328684197, "learning_rate": 0.0005746838531726783, "loss": 0.5946, "step": 19975 }, { "epoch": 1.014367123148583, "grad_norm": 0.028070732734731412, "learning_rate": 0.0005744648050233053, "loss": 0.6212, "step": 19980 }, { "epoch": 1.0146209592466144, "grad_norm": 0.03379707210393175, "learning_rate": 0.0005742457422568626, "loss": 0.6106, "step": 19985 }, { "epoch": 1.014874795344646, "grad_norm": 0.03301338513931386, "learning_rate": 0.0005740266649163507, "loss": 0.6444, "step": 19990 }, { "epoch": 1.0151286314426775, "grad_norm": 0.030706237522464847, "learning_rate": 0.0005738075730447738, "loss": 0.6496, "step": 19995 }, { "epoch": 1.015382467540709, "grad_norm": 0.046071725852372215, "learning_rate": 0.0005735884666851383, "loss": 0.6386, "step": 20000 }, { "epoch": 1.0156363036387404, "grad_norm": 0.029669718953235974, "learning_rate": 0.0005733693458804537, "loss": 0.6342, "step": 20005 }, { "epoch": 1.015890139736772, "grad_norm": 0.03193703134922076, "learning_rate": 0.0005731502106737326, "loss": 0.6166, "step": 20010 }, { "epoch": 1.0161439758348034, "grad_norm": 0.039825363997106046, "learning_rate": 0.0005729310611079899, "loss": 0.641, "step": 20015 }, { "epoch": 1.016397811932835, "grad_norm": 0.031247410821304573, "learning_rate": 0.0005727118972262437, "loss": 0.6439, "step": 20020 }, { "epoch": 1.0166516480308665, "grad_norm": 0.03299132631420552, "learning_rate": 0.0005724927190715144, "loss": 0.6413, "step": 20025 }, { "epoch": 1.016905484128898, "grad_norm": 0.031051588331968735, "learning_rate": 0.0005722735266868261, "loss": 0.6224, "step": 20030 }, { "epoch": 1.0171593202269296, "grad_norm": 0.02859587802230808, "learning_rate": 0.0005720543201152048, "loss": 0.6504, "step": 20035 }, { "epoch": 1.0174131563249609, "grad_norm": 0.029330766055895807, "learning_rate": 0.0005718350993996798, "loss": 0.6242, "step": 20040 }, { "epoch": 1.0176669924229924, "grad_norm": 0.02858781514510055, "learning_rate": 0.0005716158645832831, "loss": 0.6471, "step": 20045 }, { "epoch": 1.017920828521024, "grad_norm": 0.02779782048085534, "learning_rate": 0.0005713966157090493, "loss": 0.6276, "step": 20050 }, { "epoch": 1.0181746646190555, "grad_norm": 0.04674455561645387, "learning_rate": 0.000571177352820016, "loss": 0.6757, "step": 20055 }, { "epoch": 1.018428500717087, "grad_norm": 0.029659513507902257, "learning_rate": 0.0005709580759592232, "loss": 0.6253, "step": 20060 }, { "epoch": 1.0186823368151185, "grad_norm": 0.028579846484842044, "learning_rate": 0.000570738785169714, "loss": 0.6535, "step": 20065 }, { "epoch": 1.01893617291315, "grad_norm": 0.028269832205257906, "learning_rate": 0.0005705194804945339, "loss": 0.6551, "step": 20070 }, { "epoch": 1.0191900090111814, "grad_norm": 0.027793487399022977, "learning_rate": 0.0005703001619767317, "loss": 0.6704, "step": 20075 }, { "epoch": 1.019443845109213, "grad_norm": 0.027070813009888988, "learning_rate": 0.0005700808296593581, "loss": 0.6497, "step": 20080 }, { "epoch": 1.0196976812072445, "grad_norm": 0.031815130726805876, "learning_rate": 0.0005698614835854672, "loss": 0.6588, "step": 20085 }, { "epoch": 1.019951517305276, "grad_norm": 0.02561969747108025, "learning_rate": 0.0005696421237981155, "loss": 0.6414, "step": 20090 }, { "epoch": 1.0202053534033075, "grad_norm": 0.03057841471640854, "learning_rate": 0.0005694227503403623, "loss": 0.6322, "step": 20095 }, { "epoch": 1.020459189501339, "grad_norm": 0.029890785642181172, "learning_rate": 0.0005692033632552691, "loss": 0.6365, "step": 20100 }, { "epoch": 1.0207130255993704, "grad_norm": 0.05178841035705398, "learning_rate": 0.000568983962585901, "loss": 0.613, "step": 20105 }, { "epoch": 1.020966861697402, "grad_norm": 0.03035895787820191, "learning_rate": 0.0005687645483753252, "loss": 0.6359, "step": 20110 }, { "epoch": 1.0212206977954335, "grad_norm": 0.030029569083028736, "learning_rate": 0.0005685451206666113, "loss": 0.6113, "step": 20115 }, { "epoch": 1.021474533893465, "grad_norm": 0.029610917159954318, "learning_rate": 0.0005683256795028321, "loss": 0.6149, "step": 20120 }, { "epoch": 1.0217283699914965, "grad_norm": 0.027107501373891077, "learning_rate": 0.0005681062249270627, "loss": 0.6173, "step": 20125 }, { "epoch": 1.021982206089528, "grad_norm": 0.026567854365187136, "learning_rate": 0.000567886756982381, "loss": 0.61, "step": 20130 }, { "epoch": 1.0222360421875596, "grad_norm": 0.03347285186094029, "learning_rate": 0.0005676672757118675, "loss": 0.634, "step": 20135 }, { "epoch": 1.022489878285591, "grad_norm": 0.026670558394444915, "learning_rate": 0.0005674477811586053, "loss": 0.6087, "step": 20140 }, { "epoch": 1.0227437143836224, "grad_norm": 0.033187942360587745, "learning_rate": 0.0005672282733656799, "loss": 0.6294, "step": 20145 }, { "epoch": 1.022997550481654, "grad_norm": 0.030508036934401763, "learning_rate": 0.0005670087523761797, "loss": 0.6309, "step": 20150 }, { "epoch": 1.0232513865796855, "grad_norm": 0.02866134832457858, "learning_rate": 0.0005667892182331958, "loss": 0.6576, "step": 20155 }, { "epoch": 1.023505222677717, "grad_norm": 0.027730707934763284, "learning_rate": 0.0005665696709798211, "loss": 0.6274, "step": 20160 }, { "epoch": 1.0237590587757486, "grad_norm": 0.027908513757561475, "learning_rate": 0.0005663501106591522, "loss": 0.6376, "step": 20165 }, { "epoch": 1.0240128948737799, "grad_norm": 0.03525860744814596, "learning_rate": 0.0005661305373142874, "loss": 0.6354, "step": 20170 }, { "epoch": 1.0242667309718114, "grad_norm": 0.030339601649416004, "learning_rate": 0.0005659109509883279, "loss": 0.6265, "step": 20175 }, { "epoch": 1.024520567069843, "grad_norm": 0.03508269129550844, "learning_rate": 0.0005656913517243775, "loss": 0.6498, "step": 20180 }, { "epoch": 1.0247744031678745, "grad_norm": 0.031165332711025517, "learning_rate": 0.0005654717395655423, "loss": 0.6403, "step": 20185 }, { "epoch": 1.025028239265906, "grad_norm": 0.028132055454179034, "learning_rate": 0.0005652521145549312, "loss": 0.6398, "step": 20190 }, { "epoch": 1.0252820753639376, "grad_norm": 0.029297581196072738, "learning_rate": 0.0005650324767356553, "loss": 0.6444, "step": 20195 }, { "epoch": 1.025535911461969, "grad_norm": 0.02931096809130409, "learning_rate": 0.0005648128261508287, "loss": 0.6408, "step": 20200 }, { "epoch": 1.0257897475600004, "grad_norm": 0.029863430422639278, "learning_rate": 0.0005645931628435674, "loss": 0.6041, "step": 20205 }, { "epoch": 1.026043583658032, "grad_norm": 0.02904517224838759, "learning_rate": 0.0005643734868569904, "loss": 0.6251, "step": 20210 }, { "epoch": 1.0262974197560635, "grad_norm": 0.03309662720243923, "learning_rate": 0.0005641537982342189, "loss": 0.6303, "step": 20215 }, { "epoch": 1.026551255854095, "grad_norm": 0.027155620800163045, "learning_rate": 0.0005639340970183767, "loss": 0.6639, "step": 20220 }, { "epoch": 1.0268050919521265, "grad_norm": 0.028794889618699332, "learning_rate": 0.0005637143832525902, "loss": 0.6519, "step": 20225 }, { "epoch": 1.027058928050158, "grad_norm": 0.034163654477613464, "learning_rate": 0.000563494656979988, "loss": 0.6305, "step": 20230 }, { "epoch": 1.0273127641481896, "grad_norm": 0.028067119802545903, "learning_rate": 0.0005632749182437013, "loss": 0.6016, "step": 20235 }, { "epoch": 1.027566600246221, "grad_norm": 0.03136557965227905, "learning_rate": 0.0005630551670868638, "loss": 0.627, "step": 20240 }, { "epoch": 1.0278204363442525, "grad_norm": 0.0316470711435105, "learning_rate": 0.0005628354035526113, "loss": 0.6412, "step": 20245 }, { "epoch": 1.028074272442284, "grad_norm": 0.0274857434859716, "learning_rate": 0.0005626156276840824, "loss": 0.6605, "step": 20250 }, { "epoch": 1.0283281085403155, "grad_norm": 0.03177377678458856, "learning_rate": 0.0005623958395244182, "loss": 0.643, "step": 20255 }, { "epoch": 1.028581944638347, "grad_norm": 0.028147178985520677, "learning_rate": 0.0005621760391167618, "loss": 0.6209, "step": 20260 }, { "epoch": 1.0288357807363786, "grad_norm": 0.028388614746341106, "learning_rate": 0.0005619562265042589, "loss": 0.6362, "step": 20265 }, { "epoch": 1.02908961683441, "grad_norm": 0.029970276372417417, "learning_rate": 0.0005617364017300579, "loss": 0.6872, "step": 20270 }, { "epoch": 1.0293434529324414, "grad_norm": 0.028806560530965943, "learning_rate": 0.0005615165648373091, "loss": 0.629, "step": 20275 }, { "epoch": 1.029597289030473, "grad_norm": 0.030174923696767397, "learning_rate": 0.0005612967158691652, "loss": 0.6675, "step": 20280 }, { "epoch": 1.0298511251285045, "grad_norm": 0.0293563454903592, "learning_rate": 0.0005610768548687818, "loss": 0.6042, "step": 20285 }, { "epoch": 1.030104961226536, "grad_norm": 0.03370445496995283, "learning_rate": 0.0005608569818793163, "loss": 0.6391, "step": 20290 }, { "epoch": 1.0303587973245676, "grad_norm": 0.027806550019787577, "learning_rate": 0.0005606370969439288, "loss": 0.6461, "step": 20295 }, { "epoch": 1.0306126334225991, "grad_norm": 0.030329439495030322, "learning_rate": 0.0005604172001057817, "loss": 0.6138, "step": 20300 }, { "epoch": 1.0308664695206304, "grad_norm": 0.025961540996315414, "learning_rate": 0.0005601972914080394, "loss": 0.6553, "step": 20305 }, { "epoch": 1.031120305618662, "grad_norm": 0.02801739072007073, "learning_rate": 0.000559977370893869, "loss": 0.6322, "step": 20310 }, { "epoch": 1.0313741417166935, "grad_norm": 0.031311331785293224, "learning_rate": 0.0005597574386064398, "loss": 0.6096, "step": 20315 }, { "epoch": 1.031627977814725, "grad_norm": 0.03335263927310206, "learning_rate": 0.0005595374945889235, "loss": 0.6739, "step": 20320 }, { "epoch": 1.0318818139127566, "grad_norm": 0.02731563158645986, "learning_rate": 0.0005593175388844939, "loss": 0.6378, "step": 20325 }, { "epoch": 1.032135650010788, "grad_norm": 0.027731155624900344, "learning_rate": 0.0005590975715363271, "loss": 0.6316, "step": 20330 }, { "epoch": 1.0323894861088196, "grad_norm": 0.035216832326168804, "learning_rate": 0.0005588775925876019, "loss": 0.6014, "step": 20335 }, { "epoch": 1.032643322206851, "grad_norm": 0.031481980745479574, "learning_rate": 0.0005586576020814986, "loss": 0.679, "step": 20340 }, { "epoch": 1.0328971583048825, "grad_norm": 0.02918763560100261, "learning_rate": 0.0005584376000612008, "loss": 0.6216, "step": 20345 }, { "epoch": 1.033150994402914, "grad_norm": 0.033706212529476094, "learning_rate": 0.0005582175865698935, "loss": 0.6219, "step": 20350 }, { "epoch": 1.0334048305009456, "grad_norm": 0.028010089922912464, "learning_rate": 0.0005579975616507642, "loss": 0.6559, "step": 20355 }, { "epoch": 1.033658666598977, "grad_norm": 0.02767739606976176, "learning_rate": 0.0005577775253470028, "loss": 0.6577, "step": 20360 }, { "epoch": 1.0339125026970086, "grad_norm": 0.03516381258832037, "learning_rate": 0.0005575574777018014, "loss": 0.6335, "step": 20365 }, { "epoch": 1.03416633879504, "grad_norm": 0.028838075124335442, "learning_rate": 0.000557337418758354, "loss": 0.6826, "step": 20370 }, { "epoch": 1.0344201748930715, "grad_norm": 0.027546666200919914, "learning_rate": 0.0005571173485598575, "loss": 0.667, "step": 20375 }, { "epoch": 1.034674010991103, "grad_norm": 0.030732740101499692, "learning_rate": 0.0005568972671495102, "loss": 0.6347, "step": 20380 }, { "epoch": 1.0349278470891345, "grad_norm": 0.02769579834449506, "learning_rate": 0.000556677174570513, "loss": 0.6142, "step": 20385 }, { "epoch": 1.035181683187166, "grad_norm": 0.030699132352193712, "learning_rate": 0.0005564570708660692, "loss": 0.6003, "step": 20390 }, { "epoch": 1.0354355192851976, "grad_norm": 0.031906213179893284, "learning_rate": 0.000556236956079384, "loss": 0.6272, "step": 20395 }, { "epoch": 1.0356893553832291, "grad_norm": 0.028688787660841514, "learning_rate": 0.0005560168302536645, "loss": 0.6682, "step": 20400 }, { "epoch": 1.0359431914812605, "grad_norm": 0.030142031596859437, "learning_rate": 0.0005557966934321208, "loss": 0.6367, "step": 20405 }, { "epoch": 1.036197027579292, "grad_norm": 0.032296647300427375, "learning_rate": 0.0005555765456579645, "loss": 0.612, "step": 20410 }, { "epoch": 1.0364508636773235, "grad_norm": 0.03165649427674885, "learning_rate": 0.0005553563869744092, "loss": 0.6302, "step": 20415 }, { "epoch": 1.036704699775355, "grad_norm": 0.028897503076198663, "learning_rate": 0.0005551362174246714, "loss": 0.6026, "step": 20420 }, { "epoch": 1.0369585358733866, "grad_norm": 0.03145711055727913, "learning_rate": 0.000554916037051969, "loss": 0.6165, "step": 20425 }, { "epoch": 1.0372123719714181, "grad_norm": 0.027390245042059384, "learning_rate": 0.0005546958458995225, "loss": 0.638, "step": 20430 }, { "epoch": 1.0374662080694494, "grad_norm": 0.028358095690773532, "learning_rate": 0.0005544756440105541, "loss": 0.617, "step": 20435 }, { "epoch": 1.037720044167481, "grad_norm": 0.03441901932192754, "learning_rate": 0.0005542554314282885, "loss": 0.625, "step": 20440 }, { "epoch": 1.0379738802655125, "grad_norm": 0.02834425984845626, "learning_rate": 0.0005540352081959524, "loss": 0.6379, "step": 20445 }, { "epoch": 1.038227716363544, "grad_norm": 0.026040726297700097, "learning_rate": 0.0005538149743567742, "loss": 0.6062, "step": 20450 }, { "epoch": 1.0384815524615756, "grad_norm": 0.02686433171691892, "learning_rate": 0.000553594729953985, "loss": 0.6205, "step": 20455 }, { "epoch": 1.0387353885596071, "grad_norm": 0.029576966963290326, "learning_rate": 0.0005533744750308173, "loss": 0.6533, "step": 20460 }, { "epoch": 1.0389892246576387, "grad_norm": 0.026932143177321014, "learning_rate": 0.0005531542096305067, "loss": 0.5978, "step": 20465 }, { "epoch": 1.03924306075567, "grad_norm": 0.03190692501240132, "learning_rate": 0.0005529339337962898, "loss": 0.6278, "step": 20470 }, { "epoch": 1.0394968968537015, "grad_norm": 0.029911909798840598, "learning_rate": 0.0005527136475714055, "loss": 0.6279, "step": 20475 }, { "epoch": 1.039750732951733, "grad_norm": 0.030446092809617957, "learning_rate": 0.0005524933509990953, "loss": 0.6369, "step": 20480 }, { "epoch": 1.0400045690497646, "grad_norm": 0.027217757550597205, "learning_rate": 0.0005522730441226019, "loss": 0.5854, "step": 20485 }, { "epoch": 1.040258405147796, "grad_norm": 0.03101649397658981, "learning_rate": 0.0005520527269851707, "loss": 0.6719, "step": 20490 }, { "epoch": 1.0405122412458276, "grad_norm": 0.032399681930346744, "learning_rate": 0.0005518323996300486, "loss": 0.6099, "step": 20495 }, { "epoch": 1.0407660773438592, "grad_norm": 0.02923203512258405, "learning_rate": 0.0005516120621004852, "loss": 0.62, "step": 20500 }, { "epoch": 1.0410199134418905, "grad_norm": 0.027908260510780974, "learning_rate": 0.0005513917144397313, "loss": 0.6649, "step": 20505 }, { "epoch": 1.041273749539922, "grad_norm": 0.03137806740323492, "learning_rate": 0.0005511713566910401, "loss": 0.6308, "step": 20510 }, { "epoch": 1.0415275856379536, "grad_norm": 0.027282258482289653, "learning_rate": 0.0005509509888976668, "loss": 0.6131, "step": 20515 }, { "epoch": 1.041781421735985, "grad_norm": 0.0300069376612707, "learning_rate": 0.0005507306111028683, "loss": 0.6759, "step": 20520 }, { "epoch": 1.0420352578340166, "grad_norm": 0.032893038580643075, "learning_rate": 0.000550510223349904, "loss": 0.6382, "step": 20525 }, { "epoch": 1.0422890939320482, "grad_norm": 0.03517825324600527, "learning_rate": 0.0005502898256820349, "loss": 0.6351, "step": 20530 }, { "epoch": 1.0425429300300795, "grad_norm": 0.03496146550558237, "learning_rate": 0.0005500694181425237, "loss": 0.6295, "step": 20535 }, { "epoch": 1.042796766128111, "grad_norm": 0.03213454180881841, "learning_rate": 0.0005498490007746354, "loss": 0.6249, "step": 20540 }, { "epoch": 1.0430506022261425, "grad_norm": 0.028872120446459374, "learning_rate": 0.0005496285736216369, "loss": 0.6163, "step": 20545 }, { "epoch": 1.043304438324174, "grad_norm": 0.030687516119110858, "learning_rate": 0.0005494081367267968, "loss": 0.6606, "step": 20550 }, { "epoch": 1.0435582744222056, "grad_norm": 0.028965595399561363, "learning_rate": 0.0005491876901333859, "loss": 0.6375, "step": 20555 }, { "epoch": 1.0438121105202371, "grad_norm": 0.031074633926361117, "learning_rate": 0.0005489672338846767, "loss": 0.6721, "step": 20560 }, { "epoch": 1.0440659466182687, "grad_norm": 0.03554021866670771, "learning_rate": 0.0005487467680239437, "loss": 0.6162, "step": 20565 }, { "epoch": 1.0443197827163, "grad_norm": 0.035697845672705786, "learning_rate": 0.0005485262925944633, "loss": 0.6575, "step": 20570 }, { "epoch": 1.0445736188143315, "grad_norm": 0.03035932705154792, "learning_rate": 0.0005483058076395136, "loss": 0.6346, "step": 20575 }, { "epoch": 1.044827454912363, "grad_norm": 0.03788665337971818, "learning_rate": 0.0005480853132023746, "loss": 0.6104, "step": 20580 }, { "epoch": 1.0450812910103946, "grad_norm": 0.026939618160350867, "learning_rate": 0.0005478648093263286, "loss": 0.612, "step": 20585 }, { "epoch": 1.0453351271084261, "grad_norm": 0.028466429161623282, "learning_rate": 0.0005476442960546592, "loss": 0.6325, "step": 20590 }, { "epoch": 1.0455889632064577, "grad_norm": 0.031622708640798206, "learning_rate": 0.0005474237734306522, "loss": 0.6116, "step": 20595 }, { "epoch": 1.0458427993044892, "grad_norm": 0.030308376928387292, "learning_rate": 0.0005472032414975949, "loss": 0.6375, "step": 20600 }, { "epoch": 1.0460966354025205, "grad_norm": 0.03183083008177163, "learning_rate": 0.0005469827002987767, "loss": 0.6189, "step": 20605 }, { "epoch": 1.046350471500552, "grad_norm": 0.029784121549040796, "learning_rate": 0.0005467621498774886, "loss": 0.6366, "step": 20610 }, { "epoch": 1.0466043075985836, "grad_norm": 0.02858082064404031, "learning_rate": 0.0005465415902770238, "loss": 0.6115, "step": 20615 }, { "epoch": 1.0468581436966151, "grad_norm": 0.03704819911897671, "learning_rate": 0.0005463210215406769, "loss": 0.6439, "step": 20620 }, { "epoch": 1.0471119797946467, "grad_norm": 0.028435789455785537, "learning_rate": 0.0005461004437117445, "loss": 0.6249, "step": 20625 }, { "epoch": 1.0473658158926782, "grad_norm": 0.02952479995228734, "learning_rate": 0.0005458798568335249, "loss": 0.6325, "step": 20630 }, { "epoch": 1.0476196519907095, "grad_norm": 0.027419420151043025, "learning_rate": 0.0005456592609493182, "loss": 0.6463, "step": 20635 }, { "epoch": 1.047873488088741, "grad_norm": 0.029179792467657107, "learning_rate": 0.0005454386561024263, "loss": 0.6105, "step": 20640 }, { "epoch": 1.0481273241867726, "grad_norm": 0.029895883327924574, "learning_rate": 0.0005452180423361528, "loss": 0.6465, "step": 20645 }, { "epoch": 1.048381160284804, "grad_norm": 0.05134861475124888, "learning_rate": 0.0005449974196938031, "loss": 0.6481, "step": 20650 }, { "epoch": 1.0486349963828356, "grad_norm": 0.03137857499343926, "learning_rate": 0.0005447767882186844, "loss": 0.6445, "step": 20655 }, { "epoch": 1.0488888324808672, "grad_norm": 0.03656581185534298, "learning_rate": 0.0005445561479541053, "loss": 0.6258, "step": 20660 }, { "epoch": 1.0491426685788987, "grad_norm": 0.03237630579199845, "learning_rate": 0.0005443354989433766, "loss": 0.6205, "step": 20665 }, { "epoch": 1.04939650467693, "grad_norm": 0.029528300936051275, "learning_rate": 0.0005441148412298106, "loss": 0.5822, "step": 20670 }, { "epoch": 1.0496503407749616, "grad_norm": 0.03032071191623146, "learning_rate": 0.0005438941748567212, "loss": 0.6633, "step": 20675 }, { "epoch": 1.049904176872993, "grad_norm": 0.03389887753362935, "learning_rate": 0.0005436734998674242, "loss": 0.664, "step": 20680 }, { "epoch": 1.0501580129710246, "grad_norm": 0.029446176983628346, "learning_rate": 0.0005434528163052371, "loss": 0.6135, "step": 20685 }, { "epoch": 1.0504118490690562, "grad_norm": 0.03256815766705793, "learning_rate": 0.0005432321242134787, "loss": 0.6266, "step": 20690 }, { "epoch": 1.0506656851670877, "grad_norm": 0.02891046164333591, "learning_rate": 0.0005430114236354701, "loss": 0.6632, "step": 20695 }, { "epoch": 1.050919521265119, "grad_norm": 0.02935369269394424, "learning_rate": 0.0005427907146145333, "loss": 0.6254, "step": 20700 }, { "epoch": 1.0511733573631505, "grad_norm": 0.02777714796391245, "learning_rate": 0.0005425699971939927, "loss": 0.6387, "step": 20705 }, { "epoch": 1.051427193461182, "grad_norm": 0.027605709250157838, "learning_rate": 0.000542349271417174, "loss": 0.646, "step": 20710 }, { "epoch": 1.0516810295592136, "grad_norm": 0.02874204042706929, "learning_rate": 0.0005421285373274045, "loss": 0.6044, "step": 20715 }, { "epoch": 1.0519348656572451, "grad_norm": 0.026012738000712556, "learning_rate": 0.0005419077949680132, "loss": 0.6196, "step": 20720 }, { "epoch": 1.0521887017552767, "grad_norm": 0.030458482017457253, "learning_rate": 0.0005416870443823308, "loss": 0.6166, "step": 20725 }, { "epoch": 1.0524425378533082, "grad_norm": 0.04403250831355763, "learning_rate": 0.0005414662856136894, "loss": 0.6606, "step": 20730 }, { "epoch": 1.0526963739513395, "grad_norm": 0.03271794249385033, "learning_rate": 0.0005412455187054229, "loss": 0.647, "step": 20735 }, { "epoch": 1.052950210049371, "grad_norm": 0.02965521216285197, "learning_rate": 0.0005410247437008668, "loss": 0.614, "step": 20740 }, { "epoch": 1.0532040461474026, "grad_norm": 0.035397303132409214, "learning_rate": 0.0005408039606433582, "loss": 0.6772, "step": 20745 }, { "epoch": 1.0534578822454341, "grad_norm": 0.027820677194074234, "learning_rate": 0.0005405831695762355, "loss": 0.6318, "step": 20750 }, { "epoch": 1.0537117183434657, "grad_norm": 0.025498109700773135, "learning_rate": 0.0005403623705428391, "loss": 0.6142, "step": 20755 }, { "epoch": 1.0539655544414972, "grad_norm": 0.026170375043931327, "learning_rate": 0.0005401415635865106, "loss": 0.6055, "step": 20760 }, { "epoch": 1.0542193905395287, "grad_norm": 0.033697361026278114, "learning_rate": 0.0005399207487505934, "loss": 0.6018, "step": 20765 }, { "epoch": 1.05447322663756, "grad_norm": 0.0293295816002693, "learning_rate": 0.0005396999260784323, "loss": 0.611, "step": 20770 }, { "epoch": 1.0547270627355916, "grad_norm": 0.03261943195145193, "learning_rate": 0.0005394790956133736, "loss": 0.5963, "step": 20775 }, { "epoch": 1.0549808988336231, "grad_norm": 0.03158555189566786, "learning_rate": 0.0005392582573987654, "loss": 0.6115, "step": 20780 }, { "epoch": 1.0552347349316546, "grad_norm": 0.028698986830576773, "learning_rate": 0.0005390374114779571, "loss": 0.6297, "step": 20785 }, { "epoch": 1.0554885710296862, "grad_norm": 0.028712261490311152, "learning_rate": 0.0005388165578942993, "loss": 0.6085, "step": 20790 }, { "epoch": 1.0557424071277177, "grad_norm": 0.02840961152452904, "learning_rate": 0.0005385956966911451, "loss": 0.6363, "step": 20795 }, { "epoch": 1.055996243225749, "grad_norm": 0.02846986241295935, "learning_rate": 0.000538374827911848, "loss": 0.649, "step": 20800 }, { "epoch": 1.0562500793237806, "grad_norm": 0.030253208601512045, "learning_rate": 0.0005381539515997636, "loss": 0.6378, "step": 20805 }, { "epoch": 1.056503915421812, "grad_norm": 0.026375092531954695, "learning_rate": 0.0005379330677982487, "loss": 0.5884, "step": 20810 }, { "epoch": 1.0567577515198436, "grad_norm": 0.02908966161202036, "learning_rate": 0.0005377121765506619, "loss": 0.6426, "step": 20815 }, { "epoch": 1.0570115876178752, "grad_norm": 0.026096415975714618, "learning_rate": 0.0005374912779003626, "loss": 0.6192, "step": 20820 }, { "epoch": 1.0572654237159067, "grad_norm": 0.027466198453909893, "learning_rate": 0.0005372703718907127, "loss": 0.6316, "step": 20825 }, { "epoch": 1.0575192598139382, "grad_norm": 0.02926280306071641, "learning_rate": 0.0005370494585650746, "loss": 0.609, "step": 20830 }, { "epoch": 1.0577730959119696, "grad_norm": 0.02703917418653838, "learning_rate": 0.0005368285379668125, "loss": 0.6426, "step": 20835 }, { "epoch": 1.058026932010001, "grad_norm": 0.02860972111458893, "learning_rate": 0.0005366076101392922, "loss": 0.6089, "step": 20840 }, { "epoch": 1.0582807681080326, "grad_norm": 0.03176840532640253, "learning_rate": 0.0005363866751258805, "loss": 0.6482, "step": 20845 }, { "epoch": 1.0585346042060642, "grad_norm": 0.03204951709888885, "learning_rate": 0.0005361657329699457, "loss": 0.6048, "step": 20850 }, { "epoch": 1.0587884403040957, "grad_norm": 0.030795738838282596, "learning_rate": 0.0005359447837148582, "loss": 0.6381, "step": 20855 }, { "epoch": 1.0590422764021272, "grad_norm": 0.02987389497339997, "learning_rate": 0.0005357238274039888, "loss": 0.645, "step": 20860 }, { "epoch": 1.0592961125001588, "grad_norm": 0.031628330168158865, "learning_rate": 0.0005355028640807103, "loss": 0.6041, "step": 20865 }, { "epoch": 1.05954994859819, "grad_norm": 0.029301940615677004, "learning_rate": 0.0005352818937883966, "loss": 0.6602, "step": 20870 }, { "epoch": 1.0598037846962216, "grad_norm": 0.045005849227960214, "learning_rate": 0.0005350609165704231, "loss": 0.6053, "step": 20875 }, { "epoch": 1.0600576207942531, "grad_norm": 0.029791371039933706, "learning_rate": 0.0005348399324701665, "loss": 0.6191, "step": 20880 }, { "epoch": 1.0603114568922847, "grad_norm": 0.028440565915498237, "learning_rate": 0.0005346189415310049, "loss": 0.6529, "step": 20885 }, { "epoch": 1.0605652929903162, "grad_norm": 0.03217229449705654, "learning_rate": 0.0005343979437963178, "loss": 0.6755, "step": 20890 }, { "epoch": 1.0608191290883477, "grad_norm": 0.03591783819085467, "learning_rate": 0.0005341769393094857, "loss": 0.675, "step": 20895 }, { "epoch": 1.061072965186379, "grad_norm": 0.02928947713655643, "learning_rate": 0.000533955928113891, "loss": 0.6592, "step": 20900 }, { "epoch": 1.0613268012844106, "grad_norm": 0.03467093550482425, "learning_rate": 0.000533734910252917, "loss": 0.6656, "step": 20905 }, { "epoch": 1.0615806373824421, "grad_norm": 0.030376879477920303, "learning_rate": 0.0005335138857699482, "loss": 0.6616, "step": 20910 }, { "epoch": 1.0618344734804737, "grad_norm": 0.03093434261721385, "learning_rate": 0.0005332928547083707, "loss": 0.634, "step": 20915 }, { "epoch": 1.0620883095785052, "grad_norm": 0.030496624522973626, "learning_rate": 0.0005330718171115721, "loss": 0.6383, "step": 20920 }, { "epoch": 1.0623421456765367, "grad_norm": 0.030940597461837655, "learning_rate": 0.0005328507730229407, "loss": 0.63, "step": 20925 }, { "epoch": 1.0625959817745683, "grad_norm": 0.02804783894139419, "learning_rate": 0.0005326297224858661, "loss": 0.613, "step": 20930 }, { "epoch": 1.0628498178725996, "grad_norm": 0.03445195839754514, "learning_rate": 0.00053240866554374, "loss": 0.5964, "step": 20935 }, { "epoch": 1.0631036539706311, "grad_norm": 0.028665799682781, "learning_rate": 0.0005321876022399542, "loss": 0.5943, "step": 20940 }, { "epoch": 1.0633574900686626, "grad_norm": 0.03275484874861871, "learning_rate": 0.0005319665326179028, "loss": 0.6473, "step": 20945 }, { "epoch": 1.0636113261666942, "grad_norm": 0.037838208315163505, "learning_rate": 0.0005317454567209804, "loss": 0.5826, "step": 20950 }, { "epoch": 1.0638651622647257, "grad_norm": 0.03494199190763812, "learning_rate": 0.0005315243745925833, "loss": 0.6245, "step": 20955 }, { "epoch": 1.0641189983627573, "grad_norm": 0.02909565996825898, "learning_rate": 0.0005313032862761085, "loss": 0.6562, "step": 20960 }, { "epoch": 1.0643728344607886, "grad_norm": 0.030331852009200883, "learning_rate": 0.0005310821918149548, "loss": 0.6029, "step": 20965 }, { "epoch": 1.06462667055882, "grad_norm": 0.029527374716765482, "learning_rate": 0.0005308610912525218, "loss": 0.6653, "step": 20970 }, { "epoch": 1.0648805066568516, "grad_norm": 0.029372261155341185, "learning_rate": 0.0005306399846322106, "loss": 0.6342, "step": 20975 }, { "epoch": 1.0651343427548832, "grad_norm": 0.03090444990666435, "learning_rate": 0.000530418871997423, "loss": 0.6286, "step": 20980 }, { "epoch": 1.0653881788529147, "grad_norm": 0.03117742525977387, "learning_rate": 0.0005301977533915627, "loss": 0.6195, "step": 20985 }, { "epoch": 1.0656420149509462, "grad_norm": 0.03431669555353471, "learning_rate": 0.000529976628858034, "loss": 0.6293, "step": 20990 }, { "epoch": 1.0658958510489778, "grad_norm": 0.03360229646155309, "learning_rate": 0.0005297554984402426, "loss": 0.6552, "step": 20995 }, { "epoch": 1.066149687147009, "grad_norm": 0.028855246822519443, "learning_rate": 0.0005295343621815952, "loss": 0.6481, "step": 21000 }, { "epoch": 1.0664035232450406, "grad_norm": 0.030294995889939138, "learning_rate": 0.0005293132201254996, "loss": 0.6297, "step": 21005 }, { "epoch": 1.0666573593430722, "grad_norm": 0.030227378783262173, "learning_rate": 0.0005290920723153653, "loss": 0.6647, "step": 21010 }, { "epoch": 1.0669111954411037, "grad_norm": 0.03199980737864099, "learning_rate": 0.0005288709187946022, "loss": 0.6417, "step": 21015 }, { "epoch": 1.0671650315391352, "grad_norm": 0.028975480625044084, "learning_rate": 0.0005286497596066218, "loss": 0.7121, "step": 21020 }, { "epoch": 1.0674188676371668, "grad_norm": 0.026864434426198566, "learning_rate": 0.0005284285947948364, "loss": 0.5874, "step": 21025 }, { "epoch": 1.067672703735198, "grad_norm": 0.028169723088644936, "learning_rate": 0.0005282074244026597, "loss": 0.6092, "step": 21030 }, { "epoch": 1.0679265398332296, "grad_norm": 0.02865119653433748, "learning_rate": 0.0005279862484735059, "loss": 0.6273, "step": 21035 }, { "epoch": 1.0681803759312611, "grad_norm": 0.030029494406921858, "learning_rate": 0.0005277650670507915, "loss": 0.655, "step": 21040 }, { "epoch": 1.0684342120292927, "grad_norm": 0.04554946303188224, "learning_rate": 0.0005275438801779327, "loss": 0.6038, "step": 21045 }, { "epoch": 1.0686880481273242, "grad_norm": 0.02833613275237062, "learning_rate": 0.0005273226878983476, "loss": 0.6343, "step": 21050 }, { "epoch": 1.0689418842253557, "grad_norm": 0.027263274809446864, "learning_rate": 0.0005271014902554552, "loss": 0.6156, "step": 21055 }, { "epoch": 1.0691957203233873, "grad_norm": 0.03042134873911709, "learning_rate": 0.0005268802872926755, "loss": 0.6803, "step": 21060 }, { "epoch": 1.0694495564214186, "grad_norm": 0.031088835903835828, "learning_rate": 0.0005266590790534292, "loss": 0.6613, "step": 21065 }, { "epoch": 1.0697033925194501, "grad_norm": 0.031171312547036978, "learning_rate": 0.0005264378655811388, "loss": 0.6023, "step": 21070 }, { "epoch": 1.0699572286174817, "grad_norm": 0.029526565377433322, "learning_rate": 0.0005262166469192273, "loss": 0.6298, "step": 21075 }, { "epoch": 1.0702110647155132, "grad_norm": 0.03280583173935073, "learning_rate": 0.0005259954231111186, "loss": 0.6533, "step": 21080 }, { "epoch": 1.0704649008135447, "grad_norm": 0.02892714117368612, "learning_rate": 0.000525774194200238, "loss": 0.6437, "step": 21085 }, { "epoch": 1.0707187369115763, "grad_norm": 0.02771681358923486, "learning_rate": 0.0005255529602300118, "loss": 0.6412, "step": 21090 }, { "epoch": 1.0709725730096078, "grad_norm": 0.028225814934819436, "learning_rate": 0.0005253317212438668, "loss": 0.645, "step": 21095 }, { "epoch": 1.071226409107639, "grad_norm": 0.03135808691567858, "learning_rate": 0.0005251104772852312, "loss": 0.6642, "step": 21100 }, { "epoch": 1.0714802452056706, "grad_norm": 0.03305441859511727, "learning_rate": 0.0005248892283975341, "loss": 0.6308, "step": 21105 }, { "epoch": 1.0717340813037022, "grad_norm": 0.02788385266640481, "learning_rate": 0.0005246679746242058, "loss": 0.6191, "step": 21110 }, { "epoch": 1.0719879174017337, "grad_norm": 0.025848200030813492, "learning_rate": 0.000524446716008677, "loss": 0.6053, "step": 21115 }, { "epoch": 1.0722417534997652, "grad_norm": 0.03068727424306698, "learning_rate": 0.0005242254525943799, "loss": 0.6601, "step": 21120 }, { "epoch": 1.0724955895977968, "grad_norm": 0.030547897781127813, "learning_rate": 0.000524004184424747, "loss": 0.6359, "step": 21125 }, { "epoch": 1.0727494256958283, "grad_norm": 0.02940646046879394, "learning_rate": 0.0005237829115432124, "loss": 0.6332, "step": 21130 }, { "epoch": 1.0730032617938596, "grad_norm": 0.026461936330326553, "learning_rate": 0.000523561633993211, "loss": 0.6067, "step": 21135 }, { "epoch": 1.0732570978918912, "grad_norm": 0.03103589107771118, "learning_rate": 0.0005233403518181784, "loss": 0.6016, "step": 21140 }, { "epoch": 1.0735109339899227, "grad_norm": 0.026855017200195026, "learning_rate": 0.000523119065061551, "loss": 0.6413, "step": 21145 }, { "epoch": 1.0737647700879542, "grad_norm": 0.03370983587930053, "learning_rate": 0.0005228977737667665, "loss": 0.6598, "step": 21150 }, { "epoch": 1.0740186061859858, "grad_norm": 0.02965244614582874, "learning_rate": 0.0005226764779772632, "loss": 0.6562, "step": 21155 }, { "epoch": 1.0742724422840173, "grad_norm": 0.02912267960495478, "learning_rate": 0.0005224551777364803, "loss": 0.652, "step": 21160 }, { "epoch": 1.0745262783820486, "grad_norm": 0.029543695413772452, "learning_rate": 0.0005222338730878581, "loss": 0.6472, "step": 21165 }, { "epoch": 1.0747801144800802, "grad_norm": 0.03194949190780454, "learning_rate": 0.0005220125640748375, "loss": 0.6348, "step": 21170 }, { "epoch": 1.0750339505781117, "grad_norm": 0.029752993284623473, "learning_rate": 0.0005217912507408602, "loss": 0.6358, "step": 21175 }, { "epoch": 1.0752877866761432, "grad_norm": 0.027227101649956936, "learning_rate": 0.0005215699331293692, "loss": 0.6067, "step": 21180 }, { "epoch": 1.0755416227741748, "grad_norm": 0.02710361856318992, "learning_rate": 0.0005213486112838076, "loss": 0.6104, "step": 21185 }, { "epoch": 1.0757954588722063, "grad_norm": 0.029321694925800716, "learning_rate": 0.0005211272852476204, "loss": 0.637, "step": 21190 }, { "epoch": 1.0760492949702378, "grad_norm": 0.03420723651522672, "learning_rate": 0.0005209059550642523, "loss": 0.6683, "step": 21195 }, { "epoch": 1.0763031310682691, "grad_norm": 0.029149193787314086, "learning_rate": 0.0005206846207771496, "loss": 0.6776, "step": 21200 }, { "epoch": 1.0765569671663007, "grad_norm": 0.030130294996254445, "learning_rate": 0.0005204632824297589, "loss": 0.6554, "step": 21205 }, { "epoch": 1.0768108032643322, "grad_norm": 0.02979414994940381, "learning_rate": 0.0005202419400655281, "loss": 0.6352, "step": 21210 }, { "epoch": 1.0770646393623637, "grad_norm": 0.02831380351849676, "learning_rate": 0.0005200205937279052, "loss": 0.6137, "step": 21215 }, { "epoch": 1.0773184754603953, "grad_norm": 0.029788021168430004, "learning_rate": 0.0005197992434603397, "loss": 0.6297, "step": 21220 }, { "epoch": 1.0775723115584268, "grad_norm": 0.030276018822389773, "learning_rate": 0.0005195778893062814, "loss": 0.6283, "step": 21225 }, { "epoch": 1.0778261476564581, "grad_norm": 0.028507890144453048, "learning_rate": 0.000519356531309181, "loss": 0.6119, "step": 21230 }, { "epoch": 1.0780799837544897, "grad_norm": 0.028818953438482002, "learning_rate": 0.0005191351695124902, "loss": 0.6325, "step": 21235 }, { "epoch": 1.0783338198525212, "grad_norm": 0.028395978179257007, "learning_rate": 0.000518913803959661, "loss": 0.666, "step": 21240 }, { "epoch": 1.0785876559505527, "grad_norm": 0.02881100163255776, "learning_rate": 0.0005186924346941463, "loss": 0.6314, "step": 21245 }, { "epoch": 1.0788414920485843, "grad_norm": 0.028011633825167498, "learning_rate": 0.0005184710617593998, "loss": 0.6538, "step": 21250 }, { "epoch": 1.0790953281466158, "grad_norm": 0.027451858383445278, "learning_rate": 0.0005182496851988763, "loss": 0.5953, "step": 21255 }, { "epoch": 1.0793491642446473, "grad_norm": 0.035283067846456545, "learning_rate": 0.0005180283050560304, "loss": 0.6491, "step": 21260 }, { "epoch": 1.0796030003426786, "grad_norm": 0.03142993019400392, "learning_rate": 0.0005178069213743182, "loss": 0.5999, "step": 21265 }, { "epoch": 1.0798568364407102, "grad_norm": 0.027244902577508626, "learning_rate": 0.0005175855341971961, "loss": 0.6318, "step": 21270 }, { "epoch": 1.0801106725387417, "grad_norm": 0.029750577950312655, "learning_rate": 0.0005173641435681212, "loss": 0.6136, "step": 21275 }, { "epoch": 1.0803645086367732, "grad_norm": 0.03183481078057576, "learning_rate": 0.0005171427495305517, "loss": 0.6433, "step": 21280 }, { "epoch": 1.0806183447348048, "grad_norm": 0.028298920906164794, "learning_rate": 0.000516921352127946, "loss": 0.635, "step": 21285 }, { "epoch": 1.0808721808328363, "grad_norm": 0.029166031553055968, "learning_rate": 0.0005166999514037631, "loss": 0.6702, "step": 21290 }, { "epoch": 1.0811260169308676, "grad_norm": 0.026862985488898736, "learning_rate": 0.0005164785474014631, "loss": 0.5983, "step": 21295 }, { "epoch": 1.0813798530288992, "grad_norm": 0.030663270644200196, "learning_rate": 0.0005162571401645065, "loss": 0.6688, "step": 21300 }, { "epoch": 1.0816336891269307, "grad_norm": 0.03041423793317463, "learning_rate": 0.0005160357297363541, "loss": 0.6733, "step": 21305 }, { "epoch": 1.0818875252249622, "grad_norm": 0.027054695607663257, "learning_rate": 0.0005158143161604682, "loss": 0.6391, "step": 21310 }, { "epoch": 1.0821413613229938, "grad_norm": 0.029025786383658596, "learning_rate": 0.0005155928994803108, "loss": 0.6402, "step": 21315 }, { "epoch": 1.0823951974210253, "grad_norm": 0.029206022666626825, "learning_rate": 0.0005153714797393451, "loss": 0.6536, "step": 21320 }, { "epoch": 1.0826490335190568, "grad_norm": 0.02779645015227374, "learning_rate": 0.0005151500569810345, "loss": 0.6224, "step": 21325 }, { "epoch": 1.0829028696170881, "grad_norm": 0.027448288892096277, "learning_rate": 0.0005149286312488432, "loss": 0.6767, "step": 21330 }, { "epoch": 1.0831567057151197, "grad_norm": 0.02965059612638917, "learning_rate": 0.0005147072025862362, "loss": 0.6024, "step": 21335 }, { "epoch": 1.0834105418131512, "grad_norm": 0.02895912112799168, "learning_rate": 0.0005144857710366785, "loss": 0.6367, "step": 21340 }, { "epoch": 1.0836643779111828, "grad_norm": 0.02886692753523987, "learning_rate": 0.0005142643366436362, "loss": 0.6558, "step": 21345 }, { "epoch": 1.0839182140092143, "grad_norm": 0.027481782649381746, "learning_rate": 0.0005140428994505759, "loss": 0.644, "step": 21350 }, { "epoch": 1.0841720501072458, "grad_norm": 0.02899254442973709, "learning_rate": 0.0005138214595009643, "loss": 0.6441, "step": 21355 }, { "epoch": 1.0844258862052774, "grad_norm": 0.027420447055865344, "learning_rate": 0.0005136000168382693, "loss": 0.6174, "step": 21360 }, { "epoch": 1.0846797223033087, "grad_norm": 0.028506736811585043, "learning_rate": 0.0005133785715059586, "loss": 0.6445, "step": 21365 }, { "epoch": 1.0849335584013402, "grad_norm": 0.028765606858000607, "learning_rate": 0.0005131571235475012, "loss": 0.646, "step": 21370 }, { "epoch": 1.0851873944993717, "grad_norm": 0.0293489456110326, "learning_rate": 0.000512935673006366, "loss": 0.6527, "step": 21375 }, { "epoch": 1.0854412305974033, "grad_norm": 0.024332463938088196, "learning_rate": 0.0005127142199260228, "loss": 0.5798, "step": 21380 }, { "epoch": 1.0856950666954348, "grad_norm": 0.029508580260956485, "learning_rate": 0.0005124927643499415, "loss": 0.6678, "step": 21385 }, { "epoch": 1.0859489027934663, "grad_norm": 0.0292222898626637, "learning_rate": 0.000512271306321593, "loss": 0.6477, "step": 21390 }, { "epoch": 1.0862027388914979, "grad_norm": 0.02952428830204106, "learning_rate": 0.000512049845884448, "loss": 0.6367, "step": 21395 }, { "epoch": 1.0864565749895292, "grad_norm": 0.029910475867529318, "learning_rate": 0.0005118283830819786, "loss": 0.6191, "step": 21400 }, { "epoch": 1.0867104110875607, "grad_norm": 0.029120200575509837, "learning_rate": 0.0005116069179576565, "loss": 0.6277, "step": 21405 }, { "epoch": 1.0869642471855923, "grad_norm": 0.029044375730934786, "learning_rate": 0.0005113854505549543, "loss": 0.6336, "step": 21410 }, { "epoch": 1.0872180832836238, "grad_norm": 0.02917524405404939, "learning_rate": 0.000511163980917345, "loss": 0.5835, "step": 21415 }, { "epoch": 1.0874719193816553, "grad_norm": 0.027631781694111464, "learning_rate": 0.0005109425090883019, "loss": 0.6188, "step": 21420 }, { "epoch": 1.0877257554796869, "grad_norm": 0.029476089492950976, "learning_rate": 0.0005107210351112986, "loss": 0.6535, "step": 21425 }, { "epoch": 1.0879795915777182, "grad_norm": 0.026583763468929825, "learning_rate": 0.0005104995590298098, "loss": 0.6035, "step": 21430 }, { "epoch": 1.0882334276757497, "grad_norm": 0.026457153392592407, "learning_rate": 0.0005102780808873098, "loss": 0.6209, "step": 21435 }, { "epoch": 1.0884872637737812, "grad_norm": 0.024397421411244746, "learning_rate": 0.000510056600727274, "loss": 0.5544, "step": 21440 }, { "epoch": 1.0887410998718128, "grad_norm": 0.02937176900329421, "learning_rate": 0.0005098351185931775, "loss": 0.6312, "step": 21445 }, { "epoch": 1.0889949359698443, "grad_norm": 0.029753779847483324, "learning_rate": 0.0005096136345284963, "loss": 0.6242, "step": 21450 }, { "epoch": 1.0892487720678758, "grad_norm": 0.030195440186569944, "learning_rate": 0.0005093921485767066, "loss": 0.6371, "step": 21455 }, { "epoch": 1.0895026081659074, "grad_norm": 0.028666822181037235, "learning_rate": 0.0005091706607812848, "loss": 0.6139, "step": 21460 }, { "epoch": 1.0897564442639387, "grad_norm": 0.0313456037186578, "learning_rate": 0.0005089491711857083, "loss": 0.6279, "step": 21465 }, { "epoch": 1.0900102803619702, "grad_norm": 0.028362285104698674, "learning_rate": 0.0005087276798334539, "loss": 0.5833, "step": 21470 }, { "epoch": 1.0902641164600018, "grad_norm": 0.03743898204428094, "learning_rate": 0.0005085061867679995, "loss": 0.6362, "step": 21475 }, { "epoch": 1.0905179525580333, "grad_norm": 0.029169508294510378, "learning_rate": 0.0005082846920328232, "loss": 0.6438, "step": 21480 }, { "epoch": 1.0907717886560648, "grad_norm": 0.029959600725585848, "learning_rate": 0.0005080631956714029, "loss": 0.647, "step": 21485 }, { "epoch": 1.0910256247540964, "grad_norm": 0.02879631666517018, "learning_rate": 0.0005078416977272178, "loss": 0.625, "step": 21490 }, { "epoch": 1.0912794608521277, "grad_norm": 0.027762569804612725, "learning_rate": 0.0005076201982437464, "loss": 0.6514, "step": 21495 }, { "epoch": 1.0915332969501592, "grad_norm": 0.029213218872249837, "learning_rate": 0.0005073986972644681, "loss": 0.6596, "step": 21500 }, { "epoch": 1.0917871330481907, "grad_norm": 0.03372326227851402, "learning_rate": 0.0005071771948328624, "loss": 0.6082, "step": 21505 }, { "epoch": 1.0920409691462223, "grad_norm": 0.034861141863521274, "learning_rate": 0.0005069556909924092, "loss": 0.6162, "step": 21510 }, { "epoch": 1.0922948052442538, "grad_norm": 0.03138562578892669, "learning_rate": 0.0005067341857865885, "loss": 0.6327, "step": 21515 }, { "epoch": 1.0925486413422854, "grad_norm": 0.032030057969560294, "learning_rate": 0.0005065126792588807, "loss": 0.6478, "step": 21520 }, { "epoch": 1.0928024774403169, "grad_norm": 0.031234908197096026, "learning_rate": 0.0005062911714527664, "loss": 0.638, "step": 21525 }, { "epoch": 1.0930563135383482, "grad_norm": 0.028903092964076196, "learning_rate": 0.0005060696624117266, "loss": 0.6012, "step": 21530 }, { "epoch": 1.0933101496363797, "grad_norm": 0.033260541559827425, "learning_rate": 0.0005058481521792424, "loss": 0.6431, "step": 21535 }, { "epoch": 1.0935639857344113, "grad_norm": 0.028391245142456403, "learning_rate": 0.000505626640798795, "loss": 0.6161, "step": 21540 }, { "epoch": 1.0938178218324428, "grad_norm": 0.026455516303298128, "learning_rate": 0.000505405128313866, "loss": 0.6173, "step": 21545 }, { "epoch": 1.0940716579304743, "grad_norm": 0.027477197498994355, "learning_rate": 0.0005051836147679374, "loss": 0.5907, "step": 21550 }, { "epoch": 1.0943254940285059, "grad_norm": 0.026444437531829103, "learning_rate": 0.000504962100204491, "loss": 0.6049, "step": 21555 }, { "epoch": 1.0945793301265372, "grad_norm": 0.031706940786378535, "learning_rate": 0.0005047405846670091, "loss": 0.6241, "step": 21560 }, { "epoch": 1.0948331662245687, "grad_norm": 0.027139704217137985, "learning_rate": 0.0005045190681989742, "loss": 0.6203, "step": 21565 }, { "epoch": 1.0950870023226003, "grad_norm": 0.026024620763848424, "learning_rate": 0.0005042975508438687, "loss": 0.5875, "step": 21570 }, { "epoch": 1.0953408384206318, "grad_norm": 0.030939388901898113, "learning_rate": 0.0005040760326451752, "loss": 0.6222, "step": 21575 }, { "epoch": 1.0955946745186633, "grad_norm": 0.04189778139577468, "learning_rate": 0.000503854513646377, "loss": 0.5755, "step": 21580 }, { "epoch": 1.0958485106166949, "grad_norm": 0.0330897832141388, "learning_rate": 0.000503632993890957, "loss": 0.6466, "step": 21585 }, { "epoch": 1.0961023467147264, "grad_norm": 0.027464717482186228, "learning_rate": 0.0005034114734223983, "loss": 0.6108, "step": 21590 }, { "epoch": 1.0963561828127577, "grad_norm": 0.02806959195023062, "learning_rate": 0.0005031899522841845, "loss": 0.6525, "step": 21595 }, { "epoch": 1.0966100189107892, "grad_norm": 0.026043140635761634, "learning_rate": 0.0005029684305197989, "loss": 0.5998, "step": 21600 }, { "epoch": 1.0968638550088208, "grad_norm": 0.02850840541786695, "learning_rate": 0.000502746908172725, "loss": 0.6323, "step": 21605 }, { "epoch": 1.0971176911068523, "grad_norm": 0.028933907465840118, "learning_rate": 0.000502525385286447, "loss": 0.614, "step": 21610 }, { "epoch": 1.0973715272048838, "grad_norm": 0.02779688245822838, "learning_rate": 0.0005023038619044485, "loss": 0.6021, "step": 21615 }, { "epoch": 1.0976253633029154, "grad_norm": 0.027388389294290952, "learning_rate": 0.0005020823380702133, "loss": 0.6363, "step": 21620 }, { "epoch": 1.0978791994009467, "grad_norm": 0.03355168136900585, "learning_rate": 0.0005018608138272255, "loss": 0.6208, "step": 21625 }, { "epoch": 1.0981330354989782, "grad_norm": 0.03373594782311456, "learning_rate": 0.0005016392892189692, "loss": 0.6306, "step": 21630 }, { "epoch": 1.0983868715970098, "grad_norm": 0.025405800705319562, "learning_rate": 0.0005014177642889286, "loss": 0.6155, "step": 21635 }, { "epoch": 1.0986407076950413, "grad_norm": 0.029459901268611876, "learning_rate": 0.000501196239080588, "loss": 0.5996, "step": 21640 }, { "epoch": 1.0988945437930728, "grad_norm": 0.03126297932283852, "learning_rate": 0.0005009747136374317, "loss": 0.6315, "step": 21645 }, { "epoch": 1.0991483798911044, "grad_norm": 0.028374852591601843, "learning_rate": 0.0005007531880029438, "loss": 0.6123, "step": 21650 }, { "epoch": 1.099402215989136, "grad_norm": 0.029342697424911576, "learning_rate": 0.000500531662220609, "loss": 0.6466, "step": 21655 }, { "epoch": 1.0996560520871672, "grad_norm": 0.027568330284911978, "learning_rate": 0.0005003101363339114, "loss": 0.6101, "step": 21660 }, { "epoch": 1.0999098881851987, "grad_norm": 0.0263210343874408, "learning_rate": 0.0005000886103863355, "loss": 0.5986, "step": 21665 }, { "epoch": 1.1001637242832303, "grad_norm": 0.026073222679567933, "learning_rate": 0.0004998670844213661, "loss": 0.6177, "step": 21670 }, { "epoch": 1.1004175603812618, "grad_norm": 0.029842357104644612, "learning_rate": 0.0004996455584824873, "loss": 0.648, "step": 21675 }, { "epoch": 1.1006713964792934, "grad_norm": 0.02901686978445643, "learning_rate": 0.0004994240326131837, "loss": 0.657, "step": 21680 }, { "epoch": 1.1009252325773249, "grad_norm": 0.028682031318145007, "learning_rate": 0.0004992025068569395, "loss": 0.644, "step": 21685 }, { "epoch": 1.1011790686753564, "grad_norm": 0.032038795855661203, "learning_rate": 0.0004989809812572392, "loss": 0.6863, "step": 21690 }, { "epoch": 1.1014329047733877, "grad_norm": 0.03029352912616947, "learning_rate": 0.0004987594558575673, "loss": 0.6441, "step": 21695 }, { "epoch": 1.1016867408714193, "grad_norm": 0.030826724798907388, "learning_rate": 0.0004985379307014079, "loss": 0.6283, "step": 21700 }, { "epoch": 1.1019405769694508, "grad_norm": 0.031675542914022935, "learning_rate": 0.0004983164058322455, "loss": 0.6321, "step": 21705 }, { "epoch": 1.1021944130674823, "grad_norm": 0.034287831690269666, "learning_rate": 0.000498094881293564, "loss": 0.655, "step": 21710 }, { "epoch": 1.1024482491655139, "grad_norm": 0.029735044361100776, "learning_rate": 0.000497873357128848, "loss": 0.6199, "step": 21715 }, { "epoch": 1.1027020852635454, "grad_norm": 0.03241340254753976, "learning_rate": 0.0004976518333815814, "loss": 0.6247, "step": 21720 }, { "epoch": 1.102955921361577, "grad_norm": 0.0456273326682569, "learning_rate": 0.0004974303100952483, "loss": 0.619, "step": 21725 }, { "epoch": 1.1032097574596083, "grad_norm": 0.027265489282916944, "learning_rate": 0.0004972087873133323, "loss": 0.645, "step": 21730 }, { "epoch": 1.1034635935576398, "grad_norm": 0.02848879646813714, "learning_rate": 0.0004969872650793176, "loss": 0.6022, "step": 21735 }, { "epoch": 1.1037174296556713, "grad_norm": 0.03313267149843693, "learning_rate": 0.0004967657434366877, "loss": 0.6176, "step": 21740 }, { "epoch": 1.1039712657537029, "grad_norm": 0.026388189884665198, "learning_rate": 0.0004965442224289262, "loss": 0.5859, "step": 21745 }, { "epoch": 1.1042251018517344, "grad_norm": 0.02905094375812271, "learning_rate": 0.0004963227020995167, "loss": 0.6472, "step": 21750 }, { "epoch": 1.104478937949766, "grad_norm": 0.027102768352581845, "learning_rate": 0.0004961011824919422, "loss": 0.6484, "step": 21755 }, { "epoch": 1.1047327740477972, "grad_norm": 0.028834090598019276, "learning_rate": 0.0004958796636496864, "loss": 0.595, "step": 21760 }, { "epoch": 1.1049866101458288, "grad_norm": 0.02796603113394203, "learning_rate": 0.0004956581456162319, "loss": 0.5968, "step": 21765 }, { "epoch": 1.1052404462438603, "grad_norm": 0.032147153590463336, "learning_rate": 0.0004954366284350617, "loss": 0.6329, "step": 21770 }, { "epoch": 1.1054942823418918, "grad_norm": 0.02713222410385503, "learning_rate": 0.0004952151121496587, "loss": 0.6287, "step": 21775 }, { "epoch": 1.1057481184399234, "grad_norm": 0.02611431967302617, "learning_rate": 0.0004949935968035054, "loss": 0.623, "step": 21780 }, { "epoch": 1.106001954537955, "grad_norm": 0.02781331505918938, "learning_rate": 0.000494772082440084, "loss": 0.6287, "step": 21785 }, { "epoch": 1.1062557906359864, "grad_norm": 0.02747486555895183, "learning_rate": 0.0004945505691028769, "loss": 0.632, "step": 21790 }, { "epoch": 1.1065096267340178, "grad_norm": 0.02797016009406927, "learning_rate": 0.0004943290568353657, "loss": 0.6087, "step": 21795 }, { "epoch": 1.1067634628320493, "grad_norm": 0.026996408214778066, "learning_rate": 0.0004941075456810324, "loss": 0.6118, "step": 21800 }, { "epoch": 1.1070172989300808, "grad_norm": 0.02867304246102084, "learning_rate": 0.0004938860356833585, "loss": 0.6244, "step": 21805 }, { "epoch": 1.1072711350281124, "grad_norm": 0.03343813638485912, "learning_rate": 0.0004936645268858253, "loss": 0.5992, "step": 21810 }, { "epoch": 1.107524971126144, "grad_norm": 0.02743600434704491, "learning_rate": 0.000493443019331914, "loss": 0.6063, "step": 21815 }, { "epoch": 1.1077788072241754, "grad_norm": 0.026680565625553217, "learning_rate": 0.0004932215130651052, "loss": 0.6368, "step": 21820 }, { "epoch": 1.1080326433222067, "grad_norm": 0.0307286647874672, "learning_rate": 0.0004930000081288797, "loss": 0.6112, "step": 21825 }, { "epoch": 1.1082864794202383, "grad_norm": 0.032231183227362924, "learning_rate": 0.0004927785045667173, "loss": 0.603, "step": 21830 }, { "epoch": 1.1085403155182698, "grad_norm": 0.027580757220128458, "learning_rate": 0.0004925570024220987, "loss": 0.6439, "step": 21835 }, { "epoch": 1.1087941516163013, "grad_norm": 0.030154033050273688, "learning_rate": 0.0004923355017385035, "loss": 0.6116, "step": 21840 }, { "epoch": 1.1090479877143329, "grad_norm": 0.02897468303952431, "learning_rate": 0.000492114002559411, "loss": 0.5921, "step": 21845 }, { "epoch": 1.1093018238123644, "grad_norm": 0.031790507899298937, "learning_rate": 0.0004918925049283005, "loss": 0.6633, "step": 21850 }, { "epoch": 1.109555659910396, "grad_norm": 0.028138153895113927, "learning_rate": 0.0004916710088886508, "loss": 0.6068, "step": 21855 }, { "epoch": 1.1098094960084273, "grad_norm": 0.028454951582153023, "learning_rate": 0.0004914495144839406, "loss": 0.5925, "step": 21860 }, { "epoch": 1.1100633321064588, "grad_norm": 0.026061456168074894, "learning_rate": 0.0004912280217576481, "loss": 0.6021, "step": 21865 }, { "epoch": 1.1103171682044903, "grad_norm": 0.028348069160824496, "learning_rate": 0.0004910065307532511, "loss": 0.6339, "step": 21870 }, { "epoch": 1.1105710043025219, "grad_norm": 0.028312179379507293, "learning_rate": 0.0004907850415142273, "loss": 0.611, "step": 21875 }, { "epoch": 1.1108248404005534, "grad_norm": 0.02792166448163546, "learning_rate": 0.0004905635540840539, "loss": 0.6008, "step": 21880 }, { "epoch": 1.111078676498585, "grad_norm": 0.028766781041880633, "learning_rate": 0.0004903420685062077, "loss": 0.6283, "step": 21885 }, { "epoch": 1.1113325125966163, "grad_norm": 0.031654488989599494, "learning_rate": 0.0004901205848241654, "loss": 0.6417, "step": 21890 }, { "epoch": 1.1115863486946478, "grad_norm": 0.02987593514438151, "learning_rate": 0.0004898991030814028, "loss": 0.6545, "step": 21895 }, { "epoch": 1.1118401847926793, "grad_norm": 0.03088565246639466, "learning_rate": 0.000489677623321396, "loss": 0.6001, "step": 21900 }, { "epoch": 1.1120940208907109, "grad_norm": 0.03058628798875359, "learning_rate": 0.0004894561455876204, "loss": 0.6563, "step": 21905 }, { "epoch": 1.1123478569887424, "grad_norm": 0.3858380682280928, "learning_rate": 0.0004892346699235507, "loss": 0.653, "step": 21910 }, { "epoch": 1.112601693086774, "grad_norm": 0.03502408529964659, "learning_rate": 0.0004890131963726617, "loss": 0.6203, "step": 21915 }, { "epoch": 1.1128555291848055, "grad_norm": 0.03275478462183682, "learning_rate": 0.0004887917249784275, "loss": 0.6279, "step": 21920 }, { "epoch": 1.1131093652828368, "grad_norm": 0.030790687594776246, "learning_rate": 0.0004885702557843217, "loss": 0.6111, "step": 21925 }, { "epoch": 1.1133632013808683, "grad_norm": 0.033919729517144434, "learning_rate": 0.0004883487888338177, "loss": 0.6133, "step": 21930 }, { "epoch": 1.1136170374788998, "grad_norm": 0.02695173670497474, "learning_rate": 0.0004881273241703884, "loss": 0.6605, "step": 21935 }, { "epoch": 1.1138708735769314, "grad_norm": 0.028758369200681234, "learning_rate": 0.00048790586183750605, "loss": 0.6135, "step": 21940 }, { "epoch": 1.114124709674963, "grad_norm": 0.0291801154509777, "learning_rate": 0.0004876844018786428, "loss": 0.6372, "step": 21945 }, { "epoch": 1.1143785457729944, "grad_norm": 0.026182137182926297, "learning_rate": 0.00048746294433727003, "loss": 0.6222, "step": 21950 }, { "epoch": 1.114632381871026, "grad_norm": 0.0305858844903621, "learning_rate": 0.0004872414892568585, "loss": 0.6355, "step": 21955 }, { "epoch": 1.1148862179690573, "grad_norm": 0.03261319651821448, "learning_rate": 0.00048702003668087926, "loss": 0.6509, "step": 21960 }, { "epoch": 1.1151400540670888, "grad_norm": 0.027498842824706934, "learning_rate": 0.00048679858665280206, "loss": 0.6051, "step": 21965 }, { "epoch": 1.1153938901651204, "grad_norm": 0.03183074596418746, "learning_rate": 0.00048657713921609647, "loss": 0.6387, "step": 21970 }, { "epoch": 1.115647726263152, "grad_norm": 0.03182789999529789, "learning_rate": 0.0004863556944142316, "loss": 0.6226, "step": 21975 }, { "epoch": 1.1159015623611834, "grad_norm": 0.030998690609891067, "learning_rate": 0.00048613425229067575, "loss": 0.5928, "step": 21980 }, { "epoch": 1.116155398459215, "grad_norm": 0.029152224405808353, "learning_rate": 0.0004859128128888971, "loss": 0.6692, "step": 21985 }, { "epoch": 1.1164092345572465, "grad_norm": 0.02820399568831847, "learning_rate": 0.000485691376252363, "loss": 0.6649, "step": 21990 }, { "epoch": 1.1166630706552778, "grad_norm": 0.026563899617830976, "learning_rate": 0.0004854699424245404, "loss": 0.6362, "step": 21995 }, { "epoch": 1.1169169067533093, "grad_norm": 0.030163918683637863, "learning_rate": 0.00048524851144889563, "loss": 0.6369, "step": 22000 }, { "epoch": 1.1171707428513409, "grad_norm": 0.03058179317344724, "learning_rate": 0.0004850270833688945, "loss": 0.6546, "step": 22005 }, { "epoch": 1.1174245789493724, "grad_norm": 0.03435401938831556, "learning_rate": 0.0004848056582280022, "loss": 0.6006, "step": 22010 }, { "epoch": 1.117678415047404, "grad_norm": 0.027141974185599355, "learning_rate": 0.00048458423606968337, "loss": 0.6501, "step": 22015 }, { "epoch": 1.1179322511454355, "grad_norm": 0.031308970116469804, "learning_rate": 0.0004843628169374022, "loss": 0.6277, "step": 22020 }, { "epoch": 1.1181860872434668, "grad_norm": 0.02943465339769898, "learning_rate": 0.0004841414008746221, "loss": 0.608, "step": 22025 }, { "epoch": 1.1184399233414983, "grad_norm": 0.0275601253785938, "learning_rate": 0.0004839199879248059, "loss": 0.6249, "step": 22030 }, { "epoch": 1.1186937594395299, "grad_norm": 0.02591006301816938, "learning_rate": 0.00048369857813141586, "loss": 0.5874, "step": 22035 }, { "epoch": 1.1189475955375614, "grad_norm": 0.025896800871253924, "learning_rate": 0.00048347717153791365, "loss": 0.5923, "step": 22040 }, { "epoch": 1.119201431635593, "grad_norm": 0.02967675877798653, "learning_rate": 0.0004832557681877603, "loss": 0.6336, "step": 22045 }, { "epoch": 1.1194552677336245, "grad_norm": 0.03402377528970238, "learning_rate": 0.0004830343681244161, "loss": 0.647, "step": 22050 }, { "epoch": 1.119709103831656, "grad_norm": 0.026565862502883352, "learning_rate": 0.0004828129713913409, "loss": 0.6232, "step": 22055 }, { "epoch": 1.1199629399296873, "grad_norm": 0.028043166055980836, "learning_rate": 0.0004825915780319937, "loss": 0.6183, "step": 22060 }, { "epoch": 1.1202167760277189, "grad_norm": 0.028018843455680793, "learning_rate": 0.00048237018808983286, "loss": 0.6335, "step": 22065 }, { "epoch": 1.1204706121257504, "grad_norm": 0.027118126196709908, "learning_rate": 0.0004821488016083162, "loss": 0.6084, "step": 22070 }, { "epoch": 1.120724448223782, "grad_norm": 0.030113564960245975, "learning_rate": 0.0004819274186309005, "loss": 0.6409, "step": 22075 }, { "epoch": 1.1209782843218135, "grad_norm": 0.031684571408786194, "learning_rate": 0.0004817060392010427, "loss": 0.646, "step": 22080 }, { "epoch": 1.121232120419845, "grad_norm": 0.027714014158573493, "learning_rate": 0.0004814846633621981, "loss": 0.6242, "step": 22085 }, { "epoch": 1.1214859565178763, "grad_norm": 0.0314031605350087, "learning_rate": 0.0004812632911578218, "loss": 0.6133, "step": 22090 }, { "epoch": 1.1217397926159078, "grad_norm": 0.029648054564404953, "learning_rate": 0.000481041922631368, "loss": 0.6386, "step": 22095 }, { "epoch": 1.1219936287139394, "grad_norm": 0.031171661912163754, "learning_rate": 0.00048082055782629017, "loss": 0.6408, "step": 22100 }, { "epoch": 1.122247464811971, "grad_norm": 0.03315498484125005, "learning_rate": 0.00048059919678604125, "loss": 0.6229, "step": 22105 }, { "epoch": 1.1225013009100024, "grad_norm": 0.03094063163266609, "learning_rate": 0.0004803778395540733, "loss": 0.6184, "step": 22110 }, { "epoch": 1.122755137008034, "grad_norm": 0.032632812991635576, "learning_rate": 0.0004801564861738375, "loss": 0.5846, "step": 22115 }, { "epoch": 1.1230089731060655, "grad_norm": 0.024754189935516042, "learning_rate": 0.00047993513668878455, "loss": 0.6178, "step": 22120 }, { "epoch": 1.1232628092040968, "grad_norm": 0.026070451815996723, "learning_rate": 0.0004797137911423642, "loss": 0.6239, "step": 22125 }, { "epoch": 1.1235166453021284, "grad_norm": 0.027532298164589967, "learning_rate": 0.00047949244957802545, "loss": 0.6384, "step": 22130 }, { "epoch": 1.12377048140016, "grad_norm": 0.03109280544803846, "learning_rate": 0.0004792711120392165, "loss": 0.635, "step": 22135 }, { "epoch": 1.1240243174981914, "grad_norm": 0.030044686230880968, "learning_rate": 0.00047904977856938496, "loss": 0.5987, "step": 22140 }, { "epoch": 1.124278153596223, "grad_norm": 0.03317002438895142, "learning_rate": 0.0004788284492119775, "loss": 0.6225, "step": 22145 }, { "epoch": 1.1245319896942545, "grad_norm": 0.03096984028441982, "learning_rate": 0.00047860712401043976, "loss": 0.6122, "step": 22150 }, { "epoch": 1.1247858257922858, "grad_norm": 0.02974108155493261, "learning_rate": 0.00047838580300821695, "loss": 0.6098, "step": 22155 }, { "epoch": 1.1250396618903173, "grad_norm": 0.03227886531489935, "learning_rate": 0.0004781644862487532, "loss": 0.6113, "step": 22160 }, { "epoch": 1.1252934979883489, "grad_norm": 0.029482807634831555, "learning_rate": 0.000477943173775492, "loss": 0.6282, "step": 22165 }, { "epoch": 1.1255473340863804, "grad_norm": 0.02611930683862336, "learning_rate": 0.00047772186563187566, "loss": 0.597, "step": 22170 }, { "epoch": 1.125801170184412, "grad_norm": 0.03122998347997647, "learning_rate": 0.00047750056186134603, "loss": 0.648, "step": 22175 }, { "epoch": 1.1260550062824435, "grad_norm": 0.026631288110545173, "learning_rate": 0.00047727926250734393, "loss": 0.623, "step": 22180 }, { "epoch": 1.126308842380475, "grad_norm": 0.03216083621273097, "learning_rate": 0.00047705796761330927, "loss": 0.6493, "step": 22185 }, { "epoch": 1.1265626784785066, "grad_norm": 0.031452051434923876, "learning_rate": 0.00047683667722268116, "loss": 0.6082, "step": 22190 }, { "epoch": 1.1268165145765379, "grad_norm": 0.033985385387129266, "learning_rate": 0.0004766153913788976, "loss": 0.6131, "step": 22195 }, { "epoch": 1.1270703506745694, "grad_norm": 0.02758481568194239, "learning_rate": 0.00047639411012539626, "loss": 0.6367, "step": 22200 }, { "epoch": 1.127324186772601, "grad_norm": 0.030409936475034177, "learning_rate": 0.0004761728335056134, "loss": 0.5873, "step": 22205 }, { "epoch": 1.1275780228706325, "grad_norm": 0.028741252394413497, "learning_rate": 0.00047595156156298455, "loss": 0.5762, "step": 22210 }, { "epoch": 1.127831858968664, "grad_norm": 0.02829983342168853, "learning_rate": 0.0004757302943409442, "loss": 0.6418, "step": 22215 }, { "epoch": 1.1280856950666953, "grad_norm": 0.02936745733009566, "learning_rate": 0.000475509031882926, "loss": 0.6028, "step": 22220 }, { "epoch": 1.1283395311647268, "grad_norm": 0.03177434587227307, "learning_rate": 0.00047528777423236276, "loss": 0.6316, "step": 22225 }, { "epoch": 1.1285933672627584, "grad_norm": 0.030029143233318928, "learning_rate": 0.00047506652143268615, "loss": 0.6367, "step": 22230 }, { "epoch": 1.12884720336079, "grad_norm": 0.028746213310016176, "learning_rate": 0.0004748452735273271, "loss": 0.6037, "step": 22235 }, { "epoch": 1.1291010394588215, "grad_norm": 0.028915657841646833, "learning_rate": 0.0004746240305597154, "loss": 0.6197, "step": 22240 }, { "epoch": 1.129354875556853, "grad_norm": 0.03002088047280267, "learning_rate": 0.0004744027925732799, "loss": 0.6195, "step": 22245 }, { "epoch": 1.1296087116548845, "grad_norm": 0.03174538667538779, "learning_rate": 0.0004741815596114486, "loss": 0.5711, "step": 22250 }, { "epoch": 1.129862547752916, "grad_norm": 0.028396929061009945, "learning_rate": 0.00047396033171764825, "loss": 0.5712, "step": 22255 }, { "epoch": 1.1301163838509474, "grad_norm": 0.03195778170436093, "learning_rate": 0.00047373910893530504, "loss": 0.67, "step": 22260 }, { "epoch": 1.130370219948979, "grad_norm": 0.025688105588926896, "learning_rate": 0.00047351789130784384, "loss": 0.6084, "step": 22265 }, { "epoch": 1.1306240560470104, "grad_norm": 0.02681277600411434, "learning_rate": 0.00047329667887868846, "loss": 0.6051, "step": 22270 }, { "epoch": 1.130877892145042, "grad_norm": 0.02904659292378993, "learning_rate": 0.00047307547169126183, "loss": 0.6041, "step": 22275 }, { "epoch": 1.1311317282430735, "grad_norm": 0.028610185210701407, "learning_rate": 0.0004728542697889859, "loss": 0.6237, "step": 22280 }, { "epoch": 1.131385564341105, "grad_norm": 0.031203109678995155, "learning_rate": 0.00047263307321528136, "loss": 0.626, "step": 22285 }, { "epoch": 1.1316394004391364, "grad_norm": 0.029319496669646252, "learning_rate": 0.0004724118820135681, "loss": 0.6661, "step": 22290 }, { "epoch": 1.131893236537168, "grad_norm": 0.02905154251869747, "learning_rate": 0.00047219069622726485, "loss": 0.6528, "step": 22295 }, { "epoch": 1.1321470726351994, "grad_norm": 0.027816818631144942, "learning_rate": 0.0004719695158997892, "loss": 0.6125, "step": 22300 }, { "epoch": 1.132400908733231, "grad_norm": 0.02846938132269483, "learning_rate": 0.00047174834107455784, "loss": 0.6196, "step": 22305 }, { "epoch": 1.1326547448312625, "grad_norm": 0.026297955010337375, "learning_rate": 0.00047152717179498624, "loss": 0.6549, "step": 22310 }, { "epoch": 1.132908580929294, "grad_norm": 0.029216881107967562, "learning_rate": 0.00047130600810448855, "loss": 0.5751, "step": 22315 }, { "epoch": 1.1331624170273256, "grad_norm": 0.030117673911079595, "learning_rate": 0.0004710848500464786, "loss": 0.6409, "step": 22320 }, { "epoch": 1.1334162531253569, "grad_norm": 0.029153859013856722, "learning_rate": 0.0004708636976643684, "loss": 0.5816, "step": 22325 }, { "epoch": 1.1336700892233884, "grad_norm": 0.02931633250050365, "learning_rate": 0.00047064255100156904, "loss": 0.5815, "step": 22330 }, { "epoch": 1.13392392532142, "grad_norm": 0.02802339216177387, "learning_rate": 0.00047042141010149053, "loss": 0.6278, "step": 22335 }, { "epoch": 1.1341777614194515, "grad_norm": 0.027346470512525835, "learning_rate": 0.0004702002750075417, "loss": 0.6271, "step": 22340 }, { "epoch": 1.134431597517483, "grad_norm": 0.029432876284928657, "learning_rate": 0.0004699791457631303, "loss": 0.6303, "step": 22345 }, { "epoch": 1.1346854336155145, "grad_norm": 0.02811173519550666, "learning_rate": 0.00046975802241166283, "loss": 0.6341, "step": 22350 }, { "epoch": 1.1349392697135459, "grad_norm": 0.02885784470398463, "learning_rate": 0.00046953690499654477, "loss": 0.6665, "step": 22355 }, { "epoch": 1.1351931058115774, "grad_norm": 0.030723083952924567, "learning_rate": 0.0004693157935611803, "loss": 0.5921, "step": 22360 }, { "epoch": 1.135446941909609, "grad_norm": 0.02816834792631118, "learning_rate": 0.0004690946881489726, "loss": 0.6332, "step": 22365 }, { "epoch": 1.1357007780076405, "grad_norm": 0.027434770045047265, "learning_rate": 0.00046887358880332345, "loss": 0.6526, "step": 22370 }, { "epoch": 1.135954614105672, "grad_norm": 0.028582717020457758, "learning_rate": 0.00046865249556763344, "loss": 0.6592, "step": 22375 }, { "epoch": 1.1362084502037035, "grad_norm": 0.02613528460941682, "learning_rate": 0.0004684314084853024, "loss": 0.5896, "step": 22380 }, { "epoch": 1.136462286301735, "grad_norm": 0.039708214549440346, "learning_rate": 0.0004682103275997284, "loss": 0.6732, "step": 22385 }, { "epoch": 1.1367161223997664, "grad_norm": 0.028082928371057154, "learning_rate": 0.00046798925295430863, "loss": 0.5985, "step": 22390 }, { "epoch": 1.136969958497798, "grad_norm": 0.03352870883025746, "learning_rate": 0.00046776818459243874, "loss": 0.601, "step": 22395 }, { "epoch": 1.1372237945958295, "grad_norm": 0.0275484525436852, "learning_rate": 0.0004675471225575136, "loss": 0.6182, "step": 22400 }, { "epoch": 1.137477630693861, "grad_norm": 0.07722743719803081, "learning_rate": 0.00046732606689292637, "loss": 0.5904, "step": 22405 }, { "epoch": 1.1377314667918925, "grad_norm": 0.029077621803640836, "learning_rate": 0.00046710501764206933, "loss": 0.6133, "step": 22410 }, { "epoch": 1.137985302889924, "grad_norm": 0.030531880912692426, "learning_rate": 0.0004668839748483332, "loss": 0.6011, "step": 22415 }, { "epoch": 1.1382391389879554, "grad_norm": 0.03258951882602982, "learning_rate": 0.0004666629385551078, "loss": 0.6473, "step": 22420 }, { "epoch": 1.138492975085987, "grad_norm": 0.03252626574298687, "learning_rate": 0.0004664419088057812, "loss": 0.6337, "step": 22425 }, { "epoch": 1.1387468111840184, "grad_norm": 0.030264576316933453, "learning_rate": 0.0004662208856437405, "loss": 0.6218, "step": 22430 }, { "epoch": 1.13900064728205, "grad_norm": 0.03008341853657956, "learning_rate": 0.00046599986911237135, "loss": 0.619, "step": 22435 }, { "epoch": 1.1392544833800815, "grad_norm": 0.02758384411753497, "learning_rate": 0.00046577885925505857, "loss": 0.6405, "step": 22440 }, { "epoch": 1.139508319478113, "grad_norm": 0.02932418753871491, "learning_rate": 0.00046555785611518505, "loss": 0.6326, "step": 22445 }, { "epoch": 1.1397621555761446, "grad_norm": 0.02657392171763836, "learning_rate": 0.0004653368597361326, "loss": 0.6295, "step": 22450 }, { "epoch": 1.1400159916741759, "grad_norm": 0.026951938490342667, "learning_rate": 0.00046511587016128173, "loss": 0.637, "step": 22455 }, { "epoch": 1.1402698277722074, "grad_norm": 0.028222933938101113, "learning_rate": 0.0004648948874340115, "loss": 0.6321, "step": 22460 }, { "epoch": 1.140523663870239, "grad_norm": 0.03060922262824818, "learning_rate": 0.0004646739115976999, "loss": 0.6106, "step": 22465 }, { "epoch": 1.1407774999682705, "grad_norm": 0.027339677337263345, "learning_rate": 0.00046445294269572326, "loss": 0.6209, "step": 22470 }, { "epoch": 1.141031336066302, "grad_norm": 0.028087356761713596, "learning_rate": 0.0004642319807714567, "loss": 0.6163, "step": 22475 }, { "epoch": 1.1412851721643336, "grad_norm": 0.029556200716096943, "learning_rate": 0.0004640110258682739, "loss": 0.663, "step": 22480 }, { "epoch": 1.1415390082623649, "grad_norm": 0.03031613169581474, "learning_rate": 0.0004637900780295472, "loss": 0.6036, "step": 22485 }, { "epoch": 1.1417928443603964, "grad_norm": 0.0280849853262679, "learning_rate": 0.0004635691372986477, "loss": 0.579, "step": 22490 }, { "epoch": 1.142046680458428, "grad_norm": 0.02720864234858306, "learning_rate": 0.0004633482037189447, "loss": 0.5995, "step": 22495 }, { "epoch": 1.1423005165564595, "grad_norm": 0.027322192235323953, "learning_rate": 0.00046312727733380666, "loss": 0.642, "step": 22500 }, { "epoch": 1.142554352654491, "grad_norm": 0.027748045333845638, "learning_rate": 0.0004629063581866002, "loss": 0.6248, "step": 22505 }, { "epoch": 1.1428081887525225, "grad_norm": 0.028327318859292357, "learning_rate": 0.00046268544632069064, "loss": 0.6448, "step": 22510 }, { "epoch": 1.143062024850554, "grad_norm": 0.5068281401904405, "learning_rate": 0.00046246454177944194, "loss": 0.6366, "step": 22515 }, { "epoch": 1.1433158609485856, "grad_norm": 0.04214419907548619, "learning_rate": 0.0004622436446062164, "loss": 0.5938, "step": 22520 }, { "epoch": 1.143569697046617, "grad_norm": 0.032675742943866964, "learning_rate": 0.0004620227548443752, "loss": 0.6403, "step": 22525 }, { "epoch": 1.1438235331446485, "grad_norm": 0.03270700233492833, "learning_rate": 0.0004618018725372778, "loss": 0.6083, "step": 22530 }, { "epoch": 1.14407736924268, "grad_norm": 0.030608625730338165, "learning_rate": 0.0004615809977282823, "loss": 0.6161, "step": 22535 }, { "epoch": 1.1443312053407115, "grad_norm": 0.03253134639029704, "learning_rate": 0.0004613601304607454, "loss": 0.6328, "step": 22540 }, { "epoch": 1.144585041438743, "grad_norm": 0.03406278569445549, "learning_rate": 0.0004611392707780222, "loss": 0.6334, "step": 22545 }, { "epoch": 1.1448388775367746, "grad_norm": 0.030594788363283453, "learning_rate": 0.00046091841872346627, "loss": 0.6202, "step": 22550 }, { "epoch": 1.145092713634806, "grad_norm": 0.031479079559448904, "learning_rate": 0.00046069757434042975, "loss": 0.6466, "step": 22555 }, { "epoch": 1.1453465497328374, "grad_norm": 0.032928324263483945, "learning_rate": 0.0004604767376722635, "loss": 0.6486, "step": 22560 }, { "epoch": 1.145600385830869, "grad_norm": 0.02892574334708458, "learning_rate": 0.0004602559087623166, "loss": 0.6376, "step": 22565 }, { "epoch": 1.1458542219289005, "grad_norm": 0.02826700653894514, "learning_rate": 0.0004600350876539366, "loss": 0.5956, "step": 22570 }, { "epoch": 1.146108058026932, "grad_norm": 0.026733595764672127, "learning_rate": 0.00045981427439046956, "loss": 0.6028, "step": 22575 }, { "epoch": 1.1463618941249636, "grad_norm": 0.03841157221556567, "learning_rate": 0.00045959346901526006, "loss": 0.6252, "step": 22580 }, { "epoch": 1.1466157302229951, "grad_norm": 0.0298466084043351, "learning_rate": 0.0004593726715716511, "loss": 0.6551, "step": 22585 }, { "epoch": 1.1468695663210264, "grad_norm": 0.028872256372455257, "learning_rate": 0.00045915188210298406, "loss": 0.5863, "step": 22590 }, { "epoch": 1.147123402419058, "grad_norm": 0.030715572990258264, "learning_rate": 0.00045893110065259893, "loss": 0.6549, "step": 22595 }, { "epoch": 1.1473772385170895, "grad_norm": 0.026894359884894078, "learning_rate": 0.0004587103272638339, "loss": 0.6006, "step": 22600 }, { "epoch": 1.147631074615121, "grad_norm": 0.02766047737699189, "learning_rate": 0.0004584895619800257, "loss": 0.6317, "step": 22605 }, { "epoch": 1.1478849107131526, "grad_norm": 0.027191288992706765, "learning_rate": 0.00045826880484450946, "loss": 0.6159, "step": 22610 }, { "epoch": 1.148138746811184, "grad_norm": 0.02530728028801115, "learning_rate": 0.0004580480559006186, "loss": 0.5971, "step": 22615 }, { "epoch": 1.1483925829092154, "grad_norm": 0.025408163922942904, "learning_rate": 0.0004578273151916853, "loss": 0.5873, "step": 22620 }, { "epoch": 1.148646419007247, "grad_norm": 0.028756463540330235, "learning_rate": 0.0004576065827610397, "loss": 0.628, "step": 22625 }, { "epoch": 1.1489002551052785, "grad_norm": 0.029295736675674713, "learning_rate": 0.0004573858586520105, "loss": 0.6212, "step": 22630 }, { "epoch": 1.14915409120331, "grad_norm": 0.029596542565721105, "learning_rate": 0.0004571651429079247, "loss": 0.615, "step": 22635 }, { "epoch": 1.1494079273013416, "grad_norm": 0.024639776725090226, "learning_rate": 0.00045694443557210777, "loss": 0.6234, "step": 22640 }, { "epoch": 1.149661763399373, "grad_norm": 0.027261686586937166, "learning_rate": 0.00045672373668788336, "loss": 0.5901, "step": 22645 }, { "epoch": 1.1499155994974046, "grad_norm": 0.028673844315637426, "learning_rate": 0.0004565030462985737, "loss": 0.6277, "step": 22650 }, { "epoch": 1.150169435595436, "grad_norm": 0.026990706241773924, "learning_rate": 0.00045628236444749905, "loss": 0.5933, "step": 22655 }, { "epoch": 1.1504232716934675, "grad_norm": 0.026353668194660132, "learning_rate": 0.0004560616911779783, "loss": 0.6137, "step": 22660 }, { "epoch": 1.150677107791499, "grad_norm": 0.2650534239866454, "learning_rate": 0.00045584102653332845, "loss": 0.5986, "step": 22665 }, { "epoch": 1.1509309438895305, "grad_norm": 0.03216871886744392, "learning_rate": 0.0004556203705568648, "loss": 0.6434, "step": 22670 }, { "epoch": 1.151184779987562, "grad_norm": 0.0390378794176396, "learning_rate": 0.0004553997232919009, "loss": 0.6564, "step": 22675 }, { "epoch": 1.1514386160855936, "grad_norm": 0.030619482132857527, "learning_rate": 0.00045517908478174917, "loss": 0.6486, "step": 22680 }, { "epoch": 1.151692452183625, "grad_norm": 0.027336311935839425, "learning_rate": 0.0004549584550697196, "loss": 0.588, "step": 22685 }, { "epoch": 1.1519462882816565, "grad_norm": 0.027746453852511236, "learning_rate": 0.00045473783419912057, "loss": 0.6108, "step": 22690 }, { "epoch": 1.152200124379688, "grad_norm": 0.03051760971513631, "learning_rate": 0.000454517222213259, "loss": 0.6214, "step": 22695 }, { "epoch": 1.1524539604777195, "grad_norm": 0.026527758432513607, "learning_rate": 0.00045429661915543995, "loss": 0.641, "step": 22700 }, { "epoch": 1.152707796575751, "grad_norm": 0.02705465743605758, "learning_rate": 0.0004540760250689666, "loss": 0.6432, "step": 22705 }, { "epoch": 1.1529616326737826, "grad_norm": 0.026699911430202086, "learning_rate": 0.0004538554399971406, "loss": 0.6288, "step": 22710 }, { "epoch": 1.1532154687718141, "grad_norm": 0.027345415553153323, "learning_rate": 0.00045363486398326147, "loss": 0.6099, "step": 22715 }, { "epoch": 1.1534693048698454, "grad_norm": 0.02868493324046476, "learning_rate": 0.0004534142970706274, "loss": 0.6648, "step": 22720 }, { "epoch": 1.153723140967877, "grad_norm": 0.029848907769854875, "learning_rate": 0.0004531937393025344, "loss": 0.6397, "step": 22725 }, { "epoch": 1.1539769770659085, "grad_norm": 0.024692190193772005, "learning_rate": 0.000452973190722277, "loss": 0.6249, "step": 22730 }, { "epoch": 1.15423081316394, "grad_norm": 0.028053185338354884, "learning_rate": 0.00045275265137314754, "loss": 0.663, "step": 22735 }, { "epoch": 1.1544846492619716, "grad_norm": 0.028501661049271744, "learning_rate": 0.0004525321212984372, "loss": 0.6262, "step": 22740 }, { "epoch": 1.1547384853600031, "grad_norm": 0.02606377273641178, "learning_rate": 0.00045231160054143467, "loss": 0.6158, "step": 22745 }, { "epoch": 1.1549923214580344, "grad_norm": 0.027647159825867847, "learning_rate": 0.00045209108914542716, "loss": 0.6407, "step": 22750 }, { "epoch": 1.155246157556066, "grad_norm": 0.02674793278125167, "learning_rate": 0.0004518705871537, "loss": 0.6471, "step": 22755 }, { "epoch": 1.1554999936540975, "grad_norm": 0.02735774163106191, "learning_rate": 0.0004516500946095365, "loss": 0.5942, "step": 22760 }, { "epoch": 1.155753829752129, "grad_norm": 0.02985636610632689, "learning_rate": 0.0004514296115562183, "loss": 0.6518, "step": 22765 }, { "epoch": 1.1560076658501606, "grad_norm": 0.025208227199806445, "learning_rate": 0.0004512091380370251, "loss": 0.6041, "step": 22770 }, { "epoch": 1.156261501948192, "grad_norm": 0.03023802695395942, "learning_rate": 0.00045098867409523486, "loss": 0.6196, "step": 22775 }, { "epoch": 1.1565153380462236, "grad_norm": 0.03183598440129252, "learning_rate": 0.0004507682197741235, "loss": 0.5985, "step": 22780 }, { "epoch": 1.1567691741442552, "grad_norm": 0.026388264849811896, "learning_rate": 0.000450547775116965, "loss": 0.5848, "step": 22785 }, { "epoch": 1.1570230102422865, "grad_norm": 0.025837657821127572, "learning_rate": 0.00045032734016703163, "loss": 0.6015, "step": 22790 }, { "epoch": 1.157276846340318, "grad_norm": 0.026349576265908434, "learning_rate": 0.0004501069149675937, "loss": 0.6365, "step": 22795 }, { "epoch": 1.1575306824383496, "grad_norm": 0.029034089141978716, "learning_rate": 0.00044988649956191943, "loss": 0.6364, "step": 22800 }, { "epoch": 1.157784518536381, "grad_norm": 0.03459433261510473, "learning_rate": 0.00044966609399327544, "loss": 0.6109, "step": 22805 }, { "epoch": 1.1580383546344126, "grad_norm": 0.028404286811755584, "learning_rate": 0.0004494456983049263, "loss": 0.6547, "step": 22810 }, { "epoch": 1.158292190732444, "grad_norm": 0.02856883101908499, "learning_rate": 0.0004492253125401344, "loss": 0.6403, "step": 22815 }, { "epoch": 1.1585460268304755, "grad_norm": 0.027646837929665213, "learning_rate": 0.00044900493674216043, "loss": 0.6571, "step": 22820 }, { "epoch": 1.158799862928507, "grad_norm": 0.025866232201213726, "learning_rate": 0.00044878457095426307, "loss": 0.5942, "step": 22825 }, { "epoch": 1.1590536990265385, "grad_norm": 0.0314362609502642, "learning_rate": 0.000448564215219699, "loss": 0.6189, "step": 22830 }, { "epoch": 1.15930753512457, "grad_norm": 0.032061916274796425, "learning_rate": 0.00044834386958172295, "loss": 0.6514, "step": 22835 }, { "epoch": 1.1595613712226016, "grad_norm": 0.026550126054422515, "learning_rate": 0.00044812353408358777, "loss": 0.6162, "step": 22840 }, { "epoch": 1.1598152073206331, "grad_norm": 0.027287422085443203, "learning_rate": 0.0004479032087685441, "loss": 0.6246, "step": 22845 }, { "epoch": 1.1600690434186647, "grad_norm": 0.028083630645216233, "learning_rate": 0.00044768289367984077, "loss": 0.642, "step": 22850 }, { "epoch": 1.160322879516696, "grad_norm": 0.026903365115405368, "learning_rate": 0.0004474625888607245, "loss": 0.6347, "step": 22855 }, { "epoch": 1.1605767156147275, "grad_norm": 0.03086912221195035, "learning_rate": 0.00044724229435443973, "loss": 0.6466, "step": 22860 }, { "epoch": 1.160830551712759, "grad_norm": 0.028051539244347095, "learning_rate": 0.0004470220102042298, "loss": 0.6357, "step": 22865 }, { "epoch": 1.1610843878107906, "grad_norm": 0.02882743747057163, "learning_rate": 0.00044680173645333504, "loss": 0.6317, "step": 22870 }, { "epoch": 1.1613382239088221, "grad_norm": 0.030311482823799626, "learning_rate": 0.0004465814731449941, "loss": 0.6113, "step": 22875 }, { "epoch": 1.1615920600068537, "grad_norm": 0.02806766578890326, "learning_rate": 0.0004463612203224436, "loss": 0.6365, "step": 22880 }, { "epoch": 1.161845896104885, "grad_norm": 0.027085548534945176, "learning_rate": 0.0004461409780289181, "loss": 0.5985, "step": 22885 }, { "epoch": 1.1620997322029165, "grad_norm": 0.027102672686189176, "learning_rate": 0.0004459207463076499, "loss": 0.6006, "step": 22890 }, { "epoch": 1.162353568300948, "grad_norm": 0.034733598476475336, "learning_rate": 0.00044570052520186956, "loss": 0.6331, "step": 22895 }, { "epoch": 1.1626074043989796, "grad_norm": 0.03091713121564943, "learning_rate": 0.00044548031475480533, "loss": 0.6422, "step": 22900 }, { "epoch": 1.1628612404970111, "grad_norm": 0.028895086768020772, "learning_rate": 0.0004452601150096834, "loss": 0.6424, "step": 22905 }, { "epoch": 1.1631150765950427, "grad_norm": 0.03643044046984113, "learning_rate": 0.000445039926009728, "loss": 0.6339, "step": 22910 }, { "epoch": 1.1633689126930742, "grad_norm": 0.03220092864427975, "learning_rate": 0.00044481974779816096, "loss": 0.613, "step": 22915 }, { "epoch": 1.1636227487911055, "grad_norm": 0.02871497576877088, "learning_rate": 0.00044459958041820217, "loss": 0.6386, "step": 22920 }, { "epoch": 1.163876584889137, "grad_norm": 0.3179752494863042, "learning_rate": 0.0004443794239130696, "loss": 0.6584, "step": 22925 }, { "epoch": 1.1641304209871686, "grad_norm": 0.0296069489480403, "learning_rate": 0.00044415927832597865, "loss": 0.6026, "step": 22930 }, { "epoch": 1.1643842570852, "grad_norm": 0.031524870342341124, "learning_rate": 0.00044393914370014295, "loss": 0.6154, "step": 22935 }, { "epoch": 1.1646380931832316, "grad_norm": 0.031105894976730518, "learning_rate": 0.00044371902007877374, "loss": 0.6265, "step": 22940 }, { "epoch": 1.1648919292812632, "grad_norm": 0.030124163114069947, "learning_rate": 0.0004434989075050802, "loss": 0.6411, "step": 22945 }, { "epoch": 1.1651457653792945, "grad_norm": 0.02923098733102157, "learning_rate": 0.0004432788060222694, "loss": 0.606, "step": 22950 }, { "epoch": 1.165399601477326, "grad_norm": 0.02648828246644887, "learning_rate": 0.00044305871567354606, "loss": 0.6332, "step": 22955 }, { "epoch": 1.1656534375753576, "grad_norm": 0.027464568114043995, "learning_rate": 0.0004428386365021129, "loss": 0.6359, "step": 22960 }, { "epoch": 1.165907273673389, "grad_norm": 0.027975188621120056, "learning_rate": 0.0004426185685511703, "loss": 0.6473, "step": 22965 }, { "epoch": 1.1661611097714206, "grad_norm": 0.02857448826475402, "learning_rate": 0.00044239851186391653, "loss": 0.5928, "step": 22970 }, { "epoch": 1.1664149458694522, "grad_norm": 0.026673651999183718, "learning_rate": 0.00044217846648354764, "loss": 0.5579, "step": 22975 }, { "epoch": 1.1666687819674837, "grad_norm": 0.03103200034328798, "learning_rate": 0.00044195843245325723, "loss": 0.6179, "step": 22980 }, { "epoch": 1.166922618065515, "grad_norm": 0.02822345630192866, "learning_rate": 0.0004417384098162373, "loss": 0.66, "step": 22985 }, { "epoch": 1.1671764541635465, "grad_norm": 0.029811499940814775, "learning_rate": 0.00044151839861567694, "loss": 0.6326, "step": 22990 }, { "epoch": 1.167430290261578, "grad_norm": 0.10603077112566263, "learning_rate": 0.0004412983988947633, "loss": 0.6456, "step": 22995 }, { "epoch": 1.1676841263596096, "grad_norm": 0.060112473417473763, "learning_rate": 0.0004410784106966812, "loss": 0.618, "step": 23000 }, { "epoch": 1.1679379624576411, "grad_norm": 0.04296938002428486, "learning_rate": 0.0004408584340646132, "loss": 0.6546, "step": 23005 }, { "epoch": 1.1681917985556727, "grad_norm": 0.5965490151779677, "learning_rate": 0.0004406384690417397, "loss": 0.6458, "step": 23010 }, { "epoch": 1.168445634653704, "grad_norm": 0.03482098583059092, "learning_rate": 0.0004404185156712387, "loss": 0.6469, "step": 23015 }, { "epoch": 1.1686994707517355, "grad_norm": 0.03061492074440805, "learning_rate": 0.00044019857399628593, "loss": 0.5867, "step": 23020 }, { "epoch": 1.168953306849767, "grad_norm": 0.034388333805591434, "learning_rate": 0.0004399786440600549, "loss": 0.6291, "step": 23025 }, { "epoch": 1.1692071429477986, "grad_norm": 0.028684102893332144, "learning_rate": 0.0004397587259057166, "loss": 0.5926, "step": 23030 }, { "epoch": 1.1694609790458301, "grad_norm": 0.030081008293143044, "learning_rate": 0.0004395388195764401, "loss": 0.6196, "step": 23035 }, { "epoch": 1.1697148151438617, "grad_norm": 0.031050376055041608, "learning_rate": 0.00043931892511539164, "loss": 0.633, "step": 23040 }, { "epoch": 1.1699686512418932, "grad_norm": 0.030243104219882813, "learning_rate": 0.0004390990425657357, "loss": 0.623, "step": 23045 }, { "epoch": 1.1702224873399247, "grad_norm": 0.029788280943359613, "learning_rate": 0.00043887917197063395, "loss": 0.6091, "step": 23050 }, { "epoch": 1.170476323437956, "grad_norm": 0.027511583775229456, "learning_rate": 0.00043865931337324596, "loss": 0.6046, "step": 23055 }, { "epoch": 1.1707301595359876, "grad_norm": 0.028170031441052987, "learning_rate": 0.0004384394668167288, "loss": 0.6361, "step": 23060 }, { "epoch": 1.1709839956340191, "grad_norm": 0.027609813716081993, "learning_rate": 0.00043821963234423736, "loss": 0.6464, "step": 23065 }, { "epoch": 1.1712378317320506, "grad_norm": 0.03417317781133001, "learning_rate": 0.00043799980999892395, "loss": 0.6469, "step": 23070 }, { "epoch": 1.1714916678300822, "grad_norm": 0.031497826513667136, "learning_rate": 0.00043777999982393866, "loss": 0.6474, "step": 23075 }, { "epoch": 1.1717455039281135, "grad_norm": 0.03158881502933971, "learning_rate": 0.00043756020186242915, "loss": 0.6105, "step": 23080 }, { "epoch": 1.171999340026145, "grad_norm": 0.028931745743617544, "learning_rate": 0.0004373404161575406, "loss": 0.6124, "step": 23085 }, { "epoch": 1.1722531761241766, "grad_norm": 0.027665237422490406, "learning_rate": 0.00043712064275241584, "loss": 0.6312, "step": 23090 }, { "epoch": 1.172507012222208, "grad_norm": 0.030698292663581917, "learning_rate": 0.00043690088169019535, "loss": 0.6034, "step": 23095 }, { "epoch": 1.1727608483202396, "grad_norm": 0.025884221313275686, "learning_rate": 0.0004366811330140169, "loss": 0.581, "step": 23100 }, { "epoch": 1.1730146844182712, "grad_norm": 0.04366380982341068, "learning_rate": 0.0004364613967670165, "loss": 0.6262, "step": 23105 }, { "epoch": 1.1732685205163027, "grad_norm": 0.03405836142924154, "learning_rate": 0.0004362416729923271, "loss": 0.6254, "step": 23110 }, { "epoch": 1.1735223566143342, "grad_norm": 0.04598585450488518, "learning_rate": 0.0004360219617330792, "loss": 0.6027, "step": 23115 }, { "epoch": 1.1737761927123656, "grad_norm": 0.029160566478303823, "learning_rate": 0.00043580226303240125, "loss": 0.6461, "step": 23120 }, { "epoch": 1.174030028810397, "grad_norm": 0.0862691952555387, "learning_rate": 0.0004355825769334189, "loss": 0.6206, "step": 23125 }, { "epoch": 1.1742838649084286, "grad_norm": 0.027719184048990073, "learning_rate": 0.00043536290347925545, "loss": 0.6054, "step": 23130 }, { "epoch": 1.1745377010064602, "grad_norm": 0.08299369241876958, "learning_rate": 0.0004351432427130316, "loss": 0.662, "step": 23135 }, { "epoch": 1.1747915371044917, "grad_norm": 0.02882646209894441, "learning_rate": 0.0004349235946778659, "loss": 0.6428, "step": 23140 }, { "epoch": 1.1750453732025232, "grad_norm": 0.02673777830316676, "learning_rate": 0.000434703959416874, "loss": 0.5871, "step": 23145 }, { "epoch": 1.1752992093005545, "grad_norm": 0.02653189899977786, "learning_rate": 0.0004344843369731692, "loss": 0.5902, "step": 23150 }, { "epoch": 1.175553045398586, "grad_norm": 0.02941908466080837, "learning_rate": 0.00043426472738986233, "loss": 0.6379, "step": 23155 }, { "epoch": 1.1758068814966176, "grad_norm": 0.028868265395127934, "learning_rate": 0.00043404513071006157, "loss": 0.6552, "step": 23160 }, { "epoch": 1.1760607175946491, "grad_norm": 0.02713830062735835, "learning_rate": 0.0004338255469768728, "loss": 0.6215, "step": 23165 }, { "epoch": 1.1763145536926807, "grad_norm": 0.02783579376910735, "learning_rate": 0.0004336059762333992, "loss": 0.6524, "step": 23170 }, { "epoch": 1.1765683897907122, "grad_norm": 0.029346536071203258, "learning_rate": 0.0004333864185227413, "loss": 0.6441, "step": 23175 }, { "epoch": 1.1768222258887437, "grad_norm": 0.030255254405579003, "learning_rate": 0.0004331668738879973, "loss": 0.6318, "step": 23180 }, { "epoch": 1.177076061986775, "grad_norm": 0.02903403298741982, "learning_rate": 0.00043294734237226263, "loss": 0.6188, "step": 23185 }, { "epoch": 1.1773298980848066, "grad_norm": 0.028345865362350266, "learning_rate": 0.0004327278240186303, "loss": 0.6383, "step": 23190 }, { "epoch": 1.1775837341828381, "grad_norm": 0.031018204384821887, "learning_rate": 0.0004325083188701906, "loss": 0.6124, "step": 23195 }, { "epoch": 1.1778375702808697, "grad_norm": 0.029045353884617924, "learning_rate": 0.0004322888269700313, "loss": 0.657, "step": 23200 }, { "epoch": 1.1780914063789012, "grad_norm": 0.030756967721977793, "learning_rate": 0.00043206934836123763, "loss": 0.6069, "step": 23205 }, { "epoch": 1.1783452424769327, "grad_norm": 0.028257567257062792, "learning_rate": 0.0004318498830868921, "loss": 0.5994, "step": 23210 }, { "epoch": 1.178599078574964, "grad_norm": 0.028623475218014206, "learning_rate": 0.0004316304311900746, "loss": 0.6108, "step": 23215 }, { "epoch": 1.1788529146729956, "grad_norm": 0.027695323981103914, "learning_rate": 0.00043141099271386236, "loss": 0.6161, "step": 23220 }, { "epoch": 1.1791067507710271, "grad_norm": 0.02975155951251917, "learning_rate": 0.0004311915677013304, "loss": 0.6431, "step": 23225 }, { "epoch": 1.1793605868690586, "grad_norm": 0.02602316802726786, "learning_rate": 0.00043097215619555053, "loss": 0.6067, "step": 23230 }, { "epoch": 1.1796144229670902, "grad_norm": 0.028192941212547885, "learning_rate": 0.00043075275823959217, "loss": 0.6009, "step": 23235 }, { "epoch": 1.1798682590651217, "grad_norm": 0.03234917657835002, "learning_rate": 0.000430533373876522, "loss": 0.6254, "step": 23240 }, { "epoch": 1.1801220951631533, "grad_norm": 0.027928265757039345, "learning_rate": 0.0004303140031494042, "loss": 0.6083, "step": 23245 }, { "epoch": 1.1803759312611846, "grad_norm": 0.02830682419964761, "learning_rate": 0.0004300946461012999, "loss": 0.6045, "step": 23250 }, { "epoch": 1.180629767359216, "grad_norm": 0.02661222268744032, "learning_rate": 0.0004298753027752681, "loss": 0.6151, "step": 23255 }, { "epoch": 1.1808836034572476, "grad_norm": 0.02940933438752278, "learning_rate": 0.00042965597321436454, "loss": 0.6318, "step": 23260 }, { "epoch": 1.1811374395552792, "grad_norm": 0.024291670711112368, "learning_rate": 0.00042943665746164274, "loss": 0.6046, "step": 23265 }, { "epoch": 1.1813912756533107, "grad_norm": 0.03158118609081792, "learning_rate": 0.0004292173555601531, "loss": 0.6442, "step": 23270 }, { "epoch": 1.1816451117513422, "grad_norm": 0.03030826397910878, "learning_rate": 0.00042899806755294364, "loss": 0.6342, "step": 23275 }, { "epoch": 1.1818989478493735, "grad_norm": 0.03433326076205483, "learning_rate": 0.00042877879348305925, "loss": 0.6033, "step": 23280 }, { "epoch": 1.182152783947405, "grad_norm": 0.02831756524308542, "learning_rate": 0.0004285595333935427, "loss": 0.5731, "step": 23285 }, { "epoch": 1.1824066200454366, "grad_norm": 0.030437584358328336, "learning_rate": 0.0004283402873274334, "loss": 0.6143, "step": 23290 }, { "epoch": 1.1826604561434682, "grad_norm": 0.02828557420528974, "learning_rate": 0.0004281210553277684, "loss": 0.6169, "step": 23295 }, { "epoch": 1.1829142922414997, "grad_norm": 0.026819926343675388, "learning_rate": 0.0004279018374375817, "loss": 0.64, "step": 23300 }, { "epoch": 1.1831681283395312, "grad_norm": 0.027285847619680094, "learning_rate": 0.00042768263369990486, "loss": 0.6152, "step": 23305 }, { "epoch": 1.1834219644375628, "grad_norm": 0.025119965243210703, "learning_rate": 0.00042746344415776634, "loss": 0.644, "step": 23310 }, { "epoch": 1.1836758005355943, "grad_norm": 0.026184334974847796, "learning_rate": 0.00042724426885419197, "loss": 0.6365, "step": 23315 }, { "epoch": 1.1839296366336256, "grad_norm": 0.027961886400347705, "learning_rate": 0.0004270251078322048, "loss": 0.5593, "step": 23320 }, { "epoch": 1.1841834727316571, "grad_norm": 0.03204473106623644, "learning_rate": 0.000426805961134825, "loss": 0.6089, "step": 23325 }, { "epoch": 1.1844373088296887, "grad_norm": 0.025092046726474124, "learning_rate": 0.00042658682880507005, "loss": 0.6125, "step": 23330 }, { "epoch": 1.1846911449277202, "grad_norm": 0.028730472391193453, "learning_rate": 0.0004263677108859545, "loss": 0.6267, "step": 23335 }, { "epoch": 1.1849449810257517, "grad_norm": 0.030784205221816238, "learning_rate": 0.0004261486074204899, "loss": 0.661, "step": 23340 }, { "epoch": 1.185198817123783, "grad_norm": 0.027372677941392073, "learning_rate": 0.0004259295184516855, "loss": 0.6191, "step": 23345 }, { "epoch": 1.1854526532218146, "grad_norm": 0.02727646334030384, "learning_rate": 0.00042571044402254734, "loss": 0.6544, "step": 23350 }, { "epoch": 1.1857064893198461, "grad_norm": 0.02841514113765113, "learning_rate": 0.00042549138417607855, "loss": 0.6442, "step": 23355 }, { "epoch": 1.1859603254178777, "grad_norm": 0.03395558880874732, "learning_rate": 0.0004252723389552794, "loss": 0.5906, "step": 23360 }, { "epoch": 1.1862141615159092, "grad_norm": 0.026462628385632486, "learning_rate": 0.0004250533084031474, "loss": 0.6041, "step": 23365 }, { "epoch": 1.1864679976139407, "grad_norm": 0.027172982614891168, "learning_rate": 0.0004248342925626773, "loss": 0.6355, "step": 23370 }, { "epoch": 1.1867218337119723, "grad_norm": 0.026604854797493655, "learning_rate": 0.0004246152914768607, "loss": 0.5757, "step": 23375 }, { "epoch": 1.1869756698100038, "grad_norm": 0.04084987873068349, "learning_rate": 0.00042439630518868645, "loss": 0.6352, "step": 23380 }, { "epoch": 1.187229505908035, "grad_norm": 0.02614193884518259, "learning_rate": 0.00042417733374114044, "loss": 0.5887, "step": 23385 }, { "epoch": 1.1874833420060666, "grad_norm": 0.03080972652184434, "learning_rate": 0.00042395837717720564, "loss": 0.6572, "step": 23390 }, { "epoch": 1.1877371781040982, "grad_norm": 0.027774210018276596, "learning_rate": 0.0004237394355398622, "loss": 0.5862, "step": 23395 }, { "epoch": 1.1879910142021297, "grad_norm": 0.027908824086833966, "learning_rate": 0.0004235205088720872, "loss": 0.6063, "step": 23400 }, { "epoch": 1.1882448503001612, "grad_norm": 0.02936110103644055, "learning_rate": 0.000423301597216855, "loss": 0.5948, "step": 23405 }, { "epoch": 1.1884986863981928, "grad_norm": 0.026014442563937996, "learning_rate": 0.0004230827006171367, "loss": 0.5911, "step": 23410 }, { "epoch": 1.188752522496224, "grad_norm": 0.02858557045618591, "learning_rate": 0.00042286381911590075, "loss": 0.6409, "step": 23415 }, { "epoch": 1.1890063585942556, "grad_norm": 0.02815009854345172, "learning_rate": 0.0004226449527561124, "loss": 0.5943, "step": 23420 }, { "epoch": 1.1892601946922872, "grad_norm": 0.028437611011184995, "learning_rate": 0.0004224261015807341, "loss": 0.6421, "step": 23425 }, { "epoch": 1.1895140307903187, "grad_norm": 0.02794563490817387, "learning_rate": 0.00042220726563272514, "loss": 0.6547, "step": 23430 }, { "epoch": 1.1897678668883502, "grad_norm": 0.029678350882088908, "learning_rate": 0.0004219884449550421, "loss": 0.606, "step": 23435 }, { "epoch": 1.1900217029863818, "grad_norm": 0.07598991775247795, "learning_rate": 0.0004217696395906381, "loss": 0.6215, "step": 23440 }, { "epoch": 1.1902755390844133, "grad_norm": 0.0328077026793681, "learning_rate": 0.00042155084958246387, "loss": 0.6562, "step": 23445 }, { "epoch": 1.1905293751824446, "grad_norm": 0.026772790854714745, "learning_rate": 0.0004213320749734665, "loss": 0.6123, "step": 23450 }, { "epoch": 1.1907832112804762, "grad_norm": 0.03213958406721922, "learning_rate": 0.0004211133158065906, "loss": 0.6007, "step": 23455 }, { "epoch": 1.1910370473785077, "grad_norm": 0.029227970416644187, "learning_rate": 0.0004208945721247772, "loss": 0.6114, "step": 23460 }, { "epoch": 1.1912908834765392, "grad_norm": 0.026435767224030838, "learning_rate": 0.0004206758439709649, "loss": 0.6127, "step": 23465 }, { "epoch": 1.1915447195745708, "grad_norm": 0.0269512593858783, "learning_rate": 0.00042045713138808894, "loss": 0.649, "step": 23470 }, { "epoch": 1.1917985556726023, "grad_norm": 0.028995207678625897, "learning_rate": 0.0004202384344190814, "loss": 0.6136, "step": 23475 }, { "epoch": 1.1920523917706336, "grad_norm": 0.02848528552769512, "learning_rate": 0.00042001975310687134, "loss": 0.629, "step": 23480 }, { "epoch": 1.1923062278686651, "grad_norm": 0.028887186524239817, "learning_rate": 0.0004198010874943849, "loss": 0.6084, "step": 23485 }, { "epoch": 1.1925600639666967, "grad_norm": 0.036908385532670826, "learning_rate": 0.0004195824376245451, "loss": 0.6212, "step": 23490 }, { "epoch": 1.1928139000647282, "grad_norm": 0.026463093433362773, "learning_rate": 0.0004193638035402717, "loss": 0.5908, "step": 23495 }, { "epoch": 1.1930677361627597, "grad_norm": 0.029823048306442015, "learning_rate": 0.0004191451852844816, "loss": 0.634, "step": 23500 }, { "epoch": 1.1933215722607913, "grad_norm": 0.02657687599274199, "learning_rate": 0.00041892658290008835, "loss": 0.5874, "step": 23505 }, { "epoch": 1.1935754083588228, "grad_norm": 0.028899117841709617, "learning_rate": 0.00041870799643000257, "loss": 0.6324, "step": 23510 }, { "epoch": 1.1938292444568541, "grad_norm": 0.0302304767859427, "learning_rate": 0.00041848942591713167, "loss": 0.5926, "step": 23515 }, { "epoch": 1.1940830805548857, "grad_norm": 0.030006290158805356, "learning_rate": 0.0004182708714043799, "loss": 0.6187, "step": 23520 }, { "epoch": 1.1943369166529172, "grad_norm": 0.028159497771198635, "learning_rate": 0.0004180523329346486, "loss": 0.6047, "step": 23525 }, { "epoch": 1.1945907527509487, "grad_norm": 0.027812000230701284, "learning_rate": 0.00041783381055083565, "loss": 0.6302, "step": 23530 }, { "epoch": 1.1948445888489803, "grad_norm": 0.0294037800784646, "learning_rate": 0.0004176153042958359, "loss": 0.5795, "step": 23535 }, { "epoch": 1.1950984249470118, "grad_norm": 0.026904190573133365, "learning_rate": 0.0004173968142125411, "loss": 0.5811, "step": 23540 }, { "epoch": 1.195352261045043, "grad_norm": 0.026504524795576228, "learning_rate": 0.00041717834034383974, "loss": 0.6161, "step": 23545 }, { "epoch": 1.1956060971430746, "grad_norm": 0.026129011946368408, "learning_rate": 0.0004169598827326171, "loss": 0.6061, "step": 23550 }, { "epoch": 1.1958599332411062, "grad_norm": 0.02883707661215693, "learning_rate": 0.0004167414414217554, "loss": 0.6555, "step": 23555 }, { "epoch": 1.1961137693391377, "grad_norm": 0.02754717499142376, "learning_rate": 0.0004165230164541335, "loss": 0.572, "step": 23560 }, { "epoch": 1.1963676054371692, "grad_norm": 0.029792062848930306, "learning_rate": 0.00041630460787262717, "loss": 0.5856, "step": 23565 }, { "epoch": 1.1966214415352008, "grad_norm": 0.03059213041973138, "learning_rate": 0.00041608621572010896, "loss": 0.6541, "step": 23570 }, { "epoch": 1.1968752776332323, "grad_norm": 0.026789289677867447, "learning_rate": 0.0004158678400394481, "loss": 0.6246, "step": 23575 }, { "epoch": 1.1971291137312636, "grad_norm": 0.03100254433798806, "learning_rate": 0.00041564948087351053, "loss": 0.6444, "step": 23580 }, { "epoch": 1.1973829498292952, "grad_norm": 0.02845051430302516, "learning_rate": 0.0004154311382651593, "loss": 0.604, "step": 23585 }, { "epoch": 1.1976367859273267, "grad_norm": 0.02811163976355231, "learning_rate": 0.000415212812257254, "loss": 0.6012, "step": 23590 }, { "epoch": 1.1978906220253582, "grad_norm": 0.03133332755604334, "learning_rate": 0.0004149945028926507, "loss": 0.6099, "step": 23595 }, { "epoch": 1.1981444581233898, "grad_norm": 0.027811985326181926, "learning_rate": 0.0004147762102142027, "loss": 0.5997, "step": 23600 }, { "epoch": 1.1983982942214213, "grad_norm": 0.027565024233330772, "learning_rate": 0.0004145579342647595, "loss": 0.5873, "step": 23605 }, { "epoch": 1.1986521303194526, "grad_norm": 0.026609619170380756, "learning_rate": 0.0004143396750871678, "loss": 0.6134, "step": 23610 }, { "epoch": 1.1989059664174841, "grad_norm": 0.030416752244378554, "learning_rate": 0.0004141214327242707, "loss": 0.5998, "step": 23615 }, { "epoch": 1.1991598025155157, "grad_norm": 0.028534650547493788, "learning_rate": 0.000413903207218908, "loss": 0.6039, "step": 23620 }, { "epoch": 1.1994136386135472, "grad_norm": 0.03494888076835613, "learning_rate": 0.0004136849986139164, "loss": 0.6083, "step": 23625 }, { "epoch": 1.1996674747115788, "grad_norm": 0.029460023405010864, "learning_rate": 0.0004134668069521291, "loss": 0.6671, "step": 23630 }, { "epoch": 1.1999213108096103, "grad_norm": 0.03340360510091334, "learning_rate": 0.00041324863227637607, "loss": 0.644, "step": 23635 }, { "epoch": 1.2001751469076418, "grad_norm": 0.03309857115274153, "learning_rate": 0.0004130304746294839, "loss": 0.6299, "step": 23640 }, { "epoch": 1.2004289830056734, "grad_norm": 0.02894484977257895, "learning_rate": 0.0004128123340542757, "loss": 0.6243, "step": 23645 }, { "epoch": 1.2006828191037047, "grad_norm": 0.03135322356879798, "learning_rate": 0.0004125942105935717, "loss": 0.6341, "step": 23650 }, { "epoch": 1.2009366552017362, "grad_norm": 0.03180388795057021, "learning_rate": 0.00041237610429018824, "loss": 0.6203, "step": 23655 }, { "epoch": 1.2011904912997677, "grad_norm": 0.03172454893092431, "learning_rate": 0.0004121580151869385, "loss": 0.639, "step": 23660 }, { "epoch": 1.2014443273977993, "grad_norm": 0.029852236688973712, "learning_rate": 0.0004119399433266323, "loss": 0.6246, "step": 23665 }, { "epoch": 1.2016981634958308, "grad_norm": 0.026439075790358774, "learning_rate": 0.0004117218887520761, "loss": 0.5816, "step": 23670 }, { "epoch": 1.2019519995938621, "grad_norm": 0.02718615397057077, "learning_rate": 0.00041150385150607287, "loss": 0.6298, "step": 23675 }, { "epoch": 1.2022058356918937, "grad_norm": 0.027779143598060017, "learning_rate": 0.0004112858316314223, "loss": 0.6077, "step": 23680 }, { "epoch": 1.2024596717899252, "grad_norm": 0.031651234903162406, "learning_rate": 0.00041106782917092055, "loss": 0.6734, "step": 23685 }, { "epoch": 1.2027135078879567, "grad_norm": 0.03166367665374823, "learning_rate": 0.00041084984416736044, "loss": 0.6241, "step": 23690 }, { "epoch": 1.2029673439859883, "grad_norm": 0.03341100354746406, "learning_rate": 0.0004106318766635313, "loss": 0.5911, "step": 23695 }, { "epoch": 1.2032211800840198, "grad_norm": 0.028628331658643254, "learning_rate": 0.00041041392670221913, "loss": 0.6106, "step": 23700 }, { "epoch": 1.2034750161820513, "grad_norm": 0.027647681109472057, "learning_rate": 0.00041019599432620614, "loss": 0.6061, "step": 23705 }, { "epoch": 1.2037288522800829, "grad_norm": 0.04001621245679728, "learning_rate": 0.00040997807957827184, "loss": 0.5848, "step": 23710 }, { "epoch": 1.2039826883781142, "grad_norm": 0.0279931688468641, "learning_rate": 0.0004097601825011916, "loss": 0.6032, "step": 23715 }, { "epoch": 1.2042365244761457, "grad_norm": 0.0671875879834024, "learning_rate": 0.00040954230313773745, "loss": 0.5987, "step": 23720 }, { "epoch": 1.2044903605741772, "grad_norm": 0.028433203204455776, "learning_rate": 0.0004093244415306781, "loss": 0.6449, "step": 23725 }, { "epoch": 1.2047441966722088, "grad_norm": 0.03370731759482824, "learning_rate": 0.00040910659772277867, "loss": 0.6101, "step": 23730 }, { "epoch": 1.2049980327702403, "grad_norm": 0.02528250275004859, "learning_rate": 0.0004088887717568009, "loss": 0.5799, "step": 23735 }, { "epoch": 1.2052518688682718, "grad_norm": 0.027234253090134546, "learning_rate": 0.0004086709636755029, "loss": 0.6384, "step": 23740 }, { "epoch": 1.2055057049663032, "grad_norm": 0.02675577327392335, "learning_rate": 0.0004084531735216392, "loss": 0.6048, "step": 23745 }, { "epoch": 1.2057595410643347, "grad_norm": 0.03130931137316266, "learning_rate": 0.000408235401337961, "loss": 0.6337, "step": 23750 }, { "epoch": 1.2060133771623662, "grad_norm": 0.02543402219233465, "learning_rate": 0.00040801764716721586, "loss": 0.6098, "step": 23755 }, { "epoch": 1.2062672132603978, "grad_norm": 0.028814905903431852, "learning_rate": 0.00040779991105214787, "loss": 0.6053, "step": 23760 }, { "epoch": 1.2065210493584293, "grad_norm": 0.029161466469517488, "learning_rate": 0.00040758219303549734, "loss": 0.6205, "step": 23765 }, { "epoch": 1.2067748854564608, "grad_norm": 0.028185959211526654, "learning_rate": 0.00040736449316000156, "loss": 0.6527, "step": 23770 }, { "epoch": 1.2070287215544924, "grad_norm": 0.029694582398217283, "learning_rate": 0.00040714681146839394, "loss": 0.6066, "step": 23775 }, { "epoch": 1.2072825576525237, "grad_norm": 0.030315264946904393, "learning_rate": 0.00040692914800340407, "loss": 0.6473, "step": 23780 }, { "epoch": 1.2075363937505552, "grad_norm": 0.03206623787198917, "learning_rate": 0.00040671150280775835, "loss": 0.6467, "step": 23785 }, { "epoch": 1.2077902298485867, "grad_norm": 0.028052916941434265, "learning_rate": 0.0004064938759241794, "loss": 0.6157, "step": 23790 }, { "epoch": 1.2080440659466183, "grad_norm": 0.029947999641626546, "learning_rate": 0.0004062762673953863, "loss": 0.6131, "step": 23795 }, { "epoch": 1.2082979020446498, "grad_norm": 0.02971959661545437, "learning_rate": 0.00040605867726409446, "loss": 0.6201, "step": 23800 }, { "epoch": 1.2085517381426814, "grad_norm": 0.03150838352704558, "learning_rate": 0.00040584110557301576, "loss": 0.6215, "step": 23805 }, { "epoch": 1.2088055742407127, "grad_norm": 0.0279563590447983, "learning_rate": 0.0004056235523648586, "loss": 0.6154, "step": 23810 }, { "epoch": 1.2090594103387442, "grad_norm": 0.028447375317625936, "learning_rate": 0.0004054060176823273, "loss": 0.5903, "step": 23815 }, { "epoch": 1.2093132464367757, "grad_norm": 0.029524266304211153, "learning_rate": 0.00040518850156812315, "loss": 0.6369, "step": 23820 }, { "epoch": 1.2095670825348073, "grad_norm": 0.027414710607026233, "learning_rate": 0.0004049710040649431, "loss": 0.6269, "step": 23825 }, { "epoch": 1.2098209186328388, "grad_norm": 0.02750679849436543, "learning_rate": 0.0004047535252154812, "loss": 0.6087, "step": 23830 }, { "epoch": 1.2100747547308703, "grad_norm": 0.03077984182053346, "learning_rate": 0.0004045360650624272, "loss": 0.6046, "step": 23835 }, { "epoch": 1.2103285908289019, "grad_norm": 0.02827729431201857, "learning_rate": 0.0004043186236484677, "loss": 0.6475, "step": 23840 }, { "epoch": 1.2105824269269332, "grad_norm": 0.029197883754897232, "learning_rate": 0.0004041012010162852, "loss": 0.5951, "step": 23845 }, { "epoch": 1.2108362630249647, "grad_norm": 0.029463238562214583, "learning_rate": 0.0004038837972085586, "loss": 0.6455, "step": 23850 }, { "epoch": 1.2110900991229963, "grad_norm": 0.028456136146440058, "learning_rate": 0.0004036664122679633, "loss": 0.6403, "step": 23855 }, { "epoch": 1.2113439352210278, "grad_norm": 0.02833563794541019, "learning_rate": 0.00040344904623717094, "loss": 0.6354, "step": 23860 }, { "epoch": 1.2115977713190593, "grad_norm": 0.03196497971721034, "learning_rate": 0.00040323169915884924, "loss": 0.635, "step": 23865 }, { "epoch": 1.2118516074170909, "grad_norm": 0.024492458494111347, "learning_rate": 0.0004030143710756624, "loss": 0.6402, "step": 23870 }, { "epoch": 1.2121054435151222, "grad_norm": 0.02816441652390956, "learning_rate": 0.0004027970620302709, "loss": 0.6443, "step": 23875 }, { "epoch": 1.2123592796131537, "grad_norm": 0.0277629409278204, "learning_rate": 0.0004025797720653313, "loss": 0.63, "step": 23880 }, { "epoch": 1.2126131157111852, "grad_norm": 0.027873266861239292, "learning_rate": 0.00040236250122349643, "loss": 0.6119, "step": 23885 }, { "epoch": 1.2128669518092168, "grad_norm": 0.030105270164463793, "learning_rate": 0.0004021452495474159, "loss": 0.6467, "step": 23890 }, { "epoch": 1.2131207879072483, "grad_norm": 0.03372724444067418, "learning_rate": 0.0004019280170797349, "loss": 0.6146, "step": 23895 }, { "epoch": 1.2133746240052798, "grad_norm": 0.027993236707385637, "learning_rate": 0.000401710803863095, "loss": 0.6072, "step": 23900 }, { "epoch": 1.2136284601033114, "grad_norm": 0.02700164555492898, "learning_rate": 0.0004014936099401341, "loss": 0.6158, "step": 23905 }, { "epoch": 1.213882296201343, "grad_norm": 0.026817727760730922, "learning_rate": 0.0004012764353534864, "loss": 0.6136, "step": 23910 }, { "epoch": 1.2141361322993742, "grad_norm": 0.028459459168257827, "learning_rate": 0.00040105928014578206, "loss": 0.5995, "step": 23915 }, { "epoch": 1.2143899683974058, "grad_norm": 0.028693998752087953, "learning_rate": 0.00040084214435964766, "loss": 0.623, "step": 23920 }, { "epoch": 1.2146438044954373, "grad_norm": 0.028977188131344228, "learning_rate": 0.0004006250280377058, "loss": 0.6142, "step": 23925 }, { "epoch": 1.2148976405934688, "grad_norm": 0.032444936914083504, "learning_rate": 0.0004004079312225754, "loss": 0.6128, "step": 23930 }, { "epoch": 1.2151514766915004, "grad_norm": 0.03322886637253877, "learning_rate": 0.00040019085395687134, "loss": 0.6194, "step": 23935 }, { "epoch": 1.2154053127895317, "grad_norm": 0.030159669399413308, "learning_rate": 0.00039997379628320493, "loss": 0.6217, "step": 23940 }, { "epoch": 1.2156591488875632, "grad_norm": 0.026184495658578118, "learning_rate": 0.0003997567582441834, "loss": 0.637, "step": 23945 }, { "epoch": 1.2159129849855947, "grad_norm": 0.027158042594313973, "learning_rate": 0.00039953973988241035, "loss": 0.6385, "step": 23950 }, { "epoch": 1.2161668210836263, "grad_norm": 0.02462811701034489, "learning_rate": 0.00039932274124048546, "loss": 0.6178, "step": 23955 }, { "epoch": 1.2164206571816578, "grad_norm": 0.026647784285802827, "learning_rate": 0.00039910576236100437, "loss": 0.6291, "step": 23960 }, { "epoch": 1.2166744932796894, "grad_norm": 0.0293448500915657, "learning_rate": 0.000398888803286559, "loss": 0.6314, "step": 23965 }, { "epoch": 1.2169283293777209, "grad_norm": 0.02919687490040667, "learning_rate": 0.0003986718640597372, "loss": 0.6007, "step": 23970 }, { "epoch": 1.2171821654757524, "grad_norm": 0.028804983439474775, "learning_rate": 0.0003984549447231232, "loss": 0.5969, "step": 23975 }, { "epoch": 1.2174360015737837, "grad_norm": 0.027692493473163216, "learning_rate": 0.0003982380453192972, "loss": 0.6183, "step": 23980 }, { "epoch": 1.2176898376718153, "grad_norm": 0.028812293559735674, "learning_rate": 0.0003980211658908354, "loss": 0.6252, "step": 23985 }, { "epoch": 1.2179436737698468, "grad_norm": 0.02970373611014827, "learning_rate": 0.0003978043064803101, "loss": 0.6421, "step": 23990 }, { "epoch": 1.2181975098678783, "grad_norm": 0.02781570660718527, "learning_rate": 0.0003975874671302899, "loss": 0.6288, "step": 23995 }, { "epoch": 1.2184513459659099, "grad_norm": 0.028389959061278492, "learning_rate": 0.00039737064788333907, "loss": 0.6224, "step": 24000 }, { "epoch": 1.2187051820639414, "grad_norm": 0.027539132522204312, "learning_rate": 0.0003971538487820181, "loss": 0.6348, "step": 24005 }, { "epoch": 1.2189590181619727, "grad_norm": 0.034732489689041635, "learning_rate": 0.0003969370698688839, "loss": 0.6506, "step": 24010 }, { "epoch": 1.2192128542600043, "grad_norm": 0.030441195389823646, "learning_rate": 0.0003967203111864889, "loss": 0.6278, "step": 24015 }, { "epoch": 1.2194666903580358, "grad_norm": 0.028052485441630078, "learning_rate": 0.0003965035727773818, "loss": 0.6056, "step": 24020 }, { "epoch": 1.2197205264560673, "grad_norm": 0.028856796659075548, "learning_rate": 0.0003962868546841072, "loss": 0.5614, "step": 24025 }, { "epoch": 1.2199743625540989, "grad_norm": 0.029239916727322287, "learning_rate": 0.0003960701569492058, "loss": 0.6401, "step": 24030 }, { "epoch": 1.2202281986521304, "grad_norm": 0.02828649892506898, "learning_rate": 0.00039585347961521434, "loss": 0.6186, "step": 24035 }, { "epoch": 1.220482034750162, "grad_norm": 0.027086787237235056, "learning_rate": 0.0003956368227246654, "loss": 0.6362, "step": 24040 }, { "epoch": 1.2207358708481932, "grad_norm": 0.030171505163613842, "learning_rate": 0.00039542018632008773, "loss": 0.6317, "step": 24045 }, { "epoch": 1.2209897069462248, "grad_norm": 0.02802301205632446, "learning_rate": 0.00039520357044400595, "loss": 0.6064, "step": 24050 }, { "epoch": 1.2212435430442563, "grad_norm": 0.029057419508969545, "learning_rate": 0.0003949869751389407, "loss": 0.5986, "step": 24055 }, { "epoch": 1.2214973791422878, "grad_norm": 0.02913759930698659, "learning_rate": 0.0003947704004474085, "loss": 0.638, "step": 24060 }, { "epoch": 1.2217512152403194, "grad_norm": 0.027190366816589172, "learning_rate": 0.0003945538464119218, "loss": 0.6301, "step": 24065 }, { "epoch": 1.222005051338351, "grad_norm": 0.028515410670376887, "learning_rate": 0.00039433731307498925, "loss": 0.6543, "step": 24070 }, { "epoch": 1.2222588874363822, "grad_norm": 0.027319409668695456, "learning_rate": 0.00039412080047911526, "loss": 0.647, "step": 24075 }, { "epoch": 1.2225127235344138, "grad_norm": 0.02997647120403099, "learning_rate": 0.00039390430866680017, "loss": 0.6044, "step": 24080 }, { "epoch": 1.2227665596324453, "grad_norm": 0.027605138733108787, "learning_rate": 0.00039368783768054005, "loss": 0.6151, "step": 24085 }, { "epoch": 1.2230203957304768, "grad_norm": 0.02891254921372912, "learning_rate": 0.00039347138756282737, "loss": 0.6338, "step": 24090 }, { "epoch": 1.2232742318285084, "grad_norm": 0.028681756305588276, "learning_rate": 0.0003932549583561499, "loss": 0.6221, "step": 24095 }, { "epoch": 1.22352806792654, "grad_norm": 0.13577297491898077, "learning_rate": 0.00039303855010299187, "loss": 0.5904, "step": 24100 }, { "epoch": 1.2237819040245714, "grad_norm": 0.03255565584127607, "learning_rate": 0.00039282216284583304, "loss": 0.6354, "step": 24105 }, { "epoch": 1.2240357401226027, "grad_norm": 0.02854421487136403, "learning_rate": 0.00039260579662714915, "loss": 0.6468, "step": 24110 }, { "epoch": 1.2242895762206343, "grad_norm": 0.030478154917886566, "learning_rate": 0.0003923894514894118, "loss": 0.6339, "step": 24115 }, { "epoch": 1.2245434123186658, "grad_norm": 0.034170678437704154, "learning_rate": 0.00039217312747508843, "loss": 0.6067, "step": 24120 }, { "epoch": 1.2247972484166973, "grad_norm": 0.02812457337404647, "learning_rate": 0.00039195682462664225, "loss": 0.5915, "step": 24125 }, { "epoch": 1.2250510845147289, "grad_norm": 0.027797397440626787, "learning_rate": 0.0003917405429865327, "loss": 0.6019, "step": 24130 }, { "epoch": 1.2253049206127604, "grad_norm": 0.02939603452421798, "learning_rate": 0.0003915242825972148, "loss": 0.636, "step": 24135 }, { "epoch": 1.2255587567107917, "grad_norm": 0.025672295948270345, "learning_rate": 0.0003913080435011392, "loss": 0.6281, "step": 24140 }, { "epoch": 1.2258125928088233, "grad_norm": 0.026590393912523616, "learning_rate": 0.00039109182574075256, "loss": 0.6244, "step": 24145 }, { "epoch": 1.2260664289068548, "grad_norm": 0.026638521464407212, "learning_rate": 0.00039087562935849745, "loss": 0.628, "step": 24150 }, { "epoch": 1.2263202650048863, "grad_norm": 0.027520575229063142, "learning_rate": 0.00039065945439681213, "loss": 0.606, "step": 24155 }, { "epoch": 1.2265741011029179, "grad_norm": 0.025363137183298242, "learning_rate": 0.0003904433008981306, "loss": 0.5973, "step": 24160 }, { "epoch": 1.2268279372009494, "grad_norm": 0.02781331536588804, "learning_rate": 0.00039022716890488275, "loss": 0.6153, "step": 24165 }, { "epoch": 1.227081773298981, "grad_norm": 0.027649483674806866, "learning_rate": 0.0003900110584594942, "loss": 0.6318, "step": 24170 }, { "epoch": 1.2273356093970125, "grad_norm": 0.0364769607605609, "learning_rate": 0.00038979496960438637, "loss": 0.5777, "step": 24175 }, { "epoch": 1.2275894454950438, "grad_norm": 0.02779832241259017, "learning_rate": 0.0003895789023819764, "loss": 0.6478, "step": 24180 }, { "epoch": 1.2278432815930753, "grad_norm": 0.02720872064791723, "learning_rate": 0.0003893628568346771, "loss": 0.6106, "step": 24185 }, { "epoch": 1.2280971176911069, "grad_norm": 0.028920899440833567, "learning_rate": 0.0003891468330048974, "loss": 0.545, "step": 24190 }, { "epoch": 1.2283509537891384, "grad_norm": 0.025185308005669517, "learning_rate": 0.00038893083093504154, "loss": 0.5881, "step": 24195 }, { "epoch": 1.22860478988717, "grad_norm": 0.030598855195039517, "learning_rate": 0.00038871485066750965, "loss": 0.5863, "step": 24200 }, { "epoch": 1.2288586259852012, "grad_norm": 0.03138630696887011, "learning_rate": 0.00038849889224469765, "loss": 0.6103, "step": 24205 }, { "epoch": 1.2291124620832328, "grad_norm": 0.026423513187650495, "learning_rate": 0.000388282955708997, "loss": 0.617, "step": 24210 }, { "epoch": 1.2293662981812643, "grad_norm": 0.027339391782161004, "learning_rate": 0.0003880670411027951, "loss": 0.5944, "step": 24215 }, { "epoch": 1.2296201342792958, "grad_norm": 0.02878611256536262, "learning_rate": 0.0003878511484684747, "loss": 0.6046, "step": 24220 }, { "epoch": 1.2298739703773274, "grad_norm": 0.03037080460445057, "learning_rate": 0.00038763527784841463, "loss": 0.6633, "step": 24225 }, { "epoch": 1.230127806475359, "grad_norm": 0.028602642793528317, "learning_rate": 0.00038741942928498913, "loss": 0.6723, "step": 24230 }, { "epoch": 1.2303816425733904, "grad_norm": 0.0276203023913362, "learning_rate": 0.0003872036028205683, "loss": 0.6011, "step": 24235 }, { "epoch": 1.230635478671422, "grad_norm": 0.031308139998870006, "learning_rate": 0.00038698779849751766, "loss": 0.5762, "step": 24240 }, { "epoch": 1.2308893147694533, "grad_norm": 0.029627054830622033, "learning_rate": 0.0003867720163581983, "loss": 0.6065, "step": 24245 }, { "epoch": 1.2311431508674848, "grad_norm": 0.03055779349879629, "learning_rate": 0.0003865562564449678, "loss": 0.6205, "step": 24250 }, { "epoch": 1.2313969869655164, "grad_norm": 0.02711375944965068, "learning_rate": 0.0003863405188001783, "loss": 0.6571, "step": 24255 }, { "epoch": 1.231650823063548, "grad_norm": 0.0277103902953046, "learning_rate": 0.00038612480346617825, "loss": 0.5812, "step": 24260 }, { "epoch": 1.2319046591615794, "grad_norm": 0.03050415026475316, "learning_rate": 0.00038590911048531136, "loss": 0.6434, "step": 24265 }, { "epoch": 1.232158495259611, "grad_norm": 0.026745734446305597, "learning_rate": 0.00038569343989991705, "loss": 0.6113, "step": 24270 }, { "epoch": 1.2324123313576423, "grad_norm": 0.02785479242717774, "learning_rate": 0.0003854777917523305, "loss": 0.6684, "step": 24275 }, { "epoch": 1.2326661674556738, "grad_norm": 0.03029960855565497, "learning_rate": 0.00038526216608488227, "loss": 0.6241, "step": 24280 }, { "epoch": 1.2329200035537053, "grad_norm": 0.025638775585762108, "learning_rate": 0.0003850465629398987, "loss": 0.6184, "step": 24285 }, { "epoch": 1.2331738396517369, "grad_norm": 0.027950853929365006, "learning_rate": 0.00038483098235970147, "loss": 0.5896, "step": 24290 }, { "epoch": 1.2334276757497684, "grad_norm": 0.02953741225558591, "learning_rate": 0.00038461542438660815, "loss": 0.6267, "step": 24295 }, { "epoch": 1.2336815118478, "grad_norm": 0.027901155697624705, "learning_rate": 0.00038439988906293157, "loss": 0.5909, "step": 24300 }, { "epoch": 1.2339353479458315, "grad_norm": 0.03100740945664599, "learning_rate": 0.00038418437643098006, "loss": 0.6215, "step": 24305 }, { "epoch": 1.2341891840438628, "grad_norm": 0.0266740145282656, "learning_rate": 0.0003839688865330581, "loss": 0.5958, "step": 24310 }, { "epoch": 1.2344430201418943, "grad_norm": 0.029741016474606908, "learning_rate": 0.00038375341941146505, "loss": 0.6058, "step": 24315 }, { "epoch": 1.2346968562399259, "grad_norm": 0.024747432508553775, "learning_rate": 0.0003835379751084961, "loss": 0.5984, "step": 24320 }, { "epoch": 1.2349506923379574, "grad_norm": 0.02755611533098998, "learning_rate": 0.00038332255366644175, "loss": 0.6185, "step": 24325 }, { "epoch": 1.235204528435989, "grad_norm": 0.033770764050534266, "learning_rate": 0.0003831071551275883, "loss": 0.6126, "step": 24330 }, { "epoch": 1.2354583645340205, "grad_norm": 0.028121346880791764, "learning_rate": 0.0003828917795342173, "loss": 0.5963, "step": 24335 }, { "epoch": 1.2357122006320518, "grad_norm": 0.028420453474940696, "learning_rate": 0.000382676426928606, "loss": 0.6435, "step": 24340 }, { "epoch": 1.2359660367300833, "grad_norm": 0.028867435219111517, "learning_rate": 0.00038246109735302696, "loss": 0.6092, "step": 24345 }, { "epoch": 1.2362198728281149, "grad_norm": 0.02664674444176584, "learning_rate": 0.0003822457908497484, "loss": 0.6424, "step": 24350 }, { "epoch": 1.2364737089261464, "grad_norm": 0.02736930324924258, "learning_rate": 0.00038203050746103386, "loss": 0.646, "step": 24355 }, { "epoch": 1.236727545024178, "grad_norm": 0.030625039703045635, "learning_rate": 0.00038181524722914235, "loss": 0.6379, "step": 24360 }, { "epoch": 1.2369813811222095, "grad_norm": 0.028745141747655568, "learning_rate": 0.0003816000101963282, "loss": 0.6162, "step": 24365 }, { "epoch": 1.237235217220241, "grad_norm": 0.032689097761546555, "learning_rate": 0.00038138479640484183, "loss": 0.6161, "step": 24370 }, { "epoch": 1.2374890533182723, "grad_norm": 0.028222253799998594, "learning_rate": 0.00038116960589692844, "loss": 0.6193, "step": 24375 }, { "epoch": 1.2377428894163038, "grad_norm": 0.028211322231113854, "learning_rate": 0.00038095443871482876, "loss": 0.6024, "step": 24380 }, { "epoch": 1.2379967255143354, "grad_norm": 0.027546361900185824, "learning_rate": 0.0003807392949007791, "loss": 0.6116, "step": 24385 }, { "epoch": 1.238250561612367, "grad_norm": 0.026126069726598523, "learning_rate": 0.00038052417449701106, "loss": 0.5862, "step": 24390 }, { "epoch": 1.2385043977103984, "grad_norm": 0.02907226551593026, "learning_rate": 0.00038030907754575173, "loss": 0.6155, "step": 24395 }, { "epoch": 1.23875823380843, "grad_norm": 0.026380745548455844, "learning_rate": 0.0003800940040892236, "loss": 0.623, "step": 24400 }, { "epoch": 1.2390120699064613, "grad_norm": 0.02549564017380517, "learning_rate": 0.00037987895416964455, "loss": 0.6184, "step": 24405 }, { "epoch": 1.2392659060044928, "grad_norm": 0.02933644476482105, "learning_rate": 0.0003796639278292277, "loss": 0.6174, "step": 24410 }, { "epoch": 1.2395197421025244, "grad_norm": 0.029628855655134637, "learning_rate": 0.0003794489251101817, "loss": 0.6296, "step": 24415 }, { "epoch": 1.239773578200556, "grad_norm": 0.03962769005501, "learning_rate": 0.00037923394605471057, "loss": 0.6056, "step": 24420 }, { "epoch": 1.2400274142985874, "grad_norm": 0.03163082559720287, "learning_rate": 0.00037901899070501337, "loss": 0.6354, "step": 24425 }, { "epoch": 1.240281250396619, "grad_norm": 0.028419589718890366, "learning_rate": 0.00037880405910328515, "loss": 0.5967, "step": 24430 }, { "epoch": 1.2405350864946505, "grad_norm": 0.029789143292221494, "learning_rate": 0.0003785891512917157, "loss": 0.6416, "step": 24435 }, { "epoch": 1.2407889225926818, "grad_norm": 0.03034131334143518, "learning_rate": 0.00037837426731249035, "loss": 0.6192, "step": 24440 }, { "epoch": 1.2410427586907133, "grad_norm": 0.028085308820494433, "learning_rate": 0.0003781594072077899, "loss": 0.6501, "step": 24445 }, { "epoch": 1.2412965947887449, "grad_norm": 0.029035427491918484, "learning_rate": 0.00037794457101979, "loss": 0.5842, "step": 24450 }, { "epoch": 1.2415504308867764, "grad_norm": 0.03007390638082848, "learning_rate": 0.00037772975879066224, "loss": 0.6501, "step": 24455 }, { "epoch": 1.241804266984808, "grad_norm": 0.03089292922868796, "learning_rate": 0.00037751497056257304, "loss": 0.64, "step": 24460 }, { "epoch": 1.2420581030828395, "grad_norm": 0.02879758306118636, "learning_rate": 0.0003773002063776843, "loss": 0.6015, "step": 24465 }, { "epoch": 1.2423119391808708, "grad_norm": 0.029196348757497294, "learning_rate": 0.00037708546627815317, "loss": 0.6166, "step": 24470 }, { "epoch": 1.2425657752789023, "grad_norm": 0.027696266438452192, "learning_rate": 0.000376870750306132, "loss": 0.6119, "step": 24475 }, { "epoch": 1.2428196113769339, "grad_norm": 0.02699594024055042, "learning_rate": 0.0003766560585037685, "loss": 0.6172, "step": 24480 }, { "epoch": 1.2430734474749654, "grad_norm": 0.030342426255820253, "learning_rate": 0.0003764413909132054, "loss": 0.6168, "step": 24485 }, { "epoch": 1.243327283572997, "grad_norm": 0.02966390975452094, "learning_rate": 0.00037622674757658127, "loss": 0.6281, "step": 24490 }, { "epoch": 1.2435811196710285, "grad_norm": 0.029693580283016528, "learning_rate": 0.0003760121285360293, "loss": 0.6155, "step": 24495 }, { "epoch": 1.24383495576906, "grad_norm": 0.030945155563781714, "learning_rate": 0.00037579753383367825, "loss": 0.5923, "step": 24500 }, { "epoch": 1.2440887918670915, "grad_norm": 0.03361219024718893, "learning_rate": 0.0003755829635116519, "loss": 0.6142, "step": 24505 }, { "epoch": 1.2443426279651228, "grad_norm": 0.026201376722725413, "learning_rate": 0.0003753684176120693, "loss": 0.5921, "step": 24510 }, { "epoch": 1.2445964640631544, "grad_norm": 0.02963394938476199, "learning_rate": 0.0003751538961770448, "loss": 0.62, "step": 24515 }, { "epoch": 1.244850300161186, "grad_norm": 0.027282000665576595, "learning_rate": 0.0003749393992486879, "loss": 0.617, "step": 24520 }, { "epoch": 1.2451041362592175, "grad_norm": 0.030537435717839387, "learning_rate": 0.0003747249268691033, "loss": 0.6112, "step": 24525 }, { "epoch": 1.245357972357249, "grad_norm": 0.027154344295855233, "learning_rate": 0.0003745104790803907, "loss": 0.6384, "step": 24530 }, { "epoch": 1.2456118084552805, "grad_norm": 0.02901638118922784, "learning_rate": 0.0003742960559246453, "loss": 0.5937, "step": 24535 }, { "epoch": 1.2458656445533118, "grad_norm": 0.02624327777254125, "learning_rate": 0.0003740816574439572, "loss": 0.623, "step": 24540 }, { "epoch": 1.2461194806513434, "grad_norm": 0.029871271817831755, "learning_rate": 0.00037386728368041185, "loss": 0.641, "step": 24545 }, { "epoch": 1.246373316749375, "grad_norm": 0.025732934705871898, "learning_rate": 0.00037365293467608954, "loss": 0.632, "step": 24550 }, { "epoch": 1.2466271528474064, "grad_norm": 0.028561916928940157, "learning_rate": 0.00037343861047306617, "loss": 0.6193, "step": 24555 }, { "epoch": 1.246880988945438, "grad_norm": 0.03147591738097484, "learning_rate": 0.00037322431111341245, "loss": 0.6014, "step": 24560 }, { "epoch": 1.2471348250434695, "grad_norm": 0.02919198614920128, "learning_rate": 0.0003730100366391942, "loss": 0.5757, "step": 24565 }, { "epoch": 1.247388661141501, "grad_norm": 0.026662225677452356, "learning_rate": 0.0003727957870924724, "loss": 0.5855, "step": 24570 }, { "epoch": 1.2476424972395324, "grad_norm": 0.026675437957563244, "learning_rate": 0.0003725815625153033, "loss": 0.5654, "step": 24575 }, { "epoch": 1.247896333337564, "grad_norm": 0.027556445944132806, "learning_rate": 0.00037236736294973805, "loss": 0.6199, "step": 24580 }, { "epoch": 1.2481501694355954, "grad_norm": 0.02788550622282366, "learning_rate": 0.00037215318843782287, "loss": 0.5963, "step": 24585 }, { "epoch": 1.248404005533627, "grad_norm": 0.03210188094643427, "learning_rate": 0.0003719390390215993, "loss": 0.6257, "step": 24590 }, { "epoch": 1.2486578416316585, "grad_norm": 0.02706095902067571, "learning_rate": 0.0003717249147431037, "loss": 0.5873, "step": 24595 }, { "epoch": 1.24891167772969, "grad_norm": 0.026734754730119777, "learning_rate": 0.0003715108156443676, "loss": 0.6246, "step": 24600 }, { "epoch": 1.2491655138277213, "grad_norm": 0.03014290638781019, "learning_rate": 0.0003712967417674177, "loss": 0.6406, "step": 24605 }, { "epoch": 1.2494193499257529, "grad_norm": 0.02759697880976429, "learning_rate": 0.0003710826931542753, "loss": 0.619, "step": 24610 }, { "epoch": 1.2496731860237844, "grad_norm": 0.02869005956402474, "learning_rate": 0.0003708686698469575, "loss": 0.5971, "step": 24615 }, { "epoch": 1.249927022121816, "grad_norm": 0.02913487965443375, "learning_rate": 0.00037065467188747593, "loss": 0.6236, "step": 24620 }, { "epoch": 1.2501808582198475, "grad_norm": 0.029172264205813717, "learning_rate": 0.0003704406993178371, "loss": 0.6075, "step": 24625 }, { "epoch": 1.250434694317879, "grad_norm": 0.026341689404944652, "learning_rate": 0.000370226752180043, "loss": 0.6192, "step": 24630 }, { "epoch": 1.2506885304159105, "grad_norm": 0.026009122181555263, "learning_rate": 0.0003700128305160901, "loss": 0.6371, "step": 24635 }, { "epoch": 1.250942366513942, "grad_norm": 0.026256691497436556, "learning_rate": 0.00036979893436797054, "loss": 0.6386, "step": 24640 }, { "epoch": 1.2511962026119734, "grad_norm": 0.029153929108143344, "learning_rate": 0.0003695850637776707, "loss": 0.5947, "step": 24645 }, { "epoch": 1.251450038710005, "grad_norm": 0.029764988164663442, "learning_rate": 0.0003693712187871725, "loss": 0.6231, "step": 24650 }, { "epoch": 1.2517038748080365, "grad_norm": 0.02902220329055969, "learning_rate": 0.0003691573994384526, "loss": 0.6473, "step": 24655 }, { "epoch": 1.251957710906068, "grad_norm": 0.027072129119223493, "learning_rate": 0.00036894360577348275, "loss": 0.6003, "step": 24660 }, { "epoch": 1.2522115470040995, "grad_norm": 0.024588199829287425, "learning_rate": 0.00036872983783422944, "loss": 0.6323, "step": 24665 }, { "epoch": 1.2524653831021308, "grad_norm": 0.023564745788163616, "learning_rate": 0.0003685160956626542, "loss": 0.5822, "step": 24670 }, { "epoch": 1.2527192192001624, "grad_norm": 0.030856412182703154, "learning_rate": 0.0003683023793007138, "loss": 0.6026, "step": 24675 }, { "epoch": 1.252973055298194, "grad_norm": 0.030835478042404777, "learning_rate": 0.0003680886887903596, "loss": 0.6192, "step": 24680 }, { "epoch": 1.2532268913962255, "grad_norm": 0.030603124592505814, "learning_rate": 0.0003678750241735379, "loss": 0.5837, "step": 24685 }, { "epoch": 1.253480727494257, "grad_norm": 0.028844286476711008, "learning_rate": 0.00036766138549219007, "loss": 0.6005, "step": 24690 }, { "epoch": 1.2537345635922885, "grad_norm": 0.02672817322654506, "learning_rate": 0.00036744777278825225, "loss": 0.6184, "step": 24695 }, { "epoch": 1.25398839969032, "grad_norm": 0.028672337417059518, "learning_rate": 0.0003672341861036557, "loss": 0.6108, "step": 24700 }, { "epoch": 1.2542422357883516, "grad_norm": 0.029566048113847842, "learning_rate": 0.00036702062548032624, "loss": 0.6138, "step": 24705 }, { "epoch": 1.254496071886383, "grad_norm": 0.03315647888441058, "learning_rate": 0.00036680709096018483, "loss": 0.611, "step": 24710 }, { "epoch": 1.2547499079844144, "grad_norm": 0.0263854580413781, "learning_rate": 0.0003665935825851473, "loss": 0.6383, "step": 24715 }, { "epoch": 1.255003744082446, "grad_norm": 0.028482332201687572, "learning_rate": 0.0003663801003971241, "loss": 0.6507, "step": 24720 }, { "epoch": 1.2552575801804775, "grad_norm": 0.027227623152780336, "learning_rate": 0.0003661666444380209, "loss": 0.5938, "step": 24725 }, { "epoch": 1.255511416278509, "grad_norm": 0.025790084159042593, "learning_rate": 0.00036595321474973777, "loss": 0.5878, "step": 24730 }, { "epoch": 1.2557652523765404, "grad_norm": 0.026461131587888553, "learning_rate": 0.0003657398113741703, "loss": 0.6031, "step": 24735 }, { "epoch": 1.2560190884745719, "grad_norm": 0.026687394767818777, "learning_rate": 0.0003655264343532083, "loss": 0.6043, "step": 24740 }, { "epoch": 1.2562729245726034, "grad_norm": 0.028227038739829086, "learning_rate": 0.0003653130837287366, "loss": 0.6079, "step": 24745 }, { "epoch": 1.256526760670635, "grad_norm": 0.02717643882195909, "learning_rate": 0.00036509975954263486, "loss": 0.6306, "step": 24750 }, { "epoch": 1.2567805967686665, "grad_norm": 0.025912167805839652, "learning_rate": 0.00036488646183677767, "loss": 0.615, "step": 24755 }, { "epoch": 1.257034432866698, "grad_norm": 0.030586832516983707, "learning_rate": 0.00036467319065303414, "loss": 0.6273, "step": 24760 }, { "epoch": 1.2572882689647296, "grad_norm": 0.029681350567798265, "learning_rate": 0.00036445994603326835, "loss": 0.6476, "step": 24765 }, { "epoch": 1.257542105062761, "grad_norm": 0.03059285913560464, "learning_rate": 0.00036424672801933946, "loss": 0.6056, "step": 24770 }, { "epoch": 1.2577959411607924, "grad_norm": 0.032228786525441165, "learning_rate": 0.0003640335366531007, "loss": 0.6319, "step": 24775 }, { "epoch": 1.258049777258824, "grad_norm": 0.027439736263882426, "learning_rate": 0.00036382037197640063, "loss": 0.5935, "step": 24780 }, { "epoch": 1.2583036133568555, "grad_norm": 0.027059615723851353, "learning_rate": 0.00036360723403108233, "loss": 0.6378, "step": 24785 }, { "epoch": 1.258557449454887, "grad_norm": 0.03152956891444104, "learning_rate": 0.00036339412285898363, "loss": 0.6181, "step": 24790 }, { "epoch": 1.2588112855529185, "grad_norm": 0.0306764667551907, "learning_rate": 0.0003631810385019376, "loss": 0.6418, "step": 24795 }, { "epoch": 1.2590651216509499, "grad_norm": 0.03218895641316748, "learning_rate": 0.0003629679810017714, "loss": 0.6458, "step": 24800 }, { "epoch": 1.2593189577489814, "grad_norm": 0.034690271136168474, "learning_rate": 0.0003627549504003072, "loss": 0.5804, "step": 24805 }, { "epoch": 1.259572793847013, "grad_norm": 0.028075790084085134, "learning_rate": 0.00036254194673936174, "loss": 0.5974, "step": 24810 }, { "epoch": 1.2598266299450445, "grad_norm": 0.030198092147530272, "learning_rate": 0.0003623289700607466, "loss": 0.6326, "step": 24815 }, { "epoch": 1.260080466043076, "grad_norm": 0.031286751738303545, "learning_rate": 0.00036211602040626815, "loss": 0.6196, "step": 24820 }, { "epoch": 1.2603343021411075, "grad_norm": 0.02600953550705156, "learning_rate": 0.00036190309781772723, "loss": 0.5964, "step": 24825 }, { "epoch": 1.260588138239139, "grad_norm": 0.024918749901372297, "learning_rate": 0.00036169020233691953, "loss": 0.6105, "step": 24830 }, { "epoch": 1.2608419743371706, "grad_norm": 0.0317206897834887, "learning_rate": 0.0003614773340056353, "loss": 0.6132, "step": 24835 }, { "epoch": 1.261095810435202, "grad_norm": 0.031694499041300825, "learning_rate": 0.00036126449286565966, "loss": 0.5815, "step": 24840 }, { "epoch": 1.2613496465332334, "grad_norm": 0.03169449101900454, "learning_rate": 0.0003610516789587722, "loss": 0.6496, "step": 24845 }, { "epoch": 1.261603482631265, "grad_norm": 0.027467011341455766, "learning_rate": 0.000360838892326747, "loss": 0.6029, "step": 24850 }, { "epoch": 1.2618573187292965, "grad_norm": 0.027706905050791594, "learning_rate": 0.00036062613301135357, "loss": 0.6252, "step": 24855 }, { "epoch": 1.262111154827328, "grad_norm": 0.026582601258789446, "learning_rate": 0.00036041340105435506, "loss": 0.6369, "step": 24860 }, { "epoch": 1.2623649909253594, "grad_norm": 0.02812913936630698, "learning_rate": 0.00036020069649750976, "loss": 0.6303, "step": 24865 }, { "epoch": 1.262618827023391, "grad_norm": 0.03042476610050354, "learning_rate": 0.00035998801938257063, "loss": 0.5939, "step": 24870 }, { "epoch": 1.2628726631214224, "grad_norm": 0.02821117644681053, "learning_rate": 0.000359775369751285, "loss": 0.5677, "step": 24875 }, { "epoch": 1.263126499219454, "grad_norm": 0.029306147854662686, "learning_rate": 0.00035956274764539504, "loss": 0.6073, "step": 24880 }, { "epoch": 1.2633803353174855, "grad_norm": 0.02845788257144992, "learning_rate": 0.0003593501531066373, "loss": 0.6127, "step": 24885 }, { "epoch": 1.263634171415517, "grad_norm": 0.027193708681809012, "learning_rate": 0.00035913758617674315, "loss": 0.5794, "step": 24890 }, { "epoch": 1.2638880075135486, "grad_norm": 0.02680201012769047, "learning_rate": 0.0003589250468974383, "loss": 0.6102, "step": 24895 }, { "epoch": 1.26414184361158, "grad_norm": 0.02544933561377219, "learning_rate": 0.00035871253531044323, "loss": 0.5997, "step": 24900 }, { "epoch": 1.2643956797096116, "grad_norm": 0.032412267373466815, "learning_rate": 0.00035850005145747287, "loss": 0.6029, "step": 24905 }, { "epoch": 1.264649515807643, "grad_norm": 0.031044574292238936, "learning_rate": 0.00035828759538023653, "loss": 0.6182, "step": 24910 }, { "epoch": 1.2649033519056745, "grad_norm": 0.027985139698270958, "learning_rate": 0.00035807516712043876, "loss": 0.6337, "step": 24915 }, { "epoch": 1.265157188003706, "grad_norm": 0.03239381628690015, "learning_rate": 0.00035786276671977786, "loss": 0.6086, "step": 24920 }, { "epoch": 1.2654110241017376, "grad_norm": 0.02877074507992951, "learning_rate": 0.000357650394219947, "loss": 0.6174, "step": 24925 }, { "epoch": 1.2656648601997689, "grad_norm": 0.049815071898487176, "learning_rate": 0.0003574380496626339, "loss": 0.5963, "step": 24930 }, { "epoch": 1.2659186962978004, "grad_norm": 0.02893165229317296, "learning_rate": 0.00035722573308952064, "loss": 0.5832, "step": 24935 }, { "epoch": 1.266172532395832, "grad_norm": 0.029297532433242596, "learning_rate": 0.000357013444542284, "loss": 0.6136, "step": 24940 }, { "epoch": 1.2664263684938635, "grad_norm": 0.03392869872184223, "learning_rate": 0.00035680118406259515, "loss": 0.6278, "step": 24945 }, { "epoch": 1.266680204591895, "grad_norm": 0.02813966918577347, "learning_rate": 0.00035658895169211966, "loss": 0.5885, "step": 24950 }, { "epoch": 1.2669340406899265, "grad_norm": 0.028486671884618262, "learning_rate": 0.00035637674747251785, "loss": 0.5967, "step": 24955 }, { "epoch": 1.267187876787958, "grad_norm": 0.02502056382375222, "learning_rate": 0.00035616457144544425, "loss": 0.6038, "step": 24960 }, { "epoch": 1.2674417128859896, "grad_norm": 0.027714016472550753, "learning_rate": 0.0003559524236525479, "loss": 0.6093, "step": 24965 }, { "epoch": 1.2676955489840211, "grad_norm": 0.031562386322391514, "learning_rate": 0.0003557403041354724, "loss": 0.5907, "step": 24970 }, { "epoch": 1.2679493850820525, "grad_norm": 0.02864531231394603, "learning_rate": 0.0003555282129358558, "loss": 0.6018, "step": 24975 }, { "epoch": 1.268203221180084, "grad_norm": 0.02689602798979903, "learning_rate": 0.0003553161500953306, "loss": 0.5972, "step": 24980 }, { "epoch": 1.2684570572781155, "grad_norm": 0.02826774609152114, "learning_rate": 0.0003551041156555236, "loss": 0.6114, "step": 24985 }, { "epoch": 1.268710893376147, "grad_norm": 0.024889738273216142, "learning_rate": 0.000354892109658056, "loss": 0.5731, "step": 24990 }, { "epoch": 1.2689647294741786, "grad_norm": 0.0290855740284235, "learning_rate": 0.00035468013214454375, "loss": 0.6265, "step": 24995 }, { "epoch": 1.26921856557221, "grad_norm": 0.025147655393621335, "learning_rate": 0.0003544681831565968, "loss": 0.5727, "step": 25000 }, { "epoch": 1.2694724016702414, "grad_norm": 0.027850633060113886, "learning_rate": 0.0003542562627358197, "loss": 0.6586, "step": 25005 }, { "epoch": 1.269726237768273, "grad_norm": 0.028524114155326106, "learning_rate": 0.0003540443709238114, "loss": 0.603, "step": 25010 }, { "epoch": 1.2699800738663045, "grad_norm": 0.026547507456507764, "learning_rate": 0.00035383250776216526, "loss": 0.5798, "step": 25015 }, { "epoch": 1.270233909964336, "grad_norm": 0.033745480267672955, "learning_rate": 0.00035362067329246884, "loss": 0.6219, "step": 25020 }, { "epoch": 1.2704877460623676, "grad_norm": 0.031136247720860547, "learning_rate": 0.0003534088675563043, "loss": 0.6123, "step": 25025 }, { "epoch": 1.2707415821603991, "grad_norm": 0.03152931167150692, "learning_rate": 0.0003531970905952478, "loss": 0.6018, "step": 25030 }, { "epoch": 1.2709954182584307, "grad_norm": 0.029107034590418336, "learning_rate": 0.00035298534245087055, "loss": 0.6112, "step": 25035 }, { "epoch": 1.271249254356462, "grad_norm": 0.029153278704303936, "learning_rate": 0.0003527736231647374, "loss": 0.6135, "step": 25040 }, { "epoch": 1.2715030904544935, "grad_norm": 0.028628733674810863, "learning_rate": 0.0003525619327784078, "loss": 0.6202, "step": 25045 }, { "epoch": 1.271756926552525, "grad_norm": 0.027573536898978093, "learning_rate": 0.00035235027133343546, "loss": 0.6095, "step": 25050 }, { "epoch": 1.2720107626505566, "grad_norm": 0.027292391511368762, "learning_rate": 0.0003521386388713686, "loss": 0.6192, "step": 25055 }, { "epoch": 1.272264598748588, "grad_norm": 0.027376319694489386, "learning_rate": 0.0003519270354337495, "loss": 0.6016, "step": 25060 }, { "epoch": 1.2725184348466194, "grad_norm": 0.0307698996068503, "learning_rate": 0.0003517154610621149, "loss": 0.6177, "step": 25065 }, { "epoch": 1.272772270944651, "grad_norm": 0.029733316719815772, "learning_rate": 0.0003515039157979959, "loss": 0.5975, "step": 25070 }, { "epoch": 1.2730261070426825, "grad_norm": 0.029034920539691728, "learning_rate": 0.0003512923996829176, "loss": 0.6036, "step": 25075 }, { "epoch": 1.273279943140714, "grad_norm": 0.027755002067292672, "learning_rate": 0.0003510809127583997, "loss": 0.6228, "step": 25080 }, { "epoch": 1.2735337792387456, "grad_norm": 0.02752416026872927, "learning_rate": 0.0003508694550659559, "loss": 0.6148, "step": 25085 }, { "epoch": 1.273787615336777, "grad_norm": 0.03140642679468924, "learning_rate": 0.00035065802664709426, "loss": 0.6473, "step": 25090 }, { "epoch": 1.2740414514348086, "grad_norm": 0.02731645813861016, "learning_rate": 0.00035044662754331736, "loss": 0.6096, "step": 25095 }, { "epoch": 1.2742952875328402, "grad_norm": 0.028038349697800922, "learning_rate": 0.00035023525779612165, "loss": 0.6118, "step": 25100 }, { "epoch": 1.2745491236308715, "grad_norm": 0.02812063284914319, "learning_rate": 0.0003500239174469979, "loss": 0.6333, "step": 25105 }, { "epoch": 1.274802959728903, "grad_norm": 0.027447371436890383, "learning_rate": 0.0003498126065374313, "loss": 0.6001, "step": 25110 }, { "epoch": 1.2750567958269345, "grad_norm": 0.02661645071417667, "learning_rate": 0.00034960132510890096, "loss": 0.5971, "step": 25115 }, { "epoch": 1.275310631924966, "grad_norm": 0.028025029699986024, "learning_rate": 0.0003493900732028806, "loss": 0.6089, "step": 25120 }, { "epoch": 1.2755644680229976, "grad_norm": 0.026129176142411142, "learning_rate": 0.0003491788508608377, "loss": 0.6196, "step": 25125 }, { "epoch": 1.275818304121029, "grad_norm": 0.028552217031028188, "learning_rate": 0.00034896765812423425, "loss": 0.6023, "step": 25130 }, { "epoch": 1.2760721402190605, "grad_norm": 0.02776637813431517, "learning_rate": 0.00034875649503452626, "loss": 0.6167, "step": 25135 }, { "epoch": 1.276325976317092, "grad_norm": 0.029555195836249964, "learning_rate": 0.0003485453616331641, "loss": 0.564, "step": 25140 }, { "epoch": 1.2765798124151235, "grad_norm": 0.030949047199487534, "learning_rate": 0.00034833425796159214, "loss": 0.5912, "step": 25145 }, { "epoch": 1.276833648513155, "grad_norm": 0.025986719401814715, "learning_rate": 0.00034812318406124876, "loss": 0.6113, "step": 25150 }, { "epoch": 1.2770874846111866, "grad_norm": 0.025434387276087878, "learning_rate": 0.0003479121399735672, "loss": 0.5851, "step": 25155 }, { "epoch": 1.2773413207092181, "grad_norm": 0.027791338328374767, "learning_rate": 0.00034770112573997405, "loss": 0.6107, "step": 25160 }, { "epoch": 1.2775951568072497, "grad_norm": 0.026727296547211475, "learning_rate": 0.0003474901414018904, "loss": 0.6309, "step": 25165 }, { "epoch": 1.277848992905281, "grad_norm": 0.030312697905663828, "learning_rate": 0.00034727918700073145, "loss": 0.5976, "step": 25170 }, { "epoch": 1.2781028290033125, "grad_norm": 0.028595756359976507, "learning_rate": 0.0003470682625779065, "loss": 0.6439, "step": 25175 }, { "epoch": 1.278356665101344, "grad_norm": 0.029811031012164202, "learning_rate": 0.0003468573681748188, "loss": 0.6089, "step": 25180 }, { "epoch": 1.2786105011993756, "grad_norm": 0.02929733820698927, "learning_rate": 0.00034664650383286615, "loss": 0.6075, "step": 25185 }, { "epoch": 1.2788643372974071, "grad_norm": 0.026264468605810394, "learning_rate": 0.00034643566959343997, "loss": 0.6193, "step": 25190 }, { "epoch": 1.2791181733954384, "grad_norm": 0.02799327802095198, "learning_rate": 0.0003462248654979261, "loss": 0.5721, "step": 25195 }, { "epoch": 1.27937200949347, "grad_norm": 0.028271067651858273, "learning_rate": 0.0003460140915877041, "loss": 0.5757, "step": 25200 }, { "epoch": 1.2796258455915015, "grad_norm": 0.0260093602987616, "learning_rate": 0.00034580334790414814, "loss": 0.5691, "step": 25205 }, { "epoch": 1.279879681689533, "grad_norm": 0.031006771914237605, "learning_rate": 0.0003455926344886259, "loss": 0.5903, "step": 25210 }, { "epoch": 1.2801335177875646, "grad_norm": 0.02878310556269228, "learning_rate": 0.0003453819513824995, "loss": 0.6153, "step": 25215 }, { "epoch": 1.280387353885596, "grad_norm": 0.02764397103930344, "learning_rate": 0.00034517129862712506, "loss": 0.5977, "step": 25220 }, { "epoch": 1.2806411899836276, "grad_norm": 0.026845530104639168, "learning_rate": 0.00034496067626385254, "loss": 0.6024, "step": 25225 }, { "epoch": 1.2808950260816592, "grad_norm": 0.0259236950377756, "learning_rate": 0.000344750084334026, "loss": 0.584, "step": 25230 }, { "epoch": 1.2811488621796907, "grad_norm": 0.030580148233185733, "learning_rate": 0.00034453952287898375, "loss": 0.5768, "step": 25235 }, { "epoch": 1.281402698277722, "grad_norm": 0.029489781601093053, "learning_rate": 0.0003443289919400579, "loss": 0.5846, "step": 25240 }, { "epoch": 1.2816565343757536, "grad_norm": 0.02766080091640391, "learning_rate": 0.0003441184915585746, "loss": 0.5972, "step": 25245 }, { "epoch": 1.281910370473785, "grad_norm": 0.030089603994821315, "learning_rate": 0.000343908021775854, "loss": 0.5846, "step": 25250 }, { "epoch": 1.2821642065718166, "grad_norm": 0.02369353558472577, "learning_rate": 0.00034369758263321025, "loss": 0.5692, "step": 25255 }, { "epoch": 1.2824180426698482, "grad_norm": 0.026034163264509837, "learning_rate": 0.0003434871741719516, "loss": 0.5927, "step": 25260 }, { "epoch": 1.2826718787678795, "grad_norm": 0.02701280623926368, "learning_rate": 0.0003432767964333802, "loss": 0.5876, "step": 25265 }, { "epoch": 1.282925714865911, "grad_norm": 0.02716739522074331, "learning_rate": 0.00034306644945879174, "loss": 0.6079, "step": 25270 }, { "epoch": 1.2831795509639425, "grad_norm": 0.029035033158042442, "learning_rate": 0.0003428561332894769, "loss": 0.6029, "step": 25275 }, { "epoch": 1.283433387061974, "grad_norm": 0.028289979033931562, "learning_rate": 0.0003426458479667194, "loss": 0.5848, "step": 25280 }, { "epoch": 1.2836872231600056, "grad_norm": 0.03044334715455197, "learning_rate": 0.00034243559353179726, "loss": 0.6043, "step": 25285 }, { "epoch": 1.2839410592580371, "grad_norm": 0.028852110449318884, "learning_rate": 0.00034222537002598233, "loss": 0.6129, "step": 25290 }, { "epoch": 1.2841948953560687, "grad_norm": 0.026970010323277418, "learning_rate": 0.00034201517749054037, "loss": 0.5788, "step": 25295 }, { "epoch": 1.2844487314541002, "grad_norm": 0.028157676680659105, "learning_rate": 0.0003418050159667313, "loss": 0.613, "step": 25300 }, { "epoch": 1.2847025675521315, "grad_norm": 0.0271912091881284, "learning_rate": 0.00034159488549580865, "loss": 0.6112, "step": 25305 }, { "epoch": 1.284956403650163, "grad_norm": 0.028035446571760513, "learning_rate": 0.00034138478611902, "loss": 0.5876, "step": 25310 }, { "epoch": 1.2852102397481946, "grad_norm": 0.041359666954372785, "learning_rate": 0.0003411747178776068, "loss": 0.7364, "step": 25315 }, { "epoch": 1.2854640758462261, "grad_norm": 0.06121678359480143, "learning_rate": 0.00034096468081280443, "loss": 0.5903, "step": 25320 }, { "epoch": 1.2857179119442577, "grad_norm": 0.029400835643998337, "learning_rate": 0.00034075467496584214, "loss": 0.5676, "step": 25325 }, { "epoch": 1.285971748042289, "grad_norm": 0.03005040168147061, "learning_rate": 0.00034054470037794284, "loss": 0.6267, "step": 25330 }, { "epoch": 1.2862255841403205, "grad_norm": 0.028708999990763068, "learning_rate": 0.0003403347570903238, "loss": 0.6267, "step": 25335 }, { "epoch": 1.286479420238352, "grad_norm": 0.036145280455055544, "learning_rate": 0.0003401248451441957, "loss": 0.5982, "step": 25340 }, { "epoch": 1.2867332563363836, "grad_norm": 0.03056795548079916, "learning_rate": 0.0003399149645807632, "loss": 0.6092, "step": 25345 }, { "epoch": 1.2869870924344151, "grad_norm": 0.027508535317531085, "learning_rate": 0.00033970511544122476, "loss": 0.6267, "step": 25350 }, { "epoch": 1.2872409285324466, "grad_norm": 0.030993417253201525, "learning_rate": 0.0003394952977667728, "loss": 0.6085, "step": 25355 }, { "epoch": 1.2874947646304782, "grad_norm": 0.029539225872246257, "learning_rate": 0.0003392855115985935, "loss": 0.6541, "step": 25360 }, { "epoch": 1.2877486007285097, "grad_norm": 0.029873743054143562, "learning_rate": 0.00033907575697786677, "loss": 0.6135, "step": 25365 }, { "epoch": 1.288002436826541, "grad_norm": 0.02819392067063322, "learning_rate": 0.0003388660339457664, "loss": 0.611, "step": 25370 }, { "epoch": 1.2882562729245726, "grad_norm": 0.03235327440691328, "learning_rate": 0.00033865634254345996, "loss": 0.6006, "step": 25375 }, { "epoch": 1.288510109022604, "grad_norm": 0.027920128144208082, "learning_rate": 0.0003384466828121089, "loss": 0.6243, "step": 25380 }, { "epoch": 1.2887639451206356, "grad_norm": 0.0806972780893091, "learning_rate": 0.0003382370547928683, "loss": 0.5951, "step": 25385 }, { "epoch": 1.2890177812186672, "grad_norm": 0.03263680448886996, "learning_rate": 0.000338027458526887, "loss": 0.6198, "step": 25390 }, { "epoch": 1.2892716173166985, "grad_norm": 0.036024407448976885, "learning_rate": 0.00033781789405530794, "loss": 0.5847, "step": 25395 }, { "epoch": 1.28952545341473, "grad_norm": 0.02692777007014469, "learning_rate": 0.00033760836141926754, "loss": 0.5773, "step": 25400 }, { "epoch": 1.2897792895127616, "grad_norm": 0.026793968746867985, "learning_rate": 0.000337398860659896, "loss": 0.6375, "step": 25405 }, { "epoch": 1.290033125610793, "grad_norm": 0.02998747619810983, "learning_rate": 0.0003371893918183171, "loss": 0.6238, "step": 25410 }, { "epoch": 1.2902869617088246, "grad_norm": 0.029174161982708537, "learning_rate": 0.0003369799549356487, "loss": 0.6109, "step": 25415 }, { "epoch": 1.2905407978068562, "grad_norm": 0.030860868102447876, "learning_rate": 0.00033677055005300224, "loss": 0.5718, "step": 25420 }, { "epoch": 1.2907946339048877, "grad_norm": 0.029306979140846956, "learning_rate": 0.0003365611772114827, "loss": 0.6484, "step": 25425 }, { "epoch": 1.2910484700029192, "grad_norm": 0.03111708126339167, "learning_rate": 0.000336351836452189, "loss": 0.6196, "step": 25430 }, { "epoch": 1.2913023061009505, "grad_norm": 0.028985298077485806, "learning_rate": 0.00033614252781621374, "loss": 0.6177, "step": 25435 }, { "epoch": 1.291556142198982, "grad_norm": 0.025853488397298143, "learning_rate": 0.0003359332513446431, "loss": 0.5751, "step": 25440 }, { "epoch": 1.2918099782970136, "grad_norm": 0.029232810283564, "learning_rate": 0.000335724007078557, "loss": 0.586, "step": 25445 }, { "epoch": 1.2920638143950451, "grad_norm": 0.02965806486167479, "learning_rate": 0.0003355147950590291, "loss": 0.5935, "step": 25450 }, { "epoch": 1.2923176504930767, "grad_norm": 0.02713001405556292, "learning_rate": 0.00033530561532712653, "loss": 0.6166, "step": 25455 }, { "epoch": 1.292571486591108, "grad_norm": 0.028942700247331737, "learning_rate": 0.00033509646792391045, "loss": 0.6066, "step": 25460 }, { "epoch": 1.2928253226891395, "grad_norm": 0.029197425486024237, "learning_rate": 0.0003348873528904353, "loss": 0.6116, "step": 25465 }, { "epoch": 1.293079158787171, "grad_norm": 0.029809908165749972, "learning_rate": 0.0003346782702677494, "loss": 0.6176, "step": 25470 }, { "epoch": 1.2933329948852026, "grad_norm": 0.028856714818476452, "learning_rate": 0.0003344692200968946, "loss": 0.6111, "step": 25475 }, { "epoch": 1.2935868309832341, "grad_norm": 0.026039114929495472, "learning_rate": 0.00033426020241890636, "loss": 0.5951, "step": 25480 }, { "epoch": 1.2938406670812657, "grad_norm": 0.0585529360848667, "learning_rate": 0.00033405121727481384, "loss": 0.624, "step": 25485 }, { "epoch": 1.2940945031792972, "grad_norm": 0.027667995752684214, "learning_rate": 0.00033384226470563983, "loss": 0.6064, "step": 25490 }, { "epoch": 1.2943483392773287, "grad_norm": 0.028725634343344866, "learning_rate": 0.0003336333447524006, "loss": 0.6274, "step": 25495 }, { "epoch": 1.2946021753753603, "grad_norm": 0.028430683815105984, "learning_rate": 0.0003334244574561061, "loss": 0.6421, "step": 25500 }, { "epoch": 1.2948560114733916, "grad_norm": 0.027057871989055245, "learning_rate": 0.0003332156028577599, "loss": 0.6324, "step": 25505 }, { "epoch": 1.2951098475714231, "grad_norm": 0.02832766183842246, "learning_rate": 0.00033300678099835914, "loss": 0.5839, "step": 25510 }, { "epoch": 1.2953636836694546, "grad_norm": 0.02635392466817539, "learning_rate": 0.00033279799191889426, "loss": 0.6095, "step": 25515 }, { "epoch": 1.2956175197674862, "grad_norm": 0.029069370737908773, "learning_rate": 0.00033258923566034995, "loss": 0.621, "step": 25520 }, { "epoch": 1.2958713558655177, "grad_norm": 0.026099512302746493, "learning_rate": 0.0003323805122637038, "loss": 0.5786, "step": 25525 }, { "epoch": 1.296125191963549, "grad_norm": 0.02825729040764705, "learning_rate": 0.0003321718217699271, "loss": 0.6344, "step": 25530 }, { "epoch": 1.2963790280615806, "grad_norm": 0.03260357878621704, "learning_rate": 0.00033196316421998495, "loss": 0.6081, "step": 25535 }, { "epoch": 1.296632864159612, "grad_norm": 0.02919353161966156, "learning_rate": 0.0003317545396548356, "loss": 0.6212, "step": 25540 }, { "epoch": 1.2968867002576436, "grad_norm": 0.02781717557545044, "learning_rate": 0.00033154594811543104, "loss": 0.5973, "step": 25545 }, { "epoch": 1.2971405363556752, "grad_norm": 0.032028271804346094, "learning_rate": 0.00033133738964271687, "loss": 0.5893, "step": 25550 }, { "epoch": 1.2973943724537067, "grad_norm": 0.02813556533431577, "learning_rate": 0.00033112886427763197, "loss": 0.626, "step": 25555 }, { "epoch": 1.2976482085517382, "grad_norm": 0.028052360819986542, "learning_rate": 0.0003309203720611088, "loss": 0.6392, "step": 25560 }, { "epoch": 1.2979020446497698, "grad_norm": 0.030273288659676, "learning_rate": 0.00033071191303407345, "loss": 0.6101, "step": 25565 }, { "epoch": 1.298155880747801, "grad_norm": 0.027074232386228487, "learning_rate": 0.00033050348723744527, "loss": 0.6073, "step": 25570 }, { "epoch": 1.2984097168458326, "grad_norm": 0.029065233096742898, "learning_rate": 0.00033029509471213726, "loss": 0.6311, "step": 25575 }, { "epoch": 1.2986635529438642, "grad_norm": 0.02957302867036165, "learning_rate": 0.00033008673549905586, "loss": 0.6357, "step": 25580 }, { "epoch": 1.2989173890418957, "grad_norm": 0.033188152879306716, "learning_rate": 0.000329878409639101, "loss": 0.626, "step": 25585 }, { "epoch": 1.2991712251399272, "grad_norm": 0.035591408716344274, "learning_rate": 0.00032967011717316587, "loss": 0.6218, "step": 25590 }, { "epoch": 1.2994250612379585, "grad_norm": 0.03394592905804857, "learning_rate": 0.00032946185814213734, "loss": 0.6559, "step": 25595 }, { "epoch": 1.29967889733599, "grad_norm": 0.02842935363624207, "learning_rate": 0.00032925363258689557, "loss": 0.596, "step": 25600 }, { "epoch": 1.2999327334340216, "grad_norm": 0.031310216294888865, "learning_rate": 0.0003290454405483142, "loss": 0.6082, "step": 25605 }, { "epoch": 1.3001865695320531, "grad_norm": 0.025189644752392337, "learning_rate": 0.00032883728206726035, "loss": 0.5929, "step": 25610 }, { "epoch": 1.3004404056300847, "grad_norm": 0.027456690323845262, "learning_rate": 0.00032862915718459443, "loss": 0.5922, "step": 25615 }, { "epoch": 1.3006942417281162, "grad_norm": 0.029118299634491845, "learning_rate": 0.0003284210659411703, "loss": 0.6153, "step": 25620 }, { "epoch": 1.3009480778261477, "grad_norm": 0.02965442297016434, "learning_rate": 0.0003282130083778352, "loss": 0.592, "step": 25625 }, { "epoch": 1.3012019139241793, "grad_norm": 0.029053554178905424, "learning_rate": 0.0003280049845354299, "loss": 0.63, "step": 25630 }, { "epoch": 1.3014557500222106, "grad_norm": 0.02827862477823921, "learning_rate": 0.00032779699445478826, "loss": 0.5944, "step": 25635 }, { "epoch": 1.3017095861202421, "grad_norm": 0.02760590412905903, "learning_rate": 0.000327589038176738, "loss": 0.583, "step": 25640 }, { "epoch": 1.3019634222182737, "grad_norm": 0.030066226313471247, "learning_rate": 0.00032738111574209973, "loss": 0.6117, "step": 25645 }, { "epoch": 1.3022172583163052, "grad_norm": 0.028274410863274077, "learning_rate": 0.0003271732271916876, "loss": 0.5737, "step": 25650 }, { "epoch": 1.3024710944143367, "grad_norm": 0.02739952915577895, "learning_rate": 0.0003269653725663091, "loss": 0.6535, "step": 25655 }, { "epoch": 1.302724930512368, "grad_norm": 0.03006061695984897, "learning_rate": 0.000326757551906765, "loss": 0.6358, "step": 25660 }, { "epoch": 1.3029787666103996, "grad_norm": 0.026920479210641772, "learning_rate": 0.00032654976525384947, "loss": 0.6103, "step": 25665 }, { "epoch": 1.303232602708431, "grad_norm": 0.02933334911317878, "learning_rate": 0.0003263420126483501, "loss": 0.5999, "step": 25670 }, { "epoch": 1.3034864388064626, "grad_norm": 0.02697904807111223, "learning_rate": 0.0003261342941310476, "loss": 0.5835, "step": 25675 }, { "epoch": 1.3037402749044942, "grad_norm": 0.029373682387251757, "learning_rate": 0.00032592660974271615, "loss": 0.6444, "step": 25680 }, { "epoch": 1.3039941110025257, "grad_norm": 0.028900300344976558, "learning_rate": 0.000325718959524123, "loss": 0.6397, "step": 25685 }, { "epoch": 1.3042479471005572, "grad_norm": 0.02712902179165636, "learning_rate": 0.000325511343516029, "loss": 0.5965, "step": 25690 }, { "epoch": 1.3045017831985888, "grad_norm": 0.02632904265591956, "learning_rate": 0.00032530376175918794, "loss": 0.5781, "step": 25695 }, { "epoch": 1.30475561929662, "grad_norm": 0.026010018537155625, "learning_rate": 0.00032509621429434744, "loss": 0.5944, "step": 25700 }, { "epoch": 1.3050094553946516, "grad_norm": 0.030489242140081044, "learning_rate": 0.0003248887011622478, "loss": 0.6148, "step": 25705 }, { "epoch": 1.3052632914926832, "grad_norm": 0.027463234295623654, "learning_rate": 0.00032468122240362287, "loss": 0.6294, "step": 25710 }, { "epoch": 1.3055171275907147, "grad_norm": 0.02869038255783698, "learning_rate": 0.00032447377805919957, "loss": 0.6245, "step": 25715 }, { "epoch": 1.3057709636887462, "grad_norm": 0.029608890175205915, "learning_rate": 0.00032426636816969837, "loss": 0.608, "step": 25720 }, { "epoch": 1.3060247997867775, "grad_norm": 0.02543601952860072, "learning_rate": 0.0003240589927758327, "loss": 0.6167, "step": 25725 }, { "epoch": 1.306278635884809, "grad_norm": 0.033009389699433615, "learning_rate": 0.0003238516519183093, "loss": 0.6446, "step": 25730 }, { "epoch": 1.3065324719828406, "grad_norm": 0.026005252875981728, "learning_rate": 0.0003236443456378282, "loss": 0.5932, "step": 25735 }, { "epoch": 1.3067863080808721, "grad_norm": 0.024551094242176226, "learning_rate": 0.0003234370739750826, "loss": 0.5822, "step": 25740 }, { "epoch": 1.3070401441789037, "grad_norm": 0.024736220635632678, "learning_rate": 0.00032322983697075883, "loss": 0.5927, "step": 25745 }, { "epoch": 1.3072939802769352, "grad_norm": 0.030804253047267485, "learning_rate": 0.0003230226346655365, "loss": 0.6132, "step": 25750 }, { "epoch": 1.3075478163749668, "grad_norm": 0.027389368839425356, "learning_rate": 0.0003228154671000882, "loss": 0.6065, "step": 25755 }, { "epoch": 1.3078016524729983, "grad_norm": 0.02976863723494786, "learning_rate": 0.0003226083343150803, "loss": 0.5868, "step": 25760 }, { "epoch": 1.3080554885710298, "grad_norm": 0.028289110416854803, "learning_rate": 0.0003224012363511717, "loss": 0.5797, "step": 25765 }, { "epoch": 1.3083093246690611, "grad_norm": 0.02675387478537107, "learning_rate": 0.0003221941732490148, "loss": 0.578, "step": 25770 }, { "epoch": 1.3085631607670927, "grad_norm": 0.024348910604433026, "learning_rate": 0.00032198714504925487, "loss": 0.6056, "step": 25775 }, { "epoch": 1.3088169968651242, "grad_norm": 0.02448261808281384, "learning_rate": 0.0003217801517925307, "loss": 0.5682, "step": 25780 }, { "epoch": 1.3090708329631557, "grad_norm": 0.028228655493948287, "learning_rate": 0.0003215731935194739, "loss": 0.5715, "step": 25785 }, { "epoch": 1.3093246690611873, "grad_norm": 0.028107113762425823, "learning_rate": 0.0003213662702707094, "loss": 0.6293, "step": 25790 }, { "epoch": 1.3095785051592186, "grad_norm": 0.028179556014225407, "learning_rate": 0.00032115938208685527, "loss": 0.6116, "step": 25795 }, { "epoch": 1.3098323412572501, "grad_norm": 0.02482212051819286, "learning_rate": 0.0003209525290085226, "loss": 0.6076, "step": 25800 }, { "epoch": 1.3100861773552817, "grad_norm": 0.027111771439777693, "learning_rate": 0.00032074571107631544, "loss": 0.5953, "step": 25805 }, { "epoch": 1.3103400134533132, "grad_norm": 0.03091439694578375, "learning_rate": 0.0003205389283308313, "loss": 0.6085, "step": 25810 }, { "epoch": 1.3105938495513447, "grad_norm": 0.02655356251528361, "learning_rate": 0.0003203321808126604, "loss": 0.5983, "step": 25815 }, { "epoch": 1.3108476856493763, "grad_norm": 0.027393409597124687, "learning_rate": 0.0003201254685623866, "loss": 0.6054, "step": 25820 }, { "epoch": 1.3111015217474078, "grad_norm": 0.029250320551148963, "learning_rate": 0.00031991879162058623, "loss": 0.6174, "step": 25825 }, { "epoch": 1.3113553578454393, "grad_norm": 0.02597569051325779, "learning_rate": 0.00031971215002782907, "loss": 0.572, "step": 25830 }, { "epoch": 1.3116091939434706, "grad_norm": 0.02775236679080708, "learning_rate": 0.00031950554382467766, "loss": 0.6038, "step": 25835 }, { "epoch": 1.3118630300415022, "grad_norm": 0.02663190789597413, "learning_rate": 0.000319298973051688, "loss": 0.6158, "step": 25840 }, { "epoch": 1.3121168661395337, "grad_norm": 0.0284197736392537, "learning_rate": 0.00031909243774940865, "loss": 0.6372, "step": 25845 }, { "epoch": 1.3123707022375652, "grad_norm": 0.027548474899481628, "learning_rate": 0.0003188859379583816, "loss": 0.5974, "step": 25850 }, { "epoch": 1.3126245383355968, "grad_norm": 0.03160551917572536, "learning_rate": 0.0003186794737191418, "loss": 0.6146, "step": 25855 }, { "epoch": 1.312878374433628, "grad_norm": 0.027287076665540304, "learning_rate": 0.000318473045072217, "loss": 0.5941, "step": 25860 }, { "epoch": 1.3131322105316596, "grad_norm": 0.023992879337934966, "learning_rate": 0.00031826665205812824, "loss": 0.5803, "step": 25865 }, { "epoch": 1.3133860466296912, "grad_norm": 0.027669371221433675, "learning_rate": 0.00031806029471738933, "loss": 0.6198, "step": 25870 }, { "epoch": 1.3136398827277227, "grad_norm": 0.02671633664430187, "learning_rate": 0.000317853973090507, "loss": 0.593, "step": 25875 }, { "epoch": 1.3138937188257542, "grad_norm": 0.02793216261377222, "learning_rate": 0.00031764768721798163, "loss": 0.5789, "step": 25880 }, { "epoch": 1.3141475549237858, "grad_norm": 0.02731971002133117, "learning_rate": 0.00031744143714030606, "loss": 0.6207, "step": 25885 }, { "epoch": 1.3144013910218173, "grad_norm": 0.028434241094825417, "learning_rate": 0.00031723522289796573, "loss": 0.6185, "step": 25890 }, { "epoch": 1.3146552271198488, "grad_norm": 0.028540803248445046, "learning_rate": 0.00031702904453143976, "loss": 0.5933, "step": 25895 }, { "epoch": 1.3149090632178801, "grad_norm": 0.026953785880044826, "learning_rate": 0.0003168229020811999, "loss": 0.6036, "step": 25900 }, { "epoch": 1.3151628993159117, "grad_norm": 0.026613662520399192, "learning_rate": 0.00031661679558771076, "loss": 0.6109, "step": 25905 }, { "epoch": 1.3154167354139432, "grad_norm": 0.02795893583643754, "learning_rate": 0.0003164107250914302, "loss": 0.6459, "step": 25910 }, { "epoch": 1.3156705715119748, "grad_norm": 0.02944992534407844, "learning_rate": 0.0003162046906328087, "loss": 0.6342, "step": 25915 }, { "epoch": 1.3159244076100063, "grad_norm": 0.02966145985946975, "learning_rate": 0.0003159986922522899, "loss": 0.6116, "step": 25920 }, { "epoch": 1.3161782437080376, "grad_norm": 0.02657490217100358, "learning_rate": 0.0003157927299903102, "loss": 0.6271, "step": 25925 }, { "epoch": 1.3164320798060691, "grad_norm": 0.030292020314888424, "learning_rate": 0.0003155868038872989, "loss": 0.6217, "step": 25930 }, { "epoch": 1.3166859159041007, "grad_norm": 0.027193219625924015, "learning_rate": 0.0003153809139836781, "loss": 0.6113, "step": 25935 }, { "epoch": 1.3169397520021322, "grad_norm": 0.025979288832930814, "learning_rate": 0.0003151750603198634, "loss": 0.5884, "step": 25940 }, { "epoch": 1.3171935881001637, "grad_norm": 0.027650721856013954, "learning_rate": 0.0003149692429362627, "loss": 0.6115, "step": 25945 }, { "epoch": 1.3174474241981953, "grad_norm": 0.04321705759063255, "learning_rate": 0.00031476346187327684, "loss": 0.5589, "step": 25950 }, { "epoch": 1.3177012602962268, "grad_norm": 0.02953759430519687, "learning_rate": 0.0003145577171712997, "loss": 0.6435, "step": 25955 }, { "epoch": 1.3179550963942583, "grad_norm": 0.02976861148752578, "learning_rate": 0.00031435200887071786, "loss": 0.5876, "step": 25960 }, { "epoch": 1.3182089324922897, "grad_norm": 0.029945340992226364, "learning_rate": 0.0003141463370119108, "loss": 0.5769, "step": 25965 }, { "epoch": 1.3184627685903212, "grad_norm": 0.02885223370831977, "learning_rate": 0.00031394070163525095, "loss": 0.5961, "step": 25970 }, { "epoch": 1.3187166046883527, "grad_norm": 0.02655038958008587, "learning_rate": 0.0003137351027811035, "loss": 0.6211, "step": 25975 }, { "epoch": 1.3189704407863843, "grad_norm": 0.028736710261595872, "learning_rate": 0.0003135295404898265, "loss": 0.6306, "step": 25980 }, { "epoch": 1.3192242768844158, "grad_norm": 0.025629643623405318, "learning_rate": 0.00031332401480177073, "loss": 0.5806, "step": 25985 }, { "epoch": 1.319478112982447, "grad_norm": 0.0315236199984097, "learning_rate": 0.0003131185257572799, "loss": 0.6029, "step": 25990 }, { "epoch": 1.3197319490804786, "grad_norm": 0.026962952223560408, "learning_rate": 0.0003129130733966904, "loss": 0.6314, "step": 25995 }, { "epoch": 1.3199857851785102, "grad_norm": 0.025113352602577875, "learning_rate": 0.00031270765776033173, "loss": 0.5761, "step": 26000 }, { "epoch": 1.3202396212765417, "grad_norm": 0.02774959288777949, "learning_rate": 0.00031250227888852576, "loss": 0.6107, "step": 26005 }, { "epoch": 1.3204934573745732, "grad_norm": 0.028081815646566775, "learning_rate": 0.0003122969368215874, "loss": 0.6131, "step": 26010 }, { "epoch": 1.3207472934726048, "grad_norm": 0.024649033429397636, "learning_rate": 0.0003120916315998243, "loss": 0.5723, "step": 26015 }, { "epoch": 1.3210011295706363, "grad_norm": 0.029640882815413334, "learning_rate": 0.0003118863632635368, "loss": 0.5865, "step": 26020 }, { "epoch": 1.3212549656686678, "grad_norm": 0.027402729438738876, "learning_rate": 0.00031168113185301815, "loss": 0.6228, "step": 26025 }, { "epoch": 1.3215088017666992, "grad_norm": 0.027658301605973684, "learning_rate": 0.00031147593740855407, "loss": 0.6096, "step": 26030 }, { "epoch": 1.3217626378647307, "grad_norm": 0.02731511992320659, "learning_rate": 0.00031127077997042336, "loss": 0.5888, "step": 26035 }, { "epoch": 1.3220164739627622, "grad_norm": 0.028606442812175385, "learning_rate": 0.0003110656595788973, "loss": 0.6475, "step": 26040 }, { "epoch": 1.3222703100607938, "grad_norm": 0.0287607462354693, "learning_rate": 0.0003108605762742401, "loss": 0.5958, "step": 26045 }, { "epoch": 1.3225241461588253, "grad_norm": 0.029863110849942694, "learning_rate": 0.00031065553009670857, "loss": 0.6094, "step": 26050 }, { "epoch": 1.3227779822568566, "grad_norm": 0.027090712423054435, "learning_rate": 0.00031045052108655193, "loss": 0.6336, "step": 26055 }, { "epoch": 1.3230318183548881, "grad_norm": 0.02937784697349492, "learning_rate": 0.0003102455492840129, "loss": 0.6217, "step": 26060 }, { "epoch": 1.3232856544529197, "grad_norm": 0.029022714406079907, "learning_rate": 0.00031004061472932634, "loss": 0.6354, "step": 26065 }, { "epoch": 1.3235394905509512, "grad_norm": 0.028879425031486005, "learning_rate": 0.00030983571746271977, "loss": 0.5959, "step": 26070 }, { "epoch": 1.3237933266489827, "grad_norm": 0.030569992407924736, "learning_rate": 0.0003096308575244135, "loss": 0.5909, "step": 26075 }, { "epoch": 1.3240471627470143, "grad_norm": 0.029680244643751776, "learning_rate": 0.00030942603495462054, "loss": 0.6236, "step": 26080 }, { "epoch": 1.3243009988450458, "grad_norm": 0.02748780326892449, "learning_rate": 0.0003092212497935465, "loss": 0.6153, "step": 26085 }, { "epoch": 1.3245548349430774, "grad_norm": 0.028037771734475348, "learning_rate": 0.0003090165020813897, "loss": 0.5937, "step": 26090 }, { "epoch": 1.3248086710411089, "grad_norm": 0.027653978505106987, "learning_rate": 0.00030881179185834114, "loss": 0.6487, "step": 26095 }, { "epoch": 1.3250625071391402, "grad_norm": 0.025856253247847263, "learning_rate": 0.0003086071191645844, "loss": 0.575, "step": 26100 }, { "epoch": 1.3253163432371717, "grad_norm": 0.027500878739771285, "learning_rate": 0.00030840248404029563, "loss": 0.5973, "step": 26105 }, { "epoch": 1.3255701793352033, "grad_norm": 0.02678773172568441, "learning_rate": 0.00030819788652564377, "loss": 0.5918, "step": 26110 }, { "epoch": 1.3258240154332348, "grad_norm": 0.027442031195545155, "learning_rate": 0.00030799332666079016, "loss": 0.6074, "step": 26115 }, { "epoch": 1.3260778515312663, "grad_norm": 0.026646571933537203, "learning_rate": 0.0003077888044858891, "loss": 0.6125, "step": 26120 }, { "epoch": 1.3263316876292977, "grad_norm": 0.027435400951568285, "learning_rate": 0.00030758432004108723, "loss": 0.6091, "step": 26125 }, { "epoch": 1.3265855237273292, "grad_norm": 0.029604787547089235, "learning_rate": 0.0003073798733665237, "loss": 0.6145, "step": 26130 }, { "epoch": 1.3268393598253607, "grad_norm": 0.02879876337462031, "learning_rate": 0.00030717546450233045, "loss": 0.6278, "step": 26135 }, { "epoch": 1.3270931959233923, "grad_norm": 0.02565081672618055, "learning_rate": 0.0003069710934886319, "loss": 0.5903, "step": 26140 }, { "epoch": 1.3273470320214238, "grad_norm": 0.029222816313687203, "learning_rate": 0.0003067667603655451, "loss": 0.5952, "step": 26145 }, { "epoch": 1.3276008681194553, "grad_norm": 0.024814862901078077, "learning_rate": 0.0003065624651731795, "loss": 0.5719, "step": 26150 }, { "epoch": 1.3278547042174869, "grad_norm": 0.029681277705251443, "learning_rate": 0.00030635820795163737, "loss": 0.6223, "step": 26155 }, { "epoch": 1.3281085403155184, "grad_norm": 0.028657582033078815, "learning_rate": 0.0003061539887410133, "loss": 0.5831, "step": 26160 }, { "epoch": 1.3283623764135497, "grad_norm": 0.03184096028268776, "learning_rate": 0.0003059498075813946, "loss": 0.5918, "step": 26165 }, { "epoch": 1.3286162125115812, "grad_norm": 0.028508433593837864, "learning_rate": 0.0003057456645128609, "loss": 0.636, "step": 26170 }, { "epoch": 1.3288700486096128, "grad_norm": 0.02924338402067012, "learning_rate": 0.00030554155957548425, "loss": 0.6169, "step": 26175 }, { "epoch": 1.3291238847076443, "grad_norm": 0.026748182863783304, "learning_rate": 0.00030533749280933, "loss": 0.6126, "step": 26180 }, { "epoch": 1.3293777208056758, "grad_norm": 0.025955259731216394, "learning_rate": 0.0003051334642544551, "loss": 0.5956, "step": 26185 }, { "epoch": 1.3296315569037072, "grad_norm": 0.026289719733363148, "learning_rate": 0.0003049294739509093, "loss": 0.5817, "step": 26190 }, { "epoch": 1.3298853930017387, "grad_norm": 0.026709276240798312, "learning_rate": 0.00030472552193873506, "loss": 0.6272, "step": 26195 }, { "epoch": 1.3301392290997702, "grad_norm": 0.02756895343840252, "learning_rate": 0.0003045216082579669, "loss": 0.5812, "step": 26200 }, { "epoch": 1.3303930651978018, "grad_norm": 0.029945543826129267, "learning_rate": 0.0003043177329486323, "loss": 0.6296, "step": 26205 }, { "epoch": 1.3306469012958333, "grad_norm": 0.028068513670879147, "learning_rate": 0.0003041138960507508, "loss": 0.6373, "step": 26210 }, { "epoch": 1.3309007373938648, "grad_norm": 0.02622626266091463, "learning_rate": 0.0003039100976043346, "loss": 0.6015, "step": 26215 }, { "epoch": 1.3311545734918964, "grad_norm": 0.028820854717106357, "learning_rate": 0.0003037063376493884, "loss": 0.5838, "step": 26220 }, { "epoch": 1.331408409589928, "grad_norm": 0.0272435313526718, "learning_rate": 0.00030350261622590926, "loss": 0.5846, "step": 26225 }, { "epoch": 1.3316622456879592, "grad_norm": 0.03282783334587489, "learning_rate": 0.0003032989333738865, "loss": 0.5957, "step": 26230 }, { "epoch": 1.3319160817859907, "grad_norm": 0.028198100893260695, "learning_rate": 0.0003030952891333021, "loss": 0.6016, "step": 26235 }, { "epoch": 1.3321699178840223, "grad_norm": 0.029202927002724994, "learning_rate": 0.00030289168354413065, "loss": 0.6004, "step": 26240 }, { "epoch": 1.3324237539820538, "grad_norm": 0.029858175659062916, "learning_rate": 0.00030268811664633865, "loss": 0.6144, "step": 26245 }, { "epoch": 1.3326775900800854, "grad_norm": 0.030089075213541826, "learning_rate": 0.0003024845884798855, "loss": 0.6015, "step": 26250 }, { "epoch": 1.3329314261781167, "grad_norm": 0.026813986047498607, "learning_rate": 0.00030228109908472247, "loss": 0.5818, "step": 26255 }, { "epoch": 1.3331852622761482, "grad_norm": 0.027047265564774867, "learning_rate": 0.00030207764850079374, "loss": 0.5911, "step": 26260 }, { "epoch": 1.3334390983741797, "grad_norm": 0.026670343872202892, "learning_rate": 0.00030187423676803556, "loss": 0.5624, "step": 26265 }, { "epoch": 1.3336929344722113, "grad_norm": 0.027254286961728828, "learning_rate": 0.00030167086392637665, "loss": 0.6125, "step": 26270 }, { "epoch": 1.3339467705702428, "grad_norm": 0.02876055978740514, "learning_rate": 0.0003014675300157381, "loss": 0.5764, "step": 26275 }, { "epoch": 1.3342006066682743, "grad_norm": 0.029270299985031185, "learning_rate": 0.00030126423507603327, "loss": 0.5949, "step": 26280 }, { "epoch": 1.3344544427663059, "grad_norm": 0.02794709410855056, "learning_rate": 0.00030106097914716804, "loss": 0.5946, "step": 26285 }, { "epoch": 1.3347082788643374, "grad_norm": 0.028706995842116157, "learning_rate": 0.0003008577622690405, "loss": 0.609, "step": 26290 }, { "epoch": 1.3349621149623687, "grad_norm": 0.02822030150958515, "learning_rate": 0.00030065458448154094, "loss": 0.6068, "step": 26295 }, { "epoch": 1.3352159510604003, "grad_norm": 0.02856846991201538, "learning_rate": 0.0003004514458245525, "loss": 0.6454, "step": 26300 }, { "epoch": 1.3354697871584318, "grad_norm": 0.026155065899490085, "learning_rate": 0.00030024834633795005, "loss": 0.5803, "step": 26305 }, { "epoch": 1.3357236232564633, "grad_norm": 0.026408409118437268, "learning_rate": 0.0003000452860616011, "loss": 0.6168, "step": 26310 }, { "epoch": 1.3359774593544949, "grad_norm": 0.02976291000463554, "learning_rate": 0.00029984226503536527, "loss": 0.5969, "step": 26315 }, { "epoch": 1.3362312954525262, "grad_norm": 0.030611920285802247, "learning_rate": 0.0002996392832990946, "loss": 0.6029, "step": 26320 }, { "epoch": 1.3364851315505577, "grad_norm": 0.030158550566162182, "learning_rate": 0.00029943634089263355, "loss": 0.613, "step": 26325 }, { "epoch": 1.3367389676485892, "grad_norm": 0.026977569847306444, "learning_rate": 0.0002992334378558185, "loss": 0.5903, "step": 26330 }, { "epoch": 1.3369928037466208, "grad_norm": 0.026158405751516976, "learning_rate": 0.00029903057422847834, "loss": 0.5795, "step": 26335 }, { "epoch": 1.3372466398446523, "grad_norm": 0.02936121534222341, "learning_rate": 0.0002988277500504343, "loss": 0.5562, "step": 26340 }, { "epoch": 1.3375004759426838, "grad_norm": 0.031745260792229106, "learning_rate": 0.00029862496536149966, "loss": 0.6131, "step": 26345 }, { "epoch": 1.3377543120407154, "grad_norm": 0.0284508215534703, "learning_rate": 0.00029842222020148, "loss": 0.6186, "step": 26350 }, { "epoch": 1.338008148138747, "grad_norm": 0.0290575135775951, "learning_rate": 0.0002982195146101734, "loss": 0.6247, "step": 26355 }, { "epoch": 1.3382619842367784, "grad_norm": 0.029004328150382643, "learning_rate": 0.00029801684862736956, "loss": 0.5825, "step": 26360 }, { "epoch": 1.3385158203348098, "grad_norm": 0.030293078754255836, "learning_rate": 0.0002978142222928512, "loss": 0.6104, "step": 26365 }, { "epoch": 1.3387696564328413, "grad_norm": 0.02735145151149089, "learning_rate": 0.0002976116356463927, "loss": 0.5835, "step": 26370 }, { "epoch": 1.3390234925308728, "grad_norm": 0.027258704053039148, "learning_rate": 0.00029740908872776087, "loss": 0.6099, "step": 26375 }, { "epoch": 1.3392773286289044, "grad_norm": 0.02932557114927946, "learning_rate": 0.00029720658157671447, "loss": 0.6185, "step": 26380 }, { "epoch": 1.339531164726936, "grad_norm": 0.027609490715430663, "learning_rate": 0.0002970041142330049, "loss": 0.5845, "step": 26385 }, { "epoch": 1.3397850008249672, "grad_norm": 0.02919006711873554, "learning_rate": 0.0002968016867363753, "loss": 0.6107, "step": 26390 }, { "epoch": 1.3400388369229987, "grad_norm": 0.026988411665933767, "learning_rate": 0.00029659929912656123, "loss": 0.5874, "step": 26395 }, { "epoch": 1.3402926730210303, "grad_norm": 0.02722790136318282, "learning_rate": 0.0002963969514432904, "loss": 0.5801, "step": 26400 }, { "epoch": 1.3405465091190618, "grad_norm": 0.02782240617480226, "learning_rate": 0.0002961946437262827, "loss": 0.5892, "step": 26405 }, { "epoch": 1.3408003452170933, "grad_norm": 0.031668855195347054, "learning_rate": 0.00029599237601525, "loss": 0.5893, "step": 26410 }, { "epoch": 1.3410541813151249, "grad_norm": 0.028729155619462817, "learning_rate": 0.00029579014834989653, "loss": 0.6352, "step": 26415 }, { "epoch": 1.3413080174131564, "grad_norm": 0.027180299124391998, "learning_rate": 0.00029558796076991836, "loss": 0.6302, "step": 26420 }, { "epoch": 1.341561853511188, "grad_norm": 0.029836764588936117, "learning_rate": 0.00029538581331500427, "loss": 0.5985, "step": 26425 }, { "epoch": 1.3418156896092193, "grad_norm": 0.026922976458125157, "learning_rate": 0.0002951837060248346, "loss": 0.6098, "step": 26430 }, { "epoch": 1.3420695257072508, "grad_norm": 0.029778015100382128, "learning_rate": 0.000294981638939082, "loss": 0.5643, "step": 26435 }, { "epoch": 1.3423233618052823, "grad_norm": 0.029289613421809226, "learning_rate": 0.0002947796120974113, "loss": 0.5744, "step": 26440 }, { "epoch": 1.3425771979033139, "grad_norm": 0.02752686859370424, "learning_rate": 0.0002945776255394793, "loss": 0.595, "step": 26445 }, { "epoch": 1.3428310340013454, "grad_norm": 0.032409087925674304, "learning_rate": 0.00029437567930493493, "loss": 0.5962, "step": 26450 }, { "epoch": 1.3430848700993767, "grad_norm": 0.027205998388497025, "learning_rate": 0.0002941737734334193, "loss": 0.6172, "step": 26455 }, { "epoch": 1.3433387061974082, "grad_norm": 0.02820274544100632, "learning_rate": 0.00029397190796456553, "loss": 0.6075, "step": 26460 }, { "epoch": 1.3435925422954398, "grad_norm": 0.026827904532525208, "learning_rate": 0.00029377008293799865, "loss": 0.6249, "step": 26465 }, { "epoch": 1.3438463783934713, "grad_norm": 0.030626332539165184, "learning_rate": 0.00029356829839333615, "loss": 0.6176, "step": 26470 }, { "epoch": 1.3441002144915029, "grad_norm": 0.02864478314705645, "learning_rate": 0.0002933665543701871, "loss": 0.606, "step": 26475 }, { "epoch": 1.3443540505895344, "grad_norm": 0.02824321755352412, "learning_rate": 0.0002931648509081529, "loss": 0.6383, "step": 26480 }, { "epoch": 1.344607886687566, "grad_norm": 0.0303296795860159, "learning_rate": 0.0002929631880468271, "loss": 0.6118, "step": 26485 }, { "epoch": 1.3448617227855975, "grad_norm": 0.029790925442726403, "learning_rate": 0.000292761565825795, "loss": 0.5817, "step": 26490 }, { "epoch": 1.3451155588836288, "grad_norm": 0.030992013558726647, "learning_rate": 0.000292559984284634, "loss": 0.6414, "step": 26495 }, { "epoch": 1.3453693949816603, "grad_norm": 0.026967684422556537, "learning_rate": 0.0002923584434629136, "loss": 0.5986, "step": 26500 }, { "epoch": 1.3456232310796918, "grad_norm": 0.030085714595619397, "learning_rate": 0.0002921569434001952, "loss": 0.5534, "step": 26505 }, { "epoch": 1.3458770671777234, "grad_norm": 0.03063655482577497, "learning_rate": 0.00029195548413603236, "loss": 0.5886, "step": 26510 }, { "epoch": 1.346130903275755, "grad_norm": 0.029534240937694026, "learning_rate": 0.0002917540657099703, "loss": 0.6144, "step": 26515 }, { "epoch": 1.3463847393737862, "grad_norm": 0.028097215716170357, "learning_rate": 0.0002915526881615469, "loss": 0.645, "step": 26520 }, { "epoch": 1.3466385754718178, "grad_norm": 0.029377160605715748, "learning_rate": 0.000291351351530291, "loss": 0.6368, "step": 26525 }, { "epoch": 1.3468924115698493, "grad_norm": 0.02933242695731079, "learning_rate": 0.0002911500558557245, "loss": 0.6082, "step": 26530 }, { "epoch": 1.3471462476678808, "grad_norm": 0.028180431060371405, "learning_rate": 0.0002909488011773603, "loss": 0.6081, "step": 26535 }, { "epoch": 1.3474000837659124, "grad_norm": 0.03064014983668662, "learning_rate": 0.000290747587534704, "loss": 0.63, "step": 26540 }, { "epoch": 1.347653919863944, "grad_norm": 0.029098701408683283, "learning_rate": 0.00029054641496725276, "loss": 0.6625, "step": 26545 }, { "epoch": 1.3479077559619754, "grad_norm": 0.02798631437136471, "learning_rate": 0.00029034528351449564, "loss": 0.6194, "step": 26550 }, { "epoch": 1.348161592060007, "grad_norm": 0.02673095745054032, "learning_rate": 0.00029014419321591396, "loss": 0.5963, "step": 26555 }, { "epoch": 1.3484154281580383, "grad_norm": 0.02761120728745198, "learning_rate": 0.00028994314411098044, "loss": 0.5978, "step": 26560 }, { "epoch": 1.3486692642560698, "grad_norm": 0.028680017149230394, "learning_rate": 0.00028974213623916037, "loss": 0.5822, "step": 26565 }, { "epoch": 1.3489231003541013, "grad_norm": 0.025146493098096728, "learning_rate": 0.0002895411696399102, "loss": 0.633, "step": 26570 }, { "epoch": 1.3491769364521329, "grad_norm": 0.028598982100998337, "learning_rate": 0.000289340244352679, "loss": 0.6043, "step": 26575 }, { "epoch": 1.3494307725501644, "grad_norm": 0.025950084734208317, "learning_rate": 0.00028913936041690715, "loss": 0.5921, "step": 26580 }, { "epoch": 1.3496846086481957, "grad_norm": 0.028946723225998998, "learning_rate": 0.00028893851787202746, "loss": 0.6123, "step": 26585 }, { "epoch": 1.3499384447462273, "grad_norm": 0.0301302074367496, "learning_rate": 0.00028873771675746394, "loss": 0.5993, "step": 26590 }, { "epoch": 1.3501922808442588, "grad_norm": 0.030918589735767922, "learning_rate": 0.0002885369571126333, "loss": 0.6086, "step": 26595 }, { "epoch": 1.3504461169422903, "grad_norm": 0.03243420961755178, "learning_rate": 0.000288336238976943, "loss": 0.6647, "step": 26600 }, { "epoch": 1.3506999530403219, "grad_norm": 0.026802703726005336, "learning_rate": 0.00028813556238979377, "loss": 0.6265, "step": 26605 }, { "epoch": 1.3509537891383534, "grad_norm": 0.027980528459403853, "learning_rate": 0.000287934927390577, "loss": 0.5826, "step": 26610 }, { "epoch": 1.351207625236385, "grad_norm": 0.025993557091445442, "learning_rate": 0.0002877343340186765, "loss": 0.5932, "step": 26615 }, { "epoch": 1.3514614613344165, "grad_norm": 0.02803034766322572, "learning_rate": 0.0002875337823134675, "loss": 0.6646, "step": 26620 }, { "epoch": 1.351715297432448, "grad_norm": 0.026776054486399713, "learning_rate": 0.0002873332723143177, "loss": 0.6285, "step": 26625 }, { "epoch": 1.3519691335304793, "grad_norm": 0.025534635735037014, "learning_rate": 0.00028713280406058575, "loss": 0.6098, "step": 26630 }, { "epoch": 1.3522229696285109, "grad_norm": 0.027543416393098565, "learning_rate": 0.00028693237759162295, "loss": 0.6119, "step": 26635 }, { "epoch": 1.3524768057265424, "grad_norm": 0.02863032440352346, "learning_rate": 0.0002867319929467717, "loss": 0.6402, "step": 26640 }, { "epoch": 1.352730641824574, "grad_norm": 0.026071117962523257, "learning_rate": 0.0002865316501653669, "loss": 0.6157, "step": 26645 }, { "epoch": 1.3529844779226055, "grad_norm": 0.029252271527938935, "learning_rate": 0.0002863313492867344, "loss": 0.6147, "step": 26650 }, { "epoch": 1.3532383140206368, "grad_norm": 0.02628526037455198, "learning_rate": 0.0002861310903501926, "loss": 0.631, "step": 26655 }, { "epoch": 1.3534921501186683, "grad_norm": 0.026923881833056624, "learning_rate": 0.0002859308733950511, "loss": 0.6017, "step": 26660 }, { "epoch": 1.3537459862166998, "grad_norm": 0.02627678853611847, "learning_rate": 0.0002857306984606115, "loss": 0.5904, "step": 26665 }, { "epoch": 1.3539998223147314, "grad_norm": 0.027209698216935748, "learning_rate": 0.0002855305655861675, "loss": 0.5843, "step": 26670 }, { "epoch": 1.354253658412763, "grad_norm": 0.026648766787009035, "learning_rate": 0.0002853304748110037, "loss": 0.6061, "step": 26675 }, { "epoch": 1.3545074945107944, "grad_norm": 0.027270767054163005, "learning_rate": 0.00028513042617439734, "loss": 0.6118, "step": 26680 }, { "epoch": 1.354761330608826, "grad_norm": 0.028451452030653135, "learning_rate": 0.0002849304197156166, "loss": 0.626, "step": 26685 }, { "epoch": 1.3550151667068575, "grad_norm": 0.036142958457325104, "learning_rate": 0.00028473045547392205, "loss": 0.596, "step": 26690 }, { "epoch": 1.3552690028048888, "grad_norm": 0.02679016604173102, "learning_rate": 0.0002845305334885654, "loss": 0.6232, "step": 26695 }, { "epoch": 1.3555228389029204, "grad_norm": 0.025781546829094136, "learning_rate": 0.0002843306537987906, "loss": 0.558, "step": 26700 }, { "epoch": 1.355776675000952, "grad_norm": 0.027753141472583964, "learning_rate": 0.00028413081644383285, "loss": 0.6029, "step": 26705 }, { "epoch": 1.3560305110989834, "grad_norm": 0.026310811188698206, "learning_rate": 0.0002839310214629194, "loss": 0.6324, "step": 26710 }, { "epoch": 1.356284347197015, "grad_norm": 0.027892109908402594, "learning_rate": 0.00028373126889526875, "loss": 0.6221, "step": 26715 }, { "epoch": 1.3565381832950463, "grad_norm": 0.030467590027343734, "learning_rate": 0.0002835315587800914, "loss": 0.5855, "step": 26720 }, { "epoch": 1.3567920193930778, "grad_norm": 0.03023753649698496, "learning_rate": 0.00028333189115658966, "loss": 0.6061, "step": 26725 }, { "epoch": 1.3570458554911093, "grad_norm": 0.027503529863737674, "learning_rate": 0.0002831322660639573, "loss": 0.6404, "step": 26730 }, { "epoch": 1.3572996915891409, "grad_norm": 0.028125385524876003, "learning_rate": 0.0002829326835413794, "loss": 0.5953, "step": 26735 }, { "epoch": 1.3575535276871724, "grad_norm": 0.027988776539730444, "learning_rate": 0.00028273314362803337, "loss": 0.6075, "step": 26740 }, { "epoch": 1.357807363785204, "grad_norm": 0.02636303891814438, "learning_rate": 0.0002825336463630875, "loss": 0.6126, "step": 26745 }, { "epoch": 1.3580611998832355, "grad_norm": 0.029627461342801434, "learning_rate": 0.0002823341917857027, "loss": 0.5725, "step": 26750 }, { "epoch": 1.358315035981267, "grad_norm": 0.025868498499123708, "learning_rate": 0.0002821347799350302, "loss": 0.5884, "step": 26755 }, { "epoch": 1.3585688720792983, "grad_norm": 0.029391211411677776, "learning_rate": 0.00028193541085021423, "loss": 0.6261, "step": 26760 }, { "epoch": 1.3588227081773299, "grad_norm": 0.029405320147292465, "learning_rate": 0.00028173608457038936, "loss": 0.6344, "step": 26765 }, { "epoch": 1.3590765442753614, "grad_norm": 0.02828008260843649, "learning_rate": 0.0002815368011346828, "loss": 0.6048, "step": 26770 }, { "epoch": 1.359330380373393, "grad_norm": 0.02746523817056, "learning_rate": 0.00028133756058221253, "loss": 0.5858, "step": 26775 }, { "epoch": 1.3595842164714245, "grad_norm": 0.026855064913763705, "learning_rate": 0.0002811383629520887, "loss": 0.6016, "step": 26780 }, { "epoch": 1.3598380525694558, "grad_norm": 0.02731144414714777, "learning_rate": 0.0002809392082834129, "loss": 0.622, "step": 26785 }, { "epoch": 1.3600918886674873, "grad_norm": 0.03009192055108865, "learning_rate": 0.0002807400966152778, "loss": 0.607, "step": 26790 }, { "epoch": 1.3603457247655188, "grad_norm": 0.027174625966016698, "learning_rate": 0.0002805410279867686, "loss": 0.57, "step": 26795 }, { "epoch": 1.3605995608635504, "grad_norm": 0.026173212095108245, "learning_rate": 0.0002803420024369609, "loss": 0.5445, "step": 26800 }, { "epoch": 1.360853396961582, "grad_norm": 0.028145006430464917, "learning_rate": 0.00028014302000492285, "loss": 0.5974, "step": 26805 }, { "epoch": 1.3611072330596135, "grad_norm": 0.026555429541605915, "learning_rate": 0.00027994408072971346, "loss": 0.5876, "step": 26810 }, { "epoch": 1.361361069157645, "grad_norm": 0.02938881163016128, "learning_rate": 0.0002797451846503837, "loss": 0.6118, "step": 26815 }, { "epoch": 1.3616149052556765, "grad_norm": 0.026258592076875222, "learning_rate": 0.00027954633180597564, "loss": 0.5999, "step": 26820 }, { "epoch": 1.3618687413537078, "grad_norm": 0.029495676529180406, "learning_rate": 0.00027934752223552343, "loss": 0.5802, "step": 26825 }, { "epoch": 1.3621225774517394, "grad_norm": 0.027877415678486357, "learning_rate": 0.0002791487559780521, "loss": 0.5802, "step": 26830 }, { "epoch": 1.362376413549771, "grad_norm": 0.026727382777156777, "learning_rate": 0.00027895003307257867, "loss": 0.596, "step": 26835 }, { "epoch": 1.3626302496478024, "grad_norm": 0.02659945423625494, "learning_rate": 0.000278751353558111, "loss": 0.6183, "step": 26840 }, { "epoch": 1.362884085745834, "grad_norm": 0.026348838992588542, "learning_rate": 0.00027855271747364966, "loss": 0.594, "step": 26845 }, { "epoch": 1.3631379218438653, "grad_norm": 0.028068725880603287, "learning_rate": 0.00027835412485818534, "loss": 0.5647, "step": 26850 }, { "epoch": 1.3633917579418968, "grad_norm": 0.027201772234768384, "learning_rate": 0.00027815557575070117, "loss": 0.5605, "step": 26855 }, { "epoch": 1.3636455940399284, "grad_norm": 0.027036692086091583, "learning_rate": 0.0002779570701901709, "loss": 0.5691, "step": 26860 }, { "epoch": 1.36389943013796, "grad_norm": 0.02785356211281556, "learning_rate": 0.0002777586082155607, "loss": 0.6419, "step": 26865 }, { "epoch": 1.3641532662359914, "grad_norm": 0.027836640848264194, "learning_rate": 0.00027756018986582715, "loss": 0.591, "step": 26870 }, { "epoch": 1.364407102334023, "grad_norm": 0.02809202169188486, "learning_rate": 0.00027736181517991923, "loss": 0.5646, "step": 26875 }, { "epoch": 1.3646609384320545, "grad_norm": 0.028278660693348532, "learning_rate": 0.0002771634841967767, "loss": 0.6715, "step": 26880 }, { "epoch": 1.364914774530086, "grad_norm": 0.03634298632839383, "learning_rate": 0.00027696519695533074, "loss": 0.615, "step": 26885 }, { "epoch": 1.3651686106281176, "grad_norm": 0.028863671370549435, "learning_rate": 0.00027676695349450456, "loss": 0.5561, "step": 26890 }, { "epoch": 1.3654224467261489, "grad_norm": 0.027555347110756623, "learning_rate": 0.0002765687538532119, "loss": 0.5623, "step": 26895 }, { "epoch": 1.3656762828241804, "grad_norm": 0.0273511713993819, "learning_rate": 0.0002763705980703586, "loss": 0.6128, "step": 26900 }, { "epoch": 1.365930118922212, "grad_norm": 0.02702148666343566, "learning_rate": 0.0002761724861848417, "loss": 0.5963, "step": 26905 }, { "epoch": 1.3661839550202435, "grad_norm": 0.02771815975558554, "learning_rate": 0.0002759744182355498, "loss": 0.5969, "step": 26910 }, { "epoch": 1.3664377911182748, "grad_norm": 0.027697863613009664, "learning_rate": 0.00027577639426136204, "loss": 0.6355, "step": 26915 }, { "epoch": 1.3666916272163063, "grad_norm": 0.02649931458567522, "learning_rate": 0.00027557841430115015, "loss": 0.6017, "step": 26920 }, { "epoch": 1.3669454633143379, "grad_norm": 0.025361630896626575, "learning_rate": 0.0002753804783937762, "loss": 0.6176, "step": 26925 }, { "epoch": 1.3671992994123694, "grad_norm": 0.028100560701725633, "learning_rate": 0.0002751825865780943, "loss": 0.5923, "step": 26930 }, { "epoch": 1.367453135510401, "grad_norm": 0.02864497332256347, "learning_rate": 0.0002749847388929493, "loss": 0.5896, "step": 26935 }, { "epoch": 1.3677069716084325, "grad_norm": 0.029781116069314086, "learning_rate": 0.0002747869353771781, "loss": 0.627, "step": 26940 }, { "epoch": 1.367960807706464, "grad_norm": 0.03052006090505464, "learning_rate": 0.0002745891760696082, "loss": 0.6181, "step": 26945 }, { "epoch": 1.3682146438044955, "grad_norm": 0.03643462154523136, "learning_rate": 0.0002743914610090591, "loss": 0.6004, "step": 26950 }, { "epoch": 1.368468479902527, "grad_norm": 0.027285447529589245, "learning_rate": 0.0002741937902343409, "loss": 0.5971, "step": 26955 }, { "epoch": 1.3687223160005584, "grad_norm": 0.029283429579514086, "learning_rate": 0.0002739961637842555, "loss": 0.5876, "step": 26960 }, { "epoch": 1.36897615209859, "grad_norm": 0.025575460783159743, "learning_rate": 0.0002737985816975963, "loss": 0.6252, "step": 26965 }, { "epoch": 1.3692299881966215, "grad_norm": 0.06000287448683985, "learning_rate": 0.00027360104401314735, "loss": 0.6142, "step": 26970 }, { "epoch": 1.369483824294653, "grad_norm": 0.030032677279486594, "learning_rate": 0.0002734035507696845, "loss": 0.6154, "step": 26975 }, { "epoch": 1.3697376603926845, "grad_norm": 0.029620351644214067, "learning_rate": 0.0002732061020059745, "loss": 0.652, "step": 26980 }, { "epoch": 1.3699914964907158, "grad_norm": 0.029023341163516174, "learning_rate": 0.00027300869776077574, "loss": 0.6406, "step": 26985 }, { "epoch": 1.3702453325887474, "grad_norm": 0.025284714779906216, "learning_rate": 0.0002728113380728375, "loss": 0.6093, "step": 26990 }, { "epoch": 1.370499168686779, "grad_norm": 0.028735662454695235, "learning_rate": 0.0002726140229809008, "loss": 0.6432, "step": 26995 }, { "epoch": 1.3707530047848104, "grad_norm": 0.026712066854960217, "learning_rate": 0.00027241675252369715, "loss": 0.5762, "step": 27000 }, { "epoch": 1.371006840882842, "grad_norm": 0.02778690143179983, "learning_rate": 0.0002722195267399502, "loss": 0.5789, "step": 27005 }, { "epoch": 1.3712606769808735, "grad_norm": 0.024029781104151337, "learning_rate": 0.00027202234566837415, "loss": 0.602, "step": 27010 }, { "epoch": 1.371514513078905, "grad_norm": 0.026207949969519976, "learning_rate": 0.0002718252093476748, "loss": 0.6111, "step": 27015 }, { "epoch": 1.3717683491769366, "grad_norm": 0.026309270589436756, "learning_rate": 0.0002716281178165486, "loss": 0.6323, "step": 27020 }, { "epoch": 1.3720221852749679, "grad_norm": 0.02695305431700533, "learning_rate": 0.00027143107111368437, "loss": 0.6317, "step": 27025 }, { "epoch": 1.3722760213729994, "grad_norm": 0.03583481422379402, "learning_rate": 0.00027123406927776085, "loss": 0.5895, "step": 27030 }, { "epoch": 1.372529857471031, "grad_norm": 0.02468246929797747, "learning_rate": 0.0002710371123474488, "loss": 0.5725, "step": 27035 }, { "epoch": 1.3727836935690625, "grad_norm": 0.02741306954291771, "learning_rate": 0.00027084020036140965, "loss": 0.5743, "step": 27040 }, { "epoch": 1.373037529667094, "grad_norm": 0.028858666526172354, "learning_rate": 0.00027064333335829647, "loss": 0.5961, "step": 27045 }, { "epoch": 1.3732913657651253, "grad_norm": 0.026208871897892082, "learning_rate": 0.00027044651137675304, "loss": 0.6, "step": 27050 }, { "epoch": 1.3735452018631569, "grad_norm": 0.026432261015011376, "learning_rate": 0.00027024973445541475, "loss": 0.5974, "step": 27055 }, { "epoch": 1.3737990379611884, "grad_norm": 0.029218441443135772, "learning_rate": 0.00027005300263290764, "loss": 0.6278, "step": 27060 }, { "epoch": 1.37405287405922, "grad_norm": 0.027673159078982917, "learning_rate": 0.00026985631594784966, "loss": 0.6103, "step": 27065 }, { "epoch": 1.3743067101572515, "grad_norm": 0.034197975784340286, "learning_rate": 0.0002696596744388488, "loss": 0.5979, "step": 27070 }, { "epoch": 1.374560546255283, "grad_norm": 0.030109901385034842, "learning_rate": 0.0002694630781445054, "loss": 0.6206, "step": 27075 }, { "epoch": 1.3748143823533145, "grad_norm": 0.027263078954603323, "learning_rate": 0.0002692665271034099, "loss": 0.5984, "step": 27080 }, { "epoch": 1.375068218451346, "grad_norm": 0.02634773935567618, "learning_rate": 0.00026907002135414447, "loss": 0.5655, "step": 27085 }, { "epoch": 1.3753220545493774, "grad_norm": 0.029872655701557175, "learning_rate": 0.00026887356093528237, "loss": 0.5892, "step": 27090 }, { "epoch": 1.375575890647409, "grad_norm": 0.02578986037487444, "learning_rate": 0.00026867714588538747, "loss": 0.6309, "step": 27095 }, { "epoch": 1.3758297267454405, "grad_norm": 0.02516211356490713, "learning_rate": 0.00026848077624301537, "loss": 0.5868, "step": 27100 }, { "epoch": 1.376083562843472, "grad_norm": 0.025798091330959203, "learning_rate": 0.00026828445204671216, "loss": 0.6043, "step": 27105 }, { "epoch": 1.3763373989415035, "grad_norm": 0.027642385178535695, "learning_rate": 0.0002680881733350156, "loss": 0.5743, "step": 27110 }, { "epoch": 1.3765912350395348, "grad_norm": 0.027495500021115147, "learning_rate": 0.0002678919401464539, "loss": 0.6075, "step": 27115 }, { "epoch": 1.3768450711375664, "grad_norm": 0.025316013854544583, "learning_rate": 0.00026769575251954703, "loss": 0.6161, "step": 27120 }, { "epoch": 1.377098907235598, "grad_norm": 0.027506248772021347, "learning_rate": 0.00026749961049280527, "loss": 0.6046, "step": 27125 }, { "epoch": 1.3773527433336294, "grad_norm": 0.025553785103428213, "learning_rate": 0.0002673035141047306, "loss": 0.5868, "step": 27130 }, { "epoch": 1.377606579431661, "grad_norm": 0.028039526394663457, "learning_rate": 0.0002671074633938156, "loss": 0.6105, "step": 27135 }, { "epoch": 1.3778604155296925, "grad_norm": 0.030047551201887956, "learning_rate": 0.00026691145839854405, "loss": 0.6236, "step": 27140 }, { "epoch": 1.378114251627724, "grad_norm": 0.0254012263967111, "learning_rate": 0.00026671549915739076, "loss": 0.5777, "step": 27145 }, { "epoch": 1.3783680877257556, "grad_norm": 0.027605927435245627, "learning_rate": 0.0002665195857088218, "loss": 0.5804, "step": 27150 }, { "epoch": 1.378621923823787, "grad_norm": 0.030949072921172036, "learning_rate": 0.0002663237180912936, "loss": 0.6005, "step": 27155 }, { "epoch": 1.3788757599218184, "grad_norm": 0.027858063894277266, "learning_rate": 0.0002661278963432544, "loss": 0.592, "step": 27160 }, { "epoch": 1.37912959601985, "grad_norm": 0.02528068701137993, "learning_rate": 0.00026593212050314265, "loss": 0.5991, "step": 27165 }, { "epoch": 1.3793834321178815, "grad_norm": 0.0274412588400717, "learning_rate": 0.0002657363906093886, "loss": 0.5814, "step": 27170 }, { "epoch": 1.379637268215913, "grad_norm": 0.028111881700219336, "learning_rate": 0.0002655407067004125, "loss": 0.6066, "step": 27175 }, { "epoch": 1.3798911043139443, "grad_norm": 0.02569704051577371, "learning_rate": 0.00026534506881462674, "loss": 0.5919, "step": 27180 }, { "epoch": 1.3801449404119759, "grad_norm": 0.030764950240004245, "learning_rate": 0.0002651494769904335, "loss": 0.6197, "step": 27185 }, { "epoch": 1.3803987765100074, "grad_norm": 0.024719910713456122, "learning_rate": 0.00026495393126622685, "loss": 0.5984, "step": 27190 }, { "epoch": 1.380652612608039, "grad_norm": 0.025973368332012567, "learning_rate": 0.00026475843168039117, "loss": 0.5861, "step": 27195 }, { "epoch": 1.3809064487060705, "grad_norm": 0.026813281528654115, "learning_rate": 0.0002645629782713022, "loss": 0.6002, "step": 27200 }, { "epoch": 1.381160284804102, "grad_norm": 0.027101272308406255, "learning_rate": 0.00026436757107732665, "loss": 0.6147, "step": 27205 }, { "epoch": 1.3814141209021336, "grad_norm": 0.02846454877908511, "learning_rate": 0.0002641722101368217, "loss": 0.6289, "step": 27210 }, { "epoch": 1.381667957000165, "grad_norm": 0.02490863553341213, "learning_rate": 0.000263976895488136, "loss": 0.6399, "step": 27215 }, { "epoch": 1.3819217930981966, "grad_norm": 0.03028530321062019, "learning_rate": 0.0002637816271696084, "loss": 0.6302, "step": 27220 }, { "epoch": 1.382175629196228, "grad_norm": 0.026852557991263314, "learning_rate": 0.0002635864052195696, "loss": 0.6298, "step": 27225 }, { "epoch": 1.3824294652942595, "grad_norm": 0.026087758534732948, "learning_rate": 0.00026339122967634026, "loss": 0.5819, "step": 27230 }, { "epoch": 1.382683301392291, "grad_norm": 0.026808807466496415, "learning_rate": 0.0002631961005782328, "loss": 0.5447, "step": 27235 }, { "epoch": 1.3829371374903225, "grad_norm": 0.028165215857373197, "learning_rate": 0.00026300101796354966, "loss": 0.5696, "step": 27240 }, { "epoch": 1.383190973588354, "grad_norm": 0.025546981311527557, "learning_rate": 0.0002628059818705849, "loss": 0.5704, "step": 27245 }, { "epoch": 1.3834448096863854, "grad_norm": 0.027211445530378787, "learning_rate": 0.00026261099233762286, "loss": 0.5634, "step": 27250 }, { "epoch": 1.383698645784417, "grad_norm": 0.02767014890531401, "learning_rate": 0.0002624160494029394, "loss": 0.603, "step": 27255 }, { "epoch": 1.3839524818824485, "grad_norm": 0.026389599047791652, "learning_rate": 0.0002622211531048004, "loss": 0.5895, "step": 27260 }, { "epoch": 1.38420631798048, "grad_norm": 0.027058484751263687, "learning_rate": 0.0002620263034814632, "loss": 0.5846, "step": 27265 }, { "epoch": 1.3844601540785115, "grad_norm": 0.03456800347696813, "learning_rate": 0.00026183150057117595, "loss": 0.6136, "step": 27270 }, { "epoch": 1.384713990176543, "grad_norm": 0.032724699431718664, "learning_rate": 0.0002616367444121775, "loss": 0.6002, "step": 27275 }, { "epoch": 1.3849678262745746, "grad_norm": 0.027187787802161, "learning_rate": 0.0002614420350426973, "loss": 0.6056, "step": 27280 }, { "epoch": 1.3852216623726061, "grad_norm": 0.026212117012160054, "learning_rate": 0.00026124737250095596, "loss": 0.5637, "step": 27285 }, { "epoch": 1.3854754984706374, "grad_norm": 0.02537103139157865, "learning_rate": 0.0002610527568251647, "loss": 0.5771, "step": 27290 }, { "epoch": 1.385729334568669, "grad_norm": 0.02867209509335596, "learning_rate": 0.0002608581880535258, "loss": 0.6137, "step": 27295 }, { "epoch": 1.3859831706667005, "grad_norm": 0.02985334673192294, "learning_rate": 0.00026066366622423177, "loss": 0.5907, "step": 27300 }, { "epoch": 1.386237006764732, "grad_norm": 0.028277727140311428, "learning_rate": 0.0002604691913754668, "loss": 0.6288, "step": 27305 }, { "epoch": 1.3864908428627636, "grad_norm": 0.028185923740741435, "learning_rate": 0.0002602747635454047, "loss": 0.6282, "step": 27310 }, { "epoch": 1.386744678960795, "grad_norm": 0.029009800665711193, "learning_rate": 0.00026008038277221127, "loss": 0.6049, "step": 27315 }, { "epoch": 1.3869985150588264, "grad_norm": 0.031190565280666088, "learning_rate": 0.0002598860490940419, "loss": 0.591, "step": 27320 }, { "epoch": 1.387252351156858, "grad_norm": 0.031208076650389088, "learning_rate": 0.0002596917625490438, "loss": 0.5996, "step": 27325 }, { "epoch": 1.3875061872548895, "grad_norm": 0.02848241825840419, "learning_rate": 0.0002594975231753544, "loss": 0.6013, "step": 27330 }, { "epoch": 1.387760023352921, "grad_norm": 0.0369528562360773, "learning_rate": 0.00025930333101110173, "loss": 0.6244, "step": 27335 }, { "epoch": 1.3880138594509526, "grad_norm": 0.028237024886915957, "learning_rate": 0.0002591091860944049, "loss": 0.6129, "step": 27340 }, { "epoch": 1.388267695548984, "grad_norm": 0.027654813141055903, "learning_rate": 0.00025891508846337337, "loss": 0.5807, "step": 27345 }, { "epoch": 1.3885215316470156, "grad_norm": 0.026058916706144825, "learning_rate": 0.00025872103815610794, "loss": 0.5651, "step": 27350 }, { "epoch": 1.388775367745047, "grad_norm": 0.024717593687595317, "learning_rate": 0.0002585270352106992, "loss": 0.5807, "step": 27355 }, { "epoch": 1.3890292038430785, "grad_norm": 0.025847586251778798, "learning_rate": 0.0002583330796652294, "loss": 0.5921, "step": 27360 }, { "epoch": 1.38928303994111, "grad_norm": 0.030590177497538812, "learning_rate": 0.0002581391715577707, "loss": 0.6087, "step": 27365 }, { "epoch": 1.3895368760391416, "grad_norm": 0.028710375775991108, "learning_rate": 0.00025794531092638667, "loss": 0.5995, "step": 27370 }, { "epoch": 1.389790712137173, "grad_norm": 0.02619014815041433, "learning_rate": 0.0002577514978091308, "loss": 0.6281, "step": 27375 }, { "epoch": 1.3900445482352044, "grad_norm": 0.02907600085159115, "learning_rate": 0.000257557732244048, "loss": 0.6126, "step": 27380 }, { "epoch": 1.390298384333236, "grad_norm": 0.030683574346034716, "learning_rate": 0.00025736401426917286, "loss": 0.5947, "step": 27385 }, { "epoch": 1.3905522204312675, "grad_norm": 0.02893253742283481, "learning_rate": 0.0002571703439225322, "loss": 0.6065, "step": 27390 }, { "epoch": 1.390806056529299, "grad_norm": 0.027283122757131732, "learning_rate": 0.00025697672124214176, "loss": 0.6175, "step": 27395 }, { "epoch": 1.3910598926273305, "grad_norm": 0.02787145280339073, "learning_rate": 0.00025678314626600924, "loss": 0.5805, "step": 27400 }, { "epoch": 1.391313728725362, "grad_norm": 0.027980844187330246, "learning_rate": 0.00025658961903213197, "loss": 0.5921, "step": 27405 }, { "epoch": 1.3915675648233936, "grad_norm": 0.027377302961963952, "learning_rate": 0.0002563961395784987, "loss": 0.5987, "step": 27410 }, { "epoch": 1.3918214009214251, "grad_norm": 0.028538515049238225, "learning_rate": 0.0002562027079430883, "loss": 0.5736, "step": 27415 }, { "epoch": 1.3920752370194565, "grad_norm": 0.02666694235612271, "learning_rate": 0.0002560093241638707, "loss": 0.6328, "step": 27420 }, { "epoch": 1.392329073117488, "grad_norm": 0.027083037519473042, "learning_rate": 0.00025581598827880575, "loss": 0.599, "step": 27425 }, { "epoch": 1.3925829092155195, "grad_norm": 0.02775455125956325, "learning_rate": 0.0002556227003258448, "loss": 0.5751, "step": 27430 }, { "epoch": 1.392836745313551, "grad_norm": 0.028436504572025185, "learning_rate": 0.0002554294603429288, "loss": 0.6212, "step": 27435 }, { "epoch": 1.3930905814115826, "grad_norm": 0.02624397506489226, "learning_rate": 0.0002552362683679903, "loss": 0.6024, "step": 27440 }, { "epoch": 1.393344417509614, "grad_norm": 0.027631703800240333, "learning_rate": 0.0002550431244389515, "loss": 0.6279, "step": 27445 }, { "epoch": 1.3935982536076454, "grad_norm": 0.02887947634265888, "learning_rate": 0.00025485002859372574, "loss": 0.5818, "step": 27450 }, { "epoch": 1.393852089705677, "grad_norm": 0.02792326066360026, "learning_rate": 0.00025465698087021705, "loss": 0.5836, "step": 27455 }, { "epoch": 1.3941059258037085, "grad_norm": 0.0280684795796649, "learning_rate": 0.0002544639813063193, "loss": 0.5646, "step": 27460 }, { "epoch": 1.39435976190174, "grad_norm": 0.029422448939579363, "learning_rate": 0.0002542710299399177, "loss": 0.5628, "step": 27465 }, { "epoch": 1.3946135979997716, "grad_norm": 0.028302295357002915, "learning_rate": 0.00025407812680888726, "loss": 0.5645, "step": 27470 }, { "epoch": 1.3948674340978031, "grad_norm": 0.026845369221523814, "learning_rate": 0.0002538852719510943, "loss": 0.5983, "step": 27475 }, { "epoch": 1.3951212701958347, "grad_norm": 0.03527804954595251, "learning_rate": 0.00025369246540439495, "loss": 0.5902, "step": 27480 }, { "epoch": 1.3953751062938662, "grad_norm": 0.027642432774053788, "learning_rate": 0.00025349970720663653, "loss": 0.6291, "step": 27485 }, { "epoch": 1.3956289423918975, "grad_norm": 0.02725037526391078, "learning_rate": 0.000253306997395656, "loss": 0.6189, "step": 27490 }, { "epoch": 1.395882778489929, "grad_norm": 0.031754897982890606, "learning_rate": 0.00025311433600928184, "loss": 0.5864, "step": 27495 }, { "epoch": 1.3961366145879606, "grad_norm": 0.02688294396296545, "learning_rate": 0.00025292172308533214, "loss": 0.5412, "step": 27500 }, { "epoch": 1.396390450685992, "grad_norm": 0.027616478608906946, "learning_rate": 0.000252729158661616, "loss": 0.5977, "step": 27505 }, { "epoch": 1.3966442867840236, "grad_norm": 0.02671414014112634, "learning_rate": 0.0002525366427759329, "loss": 0.6005, "step": 27510 }, { "epoch": 1.396898122882055, "grad_norm": 0.02840299327650642, "learning_rate": 0.00025234417546607293, "loss": 0.6031, "step": 27515 }, { "epoch": 1.3971519589800865, "grad_norm": 0.02911462714325842, "learning_rate": 0.000252151756769816, "loss": 0.623, "step": 27520 }, { "epoch": 1.397405795078118, "grad_norm": 0.028219693400990464, "learning_rate": 0.00025195938672493344, "loss": 0.5916, "step": 27525 }, { "epoch": 1.3976596311761496, "grad_norm": 0.029131915453227172, "learning_rate": 0.0002517670653691861, "loss": 0.6163, "step": 27530 }, { "epoch": 1.397913467274181, "grad_norm": 0.026110290527264934, "learning_rate": 0.0002515747927403261, "loss": 0.5545, "step": 27535 }, { "epoch": 1.3981673033722126, "grad_norm": 0.02716045824338567, "learning_rate": 0.00025138256887609513, "loss": 0.6037, "step": 27540 }, { "epoch": 1.3984211394702442, "grad_norm": 0.026365249456751477, "learning_rate": 0.0002511903938142263, "loss": 0.5715, "step": 27545 }, { "epoch": 1.3986749755682757, "grad_norm": 0.0287392446620063, "learning_rate": 0.0002509982675924421, "loss": 0.578, "step": 27550 }, { "epoch": 1.398928811666307, "grad_norm": 0.026968382468763485, "learning_rate": 0.00025080619024845643, "loss": 0.6072, "step": 27555 }, { "epoch": 1.3991826477643385, "grad_norm": 0.026305045609763874, "learning_rate": 0.0002506141618199727, "loss": 0.5758, "step": 27560 }, { "epoch": 1.39943648386237, "grad_norm": 0.02828995676248506, "learning_rate": 0.0002504221823446853, "loss": 0.5948, "step": 27565 }, { "epoch": 1.3996903199604016, "grad_norm": 0.02519771250854929, "learning_rate": 0.00025023025186027905, "loss": 0.5996, "step": 27570 }, { "epoch": 1.3999441560584331, "grad_norm": 0.02906458236314542, "learning_rate": 0.0002500383704044286, "loss": 0.596, "step": 27575 }, { "epoch": 1.4001979921564645, "grad_norm": 0.027445328866923604, "learning_rate": 0.00024984653801479967, "loss": 0.5821, "step": 27580 }, { "epoch": 1.400451828254496, "grad_norm": 0.029691379418635314, "learning_rate": 0.0002496547547290476, "loss": 0.5991, "step": 27585 }, { "epoch": 1.4007056643525275, "grad_norm": 0.03007386065470511, "learning_rate": 0.0002494630205848189, "loss": 0.59, "step": 27590 }, { "epoch": 1.400959500450559, "grad_norm": 0.02992801871704479, "learning_rate": 0.0002492713356197497, "loss": 0.594, "step": 27595 }, { "epoch": 1.4012133365485906, "grad_norm": 0.026536134299555703, "learning_rate": 0.0002490796998714671, "loss": 0.5884, "step": 27600 }, { "epoch": 1.4014671726466221, "grad_norm": 0.027001879309926258, "learning_rate": 0.0002488881133775878, "loss": 0.5784, "step": 27605 }, { "epoch": 1.4017210087446537, "grad_norm": 0.028995385518873307, "learning_rate": 0.00024869657617571984, "loss": 0.6025, "step": 27610 }, { "epoch": 1.4019748448426852, "grad_norm": 0.03005841997116499, "learning_rate": 0.00024850508830346046, "loss": 0.6134, "step": 27615 }, { "epoch": 1.4022286809407165, "grad_norm": 0.026505338210080408, "learning_rate": 0.0002483136497983983, "loss": 0.5873, "step": 27620 }, { "epoch": 1.402482517038748, "grad_norm": 0.027414150261769618, "learning_rate": 0.00024812226069811114, "loss": 0.5703, "step": 27625 }, { "epoch": 1.4027363531367796, "grad_norm": 0.030387632352669003, "learning_rate": 0.00024793092104016844, "loss": 0.6308, "step": 27630 }, { "epoch": 1.4029901892348111, "grad_norm": 0.029978128003972274, "learning_rate": 0.00024773963086212867, "loss": 0.5977, "step": 27635 }, { "epoch": 1.4032440253328426, "grad_norm": 0.029039258141387286, "learning_rate": 0.0002475483902015416, "loss": 0.5755, "step": 27640 }, { "epoch": 1.403497861430874, "grad_norm": 0.028266041579076178, "learning_rate": 0.00024735719909594635, "loss": 0.6333, "step": 27645 }, { "epoch": 1.4037516975289055, "grad_norm": 0.026917772162637712, "learning_rate": 0.00024716605758287315, "loss": 0.6185, "step": 27650 }, { "epoch": 1.404005533626937, "grad_norm": 0.028436549242291716, "learning_rate": 0.00024697496569984177, "loss": 0.5981, "step": 27655 }, { "epoch": 1.4042593697249686, "grad_norm": 0.03430244346762299, "learning_rate": 0.000246783923484363, "loss": 0.5975, "step": 27660 }, { "epoch": 1.404513205823, "grad_norm": 0.02758967723348374, "learning_rate": 0.0002465929309739371, "loss": 0.5822, "step": 27665 }, { "epoch": 1.4047670419210316, "grad_norm": 0.0298251552152947, "learning_rate": 0.0002464019882060553, "loss": 0.5705, "step": 27670 }, { "epoch": 1.4050208780190632, "grad_norm": 0.02641755276146093, "learning_rate": 0.0002462110952181982, "loss": 0.5733, "step": 27675 }, { "epoch": 1.4052747141170947, "grad_norm": 0.02813784652141456, "learning_rate": 0.0002460202520478378, "loss": 0.5835, "step": 27680 }, { "epoch": 1.405528550215126, "grad_norm": 0.02758910363174736, "learning_rate": 0.0002458294587324351, "loss": 0.5751, "step": 27685 }, { "epoch": 1.4057823863131576, "grad_norm": 0.02498671819751416, "learning_rate": 0.0002456387153094421, "loss": 0.6, "step": 27690 }, { "epoch": 1.406036222411189, "grad_norm": 0.02633413491717013, "learning_rate": 0.000245448021816301, "loss": 0.5687, "step": 27695 }, { "epoch": 1.4062900585092206, "grad_norm": 0.02519991212515768, "learning_rate": 0.00024525737829044354, "loss": 0.5952, "step": 27700 }, { "epoch": 1.4065438946072522, "grad_norm": 0.02828758149974822, "learning_rate": 0.0002450667847692925, "loss": 0.6298, "step": 27705 }, { "epoch": 1.4067977307052835, "grad_norm": 0.05916751484138233, "learning_rate": 0.00024487624129026017, "loss": 0.5589, "step": 27710 }, { "epoch": 1.407051566803315, "grad_norm": 0.02629514197497406, "learning_rate": 0.00024468574789074946, "loss": 0.5781, "step": 27715 }, { "epoch": 1.4073054029013465, "grad_norm": 0.028551177546181625, "learning_rate": 0.000244495304608153, "loss": 0.5846, "step": 27720 }, { "epoch": 1.407559238999378, "grad_norm": 0.02799595790520515, "learning_rate": 0.0002443049114798543, "loss": 0.5831, "step": 27725 }, { "epoch": 1.4078130750974096, "grad_norm": 0.028016860062046147, "learning_rate": 0.00024411456854322612, "loss": 0.5962, "step": 27730 }, { "epoch": 1.4080669111954411, "grad_norm": 0.027906958130300186, "learning_rate": 0.0002439242758356322, "loss": 0.5916, "step": 27735 }, { "epoch": 1.4083207472934727, "grad_norm": 0.02709303328912745, "learning_rate": 0.0002437340333944257, "loss": 0.5678, "step": 27740 }, { "epoch": 1.4085745833915042, "grad_norm": 0.027730855873762585, "learning_rate": 0.00024354384125695045, "loss": 0.6114, "step": 27745 }, { "epoch": 1.4088284194895357, "grad_norm": 0.028971247818475872, "learning_rate": 0.00024335369946054027, "loss": 0.5904, "step": 27750 }, { "epoch": 1.409082255587567, "grad_norm": 0.027054850783141123, "learning_rate": 0.00024316360804251907, "loss": 0.5997, "step": 27755 }, { "epoch": 1.4093360916855986, "grad_norm": 0.02610081065457022, "learning_rate": 0.0002429735670402007, "loss": 0.5669, "step": 27760 }, { "epoch": 1.4095899277836301, "grad_norm": 0.029937776570278858, "learning_rate": 0.00024278357649088945, "loss": 0.6173, "step": 27765 }, { "epoch": 1.4098437638816617, "grad_norm": 0.02992600434310274, "learning_rate": 0.00024259363643187922, "loss": 0.5996, "step": 27770 }, { "epoch": 1.4100975999796932, "grad_norm": 0.025708201306323113, "learning_rate": 0.00024240374690045468, "loss": 0.5938, "step": 27775 }, { "epoch": 1.4103514360777245, "grad_norm": 0.02772590194536299, "learning_rate": 0.00024221390793388977, "loss": 0.6133, "step": 27780 }, { "epoch": 1.410605272175756, "grad_norm": 0.027004316817502275, "learning_rate": 0.00024202411956944937, "loss": 0.605, "step": 27785 }, { "epoch": 1.4108591082737876, "grad_norm": 0.03005561091587619, "learning_rate": 0.00024183438184438761, "loss": 0.6381, "step": 27790 }, { "epoch": 1.4111129443718191, "grad_norm": 0.03337887742512655, "learning_rate": 0.00024164469479594935, "loss": 0.6335, "step": 27795 }, { "epoch": 1.4113667804698506, "grad_norm": 0.027045495896146663, "learning_rate": 0.00024145505846136895, "loss": 0.6242, "step": 27800 }, { "epoch": 1.4116206165678822, "grad_norm": 0.02922803176633179, "learning_rate": 0.0002412654728778712, "loss": 0.6246, "step": 27805 }, { "epoch": 1.4118744526659137, "grad_norm": 0.027786144858170606, "learning_rate": 0.00024107593808267102, "loss": 0.5796, "step": 27810 }, { "epoch": 1.4121282887639452, "grad_norm": 0.030051734408088518, "learning_rate": 0.00024088645411297273, "loss": 0.6018, "step": 27815 }, { "epoch": 1.4123821248619766, "grad_norm": 0.029650007379785754, "learning_rate": 0.00024069702100597146, "loss": 0.5843, "step": 27820 }, { "epoch": 1.412635960960008, "grad_norm": 0.03232515510847507, "learning_rate": 0.00024050763879885167, "loss": 0.6125, "step": 27825 }, { "epoch": 1.4128897970580396, "grad_norm": 0.029714401649393443, "learning_rate": 0.00024031830752878854, "loss": 0.6226, "step": 27830 }, { "epoch": 1.4131436331560712, "grad_norm": 0.029918397177202426, "learning_rate": 0.00024012902723294632, "loss": 0.6258, "step": 27835 }, { "epoch": 1.4133974692541027, "grad_norm": 0.026741854882360205, "learning_rate": 0.00023993979794848037, "loss": 0.592, "step": 27840 }, { "epoch": 1.413651305352134, "grad_norm": 0.04209026557561846, "learning_rate": 0.00023975061971253492, "loss": 0.5999, "step": 27845 }, { "epoch": 1.4139051414501655, "grad_norm": 0.026388685856967513, "learning_rate": 0.00023956149256224512, "loss": 0.5441, "step": 27850 }, { "epoch": 1.414158977548197, "grad_norm": 0.027646045606755605, "learning_rate": 0.0002393724165347354, "loss": 0.5943, "step": 27855 }, { "epoch": 1.4144128136462286, "grad_norm": 0.026686586691547946, "learning_rate": 0.0002391833916671207, "loss": 0.6315, "step": 27860 }, { "epoch": 1.4146666497442602, "grad_norm": 0.02591615436730263, "learning_rate": 0.0002389944179965052, "loss": 0.5768, "step": 27865 }, { "epoch": 1.4149204858422917, "grad_norm": 0.026465695744161518, "learning_rate": 0.00023880549555998416, "loss": 0.5888, "step": 27870 }, { "epoch": 1.4151743219403232, "grad_norm": 0.027810742147278425, "learning_rate": 0.00023861662439464155, "loss": 0.6325, "step": 27875 }, { "epoch": 1.4154281580383548, "grad_norm": 0.028272659478594427, "learning_rate": 0.00023842780453755231, "loss": 0.5991, "step": 27880 }, { "epoch": 1.415681994136386, "grad_norm": 0.026041772696430304, "learning_rate": 0.00023823903602578035, "loss": 0.5647, "step": 27885 }, { "epoch": 1.4159358302344176, "grad_norm": 0.028679105947985135, "learning_rate": 0.0002380503188963804, "loss": 0.635, "step": 27890 }, { "epoch": 1.4161896663324491, "grad_norm": 0.027078809225221597, "learning_rate": 0.00023786165318639635, "loss": 0.6235, "step": 27895 }, { "epoch": 1.4164435024304807, "grad_norm": 0.025233847855019444, "learning_rate": 0.00023767303893286262, "loss": 0.594, "step": 27900 }, { "epoch": 1.4166973385285122, "grad_norm": 0.027608500724249696, "learning_rate": 0.00023748447617280322, "loss": 0.5772, "step": 27905 }, { "epoch": 1.4169511746265435, "grad_norm": 0.03033817859877912, "learning_rate": 0.00023729596494323173, "loss": 0.5814, "step": 27910 }, { "epoch": 1.417205010724575, "grad_norm": 0.029449843396241064, "learning_rate": 0.00023710750528115244, "loss": 0.6069, "step": 27915 }, { "epoch": 1.4174588468226066, "grad_norm": 0.027902394876854728, "learning_rate": 0.00023691909722355864, "loss": 0.6014, "step": 27920 }, { "epoch": 1.4177126829206381, "grad_norm": 0.025672343645022543, "learning_rate": 0.00023673074080743405, "loss": 0.6164, "step": 27925 }, { "epoch": 1.4179665190186697, "grad_norm": 0.027198042616836695, "learning_rate": 0.00023654243606975213, "loss": 0.6365, "step": 27930 }, { "epoch": 1.4182203551167012, "grad_norm": 0.02714597560282828, "learning_rate": 0.0002363541830474763, "loss": 0.6558, "step": 27935 }, { "epoch": 1.4184741912147327, "grad_norm": 0.026824587655472475, "learning_rate": 0.00023616598177755938, "loss": 0.599, "step": 27940 }, { "epoch": 1.4187280273127643, "grad_norm": 0.0255053756052806, "learning_rate": 0.0002359778322969447, "loss": 0.5997, "step": 27945 }, { "epoch": 1.4189818634107956, "grad_norm": 0.026207224618590923, "learning_rate": 0.00023578973464256464, "loss": 0.6352, "step": 27950 }, { "epoch": 1.419235699508827, "grad_norm": 0.030819226561158333, "learning_rate": 0.0002356016888513423, "loss": 0.6136, "step": 27955 }, { "epoch": 1.4194895356068586, "grad_norm": 0.02692628830745274, "learning_rate": 0.00023541369496018967, "loss": 0.5905, "step": 27960 }, { "epoch": 1.4197433717048902, "grad_norm": 0.026195281631991595, "learning_rate": 0.0002352257530060094, "loss": 0.5954, "step": 27965 }, { "epoch": 1.4199972078029217, "grad_norm": 0.026370435719384692, "learning_rate": 0.00023503786302569318, "loss": 0.5819, "step": 27970 }, { "epoch": 1.420251043900953, "grad_norm": 0.025082265041354305, "learning_rate": 0.0002348500250561233, "loss": 0.5691, "step": 27975 }, { "epoch": 1.4205048799989846, "grad_norm": 0.03122050689598019, "learning_rate": 0.00023466223913417105, "loss": 0.5972, "step": 27980 }, { "epoch": 1.420758716097016, "grad_norm": 0.0273352253205753, "learning_rate": 0.00023447450529669796, "loss": 0.5975, "step": 27985 }, { "epoch": 1.4210125521950476, "grad_norm": 0.028447434041621446, "learning_rate": 0.00023428682358055553, "loss": 0.625, "step": 27990 }, { "epoch": 1.4212663882930792, "grad_norm": 0.028164347197496352, "learning_rate": 0.00023409919402258433, "loss": 0.5933, "step": 27995 }, { "epoch": 1.4215202243911107, "grad_norm": 0.027928650064211774, "learning_rate": 0.00023391161665961546, "loss": 0.6225, "step": 28000 }, { "epoch": 1.4217740604891422, "grad_norm": 0.027334834926276308, "learning_rate": 0.00023372409152846912, "loss": 0.5818, "step": 28005 }, { "epoch": 1.4220278965871738, "grad_norm": 0.02999616101577759, "learning_rate": 0.00023353661866595582, "loss": 0.639, "step": 28010 }, { "epoch": 1.422281732685205, "grad_norm": 0.02497658474399228, "learning_rate": 0.00023334919810887527, "loss": 0.5251, "step": 28015 }, { "epoch": 1.4225355687832366, "grad_norm": 0.028941144028388262, "learning_rate": 0.0002331618298940176, "loss": 0.6248, "step": 28020 }, { "epoch": 1.4227894048812681, "grad_norm": 0.7065681119538179, "learning_rate": 0.00023297451405816173, "loss": 0.5702, "step": 28025 }, { "epoch": 1.4230432409792997, "grad_norm": 0.043995954691285484, "learning_rate": 0.00023278725063807733, "loss": 0.6029, "step": 28030 }, { "epoch": 1.4232970770773312, "grad_norm": 0.029402758070784525, "learning_rate": 0.0002326000396705228, "loss": 0.5996, "step": 28035 }, { "epoch": 1.4235509131753625, "grad_norm": 0.03401656187553716, "learning_rate": 0.0002324128811922472, "loss": 0.6191, "step": 28040 }, { "epoch": 1.423804749273394, "grad_norm": 0.02871205721447534, "learning_rate": 0.00023222577523998816, "loss": 0.5886, "step": 28045 }, { "epoch": 1.4240585853714256, "grad_norm": 0.029894044884989167, "learning_rate": 0.00023203872185047442, "loss": 0.6661, "step": 28050 }, { "epoch": 1.4243124214694571, "grad_norm": 0.04137469933429975, "learning_rate": 0.00023185172106042308, "loss": 0.5937, "step": 28055 }, { "epoch": 1.4245662575674887, "grad_norm": 0.03609405323666338, "learning_rate": 0.00023166477290654185, "loss": 0.612, "step": 28060 }, { "epoch": 1.4248200936655202, "grad_norm": 0.030181807219110412, "learning_rate": 0.00023147787742552734, "loss": 0.6006, "step": 28065 }, { "epoch": 1.4250739297635517, "grad_norm": 0.02895726515882961, "learning_rate": 0.00023129103465406654, "loss": 0.5939, "step": 28070 }, { "epoch": 1.4253277658615833, "grad_norm": 0.028504928041189634, "learning_rate": 0.00023110424462883538, "loss": 0.6083, "step": 28075 }, { "epoch": 1.4255816019596148, "grad_norm": 0.027361211659975068, "learning_rate": 0.00023091750738650024, "loss": 0.5784, "step": 28080 }, { "epoch": 1.4258354380576461, "grad_norm": 0.028064062476795857, "learning_rate": 0.00023073082296371628, "loss": 0.5772, "step": 28085 }, { "epoch": 1.4260892741556777, "grad_norm": 0.02796172562122104, "learning_rate": 0.0002305441913971291, "loss": 0.5702, "step": 28090 }, { "epoch": 1.4263431102537092, "grad_norm": 0.02506604119986121, "learning_rate": 0.0002303576127233732, "loss": 0.6069, "step": 28095 }, { "epoch": 1.4265969463517407, "grad_norm": 0.029303894859980712, "learning_rate": 0.0002301710869790734, "loss": 0.6293, "step": 28100 }, { "epoch": 1.4268507824497723, "grad_norm": 0.027510777131197702, "learning_rate": 0.00022998461420084342, "loss": 0.6409, "step": 28105 }, { "epoch": 1.4271046185478036, "grad_norm": 0.026077758103800044, "learning_rate": 0.00022979819442528715, "loss": 0.5985, "step": 28110 }, { "epoch": 1.427358454645835, "grad_norm": 0.027229949390517313, "learning_rate": 0.00022961182768899797, "loss": 0.5963, "step": 28115 }, { "epoch": 1.4276122907438666, "grad_norm": 0.03053682693799676, "learning_rate": 0.00022942551402855839, "loss": 0.6272, "step": 28120 }, { "epoch": 1.4278661268418982, "grad_norm": 0.026632540244820476, "learning_rate": 0.0002292392534805412, "loss": 0.6126, "step": 28125 }, { "epoch": 1.4281199629399297, "grad_norm": 0.02778008201641172, "learning_rate": 0.0002290530460815082, "loss": 0.6181, "step": 28130 }, { "epoch": 1.4283737990379612, "grad_norm": 0.02824326439333104, "learning_rate": 0.00022886689186801113, "loss": 0.6314, "step": 28135 }, { "epoch": 1.4286276351359928, "grad_norm": 0.026254474417235964, "learning_rate": 0.00022868079087659087, "loss": 0.5993, "step": 28140 }, { "epoch": 1.4288814712340243, "grad_norm": 0.029722073499021288, "learning_rate": 0.0002284947431437785, "loss": 0.6123, "step": 28145 }, { "epoch": 1.4291353073320556, "grad_norm": 0.027867489579541695, "learning_rate": 0.00022830874870609385, "loss": 0.5819, "step": 28150 }, { "epoch": 1.4293891434300872, "grad_norm": 0.0278106643779377, "learning_rate": 0.00022812280760004718, "loss": 0.5623, "step": 28155 }, { "epoch": 1.4296429795281187, "grad_norm": 0.02563831535242637, "learning_rate": 0.00022793691986213726, "loss": 0.5924, "step": 28160 }, { "epoch": 1.4298968156261502, "grad_norm": 0.029535851156954226, "learning_rate": 0.00022775108552885336, "loss": 0.6108, "step": 28165 }, { "epoch": 1.4301506517241818, "grad_norm": 0.029564507785070054, "learning_rate": 0.00022756530463667336, "loss": 0.5834, "step": 28170 }, { "epoch": 1.430404487822213, "grad_norm": 0.033202066211584516, "learning_rate": 0.00022737957722206576, "loss": 0.5872, "step": 28175 }, { "epoch": 1.4306583239202446, "grad_norm": 0.02490162219927689, "learning_rate": 0.00022719390332148743, "loss": 0.5715, "step": 28180 }, { "epoch": 1.4309121600182761, "grad_norm": 0.027993587062555003, "learning_rate": 0.0002270082829713856, "loss": 0.6175, "step": 28185 }, { "epoch": 1.4311659961163077, "grad_norm": 0.030977448438117967, "learning_rate": 0.00022682271620819622, "loss": 0.6097, "step": 28190 }, { "epoch": 1.4314198322143392, "grad_norm": 0.02971844853671126, "learning_rate": 0.00022663720306834544, "loss": 0.6095, "step": 28195 }, { "epoch": 1.4316736683123708, "grad_norm": 0.027295614623872397, "learning_rate": 0.00022645174358824834, "loss": 0.6194, "step": 28200 }, { "epoch": 1.4319275044104023, "grad_norm": 0.028953501459607615, "learning_rate": 0.00022626633780430995, "loss": 0.6086, "step": 28205 }, { "epoch": 1.4321813405084338, "grad_norm": 0.02739444429410844, "learning_rate": 0.00022608098575292412, "loss": 0.5845, "step": 28210 }, { "epoch": 1.4324351766064651, "grad_norm": 2.7470454875620964, "learning_rate": 0.00022589568747047496, "loss": 0.6188, "step": 28215 }, { "epoch": 1.4326890127044967, "grad_norm": 0.0322766176117612, "learning_rate": 0.00022571044299333522, "loss": 0.5682, "step": 28220 }, { "epoch": 1.4329428488025282, "grad_norm": 0.029816245853754916, "learning_rate": 0.0002255252523578678, "loss": 0.5908, "step": 28225 }, { "epoch": 1.4331966849005597, "grad_norm": 0.028382174558632466, "learning_rate": 0.0002253401156004244, "loss": 0.5703, "step": 28230 }, { "epoch": 1.4334505209985913, "grad_norm": 0.026335202932884865, "learning_rate": 0.00022515503275734655, "loss": 0.6047, "step": 28235 }, { "epoch": 1.4337043570966226, "grad_norm": 0.03053231140166231, "learning_rate": 0.0002249700038649653, "loss": 0.6084, "step": 28240 }, { "epoch": 1.4339581931946541, "grad_norm": 0.030516561155204684, "learning_rate": 0.00022478502895960056, "loss": 0.669, "step": 28245 }, { "epoch": 1.4342120292926857, "grad_norm": 0.02603166268590988, "learning_rate": 0.00022460010807756232, "loss": 0.5879, "step": 28250 }, { "epoch": 1.4344658653907172, "grad_norm": 0.029372810796470618, "learning_rate": 0.00022441524125514924, "loss": 0.6073, "step": 28255 }, { "epoch": 1.4347197014887487, "grad_norm": 0.028729473926340295, "learning_rate": 0.0002242304285286501, "loss": 0.6097, "step": 28260 }, { "epoch": 1.4349735375867803, "grad_norm": 0.026913317285256845, "learning_rate": 0.0002240456699343425, "loss": 0.5805, "step": 28265 }, { "epoch": 1.4352273736848118, "grad_norm": 0.024545569034536665, "learning_rate": 0.00022386096550849384, "loss": 0.5658, "step": 28270 }, { "epoch": 1.4354812097828433, "grad_norm": 0.02858062985047487, "learning_rate": 0.00022367631528736037, "loss": 0.5901, "step": 28275 }, { "epoch": 1.4357350458808746, "grad_norm": 0.027813302997085357, "learning_rate": 0.00022349171930718836, "loss": 0.602, "step": 28280 }, { "epoch": 1.4359888819789062, "grad_norm": 0.027800288867623858, "learning_rate": 0.0002233071776042127, "loss": 0.6115, "step": 28285 }, { "epoch": 1.4362427180769377, "grad_norm": 0.025484983349850904, "learning_rate": 0.00022312269021465826, "loss": 0.5612, "step": 28290 }, { "epoch": 1.4364965541749692, "grad_norm": 0.028113567289228137, "learning_rate": 0.00022293825717473891, "loss": 0.6208, "step": 28295 }, { "epoch": 1.4367503902730008, "grad_norm": 0.028096760813874345, "learning_rate": 0.0002227538785206582, "loss": 0.6375, "step": 28300 }, { "epoch": 1.437004226371032, "grad_norm": 0.026735048211649796, "learning_rate": 0.0002225695542886083, "loss": 0.5982, "step": 28305 }, { "epoch": 1.4372580624690636, "grad_norm": 0.02888754514183744, "learning_rate": 0.00022238528451477152, "loss": 0.6013, "step": 28310 }, { "epoch": 1.4375118985670952, "grad_norm": 0.026209359028790218, "learning_rate": 0.0002222010692353188, "loss": 0.5802, "step": 28315 }, { "epoch": 1.4377657346651267, "grad_norm": 0.0278770638679039, "learning_rate": 0.00022201690848641092, "loss": 0.5938, "step": 28320 }, { "epoch": 1.4380195707631582, "grad_norm": 0.029163887221016913, "learning_rate": 0.00022183280230419746, "loss": 0.5715, "step": 28325 }, { "epoch": 1.4382734068611898, "grad_norm": 0.03142767010115254, "learning_rate": 0.00022164875072481788, "loss": 0.6201, "step": 28330 }, { "epoch": 1.4385272429592213, "grad_norm": 0.03244669763445163, "learning_rate": 0.00022146475378440018, "loss": 0.61, "step": 28335 }, { "epoch": 1.4387810790572528, "grad_norm": 0.026214135518087777, "learning_rate": 0.00022128081151906248, "loss": 0.5999, "step": 28340 }, { "epoch": 1.4390349151552844, "grad_norm": 0.025727284155517928, "learning_rate": 0.00022109692396491128, "loss": 0.6108, "step": 28345 }, { "epoch": 1.4392887512533157, "grad_norm": 0.02827786696661331, "learning_rate": 0.00022091309115804305, "loss": 0.6088, "step": 28350 }, { "epoch": 1.4395425873513472, "grad_norm": 0.028617665597713223, "learning_rate": 0.0002207293131345434, "loss": 0.6123, "step": 28355 }, { "epoch": 1.4397964234493787, "grad_norm": 0.02769816605647136, "learning_rate": 0.00022054558993048667, "loss": 0.5984, "step": 28360 }, { "epoch": 1.4400502595474103, "grad_norm": 0.026338003685430017, "learning_rate": 0.00022036192158193717, "loss": 0.6475, "step": 28365 }, { "epoch": 1.4403040956454418, "grad_norm": 0.026259710742686092, "learning_rate": 0.00022017830812494778, "loss": 0.5872, "step": 28370 }, { "epoch": 1.4405579317434731, "grad_norm": 0.027125924775797734, "learning_rate": 0.0002199947495955612, "loss": 0.6366, "step": 28375 }, { "epoch": 1.4408117678415047, "grad_norm": 0.03345426510522094, "learning_rate": 0.00021981124602980868, "loss": 0.6145, "step": 28380 }, { "epoch": 1.4410656039395362, "grad_norm": 0.027078923373312414, "learning_rate": 0.00021962779746371148, "loss": 0.5561, "step": 28385 }, { "epoch": 1.4413194400375677, "grad_norm": 0.027343443531646024, "learning_rate": 0.0002194444039332792, "loss": 0.6086, "step": 28390 }, { "epoch": 1.4415732761355993, "grad_norm": 0.027503279135057613, "learning_rate": 0.00021926106547451153, "loss": 0.6307, "step": 28395 }, { "epoch": 1.4418271122336308, "grad_norm": 0.029234370991232602, "learning_rate": 0.00021907778212339646, "loss": 0.5997, "step": 28400 }, { "epoch": 1.4420809483316623, "grad_norm": 0.02961246126338173, "learning_rate": 0.00021889455391591197, "loss": 0.6154, "step": 28405 }, { "epoch": 1.4423347844296939, "grad_norm": 0.029801734982311034, "learning_rate": 0.00021871138088802434, "loss": 0.5751, "step": 28410 }, { "epoch": 1.4425886205277252, "grad_norm": 0.02522657505048561, "learning_rate": 0.00021852826307569017, "loss": 0.5575, "step": 28415 }, { "epoch": 1.4428424566257567, "grad_norm": 0.025845412529967884, "learning_rate": 0.00021834520051485412, "loss": 0.619, "step": 28420 }, { "epoch": 1.4430962927237883, "grad_norm": 0.027865534614470746, "learning_rate": 0.00021816219324145082, "loss": 0.601, "step": 28425 }, { "epoch": 1.4433501288218198, "grad_norm": 0.028376724963727683, "learning_rate": 0.00021797924129140323, "loss": 0.6092, "step": 28430 }, { "epoch": 1.4436039649198513, "grad_norm": 0.03055751302326067, "learning_rate": 0.00021779634470062433, "loss": 0.6097, "step": 28435 }, { "epoch": 1.4438578010178826, "grad_norm": 0.03132460762307156, "learning_rate": 0.0002176135035050154, "loss": 0.5735, "step": 28440 }, { "epoch": 1.4441116371159142, "grad_norm": 0.03021106600005474, "learning_rate": 0.00021743071774046768, "loss": 0.6471, "step": 28445 }, { "epoch": 1.4443654732139457, "grad_norm": 0.042090534896218786, "learning_rate": 0.00021724798744286072, "loss": 0.592, "step": 28450 }, { "epoch": 1.4446193093119772, "grad_norm": 0.0741269812817319, "learning_rate": 0.00021706531264806394, "loss": 0.6134, "step": 28455 }, { "epoch": 1.4448731454100088, "grad_norm": 0.027577200363512844, "learning_rate": 0.00021688269339193513, "loss": 0.5969, "step": 28460 }, { "epoch": 1.4451269815080403, "grad_norm": 0.03272997787660522, "learning_rate": 0.00021670012971032184, "loss": 0.5884, "step": 28465 }, { "epoch": 1.4453808176060718, "grad_norm": 0.027333880978822115, "learning_rate": 0.00021651762163906008, "loss": 0.5973, "step": 28470 }, { "epoch": 1.4456346537041034, "grad_norm": 0.027565925152795153, "learning_rate": 0.0002163351692139755, "loss": 0.5977, "step": 28475 }, { "epoch": 1.4458884898021347, "grad_norm": 0.027075782594696784, "learning_rate": 0.00021615277247088278, "loss": 0.606, "step": 28480 }, { "epoch": 1.4461423259001662, "grad_norm": 0.027608529444984466, "learning_rate": 0.00021597043144558505, "loss": 0.5655, "step": 28485 }, { "epoch": 1.4463961619981978, "grad_norm": 0.02952678307296711, "learning_rate": 0.00021578814617387537, "loss": 0.6062, "step": 28490 }, { "epoch": 1.4466499980962293, "grad_norm": 0.0271010303100182, "learning_rate": 0.00021560591669153505, "loss": 0.5983, "step": 28495 }, { "epoch": 1.4469038341942608, "grad_norm": 0.02811599753239695, "learning_rate": 0.00021542374303433522, "loss": 0.567, "step": 28500 }, { "epoch": 1.4471576702922921, "grad_norm": 0.026746236853554116, "learning_rate": 0.00021524162523803525, "loss": 0.6026, "step": 28505 }, { "epoch": 1.4474115063903237, "grad_norm": 0.025449634432498976, "learning_rate": 0.00021505956333838432, "loss": 0.6349, "step": 28510 }, { "epoch": 1.4476653424883552, "grad_norm": 0.025387579599613404, "learning_rate": 0.00021487755737111997, "loss": 0.6212, "step": 28515 }, { "epoch": 1.4479191785863867, "grad_norm": 0.027783319664029686, "learning_rate": 0.00021469560737196936, "loss": 0.5999, "step": 28520 }, { "epoch": 1.4481730146844183, "grad_norm": 0.02507857753571605, "learning_rate": 0.00021451371337664803, "loss": 0.5961, "step": 28525 }, { "epoch": 1.4484268507824498, "grad_norm": 0.027789420777753295, "learning_rate": 0.00021433187542086102, "loss": 0.6456, "step": 28530 }, { "epoch": 1.4486806868804813, "grad_norm": 0.027155374787467695, "learning_rate": 0.0002141500935403023, "loss": 0.6184, "step": 28535 }, { "epoch": 1.4489345229785129, "grad_norm": 0.02682080255081656, "learning_rate": 0.0002139683677706548, "loss": 0.5923, "step": 28540 }, { "epoch": 1.4491883590765442, "grad_norm": 0.025863679722394794, "learning_rate": 0.00021378669814759016, "loss": 0.5906, "step": 28545 }, { "epoch": 1.4494421951745757, "grad_norm": 0.02648283677127344, "learning_rate": 0.00021360508470676947, "loss": 0.5365, "step": 28550 }, { "epoch": 1.4496960312726073, "grad_norm": 0.026805411809686704, "learning_rate": 0.00021342352748384224, "loss": 0.5699, "step": 28555 }, { "epoch": 1.4499498673706388, "grad_norm": 0.026384268321927216, "learning_rate": 0.00021324202651444758, "loss": 0.5711, "step": 28560 }, { "epoch": 1.4502037034686703, "grad_norm": 0.0266107357820126, "learning_rate": 0.00021306058183421289, "loss": 0.5829, "step": 28565 }, { "epoch": 1.4504575395667016, "grad_norm": 0.13024683109337445, "learning_rate": 0.00021287919347875517, "loss": 0.6254, "step": 28570 }, { "epoch": 1.4507113756647332, "grad_norm": 0.026908144852354092, "learning_rate": 0.00021269786148367975, "loss": 0.6171, "step": 28575 }, { "epoch": 1.4509652117627647, "grad_norm": 0.0285454717671817, "learning_rate": 0.00021251658588458151, "loss": 0.6295, "step": 28580 }, { "epoch": 1.4512190478607963, "grad_norm": 0.03183351737547352, "learning_rate": 0.00021233536671704363, "loss": 0.6236, "step": 28585 }, { "epoch": 1.4514728839588278, "grad_norm": 0.027175483358370178, "learning_rate": 0.00021215420401663864, "loss": 0.5973, "step": 28590 }, { "epoch": 1.4517267200568593, "grad_norm": 0.02723536790441386, "learning_rate": 0.0002119730978189281, "loss": 0.578, "step": 28595 }, { "epoch": 1.4519805561548909, "grad_norm": 0.029337158178012968, "learning_rate": 0.0002117920481594619, "loss": 0.6016, "step": 28600 }, { "epoch": 1.4522343922529224, "grad_norm": 0.028888326081591444, "learning_rate": 0.00021161105507377958, "loss": 0.5821, "step": 28605 }, { "epoch": 1.452488228350954, "grad_norm": 0.029070134512912207, "learning_rate": 0.00021143011859740875, "loss": 0.5718, "step": 28610 }, { "epoch": 1.4527420644489852, "grad_norm": 0.027947023170869348, "learning_rate": 0.00021124923876586672, "loss": 0.6054, "step": 28615 }, { "epoch": 1.4529959005470168, "grad_norm": 0.027726565217244172, "learning_rate": 0.0002110684156146589, "loss": 0.5804, "step": 28620 }, { "epoch": 1.4532497366450483, "grad_norm": 0.03008684576884019, "learning_rate": 0.00021088764917928044, "loss": 0.5927, "step": 28625 }, { "epoch": 1.4535035727430798, "grad_norm": 0.027638262323520124, "learning_rate": 0.0002107069394952144, "loss": 0.6171, "step": 28630 }, { "epoch": 1.4537574088411114, "grad_norm": 0.02686102230817948, "learning_rate": 0.00021052628659793367, "loss": 0.5892, "step": 28635 }, { "epoch": 1.4540112449391427, "grad_norm": 0.030355488513162217, "learning_rate": 0.00021034569052289908, "loss": 0.6113, "step": 28640 }, { "epoch": 1.4542650810371742, "grad_norm": 0.027764875665070143, "learning_rate": 0.00021016515130556113, "loss": 0.5915, "step": 28645 }, { "epoch": 1.4545189171352058, "grad_norm": 0.025720552517139766, "learning_rate": 0.0002099846689813582, "loss": 0.6, "step": 28650 }, { "epoch": 1.4547727532332373, "grad_norm": 0.030229644703784198, "learning_rate": 0.0002098042435857188, "loss": 0.5608, "step": 28655 }, { "epoch": 1.4550265893312688, "grad_norm": 0.028542760737995946, "learning_rate": 0.000209623875154059, "loss": 0.5953, "step": 28660 }, { "epoch": 1.4552804254293004, "grad_norm": 0.03144487478265786, "learning_rate": 0.00020944356372178458, "loss": 0.6161, "step": 28665 }, { "epoch": 1.455534261527332, "grad_norm": 0.028140809299302096, "learning_rate": 0.00020926330932428944, "loss": 0.5602, "step": 28670 }, { "epoch": 1.4557880976253634, "grad_norm": 0.029601794011240898, "learning_rate": 0.00020908311199695695, "loss": 0.5946, "step": 28675 }, { "epoch": 1.4560419337233947, "grad_norm": 0.03175489431656953, "learning_rate": 0.0002089029717751586, "loss": 0.6023, "step": 28680 }, { "epoch": 1.4562957698214263, "grad_norm": 0.029529980206639134, "learning_rate": 0.00020872288869425536, "loss": 0.626, "step": 28685 }, { "epoch": 1.4565496059194578, "grad_norm": 0.029108376973171042, "learning_rate": 0.0002085428627895963, "loss": 0.5836, "step": 28690 }, { "epoch": 1.4568034420174893, "grad_norm": 0.02730801551465893, "learning_rate": 0.00020836289409651993, "loss": 0.6135, "step": 28695 }, { "epoch": 1.4570572781155209, "grad_norm": 0.029814216482505165, "learning_rate": 0.0002081829826503529, "loss": 0.587, "step": 28700 }, { "epoch": 1.4573111142135522, "grad_norm": 0.025453935043998496, "learning_rate": 0.0002080031284864113, "loss": 0.5805, "step": 28705 }, { "epoch": 1.4575649503115837, "grad_norm": 0.029005831801503835, "learning_rate": 0.00020782333163999917, "loss": 0.598, "step": 28710 }, { "epoch": 1.4578187864096153, "grad_norm": 0.02895767659474218, "learning_rate": 0.00020764359214640998, "loss": 0.5747, "step": 28715 }, { "epoch": 1.4580726225076468, "grad_norm": 0.027627631193377883, "learning_rate": 0.0002074639100409258, "loss": 0.5917, "step": 28720 }, { "epoch": 1.4583264586056783, "grad_norm": 0.029031005425501587, "learning_rate": 0.0002072842853588171, "loss": 0.6176, "step": 28725 }, { "epoch": 1.4585802947037099, "grad_norm": 0.02794341043661665, "learning_rate": 0.00020710471813534354, "loss": 0.5872, "step": 28730 }, { "epoch": 1.4588341308017414, "grad_norm": 0.026676529480261998, "learning_rate": 0.00020692520840575297, "loss": 0.6122, "step": 28735 }, { "epoch": 1.459087966899773, "grad_norm": 0.030816196915599947, "learning_rate": 0.00020674575620528262, "loss": 0.6205, "step": 28740 }, { "epoch": 1.4593418029978042, "grad_norm": 0.026610145153685517, "learning_rate": 0.0002065663615691577, "loss": 0.6058, "step": 28745 }, { "epoch": 1.4595956390958358, "grad_norm": 0.026789153780571284, "learning_rate": 0.00020638702453259285, "loss": 0.5922, "step": 28750 }, { "epoch": 1.4598494751938673, "grad_norm": 0.03105094341173288, "learning_rate": 0.0002062077451307906, "loss": 0.5816, "step": 28755 }, { "epoch": 1.4601033112918989, "grad_norm": 0.025886251266935188, "learning_rate": 0.00020602852339894306, "loss": 0.5693, "step": 28760 }, { "epoch": 1.4603571473899304, "grad_norm": 0.027096876578589384, "learning_rate": 0.00020584935937223016, "loss": 0.6203, "step": 28765 }, { "epoch": 1.4606109834879617, "grad_norm": 0.027096493385234913, "learning_rate": 0.0002056702530858211, "loss": 0.588, "step": 28770 }, { "epoch": 1.4608648195859932, "grad_norm": 0.028575326475179557, "learning_rate": 0.00020549120457487354, "loss": 0.5792, "step": 28775 }, { "epoch": 1.4611186556840248, "grad_norm": 0.028428310458880685, "learning_rate": 0.00020531221387453392, "loss": 0.5604, "step": 28780 }, { "epoch": 1.4613724917820563, "grad_norm": 0.027234540763247987, "learning_rate": 0.000205133281019937, "loss": 0.5861, "step": 28785 }, { "epoch": 1.4616263278800878, "grad_norm": 0.027108255732135957, "learning_rate": 0.0002049544060462067, "loss": 0.5967, "step": 28790 }, { "epoch": 1.4618801639781194, "grad_norm": 0.03155844151224286, "learning_rate": 0.00020477558898845488, "loss": 0.6191, "step": 28795 }, { "epoch": 1.462134000076151, "grad_norm": 0.027823603884459078, "learning_rate": 0.00020459682988178285, "loss": 0.5866, "step": 28800 }, { "epoch": 1.4623878361741824, "grad_norm": 0.028671516977117742, "learning_rate": 0.0002044181287612798, "loss": 0.6067, "step": 28805 }, { "epoch": 1.4626416722722138, "grad_norm": 0.02613245738662167, "learning_rate": 0.00020423948566202415, "loss": 0.6163, "step": 28810 }, { "epoch": 1.4628955083702453, "grad_norm": 0.02735429880768207, "learning_rate": 0.00020406090061908234, "loss": 0.5681, "step": 28815 }, { "epoch": 1.4631493444682768, "grad_norm": 0.026767535802725718, "learning_rate": 0.00020388237366751006, "loss": 0.5767, "step": 28820 }, { "epoch": 1.4634031805663084, "grad_norm": 0.0275077326475287, "learning_rate": 0.00020370390484235096, "loss": 0.6039, "step": 28825 }, { "epoch": 1.46365701666434, "grad_norm": 0.02755121632141317, "learning_rate": 0.00020352549417863768, "loss": 0.6132, "step": 28830 }, { "epoch": 1.4639108527623712, "grad_norm": 0.028131349132866832, "learning_rate": 0.00020334714171139158, "loss": 0.5945, "step": 28835 }, { "epoch": 1.4641646888604027, "grad_norm": 0.028451221702669477, "learning_rate": 0.00020316884747562192, "loss": 0.5955, "step": 28840 }, { "epoch": 1.4644185249584343, "grad_norm": 0.025770698046250904, "learning_rate": 0.0002029906115063274, "loss": 0.5768, "step": 28845 }, { "epoch": 1.4646723610564658, "grad_norm": 0.028522304863107698, "learning_rate": 0.0002028124338384945, "loss": 0.5885, "step": 28850 }, { "epoch": 1.4649261971544973, "grad_norm": 0.029427179588936245, "learning_rate": 0.00020263431450709895, "loss": 0.6418, "step": 28855 }, { "epoch": 1.4651800332525289, "grad_norm": 0.025774860941947003, "learning_rate": 0.00020245625354710435, "loss": 0.5705, "step": 28860 }, { "epoch": 1.4654338693505604, "grad_norm": 0.028651643756858403, "learning_rate": 0.00020227825099346347, "loss": 0.5859, "step": 28865 }, { "epoch": 1.465687705448592, "grad_norm": 0.02654563143030565, "learning_rate": 0.00020210030688111701, "loss": 0.5724, "step": 28870 }, { "epoch": 1.4659415415466235, "grad_norm": 0.029573548663863875, "learning_rate": 0.00020192242124499488, "loss": 0.6148, "step": 28875 }, { "epoch": 1.4661953776446548, "grad_norm": 0.026503477435847626, "learning_rate": 0.00020174459412001473, "loss": 0.5713, "step": 28880 }, { "epoch": 1.4664492137426863, "grad_norm": 0.02656644377104431, "learning_rate": 0.00020156682554108357, "loss": 0.586, "step": 28885 }, { "epoch": 1.4667030498407179, "grad_norm": 0.035906175786054054, "learning_rate": 0.0002013891155430959, "loss": 0.5885, "step": 28890 }, { "epoch": 1.4669568859387494, "grad_norm": 0.028352314665057703, "learning_rate": 0.00020121146416093605, "loss": 0.5965, "step": 28895 }, { "epoch": 1.4672107220367807, "grad_norm": 0.02715097146749369, "learning_rate": 0.00020103387142947555, "loss": 0.6314, "step": 28900 }, { "epoch": 1.4674645581348122, "grad_norm": 0.03248958789815402, "learning_rate": 0.00020085633738357533, "loss": 0.6128, "step": 28905 }, { "epoch": 1.4677183942328438, "grad_norm": 0.02699733004580963, "learning_rate": 0.00020067886205808405, "loss": 0.6032, "step": 28910 }, { "epoch": 1.4679722303308753, "grad_norm": 0.027070989865454774, "learning_rate": 0.0002005014454878396, "loss": 0.5788, "step": 28915 }, { "epoch": 1.4682260664289069, "grad_norm": 0.028768182109558473, "learning_rate": 0.0002003240877076677, "loss": 0.5928, "step": 28920 }, { "epoch": 1.4684799025269384, "grad_norm": 0.029926616106308732, "learning_rate": 0.00020014678875238302, "loss": 0.6158, "step": 28925 }, { "epoch": 1.46873373862497, "grad_norm": 0.029806783821714106, "learning_rate": 0.00019996954865678817, "loss": 0.6086, "step": 28930 }, { "epoch": 1.4689875747230015, "grad_norm": 0.028975288017754362, "learning_rate": 0.00019979236745567487, "loss": 0.6344, "step": 28935 }, { "epoch": 1.469241410821033, "grad_norm": 0.025710231925539357, "learning_rate": 0.00019961524518382267, "loss": 0.5684, "step": 28940 }, { "epoch": 1.4694952469190643, "grad_norm": 0.027594544240868556, "learning_rate": 0.00019943818187599966, "loss": 0.5742, "step": 28945 }, { "epoch": 1.4697490830170958, "grad_norm": 0.028180161496410262, "learning_rate": 0.00019926117756696265, "loss": 0.5989, "step": 28950 }, { "epoch": 1.4700029191151274, "grad_norm": 0.026100884260046045, "learning_rate": 0.00019908423229145672, "loss": 0.6249, "step": 28955 }, { "epoch": 1.470256755213159, "grad_norm": 0.026365522220529233, "learning_rate": 0.00019890734608421552, "loss": 0.5836, "step": 28960 }, { "epoch": 1.4705105913111904, "grad_norm": 0.11187654043099779, "learning_rate": 0.00019873051897996053, "loss": 0.5693, "step": 28965 }, { "epoch": 1.4707644274092218, "grad_norm": 0.02768084086576473, "learning_rate": 0.0001985537510134024, "loss": 0.5797, "step": 28970 }, { "epoch": 1.4710182635072533, "grad_norm": 0.026113663422587387, "learning_rate": 0.00019837704221923946, "loss": 0.5671, "step": 28975 }, { "epoch": 1.4712720996052848, "grad_norm": 0.029611079271465657, "learning_rate": 0.00019820039263215917, "loss": 0.6026, "step": 28980 }, { "epoch": 1.4715259357033164, "grad_norm": 0.030770164002104573, "learning_rate": 0.00019802380228683646, "loss": 0.616, "step": 28985 }, { "epoch": 1.471779771801348, "grad_norm": 0.02929755639392888, "learning_rate": 0.00019784727121793566, "loss": 0.6141, "step": 28990 }, { "epoch": 1.4720336078993794, "grad_norm": 0.02815166979310672, "learning_rate": 0.00019767079946010852, "loss": 0.5818, "step": 28995 }, { "epoch": 1.472287443997411, "grad_norm": 0.028896894960946665, "learning_rate": 0.00019749438704799588, "loss": 0.5897, "step": 29000 }, { "epoch": 1.4725412800954425, "grad_norm": 0.031153800139819072, "learning_rate": 0.0001973180340162263, "loss": 0.5759, "step": 29005 }, { "epoch": 1.4727951161934738, "grad_norm": 0.031825507384690595, "learning_rate": 0.00019714174039941736, "loss": 0.6488, "step": 29010 }, { "epoch": 1.4730489522915053, "grad_norm": 0.0301928716303435, "learning_rate": 0.00019696550623217403, "loss": 0.6083, "step": 29015 }, { "epoch": 1.4733027883895369, "grad_norm": 0.027865518566078572, "learning_rate": 0.00019678933154909095, "loss": 0.5884, "step": 29020 }, { "epoch": 1.4735566244875684, "grad_norm": 0.02681982179775413, "learning_rate": 0.00019661321638475004, "loss": 0.5622, "step": 29025 }, { "epoch": 1.4738104605856, "grad_norm": 0.0255990067308323, "learning_rate": 0.00019643716077372153, "loss": 0.6128, "step": 29030 }, { "epoch": 1.4740642966836313, "grad_norm": 0.02718480165399722, "learning_rate": 0.0001962611647505647, "loss": 0.613, "step": 29035 }, { "epoch": 1.4743181327816628, "grad_norm": 0.027936349785240656, "learning_rate": 0.00019608522834982633, "loss": 0.6257, "step": 29040 }, { "epoch": 1.4745719688796943, "grad_norm": 0.027753181984867738, "learning_rate": 0.00019590935160604218, "loss": 0.5898, "step": 29045 }, { "epoch": 1.4748258049777259, "grad_norm": 0.029850984153863128, "learning_rate": 0.0001957335345537356, "loss": 0.5819, "step": 29050 }, { "epoch": 1.4750796410757574, "grad_norm": 0.026847240771003977, "learning_rate": 0.00019555777722741902, "loss": 0.593, "step": 29055 }, { "epoch": 1.475333477173789, "grad_norm": 0.05846742940626925, "learning_rate": 0.00019538207966159234, "loss": 0.6046, "step": 29060 }, { "epoch": 1.4755873132718205, "grad_norm": 0.026882796100611197, "learning_rate": 0.00019520644189074444, "loss": 0.5828, "step": 29065 }, { "epoch": 1.475841149369852, "grad_norm": 0.02800890650926337, "learning_rate": 0.00019503086394935182, "loss": 0.5807, "step": 29070 }, { "epoch": 1.4760949854678833, "grad_norm": 0.027036487091023322, "learning_rate": 0.00019485534587187977, "loss": 0.5445, "step": 29075 }, { "epoch": 1.4763488215659148, "grad_norm": 0.027304048131581295, "learning_rate": 0.00019467988769278154, "loss": 0.5785, "step": 29080 }, { "epoch": 1.4766026576639464, "grad_norm": 0.03050859890954667, "learning_rate": 0.00019450448944649895, "loss": 0.611, "step": 29085 }, { "epoch": 1.476856493761978, "grad_norm": 0.028139563325885665, "learning_rate": 0.00019432915116746136, "loss": 0.614, "step": 29090 }, { "epoch": 1.4771103298600095, "grad_norm": 0.02842847816273221, "learning_rate": 0.0001941538728900872, "loss": 0.5723, "step": 29095 }, { "epoch": 1.4773641659580408, "grad_norm": 0.02822746838530909, "learning_rate": 0.00019397865464878235, "loss": 0.587, "step": 29100 }, { "epoch": 1.4776180020560723, "grad_norm": 0.027059489175148242, "learning_rate": 0.00019380349647794165, "loss": 0.6099, "step": 29105 }, { "epoch": 1.4778718381541038, "grad_norm": 0.030036536033108498, "learning_rate": 0.00019362839841194747, "loss": 0.6003, "step": 29110 }, { "epoch": 1.4781256742521354, "grad_norm": 0.027843641977206218, "learning_rate": 0.00019345336048517094, "loss": 0.6085, "step": 29115 }, { "epoch": 1.478379510350167, "grad_norm": 0.02807514301754485, "learning_rate": 0.00019327838273197078, "loss": 0.6165, "step": 29120 }, { "epoch": 1.4786333464481984, "grad_norm": 0.02779696351937964, "learning_rate": 0.0001931034651866947, "loss": 0.6023, "step": 29125 }, { "epoch": 1.47888718254623, "grad_norm": 0.02963913126102633, "learning_rate": 0.00019292860788367773, "loss": 0.629, "step": 29130 }, { "epoch": 1.4791410186442615, "grad_norm": 0.027109301943378104, "learning_rate": 0.00019275381085724364, "loss": 0.5647, "step": 29135 }, { "epoch": 1.4793948547422928, "grad_norm": 0.026963064837793784, "learning_rate": 0.00019257907414170445, "loss": 0.5682, "step": 29140 }, { "epoch": 1.4796486908403244, "grad_norm": 0.02832456910479914, "learning_rate": 0.00019240439777135976, "loss": 0.5601, "step": 29145 }, { "epoch": 1.4799025269383559, "grad_norm": 0.02863674548614805, "learning_rate": 0.00019222978178049793, "loss": 0.596, "step": 29150 }, { "epoch": 1.4801563630363874, "grad_norm": 0.024565265014479747, "learning_rate": 0.00019205522620339494, "loss": 0.6032, "step": 29155 }, { "epoch": 1.480410199134419, "grad_norm": 0.029028706816427954, "learning_rate": 0.00019188073107431546, "loss": 0.594, "step": 29160 }, { "epoch": 1.4806640352324503, "grad_norm": 0.02571386716187704, "learning_rate": 0.00019170629642751175, "loss": 0.5717, "step": 29165 }, { "epoch": 1.4809178713304818, "grad_norm": 0.025456768545356098, "learning_rate": 0.00019153192229722478, "loss": 0.5902, "step": 29170 }, { "epoch": 1.4811717074285133, "grad_norm": 0.031455318437145705, "learning_rate": 0.00019135760871768294, "loss": 0.6224, "step": 29175 }, { "epoch": 1.4814255435265449, "grad_norm": 0.027858630747083224, "learning_rate": 0.00019118335572310347, "loss": 0.6063, "step": 29180 }, { "epoch": 1.4816793796245764, "grad_norm": 0.030673751744295995, "learning_rate": 0.00019100916334769107, "loss": 0.5643, "step": 29185 }, { "epoch": 1.481933215722608, "grad_norm": 0.029673256190060955, "learning_rate": 0.00019083503162563908, "loss": 0.5982, "step": 29190 }, { "epoch": 1.4821870518206395, "grad_norm": 0.029708160505734025, "learning_rate": 0.0001906609605911283, "loss": 0.6027, "step": 29195 }, { "epoch": 1.482440887918671, "grad_norm": 0.029536323439251135, "learning_rate": 0.00019048695027832862, "loss": 0.5743, "step": 29200 }, { "epoch": 1.4826947240167025, "grad_norm": 0.02862617835014209, "learning_rate": 0.00019031300072139685, "loss": 0.637, "step": 29205 }, { "epoch": 1.4829485601147339, "grad_norm": 0.027007611858792014, "learning_rate": 0.00019013911195447887, "loss": 0.6022, "step": 29210 }, { "epoch": 1.4832023962127654, "grad_norm": 0.029602988304713744, "learning_rate": 0.0001899652840117077, "loss": 0.6236, "step": 29215 }, { "epoch": 1.483456232310797, "grad_norm": 0.029929286241533695, "learning_rate": 0.0001897915169272053, "loss": 0.6111, "step": 29220 }, { "epoch": 1.4837100684088285, "grad_norm": 0.026976302650280535, "learning_rate": 0.000189617810735081, "loss": 0.6351, "step": 29225 }, { "epoch": 1.48396390450686, "grad_norm": 0.026897708564300567, "learning_rate": 0.0001894441654694327, "loss": 0.5486, "step": 29230 }, { "epoch": 1.4842177406048913, "grad_norm": 0.026445259567654766, "learning_rate": 0.00018927058116434588, "loss": 0.5951, "step": 29235 }, { "epoch": 1.4844715767029228, "grad_norm": 0.02526824603393684, "learning_rate": 0.00018909705785389452, "loss": 0.5847, "step": 29240 }, { "epoch": 1.4847254128009544, "grad_norm": 0.025943007075155948, "learning_rate": 0.00018892359557214, "loss": 0.5871, "step": 29245 }, { "epoch": 1.484979248898986, "grad_norm": 0.027366862818462918, "learning_rate": 0.00018875019435313255, "loss": 0.596, "step": 29250 }, { "epoch": 1.4852330849970174, "grad_norm": 0.026666153623757276, "learning_rate": 0.0001885768542309096, "loss": 0.5903, "step": 29255 }, { "epoch": 1.485486921095049, "grad_norm": 0.028794066357211042, "learning_rate": 0.0001884035752394971, "loss": 0.6079, "step": 29260 }, { "epoch": 1.4857407571930805, "grad_norm": 0.026088761969753984, "learning_rate": 0.000188230357412909, "loss": 0.5479, "step": 29265 }, { "epoch": 1.485994593291112, "grad_norm": 0.026237406140261963, "learning_rate": 0.00018805720078514677, "loss": 0.579, "step": 29270 }, { "epoch": 1.4862484293891434, "grad_norm": 0.027093226017505023, "learning_rate": 0.0001878841053902005, "loss": 0.5757, "step": 29275 }, { "epoch": 1.486502265487175, "grad_norm": 0.025756730134956135, "learning_rate": 0.00018771107126204771, "loss": 0.5891, "step": 29280 }, { "epoch": 1.4867561015852064, "grad_norm": 0.028932803444229615, "learning_rate": 0.00018753809843465442, "loss": 0.6035, "step": 29285 }, { "epoch": 1.487009937683238, "grad_norm": 0.024684850760801142, "learning_rate": 0.00018736518694197396, "loss": 0.5693, "step": 29290 }, { "epoch": 1.4872637737812695, "grad_norm": 0.02692285430247923, "learning_rate": 0.0001871923368179484, "loss": 0.5856, "step": 29295 }, { "epoch": 1.4875176098793008, "grad_norm": 0.027565929577283092, "learning_rate": 0.000187019548096507, "loss": 0.6058, "step": 29300 }, { "epoch": 1.4877714459773324, "grad_norm": 0.026498661028665077, "learning_rate": 0.00018684682081156762, "loss": 0.6193, "step": 29305 }, { "epoch": 1.4880252820753639, "grad_norm": 0.027005123414376987, "learning_rate": 0.00018667415499703545, "loss": 0.579, "step": 29310 }, { "epoch": 1.4882791181733954, "grad_norm": 0.02741448507437126, "learning_rate": 0.00018650155068680407, "loss": 0.5648, "step": 29315 }, { "epoch": 1.488532954271427, "grad_norm": 0.025995728772742158, "learning_rate": 0.00018632900791475492, "loss": 0.5873, "step": 29320 }, { "epoch": 1.4887867903694585, "grad_norm": 0.024846779458772252, "learning_rate": 0.0001861565267147574, "loss": 0.5452, "step": 29325 }, { "epoch": 1.48904062646749, "grad_norm": 0.027703368361042785, "learning_rate": 0.0001859841071206684, "loss": 0.5576, "step": 29330 }, { "epoch": 1.4892944625655216, "grad_norm": 0.028268692938249386, "learning_rate": 0.0001858117491663333, "loss": 0.6091, "step": 29335 }, { "epoch": 1.4895482986635529, "grad_norm": 0.029139166913622852, "learning_rate": 0.0001856394528855848, "loss": 0.5835, "step": 29340 }, { "epoch": 1.4898021347615844, "grad_norm": 0.02945927012063612, "learning_rate": 0.00018546721831224424, "loss": 0.5951, "step": 29345 }, { "epoch": 1.490055970859616, "grad_norm": 0.026509131404585197, "learning_rate": 0.00018529504548011995, "loss": 0.6043, "step": 29350 }, { "epoch": 1.4903098069576475, "grad_norm": 0.026326804932584737, "learning_rate": 0.00018512293442300893, "loss": 0.5867, "step": 29355 }, { "epoch": 1.490563643055679, "grad_norm": 0.02907291560045587, "learning_rate": 0.00018495088517469545, "loss": 0.5958, "step": 29360 }, { "epoch": 1.4908174791537103, "grad_norm": 0.027466841134040534, "learning_rate": 0.00018477889776895225, "loss": 0.5681, "step": 29365 }, { "epoch": 1.4910713152517419, "grad_norm": 0.02866295544264686, "learning_rate": 0.0001846069722395392, "loss": 0.5738, "step": 29370 }, { "epoch": 1.4913251513497734, "grad_norm": 0.029668025068953725, "learning_rate": 0.00018443510862020467, "loss": 0.6174, "step": 29375 }, { "epoch": 1.491578987447805, "grad_norm": 0.025133354664694425, "learning_rate": 0.0001842633069446848, "loss": 0.5646, "step": 29380 }, { "epoch": 1.4918328235458365, "grad_norm": 0.0259986011649285, "learning_rate": 0.00018409156724670295, "loss": 0.6269, "step": 29385 }, { "epoch": 1.492086659643868, "grad_norm": 0.02570531654437042, "learning_rate": 0.00018391988955997126, "loss": 0.5731, "step": 29390 }, { "epoch": 1.4923404957418995, "grad_norm": 0.02857185379854826, "learning_rate": 0.00018374827391818877, "loss": 0.5901, "step": 29395 }, { "epoch": 1.492594331839931, "grad_norm": 0.07661883193695102, "learning_rate": 0.00018357672035504313, "loss": 0.5823, "step": 29400 }, { "epoch": 1.4928481679379624, "grad_norm": 0.026269747744276054, "learning_rate": 0.00018340522890420907, "loss": 0.5496, "step": 29405 }, { "epoch": 1.493102004035994, "grad_norm": 0.029429051206677156, "learning_rate": 0.00018323379959934993, "loss": 0.6085, "step": 29410 }, { "epoch": 1.4933558401340254, "grad_norm": 0.029041821989533957, "learning_rate": 0.0001830624324741161, "loss": 0.5988, "step": 29415 }, { "epoch": 1.493609676232057, "grad_norm": 0.027941212893068203, "learning_rate": 0.00018289112756214633, "loss": 0.5817, "step": 29420 }, { "epoch": 1.4938635123300885, "grad_norm": 0.027836459737652617, "learning_rate": 0.0001827198848970666, "loss": 0.5857, "step": 29425 }, { "epoch": 1.4941173484281198, "grad_norm": 0.02500942962439454, "learning_rate": 0.00018254870451249138, "loss": 0.5771, "step": 29430 }, { "epoch": 1.4943711845261514, "grad_norm": 0.02787280300553742, "learning_rate": 0.000182377586442022, "loss": 0.6227, "step": 29435 }, { "epoch": 1.494625020624183, "grad_norm": 0.031141209519551308, "learning_rate": 0.00018220653071924876, "loss": 0.5749, "step": 29440 }, { "epoch": 1.4948788567222144, "grad_norm": 0.02563780405510332, "learning_rate": 0.0001820355373777486, "loss": 0.5448, "step": 29445 }, { "epoch": 1.495132692820246, "grad_norm": 0.029706533627528283, "learning_rate": 0.0001818646064510868, "loss": 0.6199, "step": 29450 }, { "epoch": 1.4953865289182775, "grad_norm": 0.030252004995425026, "learning_rate": 0.00018169373797281618, "loss": 0.5603, "step": 29455 }, { "epoch": 1.495640365016309, "grad_norm": 0.026486067241419362, "learning_rate": 0.0001815229319764775, "loss": 0.5737, "step": 29460 }, { "epoch": 1.4958942011143406, "grad_norm": 0.027323008900660573, "learning_rate": 0.00018135218849559887, "loss": 0.6259, "step": 29465 }, { "epoch": 1.496148037212372, "grad_norm": 0.031025029655519454, "learning_rate": 0.00018118150756369673, "loss": 0.5692, "step": 29470 }, { "epoch": 1.4964018733104034, "grad_norm": 0.027598845242202776, "learning_rate": 0.00018101088921427456, "loss": 0.5759, "step": 29475 }, { "epoch": 1.496655709408435, "grad_norm": 0.02620086879799622, "learning_rate": 0.00018084033348082418, "loss": 0.5715, "step": 29480 }, { "epoch": 1.4969095455064665, "grad_norm": 0.02877894508827133, "learning_rate": 0.00018066984039682456, "loss": 0.5617, "step": 29485 }, { "epoch": 1.497163381604498, "grad_norm": 0.027353027934983728, "learning_rate": 0.00018049940999574288, "loss": 0.598, "step": 29490 }, { "epoch": 1.4974172177025296, "grad_norm": 0.026345180925546097, "learning_rate": 0.00018032904231103354, "loss": 0.5945, "step": 29495 }, { "epoch": 1.4976710538005609, "grad_norm": 0.027619633772602917, "learning_rate": 0.00018015873737613897, "loss": 0.5714, "step": 29500 }, { "epoch": 1.4979248898985924, "grad_norm": 0.02757901110078004, "learning_rate": 0.0001799884952244894, "loss": 0.6266, "step": 29505 }, { "epoch": 1.498178725996624, "grad_norm": 0.02720642601654385, "learning_rate": 0.00017981831588950216, "loss": 0.5634, "step": 29510 }, { "epoch": 1.4984325620946555, "grad_norm": 0.030148050559281547, "learning_rate": 0.00017964819940458293, "loss": 0.5835, "step": 29515 }, { "epoch": 1.498686398192687, "grad_norm": 0.026327771336154392, "learning_rate": 0.00017947814580312438, "loss": 0.5648, "step": 29520 }, { "epoch": 1.4989402342907185, "grad_norm": 0.02820826480188378, "learning_rate": 0.00017930815511850757, "loss": 0.5869, "step": 29525 }, { "epoch": 1.49919407038875, "grad_norm": 0.026636248406962326, "learning_rate": 0.00017913822738410042, "loss": 0.6206, "step": 29530 }, { "epoch": 1.4994479064867816, "grad_norm": 0.03114073712062875, "learning_rate": 0.00017896836263325928, "loss": 0.6163, "step": 29535 }, { "epoch": 1.499701742584813, "grad_norm": 0.02676532886973011, "learning_rate": 0.0001787985608993274, "loss": 0.6097, "step": 29540 }, { "epoch": 1.4999555786828445, "grad_norm": 0.028432270542159074, "learning_rate": 0.00017862882221563635, "loss": 0.5731, "step": 29545 }, { "epoch": 1.500209414780876, "grad_norm": 0.024850515285149605, "learning_rate": 0.00017845914661550466, "loss": 0.5934, "step": 29550 }, { "epoch": 1.5004632508789075, "grad_norm": 0.027471832751048916, "learning_rate": 0.00017828953413223897, "loss": 0.5911, "step": 29555 }, { "epoch": 1.5007170869769388, "grad_norm": 0.029933744087721783, "learning_rate": 0.00017811998479913337, "loss": 0.612, "step": 29560 }, { "epoch": 1.5009709230749704, "grad_norm": 0.026274600481583733, "learning_rate": 0.0001779504986494697, "loss": 0.5824, "step": 29565 }, { "epoch": 1.501224759173002, "grad_norm": 0.02920945539788073, "learning_rate": 0.00017778107571651692, "loss": 0.6087, "step": 29570 }, { "epoch": 1.5014785952710334, "grad_norm": 0.02765498562812065, "learning_rate": 0.00017761171603353226, "loss": 0.5334, "step": 29575 }, { "epoch": 1.501732431369065, "grad_norm": 0.027242930878637696, "learning_rate": 0.00017744241963375986, "loss": 0.6098, "step": 29580 }, { "epoch": 1.5019862674670965, "grad_norm": 0.02839389800522616, "learning_rate": 0.00017727318655043196, "loss": 0.5902, "step": 29585 }, { "epoch": 1.502240103565128, "grad_norm": 0.028807476146563954, "learning_rate": 0.00017710401681676803, "loss": 0.588, "step": 29590 }, { "epoch": 1.5024939396631596, "grad_norm": 0.03012758987673389, "learning_rate": 0.00017693491046597544, "loss": 0.5884, "step": 29595 }, { "epoch": 1.5027477757611911, "grad_norm": 0.02724319917377948, "learning_rate": 0.0001767658675312486, "loss": 0.6021, "step": 29600 }, { "epoch": 1.5030016118592227, "grad_norm": 0.026277683064184848, "learning_rate": 0.00017659688804577022, "loss": 0.6099, "step": 29605 }, { "epoch": 1.503255447957254, "grad_norm": 0.03506490910282729, "learning_rate": 0.00017642797204270972, "loss": 0.594, "step": 29610 }, { "epoch": 1.5035092840552855, "grad_norm": 0.027953303491732296, "learning_rate": 0.00017625911955522467, "loss": 0.5985, "step": 29615 }, { "epoch": 1.503763120153317, "grad_norm": 0.02630222802912579, "learning_rate": 0.00017609033061646013, "loss": 0.5794, "step": 29620 }, { "epoch": 1.5040169562513483, "grad_norm": 0.09352632064608317, "learning_rate": 0.0001759216052595482, "loss": 0.5949, "step": 29625 }, { "epoch": 1.5042707923493799, "grad_norm": 0.02894532823256737, "learning_rate": 0.00017575294351760912, "loss": 0.5656, "step": 29630 }, { "epoch": 1.5045246284474114, "grad_norm": 0.027213652562536772, "learning_rate": 0.00017558434542375002, "loss": 0.5917, "step": 29635 }, { "epoch": 1.504778464545443, "grad_norm": 0.024774548353224023, "learning_rate": 0.0001754158110110663, "loss": 0.5701, "step": 29640 }, { "epoch": 1.5050323006434745, "grad_norm": 0.02902185448240762, "learning_rate": 0.00017524734031263995, "loss": 0.6128, "step": 29645 }, { "epoch": 1.505286136741506, "grad_norm": 0.027619655209522562, "learning_rate": 0.00017507893336154136, "loss": 0.5761, "step": 29650 }, { "epoch": 1.5055399728395376, "grad_norm": 0.024995880044787012, "learning_rate": 0.00017491059019082757, "loss": 0.582, "step": 29655 }, { "epoch": 1.505793808937569, "grad_norm": 0.02843153871482845, "learning_rate": 0.00017474231083354386, "loss": 0.5801, "step": 29660 }, { "epoch": 1.5060476450356006, "grad_norm": 0.025767163583045944, "learning_rate": 0.00017457409532272233, "loss": 0.5756, "step": 29665 }, { "epoch": 1.5063014811336322, "grad_norm": 0.027126679525359656, "learning_rate": 0.00017440594369138318, "loss": 0.5901, "step": 29670 }, { "epoch": 1.5065553172316635, "grad_norm": 0.028697285497854958, "learning_rate": 0.00017423785597253322, "loss": 0.5903, "step": 29675 }, { "epoch": 1.506809153329695, "grad_norm": 0.027047509151742883, "learning_rate": 0.00017406983219916784, "loss": 0.6028, "step": 29680 }, { "epoch": 1.5070629894277265, "grad_norm": 0.0268369238387586, "learning_rate": 0.00017390187240426885, "loss": 0.5718, "step": 29685 }, { "epoch": 1.5073168255257579, "grad_norm": 0.030577383526840123, "learning_rate": 0.00017373397662080625, "loss": 0.5784, "step": 29690 }, { "epoch": 1.5075706616237894, "grad_norm": 0.02858839021030639, "learning_rate": 0.0001735661448817368, "loss": 0.6023, "step": 29695 }, { "epoch": 1.507824497721821, "grad_norm": 0.028454611418279426, "learning_rate": 0.0001733983772200053, "loss": 0.5897, "step": 29700 }, { "epoch": 1.5080783338198525, "grad_norm": 0.03069004842119655, "learning_rate": 0.00017323067366854344, "loss": 0.5996, "step": 29705 }, { "epoch": 1.508332169917884, "grad_norm": 0.026772017152615173, "learning_rate": 0.00017306303426027094, "loss": 0.611, "step": 29710 }, { "epoch": 1.5085860060159155, "grad_norm": 0.030060526822309662, "learning_rate": 0.00017289545902809416, "loss": 0.5793, "step": 29715 }, { "epoch": 1.508839842113947, "grad_norm": 0.02630334809985566, "learning_rate": 0.00017272794800490772, "loss": 0.6013, "step": 29720 }, { "epoch": 1.5090936782119786, "grad_norm": 0.03033673181727264, "learning_rate": 0.00017256050122359278, "loss": 0.5796, "step": 29725 }, { "epoch": 1.5093475143100101, "grad_norm": 0.028805154469892413, "learning_rate": 0.00017239311871701868, "loss": 0.556, "step": 29730 }, { "epoch": 1.5096013504080417, "grad_norm": 0.025081760639005916, "learning_rate": 0.00017222580051804147, "loss": 0.5476, "step": 29735 }, { "epoch": 1.509855186506073, "grad_norm": 0.029015716918792733, "learning_rate": 0.000172058546659505, "loss": 0.5996, "step": 29740 }, { "epoch": 1.5101090226041045, "grad_norm": 0.027993481992512628, "learning_rate": 0.00017189135717424054, "loss": 0.5567, "step": 29745 }, { "epoch": 1.510362858702136, "grad_norm": 0.026744094278876113, "learning_rate": 0.0001717242320950662, "loss": 0.57, "step": 29750 }, { "epoch": 1.5106166948001676, "grad_norm": 0.02859463197131414, "learning_rate": 0.00017155717145478822, "loss": 0.5831, "step": 29755 }, { "epoch": 1.510870530898199, "grad_norm": 0.02702823554080416, "learning_rate": 0.00017139017528619932, "loss": 0.5729, "step": 29760 }, { "epoch": 1.5111243669962304, "grad_norm": 0.027449594807249, "learning_rate": 0.0001712232436220804, "loss": 0.599, "step": 29765 }, { "epoch": 1.511378203094262, "grad_norm": 0.026091505825644387, "learning_rate": 0.000171056376495199, "loss": 0.6085, "step": 29770 }, { "epoch": 1.5116320391922935, "grad_norm": 0.02794672337524129, "learning_rate": 0.00017088957393831066, "loss": 0.5691, "step": 29775 }, { "epoch": 1.511885875290325, "grad_norm": 0.027707028502050455, "learning_rate": 0.0001707228359841575, "loss": 0.6121, "step": 29780 }, { "epoch": 1.5121397113883566, "grad_norm": 0.02701752849002109, "learning_rate": 0.0001705561626654697, "loss": 0.5671, "step": 29785 }, { "epoch": 1.512393547486388, "grad_norm": 0.028763970162266256, "learning_rate": 0.00017038955401496404, "loss": 0.5983, "step": 29790 }, { "epoch": 1.5126473835844196, "grad_norm": 0.026452929898052008, "learning_rate": 0.00017022301006534512, "loss": 0.6022, "step": 29795 }, { "epoch": 1.5129012196824512, "grad_norm": 0.027207755323341687, "learning_rate": 0.00017005653084930483, "loss": 0.6136, "step": 29800 }, { "epoch": 1.5131550557804827, "grad_norm": 0.02630810751688967, "learning_rate": 0.00016989011639952222, "loss": 0.5798, "step": 29805 }, { "epoch": 1.513408891878514, "grad_norm": 0.02734211210911144, "learning_rate": 0.00016972376674866336, "loss": 0.5927, "step": 29810 }, { "epoch": 1.5136627279765456, "grad_norm": 0.027547634174138108, "learning_rate": 0.00016955748192938215, "loss": 0.6152, "step": 29815 }, { "epoch": 1.513916564074577, "grad_norm": 0.0282855309810747, "learning_rate": 0.00016939126197431916, "loss": 0.5995, "step": 29820 }, { "epoch": 1.5141704001726084, "grad_norm": 0.026586254489436044, "learning_rate": 0.00016922510691610288, "loss": 0.6142, "step": 29825 }, { "epoch": 1.51442423627064, "grad_norm": 0.024944059147214714, "learning_rate": 0.00016905901678734836, "loss": 0.5646, "step": 29830 }, { "epoch": 1.5146780723686715, "grad_norm": 0.02560786366280062, "learning_rate": 0.00016889299162065863, "loss": 0.5687, "step": 29835 }, { "epoch": 1.514931908466703, "grad_norm": 0.02618890024549396, "learning_rate": 0.00016872703144862322, "loss": 0.5571, "step": 29840 }, { "epoch": 1.5151857445647345, "grad_norm": 0.027372008670766387, "learning_rate": 0.0001685611363038197, "loss": 0.5845, "step": 29845 }, { "epoch": 1.515439580662766, "grad_norm": 0.025539808421977093, "learning_rate": 0.000168395306218812, "loss": 0.5422, "step": 29850 }, { "epoch": 1.5156934167607976, "grad_norm": 0.027606046971493107, "learning_rate": 0.00016822954122615202, "loss": 0.6289, "step": 29855 }, { "epoch": 1.5159472528588291, "grad_norm": 0.028282949575773035, "learning_rate": 0.0001680638413583787, "loss": 0.5824, "step": 29860 }, { "epoch": 1.5162010889568607, "grad_norm": 0.02838833872931143, "learning_rate": 0.00016789820664801785, "loss": 0.5774, "step": 29865 }, { "epoch": 1.5164549250548922, "grad_norm": 0.028062243640246832, "learning_rate": 0.00016773263712758298, "loss": 0.6051, "step": 29870 }, { "epoch": 1.5167087611529235, "grad_norm": 0.02932333388404908, "learning_rate": 0.00016756713282957425, "loss": 0.5737, "step": 29875 }, { "epoch": 1.516962597250955, "grad_norm": 0.025457101475314263, "learning_rate": 0.00016740169378647967, "loss": 0.5832, "step": 29880 }, { "epoch": 1.5172164333489866, "grad_norm": 0.02960281124458466, "learning_rate": 0.00016723632003077382, "loss": 0.6015, "step": 29885 }, { "epoch": 1.517470269447018, "grad_norm": 0.02677241411436382, "learning_rate": 0.000167071011594919, "loss": 0.5435, "step": 29890 }, { "epoch": 1.5177241055450494, "grad_norm": 0.029859926641658676, "learning_rate": 0.00016690576851136407, "loss": 0.5804, "step": 29895 }, { "epoch": 1.517977941643081, "grad_norm": 0.025660940952869898, "learning_rate": 0.00016674059081254588, "loss": 0.6073, "step": 29900 }, { "epoch": 1.5182317777411125, "grad_norm": 0.027373729253345747, "learning_rate": 0.00016657547853088755, "loss": 0.5932, "step": 29905 }, { "epoch": 1.518485613839144, "grad_norm": 0.026522837172558742, "learning_rate": 0.00016641043169880016, "loss": 0.5983, "step": 29910 }, { "epoch": 1.5187394499371756, "grad_norm": 0.024721248205362336, "learning_rate": 0.00016624545034868126, "loss": 0.5933, "step": 29915 }, { "epoch": 1.5189932860352071, "grad_norm": 0.02552060209170456, "learning_rate": 0.00016608053451291606, "loss": 0.5655, "step": 29920 }, { "epoch": 1.5192471221332386, "grad_norm": 0.02589663916546119, "learning_rate": 0.0001659156842238766, "loss": 0.5845, "step": 29925 }, { "epoch": 1.5195009582312702, "grad_norm": 0.029955979137939834, "learning_rate": 0.00016575089951392246, "loss": 0.5968, "step": 29930 }, { "epoch": 1.5197547943293017, "grad_norm": 0.029056657238405965, "learning_rate": 0.0001655861804153997, "loss": 0.6003, "step": 29935 }, { "epoch": 1.520008630427333, "grad_norm": 0.026099981033650337, "learning_rate": 0.00016542152696064216, "loss": 0.5543, "step": 29940 }, { "epoch": 1.5202624665253646, "grad_norm": 0.02648216578776069, "learning_rate": 0.00016525693918197017, "loss": 0.5759, "step": 29945 }, { "epoch": 1.520516302623396, "grad_norm": 0.03244397996169991, "learning_rate": 0.00016509241711169182, "loss": 0.5583, "step": 29950 }, { "epoch": 1.5207701387214274, "grad_norm": 0.02765357975091321, "learning_rate": 0.00016492796078210165, "loss": 0.5845, "step": 29955 }, { "epoch": 1.521023974819459, "grad_norm": 0.027856029102610508, "learning_rate": 0.00016476357022548194, "loss": 0.5826, "step": 29960 }, { "epoch": 1.5212778109174905, "grad_norm": 0.02941918532562971, "learning_rate": 0.0001645992454741016, "loss": 0.5848, "step": 29965 }, { "epoch": 1.521531647015522, "grad_norm": 0.026571737103827677, "learning_rate": 0.0001644349865602165, "loss": 0.5952, "step": 29970 }, { "epoch": 1.5217854831135535, "grad_norm": 0.028047086370649364, "learning_rate": 0.00016427079351607031, "loss": 0.6253, "step": 29975 }, { "epoch": 1.522039319211585, "grad_norm": 0.027093001154910014, "learning_rate": 0.00016410666637389272, "loss": 0.5905, "step": 29980 }, { "epoch": 1.5222931553096166, "grad_norm": 0.02949984713565583, "learning_rate": 0.00016394260516590175, "loss": 0.6065, "step": 29985 }, { "epoch": 1.5225469914076482, "grad_norm": 0.027038031946231417, "learning_rate": 0.00016377860992430128, "loss": 0.5949, "step": 29990 }, { "epoch": 1.5228008275056797, "grad_norm": 0.029072239947882106, "learning_rate": 0.00016361468068128314, "loss": 0.602, "step": 29995 }, { "epoch": 1.5230546636037112, "grad_norm": 0.026327851413183022, "learning_rate": 0.00016345081746902546, "loss": 0.5766, "step": 30000 }, { "epoch": 1.5233084997017425, "grad_norm": 0.028176235231529685, "learning_rate": 0.0001632870203196941, "loss": 0.572, "step": 30005 }, { "epoch": 1.523562335799774, "grad_norm": 0.032979071846241104, "learning_rate": 0.00016312328926544134, "loss": 0.6027, "step": 30010 }, { "epoch": 1.5238161718978056, "grad_norm": 0.03421272508805122, "learning_rate": 0.00016295962433840705, "loss": 0.5956, "step": 30015 }, { "epoch": 1.5240700079958371, "grad_norm": 0.02806776832838247, "learning_rate": 0.0001627960255707175, "loss": 0.6202, "step": 30020 }, { "epoch": 1.5243238440938685, "grad_norm": 0.026014921139459198, "learning_rate": 0.0001626324929944867, "loss": 0.5983, "step": 30025 }, { "epoch": 1.5245776801919, "grad_norm": 0.028846160944018064, "learning_rate": 0.00016246902664181483, "loss": 0.5839, "step": 30030 }, { "epoch": 1.5248315162899315, "grad_norm": 0.027533588476638927, "learning_rate": 0.00016230562654478997, "loss": 0.6285, "step": 30035 }, { "epoch": 1.525085352387963, "grad_norm": 0.025374856224499594, "learning_rate": 0.00016214229273548626, "loss": 0.5978, "step": 30040 }, { "epoch": 1.5253391884859946, "grad_norm": 0.2273219718549547, "learning_rate": 0.00016197902524596586, "loss": 0.595, "step": 30045 }, { "epoch": 1.5255930245840261, "grad_norm": 0.025757068009811857, "learning_rate": 0.0001618158241082771, "loss": 0.5697, "step": 30050 }, { "epoch": 1.5258468606820577, "grad_norm": 0.02770438940638054, "learning_rate": 0.00016165268935445544, "loss": 0.5567, "step": 30055 }, { "epoch": 1.5261006967800892, "grad_norm": 0.027437419778066116, "learning_rate": 0.00016148962101652364, "loss": 0.5933, "step": 30060 }, { "epoch": 1.5263545328781207, "grad_norm": 0.026703565637052923, "learning_rate": 0.00016132661912649093, "loss": 0.5865, "step": 30065 }, { "epoch": 1.5266083689761523, "grad_norm": 0.027758751885206816, "learning_rate": 0.0001611636837163541, "loss": 0.6028, "step": 30070 }, { "epoch": 1.5268622050741836, "grad_norm": 0.029619058503582218, "learning_rate": 0.0001610008148180962, "loss": 0.6009, "step": 30075 }, { "epoch": 1.527116041172215, "grad_norm": 0.028196404964485517, "learning_rate": 0.0001608380124636879, "loss": 0.597, "step": 30080 }, { "epoch": 1.5273698772702466, "grad_norm": 0.027176546598479817, "learning_rate": 0.00016067527668508624, "loss": 0.5637, "step": 30085 }, { "epoch": 1.527623713368278, "grad_norm": 0.026678235935136338, "learning_rate": 0.00016051260751423575, "loss": 0.5761, "step": 30090 }, { "epoch": 1.5278775494663095, "grad_norm": 0.025065825586963162, "learning_rate": 0.00016035000498306712, "loss": 0.5888, "step": 30095 }, { "epoch": 1.528131385564341, "grad_norm": 0.02648758310869149, "learning_rate": 0.00016018746912349873, "loss": 0.5882, "step": 30100 }, { "epoch": 1.5283852216623726, "grad_norm": 0.038481113988732725, "learning_rate": 0.00016002499996743553, "loss": 0.5648, "step": 30105 }, { "epoch": 1.528639057760404, "grad_norm": 0.025809258494984017, "learning_rate": 0.00015986259754676956, "loss": 0.5655, "step": 30110 }, { "epoch": 1.5288928938584356, "grad_norm": 0.02703581884885727, "learning_rate": 0.00015970026189337922, "loss": 0.5669, "step": 30115 }, { "epoch": 1.5291467299564672, "grad_norm": 0.029557306812166145, "learning_rate": 0.00015953799303913057, "loss": 0.5783, "step": 30120 }, { "epoch": 1.5294005660544987, "grad_norm": 0.026124276218757115, "learning_rate": 0.0001593757910158759, "loss": 0.5783, "step": 30125 }, { "epoch": 1.5296544021525302, "grad_norm": 0.03049109157971119, "learning_rate": 0.00015921365585545483, "loss": 0.6109, "step": 30130 }, { "epoch": 1.5299082382505618, "grad_norm": 0.026342762469623526, "learning_rate": 0.00015905158758969351, "loss": 0.6044, "step": 30135 }, { "epoch": 1.530162074348593, "grad_norm": 0.028712621708650785, "learning_rate": 0.0001588895862504054, "loss": 0.5972, "step": 30140 }, { "epoch": 1.5304159104466246, "grad_norm": 0.1342854241367405, "learning_rate": 0.00015872765186939025, "loss": 0.6081, "step": 30145 }, { "epoch": 1.5306697465446562, "grad_norm": 0.02840368716838765, "learning_rate": 0.00015856578447843523, "loss": 0.6069, "step": 30150 }, { "epoch": 1.5309235826426875, "grad_norm": 0.0282173410117438, "learning_rate": 0.0001584039841093139, "loss": 0.57, "step": 30155 }, { "epoch": 1.531177418740719, "grad_norm": 0.03493969772005974, "learning_rate": 0.00015824225079378684, "loss": 0.5867, "step": 30160 }, { "epoch": 1.5314312548387505, "grad_norm": 0.02558649683882184, "learning_rate": 0.00015808058456360185, "loss": 0.5845, "step": 30165 }, { "epoch": 1.531685090936782, "grad_norm": 0.029953045191913392, "learning_rate": 0.00015791898545049277, "loss": 0.6096, "step": 30170 }, { "epoch": 1.5319389270348136, "grad_norm": 0.026214262568782506, "learning_rate": 0.0001577574534861811, "loss": 0.5854, "step": 30175 }, { "epoch": 1.5321927631328451, "grad_norm": 0.026294029438795115, "learning_rate": 0.00015759598870237435, "loss": 0.5573, "step": 30180 }, { "epoch": 1.5324465992308767, "grad_norm": 0.02934232062368232, "learning_rate": 0.00015743459113076757, "loss": 0.6221, "step": 30185 }, { "epoch": 1.5327004353289082, "grad_norm": 0.031087354122132187, "learning_rate": 0.0001572732608030421, "loss": 0.6137, "step": 30190 }, { "epoch": 1.5329542714269397, "grad_norm": 0.030748806793875306, "learning_rate": 0.0001571119977508665, "loss": 0.6064, "step": 30195 }, { "epoch": 1.5332081075249713, "grad_norm": 0.028997763514913132, "learning_rate": 0.00015695080200589555, "loss": 0.5854, "step": 30200 }, { "epoch": 1.5334619436230026, "grad_norm": 0.03063174801479237, "learning_rate": 0.0001567896735997716, "loss": 0.6263, "step": 30205 }, { "epoch": 1.5337157797210341, "grad_norm": 0.028772170694729458, "learning_rate": 0.00015662861256412293, "loss": 0.5919, "step": 30210 }, { "epoch": 1.5339696158190657, "grad_norm": 0.028827915750754003, "learning_rate": 0.0001564676189305654, "loss": 0.5578, "step": 30215 }, { "epoch": 1.534223451917097, "grad_norm": 0.026501946218708126, "learning_rate": 0.00015630669273070075, "loss": 0.5807, "step": 30220 }, { "epoch": 1.5344772880151285, "grad_norm": 0.028718470512635038, "learning_rate": 0.00015614583399611864, "loss": 0.5912, "step": 30225 }, { "epoch": 1.53473112411316, "grad_norm": 0.026796678684239107, "learning_rate": 0.00015598504275839443, "loss": 0.5955, "step": 30230 }, { "epoch": 1.5349849602111916, "grad_norm": 0.027344823745682107, "learning_rate": 0.00015582431904909082, "loss": 0.6018, "step": 30235 }, { "epoch": 1.535238796309223, "grad_norm": 0.028010521039267575, "learning_rate": 0.00015566366289975682, "loss": 0.6102, "step": 30240 }, { "epoch": 1.5354926324072546, "grad_norm": 0.028165871714890524, "learning_rate": 0.00015550307434192878, "loss": 0.5999, "step": 30245 }, { "epoch": 1.5357464685052862, "grad_norm": 0.021984780313490267, "learning_rate": 0.00015534255340712906, "loss": 0.5721, "step": 30250 }, { "epoch": 1.5360003046033177, "grad_norm": 0.029267165802997014, "learning_rate": 0.00015518210012686746, "loss": 0.6059, "step": 30255 }, { "epoch": 1.5362541407013492, "grad_norm": 0.028505975993005417, "learning_rate": 0.00015502171453263985, "loss": 0.5669, "step": 30260 }, { "epoch": 1.5365079767993808, "grad_norm": 0.026919998426900994, "learning_rate": 0.0001548613966559294, "loss": 0.5756, "step": 30265 }, { "epoch": 1.536761812897412, "grad_norm": 0.027318834603615057, "learning_rate": 0.00015470114652820548, "loss": 0.6101, "step": 30270 }, { "epoch": 1.5370156489954436, "grad_norm": 0.028828067564065796, "learning_rate": 0.0001545409641809246, "loss": 0.5544, "step": 30275 }, { "epoch": 1.5372694850934752, "grad_norm": 0.028289381957735258, "learning_rate": 0.00015438084964552952, "loss": 0.568, "step": 30280 }, { "epoch": 1.5375233211915067, "grad_norm": 0.02965647670858718, "learning_rate": 0.0001542208029534501, "loss": 0.6097, "step": 30285 }, { "epoch": 1.537777157289538, "grad_norm": 0.02984214399464331, "learning_rate": 0.00015406082413610273, "loss": 0.6276, "step": 30290 }, { "epoch": 1.5380309933875695, "grad_norm": 0.027976367804254307, "learning_rate": 0.0001539009132248903, "loss": 0.5873, "step": 30295 }, { "epoch": 1.538284829485601, "grad_norm": 0.027761656181652227, "learning_rate": 0.0001537410702512027, "loss": 0.6081, "step": 30300 }, { "epoch": 1.5385386655836326, "grad_norm": 0.031198512237943926, "learning_rate": 0.00015358129524641612, "loss": 0.6991, "step": 30305 }, { "epoch": 1.5387925016816641, "grad_norm": 0.03114342806068528, "learning_rate": 0.00015342158824189383, "loss": 0.5825, "step": 30310 }, { "epoch": 1.5390463377796957, "grad_norm": 0.041507172177780396, "learning_rate": 0.00015326194926898524, "loss": 0.5804, "step": 30315 }, { "epoch": 1.5393001738777272, "grad_norm": 0.028320928804414357, "learning_rate": 0.00015310237835902696, "loss": 0.6133, "step": 30320 }, { "epoch": 1.5395540099757588, "grad_norm": 0.029017791613340473, "learning_rate": 0.0001529428755433417, "loss": 0.605, "step": 30325 }, { "epoch": 1.5398078460737903, "grad_norm": 0.026322498312391977, "learning_rate": 0.00015278344085323936, "loss": 0.6156, "step": 30330 }, { "epoch": 1.5400616821718216, "grad_norm": 0.027022874357011162, "learning_rate": 0.00015262407432001585, "loss": 0.5886, "step": 30335 }, { "epoch": 1.5403155182698531, "grad_norm": 0.027776632335430297, "learning_rate": 0.00015246477597495418, "loss": 0.586, "step": 30340 }, { "epoch": 1.5405693543678847, "grad_norm": 0.03081054633642835, "learning_rate": 0.00015230554584932382, "loss": 0.6568, "step": 30345 }, { "epoch": 1.5408231904659162, "grad_norm": 0.027889535450297386, "learning_rate": 0.00015214638397438108, "loss": 0.551, "step": 30350 }, { "epoch": 1.5410770265639475, "grad_norm": 0.026928837429433833, "learning_rate": 0.00015198729038136822, "loss": 0.6032, "step": 30355 }, { "epoch": 1.541330862661979, "grad_norm": 0.0292507503877274, "learning_rate": 0.00015182826510151486, "loss": 0.6104, "step": 30360 }, { "epoch": 1.5415846987600106, "grad_norm": 0.026261156879357275, "learning_rate": 0.00015166930816603658, "loss": 0.5573, "step": 30365 }, { "epoch": 1.5418385348580421, "grad_norm": 0.026433671690189037, "learning_rate": 0.00015151041960613615, "loss": 0.5627, "step": 30370 }, { "epoch": 1.5420923709560737, "grad_norm": 0.028466128862299996, "learning_rate": 0.0001513515994530023, "loss": 0.6043, "step": 30375 }, { "epoch": 1.5423462070541052, "grad_norm": 0.028555636081782732, "learning_rate": 0.00015119284773781088, "loss": 0.6159, "step": 30380 }, { "epoch": 1.5426000431521367, "grad_norm": 0.02541494825762446, "learning_rate": 0.00015103416449172385, "loss": 0.5963, "step": 30385 }, { "epoch": 1.5428538792501683, "grad_norm": 0.028731461256157997, "learning_rate": 0.0001508755497458902, "loss": 0.6033, "step": 30390 }, { "epoch": 1.5431077153481998, "grad_norm": 0.026138526639131406, "learning_rate": 0.00015071700353144486, "loss": 0.5958, "step": 30395 }, { "epoch": 1.5433615514462313, "grad_norm": 0.026556457318242712, "learning_rate": 0.00015055852587950985, "loss": 0.5758, "step": 30400 }, { "epoch": 1.5436153875442626, "grad_norm": 0.029108110158787722, "learning_rate": 0.0001504001168211937, "loss": 0.5975, "step": 30405 }, { "epoch": 1.5438692236422942, "grad_norm": 0.02591491444042051, "learning_rate": 0.00015024177638759106, "loss": 0.5674, "step": 30410 }, { "epoch": 1.5441230597403257, "grad_norm": 0.023973569274714678, "learning_rate": 0.00015008350460978358, "loss": 0.5526, "step": 30415 }, { "epoch": 1.544376895838357, "grad_norm": 0.025151121077559638, "learning_rate": 0.00014992530151883898, "loss": 0.5874, "step": 30420 }, { "epoch": 1.5446307319363886, "grad_norm": 0.026738688514215583, "learning_rate": 0.000149767167145812, "loss": 0.5723, "step": 30425 }, { "epoch": 1.54488456803442, "grad_norm": 0.027703587185149297, "learning_rate": 0.0001496091015217434, "loss": 0.585, "step": 30430 }, { "epoch": 1.5451384041324516, "grad_norm": 0.028689026492404034, "learning_rate": 0.00014945110467766087, "loss": 0.5843, "step": 30435 }, { "epoch": 1.5453922402304832, "grad_norm": 0.02788120862771295, "learning_rate": 0.0001492931766445782, "loss": 0.6103, "step": 30440 }, { "epoch": 1.5456460763285147, "grad_norm": 0.0260694022841004, "learning_rate": 0.0001491353174534961, "loss": 0.5692, "step": 30445 }, { "epoch": 1.5458999124265462, "grad_norm": 0.02752627401935081, "learning_rate": 0.0001489775271354013, "loss": 0.6006, "step": 30450 }, { "epoch": 1.5461537485245778, "grad_norm": 0.02774795713271297, "learning_rate": 0.00014881980572126752, "loss": 0.6017, "step": 30455 }, { "epoch": 1.5464075846226093, "grad_norm": 0.028898387856488586, "learning_rate": 0.00014866215324205423, "loss": 0.5944, "step": 30460 }, { "epoch": 1.5466614207206408, "grad_norm": 0.025555646533797955, "learning_rate": 0.00014850456972870845, "loss": 0.5683, "step": 30465 }, { "epoch": 1.5469152568186721, "grad_norm": 0.02750534819790408, "learning_rate": 0.00014834705521216262, "loss": 0.5982, "step": 30470 }, { "epoch": 1.5471690929167037, "grad_norm": 0.026903129759379172, "learning_rate": 0.0001481896097233363, "loss": 0.5683, "step": 30475 }, { "epoch": 1.5474229290147352, "grad_norm": 0.026136536045272104, "learning_rate": 0.00014803223329313493, "loss": 0.5878, "step": 30480 }, { "epoch": 1.5476767651127665, "grad_norm": 0.027225417738923407, "learning_rate": 0.00014787492595245107, "loss": 0.6015, "step": 30485 }, { "epoch": 1.547930601210798, "grad_norm": 0.03268137059629612, "learning_rate": 0.00014771768773216298, "loss": 0.582, "step": 30490 }, { "epoch": 1.5481844373088296, "grad_norm": 0.02757259057538164, "learning_rate": 0.00014756051866313618, "loss": 0.5822, "step": 30495 }, { "epoch": 1.5484382734068611, "grad_norm": 0.026327696546312482, "learning_rate": 0.00014740341877622181, "loss": 0.5465, "step": 30500 }, { "epoch": 1.5486921095048927, "grad_norm": 0.026469446100759225, "learning_rate": 0.0001472463881022581, "loss": 0.5478, "step": 30505 }, { "epoch": 1.5489459456029242, "grad_norm": 0.02566134025650235, "learning_rate": 0.00014708942667206903, "loss": 0.6068, "step": 30510 }, { "epoch": 1.5491997817009557, "grad_norm": 0.027328503891801845, "learning_rate": 0.0001469325345164657, "loss": 0.5824, "step": 30515 }, { "epoch": 1.5494536177989873, "grad_norm": 0.02847910896708751, "learning_rate": 0.00014677571166624498, "loss": 0.5446, "step": 30520 }, { "epoch": 1.5497074538970188, "grad_norm": 0.026452465202961167, "learning_rate": 0.0001466189581521905, "loss": 0.626, "step": 30525 }, { "epoch": 1.5499612899950503, "grad_norm": 0.027479196855812682, "learning_rate": 0.00014646227400507238, "loss": 0.5833, "step": 30530 }, { "epoch": 1.5502151260930817, "grad_norm": 0.025548348380699692, "learning_rate": 0.00014630565925564666, "loss": 0.5765, "step": 30535 }, { "epoch": 1.5504689621911132, "grad_norm": 0.02862743304130452, "learning_rate": 0.0001461491139346563, "loss": 0.5793, "step": 30540 }, { "epoch": 1.5507227982891447, "grad_norm": 0.026475625446973614, "learning_rate": 0.00014599263807283004, "loss": 0.5802, "step": 30545 }, { "epoch": 1.550976634387176, "grad_norm": 0.02994634208847806, "learning_rate": 0.00014583623170088368, "loss": 0.5853, "step": 30550 }, { "epoch": 1.5512304704852076, "grad_norm": 0.025230911698299302, "learning_rate": 0.00014567989484951866, "loss": 0.6002, "step": 30555 }, { "epoch": 1.551484306583239, "grad_norm": 0.027553172387471347, "learning_rate": 0.00014552362754942345, "loss": 0.5964, "step": 30560 }, { "epoch": 1.5517381426812706, "grad_norm": 0.028118155066370235, "learning_rate": 0.00014536742983127222, "loss": 0.5859, "step": 30565 }, { "epoch": 1.5519919787793022, "grad_norm": 0.025869684195350684, "learning_rate": 0.0001452113017257261, "loss": 0.5723, "step": 30570 }, { "epoch": 1.5522458148773337, "grad_norm": 0.03005672960089902, "learning_rate": 0.000145055243263432, "loss": 0.6254, "step": 30575 }, { "epoch": 1.5524996509753652, "grad_norm": 0.030412158796017302, "learning_rate": 0.0001448992544750235, "loss": 0.6138, "step": 30580 }, { "epoch": 1.5527534870733968, "grad_norm": 0.030019138763061, "learning_rate": 0.0001447433353911205, "loss": 0.6127, "step": 30585 }, { "epoch": 1.5530073231714283, "grad_norm": 0.03914085676382992, "learning_rate": 0.00014458748604232924, "loss": 0.5876, "step": 30590 }, { "epoch": 1.5532611592694598, "grad_norm": 0.02619426257491794, "learning_rate": 0.00014443170645924192, "loss": 0.5902, "step": 30595 }, { "epoch": 1.5535149953674912, "grad_norm": 0.02613786681937849, "learning_rate": 0.0001442759966724375, "loss": 0.5744, "step": 30600 }, { "epoch": 1.5537688314655227, "grad_norm": 0.027343920729622193, "learning_rate": 0.0001441203567124808, "loss": 0.5724, "step": 30605 }, { "epoch": 1.5540226675635542, "grad_norm": 0.02599124260216569, "learning_rate": 0.00014396478660992353, "loss": 0.5822, "step": 30610 }, { "epoch": 1.5542765036615858, "grad_norm": 0.0323137127937067, "learning_rate": 0.00014380928639530282, "loss": 0.6064, "step": 30615 }, { "epoch": 1.554530339759617, "grad_norm": 0.025409462311820015, "learning_rate": 0.00014365385609914312, "loss": 0.575, "step": 30620 }, { "epoch": 1.5547841758576486, "grad_norm": 0.026046540635868722, "learning_rate": 0.00014349849575195423, "loss": 0.5632, "step": 30625 }, { "epoch": 1.5550380119556801, "grad_norm": 0.029342284886949923, "learning_rate": 0.00014334320538423285, "loss": 0.5854, "step": 30630 }, { "epoch": 1.5552918480537117, "grad_norm": 0.024333631929104475, "learning_rate": 0.00014318798502646146, "loss": 0.5659, "step": 30635 }, { "epoch": 1.5555456841517432, "grad_norm": 0.025396383830880057, "learning_rate": 0.00014303283470910923, "loss": 0.5869, "step": 30640 }, { "epoch": 1.5557995202497747, "grad_norm": 0.026607654932253835, "learning_rate": 0.00014287775446263147, "loss": 0.6011, "step": 30645 }, { "epoch": 1.5560533563478063, "grad_norm": 0.024430892835619746, "learning_rate": 0.0001427227443174694, "loss": 0.552, "step": 30650 }, { "epoch": 1.5563071924458378, "grad_norm": 0.025557937266241643, "learning_rate": 0.00014256780430405103, "loss": 0.5651, "step": 30655 }, { "epoch": 1.5565610285438694, "grad_norm": 0.02945308914165588, "learning_rate": 0.00014241293445279, "loss": 0.6114, "step": 30660 }, { "epoch": 1.5568148646419009, "grad_norm": 0.026694550693566677, "learning_rate": 0.00014225813479408684, "loss": 0.5888, "step": 30665 }, { "epoch": 1.5570687007399322, "grad_norm": 0.02457916069203174, "learning_rate": 0.0001421034053583276, "loss": 0.5521, "step": 30670 }, { "epoch": 1.5573225368379637, "grad_norm": 0.024785729589188517, "learning_rate": 0.00014194874617588522, "loss": 0.5735, "step": 30675 }, { "epoch": 1.5575763729359953, "grad_norm": 0.026792415173550813, "learning_rate": 0.0001417941572771182, "loss": 0.5593, "step": 30680 }, { "epoch": 1.5578302090340266, "grad_norm": 0.026480651777955777, "learning_rate": 0.0001416396386923719, "loss": 0.5865, "step": 30685 }, { "epoch": 1.5580840451320581, "grad_norm": 0.02649057201103826, "learning_rate": 0.00014148519045197722, "loss": 0.6411, "step": 30690 }, { "epoch": 1.5583378812300896, "grad_norm": 0.02780169307991487, "learning_rate": 0.00014133081258625192, "loss": 0.571, "step": 30695 }, { "epoch": 1.5585917173281212, "grad_norm": 0.02759230397679329, "learning_rate": 0.00014117650512549912, "loss": 0.5932, "step": 30700 }, { "epoch": 1.5588455534261527, "grad_norm": 0.028577465238085766, "learning_rate": 0.00014102226810000919, "loss": 0.6028, "step": 30705 }, { "epoch": 1.5590993895241843, "grad_norm": 0.029651605755176738, "learning_rate": 0.0001408681015400577, "loss": 0.6021, "step": 30710 }, { "epoch": 1.5593532256222158, "grad_norm": 0.026852588285596102, "learning_rate": 0.000140714005475907, "loss": 0.5706, "step": 30715 }, { "epoch": 1.5596070617202473, "grad_norm": 0.028476565905344066, "learning_rate": 0.00014055997993780512, "loss": 0.602, "step": 30720 }, { "epoch": 1.5598608978182789, "grad_norm": 0.02717576672411838, "learning_rate": 0.0001404060249559868, "loss": 0.5868, "step": 30725 }, { "epoch": 1.5601147339163104, "grad_norm": 0.02785811980413769, "learning_rate": 0.00014025214056067237, "loss": 0.6076, "step": 30730 }, { "epoch": 1.5603685700143417, "grad_norm": 0.026836650312996757, "learning_rate": 0.00014009832678206887, "loss": 0.5902, "step": 30735 }, { "epoch": 1.5606224061123732, "grad_norm": 0.027648976714257426, "learning_rate": 0.00013994458365036879, "loss": 0.5837, "step": 30740 }, { "epoch": 1.5608762422104048, "grad_norm": 0.02996425785324507, "learning_rate": 0.0001397909111957515, "loss": 0.6258, "step": 30745 }, { "epoch": 1.561130078308436, "grad_norm": 0.030475237898957112, "learning_rate": 0.00013963730944838181, "loss": 0.5716, "step": 30750 }, { "epoch": 1.5613839144064676, "grad_norm": 0.024576841874980316, "learning_rate": 0.00013948377843841137, "loss": 0.592, "step": 30755 }, { "epoch": 1.5616377505044992, "grad_norm": 0.027656447813982403, "learning_rate": 0.00013933031819597714, "loss": 0.5835, "step": 30760 }, { "epoch": 1.5618915866025307, "grad_norm": 0.025372256567756196, "learning_rate": 0.00013917692875120276, "loss": 0.5739, "step": 30765 }, { "epoch": 1.5621454227005622, "grad_norm": 0.02920528296298664, "learning_rate": 0.00013902361013419807, "loss": 0.6224, "step": 30770 }, { "epoch": 1.5623992587985938, "grad_norm": 0.03083281416567836, "learning_rate": 0.0001388703623750583, "loss": 0.5564, "step": 30775 }, { "epoch": 1.5626530948966253, "grad_norm": 0.026443539203048375, "learning_rate": 0.00013871718550386564, "loss": 0.5766, "step": 30780 }, { "epoch": 1.5629069309946568, "grad_norm": 0.031893690058875275, "learning_rate": 0.00013856407955068755, "loss": 0.6025, "step": 30785 }, { "epoch": 1.5631607670926884, "grad_norm": 0.02814718938624298, "learning_rate": 0.0001384110445455784, "loss": 0.5626, "step": 30790 }, { "epoch": 1.56341460319072, "grad_norm": 0.032272932972061574, "learning_rate": 0.00013825808051857774, "loss": 0.588, "step": 30795 }, { "epoch": 1.5636684392887512, "grad_norm": 0.02790929645602712, "learning_rate": 0.00013810518749971207, "loss": 0.6049, "step": 30800 }, { "epoch": 1.5639222753867827, "grad_norm": 0.027856088336986554, "learning_rate": 0.00013795236551899316, "loss": 0.6235, "step": 30805 }, { "epoch": 1.5641761114848143, "grad_norm": 0.027975072154593154, "learning_rate": 0.0001377996146064195, "loss": 0.6144, "step": 30810 }, { "epoch": 1.5644299475828456, "grad_norm": 0.025111842822665667, "learning_rate": 0.00013764693479197503, "loss": 0.5643, "step": 30815 }, { "epoch": 1.5646837836808771, "grad_norm": 0.025059590721188477, "learning_rate": 0.00013749432610563045, "loss": 0.5847, "step": 30820 }, { "epoch": 1.5649376197789087, "grad_norm": 0.027302394382296737, "learning_rate": 0.00013734178857734147, "loss": 0.594, "step": 30825 }, { "epoch": 1.5651914558769402, "grad_norm": 0.03632839525231845, "learning_rate": 0.0001371893222370511, "loss": 0.6015, "step": 30830 }, { "epoch": 1.5654452919749717, "grad_norm": 0.02669713992363188, "learning_rate": 0.00013703692711468734, "loss": 0.5888, "step": 30835 }, { "epoch": 1.5656991280730033, "grad_norm": 0.025342733689215757, "learning_rate": 0.00013688460324016484, "loss": 0.556, "step": 30840 }, { "epoch": 1.5659529641710348, "grad_norm": 0.02582852063490374, "learning_rate": 0.00013673235064338375, "loss": 0.5734, "step": 30845 }, { "epoch": 1.5662068002690663, "grad_norm": 0.02854088429680099, "learning_rate": 0.00013658016935423067, "loss": 0.5516, "step": 30850 }, { "epoch": 1.5664606363670979, "grad_norm": 0.02820670845382943, "learning_rate": 0.0001364280594025779, "loss": 0.585, "step": 30855 }, { "epoch": 1.5667144724651294, "grad_norm": 0.024967458090317623, "learning_rate": 0.00013627602081828412, "loss": 0.5835, "step": 30860 }, { "epoch": 1.5669683085631607, "grad_norm": 0.02964893468194992, "learning_rate": 0.00013612405363119334, "loss": 0.5798, "step": 30865 }, { "epoch": 1.5672221446611923, "grad_norm": 0.026299406311454297, "learning_rate": 0.00013597215787113638, "loss": 0.5778, "step": 30870 }, { "epoch": 1.5674759807592238, "grad_norm": 0.026818253457773365, "learning_rate": 0.00013582033356792923, "loss": 0.5796, "step": 30875 }, { "epoch": 1.5677298168572553, "grad_norm": 0.027492647128368054, "learning_rate": 0.00013566858075137462, "loss": 0.5657, "step": 30880 }, { "epoch": 1.5679836529552866, "grad_norm": 0.02724986930478381, "learning_rate": 0.00013551689945126056, "loss": 0.5749, "step": 30885 }, { "epoch": 1.5682374890533182, "grad_norm": 0.02960779141906906, "learning_rate": 0.0001353652896973614, "loss": 0.6044, "step": 30890 }, { "epoch": 1.5684913251513497, "grad_norm": 0.03133889047334089, "learning_rate": 0.00013521375151943766, "loss": 0.5974, "step": 30895 }, { "epoch": 1.5687451612493812, "grad_norm": 0.025981252905902037, "learning_rate": 0.0001350622849472351, "loss": 0.589, "step": 30900 }, { "epoch": 1.5689989973474128, "grad_norm": 0.02716976261058515, "learning_rate": 0.00013491089001048628, "loss": 0.6004, "step": 30905 }, { "epoch": 1.5692528334454443, "grad_norm": 0.023459592367124388, "learning_rate": 0.00013475956673890887, "loss": 0.5765, "step": 30910 }, { "epoch": 1.5695066695434758, "grad_norm": 0.02884249719442461, "learning_rate": 0.0001346083151622072, "loss": 0.566, "step": 30915 }, { "epoch": 1.5697605056415074, "grad_norm": 0.02925390033322587, "learning_rate": 0.00013445713531007092, "loss": 0.5871, "step": 30920 }, { "epoch": 1.570014341739539, "grad_norm": 0.027699931024575335, "learning_rate": 0.00013430602721217617, "loss": 0.5953, "step": 30925 }, { "epoch": 1.5702681778375704, "grad_norm": 0.02823389756321497, "learning_rate": 0.0001341549908981844, "loss": 0.6031, "step": 30930 }, { "epoch": 1.5705220139356018, "grad_norm": 0.025192519228568272, "learning_rate": 0.00013400402639774362, "loss": 0.5713, "step": 30935 }, { "epoch": 1.5707758500336333, "grad_norm": 0.02936108187433379, "learning_rate": 0.00013385313374048708, "loss": 0.571, "step": 30940 }, { "epoch": 1.5710296861316648, "grad_norm": 0.027924017637400773, "learning_rate": 0.0001337023129560344, "loss": 0.5702, "step": 30945 }, { "epoch": 1.5712835222296961, "grad_norm": 0.02737191676035919, "learning_rate": 0.000133551564073991, "loss": 0.6045, "step": 30950 }, { "epoch": 1.5715373583277277, "grad_norm": 0.03186590759182722, "learning_rate": 0.0001334008871239482, "loss": 0.5983, "step": 30955 }, { "epoch": 1.5717911944257592, "grad_norm": 0.026125347537190542, "learning_rate": 0.0001332502821354829, "loss": 0.5907, "step": 30960 }, { "epoch": 1.5720450305237907, "grad_norm": 0.028535529896154672, "learning_rate": 0.00013309974913815843, "loss": 0.6077, "step": 30965 }, { "epoch": 1.5722988666218223, "grad_norm": 0.026944241050666996, "learning_rate": 0.0001329492881615233, "loss": 0.5989, "step": 30970 }, { "epoch": 1.5725527027198538, "grad_norm": 0.028409521580846104, "learning_rate": 0.00013279889923511256, "loss": 0.6085, "step": 30975 }, { "epoch": 1.5728065388178853, "grad_norm": 0.027555780818773676, "learning_rate": 0.00013264858238844652, "loss": 0.571, "step": 30980 }, { "epoch": 1.5730603749159169, "grad_norm": 0.027343106755835088, "learning_rate": 0.0001324983376510319, "loss": 0.6103, "step": 30985 }, { "epoch": 1.5733142110139484, "grad_norm": 0.029827754708259508, "learning_rate": 0.0001323481650523608, "loss": 0.5849, "step": 30990 }, { "epoch": 1.57356804711198, "grad_norm": 0.03440472140361145, "learning_rate": 0.00013219806462191154, "loss": 0.6086, "step": 30995 }, { "epoch": 1.5738218832100113, "grad_norm": 0.028201685203193362, "learning_rate": 0.00013204803638914791, "loss": 0.6183, "step": 31000 }, { "epoch": 1.5740757193080428, "grad_norm": 0.028261259716930918, "learning_rate": 0.00013189808038351953, "loss": 0.5946, "step": 31005 }, { "epoch": 1.5743295554060743, "grad_norm": 0.03169970375943244, "learning_rate": 0.00013174819663446254, "loss": 0.5657, "step": 31010 }, { "epoch": 1.5745833915041056, "grad_norm": 0.03215110528835446, "learning_rate": 0.00013159838517139795, "loss": 0.582, "step": 31015 }, { "epoch": 1.5748372276021372, "grad_norm": 0.025533109136475166, "learning_rate": 0.00013144864602373325, "loss": 0.5854, "step": 31020 }, { "epoch": 1.5750910637001687, "grad_norm": 0.026300437816548533, "learning_rate": 0.0001312989792208612, "loss": 0.5524, "step": 31025 }, { "epoch": 1.5753448997982002, "grad_norm": 0.026834823960937938, "learning_rate": 0.00013114938479216105, "loss": 0.5736, "step": 31030 }, { "epoch": 1.5755987358962318, "grad_norm": 0.0270115088483996, "learning_rate": 0.000130999862766997, "loss": 0.5849, "step": 31035 }, { "epoch": 1.5758525719942633, "grad_norm": 0.027192282250934865, "learning_rate": 0.00013085041317471984, "loss": 0.5951, "step": 31040 }, { "epoch": 1.5761064080922949, "grad_norm": 0.0288871195330666, "learning_rate": 0.00013070103604466548, "loss": 0.54, "step": 31045 }, { "epoch": 1.5763602441903264, "grad_norm": 0.026906365288114586, "learning_rate": 0.00013055173140615623, "loss": 0.6133, "step": 31050 }, { "epoch": 1.576614080288358, "grad_norm": 0.027542210601689966, "learning_rate": 0.00013040249928849952, "loss": 0.593, "step": 31055 }, { "epoch": 1.5768679163863895, "grad_norm": 0.025704794109673825, "learning_rate": 0.00013025333972098912, "loss": 0.5889, "step": 31060 }, { "epoch": 1.5771217524844208, "grad_norm": 0.027936168441358728, "learning_rate": 0.00013010425273290394, "loss": 0.5868, "step": 31065 }, { "epoch": 1.5773755885824523, "grad_norm": 0.028967202818727743, "learning_rate": 0.00012995523835350958, "loss": 0.5847, "step": 31070 }, { "epoch": 1.5776294246804838, "grad_norm": 0.029390660439600837, "learning_rate": 0.0001298062966120564, "loss": 0.609, "step": 31075 }, { "epoch": 1.5778832607785152, "grad_norm": 0.026770359695577346, "learning_rate": 0.00012965742753778115, "loss": 0.5734, "step": 31080 }, { "epoch": 1.5781370968765467, "grad_norm": 0.03002801206295241, "learning_rate": 0.00012950863115990602, "loss": 0.5701, "step": 31085 }, { "epoch": 1.5783909329745782, "grad_norm": 0.027342407175775033, "learning_rate": 0.00012935990750763876, "loss": 0.5891, "step": 31090 }, { "epoch": 1.5786447690726098, "grad_norm": 0.02808912797071826, "learning_rate": 0.00012921125661017347, "loss": 0.5538, "step": 31095 }, { "epoch": 1.5788986051706413, "grad_norm": 0.02503524655071281, "learning_rate": 0.0001290626784966892, "loss": 0.5648, "step": 31100 }, { "epoch": 1.5791524412686728, "grad_norm": 0.028936565870657145, "learning_rate": 0.00012891417319635146, "loss": 0.5876, "step": 31105 }, { "epoch": 1.5794062773667044, "grad_norm": 0.028155682174166165, "learning_rate": 0.0001287657407383107, "loss": 0.5815, "step": 31110 }, { "epoch": 1.579660113464736, "grad_norm": 0.025349478144683576, "learning_rate": 0.0001286173811517039, "loss": 0.5782, "step": 31115 }, { "epoch": 1.5799139495627674, "grad_norm": 0.026142885777338688, "learning_rate": 0.00012846909446565297, "loss": 0.5747, "step": 31120 }, { "epoch": 1.580167785660799, "grad_norm": 0.030701155853460255, "learning_rate": 0.00012832088070926595, "loss": 0.5929, "step": 31125 }, { "epoch": 1.5804216217588303, "grad_norm": 0.025519034465583363, "learning_rate": 0.00012817273991163648, "loss": 0.5805, "step": 31130 }, { "epoch": 1.5806754578568618, "grad_norm": 0.02929764321427659, "learning_rate": 0.00012802467210184398, "loss": 0.6044, "step": 31135 }, { "epoch": 1.5809292939548933, "grad_norm": 0.02558336591877359, "learning_rate": 0.00012787667730895325, "loss": 0.5946, "step": 31140 }, { "epoch": 1.5811831300529249, "grad_norm": 0.027212322192026506, "learning_rate": 0.00012772875556201507, "loss": 0.5779, "step": 31145 }, { "epoch": 1.5814369661509562, "grad_norm": 0.02901294892869856, "learning_rate": 0.0001275809068900655, "loss": 0.5847, "step": 31150 }, { "epoch": 1.5816908022489877, "grad_norm": 0.03208939400669025, "learning_rate": 0.00012743313132212685, "loss": 0.6117, "step": 31155 }, { "epoch": 1.5819446383470193, "grad_norm": 0.028481158568853138, "learning_rate": 0.00012728542888720633, "loss": 0.5986, "step": 31160 }, { "epoch": 1.5821984744450508, "grad_norm": 0.027170277043582377, "learning_rate": 0.0001271377996142976, "loss": 0.6027, "step": 31165 }, { "epoch": 1.5824523105430823, "grad_norm": 0.02647549952421632, "learning_rate": 0.00012699024353237921, "loss": 0.6012, "step": 31170 }, { "epoch": 1.5827061466411139, "grad_norm": 0.02623795937054138, "learning_rate": 0.0001268427606704159, "loss": 0.5744, "step": 31175 }, { "epoch": 1.5829599827391454, "grad_norm": 0.026625661729587337, "learning_rate": 0.00012669535105735763, "loss": 0.58, "step": 31180 }, { "epoch": 1.583213818837177, "grad_norm": 0.027116424373249806, "learning_rate": 0.0001265480147221403, "loss": 0.6057, "step": 31185 }, { "epoch": 1.5834676549352085, "grad_norm": 0.028659560096309357, "learning_rate": 0.00012640075169368536, "loss": 0.596, "step": 31190 }, { "epoch": 1.58372149103324, "grad_norm": 0.026741086327942056, "learning_rate": 0.0001262535620008996, "loss": 0.5769, "step": 31195 }, { "epoch": 1.5839753271312713, "grad_norm": 0.028156867218776027, "learning_rate": 0.00012610644567267592, "loss": 0.5702, "step": 31200 }, { "epoch": 1.5842291632293029, "grad_norm": 0.028306786668841073, "learning_rate": 0.0001259594027378922, "loss": 0.6244, "step": 31205 }, { "epoch": 1.5844829993273344, "grad_norm": 0.02789074824475848, "learning_rate": 0.00012581243322541252, "loss": 0.6243, "step": 31210 }, { "epoch": 1.5847368354253657, "grad_norm": 0.026713342237265292, "learning_rate": 0.000125665537164086, "loss": 0.6015, "step": 31215 }, { "epoch": 1.5849906715233972, "grad_norm": 0.02627507659215349, "learning_rate": 0.00012551871458274787, "loss": 0.6142, "step": 31220 }, { "epoch": 1.5852445076214288, "grad_norm": 0.02559336632393885, "learning_rate": 0.0001253719655102184, "loss": 0.5875, "step": 31225 }, { "epoch": 1.5854983437194603, "grad_norm": 0.02597708655358238, "learning_rate": 0.0001252252899753039, "loss": 0.5909, "step": 31230 }, { "epoch": 1.5857521798174918, "grad_norm": 0.027108273985417278, "learning_rate": 0.00012507868800679594, "loss": 0.5862, "step": 31235 }, { "epoch": 1.5860060159155234, "grad_norm": 0.02845448564333825, "learning_rate": 0.00012493215963347188, "loss": 0.6026, "step": 31240 }, { "epoch": 1.586259852013555, "grad_norm": 0.024967393535761672, "learning_rate": 0.00012478570488409413, "loss": 0.555, "step": 31245 }, { "epoch": 1.5865136881115864, "grad_norm": 0.026710013470827736, "learning_rate": 0.00012463932378741166, "loss": 0.5848, "step": 31250 }, { "epoch": 1.586767524209618, "grad_norm": 0.027248639529126293, "learning_rate": 0.00012449301637215782, "loss": 0.5824, "step": 31255 }, { "epoch": 1.5870213603076495, "grad_norm": 0.027291031414224027, "learning_rate": 0.0001243467826670524, "loss": 0.5673, "step": 31260 }, { "epoch": 1.5872751964056808, "grad_norm": 0.028727038048192023, "learning_rate": 0.00012420062270079995, "loss": 0.5919, "step": 31265 }, { "epoch": 1.5875290325037124, "grad_norm": 0.024790738874823297, "learning_rate": 0.00012405453650209136, "loss": 0.5455, "step": 31270 }, { "epoch": 1.587782868601744, "grad_norm": 0.029264322885802507, "learning_rate": 0.00012390852409960223, "loss": 0.5964, "step": 31275 }, { "epoch": 1.5880367046997752, "grad_norm": 0.027794730612298556, "learning_rate": 0.00012376258552199444, "loss": 0.5941, "step": 31280 }, { "epoch": 1.5882905407978067, "grad_norm": 0.02444785016154133, "learning_rate": 0.00012361672079791469, "loss": 0.5839, "step": 31285 }, { "epoch": 1.5885443768958383, "grad_norm": 0.02859918593498619, "learning_rate": 0.00012347092995599574, "loss": 0.5527, "step": 31290 }, { "epoch": 1.5887982129938698, "grad_norm": 0.028888813878945555, "learning_rate": 0.00012332521302485533, "loss": 0.585, "step": 31295 }, { "epoch": 1.5890520490919013, "grad_norm": 0.02621551877370369, "learning_rate": 0.00012317957003309726, "loss": 0.5337, "step": 31300 }, { "epoch": 1.5893058851899329, "grad_norm": 0.028156256693106205, "learning_rate": 0.00012303400100931029, "loss": 0.5676, "step": 31305 }, { "epoch": 1.5895597212879644, "grad_norm": 0.026683211584251854, "learning_rate": 0.00012288850598206902, "loss": 0.5708, "step": 31310 }, { "epoch": 1.589813557385996, "grad_norm": 0.026989986793567542, "learning_rate": 0.00012274308497993346, "loss": 0.5818, "step": 31315 }, { "epoch": 1.5900673934840275, "grad_norm": 0.028453530252711006, "learning_rate": 0.0001225977380314488, "loss": 0.5609, "step": 31320 }, { "epoch": 1.590321229582059, "grad_norm": 0.027492991211460364, "learning_rate": 0.00012245246516514626, "loss": 0.5855, "step": 31325 }, { "epoch": 1.5905750656800903, "grad_norm": 0.025329118666488404, "learning_rate": 0.00012230726640954183, "loss": 0.5591, "step": 31330 }, { "epoch": 1.5908289017781219, "grad_norm": 0.02707796497392369, "learning_rate": 0.0001221621417931375, "loss": 0.5792, "step": 31335 }, { "epoch": 1.5910827378761534, "grad_norm": 0.028166884080630263, "learning_rate": 0.00012201709134442041, "loss": 0.5776, "step": 31340 }, { "epoch": 1.5913365739741847, "grad_norm": 0.025897539970454925, "learning_rate": 0.00012187211509186341, "loss": 0.5985, "step": 31345 }, { "epoch": 1.5915904100722162, "grad_norm": 0.026321965014455485, "learning_rate": 0.00012172721306392437, "loss": 0.5855, "step": 31350 }, { "epoch": 1.5918442461702478, "grad_norm": 0.0258025295728748, "learning_rate": 0.00012158238528904707, "loss": 0.5914, "step": 31355 }, { "epoch": 1.5920980822682793, "grad_norm": 0.027710285500446963, "learning_rate": 0.00012143763179566026, "loss": 0.5739, "step": 31360 }, { "epoch": 1.5923519183663108, "grad_norm": 0.025722565837443753, "learning_rate": 0.00012129295261217843, "loss": 0.598, "step": 31365 }, { "epoch": 1.5926057544643424, "grad_norm": 0.028465455379758378, "learning_rate": 0.0001211483477670014, "loss": 0.5793, "step": 31370 }, { "epoch": 1.592859590562374, "grad_norm": 0.027131166403190254, "learning_rate": 0.0001210038172885145, "loss": 0.5564, "step": 31375 }, { "epoch": 1.5931134266604055, "grad_norm": 0.027050961034464, "learning_rate": 0.00012085936120508811, "loss": 0.5737, "step": 31380 }, { "epoch": 1.593367262758437, "grad_norm": 0.025043671266837656, "learning_rate": 0.00012071497954507843, "loss": 0.5831, "step": 31385 }, { "epoch": 1.5936210988564685, "grad_norm": 0.02473243663406968, "learning_rate": 0.00012057067233682667, "loss": 0.5933, "step": 31390 }, { "epoch": 1.5938749349544998, "grad_norm": 0.030797975528292415, "learning_rate": 0.00012042643960865985, "loss": 0.5901, "step": 31395 }, { "epoch": 1.5941287710525314, "grad_norm": 0.02532239891727524, "learning_rate": 0.00012028228138888986, "loss": 0.6001, "step": 31400 }, { "epoch": 1.594382607150563, "grad_norm": 0.02817211255967934, "learning_rate": 0.00012013819770581458, "loss": 0.5623, "step": 31405 }, { "epoch": 1.5946364432485944, "grad_norm": 0.029937862401135103, "learning_rate": 0.00011999418858771649, "loss": 0.5854, "step": 31410 }, { "epoch": 1.5948902793466257, "grad_norm": 0.026242512677680106, "learning_rate": 0.00011985025406286432, "loss": 0.5574, "step": 31415 }, { "epoch": 1.5951441154446573, "grad_norm": 0.027348038694204352, "learning_rate": 0.00011970639415951129, "loss": 0.6045, "step": 31420 }, { "epoch": 1.5953979515426888, "grad_norm": 0.028137205081104615, "learning_rate": 0.00011956260890589655, "loss": 0.5699, "step": 31425 }, { "epoch": 1.5956517876407204, "grad_norm": 0.029523321079779535, "learning_rate": 0.00011941889833024461, "loss": 0.5995, "step": 31430 }, { "epoch": 1.5959056237387519, "grad_norm": 0.027324328182319075, "learning_rate": 0.0001192752624607648, "loss": 0.5788, "step": 31435 }, { "epoch": 1.5961594598367834, "grad_norm": 0.026008857165242098, "learning_rate": 0.00011913170132565248, "loss": 0.5401, "step": 31440 }, { "epoch": 1.596413295934815, "grad_norm": 0.02875247294269179, "learning_rate": 0.00011898821495308764, "loss": 0.6198, "step": 31445 }, { "epoch": 1.5966671320328465, "grad_norm": 0.02830799925208491, "learning_rate": 0.00011884480337123621, "loss": 0.5911, "step": 31450 }, { "epoch": 1.596920968130878, "grad_norm": 0.027483255989761743, "learning_rate": 0.00011870146660824899, "loss": 0.582, "step": 31455 }, { "epoch": 1.5971748042289093, "grad_norm": 0.02895103756479696, "learning_rate": 0.00011855820469226242, "loss": 0.5758, "step": 31460 }, { "epoch": 1.5974286403269409, "grad_norm": 0.028931104070229224, "learning_rate": 0.00011841501765139795, "loss": 0.5887, "step": 31465 }, { "epoch": 1.5976824764249724, "grad_norm": 0.027175313744590764, "learning_rate": 0.00011827190551376265, "loss": 0.5665, "step": 31470 }, { "epoch": 1.597936312523004, "grad_norm": 0.02774278449122281, "learning_rate": 0.00011812886830744846, "loss": 0.5153, "step": 31475 }, { "epoch": 1.5981901486210353, "grad_norm": 0.027765681407720448, "learning_rate": 0.00011798590606053322, "loss": 0.6067, "step": 31480 }, { "epoch": 1.5984439847190668, "grad_norm": 0.02603483024814125, "learning_rate": 0.00011784301880107917, "loss": 0.5815, "step": 31485 }, { "epoch": 1.5986978208170983, "grad_norm": 0.025959305181879347, "learning_rate": 0.00011770020655713509, "loss": 0.5832, "step": 31490 }, { "epoch": 1.5989516569151299, "grad_norm": 0.029058793201598848, "learning_rate": 0.00011755746935673372, "loss": 0.5859, "step": 31495 }, { "epoch": 1.5992054930131614, "grad_norm": 0.027083637736088906, "learning_rate": 0.00011741480722789405, "loss": 0.5646, "step": 31500 }, { "epoch": 1.599459329111193, "grad_norm": 0.027409892319861996, "learning_rate": 0.00011727222019861966, "loss": 0.5641, "step": 31505 }, { "epoch": 1.5997131652092245, "grad_norm": 0.02814456288811649, "learning_rate": 0.0001171297082968999, "loss": 0.5784, "step": 31510 }, { "epoch": 1.599967001307256, "grad_norm": 0.027602743523613076, "learning_rate": 0.00011698727155070888, "loss": 0.6103, "step": 31515 }, { "epoch": 1.6002208374052875, "grad_norm": 0.026918471739323924, "learning_rate": 0.0001168449099880065, "loss": 0.5818, "step": 31520 }, { "epoch": 1.600474673503319, "grad_norm": 0.0275838695810506, "learning_rate": 0.0001167026236367374, "loss": 0.5888, "step": 31525 }, { "epoch": 1.6007285096013504, "grad_norm": 0.026551651154293525, "learning_rate": 0.00011656041252483185, "loss": 0.5811, "step": 31530 }, { "epoch": 1.600982345699382, "grad_norm": 0.024333592027907955, "learning_rate": 0.00011641827668020504, "loss": 0.5676, "step": 31535 }, { "epoch": 1.6012361817974134, "grad_norm": 0.028544772250496533, "learning_rate": 0.00011627621613075772, "loss": 0.6067, "step": 31540 }, { "epoch": 1.6014900178954448, "grad_norm": 0.029801140328252262, "learning_rate": 0.00011613423090437536, "loss": 0.6055, "step": 31545 }, { "epoch": 1.6017438539934763, "grad_norm": 0.025753832140259413, "learning_rate": 0.0001159923210289292, "loss": 0.5479, "step": 31550 }, { "epoch": 1.6019976900915078, "grad_norm": 0.02562297662953987, "learning_rate": 0.00011585048653227548, "loss": 0.5711, "step": 31555 }, { "epoch": 1.6022515261895394, "grad_norm": 0.024907838216369507, "learning_rate": 0.00011570872744225541, "loss": 0.5776, "step": 31560 }, { "epoch": 1.602505362287571, "grad_norm": 0.027608295159646568, "learning_rate": 0.0001155670437866958, "loss": 0.6107, "step": 31565 }, { "epoch": 1.6027591983856024, "grad_norm": 0.026332894870514142, "learning_rate": 0.00011542543559340817, "loss": 0.6039, "step": 31570 }, { "epoch": 1.603013034483634, "grad_norm": 0.026804977044451606, "learning_rate": 0.0001152839028901898, "loss": 0.5866, "step": 31575 }, { "epoch": 1.6032668705816655, "grad_norm": 0.024871928767462467, "learning_rate": 0.00011514244570482263, "loss": 0.557, "step": 31580 }, { "epoch": 1.603520706679697, "grad_norm": 0.029531415878737352, "learning_rate": 0.00011500106406507416, "loss": 0.5622, "step": 31585 }, { "epoch": 1.6037745427777286, "grad_norm": 0.029714704559421233, "learning_rate": 0.00011485975799869675, "loss": 0.6068, "step": 31590 }, { "epoch": 1.6040283788757599, "grad_norm": 0.028349054062983936, "learning_rate": 0.00011471852753342826, "loss": 0.5812, "step": 31595 }, { "epoch": 1.6042822149737914, "grad_norm": 0.02522483663961381, "learning_rate": 0.00011457737269699125, "loss": 0.5863, "step": 31600 }, { "epoch": 1.604536051071823, "grad_norm": 0.02811638571953454, "learning_rate": 0.00011443629351709394, "loss": 0.6154, "step": 31605 }, { "epoch": 1.6047898871698543, "grad_norm": 0.024386658977417344, "learning_rate": 0.00011429529002142941, "loss": 0.5717, "step": 31610 }, { "epoch": 1.6050437232678858, "grad_norm": 0.027473240295320674, "learning_rate": 0.00011415436223767606, "loss": 0.567, "step": 31615 }, { "epoch": 1.6052975593659173, "grad_norm": 0.02891507436179309, "learning_rate": 0.00011401351019349704, "loss": 0.5707, "step": 31620 }, { "epoch": 1.6055513954639489, "grad_norm": 0.028609394742672753, "learning_rate": 0.00011387273391654118, "loss": 0.5995, "step": 31625 }, { "epoch": 1.6058052315619804, "grad_norm": 0.026133280674939446, "learning_rate": 0.00011373203343444194, "loss": 0.6016, "step": 31630 }, { "epoch": 1.606059067660012, "grad_norm": 0.026540825064387975, "learning_rate": 0.00011359140877481833, "loss": 0.5877, "step": 31635 }, { "epoch": 1.6063129037580435, "grad_norm": 0.02691127621894756, "learning_rate": 0.00011345085996527405, "loss": 0.5975, "step": 31640 }, { "epoch": 1.606566739856075, "grad_norm": 0.030074359115097152, "learning_rate": 0.00011331038703339836, "loss": 0.573, "step": 31645 }, { "epoch": 1.6068205759541065, "grad_norm": 0.02772561481980876, "learning_rate": 0.00011316999000676514, "loss": 0.5822, "step": 31650 }, { "epoch": 1.607074412052138, "grad_norm": 0.02818386844686637, "learning_rate": 0.00011302966891293392, "loss": 0.6037, "step": 31655 }, { "epoch": 1.6073282481501694, "grad_norm": 0.029388249646123563, "learning_rate": 0.00011288942377944872, "loss": 0.6254, "step": 31660 }, { "epoch": 1.607582084248201, "grad_norm": 0.027955566450782607, "learning_rate": 0.00011274925463383912, "loss": 0.5748, "step": 31665 }, { "epoch": 1.6078359203462325, "grad_norm": 0.02600094576122725, "learning_rate": 0.00011260916150361977, "loss": 0.5708, "step": 31670 }, { "epoch": 1.6080897564442638, "grad_norm": 0.026143616252860897, "learning_rate": 0.00011246914441628992, "loss": 0.6018, "step": 31675 }, { "epoch": 1.6083435925422953, "grad_norm": 0.026608924816329666, "learning_rate": 0.00011232920339933461, "loss": 0.587, "step": 31680 }, { "epoch": 1.6085974286403268, "grad_norm": 0.027515097595981017, "learning_rate": 0.00011218933848022317, "loss": 0.622, "step": 31685 }, { "epoch": 1.6088512647383584, "grad_norm": 0.02630260805179827, "learning_rate": 0.00011204954968641074, "loss": 0.5865, "step": 31690 }, { "epoch": 1.60910510083639, "grad_norm": 0.02541444583644928, "learning_rate": 0.00011190983704533685, "loss": 0.5768, "step": 31695 }, { "epoch": 1.6093589369344214, "grad_norm": 0.025920742897588632, "learning_rate": 0.00011177020058442672, "loss": 0.587, "step": 31700 }, { "epoch": 1.609612773032453, "grad_norm": 0.027219197825386596, "learning_rate": 0.00011163064033108994, "loss": 0.6026, "step": 31705 }, { "epoch": 1.6098666091304845, "grad_norm": 0.02907140998930775, "learning_rate": 0.00011149115631272183, "loss": 0.6017, "step": 31710 }, { "epoch": 1.610120445228516, "grad_norm": 0.028562075413703262, "learning_rate": 0.00011135174855670205, "loss": 0.5814, "step": 31715 }, { "epoch": 1.6103742813265476, "grad_norm": 0.02783671918925754, "learning_rate": 0.00011121241709039604, "loss": 0.5776, "step": 31720 }, { "epoch": 1.610628117424579, "grad_norm": 0.02777766136754194, "learning_rate": 0.00011107316194115352, "loss": 0.5947, "step": 31725 }, { "epoch": 1.6108819535226104, "grad_norm": 0.0313239078051435, "learning_rate": 0.00011093398313630975, "loss": 0.6206, "step": 31730 }, { "epoch": 1.611135789620642, "grad_norm": 0.025851715679744994, "learning_rate": 0.00011079488070318477, "loss": 0.5675, "step": 31735 }, { "epoch": 1.6113896257186735, "grad_norm": 0.025691337606166605, "learning_rate": 0.00011065585466908395, "loss": 0.5955, "step": 31740 }, { "epoch": 1.6116434618167048, "grad_norm": 0.026590102291701865, "learning_rate": 0.00011051690506129702, "loss": 0.5637, "step": 31745 }, { "epoch": 1.6118972979147363, "grad_norm": 0.025579962123555126, "learning_rate": 0.00011037803190709945, "loss": 0.5744, "step": 31750 }, { "epoch": 1.6121511340127679, "grad_norm": 0.028550509476253188, "learning_rate": 0.00011023923523375102, "loss": 0.5799, "step": 31755 }, { "epoch": 1.6124049701107994, "grad_norm": 0.027686236560318336, "learning_rate": 0.00011010051506849711, "loss": 0.5834, "step": 31760 }, { "epoch": 1.612658806208831, "grad_norm": 0.025846754377361496, "learning_rate": 0.0001099618714385675, "loss": 0.5902, "step": 31765 }, { "epoch": 1.6129126423068625, "grad_norm": 0.02812876741032123, "learning_rate": 0.0001098233043711776, "loss": 0.591, "step": 31770 }, { "epoch": 1.613166478404894, "grad_norm": 0.030304168828143567, "learning_rate": 0.00010968481389352708, "loss": 0.5369, "step": 31775 }, { "epoch": 1.6134203145029256, "grad_norm": 0.02682274262940163, "learning_rate": 0.00010954640003280125, "loss": 0.5481, "step": 31780 }, { "epoch": 1.613674150600957, "grad_norm": 0.027506514604660588, "learning_rate": 0.00010940806281616977, "loss": 0.5517, "step": 31785 }, { "epoch": 1.6139279866989886, "grad_norm": 0.03097664387785532, "learning_rate": 0.00010926980227078765, "loss": 0.6155, "step": 31790 }, { "epoch": 1.61418182279702, "grad_norm": 0.028579912620614396, "learning_rate": 0.00010913161842379493, "loss": 0.5589, "step": 31795 }, { "epoch": 1.6144356588950515, "grad_norm": 0.025954203383934206, "learning_rate": 0.00010899351130231611, "loss": 0.5469, "step": 31800 }, { "epoch": 1.614689494993083, "grad_norm": 0.02812995558771654, "learning_rate": 0.00010885548093346126, "loss": 0.5835, "step": 31805 }, { "epoch": 1.6149433310911143, "grad_norm": 0.028198823135730845, "learning_rate": 0.00010871752734432466, "loss": 0.5645, "step": 31810 }, { "epoch": 1.6151971671891459, "grad_norm": 0.025968408431787837, "learning_rate": 0.00010857965056198633, "loss": 0.561, "step": 31815 }, { "epoch": 1.6154510032871774, "grad_norm": 0.02736005959307358, "learning_rate": 0.00010844185061351036, "loss": 0.6053, "step": 31820 }, { "epoch": 1.615704839385209, "grad_norm": 0.028230185911027696, "learning_rate": 0.00010830412752594659, "loss": 0.5713, "step": 31825 }, { "epoch": 1.6159586754832405, "grad_norm": 0.02551059897363366, "learning_rate": 0.00010816648132632912, "loss": 0.5597, "step": 31830 }, { "epoch": 1.616212511581272, "grad_norm": 0.027137006833275506, "learning_rate": 0.00010802891204167736, "loss": 0.563, "step": 31835 }, { "epoch": 1.6164663476793035, "grad_norm": 0.02549447207064044, "learning_rate": 0.0001078914196989953, "loss": 0.5523, "step": 31840 }, { "epoch": 1.616720183777335, "grad_norm": 0.028100574540608173, "learning_rate": 0.00010775400432527228, "loss": 0.5968, "step": 31845 }, { "epoch": 1.6169740198753666, "grad_norm": 0.030141549425697675, "learning_rate": 0.00010761666594748176, "loss": 0.5883, "step": 31850 }, { "epoch": 1.6172278559733981, "grad_norm": 0.03050700090737261, "learning_rate": 0.00010747940459258321, "loss": 0.5463, "step": 31855 }, { "epoch": 1.6174816920714294, "grad_norm": 0.028591343153205214, "learning_rate": 0.00010734222028751989, "loss": 0.6151, "step": 31860 }, { "epoch": 1.617735528169461, "grad_norm": 0.0259283868826866, "learning_rate": 0.00010720511305922065, "loss": 0.5296, "step": 31865 }, { "epoch": 1.6179893642674925, "grad_norm": 0.03063947456299327, "learning_rate": 0.00010706808293459875, "loss": 0.611, "step": 31870 }, { "epoch": 1.6182432003655238, "grad_norm": 0.02829719644582972, "learning_rate": 0.00010693112994055277, "loss": 0.5818, "step": 31875 }, { "epoch": 1.6184970364635554, "grad_norm": 0.03859414125402242, "learning_rate": 0.00010679425410396559, "loss": 0.5848, "step": 31880 }, { "epoch": 1.618750872561587, "grad_norm": 0.027698268518338856, "learning_rate": 0.00010665745545170557, "loss": 0.557, "step": 31885 }, { "epoch": 1.6190047086596184, "grad_norm": 0.026436322122518972, "learning_rate": 0.00010652073401062529, "loss": 0.542, "step": 31890 }, { "epoch": 1.61925854475765, "grad_norm": 0.025037041669130736, "learning_rate": 0.00010638408980756281, "loss": 0.5731, "step": 31895 }, { "epoch": 1.6195123808556815, "grad_norm": 0.027495219452348684, "learning_rate": 0.00010624752286934037, "loss": 0.5666, "step": 31900 }, { "epoch": 1.619766216953713, "grad_norm": 0.029523819364174628, "learning_rate": 0.00010611103322276571, "loss": 0.5889, "step": 31905 }, { "epoch": 1.6200200530517446, "grad_norm": 0.02587620168479905, "learning_rate": 0.00010597462089463078, "loss": 0.5883, "step": 31910 }, { "epoch": 1.620273889149776, "grad_norm": 0.026794967209740454, "learning_rate": 0.00010583828591171273, "loss": 0.555, "step": 31915 }, { "epoch": 1.6205277252478076, "grad_norm": 0.023816875825645994, "learning_rate": 0.00010570202830077363, "loss": 0.5472, "step": 31920 }, { "epoch": 1.620781561345839, "grad_norm": 0.027784918403883217, "learning_rate": 0.0001055658480885599, "loss": 0.5864, "step": 31925 }, { "epoch": 1.6210353974438705, "grad_norm": 0.02722290698931548, "learning_rate": 0.00010542974530180327, "loss": 0.5997, "step": 31930 }, { "epoch": 1.621289233541902, "grad_norm": 0.026552535310745678, "learning_rate": 0.00010529371996721976, "loss": 0.5716, "step": 31935 }, { "epoch": 1.6215430696399333, "grad_norm": 0.027237895127156103, "learning_rate": 0.00010515777211151079, "loss": 0.5595, "step": 31940 }, { "epoch": 1.6217969057379649, "grad_norm": 0.027793390072539205, "learning_rate": 0.00010502190176136195, "loss": 0.5634, "step": 31945 }, { "epoch": 1.6220507418359964, "grad_norm": 0.026800627967342467, "learning_rate": 0.00010488610894344414, "loss": 0.5752, "step": 31950 }, { "epoch": 1.622304577934028, "grad_norm": 0.024925389476273516, "learning_rate": 0.00010475039368441258, "loss": 0.5534, "step": 31955 }, { "epoch": 1.6225584140320595, "grad_norm": 0.025860410198421502, "learning_rate": 0.0001046147560109078, "loss": 0.5571, "step": 31960 }, { "epoch": 1.622812250130091, "grad_norm": 0.02765517115682944, "learning_rate": 0.00010447919594955452, "loss": 0.6053, "step": 31965 }, { "epoch": 1.6230660862281225, "grad_norm": 0.02657553026002282, "learning_rate": 0.00010434371352696259, "loss": 0.5887, "step": 31970 }, { "epoch": 1.623319922326154, "grad_norm": 0.030150044204785894, "learning_rate": 0.00010420830876972653, "loss": 0.5545, "step": 31975 }, { "epoch": 1.6235737584241856, "grad_norm": 0.027355055309008496, "learning_rate": 0.0001040729817044258, "loss": 0.5985, "step": 31980 }, { "epoch": 1.6238275945222171, "grad_norm": 0.026136419894199975, "learning_rate": 0.00010393773235762416, "loss": 0.582, "step": 31985 }, { "epoch": 1.6240814306202485, "grad_norm": 0.026625193331209128, "learning_rate": 0.00010380256075587063, "loss": 0.6158, "step": 31990 }, { "epoch": 1.62433526671828, "grad_norm": 0.028445545614662185, "learning_rate": 0.00010366746692569845, "loss": 0.5378, "step": 31995 }, { "epoch": 1.6245891028163115, "grad_norm": 0.02917262335925962, "learning_rate": 0.00010353245089362612, "loss": 0.582, "step": 32000 }, { "epoch": 1.624842938914343, "grad_norm": 0.02985567758638779, "learning_rate": 0.00010339751268615639, "loss": 0.5585, "step": 32005 }, { "epoch": 1.6250967750123744, "grad_norm": 0.026777941042209968, "learning_rate": 0.00010326265232977717, "loss": 0.566, "step": 32010 }, { "epoch": 1.625350611110406, "grad_norm": 0.0277644004373224, "learning_rate": 0.00010312786985096067, "loss": 0.5802, "step": 32015 }, { "epoch": 1.6256044472084374, "grad_norm": 0.024495125117029552, "learning_rate": 0.00010299316527616426, "loss": 0.5919, "step": 32020 }, { "epoch": 1.625858283306469, "grad_norm": 0.027412839980146222, "learning_rate": 0.00010285853863182948, "loss": 0.5603, "step": 32025 }, { "epoch": 1.6261121194045005, "grad_norm": 0.030045716033172288, "learning_rate": 0.00010272398994438303, "loss": 0.5724, "step": 32030 }, { "epoch": 1.626365955502532, "grad_norm": 0.025706061455061227, "learning_rate": 0.00010258951924023625, "loss": 0.5603, "step": 32035 }, { "epoch": 1.6266197916005636, "grad_norm": 0.025520320772610766, "learning_rate": 0.00010245512654578487, "loss": 0.5861, "step": 32040 }, { "epoch": 1.6268736276985951, "grad_norm": 0.0257469226382622, "learning_rate": 0.00010232081188740971, "loss": 0.5738, "step": 32045 }, { "epoch": 1.6271274637966266, "grad_norm": 0.02658524150808095, "learning_rate": 0.0001021865752914758, "loss": 0.5981, "step": 32050 }, { "epoch": 1.6273812998946582, "grad_norm": 0.02817336274561513, "learning_rate": 0.00010205241678433341, "loss": 0.5832, "step": 32055 }, { "epoch": 1.6276351359926895, "grad_norm": 0.027001776615394272, "learning_rate": 0.00010191833639231695, "loss": 0.6029, "step": 32060 }, { "epoch": 1.627888972090721, "grad_norm": 0.026244747920405195, "learning_rate": 0.00010178433414174593, "loss": 0.6155, "step": 32065 }, { "epoch": 1.6281428081887526, "grad_norm": 0.026711834681378638, "learning_rate": 0.00010165041005892412, "loss": 0.5802, "step": 32070 }, { "epoch": 1.6283966442867839, "grad_norm": 0.03149444904596662, "learning_rate": 0.00010151656417014033, "loss": 0.5989, "step": 32075 }, { "epoch": 1.6286504803848154, "grad_norm": 0.02767910447922115, "learning_rate": 0.00010138279650166765, "loss": 0.6343, "step": 32080 }, { "epoch": 1.628904316482847, "grad_norm": 0.028575609422110116, "learning_rate": 0.00010124910707976426, "loss": 0.5636, "step": 32085 }, { "epoch": 1.6291581525808785, "grad_norm": 0.032953233264813905, "learning_rate": 0.00010111549593067226, "loss": 0.5906, "step": 32090 }, { "epoch": 1.62941198867891, "grad_norm": 0.02756076413444612, "learning_rate": 0.00010098196308061953, "loss": 0.5571, "step": 32095 }, { "epoch": 1.6296658247769416, "grad_norm": 0.02625177923292692, "learning_rate": 0.00010084850855581734, "loss": 0.5699, "step": 32100 }, { "epoch": 1.629919660874973, "grad_norm": 0.0262392775917148, "learning_rate": 0.00010071513238246255, "loss": 0.5841, "step": 32105 }, { "epoch": 1.6301734969730046, "grad_norm": 0.025293773722792567, "learning_rate": 0.00010058183458673587, "loss": 0.5687, "step": 32110 }, { "epoch": 1.6304273330710362, "grad_norm": 0.027756226999076507, "learning_rate": 0.0001004486151948033, "loss": 0.6015, "step": 32115 }, { "epoch": 1.6306811691690677, "grad_norm": 0.027544739910638532, "learning_rate": 0.00010031547423281501, "loss": 0.6112, "step": 32120 }, { "epoch": 1.630935005267099, "grad_norm": 0.028407046394110187, "learning_rate": 0.00010018241172690578, "loss": 0.5858, "step": 32125 }, { "epoch": 1.6311888413651305, "grad_norm": 0.02871614819422843, "learning_rate": 0.00010004942770319536, "loss": 0.5758, "step": 32130 }, { "epoch": 1.631442677463162, "grad_norm": 0.027192325940820706, "learning_rate": 9.991652218778762e-05, "loss": 0.5752, "step": 32135 }, { "epoch": 1.6316965135611934, "grad_norm": 0.027647961531478527, "learning_rate": 9.97836952067715e-05, "loss": 0.5939, "step": 32140 }, { "epoch": 1.631950349659225, "grad_norm": 0.027108570161093973, "learning_rate": 9.965094678621994e-05, "loss": 0.6134, "step": 32145 }, { "epoch": 1.6322041857572565, "grad_norm": 0.028495060986277274, "learning_rate": 9.951827695219107e-05, "loss": 0.5504, "step": 32150 }, { "epoch": 1.632458021855288, "grad_norm": 0.02835576532876524, "learning_rate": 9.938568573072715e-05, "loss": 0.5766, "step": 32155 }, { "epoch": 1.6327118579533195, "grad_norm": 0.02836593901974505, "learning_rate": 9.925317314785548e-05, "loss": 0.5583, "step": 32160 }, { "epoch": 1.632965694051351, "grad_norm": 0.02705278365501261, "learning_rate": 9.91207392295872e-05, "loss": 0.5978, "step": 32165 }, { "epoch": 1.6332195301493826, "grad_norm": 0.026447251363258237, "learning_rate": 9.898838400191879e-05, "loss": 0.5931, "step": 32170 }, { "epoch": 1.6334733662474141, "grad_norm": 0.026839947935684616, "learning_rate": 9.885610749083063e-05, "loss": 0.5953, "step": 32175 }, { "epoch": 1.6337272023454457, "grad_norm": 0.02697581623844076, "learning_rate": 9.872390972228823e-05, "loss": 0.5882, "step": 32180 }, { "epoch": 1.6339810384434772, "grad_norm": 0.028135464980545696, "learning_rate": 9.8591790722241e-05, "loss": 0.6315, "step": 32185 }, { "epoch": 1.6342348745415085, "grad_norm": 0.02450923208603619, "learning_rate": 9.84597505166236e-05, "loss": 0.5814, "step": 32190 }, { "epoch": 1.63448871063954, "grad_norm": 0.02663600712558919, "learning_rate": 9.832778913135454e-05, "loss": 0.5563, "step": 32195 }, { "epoch": 1.6347425467375716, "grad_norm": 0.029133202576115395, "learning_rate": 9.819590659233746e-05, "loss": 0.5904, "step": 32200 }, { "epoch": 1.634996382835603, "grad_norm": 0.0286757229031933, "learning_rate": 9.806410292546003e-05, "loss": 0.5762, "step": 32205 }, { "epoch": 1.6352502189336344, "grad_norm": 0.028997659984258715, "learning_rate": 9.793237815659473e-05, "loss": 0.6142, "step": 32210 }, { "epoch": 1.635504055031666, "grad_norm": 0.02968993580496376, "learning_rate": 9.780073231159864e-05, "loss": 0.6037, "step": 32215 }, { "epoch": 1.6357578911296975, "grad_norm": 0.02523243396627756, "learning_rate": 9.766916541631288e-05, "loss": 0.5766, "step": 32220 }, { "epoch": 1.636011727227729, "grad_norm": 0.02833321972911261, "learning_rate": 9.753767749656361e-05, "loss": 0.5306, "step": 32225 }, { "epoch": 1.6362655633257606, "grad_norm": 0.027465613654852273, "learning_rate": 9.740626857816109e-05, "loss": 0.5842, "step": 32230 }, { "epoch": 1.636519399423792, "grad_norm": 0.02672208709665233, "learning_rate": 9.727493868690046e-05, "loss": 0.5735, "step": 32235 }, { "epoch": 1.6367732355218236, "grad_norm": 0.028411062108325877, "learning_rate": 9.714368784856081e-05, "loss": 0.5998, "step": 32240 }, { "epoch": 1.6370270716198552, "grad_norm": 0.026132450874120864, "learning_rate": 9.701251608890638e-05, "loss": 0.6035, "step": 32245 }, { "epoch": 1.6372809077178867, "grad_norm": 0.026945923265285075, "learning_rate": 9.688142343368517e-05, "loss": 0.6008, "step": 32250 }, { "epoch": 1.637534743815918, "grad_norm": 0.025865093634416, "learning_rate": 9.675040990863032e-05, "loss": 0.5721, "step": 32255 }, { "epoch": 1.6377885799139495, "grad_norm": 0.028989168756325543, "learning_rate": 9.661947553945893e-05, "loss": 0.5969, "step": 32260 }, { "epoch": 1.638042416011981, "grad_norm": 0.02791678356603265, "learning_rate": 9.648862035187289e-05, "loss": 0.5871, "step": 32265 }, { "epoch": 1.6382962521100126, "grad_norm": 0.02489752137718904, "learning_rate": 9.635784437155815e-05, "loss": 0.6025, "step": 32270 }, { "epoch": 1.638550088208044, "grad_norm": 0.026101671021901772, "learning_rate": 9.622714762418588e-05, "loss": 0.5659, "step": 32275 }, { "epoch": 1.6388039243060755, "grad_norm": 0.02668857376804859, "learning_rate": 9.609653013541076e-05, "loss": 0.5945, "step": 32280 }, { "epoch": 1.639057760404107, "grad_norm": 0.025910204396227234, "learning_rate": 9.596599193087263e-05, "loss": 0.5555, "step": 32285 }, { "epoch": 1.6393115965021385, "grad_norm": 0.025692241760037943, "learning_rate": 9.583553303619524e-05, "loss": 0.6085, "step": 32290 }, { "epoch": 1.63956543260017, "grad_norm": 0.0283629919753774, "learning_rate": 9.570515347698727e-05, "loss": 0.5807, "step": 32295 }, { "epoch": 1.6398192686982016, "grad_norm": 0.027160386383813104, "learning_rate": 9.557485327884136e-05, "loss": 0.5663, "step": 32300 }, { "epoch": 1.6400731047962331, "grad_norm": 0.027667490591295404, "learning_rate": 9.544463246733503e-05, "loss": 0.5904, "step": 32305 }, { "epoch": 1.6403269408942647, "grad_norm": 0.030015013406518316, "learning_rate": 9.531449106802964e-05, "loss": 0.5857, "step": 32310 }, { "epoch": 1.6405807769922962, "grad_norm": 0.026620132219175977, "learning_rate": 9.518442910647168e-05, "loss": 0.5607, "step": 32315 }, { "epoch": 1.6408346130903275, "grad_norm": 0.025760821493205494, "learning_rate": 9.50544466081913e-05, "loss": 0.555, "step": 32320 }, { "epoch": 1.641088449188359, "grad_norm": 0.02712644115308892, "learning_rate": 9.492454359870379e-05, "loss": 0.5872, "step": 32325 }, { "epoch": 1.6413422852863906, "grad_norm": 0.0270275338047262, "learning_rate": 9.479472010350803e-05, "loss": 0.5716, "step": 32330 }, { "epoch": 1.6415961213844221, "grad_norm": 0.028741124108731596, "learning_rate": 9.466497614808806e-05, "loss": 0.5706, "step": 32335 }, { "epoch": 1.6418499574824534, "grad_norm": 0.026574987891644593, "learning_rate": 9.453531175791191e-05, "loss": 0.5915, "step": 32340 }, { "epoch": 1.642103793580485, "grad_norm": 0.02586400240595415, "learning_rate": 9.440572695843192e-05, "loss": 0.5668, "step": 32345 }, { "epoch": 1.6423576296785165, "grad_norm": 0.02695785520428268, "learning_rate": 9.427622177508521e-05, "loss": 0.5435, "step": 32350 }, { "epoch": 1.642611465776548, "grad_norm": 0.02669182019071189, "learning_rate": 9.414679623329264e-05, "loss": 0.5699, "step": 32355 }, { "epoch": 1.6428653018745796, "grad_norm": 0.028142642636117998, "learning_rate": 9.40174503584601e-05, "loss": 0.6085, "step": 32360 }, { "epoch": 1.643119137972611, "grad_norm": 0.0258031527476143, "learning_rate": 9.388818417597733e-05, "loss": 0.5694, "step": 32365 }, { "epoch": 1.6433729740706426, "grad_norm": 0.026444271481178835, "learning_rate": 9.375899771121888e-05, "loss": 0.5257, "step": 32370 }, { "epoch": 1.6436268101686742, "grad_norm": 0.03409241642859765, "learning_rate": 9.362989098954306e-05, "loss": 0.576, "step": 32375 }, { "epoch": 1.6438806462667057, "grad_norm": 0.029156550224633565, "learning_rate": 9.350086403629326e-05, "loss": 0.5583, "step": 32380 }, { "epoch": 1.6441344823647372, "grad_norm": 0.0267184978988484, "learning_rate": 9.337191687679648e-05, "loss": 0.5632, "step": 32385 }, { "epoch": 1.6443883184627686, "grad_norm": 0.02649208195661815, "learning_rate": 9.324304953636458e-05, "loss": 0.5801, "step": 32390 }, { "epoch": 1.6446421545608, "grad_norm": 0.026907231995997434, "learning_rate": 9.311426204029355e-05, "loss": 0.5683, "step": 32395 }, { "epoch": 1.6448959906588316, "grad_norm": 0.02775651458219463, "learning_rate": 9.298555441386392e-05, "loss": 0.6185, "step": 32400 }, { "epoch": 1.645149826756863, "grad_norm": 0.027591897431136585, "learning_rate": 9.285692668233997e-05, "loss": 0.6009, "step": 32405 }, { "epoch": 1.6454036628548945, "grad_norm": 0.03208421591412264, "learning_rate": 9.272837887097108e-05, "loss": 0.6114, "step": 32410 }, { "epoch": 1.645657498952926, "grad_norm": 0.02741644197817024, "learning_rate": 9.259991100499021e-05, "loss": 0.5836, "step": 32415 }, { "epoch": 1.6459113350509575, "grad_norm": 0.02562698669628972, "learning_rate": 9.247152310961527e-05, "loss": 0.5684, "step": 32420 }, { "epoch": 1.646165171148989, "grad_norm": 0.027617087205243245, "learning_rate": 9.234321521004786e-05, "loss": 0.6047, "step": 32425 }, { "epoch": 1.6464190072470206, "grad_norm": 0.029161936697912293, "learning_rate": 9.221498733147443e-05, "loss": 0.5764, "step": 32430 }, { "epoch": 1.6466728433450522, "grad_norm": 0.02743003180158243, "learning_rate": 9.208683949906526e-05, "loss": 0.5285, "step": 32435 }, { "epoch": 1.6469266794430837, "grad_norm": 0.026962091072161565, "learning_rate": 9.195877173797534e-05, "loss": 0.5741, "step": 32440 }, { "epoch": 1.6471805155411152, "grad_norm": 0.030669639581964236, "learning_rate": 9.18307840733435e-05, "loss": 0.5883, "step": 32445 }, { "epoch": 1.6474343516391468, "grad_norm": 0.02839638429511899, "learning_rate": 9.170287653029325e-05, "loss": 0.5632, "step": 32450 }, { "epoch": 1.647688187737178, "grad_norm": 0.028407171654805503, "learning_rate": 9.157504913393228e-05, "loss": 0.5913, "step": 32455 }, { "epoch": 1.6479420238352096, "grad_norm": 0.028797172495894637, "learning_rate": 9.14473019093522e-05, "loss": 0.5856, "step": 32460 }, { "epoch": 1.6481958599332411, "grad_norm": 0.028354990403850932, "learning_rate": 9.131963488162942e-05, "loss": 0.5756, "step": 32465 }, { "epoch": 1.6484496960312724, "grad_norm": 0.027842138877604505, "learning_rate": 9.119204807582415e-05, "loss": 0.5614, "step": 32470 }, { "epoch": 1.648703532129304, "grad_norm": 0.027133498260771338, "learning_rate": 9.106454151698118e-05, "loss": 0.5774, "step": 32475 }, { "epoch": 1.6489573682273355, "grad_norm": 0.024833288633832253, "learning_rate": 9.093711523012933e-05, "loss": 0.5556, "step": 32480 }, { "epoch": 1.649211204325367, "grad_norm": 0.029376656129581066, "learning_rate": 9.080976924028177e-05, "loss": 0.5599, "step": 32485 }, { "epoch": 1.6494650404233986, "grad_norm": 0.026796321404208765, "learning_rate": 9.068250357243585e-05, "loss": 0.5651, "step": 32490 }, { "epoch": 1.6497188765214301, "grad_norm": 0.026241580152056344, "learning_rate": 9.055531825157332e-05, "loss": 0.5651, "step": 32495 }, { "epoch": 1.6499727126194617, "grad_norm": 0.029764088407712046, "learning_rate": 9.042821330265976e-05, "loss": 0.5846, "step": 32500 }, { "epoch": 1.6502265487174932, "grad_norm": 0.02572220433481158, "learning_rate": 9.030118875064553e-05, "loss": 0.5779, "step": 32505 }, { "epoch": 1.6504803848155247, "grad_norm": 0.027091729231452578, "learning_rate": 9.017424462046453e-05, "loss": 0.5896, "step": 32510 }, { "epoch": 1.6507342209135563, "grad_norm": 0.027259370151736777, "learning_rate": 9.00473809370358e-05, "loss": 0.5666, "step": 32515 }, { "epoch": 1.6509880570115876, "grad_norm": 0.02828760520027945, "learning_rate": 8.992059772526163e-05, "loss": 0.6045, "step": 32520 }, { "epoch": 1.651241893109619, "grad_norm": 0.026705919601215825, "learning_rate": 8.979389501002916e-05, "loss": 0.5606, "step": 32525 }, { "epoch": 1.6514957292076506, "grad_norm": 0.028115350450125056, "learning_rate": 8.966727281620929e-05, "loss": 0.6014, "step": 32530 }, { "epoch": 1.651749565305682, "grad_norm": 0.026050217339294586, "learning_rate": 8.954073116865757e-05, "loss": 0.6106, "step": 32535 }, { "epoch": 1.6520034014037135, "grad_norm": 0.028182651157135582, "learning_rate": 8.941427009221325e-05, "loss": 0.6023, "step": 32540 }, { "epoch": 1.652257237501745, "grad_norm": 0.02748661586083736, "learning_rate": 8.928788961170025e-05, "loss": 0.6082, "step": 32545 }, { "epoch": 1.6525110735997766, "grad_norm": 0.026251868949428738, "learning_rate": 8.916158975192618e-05, "loss": 0.5802, "step": 32550 }, { "epoch": 1.652764909697808, "grad_norm": 0.02662684048321657, "learning_rate": 8.903537053768329e-05, "loss": 0.5631, "step": 32555 }, { "epoch": 1.6530187457958396, "grad_norm": 0.0272988885301939, "learning_rate": 8.890923199374756e-05, "loss": 0.5807, "step": 32560 }, { "epoch": 1.6532725818938712, "grad_norm": 0.030266645362414718, "learning_rate": 8.878317414487964e-05, "loss": 0.581, "step": 32565 }, { "epoch": 1.6535264179919027, "grad_norm": 0.03001657668216681, "learning_rate": 8.865719701582376e-05, "loss": 0.5698, "step": 32570 }, { "epoch": 1.6537802540899342, "grad_norm": 0.026681881891594816, "learning_rate": 8.85313006313087e-05, "loss": 0.5761, "step": 32575 }, { "epoch": 1.6540340901879658, "grad_norm": 0.026121933748324958, "learning_rate": 8.84054850160475e-05, "loss": 0.6002, "step": 32580 }, { "epoch": 1.654287926285997, "grad_norm": 0.02397950017513012, "learning_rate": 8.827975019473688e-05, "loss": 0.545, "step": 32585 }, { "epoch": 1.6545417623840286, "grad_norm": 0.027847558387063345, "learning_rate": 8.815409619205811e-05, "loss": 0.5711, "step": 32590 }, { "epoch": 1.6547955984820601, "grad_norm": 0.026721492529702393, "learning_rate": 8.802852303267634e-05, "loss": 0.5836, "step": 32595 }, { "epoch": 1.6550494345800917, "grad_norm": 0.030431420321309306, "learning_rate": 8.790303074124106e-05, "loss": 0.5632, "step": 32600 }, { "epoch": 1.655303270678123, "grad_norm": 0.027861390334917347, "learning_rate": 8.77776193423856e-05, "loss": 0.5928, "step": 32605 }, { "epoch": 1.6555571067761545, "grad_norm": 0.030549926024472537, "learning_rate": 8.765228886072785e-05, "loss": 0.5754, "step": 32610 }, { "epoch": 1.655810942874186, "grad_norm": 0.028885941074706645, "learning_rate": 8.75270393208693e-05, "loss": 0.5692, "step": 32615 }, { "epoch": 1.6560647789722176, "grad_norm": 0.026405246038291592, "learning_rate": 8.740187074739609e-05, "loss": 0.5403, "step": 32620 }, { "epoch": 1.6563186150702491, "grad_norm": 0.027074128566883657, "learning_rate": 8.727678316487786e-05, "loss": 0.579, "step": 32625 }, { "epoch": 1.6565724511682807, "grad_norm": 0.028516950854033024, "learning_rate": 8.7151776597869e-05, "loss": 0.6298, "step": 32630 }, { "epoch": 1.6568262872663122, "grad_norm": 0.026088297131060298, "learning_rate": 8.702685107090725e-05, "loss": 0.598, "step": 32635 }, { "epoch": 1.6570801233643437, "grad_norm": 0.025798643929602617, "learning_rate": 8.690200660851539e-05, "loss": 0.5804, "step": 32640 }, { "epoch": 1.6573339594623753, "grad_norm": 0.03153198950216478, "learning_rate": 8.677724323519937e-05, "loss": 0.6144, "step": 32645 }, { "epoch": 1.6575877955604068, "grad_norm": 0.02524698938995036, "learning_rate": 8.665256097544994e-05, "loss": 0.5879, "step": 32650 }, { "epoch": 1.6578416316584381, "grad_norm": 0.026466998109776242, "learning_rate": 8.65279598537413e-05, "loss": 0.5938, "step": 32655 }, { "epoch": 1.6580954677564697, "grad_norm": 0.02734792187206278, "learning_rate": 8.640343989453225e-05, "loss": 0.5666, "step": 32660 }, { "epoch": 1.6583493038545012, "grad_norm": 0.029089331845843402, "learning_rate": 8.627900112226522e-05, "loss": 0.6017, "step": 32665 }, { "epoch": 1.6586031399525325, "grad_norm": 0.022921572269339913, "learning_rate": 8.61546435613672e-05, "loss": 0.5691, "step": 32670 }, { "epoch": 1.658856976050564, "grad_norm": 0.024612477731646088, "learning_rate": 8.603036723624868e-05, "loss": 0.583, "step": 32675 }, { "epoch": 1.6591108121485956, "grad_norm": 0.02695622850863616, "learning_rate": 8.590617217130469e-05, "loss": 0.6076, "step": 32680 }, { "epoch": 1.659364648246627, "grad_norm": 0.0301263708947954, "learning_rate": 8.578205839091397e-05, "loss": 0.572, "step": 32685 }, { "epoch": 1.6596184843446586, "grad_norm": 0.029233915243437457, "learning_rate": 8.565802591943955e-05, "loss": 0.5777, "step": 32690 }, { "epoch": 1.6598723204426902, "grad_norm": 0.029293159482813576, "learning_rate": 8.55340747812282e-05, "loss": 0.5777, "step": 32695 }, { "epoch": 1.6601261565407217, "grad_norm": 0.025154237771763878, "learning_rate": 8.541020500061109e-05, "loss": 0.5837, "step": 32700 }, { "epoch": 1.6603799926387532, "grad_norm": 0.02875100840214304, "learning_rate": 8.528641660190323e-05, "loss": 0.611, "step": 32705 }, { "epoch": 1.6606338287367848, "grad_norm": 0.02503393890199444, "learning_rate": 8.516270960940353e-05, "loss": 0.5653, "step": 32710 }, { "epoch": 1.6608876648348163, "grad_norm": 0.026588418135678724, "learning_rate": 8.50390840473953e-05, "loss": 0.5825, "step": 32715 }, { "epoch": 1.6611415009328476, "grad_norm": 0.02625548898512878, "learning_rate": 8.491553994014528e-05, "loss": 0.5193, "step": 32720 }, { "epoch": 1.6613953370308792, "grad_norm": 0.028777430074365316, "learning_rate": 8.479207731190491e-05, "loss": 0.5634, "step": 32725 }, { "epoch": 1.6616491731289107, "grad_norm": 0.030023381038228398, "learning_rate": 8.466869618690898e-05, "loss": 0.5617, "step": 32730 }, { "epoch": 1.661903009226942, "grad_norm": 0.027894428825922014, "learning_rate": 8.454539658937688e-05, "loss": 0.5546, "step": 32735 }, { "epoch": 1.6621568453249735, "grad_norm": 0.025257668942281618, "learning_rate": 8.442217854351142e-05, "loss": 0.5548, "step": 32740 }, { "epoch": 1.662410681423005, "grad_norm": 0.02935806810889225, "learning_rate": 8.429904207349997e-05, "loss": 0.6256, "step": 32745 }, { "epoch": 1.6626645175210366, "grad_norm": 0.026653680054215815, "learning_rate": 8.417598720351333e-05, "loss": 0.553, "step": 32750 }, { "epoch": 1.6629183536190681, "grad_norm": 0.025885771582421047, "learning_rate": 8.40530139577067e-05, "loss": 0.552, "step": 32755 }, { "epoch": 1.6631721897170997, "grad_norm": 0.02908658113374204, "learning_rate": 8.393012236021908e-05, "loss": 0.5682, "step": 32760 }, { "epoch": 1.6634260258151312, "grad_norm": 0.029521887681530655, "learning_rate": 8.380731243517365e-05, "loss": 0.6131, "step": 32765 }, { "epoch": 1.6636798619131627, "grad_norm": 0.026810936827813558, "learning_rate": 8.368458420667707e-05, "loss": 0.5713, "step": 32770 }, { "epoch": 1.6639336980111943, "grad_norm": 0.026429799696405604, "learning_rate": 8.356193769882064e-05, "loss": 0.5839, "step": 32775 }, { "epoch": 1.6641875341092258, "grad_norm": 0.025847466216518154, "learning_rate": 8.343937293567888e-05, "loss": 0.5599, "step": 32780 }, { "epoch": 1.6644413702072571, "grad_norm": 0.06606397309927288, "learning_rate": 8.331688994131098e-05, "loss": 0.5993, "step": 32785 }, { "epoch": 1.6646952063052887, "grad_norm": 0.026585844365823487, "learning_rate": 8.319448873975948e-05, "loss": 0.5616, "step": 32790 }, { "epoch": 1.6649490424033202, "grad_norm": 0.027898443362688093, "learning_rate": 8.307216935505135e-05, "loss": 0.5791, "step": 32795 }, { "epoch": 1.6652028785013515, "grad_norm": 0.028339124156140226, "learning_rate": 8.294993181119703e-05, "loss": 0.6118, "step": 32800 }, { "epoch": 1.665456714599383, "grad_norm": 0.02845049562604933, "learning_rate": 8.282777613219139e-05, "loss": 0.5734, "step": 32805 }, { "epoch": 1.6657105506974146, "grad_norm": 0.027064573462528177, "learning_rate": 8.270570234201274e-05, "loss": 0.5825, "step": 32810 }, { "epoch": 1.6659643867954461, "grad_norm": 0.026984828685037772, "learning_rate": 8.25837104646237e-05, "loss": 0.6115, "step": 32815 }, { "epoch": 1.6662182228934777, "grad_norm": 0.026984059917189218, "learning_rate": 8.246180052397078e-05, "loss": 0.6179, "step": 32820 }, { "epoch": 1.6664720589915092, "grad_norm": 0.025287339238529678, "learning_rate": 8.233997254398401e-05, "loss": 0.5683, "step": 32825 }, { "epoch": 1.6667258950895407, "grad_norm": 0.029862471965077356, "learning_rate": 8.221822654857786e-05, "loss": 0.5584, "step": 32830 }, { "epoch": 1.6669797311875723, "grad_norm": 0.02936264177675743, "learning_rate": 8.209656256165027e-05, "loss": 0.6143, "step": 32835 }, { "epoch": 1.6672335672856038, "grad_norm": 0.02708936134097619, "learning_rate": 8.197498060708347e-05, "loss": 0.5792, "step": 32840 }, { "epoch": 1.6674874033836353, "grad_norm": 0.029166901102336173, "learning_rate": 8.185348070874316e-05, "loss": 0.5733, "step": 32845 }, { "epoch": 1.6677412394816666, "grad_norm": 0.02486256452620972, "learning_rate": 8.173206289047947e-05, "loss": 0.5837, "step": 32850 }, { "epoch": 1.6679950755796982, "grad_norm": 0.02781316721810526, "learning_rate": 8.161072717612578e-05, "loss": 0.5796, "step": 32855 }, { "epoch": 1.6682489116777297, "grad_norm": 0.027266566463259558, "learning_rate": 8.148947358949992e-05, "loss": 0.5817, "step": 32860 }, { "epoch": 1.6685027477757612, "grad_norm": 0.030102548566395656, "learning_rate": 8.136830215440322e-05, "loss": 0.5946, "step": 32865 }, { "epoch": 1.6687565838737926, "grad_norm": 0.025938663512735268, "learning_rate": 8.124721289462122e-05, "loss": 0.5592, "step": 32870 }, { "epoch": 1.669010419971824, "grad_norm": 0.028264980764618, "learning_rate": 8.112620583392272e-05, "loss": 0.5808, "step": 32875 }, { "epoch": 1.6692642560698556, "grad_norm": 0.02705223298920203, "learning_rate": 8.100528099606135e-05, "loss": 0.5858, "step": 32880 }, { "epoch": 1.6695180921678872, "grad_norm": 0.026964229042886298, "learning_rate": 8.088443840477371e-05, "loss": 0.5704, "step": 32885 }, { "epoch": 1.6697719282659187, "grad_norm": 0.027120936842749646, "learning_rate": 8.076367808378083e-05, "loss": 0.5817, "step": 32890 }, { "epoch": 1.6700257643639502, "grad_norm": 0.028015507474832148, "learning_rate": 8.064300005678705e-05, "loss": 0.6351, "step": 32895 }, { "epoch": 1.6702796004619818, "grad_norm": 0.02648598601643453, "learning_rate": 8.052240434748114e-05, "loss": 0.5754, "step": 32900 }, { "epoch": 1.6705334365600133, "grad_norm": 0.03962514034821299, "learning_rate": 8.04018909795352e-05, "loss": 0.5601, "step": 32905 }, { "epoch": 1.6707872726580448, "grad_norm": 0.027512826342349996, "learning_rate": 8.028145997660569e-05, "loss": 0.6075, "step": 32910 }, { "epoch": 1.6710411087560764, "grad_norm": 0.028636379319664392, "learning_rate": 8.016111136233229e-05, "loss": 0.5925, "step": 32915 }, { "epoch": 1.6712949448541077, "grad_norm": 0.026031651326726083, "learning_rate": 8.00408451603391e-05, "loss": 0.5948, "step": 32920 }, { "epoch": 1.6715487809521392, "grad_norm": 0.026105209581651904, "learning_rate": 7.992066139423359e-05, "loss": 0.5743, "step": 32925 }, { "epoch": 1.6718026170501707, "grad_norm": 0.02693232635447708, "learning_rate": 7.980056008760744e-05, "loss": 0.589, "step": 32930 }, { "epoch": 1.672056453148202, "grad_norm": 0.027947212862748778, "learning_rate": 7.968054126403568e-05, "loss": 0.5592, "step": 32935 }, { "epoch": 1.6723102892462336, "grad_norm": 0.025120453555736715, "learning_rate": 7.956060494707757e-05, "loss": 0.5719, "step": 32940 }, { "epoch": 1.6725641253442651, "grad_norm": 0.029025695236304713, "learning_rate": 7.944075116027604e-05, "loss": 0.5862, "step": 32945 }, { "epoch": 1.6728179614422967, "grad_norm": 0.026492223230136145, "learning_rate": 7.93209799271577e-05, "loss": 0.5772, "step": 32950 }, { "epoch": 1.6730717975403282, "grad_norm": 0.026900098716815736, "learning_rate": 7.920129127123316e-05, "loss": 0.566, "step": 32955 }, { "epoch": 1.6733256336383597, "grad_norm": 0.024238878543892152, "learning_rate": 7.908168521599646e-05, "loss": 0.5735, "step": 32960 }, { "epoch": 1.6735794697363913, "grad_norm": 0.03179700135650423, "learning_rate": 7.896216178492599e-05, "loss": 0.5934, "step": 32965 }, { "epoch": 1.6738333058344228, "grad_norm": 0.025936350316363904, "learning_rate": 7.884272100148332e-05, "loss": 0.6036, "step": 32970 }, { "epoch": 1.6740871419324543, "grad_norm": 0.04326668230474214, "learning_rate": 7.872336288911436e-05, "loss": 0.5767, "step": 32975 }, { "epoch": 1.6743409780304859, "grad_norm": 0.026808609159941793, "learning_rate": 7.86040874712482e-05, "loss": 0.5594, "step": 32980 }, { "epoch": 1.6745948141285172, "grad_norm": 0.027095564586580457, "learning_rate": 7.848489477129828e-05, "loss": 0.5887, "step": 32985 }, { "epoch": 1.6748486502265487, "grad_norm": 0.02617072190851564, "learning_rate": 7.836578481266132e-05, "loss": 0.5756, "step": 32990 }, { "epoch": 1.6751024863245803, "grad_norm": 0.026298501828785043, "learning_rate": 7.824675761871814e-05, "loss": 0.6011, "step": 32995 }, { "epoch": 1.6753563224226116, "grad_norm": 0.025922805080192554, "learning_rate": 7.812781321283319e-05, "loss": 0.5768, "step": 33000 }, { "epoch": 1.675610158520643, "grad_norm": 0.028500287262673094, "learning_rate": 7.800895161835469e-05, "loss": 0.6228, "step": 33005 }, { "epoch": 1.6758639946186746, "grad_norm": 0.02727947937793367, "learning_rate": 7.789017285861439e-05, "loss": 0.5959, "step": 33010 }, { "epoch": 1.6761178307167062, "grad_norm": 0.02848131863202182, "learning_rate": 7.777147695692827e-05, "loss": 0.5908, "step": 33015 }, { "epoch": 1.6763716668147377, "grad_norm": 0.03062432408025501, "learning_rate": 7.765286393659543e-05, "loss": 0.5494, "step": 33020 }, { "epoch": 1.6766255029127692, "grad_norm": 0.02651704874936649, "learning_rate": 7.75343338208993e-05, "loss": 0.5848, "step": 33025 }, { "epoch": 1.6768793390108008, "grad_norm": 0.02668678874976483, "learning_rate": 7.741588663310644e-05, "loss": 0.5723, "step": 33030 }, { "epoch": 1.6771331751088323, "grad_norm": 0.02827373368410595, "learning_rate": 7.729752239646776e-05, "loss": 0.569, "step": 33035 }, { "epoch": 1.6773870112068638, "grad_norm": 0.02721453494496366, "learning_rate": 7.717924113421732e-05, "loss": 0.5668, "step": 33040 }, { "epoch": 1.6776408473048954, "grad_norm": 0.02695061333735442, "learning_rate": 7.706104286957333e-05, "loss": 0.5729, "step": 33045 }, { "epoch": 1.6778946834029267, "grad_norm": 0.02832058803987468, "learning_rate": 7.694292762573729e-05, "loss": 0.5812, "step": 33050 }, { "epoch": 1.6781485195009582, "grad_norm": 0.02732855864256778, "learning_rate": 7.682489542589483e-05, "loss": 0.6312, "step": 33055 }, { "epoch": 1.6784023555989898, "grad_norm": 0.027087251422783797, "learning_rate": 7.670694629321511e-05, "loss": 0.5932, "step": 33060 }, { "epoch": 1.678656191697021, "grad_norm": 0.02613952819353669, "learning_rate": 7.658908025085076e-05, "loss": 0.5823, "step": 33065 }, { "epoch": 1.6789100277950526, "grad_norm": 0.025772391670387036, "learning_rate": 7.647129732193859e-05, "loss": 0.54, "step": 33070 }, { "epoch": 1.6791638638930841, "grad_norm": 0.032209099243505325, "learning_rate": 7.635359752959841e-05, "loss": 0.6073, "step": 33075 }, { "epoch": 1.6794176999911157, "grad_norm": 0.030109970613569983, "learning_rate": 7.623598089693446e-05, "loss": 0.6044, "step": 33080 }, { "epoch": 1.6796715360891472, "grad_norm": 0.02815874816992607, "learning_rate": 7.611844744703406e-05, "loss": 0.5848, "step": 33085 }, { "epoch": 1.6799253721871787, "grad_norm": 0.02743319559460864, "learning_rate": 7.600099720296866e-05, "loss": 0.5967, "step": 33090 }, { "epoch": 1.6801792082852103, "grad_norm": 0.027990854330499405, "learning_rate": 7.588363018779288e-05, "loss": 0.5868, "step": 33095 }, { "epoch": 1.6804330443832418, "grad_norm": 0.026535198641073, "learning_rate": 7.576634642454555e-05, "loss": 0.5784, "step": 33100 }, { "epoch": 1.6806868804812733, "grad_norm": 0.02754288385199119, "learning_rate": 7.564914593624866e-05, "loss": 0.5973, "step": 33105 }, { "epoch": 1.6809407165793049, "grad_norm": 0.028603823949667553, "learning_rate": 7.553202874590825e-05, "loss": 0.5623, "step": 33110 }, { "epoch": 1.6811945526773362, "grad_norm": 0.02694636863993202, "learning_rate": 7.54149948765136e-05, "loss": 0.5716, "step": 33115 }, { "epoch": 1.6814483887753677, "grad_norm": 0.02459017158033073, "learning_rate": 7.529804435103831e-05, "loss": 0.585, "step": 33120 }, { "epoch": 1.6817022248733993, "grad_norm": 0.02781100967233465, "learning_rate": 7.518117719243878e-05, "loss": 0.5842, "step": 33125 }, { "epoch": 1.6819560609714308, "grad_norm": 0.029471772880662696, "learning_rate": 7.506439342365573e-05, "loss": 0.6044, "step": 33130 }, { "epoch": 1.6822098970694621, "grad_norm": 0.027876816558934443, "learning_rate": 7.494769306761296e-05, "loss": 0.5722, "step": 33135 }, { "epoch": 1.6824637331674936, "grad_norm": 0.028154819610902757, "learning_rate": 7.483107614721846e-05, "loss": 0.5975, "step": 33140 }, { "epoch": 1.6827175692655252, "grad_norm": 0.02781430840906716, "learning_rate": 7.471454268536338e-05, "loss": 0.5718, "step": 33145 }, { "epoch": 1.6829714053635567, "grad_norm": 0.02877470531174321, "learning_rate": 7.459809270492252e-05, "loss": 0.5682, "step": 33150 }, { "epoch": 1.6832252414615883, "grad_norm": 0.02569336797153326, "learning_rate": 7.448172622875477e-05, "loss": 0.5854, "step": 33155 }, { "epoch": 1.6834790775596198, "grad_norm": 0.027886964239361794, "learning_rate": 7.436544327970191e-05, "loss": 0.5585, "step": 33160 }, { "epoch": 1.6837329136576513, "grad_norm": 0.030367582058535198, "learning_rate": 7.424924388059007e-05, "loss": 0.5904, "step": 33165 }, { "epoch": 1.6839867497556829, "grad_norm": 0.028745692778811414, "learning_rate": 7.413312805422834e-05, "loss": 0.5893, "step": 33170 }, { "epoch": 1.6842405858537144, "grad_norm": 0.02956217587831845, "learning_rate": 7.40170958234097e-05, "loss": 0.5852, "step": 33175 }, { "epoch": 1.684494421951746, "grad_norm": 0.031970607645328664, "learning_rate": 7.390114721091084e-05, "loss": 0.5603, "step": 33180 }, { "epoch": 1.6847482580497772, "grad_norm": 0.02807178054220606, "learning_rate": 7.378528223949194e-05, "loss": 0.6397, "step": 33185 }, { "epoch": 1.6850020941478088, "grad_norm": 0.02526609396210578, "learning_rate": 7.366950093189651e-05, "loss": 0.559, "step": 33190 }, { "epoch": 1.6852559302458403, "grad_norm": 0.025391993116590284, "learning_rate": 7.355380331085205e-05, "loss": 0.5972, "step": 33195 }, { "epoch": 1.6855097663438716, "grad_norm": 0.02652335380517712, "learning_rate": 7.343818939906915e-05, "loss": 0.5709, "step": 33200 }, { "epoch": 1.6857636024419032, "grad_norm": 0.02712595645060889, "learning_rate": 7.332265921924258e-05, "loss": 0.62, "step": 33205 }, { "epoch": 1.6860174385399347, "grad_norm": 0.025793911854951206, "learning_rate": 7.320721279405002e-05, "loss": 0.582, "step": 33210 }, { "epoch": 1.6862712746379662, "grad_norm": 0.027713051475045963, "learning_rate": 7.309185014615333e-05, "loss": 0.5659, "step": 33215 }, { "epoch": 1.6865251107359978, "grad_norm": 0.02398266826337735, "learning_rate": 7.29765712981973e-05, "loss": 0.5515, "step": 33220 }, { "epoch": 1.6867789468340293, "grad_norm": 0.024346741073865915, "learning_rate": 7.286137627281092e-05, "loss": 0.5574, "step": 33225 }, { "epoch": 1.6870327829320608, "grad_norm": 0.027771142699589836, "learning_rate": 7.274626509260612e-05, "loss": 0.6063, "step": 33230 }, { "epoch": 1.6872866190300924, "grad_norm": 0.027903620736017636, "learning_rate": 7.263123778017877e-05, "loss": 0.5747, "step": 33235 }, { "epoch": 1.687540455128124, "grad_norm": 0.027535405315954154, "learning_rate": 7.251629435810825e-05, "loss": 0.5834, "step": 33240 }, { "epoch": 1.6877942912261554, "grad_norm": 0.026042975269835098, "learning_rate": 7.240143484895718e-05, "loss": 0.5679, "step": 33245 }, { "epoch": 1.6880481273241867, "grad_norm": 0.025959306891259342, "learning_rate": 7.228665927527217e-05, "loss": 0.5917, "step": 33250 }, { "epoch": 1.6883019634222183, "grad_norm": 0.026445266603781905, "learning_rate": 7.217196765958278e-05, "loss": 0.5699, "step": 33255 }, { "epoch": 1.6885557995202498, "grad_norm": 0.027814206245422943, "learning_rate": 7.205736002440272e-05, "loss": 0.5524, "step": 33260 }, { "epoch": 1.6888096356182811, "grad_norm": 0.028827441775674603, "learning_rate": 7.19428363922286e-05, "loss": 0.5737, "step": 33265 }, { "epoch": 1.6890634717163127, "grad_norm": 0.02720648620773936, "learning_rate": 7.18283967855411e-05, "loss": 0.5959, "step": 33270 }, { "epoch": 1.6893173078143442, "grad_norm": 0.02662856360981668, "learning_rate": 7.171404122680391e-05, "loss": 0.5672, "step": 33275 }, { "epoch": 1.6895711439123757, "grad_norm": 0.029221736918608132, "learning_rate": 7.159976973846466e-05, "loss": 0.5876, "step": 33280 }, { "epoch": 1.6898249800104073, "grad_norm": 0.02829880034544521, "learning_rate": 7.14855823429541e-05, "loss": 0.5851, "step": 33285 }, { "epoch": 1.6900788161084388, "grad_norm": 0.02547746246020009, "learning_rate": 7.137147906268682e-05, "loss": 0.577, "step": 33290 }, { "epoch": 1.6903326522064703, "grad_norm": 0.026137064529774944, "learning_rate": 7.125745992006044e-05, "loss": 0.5834, "step": 33295 }, { "epoch": 1.6905864883045019, "grad_norm": 0.027855864677479535, "learning_rate": 7.114352493745674e-05, "loss": 0.5935, "step": 33300 }, { "epoch": 1.6908403244025334, "grad_norm": 0.028466576137303166, "learning_rate": 7.102967413724027e-05, "loss": 0.5964, "step": 33305 }, { "epoch": 1.691094160500565, "grad_norm": 0.025529513616761475, "learning_rate": 7.091590754175963e-05, "loss": 0.5467, "step": 33310 }, { "epoch": 1.6913479965985962, "grad_norm": 0.027272937465584887, "learning_rate": 7.080222517334639e-05, "loss": 0.5296, "step": 33315 }, { "epoch": 1.6916018326966278, "grad_norm": 0.02446926985263996, "learning_rate": 7.068862705431601e-05, "loss": 0.5909, "step": 33320 }, { "epoch": 1.6918556687946593, "grad_norm": 0.03148188116913504, "learning_rate": 7.057511320696708e-05, "loss": 0.5589, "step": 33325 }, { "epoch": 1.6921095048926906, "grad_norm": 0.025931269660345174, "learning_rate": 7.046168365358202e-05, "loss": 0.53, "step": 33330 }, { "epoch": 1.6923633409907222, "grad_norm": 0.025413494224212792, "learning_rate": 7.034833841642624e-05, "loss": 0.6063, "step": 33335 }, { "epoch": 1.6926171770887537, "grad_norm": 0.024732853512729188, "learning_rate": 7.023507751774905e-05, "loss": 0.5443, "step": 33340 }, { "epoch": 1.6928710131867852, "grad_norm": 0.025380999340214063, "learning_rate": 7.012190097978282e-05, "loss": 0.5738, "step": 33345 }, { "epoch": 1.6931248492848168, "grad_norm": 0.027719257714946346, "learning_rate": 7.000880882474375e-05, "loss": 0.5592, "step": 33350 }, { "epoch": 1.6933786853828483, "grad_norm": 0.026778996594897072, "learning_rate": 6.989580107483102e-05, "loss": 0.5866, "step": 33355 }, { "epoch": 1.6936325214808798, "grad_norm": 0.029829154112451513, "learning_rate": 6.978287775222758e-05, "loss": 0.5933, "step": 33360 }, { "epoch": 1.6938863575789114, "grad_norm": 0.029718544256822, "learning_rate": 6.967003887909989e-05, "loss": 0.5393, "step": 33365 }, { "epoch": 1.694140193676943, "grad_norm": 0.026137176624428812, "learning_rate": 6.95572844775974e-05, "loss": 0.564, "step": 33370 }, { "epoch": 1.6943940297749744, "grad_norm": 0.027883579997098156, "learning_rate": 6.944461456985346e-05, "loss": 0.587, "step": 33375 }, { "epoch": 1.6946478658730058, "grad_norm": 0.029894280823157867, "learning_rate": 6.933202917798443e-05, "loss": 0.5317, "step": 33380 }, { "epoch": 1.6949017019710373, "grad_norm": 0.03301224893806321, "learning_rate": 6.92195283240904e-05, "loss": 0.5717, "step": 33385 }, { "epoch": 1.6951555380690688, "grad_norm": 0.02603611188874925, "learning_rate": 6.910711203025455e-05, "loss": 0.5784, "step": 33390 }, { "epoch": 1.6954093741671004, "grad_norm": 0.030924006328101144, "learning_rate": 6.89947803185439e-05, "loss": 0.6081, "step": 33395 }, { "epoch": 1.6956632102651317, "grad_norm": 0.027362242773752817, "learning_rate": 6.888253321100829e-05, "loss": 0.594, "step": 33400 }, { "epoch": 1.6959170463631632, "grad_norm": 0.026043968511349345, "learning_rate": 6.877037072968157e-05, "loss": 0.5346, "step": 33405 }, { "epoch": 1.6961708824611947, "grad_norm": 0.026553387585261205, "learning_rate": 6.865829289658044e-05, "loss": 0.6074, "step": 33410 }, { "epoch": 1.6964247185592263, "grad_norm": 0.02524184844031332, "learning_rate": 6.85462997337053e-05, "loss": 0.5797, "step": 33415 }, { "epoch": 1.6966785546572578, "grad_norm": 0.02855677835323308, "learning_rate": 6.843439126303985e-05, "loss": 0.5744, "step": 33420 }, { "epoch": 1.6969323907552893, "grad_norm": 0.025080851454048727, "learning_rate": 6.83225675065513e-05, "loss": 0.547, "step": 33425 }, { "epoch": 1.6971862268533209, "grad_norm": 0.026797297148716558, "learning_rate": 6.821082848618988e-05, "loss": 0.5907, "step": 33430 }, { "epoch": 1.6974400629513524, "grad_norm": 0.03415668249806721, "learning_rate": 6.809917422388961e-05, "loss": 0.5947, "step": 33435 }, { "epoch": 1.697693899049384, "grad_norm": 0.028246245005384932, "learning_rate": 6.798760474156745e-05, "loss": 0.6159, "step": 33440 }, { "epoch": 1.6979477351474153, "grad_norm": 0.026143850365324267, "learning_rate": 6.787612006112409e-05, "loss": 0.5993, "step": 33445 }, { "epoch": 1.6982015712454468, "grad_norm": 0.026294871276885658, "learning_rate": 6.77647202044433e-05, "loss": 0.5537, "step": 33450 }, { "epoch": 1.6984554073434783, "grad_norm": 0.026353543364882725, "learning_rate": 6.765340519339252e-05, "loss": 0.5841, "step": 33455 }, { "epoch": 1.6987092434415099, "grad_norm": 0.02783445111971958, "learning_rate": 6.754217504982202e-05, "loss": 0.5688, "step": 33460 }, { "epoch": 1.6989630795395412, "grad_norm": 0.025647863749220626, "learning_rate": 6.743102979556604e-05, "loss": 0.5862, "step": 33465 }, { "epoch": 1.6992169156375727, "grad_norm": 0.027650276028216552, "learning_rate": 6.731996945244162e-05, "loss": 0.5568, "step": 33470 }, { "epoch": 1.6994707517356042, "grad_norm": 0.029498959398639054, "learning_rate": 6.720899404224934e-05, "loss": 0.5671, "step": 33475 }, { "epoch": 1.6997245878336358, "grad_norm": 0.024918039334963383, "learning_rate": 6.709810358677337e-05, "loss": 0.5499, "step": 33480 }, { "epoch": 1.6999784239316673, "grad_norm": 0.023226931645609118, "learning_rate": 6.698729810778065e-05, "loss": 0.5487, "step": 33485 }, { "epoch": 1.7002322600296988, "grad_norm": 0.027361569530506277, "learning_rate": 6.687657762702203e-05, "loss": 0.5795, "step": 33490 }, { "epoch": 1.7004860961277304, "grad_norm": 0.028181182240598925, "learning_rate": 6.67659421662311e-05, "loss": 0.5911, "step": 33495 }, { "epoch": 1.700739932225762, "grad_norm": 0.02605402942723535, "learning_rate": 6.665539174712532e-05, "loss": 0.5827, "step": 33500 }, { "epoch": 1.7009937683237935, "grad_norm": 0.025388486485803014, "learning_rate": 6.654492639140492e-05, "loss": 0.5598, "step": 33505 }, { "epoch": 1.701247604421825, "grad_norm": 0.028642760916963956, "learning_rate": 6.643454612075395e-05, "loss": 0.5966, "step": 33510 }, { "epoch": 1.7015014405198563, "grad_norm": 0.02971253183612174, "learning_rate": 6.632425095683925e-05, "loss": 0.5159, "step": 33515 }, { "epoch": 1.7017552766178878, "grad_norm": 0.026343807242782243, "learning_rate": 6.62140409213115e-05, "loss": 0.5805, "step": 33520 }, { "epoch": 1.7020091127159194, "grad_norm": 0.02732168306792421, "learning_rate": 6.610391603580412e-05, "loss": 0.5635, "step": 33525 }, { "epoch": 1.7022629488139507, "grad_norm": 0.02784887799802656, "learning_rate": 6.599387632193426e-05, "loss": 0.5823, "step": 33530 }, { "epoch": 1.7025167849119822, "grad_norm": 0.027514501741754065, "learning_rate": 6.588392180130198e-05, "loss": 0.6076, "step": 33535 }, { "epoch": 1.7027706210100138, "grad_norm": 0.039486895130747685, "learning_rate": 6.577405249549096e-05, "loss": 0.5648, "step": 33540 }, { "epoch": 1.7030244571080453, "grad_norm": 0.026268225877584418, "learning_rate": 6.566426842606793e-05, "loss": 0.5803, "step": 33545 }, { "epoch": 1.7032782932060768, "grad_norm": 0.0296354847729687, "learning_rate": 6.555456961458311e-05, "loss": 0.5903, "step": 33550 }, { "epoch": 1.7035321293041084, "grad_norm": 0.029214127391116108, "learning_rate": 6.544495608256957e-05, "loss": 0.5745, "step": 33555 }, { "epoch": 1.70378596540214, "grad_norm": 0.027702701942345618, "learning_rate": 6.533542785154412e-05, "loss": 0.6018, "step": 33560 }, { "epoch": 1.7040398015001714, "grad_norm": 0.026591706234840407, "learning_rate": 6.522598494300647e-05, "loss": 0.5708, "step": 33565 }, { "epoch": 1.704293637598203, "grad_norm": 0.02676978787345021, "learning_rate": 6.511662737843981e-05, "loss": 0.5952, "step": 33570 }, { "epoch": 1.7045474736962345, "grad_norm": 0.027770238317527486, "learning_rate": 6.500735517931033e-05, "loss": 0.5697, "step": 33575 }, { "epoch": 1.7048013097942658, "grad_norm": 0.027472192611621437, "learning_rate": 6.489816836706786e-05, "loss": 0.5419, "step": 33580 }, { "epoch": 1.7050551458922973, "grad_norm": 0.02626522368113404, "learning_rate": 6.478906696314496e-05, "loss": 0.5749, "step": 33585 }, { "epoch": 1.7053089819903289, "grad_norm": 0.026927575781493903, "learning_rate": 6.468005098895797e-05, "loss": 0.5763, "step": 33590 }, { "epoch": 1.7055628180883602, "grad_norm": 0.02551296112860592, "learning_rate": 6.457112046590585e-05, "loss": 0.6207, "step": 33595 }, { "epoch": 1.7058166541863917, "grad_norm": 0.02587510494772804, "learning_rate": 6.446227541537136e-05, "loss": 0.5743, "step": 33600 }, { "epoch": 1.7060704902844233, "grad_norm": 0.027633422032046783, "learning_rate": 6.43535158587203e-05, "loss": 0.597, "step": 33605 }, { "epoch": 1.7063243263824548, "grad_norm": 0.03294392247787146, "learning_rate": 6.424484181730134e-05, "loss": 0.5722, "step": 33610 }, { "epoch": 1.7065781624804863, "grad_norm": 0.027265032753798872, "learning_rate": 6.413625331244698e-05, "loss": 0.5605, "step": 33615 }, { "epoch": 1.7068319985785179, "grad_norm": 0.03045778112381498, "learning_rate": 6.402775036547231e-05, "loss": 0.5898, "step": 33620 }, { "epoch": 1.7070858346765494, "grad_norm": 0.026224042350299293, "learning_rate": 6.391933299767622e-05, "loss": 0.5813, "step": 33625 }, { "epoch": 1.707339670774581, "grad_norm": 0.024975888773289774, "learning_rate": 6.381100123034017e-05, "loss": 0.5712, "step": 33630 }, { "epoch": 1.7075935068726125, "grad_norm": 0.025527894902524603, "learning_rate": 6.370275508472945e-05, "loss": 0.5462, "step": 33635 }, { "epoch": 1.707847342970644, "grad_norm": 0.025600422521629158, "learning_rate": 6.359459458209194e-05, "loss": 0.5716, "step": 33640 }, { "epoch": 1.7081011790686753, "grad_norm": 0.02559886902869727, "learning_rate": 6.348651974365932e-05, "loss": 0.5455, "step": 33645 }, { "epoch": 1.7083550151667068, "grad_norm": 0.02610299906004576, "learning_rate": 6.337853059064586e-05, "loss": 0.5682, "step": 33650 }, { "epoch": 1.7086088512647384, "grad_norm": 0.026383072730500605, "learning_rate": 6.327062714424946e-05, "loss": 0.608, "step": 33655 }, { "epoch": 1.7088626873627697, "grad_norm": 0.027274039841103468, "learning_rate": 6.31628094256509e-05, "loss": 0.5943, "step": 33660 }, { "epoch": 1.7091165234608012, "grad_norm": 0.02507753468642289, "learning_rate": 6.305507745601446e-05, "loss": 0.5766, "step": 33665 }, { "epoch": 1.7093703595588328, "grad_norm": 0.026202702242565337, "learning_rate": 6.294743125648722e-05, "loss": 0.5625, "step": 33670 }, { "epoch": 1.7096241956568643, "grad_norm": 0.02726835013736652, "learning_rate": 6.28398708481997e-05, "loss": 0.59, "step": 33675 }, { "epoch": 1.7098780317548958, "grad_norm": 0.025507436456101824, "learning_rate": 6.273239625226534e-05, "loss": 0.5621, "step": 33680 }, { "epoch": 1.7101318678529274, "grad_norm": 0.026108076042087782, "learning_rate": 6.262500748978106e-05, "loss": 0.5737, "step": 33685 }, { "epoch": 1.710385703950959, "grad_norm": 0.028141845144459277, "learning_rate": 6.251770458182654e-05, "loss": 0.5425, "step": 33690 }, { "epoch": 1.7106395400489904, "grad_norm": 0.026996605507224856, "learning_rate": 6.241048754946493e-05, "loss": 0.5745, "step": 33695 }, { "epoch": 1.710893376147022, "grad_norm": 0.027268504760089466, "learning_rate": 6.23033564137423e-05, "loss": 0.5841, "step": 33700 }, { "epoch": 1.7111472122450535, "grad_norm": 0.028129934829398143, "learning_rate": 6.219631119568814e-05, "loss": 0.6141, "step": 33705 }, { "epoch": 1.7114010483430848, "grad_norm": 0.027267924341359787, "learning_rate": 6.208935191631465e-05, "loss": 0.561, "step": 33710 }, { "epoch": 1.7116548844411164, "grad_norm": 0.02633191685833144, "learning_rate": 6.19824785966176e-05, "loss": 0.6009, "step": 33715 }, { "epoch": 1.7119087205391479, "grad_norm": 0.024622651235337842, "learning_rate": 6.187569125757553e-05, "loss": 0.5618, "step": 33720 }, { "epoch": 1.7121625566371794, "grad_norm": 0.028834483224229226, "learning_rate": 6.176898992015034e-05, "loss": 0.559, "step": 33725 }, { "epoch": 1.7124163927352107, "grad_norm": 0.024965633182169458, "learning_rate": 6.166237460528706e-05, "loss": 0.5789, "step": 33730 }, { "epoch": 1.7126702288332423, "grad_norm": 0.024755786085603117, "learning_rate": 6.155584533391356e-05, "loss": 0.5677, "step": 33735 }, { "epoch": 1.7129240649312738, "grad_norm": 0.027168295181973848, "learning_rate": 6.144940212694122e-05, "loss": 0.5744, "step": 33740 }, { "epoch": 1.7131779010293053, "grad_norm": 0.02682789531319901, "learning_rate": 6.134304500526411e-05, "loss": 0.6138, "step": 33745 }, { "epoch": 1.7134317371273369, "grad_norm": 0.029139932517480354, "learning_rate": 6.123677398975974e-05, "loss": 0.5681, "step": 33750 }, { "epoch": 1.7136855732253684, "grad_norm": 0.02795458664504997, "learning_rate": 6.11305891012885e-05, "loss": 0.5614, "step": 33755 }, { "epoch": 1.7139394093234, "grad_norm": 0.026587242001386936, "learning_rate": 6.1024490360694016e-05, "loss": 0.5573, "step": 33760 }, { "epoch": 1.7141932454214315, "grad_norm": 0.028511112377845177, "learning_rate": 6.091847778880283e-05, "loss": 0.5773, "step": 33765 }, { "epoch": 1.714447081519463, "grad_norm": 0.027687542649870105, "learning_rate": 6.081255140642483e-05, "loss": 0.5859, "step": 33770 }, { "epoch": 1.7147009176174945, "grad_norm": 0.028029852397005516, "learning_rate": 6.0706711234352674e-05, "loss": 0.5757, "step": 33775 }, { "epoch": 1.7149547537155259, "grad_norm": 0.028831617641918498, "learning_rate": 6.06009572933624e-05, "loss": 0.6062, "step": 33780 }, { "epoch": 1.7152085898135574, "grad_norm": 0.03449059988683908, "learning_rate": 6.0495289604212853e-05, "loss": 0.5727, "step": 33785 }, { "epoch": 1.715462425911589, "grad_norm": 0.026376235786312305, "learning_rate": 6.038970818764633e-05, "loss": 0.5674, "step": 33790 }, { "epoch": 1.7157162620096202, "grad_norm": 0.029473005599942297, "learning_rate": 6.0284213064387586e-05, "loss": 0.5921, "step": 33795 }, { "epoch": 1.7159700981076518, "grad_norm": 0.030593195880179138, "learning_rate": 6.0178804255145106e-05, "loss": 0.5922, "step": 33800 }, { "epoch": 1.7162239342056833, "grad_norm": 0.023656347231777935, "learning_rate": 6.007348178060984e-05, "loss": 0.5906, "step": 33805 }, { "epoch": 1.7164777703037148, "grad_norm": 0.02715366510320437, "learning_rate": 5.996824566145631e-05, "loss": 0.576, "step": 33810 }, { "epoch": 1.7167316064017464, "grad_norm": 0.027233628985368723, "learning_rate": 5.98630959183416e-05, "loss": 0.5751, "step": 33815 }, { "epoch": 1.716985442499778, "grad_norm": 0.029778273865873933, "learning_rate": 5.975803257190632e-05, "loss": 0.5633, "step": 33820 }, { "epoch": 1.7172392785978094, "grad_norm": 0.02763109528758151, "learning_rate": 5.965305564277368e-05, "loss": 0.5414, "step": 33825 }, { "epoch": 1.717493114695841, "grad_norm": 0.02631737060468398, "learning_rate": 5.954816515155026e-05, "loss": 0.6009, "step": 33830 }, { "epoch": 1.7177469507938725, "grad_norm": 0.02518104673668617, "learning_rate": 5.944336111882542e-05, "loss": 0.5641, "step": 33835 }, { "epoch": 1.718000786891904, "grad_norm": 0.025913591904343853, "learning_rate": 5.933864356517177e-05, "loss": 0.5437, "step": 33840 }, { "epoch": 1.7182546229899354, "grad_norm": 0.027234721122653863, "learning_rate": 5.923401251114485e-05, "loss": 0.5918, "step": 33845 }, { "epoch": 1.718508459087967, "grad_norm": 0.02710648705434675, "learning_rate": 5.9129467977283135e-05, "loss": 0.5584, "step": 33850 }, { "epoch": 1.7187622951859984, "grad_norm": 0.02585856256434335, "learning_rate": 5.902500998410831e-05, "loss": 0.5455, "step": 33855 }, { "epoch": 1.7190161312840297, "grad_norm": 0.024568650608249726, "learning_rate": 5.892063855212476e-05, "loss": 0.5716, "step": 33860 }, { "epoch": 1.7192699673820613, "grad_norm": 0.02605778898093949, "learning_rate": 5.881635370182037e-05, "loss": 0.5896, "step": 33865 }, { "epoch": 1.7195238034800928, "grad_norm": 0.02483687841523801, "learning_rate": 5.8712155453665426e-05, "loss": 0.5656, "step": 33870 }, { "epoch": 1.7197776395781244, "grad_norm": 0.026531455932514127, "learning_rate": 5.8608043828113744e-05, "loss": 0.5651, "step": 33875 }, { "epoch": 1.7200314756761559, "grad_norm": 0.024478270953701278, "learning_rate": 5.8504018845601804e-05, "loss": 0.5785, "step": 33880 }, { "epoch": 1.7202853117741874, "grad_norm": 0.02830466144351759, "learning_rate": 5.840008052654927e-05, "loss": 0.5737, "step": 33885 }, { "epoch": 1.720539147872219, "grad_norm": 0.026887090266828503, "learning_rate": 5.8296228891358604e-05, "loss": 0.5787, "step": 33890 }, { "epoch": 1.7207929839702505, "grad_norm": 0.02748309979395072, "learning_rate": 5.81924639604155e-05, "loss": 0.567, "step": 33895 }, { "epoch": 1.721046820068282, "grad_norm": 0.027559886571015685, "learning_rate": 5.808878575408827e-05, "loss": 0.5608, "step": 33900 }, { "epoch": 1.7213006561663136, "grad_norm": 0.03164636472341146, "learning_rate": 5.798519429272875e-05, "loss": 0.5831, "step": 33905 }, { "epoch": 1.7215544922643449, "grad_norm": 0.028327584918699283, "learning_rate": 5.7881689596671226e-05, "loss": 0.5971, "step": 33910 }, { "epoch": 1.7218083283623764, "grad_norm": 0.026091375951503175, "learning_rate": 5.777827168623323e-05, "loss": 0.5883, "step": 33915 }, { "epoch": 1.722062164460408, "grad_norm": 0.027573593371325634, "learning_rate": 5.767494058171507e-05, "loss": 0.5513, "step": 33920 }, { "epoch": 1.7223160005584393, "grad_norm": 0.027006081722925245, "learning_rate": 5.757169630340031e-05, "loss": 0.5621, "step": 33925 }, { "epoch": 1.7225698366564708, "grad_norm": 0.029582577269474546, "learning_rate": 5.7468538871555064e-05, "loss": 0.5716, "step": 33930 }, { "epoch": 1.7228236727545023, "grad_norm": 0.027517995081465768, "learning_rate": 5.736546830642886e-05, "loss": 0.6187, "step": 33935 }, { "epoch": 1.7230775088525339, "grad_norm": 0.027017563789887872, "learning_rate": 5.726248462825373e-05, "loss": 0.5382, "step": 33940 }, { "epoch": 1.7233313449505654, "grad_norm": 0.028212317829367863, "learning_rate": 5.715958785724501e-05, "loss": 0.5681, "step": 33945 }, { "epoch": 1.723585181048597, "grad_norm": 0.02705786592183116, "learning_rate": 5.705677801360065e-05, "loss": 0.5739, "step": 33950 }, { "epoch": 1.7238390171466285, "grad_norm": 0.029049805501804776, "learning_rate": 5.69540551175019e-05, "loss": 0.5971, "step": 33955 }, { "epoch": 1.72409285324466, "grad_norm": 0.02829959126929204, "learning_rate": 5.6851419189112575e-05, "loss": 0.569, "step": 33960 }, { "epoch": 1.7243466893426915, "grad_norm": 0.025507991981840512, "learning_rate": 5.6748870248579666e-05, "loss": 0.5673, "step": 33965 }, { "epoch": 1.724600525440723, "grad_norm": 0.026159028417405056, "learning_rate": 5.6646408316033185e-05, "loss": 0.5877, "step": 33970 }, { "epoch": 1.7248543615387544, "grad_norm": 0.026044550583401334, "learning_rate": 5.654403341158565e-05, "loss": 0.603, "step": 33975 }, { "epoch": 1.725108197636786, "grad_norm": 0.026061794772908816, "learning_rate": 5.644174555533288e-05, "loss": 0.5507, "step": 33980 }, { "epoch": 1.7253620337348174, "grad_norm": 0.026840812469578933, "learning_rate": 5.633954476735337e-05, "loss": 0.5808, "step": 33985 }, { "epoch": 1.725615869832849, "grad_norm": 0.027289287072865882, "learning_rate": 5.623743106770879e-05, "loss": 0.5881, "step": 33990 }, { "epoch": 1.7258697059308803, "grad_norm": 0.028172430295427434, "learning_rate": 5.6135404476443384e-05, "loss": 0.5835, "step": 33995 }, { "epoch": 1.7261235420289118, "grad_norm": 0.027877473728314433, "learning_rate": 5.603346501358458e-05, "loss": 0.6328, "step": 34000 }, { "epoch": 1.7263773781269434, "grad_norm": 0.027793992491160092, "learning_rate": 5.593161269914249e-05, "loss": 0.5726, "step": 34005 }, { "epoch": 1.726631214224975, "grad_norm": 0.02687144296282653, "learning_rate": 5.5829847553110326e-05, "loss": 0.5323, "step": 34010 }, { "epoch": 1.7268850503230064, "grad_norm": 0.03002428016914681, "learning_rate": 5.572816959546389e-05, "loss": 0.5695, "step": 34015 }, { "epoch": 1.727138886421038, "grad_norm": 0.027486309995012368, "learning_rate": 5.562657884616223e-05, "loss": 0.5747, "step": 34020 }, { "epoch": 1.7273927225190695, "grad_norm": 0.026885204105737817, "learning_rate": 5.5525075325147054e-05, "loss": 0.5762, "step": 34025 }, { "epoch": 1.727646558617101, "grad_norm": 0.02659427886892295, "learning_rate": 5.542365905234309e-05, "loss": 0.5766, "step": 34030 }, { "epoch": 1.7279003947151326, "grad_norm": 0.02435167892332491, "learning_rate": 5.532233004765763e-05, "loss": 0.5517, "step": 34035 }, { "epoch": 1.728154230813164, "grad_norm": 0.026261917041916685, "learning_rate": 5.5221088330981274e-05, "loss": 0.5794, "step": 34040 }, { "epoch": 1.7284080669111954, "grad_norm": 0.028646515219902337, "learning_rate": 5.5119933922187115e-05, "loss": 0.5813, "step": 34045 }, { "epoch": 1.728661903009227, "grad_norm": 0.0263512431414132, "learning_rate": 5.501886684113139e-05, "loss": 0.5625, "step": 34050 }, { "epoch": 1.7289157391072585, "grad_norm": 0.02827676741419047, "learning_rate": 5.491788710765289e-05, "loss": 0.604, "step": 34055 }, { "epoch": 1.7291695752052898, "grad_norm": 0.026820125194088582, "learning_rate": 5.481699474157364e-05, "loss": 0.5713, "step": 34060 }, { "epoch": 1.7294234113033213, "grad_norm": 0.02858590233188263, "learning_rate": 5.4716189762698044e-05, "loss": 0.5778, "step": 34065 }, { "epoch": 1.7296772474013529, "grad_norm": 0.02678305506349344, "learning_rate": 5.461547219081392e-05, "loss": 0.5564, "step": 34070 }, { "epoch": 1.7299310834993844, "grad_norm": 0.024716975849465663, "learning_rate": 5.4514842045691346e-05, "loss": 0.5485, "step": 34075 }, { "epoch": 1.730184919597416, "grad_norm": 0.026447810885984545, "learning_rate": 5.441429934708369e-05, "loss": 0.564, "step": 34080 }, { "epoch": 1.7304387556954475, "grad_norm": 0.028254191203001225, "learning_rate": 5.431384411472701e-05, "loss": 0.6073, "step": 34085 }, { "epoch": 1.730692591793479, "grad_norm": 0.027710543709464346, "learning_rate": 5.421347636834001e-05, "loss": 0.5583, "step": 34090 }, { "epoch": 1.7309464278915105, "grad_norm": 0.02484038760386574, "learning_rate": 5.411319612762455e-05, "loss": 0.5433, "step": 34095 }, { "epoch": 1.731200263989542, "grad_norm": 0.028763342096312823, "learning_rate": 5.4013003412265004e-05, "loss": 0.5894, "step": 34100 }, { "epoch": 1.7314541000875736, "grad_norm": 0.02721214017035493, "learning_rate": 5.3912898241928796e-05, "loss": 0.5618, "step": 34105 }, { "epoch": 1.731707936185605, "grad_norm": 0.0278628618617866, "learning_rate": 5.3812880636265935e-05, "loss": 0.6127, "step": 34110 }, { "epoch": 1.7319617722836365, "grad_norm": 0.026133122194275132, "learning_rate": 5.371295061490961e-05, "loss": 0.5864, "step": 34115 }, { "epoch": 1.732215608381668, "grad_norm": 0.02585952196091886, "learning_rate": 5.3613108197475335e-05, "loss": 0.5767, "step": 34120 }, { "epoch": 1.7324694444796993, "grad_norm": 0.025859674950618913, "learning_rate": 5.3513353403561895e-05, "loss": 0.5708, "step": 34125 }, { "epoch": 1.7327232805777308, "grad_norm": 0.03016090592214234, "learning_rate": 5.3413686252750445e-05, "loss": 0.5783, "step": 34130 }, { "epoch": 1.7329771166757624, "grad_norm": 0.027759273587262344, "learning_rate": 5.3314106764605354e-05, "loss": 0.5924, "step": 34135 }, { "epoch": 1.733230952773794, "grad_norm": 0.026960187730145996, "learning_rate": 5.32146149586733e-05, "loss": 0.5965, "step": 34140 }, { "epoch": 1.7334847888718254, "grad_norm": 0.025391399919947218, "learning_rate": 5.3115210854484394e-05, "loss": 0.5442, "step": 34145 }, { "epoch": 1.733738624969857, "grad_norm": 0.02645204631363137, "learning_rate": 5.301589447155092e-05, "loss": 0.5628, "step": 34150 }, { "epoch": 1.7339924610678885, "grad_norm": 0.02667662691103661, "learning_rate": 5.2916665829368324e-05, "loss": 0.5726, "step": 34155 }, { "epoch": 1.73424629716592, "grad_norm": 0.02928302518549062, "learning_rate": 5.281752494741454e-05, "loss": 0.5889, "step": 34160 }, { "epoch": 1.7345001332639516, "grad_norm": 0.02556053564750337, "learning_rate": 5.2718471845150604e-05, "loss": 0.5782, "step": 34165 }, { "epoch": 1.7347539693619831, "grad_norm": 0.02673036917999311, "learning_rate": 5.261950654201997e-05, "loss": 0.5791, "step": 34170 }, { "epoch": 1.7350078054600144, "grad_norm": 0.028656542953519513, "learning_rate": 5.252062905744926e-05, "loss": 0.5697, "step": 34175 }, { "epoch": 1.735261641558046, "grad_norm": 0.031320424403364626, "learning_rate": 5.2421839410847436e-05, "loss": 0.5824, "step": 34180 }, { "epoch": 1.7355154776560775, "grad_norm": 0.026100412353193436, "learning_rate": 5.2323137621606345e-05, "loss": 0.576, "step": 34185 }, { "epoch": 1.7357693137541088, "grad_norm": 0.028373374077813755, "learning_rate": 5.2224523709100914e-05, "loss": 0.5914, "step": 34190 }, { "epoch": 1.7360231498521403, "grad_norm": 0.027557603918047017, "learning_rate": 5.212599769268833e-05, "loss": 0.5722, "step": 34195 }, { "epoch": 1.7362769859501719, "grad_norm": 0.026163327994186556, "learning_rate": 5.202755959170885e-05, "loss": 0.6115, "step": 34200 }, { "epoch": 1.7365308220482034, "grad_norm": 0.025803325652901783, "learning_rate": 5.1929209425485346e-05, "loss": 0.5637, "step": 34205 }, { "epoch": 1.736784658146235, "grad_norm": 0.028444433418944096, "learning_rate": 5.1830947213323656e-05, "loss": 0.5586, "step": 34210 }, { "epoch": 1.7370384942442665, "grad_norm": 0.028003105196599588, "learning_rate": 5.17327729745119e-05, "loss": 0.597, "step": 34215 }, { "epoch": 1.737292330342298, "grad_norm": 0.025346269106053535, "learning_rate": 5.163468672832139e-05, "loss": 0.55, "step": 34220 }, { "epoch": 1.7375461664403296, "grad_norm": 0.026478818011277265, "learning_rate": 5.1536688494005835e-05, "loss": 0.5918, "step": 34225 }, { "epoch": 1.737800002538361, "grad_norm": 0.02633609301175136, "learning_rate": 5.14387782908019e-05, "loss": 0.5782, "step": 34230 }, { "epoch": 1.7380538386363926, "grad_norm": 0.02650484650553022, "learning_rate": 5.134095613792872e-05, "loss": 0.595, "step": 34235 }, { "epoch": 1.738307674734424, "grad_norm": 0.025951275564884174, "learning_rate": 5.124322205458848e-05, "loss": 0.5446, "step": 34240 }, { "epoch": 1.7385615108324555, "grad_norm": 0.026348527309633205, "learning_rate": 5.1145576059965726e-05, "loss": 0.5965, "step": 34245 }, { "epoch": 1.738815346930487, "grad_norm": 0.027334434705542858, "learning_rate": 5.1048018173228015e-05, "loss": 0.5649, "step": 34250 }, { "epoch": 1.7390691830285185, "grad_norm": 0.02729803179321084, "learning_rate": 5.0950548413525365e-05, "loss": 0.5657, "step": 34255 }, { "epoch": 1.7393230191265499, "grad_norm": 0.026078108161653187, "learning_rate": 5.085316679999064e-05, "loss": 0.5453, "step": 34260 }, { "epoch": 1.7395768552245814, "grad_norm": 0.027295787048158045, "learning_rate": 5.075587335173948e-05, "loss": 0.5746, "step": 34265 }, { "epoch": 1.739830691322613, "grad_norm": 0.03259360189474335, "learning_rate": 5.06586680878699e-05, "loss": 0.561, "step": 34270 }, { "epoch": 1.7400845274206445, "grad_norm": 0.0290050187578121, "learning_rate": 5.056155102746302e-05, "loss": 0.575, "step": 34275 }, { "epoch": 1.740338363518676, "grad_norm": 0.027669346728559224, "learning_rate": 5.0464522189582194e-05, "loss": 0.6031, "step": 34280 }, { "epoch": 1.7405921996167075, "grad_norm": 0.02904684044001808, "learning_rate": 5.036758159327398e-05, "loss": 0.6192, "step": 34285 }, { "epoch": 1.740846035714739, "grad_norm": 0.02665086033442736, "learning_rate": 5.027072925756709e-05, "loss": 0.5826, "step": 34290 }, { "epoch": 1.7410998718127706, "grad_norm": 0.02780754715579722, "learning_rate": 5.017396520147333e-05, "loss": 0.5633, "step": 34295 }, { "epoch": 1.7413537079108021, "grad_norm": 0.027825507976394482, "learning_rate": 5.007728944398682e-05, "loss": 0.5683, "step": 34300 }, { "epoch": 1.7416075440088334, "grad_norm": 0.02838584706320733, "learning_rate": 4.9980702004084724e-05, "loss": 0.5884, "step": 34305 }, { "epoch": 1.741861380106865, "grad_norm": 0.028777071310005405, "learning_rate": 4.9884202900726486e-05, "loss": 0.5705, "step": 34310 }, { "epoch": 1.7421152162048965, "grad_norm": 0.028045692837075708, "learning_rate": 4.978779215285456e-05, "loss": 0.5435, "step": 34315 }, { "epoch": 1.742369052302928, "grad_norm": 0.02710150806653718, "learning_rate": 4.9691469779393706e-05, "loss": 0.591, "step": 34320 }, { "epoch": 1.7426228884009594, "grad_norm": 0.030524692952728645, "learning_rate": 4.959523579925179e-05, "loss": 0.5909, "step": 34325 }, { "epoch": 1.742876724498991, "grad_norm": 0.02668873873708883, "learning_rate": 4.949909023131888e-05, "loss": 0.6027, "step": 34330 }, { "epoch": 1.7431305605970224, "grad_norm": 0.02846877760098295, "learning_rate": 4.940303309446798e-05, "loss": 0.5952, "step": 34335 }, { "epoch": 1.743384396695054, "grad_norm": 0.027517023333170278, "learning_rate": 4.9307064407554445e-05, "loss": 0.572, "step": 34340 }, { "epoch": 1.7436382327930855, "grad_norm": 0.024678031302718485, "learning_rate": 4.921118418941667e-05, "loss": 0.5787, "step": 34345 }, { "epoch": 1.743892068891117, "grad_norm": 0.028110194482919255, "learning_rate": 4.911539245887525e-05, "loss": 0.5472, "step": 34350 }, { "epoch": 1.7441459049891486, "grad_norm": 0.026824717523282984, "learning_rate": 4.901968923473382e-05, "loss": 0.5838, "step": 34355 }, { "epoch": 1.74439974108718, "grad_norm": 0.024967665621948763, "learning_rate": 4.8924074535778294e-05, "loss": 0.5482, "step": 34360 }, { "epoch": 1.7446535771852116, "grad_norm": 0.02751178777390818, "learning_rate": 4.882854838077755e-05, "loss": 0.6142, "step": 34365 }, { "epoch": 1.7449074132832432, "grad_norm": 0.02979627126509869, "learning_rate": 4.873311078848264e-05, "loss": 0.5787, "step": 34370 }, { "epoch": 1.7451612493812745, "grad_norm": 0.027617565710674807, "learning_rate": 4.863776177762769e-05, "loss": 0.5895, "step": 34375 }, { "epoch": 1.745415085479306, "grad_norm": 0.027002250290528815, "learning_rate": 4.854250136692912e-05, "loss": 0.5988, "step": 34380 }, { "epoch": 1.7456689215773376, "grad_norm": 0.029966347098498936, "learning_rate": 4.844732957508607e-05, "loss": 0.5771, "step": 34385 }, { "epoch": 1.7459227576753689, "grad_norm": 0.028794286107210985, "learning_rate": 4.8352246420780456e-05, "loss": 0.5626, "step": 34390 }, { "epoch": 1.7461765937734004, "grad_norm": 0.02520563015843576, "learning_rate": 4.825725192267638e-05, "loss": 0.5667, "step": 34395 }, { "epoch": 1.746430429871432, "grad_norm": 0.027702185045968792, "learning_rate": 4.816234609942105e-05, "loss": 0.5841, "step": 34400 }, { "epoch": 1.7466842659694635, "grad_norm": 0.02563752303935389, "learning_rate": 4.806752896964373e-05, "loss": 0.5837, "step": 34405 }, { "epoch": 1.746938102067495, "grad_norm": 0.026879999677309956, "learning_rate": 4.79728005519568e-05, "loss": 0.5812, "step": 34410 }, { "epoch": 1.7471919381655265, "grad_norm": 0.026567841159395124, "learning_rate": 4.787816086495478e-05, "loss": 0.5905, "step": 34415 }, { "epoch": 1.747445774263558, "grad_norm": 0.025575931481141224, "learning_rate": 4.7783609927215145e-05, "loss": 0.5472, "step": 34420 }, { "epoch": 1.7476996103615896, "grad_norm": 0.02544133647179094, "learning_rate": 4.7689147757297605e-05, "loss": 0.5504, "step": 34425 }, { "epoch": 1.7479534464596211, "grad_norm": 0.026530846370447555, "learning_rate": 4.7594774373744766e-05, "loss": 0.5924, "step": 34430 }, { "epoch": 1.7482072825576527, "grad_norm": 0.02826996657077321, "learning_rate": 4.750048979508148e-05, "loss": 0.5819, "step": 34435 }, { "epoch": 1.748461118655684, "grad_norm": 0.027551941734736504, "learning_rate": 4.7406294039815553e-05, "loss": 0.5891, "step": 34440 }, { "epoch": 1.7487149547537155, "grad_norm": 0.025057360508628834, "learning_rate": 4.731218712643681e-05, "loss": 0.557, "step": 34445 }, { "epoch": 1.748968790851747, "grad_norm": 0.027209148532919596, "learning_rate": 4.721816907341836e-05, "loss": 0.6145, "step": 34450 }, { "epoch": 1.7492226269497784, "grad_norm": 0.02497374817702334, "learning_rate": 4.712423989921527e-05, "loss": 0.5664, "step": 34455 }, { "epoch": 1.74947646304781, "grad_norm": 0.028625462786736822, "learning_rate": 4.703039962226541e-05, "loss": 0.5939, "step": 34460 }, { "epoch": 1.7497302991458414, "grad_norm": 0.02538658177288655, "learning_rate": 4.693664826098909e-05, "loss": 0.5383, "step": 34465 }, { "epoch": 1.749984135243873, "grad_norm": 0.031821178992759935, "learning_rate": 4.684298583378943e-05, "loss": 0.5716, "step": 34470 }, { "epoch": 1.7502379713419045, "grad_norm": 0.0269995797084954, "learning_rate": 4.674941235905161e-05, "loss": 0.579, "step": 34475 }, { "epoch": 1.750491807439936, "grad_norm": 0.028023661621050994, "learning_rate": 4.6655927855143886e-05, "loss": 0.577, "step": 34480 }, { "epoch": 1.7507456435379676, "grad_norm": 0.027056413065108066, "learning_rate": 4.656253234041663e-05, "loss": 0.614, "step": 34485 }, { "epoch": 1.7509994796359991, "grad_norm": 0.025471377250664062, "learning_rate": 4.646922583320307e-05, "loss": 0.5845, "step": 34490 }, { "epoch": 1.7512533157340306, "grad_norm": 0.02734595438255556, "learning_rate": 4.637600835181866e-05, "loss": 0.5956, "step": 34495 }, { "epoch": 1.7515071518320622, "grad_norm": 0.027886689309992427, "learning_rate": 4.6282879914561646e-05, "loss": 0.5694, "step": 34500 }, { "epoch": 1.7517609879300935, "grad_norm": 0.026347774558792666, "learning_rate": 4.6189840539712534e-05, "loss": 0.569, "step": 34505 }, { "epoch": 1.752014824028125, "grad_norm": 0.026172887145664854, "learning_rate": 4.609689024553459e-05, "loss": 0.5939, "step": 34510 }, { "epoch": 1.7522686601261566, "grad_norm": 0.026001589530267533, "learning_rate": 4.600402905027357e-05, "loss": 0.5904, "step": 34515 }, { "epoch": 1.7525224962241879, "grad_norm": 0.02507345258199712, "learning_rate": 4.5911256972157476e-05, "loss": 0.5595, "step": 34520 }, { "epoch": 1.7527763323222194, "grad_norm": 0.040444085730050816, "learning_rate": 4.581857402939721e-05, "loss": 0.5743, "step": 34525 }, { "epoch": 1.753030168420251, "grad_norm": 0.027311881241162236, "learning_rate": 4.572598024018571e-05, "loss": 0.6006, "step": 34530 }, { "epoch": 1.7532840045182825, "grad_norm": 0.03125919082177523, "learning_rate": 4.563347562269898e-05, "loss": 0.5558, "step": 34535 }, { "epoch": 1.753537840616314, "grad_norm": 0.027768138647873808, "learning_rate": 4.5541060195094965e-05, "loss": 0.5654, "step": 34540 }, { "epoch": 1.7537916767143455, "grad_norm": 0.026733338711322666, "learning_rate": 4.5448733975514524e-05, "loss": 0.5713, "step": 34545 }, { "epoch": 1.754045512812377, "grad_norm": 0.026977126548794243, "learning_rate": 4.535649698208066e-05, "loss": 0.5681, "step": 34550 }, { "epoch": 1.7542993489104086, "grad_norm": 0.026180325651852493, "learning_rate": 4.526434923289924e-05, "loss": 0.5587, "step": 34555 }, { "epoch": 1.7545531850084402, "grad_norm": 0.02810461138350593, "learning_rate": 4.517229074605822e-05, "loss": 0.6038, "step": 34560 }, { "epoch": 1.7548070211064717, "grad_norm": 0.028332658805006142, "learning_rate": 4.508032153962832e-05, "loss": 0.5954, "step": 34565 }, { "epoch": 1.755060857204503, "grad_norm": 0.027268988156026817, "learning_rate": 4.49884416316626e-05, "loss": 0.5873, "step": 34570 }, { "epoch": 1.7553146933025345, "grad_norm": 0.025259256130790414, "learning_rate": 4.489665104019675e-05, "loss": 0.5827, "step": 34575 }, { "epoch": 1.755568529400566, "grad_norm": 0.02876636527740156, "learning_rate": 4.4804949783248564e-05, "loss": 0.5702, "step": 34580 }, { "epoch": 1.7558223654985976, "grad_norm": 0.025367755053679362, "learning_rate": 4.471333787881881e-05, "loss": 0.5822, "step": 34585 }, { "epoch": 1.756076201596629, "grad_norm": 0.02636830024889624, "learning_rate": 4.4621815344890235e-05, "loss": 0.5527, "step": 34590 }, { "epoch": 1.7563300376946605, "grad_norm": 0.026837181967578904, "learning_rate": 4.453038219942845e-05, "loss": 0.6007, "step": 34595 }, { "epoch": 1.756583873792692, "grad_norm": 0.02914527598895184, "learning_rate": 4.443903846038111e-05, "loss": 0.583, "step": 34600 }, { "epoch": 1.7568377098907235, "grad_norm": 0.030624915917167626, "learning_rate": 4.4347784145678695e-05, "loss": 0.5748, "step": 34605 }, { "epoch": 1.757091545988755, "grad_norm": 0.027853798714809495, "learning_rate": 4.425661927323388e-05, "loss": 0.594, "step": 34610 }, { "epoch": 1.7573453820867866, "grad_norm": 0.028816297804919042, "learning_rate": 4.416554386094196e-05, "loss": 0.5432, "step": 34615 }, { "epoch": 1.7575992181848181, "grad_norm": 0.028053770733313752, "learning_rate": 4.407455792668047e-05, "loss": 0.6192, "step": 34620 }, { "epoch": 1.7578530542828497, "grad_norm": 0.028194859948276495, "learning_rate": 4.3983661488309565e-05, "loss": 0.5512, "step": 34625 }, { "epoch": 1.7581068903808812, "grad_norm": 0.026114572619138302, "learning_rate": 4.389285456367181e-05, "loss": 0.5948, "step": 34630 }, { "epoch": 1.7583607264789127, "grad_norm": 0.026739185650797578, "learning_rate": 4.380213717059206e-05, "loss": 0.5966, "step": 34635 }, { "epoch": 1.758614562576944, "grad_norm": 0.02594627545694842, "learning_rate": 4.371150932687784e-05, "loss": 0.568, "step": 34640 }, { "epoch": 1.7588683986749756, "grad_norm": 0.026085398207684175, "learning_rate": 4.3620971050318706e-05, "loss": 0.5784, "step": 34645 }, { "epoch": 1.759122234773007, "grad_norm": 0.027466592709797736, "learning_rate": 4.3530522358687045e-05, "loss": 0.5644, "step": 34650 }, { "epoch": 1.7593760708710384, "grad_norm": 0.02499229167236782, "learning_rate": 4.3440163269737374e-05, "loss": 0.5704, "step": 34655 }, { "epoch": 1.75962990696907, "grad_norm": 0.028323993802141408, "learning_rate": 4.334989380120691e-05, "loss": 0.5561, "step": 34660 }, { "epoch": 1.7598837430671015, "grad_norm": 0.026019409971321343, "learning_rate": 4.3259713970814904e-05, "loss": 0.5877, "step": 34665 }, { "epoch": 1.760137579165133, "grad_norm": 0.026121830617099023, "learning_rate": 4.316962379626333e-05, "loss": 0.5883, "step": 34670 }, { "epoch": 1.7603914152631646, "grad_norm": 0.028129455053351148, "learning_rate": 4.3079623295236345e-05, "loss": 0.5149, "step": 34675 }, { "epoch": 1.760645251361196, "grad_norm": 0.02899605176623769, "learning_rate": 4.298971248540068e-05, "loss": 0.574, "step": 34680 }, { "epoch": 1.7608990874592276, "grad_norm": 0.027316101973992174, "learning_rate": 4.2899891384405196e-05, "loss": 0.5411, "step": 34685 }, { "epoch": 1.7611529235572592, "grad_norm": 0.02699579486971392, "learning_rate": 4.281016000988169e-05, "loss": 0.5752, "step": 34690 }, { "epoch": 1.7614067596552907, "grad_norm": 0.03250960696033901, "learning_rate": 4.2720518379443684e-05, "loss": 0.5837, "step": 34695 }, { "epoch": 1.7616605957533222, "grad_norm": 0.02553729708257845, "learning_rate": 4.263096651068754e-05, "loss": 0.5751, "step": 34700 }, { "epoch": 1.7619144318513535, "grad_norm": 0.025102321464401816, "learning_rate": 4.254150442119164e-05, "loss": 0.5919, "step": 34705 }, { "epoch": 1.762168267949385, "grad_norm": 0.02452904437375022, "learning_rate": 4.2452132128517226e-05, "loss": 0.5447, "step": 34710 }, { "epoch": 1.7624221040474166, "grad_norm": 0.02779673892994203, "learning_rate": 4.236284965020737e-05, "loss": 0.5479, "step": 34715 }, { "epoch": 1.762675940145448, "grad_norm": 0.025955182807262365, "learning_rate": 4.227365700378799e-05, "loss": 0.5828, "step": 34720 }, { "epoch": 1.7629297762434795, "grad_norm": 0.026429031635278273, "learning_rate": 4.2184554206767034e-05, "loss": 0.5805, "step": 34725 }, { "epoch": 1.763183612341511, "grad_norm": 0.027706774714792163, "learning_rate": 4.209554127663495e-05, "loss": 0.5714, "step": 34730 }, { "epoch": 1.7634374484395425, "grad_norm": 0.0288213838771143, "learning_rate": 4.200661823086454e-05, "loss": 0.5883, "step": 34735 }, { "epoch": 1.763691284537574, "grad_norm": 0.029334462770685004, "learning_rate": 4.191778508691102e-05, "loss": 0.6016, "step": 34740 }, { "epoch": 1.7639451206356056, "grad_norm": 0.025154935843672788, "learning_rate": 4.182904186221176e-05, "loss": 0.5698, "step": 34745 }, { "epoch": 1.7641989567336371, "grad_norm": 0.025052626974298258, "learning_rate": 4.174038857418666e-05, "loss": 0.524, "step": 34750 }, { "epoch": 1.7644527928316687, "grad_norm": 0.027367077459828016, "learning_rate": 4.165182524023803e-05, "loss": 0.5747, "step": 34755 }, { "epoch": 1.7647066289297002, "grad_norm": 0.026101889038873435, "learning_rate": 4.156335187775029e-05, "loss": 0.5589, "step": 34760 }, { "epoch": 1.7649604650277317, "grad_norm": 0.026912170686723448, "learning_rate": 4.1474968504090385e-05, "loss": 0.5646, "step": 34765 }, { "epoch": 1.765214301125763, "grad_norm": 0.026218739254728632, "learning_rate": 4.1386675136607434e-05, "loss": 0.5647, "step": 34770 }, { "epoch": 1.7654681372237946, "grad_norm": 0.028345560880919773, "learning_rate": 4.129847179263318e-05, "loss": 0.5523, "step": 34775 }, { "epoch": 1.7657219733218261, "grad_norm": 0.02738018584679926, "learning_rate": 4.121035848948124e-05, "loss": 0.5754, "step": 34780 }, { "epoch": 1.7659758094198574, "grad_norm": 0.028402469514159624, "learning_rate": 4.112233524444803e-05, "loss": 0.6073, "step": 34785 }, { "epoch": 1.766229645517889, "grad_norm": 0.026447575436928848, "learning_rate": 4.103440207481196e-05, "loss": 0.5527, "step": 34790 }, { "epoch": 1.7664834816159205, "grad_norm": 0.027281021364748727, "learning_rate": 4.094655899783395e-05, "loss": 0.5552, "step": 34795 }, { "epoch": 1.766737317713952, "grad_norm": 0.029199048668676064, "learning_rate": 4.085880603075703e-05, "loss": 0.5542, "step": 34800 }, { "epoch": 1.7669911538119836, "grad_norm": 0.025190435108131293, "learning_rate": 4.077114319080671e-05, "loss": 0.5387, "step": 34805 }, { "epoch": 1.767244989910015, "grad_norm": 0.026178758662066, "learning_rate": 4.068357049519089e-05, "loss": 0.5629, "step": 34810 }, { "epoch": 1.7674988260080466, "grad_norm": 0.0293134665568765, "learning_rate": 4.0596087961099595e-05, "loss": 0.6152, "step": 34815 }, { "epoch": 1.7677526621060782, "grad_norm": 0.02573323363532793, "learning_rate": 4.0508695605705136e-05, "loss": 0.5407, "step": 34820 }, { "epoch": 1.7680064982041097, "grad_norm": 0.02748287814396991, "learning_rate": 4.042139344616236e-05, "loss": 0.5659, "step": 34825 }, { "epoch": 1.7682603343021412, "grad_norm": 0.025987228961287245, "learning_rate": 4.033418149960799e-05, "loss": 0.5818, "step": 34830 }, { "epoch": 1.7685141704001726, "grad_norm": 0.0270274314396361, "learning_rate": 4.0247059783161565e-05, "loss": 0.5443, "step": 34835 }, { "epoch": 1.768768006498204, "grad_norm": 0.026461295369809488, "learning_rate": 4.0160028313924456e-05, "loss": 0.6043, "step": 34840 }, { "epoch": 1.7690218425962356, "grad_norm": 0.02636815634023551, "learning_rate": 4.007308710898061e-05, "loss": 0.5674, "step": 34845 }, { "epoch": 1.7692756786942672, "grad_norm": 0.027694279269560567, "learning_rate": 3.998623618539604e-05, "loss": 0.6042, "step": 34850 }, { "epoch": 1.7695295147922985, "grad_norm": 0.027621153118650803, "learning_rate": 3.9899475560219336e-05, "loss": 0.5755, "step": 34855 }, { "epoch": 1.76978335089033, "grad_norm": 0.027219299554404146, "learning_rate": 3.981280525048098e-05, "loss": 0.5777, "step": 34860 }, { "epoch": 1.7700371869883615, "grad_norm": 0.026950474196703503, "learning_rate": 3.972622527319397e-05, "loss": 0.5756, "step": 34865 }, { "epoch": 1.770291023086393, "grad_norm": 0.03927608292485455, "learning_rate": 3.963973564535361e-05, "loss": 0.553, "step": 34870 }, { "epoch": 1.7705448591844246, "grad_norm": 0.024559053630240316, "learning_rate": 3.955333638393732e-05, "loss": 0.546, "step": 34875 }, { "epoch": 1.7707986952824561, "grad_norm": 0.024672864573209018, "learning_rate": 3.9467027505904916e-05, "loss": 0.5382, "step": 34880 }, { "epoch": 1.7710525313804877, "grad_norm": 0.026381411281292715, "learning_rate": 3.938080902819824e-05, "loss": 0.5863, "step": 34885 }, { "epoch": 1.7713063674785192, "grad_norm": 0.02727669806478831, "learning_rate": 3.929468096774175e-05, "loss": 0.5846, "step": 34890 }, { "epoch": 1.7715602035765508, "grad_norm": 0.027321021961464388, "learning_rate": 3.92086433414417e-05, "loss": 0.5709, "step": 34895 }, { "epoch": 1.7718140396745823, "grad_norm": 0.02870941700977807, "learning_rate": 3.9122696166187186e-05, "loss": 0.5753, "step": 34900 }, { "epoch": 1.7720678757726136, "grad_norm": 0.0280824714079145, "learning_rate": 3.903683945884884e-05, "loss": 0.5485, "step": 34905 }, { "epoch": 1.7723217118706451, "grad_norm": 0.028237693414917893, "learning_rate": 3.895107323628022e-05, "loss": 0.586, "step": 34910 }, { "epoch": 1.7725755479686767, "grad_norm": 0.026998656055274992, "learning_rate": 3.8865397515316645e-05, "loss": 0.5681, "step": 34915 }, { "epoch": 1.772829384066708, "grad_norm": 0.0234616343837121, "learning_rate": 3.8779812312775885e-05, "loss": 0.5364, "step": 34920 }, { "epoch": 1.7730832201647395, "grad_norm": 0.03076729475274499, "learning_rate": 3.869431764545772e-05, "loss": 0.562, "step": 34925 }, { "epoch": 1.773337056262771, "grad_norm": 0.02844930380199644, "learning_rate": 3.860891353014462e-05, "loss": 0.5423, "step": 34930 }, { "epoch": 1.7735908923608026, "grad_norm": 0.02688083915781989, "learning_rate": 3.8523599983600776e-05, "loss": 0.5535, "step": 34935 }, { "epoch": 1.7738447284588341, "grad_norm": 0.028296594443821533, "learning_rate": 3.843837702257291e-05, "loss": 0.5552, "step": 34940 }, { "epoch": 1.7740985645568657, "grad_norm": 0.02542636555043672, "learning_rate": 3.835324466378981e-05, "loss": 0.5736, "step": 34945 }, { "epoch": 1.7743524006548972, "grad_norm": 0.026931065451114636, "learning_rate": 3.82682029239626e-05, "loss": 0.5535, "step": 34950 }, { "epoch": 1.7746062367529287, "grad_norm": 0.030170765271243762, "learning_rate": 3.8183251819784436e-05, "loss": 0.5742, "step": 34955 }, { "epoch": 1.7748600728509603, "grad_norm": 0.028075154748267283, "learning_rate": 3.8098391367930976e-05, "loss": 0.5931, "step": 34960 }, { "epoch": 1.7751139089489918, "grad_norm": 0.027551000846845863, "learning_rate": 3.8013621585059665e-05, "loss": 0.6087, "step": 34965 }, { "epoch": 1.775367745047023, "grad_norm": 0.02783825761841286, "learning_rate": 3.7928942487810594e-05, "loss": 0.5891, "step": 34970 }, { "epoch": 1.7756215811450546, "grad_norm": 0.028933084340785235, "learning_rate": 3.7844354092805735e-05, "loss": 0.5742, "step": 34975 }, { "epoch": 1.7758754172430862, "grad_norm": 0.025282945475065335, "learning_rate": 3.775985641664942e-05, "loss": 0.5763, "step": 34980 }, { "epoch": 1.7761292533411175, "grad_norm": 0.02714491613502592, "learning_rate": 3.767544947592805e-05, "loss": 0.5554, "step": 34985 }, { "epoch": 1.776383089439149, "grad_norm": 0.0290037438711093, "learning_rate": 3.759113328721036e-05, "loss": 0.5543, "step": 34990 }, { "epoch": 1.7766369255371806, "grad_norm": 0.025777621288877388, "learning_rate": 3.750690786704725e-05, "loss": 0.5466, "step": 34995 }, { "epoch": 1.776890761635212, "grad_norm": 0.025738659113873026, "learning_rate": 3.742277323197158e-05, "loss": 0.581, "step": 35000 }, { "epoch": 1.7771445977332436, "grad_norm": 0.028980979364536245, "learning_rate": 3.733872939849875e-05, "loss": 0.5849, "step": 35005 }, { "epoch": 1.7773984338312752, "grad_norm": 0.02874048885352796, "learning_rate": 3.725477638312591e-05, "loss": 0.5725, "step": 35010 }, { "epoch": 1.7776522699293067, "grad_norm": 0.026110588134437843, "learning_rate": 3.717091420233293e-05, "loss": 0.5422, "step": 35015 }, { "epoch": 1.7779061060273382, "grad_norm": 0.028323929546348783, "learning_rate": 3.708714287258125e-05, "loss": 0.556, "step": 35020 }, { "epoch": 1.7781599421253698, "grad_norm": 0.025916364466222818, "learning_rate": 3.700346241031494e-05, "loss": 0.551, "step": 35025 }, { "epoch": 1.7784137782234013, "grad_norm": 0.028773431626986295, "learning_rate": 3.691987283195991e-05, "loss": 0.5925, "step": 35030 }, { "epoch": 1.7786676143214326, "grad_norm": 0.027360973556182788, "learning_rate": 3.68363741539246e-05, "loss": 0.5962, "step": 35035 }, { "epoch": 1.7789214504194641, "grad_norm": 0.026919915671010024, "learning_rate": 3.675296639259912e-05, "loss": 0.5936, "step": 35040 }, { "epoch": 1.7791752865174957, "grad_norm": 0.026192917369802747, "learning_rate": 3.66696495643562e-05, "loss": 0.564, "step": 35045 }, { "epoch": 1.779429122615527, "grad_norm": 0.025969359531232798, "learning_rate": 3.6586423685550374e-05, "loss": 0.5729, "step": 35050 }, { "epoch": 1.7796829587135585, "grad_norm": 0.030310378500678335, "learning_rate": 3.6503288772518626e-05, "loss": 0.593, "step": 35055 }, { "epoch": 1.77993679481159, "grad_norm": 0.02526028448575393, "learning_rate": 3.64202448415798e-05, "loss": 0.561, "step": 35060 }, { "epoch": 1.7801906309096216, "grad_norm": 0.025776592107600138, "learning_rate": 3.6337291909035065e-05, "loss": 0.556, "step": 35065 }, { "epoch": 1.7804444670076531, "grad_norm": 0.02539676749903186, "learning_rate": 3.625442999116763e-05, "loss": 0.5419, "step": 35070 }, { "epoch": 1.7806983031056847, "grad_norm": 0.02724291026795106, "learning_rate": 3.6171659104242914e-05, "loss": 0.5782, "step": 35075 }, { "epoch": 1.7809521392037162, "grad_norm": 0.024919869005302197, "learning_rate": 3.608897926450838e-05, "loss": 0.5664, "step": 35080 }, { "epoch": 1.7812059753017477, "grad_norm": 0.02733226547741776, "learning_rate": 3.600639048819371e-05, "loss": 0.552, "step": 35085 }, { "epoch": 1.7814598113997793, "grad_norm": 0.026731850754375328, "learning_rate": 3.592389279151065e-05, "loss": 0.5909, "step": 35090 }, { "epoch": 1.7817136474978108, "grad_norm": 0.025248477914745392, "learning_rate": 3.584148619065314e-05, "loss": 0.5547, "step": 35095 }, { "epoch": 1.7819674835958421, "grad_norm": 0.02623101242047658, "learning_rate": 3.575917070179702e-05, "loss": 0.55, "step": 35100 }, { "epoch": 1.7822213196938737, "grad_norm": 0.027339498786589443, "learning_rate": 3.567694634110058e-05, "loss": 0.6083, "step": 35105 }, { "epoch": 1.7824751557919052, "grad_norm": 0.027297464942973977, "learning_rate": 3.559481312470403e-05, "loss": 0.5937, "step": 35110 }, { "epoch": 1.7827289918899367, "grad_norm": 0.02720351660375938, "learning_rate": 3.551277106872963e-05, "loss": 0.5955, "step": 35115 }, { "epoch": 1.782982827987968, "grad_norm": 0.029733835172789883, "learning_rate": 3.5430820189281954e-05, "loss": 0.5798, "step": 35120 }, { "epoch": 1.7832366640859996, "grad_norm": 0.029423097138670938, "learning_rate": 3.53489605024474e-05, "loss": 0.6251, "step": 35125 }, { "epoch": 1.783490500184031, "grad_norm": 0.02701172901391294, "learning_rate": 3.526719202429474e-05, "loss": 0.6021, "step": 35130 }, { "epoch": 1.7837443362820626, "grad_norm": 0.028718093736446838, "learning_rate": 3.518551477087462e-05, "loss": 0.5751, "step": 35135 }, { "epoch": 1.7839981723800942, "grad_norm": 0.026181714631097114, "learning_rate": 3.5103928758219995e-05, "loss": 0.5974, "step": 35140 }, { "epoch": 1.7842520084781257, "grad_norm": 0.025534803511708826, "learning_rate": 3.5022434002345615e-05, "loss": 0.5915, "step": 35145 }, { "epoch": 1.7845058445761572, "grad_norm": 0.02642655703142115, "learning_rate": 3.4941030519248685e-05, "loss": 0.5606, "step": 35150 }, { "epoch": 1.7847596806741888, "grad_norm": 0.025939332465112302, "learning_rate": 3.485971832490814e-05, "loss": 0.5777, "step": 35155 }, { "epoch": 1.7850135167722203, "grad_norm": 0.027348605413695805, "learning_rate": 3.477849743528533e-05, "loss": 0.5815, "step": 35160 }, { "epoch": 1.7852673528702518, "grad_norm": 0.026255114341193327, "learning_rate": 3.469736786632327e-05, "loss": 0.5683, "step": 35165 }, { "epoch": 1.7855211889682832, "grad_norm": 0.028350823213357402, "learning_rate": 3.461632963394756e-05, "loss": 0.5776, "step": 35170 }, { "epoch": 1.7857750250663147, "grad_norm": 0.02514811891023813, "learning_rate": 3.453538275406542e-05, "loss": 0.5633, "step": 35175 }, { "epoch": 1.7860288611643462, "grad_norm": 0.02785730329079368, "learning_rate": 3.445452724256648e-05, "loss": 0.5677, "step": 35180 }, { "epoch": 1.7862826972623775, "grad_norm": 0.027335022278462573, "learning_rate": 3.437376311532209e-05, "loss": 0.5847, "step": 35185 }, { "epoch": 1.786536533360409, "grad_norm": 0.025723741184710065, "learning_rate": 3.4293090388185955e-05, "loss": 0.5326, "step": 35190 }, { "epoch": 1.7867903694584406, "grad_norm": 0.028413204563193937, "learning_rate": 3.421250907699369e-05, "loss": 0.586, "step": 35195 }, { "epoch": 1.7870442055564721, "grad_norm": 0.027030816260977083, "learning_rate": 3.413201919756304e-05, "loss": 0.5379, "step": 35200 }, { "epoch": 1.7872980416545037, "grad_norm": 0.02655595430545289, "learning_rate": 3.4051620765693734e-05, "loss": 0.5867, "step": 35205 }, { "epoch": 1.7875518777525352, "grad_norm": 0.02681971668129288, "learning_rate": 3.3971313797167555e-05, "loss": 0.5615, "step": 35210 }, { "epoch": 1.7878057138505667, "grad_norm": 0.028568318313484362, "learning_rate": 3.389109830774845e-05, "loss": 0.592, "step": 35215 }, { "epoch": 1.7880595499485983, "grad_norm": 0.02469622480874688, "learning_rate": 3.38109743131822e-05, "loss": 0.5492, "step": 35220 }, { "epoch": 1.7883133860466298, "grad_norm": 0.026394252158649455, "learning_rate": 3.373094182919678e-05, "loss": 0.5487, "step": 35225 }, { "epoch": 1.7885672221446614, "grad_norm": 0.026201351190332373, "learning_rate": 3.3651000871502245e-05, "loss": 0.5813, "step": 35230 }, { "epoch": 1.7888210582426927, "grad_norm": 0.02520803835040115, "learning_rate": 3.357115145579059e-05, "loss": 0.5903, "step": 35235 }, { "epoch": 1.7890748943407242, "grad_norm": 0.026712404852844995, "learning_rate": 3.3491393597735786e-05, "loss": 0.5895, "step": 35240 }, { "epoch": 1.7893287304387557, "grad_norm": 0.03038215052641627, "learning_rate": 3.341172731299402e-05, "loss": 0.5991, "step": 35245 }, { "epoch": 1.789582566536787, "grad_norm": 0.028018171396845776, "learning_rate": 3.3332152617203237e-05, "loss": 0.5555, "step": 35250 }, { "epoch": 1.7898364026348186, "grad_norm": 0.02603912565672124, "learning_rate": 3.325266952598366e-05, "loss": 0.5789, "step": 35255 }, { "epoch": 1.7900902387328501, "grad_norm": 0.030072739589955835, "learning_rate": 3.317327805493736e-05, "loss": 0.5694, "step": 35260 }, { "epoch": 1.7903440748308816, "grad_norm": 0.025842182005628996, "learning_rate": 3.3093978219648605e-05, "loss": 0.6019, "step": 35265 }, { "epoch": 1.7905979109289132, "grad_norm": 0.027436217327865126, "learning_rate": 3.3014770035683315e-05, "loss": 0.5674, "step": 35270 }, { "epoch": 1.7908517470269447, "grad_norm": 0.026644867863729726, "learning_rate": 3.293565351858996e-05, "loss": 0.5553, "step": 35275 }, { "epoch": 1.7911055831249763, "grad_norm": 0.025639976418303775, "learning_rate": 3.285662868389849e-05, "loss": 0.592, "step": 35280 }, { "epoch": 1.7913594192230078, "grad_norm": 0.02725327867873492, "learning_rate": 3.2777695547121236e-05, "loss": 0.57, "step": 35285 }, { "epoch": 1.7916132553210393, "grad_norm": 0.02674946972105896, "learning_rate": 3.269885412375223e-05, "loss": 0.6047, "step": 35290 }, { "epoch": 1.7918670914190709, "grad_norm": 0.024640303167450785, "learning_rate": 3.262010442926772e-05, "loss": 0.5368, "step": 35295 }, { "epoch": 1.7921209275171022, "grad_norm": 0.02691641729148715, "learning_rate": 3.254144647912599e-05, "loss": 0.5642, "step": 35300 }, { "epoch": 1.7923747636151337, "grad_norm": 0.025235360915677057, "learning_rate": 3.246288028876704e-05, "loss": 0.5424, "step": 35305 }, { "epoch": 1.7926285997131652, "grad_norm": 0.02870273966985209, "learning_rate": 3.2384405873613134e-05, "loss": 0.5866, "step": 35310 }, { "epoch": 1.7928824358111966, "grad_norm": 0.02722917189516367, "learning_rate": 3.2306023249068285e-05, "loss": 0.5594, "step": 35315 }, { "epoch": 1.793136271909228, "grad_norm": 0.027774396676925966, "learning_rate": 3.22277324305188e-05, "loss": 0.5752, "step": 35320 }, { "epoch": 1.7933901080072596, "grad_norm": 0.027120629224570113, "learning_rate": 3.214953343333255e-05, "loss": 0.5517, "step": 35325 }, { "epoch": 1.7936439441052912, "grad_norm": 0.0260009699871434, "learning_rate": 3.20714262728598e-05, "loss": 0.5729, "step": 35330 }, { "epoch": 1.7938977802033227, "grad_norm": 0.027759882989301547, "learning_rate": 3.1993410964432424e-05, "loss": 0.5941, "step": 35335 }, { "epoch": 1.7941516163013542, "grad_norm": 0.026757704368540282, "learning_rate": 3.1915487523364596e-05, "loss": 0.5609, "step": 35340 }, { "epoch": 1.7944054523993858, "grad_norm": 0.02595580788931661, "learning_rate": 3.18376559649522e-05, "loss": 0.5693, "step": 35345 }, { "epoch": 1.7946592884974173, "grad_norm": 0.0265449885960612, "learning_rate": 3.175991630447322e-05, "loss": 0.6006, "step": 35350 }, { "epoch": 1.7949131245954488, "grad_norm": 0.02639452116642626, "learning_rate": 3.1682268557187535e-05, "loss": 0.5518, "step": 35355 }, { "epoch": 1.7951669606934804, "grad_norm": 0.028722076142266986, "learning_rate": 3.160471273833709e-05, "loss": 0.5792, "step": 35360 }, { "epoch": 1.7954207967915117, "grad_norm": 0.03961320622261278, "learning_rate": 3.152724886314562e-05, "loss": 0.5676, "step": 35365 }, { "epoch": 1.7956746328895432, "grad_norm": 0.029115229926894613, "learning_rate": 3.1449876946819e-05, "loss": 0.5976, "step": 35370 }, { "epoch": 1.7959284689875747, "grad_norm": 0.031218353453837163, "learning_rate": 3.137259700454481e-05, "loss": 0.6147, "step": 35375 }, { "epoch": 1.7961823050856063, "grad_norm": 0.02611737848116274, "learning_rate": 3.129540905149281e-05, "loss": 0.5691, "step": 35380 }, { "epoch": 1.7964361411836376, "grad_norm": 0.030002937335903646, "learning_rate": 3.121831310281459e-05, "loss": 0.6162, "step": 35385 }, { "epoch": 1.7966899772816691, "grad_norm": 0.024538821425696022, "learning_rate": 3.114130917364372e-05, "loss": 0.5342, "step": 35390 }, { "epoch": 1.7969438133797007, "grad_norm": 0.030211852508084072, "learning_rate": 3.10643972790956e-05, "loss": 0.6143, "step": 35395 }, { "epoch": 1.7971976494777322, "grad_norm": 0.026691774863741313, "learning_rate": 3.098757743426778e-05, "loss": 0.59, "step": 35400 }, { "epoch": 1.7974514855757637, "grad_norm": 0.02852475915197716, "learning_rate": 3.0910849654239456e-05, "loss": 0.5589, "step": 35405 }, { "epoch": 1.7977053216737953, "grad_norm": 0.02741625275011629, "learning_rate": 3.0834213954072046e-05, "loss": 0.5759, "step": 35410 }, { "epoch": 1.7979591577718268, "grad_norm": 0.027885037080123014, "learning_rate": 3.0757670348808774e-05, "loss": 0.5635, "step": 35415 }, { "epoch": 1.7982129938698583, "grad_norm": 0.02802318215273364, "learning_rate": 3.0681218853474636e-05, "loss": 0.5348, "step": 35420 }, { "epoch": 1.7984668299678899, "grad_norm": 0.025219863176964303, "learning_rate": 3.0604859483076785e-05, "loss": 0.5725, "step": 35425 }, { "epoch": 1.7987206660659212, "grad_norm": 0.02671640845920605, "learning_rate": 3.0528592252604126e-05, "loss": 0.586, "step": 35430 }, { "epoch": 1.7989745021639527, "grad_norm": 0.026388530821720536, "learning_rate": 3.045241717702757e-05, "loss": 0.6052, "step": 35435 }, { "epoch": 1.7992283382619843, "grad_norm": 0.0254647340600205, "learning_rate": 3.0376334271299878e-05, "loss": 0.572, "step": 35440 }, { "epoch": 1.7994821743600158, "grad_norm": 0.028320139236665515, "learning_rate": 3.0300343550355767e-05, "loss": 0.5853, "step": 35445 }, { "epoch": 1.799736010458047, "grad_norm": 0.024557828113796025, "learning_rate": 3.0224445029111812e-05, "loss": 0.5679, "step": 35450 }, { "epoch": 1.7999898465560786, "grad_norm": 0.026303422450413675, "learning_rate": 3.0148638722466593e-05, "loss": 0.5853, "step": 35455 }, { "epoch": 1.8002436826541102, "grad_norm": 0.026517602169977415, "learning_rate": 3.007292464530037e-05, "loss": 0.5939, "step": 35460 }, { "epoch": 1.8004975187521417, "grad_norm": 0.02640681430193606, "learning_rate": 2.9997302812475592e-05, "loss": 0.5879, "step": 35465 }, { "epoch": 1.8007513548501732, "grad_norm": 0.026065039094631832, "learning_rate": 2.9921773238836215e-05, "loss": 0.5884, "step": 35470 }, { "epoch": 1.8010051909482048, "grad_norm": 0.027189976180064482, "learning_rate": 2.9846335939208602e-05, "loss": 0.5584, "step": 35475 }, { "epoch": 1.8012590270462363, "grad_norm": 0.027716379885005752, "learning_rate": 2.9770990928400575e-05, "loss": 0.5468, "step": 35480 }, { "epoch": 1.8015128631442678, "grad_norm": 0.0287226032741187, "learning_rate": 2.969573822120203e-05, "loss": 0.5873, "step": 35485 }, { "epoch": 1.8017666992422994, "grad_norm": 0.0295481055006875, "learning_rate": 2.9620577832384643e-05, "loss": 0.6083, "step": 35490 }, { "epoch": 1.802020535340331, "grad_norm": 0.02859379853471278, "learning_rate": 2.9545509776702062e-05, "loss": 0.5543, "step": 35495 }, { "epoch": 1.8022743714383622, "grad_norm": 0.02570559833558401, "learning_rate": 2.947053406888972e-05, "loss": 0.5453, "step": 35500 }, { "epoch": 1.8025282075363938, "grad_norm": 0.030458222961368023, "learning_rate": 2.939565072366507e-05, "loss": 0.5599, "step": 35505 }, { "epoch": 1.8027820436344253, "grad_norm": 0.02764468434984449, "learning_rate": 2.9320859755727238e-05, "loss": 0.5518, "step": 35510 }, { "epoch": 1.8030358797324566, "grad_norm": 0.030946836924673843, "learning_rate": 2.9246161179757425e-05, "loss": 0.5617, "step": 35515 }, { "epoch": 1.8032897158304881, "grad_norm": 0.026667338671972803, "learning_rate": 2.9171555010418404e-05, "loss": 0.5449, "step": 35520 }, { "epoch": 1.8035435519285197, "grad_norm": 0.025456973392188522, "learning_rate": 2.909704126235524e-05, "loss": 0.5766, "step": 35525 }, { "epoch": 1.8037973880265512, "grad_norm": 0.028515551254674087, "learning_rate": 2.9022619950194395e-05, "loss": 0.5756, "step": 35530 }, { "epoch": 1.8040512241245827, "grad_norm": 0.028428462019228974, "learning_rate": 2.8948291088544522e-05, "loss": 0.5832, "step": 35535 }, { "epoch": 1.8043050602226143, "grad_norm": 0.02613187134332968, "learning_rate": 2.8874054691996054e-05, "loss": 0.5737, "step": 35540 }, { "epoch": 1.8045588963206458, "grad_norm": 0.02700908888399614, "learning_rate": 2.8799910775121008e-05, "loss": 0.5702, "step": 35545 }, { "epoch": 1.8048127324186773, "grad_norm": 0.025638675056755867, "learning_rate": 2.8725859352473737e-05, "loss": 0.5846, "step": 35550 }, { "epoch": 1.8050665685167089, "grad_norm": 0.026658617973824062, "learning_rate": 2.865190043858995e-05, "loss": 0.5948, "step": 35555 }, { "epoch": 1.8053204046147404, "grad_norm": 0.02556570172430922, "learning_rate": 2.8578034047987587e-05, "loss": 0.5667, "step": 35560 }, { "epoch": 1.8055742407127717, "grad_norm": 0.026567545064743625, "learning_rate": 2.8504260195166055e-05, "loss": 0.5858, "step": 35565 }, { "epoch": 1.8058280768108033, "grad_norm": 0.025470858507189952, "learning_rate": 2.8430578894606985e-05, "loss": 0.5918, "step": 35570 }, { "epoch": 1.8060819129088348, "grad_norm": 0.02614508475632217, "learning_rate": 2.8356990160773534e-05, "loss": 0.5869, "step": 35575 }, { "epoch": 1.806335749006866, "grad_norm": 0.027463450777919955, "learning_rate": 2.8283494008110867e-05, "loss": 0.568, "step": 35580 }, { "epoch": 1.8065895851048976, "grad_norm": 0.025689426855449327, "learning_rate": 2.821009045104578e-05, "loss": 0.568, "step": 35585 }, { "epoch": 1.8068434212029292, "grad_norm": 0.02644083591513624, "learning_rate": 2.8136779503987186e-05, "loss": 0.5758, "step": 35590 }, { "epoch": 1.8070972573009607, "grad_norm": 0.02676987265376532, "learning_rate": 2.8063561181325526e-05, "loss": 0.5526, "step": 35595 }, { "epoch": 1.8073510933989922, "grad_norm": 0.027764982432362816, "learning_rate": 2.7990435497433408e-05, "loss": 0.606, "step": 35600 }, { "epoch": 1.8076049294970238, "grad_norm": 0.026454167978451754, "learning_rate": 2.79174024666648e-05, "loss": 0.5586, "step": 35605 }, { "epoch": 1.8078587655950553, "grad_norm": 0.027234874375994044, "learning_rate": 2.7844462103355838e-05, "loss": 0.557, "step": 35610 }, { "epoch": 1.8081126016930869, "grad_norm": 0.028145085620029247, "learning_rate": 2.7771614421824297e-05, "loss": 0.5995, "step": 35615 }, { "epoch": 1.8083664377911184, "grad_norm": 0.0257344577246878, "learning_rate": 2.769885943636996e-05, "loss": 0.5659, "step": 35620 }, { "epoch": 1.80862027388915, "grad_norm": 0.026775386278390705, "learning_rate": 2.7626197161274014e-05, "loss": 0.577, "step": 35625 }, { "epoch": 1.8088741099871812, "grad_norm": 0.027689390353898224, "learning_rate": 2.7553627610799938e-05, "loss": 0.5605, "step": 35630 }, { "epoch": 1.8091279460852128, "grad_norm": 0.025552070614980923, "learning_rate": 2.748115079919261e-05, "loss": 0.5575, "step": 35635 }, { "epoch": 1.8093817821832443, "grad_norm": 0.026053307168733556, "learning_rate": 2.7408766740678994e-05, "loss": 0.5507, "step": 35640 }, { "epoch": 1.8096356182812756, "grad_norm": 0.027745728125725733, "learning_rate": 2.73364754494676e-05, "loss": 0.5832, "step": 35645 }, { "epoch": 1.8098894543793071, "grad_norm": 0.025430573320550688, "learning_rate": 2.7264276939748923e-05, "loss": 0.5384, "step": 35650 }, { "epoch": 1.8101432904773387, "grad_norm": 0.028443504810591564, "learning_rate": 2.7192171225695172e-05, "loss": 0.6049, "step": 35655 }, { "epoch": 1.8103971265753702, "grad_norm": 0.026372985422187387, "learning_rate": 2.712015832146031e-05, "loss": 0.5772, "step": 35660 }, { "epoch": 1.8106509626734018, "grad_norm": 0.02745014825054626, "learning_rate": 2.7048238241180133e-05, "loss": 0.5535, "step": 35665 }, { "epoch": 1.8109047987714333, "grad_norm": 0.028125534834131133, "learning_rate": 2.6976410998972134e-05, "loss": 0.5945, "step": 35670 }, { "epoch": 1.8111586348694648, "grad_norm": 0.02797126588871551, "learning_rate": 2.690467660893575e-05, "loss": 0.5778, "step": 35675 }, { "epoch": 1.8114124709674964, "grad_norm": 0.028482061589839822, "learning_rate": 2.6833035085152003e-05, "loss": 0.5966, "step": 35680 }, { "epoch": 1.811666307065528, "grad_norm": 0.027534880445931595, "learning_rate": 2.6761486441683802e-05, "loss": 0.5502, "step": 35685 }, { "epoch": 1.8119201431635594, "grad_norm": 0.026665635188296424, "learning_rate": 2.669003069257575e-05, "loss": 0.5523, "step": 35690 }, { "epoch": 1.8121739792615907, "grad_norm": 0.027770380081730644, "learning_rate": 2.661866785185435e-05, "loss": 0.5683, "step": 35695 }, { "epoch": 1.8124278153596223, "grad_norm": 0.0306792368769413, "learning_rate": 2.6547397933527562e-05, "loss": 0.5793, "step": 35700 }, { "epoch": 1.8126816514576538, "grad_norm": 0.024594813148666614, "learning_rate": 2.6476220951585582e-05, "loss": 0.5686, "step": 35705 }, { "epoch": 1.8129354875556853, "grad_norm": 0.025350708829511414, "learning_rate": 2.640513691999985e-05, "loss": 0.5679, "step": 35710 }, { "epoch": 1.8131893236537167, "grad_norm": 0.028171176021036082, "learning_rate": 2.6334145852724035e-05, "loss": 0.528, "step": 35715 }, { "epoch": 1.8134431597517482, "grad_norm": 0.02846211991259568, "learning_rate": 2.6263247763693153e-05, "loss": 0.5676, "step": 35720 }, { "epoch": 1.8136969958497797, "grad_norm": 0.027170230935614815, "learning_rate": 2.61924426668243e-05, "loss": 0.6141, "step": 35725 }, { "epoch": 1.8139508319478113, "grad_norm": 0.026831599339920527, "learning_rate": 2.6121730576015967e-05, "loss": 0.6051, "step": 35730 }, { "epoch": 1.8142046680458428, "grad_norm": 0.02860541413705239, "learning_rate": 2.605111150514883e-05, "loss": 0.6095, "step": 35735 }, { "epoch": 1.8144585041438743, "grad_norm": 0.02615144258457426, "learning_rate": 2.5980585468084795e-05, "loss": 0.5737, "step": 35740 }, { "epoch": 1.8147123402419059, "grad_norm": 0.02788390967189045, "learning_rate": 2.5910152478668015e-05, "loss": 0.5875, "step": 35745 }, { "epoch": 1.8149661763399374, "grad_norm": 0.024916755394950894, "learning_rate": 2.5839812550723928e-05, "loss": 0.5703, "step": 35750 }, { "epoch": 1.815220012437969, "grad_norm": 0.025507102808045835, "learning_rate": 2.5769565698060047e-05, "loss": 0.5844, "step": 35755 }, { "epoch": 1.8154738485360005, "grad_norm": 0.02655608889777319, "learning_rate": 2.56994119344654e-05, "loss": 0.5622, "step": 35760 }, { "epoch": 1.8157276846340318, "grad_norm": 0.025945417899478334, "learning_rate": 2.562935127371091e-05, "loss": 0.5919, "step": 35765 }, { "epoch": 1.8159815207320633, "grad_norm": 0.02724033108608283, "learning_rate": 2.5559383729549025e-05, "loss": 0.5679, "step": 35770 }, { "epoch": 1.8162353568300948, "grad_norm": 0.029356913441099555, "learning_rate": 2.5489509315714087e-05, "loss": 0.5881, "step": 35775 }, { "epoch": 1.8164891929281262, "grad_norm": 0.027370565314524874, "learning_rate": 2.5419728045922186e-05, "loss": 0.5542, "step": 35780 }, { "epoch": 1.8167430290261577, "grad_norm": 0.02951111717984348, "learning_rate": 2.5350039933870805e-05, "loss": 0.547, "step": 35785 }, { "epoch": 1.8169968651241892, "grad_norm": 0.027715488948312654, "learning_rate": 2.5280444993239616e-05, "loss": 0.558, "step": 35790 }, { "epoch": 1.8172507012222208, "grad_norm": 0.025825687932100865, "learning_rate": 2.5210943237689575e-05, "loss": 0.5771, "step": 35795 }, { "epoch": 1.8175045373202523, "grad_norm": 0.025120966260281086, "learning_rate": 2.514153468086372e-05, "loss": 0.5763, "step": 35800 }, { "epoch": 1.8177583734182838, "grad_norm": 0.025495301904177176, "learning_rate": 2.507221933638637e-05, "loss": 0.5668, "step": 35805 }, { "epoch": 1.8180122095163154, "grad_norm": 0.03079750865809753, "learning_rate": 2.5002997217863975e-05, "loss": 0.598, "step": 35810 }, { "epoch": 1.818266045614347, "grad_norm": 0.030045625719031775, "learning_rate": 2.4933868338884392e-05, "loss": 0.571, "step": 35815 }, { "epoch": 1.8185198817123784, "grad_norm": 0.027044554841358114, "learning_rate": 2.4864832713017316e-05, "loss": 0.602, "step": 35820 }, { "epoch": 1.81877371781041, "grad_norm": 0.025299105707791616, "learning_rate": 2.479589035381402e-05, "loss": 0.5806, "step": 35825 }, { "epoch": 1.8190275539084413, "grad_norm": 0.025229499806563884, "learning_rate": 2.472704127480768e-05, "loss": 0.5699, "step": 35830 }, { "epoch": 1.8192813900064728, "grad_norm": 0.026292539053083472, "learning_rate": 2.4658285489512876e-05, "loss": 0.5779, "step": 35835 }, { "epoch": 1.8195352261045044, "grad_norm": 0.029069651920036445, "learning_rate": 2.45896230114262e-05, "loss": 0.6133, "step": 35840 }, { "epoch": 1.8197890622025357, "grad_norm": 0.029450803786003198, "learning_rate": 2.4521053854025587e-05, "loss": 0.5485, "step": 35845 }, { "epoch": 1.8200428983005672, "grad_norm": 0.025868769893284675, "learning_rate": 2.4452578030771e-05, "loss": 0.5525, "step": 35850 }, { "epoch": 1.8202967343985987, "grad_norm": 0.029347756296694085, "learning_rate": 2.4384195555103685e-05, "loss": 0.5728, "step": 35855 }, { "epoch": 1.8205505704966303, "grad_norm": 0.02804613305429074, "learning_rate": 2.4315906440446956e-05, "loss": 0.5523, "step": 35860 }, { "epoch": 1.8208044065946618, "grad_norm": 0.026957256942004737, "learning_rate": 2.4247710700205484e-05, "loss": 0.5967, "step": 35865 }, { "epoch": 1.8210582426926933, "grad_norm": 0.02814660855551674, "learning_rate": 2.4179608347765948e-05, "loss": 0.6385, "step": 35870 }, { "epoch": 1.8213120787907249, "grad_norm": 0.026598024993185098, "learning_rate": 2.4111599396496263e-05, "loss": 0.5866, "step": 35875 }, { "epoch": 1.8215659148887564, "grad_norm": 0.028179962637627568, "learning_rate": 2.404368385974648e-05, "loss": 0.5973, "step": 35880 }, { "epoch": 1.821819750986788, "grad_norm": 0.02660429047275285, "learning_rate": 2.3975861750847872e-05, "loss": 0.5931, "step": 35885 }, { "epoch": 1.8220735870848195, "grad_norm": 0.02566356567904075, "learning_rate": 2.3908133083113627e-05, "loss": 0.5357, "step": 35890 }, { "epoch": 1.8223274231828508, "grad_norm": 0.029357229260375983, "learning_rate": 2.3840497869838718e-05, "loss": 0.5488, "step": 35895 }, { "epoch": 1.8225812592808823, "grad_norm": 0.02782382370511682, "learning_rate": 2.3772956124299416e-05, "loss": 0.5577, "step": 35900 }, { "epoch": 1.8228350953789139, "grad_norm": 0.02702152039590964, "learning_rate": 2.3705507859753896e-05, "loss": 0.5544, "step": 35905 }, { "epoch": 1.8230889314769452, "grad_norm": 0.02641736201684276, "learning_rate": 2.3638153089441893e-05, "loss": 0.5711, "step": 35910 }, { "epoch": 1.8233427675749767, "grad_norm": 0.027247129386988218, "learning_rate": 2.357089182658484e-05, "loss": 0.587, "step": 35915 }, { "epoch": 1.8235966036730082, "grad_norm": 0.02637042225949838, "learning_rate": 2.350372408438578e-05, "loss": 0.5772, "step": 35920 }, { "epoch": 1.8238504397710398, "grad_norm": 0.028275373601790294, "learning_rate": 2.343664987602939e-05, "loss": 0.59, "step": 35925 }, { "epoch": 1.8241042758690713, "grad_norm": 0.029236532422549383, "learning_rate": 2.3369669214681977e-05, "loss": 0.5954, "step": 35930 }, { "epoch": 1.8243581119671028, "grad_norm": 0.02699452920208486, "learning_rate": 2.3302782113491628e-05, "loss": 0.585, "step": 35935 }, { "epoch": 1.8246119480651344, "grad_norm": 0.028583453500494467, "learning_rate": 2.3235988585587784e-05, "loss": 0.5926, "step": 35940 }, { "epoch": 1.824865784163166, "grad_norm": 0.02866401688334965, "learning_rate": 2.31692886440818e-05, "loss": 0.5603, "step": 35945 }, { "epoch": 1.8251196202611975, "grad_norm": 0.026455094297170554, "learning_rate": 2.3102682302066412e-05, "loss": 0.5598, "step": 35950 }, { "epoch": 1.825373456359229, "grad_norm": 0.027908658596636885, "learning_rate": 2.303616957261634e-05, "loss": 0.5771, "step": 35955 }, { "epoch": 1.8256272924572603, "grad_norm": 0.02834244169273443, "learning_rate": 2.2969750468787466e-05, "loss": 0.5464, "step": 35960 }, { "epoch": 1.8258811285552918, "grad_norm": 0.027477895342985337, "learning_rate": 2.290342500361775e-05, "loss": 0.576, "step": 35965 }, { "epoch": 1.8261349646533234, "grad_norm": 0.02670281796913446, "learning_rate": 2.2837193190126282e-05, "loss": 0.5694, "step": 35970 }, { "epoch": 1.826388800751355, "grad_norm": 0.02590585220004404, "learning_rate": 2.2771055041314327e-05, "loss": 0.5446, "step": 35975 }, { "epoch": 1.8266426368493862, "grad_norm": 0.028039759361562923, "learning_rate": 2.270501057016422e-05, "loss": 0.6037, "step": 35980 }, { "epoch": 1.8268964729474177, "grad_norm": 0.026685099838050286, "learning_rate": 2.263905978964037e-05, "loss": 0.5864, "step": 35985 }, { "epoch": 1.8271503090454493, "grad_norm": 0.028970856158449524, "learning_rate": 2.2573202712688367e-05, "loss": 0.6113, "step": 35990 }, { "epoch": 1.8274041451434808, "grad_norm": 0.026824972755855877, "learning_rate": 2.250743935223587e-05, "loss": 0.5859, "step": 35995 }, { "epoch": 1.8276579812415124, "grad_norm": 0.037468406032323154, "learning_rate": 2.2441769721191662e-05, "loss": 0.5636, "step": 36000 }, { "epoch": 1.8279118173395439, "grad_norm": 0.027724940582433053, "learning_rate": 2.23761938324466e-05, "loss": 0.5725, "step": 36005 }, { "epoch": 1.8281656534375754, "grad_norm": 0.02618455707379379, "learning_rate": 2.2310711698872665e-05, "loss": 0.5629, "step": 36010 }, { "epoch": 1.828419489535607, "grad_norm": 0.024521449751739338, "learning_rate": 2.224532333332385e-05, "loss": 0.5727, "step": 36015 }, { "epoch": 1.8286733256336385, "grad_norm": 0.030437087360381695, "learning_rate": 2.2180028748635506e-05, "loss": 0.6112, "step": 36020 }, { "epoch": 1.82892716173167, "grad_norm": 0.02624871679565409, "learning_rate": 2.2114827957624595e-05, "loss": 0.5861, "step": 36025 }, { "epoch": 1.8291809978297013, "grad_norm": 0.027289018657293246, "learning_rate": 2.2049720973089825e-05, "loss": 0.5657, "step": 36030 }, { "epoch": 1.8294348339277329, "grad_norm": 0.026033999797218217, "learning_rate": 2.19847078078112e-05, "loss": 0.5922, "step": 36035 }, { "epoch": 1.8296886700257644, "grad_norm": 0.025492839549746068, "learning_rate": 2.1919788474550673e-05, "loss": 0.5555, "step": 36040 }, { "epoch": 1.8299425061237957, "grad_norm": 0.024256537075199234, "learning_rate": 2.185496298605144e-05, "loss": 0.569, "step": 36045 }, { "epoch": 1.8301963422218273, "grad_norm": 0.026773302848634294, "learning_rate": 2.1790231355038493e-05, "loss": 0.538, "step": 36050 }, { "epoch": 1.8304501783198588, "grad_norm": 0.027794272561042957, "learning_rate": 2.172559359421822e-05, "loss": 0.5779, "step": 36055 }, { "epoch": 1.8307040144178903, "grad_norm": 0.025832352570204078, "learning_rate": 2.166104971627886e-05, "loss": 0.5443, "step": 36060 }, { "epoch": 1.8309578505159219, "grad_norm": 0.02830764516421438, "learning_rate": 2.1596599733889888e-05, "loss": 0.5718, "step": 36065 }, { "epoch": 1.8312116866139534, "grad_norm": 0.024297090615256894, "learning_rate": 2.1532243659702634e-05, "loss": 0.5928, "step": 36070 }, { "epoch": 1.831465522711985, "grad_norm": 0.02657477924578628, "learning_rate": 2.146798150634982e-05, "loss": 0.5899, "step": 36075 }, { "epoch": 1.8317193588100165, "grad_norm": 0.028905014191822558, "learning_rate": 2.140381328644586e-05, "loss": 0.5637, "step": 36080 }, { "epoch": 1.831973194908048, "grad_norm": 0.027035499051523956, "learning_rate": 2.133973901258651e-05, "loss": 0.5935, "step": 36085 }, { "epoch": 1.8322270310060795, "grad_norm": 0.02715171569873808, "learning_rate": 2.1275758697349434e-05, "loss": 0.5824, "step": 36090 }, { "epoch": 1.8324808671041108, "grad_norm": 0.0260937001170499, "learning_rate": 2.1211872353293417e-05, "loss": 0.5742, "step": 36095 }, { "epoch": 1.8327347032021424, "grad_norm": 0.0263195936944104, "learning_rate": 2.11480799929592e-05, "loss": 0.5681, "step": 36100 }, { "epoch": 1.832988539300174, "grad_norm": 0.028091219814634485, "learning_rate": 2.1084381628868833e-05, "loss": 0.5442, "step": 36105 }, { "epoch": 1.8332423753982052, "grad_norm": 0.02758511280938601, "learning_rate": 2.1020777273526025e-05, "loss": 0.5416, "step": 36110 }, { "epoch": 1.8334962114962368, "grad_norm": 0.02428310326954132, "learning_rate": 2.0957266939415965e-05, "loss": 0.5622, "step": 36115 }, { "epoch": 1.8337500475942683, "grad_norm": 0.025802532394473016, "learning_rate": 2.0893850639005453e-05, "loss": 0.5842, "step": 36120 }, { "epoch": 1.8340038836922998, "grad_norm": 0.027394236177426833, "learning_rate": 2.0830528384742697e-05, "loss": 0.5586, "step": 36125 }, { "epoch": 1.8342577197903314, "grad_norm": 0.02712292531655323, "learning_rate": 2.076730018905759e-05, "loss": 0.5624, "step": 36130 }, { "epoch": 1.834511555888363, "grad_norm": 0.027003208615814147, "learning_rate": 2.0704166064361596e-05, "loss": 0.5621, "step": 36135 }, { "epoch": 1.8347653919863944, "grad_norm": 0.026619760424973014, "learning_rate": 2.0641126023047518e-05, "loss": 0.5854, "step": 36140 }, { "epoch": 1.835019228084426, "grad_norm": 0.027140319984850742, "learning_rate": 2.0578180077489905e-05, "loss": 0.5611, "step": 36145 }, { "epoch": 1.8352730641824575, "grad_norm": 0.0282803783937816, "learning_rate": 2.0515328240044594e-05, "loss": 0.5687, "step": 36150 }, { "epoch": 1.835526900280489, "grad_norm": 0.02530397832223153, "learning_rate": 2.0452570523049217e-05, "loss": 0.556, "step": 36155 }, { "epoch": 1.8357807363785204, "grad_norm": 0.027386739405436122, "learning_rate": 2.03899069388227e-05, "loss": 0.5578, "step": 36160 }, { "epoch": 1.8360345724765519, "grad_norm": 0.026338502574654293, "learning_rate": 2.03273374996657e-05, "loss": 0.5729, "step": 36165 }, { "epoch": 1.8362884085745834, "grad_norm": 0.029431085449717637, "learning_rate": 2.026486221786017e-05, "loss": 0.6021, "step": 36170 }, { "epoch": 1.8365422446726147, "grad_norm": 0.025678366657224685, "learning_rate": 2.02024811056698e-05, "loss": 0.5715, "step": 36175 }, { "epoch": 1.8367960807706463, "grad_norm": 0.02722139592580082, "learning_rate": 2.0140194175339575e-05, "loss": 0.5927, "step": 36180 }, { "epoch": 1.8370499168686778, "grad_norm": 0.029598666515167252, "learning_rate": 2.0078001439096218e-05, "loss": 0.5422, "step": 36185 }, { "epoch": 1.8373037529667093, "grad_norm": 0.025640975321780536, "learning_rate": 2.001590290914779e-05, "loss": 0.5465, "step": 36190 }, { "epoch": 1.8375575890647409, "grad_norm": 0.027157158202289786, "learning_rate": 1.9953898597683927e-05, "loss": 0.6135, "step": 36195 }, { "epoch": 1.8378114251627724, "grad_norm": 0.027066188059528463, "learning_rate": 1.989198851687579e-05, "loss": 0.5638, "step": 36200 }, { "epoch": 1.838065261260804, "grad_norm": 0.025168186252495516, "learning_rate": 1.9830172678876103e-05, "loss": 0.5828, "step": 36205 }, { "epoch": 1.8383190973588355, "grad_norm": 0.027319117950254174, "learning_rate": 1.9768451095818818e-05, "loss": 0.5757, "step": 36210 }, { "epoch": 1.838572933456867, "grad_norm": 0.02845792505849997, "learning_rate": 1.9706823779819692e-05, "loss": 0.5747, "step": 36215 }, { "epoch": 1.8388267695548985, "grad_norm": 0.025088117600244413, "learning_rate": 1.964529074297583e-05, "loss": 0.5765, "step": 36220 }, { "epoch": 1.8390806056529299, "grad_norm": 0.026087186763016027, "learning_rate": 1.9583851997365954e-05, "loss": 0.5487, "step": 36225 }, { "epoch": 1.8393344417509614, "grad_norm": 0.026313455047492194, "learning_rate": 1.952250755505003e-05, "loss": 0.5849, "step": 36230 }, { "epoch": 1.839588277848993, "grad_norm": 0.02786533775577585, "learning_rate": 1.9461257428069755e-05, "loss": 0.5516, "step": 36235 }, { "epoch": 1.8398421139470245, "grad_norm": 0.02661697392817356, "learning_rate": 1.9400101628448242e-05, "loss": 0.5459, "step": 36240 }, { "epoch": 1.8400959500450558, "grad_norm": 0.026520185804499382, "learning_rate": 1.9339040168189937e-05, "loss": 0.5965, "step": 36245 }, { "epoch": 1.8403497861430873, "grad_norm": 0.02763984698101778, "learning_rate": 1.927807305928109e-05, "loss": 0.5857, "step": 36250 }, { "epoch": 1.8406036222411188, "grad_norm": 0.02592465299246235, "learning_rate": 1.921720031368901e-05, "loss": 0.5576, "step": 36255 }, { "epoch": 1.8408574583391504, "grad_norm": 0.027818599622824344, "learning_rate": 1.9156421943362924e-05, "loss": 0.6031, "step": 36260 }, { "epoch": 1.841111294437182, "grad_norm": 0.026907144609488717, "learning_rate": 1.9095737960233228e-05, "loss": 0.5662, "step": 36265 }, { "epoch": 1.8413651305352134, "grad_norm": 0.028109750602876057, "learning_rate": 1.903514837621201e-05, "loss": 0.5958, "step": 36270 }, { "epoch": 1.841618966633245, "grad_norm": 0.02545538323248745, "learning_rate": 1.897465320319247e-05, "loss": 0.5413, "step": 36275 }, { "epoch": 1.8418728027312765, "grad_norm": 0.02728915808279882, "learning_rate": 1.891425245304973e-05, "loss": 0.5998, "step": 36280 }, { "epoch": 1.842126638829308, "grad_norm": 0.025928638172705305, "learning_rate": 1.8853946137639966e-05, "loss": 0.5429, "step": 36285 }, { "epoch": 1.8423804749273394, "grad_norm": 0.026683079858681152, "learning_rate": 1.879373426880121e-05, "loss": 0.566, "step": 36290 }, { "epoch": 1.842634311025371, "grad_norm": 0.02616819834698326, "learning_rate": 1.8733616858352564e-05, "loss": 0.5623, "step": 36295 }, { "epoch": 1.8428881471234024, "grad_norm": 0.028163366014965295, "learning_rate": 1.8673593918094923e-05, "loss": 0.5576, "step": 36300 }, { "epoch": 1.843141983221434, "grad_norm": 0.026603044594163645, "learning_rate": 1.8613665459810357e-05, "loss": 0.5898, "step": 36305 }, { "epoch": 1.8433958193194653, "grad_norm": 0.026250385205995524, "learning_rate": 1.8553831495262685e-05, "loss": 0.5925, "step": 36310 }, { "epoch": 1.8436496554174968, "grad_norm": 0.02807434050007101, "learning_rate": 1.849409203619673e-05, "loss": 0.5731, "step": 36315 }, { "epoch": 1.8439034915155283, "grad_norm": 0.027884670656111873, "learning_rate": 1.8434447094339446e-05, "loss": 0.5945, "step": 36320 }, { "epoch": 1.8441573276135599, "grad_norm": 0.02659557811651669, "learning_rate": 1.837489668139858e-05, "loss": 0.5588, "step": 36325 }, { "epoch": 1.8444111637115914, "grad_norm": 0.027332683853181203, "learning_rate": 1.8315440809063554e-05, "loss": 0.5743, "step": 36330 }, { "epoch": 1.844664999809623, "grad_norm": 0.02675165200384304, "learning_rate": 1.8256079489005485e-05, "loss": 0.5382, "step": 36335 }, { "epoch": 1.8449188359076545, "grad_norm": 0.024703111711585273, "learning_rate": 1.8196812732876434e-05, "loss": 0.5826, "step": 36340 }, { "epoch": 1.845172672005686, "grad_norm": 0.02699885990470083, "learning_rate": 1.8137640552310374e-05, "loss": 0.568, "step": 36345 }, { "epoch": 1.8454265081037176, "grad_norm": 0.024878463049257116, "learning_rate": 1.807856295892235e-05, "loss": 0.5644, "step": 36350 }, { "epoch": 1.845680344201749, "grad_norm": 0.0270226524162194, "learning_rate": 1.801957996430914e-05, "loss": 0.5932, "step": 36355 }, { "epoch": 1.8459341802997804, "grad_norm": 0.027634367912995724, "learning_rate": 1.7960691580048705e-05, "loss": 0.565, "step": 36360 }, { "epoch": 1.846188016397812, "grad_norm": 0.028117525350812956, "learning_rate": 1.7901897817700685e-05, "loss": 0.589, "step": 36365 }, { "epoch": 1.8464418524958435, "grad_norm": 0.024824353424336764, "learning_rate": 1.7843198688805793e-05, "loss": 0.6052, "step": 36370 }, { "epoch": 1.8466956885938748, "grad_norm": 0.026520726247098402, "learning_rate": 1.7784594204886485e-05, "loss": 0.608, "step": 36375 }, { "epoch": 1.8469495246919063, "grad_norm": 0.02877092790364254, "learning_rate": 1.772608437744655e-05, "loss": 0.5779, "step": 36380 }, { "epoch": 1.8472033607899379, "grad_norm": 0.02716363306870091, "learning_rate": 1.7667669217971195e-05, "loss": 0.57, "step": 36385 }, { "epoch": 1.8474571968879694, "grad_norm": 0.026315284778600585, "learning_rate": 1.7609348737926968e-05, "loss": 0.5814, "step": 36390 }, { "epoch": 1.847711032986001, "grad_norm": 0.030198483851937186, "learning_rate": 1.7551122948761932e-05, "loss": 0.5881, "step": 36395 }, { "epoch": 1.8479648690840325, "grad_norm": 0.028369644888468888, "learning_rate": 1.7492991861905394e-05, "loss": 0.5598, "step": 36400 }, { "epoch": 1.848218705182064, "grad_norm": 0.025274056071766644, "learning_rate": 1.7434955488768445e-05, "loss": 0.5829, "step": 36405 }, { "epoch": 1.8484725412800955, "grad_norm": 0.02637463274003168, "learning_rate": 1.7377013840743083e-05, "loss": 0.5801, "step": 36410 }, { "epoch": 1.848726377378127, "grad_norm": 0.02551240042177894, "learning_rate": 1.73191669292031e-05, "loss": 0.5742, "step": 36415 }, { "epoch": 1.8489802134761586, "grad_norm": 0.026793904049247648, "learning_rate": 1.726141476550347e-05, "loss": 0.5898, "step": 36420 }, { "epoch": 1.84923404957419, "grad_norm": 0.02535322973367802, "learning_rate": 1.720375736098079e-05, "loss": 0.5649, "step": 36425 }, { "epoch": 1.8494878856722214, "grad_norm": 0.02775719090794009, "learning_rate": 1.7146194726952778e-05, "loss": 0.5558, "step": 36430 }, { "epoch": 1.849741721770253, "grad_norm": 0.03002793154139665, "learning_rate": 1.708872687471874e-05, "loss": 0.5815, "step": 36435 }, { "epoch": 1.8499955578682843, "grad_norm": 0.025489982741216954, "learning_rate": 1.7031353815559425e-05, "loss": 0.5944, "step": 36440 }, { "epoch": 1.8502493939663158, "grad_norm": 0.026326440429905708, "learning_rate": 1.697407556073671e-05, "loss": 0.5689, "step": 36445 }, { "epoch": 1.8505032300643474, "grad_norm": 0.02418754656650073, "learning_rate": 1.6916892121494166e-05, "loss": 0.5736, "step": 36450 }, { "epoch": 1.850757066162379, "grad_norm": 0.029200481930988675, "learning_rate": 1.6859803509056527e-05, "loss": 0.5703, "step": 36455 }, { "epoch": 1.8510109022604104, "grad_norm": 0.02706643528364148, "learning_rate": 1.680280973463011e-05, "loss": 0.5661, "step": 36460 }, { "epoch": 1.851264738358442, "grad_norm": 0.02735862265317539, "learning_rate": 1.674591080940241e-05, "loss": 0.5837, "step": 36465 }, { "epoch": 1.8515185744564735, "grad_norm": 0.02614737853931124, "learning_rate": 1.6689106744542437e-05, "loss": 0.5584, "step": 36470 }, { "epoch": 1.851772410554505, "grad_norm": 0.024508433518787613, "learning_rate": 1.6632397551200496e-05, "loss": 0.561, "step": 36475 }, { "epoch": 1.8520262466525366, "grad_norm": 0.0274685878829509, "learning_rate": 1.6575783240508458e-05, "loss": 0.5714, "step": 36480 }, { "epoch": 1.852280082750568, "grad_norm": 0.028283286388763762, "learning_rate": 1.6519263823579213e-05, "loss": 0.5733, "step": 36485 }, { "epoch": 1.8525339188485994, "grad_norm": 0.02892661134737752, "learning_rate": 1.6462839311507494e-05, "loss": 0.5678, "step": 36490 }, { "epoch": 1.852787754946631, "grad_norm": 0.026206679576455182, "learning_rate": 1.640650971536889e-05, "loss": 0.5941, "step": 36495 }, { "epoch": 1.8530415910446625, "grad_norm": 0.028889225370380106, "learning_rate": 1.635027504622083e-05, "loss": 0.5577, "step": 36500 }, { "epoch": 1.8532954271426938, "grad_norm": 0.02837171476451808, "learning_rate": 1.6294135315101765e-05, "loss": 0.5483, "step": 36505 }, { "epoch": 1.8535492632407253, "grad_norm": 0.025807545978435764, "learning_rate": 1.6238090533031825e-05, "loss": 0.578, "step": 36510 }, { "epoch": 1.8538030993387569, "grad_norm": 0.02766860705331187, "learning_rate": 1.6182140711012095e-05, "loss": 0.5617, "step": 36515 }, { "epoch": 1.8540569354367884, "grad_norm": 0.03119642224456226, "learning_rate": 1.6126285860025403e-05, "loss": 0.5798, "step": 36520 }, { "epoch": 1.85431077153482, "grad_norm": 0.028079641741461787, "learning_rate": 1.6070525991035646e-05, "loss": 0.5672, "step": 36525 }, { "epoch": 1.8545646076328515, "grad_norm": 0.026064410063975686, "learning_rate": 1.6014861114988343e-05, "loss": 0.5526, "step": 36530 }, { "epoch": 1.854818443730883, "grad_norm": 0.026260042014070385, "learning_rate": 1.5959291242810146e-05, "loss": 0.6011, "step": 36535 }, { "epoch": 1.8550722798289145, "grad_norm": 0.026098405664968675, "learning_rate": 1.590381638540922e-05, "loss": 0.5384, "step": 36540 }, { "epoch": 1.855326115926946, "grad_norm": 0.026573896288392733, "learning_rate": 1.5848436553674905e-05, "loss": 0.5855, "step": 36545 }, { "epoch": 1.8555799520249776, "grad_norm": 0.026456425424321087, "learning_rate": 1.5793151758478064e-05, "loss": 0.5859, "step": 36550 }, { "epoch": 1.855833788123009, "grad_norm": 0.02331090707661556, "learning_rate": 1.5737962010670738e-05, "loss": 0.5292, "step": 36555 }, { "epoch": 1.8560876242210405, "grad_norm": 0.02679295860093039, "learning_rate": 1.5682867321086482e-05, "loss": 0.568, "step": 36560 }, { "epoch": 1.856341460319072, "grad_norm": 0.024572864446572897, "learning_rate": 1.5627867700540144e-05, "loss": 0.5366, "step": 36565 }, { "epoch": 1.8565952964171035, "grad_norm": 0.02641895964825604, "learning_rate": 1.557296315982776e-05, "loss": 0.5863, "step": 36570 }, { "epoch": 1.8568491325151348, "grad_norm": 0.026617568824230612, "learning_rate": 1.5518153709726922e-05, "loss": 0.5563, "step": 36575 }, { "epoch": 1.8571029686131664, "grad_norm": 0.028942210292703413, "learning_rate": 1.5463439360996367e-05, "loss": 0.5754, "step": 36580 }, { "epoch": 1.857356804711198, "grad_norm": 0.026462888441986812, "learning_rate": 1.5408820124376277e-05, "loss": 0.5747, "step": 36585 }, { "epoch": 1.8576106408092294, "grad_norm": 0.02923758562020356, "learning_rate": 1.535429601058813e-05, "loss": 0.56, "step": 36590 }, { "epoch": 1.857864476907261, "grad_norm": 0.02749671025990348, "learning_rate": 1.5299867030334813e-05, "loss": 0.5472, "step": 36595 }, { "epoch": 1.8581183130052925, "grad_norm": 0.025482468528002726, "learning_rate": 1.5245533194300387e-05, "loss": 0.5955, "step": 36600 }, { "epoch": 1.858372149103324, "grad_norm": 0.02602316851958648, "learning_rate": 1.5191294513150322e-05, "loss": 0.5682, "step": 36605 }, { "epoch": 1.8586259852013556, "grad_norm": 0.02477270924689558, "learning_rate": 1.5137150997531379e-05, "loss": 0.5767, "step": 36610 }, { "epoch": 1.8588798212993871, "grad_norm": 0.026067656945052992, "learning_rate": 1.5083102658071667e-05, "loss": 0.5745, "step": 36615 }, { "epoch": 1.8591336573974186, "grad_norm": 0.02772710375267261, "learning_rate": 1.5029149505380647e-05, "loss": 0.5881, "step": 36620 }, { "epoch": 1.85938749349545, "grad_norm": 0.025970314599066527, "learning_rate": 1.4975291550049063e-05, "loss": 0.5849, "step": 36625 }, { "epoch": 1.8596413295934815, "grad_norm": 0.026711874806104704, "learning_rate": 1.492152880264891e-05, "loss": 0.5705, "step": 36630 }, { "epoch": 1.859895165691513, "grad_norm": 0.02490734920661892, "learning_rate": 1.4867861273733629e-05, "loss": 0.5549, "step": 36635 }, { "epoch": 1.8601490017895443, "grad_norm": 0.027736973559793453, "learning_rate": 1.4814288973837742e-05, "loss": 0.5603, "step": 36640 }, { "epoch": 1.8604028378875759, "grad_norm": 0.02692338434253238, "learning_rate": 1.4760811913477389e-05, "loss": 0.5838, "step": 36645 }, { "epoch": 1.8606566739856074, "grad_norm": 0.02778700383107342, "learning_rate": 1.4707430103149732e-05, "loss": 0.593, "step": 36650 }, { "epoch": 1.860910510083639, "grad_norm": 0.024811962538317675, "learning_rate": 1.4654143553333387e-05, "loss": 0.592, "step": 36655 }, { "epoch": 1.8611643461816705, "grad_norm": 0.02557997067148238, "learning_rate": 1.4600952274488265e-05, "loss": 0.5823, "step": 36660 }, { "epoch": 1.861418182279702, "grad_norm": 0.026512636511512682, "learning_rate": 1.4547856277055571e-05, "loss": 0.5791, "step": 36665 }, { "epoch": 1.8616720183777336, "grad_norm": 0.029796804080340297, "learning_rate": 1.4494855571457633e-05, "loss": 0.6005, "step": 36670 }, { "epoch": 1.861925854475765, "grad_norm": 0.026250988292652287, "learning_rate": 1.4441950168098406e-05, "loss": 0.5531, "step": 36675 }, { "epoch": 1.8621796905737966, "grad_norm": 0.024189467527238807, "learning_rate": 1.4389140077362916e-05, "loss": 0.5611, "step": 36680 }, { "epoch": 1.8624335266718282, "grad_norm": 0.027284030150552557, "learning_rate": 1.433642530961743e-05, "loss": 0.5808, "step": 36685 }, { "epoch": 1.8626873627698595, "grad_norm": 0.02577522892884733, "learning_rate": 1.4283805875209721e-05, "loss": 0.5655, "step": 36690 }, { "epoch": 1.862941198867891, "grad_norm": 0.026375890375864595, "learning_rate": 1.4231281784468587e-05, "loss": 0.5394, "step": 36695 }, { "epoch": 1.8631950349659225, "grad_norm": 0.027204499708072795, "learning_rate": 1.4178853047704388e-05, "loss": 0.5764, "step": 36700 }, { "epoch": 1.8634488710639538, "grad_norm": 0.02710295889595074, "learning_rate": 1.412651967520845e-05, "loss": 0.5999, "step": 36705 }, { "epoch": 1.8637027071619854, "grad_norm": 0.03207466203117013, "learning_rate": 1.4074281677253719e-05, "loss": 0.567, "step": 36710 }, { "epoch": 1.863956543260017, "grad_norm": 0.0273904739867386, "learning_rate": 1.4022139064094164e-05, "loss": 0.5584, "step": 36715 }, { "epoch": 1.8642103793580485, "grad_norm": 0.027567247664802595, "learning_rate": 1.3970091845965205e-05, "loss": 0.5734, "step": 36720 }, { "epoch": 1.86446421545608, "grad_norm": 0.027990968969123918, "learning_rate": 1.3918140033083338e-05, "loss": 0.621, "step": 36725 }, { "epoch": 1.8647180515541115, "grad_norm": 0.02709867798319262, "learning_rate": 1.3866283635646515e-05, "loss": 0.5985, "step": 36730 }, { "epoch": 1.864971887652143, "grad_norm": 0.02522124919334186, "learning_rate": 1.3814522663833761e-05, "loss": 0.5628, "step": 36735 }, { "epoch": 1.8652257237501746, "grad_norm": 0.02743840971724697, "learning_rate": 1.3762857127805727e-05, "loss": 0.563, "step": 36740 }, { "epoch": 1.8654795598482061, "grad_norm": 0.028910503467867058, "learning_rate": 1.3711287037703913e-05, "loss": 0.5697, "step": 36745 }, { "epoch": 1.8657333959462377, "grad_norm": 0.025508741473056533, "learning_rate": 1.3659812403651439e-05, "loss": 0.5672, "step": 36750 }, { "epoch": 1.865987232044269, "grad_norm": 0.02508100081037655, "learning_rate": 1.3608433235752282e-05, "loss": 0.575, "step": 36755 }, { "epoch": 1.8662410681423005, "grad_norm": 0.027125277678490285, "learning_rate": 1.355714954409215e-05, "loss": 0.5707, "step": 36760 }, { "epoch": 1.866494904240332, "grad_norm": 0.027613053776696928, "learning_rate": 1.3505961338737604e-05, "loss": 0.5707, "step": 36765 }, { "epoch": 1.8667487403383634, "grad_norm": 0.025380902059776694, "learning_rate": 1.3454868629736771e-05, "loss": 0.568, "step": 36770 }, { "epoch": 1.867002576436395, "grad_norm": 0.027895550210552397, "learning_rate": 1.3403871427118798e-05, "loss": 0.5827, "step": 36775 }, { "epoch": 1.8672564125344264, "grad_norm": 0.029709858415128663, "learning_rate": 1.3352969740894228e-05, "loss": 0.5981, "step": 36780 }, { "epoch": 1.867510248632458, "grad_norm": 0.02844691564922667, "learning_rate": 1.3302163581054793e-05, "loss": 0.5866, "step": 36785 }, { "epoch": 1.8677640847304895, "grad_norm": 0.026535314956775462, "learning_rate": 1.3251452957573517e-05, "loss": 0.5918, "step": 36790 }, { "epoch": 1.868017920828521, "grad_norm": 0.026798504199066396, "learning_rate": 1.3200837880404548e-05, "loss": 0.6228, "step": 36795 }, { "epoch": 1.8682717569265526, "grad_norm": 0.02786255494576218, "learning_rate": 1.3150318359483437e-05, "loss": 0.5697, "step": 36800 }, { "epoch": 1.868525593024584, "grad_norm": 0.02780992077086267, "learning_rate": 1.3099894404726976e-05, "loss": 0.5948, "step": 36805 }, { "epoch": 1.8687794291226156, "grad_norm": 0.02750041000466397, "learning_rate": 1.3049566026033022e-05, "loss": 0.5759, "step": 36810 }, { "epoch": 1.8690332652206472, "grad_norm": 0.02805682888530231, "learning_rate": 1.2999333233280896e-05, "loss": 0.5674, "step": 36815 }, { "epoch": 1.8692871013186785, "grad_norm": 0.025751945407299776, "learning_rate": 1.294919603633088e-05, "loss": 0.5469, "step": 36820 }, { "epoch": 1.86954093741671, "grad_norm": 0.02751691719749977, "learning_rate": 1.2899154445024874e-05, "loss": 0.5921, "step": 36825 }, { "epoch": 1.8697947735147415, "grad_norm": 0.027323329347541638, "learning_rate": 1.2849208469185636e-05, "loss": 0.6025, "step": 36830 }, { "epoch": 1.870048609612773, "grad_norm": 0.025314111085062957, "learning_rate": 1.2799358118617377e-05, "loss": 0.5825, "step": 36835 }, { "epoch": 1.8703024457108044, "grad_norm": 0.029464938283397794, "learning_rate": 1.2749603403105437e-05, "loss": 0.5868, "step": 36840 }, { "epoch": 1.870556281808836, "grad_norm": 0.028381884549294877, "learning_rate": 1.2699944332416502e-05, "loss": 0.5651, "step": 36845 }, { "epoch": 1.8708101179068675, "grad_norm": 0.02722179468852649, "learning_rate": 1.2650380916298222e-05, "loss": 0.6096, "step": 36850 }, { "epoch": 1.871063954004899, "grad_norm": 0.02704204949180691, "learning_rate": 1.2600913164479811e-05, "loss": 0.5663, "step": 36855 }, { "epoch": 1.8713177901029305, "grad_norm": 0.025394904050208, "learning_rate": 1.2551541086671447e-05, "loss": 0.5646, "step": 36860 }, { "epoch": 1.871571626200962, "grad_norm": 0.029482916281631726, "learning_rate": 1.2502264692564768e-05, "loss": 0.5775, "step": 36865 }, { "epoch": 1.8718254622989936, "grad_norm": 0.026336380997209995, "learning_rate": 1.2453083991832258e-05, "loss": 0.5642, "step": 36870 }, { "epoch": 1.8720792983970251, "grad_norm": 0.027573195017879564, "learning_rate": 1.2403998994128085e-05, "loss": 0.5517, "step": 36875 }, { "epoch": 1.8723331344950567, "grad_norm": 0.027702143181533818, "learning_rate": 1.2355009709087205e-05, "loss": 0.5689, "step": 36880 }, { "epoch": 1.8725869705930882, "grad_norm": 0.02681291908873711, "learning_rate": 1.2306116146326096e-05, "loss": 0.561, "step": 36885 }, { "epoch": 1.8728408066911195, "grad_norm": 0.02903390244108363, "learning_rate": 1.225731831544219e-05, "loss": 0.6, "step": 36890 }, { "epoch": 1.873094642789151, "grad_norm": 0.028526913151317543, "learning_rate": 1.220861622601438e-05, "loss": 0.5893, "step": 36895 }, { "epoch": 1.8733484788871826, "grad_norm": 0.027871345669932986, "learning_rate": 1.2160009887602575e-05, "loss": 0.5569, "step": 36900 }, { "epoch": 1.873602314985214, "grad_norm": 0.02583330507453777, "learning_rate": 1.2111499309747975e-05, "loss": 0.5744, "step": 36905 }, { "epoch": 1.8738561510832454, "grad_norm": 0.029316111503293937, "learning_rate": 1.2063084501972966e-05, "loss": 0.5849, "step": 36910 }, { "epoch": 1.874109987181277, "grad_norm": 0.027757299361849543, "learning_rate": 1.2014765473781053e-05, "loss": 0.5866, "step": 36915 }, { "epoch": 1.8743638232793085, "grad_norm": 0.025259808597788445, "learning_rate": 1.1966542234657208e-05, "loss": 0.5252, "step": 36920 }, { "epoch": 1.87461765937734, "grad_norm": 0.025493577973862997, "learning_rate": 1.1918414794067244e-05, "loss": 0.5578, "step": 36925 }, { "epoch": 1.8748714954753716, "grad_norm": 0.02818424881162839, "learning_rate": 1.1870383161458497e-05, "loss": 0.564, "step": 36930 }, { "epoch": 1.875125331573403, "grad_norm": 0.02736099267068976, "learning_rate": 1.182244734625909e-05, "loss": 0.5746, "step": 36935 }, { "epoch": 1.8753791676714346, "grad_norm": 0.02582329760643321, "learning_rate": 1.1774607357878886e-05, "loss": 0.5658, "step": 36940 }, { "epoch": 1.8756330037694662, "grad_norm": 0.02610102307346908, "learning_rate": 1.1726863205708372e-05, "loss": 0.5495, "step": 36945 }, { "epoch": 1.8758868398674977, "grad_norm": 0.026190005683773297, "learning_rate": 1.1679214899119605e-05, "loss": 0.5717, "step": 36950 }, { "epoch": 1.876140675965529, "grad_norm": 0.027498210683944264, "learning_rate": 1.1631662447465719e-05, "loss": 0.5688, "step": 36955 }, { "epoch": 1.8763945120635606, "grad_norm": 0.028168370312422142, "learning_rate": 1.1584205860081021e-05, "loss": 0.5842, "step": 36960 }, { "epoch": 1.876648348161592, "grad_norm": 0.02684678005079372, "learning_rate": 1.153684514628095e-05, "loss": 0.5423, "step": 36965 }, { "epoch": 1.8769021842596234, "grad_norm": 0.0337734520793996, "learning_rate": 1.1489580315362292e-05, "loss": 0.5873, "step": 36970 }, { "epoch": 1.877156020357655, "grad_norm": 0.025502985968799578, "learning_rate": 1.1442411376602679e-05, "loss": 0.5509, "step": 36975 }, { "epoch": 1.8774098564556865, "grad_norm": 0.02784081137637154, "learning_rate": 1.139533833926143e-05, "loss": 0.5528, "step": 36980 }, { "epoch": 1.877663692553718, "grad_norm": 0.02604447162430491, "learning_rate": 1.1348361212578484e-05, "loss": 0.5651, "step": 36985 }, { "epoch": 1.8779175286517495, "grad_norm": 0.029110963228896315, "learning_rate": 1.1301480005775412e-05, "loss": 0.5981, "step": 36990 }, { "epoch": 1.878171364749781, "grad_norm": 0.02854564330410588, "learning_rate": 1.1254694728054626e-05, "loss": 0.5678, "step": 36995 }, { "epoch": 1.8784252008478126, "grad_norm": 0.025453434027912235, "learning_rate": 1.1208005388599951e-05, "loss": 0.5926, "step": 37000 }, { "epoch": 1.8786790369458441, "grad_norm": 0.027869214055381366, "learning_rate": 1.1161411996576165e-05, "loss": 0.6055, "step": 37005 }, { "epoch": 1.8789328730438757, "grad_norm": 0.02896707195756648, "learning_rate": 1.1114914561129396e-05, "loss": 0.5828, "step": 37010 }, { "epoch": 1.8791867091419072, "grad_norm": 0.029442531271252438, "learning_rate": 1.106851309138679e-05, "loss": 0.599, "step": 37015 }, { "epoch": 1.8794405452399385, "grad_norm": 0.02566765014337398, "learning_rate": 1.1022207596456835e-05, "loss": 0.5523, "step": 37020 }, { "epoch": 1.87969438133797, "grad_norm": 0.025699098897615985, "learning_rate": 1.0975998085428984e-05, "loss": 0.537, "step": 37025 }, { "epoch": 1.8799482174360016, "grad_norm": 0.027241263190426107, "learning_rate": 1.0929884567373927e-05, "loss": 0.5964, "step": 37030 }, { "epoch": 1.880202053534033, "grad_norm": 0.025743101034059582, "learning_rate": 1.0883867051343533e-05, "loss": 0.5787, "step": 37035 }, { "epoch": 1.8804558896320644, "grad_norm": 0.02399163309042105, "learning_rate": 1.0837945546370798e-05, "loss": 0.5431, "step": 37040 }, { "epoch": 1.880709725730096, "grad_norm": 0.028898014746488107, "learning_rate": 1.0792120061469956e-05, "loss": 0.5624, "step": 37045 }, { "epoch": 1.8809635618281275, "grad_norm": 0.025743601421365263, "learning_rate": 1.0746390605636259e-05, "loss": 0.5841, "step": 37050 }, { "epoch": 1.881217397926159, "grad_norm": 0.027590971962768628, "learning_rate": 1.0700757187846188e-05, "loss": 0.5992, "step": 37055 }, { "epoch": 1.8814712340241906, "grad_norm": 0.025037530486062005, "learning_rate": 1.065521981705736e-05, "loss": 0.5558, "step": 37060 }, { "epoch": 1.8817250701222221, "grad_norm": 0.030334428964144126, "learning_rate": 1.0609778502208512e-05, "loss": 0.5586, "step": 37065 }, { "epoch": 1.8819789062202537, "grad_norm": 0.025081078874794677, "learning_rate": 1.0564433252219507e-05, "loss": 0.5741, "step": 37070 }, { "epoch": 1.8822327423182852, "grad_norm": 0.02485200049401627, "learning_rate": 1.0519184075991505e-05, "loss": 0.5658, "step": 37075 }, { "epoch": 1.8824865784163167, "grad_norm": 0.02591777605686816, "learning_rate": 1.0474030982406624e-05, "loss": 0.6094, "step": 37080 }, { "epoch": 1.882740414514348, "grad_norm": 0.025716616076104478, "learning_rate": 1.0428973980328216e-05, "loss": 0.5582, "step": 37085 }, { "epoch": 1.8829942506123796, "grad_norm": 0.02364283680415937, "learning_rate": 1.038401307860065e-05, "loss": 0.5379, "step": 37090 }, { "epoch": 1.883248086710411, "grad_norm": 0.025159546131693928, "learning_rate": 1.0339148286049705e-05, "loss": 0.5716, "step": 37095 }, { "epoch": 1.8835019228084426, "grad_norm": 0.027021508082619335, "learning_rate": 1.0294379611481885e-05, "loss": 0.5658, "step": 37100 }, { "epoch": 1.883755758906474, "grad_norm": 0.0263087153176971, "learning_rate": 1.0249707063685277e-05, "loss": 0.6022, "step": 37105 }, { "epoch": 1.8840095950045055, "grad_norm": 0.024311776614764323, "learning_rate": 1.0205130651428806e-05, "loss": 0.57, "step": 37110 }, { "epoch": 1.884263431102537, "grad_norm": 0.025997121940538596, "learning_rate": 1.0160650383462588e-05, "loss": 0.5597, "step": 37115 }, { "epoch": 1.8845172672005686, "grad_norm": 0.026749415696329673, "learning_rate": 1.0116266268517805e-05, "loss": 0.5247, "step": 37120 }, { "epoch": 1.8847711032986, "grad_norm": 0.026289601591454895, "learning_rate": 1.0071978315306984e-05, "loss": 0.576, "step": 37125 }, { "epoch": 1.8850249393966316, "grad_norm": 0.027700521630013564, "learning_rate": 1.0027786532523508e-05, "loss": 0.5928, "step": 37130 }, { "epoch": 1.8852787754946632, "grad_norm": 0.02649482164247028, "learning_rate": 9.983690928842105e-06, "loss": 0.5832, "step": 37135 }, { "epoch": 1.8855326115926947, "grad_norm": 0.026663643147640224, "learning_rate": 9.939691512918404e-06, "loss": 0.5538, "step": 37140 }, { "epoch": 1.8857864476907262, "grad_norm": 0.026083566896938647, "learning_rate": 9.895788293389385e-06, "loss": 0.5933, "step": 37145 }, { "epoch": 1.8860402837887578, "grad_norm": 0.0255975748000361, "learning_rate": 9.851981278872878e-06, "loss": 0.5789, "step": 37150 }, { "epoch": 1.886294119886789, "grad_norm": 0.028970832514264744, "learning_rate": 9.808270477968173e-06, "loss": 0.5747, "step": 37155 }, { "epoch": 1.8865479559848206, "grad_norm": 0.02737031767637999, "learning_rate": 9.764655899255347e-06, "loss": 0.5916, "step": 37160 }, { "epoch": 1.8868017920828521, "grad_norm": 0.026293837348347737, "learning_rate": 9.721137551295778e-06, "loss": 0.58, "step": 37165 }, { "epoch": 1.8870556281808835, "grad_norm": 0.030347062709316995, "learning_rate": 9.677715442631962e-06, "loss": 0.5682, "step": 37170 }, { "epoch": 1.887309464278915, "grad_norm": 0.027806463684118488, "learning_rate": 9.63438958178725e-06, "loss": 0.5938, "step": 37175 }, { "epoch": 1.8875633003769465, "grad_norm": 0.0264742607666662, "learning_rate": 9.591159977266506e-06, "loss": 0.5729, "step": 37180 }, { "epoch": 1.887817136474978, "grad_norm": 0.024797934168588068, "learning_rate": 9.54802663755533e-06, "loss": 0.5584, "step": 37185 }, { "epoch": 1.8880709725730096, "grad_norm": 0.029038149647414125, "learning_rate": 9.504989571120726e-06, "loss": 0.5853, "step": 37190 }, { "epoch": 1.8883248086710411, "grad_norm": 0.02671462603170711, "learning_rate": 9.462048786410492e-06, "loss": 0.5606, "step": 37195 }, { "epoch": 1.8885786447690727, "grad_norm": 0.03088319116352382, "learning_rate": 9.419204291853834e-06, "loss": 0.5644, "step": 37200 }, { "epoch": 1.8888324808671042, "grad_norm": 0.02602771256937326, "learning_rate": 9.376456095860798e-06, "loss": 0.5812, "step": 37205 }, { "epoch": 1.8890863169651357, "grad_norm": 0.027965124737644426, "learning_rate": 9.333804206822726e-06, "loss": 0.581, "step": 37210 }, { "epoch": 1.8893401530631673, "grad_norm": 0.025520251459449445, "learning_rate": 9.291248633111927e-06, "loss": 0.5461, "step": 37215 }, { "epoch": 1.8895939891611986, "grad_norm": 0.026875058092326116, "learning_rate": 9.248789383081879e-06, "loss": 0.5938, "step": 37220 }, { "epoch": 1.8898478252592301, "grad_norm": 0.02863008307840187, "learning_rate": 9.206426465067031e-06, "loss": 0.5721, "step": 37225 }, { "epoch": 1.8901016613572617, "grad_norm": 0.02558275141765558, "learning_rate": 9.164159887383172e-06, "loss": 0.5606, "step": 37230 }, { "epoch": 1.890355497455293, "grad_norm": 0.027147099762133177, "learning_rate": 9.12198965832689e-06, "loss": 0.5747, "step": 37235 }, { "epoch": 1.8906093335533245, "grad_norm": 0.026488117202584198, "learning_rate": 9.079915786176063e-06, "loss": 0.5746, "step": 37240 }, { "epoch": 1.890863169651356, "grad_norm": 0.02563602598351144, "learning_rate": 9.037938279189528e-06, "loss": 0.546, "step": 37245 }, { "epoch": 1.8911170057493876, "grad_norm": 0.024411458840358606, "learning_rate": 8.996057145607306e-06, "loss": 0.561, "step": 37250 }, { "epoch": 1.891370841847419, "grad_norm": 0.026419390889227585, "learning_rate": 8.95427239365043e-06, "loss": 0.5677, "step": 37255 }, { "epoch": 1.8916246779454506, "grad_norm": 0.02695539279068827, "learning_rate": 8.912584031521065e-06, "loss": 0.5812, "step": 37260 }, { "epoch": 1.8918785140434822, "grad_norm": 0.024013152537925662, "learning_rate": 8.870992067402384e-06, "loss": 0.5714, "step": 37265 }, { "epoch": 1.8921323501415137, "grad_norm": 0.025882802750407656, "learning_rate": 8.82949650945869e-06, "loss": 0.5643, "step": 37270 }, { "epoch": 1.8923861862395452, "grad_norm": 0.031260042893550694, "learning_rate": 8.788097365835358e-06, "loss": 0.5331, "step": 37275 }, { "epoch": 1.8926400223375768, "grad_norm": 0.026972413747748503, "learning_rate": 8.746794644658828e-06, "loss": 0.5276, "step": 37280 }, { "epoch": 1.892893858435608, "grad_norm": 0.02689033097733921, "learning_rate": 8.705588354036676e-06, "loss": 0.6022, "step": 37285 }, { "epoch": 1.8931476945336396, "grad_norm": 0.027429465928063584, "learning_rate": 8.664478502057427e-06, "loss": 0.5394, "step": 37290 }, { "epoch": 1.8934015306316712, "grad_norm": 0.027750615384527694, "learning_rate": 8.623465096790794e-06, "loss": 0.5672, "step": 37295 }, { "epoch": 1.8936553667297025, "grad_norm": 0.026124515562572163, "learning_rate": 8.582548146287395e-06, "loss": 0.5733, "step": 37300 }, { "epoch": 1.893909202827734, "grad_norm": 0.025588894679432494, "learning_rate": 8.541727658579191e-06, "loss": 0.5596, "step": 37305 }, { "epoch": 1.8941630389257655, "grad_norm": 0.027947103929922553, "learning_rate": 8.501003641678885e-06, "loss": 0.5875, "step": 37310 }, { "epoch": 1.894416875023797, "grad_norm": 0.025859331539862283, "learning_rate": 8.460376103580526e-06, "loss": 0.5719, "step": 37315 }, { "epoch": 1.8946707111218286, "grad_norm": 0.027945653323018127, "learning_rate": 8.419845052258956e-06, "loss": 0.5992, "step": 37320 }, { "epoch": 1.8949245472198601, "grad_norm": 0.025991656260660467, "learning_rate": 8.37941049567037e-06, "loss": 0.5763, "step": 37325 }, { "epoch": 1.8951783833178917, "grad_norm": 0.02596907741938166, "learning_rate": 8.339072441751749e-06, "loss": 0.5703, "step": 37330 }, { "epoch": 1.8954322194159232, "grad_norm": 0.025382628117074365, "learning_rate": 8.298830898421316e-06, "loss": 0.5976, "step": 37335 }, { "epoch": 1.8956860555139547, "grad_norm": 0.027160241301718163, "learning_rate": 8.258685873578198e-06, "loss": 0.605, "step": 37340 }, { "epoch": 1.8959398916119863, "grad_norm": 0.026105375909172394, "learning_rate": 8.218637375102866e-06, "loss": 0.5317, "step": 37345 }, { "epoch": 1.8961937277100176, "grad_norm": 0.027030412936622502, "learning_rate": 8.178685410856424e-06, "loss": 0.5857, "step": 37350 }, { "epoch": 1.8964475638080491, "grad_norm": 0.02714392518562358, "learning_rate": 8.138829988681318e-06, "loss": 0.5743, "step": 37355 }, { "epoch": 1.8967013999060807, "grad_norm": 0.02469416733304674, "learning_rate": 8.09907111640107e-06, "loss": 0.569, "step": 37360 }, { "epoch": 1.8969552360041122, "grad_norm": 0.030880380033314024, "learning_rate": 8.059408801819934e-06, "loss": 0.5724, "step": 37365 }, { "epoch": 1.8972090721021435, "grad_norm": 0.028073978511295682, "learning_rate": 8.01984305272363e-06, "loss": 0.5738, "step": 37370 }, { "epoch": 1.897462908200175, "grad_norm": 0.024777859820155068, "learning_rate": 7.98037387687861e-06, "loss": 0.5608, "step": 37375 }, { "epoch": 1.8977167442982066, "grad_norm": 0.027112040768232606, "learning_rate": 7.941001282032512e-06, "loss": 0.5276, "step": 37380 }, { "epoch": 1.8979705803962381, "grad_norm": 0.02649951423714033, "learning_rate": 7.90172527591393e-06, "loss": 0.5583, "step": 37385 }, { "epoch": 1.8982244164942697, "grad_norm": 0.02559514795262262, "learning_rate": 7.862545866232585e-06, "loss": 0.5839, "step": 37390 }, { "epoch": 1.8984782525923012, "grad_norm": 0.025258334048110342, "learning_rate": 7.823463060679215e-06, "loss": 0.5655, "step": 37395 }, { "epoch": 1.8987320886903327, "grad_norm": 0.026995657582077278, "learning_rate": 7.784476866925571e-06, "loss": 0.5878, "step": 37400 }, { "epoch": 1.8989859247883643, "grad_norm": 0.026614511526167187, "learning_rate": 7.745587292624423e-06, "loss": 0.575, "step": 37405 }, { "epoch": 1.8992397608863958, "grad_norm": 0.02631240140576987, "learning_rate": 7.706794345409662e-06, "loss": 0.5975, "step": 37410 }, { "epoch": 1.899493596984427, "grad_norm": 0.026463916575504228, "learning_rate": 7.668098032896086e-06, "loss": 0.5627, "step": 37415 }, { "epoch": 1.8997474330824586, "grad_norm": 0.026463835502163142, "learning_rate": 7.629498362679621e-06, "loss": 0.5573, "step": 37420 }, { "epoch": 1.9000012691804902, "grad_norm": 0.02759025163295101, "learning_rate": 7.590995342337148e-06, "loss": 0.5873, "step": 37425 }, { "epoch": 1.9002551052785217, "grad_norm": 0.02549152334742565, "learning_rate": 7.552588979426733e-06, "loss": 0.5916, "step": 37430 }, { "epoch": 1.900508941376553, "grad_norm": 0.026145476398963013, "learning_rate": 7.514279281487179e-06, "loss": 0.582, "step": 37435 }, { "epoch": 1.9007627774745846, "grad_norm": 0.02577900877121215, "learning_rate": 7.476066256038638e-06, "loss": 0.5653, "step": 37440 }, { "epoch": 1.901016613572616, "grad_norm": 0.02856545521010206, "learning_rate": 7.437949910581998e-06, "loss": 0.562, "step": 37445 }, { "epoch": 1.9012704496706476, "grad_norm": 0.02547015056852988, "learning_rate": 7.399930252599496e-06, "loss": 0.5988, "step": 37450 }, { "epoch": 1.9015242857686792, "grad_norm": 0.025877219136309235, "learning_rate": 7.362007289553996e-06, "loss": 0.5766, "step": 37455 }, { "epoch": 1.9017781218667107, "grad_norm": 0.025369438944289006, "learning_rate": 7.324181028889709e-06, "loss": 0.5619, "step": 37460 }, { "epoch": 1.9020319579647422, "grad_norm": 0.027662625556543488, "learning_rate": 7.286451478031753e-06, "loss": 0.5786, "step": 37465 }, { "epoch": 1.9022857940627738, "grad_norm": 0.02652637693177889, "learning_rate": 7.2488186443862015e-06, "loss": 0.5675, "step": 37470 }, { "epoch": 1.9025396301608053, "grad_norm": 0.025938398887209828, "learning_rate": 7.211282535340202e-06, "loss": 0.5994, "step": 37475 }, { "epoch": 1.9027934662588368, "grad_norm": 0.025702548170370845, "learning_rate": 7.173843158261861e-06, "loss": 0.5762, "step": 37480 }, { "epoch": 1.9030473023568681, "grad_norm": 0.024913378541144768, "learning_rate": 7.136500520500466e-06, "loss": 0.5339, "step": 37485 }, { "epoch": 1.9033011384548997, "grad_norm": 0.024771963607800134, "learning_rate": 7.0992546293860425e-06, "loss": 0.5448, "step": 37490 }, { "epoch": 1.9035549745529312, "grad_norm": 0.030091179720477153, "learning_rate": 7.062105492229909e-06, "loss": 0.5907, "step": 37495 }, { "epoch": 1.9038088106509625, "grad_norm": 0.026673561068603708, "learning_rate": 7.02505311632412e-06, "loss": 0.5579, "step": 37500 }, { "epoch": 1.904062646748994, "grad_norm": 0.02615757668120422, "learning_rate": 6.988097508942026e-06, "loss": 0.6059, "step": 37505 }, { "epoch": 1.9043164828470256, "grad_norm": 0.0264097863491924, "learning_rate": 6.951238677337657e-06, "loss": 0.5729, "step": 37510 }, { "epoch": 1.9045703189450571, "grad_norm": 0.029014131124990617, "learning_rate": 6.914476628746391e-06, "loss": 0.5771, "step": 37515 }, { "epoch": 1.9048241550430887, "grad_norm": 0.027080950229215903, "learning_rate": 6.8778113703842345e-06, "loss": 0.5616, "step": 37520 }, { "epoch": 1.9050779911411202, "grad_norm": 0.028090406175576146, "learning_rate": 6.8412429094485975e-06, "loss": 0.5788, "step": 37525 }, { "epoch": 1.9053318272391517, "grad_norm": 0.029645595678468018, "learning_rate": 6.80477125311757e-06, "loss": 0.5845, "step": 37530 }, { "epoch": 1.9055856633371833, "grad_norm": 0.025844439203782373, "learning_rate": 6.768396408550426e-06, "loss": 0.551, "step": 37535 }, { "epoch": 1.9058394994352148, "grad_norm": 0.03373040616638067, "learning_rate": 6.732118382887287e-06, "loss": 0.5514, "step": 37540 }, { "epoch": 1.9060933355332463, "grad_norm": 0.02807409207040195, "learning_rate": 6.695937183249401e-06, "loss": 0.5738, "step": 37545 }, { "epoch": 1.9063471716312776, "grad_norm": 0.028252820454627608, "learning_rate": 6.6598528167389205e-06, "loss": 0.5753, "step": 37550 }, { "epoch": 1.9066010077293092, "grad_norm": 0.02732035686471038, "learning_rate": 6.623865290439068e-06, "loss": 0.5563, "step": 37555 }, { "epoch": 1.9068548438273407, "grad_norm": 0.027780879398996337, "learning_rate": 6.587974611413972e-06, "loss": 0.5821, "step": 37560 }, { "epoch": 1.907108679925372, "grad_norm": 0.02593853564160558, "learning_rate": 6.552180786708828e-06, "loss": 0.5562, "step": 37565 }, { "epoch": 1.9073625160234036, "grad_norm": 0.025967484495699657, "learning_rate": 6.516483823349795e-06, "loss": 0.6042, "step": 37570 }, { "epoch": 1.907616352121435, "grad_norm": 0.026149788808303858, "learning_rate": 6.480883728343989e-06, "loss": 0.5716, "step": 37575 }, { "epoch": 1.9078701882194666, "grad_norm": 0.02695280722139515, "learning_rate": 6.445380508679488e-06, "loss": 0.5316, "step": 37580 }, { "epoch": 1.9081240243174982, "grad_norm": 0.02728154940420993, "learning_rate": 6.4099741713254945e-06, "loss": 0.5641, "step": 37585 }, { "epoch": 1.9083778604155297, "grad_norm": 0.026367906890524366, "learning_rate": 6.374664723232004e-06, "loss": 0.5592, "step": 37590 }, { "epoch": 1.9086316965135612, "grad_norm": 0.02400799715690841, "learning_rate": 6.33945217133014e-06, "loss": 0.5553, "step": 37595 }, { "epoch": 1.9088855326115928, "grad_norm": 0.024963204242097574, "learning_rate": 6.304336522531928e-06, "loss": 0.6035, "step": 37600 }, { "epoch": 1.9091393687096243, "grad_norm": 0.02700079456073282, "learning_rate": 6.26931778373041e-06, "loss": 0.5507, "step": 37605 }, { "epoch": 1.9093932048076558, "grad_norm": 0.023840144372670416, "learning_rate": 6.234395961799588e-06, "loss": 0.5592, "step": 37610 }, { "epoch": 1.9096470409056872, "grad_norm": 0.02650155080852087, "learning_rate": 6.199571063594423e-06, "loss": 0.5795, "step": 37615 }, { "epoch": 1.9099008770037187, "grad_norm": 0.029307425281309852, "learning_rate": 6.164843095950889e-06, "loss": 0.6132, "step": 37620 }, { "epoch": 1.9101547131017502, "grad_norm": 0.02747656136786626, "learning_rate": 6.13021206568587e-06, "loss": 0.5708, "step": 37625 }, { "epoch": 1.9104085491997815, "grad_norm": 0.02649832913121192, "learning_rate": 6.095677979597314e-06, "loss": 0.5658, "step": 37630 }, { "epoch": 1.910662385297813, "grad_norm": 0.02349879000203609, "learning_rate": 6.0612408444640775e-06, "loss": 0.5422, "step": 37635 }, { "epoch": 1.9109162213958446, "grad_norm": 0.02911261332072561, "learning_rate": 6.026900667045976e-06, "loss": 0.5807, "step": 37640 }, { "epoch": 1.9111700574938761, "grad_norm": 0.02548916220635181, "learning_rate": 5.992657454083839e-06, "loss": 0.5425, "step": 37645 }, { "epoch": 1.9114238935919077, "grad_norm": 0.02771292406451384, "learning_rate": 5.958511212299455e-06, "loss": 0.5887, "step": 37650 }, { "epoch": 1.9116777296899392, "grad_norm": 0.02704741131934364, "learning_rate": 5.9244619483955206e-06, "loss": 0.5852, "step": 37655 }, { "epoch": 1.9119315657879707, "grad_norm": 0.023754874541861235, "learning_rate": 5.890509669055799e-06, "loss": 0.5898, "step": 37660 }, { "epoch": 1.9121854018860023, "grad_norm": 0.02638601030812135, "learning_rate": 5.856654380944848e-06, "loss": 0.6039, "step": 37665 }, { "epoch": 1.9124392379840338, "grad_norm": 0.027262828131450437, "learning_rate": 5.822896090708407e-06, "loss": 0.5371, "step": 37670 }, { "epoch": 1.9126930740820653, "grad_norm": 0.02704021823416584, "learning_rate": 5.789234804972954e-06, "loss": 0.5731, "step": 37675 }, { "epoch": 1.9129469101800967, "grad_norm": 0.0294348096859636, "learning_rate": 5.755670530346146e-06, "loss": 0.581, "step": 37680 }, { "epoch": 1.9132007462781282, "grad_norm": 0.02910723196766121, "learning_rate": 5.722203273416326e-06, "loss": 0.5942, "step": 37685 }, { "epoch": 1.9134545823761597, "grad_norm": 0.026934713038938792, "learning_rate": 5.6888330407531275e-06, "loss": 0.6015, "step": 37690 }, { "epoch": 1.9137084184741913, "grad_norm": 0.028119147312119834, "learning_rate": 5.6555598389068656e-06, "loss": 0.6161, "step": 37695 }, { "epoch": 1.9139622545722226, "grad_norm": 0.024964497901300786, "learning_rate": 5.622383674408871e-06, "loss": 0.5524, "step": 37700 }, { "epoch": 1.9142160906702541, "grad_norm": 0.027431659630677015, "learning_rate": 5.589304553771546e-06, "loss": 0.5944, "step": 37705 }, { "epoch": 1.9144699267682856, "grad_norm": 0.02722290199613922, "learning_rate": 5.556322483488086e-06, "loss": 0.5602, "step": 37710 }, { "epoch": 1.9147237628663172, "grad_norm": 0.025359272526615056, "learning_rate": 5.523437470032755e-06, "loss": 0.5593, "step": 37715 }, { "epoch": 1.9149775989643487, "grad_norm": 0.023488490477485007, "learning_rate": 5.4906495198607246e-06, "loss": 0.5357, "step": 37720 }, { "epoch": 1.9152314350623802, "grad_norm": 0.0246115024086358, "learning_rate": 5.457958639408067e-06, "loss": 0.5664, "step": 37725 }, { "epoch": 1.9154852711604118, "grad_norm": 0.026670977298243925, "learning_rate": 5.425364835091817e-06, "loss": 0.5471, "step": 37730 }, { "epoch": 1.9157391072584433, "grad_norm": 0.027199843043641092, "learning_rate": 5.392868113310023e-06, "loss": 0.5992, "step": 37735 }, { "epoch": 1.9159929433564749, "grad_norm": 0.027602046418108706, "learning_rate": 5.3604684804416385e-06, "loss": 0.5822, "step": 37740 }, { "epoch": 1.9162467794545064, "grad_norm": 0.02849117428848076, "learning_rate": 5.328165942846519e-06, "loss": 0.5861, "step": 37745 }, { "epoch": 1.9165006155525377, "grad_norm": 0.02814777525533936, "learning_rate": 5.2959605068654825e-06, "loss": 0.5634, "step": 37750 }, { "epoch": 1.9167544516505692, "grad_norm": 0.024898077144596454, "learning_rate": 5.263852178820305e-06, "loss": 0.5268, "step": 37755 }, { "epoch": 1.9170082877486008, "grad_norm": 0.026279452647359036, "learning_rate": 5.231840965013668e-06, "loss": 0.5847, "step": 37760 }, { "epoch": 1.917262123846632, "grad_norm": 0.026106028697481912, "learning_rate": 5.199926871729321e-06, "loss": 0.6171, "step": 37765 }, { "epoch": 1.9175159599446636, "grad_norm": 0.031151597125152804, "learning_rate": 5.1681099052317545e-06, "loss": 0.5568, "step": 37770 }, { "epoch": 1.9177697960426952, "grad_norm": 0.030711265213588357, "learning_rate": 5.136390071766472e-06, "loss": 0.5515, "step": 37775 }, { "epoch": 1.9180236321407267, "grad_norm": 0.02716635079540998, "learning_rate": 5.104767377559938e-06, "loss": 0.586, "step": 37780 }, { "epoch": 1.9182774682387582, "grad_norm": 0.02559855058387933, "learning_rate": 5.073241828819519e-06, "loss": 0.5771, "step": 37785 }, { "epoch": 1.9185313043367898, "grad_norm": 0.031302602221972384, "learning_rate": 5.041813431733544e-06, "loss": 0.5734, "step": 37790 }, { "epoch": 1.9187851404348213, "grad_norm": 0.0258771187155332, "learning_rate": 5.010482192471244e-06, "loss": 0.5746, "step": 37795 }, { "epoch": 1.9190389765328528, "grad_norm": 0.024995208015619996, "learning_rate": 4.9792481171828105e-06, "loss": 0.5666, "step": 37800 }, { "epoch": 1.9192928126308844, "grad_norm": 0.02854607672778443, "learning_rate": 4.948111211999284e-06, "loss": 0.5836, "step": 37805 }, { "epoch": 1.919546648728916, "grad_norm": 0.027572999962288344, "learning_rate": 4.917071483032665e-06, "loss": 0.5698, "step": 37810 }, { "epoch": 1.9198004848269472, "grad_norm": 0.027248046157491886, "learning_rate": 4.886128936375966e-06, "loss": 0.5917, "step": 37815 }, { "epoch": 1.9200543209249787, "grad_norm": 0.025472181337880206, "learning_rate": 4.855283578103054e-06, "loss": 0.5544, "step": 37820 }, { "epoch": 1.9203081570230103, "grad_norm": 0.027474558161127566, "learning_rate": 4.824535414268638e-06, "loss": 0.5857, "step": 37825 }, { "epoch": 1.9205619931210416, "grad_norm": 0.03028430269461977, "learning_rate": 4.793884450908559e-06, "loss": 0.5884, "step": 37830 }, { "epoch": 1.9208158292190731, "grad_norm": 0.026859591708732748, "learning_rate": 4.763330694039281e-06, "loss": 0.5809, "step": 37835 }, { "epoch": 1.9210696653171047, "grad_norm": 0.026545657955760475, "learning_rate": 4.7328741496585615e-06, "loss": 0.5951, "step": 37840 }, { "epoch": 1.9213235014151362, "grad_norm": 0.027047108527638097, "learning_rate": 4.7025148237446745e-06, "loss": 0.5809, "step": 37845 }, { "epoch": 1.9215773375131677, "grad_norm": 0.029688524549730866, "learning_rate": 4.672252722257076e-06, "loss": 0.5839, "step": 37850 }, { "epoch": 1.9218311736111993, "grad_norm": 0.027315346919804247, "learning_rate": 4.642087851136123e-06, "loss": 0.5712, "step": 37855 }, { "epoch": 1.9220850097092308, "grad_norm": 0.033852445880514344, "learning_rate": 4.61202021630297e-06, "loss": 0.5581, "step": 37860 }, { "epoch": 1.9223388458072623, "grad_norm": 0.02787035283508809, "learning_rate": 4.582049823659673e-06, "loss": 0.5803, "step": 37865 }, { "epoch": 1.9225926819052939, "grad_norm": 0.026484267635250636, "learning_rate": 4.55217667908947e-06, "loss": 0.6, "step": 37870 }, { "epoch": 1.9228465180033254, "grad_norm": 0.027082162746054732, "learning_rate": 4.522400788456115e-06, "loss": 0.5852, "step": 37875 }, { "epoch": 1.9231003541013567, "grad_norm": 0.02756235562623746, "learning_rate": 4.492722157604545e-06, "loss": 0.586, "step": 37880 }, { "epoch": 1.9233541901993882, "grad_norm": 0.030977424492649126, "learning_rate": 4.463140792360487e-06, "loss": 0.5624, "step": 37885 }, { "epoch": 1.9236080262974198, "grad_norm": 0.027505992043633506, "learning_rate": 4.433656698530741e-06, "loss": 0.5698, "step": 37890 }, { "epoch": 1.923861862395451, "grad_norm": 0.025201629909847943, "learning_rate": 4.404269881902734e-06, "loss": 0.5701, "step": 37895 }, { "epoch": 1.9241156984934826, "grad_norm": 0.02743353600460854, "learning_rate": 4.374980348245072e-06, "loss": 0.5542, "step": 37900 }, { "epoch": 1.9243695345915142, "grad_norm": 0.02493501716192832, "learning_rate": 4.345788103307047e-06, "loss": 0.5786, "step": 37905 }, { "epoch": 1.9246233706895457, "grad_norm": 0.02768825614796075, "learning_rate": 4.316693152819018e-06, "loss": 0.6006, "step": 37910 }, { "epoch": 1.9248772067875772, "grad_norm": 0.02543485555286312, "learning_rate": 4.287695502492139e-06, "loss": 0.5725, "step": 37915 }, { "epoch": 1.9251310428856088, "grad_norm": 0.02644184317018769, "learning_rate": 4.25879515801858e-06, "loss": 0.5708, "step": 37920 }, { "epoch": 1.9253848789836403, "grad_norm": 0.025982457428351476, "learning_rate": 4.229992125071192e-06, "loss": 0.5608, "step": 37925 }, { "epoch": 1.9256387150816718, "grad_norm": 0.02690819396201029, "learning_rate": 4.201286409304006e-06, "loss": 0.5672, "step": 37930 }, { "epoch": 1.9258925511797034, "grad_norm": 0.027371569152807883, "learning_rate": 4.172678016351683e-06, "loss": 0.5449, "step": 37935 }, { "epoch": 1.926146387277735, "grad_norm": 0.027353611752972677, "learning_rate": 4.1441669518300086e-06, "loss": 0.5824, "step": 37940 }, { "epoch": 1.9264002233757662, "grad_norm": 0.02534337379880528, "learning_rate": 4.115753221335561e-06, "loss": 0.5724, "step": 37945 }, { "epoch": 1.9266540594737978, "grad_norm": 0.02678591503758515, "learning_rate": 4.087436830445768e-06, "loss": 0.5574, "step": 37950 }, { "epoch": 1.9269078955718293, "grad_norm": 0.027454676192790017, "learning_rate": 4.059217784719016e-06, "loss": 0.5657, "step": 37955 }, { "epoch": 1.9271617316698608, "grad_norm": 0.026603843158610224, "learning_rate": 4.0310960896945415e-06, "loss": 0.5917, "step": 37960 }, { "epoch": 1.9274155677678921, "grad_norm": 0.027312349923342093, "learning_rate": 4.003071750892595e-06, "loss": 0.5722, "step": 37965 }, { "epoch": 1.9276694038659237, "grad_norm": 0.026261931787459957, "learning_rate": 3.9751447738140545e-06, "loss": 0.588, "step": 37970 }, { "epoch": 1.9279232399639552, "grad_norm": 0.027490929422034894, "learning_rate": 3.9473151639409235e-06, "loss": 0.5905, "step": 37975 }, { "epoch": 1.9281770760619867, "grad_norm": 0.027039305023795986, "learning_rate": 3.919582926735999e-06, "loss": 0.5642, "step": 37980 }, { "epoch": 1.9284309121600183, "grad_norm": 0.02542038861802734, "learning_rate": 3.891948067643036e-06, "loss": 0.5825, "step": 37985 }, { "epoch": 1.9286847482580498, "grad_norm": 0.025348977423598745, "learning_rate": 3.864410592086587e-06, "loss": 0.5756, "step": 37990 }, { "epoch": 1.9289385843560813, "grad_norm": 0.026890365414191385, "learning_rate": 3.836970505472104e-06, "loss": 0.5496, "step": 37995 }, { "epoch": 1.9291924204541129, "grad_norm": 0.026665489023465556, "learning_rate": 3.8096278131859452e-06, "loss": 0.5804, "step": 38000 }, { "epoch": 1.9294462565521444, "grad_norm": 0.02894734733954505, "learning_rate": 3.7823825205953177e-06, "loss": 0.575, "step": 38005 }, { "epoch": 1.929700092650176, "grad_norm": 0.028458343876803068, "learning_rate": 3.755234633048388e-06, "loss": 0.5693, "step": 38010 }, { "epoch": 1.9299539287482073, "grad_norm": 0.026316498756082222, "learning_rate": 3.7281841558741147e-06, "loss": 0.5687, "step": 38015 }, { "epoch": 1.9302077648462388, "grad_norm": 0.02462385302905774, "learning_rate": 3.7012310943824178e-06, "loss": 0.5689, "step": 38020 }, { "epoch": 1.9304616009442703, "grad_norm": 0.02744655919506136, "learning_rate": 3.6743754538640093e-06, "loss": 0.6009, "step": 38025 }, { "epoch": 1.9307154370423016, "grad_norm": 0.025426512720659815, "learning_rate": 3.6476172395905615e-06, "loss": 0.5637, "step": 38030 }, { "epoch": 1.9309692731403332, "grad_norm": 0.058843478338871484, "learning_rate": 3.6209564568144837e-06, "loss": 0.5889, "step": 38035 }, { "epoch": 1.9312231092383647, "grad_norm": 0.025717709019918232, "learning_rate": 3.5943931107692563e-06, "loss": 0.5679, "step": 38040 }, { "epoch": 1.9314769453363962, "grad_norm": 0.025390859814737064, "learning_rate": 3.567927206669097e-06, "loss": 0.5723, "step": 38045 }, { "epoch": 1.9317307814344278, "grad_norm": 0.026061449742197502, "learning_rate": 3.5415587497090727e-06, "loss": 0.576, "step": 38050 }, { "epoch": 1.9319846175324593, "grad_norm": 0.026843687363145793, "learning_rate": 3.515287745065321e-06, "loss": 0.5811, "step": 38055 }, { "epoch": 1.9322384536304908, "grad_norm": 0.02760187387495946, "learning_rate": 3.4891141978945497e-06, "loss": 0.5572, "step": 38060 }, { "epoch": 1.9324922897285224, "grad_norm": 0.026456358621662006, "learning_rate": 3.463038113334538e-06, "loss": 0.5801, "step": 38065 }, { "epoch": 1.932746125826554, "grad_norm": 0.025905923616227906, "learning_rate": 3.437059496503969e-06, "loss": 0.577, "step": 38070 }, { "epoch": 1.9329999619245855, "grad_norm": 0.02927395460857848, "learning_rate": 3.4111783525022646e-06, "loss": 0.6139, "step": 38075 }, { "epoch": 1.9332537980226168, "grad_norm": 0.029217960208774398, "learning_rate": 3.3853946864097486e-06, "loss": 0.59, "step": 38080 }, { "epoch": 1.9335076341206483, "grad_norm": 0.024713927884077197, "learning_rate": 3.3597085032876505e-06, "loss": 0.5569, "step": 38085 }, { "epoch": 1.9337614702186798, "grad_norm": 0.02703834327363661, "learning_rate": 3.3341198081780487e-06, "loss": 0.6036, "step": 38090 }, { "epoch": 1.9340153063167111, "grad_norm": 0.027536203070580457, "learning_rate": 3.3086286061038697e-06, "loss": 0.5949, "step": 38095 }, { "epoch": 1.9342691424147427, "grad_norm": 0.026570843365523098, "learning_rate": 3.283234902068888e-06, "loss": 0.5524, "step": 38100 }, { "epoch": 1.9345229785127742, "grad_norm": 0.042332728091517974, "learning_rate": 3.2579387010577277e-06, "loss": 0.5874, "step": 38105 }, { "epoch": 1.9347768146108058, "grad_norm": 0.02431499748221158, "learning_rate": 3.2327400080359725e-06, "loss": 0.5698, "step": 38110 }, { "epoch": 1.9350306507088373, "grad_norm": 0.026062876918449118, "learning_rate": 3.207638827949999e-06, "loss": 0.5578, "step": 38115 }, { "epoch": 1.9352844868068688, "grad_norm": 0.027310876101039813, "learning_rate": 3.1826351657270323e-06, "loss": 0.5901, "step": 38120 }, { "epoch": 1.9355383229049004, "grad_norm": 0.03003893516229613, "learning_rate": 3.1577290262750912e-06, "loss": 0.5731, "step": 38125 }, { "epoch": 1.935792159002932, "grad_norm": 0.02740473381234478, "learning_rate": 3.1329204144832647e-06, "loss": 0.567, "step": 38130 }, { "epoch": 1.9360459951009634, "grad_norm": 0.028019999585990836, "learning_rate": 3.108209335221268e-06, "loss": 0.5911, "step": 38135 }, { "epoch": 1.936299831198995, "grad_norm": 0.031154086101098048, "learning_rate": 3.0835957933397774e-06, "loss": 0.5933, "step": 38140 }, { "epoch": 1.9365536672970263, "grad_norm": 0.027127715649407625, "learning_rate": 3.0590797936703164e-06, "loss": 0.6191, "step": 38145 }, { "epoch": 1.9368075033950578, "grad_norm": 0.02396846662121213, "learning_rate": 3.034661341025258e-06, "loss": 0.544, "step": 38150 }, { "epoch": 1.9370613394930893, "grad_norm": 0.029169030009966523, "learning_rate": 3.010340440197823e-06, "loss": 0.584, "step": 38155 }, { "epoch": 1.9373151755911207, "grad_norm": 0.029342968326165446, "learning_rate": 2.986117095962082e-06, "loss": 0.5659, "step": 38160 }, { "epoch": 1.9375690116891522, "grad_norm": 0.029091742361008234, "learning_rate": 2.961991313072898e-06, "loss": 0.5406, "step": 38165 }, { "epoch": 1.9378228477871837, "grad_norm": 0.028125282294871094, "learning_rate": 2.9379630962661496e-06, "loss": 0.5844, "step": 38170 }, { "epoch": 1.9380766838852153, "grad_norm": 0.02617518986421098, "learning_rate": 2.914032450258397e-06, "loss": 0.577, "step": 38175 }, { "epoch": 1.9383305199832468, "grad_norm": 0.02395887217118883, "learning_rate": 2.890199379747105e-06, "loss": 0.552, "step": 38180 }, { "epoch": 1.9385843560812783, "grad_norm": 0.02770768270162409, "learning_rate": 2.8664638894105867e-06, "loss": 0.6009, "step": 38185 }, { "epoch": 1.9388381921793099, "grad_norm": 0.029960188244695783, "learning_rate": 2.8428259839079486e-06, "loss": 0.5959, "step": 38190 }, { "epoch": 1.9390920282773414, "grad_norm": 0.025635929332544483, "learning_rate": 2.819285667879312e-06, "loss": 0.59, "step": 38195 }, { "epoch": 1.939345864375373, "grad_norm": 0.029544233618550228, "learning_rate": 2.7958429459454817e-06, "loss": 0.5707, "step": 38200 }, { "epoch": 1.9395997004734045, "grad_norm": 0.027484890755394918, "learning_rate": 2.7724978227081086e-06, "loss": 0.553, "step": 38205 }, { "epoch": 1.9398535365714358, "grad_norm": 0.023817836701303215, "learning_rate": 2.7492503027496953e-06, "loss": 0.5307, "step": 38210 }, { "epoch": 1.9401073726694673, "grad_norm": 0.026065338192806112, "learning_rate": 2.726100390633757e-06, "loss": 0.5585, "step": 38215 }, { "epoch": 1.9403612087674988, "grad_norm": 0.02657044606749882, "learning_rate": 2.7030480909043254e-06, "loss": 0.5794, "step": 38220 }, { "epoch": 1.9406150448655304, "grad_norm": 0.025367967409138683, "learning_rate": 2.680093408086559e-06, "loss": 0.5923, "step": 38225 }, { "epoch": 1.9408688809635617, "grad_norm": 0.028047648188387866, "learning_rate": 2.6572363466863534e-06, "loss": 0.6065, "step": 38230 }, { "epoch": 1.9411227170615932, "grad_norm": 0.028989424035073586, "learning_rate": 2.6344769111903975e-06, "loss": 0.6064, "step": 38235 }, { "epoch": 1.9413765531596248, "grad_norm": 0.02838689316524152, "learning_rate": 2.6118151060662842e-06, "loss": 0.5471, "step": 38240 }, { "epoch": 1.9416303892576563, "grad_norm": 0.02903864430837102, "learning_rate": 2.589250935762344e-06, "loss": 0.5662, "step": 38245 }, { "epoch": 1.9418842253556878, "grad_norm": 0.026349639693820256, "learning_rate": 2.566784404707867e-06, "loss": 0.5523, "step": 38250 }, { "epoch": 1.9421380614537194, "grad_norm": 0.02748857564177342, "learning_rate": 2.5444155173129368e-06, "loss": 0.5796, "step": 38255 }, { "epoch": 1.942391897551751, "grad_norm": 0.02398479064280042, "learning_rate": 2.52214427796843e-06, "loss": 0.5431, "step": 38260 }, { "epoch": 1.9426457336497824, "grad_norm": 0.0274209715133097, "learning_rate": 2.499970691046127e-06, "loss": 0.5989, "step": 38265 }, { "epoch": 1.942899569747814, "grad_norm": 0.026398952008381455, "learning_rate": 2.4778947608984915e-06, "loss": 0.5795, "step": 38270 }, { "epoch": 1.9431534058458453, "grad_norm": 0.02719372717396355, "learning_rate": 2.4559164918590005e-06, "loss": 0.5673, "step": 38275 }, { "epoch": 1.9434072419438768, "grad_norm": 0.02855734749373924, "learning_rate": 2.4340358882418144e-06, "loss": 0.5551, "step": 38280 }, { "epoch": 1.9436610780419084, "grad_norm": 0.026573131979195603, "learning_rate": 2.412252954342109e-06, "loss": 0.5904, "step": 38285 }, { "epoch": 1.9439149141399399, "grad_norm": 0.025915257982720822, "learning_rate": 2.3905676944356303e-06, "loss": 0.5546, "step": 38290 }, { "epoch": 1.9441687502379712, "grad_norm": 0.025527503101607996, "learning_rate": 2.36898011277914e-06, "loss": 0.5739, "step": 38295 }, { "epoch": 1.9444225863360027, "grad_norm": 0.0283306082125643, "learning_rate": 2.3474902136101927e-06, "loss": 0.579, "step": 38300 }, { "epoch": 1.9446764224340343, "grad_norm": 0.026050472098431338, "learning_rate": 2.3260980011470258e-06, "loss": 0.5553, "step": 38305 }, { "epoch": 1.9449302585320658, "grad_norm": 0.028927172187049818, "learning_rate": 2.304803479589057e-06, "loss": 0.5659, "step": 38310 }, { "epoch": 1.9451840946300973, "grad_norm": 0.02697677499056412, "learning_rate": 2.2836066531161104e-06, "loss": 0.5787, "step": 38315 }, { "epoch": 1.9454379307281289, "grad_norm": 0.02625293579205151, "learning_rate": 2.2625075258890793e-06, "loss": 0.5701, "step": 38320 }, { "epoch": 1.9456917668261604, "grad_norm": 0.02771591950136747, "learning_rate": 2.2415061020495954e-06, "loss": 0.5785, "step": 38325 }, { "epoch": 1.945945602924192, "grad_norm": 0.02813557031152894, "learning_rate": 2.2206023857201386e-06, "loss": 0.5567, "step": 38330 }, { "epoch": 1.9461994390222235, "grad_norm": 0.02880524778629422, "learning_rate": 2.199796381004038e-06, "loss": 0.5591, "step": 38335 }, { "epoch": 1.946453275120255, "grad_norm": 0.028097608678148134, "learning_rate": 2.1790880919853595e-06, "loss": 0.5809, "step": 38340 }, { "epoch": 1.9467071112182863, "grad_norm": 0.027547669155710962, "learning_rate": 2.1584775227290745e-06, "loss": 0.5788, "step": 38345 }, { "epoch": 1.9469609473163179, "grad_norm": 0.02872343926300039, "learning_rate": 2.1379646772808903e-06, "loss": 0.5749, "step": 38350 }, { "epoch": 1.9472147834143494, "grad_norm": 0.026979049237418208, "learning_rate": 2.11754955966742e-06, "loss": 0.6016, "step": 38355 }, { "epoch": 1.9474686195123807, "grad_norm": 0.026911725672864047, "learning_rate": 2.0972321738960687e-06, "loss": 0.5845, "step": 38360 }, { "epoch": 1.9477224556104122, "grad_norm": 0.029304750386809793, "learning_rate": 2.0770125239549797e-06, "loss": 0.5436, "step": 38365 }, { "epoch": 1.9479762917084438, "grad_norm": 0.02658353093343735, "learning_rate": 2.0568906138132002e-06, "loss": 0.6014, "step": 38370 }, { "epoch": 1.9482301278064753, "grad_norm": 0.027945930208107768, "learning_rate": 2.0368664474205157e-06, "loss": 0.6025, "step": 38375 }, { "epoch": 1.9484839639045068, "grad_norm": 0.028044295273482567, "learning_rate": 2.01694002870767e-06, "loss": 0.5499, "step": 38380 }, { "epoch": 1.9487378000025384, "grad_norm": 0.026563808048353017, "learning_rate": 1.997111361586035e-06, "loss": 0.5929, "step": 38385 }, { "epoch": 1.94899163610057, "grad_norm": 0.026151832407987092, "learning_rate": 1.9773804499478854e-06, "loss": 0.5664, "step": 38390 }, { "epoch": 1.9492454721986014, "grad_norm": 0.0251653993642813, "learning_rate": 1.957747297666346e-06, "loss": 0.5591, "step": 38395 }, { "epoch": 1.949499308296633, "grad_norm": 0.026613133188114473, "learning_rate": 1.9382119085952777e-06, "loss": 0.5825, "step": 38400 }, { "epoch": 1.9497531443946645, "grad_norm": 0.02565768424546227, "learning_rate": 1.9187742865693915e-06, "loss": 0.5556, "step": 38405 }, { "epoch": 1.9500069804926958, "grad_norm": 0.037922402009410924, "learning_rate": 1.899434435404135e-06, "loss": 0.5486, "step": 38410 }, { "epoch": 1.9502608165907274, "grad_norm": 0.025674075058438276, "learning_rate": 1.8801923588959157e-06, "loss": 0.537, "step": 38415 }, { "epoch": 1.950514652688759, "grad_norm": 0.02680805420632808, "learning_rate": 1.8610480608218239e-06, "loss": 0.577, "step": 38420 }, { "epoch": 1.9507684887867902, "grad_norm": 0.027054147002942935, "learning_rate": 1.842001544939742e-06, "loss": 0.5798, "step": 38425 }, { "epoch": 1.9510223248848217, "grad_norm": 0.02573464627026845, "learning_rate": 1.8230528149884573e-06, "loss": 0.5999, "step": 38430 }, { "epoch": 1.9512761609828533, "grad_norm": 0.025562014194698453, "learning_rate": 1.80420187468755e-06, "loss": 0.5462, "step": 38435 }, { "epoch": 1.9515299970808848, "grad_norm": 0.026236671783815817, "learning_rate": 1.7854487277372822e-06, "loss": 0.557, "step": 38440 }, { "epoch": 1.9517838331789163, "grad_norm": 0.026284417912040326, "learning_rate": 1.7667933778188206e-06, "loss": 0.568, "step": 38445 }, { "epoch": 1.9520376692769479, "grad_norm": 0.026146108482464656, "learning_rate": 1.7482358285941803e-06, "loss": 0.5802, "step": 38450 }, { "epoch": 1.9522915053749794, "grad_norm": 0.027359196302388176, "learning_rate": 1.729776083706003e-06, "loss": 0.573, "step": 38455 }, { "epoch": 1.952545341473011, "grad_norm": 0.028635450750895696, "learning_rate": 1.7114141467779454e-06, "loss": 0.5822, "step": 38460 }, { "epoch": 1.9527991775710425, "grad_norm": 0.02648953814824736, "learning_rate": 1.693150021414347e-06, "loss": 0.5796, "step": 38465 }, { "epoch": 1.953053013669074, "grad_norm": 0.026400008613723946, "learning_rate": 1.6749837112003398e-06, "loss": 0.5831, "step": 38470 }, { "epoch": 1.9533068497671053, "grad_norm": 0.025788958074878635, "learning_rate": 1.656915219701849e-06, "loss": 0.536, "step": 38475 }, { "epoch": 1.9535606858651369, "grad_norm": 0.026198567858620934, "learning_rate": 1.6389445504657041e-06, "loss": 0.5725, "step": 38480 }, { "epoch": 1.9538145219631684, "grad_norm": 0.02642224217270925, "learning_rate": 1.621071707019417e-06, "loss": 0.5433, "step": 38485 }, { "epoch": 1.9540683580611997, "grad_norm": 0.028565262893761916, "learning_rate": 1.6032966928713477e-06, "loss": 0.6027, "step": 38490 }, { "epoch": 1.9543221941592313, "grad_norm": 0.027800691408527062, "learning_rate": 1.5856195115105943e-06, "loss": 0.5569, "step": 38495 }, { "epoch": 1.9545760302572628, "grad_norm": 0.02687428374458374, "learning_rate": 1.5680401664072141e-06, "loss": 0.5902, "step": 38500 }, { "epoch": 1.9548298663552943, "grad_norm": 0.025147156924122976, "learning_rate": 1.5505586610118361e-06, "loss": 0.5711, "step": 38505 }, { "epoch": 1.9550837024533259, "grad_norm": 0.025799390540028888, "learning_rate": 1.5331749987560484e-06, "loss": 0.5804, "step": 38510 }, { "epoch": 1.9553375385513574, "grad_norm": 0.027417222251300007, "learning_rate": 1.5158891830521215e-06, "loss": 0.5549, "step": 38515 }, { "epoch": 1.955591374649389, "grad_norm": 0.027826523183621204, "learning_rate": 1.4987012172932301e-06, "loss": 0.5814, "step": 38520 }, { "epoch": 1.9558452107474205, "grad_norm": 0.026538300854937816, "learning_rate": 1.481611104853231e-06, "loss": 0.5858, "step": 38525 }, { "epoch": 1.956099046845452, "grad_norm": 0.02597854526723679, "learning_rate": 1.4646188490869405e-06, "loss": 0.5979, "step": 38530 }, { "epoch": 1.9563528829434835, "grad_norm": 0.02753568988791941, "learning_rate": 1.4477244533297463e-06, "loss": 0.5677, "step": 38535 }, { "epoch": 1.9566067190415148, "grad_norm": 0.027185759736870714, "learning_rate": 1.4309279208979398e-06, "loss": 0.5845, "step": 38540 }, { "epoch": 1.9568605551395464, "grad_norm": 0.023873938561358856, "learning_rate": 1.414229255088606e-06, "loss": 0.5482, "step": 38545 }, { "epoch": 1.957114391237578, "grad_norm": 0.02576734627949202, "learning_rate": 1.3976284591796783e-06, "loss": 0.5695, "step": 38550 }, { "epoch": 1.9573682273356094, "grad_norm": 0.025599133868475423, "learning_rate": 1.381125536429717e-06, "loss": 0.5611, "step": 38555 }, { "epoch": 1.9576220634336408, "grad_norm": 0.0258957106262729, "learning_rate": 1.3647204900782417e-06, "loss": 0.551, "step": 38560 }, { "epoch": 1.9578758995316723, "grad_norm": 0.027018587490315292, "learning_rate": 1.3484133233454544e-06, "loss": 0.5862, "step": 38565 }, { "epoch": 1.9581297356297038, "grad_norm": 0.02759912969730078, "learning_rate": 1.3322040394323498e-06, "loss": 0.5823, "step": 38570 }, { "epoch": 1.9583835717277354, "grad_norm": 0.02721350663279493, "learning_rate": 1.3160926415207163e-06, "loss": 0.5646, "step": 38575 }, { "epoch": 1.958637407825767, "grad_norm": 0.02595396991520284, "learning_rate": 1.300079132773191e-06, "loss": 0.5673, "step": 38580 }, { "epoch": 1.9588912439237984, "grad_norm": 0.027330035768316177, "learning_rate": 1.2841635163330922e-06, "loss": 0.627, "step": 38585 }, { "epoch": 1.95914508002183, "grad_norm": 0.030103850914442693, "learning_rate": 1.268345795324588e-06, "loss": 0.6006, "step": 38590 }, { "epoch": 1.9593989161198615, "grad_norm": 0.027085219985595117, "learning_rate": 1.252625972852639e-06, "loss": 0.6066, "step": 38595 }, { "epoch": 1.959652752217893, "grad_norm": 0.02808306966077576, "learning_rate": 1.237004052002999e-06, "loss": 0.541, "step": 38600 }, { "epoch": 1.9599065883159246, "grad_norm": 0.02417960093900466, "learning_rate": 1.221480035842104e-06, "loss": 0.5316, "step": 38605 }, { "epoch": 1.9601604244139559, "grad_norm": 0.025832611689122265, "learning_rate": 1.2060539274172944e-06, "loss": 0.5598, "step": 38610 }, { "epoch": 1.9604142605119874, "grad_norm": 0.025735230867072356, "learning_rate": 1.1907257297566477e-06, "loss": 0.5602, "step": 38615 }, { "epoch": 1.960668096610019, "grad_norm": 0.026199087063542615, "learning_rate": 1.1754954458689238e-06, "loss": 0.5822, "step": 38620 }, { "epoch": 1.9609219327080503, "grad_norm": 0.02724432021050292, "learning_rate": 1.1603630787438424e-06, "loss": 0.586, "step": 38625 }, { "epoch": 1.9611757688060818, "grad_norm": 0.025856425920108998, "learning_rate": 1.1453286313517498e-06, "loss": 0.5769, "step": 38630 }, { "epoch": 1.9614296049041133, "grad_norm": 0.02553030669696096, "learning_rate": 1.130392106643896e-06, "loss": 0.5436, "step": 38635 }, { "epoch": 1.9616834410021449, "grad_norm": 0.026121854750863857, "learning_rate": 1.1155535075522138e-06, "loss": 0.5697, "step": 38640 }, { "epoch": 1.9619372771001764, "grad_norm": 0.024664963529894324, "learning_rate": 1.1008128369894288e-06, "loss": 0.5597, "step": 38645 }, { "epoch": 1.962191113198208, "grad_norm": 0.0266161512969385, "learning_rate": 1.0861700978490596e-06, "loss": 0.5841, "step": 38650 }, { "epoch": 1.9624449492962395, "grad_norm": 0.02658060648647594, "learning_rate": 1.0716252930054737e-06, "loss": 0.5744, "step": 38655 }, { "epoch": 1.962698785394271, "grad_norm": 0.05576489779637387, "learning_rate": 1.0571784253136652e-06, "loss": 0.5839, "step": 38660 }, { "epoch": 1.9629526214923025, "grad_norm": 0.026068322303723065, "learning_rate": 1.0428294976094766e-06, "loss": 0.587, "step": 38665 }, { "epoch": 1.963206457590334, "grad_norm": 0.02641195644001675, "learning_rate": 1.0285785127095993e-06, "loss": 0.5844, "step": 38670 }, { "epoch": 1.9634602936883654, "grad_norm": 0.028461226288043776, "learning_rate": 1.0144254734113511e-06, "loss": 0.5653, "step": 38675 }, { "epoch": 1.963714129786397, "grad_norm": 0.027973117037734762, "learning_rate": 1.00037038249301e-06, "loss": 0.5884, "step": 38680 }, { "epoch": 1.9639679658844285, "grad_norm": 0.027265002987793684, "learning_rate": 9.864132427134243e-07, "loss": 0.5792, "step": 38685 }, { "epoch": 1.9642218019824598, "grad_norm": 0.027171101555842573, "learning_rate": 9.725540568122915e-07, "loss": 0.5439, "step": 38690 }, { "epoch": 1.9644756380804913, "grad_norm": 0.027488709389757424, "learning_rate": 9.587928275102132e-07, "loss": 0.5671, "step": 38695 }, { "epoch": 1.9647294741785228, "grad_norm": 0.025908525528610716, "learning_rate": 9.451295575083618e-07, "loss": 0.5756, "step": 38700 }, { "epoch": 1.9649833102765544, "grad_norm": 0.025349140808685295, "learning_rate": 9.315642494888144e-07, "loss": 0.542, "step": 38705 }, { "epoch": 1.965237146374586, "grad_norm": 0.026847564781225946, "learning_rate": 9.180969061143851e-07, "loss": 0.5738, "step": 38710 }, { "epoch": 1.9654909824726174, "grad_norm": 0.0266248620408468, "learning_rate": 9.047275300285706e-07, "loss": 0.5812, "step": 38715 }, { "epoch": 1.965744818570649, "grad_norm": 0.02532892568070448, "learning_rate": 8.914561238557717e-07, "loss": 0.5551, "step": 38720 }, { "epoch": 1.9659986546686805, "grad_norm": 0.027130437039344243, "learning_rate": 8.78282690201071e-07, "loss": 0.5592, "step": 38725 }, { "epoch": 1.966252490766712, "grad_norm": 0.02971635964698811, "learning_rate": 8.652072316503446e-07, "loss": 0.5692, "step": 38730 }, { "epoch": 1.9665063268647436, "grad_norm": 0.0262202442704056, "learning_rate": 8.52229750770317e-07, "loss": 0.5922, "step": 38735 }, { "epoch": 1.966760162962775, "grad_norm": 0.025211107447864152, "learning_rate": 8.39350250108284e-07, "loss": 0.5662, "step": 38740 }, { "epoch": 1.9670139990608064, "grad_norm": 0.022560711301004826, "learning_rate": 8.265687321925009e-07, "loss": 0.5342, "step": 38745 }, { "epoch": 1.967267835158838, "grad_norm": 0.02630392010219344, "learning_rate": 8.138851995319608e-07, "loss": 0.5434, "step": 38750 }, { "epoch": 1.9675216712568693, "grad_norm": 0.02628855696555488, "learning_rate": 8.012996546162277e-07, "loss": 0.5861, "step": 38755 }, { "epoch": 1.9677755073549008, "grad_norm": 0.02564935741374567, "learning_rate": 7.888120999159365e-07, "loss": 0.589, "step": 38760 }, { "epoch": 1.9680293434529323, "grad_norm": 0.02673559149644929, "learning_rate": 7.764225378822377e-07, "loss": 0.5623, "step": 38765 }, { "epoch": 1.9682831795509639, "grad_norm": 0.02638179502931584, "learning_rate": 7.641309709471855e-07, "loss": 0.5719, "step": 38770 }, { "epoch": 1.9685370156489954, "grad_norm": 0.02834474864108488, "learning_rate": 7.51937401523517e-07, "loss": 0.5756, "step": 38775 }, { "epoch": 1.968790851747027, "grad_norm": 0.028319211620961498, "learning_rate": 7.398418320048173e-07, "loss": 0.5834, "step": 38780 }, { "epoch": 1.9690446878450585, "grad_norm": 0.02559211038988313, "learning_rate": 7.278442647653538e-07, "loss": 0.54, "step": 38785 }, { "epoch": 1.96929852394309, "grad_norm": 0.02730975703185445, "learning_rate": 7.159447021601872e-07, "loss": 0.5611, "step": 38790 }, { "epoch": 1.9695523600411216, "grad_norm": 0.02604121383325393, "learning_rate": 7.041431465251713e-07, "loss": 0.5712, "step": 38795 }, { "epoch": 1.969806196139153, "grad_norm": 0.027332911512730325, "learning_rate": 6.924396001768418e-07, "loss": 0.5736, "step": 38800 }, { "epoch": 1.9700600322371844, "grad_norm": 0.027607238246662617, "learning_rate": 6.808340654125833e-07, "loss": 0.5774, "step": 38805 }, { "epoch": 1.970313868335216, "grad_norm": 0.026692316698903543, "learning_rate": 6.693265445105179e-07, "loss": 0.5534, "step": 38810 }, { "epoch": 1.9705677044332475, "grad_norm": 0.024693930709243837, "learning_rate": 6.579170397294498e-07, "loss": 0.5275, "step": 38815 }, { "epoch": 1.970821540531279, "grad_norm": 0.028454460766382448, "learning_rate": 6.466055533090875e-07, "loss": 0.5543, "step": 38820 }, { "epoch": 1.9710753766293103, "grad_norm": 0.028656729436829103, "learning_rate": 6.35392087469766e-07, "loss": 0.5681, "step": 38825 }, { "epoch": 1.9713292127273419, "grad_norm": 0.026263764563460825, "learning_rate": 6.24276644412669e-07, "loss": 0.5802, "step": 38830 }, { "epoch": 1.9715830488253734, "grad_norm": 0.02687120411651479, "learning_rate": 6.132592263196623e-07, "loss": 0.5481, "step": 38835 }, { "epoch": 1.971836884923405, "grad_norm": 0.025016673517658555, "learning_rate": 6.023398353534604e-07, "loss": 0.5766, "step": 38840 }, { "epoch": 1.9720907210214365, "grad_norm": 0.027655159082394274, "learning_rate": 5.915184736574597e-07, "loss": 0.5717, "step": 38845 }, { "epoch": 1.972344557119468, "grad_norm": 0.025444702353377178, "learning_rate": 5.807951433557946e-07, "loss": 0.5613, "step": 38850 }, { "epoch": 1.9725983932174995, "grad_norm": 0.029004616132434144, "learning_rate": 5.701698465534477e-07, "loss": 0.5695, "step": 38855 }, { "epoch": 1.972852229315531, "grad_norm": 0.02882930023028103, "learning_rate": 5.596425853361397e-07, "loss": 0.5572, "step": 38860 }, { "epoch": 1.9731060654135626, "grad_norm": 0.027335248370795127, "learning_rate": 5.492133617702733e-07, "loss": 0.5663, "step": 38865 }, { "epoch": 1.9733599015115941, "grad_norm": 0.03029493284415852, "learning_rate": 5.388821779030994e-07, "loss": 0.5994, "step": 38870 }, { "epoch": 1.9736137376096254, "grad_norm": 0.02532966875776641, "learning_rate": 5.286490357624962e-07, "loss": 0.5618, "step": 38875 }, { "epoch": 1.973867573707657, "grad_norm": 0.028404350041748176, "learning_rate": 5.185139373572456e-07, "loss": 0.5641, "step": 38880 }, { "epoch": 1.9741214098056885, "grad_norm": 0.026546458164633597, "learning_rate": 5.084768846768117e-07, "loss": 0.5751, "step": 38885 }, { "epoch": 1.9743752459037198, "grad_norm": 0.03060332237449581, "learning_rate": 4.985378796913964e-07, "loss": 0.5918, "step": 38890 }, { "epoch": 1.9746290820017514, "grad_norm": 0.024307522842533437, "learning_rate": 4.886969243519391e-07, "loss": 0.5355, "step": 38895 }, { "epoch": 1.974882918099783, "grad_norm": 0.026327740109736854, "learning_rate": 4.789540205902831e-07, "loss": 0.5661, "step": 38900 }, { "epoch": 1.9751367541978144, "grad_norm": 0.025372655576260048, "learning_rate": 4.6930917031878796e-07, "loss": 0.5329, "step": 38905 }, { "epoch": 1.975390590295846, "grad_norm": 0.02555567837221021, "learning_rate": 4.597623754307723e-07, "loss": 0.5572, "step": 38910 }, { "epoch": 1.9756444263938775, "grad_norm": 0.02742769445222376, "learning_rate": 4.5031363780023705e-07, "loss": 0.5608, "step": 38915 }, { "epoch": 1.975898262491909, "grad_norm": 0.02622290361061954, "learning_rate": 4.4096295928186534e-07, "loss": 0.5958, "step": 38920 }, { "epoch": 1.9761520985899406, "grad_norm": 0.02746597098932347, "learning_rate": 4.3171034171113346e-07, "loss": 0.5818, "step": 38925 }, { "epoch": 1.976405934687972, "grad_norm": 0.02816176906896145, "learning_rate": 4.225557869043661e-07, "loss": 0.5979, "step": 38930 }, { "epoch": 1.9766597707860036, "grad_norm": 0.030042909516198608, "learning_rate": 4.134992966584594e-07, "loss": 0.5669, "step": 38935 }, { "epoch": 1.976913606884035, "grad_norm": 0.026295906168094692, "learning_rate": 4.0454087275121344e-07, "loss": 0.5355, "step": 38940 }, { "epoch": 1.9771674429820665, "grad_norm": 0.028474250567716762, "learning_rate": 3.956805169411659e-07, "loss": 0.6023, "step": 38945 }, { "epoch": 1.977421279080098, "grad_norm": 0.024783071987096252, "learning_rate": 3.8691823096748126e-07, "loss": 0.5462, "step": 38950 }, { "epoch": 1.9776751151781293, "grad_norm": 0.026687502733370803, "learning_rate": 3.7825401655017246e-07, "loss": 0.6136, "step": 38955 }, { "epoch": 1.9779289512761609, "grad_norm": 0.02722250712787834, "learning_rate": 3.6968787538999016e-07, "loss": 0.5631, "step": 38960 }, { "epoch": 1.9781827873741924, "grad_norm": 0.02583956356646283, "learning_rate": 3.6121980916842265e-07, "loss": 0.5742, "step": 38965 }, { "epoch": 1.978436623472224, "grad_norm": 0.025099575669774995, "learning_rate": 3.528498195476959e-07, "loss": 0.5371, "step": 38970 }, { "epoch": 1.9786904595702555, "grad_norm": 0.027525294680643672, "learning_rate": 3.445779081708844e-07, "loss": 0.6074, "step": 38975 }, { "epoch": 1.978944295668287, "grad_norm": 0.028367067766392168, "learning_rate": 3.3640407666157835e-07, "loss": 0.591, "step": 38980 }, { "epoch": 1.9791981317663185, "grad_norm": 0.027295475031204608, "learning_rate": 3.283283266243831e-07, "loss": 0.5632, "step": 38985 }, { "epoch": 1.97945196786435, "grad_norm": 0.027120818839684355, "learning_rate": 3.203506596444194e-07, "loss": 0.5941, "step": 38990 }, { "epoch": 1.9797058039623816, "grad_norm": 0.025452533594469477, "learning_rate": 3.1247107728776815e-07, "loss": 0.585, "step": 38995 }, { "epoch": 1.9799596400604131, "grad_norm": 0.02570441583317385, "learning_rate": 3.046895811011363e-07, "loss": 0.5556, "step": 39000 }, { "epoch": 1.9802134761584445, "grad_norm": 0.025544741614495887, "learning_rate": 2.970061726119133e-07, "loss": 0.5499, "step": 39005 }, { "epoch": 1.980467312256476, "grad_norm": 0.02497012747253452, "learning_rate": 2.894208533283371e-07, "loss": 0.5482, "step": 39010 }, { "epoch": 1.9807211483545075, "grad_norm": 0.024674521895544015, "learning_rate": 2.8193362473943885e-07, "loss": 0.541, "step": 39015 }, { "epoch": 1.9809749844525388, "grad_norm": 0.026703509398308928, "learning_rate": 2.7454448831487624e-07, "loss": 0.5851, "step": 39020 }, { "epoch": 1.9812288205505704, "grad_norm": 0.025163922090751547, "learning_rate": 2.672534455051001e-07, "loss": 0.5698, "step": 39025 }, { "epoch": 1.981482656648602, "grad_norm": 0.025999180976107875, "learning_rate": 2.60060497741299e-07, "loss": 0.5873, "step": 39030 }, { "epoch": 1.9817364927466334, "grad_norm": 0.025309985772055732, "learning_rate": 2.529656464354546e-07, "loss": 0.5923, "step": 39035 }, { "epoch": 1.981990328844665, "grad_norm": 0.03158423312518992, "learning_rate": 2.459688929802306e-07, "loss": 0.5698, "step": 39040 }, { "epoch": 1.9822441649426965, "grad_norm": 0.025628729978019873, "learning_rate": 2.3907023874897295e-07, "loss": 0.5374, "step": 39045 }, { "epoch": 1.982498001040728, "grad_norm": 0.029602629772001614, "learning_rate": 2.3226968509598712e-07, "loss": 0.5725, "step": 39050 }, { "epoch": 1.9827518371387596, "grad_norm": 0.027588329908316552, "learning_rate": 2.2556723335609431e-07, "loss": 0.5465, "step": 39055 }, { "epoch": 1.983005673236791, "grad_norm": 0.0267102925618694, "learning_rate": 2.1896288484496428e-07, "loss": 0.5768, "step": 39060 }, { "epoch": 1.9832595093348226, "grad_norm": 0.026860663319452714, "learning_rate": 2.1245664085906002e-07, "loss": 0.563, "step": 39065 }, { "epoch": 1.983513345432854, "grad_norm": 0.027831528882566677, "learning_rate": 2.0604850267547104e-07, "loss": 0.5724, "step": 39070 }, { "epoch": 1.9837671815308855, "grad_norm": 0.026059205181733425, "learning_rate": 1.9973847155208003e-07, "loss": 0.5812, "step": 39075 }, { "epoch": 1.984021017628917, "grad_norm": 0.02458384712911002, "learning_rate": 1.935265487275073e-07, "loss": 0.5433, "step": 39080 }, { "epoch": 1.9842748537269486, "grad_norm": 0.025245584850882238, "learning_rate": 1.8741273542116633e-07, "loss": 0.5918, "step": 39085 }, { "epoch": 1.9845286898249799, "grad_norm": 0.026099019973026834, "learning_rate": 1.8139703283315267e-07, "loss": 0.6008, "step": 39090 }, { "epoch": 1.9847825259230114, "grad_norm": 0.02638127629116268, "learning_rate": 1.7547944214429957e-07, "loss": 0.5489, "step": 39095 }, { "epoch": 1.985036362021043, "grad_norm": 0.02825348183167176, "learning_rate": 1.6965996451623334e-07, "loss": 0.5685, "step": 39100 }, { "epoch": 1.9852901981190745, "grad_norm": 0.025131743104072127, "learning_rate": 1.6393860109120695e-07, "loss": 0.5709, "step": 39105 }, { "epoch": 1.985544034217106, "grad_norm": 0.025641774251846747, "learning_rate": 1.5831535299243304e-07, "loss": 0.5297, "step": 39110 }, { "epoch": 1.9857978703151375, "grad_norm": 0.02607969124678805, "learning_rate": 1.5279022132358434e-07, "loss": 0.5585, "step": 39115 }, { "epoch": 1.986051706413169, "grad_norm": 0.02688866111983418, "learning_rate": 1.473632071692932e-07, "loss": 0.5457, "step": 39120 }, { "epoch": 1.9863055425112006, "grad_norm": 0.023998522706623504, "learning_rate": 1.4203431159487413e-07, "loss": 0.5432, "step": 39125 }, { "epoch": 1.9865593786092322, "grad_norm": 0.027336638602829578, "learning_rate": 1.3680353564632375e-07, "loss": 0.5387, "step": 39130 }, { "epoch": 1.9868132147072637, "grad_norm": 0.02484196976870124, "learning_rate": 1.3167088035037632e-07, "loss": 0.5438, "step": 39135 }, { "epoch": 1.987067050805295, "grad_norm": 0.02869056154620727, "learning_rate": 1.266363467146703e-07, "loss": 0.5951, "step": 39140 }, { "epoch": 1.9873208869033265, "grad_norm": 0.024514855411955954, "learning_rate": 1.216999357273596e-07, "loss": 0.5421, "step": 39145 }, { "epoch": 1.987574723001358, "grad_norm": 0.02682503918503035, "learning_rate": 1.1686164835744695e-07, "loss": 0.5643, "step": 39150 }, { "epoch": 1.9878285590993894, "grad_norm": 0.028888388514840165, "learning_rate": 1.121214855546726e-07, "loss": 0.5698, "step": 39155 }, { "epoch": 1.988082395197421, "grad_norm": 0.026174539618839105, "learning_rate": 1.074794482495145e-07, "loss": 0.5606, "step": 39160 }, { "epoch": 1.9883362312954524, "grad_norm": 0.026211255110279608, "learning_rate": 1.0293553735318817e-07, "loss": 0.565, "step": 39165 }, { "epoch": 1.988590067393484, "grad_norm": 0.02676399557856488, "learning_rate": 9.84897537576468e-08, "loss": 0.5513, "step": 39170 }, { "epoch": 1.9888439034915155, "grad_norm": 0.027863143073232344, "learning_rate": 9.414209833552567e-08, "loss": 0.5839, "step": 39175 }, { "epoch": 1.989097739589547, "grad_norm": 0.02787836272286346, "learning_rate": 8.989257194030876e-08, "loss": 0.6088, "step": 39180 }, { "epoch": 1.9893515756875786, "grad_norm": 0.024834062804948657, "learning_rate": 8.57411754061621e-08, "loss": 0.571, "step": 39185 }, { "epoch": 1.9896054117856101, "grad_norm": 0.026558711878479538, "learning_rate": 8.168790954793392e-08, "loss": 0.5979, "step": 39190 }, { "epoch": 1.9898592478836417, "grad_norm": 0.024515181581265977, "learning_rate": 7.773277516126553e-08, "loss": 0.5696, "step": 39195 }, { "epoch": 1.9901130839816732, "grad_norm": 0.02721108135044938, "learning_rate": 7.38757730225359e-08, "loss": 0.5522, "step": 39200 }, { "epoch": 1.9903669200797045, "grad_norm": 0.028402957171672138, "learning_rate": 7.01169038888616e-08, "loss": 0.5766, "step": 39205 }, { "epoch": 1.990620756177736, "grad_norm": 0.029588329874503945, "learning_rate": 6.64561684981524e-08, "loss": 0.5818, "step": 39210 }, { "epoch": 1.9908745922757676, "grad_norm": 0.03091632343096726, "learning_rate": 6.289356756888908e-08, "loss": 0.5785, "step": 39215 }, { "epoch": 1.9911284283737989, "grad_norm": 0.032844318458248124, "learning_rate": 5.9429101800401174e-08, "loss": 0.5671, "step": 39220 }, { "epoch": 1.9913822644718304, "grad_norm": 0.026884204416447385, "learning_rate": 5.606277187286679e-08, "loss": 0.5721, "step": 39225 }, { "epoch": 1.991636100569862, "grad_norm": 0.028013028498613264, "learning_rate": 5.2794578446924145e-08, "loss": 0.6133, "step": 39230 }, { "epoch": 1.9918899366678935, "grad_norm": 0.026995584691949116, "learning_rate": 4.962452216417113e-08, "loss": 0.6067, "step": 39235 }, { "epoch": 1.992143772765925, "grad_norm": 0.02818959855646271, "learning_rate": 4.655260364694325e-08, "loss": 0.58, "step": 39240 }, { "epoch": 1.9923976088639566, "grad_norm": 0.026033068484495565, "learning_rate": 4.357882349809161e-08, "loss": 0.5731, "step": 39245 }, { "epoch": 1.992651444961988, "grad_norm": 0.027452031979232905, "learning_rate": 4.0703182301482514e-08, "loss": 0.5649, "step": 39250 }, { "epoch": 1.9929052810600196, "grad_norm": 0.026036890102437153, "learning_rate": 3.792568062155333e-08, "loss": 0.5911, "step": 39255 }, { "epoch": 1.9931591171580512, "grad_norm": 0.02618804756653391, "learning_rate": 3.524631900347908e-08, "loss": 0.5566, "step": 39260 }, { "epoch": 1.9934129532560827, "grad_norm": 0.027192339368356123, "learning_rate": 3.266509797328343e-08, "loss": 0.5992, "step": 39265 }, { "epoch": 1.993666789354114, "grad_norm": 0.027224047047684295, "learning_rate": 3.018201803756115e-08, "loss": 0.5569, "step": 39270 }, { "epoch": 1.9939206254521455, "grad_norm": 0.02836270864009691, "learning_rate": 2.7797079683755666e-08, "loss": 0.5849, "step": 39275 }, { "epoch": 1.994174461550177, "grad_norm": 0.027970255186669454, "learning_rate": 2.5510283379992505e-08, "loss": 0.6227, "step": 39280 }, { "epoch": 1.9944282976482084, "grad_norm": 0.028197605213243656, "learning_rate": 2.3321629575245862e-08, "loss": 0.5412, "step": 39285 }, { "epoch": 1.99468213374624, "grad_norm": 0.025435718725420487, "learning_rate": 2.1231118699061024e-08, "loss": 0.5842, "step": 39290 }, { "epoch": 1.9949359698442715, "grad_norm": 0.02806372172166062, "learning_rate": 1.9238751161831936e-08, "loss": 0.5358, "step": 39295 }, { "epoch": 1.995189805942303, "grad_norm": 0.025857470839735535, "learning_rate": 1.7344527354634655e-08, "loss": 0.5513, "step": 39300 }, { "epoch": 1.9954436420403345, "grad_norm": 0.029637866654925293, "learning_rate": 1.554844764928287e-08, "loss": 0.5775, "step": 39305 }, { "epoch": 1.995697478138366, "grad_norm": 0.024707328234349173, "learning_rate": 1.3850512398383419e-08, "loss": 0.5828, "step": 39310 }, { "epoch": 1.9959513142363976, "grad_norm": 0.02529791960122678, "learning_rate": 1.225072193516974e-08, "loss": 0.5765, "step": 39315 }, { "epoch": 1.9962051503344291, "grad_norm": 0.028392552225814112, "learning_rate": 1.0749076573723927e-08, "loss": 0.5691, "step": 39320 }, { "epoch": 1.9964589864324607, "grad_norm": 0.028468203603683597, "learning_rate": 9.34557660875468e-09, "loss": 0.57, "step": 39325 }, { "epoch": 1.9967128225304922, "grad_norm": 0.027182847590014436, "learning_rate": 8.040222315819357e-09, "loss": 0.5715, "step": 39330 }, { "epoch": 1.9969666586285235, "grad_norm": 0.02872349833767389, "learning_rate": 6.833013951157429e-09, "loss": 0.588, "step": 39335 }, { "epoch": 1.997220494726555, "grad_norm": 0.027074225792545346, "learning_rate": 5.7239517516904925e-09, "loss": 0.5722, "step": 39340 }, { "epoch": 1.9974743308245866, "grad_norm": 0.023656870930833913, "learning_rate": 4.713035935188792e-09, "loss": 0.5948, "step": 39345 }, { "epoch": 1.9977281669226181, "grad_norm": 0.024799710233402765, "learning_rate": 3.800266699993671e-09, "loss": 0.5631, "step": 39350 }, { "epoch": 1.9979820030206494, "grad_norm": 0.029289392297673324, "learning_rate": 2.9856442253506366e-09, "loss": 0.5938, "step": 39355 }, { "epoch": 1.998235839118681, "grad_norm": 0.024912809251049394, "learning_rate": 2.2691686711318048e-09, "loss": 0.5515, "step": 39360 }, { "epoch": 1.9984896752167125, "grad_norm": 0.027194256680193204, "learning_rate": 1.6508401780024329e-09, "loss": 0.5987, "step": 39365 }, { "epoch": 1.998743511314744, "grad_norm": 0.024791363097195034, "learning_rate": 1.1306588673098972e-09, "loss": 0.5612, "step": 39370 }, { "epoch": 1.9989973474127756, "grad_norm": 0.026738786696350415, "learning_rate": 7.08624841194716e-10, "loss": 0.5842, "step": 39375 }, { "epoch": 1.999251183510807, "grad_norm": 0.025979797645752305, "learning_rate": 3.8473818242401594e-10, "loss": 0.5642, "step": 39380 }, { "epoch": 1.9995050196088386, "grad_norm": 0.02970831174203495, "learning_rate": 1.5899895472459848e-10, "loss": 0.5905, "step": 39385 }, { "epoch": 1.9997588557068702, "grad_norm": 0.024882086896978725, "learning_rate": 3.140720228334004e-11, "loss": 0.5453, "step": 39390 }, { "epoch": 1.9999619245852953, "step": 39394, "total_flos": 1.234003630074364e+18, "train_loss": 0.2977831879109727, "train_runtime": 39966.3186, "train_samples_per_second": 7.886, "train_steps_per_second": 0.986 } ], "logging_steps": 5, "max_steps": 39394, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.234003630074364e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }