{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999365409754921, "eval_steps": 500, "global_step": 39394, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.076721960630021e-05, "grad_norm": 0.15623666546780773, "learning_rate": 2.538071065989848e-07, "loss": 1.1099, "step": 1 }, { "epoch": 0.00025383609803150106, "grad_norm": 0.1494363970160199, "learning_rate": 1.2690355329949238e-06, "loss": 1.1138, "step": 5 }, { "epoch": 0.0005076721960630021, "grad_norm": 0.15321706073016964, "learning_rate": 2.5380710659898476e-06, "loss": 1.1368, "step": 10 }, { "epoch": 0.0007615082940945031, "grad_norm": 0.1542643246754322, "learning_rate": 3.807106598984772e-06, "loss": 1.1091, "step": 15 }, { "epoch": 0.0010153443921260042, "grad_norm": 0.13824138762283186, "learning_rate": 5.076142131979695e-06, "loss": 1.0911, "step": 20 }, { "epoch": 0.0012691804901575053, "grad_norm": 0.1579877396947511, "learning_rate": 6.345177664974619e-06, "loss": 1.1267, "step": 25 }, { "epoch": 0.0015230165881890063, "grad_norm": 0.1478480849215318, "learning_rate": 7.614213197969544e-06, "loss": 1.0848, "step": 30 }, { "epoch": 0.0017768526862205075, "grad_norm": 0.11527528929066594, "learning_rate": 8.883248730964468e-06, "loss": 1.07, "step": 35 }, { "epoch": 0.0020306887842520085, "grad_norm": 0.10942201134957022, "learning_rate": 1.015228426395939e-05, "loss": 1.0856, "step": 40 }, { "epoch": 0.0022845248822835097, "grad_norm": 0.1003586160454669, "learning_rate": 1.1421319796954315e-05, "loss": 1.0483, "step": 45 }, { "epoch": 0.0025383609803150105, "grad_norm": 0.09455563009354048, "learning_rate": 1.2690355329949238e-05, "loss": 1.0842, "step": 50 }, { "epoch": 0.0027921970783465117, "grad_norm": 0.08578428766129725, "learning_rate": 1.3959390862944163e-05, "loss": 1.0387, "step": 55 }, { "epoch": 0.0030460331763780125, "grad_norm": 0.08715199888221983, "learning_rate": 1.5228426395939088e-05, "loss": 1.0585, "step": 60 }, { "epoch": 0.0032998692744095138, "grad_norm": 0.07478324580510415, "learning_rate": 1.6497461928934012e-05, "loss": 0.9773, "step": 65 }, { "epoch": 0.003553705372441015, "grad_norm": 0.07368740079084418, "learning_rate": 1.7766497461928935e-05, "loss": 1.0023, "step": 70 }, { "epoch": 0.0038075414704725158, "grad_norm": 0.07301153154637648, "learning_rate": 1.9035532994923858e-05, "loss": 0.9723, "step": 75 }, { "epoch": 0.004061377568504017, "grad_norm": 0.07289038213742076, "learning_rate": 2.030456852791878e-05, "loss": 0.9927, "step": 80 }, { "epoch": 0.004315213666535518, "grad_norm": 0.07465327431922439, "learning_rate": 2.1573604060913704e-05, "loss": 0.9821, "step": 85 }, { "epoch": 0.0045690497645670194, "grad_norm": 0.06774510984286918, "learning_rate": 2.284263959390863e-05, "loss": 0.965, "step": 90 }, { "epoch": 0.00482288586259852, "grad_norm": 0.06855996583318778, "learning_rate": 2.4111675126903553e-05, "loss": 0.9821, "step": 95 }, { "epoch": 0.005076721960630021, "grad_norm": 0.07536106861979534, "learning_rate": 2.5380710659898476e-05, "loss": 0.9284, "step": 100 }, { "epoch": 0.005330558058661523, "grad_norm": 0.06730944873958547, "learning_rate": 2.6649746192893403e-05, "loss": 0.9626, "step": 105 }, { "epoch": 0.0055843941566930235, "grad_norm": 0.069457861068512, "learning_rate": 2.7918781725888326e-05, "loss": 0.956, "step": 110 }, { "epoch": 0.005838230254724524, "grad_norm": 0.06693731117055934, "learning_rate": 2.918781725888325e-05, "loss": 0.9747, "step": 115 }, { "epoch": 0.006092066352756025, "grad_norm": 0.06277015163357891, "learning_rate": 3.0456852791878175e-05, "loss": 0.9116, "step": 120 }, { "epoch": 0.006345902450787527, "grad_norm": 0.06643101153314454, "learning_rate": 3.17258883248731e-05, "loss": 0.9232, "step": 125 }, { "epoch": 0.0065997385488190275, "grad_norm": 0.06924486382591209, "learning_rate": 3.2994923857868024e-05, "loss": 0.9399, "step": 130 }, { "epoch": 0.006853574646850528, "grad_norm": 0.07604177904943392, "learning_rate": 3.4263959390862944e-05, "loss": 0.8964, "step": 135 }, { "epoch": 0.00710741074488203, "grad_norm": 0.06481060411660645, "learning_rate": 3.553299492385787e-05, "loss": 0.948, "step": 140 }, { "epoch": 0.007361246842913531, "grad_norm": 0.06555462608634341, "learning_rate": 3.680203045685279e-05, "loss": 0.9325, "step": 145 }, { "epoch": 0.0076150829409450315, "grad_norm": 0.07121267717524804, "learning_rate": 3.8071065989847716e-05, "loss": 0.9379, "step": 150 }, { "epoch": 0.007868919038976532, "grad_norm": 0.07190008765656207, "learning_rate": 3.934010152284264e-05, "loss": 0.9104, "step": 155 }, { "epoch": 0.008122755137008034, "grad_norm": 0.06497388555993808, "learning_rate": 4.060913705583756e-05, "loss": 0.9238, "step": 160 }, { "epoch": 0.008376591235039536, "grad_norm": 0.06647076390288592, "learning_rate": 4.187817258883249e-05, "loss": 0.9081, "step": 165 }, { "epoch": 0.008630427333071036, "grad_norm": 0.07078773209118674, "learning_rate": 4.314720812182741e-05, "loss": 0.8845, "step": 170 }, { "epoch": 0.008884263431102537, "grad_norm": 0.060550162204052516, "learning_rate": 4.4416243654822335e-05, "loss": 0.8901, "step": 175 }, { "epoch": 0.009138099529134039, "grad_norm": 0.06924081884858772, "learning_rate": 4.568527918781726e-05, "loss": 0.924, "step": 180 }, { "epoch": 0.009391935627165539, "grad_norm": 0.06720539183021615, "learning_rate": 4.695431472081219e-05, "loss": 0.8938, "step": 185 }, { "epoch": 0.00964577172519704, "grad_norm": 0.07177203010161029, "learning_rate": 4.822335025380711e-05, "loss": 0.9048, "step": 190 }, { "epoch": 0.009899607823228542, "grad_norm": 0.07321268374977867, "learning_rate": 4.949238578680203e-05, "loss": 0.8666, "step": 195 }, { "epoch": 0.010153443921260042, "grad_norm": 0.0704505269850039, "learning_rate": 5.076142131979695e-05, "loss": 0.9043, "step": 200 }, { "epoch": 0.010407280019291544, "grad_norm": 0.06657597088837011, "learning_rate": 5.2030456852791886e-05, "loss": 0.8896, "step": 205 }, { "epoch": 0.010661116117323045, "grad_norm": 0.06540442514049039, "learning_rate": 5.3299492385786806e-05, "loss": 0.8706, "step": 210 }, { "epoch": 0.010914952215354545, "grad_norm": 0.07007986246869724, "learning_rate": 5.4568527918781725e-05, "loss": 0.8502, "step": 215 }, { "epoch": 0.011168788313386047, "grad_norm": 0.07149965435491132, "learning_rate": 5.583756345177665e-05, "loss": 0.8837, "step": 220 }, { "epoch": 0.011422624411417547, "grad_norm": 0.06719503318382268, "learning_rate": 5.710659898477157e-05, "loss": 0.8644, "step": 225 }, { "epoch": 0.011676460509449049, "grad_norm": 0.06782034211539793, "learning_rate": 5.83756345177665e-05, "loss": 0.8966, "step": 230 }, { "epoch": 0.01193029660748055, "grad_norm": 0.06913885877260682, "learning_rate": 5.9644670050761424e-05, "loss": 0.881, "step": 235 }, { "epoch": 0.01218413270551205, "grad_norm": 0.06719486728451429, "learning_rate": 6.091370558375635e-05, "loss": 0.8766, "step": 240 }, { "epoch": 0.012437968803543552, "grad_norm": 0.06653298354073509, "learning_rate": 6.218274111675127e-05, "loss": 0.8781, "step": 245 }, { "epoch": 0.012691804901575053, "grad_norm": 0.06438730665451305, "learning_rate": 6.34517766497462e-05, "loss": 0.8918, "step": 250 }, { "epoch": 0.012945640999606553, "grad_norm": 0.06665969371026668, "learning_rate": 6.472081218274112e-05, "loss": 0.8583, "step": 255 }, { "epoch": 0.013199477097638055, "grad_norm": 0.06715127430825377, "learning_rate": 6.598984771573605e-05, "loss": 0.8656, "step": 260 }, { "epoch": 0.013453313195669557, "grad_norm": 0.06926262661035425, "learning_rate": 6.725888324873096e-05, "loss": 0.8649, "step": 265 }, { "epoch": 0.013707149293701057, "grad_norm": 0.06985857406011756, "learning_rate": 6.852791878172589e-05, "loss": 0.8729, "step": 270 }, { "epoch": 0.013960985391732558, "grad_norm": 0.06860201087036598, "learning_rate": 6.979695431472081e-05, "loss": 0.8601, "step": 275 }, { "epoch": 0.01421482148976406, "grad_norm": 0.06930283958418837, "learning_rate": 7.106598984771574e-05, "loss": 0.8531, "step": 280 }, { "epoch": 0.01446865758779556, "grad_norm": 0.06690534995876123, "learning_rate": 7.233502538071065e-05, "loss": 0.8696, "step": 285 }, { "epoch": 0.014722493685827061, "grad_norm": 0.07121164184727187, "learning_rate": 7.360406091370558e-05, "loss": 0.8475, "step": 290 }, { "epoch": 0.014976329783858563, "grad_norm": 0.06510867515978803, "learning_rate": 7.48730964467005e-05, "loss": 0.8529, "step": 295 }, { "epoch": 0.015230165881890063, "grad_norm": 0.07520321408132809, "learning_rate": 7.614213197969543e-05, "loss": 0.8385, "step": 300 }, { "epoch": 0.015484001979921565, "grad_norm": 0.06551584845379264, "learning_rate": 7.741116751269036e-05, "loss": 0.8687, "step": 305 }, { "epoch": 0.015737838077953065, "grad_norm": 0.07490825713450537, "learning_rate": 7.868020304568529e-05, "loss": 0.8349, "step": 310 }, { "epoch": 0.015991674175984568, "grad_norm": 0.06896780239118258, "learning_rate": 7.994923857868021e-05, "loss": 0.8212, "step": 315 }, { "epoch": 0.016245510274016068, "grad_norm": 0.07054318367146102, "learning_rate": 8.121827411167512e-05, "loss": 0.8385, "step": 320 }, { "epoch": 0.016499346372047568, "grad_norm": 0.07148603385955352, "learning_rate": 8.248730964467005e-05, "loss": 0.821, "step": 325 }, { "epoch": 0.01675318247007907, "grad_norm": 0.07322626942475637, "learning_rate": 8.375634517766498e-05, "loss": 0.8307, "step": 330 }, { "epoch": 0.01700701856811057, "grad_norm": 0.0656810178548751, "learning_rate": 8.50253807106599e-05, "loss": 0.8172, "step": 335 }, { "epoch": 0.01726085466614207, "grad_norm": 0.08377135110233941, "learning_rate": 8.629441624365482e-05, "loss": 0.8464, "step": 340 }, { "epoch": 0.017514690764173575, "grad_norm": 0.075227118003797, "learning_rate": 8.756345177664974e-05, "loss": 0.8505, "step": 345 }, { "epoch": 0.017768526862205074, "grad_norm": 0.06433840181173646, "learning_rate": 8.883248730964467e-05, "loss": 0.8511, "step": 350 }, { "epoch": 0.018022362960236574, "grad_norm": 0.06922064842556071, "learning_rate": 9.01015228426396e-05, "loss": 0.8391, "step": 355 }, { "epoch": 0.018276199058268078, "grad_norm": 0.0755697527347306, "learning_rate": 9.137055837563452e-05, "loss": 0.821, "step": 360 }, { "epoch": 0.018530035156299578, "grad_norm": 0.07031238898852438, "learning_rate": 9.263959390862945e-05, "loss": 0.8231, "step": 365 }, { "epoch": 0.018783871254331078, "grad_norm": 0.0736219488733804, "learning_rate": 9.390862944162437e-05, "loss": 0.822, "step": 370 }, { "epoch": 0.01903770735236258, "grad_norm": 0.06790722048303544, "learning_rate": 9.517766497461929e-05, "loss": 0.8175, "step": 375 }, { "epoch": 0.01929154345039408, "grad_norm": 0.06650805003812338, "learning_rate": 9.644670050761421e-05, "loss": 0.821, "step": 380 }, { "epoch": 0.01954537954842558, "grad_norm": 0.0655885804187293, "learning_rate": 9.771573604060914e-05, "loss": 0.8174, "step": 385 }, { "epoch": 0.019799215646457084, "grad_norm": 0.07059246576832452, "learning_rate": 9.898477157360407e-05, "loss": 0.8255, "step": 390 }, { "epoch": 0.020053051744488584, "grad_norm": 0.08379327242669067, "learning_rate": 0.00010025380710659898, "loss": 0.8401, "step": 395 }, { "epoch": 0.020306887842520084, "grad_norm": 0.06727444934015664, "learning_rate": 0.0001015228426395939, "loss": 0.8304, "step": 400 }, { "epoch": 0.020560723940551588, "grad_norm": 0.0772208129389319, "learning_rate": 0.00010279187817258883, "loss": 0.8172, "step": 405 }, { "epoch": 0.020814560038583087, "grad_norm": 0.07506045651461135, "learning_rate": 0.00010406091370558377, "loss": 0.8148, "step": 410 }, { "epoch": 0.021068396136614587, "grad_norm": 0.07444802891412307, "learning_rate": 0.00010532994923857868, "loss": 0.8642, "step": 415 }, { "epoch": 0.02132223223464609, "grad_norm": 0.06410369753897495, "learning_rate": 0.00010659898477157361, "loss": 0.8523, "step": 420 }, { "epoch": 0.02157606833267759, "grad_norm": 0.06581099995077219, "learning_rate": 0.00010786802030456854, "loss": 0.8214, "step": 425 }, { "epoch": 0.02182990443070909, "grad_norm": 0.06664549211252414, "learning_rate": 0.00010913705583756345, "loss": 0.8083, "step": 430 }, { "epoch": 0.02208374052874059, "grad_norm": 0.06658304689511382, "learning_rate": 0.00011040609137055838, "loss": 0.8143, "step": 435 }, { "epoch": 0.022337576626772094, "grad_norm": 0.0683685494048942, "learning_rate": 0.0001116751269035533, "loss": 0.8239, "step": 440 }, { "epoch": 0.022591412724803594, "grad_norm": 0.06845204574468279, "learning_rate": 0.00011294416243654823, "loss": 0.8109, "step": 445 }, { "epoch": 0.022845248822835094, "grad_norm": 0.0677741143702505, "learning_rate": 0.00011421319796954314, "loss": 0.8244, "step": 450 }, { "epoch": 0.023099084920866597, "grad_norm": 0.06606869955263507, "learning_rate": 0.00011548223350253807, "loss": 0.8075, "step": 455 }, { "epoch": 0.023352921018898097, "grad_norm": 0.07205738377017837, "learning_rate": 0.000116751269035533, "loss": 0.7934, "step": 460 }, { "epoch": 0.023606757116929597, "grad_norm": 0.06872617393230794, "learning_rate": 0.00011802030456852793, "loss": 0.8011, "step": 465 }, { "epoch": 0.0238605932149611, "grad_norm": 0.08044019183258687, "learning_rate": 0.00011928934010152285, "loss": 0.8127, "step": 470 }, { "epoch": 0.0241144293129926, "grad_norm": 0.07828212067502605, "learning_rate": 0.00012055837563451777, "loss": 0.787, "step": 475 }, { "epoch": 0.0243682654110241, "grad_norm": 0.07606104395777182, "learning_rate": 0.0001218274111675127, "loss": 0.8251, "step": 480 }, { "epoch": 0.024622101509055604, "grad_norm": 0.06583732816278534, "learning_rate": 0.0001230964467005076, "loss": 0.8246, "step": 485 }, { "epoch": 0.024875937607087104, "grad_norm": 0.06441670784951158, "learning_rate": 0.00012436548223350254, "loss": 0.7876, "step": 490 }, { "epoch": 0.025129773705118603, "grad_norm": 0.07758246886806429, "learning_rate": 0.00012563451776649747, "loss": 0.8221, "step": 495 }, { "epoch": 0.025383609803150107, "grad_norm": 0.06322158391207536, "learning_rate": 0.0001269035532994924, "loss": 0.7881, "step": 500 }, { "epoch": 0.025637445901181607, "grad_norm": 0.07176275752432852, "learning_rate": 0.00012817258883248732, "loss": 0.8097, "step": 505 }, { "epoch": 0.025891281999213107, "grad_norm": 0.06256825439016349, "learning_rate": 0.00012944162436548224, "loss": 0.7871, "step": 510 }, { "epoch": 0.02614511809724461, "grad_norm": 0.06274483186816927, "learning_rate": 0.00013071065989847717, "loss": 0.8353, "step": 515 }, { "epoch": 0.02639895419527611, "grad_norm": 0.06260104403606175, "learning_rate": 0.0001319796954314721, "loss": 0.8062, "step": 520 }, { "epoch": 0.02665279029330761, "grad_norm": 0.0692807834839823, "learning_rate": 0.00013324873096446702, "loss": 0.8184, "step": 525 }, { "epoch": 0.026906626391339113, "grad_norm": 0.06940858744959649, "learning_rate": 0.00013451776649746192, "loss": 0.7818, "step": 530 }, { "epoch": 0.027160462489370613, "grad_norm": 0.06227095786073527, "learning_rate": 0.00013578680203045685, "loss": 0.7997, "step": 535 }, { "epoch": 0.027414298587402113, "grad_norm": 0.06606458794872055, "learning_rate": 0.00013705583756345178, "loss": 0.791, "step": 540 }, { "epoch": 0.027668134685433617, "grad_norm": 0.07921383531960545, "learning_rate": 0.0001383248730964467, "loss": 0.7865, "step": 545 }, { "epoch": 0.027921970783465117, "grad_norm": 0.07202541973928622, "learning_rate": 0.00013959390862944163, "loss": 0.799, "step": 550 }, { "epoch": 0.028175806881496616, "grad_norm": 0.07446687633156211, "learning_rate": 0.00014086294416243656, "loss": 0.8005, "step": 555 }, { "epoch": 0.02842964297952812, "grad_norm": 0.06806792304243547, "learning_rate": 0.00014213197969543148, "loss": 0.7807, "step": 560 }, { "epoch": 0.02868347907755962, "grad_norm": 0.06961466568049711, "learning_rate": 0.0001434010152284264, "loss": 0.798, "step": 565 }, { "epoch": 0.02893731517559112, "grad_norm": 0.06993114177456627, "learning_rate": 0.0001446700507614213, "loss": 0.8141, "step": 570 }, { "epoch": 0.029191151273622623, "grad_norm": 0.07132008217126029, "learning_rate": 0.00014593908629441623, "loss": 0.7732, "step": 575 }, { "epoch": 0.029444987371654123, "grad_norm": 0.0634032782262794, "learning_rate": 0.00014720812182741116, "loss": 0.7833, "step": 580 }, { "epoch": 0.029698823469685623, "grad_norm": 0.06291224826282306, "learning_rate": 0.00014847715736040609, "loss": 0.7713, "step": 585 }, { "epoch": 0.029952659567717126, "grad_norm": 0.06955080092630633, "learning_rate": 0.000149746192893401, "loss": 0.812, "step": 590 }, { "epoch": 0.030206495665748626, "grad_norm": 0.06642744415224454, "learning_rate": 0.00015101522842639594, "loss": 0.7816, "step": 595 }, { "epoch": 0.030460331763780126, "grad_norm": 0.07915699035230568, "learning_rate": 0.00015228426395939087, "loss": 0.815, "step": 600 }, { "epoch": 0.03071416786181163, "grad_norm": 0.07162405450532806, "learning_rate": 0.0001535532994923858, "loss": 0.7785, "step": 605 }, { "epoch": 0.03096800395984313, "grad_norm": 0.06622725095139695, "learning_rate": 0.00015482233502538072, "loss": 0.7698, "step": 610 }, { "epoch": 0.03122184005787463, "grad_norm": 0.06718329111593666, "learning_rate": 0.00015609137055837564, "loss": 0.7938, "step": 615 }, { "epoch": 0.03147567615590613, "grad_norm": 0.0834368594300888, "learning_rate": 0.00015736040609137057, "loss": 0.8093, "step": 620 }, { "epoch": 0.03172951225393763, "grad_norm": 0.09950838339388045, "learning_rate": 0.0001586294416243655, "loss": 0.7822, "step": 625 }, { "epoch": 0.031983348351969136, "grad_norm": 0.08312710734927868, "learning_rate": 0.00015989847715736042, "loss": 0.7726, "step": 630 }, { "epoch": 0.032237184450000636, "grad_norm": 0.06335807274711928, "learning_rate": 0.00016116751269035535, "loss": 0.7934, "step": 635 }, { "epoch": 0.032491020548032136, "grad_norm": 0.07178135932549387, "learning_rate": 0.00016243654822335025, "loss": 0.7719, "step": 640 }, { "epoch": 0.032744856646063636, "grad_norm": 0.07635177651816702, "learning_rate": 0.00016370558375634518, "loss": 0.7969, "step": 645 }, { "epoch": 0.032998692744095136, "grad_norm": 0.17477231843517774, "learning_rate": 0.0001649746192893401, "loss": 0.7771, "step": 650 }, { "epoch": 0.033252528842126636, "grad_norm": 0.06434201144677448, "learning_rate": 0.00016624365482233503, "loss": 0.7829, "step": 655 }, { "epoch": 0.03350636494015814, "grad_norm": 0.0663036035191937, "learning_rate": 0.00016751269035532995, "loss": 0.7742, "step": 660 }, { "epoch": 0.03376020103818964, "grad_norm": 0.061908820772137774, "learning_rate": 0.00016878172588832488, "loss": 0.7748, "step": 665 }, { "epoch": 0.03401403713622114, "grad_norm": 0.07340604363772968, "learning_rate": 0.0001700507614213198, "loss": 0.7663, "step": 670 }, { "epoch": 0.03426787323425264, "grad_norm": 0.06471552425628542, "learning_rate": 0.0001713197969543147, "loss": 0.738, "step": 675 }, { "epoch": 0.03452170933228414, "grad_norm": 0.0628486951271683, "learning_rate": 0.00017258883248730963, "loss": 0.7693, "step": 680 }, { "epoch": 0.03477554543031564, "grad_norm": 0.0821063953061165, "learning_rate": 0.00017385786802030456, "loss": 0.7937, "step": 685 }, { "epoch": 0.03502938152834715, "grad_norm": 0.0677477739831011, "learning_rate": 0.00017512690355329949, "loss": 0.7516, "step": 690 }, { "epoch": 0.03528321762637865, "grad_norm": 0.06601728867845341, "learning_rate": 0.0001763959390862944, "loss": 0.7711, "step": 695 }, { "epoch": 0.03553705372441015, "grad_norm": 0.06337314268961115, "learning_rate": 0.00017766497461928934, "loss": 0.7725, "step": 700 }, { "epoch": 0.03579088982244165, "grad_norm": 0.06700277491271579, "learning_rate": 0.00017893401015228426, "loss": 0.7779, "step": 705 }, { "epoch": 0.03604472592047315, "grad_norm": 0.06439978678064547, "learning_rate": 0.0001802030456852792, "loss": 0.7377, "step": 710 }, { "epoch": 0.03629856201850465, "grad_norm": 0.08022019987059843, "learning_rate": 0.00018147208121827412, "loss": 0.7891, "step": 715 }, { "epoch": 0.036552398116536156, "grad_norm": 0.06618773295124729, "learning_rate": 0.00018274111675126904, "loss": 0.7847, "step": 720 }, { "epoch": 0.036806234214567655, "grad_norm": 0.06785165350325073, "learning_rate": 0.00018401015228426397, "loss": 0.7709, "step": 725 }, { "epoch": 0.037060070312599155, "grad_norm": 0.06446068323928258, "learning_rate": 0.0001852791878172589, "loss": 0.7466, "step": 730 }, { "epoch": 0.037313906410630655, "grad_norm": 0.0743985429884066, "learning_rate": 0.00018654822335025382, "loss": 0.7495, "step": 735 }, { "epoch": 0.037567742508662155, "grad_norm": 0.06381394090876102, "learning_rate": 0.00018781725888324875, "loss": 0.7724, "step": 740 }, { "epoch": 0.037821578606693655, "grad_norm": 0.07609937482268822, "learning_rate": 0.00018908629441624368, "loss": 0.8044, "step": 745 }, { "epoch": 0.03807541470472516, "grad_norm": 0.07080823262148744, "learning_rate": 0.00019035532994923857, "loss": 0.7392, "step": 750 }, { "epoch": 0.03832925080275666, "grad_norm": 0.08986296323961589, "learning_rate": 0.0001916243654822335, "loss": 0.7506, "step": 755 }, { "epoch": 0.03858308690078816, "grad_norm": 0.0633546160075049, "learning_rate": 0.00019289340101522843, "loss": 0.775, "step": 760 }, { "epoch": 0.03883692299881966, "grad_norm": 0.06672741053597235, "learning_rate": 0.00019416243654822335, "loss": 0.7918, "step": 765 }, { "epoch": 0.03909075909685116, "grad_norm": 0.0667100508132584, "learning_rate": 0.00019543147208121828, "loss": 0.7857, "step": 770 }, { "epoch": 0.03934459519488266, "grad_norm": 0.07489251478473465, "learning_rate": 0.0001967005076142132, "loss": 0.7601, "step": 775 }, { "epoch": 0.03959843129291417, "grad_norm": 0.07514829685101908, "learning_rate": 0.00019796954314720813, "loss": 0.7734, "step": 780 }, { "epoch": 0.03985226739094567, "grad_norm": 0.06072516621359633, "learning_rate": 0.00019923857868020303, "loss": 0.7716, "step": 785 }, { "epoch": 0.04010610348897717, "grad_norm": 0.10545789364607022, "learning_rate": 0.00020050761421319796, "loss": 0.6972, "step": 790 }, { "epoch": 0.04035993958700867, "grad_norm": 0.06621028315139817, "learning_rate": 0.00020177664974619288, "loss": 0.7619, "step": 795 }, { "epoch": 0.04061377568504017, "grad_norm": 0.07339900729710998, "learning_rate": 0.0002030456852791878, "loss": 0.7748, "step": 800 }, { "epoch": 0.04086761178307167, "grad_norm": 0.07861308330289217, "learning_rate": 0.00020431472081218274, "loss": 0.7492, "step": 805 }, { "epoch": 0.041121447881103175, "grad_norm": 0.06891500592846019, "learning_rate": 0.00020558375634517766, "loss": 0.7389, "step": 810 }, { "epoch": 0.041375283979134675, "grad_norm": 0.061487916642653274, "learning_rate": 0.0002068527918781726, "loss": 0.7533, "step": 815 }, { "epoch": 0.041629120077166175, "grad_norm": 0.06175106319498476, "learning_rate": 0.00020812182741116754, "loss": 0.775, "step": 820 }, { "epoch": 0.041882956175197675, "grad_norm": 0.07347500552851026, "learning_rate": 0.00020939086294416244, "loss": 0.7565, "step": 825 }, { "epoch": 0.042136792273229175, "grad_norm": 0.06765223170162757, "learning_rate": 0.00021065989847715737, "loss": 0.7441, "step": 830 }, { "epoch": 0.042390628371260675, "grad_norm": 0.07847030528657203, "learning_rate": 0.0002119289340101523, "loss": 0.7446, "step": 835 }, { "epoch": 0.04264446446929218, "grad_norm": 0.07339106925074805, "learning_rate": 0.00021319796954314722, "loss": 0.7719, "step": 840 }, { "epoch": 0.04289830056732368, "grad_norm": 0.06531782875804885, "learning_rate": 0.00021446700507614215, "loss": 0.7569, "step": 845 }, { "epoch": 0.04315213666535518, "grad_norm": 0.06579891597056135, "learning_rate": 0.00021573604060913707, "loss": 0.7495, "step": 850 }, { "epoch": 0.04340597276338668, "grad_norm": 0.07284604345072491, "learning_rate": 0.000217005076142132, "loss": 0.7566, "step": 855 }, { "epoch": 0.04365980886141818, "grad_norm": 0.06726864956360354, "learning_rate": 0.0002182741116751269, "loss": 0.7742, "step": 860 }, { "epoch": 0.04391364495944968, "grad_norm": 0.06299243656591876, "learning_rate": 0.00021954314720812183, "loss": 0.7676, "step": 865 }, { "epoch": 0.04416748105748118, "grad_norm": 0.07127446574398555, "learning_rate": 0.00022081218274111675, "loss": 0.7328, "step": 870 }, { "epoch": 0.04442131715551269, "grad_norm": 0.059333625669680895, "learning_rate": 0.00022208121827411168, "loss": 0.7528, "step": 875 }, { "epoch": 0.04467515325354419, "grad_norm": 0.06769961067024773, "learning_rate": 0.0002233502538071066, "loss": 0.7702, "step": 880 }, { "epoch": 0.04492898935157569, "grad_norm": 0.05797151521946424, "learning_rate": 0.00022461928934010153, "loss": 0.7565, "step": 885 }, { "epoch": 0.04518282544960719, "grad_norm": 0.06349124685160816, "learning_rate": 0.00022588832487309646, "loss": 0.7386, "step": 890 }, { "epoch": 0.04543666154763869, "grad_norm": 0.07021759323373769, "learning_rate": 0.00022715736040609136, "loss": 0.7306, "step": 895 }, { "epoch": 0.04569049764567019, "grad_norm": 0.0637596966680832, "learning_rate": 0.00022842639593908628, "loss": 0.7699, "step": 900 }, { "epoch": 0.045944333743701694, "grad_norm": 0.06500213255962216, "learning_rate": 0.0002296954314720812, "loss": 0.7816, "step": 905 }, { "epoch": 0.046198169841733194, "grad_norm": 0.06309605572576463, "learning_rate": 0.00023096446700507614, "loss": 0.749, "step": 910 }, { "epoch": 0.046452005939764694, "grad_norm": 0.05963186976020712, "learning_rate": 0.00023223350253807106, "loss": 0.7287, "step": 915 }, { "epoch": 0.046705842037796194, "grad_norm": 0.06576264741938838, "learning_rate": 0.000233502538071066, "loss": 0.7624, "step": 920 }, { "epoch": 0.046959678135827694, "grad_norm": 0.0660867978726128, "learning_rate": 0.00023477157360406092, "loss": 0.7249, "step": 925 }, { "epoch": 0.047213514233859194, "grad_norm": 0.08980020895136409, "learning_rate": 0.00023604060913705587, "loss": 0.7694, "step": 930 }, { "epoch": 0.0474673503318907, "grad_norm": 0.06355755979289256, "learning_rate": 0.00023730964467005077, "loss": 0.7519, "step": 935 }, { "epoch": 0.0477211864299222, "grad_norm": 0.0726778191121942, "learning_rate": 0.0002385786802030457, "loss": 0.7576, "step": 940 }, { "epoch": 0.0479750225279537, "grad_norm": 0.058859928139284715, "learning_rate": 0.00023984771573604062, "loss": 0.7244, "step": 945 }, { "epoch": 0.0482288586259852, "grad_norm": 0.06081900024027338, "learning_rate": 0.00024111675126903555, "loss": 0.7373, "step": 950 }, { "epoch": 0.0484826947240167, "grad_norm": 0.061248646181951284, "learning_rate": 0.00024238578680203047, "loss": 0.7822, "step": 955 }, { "epoch": 0.0487365308220482, "grad_norm": 0.06672583029591972, "learning_rate": 0.0002436548223350254, "loss": 0.7488, "step": 960 }, { "epoch": 0.04899036692007971, "grad_norm": 0.0572603692641915, "learning_rate": 0.0002449238578680203, "loss": 0.7198, "step": 965 }, { "epoch": 0.04924420301811121, "grad_norm": 0.06681173443942526, "learning_rate": 0.0002461928934010152, "loss": 0.7525, "step": 970 }, { "epoch": 0.04949803911614271, "grad_norm": 0.07257418993579445, "learning_rate": 0.00024746192893401015, "loss": 0.7395, "step": 975 }, { "epoch": 0.04975187521417421, "grad_norm": 0.07302937635349391, "learning_rate": 0.0002487309644670051, "loss": 0.7372, "step": 980 }, { "epoch": 0.05000571131220571, "grad_norm": 0.06996794132833503, "learning_rate": 0.00025, "loss": 0.729, "step": 985 }, { "epoch": 0.05025954741023721, "grad_norm": 0.05788422679267036, "learning_rate": 0.00025126903553299493, "loss": 0.7263, "step": 990 }, { "epoch": 0.050513383508268714, "grad_norm": 0.06234697260625598, "learning_rate": 0.00025253807106598986, "loss": 0.7245, "step": 995 }, { "epoch": 0.050767219606300214, "grad_norm": 0.058460398878268476, "learning_rate": 0.0002538071065989848, "loss": 0.7382, "step": 1000 }, { "epoch": 0.051021055704331714, "grad_norm": 0.05925618041244326, "learning_rate": 0.0002550761421319797, "loss": 0.7431, "step": 1005 }, { "epoch": 0.051274891802363214, "grad_norm": 0.059944269822413306, "learning_rate": 0.00025634517766497464, "loss": 0.7556, "step": 1010 }, { "epoch": 0.051528727900394714, "grad_norm": 0.05405315094033796, "learning_rate": 0.00025761421319796956, "loss": 0.6909, "step": 1015 }, { "epoch": 0.05178256399842621, "grad_norm": 0.05752428711722885, "learning_rate": 0.0002588832487309645, "loss": 0.7232, "step": 1020 }, { "epoch": 0.05203640009645772, "grad_norm": 0.06068982302190885, "learning_rate": 0.00026015228426395936, "loss": 0.7159, "step": 1025 }, { "epoch": 0.05229023619448922, "grad_norm": 0.06926054723257352, "learning_rate": 0.00026142131979695434, "loss": 0.7097, "step": 1030 }, { "epoch": 0.05254407229252072, "grad_norm": 0.05944640426895496, "learning_rate": 0.0002626903553299492, "loss": 0.7266, "step": 1035 }, { "epoch": 0.05279790839055222, "grad_norm": 0.05911056311965479, "learning_rate": 0.0002639593908629442, "loss": 0.7034, "step": 1040 }, { "epoch": 0.05305174448858372, "grad_norm": 0.05404273390108656, "learning_rate": 0.00026522842639593907, "loss": 0.7263, "step": 1045 }, { "epoch": 0.05330558058661522, "grad_norm": 0.0681569692981572, "learning_rate": 0.00026649746192893405, "loss": 0.709, "step": 1050 }, { "epoch": 0.05355941668464673, "grad_norm": 0.061027222712780306, "learning_rate": 0.0002677664974619289, "loss": 0.745, "step": 1055 }, { "epoch": 0.05381325278267823, "grad_norm": 0.060423144178792326, "learning_rate": 0.00026903553299492385, "loss": 0.7525, "step": 1060 }, { "epoch": 0.05406708888070973, "grad_norm": 0.05967597970626861, "learning_rate": 0.00027030456852791877, "loss": 0.739, "step": 1065 }, { "epoch": 0.05432092497874123, "grad_norm": 0.06674697326887237, "learning_rate": 0.0002715736040609137, "loss": 0.754, "step": 1070 }, { "epoch": 0.054574761076772726, "grad_norm": 0.06984166791857392, "learning_rate": 0.0002728426395939086, "loss": 0.7347, "step": 1075 }, { "epoch": 0.054828597174804226, "grad_norm": 0.05660547722828721, "learning_rate": 0.00027411167512690355, "loss": 0.7372, "step": 1080 }, { "epoch": 0.05508243327283573, "grad_norm": 0.061731160855571954, "learning_rate": 0.0002753807106598985, "loss": 0.7273, "step": 1085 }, { "epoch": 0.05533626937086723, "grad_norm": 0.05843567186487286, "learning_rate": 0.0002766497461928934, "loss": 0.7405, "step": 1090 }, { "epoch": 0.05559010546889873, "grad_norm": 0.06160351540468081, "learning_rate": 0.0002779187817258883, "loss": 0.7392, "step": 1095 }, { "epoch": 0.05584394156693023, "grad_norm": 0.06302649918893416, "learning_rate": 0.00027918781725888326, "loss": 0.7169, "step": 1100 }, { "epoch": 0.05609777766496173, "grad_norm": 0.26748522478789255, "learning_rate": 0.0002804568527918782, "loss": 0.7258, "step": 1105 }, { "epoch": 0.05635161376299323, "grad_norm": 0.060283031147792536, "learning_rate": 0.0002817258883248731, "loss": 0.7216, "step": 1110 }, { "epoch": 0.05660544986102474, "grad_norm": 0.06726444207819834, "learning_rate": 0.00028299492385786804, "loss": 0.751, "step": 1115 }, { "epoch": 0.05685928595905624, "grad_norm": 0.09905598633925246, "learning_rate": 0.00028426395939086296, "loss": 0.7714, "step": 1120 }, { "epoch": 0.05711312205708774, "grad_norm": 0.1277329704703035, "learning_rate": 0.0002855329949238579, "loss": 0.7602, "step": 1125 }, { "epoch": 0.05736695815511924, "grad_norm": 0.20723659499952354, "learning_rate": 0.0002868020304568528, "loss": 0.7063, "step": 1130 }, { "epoch": 0.05762079425315074, "grad_norm": 0.08585174275787383, "learning_rate": 0.00028807106598984774, "loss": 0.7199, "step": 1135 }, { "epoch": 0.05787463035118224, "grad_norm": 0.0660384074730241, "learning_rate": 0.0002893401015228426, "loss": 0.7308, "step": 1140 }, { "epoch": 0.05812846644921374, "grad_norm": 0.19383133097044608, "learning_rate": 0.0002906091370558376, "loss": 0.9052, "step": 1145 }, { "epoch": 0.058382302547245246, "grad_norm": 0.24673064263656, "learning_rate": 0.00029187817258883247, "loss": 0.7624, "step": 1150 }, { "epoch": 0.058636138645276746, "grad_norm": 32.757348375145966, "learning_rate": 0.00029314720812182745, "loss": 0.7527, "step": 1155 }, { "epoch": 0.058889974743308246, "grad_norm": 0.09916235027198479, "learning_rate": 0.0002944162436548223, "loss": 0.7613, "step": 1160 }, { "epoch": 0.059143810841339746, "grad_norm": 0.12917795113084668, "learning_rate": 0.0002956852791878173, "loss": 0.7631, "step": 1165 }, { "epoch": 0.059397646939371246, "grad_norm": 0.08883151007488581, "learning_rate": 0.00029695431472081217, "loss": 0.7412, "step": 1170 }, { "epoch": 0.059651483037402746, "grad_norm": 0.08449886407082698, "learning_rate": 0.0002982233502538071, "loss": 0.7789, "step": 1175 }, { "epoch": 0.05990531913543425, "grad_norm": 0.12057734525050375, "learning_rate": 0.000299492385786802, "loss": 0.7513, "step": 1180 }, { "epoch": 0.06015915523346575, "grad_norm": 0.16122305869580125, "learning_rate": 0.00030076142131979695, "loss": 0.8202, "step": 1185 }, { "epoch": 0.06041299133149725, "grad_norm": 0.09478283020933216, "learning_rate": 0.0003020304568527919, "loss": 0.7556, "step": 1190 }, { "epoch": 0.06066682742952875, "grad_norm": 1.1482190144535993, "learning_rate": 0.0003032994923857868, "loss": 0.742, "step": 1195 }, { "epoch": 0.06092066352756025, "grad_norm": 0.08948971775561998, "learning_rate": 0.00030456852791878173, "loss": 0.7897, "step": 1200 }, { "epoch": 0.06117449962559175, "grad_norm": 0.11543924190628982, "learning_rate": 0.00030583756345177666, "loss": 0.7372, "step": 1205 }, { "epoch": 0.06142833572362326, "grad_norm": 0.06522079820810416, "learning_rate": 0.0003071065989847716, "loss": 0.7409, "step": 1210 }, { "epoch": 0.06168217182165476, "grad_norm": 0.05653686696839828, "learning_rate": 0.0003083756345177665, "loss": 0.7432, "step": 1215 }, { "epoch": 0.06193600791968626, "grad_norm": 0.06485576389961441, "learning_rate": 0.00030964467005076144, "loss": 0.7637, "step": 1220 }, { "epoch": 0.06218984401771776, "grad_norm": 0.06287754156459976, "learning_rate": 0.00031091370558375636, "loss": 0.7496, "step": 1225 }, { "epoch": 0.06244368011574926, "grad_norm": 0.06275169467007742, "learning_rate": 0.0003121827411167513, "loss": 0.7356, "step": 1230 }, { "epoch": 0.06269751621378077, "grad_norm": 0.1431156053310293, "learning_rate": 0.0003134517766497462, "loss": 0.7021, "step": 1235 }, { "epoch": 0.06295135231181226, "grad_norm": 0.0698152906671079, "learning_rate": 0.00031472081218274114, "loss": 0.7369, "step": 1240 }, { "epoch": 0.06320518840984377, "grad_norm": 0.06474169533744434, "learning_rate": 0.000315989847715736, "loss": 0.7431, "step": 1245 }, { "epoch": 0.06345902450787526, "grad_norm": 0.05656964302149857, "learning_rate": 0.000317258883248731, "loss": 0.7279, "step": 1250 }, { "epoch": 0.06371286060590677, "grad_norm": 0.06008444247403088, "learning_rate": 0.00031852791878172587, "loss": 0.7255, "step": 1255 }, { "epoch": 0.06396669670393827, "grad_norm": 0.06258676825297353, "learning_rate": 0.00031979695431472085, "loss": 0.7294, "step": 1260 }, { "epoch": 0.06422053280196977, "grad_norm": 0.06101644314132169, "learning_rate": 0.0003210659898477157, "loss": 0.7495, "step": 1265 }, { "epoch": 0.06447436890000127, "grad_norm": 0.05784297759132409, "learning_rate": 0.0003223350253807107, "loss": 0.7089, "step": 1270 }, { "epoch": 0.06472820499803277, "grad_norm": 0.06608430212446814, "learning_rate": 0.00032360406091370557, "loss": 0.729, "step": 1275 }, { "epoch": 0.06498204109606427, "grad_norm": 0.06682999306491608, "learning_rate": 0.0003248730964467005, "loss": 0.7659, "step": 1280 }, { "epoch": 0.06523587719409578, "grad_norm": 0.05567632533610063, "learning_rate": 0.0003261421319796954, "loss": 0.7043, "step": 1285 }, { "epoch": 0.06548971329212727, "grad_norm": 0.08049433253072921, "learning_rate": 0.00032741116751269035, "loss": 0.7045, "step": 1290 }, { "epoch": 0.06574354939015878, "grad_norm": 0.06943993107179286, "learning_rate": 0.0003286802030456853, "loss": 0.7442, "step": 1295 }, { "epoch": 0.06599738548819027, "grad_norm": 0.3124576265680848, "learning_rate": 0.0003299492385786802, "loss": 0.7156, "step": 1300 }, { "epoch": 0.06625122158622178, "grad_norm": 0.05993979750837367, "learning_rate": 0.00033121827411167513, "loss": 0.7378, "step": 1305 }, { "epoch": 0.06650505768425327, "grad_norm": 0.05955364139261034, "learning_rate": 0.00033248730964467006, "loss": 0.7258, "step": 1310 }, { "epoch": 0.06675889378228478, "grad_norm": 0.08613170534764741, "learning_rate": 0.00033375634517766493, "loss": 0.7505, "step": 1315 }, { "epoch": 0.06701272988031629, "grad_norm": 0.06998830116145732, "learning_rate": 0.0003350253807106599, "loss": 0.7261, "step": 1320 }, { "epoch": 0.06726656597834778, "grad_norm": 0.05795740324311744, "learning_rate": 0.00033629441624365484, "loss": 0.7025, "step": 1325 }, { "epoch": 0.06752040207637929, "grad_norm": 0.055676641145626066, "learning_rate": 0.00033756345177664976, "loss": 0.72, "step": 1330 }, { "epoch": 0.06777423817441078, "grad_norm": 0.05604862800727641, "learning_rate": 0.0003388324873096447, "loss": 0.7285, "step": 1335 }, { "epoch": 0.06802807427244228, "grad_norm": 0.05356518336455629, "learning_rate": 0.0003401015228426396, "loss": 0.7386, "step": 1340 }, { "epoch": 0.06828191037047379, "grad_norm": 0.09605226497693169, "learning_rate": 0.00034137055837563454, "loss": 0.7118, "step": 1345 }, { "epoch": 0.06853574646850528, "grad_norm": 0.06982897061697936, "learning_rate": 0.0003426395939086294, "loss": 0.6877, "step": 1350 }, { "epoch": 0.06878958256653679, "grad_norm": 0.07303904652751834, "learning_rate": 0.0003439086294416244, "loss": 0.7174, "step": 1355 }, { "epoch": 0.06904341866456828, "grad_norm": 0.07291571688385211, "learning_rate": 0.00034517766497461927, "loss": 0.7344, "step": 1360 }, { "epoch": 0.06929725476259979, "grad_norm": 0.07543740254911013, "learning_rate": 0.00034644670050761425, "loss": 0.7053, "step": 1365 }, { "epoch": 0.06955109086063128, "grad_norm": 0.06625493235058802, "learning_rate": 0.0003477157360406091, "loss": 0.7, "step": 1370 }, { "epoch": 0.06980492695866279, "grad_norm": 0.05544221962273842, "learning_rate": 0.0003489847715736041, "loss": 0.7211, "step": 1375 }, { "epoch": 0.0700587630566943, "grad_norm": 0.07915130432819355, "learning_rate": 0.00035025380710659897, "loss": 0.7113, "step": 1380 }, { "epoch": 0.07031259915472579, "grad_norm": 0.37754027559534814, "learning_rate": 0.00035152284263959395, "loss": 0.7025, "step": 1385 }, { "epoch": 0.0705664352527573, "grad_norm": 0.12584003699209365, "learning_rate": 0.0003527918781725888, "loss": 0.7465, "step": 1390 }, { "epoch": 0.07082027135078879, "grad_norm": 0.06582449891223112, "learning_rate": 0.00035406091370558375, "loss": 0.7178, "step": 1395 }, { "epoch": 0.0710741074488203, "grad_norm": 0.06342116685715943, "learning_rate": 0.0003553299492385787, "loss": 0.7553, "step": 1400 }, { "epoch": 0.0713279435468518, "grad_norm": 0.062008656515146095, "learning_rate": 0.0003565989847715736, "loss": 0.7584, "step": 1405 }, { "epoch": 0.0715817796448833, "grad_norm": 0.15575295778070705, "learning_rate": 0.00035786802030456853, "loss": 0.7721, "step": 1410 }, { "epoch": 0.0718356157429148, "grad_norm": 0.07024393682133559, "learning_rate": 0.00035913705583756346, "loss": 0.716, "step": 1415 }, { "epoch": 0.0720894518409463, "grad_norm": 0.06141309488998922, "learning_rate": 0.0003604060913705584, "loss": 0.7537, "step": 1420 }, { "epoch": 0.0723432879389778, "grad_norm": 0.05848534147436462, "learning_rate": 0.0003616751269035533, "loss": 0.7358, "step": 1425 }, { "epoch": 0.0725971240370093, "grad_norm": 0.05896313633341948, "learning_rate": 0.00036294416243654823, "loss": 0.7163, "step": 1430 }, { "epoch": 0.0728509601350408, "grad_norm": 0.0612049129333866, "learning_rate": 0.00036421319796954316, "loss": 0.7278, "step": 1435 }, { "epoch": 0.07310479623307231, "grad_norm": 0.20510721585245476, "learning_rate": 0.0003654822335025381, "loss": 0.7204, "step": 1440 }, { "epoch": 0.0733586323311038, "grad_norm": 0.06353888649819851, "learning_rate": 0.000366751269035533, "loss": 0.703, "step": 1445 }, { "epoch": 0.07361246842913531, "grad_norm": 0.11598595193927975, "learning_rate": 0.00036802030456852794, "loss": 0.7353, "step": 1450 }, { "epoch": 0.0738663045271668, "grad_norm": 0.06735218740803854, "learning_rate": 0.00036928934010152287, "loss": 0.7387, "step": 1455 }, { "epoch": 0.07412014062519831, "grad_norm": 0.056456597607688834, "learning_rate": 0.0003705583756345178, "loss": 0.7244, "step": 1460 }, { "epoch": 0.0743739767232298, "grad_norm": 0.06695445879742773, "learning_rate": 0.00037182741116751266, "loss": 0.733, "step": 1465 }, { "epoch": 0.07462781282126131, "grad_norm": 0.062469347353580236, "learning_rate": 0.00037309644670050765, "loss": 0.7283, "step": 1470 }, { "epoch": 0.07488164891929282, "grad_norm": 0.10743187268032847, "learning_rate": 0.0003743654822335025, "loss": 0.7627, "step": 1475 }, { "epoch": 0.07513548501732431, "grad_norm": 0.06985614418900853, "learning_rate": 0.0003756345177664975, "loss": 0.7398, "step": 1480 }, { "epoch": 0.07538932111535582, "grad_norm": 4.112242925569732, "learning_rate": 0.00037690355329949237, "loss": 0.7503, "step": 1485 }, { "epoch": 0.07564315721338731, "grad_norm": 3.056249205800858, "learning_rate": 0.00037817258883248735, "loss": 0.7365, "step": 1490 }, { "epoch": 0.07589699331141882, "grad_norm": 0.08367423229399179, "learning_rate": 0.0003794416243654822, "loss": 0.751, "step": 1495 }, { "epoch": 0.07615082940945032, "grad_norm": 0.17368019423571526, "learning_rate": 0.00038071065989847715, "loss": 0.7447, "step": 1500 }, { "epoch": 0.07640466550748182, "grad_norm": 0.06787177882481868, "learning_rate": 0.0003819796954314721, "loss": 0.7489, "step": 1505 }, { "epoch": 0.07665850160551332, "grad_norm": 0.07169631398568231, "learning_rate": 0.000383248730964467, "loss": 0.7393, "step": 1510 }, { "epoch": 0.07691233770354482, "grad_norm": 0.05749839111352872, "learning_rate": 0.00038451776649746193, "loss": 0.721, "step": 1515 }, { "epoch": 0.07716617380157632, "grad_norm": 0.08575887204955374, "learning_rate": 0.00038578680203045685, "loss": 0.7128, "step": 1520 }, { "epoch": 0.07742000989960782, "grad_norm": 0.05613491181829679, "learning_rate": 0.0003870558375634518, "loss": 0.7338, "step": 1525 }, { "epoch": 0.07767384599763932, "grad_norm": 0.06300181911559392, "learning_rate": 0.0003883248730964467, "loss": 0.7337, "step": 1530 }, { "epoch": 0.07792768209567083, "grad_norm": 0.07134915135851151, "learning_rate": 0.00038959390862944163, "loss": 0.7629, "step": 1535 }, { "epoch": 0.07818151819370232, "grad_norm": 0.05162935081471609, "learning_rate": 0.00039086294416243656, "loss": 0.6955, "step": 1540 }, { "epoch": 0.07843535429173383, "grad_norm": 0.06414129871881698, "learning_rate": 0.0003921319796954315, "loss": 0.7301, "step": 1545 }, { "epoch": 0.07868919038976532, "grad_norm": 0.05373549484924304, "learning_rate": 0.0003934010152284264, "loss": 0.6976, "step": 1550 }, { "epoch": 0.07894302648779683, "grad_norm": 0.06837620727230255, "learning_rate": 0.00039467005076142134, "loss": 0.6985, "step": 1555 }, { "epoch": 0.07919686258582834, "grad_norm": 0.07846375652980406, "learning_rate": 0.00039593908629441627, "loss": 0.7338, "step": 1560 }, { "epoch": 0.07945069868385983, "grad_norm": 0.06174362803606399, "learning_rate": 0.0003972081218274112, "loss": 0.7168, "step": 1565 }, { "epoch": 0.07970453478189134, "grad_norm": 0.05882865445136937, "learning_rate": 0.00039847715736040606, "loss": 0.7184, "step": 1570 }, { "epoch": 0.07995837087992283, "grad_norm": 0.0493701696839989, "learning_rate": 0.00039974619289340104, "loss": 0.7053, "step": 1575 }, { "epoch": 0.08021220697795434, "grad_norm": 0.054577428336826876, "learning_rate": 0.0004010152284263959, "loss": 0.7315, "step": 1580 }, { "epoch": 0.08046604307598583, "grad_norm": 0.05891870223720398, "learning_rate": 0.0004022842639593909, "loss": 0.708, "step": 1585 }, { "epoch": 0.08071987917401734, "grad_norm": 0.09122730379802985, "learning_rate": 0.00040355329949238577, "loss": 0.6886, "step": 1590 }, { "epoch": 0.08097371527204884, "grad_norm": 0.05434428539871062, "learning_rate": 0.00040482233502538075, "loss": 0.7235, "step": 1595 }, { "epoch": 0.08122755137008034, "grad_norm": 0.057406938749746325, "learning_rate": 0.0004060913705583756, "loss": 0.6998, "step": 1600 }, { "epoch": 0.08148138746811184, "grad_norm": 0.05624064887105565, "learning_rate": 0.0004073604060913706, "loss": 0.7167, "step": 1605 }, { "epoch": 0.08173522356614334, "grad_norm": 0.08475156677013762, "learning_rate": 0.0004086294416243655, "loss": 0.717, "step": 1610 }, { "epoch": 0.08198905966417484, "grad_norm": 0.061648445637681494, "learning_rate": 0.0004098984771573604, "loss": 0.7219, "step": 1615 }, { "epoch": 0.08224289576220635, "grad_norm": 0.05747063059303642, "learning_rate": 0.00041116751269035533, "loss": 0.698, "step": 1620 }, { "epoch": 0.08249673186023784, "grad_norm": 0.0531564390368294, "learning_rate": 0.00041243654822335025, "loss": 0.6933, "step": 1625 }, { "epoch": 0.08275056795826935, "grad_norm": 0.0531546385182133, "learning_rate": 0.0004137055837563452, "loss": 0.7269, "step": 1630 }, { "epoch": 0.08300440405630084, "grad_norm": 0.05287338545106486, "learning_rate": 0.0004149746192893401, "loss": 0.7225, "step": 1635 }, { "epoch": 0.08325824015433235, "grad_norm": 0.05148433899438861, "learning_rate": 0.0004162436548223351, "loss": 0.7105, "step": 1640 }, { "epoch": 0.08351207625236384, "grad_norm": 0.05687600911185101, "learning_rate": 0.00041751269035532996, "loss": 0.7124, "step": 1645 }, { "epoch": 0.08376591235039535, "grad_norm": 0.05524705398314237, "learning_rate": 0.0004187817258883249, "loss": 0.7175, "step": 1650 }, { "epoch": 0.08401974844842686, "grad_norm": 0.05772786142305999, "learning_rate": 0.0004200507614213198, "loss": 0.72, "step": 1655 }, { "epoch": 0.08427358454645835, "grad_norm": 0.06460258179361435, "learning_rate": 0.00042131979695431474, "loss": 0.7155, "step": 1660 }, { "epoch": 0.08452742064448986, "grad_norm": 0.08154444806905058, "learning_rate": 0.00042258883248730967, "loss": 0.7249, "step": 1665 }, { "epoch": 0.08478125674252135, "grad_norm": 0.07036154319504197, "learning_rate": 0.0004238578680203046, "loss": 0.7018, "step": 1670 }, { "epoch": 0.08503509284055286, "grad_norm": 0.07974859335868449, "learning_rate": 0.0004251269035532995, "loss": 0.6848, "step": 1675 }, { "epoch": 0.08528892893858436, "grad_norm": 0.07106622592720147, "learning_rate": 0.00042639593908629444, "loss": 0.7306, "step": 1680 }, { "epoch": 0.08554276503661586, "grad_norm": 0.07520683130587769, "learning_rate": 0.0004276649746192893, "loss": 0.6777, "step": 1685 }, { "epoch": 0.08579660113464736, "grad_norm": 0.060430901146499016, "learning_rate": 0.0004289340101522843, "loss": 0.7051, "step": 1690 }, { "epoch": 0.08605043723267886, "grad_norm": 0.0635450282792058, "learning_rate": 0.00043020304568527917, "loss": 0.6913, "step": 1695 }, { "epoch": 0.08630427333071036, "grad_norm": 0.04972958060129512, "learning_rate": 0.00043147208121827415, "loss": 0.7135, "step": 1700 }, { "epoch": 0.08655810942874186, "grad_norm": 0.04944823830530392, "learning_rate": 0.000432741116751269, "loss": 0.7038, "step": 1705 }, { "epoch": 0.08681194552677336, "grad_norm": 0.0567985625555269, "learning_rate": 0.000434010152284264, "loss": 0.7287, "step": 1710 }, { "epoch": 0.08706578162480487, "grad_norm": 0.051682155754569976, "learning_rate": 0.0004352791878172589, "loss": 0.6989, "step": 1715 }, { "epoch": 0.08731961772283636, "grad_norm": 0.057888169540962604, "learning_rate": 0.0004365482233502538, "loss": 0.6968, "step": 1720 }, { "epoch": 0.08757345382086787, "grad_norm": 0.04580235344300138, "learning_rate": 0.00043781725888324873, "loss": 0.7062, "step": 1725 }, { "epoch": 0.08782728991889936, "grad_norm": 0.05599349020439277, "learning_rate": 0.00043908629441624365, "loss": 0.7211, "step": 1730 }, { "epoch": 0.08808112601693087, "grad_norm": 0.06293126880283036, "learning_rate": 0.0004403553299492386, "loss": 0.7035, "step": 1735 }, { "epoch": 0.08833496211496236, "grad_norm": 0.05082245406055062, "learning_rate": 0.0004416243654822335, "loss": 0.674, "step": 1740 }, { "epoch": 0.08858879821299387, "grad_norm": 0.05354641526756831, "learning_rate": 0.00044289340101522843, "loss": 0.6947, "step": 1745 }, { "epoch": 0.08884263431102538, "grad_norm": 0.05188231038829293, "learning_rate": 0.00044416243654822336, "loss": 0.6985, "step": 1750 }, { "epoch": 0.08909647040905687, "grad_norm": 0.28571205654683896, "learning_rate": 0.0004454314720812183, "loss": 0.7366, "step": 1755 }, { "epoch": 0.08935030650708838, "grad_norm": 0.05009396851866736, "learning_rate": 0.0004467005076142132, "loss": 0.7037, "step": 1760 }, { "epoch": 0.08960414260511987, "grad_norm": 0.057008248968552507, "learning_rate": 0.00044796954314720814, "loss": 0.7004, "step": 1765 }, { "epoch": 0.08985797870315138, "grad_norm": 0.08570192402076927, "learning_rate": 0.00044923857868020306, "loss": 0.6962, "step": 1770 }, { "epoch": 0.09011181480118288, "grad_norm": 0.07466469422515871, "learning_rate": 0.000450507614213198, "loss": 0.6782, "step": 1775 }, { "epoch": 0.09036565089921438, "grad_norm": 0.050896499358629874, "learning_rate": 0.0004517766497461929, "loss": 0.6832, "step": 1780 }, { "epoch": 0.09061948699724588, "grad_norm": 0.05086157843514099, "learning_rate": 0.00045304568527918784, "loss": 0.7398, "step": 1785 }, { "epoch": 0.09087332309527738, "grad_norm": 0.048846685380405284, "learning_rate": 0.0004543147208121827, "loss": 0.7125, "step": 1790 }, { "epoch": 0.09112715919330888, "grad_norm": 0.06253755883118842, "learning_rate": 0.0004555837563451777, "loss": 0.7122, "step": 1795 }, { "epoch": 0.09138099529134038, "grad_norm": 0.0511560475400958, "learning_rate": 0.00045685279187817257, "loss": 0.7291, "step": 1800 }, { "epoch": 0.09163483138937188, "grad_norm": 0.052121572152721875, "learning_rate": 0.00045812182741116755, "loss": 0.6912, "step": 1805 }, { "epoch": 0.09188866748740339, "grad_norm": 0.049287056641165186, "learning_rate": 0.0004593908629441624, "loss": 0.6776, "step": 1810 }, { "epoch": 0.09214250358543488, "grad_norm": 0.04890540112646688, "learning_rate": 0.0004606598984771574, "loss": 0.6833, "step": 1815 }, { "epoch": 0.09239633968346639, "grad_norm": 0.0640968907329142, "learning_rate": 0.0004619289340101523, "loss": 0.7165, "step": 1820 }, { "epoch": 0.09265017578149788, "grad_norm": 0.09373858909563126, "learning_rate": 0.0004631979695431472, "loss": 0.6946, "step": 1825 }, { "epoch": 0.09290401187952939, "grad_norm": 0.0502898696426856, "learning_rate": 0.0004644670050761421, "loss": 0.7373, "step": 1830 }, { "epoch": 0.0931578479775609, "grad_norm": 0.052116651580975205, "learning_rate": 0.00046573604060913705, "loss": 0.6888, "step": 1835 }, { "epoch": 0.09341168407559239, "grad_norm": 0.06418059125424298, "learning_rate": 0.000467005076142132, "loss": 0.6905, "step": 1840 }, { "epoch": 0.0936655201736239, "grad_norm": 0.06311317514924268, "learning_rate": 0.0004682741116751269, "loss": 0.6741, "step": 1845 }, { "epoch": 0.09391935627165539, "grad_norm": 0.04939022002771504, "learning_rate": 0.00046954314720812183, "loss": 0.7203, "step": 1850 }, { "epoch": 0.0941731923696869, "grad_norm": 0.05910410775267137, "learning_rate": 0.00047081218274111676, "loss": 0.7099, "step": 1855 }, { "epoch": 0.09442702846771839, "grad_norm": 0.0814151828326972, "learning_rate": 0.00047208121827411174, "loss": 0.7248, "step": 1860 }, { "epoch": 0.0946808645657499, "grad_norm": 0.8076656010487256, "learning_rate": 0.0004733502538071066, "loss": 0.7253, "step": 1865 }, { "epoch": 0.0949347006637814, "grad_norm": 0.06765805196660898, "learning_rate": 0.00047461928934010154, "loss": 0.6885, "step": 1870 }, { "epoch": 0.0951885367618129, "grad_norm": 0.05841136910440899, "learning_rate": 0.00047588832487309646, "loss": 0.7059, "step": 1875 }, { "epoch": 0.0954423728598444, "grad_norm": 0.06300415339926192, "learning_rate": 0.0004771573604060914, "loss": 0.7087, "step": 1880 }, { "epoch": 0.0956962089578759, "grad_norm": 0.0740423426972598, "learning_rate": 0.0004784263959390863, "loss": 0.6992, "step": 1885 }, { "epoch": 0.0959500450559074, "grad_norm": 0.06612814847151591, "learning_rate": 0.00047969543147208124, "loss": 0.713, "step": 1890 }, { "epoch": 0.09620388115393891, "grad_norm": 0.05216005232585783, "learning_rate": 0.00048096446700507617, "loss": 0.707, "step": 1895 }, { "epoch": 0.0964577172519704, "grad_norm": 0.05326644260558058, "learning_rate": 0.0004822335025380711, "loss": 0.7025, "step": 1900 }, { "epoch": 0.09671155335000191, "grad_norm": 0.06471751560857093, "learning_rate": 0.00048350253807106597, "loss": 0.7059, "step": 1905 }, { "epoch": 0.0969653894480334, "grad_norm": 0.06373553800707545, "learning_rate": 0.00048477157360406095, "loss": 0.7124, "step": 1910 }, { "epoch": 0.09721922554606491, "grad_norm": 0.07107687339224335, "learning_rate": 0.0004860406091370558, "loss": 0.7199, "step": 1915 }, { "epoch": 0.0974730616440964, "grad_norm": 0.07062703466342107, "learning_rate": 0.0004873096446700508, "loss": 0.7306, "step": 1920 }, { "epoch": 0.09772689774212791, "grad_norm": 0.05938067294182585, "learning_rate": 0.0004885786802030457, "loss": 0.683, "step": 1925 }, { "epoch": 0.09798073384015941, "grad_norm": 0.07174216980411427, "learning_rate": 0.0004898477157360406, "loss": 0.7168, "step": 1930 }, { "epoch": 0.09823456993819091, "grad_norm": 0.06009740027025641, "learning_rate": 0.0004911167512690356, "loss": 0.6961, "step": 1935 }, { "epoch": 0.09848840603622241, "grad_norm": 0.05206994171495008, "learning_rate": 0.0004923857868020305, "loss": 0.6947, "step": 1940 }, { "epoch": 0.09874224213425391, "grad_norm": 0.07987693756335262, "learning_rate": 0.0004936548223350254, "loss": 0.6895, "step": 1945 }, { "epoch": 0.09899607823228541, "grad_norm": 0.05144943467180292, "learning_rate": 0.0004949238578680203, "loss": 0.7062, "step": 1950 }, { "epoch": 0.09924991433031692, "grad_norm": 0.05082445787484508, "learning_rate": 0.0004961928934010153, "loss": 0.6943, "step": 1955 }, { "epoch": 0.09950375042834841, "grad_norm": 0.047141885648804095, "learning_rate": 0.0004974619289340102, "loss": 0.6917, "step": 1960 }, { "epoch": 0.09975758652637992, "grad_norm": 0.047197680147561615, "learning_rate": 0.0004987309644670051, "loss": 0.6746, "step": 1965 }, { "epoch": 0.10001142262441141, "grad_norm": 0.07137820513966749, "learning_rate": 0.0005, "loss": 0.6841, "step": 1970 }, { "epoch": 0.10026525872244292, "grad_norm": 0.052618795283310475, "learning_rate": 0.000501269035532995, "loss": 0.6897, "step": 1975 }, { "epoch": 0.10051909482047441, "grad_norm": 0.05796055424874039, "learning_rate": 0.0005025380710659899, "loss": 0.6861, "step": 1980 }, { "epoch": 0.10077293091850592, "grad_norm": 0.050447551050472175, "learning_rate": 0.0005038071065989847, "loss": 0.6834, "step": 1985 }, { "epoch": 0.10102676701653743, "grad_norm": 0.048726174649120095, "learning_rate": 0.0005050761421319797, "loss": 0.6813, "step": 1990 }, { "epoch": 0.10128060311456892, "grad_norm": 0.07440137618882016, "learning_rate": 0.0005063451776649747, "loss": 0.6647, "step": 1995 }, { "epoch": 0.10153443921260043, "grad_norm": 0.05076461271762827, "learning_rate": 0.0005076142131979696, "loss": 0.7137, "step": 2000 }, { "epoch": 0.10178827531063192, "grad_norm": 0.0471884569121775, "learning_rate": 0.0005088832487309644, "loss": 0.6887, "step": 2005 }, { "epoch": 0.10204211140866343, "grad_norm": 0.11880359913639196, "learning_rate": 0.0005101522842639594, "loss": 0.6919, "step": 2010 }, { "epoch": 0.10229594750669492, "grad_norm": 0.20996719664722965, "learning_rate": 0.0005114213197969543, "loss": 0.6802, "step": 2015 }, { "epoch": 0.10254978360472643, "grad_norm": 0.047803725376976, "learning_rate": 0.0005126903553299493, "loss": 0.7059, "step": 2020 }, { "epoch": 0.10280361970275793, "grad_norm": 0.0449286937065325, "learning_rate": 0.0005139593908629441, "loss": 0.7061, "step": 2025 }, { "epoch": 0.10305745580078943, "grad_norm": 0.052746260634763203, "learning_rate": 0.0005152284263959391, "loss": 0.6967, "step": 2030 }, { "epoch": 0.10331129189882093, "grad_norm": 0.04943616988366681, "learning_rate": 0.000516497461928934, "loss": 0.717, "step": 2035 }, { "epoch": 0.10356512799685243, "grad_norm": 0.049716392637897996, "learning_rate": 0.000517766497461929, "loss": 0.6805, "step": 2040 }, { "epoch": 0.10381896409488393, "grad_norm": 0.04637995606001784, "learning_rate": 0.0005190355329949239, "loss": 0.6779, "step": 2045 }, { "epoch": 0.10407280019291544, "grad_norm": 0.04774106805011772, "learning_rate": 0.0005203045685279187, "loss": 0.7253, "step": 2050 }, { "epoch": 0.10432663629094693, "grad_norm": 0.051793990117296816, "learning_rate": 0.0005215736040609137, "loss": 0.6626, "step": 2055 }, { "epoch": 0.10458047238897844, "grad_norm": 0.045546210431367966, "learning_rate": 0.0005228426395939087, "loss": 0.6842, "step": 2060 }, { "epoch": 0.10483430848700993, "grad_norm": 0.048518075416691675, "learning_rate": 0.0005241116751269036, "loss": 0.7328, "step": 2065 }, { "epoch": 0.10508814458504144, "grad_norm": 0.05679454235982254, "learning_rate": 0.0005253807106598984, "loss": 0.6876, "step": 2070 }, { "epoch": 0.10534198068307293, "grad_norm": 0.05098370288454924, "learning_rate": 0.0005266497461928934, "loss": 0.6929, "step": 2075 }, { "epoch": 0.10559581678110444, "grad_norm": 0.04168186472739525, "learning_rate": 0.0005279187817258884, "loss": 0.6769, "step": 2080 }, { "epoch": 0.10584965287913595, "grad_norm": 0.04581891098414401, "learning_rate": 0.0005291878172588833, "loss": 0.6754, "step": 2085 }, { "epoch": 0.10610348897716744, "grad_norm": 0.043468753842354504, "learning_rate": 0.0005304568527918781, "loss": 0.7335, "step": 2090 }, { "epoch": 0.10635732507519895, "grad_norm": 0.05308604744136886, "learning_rate": 0.0005317258883248731, "loss": 0.6921, "step": 2095 }, { "epoch": 0.10661116117323044, "grad_norm": 0.07051381632192111, "learning_rate": 0.0005329949238578681, "loss": 0.6755, "step": 2100 }, { "epoch": 0.10686499727126195, "grad_norm": 0.046426623183754255, "learning_rate": 0.000534263959390863, "loss": 0.7157, "step": 2105 }, { "epoch": 0.10711883336929345, "grad_norm": 0.052422754447883815, "learning_rate": 0.0005355329949238578, "loss": 0.7047, "step": 2110 }, { "epoch": 0.10737266946732495, "grad_norm": 0.07613534981689268, "learning_rate": 0.0005368020304568528, "loss": 0.7077, "step": 2115 }, { "epoch": 0.10762650556535645, "grad_norm": 0.053514202239991294, "learning_rate": 0.0005380710659898477, "loss": 0.6843, "step": 2120 }, { "epoch": 0.10788034166338795, "grad_norm": 0.04913041286768531, "learning_rate": 0.0005393401015228427, "loss": 0.6961, "step": 2125 }, { "epoch": 0.10813417776141945, "grad_norm": 0.0568300226701408, "learning_rate": 0.0005406091370558375, "loss": 0.6692, "step": 2130 }, { "epoch": 0.10838801385945095, "grad_norm": 0.04636934678676007, "learning_rate": 0.0005418781725888325, "loss": 0.6763, "step": 2135 }, { "epoch": 0.10864184995748245, "grad_norm": 0.05136400672323533, "learning_rate": 0.0005431472081218274, "loss": 0.6894, "step": 2140 }, { "epoch": 0.10889568605551396, "grad_norm": 0.04344668075028007, "learning_rate": 0.0005444162436548224, "loss": 0.6631, "step": 2145 }, { "epoch": 0.10914952215354545, "grad_norm": 0.0555972048428014, "learning_rate": 0.0005456852791878173, "loss": 0.6784, "step": 2150 }, { "epoch": 0.10940335825157696, "grad_norm": 0.06620172522346869, "learning_rate": 0.0005469543147208121, "loss": 0.6911, "step": 2155 }, { "epoch": 0.10965719434960845, "grad_norm": 0.0601809692146959, "learning_rate": 0.0005482233502538071, "loss": 0.7259, "step": 2160 }, { "epoch": 0.10991103044763996, "grad_norm": 0.044989749038749825, "learning_rate": 0.0005494923857868021, "loss": 0.7114, "step": 2165 }, { "epoch": 0.11016486654567147, "grad_norm": 0.04741683660493615, "learning_rate": 0.000550761421319797, "loss": 0.6949, "step": 2170 }, { "epoch": 0.11041870264370296, "grad_norm": 0.054064091770256034, "learning_rate": 0.0005520304568527918, "loss": 0.6774, "step": 2175 }, { "epoch": 0.11067253874173447, "grad_norm": 0.050772197507611055, "learning_rate": 0.0005532994923857868, "loss": 0.6747, "step": 2180 }, { "epoch": 0.11092637483976596, "grad_norm": 0.07122688113002712, "learning_rate": 0.0005545685279187818, "loss": 0.6751, "step": 2185 }, { "epoch": 0.11118021093779747, "grad_norm": 0.056605934899527, "learning_rate": 0.0005558375634517766, "loss": 0.7048, "step": 2190 }, { "epoch": 0.11143404703582896, "grad_norm": 0.049631694115174936, "learning_rate": 0.0005571065989847715, "loss": 0.6795, "step": 2195 }, { "epoch": 0.11168788313386047, "grad_norm": 0.05830993033392446, "learning_rate": 0.0005583756345177665, "loss": 0.6846, "step": 2200 }, { "epoch": 0.11194171923189197, "grad_norm": 0.044352746447960285, "learning_rate": 0.0005596446700507615, "loss": 0.6956, "step": 2205 }, { "epoch": 0.11219555532992347, "grad_norm": 0.04064168684790446, "learning_rate": 0.0005609137055837564, "loss": 0.6584, "step": 2210 }, { "epoch": 0.11244939142795497, "grad_norm": 0.05355535018905834, "learning_rate": 0.0005621827411167512, "loss": 0.6551, "step": 2215 }, { "epoch": 0.11270322752598647, "grad_norm": 0.04823668883816903, "learning_rate": 0.0005634517766497462, "loss": 0.7101, "step": 2220 }, { "epoch": 0.11295706362401797, "grad_norm": 0.04507099918690335, "learning_rate": 0.0005647208121827412, "loss": 0.6928, "step": 2225 }, { "epoch": 0.11321089972204948, "grad_norm": 0.04388054794584642, "learning_rate": 0.0005659898477157361, "loss": 0.6937, "step": 2230 }, { "epoch": 0.11346473582008097, "grad_norm": 0.04682924867200663, "learning_rate": 0.0005672588832487309, "loss": 0.6785, "step": 2235 }, { "epoch": 0.11371857191811248, "grad_norm": 0.06604610799465334, "learning_rate": 0.0005685279187817259, "loss": 0.6779, "step": 2240 }, { "epoch": 0.11397240801614397, "grad_norm": 0.06120863018511214, "learning_rate": 0.0005697969543147208, "loss": 0.6533, "step": 2245 }, { "epoch": 0.11422624411417548, "grad_norm": 0.05270150831774229, "learning_rate": 0.0005710659898477158, "loss": 0.6622, "step": 2250 }, { "epoch": 0.11448008021220697, "grad_norm": 0.054970737100826304, "learning_rate": 0.0005723350253807107, "loss": 0.682, "step": 2255 }, { "epoch": 0.11473391631023848, "grad_norm": 0.04567202978046955, "learning_rate": 0.0005736040609137056, "loss": 0.6642, "step": 2260 }, { "epoch": 0.11498775240826999, "grad_norm": 0.06810657291430826, "learning_rate": 0.0005748730964467005, "loss": 0.7241, "step": 2265 }, { "epoch": 0.11524158850630148, "grad_norm": 0.051106730998495775, "learning_rate": 0.0005761421319796955, "loss": 0.6964, "step": 2270 }, { "epoch": 0.11549542460433299, "grad_norm": 0.05289405047092881, "learning_rate": 0.0005774111675126904, "loss": 0.6951, "step": 2275 }, { "epoch": 0.11574926070236448, "grad_norm": 0.04803879297706406, "learning_rate": 0.0005786802030456852, "loss": 0.668, "step": 2280 }, { "epoch": 0.11600309680039599, "grad_norm": 0.05408216249103949, "learning_rate": 0.0005799492385786802, "loss": 0.6976, "step": 2285 }, { "epoch": 0.11625693289842748, "grad_norm": 0.046893742742678574, "learning_rate": 0.0005812182741116752, "loss": 0.6492, "step": 2290 }, { "epoch": 0.11651076899645899, "grad_norm": 0.05546942162432547, "learning_rate": 0.0005824873096446702, "loss": 0.6905, "step": 2295 }, { "epoch": 0.11676460509449049, "grad_norm": 0.044480364722733014, "learning_rate": 0.0005837563451776649, "loss": 0.6774, "step": 2300 }, { "epoch": 0.11701844119252199, "grad_norm": 0.058930815501948106, "learning_rate": 0.0005850253807106599, "loss": 0.7125, "step": 2305 }, { "epoch": 0.11727227729055349, "grad_norm": 0.06993707438691858, "learning_rate": 0.0005862944162436549, "loss": 0.6672, "step": 2310 }, { "epoch": 0.11752611338858499, "grad_norm": 0.07094737512117218, "learning_rate": 0.0005875634517766498, "loss": 0.6874, "step": 2315 }, { "epoch": 0.11777994948661649, "grad_norm": 0.04776737380254815, "learning_rate": 0.0005888324873096446, "loss": 0.6743, "step": 2320 }, { "epoch": 0.118033785584648, "grad_norm": 0.04754319568857845, "learning_rate": 0.0005901015228426396, "loss": 0.6955, "step": 2325 }, { "epoch": 0.11828762168267949, "grad_norm": 0.0530012975882747, "learning_rate": 0.0005913705583756346, "loss": 0.6741, "step": 2330 }, { "epoch": 0.118541457780711, "grad_norm": 0.047906343503061846, "learning_rate": 0.0005926395939086295, "loss": 0.6932, "step": 2335 }, { "epoch": 0.11879529387874249, "grad_norm": 0.04482089427430776, "learning_rate": 0.0005939086294416243, "loss": 0.6872, "step": 2340 }, { "epoch": 0.119049129976774, "grad_norm": 0.04529373048320046, "learning_rate": 0.0005951776649746193, "loss": 0.6626, "step": 2345 }, { "epoch": 0.11930296607480549, "grad_norm": 0.0476388588473774, "learning_rate": 0.0005964467005076142, "loss": 0.6791, "step": 2350 }, { "epoch": 0.119556802172837, "grad_norm": 0.04934267321682797, "learning_rate": 0.0005977157360406092, "loss": 0.686, "step": 2355 }, { "epoch": 0.1198106382708685, "grad_norm": 0.048244390716089255, "learning_rate": 0.000598984771573604, "loss": 0.6926, "step": 2360 }, { "epoch": 0.1200644743689, "grad_norm": 0.062491881852921594, "learning_rate": 0.000600253807106599, "loss": 0.7189, "step": 2365 }, { "epoch": 0.1203183104669315, "grad_norm": 0.06931903303967604, "learning_rate": 0.0006015228426395939, "loss": 0.712, "step": 2370 }, { "epoch": 0.120572146564963, "grad_norm": 0.05423697704542445, "learning_rate": 0.0006027918781725889, "loss": 0.6846, "step": 2375 }, { "epoch": 0.1208259826629945, "grad_norm": 0.05778931196460261, "learning_rate": 0.0006040609137055838, "loss": 0.693, "step": 2380 }, { "epoch": 0.12107981876102601, "grad_norm": 0.06091151475086323, "learning_rate": 0.0006053299492385786, "loss": 0.6676, "step": 2385 }, { "epoch": 0.1213336548590575, "grad_norm": 0.055216368348407444, "learning_rate": 0.0006065989847715736, "loss": 0.6724, "step": 2390 }, { "epoch": 0.12158749095708901, "grad_norm": 0.04670707090251174, "learning_rate": 0.0006078680203045686, "loss": 0.6651, "step": 2395 }, { "epoch": 0.1218413270551205, "grad_norm": 0.05746162859057901, "learning_rate": 0.0006091370558375635, "loss": 0.6942, "step": 2400 }, { "epoch": 0.12209516315315201, "grad_norm": 0.056859124253096104, "learning_rate": 0.0006104060913705583, "loss": 0.6698, "step": 2405 }, { "epoch": 0.1223489992511835, "grad_norm": 0.05339650908828867, "learning_rate": 0.0006116751269035533, "loss": 0.6844, "step": 2410 }, { "epoch": 0.12260283534921501, "grad_norm": 0.04544174695117297, "learning_rate": 0.0006129441624365483, "loss": 0.681, "step": 2415 }, { "epoch": 0.12285667144724652, "grad_norm": 0.047853644230645295, "learning_rate": 0.0006142131979695432, "loss": 0.6736, "step": 2420 }, { "epoch": 0.12311050754527801, "grad_norm": 0.044716395502977895, "learning_rate": 0.000615482233502538, "loss": 0.6692, "step": 2425 }, { "epoch": 0.12336434364330952, "grad_norm": 0.0440973721411255, "learning_rate": 0.000616751269035533, "loss": 0.6751, "step": 2430 }, { "epoch": 0.12361817974134101, "grad_norm": 0.04429030828005958, "learning_rate": 0.000618020304568528, "loss": 0.6906, "step": 2435 }, { "epoch": 0.12387201583937252, "grad_norm": 0.04787659061724324, "learning_rate": 0.0006192893401015229, "loss": 0.6818, "step": 2440 }, { "epoch": 0.12412585193740402, "grad_norm": 0.04128004321338866, "learning_rate": 0.0006205583756345177, "loss": 0.6588, "step": 2445 }, { "epoch": 0.12437968803543552, "grad_norm": 0.061281310808498836, "learning_rate": 0.0006218274111675127, "loss": 0.6794, "step": 2450 }, { "epoch": 0.12463352413346702, "grad_norm": 0.0452210479730751, "learning_rate": 0.0006230964467005076, "loss": 0.6981, "step": 2455 }, { "epoch": 0.12488736023149852, "grad_norm": 0.07279557551578562, "learning_rate": 0.0006243654822335026, "loss": 0.6425, "step": 2460 }, { "epoch": 0.12514119632953002, "grad_norm": 0.050867733799510144, "learning_rate": 0.0006256345177664974, "loss": 0.6822, "step": 2465 }, { "epoch": 0.12539503242756153, "grad_norm": 0.03997161618281022, "learning_rate": 0.0006269035532994924, "loss": 0.6798, "step": 2470 }, { "epoch": 0.12564886852559304, "grad_norm": 0.04666231998293777, "learning_rate": 0.0006281725888324873, "loss": 0.6669, "step": 2475 }, { "epoch": 0.12590270462362452, "grad_norm": 0.06993861990367087, "learning_rate": 0.0006294416243654823, "loss": 0.6769, "step": 2480 }, { "epoch": 0.12615654072165602, "grad_norm": 0.04460301577499877, "learning_rate": 0.0006307106598984772, "loss": 0.6702, "step": 2485 }, { "epoch": 0.12641037681968753, "grad_norm": 0.046300009587602504, "learning_rate": 0.000631979695431472, "loss": 0.6485, "step": 2490 }, { "epoch": 0.12666421291771904, "grad_norm": 0.04525513100008759, "learning_rate": 0.000633248730964467, "loss": 0.7022, "step": 2495 }, { "epoch": 0.12691804901575052, "grad_norm": 0.046659707052599364, "learning_rate": 0.000634517766497462, "loss": 0.6636, "step": 2500 }, { "epoch": 0.12717188511378202, "grad_norm": 0.04824843023874754, "learning_rate": 0.0006357868020304569, "loss": 0.6485, "step": 2505 }, { "epoch": 0.12742572121181353, "grad_norm": 0.04746300692436404, "learning_rate": 0.0006370558375634517, "loss": 0.7173, "step": 2510 }, { "epoch": 0.12767955730984504, "grad_norm": 0.04431626679908001, "learning_rate": 0.0006383248730964467, "loss": 0.6767, "step": 2515 }, { "epoch": 0.12793339340787654, "grad_norm": 0.04212599191521632, "learning_rate": 0.0006395939086294417, "loss": 0.6486, "step": 2520 }, { "epoch": 0.12818722950590802, "grad_norm": 0.044688664389786185, "learning_rate": 0.0006408629441624366, "loss": 0.6343, "step": 2525 }, { "epoch": 0.12844106560393953, "grad_norm": 0.04418877402589161, "learning_rate": 0.0006421319796954314, "loss": 0.6908, "step": 2530 }, { "epoch": 0.12869490170197104, "grad_norm": 0.04434768157491682, "learning_rate": 0.0006434010152284264, "loss": 0.6997, "step": 2535 }, { "epoch": 0.12894873780000254, "grad_norm": 0.04818401670766656, "learning_rate": 0.0006446700507614214, "loss": 0.6832, "step": 2540 }, { "epoch": 0.12920257389803405, "grad_norm": 0.04591714580956639, "learning_rate": 0.0006459390862944163, "loss": 0.662, "step": 2545 }, { "epoch": 0.12945640999606553, "grad_norm": 0.050750089447246204, "learning_rate": 0.0006472081218274111, "loss": 0.6859, "step": 2550 }, { "epoch": 0.12971024609409704, "grad_norm": 0.049535561340610365, "learning_rate": 0.0006484771573604061, "loss": 0.6929, "step": 2555 }, { "epoch": 0.12996408219212854, "grad_norm": 0.04625672713487381, "learning_rate": 0.000649746192893401, "loss": 0.6712, "step": 2560 }, { "epoch": 0.13021791829016005, "grad_norm": 0.0443189289054786, "learning_rate": 0.000651015228426396, "loss": 0.7355, "step": 2565 }, { "epoch": 0.13047175438819156, "grad_norm": 0.05280014157523752, "learning_rate": 0.0006522842639593908, "loss": 0.701, "step": 2570 }, { "epoch": 0.13072559048622304, "grad_norm": 0.04705146570879352, "learning_rate": 0.0006535532994923858, "loss": 0.6695, "step": 2575 }, { "epoch": 0.13097942658425454, "grad_norm": 0.0460576955250553, "learning_rate": 0.0006548223350253807, "loss": 0.692, "step": 2580 }, { "epoch": 0.13123326268228605, "grad_norm": 0.03813344619291145, "learning_rate": 0.0006560913705583757, "loss": 0.66, "step": 2585 }, { "epoch": 0.13148709878031756, "grad_norm": 0.04969973984569192, "learning_rate": 0.0006573604060913706, "loss": 0.6934, "step": 2590 }, { "epoch": 0.13174093487834904, "grad_norm": 0.042656040318584894, "learning_rate": 0.0006586294416243654, "loss": 0.6615, "step": 2595 }, { "epoch": 0.13199477097638054, "grad_norm": 0.04426457599994935, "learning_rate": 0.0006598984771573604, "loss": 0.7075, "step": 2600 }, { "epoch": 0.13224860707441205, "grad_norm": 0.04911188070281771, "learning_rate": 0.0006611675126903554, "loss": 0.663, "step": 2605 }, { "epoch": 0.13250244317244356, "grad_norm": 0.045086612880435376, "learning_rate": 0.0006624365482233503, "loss": 0.6769, "step": 2610 }, { "epoch": 0.13275627927047506, "grad_norm": 0.0806263949064106, "learning_rate": 0.0006637055837563451, "loss": 0.6701, "step": 2615 }, { "epoch": 0.13301011536850654, "grad_norm": 0.09236793730312937, "learning_rate": 0.0006649746192893401, "loss": 0.648, "step": 2620 }, { "epoch": 0.13326395146653805, "grad_norm": 0.045322993172678835, "learning_rate": 0.0006662436548223351, "loss": 0.6718, "step": 2625 }, { "epoch": 0.13351778756456956, "grad_norm": 0.04182199738451879, "learning_rate": 0.0006675126903553299, "loss": 0.6724, "step": 2630 }, { "epoch": 0.13377162366260106, "grad_norm": 0.042921119598924244, "learning_rate": 0.0006687817258883248, "loss": 0.6813, "step": 2635 }, { "epoch": 0.13402545976063257, "grad_norm": 0.10282609717664869, "learning_rate": 0.0006700507614213198, "loss": 0.6714, "step": 2640 }, { "epoch": 0.13427929585866405, "grad_norm": 0.05297792239316466, "learning_rate": 0.0006713197969543148, "loss": 0.6872, "step": 2645 }, { "epoch": 0.13453313195669556, "grad_norm": 0.10242225950893688, "learning_rate": 0.0006725888324873097, "loss": 0.6771, "step": 2650 }, { "epoch": 0.13478696805472706, "grad_norm": 0.0603931866469296, "learning_rate": 0.0006738578680203045, "loss": 0.6974, "step": 2655 }, { "epoch": 0.13504080415275857, "grad_norm": 0.04679504191127658, "learning_rate": 0.0006751269035532995, "loss": 0.7012, "step": 2660 }, { "epoch": 0.13529464025079008, "grad_norm": 0.04709498367472285, "learning_rate": 0.0006763959390862944, "loss": 0.6706, "step": 2665 }, { "epoch": 0.13554847634882156, "grad_norm": 0.0564473112589115, "learning_rate": 0.0006776649746192894, "loss": 0.6901, "step": 2670 }, { "epoch": 0.13580231244685306, "grad_norm": 0.06027753857126253, "learning_rate": 0.0006789340101522842, "loss": 0.6819, "step": 2675 }, { "epoch": 0.13605614854488457, "grad_norm": 0.06314337231356607, "learning_rate": 0.0006802030456852792, "loss": 0.6699, "step": 2680 }, { "epoch": 0.13630998464291608, "grad_norm": 0.057435222380299925, "learning_rate": 0.0006814720812182741, "loss": 0.6497, "step": 2685 }, { "epoch": 0.13656382074094758, "grad_norm": 0.04260996003530418, "learning_rate": 0.0006827411167512691, "loss": 0.6658, "step": 2690 }, { "epoch": 0.13681765683897906, "grad_norm": 0.04258191163717926, "learning_rate": 0.000684010152284264, "loss": 0.6924, "step": 2695 }, { "epoch": 0.13707149293701057, "grad_norm": 0.05691730259771199, "learning_rate": 0.0006852791878172588, "loss": 0.6983, "step": 2700 }, { "epoch": 0.13732532903504208, "grad_norm": 0.043909736858620214, "learning_rate": 0.0006865482233502538, "loss": 0.6979, "step": 2705 }, { "epoch": 0.13757916513307358, "grad_norm": 0.047090946914990356, "learning_rate": 0.0006878172588832488, "loss": 0.6779, "step": 2710 }, { "epoch": 0.13783300123110506, "grad_norm": 0.047548729011293284, "learning_rate": 0.0006890862944162437, "loss": 0.6499, "step": 2715 }, { "epoch": 0.13808683732913657, "grad_norm": 0.06616744593038089, "learning_rate": 0.0006903553299492385, "loss": 0.6969, "step": 2720 }, { "epoch": 0.13834067342716808, "grad_norm": 0.0703509579934864, "learning_rate": 0.0006916243654822335, "loss": 0.652, "step": 2725 }, { "epoch": 0.13859450952519958, "grad_norm": 0.04246140238019912, "learning_rate": 0.0006928934010152285, "loss": 0.6682, "step": 2730 }, { "epoch": 0.1388483456232311, "grad_norm": 0.04135755013487853, "learning_rate": 0.0006941624365482235, "loss": 0.6556, "step": 2735 }, { "epoch": 0.13910218172126257, "grad_norm": 0.04205045142396158, "learning_rate": 0.0006954314720812182, "loss": 0.7063, "step": 2740 }, { "epoch": 0.13935601781929408, "grad_norm": 0.05001019855676361, "learning_rate": 0.0006967005076142132, "loss": 0.7266, "step": 2745 }, { "epoch": 0.13960985391732558, "grad_norm": 0.05281826891896312, "learning_rate": 0.0006979695431472082, "loss": 0.685, "step": 2750 }, { "epoch": 0.1398636900153571, "grad_norm": 0.08165529805729099, "learning_rate": 0.0006992385786802031, "loss": 0.6946, "step": 2755 }, { "epoch": 0.1401175261133886, "grad_norm": 0.06359142712647757, "learning_rate": 0.0007005076142131979, "loss": 0.691, "step": 2760 }, { "epoch": 0.14037136221142008, "grad_norm": 0.04783534944187642, "learning_rate": 0.0007017766497461929, "loss": 0.6572, "step": 2765 }, { "epoch": 0.14062519830945158, "grad_norm": 0.04836474183739573, "learning_rate": 0.0007030456852791879, "loss": 0.6499, "step": 2770 }, { "epoch": 0.1408790344074831, "grad_norm": 0.04622813948916543, "learning_rate": 0.0007043147208121828, "loss": 0.6722, "step": 2775 }, { "epoch": 0.1411328705055146, "grad_norm": 0.044196925779096605, "learning_rate": 0.0007055837563451776, "loss": 0.6927, "step": 2780 }, { "epoch": 0.1413867066035461, "grad_norm": 0.0563954116918076, "learning_rate": 0.0007068527918781726, "loss": 0.6773, "step": 2785 }, { "epoch": 0.14164054270157758, "grad_norm": 0.05630324142592108, "learning_rate": 0.0007081218274111675, "loss": 0.6948, "step": 2790 }, { "epoch": 0.1418943787996091, "grad_norm": 0.04380530703415174, "learning_rate": 0.0007093908629441625, "loss": 0.6608, "step": 2795 }, { "epoch": 0.1421482148976406, "grad_norm": 0.09238879549470941, "learning_rate": 0.0007106598984771574, "loss": 0.7379, "step": 2800 }, { "epoch": 0.1424020509956721, "grad_norm": 0.08771073436704688, "learning_rate": 0.0007119289340101523, "loss": 0.7167, "step": 2805 }, { "epoch": 0.1426558870937036, "grad_norm": 0.06359201377032256, "learning_rate": 0.0007131979695431472, "loss": 0.7088, "step": 2810 }, { "epoch": 0.1429097231917351, "grad_norm": 0.1200886744339994, "learning_rate": 0.0007144670050761422, "loss": 0.6847, "step": 2815 }, { "epoch": 0.1431635592897666, "grad_norm": 0.138281486633561, "learning_rate": 0.0007157360406091371, "loss": 0.6848, "step": 2820 }, { "epoch": 0.1434173953877981, "grad_norm": 0.04480938074303019, "learning_rate": 0.0007170050761421319, "loss": 0.6904, "step": 2825 }, { "epoch": 0.1436712314858296, "grad_norm": 21.175586578852744, "learning_rate": 0.0007182741116751269, "loss": 0.7594, "step": 2830 }, { "epoch": 0.1439250675838611, "grad_norm": 0.06295691464093359, "learning_rate": 0.0007195431472081219, "loss": 0.7236, "step": 2835 }, { "epoch": 0.1441789036818926, "grad_norm": 0.10004928301513774, "learning_rate": 0.0007208121827411168, "loss": 0.7397, "step": 2840 }, { "epoch": 0.1444327397799241, "grad_norm": 0.46538792031189885, "learning_rate": 0.0007220812182741116, "loss": 0.8646, "step": 2845 }, { "epoch": 0.1446865758779556, "grad_norm": 0.4358423651644742, "learning_rate": 0.0007233502538071066, "loss": 0.8406, "step": 2850 }, { "epoch": 0.14494041197598712, "grad_norm": 0.10845670895421451, "learning_rate": 0.0007246192893401016, "loss": 0.785, "step": 2855 }, { "epoch": 0.1451942480740186, "grad_norm": 0.06788040013815881, "learning_rate": 0.0007258883248730965, "loss": 0.7328, "step": 2860 }, { "epoch": 0.1454480841720501, "grad_norm": 0.07189189427731303, "learning_rate": 0.0007271573604060913, "loss": 0.7376, "step": 2865 }, { "epoch": 0.1457019202700816, "grad_norm": 0.04909596292317751, "learning_rate": 0.0007284263959390863, "loss": 0.7061, "step": 2870 }, { "epoch": 0.14595575636811312, "grad_norm": 0.05537834807483057, "learning_rate": 0.0007296954314720813, "loss": 0.7313, "step": 2875 }, { "epoch": 0.14620959246614462, "grad_norm": 0.2299897910231885, "learning_rate": 0.0007309644670050762, "loss": 1.4098, "step": 2880 }, { "epoch": 0.1464634285641761, "grad_norm": 0.23582707917647705, "learning_rate": 0.000732233502538071, "loss": 0.8452, "step": 2885 }, { "epoch": 0.1467172646622076, "grad_norm": 0.11644453166636645, "learning_rate": 0.000733502538071066, "loss": 0.7672, "step": 2890 }, { "epoch": 0.14697110076023912, "grad_norm": 0.09392008019229685, "learning_rate": 0.0007347715736040609, "loss": 0.744, "step": 2895 }, { "epoch": 0.14722493685827062, "grad_norm": 0.09298026761511366, "learning_rate": 0.0007360406091370559, "loss": 0.7396, "step": 2900 }, { "epoch": 0.14747877295630213, "grad_norm": 0.058376952983496454, "learning_rate": 0.0007373096446700508, "loss": 0.7174, "step": 2905 }, { "epoch": 0.1477326090543336, "grad_norm": 0.05509134155546014, "learning_rate": 0.0007385786802030457, "loss": 0.7056, "step": 2910 }, { "epoch": 0.14798644515236511, "grad_norm": 0.04295937409763232, "learning_rate": 0.0007398477157360406, "loss": 0.7003, "step": 2915 }, { "epoch": 0.14824028125039662, "grad_norm": 0.050074822465611714, "learning_rate": 0.0007411167512690356, "loss": 0.6936, "step": 2920 }, { "epoch": 0.14849411734842813, "grad_norm": 0.06561188123909512, "learning_rate": 0.0007423857868020305, "loss": 0.69, "step": 2925 }, { "epoch": 0.1487479534464596, "grad_norm": 0.05110790900431651, "learning_rate": 0.0007436548223350253, "loss": 0.7239, "step": 2930 }, { "epoch": 0.14900178954449111, "grad_norm": 0.053991352186369024, "learning_rate": 0.0007449238578680203, "loss": 0.7211, "step": 2935 }, { "epoch": 0.14925562564252262, "grad_norm": 0.07363083960749695, "learning_rate": 0.0007461928934010153, "loss": 0.6989, "step": 2940 }, { "epoch": 0.14950946174055413, "grad_norm": 0.06727590148650675, "learning_rate": 0.0007474619289340102, "loss": 0.7114, "step": 2945 }, { "epoch": 0.14976329783858564, "grad_norm": 0.057979540617668884, "learning_rate": 0.000748730964467005, "loss": 0.7061, "step": 2950 }, { "epoch": 0.15001713393661711, "grad_norm": 0.059049940642629514, "learning_rate": 0.00075, "loss": 0.7095, "step": 2955 }, { "epoch": 0.15027097003464862, "grad_norm": 0.0653578877080519, "learning_rate": 0.000751269035532995, "loss": 0.7319, "step": 2960 }, { "epoch": 0.15052480613268013, "grad_norm": 0.06333871135346043, "learning_rate": 0.0007525380710659899, "loss": 0.7417, "step": 2965 }, { "epoch": 0.15077864223071163, "grad_norm": 0.0728096512372998, "learning_rate": 0.0007538071065989847, "loss": 0.7424, "step": 2970 }, { "epoch": 0.15103247832874314, "grad_norm": 0.041806345943965685, "learning_rate": 0.0007550761421319797, "loss": 0.6842, "step": 2975 }, { "epoch": 0.15128631442677462, "grad_norm": 0.048509525473583434, "learning_rate": 0.0007563451776649747, "loss": 0.7402, "step": 2980 }, { "epoch": 0.15154015052480613, "grad_norm": 0.06583937758938846, "learning_rate": 0.0007576142131979696, "loss": 0.7292, "step": 2985 }, { "epoch": 0.15179398662283763, "grad_norm": 0.04468189699925895, "learning_rate": 0.0007588832487309644, "loss": 0.7317, "step": 2990 }, { "epoch": 0.15204782272086914, "grad_norm": 0.047287822361576956, "learning_rate": 0.0007601522842639594, "loss": 0.6964, "step": 2995 }, { "epoch": 0.15230165881890065, "grad_norm": 0.03938327148057227, "learning_rate": 0.0007614213197969543, "loss": 0.7265, "step": 3000 }, { "epoch": 0.15255549491693213, "grad_norm": 0.03972449267379906, "learning_rate": 0.0007626903553299493, "loss": 0.6827, "step": 3005 }, { "epoch": 0.15280933101496363, "grad_norm": 0.04711089033081518, "learning_rate": 0.0007639593908629442, "loss": 0.7007, "step": 3010 }, { "epoch": 0.15306316711299514, "grad_norm": 0.06336031295625702, "learning_rate": 0.0007652284263959391, "loss": 0.6908, "step": 3015 }, { "epoch": 0.15331700321102665, "grad_norm": 0.03736772924042978, "learning_rate": 0.000766497461928934, "loss": 0.689, "step": 3020 }, { "epoch": 0.15357083930905815, "grad_norm": 0.041934899038226205, "learning_rate": 0.000767766497461929, "loss": 0.7015, "step": 3025 }, { "epoch": 0.15382467540708963, "grad_norm": 0.04211578318800713, "learning_rate": 0.0007690355329949239, "loss": 0.6677, "step": 3030 }, { "epoch": 0.15407851150512114, "grad_norm": 0.04215189545395842, "learning_rate": 0.0007703045685279187, "loss": 0.6836, "step": 3035 }, { "epoch": 0.15433234760315265, "grad_norm": 0.05201347380185946, "learning_rate": 0.0007715736040609137, "loss": 0.7107, "step": 3040 }, { "epoch": 0.15458618370118415, "grad_norm": 0.04761766831274283, "learning_rate": 0.0007728426395939087, "loss": 0.7119, "step": 3045 }, { "epoch": 0.15484001979921563, "grad_norm": 0.03933638281586551, "learning_rate": 0.0007741116751269036, "loss": 0.6738, "step": 3050 }, { "epoch": 0.15509385589724714, "grad_norm": 0.11996962799432084, "learning_rate": 0.0007753807106598984, "loss": 0.6781, "step": 3055 }, { "epoch": 0.15534769199527865, "grad_norm": 0.05837163303706869, "learning_rate": 0.0007766497461928934, "loss": 0.6902, "step": 3060 }, { "epoch": 0.15560152809331015, "grad_norm": 0.06028633251581502, "learning_rate": 0.0007779187817258884, "loss": 0.6698, "step": 3065 }, { "epoch": 0.15585536419134166, "grad_norm": 0.07092481098060303, "learning_rate": 0.0007791878172588833, "loss": 0.6567, "step": 3070 }, { "epoch": 0.15610920028937314, "grad_norm": 0.04707521754835134, "learning_rate": 0.0007804568527918781, "loss": 0.6966, "step": 3075 }, { "epoch": 0.15636303638740465, "grad_norm": 0.047329839412153664, "learning_rate": 0.0007817258883248731, "loss": 0.7177, "step": 3080 }, { "epoch": 0.15661687248543615, "grad_norm": 0.04088170174383998, "learning_rate": 0.0007829949238578681, "loss": 0.6803, "step": 3085 }, { "epoch": 0.15687070858346766, "grad_norm": 0.038345774566105946, "learning_rate": 0.000784263959390863, "loss": 0.6656, "step": 3090 }, { "epoch": 0.15712454468149917, "grad_norm": 0.04514482638739989, "learning_rate": 0.0007855329949238578, "loss": 0.7314, "step": 3095 }, { "epoch": 0.15737838077953065, "grad_norm": 0.04006321580374188, "learning_rate": 0.0007868020304568528, "loss": 0.6747, "step": 3100 }, { "epoch": 0.15763221687756215, "grad_norm": 0.05067763436233774, "learning_rate": 0.0007880710659898477, "loss": 0.7088, "step": 3105 }, { "epoch": 0.15788605297559366, "grad_norm": 0.03882503010997677, "learning_rate": 0.0007893401015228427, "loss": 0.6857, "step": 3110 }, { "epoch": 0.15813988907362517, "grad_norm": 0.041915161141761616, "learning_rate": 0.0007906091370558376, "loss": 0.6804, "step": 3115 }, { "epoch": 0.15839372517165667, "grad_norm": 0.055655601622924857, "learning_rate": 0.0007918781725888325, "loss": 0.7128, "step": 3120 }, { "epoch": 0.15864756126968815, "grad_norm": 0.09341393989640125, "learning_rate": 0.0007931472081218274, "loss": 0.6803, "step": 3125 }, { "epoch": 0.15890139736771966, "grad_norm": 0.06199349132436108, "learning_rate": 0.0007944162436548224, "loss": 0.7121, "step": 3130 }, { "epoch": 0.15915523346575117, "grad_norm": 0.08342488615098623, "learning_rate": 0.0007956852791878173, "loss": 0.631, "step": 3135 }, { "epoch": 0.15940906956378267, "grad_norm": 0.06702222456990227, "learning_rate": 0.0007969543147208121, "loss": 0.681, "step": 3140 }, { "epoch": 0.15966290566181415, "grad_norm": 0.051322791325766115, "learning_rate": 0.0007982233502538071, "loss": 0.6961, "step": 3145 }, { "epoch": 0.15991674175984566, "grad_norm": 0.05093510864847829, "learning_rate": 0.0007994923857868021, "loss": 0.6924, "step": 3150 }, { "epoch": 0.16017057785787717, "grad_norm": 0.05191372708283371, "learning_rate": 0.000800761421319797, "loss": 0.6651, "step": 3155 }, { "epoch": 0.16042441395590867, "grad_norm": 0.065309480406257, "learning_rate": 0.0008020304568527918, "loss": 0.6873, "step": 3160 }, { "epoch": 0.16067825005394018, "grad_norm": 0.04035360012723001, "learning_rate": 0.0008032994923857868, "loss": 0.7014, "step": 3165 }, { "epoch": 0.16093208615197166, "grad_norm": 0.03787636570491385, "learning_rate": 0.0008045685279187818, "loss": 0.7184, "step": 3170 }, { "epoch": 0.16118592225000317, "grad_norm": 0.04892199114261499, "learning_rate": 0.0008058375634517766, "loss": 0.7074, "step": 3175 }, { "epoch": 0.16143975834803467, "grad_norm": 0.044454591373735045, "learning_rate": 0.0008071065989847715, "loss": 0.6681, "step": 3180 }, { "epoch": 0.16169359444606618, "grad_norm": 0.05477810689691977, "learning_rate": 0.0008083756345177665, "loss": 0.7073, "step": 3185 }, { "epoch": 0.1619474305440977, "grad_norm": 0.1561533627636135, "learning_rate": 0.0008096446700507615, "loss": 0.7062, "step": 3190 }, { "epoch": 0.16220126664212917, "grad_norm": 0.07695082260270414, "learning_rate": 0.0008109137055837564, "loss": 0.6695, "step": 3195 }, { "epoch": 0.16245510274016067, "grad_norm": 0.0675592941811925, "learning_rate": 0.0008121827411167512, "loss": 0.6852, "step": 3200 }, { "epoch": 0.16270893883819218, "grad_norm": 0.052732094884202066, "learning_rate": 0.0008134517766497462, "loss": 0.7046, "step": 3205 }, { "epoch": 0.1629627749362237, "grad_norm": 0.055496608441699284, "learning_rate": 0.0008147208121827412, "loss": 0.6916, "step": 3210 }, { "epoch": 0.1632166110342552, "grad_norm": 0.043427396820308814, "learning_rate": 0.0008159898477157361, "loss": 0.7338, "step": 3215 }, { "epoch": 0.16347044713228667, "grad_norm": 0.04788846665606786, "learning_rate": 0.000817258883248731, "loss": 0.7164, "step": 3220 }, { "epoch": 0.16372428323031818, "grad_norm": 0.05499792725011934, "learning_rate": 0.0008185279187817259, "loss": 0.6441, "step": 3225 }, { "epoch": 0.1639781193283497, "grad_norm": 0.07060079550345069, "learning_rate": 0.0008197969543147208, "loss": 0.6704, "step": 3230 }, { "epoch": 0.1642319554263812, "grad_norm": 0.04697902634882019, "learning_rate": 0.0008210659898477158, "loss": 0.6896, "step": 3235 }, { "epoch": 0.1644857915244127, "grad_norm": 0.03693026821420421, "learning_rate": 0.0008223350253807107, "loss": 0.6979, "step": 3240 }, { "epoch": 0.16473962762244418, "grad_norm": 0.044236930949549086, "learning_rate": 0.0008236040609137056, "loss": 0.6727, "step": 3245 }, { "epoch": 0.16499346372047569, "grad_norm": 0.04297691167933406, "learning_rate": 0.0008248730964467005, "loss": 0.7183, "step": 3250 }, { "epoch": 0.1652472998185072, "grad_norm": 0.03926872438952914, "learning_rate": 0.0008261421319796955, "loss": 0.6678, "step": 3255 }, { "epoch": 0.1655011359165387, "grad_norm": 0.058748589368983826, "learning_rate": 0.0008274111675126904, "loss": 0.6859, "step": 3260 }, { "epoch": 0.16575497201457018, "grad_norm": 0.043071867704478226, "learning_rate": 0.0008286802030456852, "loss": 0.6584, "step": 3265 }, { "epoch": 0.16600880811260169, "grad_norm": 0.036727442528139344, "learning_rate": 0.0008299492385786802, "loss": 0.6644, "step": 3270 }, { "epoch": 0.1662626442106332, "grad_norm": 0.045406805927585635, "learning_rate": 0.0008312182741116752, "loss": 0.6902, "step": 3275 }, { "epoch": 0.1665164803086647, "grad_norm": 0.03886588985080634, "learning_rate": 0.0008324873096446702, "loss": 0.6912, "step": 3280 }, { "epoch": 0.1667703164066962, "grad_norm": 0.03860450090032993, "learning_rate": 0.0008337563451776649, "loss": 0.6918, "step": 3285 }, { "epoch": 0.16702415250472769, "grad_norm": 0.03919405708529676, "learning_rate": 0.0008350253807106599, "loss": 0.7097, "step": 3290 }, { "epoch": 0.1672779886027592, "grad_norm": 0.06384676391238123, "learning_rate": 0.0008362944162436549, "loss": 0.6769, "step": 3295 }, { "epoch": 0.1675318247007907, "grad_norm": 0.04387951589525683, "learning_rate": 0.0008375634517766498, "loss": 0.7001, "step": 3300 }, { "epoch": 0.1677856607988222, "grad_norm": 0.04020760640485976, "learning_rate": 0.0008388324873096446, "loss": 0.7, "step": 3305 }, { "epoch": 0.1680394968968537, "grad_norm": 0.04343991292299841, "learning_rate": 0.0008401015228426396, "loss": 0.6653, "step": 3310 }, { "epoch": 0.1682933329948852, "grad_norm": 0.04877171128678498, "learning_rate": 0.0008413705583756346, "loss": 0.6491, "step": 3315 }, { "epoch": 0.1685471690929167, "grad_norm": 0.04489329625384213, "learning_rate": 0.0008426395939086295, "loss": 0.6944, "step": 3320 }, { "epoch": 0.1688010051909482, "grad_norm": 0.03924346550703404, "learning_rate": 0.0008439086294416243, "loss": 0.7004, "step": 3325 }, { "epoch": 0.1690548412889797, "grad_norm": 0.0351682478604759, "learning_rate": 0.0008451776649746193, "loss": 0.6289, "step": 3330 }, { "epoch": 0.16930867738701122, "grad_norm": 0.05804265280485358, "learning_rate": 0.0008464467005076142, "loss": 0.6819, "step": 3335 }, { "epoch": 0.1695625134850427, "grad_norm": 0.05558949651884164, "learning_rate": 0.0008477157360406092, "loss": 0.6995, "step": 3340 }, { "epoch": 0.1698163495830742, "grad_norm": 0.07239860999828403, "learning_rate": 0.0008489847715736041, "loss": 0.698, "step": 3345 }, { "epoch": 0.1700701856811057, "grad_norm": 0.04268065537159936, "learning_rate": 0.000850253807106599, "loss": 0.6724, "step": 3350 }, { "epoch": 0.17032402177913722, "grad_norm": 0.04596502747992512, "learning_rate": 0.0008515228426395939, "loss": 0.715, "step": 3355 }, { "epoch": 0.17057785787716873, "grad_norm": 0.07978824559788683, "learning_rate": 0.0008527918781725889, "loss": 0.6687, "step": 3360 }, { "epoch": 0.1708316939752002, "grad_norm": 0.03941742907400371, "learning_rate": 0.0008540609137055838, "loss": 0.6799, "step": 3365 }, { "epoch": 0.1710855300732317, "grad_norm": 0.0527239971063264, "learning_rate": 0.0008553299492385786, "loss": 0.7063, "step": 3370 }, { "epoch": 0.17133936617126322, "grad_norm": 3.822834197350998, "learning_rate": 0.0008565989847715736, "loss": 0.696, "step": 3375 }, { "epoch": 0.17159320226929473, "grad_norm": 0.083121560754106, "learning_rate": 0.0008578680203045686, "loss": 0.6914, "step": 3380 }, { "epoch": 0.1718470383673262, "grad_norm": 0.06454785794212209, "learning_rate": 0.0008591370558375635, "loss": 0.7085, "step": 3385 }, { "epoch": 0.1721008744653577, "grad_norm": 0.03945210916226416, "learning_rate": 0.0008604060913705583, "loss": 0.682, "step": 3390 }, { "epoch": 0.17235471056338922, "grad_norm": 0.04193981751233244, "learning_rate": 0.0008616751269035533, "loss": 0.666, "step": 3395 }, { "epoch": 0.17260854666142073, "grad_norm": 0.03967018442585797, "learning_rate": 0.0008629441624365483, "loss": 0.6608, "step": 3400 }, { "epoch": 0.17286238275945223, "grad_norm": 0.04644004597796404, "learning_rate": 0.0008642131979695432, "loss": 0.6535, "step": 3405 }, { "epoch": 0.1731162188574837, "grad_norm": 0.03873588952866688, "learning_rate": 0.000865482233502538, "loss": 0.7228, "step": 3410 }, { "epoch": 0.17337005495551522, "grad_norm": 0.31833461711624433, "learning_rate": 0.000866751269035533, "loss": 0.7115, "step": 3415 }, { "epoch": 0.17362389105354673, "grad_norm": 0.35935016596831215, "learning_rate": 0.000868020304568528, "loss": 0.6879, "step": 3420 }, { "epoch": 0.17387772715157823, "grad_norm": 0.04780743742889909, "learning_rate": 0.0008692893401015229, "loss": 0.6907, "step": 3425 }, { "epoch": 0.17413156324960974, "grad_norm": 0.04110231970405351, "learning_rate": 0.0008705583756345177, "loss": 0.7238, "step": 3430 }, { "epoch": 0.17438539934764122, "grad_norm": 0.046525580251483005, "learning_rate": 0.0008718274111675127, "loss": 0.6778, "step": 3435 }, { "epoch": 0.17463923544567272, "grad_norm": 0.041718923819856486, "learning_rate": 0.0008730964467005076, "loss": 0.6486, "step": 3440 }, { "epoch": 0.17489307154370423, "grad_norm": 0.04055383524511006, "learning_rate": 0.0008743654822335026, "loss": 0.6968, "step": 3445 }, { "epoch": 0.17514690764173574, "grad_norm": 0.045736208992562415, "learning_rate": 0.0008756345177664975, "loss": 0.6675, "step": 3450 }, { "epoch": 0.17540074373976725, "grad_norm": 0.06303424703405444, "learning_rate": 0.0008769035532994924, "loss": 0.7083, "step": 3455 }, { "epoch": 0.17565457983779872, "grad_norm": 0.03565589845558804, "learning_rate": 0.0008781725888324873, "loss": 0.6513, "step": 3460 }, { "epoch": 0.17590841593583023, "grad_norm": 0.039028072883575, "learning_rate": 0.0008794416243654823, "loss": 0.6768, "step": 3465 }, { "epoch": 0.17616225203386174, "grad_norm": 0.03826237652273635, "learning_rate": 0.0008807106598984772, "loss": 0.6931, "step": 3470 }, { "epoch": 0.17641608813189325, "grad_norm": 0.03786297862349843, "learning_rate": 0.000881979695431472, "loss": 0.6665, "step": 3475 }, { "epoch": 0.17666992422992472, "grad_norm": 0.035346860951597725, "learning_rate": 0.000883248730964467, "loss": 0.6739, "step": 3480 }, { "epoch": 0.17692376032795623, "grad_norm": 0.038526863874646516, "learning_rate": 0.000884517766497462, "loss": 0.665, "step": 3485 }, { "epoch": 0.17717759642598774, "grad_norm": 0.06026897162610092, "learning_rate": 0.0008857868020304569, "loss": 0.68, "step": 3490 }, { "epoch": 0.17743143252401924, "grad_norm": 0.05069239821159444, "learning_rate": 0.0008870558375634517, "loss": 0.7229, "step": 3495 }, { "epoch": 0.17768526862205075, "grad_norm": 0.09985009714735808, "learning_rate": 0.0008883248730964467, "loss": 0.6917, "step": 3500 }, { "epoch": 0.17793910472008223, "grad_norm": 0.10170892988817608, "learning_rate": 0.0008895939086294417, "loss": 0.7233, "step": 3505 }, { "epoch": 0.17819294081811374, "grad_norm": 0.07075066250119805, "learning_rate": 0.0008908629441624366, "loss": 0.669, "step": 3510 }, { "epoch": 0.17844677691614524, "grad_norm": 0.08757273613751611, "learning_rate": 0.0008921319796954314, "loss": 0.6676, "step": 3515 }, { "epoch": 0.17870061301417675, "grad_norm": 0.038714672890372746, "learning_rate": 0.0008934010152284264, "loss": 0.7115, "step": 3520 }, { "epoch": 0.17895444911220826, "grad_norm": 0.04036233500547512, "learning_rate": 0.0008946700507614214, "loss": 0.6783, "step": 3525 }, { "epoch": 0.17920828521023974, "grad_norm": 0.06230842789641548, "learning_rate": 0.0008959390862944163, "loss": 0.669, "step": 3530 }, { "epoch": 0.17946212130827124, "grad_norm": 0.07454575938873274, "learning_rate": 0.0008972081218274111, "loss": 0.7506, "step": 3535 }, { "epoch": 0.17971595740630275, "grad_norm": 0.06240895100177482, "learning_rate": 0.0008984771573604061, "loss": 0.7194, "step": 3540 }, { "epoch": 0.17996979350433426, "grad_norm": 0.04630554537296931, "learning_rate": 0.000899746192893401, "loss": 0.7211, "step": 3545 }, { "epoch": 0.18022362960236576, "grad_norm": 0.056718557931670986, "learning_rate": 0.000901015228426396, "loss": 0.7443, "step": 3550 }, { "epoch": 0.18047746570039724, "grad_norm": 0.053855068524236355, "learning_rate": 0.0009022842639593909, "loss": 0.6938, "step": 3555 }, { "epoch": 0.18073130179842875, "grad_norm": 0.04098910289114666, "learning_rate": 0.0009035532994923858, "loss": 0.6882, "step": 3560 }, { "epoch": 0.18098513789646026, "grad_norm": 0.043572400017178894, "learning_rate": 0.0009048223350253807, "loss": 0.7237, "step": 3565 }, { "epoch": 0.18123897399449176, "grad_norm": 0.05122515570696231, "learning_rate": 0.0009060913705583757, "loss": 0.6965, "step": 3570 }, { "epoch": 0.18149281009252327, "grad_norm": 0.0486904734197597, "learning_rate": 0.0009073604060913706, "loss": 0.6821, "step": 3575 }, { "epoch": 0.18174664619055475, "grad_norm": 0.14211715338289158, "learning_rate": 0.0009086294416243654, "loss": 0.6776, "step": 3580 }, { "epoch": 0.18200048228858626, "grad_norm": 0.04140861099483773, "learning_rate": 0.0009098984771573604, "loss": 0.716, "step": 3585 }, { "epoch": 0.18225431838661776, "grad_norm": 0.05317734506655789, "learning_rate": 0.0009111675126903554, "loss": 0.7328, "step": 3590 }, { "epoch": 0.18250815448464927, "grad_norm": 0.04412671048588959, "learning_rate": 0.0009124365482233503, "loss": 0.7013, "step": 3595 }, { "epoch": 0.18276199058268075, "grad_norm": 0.043226638147343656, "learning_rate": 0.0009137055837563451, "loss": 0.7068, "step": 3600 }, { "epoch": 0.18301582668071226, "grad_norm": 0.03626076158298662, "learning_rate": 0.0009149746192893401, "loss": 0.6853, "step": 3605 }, { "epoch": 0.18326966277874376, "grad_norm": 0.047674224438480246, "learning_rate": 0.0009162436548223351, "loss": 0.6745, "step": 3610 }, { "epoch": 0.18352349887677527, "grad_norm": 0.04231046030159459, "learning_rate": 0.0009175126903553299, "loss": 0.6849, "step": 3615 }, { "epoch": 0.18377733497480678, "grad_norm": 0.04195288389214527, "learning_rate": 0.0009187817258883248, "loss": 0.7041, "step": 3620 }, { "epoch": 0.18403117107283826, "grad_norm": 0.039251889309433935, "learning_rate": 0.0009200507614213198, "loss": 0.6504, "step": 3625 }, { "epoch": 0.18428500717086976, "grad_norm": 0.03738914770413547, "learning_rate": 0.0009213197969543148, "loss": 0.7048, "step": 3630 }, { "epoch": 0.18453884326890127, "grad_norm": 0.0436948152095552, "learning_rate": 0.0009225888324873097, "loss": 0.6771, "step": 3635 }, { "epoch": 0.18479267936693278, "grad_norm": 0.03767490570058011, "learning_rate": 0.0009238578680203045, "loss": 0.6494, "step": 3640 }, { "epoch": 0.18504651546496428, "grad_norm": 0.037090867993691726, "learning_rate": 0.0009251269035532995, "loss": 0.6545, "step": 3645 }, { "epoch": 0.18530035156299576, "grad_norm": 0.042846186747705906, "learning_rate": 0.0009263959390862944, "loss": 0.6573, "step": 3650 }, { "epoch": 0.18555418766102727, "grad_norm": 0.046132833928787344, "learning_rate": 0.0009276649746192894, "loss": 0.6716, "step": 3655 }, { "epoch": 0.18580802375905878, "grad_norm": 0.04810476990314317, "learning_rate": 0.0009289340101522843, "loss": 0.6552, "step": 3660 }, { "epoch": 0.18606185985709028, "grad_norm": 0.05725224131003678, "learning_rate": 0.0009302030456852792, "loss": 0.7188, "step": 3665 }, { "epoch": 0.1863156959551218, "grad_norm": 0.03760096816148779, "learning_rate": 0.0009314720812182741, "loss": 0.679, "step": 3670 }, { "epoch": 0.18656953205315327, "grad_norm": 0.0414336045399388, "learning_rate": 0.0009327411167512691, "loss": 0.703, "step": 3675 }, { "epoch": 0.18682336815118478, "grad_norm": 0.04888803816351589, "learning_rate": 0.000934010152284264, "loss": 0.6984, "step": 3680 }, { "epoch": 0.18707720424921628, "grad_norm": 0.03932910243932595, "learning_rate": 0.0009352791878172588, "loss": 0.6935, "step": 3685 }, { "epoch": 0.1873310403472478, "grad_norm": 0.053329295837259566, "learning_rate": 0.0009365482233502538, "loss": 0.6654, "step": 3690 }, { "epoch": 0.18758487644527927, "grad_norm": 0.04033686402339195, "learning_rate": 0.0009378172588832488, "loss": 0.6656, "step": 3695 }, { "epoch": 0.18783871254331078, "grad_norm": 0.05078207384763503, "learning_rate": 0.0009390862944162437, "loss": 0.6616, "step": 3700 }, { "epoch": 0.18809254864134228, "grad_norm": 0.03612814331688169, "learning_rate": 0.0009403553299492385, "loss": 0.6856, "step": 3705 }, { "epoch": 0.1883463847393738, "grad_norm": 0.04152827694147588, "learning_rate": 0.0009416243654822335, "loss": 0.707, "step": 3710 }, { "epoch": 0.1886002208374053, "grad_norm": 0.03476616159624936, "learning_rate": 0.0009428934010152285, "loss": 0.6786, "step": 3715 }, { "epoch": 0.18885405693543678, "grad_norm": 0.03981634185742135, "learning_rate": 0.0009441624365482235, "loss": 0.7042, "step": 3720 }, { "epoch": 0.18910789303346828, "grad_norm": 0.04204868547098972, "learning_rate": 0.0009454314720812182, "loss": 0.6841, "step": 3725 }, { "epoch": 0.1893617291314998, "grad_norm": 0.039850666477108665, "learning_rate": 0.0009467005076142132, "loss": 0.6583, "step": 3730 }, { "epoch": 0.1896155652295313, "grad_norm": 0.03866838212345305, "learning_rate": 0.0009479695431472082, "loss": 0.7246, "step": 3735 }, { "epoch": 0.1898694013275628, "grad_norm": 0.040841413496425324, "learning_rate": 0.0009492385786802031, "loss": 0.6598, "step": 3740 }, { "epoch": 0.19012323742559428, "grad_norm": 0.04010141554319208, "learning_rate": 0.000950507614213198, "loss": 0.6813, "step": 3745 }, { "epoch": 0.1903770735236258, "grad_norm": 0.06530308653395914, "learning_rate": 0.0009517766497461929, "loss": 0.6685, "step": 3750 }, { "epoch": 0.1906309096216573, "grad_norm": 0.07450856843136024, "learning_rate": 0.0009530456852791879, "loss": 0.6874, "step": 3755 }, { "epoch": 0.1908847457196888, "grad_norm": 0.035499974530579376, "learning_rate": 0.0009543147208121828, "loss": 0.681, "step": 3760 }, { "epoch": 0.1911385818177203, "grad_norm": 0.0437247777174162, "learning_rate": 0.0009555837563451777, "loss": 0.6848, "step": 3765 }, { "epoch": 0.1913924179157518, "grad_norm": 0.035676219852568955, "learning_rate": 0.0009568527918781726, "loss": 0.627, "step": 3770 }, { "epoch": 0.1916462540137833, "grad_norm": 0.03768653840820619, "learning_rate": 0.0009581218274111675, "loss": 0.6689, "step": 3775 }, { "epoch": 0.1919000901118148, "grad_norm": 0.03823207024695021, "learning_rate": 0.0009593908629441625, "loss": 0.6335, "step": 3780 }, { "epoch": 0.1921539262098463, "grad_norm": 0.04684915109347205, "learning_rate": 0.0009606598984771574, "loss": 0.6975, "step": 3785 }, { "epoch": 0.19240776230787782, "grad_norm": 0.04745389321415872, "learning_rate": 0.0009619289340101523, "loss": 0.7062, "step": 3790 }, { "epoch": 0.1926615984059093, "grad_norm": 0.03760091854535719, "learning_rate": 0.0009631979695431472, "loss": 0.6646, "step": 3795 }, { "epoch": 0.1929154345039408, "grad_norm": 0.03932186553338342, "learning_rate": 0.0009644670050761422, "loss": 0.6743, "step": 3800 }, { "epoch": 0.1931692706019723, "grad_norm": 0.03951929870880764, "learning_rate": 0.0009657360406091371, "loss": 0.6831, "step": 3805 }, { "epoch": 0.19342310670000382, "grad_norm": 0.03553090190878169, "learning_rate": 0.0009670050761421319, "loss": 0.6571, "step": 3810 }, { "epoch": 0.1936769427980353, "grad_norm": 0.06138620851402293, "learning_rate": 0.0009682741116751269, "loss": 0.6986, "step": 3815 }, { "epoch": 0.1939307788960668, "grad_norm": 0.24733244873927648, "learning_rate": 0.0009695431472081219, "loss": 0.6976, "step": 3820 }, { "epoch": 0.1941846149940983, "grad_norm": 0.0666364936631057, "learning_rate": 0.0009708121827411168, "loss": 0.7391, "step": 3825 }, { "epoch": 0.19443845109212982, "grad_norm": 0.07967106187155334, "learning_rate": 0.0009720812182741116, "loss": 0.6993, "step": 3830 }, { "epoch": 0.19469228719016132, "grad_norm": 0.051138778909318464, "learning_rate": 0.0009733502538071066, "loss": 0.7191, "step": 3835 }, { "epoch": 0.1949461232881928, "grad_norm": 0.5413258576711231, "learning_rate": 0.0009746192893401016, "loss": 0.7592, "step": 3840 }, { "epoch": 0.1951999593862243, "grad_norm": 0.1471254894588324, "learning_rate": 0.0009758883248730965, "loss": 0.7572, "step": 3845 }, { "epoch": 0.19545379548425582, "grad_norm": 0.2706851022164675, "learning_rate": 0.0009771573604060915, "loss": 0.6907, "step": 3850 }, { "epoch": 0.19570763158228732, "grad_norm": 0.14415862874984905, "learning_rate": 0.0009784263959390863, "loss": 0.7062, "step": 3855 }, { "epoch": 0.19596146768031883, "grad_norm": 0.14791455727385014, "learning_rate": 0.0009796954314720812, "loss": 0.7451, "step": 3860 }, { "epoch": 0.1962153037783503, "grad_norm": 0.12586017082802645, "learning_rate": 0.000980964467005076, "loss": 0.6971, "step": 3865 }, { "epoch": 0.19646913987638182, "grad_norm": 0.0826384222127971, "learning_rate": 0.0009822335025380712, "loss": 0.7288, "step": 3870 }, { "epoch": 0.19672297597441332, "grad_norm": 0.4096552727669512, "learning_rate": 0.000983502538071066, "loss": 0.6983, "step": 3875 }, { "epoch": 0.19697681207244483, "grad_norm": 0.126131262033545, "learning_rate": 0.000984771573604061, "loss": 0.7836, "step": 3880 }, { "epoch": 0.19723064817047634, "grad_norm": 0.1721626969232291, "learning_rate": 0.0009860406091370558, "loss": 0.7095, "step": 3885 }, { "epoch": 0.19748448426850782, "grad_norm": 0.1375632163342092, "learning_rate": 0.0009873096446700509, "loss": 0.7249, "step": 3890 }, { "epoch": 0.19773832036653932, "grad_norm": 0.16481287346009105, "learning_rate": 0.0009885786802030457, "loss": 0.7144, "step": 3895 }, { "epoch": 0.19799215646457083, "grad_norm": 0.08436713045812702, "learning_rate": 0.0009898477157360406, "loss": 0.7119, "step": 3900 }, { "epoch": 0.19824599256260234, "grad_norm": 0.05626919966189561, "learning_rate": 0.0009911167512690355, "loss": 0.7135, "step": 3905 }, { "epoch": 0.19849982866063384, "grad_norm": 0.04456845230313172, "learning_rate": 0.0009923857868020306, "loss": 0.6619, "step": 3910 }, { "epoch": 0.19875366475866532, "grad_norm": 0.07101056929004967, "learning_rate": 0.0009936548223350254, "loss": 0.7022, "step": 3915 }, { "epoch": 0.19900750085669683, "grad_norm": 0.04108441619780391, "learning_rate": 0.0009949238578680203, "loss": 0.7024, "step": 3920 }, { "epoch": 0.19926133695472834, "grad_norm": 0.07150668017794924, "learning_rate": 0.0009961928934010152, "loss": 0.7146, "step": 3925 }, { "epoch": 0.19951517305275984, "grad_norm": 0.058887395407361695, "learning_rate": 0.0009974619289340103, "loss": 0.7013, "step": 3930 }, { "epoch": 0.19976900915079132, "grad_norm": 0.08301973441191342, "learning_rate": 0.0009987309644670051, "loss": 0.7245, "step": 3935 }, { "epoch": 0.20002284524882283, "grad_norm": 0.13425731803767796, "learning_rate": 0.001, "loss": 0.7384, "step": 3940 }, { "epoch": 0.20027668134685433, "grad_norm": 0.0954240553881326, "learning_rate": 0.0009999999509262467, "loss": 0.7082, "step": 3945 }, { "epoch": 0.20053051744488584, "grad_norm": 0.09001615927204533, "learning_rate": 0.0009999998037049968, "loss": 0.7479, "step": 3950 }, { "epoch": 0.20078435354291735, "grad_norm": 0.055536250218606135, "learning_rate": 0.0009999995583362786, "loss": 0.7217, "step": 3955 }, { "epoch": 0.20103818964094883, "grad_norm": 0.039988384096081575, "learning_rate": 0.0009999992148201407, "loss": 0.7023, "step": 3960 }, { "epoch": 0.20129202573898033, "grad_norm": 0.07631095658372448, "learning_rate": 0.0009999987731566505, "loss": 0.6593, "step": 3965 }, { "epoch": 0.20154586183701184, "grad_norm": 0.055019840260049975, "learning_rate": 0.0009999982333458942, "loss": 0.7141, "step": 3970 }, { "epoch": 0.20179969793504335, "grad_norm": 0.13389800181440178, "learning_rate": 0.0009999975953879788, "loss": 0.8059, "step": 3975 }, { "epoch": 0.20205353403307486, "grad_norm": 0.10002422022594674, "learning_rate": 0.0009999968592830286, "loss": 0.7418, "step": 3980 }, { "epoch": 0.20230737013110633, "grad_norm": 0.065235305911112, "learning_rate": 0.0009999960250311885, "loss": 0.7388, "step": 3985 }, { "epoch": 0.20256120622913784, "grad_norm": 0.07700805891945464, "learning_rate": 0.0009999950926326221, "loss": 0.7619, "step": 3990 }, { "epoch": 0.20281504232716935, "grad_norm": 0.05585759735632213, "learning_rate": 0.0009999940620875124, "loss": 0.7262, "step": 3995 }, { "epoch": 0.20306887842520085, "grad_norm": 0.04210144533533203, "learning_rate": 0.0009999929333960617, "loss": 0.7003, "step": 4000 }, { "epoch": 0.20332271452323236, "grad_norm": 0.04939954030213548, "learning_rate": 0.0009999917065584918, "loss": 0.7223, "step": 4005 }, { "epoch": 0.20357655062126384, "grad_norm": 0.05682863013701724, "learning_rate": 0.0009999903815750436, "loss": 0.7366, "step": 4010 }, { "epoch": 0.20383038671929535, "grad_norm": 0.07201666259364202, "learning_rate": 0.0009999889584459765, "loss": 0.7237, "step": 4015 }, { "epoch": 0.20408422281732685, "grad_norm": 0.05415233755750322, "learning_rate": 0.0009999874371715706, "loss": 0.7099, "step": 4020 }, { "epoch": 0.20433805891535836, "grad_norm": 0.09491299760351525, "learning_rate": 0.0009999858177521242, "loss": 0.9184, "step": 4025 }, { "epoch": 0.20459189501338984, "grad_norm": 0.09564328359072853, "learning_rate": 0.0009999841001879551, "loss": 0.799, "step": 4030 }, { "epoch": 0.20484573111142135, "grad_norm": 2.399109555715659, "learning_rate": 0.0009999822844794005, "loss": 1.0065, "step": 4035 }, { "epoch": 0.20509956720945285, "grad_norm": 0.5227791347356591, "learning_rate": 0.000999980370626817, "loss": 0.8278, "step": 4040 }, { "epoch": 0.20535340330748436, "grad_norm": 0.16651871729788892, "learning_rate": 0.00099997835863058, "loss": 0.8614, "step": 4045 }, { "epoch": 0.20560723940551587, "grad_norm": 0.18221807327257578, "learning_rate": 0.0009999762484910846, "loss": 0.8153, "step": 4050 }, { "epoch": 0.20586107550354735, "grad_norm": 0.10310087152591199, "learning_rate": 0.0009999740402087452, "loss": 0.8029, "step": 4055 }, { "epoch": 0.20611491160157885, "grad_norm": 0.0703395551608927, "learning_rate": 0.0009999717337839948, "loss": 0.7577, "step": 4060 }, { "epoch": 0.20636874769961036, "grad_norm": 0.08012644608333368, "learning_rate": 0.0009999693292172865, "loss": 0.7503, "step": 4065 }, { "epoch": 0.20662258379764187, "grad_norm": 0.06287680300777991, "learning_rate": 0.0009999668265090924, "loss": 0.7382, "step": 4070 }, { "epoch": 0.20687641989567337, "grad_norm": 0.05354965471224598, "learning_rate": 0.0009999642256599034, "loss": 0.7085, "step": 4075 }, { "epoch": 0.20713025599370485, "grad_norm": 0.0482492491840279, "learning_rate": 0.0009999615266702302, "loss": 0.7315, "step": 4080 }, { "epoch": 0.20738409209173636, "grad_norm": 0.05181020090220497, "learning_rate": 0.0009999587295406026, "loss": 0.7227, "step": 4085 }, { "epoch": 0.20763792818976787, "grad_norm": 0.040021587065134735, "learning_rate": 0.00099995583427157, "loss": 0.7407, "step": 4090 }, { "epoch": 0.20789176428779937, "grad_norm": 0.04000165166533322, "learning_rate": 0.0009999528408637, "loss": 0.7042, "step": 4095 }, { "epoch": 0.20814560038583088, "grad_norm": 0.03943135892336847, "learning_rate": 0.0009999497493175808, "loss": 0.7267, "step": 4100 }, { "epoch": 0.20839943648386236, "grad_norm": 0.0634943990185323, "learning_rate": 0.0009999465596338191, "loss": 0.6844, "step": 4105 }, { "epoch": 0.20865327258189387, "grad_norm": 0.053343219962136215, "learning_rate": 0.000999943271813041, "loss": 0.6986, "step": 4110 }, { "epoch": 0.20890710867992537, "grad_norm": 0.051181086485765775, "learning_rate": 0.0009999398858558917, "loss": 0.7183, "step": 4115 }, { "epoch": 0.20916094477795688, "grad_norm": 0.057765287845386676, "learning_rate": 0.0009999364017630361, "loss": 0.7451, "step": 4120 }, { "epoch": 0.2094147808759884, "grad_norm": 0.564887138188438, "learning_rate": 0.0009999328195351579, "loss": 0.7099, "step": 4125 }, { "epoch": 0.20966861697401987, "grad_norm": 0.04854697031445982, "learning_rate": 0.0009999291391729606, "loss": 0.7045, "step": 4130 }, { "epoch": 0.20992245307205137, "grad_norm": 0.06786359137742107, "learning_rate": 0.0009999253606771661, "loss": 0.7074, "step": 4135 }, { "epoch": 0.21017628917008288, "grad_norm": 0.07218662791060168, "learning_rate": 0.0009999214840485167, "loss": 0.6902, "step": 4140 }, { "epoch": 0.2104301252681144, "grad_norm": 0.1208003703070213, "learning_rate": 0.000999917509287773, "loss": 0.7087, "step": 4145 }, { "epoch": 0.21068396136614587, "grad_norm": 0.05878145753992489, "learning_rate": 0.0009999134363957152, "loss": 0.674, "step": 4150 }, { "epoch": 0.21093779746417737, "grad_norm": 0.046846061065139784, "learning_rate": 0.0009999092653731432, "loss": 0.7174, "step": 4155 }, { "epoch": 0.21119163356220888, "grad_norm": 0.06346911438232657, "learning_rate": 0.0009999049962208751, "loss": 0.708, "step": 4160 }, { "epoch": 0.2114454696602404, "grad_norm": 0.05215116911139721, "learning_rate": 0.0009999006289397494, "loss": 0.7164, "step": 4165 }, { "epoch": 0.2116993057582719, "grad_norm": 0.04940387903306439, "learning_rate": 0.0009998961635306234, "loss": 0.7235, "step": 4170 }, { "epoch": 0.21195314185630337, "grad_norm": 0.05729960864593158, "learning_rate": 0.0009998915999943733, "loss": 0.655, "step": 4175 }, { "epoch": 0.21220697795433488, "grad_norm": 0.03918310878918639, "learning_rate": 0.0009998869383318952, "loss": 0.6842, "step": 4180 }, { "epoch": 0.2124608140523664, "grad_norm": 0.06071981733933351, "learning_rate": 0.0009998821785441039, "loss": 0.7281, "step": 4185 }, { "epoch": 0.2127146501503979, "grad_norm": 0.04728543724507953, "learning_rate": 0.000999877320631934, "loss": 0.684, "step": 4190 }, { "epoch": 0.2129684862484294, "grad_norm": 0.0404988884730302, "learning_rate": 0.0009998723645963388, "loss": 0.7254, "step": 4195 }, { "epoch": 0.21322232234646088, "grad_norm": 0.047811829648568324, "learning_rate": 0.0009998673104382912, "loss": 0.6826, "step": 4200 }, { "epoch": 0.2134761584444924, "grad_norm": 0.049104308173568804, "learning_rate": 0.0009998621581587836, "loss": 0.7314, "step": 4205 }, { "epoch": 0.2137299945425239, "grad_norm": 0.03823024902121797, "learning_rate": 0.000999856907758827, "loss": 0.72, "step": 4210 }, { "epoch": 0.2139838306405554, "grad_norm": 0.057874634046649116, "learning_rate": 0.0009998515592394524, "loss": 0.713, "step": 4215 }, { "epoch": 0.2142376667385869, "grad_norm": 0.07434713454091414, "learning_rate": 0.0009998461126017094, "loss": 0.7125, "step": 4220 }, { "epoch": 0.2144915028366184, "grad_norm": 0.06272529032155742, "learning_rate": 0.0009998405678466671, "loss": 0.6889, "step": 4225 }, { "epoch": 0.2147453389346499, "grad_norm": 0.033595008050920305, "learning_rate": 0.0009998349249754142, "loss": 0.6796, "step": 4230 }, { "epoch": 0.2149991750326814, "grad_norm": 0.04029569391079619, "learning_rate": 0.0009998291839890582, "loss": 0.7028, "step": 4235 }, { "epoch": 0.2152530111307129, "grad_norm": 0.056686671703328986, "learning_rate": 0.000999823344888726, "loss": 0.7324, "step": 4240 }, { "epoch": 0.21550684722874439, "grad_norm": 0.06609484871358738, "learning_rate": 0.0009998174076755637, "loss": 0.6927, "step": 4245 }, { "epoch": 0.2157606833267759, "grad_norm": 0.03952252087967003, "learning_rate": 0.000999811372350737, "loss": 0.6872, "step": 4250 }, { "epoch": 0.2160145194248074, "grad_norm": 0.0421816140235618, "learning_rate": 0.0009998052389154303, "loss": 0.6629, "step": 4255 }, { "epoch": 0.2162683555228389, "grad_norm": 0.0392009074437296, "learning_rate": 0.0009997990073708479, "loss": 0.6661, "step": 4260 }, { "epoch": 0.2165221916208704, "grad_norm": 0.04466501671098514, "learning_rate": 0.0009997926777182127, "loss": 0.6814, "step": 4265 }, { "epoch": 0.2167760277189019, "grad_norm": 0.04350807791667501, "learning_rate": 0.0009997862499587673, "loss": 0.7324, "step": 4270 }, { "epoch": 0.2170298638169334, "grad_norm": 0.04149859498803727, "learning_rate": 0.0009997797240937736, "loss": 0.6829, "step": 4275 }, { "epoch": 0.2172836999149649, "grad_norm": 0.04436311666473786, "learning_rate": 0.0009997731001245124, "loss": 0.6919, "step": 4280 }, { "epoch": 0.2175375360129964, "grad_norm": 0.08118673796209477, "learning_rate": 0.0009997663780522842, "loss": 0.6884, "step": 4285 }, { "epoch": 0.21779137211102792, "grad_norm": 1.4346902161122619, "learning_rate": 0.000999759557878408, "loss": 0.9, "step": 4290 }, { "epoch": 0.2180452082090594, "grad_norm": 0.2203188373312744, "learning_rate": 0.0009997526396042231, "loss": 0.7955, "step": 4295 }, { "epoch": 0.2182990443070909, "grad_norm": 0.10935101729406492, "learning_rate": 0.000999745623231087, "loss": 0.799, "step": 4300 }, { "epoch": 0.2185528804051224, "grad_norm": 0.08157477584763652, "learning_rate": 0.0009997385087603776, "loss": 0.7428, "step": 4305 }, { "epoch": 0.21880671650315392, "grad_norm": 0.33104745046572154, "learning_rate": 0.0009997312961934912, "loss": 0.7342, "step": 4310 }, { "epoch": 0.21906055260118543, "grad_norm": 0.11235803791274827, "learning_rate": 0.000999723985531843, "loss": 0.7372, "step": 4315 }, { "epoch": 0.2193143886992169, "grad_norm": 0.5110586261632338, "learning_rate": 0.0009997165767768692, "loss": 0.7387, "step": 4320 }, { "epoch": 0.2195682247972484, "grad_norm": 0.12206962291351009, "learning_rate": 0.000999709069930023, "loss": 0.7563, "step": 4325 }, { "epoch": 0.21982206089527992, "grad_norm": 0.08087926540403238, "learning_rate": 0.0009997014649927786, "loss": 0.729, "step": 4330 }, { "epoch": 0.22007589699331143, "grad_norm": 0.14194129116961565, "learning_rate": 0.0009996937619666287, "loss": 0.763, "step": 4335 }, { "epoch": 0.22032973309134293, "grad_norm": 0.05593580754563041, "learning_rate": 0.0009996859608530852, "loss": 0.738, "step": 4340 }, { "epoch": 0.2205835691893744, "grad_norm": 0.05744534028157504, "learning_rate": 0.0009996780616536795, "loss": 0.7737, "step": 4345 }, { "epoch": 0.22083740528740592, "grad_norm": 0.1748737919710415, "learning_rate": 0.0009996700643699623, "loss": 0.8663, "step": 4350 }, { "epoch": 0.22109124138543743, "grad_norm": 0.21064202815185434, "learning_rate": 0.0009996619690035033, "loss": 0.7204, "step": 4355 }, { "epoch": 0.22134507748346893, "grad_norm": 0.10370603131130617, "learning_rate": 0.0009996537755558915, "loss": 0.7197, "step": 4360 }, { "epoch": 0.2215989135815004, "grad_norm": 0.06885717236979047, "learning_rate": 0.0009996454840287355, "loss": 0.744, "step": 4365 }, { "epoch": 0.22185274967953192, "grad_norm": 0.07087821700739863, "learning_rate": 0.0009996370944236625, "loss": 0.7341, "step": 4370 }, { "epoch": 0.22210658577756343, "grad_norm": 0.08283414484063312, "learning_rate": 0.0009996286067423196, "loss": 0.7271, "step": 4375 }, { "epoch": 0.22236042187559493, "grad_norm": 0.03861660942385861, "learning_rate": 0.000999620020986373, "loss": 0.6911, "step": 4380 }, { "epoch": 0.22261425797362644, "grad_norm": 0.06635541968079052, "learning_rate": 0.0009996113371575075, "loss": 0.7413, "step": 4385 }, { "epoch": 0.22286809407165792, "grad_norm": 0.064436083193186, "learning_rate": 0.0009996025552574284, "loss": 0.687, "step": 4390 }, { "epoch": 0.22312193016968943, "grad_norm": 0.07447130960554259, "learning_rate": 0.000999593675287859, "loss": 0.7174, "step": 4395 }, { "epoch": 0.22337576626772093, "grad_norm": 0.04227245688144544, "learning_rate": 0.0009995846972505429, "loss": 0.7318, "step": 4400 }, { "epoch": 0.22362960236575244, "grad_norm": 0.04336627684359626, "learning_rate": 0.000999575621147242, "loss": 0.758, "step": 4405 }, { "epoch": 0.22388343846378395, "grad_norm": 0.052749435841111, "learning_rate": 0.000999566446979738, "loss": 0.7302, "step": 4410 }, { "epoch": 0.22413727456181542, "grad_norm": 0.046319696344097425, "learning_rate": 0.0009995571747498319, "loss": 0.6572, "step": 4415 }, { "epoch": 0.22439111065984693, "grad_norm": 0.036417375306608095, "learning_rate": 0.0009995478044593435, "loss": 0.7134, "step": 4420 }, { "epoch": 0.22464494675787844, "grad_norm": 0.03453022900900243, "learning_rate": 0.0009995383361101125, "loss": 0.7012, "step": 4425 }, { "epoch": 0.22489878285590995, "grad_norm": 0.07427033738679807, "learning_rate": 0.0009995287697039973, "loss": 0.7013, "step": 4430 }, { "epoch": 0.22515261895394145, "grad_norm": 0.054569394891019696, "learning_rate": 0.0009995191052428758, "loss": 0.7198, "step": 4435 }, { "epoch": 0.22540645505197293, "grad_norm": 0.0393817069157149, "learning_rate": 0.0009995093427286447, "loss": 0.6906, "step": 4440 }, { "epoch": 0.22566029115000444, "grad_norm": 0.061316789682121224, "learning_rate": 0.000999499482163221, "loss": 0.7074, "step": 4445 }, { "epoch": 0.22591412724803595, "grad_norm": 0.05219263283094663, "learning_rate": 0.00099948952354854, "loss": 0.7237, "step": 4450 }, { "epoch": 0.22616796334606745, "grad_norm": 0.041749572353213034, "learning_rate": 0.0009994794668865563, "loss": 0.7215, "step": 4455 }, { "epoch": 0.22642179944409896, "grad_norm": 0.06621048957460272, "learning_rate": 0.0009994693121792443, "loss": 0.7196, "step": 4460 }, { "epoch": 0.22667563554213044, "grad_norm": 0.04745304839755999, "learning_rate": 0.000999459059428597, "loss": 0.7145, "step": 4465 }, { "epoch": 0.22692947164016194, "grad_norm": 0.03774244254610854, "learning_rate": 0.0009994487086366272, "loss": 0.684, "step": 4470 }, { "epoch": 0.22718330773819345, "grad_norm": 0.05065779440911714, "learning_rate": 0.0009994382598053665, "loss": 0.676, "step": 4475 }, { "epoch": 0.22743714383622496, "grad_norm": 0.04660423097011398, "learning_rate": 0.0009994277129368664, "loss": 0.6876, "step": 4480 }, { "epoch": 0.22769097993425644, "grad_norm": 0.039430766510009256, "learning_rate": 0.0009994170680331968, "loss": 0.6723, "step": 4485 }, { "epoch": 0.22794481603228794, "grad_norm": 0.043461044788944345, "learning_rate": 0.0009994063250964472, "loss": 0.6599, "step": 4490 }, { "epoch": 0.22819865213031945, "grad_norm": 0.05440998730009002, "learning_rate": 0.0009993954841287266, "loss": 0.6705, "step": 4495 }, { "epoch": 0.22845248822835096, "grad_norm": 0.058972665330176754, "learning_rate": 0.000999384545132163, "loss": 0.6872, "step": 4500 }, { "epoch": 0.22870632432638247, "grad_norm": 0.07456158646652374, "learning_rate": 0.0009993735081089035, "loss": 0.6873, "step": 4505 }, { "epoch": 0.22896016042441394, "grad_norm": 0.05068153364562075, "learning_rate": 0.0009993623730611147, "loss": 0.6833, "step": 4510 }, { "epoch": 0.22921399652244545, "grad_norm": 0.06571910537543511, "learning_rate": 0.0009993511399909825, "loss": 0.6663, "step": 4515 }, { "epoch": 0.22946783262047696, "grad_norm": 0.1595236692921901, "learning_rate": 0.0009993398089007117, "loss": 0.6632, "step": 4520 }, { "epoch": 0.22972166871850846, "grad_norm": 0.057680648886969166, "learning_rate": 0.0009993283797925267, "loss": 0.6815, "step": 4525 }, { "epoch": 0.22997550481653997, "grad_norm": 0.05224803456061352, "learning_rate": 0.0009993168526686708, "loss": 0.7055, "step": 4530 }, { "epoch": 0.23022934091457145, "grad_norm": 0.03339058561257075, "learning_rate": 0.000999305227531407, "loss": 0.6897, "step": 4535 }, { "epoch": 0.23048317701260296, "grad_norm": 0.03485057296061167, "learning_rate": 0.000999293504383017, "loss": 0.6382, "step": 4540 }, { "epoch": 0.23073701311063446, "grad_norm": 0.06126014494022501, "learning_rate": 0.000999281683225802, "loss": 0.7143, "step": 4545 }, { "epoch": 0.23099084920866597, "grad_norm": 0.045158627169637415, "learning_rate": 0.0009992697640620824, "loss": 0.7128, "step": 4550 }, { "epoch": 0.23124468530669748, "grad_norm": 0.0453807537141564, "learning_rate": 0.000999257746894198, "loss": 0.7073, "step": 4555 }, { "epoch": 0.23149852140472896, "grad_norm": 0.038686364819667377, "learning_rate": 0.0009992456317245077, "loss": 0.7112, "step": 4560 }, { "epoch": 0.23175235750276046, "grad_norm": 0.04309918961411153, "learning_rate": 0.0009992334185553898, "loss": 0.7091, "step": 4565 }, { "epoch": 0.23200619360079197, "grad_norm": 0.03768367614783312, "learning_rate": 0.0009992211073892414, "loss": 0.6774, "step": 4570 }, { "epoch": 0.23226002969882348, "grad_norm": 0.05953571982775251, "learning_rate": 0.000999208698228479, "loss": 0.6822, "step": 4575 }, { "epoch": 0.23251386579685496, "grad_norm": 0.033857197586949536, "learning_rate": 0.0009991961910755392, "loss": 0.663, "step": 4580 }, { "epoch": 0.23276770189488646, "grad_norm": 0.039971596166750424, "learning_rate": 0.0009991835859328763, "loss": 0.6908, "step": 4585 }, { "epoch": 0.23302153799291797, "grad_norm": 0.050452966613271054, "learning_rate": 0.0009991708828029648, "loss": 0.642, "step": 4590 }, { "epoch": 0.23327537409094948, "grad_norm": 0.0579358422967732, "learning_rate": 0.0009991580816882983, "loss": 0.6956, "step": 4595 }, { "epoch": 0.23352921018898098, "grad_norm": 0.034608348704914735, "learning_rate": 0.00099914518259139, "loss": 0.7036, "step": 4600 }, { "epoch": 0.23378304628701246, "grad_norm": 0.04500468096828464, "learning_rate": 0.0009991321855147713, "loss": 0.7096, "step": 4605 }, { "epoch": 0.23403688238504397, "grad_norm": 0.06192464258502063, "learning_rate": 0.0009991190904609939, "loss": 0.6963, "step": 4610 }, { "epoch": 0.23429071848307548, "grad_norm": 0.036824417923052896, "learning_rate": 0.0009991058974326281, "loss": 0.6719, "step": 4615 }, { "epoch": 0.23454455458110698, "grad_norm": 0.034966580229344214, "learning_rate": 0.0009990926064322636, "loss": 0.6867, "step": 4620 }, { "epoch": 0.2347983906791385, "grad_norm": 0.03553588744447904, "learning_rate": 0.0009990792174625095, "loss": 0.6936, "step": 4625 }, { "epoch": 0.23505222677716997, "grad_norm": 0.03744697297486917, "learning_rate": 0.000999065730525994, "loss": 0.725, "step": 4630 }, { "epoch": 0.23530606287520148, "grad_norm": 0.03140251768780669, "learning_rate": 0.0009990521456253643, "loss": 0.6593, "step": 4635 }, { "epoch": 0.23555989897323298, "grad_norm": 0.039136334308997274, "learning_rate": 0.0009990384627632872, "loss": 0.6846, "step": 4640 }, { "epoch": 0.2358137350712645, "grad_norm": 0.03616571384042556, "learning_rate": 0.0009990246819424487, "loss": 0.6877, "step": 4645 }, { "epoch": 0.236067571169296, "grad_norm": 0.038208828921227095, "learning_rate": 0.0009990108031655536, "loss": 0.6841, "step": 4650 }, { "epoch": 0.23632140726732748, "grad_norm": 0.03751589202735807, "learning_rate": 0.0009989968264353265, "loss": 0.6737, "step": 4655 }, { "epoch": 0.23657524336535898, "grad_norm": 0.06015453571891426, "learning_rate": 0.0009989827517545107, "loss": 0.6753, "step": 4660 }, { "epoch": 0.2368290794633905, "grad_norm": 0.03457380624142321, "learning_rate": 0.0009989685791258693, "loss": 0.6835, "step": 4665 }, { "epoch": 0.237082915561422, "grad_norm": 0.033252457228367796, "learning_rate": 0.0009989543085521843, "loss": 0.676, "step": 4670 }, { "epoch": 0.2373367516594535, "grad_norm": 0.05093290561964852, "learning_rate": 0.0009989399400362566, "loss": 0.7078, "step": 4675 }, { "epoch": 0.23759058775748498, "grad_norm": 0.030107770564768616, "learning_rate": 0.0009989254735809068, "loss": 0.6547, "step": 4680 }, { "epoch": 0.2378444238555165, "grad_norm": 0.03606716641926515, "learning_rate": 0.000998910909188975, "loss": 0.7156, "step": 4685 }, { "epoch": 0.238098259953548, "grad_norm": 0.030847705095771165, "learning_rate": 0.0009988962468633195, "loss": 0.6556, "step": 4690 }, { "epoch": 0.2383520960515795, "grad_norm": 0.03107834681283107, "learning_rate": 0.000998881486606819, "loss": 0.6654, "step": 4695 }, { "epoch": 0.23860593214961098, "grad_norm": 0.03269129778574898, "learning_rate": 0.0009988666284223703, "loss": 0.646, "step": 4700 }, { "epoch": 0.2388597682476425, "grad_norm": 0.03326143191194153, "learning_rate": 0.0009988516723128905, "loss": 0.6804, "step": 4705 }, { "epoch": 0.239113604345674, "grad_norm": 0.032004462939776754, "learning_rate": 0.0009988366182813152, "loss": 0.6756, "step": 4710 }, { "epoch": 0.2393674404437055, "grad_norm": 0.03226471228597121, "learning_rate": 0.0009988214663305991, "loss": 0.6736, "step": 4715 }, { "epoch": 0.239621276541737, "grad_norm": 0.03050888275116761, "learning_rate": 0.000998806216463717, "loss": 0.6593, "step": 4720 }, { "epoch": 0.2398751126397685, "grad_norm": 0.06624380514348276, "learning_rate": 0.0009987908686836622, "loss": 0.666, "step": 4725 }, { "epoch": 0.2401289487378, "grad_norm": 0.04639500533331041, "learning_rate": 0.0009987754229934473, "loss": 0.6432, "step": 4730 }, { "epoch": 0.2403827848358315, "grad_norm": 0.03651599837288676, "learning_rate": 0.0009987598793961044, "loss": 0.6984, "step": 4735 }, { "epoch": 0.240636620933863, "grad_norm": 0.05906840836987052, "learning_rate": 0.0009987442378946842, "loss": 0.6734, "step": 4740 }, { "epoch": 0.24089045703189452, "grad_norm": 0.0448969230153068, "learning_rate": 0.0009987284984922576, "loss": 0.6632, "step": 4745 }, { "epoch": 0.241144293129926, "grad_norm": 0.0369703497767851, "learning_rate": 0.0009987126611919136, "loss": 0.6797, "step": 4750 }, { "epoch": 0.2413981292279575, "grad_norm": 0.03187137783090064, "learning_rate": 0.0009986967259967617, "loss": 0.6988, "step": 4755 }, { "epoch": 0.241651965325989, "grad_norm": 0.046567965068659574, "learning_rate": 0.0009986806929099291, "loss": 0.6878, "step": 4760 }, { "epoch": 0.24190580142402052, "grad_norm": 0.03459321103042977, "learning_rate": 0.0009986645619345636, "loss": 0.678, "step": 4765 }, { "epoch": 0.24215963752205202, "grad_norm": 0.03218614265241276, "learning_rate": 0.0009986483330738313, "loss": 0.6708, "step": 4770 }, { "epoch": 0.2424134736200835, "grad_norm": 0.032098655982760134, "learning_rate": 0.0009986320063309182, "loss": 0.6975, "step": 4775 }, { "epoch": 0.242667309718115, "grad_norm": 0.03147966656169076, "learning_rate": 0.0009986155817090288, "loss": 0.6792, "step": 4780 }, { "epoch": 0.24292114581614652, "grad_norm": 0.036497678142445616, "learning_rate": 0.0009985990592113873, "loss": 0.6736, "step": 4785 }, { "epoch": 0.24317498191417802, "grad_norm": 0.037003131869676374, "learning_rate": 0.000998582438841237, "loss": 0.7012, "step": 4790 }, { "epoch": 0.2434288180122095, "grad_norm": 0.02977697312157573, "learning_rate": 0.0009985657206018404, "loss": 0.6579, "step": 4795 }, { "epoch": 0.243682654110241, "grad_norm": 0.033364670469226705, "learning_rate": 0.0009985489044964792, "loss": 0.6868, "step": 4800 }, { "epoch": 0.24393649020827252, "grad_norm": 0.03611311028340616, "learning_rate": 0.0009985319905284542, "loss": 0.7035, "step": 4805 }, { "epoch": 0.24419032630630402, "grad_norm": 0.03271327971441799, "learning_rate": 0.0009985149787010857, "loss": 0.682, "step": 4810 }, { "epoch": 0.24444416240433553, "grad_norm": 0.045957037298533635, "learning_rate": 0.000998497869017713, "loss": 0.6568, "step": 4815 }, { "epoch": 0.244697998502367, "grad_norm": 0.058416697755835315, "learning_rate": 0.0009984806614816944, "loss": 0.6353, "step": 4820 }, { "epoch": 0.24495183460039852, "grad_norm": 0.04141568212799508, "learning_rate": 0.000998463356096408, "loss": 0.6997, "step": 4825 }, { "epoch": 0.24520567069843002, "grad_norm": 0.03429415259338343, "learning_rate": 0.0009984459528652508, "loss": 0.6693, "step": 4830 }, { "epoch": 0.24545950679646153, "grad_norm": 0.04246659526708136, "learning_rate": 0.0009984284517916386, "loss": 0.6749, "step": 4835 }, { "epoch": 0.24571334289449304, "grad_norm": 0.044954596862590616, "learning_rate": 0.000998410852879007, "loss": 0.6954, "step": 4840 }, { "epoch": 0.24596717899252452, "grad_norm": 0.04365653720088704, "learning_rate": 0.0009983931561308105, "loss": 0.6583, "step": 4845 }, { "epoch": 0.24622101509055602, "grad_norm": 0.04095497127619676, "learning_rate": 0.0009983753615505232, "loss": 0.6648, "step": 4850 }, { "epoch": 0.24647485118858753, "grad_norm": 0.037206051604634686, "learning_rate": 0.0009983574691416377, "loss": 0.7103, "step": 4855 }, { "epoch": 0.24672868728661904, "grad_norm": 0.033774627357303716, "learning_rate": 0.0009983394789076663, "loss": 0.6761, "step": 4860 }, { "epoch": 0.24698252338465054, "grad_norm": 0.04836628251413319, "learning_rate": 0.0009983213908521403, "loss": 0.6753, "step": 4865 }, { "epoch": 0.24723635948268202, "grad_norm": 0.05466352076606845, "learning_rate": 0.0009983032049786106, "loss": 0.6939, "step": 4870 }, { "epoch": 0.24749019558071353, "grad_norm": 0.03600417172852513, "learning_rate": 0.0009982849212906465, "loss": 0.6242, "step": 4875 }, { "epoch": 0.24774403167874504, "grad_norm": 0.03575528198191203, "learning_rate": 0.0009982665397918376, "loss": 0.6428, "step": 4880 }, { "epoch": 0.24799786777677654, "grad_norm": 0.0444096053237933, "learning_rate": 0.0009982480604857915, "loss": 0.6974, "step": 4885 }, { "epoch": 0.24825170387480805, "grad_norm": 0.0467175869465778, "learning_rate": 0.000998229483376136, "loss": 0.6542, "step": 4890 }, { "epoch": 0.24850553997283953, "grad_norm": 0.03386552164563125, "learning_rate": 0.0009982108084665177, "loss": 0.6855, "step": 4895 }, { "epoch": 0.24875937607087104, "grad_norm": 0.04919740684938901, "learning_rate": 0.0009981920357606023, "loss": 0.6631, "step": 4900 }, { "epoch": 0.24901321216890254, "grad_norm": 0.057577585211750874, "learning_rate": 0.0009981731652620746, "loss": 0.6562, "step": 4905 }, { "epoch": 0.24926704826693405, "grad_norm": 0.036406073723948565, "learning_rate": 0.0009981541969746389, "loss": 0.647, "step": 4910 }, { "epoch": 0.24952088436496553, "grad_norm": 0.05465235293761601, "learning_rate": 0.0009981351309020189, "loss": 0.6631, "step": 4915 }, { "epoch": 0.24977472046299704, "grad_norm": 0.048518701182604415, "learning_rate": 0.0009981159670479566, "loss": 0.6637, "step": 4920 }, { "epoch": 0.25002855656102857, "grad_norm": 0.0440335328009626, "learning_rate": 0.0009980967054162141, "loss": 0.6347, "step": 4925 }, { "epoch": 0.25028239265906005, "grad_norm": 0.035049011739473654, "learning_rate": 0.0009980773460105726, "loss": 0.6469, "step": 4930 }, { "epoch": 0.25053622875709153, "grad_norm": 0.030844174743583502, "learning_rate": 0.0009980578888348318, "loss": 0.6618, "step": 4935 }, { "epoch": 0.25079006485512306, "grad_norm": 0.04942299263880296, "learning_rate": 0.000998038333892811, "loss": 0.659, "step": 4940 }, { "epoch": 0.25104390095315454, "grad_norm": 0.03920588146136878, "learning_rate": 0.0009980186811883495, "loss": 0.6439, "step": 4945 }, { "epoch": 0.2512977370511861, "grad_norm": 0.037389944860020057, "learning_rate": 0.000997998930725304, "loss": 0.6883, "step": 4950 }, { "epoch": 0.25155157314921756, "grad_norm": 0.035236188832557136, "learning_rate": 0.0009979790825075522, "loss": 0.6553, "step": 4955 }, { "epoch": 0.25180540924724903, "grad_norm": 0.040826549993116246, "learning_rate": 0.0009979591365389898, "loss": 0.6896, "step": 4960 }, { "epoch": 0.25205924534528057, "grad_norm": 0.08332873012050707, "learning_rate": 0.0009979390928235323, "loss": 0.6845, "step": 4965 }, { "epoch": 0.25231308144331205, "grad_norm": 0.033440948748920125, "learning_rate": 0.000997918951365114, "loss": 0.639, "step": 4970 }, { "epoch": 0.2525669175413435, "grad_norm": 0.03031028775821842, "learning_rate": 0.0009978987121676889, "loss": 0.6361, "step": 4975 }, { "epoch": 0.25282075363937506, "grad_norm": 0.031208187381832196, "learning_rate": 0.0009978783752352294, "loss": 0.652, "step": 4980 }, { "epoch": 0.25307458973740654, "grad_norm": 0.02872166512981619, "learning_rate": 0.0009978579405717277, "loss": 0.6724, "step": 4985 }, { "epoch": 0.2533284258354381, "grad_norm": 0.03216058478323663, "learning_rate": 0.0009978374081811951, "loss": 0.6371, "step": 4990 }, { "epoch": 0.25358226193346955, "grad_norm": 0.044909528815065505, "learning_rate": 0.000997816778067662, "loss": 0.6411, "step": 4995 }, { "epoch": 0.25383609803150103, "grad_norm": 0.05288790528971391, "learning_rate": 0.0009977960502351782, "loss": 0.649, "step": 5000 }, { "epoch": 0.25408993412953257, "grad_norm": 0.053954926716690006, "learning_rate": 0.000997775224687812, "loss": 0.6916, "step": 5005 }, { "epoch": 0.25434377022756405, "grad_norm": 0.03358524756795457, "learning_rate": 0.0009977543014296516, "loss": 0.6414, "step": 5010 }, { "epoch": 0.2545976063255956, "grad_norm": 0.04116556703883807, "learning_rate": 0.0009977332804648044, "loss": 0.629, "step": 5015 }, { "epoch": 0.25485144242362706, "grad_norm": 0.0607964434321599, "learning_rate": 0.000997712161797396, "loss": 0.6169, "step": 5020 }, { "epoch": 0.25510527852165854, "grad_norm": 0.03948191550117113, "learning_rate": 0.0009976909454315727, "loss": 0.6516, "step": 5025 }, { "epoch": 0.2553591146196901, "grad_norm": 0.03816107077659965, "learning_rate": 0.0009976696313714986, "loss": 0.6208, "step": 5030 }, { "epoch": 0.25561295071772155, "grad_norm": 0.03580194239027954, "learning_rate": 0.0009976482196213578, "loss": 0.6565, "step": 5035 }, { "epoch": 0.2558667868157531, "grad_norm": 0.03359792481796965, "learning_rate": 0.0009976267101853534, "loss": 0.6612, "step": 5040 }, { "epoch": 0.25612062291378457, "grad_norm": 0.07818021673605252, "learning_rate": 0.000997605103067707, "loss": 0.6803, "step": 5045 }, { "epoch": 0.25637445901181605, "grad_norm": 0.05189617302318715, "learning_rate": 0.000997583398272661, "loss": 0.6446, "step": 5050 }, { "epoch": 0.2566282951098476, "grad_norm": 0.040877169883040965, "learning_rate": 0.000997561595804475, "loss": 0.6738, "step": 5055 }, { "epoch": 0.25688213120787906, "grad_norm": 0.03223264560242032, "learning_rate": 0.0009975396956674292, "loss": 0.6671, "step": 5060 }, { "epoch": 0.2571359673059106, "grad_norm": 0.0293074841987577, "learning_rate": 0.0009975176978658223, "loss": 0.6393, "step": 5065 }, { "epoch": 0.2573898034039421, "grad_norm": 0.029454580799978237, "learning_rate": 0.0009974956024039723, "loss": 0.668, "step": 5070 }, { "epoch": 0.25764363950197355, "grad_norm": 0.036264462292175975, "learning_rate": 0.0009974734092862167, "loss": 0.6323, "step": 5075 }, { "epoch": 0.2578974756000051, "grad_norm": 0.03401180337813072, "learning_rate": 0.0009974511185169119, "loss": 0.6179, "step": 5080 }, { "epoch": 0.25815131169803657, "grad_norm": 0.035116697500188865, "learning_rate": 0.0009974287301004333, "loss": 0.6568, "step": 5085 }, { "epoch": 0.2584051477960681, "grad_norm": 0.032907495214095854, "learning_rate": 0.0009974062440411754, "loss": 0.6608, "step": 5090 }, { "epoch": 0.2586589838940996, "grad_norm": 0.03230228212232674, "learning_rate": 0.0009973836603435525, "loss": 0.6692, "step": 5095 }, { "epoch": 0.25891281999213106, "grad_norm": 0.036175243285705705, "learning_rate": 0.0009973609790119974, "loss": 0.6115, "step": 5100 }, { "epoch": 0.2591666560901626, "grad_norm": 0.04087747325856648, "learning_rate": 0.0009973382000509627, "loss": 0.6158, "step": 5105 }, { "epoch": 0.2594204921881941, "grad_norm": 0.05131461463304514, "learning_rate": 0.0009973153234649195, "loss": 0.6733, "step": 5110 }, { "epoch": 0.2596743282862256, "grad_norm": 0.042101949951047324, "learning_rate": 0.0009972923492583582, "loss": 0.6649, "step": 5115 }, { "epoch": 0.2599281643842571, "grad_norm": 0.032414502454393444, "learning_rate": 0.0009972692774357888, "loss": 0.6502, "step": 5120 }, { "epoch": 0.26018200048228857, "grad_norm": 0.031594756495697617, "learning_rate": 0.0009972461080017404, "loss": 0.6269, "step": 5125 }, { "epoch": 0.2604358365803201, "grad_norm": 0.029578356553082198, "learning_rate": 0.0009972228409607605, "loss": 0.658, "step": 5130 }, { "epoch": 0.2606896726783516, "grad_norm": 0.04180221452673936, "learning_rate": 0.0009971994763174165, "loss": 0.6404, "step": 5135 }, { "epoch": 0.2609435087763831, "grad_norm": 0.034100422332999716, "learning_rate": 0.0009971760140762948, "loss": 0.6475, "step": 5140 }, { "epoch": 0.2611973448744146, "grad_norm": 0.04081751959411303, "learning_rate": 0.0009971524542420013, "loss": 0.64, "step": 5145 }, { "epoch": 0.2614511809724461, "grad_norm": 0.028872945512903075, "learning_rate": 0.00099712879681916, "loss": 0.6393, "step": 5150 }, { "epoch": 0.2617050170704776, "grad_norm": 0.035240956930609156, "learning_rate": 0.0009971050418124152, "loss": 0.6333, "step": 5155 }, { "epoch": 0.2619588531685091, "grad_norm": 0.044136885746740954, "learning_rate": 0.0009970811892264298, "loss": 0.635, "step": 5160 }, { "epoch": 0.2622126892665406, "grad_norm": 0.0459739244555897, "learning_rate": 0.0009970572390658858, "loss": 0.6544, "step": 5165 }, { "epoch": 0.2624665253645721, "grad_norm": 0.04065361653486883, "learning_rate": 0.0009970331913354846, "loss": 0.6466, "step": 5170 }, { "epoch": 0.2627203614626036, "grad_norm": 0.06797018866692801, "learning_rate": 0.0009970090460399467, "loss": 0.635, "step": 5175 }, { "epoch": 0.2629741975606351, "grad_norm": 0.037934420949089415, "learning_rate": 0.0009969848031840117, "loss": 0.6785, "step": 5180 }, { "epoch": 0.2632280336586666, "grad_norm": 0.041773499604102336, "learning_rate": 0.000996960462772438, "loss": 0.6269, "step": 5185 }, { "epoch": 0.2634818697566981, "grad_norm": 0.03264370334223641, "learning_rate": 0.000996936024810004, "loss": 0.6399, "step": 5190 }, { "epoch": 0.2637357058547296, "grad_norm": 0.05100248961195995, "learning_rate": 0.0009969114893015065, "loss": 0.6499, "step": 5195 }, { "epoch": 0.2639895419527611, "grad_norm": 0.052788735735977844, "learning_rate": 0.000996886856251762, "loss": 0.6809, "step": 5200 }, { "epoch": 0.2642433780507926, "grad_norm": 0.04842406245587388, "learning_rate": 0.0009968621256656051, "loss": 0.6552, "step": 5205 }, { "epoch": 0.2644972141488241, "grad_norm": 0.03292511247465923, "learning_rate": 0.0009968372975478913, "loss": 0.6661, "step": 5210 }, { "epoch": 0.2647510502468556, "grad_norm": 0.058627129552400285, "learning_rate": 0.0009968123719034934, "loss": 0.6759, "step": 5215 }, { "epoch": 0.2650048863448871, "grad_norm": 0.04585833544931062, "learning_rate": 0.0009967873487373045, "loss": 0.6838, "step": 5220 }, { "epoch": 0.2652587224429186, "grad_norm": 0.06213801760761045, "learning_rate": 0.0009967622280542365, "loss": 0.686, "step": 5225 }, { "epoch": 0.2655125585409501, "grad_norm": 0.0465036803469171, "learning_rate": 0.0009967370098592206, "loss": 0.6789, "step": 5230 }, { "epoch": 0.2657663946389816, "grad_norm": 0.036623256236247154, "learning_rate": 0.000996711694157207, "loss": 0.6537, "step": 5235 }, { "epoch": 0.2660202307370131, "grad_norm": 0.036813613833440686, "learning_rate": 0.0009966862809531647, "loss": 0.6605, "step": 5240 }, { "epoch": 0.2662740668350446, "grad_norm": 0.033104325193404505, "learning_rate": 0.0009966607702520825, "loss": 0.6667, "step": 5245 }, { "epoch": 0.2665279029330761, "grad_norm": 0.03616645613143295, "learning_rate": 0.0009966351620589679, "loss": 0.6442, "step": 5250 }, { "epoch": 0.26678173903110763, "grad_norm": 0.1824434683613809, "learning_rate": 0.0009966094563788478, "loss": 0.7154, "step": 5255 }, { "epoch": 0.2670355751291391, "grad_norm": 0.08584590008440728, "learning_rate": 0.0009965836532167679, "loss": 0.7169, "step": 5260 }, { "epoch": 0.2672894112271706, "grad_norm": 0.08992314008060993, "learning_rate": 0.0009965577525777934, "loss": 0.7042, "step": 5265 }, { "epoch": 0.2675432473252021, "grad_norm": 0.05488924202012461, "learning_rate": 0.0009965317544670083, "loss": 0.6752, "step": 5270 }, { "epoch": 0.2677970834232336, "grad_norm": 0.045608215425613115, "learning_rate": 0.000996505658889516, "loss": 0.6711, "step": 5275 }, { "epoch": 0.26805091952126514, "grad_norm": 0.04073278787103573, "learning_rate": 0.000996479465850439, "loss": 0.6915, "step": 5280 }, { "epoch": 0.2683047556192966, "grad_norm": 0.04291579060984302, "learning_rate": 0.000996453175354919, "loss": 0.6813, "step": 5285 }, { "epoch": 0.2685585917173281, "grad_norm": 0.037038830744694835, "learning_rate": 0.000996426787408116, "loss": 0.7183, "step": 5290 }, { "epoch": 0.26881242781535963, "grad_norm": 0.03993617745746424, "learning_rate": 0.0009964003020152107, "loss": 0.7023, "step": 5295 }, { "epoch": 0.2690662639133911, "grad_norm": 0.043203555024935315, "learning_rate": 0.0009963737191814015, "loss": 0.6355, "step": 5300 }, { "epoch": 0.26932010001142265, "grad_norm": 0.03774583913684898, "learning_rate": 0.0009963470389119068, "loss": 0.6524, "step": 5305 }, { "epoch": 0.2695739361094541, "grad_norm": 0.03395726169338956, "learning_rate": 0.0009963202612119635, "loss": 0.6512, "step": 5310 }, { "epoch": 0.2698277722074856, "grad_norm": 0.043530029342712495, "learning_rate": 0.000996293386086828, "loss": 0.6618, "step": 5315 }, { "epoch": 0.27008160830551714, "grad_norm": 0.03403180797982201, "learning_rate": 0.0009962664135417761, "loss": 0.6811, "step": 5320 }, { "epoch": 0.2703354444035486, "grad_norm": 0.04666110465558761, "learning_rate": 0.0009962393435821017, "loss": 0.6615, "step": 5325 }, { "epoch": 0.27058928050158015, "grad_norm": 0.032556938435121995, "learning_rate": 0.0009962121762131192, "loss": 0.6395, "step": 5330 }, { "epoch": 0.27084311659961163, "grad_norm": 0.04419252183394335, "learning_rate": 0.0009961849114401612, "loss": 0.6119, "step": 5335 }, { "epoch": 0.2710969526976431, "grad_norm": 0.035595422315825305, "learning_rate": 0.0009961575492685793, "loss": 0.6484, "step": 5340 }, { "epoch": 0.27135078879567465, "grad_norm": 0.02926242788618824, "learning_rate": 0.0009961300897037449, "loss": 0.6407, "step": 5345 }, { "epoch": 0.2716046248937061, "grad_norm": 0.03602952233417284, "learning_rate": 0.000996102532751048, "loss": 0.6641, "step": 5350 }, { "epoch": 0.27185846099173766, "grad_norm": 0.045258498714948955, "learning_rate": 0.000996074878415898, "loss": 0.6636, "step": 5355 }, { "epoch": 0.27211229708976914, "grad_norm": 0.055406038005293826, "learning_rate": 0.0009960471267037234, "loss": 0.6462, "step": 5360 }, { "epoch": 0.2723661331878006, "grad_norm": 0.05518321218015624, "learning_rate": 0.0009960192776199716, "loss": 0.6265, "step": 5365 }, { "epoch": 0.27261996928583215, "grad_norm": 0.03701471191581659, "learning_rate": 0.0009959913311701092, "loss": 0.6311, "step": 5370 }, { "epoch": 0.27287380538386363, "grad_norm": 0.04247484585566076, "learning_rate": 0.000995963287359622, "loss": 0.663, "step": 5375 }, { "epoch": 0.27312764148189517, "grad_norm": 0.03805986357905345, "learning_rate": 0.0009959351461940149, "loss": 0.6367, "step": 5380 }, { "epoch": 0.27338147757992665, "grad_norm": 0.041801835611640255, "learning_rate": 0.0009959069076788118, "loss": 0.6948, "step": 5385 }, { "epoch": 0.2736353136779581, "grad_norm": 0.034657777527560114, "learning_rate": 0.0009958785718195559, "loss": 0.6829, "step": 5390 }, { "epoch": 0.27388914977598966, "grad_norm": 0.18006607610520042, "learning_rate": 0.000995850138621809, "loss": 0.6177, "step": 5395 }, { "epoch": 0.27414298587402114, "grad_norm": 0.10543857125691686, "learning_rate": 0.0009958216080911528, "loss": 0.6223, "step": 5400 }, { "epoch": 0.2743968219720526, "grad_norm": 0.0900614057284698, "learning_rate": 0.0009957929802331877, "loss": 0.6623, "step": 5405 }, { "epoch": 0.27465065807008415, "grad_norm": 0.04819979788344021, "learning_rate": 0.000995764255053533, "loss": 0.7131, "step": 5410 }, { "epoch": 0.27490449416811563, "grad_norm": 0.03076997581248242, "learning_rate": 0.0009957354325578276, "loss": 0.6477, "step": 5415 }, { "epoch": 0.27515833026614717, "grad_norm": 0.041385162650395144, "learning_rate": 0.000995706512751729, "loss": 0.6565, "step": 5420 }, { "epoch": 0.27541216636417865, "grad_norm": 0.03721380433063758, "learning_rate": 0.0009956774956409139, "loss": 0.6774, "step": 5425 }, { "epoch": 0.2756660024622101, "grad_norm": 0.0354007938770567, "learning_rate": 0.0009956483812310782, "loss": 0.6286, "step": 5430 }, { "epoch": 0.27591983856024166, "grad_norm": 0.04494906800664398, "learning_rate": 0.0009956191695279374, "loss": 0.6499, "step": 5435 }, { "epoch": 0.27617367465827314, "grad_norm": 0.06723158886677959, "learning_rate": 0.0009955898605372249, "loss": 0.6385, "step": 5440 }, { "epoch": 0.2764275107563047, "grad_norm": 0.04401273742367783, "learning_rate": 0.0009955604542646946, "loss": 0.7135, "step": 5445 }, { "epoch": 0.27668134685433615, "grad_norm": 0.05565588713522223, "learning_rate": 0.0009955309507161184, "loss": 0.6492, "step": 5450 }, { "epoch": 0.27693518295236763, "grad_norm": 0.0719042454362043, "learning_rate": 0.0009955013498972876, "loss": 0.6936, "step": 5455 }, { "epoch": 0.27718901905039917, "grad_norm": 0.03740121789307142, "learning_rate": 0.000995471651814013, "loss": 0.6424, "step": 5460 }, { "epoch": 0.27744285514843064, "grad_norm": 0.03850759086931981, "learning_rate": 0.0009954418564721242, "loss": 0.6759, "step": 5465 }, { "epoch": 0.2776966912464622, "grad_norm": 0.036923309271708, "learning_rate": 0.0009954119638774695, "loss": 0.6807, "step": 5470 }, { "epoch": 0.27795052734449366, "grad_norm": 0.042899936320904095, "learning_rate": 0.000995381974035917, "loss": 0.6874, "step": 5475 }, { "epoch": 0.27820436344252514, "grad_norm": 0.0784299633425732, "learning_rate": 0.0009953518869533536, "loss": 0.6906, "step": 5480 }, { "epoch": 0.2784581995405567, "grad_norm": 0.05844919432805366, "learning_rate": 0.0009953217026356848, "loss": 0.6793, "step": 5485 }, { "epoch": 0.27871203563858815, "grad_norm": 0.040898154688727616, "learning_rate": 0.0009952914210888363, "loss": 0.6796, "step": 5490 }, { "epoch": 0.2789658717366197, "grad_norm": 0.056560854563487195, "learning_rate": 0.0009952610423187517, "loss": 0.6589, "step": 5495 }, { "epoch": 0.27921970783465117, "grad_norm": 0.07111930095820192, "learning_rate": 0.0009952305663313943, "loss": 0.665, "step": 5500 }, { "epoch": 0.27947354393268264, "grad_norm": 0.06949742084914043, "learning_rate": 0.0009951999931327464, "loss": 0.6705, "step": 5505 }, { "epoch": 0.2797273800307142, "grad_norm": 0.05060283874594893, "learning_rate": 0.0009951693227288096, "loss": 0.7088, "step": 5510 }, { "epoch": 0.27998121612874566, "grad_norm": 0.03589194724274505, "learning_rate": 0.0009951385551256041, "loss": 0.6602, "step": 5515 }, { "epoch": 0.2802350522267772, "grad_norm": 0.0327414416620521, "learning_rate": 0.0009951076903291693, "loss": 0.6571, "step": 5520 }, { "epoch": 0.28048888832480867, "grad_norm": 0.052789013483981816, "learning_rate": 0.000995076728345564, "loss": 0.6725, "step": 5525 }, { "epoch": 0.28074272442284015, "grad_norm": 0.05677596579461743, "learning_rate": 0.000995045669180866, "loss": 0.6413, "step": 5530 }, { "epoch": 0.2809965605208717, "grad_norm": 0.03745942908058603, "learning_rate": 0.000995014512841172, "loss": 0.6909, "step": 5535 }, { "epoch": 0.28125039661890316, "grad_norm": 0.14627372454354676, "learning_rate": 0.0009949832593325978, "loss": 0.6459, "step": 5540 }, { "epoch": 0.2815042327169347, "grad_norm": 0.04133886420786463, "learning_rate": 0.000994951908661278, "loss": 0.6771, "step": 5545 }, { "epoch": 0.2817580688149662, "grad_norm": 0.038383767135896224, "learning_rate": 0.0009949204608333672, "loss": 0.6659, "step": 5550 }, { "epoch": 0.28201190491299766, "grad_norm": 0.03870804386622496, "learning_rate": 0.0009948889158550376, "loss": 0.6628, "step": 5555 }, { "epoch": 0.2822657410110292, "grad_norm": 0.03666141726099845, "learning_rate": 0.0009948572737324822, "loss": 0.6783, "step": 5560 }, { "epoch": 0.28251957710906067, "grad_norm": 0.13133641642519978, "learning_rate": 0.0009948255344719118, "loss": 0.675, "step": 5565 }, { "epoch": 0.2827734132070922, "grad_norm": 0.12174050657136312, "learning_rate": 0.0009947936980795565, "loss": 0.7195, "step": 5570 }, { "epoch": 0.2830272493051237, "grad_norm": 0.058601197176472185, "learning_rate": 0.000994761764561666, "loss": 0.6784, "step": 5575 }, { "epoch": 0.28328108540315516, "grad_norm": 0.16508393923966153, "learning_rate": 0.0009947297339245084, "loss": 0.7202, "step": 5580 }, { "epoch": 0.2835349215011867, "grad_norm": 0.0766947212292105, "learning_rate": 0.0009946976061743712, "loss": 0.6596, "step": 5585 }, { "epoch": 0.2837887575992182, "grad_norm": 0.03340983687623817, "learning_rate": 0.000994665381317561, "loss": 0.6599, "step": 5590 }, { "epoch": 0.2840425936972497, "grad_norm": 0.037573302023749436, "learning_rate": 0.0009946330593604033, "loss": 0.6594, "step": 5595 }, { "epoch": 0.2842964297952812, "grad_norm": 0.03926723139173781, "learning_rate": 0.000994600640309243, "loss": 0.6436, "step": 5600 }, { "epoch": 0.28455026589331267, "grad_norm": 0.033758520131084185, "learning_rate": 0.0009945681241704434, "loss": 0.6706, "step": 5605 }, { "epoch": 0.2848041019913442, "grad_norm": 0.035833106887649714, "learning_rate": 0.0009945355109503872, "loss": 0.6757, "step": 5610 }, { "epoch": 0.2850579380893757, "grad_norm": 0.03439574893245049, "learning_rate": 0.0009945028006554768, "loss": 0.674, "step": 5615 }, { "epoch": 0.2853117741874072, "grad_norm": 0.04115472629656225, "learning_rate": 0.0009944699932921326, "loss": 0.6901, "step": 5620 }, { "epoch": 0.2855656102854387, "grad_norm": 0.042426909272189464, "learning_rate": 0.0009944370888667947, "loss": 0.6709, "step": 5625 }, { "epoch": 0.2858194463834702, "grad_norm": 0.05644132704585222, "learning_rate": 0.0009944040873859218, "loss": 0.6765, "step": 5630 }, { "epoch": 0.2860732824815017, "grad_norm": 0.050269965909692245, "learning_rate": 0.0009943709888559922, "loss": 0.6463, "step": 5635 }, { "epoch": 0.2863271185795332, "grad_norm": 0.03990106543413605, "learning_rate": 0.000994337793283503, "loss": 0.6788, "step": 5640 }, { "epoch": 0.28658095467756467, "grad_norm": 0.039426229153061605, "learning_rate": 0.0009943045006749703, "loss": 0.6477, "step": 5645 }, { "epoch": 0.2868347907755962, "grad_norm": 0.037701616134469464, "learning_rate": 0.0009942711110369291, "loss": 0.6609, "step": 5650 }, { "epoch": 0.2870886268736277, "grad_norm": 0.036088987975116615, "learning_rate": 0.0009942376243759336, "loss": 0.6237, "step": 5655 }, { "epoch": 0.2873424629716592, "grad_norm": 0.039086804987042645, "learning_rate": 0.0009942040406985574, "loss": 0.6313, "step": 5660 }, { "epoch": 0.2875962990696907, "grad_norm": 0.05039912347585908, "learning_rate": 0.0009941703600113926, "loss": 0.6786, "step": 5665 }, { "epoch": 0.2878501351677222, "grad_norm": 0.05153135663276811, "learning_rate": 0.0009941365823210506, "loss": 0.6796, "step": 5670 }, { "epoch": 0.2881039712657537, "grad_norm": 0.048537339426450846, "learning_rate": 0.0009941027076341615, "loss": 0.6335, "step": 5675 }, { "epoch": 0.2883578073637852, "grad_norm": 0.05231418629671102, "learning_rate": 0.0009940687359573752, "loss": 0.689, "step": 5680 }, { "epoch": 0.2886116434618167, "grad_norm": 0.03179751321808205, "learning_rate": 0.00099403466729736, "loss": 0.6398, "step": 5685 }, { "epoch": 0.2888654795598482, "grad_norm": 0.056582858417932216, "learning_rate": 0.000994000501660803, "loss": 0.6721, "step": 5690 }, { "epoch": 0.2891193156578797, "grad_norm": 0.02974455078712651, "learning_rate": 0.0009939662390544115, "loss": 0.6327, "step": 5695 }, { "epoch": 0.2893731517559112, "grad_norm": 0.05164849531976483, "learning_rate": 0.0009939318794849104, "loss": 0.6492, "step": 5700 }, { "epoch": 0.2896269878539427, "grad_norm": 0.03957457040909659, "learning_rate": 0.0009938974229590446, "loss": 0.6436, "step": 5705 }, { "epoch": 0.28988082395197423, "grad_norm": 0.04242341712591207, "learning_rate": 0.000993862869483578, "loss": 0.6644, "step": 5710 }, { "epoch": 0.2901346600500057, "grad_norm": 0.1372690604293506, "learning_rate": 0.0009938282190652928, "loss": 0.681, "step": 5715 }, { "epoch": 0.2903884961480372, "grad_norm": 0.06712065575391647, "learning_rate": 0.0009937934717109912, "loss": 0.6436, "step": 5720 }, { "epoch": 0.2906423322460687, "grad_norm": 0.0657319128974874, "learning_rate": 0.0009937586274274932, "loss": 0.646, "step": 5725 }, { "epoch": 0.2908961683441002, "grad_norm": 0.032380117007307335, "learning_rate": 0.0009937236862216391, "loss": 0.6722, "step": 5730 }, { "epoch": 0.29115000444213174, "grad_norm": 0.051361069870512616, "learning_rate": 0.0009936886481002878, "loss": 0.6756, "step": 5735 }, { "epoch": 0.2914038405401632, "grad_norm": 0.03396960893164104, "learning_rate": 0.0009936535130703169, "loss": 0.6785, "step": 5740 }, { "epoch": 0.2916576766381947, "grad_norm": 0.03936833237945123, "learning_rate": 0.0009936182811386232, "loss": 0.6588, "step": 5745 }, { "epoch": 0.29191151273622623, "grad_norm": 0.05198925092916733, "learning_rate": 0.0009935829523121224, "loss": 0.6675, "step": 5750 }, { "epoch": 0.2921653488342577, "grad_norm": 0.06085607860536531, "learning_rate": 0.0009935475265977498, "loss": 0.6286, "step": 5755 }, { "epoch": 0.29241918493228924, "grad_norm": 0.059359860916179744, "learning_rate": 0.0009935120040024587, "loss": 0.6369, "step": 5760 }, { "epoch": 0.2926730210303207, "grad_norm": 0.039118870920788806, "learning_rate": 0.0009934763845332228, "loss": 0.6544, "step": 5765 }, { "epoch": 0.2929268571283522, "grad_norm": 0.04229042882872178, "learning_rate": 0.0009934406681970332, "loss": 0.6304, "step": 5770 }, { "epoch": 0.29318069322638374, "grad_norm": 0.026802917169532255, "learning_rate": 0.0009934048550009015, "loss": 0.6143, "step": 5775 }, { "epoch": 0.2934345293244152, "grad_norm": 0.03201916028903979, "learning_rate": 0.0009933689449518573, "loss": 0.6586, "step": 5780 }, { "epoch": 0.29368836542244675, "grad_norm": 0.03780618180915201, "learning_rate": 0.0009933329380569494, "loss": 0.6688, "step": 5785 }, { "epoch": 0.29394220152047823, "grad_norm": 0.04436936096005898, "learning_rate": 0.0009932968343232462, "loss": 0.6441, "step": 5790 }, { "epoch": 0.2941960376185097, "grad_norm": 0.05967133531857773, "learning_rate": 0.0009932606337578346, "loss": 0.6466, "step": 5795 }, { "epoch": 0.29444987371654124, "grad_norm": 0.05591640331892786, "learning_rate": 0.0009932243363678203, "loss": 0.6458, "step": 5800 }, { "epoch": 0.2947037098145727, "grad_norm": 0.0350539399039161, "learning_rate": 0.0009931879421603285, "loss": 0.6694, "step": 5805 }, { "epoch": 0.29495754591260426, "grad_norm": 0.047049178898087726, "learning_rate": 0.0009931514511425032, "loss": 0.628, "step": 5810 }, { "epoch": 0.29521138201063574, "grad_norm": 0.06870970965553135, "learning_rate": 0.0009931148633215074, "loss": 0.6383, "step": 5815 }, { "epoch": 0.2954652181086672, "grad_norm": 0.04607237701564132, "learning_rate": 0.000993078178704523, "loss": 0.6395, "step": 5820 }, { "epoch": 0.29571905420669875, "grad_norm": 0.04293642734855891, "learning_rate": 0.0009930413972987513, "loss": 0.6509, "step": 5825 }, { "epoch": 0.29597289030473023, "grad_norm": 0.04186066507422814, "learning_rate": 0.000993004519111412, "loss": 0.689, "step": 5830 }, { "epoch": 0.29622672640276176, "grad_norm": 0.04758901402636238, "learning_rate": 0.0009929675441497441, "loss": 0.6446, "step": 5835 }, { "epoch": 0.29648056250079324, "grad_norm": 0.02860243925691624, "learning_rate": 0.000992930472421006, "loss": 0.6412, "step": 5840 }, { "epoch": 0.2967343985988247, "grad_norm": 0.14274718702473935, "learning_rate": 0.0009928933039324741, "loss": 0.6036, "step": 5845 }, { "epoch": 0.29698823469685626, "grad_norm": 0.048711259308299386, "learning_rate": 0.0009928560386914447, "loss": 0.6508, "step": 5850 }, { "epoch": 0.29724207079488774, "grad_norm": 0.05737639269415528, "learning_rate": 0.000992818676705233, "loss": 0.6338, "step": 5855 }, { "epoch": 0.2974959068929192, "grad_norm": 0.08608374496462033, "learning_rate": 0.0009927812179811727, "loss": 0.7236, "step": 5860 }, { "epoch": 0.29774974299095075, "grad_norm": 0.0849381389952358, "learning_rate": 0.0009927436625266166, "loss": 0.6686, "step": 5865 }, { "epoch": 0.29800357908898223, "grad_norm": 1.0620336080619193, "learning_rate": 0.0009927060103489369, "loss": 0.6706, "step": 5870 }, { "epoch": 0.29825741518701376, "grad_norm": 13.801221772105595, "learning_rate": 0.0009926682614555247, "loss": 2.1038, "step": 5875 }, { "epoch": 0.29851125128504524, "grad_norm": 0.529463438355754, "learning_rate": 0.0009926304158537895, "loss": 1.9905, "step": 5880 }, { "epoch": 0.2987650873830767, "grad_norm": 0.5693873876849637, "learning_rate": 0.0009925924735511603, "loss": 0.7879, "step": 5885 }, { "epoch": 0.29901892348110826, "grad_norm": 0.15207649725469286, "learning_rate": 0.0009925544345550854, "loss": 0.7494, "step": 5890 }, { "epoch": 0.29927275957913974, "grad_norm": 0.053777419398167665, "learning_rate": 0.0009925162988730313, "loss": 0.7054, "step": 5895 }, { "epoch": 0.29952659567717127, "grad_norm": 0.11506938387205977, "learning_rate": 0.0009924780665124839, "loss": 0.6893, "step": 5900 }, { "epoch": 0.29978043177520275, "grad_norm": 0.07887804598963974, "learning_rate": 0.000992439737480948, "loss": 0.687, "step": 5905 }, { "epoch": 0.30003426787323423, "grad_norm": 0.06054971527019416, "learning_rate": 0.0009924013117859475, "loss": 0.7238, "step": 5910 }, { "epoch": 0.30028810397126576, "grad_norm": 0.04536799993236666, "learning_rate": 0.0009923627894350248, "loss": 0.7173, "step": 5915 }, { "epoch": 0.30054194006929724, "grad_norm": 0.05390447300870516, "learning_rate": 0.0009923241704357423, "loss": 0.7083, "step": 5920 }, { "epoch": 0.3007957761673288, "grad_norm": 0.05672780707635087, "learning_rate": 0.0009922854547956802, "loss": 0.6606, "step": 5925 }, { "epoch": 0.30104961226536026, "grad_norm": 0.03885747550342492, "learning_rate": 0.0009922466425224383, "loss": 0.6993, "step": 5930 }, { "epoch": 0.30130344836339173, "grad_norm": 0.07191125789862535, "learning_rate": 0.0009922077336236353, "loss": 0.6375, "step": 5935 }, { "epoch": 0.30155728446142327, "grad_norm": 0.10081257590820915, "learning_rate": 0.000992168728106909, "loss": 0.6541, "step": 5940 }, { "epoch": 0.30181112055945475, "grad_norm": 0.03547952948787697, "learning_rate": 0.0009921296259799155, "loss": 0.6846, "step": 5945 }, { "epoch": 0.3020649566574863, "grad_norm": 0.04217686067301573, "learning_rate": 0.000992090427250331, "loss": 0.6567, "step": 5950 }, { "epoch": 0.30231879275551776, "grad_norm": 0.05110904105032828, "learning_rate": 0.0009920511319258495, "loss": 0.6976, "step": 5955 }, { "epoch": 0.30257262885354924, "grad_norm": 0.03701759809254075, "learning_rate": 0.0009920117400141848, "loss": 0.67, "step": 5960 }, { "epoch": 0.3028264649515808, "grad_norm": 0.7757113792101427, "learning_rate": 0.0009919722515230691, "loss": 0.6825, "step": 5965 }, { "epoch": 0.30308030104961226, "grad_norm": 0.10831424664913561, "learning_rate": 0.0009919326664602538, "loss": 0.6896, "step": 5970 }, { "epoch": 0.3033341371476438, "grad_norm": 0.09797456779700615, "learning_rate": 0.0009918929848335095, "loss": 0.7677, "step": 5975 }, { "epoch": 0.30358797324567527, "grad_norm": 0.05159857874021976, "learning_rate": 0.0009918532066506252, "loss": 0.7122, "step": 5980 }, { "epoch": 0.30384180934370675, "grad_norm": 0.05813781521982605, "learning_rate": 0.0009918133319194093, "loss": 0.6823, "step": 5985 }, { "epoch": 0.3040956454417383, "grad_norm": 0.03671398334295547, "learning_rate": 0.000991773360647689, "loss": 0.6991, "step": 5990 }, { "epoch": 0.30434948153976976, "grad_norm": 0.07402367959365846, "learning_rate": 0.0009917332928433106, "loss": 0.6946, "step": 5995 }, { "epoch": 0.3046033176378013, "grad_norm": 0.0653670605879878, "learning_rate": 0.000991693128514139, "loss": 0.7273, "step": 6000 }, { "epoch": 0.3048571537358328, "grad_norm": 0.03813871957965758, "learning_rate": 0.0009916528676680585, "loss": 0.6652, "step": 6005 }, { "epoch": 0.30511098983386425, "grad_norm": 0.04376402030900822, "learning_rate": 0.0009916125103129718, "loss": 0.6455, "step": 6010 }, { "epoch": 0.3053648259318958, "grad_norm": 0.04963286911499542, "learning_rate": 0.000991572056456801, "loss": 0.7049, "step": 6015 }, { "epoch": 0.30561866202992727, "grad_norm": 0.030884815020089395, "learning_rate": 0.000991531506107487, "loss": 0.6823, "step": 6020 }, { "epoch": 0.3058724981279588, "grad_norm": 0.03715644515895416, "learning_rate": 0.0009914908592729896, "loss": 0.7246, "step": 6025 }, { "epoch": 0.3061263342259903, "grad_norm": 0.05308010746588163, "learning_rate": 0.0009914501159612877, "loss": 0.6453, "step": 6030 }, { "epoch": 0.30638017032402176, "grad_norm": 0.042657404727221966, "learning_rate": 0.0009914092761803789, "loss": 0.6524, "step": 6035 }, { "epoch": 0.3066340064220533, "grad_norm": 0.06017465873203829, "learning_rate": 0.0009913683399382796, "loss": 0.7004, "step": 6040 }, { "epoch": 0.3068878425200848, "grad_norm": 0.038922670583128045, "learning_rate": 0.0009913273072430257, "loss": 0.6804, "step": 6045 }, { "epoch": 0.3071416786181163, "grad_norm": 0.0648567542128592, "learning_rate": 0.0009912861781026718, "loss": 0.6558, "step": 6050 }, { "epoch": 0.3073955147161478, "grad_norm": 0.03494754440461928, "learning_rate": 0.0009912449525252911, "loss": 0.666, "step": 6055 }, { "epoch": 0.30764935081417927, "grad_norm": 0.03092884139080322, "learning_rate": 0.000991203630518976, "loss": 0.6688, "step": 6060 }, { "epoch": 0.3079031869122108, "grad_norm": 0.05872150090284288, "learning_rate": 0.0009911622120918379, "loss": 0.6385, "step": 6065 }, { "epoch": 0.3081570230102423, "grad_norm": 0.08520802622712559, "learning_rate": 0.0009911206972520068, "loss": 0.6635, "step": 6070 }, { "epoch": 0.30841085910827376, "grad_norm": 0.06879085230717281, "learning_rate": 0.0009910790860076324, "loss": 0.7711, "step": 6075 }, { "epoch": 0.3086646952063053, "grad_norm": 0.06496604434818472, "learning_rate": 0.0009910373783668823, "loss": 0.6546, "step": 6080 }, { "epoch": 0.3089185313043368, "grad_norm": 0.03788866020152313, "learning_rate": 0.0009909955743379435, "loss": 0.6574, "step": 6085 }, { "epoch": 0.3091723674023683, "grad_norm": 0.036029343391252786, "learning_rate": 0.0009909536739290221, "loss": 0.6614, "step": 6090 }, { "epoch": 0.3094262035003998, "grad_norm": 0.0357204003060438, "learning_rate": 0.0009909116771483427, "loss": 0.6273, "step": 6095 }, { "epoch": 0.30968003959843127, "grad_norm": 0.036306841185922825, "learning_rate": 0.0009908695840041496, "loss": 0.6725, "step": 6100 }, { "epoch": 0.3099338756964628, "grad_norm": 0.03660198406181924, "learning_rate": 0.000990827394504705, "loss": 0.647, "step": 6105 }, { "epoch": 0.3101877117944943, "grad_norm": 0.04095269101364938, "learning_rate": 0.0009907851086582906, "loss": 0.6755, "step": 6110 }, { "epoch": 0.3104415478925258, "grad_norm": 0.03247697430503874, "learning_rate": 0.0009907427264732069, "loss": 0.6813, "step": 6115 }, { "epoch": 0.3106953839905573, "grad_norm": 0.03162996739245832, "learning_rate": 0.0009907002479577734, "loss": 0.6643, "step": 6120 }, { "epoch": 0.3109492200885888, "grad_norm": 0.5455514035154928, "learning_rate": 0.0009906576731203282, "loss": 0.6433, "step": 6125 }, { "epoch": 0.3112030561866203, "grad_norm": 0.044476465750440224, "learning_rate": 0.0009906150019692288, "loss": 0.6653, "step": 6130 }, { "epoch": 0.3114568922846518, "grad_norm": 0.05927549810439251, "learning_rate": 0.000990572234512851, "loss": 0.6589, "step": 6135 }, { "epoch": 0.3117107283826833, "grad_norm": 0.056665905206855084, "learning_rate": 0.0009905293707595903, "loss": 0.649, "step": 6140 }, { "epoch": 0.3119645644807148, "grad_norm": 0.03331713795791355, "learning_rate": 0.0009904864107178602, "loss": 0.64, "step": 6145 }, { "epoch": 0.3122184005787463, "grad_norm": 0.04406210853179684, "learning_rate": 0.000990443354396094, "loss": 0.6702, "step": 6150 }, { "epoch": 0.3124722366767778, "grad_norm": 0.057693446080025364, "learning_rate": 0.000990400201802743, "loss": 0.7062, "step": 6155 }, { "epoch": 0.3127260727748093, "grad_norm": 0.035021125314635915, "learning_rate": 0.0009903569529462778, "loss": 0.697, "step": 6160 }, { "epoch": 0.31297990887284083, "grad_norm": 0.055650402580445305, "learning_rate": 0.0009903136078351885, "loss": 0.648, "step": 6165 }, { "epoch": 0.3132337449708723, "grad_norm": 0.06196313978890203, "learning_rate": 0.0009902701664779828, "loss": 0.6454, "step": 6170 }, { "epoch": 0.3134875810689038, "grad_norm": 0.038941666671227525, "learning_rate": 0.0009902266288831887, "loss": 0.676, "step": 6175 }, { "epoch": 0.3137414171669353, "grad_norm": 0.0415527424909999, "learning_rate": 0.000990182995059352, "loss": 0.6999, "step": 6180 }, { "epoch": 0.3139952532649668, "grad_norm": 0.032478914330314065, "learning_rate": 0.0009901392650150378, "loss": 0.6384, "step": 6185 }, { "epoch": 0.31424908936299834, "grad_norm": 0.03685841261439452, "learning_rate": 0.0009900954387588303, "loss": 0.6149, "step": 6190 }, { "epoch": 0.3145029254610298, "grad_norm": 0.0576119132892154, "learning_rate": 0.0009900515162993325, "loss": 0.6754, "step": 6195 }, { "epoch": 0.3147567615590613, "grad_norm": 0.03227069972941047, "learning_rate": 0.0009900074976451655, "loss": 0.6563, "step": 6200 }, { "epoch": 0.31501059765709283, "grad_norm": 0.03374111959001576, "learning_rate": 0.0009899633828049706, "loss": 0.6457, "step": 6205 }, { "epoch": 0.3152644337551243, "grad_norm": 0.036155101851612435, "learning_rate": 0.0009899191717874071, "loss": 0.646, "step": 6210 }, { "epoch": 0.31551826985315584, "grad_norm": 0.031824458816045474, "learning_rate": 0.0009898748646011534, "loss": 0.6559, "step": 6215 }, { "epoch": 0.3157721059511873, "grad_norm": 0.03276354958142416, "learning_rate": 0.0009898304612549068, "loss": 0.6378, "step": 6220 }, { "epoch": 0.3160259420492188, "grad_norm": 0.029188354303673416, "learning_rate": 0.0009897859617573833, "loss": 0.6496, "step": 6225 }, { "epoch": 0.31627977814725033, "grad_norm": 0.03357230960317099, "learning_rate": 0.0009897413661173182, "loss": 0.6655, "step": 6230 }, { "epoch": 0.3165336142452818, "grad_norm": 0.03530965710230692, "learning_rate": 0.0009896966743434654, "loss": 0.6709, "step": 6235 }, { "epoch": 0.31678745034331335, "grad_norm": 0.03547112168077008, "learning_rate": 0.0009896518864445974, "loss": 0.6459, "step": 6240 }, { "epoch": 0.3170412864413448, "grad_norm": 0.05701195512800427, "learning_rate": 0.0009896070024295058, "loss": 0.6933, "step": 6245 }, { "epoch": 0.3172951225393763, "grad_norm": 0.034050322178401336, "learning_rate": 0.0009895620223070013, "loss": 0.6286, "step": 6250 }, { "epoch": 0.31754895863740784, "grad_norm": 0.10844739490541815, "learning_rate": 0.0009895169460859136, "loss": 0.6691, "step": 6255 }, { "epoch": 0.3178027947354393, "grad_norm": 0.04811906636847503, "learning_rate": 0.0009894717737750905, "loss": 0.6406, "step": 6260 }, { "epoch": 0.31805663083347085, "grad_norm": 0.03651016020761419, "learning_rate": 0.000989426505383399, "loss": 0.6404, "step": 6265 }, { "epoch": 0.31831046693150233, "grad_norm": 0.03909160423336249, "learning_rate": 0.0009893811409197254, "loss": 0.6718, "step": 6270 }, { "epoch": 0.3185643030295338, "grad_norm": 0.03433471167278222, "learning_rate": 0.0009893356803929742, "loss": 0.7035, "step": 6275 }, { "epoch": 0.31881813912756535, "grad_norm": 0.0315707692611077, "learning_rate": 0.0009892901238120694, "loss": 0.6278, "step": 6280 }, { "epoch": 0.3190719752255968, "grad_norm": 0.03181406843213265, "learning_rate": 0.0009892444711859536, "loss": 0.6195, "step": 6285 }, { "epoch": 0.3193258113236283, "grad_norm": 0.03252733678185362, "learning_rate": 0.0009891987225235876, "loss": 0.6591, "step": 6290 }, { "epoch": 0.31957964742165984, "grad_norm": 0.028275724026086066, "learning_rate": 0.0009891528778339523, "loss": 0.632, "step": 6295 }, { "epoch": 0.3198334835196913, "grad_norm": 0.0332639388947232, "learning_rate": 0.0009891069371260463, "loss": 0.6706, "step": 6300 }, { "epoch": 0.32008731961772285, "grad_norm": 0.04023616497430709, "learning_rate": 0.0009890609004088878, "loss": 0.6397, "step": 6305 }, { "epoch": 0.32034115571575433, "grad_norm": 0.04189562916683901, "learning_rate": 0.0009890147676915133, "loss": 0.6589, "step": 6310 }, { "epoch": 0.3205949918137858, "grad_norm": 0.04615410615909303, "learning_rate": 0.0009889685389829787, "loss": 0.654, "step": 6315 }, { "epoch": 0.32084882791181735, "grad_norm": 0.0876708805444767, "learning_rate": 0.0009889222142923585, "loss": 0.6873, "step": 6320 }, { "epoch": 0.3211026640098488, "grad_norm": 0.0731800370414708, "learning_rate": 0.0009888757936287458, "loss": 0.6661, "step": 6325 }, { "epoch": 0.32135650010788036, "grad_norm": 0.04489055013050584, "learning_rate": 0.0009888292770012528, "loss": 0.6437, "step": 6330 }, { "epoch": 0.32161033620591184, "grad_norm": 0.05950573079091027, "learning_rate": 0.0009887826644190106, "loss": 0.6107, "step": 6335 }, { "epoch": 0.3218641723039433, "grad_norm": 0.03311301201808374, "learning_rate": 0.0009887359558911689, "loss": 0.6396, "step": 6340 }, { "epoch": 0.32211800840197485, "grad_norm": 0.04257944673946886, "learning_rate": 0.0009886891514268963, "loss": 0.6497, "step": 6345 }, { "epoch": 0.32237184450000633, "grad_norm": 0.03870234251780305, "learning_rate": 0.0009886422510353805, "loss": 0.657, "step": 6350 }, { "epoch": 0.32262568059803787, "grad_norm": 0.03016914715973346, "learning_rate": 0.0009885952547258278, "loss": 0.6503, "step": 6355 }, { "epoch": 0.32287951669606935, "grad_norm": 0.039652301246605876, "learning_rate": 0.000988548162507463, "loss": 0.7024, "step": 6360 }, { "epoch": 0.3231333527941008, "grad_norm": 0.032827930330652684, "learning_rate": 0.0009885009743895302, "loss": 0.6377, "step": 6365 }, { "epoch": 0.32338718889213236, "grad_norm": 0.04068567499221553, "learning_rate": 0.0009884536903812923, "loss": 0.6727, "step": 6370 }, { "epoch": 0.32364102499016384, "grad_norm": 0.0345449057811016, "learning_rate": 0.000988406310492031, "loss": 0.6875, "step": 6375 }, { "epoch": 0.3238948610881954, "grad_norm": 0.04902874284849529, "learning_rate": 0.0009883588347310466, "loss": 0.6455, "step": 6380 }, { "epoch": 0.32414869718622685, "grad_norm": 0.03422848521805073, "learning_rate": 0.0009883112631076585, "loss": 0.6338, "step": 6385 }, { "epoch": 0.32440253328425833, "grad_norm": 0.03165939758398714, "learning_rate": 0.0009882635956312046, "loss": 0.6472, "step": 6390 }, { "epoch": 0.32465636938228987, "grad_norm": 0.027078597446684824, "learning_rate": 0.0009882158323110417, "loss": 0.6291, "step": 6395 }, { "epoch": 0.32491020548032135, "grad_norm": 0.03356428796023044, "learning_rate": 0.0009881679731565457, "loss": 0.649, "step": 6400 }, { "epoch": 0.3251640415783529, "grad_norm": 0.0644450340702738, "learning_rate": 0.000988120018177111, "loss": 0.6616, "step": 6405 }, { "epoch": 0.32541787767638436, "grad_norm": 0.049328556792104926, "learning_rate": 0.0009880719673821513, "loss": 0.6334, "step": 6410 }, { "epoch": 0.32567171377441584, "grad_norm": 0.03340548046894897, "learning_rate": 0.000988023820781098, "loss": 0.6107, "step": 6415 }, { "epoch": 0.3259255498724474, "grad_norm": 0.031492806873797255, "learning_rate": 0.000987975578383403, "loss": 0.6654, "step": 6420 }, { "epoch": 0.32617938597047885, "grad_norm": 0.03786542334456132, "learning_rate": 0.0009879272401985349, "loss": 0.6533, "step": 6425 }, { "epoch": 0.3264332220685104, "grad_norm": 0.03333849837492178, "learning_rate": 0.0009878788062359831, "loss": 0.6664, "step": 6430 }, { "epoch": 0.32668705816654187, "grad_norm": 0.03610249374246746, "learning_rate": 0.0009878302765052548, "loss": 0.633, "step": 6435 }, { "epoch": 0.32694089426457335, "grad_norm": 0.031209587596926817, "learning_rate": 0.0009877816510158756, "loss": 0.6613, "step": 6440 }, { "epoch": 0.3271947303626049, "grad_norm": 0.03483245394564131, "learning_rate": 0.0009877329297773914, "loss": 0.6727, "step": 6445 }, { "epoch": 0.32744856646063636, "grad_norm": 0.034854234123111616, "learning_rate": 0.000987684112799365, "loss": 0.6448, "step": 6450 }, { "epoch": 0.3277024025586679, "grad_norm": 0.03027463862874961, "learning_rate": 0.0009876352000913796, "loss": 0.6376, "step": 6455 }, { "epoch": 0.3279562386566994, "grad_norm": 0.029904982015815273, "learning_rate": 0.000987586191663036, "loss": 0.6271, "step": 6460 }, { "epoch": 0.32821007475473085, "grad_norm": 0.04606450117610927, "learning_rate": 0.0009875370875239548, "loss": 0.6324, "step": 6465 }, { "epoch": 0.3284639108527624, "grad_norm": 0.047345984810625526, "learning_rate": 0.0009874878876837746, "loss": 0.6559, "step": 6470 }, { "epoch": 0.32871774695079387, "grad_norm": 0.027399081554610526, "learning_rate": 0.0009874385921521533, "loss": 0.6782, "step": 6475 }, { "epoch": 0.3289715830488254, "grad_norm": 0.03402265966097058, "learning_rate": 0.000987389200938767, "loss": 0.6826, "step": 6480 }, { "epoch": 0.3292254191468569, "grad_norm": 0.07145987939489644, "learning_rate": 0.0009873397140533111, "loss": 0.648, "step": 6485 }, { "epoch": 0.32947925524488836, "grad_norm": 0.030640240717414162, "learning_rate": 0.0009872901315054999, "loss": 0.6225, "step": 6490 }, { "epoch": 0.3297330913429199, "grad_norm": 0.029273085435207902, "learning_rate": 0.000987240453305066, "loss": 0.668, "step": 6495 }, { "epoch": 0.32998692744095137, "grad_norm": 0.027915653420681048, "learning_rate": 0.0009871906794617607, "loss": 0.675, "step": 6500 }, { "epoch": 0.33024076353898285, "grad_norm": 0.028085088656119427, "learning_rate": 0.0009871408099853547, "loss": 0.6468, "step": 6505 }, { "epoch": 0.3304945996370144, "grad_norm": 0.03469790217987671, "learning_rate": 0.0009870908448856373, "loss": 0.6291, "step": 6510 }, { "epoch": 0.33074843573504586, "grad_norm": 0.03940795336268566, "learning_rate": 0.000987040784172416, "loss": 0.6471, "step": 6515 }, { "epoch": 0.3310022718330774, "grad_norm": 0.0372777835424837, "learning_rate": 0.0009869906278555177, "loss": 0.6434, "step": 6520 }, { "epoch": 0.3312561079311089, "grad_norm": 0.05751236855268058, "learning_rate": 0.0009869403759447876, "loss": 0.6963, "step": 6525 }, { "epoch": 0.33150994402914036, "grad_norm": 0.028415506939899773, "learning_rate": 0.0009868900284500904, "loss": 0.6116, "step": 6530 }, { "epoch": 0.3317637801271719, "grad_norm": 0.06720606915783799, "learning_rate": 0.0009868395853813085, "loss": 0.665, "step": 6535 }, { "epoch": 0.33201761622520337, "grad_norm": 0.037437450250349114, "learning_rate": 0.000986789046748344, "loss": 0.6683, "step": 6540 }, { "epoch": 0.3322714523232349, "grad_norm": 0.03050364304716209, "learning_rate": 0.000986738412561117, "loss": 0.6796, "step": 6545 }, { "epoch": 0.3325252884212664, "grad_norm": 0.03574775300175517, "learning_rate": 0.0009866876828295672, "loss": 0.6177, "step": 6550 }, { "epoch": 0.33277912451929786, "grad_norm": 0.02999643436973219, "learning_rate": 0.0009866368575636522, "loss": 0.6632, "step": 6555 }, { "epoch": 0.3330329606173294, "grad_norm": 0.034667486256933826, "learning_rate": 0.0009865859367733489, "loss": 0.6685, "step": 6560 }, { "epoch": 0.3332867967153609, "grad_norm": 0.035749048566894245, "learning_rate": 0.0009865349204686532, "loss": 0.624, "step": 6565 }, { "epoch": 0.3335406328133924, "grad_norm": 0.02721431745155214, "learning_rate": 0.0009864838086595783, "loss": 0.6438, "step": 6570 }, { "epoch": 0.3337944689114239, "grad_norm": 0.030303232949427904, "learning_rate": 0.0009864326013561584, "loss": 0.6504, "step": 6575 }, { "epoch": 0.33404830500945537, "grad_norm": 0.033151379828375675, "learning_rate": 0.0009863812985684446, "loss": 0.6616, "step": 6580 }, { "epoch": 0.3343021411074869, "grad_norm": 0.03492354659989374, "learning_rate": 0.0009863299003065073, "loss": 0.6649, "step": 6585 }, { "epoch": 0.3345559772055184, "grad_norm": 0.029289412856450096, "learning_rate": 0.000986278406580436, "loss": 0.671, "step": 6590 }, { "epoch": 0.3348098133035499, "grad_norm": 0.029758417773996532, "learning_rate": 0.0009862268174003386, "loss": 0.6431, "step": 6595 }, { "epoch": 0.3350636494015814, "grad_norm": 0.02917170623260094, "learning_rate": 0.0009861751327763415, "loss": 0.6586, "step": 6600 }, { "epoch": 0.3353174854996129, "grad_norm": 0.039238413473097804, "learning_rate": 0.0009861233527185907, "loss": 0.6523, "step": 6605 }, { "epoch": 0.3355713215976444, "grad_norm": 0.05769974377090198, "learning_rate": 0.00098607147723725, "loss": 0.6534, "step": 6610 }, { "epoch": 0.3358251576956759, "grad_norm": 0.05207362585582225, "learning_rate": 0.000986019506342502, "loss": 0.628, "step": 6615 }, { "epoch": 0.3360789937937074, "grad_norm": 0.042481555414880016, "learning_rate": 0.0009859674400445491, "loss": 0.6352, "step": 6620 }, { "epoch": 0.3363328298917389, "grad_norm": 0.038592274715997285, "learning_rate": 0.0009859152783536112, "loss": 0.655, "step": 6625 }, { "epoch": 0.3365866659897704, "grad_norm": 0.03810235969474428, "learning_rate": 0.0009858630212799273, "loss": 0.6078, "step": 6630 }, { "epoch": 0.3368405020878019, "grad_norm": 0.04141260814892809, "learning_rate": 0.0009858106688337552, "loss": 0.6309, "step": 6635 }, { "epoch": 0.3370943381858334, "grad_norm": 0.050224588657700506, "learning_rate": 0.0009857582210253718, "loss": 0.6598, "step": 6640 }, { "epoch": 0.33734817428386493, "grad_norm": 0.044838404082403416, "learning_rate": 0.000985705677865072, "loss": 0.643, "step": 6645 }, { "epoch": 0.3376020103818964, "grad_norm": 0.039078603300489616, "learning_rate": 0.0009856530393631698, "loss": 0.6293, "step": 6650 }, { "epoch": 0.3378558464799279, "grad_norm": 0.049346205757841856, "learning_rate": 0.0009856003055299979, "loss": 0.67, "step": 6655 }, { "epoch": 0.3381096825779594, "grad_norm": 0.0467859227211263, "learning_rate": 0.0009855474763759075, "loss": 0.6425, "step": 6660 }, { "epoch": 0.3383635186759909, "grad_norm": 0.03718081935719444, "learning_rate": 0.0009854945519112692, "loss": 0.65, "step": 6665 }, { "epoch": 0.33861735477402244, "grad_norm": 0.04145755563324682, "learning_rate": 0.0009854415321464715, "loss": 0.6424, "step": 6670 }, { "epoch": 0.3388711908720539, "grad_norm": 0.03672619994306692, "learning_rate": 0.0009853884170919218, "loss": 0.66, "step": 6675 }, { "epoch": 0.3391250269700854, "grad_norm": 0.03752204942770816, "learning_rate": 0.0009853352067580466, "loss": 0.6683, "step": 6680 }, { "epoch": 0.33937886306811693, "grad_norm": 0.03222882623511126, "learning_rate": 0.0009852819011552908, "loss": 0.6438, "step": 6685 }, { "epoch": 0.3396326991661484, "grad_norm": 0.02965590085193827, "learning_rate": 0.0009852285002941174, "loss": 0.6642, "step": 6690 }, { "epoch": 0.33988653526417995, "grad_norm": 0.03879854510223968, "learning_rate": 0.0009851750041850098, "loss": 0.6477, "step": 6695 }, { "epoch": 0.3401403713622114, "grad_norm": 0.14912016060061054, "learning_rate": 0.000985121412838468, "loss": 0.6301, "step": 6700 }, { "epoch": 0.3403942074602429, "grad_norm": 0.031220805777869454, "learning_rate": 0.0009850677262650124, "loss": 0.6785, "step": 6705 }, { "epoch": 0.34064804355827444, "grad_norm": 0.047945073583954875, "learning_rate": 0.000985013944475181, "loss": 0.6329, "step": 6710 }, { "epoch": 0.3409018796563059, "grad_norm": 0.03125211749366002, "learning_rate": 0.0009849600674795313, "loss": 0.6694, "step": 6715 }, { "epoch": 0.34115571575433745, "grad_norm": 0.030016984119769507, "learning_rate": 0.0009849060952886385, "loss": 0.6489, "step": 6720 }, { "epoch": 0.34140955185236893, "grad_norm": 0.0321881850801395, "learning_rate": 0.0009848520279130979, "loss": 0.6557, "step": 6725 }, { "epoch": 0.3416633879504004, "grad_norm": 0.027489966034472072, "learning_rate": 0.0009847978653635219, "loss": 0.6161, "step": 6730 }, { "epoch": 0.34191722404843194, "grad_norm": 0.03167397292683289, "learning_rate": 0.0009847436076505425, "loss": 0.6661, "step": 6735 }, { "epoch": 0.3421710601464634, "grad_norm": 0.027164902072622888, "learning_rate": 0.0009846892547848106, "loss": 0.6416, "step": 6740 }, { "epoch": 0.3424248962444949, "grad_norm": 0.032186132345030184, "learning_rate": 0.000984634806776995, "loss": 0.6011, "step": 6745 }, { "epoch": 0.34267873234252644, "grad_norm": 0.05422054145392441, "learning_rate": 0.0009845802636377834, "loss": 0.619, "step": 6750 }, { "epoch": 0.3429325684405579, "grad_norm": 0.03126640652473011, "learning_rate": 0.000984525625377883, "loss": 0.663, "step": 6755 }, { "epoch": 0.34318640453858945, "grad_norm": 0.04178980876855967, "learning_rate": 0.0009844708920080185, "loss": 0.6764, "step": 6760 }, { "epoch": 0.34344024063662093, "grad_norm": 0.0479771178058185, "learning_rate": 0.000984416063538934, "loss": 0.636, "step": 6765 }, { "epoch": 0.3436940767346524, "grad_norm": 0.048210015731811606, "learning_rate": 0.0009843611399813921, "loss": 0.6826, "step": 6770 }, { "epoch": 0.34394791283268394, "grad_norm": 0.040035034533658136, "learning_rate": 0.0009843061213461739, "loss": 0.6551, "step": 6775 }, { "epoch": 0.3442017489307154, "grad_norm": 0.04450679708387797, "learning_rate": 0.0009842510076440792, "loss": 0.6789, "step": 6780 }, { "epoch": 0.34445558502874696, "grad_norm": 0.03469053565219521, "learning_rate": 0.0009841957988859268, "loss": 0.636, "step": 6785 }, { "epoch": 0.34470942112677844, "grad_norm": 0.03854111003127534, "learning_rate": 0.0009841404950825536, "loss": 0.6504, "step": 6790 }, { "epoch": 0.3449632572248099, "grad_norm": 0.0422532957102432, "learning_rate": 0.0009840850962448157, "loss": 0.7046, "step": 6795 }, { "epoch": 0.34521709332284145, "grad_norm": 0.042849883678541305, "learning_rate": 0.0009840296023835877, "loss": 0.6982, "step": 6800 }, { "epoch": 0.34547092942087293, "grad_norm": 0.11970895618204253, "learning_rate": 0.0009839740135097624, "loss": 0.7033, "step": 6805 }, { "epoch": 0.34572476551890446, "grad_norm": 0.28594499591001454, "learning_rate": 0.0009839183296342518, "loss": 0.6618, "step": 6810 }, { "epoch": 0.34597860161693594, "grad_norm": 0.09723775531984895, "learning_rate": 0.0009838625507679866, "loss": 0.7034, "step": 6815 }, { "epoch": 0.3462324377149674, "grad_norm": 0.12532852308987766, "learning_rate": 0.0009838066769219155, "loss": 0.7629, "step": 6820 }, { "epoch": 0.34648627381299896, "grad_norm": 0.0753799546279816, "learning_rate": 0.0009837507081070064, "loss": 0.7012, "step": 6825 }, { "epoch": 0.34674010991103044, "grad_norm": 0.23690674591283684, "learning_rate": 0.000983694644334246, "loss": 0.7184, "step": 6830 }, { "epoch": 0.34699394600906197, "grad_norm": 0.06495848284102379, "learning_rate": 0.000983638485614639, "loss": 0.6394, "step": 6835 }, { "epoch": 0.34724778210709345, "grad_norm": 0.10113306645676379, "learning_rate": 0.0009835822319592092, "loss": 0.7055, "step": 6840 }, { "epoch": 0.34750161820512493, "grad_norm": 13.507304609333943, "learning_rate": 0.0009835258833789987, "loss": 0.7508, "step": 6845 }, { "epoch": 0.34775545430315646, "grad_norm": 0.08682835501016006, "learning_rate": 0.0009834694398850687, "loss": 0.6614, "step": 6850 }, { "epoch": 0.34800929040118794, "grad_norm": 0.0777945624030565, "learning_rate": 0.000983412901488499, "loss": 0.6523, "step": 6855 }, { "epoch": 0.3482631264992195, "grad_norm": 0.04009386117281962, "learning_rate": 0.0009833562682003871, "loss": 0.6425, "step": 6860 }, { "epoch": 0.34851696259725096, "grad_norm": 0.07938149186312117, "learning_rate": 0.0009832995400318506, "loss": 0.6724, "step": 6865 }, { "epoch": 0.34877079869528244, "grad_norm": 0.0464733613481905, "learning_rate": 0.0009832427169940243, "loss": 0.6723, "step": 6870 }, { "epoch": 0.34902463479331397, "grad_norm": 0.030541854449360313, "learning_rate": 0.0009831857990980628, "loss": 0.6328, "step": 6875 }, { "epoch": 0.34927847089134545, "grad_norm": 0.08684468777696315, "learning_rate": 0.0009831287863551386, "loss": 0.6537, "step": 6880 }, { "epoch": 0.349532306989377, "grad_norm": 0.032667710885275855, "learning_rate": 0.000983071678776443, "loss": 0.6233, "step": 6885 }, { "epoch": 0.34978614308740846, "grad_norm": 0.06215345815334673, "learning_rate": 0.0009830144763731856, "loss": 0.6523, "step": 6890 }, { "epoch": 0.35003997918543994, "grad_norm": 0.050425409257167854, "learning_rate": 0.0009829571791565956, "loss": 0.6414, "step": 6895 }, { "epoch": 0.3502938152834715, "grad_norm": 0.039909942468084936, "learning_rate": 0.0009828997871379197, "loss": 0.6715, "step": 6900 }, { "epoch": 0.35054765138150296, "grad_norm": 0.03305269203844265, "learning_rate": 0.0009828423003284239, "loss": 0.68, "step": 6905 }, { "epoch": 0.3508014874795345, "grad_norm": 0.06428144126234532, "learning_rate": 0.0009827847187393924, "loss": 0.6522, "step": 6910 }, { "epoch": 0.35105532357756597, "grad_norm": 0.040717882474944, "learning_rate": 0.0009827270423821283, "loss": 0.6798, "step": 6915 }, { "epoch": 0.35130915967559745, "grad_norm": 0.03774641806829989, "learning_rate": 0.000982669271267953, "loss": 0.6249, "step": 6920 }, { "epoch": 0.351562995773629, "grad_norm": 0.22254368278616823, "learning_rate": 0.000982611405408207, "loss": 0.6375, "step": 6925 }, { "epoch": 0.35181683187166046, "grad_norm": 0.0492079469548121, "learning_rate": 0.0009825534448142487, "loss": 0.6596, "step": 6930 }, { "epoch": 0.352070667969692, "grad_norm": 0.030183273462930794, "learning_rate": 0.0009824953894974559, "loss": 0.6492, "step": 6935 }, { "epoch": 0.3523245040677235, "grad_norm": 0.03485104672445065, "learning_rate": 0.0009824372394692242, "loss": 0.6405, "step": 6940 }, { "epoch": 0.35257834016575496, "grad_norm": 0.04394880912256667, "learning_rate": 0.0009823789947409685, "loss": 0.6408, "step": 6945 }, { "epoch": 0.3528321762637865, "grad_norm": 0.062030496326437375, "learning_rate": 0.0009823206553241214, "loss": 0.6799, "step": 6950 }, { "epoch": 0.35308601236181797, "grad_norm": 0.034400042065388416, "learning_rate": 0.0009822622212301354, "loss": 0.684, "step": 6955 }, { "epoch": 0.35333984845984945, "grad_norm": 0.04472477328842319, "learning_rate": 0.0009822036924704803, "loss": 0.6524, "step": 6960 }, { "epoch": 0.353593684557881, "grad_norm": 0.0724030385807204, "learning_rate": 0.000982145069056645, "loss": 0.668, "step": 6965 }, { "epoch": 0.35384752065591246, "grad_norm": 0.19364536723665782, "learning_rate": 0.000982086351000137, "loss": 0.6629, "step": 6970 }, { "epoch": 0.354101356753944, "grad_norm": 0.04733731040455226, "learning_rate": 0.0009820275383124826, "loss": 0.6729, "step": 6975 }, { "epoch": 0.3543551928519755, "grad_norm": 0.041046331221462486, "learning_rate": 0.0009819686310052263, "loss": 0.6494, "step": 6980 }, { "epoch": 0.35460902895000695, "grad_norm": 0.030904028187576038, "learning_rate": 0.0009819096290899312, "loss": 0.6761, "step": 6985 }, { "epoch": 0.3548628650480385, "grad_norm": 0.06764767017877209, "learning_rate": 0.0009818505325781793, "loss": 0.6716, "step": 6990 }, { "epoch": 0.35511670114606997, "grad_norm": 0.028029672458931103, "learning_rate": 0.000981791341481571, "loss": 0.6701, "step": 6995 }, { "epoch": 0.3553705372441015, "grad_norm": 0.05118116772844429, "learning_rate": 0.0009817320558117247, "loss": 0.663, "step": 7000 }, { "epoch": 0.355624373342133, "grad_norm": 0.07241085260585647, "learning_rate": 0.0009816726755802784, "loss": 0.6521, "step": 7005 }, { "epoch": 0.35587820944016446, "grad_norm": 0.041960569266684886, "learning_rate": 0.000981613200798888, "loss": 0.6904, "step": 7010 }, { "epoch": 0.356132045538196, "grad_norm": 0.052753495247033405, "learning_rate": 0.000981553631479228, "loss": 0.6356, "step": 7015 }, { "epoch": 0.3563858816362275, "grad_norm": 0.032282399891573395, "learning_rate": 0.0009814939676329917, "loss": 0.6411, "step": 7020 }, { "epoch": 0.356639717734259, "grad_norm": 0.03581374152863383, "learning_rate": 0.0009814342092718908, "loss": 0.6605, "step": 7025 }, { "epoch": 0.3568935538322905, "grad_norm": 0.03356418157006434, "learning_rate": 0.0009813743564076557, "loss": 0.6459, "step": 7030 }, { "epoch": 0.35714738993032197, "grad_norm": 0.027321655354040797, "learning_rate": 0.0009813144090520347, "loss": 0.6455, "step": 7035 }, { "epoch": 0.3574012260283535, "grad_norm": 0.036750526110344274, "learning_rate": 0.0009812543672167958, "loss": 0.6281, "step": 7040 }, { "epoch": 0.357655062126385, "grad_norm": 0.03514246272392273, "learning_rate": 0.0009811942309137242, "loss": 0.676, "step": 7045 }, { "epoch": 0.3579088982244165, "grad_norm": 0.061078619376600155, "learning_rate": 0.0009811340001546253, "loss": 0.66, "step": 7050 }, { "epoch": 0.358162734322448, "grad_norm": 0.03588660012791982, "learning_rate": 0.0009810736749513212, "loss": 0.6456, "step": 7055 }, { "epoch": 0.3584165704204795, "grad_norm": 0.032510848869803814, "learning_rate": 0.000981013255315654, "loss": 0.6658, "step": 7060 }, { "epoch": 0.358670406518511, "grad_norm": 0.03332754665414955, "learning_rate": 0.0009809527412594837, "loss": 0.6503, "step": 7065 }, { "epoch": 0.3589242426165425, "grad_norm": 0.03149052779731926, "learning_rate": 0.0009808921327946886, "loss": 0.6563, "step": 7070 }, { "epoch": 0.359178078714574, "grad_norm": 0.04408836704626032, "learning_rate": 0.000980831429933166, "loss": 0.6587, "step": 7075 }, { "epoch": 0.3594319148126055, "grad_norm": 0.029092555752435204, "learning_rate": 0.0009807706326868317, "loss": 0.6508, "step": 7080 }, { "epoch": 0.359685750910637, "grad_norm": 0.0310181753692322, "learning_rate": 0.00098070974106762, "loss": 0.6438, "step": 7085 }, { "epoch": 0.3599395870086685, "grad_norm": 0.04993998965712036, "learning_rate": 0.0009806487550874832, "loss": 0.635, "step": 7090 }, { "epoch": 0.3601934231067, "grad_norm": 0.04501979297683468, "learning_rate": 0.0009805876747583928, "loss": 0.6353, "step": 7095 }, { "epoch": 0.36044725920473153, "grad_norm": 0.04933885261852858, "learning_rate": 0.0009805265000923384, "loss": 0.6906, "step": 7100 }, { "epoch": 0.360701095302763, "grad_norm": 0.025031721543288105, "learning_rate": 0.0009804652311013286, "loss": 0.62, "step": 7105 }, { "epoch": 0.3609549314007945, "grad_norm": 0.03681142313849968, "learning_rate": 0.00098040386779739, "loss": 0.6725, "step": 7110 }, { "epoch": 0.361208767498826, "grad_norm": 0.02848773898790337, "learning_rate": 0.0009803424101925678, "loss": 0.6265, "step": 7115 }, { "epoch": 0.3614626035968575, "grad_norm": 0.0629595006387732, "learning_rate": 0.000980280858298926, "loss": 0.6621, "step": 7120 }, { "epoch": 0.36171643969488904, "grad_norm": 0.031817556084941155, "learning_rate": 0.000980219212128547, "loss": 0.6269, "step": 7125 }, { "epoch": 0.3619702757929205, "grad_norm": 0.03482414094775657, "learning_rate": 0.0009801574716935314, "loss": 0.6279, "step": 7130 }, { "epoch": 0.362224111890952, "grad_norm": 0.02962568942750712, "learning_rate": 0.0009800956370059986, "loss": 0.6858, "step": 7135 }, { "epoch": 0.36247794798898353, "grad_norm": 0.0303541560939534, "learning_rate": 0.0009800337080780866, "loss": 0.6156, "step": 7140 }, { "epoch": 0.362731784087015, "grad_norm": 0.033517980729787206, "learning_rate": 0.0009799716849219515, "loss": 0.6849, "step": 7145 }, { "epoch": 0.36298562018504654, "grad_norm": 0.03930435599887139, "learning_rate": 0.0009799095675497684, "loss": 0.6508, "step": 7150 }, { "epoch": 0.363239456283078, "grad_norm": 0.03507509033957748, "learning_rate": 0.0009798473559737304, "loss": 0.6221, "step": 7155 }, { "epoch": 0.3634932923811095, "grad_norm": 0.0345726788751751, "learning_rate": 0.0009797850502060495, "loss": 0.6244, "step": 7160 }, { "epoch": 0.36374712847914104, "grad_norm": 0.031654784953979505, "learning_rate": 0.0009797226502589558, "loss": 0.6224, "step": 7165 }, { "epoch": 0.3640009645771725, "grad_norm": 0.03475111940127676, "learning_rate": 0.0009796601561446983, "loss": 0.604, "step": 7170 }, { "epoch": 0.364254800675204, "grad_norm": 0.03356801085531049, "learning_rate": 0.0009795975678755441, "loss": 0.6165, "step": 7175 }, { "epoch": 0.36450863677323553, "grad_norm": 0.030202402409465263, "learning_rate": 0.0009795348854637793, "loss": 0.6357, "step": 7180 }, { "epoch": 0.364762472871267, "grad_norm": 0.042891378351094925, "learning_rate": 0.0009794721089217077, "loss": 0.6639, "step": 7185 }, { "epoch": 0.36501630896929854, "grad_norm": 0.026781555972239687, "learning_rate": 0.0009794092382616525, "loss": 0.6193, "step": 7190 }, { "epoch": 0.36527014506733, "grad_norm": 0.058555352910862106, "learning_rate": 0.0009793462734959545, "loss": 0.6684, "step": 7195 }, { "epoch": 0.3655239811653615, "grad_norm": 0.04710681738488992, "learning_rate": 0.0009792832146369734, "loss": 0.6433, "step": 7200 }, { "epoch": 0.36577781726339303, "grad_norm": 0.05005107749394292, "learning_rate": 0.0009792200616970876, "loss": 0.7054, "step": 7205 }, { "epoch": 0.3660316533614245, "grad_norm": 0.03323686626122536, "learning_rate": 0.0009791568146886936, "loss": 0.6797, "step": 7210 }, { "epoch": 0.36628548945945605, "grad_norm": 0.03185618160464647, "learning_rate": 0.0009790934736242064, "loss": 0.6476, "step": 7215 }, { "epoch": 0.3665393255574875, "grad_norm": 0.03692950551425256, "learning_rate": 0.0009790300385160594, "loss": 0.6653, "step": 7220 }, { "epoch": 0.366793161655519, "grad_norm": 0.03377555225120197, "learning_rate": 0.0009789665093767048, "loss": 0.6571, "step": 7225 }, { "epoch": 0.36704699775355054, "grad_norm": 0.029454638958564077, "learning_rate": 0.000978902886218613, "loss": 0.6743, "step": 7230 }, { "epoch": 0.367300833851582, "grad_norm": 0.05946350290356885, "learning_rate": 0.000978839169054273, "loss": 0.6702, "step": 7235 }, { "epoch": 0.36755466994961355, "grad_norm": 0.05244370262839264, "learning_rate": 0.0009787753578961922, "loss": 0.6083, "step": 7240 }, { "epoch": 0.36780850604764503, "grad_norm": 0.03264974043837502, "learning_rate": 0.0009787114527568962, "loss": 0.6166, "step": 7245 }, { "epoch": 0.3680623421456765, "grad_norm": 0.03513830130194355, "learning_rate": 0.0009786474536489292, "loss": 0.6634, "step": 7250 }, { "epoch": 0.36831617824370805, "grad_norm": 0.029479205631305596, "learning_rate": 0.0009785833605848542, "loss": 0.636, "step": 7255 }, { "epoch": 0.3685700143417395, "grad_norm": 0.032292807860328114, "learning_rate": 0.0009785191735772521, "loss": 0.6858, "step": 7260 }, { "epoch": 0.36882385043977106, "grad_norm": 0.04406918705243669, "learning_rate": 0.0009784548926387226, "loss": 0.6267, "step": 7265 }, { "epoch": 0.36907768653780254, "grad_norm": 0.03484394221767657, "learning_rate": 0.000978390517781884, "loss": 0.6394, "step": 7270 }, { "epoch": 0.369331522635834, "grad_norm": 0.03455567149269723, "learning_rate": 0.0009783260490193722, "loss": 0.657, "step": 7275 }, { "epoch": 0.36958535873386555, "grad_norm": 0.0553757152489157, "learning_rate": 0.0009782614863638424, "loss": 0.6519, "step": 7280 }, { "epoch": 0.36983919483189703, "grad_norm": 0.03087746692822628, "learning_rate": 0.000978196829827968, "loss": 0.6106, "step": 7285 }, { "epoch": 0.37009303092992857, "grad_norm": 0.028942872631375818, "learning_rate": 0.0009781320794244408, "loss": 0.689, "step": 7290 }, { "epoch": 0.37034686702796005, "grad_norm": 0.05560542668705997, "learning_rate": 0.0009780672351659707, "loss": 0.6009, "step": 7295 }, { "epoch": 0.3706007031259915, "grad_norm": 0.03597874219905048, "learning_rate": 0.0009780022970652864, "loss": 0.675, "step": 7300 }, { "epoch": 0.37085453922402306, "grad_norm": 0.041750636143700906, "learning_rate": 0.000977937265135135, "loss": 0.6347, "step": 7305 }, { "epoch": 0.37110837532205454, "grad_norm": 0.03053212587846297, "learning_rate": 0.000977872139388282, "loss": 0.6547, "step": 7310 }, { "epoch": 0.3713622114200861, "grad_norm": 0.04623962660097538, "learning_rate": 0.0009778069198375112, "loss": 0.6296, "step": 7315 }, { "epoch": 0.37161604751811755, "grad_norm": 0.053173942447247664, "learning_rate": 0.0009777416064956248, "loss": 0.6557, "step": 7320 }, { "epoch": 0.37186988361614903, "grad_norm": 0.04322723325958535, "learning_rate": 0.0009776761993754435, "loss": 0.6506, "step": 7325 }, { "epoch": 0.37212371971418057, "grad_norm": 0.054474373793746526, "learning_rate": 0.0009776106984898066, "loss": 0.6337, "step": 7330 }, { "epoch": 0.37237755581221205, "grad_norm": 0.07566751344663698, "learning_rate": 0.0009775451038515712, "loss": 0.5778, "step": 7335 }, { "epoch": 0.3726313919102436, "grad_norm": 0.05812108518347267, "learning_rate": 0.0009774794154736135, "loss": 0.6431, "step": 7340 }, { "epoch": 0.37288522800827506, "grad_norm": 0.0569408117979321, "learning_rate": 0.0009774136333688278, "loss": 0.6382, "step": 7345 }, { "epoch": 0.37313906410630654, "grad_norm": 0.04610218065404068, "learning_rate": 0.0009773477575501265, "loss": 0.6282, "step": 7350 }, { "epoch": 0.3733929002043381, "grad_norm": 0.02735821778929375, "learning_rate": 0.0009772817880304412, "loss": 0.639, "step": 7355 }, { "epoch": 0.37364673630236955, "grad_norm": 0.02996563286462766, "learning_rate": 0.0009772157248227212, "loss": 0.643, "step": 7360 }, { "epoch": 0.3739005724004011, "grad_norm": 0.03578755871655436, "learning_rate": 0.000977149567939934, "loss": 0.6244, "step": 7365 }, { "epoch": 0.37415440849843257, "grad_norm": 0.03305618805759151, "learning_rate": 0.0009770833173950663, "loss": 0.6467, "step": 7370 }, { "epoch": 0.37440824459646405, "grad_norm": 0.030303389029491416, "learning_rate": 0.0009770169732011224, "loss": 0.6439, "step": 7375 }, { "epoch": 0.3746620806944956, "grad_norm": 0.028992948847689715, "learning_rate": 0.000976950535371126, "loss": 0.6306, "step": 7380 }, { "epoch": 0.37491591679252706, "grad_norm": 0.03888889527619476, "learning_rate": 0.0009768840039181177, "loss": 0.6581, "step": 7385 }, { "epoch": 0.37516975289055854, "grad_norm": 0.031228572216811263, "learning_rate": 0.0009768173788551576, "loss": 0.6081, "step": 7390 }, { "epoch": 0.3754235889885901, "grad_norm": 0.03181486445785271, "learning_rate": 0.000976750660195324, "loss": 0.6339, "step": 7395 }, { "epoch": 0.37567742508662155, "grad_norm": 0.031669584990155854, "learning_rate": 0.0009766838479517133, "loss": 0.626, "step": 7400 }, { "epoch": 0.3759312611846531, "grad_norm": 0.03222078035458589, "learning_rate": 0.0009766169421374406, "loss": 0.6635, "step": 7405 }, { "epoch": 0.37618509728268457, "grad_norm": 0.02957011725240359, "learning_rate": 0.000976549942765639, "loss": 0.6494, "step": 7410 }, { "epoch": 0.37643893338071605, "grad_norm": 0.03590678349168216, "learning_rate": 0.0009764828498494602, "loss": 0.6354, "step": 7415 }, { "epoch": 0.3766927694787476, "grad_norm": 0.03220030735871836, "learning_rate": 0.0009764156634020742, "loss": 0.6464, "step": 7420 }, { "epoch": 0.37694660557677906, "grad_norm": 0.03607797476983635, "learning_rate": 0.0009763483834366693, "loss": 0.6277, "step": 7425 }, { "epoch": 0.3772004416748106, "grad_norm": 0.11467843343234524, "learning_rate": 0.0009762810099664523, "loss": 0.6575, "step": 7430 }, { "epoch": 0.3774542777728421, "grad_norm": 0.03779371870962273, "learning_rate": 0.0009762135430046483, "loss": 0.6564, "step": 7435 }, { "epoch": 0.37770811387087355, "grad_norm": 0.03333634417865055, "learning_rate": 0.0009761459825645006, "loss": 0.6516, "step": 7440 }, { "epoch": 0.3779619499689051, "grad_norm": 0.03352151545779738, "learning_rate": 0.0009760783286592711, "loss": 0.628, "step": 7445 }, { "epoch": 0.37821578606693657, "grad_norm": 0.031390578882334645, "learning_rate": 0.0009760105813022399, "loss": 0.6631, "step": 7450 }, { "epoch": 0.3784696221649681, "grad_norm": 0.03126775917347245, "learning_rate": 0.0009759427405067054, "loss": 0.6296, "step": 7455 }, { "epoch": 0.3787234582629996, "grad_norm": 0.03189164977628853, "learning_rate": 0.0009758748062859844, "loss": 0.614, "step": 7460 }, { "epoch": 0.37897729436103106, "grad_norm": 0.03924473622722979, "learning_rate": 0.0009758067786534123, "loss": 0.648, "step": 7465 }, { "epoch": 0.3792311304590626, "grad_norm": 0.030124031962455097, "learning_rate": 0.0009757386576223423, "loss": 0.6831, "step": 7470 }, { "epoch": 0.37948496655709407, "grad_norm": 0.026069578510561218, "learning_rate": 0.0009756704432061463, "loss": 0.6449, "step": 7475 }, { "epoch": 0.3797388026551256, "grad_norm": 0.04496395917283666, "learning_rate": 0.0009756021354182145, "loss": 0.6236, "step": 7480 }, { "epoch": 0.3799926387531571, "grad_norm": 0.03492290304715105, "learning_rate": 0.0009755337342719552, "loss": 0.6452, "step": 7485 }, { "epoch": 0.38024647485118857, "grad_norm": 0.03119933363654312, "learning_rate": 0.0009754652397807955, "loss": 0.697, "step": 7490 }, { "epoch": 0.3805003109492201, "grad_norm": 0.025025130639123577, "learning_rate": 0.0009753966519581803, "loss": 0.5946, "step": 7495 }, { "epoch": 0.3807541470472516, "grad_norm": 0.028756961098509868, "learning_rate": 0.0009753279708175731, "loss": 0.6274, "step": 7500 }, { "epoch": 0.3810079831452831, "grad_norm": 0.04267829613214311, "learning_rate": 0.0009752591963724558, "loss": 0.6514, "step": 7505 }, { "epoch": 0.3812618192433146, "grad_norm": 0.027804227185939587, "learning_rate": 0.0009751903286363283, "loss": 0.6459, "step": 7510 }, { "epoch": 0.38151565534134607, "grad_norm": 0.0368399353284197, "learning_rate": 0.0009751213676227091, "loss": 0.6143, "step": 7515 }, { "epoch": 0.3817694914393776, "grad_norm": 0.03566837281270354, "learning_rate": 0.0009750523133451348, "loss": 0.6591, "step": 7520 }, { "epoch": 0.3820233275374091, "grad_norm": 0.04104675754438289, "learning_rate": 0.0009749831658171605, "loss": 0.6285, "step": 7525 }, { "epoch": 0.3822771636354406, "grad_norm": 0.03463924815280617, "learning_rate": 0.0009749139250523596, "loss": 0.6361, "step": 7530 }, { "epoch": 0.3825309997334721, "grad_norm": 0.0371233837488764, "learning_rate": 0.0009748445910643233, "loss": 0.6529, "step": 7535 }, { "epoch": 0.3827848358315036, "grad_norm": 0.030735573426834636, "learning_rate": 0.000974775163866662, "loss": 0.6463, "step": 7540 }, { "epoch": 0.3830386719295351, "grad_norm": 0.028788082474888697, "learning_rate": 0.0009747056434730037, "loss": 0.7047, "step": 7545 }, { "epoch": 0.3832925080275666, "grad_norm": 0.34285998838897536, "learning_rate": 0.0009746360298969951, "loss": 0.6438, "step": 7550 }, { "epoch": 0.3835463441255981, "grad_norm": 0.04150800275275346, "learning_rate": 0.0009745663231523008, "loss": 0.6424, "step": 7555 }, { "epoch": 0.3838001802236296, "grad_norm": 0.08071417524310523, "learning_rate": 0.0009744965232526037, "loss": 0.6581, "step": 7560 }, { "epoch": 0.3840540163216611, "grad_norm": 0.03292538278471775, "learning_rate": 0.0009744266302116056, "loss": 0.6429, "step": 7565 }, { "epoch": 0.3843078524196926, "grad_norm": 0.0553091431056109, "learning_rate": 0.0009743566440430258, "loss": 0.6247, "step": 7570 }, { "epoch": 0.3845616885177241, "grad_norm": 0.0576869905147013, "learning_rate": 0.0009742865647606025, "loss": 0.6362, "step": 7575 }, { "epoch": 0.38481552461575563, "grad_norm": 0.054333812412449035, "learning_rate": 0.0009742163923780918, "loss": 0.6644, "step": 7580 }, { "epoch": 0.3850693607137871, "grad_norm": 0.0458876312027763, "learning_rate": 0.0009741461269092682, "loss": 0.6174, "step": 7585 }, { "epoch": 0.3853231968118186, "grad_norm": 0.026346604004534237, "learning_rate": 0.0009740757683679244, "loss": 0.6396, "step": 7590 }, { "epoch": 0.3855770329098501, "grad_norm": 0.026408511013485066, "learning_rate": 0.0009740053167678715, "loss": 0.641, "step": 7595 }, { "epoch": 0.3858308690078816, "grad_norm": 0.027408625330890064, "learning_rate": 0.0009739347721229388, "loss": 0.6739, "step": 7600 }, { "epoch": 0.38608470510591314, "grad_norm": 0.0261075942022835, "learning_rate": 0.0009738641344469737, "loss": 0.6311, "step": 7605 }, { "epoch": 0.3863385412039446, "grad_norm": 0.035196730638145504, "learning_rate": 0.0009737934037538422, "loss": 0.6283, "step": 7610 }, { "epoch": 0.3865923773019761, "grad_norm": 0.029985199254207513, "learning_rate": 0.0009737225800574285, "loss": 0.5949, "step": 7615 }, { "epoch": 0.38684621340000763, "grad_norm": 0.0314143972278088, "learning_rate": 0.0009736516633716348, "loss": 0.6309, "step": 7620 }, { "epoch": 0.3871000494980391, "grad_norm": 0.040375384651748156, "learning_rate": 0.0009735806537103815, "loss": 0.6346, "step": 7625 }, { "epoch": 0.3873538855960706, "grad_norm": 0.030875166168935275, "learning_rate": 0.0009735095510876077, "loss": 0.6188, "step": 7630 }, { "epoch": 0.3876077216941021, "grad_norm": 0.035657526104679045, "learning_rate": 0.0009734383555172705, "loss": 0.6334, "step": 7635 }, { "epoch": 0.3878615577921336, "grad_norm": 0.03726651149611401, "learning_rate": 0.000973367067013345, "loss": 0.6194, "step": 7640 }, { "epoch": 0.38811539389016514, "grad_norm": 0.0549306418678483, "learning_rate": 0.000973295685589825, "loss": 0.6289, "step": 7645 }, { "epoch": 0.3883692299881966, "grad_norm": 0.029283307487707237, "learning_rate": 0.0009732242112607222, "loss": 0.6263, "step": 7650 }, { "epoch": 0.3886230660862281, "grad_norm": 0.03836427004065547, "learning_rate": 0.0009731526440400667, "loss": 0.671, "step": 7655 }, { "epoch": 0.38887690218425963, "grad_norm": 0.03320166265686439, "learning_rate": 0.0009730809839419069, "loss": 0.679, "step": 7660 }, { "epoch": 0.3891307382822911, "grad_norm": 0.03903157209354159, "learning_rate": 0.0009730092309803091, "loss": 0.6721, "step": 7665 }, { "epoch": 0.38938457438032265, "grad_norm": 0.02937314090643266, "learning_rate": 0.0009729373851693581, "loss": 0.619, "step": 7670 }, { "epoch": 0.3896384104783541, "grad_norm": 0.0567425226530842, "learning_rate": 0.000972865446523157, "loss": 0.6287, "step": 7675 }, { "epoch": 0.3898922465763856, "grad_norm": 0.03706531779273381, "learning_rate": 0.000972793415055827, "loss": 0.6683, "step": 7680 }, { "epoch": 0.39014608267441714, "grad_norm": 0.034876406305835016, "learning_rate": 0.0009727212907815072, "loss": 0.6199, "step": 7685 }, { "epoch": 0.3903999187724486, "grad_norm": 0.04177776494427368, "learning_rate": 0.0009726490737143557, "loss": 0.6593, "step": 7690 }, { "epoch": 0.39065375487048015, "grad_norm": 0.029493444763321025, "learning_rate": 0.0009725767638685481, "loss": 0.6818, "step": 7695 }, { "epoch": 0.39090759096851163, "grad_norm": 0.05797901330208343, "learning_rate": 0.0009725043612582785, "loss": 0.6558, "step": 7700 }, { "epoch": 0.3911614270665431, "grad_norm": 0.03060632068082371, "learning_rate": 0.0009724318658977591, "loss": 0.6483, "step": 7705 }, { "epoch": 0.39141526316457464, "grad_norm": 0.029533842342163073, "learning_rate": 0.0009723592778012205, "loss": 0.5882, "step": 7710 }, { "epoch": 0.3916690992626061, "grad_norm": 0.04016752549164152, "learning_rate": 0.0009722865969829111, "loss": 0.6289, "step": 7715 }, { "epoch": 0.39192293536063766, "grad_norm": 0.02740885040789266, "learning_rate": 0.0009722138234570983, "loss": 0.6119, "step": 7720 }, { "epoch": 0.39217677145866914, "grad_norm": 0.027711495610382927, "learning_rate": 0.0009721409572380666, "loss": 0.6439, "step": 7725 }, { "epoch": 0.3924306075567006, "grad_norm": 0.03504746630633313, "learning_rate": 0.0009720679983401197, "loss": 0.6287, "step": 7730 }, { "epoch": 0.39268444365473215, "grad_norm": 0.030714782507982694, "learning_rate": 0.0009719949467775791, "loss": 0.6305, "step": 7735 }, { "epoch": 0.39293827975276363, "grad_norm": 0.05519161310942375, "learning_rate": 0.000971921802564784, "loss": 0.6733, "step": 7740 }, { "epoch": 0.39319211585079517, "grad_norm": 0.035905400393446016, "learning_rate": 0.0009718485657160927, "loss": 0.6309, "step": 7745 }, { "epoch": 0.39344595194882664, "grad_norm": 0.05482351027401504, "learning_rate": 0.000971775236245881, "loss": 0.6257, "step": 7750 }, { "epoch": 0.3936997880468581, "grad_norm": 0.033754877354996, "learning_rate": 0.0009717018141685432, "loss": 0.6465, "step": 7755 }, { "epoch": 0.39395362414488966, "grad_norm": 0.03394880260163253, "learning_rate": 0.0009716282994984915, "loss": 0.6321, "step": 7760 }, { "epoch": 0.39420746024292114, "grad_norm": 0.06103387015592127, "learning_rate": 0.0009715546922501568, "loss": 0.5975, "step": 7765 }, { "epoch": 0.39446129634095267, "grad_norm": 0.05594462999211319, "learning_rate": 0.0009714809924379875, "loss": 0.6332, "step": 7770 }, { "epoch": 0.39471513243898415, "grad_norm": 0.028505003978790574, "learning_rate": 0.0009714072000764508, "loss": 0.6577, "step": 7775 }, { "epoch": 0.39496896853701563, "grad_norm": 0.028286990335348833, "learning_rate": 0.0009713333151800315, "loss": 0.6279, "step": 7780 }, { "epoch": 0.39522280463504716, "grad_norm": 0.030106626846128356, "learning_rate": 0.0009712593377632331, "loss": 0.6379, "step": 7785 }, { "epoch": 0.39547664073307864, "grad_norm": 0.03861700452164129, "learning_rate": 0.0009711852678405768, "loss": 0.6545, "step": 7790 }, { "epoch": 0.3957304768311102, "grad_norm": 0.02838597618173486, "learning_rate": 0.0009711111054266022, "loss": 0.6671, "step": 7795 }, { "epoch": 0.39598431292914166, "grad_norm": 0.034442366907157136, "learning_rate": 0.000971036850535867, "loss": 0.7508, "step": 7800 }, { "epoch": 0.39623814902717314, "grad_norm": 0.11425516985484442, "learning_rate": 0.0009709625031829473, "loss": 0.66, "step": 7805 }, { "epoch": 0.39649198512520467, "grad_norm": 0.04883647009890089, "learning_rate": 0.0009708880633824366, "loss": 0.644, "step": 7810 }, { "epoch": 0.39674582122323615, "grad_norm": 0.039289664485130486, "learning_rate": 0.0009708135311489475, "loss": 0.6813, "step": 7815 }, { "epoch": 0.3969996573212677, "grad_norm": 0.0375152643384516, "learning_rate": 0.0009707389064971102, "loss": 0.6693, "step": 7820 }, { "epoch": 0.39725349341929916, "grad_norm": 0.03153109087788052, "learning_rate": 0.0009706641894415731, "loss": 0.6824, "step": 7825 }, { "epoch": 0.39750732951733064, "grad_norm": 0.031082795917225698, "learning_rate": 0.0009705893799970029, "loss": 0.6108, "step": 7830 }, { "epoch": 0.3977611656153622, "grad_norm": 0.04178387644138036, "learning_rate": 0.0009705144781780842, "loss": 0.6963, "step": 7835 }, { "epoch": 0.39801500171339366, "grad_norm": 0.06903963849411496, "learning_rate": 0.0009704394839995198, "loss": 0.6825, "step": 7840 }, { "epoch": 0.39826883781142514, "grad_norm": 0.03484218684833334, "learning_rate": 0.0009703643974760307, "loss": 0.6589, "step": 7845 }, { "epoch": 0.39852267390945667, "grad_norm": 0.045063431620059595, "learning_rate": 0.0009702892186223564, "loss": 0.5665, "step": 7850 }, { "epoch": 0.39877651000748815, "grad_norm": 0.029449955491035615, "learning_rate": 0.0009702139474532536, "loss": 0.6465, "step": 7855 }, { "epoch": 0.3990303461055197, "grad_norm": 0.03916288270704511, "learning_rate": 0.0009701385839834979, "loss": 0.6582, "step": 7860 }, { "epoch": 0.39928418220355116, "grad_norm": 0.04166533905758244, "learning_rate": 0.0009700631282278827, "loss": 0.6625, "step": 7865 }, { "epoch": 0.39953801830158264, "grad_norm": 0.024806646516510142, "learning_rate": 0.0009699875802012197, "loss": 0.6456, "step": 7870 }, { "epoch": 0.3997918543996142, "grad_norm": 0.0349710848699053, "learning_rate": 0.0009699119399183385, "loss": 0.6463, "step": 7875 }, { "epoch": 0.40004569049764566, "grad_norm": 0.031334987561882265, "learning_rate": 0.0009698362073940869, "loss": 0.6219, "step": 7880 }, { "epoch": 0.4002995265956772, "grad_norm": 0.031472418909322404, "learning_rate": 0.0009697603826433308, "loss": 0.6228, "step": 7885 }, { "epoch": 0.40055336269370867, "grad_norm": 0.037854618230179916, "learning_rate": 0.0009696844656809545, "loss": 0.6558, "step": 7890 }, { "epoch": 0.40080719879174015, "grad_norm": 0.11265157750996117, "learning_rate": 0.0009696084565218597, "loss": 0.6325, "step": 7895 }, { "epoch": 0.4010610348897717, "grad_norm": 0.031122545622557275, "learning_rate": 0.0009695323551809669, "loss": 0.6229, "step": 7900 }, { "epoch": 0.40131487098780316, "grad_norm": 0.04038263274235042, "learning_rate": 0.0009694561616732143, "loss": 0.6507, "step": 7905 }, { "epoch": 0.4015687070858347, "grad_norm": 0.029612453037692527, "learning_rate": 0.0009693798760135584, "loss": 0.6059, "step": 7910 }, { "epoch": 0.4018225431838662, "grad_norm": 0.032308274222676875, "learning_rate": 0.0009693034982169735, "loss": 0.6363, "step": 7915 }, { "epoch": 0.40207637928189766, "grad_norm": 0.05317177280278271, "learning_rate": 0.0009692270282984525, "loss": 0.6503, "step": 7920 }, { "epoch": 0.4023302153799292, "grad_norm": 0.04291373669655286, "learning_rate": 0.0009691504662730058, "loss": 0.6311, "step": 7925 }, { "epoch": 0.40258405147796067, "grad_norm": 0.03662642484263608, "learning_rate": 0.0009690738121556621, "loss": 0.6753, "step": 7930 }, { "epoch": 0.4028378875759922, "grad_norm": 0.035393766402028314, "learning_rate": 0.0009689970659614684, "loss": 0.6479, "step": 7935 }, { "epoch": 0.4030917236740237, "grad_norm": 0.03128196520109021, "learning_rate": 0.0009689202277054896, "loss": 0.671, "step": 7940 }, { "epoch": 0.40334555977205516, "grad_norm": 0.03677958165657406, "learning_rate": 0.0009688432974028085, "loss": 0.6932, "step": 7945 }, { "epoch": 0.4035993958700867, "grad_norm": 0.02727568027543021, "learning_rate": 0.0009687662750685265, "loss": 0.6504, "step": 7950 }, { "epoch": 0.4038532319681182, "grad_norm": 0.028131562831269996, "learning_rate": 0.0009686891607177621, "loss": 0.6585, "step": 7955 }, { "epoch": 0.4041070680661497, "grad_norm": 0.027514481928846146, "learning_rate": 0.0009686119543656531, "loss": 0.6182, "step": 7960 }, { "epoch": 0.4043609041641812, "grad_norm": 0.07056505685894615, "learning_rate": 0.0009685346560273542, "loss": 0.636, "step": 7965 }, { "epoch": 0.40461474026221267, "grad_norm": 0.04403662030554125, "learning_rate": 0.000968457265718039, "loss": 0.6695, "step": 7970 }, { "epoch": 0.4048685763602442, "grad_norm": 0.027718390866363754, "learning_rate": 0.0009683797834528987, "loss": 0.6283, "step": 7975 }, { "epoch": 0.4051224124582757, "grad_norm": 0.040775149461867354, "learning_rate": 0.0009683022092471427, "loss": 0.6609, "step": 7980 }, { "epoch": 0.4053762485563072, "grad_norm": 0.03695097130504616, "learning_rate": 0.0009682245431159984, "loss": 0.6104, "step": 7985 }, { "epoch": 0.4056300846543387, "grad_norm": 0.046398150204463785, "learning_rate": 0.0009681467850747114, "loss": 0.6298, "step": 7990 }, { "epoch": 0.4058839207523702, "grad_norm": 0.04641307748919207, "learning_rate": 0.0009680689351385453, "loss": 0.6244, "step": 7995 }, { "epoch": 0.4061377568504017, "grad_norm": 0.03193976715382944, "learning_rate": 0.0009679909933227811, "loss": 0.6378, "step": 8000 }, { "epoch": 0.4063915929484332, "grad_norm": 0.028276413703941978, "learning_rate": 0.0009679129596427189, "loss": 0.6194, "step": 8005 }, { "epoch": 0.4066454290464647, "grad_norm": 0.02733137371225709, "learning_rate": 0.0009678348341136764, "loss": 0.6586, "step": 8010 }, { "epoch": 0.4068992651444962, "grad_norm": 0.029814450966989852, "learning_rate": 0.000967756616750989, "loss": 0.6222, "step": 8015 }, { "epoch": 0.4071531012425277, "grad_norm": 0.03102914660521326, "learning_rate": 0.0009676783075700103, "loss": 0.5845, "step": 8020 }, { "epoch": 0.4074069373405592, "grad_norm": 0.031168821244497057, "learning_rate": 0.0009675999065861121, "loss": 0.6355, "step": 8025 }, { "epoch": 0.4076607734385907, "grad_norm": 0.05486797052015813, "learning_rate": 0.0009675214138146844, "loss": 0.6322, "step": 8030 }, { "epoch": 0.40791460953662223, "grad_norm": 0.04707616172534586, "learning_rate": 0.0009674428292711346, "loss": 0.6169, "step": 8035 }, { "epoch": 0.4081684456346537, "grad_norm": 0.02787363667264948, "learning_rate": 0.0009673641529708884, "loss": 0.6481, "step": 8040 }, { "epoch": 0.4084222817326852, "grad_norm": 0.027658440071354547, "learning_rate": 0.0009672853849293899, "loss": 0.6315, "step": 8045 }, { "epoch": 0.4086761178307167, "grad_norm": 0.04557845624399485, "learning_rate": 0.0009672065251621005, "loss": 0.6724, "step": 8050 }, { "epoch": 0.4089299539287482, "grad_norm": 0.028701244900625106, "learning_rate": 0.0009671275736845002, "loss": 0.6497, "step": 8055 }, { "epoch": 0.4091837900267797, "grad_norm": 0.027420431774163943, "learning_rate": 0.0009670485305120868, "loss": 0.617, "step": 8060 }, { "epoch": 0.4094376261248112, "grad_norm": 0.04387568253300268, "learning_rate": 0.0009669693956603761, "loss": 0.6354, "step": 8065 }, { "epoch": 0.4096914622228427, "grad_norm": 0.0356837346671153, "learning_rate": 0.0009668901691449017, "loss": 0.6622, "step": 8070 }, { "epoch": 0.40994529832087423, "grad_norm": 0.0663989720981148, "learning_rate": 0.0009668108509812155, "loss": 0.6492, "step": 8075 }, { "epoch": 0.4101991344189057, "grad_norm": 0.06052011549737725, "learning_rate": 0.0009667314411848873, "loss": 0.5987, "step": 8080 }, { "epoch": 0.4104529705169372, "grad_norm": 0.036195040168482975, "learning_rate": 0.0009666519397715048, "loss": 0.654, "step": 8085 }, { "epoch": 0.4107068066149687, "grad_norm": 0.040727444865344034, "learning_rate": 0.0009665723467566736, "loss": 0.6036, "step": 8090 }, { "epoch": 0.4109606427130002, "grad_norm": 0.1897729919545745, "learning_rate": 0.0009664926621560175, "loss": 0.6395, "step": 8095 }, { "epoch": 0.41121447881103174, "grad_norm": 0.0569334892002028, "learning_rate": 0.0009664128859851784, "loss": 0.6519, "step": 8100 }, { "epoch": 0.4114683149090632, "grad_norm": 0.06785163170844886, "learning_rate": 0.0009663330182598155, "loss": 0.6455, "step": 8105 }, { "epoch": 0.4117221510070947, "grad_norm": 0.040654887306683485, "learning_rate": 0.0009662530589956069, "loss": 0.6278, "step": 8110 }, { "epoch": 0.41197598710512623, "grad_norm": 0.03066395095140888, "learning_rate": 0.0009661730082082481, "loss": 0.5972, "step": 8115 }, { "epoch": 0.4122298232031577, "grad_norm": 0.0319600496844395, "learning_rate": 0.0009660928659134525, "loss": 0.6551, "step": 8120 }, { "epoch": 0.41248365930118924, "grad_norm": 0.03366629328090797, "learning_rate": 0.0009660126321269516, "loss": 0.6494, "step": 8125 }, { "epoch": 0.4127374953992207, "grad_norm": 0.045751657644504846, "learning_rate": 0.0009659323068644952, "loss": 0.6166, "step": 8130 }, { "epoch": 0.4129913314972522, "grad_norm": 0.03206675763665257, "learning_rate": 0.0009658518901418505, "loss": 0.6429, "step": 8135 }, { "epoch": 0.41324516759528374, "grad_norm": 0.026550019766652787, "learning_rate": 0.0009657713819748028, "loss": 0.6498, "step": 8140 }, { "epoch": 0.4134990036933152, "grad_norm": 0.045396032327730144, "learning_rate": 0.0009656907823791559, "loss": 0.6428, "step": 8145 }, { "epoch": 0.41375283979134675, "grad_norm": 0.036710167826551975, "learning_rate": 0.0009656100913707306, "loss": 0.6255, "step": 8150 }, { "epoch": 0.41400667588937823, "grad_norm": 0.031135118412712215, "learning_rate": 0.0009655293089653665, "loss": 0.6103, "step": 8155 }, { "epoch": 0.4142605119874097, "grad_norm": 0.03347596053431247, "learning_rate": 0.0009654484351789206, "loss": 0.627, "step": 8160 }, { "epoch": 0.41451434808544124, "grad_norm": 0.030646522747677672, "learning_rate": 0.000965367470027268, "loss": 0.6277, "step": 8165 }, { "epoch": 0.4147681841834727, "grad_norm": 0.05286996241213806, "learning_rate": 0.0009652864135263018, "loss": 0.633, "step": 8170 }, { "epoch": 0.41502202028150426, "grad_norm": 0.028068076907754813, "learning_rate": 0.0009652052656919331, "loss": 0.6432, "step": 8175 }, { "epoch": 0.41527585637953573, "grad_norm": 0.03619239399249029, "learning_rate": 0.0009651240265400907, "loss": 0.6117, "step": 8180 }, { "epoch": 0.4155296924775672, "grad_norm": 0.03319322210171537, "learning_rate": 0.0009650426960867215, "loss": 0.6499, "step": 8185 }, { "epoch": 0.41578352857559875, "grad_norm": 0.04854675850168715, "learning_rate": 0.00096496127434779, "loss": 0.6564, "step": 8190 }, { "epoch": 0.41603736467363023, "grad_norm": 0.029353149898544276, "learning_rate": 0.0009648797613392794, "loss": 0.6103, "step": 8195 }, { "epoch": 0.41629120077166176, "grad_norm": 0.027116416725451065, "learning_rate": 0.0009647981570771898, "loss": 0.6166, "step": 8200 }, { "epoch": 0.41654503686969324, "grad_norm": 0.05618152750846531, "learning_rate": 0.00096471646157754, "loss": 0.6162, "step": 8205 }, { "epoch": 0.4167988729677247, "grad_norm": 0.02729960033208377, "learning_rate": 0.0009646346748563663, "loss": 0.6004, "step": 8210 }, { "epoch": 0.41705270906575626, "grad_norm": 0.08612473698169335, "learning_rate": 0.0009645527969297231, "loss": 0.6483, "step": 8215 }, { "epoch": 0.41730654516378773, "grad_norm": 0.029936015816733892, "learning_rate": 0.0009644708278136826, "loss": 0.6317, "step": 8220 }, { "epoch": 0.41756038126181927, "grad_norm": 0.0726201142485528, "learning_rate": 0.0009643887675243348, "loss": 0.6244, "step": 8225 }, { "epoch": 0.41781421735985075, "grad_norm": 0.057917723581260475, "learning_rate": 0.0009643066160777879, "loss": 0.647, "step": 8230 }, { "epoch": 0.4180680534578822, "grad_norm": 0.13072452439749868, "learning_rate": 0.0009642243734901678, "loss": 0.6838, "step": 8235 }, { "epoch": 0.41832188955591376, "grad_norm": 0.05469152409284166, "learning_rate": 0.0009641420397776181, "loss": 0.6624, "step": 8240 }, { "epoch": 0.41857572565394524, "grad_norm": 0.033211019095030915, "learning_rate": 0.0009640596149563008, "loss": 0.6251, "step": 8245 }, { "epoch": 0.4188295617519768, "grad_norm": 0.039537117087901134, "learning_rate": 0.0009639770990423954, "loss": 0.6764, "step": 8250 }, { "epoch": 0.41908339785000825, "grad_norm": 0.043759885870117206, "learning_rate": 0.0009638944920520992, "loss": 0.6291, "step": 8255 }, { "epoch": 0.41933723394803973, "grad_norm": 0.06431426896668371, "learning_rate": 0.0009638117940016278, "loss": 0.6548, "step": 8260 }, { "epoch": 0.41959107004607127, "grad_norm": 0.027159981833348182, "learning_rate": 0.000963729004907214, "loss": 0.6055, "step": 8265 }, { "epoch": 0.41984490614410275, "grad_norm": 0.10032739292221377, "learning_rate": 0.0009636461247851094, "loss": 0.6578, "step": 8270 }, { "epoch": 0.4200987422421342, "grad_norm": 0.06844856797298811, "learning_rate": 0.0009635631536515825, "loss": 0.6316, "step": 8275 }, { "epoch": 0.42035257834016576, "grad_norm": 0.03539995161128657, "learning_rate": 0.0009634800915229205, "loss": 0.6597, "step": 8280 }, { "epoch": 0.42060641443819724, "grad_norm": 0.033253904264618245, "learning_rate": 0.0009633969384154279, "loss": 0.6066, "step": 8285 }, { "epoch": 0.4208602505362288, "grad_norm": 0.042401610577462875, "learning_rate": 0.0009633136943454271, "loss": 0.6235, "step": 8290 }, { "epoch": 0.42111408663426025, "grad_norm": 0.06416678518804314, "learning_rate": 0.0009632303593292589, "loss": 0.6112, "step": 8295 }, { "epoch": 0.42136792273229173, "grad_norm": 0.055313941008177936, "learning_rate": 0.0009631469333832809, "loss": 0.6632, "step": 8300 }, { "epoch": 0.42162175883032327, "grad_norm": 0.11795660265352328, "learning_rate": 0.0009630634165238699, "loss": 0.6342, "step": 8305 }, { "epoch": 0.42187559492835475, "grad_norm": 0.0657921582096661, "learning_rate": 0.0009629798087674194, "loss": 0.6253, "step": 8310 }, { "epoch": 0.4221294310263863, "grad_norm": 0.03717901771126588, "learning_rate": 0.0009628961101303412, "loss": 0.6518, "step": 8315 }, { "epoch": 0.42238326712441776, "grad_norm": 0.04495023963968202, "learning_rate": 0.0009628123206290654, "loss": 0.6451, "step": 8320 }, { "epoch": 0.42263710322244924, "grad_norm": 0.0418969458155804, "learning_rate": 0.0009627284402800388, "loss": 0.6534, "step": 8325 }, { "epoch": 0.4228909393204808, "grad_norm": 0.030384984685908507, "learning_rate": 0.0009626444690997272, "loss": 0.5976, "step": 8330 }, { "epoch": 0.42314477541851225, "grad_norm": 0.04470751844633285, "learning_rate": 0.0009625604071046133, "loss": 0.6242, "step": 8335 }, { "epoch": 0.4233986115165438, "grad_norm": 0.025004516406755696, "learning_rate": 0.0009624762543111985, "loss": 0.6245, "step": 8340 }, { "epoch": 0.42365244761457527, "grad_norm": 0.04181782200662666, "learning_rate": 0.0009623920107360011, "loss": 0.6792, "step": 8345 }, { "epoch": 0.42390628371260675, "grad_norm": 0.03102456315861995, "learning_rate": 0.0009623076763955581, "loss": 0.6725, "step": 8350 }, { "epoch": 0.4241601198106383, "grad_norm": 0.057488518244734055, "learning_rate": 0.0009622232513064237, "loss": 0.6335, "step": 8355 }, { "epoch": 0.42441395590866976, "grad_norm": 0.05907000801077856, "learning_rate": 0.00096213873548517, "loss": 0.6626, "step": 8360 }, { "epoch": 0.4246677920067013, "grad_norm": 0.07423416923574946, "learning_rate": 0.0009620541289483875, "loss": 0.6819, "step": 8365 }, { "epoch": 0.4249216281047328, "grad_norm": 0.04354747716701456, "learning_rate": 0.0009619694317126837, "loss": 0.6638, "step": 8370 }, { "epoch": 0.42517546420276425, "grad_norm": 0.03257416270726558, "learning_rate": 0.0009618846437946842, "loss": 0.6575, "step": 8375 }, { "epoch": 0.4254293003007958, "grad_norm": 0.1385958109330031, "learning_rate": 0.0009617997652110326, "loss": 0.6893, "step": 8380 }, { "epoch": 0.42568313639882727, "grad_norm": 0.11178651527717962, "learning_rate": 0.00096171479597839, "loss": 0.7221, "step": 8385 }, { "epoch": 0.4259369724968588, "grad_norm": 0.07142680791086048, "learning_rate": 0.0009616297361134355, "loss": 0.6673, "step": 8390 }, { "epoch": 0.4261908085948903, "grad_norm": 0.07599589867371198, "learning_rate": 0.000961544585632866, "loss": 0.6966, "step": 8395 }, { "epoch": 0.42644464469292176, "grad_norm": 0.0324535534315102, "learning_rate": 0.0009614593445533961, "loss": 0.6887, "step": 8400 }, { "epoch": 0.4266984807909533, "grad_norm": 0.03664906295093519, "learning_rate": 0.0009613740128917581, "loss": 0.6472, "step": 8405 }, { "epoch": 0.4269523168889848, "grad_norm": 0.06479920979013709, "learning_rate": 0.0009612885906647023, "loss": 0.6738, "step": 8410 }, { "epoch": 0.4272061529870163, "grad_norm": 0.04275229522059579, "learning_rate": 0.0009612030778889966, "loss": 0.6488, "step": 8415 }, { "epoch": 0.4274599890850478, "grad_norm": 0.029470064994178823, "learning_rate": 0.0009611174745814266, "loss": 0.6667, "step": 8420 }, { "epoch": 0.42771382518307927, "grad_norm": 0.03351287696319307, "learning_rate": 0.000961031780758796, "loss": 0.6559, "step": 8425 }, { "epoch": 0.4279676612811108, "grad_norm": 0.05384940363959001, "learning_rate": 0.000960945996437926, "loss": 0.6732, "step": 8430 }, { "epoch": 0.4282214973791423, "grad_norm": 0.0320326929365238, "learning_rate": 0.0009608601216356557, "loss": 0.7019, "step": 8435 }, { "epoch": 0.4284753334771738, "grad_norm": 0.02719381758672677, "learning_rate": 0.0009607741563688417, "loss": 0.6465, "step": 8440 }, { "epoch": 0.4287291695752053, "grad_norm": 0.03282220675318019, "learning_rate": 0.0009606881006543589, "loss": 0.6366, "step": 8445 }, { "epoch": 0.4289830056732368, "grad_norm": 0.03248468141086955, "learning_rate": 0.0009606019545090992, "loss": 0.6066, "step": 8450 }, { "epoch": 0.4292368417712683, "grad_norm": 0.03936893710205544, "learning_rate": 0.0009605157179499728, "loss": 0.6604, "step": 8455 }, { "epoch": 0.4294906778692998, "grad_norm": 0.03206017279382604, "learning_rate": 0.0009604293909939077, "loss": 0.6396, "step": 8460 }, { "epoch": 0.4297445139673313, "grad_norm": 0.056617557142407925, "learning_rate": 0.0009603429736578493, "loss": 0.6345, "step": 8465 }, { "epoch": 0.4299983500653628, "grad_norm": 0.04111342531938289, "learning_rate": 0.0009602564659587608, "loss": 0.6756, "step": 8470 }, { "epoch": 0.4302521861633943, "grad_norm": 0.03727263305415869, "learning_rate": 0.0009601698679136233, "loss": 0.6321, "step": 8475 }, { "epoch": 0.4305060222614258, "grad_norm": 0.03320811173723645, "learning_rate": 0.0009600831795394358, "loss": 0.6278, "step": 8480 }, { "epoch": 0.4307598583594573, "grad_norm": 0.04118865787603138, "learning_rate": 0.0009599964008532144, "loss": 0.6521, "step": 8485 }, { "epoch": 0.43101369445748877, "grad_norm": 0.027675504471304815, "learning_rate": 0.0009599095318719935, "loss": 0.6282, "step": 8490 }, { "epoch": 0.4312675305555203, "grad_norm": 0.04646301540154575, "learning_rate": 0.0009598225726128251, "loss": 0.6408, "step": 8495 }, { "epoch": 0.4315213666535518, "grad_norm": 0.030455384549113017, "learning_rate": 0.0009597355230927789, "loss": 0.6444, "step": 8500 }, { "epoch": 0.4317752027515833, "grad_norm": 0.031905572716127935, "learning_rate": 0.0009596483833289422, "loss": 0.632, "step": 8505 }, { "epoch": 0.4320290388496148, "grad_norm": 0.03178070686854921, "learning_rate": 0.0009595611533384201, "loss": 0.6341, "step": 8510 }, { "epoch": 0.4322828749476463, "grad_norm": 0.037639770941935886, "learning_rate": 0.0009594738331383355, "loss": 0.6236, "step": 8515 }, { "epoch": 0.4325367110456778, "grad_norm": 0.06449433125052263, "learning_rate": 0.0009593864227458287, "loss": 0.6534, "step": 8520 }, { "epoch": 0.4327905471437093, "grad_norm": 0.028739688486159997, "learning_rate": 0.0009592989221780581, "loss": 0.6303, "step": 8525 }, { "epoch": 0.4330443832417408, "grad_norm": 0.07881302169372612, "learning_rate": 0.0009592113314521996, "loss": 0.6427, "step": 8530 }, { "epoch": 0.4332982193397723, "grad_norm": 0.06386066146903421, "learning_rate": 0.0009591236505854468, "loss": 0.6411, "step": 8535 }, { "epoch": 0.4335520554378038, "grad_norm": 0.08823347046423324, "learning_rate": 0.0009590358795950112, "loss": 0.6628, "step": 8540 }, { "epoch": 0.4338058915358353, "grad_norm": 0.04402973321402076, "learning_rate": 0.0009589480184981214, "loss": 0.6312, "step": 8545 }, { "epoch": 0.4340597276338668, "grad_norm": 0.06702143934000367, "learning_rate": 0.0009588600673120245, "loss": 0.6386, "step": 8550 }, { "epoch": 0.43431356373189833, "grad_norm": 0.0303696868506802, "learning_rate": 0.0009587720260539847, "loss": 0.6389, "step": 8555 }, { "epoch": 0.4345673998299298, "grad_norm": 0.028798244571663335, "learning_rate": 0.000958683894741284, "loss": 0.6517, "step": 8560 }, { "epoch": 0.4348212359279613, "grad_norm": 0.026950156282351694, "learning_rate": 0.0009585956733912224, "loss": 0.6254, "step": 8565 }, { "epoch": 0.4350750720259928, "grad_norm": 0.0355083939976014, "learning_rate": 0.0009585073620211169, "loss": 0.6603, "step": 8570 }, { "epoch": 0.4353289081240243, "grad_norm": 0.03066199950726959, "learning_rate": 0.0009584189606483029, "loss": 0.6305, "step": 8575 }, { "epoch": 0.43558274422205584, "grad_norm": 0.030869429351402574, "learning_rate": 0.0009583304692901331, "loss": 0.6479, "step": 8580 }, { "epoch": 0.4358365803200873, "grad_norm": 0.03223477546507825, "learning_rate": 0.0009582418879639778, "loss": 0.6709, "step": 8585 }, { "epoch": 0.4360904164181188, "grad_norm": 0.0480603195178494, "learning_rate": 0.0009581532166872252, "loss": 0.6198, "step": 8590 }, { "epoch": 0.43634425251615033, "grad_norm": 0.02653848497460224, "learning_rate": 0.0009580644554772809, "loss": 0.6247, "step": 8595 }, { "epoch": 0.4365980886141818, "grad_norm": 0.03447545219420309, "learning_rate": 0.0009579756043515684, "loss": 0.6677, "step": 8600 }, { "epoch": 0.43685192471221335, "grad_norm": 0.0340747508415548, "learning_rate": 0.0009578866633275287, "loss": 0.695, "step": 8605 }, { "epoch": 0.4371057608102448, "grad_norm": 0.034686685618524345, "learning_rate": 0.0009577976324226205, "loss": 0.6303, "step": 8610 }, { "epoch": 0.4373595969082763, "grad_norm": 0.04739152588761622, "learning_rate": 0.0009577085116543201, "loss": 0.6718, "step": 8615 }, { "epoch": 0.43761343300630784, "grad_norm": 0.06752904402187106, "learning_rate": 0.0009576193010401213, "loss": 0.6227, "step": 8620 }, { "epoch": 0.4378672691043393, "grad_norm": 0.03265339538465412, "learning_rate": 0.0009575300005975361, "loss": 0.6396, "step": 8625 }, { "epoch": 0.43812110520237085, "grad_norm": 0.04449318891895552, "learning_rate": 0.0009574406103440931, "loss": 0.6716, "step": 8630 }, { "epoch": 0.43837494130040233, "grad_norm": 0.04632596776822006, "learning_rate": 0.0009573511302973399, "loss": 0.6233, "step": 8635 }, { "epoch": 0.4386287773984338, "grad_norm": 0.03465904010797608, "learning_rate": 0.0009572615604748405, "loss": 0.6524, "step": 8640 }, { "epoch": 0.43888261349646535, "grad_norm": 0.0870434657255882, "learning_rate": 0.000957171900894177, "loss": 0.6214, "step": 8645 }, { "epoch": 0.4391364495944968, "grad_norm": 0.04534670041292767, "learning_rate": 0.0009570821515729496, "loss": 0.6523, "step": 8650 }, { "epoch": 0.43939028569252836, "grad_norm": 0.028417736922154647, "learning_rate": 0.0009569923125287749, "loss": 0.6387, "step": 8655 }, { "epoch": 0.43964412179055984, "grad_norm": 0.03085472383605309, "learning_rate": 0.0009569023837792885, "loss": 0.6569, "step": 8660 }, { "epoch": 0.4398979578885913, "grad_norm": 0.029597971880738125, "learning_rate": 0.0009568123653421427, "loss": 0.6517, "step": 8665 }, { "epoch": 0.44015179398662285, "grad_norm": 0.031124321107765782, "learning_rate": 0.0009567222572350078, "loss": 0.6481, "step": 8670 }, { "epoch": 0.44040563008465433, "grad_norm": 0.04250290000689362, "learning_rate": 0.0009566320594755713, "loss": 0.6301, "step": 8675 }, { "epoch": 0.44065946618268587, "grad_norm": 0.03803602491108363, "learning_rate": 0.0009565417720815389, "loss": 0.6506, "step": 8680 }, { "epoch": 0.44091330228071735, "grad_norm": 0.10059999444471293, "learning_rate": 0.0009564513950706333, "loss": 0.7053, "step": 8685 }, { "epoch": 0.4411671383787488, "grad_norm": 0.028992308416219137, "learning_rate": 0.0009563609284605951, "loss": 0.6764, "step": 8690 }, { "epoch": 0.44142097447678036, "grad_norm": 0.028072857577109308, "learning_rate": 0.0009562703722691828, "loss": 0.6359, "step": 8695 }, { "epoch": 0.44167481057481184, "grad_norm": 0.04504031556014271, "learning_rate": 0.0009561797265141717, "loss": 0.6518, "step": 8700 }, { "epoch": 0.4419286466728434, "grad_norm": 0.028108033251197, "learning_rate": 0.0009560889912133552, "loss": 0.6291, "step": 8705 }, { "epoch": 0.44218248277087485, "grad_norm": 0.03109459111595203, "learning_rate": 0.0009559981663845443, "loss": 0.6163, "step": 8710 }, { "epoch": 0.44243631886890633, "grad_norm": 0.04169194643254887, "learning_rate": 0.0009559072520455672, "loss": 0.6475, "step": 8715 }, { "epoch": 0.44269015496693787, "grad_norm": 0.20480559958951622, "learning_rate": 0.0009558162482142703, "loss": 0.5948, "step": 8720 }, { "epoch": 0.44294399106496934, "grad_norm": 0.056634271165419775, "learning_rate": 0.000955725154908517, "loss": 0.6407, "step": 8725 }, { "epoch": 0.4431978271630008, "grad_norm": 0.04431366313723485, "learning_rate": 0.0009556339721461885, "loss": 0.6483, "step": 8730 }, { "epoch": 0.44345166326103236, "grad_norm": 0.030702014731428928, "learning_rate": 0.0009555426999451835, "loss": 0.6127, "step": 8735 }, { "epoch": 0.44370549935906384, "grad_norm": 0.04349749497078023, "learning_rate": 0.0009554513383234184, "loss": 0.6401, "step": 8740 }, { "epoch": 0.44395933545709537, "grad_norm": 0.028774456041381447, "learning_rate": 0.0009553598872988268, "loss": 0.6411, "step": 8745 }, { "epoch": 0.44421317155512685, "grad_norm": 0.04729637621631858, "learning_rate": 0.0009552683468893601, "loss": 0.6229, "step": 8750 }, { "epoch": 0.44446700765315833, "grad_norm": 0.03315699465561875, "learning_rate": 0.0009551767171129874, "loss": 0.6576, "step": 8755 }, { "epoch": 0.44472084375118986, "grad_norm": 0.07162055236767001, "learning_rate": 0.0009550849979876952, "loss": 0.6478, "step": 8760 }, { "epoch": 0.44497467984922134, "grad_norm": 0.03618826906366848, "learning_rate": 0.0009549931895314874, "loss": 0.6262, "step": 8765 }, { "epoch": 0.4452285159472529, "grad_norm": 0.05135770096791484, "learning_rate": 0.0009549012917623854, "loss": 0.6681, "step": 8770 }, { "epoch": 0.44548235204528436, "grad_norm": 0.02358653749589321, "learning_rate": 0.0009548093046984285, "loss": 0.5986, "step": 8775 }, { "epoch": 0.44573618814331584, "grad_norm": 0.04897232885513862, "learning_rate": 0.0009547172283576733, "loss": 0.6277, "step": 8780 }, { "epoch": 0.44599002424134737, "grad_norm": 0.029813286658092467, "learning_rate": 0.0009546250627581936, "loss": 0.6221, "step": 8785 }, { "epoch": 0.44624386033937885, "grad_norm": 0.04724784641629323, "learning_rate": 0.0009545328079180815, "loss": 0.644, "step": 8790 }, { "epoch": 0.4464976964374104, "grad_norm": 0.04105666493239327, "learning_rate": 0.0009544404638554459, "loss": 0.6266, "step": 8795 }, { "epoch": 0.44675153253544186, "grad_norm": 0.036873167174765825, "learning_rate": 0.0009543480305884136, "loss": 0.6148, "step": 8800 }, { "epoch": 0.44700536863347334, "grad_norm": 0.024048534299153178, "learning_rate": 0.0009542555081351286, "loss": 0.6191, "step": 8805 }, { "epoch": 0.4472592047315049, "grad_norm": 0.02625656937587616, "learning_rate": 0.0009541628965137528, "loss": 0.6347, "step": 8810 }, { "epoch": 0.44751304082953636, "grad_norm": 0.060240180235133486, "learning_rate": 0.0009540701957424653, "loss": 0.5963, "step": 8815 }, { "epoch": 0.4477668769275679, "grad_norm": 0.05265523726241905, "learning_rate": 0.0009539774058394628, "loss": 0.6619, "step": 8820 }, { "epoch": 0.44802071302559937, "grad_norm": 0.06653130957206357, "learning_rate": 0.0009538845268229596, "loss": 0.643, "step": 8825 }, { "epoch": 0.44827454912363085, "grad_norm": 0.027089718279573065, "learning_rate": 0.0009537915587111872, "loss": 0.6249, "step": 8830 }, { "epoch": 0.4485283852216624, "grad_norm": 0.041501163456008426, "learning_rate": 0.0009536985015223949, "loss": 0.669, "step": 8835 }, { "epoch": 0.44878222131969386, "grad_norm": 0.04886005734739699, "learning_rate": 0.0009536053552748494, "loss": 0.5982, "step": 8840 }, { "epoch": 0.4490360574177254, "grad_norm": 0.07228127391608218, "learning_rate": 0.0009535121199868348, "loss": 0.6427, "step": 8845 }, { "epoch": 0.4492898935157569, "grad_norm": 0.0560495813187393, "learning_rate": 0.0009534187956766526, "loss": 0.6376, "step": 8850 }, { "epoch": 0.44954372961378836, "grad_norm": 0.024625434128465788, "learning_rate": 0.000953325382362622, "loss": 0.6456, "step": 8855 }, { "epoch": 0.4497975657118199, "grad_norm": 0.02767775635007945, "learning_rate": 0.0009532318800630797, "loss": 0.6126, "step": 8860 }, { "epoch": 0.45005140180985137, "grad_norm": 0.03132593059311349, "learning_rate": 0.0009531382887963796, "loss": 0.626, "step": 8865 }, { "epoch": 0.4503052379078829, "grad_norm": 0.03409858571498195, "learning_rate": 0.0009530446085808932, "loss": 0.6232, "step": 8870 }, { "epoch": 0.4505590740059144, "grad_norm": 0.03060723735055533, "learning_rate": 0.0009529508394350093, "loss": 0.6515, "step": 8875 }, { "epoch": 0.45081291010394586, "grad_norm": 0.02791574567598346, "learning_rate": 0.0009528569813771346, "loss": 0.643, "step": 8880 }, { "epoch": 0.4510667462019774, "grad_norm": 0.025463994979441774, "learning_rate": 0.0009527630344256929, "loss": 0.6072, "step": 8885 }, { "epoch": 0.4513205823000089, "grad_norm": 0.03198977951851349, "learning_rate": 0.0009526689985991255, "loss": 0.6013, "step": 8890 }, { "epoch": 0.4515744183980404, "grad_norm": 0.03573354091801804, "learning_rate": 0.000952574873915891, "loss": 0.6139, "step": 8895 }, { "epoch": 0.4518282544960719, "grad_norm": 0.037344124835746804, "learning_rate": 0.0009524806603944658, "loss": 0.6095, "step": 8900 }, { "epoch": 0.45208209059410337, "grad_norm": 0.0338181770188993, "learning_rate": 0.0009523863580533434, "loss": 0.63, "step": 8905 }, { "epoch": 0.4523359266921349, "grad_norm": 0.047741999628023585, "learning_rate": 0.000952291966911035, "loss": 0.6191, "step": 8910 }, { "epoch": 0.4525897627901664, "grad_norm": 0.027889977098360996, "learning_rate": 0.0009521974869860691, "loss": 0.6289, "step": 8915 }, { "epoch": 0.4528435988881979, "grad_norm": 0.04542033140089097, "learning_rate": 0.0009521029182969915, "loss": 0.6292, "step": 8920 }, { "epoch": 0.4530974349862294, "grad_norm": 0.060630645721840855, "learning_rate": 0.000952008260862366, "loss": 0.6073, "step": 8925 }, { "epoch": 0.4533512710842609, "grad_norm": 0.042297603142427635, "learning_rate": 0.0009519135147007726, "loss": 0.6255, "step": 8930 }, { "epoch": 0.4536051071822924, "grad_norm": 0.03012511251936056, "learning_rate": 0.0009518186798308104, "loss": 0.6177, "step": 8935 }, { "epoch": 0.4538589432803239, "grad_norm": 0.030825815994073906, "learning_rate": 0.0009517237562710943, "loss": 0.6154, "step": 8940 }, { "epoch": 0.45411277937835537, "grad_norm": 0.027418304719546968, "learning_rate": 0.0009516287440402576, "loss": 0.6451, "step": 8945 }, { "epoch": 0.4543666154763869, "grad_norm": 0.03437809223733027, "learning_rate": 0.0009515336431569508, "loss": 0.6372, "step": 8950 }, { "epoch": 0.4546204515744184, "grad_norm": 0.02706173332906627, "learning_rate": 0.0009514384536398416, "loss": 0.5825, "step": 8955 }, { "epoch": 0.4548742876724499, "grad_norm": 0.026630978850147032, "learning_rate": 0.0009513431755076152, "loss": 0.6007, "step": 8960 }, { "epoch": 0.4551281237704814, "grad_norm": 0.027776987645599445, "learning_rate": 0.0009512478087789745, "loss": 0.6777, "step": 8965 }, { "epoch": 0.4553819598685129, "grad_norm": 0.03127848050806932, "learning_rate": 0.0009511523534726391, "loss": 0.6137, "step": 8970 }, { "epoch": 0.4556357959665444, "grad_norm": 0.0316814909688181, "learning_rate": 0.0009510568096073466, "loss": 0.6259, "step": 8975 }, { "epoch": 0.4558896320645759, "grad_norm": 0.03288592896635044, "learning_rate": 0.0009509611772018519, "loss": 0.594, "step": 8980 }, { "epoch": 0.4561434681626074, "grad_norm": 0.030504149235616482, "learning_rate": 0.0009508654562749271, "loss": 0.6559, "step": 8985 }, { "epoch": 0.4563973042606389, "grad_norm": 0.02918212945710069, "learning_rate": 0.0009507696468453615, "loss": 0.6376, "step": 8990 }, { "epoch": 0.4566511403586704, "grad_norm": 0.02867657062417966, "learning_rate": 0.0009506737489319623, "loss": 0.6153, "step": 8995 }, { "epoch": 0.4569049764567019, "grad_norm": 0.04312848043233246, "learning_rate": 0.0009505777625535538, "loss": 0.6249, "step": 9000 }, { "epoch": 0.4571588125547334, "grad_norm": 0.025378226871406697, "learning_rate": 0.0009504816877289775, "loss": 0.625, "step": 9005 }, { "epoch": 0.45741264865276493, "grad_norm": 0.034824462280793105, "learning_rate": 0.0009503855244770923, "loss": 0.5977, "step": 9010 }, { "epoch": 0.4576664847507964, "grad_norm": 0.06002542817312333, "learning_rate": 0.0009502892728167749, "loss": 0.6148, "step": 9015 }, { "epoch": 0.4579203208488279, "grad_norm": 0.04708946523260842, "learning_rate": 0.0009501929327669188, "loss": 0.6292, "step": 9020 }, { "epoch": 0.4581741569468594, "grad_norm": 0.030847513835845684, "learning_rate": 0.0009500965043464349, "loss": 0.5828, "step": 9025 }, { "epoch": 0.4584279930448909, "grad_norm": 0.030701249013652274, "learning_rate": 0.000949999987574252, "loss": 0.6038, "step": 9030 }, { "epoch": 0.45868182914292244, "grad_norm": 0.027439149183266075, "learning_rate": 0.0009499033824693158, "loss": 0.6028, "step": 9035 }, { "epoch": 0.4589356652409539, "grad_norm": 0.050799419978763395, "learning_rate": 0.000949806689050589, "loss": 0.633, "step": 9040 }, { "epoch": 0.4591895013389854, "grad_norm": 0.03760838688025009, "learning_rate": 0.0009497099073370526, "loss": 0.5974, "step": 9045 }, { "epoch": 0.45944333743701693, "grad_norm": 0.03301722262990158, "learning_rate": 0.0009496130373477039, "loss": 0.614, "step": 9050 }, { "epoch": 0.4596971735350484, "grad_norm": 0.03076280298803993, "learning_rate": 0.0009495160791015583, "loss": 0.6149, "step": 9055 }, { "epoch": 0.45995100963307994, "grad_norm": 0.02435240521738851, "learning_rate": 0.0009494190326176479, "loss": 0.5875, "step": 9060 }, { "epoch": 0.4602048457311114, "grad_norm": 0.036608354215028616, "learning_rate": 0.0009493218979150229, "loss": 0.6492, "step": 9065 }, { "epoch": 0.4604586818291429, "grad_norm": 0.031052494072487117, "learning_rate": 0.00094922467501275, "loss": 0.6289, "step": 9070 }, { "epoch": 0.46071251792717444, "grad_norm": 0.05646399870150115, "learning_rate": 0.0009491273639299136, "loss": 0.6317, "step": 9075 }, { "epoch": 0.4609663540252059, "grad_norm": 0.026516267771592012, "learning_rate": 0.0009490299646856156, "loss": 0.6345, "step": 9080 }, { "epoch": 0.46122019012323745, "grad_norm": 0.026314766475744072, "learning_rate": 0.0009489324772989747, "loss": 0.5714, "step": 9085 }, { "epoch": 0.46147402622126893, "grad_norm": 0.04954065910053027, "learning_rate": 0.0009488349017891275, "loss": 0.5893, "step": 9090 }, { "epoch": 0.4617278623193004, "grad_norm": 0.030999818544982013, "learning_rate": 0.0009487372381752273, "loss": 0.6209, "step": 9095 }, { "epoch": 0.46198169841733194, "grad_norm": 0.05472893804971776, "learning_rate": 0.0009486394864764452, "loss": 0.6358, "step": 9100 }, { "epoch": 0.4622355345153634, "grad_norm": 0.03583598972027129, "learning_rate": 0.000948541646711969, "loss": 0.6119, "step": 9105 }, { "epoch": 0.46248937061339496, "grad_norm": 0.03444527417330176, "learning_rate": 0.0009484437189010047, "loss": 0.6398, "step": 9110 }, { "epoch": 0.46274320671142644, "grad_norm": 0.03009633415385528, "learning_rate": 0.0009483457030627746, "loss": 0.6811, "step": 9115 }, { "epoch": 0.4629970428094579, "grad_norm": 0.02932226956272446, "learning_rate": 0.000948247599216519, "loss": 0.65, "step": 9120 }, { "epoch": 0.46325087890748945, "grad_norm": 0.040643168448799956, "learning_rate": 0.0009481494073814951, "loss": 0.6312, "step": 9125 }, { "epoch": 0.46350471500552093, "grad_norm": 0.035491703470452655, "learning_rate": 0.0009480511275769773, "loss": 0.6314, "step": 9130 }, { "epoch": 0.46375855110355246, "grad_norm": 0.030658119230129795, "learning_rate": 0.0009479527598222577, "loss": 0.6384, "step": 9135 }, { "epoch": 0.46401238720158394, "grad_norm": 0.031569879525829354, "learning_rate": 0.0009478543041366452, "loss": 0.6376, "step": 9140 }, { "epoch": 0.4642662232996154, "grad_norm": 0.03790647935672875, "learning_rate": 0.0009477557605394664, "loss": 0.6153, "step": 9145 }, { "epoch": 0.46452005939764696, "grad_norm": 0.030688459561337894, "learning_rate": 0.0009476571290500647, "loss": 0.6538, "step": 9150 }, { "epoch": 0.46477389549567844, "grad_norm": 0.03266933170402601, "learning_rate": 0.000947558409687801, "loss": 0.642, "step": 9155 }, { "epoch": 0.4650277315937099, "grad_norm": 0.02719276820532039, "learning_rate": 0.0009474596024720534, "loss": 0.6071, "step": 9160 }, { "epoch": 0.46528156769174145, "grad_norm": 0.03990484440945978, "learning_rate": 0.0009473607074222172, "loss": 0.6301, "step": 9165 }, { "epoch": 0.46553540378977293, "grad_norm": 0.030262431248659433, "learning_rate": 0.0009472617245577053, "loss": 0.6482, "step": 9170 }, { "epoch": 0.46578923988780446, "grad_norm": 0.02733900220464855, "learning_rate": 0.0009471626538979474, "loss": 0.6199, "step": 9175 }, { "epoch": 0.46604307598583594, "grad_norm": 0.04504748091932876, "learning_rate": 0.0009470634954623905, "loss": 0.596, "step": 9180 }, { "epoch": 0.4662969120838674, "grad_norm": 0.027370854127821743, "learning_rate": 0.0009469642492704989, "loss": 0.6347, "step": 9185 }, { "epoch": 0.46655074818189896, "grad_norm": 0.05378088594277385, "learning_rate": 0.0009468649153417542, "loss": 0.6133, "step": 9190 }, { "epoch": 0.46680458427993043, "grad_norm": 0.03391320932942569, "learning_rate": 0.000946765493695655, "loss": 0.6205, "step": 9195 }, { "epoch": 0.46705842037796197, "grad_norm": 0.04140473437665591, "learning_rate": 0.0009466659843517176, "loss": 0.6276, "step": 9200 }, { "epoch": 0.46731225647599345, "grad_norm": 0.02569082334485342, "learning_rate": 0.0009465663873294747, "loss": 0.6059, "step": 9205 }, { "epoch": 0.4675660925740249, "grad_norm": 0.02578745353589086, "learning_rate": 0.0009464667026484774, "loss": 0.6109, "step": 9210 }, { "epoch": 0.46781992867205646, "grad_norm": 0.04347854360585143, "learning_rate": 0.0009463669303282927, "loss": 0.6338, "step": 9215 }, { "epoch": 0.46807376477008794, "grad_norm": 0.03554446657138465, "learning_rate": 0.0009462670703885054, "loss": 0.615, "step": 9220 }, { "epoch": 0.4683276008681195, "grad_norm": 0.035194592885897366, "learning_rate": 0.0009461671228487181, "loss": 0.6292, "step": 9225 }, { "epoch": 0.46858143696615095, "grad_norm": 0.048381105428334986, "learning_rate": 0.0009460670877285493, "loss": 0.6208, "step": 9230 }, { "epoch": 0.46883527306418243, "grad_norm": 0.029440014096268563, "learning_rate": 0.0009459669650476359, "loss": 0.6429, "step": 9235 }, { "epoch": 0.46908910916221397, "grad_norm": 0.032081756608336384, "learning_rate": 0.0009458667548256312, "loss": 0.6007, "step": 9240 }, { "epoch": 0.46934294526024545, "grad_norm": 0.029846212941765773, "learning_rate": 0.0009457664570822061, "loss": 0.6394, "step": 9245 }, { "epoch": 0.469596781358277, "grad_norm": 0.029316590912338028, "learning_rate": 0.0009456660718370484, "loss": 0.6067, "step": 9250 }, { "epoch": 0.46985061745630846, "grad_norm": 0.0239561828979098, "learning_rate": 0.0009455655991098635, "loss": 0.6099, "step": 9255 }, { "epoch": 0.47010445355433994, "grad_norm": 0.031644371771562194, "learning_rate": 0.0009454650389203735, "loss": 0.6135, "step": 9260 }, { "epoch": 0.4703582896523715, "grad_norm": 0.02960336906370349, "learning_rate": 0.0009453643912883179, "loss": 0.5989, "step": 9265 }, { "epoch": 0.47061212575040295, "grad_norm": 0.026335689130472083, "learning_rate": 0.0009452636562334532, "loss": 0.6412, "step": 9270 }, { "epoch": 0.4708659618484345, "grad_norm": 0.02648170596691074, "learning_rate": 0.0009451628337755533, "loss": 0.5987, "step": 9275 }, { "epoch": 0.47111979794646597, "grad_norm": 0.09832497965370343, "learning_rate": 0.0009450619239344094, "loss": 0.6353, "step": 9280 }, { "epoch": 0.47137363404449745, "grad_norm": 0.04865339323397357, "learning_rate": 0.0009449609267298292, "loss": 0.6184, "step": 9285 }, { "epoch": 0.471627470142529, "grad_norm": 0.0675079877924424, "learning_rate": 0.000944859842181638, "loss": 0.6434, "step": 9290 }, { "epoch": 0.47188130624056046, "grad_norm": 0.049608513841766726, "learning_rate": 0.0009447586703096784, "loss": 0.6152, "step": 9295 }, { "epoch": 0.472135142338592, "grad_norm": 0.027169536333829045, "learning_rate": 0.0009446574111338097, "loss": 0.6311, "step": 9300 }, { "epoch": 0.4723889784366235, "grad_norm": 0.03714479241574352, "learning_rate": 0.0009445560646739088, "loss": 0.6124, "step": 9305 }, { "epoch": 0.47264281453465495, "grad_norm": 0.02908329857098156, "learning_rate": 0.0009444546309498693, "loss": 0.5914, "step": 9310 }, { "epoch": 0.4728966506326865, "grad_norm": 0.040933687722797846, "learning_rate": 0.0009443531099816025, "loss": 0.6233, "step": 9315 }, { "epoch": 0.47315048673071797, "grad_norm": 0.039379070667730956, "learning_rate": 0.0009442515017890361, "loss": 0.6072, "step": 9320 }, { "epoch": 0.4734043228287495, "grad_norm": 0.029435204711081575, "learning_rate": 0.0009441498063921152, "loss": 0.6187, "step": 9325 }, { "epoch": 0.473658158926781, "grad_norm": 0.03446056203368723, "learning_rate": 0.0009440480238108025, "loss": 0.6397, "step": 9330 }, { "epoch": 0.47391199502481246, "grad_norm": 0.033665244711908196, "learning_rate": 0.000943946154065077, "loss": 0.6008, "step": 9335 }, { "epoch": 0.474165831122844, "grad_norm": 0.027286057953673223, "learning_rate": 0.0009438441971749354, "loss": 0.6321, "step": 9340 }, { "epoch": 0.4744196672208755, "grad_norm": 0.027252383518020805, "learning_rate": 0.0009437421531603916, "loss": 0.6052, "step": 9345 }, { "epoch": 0.474673503318907, "grad_norm": 0.0323130206125324, "learning_rate": 0.0009436400220414758, "loss": 0.5934, "step": 9350 }, { "epoch": 0.4749273394169385, "grad_norm": 0.05540094012390889, "learning_rate": 0.0009435378038382363, "loss": 0.6281, "step": 9355 }, { "epoch": 0.47518117551496997, "grad_norm": 0.026971796001700972, "learning_rate": 0.0009434354985707376, "loss": 0.5914, "step": 9360 }, { "epoch": 0.4754350116130015, "grad_norm": 0.028590512194596497, "learning_rate": 0.0009433331062590621, "loss": 0.5702, "step": 9365 }, { "epoch": 0.475688847711033, "grad_norm": 0.03025843533876914, "learning_rate": 0.0009432306269233087, "loss": 0.6067, "step": 9370 }, { "epoch": 0.47594268380906446, "grad_norm": 0.038050502244553336, "learning_rate": 0.0009431280605835937, "loss": 0.5976, "step": 9375 }, { "epoch": 0.476196519907096, "grad_norm": 0.02875268521706395, "learning_rate": 0.0009430254072600501, "loss": 0.6181, "step": 9380 }, { "epoch": 0.4764503560051275, "grad_norm": 0.03480214322443527, "learning_rate": 0.0009429226669728285, "loss": 0.5914, "step": 9385 }, { "epoch": 0.476704192103159, "grad_norm": 0.02601422654788008, "learning_rate": 0.0009428198397420964, "loss": 0.5903, "step": 9390 }, { "epoch": 0.4769580282011905, "grad_norm": 0.025876539373789892, "learning_rate": 0.0009427169255880379, "loss": 0.6328, "step": 9395 }, { "epoch": 0.47721186429922197, "grad_norm": 0.04738818931522865, "learning_rate": 0.0009426139245308548, "loss": 0.5819, "step": 9400 }, { "epoch": 0.4774657003972535, "grad_norm": 0.035112867568037956, "learning_rate": 0.0009425108365907658, "loss": 0.6039, "step": 9405 }, { "epoch": 0.477719536495285, "grad_norm": 0.03811522461096082, "learning_rate": 0.0009424076617880059, "loss": 0.5912, "step": 9410 }, { "epoch": 0.4779733725933165, "grad_norm": 0.03389803328245173, "learning_rate": 0.0009423044001428287, "loss": 0.5831, "step": 9415 }, { "epoch": 0.478227208691348, "grad_norm": 0.03133776562544537, "learning_rate": 0.0009422010516755034, "loss": 0.6577, "step": 9420 }, { "epoch": 0.4784810447893795, "grad_norm": 0.0269014540427202, "learning_rate": 0.0009420976164063169, "loss": 0.6213, "step": 9425 }, { "epoch": 0.478734880887411, "grad_norm": 0.02669607689703728, "learning_rate": 0.0009419940943555731, "loss": 0.6164, "step": 9430 }, { "epoch": 0.4789887169854425, "grad_norm": 0.026824612081548814, "learning_rate": 0.0009418904855435927, "loss": 0.6229, "step": 9435 }, { "epoch": 0.479242553083474, "grad_norm": 0.046154821680137924, "learning_rate": 0.0009417867899907138, "loss": 0.5931, "step": 9440 }, { "epoch": 0.4794963891815055, "grad_norm": 0.036033799620862, "learning_rate": 0.0009416830077172911, "loss": 0.6269, "step": 9445 }, { "epoch": 0.479750225279537, "grad_norm": 0.06210323273721612, "learning_rate": 0.0009415791387436968, "loss": 0.6021, "step": 9450 }, { "epoch": 0.4800040613775685, "grad_norm": 0.027549653199184822, "learning_rate": 0.0009414751830903195, "loss": 0.6554, "step": 9455 }, { "epoch": 0.4802578974756, "grad_norm": 0.03550040518296244, "learning_rate": 0.0009413711407775655, "loss": 0.6116, "step": 9460 }, { "epoch": 0.4805117335736315, "grad_norm": 0.025622868890069674, "learning_rate": 0.0009412670118258578, "loss": 0.6054, "step": 9465 }, { "epoch": 0.480765569671663, "grad_norm": 0.04189228398723917, "learning_rate": 0.0009411627962556359, "loss": 0.6122, "step": 9470 }, { "epoch": 0.4810194057696945, "grad_norm": 0.02570476986142007, "learning_rate": 0.0009410584940873574, "loss": 0.6176, "step": 9475 }, { "epoch": 0.481273241867726, "grad_norm": 0.07031584927871945, "learning_rate": 0.0009409541053414963, "loss": 0.5885, "step": 9480 }, { "epoch": 0.4815270779657575, "grad_norm": 0.03149142845263447, "learning_rate": 0.000940849630038543, "loss": 0.6325, "step": 9485 }, { "epoch": 0.48178091406378903, "grad_norm": 0.049358378222708374, "learning_rate": 0.0009407450681990061, "loss": 0.6283, "step": 9490 }, { "epoch": 0.4820347501618205, "grad_norm": 0.025008692305935783, "learning_rate": 0.0009406404198434102, "loss": 0.6001, "step": 9495 }, { "epoch": 0.482288586259852, "grad_norm": 0.030799405008239217, "learning_rate": 0.0009405356849922972, "loss": 0.638, "step": 9500 }, { "epoch": 0.4825424223578835, "grad_norm": 0.024246543364429107, "learning_rate": 0.0009404308636662264, "loss": 0.6356, "step": 9505 }, { "epoch": 0.482796258455915, "grad_norm": 0.03302243412784637, "learning_rate": 0.0009403259558857734, "loss": 0.6112, "step": 9510 }, { "epoch": 0.48305009455394654, "grad_norm": 0.023532175082063685, "learning_rate": 0.0009402209616715311, "loss": 0.585, "step": 9515 }, { "epoch": 0.483303930651978, "grad_norm": 0.04255227744929531, "learning_rate": 0.0009401158810441095, "loss": 0.6327, "step": 9520 }, { "epoch": 0.4835577667500095, "grad_norm": 0.024840322869465154, "learning_rate": 0.0009400107140241354, "loss": 0.6208, "step": 9525 }, { "epoch": 0.48381160284804103, "grad_norm": 0.024921227435088424, "learning_rate": 0.0009399054606322524, "loss": 0.6054, "step": 9530 }, { "epoch": 0.4840654389460725, "grad_norm": 0.031344853057544725, "learning_rate": 0.0009398001208891212, "loss": 0.5989, "step": 9535 }, { "epoch": 0.48431927504410405, "grad_norm": 0.14707366575213146, "learning_rate": 0.0009396946948154194, "loss": 0.6113, "step": 9540 }, { "epoch": 0.4845731111421355, "grad_norm": 0.02572297244175927, "learning_rate": 0.0009395891824318421, "loss": 0.6063, "step": 9545 }, { "epoch": 0.484826947240167, "grad_norm": 0.024708458838109962, "learning_rate": 0.0009394835837591004, "loss": 0.6199, "step": 9550 }, { "epoch": 0.48508078333819854, "grad_norm": 0.035794765917293574, "learning_rate": 0.0009393778988179229, "loss": 0.635, "step": 9555 }, { "epoch": 0.48533461943623, "grad_norm": 0.0554244840891106, "learning_rate": 0.0009392721276290549, "loss": 0.6169, "step": 9560 }, { "epoch": 0.48558845553426155, "grad_norm": 0.029279116423008678, "learning_rate": 0.0009391662702132591, "loss": 0.637, "step": 9565 }, { "epoch": 0.48584229163229303, "grad_norm": 0.028687460122576846, "learning_rate": 0.0009390603265913145, "loss": 0.6328, "step": 9570 }, { "epoch": 0.4860961277303245, "grad_norm": 0.03505406613441969, "learning_rate": 0.0009389542967840173, "loss": 0.5973, "step": 9575 }, { "epoch": 0.48634996382835605, "grad_norm": 0.041803466393629654, "learning_rate": 0.0009388481808121807, "loss": 0.599, "step": 9580 }, { "epoch": 0.4866037999263875, "grad_norm": 0.023902502916385335, "learning_rate": 0.0009387419786966348, "loss": 0.5804, "step": 9585 }, { "epoch": 0.486857636024419, "grad_norm": 0.04180433344415987, "learning_rate": 0.0009386356904582265, "loss": 0.6429, "step": 9590 }, { "epoch": 0.48711147212245054, "grad_norm": 0.037702560242762466, "learning_rate": 0.0009385293161178197, "loss": 0.6352, "step": 9595 }, { "epoch": 0.487365308220482, "grad_norm": 0.040454587767540365, "learning_rate": 0.0009384228556962949, "loss": 0.617, "step": 9600 }, { "epoch": 0.48761914431851355, "grad_norm": 0.051660525518630485, "learning_rate": 0.0009383163092145501, "loss": 0.6255, "step": 9605 }, { "epoch": 0.48787298041654503, "grad_norm": 0.03034861191132385, "learning_rate": 0.0009382096766934996, "loss": 0.6528, "step": 9610 }, { "epoch": 0.4881268165145765, "grad_norm": 0.03757127293588768, "learning_rate": 0.000938102958154075, "loss": 0.59, "step": 9615 }, { "epoch": 0.48838065261260805, "grad_norm": 0.07147380234071418, "learning_rate": 0.0009379961536172244, "loss": 0.6392, "step": 9620 }, { "epoch": 0.4886344887106395, "grad_norm": 0.026977337654578798, "learning_rate": 0.0009378892631039132, "loss": 0.6504, "step": 9625 }, { "epoch": 0.48888832480867106, "grad_norm": 0.04626336090648292, "learning_rate": 0.0009377822866351235, "loss": 0.651, "step": 9630 }, { "epoch": 0.48914216090670254, "grad_norm": 0.06597352026364534, "learning_rate": 0.000937675224231854, "loss": 0.6051, "step": 9635 }, { "epoch": 0.489395997004734, "grad_norm": 0.04524993145754299, "learning_rate": 0.0009375680759151206, "loss": 0.6247, "step": 9640 }, { "epoch": 0.48964983310276555, "grad_norm": 0.034280234478550185, "learning_rate": 0.0009374608417059562, "loss": 0.612, "step": 9645 }, { "epoch": 0.48990366920079703, "grad_norm": 0.02631497433619995, "learning_rate": 0.0009373535216254101, "loss": 0.612, "step": 9650 }, { "epoch": 0.49015750529882857, "grad_norm": 0.03832792024810497, "learning_rate": 0.0009372461156945489, "loss": 0.6249, "step": 9655 }, { "epoch": 0.49041134139686005, "grad_norm": 0.033364201876492666, "learning_rate": 0.0009371386239344557, "loss": 0.637, "step": 9660 }, { "epoch": 0.4906651774948915, "grad_norm": 0.03219096028559519, "learning_rate": 0.0009370310463662306, "loss": 0.6425, "step": 9665 }, { "epoch": 0.49091901359292306, "grad_norm": 0.024519457484542897, "learning_rate": 0.0009369233830109905, "loss": 0.5807, "step": 9670 }, { "epoch": 0.49117284969095454, "grad_norm": 0.0341270179716214, "learning_rate": 0.0009368156338898694, "loss": 0.6251, "step": 9675 }, { "epoch": 0.4914266857889861, "grad_norm": 0.024071390232175503, "learning_rate": 0.0009367077990240176, "loss": 0.5962, "step": 9680 }, { "epoch": 0.49168052188701755, "grad_norm": 0.02743756336479237, "learning_rate": 0.0009365998784346028, "loss": 0.6005, "step": 9685 }, { "epoch": 0.49193435798504903, "grad_norm": 0.04122709532915945, "learning_rate": 0.0009364918721428093, "loss": 0.5867, "step": 9690 }, { "epoch": 0.49218819408308057, "grad_norm": 0.02721674730288296, "learning_rate": 0.0009363837801698379, "loss": 0.62, "step": 9695 }, { "epoch": 0.49244203018111204, "grad_norm": 0.04100687725132822, "learning_rate": 0.0009362756025369067, "loss": 0.6184, "step": 9700 }, { "epoch": 0.4926958662791436, "grad_norm": 0.023552301564775294, "learning_rate": 0.0009361673392652505, "loss": 0.5772, "step": 9705 }, { "epoch": 0.49294970237717506, "grad_norm": 0.043765850357777386, "learning_rate": 0.0009360589903761208, "loss": 0.5763, "step": 9710 }, { "epoch": 0.49320353847520654, "grad_norm": 0.0377192420219829, "learning_rate": 0.0009359505558907857, "loss": 0.5907, "step": 9715 }, { "epoch": 0.4934573745732381, "grad_norm": 0.04173445599728607, "learning_rate": 0.0009358420358305307, "loss": 0.6209, "step": 9720 }, { "epoch": 0.49371121067126955, "grad_norm": 0.02795345786794999, "learning_rate": 0.0009357334302166577, "loss": 0.6139, "step": 9725 }, { "epoch": 0.4939650467693011, "grad_norm": 0.030534586657452248, "learning_rate": 0.0009356247390704853, "loss": 0.6334, "step": 9730 }, { "epoch": 0.49421888286733257, "grad_norm": 0.024191524057541644, "learning_rate": 0.0009355159624133489, "loss": 0.5692, "step": 9735 }, { "epoch": 0.49447271896536404, "grad_norm": 0.03323609638954307, "learning_rate": 0.0009354071002666011, "loss": 0.6106, "step": 9740 }, { "epoch": 0.4947265550633956, "grad_norm": 0.024949282216645566, "learning_rate": 0.000935298152651611, "loss": 0.5978, "step": 9745 }, { "epoch": 0.49498039116142706, "grad_norm": 0.06663669858744471, "learning_rate": 0.0009351891195897644, "loss": 0.623, "step": 9750 }, { "epoch": 0.4952342272594586, "grad_norm": 0.032099026906883724, "learning_rate": 0.0009350800011024636, "loss": 0.6189, "step": 9755 }, { "epoch": 0.49548806335749007, "grad_norm": 0.0663218764773123, "learning_rate": 0.0009349707972111285, "loss": 0.6074, "step": 9760 }, { "epoch": 0.49574189945552155, "grad_norm": 0.025197965203499328, "learning_rate": 0.0009348615079371952, "loss": 0.5815, "step": 9765 }, { "epoch": 0.4959957355535531, "grad_norm": 0.04923718941099546, "learning_rate": 0.0009347521333021165, "loss": 0.6104, "step": 9770 }, { "epoch": 0.49624957165158456, "grad_norm": 0.024233669045321673, "learning_rate": 0.000934642673327362, "loss": 0.5966, "step": 9775 }, { "epoch": 0.4965034077496161, "grad_norm": 0.02630064833029989, "learning_rate": 0.0009345331280344184, "loss": 0.6308, "step": 9780 }, { "epoch": 0.4967572438476476, "grad_norm": 0.03733420897200894, "learning_rate": 0.0009344234974447888, "loss": 0.5984, "step": 9785 }, { "epoch": 0.49701107994567906, "grad_norm": 0.03288604787671546, "learning_rate": 0.0009343137815799931, "loss": 0.6278, "step": 9790 }, { "epoch": 0.4972649160437106, "grad_norm": 0.02870697067749721, "learning_rate": 0.000934203980461568, "loss": 0.5907, "step": 9795 }, { "epoch": 0.49751875214174207, "grad_norm": 0.029210706904460447, "learning_rate": 0.0009340940941110669, "loss": 0.623, "step": 9800 }, { "epoch": 0.4977725882397736, "grad_norm": 0.022650840666329053, "learning_rate": 0.00093398412255006, "loss": 0.585, "step": 9805 }, { "epoch": 0.4980264243378051, "grad_norm": 0.033968761190204845, "learning_rate": 0.000933874065800134, "loss": 0.6272, "step": 9810 }, { "epoch": 0.49828026043583656, "grad_norm": 0.03225151989127355, "learning_rate": 0.0009337639238828927, "loss": 0.5957, "step": 9815 }, { "epoch": 0.4985340965338681, "grad_norm": 0.03766789630190195, "learning_rate": 0.0009336536968199562, "loss": 0.5961, "step": 9820 }, { "epoch": 0.4987879326318996, "grad_norm": 0.024291519990984625, "learning_rate": 0.0009335433846329618, "loss": 0.5848, "step": 9825 }, { "epoch": 0.49904176872993106, "grad_norm": 1.1081797676644811, "learning_rate": 0.000933432987343563, "loss": 0.6314, "step": 9830 }, { "epoch": 0.4992956048279626, "grad_norm": 0.06216658843899505, "learning_rate": 0.0009333225049734303, "loss": 0.6137, "step": 9835 }, { "epoch": 0.49954944092599407, "grad_norm": 0.06842641946547982, "learning_rate": 0.0009332119375442509, "loss": 0.6494, "step": 9840 }, { "epoch": 0.4998032770240256, "grad_norm": 0.07771247073389582, "learning_rate": 0.0009331012850777286, "loss": 0.6461, "step": 9845 }, { "epoch": 0.5000571131220571, "grad_norm": 0.09586623316765225, "learning_rate": 0.0009329905475955838, "loss": 0.6127, "step": 9850 }, { "epoch": 0.5003109492200886, "grad_norm": 0.0549907807273626, "learning_rate": 0.0009328797251195539, "loss": 0.6336, "step": 9855 }, { "epoch": 0.5005647853181201, "grad_norm": 0.03707374666368335, "learning_rate": 0.0009327688176713927, "loss": 0.6643, "step": 9860 }, { "epoch": 0.5008186214161516, "grad_norm": 0.06451159664108401, "learning_rate": 0.0009326578252728708, "loss": 0.6221, "step": 9865 }, { "epoch": 0.5010724575141831, "grad_norm": 0.07176924497990926, "learning_rate": 0.0009325467479457754, "loss": 0.6478, "step": 9870 }, { "epoch": 0.5013262936122146, "grad_norm": 0.05337950431785672, "learning_rate": 0.0009324355857119106, "loss": 0.6161, "step": 9875 }, { "epoch": 0.5015801297102461, "grad_norm": 0.040267440416152635, "learning_rate": 0.0009323243385930968, "loss": 0.6131, "step": 9880 }, { "epoch": 0.5018339658082775, "grad_norm": 0.03271936843258226, "learning_rate": 0.0009322130066111713, "loss": 0.662, "step": 9885 }, { "epoch": 0.5020878019063091, "grad_norm": 0.029561308985711136, "learning_rate": 0.0009321015897879883, "loss": 0.6276, "step": 9890 }, { "epoch": 0.5023416380043406, "grad_norm": 0.053789724310795484, "learning_rate": 0.0009319900881454179, "loss": 0.6598, "step": 9895 }, { "epoch": 0.5025954741023722, "grad_norm": 0.040676726302224304, "learning_rate": 0.0009318785017053475, "loss": 0.6075, "step": 9900 }, { "epoch": 0.5028493102004036, "grad_norm": 0.04707911068377355, "learning_rate": 0.0009317668304896811, "loss": 0.6384, "step": 9905 }, { "epoch": 0.5031031462984351, "grad_norm": 0.02729704500716682, "learning_rate": 0.000931655074520339, "loss": 0.6042, "step": 9910 }, { "epoch": 0.5033569823964666, "grad_norm": 0.041817519847404235, "learning_rate": 0.0009315432338192584, "loss": 0.6407, "step": 9915 }, { "epoch": 0.5036108184944981, "grad_norm": 0.06693004074647073, "learning_rate": 0.0009314313084083933, "loss": 0.6267, "step": 9920 }, { "epoch": 0.5038646545925296, "grad_norm": 0.037734517481965976, "learning_rate": 0.0009313192983097137, "loss": 0.6235, "step": 9925 }, { "epoch": 0.5041184906905611, "grad_norm": 0.04441041670706489, "learning_rate": 0.0009312072035452069, "loss": 0.6722, "step": 9930 }, { "epoch": 0.5043723267885926, "grad_norm": 0.06966465373395124, "learning_rate": 0.0009310950241368765, "loss": 0.6524, "step": 9935 }, { "epoch": 0.5046261628866241, "grad_norm": 0.11851335463883052, "learning_rate": 0.0009309827601067428, "loss": 0.6386, "step": 9940 }, { "epoch": 0.5048799989846556, "grad_norm": 0.044597706923391586, "learning_rate": 0.0009308704114768425, "loss": 0.6495, "step": 9945 }, { "epoch": 0.505133835082687, "grad_norm": 0.048673205080514974, "learning_rate": 0.0009307579782692291, "loss": 0.6183, "step": 9950 }, { "epoch": 0.5053876711807186, "grad_norm": 0.03281635254658095, "learning_rate": 0.0009306454605059729, "loss": 0.6426, "step": 9955 }, { "epoch": 0.5056415072787501, "grad_norm": 0.046125277826759015, "learning_rate": 0.0009305328582091603, "loss": 0.6343, "step": 9960 }, { "epoch": 0.5058953433767817, "grad_norm": 0.03821293529358294, "learning_rate": 0.0009304201714008948, "loss": 0.6326, "step": 9965 }, { "epoch": 0.5061491794748131, "grad_norm": 0.03994101386720053, "learning_rate": 0.0009303074001032961, "loss": 0.6243, "step": 9970 }, { "epoch": 0.5064030155728446, "grad_norm": 0.04022586000406283, "learning_rate": 0.0009301945443385007, "loss": 0.6491, "step": 9975 }, { "epoch": 0.5066568516708762, "grad_norm": 0.045502011916775865, "learning_rate": 0.0009300816041286617, "loss": 0.6329, "step": 9980 }, { "epoch": 0.5069106877689076, "grad_norm": 0.031196383196199537, "learning_rate": 0.0009299685794959485, "loss": 0.6071, "step": 9985 }, { "epoch": 0.5071645238669391, "grad_norm": 0.05198326476515164, "learning_rate": 0.0009298554704625474, "loss": 0.6269, "step": 9990 }, { "epoch": 0.5074183599649706, "grad_norm": 0.044150229220093255, "learning_rate": 0.0009297422770506613, "loss": 0.6137, "step": 9995 }, { "epoch": 0.5076721960630021, "grad_norm": 0.03554285265944353, "learning_rate": 0.0009296289992825091, "loss": 0.6603, "step": 10000 }, { "epoch": 0.5079260321610336, "grad_norm": 0.02972651584300317, "learning_rate": 0.0009295156371803271, "loss": 0.6034, "step": 10005 }, { "epoch": 0.5081798682590651, "grad_norm": 0.04646869581333915, "learning_rate": 0.0009294021907663674, "loss": 0.6213, "step": 10010 }, { "epoch": 0.5084337043570967, "grad_norm": 0.04166905651002559, "learning_rate": 0.0009292886600628991, "loss": 0.6103, "step": 10015 }, { "epoch": 0.5086875404551281, "grad_norm": 0.05922446812891187, "learning_rate": 0.0009291750450922078, "loss": 0.6225, "step": 10020 }, { "epoch": 0.5089413765531596, "grad_norm": 0.024191658630679954, "learning_rate": 0.0009290613458765953, "loss": 0.6064, "step": 10025 }, { "epoch": 0.5091952126511912, "grad_norm": 0.06362952277601344, "learning_rate": 0.0009289475624383804, "loss": 0.6077, "step": 10030 }, { "epoch": 0.5094490487492226, "grad_norm": 0.040791426024977054, "learning_rate": 0.0009288336947998981, "loss": 0.6121, "step": 10035 }, { "epoch": 0.5097028848472541, "grad_norm": 0.07317831667654075, "learning_rate": 0.0009287197429835002, "loss": 0.6198, "step": 10040 }, { "epoch": 0.5099567209452857, "grad_norm": 0.031549492144105, "learning_rate": 0.0009286057070115545, "loss": 0.6359, "step": 10045 }, { "epoch": 0.5102105570433171, "grad_norm": 0.047125852190035775, "learning_rate": 0.0009284915869064463, "loss": 0.6185, "step": 10050 }, { "epoch": 0.5104643931413486, "grad_norm": 0.03542009861223921, "learning_rate": 0.0009283773826905764, "loss": 0.6187, "step": 10055 }, { "epoch": 0.5107182292393802, "grad_norm": 0.026666894145488004, "learning_rate": 0.0009282630943863625, "loss": 0.6106, "step": 10060 }, { "epoch": 0.5109720653374117, "grad_norm": 0.05545034501844204, "learning_rate": 0.0009281487220162388, "loss": 0.6283, "step": 10065 }, { "epoch": 0.5112259014354431, "grad_norm": 0.02963889637887912, "learning_rate": 0.0009280342656026564, "loss": 0.6136, "step": 10070 }, { "epoch": 0.5114797375334746, "grad_norm": 0.05901358817696739, "learning_rate": 0.0009279197251680822, "loss": 0.6145, "step": 10075 }, { "epoch": 0.5117335736315062, "grad_norm": 0.040264054246851005, "learning_rate": 0.000927805100735, "loss": 0.6421, "step": 10080 }, { "epoch": 0.5119874097295376, "grad_norm": 0.0649111774735333, "learning_rate": 0.0009276903923259099, "loss": 0.6036, "step": 10085 }, { "epoch": 0.5122412458275691, "grad_norm": 0.037817792224739064, "learning_rate": 0.0009275755999633286, "loss": 0.6248, "step": 10090 }, { "epoch": 0.5124950819256007, "grad_norm": 0.03397505282281072, "learning_rate": 0.0009274607236697895, "loss": 0.612, "step": 10095 }, { "epoch": 0.5127489180236321, "grad_norm": 0.05208015537649028, "learning_rate": 0.000927345763467842, "loss": 0.6096, "step": 10100 }, { "epoch": 0.5130027541216636, "grad_norm": 0.08839766979888053, "learning_rate": 0.0009272307193800524, "loss": 0.6074, "step": 10105 }, { "epoch": 0.5132565902196952, "grad_norm": 0.03841799364281357, "learning_rate": 0.000927115591429003, "loss": 0.6152, "step": 10110 }, { "epoch": 0.5135104263177267, "grad_norm": 0.03045633762227496, "learning_rate": 0.0009270003796372933, "loss": 0.6044, "step": 10115 }, { "epoch": 0.5137642624157581, "grad_norm": 0.02569055323506306, "learning_rate": 0.0009268850840275382, "loss": 0.6355, "step": 10120 }, { "epoch": 0.5140180985137897, "grad_norm": 0.026408037229659734, "learning_rate": 0.0009267697046223702, "loss": 0.6462, "step": 10125 }, { "epoch": 0.5142719346118212, "grad_norm": 0.025909580463186872, "learning_rate": 0.0009266542414444374, "loss": 0.5864, "step": 10130 }, { "epoch": 0.5145257707098526, "grad_norm": 0.03722734324648191, "learning_rate": 0.0009265386945164049, "loss": 0.6161, "step": 10135 }, { "epoch": 0.5147796068078841, "grad_norm": 0.03566709272968282, "learning_rate": 0.0009264230638609535, "loss": 0.6085, "step": 10140 }, { "epoch": 0.5150334429059157, "grad_norm": 0.06277602947724216, "learning_rate": 0.0009263073495007814, "loss": 0.6338, "step": 10145 }, { "epoch": 0.5152872790039471, "grad_norm": 0.058049067673817435, "learning_rate": 0.0009261915514586026, "loss": 0.6555, "step": 10150 }, { "epoch": 0.5155411151019786, "grad_norm": 0.03795674743934634, "learning_rate": 0.0009260756697571477, "loss": 0.6693, "step": 10155 }, { "epoch": 0.5157949512000102, "grad_norm": 0.04777864042916011, "learning_rate": 0.0009259597044191636, "loss": 0.6382, "step": 10160 }, { "epoch": 0.5160487872980416, "grad_norm": 0.03739573996244197, "learning_rate": 0.0009258436554674137, "loss": 0.638, "step": 10165 }, { "epoch": 0.5163026233960731, "grad_norm": 0.028436191862654784, "learning_rate": 0.000925727522924678, "loss": 0.6003, "step": 10170 }, { "epoch": 0.5165564594941047, "grad_norm": 0.03204307790523904, "learning_rate": 0.0009256113068137526, "loss": 0.6642, "step": 10175 }, { "epoch": 0.5168102955921362, "grad_norm": 0.055050382059251654, "learning_rate": 0.0009254950071574502, "loss": 0.6275, "step": 10180 }, { "epoch": 0.5170641316901676, "grad_norm": 0.026441208032810616, "learning_rate": 0.0009253786239785999, "loss": 0.6071, "step": 10185 }, { "epoch": 0.5173179677881992, "grad_norm": 0.035768658322057015, "learning_rate": 0.0009252621573000472, "loss": 0.6499, "step": 10190 }, { "epoch": 0.5175718038862307, "grad_norm": 0.024019701385016478, "learning_rate": 0.0009251456071446536, "loss": 0.5909, "step": 10195 }, { "epoch": 0.5178256399842621, "grad_norm": 0.0261249278412097, "learning_rate": 0.0009250289735352975, "loss": 0.6388, "step": 10200 }, { "epoch": 0.5180794760822937, "grad_norm": 0.05688851509224552, "learning_rate": 0.0009249122564948736, "loss": 0.6392, "step": 10205 }, { "epoch": 0.5183333121803252, "grad_norm": 0.02773345047952887, "learning_rate": 0.0009247954560462928, "loss": 0.6311, "step": 10210 }, { "epoch": 0.5185871482783566, "grad_norm": 0.0270974270992603, "learning_rate": 0.0009246785722124823, "loss": 0.6285, "step": 10215 }, { "epoch": 0.5188409843763881, "grad_norm": 0.03094525704725652, "learning_rate": 0.0009245616050163861, "loss": 0.6084, "step": 10220 }, { "epoch": 0.5190948204744197, "grad_norm": 0.0302889231805603, "learning_rate": 0.000924444554480964, "loss": 0.6384, "step": 10225 }, { "epoch": 0.5193486565724512, "grad_norm": 0.053230977077894856, "learning_rate": 0.0009243274206291926, "loss": 0.5947, "step": 10230 }, { "epoch": 0.5196024926704826, "grad_norm": 0.06540135430875998, "learning_rate": 0.0009242102034840647, "loss": 0.6397, "step": 10235 }, { "epoch": 0.5198563287685142, "grad_norm": 0.026623481542029606, "learning_rate": 0.0009240929030685893, "loss": 0.6642, "step": 10240 }, { "epoch": 0.5201101648665457, "grad_norm": 0.034876098813892115, "learning_rate": 0.0009239755194057921, "loss": 0.6457, "step": 10245 }, { "epoch": 0.5203640009645771, "grad_norm": 0.03378616222313645, "learning_rate": 0.0009238580525187146, "loss": 0.5793, "step": 10250 }, { "epoch": 0.5206178370626087, "grad_norm": 0.1052756526973527, "learning_rate": 0.0009237405024304153, "loss": 0.6458, "step": 10255 }, { "epoch": 0.5208716731606402, "grad_norm": 0.03839663563882219, "learning_rate": 0.0009236228691639686, "loss": 0.665, "step": 10260 }, { "epoch": 0.5211255092586716, "grad_norm": 0.033097916077724686, "learning_rate": 0.0009235051527424652, "loss": 0.6032, "step": 10265 }, { "epoch": 0.5213793453567032, "grad_norm": 0.04114159903818375, "learning_rate": 0.0009233873531890123, "loss": 0.6592, "step": 10270 }, { "epoch": 0.5216331814547347, "grad_norm": 0.059483637355810894, "learning_rate": 0.0009232694705267335, "loss": 0.6168, "step": 10275 }, { "epoch": 0.5218870175527662, "grad_norm": 0.03912192748754646, "learning_rate": 0.0009231515047787686, "loss": 0.6632, "step": 10280 }, { "epoch": 0.5221408536507977, "grad_norm": 0.03455355947863813, "learning_rate": 0.0009230334559682734, "loss": 0.6174, "step": 10285 }, { "epoch": 0.5223946897488292, "grad_norm": 0.06201615106125605, "learning_rate": 0.0009229153241184204, "loss": 0.6291, "step": 10290 }, { "epoch": 0.5226485258468607, "grad_norm": 0.03866401919091889, "learning_rate": 0.0009227971092523983, "loss": 0.6441, "step": 10295 }, { "epoch": 0.5229023619448921, "grad_norm": 0.04274992026640168, "learning_rate": 0.0009226788113934123, "loss": 0.6226, "step": 10300 }, { "epoch": 0.5231561980429237, "grad_norm": 0.050672917058466604, "learning_rate": 0.0009225604305646835, "loss": 0.6336, "step": 10305 }, { "epoch": 0.5234100341409552, "grad_norm": 0.03842833678462949, "learning_rate": 0.0009224419667894495, "loss": 0.6085, "step": 10310 }, { "epoch": 0.5236638702389866, "grad_norm": 0.038015665579822645, "learning_rate": 0.000922323420090964, "loss": 0.6214, "step": 10315 }, { "epoch": 0.5239177063370182, "grad_norm": 0.03004579581769112, "learning_rate": 0.0009222047904924975, "loss": 0.5919, "step": 10320 }, { "epoch": 0.5241715424350497, "grad_norm": 0.03190679818928855, "learning_rate": 0.000922086078017336, "loss": 0.6159, "step": 10325 }, { "epoch": 0.5244253785330812, "grad_norm": 0.029075752083056672, "learning_rate": 0.0009219672826887824, "loss": 0.5941, "step": 10330 }, { "epoch": 0.5246792146311127, "grad_norm": 0.034715107108026645, "learning_rate": 0.0009218484045301554, "loss": 0.6209, "step": 10335 }, { "epoch": 0.5249330507291442, "grad_norm": 0.02962263191628107, "learning_rate": 0.0009217294435647905, "loss": 0.6439, "step": 10340 }, { "epoch": 0.5251868868271757, "grad_norm": 0.0357418067321709, "learning_rate": 0.0009216103998160389, "loss": 0.6227, "step": 10345 }, { "epoch": 0.5254407229252072, "grad_norm": 0.05508757844826437, "learning_rate": 0.0009214912733072685, "loss": 0.618, "step": 10350 }, { "epoch": 0.5256945590232387, "grad_norm": 0.04548692629351668, "learning_rate": 0.0009213720640618631, "loss": 0.6661, "step": 10355 }, { "epoch": 0.5259483951212702, "grad_norm": 0.03260375865114965, "learning_rate": 0.0009212527721032226, "loss": 0.6416, "step": 10360 }, { "epoch": 0.5262022312193017, "grad_norm": 0.029374475221709984, "learning_rate": 0.000921133397454764, "loss": 0.5965, "step": 10365 }, { "epoch": 0.5264560673173332, "grad_norm": 0.025766929716711365, "learning_rate": 0.0009210139401399197, "loss": 0.64, "step": 10370 }, { "epoch": 0.5267099034153647, "grad_norm": 0.028038442454314576, "learning_rate": 0.0009208944001821384, "loss": 0.6088, "step": 10375 }, { "epoch": 0.5269637395133961, "grad_norm": 0.052489144282257295, "learning_rate": 0.0009207747776048855, "loss": 0.6408, "step": 10380 }, { "epoch": 0.5272175756114277, "grad_norm": 0.03444955545077113, "learning_rate": 0.000920655072431642, "loss": 0.5995, "step": 10385 }, { "epoch": 0.5274714117094592, "grad_norm": 0.03088222308172369, "learning_rate": 0.0009205352846859056, "loss": 0.6194, "step": 10390 }, { "epoch": 0.5277252478074907, "grad_norm": 0.027998757741213697, "learning_rate": 0.0009204154143911903, "loss": 0.63, "step": 10395 }, { "epoch": 0.5279790839055222, "grad_norm": 0.04158454624606828, "learning_rate": 0.0009202954615710256, "loss": 0.5983, "step": 10400 }, { "epoch": 0.5282329200035537, "grad_norm": 0.0453153646691045, "learning_rate": 0.0009201754262489575, "loss": 0.6328, "step": 10405 }, { "epoch": 0.5284867561015852, "grad_norm": 0.04473664568912869, "learning_rate": 0.0009200553084485491, "loss": 0.6301, "step": 10410 }, { "epoch": 0.5287405921996167, "grad_norm": 0.0382431324349534, "learning_rate": 0.0009199351081933781, "loss": 0.6, "step": 10415 }, { "epoch": 0.5289944282976482, "grad_norm": 0.039893358245096724, "learning_rate": 0.0009198148255070398, "loss": 0.5886, "step": 10420 }, { "epoch": 0.5292482643956797, "grad_norm": 0.058211501320811744, "learning_rate": 0.0009196944604131448, "loss": 0.5985, "step": 10425 }, { "epoch": 0.5295021004937112, "grad_norm": 0.03851407689786308, "learning_rate": 0.0009195740129353202, "loss": 0.5908, "step": 10430 }, { "epoch": 0.5297559365917427, "grad_norm": 0.038851954860420396, "learning_rate": 0.0009194534830972092, "loss": 0.6391, "step": 10435 }, { "epoch": 0.5300097726897742, "grad_norm": 0.026937948392526747, "learning_rate": 0.0009193328709224714, "loss": 0.6123, "step": 10440 }, { "epoch": 0.5302636087878058, "grad_norm": 0.036109446414125626, "learning_rate": 0.0009192121764347822, "loss": 0.5954, "step": 10445 }, { "epoch": 0.5305174448858372, "grad_norm": 0.03316804876731099, "learning_rate": 0.0009190913996578334, "loss": 0.6145, "step": 10450 }, { "epoch": 0.5307712809838687, "grad_norm": 0.06849212460462101, "learning_rate": 0.000918970540615333, "loss": 0.613, "step": 10455 }, { "epoch": 0.5310251170819003, "grad_norm": 0.03323906997049166, "learning_rate": 0.0009188495993310046, "loss": 0.6341, "step": 10460 }, { "epoch": 0.5312789531799317, "grad_norm": 0.03941007227631917, "learning_rate": 0.0009187285758285889, "loss": 0.5978, "step": 10465 }, { "epoch": 0.5315327892779632, "grad_norm": 0.02635949534355945, "learning_rate": 0.0009186074701318419, "loss": 0.5906, "step": 10470 }, { "epoch": 0.5317866253759947, "grad_norm": 0.04078016260737411, "learning_rate": 0.0009184862822645359, "loss": 0.6263, "step": 10475 }, { "epoch": 0.5320404614740262, "grad_norm": 0.057867925606058655, "learning_rate": 0.0009183650122504598, "loss": 0.6272, "step": 10480 }, { "epoch": 0.5322942975720577, "grad_norm": 0.037243395752825356, "learning_rate": 0.0009182436601134184, "loss": 0.6025, "step": 10485 }, { "epoch": 0.5325481336700892, "grad_norm": 0.04821834594916435, "learning_rate": 0.0009181222258772319, "loss": 0.6018, "step": 10490 }, { "epoch": 0.5328019697681208, "grad_norm": 0.028085738317345087, "learning_rate": 0.0009180007095657379, "loss": 0.5925, "step": 10495 }, { "epoch": 0.5330558058661522, "grad_norm": 0.036071988511367975, "learning_rate": 0.0009178791112027891, "loss": 0.6027, "step": 10500 }, { "epoch": 0.5333096419641837, "grad_norm": 0.02739881003703629, "learning_rate": 0.0009177574308122547, "loss": 0.6641, "step": 10505 }, { "epoch": 0.5335634780622153, "grad_norm": 0.02822235924487654, "learning_rate": 0.00091763566841802, "loss": 0.6287, "step": 10510 }, { "epoch": 0.5338173141602467, "grad_norm": 0.024121670310468826, "learning_rate": 0.0009175138240439864, "loss": 0.5854, "step": 10515 }, { "epoch": 0.5340711502582782, "grad_norm": 0.025342117126993798, "learning_rate": 0.0009173918977140713, "loss": 0.5713, "step": 10520 }, { "epoch": 0.5343249863563098, "grad_norm": 0.027321368031964118, "learning_rate": 0.0009172698894522082, "loss": 0.6106, "step": 10525 }, { "epoch": 0.5345788224543412, "grad_norm": 0.03343234624185458, "learning_rate": 0.0009171477992823467, "loss": 0.6268, "step": 10530 }, { "epoch": 0.5348326585523727, "grad_norm": 0.04732716404477403, "learning_rate": 0.0009170256272284525, "loss": 0.5807, "step": 10535 }, { "epoch": 0.5350864946504043, "grad_norm": 0.02840525748237608, "learning_rate": 0.0009169033733145074, "loss": 0.6045, "step": 10540 }, { "epoch": 0.5353403307484358, "grad_norm": 0.027475221513180086, "learning_rate": 0.0009167810375645091, "loss": 0.6323, "step": 10545 }, { "epoch": 0.5355941668464672, "grad_norm": 0.025750551058873007, "learning_rate": 0.0009166586200024717, "loss": 0.5838, "step": 10550 }, { "epoch": 0.5358480029444987, "grad_norm": 0.03765393114821948, "learning_rate": 0.000916536120652425, "loss": 0.6147, "step": 10555 }, { "epoch": 0.5361018390425303, "grad_norm": 0.03715730591665516, "learning_rate": 0.0009164135395384151, "loss": 0.6005, "step": 10560 }, { "epoch": 0.5363556751405617, "grad_norm": 0.03645041602808113, "learning_rate": 0.0009162908766845041, "loss": 0.5842, "step": 10565 }, { "epoch": 0.5366095112385932, "grad_norm": 0.03100263105448387, "learning_rate": 0.00091616813211477, "loss": 0.5594, "step": 10570 }, { "epoch": 0.5368633473366248, "grad_norm": 0.04226102686913694, "learning_rate": 0.0009160453058533071, "loss": 0.6151, "step": 10575 }, { "epoch": 0.5371171834346562, "grad_norm": 0.02589182283128981, "learning_rate": 0.0009159223979242253, "loss": 0.614, "step": 10580 }, { "epoch": 0.5373710195326877, "grad_norm": 0.028467877248238536, "learning_rate": 0.0009157994083516511, "loss": 0.6492, "step": 10585 }, { "epoch": 0.5376248556307193, "grad_norm": 0.03135996868398204, "learning_rate": 0.0009156763371597266, "loss": 0.6065, "step": 10590 }, { "epoch": 0.5378786917287507, "grad_norm": 0.040987082480228504, "learning_rate": 0.0009155531843726101, "loss": 0.6084, "step": 10595 }, { "epoch": 0.5381325278267822, "grad_norm": 0.029712117528617986, "learning_rate": 0.0009154299500144758, "loss": 0.6086, "step": 10600 }, { "epoch": 0.5383863639248138, "grad_norm": 0.05504579201351947, "learning_rate": 0.0009153066341095142, "loss": 0.6395, "step": 10605 }, { "epoch": 0.5386402000228453, "grad_norm": 0.030518737256477527, "learning_rate": 0.0009151832366819314, "loss": 0.6192, "step": 10610 }, { "epoch": 0.5388940361208767, "grad_norm": 0.029879557984338494, "learning_rate": 0.0009150597577559496, "loss": 0.668, "step": 10615 }, { "epoch": 0.5391478722189083, "grad_norm": 0.03153018920722057, "learning_rate": 0.0009149361973558075, "loss": 0.5801, "step": 10620 }, { "epoch": 0.5394017083169398, "grad_norm": 0.030512110696911215, "learning_rate": 0.000914812555505759, "loss": 0.5942, "step": 10625 }, { "epoch": 0.5396555444149712, "grad_norm": 0.041125863080099585, "learning_rate": 0.0009146888322300745, "loss": 0.6002, "step": 10630 }, { "epoch": 0.5399093805130027, "grad_norm": 0.054165606681991184, "learning_rate": 0.0009145650275530404, "loss": 0.5951, "step": 10635 }, { "epoch": 0.5401632166110343, "grad_norm": 0.029366439611697406, "learning_rate": 0.0009144411414989587, "loss": 0.5834, "step": 10640 }, { "epoch": 0.5404170527090657, "grad_norm": 0.04321665819488211, "learning_rate": 0.0009143171740921479, "loss": 0.61, "step": 10645 }, { "epoch": 0.5406708888070972, "grad_norm": 0.02466718128736207, "learning_rate": 0.0009141931253569418, "loss": 0.6155, "step": 10650 }, { "epoch": 0.5409247249051288, "grad_norm": 0.025734399782198193, "learning_rate": 0.000914068995317691, "loss": 0.6249, "step": 10655 }, { "epoch": 0.5411785610031603, "grad_norm": 0.030956416519900224, "learning_rate": 0.0009139447839987613, "loss": 0.588, "step": 10660 }, { "epoch": 0.5414323971011917, "grad_norm": 0.03328165539133208, "learning_rate": 0.0009138204914245347, "loss": 0.5812, "step": 10665 }, { "epoch": 0.5416862331992233, "grad_norm": 0.03418976866258275, "learning_rate": 0.0009136961176194094, "loss": 0.6126, "step": 10670 }, { "epoch": 0.5419400692972548, "grad_norm": 0.060922163184233144, "learning_rate": 0.0009135716626077994, "loss": 0.5858, "step": 10675 }, { "epoch": 0.5421939053952862, "grad_norm": 0.0316654793096513, "learning_rate": 0.0009134471264141345, "loss": 0.6172, "step": 10680 }, { "epoch": 0.5424477414933178, "grad_norm": 0.033052227554561796, "learning_rate": 0.0009133225090628605, "loss": 0.6176, "step": 10685 }, { "epoch": 0.5427015775913493, "grad_norm": 0.034867711839285526, "learning_rate": 0.0009131978105784394, "loss": 0.6441, "step": 10690 }, { "epoch": 0.5429554136893807, "grad_norm": 0.03242440613542117, "learning_rate": 0.0009130730309853483, "loss": 0.5871, "step": 10695 }, { "epoch": 0.5432092497874123, "grad_norm": 0.02846877585722155, "learning_rate": 0.0009129481703080816, "loss": 0.636, "step": 10700 }, { "epoch": 0.5434630858854438, "grad_norm": 0.03968673666286646, "learning_rate": 0.0009128232285711482, "loss": 0.6109, "step": 10705 }, { "epoch": 0.5437169219834753, "grad_norm": 0.025092489100120463, "learning_rate": 0.0009126982057990738, "loss": 0.6156, "step": 10710 }, { "epoch": 0.5439707580815067, "grad_norm": 0.03529191550732015, "learning_rate": 0.0009125731020163998, "loss": 0.5723, "step": 10715 }, { "epoch": 0.5442245941795383, "grad_norm": 0.027050926910876526, "learning_rate": 0.0009124479172476833, "loss": 0.6045, "step": 10720 }, { "epoch": 0.5444784302775698, "grad_norm": 0.07622157764619372, "learning_rate": 0.0009123226515174976, "loss": 0.5689, "step": 10725 }, { "epoch": 0.5447322663756012, "grad_norm": 0.02751158017548396, "learning_rate": 0.0009121973048504316, "loss": 0.6213, "step": 10730 }, { "epoch": 0.5449861024736328, "grad_norm": 0.049460936685531046, "learning_rate": 0.0009120718772710903, "loss": 0.6061, "step": 10735 }, { "epoch": 0.5452399385716643, "grad_norm": 0.03544395471579363, "learning_rate": 0.0009119463688040945, "loss": 0.5933, "step": 10740 }, { "epoch": 0.5454937746696957, "grad_norm": 0.03400989240869563, "learning_rate": 0.0009118207794740809, "loss": 0.5727, "step": 10745 }, { "epoch": 0.5457476107677273, "grad_norm": 0.03151062837134726, "learning_rate": 0.000911695109305702, "loss": 0.5955, "step": 10750 }, { "epoch": 0.5460014468657588, "grad_norm": 0.02463583148102344, "learning_rate": 0.0009115693583236263, "loss": 0.6051, "step": 10755 }, { "epoch": 0.5462552829637903, "grad_norm": 0.033094296247287235, "learning_rate": 0.0009114435265525381, "loss": 0.5763, "step": 10760 }, { "epoch": 0.5465091190618218, "grad_norm": 0.026074781871230777, "learning_rate": 0.0009113176140171373, "loss": 0.6216, "step": 10765 }, { "epoch": 0.5467629551598533, "grad_norm": 0.03141881910628044, "learning_rate": 0.0009111916207421402, "loss": 0.606, "step": 10770 }, { "epoch": 0.5470167912578848, "grad_norm": 0.029052049913770146, "learning_rate": 0.0009110655467522786, "loss": 0.6244, "step": 10775 }, { "epoch": 0.5472706273559163, "grad_norm": 0.0251807877680953, "learning_rate": 0.0009109393920723001, "loss": 0.5854, "step": 10780 }, { "epoch": 0.5475244634539478, "grad_norm": 0.03612511248761835, "learning_rate": 0.0009108131567269684, "loss": 0.601, "step": 10785 }, { "epoch": 0.5477782995519793, "grad_norm": 0.05249632040568085, "learning_rate": 0.0009106868407410627, "loss": 0.6207, "step": 10790 }, { "epoch": 0.5480321356500107, "grad_norm": 0.030858769777672986, "learning_rate": 0.0009105604441393782, "loss": 0.624, "step": 10795 }, { "epoch": 0.5482859717480423, "grad_norm": 0.029487600818791027, "learning_rate": 0.0009104339669467261, "loss": 0.5926, "step": 10800 }, { "epoch": 0.5485398078460738, "grad_norm": 0.03067140315629251, "learning_rate": 0.0009103074091879331, "loss": 0.623, "step": 10805 }, { "epoch": 0.5487936439441052, "grad_norm": 0.02640097178132819, "learning_rate": 0.0009101807708878418, "loss": 0.6049, "step": 10810 }, { "epoch": 0.5490474800421368, "grad_norm": 0.049525814735870075, "learning_rate": 0.0009100540520713108, "loss": 0.6012, "step": 10815 }, { "epoch": 0.5493013161401683, "grad_norm": 0.04668719700489464, "learning_rate": 0.0009099272527632142, "loss": 0.6075, "step": 10820 }, { "epoch": 0.5495551522381998, "grad_norm": 0.04879877503113339, "learning_rate": 0.0009098003729884423, "loss": 0.6007, "step": 10825 }, { "epoch": 0.5498089883362313, "grad_norm": 0.035478008135277204, "learning_rate": 0.0009096734127719007, "loss": 0.6009, "step": 10830 }, { "epoch": 0.5500628244342628, "grad_norm": 0.06071680497508226, "learning_rate": 0.0009095463721385113, "loss": 0.6103, "step": 10835 }, { "epoch": 0.5503166605322943, "grad_norm": 0.06356999878241335, "learning_rate": 0.0009094192511132116, "loss": 0.6008, "step": 10840 }, { "epoch": 0.5505704966303258, "grad_norm": 0.07522895375448929, "learning_rate": 0.0009092920497209545, "loss": 0.59, "step": 10845 }, { "epoch": 0.5508243327283573, "grad_norm": 0.06152876042967086, "learning_rate": 0.0009091647679867092, "loss": 0.6016, "step": 10850 }, { "epoch": 0.5510781688263888, "grad_norm": 1.435812648824866, "learning_rate": 0.0009090374059354605, "loss": 0.9246, "step": 10855 }, { "epoch": 0.5513320049244202, "grad_norm": 0.16453277042657283, "learning_rate": 0.0009089099635922089, "loss": 0.7979, "step": 10860 }, { "epoch": 0.5515858410224518, "grad_norm": 0.09400053941343282, "learning_rate": 0.0009087824409819706, "loss": 0.7172, "step": 10865 }, { "epoch": 0.5518396771204833, "grad_norm": 0.037709382524332286, "learning_rate": 0.0009086548381297778, "loss": 0.7196, "step": 10870 }, { "epoch": 0.5520935132185149, "grad_norm": 0.058763207110847336, "learning_rate": 0.0009085271550606782, "loss": 0.6644, "step": 10875 }, { "epoch": 0.5523473493165463, "grad_norm": 0.03877532647328554, "learning_rate": 0.0009083993917997354, "loss": 0.6212, "step": 10880 }, { "epoch": 0.5526011854145778, "grad_norm": 0.032910769522172735, "learning_rate": 0.0009082715483720287, "loss": 0.6176, "step": 10885 }, { "epoch": 0.5528550215126093, "grad_norm": 0.06379896202591075, "learning_rate": 0.000908143624802653, "loss": 0.6868, "step": 10890 }, { "epoch": 0.5531088576106408, "grad_norm": 0.03960368139013127, "learning_rate": 0.0009080156211167192, "loss": 0.595, "step": 10895 }, { "epoch": 0.5533626937086723, "grad_norm": 0.03265424433724443, "learning_rate": 0.0009078875373393538, "loss": 0.649, "step": 10900 }, { "epoch": 0.5536165298067038, "grad_norm": 0.03415971908723185, "learning_rate": 0.0009077593734956988, "loss": 0.6206, "step": 10905 }, { "epoch": 0.5538703659047353, "grad_norm": 0.036488130956054673, "learning_rate": 0.0009076311296109125, "loss": 0.67, "step": 10910 }, { "epoch": 0.5541242020027668, "grad_norm": 0.035320885747738756, "learning_rate": 0.0009075028057101682, "loss": 0.6229, "step": 10915 }, { "epoch": 0.5543780381007983, "grad_norm": 0.027348385875276045, "learning_rate": 0.0009073744018186554, "loss": 0.6252, "step": 10920 }, { "epoch": 0.5546318741988299, "grad_norm": 0.05990522832658699, "learning_rate": 0.0009072459179615789, "loss": 0.6792, "step": 10925 }, { "epoch": 0.5548857102968613, "grad_norm": 0.025462128094323643, "learning_rate": 0.0009071173541641598, "loss": 0.5896, "step": 10930 }, { "epoch": 0.5551395463948928, "grad_norm": 0.02807969122992142, "learning_rate": 0.0009069887104516344, "loss": 0.6309, "step": 10935 }, { "epoch": 0.5553933824929244, "grad_norm": 0.036603755788089144, "learning_rate": 0.0009068599868492549, "loss": 0.6168, "step": 10940 }, { "epoch": 0.5556472185909558, "grad_norm": 0.027661489675158593, "learning_rate": 0.0009067311833822887, "loss": 0.6306, "step": 10945 }, { "epoch": 0.5559010546889873, "grad_norm": 0.03077694227440884, "learning_rate": 0.0009066023000760198, "loss": 0.6059, "step": 10950 }, { "epoch": 0.5561548907870189, "grad_norm": 0.03144955523682315, "learning_rate": 0.0009064733369557469, "loss": 0.5914, "step": 10955 }, { "epoch": 0.5564087268850503, "grad_norm": 0.02757118827722829, "learning_rate": 0.0009063442940467852, "loss": 0.6138, "step": 10960 }, { "epoch": 0.5566625629830818, "grad_norm": 0.028124044394521205, "learning_rate": 0.0009062151713744649, "loss": 0.5849, "step": 10965 }, { "epoch": 0.5569163990811133, "grad_norm": 0.025072368259489837, "learning_rate": 0.0009060859689641323, "loss": 0.6349, "step": 10970 }, { "epoch": 0.5571702351791449, "grad_norm": 0.031444917396144835, "learning_rate": 0.0009059566868411492, "loss": 0.6198, "step": 10975 }, { "epoch": 0.5574240712771763, "grad_norm": 0.04501045513342346, "learning_rate": 0.0009058273250308929, "loss": 0.6399, "step": 10980 }, { "epoch": 0.5576779073752078, "grad_norm": 0.03025025685706769, "learning_rate": 0.0009056978835587566, "loss": 0.6388, "step": 10985 }, { "epoch": 0.5579317434732394, "grad_norm": 0.03855276480190827, "learning_rate": 0.0009055683624501489, "loss": 0.6172, "step": 10990 }, { "epoch": 0.5581855795712708, "grad_norm": 0.04172515052455696, "learning_rate": 0.0009054387617304945, "loss": 0.6024, "step": 10995 }, { "epoch": 0.5584394156693023, "grad_norm": 0.02429228351814789, "learning_rate": 0.0009053090814252327, "loss": 0.5964, "step": 11000 }, { "epoch": 0.5586932517673339, "grad_norm": 0.04151154242191302, "learning_rate": 0.0009051793215598197, "loss": 0.6044, "step": 11005 }, { "epoch": 0.5589470878653653, "grad_norm": 0.024858734225047054, "learning_rate": 0.0009050494821597264, "loss": 0.6301, "step": 11010 }, { "epoch": 0.5592009239633968, "grad_norm": 0.028072967963923535, "learning_rate": 0.0009049195632504399, "loss": 0.626, "step": 11015 }, { "epoch": 0.5594547600614284, "grad_norm": 0.03193777274190929, "learning_rate": 0.0009047895648574623, "loss": 0.6255, "step": 11020 }, { "epoch": 0.5597085961594598, "grad_norm": 0.026480125682836954, "learning_rate": 0.0009046594870063118, "loss": 0.6033, "step": 11025 }, { "epoch": 0.5599624322574913, "grad_norm": 0.026097311141357767, "learning_rate": 0.0009045293297225221, "loss": 0.5809, "step": 11030 }, { "epoch": 0.5602162683555229, "grad_norm": 0.0308704453992081, "learning_rate": 0.0009043990930316424, "loss": 0.6089, "step": 11035 }, { "epoch": 0.5604701044535544, "grad_norm": 0.025288526434383552, "learning_rate": 0.0009042687769592375, "loss": 0.6248, "step": 11040 }, { "epoch": 0.5607239405515858, "grad_norm": 0.024553453597092455, "learning_rate": 0.0009041383815308877, "loss": 0.598, "step": 11045 }, { "epoch": 0.5609777766496173, "grad_norm": 0.030367718805119067, "learning_rate": 0.0009040079067721889, "loss": 0.5852, "step": 11050 }, { "epoch": 0.5612316127476489, "grad_norm": 0.03317092988088747, "learning_rate": 0.0009038773527087529, "loss": 0.5776, "step": 11055 }, { "epoch": 0.5614854488456803, "grad_norm": 0.05184744896110897, "learning_rate": 0.0009037467193662068, "loss": 0.6059, "step": 11060 }, { "epoch": 0.5617392849437118, "grad_norm": 0.025577618854582287, "learning_rate": 0.0009036160067701931, "loss": 0.5846, "step": 11065 }, { "epoch": 0.5619931210417434, "grad_norm": 0.03938538897439106, "learning_rate": 0.00090348521494637, "loss": 0.6368, "step": 11070 }, { "epoch": 0.5622469571397748, "grad_norm": 0.038820715798665424, "learning_rate": 0.0009033543439204114, "loss": 0.609, "step": 11075 }, { "epoch": 0.5625007932378063, "grad_norm": 0.029533488100105525, "learning_rate": 0.0009032233937180067, "loss": 0.5961, "step": 11080 }, { "epoch": 0.5627546293358379, "grad_norm": 0.02379671510332352, "learning_rate": 0.0009030923643648607, "loss": 0.5998, "step": 11085 }, { "epoch": 0.5630084654338694, "grad_norm": 0.023448978175139953, "learning_rate": 0.0009029612558866938, "loss": 0.5834, "step": 11090 }, { "epoch": 0.5632623015319008, "grad_norm": 0.024265093856533426, "learning_rate": 0.0009028300683092418, "loss": 0.5921, "step": 11095 }, { "epoch": 0.5635161376299324, "grad_norm": 0.024632811713908073, "learning_rate": 0.0009026988016582564, "loss": 0.6196, "step": 11100 }, { "epoch": 0.5637699737279639, "grad_norm": 0.022302125548208863, "learning_rate": 0.0009025674559595045, "loss": 0.6236, "step": 11105 }, { "epoch": 0.5640238098259953, "grad_norm": 0.036192999100420786, "learning_rate": 0.0009024360312387687, "loss": 0.609, "step": 11110 }, { "epoch": 0.5642776459240268, "grad_norm": 0.0306562971571317, "learning_rate": 0.0009023045275218467, "loss": 0.5926, "step": 11115 }, { "epoch": 0.5645314820220584, "grad_norm": 0.032739588825804515, "learning_rate": 0.0009021729448345524, "loss": 0.6067, "step": 11120 }, { "epoch": 0.5647853181200898, "grad_norm": 0.02788797841269012, "learning_rate": 0.0009020412832027146, "loss": 0.6026, "step": 11125 }, { "epoch": 0.5650391542181213, "grad_norm": 0.025558068319595086, "learning_rate": 0.0009019095426521779, "loss": 0.6021, "step": 11130 }, { "epoch": 0.5652929903161529, "grad_norm": 0.025476402210346454, "learning_rate": 0.0009017777232088023, "loss": 0.6236, "step": 11135 }, { "epoch": 0.5655468264141844, "grad_norm": 0.024392907820231798, "learning_rate": 0.0009016458248984632, "loss": 0.6497, "step": 11140 }, { "epoch": 0.5658006625122158, "grad_norm": 0.02799976269471266, "learning_rate": 0.0009015138477470516, "loss": 0.601, "step": 11145 }, { "epoch": 0.5660544986102474, "grad_norm": 0.027785695512840384, "learning_rate": 0.0009013817917804743, "loss": 0.6225, "step": 11150 }, { "epoch": 0.5663083347082789, "grad_norm": 0.031205524445284426, "learning_rate": 0.0009012496570246529, "loss": 0.6179, "step": 11155 }, { "epoch": 0.5665621708063103, "grad_norm": 0.025209600845023012, "learning_rate": 0.0009011174435055247, "loss": 0.5718, "step": 11160 }, { "epoch": 0.5668160069043419, "grad_norm": 0.03060213292836761, "learning_rate": 0.0009009851512490428, "loss": 0.5973, "step": 11165 }, { "epoch": 0.5670698430023734, "grad_norm": 0.03631452482840258, "learning_rate": 0.0009008527802811754, "loss": 0.5809, "step": 11170 }, { "epoch": 0.5673236791004048, "grad_norm": 0.03908183938816148, "learning_rate": 0.0009007203306279064, "loss": 0.6072, "step": 11175 }, { "epoch": 0.5675775151984364, "grad_norm": 0.035964383599072204, "learning_rate": 0.0009005878023152348, "loss": 0.6082, "step": 11180 }, { "epoch": 0.5678313512964679, "grad_norm": 0.0306440737992479, "learning_rate": 0.0009004551953691754, "loss": 0.6095, "step": 11185 }, { "epoch": 0.5680851873944994, "grad_norm": 0.0513744832338725, "learning_rate": 0.000900322509815758, "loss": 0.6013, "step": 11190 }, { "epoch": 0.5683390234925308, "grad_norm": 0.025114635513695662, "learning_rate": 0.0009001897456810286, "loss": 0.6058, "step": 11195 }, { "epoch": 0.5685928595905624, "grad_norm": 0.04276993258659403, "learning_rate": 0.0009000569029910477, "loss": 0.6343, "step": 11200 }, { "epoch": 0.5688466956885939, "grad_norm": 0.03646375136050398, "learning_rate": 0.0008999239817718918, "loss": 0.6169, "step": 11205 }, { "epoch": 0.5691005317866253, "grad_norm": 0.024107862227893833, "learning_rate": 0.0008997909820496528, "loss": 0.605, "step": 11210 }, { "epoch": 0.5693543678846569, "grad_norm": 0.02312217074080101, "learning_rate": 0.0008996579038504376, "loss": 0.6016, "step": 11215 }, { "epoch": 0.5696082039826884, "grad_norm": 0.026924470535448695, "learning_rate": 0.0008995247472003691, "loss": 0.6151, "step": 11220 }, { "epoch": 0.5698620400807198, "grad_norm": 0.025057655726728318, "learning_rate": 0.0008993915121255852, "loss": 0.6265, "step": 11225 }, { "epoch": 0.5701158761787514, "grad_norm": 0.025445367684530214, "learning_rate": 0.0008992581986522392, "loss": 0.6009, "step": 11230 }, { "epoch": 0.5703697122767829, "grad_norm": 0.0382438609341841, "learning_rate": 0.0008991248068064999, "loss": 0.5777, "step": 11235 }, { "epoch": 0.5706235483748144, "grad_norm": 0.03626958625239848, "learning_rate": 0.0008989913366145515, "loss": 0.5829, "step": 11240 }, { "epoch": 0.5708773844728459, "grad_norm": 0.024880449607419988, "learning_rate": 0.0008988577881025935, "loss": 0.5972, "step": 11245 }, { "epoch": 0.5711312205708774, "grad_norm": 0.02888281590301525, "learning_rate": 0.0008987241612968406, "loss": 0.615, "step": 11250 }, { "epoch": 0.5713850566689089, "grad_norm": 0.03757091988973743, "learning_rate": 0.0008985904562235234, "loss": 0.5978, "step": 11255 }, { "epoch": 0.5716388927669404, "grad_norm": 0.03374674197558035, "learning_rate": 0.0008984566729088874, "loss": 0.581, "step": 11260 }, { "epoch": 0.5718927288649719, "grad_norm": 0.031246163529604482, "learning_rate": 0.0008983228113791937, "loss": 0.6174, "step": 11265 }, { "epoch": 0.5721465649630034, "grad_norm": 0.03523115249240424, "learning_rate": 0.0008981888716607184, "loss": 0.5909, "step": 11270 }, { "epoch": 0.5724004010610348, "grad_norm": 0.058253114080335025, "learning_rate": 0.0008980548537797535, "loss": 0.5974, "step": 11275 }, { "epoch": 0.5726542371590664, "grad_norm": 0.03323643646387071, "learning_rate": 0.0008979207577626058, "loss": 0.6457, "step": 11280 }, { "epoch": 0.5729080732570979, "grad_norm": 0.02636690022689109, "learning_rate": 0.0008977865836355979, "loss": 0.5843, "step": 11285 }, { "epoch": 0.5731619093551293, "grad_norm": 0.02518146447116509, "learning_rate": 0.0008976523314250672, "loss": 0.5996, "step": 11290 }, { "epoch": 0.5734157454531609, "grad_norm": 0.03063657652619607, "learning_rate": 0.0008975180011573669, "loss": 0.5909, "step": 11295 }, { "epoch": 0.5736695815511924, "grad_norm": 0.033555485623473526, "learning_rate": 0.0008973835928588656, "loss": 0.5984, "step": 11300 }, { "epoch": 0.5739234176492239, "grad_norm": 0.026858437506102287, "learning_rate": 0.0008972491065559467, "loss": 0.5745, "step": 11305 }, { "epoch": 0.5741772537472554, "grad_norm": 0.041900500650273065, "learning_rate": 0.0008971145422750094, "loss": 0.5525, "step": 11310 }, { "epoch": 0.5744310898452869, "grad_norm": 0.023692636987519583, "learning_rate": 0.0008969799000424676, "loss": 0.6412, "step": 11315 }, { "epoch": 0.5746849259433184, "grad_norm": 0.03520820653367125, "learning_rate": 0.0008968451798847513, "loss": 0.5895, "step": 11320 }, { "epoch": 0.5749387620413499, "grad_norm": 0.045950374099341315, "learning_rate": 0.0008967103818283051, "loss": 0.6144, "step": 11325 }, { "epoch": 0.5751925981393814, "grad_norm": 0.03013110218086917, "learning_rate": 0.0008965755058995896, "loss": 0.6167, "step": 11330 }, { "epoch": 0.5754464342374129, "grad_norm": 0.028677449123263064, "learning_rate": 0.0008964405521250798, "loss": 0.5907, "step": 11335 }, { "epoch": 0.5757002703354444, "grad_norm": 0.06897665640762995, "learning_rate": 0.0008963055205312667, "loss": 0.6008, "step": 11340 }, { "epoch": 0.5759541064334759, "grad_norm": 0.039108751736186075, "learning_rate": 0.0008961704111446564, "loss": 0.5963, "step": 11345 }, { "epoch": 0.5762079425315074, "grad_norm": 0.02497304268795272, "learning_rate": 0.00089603522399177, "loss": 0.5648, "step": 11350 }, { "epoch": 0.576461778629539, "grad_norm": 0.02409915928784757, "learning_rate": 0.0008958999590991441, "loss": 0.6111, "step": 11355 }, { "epoch": 0.5767156147275704, "grad_norm": 0.023423833712250722, "learning_rate": 0.0008957646164933307, "loss": 0.6034, "step": 11360 }, { "epoch": 0.5769694508256019, "grad_norm": 0.02939021201285094, "learning_rate": 0.0008956291962008967, "loss": 0.601, "step": 11365 }, { "epoch": 0.5772232869236334, "grad_norm": 0.025113894696930674, "learning_rate": 0.0008954936982484245, "loss": 0.5741, "step": 11370 }, { "epoch": 0.5774771230216649, "grad_norm": 0.03523771168659597, "learning_rate": 0.0008953581226625116, "loss": 0.5955, "step": 11375 }, { "epoch": 0.5777309591196964, "grad_norm": 0.033018941463141716, "learning_rate": 0.000895222469469771, "loss": 0.6561, "step": 11380 }, { "epoch": 0.5779847952177279, "grad_norm": 0.03870222645684432, "learning_rate": 0.0008950867386968305, "loss": 0.5742, "step": 11385 }, { "epoch": 0.5782386313157594, "grad_norm": 0.033035287220069445, "learning_rate": 0.0008949509303703336, "loss": 0.6268, "step": 11390 }, { "epoch": 0.5784924674137909, "grad_norm": 0.02535187944282503, "learning_rate": 0.0008948150445169386, "loss": 0.6145, "step": 11395 }, { "epoch": 0.5787463035118224, "grad_norm": 0.03673161990221083, "learning_rate": 0.0008946790811633193, "loss": 0.5821, "step": 11400 }, { "epoch": 0.579000139609854, "grad_norm": 0.027797183940164883, "learning_rate": 0.0008945430403361647, "loss": 0.6329, "step": 11405 }, { "epoch": 0.5792539757078854, "grad_norm": 0.024400895985974023, "learning_rate": 0.0008944069220621788, "loss": 0.5537, "step": 11410 }, { "epoch": 0.5795078118059169, "grad_norm": 0.029540130582463044, "learning_rate": 0.000894270726368081, "loss": 0.6305, "step": 11415 }, { "epoch": 0.5797616479039485, "grad_norm": 0.03403617081620501, "learning_rate": 0.0008941344532806057, "loss": 0.6018, "step": 11420 }, { "epoch": 0.5800154840019799, "grad_norm": 0.036771472648533886, "learning_rate": 0.000893998102826503, "loss": 0.6164, "step": 11425 }, { "epoch": 0.5802693201000114, "grad_norm": 0.050072623532946885, "learning_rate": 0.0008938616750325375, "loss": 0.5806, "step": 11430 }, { "epoch": 0.580523156198043, "grad_norm": 0.031054510529734854, "learning_rate": 0.0008937251699254893, "loss": 0.6024, "step": 11435 }, { "epoch": 0.5807769922960744, "grad_norm": 0.02607242206947156, "learning_rate": 0.0008935885875321539, "loss": 0.5976, "step": 11440 }, { "epoch": 0.5810308283941059, "grad_norm": 0.025700401652453922, "learning_rate": 0.0008934519278793416, "loss": 0.6159, "step": 11445 }, { "epoch": 0.5812846644921374, "grad_norm": 0.03334981900638615, "learning_rate": 0.0008933151909938778, "loss": 0.5664, "step": 11450 }, { "epoch": 0.581538500590169, "grad_norm": 0.03470397493711404, "learning_rate": 0.0008931783769026036, "loss": 0.6065, "step": 11455 }, { "epoch": 0.5817923366882004, "grad_norm": 0.040190824236687406, "learning_rate": 0.0008930414856323747, "loss": 0.5918, "step": 11460 }, { "epoch": 0.5820461727862319, "grad_norm": 0.02671898679248395, "learning_rate": 0.0008929045172100624, "loss": 0.5995, "step": 11465 }, { "epoch": 0.5823000088842635, "grad_norm": 0.06746663315212366, "learning_rate": 0.0008927674716625527, "loss": 0.6212, "step": 11470 }, { "epoch": 0.5825538449822949, "grad_norm": 0.03231819490055282, "learning_rate": 0.0008926303490167471, "loss": 0.6091, "step": 11475 }, { "epoch": 0.5828076810803264, "grad_norm": 0.028477552470271787, "learning_rate": 0.0008924931492995619, "loss": 0.6107, "step": 11480 }, { "epoch": 0.583061517178358, "grad_norm": 0.030545895434276424, "learning_rate": 0.000892355872537929, "loss": 0.6016, "step": 11485 }, { "epoch": 0.5833153532763894, "grad_norm": 0.02764360721101484, "learning_rate": 0.0008922185187587949, "loss": 0.6014, "step": 11490 }, { "epoch": 0.5835691893744209, "grad_norm": 0.051972230411543044, "learning_rate": 0.0008920810879891217, "loss": 0.6184, "step": 11495 }, { "epoch": 0.5838230254724525, "grad_norm": 0.049601905584870654, "learning_rate": 0.0008919435802558862, "loss": 0.5848, "step": 11500 }, { "epoch": 0.5840768615704839, "grad_norm": 0.5732510853690319, "learning_rate": 0.0008918059955860803, "loss": 0.6587, "step": 11505 }, { "epoch": 0.5843306976685154, "grad_norm": 0.042150081750240605, "learning_rate": 0.0008916683340067116, "loss": 0.625, "step": 11510 }, { "epoch": 0.584584533766547, "grad_norm": 0.05774925217438834, "learning_rate": 0.0008915305955448021, "loss": 0.6199, "step": 11515 }, { "epoch": 0.5848383698645785, "grad_norm": 0.04160871680886203, "learning_rate": 0.0008913927802273894, "loss": 0.6081, "step": 11520 }, { "epoch": 0.5850922059626099, "grad_norm": 0.03323206015318204, "learning_rate": 0.0008912548880815256, "loss": 0.6115, "step": 11525 }, { "epoch": 0.5853460420606414, "grad_norm": 0.04576230402616482, "learning_rate": 0.0008911169191342785, "loss": 0.6077, "step": 11530 }, { "epoch": 0.585599878158673, "grad_norm": 0.09158330505750234, "learning_rate": 0.0008909788734127307, "loss": 0.6539, "step": 11535 }, { "epoch": 0.5858537142567044, "grad_norm": 0.02861387432236888, "learning_rate": 0.00089084075094398, "loss": 0.6305, "step": 11540 }, { "epoch": 0.5861075503547359, "grad_norm": 0.037516416748120646, "learning_rate": 0.0008907025517551388, "loss": 0.6258, "step": 11545 }, { "epoch": 0.5863613864527675, "grad_norm": 0.04086135649565531, "learning_rate": 0.0008905642758733352, "loss": 0.599, "step": 11550 }, { "epoch": 0.5866152225507989, "grad_norm": 0.03266087385037055, "learning_rate": 0.000890425923325712, "loss": 0.6342, "step": 11555 }, { "epoch": 0.5868690586488304, "grad_norm": 0.04351600531336264, "learning_rate": 0.0008902874941394271, "loss": 0.623, "step": 11560 }, { "epoch": 0.587122894746862, "grad_norm": 0.04031763253939243, "learning_rate": 0.0008901489883416535, "loss": 0.6061, "step": 11565 }, { "epoch": 0.5873767308448935, "grad_norm": 0.03024842628718133, "learning_rate": 0.0008900104059595791, "loss": 0.5995, "step": 11570 }, { "epoch": 0.5876305669429249, "grad_norm": 0.03277584675848276, "learning_rate": 0.000889871747020407, "loss": 0.5808, "step": 11575 }, { "epoch": 0.5878844030409565, "grad_norm": 0.025589433458913628, "learning_rate": 0.0008897330115513553, "loss": 0.6386, "step": 11580 }, { "epoch": 0.588138239138988, "grad_norm": 0.04730997143858022, "learning_rate": 0.0008895941995796569, "loss": 0.6175, "step": 11585 }, { "epoch": 0.5883920752370194, "grad_norm": 0.02716519389485637, "learning_rate": 0.0008894553111325601, "loss": 0.6319, "step": 11590 }, { "epoch": 0.588645911335051, "grad_norm": 0.029548377642727466, "learning_rate": 0.0008893163462373279, "loss": 0.6307, "step": 11595 }, { "epoch": 0.5888997474330825, "grad_norm": 0.029441752118062915, "learning_rate": 0.0008891773049212387, "loss": 0.604, "step": 11600 }, { "epoch": 0.5891535835311139, "grad_norm": 0.03547180982249062, "learning_rate": 0.000889038187211585, "loss": 0.615, "step": 11605 }, { "epoch": 0.5894074196291454, "grad_norm": 0.025520246542906348, "learning_rate": 0.0008888989931356754, "loss": 0.5925, "step": 11610 }, { "epoch": 0.589661255727177, "grad_norm": 0.032232116965678355, "learning_rate": 0.0008887597227208331, "loss": 0.618, "step": 11615 }, { "epoch": 0.5899150918252085, "grad_norm": 0.026107740044775674, "learning_rate": 0.0008886203759943957, "loss": 0.6311, "step": 11620 }, { "epoch": 0.5901689279232399, "grad_norm": 0.028672705224940687, "learning_rate": 0.0008884809529837167, "loss": 0.5692, "step": 11625 }, { "epoch": 0.5904227640212715, "grad_norm": 0.0258655607946437, "learning_rate": 0.0008883414537161638, "loss": 0.6251, "step": 11630 }, { "epoch": 0.590676600119303, "grad_norm": 0.028780253182444794, "learning_rate": 0.0008882018782191204, "loss": 0.6016, "step": 11635 }, { "epoch": 0.5909304362173344, "grad_norm": 0.03331921506050773, "learning_rate": 0.0008880622265199841, "loss": 0.6073, "step": 11640 }, { "epoch": 0.591184272315366, "grad_norm": 0.4723036335382741, "learning_rate": 0.0008879224986461681, "loss": 0.668, "step": 11645 }, { "epoch": 0.5914381084133975, "grad_norm": 0.06636323581751623, "learning_rate": 0.0008877826946251002, "loss": 0.6493, "step": 11650 }, { "epoch": 0.5916919445114289, "grad_norm": 0.06678436419997426, "learning_rate": 0.0008876428144842231, "loss": 0.5844, "step": 11655 }, { "epoch": 0.5919457806094605, "grad_norm": 0.048242208081622634, "learning_rate": 0.0008875028582509948, "loss": 0.606, "step": 11660 }, { "epoch": 0.592199616707492, "grad_norm": 0.06276218448082216, "learning_rate": 0.0008873628259528878, "loss": 0.6185, "step": 11665 }, { "epoch": 0.5924534528055235, "grad_norm": 0.02996611743977883, "learning_rate": 0.0008872227176173899, "loss": 0.6132, "step": 11670 }, { "epoch": 0.592707288903555, "grad_norm": 0.025393102232066908, "learning_rate": 0.0008870825332720036, "loss": 0.6025, "step": 11675 }, { "epoch": 0.5929611250015865, "grad_norm": 0.04284571854613707, "learning_rate": 0.0008869422729442465, "loss": 0.6017, "step": 11680 }, { "epoch": 0.593214961099618, "grad_norm": 0.03802645828166949, "learning_rate": 0.0008868019366616508, "loss": 0.6635, "step": 11685 }, { "epoch": 0.5934687971976494, "grad_norm": 0.02382312826786996, "learning_rate": 0.0008866615244517639, "loss": 0.569, "step": 11690 }, { "epoch": 0.593722633295681, "grad_norm": 0.027194057612111737, "learning_rate": 0.000886521036342148, "loss": 0.6323, "step": 11695 }, { "epoch": 0.5939764693937125, "grad_norm": 0.030713511992266334, "learning_rate": 0.0008863804723603803, "loss": 0.5997, "step": 11700 }, { "epoch": 0.5942303054917439, "grad_norm": 0.027258233011688147, "learning_rate": 0.0008862398325340526, "loss": 0.6317, "step": 11705 }, { "epoch": 0.5944841415897755, "grad_norm": 0.03926742498370449, "learning_rate": 0.0008860991168907721, "loss": 0.6199, "step": 11710 }, { "epoch": 0.594737977687807, "grad_norm": 0.043600047694201435, "learning_rate": 0.0008859583254581605, "loss": 0.6552, "step": 11715 }, { "epoch": 0.5949918137858384, "grad_norm": 0.030006140812709456, "learning_rate": 0.0008858174582638543, "loss": 0.608, "step": 11720 }, { "epoch": 0.59524564988387, "grad_norm": 0.022938563538545016, "learning_rate": 0.0008856765153355051, "loss": 0.6107, "step": 11725 }, { "epoch": 0.5954994859819015, "grad_norm": 0.029591425378169715, "learning_rate": 0.0008855354967007793, "loss": 0.5701, "step": 11730 }, { "epoch": 0.595753322079933, "grad_norm": 0.037546799230365954, "learning_rate": 0.0008853944023873581, "loss": 0.6099, "step": 11735 }, { "epoch": 0.5960071581779645, "grad_norm": 0.030940988076782326, "learning_rate": 0.0008852532324229379, "loss": 0.6363, "step": 11740 }, { "epoch": 0.596260994275996, "grad_norm": 0.02823182925476383, "learning_rate": 0.0008851119868352292, "loss": 0.6331, "step": 11745 }, { "epoch": 0.5965148303740275, "grad_norm": 0.03156718715990183, "learning_rate": 0.000884970665651958, "loss": 0.6566, "step": 11750 }, { "epoch": 0.596768666472059, "grad_norm": 0.041051536536051314, "learning_rate": 0.0008848292689008653, "loss": 0.644, "step": 11755 }, { "epoch": 0.5970225025700905, "grad_norm": 0.023005223765102525, "learning_rate": 0.0008846877966097059, "loss": 0.592, "step": 11760 }, { "epoch": 0.597276338668122, "grad_norm": 0.027109360668611315, "learning_rate": 0.0008845462488062506, "loss": 0.5873, "step": 11765 }, { "epoch": 0.5975301747661534, "grad_norm": 0.027879918337945408, "learning_rate": 0.0008844046255182844, "loss": 0.6062, "step": 11770 }, { "epoch": 0.597784010864185, "grad_norm": 0.03759374089636192, "learning_rate": 0.0008842629267736072, "loss": 0.5949, "step": 11775 }, { "epoch": 0.5980378469622165, "grad_norm": 0.02369923860074637, "learning_rate": 0.0008841211526000339, "loss": 0.6322, "step": 11780 }, { "epoch": 0.598291683060248, "grad_norm": 0.028051536267109798, "learning_rate": 0.0008839793030253937, "loss": 0.6232, "step": 11785 }, { "epoch": 0.5985455191582795, "grad_norm": 0.056018315371266905, "learning_rate": 0.0008838373780775315, "loss": 0.6123, "step": 11790 }, { "epoch": 0.598799355256311, "grad_norm": 0.03552958483190999, "learning_rate": 0.000883695377784306, "loss": 0.5976, "step": 11795 }, { "epoch": 0.5990531913543425, "grad_norm": 0.032759803712457444, "learning_rate": 0.0008835533021735914, "loss": 0.5939, "step": 11800 }, { "epoch": 0.599307027452374, "grad_norm": 0.049806073645126195, "learning_rate": 0.0008834111512732763, "loss": 0.6214, "step": 11805 }, { "epoch": 0.5995608635504055, "grad_norm": 0.02802494107222027, "learning_rate": 0.0008832689251112645, "loss": 0.6295, "step": 11810 }, { "epoch": 0.599814699648437, "grad_norm": 0.030855803033759243, "learning_rate": 0.0008831266237154738, "loss": 0.6435, "step": 11815 }, { "epoch": 0.6000685357464685, "grad_norm": 0.024861923925470025, "learning_rate": 0.0008829842471138376, "loss": 0.6032, "step": 11820 }, { "epoch": 0.6003223718445, "grad_norm": 0.02762869776919164, "learning_rate": 0.0008828417953343035, "loss": 0.5834, "step": 11825 }, { "epoch": 0.6005762079425315, "grad_norm": 0.029202677438373507, "learning_rate": 0.0008826992684048344, "loss": 0.5823, "step": 11830 }, { "epoch": 0.6008300440405631, "grad_norm": 0.04246057047785471, "learning_rate": 0.0008825566663534074, "loss": 0.5808, "step": 11835 }, { "epoch": 0.6010838801385945, "grad_norm": 0.02677918683034048, "learning_rate": 0.0008824139892080145, "loss": 0.589, "step": 11840 }, { "epoch": 0.601337716236626, "grad_norm": 0.022623912880773735, "learning_rate": 0.0008822712369966628, "loss": 0.5985, "step": 11845 }, { "epoch": 0.6015915523346576, "grad_norm": 0.03863218744236265, "learning_rate": 0.0008821284097473734, "loss": 0.5964, "step": 11850 }, { "epoch": 0.601845388432689, "grad_norm": 0.026457485001171663, "learning_rate": 0.000881985507488183, "loss": 0.592, "step": 11855 }, { "epoch": 0.6020992245307205, "grad_norm": 0.03158510312459021, "learning_rate": 0.0008818425302471424, "loss": 0.5976, "step": 11860 }, { "epoch": 0.602353060628752, "grad_norm": 0.044049060218755355, "learning_rate": 0.0008816994780523175, "loss": 0.5742, "step": 11865 }, { "epoch": 0.6026068967267835, "grad_norm": 0.02599413568481521, "learning_rate": 0.0008815563509317883, "loss": 0.5854, "step": 11870 }, { "epoch": 0.602860732824815, "grad_norm": 0.03864798470702778, "learning_rate": 0.0008814131489136506, "loss": 0.5732, "step": 11875 }, { "epoch": 0.6031145689228465, "grad_norm": 0.032289876389169754, "learning_rate": 0.0008812698720260135, "loss": 0.621, "step": 11880 }, { "epoch": 0.6033684050208781, "grad_norm": 0.05318024428744845, "learning_rate": 0.000881126520297002, "loss": 0.5706, "step": 11885 }, { "epoch": 0.6036222411189095, "grad_norm": 0.03086598470372497, "learning_rate": 0.0008809830937547554, "loss": 0.601, "step": 11890 }, { "epoch": 0.603876077216941, "grad_norm": 0.025731078946828065, "learning_rate": 0.0008808395924274274, "loss": 0.5996, "step": 11895 }, { "epoch": 0.6041299133149726, "grad_norm": 0.025339932657364943, "learning_rate": 0.0008806960163431866, "loss": 0.601, "step": 11900 }, { "epoch": 0.604383749413004, "grad_norm": 0.037850633222621166, "learning_rate": 0.0008805523655302164, "loss": 0.5848, "step": 11905 }, { "epoch": 0.6046375855110355, "grad_norm": 0.026694596427904598, "learning_rate": 0.0008804086400167146, "loss": 0.6027, "step": 11910 }, { "epoch": 0.6048914216090671, "grad_norm": 0.028977129692472393, "learning_rate": 0.0008802648398308939, "loss": 0.5947, "step": 11915 }, { "epoch": 0.6051452577070985, "grad_norm": 0.02467551449885606, "learning_rate": 0.0008801209650009813, "loss": 0.6287, "step": 11920 }, { "epoch": 0.60539909380513, "grad_norm": 0.024282792825939505, "learning_rate": 0.0008799770155552192, "loss": 0.5401, "step": 11925 }, { "epoch": 0.6056529299031616, "grad_norm": 0.04118279824961832, "learning_rate": 0.0008798329915218638, "loss": 0.6076, "step": 11930 }, { "epoch": 0.605906766001193, "grad_norm": 0.025423020492523492, "learning_rate": 0.0008796888929291864, "loss": 0.5958, "step": 11935 }, { "epoch": 0.6061606020992245, "grad_norm": 0.02499168872123397, "learning_rate": 0.0008795447198054729, "loss": 0.6043, "step": 11940 }, { "epoch": 0.606414438197256, "grad_norm": 0.042162291687962206, "learning_rate": 0.0008794004721790235, "loss": 0.589, "step": 11945 }, { "epoch": 0.6066682742952876, "grad_norm": 0.03291178818373513, "learning_rate": 0.0008792561500781535, "loss": 0.5671, "step": 11950 }, { "epoch": 0.606922110393319, "grad_norm": 0.0376953507530463, "learning_rate": 0.0008791117535311928, "loss": 0.6097, "step": 11955 }, { "epoch": 0.6071759464913505, "grad_norm": 0.025349852157537867, "learning_rate": 0.0008789672825664854, "loss": 0.5582, "step": 11960 }, { "epoch": 0.6074297825893821, "grad_norm": 0.02634738699215234, "learning_rate": 0.0008788227372123902, "loss": 0.6388, "step": 11965 }, { "epoch": 0.6076836186874135, "grad_norm": 0.030461346907874626, "learning_rate": 0.0008786781174972811, "loss": 0.6106, "step": 11970 }, { "epoch": 0.607937454785445, "grad_norm": 0.033633731377443056, "learning_rate": 0.0008785334234495459, "loss": 0.5998, "step": 11975 }, { "epoch": 0.6081912908834766, "grad_norm": 0.022923564848087382, "learning_rate": 0.0008783886550975872, "loss": 0.547, "step": 11980 }, { "epoch": 0.608445126981508, "grad_norm": 0.029198962706889067, "learning_rate": 0.0008782438124698229, "loss": 0.618, "step": 11985 }, { "epoch": 0.6086989630795395, "grad_norm": 0.026047548738704816, "learning_rate": 0.0008780988955946843, "loss": 0.5686, "step": 11990 }, { "epoch": 0.6089527991775711, "grad_norm": 0.02660176396313372, "learning_rate": 0.0008779539045006182, "loss": 0.5884, "step": 11995 }, { "epoch": 0.6092066352756026, "grad_norm": 0.030589674203168663, "learning_rate": 0.0008778088392160853, "loss": 0.6039, "step": 12000 }, { "epoch": 0.609460471373634, "grad_norm": 0.050693411471426746, "learning_rate": 0.0008776636997695615, "loss": 0.6013, "step": 12005 }, { "epoch": 0.6097143074716656, "grad_norm": 0.026446497681540608, "learning_rate": 0.0008775184861895369, "loss": 0.5416, "step": 12010 }, { "epoch": 0.6099681435696971, "grad_norm": 0.1932080655888927, "learning_rate": 0.0008773731985045162, "loss": 0.5818, "step": 12015 }, { "epoch": 0.6102219796677285, "grad_norm": 0.027856228756609126, "learning_rate": 0.0008772278367430185, "loss": 0.5952, "step": 12020 }, { "epoch": 0.61047581576576, "grad_norm": 0.04117569443549469, "learning_rate": 0.0008770824009335775, "loss": 0.5738, "step": 12025 }, { "epoch": 0.6107296518637916, "grad_norm": 0.025174168899307972, "learning_rate": 0.000876936891104742, "loss": 0.5825, "step": 12030 }, { "epoch": 0.610983487961823, "grad_norm": 0.024070740808714138, "learning_rate": 0.0008767913072850743, "loss": 0.5766, "step": 12035 }, { "epoch": 0.6112373240598545, "grad_norm": 0.022925340728893576, "learning_rate": 0.0008766456495031521, "loss": 0.6117, "step": 12040 }, { "epoch": 0.6114911601578861, "grad_norm": 0.02650268728322704, "learning_rate": 0.0008764999177875673, "loss": 0.594, "step": 12045 }, { "epoch": 0.6117449962559176, "grad_norm": 0.04585848245773159, "learning_rate": 0.0008763541121669263, "loss": 0.5841, "step": 12050 }, { "epoch": 0.611998832353949, "grad_norm": 0.028232685184612846, "learning_rate": 0.0008762082326698498, "loss": 0.5985, "step": 12055 }, { "epoch": 0.6122526684519806, "grad_norm": 0.02453819199214054, "learning_rate": 0.0008760622793249735, "loss": 0.6275, "step": 12060 }, { "epoch": 0.6125065045500121, "grad_norm": 0.03043290217761071, "learning_rate": 0.0008759162521609472, "loss": 0.5899, "step": 12065 }, { "epoch": 0.6127603406480435, "grad_norm": 0.035932514886264776, "learning_rate": 0.0008757701512064351, "loss": 0.5917, "step": 12070 }, { "epoch": 0.6130141767460751, "grad_norm": 0.024922837264232393, "learning_rate": 0.0008756239764901165, "loss": 0.5963, "step": 12075 }, { "epoch": 0.6132680128441066, "grad_norm": 0.021912894422967358, "learning_rate": 0.0008754777280406845, "loss": 0.5423, "step": 12080 }, { "epoch": 0.613521848942138, "grad_norm": 0.03153730727693658, "learning_rate": 0.0008753314058868469, "loss": 0.6127, "step": 12085 }, { "epoch": 0.6137756850401695, "grad_norm": 0.026200893072720284, "learning_rate": 0.0008751850100573262, "loss": 0.5789, "step": 12090 }, { "epoch": 0.6140295211382011, "grad_norm": 0.03394058282414533, "learning_rate": 0.000875038540580859, "loss": 0.5715, "step": 12095 }, { "epoch": 0.6142833572362326, "grad_norm": 0.027581562059680105, "learning_rate": 0.0008748919974861967, "loss": 0.5693, "step": 12100 }, { "epoch": 0.614537193334264, "grad_norm": 0.029272850241735242, "learning_rate": 0.0008747453808021047, "loss": 0.6127, "step": 12105 }, { "epoch": 0.6147910294322956, "grad_norm": 0.02771381768749453, "learning_rate": 0.0008745986905573634, "loss": 0.5814, "step": 12110 }, { "epoch": 0.6150448655303271, "grad_norm": 0.02582305425809761, "learning_rate": 0.0008744519267807673, "loss": 0.5818, "step": 12115 }, { "epoch": 0.6152987016283585, "grad_norm": 0.03650039105604249, "learning_rate": 0.0008743050895011253, "loss": 0.5948, "step": 12120 }, { "epoch": 0.6155525377263901, "grad_norm": 0.025855707462884535, "learning_rate": 0.000874158178747261, "loss": 0.5788, "step": 12125 }, { "epoch": 0.6158063738244216, "grad_norm": 0.02817299125660346, "learning_rate": 0.000874011194548012, "loss": 0.5825, "step": 12130 }, { "epoch": 0.616060209922453, "grad_norm": 0.05520364073190341, "learning_rate": 0.0008738641369322308, "loss": 0.5901, "step": 12135 }, { "epoch": 0.6163140460204846, "grad_norm": 0.04862377362129934, "learning_rate": 0.0008737170059287838, "loss": 0.5905, "step": 12140 }, { "epoch": 0.6165678821185161, "grad_norm": 0.043586867841226354, "learning_rate": 0.0008735698015665525, "loss": 0.6009, "step": 12145 }, { "epoch": 0.6168217182165475, "grad_norm": 0.02855284608566154, "learning_rate": 0.000873422523874432, "loss": 0.5848, "step": 12150 }, { "epoch": 0.6170755543145791, "grad_norm": 0.02574982692090748, "learning_rate": 0.0008732751728813324, "loss": 0.6078, "step": 12155 }, { "epoch": 0.6173293904126106, "grad_norm": 0.028740949383117314, "learning_rate": 0.0008731277486161777, "loss": 0.5622, "step": 12160 }, { "epoch": 0.6175832265106421, "grad_norm": 0.028127163339311326, "learning_rate": 0.000872980251107907, "loss": 0.5664, "step": 12165 }, { "epoch": 0.6178370626086735, "grad_norm": 0.024587054800826992, "learning_rate": 0.0008728326803854728, "loss": 0.6062, "step": 12170 }, { "epoch": 0.6180908987067051, "grad_norm": 0.03673435237469503, "learning_rate": 0.0008726850364778429, "loss": 0.6079, "step": 12175 }, { "epoch": 0.6183447348047366, "grad_norm": 0.022463713399365034, "learning_rate": 0.000872537319413999, "loss": 0.5885, "step": 12180 }, { "epoch": 0.618598570902768, "grad_norm": 0.04006474303404941, "learning_rate": 0.000872389529222937, "loss": 0.5838, "step": 12185 }, { "epoch": 0.6188524070007996, "grad_norm": 0.03323874643198722, "learning_rate": 0.0008722416659336676, "loss": 0.5907, "step": 12190 }, { "epoch": 0.6191062430988311, "grad_norm": 0.0325269083515918, "learning_rate": 0.0008720937295752153, "loss": 0.5848, "step": 12195 }, { "epoch": 0.6193600791968625, "grad_norm": 0.023584804219643197, "learning_rate": 0.0008719457201766199, "loss": 0.5965, "step": 12200 }, { "epoch": 0.6196139152948941, "grad_norm": 0.02567919434786954, "learning_rate": 0.0008717976377669343, "loss": 0.5644, "step": 12205 }, { "epoch": 0.6198677513929256, "grad_norm": 0.023457406627368636, "learning_rate": 0.0008716494823752265, "loss": 0.5846, "step": 12210 }, { "epoch": 0.6201215874909571, "grad_norm": 0.10110762567310781, "learning_rate": 0.0008715012540305789, "loss": 0.5958, "step": 12215 }, { "epoch": 0.6203754235889886, "grad_norm": 0.027499055613954763, "learning_rate": 0.0008713529527620876, "loss": 0.6264, "step": 12220 }, { "epoch": 0.6206292596870201, "grad_norm": 0.04044664204628187, "learning_rate": 0.0008712045785988638, "loss": 0.5648, "step": 12225 }, { "epoch": 0.6208830957850516, "grad_norm": 0.026521946530749428, "learning_rate": 0.0008710561315700323, "loss": 0.6316, "step": 12230 }, { "epoch": 0.621136931883083, "grad_norm": 0.024722241475712593, "learning_rate": 0.0008709076117047326, "loss": 0.5668, "step": 12235 }, { "epoch": 0.6213907679811146, "grad_norm": 0.03259920434042201, "learning_rate": 0.0008707590190321186, "loss": 0.5987, "step": 12240 }, { "epoch": 0.6216446040791461, "grad_norm": 3.80266301123272, "learning_rate": 0.000870610353581358, "loss": 0.6304, "step": 12245 }, { "epoch": 0.6218984401771775, "grad_norm": 0.08694604714084496, "learning_rate": 0.0008704616153816332, "loss": 0.649, "step": 12250 }, { "epoch": 0.6221522762752091, "grad_norm": 0.05774492909835282, "learning_rate": 0.0008703128044621409, "loss": 0.6147, "step": 12255 }, { "epoch": 0.6224061123732406, "grad_norm": 0.04804931614993646, "learning_rate": 0.0008701639208520917, "loss": 0.6145, "step": 12260 }, { "epoch": 0.6226599484712722, "grad_norm": 0.08384982616634495, "learning_rate": 0.000870014964580711, "loss": 0.6363, "step": 12265 }, { "epoch": 0.6229137845693036, "grad_norm": 0.03400226647956488, "learning_rate": 0.000869865935677238, "loss": 0.5997, "step": 12270 }, { "epoch": 0.6231676206673351, "grad_norm": 0.026650165047009896, "learning_rate": 0.0008697168341709263, "loss": 0.6205, "step": 12275 }, { "epoch": 0.6234214567653666, "grad_norm": 0.03254784519557384, "learning_rate": 0.0008695676600910437, "loss": 0.6218, "step": 12280 }, { "epoch": 0.6236752928633981, "grad_norm": 0.025852394090405486, "learning_rate": 0.0008694184134668726, "loss": 0.6043, "step": 12285 }, { "epoch": 0.6239291289614296, "grad_norm": 0.03673450921472964, "learning_rate": 0.0008692690943277092, "loss": 0.6277, "step": 12290 }, { "epoch": 0.6241829650594611, "grad_norm": 0.026035740947944315, "learning_rate": 0.0008691197027028641, "loss": 0.6135, "step": 12295 }, { "epoch": 0.6244368011574926, "grad_norm": 0.035313678241679185, "learning_rate": 0.0008689702386216622, "loss": 0.584, "step": 12300 }, { "epoch": 0.6246906372555241, "grad_norm": 0.03306444173549, "learning_rate": 0.0008688207021134424, "loss": 0.6576, "step": 12305 }, { "epoch": 0.6249444733535556, "grad_norm": 0.03609474653140698, "learning_rate": 0.0008686710932075582, "loss": 0.5882, "step": 12310 }, { "epoch": 0.6251983094515872, "grad_norm": 0.02644393403575191, "learning_rate": 0.000868521411933377, "loss": 0.5805, "step": 12315 }, { "epoch": 0.6254521455496186, "grad_norm": 0.03558589982698361, "learning_rate": 0.0008683716583202803, "loss": 0.597, "step": 12320 }, { "epoch": 0.6257059816476501, "grad_norm": 0.027207972337934594, "learning_rate": 0.0008682218323976643, "loss": 0.6125, "step": 12325 }, { "epoch": 0.6259598177456817, "grad_norm": 0.030538062894411674, "learning_rate": 0.0008680719341949388, "loss": 0.6047, "step": 12330 }, { "epoch": 0.6262136538437131, "grad_norm": 0.02839935399711346, "learning_rate": 0.0008679219637415281, "loss": 0.5826, "step": 12335 }, { "epoch": 0.6264674899417446, "grad_norm": 0.03696443623177008, "learning_rate": 0.0008677719210668708, "loss": 0.6189, "step": 12340 }, { "epoch": 0.6267213260397761, "grad_norm": 0.037688218408350786, "learning_rate": 0.0008676218062004196, "loss": 0.6191, "step": 12345 }, { "epoch": 0.6269751621378076, "grad_norm": 0.030327644773068856, "learning_rate": 0.0008674716191716412, "loss": 0.5768, "step": 12350 }, { "epoch": 0.6272289982358391, "grad_norm": 0.026282274839364978, "learning_rate": 0.0008673213600100165, "loss": 0.589, "step": 12355 }, { "epoch": 0.6274828343338706, "grad_norm": 0.023814603743172128, "learning_rate": 0.0008671710287450406, "loss": 0.5615, "step": 12360 }, { "epoch": 0.6277366704319021, "grad_norm": 0.027581917581802243, "learning_rate": 0.0008670206254062227, "loss": 0.6108, "step": 12365 }, { "epoch": 0.6279905065299336, "grad_norm": 0.026215277115865347, "learning_rate": 0.0008668701500230865, "loss": 0.6145, "step": 12370 }, { "epoch": 0.6282443426279651, "grad_norm": 0.025711494856042162, "learning_rate": 0.0008667196026251694, "loss": 0.6259, "step": 12375 }, { "epoch": 0.6284981787259967, "grad_norm": 0.02851492204909293, "learning_rate": 0.0008665689832420231, "loss": 0.6542, "step": 12380 }, { "epoch": 0.6287520148240281, "grad_norm": 0.0445871823188339, "learning_rate": 0.0008664182919032135, "loss": 0.5829, "step": 12385 }, { "epoch": 0.6290058509220596, "grad_norm": 0.03817207065267871, "learning_rate": 0.0008662675286383206, "loss": 0.5721, "step": 12390 }, { "epoch": 0.6292596870200912, "grad_norm": 0.025461696058893753, "learning_rate": 0.0008661166934769384, "loss": 0.6207, "step": 12395 }, { "epoch": 0.6295135231181226, "grad_norm": 0.03291254373796645, "learning_rate": 0.000865965786448675, "loss": 0.6254, "step": 12400 }, { "epoch": 0.6297673592161541, "grad_norm": 0.02456833220774285, "learning_rate": 0.0008658148075831529, "loss": 0.6136, "step": 12405 }, { "epoch": 0.6300211953141857, "grad_norm": 0.03660349536181771, "learning_rate": 0.0008656637569100083, "loss": 0.6312, "step": 12410 }, { "epoch": 0.6302750314122171, "grad_norm": 0.028629048846541345, "learning_rate": 0.0008655126344588917, "loss": 0.6128, "step": 12415 }, { "epoch": 0.6305288675102486, "grad_norm": 0.027152018036820328, "learning_rate": 0.0008653614402594679, "loss": 0.5857, "step": 12420 }, { "epoch": 0.6307827036082801, "grad_norm": 0.02680519867482763, "learning_rate": 0.0008652101743414154, "loss": 0.6, "step": 12425 }, { "epoch": 0.6310365397063117, "grad_norm": 0.038568306378071626, "learning_rate": 0.000865058836734427, "loss": 0.5764, "step": 12430 }, { "epoch": 0.6312903758043431, "grad_norm": 0.022332649415363656, "learning_rate": 0.0008649074274682094, "loss": 0.5723, "step": 12435 }, { "epoch": 0.6315442119023746, "grad_norm": 0.03745046556172155, "learning_rate": 0.0008647559465724837, "loss": 0.6402, "step": 12440 }, { "epoch": 0.6317980480004062, "grad_norm": 0.043267157791484344, "learning_rate": 0.0008646043940769846, "loss": 0.614, "step": 12445 }, { "epoch": 0.6320518840984376, "grad_norm": 0.026176498937215194, "learning_rate": 0.0008644527700114613, "loss": 0.6109, "step": 12450 }, { "epoch": 0.6323057201964691, "grad_norm": 0.026927373965425307, "learning_rate": 0.0008643010744056768, "loss": 0.5921, "step": 12455 }, { "epoch": 0.6325595562945007, "grad_norm": 0.030985586351536417, "learning_rate": 0.0008641493072894081, "loss": 0.6037, "step": 12460 }, { "epoch": 0.6328133923925321, "grad_norm": 0.034088985294575574, "learning_rate": 0.0008639974686924463, "loss": 0.5987, "step": 12465 }, { "epoch": 0.6330672284905636, "grad_norm": 0.04071949159100916, "learning_rate": 0.0008638455586445967, "loss": 0.598, "step": 12470 }, { "epoch": 0.6333210645885952, "grad_norm": 0.04119075917419428, "learning_rate": 0.0008636935771756787, "loss": 0.6045, "step": 12475 }, { "epoch": 0.6335749006866267, "grad_norm": 0.024601400128880478, "learning_rate": 0.000863541524315525, "loss": 0.5895, "step": 12480 }, { "epoch": 0.6338287367846581, "grad_norm": 0.025669915239589995, "learning_rate": 0.000863389400093983, "loss": 0.6138, "step": 12485 }, { "epoch": 0.6340825728826897, "grad_norm": 0.07547210741704416, "learning_rate": 0.0008632372045409141, "loss": 0.5917, "step": 12490 }, { "epoch": 0.6343364089807212, "grad_norm": 0.08126800386669199, "learning_rate": 0.0008630849376861933, "loss": 0.5978, "step": 12495 }, { "epoch": 0.6345902450787526, "grad_norm": 0.039597508909703365, "learning_rate": 0.0008629325995597101, "loss": 0.6147, "step": 12500 }, { "epoch": 0.6348440811767841, "grad_norm": 0.03847450277611692, "learning_rate": 0.0008627801901913675, "loss": 0.6161, "step": 12505 }, { "epoch": 0.6350979172748157, "grad_norm": 0.031096240484264088, "learning_rate": 0.0008626277096110826, "loss": 0.6256, "step": 12510 }, { "epoch": 0.6353517533728471, "grad_norm": 0.0351603079410334, "learning_rate": 0.0008624751578487868, "loss": 0.5906, "step": 12515 }, { "epoch": 0.6356055894708786, "grad_norm": 0.05304450618895726, "learning_rate": 0.0008623225349344252, "loss": 0.5935, "step": 12520 }, { "epoch": 0.6358594255689102, "grad_norm": 0.042723117144634414, "learning_rate": 0.000862169840897957, "loss": 0.6222, "step": 12525 }, { "epoch": 0.6361132616669417, "grad_norm": 0.06453506596782073, "learning_rate": 0.0008620170757693551, "loss": 0.5989, "step": 12530 }, { "epoch": 0.6363670977649731, "grad_norm": 0.03504784561459624, "learning_rate": 0.0008618642395786065, "loss": 0.5673, "step": 12535 }, { "epoch": 0.6366209338630047, "grad_norm": 0.0351102465977865, "learning_rate": 0.0008617113323557124, "loss": 0.6076, "step": 12540 }, { "epoch": 0.6368747699610362, "grad_norm": 0.030573876028541808, "learning_rate": 0.0008615583541306875, "loss": 0.5974, "step": 12545 }, { "epoch": 0.6371286060590676, "grad_norm": 0.03746793184981241, "learning_rate": 0.0008614053049335608, "loss": 0.5956, "step": 12550 }, { "epoch": 0.6373824421570992, "grad_norm": 0.03858770491598553, "learning_rate": 0.0008612521847943751, "loss": 0.5991, "step": 12555 }, { "epoch": 0.6376362782551307, "grad_norm": 0.026289258814351405, "learning_rate": 0.0008610989937431872, "loss": 0.5822, "step": 12560 }, { "epoch": 0.6378901143531621, "grad_norm": 0.03593649422727849, "learning_rate": 0.0008609457318100674, "loss": 0.6193, "step": 12565 }, { "epoch": 0.6381439504511937, "grad_norm": 0.02527123338083977, "learning_rate": 0.0008607923990251005, "loss": 0.5848, "step": 12570 }, { "epoch": 0.6383977865492252, "grad_norm": 0.03742296667921461, "learning_rate": 0.0008606389954183851, "loss": 0.625, "step": 12575 }, { "epoch": 0.6386516226472566, "grad_norm": 0.025365542502550003, "learning_rate": 0.0008604855210200333, "loss": 0.5827, "step": 12580 }, { "epoch": 0.6389054587452881, "grad_norm": 0.0405102841258219, "learning_rate": 0.0008603319758601715, "loss": 0.6005, "step": 12585 }, { "epoch": 0.6391592948433197, "grad_norm": 0.034192188660412175, "learning_rate": 0.0008601783599689399, "loss": 0.5751, "step": 12590 }, { "epoch": 0.6394131309413512, "grad_norm": 0.027215532759491887, "learning_rate": 0.0008600246733764923, "loss": 0.5862, "step": 12595 }, { "epoch": 0.6396669670393826, "grad_norm": 0.03061750468523527, "learning_rate": 0.0008598709161129969, "loss": 0.5811, "step": 12600 }, { "epoch": 0.6399208031374142, "grad_norm": 0.028928109299113807, "learning_rate": 0.0008597170882086351, "loss": 0.5806, "step": 12605 }, { "epoch": 0.6401746392354457, "grad_norm": 0.027021955515260244, "learning_rate": 0.000859563189693603, "loss": 0.6105, "step": 12610 }, { "epoch": 0.6404284753334771, "grad_norm": 0.027332261017979507, "learning_rate": 0.0008594092205981099, "loss": 0.5754, "step": 12615 }, { "epoch": 0.6406823114315087, "grad_norm": 0.02712378032650469, "learning_rate": 0.0008592551809523791, "loss": 0.6216, "step": 12620 }, { "epoch": 0.6409361475295402, "grad_norm": 0.02824436218143356, "learning_rate": 0.0008591010707866478, "loss": 0.6344, "step": 12625 }, { "epoch": 0.6411899836275716, "grad_norm": 0.023321393807142935, "learning_rate": 0.0008589468901311672, "loss": 0.6034, "step": 12630 }, { "epoch": 0.6414438197256032, "grad_norm": 0.03151905452662985, "learning_rate": 0.0008587926390162022, "loss": 0.587, "step": 12635 }, { "epoch": 0.6416976558236347, "grad_norm": 0.030624964870169982, "learning_rate": 0.0008586383174720315, "loss": 0.6196, "step": 12640 }, { "epoch": 0.6419514919216662, "grad_norm": 0.025288630292415442, "learning_rate": 0.0008584839255289475, "loss": 0.6114, "step": 12645 }, { "epoch": 0.6422053280196977, "grad_norm": 0.028822411703530795, "learning_rate": 0.0008583294632172567, "loss": 0.598, "step": 12650 }, { "epoch": 0.6424591641177292, "grad_norm": 0.026442735674073018, "learning_rate": 0.0008581749305672792, "loss": 0.5951, "step": 12655 }, { "epoch": 0.6427130002157607, "grad_norm": 0.026576792989007662, "learning_rate": 0.0008580203276093492, "loss": 0.5872, "step": 12660 }, { "epoch": 0.6429668363137921, "grad_norm": 0.029042645916427186, "learning_rate": 0.0008578656543738141, "loss": 0.5867, "step": 12665 }, { "epoch": 0.6432206724118237, "grad_norm": 0.04209495966527208, "learning_rate": 0.0008577109108910359, "loss": 0.5837, "step": 12670 }, { "epoch": 0.6434745085098552, "grad_norm": 0.043317431826600195, "learning_rate": 0.0008575560971913898, "loss": 0.5904, "step": 12675 }, { "epoch": 0.6437283446078866, "grad_norm": 0.038439775882813054, "learning_rate": 0.0008574012133052649, "loss": 0.5627, "step": 12680 }, { "epoch": 0.6439821807059182, "grad_norm": 0.48049526028798634, "learning_rate": 0.0008572462592630641, "loss": 0.5769, "step": 12685 }, { "epoch": 0.6442360168039497, "grad_norm": 0.033053890270663924, "learning_rate": 0.0008570912350952044, "loss": 0.5878, "step": 12690 }, { "epoch": 0.6444898529019812, "grad_norm": 0.07863611845071104, "learning_rate": 0.0008569361408321159, "loss": 0.6133, "step": 12695 }, { "epoch": 0.6447436890000127, "grad_norm": 0.029398204077650845, "learning_rate": 0.000856780976504243, "loss": 0.5799, "step": 12700 }, { "epoch": 0.6449975250980442, "grad_norm": 0.02869897907225985, "learning_rate": 0.0008566257421420439, "loss": 0.6008, "step": 12705 }, { "epoch": 0.6452513611960757, "grad_norm": 0.026097172563309384, "learning_rate": 0.0008564704377759897, "loss": 0.5939, "step": 12710 }, { "epoch": 0.6455051972941072, "grad_norm": 0.02502019281825702, "learning_rate": 0.0008563150634365666, "loss": 0.598, "step": 12715 }, { "epoch": 0.6457590333921387, "grad_norm": 0.02483685696837895, "learning_rate": 0.0008561596191542733, "loss": 0.5801, "step": 12720 }, { "epoch": 0.6460128694901702, "grad_norm": 0.03535549265065644, "learning_rate": 0.000856004104959623, "loss": 0.5946, "step": 12725 }, { "epoch": 0.6462667055882017, "grad_norm": 0.027879010371972223, "learning_rate": 0.0008558485208831424, "loss": 0.612, "step": 12730 }, { "epoch": 0.6465205416862332, "grad_norm": 0.04735218553478145, "learning_rate": 0.0008556928669553717, "loss": 0.5938, "step": 12735 }, { "epoch": 0.6467743777842647, "grad_norm": 0.0279629249002624, "learning_rate": 0.000855537143206865, "loss": 0.5837, "step": 12740 }, { "epoch": 0.6470282138822963, "grad_norm": 0.02533240032359082, "learning_rate": 0.00085538134966819, "loss": 0.6252, "step": 12745 }, { "epoch": 0.6472820499803277, "grad_norm": 0.025603639669077066, "learning_rate": 0.0008552254863699286, "loss": 0.5819, "step": 12750 }, { "epoch": 0.6475358860783592, "grad_norm": 0.04145899859904642, "learning_rate": 0.0008550695533426756, "loss": 0.597, "step": 12755 }, { "epoch": 0.6477897221763907, "grad_norm": 0.029237034218711128, "learning_rate": 0.00085491355061704, "loss": 0.6067, "step": 12760 }, { "epoch": 0.6480435582744222, "grad_norm": 0.030474815495890747, "learning_rate": 0.0008547574782236444, "loss": 0.5969, "step": 12765 }, { "epoch": 0.6482973943724537, "grad_norm": 0.033389985535876285, "learning_rate": 0.0008546013361931251, "loss": 0.5902, "step": 12770 }, { "epoch": 0.6485512304704852, "grad_norm": 0.03244401680509467, "learning_rate": 0.0008544451245561318, "loss": 0.5714, "step": 12775 }, { "epoch": 0.6488050665685167, "grad_norm": 0.047562688287312124, "learning_rate": 0.0008542888433433283, "loss": 0.5706, "step": 12780 }, { "epoch": 0.6490589026665482, "grad_norm": 0.028307045275590848, "learning_rate": 0.0008541324925853915, "loss": 0.5689, "step": 12785 }, { "epoch": 0.6493127387645797, "grad_norm": 0.04704027854528433, "learning_rate": 0.0008539760723130125, "loss": 0.5661, "step": 12790 }, { "epoch": 0.6495665748626112, "grad_norm": 0.04648606005162825, "learning_rate": 0.0008538195825568958, "loss": 0.6028, "step": 12795 }, { "epoch": 0.6498204109606427, "grad_norm": 0.025943070142767706, "learning_rate": 0.0008536630233477594, "loss": 0.5877, "step": 12800 }, { "epoch": 0.6500742470586742, "grad_norm": 0.04012620623020576, "learning_rate": 0.0008535063947163355, "loss": 0.5976, "step": 12805 }, { "epoch": 0.6503280831567058, "grad_norm": 0.03544811219416931, "learning_rate": 0.0008533496966933691, "loss": 0.5855, "step": 12810 }, { "epoch": 0.6505819192547372, "grad_norm": 0.0281669036894832, "learning_rate": 0.0008531929293096194, "loss": 0.6111, "step": 12815 }, { "epoch": 0.6508357553527687, "grad_norm": 0.023763951065239725, "learning_rate": 0.0008530360925958591, "loss": 0.5776, "step": 12820 }, { "epoch": 0.6510895914508003, "grad_norm": 0.24418838214511798, "learning_rate": 0.0008528791865828742, "loss": 0.6009, "step": 12825 }, { "epoch": 0.6513434275488317, "grad_norm": 0.029087961946777385, "learning_rate": 0.000852722211301465, "loss": 0.6235, "step": 12830 }, { "epoch": 0.6515972636468632, "grad_norm": 0.034137672479786794, "learning_rate": 0.0008525651667824447, "loss": 0.5799, "step": 12835 }, { "epoch": 0.6518510997448947, "grad_norm": 0.028312942222624603, "learning_rate": 0.0008524080530566405, "loss": 0.6404, "step": 12840 }, { "epoch": 0.6521049358429262, "grad_norm": 0.02843554231766899, "learning_rate": 0.0008522508701548927, "loss": 0.5799, "step": 12845 }, { "epoch": 0.6523587719409577, "grad_norm": 0.027322928571429136, "learning_rate": 0.0008520936181080561, "loss": 0.5999, "step": 12850 }, { "epoch": 0.6526126080389892, "grad_norm": 0.0303740372452228, "learning_rate": 0.0008519362969469979, "loss": 0.5929, "step": 12855 }, { "epoch": 0.6528664441370208, "grad_norm": 0.027100993271736346, "learning_rate": 0.0008517789067025997, "loss": 0.6328, "step": 12860 }, { "epoch": 0.6531202802350522, "grad_norm": 0.028849366128399865, "learning_rate": 0.0008516214474057565, "loss": 0.5698, "step": 12865 }, { "epoch": 0.6533741163330837, "grad_norm": 0.029545255854450884, "learning_rate": 0.0008514639190873767, "loss": 0.5783, "step": 12870 }, { "epoch": 0.6536279524311153, "grad_norm": 0.024473560866559003, "learning_rate": 0.0008513063217783824, "loss": 0.6031, "step": 12875 }, { "epoch": 0.6538817885291467, "grad_norm": 0.02666332020016646, "learning_rate": 0.000851148655509709, "loss": 0.6249, "step": 12880 }, { "epoch": 0.6541356246271782, "grad_norm": 0.03803144464344789, "learning_rate": 0.0008509909203123057, "loss": 0.6052, "step": 12885 }, { "epoch": 0.6543894607252098, "grad_norm": 0.029430501206934002, "learning_rate": 0.0008508331162171353, "loss": 0.6082, "step": 12890 }, { "epoch": 0.6546432968232412, "grad_norm": 0.038636878659898226, "learning_rate": 0.0008506752432551736, "loss": 0.5922, "step": 12895 }, { "epoch": 0.6548971329212727, "grad_norm": 0.03645227270336851, "learning_rate": 0.0008505173014574104, "loss": 0.5961, "step": 12900 }, { "epoch": 0.6551509690193043, "grad_norm": 0.0339469915664989, "learning_rate": 0.0008503592908548492, "loss": 0.6087, "step": 12905 }, { "epoch": 0.6554048051173358, "grad_norm": 0.04584178211309174, "learning_rate": 0.0008502012114785062, "loss": 0.6057, "step": 12910 }, { "epoch": 0.6556586412153672, "grad_norm": 0.027847814793399383, "learning_rate": 0.0008500430633594121, "loss": 0.5957, "step": 12915 }, { "epoch": 0.6559124773133987, "grad_norm": 0.04088969160713702, "learning_rate": 0.0008498848465286101, "loss": 0.6299, "step": 12920 }, { "epoch": 0.6561663134114303, "grad_norm": 0.024361703944890165, "learning_rate": 0.0008497265610171576, "loss": 0.5643, "step": 12925 }, { "epoch": 0.6564201495094617, "grad_norm": 0.08374406284028113, "learning_rate": 0.0008495682068561254, "loss": 0.5758, "step": 12930 }, { "epoch": 0.6566739856074932, "grad_norm": 0.023934163458897068, "learning_rate": 0.0008494097840765975, "loss": 0.5817, "step": 12935 }, { "epoch": 0.6569278217055248, "grad_norm": 0.027116947138030544, "learning_rate": 0.0008492512927096714, "loss": 0.6075, "step": 12940 }, { "epoch": 0.6571816578035562, "grad_norm": 0.027883742219997405, "learning_rate": 0.0008490927327864581, "loss": 0.587, "step": 12945 }, { "epoch": 0.6574354939015877, "grad_norm": 0.02716053289665079, "learning_rate": 0.0008489341043380825, "loss": 0.657, "step": 12950 }, { "epoch": 0.6576893299996193, "grad_norm": 0.03758581951547631, "learning_rate": 0.0008487754073956823, "loss": 0.5958, "step": 12955 }, { "epoch": 0.6579431660976508, "grad_norm": 0.028060630011589125, "learning_rate": 0.0008486166419904089, "loss": 0.5604, "step": 12960 }, { "epoch": 0.6581970021956822, "grad_norm": 0.026078088164496267, "learning_rate": 0.0008484578081534274, "loss": 0.6172, "step": 12965 }, { "epoch": 0.6584508382937138, "grad_norm": 0.028071334962187697, "learning_rate": 0.0008482989059159158, "loss": 0.5903, "step": 12970 }, { "epoch": 0.6587046743917453, "grad_norm": 0.03378636851534933, "learning_rate": 0.0008481399353090659, "loss": 0.6051, "step": 12975 }, { "epoch": 0.6589585104897767, "grad_norm": 0.026105241851777608, "learning_rate": 0.0008479808963640828, "loss": 0.6411, "step": 12980 }, { "epoch": 0.6592123465878083, "grad_norm": 0.030533873522670462, "learning_rate": 0.0008478217891121853, "loss": 0.5837, "step": 12985 }, { "epoch": 0.6594661826858398, "grad_norm": 0.03300525899669726, "learning_rate": 0.0008476626135846051, "loss": 0.5938, "step": 12990 }, { "epoch": 0.6597200187838712, "grad_norm": 0.028329077912125737, "learning_rate": 0.0008475033698125876, "loss": 0.6348, "step": 12995 }, { "epoch": 0.6599738548819027, "grad_norm": 0.04110076364540404, "learning_rate": 0.0008473440578273916, "loss": 0.5867, "step": 13000 }, { "epoch": 0.6602276909799343, "grad_norm": 0.05757425924457839, "learning_rate": 0.0008471846776602894, "loss": 0.5384, "step": 13005 }, { "epoch": 0.6604815270779657, "grad_norm": 0.05844590144036168, "learning_rate": 0.0008470252293425662, "loss": 0.5848, "step": 13010 }, { "epoch": 0.6607353631759972, "grad_norm": 0.06415338224333281, "learning_rate": 0.0008468657129055213, "loss": 0.5522, "step": 13015 }, { "epoch": 0.6609891992740288, "grad_norm": 0.030473738178567217, "learning_rate": 0.0008467061283804665, "loss": 0.5945, "step": 13020 }, { "epoch": 0.6612430353720603, "grad_norm": 0.02536912248622989, "learning_rate": 0.000846546475798728, "loss": 0.5816, "step": 13025 }, { "epoch": 0.6614968714700917, "grad_norm": 0.0461113719066347, "learning_rate": 0.0008463867551916443, "loss": 0.6512, "step": 13030 }, { "epoch": 0.6617507075681233, "grad_norm": 0.024517762602526454, "learning_rate": 0.0008462269665905682, "loss": 0.597, "step": 13035 }, { "epoch": 0.6620045436661548, "grad_norm": 1.033031229487397, "learning_rate": 0.0008460671100268649, "loss": 0.6271, "step": 13040 }, { "epoch": 0.6622583797641862, "grad_norm": 0.0693341169836654, "learning_rate": 0.0008459071855319141, "loss": 0.6248, "step": 13045 }, { "epoch": 0.6625122158622178, "grad_norm": 0.03717279346920806, "learning_rate": 0.0008457471931371074, "loss": 0.573, "step": 13050 }, { "epoch": 0.6627660519602493, "grad_norm": 0.03262397839797902, "learning_rate": 0.0008455871328738512, "loss": 0.5841, "step": 13055 }, { "epoch": 0.6630198880582807, "grad_norm": 0.05355080175870677, "learning_rate": 0.0008454270047735643, "loss": 0.5727, "step": 13060 }, { "epoch": 0.6632737241563122, "grad_norm": 0.03628283883616998, "learning_rate": 0.0008452668088676789, "loss": 0.6012, "step": 13065 }, { "epoch": 0.6635275602543438, "grad_norm": 0.06228849912821036, "learning_rate": 0.0008451065451876408, "loss": 0.5836, "step": 13070 }, { "epoch": 0.6637813963523753, "grad_norm": 0.03209553307639597, "learning_rate": 0.0008449462137649087, "loss": 0.5907, "step": 13075 }, { "epoch": 0.6640352324504067, "grad_norm": 0.029984865563620306, "learning_rate": 0.0008447858146309554, "loss": 0.5891, "step": 13080 }, { "epoch": 0.6642890685484383, "grad_norm": 0.03128245374162066, "learning_rate": 0.000844625347817266, "loss": 0.5829, "step": 13085 }, { "epoch": 0.6645429046464698, "grad_norm": 0.03488425051563897, "learning_rate": 0.0008444648133553394, "loss": 0.6055, "step": 13090 }, { "epoch": 0.6647967407445012, "grad_norm": 0.026243668933171885, "learning_rate": 0.0008443042112766879, "loss": 0.5931, "step": 13095 }, { "epoch": 0.6650505768425328, "grad_norm": 0.0776128900108717, "learning_rate": 0.0008441435416128367, "loss": 0.7144, "step": 13100 }, { "epoch": 0.6653044129405643, "grad_norm": 0.06729227309686386, "learning_rate": 0.0008439828043953246, "loss": 0.6272, "step": 13105 }, { "epoch": 0.6655582490385957, "grad_norm": 0.06770135653654577, "learning_rate": 0.0008438219996557033, "loss": 0.6059, "step": 13110 }, { "epoch": 0.6658120851366273, "grad_norm": 0.059522461982278785, "learning_rate": 0.0008436611274255382, "loss": 0.5964, "step": 13115 }, { "epoch": 0.6660659212346588, "grad_norm": 0.038971286412593786, "learning_rate": 0.0008435001877364076, "loss": 0.6201, "step": 13120 }, { "epoch": 0.6663197573326903, "grad_norm": 0.03964531176980359, "learning_rate": 0.0008433391806199033, "loss": 0.6378, "step": 13125 }, { "epoch": 0.6665735934307218, "grad_norm": 0.03537229559330627, "learning_rate": 0.0008431781061076298, "loss": 0.6107, "step": 13130 }, { "epoch": 0.6668274295287533, "grad_norm": 0.03125614020555413, "learning_rate": 0.0008430169642312058, "loss": 0.6444, "step": 13135 }, { "epoch": 0.6670812656267848, "grad_norm": 0.02696637588263313, "learning_rate": 0.0008428557550222622, "loss": 0.6245, "step": 13140 }, { "epoch": 0.6673351017248162, "grad_norm": 0.03589221798809491, "learning_rate": 0.0008426944785124437, "loss": 0.5984, "step": 13145 }, { "epoch": 0.6675889378228478, "grad_norm": 0.05479122023801771, "learning_rate": 0.000842533134733408, "loss": 0.5568, "step": 13150 }, { "epoch": 0.6678427739208793, "grad_norm": 0.02850027631836941, "learning_rate": 0.0008423717237168263, "loss": 0.5844, "step": 13155 }, { "epoch": 0.6680966100189107, "grad_norm": 0.043303911644560475, "learning_rate": 0.0008422102454943827, "loss": 0.6056, "step": 13160 }, { "epoch": 0.6683504461169423, "grad_norm": 0.0485597197702503, "learning_rate": 0.0008420487000977743, "loss": 0.5952, "step": 13165 }, { "epoch": 0.6686042822149738, "grad_norm": 0.04377645965991001, "learning_rate": 0.0008418870875587121, "loss": 0.6004, "step": 13170 }, { "epoch": 0.6688581183130053, "grad_norm": 0.04194534237527523, "learning_rate": 0.0008417254079089194, "loss": 0.6054, "step": 13175 }, { "epoch": 0.6691119544110368, "grad_norm": 0.03712390306106914, "learning_rate": 0.0008415636611801334, "loss": 0.6166, "step": 13180 }, { "epoch": 0.6693657905090683, "grad_norm": 0.03756186192265224, "learning_rate": 0.0008414018474041041, "loss": 0.5976, "step": 13185 }, { "epoch": 0.6696196266070998, "grad_norm": 0.039758177882621364, "learning_rate": 0.0008412399666125945, "loss": 0.5797, "step": 13190 }, { "epoch": 0.6698734627051313, "grad_norm": 0.034160759225817725, "learning_rate": 0.0008410780188373814, "loss": 0.603, "step": 13195 }, { "epoch": 0.6701272988031628, "grad_norm": 0.045688219761594664, "learning_rate": 0.0008409160041102543, "loss": 0.5717, "step": 13200 }, { "epoch": 0.6703811349011943, "grad_norm": 0.025679114000337882, "learning_rate": 0.0008407539224630157, "loss": 0.5667, "step": 13205 }, { "epoch": 0.6706349709992258, "grad_norm": 0.036873394000505495, "learning_rate": 0.0008405917739274813, "loss": 0.6186, "step": 13210 }, { "epoch": 0.6708888070972573, "grad_norm": 0.04153117906728298, "learning_rate": 0.0008404295585354802, "loss": 0.5834, "step": 13215 }, { "epoch": 0.6711426431952888, "grad_norm": 0.02339181698231598, "learning_rate": 0.0008402672763188545, "loss": 0.5755, "step": 13220 }, { "epoch": 0.6713964792933204, "grad_norm": 0.030301352786187497, "learning_rate": 0.0008401049273094594, "loss": 0.5768, "step": 13225 }, { "epoch": 0.6716503153913518, "grad_norm": 0.030870240027105245, "learning_rate": 0.0008399425115391632, "loss": 0.5899, "step": 13230 }, { "epoch": 0.6719041514893833, "grad_norm": 0.04099103986806237, "learning_rate": 0.0008397800290398473, "loss": 0.5572, "step": 13235 }, { "epoch": 0.6721579875874149, "grad_norm": 0.025526012832099508, "learning_rate": 0.0008396174798434062, "loss": 0.5796, "step": 13240 }, { "epoch": 0.6724118236854463, "grad_norm": 0.02582782722174359, "learning_rate": 0.0008394548639817474, "loss": 0.5748, "step": 13245 }, { "epoch": 0.6726656597834778, "grad_norm": 0.044686293141161536, "learning_rate": 0.0008392921814867916, "loss": 0.6161, "step": 13250 }, { "epoch": 0.6729194958815093, "grad_norm": 0.03157479001428999, "learning_rate": 0.0008391294323904726, "loss": 0.579, "step": 13255 }, { "epoch": 0.6731733319795408, "grad_norm": 0.03230925419893887, "learning_rate": 0.0008389666167247374, "loss": 0.6105, "step": 13260 }, { "epoch": 0.6734271680775723, "grad_norm": 0.0227012216152469, "learning_rate": 0.0008388037345215457, "loss": 0.5985, "step": 13265 }, { "epoch": 0.6736810041756038, "grad_norm": 0.027786730311156502, "learning_rate": 0.0008386407858128706, "loss": 0.609, "step": 13270 }, { "epoch": 0.6739348402736353, "grad_norm": 0.024074341825161803, "learning_rate": 0.0008384777706306979, "loss": 0.5953, "step": 13275 }, { "epoch": 0.6741886763716668, "grad_norm": 0.024751468872537288, "learning_rate": 0.0008383146890070269, "loss": 0.5925, "step": 13280 }, { "epoch": 0.6744425124696983, "grad_norm": 0.028042950086599638, "learning_rate": 0.0008381515409738696, "loss": 0.5829, "step": 13285 }, { "epoch": 0.6746963485677299, "grad_norm": 0.026500810254070112, "learning_rate": 0.0008379883265632512, "loss": 0.6014, "step": 13290 }, { "epoch": 0.6749501846657613, "grad_norm": 0.031677519675672304, "learning_rate": 0.0008378250458072099, "loss": 0.5688, "step": 13295 }, { "epoch": 0.6752040207637928, "grad_norm": 0.03226776792346643, "learning_rate": 0.0008376616987377968, "loss": 0.637, "step": 13300 }, { "epoch": 0.6754578568618244, "grad_norm": 0.03935591032487197, "learning_rate": 0.0008374982853870761, "loss": 0.6372, "step": 13305 }, { "epoch": 0.6757116929598558, "grad_norm": 0.04985731682272338, "learning_rate": 0.000837334805787125, "loss": 0.5976, "step": 13310 }, { "epoch": 0.6759655290578873, "grad_norm": 0.03831220440900208, "learning_rate": 0.0008371712599700338, "loss": 0.6055, "step": 13315 }, { "epoch": 0.6762193651559189, "grad_norm": 0.02840577329842378, "learning_rate": 0.0008370076479679059, "loss": 0.5921, "step": 13320 }, { "epoch": 0.6764732012539503, "grad_norm": 0.02670171769669835, "learning_rate": 0.0008368439698128574, "loss": 0.5863, "step": 13325 }, { "epoch": 0.6767270373519818, "grad_norm": 0.027414460086001645, "learning_rate": 0.0008366802255370174, "loss": 0.5851, "step": 13330 }, { "epoch": 0.6769808734500133, "grad_norm": 0.028303785071122464, "learning_rate": 0.000836516415172528, "loss": 0.5752, "step": 13335 }, { "epoch": 0.6772347095480449, "grad_norm": 0.025538256488275953, "learning_rate": 0.0008363525387515446, "loss": 0.5939, "step": 13340 }, { "epoch": 0.6774885456460763, "grad_norm": 0.03068030408097493, "learning_rate": 0.0008361885963062353, "loss": 0.5596, "step": 13345 }, { "epoch": 0.6777423817441078, "grad_norm": 0.02823000001525365, "learning_rate": 0.000836024587868781, "loss": 0.6162, "step": 13350 }, { "epoch": 0.6779962178421394, "grad_norm": 0.025476619812650033, "learning_rate": 0.0008358605134713759, "loss": 0.5924, "step": 13355 }, { "epoch": 0.6782500539401708, "grad_norm": 0.04780163681995968, "learning_rate": 0.0008356963731462271, "loss": 0.5633, "step": 13360 }, { "epoch": 0.6785038900382023, "grad_norm": 0.02825831061440577, "learning_rate": 0.0008355321669255542, "loss": 0.5918, "step": 13365 }, { "epoch": 0.6787577261362339, "grad_norm": 0.026437801730974143, "learning_rate": 0.0008353678948415901, "loss": 0.5642, "step": 13370 }, { "epoch": 0.6790115622342653, "grad_norm": 0.025454718983034218, "learning_rate": 0.0008352035569265809, "loss": 0.5691, "step": 13375 }, { "epoch": 0.6792653983322968, "grad_norm": 0.023242855883304124, "learning_rate": 0.0008350391532127851, "loss": 0.6205, "step": 13380 }, { "epoch": 0.6795192344303284, "grad_norm": 0.03623503812266734, "learning_rate": 0.0008348746837324743, "loss": 0.5955, "step": 13385 }, { "epoch": 0.6797730705283599, "grad_norm": 0.029114104299875758, "learning_rate": 0.0008347101485179332, "loss": 0.5573, "step": 13390 }, { "epoch": 0.6800269066263913, "grad_norm": 0.024202571333690956, "learning_rate": 0.0008345455476014592, "loss": 0.5487, "step": 13395 }, { "epoch": 0.6802807427244228, "grad_norm": 0.02802981312295006, "learning_rate": 0.0008343808810153624, "loss": 0.5798, "step": 13400 }, { "epoch": 0.6805345788224544, "grad_norm": 0.024805503868118448, "learning_rate": 0.0008342161487919664, "loss": 0.5874, "step": 13405 }, { "epoch": 0.6807884149204858, "grad_norm": 0.023290389364338096, "learning_rate": 0.000834051350963607, "loss": 0.5926, "step": 13410 }, { "epoch": 0.6810422510185173, "grad_norm": 0.028882137299632987, "learning_rate": 0.0008338864875626333, "loss": 0.5975, "step": 13415 }, { "epoch": 0.6812960871165489, "grad_norm": 0.0288092686879103, "learning_rate": 0.0008337215586214073, "loss": 0.6053, "step": 13420 }, { "epoch": 0.6815499232145803, "grad_norm": 0.025146916551842938, "learning_rate": 0.0008335565641723035, "loss": 0.5884, "step": 13425 }, { "epoch": 0.6818037593126118, "grad_norm": 0.024739281591221643, "learning_rate": 0.0008333915042477096, "loss": 0.5652, "step": 13430 }, { "epoch": 0.6820575954106434, "grad_norm": 0.024056449982588327, "learning_rate": 0.000833226378880026, "loss": 0.6189, "step": 13435 }, { "epoch": 0.6823114315086749, "grad_norm": 0.026316533874474936, "learning_rate": 0.000833061188101666, "loss": 0.6197, "step": 13440 }, { "epoch": 0.6825652676067063, "grad_norm": 0.022435368988669158, "learning_rate": 0.000832895931945056, "loss": 0.5676, "step": 13445 }, { "epoch": 0.6828191037047379, "grad_norm": 0.03713138580934235, "learning_rate": 0.0008327306104426345, "loss": 0.587, "step": 13450 }, { "epoch": 0.6830729398027694, "grad_norm": 0.028633714641747837, "learning_rate": 0.0008325652236268536, "loss": 0.599, "step": 13455 }, { "epoch": 0.6833267759008008, "grad_norm": 0.040045692037592666, "learning_rate": 0.0008323997715301777, "loss": 0.5729, "step": 13460 }, { "epoch": 0.6835806119988324, "grad_norm": 0.03628423966164721, "learning_rate": 0.0008322342541850844, "loss": 0.5689, "step": 13465 }, { "epoch": 0.6838344480968639, "grad_norm": 0.030711278133780332, "learning_rate": 0.0008320686716240637, "loss": 0.5646, "step": 13470 }, { "epoch": 0.6840882841948953, "grad_norm": 0.025362649640482234, "learning_rate": 0.000831903023879619, "loss": 0.5774, "step": 13475 }, { "epoch": 0.6843421202929268, "grad_norm": 0.03407568648626278, "learning_rate": 0.0008317373109842658, "loss": 0.573, "step": 13480 }, { "epoch": 0.6845959563909584, "grad_norm": 0.02491716289782007, "learning_rate": 0.0008315715329705329, "loss": 0.5727, "step": 13485 }, { "epoch": 0.6848497924889898, "grad_norm": 0.0317932126461997, "learning_rate": 0.0008314056898709615, "loss": 0.6018, "step": 13490 }, { "epoch": 0.6851036285870213, "grad_norm": 0.024107080698907192, "learning_rate": 0.0008312397817181059, "loss": 0.6016, "step": 13495 }, { "epoch": 0.6853574646850529, "grad_norm": 0.03632719720582319, "learning_rate": 0.0008310738085445332, "loss": 0.5991, "step": 13500 }, { "epoch": 0.6856113007830844, "grad_norm": 0.05715841618806044, "learning_rate": 0.0008309077703828228, "loss": 0.6122, "step": 13505 }, { "epoch": 0.6858651368811158, "grad_norm": 0.03704079824369565, "learning_rate": 0.0008307416672655674, "loss": 0.6023, "step": 13510 }, { "epoch": 0.6861189729791474, "grad_norm": 0.24267969330580477, "learning_rate": 0.000830575499225372, "loss": 0.5998, "step": 13515 }, { "epoch": 0.6863728090771789, "grad_norm": 0.048764967617313686, "learning_rate": 0.0008304092662948548, "loss": 0.608, "step": 13520 }, { "epoch": 0.6866266451752103, "grad_norm": 0.0516560962912755, "learning_rate": 0.0008302429685066462, "loss": 0.5713, "step": 13525 }, { "epoch": 0.6868804812732419, "grad_norm": 0.03554128227511413, "learning_rate": 0.0008300766058933899, "loss": 0.5681, "step": 13530 }, { "epoch": 0.6871343173712734, "grad_norm": 0.028215204615185188, "learning_rate": 0.0008299101784877421, "loss": 0.5954, "step": 13535 }, { "epoch": 0.6873881534693048, "grad_norm": 0.0321511775141223, "learning_rate": 0.0008297436863223715, "loss": 0.5873, "step": 13540 }, { "epoch": 0.6876419895673364, "grad_norm": 0.029157356552754574, "learning_rate": 0.0008295771294299596, "loss": 0.5775, "step": 13545 }, { "epoch": 0.6878958256653679, "grad_norm": 0.028937031419344138, "learning_rate": 0.0008294105078432007, "loss": 0.5808, "step": 13550 }, { "epoch": 0.6881496617633994, "grad_norm": 0.037942050160010676, "learning_rate": 0.000829243821594802, "loss": 0.6233, "step": 13555 }, { "epoch": 0.6884034978614308, "grad_norm": 0.031543194063541696, "learning_rate": 0.0008290770707174831, "loss": 0.5977, "step": 13560 }, { "epoch": 0.6886573339594624, "grad_norm": 0.02238872742391851, "learning_rate": 0.0008289102552439762, "loss": 0.5518, "step": 13565 }, { "epoch": 0.6889111700574939, "grad_norm": 0.027982976773836193, "learning_rate": 0.0008287433752070265, "loss": 0.5747, "step": 13570 }, { "epoch": 0.6891650061555253, "grad_norm": 0.03491184560447035, "learning_rate": 0.0008285764306393917, "loss": 0.5675, "step": 13575 }, { "epoch": 0.6894188422535569, "grad_norm": 0.05242850032688674, "learning_rate": 0.0008284094215738422, "loss": 0.5764, "step": 13580 }, { "epoch": 0.6896726783515884, "grad_norm": 0.05106024303010987, "learning_rate": 0.000828242348043161, "loss": 0.6068, "step": 13585 }, { "epoch": 0.6899265144496198, "grad_norm": 0.030655123821702948, "learning_rate": 0.0008280752100801439, "loss": 0.5834, "step": 13590 }, { "epoch": 0.6901803505476514, "grad_norm": 0.030080411514861318, "learning_rate": 0.0008279080077175992, "loss": 0.589, "step": 13595 }, { "epoch": 0.6904341866456829, "grad_norm": 0.07602111036419369, "learning_rate": 0.0008277407409883476, "loss": 0.5973, "step": 13600 }, { "epoch": 0.6906880227437144, "grad_norm": 0.026230560906281026, "learning_rate": 0.0008275734099252233, "loss": 0.5867, "step": 13605 }, { "epoch": 0.6909418588417459, "grad_norm": 0.03576027679760639, "learning_rate": 0.0008274060145610719, "loss": 0.5555, "step": 13610 }, { "epoch": 0.6911956949397774, "grad_norm": 0.03286724650115783, "learning_rate": 0.0008272385549287529, "loss": 0.6358, "step": 13615 }, { "epoch": 0.6914495310378089, "grad_norm": 0.023355654899445015, "learning_rate": 0.0008270710310611374, "loss": 0.5994, "step": 13620 }, { "epoch": 0.6917033671358404, "grad_norm": 0.026604280616454975, "learning_rate": 0.0008269034429911095, "loss": 0.6027, "step": 13625 }, { "epoch": 0.6919572032338719, "grad_norm": 0.025639930478733225, "learning_rate": 0.0008267357907515661, "loss": 0.5732, "step": 13630 }, { "epoch": 0.6922110393319034, "grad_norm": 0.0332874158069147, "learning_rate": 0.0008265680743754165, "loss": 0.6007, "step": 13635 }, { "epoch": 0.6924648754299348, "grad_norm": 0.029848135095345608, "learning_rate": 0.0008264002938955823, "loss": 0.568, "step": 13640 }, { "epoch": 0.6927187115279664, "grad_norm": 0.02669194226166377, "learning_rate": 0.0008262324493449982, "loss": 0.5983, "step": 13645 }, { "epoch": 0.6929725476259979, "grad_norm": 0.027289337864795267, "learning_rate": 0.0008260645407566114, "loss": 0.6212, "step": 13650 }, { "epoch": 0.6932263837240294, "grad_norm": 0.028021835675969425, "learning_rate": 0.0008258965681633813, "loss": 0.5927, "step": 13655 }, { "epoch": 0.6934802198220609, "grad_norm": 0.025781289683893413, "learning_rate": 0.0008257285315982799, "loss": 0.5623, "step": 13660 }, { "epoch": 0.6937340559200924, "grad_norm": 0.03639235414298802, "learning_rate": 0.0008255604310942922, "loss": 0.5608, "step": 13665 }, { "epoch": 0.6939878920181239, "grad_norm": 0.025081206584032647, "learning_rate": 0.0008253922666844155, "loss": 0.5641, "step": 13670 }, { "epoch": 0.6942417281161554, "grad_norm": 0.02503169555093532, "learning_rate": 0.0008252240384016596, "loss": 0.5815, "step": 13675 }, { "epoch": 0.6944955642141869, "grad_norm": 0.025522493920881646, "learning_rate": 0.0008250557462790469, "loss": 0.6103, "step": 13680 }, { "epoch": 0.6947494003122184, "grad_norm": 0.029829670493091157, "learning_rate": 0.0008248873903496123, "loss": 0.561, "step": 13685 }, { "epoch": 0.6950032364102499, "grad_norm": 0.03274242306959272, "learning_rate": 0.000824718970646403, "loss": 0.5965, "step": 13690 }, { "epoch": 0.6952570725082814, "grad_norm": 0.053602824065729755, "learning_rate": 0.0008245504872024793, "loss": 0.5778, "step": 13695 }, { "epoch": 0.6955109086063129, "grad_norm": 0.02930268258189034, "learning_rate": 0.0008243819400509133, "loss": 0.5291, "step": 13700 }, { "epoch": 0.6957647447043444, "grad_norm": 0.024987980789305964, "learning_rate": 0.0008242133292247902, "loss": 0.5828, "step": 13705 }, { "epoch": 0.6960185808023759, "grad_norm": 0.02829975573628627, "learning_rate": 0.0008240446547572076, "loss": 0.5881, "step": 13710 }, { "epoch": 0.6962724169004074, "grad_norm": 0.03443116573593433, "learning_rate": 0.0008238759166812751, "loss": 0.5919, "step": 13715 }, { "epoch": 0.696526252998439, "grad_norm": 0.031587204673142105, "learning_rate": 0.0008237071150301154, "loss": 0.6065, "step": 13720 }, { "epoch": 0.6967800890964704, "grad_norm": 0.023508246283948264, "learning_rate": 0.0008235382498368634, "loss": 0.5946, "step": 13725 }, { "epoch": 0.6970339251945019, "grad_norm": 0.025009199129867415, "learning_rate": 0.0008233693211346663, "loss": 0.5563, "step": 13730 }, { "epoch": 0.6972877612925334, "grad_norm": 0.03326099984467122, "learning_rate": 0.0008232003289566843, "loss": 0.5873, "step": 13735 }, { "epoch": 0.6975415973905649, "grad_norm": 0.034975275494994254, "learning_rate": 0.0008230312733360894, "loss": 0.5658, "step": 13740 }, { "epoch": 0.6977954334885964, "grad_norm": 0.025872022613802486, "learning_rate": 0.0008228621543060665, "loss": 0.5572, "step": 13745 }, { "epoch": 0.6980492695866279, "grad_norm": 0.02546372161263482, "learning_rate": 0.0008226929718998129, "loss": 0.5905, "step": 13750 }, { "epoch": 0.6983031056846594, "grad_norm": 0.03215322261937108, "learning_rate": 0.0008225237261505381, "loss": 0.5581, "step": 13755 }, { "epoch": 0.6985569417826909, "grad_norm": 0.027702755257204188, "learning_rate": 0.0008223544170914641, "loss": 0.6156, "step": 13760 }, { "epoch": 0.6988107778807224, "grad_norm": 0.03195843106084198, "learning_rate": 0.0008221850447558259, "loss": 0.6007, "step": 13765 }, { "epoch": 0.699064613978754, "grad_norm": 0.037527909108500845, "learning_rate": 0.00082201560917687, "loss": 0.5738, "step": 13770 }, { "epoch": 0.6993184500767854, "grad_norm": 0.05309666088358755, "learning_rate": 0.000821846110387856, "loss": 0.585, "step": 13775 }, { "epoch": 0.6995722861748169, "grad_norm": 0.03185639071908041, "learning_rate": 0.0008216765484220554, "loss": 0.594, "step": 13780 }, { "epoch": 0.6998261222728485, "grad_norm": 0.029682191254135553, "learning_rate": 0.0008215069233127528, "loss": 0.5832, "step": 13785 }, { "epoch": 0.7000799583708799, "grad_norm": 0.030629890653038103, "learning_rate": 0.0008213372350932444, "loss": 0.5727, "step": 13790 }, { "epoch": 0.7003337944689114, "grad_norm": 0.025415360193154723, "learning_rate": 0.0008211674837968391, "loss": 0.5891, "step": 13795 }, { "epoch": 0.700587630566943, "grad_norm": 0.04954711837375036, "learning_rate": 0.0008209976694568586, "loss": 0.58, "step": 13800 }, { "epoch": 0.7008414666649744, "grad_norm": 0.047662707757456846, "learning_rate": 0.0008208277921066362, "loss": 0.5835, "step": 13805 }, { "epoch": 0.7010953027630059, "grad_norm": 0.049928278726034496, "learning_rate": 0.0008206578517795185, "loss": 0.584, "step": 13810 }, { "epoch": 0.7013491388610374, "grad_norm": 0.06832687556620186, "learning_rate": 0.0008204878485088634, "loss": 0.6406, "step": 13815 }, { "epoch": 0.701602974959069, "grad_norm": 0.08083335473139422, "learning_rate": 0.0008203177823280419, "loss": 0.6479, "step": 13820 }, { "epoch": 0.7018568110571004, "grad_norm": 0.07135721218869323, "learning_rate": 0.000820147653270437, "loss": 0.6405, "step": 13825 }, { "epoch": 0.7021106471551319, "grad_norm": 0.04104888957953317, "learning_rate": 0.0008199774613694447, "loss": 0.5936, "step": 13830 }, { "epoch": 0.7023644832531635, "grad_norm": 0.0875218883592723, "learning_rate": 0.0008198072066584721, "loss": 0.6216, "step": 13835 }, { "epoch": 0.7026183193511949, "grad_norm": 0.07942963393083619, "learning_rate": 0.0008196368891709399, "loss": 0.6315, "step": 13840 }, { "epoch": 0.7028721554492264, "grad_norm": 0.06110437267505552, "learning_rate": 0.0008194665089402804, "loss": 0.5965, "step": 13845 }, { "epoch": 0.703125991547258, "grad_norm": 0.08891102687332698, "learning_rate": 0.0008192960659999383, "loss": 0.6391, "step": 13850 }, { "epoch": 0.7033798276452894, "grad_norm": 0.06448648520699571, "learning_rate": 0.0008191255603833708, "loss": 0.5794, "step": 13855 }, { "epoch": 0.7036336637433209, "grad_norm": 0.030570919417378353, "learning_rate": 0.0008189549921240472, "loss": 0.5855, "step": 13860 }, { "epoch": 0.7038874998413525, "grad_norm": 0.03473828499726488, "learning_rate": 0.0008187843612554493, "loss": 0.6168, "step": 13865 }, { "epoch": 0.704141335939384, "grad_norm": 0.050512426090771084, "learning_rate": 0.0008186136678110711, "loss": 0.6157, "step": 13870 }, { "epoch": 0.7043951720374154, "grad_norm": 0.03331461557177089, "learning_rate": 0.000818442911824419, "loss": 0.6325, "step": 13875 }, { "epoch": 0.704649008135447, "grad_norm": 0.03887589726537461, "learning_rate": 0.0008182720933290111, "loss": 0.619, "step": 13880 }, { "epoch": 0.7049028442334785, "grad_norm": 0.034491361473236455, "learning_rate": 0.0008181012123583786, "loss": 0.6265, "step": 13885 }, { "epoch": 0.7051566803315099, "grad_norm": 0.04939733683559655, "learning_rate": 0.0008179302689460646, "loss": 0.6209, "step": 13890 }, { "epoch": 0.7054105164295414, "grad_norm": 0.036958385336953684, "learning_rate": 0.0008177592631256241, "loss": 0.5847, "step": 13895 }, { "epoch": 0.705664352527573, "grad_norm": 0.0338882190476505, "learning_rate": 0.0008175881949306252, "loss": 0.6022, "step": 13900 }, { "epoch": 0.7059181886256044, "grad_norm": 0.03164427898151885, "learning_rate": 0.0008174170643946472, "loss": 0.6194, "step": 13905 }, { "epoch": 0.7061720247236359, "grad_norm": 0.02788808622745641, "learning_rate": 0.0008172458715512825, "loss": 0.5735, "step": 13910 }, { "epoch": 0.7064258608216675, "grad_norm": 0.05228385785798862, "learning_rate": 0.0008170746164341352, "loss": 0.6066, "step": 13915 }, { "epoch": 0.7066796969196989, "grad_norm": 0.026836897802726748, "learning_rate": 0.0008169032990768221, "loss": 0.6333, "step": 13920 }, { "epoch": 0.7069335330177304, "grad_norm": 0.028759260350827046, "learning_rate": 0.0008167319195129717, "loss": 0.6147, "step": 13925 }, { "epoch": 0.707187369115762, "grad_norm": 0.045590276542868145, "learning_rate": 0.0008165604777762251, "loss": 0.6096, "step": 13930 }, { "epoch": 0.7074412052137935, "grad_norm": 0.025644349892156408, "learning_rate": 0.0008163889739002354, "loss": 0.571, "step": 13935 }, { "epoch": 0.7076950413118249, "grad_norm": 0.03515540024760426, "learning_rate": 0.000816217407918668, "loss": 0.57, "step": 13940 }, { "epoch": 0.7079488774098565, "grad_norm": 0.030510481245704536, "learning_rate": 0.0008160457798652002, "loss": 0.5802, "step": 13945 }, { "epoch": 0.708202713507888, "grad_norm": 0.026068842912907026, "learning_rate": 0.0008158740897735221, "loss": 0.59, "step": 13950 }, { "epoch": 0.7084565496059194, "grad_norm": 0.02799394637765536, "learning_rate": 0.0008157023376773354, "loss": 0.6148, "step": 13955 }, { "epoch": 0.708710385703951, "grad_norm": 0.039004735079580716, "learning_rate": 0.0008155305236103543, "loss": 0.6115, "step": 13960 }, { "epoch": 0.7089642218019825, "grad_norm": 0.028262356505209356, "learning_rate": 0.0008153586476063048, "loss": 0.5613, "step": 13965 }, { "epoch": 0.7092180579000139, "grad_norm": 0.02565901929071906, "learning_rate": 0.0008151867096989256, "loss": 0.5753, "step": 13970 }, { "epoch": 0.7094718939980454, "grad_norm": 0.03604279567945798, "learning_rate": 0.0008150147099219669, "loss": 0.6221, "step": 13975 }, { "epoch": 0.709725730096077, "grad_norm": 0.02599493314932738, "learning_rate": 0.0008148426483091919, "loss": 0.6047, "step": 13980 }, { "epoch": 0.7099795661941085, "grad_norm": 0.03222079110872602, "learning_rate": 0.000814670524894375, "loss": 0.6341, "step": 13985 }, { "epoch": 0.7102334022921399, "grad_norm": 0.027685751102907306, "learning_rate": 0.0008144983397113032, "loss": 0.6027, "step": 13990 }, { "epoch": 0.7104872383901715, "grad_norm": 0.023842700636128793, "learning_rate": 0.000814326092793776, "loss": 0.5593, "step": 13995 }, { "epoch": 0.710741074488203, "grad_norm": 0.04054047646149784, "learning_rate": 0.0008141537841756043, "loss": 0.563, "step": 14000 }, { "epoch": 0.7109949105862344, "grad_norm": 0.028942162080538656, "learning_rate": 0.0008139814138906112, "loss": 0.5957, "step": 14005 }, { "epoch": 0.711248746684266, "grad_norm": 0.03050796562803737, "learning_rate": 0.0008138089819726326, "loss": 0.5865, "step": 14010 }, { "epoch": 0.7115025827822975, "grad_norm": 0.02451193708223736, "learning_rate": 0.0008136364884555158, "loss": 0.5557, "step": 14015 }, { "epoch": 0.7117564188803289, "grad_norm": 0.04559764332264477, "learning_rate": 0.0008134639333731202, "loss": 0.5906, "step": 14020 }, { "epoch": 0.7120102549783605, "grad_norm": 0.03944520888736035, "learning_rate": 0.0008132913167593179, "loss": 0.5703, "step": 14025 }, { "epoch": 0.712264091076392, "grad_norm": 0.02938776092497612, "learning_rate": 0.0008131186386479925, "loss": 0.5766, "step": 14030 }, { "epoch": 0.7125179271744235, "grad_norm": 0.04170810908228807, "learning_rate": 0.0008129458990730398, "loss": 0.5721, "step": 14035 }, { "epoch": 0.712771763272455, "grad_norm": 0.04306584561555878, "learning_rate": 0.0008127730980683677, "loss": 0.6244, "step": 14040 }, { "epoch": 0.7130255993704865, "grad_norm": 0.05080947630039173, "learning_rate": 0.0008126002356678965, "loss": 0.6219, "step": 14045 }, { "epoch": 0.713279435468518, "grad_norm": 0.04665163730281675, "learning_rate": 0.0008124273119055577, "loss": 0.6287, "step": 14050 }, { "epoch": 0.7135332715665494, "grad_norm": 0.023905364788607067, "learning_rate": 0.0008122543268152957, "loss": 0.6114, "step": 14055 }, { "epoch": 0.713787107664581, "grad_norm": 0.031161542177222803, "learning_rate": 0.0008120812804310667, "loss": 0.5888, "step": 14060 }, { "epoch": 0.7140409437626125, "grad_norm": 0.03335446756811596, "learning_rate": 0.0008119081727868386, "loss": 0.6239, "step": 14065 }, { "epoch": 0.7142947798606439, "grad_norm": 0.12183613795209156, "learning_rate": 0.0008117350039165916, "loss": 0.5827, "step": 14070 }, { "epoch": 0.7145486159586755, "grad_norm": 0.031453942182186376, "learning_rate": 0.0008115617738543182, "loss": 0.6154, "step": 14075 }, { "epoch": 0.714802452056707, "grad_norm": 0.03179438032903415, "learning_rate": 0.0008113884826340221, "loss": 0.6307, "step": 14080 }, { "epoch": 0.7150562881547385, "grad_norm": 0.03411034419877854, "learning_rate": 0.0008112151302897198, "loss": 0.6119, "step": 14085 }, { "epoch": 0.71531012425277, "grad_norm": 0.03217809599591784, "learning_rate": 0.0008110417168554396, "loss": 0.5976, "step": 14090 }, { "epoch": 0.7155639603508015, "grad_norm": 0.039786783196638205, "learning_rate": 0.0008108682423652213, "loss": 0.5819, "step": 14095 }, { "epoch": 0.715817796448833, "grad_norm": 0.027681937005003987, "learning_rate": 0.0008106947068531174, "loss": 0.5556, "step": 14100 }, { "epoch": 0.7160716325468645, "grad_norm": 0.046793772542064, "learning_rate": 0.000810521110353192, "loss": 0.6361, "step": 14105 }, { "epoch": 0.716325468644896, "grad_norm": 0.02471149262461818, "learning_rate": 0.0008103474528995213, "loss": 0.5904, "step": 14110 }, { "epoch": 0.7165793047429275, "grad_norm": 0.034604562342302796, "learning_rate": 0.0008101737345261932, "loss": 0.5659, "step": 14115 }, { "epoch": 0.716833140840959, "grad_norm": 0.03117446691261137, "learning_rate": 0.0008099999552673079, "loss": 0.6229, "step": 14120 }, { "epoch": 0.7170869769389905, "grad_norm": 0.0247974767087348, "learning_rate": 0.0008098261151569772, "loss": 0.5935, "step": 14125 }, { "epoch": 0.717340813037022, "grad_norm": 0.03509041523808686, "learning_rate": 0.0008096522142293255, "loss": 0.6033, "step": 14130 }, { "epoch": 0.7175946491350534, "grad_norm": 0.025370999477552076, "learning_rate": 0.0008094782525184881, "loss": 0.6065, "step": 14135 }, { "epoch": 0.717848485233085, "grad_norm": 0.03293482244748489, "learning_rate": 0.0008093042300586132, "loss": 0.5631, "step": 14140 }, { "epoch": 0.7181023213311165, "grad_norm": 0.02484308148359049, "learning_rate": 0.0008091301468838604, "loss": 0.6092, "step": 14145 }, { "epoch": 0.718356157429148, "grad_norm": 0.024837575281261598, "learning_rate": 0.0008089560030284014, "loss": 0.5881, "step": 14150 }, { "epoch": 0.7186099935271795, "grad_norm": 0.03353516289786398, "learning_rate": 0.0008087817985264197, "loss": 0.5782, "step": 14155 }, { "epoch": 0.718863829625211, "grad_norm": 0.03056718458106469, "learning_rate": 0.0008086075334121111, "loss": 0.5962, "step": 14160 }, { "epoch": 0.7191176657232425, "grad_norm": 0.05151128438996498, "learning_rate": 0.0008084332077196824, "loss": 0.5671, "step": 14165 }, { "epoch": 0.719371501821274, "grad_norm": 0.029141462162182278, "learning_rate": 0.0008082588214833534, "loss": 0.6444, "step": 14170 }, { "epoch": 0.7196253379193055, "grad_norm": 0.036675084804481534, "learning_rate": 0.000808084374737355, "loss": 0.5763, "step": 14175 }, { "epoch": 0.719879174017337, "grad_norm": 0.03544199673026975, "learning_rate": 0.0008079098675159302, "loss": 0.5726, "step": 14180 }, { "epoch": 0.7201330101153685, "grad_norm": 0.026411392839355837, "learning_rate": 0.0008077352998533339, "loss": 0.5642, "step": 14185 }, { "epoch": 0.7203868462134, "grad_norm": 0.06222643500106606, "learning_rate": 0.0008075606717838329, "loss": 0.579, "step": 14190 }, { "epoch": 0.7206406823114315, "grad_norm": 0.04757491182106772, "learning_rate": 0.0008073859833417059, "loss": 0.5845, "step": 14195 }, { "epoch": 0.7208945184094631, "grad_norm": 0.03794874490419018, "learning_rate": 0.0008072112345612433, "loss": 0.6024, "step": 14200 }, { "epoch": 0.7211483545074945, "grad_norm": 0.02909593167125118, "learning_rate": 0.0008070364254767475, "loss": 0.5807, "step": 14205 }, { "epoch": 0.721402190605526, "grad_norm": 0.04995516503580864, "learning_rate": 0.0008068615561225324, "loss": 0.6365, "step": 14210 }, { "epoch": 0.7216560267035576, "grad_norm": 0.029511579035933468, "learning_rate": 0.0008066866265329242, "loss": 0.5855, "step": 14215 }, { "epoch": 0.721909862801589, "grad_norm": 0.04837520573816359, "learning_rate": 0.0008065116367422607, "loss": 0.5877, "step": 14220 }, { "epoch": 0.7221636988996205, "grad_norm": 0.029346824274389435, "learning_rate": 0.0008063365867848916, "loss": 0.6129, "step": 14225 }, { "epoch": 0.722417534997652, "grad_norm": 0.03801220854976456, "learning_rate": 0.0008061614766951779, "loss": 0.6121, "step": 14230 }, { "epoch": 0.7226713710956835, "grad_norm": 0.035405249909125754, "learning_rate": 0.0008059863065074934, "loss": 0.5757, "step": 14235 }, { "epoch": 0.722925207193715, "grad_norm": 0.03770160969115643, "learning_rate": 0.0008058110762562227, "loss": 0.6527, "step": 14240 }, { "epoch": 0.7231790432917465, "grad_norm": 0.031787448281367404, "learning_rate": 0.0008056357859757631, "loss": 0.6437, "step": 14245 }, { "epoch": 0.7234328793897781, "grad_norm": 0.3335810711614509, "learning_rate": 0.0008054604357005227, "loss": 0.5659, "step": 14250 }, { "epoch": 0.7236867154878095, "grad_norm": 0.04424485372113206, "learning_rate": 0.000805285025464922, "loss": 0.5756, "step": 14255 }, { "epoch": 0.723940551585841, "grad_norm": 0.03463932389672562, "learning_rate": 0.0008051095553033935, "loss": 0.5662, "step": 14260 }, { "epoch": 0.7241943876838726, "grad_norm": 0.059238021172196606, "learning_rate": 0.0008049340252503808, "loss": 0.6028, "step": 14265 }, { "epoch": 0.724448223781904, "grad_norm": 0.037906766126114544, "learning_rate": 0.0008047584353403396, "loss": 0.6386, "step": 14270 }, { "epoch": 0.7247020598799355, "grad_norm": 0.03614272842990135, "learning_rate": 0.0008045827856077373, "loss": 0.5869, "step": 14275 }, { "epoch": 0.7249558959779671, "grad_norm": 0.025607815005150466, "learning_rate": 0.0008044070760870533, "loss": 0.5728, "step": 14280 }, { "epoch": 0.7252097320759985, "grad_norm": 0.027627783001377804, "learning_rate": 0.0008042313068127781, "loss": 0.639, "step": 14285 }, { "epoch": 0.72546356817403, "grad_norm": 0.043412145360747785, "learning_rate": 0.0008040554778194148, "loss": 0.5497, "step": 14290 }, { "epoch": 0.7257174042720616, "grad_norm": 0.02701727522162977, "learning_rate": 0.0008038795891414774, "loss": 0.6095, "step": 14295 }, { "epoch": 0.7259712403700931, "grad_norm": 0.029754029318448776, "learning_rate": 0.0008037036408134921, "loss": 0.6163, "step": 14300 }, { "epoch": 0.7262250764681245, "grad_norm": 0.04031215844232491, "learning_rate": 0.0008035276328699967, "loss": 0.6099, "step": 14305 }, { "epoch": 0.726478912566156, "grad_norm": 0.029676037361304357, "learning_rate": 0.0008033515653455408, "loss": 0.5771, "step": 14310 }, { "epoch": 0.7267327486641876, "grad_norm": 0.059697029416254835, "learning_rate": 0.0008031754382746854, "loss": 0.5749, "step": 14315 }, { "epoch": 0.726986584762219, "grad_norm": 0.025871394003377013, "learning_rate": 0.0008029992516920033, "loss": 0.5962, "step": 14320 }, { "epoch": 0.7272404208602505, "grad_norm": 0.02569159141087064, "learning_rate": 0.0008028230056320791, "loss": 0.575, "step": 14325 }, { "epoch": 0.7274942569582821, "grad_norm": 0.02738571226532053, "learning_rate": 0.0008026467001295092, "loss": 0.5774, "step": 14330 }, { "epoch": 0.7277480930563135, "grad_norm": 0.036913640747463713, "learning_rate": 0.0008024703352189011, "loss": 0.6074, "step": 14335 }, { "epoch": 0.728001929154345, "grad_norm": 0.02570980610387182, "learning_rate": 0.0008022939109348749, "loss": 0.5959, "step": 14340 }, { "epoch": 0.7282557652523766, "grad_norm": 0.04358525580235644, "learning_rate": 0.0008021174273120615, "loss": 0.5795, "step": 14345 }, { "epoch": 0.728509601350408, "grad_norm": 0.07617898724384416, "learning_rate": 0.0008019408843851037, "loss": 0.7202, "step": 14350 }, { "epoch": 0.7287634374484395, "grad_norm": 0.07466034802353338, "learning_rate": 0.0008017642821886562, "loss": 0.6215, "step": 14355 }, { "epoch": 0.7290172735464711, "grad_norm": 0.06325253244834098, "learning_rate": 0.0008015876207573848, "loss": 0.6182, "step": 14360 }, { "epoch": 0.7292711096445026, "grad_norm": 0.033647850896565544, "learning_rate": 0.0008014109001259675, "loss": 0.62, "step": 14365 }, { "epoch": 0.729524945742534, "grad_norm": 0.0311901569505441, "learning_rate": 0.0008012341203290936, "loss": 0.5985, "step": 14370 }, { "epoch": 0.7297787818405655, "grad_norm": 0.030231478505345966, "learning_rate": 0.0008010572814014643, "loss": 0.6101, "step": 14375 }, { "epoch": 0.7300326179385971, "grad_norm": 0.03148649478982455, "learning_rate": 0.0008008803833777919, "loss": 0.5824, "step": 14380 }, { "epoch": 0.7302864540366285, "grad_norm": 0.030003355989516, "learning_rate": 0.0008007034262928008, "loss": 0.5957, "step": 14385 }, { "epoch": 0.73054029013466, "grad_norm": 0.02917680627459806, "learning_rate": 0.0008005264101812267, "loss": 0.5986, "step": 14390 }, { "epoch": 0.7307941262326916, "grad_norm": 0.02495332231474039, "learning_rate": 0.000800349335077817, "loss": 0.5705, "step": 14395 }, { "epoch": 0.731047962330723, "grad_norm": 0.024227094406430567, "learning_rate": 0.0008001722010173306, "loss": 0.606, "step": 14400 }, { "epoch": 0.7313017984287545, "grad_norm": 0.02497321166367223, "learning_rate": 0.0007999950080345382, "loss": 0.598, "step": 14405 }, { "epoch": 0.7315556345267861, "grad_norm": 0.03244596407041108, "learning_rate": 0.0007998177561642218, "loss": 0.6059, "step": 14410 }, { "epoch": 0.7318094706248176, "grad_norm": 0.031337458561177915, "learning_rate": 0.000799640445441175, "loss": 0.6066, "step": 14415 }, { "epoch": 0.732063306722849, "grad_norm": 0.027382503542512458, "learning_rate": 0.000799463075900203, "loss": 0.6036, "step": 14420 }, { "epoch": 0.7323171428208806, "grad_norm": 0.11807874331082033, "learning_rate": 0.0007992856475761228, "loss": 0.5847, "step": 14425 }, { "epoch": 0.7325709789189121, "grad_norm": 0.05405316379164612, "learning_rate": 0.0007991081605037624, "loss": 0.5619, "step": 14430 }, { "epoch": 0.7328248150169435, "grad_norm": 0.03327425512133745, "learning_rate": 0.0007989306147179618, "loss": 0.5953, "step": 14435 }, { "epoch": 0.733078651114975, "grad_norm": 0.03368726791670784, "learning_rate": 0.0007987530102535723, "loss": 0.5914, "step": 14440 }, { "epoch": 0.7333324872130066, "grad_norm": 0.03358231187464119, "learning_rate": 0.0007985753471454566, "loss": 0.5838, "step": 14445 }, { "epoch": 0.733586323311038, "grad_norm": 0.03261890078454357, "learning_rate": 0.0007983976254284894, "loss": 0.5523, "step": 14450 }, { "epoch": 0.7338401594090695, "grad_norm": 0.05662683445430045, "learning_rate": 0.0007982198451375564, "loss": 0.6053, "step": 14455 }, { "epoch": 0.7340939955071011, "grad_norm": 0.030310060759633013, "learning_rate": 0.0007980420063075551, "loss": 0.6454, "step": 14460 }, { "epoch": 0.7343478316051326, "grad_norm": 0.026572897492377096, "learning_rate": 0.0007978641089733941, "loss": 0.5835, "step": 14465 }, { "epoch": 0.734601667703164, "grad_norm": 0.03738980925312766, "learning_rate": 0.0007976861531699942, "loss": 0.5832, "step": 14470 }, { "epoch": 0.7348555038011956, "grad_norm": 0.04967817488154915, "learning_rate": 0.0007975081389322868, "loss": 0.6216, "step": 14475 }, { "epoch": 0.7351093398992271, "grad_norm": 0.021984750342595057, "learning_rate": 0.0007973300662952155, "loss": 0.5908, "step": 14480 }, { "epoch": 0.7353631759972585, "grad_norm": 0.030283172609555046, "learning_rate": 0.0007971519352937349, "loss": 0.5622, "step": 14485 }, { "epoch": 0.7356170120952901, "grad_norm": 0.025554536052177994, "learning_rate": 0.0007969737459628112, "loss": 0.5918, "step": 14490 }, { "epoch": 0.7358708481933216, "grad_norm": 0.04131262873095309, "learning_rate": 0.0007967954983374224, "loss": 0.6124, "step": 14495 }, { "epoch": 0.736124684291353, "grad_norm": 0.028079868427916613, "learning_rate": 0.0007966171924525573, "loss": 0.5802, "step": 14500 }, { "epoch": 0.7363785203893846, "grad_norm": 0.03973060406341919, "learning_rate": 0.0007964388283432165, "loss": 0.5826, "step": 14505 }, { "epoch": 0.7366323564874161, "grad_norm": 0.02430548491118239, "learning_rate": 0.0007962604060444121, "loss": 0.5866, "step": 14510 }, { "epoch": 0.7368861925854476, "grad_norm": 0.02552835377781297, "learning_rate": 0.0007960819255911673, "loss": 0.5807, "step": 14515 }, { "epoch": 0.737140028683479, "grad_norm": 0.030612793762513597, "learning_rate": 0.0007959033870185173, "loss": 0.5847, "step": 14520 }, { "epoch": 0.7373938647815106, "grad_norm": 0.02107127051595995, "learning_rate": 0.0007957247903615079, "loss": 0.5667, "step": 14525 }, { "epoch": 0.7376477008795421, "grad_norm": 0.022070086673001976, "learning_rate": 0.0007955461356551971, "loss": 0.5777, "step": 14530 }, { "epoch": 0.7379015369775735, "grad_norm": 0.02546159830842789, "learning_rate": 0.0007953674229346537, "loss": 0.5899, "step": 14535 }, { "epoch": 0.7381553730756051, "grad_norm": 0.039042598157433286, "learning_rate": 0.000795188652234958, "loss": 0.5747, "step": 14540 }, { "epoch": 0.7384092091736366, "grad_norm": 0.2906439126670049, "learning_rate": 0.0007950098235912021, "loss": 0.6219, "step": 14545 }, { "epoch": 0.738663045271668, "grad_norm": 0.07072338732370455, "learning_rate": 0.0007948309370384891, "loss": 0.5922, "step": 14550 }, { "epoch": 0.7389168813696996, "grad_norm": 0.47514585488788236, "learning_rate": 0.0007946519926119335, "loss": 0.615, "step": 14555 }, { "epoch": 0.7391707174677311, "grad_norm": 0.0648487542350592, "learning_rate": 0.000794472990346661, "loss": 0.5799, "step": 14560 }, { "epoch": 0.7394245535657625, "grad_norm": 0.03408321166656662, "learning_rate": 0.0007942939302778092, "loss": 0.5847, "step": 14565 }, { "epoch": 0.7396783896637941, "grad_norm": 0.04411672773665418, "learning_rate": 0.0007941148124405264, "loss": 0.6344, "step": 14570 }, { "epoch": 0.7399322257618256, "grad_norm": 0.029167074032466867, "learning_rate": 0.0007939356368699727, "loss": 0.6158, "step": 14575 }, { "epoch": 0.7401860618598571, "grad_norm": 0.030168988293000874, "learning_rate": 0.0007937564036013194, "loss": 0.5652, "step": 14580 }, { "epoch": 0.7404398979578886, "grad_norm": 0.035815596751334416, "learning_rate": 0.000793577112669749, "loss": 0.5966, "step": 14585 }, { "epoch": 0.7406937340559201, "grad_norm": 0.028868811480219827, "learning_rate": 0.0007933977641104555, "loss": 0.6047, "step": 14590 }, { "epoch": 0.7409475701539516, "grad_norm": 0.031907618454550465, "learning_rate": 0.000793218357958644, "loss": 0.5645, "step": 14595 }, { "epoch": 0.741201406251983, "grad_norm": 0.030941483646788018, "learning_rate": 0.0007930388942495312, "loss": 0.6008, "step": 14600 }, { "epoch": 0.7414552423500146, "grad_norm": 0.026271292559618437, "learning_rate": 0.0007928593730183447, "loss": 0.5566, "step": 14605 }, { "epoch": 0.7417090784480461, "grad_norm": 0.030445120700402395, "learning_rate": 0.0007926797943003239, "loss": 0.5926, "step": 14610 }, { "epoch": 0.7419629145460775, "grad_norm": 0.02990094964372429, "learning_rate": 0.0007925001581307189, "loss": 0.5997, "step": 14615 }, { "epoch": 0.7422167506441091, "grad_norm": 0.02511691268212505, "learning_rate": 0.0007923204645447916, "loss": 0.5804, "step": 14620 }, { "epoch": 0.7424705867421406, "grad_norm": 0.041391672490102865, "learning_rate": 0.0007921407135778151, "loss": 0.5659, "step": 14625 }, { "epoch": 0.7427244228401721, "grad_norm": 0.02995128050105831, "learning_rate": 0.0007919609052650734, "loss": 0.6081, "step": 14630 }, { "epoch": 0.7429782589382036, "grad_norm": 0.03127151350046966, "learning_rate": 0.0007917810396418618, "loss": 0.5946, "step": 14635 }, { "epoch": 0.7432320950362351, "grad_norm": 0.03606392489536054, "learning_rate": 0.0007916011167434873, "loss": 0.5931, "step": 14640 }, { "epoch": 0.7434859311342666, "grad_norm": 0.037073684283635416, "learning_rate": 0.000791421136605268, "loss": 0.5968, "step": 14645 }, { "epoch": 0.7437397672322981, "grad_norm": 0.04600230974128539, "learning_rate": 0.0007912410992625326, "loss": 0.6161, "step": 14650 }, { "epoch": 0.7439936033303296, "grad_norm": 0.030237177490696085, "learning_rate": 0.0007910610047506219, "loss": 0.5662, "step": 14655 }, { "epoch": 0.7442474394283611, "grad_norm": 0.04480838241032686, "learning_rate": 0.0007908808531048876, "loss": 0.6055, "step": 14660 }, { "epoch": 0.7445012755263926, "grad_norm": 0.038523733334650756, "learning_rate": 0.0007907006443606924, "loss": 0.6029, "step": 14665 }, { "epoch": 0.7447551116244241, "grad_norm": 0.03737018496192438, "learning_rate": 0.0007905203785534104, "loss": 0.6037, "step": 14670 }, { "epoch": 0.7450089477224556, "grad_norm": 0.09121012906159448, "learning_rate": 0.000790340055718427, "loss": 0.5908, "step": 14675 }, { "epoch": 0.7452627838204872, "grad_norm": 0.024868732926211504, "learning_rate": 0.0007901596758911384, "loss": 0.5974, "step": 14680 }, { "epoch": 0.7455166199185186, "grad_norm": 0.029371473458118812, "learning_rate": 0.0007899792391069527, "loss": 0.5968, "step": 14685 }, { "epoch": 0.7457704560165501, "grad_norm": 0.030092147058257405, "learning_rate": 0.0007897987454012885, "loss": 0.5867, "step": 14690 }, { "epoch": 0.7460242921145817, "grad_norm": 0.06169934776691142, "learning_rate": 0.0007896181948095755, "loss": 0.6084, "step": 14695 }, { "epoch": 0.7462781282126131, "grad_norm": 0.03499538883906122, "learning_rate": 0.0007894375873672555, "loss": 0.5695, "step": 14700 }, { "epoch": 0.7465319643106446, "grad_norm": 0.03834546473926501, "learning_rate": 0.0007892569231097804, "loss": 0.6104, "step": 14705 }, { "epoch": 0.7467858004086761, "grad_norm": 0.054730939898757484, "learning_rate": 0.0007890762020726136, "loss": 0.5776, "step": 14710 }, { "epoch": 0.7470396365067076, "grad_norm": 0.024843870205877142, "learning_rate": 0.0007888954242912303, "loss": 0.5656, "step": 14715 }, { "epoch": 0.7472934726047391, "grad_norm": 0.03349184814575679, "learning_rate": 0.0007887145898011158, "loss": 0.5901, "step": 14720 }, { "epoch": 0.7475473087027706, "grad_norm": 0.049461573887305814, "learning_rate": 0.0007885336986377671, "loss": 0.5508, "step": 14725 }, { "epoch": 0.7478011448008022, "grad_norm": 0.03552849919599442, "learning_rate": 0.0007883527508366923, "loss": 0.5951, "step": 14730 }, { "epoch": 0.7480549808988336, "grad_norm": 0.028690443617235013, "learning_rate": 0.0007881717464334104, "loss": 0.5891, "step": 14735 }, { "epoch": 0.7483088169968651, "grad_norm": 0.04167848811425972, "learning_rate": 0.000787990685463452, "loss": 0.562, "step": 14740 }, { "epoch": 0.7485626530948967, "grad_norm": 0.05775833745744054, "learning_rate": 0.000787809567962358, "loss": 0.5688, "step": 14745 }, { "epoch": 0.7488164891929281, "grad_norm": 0.026872351737841308, "learning_rate": 0.0007876283939656814, "loss": 0.5846, "step": 14750 }, { "epoch": 0.7490703252909596, "grad_norm": 0.03924993939110344, "learning_rate": 0.0007874471635089853, "loss": 0.5622, "step": 14755 }, { "epoch": 0.7493241613889912, "grad_norm": 0.04221861220551446, "learning_rate": 0.0007872658766278444, "loss": 0.5605, "step": 14760 }, { "epoch": 0.7495779974870226, "grad_norm": 0.026884242563953744, "learning_rate": 0.0007870845333578447, "loss": 0.5434, "step": 14765 }, { "epoch": 0.7498318335850541, "grad_norm": 0.05659339772085351, "learning_rate": 0.0007869031337345828, "loss": 0.5545, "step": 14770 }, { "epoch": 0.7500856696830857, "grad_norm": 0.04676635774385785, "learning_rate": 0.0007867216777936665, "loss": 0.6038, "step": 14775 }, { "epoch": 0.7503395057811171, "grad_norm": 0.03555192654015796, "learning_rate": 0.0007865401655707148, "loss": 0.628, "step": 14780 }, { "epoch": 0.7505933418791486, "grad_norm": 0.02697674684520458, "learning_rate": 0.0007863585971013574, "loss": 0.6192, "step": 14785 }, { "epoch": 0.7508471779771801, "grad_norm": 0.032422868044355145, "learning_rate": 0.0007861769724212353, "loss": 0.5789, "step": 14790 }, { "epoch": 0.7511010140752117, "grad_norm": 0.0308503473942792, "learning_rate": 0.0007859952915660009, "loss": 0.6233, "step": 14795 }, { "epoch": 0.7513548501732431, "grad_norm": 0.024092818055271237, "learning_rate": 0.000785813554571317, "loss": 0.5842, "step": 14800 }, { "epoch": 0.7516086862712746, "grad_norm": 0.03344296842370167, "learning_rate": 0.0007856317614728578, "loss": 0.6261, "step": 14805 }, { "epoch": 0.7518625223693062, "grad_norm": 0.037965967888487176, "learning_rate": 0.0007854499123063081, "loss": 0.5733, "step": 14810 }, { "epoch": 0.7521163584673376, "grad_norm": 0.04191928838742156, "learning_rate": 0.0007852680071073644, "loss": 0.6117, "step": 14815 }, { "epoch": 0.7523701945653691, "grad_norm": 0.04383694642505963, "learning_rate": 0.0007850860459117332, "loss": 0.5831, "step": 14820 }, { "epoch": 0.7526240306634007, "grad_norm": 0.037614692321617, "learning_rate": 0.0007849040287551332, "loss": 0.5871, "step": 14825 }, { "epoch": 0.7528778667614321, "grad_norm": 0.02516403676981507, "learning_rate": 0.0007847219556732929, "loss": 0.5927, "step": 14830 }, { "epoch": 0.7531317028594636, "grad_norm": 0.041692616391361986, "learning_rate": 0.0007845398267019528, "loss": 0.6591, "step": 14835 }, { "epoch": 0.7533855389574952, "grad_norm": 0.025964287691236976, "learning_rate": 0.0007843576418768637, "loss": 0.6005, "step": 14840 }, { "epoch": 0.7536393750555267, "grad_norm": 0.03353359440550333, "learning_rate": 0.0007841754012337876, "loss": 0.579, "step": 14845 }, { "epoch": 0.7538932111535581, "grad_norm": 0.055412308005210985, "learning_rate": 0.0007839931048084971, "loss": 0.6126, "step": 14850 }, { "epoch": 0.7541470472515897, "grad_norm": 0.04895600693106303, "learning_rate": 0.0007838107526367768, "loss": 0.62, "step": 14855 }, { "epoch": 0.7544008833496212, "grad_norm": 0.03477352614622541, "learning_rate": 0.0007836283447544211, "loss": 0.585, "step": 14860 }, { "epoch": 0.7546547194476526, "grad_norm": 0.03588526651863759, "learning_rate": 0.0007834458811972356, "loss": 0.5851, "step": 14865 }, { "epoch": 0.7549085555456841, "grad_norm": 0.05624481095210902, "learning_rate": 0.0007832633620010372, "loss": 0.6079, "step": 14870 }, { "epoch": 0.7551623916437157, "grad_norm": 0.04357936006581149, "learning_rate": 0.0007830807872016536, "loss": 0.6187, "step": 14875 }, { "epoch": 0.7554162277417471, "grad_norm": 0.040083961110080835, "learning_rate": 0.000782898156834923, "loss": 0.6431, "step": 14880 }, { "epoch": 0.7556700638397786, "grad_norm": 0.07764594003089959, "learning_rate": 0.000782715470936695, "loss": 0.5915, "step": 14885 }, { "epoch": 0.7559238999378102, "grad_norm": 0.030985982015469513, "learning_rate": 0.0007825327295428302, "loss": 0.5931, "step": 14890 }, { "epoch": 0.7561777360358417, "grad_norm": 0.029912753853148843, "learning_rate": 0.0007823499326891994, "loss": 0.6124, "step": 14895 }, { "epoch": 0.7564315721338731, "grad_norm": 0.028550336668593645, "learning_rate": 0.000782167080411685, "loss": 0.5834, "step": 14900 }, { "epoch": 0.7566854082319047, "grad_norm": 0.028003935514170488, "learning_rate": 0.0007819841727461798, "loss": 0.6129, "step": 14905 }, { "epoch": 0.7569392443299362, "grad_norm": 0.030636633612124004, "learning_rate": 0.0007818012097285876, "loss": 0.6299, "step": 14910 }, { "epoch": 0.7571930804279676, "grad_norm": 0.05006422454029914, "learning_rate": 0.0007816181913948235, "loss": 0.6086, "step": 14915 }, { "epoch": 0.7574469165259992, "grad_norm": 0.026143934888707424, "learning_rate": 0.0007814351177808128, "loss": 0.6139, "step": 14920 }, { "epoch": 0.7577007526240307, "grad_norm": 1.2578760148102972, "learning_rate": 0.000781251988922492, "loss": 0.8194, "step": 14925 }, { "epoch": 0.7579545887220621, "grad_norm": 0.10182912331451577, "learning_rate": 0.0007810688048558083, "loss": 0.6552, "step": 14930 }, { "epoch": 0.7582084248200937, "grad_norm": 0.1212458030523509, "learning_rate": 0.00078088556561672, "loss": 0.6837, "step": 14935 }, { "epoch": 0.7584622609181252, "grad_norm": 0.10362361037228977, "learning_rate": 0.0007807022712411957, "loss": 0.6756, "step": 14940 }, { "epoch": 0.7587160970161567, "grad_norm": 0.04440285687666505, "learning_rate": 0.0007805189217652158, "loss": 0.6408, "step": 14945 }, { "epoch": 0.7589699331141881, "grad_norm": 0.05277231543250202, "learning_rate": 0.0007803355172247702, "loss": 0.6197, "step": 14950 }, { "epoch": 0.7592237692122197, "grad_norm": 0.031231568656463315, "learning_rate": 0.0007801520576558608, "loss": 0.6298, "step": 14955 }, { "epoch": 0.7594776053102512, "grad_norm": 0.0345196918831133, "learning_rate": 0.0007799685430944995, "loss": 0.5926, "step": 14960 }, { "epoch": 0.7597314414082826, "grad_norm": 0.03315835092152511, "learning_rate": 0.0007797849735767094, "loss": 0.5859, "step": 14965 }, { "epoch": 0.7599852775063142, "grad_norm": 0.05487099815278874, "learning_rate": 0.0007796013491385243, "loss": 0.6047, "step": 14970 }, { "epoch": 0.7602391136043457, "grad_norm": 0.0305550620003498, "learning_rate": 0.0007794176698159887, "loss": 0.5898, "step": 14975 }, { "epoch": 0.7604929497023771, "grad_norm": 0.03492544688949025, "learning_rate": 0.000779233935645158, "loss": 0.6285, "step": 14980 }, { "epoch": 0.7607467858004087, "grad_norm": 0.03623180099708256, "learning_rate": 0.0007790501466620983, "loss": 0.6035, "step": 14985 }, { "epoch": 0.7610006218984402, "grad_norm": 0.04180008581977664, "learning_rate": 0.0007788663029028863, "loss": 0.5536, "step": 14990 }, { "epoch": 0.7612544579964716, "grad_norm": 0.030734490730055696, "learning_rate": 0.0007786824044036098, "loss": 0.5731, "step": 14995 }, { "epoch": 0.7615082940945032, "grad_norm": 0.055420433682433455, "learning_rate": 0.0007784984512003671, "loss": 0.6263, "step": 15000 }, { "epoch": 0.7617621301925347, "grad_norm": 0.02941885467547894, "learning_rate": 0.0007783144433292673, "loss": 0.6284, "step": 15005 }, { "epoch": 0.7620159662905662, "grad_norm": 0.033206110423357985, "learning_rate": 0.0007781303808264303, "loss": 0.5358, "step": 15010 }, { "epoch": 0.7622698023885977, "grad_norm": 0.02625796801377728, "learning_rate": 0.0007779462637279865, "loss": 0.5969, "step": 15015 }, { "epoch": 0.7625236384866292, "grad_norm": 0.29298031514322703, "learning_rate": 0.0007777620920700773, "loss": 0.5957, "step": 15020 }, { "epoch": 0.7627774745846607, "grad_norm": 0.04086600855995801, "learning_rate": 0.0007775778658888546, "loss": 0.6322, "step": 15025 }, { "epoch": 0.7630313106826921, "grad_norm": 0.039401332495816975, "learning_rate": 0.000777393585220481, "loss": 0.5687, "step": 15030 }, { "epoch": 0.7632851467807237, "grad_norm": 0.026702482139940326, "learning_rate": 0.0007772092501011301, "loss": 0.6218, "step": 15035 }, { "epoch": 0.7635389828787552, "grad_norm": 0.050236563570187805, "learning_rate": 0.0007770248605669858, "loss": 0.5861, "step": 15040 }, { "epoch": 0.7637928189767866, "grad_norm": 0.02874533178510391, "learning_rate": 0.0007768404166542431, "loss": 0.5967, "step": 15045 }, { "epoch": 0.7640466550748182, "grad_norm": 0.02753978513413307, "learning_rate": 0.000776655918399107, "loss": 0.5802, "step": 15050 }, { "epoch": 0.7643004911728497, "grad_norm": 0.037996461348599846, "learning_rate": 0.0007764713658377938, "loss": 0.5915, "step": 15055 }, { "epoch": 0.7645543272708812, "grad_norm": 0.06312266025775402, "learning_rate": 0.0007762867590065302, "loss": 0.5816, "step": 15060 }, { "epoch": 0.7648081633689127, "grad_norm": 0.04647511323225117, "learning_rate": 0.0007761020979415537, "loss": 0.6081, "step": 15065 }, { "epoch": 0.7650619994669442, "grad_norm": 0.0350344173940654, "learning_rate": 0.0007759173826791123, "loss": 0.5988, "step": 15070 }, { "epoch": 0.7653158355649757, "grad_norm": 0.026503015783973544, "learning_rate": 0.0007757326132554648, "loss": 0.5924, "step": 15075 }, { "epoch": 0.7655696716630072, "grad_norm": 0.03832259438303605, "learning_rate": 0.0007755477897068803, "loss": 0.6175, "step": 15080 }, { "epoch": 0.7658235077610387, "grad_norm": 0.09757514516462057, "learning_rate": 0.0007753629120696388, "loss": 0.6075, "step": 15085 }, { "epoch": 0.7660773438590702, "grad_norm": 0.05636880292552427, "learning_rate": 0.000775177980380031, "loss": 0.5902, "step": 15090 }, { "epoch": 0.7663311799571016, "grad_norm": 0.044508398900761414, "learning_rate": 0.0007749929946743578, "loss": 0.6134, "step": 15095 }, { "epoch": 0.7665850160551332, "grad_norm": 0.02889076972000296, "learning_rate": 0.0007748079549889312, "loss": 0.5796, "step": 15100 }, { "epoch": 0.7668388521531647, "grad_norm": 0.038063937210235835, "learning_rate": 0.0007746228613600735, "loss": 0.5813, "step": 15105 }, { "epoch": 0.7670926882511963, "grad_norm": 0.09708033315517485, "learning_rate": 0.0007744377138241177, "loss": 0.5844, "step": 15110 }, { "epoch": 0.7673465243492277, "grad_norm": 0.06930206252274214, "learning_rate": 0.0007742525124174073, "loss": 0.6186, "step": 15115 }, { "epoch": 0.7676003604472592, "grad_norm": 0.03248594459557167, "learning_rate": 0.0007740672571762963, "loss": 0.6108, "step": 15120 }, { "epoch": 0.7678541965452907, "grad_norm": 0.0358203234252936, "learning_rate": 0.0007738819481371495, "loss": 0.5629, "step": 15125 }, { "epoch": 0.7681080326433222, "grad_norm": 0.0446450825233111, "learning_rate": 0.0007736965853363423, "loss": 0.5974, "step": 15130 }, { "epoch": 0.7683618687413537, "grad_norm": 0.03164679167642612, "learning_rate": 0.0007735111688102602, "loss": 0.6547, "step": 15135 }, { "epoch": 0.7686157048393852, "grad_norm": 0.03064186972892073, "learning_rate": 0.0007733256985952997, "loss": 0.6022, "step": 15140 }, { "epoch": 0.7688695409374167, "grad_norm": 0.34573913673428947, "learning_rate": 0.0007731401747278676, "loss": 0.5827, "step": 15145 }, { "epoch": 0.7691233770354482, "grad_norm": 0.13966486620739513, "learning_rate": 0.0007729545972443812, "loss": 0.6077, "step": 15150 }, { "epoch": 0.7693772131334797, "grad_norm": 0.029192487039169105, "learning_rate": 0.000772768966181269, "loss": 0.5599, "step": 15155 }, { "epoch": 0.7696310492315113, "grad_norm": 0.03124263927843463, "learning_rate": 0.0007725832815749686, "loss": 0.6285, "step": 15160 }, { "epoch": 0.7698848853295427, "grad_norm": 0.024326930348444097, "learning_rate": 0.0007723975434619296, "loss": 0.5531, "step": 15165 }, { "epoch": 0.7701387214275742, "grad_norm": 0.02512413491231233, "learning_rate": 0.0007722117518786112, "loss": 0.5537, "step": 15170 }, { "epoch": 0.7703925575256058, "grad_norm": 0.031772939647128885, "learning_rate": 0.0007720259068614836, "loss": 0.6025, "step": 15175 }, { "epoch": 0.7706463936236372, "grad_norm": 0.043947294185195836, "learning_rate": 0.0007718400084470267, "loss": 0.6034, "step": 15180 }, { "epoch": 0.7709002297216687, "grad_norm": 0.03222419275981874, "learning_rate": 0.0007716540566717321, "loss": 0.5981, "step": 15185 }, { "epoch": 0.7711540658197003, "grad_norm": 0.03819007335282729, "learning_rate": 0.0007714680515721008, "loss": 0.5963, "step": 15190 }, { "epoch": 0.7714079019177317, "grad_norm": 0.04093973435288178, "learning_rate": 0.0007712819931846448, "loss": 0.613, "step": 15195 }, { "epoch": 0.7716617380157632, "grad_norm": 0.03695759978176583, "learning_rate": 0.0007710958815458866, "loss": 0.5745, "step": 15200 }, { "epoch": 0.7719155741137947, "grad_norm": 0.027960391790393533, "learning_rate": 0.0007709097166923586, "loss": 0.5462, "step": 15205 }, { "epoch": 0.7721694102118263, "grad_norm": 0.05757494108734914, "learning_rate": 0.0007707234986606043, "loss": 0.615, "step": 15210 }, { "epoch": 0.7724232463098577, "grad_norm": 0.04085819775177726, "learning_rate": 0.0007705372274871774, "loss": 0.61, "step": 15215 }, { "epoch": 0.7726770824078892, "grad_norm": 0.04422316744214125, "learning_rate": 0.0007703509032086417, "loss": 0.5625, "step": 15220 }, { "epoch": 0.7729309185059208, "grad_norm": 0.029609165102383688, "learning_rate": 0.0007701645258615721, "loss": 0.5884, "step": 15225 }, { "epoch": 0.7731847546039522, "grad_norm": 0.025139781930555237, "learning_rate": 0.0007699780954825534, "loss": 0.599, "step": 15230 }, { "epoch": 0.7734385907019837, "grad_norm": 0.02759229289936248, "learning_rate": 0.0007697916121081809, "loss": 0.5581, "step": 15235 }, { "epoch": 0.7736924268000153, "grad_norm": 0.022703912785005373, "learning_rate": 0.0007696050757750603, "loss": 0.5604, "step": 15240 }, { "epoch": 0.7739462628980467, "grad_norm": 0.026623783300295627, "learning_rate": 0.000769418486519808, "loss": 0.5961, "step": 15245 }, { "epoch": 0.7742000989960782, "grad_norm": 0.026127263841029254, "learning_rate": 0.0007692318443790503, "loss": 0.5585, "step": 15250 }, { "epoch": 0.7744539350941098, "grad_norm": 0.02569593152061675, "learning_rate": 0.0007690451493894241, "loss": 0.6041, "step": 15255 }, { "epoch": 0.7747077711921412, "grad_norm": 0.0256631736374656, "learning_rate": 0.0007688584015875769, "loss": 0.6003, "step": 15260 }, { "epoch": 0.7749616072901727, "grad_norm": 0.03236587398767528, "learning_rate": 0.0007686716010101663, "loss": 0.642, "step": 15265 }, { "epoch": 0.7752154433882043, "grad_norm": 0.03721845153105975, "learning_rate": 0.0007684847476938601, "loss": 0.5819, "step": 15270 }, { "epoch": 0.7754692794862358, "grad_norm": 0.028027879120731618, "learning_rate": 0.0007682978416753371, "loss": 0.5795, "step": 15275 }, { "epoch": 0.7757231155842672, "grad_norm": 0.02737803847928579, "learning_rate": 0.0007681108829912857, "loss": 0.5746, "step": 15280 }, { "epoch": 0.7759769516822987, "grad_norm": 0.027303438264093486, "learning_rate": 0.0007679238716784049, "loss": 0.8898, "step": 15285 }, { "epoch": 0.7762307877803303, "grad_norm": 0.036685326663591804, "learning_rate": 0.0007677368077734045, "loss": 0.5854, "step": 15290 }, { "epoch": 0.7764846238783617, "grad_norm": 0.1602053406884727, "learning_rate": 0.0007675496913130038, "loss": 0.6604, "step": 15295 }, { "epoch": 0.7767384599763932, "grad_norm": 0.04847230882404775, "learning_rate": 0.0007673625223339329, "loss": 0.623, "step": 15300 }, { "epoch": 0.7769922960744248, "grad_norm": 0.05089648219518116, "learning_rate": 0.0007671753008729323, "loss": 0.6436, "step": 15305 }, { "epoch": 0.7772461321724562, "grad_norm": 0.07145564787304685, "learning_rate": 0.0007669880269667524, "loss": 0.5677, "step": 15310 }, { "epoch": 0.7774999682704877, "grad_norm": 0.058442914245472245, "learning_rate": 0.0007668007006521544, "loss": 0.6102, "step": 15315 }, { "epoch": 0.7777538043685193, "grad_norm": 0.04062643001361585, "learning_rate": 0.0007666133219659094, "loss": 0.5994, "step": 15320 }, { "epoch": 0.7780076404665508, "grad_norm": 0.04022904195831844, "learning_rate": 0.0007664258909447989, "loss": 0.6411, "step": 15325 }, { "epoch": 0.7782614765645822, "grad_norm": 0.02659845024976935, "learning_rate": 0.0007662384076256146, "loss": 0.6142, "step": 15330 }, { "epoch": 0.7785153126626138, "grad_norm": 0.04785803634447859, "learning_rate": 0.0007660508720451585, "loss": 0.6396, "step": 15335 }, { "epoch": 0.7787691487606453, "grad_norm": 0.061095944021831515, "learning_rate": 0.0007658632842402432, "loss": 0.5819, "step": 15340 }, { "epoch": 0.7790229848586767, "grad_norm": 0.030130348283759538, "learning_rate": 0.0007656756442476911, "loss": 0.5868, "step": 15345 }, { "epoch": 0.7792768209567082, "grad_norm": 0.028351823717474526, "learning_rate": 0.0007654879521043347, "loss": 0.5797, "step": 15350 }, { "epoch": 0.7795306570547398, "grad_norm": 0.050575288848887096, "learning_rate": 0.0007653002078470175, "loss": 0.6096, "step": 15355 }, { "epoch": 0.7797844931527712, "grad_norm": 0.05292190850284731, "learning_rate": 0.0007651124115125924, "loss": 0.5891, "step": 15360 }, { "epoch": 0.7800383292508027, "grad_norm": 0.03779146555526197, "learning_rate": 0.0007649245631379232, "loss": 0.5974, "step": 15365 }, { "epoch": 0.7802921653488343, "grad_norm": 0.03100086787490601, "learning_rate": 0.0007647366627598835, "loss": 0.566, "step": 15370 }, { "epoch": 0.7805460014468658, "grad_norm": 0.030255527750173147, "learning_rate": 0.0007645487104153568, "loss": 0.6128, "step": 15375 }, { "epoch": 0.7807998375448972, "grad_norm": 0.027165593937075252, "learning_rate": 0.0007643607061412379, "loss": 0.5686, "step": 15380 }, { "epoch": 0.7810536736429288, "grad_norm": 0.02824615720081996, "learning_rate": 0.0007641726499744306, "loss": 0.582, "step": 15385 }, { "epoch": 0.7813075097409603, "grad_norm": 0.04126298838740098, "learning_rate": 0.0007639845419518494, "loss": 0.6027, "step": 15390 }, { "epoch": 0.7815613458389917, "grad_norm": 0.032036059387028706, "learning_rate": 0.0007637963821104192, "loss": 0.5775, "step": 15395 }, { "epoch": 0.7818151819370233, "grad_norm": 0.028486079119276198, "learning_rate": 0.0007636081704870749, "loss": 0.5682, "step": 15400 }, { "epoch": 0.7820690180350548, "grad_norm": 0.024905654752730703, "learning_rate": 0.0007634199071187613, "loss": 0.5981, "step": 15405 }, { "epoch": 0.7823228541330862, "grad_norm": 0.17057789658969538, "learning_rate": 0.0007632315920424335, "loss": 0.5801, "step": 15410 }, { "epoch": 0.7825766902311178, "grad_norm": 0.03659353032866976, "learning_rate": 0.000763043225295057, "loss": 0.5597, "step": 15415 }, { "epoch": 0.7828305263291493, "grad_norm": 0.026725388304567464, "learning_rate": 0.0007628548069136071, "loss": 0.5931, "step": 15420 }, { "epoch": 0.7830843624271808, "grad_norm": 0.037625318866698565, "learning_rate": 0.0007626663369350695, "loss": 0.538, "step": 15425 }, { "epoch": 0.7833381985252122, "grad_norm": 0.03956094460634958, "learning_rate": 0.0007624778153964398, "loss": 0.5758, "step": 15430 }, { "epoch": 0.7835920346232438, "grad_norm": 0.02719027610400642, "learning_rate": 0.0007622892423347241, "loss": 0.6001, "step": 15435 }, { "epoch": 0.7838458707212753, "grad_norm": 0.03100928874647277, "learning_rate": 0.000762100617786938, "loss": 0.6138, "step": 15440 }, { "epoch": 0.7840997068193067, "grad_norm": 0.027836970066281757, "learning_rate": 0.0007619119417901077, "loss": 0.6017, "step": 15445 }, { "epoch": 0.7843535429173383, "grad_norm": 0.03469260516526115, "learning_rate": 0.0007617232143812693, "loss": 0.5756, "step": 15450 }, { "epoch": 0.7846073790153698, "grad_norm": 0.03935777037891353, "learning_rate": 0.0007615344355974694, "loss": 0.595, "step": 15455 }, { "epoch": 0.7848612151134012, "grad_norm": 0.02501991833734147, "learning_rate": 0.0007613456054757639, "loss": 0.6073, "step": 15460 }, { "epoch": 0.7851150512114328, "grad_norm": 0.02333012899999052, "learning_rate": 0.0007611567240532193, "loss": 0.5868, "step": 15465 }, { "epoch": 0.7853688873094643, "grad_norm": 0.03818225366150291, "learning_rate": 0.0007609677913669124, "loss": 0.599, "step": 15470 }, { "epoch": 0.7856227234074957, "grad_norm": 0.028704075914218637, "learning_rate": 0.0007607788074539293, "loss": 0.5973, "step": 15475 }, { "epoch": 0.7858765595055273, "grad_norm": 0.021928136458700793, "learning_rate": 0.0007605897723513669, "loss": 0.593, "step": 15480 }, { "epoch": 0.7861303956035588, "grad_norm": 0.025069154514387224, "learning_rate": 0.0007604006860963315, "loss": 0.5762, "step": 15485 }, { "epoch": 0.7863842317015903, "grad_norm": 0.029790848879588383, "learning_rate": 0.0007602115487259403, "loss": 0.5952, "step": 15490 }, { "epoch": 0.7866380677996218, "grad_norm": 0.030994892822406125, "learning_rate": 0.0007600223602773198, "loss": 0.6024, "step": 15495 }, { "epoch": 0.7868919038976533, "grad_norm": 0.028391213579639876, "learning_rate": 0.0007598331207876066, "loss": 0.58, "step": 15500 }, { "epoch": 0.7871457399956848, "grad_norm": 0.04047000777595839, "learning_rate": 0.0007596438302939475, "loss": 0.5813, "step": 15505 }, { "epoch": 0.7873995760937162, "grad_norm": 0.032259318761633765, "learning_rate": 0.0007594544888334994, "loss": 0.583, "step": 15510 }, { "epoch": 0.7876534121917478, "grad_norm": 0.036383271252755846, "learning_rate": 0.0007592650964434292, "loss": 0.6082, "step": 15515 }, { "epoch": 0.7879072482897793, "grad_norm": 0.046019554411827555, "learning_rate": 0.0007590756531609133, "loss": 0.6063, "step": 15520 }, { "epoch": 0.7881610843878107, "grad_norm": 0.027578051274795613, "learning_rate": 0.0007588861590231388, "loss": 0.5804, "step": 15525 }, { "epoch": 0.7884149204858423, "grad_norm": 0.030164791483657458, "learning_rate": 0.0007586966140673024, "loss": 0.5828, "step": 15530 }, { "epoch": 0.7886687565838738, "grad_norm": 0.029268754849734627, "learning_rate": 0.0007585070183306106, "loss": 0.5624, "step": 15535 }, { "epoch": 0.7889225926819053, "grad_norm": 0.026293524029202597, "learning_rate": 0.0007583173718502803, "loss": 0.5694, "step": 15540 }, { "epoch": 0.7891764287799368, "grad_norm": 0.03225939541940473, "learning_rate": 0.0007581276746635383, "loss": 0.5923, "step": 15545 }, { "epoch": 0.7894302648779683, "grad_norm": 0.02421244006898213, "learning_rate": 0.000757937926807621, "loss": 0.5453, "step": 15550 }, { "epoch": 0.7896841009759998, "grad_norm": 0.03888331896268403, "learning_rate": 0.0007577481283197749, "loss": 0.6002, "step": 15555 }, { "epoch": 0.7899379370740313, "grad_norm": 0.031178413757377548, "learning_rate": 0.0007575582792372567, "loss": 0.5885, "step": 15560 }, { "epoch": 0.7901917731720628, "grad_norm": 0.02453845415254218, "learning_rate": 0.0007573683795973328, "loss": 0.5623, "step": 15565 }, { "epoch": 0.7904456092700943, "grad_norm": 0.03542656617397271, "learning_rate": 0.0007571784294372792, "loss": 0.6026, "step": 15570 }, { "epoch": 0.7906994453681258, "grad_norm": 0.024593970240234127, "learning_rate": 0.0007569884287943826, "loss": 0.5946, "step": 15575 }, { "epoch": 0.7909532814661573, "grad_norm": 0.034196088349742554, "learning_rate": 0.000756798377705939, "loss": 0.6394, "step": 15580 }, { "epoch": 0.7912071175641888, "grad_norm": 0.03593770916178315, "learning_rate": 0.0007566082762092546, "loss": 0.6134, "step": 15585 }, { "epoch": 0.7914609536622204, "grad_norm": 0.02744220052589826, "learning_rate": 0.0007564181243416453, "loss": 0.574, "step": 15590 }, { "epoch": 0.7917147897602518, "grad_norm": 0.04256423578944382, "learning_rate": 0.0007562279221404368, "loss": 0.5861, "step": 15595 }, { "epoch": 0.7919686258582833, "grad_norm": 0.036644059764111316, "learning_rate": 0.0007560376696429651, "loss": 0.5489, "step": 15600 }, { "epoch": 0.7922224619563148, "grad_norm": 0.03260114588100312, "learning_rate": 0.0007558473668865755, "loss": 0.5637, "step": 15605 }, { "epoch": 0.7924762980543463, "grad_norm": 0.024808409878328188, "learning_rate": 0.0007556570139086239, "loss": 0.593, "step": 15610 }, { "epoch": 0.7927301341523778, "grad_norm": 0.04023189432619606, "learning_rate": 0.0007554666107464754, "loss": 0.5664, "step": 15615 }, { "epoch": 0.7929839702504093, "grad_norm": 0.02342950434065168, "learning_rate": 0.0007552761574375052, "loss": 0.5895, "step": 15620 }, { "epoch": 0.7932378063484408, "grad_norm": 0.03876234729699636, "learning_rate": 0.0007550856540190985, "loss": 0.5723, "step": 15625 }, { "epoch": 0.7934916424464723, "grad_norm": 0.023809875697390476, "learning_rate": 0.0007548951005286498, "loss": 0.5897, "step": 15630 }, { "epoch": 0.7937454785445038, "grad_norm": 0.025531257953473226, "learning_rate": 0.0007547044970035641, "loss": 0.5728, "step": 15635 }, { "epoch": 0.7939993146425354, "grad_norm": 0.023549442711572447, "learning_rate": 0.0007545138434812559, "loss": 0.5479, "step": 15640 }, { "epoch": 0.7942531507405668, "grad_norm": 0.03393140892826125, "learning_rate": 0.0007543231399991495, "loss": 0.591, "step": 15645 }, { "epoch": 0.7945069868385983, "grad_norm": 0.026850249760398237, "learning_rate": 0.0007541323865946789, "loss": 0.5756, "step": 15650 }, { "epoch": 0.7947608229366299, "grad_norm": 0.02644298213479883, "learning_rate": 0.0007539415833052882, "loss": 0.6027, "step": 15655 }, { "epoch": 0.7950146590346613, "grad_norm": 1.7634760340274238, "learning_rate": 0.0007537507301684312, "loss": 0.6009, "step": 15660 }, { "epoch": 0.7952684951326928, "grad_norm": 0.05116113095501194, "learning_rate": 0.0007535598272215712, "loss": 0.6035, "step": 15665 }, { "epoch": 0.7955223312307244, "grad_norm": 0.09123746164702429, "learning_rate": 0.0007533688745021817, "loss": 0.5869, "step": 15670 }, { "epoch": 0.7957761673287558, "grad_norm": 0.04008705946671749, "learning_rate": 0.0007531778720477457, "loss": 0.6197, "step": 15675 }, { "epoch": 0.7960300034267873, "grad_norm": 0.2577476743899855, "learning_rate": 0.000752986819895756, "loss": 0.6145, "step": 15680 }, { "epoch": 0.7962838395248188, "grad_norm": 0.04408392445254025, "learning_rate": 0.0007527957180837152, "loss": 0.6326, "step": 15685 }, { "epoch": 0.7965376756228503, "grad_norm": 0.03340637676201848, "learning_rate": 0.0007526045666491355, "loss": 0.6089, "step": 15690 }, { "epoch": 0.7967915117208818, "grad_norm": 0.03220967045008419, "learning_rate": 0.0007524133656295392, "loss": 0.5869, "step": 15695 }, { "epoch": 0.7970453478189133, "grad_norm": 0.03674143282021481, "learning_rate": 0.0007522221150624579, "loss": 0.6527, "step": 15700 }, { "epoch": 0.7972991839169449, "grad_norm": 0.02344499359362633, "learning_rate": 0.0007520308149854336, "loss": 0.5767, "step": 15705 }, { "epoch": 0.7975530200149763, "grad_norm": 0.030359982821470545, "learning_rate": 0.0007518394654360169, "loss": 0.5843, "step": 15710 }, { "epoch": 0.7978068561130078, "grad_norm": 0.03557247505749754, "learning_rate": 0.000751648066451769, "loss": 0.608, "step": 15715 }, { "epoch": 0.7980606922110394, "grad_norm": 0.027894981110671378, "learning_rate": 0.0007514566180702609, "loss": 0.5726, "step": 15720 }, { "epoch": 0.7983145283090708, "grad_norm": 0.03575658449386519, "learning_rate": 0.0007512651203290723, "loss": 0.616, "step": 15725 }, { "epoch": 0.7985683644071023, "grad_norm": 0.031795758337900826, "learning_rate": 0.000751073573265794, "loss": 0.5772, "step": 15730 }, { "epoch": 0.7988222005051339, "grad_norm": 0.03329640143386616, "learning_rate": 0.0007508819769180252, "loss": 0.5722, "step": 15735 }, { "epoch": 0.7990760366031653, "grad_norm": 0.028561079497327932, "learning_rate": 0.0007506903313233755, "loss": 0.5843, "step": 15740 }, { "epoch": 0.7993298727011968, "grad_norm": 0.03499905510867631, "learning_rate": 0.0007504986365194639, "loss": 0.5441, "step": 15745 }, { "epoch": 0.7995837087992284, "grad_norm": 0.02584998094099768, "learning_rate": 0.0007503068925439194, "loss": 0.5588, "step": 15750 }, { "epoch": 0.7998375448972599, "grad_norm": 0.02484740511899366, "learning_rate": 0.00075011509943438, "loss": 0.5439, "step": 15755 }, { "epoch": 0.8000913809952913, "grad_norm": 0.05001793472695475, "learning_rate": 0.0007499232572284938, "loss": 0.5912, "step": 15760 }, { "epoch": 0.8003452170933228, "grad_norm": 0.02822375304428738, "learning_rate": 0.0007497313659639188, "loss": 0.5858, "step": 15765 }, { "epoch": 0.8005990531913544, "grad_norm": 0.025776424090071148, "learning_rate": 0.0007495394256783219, "loss": 0.5725, "step": 15770 }, { "epoch": 0.8008528892893858, "grad_norm": 0.03319633421805169, "learning_rate": 0.0007493474364093803, "loss": 0.5897, "step": 15775 }, { "epoch": 0.8011067253874173, "grad_norm": 0.04258494801205897, "learning_rate": 0.0007491553981947804, "loss": 0.5431, "step": 15780 }, { "epoch": 0.8013605614854489, "grad_norm": 0.028145407612735995, "learning_rate": 0.0007489633110722183, "loss": 0.5549, "step": 15785 }, { "epoch": 0.8016143975834803, "grad_norm": 0.026623484939927906, "learning_rate": 0.0007487711750793998, "loss": 0.595, "step": 15790 }, { "epoch": 0.8018682336815118, "grad_norm": 0.022934168464128556, "learning_rate": 0.0007485789902540403, "loss": 0.5527, "step": 15795 }, { "epoch": 0.8021220697795434, "grad_norm": 0.025252927285032714, "learning_rate": 0.0007483867566338647, "loss": 0.5876, "step": 15800 }, { "epoch": 0.8023759058775749, "grad_norm": 0.028849244701322482, "learning_rate": 0.0007481944742566076, "loss": 0.5715, "step": 15805 }, { "epoch": 0.8026297419756063, "grad_norm": 0.026220259487405768, "learning_rate": 0.0007480021431600128, "loss": 0.6162, "step": 15810 }, { "epoch": 0.8028835780736379, "grad_norm": 0.021106176901375888, "learning_rate": 0.000747809763381834, "loss": 0.5713, "step": 15815 }, { "epoch": 0.8031374141716694, "grad_norm": 0.02762824758071424, "learning_rate": 0.0007476173349598345, "loss": 0.5849, "step": 15820 }, { "epoch": 0.8033912502697008, "grad_norm": 0.05745838787319821, "learning_rate": 0.000747424857931787, "loss": 0.5462, "step": 15825 }, { "epoch": 0.8036450863677324, "grad_norm": 0.040680994894634726, "learning_rate": 0.0007472323323354739, "loss": 0.5626, "step": 15830 }, { "epoch": 0.8038989224657639, "grad_norm": 0.029285643222152852, "learning_rate": 0.0007470397582086869, "loss": 0.598, "step": 15835 }, { "epoch": 0.8041527585637953, "grad_norm": 0.03200481493108892, "learning_rate": 0.0007468471355892275, "loss": 0.5663, "step": 15840 }, { "epoch": 0.8044065946618268, "grad_norm": 0.02381141335581086, "learning_rate": 0.0007466544645149061, "loss": 0.5944, "step": 15845 }, { "epoch": 0.8046604307598584, "grad_norm": 0.02766719856403892, "learning_rate": 0.0007464617450235434, "loss": 0.5815, "step": 15850 }, { "epoch": 0.8049142668578899, "grad_norm": 0.04488529656718956, "learning_rate": 0.0007462689771529695, "loss": 0.5526, "step": 15855 }, { "epoch": 0.8051681029559213, "grad_norm": 0.034731993566618206, "learning_rate": 0.0007460761609410233, "loss": 0.6007, "step": 15860 }, { "epoch": 0.8054219390539529, "grad_norm": 0.025791063644137125, "learning_rate": 0.000745883296425554, "loss": 0.5695, "step": 15865 }, { "epoch": 0.8056757751519844, "grad_norm": 0.028080587090770482, "learning_rate": 0.00074569038364442, "loss": 0.556, "step": 15870 }, { "epoch": 0.8059296112500158, "grad_norm": 0.035411194412573295, "learning_rate": 0.0007454974226354887, "loss": 0.5774, "step": 15875 }, { "epoch": 0.8061834473480474, "grad_norm": 0.03763595723144201, "learning_rate": 0.0007453044134366377, "loss": 0.5604, "step": 15880 }, { "epoch": 0.8064372834460789, "grad_norm": 0.026026178191929684, "learning_rate": 0.0007451113560857537, "loss": 0.5668, "step": 15885 }, { "epoch": 0.8066911195441103, "grad_norm": 0.023998437380902855, "learning_rate": 0.0007449182506207328, "loss": 0.5542, "step": 15890 }, { "epoch": 0.8069449556421419, "grad_norm": 0.02465158291279762, "learning_rate": 0.0007447250970794807, "loss": 0.585, "step": 15895 }, { "epoch": 0.8071987917401734, "grad_norm": 0.03811256192460854, "learning_rate": 0.0007445318954999126, "loss": 0.5816, "step": 15900 }, { "epoch": 0.8074526278382048, "grad_norm": 0.03160589407825792, "learning_rate": 0.0007443386459199528, "loss": 0.577, "step": 15905 }, { "epoch": 0.8077064639362364, "grad_norm": 0.028906762540522022, "learning_rate": 0.0007441453483775354, "loss": 0.6078, "step": 15910 }, { "epoch": 0.8079603000342679, "grad_norm": 0.036054008668968705, "learning_rate": 0.0007439520029106035, "loss": 0.5942, "step": 15915 }, { "epoch": 0.8082141361322994, "grad_norm": 0.026999292449829813, "learning_rate": 0.0007437586095571102, "loss": 0.5836, "step": 15920 }, { "epoch": 0.8084679722303308, "grad_norm": 0.027971628557511584, "learning_rate": 0.0007435651683550173, "loss": 0.5629, "step": 15925 }, { "epoch": 0.8087218083283624, "grad_norm": 0.036526997758639046, "learning_rate": 0.0007433716793422967, "loss": 0.5892, "step": 15930 }, { "epoch": 0.8089756444263939, "grad_norm": 0.03567724658137178, "learning_rate": 0.0007431781425569289, "loss": 0.5557, "step": 15935 }, { "epoch": 0.8092294805244253, "grad_norm": 0.024802333956108272, "learning_rate": 0.0007429845580369046, "loss": 0.5618, "step": 15940 }, { "epoch": 0.8094833166224569, "grad_norm": 0.022538774195908166, "learning_rate": 0.0007427909258202232, "loss": 0.583, "step": 15945 }, { "epoch": 0.8097371527204884, "grad_norm": 0.025056735200451364, "learning_rate": 0.0007425972459448941, "loss": 0.5647, "step": 15950 }, { "epoch": 0.8099909888185198, "grad_norm": 0.03851297901292128, "learning_rate": 0.0007424035184489352, "loss": 0.5914, "step": 15955 }, { "epoch": 0.8102448249165514, "grad_norm": 0.04099783815757058, "learning_rate": 0.0007422097433703748, "loss": 0.5802, "step": 15960 }, { "epoch": 0.8104986610145829, "grad_norm": 0.03222527568690099, "learning_rate": 0.0007420159207472494, "loss": 0.5839, "step": 15965 }, { "epoch": 0.8107524971126144, "grad_norm": 0.024865699732427023, "learning_rate": 0.0007418220506176058, "loss": 0.5913, "step": 15970 }, { "epoch": 0.8110063332106459, "grad_norm": 0.035177466344471775, "learning_rate": 0.0007416281330194996, "loss": 0.5812, "step": 15975 }, { "epoch": 0.8112601693086774, "grad_norm": 0.024089520192005706, "learning_rate": 0.0007414341679909958, "loss": 0.609, "step": 15980 }, { "epoch": 0.8115140054067089, "grad_norm": 0.023966198926588644, "learning_rate": 0.0007412401555701689, "loss": 0.5816, "step": 15985 }, { "epoch": 0.8117678415047404, "grad_norm": 0.09247687498068556, "learning_rate": 0.0007410460957951026, "loss": 0.5439, "step": 15990 }, { "epoch": 0.8120216776027719, "grad_norm": 0.026101986363411846, "learning_rate": 0.0007408519887038898, "loss": 0.5656, "step": 15995 }, { "epoch": 0.8122755137008034, "grad_norm": 0.024600475072470263, "learning_rate": 0.0007406578343346327, "loss": 0.5966, "step": 16000 }, { "epoch": 0.8125293497988348, "grad_norm": 0.02581395178412151, "learning_rate": 0.0007404636327254428, "loss": 0.5841, "step": 16005 }, { "epoch": 0.8127831858968664, "grad_norm": 0.06338067688891946, "learning_rate": 0.000740269383914441, "loss": 0.5751, "step": 16010 }, { "epoch": 0.8130370219948979, "grad_norm": 0.02366983052130053, "learning_rate": 0.0007400750879397576, "loss": 0.5536, "step": 16015 }, { "epoch": 0.8132908580929294, "grad_norm": 0.0363662136727588, "learning_rate": 0.0007398807448395314, "loss": 0.5709, "step": 16020 }, { "epoch": 0.8135446941909609, "grad_norm": 0.04335279844371627, "learning_rate": 0.0007396863546519113, "loss": 0.5775, "step": 16025 }, { "epoch": 0.8137985302889924, "grad_norm": 0.028807809521302862, "learning_rate": 0.0007394919174150552, "loss": 0.587, "step": 16030 }, { "epoch": 0.8140523663870239, "grad_norm": 0.03090810771638599, "learning_rate": 0.0007392974331671301, "loss": 0.5813, "step": 16035 }, { "epoch": 0.8143062024850554, "grad_norm": 0.03332179615924521, "learning_rate": 0.0007391029019463121, "loss": 0.5748, "step": 16040 }, { "epoch": 0.8145600385830869, "grad_norm": 0.02260131038125526, "learning_rate": 0.0007389083237907869, "loss": 0.5357, "step": 16045 }, { "epoch": 0.8148138746811184, "grad_norm": 0.03144023339848837, "learning_rate": 0.0007387136987387493, "loss": 0.5479, "step": 16050 }, { "epoch": 0.8150677107791499, "grad_norm": 0.04852227270793531, "learning_rate": 0.0007385190268284028, "loss": 0.562, "step": 16055 }, { "epoch": 0.8153215468771814, "grad_norm": 0.0238226778020187, "learning_rate": 0.000738324308097961, "loss": 0.5748, "step": 16060 }, { "epoch": 0.8155753829752129, "grad_norm": 0.024025342394617455, "learning_rate": 0.0007381295425856461, "loss": 0.5779, "step": 16065 }, { "epoch": 0.8158292190732445, "grad_norm": 0.025164599314322966, "learning_rate": 0.0007379347303296895, "loss": 0.5699, "step": 16070 }, { "epoch": 0.8160830551712759, "grad_norm": 0.024454600763767285, "learning_rate": 0.0007377398713683319, "loss": 0.5647, "step": 16075 }, { "epoch": 0.8163368912693074, "grad_norm": 0.0465643154498082, "learning_rate": 0.0007375449657398232, "loss": 0.6121, "step": 16080 }, { "epoch": 0.816590727367339, "grad_norm": 0.05708836406856427, "learning_rate": 0.0007373500134824224, "loss": 0.552, "step": 16085 }, { "epoch": 0.8168445634653704, "grad_norm": 0.03307721737401848, "learning_rate": 0.0007371550146343976, "loss": 0.5815, "step": 16090 }, { "epoch": 0.8170983995634019, "grad_norm": 0.022676746585353742, "learning_rate": 0.0007369599692340261, "loss": 0.5196, "step": 16095 }, { "epoch": 0.8173522356614334, "grad_norm": 0.022835698320630423, "learning_rate": 0.0007367648773195942, "loss": 0.5688, "step": 16100 }, { "epoch": 0.8176060717594649, "grad_norm": 0.024353558374283157, "learning_rate": 0.000736569738929398, "loss": 0.5533, "step": 16105 }, { "epoch": 0.8178599078574964, "grad_norm": 0.035753016297181556, "learning_rate": 0.0007363745541017415, "loss": 0.5759, "step": 16110 }, { "epoch": 0.8181137439555279, "grad_norm": 0.04602579365083806, "learning_rate": 0.0007361793228749387, "loss": 0.6057, "step": 16115 }, { "epoch": 0.8183675800535594, "grad_norm": 0.02654832147555816, "learning_rate": 0.0007359840452873129, "loss": 0.5732, "step": 16120 }, { "epoch": 0.8186214161515909, "grad_norm": 0.029689830503051053, "learning_rate": 0.0007357887213771958, "loss": 0.5735, "step": 16125 }, { "epoch": 0.8188752522496224, "grad_norm": 0.02922774898175317, "learning_rate": 0.0007355933511829286, "loss": 0.5886, "step": 16130 }, { "epoch": 0.819129088347654, "grad_norm": 0.02346950210966065, "learning_rate": 0.0007353979347428614, "loss": 0.5436, "step": 16135 }, { "epoch": 0.8193829244456854, "grad_norm": 0.4292564062685949, "learning_rate": 0.0007352024720953536, "loss": 0.5475, "step": 16140 }, { "epoch": 0.8196367605437169, "grad_norm": 0.054772914446169205, "learning_rate": 0.0007350069632787734, "loss": 0.5315, "step": 16145 }, { "epoch": 0.8198905966417485, "grad_norm": 0.05912804272652365, "learning_rate": 0.0007348114083314984, "loss": 0.5795, "step": 16150 }, { "epoch": 0.8201444327397799, "grad_norm": 0.030284659576007726, "learning_rate": 0.0007346158072919149, "loss": 0.5843, "step": 16155 }, { "epoch": 0.8203982688378114, "grad_norm": 0.023151058600271775, "learning_rate": 0.0007344201601984185, "loss": 0.5864, "step": 16160 }, { "epoch": 0.820652104935843, "grad_norm": 0.03376567799773459, "learning_rate": 0.0007342244670894136, "loss": 0.5571, "step": 16165 }, { "epoch": 0.8209059410338744, "grad_norm": 0.03959840237334942, "learning_rate": 0.000734028728003314, "loss": 0.5696, "step": 16170 }, { "epoch": 0.8211597771319059, "grad_norm": 0.02935275476865624, "learning_rate": 0.000733832942978542, "loss": 0.5647, "step": 16175 }, { "epoch": 0.8214136132299374, "grad_norm": 0.03823963239401788, "learning_rate": 0.0007336371120535295, "loss": 0.5742, "step": 16180 }, { "epoch": 0.821667449327969, "grad_norm": 0.024101714729295634, "learning_rate": 0.0007334412352667173, "loss": 0.6284, "step": 16185 }, { "epoch": 0.8219212854260004, "grad_norm": 2.693520215640975, "learning_rate": 0.0007332453126565545, "loss": 0.6209, "step": 16190 }, { "epoch": 0.8221751215240319, "grad_norm": 0.03279854521465908, "learning_rate": 0.0007330493442615, "loss": 0.5537, "step": 16195 }, { "epoch": 0.8224289576220635, "grad_norm": 0.044838327703993694, "learning_rate": 0.0007328533301200216, "loss": 0.557, "step": 16200 }, { "epoch": 0.8226827937200949, "grad_norm": 0.02638836436847605, "learning_rate": 0.0007326572702705958, "loss": 0.5701, "step": 16205 }, { "epoch": 0.8229366298181264, "grad_norm": 0.04922931478557384, "learning_rate": 0.0007324611647517078, "loss": 0.56, "step": 16210 }, { "epoch": 0.823190465916158, "grad_norm": 0.03891712292801701, "learning_rate": 0.0007322650136018527, "loss": 0.5869, "step": 16215 }, { "epoch": 0.8234443020141894, "grad_norm": 0.04138342114685851, "learning_rate": 0.0007320688168595338, "loss": 0.6059, "step": 16220 }, { "epoch": 0.8236981381122209, "grad_norm": 0.02918011994976219, "learning_rate": 0.0007318725745632632, "loss": 0.5551, "step": 16225 }, { "epoch": 0.8239519742102525, "grad_norm": 0.0419742382923944, "learning_rate": 0.0007316762867515627, "loss": 0.5673, "step": 16230 }, { "epoch": 0.824205810308284, "grad_norm": 0.030211128758018017, "learning_rate": 0.0007314799534629625, "loss": 0.6303, "step": 16235 }, { "epoch": 0.8244596464063154, "grad_norm": 0.02340354696643915, "learning_rate": 0.0007312835747360018, "loss": 0.5603, "step": 16240 }, { "epoch": 0.824713482504347, "grad_norm": 0.042394083569737895, "learning_rate": 0.0007310871506092287, "loss": 0.5427, "step": 16245 }, { "epoch": 0.8249673186023785, "grad_norm": 0.04912858556363834, "learning_rate": 0.0007308906811212004, "loss": 0.5917, "step": 16250 }, { "epoch": 0.8252211547004099, "grad_norm": 0.03971810340456524, "learning_rate": 0.000730694166310483, "loss": 0.562, "step": 16255 }, { "epoch": 0.8254749907984414, "grad_norm": 0.02785763337910464, "learning_rate": 0.0007304976062156512, "loss": 0.5795, "step": 16260 }, { "epoch": 0.825728826896473, "grad_norm": 0.033158827041856395, "learning_rate": 0.0007303010008752886, "loss": 0.5688, "step": 16265 }, { "epoch": 0.8259826629945044, "grad_norm": 0.030706479795822323, "learning_rate": 0.0007301043503279881, "loss": 0.5976, "step": 16270 }, { "epoch": 0.8262364990925359, "grad_norm": 0.030873986022704285, "learning_rate": 0.0007299076546123512, "loss": 0.5715, "step": 16275 }, { "epoch": 0.8264903351905675, "grad_norm": 0.0474915666866346, "learning_rate": 0.0007297109137669882, "loss": 0.563, "step": 16280 }, { "epoch": 0.826744171288599, "grad_norm": 0.03893683462709932, "learning_rate": 0.0007295141278305185, "loss": 0.5592, "step": 16285 }, { "epoch": 0.8269980073866304, "grad_norm": 0.03111456293981649, "learning_rate": 0.0007293172968415701, "loss": 0.5792, "step": 16290 }, { "epoch": 0.827251843484662, "grad_norm": 0.024853102754921644, "learning_rate": 0.0007291204208387798, "loss": 0.58, "step": 16295 }, { "epoch": 0.8275056795826935, "grad_norm": 0.0251707952164745, "learning_rate": 0.0007289234998607935, "loss": 0.5397, "step": 16300 }, { "epoch": 0.8277595156807249, "grad_norm": 0.03637341065833897, "learning_rate": 0.000728726533946266, "loss": 0.5889, "step": 16305 }, { "epoch": 0.8280133517787565, "grad_norm": 0.025249035570284573, "learning_rate": 0.0007285295231338605, "loss": 0.5608, "step": 16310 }, { "epoch": 0.828267187876788, "grad_norm": 0.03480589568385215, "learning_rate": 0.0007283324674622491, "loss": 0.5958, "step": 16315 }, { "epoch": 0.8285210239748194, "grad_norm": 0.06002094158028185, "learning_rate": 0.0007281353669701131, "loss": 0.5949, "step": 16320 }, { "epoch": 0.828774860072851, "grad_norm": 0.02645055337203729, "learning_rate": 0.0007279382216961426, "loss": 0.5696, "step": 16325 }, { "epoch": 0.8290286961708825, "grad_norm": 0.026182888420755522, "learning_rate": 0.0007277410316790355, "loss": 0.5815, "step": 16330 }, { "epoch": 0.8292825322689139, "grad_norm": 0.026085049179655987, "learning_rate": 0.0007275437969574999, "loss": 0.5716, "step": 16335 }, { "epoch": 0.8295363683669454, "grad_norm": 0.03239294409527313, "learning_rate": 0.0007273465175702515, "loss": 0.5524, "step": 16340 }, { "epoch": 0.829790204464977, "grad_norm": 0.027393441053260734, "learning_rate": 0.0007271491935560155, "loss": 0.6076, "step": 16345 }, { "epoch": 0.8300440405630085, "grad_norm": 0.029966666901580178, "learning_rate": 0.0007269518249535256, "loss": 0.5385, "step": 16350 }, { "epoch": 0.8302978766610399, "grad_norm": 0.02610780272688068, "learning_rate": 0.0007267544118015243, "loss": 0.5672, "step": 16355 }, { "epoch": 0.8305517127590715, "grad_norm": 0.03032975964214376, "learning_rate": 0.0007265569541387628, "loss": 0.5809, "step": 16360 }, { "epoch": 0.830805548857103, "grad_norm": 0.025317306734505777, "learning_rate": 0.0007263594520040011, "loss": 0.5645, "step": 16365 }, { "epoch": 0.8310593849551344, "grad_norm": 0.02593763038542392, "learning_rate": 0.0007261619054360078, "loss": 0.5595, "step": 16370 }, { "epoch": 0.831313221053166, "grad_norm": 0.0439291131573673, "learning_rate": 0.0007259643144735603, "loss": 0.568, "step": 16375 }, { "epoch": 0.8315670571511975, "grad_norm": 0.026930260112695524, "learning_rate": 0.0007257666791554447, "loss": 0.5841, "step": 16380 }, { "epoch": 0.8318208932492289, "grad_norm": 0.031340436270697436, "learning_rate": 0.0007255689995204559, "loss": 0.5776, "step": 16385 }, { "epoch": 0.8320747293472605, "grad_norm": 0.035038658960907056, "learning_rate": 0.0007253712756073973, "loss": 0.5945, "step": 16390 }, { "epoch": 0.832328565445292, "grad_norm": 0.02459703987491012, "learning_rate": 0.0007251735074550815, "loss": 0.5377, "step": 16395 }, { "epoch": 0.8325824015433235, "grad_norm": 0.04858837100594378, "learning_rate": 0.000724975695102329, "loss": 0.5757, "step": 16400 }, { "epoch": 0.832836237641355, "grad_norm": 0.025374293791486993, "learning_rate": 0.0007247778385879695, "loss": 0.5681, "step": 16405 }, { "epoch": 0.8330900737393865, "grad_norm": 0.024727570721873534, "learning_rate": 0.0007245799379508412, "loss": 0.5556, "step": 16410 }, { "epoch": 0.833343909837418, "grad_norm": 0.04461548506528957, "learning_rate": 0.000724381993229791, "loss": 0.5403, "step": 16415 }, { "epoch": 0.8335977459354494, "grad_norm": 0.02730610710183291, "learning_rate": 0.0007241840044636747, "loss": 0.5994, "step": 16420 }, { "epoch": 0.833851582033481, "grad_norm": 0.0335198165281854, "learning_rate": 0.0007239859716913562, "loss": 0.5605, "step": 16425 }, { "epoch": 0.8341054181315125, "grad_norm": 0.04217088658047506, "learning_rate": 0.0007237878949517085, "loss": 0.5743, "step": 16430 }, { "epoch": 0.8343592542295439, "grad_norm": 0.024585253772533063, "learning_rate": 0.0007235897742836131, "loss": 0.5929, "step": 16435 }, { "epoch": 0.8346130903275755, "grad_norm": 0.025207671295824716, "learning_rate": 0.00072339160972596, "loss": 0.5635, "step": 16440 }, { "epoch": 0.834866926425607, "grad_norm": 0.027558993795189373, "learning_rate": 0.000723193401317648, "loss": 0.592, "step": 16445 }, { "epoch": 0.8351207625236385, "grad_norm": 0.036827202153997284, "learning_rate": 0.0007229951490975844, "loss": 0.5541, "step": 16450 }, { "epoch": 0.83537459862167, "grad_norm": 0.026861426162605315, "learning_rate": 0.000722796853104685, "loss": 0.552, "step": 16455 }, { "epoch": 0.8356284347197015, "grad_norm": 0.04528560408360573, "learning_rate": 0.0007225985133778745, "loss": 0.5598, "step": 16460 }, { "epoch": 0.835882270817733, "grad_norm": 0.026142557018130994, "learning_rate": 0.0007224001299560859, "loss": 0.5774, "step": 16465 }, { "epoch": 0.8361361069157645, "grad_norm": 0.03854871080553361, "learning_rate": 0.000722201702878261, "loss": 0.5299, "step": 16470 }, { "epoch": 0.836389943013796, "grad_norm": 0.03451324677710476, "learning_rate": 0.0007220032321833498, "loss": 0.5753, "step": 16475 }, { "epoch": 0.8366437791118275, "grad_norm": 0.026185635664542194, "learning_rate": 0.0007218047179103112, "loss": 0.5643, "step": 16480 }, { "epoch": 0.836897615209859, "grad_norm": 0.026415165159016925, "learning_rate": 0.0007216061600981128, "loss": 0.5673, "step": 16485 }, { "epoch": 0.8371514513078905, "grad_norm": 0.022148025561463187, "learning_rate": 0.0007214075587857302, "loss": 0.5244, "step": 16490 }, { "epoch": 0.837405287405922, "grad_norm": 0.0809295820317378, "learning_rate": 0.0007212089140121481, "loss": 0.5359, "step": 16495 }, { "epoch": 0.8376591235039536, "grad_norm": 0.03396241665558352, "learning_rate": 0.0007210102258163592, "loss": 0.5489, "step": 16500 }, { "epoch": 0.837912959601985, "grad_norm": 0.04736874657246381, "learning_rate": 0.0007208114942373651, "loss": 0.5593, "step": 16505 }, { "epoch": 0.8381667957000165, "grad_norm": 0.02415642764938736, "learning_rate": 0.0007206127193141761, "loss": 0.5368, "step": 16510 }, { "epoch": 0.838420631798048, "grad_norm": 0.025190772006756453, "learning_rate": 0.0007204139010858103, "loss": 0.5872, "step": 16515 }, { "epoch": 0.8386744678960795, "grad_norm": 0.023720884374368048, "learning_rate": 0.0007202150395912949, "loss": 0.5686, "step": 16520 }, { "epoch": 0.838928303994111, "grad_norm": 0.02328821938311436, "learning_rate": 0.0007200161348696655, "loss": 0.5578, "step": 16525 }, { "epoch": 0.8391821400921425, "grad_norm": 0.05346546124165315, "learning_rate": 0.0007198171869599662, "loss": 0.5899, "step": 16530 }, { "epoch": 0.839435976190174, "grad_norm": 0.04804743967944873, "learning_rate": 0.0007196181959012491, "loss": 0.5919, "step": 16535 }, { "epoch": 0.8396898122882055, "grad_norm": 0.07197115729997203, "learning_rate": 0.0007194191617325755, "loss": 0.5914, "step": 16540 }, { "epoch": 0.839943648386237, "grad_norm": 0.09143766754452975, "learning_rate": 0.0007192200844930147, "loss": 0.5068, "step": 16545 }, { "epoch": 0.8401974844842685, "grad_norm": 0.03931986444195336, "learning_rate": 0.0007190209642216445, "loss": 0.5951, "step": 16550 }, { "epoch": 0.8404513205823, "grad_norm": 0.05372740389831407, "learning_rate": 0.0007188218009575514, "loss": 0.5746, "step": 16555 }, { "epoch": 0.8407051566803315, "grad_norm": 0.03388013411522178, "learning_rate": 0.0007186225947398298, "loss": 0.5847, "step": 16560 }, { "epoch": 0.8409589927783631, "grad_norm": 0.2323506802544606, "learning_rate": 0.0007184233456075833, "loss": 1.0376, "step": 16565 }, { "epoch": 0.8412128288763945, "grad_norm": 0.1835106341020443, "learning_rate": 0.0007182240535999232, "loss": 0.5778, "step": 16570 }, { "epoch": 0.841466664974426, "grad_norm": 0.11156210838067974, "learning_rate": 0.0007180247187559697, "loss": 0.5769, "step": 16575 }, { "epoch": 0.8417205010724575, "grad_norm": 0.07718512327967042, "learning_rate": 0.0007178253411148513, "loss": 0.6366, "step": 16580 }, { "epoch": 0.841974337170489, "grad_norm": 0.043680931999653776, "learning_rate": 0.0007176259207157048, "loss": 0.5654, "step": 16585 }, { "epoch": 0.8422281732685205, "grad_norm": 0.05565560445937635, "learning_rate": 0.0007174264575976752, "loss": 0.5576, "step": 16590 }, { "epoch": 0.842482009366552, "grad_norm": 0.04784086578137076, "learning_rate": 0.0007172269517999163, "loss": 0.5756, "step": 16595 }, { "epoch": 0.8427358454645835, "grad_norm": 0.029385540515945264, "learning_rate": 0.00071702740336159, "loss": 0.5723, "step": 16600 }, { "epoch": 0.842989681562615, "grad_norm": 0.028863407001418123, "learning_rate": 0.0007168278123218667, "loss": 0.5663, "step": 16605 }, { "epoch": 0.8432435176606465, "grad_norm": 0.036227912872782514, "learning_rate": 0.0007166281787199253, "loss": 0.5634, "step": 16610 }, { "epoch": 0.8434973537586781, "grad_norm": 0.05143219348236945, "learning_rate": 0.0007164285025949528, "loss": 0.6063, "step": 16615 }, { "epoch": 0.8437511898567095, "grad_norm": 0.028270375163286395, "learning_rate": 0.0007162287839861445, "loss": 0.5714, "step": 16620 }, { "epoch": 0.844005025954741, "grad_norm": 0.045237364055904485, "learning_rate": 0.0007160290229327042, "loss": 0.5614, "step": 16625 }, { "epoch": 0.8442588620527726, "grad_norm": 0.04742378757483653, "learning_rate": 0.000715829219473844, "loss": 0.5856, "step": 16630 }, { "epoch": 0.844512698150804, "grad_norm": 0.028756290264305383, "learning_rate": 0.0007156293736487844, "loss": 0.5441, "step": 16635 }, { "epoch": 0.8447665342488355, "grad_norm": 0.03771401096316666, "learning_rate": 0.0007154294854967541, "loss": 0.5825, "step": 16640 }, { "epoch": 0.8450203703468671, "grad_norm": 0.03402827219689952, "learning_rate": 0.0007152295550569902, "loss": 0.5564, "step": 16645 }, { "epoch": 0.8452742064448985, "grad_norm": 0.03963421220660269, "learning_rate": 0.0007150295823687379, "loss": 0.5801, "step": 16650 }, { "epoch": 0.84552804254293, "grad_norm": 0.03217463407046853, "learning_rate": 0.000714829567471251, "loss": 0.5673, "step": 16655 }, { "epoch": 0.8457818786409615, "grad_norm": 0.025969270367900015, "learning_rate": 0.0007146295104037914, "loss": 0.5762, "step": 16660 }, { "epoch": 0.8460357147389931, "grad_norm": 0.0372684784203102, "learning_rate": 0.0007144294112056292, "loss": 0.5858, "step": 16665 }, { "epoch": 0.8462895508370245, "grad_norm": 0.03206359149474644, "learning_rate": 0.000714229269916043, "loss": 0.5754, "step": 16670 }, { "epoch": 0.846543386935056, "grad_norm": 0.08771530469398538, "learning_rate": 0.0007140290865743194, "loss": 0.5861, "step": 16675 }, { "epoch": 0.8467972230330876, "grad_norm": 0.04534708545965994, "learning_rate": 0.0007138288612197534, "loss": 0.5762, "step": 16680 }, { "epoch": 0.847051059131119, "grad_norm": 0.03943245508252062, "learning_rate": 0.0007136285938916484, "loss": 0.5512, "step": 16685 }, { "epoch": 0.8473048952291505, "grad_norm": 0.05354631204112542, "learning_rate": 0.0007134282846293157, "loss": 0.5659, "step": 16690 }, { "epoch": 0.8475587313271821, "grad_norm": 0.10850672830641753, "learning_rate": 0.0007132279334720751, "loss": 0.5914, "step": 16695 }, { "epoch": 0.8478125674252135, "grad_norm": 0.0344090734999104, "learning_rate": 0.0007130275404592547, "loss": 0.583, "step": 16700 }, { "epoch": 0.848066403523245, "grad_norm": 0.026564143833072755, "learning_rate": 0.0007128271056301902, "loss": 0.5518, "step": 16705 }, { "epoch": 0.8483202396212766, "grad_norm": 0.02413909355518316, "learning_rate": 0.0007126266290242264, "loss": 0.5541, "step": 16710 }, { "epoch": 0.8485740757193081, "grad_norm": 0.02253771359833544, "learning_rate": 0.0007124261106807158, "loss": 0.5439, "step": 16715 }, { "epoch": 0.8488279118173395, "grad_norm": 0.025639805853241732, "learning_rate": 0.0007122255506390188, "loss": 0.5553, "step": 16720 }, { "epoch": 0.849081747915371, "grad_norm": 0.025441727802141368, "learning_rate": 0.0007120249489385048, "loss": 0.5888, "step": 16725 }, { "epoch": 0.8493355840134026, "grad_norm": 0.20833156756734772, "learning_rate": 0.0007118243056185505, "loss": 0.584, "step": 16730 }, { "epoch": 0.849589420111434, "grad_norm": 0.02582708018087541, "learning_rate": 0.0007116236207185414, "loss": 0.5758, "step": 16735 }, { "epoch": 0.8498432562094655, "grad_norm": 0.02244099996933645, "learning_rate": 0.0007114228942778711, "loss": 0.5464, "step": 16740 }, { "epoch": 0.8500970923074971, "grad_norm": 0.030980292003979164, "learning_rate": 0.0007112221263359408, "loss": 0.5484, "step": 16745 }, { "epoch": 0.8503509284055285, "grad_norm": 0.03890776257527718, "learning_rate": 0.0007110213169321606, "loss": 0.6233, "step": 16750 }, { "epoch": 0.85060476450356, "grad_norm": 0.04528676589062287, "learning_rate": 0.0007108204661059482, "loss": 0.5863, "step": 16755 }, { "epoch": 0.8508586006015916, "grad_norm": 0.028278146245680834, "learning_rate": 0.0007106195738967296, "loss": 0.5689, "step": 16760 }, { "epoch": 0.851112436699623, "grad_norm": 0.02656249132824659, "learning_rate": 0.0007104186403439391, "loss": 0.5627, "step": 16765 }, { "epoch": 0.8513662727976545, "grad_norm": 0.032513287659724305, "learning_rate": 0.0007102176654870189, "loss": 0.5654, "step": 16770 }, { "epoch": 0.8516201088956861, "grad_norm": 0.02776149485485987, "learning_rate": 0.0007100166493654192, "loss": 0.5786, "step": 16775 }, { "epoch": 0.8518739449937176, "grad_norm": 0.031105339929159002, "learning_rate": 0.0007098155920185987, "loss": 0.562, "step": 16780 }, { "epoch": 0.852127781091749, "grad_norm": 0.02446590396405293, "learning_rate": 0.0007096144934860237, "loss": 0.5624, "step": 16785 }, { "epoch": 0.8523816171897806, "grad_norm": 0.03981011051439899, "learning_rate": 0.0007094133538071691, "loss": 0.539, "step": 16790 }, { "epoch": 0.8526354532878121, "grad_norm": 0.033938471991999015, "learning_rate": 0.0007092121730215174, "loss": 0.6041, "step": 16795 }, { "epoch": 0.8528892893858435, "grad_norm": 0.028696702111982848, "learning_rate": 0.0007090109511685595, "loss": 0.5812, "step": 16800 }, { "epoch": 0.853143125483875, "grad_norm": 0.031170929377724053, "learning_rate": 0.0007088096882877942, "loss": 0.6141, "step": 16805 }, { "epoch": 0.8533969615819066, "grad_norm": 0.0229475127058901, "learning_rate": 0.0007086083844187284, "loss": 0.5381, "step": 16810 }, { "epoch": 0.853650797679938, "grad_norm": 0.04104758489251979, "learning_rate": 0.0007084070396008771, "loss": 0.5961, "step": 16815 }, { "epoch": 0.8539046337779695, "grad_norm": 0.02451932040436503, "learning_rate": 0.0007082056538737633, "loss": 0.5366, "step": 16820 }, { "epoch": 0.8541584698760011, "grad_norm": 0.04468035129714473, "learning_rate": 0.0007080042272769179, "loss": 0.5693, "step": 16825 }, { "epoch": 0.8544123059740326, "grad_norm": 0.025369526471293296, "learning_rate": 0.0007078027598498801, "loss": 0.5659, "step": 16830 }, { "epoch": 0.854666142072064, "grad_norm": 0.023020797662670914, "learning_rate": 0.0007076012516321968, "loss": 0.5979, "step": 16835 }, { "epoch": 0.8549199781700956, "grad_norm": 0.02317984351608055, "learning_rate": 0.0007073997026634229, "loss": 0.5815, "step": 16840 }, { "epoch": 0.8551738142681271, "grad_norm": 0.024195045783076848, "learning_rate": 0.000707198112983122, "loss": 0.5277, "step": 16845 }, { "epoch": 0.8554276503661585, "grad_norm": 0.03085842460268176, "learning_rate": 0.0007069964826308646, "loss": 0.5641, "step": 16850 }, { "epoch": 0.8556814864641901, "grad_norm": 0.023758504611377088, "learning_rate": 0.00070679481164623, "loss": 0.5577, "step": 16855 }, { "epoch": 0.8559353225622216, "grad_norm": 0.03533884780505401, "learning_rate": 0.0007065931000688053, "loss": 0.5214, "step": 16860 }, { "epoch": 0.856189158660253, "grad_norm": 0.028013725394752647, "learning_rate": 0.0007063913479381851, "loss": 0.5775, "step": 16865 }, { "epoch": 0.8564429947582846, "grad_norm": 0.03922126124071241, "learning_rate": 0.0007061895552939727, "loss": 0.5676, "step": 16870 }, { "epoch": 0.8566968308563161, "grad_norm": 0.02828532099990536, "learning_rate": 0.0007059877221757789, "loss": 0.5652, "step": 16875 }, { "epoch": 0.8569506669543476, "grad_norm": 0.036809316995682864, "learning_rate": 0.0007057858486232224, "loss": 0.536, "step": 16880 }, { "epoch": 0.857204503052379, "grad_norm": 0.021695340919237437, "learning_rate": 0.00070558393467593, "loss": 0.529, "step": 16885 }, { "epoch": 0.8574583391504106, "grad_norm": 0.02740340955824536, "learning_rate": 0.0007053819803735367, "loss": 0.5352, "step": 16890 }, { "epoch": 0.8577121752484421, "grad_norm": 0.03336334546505141, "learning_rate": 0.0007051799857556848, "loss": 0.5372, "step": 16895 }, { "epoch": 0.8579660113464735, "grad_norm": 0.028637615728519715, "learning_rate": 0.0007049779508620248, "loss": 0.5726, "step": 16900 }, { "epoch": 0.8582198474445051, "grad_norm": 0.02540896995489235, "learning_rate": 0.0007047758757322155, "loss": 0.5588, "step": 16905 }, { "epoch": 0.8584736835425366, "grad_norm": 0.0243774664015233, "learning_rate": 0.0007045737604059228, "loss": 0.5855, "step": 16910 }, { "epoch": 0.858727519640568, "grad_norm": 0.025712991028124108, "learning_rate": 0.0007043716049228212, "loss": 0.5725, "step": 16915 }, { "epoch": 0.8589813557385996, "grad_norm": 0.01963408655252367, "learning_rate": 0.0007041694093225929, "loss": 0.5553, "step": 16920 }, { "epoch": 0.8592351918366311, "grad_norm": 0.024595690577265465, "learning_rate": 0.0007039671736449275, "loss": 0.5464, "step": 16925 }, { "epoch": 0.8594890279346626, "grad_norm": 0.022243385284112183, "learning_rate": 0.0007037648979295232, "loss": 0.5316, "step": 16930 }, { "epoch": 0.8597428640326941, "grad_norm": 0.02440380975141963, "learning_rate": 0.0007035625822160856, "loss": 0.5465, "step": 16935 }, { "epoch": 0.8599967001307256, "grad_norm": 0.03482315185034089, "learning_rate": 0.0007033602265443284, "loss": 0.5636, "step": 16940 }, { "epoch": 0.8602505362287571, "grad_norm": 0.024590501396839586, "learning_rate": 0.0007031578309539728, "loss": 0.5877, "step": 16945 }, { "epoch": 0.8605043723267886, "grad_norm": 0.03922772861007277, "learning_rate": 0.000702955395484748, "loss": 0.5928, "step": 16950 }, { "epoch": 0.8607582084248201, "grad_norm": 0.026668312848002942, "learning_rate": 0.0007027529201763913, "loss": 0.5848, "step": 16955 }, { "epoch": 0.8610120445228516, "grad_norm": 0.022676881579317083, "learning_rate": 0.0007025504050686475, "loss": 0.5759, "step": 16960 }, { "epoch": 0.861265880620883, "grad_norm": 0.023945888246282236, "learning_rate": 0.0007023478502012694, "loss": 0.5796, "step": 16965 }, { "epoch": 0.8615197167189146, "grad_norm": 0.030829524909033925, "learning_rate": 0.0007021452556140173, "loss": 0.5673, "step": 16970 }, { "epoch": 0.8617735528169461, "grad_norm": 0.023288491755525775, "learning_rate": 0.0007019426213466597, "loss": 0.5592, "step": 16975 }, { "epoch": 0.8620273889149775, "grad_norm": 0.034313257493583556, "learning_rate": 0.0007017399474389725, "loss": 0.5415, "step": 16980 }, { "epoch": 0.8622812250130091, "grad_norm": 0.030676916955254648, "learning_rate": 0.0007015372339307398, "loss": 0.5374, "step": 16985 }, { "epoch": 0.8625350611110406, "grad_norm": 0.9270526277643246, "learning_rate": 0.000701334480861753, "loss": 0.5703, "step": 16990 }, { "epoch": 0.8627888972090721, "grad_norm": 0.03881534525111206, "learning_rate": 0.0007011316882718119, "loss": 0.5436, "step": 16995 }, { "epoch": 0.8630427333071036, "grad_norm": 0.028288958072038958, "learning_rate": 0.0007009288562007232, "loss": 0.5424, "step": 17000 }, { "epoch": 0.8632965694051351, "grad_norm": 0.03263081861380027, "learning_rate": 0.0007007259846883022, "loss": 0.5635, "step": 17005 }, { "epoch": 0.8635504055031666, "grad_norm": 0.02801944612812956, "learning_rate": 0.0007005230737743714, "loss": 0.5607, "step": 17010 }, { "epoch": 0.8638042416011981, "grad_norm": 0.0354537026800304, "learning_rate": 0.0007003201234987612, "loss": 0.5787, "step": 17015 }, { "epoch": 0.8640580776992296, "grad_norm": 0.04224026943667041, "learning_rate": 0.0007001171339013097, "loss": 0.5469, "step": 17020 }, { "epoch": 0.8643119137972611, "grad_norm": 0.027519369512133282, "learning_rate": 0.0006999141050218628, "loss": 0.5826, "step": 17025 }, { "epoch": 0.8645657498952926, "grad_norm": 0.026240030212773185, "learning_rate": 0.0006997110369002742, "loss": 0.5602, "step": 17030 }, { "epoch": 0.8648195859933241, "grad_norm": 0.02599836695677239, "learning_rate": 0.0006995079295764048, "loss": 0.5593, "step": 17035 }, { "epoch": 0.8650734220913556, "grad_norm": 0.03230402905795593, "learning_rate": 0.000699304783090124, "loss": 0.5685, "step": 17040 }, { "epoch": 0.8653272581893872, "grad_norm": 0.028811832838592445, "learning_rate": 0.0006991015974813081, "loss": 0.5859, "step": 17045 }, { "epoch": 0.8655810942874186, "grad_norm": 0.034518109548492715, "learning_rate": 0.0006988983727898414, "loss": 0.5502, "step": 17050 }, { "epoch": 0.8658349303854501, "grad_norm": 0.03883667058154074, "learning_rate": 0.0006986951090556161, "loss": 0.5473, "step": 17055 }, { "epoch": 0.8660887664834817, "grad_norm": 0.025695035028394198, "learning_rate": 0.0006984918063185319, "loss": 0.5815, "step": 17060 }, { "epoch": 0.8663426025815131, "grad_norm": 0.034846551058862335, "learning_rate": 0.0006982884646184959, "loss": 0.543, "step": 17065 }, { "epoch": 0.8665964386795446, "grad_norm": 0.03999095610243723, "learning_rate": 0.0006980850839954232, "loss": 0.5682, "step": 17070 }, { "epoch": 0.8668502747775761, "grad_norm": 0.029998948571798918, "learning_rate": 0.0006978816644892364, "loss": 0.5651, "step": 17075 }, { "epoch": 0.8671041108756076, "grad_norm": 0.03870411499824972, "learning_rate": 0.0006976782061398657, "loss": 0.5865, "step": 17080 }, { "epoch": 0.8673579469736391, "grad_norm": 0.02581985137998219, "learning_rate": 0.0006974747089872488, "loss": 0.5883, "step": 17085 }, { "epoch": 0.8676117830716706, "grad_norm": 0.03341194341814896, "learning_rate": 0.0006972711730713315, "loss": 0.573, "step": 17090 }, { "epoch": 0.8678656191697022, "grad_norm": 0.036010523707030416, "learning_rate": 0.0006970675984320667, "loss": 0.5535, "step": 17095 }, { "epoch": 0.8681194552677336, "grad_norm": 0.027649651226152285, "learning_rate": 0.000696863985109415, "loss": 0.5654, "step": 17100 }, { "epoch": 0.8683732913657651, "grad_norm": 0.03162081059227697, "learning_rate": 0.0006966603331433447, "loss": 0.6022, "step": 17105 }, { "epoch": 0.8686271274637967, "grad_norm": 0.032299427855624986, "learning_rate": 0.0006964566425738321, "loss": 0.5425, "step": 17110 }, { "epoch": 0.8688809635618281, "grad_norm": 0.021742217941501947, "learning_rate": 0.0006962529134408599, "loss": 0.5897, "step": 17115 }, { "epoch": 0.8691347996598596, "grad_norm": 0.0346527041544656, "learning_rate": 0.0006960491457844198, "loss": 0.5532, "step": 17120 }, { "epoch": 0.8693886357578912, "grad_norm": 0.022954764473995297, "learning_rate": 0.00069584533964451, "loss": 0.5665, "step": 17125 }, { "epoch": 0.8696424718559226, "grad_norm": 0.02307425717340246, "learning_rate": 0.0006956414950611366, "loss": 0.5429, "step": 17130 }, { "epoch": 0.8698963079539541, "grad_norm": 0.023508308014224755, "learning_rate": 0.0006954376120743136, "loss": 0.5923, "step": 17135 }, { "epoch": 0.8701501440519857, "grad_norm": 0.025046949091913972, "learning_rate": 0.0006952336907240616, "loss": 0.5354, "step": 17140 }, { "epoch": 0.8704039801500172, "grad_norm": 0.02807435788273573, "learning_rate": 0.00069502973105041, "loss": 0.5499, "step": 17145 }, { "epoch": 0.8706578162480486, "grad_norm": 0.023394478814900033, "learning_rate": 0.0006948257330933948, "loss": 0.5804, "step": 17150 }, { "epoch": 0.8709116523460801, "grad_norm": 0.02484051188427158, "learning_rate": 0.0006946216968930598, "loss": 0.5795, "step": 17155 }, { "epoch": 0.8711654884441117, "grad_norm": 0.02775584758767845, "learning_rate": 0.0006944176224894563, "loss": 0.5821, "step": 17160 }, { "epoch": 0.8714193245421431, "grad_norm": 0.03912060374962221, "learning_rate": 0.000694213509922643, "loss": 0.583, "step": 17165 }, { "epoch": 0.8716731606401746, "grad_norm": 0.0362056815020368, "learning_rate": 0.0006940093592326861, "loss": 0.571, "step": 17170 }, { "epoch": 0.8719269967382062, "grad_norm": 0.028946737163921484, "learning_rate": 0.0006938051704596598, "loss": 0.5959, "step": 17175 }, { "epoch": 0.8721808328362376, "grad_norm": 0.02620608291978458, "learning_rate": 0.0006936009436436448, "loss": 0.5515, "step": 17180 }, { "epoch": 0.8724346689342691, "grad_norm": 0.02801972461379925, "learning_rate": 0.0006933966788247302, "loss": 0.565, "step": 17185 }, { "epoch": 0.8726885050323007, "grad_norm": 0.042778133049176235, "learning_rate": 0.000693192376043012, "loss": 0.5829, "step": 17190 }, { "epoch": 0.8729423411303322, "grad_norm": 0.03858970271483881, "learning_rate": 0.0006929880353385938, "loss": 0.5507, "step": 17195 }, { "epoch": 0.8731961772283636, "grad_norm": 0.031470436939788314, "learning_rate": 0.0006927836567515866, "loss": 0.5978, "step": 17200 }, { "epoch": 0.8734500133263952, "grad_norm": 0.07394640988697473, "learning_rate": 0.0006925792403221091, "loss": 0.5749, "step": 17205 }, { "epoch": 0.8737038494244267, "grad_norm": 0.03603952523691002, "learning_rate": 0.0006923747860902871, "loss": 0.5866, "step": 17210 }, { "epoch": 0.8739576855224581, "grad_norm": 0.043584509249294856, "learning_rate": 0.000692170294096254, "loss": 0.5899, "step": 17215 }, { "epoch": 0.8742115216204897, "grad_norm": 0.024373073949978077, "learning_rate": 0.0006919657643801504, "loss": 0.506, "step": 17220 }, { "epoch": 0.8744653577185212, "grad_norm": 0.029580565745340112, "learning_rate": 0.0006917611969821248, "loss": 0.5343, "step": 17225 }, { "epoch": 0.8747191938165526, "grad_norm": 0.0257140032071113, "learning_rate": 0.0006915565919423324, "loss": 0.5875, "step": 17230 }, { "epoch": 0.8749730299145841, "grad_norm": 0.03390362964198948, "learning_rate": 0.0006913519493009363, "loss": 0.5813, "step": 17235 }, { "epoch": 0.8752268660126157, "grad_norm": 0.05042303052722009, "learning_rate": 0.0006911472690981069, "loss": 0.606, "step": 17240 }, { "epoch": 0.8754807021106471, "grad_norm": 0.05272668324721124, "learning_rate": 0.0006909425513740217, "loss": 0.5965, "step": 17245 }, { "epoch": 0.8757345382086786, "grad_norm": 0.0488542916140239, "learning_rate": 0.000690737796168866, "loss": 0.5871, "step": 17250 }, { "epoch": 0.8759883743067102, "grad_norm": 0.030433512897338318, "learning_rate": 0.0006905330035228321, "loss": 0.5384, "step": 17255 }, { "epoch": 0.8762422104047417, "grad_norm": 0.026478083460412936, "learning_rate": 0.0006903281734761197, "loss": 0.5365, "step": 17260 }, { "epoch": 0.8764960465027731, "grad_norm": 0.029126871537591283, "learning_rate": 0.000690123306068936, "loss": 0.5554, "step": 17265 }, { "epoch": 0.8767498826008047, "grad_norm": 0.032584410814189266, "learning_rate": 0.0006899184013414955, "loss": 0.5551, "step": 17270 }, { "epoch": 0.8770037186988362, "grad_norm": 0.03105939707552323, "learning_rate": 0.00068971345933402, "loss": 0.5244, "step": 17275 }, { "epoch": 0.8772575547968676, "grad_norm": 0.04428702286739715, "learning_rate": 0.0006895084800867386, "loss": 0.5524, "step": 17280 }, { "epoch": 0.8775113908948992, "grad_norm": 0.026501029034102612, "learning_rate": 0.0006893034636398875, "loss": 0.5697, "step": 17285 }, { "epoch": 0.8777652269929307, "grad_norm": 0.02789104304834869, "learning_rate": 0.0006890984100337105, "loss": 0.5451, "step": 17290 }, { "epoch": 0.8780190630909621, "grad_norm": 0.030381112646398638, "learning_rate": 0.0006888933193084588, "loss": 0.5728, "step": 17295 }, { "epoch": 0.8782728991889936, "grad_norm": 0.03320203500452083, "learning_rate": 0.0006886881915043905, "loss": 0.5478, "step": 17300 }, { "epoch": 0.8785267352870252, "grad_norm": 0.04308556659587886, "learning_rate": 0.0006884830266617711, "loss": 0.5476, "step": 17305 }, { "epoch": 0.8787805713850567, "grad_norm": 0.04744726865498166, "learning_rate": 0.0006882778248208737, "loss": 0.5283, "step": 17310 }, { "epoch": 0.8790344074830881, "grad_norm": 0.04730302242145846, "learning_rate": 0.000688072586021978, "loss": 0.5921, "step": 17315 }, { "epoch": 0.8792882435811197, "grad_norm": 0.02844248850997349, "learning_rate": 0.0006878673103053717, "loss": 0.5359, "step": 17320 }, { "epoch": 0.8795420796791512, "grad_norm": 0.02665626078133169, "learning_rate": 0.0006876619977113492, "loss": 0.5729, "step": 17325 }, { "epoch": 0.8797959157771826, "grad_norm": 0.05159803146684941, "learning_rate": 0.0006874566482802125, "loss": 0.5683, "step": 17330 }, { "epoch": 0.8800497518752142, "grad_norm": 0.028811741517915785, "learning_rate": 0.0006872512620522707, "loss": 0.577, "step": 17335 }, { "epoch": 0.8803035879732457, "grad_norm": 0.032289986764814115, "learning_rate": 0.0006870458390678397, "loss": 0.5473, "step": 17340 }, { "epoch": 0.8805574240712771, "grad_norm": 0.026018555624320636, "learning_rate": 0.0006868403793672435, "loss": 0.5738, "step": 17345 }, { "epoch": 0.8808112601693087, "grad_norm": 0.02320427721745887, "learning_rate": 0.0006866348829908125, "loss": 0.5572, "step": 17350 }, { "epoch": 0.8810650962673402, "grad_norm": 0.028063747667248035, "learning_rate": 0.0006864293499788849, "loss": 0.5831, "step": 17355 }, { "epoch": 0.8813189323653717, "grad_norm": 0.026346028747411987, "learning_rate": 0.0006862237803718054, "loss": 0.5708, "step": 17360 }, { "epoch": 0.8815727684634032, "grad_norm": 0.023583638229848918, "learning_rate": 0.0006860181742099266, "loss": 0.5457, "step": 17365 }, { "epoch": 0.8818266045614347, "grad_norm": 0.03017396130009387, "learning_rate": 0.0006858125315336079, "loss": 0.5551, "step": 17370 }, { "epoch": 0.8820804406594662, "grad_norm": 0.04360521104064648, "learning_rate": 0.0006856068523832158, "loss": 0.5653, "step": 17375 }, { "epoch": 0.8823342767574976, "grad_norm": 0.02249120515299714, "learning_rate": 0.0006854011367991243, "loss": 0.521, "step": 17380 }, { "epoch": 0.8825881128555292, "grad_norm": 0.03244681552967337, "learning_rate": 0.0006851953848217142, "loss": 0.5652, "step": 17385 }, { "epoch": 0.8828419489535607, "grad_norm": 0.026484829488799315, "learning_rate": 0.0006849895964913737, "loss": 0.5456, "step": 17390 }, { "epoch": 0.8830957850515921, "grad_norm": 0.027157693421820065, "learning_rate": 0.0006847837718484977, "loss": 0.5214, "step": 17395 }, { "epoch": 0.8833496211496237, "grad_norm": 0.07609582956202243, "learning_rate": 0.0006845779109334891, "loss": 0.545, "step": 17400 }, { "epoch": 0.8836034572476552, "grad_norm": 0.03239068638126852, "learning_rate": 0.0006843720137867569, "loss": 0.5713, "step": 17405 }, { "epoch": 0.8838572933456867, "grad_norm": 0.025149606981124856, "learning_rate": 0.0006841660804487179, "loss": 0.5704, "step": 17410 }, { "epoch": 0.8841111294437182, "grad_norm": 0.023833536556687185, "learning_rate": 0.0006839601109597957, "loss": 0.5377, "step": 17415 }, { "epoch": 0.8843649655417497, "grad_norm": 0.028450371295118047, "learning_rate": 0.0006837541053604213, "loss": 0.5916, "step": 17420 }, { "epoch": 0.8846188016397812, "grad_norm": 0.033176344085271356, "learning_rate": 0.0006835480636910321, "loss": 0.5335, "step": 17425 }, { "epoch": 0.8848726377378127, "grad_norm": 0.02880029477938549, "learning_rate": 0.0006833419859920736, "loss": 0.5554, "step": 17430 }, { "epoch": 0.8851264738358442, "grad_norm": 0.029522099650815575, "learning_rate": 0.0006831358723039976, "loss": 0.5704, "step": 17435 }, { "epoch": 0.8853803099338757, "grad_norm": 0.04615263077662782, "learning_rate": 0.000682929722667263, "loss": 0.5362, "step": 17440 }, { "epoch": 0.8856341460319072, "grad_norm": 0.48427634793861496, "learning_rate": 0.0006827235371223362, "loss": 0.5819, "step": 17445 }, { "epoch": 0.8858879821299387, "grad_norm": 0.030739394979893288, "learning_rate": 0.0006825173157096903, "loss": 0.5927, "step": 17450 }, { "epoch": 0.8861418182279702, "grad_norm": 0.03176037900151184, "learning_rate": 0.0006823110584698055, "loss": 0.5528, "step": 17455 }, { "epoch": 0.8863956543260016, "grad_norm": 0.026725514935062814, "learning_rate": 0.0006821047654431691, "loss": 0.5695, "step": 17460 }, { "epoch": 0.8866494904240332, "grad_norm": 0.02741254348251184, "learning_rate": 0.0006818984366702754, "loss": 0.5361, "step": 17465 }, { "epoch": 0.8869033265220647, "grad_norm": 0.06467389710456385, "learning_rate": 0.0006816920721916259, "loss": 0.5637, "step": 17470 }, { "epoch": 0.8871571626200963, "grad_norm": 0.05652090655931264, "learning_rate": 0.0006814856720477285, "loss": 0.5662, "step": 17475 }, { "epoch": 0.8874109987181277, "grad_norm": 0.025971100514922223, "learning_rate": 0.0006812792362790987, "loss": 0.5662, "step": 17480 }, { "epoch": 0.8876648348161592, "grad_norm": 0.037312238893479364, "learning_rate": 0.0006810727649262591, "loss": 0.5216, "step": 17485 }, { "epoch": 0.8879186709141907, "grad_norm": 0.03529026683625101, "learning_rate": 0.0006808662580297385, "loss": 0.5754, "step": 17490 }, { "epoch": 0.8881725070122222, "grad_norm": 0.052177166996155586, "learning_rate": 0.0006806597156300736, "loss": 0.5862, "step": 17495 }, { "epoch": 0.8884263431102537, "grad_norm": 0.034917141903712645, "learning_rate": 0.0006804531377678074, "loss": 0.5504, "step": 17500 }, { "epoch": 0.8886801792082852, "grad_norm": 0.035789909497893145, "learning_rate": 0.0006802465244834901, "loss": 0.523, "step": 17505 }, { "epoch": 0.8889340153063167, "grad_norm": 0.02633496325094234, "learning_rate": 0.000680039875817679, "loss": 0.5773, "step": 17510 }, { "epoch": 0.8891878514043482, "grad_norm": 0.1027046920453322, "learning_rate": 0.0006798331918109381, "loss": 0.5328, "step": 17515 }, { "epoch": 0.8894416875023797, "grad_norm": 0.03431649346909258, "learning_rate": 0.0006796264725038387, "loss": 0.584, "step": 17520 }, { "epoch": 0.8896955236004113, "grad_norm": 0.02609942274925054, "learning_rate": 0.0006794197179369584, "loss": 0.5586, "step": 17525 }, { "epoch": 0.8899493596984427, "grad_norm": 0.03430169189866086, "learning_rate": 0.0006792129281508821, "loss": 0.546, "step": 17530 }, { "epoch": 0.8902031957964742, "grad_norm": 0.024928043771357686, "learning_rate": 0.0006790061031862018, "loss": 0.5435, "step": 17535 }, { "epoch": 0.8904570318945058, "grad_norm": 0.028688456933202128, "learning_rate": 0.0006787992430835161, "loss": 0.5732, "step": 17540 }, { "epoch": 0.8907108679925372, "grad_norm": 0.042830556579080276, "learning_rate": 0.0006785923478834308, "loss": 0.5484, "step": 17545 }, { "epoch": 0.8909647040905687, "grad_norm": 0.035751869994615854, "learning_rate": 0.0006783854176265582, "loss": 0.5575, "step": 17550 }, { "epoch": 0.8912185401886003, "grad_norm": 0.028210300266072946, "learning_rate": 0.0006781784523535177, "loss": 0.5915, "step": 17555 }, { "epoch": 0.8914723762866317, "grad_norm": 0.03715659125320336, "learning_rate": 0.0006779714521049356, "loss": 0.5359, "step": 17560 }, { "epoch": 0.8917262123846632, "grad_norm": 0.03583841060325269, "learning_rate": 0.000677764416921445, "loss": 0.5624, "step": 17565 }, { "epoch": 0.8919800484826947, "grad_norm": 0.03003577944086306, "learning_rate": 0.000677557346843686, "loss": 0.5508, "step": 17570 }, { "epoch": 0.8922338845807263, "grad_norm": 0.03580088688841066, "learning_rate": 0.0006773502419123051, "loss": 0.5862, "step": 17575 }, { "epoch": 0.8924877206787577, "grad_norm": 0.03560540588373299, "learning_rate": 0.0006771431021679561, "loss": 0.5623, "step": 17580 }, { "epoch": 0.8927415567767892, "grad_norm": 0.029606401551905418, "learning_rate": 0.0006769359276512998, "loss": 0.5346, "step": 17585 }, { "epoch": 0.8929953928748208, "grad_norm": 0.034532543727339556, "learning_rate": 0.0006767287184030031, "loss": 0.5903, "step": 17590 }, { "epoch": 0.8932492289728522, "grad_norm": 0.029377320096539063, "learning_rate": 0.0006765214744637402, "loss": 0.5723, "step": 17595 }, { "epoch": 0.8935030650708837, "grad_norm": 0.030743555347065143, "learning_rate": 0.0006763141958741924, "loss": 0.5641, "step": 17600 }, { "epoch": 0.8937569011689153, "grad_norm": 0.0409722689803858, "learning_rate": 0.0006761068826750472, "loss": 0.5476, "step": 17605 }, { "epoch": 0.8940107372669467, "grad_norm": 0.029101547286446497, "learning_rate": 0.0006758995349069992, "loss": 0.5678, "step": 17610 }, { "epoch": 0.8942645733649782, "grad_norm": 0.026075517265298196, "learning_rate": 0.0006756921526107495, "loss": 0.5715, "step": 17615 }, { "epoch": 0.8945184094630098, "grad_norm": 0.04138837555118283, "learning_rate": 0.0006754847358270066, "loss": 0.5776, "step": 17620 }, { "epoch": 0.8947722455610413, "grad_norm": 0.02499298797537694, "learning_rate": 0.0006752772845964852, "loss": 0.5813, "step": 17625 }, { "epoch": 0.8950260816590727, "grad_norm": 0.02920872117024721, "learning_rate": 0.0006750697989599068, "loss": 0.567, "step": 17630 }, { "epoch": 0.8952799177571042, "grad_norm": 0.024311481377263307, "learning_rate": 0.0006748622789580001, "loss": 0.5472, "step": 17635 }, { "epoch": 0.8955337538551358, "grad_norm": 0.022553193192463565, "learning_rate": 0.0006746547246315, "loss": 0.5839, "step": 17640 }, { "epoch": 0.8957875899531672, "grad_norm": 0.025203410801119673, "learning_rate": 0.0006744471360211484, "loss": 0.5593, "step": 17645 }, { "epoch": 0.8960414260511987, "grad_norm": 0.06256849888992963, "learning_rate": 0.0006742395131676942, "loss": 0.5406, "step": 17650 }, { "epoch": 0.8962952621492303, "grad_norm": 0.02521288112975126, "learning_rate": 0.0006740318561118922, "loss": 0.5682, "step": 17655 }, { "epoch": 0.8965490982472617, "grad_norm": 0.3098613025846971, "learning_rate": 0.0006738241648945049, "loss": 0.5492, "step": 17660 }, { "epoch": 0.8968029343452932, "grad_norm": 0.037730912321007094, "learning_rate": 0.0006736164395563009, "loss": 0.59, "step": 17665 }, { "epoch": 0.8970567704433248, "grad_norm": 0.034261701601160224, "learning_rate": 0.0006734086801380556, "loss": 0.549, "step": 17670 }, { "epoch": 0.8973106065413562, "grad_norm": 0.023592660379858885, "learning_rate": 0.0006732008866805512, "loss": 0.5566, "step": 17675 }, { "epoch": 0.8975644426393877, "grad_norm": 0.03082538341771991, "learning_rate": 0.0006729930592245764, "loss": 0.5699, "step": 17680 }, { "epoch": 0.8978182787374193, "grad_norm": 0.028133490847564162, "learning_rate": 0.000672785197810927, "loss": 0.5445, "step": 17685 }, { "epoch": 0.8980721148354508, "grad_norm": 0.03384650095501219, "learning_rate": 0.0006725773024804047, "loss": 0.572, "step": 17690 }, { "epoch": 0.8983259509334822, "grad_norm": 0.029532460122143937, "learning_rate": 0.0006723693732738188, "loss": 0.5905, "step": 17695 }, { "epoch": 0.8985797870315138, "grad_norm": 0.04184269802969518, "learning_rate": 0.0006721614102319845, "loss": 0.595, "step": 17700 }, { "epoch": 0.8988336231295453, "grad_norm": 0.024257335369379907, "learning_rate": 0.0006719534133957237, "loss": 0.5904, "step": 17705 }, { "epoch": 0.8990874592275767, "grad_norm": 0.8407754555100807, "learning_rate": 0.0006717453828058655, "loss": 0.5799, "step": 17710 }, { "epoch": 0.8993412953256082, "grad_norm": 0.03889746059406645, "learning_rate": 0.0006715373185032452, "loss": 0.5636, "step": 17715 }, { "epoch": 0.8995951314236398, "grad_norm": 0.03812093829559977, "learning_rate": 0.0006713292205287047, "loss": 0.5268, "step": 17720 }, { "epoch": 0.8998489675216712, "grad_norm": 0.031502538038959534, "learning_rate": 0.0006711210889230926, "loss": 0.5409, "step": 17725 }, { "epoch": 0.9001028036197027, "grad_norm": 0.04019619925278292, "learning_rate": 0.0006709129237272642, "loss": 0.5921, "step": 17730 }, { "epoch": 0.9003566397177343, "grad_norm": 0.029122633192066108, "learning_rate": 0.0006707047249820813, "loss": 0.5613, "step": 17735 }, { "epoch": 0.9006104758157658, "grad_norm": 0.04486417536535739, "learning_rate": 0.0006704964927284119, "loss": 0.5838, "step": 17740 }, { "epoch": 0.9008643119137972, "grad_norm": 0.035238015488419025, "learning_rate": 0.0006702882270071313, "loss": 0.56, "step": 17745 }, { "epoch": 0.9011181480118288, "grad_norm": 0.047574854581445294, "learning_rate": 0.0006700799278591212, "loss": 0.5946, "step": 17750 }, { "epoch": 0.9013719841098603, "grad_norm": 0.029209760126982294, "learning_rate": 0.0006698715953252693, "loss": 0.5785, "step": 17755 }, { "epoch": 0.9016258202078917, "grad_norm": 0.04635207472974123, "learning_rate": 0.0006696632294464704, "loss": 0.6096, "step": 17760 }, { "epoch": 0.9018796563059233, "grad_norm": 0.04669566058976855, "learning_rate": 0.0006694548302636256, "loss": 0.5996, "step": 17765 }, { "epoch": 0.9021334924039548, "grad_norm": 0.047565163368266106, "learning_rate": 0.0006692463978176428, "loss": 0.5887, "step": 17770 }, { "epoch": 0.9023873285019862, "grad_norm": 0.04629943424684873, "learning_rate": 0.0006690379321494361, "loss": 0.5573, "step": 17775 }, { "epoch": 0.9026411646000178, "grad_norm": 0.02436241562205948, "learning_rate": 0.0006688294332999263, "loss": 0.5669, "step": 17780 }, { "epoch": 0.9028950006980493, "grad_norm": 0.04739393205079893, "learning_rate": 0.0006686209013100407, "loss": 0.5763, "step": 17785 }, { "epoch": 0.9031488367960808, "grad_norm": 0.06024300726679382, "learning_rate": 0.0006684123362207131, "loss": 0.5468, "step": 17790 }, { "epoch": 0.9034026728941122, "grad_norm": 0.024550757216584715, "learning_rate": 0.0006682037380728839, "loss": 0.5404, "step": 17795 }, { "epoch": 0.9036565089921438, "grad_norm": 0.029738050538604267, "learning_rate": 0.0006679951069074995, "loss": 0.5511, "step": 17800 }, { "epoch": 0.9039103450901753, "grad_norm": 0.031231955912328697, "learning_rate": 0.0006677864427655135, "loss": 0.5778, "step": 17805 }, { "epoch": 0.9041641811882067, "grad_norm": 0.12238924641223849, "learning_rate": 0.0006675777456878855, "loss": 0.5493, "step": 17810 }, { "epoch": 0.9044180172862383, "grad_norm": 0.04489807461665631, "learning_rate": 0.0006673690157155818, "loss": 0.5639, "step": 17815 }, { "epoch": 0.9046718533842698, "grad_norm": 0.040984958114977495, "learning_rate": 0.000667160252889575, "loss": 0.5693, "step": 17820 }, { "epoch": 0.9049256894823012, "grad_norm": 0.0299007750606263, "learning_rate": 0.0006669514572508441, "loss": 0.5359, "step": 17825 }, { "epoch": 0.9051795255803328, "grad_norm": 0.02649079224122146, "learning_rate": 0.0006667426288403749, "loss": 0.5571, "step": 17830 }, { "epoch": 0.9054333616783643, "grad_norm": 0.0491849427073363, "learning_rate": 0.000666533767699159, "loss": 0.5594, "step": 17835 }, { "epoch": 0.9056871977763958, "grad_norm": 0.028577497869547455, "learning_rate": 0.0006663248738681951, "loss": 0.5776, "step": 17840 }, { "epoch": 0.9059410338744273, "grad_norm": 0.029544273344263332, "learning_rate": 0.0006661159473884879, "loss": 0.5156, "step": 17845 }, { "epoch": 0.9061948699724588, "grad_norm": 0.07785921945262643, "learning_rate": 0.0006659069883010487, "loss": 0.6146, "step": 17850 }, { "epoch": 0.9064487060704903, "grad_norm": 0.04916615480146852, "learning_rate": 0.0006656979966468949, "loss": 0.621, "step": 17855 }, { "epoch": 0.9067025421685218, "grad_norm": 0.04266402533079821, "learning_rate": 0.0006654889724670509, "loss": 0.5852, "step": 17860 }, { "epoch": 0.9069563782665533, "grad_norm": 0.03478646050112459, "learning_rate": 0.0006652799158025466, "loss": 0.5586, "step": 17865 }, { "epoch": 0.9072102143645848, "grad_norm": 0.03269211204236239, "learning_rate": 0.0006650708266944194, "loss": 0.5417, "step": 17870 }, { "epoch": 0.9074640504626162, "grad_norm": 0.05926828968618208, "learning_rate": 0.000664861705183712, "loss": 0.5573, "step": 17875 }, { "epoch": 0.9077178865606478, "grad_norm": 1.137998555724827, "learning_rate": 0.0006646525513114741, "loss": 0.7911, "step": 17880 }, { "epoch": 0.9079717226586793, "grad_norm": 0.15186991344137019, "learning_rate": 0.0006644433651187613, "loss": 0.6537, "step": 17885 }, { "epoch": 0.9082255587567107, "grad_norm": 0.10837397788584424, "learning_rate": 0.0006642341466466363, "loss": 0.5914, "step": 17890 }, { "epoch": 0.9084793948547423, "grad_norm": 0.04567223182586979, "learning_rate": 0.0006640248959361671, "loss": 0.5919, "step": 17895 }, { "epoch": 0.9087332309527738, "grad_norm": 0.0424493699036947, "learning_rate": 0.000663815613028429, "loss": 0.5887, "step": 17900 }, { "epoch": 0.9089870670508053, "grad_norm": 0.02914392670442529, "learning_rate": 0.0006636062979645029, "loss": 0.6148, "step": 17905 }, { "epoch": 0.9092409031488368, "grad_norm": 0.03378335007634043, "learning_rate": 0.0006633969507854764, "loss": 0.5855, "step": 17910 }, { "epoch": 0.9094947392468683, "grad_norm": 0.045172687759939864, "learning_rate": 0.0006631875715324433, "loss": 0.5607, "step": 17915 }, { "epoch": 0.9097485753448998, "grad_norm": 0.03534790871616725, "learning_rate": 0.0006629781602465039, "loss": 0.5642, "step": 17920 }, { "epoch": 0.9100024114429313, "grad_norm": 0.031709338585968895, "learning_rate": 0.0006627687169687643, "loss": 0.5685, "step": 17925 }, { "epoch": 0.9102562475409628, "grad_norm": 0.02405831853389006, "learning_rate": 0.0006625592417403372, "loss": 0.5837, "step": 17930 }, { "epoch": 0.9105100836389943, "grad_norm": 0.02887059553974142, "learning_rate": 0.0006623497346023419, "loss": 0.6114, "step": 17935 }, { "epoch": 0.9107639197370258, "grad_norm": 0.026405583798025718, "learning_rate": 0.0006621401955959029, "loss": 0.5811, "step": 17940 }, { "epoch": 0.9110177558350573, "grad_norm": 0.030545139437636752, "learning_rate": 0.0006619306247621525, "loss": 0.5621, "step": 17945 }, { "epoch": 0.9112715919330888, "grad_norm": 0.03844677188791095, "learning_rate": 0.0006617210221422278, "loss": 0.5567, "step": 17950 }, { "epoch": 0.9115254280311204, "grad_norm": 0.025348216932646248, "learning_rate": 0.0006615113877772729, "loss": 0.5636, "step": 17955 }, { "epoch": 0.9117792641291518, "grad_norm": 0.02747141569387044, "learning_rate": 0.0006613017217084382, "loss": 0.58, "step": 17960 }, { "epoch": 0.9120331002271833, "grad_norm": 0.02571958303044386, "learning_rate": 0.00066109202397688, "loss": 0.569, "step": 17965 }, { "epoch": 0.9122869363252148, "grad_norm": 0.02567934522052075, "learning_rate": 0.0006608822946237607, "loss": 0.5563, "step": 17970 }, { "epoch": 0.9125407724232463, "grad_norm": 0.02982421448410097, "learning_rate": 0.0006606725336902493, "loss": 0.5524, "step": 17975 }, { "epoch": 0.9127946085212778, "grad_norm": 0.02603891319299408, "learning_rate": 0.0006604627412175209, "loss": 0.5962, "step": 17980 }, { "epoch": 0.9130484446193093, "grad_norm": 0.030676874285685652, "learning_rate": 0.0006602529172467564, "loss": 0.5556, "step": 17985 }, { "epoch": 0.9133022807173408, "grad_norm": 0.028691123229749194, "learning_rate": 0.0006600430618191436, "loss": 0.559, "step": 17990 }, { "epoch": 0.9135561168153723, "grad_norm": 0.022843921568002558, "learning_rate": 0.0006598331749758759, "loss": 0.5935, "step": 17995 }, { "epoch": 0.9138099529134038, "grad_norm": 0.026576251107490675, "learning_rate": 0.0006596232567581531, "loss": 0.5514, "step": 18000 }, { "epoch": 0.9140637890114354, "grad_norm": 0.029295378037257274, "learning_rate": 0.0006594133072071809, "loss": 0.5623, "step": 18005 }, { "epoch": 0.9143176251094668, "grad_norm": 0.026342637202697045, "learning_rate": 0.0006592033263641715, "loss": 0.5377, "step": 18010 }, { "epoch": 0.9145714612074983, "grad_norm": 0.0268948168872539, "learning_rate": 0.000658993314270343, "loss": 0.5734, "step": 18015 }, { "epoch": 0.9148252973055299, "grad_norm": 0.03228314048721056, "learning_rate": 0.00065878327096692, "loss": 0.5695, "step": 18020 }, { "epoch": 0.9150791334035613, "grad_norm": 0.03776376655159827, "learning_rate": 0.0006585731964951327, "loss": 0.5676, "step": 18025 }, { "epoch": 0.9153329695015928, "grad_norm": 0.02713281422594516, "learning_rate": 0.0006583630908962178, "loss": 0.6121, "step": 18030 }, { "epoch": 0.9155868055996244, "grad_norm": 0.0313761277274463, "learning_rate": 0.0006581529542114178, "loss": 0.5656, "step": 18035 }, { "epoch": 0.9158406416976558, "grad_norm": 0.023551377190705478, "learning_rate": 0.0006579427864819817, "loss": 0.5859, "step": 18040 }, { "epoch": 0.9160944777956873, "grad_norm": 0.03213179076154911, "learning_rate": 0.0006577325877491641, "loss": 0.5385, "step": 18045 }, { "epoch": 0.9163483138937188, "grad_norm": 0.03309679333832961, "learning_rate": 0.0006575223580542263, "loss": 0.5686, "step": 18050 }, { "epoch": 0.9166021499917504, "grad_norm": 0.02292030032279284, "learning_rate": 0.0006573120974384351, "loss": 0.569, "step": 18055 }, { "epoch": 0.9168559860897818, "grad_norm": 0.02196534616948093, "learning_rate": 0.0006571018059430638, "loss": 0.5583, "step": 18060 }, { "epoch": 0.9171098221878133, "grad_norm": 0.03751186679215072, "learning_rate": 0.0006568914836093913, "loss": 0.5632, "step": 18065 }, { "epoch": 0.9173636582858449, "grad_norm": 0.039073154648674925, "learning_rate": 0.000656681130478703, "loss": 0.5875, "step": 18070 }, { "epoch": 0.9176174943838763, "grad_norm": 0.03016782826811619, "learning_rate": 0.0006564707465922901, "loss": 0.5562, "step": 18075 }, { "epoch": 0.9178713304819078, "grad_norm": 0.44620351828974875, "learning_rate": 0.0006562603319914502, "loss": 0.5836, "step": 18080 }, { "epoch": 0.9181251665799394, "grad_norm": 0.03733778783819997, "learning_rate": 0.0006560498867174862, "loss": 0.5544, "step": 18085 }, { "epoch": 0.9183790026779708, "grad_norm": 0.04185835413154692, "learning_rate": 0.0006558394108117078, "loss": 0.5843, "step": 18090 }, { "epoch": 0.9186328387760023, "grad_norm": 0.02584084106316554, "learning_rate": 0.00065562890431543, "loss": 0.545, "step": 18095 }, { "epoch": 0.9188866748740339, "grad_norm": 0.026170167620165836, "learning_rate": 0.0006554183672699747, "loss": 0.5684, "step": 18100 }, { "epoch": 0.9191405109720653, "grad_norm": 0.028617521912761606, "learning_rate": 0.0006552077997166686, "loss": 0.5778, "step": 18105 }, { "epoch": 0.9193943470700968, "grad_norm": 0.024764375605303438, "learning_rate": 0.0006549972016968457, "loss": 0.5668, "step": 18110 }, { "epoch": 0.9196481831681284, "grad_norm": 0.04224569793867106, "learning_rate": 0.0006547865732518451, "loss": 0.5563, "step": 18115 }, { "epoch": 0.9199020192661599, "grad_norm": 0.024423393777903805, "learning_rate": 0.0006545759144230122, "loss": 0.5272, "step": 18120 }, { "epoch": 0.9201558553641913, "grad_norm": 0.03562323884403274, "learning_rate": 0.0006543652252516978, "loss": 0.5497, "step": 18125 }, { "epoch": 0.9204096914622228, "grad_norm": 0.03532107692857155, "learning_rate": 0.0006541545057792597, "loss": 0.562, "step": 18130 }, { "epoch": 0.9206635275602544, "grad_norm": 0.02233461039237326, "learning_rate": 0.0006539437560470609, "loss": 0.5778, "step": 18135 }, { "epoch": 0.9209173636582858, "grad_norm": 0.036379888044198186, "learning_rate": 0.0006537329760964705, "loss": 0.5385, "step": 18140 }, { "epoch": 0.9211711997563173, "grad_norm": 0.031306097214398455, "learning_rate": 0.0006535221659688636, "loss": 0.5766, "step": 18145 }, { "epoch": 0.9214250358543489, "grad_norm": 0.032940875660410714, "learning_rate": 0.0006533113257056212, "loss": 0.5745, "step": 18150 }, { "epoch": 0.9216788719523803, "grad_norm": 0.028269439007305754, "learning_rate": 0.0006531004553481299, "loss": 0.5593, "step": 18155 }, { "epoch": 0.9219327080504118, "grad_norm": 0.025325938543204793, "learning_rate": 0.0006528895549377829, "loss": 0.5545, "step": 18160 }, { "epoch": 0.9221865441484434, "grad_norm": 0.025608019654732625, "learning_rate": 0.0006526786245159785, "loss": 0.5645, "step": 18165 }, { "epoch": 0.9224403802464749, "grad_norm": 0.03073301426221954, "learning_rate": 0.0006524676641241216, "loss": 0.5729, "step": 18170 }, { "epoch": 0.9226942163445063, "grad_norm": 0.02655926848711976, "learning_rate": 0.0006522566738036227, "loss": 0.5605, "step": 18175 }, { "epoch": 0.9229480524425379, "grad_norm": 0.030923379907562828, "learning_rate": 0.0006520456535958981, "loss": 0.5438, "step": 18180 }, { "epoch": 0.9232018885405694, "grad_norm": 0.02498118812708288, "learning_rate": 0.0006518346035423697, "loss": 0.5618, "step": 18185 }, { "epoch": 0.9234557246386008, "grad_norm": 0.03195096400109142, "learning_rate": 0.0006516235236844661, "loss": 0.5771, "step": 18190 }, { "epoch": 0.9237095607366324, "grad_norm": 0.025673518361398005, "learning_rate": 0.0006514124140636206, "loss": 0.5714, "step": 18195 }, { "epoch": 0.9239633968346639, "grad_norm": 0.023420267058336542, "learning_rate": 0.0006512012747212736, "loss": 0.5543, "step": 18200 }, { "epoch": 0.9242172329326953, "grad_norm": 0.035974375956175315, "learning_rate": 0.0006509901056988703, "loss": 0.5399, "step": 18205 }, { "epoch": 0.9244710690307268, "grad_norm": 0.026974281528685886, "learning_rate": 0.0006507789070378623, "loss": 0.5803, "step": 18210 }, { "epoch": 0.9247249051287584, "grad_norm": 0.03386779705440503, "learning_rate": 0.0006505676787797068, "loss": 0.5573, "step": 18215 }, { "epoch": 0.9249787412267899, "grad_norm": 0.02994069343757094, "learning_rate": 0.0006503564209658668, "loss": 0.5631, "step": 18220 }, { "epoch": 0.9252325773248213, "grad_norm": 0.025204798762410694, "learning_rate": 0.0006501451336378111, "loss": 0.5778, "step": 18225 }, { "epoch": 0.9254864134228529, "grad_norm": 0.02844201792241884, "learning_rate": 0.0006499338168370145, "loss": 0.5494, "step": 18230 }, { "epoch": 0.9257402495208844, "grad_norm": 0.027272927584755857, "learning_rate": 0.0006497224706049574, "loss": 0.5513, "step": 18235 }, { "epoch": 0.9259940856189158, "grad_norm": 0.022625222413103843, "learning_rate": 0.000649511094983126, "loss": 0.5454, "step": 18240 }, { "epoch": 0.9262479217169474, "grad_norm": 0.025965109273752868, "learning_rate": 0.0006492996900130122, "loss": 0.5521, "step": 18245 }, { "epoch": 0.9265017578149789, "grad_norm": 0.023734960990954648, "learning_rate": 0.0006490882557361138, "loss": 0.569, "step": 18250 }, { "epoch": 0.9267555939130103, "grad_norm": 0.029694319744587402, "learning_rate": 0.0006488767921939344, "loss": 0.5544, "step": 18255 }, { "epoch": 0.9270094300110419, "grad_norm": 0.027522631414110266, "learning_rate": 0.0006486652994279832, "loss": 0.5191, "step": 18260 }, { "epoch": 0.9272632661090734, "grad_norm": 0.0213560260774621, "learning_rate": 0.000648453777479775, "loss": 0.5602, "step": 18265 }, { "epoch": 0.9275171022071049, "grad_norm": 0.027648563843928633, "learning_rate": 0.0006482422263908305, "loss": 0.5757, "step": 18270 }, { "epoch": 0.9277709383051364, "grad_norm": 0.024346877510486684, "learning_rate": 0.0006480306462026765, "loss": 0.5502, "step": 18275 }, { "epoch": 0.9280247744031679, "grad_norm": 0.025987941627693693, "learning_rate": 0.0006478190369568447, "loss": 0.572, "step": 18280 }, { "epoch": 0.9282786105011994, "grad_norm": 0.03607494520553649, "learning_rate": 0.0006476073986948731, "loss": 0.5474, "step": 18285 }, { "epoch": 0.9285324465992308, "grad_norm": 0.04133198513293862, "learning_rate": 0.0006473957314583053, "loss": 0.5646, "step": 18290 }, { "epoch": 0.9287862826972624, "grad_norm": 0.02356878697506822, "learning_rate": 0.0006471840352886906, "loss": 0.5555, "step": 18295 }, { "epoch": 0.9290401187952939, "grad_norm": 0.026719448119602498, "learning_rate": 0.0006469723102275835, "loss": 0.5478, "step": 18300 }, { "epoch": 0.9292939548933253, "grad_norm": 0.023824789207588595, "learning_rate": 0.000646760556316545, "loss": 0.5686, "step": 18305 }, { "epoch": 0.9295477909913569, "grad_norm": 0.02504122878294649, "learning_rate": 0.0006465487735971414, "loss": 0.5822, "step": 18310 }, { "epoch": 0.9298016270893884, "grad_norm": 0.025465279303564035, "learning_rate": 0.000646336962110944, "loss": 0.5391, "step": 18315 }, { "epoch": 0.9300554631874198, "grad_norm": 0.024053916899900416, "learning_rate": 0.0006461251218995309, "loss": 0.5812, "step": 18320 }, { "epoch": 0.9303092992854514, "grad_norm": 0.023988031306641153, "learning_rate": 0.0006459132530044851, "loss": 0.5653, "step": 18325 }, { "epoch": 0.9305631353834829, "grad_norm": 0.033028824504018735, "learning_rate": 0.0006457013554673954, "loss": 0.529, "step": 18330 }, { "epoch": 0.9308169714815144, "grad_norm": 0.031328475615483485, "learning_rate": 0.0006454894293298563, "loss": 0.6023, "step": 18335 }, { "epoch": 0.9310708075795459, "grad_norm": 0.025458288041452327, "learning_rate": 0.0006452774746334677, "loss": 0.5946, "step": 18340 }, { "epoch": 0.9313246436775774, "grad_norm": 0.029766324738899293, "learning_rate": 0.0006450654914198354, "loss": 0.5466, "step": 18345 }, { "epoch": 0.9315784797756089, "grad_norm": 0.02517080042746789, "learning_rate": 0.0006448534797305704, "loss": 0.5717, "step": 18350 }, { "epoch": 0.9318323158736403, "grad_norm": 0.02380537403364461, "learning_rate": 0.0006446414396072899, "loss": 0.5402, "step": 18355 }, { "epoch": 0.9320861519716719, "grad_norm": 0.03236454209338435, "learning_rate": 0.0006444293710916161, "loss": 0.5468, "step": 18360 }, { "epoch": 0.9323399880697034, "grad_norm": 0.07009893831871468, "learning_rate": 0.000644217274225177, "loss": 0.5848, "step": 18365 }, { "epoch": 0.9325938241677348, "grad_norm": 0.03355070794386776, "learning_rate": 0.000644005149049606, "loss": 0.5628, "step": 18370 }, { "epoch": 0.9328476602657664, "grad_norm": 0.04417035285010824, "learning_rate": 0.0006437929956065426, "loss": 0.5699, "step": 18375 }, { "epoch": 0.9331014963637979, "grad_norm": 0.025686169691077572, "learning_rate": 0.0006435808139376313, "loss": 0.5644, "step": 18380 }, { "epoch": 0.9333553324618294, "grad_norm": 0.02456795859417348, "learning_rate": 0.0006433686040845222, "loss": 0.5688, "step": 18385 }, { "epoch": 0.9336091685598609, "grad_norm": 0.027858399120178644, "learning_rate": 0.0006431563660888711, "loss": 0.5418, "step": 18390 }, { "epoch": 0.9338630046578924, "grad_norm": 0.031235035810057876, "learning_rate": 0.0006429440999923392, "loss": 0.5544, "step": 18395 }, { "epoch": 0.9341168407559239, "grad_norm": 0.03109567390073356, "learning_rate": 0.0006427318058365934, "loss": 0.5845, "step": 18400 }, { "epoch": 0.9343706768539554, "grad_norm": 0.023673410214207512, "learning_rate": 0.0006425194836633058, "loss": 0.586, "step": 18405 }, { "epoch": 0.9346245129519869, "grad_norm": 0.025651225561305648, "learning_rate": 0.0006423071335141543, "loss": 0.5602, "step": 18410 }, { "epoch": 0.9348783490500184, "grad_norm": 0.024917392040912964, "learning_rate": 0.0006420947554308223, "loss": 0.5642, "step": 18415 }, { "epoch": 0.9351321851480499, "grad_norm": 0.027718757843249613, "learning_rate": 0.0006418823494549983, "loss": 0.5735, "step": 18420 }, { "epoch": 0.9353860212460814, "grad_norm": 0.026445735170235473, "learning_rate": 0.0006416699156283768, "loss": 0.5166, "step": 18425 }, { "epoch": 0.9356398573441129, "grad_norm": 0.024653454471051918, "learning_rate": 0.0006414574539926574, "loss": 0.5693, "step": 18430 }, { "epoch": 0.9358936934421445, "grad_norm": 0.035138012803531134, "learning_rate": 0.0006412449645895452, "loss": 0.5398, "step": 18435 }, { "epoch": 0.9361475295401759, "grad_norm": 0.03579404088252254, "learning_rate": 0.0006410324474607507, "loss": 0.5867, "step": 18440 }, { "epoch": 0.9364013656382074, "grad_norm": 0.05203695866136957, "learning_rate": 0.0006408199026479901, "loss": 0.5798, "step": 18445 }, { "epoch": 0.936655201736239, "grad_norm": 0.03418141205359679, "learning_rate": 0.000640607330192985, "loss": 0.589, "step": 18450 }, { "epoch": 0.9369090378342704, "grad_norm": 0.029127603885871836, "learning_rate": 0.0006403947301374622, "loss": 0.5181, "step": 18455 }, { "epoch": 0.9371628739323019, "grad_norm": 0.025330901763078596, "learning_rate": 0.000640182102523154, "loss": 0.5358, "step": 18460 }, { "epoch": 0.9374167100303334, "grad_norm": 0.02939613956496011, "learning_rate": 0.0006399694473917981, "loss": 0.5448, "step": 18465 }, { "epoch": 0.9376705461283649, "grad_norm": 0.05802483149330994, "learning_rate": 0.0006397567647851377, "loss": 0.6088, "step": 18470 }, { "epoch": 0.9379243822263964, "grad_norm": 0.05969020572466202, "learning_rate": 0.0006395440547449214, "loss": 0.5606, "step": 18475 }, { "epoch": 0.9381782183244279, "grad_norm": 0.03090071852022251, "learning_rate": 0.000639331317312903, "loss": 0.5658, "step": 18480 }, { "epoch": 0.9384320544224595, "grad_norm": 0.026204359425882364, "learning_rate": 0.0006391185525308419, "loss": 0.5685, "step": 18485 }, { "epoch": 0.9386858905204909, "grad_norm": 0.03498061994333065, "learning_rate": 0.0006389057604405027, "loss": 0.5711, "step": 18490 }, { "epoch": 0.9389397266185224, "grad_norm": 0.02717442339063079, "learning_rate": 0.0006386929410836555, "loss": 0.5629, "step": 18495 }, { "epoch": 0.939193562716554, "grad_norm": 0.02682126084664184, "learning_rate": 0.0006384800945020755, "loss": 0.5369, "step": 18500 }, { "epoch": 0.9394473988145854, "grad_norm": 0.021417798921295972, "learning_rate": 0.0006382672207375438, "loss": 0.516, "step": 18505 }, { "epoch": 0.9397012349126169, "grad_norm": 0.0335394781975359, "learning_rate": 0.000638054319831846, "loss": 0.5536, "step": 18510 }, { "epoch": 0.9399550710106485, "grad_norm": 0.033542368843596015, "learning_rate": 0.0006378413918267737, "loss": 0.5476, "step": 18515 }, { "epoch": 0.9402089071086799, "grad_norm": 0.03314207140461258, "learning_rate": 0.0006376284367641237, "loss": 0.5405, "step": 18520 }, { "epoch": 0.9404627432067114, "grad_norm": 0.025257880645441717, "learning_rate": 0.0006374154546856978, "loss": 0.5727, "step": 18525 }, { "epoch": 0.940716579304743, "grad_norm": 0.03153823246454771, "learning_rate": 0.0006372024456333034, "loss": 0.576, "step": 18530 }, { "epoch": 0.9409704154027744, "grad_norm": 0.02414021611137436, "learning_rate": 0.0006369894096487533, "loss": 0.5608, "step": 18535 }, { "epoch": 0.9412242515008059, "grad_norm": 0.05820198130239536, "learning_rate": 0.0006367763467738652, "loss": 0.5498, "step": 18540 }, { "epoch": 0.9414780875988374, "grad_norm": 0.024189243271371962, "learning_rate": 0.0006365632570504622, "loss": 0.5532, "step": 18545 }, { "epoch": 0.941731923696869, "grad_norm": 0.025398202281715243, "learning_rate": 0.000636350140520373, "loss": 0.5623, "step": 18550 }, { "epoch": 0.9419857597949004, "grad_norm": 0.03514525492016054, "learning_rate": 0.0006361369972254313, "loss": 0.5561, "step": 18555 }, { "epoch": 0.9422395958929319, "grad_norm": 0.027198150257000973, "learning_rate": 0.0006359238272074757, "loss": 0.5701, "step": 18560 }, { "epoch": 0.9424934319909635, "grad_norm": 0.03242882219511253, "learning_rate": 0.0006357106305083509, "loss": 0.5913, "step": 18565 }, { "epoch": 0.9427472680889949, "grad_norm": 0.030898225159171074, "learning_rate": 0.000635497407169906, "loss": 0.5652, "step": 18570 }, { "epoch": 0.9430011041870264, "grad_norm": 0.023609900849047016, "learning_rate": 0.0006352841572339957, "loss": 0.5138, "step": 18575 }, { "epoch": 0.943254940285058, "grad_norm": 0.022870274202999735, "learning_rate": 0.0006350708807424803, "loss": 0.5139, "step": 18580 }, { "epoch": 0.9435087763830894, "grad_norm": 0.029704150434133162, "learning_rate": 0.0006348575777372244, "loss": 0.5581, "step": 18585 }, { "epoch": 0.9437626124811209, "grad_norm": 0.023261609889708712, "learning_rate": 0.0006346442482600986, "loss": 0.5341, "step": 18590 }, { "epoch": 0.9440164485791525, "grad_norm": 0.03251302144724121, "learning_rate": 0.0006344308923529784, "loss": 0.5377, "step": 18595 }, { "epoch": 0.944270284677184, "grad_norm": 0.04594564704845458, "learning_rate": 0.0006342175100577443, "loss": 0.5423, "step": 18600 }, { "epoch": 0.9445241207752154, "grad_norm": 0.03186429074273563, "learning_rate": 0.0006340041014162822, "loss": 0.5286, "step": 18605 }, { "epoch": 0.944777956873247, "grad_norm": 0.025609007700680392, "learning_rate": 0.0006337906664704836, "loss": 0.5456, "step": 18610 }, { "epoch": 0.9450317929712785, "grad_norm": 0.024349448360170327, "learning_rate": 0.0006335772052622441, "loss": 0.5522, "step": 18615 }, { "epoch": 0.9452856290693099, "grad_norm": 0.03254833741297758, "learning_rate": 0.0006333637178334655, "loss": 0.5441, "step": 18620 }, { "epoch": 0.9455394651673414, "grad_norm": 0.05909130904483525, "learning_rate": 0.0006331502042260541, "loss": 0.5259, "step": 18625 }, { "epoch": 0.945793301265373, "grad_norm": 0.05134549162232977, "learning_rate": 0.0006329366644819217, "loss": 0.5649, "step": 18630 }, { "epoch": 0.9460471373634044, "grad_norm": 0.037053648813884996, "learning_rate": 0.0006327230986429849, "loss": 0.5742, "step": 18635 }, { "epoch": 0.9463009734614359, "grad_norm": 0.03129953084498823, "learning_rate": 0.0006325095067511658, "loss": 0.5378, "step": 18640 }, { "epoch": 0.9465548095594675, "grad_norm": 0.03654618370634232, "learning_rate": 0.0006322958888483914, "loss": 0.5617, "step": 18645 }, { "epoch": 0.946808645657499, "grad_norm": 0.02422795139105715, "learning_rate": 0.0006320822449765937, "loss": 0.5494, "step": 18650 }, { "epoch": 0.9470624817555304, "grad_norm": 0.02329083288512449, "learning_rate": 0.00063186857517771, "loss": 0.5506, "step": 18655 }, { "epoch": 0.947316317853562, "grad_norm": 0.03354540198025808, "learning_rate": 0.0006316548794936827, "loss": 0.5735, "step": 18660 }, { "epoch": 0.9475701539515935, "grad_norm": 0.027348158991933078, "learning_rate": 0.0006314411579664591, "loss": 0.5586, "step": 18665 }, { "epoch": 0.9478239900496249, "grad_norm": 0.023645544124025912, "learning_rate": 0.0006312274106379916, "loss": 0.5575, "step": 18670 }, { "epoch": 0.9480778261476565, "grad_norm": 0.04599714324805517, "learning_rate": 0.0006310136375502379, "loss": 0.5205, "step": 18675 }, { "epoch": 0.948331662245688, "grad_norm": 0.02201034707176053, "learning_rate": 0.0006307998387451604, "loss": 0.5386, "step": 18680 }, { "epoch": 0.9485854983437194, "grad_norm": 0.02399542893990445, "learning_rate": 0.0006305860142647269, "loss": 0.5139, "step": 18685 }, { "epoch": 0.948839334441751, "grad_norm": 0.04397570627326147, "learning_rate": 0.0006303721641509101, "loss": 0.5376, "step": 18690 }, { "epoch": 0.9490931705397825, "grad_norm": 0.025883089049502404, "learning_rate": 0.0006301582884456877, "loss": 0.5458, "step": 18695 }, { "epoch": 0.949347006637814, "grad_norm": 0.026996067831657474, "learning_rate": 0.0006299443871910423, "loss": 0.5854, "step": 18700 }, { "epoch": 0.9496008427358454, "grad_norm": 0.02942829816892566, "learning_rate": 0.0006297304604289618, "loss": 0.5797, "step": 18705 }, { "epoch": 0.949854678833877, "grad_norm": 0.024736740711198107, "learning_rate": 0.0006295165082014387, "loss": 0.5299, "step": 18710 }, { "epoch": 0.9501085149319085, "grad_norm": 0.02630491688826239, "learning_rate": 0.0006293025305504712, "loss": 0.5528, "step": 18715 }, { "epoch": 0.9503623510299399, "grad_norm": 0.028244133702288142, "learning_rate": 0.0006290885275180615, "loss": 0.5188, "step": 18720 }, { "epoch": 0.9506161871279715, "grad_norm": 0.02421009162431851, "learning_rate": 0.0006288744991462177, "loss": 0.5593, "step": 18725 }, { "epoch": 0.950870023226003, "grad_norm": 0.023551000998892246, "learning_rate": 0.0006286604454769526, "loss": 0.5521, "step": 18730 }, { "epoch": 0.9511238593240344, "grad_norm": 0.024350962137287317, "learning_rate": 0.0006284463665522835, "loss": 0.59, "step": 18735 }, { "epoch": 0.951377695422066, "grad_norm": 0.08344993588030145, "learning_rate": 0.0006282322624142332, "loss": 0.571, "step": 18740 }, { "epoch": 0.9516315315200975, "grad_norm": 0.031834745293134896, "learning_rate": 0.0006280181331048293, "loss": 0.5855, "step": 18745 }, { "epoch": 0.9518853676181289, "grad_norm": 0.025834530453184186, "learning_rate": 0.0006278039786661042, "loss": 0.5814, "step": 18750 }, { "epoch": 0.9521392037161605, "grad_norm": 0.027296230433010688, "learning_rate": 0.0006275897991400956, "loss": 0.5759, "step": 18755 }, { "epoch": 0.952393039814192, "grad_norm": 0.06183118735204768, "learning_rate": 0.0006273755945688458, "loss": 0.5715, "step": 18760 }, { "epoch": 0.9526468759122235, "grad_norm": 0.026071457227966246, "learning_rate": 0.0006271613649944019, "loss": 0.5506, "step": 18765 }, { "epoch": 0.952900712010255, "grad_norm": 0.05151300719134269, "learning_rate": 0.000626947110458816, "loss": 0.5903, "step": 18770 }, { "epoch": 0.9531545481082865, "grad_norm": 0.029346199524142243, "learning_rate": 0.0006267328310041457, "loss": 0.5632, "step": 18775 }, { "epoch": 0.953408384206318, "grad_norm": 0.026410770850243227, "learning_rate": 0.0006265185266724526, "loss": 0.5699, "step": 18780 }, { "epoch": 0.9536622203043494, "grad_norm": 0.049041169261654256, "learning_rate": 0.0006263041975058035, "loss": 0.5605, "step": 18785 }, { "epoch": 0.953916056402381, "grad_norm": 0.0363002561476258, "learning_rate": 0.0006260898435462705, "loss": 0.5506, "step": 18790 }, { "epoch": 0.9541698925004125, "grad_norm": 0.022941890322864124, "learning_rate": 0.0006258754648359301, "loss": 0.5471, "step": 18795 }, { "epoch": 0.9544237285984439, "grad_norm": 0.025645292089913767, "learning_rate": 0.0006256610614168634, "loss": 0.5604, "step": 18800 }, { "epoch": 0.9546775646964755, "grad_norm": 0.022494488082290532, "learning_rate": 0.0006254466333311573, "loss": 0.5778, "step": 18805 }, { "epoch": 0.954931400794507, "grad_norm": 0.02606808639822887, "learning_rate": 0.0006252321806209024, "loss": 0.6058, "step": 18810 }, { "epoch": 0.9551852368925385, "grad_norm": 0.025745633241197222, "learning_rate": 0.0006250177033281952, "loss": 0.5883, "step": 18815 }, { "epoch": 0.95543907299057, "grad_norm": 0.033669145164535265, "learning_rate": 0.0006248032014951363, "loss": 0.5292, "step": 18820 }, { "epoch": 0.9556929090886015, "grad_norm": 0.03772741877989379, "learning_rate": 0.0006245886751638312, "loss": 0.5183, "step": 18825 }, { "epoch": 0.955946745186633, "grad_norm": 0.03475199734255405, "learning_rate": 0.0006243741243763906, "loss": 0.5497, "step": 18830 }, { "epoch": 0.9562005812846645, "grad_norm": 0.028007500754168836, "learning_rate": 0.0006241595491749297, "loss": 0.5348, "step": 18835 }, { "epoch": 0.956454417382696, "grad_norm": 0.03964976547186352, "learning_rate": 0.0006239449496015684, "loss": 0.5696, "step": 18840 }, { "epoch": 0.9567082534807275, "grad_norm": 0.029844026908865874, "learning_rate": 0.0006237303256984315, "loss": 0.5383, "step": 18845 }, { "epoch": 0.956962089578759, "grad_norm": 0.03293271336871704, "learning_rate": 0.0006235156775076488, "loss": 0.5653, "step": 18850 }, { "epoch": 0.9572159256767905, "grad_norm": 0.023764813247282635, "learning_rate": 0.0006233010050713546, "loss": 0.5474, "step": 18855 }, { "epoch": 0.957469761774822, "grad_norm": 0.02729058413216523, "learning_rate": 0.0006230863084316879, "loss": 0.546, "step": 18860 }, { "epoch": 0.9577235978728535, "grad_norm": 0.03048111721240167, "learning_rate": 0.0006228715876307928, "loss": 0.5302, "step": 18865 }, { "epoch": 0.957977433970885, "grad_norm": 0.024821623443136005, "learning_rate": 0.0006226568427108177, "loss": 0.5267, "step": 18870 }, { "epoch": 0.9582312700689165, "grad_norm": 0.02171005251341012, "learning_rate": 0.0006224420737139161, "loss": 0.5686, "step": 18875 }, { "epoch": 0.958485106166948, "grad_norm": 0.02438019118654564, "learning_rate": 0.0006222272806822463, "loss": 0.5566, "step": 18880 }, { "epoch": 0.9587389422649795, "grad_norm": 0.0223542750232972, "learning_rate": 0.0006220124636579704, "loss": 0.5438, "step": 18885 }, { "epoch": 0.958992778363011, "grad_norm": 0.03015303343884345, "learning_rate": 0.0006217976226832565, "loss": 0.5771, "step": 18890 }, { "epoch": 0.9592466144610425, "grad_norm": 0.025825991972978846, "learning_rate": 0.0006215827578002768, "loss": 0.5591, "step": 18895 }, { "epoch": 0.959500450559074, "grad_norm": 0.03547031836750829, "learning_rate": 0.0006213678690512081, "loss": 0.5608, "step": 18900 }, { "epoch": 0.9597542866571055, "grad_norm": 0.020995458669697365, "learning_rate": 0.0006211529564782319, "loss": 0.5428, "step": 18905 }, { "epoch": 0.960008122755137, "grad_norm": 0.02476217559421539, "learning_rate": 0.0006209380201235345, "loss": 0.5592, "step": 18910 }, { "epoch": 0.9602619588531686, "grad_norm": 0.023830491201295642, "learning_rate": 0.000620723060029307, "loss": 0.5601, "step": 18915 }, { "epoch": 0.9605157949512, "grad_norm": 0.024232475162824736, "learning_rate": 0.0006205080762377446, "loss": 0.5588, "step": 18920 }, { "epoch": 0.9607696310492315, "grad_norm": 0.02838429347591899, "learning_rate": 0.000620293068791048, "loss": 0.5365, "step": 18925 }, { "epoch": 0.961023467147263, "grad_norm": 0.022820427870362616, "learning_rate": 0.0006200780377314219, "loss": 0.5594, "step": 18930 }, { "epoch": 0.9612773032452945, "grad_norm": 0.03755731589801296, "learning_rate": 0.0006198629831010758, "loss": 0.5745, "step": 18935 }, { "epoch": 0.961531139343326, "grad_norm": 0.024014657854385738, "learning_rate": 0.0006196479049422239, "loss": 0.5418, "step": 18940 }, { "epoch": 0.9617849754413575, "grad_norm": 0.027051758292076112, "learning_rate": 0.0006194328032970848, "loss": 0.5438, "step": 18945 }, { "epoch": 0.962038811539389, "grad_norm": 0.023230927228254993, "learning_rate": 0.0006192176782078822, "loss": 0.5285, "step": 18950 }, { "epoch": 0.9622926476374205, "grad_norm": 0.02453226053942355, "learning_rate": 0.0006190025297168437, "loss": 0.5408, "step": 18955 }, { "epoch": 0.962546483735452, "grad_norm": 0.28109523504650163, "learning_rate": 0.0006187873578662024, "loss": 0.5413, "step": 18960 }, { "epoch": 0.9628003198334835, "grad_norm": 0.03123756702410563, "learning_rate": 0.0006185721626981949, "loss": 0.5555, "step": 18965 }, { "epoch": 0.963054155931515, "grad_norm": 0.031041861048312636, "learning_rate": 0.0006183569442550633, "loss": 0.6098, "step": 18970 }, { "epoch": 0.9633079920295465, "grad_norm": 0.031099393750686002, "learning_rate": 0.0006181417025790536, "loss": 0.5361, "step": 18975 }, { "epoch": 0.9635618281275781, "grad_norm": 0.02595781815514382, "learning_rate": 0.000617926437712417, "loss": 0.5539, "step": 18980 }, { "epoch": 0.9638156642256095, "grad_norm": 0.03639463677804149, "learning_rate": 0.0006177111496974087, "loss": 0.5472, "step": 18985 }, { "epoch": 0.964069500323641, "grad_norm": 0.02606363135734158, "learning_rate": 0.0006174958385762888, "loss": 0.5701, "step": 18990 }, { "epoch": 0.9643233364216726, "grad_norm": 0.04038231790604605, "learning_rate": 0.0006172805043913218, "loss": 0.5584, "step": 18995 }, { "epoch": 0.964577172519704, "grad_norm": 0.03184750465530715, "learning_rate": 0.0006170651471847766, "loss": 0.5571, "step": 19000 }, { "epoch": 0.9648310086177355, "grad_norm": 0.03705308485544799, "learning_rate": 0.0006168497669989268, "loss": 0.5572, "step": 19005 }, { "epoch": 0.965084844715767, "grad_norm": 0.03454894666757974, "learning_rate": 0.0006166343638760504, "loss": 0.6049, "step": 19010 }, { "epoch": 0.9653386808137985, "grad_norm": 0.03088369982680049, "learning_rate": 0.0006164189378584301, "loss": 0.5598, "step": 19015 }, { "epoch": 0.96559251691183, "grad_norm": 0.028758495609164273, "learning_rate": 0.0006162034889883529, "loss": 0.5474, "step": 19020 }, { "epoch": 0.9658463530098615, "grad_norm": 0.02750682584809894, "learning_rate": 0.0006159880173081103, "loss": 0.542, "step": 19025 }, { "epoch": 0.9661001891078931, "grad_norm": 0.03970699514356098, "learning_rate": 0.0006157725228599982, "loss": 0.5545, "step": 19030 }, { "epoch": 0.9663540252059245, "grad_norm": 0.022540600879443772, "learning_rate": 0.0006155570056863175, "loss": 0.5211, "step": 19035 }, { "epoch": 0.966607861303956, "grad_norm": 0.03517528202934328, "learning_rate": 0.0006153414658293725, "loss": 0.5706, "step": 19040 }, { "epoch": 0.9668616974019876, "grad_norm": 0.025523416998452415, "learning_rate": 0.0006151259033314733, "loss": 0.5416, "step": 19045 }, { "epoch": 0.967115533500019, "grad_norm": 0.024970335802673438, "learning_rate": 0.0006149103182349333, "loss": 0.5711, "step": 19050 }, { "epoch": 0.9673693695980505, "grad_norm": 0.04734193158993932, "learning_rate": 0.0006146947105820709, "loss": 0.5787, "step": 19055 }, { "epoch": 0.9676232056960821, "grad_norm": 0.025774746375266132, "learning_rate": 0.0006144790804152088, "loss": 0.5752, "step": 19060 }, { "epoch": 0.9678770417941135, "grad_norm": 0.035809770586720045, "learning_rate": 0.0006142634277766741, "loss": 0.5544, "step": 19065 }, { "epoch": 0.968130877892145, "grad_norm": 0.022096408564194946, "learning_rate": 0.0006140477527087983, "loss": 0.5594, "step": 19070 }, { "epoch": 0.9683847139901766, "grad_norm": 0.02285174641983169, "learning_rate": 0.0006138320552539175, "loss": 0.5824, "step": 19075 }, { "epoch": 0.9686385500882081, "grad_norm": 0.022639632770351623, "learning_rate": 0.000613616335454372, "loss": 0.5582, "step": 19080 }, { "epoch": 0.9688923861862395, "grad_norm": 0.02407222130765984, "learning_rate": 0.0006134005933525062, "loss": 0.5672, "step": 19085 }, { "epoch": 0.969146222284271, "grad_norm": 0.02688787209576168, "learning_rate": 0.0006131848289906696, "loss": 0.5335, "step": 19090 }, { "epoch": 0.9694000583823026, "grad_norm": 0.02409004068147683, "learning_rate": 0.0006129690424112156, "loss": 0.5962, "step": 19095 }, { "epoch": 0.969653894480334, "grad_norm": 0.024141956964642438, "learning_rate": 0.0006127532336565018, "loss": 0.5666, "step": 19100 }, { "epoch": 0.9699077305783655, "grad_norm": 0.022675312599314844, "learning_rate": 0.0006125374027688905, "loss": 0.551, "step": 19105 }, { "epoch": 0.9701615666763971, "grad_norm": 0.037996729698158885, "learning_rate": 0.0006123215497907484, "loss": 0.5652, "step": 19110 }, { "epoch": 0.9704154027744285, "grad_norm": 0.0272987646077441, "learning_rate": 0.0006121056747644461, "loss": 0.565, "step": 19115 }, { "epoch": 0.97066923887246, "grad_norm": 0.04267144316368038, "learning_rate": 0.000611889777732359, "loss": 0.5429, "step": 19120 }, { "epoch": 0.9709230749704916, "grad_norm": 0.02925397740944498, "learning_rate": 0.0006116738587368665, "loss": 0.5578, "step": 19125 }, { "epoch": 0.9711769110685231, "grad_norm": 0.023290731955223867, "learning_rate": 0.0006114579178203524, "loss": 0.5429, "step": 19130 }, { "epoch": 0.9714307471665545, "grad_norm": 0.03164983053501458, "learning_rate": 0.000611241955025205, "loss": 0.5405, "step": 19135 }, { "epoch": 0.9716845832645861, "grad_norm": 0.037520719870187604, "learning_rate": 0.0006110259703938165, "loss": 0.5724, "step": 19140 }, { "epoch": 0.9719384193626176, "grad_norm": 0.034312355122149724, "learning_rate": 0.0006108099639685837, "loss": 0.5474, "step": 19145 }, { "epoch": 0.972192255460649, "grad_norm": 0.02562062543260294, "learning_rate": 0.0006105939357919076, "loss": 0.5461, "step": 19150 }, { "epoch": 0.9724460915586806, "grad_norm": 0.02354699369047809, "learning_rate": 0.0006103778859061935, "loss": 0.545, "step": 19155 }, { "epoch": 0.9726999276567121, "grad_norm": 0.021930383911286492, "learning_rate": 0.0006101618143538508, "loss": 0.5573, "step": 19160 }, { "epoch": 0.9729537637547435, "grad_norm": 0.02583749368715139, "learning_rate": 0.0006099457211772933, "loss": 0.5544, "step": 19165 }, { "epoch": 0.973207599852775, "grad_norm": 0.022556631905887248, "learning_rate": 0.0006097296064189391, "loss": 0.5559, "step": 19170 }, { "epoch": 0.9734614359508066, "grad_norm": 0.03226579384353296, "learning_rate": 0.0006095134701212102, "loss": 0.5902, "step": 19175 }, { "epoch": 0.973715272048838, "grad_norm": 0.023654661735220267, "learning_rate": 0.0006092973123265334, "loss": 0.5331, "step": 19180 }, { "epoch": 0.9739691081468695, "grad_norm": 0.021164960310687785, "learning_rate": 0.0006090811330773392, "loss": 0.5349, "step": 19185 }, { "epoch": 0.9742229442449011, "grad_norm": 0.026505686211264395, "learning_rate": 0.0006088649324160626, "loss": 0.4964, "step": 19190 }, { "epoch": 0.9744767803429326, "grad_norm": 0.15964885508324672, "learning_rate": 0.0006086487103851426, "loss": 0.5849, "step": 19195 }, { "epoch": 0.974730616440964, "grad_norm": 0.031195162048445326, "learning_rate": 0.0006084324670270227, "loss": 0.5735, "step": 19200 }, { "epoch": 0.9749844525389956, "grad_norm": 0.024371095692914808, "learning_rate": 0.0006082162023841502, "loss": 0.5566, "step": 19205 }, { "epoch": 0.9752382886370271, "grad_norm": 0.027331834493339167, "learning_rate": 0.0006079999164989769, "loss": 0.551, "step": 19210 }, { "epoch": 0.9754921247350585, "grad_norm": 0.027798770270491248, "learning_rate": 0.0006077836094139586, "loss": 0.5576, "step": 19215 }, { "epoch": 0.9757459608330901, "grad_norm": 0.025968021890026095, "learning_rate": 0.0006075672811715553, "loss": 0.5549, "step": 19220 }, { "epoch": 0.9759997969311216, "grad_norm": 0.02489623198625103, "learning_rate": 0.0006073509318142308, "loss": 0.5526, "step": 19225 }, { "epoch": 0.976253633029153, "grad_norm": 0.02608892933733737, "learning_rate": 0.0006071345613844541, "loss": 0.5696, "step": 19230 }, { "epoch": 0.9765074691271846, "grad_norm": 0.023901773570461967, "learning_rate": 0.0006069181699246973, "loss": 0.5763, "step": 19235 }, { "epoch": 0.9767613052252161, "grad_norm": 0.04188933752859617, "learning_rate": 0.0006067017574774369, "loss": 0.5685, "step": 19240 }, { "epoch": 0.9770151413232476, "grad_norm": 0.04061459638720143, "learning_rate": 0.0006064853240851536, "loss": 0.5394, "step": 19245 }, { "epoch": 0.977268977421279, "grad_norm": 0.037889258081175194, "learning_rate": 0.0006062688697903322, "loss": 0.5385, "step": 19250 }, { "epoch": 0.9775228135193106, "grad_norm": 0.03360689695404566, "learning_rate": 0.0006060523946354615, "loss": 0.5318, "step": 19255 }, { "epoch": 0.9777766496173421, "grad_norm": 0.03162668844088351, "learning_rate": 0.0006058358986630347, "loss": 0.5254, "step": 19260 }, { "epoch": 0.9780304857153735, "grad_norm": 0.028559717544297737, "learning_rate": 0.0006056193819155488, "loss": 0.5786, "step": 19265 }, { "epoch": 0.9782843218134051, "grad_norm": 0.02644336830634299, "learning_rate": 0.0006054028444355051, "loss": 0.5469, "step": 19270 }, { "epoch": 0.9785381579114366, "grad_norm": 0.03308577275301962, "learning_rate": 0.0006051862862654085, "loss": 0.5332, "step": 19275 }, { "epoch": 0.978791994009468, "grad_norm": 0.022000448921150856, "learning_rate": 0.0006049697074477686, "loss": 0.5501, "step": 19280 }, { "epoch": 0.9790458301074996, "grad_norm": 0.0247774682959936, "learning_rate": 0.0006047531080250985, "loss": 0.5404, "step": 19285 }, { "epoch": 0.9792996662055311, "grad_norm": 0.022885746957311163, "learning_rate": 0.0006045364880399158, "loss": 0.5675, "step": 19290 }, { "epoch": 0.9795535023035626, "grad_norm": 0.024643646796613586, "learning_rate": 0.0006043198475347418, "loss": 0.5897, "step": 19295 }, { "epoch": 0.9798073384015941, "grad_norm": 0.03123615100865316, "learning_rate": 0.0006041031865521019, "loss": 0.5386, "step": 19300 }, { "epoch": 0.9800611744996256, "grad_norm": 0.023897535561443067, "learning_rate": 0.0006038865051345257, "loss": 0.5787, "step": 19305 }, { "epoch": 0.9803150105976571, "grad_norm": 0.0238186011366476, "learning_rate": 0.0006036698033245466, "loss": 0.5415, "step": 19310 }, { "epoch": 0.9805688466956886, "grad_norm": 0.024067063625727737, "learning_rate": 0.000603453081164702, "loss": 0.5225, "step": 19315 }, { "epoch": 0.9808226827937201, "grad_norm": 0.02230127040642843, "learning_rate": 0.0006032363386975337, "loss": 0.5599, "step": 19320 }, { "epoch": 0.9810765188917516, "grad_norm": 0.023928492267361032, "learning_rate": 0.0006030195759655867, "loss": 0.5454, "step": 19325 }, { "epoch": 0.981330354989783, "grad_norm": 0.022074494679136896, "learning_rate": 0.0006028027930114109, "loss": 0.545, "step": 19330 }, { "epoch": 0.9815841910878146, "grad_norm": 0.036818146764293826, "learning_rate": 0.0006025859898775596, "loss": 0.5431, "step": 19335 }, { "epoch": 0.9818380271858461, "grad_norm": 0.021267108908214902, "learning_rate": 0.0006023691666065899, "loss": 0.5694, "step": 19340 }, { "epoch": 0.9820918632838777, "grad_norm": 0.18195885280117177, "learning_rate": 0.0006021523232410633, "loss": 0.53, "step": 19345 }, { "epoch": 0.9823456993819091, "grad_norm": 0.02933469861631167, "learning_rate": 0.0006019354598235451, "loss": 0.5383, "step": 19350 }, { "epoch": 0.9825995354799406, "grad_norm": 0.02677555173548487, "learning_rate": 0.0006017185763966044, "loss": 0.5268, "step": 19355 }, { "epoch": 0.9828533715779721, "grad_norm": 0.03238477638416001, "learning_rate": 0.0006015016730028147, "loss": 0.5301, "step": 19360 }, { "epoch": 0.9831072076760036, "grad_norm": 0.026772761845334205, "learning_rate": 0.0006012847496847525, "loss": 0.5573, "step": 19365 }, { "epoch": 0.9833610437740351, "grad_norm": 0.033966700490202034, "learning_rate": 0.0006010678064849993, "loss": 0.5473, "step": 19370 }, { "epoch": 0.9836148798720666, "grad_norm": 0.023419696424668214, "learning_rate": 0.0006008508434461394, "loss": 0.5467, "step": 19375 }, { "epoch": 0.9838687159700981, "grad_norm": 0.023940801308642033, "learning_rate": 0.0006006338606107621, "loss": 0.5717, "step": 19380 }, { "epoch": 0.9841225520681296, "grad_norm": 0.025782454053669163, "learning_rate": 0.0006004168580214598, "loss": 0.52, "step": 19385 }, { "epoch": 0.9843763881661611, "grad_norm": 0.0347230672413288, "learning_rate": 0.000600199835720829, "loss": 0.5136, "step": 19390 }, { "epoch": 0.9846302242641927, "grad_norm": 0.03694921277593407, "learning_rate": 0.0005999827937514701, "loss": 0.557, "step": 19395 }, { "epoch": 0.9848840603622241, "grad_norm": 0.042215904453139365, "learning_rate": 0.0005997657321559875, "loss": 0.5597, "step": 19400 }, { "epoch": 0.9851378964602556, "grad_norm": 0.029186970807708833, "learning_rate": 0.0005995486509769892, "loss": 0.543, "step": 19405 }, { "epoch": 0.9853917325582872, "grad_norm": 0.030825680890777987, "learning_rate": 0.0005993315502570871, "loss": 0.5283, "step": 19410 }, { "epoch": 0.9856455686563186, "grad_norm": 0.03084685303258577, "learning_rate": 0.000599114430038897, "loss": 0.5378, "step": 19415 }, { "epoch": 0.9858994047543501, "grad_norm": 0.03624341566980718, "learning_rate": 0.0005988972903650388, "loss": 0.5581, "step": 19420 }, { "epoch": 0.9861532408523817, "grad_norm": 0.03238937387426097, "learning_rate": 0.0005986801312781356, "loss": 0.5855, "step": 19425 }, { "epoch": 0.9864070769504131, "grad_norm": 0.029275031406698588, "learning_rate": 0.0005984629528208147, "loss": 0.5407, "step": 19430 }, { "epoch": 0.9866609130484446, "grad_norm": 0.025894922300801874, "learning_rate": 0.000598245755035707, "loss": 0.5429, "step": 19435 }, { "epoch": 0.9869147491464761, "grad_norm": 0.03331858196787023, "learning_rate": 0.0005980285379654478, "loss": 0.5862, "step": 19440 }, { "epoch": 0.9871685852445076, "grad_norm": 0.02236486818578095, "learning_rate": 0.0005978113016526753, "loss": 0.5557, "step": 19445 }, { "epoch": 0.9874224213425391, "grad_norm": 0.028767154063198953, "learning_rate": 0.0005975940461400322, "loss": 0.5677, "step": 19450 }, { "epoch": 0.9876762574405706, "grad_norm": 0.02765546851820107, "learning_rate": 0.0005973767714701646, "loss": 0.5399, "step": 19455 }, { "epoch": 0.9879300935386022, "grad_norm": 0.025484293553854787, "learning_rate": 0.0005971594776857224, "loss": 0.5593, "step": 19460 }, { "epoch": 0.9881839296366336, "grad_norm": 0.031096244994143988, "learning_rate": 0.000596942164829359, "loss": 0.5332, "step": 19465 }, { "epoch": 0.9884377657346651, "grad_norm": 0.044264139567922633, "learning_rate": 0.0005967248329437322, "loss": 0.5601, "step": 19470 }, { "epoch": 0.9886916018326967, "grad_norm": 0.038014080072740736, "learning_rate": 0.0005965074820715031, "loss": 0.5252, "step": 19475 }, { "epoch": 0.9889454379307281, "grad_norm": 0.02925907334151629, "learning_rate": 0.0005962901122553366, "loss": 0.5219, "step": 19480 }, { "epoch": 0.9891992740287596, "grad_norm": 0.03246914140061126, "learning_rate": 0.000596072723537901, "loss": 0.5414, "step": 19485 }, { "epoch": 0.9894531101267912, "grad_norm": 0.08010941576386262, "learning_rate": 0.0005958553159618693, "loss": 0.5208, "step": 19490 }, { "epoch": 0.9897069462248226, "grad_norm": 0.0337186100394352, "learning_rate": 0.0005956378895699169, "loss": 0.5167, "step": 19495 }, { "epoch": 0.9899607823228541, "grad_norm": 0.02503702827550537, "learning_rate": 0.0005954204444047237, "loss": 0.577, "step": 19500 }, { "epoch": 0.9902146184208857, "grad_norm": 0.027330468479088564, "learning_rate": 0.000595202980508973, "loss": 0.5403, "step": 19505 }, { "epoch": 0.9904684545189172, "grad_norm": 0.024841582567018985, "learning_rate": 0.0005949854979253521, "loss": 0.5139, "step": 19510 }, { "epoch": 0.9907222906169486, "grad_norm": 0.02841062368445544, "learning_rate": 0.0005947679966965517, "loss": 0.5688, "step": 19515 }, { "epoch": 0.9909761267149801, "grad_norm": 0.023717779713039483, "learning_rate": 0.0005945504768652664, "loss": 0.5672, "step": 19520 }, { "epoch": 0.9912299628130117, "grad_norm": 0.02633602181222713, "learning_rate": 0.0005943329384741937, "loss": 0.5766, "step": 19525 }, { "epoch": 0.9914837989110431, "grad_norm": 0.04290875603513479, "learning_rate": 0.0005941153815660357, "loss": 0.5472, "step": 19530 }, { "epoch": 0.9917376350090746, "grad_norm": 0.024582207250698523, "learning_rate": 0.0005938978061834977, "loss": 0.5855, "step": 19535 }, { "epoch": 0.9919914711071062, "grad_norm": 0.023721707310345872, "learning_rate": 0.0005936802123692885, "loss": 0.5748, "step": 19540 }, { "epoch": 0.9922453072051376, "grad_norm": 0.026918040386621857, "learning_rate": 0.0005934626001661209, "loss": 0.5301, "step": 19545 }, { "epoch": 0.9924991433031691, "grad_norm": 0.021708866914701308, "learning_rate": 0.000593244969616711, "loss": 0.5495, "step": 19550 }, { "epoch": 0.9927529794012007, "grad_norm": 0.026495248356829797, "learning_rate": 0.0005930273207637783, "loss": 0.5222, "step": 19555 }, { "epoch": 0.9930068154992322, "grad_norm": 0.02555187322005583, "learning_rate": 0.0005928096536500467, "loss": 0.54, "step": 19560 }, { "epoch": 0.9932606515972636, "grad_norm": 0.02168086852198851, "learning_rate": 0.0005925919683182429, "loss": 0.5276, "step": 19565 }, { "epoch": 0.9935144876952952, "grad_norm": 0.02499980089901408, "learning_rate": 0.0005923742648110974, "loss": 0.5073, "step": 19570 }, { "epoch": 0.9937683237933267, "grad_norm": 0.02692412234182429, "learning_rate": 0.0005921565431713445, "loss": 0.5752, "step": 19575 }, { "epoch": 0.9940221598913581, "grad_norm": 0.024546898354117743, "learning_rate": 0.0005919388034417218, "loss": 0.5328, "step": 19580 }, { "epoch": 0.9942759959893896, "grad_norm": 0.024371485803514188, "learning_rate": 0.0005917210456649703, "loss": 0.5451, "step": 19585 }, { "epoch": 0.9945298320874212, "grad_norm": 0.021869432790112914, "learning_rate": 0.0005915032698838351, "loss": 0.5394, "step": 19590 }, { "epoch": 0.9947836681854526, "grad_norm": 0.03548043935373576, "learning_rate": 0.0005912854761410642, "loss": 0.5672, "step": 19595 }, { "epoch": 0.9950375042834841, "grad_norm": 0.03662482896138081, "learning_rate": 0.0005910676644794098, "loss": 0.5641, "step": 19600 }, { "epoch": 0.9952913403815157, "grad_norm": 0.0378468309188474, "learning_rate": 0.0005908498349416269, "loss": 0.5613, "step": 19605 }, { "epoch": 0.9955451764795472, "grad_norm": 0.03627088854294031, "learning_rate": 0.0005906319875704744, "loss": 0.5532, "step": 19610 }, { "epoch": 0.9957990125775786, "grad_norm": 0.03649215551829472, "learning_rate": 0.0005904141224087147, "loss": 0.4949, "step": 19615 }, { "epoch": 0.9960528486756102, "grad_norm": 0.03763331065484329, "learning_rate": 0.0005901962394991139, "loss": 0.5862, "step": 19620 }, { "epoch": 0.9963066847736417, "grad_norm": 0.0383387603823414, "learning_rate": 0.0005899783388844408, "loss": 0.5556, "step": 19625 }, { "epoch": 0.9965605208716731, "grad_norm": 0.023484212925512286, "learning_rate": 0.0005897604206074687, "loss": 0.5708, "step": 19630 }, { "epoch": 0.9968143569697047, "grad_norm": 0.03442449480611108, "learning_rate": 0.0005895424847109736, "loss": 0.5424, "step": 19635 }, { "epoch": 0.9970681930677362, "grad_norm": 0.025468803257999176, "learning_rate": 0.0005893245312377353, "loss": 0.5184, "step": 19640 }, { "epoch": 0.9973220291657676, "grad_norm": 0.023711176900071695, "learning_rate": 0.0005891065602305369, "loss": 0.5628, "step": 19645 }, { "epoch": 0.9975758652637992, "grad_norm": 0.02757286076818459, "learning_rate": 0.0005888885717321653, "loss": 0.5549, "step": 19650 }, { "epoch": 0.9978297013618307, "grad_norm": 0.022946177140901372, "learning_rate": 0.0005886705657854101, "loss": 0.531, "step": 19655 }, { "epoch": 0.9980835374598621, "grad_norm": 0.03239722642028069, "learning_rate": 0.0005884525424330652, "loss": 0.547, "step": 19660 }, { "epoch": 0.9983373735578936, "grad_norm": 0.02284766198581876, "learning_rate": 0.0005882345017179274, "loss": 0.5415, "step": 19665 }, { "epoch": 0.9985912096559252, "grad_norm": 0.024423645649003435, "learning_rate": 0.0005880164436827968, "loss": 0.5319, "step": 19670 }, { "epoch": 0.9988450457539567, "grad_norm": 0.023330277892660952, "learning_rate": 0.0005877983683704772, "loss": 0.5493, "step": 19675 }, { "epoch": 0.9990988818519881, "grad_norm": 0.023557950454847, "learning_rate": 0.0005875802758237758, "loss": 0.5102, "step": 19680 }, { "epoch": 0.9993527179500197, "grad_norm": 0.023129582403367616, "learning_rate": 0.0005873621660855031, "loss": 0.5691, "step": 19685 }, { "epoch": 0.9996065540480512, "grad_norm": 0.03468112252443195, "learning_rate": 0.0005871440391984729, "loss": 0.5328, "step": 19690 }, { "epoch": 0.9998603901460826, "grad_norm": 0.027312657946593693, "learning_rate": 0.0005869258952055023, "loss": 0.5544, "step": 19695 }, { "epoch": 1.0001269180490158, "grad_norm": 0.026262955674306445, "learning_rate": 0.000586707734149412, "loss": 0.5426, "step": 19700 }, { "epoch": 1.0003807541470473, "grad_norm": 0.022474094556517613, "learning_rate": 0.0005864895560730257, "loss": 0.4976, "step": 19705 }, { "epoch": 1.0006345902450788, "grad_norm": 0.030681716691258087, "learning_rate": 0.000586271361019171, "loss": 0.484, "step": 19710 }, { "epoch": 1.0008884263431101, "grad_norm": 0.06523945944943467, "learning_rate": 0.0005860531490306784, "loss": 0.5035, "step": 19715 }, { "epoch": 1.0011422624411417, "grad_norm": 0.02136637932462246, "learning_rate": 0.0005858349201503819, "loss": 0.4933, "step": 19720 }, { "epoch": 1.0013960985391732, "grad_norm": 0.023171987689390824, "learning_rate": 0.0005856166744211185, "loss": 0.5241, "step": 19725 }, { "epoch": 1.0016499346372048, "grad_norm": 0.024915720736357203, "learning_rate": 0.000585398411885729, "loss": 0.5251, "step": 19730 }, { "epoch": 1.0019037707352363, "grad_norm": 0.026668874934590553, "learning_rate": 0.0005851801325870569, "loss": 0.5185, "step": 19735 }, { "epoch": 1.0021576068332678, "grad_norm": 0.03835627675960685, "learning_rate": 0.0005849618365679497, "loss": 0.5184, "step": 19740 }, { "epoch": 1.0024114429312994, "grad_norm": 0.034939337784261996, "learning_rate": 0.0005847435238712578, "loss": 0.492, "step": 19745 }, { "epoch": 1.0026652790293307, "grad_norm": 0.02797155118461246, "learning_rate": 0.0005845251945398347, "loss": 0.5061, "step": 19750 }, { "epoch": 1.0029191151273622, "grad_norm": 0.02625721377041572, "learning_rate": 0.0005843068486165374, "loss": 0.5162, "step": 19755 }, { "epoch": 1.0031729512253937, "grad_norm": 0.033958806367861846, "learning_rate": 0.0005840884861442262, "loss": 0.5117, "step": 19760 }, { "epoch": 1.0034267873234253, "grad_norm": 0.03202782443206157, "learning_rate": 0.0005838701071657643, "loss": 0.5007, "step": 19765 }, { "epoch": 1.0036806234214568, "grad_norm": 0.05036650421537131, "learning_rate": 0.0005836517117240188, "loss": 0.5151, "step": 19770 }, { "epoch": 1.0039344595194883, "grad_norm": 0.04653612155409561, "learning_rate": 0.0005834332998618596, "loss": 0.4854, "step": 19775 }, { "epoch": 1.0041882956175199, "grad_norm": 0.02804671100615471, "learning_rate": 0.0005832148716221595, "loss": 0.4987, "step": 19780 }, { "epoch": 1.0044421317155512, "grad_norm": 0.022529443828552497, "learning_rate": 0.0005829964270477953, "loss": 0.5166, "step": 19785 }, { "epoch": 1.0046959678135827, "grad_norm": 0.026684379922705167, "learning_rate": 0.0005827779661816461, "loss": 0.4885, "step": 19790 }, { "epoch": 1.0049498039116143, "grad_norm": 0.060963234819393114, "learning_rate": 0.000582559489066595, "loss": 0.4791, "step": 19795 }, { "epoch": 1.0052036400096458, "grad_norm": 0.03024468169105826, "learning_rate": 0.0005823409957455281, "loss": 0.5277, "step": 19800 }, { "epoch": 1.0054574761076773, "grad_norm": 0.026438898745094152, "learning_rate": 0.0005821224862613343, "loss": 0.5012, "step": 19805 }, { "epoch": 1.0057113122057089, "grad_norm": 0.03800242164729915, "learning_rate": 0.000581903960656906, "loss": 0.5191, "step": 19810 }, { "epoch": 1.0059651483037402, "grad_norm": 0.02349650729472903, "learning_rate": 0.0005816854189751386, "loss": 0.5025, "step": 19815 }, { "epoch": 1.0062189844017717, "grad_norm": 0.027214316740470396, "learning_rate": 0.0005814668612589309, "loss": 0.5251, "step": 19820 }, { "epoch": 1.0064728204998032, "grad_norm": 0.025006007655423122, "learning_rate": 0.0005812482875511845, "loss": 0.5188, "step": 19825 }, { "epoch": 1.0067266565978348, "grad_norm": 0.023683757750602448, "learning_rate": 0.0005810296978948045, "loss": 0.5285, "step": 19830 }, { "epoch": 1.0069804926958663, "grad_norm": 0.02813093510207219, "learning_rate": 0.0005808110923326989, "loss": 0.5355, "step": 19835 }, { "epoch": 1.0072343287938978, "grad_norm": 0.036465732354810876, "learning_rate": 0.000580592470907779, "loss": 0.5106, "step": 19840 }, { "epoch": 1.0074881648919294, "grad_norm": 0.027979504250805996, "learning_rate": 0.0005803738336629588, "loss": 0.5217, "step": 19845 }, { "epoch": 1.0077420009899607, "grad_norm": 0.02444979840392901, "learning_rate": 0.0005801551806411561, "loss": 0.5038, "step": 19850 }, { "epoch": 1.0079958370879922, "grad_norm": 0.02488255109410723, "learning_rate": 0.000579936511885291, "loss": 0.5093, "step": 19855 }, { "epoch": 1.0082496731860238, "grad_norm": 0.02550049459565845, "learning_rate": 0.0005797178274382873, "loss": 0.5269, "step": 19860 }, { "epoch": 1.0085035092840553, "grad_norm": 0.02148889517361041, "learning_rate": 0.0005794991273430716, "loss": 0.5236, "step": 19865 }, { "epoch": 1.0087573453820868, "grad_norm": 0.029497009648340886, "learning_rate": 0.0005792804116425736, "loss": 0.5042, "step": 19870 }, { "epoch": 1.0090111814801184, "grad_norm": 0.02775516992605051, "learning_rate": 0.0005790616803797263, "loss": 0.5132, "step": 19875 }, { "epoch": 1.0092650175781497, "grad_norm": 0.021211655705574328, "learning_rate": 0.0005788429335974653, "loss": 0.4801, "step": 19880 }, { "epoch": 1.0095188536761812, "grad_norm": 0.02517030398746431, "learning_rate": 0.0005786241713387297, "loss": 0.5146, "step": 19885 }, { "epoch": 1.0097726897742128, "grad_norm": 0.09462270152737755, "learning_rate": 0.0005784053936464613, "loss": 0.5127, "step": 19890 }, { "epoch": 1.0100265258722443, "grad_norm": 0.026408205454532502, "learning_rate": 0.0005781866005636052, "loss": 0.5132, "step": 19895 }, { "epoch": 1.0102803619702758, "grad_norm": 0.048718416202262686, "learning_rate": 0.0005779677921331093, "loss": 0.5499, "step": 19900 }, { "epoch": 1.0105341980683074, "grad_norm": 0.05158533517321516, "learning_rate": 0.0005777489683979247, "loss": 0.4807, "step": 19905 }, { "epoch": 1.010788034166339, "grad_norm": 0.042414622078804524, "learning_rate": 0.0005775301294010052, "loss": 0.5528, "step": 19910 }, { "epoch": 1.0110418702643702, "grad_norm": 0.04213150411449631, "learning_rate": 0.000577311275185308, "loss": 0.5266, "step": 19915 }, { "epoch": 1.0112957063624017, "grad_norm": 0.03233858503499294, "learning_rate": 0.000577092405793793, "loss": 0.4932, "step": 19920 }, { "epoch": 1.0115495424604333, "grad_norm": 0.039425677298710396, "learning_rate": 0.0005768735212694232, "loss": 0.5242, "step": 19925 }, { "epoch": 1.0118033785584648, "grad_norm": 0.04737514302447274, "learning_rate": 0.0005766546216551646, "loss": 0.4966, "step": 19930 }, { "epoch": 1.0120572146564963, "grad_norm": 0.03637605851463593, "learning_rate": 0.0005764357069939861, "loss": 0.4907, "step": 19935 }, { "epoch": 1.0123110507545279, "grad_norm": 0.03694503903527369, "learning_rate": 0.0005762167773288594, "loss": 0.5324, "step": 19940 }, { "epoch": 1.0125648868525594, "grad_norm": 0.02639160420478131, "learning_rate": 0.0005759978327027594, "loss": 0.5153, "step": 19945 }, { "epoch": 1.0128187229505907, "grad_norm": 0.028504997239272276, "learning_rate": 0.000575778873158664, "loss": 0.5113, "step": 19950 }, { "epoch": 1.0130725590486223, "grad_norm": 0.024034023043991923, "learning_rate": 0.0005755598987395535, "loss": 0.5544, "step": 19955 }, { "epoch": 1.0133263951466538, "grad_norm": 0.024483302437720976, "learning_rate": 0.0005753409094884118, "loss": 0.512, "step": 19960 }, { "epoch": 1.0135802312446853, "grad_norm": 0.03202531364702948, "learning_rate": 0.0005751219054482252, "loss": 0.5313, "step": 19965 }, { "epoch": 1.0138340673427169, "grad_norm": 0.04629752856614847, "learning_rate": 0.0005749028866619833, "loss": 0.5335, "step": 19970 }, { "epoch": 1.0140879034407484, "grad_norm": 0.03603069481589699, "learning_rate": 0.0005746838531726783, "loss": 0.4915, "step": 19975 }, { "epoch": 1.0143417395387797, "grad_norm": 0.036047586573516865, "learning_rate": 0.0005744648050233053, "loss": 0.5061, "step": 19980 }, { "epoch": 1.0145955756368112, "grad_norm": 0.02500217530038146, "learning_rate": 0.0005742457422568626, "loss": 0.5021, "step": 19985 }, { "epoch": 1.0148494117348428, "grad_norm": 0.030208741553390497, "learning_rate": 0.0005740266649163507, "loss": 0.5167, "step": 19990 }, { "epoch": 1.0151032478328743, "grad_norm": 0.027545548141998097, "learning_rate": 0.0005738075730447738, "loss": 0.523, "step": 19995 }, { "epoch": 1.0153570839309058, "grad_norm": 0.08854306261500218, "learning_rate": 0.0005735884666851383, "loss": 0.5208, "step": 20000 }, { "epoch": 1.0156109200289374, "grad_norm": 0.030820917059042843, "learning_rate": 0.0005733693458804537, "loss": 0.5101, "step": 20005 }, { "epoch": 1.015864756126969, "grad_norm": 0.034348864701267225, "learning_rate": 0.0005731502106737326, "loss": 0.5029, "step": 20010 }, { "epoch": 1.0161185922250002, "grad_norm": 0.05872350374420824, "learning_rate": 0.0005729310611079899, "loss": 0.522, "step": 20015 }, { "epoch": 1.0163724283230318, "grad_norm": 0.156969240858159, "learning_rate": 0.0005727118972262437, "loss": 0.5268, "step": 20020 }, { "epoch": 1.0166262644210633, "grad_norm": 0.028641596405852787, "learning_rate": 0.0005724927190715144, "loss": 0.5322, "step": 20025 }, { "epoch": 1.0168801005190948, "grad_norm": 0.03521162865341286, "learning_rate": 0.0005722735266868261, "loss": 0.4916, "step": 20030 }, { "epoch": 1.0171339366171264, "grad_norm": 0.10045881918602356, "learning_rate": 0.0005720543201152048, "loss": 0.5197, "step": 20035 }, { "epoch": 1.017387772715158, "grad_norm": 0.029867425574080526, "learning_rate": 0.0005718350993996798, "loss": 0.517, "step": 20040 }, { "epoch": 1.0176416088131894, "grad_norm": 0.025642792051341005, "learning_rate": 0.0005716158645832831, "loss": 0.5213, "step": 20045 }, { "epoch": 1.0178954449112207, "grad_norm": 0.02164896016678895, "learning_rate": 0.0005713966157090493, "loss": 0.5081, "step": 20050 }, { "epoch": 1.0181492810092523, "grad_norm": 0.029307046081771176, "learning_rate": 0.000571177352820016, "loss": 0.56, "step": 20055 }, { "epoch": 1.0184031171072838, "grad_norm": 0.03348711512912175, "learning_rate": 0.0005709580759592232, "loss": 0.4986, "step": 20060 }, { "epoch": 1.0186569532053154, "grad_norm": 0.022750518622437432, "learning_rate": 0.000570738785169714, "loss": 0.5318, "step": 20065 }, { "epoch": 1.0189107893033469, "grad_norm": 0.03273089769737566, "learning_rate": 0.0005705194804945339, "loss": 0.5205, "step": 20070 }, { "epoch": 1.0191646254013784, "grad_norm": 0.03101997693290262, "learning_rate": 0.0005703001619767317, "loss": 0.5585, "step": 20075 }, { "epoch": 1.0194184614994097, "grad_norm": 0.48293463443735013, "learning_rate": 0.0005700808296593581, "loss": 0.5232, "step": 20080 }, { "epoch": 1.0196722975974413, "grad_norm": 0.026869468991904833, "learning_rate": 0.0005698614835854672, "loss": 0.5329, "step": 20085 }, { "epoch": 1.0199261336954728, "grad_norm": 0.03175236206705197, "learning_rate": 0.0005696421237981155, "loss": 0.5293, "step": 20090 }, { "epoch": 1.0201799697935043, "grad_norm": 0.02839497971854906, "learning_rate": 0.0005694227503403623, "loss": 0.5012, "step": 20095 }, { "epoch": 1.0204338058915359, "grad_norm": 0.03500525520582677, "learning_rate": 0.0005692033632552691, "loss": 0.5219, "step": 20100 }, { "epoch": 1.0206876419895674, "grad_norm": 0.034416581373221365, "learning_rate": 0.000568983962585901, "loss": 0.4977, "step": 20105 }, { "epoch": 1.020941478087599, "grad_norm": 0.025912971110602936, "learning_rate": 0.0005687645483753252, "loss": 0.5208, "step": 20110 }, { "epoch": 1.0211953141856303, "grad_norm": 0.033142018106894794, "learning_rate": 0.0005685451206666113, "loss": 0.4926, "step": 20115 }, { "epoch": 1.0214491502836618, "grad_norm": 0.03866721854730293, "learning_rate": 0.0005683256795028321, "loss": 0.4984, "step": 20120 }, { "epoch": 1.0217029863816933, "grad_norm": 0.027988000692318403, "learning_rate": 0.0005681062249270627, "loss": 0.4899, "step": 20125 }, { "epoch": 1.0219568224797249, "grad_norm": 0.022355333164415802, "learning_rate": 0.000567886756982381, "loss": 0.5059, "step": 20130 }, { "epoch": 1.0222106585777564, "grad_norm": 0.0440650062378424, "learning_rate": 0.0005676672757118675, "loss": 0.5015, "step": 20135 }, { "epoch": 1.022464494675788, "grad_norm": 0.023375514087650193, "learning_rate": 0.0005674477811586053, "loss": 0.4984, "step": 20140 }, { "epoch": 1.0227183307738192, "grad_norm": 0.03157563540299216, "learning_rate": 0.0005672282733656799, "loss": 0.5046, "step": 20145 }, { "epoch": 1.0229721668718508, "grad_norm": 0.02116231894172818, "learning_rate": 0.0005670087523761797, "loss": 0.5165, "step": 20150 }, { "epoch": 1.0232260029698823, "grad_norm": 0.02699273830998635, "learning_rate": 0.0005667892182331958, "loss": 0.5343, "step": 20155 }, { "epoch": 1.0234798390679138, "grad_norm": 0.025520103162852272, "learning_rate": 0.0005665696709798211, "loss": 0.5015, "step": 20160 }, { "epoch": 1.0237336751659454, "grad_norm": 0.02320429503242182, "learning_rate": 0.0005663501106591522, "loss": 0.5069, "step": 20165 }, { "epoch": 1.023987511263977, "grad_norm": 0.03088412096065196, "learning_rate": 0.0005661305373142874, "loss": 0.5269, "step": 20170 }, { "epoch": 1.0242413473620084, "grad_norm": 0.028956706789680344, "learning_rate": 0.0005659109509883279, "loss": 0.5156, "step": 20175 }, { "epoch": 1.0244951834600398, "grad_norm": 0.028907980726983182, "learning_rate": 0.0005656913517243775, "loss": 0.5126, "step": 20180 }, { "epoch": 1.0247490195580713, "grad_norm": 0.031564123214603855, "learning_rate": 0.0005654717395655423, "loss": 0.5212, "step": 20185 }, { "epoch": 1.0250028556561028, "grad_norm": 0.023659437611885405, "learning_rate": 0.0005652521145549312, "loss": 0.5182, "step": 20190 }, { "epoch": 1.0252566917541344, "grad_norm": 0.024467789569289182, "learning_rate": 0.0005650324767356553, "loss": 0.5317, "step": 20195 }, { "epoch": 1.025510527852166, "grad_norm": 0.0259776185535513, "learning_rate": 0.0005648128261508287, "loss": 0.5111, "step": 20200 }, { "epoch": 1.0257643639501974, "grad_norm": 0.037490439119855785, "learning_rate": 0.0005645931628435674, "loss": 0.4861, "step": 20205 }, { "epoch": 1.026018200048229, "grad_norm": 0.033783262227514106, "learning_rate": 0.0005643734868569904, "loss": 0.5077, "step": 20210 }, { "epoch": 1.0262720361462603, "grad_norm": 0.02114752691351358, "learning_rate": 0.0005641537982342189, "loss": 0.5203, "step": 20215 }, { "epoch": 1.0265258722442918, "grad_norm": 0.0438818125159392, "learning_rate": 0.0005639340970183767, "loss": 0.5161, "step": 20220 }, { "epoch": 1.0267797083423233, "grad_norm": 0.025324346639667195, "learning_rate": 0.0005637143832525902, "loss": 0.5301, "step": 20225 }, { "epoch": 1.0270335444403549, "grad_norm": 0.02505419634462526, "learning_rate": 0.000563494656979988, "loss": 0.5142, "step": 20230 }, { "epoch": 1.0272873805383864, "grad_norm": 0.0260932413609203, "learning_rate": 0.0005632749182437013, "loss": 0.476, "step": 20235 }, { "epoch": 1.027541216636418, "grad_norm": 0.02657813325380469, "learning_rate": 0.0005630551670868638, "loss": 0.5121, "step": 20240 }, { "epoch": 1.0277950527344493, "grad_norm": 0.02517945560861088, "learning_rate": 0.0005628354035526113, "loss": 0.5014, "step": 20245 }, { "epoch": 1.0280488888324808, "grad_norm": 0.02457715419344653, "learning_rate": 0.0005626156276840824, "loss": 0.5445, "step": 20250 }, { "epoch": 1.0283027249305123, "grad_norm": 0.023965938720396072, "learning_rate": 0.0005623958395244182, "loss": 0.5173, "step": 20255 }, { "epoch": 1.0285565610285439, "grad_norm": 0.025808791859439455, "learning_rate": 0.0005621760391167618, "loss": 0.512, "step": 20260 }, { "epoch": 1.0288103971265754, "grad_norm": 0.02616645418239067, "learning_rate": 0.0005619562265042589, "loss": 0.5024, "step": 20265 }, { "epoch": 1.029064233224607, "grad_norm": 0.026011495515384515, "learning_rate": 0.0005617364017300579, "loss": 0.5688, "step": 20270 }, { "epoch": 1.0293180693226385, "grad_norm": 0.023645680045192928, "learning_rate": 0.0005615165648373091, "loss": 0.5107, "step": 20275 }, { "epoch": 1.0295719054206698, "grad_norm": 0.030625762897844983, "learning_rate": 0.0005612967158691652, "loss": 0.5425, "step": 20280 }, { "epoch": 1.0298257415187013, "grad_norm": 0.02374291305295429, "learning_rate": 0.0005610768548687818, "loss": 0.4875, "step": 20285 }, { "epoch": 1.0300795776167329, "grad_norm": 0.030056754991982286, "learning_rate": 0.0005608569818793163, "loss": 0.5138, "step": 20290 }, { "epoch": 1.0303334137147644, "grad_norm": 0.021415600184617786, "learning_rate": 0.0005606370969439288, "loss": 0.5262, "step": 20295 }, { "epoch": 1.030587249812796, "grad_norm": 0.035674997734029885, "learning_rate": 0.0005604172001057817, "loss": 0.4913, "step": 20300 }, { "epoch": 1.0308410859108275, "grad_norm": 0.02322983861959061, "learning_rate": 0.0005601972914080394, "loss": 0.5258, "step": 20305 }, { "epoch": 1.0310949220088588, "grad_norm": 0.02417219163699135, "learning_rate": 0.000559977370893869, "loss": 0.523, "step": 20310 }, { "epoch": 1.0313487581068903, "grad_norm": 0.028159975076131768, "learning_rate": 0.0005597574386064398, "loss": 0.4937, "step": 20315 }, { "epoch": 1.0316025942049218, "grad_norm": 0.025480520142062726, "learning_rate": 0.0005595374945889235, "loss": 0.5297, "step": 20320 }, { "epoch": 1.0318564303029534, "grad_norm": 0.02648031129017469, "learning_rate": 0.0005593175388844939, "loss": 0.5191, "step": 20325 }, { "epoch": 1.032110266400985, "grad_norm": 0.027996109985128536, "learning_rate": 0.0005590975715363271, "loss": 0.5182, "step": 20330 }, { "epoch": 1.0323641024990164, "grad_norm": 0.02208647698714924, "learning_rate": 0.0005588775925876019, "loss": 0.4733, "step": 20335 }, { "epoch": 1.032617938597048, "grad_norm": 0.03617341314263012, "learning_rate": 0.0005586576020814986, "loss": 0.5569, "step": 20340 }, { "epoch": 1.0328717746950793, "grad_norm": 0.02362640361558171, "learning_rate": 0.0005584376000612008, "loss": 0.5051, "step": 20345 }, { "epoch": 1.0331256107931108, "grad_norm": 0.024439837400368546, "learning_rate": 0.0005582175865698935, "loss": 0.5007, "step": 20350 }, { "epoch": 1.0333794468911424, "grad_norm": 0.02551816319500186, "learning_rate": 0.0005579975616507642, "loss": 0.5351, "step": 20355 }, { "epoch": 1.033633282989174, "grad_norm": 0.03547530832085492, "learning_rate": 0.0005577775253470028, "loss": 0.5283, "step": 20360 }, { "epoch": 1.0338871190872054, "grad_norm": 0.024034616664719018, "learning_rate": 0.0005575574777018014, "loss": 0.5191, "step": 20365 }, { "epoch": 1.034140955185237, "grad_norm": 0.023421751953444182, "learning_rate": 0.000557337418758354, "loss": 0.5522, "step": 20370 }, { "epoch": 1.0343947912832685, "grad_norm": 0.02493069566308207, "learning_rate": 0.0005571173485598575, "loss": 0.5506, "step": 20375 }, { "epoch": 1.0346486273812998, "grad_norm": 0.02674149513349264, "learning_rate": 0.0005568972671495102, "loss": 0.5057, "step": 20380 }, { "epoch": 1.0349024634793313, "grad_norm": 0.025106714581499427, "learning_rate": 0.000556677174570513, "loss": 0.5033, "step": 20385 }, { "epoch": 1.0351562995773629, "grad_norm": 0.025943628794495237, "learning_rate": 0.0005564570708660692, "loss": 0.4955, "step": 20390 }, { "epoch": 1.0354101356753944, "grad_norm": 0.026309101778550982, "learning_rate": 0.000556236956079384, "loss": 0.4825, "step": 20395 }, { "epoch": 1.035663971773426, "grad_norm": 0.026006362089191562, "learning_rate": 0.0005560168302536645, "loss": 0.5453, "step": 20400 }, { "epoch": 1.0359178078714575, "grad_norm": 0.026998637894890546, "learning_rate": 0.0005557966934321208, "loss": 0.5181, "step": 20405 }, { "epoch": 1.0361716439694888, "grad_norm": 0.023293334910978493, "learning_rate": 0.0005555765456579645, "loss": 0.4928, "step": 20410 }, { "epoch": 1.0364254800675203, "grad_norm": 0.02235857148830006, "learning_rate": 0.0005553563869744092, "loss": 0.5101, "step": 20415 }, { "epoch": 1.0366793161655519, "grad_norm": 0.031120430714844608, "learning_rate": 0.0005551362174246714, "loss": 0.4839, "step": 20420 }, { "epoch": 1.0369331522635834, "grad_norm": 0.03754523909785098, "learning_rate": 0.000554916037051969, "loss": 0.5065, "step": 20425 }, { "epoch": 1.037186988361615, "grad_norm": 0.02326842333764262, "learning_rate": 0.0005546958458995225, "loss": 0.5045, "step": 20430 }, { "epoch": 1.0374408244596465, "grad_norm": 0.02252685117622399, "learning_rate": 0.0005544756440105541, "loss": 0.4895, "step": 20435 }, { "epoch": 1.037694660557678, "grad_norm": 0.022048155029177167, "learning_rate": 0.0005542554314282885, "loss": 0.4994, "step": 20440 }, { "epoch": 1.0379484966557093, "grad_norm": 0.034805475396144946, "learning_rate": 0.0005540352081959524, "loss": 0.5204, "step": 20445 }, { "epoch": 1.0382023327537409, "grad_norm": 0.03443533236482359, "learning_rate": 0.0005538149743567742, "loss": 0.4981, "step": 20450 }, { "epoch": 1.0384561688517724, "grad_norm": 0.02382977310407126, "learning_rate": 0.000553594729953985, "loss": 0.51, "step": 20455 }, { "epoch": 1.038710004949804, "grad_norm": 0.025391666920891502, "learning_rate": 0.0005533744750308173, "loss": 0.5266, "step": 20460 }, { "epoch": 1.0389638410478355, "grad_norm": 0.03609117313209483, "learning_rate": 0.0005531542096305067, "loss": 0.4907, "step": 20465 }, { "epoch": 1.039217677145867, "grad_norm": 0.023507381495373715, "learning_rate": 0.0005529339337962898, "loss": 0.4975, "step": 20470 }, { "epoch": 1.0394715132438983, "grad_norm": 0.07606685865235592, "learning_rate": 0.0005527136475714055, "loss": 0.4953, "step": 20475 }, { "epoch": 1.0397253493419298, "grad_norm": 0.025963486371282662, "learning_rate": 0.0005524933509990953, "loss": 0.5241, "step": 20480 }, { "epoch": 1.0399791854399614, "grad_norm": 0.03372063969736153, "learning_rate": 0.0005522730441226019, "loss": 0.4818, "step": 20485 }, { "epoch": 1.040233021537993, "grad_norm": 0.024045885674214548, "learning_rate": 0.0005520527269851707, "loss": 0.5318, "step": 20490 }, { "epoch": 1.0404868576360244, "grad_norm": 0.025422330712362343, "learning_rate": 0.0005518323996300486, "loss": 0.4953, "step": 20495 }, { "epoch": 1.040740693734056, "grad_norm": 0.033795313139793635, "learning_rate": 0.0005516120621004852, "loss": 0.5, "step": 20500 }, { "epoch": 1.0409945298320875, "grad_norm": 0.023641788988281797, "learning_rate": 0.0005513917144397313, "loss": 0.5393, "step": 20505 }, { "epoch": 1.0412483659301188, "grad_norm": 0.029211469214469557, "learning_rate": 0.0005511713566910401, "loss": 0.5019, "step": 20510 }, { "epoch": 1.0415022020281504, "grad_norm": 0.03256182907446245, "learning_rate": 0.0005509509888976668, "loss": 0.5106, "step": 20515 }, { "epoch": 1.041756038126182, "grad_norm": 0.03264231316359048, "learning_rate": 0.0005507306111028683, "loss": 0.5352, "step": 20520 }, { "epoch": 1.0420098742242134, "grad_norm": 0.04894371601933567, "learning_rate": 0.000550510223349904, "loss": 0.5144, "step": 20525 }, { "epoch": 1.042263710322245, "grad_norm": 0.0439869944668601, "learning_rate": 0.0005502898256820349, "loss": 0.5165, "step": 20530 }, { "epoch": 1.0425175464202765, "grad_norm": 0.03490709301718278, "learning_rate": 0.0005500694181425237, "loss": 0.5228, "step": 20535 }, { "epoch": 1.042771382518308, "grad_norm": 0.02637599113409724, "learning_rate": 0.0005498490007746354, "loss": 0.4868, "step": 20540 }, { "epoch": 1.0430252186163393, "grad_norm": 0.030905442645324806, "learning_rate": 0.0005496285736216369, "loss": 0.5007, "step": 20545 }, { "epoch": 1.0432790547143709, "grad_norm": 0.022912410310244463, "learning_rate": 0.0005494081367267968, "loss": 0.531, "step": 20550 }, { "epoch": 1.0435328908124024, "grad_norm": 0.03822182605381993, "learning_rate": 0.0005491876901333859, "loss": 0.5184, "step": 20555 }, { "epoch": 1.043786726910434, "grad_norm": 0.03854079290096544, "learning_rate": 0.0005489672338846767, "loss": 0.5451, "step": 20560 }, { "epoch": 1.0440405630084655, "grad_norm": 0.023025560697022475, "learning_rate": 0.0005487467680239437, "loss": 0.486, "step": 20565 }, { "epoch": 1.044294399106497, "grad_norm": 0.02820566558878494, "learning_rate": 0.0005485262925944633, "loss": 0.5208, "step": 20570 }, { "epoch": 1.0445482352045283, "grad_norm": 0.023098213177233765, "learning_rate": 0.0005483058076395136, "loss": 0.5211, "step": 20575 }, { "epoch": 1.0448020713025599, "grad_norm": 0.032862139607394404, "learning_rate": 0.0005480853132023746, "loss": 0.4847, "step": 20580 }, { "epoch": 1.0450559074005914, "grad_norm": 0.02296236963012799, "learning_rate": 0.0005478648093263286, "loss": 0.4886, "step": 20585 }, { "epoch": 1.045309743498623, "grad_norm": 0.022572602535311893, "learning_rate": 0.0005476442960546592, "loss": 0.5119, "step": 20590 }, { "epoch": 1.0455635795966545, "grad_norm": 0.030771735483987164, "learning_rate": 0.0005474237734306522, "loss": 0.4762, "step": 20595 }, { "epoch": 1.045817415694686, "grad_norm": 0.03547547036096252, "learning_rate": 0.0005472032414975949, "loss": 0.5127, "step": 20600 }, { "epoch": 1.0460712517927175, "grad_norm": 0.0254360744170134, "learning_rate": 0.0005469827002987767, "loss": 0.4929, "step": 20605 }, { "epoch": 1.0463250878907489, "grad_norm": 0.025518311798158718, "learning_rate": 0.0005467621498774886, "loss": 0.5245, "step": 20610 }, { "epoch": 1.0465789239887804, "grad_norm": 0.026490108041312813, "learning_rate": 0.0005465415902770238, "loss": 0.4944, "step": 20615 }, { "epoch": 1.046832760086812, "grad_norm": 0.02526869174684408, "learning_rate": 0.0005463210215406769, "loss": 0.5045, "step": 20620 }, { "epoch": 1.0470865961848435, "grad_norm": 0.02074411912466121, "learning_rate": 0.0005461004437117445, "loss": 0.51, "step": 20625 }, { "epoch": 1.047340432282875, "grad_norm": 0.027093812806925615, "learning_rate": 0.0005458798568335249, "loss": 0.5166, "step": 20630 }, { "epoch": 1.0475942683809065, "grad_norm": 0.02436102653069909, "learning_rate": 0.0005456592609493182, "loss": 0.5132, "step": 20635 }, { "epoch": 1.047848104478938, "grad_norm": 0.023097199436276732, "learning_rate": 0.0005454386561024263, "loss": 0.4994, "step": 20640 }, { "epoch": 1.0481019405769694, "grad_norm": 0.026331858968549688, "learning_rate": 0.0005452180423361528, "loss": 0.5153, "step": 20645 }, { "epoch": 1.048355776675001, "grad_norm": 0.02645976286908623, "learning_rate": 0.0005449974196938031, "loss": 0.5188, "step": 20650 }, { "epoch": 1.0486096127730324, "grad_norm": 0.025310276627568677, "learning_rate": 0.0005447767882186844, "loss": 0.53, "step": 20655 }, { "epoch": 1.048863448871064, "grad_norm": 0.0261930070884005, "learning_rate": 0.0005445561479541053, "loss": 0.4882, "step": 20660 }, { "epoch": 1.0491172849690955, "grad_norm": 0.025129244942486483, "learning_rate": 0.0005443354989433766, "loss": 0.4951, "step": 20665 }, { "epoch": 1.049371121067127, "grad_norm": 0.020547190474118154, "learning_rate": 0.0005441148412298106, "loss": 0.4623, "step": 20670 }, { "epoch": 1.0496249571651584, "grad_norm": 0.02180036590225857, "learning_rate": 0.0005438941748567212, "loss": 0.5372, "step": 20675 }, { "epoch": 1.04987879326319, "grad_norm": 0.026634325082656105, "learning_rate": 0.0005436734998674242, "loss": 0.526, "step": 20680 }, { "epoch": 1.0501326293612214, "grad_norm": 0.022973149324837107, "learning_rate": 0.0005434528163052371, "loss": 0.4898, "step": 20685 }, { "epoch": 1.050386465459253, "grad_norm": 0.024359590869779388, "learning_rate": 0.0005432321242134787, "loss": 0.504, "step": 20690 }, { "epoch": 1.0506403015572845, "grad_norm": 0.024548501089468676, "learning_rate": 0.0005430114236354701, "loss": 0.5368, "step": 20695 }, { "epoch": 1.050894137655316, "grad_norm": 0.021875608126201377, "learning_rate": 0.0005427907146145333, "loss": 0.5023, "step": 20700 }, { "epoch": 1.0511479737533476, "grad_norm": 0.03438836236481197, "learning_rate": 0.0005425699971939927, "loss": 0.5339, "step": 20705 }, { "epoch": 1.0514018098513789, "grad_norm": 0.023250977742150812, "learning_rate": 0.000542349271417174, "loss": 0.5067, "step": 20710 }, { "epoch": 1.0516556459494104, "grad_norm": 0.030063734343917937, "learning_rate": 0.0005421285373274045, "loss": 0.4875, "step": 20715 }, { "epoch": 1.051909482047442, "grad_norm": 0.028285057645613915, "learning_rate": 0.0005419077949680132, "loss": 0.5006, "step": 20720 }, { "epoch": 1.0521633181454735, "grad_norm": 0.023333102184669698, "learning_rate": 0.0005416870443823308, "loss": 0.4778, "step": 20725 }, { "epoch": 1.052417154243505, "grad_norm": 0.02557383529258429, "learning_rate": 0.0005414662856136894, "loss": 0.53, "step": 20730 }, { "epoch": 1.0526709903415365, "grad_norm": 0.023805131180192746, "learning_rate": 0.0005412455187054229, "loss": 0.5292, "step": 20735 }, { "epoch": 1.0529248264395679, "grad_norm": 0.049484796752778205, "learning_rate": 0.0005410247437008668, "loss": 0.5113, "step": 20740 }, { "epoch": 1.0531786625375994, "grad_norm": 0.04103309197810282, "learning_rate": 0.0005408039606433582, "loss": 0.5546, "step": 20745 }, { "epoch": 1.053432498635631, "grad_norm": 0.03609048454627881, "learning_rate": 0.0005405831695762355, "loss": 0.5268, "step": 20750 }, { "epoch": 1.0536863347336625, "grad_norm": 0.02838028673259011, "learning_rate": 0.0005403623705428391, "loss": 0.5169, "step": 20755 }, { "epoch": 1.053940170831694, "grad_norm": 0.02412838334841983, "learning_rate": 0.0005401415635865106, "loss": 0.4983, "step": 20760 }, { "epoch": 1.0541940069297255, "grad_norm": 0.023534830774553976, "learning_rate": 0.0005399207487505934, "loss": 0.4908, "step": 20765 }, { "epoch": 1.054447843027757, "grad_norm": 0.08793074530330729, "learning_rate": 0.0005396999260784323, "loss": 0.4971, "step": 20770 }, { "epoch": 1.0547016791257884, "grad_norm": 0.02146817209464667, "learning_rate": 0.0005394790956133736, "loss": 0.487, "step": 20775 }, { "epoch": 1.05495551522382, "grad_norm": 0.02956735657834776, "learning_rate": 0.0005392582573987654, "loss": 0.4974, "step": 20780 }, { "epoch": 1.0552093513218515, "grad_norm": 0.03392596129392696, "learning_rate": 0.0005390374114779571, "loss": 0.5112, "step": 20785 }, { "epoch": 1.055463187419883, "grad_norm": 0.024893466438836578, "learning_rate": 0.0005388165578942993, "loss": 0.4961, "step": 20790 }, { "epoch": 1.0557170235179145, "grad_norm": 0.02466824162848842, "learning_rate": 0.0005385956966911451, "loss": 0.5279, "step": 20795 }, { "epoch": 1.055970859615946, "grad_norm": 0.021932982882764882, "learning_rate": 0.000538374827911848, "loss": 0.5209, "step": 20800 }, { "epoch": 1.0562246957139776, "grad_norm": 0.031454975273976886, "learning_rate": 0.0005381539515997636, "loss": 0.5206, "step": 20805 }, { "epoch": 1.056478531812009, "grad_norm": 0.02097302093594751, "learning_rate": 0.0005379330677982487, "loss": 0.4813, "step": 20810 }, { "epoch": 1.0567323679100404, "grad_norm": 0.027013738993206558, "learning_rate": 0.0005377121765506619, "loss": 0.5156, "step": 20815 }, { "epoch": 1.056986204008072, "grad_norm": 0.035362483004186006, "learning_rate": 0.0005374912779003626, "loss": 0.5105, "step": 20820 }, { "epoch": 1.0572400401061035, "grad_norm": 0.028929030452595348, "learning_rate": 0.0005372703718907127, "loss": 0.5047, "step": 20825 }, { "epoch": 1.057493876204135, "grad_norm": 0.024817950996213613, "learning_rate": 0.0005370494585650746, "loss": 0.4983, "step": 20830 }, { "epoch": 1.0577477123021666, "grad_norm": 0.022644591412287526, "learning_rate": 0.0005368285379668125, "loss": 0.5294, "step": 20835 }, { "epoch": 1.0580015484001979, "grad_norm": 0.02520849939725291, "learning_rate": 0.0005366076101392922, "loss": 0.4923, "step": 20840 }, { "epoch": 1.0582553844982294, "grad_norm": 0.023884449834670873, "learning_rate": 0.0005363866751258805, "loss": 0.5227, "step": 20845 }, { "epoch": 1.058509220596261, "grad_norm": 0.0246942336074402, "learning_rate": 0.0005361657329699457, "loss": 0.5014, "step": 20850 }, { "epoch": 1.0587630566942925, "grad_norm": 0.024562310682568606, "learning_rate": 0.0005359447837148582, "loss": 0.5134, "step": 20855 }, { "epoch": 1.059016892792324, "grad_norm": 0.023069233139811948, "learning_rate": 0.0005357238274039888, "loss": 0.5302, "step": 20860 }, { "epoch": 1.0592707288903556, "grad_norm": 0.03147995452519133, "learning_rate": 0.0005355028640807103, "loss": 0.4765, "step": 20865 }, { "epoch": 1.059524564988387, "grad_norm": 0.023156418652504413, "learning_rate": 0.0005352818937883966, "loss": 0.5326, "step": 20870 }, { "epoch": 1.0597784010864184, "grad_norm": 0.023516340548810546, "learning_rate": 0.0005350609165704231, "loss": 0.5014, "step": 20875 }, { "epoch": 1.06003223718445, "grad_norm": 0.02173782389595552, "learning_rate": 0.0005348399324701665, "loss": 0.4892, "step": 20880 }, { "epoch": 1.0602860732824815, "grad_norm": 0.02497605503933453, "learning_rate": 0.0005346189415310049, "loss": 0.5238, "step": 20885 }, { "epoch": 1.060539909380513, "grad_norm": 0.026640052657665187, "learning_rate": 0.0005343979437963178, "loss": 0.5475, "step": 20890 }, { "epoch": 1.0607937454785445, "grad_norm": 0.02611282353111445, "learning_rate": 0.0005341769393094857, "loss": 0.5454, "step": 20895 }, { "epoch": 1.061047581576576, "grad_norm": 0.05663032774630538, "learning_rate": 0.000533955928113891, "loss": 0.5268, "step": 20900 }, { "epoch": 1.0613014176746076, "grad_norm": 0.027192717640165086, "learning_rate": 0.000533734910252917, "loss": 0.5328, "step": 20905 }, { "epoch": 1.061555253772639, "grad_norm": 0.0479761926852491, "learning_rate": 0.0005335138857699482, "loss": 0.5321, "step": 20910 }, { "epoch": 1.0618090898706705, "grad_norm": 0.03449047232824588, "learning_rate": 0.0005332928547083707, "loss": 0.5135, "step": 20915 }, { "epoch": 1.062062925968702, "grad_norm": 0.03882056462374773, "learning_rate": 0.0005330718171115721, "loss": 0.5159, "step": 20920 }, { "epoch": 1.0623167620667335, "grad_norm": 0.029049562658363927, "learning_rate": 0.0005328507730229407, "loss": 0.5109, "step": 20925 }, { "epoch": 1.062570598164765, "grad_norm": 0.03658465415118424, "learning_rate": 0.0005326297224858661, "loss": 0.5055, "step": 20930 }, { "epoch": 1.0628244342627966, "grad_norm": 0.026304675203290016, "learning_rate": 0.00053240866554374, "loss": 0.4788, "step": 20935 }, { "epoch": 1.063078270360828, "grad_norm": 0.02473561068343538, "learning_rate": 0.0005321876022399542, "loss": 0.4786, "step": 20940 }, { "epoch": 1.0633321064588594, "grad_norm": 0.027156090763984255, "learning_rate": 0.0005319665326179028, "loss": 0.5033, "step": 20945 }, { "epoch": 1.063585942556891, "grad_norm": 0.02473336770832404, "learning_rate": 0.0005317454567209804, "loss": 0.4906, "step": 20950 }, { "epoch": 1.0638397786549225, "grad_norm": 0.03156233005770823, "learning_rate": 0.0005315243745925833, "loss": 0.5012, "step": 20955 }, { "epoch": 1.064093614752954, "grad_norm": 0.02170352188784017, "learning_rate": 0.0005313032862761085, "loss": 0.5181, "step": 20960 }, { "epoch": 1.0643474508509856, "grad_norm": 0.022630967619430002, "learning_rate": 0.0005310821918149548, "loss": 0.4965, "step": 20965 }, { "epoch": 1.0646012869490171, "grad_norm": 0.021957567917221844, "learning_rate": 0.0005308610912525218, "loss": 0.5313, "step": 20970 }, { "epoch": 1.0648551230470484, "grad_norm": 0.0830362281234276, "learning_rate": 0.0005306399846322106, "loss": 0.5142, "step": 20975 }, { "epoch": 1.06510895914508, "grad_norm": 0.021880705238877092, "learning_rate": 0.000530418871997423, "loss": 0.4935, "step": 20980 }, { "epoch": 1.0653627952431115, "grad_norm": 0.025755954372723704, "learning_rate": 0.0005301977533915627, "loss": 0.5006, "step": 20985 }, { "epoch": 1.065616631341143, "grad_norm": 0.02290313209246396, "learning_rate": 0.000529976628858034, "loss": 0.4983, "step": 20990 }, { "epoch": 1.0658704674391746, "grad_norm": 0.02118947492338632, "learning_rate": 0.0005297554984402426, "loss": 0.5367, "step": 20995 }, { "epoch": 1.066124303537206, "grad_norm": 0.025500946021106714, "learning_rate": 0.0005295343621815952, "loss": 0.5239, "step": 21000 }, { "epoch": 1.0663781396352374, "grad_norm": 0.02849283742078081, "learning_rate": 0.0005293132201254996, "loss": 0.5033, "step": 21005 }, { "epoch": 1.066631975733269, "grad_norm": 0.023519558376559214, "learning_rate": 0.0005290920723153653, "loss": 0.5283, "step": 21010 }, { "epoch": 1.0668858118313005, "grad_norm": 0.023929624757609843, "learning_rate": 0.0005288709187946022, "loss": 0.5176, "step": 21015 }, { "epoch": 1.067139647929332, "grad_norm": 0.024787372939198683, "learning_rate": 0.0005286497596066218, "loss": 0.5717, "step": 21020 }, { "epoch": 1.0673934840273636, "grad_norm": 0.02105047392838496, "learning_rate": 0.0005284285947948364, "loss": 0.4807, "step": 21025 }, { "epoch": 1.067647320125395, "grad_norm": 0.021809780590215072, "learning_rate": 0.0005282074244026597, "loss": 0.4876, "step": 21030 }, { "epoch": 1.0679011562234266, "grad_norm": 0.02979096066481348, "learning_rate": 0.0005279862484735059, "loss": 0.4964, "step": 21035 }, { "epoch": 1.068154992321458, "grad_norm": 0.02698075454085126, "learning_rate": 0.0005277650670507915, "loss": 0.5425, "step": 21040 }, { "epoch": 1.0684088284194895, "grad_norm": 0.02305290180830383, "learning_rate": 0.0005275438801779327, "loss": 0.4809, "step": 21045 }, { "epoch": 1.068662664517521, "grad_norm": 0.024541759228573837, "learning_rate": 0.0005273226878983476, "loss": 0.5089, "step": 21050 }, { "epoch": 1.0689165006155525, "grad_norm": 0.022014145757905085, "learning_rate": 0.0005271014902554552, "loss": 0.4971, "step": 21055 }, { "epoch": 1.069170336713584, "grad_norm": 0.024792727012130904, "learning_rate": 0.0005268802872926755, "loss": 0.5472, "step": 21060 }, { "epoch": 1.0694241728116156, "grad_norm": 0.02829315368981998, "learning_rate": 0.0005266590790534292, "loss": 0.5336, "step": 21065 }, { "epoch": 1.069678008909647, "grad_norm": 0.022298962580040806, "learning_rate": 0.0005264378655811388, "loss": 0.4895, "step": 21070 }, { "epoch": 1.0699318450076785, "grad_norm": 0.029443771995413214, "learning_rate": 0.0005262166469192273, "loss": 0.5068, "step": 21075 }, { "epoch": 1.07018568110571, "grad_norm": 0.035247934270422096, "learning_rate": 0.0005259954231111186, "loss": 0.5181, "step": 21080 }, { "epoch": 1.0704395172037415, "grad_norm": 0.02766821768801607, "learning_rate": 0.000525774194200238, "loss": 0.5312, "step": 21085 }, { "epoch": 1.070693353301773, "grad_norm": 0.034962832983413614, "learning_rate": 0.0005255529602300118, "loss": 0.5087, "step": 21090 }, { "epoch": 1.0709471893998046, "grad_norm": 0.024917411843302463, "learning_rate": 0.0005253317212438668, "loss": 0.5247, "step": 21095 }, { "epoch": 1.0712010254978361, "grad_norm": 0.022976680538935538, "learning_rate": 0.0005251104772852312, "loss": 0.527, "step": 21100 }, { "epoch": 1.0714548615958674, "grad_norm": 0.023641409157934393, "learning_rate": 0.0005248892283975341, "loss": 0.5004, "step": 21105 }, { "epoch": 1.071708697693899, "grad_norm": 0.026029533102603888, "learning_rate": 0.0005246679746242058, "loss": 0.5154, "step": 21110 }, { "epoch": 1.0719625337919305, "grad_norm": 0.03545812118043111, "learning_rate": 0.000524446716008677, "loss": 0.5032, "step": 21115 }, { "epoch": 1.072216369889962, "grad_norm": 0.02325775970323228, "learning_rate": 0.0005242254525943799, "loss": 0.5166, "step": 21120 }, { "epoch": 1.0724702059879936, "grad_norm": 0.027063279281270988, "learning_rate": 0.000524004184424747, "loss": 0.5139, "step": 21125 }, { "epoch": 1.0727240420860251, "grad_norm": 0.022406878965903397, "learning_rate": 0.0005237829115432124, "loss": 0.5163, "step": 21130 }, { "epoch": 1.0729778781840567, "grad_norm": 0.020476946022531705, "learning_rate": 0.000523561633993211, "loss": 0.4938, "step": 21135 }, { "epoch": 1.073231714282088, "grad_norm": 0.02946149615897215, "learning_rate": 0.0005233403518181784, "loss": 0.4777, "step": 21140 }, { "epoch": 1.0734855503801195, "grad_norm": 0.022659371359572644, "learning_rate": 0.000523119065061551, "loss": 0.52, "step": 21145 }, { "epoch": 1.073739386478151, "grad_norm": 0.024333860488001735, "learning_rate": 0.0005228977737667665, "loss": 0.5307, "step": 21150 }, { "epoch": 1.0739932225761826, "grad_norm": 0.024349542450303005, "learning_rate": 0.0005226764779772632, "loss": 0.5321, "step": 21155 }, { "epoch": 1.074247058674214, "grad_norm": 0.0349413047957909, "learning_rate": 0.0005224551777364803, "loss": 0.5386, "step": 21160 }, { "epoch": 1.0745008947722456, "grad_norm": 0.024434904354044745, "learning_rate": 0.0005222338730878581, "loss": 0.5224, "step": 21165 }, { "epoch": 1.0747547308702772, "grad_norm": 0.022587237289784082, "learning_rate": 0.0005220125640748375, "loss": 0.5014, "step": 21170 }, { "epoch": 1.0750085669683085, "grad_norm": 0.028695622755737175, "learning_rate": 0.0005217912507408602, "loss": 0.5129, "step": 21175 }, { "epoch": 1.07526240306634, "grad_norm": 0.04548078606712318, "learning_rate": 0.0005215699331293692, "loss": 0.4981, "step": 21180 }, { "epoch": 1.0755162391643716, "grad_norm": 0.03372321027470343, "learning_rate": 0.0005213486112838076, "loss": 0.4986, "step": 21185 }, { "epoch": 1.075770075262403, "grad_norm": 0.03485560152221166, "learning_rate": 0.0005211272852476204, "loss": 0.5012, "step": 21190 }, { "epoch": 1.0760239113604346, "grad_norm": 0.025328513759871395, "learning_rate": 0.0005209059550642523, "loss": 0.536, "step": 21195 }, { "epoch": 1.0762777474584662, "grad_norm": 0.0382721168074841, "learning_rate": 0.0005206846207771496, "loss": 0.5569, "step": 21200 }, { "epoch": 1.0765315835564975, "grad_norm": 0.02520714240837497, "learning_rate": 0.0005204632824297589, "loss": 0.5249, "step": 21205 }, { "epoch": 1.076785419654529, "grad_norm": 0.028982430714109, "learning_rate": 0.0005202419400655281, "loss": 0.5233, "step": 21210 }, { "epoch": 1.0770392557525605, "grad_norm": 0.02574048454288774, "learning_rate": 0.0005200205937279052, "loss": 0.4889, "step": 21215 }, { "epoch": 1.077293091850592, "grad_norm": 0.03233762725205312, "learning_rate": 0.0005197992434603397, "loss": 0.5201, "step": 21220 }, { "epoch": 1.0775469279486236, "grad_norm": 0.026051642045672115, "learning_rate": 0.0005195778893062814, "loss": 0.5596, "step": 21225 }, { "epoch": 1.0778007640466551, "grad_norm": 0.034440397145426654, "learning_rate": 0.000519356531309181, "loss": 0.508, "step": 21230 }, { "epoch": 1.0780546001446867, "grad_norm": 0.024779579773170867, "learning_rate": 0.0005191351695124902, "loss": 0.511, "step": 21235 }, { "epoch": 1.078308436242718, "grad_norm": 0.027199827628791112, "learning_rate": 0.000518913803959661, "loss": 0.5391, "step": 21240 }, { "epoch": 1.0785622723407495, "grad_norm": 0.039295388227057915, "learning_rate": 0.0005186924346941463, "loss": 0.5159, "step": 21245 }, { "epoch": 1.078816108438781, "grad_norm": 0.03140488277705228, "learning_rate": 0.0005184710617593998, "loss": 0.5333, "step": 21250 }, { "epoch": 1.0790699445368126, "grad_norm": 0.037347453599965476, "learning_rate": 0.0005182496851988763, "loss": 0.4879, "step": 21255 }, { "epoch": 1.0793237806348441, "grad_norm": 0.030428369817123643, "learning_rate": 0.0005180283050560304, "loss": 0.5088, "step": 21260 }, { "epoch": 1.0795776167328757, "grad_norm": 0.02612433843324173, "learning_rate": 0.0005178069213743182, "loss": 0.4955, "step": 21265 }, { "epoch": 1.079831452830907, "grad_norm": 0.028085153442456857, "learning_rate": 0.0005175855341971961, "loss": 0.5297, "step": 21270 }, { "epoch": 1.0800852889289385, "grad_norm": 0.030147412276925384, "learning_rate": 0.0005173641435681212, "loss": 0.4905, "step": 21275 }, { "epoch": 1.08033912502697, "grad_norm": 0.029195226257636587, "learning_rate": 0.0005171427495305517, "loss": 0.5101, "step": 21280 }, { "epoch": 1.0805929611250016, "grad_norm": 0.02573350843933712, "learning_rate": 0.000516921352127946, "loss": 0.5075, "step": 21285 }, { "epoch": 1.0808467972230331, "grad_norm": 0.03039462615229446, "learning_rate": 0.0005166999514037631, "loss": 0.5506, "step": 21290 }, { "epoch": 1.0811006333210647, "grad_norm": 0.03422447727787612, "learning_rate": 0.0005164785474014631, "loss": 0.4838, "step": 21295 }, { "epoch": 1.0813544694190962, "grad_norm": 0.031835929504930464, "learning_rate": 0.0005162571401645065, "loss": 0.5303, "step": 21300 }, { "epoch": 1.0816083055171275, "grad_norm": 0.024796482258768755, "learning_rate": 0.0005160357297363541, "loss": 0.5407, "step": 21305 }, { "epoch": 1.081862141615159, "grad_norm": 0.024946672122012212, "learning_rate": 0.0005158143161604682, "loss": 0.5406, "step": 21310 }, { "epoch": 1.0821159777131906, "grad_norm": 0.028919849891573573, "learning_rate": 0.0005155928994803108, "loss": 0.5164, "step": 21315 }, { "epoch": 1.082369813811222, "grad_norm": 0.02634430322655897, "learning_rate": 0.0005153714797393451, "loss": 0.5272, "step": 21320 }, { "epoch": 1.0826236499092536, "grad_norm": 0.034896141494776906, "learning_rate": 0.0005151500569810345, "loss": 0.5127, "step": 21325 }, { "epoch": 1.0828774860072852, "grad_norm": 0.022623036190050343, "learning_rate": 0.0005149286312488432, "loss": 0.5492, "step": 21330 }, { "epoch": 1.0831313221053165, "grad_norm": 0.02338043777628199, "learning_rate": 0.0005147072025862362, "loss": 0.4818, "step": 21335 }, { "epoch": 1.083385158203348, "grad_norm": 0.02630639246261155, "learning_rate": 0.0005144857710366785, "loss": 0.5142, "step": 21340 }, { "epoch": 1.0836389943013796, "grad_norm": 0.02480597381903302, "learning_rate": 0.0005142643366436362, "loss": 0.5441, "step": 21345 }, { "epoch": 1.083892830399411, "grad_norm": 0.02501822087153717, "learning_rate": 0.0005140428994505759, "loss": 0.5228, "step": 21350 }, { "epoch": 1.0841466664974426, "grad_norm": 0.027223628649433902, "learning_rate": 0.0005138214595009643, "loss": 0.5148, "step": 21355 }, { "epoch": 1.0844005025954742, "grad_norm": 0.022696406307624702, "learning_rate": 0.0005136000168382693, "loss": 0.5075, "step": 21360 }, { "epoch": 1.0846543386935057, "grad_norm": 0.032940782729276794, "learning_rate": 0.0005133785715059586, "loss": 0.5322, "step": 21365 }, { "epoch": 1.084908174791537, "grad_norm": 0.022402759329344542, "learning_rate": 0.0005131571235475012, "loss": 0.5077, "step": 21370 }, { "epoch": 1.0851620108895685, "grad_norm": 0.038649939505366236, "learning_rate": 0.000512935673006366, "loss": 0.5236, "step": 21375 }, { "epoch": 1.0854158469876, "grad_norm": 0.022969477729405774, "learning_rate": 0.0005127142199260228, "loss": 0.4826, "step": 21380 }, { "epoch": 1.0856696830856316, "grad_norm": 0.02759754028356563, "learning_rate": 0.0005124927643499415, "loss": 0.5351, "step": 21385 }, { "epoch": 1.0859235191836631, "grad_norm": 0.026556927936903868, "learning_rate": 0.000512271306321593, "loss": 0.5354, "step": 21390 }, { "epoch": 1.0861773552816947, "grad_norm": 0.024696980583765446, "learning_rate": 0.000512049845884448, "loss": 0.5162, "step": 21395 }, { "epoch": 1.0864311913797262, "grad_norm": 0.026456834753896295, "learning_rate": 0.0005118283830819786, "loss": 0.4936, "step": 21400 }, { "epoch": 1.0866850274777575, "grad_norm": 0.023930744190106705, "learning_rate": 0.0005116069179576565, "loss": 0.4917, "step": 21405 }, { "epoch": 1.086938863575789, "grad_norm": 0.02349838967686964, "learning_rate": 0.0005113854505549543, "loss": 0.5321, "step": 21410 }, { "epoch": 1.0871926996738206, "grad_norm": 0.032693372410154034, "learning_rate": 0.000511163980917345, "loss": 0.471, "step": 21415 }, { "epoch": 1.0874465357718521, "grad_norm": 0.03223543121863213, "learning_rate": 0.0005109425090883019, "loss": 0.5017, "step": 21420 }, { "epoch": 1.0877003718698837, "grad_norm": 0.027354722210189156, "learning_rate": 0.0005107210351112986, "loss": 0.5257, "step": 21425 }, { "epoch": 1.0879542079679152, "grad_norm": 0.020761117911752558, "learning_rate": 0.0005104995590298098, "loss": 0.5032, "step": 21430 }, { "epoch": 1.0882080440659467, "grad_norm": 0.025848466019290892, "learning_rate": 0.0005102780808873098, "loss": 0.4959, "step": 21435 }, { "epoch": 1.088461880163978, "grad_norm": 0.026496761790380605, "learning_rate": 0.000510056600727274, "loss": 0.4579, "step": 21440 }, { "epoch": 1.0887157162620096, "grad_norm": 0.024422392773643196, "learning_rate": 0.0005098351185931775, "loss": 0.503, "step": 21445 }, { "epoch": 1.0889695523600411, "grad_norm": 0.04356332488795266, "learning_rate": 0.0005096136345284963, "loss": 0.5012, "step": 21450 }, { "epoch": 1.0892233884580726, "grad_norm": 0.025205409841538344, "learning_rate": 0.0005093921485767066, "loss": 0.5125, "step": 21455 }, { "epoch": 1.0894772245561042, "grad_norm": 0.03256381052307056, "learning_rate": 0.0005091706607812848, "loss": 0.5021, "step": 21460 }, { "epoch": 1.0897310606541357, "grad_norm": 0.022789084607815834, "learning_rate": 0.0005089491711857083, "loss": 0.5027, "step": 21465 }, { "epoch": 1.089984896752167, "grad_norm": 0.022777957966430507, "learning_rate": 0.0005087276798334539, "loss": 0.4724, "step": 21470 }, { "epoch": 1.0902387328501986, "grad_norm": 0.024179892808939247, "learning_rate": 0.0005085061867679995, "loss": 0.5076, "step": 21475 }, { "epoch": 1.09049256894823, "grad_norm": 0.023288334670357742, "learning_rate": 0.0005082846920328232, "loss": 0.5211, "step": 21480 }, { "epoch": 1.0907464050462616, "grad_norm": 0.024409120224983153, "learning_rate": 0.0005080631956714029, "loss": 0.5234, "step": 21485 }, { "epoch": 1.0910002411442932, "grad_norm": 0.02599437175288467, "learning_rate": 0.0005078416977272178, "loss": 0.5029, "step": 21490 }, { "epoch": 1.0912540772423247, "grad_norm": 0.022222357794203767, "learning_rate": 0.0005076201982437464, "loss": 0.5228, "step": 21495 }, { "epoch": 1.0915079133403562, "grad_norm": 0.02880385866020692, "learning_rate": 0.0005073986972644681, "loss": 0.5447, "step": 21500 }, { "epoch": 1.0917617494383876, "grad_norm": 0.020822683076055126, "learning_rate": 0.0005071771948328624, "loss": 0.4931, "step": 21505 }, { "epoch": 1.092015585536419, "grad_norm": 0.02398468484355743, "learning_rate": 0.0005069556909924092, "loss": 0.482, "step": 21510 }, { "epoch": 1.0922694216344506, "grad_norm": 0.02807227467217408, "learning_rate": 0.0005067341857865885, "loss": 0.5237, "step": 21515 }, { "epoch": 1.0925232577324822, "grad_norm": 0.02290810606351165, "learning_rate": 0.0005065126792588807, "loss": 0.5166, "step": 21520 }, { "epoch": 1.0927770938305137, "grad_norm": 0.028239546173554175, "learning_rate": 0.0005062911714527664, "loss": 0.5152, "step": 21525 }, { "epoch": 1.0930309299285452, "grad_norm": 0.026605118843009996, "learning_rate": 0.0005060696624117266, "loss": 0.4891, "step": 21530 }, { "epoch": 1.0932847660265765, "grad_norm": 0.03451731867184281, "learning_rate": 0.0005058481521792424, "loss": 0.5054, "step": 21535 }, { "epoch": 1.093538602124608, "grad_norm": 0.022870995542147783, "learning_rate": 0.000505626640798795, "loss": 0.5094, "step": 21540 }, { "epoch": 1.0937924382226396, "grad_norm": 0.023384178570215613, "learning_rate": 0.000505405128313866, "loss": 0.4954, "step": 21545 }, { "epoch": 1.0940462743206711, "grad_norm": 0.021332217474057745, "learning_rate": 0.0005051836147679374, "loss": 0.4802, "step": 21550 }, { "epoch": 1.0943001104187027, "grad_norm": 0.025123000830193358, "learning_rate": 0.000504962100204491, "loss": 0.4802, "step": 21555 }, { "epoch": 1.0945539465167342, "grad_norm": 0.025473977289128598, "learning_rate": 0.0005047405846670091, "loss": 0.4966, "step": 21560 }, { "epoch": 1.0948077826147657, "grad_norm": 0.026371420615893433, "learning_rate": 0.0005045190681989742, "loss": 0.4988, "step": 21565 }, { "epoch": 1.095061618712797, "grad_norm": 0.022460113412766883, "learning_rate": 0.0005042975508438687, "loss": 0.4925, "step": 21570 }, { "epoch": 1.0953154548108286, "grad_norm": 0.02307578944045605, "learning_rate": 0.0005040760326451752, "loss": 0.4906, "step": 21575 }, { "epoch": 1.0955692909088601, "grad_norm": 0.03059351913144231, "learning_rate": 0.000503854513646377, "loss": 0.4782, "step": 21580 }, { "epoch": 1.0958231270068917, "grad_norm": 0.024544268108570917, "learning_rate": 0.000503632993890957, "loss": 0.4977, "step": 21585 }, { "epoch": 1.0960769631049232, "grad_norm": 0.021615409644089055, "learning_rate": 0.0005034114734223983, "loss": 0.5147, "step": 21590 }, { "epoch": 1.0963307992029547, "grad_norm": 0.025081101527570617, "learning_rate": 0.0005031899522841845, "loss": 0.5216, "step": 21595 }, { "epoch": 1.096584635300986, "grad_norm": 0.02738150628716518, "learning_rate": 0.0005029684305197989, "loss": 0.4893, "step": 21600 }, { "epoch": 1.0968384713990176, "grad_norm": 0.030632362820872255, "learning_rate": 0.000502746908172725, "loss": 0.512, "step": 21605 }, { "epoch": 1.0970923074970491, "grad_norm": 0.03751097192664053, "learning_rate": 0.000502525385286447, "loss": 0.4916, "step": 21610 }, { "epoch": 1.0973461435950806, "grad_norm": 0.03045939530777982, "learning_rate": 0.0005023038619044485, "loss": 0.4898, "step": 21615 }, { "epoch": 1.0975999796931122, "grad_norm": 0.025961661067633108, "learning_rate": 0.0005020823380702133, "loss": 0.5124, "step": 21620 }, { "epoch": 1.0978538157911437, "grad_norm": 0.026346768663115368, "learning_rate": 0.0005018608138272255, "loss": 0.4997, "step": 21625 }, { "epoch": 1.0981076518891753, "grad_norm": 0.035779285163878446, "learning_rate": 0.0005016392892189692, "loss": 0.5007, "step": 21630 }, { "epoch": 1.0983614879872066, "grad_norm": 0.022963332458905212, "learning_rate": 0.0005014177642889286, "loss": 0.5089, "step": 21635 }, { "epoch": 1.098615324085238, "grad_norm": 0.027026555042418753, "learning_rate": 0.000501196239080588, "loss": 0.4791, "step": 21640 }, { "epoch": 1.0988691601832696, "grad_norm": 0.026351077220473095, "learning_rate": 0.0005009747136374317, "loss": 0.5035, "step": 21645 }, { "epoch": 1.0991229962813012, "grad_norm": 0.02173089121604654, "learning_rate": 0.0005007531880029438, "loss": 0.4921, "step": 21650 }, { "epoch": 1.0993768323793327, "grad_norm": 0.02864481532801961, "learning_rate": 0.000500531662220609, "loss": 0.5183, "step": 21655 }, { "epoch": 1.0996306684773642, "grad_norm": 0.02446699653140137, "learning_rate": 0.0005003101363339114, "loss": 0.5084, "step": 21660 }, { "epoch": 1.0998845045753955, "grad_norm": 0.023667829604516112, "learning_rate": 0.0005000886103863355, "loss": 0.4801, "step": 21665 }, { "epoch": 1.100138340673427, "grad_norm": 0.023439234864149394, "learning_rate": 0.0004998670844213661, "loss": 0.5036, "step": 21670 }, { "epoch": 1.1003921767714586, "grad_norm": 0.027040266970354027, "learning_rate": 0.0004996455584824873, "loss": 0.5186, "step": 21675 }, { "epoch": 1.1006460128694902, "grad_norm": 0.0317481536525307, "learning_rate": 0.0004994240326131837, "loss": 0.5298, "step": 21680 }, { "epoch": 1.1008998489675217, "grad_norm": 0.023747077663502984, "learning_rate": 0.0004992025068569395, "loss": 0.5139, "step": 21685 }, { "epoch": 1.1011536850655532, "grad_norm": 0.035914229145339024, "learning_rate": 0.0004989809812572392, "loss": 0.553, "step": 21690 }, { "epoch": 1.1014075211635848, "grad_norm": 0.022286343442528934, "learning_rate": 0.0004987594558575673, "loss": 0.511, "step": 21695 }, { "epoch": 1.1016613572616163, "grad_norm": 0.032836479339666325, "learning_rate": 0.0004985379307014079, "loss": 0.5277, "step": 21700 }, { "epoch": 1.1019151933596476, "grad_norm": 0.021909939697584042, "learning_rate": 0.0004983164058322455, "loss": 0.5027, "step": 21705 }, { "epoch": 1.1021690294576791, "grad_norm": 0.036886073050320635, "learning_rate": 0.000498094881293564, "loss": 0.5319, "step": 21710 }, { "epoch": 1.1024228655557107, "grad_norm": 0.04010799722360534, "learning_rate": 0.000497873357128848, "loss": 0.493, "step": 21715 }, { "epoch": 1.1026767016537422, "grad_norm": 0.03364367343902673, "learning_rate": 0.0004976518333815814, "loss": 0.5138, "step": 21720 }, { "epoch": 1.1029305377517737, "grad_norm": 0.03065112641730614, "learning_rate": 0.0004974303100952483, "loss": 0.4981, "step": 21725 }, { "epoch": 1.1031843738498053, "grad_norm": 0.02896054754547121, "learning_rate": 0.0004972087873133323, "loss": 0.5215, "step": 21730 }, { "epoch": 1.1034382099478366, "grad_norm": 0.03710991307969362, "learning_rate": 0.0004969872650793176, "loss": 0.4901, "step": 21735 }, { "epoch": 1.1036920460458681, "grad_norm": 0.022960913289772747, "learning_rate": 0.0004967657434366877, "loss": 0.4889, "step": 21740 }, { "epoch": 1.1039458821438997, "grad_norm": 0.025188988881946872, "learning_rate": 0.0004965442224289262, "loss": 0.4756, "step": 21745 }, { "epoch": 1.1041997182419312, "grad_norm": 0.022061989690903828, "learning_rate": 0.0004963227020995167, "loss": 0.5295, "step": 21750 }, { "epoch": 1.1044535543399627, "grad_norm": 0.03873982984670393, "learning_rate": 0.0004961011824919422, "loss": 0.5255, "step": 21755 }, { "epoch": 1.1047073904379943, "grad_norm": 0.024110509410144455, "learning_rate": 0.0004958796636496864, "loss": 0.4762, "step": 21760 }, { "epoch": 1.1049612265360258, "grad_norm": 0.024542539175151357, "learning_rate": 0.0004956581456162319, "loss": 0.4792, "step": 21765 }, { "epoch": 1.105215062634057, "grad_norm": 0.022363840834245956, "learning_rate": 0.0004954366284350617, "loss": 0.5118, "step": 21770 }, { "epoch": 1.1054688987320886, "grad_norm": 0.025982163556048574, "learning_rate": 0.0004952151121496587, "loss": 0.5072, "step": 21775 }, { "epoch": 1.1057227348301202, "grad_norm": 0.02270591702590939, "learning_rate": 0.0004949935968035054, "loss": 0.504, "step": 21780 }, { "epoch": 1.1059765709281517, "grad_norm": 0.022406489019998924, "learning_rate": 0.000494772082440084, "loss": 0.4982, "step": 21785 }, { "epoch": 1.1062304070261832, "grad_norm": 0.025696915164668793, "learning_rate": 0.0004945505691028769, "loss": 0.5271, "step": 21790 }, { "epoch": 1.1064842431242148, "grad_norm": 0.02202726327814844, "learning_rate": 0.0004943290568353657, "loss": 0.4841, "step": 21795 }, { "epoch": 1.106738079222246, "grad_norm": 0.027349694310620433, "learning_rate": 0.0004941075456810324, "loss": 0.4977, "step": 21800 }, { "epoch": 1.1069919153202776, "grad_norm": 0.021841600344543598, "learning_rate": 0.0004938860356833585, "loss": 0.4939, "step": 21805 }, { "epoch": 1.1072457514183092, "grad_norm": 0.024165588262214474, "learning_rate": 0.0004936645268858253, "loss": 0.4974, "step": 21810 }, { "epoch": 1.1074995875163407, "grad_norm": 0.020070193682354385, "learning_rate": 0.000493443019331914, "loss": 0.4816, "step": 21815 }, { "epoch": 1.1077534236143722, "grad_norm": 0.024862032516753653, "learning_rate": 0.0004932215130651052, "loss": 0.5147, "step": 21820 }, { "epoch": 1.1080072597124038, "grad_norm": 0.025675685849875715, "learning_rate": 0.0004930000081288797, "loss": 0.4855, "step": 21825 }, { "epoch": 1.1082610958104353, "grad_norm": 0.02461817640839028, "learning_rate": 0.0004927785045667173, "loss": 0.4864, "step": 21830 }, { "epoch": 1.1085149319084666, "grad_norm": 0.023549777823439446, "learning_rate": 0.0004925570024220987, "loss": 0.5247, "step": 21835 }, { "epoch": 1.1087687680064982, "grad_norm": 0.022915635775272027, "learning_rate": 0.0004923355017385035, "loss": 0.4903, "step": 21840 }, { "epoch": 1.1090226041045297, "grad_norm": 0.028502210315459423, "learning_rate": 0.000492114002559411, "loss": 0.4815, "step": 21845 }, { "epoch": 1.1092764402025612, "grad_norm": 0.03661671337568482, "learning_rate": 0.0004918925049283005, "loss": 0.5259, "step": 21850 }, { "epoch": 1.1095302763005928, "grad_norm": 0.034776988326917536, "learning_rate": 0.0004916710088886508, "loss": 0.5042, "step": 21855 }, { "epoch": 1.1097841123986243, "grad_norm": 0.022641065605251254, "learning_rate": 0.0004914495144839406, "loss": 0.4759, "step": 21860 }, { "epoch": 1.1100379484966556, "grad_norm": 0.02470161392259303, "learning_rate": 0.0004912280217576481, "loss": 0.484, "step": 21865 }, { "epoch": 1.1102917845946871, "grad_norm": 0.02216524988218757, "learning_rate": 0.0004910065307532511, "loss": 0.514, "step": 21870 }, { "epoch": 1.1105456206927187, "grad_norm": 0.022165983415213607, "learning_rate": 0.0004907850415142273, "loss": 0.4873, "step": 21875 }, { "epoch": 1.1107994567907502, "grad_norm": 0.02581011869369852, "learning_rate": 0.0004905635540840539, "loss": 0.4913, "step": 21880 }, { "epoch": 1.1110532928887817, "grad_norm": 0.02854559216993533, "learning_rate": 0.0004903420685062077, "loss": 0.4985, "step": 21885 }, { "epoch": 1.1113071289868133, "grad_norm": 0.031286700118217534, "learning_rate": 0.0004901205848241654, "loss": 0.5251, "step": 21890 }, { "epoch": 1.1115609650848448, "grad_norm": 0.027160788459885017, "learning_rate": 0.0004898991030814028, "loss": 0.5148, "step": 21895 }, { "epoch": 1.1118148011828761, "grad_norm": 0.024176529505431532, "learning_rate": 0.000489677623321396, "loss": 0.4936, "step": 21900 }, { "epoch": 1.1120686372809077, "grad_norm": 0.039337612553737564, "learning_rate": 0.0004894561455876204, "loss": 0.5253, "step": 21905 }, { "epoch": 1.1123224733789392, "grad_norm": 0.022635785746774068, "learning_rate": 0.0004892346699235507, "loss": 0.5176, "step": 21910 }, { "epoch": 1.1125763094769707, "grad_norm": 0.02559706784297847, "learning_rate": 0.0004890131963726617, "loss": 0.5059, "step": 21915 }, { "epoch": 1.1128301455750023, "grad_norm": 0.021313731704569797, "learning_rate": 0.0004887917249784275, "loss": 0.4962, "step": 21920 }, { "epoch": 1.1130839816730338, "grad_norm": 0.03607627354015021, "learning_rate": 0.0004885702557843217, "loss": 0.4859, "step": 21925 }, { "epoch": 1.113337817771065, "grad_norm": 0.02559178946483111, "learning_rate": 0.0004883487888338177, "loss": 0.4888, "step": 21930 }, { "epoch": 1.1135916538690966, "grad_norm": 0.023591231457929233, "learning_rate": 0.0004881273241703884, "loss": 0.5408, "step": 21935 }, { "epoch": 1.1138454899671282, "grad_norm": 0.032232207206550165, "learning_rate": 0.00048790586183750605, "loss": 0.4845, "step": 21940 }, { "epoch": 1.1140993260651597, "grad_norm": 0.023369247501599504, "learning_rate": 0.0004876844018786428, "loss": 0.5135, "step": 21945 }, { "epoch": 1.1143531621631912, "grad_norm": 0.021929243467260426, "learning_rate": 0.00048746294433727003, "loss": 0.5041, "step": 21950 }, { "epoch": 1.1146069982612228, "grad_norm": 0.024271700794895327, "learning_rate": 0.0004872414892568585, "loss": 0.5053, "step": 21955 }, { "epoch": 1.1148608343592543, "grad_norm": 0.02341400981114356, "learning_rate": 0.00048702003668087926, "loss": 0.5172, "step": 21960 }, { "epoch": 1.1151146704572856, "grad_norm": 0.024390827336790148, "learning_rate": 0.00048679858665280206, "loss": 0.4964, "step": 21965 }, { "epoch": 1.1153685065553172, "grad_norm": 0.023466277161900626, "learning_rate": 0.00048657713921609647, "loss": 0.5098, "step": 21970 }, { "epoch": 1.1156223426533487, "grad_norm": 0.03522806209015746, "learning_rate": 0.0004863556944142316, "loss": 0.5102, "step": 21975 }, { "epoch": 1.1158761787513802, "grad_norm": 0.023062324780772376, "learning_rate": 0.00048613425229067575, "loss": 0.4558, "step": 21980 }, { "epoch": 1.1161300148494118, "grad_norm": 0.029175018799990165, "learning_rate": 0.0004859128128888971, "loss": 0.5498, "step": 21985 }, { "epoch": 1.1163838509474433, "grad_norm": 0.023244545510022245, "learning_rate": 0.000485691376252363, "loss": 0.5382, "step": 21990 }, { "epoch": 1.1166376870454748, "grad_norm": 0.02540046969404091, "learning_rate": 0.0004854699424245404, "loss": 0.5167, "step": 21995 }, { "epoch": 1.1168915231435061, "grad_norm": 0.023122619586228372, "learning_rate": 0.00048524851144889563, "loss": 0.4975, "step": 22000 }, { "epoch": 1.1171453592415377, "grad_norm": 0.028102268868484732, "learning_rate": 0.0004850270833688945, "loss": 0.5306, "step": 22005 }, { "epoch": 1.1173991953395692, "grad_norm": 0.028295243268877415, "learning_rate": 0.0004848056582280022, "loss": 0.485, "step": 22010 }, { "epoch": 1.1176530314376008, "grad_norm": 0.023662648309147206, "learning_rate": 0.00048458423606968337, "loss": 0.5316, "step": 22015 }, { "epoch": 1.1179068675356323, "grad_norm": 0.023956513141186277, "learning_rate": 0.0004843628169374022, "loss": 0.4989, "step": 22020 }, { "epoch": 1.1181607036336638, "grad_norm": 0.03533645288614336, "learning_rate": 0.0004841414008746221, "loss": 0.4866, "step": 22025 }, { "epoch": 1.1184145397316954, "grad_norm": 0.02362689256504165, "learning_rate": 0.0004839199879248059, "loss": 0.5038, "step": 22030 }, { "epoch": 1.1186683758297267, "grad_norm": 0.022341925551004328, "learning_rate": 0.00048369857813141586, "loss": 0.4776, "step": 22035 }, { "epoch": 1.1189222119277582, "grad_norm": 0.02777412452937675, "learning_rate": 0.00048347717153791365, "loss": 0.4765, "step": 22040 }, { "epoch": 1.1191760480257897, "grad_norm": 0.03839285711516218, "learning_rate": 0.0004832557681877603, "loss": 0.4991, "step": 22045 }, { "epoch": 1.1194298841238213, "grad_norm": 0.02786618691587262, "learning_rate": 0.0004830343681244161, "loss": 0.5314, "step": 22050 }, { "epoch": 1.1196837202218528, "grad_norm": 0.02177076732255751, "learning_rate": 0.0004828129713913409, "loss": 0.5031, "step": 22055 }, { "epoch": 1.1199375563198843, "grad_norm": 0.021374169537151152, "learning_rate": 0.0004825915780319937, "loss": 0.4887, "step": 22060 }, { "epoch": 1.1201913924179157, "grad_norm": 0.021242873714302467, "learning_rate": 0.00048237018808983286, "loss": 0.5098, "step": 22065 }, { "epoch": 1.1204452285159472, "grad_norm": 0.03862878033938459, "learning_rate": 0.0004821488016083162, "loss": 0.4902, "step": 22070 }, { "epoch": 1.1206990646139787, "grad_norm": 0.022914131678326958, "learning_rate": 0.0004819274186309005, "loss": 0.5181, "step": 22075 }, { "epoch": 1.1209529007120103, "grad_norm": 0.02444901284526625, "learning_rate": 0.0004817060392010427, "loss": 0.519, "step": 22080 }, { "epoch": 1.1212067368100418, "grad_norm": 0.027511001579787645, "learning_rate": 0.0004814846633621981, "loss": 0.5124, "step": 22085 }, { "epoch": 1.1214605729080733, "grad_norm": 0.02170627891570555, "learning_rate": 0.0004812632911578218, "loss": 0.4825, "step": 22090 }, { "epoch": 1.1217144090061049, "grad_norm": 0.023975365604404518, "learning_rate": 0.000481041922631368, "loss": 0.5226, "step": 22095 }, { "epoch": 1.1219682451041362, "grad_norm": 0.023405836814547467, "learning_rate": 0.00048082055782629017, "loss": 0.5045, "step": 22100 }, { "epoch": 1.1222220812021677, "grad_norm": 0.023988948357005786, "learning_rate": 0.00048059919678604125, "loss": 0.5046, "step": 22105 }, { "epoch": 1.1224759173001992, "grad_norm": 0.019265243232751396, "learning_rate": 0.0004803778395540733, "loss": 0.4932, "step": 22110 }, { "epoch": 1.1227297533982308, "grad_norm": 0.020015777068000878, "learning_rate": 0.0004801564861738375, "loss": 0.4692, "step": 22115 }, { "epoch": 1.1229835894962623, "grad_norm": 0.02271364249987384, "learning_rate": 0.00047993513668878455, "loss": 0.5071, "step": 22120 }, { "epoch": 1.1232374255942938, "grad_norm": 0.0283767253917993, "learning_rate": 0.0004797137911423642, "loss": 0.49, "step": 22125 }, { "epoch": 1.1234912616923252, "grad_norm": 0.03057189959396885, "learning_rate": 0.00047949244957802545, "loss": 0.5132, "step": 22130 }, { "epoch": 1.1237450977903567, "grad_norm": 0.02307975684077437, "learning_rate": 0.0004792711120392165, "loss": 0.5052, "step": 22135 }, { "epoch": 1.1239989338883882, "grad_norm": 0.022609530542469808, "learning_rate": 0.00047904977856938496, "loss": 0.4825, "step": 22140 }, { "epoch": 1.1242527699864198, "grad_norm": 0.02667533885777748, "learning_rate": 0.0004788284492119775, "loss": 0.4987, "step": 22145 }, { "epoch": 1.1245066060844513, "grad_norm": 0.03401959306286431, "learning_rate": 0.00047860712401043976, "loss": 0.4834, "step": 22150 }, { "epoch": 1.1247604421824828, "grad_norm": 0.03470866794755956, "learning_rate": 0.00047838580300821695, "loss": 0.4963, "step": 22155 }, { "epoch": 1.1250142782805144, "grad_norm": 0.023970283863950843, "learning_rate": 0.0004781644862487532, "loss": 0.4791, "step": 22160 }, { "epoch": 1.1252681143785457, "grad_norm": 0.02312660422996864, "learning_rate": 0.000477943173775492, "loss": 0.5058, "step": 22165 }, { "epoch": 1.1255219504765772, "grad_norm": 0.02435017898171767, "learning_rate": 0.00047772186563187566, "loss": 0.4919, "step": 22170 }, { "epoch": 1.1257757865746088, "grad_norm": 0.023843442848662726, "learning_rate": 0.00047750056186134603, "loss": 0.5119, "step": 22175 }, { "epoch": 1.1260296226726403, "grad_norm": 0.027676021986730217, "learning_rate": 0.00047727926250734393, "loss": 0.5182, "step": 22180 }, { "epoch": 1.1262834587706718, "grad_norm": 0.02668274381778642, "learning_rate": 0.00047705796761330927, "loss": 0.5088, "step": 22185 }, { "epoch": 1.1265372948687034, "grad_norm": 0.04197766800019646, "learning_rate": 0.00047683667722268116, "loss": 0.5212, "step": 22190 }, { "epoch": 1.1267911309667347, "grad_norm": 0.04081434517250474, "learning_rate": 0.0004766153913788976, "loss": 0.4979, "step": 22195 }, { "epoch": 1.1270449670647662, "grad_norm": 0.03853608127797696, "learning_rate": 0.00047639411012539626, "loss": 0.5104, "step": 22200 }, { "epoch": 1.1272988031627977, "grad_norm": 0.02809744226799178, "learning_rate": 0.0004761728335056134, "loss": 0.4911, "step": 22205 }, { "epoch": 1.1275526392608293, "grad_norm": 0.027375956086551304, "learning_rate": 0.00047595156156298455, "loss": 0.4629, "step": 22210 }, { "epoch": 1.1278064753588608, "grad_norm": 0.026405679471089256, "learning_rate": 0.0004757302943409442, "loss": 0.5249, "step": 22215 }, { "epoch": 1.1280603114568923, "grad_norm": 0.02422688158994748, "learning_rate": 0.000475509031882926, "loss": 0.4936, "step": 22220 }, { "epoch": 1.1283141475549239, "grad_norm": 0.02310122351518651, "learning_rate": 0.00047528777423236276, "loss": 0.4977, "step": 22225 }, { "epoch": 1.1285679836529554, "grad_norm": 0.027179185579231067, "learning_rate": 0.00047506652143268615, "loss": 0.527, "step": 22230 }, { "epoch": 1.1288218197509867, "grad_norm": 0.02555209948452092, "learning_rate": 0.0004748452735273271, "loss": 0.4867, "step": 22235 }, { "epoch": 1.1290756558490183, "grad_norm": 0.024286447301691477, "learning_rate": 0.0004746240305597154, "loss": 0.4958, "step": 22240 }, { "epoch": 1.1293294919470498, "grad_norm": 0.028126452190403196, "learning_rate": 0.0004744027925732799, "loss": 0.5077, "step": 22245 }, { "epoch": 1.1295833280450813, "grad_norm": 0.031834226850280145, "learning_rate": 0.0004741815596114486, "loss": 0.4713, "step": 22250 }, { "epoch": 1.1298371641431129, "grad_norm": 0.025659978434002053, "learning_rate": 0.00047396033171764825, "loss": 0.4642, "step": 22255 }, { "epoch": 1.1300910002411442, "grad_norm": 0.029700890203732126, "learning_rate": 0.00047373910893530504, "loss": 0.5396, "step": 22260 }, { "epoch": 1.1303448363391757, "grad_norm": 0.022579372378051864, "learning_rate": 0.00047351789130784384, "loss": 0.4994, "step": 22265 }, { "epoch": 1.1305986724372072, "grad_norm": 0.029980851132857454, "learning_rate": 0.00047329667887868846, "loss": 0.4837, "step": 22270 }, { "epoch": 1.1308525085352388, "grad_norm": 0.038482466915576954, "learning_rate": 0.00047307547169126183, "loss": 0.4999, "step": 22275 }, { "epoch": 1.1311063446332703, "grad_norm": 0.028758075959962347, "learning_rate": 0.0004728542697889859, "loss": 0.4943, "step": 22280 }, { "epoch": 1.1313601807313018, "grad_norm": 0.02302067520972011, "learning_rate": 0.00047263307321528136, "loss": 0.5058, "step": 22285 }, { "epoch": 1.1316140168293334, "grad_norm": 0.02338759212969084, "learning_rate": 0.0004724118820135681, "loss": 0.5417, "step": 22290 }, { "epoch": 1.131867852927365, "grad_norm": 0.024240682457677416, "learning_rate": 0.00047219069622726485, "loss": 0.5429, "step": 22295 }, { "epoch": 1.1321216890253962, "grad_norm": 0.03240119123904312, "learning_rate": 0.0004719695158997892, "loss": 0.4872, "step": 22300 }, { "epoch": 1.1323755251234278, "grad_norm": 0.027587688170413006, "learning_rate": 0.00047174834107455784, "loss": 0.5008, "step": 22305 }, { "epoch": 1.1326293612214593, "grad_norm": 0.026389922450704612, "learning_rate": 0.00047152717179498624, "loss": 0.5294, "step": 22310 }, { "epoch": 1.1328831973194908, "grad_norm": 0.031230602354335214, "learning_rate": 0.00047130600810448855, "loss": 0.4712, "step": 22315 }, { "epoch": 1.1331370334175224, "grad_norm": 0.02765524085794462, "learning_rate": 0.0004710848500464786, "loss": 0.526, "step": 22320 }, { "epoch": 1.133390869515554, "grad_norm": 0.023377797728668624, "learning_rate": 0.0004708636976643684, "loss": 0.4667, "step": 22325 }, { "epoch": 1.1336447056135852, "grad_norm": 0.024350148694776152, "learning_rate": 0.00047064255100156904, "loss": 0.4631, "step": 22330 }, { "epoch": 1.1338985417116167, "grad_norm": 0.023281128309858613, "learning_rate": 0.00047042141010149053, "loss": 0.5129, "step": 22335 }, { "epoch": 1.1341523778096483, "grad_norm": 0.030603931112854497, "learning_rate": 0.0004702002750075417, "loss": 0.517, "step": 22340 }, { "epoch": 1.1344062139076798, "grad_norm": 0.024716062309770172, "learning_rate": 0.0004699791457631303, "loss": 0.4932, "step": 22345 }, { "epoch": 1.1346600500057114, "grad_norm": 0.10774252187329189, "learning_rate": 0.00046975802241166283, "loss": 0.5326, "step": 22350 }, { "epoch": 1.1349138861037429, "grad_norm": 0.029932485775150907, "learning_rate": 0.00046953690499654477, "loss": 0.5271, "step": 22355 }, { "epoch": 1.1351677222017744, "grad_norm": 0.02306648216314524, "learning_rate": 0.0004693157935611803, "loss": 0.4831, "step": 22360 }, { "epoch": 1.1354215582998057, "grad_norm": 0.024003978824830057, "learning_rate": 0.0004690946881489726, "loss": 0.4986, "step": 22365 }, { "epoch": 1.1356753943978373, "grad_norm": 0.021905055146049835, "learning_rate": 0.00046887358880332345, "loss": 0.5525, "step": 22370 }, { "epoch": 1.1359292304958688, "grad_norm": 0.022848413649515294, "learning_rate": 0.00046865249556763344, "loss": 0.529, "step": 22375 }, { "epoch": 1.1361830665939003, "grad_norm": 0.03605177144637833, "learning_rate": 0.0004684314084853024, "loss": 0.4814, "step": 22380 }, { "epoch": 1.1364369026919319, "grad_norm": 0.056644899234727326, "learning_rate": 0.0004682103275997284, "loss": 0.546, "step": 22385 }, { "epoch": 1.1366907387899634, "grad_norm": 0.032741353664718975, "learning_rate": 0.00046798925295430863, "loss": 0.492, "step": 22390 }, { "epoch": 1.1369445748879947, "grad_norm": 0.024068036984486157, "learning_rate": 0.00046776818459243874, "loss": 0.4783, "step": 22395 }, { "epoch": 1.1371984109860263, "grad_norm": 0.026300065257889048, "learning_rate": 0.0004675471225575136, "loss": 0.5042, "step": 22400 }, { "epoch": 1.1374522470840578, "grad_norm": 0.026588922808316026, "learning_rate": 0.00046732606689292637, "loss": 0.4825, "step": 22405 }, { "epoch": 1.1377060831820893, "grad_norm": 0.03082021206138233, "learning_rate": 0.00046710501764206933, "loss": 0.4886, "step": 22410 }, { "epoch": 1.1379599192801209, "grad_norm": 0.02702663439746158, "learning_rate": 0.0004668839748483332, "loss": 0.4956, "step": 22415 }, { "epoch": 1.1382137553781524, "grad_norm": 0.02770867084856874, "learning_rate": 0.0004666629385551078, "loss": 0.5162, "step": 22420 }, { "epoch": 1.138467591476184, "grad_norm": 0.02593191401749543, "learning_rate": 0.0004664419088057812, "loss": 0.5222, "step": 22425 }, { "epoch": 1.1387214275742152, "grad_norm": 0.025067133006483912, "learning_rate": 0.0004662208856437405, "loss": 0.5045, "step": 22430 }, { "epoch": 1.1389752636722468, "grad_norm": 0.02386909656610933, "learning_rate": 0.00046599986911237135, "loss": 0.4937, "step": 22435 }, { "epoch": 1.1392290997702783, "grad_norm": 0.02308227237038335, "learning_rate": 0.00046577885925505857, "loss": 0.5298, "step": 22440 }, { "epoch": 1.1394829358683098, "grad_norm": 0.022608086605042318, "learning_rate": 0.00046555785611518505, "loss": 0.5098, "step": 22445 }, { "epoch": 1.1397367719663414, "grad_norm": 0.022743886474204174, "learning_rate": 0.0004653368597361326, "loss": 0.5065, "step": 22450 }, { "epoch": 1.139990608064373, "grad_norm": 0.021721050273651527, "learning_rate": 0.00046511587016128173, "loss": 0.5221, "step": 22455 }, { "epoch": 1.1402444441624042, "grad_norm": 0.028647441808002567, "learning_rate": 0.0004648948874340115, "loss": 0.5102, "step": 22460 }, { "epoch": 1.1404982802604358, "grad_norm": 0.023629836872766076, "learning_rate": 0.0004646739115976999, "loss": 0.4838, "step": 22465 }, { "epoch": 1.1407521163584673, "grad_norm": 0.022987645202448132, "learning_rate": 0.00046445294269572326, "loss": 0.5101, "step": 22470 }, { "epoch": 1.1410059524564988, "grad_norm": 0.028359397619188362, "learning_rate": 0.0004642319807714567, "loss": 0.49, "step": 22475 }, { "epoch": 1.1412597885545304, "grad_norm": 0.024714621127131544, "learning_rate": 0.0004640110258682739, "loss": 0.5394, "step": 22480 }, { "epoch": 1.141513624652562, "grad_norm": 0.023929726037479372, "learning_rate": 0.0004637900780295472, "loss": 0.4957, "step": 22485 }, { "epoch": 1.1417674607505934, "grad_norm": 0.02145690677992983, "learning_rate": 0.0004635691372986477, "loss": 0.455, "step": 22490 }, { "epoch": 1.142021296848625, "grad_norm": 0.020947420613049612, "learning_rate": 0.0004633482037189447, "loss": 0.4911, "step": 22495 }, { "epoch": 1.1422751329466563, "grad_norm": 0.02298839539671349, "learning_rate": 0.00046312727733380666, "loss": 0.5216, "step": 22500 }, { "epoch": 1.1425289690446878, "grad_norm": 0.021966101109555183, "learning_rate": 0.0004629063581866002, "loss": 0.5083, "step": 22505 }, { "epoch": 1.1427828051427193, "grad_norm": 0.03686984297075965, "learning_rate": 0.00046268544632069064, "loss": 0.5224, "step": 22510 }, { "epoch": 1.1430366412407509, "grad_norm": 0.030756815032274516, "learning_rate": 0.00046246454177944194, "loss": 0.508, "step": 22515 }, { "epoch": 1.1432904773387824, "grad_norm": 0.05181483793136802, "learning_rate": 0.0004622436446062164, "loss": 0.489, "step": 22520 }, { "epoch": 1.1435443134368137, "grad_norm": 0.03958359517287309, "learning_rate": 0.0004620227548443752, "loss": 0.5243, "step": 22525 }, { "epoch": 1.1437981495348453, "grad_norm": 0.03427906955795205, "learning_rate": 0.0004618018725372778, "loss": 0.4834, "step": 22530 }, { "epoch": 1.1440519856328768, "grad_norm": 0.023909407473577712, "learning_rate": 0.0004615809977282823, "loss": 0.4969, "step": 22535 }, { "epoch": 1.1443058217309083, "grad_norm": 0.026640110354467325, "learning_rate": 0.0004613601304607454, "loss": 0.5131, "step": 22540 }, { "epoch": 1.1445596578289399, "grad_norm": 0.0237982378487044, "learning_rate": 0.0004611392707780222, "loss": 0.4965, "step": 22545 }, { "epoch": 1.1448134939269714, "grad_norm": 0.03386310576456612, "learning_rate": 0.00046091841872346627, "loss": 0.5167, "step": 22550 }, { "epoch": 1.145067330025003, "grad_norm": 0.02459001107447409, "learning_rate": 0.00046069757434042975, "loss": 0.5098, "step": 22555 }, { "epoch": 1.1453211661230345, "grad_norm": 0.02750293868217048, "learning_rate": 0.0004604767376722635, "loss": 0.5183, "step": 22560 }, { "epoch": 1.1455750022210658, "grad_norm": 0.02465764415596952, "learning_rate": 0.0004602559087623166, "loss": 0.5346, "step": 22565 }, { "epoch": 1.1458288383190973, "grad_norm": 0.021840825134174797, "learning_rate": 0.0004600350876539366, "loss": 0.4734, "step": 22570 }, { "epoch": 1.1460826744171289, "grad_norm": 0.022144355795008893, "learning_rate": 0.00045981427439046956, "loss": 0.4902, "step": 22575 }, { "epoch": 1.1463365105151604, "grad_norm": 0.02333513320332411, "learning_rate": 0.00045959346901526006, "loss": 0.4993, "step": 22580 }, { "epoch": 1.146590346613192, "grad_norm": 0.02599869001595599, "learning_rate": 0.0004593726715716511, "loss": 0.5261, "step": 22585 }, { "epoch": 1.1468441827112235, "grad_norm": 0.03686029455414443, "learning_rate": 0.00045915188210298406, "loss": 0.4647, "step": 22590 }, { "epoch": 1.1470980188092548, "grad_norm": 0.027385432815099645, "learning_rate": 0.00045893110065259893, "loss": 0.5352, "step": 22595 }, { "epoch": 1.1473518549072863, "grad_norm": 0.02888582352407677, "learning_rate": 0.0004587103272638339, "loss": 0.4924, "step": 22600 }, { "epoch": 1.1476056910053178, "grad_norm": 0.023625257936494698, "learning_rate": 0.0004584895619800257, "loss": 0.505, "step": 22605 }, { "epoch": 1.1478595271033494, "grad_norm": 0.0277603791896047, "learning_rate": 0.00045826880484450946, "loss": 0.5021, "step": 22610 }, { "epoch": 1.148113363201381, "grad_norm": 0.028467365276233746, "learning_rate": 0.0004580480559006186, "loss": 0.497, "step": 22615 }, { "epoch": 1.1483671992994124, "grad_norm": 0.03687524663675464, "learning_rate": 0.0004578273151916853, "loss": 0.4686, "step": 22620 }, { "epoch": 1.148621035397444, "grad_norm": 0.033659979229860255, "learning_rate": 0.0004576065827610397, "loss": 0.489, "step": 22625 }, { "epoch": 1.1488748714954753, "grad_norm": 0.025235412390316816, "learning_rate": 0.0004573858586520105, "loss": 0.5098, "step": 22630 }, { "epoch": 1.1491287075935068, "grad_norm": 0.03243044981273508, "learning_rate": 0.0004571651429079247, "loss": 0.4934, "step": 22635 }, { "epoch": 1.1493825436915384, "grad_norm": 0.022833415951073927, "learning_rate": 0.00045694443557210777, "loss": 0.5142, "step": 22640 }, { "epoch": 1.14963637978957, "grad_norm": 0.03068941635429747, "learning_rate": 0.00045672373668788336, "loss": 0.466, "step": 22645 }, { "epoch": 1.1498902158876014, "grad_norm": 0.0246266009056399, "learning_rate": 0.0004565030462985737, "loss": 0.5178, "step": 22650 }, { "epoch": 1.150144051985633, "grad_norm": 0.030534870475012552, "learning_rate": 0.00045628236444749905, "loss": 0.4653, "step": 22655 }, { "epoch": 1.1503978880836643, "grad_norm": 0.024906564205494433, "learning_rate": 0.0004560616911779783, "loss": 0.5041, "step": 22660 }, { "epoch": 1.1506517241816958, "grad_norm": 0.03444194262921391, "learning_rate": 0.00045584102653332845, "loss": 0.4884, "step": 22665 }, { "epoch": 1.1509055602797273, "grad_norm": 0.036461759923969016, "learning_rate": 0.0004556203705568648, "loss": 0.5029, "step": 22670 }, { "epoch": 1.1511593963777589, "grad_norm": 0.04594830004230979, "learning_rate": 0.0004553997232919009, "loss": 0.5145, "step": 22675 }, { "epoch": 1.1514132324757904, "grad_norm": 0.03261588655559848, "learning_rate": 0.00045517908478174917, "loss": 0.5217, "step": 22680 }, { "epoch": 1.151667068573822, "grad_norm": 0.0242254240936562, "learning_rate": 0.0004549584550697196, "loss": 0.4833, "step": 22685 }, { "epoch": 1.1519209046718535, "grad_norm": 0.023948731317067408, "learning_rate": 0.00045473783419912057, "loss": 0.473, "step": 22690 }, { "epoch": 1.1521747407698848, "grad_norm": 0.02369769333893661, "learning_rate": 0.000454517222213259, "loss": 0.4996, "step": 22695 }, { "epoch": 1.1524285768679163, "grad_norm": 0.0235533696148796, "learning_rate": 0.00045429661915543995, "loss": 0.5215, "step": 22700 }, { "epoch": 1.1526824129659479, "grad_norm": 0.029693427304754598, "learning_rate": 0.0004540760250689666, "loss": 0.5202, "step": 22705 }, { "epoch": 1.1529362490639794, "grad_norm": 0.026223364679563748, "learning_rate": 0.0004538554399971406, "loss": 0.499, "step": 22710 }, { "epoch": 1.153190085162011, "grad_norm": 0.022238545679295847, "learning_rate": 0.00045363486398326147, "loss": 0.4906, "step": 22715 }, { "epoch": 1.1534439212600425, "grad_norm": 0.02977765503263147, "learning_rate": 0.0004534142970706274, "loss": 0.529, "step": 22720 }, { "epoch": 1.1536977573580738, "grad_norm": 0.022706195045235984, "learning_rate": 0.0004531937393025344, "loss": 0.5039, "step": 22725 }, { "epoch": 1.1539515934561053, "grad_norm": 0.02434660390615717, "learning_rate": 0.000452973190722277, "loss": 0.5186, "step": 22730 }, { "epoch": 1.1542054295541369, "grad_norm": 0.0213880961035262, "learning_rate": 0.00045275265137314754, "loss": 0.5202, "step": 22735 }, { "epoch": 1.1544592656521684, "grad_norm": 0.024719044730757197, "learning_rate": 0.0004525321212984372, "loss": 0.5122, "step": 22740 }, { "epoch": 1.1547131017502, "grad_norm": 0.024745026751628207, "learning_rate": 0.00045231160054143467, "loss": 0.5046, "step": 22745 }, { "epoch": 1.1549669378482315, "grad_norm": 0.025339355379831775, "learning_rate": 0.00045209108914542716, "loss": 0.5085, "step": 22750 }, { "epoch": 1.155220773946263, "grad_norm": 0.03885787120756748, "learning_rate": 0.0004518705871537, "loss": 0.5161, "step": 22755 }, { "epoch": 1.1554746100442943, "grad_norm": 0.022406997994527122, "learning_rate": 0.0004516500946095365, "loss": 0.4808, "step": 22760 }, { "epoch": 1.1557284461423258, "grad_norm": 0.026005826539297415, "learning_rate": 0.0004514296115562183, "loss": 0.5135, "step": 22765 }, { "epoch": 1.1559822822403574, "grad_norm": 0.03539270632977643, "learning_rate": 0.0004512091380370251, "loss": 0.4971, "step": 22770 }, { "epoch": 1.156236118338389, "grad_norm": 0.029564361875585962, "learning_rate": 0.00045098867409523486, "loss": 0.4924, "step": 22775 }, { "epoch": 1.1564899544364204, "grad_norm": 0.02358096736186789, "learning_rate": 0.0004507682197741235, "loss": 0.4856, "step": 22780 }, { "epoch": 1.156743790534452, "grad_norm": 0.028230367180303174, "learning_rate": 0.000450547775116965, "loss": 0.4659, "step": 22785 }, { "epoch": 1.1569976266324833, "grad_norm": 0.02700559394625303, "learning_rate": 0.00045032734016703163, "loss": 0.4845, "step": 22790 }, { "epoch": 1.1572514627305148, "grad_norm": 0.026220476689667822, "learning_rate": 0.0004501069149675937, "loss": 0.5081, "step": 22795 }, { "epoch": 1.1575052988285464, "grad_norm": 0.023325253274810016, "learning_rate": 0.00044988649956191943, "loss": 0.5124, "step": 22800 }, { "epoch": 1.157759134926578, "grad_norm": 0.02964466344942591, "learning_rate": 0.00044966609399327544, "loss": 0.5034, "step": 22805 }, { "epoch": 1.1580129710246094, "grad_norm": 0.025629931037714163, "learning_rate": 0.0004494456983049263, "loss": 0.5159, "step": 22810 }, { "epoch": 1.158266807122641, "grad_norm": 0.02818068589305215, "learning_rate": 0.0004492253125401344, "loss": 0.517, "step": 22815 }, { "epoch": 1.1585206432206725, "grad_norm": 0.025937834093816534, "learning_rate": 0.00044900493674216043, "loss": 0.535, "step": 22820 }, { "epoch": 1.158774479318704, "grad_norm": 0.021906620341182587, "learning_rate": 0.00044878457095426307, "loss": 0.4881, "step": 22825 }, { "epoch": 1.1590283154167353, "grad_norm": 0.023326406428330577, "learning_rate": 0.000448564215219699, "loss": 0.4898, "step": 22830 }, { "epoch": 1.1592821515147669, "grad_norm": 0.025849041107935036, "learning_rate": 0.00044834386958172295, "loss": 0.5198, "step": 22835 }, { "epoch": 1.1595359876127984, "grad_norm": 0.02594641672975575, "learning_rate": 0.00044812353408358777, "loss": 0.4972, "step": 22840 }, { "epoch": 1.15978982371083, "grad_norm": 0.0330275583093027, "learning_rate": 0.0004479032087685441, "loss": 0.5119, "step": 22845 }, { "epoch": 1.1600436598088615, "grad_norm": 0.023604960673479395, "learning_rate": 0.00044768289367984077, "loss": 0.5103, "step": 22850 }, { "epoch": 1.1602974959068928, "grad_norm": 0.04128228371869332, "learning_rate": 0.0004474625888607245, "loss": 0.5197, "step": 22855 }, { "epoch": 1.1605513320049243, "grad_norm": 0.020903884654111818, "learning_rate": 0.00044724229435443973, "loss": 0.5284, "step": 22860 }, { "epoch": 1.1608051681029559, "grad_norm": 0.031129567663155357, "learning_rate": 0.0004470220102042298, "loss": 0.5068, "step": 22865 }, { "epoch": 1.1610590042009874, "grad_norm": 0.020499848777966484, "learning_rate": 0.00044680173645333504, "loss": 0.5014, "step": 22870 }, { "epoch": 1.161312840299019, "grad_norm": 0.024298947458801685, "learning_rate": 0.0004465814731449941, "loss": 0.4989, "step": 22875 }, { "epoch": 1.1615666763970505, "grad_norm": 0.02238268280787287, "learning_rate": 0.0004463612203224436, "loss": 0.5114, "step": 22880 }, { "epoch": 1.161820512495082, "grad_norm": 0.030230746480821854, "learning_rate": 0.0004461409780289181, "loss": 0.4876, "step": 22885 }, { "epoch": 1.1620743485931135, "grad_norm": 0.024028109056989183, "learning_rate": 0.0004459207463076499, "loss": 0.4815, "step": 22890 }, { "epoch": 1.1623281846911449, "grad_norm": 0.021016127843322022, "learning_rate": 0.00044570052520186956, "loss": 0.5067, "step": 22895 }, { "epoch": 1.1625820207891764, "grad_norm": 0.029561997978537945, "learning_rate": 0.00044548031475480533, "loss": 0.5074, "step": 22900 }, { "epoch": 1.162835856887208, "grad_norm": 0.022169964271249176, "learning_rate": 0.0004452601150096834, "loss": 0.522, "step": 22905 }, { "epoch": 1.1630896929852395, "grad_norm": 0.024800692528682368, "learning_rate": 0.000445039926009728, "loss": 0.5108, "step": 22910 }, { "epoch": 1.163343529083271, "grad_norm": 0.02591690066399758, "learning_rate": 0.00044481974779816096, "loss": 0.5044, "step": 22915 }, { "epoch": 1.1635973651813025, "grad_norm": 0.025486400367960977, "learning_rate": 0.00044459958041820217, "loss": 0.5242, "step": 22920 }, { "epoch": 1.1638512012793338, "grad_norm": 2.4946821079686567, "learning_rate": 0.0004443794239130696, "loss": 0.7984, "step": 22925 }, { "epoch": 1.1641050373773654, "grad_norm": 0.06100607110479418, "learning_rate": 0.00044415927832597865, "loss": 0.4905, "step": 22930 }, { "epoch": 1.164358873475397, "grad_norm": 0.025761932513384978, "learning_rate": 0.00044393914370014295, "loss": 0.4968, "step": 22935 }, { "epoch": 1.1646127095734284, "grad_norm": 0.02625663236193525, "learning_rate": 0.00044371902007877374, "loss": 0.4951, "step": 22940 }, { "epoch": 1.16486654567146, "grad_norm": 0.02683158448012171, "learning_rate": 0.0004434989075050802, "loss": 0.5164, "step": 22945 }, { "epoch": 1.1651203817694915, "grad_norm": 0.03126770701983838, "learning_rate": 0.0004432788060222694, "loss": 0.4774, "step": 22950 }, { "epoch": 1.165374217867523, "grad_norm": 0.02778926944239494, "learning_rate": 0.00044305871567354606, "loss": 0.5266, "step": 22955 }, { "epoch": 1.1656280539655544, "grad_norm": 0.02199391640791797, "learning_rate": 0.0004428386365021129, "loss": 0.5053, "step": 22960 }, { "epoch": 1.165881890063586, "grad_norm": 0.02185784711754856, "learning_rate": 0.0004426185685511703, "loss": 0.5215, "step": 22965 }, { "epoch": 1.1661357261616174, "grad_norm": 0.03374512683832721, "learning_rate": 0.00044239851186391653, "loss": 0.4848, "step": 22970 }, { "epoch": 1.166389562259649, "grad_norm": 0.029868509213157268, "learning_rate": 0.00044217846648354764, "loss": 0.4479, "step": 22975 }, { "epoch": 1.1666433983576805, "grad_norm": 0.04777615390254651, "learning_rate": 0.00044195843245325723, "loss": 0.4859, "step": 22980 }, { "epoch": 1.166897234455712, "grad_norm": 0.023512758989838855, "learning_rate": 0.0004417384098162373, "loss": 0.5342, "step": 22985 }, { "epoch": 1.1671510705537433, "grad_norm": 0.02229369139136557, "learning_rate": 0.00044151839861567694, "loss": 0.5134, "step": 22990 }, { "epoch": 1.1674049066517749, "grad_norm": 0.026894096234455038, "learning_rate": 0.0004412983988947633, "loss": 0.4969, "step": 22995 }, { "epoch": 1.1676587427498064, "grad_norm": 0.02589303820825699, "learning_rate": 0.0004410784106966812, "loss": 0.4886, "step": 23000 }, { "epoch": 1.167912578847838, "grad_norm": 0.02124381698964111, "learning_rate": 0.0004408584340646132, "loss": 0.523, "step": 23005 }, { "epoch": 1.1681664149458695, "grad_norm": 0.03791510606931018, "learning_rate": 0.0004406384690417397, "loss": 0.5113, "step": 23010 }, { "epoch": 1.168420251043901, "grad_norm": 0.02862399836284424, "learning_rate": 0.0004404185156712387, "loss": 0.5179, "step": 23015 }, { "epoch": 1.1686740871419325, "grad_norm": 0.030684358273063143, "learning_rate": 0.00044019857399628593, "loss": 0.4732, "step": 23020 }, { "epoch": 1.1689279232399639, "grad_norm": 0.023354500795241345, "learning_rate": 0.0004399786440600549, "loss": 0.5008, "step": 23025 }, { "epoch": 1.1691817593379954, "grad_norm": 0.024691152727817047, "learning_rate": 0.0004397587259057166, "loss": 0.4704, "step": 23030 }, { "epoch": 1.169435595436027, "grad_norm": 0.023111558411901535, "learning_rate": 0.0004395388195764401, "loss": 0.4923, "step": 23035 }, { "epoch": 1.1696894315340585, "grad_norm": 0.023376429033597424, "learning_rate": 0.00043931892511539164, "loss": 0.4986, "step": 23040 }, { "epoch": 1.16994326763209, "grad_norm": 0.021932488535556502, "learning_rate": 0.0004390990425657357, "loss": 0.5079, "step": 23045 }, { "epoch": 1.1701971037301215, "grad_norm": 0.02295870523021244, "learning_rate": 0.00043887917197063395, "loss": 0.4881, "step": 23050 }, { "epoch": 1.1704509398281528, "grad_norm": 0.025540059947805397, "learning_rate": 0.00043865931337324596, "loss": 0.4948, "step": 23055 }, { "epoch": 1.1707047759261844, "grad_norm": 0.022156403341379288, "learning_rate": 0.0004384394668167288, "loss": 0.4972, "step": 23060 }, { "epoch": 1.170958612024216, "grad_norm": 0.02786034852874224, "learning_rate": 0.00043821963234423736, "loss": 0.5261, "step": 23065 }, { "epoch": 1.1712124481222475, "grad_norm": 0.04119583380859592, "learning_rate": 0.00043799980999892395, "loss": 0.5146, "step": 23070 }, { "epoch": 1.171466284220279, "grad_norm": 0.023409857934518236, "learning_rate": 0.00043777999982393866, "loss": 0.5204, "step": 23075 }, { "epoch": 1.1717201203183105, "grad_norm": 0.02598832215798176, "learning_rate": 0.00043756020186242915, "loss": 0.4982, "step": 23080 }, { "epoch": 1.171973956416342, "grad_norm": 0.03531536654582589, "learning_rate": 0.0004373404161575406, "loss": 0.4819, "step": 23085 }, { "epoch": 1.1722277925143736, "grad_norm": 0.022966689799931606, "learning_rate": 0.00043712064275241584, "loss": 0.5127, "step": 23090 }, { "epoch": 1.172481628612405, "grad_norm": 0.022269854028272786, "learning_rate": 0.00043690088169019535, "loss": 0.4744, "step": 23095 }, { "epoch": 1.1727354647104364, "grad_norm": 0.025940748821848634, "learning_rate": 0.0004366811330140169, "loss": 0.4777, "step": 23100 }, { "epoch": 1.172989300808468, "grad_norm": 0.022624731878350762, "learning_rate": 0.0004364613967670165, "loss": 0.4943, "step": 23105 }, { "epoch": 1.1732431369064995, "grad_norm": 0.023056585507083997, "learning_rate": 0.0004362416729923271, "loss": 0.495, "step": 23110 }, { "epoch": 1.173496973004531, "grad_norm": 0.03515992882856805, "learning_rate": 0.0004360219617330792, "loss": 0.5044, "step": 23115 }, { "epoch": 1.1737508091025624, "grad_norm": 0.02197279853949111, "learning_rate": 0.00043580226303240125, "loss": 0.5008, "step": 23120 }, { "epoch": 1.1740046452005939, "grad_norm": 0.026489459382601607, "learning_rate": 0.0004355825769334189, "loss": 0.5203, "step": 23125 }, { "epoch": 1.1742584812986254, "grad_norm": 0.028795879529102696, "learning_rate": 0.00043536290347925545, "loss": 0.4819, "step": 23130 }, { "epoch": 1.174512317396657, "grad_norm": 0.023530562218747813, "learning_rate": 0.0004351432427130316, "loss": 0.5285, "step": 23135 }, { "epoch": 1.1747661534946885, "grad_norm": 0.026020705705209802, "learning_rate": 0.0004349235946778659, "loss": 0.5238, "step": 23140 }, { "epoch": 1.17501998959272, "grad_norm": 0.0214632854435188, "learning_rate": 0.000434703959416874, "loss": 0.4631, "step": 23145 }, { "epoch": 1.1752738256907516, "grad_norm": 0.025379100473350413, "learning_rate": 0.0004344843369731692, "loss": 0.479, "step": 23150 }, { "epoch": 1.175527661788783, "grad_norm": 0.02437051797462676, "learning_rate": 0.00043426472738986233, "loss": 0.512, "step": 23155 }, { "epoch": 1.1757814978868144, "grad_norm": 0.02897066685669499, "learning_rate": 0.00043404513071006157, "loss": 0.5283, "step": 23160 }, { "epoch": 1.176035333984846, "grad_norm": 0.02546901345942979, "learning_rate": 0.0004338255469768728, "loss": 0.4924, "step": 23165 }, { "epoch": 1.1762891700828775, "grad_norm": 0.02368301627174018, "learning_rate": 0.0004336059762333992, "loss": 0.5317, "step": 23170 }, { "epoch": 1.176543006180909, "grad_norm": 0.022864082081725436, "learning_rate": 0.0004333864185227413, "loss": 0.5158, "step": 23175 }, { "epoch": 1.1767968422789405, "grad_norm": 0.02715490428058028, "learning_rate": 0.0004331668738879973, "loss": 0.5156, "step": 23180 }, { "epoch": 1.177050678376972, "grad_norm": 0.021324658368549614, "learning_rate": 0.00043294734237226263, "loss": 0.5044, "step": 23185 }, { "epoch": 1.1773045144750034, "grad_norm": 0.024522981714582275, "learning_rate": 0.0004327278240186303, "loss": 0.5064, "step": 23190 }, { "epoch": 1.177558350573035, "grad_norm": 0.03465334835103066, "learning_rate": 0.0004325083188701906, "loss": 0.4927, "step": 23195 }, { "epoch": 1.1778121866710665, "grad_norm": 0.02331481482855284, "learning_rate": 0.0004322888269700313, "loss": 0.5376, "step": 23200 }, { "epoch": 1.178066022769098, "grad_norm": 0.020402232394778678, "learning_rate": 0.00043206934836123763, "loss": 0.4963, "step": 23205 }, { "epoch": 1.1783198588671295, "grad_norm": 0.029288417733953877, "learning_rate": 0.0004318498830868921, "loss": 0.4816, "step": 23210 }, { "epoch": 1.178573694965161, "grad_norm": 0.026993170435865486, "learning_rate": 0.0004316304311900746, "loss": 0.4947, "step": 23215 }, { "epoch": 1.1788275310631926, "grad_norm": 0.029826322623069505, "learning_rate": 0.00043141099271386236, "loss": 0.5017, "step": 23220 }, { "epoch": 1.179081367161224, "grad_norm": 0.02839469569776015, "learning_rate": 0.0004311915677013304, "loss": 0.5048, "step": 23225 }, { "epoch": 1.1793352032592554, "grad_norm": 0.03104613930338224, "learning_rate": 0.00043097215619555053, "loss": 0.4949, "step": 23230 }, { "epoch": 1.179589039357287, "grad_norm": 0.023927607690992232, "learning_rate": 0.00043075275823959217, "loss": 0.4748, "step": 23235 }, { "epoch": 1.1798428754553185, "grad_norm": 0.027181678664027754, "learning_rate": 0.000430533373876522, "loss": 0.5051, "step": 23240 }, { "epoch": 1.18009671155335, "grad_norm": 0.025312092368077646, "learning_rate": 0.0004303140031494042, "loss": 0.5043, "step": 23245 }, { "epoch": 1.1803505476513816, "grad_norm": 0.024109005993253833, "learning_rate": 0.0004300946461012999, "loss": 0.4829, "step": 23250 }, { "epoch": 1.180604383749413, "grad_norm": 0.031076290791344705, "learning_rate": 0.0004298753027752681, "loss": 0.5013, "step": 23255 }, { "epoch": 1.1808582198474444, "grad_norm": 0.02301069082386406, "learning_rate": 0.00042965597321436454, "loss": 0.496, "step": 23260 }, { "epoch": 1.181112055945476, "grad_norm": 0.02834869538619908, "learning_rate": 0.00042943665746164274, "loss": 0.4945, "step": 23265 }, { "epoch": 1.1813658920435075, "grad_norm": 0.040647134876522086, "learning_rate": 0.0004292173555601531, "loss": 0.5038, "step": 23270 }, { "epoch": 1.181619728141539, "grad_norm": 0.047047098373209854, "learning_rate": 0.00042899806755294364, "loss": 0.5186, "step": 23275 }, { "epoch": 1.1818735642395706, "grad_norm": 0.023835906470185797, "learning_rate": 0.00042877879348305925, "loss": 0.4914, "step": 23280 }, { "epoch": 1.182127400337602, "grad_norm": 0.3198338312283238, "learning_rate": 0.0004285595333935427, "loss": 0.4655, "step": 23285 }, { "epoch": 1.1823812364356334, "grad_norm": 0.08480024872059884, "learning_rate": 0.0004283402873274334, "loss": 0.5015, "step": 23290 }, { "epoch": 1.182635072533665, "grad_norm": 0.044275755637726996, "learning_rate": 0.0004281210553277684, "loss": 0.5064, "step": 23295 }, { "epoch": 1.1828889086316965, "grad_norm": 0.03453100961430329, "learning_rate": 0.0004279018374375817, "loss": 0.518, "step": 23300 }, { "epoch": 1.183142744729728, "grad_norm": 0.02331481023981138, "learning_rate": 0.00042768263369990486, "loss": 0.5057, "step": 23305 }, { "epoch": 1.1833965808277596, "grad_norm": 0.02423433591461226, "learning_rate": 0.00042746344415776634, "loss": 0.5355, "step": 23310 }, { "epoch": 1.183650416925791, "grad_norm": 0.020158746070223478, "learning_rate": 0.00042724426885419197, "loss": 0.5056, "step": 23315 }, { "epoch": 1.1839042530238224, "grad_norm": 0.02346254177392359, "learning_rate": 0.0004270251078322048, "loss": 0.4555, "step": 23320 }, { "epoch": 1.184158089121854, "grad_norm": 0.02215680364485662, "learning_rate": 0.000426805961134825, "loss": 0.4967, "step": 23325 }, { "epoch": 1.1844119252198855, "grad_norm": 0.025983453386552117, "learning_rate": 0.00042658682880507005, "loss": 0.488, "step": 23330 }, { "epoch": 1.184665761317917, "grad_norm": 0.02558287853502543, "learning_rate": 0.0004263677108859545, "loss": 0.5222, "step": 23335 }, { "epoch": 1.1849195974159485, "grad_norm": 0.029862575005858382, "learning_rate": 0.0004261486074204899, "loss": 0.5155, "step": 23340 }, { "epoch": 1.18517343351398, "grad_norm": 0.03365037560913944, "learning_rate": 0.0004259295184516855, "loss": 0.5005, "step": 23345 }, { "epoch": 1.1854272696120116, "grad_norm": 0.02324079187444754, "learning_rate": 0.00042571044402254734, "loss": 0.5356, "step": 23350 }, { "epoch": 1.1856811057100431, "grad_norm": 0.023311446376579104, "learning_rate": 0.00042549138417607855, "loss": 0.5226, "step": 23355 }, { "epoch": 1.1859349418080745, "grad_norm": 0.02264754449055334, "learning_rate": 0.0004252723389552794, "loss": 0.4857, "step": 23360 }, { "epoch": 1.186188777906106, "grad_norm": 0.024792949347000763, "learning_rate": 0.0004250533084031474, "loss": 0.4847, "step": 23365 }, { "epoch": 1.1864426140041375, "grad_norm": 0.02365423995133714, "learning_rate": 0.0004248342925626773, "loss": 0.5237, "step": 23370 }, { "epoch": 1.186696450102169, "grad_norm": 0.025122984233723788, "learning_rate": 0.0004246152914768607, "loss": 0.4653, "step": 23375 }, { "epoch": 1.1869502862002006, "grad_norm": 0.037495988695979386, "learning_rate": 0.00042439630518868645, "loss": 0.4927, "step": 23380 }, { "epoch": 1.187204122298232, "grad_norm": 0.02744207164178393, "learning_rate": 0.00042417733374114044, "loss": 0.4939, "step": 23385 }, { "epoch": 1.1874579583962634, "grad_norm": 0.023497982340652368, "learning_rate": 0.00042395837717720564, "loss": 0.517, "step": 23390 }, { "epoch": 1.187711794494295, "grad_norm": 0.022057752647203964, "learning_rate": 0.0004237394355398622, "loss": 0.4763, "step": 23395 }, { "epoch": 1.1879656305923265, "grad_norm": 0.022675614809165457, "learning_rate": 0.0004235205088720872, "loss": 0.4862, "step": 23400 }, { "epoch": 1.188219466690358, "grad_norm": 0.027088398303537622, "learning_rate": 0.000423301597216855, "loss": 0.4862, "step": 23405 }, { "epoch": 1.1884733027883896, "grad_norm": 0.021433012116146354, "learning_rate": 0.0004230827006171367, "loss": 0.4712, "step": 23410 }, { "epoch": 1.1887271388864211, "grad_norm": 0.026295761373166058, "learning_rate": 0.00042286381911590075, "loss": 0.5153, "step": 23415 }, { "epoch": 1.1889809749844527, "grad_norm": 0.0245055560686366, "learning_rate": 0.0004226449527561124, "loss": 0.4915, "step": 23420 }, { "epoch": 1.189234811082484, "grad_norm": 0.023358170637321662, "learning_rate": 0.0004224261015807341, "loss": 0.5051, "step": 23425 }, { "epoch": 1.1894886471805155, "grad_norm": 0.02494520282712566, "learning_rate": 0.00042220726563272514, "loss": 0.5168, "step": 23430 }, { "epoch": 1.189742483278547, "grad_norm": 0.026188639690865594, "learning_rate": 0.0004219884449550421, "loss": 0.5007, "step": 23435 }, { "epoch": 1.1899963193765786, "grad_norm": 0.02181777095566158, "learning_rate": 0.0004217696395906381, "loss": 0.4987, "step": 23440 }, { "epoch": 1.19025015547461, "grad_norm": 0.022429924845303997, "learning_rate": 0.00042155084958246387, "loss": 0.5191, "step": 23445 }, { "epoch": 1.1905039915726416, "grad_norm": 0.022466417610871727, "learning_rate": 0.0004213320749734665, "loss": 0.507, "step": 23450 }, { "epoch": 1.190757827670673, "grad_norm": 0.03174964407796664, "learning_rate": 0.0004211133158065906, "loss": 0.487, "step": 23455 }, { "epoch": 1.1910116637687045, "grad_norm": 0.02782479444320585, "learning_rate": 0.0004208945721247772, "loss": 0.4833, "step": 23460 }, { "epoch": 1.191265499866736, "grad_norm": 0.023164893847604912, "learning_rate": 0.0004206758439709649, "loss": 0.5069, "step": 23465 }, { "epoch": 1.1915193359647676, "grad_norm": 0.02156991148966579, "learning_rate": 0.00042045713138808894, "loss": 0.5061, "step": 23470 }, { "epoch": 1.191773172062799, "grad_norm": 0.03206441910648273, "learning_rate": 0.0004202384344190814, "loss": 0.4991, "step": 23475 }, { "epoch": 1.1920270081608306, "grad_norm": 0.033785505523490716, "learning_rate": 0.00042001975310687134, "loss": 0.5094, "step": 23480 }, { "epoch": 1.1922808442588622, "grad_norm": 0.029304316918651336, "learning_rate": 0.0004198010874943849, "loss": 0.4911, "step": 23485 }, { "epoch": 1.1925346803568935, "grad_norm": 0.024414079665053167, "learning_rate": 0.0004195824376245451, "loss": 0.5014, "step": 23490 }, { "epoch": 1.192788516454925, "grad_norm": 0.02755089300926629, "learning_rate": 0.0004193638035402717, "loss": 0.4836, "step": 23495 }, { "epoch": 1.1930423525529565, "grad_norm": 0.04448377045587385, "learning_rate": 0.0004191451852844816, "loss": 0.5055, "step": 23500 }, { "epoch": 1.193296188650988, "grad_norm": 0.02563765603314885, "learning_rate": 0.00041892658290008835, "loss": 0.4757, "step": 23505 }, { "epoch": 1.1935500247490196, "grad_norm": 0.029729997745895555, "learning_rate": 0.00041870799643000257, "loss": 0.5031, "step": 23510 }, { "epoch": 1.1938038608470511, "grad_norm": 0.022197332496704934, "learning_rate": 0.00041848942591713167, "loss": 0.493, "step": 23515 }, { "epoch": 1.1940576969450825, "grad_norm": 0.02508559588342066, "learning_rate": 0.0004182708714043799, "loss": 0.493, "step": 23520 }, { "epoch": 1.194311533043114, "grad_norm": 0.02801379075039443, "learning_rate": 0.0004180523329346486, "loss": 0.4848, "step": 23525 }, { "epoch": 1.1945653691411455, "grad_norm": 0.02487825457446648, "learning_rate": 0.00041783381055083565, "loss": 0.5065, "step": 23530 }, { "epoch": 1.194819205239177, "grad_norm": 0.024301005036306705, "learning_rate": 0.0004176153042958359, "loss": 0.4799, "step": 23535 }, { "epoch": 1.1950730413372086, "grad_norm": 0.028592227759142063, "learning_rate": 0.0004173968142125411, "loss": 0.4706, "step": 23540 }, { "epoch": 1.1953268774352401, "grad_norm": 0.020208642081283146, "learning_rate": 0.00041717834034383974, "loss": 0.4838, "step": 23545 }, { "epoch": 1.1955807135332717, "grad_norm": 0.025013814826272913, "learning_rate": 0.0004169598827326171, "loss": 0.5043, "step": 23550 }, { "epoch": 1.195834549631303, "grad_norm": 0.02413307493483816, "learning_rate": 0.0004167414414217554, "loss": 0.5207, "step": 23555 }, { "epoch": 1.1960883857293345, "grad_norm": 0.02524654414343768, "learning_rate": 0.0004165230164541335, "loss": 0.465, "step": 23560 }, { "epoch": 1.196342221827366, "grad_norm": 0.03571503975031509, "learning_rate": 0.00041630460787262717, "loss": 0.4715, "step": 23565 }, { "epoch": 1.1965960579253976, "grad_norm": 0.05081085862061478, "learning_rate": 0.00041608621572010896, "loss": 0.5229, "step": 23570 }, { "epoch": 1.1968498940234291, "grad_norm": 0.04184691240326009, "learning_rate": 0.0004158678400394481, "loss": 0.5048, "step": 23575 }, { "epoch": 1.1971037301214607, "grad_norm": 0.026686408386705083, "learning_rate": 0.00041564948087351053, "loss": 0.5156, "step": 23580 }, { "epoch": 1.197357566219492, "grad_norm": 0.03417816047008303, "learning_rate": 0.0004154311382651593, "loss": 0.4824, "step": 23585 }, { "epoch": 1.1976114023175235, "grad_norm": 0.026200514928376224, "learning_rate": 0.000415212812257254, "loss": 0.4909, "step": 23590 }, { "epoch": 1.197865238415555, "grad_norm": 0.033241782938311475, "learning_rate": 0.0004149945028926507, "loss": 0.4919, "step": 23595 }, { "epoch": 1.1981190745135866, "grad_norm": 0.023753551533967514, "learning_rate": 0.0004147762102142027, "loss": 0.4899, "step": 23600 }, { "epoch": 1.198372910611618, "grad_norm": 0.026945495744205454, "learning_rate": 0.0004145579342647595, "loss": 0.4666, "step": 23605 }, { "epoch": 1.1986267467096496, "grad_norm": 0.02038326341118833, "learning_rate": 0.0004143396750871678, "loss": 0.4999, "step": 23610 }, { "epoch": 1.1988805828076812, "grad_norm": 0.022956204878543746, "learning_rate": 0.0004141214327242707, "loss": 0.491, "step": 23615 }, { "epoch": 1.1991344189057125, "grad_norm": 0.0219273779464646, "learning_rate": 0.000413903207218908, "loss": 0.4867, "step": 23620 }, { "epoch": 1.199388255003744, "grad_norm": 0.021082208350651496, "learning_rate": 0.0004136849986139164, "loss": 0.4782, "step": 23625 }, { "epoch": 1.1996420911017756, "grad_norm": 0.021461829412799976, "learning_rate": 0.0004134668069521291, "loss": 0.5114, "step": 23630 }, { "epoch": 1.199895927199807, "grad_norm": 0.027611384568723554, "learning_rate": 0.00041324863227637607, "loss": 0.5122, "step": 23635 }, { "epoch": 1.2001497632978386, "grad_norm": 0.0313518897326471, "learning_rate": 0.0004130304746294839, "loss": 0.5, "step": 23640 }, { "epoch": 1.2004035993958702, "grad_norm": 0.021179468678818012, "learning_rate": 0.0004128123340542757, "loss": 0.4911, "step": 23645 }, { "epoch": 1.2006574354939015, "grad_norm": 0.021479315942391494, "learning_rate": 0.0004125942105935717, "loss": 0.5172, "step": 23650 }, { "epoch": 1.200911271591933, "grad_norm": 0.030395801642170088, "learning_rate": 0.00041237610429018824, "loss": 0.4812, "step": 23655 }, { "epoch": 1.2011651076899645, "grad_norm": 0.026149647662465924, "learning_rate": 0.0004121580151869385, "loss": 0.5146, "step": 23660 }, { "epoch": 1.201418943787996, "grad_norm": 0.025141949645227077, "learning_rate": 0.0004119399433266323, "loss": 0.5049, "step": 23665 }, { "epoch": 1.2016727798860276, "grad_norm": 0.022368500516788497, "learning_rate": 0.0004117218887520761, "loss": 0.4685, "step": 23670 }, { "epoch": 1.2019266159840591, "grad_norm": 0.028886687584022533, "learning_rate": 0.00041150385150607287, "loss": 0.5113, "step": 23675 }, { "epoch": 1.2021804520820907, "grad_norm": 0.026390424071676334, "learning_rate": 0.0004112858316314223, "loss": 0.486, "step": 23680 }, { "epoch": 1.2024342881801222, "grad_norm": 0.027944997664867154, "learning_rate": 0.00041106782917092055, "loss": 0.5285, "step": 23685 }, { "epoch": 1.2026881242781535, "grad_norm": 0.024740823096402697, "learning_rate": 0.00041084984416736044, "loss": 0.5107, "step": 23690 }, { "epoch": 1.202941960376185, "grad_norm": 0.02106534018159003, "learning_rate": 0.0004106318766635313, "loss": 0.4754, "step": 23695 }, { "epoch": 1.2031957964742166, "grad_norm": 0.0238222430975329, "learning_rate": 0.00041041392670221913, "loss": 0.4942, "step": 23700 }, { "epoch": 1.2034496325722481, "grad_norm": 0.0237081263610808, "learning_rate": 0.00041019599432620614, "loss": 0.4864, "step": 23705 }, { "epoch": 1.2037034686702797, "grad_norm": 0.02344590182148663, "learning_rate": 0.00040997807957827184, "loss": 0.4607, "step": 23710 }, { "epoch": 1.2039573047683112, "grad_norm": 0.022849121360239078, "learning_rate": 0.0004097601825011916, "loss": 0.4918, "step": 23715 }, { "epoch": 1.2042111408663425, "grad_norm": 0.02180468331845955, "learning_rate": 0.00040954230313773745, "loss": 0.4798, "step": 23720 }, { "epoch": 1.204464976964374, "grad_norm": 0.0243468829191348, "learning_rate": 0.0004093244415306781, "loss": 0.5206, "step": 23725 }, { "epoch": 1.2047188130624056, "grad_norm": 0.0376925453970005, "learning_rate": 0.00040910659772277867, "loss": 0.4733, "step": 23730 }, { "epoch": 1.2049726491604371, "grad_norm": 0.02194738892506034, "learning_rate": 0.0004088887717568009, "loss": 0.4802, "step": 23735 }, { "epoch": 1.2052264852584686, "grad_norm": 0.02392526501857356, "learning_rate": 0.0004086709636755029, "loss": 0.5137, "step": 23740 }, { "epoch": 1.2054803213565002, "grad_norm": 0.023675767647900945, "learning_rate": 0.0004084531735216392, "loss": 0.4837, "step": 23745 }, { "epoch": 1.2057341574545317, "grad_norm": 0.029907048109683313, "learning_rate": 0.000408235401337961, "loss": 0.4991, "step": 23750 }, { "epoch": 1.205987993552563, "grad_norm": 0.023611085556861953, "learning_rate": 0.00040801764716721586, "loss": 0.5074, "step": 23755 }, { "epoch": 1.2062418296505946, "grad_norm": 0.02342929941369781, "learning_rate": 0.00040779991105214787, "loss": 0.4753, "step": 23760 }, { "epoch": 1.206495665748626, "grad_norm": 0.024184197724051767, "learning_rate": 0.00040758219303549734, "loss": 0.5053, "step": 23765 }, { "epoch": 1.2067495018466576, "grad_norm": 0.022748064850076327, "learning_rate": 0.00040736449316000156, "loss": 0.5263, "step": 23770 }, { "epoch": 1.2070033379446892, "grad_norm": 0.04285865957318062, "learning_rate": 0.00040714681146839394, "loss": 0.4945, "step": 23775 }, { "epoch": 1.2072571740427207, "grad_norm": 0.027550877084688605, "learning_rate": 0.00040692914800340407, "loss": 0.519, "step": 23780 }, { "epoch": 1.207511010140752, "grad_norm": 0.028024709655043955, "learning_rate": 0.00040671150280775835, "loss": 0.5254, "step": 23785 }, { "epoch": 1.2077648462387836, "grad_norm": 0.027347216039832773, "learning_rate": 0.0004064938759241794, "loss": 0.5175, "step": 23790 }, { "epoch": 1.208018682336815, "grad_norm": 0.03246702500036394, "learning_rate": 0.0004062762673953863, "loss": 0.4976, "step": 23795 }, { "epoch": 1.2082725184348466, "grad_norm": 0.024018195972550192, "learning_rate": 0.00040605867726409446, "loss": 0.5024, "step": 23800 }, { "epoch": 1.2085263545328782, "grad_norm": 0.036249081128423837, "learning_rate": 0.00040584110557301576, "loss": 0.5149, "step": 23805 }, { "epoch": 1.2087801906309097, "grad_norm": 0.025423232304757903, "learning_rate": 0.0004056235523648586, "loss": 0.4963, "step": 23810 }, { "epoch": 1.2090340267289412, "grad_norm": 0.02314566298487677, "learning_rate": 0.0004054060176823273, "loss": 0.4801, "step": 23815 }, { "epoch": 1.2092878628269725, "grad_norm": 0.031246175124657095, "learning_rate": 0.00040518850156812315, "loss": 0.5157, "step": 23820 }, { "epoch": 1.209541698925004, "grad_norm": 0.025447196021980784, "learning_rate": 0.0004049710040649431, "loss": 0.4978, "step": 23825 }, { "epoch": 1.2097955350230356, "grad_norm": 0.19408857583730024, "learning_rate": 0.0004047535252154812, "loss": 0.4927, "step": 23830 }, { "epoch": 1.2100493711210671, "grad_norm": 0.037859047454809586, "learning_rate": 0.0004045360650624272, "loss": 0.492, "step": 23835 }, { "epoch": 1.2103032072190987, "grad_norm": 0.02407784644881004, "learning_rate": 0.0004043186236484677, "loss": 0.5306, "step": 23840 }, { "epoch": 1.2105570433171302, "grad_norm": 0.021170736417903336, "learning_rate": 0.0004041012010162852, "loss": 0.4789, "step": 23845 }, { "epoch": 1.2108108794151615, "grad_norm": 0.02333015002979291, "learning_rate": 0.0004038837972085586, "loss": 0.5165, "step": 23850 }, { "epoch": 1.211064715513193, "grad_norm": 0.022900385191847467, "learning_rate": 0.0004036664122679633, "loss": 0.529, "step": 23855 }, { "epoch": 1.2113185516112246, "grad_norm": 0.02900664488594827, "learning_rate": 0.00040344904623717094, "loss": 0.5186, "step": 23860 }, { "epoch": 1.2115723877092561, "grad_norm": 0.02318704654191729, "learning_rate": 0.00040323169915884924, "loss": 0.5114, "step": 23865 }, { "epoch": 1.2118262238072877, "grad_norm": 0.02241334253331378, "learning_rate": 0.0004030143710756624, "loss": 0.5244, "step": 23870 }, { "epoch": 1.2120800599053192, "grad_norm": 0.02019981834426705, "learning_rate": 0.0004027970620302709, "loss": 0.5171, "step": 23875 }, { "epoch": 1.2123338960033507, "grad_norm": 0.021530806359524663, "learning_rate": 0.0004025797720653313, "loss": 0.5028, "step": 23880 }, { "epoch": 1.212587732101382, "grad_norm": 0.023565880923201048, "learning_rate": 0.00040236250122349643, "loss": 0.5048, "step": 23885 }, { "epoch": 1.2128415681994136, "grad_norm": 0.02799458782575493, "learning_rate": 0.0004021452495474159, "loss": 0.5116, "step": 23890 }, { "epoch": 1.2130954042974451, "grad_norm": 0.026146402918077717, "learning_rate": 0.0004019280170797349, "loss": 0.5023, "step": 23895 }, { "epoch": 1.2133492403954766, "grad_norm": 0.04633882230495636, "learning_rate": 0.000401710803863095, "loss": 0.499, "step": 23900 }, { "epoch": 1.2136030764935082, "grad_norm": 0.03278896093851392, "learning_rate": 0.0004014936099401341, "loss": 0.484, "step": 23905 }, { "epoch": 1.2138569125915397, "grad_norm": 0.023279990625039652, "learning_rate": 0.0004012764353534864, "loss": 0.5017, "step": 23910 }, { "epoch": 1.214110748689571, "grad_norm": 0.025580791250068846, "learning_rate": 0.00040105928014578206, "loss": 0.4841, "step": 23915 }, { "epoch": 1.2143645847876026, "grad_norm": 0.0231176720102259, "learning_rate": 0.00040084214435964766, "loss": 0.4992, "step": 23920 }, { "epoch": 1.214618420885634, "grad_norm": 0.02248737002851678, "learning_rate": 0.0004006250280377058, "loss": 0.5068, "step": 23925 }, { "epoch": 1.2148722569836656, "grad_norm": 0.022927316960127005, "learning_rate": 0.0004004079312225754, "loss": 0.492, "step": 23930 }, { "epoch": 1.2151260930816972, "grad_norm": 0.02313720027070532, "learning_rate": 0.00040019085395687134, "loss": 0.4949, "step": 23935 }, { "epoch": 1.2153799291797287, "grad_norm": 0.030106027729234503, "learning_rate": 0.00039997379628320493, "loss": 0.5044, "step": 23940 }, { "epoch": 1.2156337652777602, "grad_norm": 0.023137200469528597, "learning_rate": 0.0003997567582441834, "loss": 0.5162, "step": 23945 }, { "epoch": 1.2158876013757918, "grad_norm": 0.02729799923558458, "learning_rate": 0.00039953973988241035, "loss": 0.5144, "step": 23950 }, { "epoch": 1.216141437473823, "grad_norm": 0.02110940033197545, "learning_rate": 0.00039932274124048546, "loss": 0.4959, "step": 23955 }, { "epoch": 1.2163952735718546, "grad_norm": 0.023832077215208247, "learning_rate": 0.00039910576236100437, "loss": 0.5058, "step": 23960 }, { "epoch": 1.2166491096698862, "grad_norm": 0.021490387702712672, "learning_rate": 0.000398888803286559, "loss": 0.5041, "step": 23965 }, { "epoch": 1.2169029457679177, "grad_norm": 0.022133880712670376, "learning_rate": 0.0003986718640597372, "loss": 0.5059, "step": 23970 }, { "epoch": 1.2171567818659492, "grad_norm": 0.021425175563689818, "learning_rate": 0.0003984549447231232, "loss": 0.4609, "step": 23975 }, { "epoch": 1.2174106179639805, "grad_norm": 0.019327536501010368, "learning_rate": 0.0003982380453192972, "loss": 0.4943, "step": 23980 }, { "epoch": 1.217664454062012, "grad_norm": 0.023998177431719198, "learning_rate": 0.0003980211658908354, "loss": 0.4973, "step": 23985 }, { "epoch": 1.2179182901600436, "grad_norm": 0.033915972076828955, "learning_rate": 0.0003978043064803101, "loss": 0.5149, "step": 23990 }, { "epoch": 1.2181721262580751, "grad_norm": 0.0237613621554742, "learning_rate": 0.0003975874671302899, "loss": 0.5135, "step": 23995 }, { "epoch": 1.2184259623561067, "grad_norm": 0.026750732728818505, "learning_rate": 0.00039737064788333907, "loss": 0.503, "step": 24000 }, { "epoch": 1.2186797984541382, "grad_norm": 0.025308308981860574, "learning_rate": 0.0003971538487820181, "loss": 0.5002, "step": 24005 }, { "epoch": 1.2189336345521697, "grad_norm": 0.02222643637338913, "learning_rate": 0.0003969370698688839, "loss": 0.5231, "step": 24010 }, { "epoch": 1.2191874706502013, "grad_norm": 0.031118899097702987, "learning_rate": 0.0003967203111864889, "loss": 0.5099, "step": 24015 }, { "epoch": 1.2194413067482326, "grad_norm": 0.022092906382930166, "learning_rate": 0.0003965035727773818, "loss": 0.4864, "step": 24020 }, { "epoch": 1.2196951428462641, "grad_norm": 0.028023218544810078, "learning_rate": 0.0003962868546841072, "loss": 0.4551, "step": 24025 }, { "epoch": 1.2199489789442957, "grad_norm": 0.023384820051804464, "learning_rate": 0.0003960701569492058, "loss": 0.4975, "step": 24030 }, { "epoch": 1.2202028150423272, "grad_norm": 0.022821055406039938, "learning_rate": 0.00039585347961521434, "loss": 0.5172, "step": 24035 }, { "epoch": 1.2204566511403587, "grad_norm": 0.030261713548462982, "learning_rate": 0.0003956368227246654, "loss": 0.5088, "step": 24040 }, { "epoch": 1.2207104872383903, "grad_norm": 0.023111376142268682, "learning_rate": 0.00039542018632008773, "loss": 0.5049, "step": 24045 }, { "epoch": 1.2209643233364216, "grad_norm": 0.021954597042835224, "learning_rate": 0.00039520357044400595, "loss": 0.4923, "step": 24050 }, { "epoch": 1.221218159434453, "grad_norm": 0.022076500459662276, "learning_rate": 0.0003949869751389407, "loss": 0.4696, "step": 24055 }, { "epoch": 1.2214719955324846, "grad_norm": 0.023319825211927294, "learning_rate": 0.0003947704004474085, "loss": 0.5188, "step": 24060 }, { "epoch": 1.2217258316305162, "grad_norm": 0.02700682357188388, "learning_rate": 0.0003945538464119218, "loss": 0.5128, "step": 24065 }, { "epoch": 1.2219796677285477, "grad_norm": 0.025886961687645718, "learning_rate": 0.00039433731307498925, "loss": 0.5348, "step": 24070 }, { "epoch": 1.2222335038265792, "grad_norm": 0.02644977307390592, "learning_rate": 0.00039412080047911526, "loss": 0.5141, "step": 24075 }, { "epoch": 1.2224873399246108, "grad_norm": 0.028225811453520037, "learning_rate": 0.00039390430866680017, "loss": 0.5302, "step": 24080 }, { "epoch": 1.222741176022642, "grad_norm": 0.035161610050816194, "learning_rate": 0.00039368783768054005, "loss": 0.4871, "step": 24085 }, { "epoch": 1.2229950121206736, "grad_norm": 0.08376585705403677, "learning_rate": 0.00039347138756282737, "loss": 0.5101, "step": 24090 }, { "epoch": 1.2232488482187052, "grad_norm": 0.028243847099455724, "learning_rate": 0.0003932549583561499, "loss": 0.5145, "step": 24095 }, { "epoch": 1.2235026843167367, "grad_norm": 0.02241171828134657, "learning_rate": 0.00039303855010299187, "loss": 0.4779, "step": 24100 }, { "epoch": 1.2237565204147682, "grad_norm": 0.027565864630121506, "learning_rate": 0.00039282216284583304, "loss": 0.5191, "step": 24105 }, { "epoch": 1.2240103565127998, "grad_norm": 0.04141754255679934, "learning_rate": 0.00039260579662714915, "loss": 0.5127, "step": 24110 }, { "epoch": 1.224264192610831, "grad_norm": 0.03073490220186529, "learning_rate": 0.0003923894514894118, "loss": 0.5125, "step": 24115 }, { "epoch": 1.2245180287088626, "grad_norm": 0.02681137516278315, "learning_rate": 0.00039217312747508843, "loss": 0.4995, "step": 24120 }, { "epoch": 1.2247718648068942, "grad_norm": 0.03856684515614741, "learning_rate": 0.00039195682462664225, "loss": 0.4739, "step": 24125 }, { "epoch": 1.2250257009049257, "grad_norm": 0.039563497611539114, "learning_rate": 0.0003917405429865327, "loss": 0.4841, "step": 24130 }, { "epoch": 1.2252795370029572, "grad_norm": 0.02634194065105358, "learning_rate": 0.0003915242825972148, "loss": 0.5017, "step": 24135 }, { "epoch": 1.2255333731009888, "grad_norm": 0.022677125872718785, "learning_rate": 0.0003913080435011392, "loss": 0.5316, "step": 24140 }, { "epoch": 1.2257872091990203, "grad_norm": 0.021362302216391706, "learning_rate": 0.00039109182574075256, "loss": 0.4939, "step": 24145 }, { "epoch": 1.2260410452970516, "grad_norm": 0.02490355388687261, "learning_rate": 0.00039087562935849745, "loss": 0.4987, "step": 24150 }, { "epoch": 1.2262948813950831, "grad_norm": 0.02658190177665588, "learning_rate": 0.00039065945439681213, "loss": 0.4917, "step": 24155 }, { "epoch": 1.2265487174931147, "grad_norm": 0.020375606787208123, "learning_rate": 0.0003904433008981306, "loss": 0.4781, "step": 24160 }, { "epoch": 1.2268025535911462, "grad_norm": 0.02515928592237166, "learning_rate": 0.00039022716890488275, "loss": 0.4924, "step": 24165 }, { "epoch": 1.2270563896891777, "grad_norm": 0.036589589288339636, "learning_rate": 0.0003900110584594942, "loss": 0.5066, "step": 24170 }, { "epoch": 1.2273102257872093, "grad_norm": 0.023900416252566864, "learning_rate": 0.00038979496960438637, "loss": 0.4742, "step": 24175 }, { "epoch": 1.2275640618852406, "grad_norm": 0.024765774151169644, "learning_rate": 0.0003895789023819764, "loss": 0.5312, "step": 24180 }, { "epoch": 1.2278178979832721, "grad_norm": 0.023371819709462876, "learning_rate": 0.0003893628568346771, "loss": 0.495, "step": 24185 }, { "epoch": 1.2280717340813037, "grad_norm": 0.023492189347015212, "learning_rate": 0.0003891468330048974, "loss": 0.4349, "step": 24190 }, { "epoch": 1.2283255701793352, "grad_norm": 0.022915433000463895, "learning_rate": 0.00038893083093504154, "loss": 0.4776, "step": 24195 }, { "epoch": 1.2285794062773667, "grad_norm": 0.03768217141505213, "learning_rate": 0.00038871485066750965, "loss": 0.4771, "step": 24200 }, { "epoch": 1.2288332423753983, "grad_norm": 0.08372265623632144, "learning_rate": 0.00038849889224469765, "loss": 0.4923, "step": 24205 }, { "epoch": 1.2290870784734298, "grad_norm": 0.023183814231157947, "learning_rate": 0.000388282955708997, "loss": 0.4936, "step": 24210 }, { "epoch": 1.2293409145714613, "grad_norm": 0.021976570825354085, "learning_rate": 0.0003880670411027951, "loss": 0.4798, "step": 24215 }, { "epoch": 1.2295947506694926, "grad_norm": 0.02966175812308913, "learning_rate": 0.0003878511484684747, "loss": 0.4887, "step": 24220 }, { "epoch": 1.2298485867675242, "grad_norm": 0.02807429193190882, "learning_rate": 0.00038763527784841463, "loss": 0.5163, "step": 24225 }, { "epoch": 1.2301024228655557, "grad_norm": 0.023104952777379555, "learning_rate": 0.00038741942928498913, "loss": 0.5479, "step": 24230 }, { "epoch": 1.2303562589635872, "grad_norm": 0.029930387788839295, "learning_rate": 0.0003872036028205683, "loss": 0.4894, "step": 24235 }, { "epoch": 1.2306100950616188, "grad_norm": 0.024336392055221638, "learning_rate": 0.00038698779849751766, "loss": 0.4459, "step": 24240 }, { "epoch": 1.23086393115965, "grad_norm": 0.022263929857281015, "learning_rate": 0.0003867720163581983, "loss": 0.4906, "step": 24245 }, { "epoch": 1.2311177672576816, "grad_norm": 0.03133076104283715, "learning_rate": 0.0003865562564449678, "loss": 0.4989, "step": 24250 }, { "epoch": 1.2313716033557132, "grad_norm": 0.03260231148423326, "learning_rate": 0.0003863405188001783, "loss": 0.5352, "step": 24255 }, { "epoch": 1.2316254394537447, "grad_norm": 0.04700860380299025, "learning_rate": 0.00038612480346617825, "loss": 0.477, "step": 24260 }, { "epoch": 1.2318792755517762, "grad_norm": 0.02740921040477875, "learning_rate": 0.00038590911048531136, "loss": 0.5073, "step": 24265 }, { "epoch": 1.2321331116498078, "grad_norm": 0.028178131413544947, "learning_rate": 0.00038569343989991705, "loss": 0.5108, "step": 24270 }, { "epoch": 1.2323869477478393, "grad_norm": 0.03757846827473028, "learning_rate": 0.0003854777917523305, "loss": 0.5107, "step": 24275 }, { "epoch": 1.2326407838458708, "grad_norm": 0.023537367550062548, "learning_rate": 0.00038526216608488227, "loss": 0.5113, "step": 24280 }, { "epoch": 1.2328946199439021, "grad_norm": 0.023626230913656128, "learning_rate": 0.0003850465629398987, "loss": 0.5004, "step": 24285 }, { "epoch": 1.2331484560419337, "grad_norm": 0.0223229940793588, "learning_rate": 0.00038483098235970147, "loss": 0.474, "step": 24290 }, { "epoch": 1.2334022921399652, "grad_norm": 0.024562899395510082, "learning_rate": 0.00038461542438660815, "loss": 0.5167, "step": 24295 }, { "epoch": 1.2336561282379968, "grad_norm": 0.028102102596075113, "learning_rate": 0.00038439988906293157, "loss": 0.4756, "step": 24300 }, { "epoch": 1.2339099643360283, "grad_norm": 0.025740036122312678, "learning_rate": 0.00038418437643098006, "loss": 0.4896, "step": 24305 }, { "epoch": 1.2341638004340598, "grad_norm": 0.026595707267021335, "learning_rate": 0.0003839688865330581, "loss": 0.4887, "step": 24310 }, { "epoch": 1.2344176365320911, "grad_norm": 0.024095548845127494, "learning_rate": 0.00038375341941146505, "loss": 0.4901, "step": 24315 }, { "epoch": 1.2346714726301227, "grad_norm": 0.021876912693680587, "learning_rate": 0.0003835379751084961, "loss": 0.4839, "step": 24320 }, { "epoch": 1.2349253087281542, "grad_norm": 0.02794133991499325, "learning_rate": 0.00038332255366644175, "loss": 0.4859, "step": 24325 }, { "epoch": 1.2351791448261857, "grad_norm": 0.03176212918526051, "learning_rate": 0.0003831071551275883, "loss": 0.5001, "step": 24330 }, { "epoch": 1.2354329809242173, "grad_norm": 0.02008532610763912, "learning_rate": 0.0003828917795342173, "loss": 0.474, "step": 24335 }, { "epoch": 1.2356868170222488, "grad_norm": 0.02404321499467447, "learning_rate": 0.000382676426928606, "loss": 0.523, "step": 24340 }, { "epoch": 1.2359406531202803, "grad_norm": 0.020304618971407788, "learning_rate": 0.00038246109735302696, "loss": 0.4927, "step": 24345 }, { "epoch": 1.2361944892183117, "grad_norm": 0.021943643137327405, "learning_rate": 0.0003822457908497484, "loss": 0.5214, "step": 24350 }, { "epoch": 1.2364483253163432, "grad_norm": 0.026610455716482826, "learning_rate": 0.00038203050746103386, "loss": 0.5162, "step": 24355 }, { "epoch": 1.2367021614143747, "grad_norm": 0.028298758995324663, "learning_rate": 0.00038181524722914235, "loss": 0.509, "step": 24360 }, { "epoch": 1.2369559975124063, "grad_norm": 0.037423844884409904, "learning_rate": 0.0003816000101963282, "loss": 0.5026, "step": 24365 }, { "epoch": 1.2372098336104378, "grad_norm": 0.022982766278687995, "learning_rate": 0.00038138479640484183, "loss": 0.5009, "step": 24370 }, { "epoch": 1.2374636697084693, "grad_norm": 0.024787564495660528, "learning_rate": 0.00038116960589692844, "loss": 0.4921, "step": 24375 }, { "epoch": 1.2377175058065006, "grad_norm": 0.02088814143474573, "learning_rate": 0.00038095443871482876, "loss": 0.4885, "step": 24380 }, { "epoch": 1.2379713419045322, "grad_norm": 0.021715700250562936, "learning_rate": 0.0003807392949007791, "loss": 0.4914, "step": 24385 }, { "epoch": 1.2382251780025637, "grad_norm": 0.027943308695689863, "learning_rate": 0.00038052417449701106, "loss": 0.4809, "step": 24390 }, { "epoch": 1.2384790141005952, "grad_norm": 0.024457173191587765, "learning_rate": 0.00038030907754575173, "loss": 0.4905, "step": 24395 }, { "epoch": 1.2387328501986268, "grad_norm": 0.02298993158514161, "learning_rate": 0.0003800940040892236, "loss": 0.5076, "step": 24400 }, { "epoch": 1.2389866862966583, "grad_norm": 0.02407909122160034, "learning_rate": 0.00037987895416964455, "loss": 0.505, "step": 24405 }, { "epoch": 1.2392405223946898, "grad_norm": 0.023168723950299244, "learning_rate": 0.0003796639278292277, "loss": 0.4801, "step": 24410 }, { "epoch": 1.2394943584927212, "grad_norm": 0.022695039167580793, "learning_rate": 0.0003794489251101817, "loss": 0.4998, "step": 24415 }, { "epoch": 1.2397481945907527, "grad_norm": 0.025740573541342248, "learning_rate": 0.00037923394605471057, "loss": 0.4891, "step": 24420 }, { "epoch": 1.2400020306887842, "grad_norm": 0.022190743593717716, "learning_rate": 0.00037901899070501337, "loss": 0.5166, "step": 24425 }, { "epoch": 1.2402558667868158, "grad_norm": 0.02273570131152975, "learning_rate": 0.00037880405910328515, "loss": 0.4837, "step": 24430 }, { "epoch": 1.2405097028848473, "grad_norm": 0.02247267893276436, "learning_rate": 0.0003785891512917157, "loss": 0.503, "step": 24435 }, { "epoch": 1.2407635389828788, "grad_norm": 0.027397988839106935, "learning_rate": 0.00037837426731249035, "loss": 0.5138, "step": 24440 }, { "epoch": 1.2410173750809101, "grad_norm": 0.0360747255061236, "learning_rate": 0.0003781594072077899, "loss": 0.5247, "step": 24445 }, { "epoch": 1.2412712111789417, "grad_norm": 0.020839949347156835, "learning_rate": 0.00037794457101979, "loss": 0.4719, "step": 24450 }, { "epoch": 1.2415250472769732, "grad_norm": 0.023805826469806165, "learning_rate": 0.00037772975879066224, "loss": 0.528, "step": 24455 }, { "epoch": 1.2417788833750047, "grad_norm": 0.02229363835470009, "learning_rate": 0.00037751497056257304, "loss": 0.5116, "step": 24460 }, { "epoch": 1.2420327194730363, "grad_norm": 0.02478864260181994, "learning_rate": 0.0003773002063776843, "loss": 0.4765, "step": 24465 }, { "epoch": 1.2422865555710678, "grad_norm": 0.026718793626477343, "learning_rate": 0.00037708546627815317, "loss": 0.4911, "step": 24470 }, { "epoch": 1.2425403916690994, "grad_norm": 0.022347714227584022, "learning_rate": 0.000376870750306132, "loss": 0.5098, "step": 24475 }, { "epoch": 1.2427942277671309, "grad_norm": 0.02195899584253101, "learning_rate": 0.0003766560585037685, "loss": 0.4948, "step": 24480 }, { "epoch": 1.2430480638651622, "grad_norm": 0.024116784723832947, "learning_rate": 0.0003764413909132054, "loss": 0.4916, "step": 24485 }, { "epoch": 1.2433018999631937, "grad_norm": 0.025348617292129703, "learning_rate": 0.00037622674757658127, "loss": 0.4983, "step": 24490 }, { "epoch": 1.2435557360612253, "grad_norm": 0.028723847683448087, "learning_rate": 0.0003760121285360293, "loss": 0.4974, "step": 24495 }, { "epoch": 1.2438095721592568, "grad_norm": 0.02549204124081865, "learning_rate": 0.00037579753383367825, "loss": 0.4797, "step": 24500 }, { "epoch": 1.2440634082572883, "grad_norm": 0.02572550068222192, "learning_rate": 0.0003755829635116519, "loss": 0.4911, "step": 24505 }, { "epoch": 1.2443172443553197, "grad_norm": 0.02151255214938373, "learning_rate": 0.0003753684176120693, "loss": 0.4818, "step": 24510 }, { "epoch": 1.2445710804533512, "grad_norm": 0.03485262446331745, "learning_rate": 0.0003751538961770448, "loss": 0.4964, "step": 24515 }, { "epoch": 1.2448249165513827, "grad_norm": 0.020421404595873636, "learning_rate": 0.0003749393992486879, "loss": 0.4928, "step": 24520 }, { "epoch": 1.2450787526494143, "grad_norm": 0.02213502791269541, "learning_rate": 0.0003747249268691033, "loss": 0.5011, "step": 24525 }, { "epoch": 1.2453325887474458, "grad_norm": 0.039753961527774426, "learning_rate": 0.0003745104790803907, "loss": 0.5099, "step": 24530 }, { "epoch": 1.2455864248454773, "grad_norm": 0.020760234136978934, "learning_rate": 0.0003742960559246453, "loss": 0.471, "step": 24535 }, { "epoch": 1.2458402609435089, "grad_norm": 0.027835500455752622, "learning_rate": 0.0003740816574439572, "loss": 0.4985, "step": 24540 }, { "epoch": 1.2460940970415404, "grad_norm": 0.024653367801313206, "learning_rate": 0.00037386728368041185, "loss": 0.5224, "step": 24545 }, { "epoch": 1.2463479331395717, "grad_norm": 0.025495982916413043, "learning_rate": 0.00037365293467608954, "loss": 0.5011, "step": 24550 }, { "epoch": 1.2466017692376032, "grad_norm": 0.04123159279622747, "learning_rate": 0.00037343861047306617, "loss": 0.4972, "step": 24555 }, { "epoch": 1.2468556053356348, "grad_norm": 0.021840485245885177, "learning_rate": 0.00037322431111341245, "loss": 0.484, "step": 24560 }, { "epoch": 1.2471094414336663, "grad_norm": 0.02574861374734803, "learning_rate": 0.0003730100366391942, "loss": 0.4747, "step": 24565 }, { "epoch": 1.2473632775316978, "grad_norm": 0.029726658779212437, "learning_rate": 0.0003727957870924724, "loss": 0.468, "step": 24570 }, { "epoch": 1.2476171136297294, "grad_norm": 0.023386237965567654, "learning_rate": 0.0003725815625153033, "loss": 0.461, "step": 24575 }, { "epoch": 1.2478709497277607, "grad_norm": 0.02511675497455319, "learning_rate": 0.00037236736294973805, "loss": 0.4921, "step": 24580 }, { "epoch": 1.2481247858257922, "grad_norm": 0.022973041358262655, "learning_rate": 0.00037215318843782287, "loss": 0.4862, "step": 24585 }, { "epoch": 1.2483786219238238, "grad_norm": 0.025350959019371976, "learning_rate": 0.0003719390390215993, "loss": 0.4988, "step": 24590 }, { "epoch": 1.2486324580218553, "grad_norm": 0.022332478902111395, "learning_rate": 0.0003717249147431037, "loss": 0.4785, "step": 24595 }, { "epoch": 1.2488862941198868, "grad_norm": 0.02605623024379265, "learning_rate": 0.0003715108156443676, "loss": 0.5072, "step": 24600 }, { "epoch": 1.2491401302179184, "grad_norm": 0.023303658340822323, "learning_rate": 0.0003712967417674177, "loss": 0.5011, "step": 24605 }, { "epoch": 1.24939396631595, "grad_norm": 0.02406384974114823, "learning_rate": 0.0003710826931542753, "loss": 0.512, "step": 24610 }, { "epoch": 1.2496478024139812, "grad_norm": 0.02759010527581803, "learning_rate": 0.0003708686698469575, "loss": 0.4894, "step": 24615 }, { "epoch": 1.2499016385120127, "grad_norm": 0.03146887342339219, "learning_rate": 0.00037065467188747593, "loss": 0.4801, "step": 24620 }, { "epoch": 1.2501554746100443, "grad_norm": 0.030508984982450908, "learning_rate": 0.0003704406993178371, "loss": 0.4932, "step": 24625 }, { "epoch": 1.2504093107080758, "grad_norm": 0.02545476005935238, "learning_rate": 0.000370226752180043, "loss": 0.5114, "step": 24630 }, { "epoch": 1.2506631468061074, "grad_norm": 0.02230948334682719, "learning_rate": 0.0003700128305160901, "loss": 0.5132, "step": 24635 }, { "epoch": 1.2509169829041387, "grad_norm": 0.02206263568624184, "learning_rate": 0.00036979893436797054, "loss": 0.5015, "step": 24640 }, { "epoch": 1.2511708190021702, "grad_norm": 0.02398238667525352, "learning_rate": 0.0003695850637776707, "loss": 0.4873, "step": 24645 }, { "epoch": 1.2514246551002017, "grad_norm": 0.02334469006495267, "learning_rate": 0.0003693712187871725, "loss": 0.4907, "step": 24650 }, { "epoch": 1.2516784911982333, "grad_norm": 0.02105351477888063, "learning_rate": 0.0003691573994384526, "loss": 0.5158, "step": 24655 }, { "epoch": 1.2519323272962648, "grad_norm": 0.023611719484646673, "learning_rate": 0.00036894360577348275, "loss": 0.4912, "step": 24660 }, { "epoch": 1.2521861633942963, "grad_norm": 0.026957081159899983, "learning_rate": 0.00036872983783422944, "loss": 0.5186, "step": 24665 }, { "epoch": 1.2524399994923279, "grad_norm": 0.022112456631055228, "learning_rate": 0.0003685160956626542, "loss": 0.4708, "step": 24670 }, { "epoch": 1.2526938355903594, "grad_norm": 0.03960707777333683, "learning_rate": 0.0003683023793007138, "loss": 0.4818, "step": 24675 }, { "epoch": 1.252947671688391, "grad_norm": 0.024900205832922344, "learning_rate": 0.0003680886887903596, "loss": 0.4882, "step": 24680 }, { "epoch": 1.2532015077864223, "grad_norm": 0.02600602146903172, "learning_rate": 0.0003678750241735379, "loss": 0.482, "step": 24685 }, { "epoch": 1.2534553438844538, "grad_norm": 0.020762831712885167, "learning_rate": 0.00036766138549219007, "loss": 0.4721, "step": 24690 }, { "epoch": 1.2537091799824853, "grad_norm": 0.02365987982582443, "learning_rate": 0.00036744777278825225, "loss": 0.4996, "step": 24695 }, { "epoch": 1.2539630160805169, "grad_norm": 0.021839292335870203, "learning_rate": 0.0003672341861036557, "loss": 0.5116, "step": 24700 }, { "epoch": 1.2542168521785484, "grad_norm": 0.02473930369877982, "learning_rate": 0.00036702062548032624, "loss": 0.4747, "step": 24705 }, { "epoch": 1.2544706882765797, "grad_norm": 0.021843985621647143, "learning_rate": 0.00036680709096018483, "loss": 0.4981, "step": 24710 }, { "epoch": 1.2547245243746112, "grad_norm": 0.024828258327636316, "learning_rate": 0.0003665935825851473, "loss": 0.5257, "step": 24715 }, { "epoch": 1.2549783604726428, "grad_norm": 0.02156185794599099, "learning_rate": 0.0003663801003971241, "loss": 0.505, "step": 24720 }, { "epoch": 1.2552321965706743, "grad_norm": 0.02121222455282886, "learning_rate": 0.0003661666444380209, "loss": 0.4864, "step": 24725 }, { "epoch": 1.2554860326687058, "grad_norm": 0.028321758337109544, "learning_rate": 0.00036595321474973777, "loss": 0.4814, "step": 24730 }, { "epoch": 1.2557398687667374, "grad_norm": 0.02973621886010056, "learning_rate": 0.0003657398113741703, "loss": 0.4777, "step": 24735 }, { "epoch": 1.255993704864769, "grad_norm": 0.03303744106278796, "learning_rate": 0.0003655264343532083, "loss": 0.4917, "step": 24740 }, { "epoch": 1.2562475409628004, "grad_norm": 0.024275455013697354, "learning_rate": 0.0003653130837287366, "loss": 0.4928, "step": 24745 }, { "epoch": 1.2565013770608318, "grad_norm": 0.021864363759526228, "learning_rate": 0.00036509975954263486, "loss": 0.5008, "step": 24750 }, { "epoch": 1.2567552131588633, "grad_norm": 0.023011380155086403, "learning_rate": 0.00036488646183677767, "loss": 0.4985, "step": 24755 }, { "epoch": 1.2570090492568948, "grad_norm": 0.020669161817962644, "learning_rate": 0.00036467319065303414, "loss": 0.5056, "step": 24760 }, { "epoch": 1.2572628853549264, "grad_norm": 0.02725897387201023, "learning_rate": 0.00036445994603326835, "loss": 0.5112, "step": 24765 }, { "epoch": 1.257516721452958, "grad_norm": 0.024508242607115118, "learning_rate": 0.00036424672801933946, "loss": 0.5077, "step": 24770 }, { "epoch": 1.2577705575509892, "grad_norm": 0.025773782621984185, "learning_rate": 0.0003640335366531007, "loss": 0.4975, "step": 24775 }, { "epoch": 1.2580243936490207, "grad_norm": 0.023553740478245766, "learning_rate": 0.00036382037197640063, "loss": 0.4949, "step": 24780 }, { "epoch": 1.2582782297470523, "grad_norm": 0.022708195915233667, "learning_rate": 0.00036360723403108233, "loss": 0.5042, "step": 24785 }, { "epoch": 1.2585320658450838, "grad_norm": 0.021576770915417216, "learning_rate": 0.00036339412285898363, "loss": 0.4956, "step": 24790 }, { "epoch": 1.2587859019431153, "grad_norm": 0.024661142091304572, "learning_rate": 0.0003631810385019376, "loss": 0.5243, "step": 24795 }, { "epoch": 1.2590397380411469, "grad_norm": 0.023783896612178973, "learning_rate": 0.0003629679810017714, "loss": 0.5104, "step": 24800 }, { "epoch": 1.2592935741391784, "grad_norm": 0.028819292594759488, "learning_rate": 0.0003627549504003072, "loss": 0.4754, "step": 24805 }, { "epoch": 1.25954741023721, "grad_norm": 0.02512338445035506, "learning_rate": 0.00036254194673936174, "loss": 0.4788, "step": 24810 }, { "epoch": 1.2598012463352413, "grad_norm": 0.02170026035535352, "learning_rate": 0.0003623289700607466, "loss": 0.5096, "step": 24815 }, { "epoch": 1.2600550824332728, "grad_norm": 0.026629983889606926, "learning_rate": 0.00036211602040626815, "loss": 0.4805, "step": 24820 }, { "epoch": 1.2603089185313043, "grad_norm": 0.03586703668787359, "learning_rate": 0.00036190309781772723, "loss": 0.4917, "step": 24825 }, { "epoch": 1.2605627546293359, "grad_norm": 0.021805993713398496, "learning_rate": 0.00036169020233691953, "loss": 0.4935, "step": 24830 }, { "epoch": 1.2608165907273674, "grad_norm": 0.02066388286099757, "learning_rate": 0.0003614773340056353, "loss": 0.4849, "step": 24835 }, { "epoch": 1.2610704268253987, "grad_norm": 0.02456481899175869, "learning_rate": 0.00036126449286565966, "loss": 0.4749, "step": 24840 }, { "epoch": 1.2613242629234303, "grad_norm": 0.022104636358690293, "learning_rate": 0.0003610516789587722, "loss": 0.5236, "step": 24845 }, { "epoch": 1.2615780990214618, "grad_norm": 0.023314937158047715, "learning_rate": 0.000360838892326747, "loss": 0.478, "step": 24850 }, { "epoch": 1.2618319351194933, "grad_norm": 0.02278136876188265, "learning_rate": 0.00036062613301135357, "loss": 0.5083, "step": 24855 }, { "epoch": 1.2620857712175249, "grad_norm": 0.022721574613654438, "learning_rate": 0.00036041340105435506, "loss": 0.5178, "step": 24860 }, { "epoch": 1.2623396073155564, "grad_norm": 0.021038721362959267, "learning_rate": 0.00036020069649750976, "loss": 0.4987, "step": 24865 }, { "epoch": 1.262593443413588, "grad_norm": 0.024677584467370603, "learning_rate": 0.00035998801938257063, "loss": 0.4939, "step": 24870 }, { "epoch": 1.2628472795116195, "grad_norm": 0.021066193323710593, "learning_rate": 0.000359775369751285, "loss": 0.4553, "step": 24875 }, { "epoch": 1.2631011156096508, "grad_norm": 0.020772474815759088, "learning_rate": 0.00035956274764539504, "loss": 0.4793, "step": 24880 }, { "epoch": 1.2633549517076823, "grad_norm": 0.02074020180323779, "learning_rate": 0.0003593501531066373, "loss": 0.4897, "step": 24885 }, { "epoch": 1.2636087878057138, "grad_norm": 0.022634506709179467, "learning_rate": 0.00035913758617674315, "loss": 0.4656, "step": 24890 }, { "epoch": 1.2638626239037454, "grad_norm": 0.02442246115751448, "learning_rate": 0.0003589250468974383, "loss": 0.4923, "step": 24895 }, { "epoch": 1.264116460001777, "grad_norm": 0.02182147534232203, "learning_rate": 0.00035871253531044323, "loss": 0.4827, "step": 24900 }, { "epoch": 1.2643702960998082, "grad_norm": 0.03540133910293839, "learning_rate": 0.00035850005145747287, "loss": 0.4997, "step": 24905 }, { "epoch": 1.2646241321978398, "grad_norm": 0.031068992552075844, "learning_rate": 0.00035828759538023653, "loss": 0.4974, "step": 24910 }, { "epoch": 1.2648779682958713, "grad_norm": 0.024541408824232794, "learning_rate": 0.00035807516712043876, "loss": 0.521, "step": 24915 }, { "epoch": 1.2651318043939028, "grad_norm": 0.027145012369650882, "learning_rate": 0.00035786276671977786, "loss": 0.4929, "step": 24920 }, { "epoch": 1.2653856404919344, "grad_norm": 0.03763414082526663, "learning_rate": 0.000357650394219947, "loss": 0.4915, "step": 24925 }, { "epoch": 1.265639476589966, "grad_norm": 0.04014144903363009, "learning_rate": 0.0003574380496626339, "loss": 0.4897, "step": 24930 }, { "epoch": 1.2658933126879974, "grad_norm": 0.029764740803608244, "learning_rate": 0.00035722573308952064, "loss": 0.4696, "step": 24935 }, { "epoch": 1.266147148786029, "grad_norm": 0.030962115180249043, "learning_rate": 0.000357013444542284, "loss": 0.5014, "step": 24940 }, { "epoch": 1.2664009848840605, "grad_norm": 0.027834184566246624, "learning_rate": 0.00035680118406259515, "loss": 0.4928, "step": 24945 }, { "epoch": 1.2666548209820918, "grad_norm": 0.021043989423317384, "learning_rate": 0.00035658895169211966, "loss": 0.4762, "step": 24950 }, { "epoch": 1.2669086570801233, "grad_norm": 0.02449083248247568, "learning_rate": 0.00035637674747251785, "loss": 0.491, "step": 24955 }, { "epoch": 1.2671624931781549, "grad_norm": 0.02271799388618845, "learning_rate": 0.00035616457144544425, "loss": 0.4848, "step": 24960 }, { "epoch": 1.2674163292761864, "grad_norm": 0.02138761722620517, "learning_rate": 0.0003559524236525479, "loss": 0.478, "step": 24965 }, { "epoch": 1.267670165374218, "grad_norm": 0.02379623979828549, "learning_rate": 0.0003557403041354724, "loss": 0.4819, "step": 24970 }, { "epoch": 1.2679240014722493, "grad_norm": 0.02449724126117815, "learning_rate": 0.0003555282129358558, "loss": 0.4804, "step": 24975 }, { "epoch": 1.2681778375702808, "grad_norm": 0.021883903501979027, "learning_rate": 0.0003553161500953306, "loss": 0.4858, "step": 24980 }, { "epoch": 1.2684316736683123, "grad_norm": 0.0209418839992429, "learning_rate": 0.0003551041156555236, "loss": 0.4907, "step": 24985 }, { "epoch": 1.2686855097663439, "grad_norm": 0.023379422469475157, "learning_rate": 0.000354892109658056, "loss": 0.4686, "step": 24990 }, { "epoch": 1.2689393458643754, "grad_norm": 0.03050915873944768, "learning_rate": 0.00035468013214454375, "loss": 0.4872, "step": 24995 }, { "epoch": 1.269193181962407, "grad_norm": 0.019469878265224637, "learning_rate": 0.0003544681831565968, "loss": 0.467, "step": 25000 }, { "epoch": 1.2694470180604385, "grad_norm": 0.02706167777407716, "learning_rate": 0.0003542562627358197, "loss": 0.5358, "step": 25005 }, { "epoch": 1.26970085415847, "grad_norm": 0.02607408126692583, "learning_rate": 0.0003540443709238114, "loss": 0.484, "step": 25010 }, { "epoch": 1.2699546902565013, "grad_norm": 0.026898207511883893, "learning_rate": 0.00035383250776216526, "loss": 0.4652, "step": 25015 }, { "epoch": 1.2702085263545329, "grad_norm": 0.02822647163163459, "learning_rate": 0.00035362067329246884, "loss": 0.5043, "step": 25020 }, { "epoch": 1.2704623624525644, "grad_norm": 0.024912999653158442, "learning_rate": 0.0003534088675563043, "loss": 0.4968, "step": 25025 }, { "epoch": 1.270716198550596, "grad_norm": 0.04104540138730313, "learning_rate": 0.0003531970905952478, "loss": 0.48, "step": 25030 }, { "epoch": 1.2709700346486275, "grad_norm": 0.04967344080591417, "learning_rate": 0.00035298534245087055, "loss": 0.4968, "step": 25035 }, { "epoch": 1.2712238707466588, "grad_norm": 0.032041261058398655, "learning_rate": 0.0003527736231647374, "loss": 0.4833, "step": 25040 }, { "epoch": 1.2714777068446903, "grad_norm": 0.026420537505594786, "learning_rate": 0.0003525619327784078, "loss": 0.5085, "step": 25045 }, { "epoch": 1.2717315429427218, "grad_norm": 0.02728321137774862, "learning_rate": 0.00035235027133343546, "loss": 0.4869, "step": 25050 }, { "epoch": 1.2719853790407534, "grad_norm": 0.022824173369318995, "learning_rate": 0.0003521386388713686, "loss": 0.5129, "step": 25055 }, { "epoch": 1.272239215138785, "grad_norm": 0.027880785233754136, "learning_rate": 0.0003519270354337495, "loss": 0.48, "step": 25060 }, { "epoch": 1.2724930512368164, "grad_norm": 0.02194127139960059, "learning_rate": 0.0003517154610621149, "loss": 0.4868, "step": 25065 }, { "epoch": 1.272746887334848, "grad_norm": 0.023243161860758137, "learning_rate": 0.0003515039157979959, "loss": 0.4901, "step": 25070 }, { "epoch": 1.2730007234328795, "grad_norm": 0.0375654084513337, "learning_rate": 0.0003512923996829176, "loss": 0.4824, "step": 25075 }, { "epoch": 1.2732545595309108, "grad_norm": 0.02148030589323779, "learning_rate": 0.0003510809127583997, "loss": 0.4985, "step": 25080 }, { "epoch": 1.2735083956289424, "grad_norm": 0.02366581081516137, "learning_rate": 0.0003508694550659559, "loss": 0.4889, "step": 25085 }, { "epoch": 1.273762231726974, "grad_norm": 0.026078232012127944, "learning_rate": 0.00035065802664709426, "loss": 0.5308, "step": 25090 }, { "epoch": 1.2740160678250054, "grad_norm": 0.025296448194318342, "learning_rate": 0.00035044662754331736, "loss": 0.4917, "step": 25095 }, { "epoch": 1.274269903923037, "grad_norm": 0.032298444356296446, "learning_rate": 0.00035023525779612165, "loss": 0.4935, "step": 25100 }, { "epoch": 1.2745237400210683, "grad_norm": 0.02559527128726925, "learning_rate": 0.0003500239174469979, "loss": 0.502, "step": 25105 }, { "epoch": 1.2747775761190998, "grad_norm": 0.024979128795998007, "learning_rate": 0.0003498126065374313, "loss": 0.4913, "step": 25110 }, { "epoch": 1.2750314122171313, "grad_norm": 0.025909934373053247, "learning_rate": 0.00034960132510890096, "loss": 0.4648, "step": 25115 }, { "epoch": 1.2752852483151629, "grad_norm": 0.02298436546813651, "learning_rate": 0.0003493900732028806, "loss": 0.5013, "step": 25120 }, { "epoch": 1.2755390844131944, "grad_norm": 0.02056212672636294, "learning_rate": 0.0003491788508608377, "loss": 0.4907, "step": 25125 }, { "epoch": 1.275792920511226, "grad_norm": 0.021393425271120866, "learning_rate": 0.00034896765812423425, "loss": 0.4743, "step": 25130 }, { "epoch": 1.2760467566092575, "grad_norm": 0.02799046607865763, "learning_rate": 0.00034875649503452626, "loss": 0.5046, "step": 25135 }, { "epoch": 1.276300592707289, "grad_norm": 0.021557706427491538, "learning_rate": 0.0003485453616331641, "loss": 0.445, "step": 25140 }, { "epoch": 1.2765544288053203, "grad_norm": 0.035754348141408966, "learning_rate": 0.00034833425796159214, "loss": 0.4767, "step": 25145 }, { "epoch": 1.2768082649033519, "grad_norm": 0.022260297154022193, "learning_rate": 0.00034812318406124876, "loss": 0.4998, "step": 25150 }, { "epoch": 1.2770621010013834, "grad_norm": 0.020517704282148764, "learning_rate": 0.0003479121399735672, "loss": 0.4705, "step": 25155 }, { "epoch": 1.277315937099415, "grad_norm": 0.023421817797892777, "learning_rate": 0.00034770112573997405, "loss": 0.486, "step": 25160 }, { "epoch": 1.2775697731974465, "grad_norm": 0.03614632807366431, "learning_rate": 0.0003474901414018904, "loss": 0.5111, "step": 25165 }, { "epoch": 1.2778236092954778, "grad_norm": 0.05816272517086025, "learning_rate": 0.00034727918700073145, "loss": 0.4814, "step": 25170 }, { "epoch": 1.2780774453935093, "grad_norm": 0.025050111858038723, "learning_rate": 0.0003470682625779065, "loss": 0.5197, "step": 25175 }, { "epoch": 1.2783312814915408, "grad_norm": 0.03165688938324348, "learning_rate": 0.0003468573681748188, "loss": 0.4827, "step": 25180 }, { "epoch": 1.2785851175895724, "grad_norm": 0.019624471813982844, "learning_rate": 0.00034664650383286615, "loss": 0.4922, "step": 25185 }, { "epoch": 1.278838953687604, "grad_norm": 0.021835974079306434, "learning_rate": 0.00034643566959343997, "loss": 0.5077, "step": 25190 }, { "epoch": 1.2790927897856355, "grad_norm": 0.021070204790334, "learning_rate": 0.0003462248654979261, "loss": 0.4665, "step": 25195 }, { "epoch": 1.279346625883667, "grad_norm": 0.04029683650164615, "learning_rate": 0.0003460140915877041, "loss": 0.4625, "step": 25200 }, { "epoch": 1.2796004619816985, "grad_norm": 0.02203877571856326, "learning_rate": 0.00034580334790414814, "loss": 0.4589, "step": 25205 }, { "epoch": 1.2798542980797298, "grad_norm": 0.03135309516087329, "learning_rate": 0.0003455926344886259, "loss": 0.4735, "step": 25210 }, { "epoch": 1.2801081341777614, "grad_norm": 0.02096327734711526, "learning_rate": 0.0003453819513824995, "loss": 0.4874, "step": 25215 }, { "epoch": 1.280361970275793, "grad_norm": 0.022733042961183666, "learning_rate": 0.00034517129862712506, "loss": 0.4795, "step": 25220 }, { "epoch": 1.2806158063738244, "grad_norm": 0.02232352087148914, "learning_rate": 0.00034496067626385254, "loss": 0.4843, "step": 25225 }, { "epoch": 1.280869642471856, "grad_norm": 0.021657090158753636, "learning_rate": 0.000344750084334026, "loss": 0.4864, "step": 25230 }, { "epoch": 1.2811234785698873, "grad_norm": 0.02829516889946878, "learning_rate": 0.00034453952287898375, "loss": 0.4628, "step": 25235 }, { "epoch": 1.2813773146679188, "grad_norm": 0.02189023360857141, "learning_rate": 0.0003443289919400579, "loss": 0.4704, "step": 25240 }, { "epoch": 1.2816311507659504, "grad_norm": 0.02205365383036312, "learning_rate": 0.0003441184915585746, "loss": 0.4757, "step": 25245 }, { "epoch": 1.281884986863982, "grad_norm": 0.03272625160720233, "learning_rate": 0.000343908021775854, "loss": 0.462, "step": 25250 }, { "epoch": 1.2821388229620134, "grad_norm": 0.02110097107472652, "learning_rate": 0.00034369758263321025, "loss": 0.4615, "step": 25255 }, { "epoch": 1.282392659060045, "grad_norm": 0.02123030495394618, "learning_rate": 0.0003434871741719516, "loss": 0.4795, "step": 25260 }, { "epoch": 1.2826464951580765, "grad_norm": 0.02373251352589659, "learning_rate": 0.0003432767964333802, "loss": 0.4727, "step": 25265 }, { "epoch": 1.282900331256108, "grad_norm": 0.03378365804202472, "learning_rate": 0.00034306644945879174, "loss": 0.4789, "step": 25270 }, { "epoch": 1.2831541673541396, "grad_norm": 0.023436130695674143, "learning_rate": 0.0003428561332894769, "loss": 0.4851, "step": 25275 }, { "epoch": 1.2834080034521709, "grad_norm": 0.025875198122374644, "learning_rate": 0.0003426458479667194, "loss": 0.4709, "step": 25280 }, { "epoch": 1.2836618395502024, "grad_norm": 0.030081526490696734, "learning_rate": 0.00034243559353179726, "loss": 0.4867, "step": 25285 }, { "epoch": 1.283915675648234, "grad_norm": 0.021009383912915302, "learning_rate": 0.00034222537002598233, "loss": 0.4919, "step": 25290 }, { "epoch": 1.2841695117462655, "grad_norm": 0.020722639706099787, "learning_rate": 0.00034201517749054037, "loss": 0.4786, "step": 25295 }, { "epoch": 1.284423347844297, "grad_norm": 0.02511803417211432, "learning_rate": 0.0003418050159667313, "loss": 0.4895, "step": 25300 }, { "epoch": 1.2846771839423283, "grad_norm": 0.023199730249595922, "learning_rate": 0.00034159488549580865, "loss": 0.4975, "step": 25305 }, { "epoch": 1.2849310200403599, "grad_norm": 0.022681672373907304, "learning_rate": 0.00034138478611902, "loss": 0.4618, "step": 25310 }, { "epoch": 1.2851848561383914, "grad_norm": 0.025437230351334297, "learning_rate": 0.0003411747178776068, "loss": 0.6043, "step": 25315 }, { "epoch": 1.285438692236423, "grad_norm": 0.02517031394643481, "learning_rate": 0.00034096468081280443, "loss": 0.4689, "step": 25320 }, { "epoch": 1.2856925283344545, "grad_norm": 0.0255164084090977, "learning_rate": 0.00034075467496584214, "loss": 0.4664, "step": 25325 }, { "epoch": 1.285946364432486, "grad_norm": 0.024254877277534864, "learning_rate": 0.00034054470037794284, "loss": 0.4927, "step": 25330 }, { "epoch": 1.2862002005305175, "grad_norm": 0.02399484634796993, "learning_rate": 0.0003403347570903238, "loss": 0.4992, "step": 25335 }, { "epoch": 1.286454036628549, "grad_norm": 0.022617445238938865, "learning_rate": 0.0003401248451441957, "loss": 0.4784, "step": 25340 }, { "epoch": 1.2867078727265804, "grad_norm": 0.02702146861655969, "learning_rate": 0.0003399149645807632, "loss": 0.4879, "step": 25345 }, { "epoch": 1.286961708824612, "grad_norm": 0.021698144351784757, "learning_rate": 0.00033970511544122476, "loss": 0.5057, "step": 25350 }, { "epoch": 1.2872155449226435, "grad_norm": 0.02835164720141961, "learning_rate": 0.0003394952977667728, "loss": 0.4854, "step": 25355 }, { "epoch": 1.287469381020675, "grad_norm": 0.023058090063842972, "learning_rate": 0.0003392855115985935, "loss": 0.5309, "step": 25360 }, { "epoch": 1.2877232171187065, "grad_norm": 0.043240874010317586, "learning_rate": 0.00033907575697786677, "loss": 0.4942, "step": 25365 }, { "epoch": 1.2879770532167378, "grad_norm": 0.030301182776510747, "learning_rate": 0.0003388660339457664, "loss": 0.4814, "step": 25370 }, { "epoch": 1.2882308893147694, "grad_norm": 0.037066055185106375, "learning_rate": 0.00033865634254345996, "loss": 0.4831, "step": 25375 }, { "epoch": 1.288484725412801, "grad_norm": 0.031338071085445056, "learning_rate": 0.0003384466828121089, "loss": 0.508, "step": 25380 }, { "epoch": 1.2887385615108324, "grad_norm": 0.02436415302207937, "learning_rate": 0.0003382370547928683, "loss": 0.4708, "step": 25385 }, { "epoch": 1.288992397608864, "grad_norm": 0.028312170689583957, "learning_rate": 0.000338027458526887, "loss": 0.4962, "step": 25390 }, { "epoch": 1.2892462337068955, "grad_norm": 0.02167851043165668, "learning_rate": 0.00033781789405530794, "loss": 0.4667, "step": 25395 }, { "epoch": 1.289500069804927, "grad_norm": 0.023856335129568706, "learning_rate": 0.00033760836141926754, "loss": 0.4736, "step": 25400 }, { "epoch": 1.2897539059029586, "grad_norm": 0.028628411965523105, "learning_rate": 0.000337398860659896, "loss": 0.5157, "step": 25405 }, { "epoch": 1.2900077420009899, "grad_norm": 0.021708828812962416, "learning_rate": 0.0003371893918183171, "loss": 0.4964, "step": 25410 }, { "epoch": 1.2902615780990214, "grad_norm": 0.02198643320874413, "learning_rate": 0.0003369799549356487, "loss": 0.4799, "step": 25415 }, { "epoch": 1.290515414197053, "grad_norm": 0.02453087273669778, "learning_rate": 0.00033677055005300224, "loss": 0.4635, "step": 25420 }, { "epoch": 1.2907692502950845, "grad_norm": 0.026346186269380012, "learning_rate": 0.0003365611772114827, "loss": 0.5175, "step": 25425 }, { "epoch": 1.291023086393116, "grad_norm": 0.029201729373243494, "learning_rate": 0.000336351836452189, "loss": 0.5042, "step": 25430 }, { "epoch": 1.2912769224911473, "grad_norm": 0.020057883724721514, "learning_rate": 0.00033614252781621374, "loss": 0.4929, "step": 25435 }, { "epoch": 1.2915307585891789, "grad_norm": 0.023365071609751025, "learning_rate": 0.0003359332513446431, "loss": 0.4701, "step": 25440 }, { "epoch": 1.2917845946872104, "grad_norm": 0.029195703341591574, "learning_rate": 0.000335724007078557, "loss": 0.4679, "step": 25445 }, { "epoch": 1.292038430785242, "grad_norm": 0.024061801809005346, "learning_rate": 0.0003355147950590291, "loss": 0.4735, "step": 25450 }, { "epoch": 1.2922922668832735, "grad_norm": 0.020717453665263782, "learning_rate": 0.00033530561532712653, "loss": 0.5058, "step": 25455 }, { "epoch": 1.292546102981305, "grad_norm": 0.02955683642722223, "learning_rate": 0.00033509646792391045, "loss": 0.4869, "step": 25460 }, { "epoch": 1.2927999390793365, "grad_norm": 0.022595682726649052, "learning_rate": 0.0003348873528904353, "loss": 0.4827, "step": 25465 }, { "epoch": 1.293053775177368, "grad_norm": 0.029808289348049386, "learning_rate": 0.0003346782702677494, "loss": 0.496, "step": 25470 }, { "epoch": 1.2933076112753994, "grad_norm": 0.02274550903922533, "learning_rate": 0.0003344692200968946, "loss": 0.4972, "step": 25475 }, { "epoch": 1.293561447373431, "grad_norm": 0.041851383405792156, "learning_rate": 0.00033426020241890636, "loss": 0.4798, "step": 25480 }, { "epoch": 1.2938152834714625, "grad_norm": 0.033587120320584446, "learning_rate": 0.00033405121727481384, "loss": 0.4889, "step": 25485 }, { "epoch": 1.294069119569494, "grad_norm": 0.037859152680054645, "learning_rate": 0.00033384226470563983, "loss": 0.492, "step": 25490 }, { "epoch": 1.2943229556675255, "grad_norm": 0.028727700779199155, "learning_rate": 0.0003336333447524006, "loss": 0.5041, "step": 25495 }, { "epoch": 1.2945767917655568, "grad_norm": 0.025080840121736163, "learning_rate": 0.0003334244574561061, "loss": 0.5099, "step": 25500 }, { "epoch": 1.2948306278635884, "grad_norm": 0.02222522508564205, "learning_rate": 0.0003332156028577599, "loss": 0.5108, "step": 25505 }, { "epoch": 1.29508446396162, "grad_norm": 0.037804332629990035, "learning_rate": 0.00033300678099835914, "loss": 0.472, "step": 25510 }, { "epoch": 1.2953383000596514, "grad_norm": 0.022133420759234763, "learning_rate": 0.00033279799191889426, "loss": 0.4965, "step": 25515 }, { "epoch": 1.295592136157683, "grad_norm": 0.027447234503632068, "learning_rate": 0.00033258923566034995, "loss": 0.4852, "step": 25520 }, { "epoch": 1.2958459722557145, "grad_norm": 0.026292192328524953, "learning_rate": 0.0003323805122637038, "loss": 0.4646, "step": 25525 }, { "epoch": 1.296099808353746, "grad_norm": 0.02694952032736674, "learning_rate": 0.0003321718217699271, "loss": 0.4964, "step": 25530 }, { "epoch": 1.2963536444517776, "grad_norm": 0.0217904749033225, "learning_rate": 0.00033196316421998495, "loss": 0.5027, "step": 25535 }, { "epoch": 1.2966074805498091, "grad_norm": 0.021851950907082737, "learning_rate": 0.0003317545396548356, "loss": 0.4986, "step": 25540 }, { "epoch": 1.2968613166478404, "grad_norm": 0.023510352499926397, "learning_rate": 0.00033154594811543104, "loss": 0.4833, "step": 25545 }, { "epoch": 1.297115152745872, "grad_norm": 0.024076269555153962, "learning_rate": 0.00033133738964271687, "loss": 0.4734, "step": 25550 }, { "epoch": 1.2973689888439035, "grad_norm": 0.022988360192355953, "learning_rate": 0.00033112886427763197, "loss": 0.5029, "step": 25555 }, { "epoch": 1.297622824941935, "grad_norm": 0.02164944305735075, "learning_rate": 0.0003309203720611088, "loss": 0.5109, "step": 25560 }, { "epoch": 1.2978766610399666, "grad_norm": 0.038596290229903046, "learning_rate": 0.00033071191303407345, "loss": 0.4938, "step": 25565 }, { "epoch": 1.2981304971379979, "grad_norm": 0.024119758300361713, "learning_rate": 0.00033050348723744527, "loss": 0.4897, "step": 25570 }, { "epoch": 1.2983843332360294, "grad_norm": 0.02587506880218007, "learning_rate": 0.00033029509471213726, "loss": 0.505, "step": 25575 }, { "epoch": 1.298638169334061, "grad_norm": 0.02275936565570734, "learning_rate": 0.00033008673549905586, "loss": 0.5145, "step": 25580 }, { "epoch": 1.2988920054320925, "grad_norm": 0.023072141573578203, "learning_rate": 0.000329878409639101, "loss": 0.4862, "step": 25585 }, { "epoch": 1.299145841530124, "grad_norm": 0.021274802226077427, "learning_rate": 0.00032967011717316587, "loss": 0.4965, "step": 25590 }, { "epoch": 1.2993996776281556, "grad_norm": 0.030502737103929357, "learning_rate": 0.00032946185814213734, "loss": 0.5204, "step": 25595 }, { "epoch": 1.299653513726187, "grad_norm": 0.020252746392252694, "learning_rate": 0.00032925363258689557, "loss": 0.479, "step": 25600 }, { "epoch": 1.2999073498242186, "grad_norm": 0.021662085892109512, "learning_rate": 0.0003290454405483142, "loss": 0.4786, "step": 25605 }, { "epoch": 1.30016118592225, "grad_norm": 0.030512365258254626, "learning_rate": 0.00032883728206726035, "loss": 0.4816, "step": 25610 }, { "epoch": 1.3004150220202815, "grad_norm": 0.02419501135859568, "learning_rate": 0.00032862915718459443, "loss": 0.4659, "step": 25615 }, { "epoch": 1.300668858118313, "grad_norm": 0.02205521149147181, "learning_rate": 0.0003284210659411703, "loss": 0.4919, "step": 25620 }, { "epoch": 1.3009226942163445, "grad_norm": 0.03676879072186463, "learning_rate": 0.0003282130083778352, "loss": 0.4887, "step": 25625 }, { "epoch": 1.301176530314376, "grad_norm": 0.02352083834323318, "learning_rate": 0.0003280049845354299, "loss": 0.4994, "step": 25630 }, { "epoch": 1.3014303664124074, "grad_norm": 0.02443911005181168, "learning_rate": 0.00032779699445478826, "loss": 0.4826, "step": 25635 }, { "epoch": 1.301684202510439, "grad_norm": 0.02197560170718096, "learning_rate": 0.000327589038176738, "loss": 0.4615, "step": 25640 }, { "epoch": 1.3019380386084705, "grad_norm": 0.022748104689422538, "learning_rate": 0.00032738111574209973, "loss": 0.4801, "step": 25645 }, { "epoch": 1.302191874706502, "grad_norm": 0.02023037758904707, "learning_rate": 0.0003271732271916876, "loss": 0.4661, "step": 25650 }, { "epoch": 1.3024457108045335, "grad_norm": 0.029780248485063505, "learning_rate": 0.0003269653725663091, "loss": 0.5154, "step": 25655 }, { "epoch": 1.302699546902565, "grad_norm": 0.022291598091070952, "learning_rate": 0.000326757551906765, "loss": 0.5173, "step": 25660 }, { "epoch": 1.3029533830005966, "grad_norm": 0.022819230833246173, "learning_rate": 0.00032654976525384947, "loss": 0.4833, "step": 25665 }, { "epoch": 1.3032072190986281, "grad_norm": 0.02715749696309478, "learning_rate": 0.0003263420126483501, "loss": 0.4879, "step": 25670 }, { "epoch": 1.3034610551966594, "grad_norm": 0.023854247107745894, "learning_rate": 0.0003261342941310476, "loss": 0.473, "step": 25675 }, { "epoch": 1.303714891294691, "grad_norm": 0.022155878634300286, "learning_rate": 0.00032592660974271615, "loss": 0.505, "step": 25680 }, { "epoch": 1.3039687273927225, "grad_norm": 0.02398677758854186, "learning_rate": 0.000325718959524123, "loss": 0.5156, "step": 25685 }, { "epoch": 1.304222563490754, "grad_norm": 0.03172998468913407, "learning_rate": 0.000325511343516029, "loss": 0.488, "step": 25690 }, { "epoch": 1.3044763995887856, "grad_norm": 0.03394625530167979, "learning_rate": 0.00032530376175918794, "loss": 0.4604, "step": 25695 }, { "epoch": 1.304730235686817, "grad_norm": 0.02947534576412719, "learning_rate": 0.00032509621429434744, "loss": 0.4747, "step": 25700 }, { "epoch": 1.3049840717848484, "grad_norm": 0.03537345478451157, "learning_rate": 0.0003248887011622478, "loss": 0.4933, "step": 25705 }, { "epoch": 1.30523790788288, "grad_norm": 0.022484576203938268, "learning_rate": 0.00032468122240362287, "loss": 0.5131, "step": 25710 }, { "epoch": 1.3054917439809115, "grad_norm": 0.03394847342947618, "learning_rate": 0.00032447377805919957, "loss": 0.4996, "step": 25715 }, { "epoch": 1.305745580078943, "grad_norm": 0.022258649273689368, "learning_rate": 0.00032426636816969837, "loss": 0.4864, "step": 25720 }, { "epoch": 1.3059994161769746, "grad_norm": 0.028081809394511537, "learning_rate": 0.0003240589927758327, "loss": 0.4942, "step": 25725 }, { "epoch": 1.306253252275006, "grad_norm": 0.02227139522287654, "learning_rate": 0.0003238516519183093, "loss": 0.5094, "step": 25730 }, { "epoch": 1.3065070883730376, "grad_norm": 0.03070685559989823, "learning_rate": 0.0003236443456378282, "loss": 0.4944, "step": 25735 }, { "epoch": 1.306760924471069, "grad_norm": 0.04102837340472807, "learning_rate": 0.0003234370739750826, "loss": 0.458, "step": 25740 }, { "epoch": 1.3070147605691005, "grad_norm": 0.021808995007741704, "learning_rate": 0.00032322983697075883, "loss": 0.4717, "step": 25745 }, { "epoch": 1.307268596667132, "grad_norm": 0.023349246400971222, "learning_rate": 0.0003230226346655365, "loss": 0.4855, "step": 25750 }, { "epoch": 1.3075224327651636, "grad_norm": 0.023103164390614456, "learning_rate": 0.0003228154671000882, "loss": 0.4876, "step": 25755 }, { "epoch": 1.307776268863195, "grad_norm": 0.024319382145632857, "learning_rate": 0.0003226083343150803, "loss": 0.4753, "step": 25760 }, { "epoch": 1.3080301049612264, "grad_norm": 0.02350220796453745, "learning_rate": 0.0003224012363511717, "loss": 0.4565, "step": 25765 }, { "epoch": 1.308283941059258, "grad_norm": 0.023084276590446533, "learning_rate": 0.0003221941732490148, "loss": 0.4742, "step": 25770 }, { "epoch": 1.3085377771572895, "grad_norm": 0.023561783980489238, "learning_rate": 0.00032198714504925487, "loss": 0.4818, "step": 25775 }, { "epoch": 1.308791613255321, "grad_norm": 0.024018962472135568, "learning_rate": 0.0003217801517925307, "loss": 0.454, "step": 25780 }, { "epoch": 1.3090454493533525, "grad_norm": 0.02133114101515424, "learning_rate": 0.0003215731935194739, "loss": 0.4503, "step": 25785 }, { "epoch": 1.309299285451384, "grad_norm": 0.026035923942215537, "learning_rate": 0.0003213662702707094, "loss": 0.5116, "step": 25790 }, { "epoch": 1.3095531215494156, "grad_norm": 0.022735126112242227, "learning_rate": 0.00032115938208685527, "loss": 0.4965, "step": 25795 }, { "epoch": 1.3098069576474471, "grad_norm": 0.023075574740689275, "learning_rate": 0.0003209525290085226, "loss": 0.4858, "step": 25800 }, { "epoch": 1.3100607937454787, "grad_norm": 0.02157939323367077, "learning_rate": 0.00032074571107631544, "loss": 0.465, "step": 25805 }, { "epoch": 1.31031462984351, "grad_norm": 0.02922405968847422, "learning_rate": 0.0003205389283308313, "loss": 0.4925, "step": 25810 }, { "epoch": 1.3105684659415415, "grad_norm": 0.022879840515332046, "learning_rate": 0.0003203321808126604, "loss": 0.4703, "step": 25815 }, { "epoch": 1.310822302039573, "grad_norm": 0.023144556467176263, "learning_rate": 0.0003201254685623866, "loss": 0.4869, "step": 25820 }, { "epoch": 1.3110761381376046, "grad_norm": 0.02520401264527816, "learning_rate": 0.00031991879162058623, "loss": 0.4813, "step": 25825 }, { "epoch": 1.3113299742356361, "grad_norm": 0.02170783835021678, "learning_rate": 0.00031971215002782907, "loss": 0.4802, "step": 25830 }, { "epoch": 1.3115838103336674, "grad_norm": 0.028491353202148535, "learning_rate": 0.00031950554382467766, "loss": 0.4757, "step": 25835 }, { "epoch": 1.311837646431699, "grad_norm": 0.02918056571262906, "learning_rate": 0.000319298973051688, "loss": 0.4895, "step": 25840 }, { "epoch": 1.3120914825297305, "grad_norm": 0.03246670924753988, "learning_rate": 0.00031909243774940865, "loss": 0.5045, "step": 25845 }, { "epoch": 1.312345318627762, "grad_norm": 0.04517489478201621, "learning_rate": 0.0003188859379583816, "loss": 0.49, "step": 25850 }, { "epoch": 1.3125991547257936, "grad_norm": 0.025048916079119958, "learning_rate": 0.0003186794737191418, "loss": 0.4813, "step": 25855 }, { "epoch": 1.3128529908238251, "grad_norm": 0.02702368574587235, "learning_rate": 0.000318473045072217, "loss": 0.4809, "step": 25860 }, { "epoch": 1.3131068269218567, "grad_norm": 0.02451499381789933, "learning_rate": 0.00031826665205812824, "loss": 0.4777, "step": 25865 }, { "epoch": 1.3133606630198882, "grad_norm": 0.027600609156731175, "learning_rate": 0.00031806029471738933, "loss": 0.4788, "step": 25870 }, { "epoch": 1.3136144991179195, "grad_norm": 0.03131877820295245, "learning_rate": 0.000317853973090507, "loss": 0.4761, "step": 25875 }, { "epoch": 1.313868335215951, "grad_norm": 0.027123485696514793, "learning_rate": 0.00031764768721798163, "loss": 0.4727, "step": 25880 }, { "epoch": 1.3141221713139826, "grad_norm": 0.02108556651131552, "learning_rate": 0.00031744143714030606, "loss": 0.4948, "step": 25885 }, { "epoch": 1.314376007412014, "grad_norm": 0.02128012590382479, "learning_rate": 0.00031723522289796573, "loss": 0.4942, "step": 25890 }, { "epoch": 1.3146298435100456, "grad_norm": 0.021709980768361257, "learning_rate": 0.00031702904453143976, "loss": 0.4826, "step": 25895 }, { "epoch": 1.314883679608077, "grad_norm": 0.023018801114703516, "learning_rate": 0.0003168229020811999, "loss": 0.4924, "step": 25900 }, { "epoch": 1.3151375157061085, "grad_norm": 0.024999798230040154, "learning_rate": 0.00031661679558771076, "loss": 0.4843, "step": 25905 }, { "epoch": 1.31539135180414, "grad_norm": 0.02466258211924113, "learning_rate": 0.0003164107250914302, "loss": 0.5157, "step": 25910 }, { "epoch": 1.3156451879021716, "grad_norm": 0.03808085877482315, "learning_rate": 0.0003162046906328087, "loss": 0.4982, "step": 25915 }, { "epoch": 1.315899024000203, "grad_norm": 0.022915850153156146, "learning_rate": 0.0003159986922522899, "loss": 0.4911, "step": 25920 }, { "epoch": 1.3161528600982346, "grad_norm": 0.029411931313225212, "learning_rate": 0.0003157927299903102, "loss": 0.5142, "step": 25925 }, { "epoch": 1.3164066961962662, "grad_norm": 0.022508290380115467, "learning_rate": 0.0003155868038872989, "loss": 0.4781, "step": 25930 }, { "epoch": 1.3166605322942977, "grad_norm": 0.02367130521025921, "learning_rate": 0.0003153809139836781, "loss": 0.5034, "step": 25935 }, { "epoch": 1.316914368392329, "grad_norm": 0.02370837844837251, "learning_rate": 0.0003151750603198634, "loss": 0.4825, "step": 25940 }, { "epoch": 1.3171682044903605, "grad_norm": 0.022145644887770835, "learning_rate": 0.0003149692429362627, "loss": 0.4711, "step": 25945 }, { "epoch": 1.317422040588392, "grad_norm": 0.030152956850519705, "learning_rate": 0.00031476346187327684, "loss": 0.4648, "step": 25950 }, { "epoch": 1.3176758766864236, "grad_norm": 0.027631145783320436, "learning_rate": 0.0003145577171712997, "loss": 0.4992, "step": 25955 }, { "epoch": 1.3179297127844551, "grad_norm": 0.028341925435365112, "learning_rate": 0.00031435200887071786, "loss": 0.4761, "step": 25960 }, { "epoch": 1.3181835488824865, "grad_norm": 0.026884867627421965, "learning_rate": 0.0003141463370119108, "loss": 0.4643, "step": 25965 }, { "epoch": 1.318437384980518, "grad_norm": 0.022874705393107205, "learning_rate": 0.00031394070163525095, "loss": 0.4786, "step": 25970 }, { "epoch": 1.3186912210785495, "grad_norm": 0.027134145494403333, "learning_rate": 0.0003137351027811035, "loss": 0.503, "step": 25975 }, { "epoch": 1.318945057176581, "grad_norm": 0.024479146918958583, "learning_rate": 0.0003135295404898265, "loss": 0.4983, "step": 25980 }, { "epoch": 1.3191988932746126, "grad_norm": 0.02172267954854436, "learning_rate": 0.00031332401480177073, "loss": 0.4721, "step": 25985 }, { "epoch": 1.3194527293726441, "grad_norm": 0.025255869240494836, "learning_rate": 0.0003131185257572799, "loss": 0.4665, "step": 25990 }, { "epoch": 1.3197065654706757, "grad_norm": 0.023915003608357185, "learning_rate": 0.0003129130733966904, "loss": 0.5186, "step": 25995 }, { "epoch": 1.3199604015687072, "grad_norm": 0.038739561505479345, "learning_rate": 0.00031270765776033173, "loss": 0.4665, "step": 26000 }, { "epoch": 1.3202142376667385, "grad_norm": 0.02087696169338501, "learning_rate": 0.00031250227888852576, "loss": 0.4838, "step": 26005 }, { "epoch": 1.32046807376477, "grad_norm": 0.024862811416681813, "learning_rate": 0.0003122969368215874, "loss": 0.4861, "step": 26010 }, { "epoch": 1.3207219098628016, "grad_norm": 0.019972495995757008, "learning_rate": 0.0003120916315998243, "loss": 0.4681, "step": 26015 }, { "epoch": 1.3209757459608331, "grad_norm": 0.026081632082320026, "learning_rate": 0.0003118863632635368, "loss": 0.4591, "step": 26020 }, { "epoch": 1.3212295820588646, "grad_norm": 0.03877882882655485, "learning_rate": 0.00031168113185301815, "loss": 0.5011, "step": 26025 }, { "epoch": 1.321483418156896, "grad_norm": 0.035565096811897436, "learning_rate": 0.00031147593740855407, "loss": 0.4884, "step": 26030 }, { "epoch": 1.3217372542549275, "grad_norm": 0.032673275953771695, "learning_rate": 0.00031127077997042336, "loss": 0.4688, "step": 26035 }, { "epoch": 1.321991090352959, "grad_norm": 0.02225124249672015, "learning_rate": 0.0003110656595788973, "loss": 0.5164, "step": 26040 }, { "epoch": 1.3222449264509906, "grad_norm": 0.027980878312804384, "learning_rate": 0.0003108605762742401, "loss": 0.4872, "step": 26045 }, { "epoch": 1.322498762549022, "grad_norm": 0.025276827695637313, "learning_rate": 0.00031065553009670857, "loss": 0.4903, "step": 26050 }, { "epoch": 1.3227525986470536, "grad_norm": 0.028706939574934476, "learning_rate": 0.00031045052108655193, "loss": 0.5026, "step": 26055 }, { "epoch": 1.3230064347450852, "grad_norm": 0.027941893356288494, "learning_rate": 0.0003102455492840129, "loss": 0.4932, "step": 26060 }, { "epoch": 1.3232602708431167, "grad_norm": 0.026634652222519675, "learning_rate": 0.00031004061472932634, "loss": 0.5158, "step": 26065 }, { "epoch": 1.3235141069411482, "grad_norm": 0.03025644762717795, "learning_rate": 0.00030983571746271977, "loss": 0.4733, "step": 26070 }, { "epoch": 1.3237679430391796, "grad_norm": 0.02398881393021072, "learning_rate": 0.0003096308575244135, "loss": 0.4784, "step": 26075 }, { "epoch": 1.324021779137211, "grad_norm": 0.023325566374430225, "learning_rate": 0.00030942603495462054, "loss": 0.501, "step": 26080 }, { "epoch": 1.3242756152352426, "grad_norm": 0.02552948646532719, "learning_rate": 0.0003092212497935465, "loss": 0.4894, "step": 26085 }, { "epoch": 1.3245294513332742, "grad_norm": 0.03612792972246537, "learning_rate": 0.0003090165020813897, "loss": 0.4688, "step": 26090 }, { "epoch": 1.3247832874313055, "grad_norm": 0.028878202348219067, "learning_rate": 0.00030881179185834114, "loss": 0.5304, "step": 26095 }, { "epoch": 1.325037123529337, "grad_norm": 0.020583251018650484, "learning_rate": 0.0003086071191645844, "loss": 0.4639, "step": 26100 }, { "epoch": 1.3252909596273685, "grad_norm": 0.023744717612926514, "learning_rate": 0.00030840248404029563, "loss": 0.4727, "step": 26105 }, { "epoch": 1.3255447957254, "grad_norm": 0.024384417847231648, "learning_rate": 0.00030819788652564377, "loss": 0.4774, "step": 26110 }, { "epoch": 1.3257986318234316, "grad_norm": 0.029828723552213644, "learning_rate": 0.00030799332666079016, "loss": 0.4778, "step": 26115 }, { "epoch": 1.3260524679214631, "grad_norm": 0.0277189472799926, "learning_rate": 0.0003077888044858891, "loss": 0.4987, "step": 26120 }, { "epoch": 1.3263063040194947, "grad_norm": 0.021616997214167058, "learning_rate": 0.00030758432004108723, "loss": 0.491, "step": 26125 }, { "epoch": 1.3265601401175262, "grad_norm": 0.03097966201237373, "learning_rate": 0.0003073798733665237, "loss": 0.4743, "step": 26130 }, { "epoch": 1.3268139762155577, "grad_norm": 0.03448839917044529, "learning_rate": 0.00030717546450233045, "loss": 0.5102, "step": 26135 }, { "epoch": 1.327067812313589, "grad_norm": 0.027797346027596648, "learning_rate": 0.0003069710934886319, "loss": 0.4805, "step": 26140 }, { "epoch": 1.3273216484116206, "grad_norm": 0.023865639312762033, "learning_rate": 0.0003067667603655451, "loss": 0.4738, "step": 26145 }, { "epoch": 1.3275754845096521, "grad_norm": 0.022994781293125738, "learning_rate": 0.0003065624651731795, "loss": 0.4604, "step": 26150 }, { "epoch": 1.3278293206076837, "grad_norm": 0.02452841527289796, "learning_rate": 0.00030635820795163737, "loss": 0.495, "step": 26155 }, { "epoch": 1.3280831567057152, "grad_norm": 0.025166790867288878, "learning_rate": 0.0003061539887410133, "loss": 0.4653, "step": 26160 }, { "epoch": 1.3283369928037465, "grad_norm": 0.025844039701185498, "learning_rate": 0.0003059498075813946, "loss": 0.474, "step": 26165 }, { "epoch": 1.328590828901778, "grad_norm": 0.022922263461315293, "learning_rate": 0.0003057456645128609, "loss": 0.5229, "step": 26170 }, { "epoch": 1.3288446649998096, "grad_norm": 0.028735220280624686, "learning_rate": 0.00030554155957548425, "loss": 0.4895, "step": 26175 }, { "epoch": 1.3290985010978411, "grad_norm": 0.02399502885581283, "learning_rate": 0.00030533749280933, "loss": 0.4898, "step": 26180 }, { "epoch": 1.3293523371958726, "grad_norm": 0.027489214240756413, "learning_rate": 0.0003051334642544551, "loss": 0.4868, "step": 26185 }, { "epoch": 1.3296061732939042, "grad_norm": 0.022807932982948913, "learning_rate": 0.0003049294739509093, "loss": 0.4645, "step": 26190 }, { "epoch": 1.3298600093919357, "grad_norm": 0.02364717870718042, "learning_rate": 0.00030472552193873506, "loss": 0.4874, "step": 26195 }, { "epoch": 1.3301138454899673, "grad_norm": 0.02421526146734168, "learning_rate": 0.0003045216082579669, "loss": 0.4753, "step": 26200 }, { "epoch": 1.3303676815879986, "grad_norm": 0.023753864278555786, "learning_rate": 0.0003043177329486323, "loss": 0.4974, "step": 26205 }, { "epoch": 1.33062151768603, "grad_norm": 0.02405794761573703, "learning_rate": 0.0003041138960507508, "loss": 0.5051, "step": 26210 }, { "epoch": 1.3308753537840616, "grad_norm": 0.022815391635180565, "learning_rate": 0.0003039100976043346, "loss": 0.491, "step": 26215 }, { "epoch": 1.3311291898820932, "grad_norm": 0.022343572537838263, "learning_rate": 0.0003037063376493884, "loss": 0.4743, "step": 26220 }, { "epoch": 1.3313830259801247, "grad_norm": 0.022723276658171112, "learning_rate": 0.00030350261622590926, "loss": 0.4531, "step": 26225 }, { "epoch": 1.331636862078156, "grad_norm": 0.023454158002978998, "learning_rate": 0.0003032989333738865, "loss": 0.4808, "step": 26230 }, { "epoch": 1.3318906981761875, "grad_norm": 0.023075690824087954, "learning_rate": 0.0003030952891333021, "loss": 0.4672, "step": 26235 }, { "epoch": 1.332144534274219, "grad_norm": 0.024106778177103005, "learning_rate": 0.00030289168354413065, "loss": 0.4942, "step": 26240 }, { "epoch": 1.3323983703722506, "grad_norm": 0.025151670342148585, "learning_rate": 0.00030268811664633865, "loss": 0.4927, "step": 26245 }, { "epoch": 1.3326522064702822, "grad_norm": 0.030957786516116338, "learning_rate": 0.0003024845884798855, "loss": 0.4773, "step": 26250 }, { "epoch": 1.3329060425683137, "grad_norm": 0.025925112474195305, "learning_rate": 0.00030228109908472247, "loss": 0.4611, "step": 26255 }, { "epoch": 1.3331598786663452, "grad_norm": 0.019884408833520717, "learning_rate": 0.00030207764850079374, "loss": 0.4644, "step": 26260 }, { "epoch": 1.3334137147643768, "grad_norm": 0.025171919728835243, "learning_rate": 0.00030187423676803556, "loss": 0.4628, "step": 26265 }, { "epoch": 1.333667550862408, "grad_norm": 0.02155136741752636, "learning_rate": 0.00030167086392637665, "loss": 0.4931, "step": 26270 }, { "epoch": 1.3339213869604396, "grad_norm": 0.03331276638766642, "learning_rate": 0.0003014675300157381, "loss": 0.4617, "step": 26275 }, { "epoch": 1.3341752230584711, "grad_norm": 0.026091056559288128, "learning_rate": 0.00030126423507603327, "loss": 0.4691, "step": 26280 }, { "epoch": 1.3344290591565027, "grad_norm": 0.021480786402548066, "learning_rate": 0.00030106097914716804, "loss": 0.4798, "step": 26285 }, { "epoch": 1.3346828952545342, "grad_norm": 0.026115011721068254, "learning_rate": 0.0003008577622690405, "loss": 0.4778, "step": 26290 }, { "epoch": 1.3349367313525655, "grad_norm": 0.029188603624534938, "learning_rate": 0.00030065458448154094, "loss": 0.4956, "step": 26295 }, { "epoch": 1.335190567450597, "grad_norm": 0.02343770485301329, "learning_rate": 0.0003004514458245525, "loss": 0.5235, "step": 26300 }, { "epoch": 1.3354444035486286, "grad_norm": 0.023629932549987128, "learning_rate": 0.00030024834633795005, "loss": 0.4652, "step": 26305 }, { "epoch": 1.3356982396466601, "grad_norm": 0.02116981769281469, "learning_rate": 0.0003000452860616011, "loss": 0.4936, "step": 26310 }, { "epoch": 1.3359520757446917, "grad_norm": 0.019859957952761565, "learning_rate": 0.00029984226503536527, "loss": 0.4681, "step": 26315 }, { "epoch": 1.3362059118427232, "grad_norm": 0.025886391465037177, "learning_rate": 0.0002996392832990946, "loss": 0.4803, "step": 26320 }, { "epoch": 1.3364597479407547, "grad_norm": 0.02594128632014247, "learning_rate": 0.00029943634089263355, "loss": 0.4873, "step": 26325 }, { "epoch": 1.3367135840387863, "grad_norm": 0.03408346447284297, "learning_rate": 0.0002992334378558185, "loss": 0.48, "step": 26330 }, { "epoch": 1.3369674201368176, "grad_norm": 0.03841155039452, "learning_rate": 0.00029903057422847834, "loss": 0.4702, "step": 26335 }, { "epoch": 1.337221256234849, "grad_norm": 0.022888290658989296, "learning_rate": 0.0002988277500504343, "loss": 0.4388, "step": 26340 }, { "epoch": 1.3374750923328806, "grad_norm": 0.03122320066551133, "learning_rate": 0.00029862496536149966, "loss": 0.4969, "step": 26345 }, { "epoch": 1.3377289284309122, "grad_norm": 0.03769099264555599, "learning_rate": 0.00029842222020148, "loss": 0.489, "step": 26350 }, { "epoch": 1.3379827645289437, "grad_norm": 0.029431216873870947, "learning_rate": 0.0002982195146101734, "loss": 0.5069, "step": 26355 }, { "epoch": 1.338236600626975, "grad_norm": 0.024178396636187073, "learning_rate": 0.00029801684862736956, "loss": 0.4598, "step": 26360 }, { "epoch": 1.3384904367250066, "grad_norm": 0.027229237482082266, "learning_rate": 0.0002978142222928512, "loss": 0.4881, "step": 26365 }, { "epoch": 1.338744272823038, "grad_norm": 0.02518085523341514, "learning_rate": 0.0002976116356463927, "loss": 0.4734, "step": 26370 }, { "epoch": 1.3389981089210696, "grad_norm": 0.026449318799886906, "learning_rate": 0.00029740908872776087, "loss": 0.4857, "step": 26375 }, { "epoch": 1.3392519450191012, "grad_norm": 0.02736453220212371, "learning_rate": 0.00029720658157671447, "loss": 0.4952, "step": 26380 }, { "epoch": 1.3395057811171327, "grad_norm": 0.02011376143672255, "learning_rate": 0.0002970041142330049, "loss": 0.475, "step": 26385 }, { "epoch": 1.3397596172151642, "grad_norm": 0.022297115519452224, "learning_rate": 0.0002968016867363753, "loss": 0.4881, "step": 26390 }, { "epoch": 1.3400134533131958, "grad_norm": 0.028308862230847293, "learning_rate": 0.00029659929912656123, "loss": 0.4707, "step": 26395 }, { "epoch": 1.3402672894112273, "grad_norm": 0.02418795190807018, "learning_rate": 0.0002963969514432904, "loss": 0.472, "step": 26400 }, { "epoch": 1.3405211255092586, "grad_norm": 0.03161928209152755, "learning_rate": 0.0002961946437262827, "loss": 0.4715, "step": 26405 }, { "epoch": 1.3407749616072901, "grad_norm": 0.03500499761857729, "learning_rate": 0.00029599237601525, "loss": 0.4636, "step": 26410 }, { "epoch": 1.3410287977053217, "grad_norm": 0.03348168345879801, "learning_rate": 0.00029579014834989653, "loss": 0.4979, "step": 26415 }, { "epoch": 1.3412826338033532, "grad_norm": 0.029165985289788657, "learning_rate": 0.00029558796076991836, "loss": 0.5169, "step": 26420 }, { "epoch": 1.3415364699013848, "grad_norm": 0.026140837166966736, "learning_rate": 0.00029538581331500427, "loss": 0.4786, "step": 26425 }, { "epoch": 1.341790305999416, "grad_norm": 0.031152081282308442, "learning_rate": 0.0002951837060248346, "loss": 0.4906, "step": 26430 }, { "epoch": 1.3420441420974476, "grad_norm": 0.036089699561631966, "learning_rate": 0.000294981638939082, "loss": 0.4527, "step": 26435 }, { "epoch": 1.3422979781954791, "grad_norm": 0.029379508239414574, "learning_rate": 0.0002947796120974113, "loss": 0.4576, "step": 26440 }, { "epoch": 1.3425518142935107, "grad_norm": 0.026574119568228798, "learning_rate": 0.0002945776255394793, "loss": 0.487, "step": 26445 }, { "epoch": 1.3428056503915422, "grad_norm": 0.02274745690514462, "learning_rate": 0.00029437567930493493, "loss": 0.4661, "step": 26450 }, { "epoch": 1.3430594864895737, "grad_norm": 0.021614065812495135, "learning_rate": 0.0002941737734334193, "loss": 0.496, "step": 26455 }, { "epoch": 1.3433133225876053, "grad_norm": 0.022482586999432463, "learning_rate": 0.00029397190796456553, "loss": 0.4844, "step": 26460 }, { "epoch": 1.3435671586856368, "grad_norm": 0.022116336028821203, "learning_rate": 0.00029377008293799865, "loss": 0.52, "step": 26465 }, { "epoch": 1.3438209947836681, "grad_norm": 0.028349507842694705, "learning_rate": 0.00029356829839333615, "loss": 0.4871, "step": 26470 }, { "epoch": 1.3440748308816997, "grad_norm": 0.022089539699478508, "learning_rate": 0.0002933665543701871, "loss": 0.4943, "step": 26475 }, { "epoch": 1.3443286669797312, "grad_norm": 0.026039145829479762, "learning_rate": 0.0002931648509081529, "loss": 0.502, "step": 26480 }, { "epoch": 1.3445825030777627, "grad_norm": 0.028120685619677774, "learning_rate": 0.0002929631880468271, "loss": 0.4956, "step": 26485 }, { "epoch": 1.3448363391757943, "grad_norm": 0.02097286192345862, "learning_rate": 0.000292761565825795, "loss": 0.4729, "step": 26490 }, { "epoch": 1.3450901752738256, "grad_norm": 0.027620567755231737, "learning_rate": 0.000292559984284634, "loss": 0.5011, "step": 26495 }, { "epoch": 1.345344011371857, "grad_norm": 0.022184671610469525, "learning_rate": 0.0002923584434629136, "loss": 0.4848, "step": 26500 }, { "epoch": 1.3455978474698886, "grad_norm": 0.027060057039042365, "learning_rate": 0.0002921569434001952, "loss": 0.446, "step": 26505 }, { "epoch": 1.3458516835679202, "grad_norm": 0.022283856953366245, "learning_rate": 0.00029195548413603236, "loss": 0.4637, "step": 26510 }, { "epoch": 1.3461055196659517, "grad_norm": 0.02142821011081133, "learning_rate": 0.0002917540657099703, "loss": 0.4999, "step": 26515 }, { "epoch": 1.3463593557639832, "grad_norm": 0.03140709448224681, "learning_rate": 0.0002915526881615469, "loss": 0.5199, "step": 26520 }, { "epoch": 1.3466131918620148, "grad_norm": 0.02193762571699132, "learning_rate": 0.000291351351530291, "loss": 0.5037, "step": 26525 }, { "epoch": 1.3468670279600463, "grad_norm": 0.024860792301576635, "learning_rate": 0.0002911500558557245, "loss": 0.4918, "step": 26530 }, { "epoch": 1.3471208640580776, "grad_norm": 0.02411891556809714, "learning_rate": 0.0002909488011773603, "loss": 0.48, "step": 26535 }, { "epoch": 1.3473747001561092, "grad_norm": 0.02334451935870803, "learning_rate": 0.000290747587534704, "loss": 0.5067, "step": 26540 }, { "epoch": 1.3476285362541407, "grad_norm": 0.033294686217091356, "learning_rate": 0.00029054641496725276, "loss": 0.533, "step": 26545 }, { "epoch": 1.3478823723521722, "grad_norm": 0.021364746646164178, "learning_rate": 0.00029034528351449564, "loss": 0.4972, "step": 26550 }, { "epoch": 1.3481362084502038, "grad_norm": 0.022844057569169195, "learning_rate": 0.00029014419321591396, "loss": 0.4839, "step": 26555 }, { "epoch": 1.348390044548235, "grad_norm": 0.021739041832296423, "learning_rate": 0.00028994314411098044, "loss": 0.4683, "step": 26560 }, { "epoch": 1.3486438806462666, "grad_norm": 0.022308989962987445, "learning_rate": 0.00028974213623916037, "loss": 0.4752, "step": 26565 }, { "epoch": 1.3488977167442981, "grad_norm": 0.022250254963704218, "learning_rate": 0.0002895411696399102, "loss": 0.5067, "step": 26570 }, { "epoch": 1.3491515528423297, "grad_norm": 0.02323169518050859, "learning_rate": 0.000289340244352679, "loss": 0.4824, "step": 26575 }, { "epoch": 1.3494053889403612, "grad_norm": 0.021304787629742503, "learning_rate": 0.00028913936041690715, "loss": 0.4832, "step": 26580 }, { "epoch": 1.3496592250383928, "grad_norm": 0.021154944633414424, "learning_rate": 0.00028893851787202746, "loss": 0.496, "step": 26585 }, { "epoch": 1.3499130611364243, "grad_norm": 0.023742114405543396, "learning_rate": 0.00028873771675746394, "loss": 0.4846, "step": 26590 }, { "epoch": 1.3501668972344558, "grad_norm": 0.02312754487239046, "learning_rate": 0.0002885369571126333, "loss": 0.4775, "step": 26595 }, { "epoch": 1.3504207333324871, "grad_norm": 0.026159799755686802, "learning_rate": 0.000288336238976943, "loss": 0.5369, "step": 26600 }, { "epoch": 1.3506745694305187, "grad_norm": 0.022936903925025648, "learning_rate": 0.00028813556238979377, "loss": 0.5077, "step": 26605 }, { "epoch": 1.3509284055285502, "grad_norm": 0.023814084119461076, "learning_rate": 0.000287934927390577, "loss": 0.4698, "step": 26610 }, { "epoch": 1.3511822416265817, "grad_norm": 0.02246864872428122, "learning_rate": 0.0002877343340186765, "loss": 0.482, "step": 26615 }, { "epoch": 1.3514360777246133, "grad_norm": 0.02288321097342424, "learning_rate": 0.0002875337823134675, "loss": 0.5256, "step": 26620 }, { "epoch": 1.3516899138226446, "grad_norm": 0.0224047370327657, "learning_rate": 0.0002873332723143177, "loss": 0.5193, "step": 26625 }, { "epoch": 1.3519437499206761, "grad_norm": 0.025203167907080704, "learning_rate": 0.00028713280406058575, "loss": 0.484, "step": 26630 }, { "epoch": 1.3521975860187077, "grad_norm": 0.020720560042865592, "learning_rate": 0.00028693237759162295, "loss": 0.4852, "step": 26635 }, { "epoch": 1.3524514221167392, "grad_norm": 0.021481598559433032, "learning_rate": 0.0002867319929467717, "loss": 0.5115, "step": 26640 }, { "epoch": 1.3527052582147707, "grad_norm": 0.022428252435808805, "learning_rate": 0.0002865316501653669, "loss": 0.5024, "step": 26645 }, { "epoch": 1.3529590943128023, "grad_norm": 0.02233749023643697, "learning_rate": 0.0002863313492867344, "loss": 0.476, "step": 26650 }, { "epoch": 1.3532129304108338, "grad_norm": 0.03464327071026982, "learning_rate": 0.0002861310903501926, "loss": 0.5206, "step": 26655 }, { "epoch": 1.3534667665088653, "grad_norm": 0.02542767368623182, "learning_rate": 0.0002859308733950511, "loss": 0.4785, "step": 26660 }, { "epoch": 1.3537206026068969, "grad_norm": 0.021672839236436318, "learning_rate": 0.0002857306984606115, "loss": 0.4777, "step": 26665 }, { "epoch": 1.3539744387049282, "grad_norm": 0.021627780540600057, "learning_rate": 0.0002855305655861675, "loss": 0.4673, "step": 26670 }, { "epoch": 1.3542282748029597, "grad_norm": 0.02290542274140012, "learning_rate": 0.0002853304748110037, "loss": 0.4879, "step": 26675 }, { "epoch": 1.3544821109009912, "grad_norm": 0.024948536996439105, "learning_rate": 0.00028513042617439734, "loss": 0.4777, "step": 26680 }, { "epoch": 1.3547359469990228, "grad_norm": 0.029359162803752524, "learning_rate": 0.0002849304197156166, "loss": 0.5148, "step": 26685 }, { "epoch": 1.3549897830970543, "grad_norm": 0.023302687409291374, "learning_rate": 0.00028473045547392205, "loss": 0.482, "step": 26690 }, { "epoch": 1.3552436191950856, "grad_norm": 0.025299148598432575, "learning_rate": 0.0002845305334885654, "loss": 0.51, "step": 26695 }, { "epoch": 1.3554974552931172, "grad_norm": 0.02516023607604002, "learning_rate": 0.0002843306537987906, "loss": 0.4369, "step": 26700 }, { "epoch": 1.3557512913911487, "grad_norm": 0.022672487670563547, "learning_rate": 0.00028413081644383285, "loss": 0.4883, "step": 26705 }, { "epoch": 1.3560051274891802, "grad_norm": 0.02464631035571871, "learning_rate": 0.0002839310214629194, "loss": 0.5134, "step": 26710 }, { "epoch": 1.3562589635872118, "grad_norm": 0.021789322301810896, "learning_rate": 0.00028373126889526875, "loss": 0.4862, "step": 26715 }, { "epoch": 1.3565127996852433, "grad_norm": 0.021183030059470056, "learning_rate": 0.0002835315587800914, "loss": 0.4785, "step": 26720 }, { "epoch": 1.3567666357832748, "grad_norm": 0.02477154579365883, "learning_rate": 0.00028333189115658966, "loss": 0.4824, "step": 26725 }, { "epoch": 1.3570204718813064, "grad_norm": 0.023028212328728488, "learning_rate": 0.0002831322660639573, "loss": 0.5141, "step": 26730 }, { "epoch": 1.3572743079793377, "grad_norm": 0.03275446295412662, "learning_rate": 0.0002829326835413794, "loss": 0.4805, "step": 26735 }, { "epoch": 1.3575281440773692, "grad_norm": 0.02501122701564825, "learning_rate": 0.00028273314362803337, "loss": 0.4855, "step": 26740 }, { "epoch": 1.3577819801754007, "grad_norm": 0.02328137101092054, "learning_rate": 0.0002825336463630875, "loss": 0.4999, "step": 26745 }, { "epoch": 1.3580358162734323, "grad_norm": 0.026605008466514972, "learning_rate": 0.0002823341917857027, "loss": 0.4587, "step": 26750 }, { "epoch": 1.3582896523714638, "grad_norm": 0.023150456810902106, "learning_rate": 0.0002821347799350302, "loss": 0.4723, "step": 26755 }, { "epoch": 1.3585434884694951, "grad_norm": 0.02251804410714866, "learning_rate": 0.00028193541085021423, "loss": 0.501, "step": 26760 }, { "epoch": 1.3587973245675267, "grad_norm": 0.029335610284536826, "learning_rate": 0.00028173608457038936, "loss": 0.5047, "step": 26765 }, { "epoch": 1.3590511606655582, "grad_norm": 0.022188907618175026, "learning_rate": 0.0002815368011346828, "loss": 0.4884, "step": 26770 }, { "epoch": 1.3593049967635897, "grad_norm": 0.021239459035322466, "learning_rate": 0.00028133756058221253, "loss": 0.4655, "step": 26775 }, { "epoch": 1.3595588328616213, "grad_norm": 0.022946805788639644, "learning_rate": 0.0002811383629520887, "loss": 0.4871, "step": 26780 }, { "epoch": 1.3598126689596528, "grad_norm": 0.023284296559063916, "learning_rate": 0.0002809392082834129, "loss": 0.4965, "step": 26785 }, { "epoch": 1.3600665050576843, "grad_norm": 0.028648125257779124, "learning_rate": 0.0002807400966152778, "loss": 0.4915, "step": 26790 }, { "epoch": 1.3603203411557159, "grad_norm": 0.02302269054921378, "learning_rate": 0.0002805410279867686, "loss": 0.459, "step": 26795 }, { "epoch": 1.3605741772537472, "grad_norm": 0.030405240018795764, "learning_rate": 0.0002803420024369609, "loss": 0.4316, "step": 26800 }, { "epoch": 1.3608280133517787, "grad_norm": 0.030800198523568283, "learning_rate": 0.00028014302000492285, "loss": 0.4752, "step": 26805 }, { "epoch": 1.3610818494498103, "grad_norm": 0.02475384060074233, "learning_rate": 0.00027994408072971346, "loss": 0.4718, "step": 26810 }, { "epoch": 1.3613356855478418, "grad_norm": 0.021700406170621297, "learning_rate": 0.0002797451846503837, "loss": 0.4708, "step": 26815 }, { "epoch": 1.3615895216458733, "grad_norm": 0.021235969952080012, "learning_rate": 0.00027954633180597564, "loss": 0.495, "step": 26820 }, { "epoch": 1.3618433577439046, "grad_norm": 0.024028737162551318, "learning_rate": 0.00027934752223552343, "loss": 0.4563, "step": 26825 }, { "epoch": 1.3620971938419362, "grad_norm": 0.024465335502800833, "learning_rate": 0.0002791487559780521, "loss": 0.4787, "step": 26830 }, { "epoch": 1.3623510299399677, "grad_norm": 0.022491243446900795, "learning_rate": 0.00027895003307257867, "loss": 0.4787, "step": 26835 }, { "epoch": 1.3626048660379992, "grad_norm": 0.021133211476665054, "learning_rate": 0.000278751353558111, "loss": 0.4931, "step": 26840 }, { "epoch": 1.3628587021360308, "grad_norm": 0.02049442999020091, "learning_rate": 0.00027855271747364966, "loss": 0.4676, "step": 26845 }, { "epoch": 1.3631125382340623, "grad_norm": 0.08243543665279841, "learning_rate": 0.00027835412485818534, "loss": 0.4618, "step": 26850 }, { "epoch": 1.3633663743320938, "grad_norm": 0.03544850817167854, "learning_rate": 0.00027815557575070117, "loss": 0.4583, "step": 26855 }, { "epoch": 1.3636202104301254, "grad_norm": 0.020365646726838955, "learning_rate": 0.0002779570701901709, "loss": 0.4487, "step": 26860 }, { "epoch": 1.3638740465281567, "grad_norm": 0.02299060460650672, "learning_rate": 0.0002777586082155607, "loss": 0.5215, "step": 26865 }, { "epoch": 1.3641278826261882, "grad_norm": 0.02293172060847605, "learning_rate": 0.00027756018986582715, "loss": 0.4658, "step": 26870 }, { "epoch": 1.3643817187242198, "grad_norm": 0.024224482941596973, "learning_rate": 0.00027736181517991923, "loss": 0.4582, "step": 26875 }, { "epoch": 1.3646355548222513, "grad_norm": 0.02013654142332293, "learning_rate": 0.0002771634841967767, "loss": 0.5103, "step": 26880 }, { "epoch": 1.3648893909202828, "grad_norm": 0.022624447257192006, "learning_rate": 0.00027696519695533074, "loss": 0.5027, "step": 26885 }, { "epoch": 1.3651432270183141, "grad_norm": 0.023533731523000142, "learning_rate": 0.00027676695349450456, "loss": 0.4481, "step": 26890 }, { "epoch": 1.3653970631163457, "grad_norm": 0.023179531374566843, "learning_rate": 0.0002765687538532119, "loss": 0.4483, "step": 26895 }, { "epoch": 1.3656508992143772, "grad_norm": 0.024281101734940795, "learning_rate": 0.0002763705980703586, "loss": 0.4812, "step": 26900 }, { "epoch": 1.3659047353124087, "grad_norm": 0.026283033797865986, "learning_rate": 0.0002761724861848417, "loss": 0.491, "step": 26905 }, { "epoch": 1.3661585714104403, "grad_norm": 0.02323522667080857, "learning_rate": 0.0002759744182355498, "loss": 0.4649, "step": 26910 }, { "epoch": 1.3664124075084718, "grad_norm": 0.021854167824604177, "learning_rate": 0.00027577639426136204, "loss": 0.5056, "step": 26915 }, { "epoch": 1.3666662436065034, "grad_norm": 0.023141909554007044, "learning_rate": 0.00027557841430115015, "loss": 0.4835, "step": 26920 }, { "epoch": 1.3669200797045349, "grad_norm": 0.025826171421759428, "learning_rate": 0.0002753804783937762, "loss": 0.5056, "step": 26925 }, { "epoch": 1.3671739158025664, "grad_norm": 0.022488613024276922, "learning_rate": 0.0002751825865780943, "loss": 0.4655, "step": 26930 }, { "epoch": 1.3674277519005977, "grad_norm": 0.023927541727324465, "learning_rate": 0.0002749847388929493, "loss": 0.4865, "step": 26935 }, { "epoch": 1.3676815879986293, "grad_norm": 0.022655438670871445, "learning_rate": 0.0002747869353771781, "loss": 0.4848, "step": 26940 }, { "epoch": 1.3679354240966608, "grad_norm": 0.022064396845660302, "learning_rate": 0.0002745891760696082, "loss": 0.5048, "step": 26945 }, { "epoch": 1.3681892601946923, "grad_norm": 0.026679064333024728, "learning_rate": 0.0002743914610090591, "loss": 0.4827, "step": 26950 }, { "epoch": 1.3684430962927239, "grad_norm": 0.027571108833935626, "learning_rate": 0.0002741937902343409, "loss": 0.4845, "step": 26955 }, { "epoch": 1.3686969323907552, "grad_norm": 0.025110754378475025, "learning_rate": 0.0002739961637842555, "loss": 0.4623, "step": 26960 }, { "epoch": 1.3689507684887867, "grad_norm": 0.0203989644652357, "learning_rate": 0.0002737985816975963, "loss": 0.5092, "step": 26965 }, { "epoch": 1.3692046045868183, "grad_norm": 0.02200896596387883, "learning_rate": 0.00027360104401314735, "loss": 0.4924, "step": 26970 }, { "epoch": 1.3694584406848498, "grad_norm": 0.027141315110277984, "learning_rate": 0.0002734035507696845, "loss": 0.4874, "step": 26975 }, { "epoch": 1.3697122767828813, "grad_norm": 0.02848772652258403, "learning_rate": 0.0002732061020059745, "loss": 0.5233, "step": 26980 }, { "epoch": 1.3699661128809129, "grad_norm": 0.022711189059726297, "learning_rate": 0.00027300869776077574, "loss": 0.5153, "step": 26985 }, { "epoch": 1.3702199489789444, "grad_norm": 0.023955919803751433, "learning_rate": 0.0002728113380728375, "loss": 0.4977, "step": 26990 }, { "epoch": 1.370473785076976, "grad_norm": 0.028659928908923184, "learning_rate": 0.0002726140229809008, "loss": 0.515, "step": 26995 }, { "epoch": 1.3707276211750072, "grad_norm": 0.02508581003225146, "learning_rate": 0.00027241675252369715, "loss": 0.4582, "step": 27000 }, { "epoch": 1.3709814572730388, "grad_norm": 0.021436214718563537, "learning_rate": 0.0002722195267399502, "loss": 0.4612, "step": 27005 }, { "epoch": 1.3712352933710703, "grad_norm": 0.02201171442218941, "learning_rate": 0.00027202234566837415, "loss": 0.4995, "step": 27010 }, { "epoch": 1.3714891294691018, "grad_norm": 0.02526187332817724, "learning_rate": 0.0002718252093476748, "loss": 0.4808, "step": 27015 }, { "epoch": 1.3717429655671334, "grad_norm": 0.020953262767424676, "learning_rate": 0.0002716281178165486, "loss": 0.4999, "step": 27020 }, { "epoch": 1.3719968016651647, "grad_norm": 0.022858740436538003, "learning_rate": 0.00027143107111368437, "loss": 0.5066, "step": 27025 }, { "epoch": 1.3722506377631962, "grad_norm": 0.030228302005709733, "learning_rate": 0.00027123406927776085, "loss": 0.4618, "step": 27030 }, { "epoch": 1.3725044738612278, "grad_norm": 0.021247447506756632, "learning_rate": 0.0002710371123474488, "loss": 0.4838, "step": 27035 }, { "epoch": 1.3727583099592593, "grad_norm": 0.029048074402705838, "learning_rate": 0.00027084020036140965, "loss": 0.4537, "step": 27040 }, { "epoch": 1.3730121460572908, "grad_norm": 0.020897232940356406, "learning_rate": 0.00027064333335829647, "loss": 0.4661, "step": 27045 }, { "epoch": 1.3732659821553224, "grad_norm": 0.024458604155040357, "learning_rate": 0.00027044651137675304, "loss": 0.4854, "step": 27050 }, { "epoch": 1.373519818253354, "grad_norm": 0.021452148822171557, "learning_rate": 0.00027024973445541475, "loss": 0.4756, "step": 27055 }, { "epoch": 1.3737736543513854, "grad_norm": 0.02638487068742648, "learning_rate": 0.00027005300263290764, "loss": 0.5049, "step": 27060 }, { "epoch": 1.3740274904494167, "grad_norm": 0.030588138064835756, "learning_rate": 0.00026985631594784966, "loss": 0.4904, "step": 27065 }, { "epoch": 1.3742813265474483, "grad_norm": 0.021448138541180863, "learning_rate": 0.0002696596744388488, "loss": 0.4687, "step": 27070 }, { "epoch": 1.3745351626454798, "grad_norm": 0.0220331732837399, "learning_rate": 0.0002694630781445054, "loss": 0.5052, "step": 27075 }, { "epoch": 1.3747889987435113, "grad_norm": 0.021848289466681034, "learning_rate": 0.0002692665271034099, "loss": 0.4823, "step": 27080 }, { "epoch": 1.3750428348415429, "grad_norm": 0.024123987652464134, "learning_rate": 0.00026907002135414447, "loss": 0.4641, "step": 27085 }, { "epoch": 1.3752966709395742, "grad_norm": 0.020535646841589324, "learning_rate": 0.00026887356093528237, "loss": 0.4648, "step": 27090 }, { "epoch": 1.3755505070376057, "grad_norm": 0.02584026244252551, "learning_rate": 0.00026867714588538747, "loss": 0.5047, "step": 27095 }, { "epoch": 1.3758043431356373, "grad_norm": 0.02367328497332991, "learning_rate": 0.00026848077624301537, "loss": 0.4565, "step": 27100 }, { "epoch": 1.3760581792336688, "grad_norm": 0.031745274615003184, "learning_rate": 0.00026828445204671216, "loss": 0.4953, "step": 27105 }, { "epoch": 1.3763120153317003, "grad_norm": 0.029581043724073622, "learning_rate": 0.0002680881733350156, "loss": 0.4612, "step": 27110 }, { "epoch": 1.3765658514297319, "grad_norm": 0.025514285969248228, "learning_rate": 0.0002678919401464539, "loss": 0.4811, "step": 27115 }, { "epoch": 1.3768196875277634, "grad_norm": 0.023296903129213998, "learning_rate": 0.00026769575251954703, "loss": 0.5023, "step": 27120 }, { "epoch": 1.377073523625795, "grad_norm": 0.02227739230704745, "learning_rate": 0.00026749961049280527, "loss": 0.4709, "step": 27125 }, { "epoch": 1.3773273597238263, "grad_norm": 0.023504231198353757, "learning_rate": 0.0002673035141047306, "loss": 0.4861, "step": 27130 }, { "epoch": 1.3775811958218578, "grad_norm": 0.02229075980390175, "learning_rate": 0.0002671074633938156, "loss": 0.4808, "step": 27135 }, { "epoch": 1.3778350319198893, "grad_norm": 0.024181315546383808, "learning_rate": 0.00026691145839854405, "loss": 0.4894, "step": 27140 }, { "epoch": 1.3780888680179209, "grad_norm": 0.02532811062819618, "learning_rate": 0.00026671549915739076, "loss": 0.472, "step": 27145 }, { "epoch": 1.3783427041159524, "grad_norm": 0.021472103926306933, "learning_rate": 0.0002665195857088218, "loss": 0.4636, "step": 27150 }, { "epoch": 1.3785965402139837, "grad_norm": 0.028858062964877782, "learning_rate": 0.0002663237180912936, "loss": 0.4687, "step": 27155 }, { "epoch": 1.3788503763120152, "grad_norm": 0.022973674462700357, "learning_rate": 0.0002661278963432544, "loss": 0.4748, "step": 27160 }, { "epoch": 1.3791042124100468, "grad_norm": 0.024592376476516513, "learning_rate": 0.00026593212050314265, "loss": 0.4917, "step": 27165 }, { "epoch": 1.3793580485080783, "grad_norm": 0.026905170435774242, "learning_rate": 0.0002657363906093886, "loss": 0.461, "step": 27170 }, { "epoch": 1.3796118846061098, "grad_norm": 0.03238266587023234, "learning_rate": 0.0002655407067004125, "loss": 0.4955, "step": 27175 }, { "epoch": 1.3798657207041414, "grad_norm": 0.03437683326100972, "learning_rate": 0.00026534506881462674, "loss": 0.4659, "step": 27180 }, { "epoch": 1.380119556802173, "grad_norm": 0.030372754755464044, "learning_rate": 0.0002651494769904335, "loss": 0.4952, "step": 27185 }, { "epoch": 1.3803733929002044, "grad_norm": 0.029874387863824136, "learning_rate": 0.00026495393126622685, "loss": 0.4846, "step": 27190 }, { "epoch": 1.3806272289982358, "grad_norm": 0.0267554731125212, "learning_rate": 0.00026475843168039117, "loss": 0.4723, "step": 27195 }, { "epoch": 1.3808810650962673, "grad_norm": 0.020690270070420125, "learning_rate": 0.0002645629782713022, "loss": 0.4799, "step": 27200 }, { "epoch": 1.3811349011942988, "grad_norm": 0.02838109794666923, "learning_rate": 0.00026436757107732665, "loss": 0.4916, "step": 27205 }, { "epoch": 1.3813887372923304, "grad_norm": 0.02358589235650878, "learning_rate": 0.0002641722101368217, "loss": 0.4953, "step": 27210 }, { "epoch": 1.381642573390362, "grad_norm": 0.02842708954146116, "learning_rate": 0.000263976895488136, "loss": 0.5203, "step": 27215 }, { "epoch": 1.3818964094883932, "grad_norm": 0.0220954764308833, "learning_rate": 0.0002637816271696084, "loss": 0.4997, "step": 27220 }, { "epoch": 1.3821502455864247, "grad_norm": 0.031197550414260204, "learning_rate": 0.0002635864052195696, "loss": 0.5016, "step": 27225 }, { "epoch": 1.3824040816844563, "grad_norm": 0.02454468885248768, "learning_rate": 0.00026339122967634026, "loss": 0.499, "step": 27230 }, { "epoch": 1.3826579177824878, "grad_norm": 0.05778464299883465, "learning_rate": 0.0002631961005782328, "loss": 0.4341, "step": 27235 }, { "epoch": 1.3829117538805193, "grad_norm": 0.023244866350785674, "learning_rate": 0.00026300101796354966, "loss": 0.4571, "step": 27240 }, { "epoch": 1.3831655899785509, "grad_norm": 0.06533569452053184, "learning_rate": 0.0002628059818705849, "loss": 0.471, "step": 27245 }, { "epoch": 1.3834194260765824, "grad_norm": 0.026721629818407002, "learning_rate": 0.00026261099233762286, "loss": 0.448, "step": 27250 }, { "epoch": 1.383673262174614, "grad_norm": 0.023742554699451898, "learning_rate": 0.0002624160494029394, "loss": 0.4868, "step": 27255 }, { "epoch": 1.3839270982726455, "grad_norm": 0.028301790837454648, "learning_rate": 0.0002622211531048004, "loss": 0.4682, "step": 27260 }, { "epoch": 1.3841809343706768, "grad_norm": 0.025666697236186767, "learning_rate": 0.0002620263034814632, "loss": 0.4725, "step": 27265 }, { "epoch": 1.3844347704687083, "grad_norm": 0.02634404140327674, "learning_rate": 0.00026183150057117595, "loss": 0.4929, "step": 27270 }, { "epoch": 1.3846886065667399, "grad_norm": 0.025227814246512378, "learning_rate": 0.0002616367444121775, "loss": 0.4846, "step": 27275 }, { "epoch": 1.3849424426647714, "grad_norm": 0.028927076349626928, "learning_rate": 0.0002614420350426973, "loss": 0.4822, "step": 27280 }, { "epoch": 1.385196278762803, "grad_norm": 0.023294755705656967, "learning_rate": 0.00026124737250095596, "loss": 0.4622, "step": 27285 }, { "epoch": 1.3854501148608342, "grad_norm": 0.023578850146889208, "learning_rate": 0.0002610527568251647, "loss": 0.457, "step": 27290 }, { "epoch": 1.3857039509588658, "grad_norm": 0.021005770334938684, "learning_rate": 0.0002608581880535258, "loss": 0.4816, "step": 27295 }, { "epoch": 1.3859577870568973, "grad_norm": 0.022374150226942276, "learning_rate": 0.00026066366622423177, "loss": 0.4843, "step": 27300 }, { "epoch": 1.3862116231549289, "grad_norm": 0.027829807992719183, "learning_rate": 0.0002604691913754668, "loss": 0.4921, "step": 27305 }, { "epoch": 1.3864654592529604, "grad_norm": 0.021996056034670426, "learning_rate": 0.0002602747635454047, "loss": 0.5174, "step": 27310 }, { "epoch": 1.386719295350992, "grad_norm": 0.021135081168996664, "learning_rate": 0.00026008038277221127, "loss": 0.4732, "step": 27315 }, { "epoch": 1.3869731314490235, "grad_norm": 0.040553027428796726, "learning_rate": 0.0002598860490940419, "loss": 0.4869, "step": 27320 }, { "epoch": 1.387226967547055, "grad_norm": 0.02726713196600684, "learning_rate": 0.0002596917625490438, "loss": 0.4646, "step": 27325 }, { "epoch": 1.3874808036450863, "grad_norm": 0.030567319881294505, "learning_rate": 0.0002594975231753544, "loss": 0.4909, "step": 27330 }, { "epoch": 1.3877346397431178, "grad_norm": 0.02274111194508241, "learning_rate": 0.00025930333101110173, "loss": 0.476, "step": 27335 }, { "epoch": 1.3879884758411494, "grad_norm": 0.02044291002475312, "learning_rate": 0.0002591091860944049, "loss": 0.5066, "step": 27340 }, { "epoch": 1.388242311939181, "grad_norm": 0.028846129971471627, "learning_rate": 0.00025891508846337337, "loss": 0.4598, "step": 27345 }, { "epoch": 1.3884961480372124, "grad_norm": 0.024004903325607403, "learning_rate": 0.00025872103815610794, "loss": 0.4617, "step": 27350 }, { "epoch": 1.3887499841352438, "grad_norm": 0.02506170048370503, "learning_rate": 0.0002585270352106992, "loss": 0.4712, "step": 27355 }, { "epoch": 1.3890038202332753, "grad_norm": 0.02408913428393671, "learning_rate": 0.0002583330796652294, "loss": 0.4632, "step": 27360 }, { "epoch": 1.3892576563313068, "grad_norm": 0.022166652995046117, "learning_rate": 0.0002581391715577707, "loss": 0.4792, "step": 27365 }, { "epoch": 1.3895114924293384, "grad_norm": 0.034847220934961225, "learning_rate": 0.00025794531092638667, "loss": 0.4824, "step": 27370 }, { "epoch": 1.38976532852737, "grad_norm": 0.02419671012650832, "learning_rate": 0.0002577514978091308, "loss": 0.5055, "step": 27375 }, { "epoch": 1.3900191646254014, "grad_norm": 0.02417967296068721, "learning_rate": 0.000257557732244048, "loss": 0.4874, "step": 27380 }, { "epoch": 1.390273000723433, "grad_norm": 0.022712157728031215, "learning_rate": 0.00025736401426917286, "loss": 0.4702, "step": 27385 }, { "epoch": 1.3905268368214645, "grad_norm": 0.027626729533074428, "learning_rate": 0.0002571703439225322, "loss": 0.4904, "step": 27390 }, { "epoch": 1.3907806729194958, "grad_norm": 0.023010404248220272, "learning_rate": 0.00025697672124214176, "loss": 0.4923, "step": 27395 }, { "epoch": 1.3910345090175273, "grad_norm": 0.021117714170819323, "learning_rate": 0.00025678314626600924, "loss": 0.4616, "step": 27400 }, { "epoch": 1.3912883451155589, "grad_norm": 0.023323314097295, "learning_rate": 0.00025658961903213197, "loss": 0.4779, "step": 27405 }, { "epoch": 1.3915421812135904, "grad_norm": 0.03386308270675858, "learning_rate": 0.0002563961395784987, "loss": 0.4701, "step": 27410 }, { "epoch": 1.391796017311622, "grad_norm": 0.023072249782764793, "learning_rate": 0.0002562027079430883, "loss": 0.4628, "step": 27415 }, { "epoch": 1.3920498534096533, "grad_norm": 0.026329411232554476, "learning_rate": 0.0002560093241638707, "loss": 0.5117, "step": 27420 }, { "epoch": 1.3923036895076848, "grad_norm": 0.02564234053698023, "learning_rate": 0.00025581598827880575, "loss": 0.4713, "step": 27425 }, { "epoch": 1.3925575256057163, "grad_norm": 0.026068964054194645, "learning_rate": 0.0002556227003258448, "loss": 0.4654, "step": 27430 }, { "epoch": 1.3928113617037479, "grad_norm": 0.020125692575617058, "learning_rate": 0.0002554294603429288, "loss": 0.4917, "step": 27435 }, { "epoch": 1.3930651978017794, "grad_norm": 0.02566386436456717, "learning_rate": 0.0002552362683679903, "loss": 0.491, "step": 27440 }, { "epoch": 1.393319033899811, "grad_norm": 0.022596381640967816, "learning_rate": 0.0002550431244389515, "loss": 0.4924, "step": 27445 }, { "epoch": 1.3935728699978425, "grad_norm": 0.05194383248908799, "learning_rate": 0.00025485002859372574, "loss": 0.472, "step": 27450 }, { "epoch": 1.393826706095874, "grad_norm": 0.0222046493642593, "learning_rate": 0.00025465698087021705, "loss": 0.4746, "step": 27455 }, { "epoch": 1.3940805421939053, "grad_norm": 0.02186007738670582, "learning_rate": 0.0002544639813063193, "loss": 0.4409, "step": 27460 }, { "epoch": 1.3943343782919368, "grad_norm": 0.023367690449110124, "learning_rate": 0.0002542710299399177, "loss": 0.4573, "step": 27465 }, { "epoch": 1.3945882143899684, "grad_norm": 0.02272755185104169, "learning_rate": 0.00025407812680888726, "loss": 0.4495, "step": 27470 }, { "epoch": 1.394842050488, "grad_norm": 0.024020085751695088, "learning_rate": 0.0002538852719510943, "loss": 0.4779, "step": 27475 }, { "epoch": 1.3950958865860315, "grad_norm": 0.0218924915046459, "learning_rate": 0.00025369246540439495, "loss": 0.4737, "step": 27480 }, { "epoch": 1.3953497226840628, "grad_norm": 0.02276957102386517, "learning_rate": 0.00025349970720663653, "loss": 0.5073, "step": 27485 }, { "epoch": 1.3956035587820943, "grad_norm": 0.02384126314470508, "learning_rate": 0.000253306997395656, "loss": 0.4972, "step": 27490 }, { "epoch": 1.3958573948801258, "grad_norm": 0.03572583730853441, "learning_rate": 0.00025311433600928184, "loss": 0.4611, "step": 27495 }, { "epoch": 1.3961112309781574, "grad_norm": 0.7840304155881027, "learning_rate": 0.00025292172308533214, "loss": 0.4962, "step": 27500 }, { "epoch": 1.396365067076189, "grad_norm": 0.0463916296322995, "learning_rate": 0.000252729158661616, "loss": 0.4765, "step": 27505 }, { "epoch": 1.3966189031742204, "grad_norm": 0.05568946088292396, "learning_rate": 0.0002525366427759329, "loss": 0.4964, "step": 27510 }, { "epoch": 1.396872739272252, "grad_norm": 0.023057160898420713, "learning_rate": 0.00025234417546607293, "loss": 0.4928, "step": 27515 }, { "epoch": 1.3971265753702835, "grad_norm": 0.029288129095807625, "learning_rate": 0.000252151756769816, "loss": 0.4895, "step": 27520 }, { "epoch": 1.397380411468315, "grad_norm": 0.03340361260423104, "learning_rate": 0.00025195938672493344, "loss": 0.479, "step": 27525 }, { "epoch": 1.3976342475663464, "grad_norm": 0.03739192421428975, "learning_rate": 0.0002517670653691861, "loss": 0.4901, "step": 27530 }, { "epoch": 1.397888083664378, "grad_norm": 0.023015539091900444, "learning_rate": 0.0002515747927403261, "loss": 0.457, "step": 27535 }, { "epoch": 1.3981419197624094, "grad_norm": 0.024683773093099548, "learning_rate": 0.00025138256887609513, "loss": 0.4845, "step": 27540 }, { "epoch": 1.398395755860441, "grad_norm": 0.03275767392537276, "learning_rate": 0.0002511903938142263, "loss": 0.4641, "step": 27545 }, { "epoch": 1.3986495919584725, "grad_norm": 0.02455035950939294, "learning_rate": 0.0002509982675924421, "loss": 0.4641, "step": 27550 }, { "epoch": 1.3989034280565038, "grad_norm": 0.021707198486694775, "learning_rate": 0.00025080619024845643, "loss": 0.482, "step": 27555 }, { "epoch": 1.3991572641545353, "grad_norm": 0.022054949788400096, "learning_rate": 0.0002506141618199727, "loss": 0.4771, "step": 27560 }, { "epoch": 1.3994111002525669, "grad_norm": 0.023827476345478014, "learning_rate": 0.0002504221823446853, "loss": 0.4608, "step": 27565 }, { "epoch": 1.3996649363505984, "grad_norm": 0.028504264487398272, "learning_rate": 0.00025023025186027905, "loss": 0.4909, "step": 27570 }, { "epoch": 1.39991877244863, "grad_norm": 0.029743187667604372, "learning_rate": 0.0002500383704044286, "loss": 0.4695, "step": 27575 }, { "epoch": 1.4001726085466615, "grad_norm": 0.024573032907380145, "learning_rate": 0.00024984653801479967, "loss": 0.4768, "step": 27580 }, { "epoch": 1.400426444644693, "grad_norm": 0.04851132365758049, "learning_rate": 0.0002496547547290476, "loss": 0.4658, "step": 27585 }, { "epoch": 1.4006802807427245, "grad_norm": 0.03611595038168912, "learning_rate": 0.0002494630205848189, "loss": 0.4891, "step": 27590 }, { "epoch": 1.4009341168407559, "grad_norm": 0.020312909657943287, "learning_rate": 0.0002492713356197497, "loss": 0.4731, "step": 27595 }, { "epoch": 1.4011879529387874, "grad_norm": 0.02051178256643885, "learning_rate": 0.0002490796998714671, "loss": 0.4633, "step": 27600 }, { "epoch": 1.401441789036819, "grad_norm": 0.028805213882367668, "learning_rate": 0.0002488881133775878, "loss": 0.4689, "step": 27605 }, { "epoch": 1.4016956251348505, "grad_norm": 0.03456140699472926, "learning_rate": 0.00024869657617571984, "loss": 0.4732, "step": 27610 }, { "epoch": 1.401949461232882, "grad_norm": 0.02567267995131493, "learning_rate": 0.00024850508830346046, "loss": 0.4893, "step": 27615 }, { "epoch": 1.4022032973309133, "grad_norm": 0.027690736913673394, "learning_rate": 0.0002483136497983983, "loss": 0.4773, "step": 27620 }, { "epoch": 1.4024571334289448, "grad_norm": 0.02059800948274391, "learning_rate": 0.00024812226069811114, "loss": 0.4533, "step": 27625 }, { "epoch": 1.4027109695269764, "grad_norm": 0.022577872784147853, "learning_rate": 0.00024793092104016844, "loss": 0.4989, "step": 27630 }, { "epoch": 1.402964805625008, "grad_norm": 0.02564370891867458, "learning_rate": 0.00024773963086212867, "loss": 0.483, "step": 27635 }, { "epoch": 1.4032186417230395, "grad_norm": 0.022783404442849806, "learning_rate": 0.0002475483902015416, "loss": 0.4517, "step": 27640 }, { "epoch": 1.403472477821071, "grad_norm": 0.02130850865861562, "learning_rate": 0.00024735719909594635, "loss": 0.5042, "step": 27645 }, { "epoch": 1.4037263139191025, "grad_norm": 0.025642889265787623, "learning_rate": 0.00024716605758287315, "loss": 0.4993, "step": 27650 }, { "epoch": 1.403980150017134, "grad_norm": 0.022945742341174302, "learning_rate": 0.00024697496569984177, "loss": 0.4853, "step": 27655 }, { "epoch": 1.4042339861151654, "grad_norm": 0.022770860368356186, "learning_rate": 0.000246783923484363, "loss": 0.467, "step": 27660 }, { "epoch": 1.404487822213197, "grad_norm": 0.022660091235407458, "learning_rate": 0.0002465929309739371, "loss": 0.4698, "step": 27665 }, { "epoch": 1.4047416583112284, "grad_norm": 0.027042451768303048, "learning_rate": 0.0002464019882060553, "loss": 0.4554, "step": 27670 }, { "epoch": 1.40499549440926, "grad_norm": 0.02246177180890777, "learning_rate": 0.0002462110952181982, "loss": 0.4776, "step": 27675 }, { "epoch": 1.4052493305072915, "grad_norm": 0.022416923064172367, "learning_rate": 0.0002460202520478378, "loss": 0.4547, "step": 27680 }, { "epoch": 1.4055031666053228, "grad_norm": 0.02287798343042281, "learning_rate": 0.0002458294587324351, "loss": 0.4598, "step": 27685 }, { "epoch": 1.4057570027033544, "grad_norm": 0.02035412219965513, "learning_rate": 0.0002456387153094421, "loss": 0.4872, "step": 27690 }, { "epoch": 1.4060108388013859, "grad_norm": 0.029570301391272168, "learning_rate": 0.000245448021816301, "loss": 0.4461, "step": 27695 }, { "epoch": 1.4062646748994174, "grad_norm": 0.024728980525368392, "learning_rate": 0.00024525737829044354, "loss": 0.4797, "step": 27700 }, { "epoch": 1.406518510997449, "grad_norm": 0.022333792784804983, "learning_rate": 0.0002450667847692925, "loss": 0.5004, "step": 27705 }, { "epoch": 1.4067723470954805, "grad_norm": 0.02145010095522035, "learning_rate": 0.00024487624129026017, "loss": 0.4473, "step": 27710 }, { "epoch": 1.407026183193512, "grad_norm": 0.02758397602988774, "learning_rate": 0.00024468574789074946, "loss": 0.4705, "step": 27715 }, { "epoch": 1.4072800192915436, "grad_norm": 0.033109491790319426, "learning_rate": 0.000244495304608153, "loss": 0.4633, "step": 27720 }, { "epoch": 1.4075338553895749, "grad_norm": 0.029190273291780672, "learning_rate": 0.0002443049114798543, "loss": 0.4585, "step": 27725 }, { "epoch": 1.4077876914876064, "grad_norm": 0.03253325936543185, "learning_rate": 0.00024411456854322612, "loss": 0.4757, "step": 27730 }, { "epoch": 1.408041527585638, "grad_norm": 0.020506343909216858, "learning_rate": 0.0002439242758356322, "loss": 0.4797, "step": 27735 }, { "epoch": 1.4082953636836695, "grad_norm": 0.026935301152816532, "learning_rate": 0.0002437340333944257, "loss": 0.4494, "step": 27740 }, { "epoch": 1.408549199781701, "grad_norm": 0.029552183804907328, "learning_rate": 0.00024354384125695045, "loss": 0.4962, "step": 27745 }, { "epoch": 1.4088030358797323, "grad_norm": 0.027966829528624507, "learning_rate": 0.00024335369946054027, "loss": 0.4627, "step": 27750 }, { "epoch": 1.4090568719777639, "grad_norm": 0.020488294841671674, "learning_rate": 0.00024316360804251907, "loss": 0.4783, "step": 27755 }, { "epoch": 1.4093107080757954, "grad_norm": 0.02500272619569665, "learning_rate": 0.0002429735670402007, "loss": 0.4523, "step": 27760 }, { "epoch": 1.409564544173827, "grad_norm": 0.03524132852415086, "learning_rate": 0.00024278357649088945, "loss": 0.4832, "step": 27765 }, { "epoch": 1.4098183802718585, "grad_norm": 0.029454084316621682, "learning_rate": 0.00024259363643187922, "loss": 0.4866, "step": 27770 }, { "epoch": 1.41007221636989, "grad_norm": 0.0230214354412747, "learning_rate": 0.00024240374690045468, "loss": 0.4832, "step": 27775 }, { "epoch": 1.4103260524679215, "grad_norm": 0.022547181126946717, "learning_rate": 0.00024221390793388977, "loss": 0.4898, "step": 27780 }, { "epoch": 1.410579888565953, "grad_norm": 0.023719925710689885, "learning_rate": 0.00024202411956944937, "loss": 0.4859, "step": 27785 }, { "epoch": 1.4108337246639846, "grad_norm": 0.026253153450593418, "learning_rate": 0.00024183438184438761, "loss": 0.4943, "step": 27790 }, { "epoch": 1.411087560762016, "grad_norm": 0.023581771288596627, "learning_rate": 0.00024164469479594935, "loss": 0.5134, "step": 27795 }, { "epoch": 1.4113413968600474, "grad_norm": 0.024469131177750587, "learning_rate": 0.00024145505846136895, "loss": 0.4991, "step": 27800 }, { "epoch": 1.411595232958079, "grad_norm": 0.03535425997247597, "learning_rate": 0.0002412654728778712, "loss": 0.4964, "step": 27805 }, { "epoch": 1.4118490690561105, "grad_norm": 0.024175285871346007, "learning_rate": 0.00024107593808267102, "loss": 0.471, "step": 27810 }, { "epoch": 1.412102905154142, "grad_norm": 0.023755148541426182, "learning_rate": 0.00024088645411297273, "loss": 0.4849, "step": 27815 }, { "epoch": 1.4123567412521734, "grad_norm": 0.02516434829571276, "learning_rate": 0.00024069702100597146, "loss": 0.467, "step": 27820 }, { "epoch": 1.412610577350205, "grad_norm": 0.023488420926309095, "learning_rate": 0.00024050763879885167, "loss": 0.482, "step": 27825 }, { "epoch": 1.4128644134482364, "grad_norm": 0.022254767215497136, "learning_rate": 0.00024031830752878854, "loss": 0.5116, "step": 27830 }, { "epoch": 1.413118249546268, "grad_norm": 0.02191194664116013, "learning_rate": 0.00024012902723294632, "loss": 0.4826, "step": 27835 }, { "epoch": 1.4133720856442995, "grad_norm": 0.03718887288280671, "learning_rate": 0.00023993979794848037, "loss": 0.4901, "step": 27840 }, { "epoch": 1.413625921742331, "grad_norm": 0.028432289745428645, "learning_rate": 0.00023975061971253492, "loss": 0.4806, "step": 27845 }, { "epoch": 1.4138797578403626, "grad_norm": 0.023502679374965474, "learning_rate": 0.00023956149256224512, "loss": 0.4374, "step": 27850 }, { "epoch": 1.414133593938394, "grad_norm": 0.02184921161134165, "learning_rate": 0.0002393724165347354, "loss": 0.4712, "step": 27855 }, { "epoch": 1.4143874300364254, "grad_norm": 0.02284152025331955, "learning_rate": 0.0002391833916671207, "loss": 0.5026, "step": 27860 }, { "epoch": 1.414641266134457, "grad_norm": 0.02065393654716191, "learning_rate": 0.0002389944179965052, "loss": 0.4614, "step": 27865 }, { "epoch": 1.4148951022324885, "grad_norm": 0.021188395785789092, "learning_rate": 0.00023880549555998416, "loss": 0.4681, "step": 27870 }, { "epoch": 1.41514893833052, "grad_norm": 0.02498643430523538, "learning_rate": 0.00023861662439464155, "loss": 0.5116, "step": 27875 }, { "epoch": 1.4154027744285516, "grad_norm": 0.03642276120823365, "learning_rate": 0.00023842780453755231, "loss": 0.479, "step": 27880 }, { "epoch": 1.4156566105265829, "grad_norm": 0.021274402113326943, "learning_rate": 0.00023823903602578035, "loss": 0.4518, "step": 27885 }, { "epoch": 1.4159104466246144, "grad_norm": 0.02378943254532682, "learning_rate": 0.0002380503188963804, "loss": 0.5052, "step": 27890 }, { "epoch": 1.416164282722646, "grad_norm": 0.026776271663452332, "learning_rate": 0.00023786165318639635, "loss": 0.4953, "step": 27895 }, { "epoch": 1.4164181188206775, "grad_norm": 0.022203192350251453, "learning_rate": 0.00023767303893286262, "loss": 0.4893, "step": 27900 }, { "epoch": 1.416671954918709, "grad_norm": 0.0235156458738656, "learning_rate": 0.00023748447617280322, "loss": 0.4461, "step": 27905 }, { "epoch": 1.4169257910167405, "grad_norm": 0.021958486852655536, "learning_rate": 0.00023729596494323173, "loss": 0.4653, "step": 27910 }, { "epoch": 1.417179627114772, "grad_norm": 0.0289039207384805, "learning_rate": 0.00023710750528115244, "loss": 0.4816, "step": 27915 }, { "epoch": 1.4174334632128036, "grad_norm": 0.024505561379556843, "learning_rate": 0.00023691909722355864, "loss": 0.475, "step": 27920 }, { "epoch": 1.417687299310835, "grad_norm": 0.021676083599215912, "learning_rate": 0.00023673074080743405, "loss": 0.4999, "step": 27925 }, { "epoch": 1.4179411354088665, "grad_norm": 0.027958626700585656, "learning_rate": 0.00023654243606975213, "loss": 0.512, "step": 27930 }, { "epoch": 1.418194971506898, "grad_norm": 0.024897726465202892, "learning_rate": 0.0002363541830474763, "loss": 0.5286, "step": 27935 }, { "epoch": 1.4184488076049295, "grad_norm": 0.02113190762244004, "learning_rate": 0.00023616598177755938, "loss": 0.4781, "step": 27940 }, { "epoch": 1.418702643702961, "grad_norm": 0.020658914881807294, "learning_rate": 0.0002359778322969447, "loss": 0.476, "step": 27945 }, { "epoch": 1.4189564798009924, "grad_norm": 0.02016987097475399, "learning_rate": 0.00023578973464256464, "loss": 0.5082, "step": 27950 }, { "epoch": 1.419210315899024, "grad_norm": 0.025221457362809504, "learning_rate": 0.0002356016888513423, "loss": 0.4956, "step": 27955 }, { "epoch": 1.4194641519970554, "grad_norm": 0.027710307385371332, "learning_rate": 0.00023541369496018967, "loss": 0.4782, "step": 27960 }, { "epoch": 1.419717988095087, "grad_norm": 0.019837189877263843, "learning_rate": 0.0002352257530060094, "loss": 0.4673, "step": 27965 }, { "epoch": 1.4199718241931185, "grad_norm": 0.022705298855318596, "learning_rate": 0.00023503786302569318, "loss": 0.4746, "step": 27970 }, { "epoch": 1.42022566029115, "grad_norm": 0.030143315594230304, "learning_rate": 0.0002348500250561233, "loss": 0.4531, "step": 27975 }, { "epoch": 1.4204794963891816, "grad_norm": 0.02996052140446814, "learning_rate": 0.00023466223913417105, "loss": 0.4657, "step": 27980 }, { "epoch": 1.4207333324872131, "grad_norm": 0.02310648055769472, "learning_rate": 0.00023447450529669796, "loss": 0.4884, "step": 27985 }, { "epoch": 1.4209871685852444, "grad_norm": 0.029314484029678897, "learning_rate": 0.00023428682358055553, "loss": 0.4989, "step": 27990 }, { "epoch": 1.421241004683276, "grad_norm": 0.0252418080632575, "learning_rate": 0.00023409919402258433, "loss": 0.4721, "step": 27995 }, { "epoch": 1.4214948407813075, "grad_norm": 0.023141843733552995, "learning_rate": 0.00023391161665961546, "loss": 0.4946, "step": 28000 }, { "epoch": 1.421748676879339, "grad_norm": 0.02792804961488345, "learning_rate": 0.00023372409152846912, "loss": 0.4681, "step": 28005 }, { "epoch": 1.4220025129773706, "grad_norm": 0.029553149663017995, "learning_rate": 0.00023353661866595582, "loss": 0.5152, "step": 28010 }, { "epoch": 1.4222563490754019, "grad_norm": 0.020892682323818024, "learning_rate": 0.00023334919810887527, "loss": 0.4211, "step": 28015 }, { "epoch": 1.4225101851734334, "grad_norm": 0.02707479709867003, "learning_rate": 0.0002331618298940176, "loss": 0.4819, "step": 28020 }, { "epoch": 1.422764021271465, "grad_norm": 0.02176754481264695, "learning_rate": 0.00023297451405816173, "loss": 0.4723, "step": 28025 }, { "epoch": 1.4230178573694965, "grad_norm": 0.02889767627713131, "learning_rate": 0.00023278725063807733, "loss": 0.4705, "step": 28030 }, { "epoch": 1.423271693467528, "grad_norm": 0.020395619422563103, "learning_rate": 0.0002326000396705228, "loss": 0.4619, "step": 28035 }, { "epoch": 1.4235255295655596, "grad_norm": 0.02300164782732942, "learning_rate": 0.0002324128811922472, "loss": 0.4976, "step": 28040 }, { "epoch": 1.423779365663591, "grad_norm": 0.026291959890614048, "learning_rate": 0.00023222577523998816, "loss": 0.473, "step": 28045 }, { "epoch": 1.4240332017616226, "grad_norm": 0.020485251522475535, "learning_rate": 0.00023203872185047442, "loss": 0.4657, "step": 28050 }, { "epoch": 1.4242870378596542, "grad_norm": 0.025828189579312467, "learning_rate": 0.00023185172106042308, "loss": 0.4625, "step": 28055 }, { "epoch": 1.4245408739576855, "grad_norm": 0.02218487881694467, "learning_rate": 0.00023166477290654185, "loss": 0.4819, "step": 28060 }, { "epoch": 1.424794710055717, "grad_norm": 0.0286179264415249, "learning_rate": 0.00023147787742552734, "loss": 0.4737, "step": 28065 }, { "epoch": 1.4250485461537485, "grad_norm": 0.0228564657619892, "learning_rate": 0.00023129103465406654, "loss": 0.4672, "step": 28070 }, { "epoch": 1.42530238225178, "grad_norm": 0.029588254076403645, "learning_rate": 0.00023110424462883538, "loss": 0.49, "step": 28075 }, { "epoch": 1.4255562183498114, "grad_norm": 0.0214582359967556, "learning_rate": 0.00023091750738650024, "loss": 0.4618, "step": 28080 }, { "epoch": 1.425810054447843, "grad_norm": 0.032133974034998646, "learning_rate": 0.00023073082296371628, "loss": 0.4364, "step": 28085 }, { "epoch": 1.4260638905458745, "grad_norm": 0.027651903639121147, "learning_rate": 0.0002305441913971291, "loss": 0.4692, "step": 28090 }, { "epoch": 1.426317726643906, "grad_norm": 0.019039201847567744, "learning_rate": 0.0002303576127233732, "loss": 0.4859, "step": 28095 }, { "epoch": 1.4265715627419375, "grad_norm": 0.02386583789317677, "learning_rate": 0.0002301710869790734, "loss": 0.4875, "step": 28100 }, { "epoch": 1.426825398839969, "grad_norm": 0.025025245204414376, "learning_rate": 0.00022998461420084342, "loss": 0.5166, "step": 28105 }, { "epoch": 1.4270792349380006, "grad_norm": 0.023420261634063733, "learning_rate": 0.00022979819442528715, "loss": 0.4759, "step": 28110 }, { "epoch": 1.4273330710360321, "grad_norm": 0.02333034761205301, "learning_rate": 0.00022961182768899797, "loss": 0.48, "step": 28115 }, { "epoch": 1.4275869071340637, "grad_norm": 0.02627854902400825, "learning_rate": 0.00022942551402855839, "loss": 0.4807, "step": 28120 }, { "epoch": 1.427840743232095, "grad_norm": 0.021412030460517, "learning_rate": 0.0002292392534805412, "loss": 0.5039, "step": 28125 }, { "epoch": 1.4280945793301265, "grad_norm": 0.022170572716743882, "learning_rate": 0.0002290530460815082, "loss": 0.4968, "step": 28130 }, { "epoch": 1.428348415428158, "grad_norm": 0.025786423164189262, "learning_rate": 0.00022886689186801113, "loss": 0.4907, "step": 28135 }, { "epoch": 1.4286022515261896, "grad_norm": 0.021201177325464542, "learning_rate": 0.00022868079087659087, "loss": 0.4856, "step": 28140 }, { "epoch": 1.4288560876242211, "grad_norm": 0.02309415988605629, "learning_rate": 0.0002284947431437785, "loss": 0.4825, "step": 28145 }, { "epoch": 1.4291099237222524, "grad_norm": 0.024799580659098996, "learning_rate": 0.00022830874870609385, "loss": 0.4728, "step": 28150 }, { "epoch": 1.429363759820284, "grad_norm": 0.02544415478890754, "learning_rate": 0.00022812280760004718, "loss": 0.4449, "step": 28155 }, { "epoch": 1.4296175959183155, "grad_norm": 0.022858736494680027, "learning_rate": 0.00022793691986213726, "loss": 0.4719, "step": 28160 }, { "epoch": 1.429871432016347, "grad_norm": 0.02868557828847498, "learning_rate": 0.00022775108552885336, "loss": 0.4756, "step": 28165 }, { "epoch": 1.4301252681143786, "grad_norm": 0.022645438963720982, "learning_rate": 0.00022756530463667336, "loss": 0.4726, "step": 28170 }, { "epoch": 1.43037910421241, "grad_norm": 0.02751085893232087, "learning_rate": 0.00022737957722206576, "loss": 0.4698, "step": 28175 }, { "epoch": 1.4306329403104416, "grad_norm": 0.02151280270290485, "learning_rate": 0.00022719390332148743, "loss": 0.4608, "step": 28180 }, { "epoch": 1.4308867764084732, "grad_norm": 0.02606728512274907, "learning_rate": 0.0002270082829713856, "loss": 0.4721, "step": 28185 }, { "epoch": 1.4311406125065045, "grad_norm": 0.023997247097793137, "learning_rate": 0.00022682271620819622, "loss": 0.4877, "step": 28190 }, { "epoch": 1.431394448604536, "grad_norm": 0.022872074241439312, "learning_rate": 0.00022663720306834544, "loss": 0.4929, "step": 28195 }, { "epoch": 1.4316482847025676, "grad_norm": 0.029600629396223117, "learning_rate": 0.00022645174358824834, "loss": 0.4875, "step": 28200 }, { "epoch": 1.431902120800599, "grad_norm": 0.024623593274030287, "learning_rate": 0.00022626633780430995, "loss": 0.4856, "step": 28205 }, { "epoch": 1.4321559568986306, "grad_norm": 0.02466052196985095, "learning_rate": 0.00022608098575292412, "loss": 0.4738, "step": 28210 }, { "epoch": 1.432409792996662, "grad_norm": 0.02014319185333175, "learning_rate": 0.00022589568747047496, "loss": 0.4535, "step": 28215 }, { "epoch": 1.4326636290946935, "grad_norm": 0.07392864104661222, "learning_rate": 0.00022571044299333522, "loss": 0.5205, "step": 28220 }, { "epoch": 1.432917465192725, "grad_norm": 0.02808956874894487, "learning_rate": 0.0002255252523578678, "loss": 0.4641, "step": 28225 }, { "epoch": 1.4331713012907565, "grad_norm": 0.023026002461065096, "learning_rate": 0.0002253401156004244, "loss": 0.4719, "step": 28230 }, { "epoch": 1.433425137388788, "grad_norm": 0.025239834221944403, "learning_rate": 0.00022515503275734655, "loss": 0.4812, "step": 28235 }, { "epoch": 1.4336789734868196, "grad_norm": 0.023740320525211498, "learning_rate": 0.0002249700038649653, "loss": 0.4871, "step": 28240 }, { "epoch": 1.4339328095848511, "grad_norm": 0.022974485649644116, "learning_rate": 0.00022478502895960056, "loss": 0.5321, "step": 28245 }, { "epoch": 1.4341866456828827, "grad_norm": 0.038073746528805776, "learning_rate": 0.00022460010807756232, "loss": 0.4879, "step": 28250 }, { "epoch": 1.434440481780914, "grad_norm": 0.022813896482576042, "learning_rate": 0.00022441524125514924, "loss": 0.4661, "step": 28255 }, { "epoch": 1.4346943178789455, "grad_norm": 0.023168119706696216, "learning_rate": 0.0002242304285286501, "loss": 0.4958, "step": 28260 }, { "epoch": 1.434948153976977, "grad_norm": 0.02529122063862638, "learning_rate": 0.0002240456699343425, "loss": 0.477, "step": 28265 }, { "epoch": 1.4352019900750086, "grad_norm": 0.022195490060621263, "learning_rate": 0.00022386096550849384, "loss": 0.4525, "step": 28270 }, { "epoch": 1.4354558261730401, "grad_norm": 0.022086437502397378, "learning_rate": 0.00022367631528736037, "loss": 0.4679, "step": 28275 }, { "epoch": 1.4357096622710714, "grad_norm": 0.02864867259943996, "learning_rate": 0.00022349171930718836, "loss": 0.4855, "step": 28280 }, { "epoch": 1.435963498369103, "grad_norm": 0.0224610763647068, "learning_rate": 0.0002233071776042127, "loss": 0.4817, "step": 28285 }, { "epoch": 1.4362173344671345, "grad_norm": 0.02349424018618673, "learning_rate": 0.00022312269021465826, "loss": 0.4559, "step": 28290 }, { "epoch": 1.436471170565166, "grad_norm": 0.02339458889206688, "learning_rate": 0.00022293825717473891, "loss": 0.4929, "step": 28295 }, { "epoch": 1.4367250066631976, "grad_norm": 0.031316045398563565, "learning_rate": 0.0002227538785206582, "loss": 0.5114, "step": 28300 }, { "epoch": 1.4369788427612291, "grad_norm": 0.029663272075113593, "learning_rate": 0.0002225695542886083, "loss": 0.4813, "step": 28305 }, { "epoch": 1.4372326788592606, "grad_norm": 0.01994990696285228, "learning_rate": 0.00022238528451477152, "loss": 0.4663, "step": 28310 }, { "epoch": 1.4374865149572922, "grad_norm": 0.02117337091557514, "learning_rate": 0.0002222010692353188, "loss": 0.4757, "step": 28315 }, { "epoch": 1.4377403510553235, "grad_norm": 0.027257286084898684, "learning_rate": 0.00022201690848641092, "loss": 0.465, "step": 28320 }, { "epoch": 1.437994187153355, "grad_norm": 0.02068920674069367, "learning_rate": 0.00022183280230419746, "loss": 0.454, "step": 28325 }, { "epoch": 1.4382480232513866, "grad_norm": 0.026482096977822, "learning_rate": 0.00022164875072481788, "loss": 0.4992, "step": 28330 }, { "epoch": 1.438501859349418, "grad_norm": 0.025250420834823727, "learning_rate": 0.00022146475378440018, "loss": 0.4845, "step": 28335 }, { "epoch": 1.4387556954474496, "grad_norm": 0.023124139028515872, "learning_rate": 0.00022128081151906248, "loss": 0.4981, "step": 28340 }, { "epoch": 1.439009531545481, "grad_norm": 0.0250480716309079, "learning_rate": 0.00022109692396491128, "loss": 0.4932, "step": 28345 }, { "epoch": 1.4392633676435125, "grad_norm": 0.025172649290414614, "learning_rate": 0.00022091309115804305, "loss": 0.4734, "step": 28350 }, { "epoch": 1.439517203741544, "grad_norm": 0.023861858060389464, "learning_rate": 0.0002207293131345434, "loss": 0.4958, "step": 28355 }, { "epoch": 1.4397710398395756, "grad_norm": 0.023276629706840232, "learning_rate": 0.00022054558993048667, "loss": 0.4775, "step": 28360 }, { "epoch": 1.440024875937607, "grad_norm": 0.021929215795727805, "learning_rate": 0.00022036192158193717, "loss": 0.5146, "step": 28365 }, { "epoch": 1.4402787120356386, "grad_norm": 0.022629914839638336, "learning_rate": 0.00022017830812494778, "loss": 0.4748, "step": 28370 }, { "epoch": 1.4405325481336702, "grad_norm": 0.024040761607839467, "learning_rate": 0.0002199947495955612, "loss": 0.5049, "step": 28375 }, { "epoch": 1.4407863842317017, "grad_norm": 0.02291771186966908, "learning_rate": 0.00021981124602980868, "loss": 0.4945, "step": 28380 }, { "epoch": 1.4410402203297332, "grad_norm": 0.02136166148441168, "learning_rate": 0.00021962779746371148, "loss": 0.4428, "step": 28385 }, { "epoch": 1.4412940564277645, "grad_norm": 0.02612556885350365, "learning_rate": 0.0002194444039332792, "loss": 0.4812, "step": 28390 }, { "epoch": 1.441547892525796, "grad_norm": 0.03403548088384833, "learning_rate": 0.00021926106547451153, "loss": 0.5052, "step": 28395 }, { "epoch": 1.4418017286238276, "grad_norm": 0.02777524166007245, "learning_rate": 0.00021907778212339646, "loss": 0.4847, "step": 28400 }, { "epoch": 1.4420555647218591, "grad_norm": 0.02317959016446424, "learning_rate": 0.00021889455391591197, "loss": 0.4857, "step": 28405 }, { "epoch": 1.4423094008198907, "grad_norm": 0.03177895171595999, "learning_rate": 0.00021871138088802434, "loss": 0.4558, "step": 28410 }, { "epoch": 1.442563236917922, "grad_norm": 0.02126964198718204, "learning_rate": 0.00021852826307569017, "loss": 0.4506, "step": 28415 }, { "epoch": 1.4428170730159535, "grad_norm": 0.024740968782442203, "learning_rate": 0.00021834520051485412, "loss": 0.4973, "step": 28420 }, { "epoch": 1.443070909113985, "grad_norm": 0.027489399167317248, "learning_rate": 0.00021816219324145082, "loss": 0.4751, "step": 28425 }, { "epoch": 1.4433247452120166, "grad_norm": 0.025374524663172544, "learning_rate": 0.00021797924129140323, "loss": 0.4764, "step": 28430 }, { "epoch": 1.4435785813100481, "grad_norm": 0.02277464171649709, "learning_rate": 0.00021779634470062433, "loss": 0.4912, "step": 28435 }, { "epoch": 1.4438324174080797, "grad_norm": 0.021400550142160534, "learning_rate": 0.0002176135035050154, "loss": 0.4559, "step": 28440 }, { "epoch": 1.4440862535061112, "grad_norm": 0.03045404966662296, "learning_rate": 0.00021743071774046768, "loss": 0.5034, "step": 28445 }, { "epoch": 1.4443400896041427, "grad_norm": 0.02407917586260191, "learning_rate": 0.00021724798744286072, "loss": 0.4785, "step": 28450 }, { "epoch": 1.444593925702174, "grad_norm": 0.025205748983629185, "learning_rate": 0.00021706531264806394, "loss": 0.4874, "step": 28455 }, { "epoch": 1.4448477618002056, "grad_norm": 0.02348829524564938, "learning_rate": 0.00021688269339193513, "loss": 0.4709, "step": 28460 }, { "epoch": 1.4451015978982371, "grad_norm": 0.0232211398020149, "learning_rate": 0.00021670012971032184, "loss": 0.4774, "step": 28465 }, { "epoch": 1.4453554339962686, "grad_norm": 0.024930973074259177, "learning_rate": 0.00021651762163906008, "loss": 0.4685, "step": 28470 }, { "epoch": 1.4456092700943002, "grad_norm": 0.03092756571772325, "learning_rate": 0.0002163351692139755, "loss": 0.4734, "step": 28475 }, { "epoch": 1.4458631061923315, "grad_norm": 0.022477760693391274, "learning_rate": 0.00021615277247088278, "loss": 0.4869, "step": 28480 }, { "epoch": 1.446116942290363, "grad_norm": 0.03361966594571212, "learning_rate": 0.00021597043144558505, "loss": 0.4451, "step": 28485 }, { "epoch": 1.4463707783883946, "grad_norm": 0.030322892044589482, "learning_rate": 0.00021578814617387537, "loss": 0.4828, "step": 28490 }, { "epoch": 1.446624614486426, "grad_norm": 0.024900585317585298, "learning_rate": 0.00021560591669153505, "loss": 0.4799, "step": 28495 }, { "epoch": 1.4468784505844576, "grad_norm": 0.022462012440346523, "learning_rate": 0.00021542374303433522, "loss": 0.4528, "step": 28500 }, { "epoch": 1.4471322866824892, "grad_norm": 0.02385434946419265, "learning_rate": 0.00021524162523803525, "loss": 0.4762, "step": 28505 }, { "epoch": 1.4473861227805207, "grad_norm": 0.0237899246854406, "learning_rate": 0.00021505956333838432, "loss": 0.5128, "step": 28510 }, { "epoch": 1.4476399588785522, "grad_norm": 0.022093078514918426, "learning_rate": 0.00021487755737111997, "loss": 0.5007, "step": 28515 }, { "epoch": 1.4478937949765835, "grad_norm": 0.03720387345715551, "learning_rate": 0.00021469560737196936, "loss": 0.4737, "step": 28520 }, { "epoch": 1.448147631074615, "grad_norm": 0.02307666838695736, "learning_rate": 0.00021451371337664803, "loss": 0.4847, "step": 28525 }, { "epoch": 1.4484014671726466, "grad_norm": 0.027202707719082012, "learning_rate": 0.00021433187542086102, "loss": 0.5096, "step": 28530 }, { "epoch": 1.4486553032706782, "grad_norm": 0.02281167822321718, "learning_rate": 0.0002141500935403023, "loss": 0.4936, "step": 28535 }, { "epoch": 1.4489091393687097, "grad_norm": 0.022981780525392213, "learning_rate": 0.0002139683677706548, "loss": 0.4721, "step": 28540 }, { "epoch": 1.449162975466741, "grad_norm": 0.023971573526569936, "learning_rate": 0.00021378669814759016, "loss": 0.4788, "step": 28545 }, { "epoch": 1.4494168115647725, "grad_norm": 0.0351687277665943, "learning_rate": 0.00021360508470676947, "loss": 0.4275, "step": 28550 }, { "epoch": 1.449670647662804, "grad_norm": 0.030238615571759643, "learning_rate": 0.00021342352748384224, "loss": 0.4425, "step": 28555 }, { "epoch": 1.4499244837608356, "grad_norm": 0.019177171938210256, "learning_rate": 0.00021324202651444758, "loss": 0.4624, "step": 28560 }, { "epoch": 1.4501783198588671, "grad_norm": 0.023734475246235107, "learning_rate": 0.00021306058183421289, "loss": 0.4531, "step": 28565 }, { "epoch": 1.4504321559568987, "grad_norm": 0.022972344810526544, "learning_rate": 0.00021287919347875517, "loss": 0.5048, "step": 28570 }, { "epoch": 1.4506859920549302, "grad_norm": 0.022124511197820666, "learning_rate": 0.00021269786148367975, "loss": 0.4901, "step": 28575 }, { "epoch": 1.4509398281529617, "grad_norm": 0.020604414986246704, "learning_rate": 0.00021251658588458151, "loss": 0.498, "step": 28580 }, { "epoch": 1.451193664250993, "grad_norm": 0.03208564684499312, "learning_rate": 0.00021233536671704363, "loss": 0.4814, "step": 28585 }, { "epoch": 1.4514475003490246, "grad_norm": 0.026818436879605508, "learning_rate": 0.00021215420401663864, "loss": 0.494, "step": 28590 }, { "epoch": 1.4517013364470561, "grad_norm": 0.02278810185462568, "learning_rate": 0.0002119730978189281, "loss": 0.4588, "step": 28595 }, { "epoch": 1.4519551725450877, "grad_norm": 0.03576004008501719, "learning_rate": 0.0002117920481594619, "loss": 0.4828, "step": 28600 }, { "epoch": 1.4522090086431192, "grad_norm": 0.02300363802637254, "learning_rate": 0.00021161105507377958, "loss": 0.4625, "step": 28605 }, { "epoch": 1.4524628447411505, "grad_norm": 0.021251968272071712, "learning_rate": 0.00021143011859740875, "loss": 0.4577, "step": 28610 }, { "epoch": 1.452716680839182, "grad_norm": 0.02134933721672599, "learning_rate": 0.00021124923876586672, "loss": 0.4804, "step": 28615 }, { "epoch": 1.4529705169372136, "grad_norm": 0.022574435169212356, "learning_rate": 0.0002110684156146589, "loss": 0.4659, "step": 28620 }, { "epoch": 1.453224353035245, "grad_norm": 0.021844543840398207, "learning_rate": 0.00021088764917928044, "loss": 0.4765, "step": 28625 }, { "epoch": 1.4534781891332766, "grad_norm": 0.022297227696848144, "learning_rate": 0.0002107069394952144, "loss": 0.4838, "step": 28630 }, { "epoch": 1.4537320252313082, "grad_norm": 0.025232582647418608, "learning_rate": 0.00021052628659793367, "loss": 0.4793, "step": 28635 }, { "epoch": 1.4539858613293397, "grad_norm": 0.02270811870174028, "learning_rate": 0.00021034569052289908, "loss": 0.4736, "step": 28640 }, { "epoch": 1.4542396974273712, "grad_norm": 0.03638109948966619, "learning_rate": 0.00021016515130556113, "loss": 0.4767, "step": 28645 }, { "epoch": 1.4544935335254028, "grad_norm": 0.020506178116723225, "learning_rate": 0.0002099846689813582, "loss": 0.4831, "step": 28650 }, { "epoch": 1.454747369623434, "grad_norm": 0.02980987722615396, "learning_rate": 0.0002098042435857188, "loss": 0.4477, "step": 28655 }, { "epoch": 1.4550012057214656, "grad_norm": 0.02633014121866591, "learning_rate": 0.000209623875154059, "loss": 0.4758, "step": 28660 }, { "epoch": 1.4552550418194972, "grad_norm": 0.024442989213594755, "learning_rate": 0.00020944356372178458, "loss": 0.4776, "step": 28665 }, { "epoch": 1.4555088779175287, "grad_norm": 0.022043581830940656, "learning_rate": 0.00020926330932428944, "loss": 0.4482, "step": 28670 }, { "epoch": 1.4557627140155602, "grad_norm": 0.023858740881236372, "learning_rate": 0.00020908311199695695, "loss": 0.4835, "step": 28675 }, { "epoch": 1.4560165501135915, "grad_norm": 0.027411770365613423, "learning_rate": 0.0002089029717751586, "loss": 0.4671, "step": 28680 }, { "epoch": 1.456270386211623, "grad_norm": 0.023697876508040217, "learning_rate": 0.00020872288869425536, "loss": 0.4911, "step": 28685 }, { "epoch": 1.4565242223096546, "grad_norm": 0.026627596602070615, "learning_rate": 0.0002085428627895963, "loss": 0.4721, "step": 28690 }, { "epoch": 1.4567780584076861, "grad_norm": 0.021374243564867154, "learning_rate": 0.00020836289409651993, "loss": 0.4851, "step": 28695 }, { "epoch": 1.4570318945057177, "grad_norm": 0.026025357905320266, "learning_rate": 0.0002081829826503529, "loss": 0.4711, "step": 28700 }, { "epoch": 1.4572857306037492, "grad_norm": 0.022910628027651588, "learning_rate": 0.0002080031284864113, "loss": 0.467, "step": 28705 }, { "epoch": 1.4575395667017808, "grad_norm": 0.020149333711585025, "learning_rate": 0.00020782333163999917, "loss": 0.4638, "step": 28710 }, { "epoch": 1.4577934027998123, "grad_norm": 0.022348782139786152, "learning_rate": 0.00020764359214640998, "loss": 0.4672, "step": 28715 }, { "epoch": 1.4580472388978436, "grad_norm": 0.0314334704344233, "learning_rate": 0.0002074639100409258, "loss": 0.4774, "step": 28720 }, { "epoch": 1.4583010749958751, "grad_norm": 0.022750058450229413, "learning_rate": 0.0002072842853588171, "loss": 0.4761, "step": 28725 }, { "epoch": 1.4585549110939067, "grad_norm": 0.026687684346631817, "learning_rate": 0.00020710471813534354, "loss": 0.4796, "step": 28730 }, { "epoch": 1.4588087471919382, "grad_norm": 0.025046655726691035, "learning_rate": 0.00020692520840575297, "loss": 0.48, "step": 28735 }, { "epoch": 1.4590625832899697, "grad_norm": 0.024616034157493274, "learning_rate": 0.00020674575620528262, "loss": 0.4789, "step": 28740 }, { "epoch": 1.459316419388001, "grad_norm": 0.022143594499575536, "learning_rate": 0.0002065663615691577, "loss": 0.5053, "step": 28745 }, { "epoch": 1.4595702554860326, "grad_norm": 0.021547465775443694, "learning_rate": 0.00020638702453259285, "loss": 0.4676, "step": 28750 }, { "epoch": 1.4598240915840641, "grad_norm": 0.02362010469124488, "learning_rate": 0.0002062077451307906, "loss": 0.4553, "step": 28755 }, { "epoch": 1.4600779276820957, "grad_norm": 0.021056652145822894, "learning_rate": 0.00020602852339894306, "loss": 0.4686, "step": 28760 }, { "epoch": 1.4603317637801272, "grad_norm": 0.02196518330009373, "learning_rate": 0.00020584935937223016, "loss": 0.4884, "step": 28765 }, { "epoch": 1.4605855998781587, "grad_norm": 0.021922777074083848, "learning_rate": 0.0002056702530858211, "loss": 0.4752, "step": 28770 }, { "epoch": 1.4608394359761903, "grad_norm": 0.02529934917068532, "learning_rate": 0.00020549120457487354, "loss": 0.4553, "step": 28775 }, { "epoch": 1.4610932720742218, "grad_norm": 0.02498486840017278, "learning_rate": 0.00020531221387453392, "loss": 0.4434, "step": 28780 }, { "epoch": 1.461347108172253, "grad_norm": 0.021722508591940946, "learning_rate": 0.000205133281019937, "loss": 0.4787, "step": 28785 }, { "epoch": 1.4616009442702846, "grad_norm": 0.023233582767727436, "learning_rate": 0.0002049544060462067, "loss": 0.4614, "step": 28790 }, { "epoch": 1.4618547803683162, "grad_norm": 0.02278293695138392, "learning_rate": 0.00020477558898845488, "loss": 0.5037, "step": 28795 }, { "epoch": 1.4621086164663477, "grad_norm": 0.023402783149172365, "learning_rate": 0.00020459682988178285, "loss": 0.4573, "step": 28800 }, { "epoch": 1.4623624525643792, "grad_norm": 0.031169336608011022, "learning_rate": 0.0002044181287612798, "loss": 0.4791, "step": 28805 }, { "epoch": 1.4626162886624106, "grad_norm": 0.028362314190744413, "learning_rate": 0.00020423948566202415, "loss": 0.495, "step": 28810 }, { "epoch": 1.462870124760442, "grad_norm": 0.02179210935015138, "learning_rate": 0.00020406090061908234, "loss": 0.4638, "step": 28815 }, { "epoch": 1.4631239608584736, "grad_norm": 0.024277811689262054, "learning_rate": 0.00020388237366751006, "loss": 0.4549, "step": 28820 }, { "epoch": 1.4633777969565052, "grad_norm": 0.020361262646131383, "learning_rate": 0.00020370390484235096, "loss": 0.479, "step": 28825 }, { "epoch": 1.4636316330545367, "grad_norm": 0.02705830030106512, "learning_rate": 0.00020352549417863768, "loss": 0.4854, "step": 28830 }, { "epoch": 1.4638854691525682, "grad_norm": 0.03431989841271053, "learning_rate": 0.00020334714171139158, "loss": 0.4722, "step": 28835 }, { "epoch": 1.4641393052505998, "grad_norm": 0.02690359771060965, "learning_rate": 0.00020316884747562192, "loss": 0.4755, "step": 28840 }, { "epoch": 1.4643931413486313, "grad_norm": 0.02164943326089008, "learning_rate": 0.0002029906115063274, "loss": 0.468, "step": 28845 }, { "epoch": 1.4646469774466626, "grad_norm": 0.024397789571063064, "learning_rate": 0.0002028124338384945, "loss": 0.4625, "step": 28850 }, { "epoch": 1.4649008135446941, "grad_norm": 0.022519450507155134, "learning_rate": 0.00020263431450709895, "loss": 0.4975, "step": 28855 }, { "epoch": 1.4651546496427257, "grad_norm": 0.0217210003632832, "learning_rate": 0.00020245625354710435, "loss": 0.4629, "step": 28860 }, { "epoch": 1.4654084857407572, "grad_norm": 0.025787902107206176, "learning_rate": 0.00020227825099346347, "loss": 0.4741, "step": 28865 }, { "epoch": 1.4656623218387888, "grad_norm": 0.022185672758679997, "learning_rate": 0.00020210030688111701, "loss": 0.4508, "step": 28870 }, { "epoch": 1.46591615793682, "grad_norm": 0.023178483612238884, "learning_rate": 0.00020192242124499488, "loss": 0.4874, "step": 28875 }, { "epoch": 1.4661699940348516, "grad_norm": 0.02582945990064946, "learning_rate": 0.00020174459412001473, "loss": 0.4582, "step": 28880 }, { "epoch": 1.4664238301328831, "grad_norm": 0.023143750413606654, "learning_rate": 0.00020156682554108357, "loss": 0.4656, "step": 28885 }, { "epoch": 1.4666776662309147, "grad_norm": 0.02269378720031511, "learning_rate": 0.0002013891155430959, "loss": 0.4668, "step": 28890 }, { "epoch": 1.4669315023289462, "grad_norm": 0.03448609380058457, "learning_rate": 0.00020121146416093605, "loss": 0.4811, "step": 28895 }, { "epoch": 1.4671853384269777, "grad_norm": 0.021699709617265108, "learning_rate": 0.00020103387142947555, "loss": 0.5225, "step": 28900 }, { "epoch": 1.4674391745250093, "grad_norm": 0.026406847147590853, "learning_rate": 0.00020085633738357533, "loss": 0.4825, "step": 28905 }, { "epoch": 1.4676930106230408, "grad_norm": 0.033192215599298, "learning_rate": 0.00020067886205808405, "loss": 0.4979, "step": 28910 }, { "epoch": 1.4679468467210723, "grad_norm": 0.02394413280206084, "learning_rate": 0.0002005014454878396, "loss": 0.459, "step": 28915 }, { "epoch": 1.4682006828191037, "grad_norm": 0.0245933526818191, "learning_rate": 0.0002003240877076677, "loss": 0.4576, "step": 28920 }, { "epoch": 1.4684545189171352, "grad_norm": 0.02516217005778102, "learning_rate": 0.00020014678875238302, "loss": 0.5001, "step": 28925 }, { "epoch": 1.4687083550151667, "grad_norm": 0.02361500803054123, "learning_rate": 0.00019996954865678817, "loss": 0.4896, "step": 28930 }, { "epoch": 1.4689621911131983, "grad_norm": 0.024603894061880013, "learning_rate": 0.00019979236745567487, "loss": 0.5019, "step": 28935 }, { "epoch": 1.4692160272112298, "grad_norm": 0.020585521169676338, "learning_rate": 0.00019961524518382267, "loss": 0.456, "step": 28940 }, { "epoch": 1.469469863309261, "grad_norm": 0.02522878829527217, "learning_rate": 0.00019943818187599966, "loss": 0.4565, "step": 28945 }, { "epoch": 1.4697236994072926, "grad_norm": 0.02118075848457831, "learning_rate": 0.00019926117756696265, "loss": 0.4868, "step": 28950 }, { "epoch": 1.4699775355053242, "grad_norm": 0.022435152582389803, "learning_rate": 0.00019908423229145672, "loss": 0.4959, "step": 28955 }, { "epoch": 1.4702313716033557, "grad_norm": 0.02197759398001768, "learning_rate": 0.00019890734608421552, "loss": 0.4627, "step": 28960 }, { "epoch": 1.4704852077013872, "grad_norm": 0.04773874949155717, "learning_rate": 0.00019873051897996053, "loss": 0.4562, "step": 28965 }, { "epoch": 1.4707390437994188, "grad_norm": 0.03353710032706133, "learning_rate": 0.0001985537510134024, "loss": 0.4665, "step": 28970 }, { "epoch": 1.4709928798974503, "grad_norm": 0.026180926097628836, "learning_rate": 0.00019837704221923946, "loss": 0.4479, "step": 28975 }, { "epoch": 1.4712467159954818, "grad_norm": 0.03126605626141384, "learning_rate": 0.00019820039263215917, "loss": 0.4602, "step": 28980 }, { "epoch": 1.4715005520935132, "grad_norm": 0.023705023219665274, "learning_rate": 0.00019802380228683646, "loss": 0.4881, "step": 28985 }, { "epoch": 1.4717543881915447, "grad_norm": 0.023534377244730398, "learning_rate": 0.00019784727121793566, "loss": 0.4828, "step": 28990 }, { "epoch": 1.4720082242895762, "grad_norm": 0.021276660406363043, "learning_rate": 0.00019767079946010852, "loss": 0.4731, "step": 28995 }, { "epoch": 1.4722620603876078, "grad_norm": 0.0275960081210604, "learning_rate": 0.00019749438704799588, "loss": 0.4718, "step": 29000 }, { "epoch": 1.4725158964856393, "grad_norm": 0.021170380103597952, "learning_rate": 0.0001973180340162263, "loss": 0.4525, "step": 29005 }, { "epoch": 1.4727697325836706, "grad_norm": 0.024756352788350977, "learning_rate": 0.00019714174039941736, "loss": 0.5201, "step": 29010 }, { "epoch": 1.4730235686817021, "grad_norm": 0.02683451441512577, "learning_rate": 0.00019696550623217403, "loss": 0.4786, "step": 29015 }, { "epoch": 1.4732774047797337, "grad_norm": 0.02575373794125539, "learning_rate": 0.00019678933154909095, "loss": 0.4784, "step": 29020 }, { "epoch": 1.4735312408777652, "grad_norm": 0.020364829491525765, "learning_rate": 0.00019661321638475004, "loss": 0.4341, "step": 29025 }, { "epoch": 1.4737850769757967, "grad_norm": 0.021298525215463972, "learning_rate": 0.00019643716077372153, "loss": 0.4904, "step": 29030 }, { "epoch": 1.4740389130738283, "grad_norm": 0.021611695522008065, "learning_rate": 0.0001962611647505647, "loss": 0.4968, "step": 29035 }, { "epoch": 1.4742927491718598, "grad_norm": 0.021550734290455672, "learning_rate": 0.00019608522834982633, "loss": 0.4822, "step": 29040 }, { "epoch": 1.4745465852698914, "grad_norm": 0.021212859093263894, "learning_rate": 0.00019590935160604218, "loss": 0.4758, "step": 29045 }, { "epoch": 1.4748004213679227, "grad_norm": 0.02522241453937803, "learning_rate": 0.0001957335345537356, "loss": 0.4638, "step": 29050 }, { "epoch": 1.4750542574659542, "grad_norm": 0.02216798656322059, "learning_rate": 0.00019555777722741902, "loss": 0.4727, "step": 29055 }, { "epoch": 1.4753080935639857, "grad_norm": 0.024334896417849243, "learning_rate": 0.00019538207966159234, "loss": 0.4876, "step": 29060 }, { "epoch": 1.4755619296620173, "grad_norm": 0.020644818633358916, "learning_rate": 0.00019520644189074444, "loss": 0.4633, "step": 29065 }, { "epoch": 1.4758157657600488, "grad_norm": 0.02651778575033576, "learning_rate": 0.00019503086394935182, "loss": 0.4603, "step": 29070 }, { "epoch": 1.4760696018580801, "grad_norm": 0.03491940275430201, "learning_rate": 0.00019485534587187977, "loss": 0.4425, "step": 29075 }, { "epoch": 1.4763234379561117, "grad_norm": 0.023186579903332387, "learning_rate": 0.00019467988769278154, "loss": 0.4549, "step": 29080 }, { "epoch": 1.4765772740541432, "grad_norm": 0.025307596693011927, "learning_rate": 0.00019450448944649895, "loss": 0.4795, "step": 29085 }, { "epoch": 1.4768311101521747, "grad_norm": 0.021614617095183405, "learning_rate": 0.00019432915116746136, "loss": 0.4892, "step": 29090 }, { "epoch": 1.4770849462502063, "grad_norm": 0.02464047963920047, "learning_rate": 0.0001941538728900872, "loss": 0.4628, "step": 29095 }, { "epoch": 1.4773387823482378, "grad_norm": 0.029044426055459413, "learning_rate": 0.00019397865464878235, "loss": 0.4777, "step": 29100 }, { "epoch": 1.4775926184462693, "grad_norm": 0.03081511255172388, "learning_rate": 0.00019380349647794165, "loss": 0.4915, "step": 29105 }, { "epoch": 1.4778464545443009, "grad_norm": 0.02441428752542893, "learning_rate": 0.00019362839841194747, "loss": 0.4716, "step": 29110 }, { "epoch": 1.4781002906423322, "grad_norm": 0.022749416876083776, "learning_rate": 0.00019345336048517094, "loss": 0.4796, "step": 29115 }, { "epoch": 1.4783541267403637, "grad_norm": 0.023954531450951794, "learning_rate": 0.00019327838273197078, "loss": 0.4952, "step": 29120 }, { "epoch": 1.4786079628383952, "grad_norm": 0.02345877737054963, "learning_rate": 0.0001931034651866947, "loss": 0.477, "step": 29125 }, { "epoch": 1.4788617989364268, "grad_norm": 0.02651510050871646, "learning_rate": 0.00019292860788367773, "loss": 0.4987, "step": 29130 }, { "epoch": 1.4791156350344583, "grad_norm": 0.02263999113571259, "learning_rate": 0.00019275381085724364, "loss": 0.455, "step": 29135 }, { "epoch": 1.4793694711324896, "grad_norm": 0.020869618792912294, "learning_rate": 0.00019257907414170445, "loss": 0.4501, "step": 29140 }, { "epoch": 1.4796233072305212, "grad_norm": 0.02387312884738094, "learning_rate": 0.00019240439777135976, "loss": 0.4511, "step": 29145 }, { "epoch": 1.4798771433285527, "grad_norm": 0.05515921100199077, "learning_rate": 0.00019222978178049793, "loss": 0.4692, "step": 29150 }, { "epoch": 1.4801309794265842, "grad_norm": 0.022481925501552334, "learning_rate": 0.00019205522620339494, "loss": 0.4822, "step": 29155 }, { "epoch": 1.4803848155246158, "grad_norm": 0.029596722532962007, "learning_rate": 0.00019188073107431546, "loss": 0.4648, "step": 29160 }, { "epoch": 1.4806386516226473, "grad_norm": 0.022443147646602133, "learning_rate": 0.00019170629642751175, "loss": 0.4588, "step": 29165 }, { "epoch": 1.4808924877206788, "grad_norm": 0.019695512999081097, "learning_rate": 0.00019153192229722478, "loss": 0.47, "step": 29170 }, { "epoch": 1.4811463238187104, "grad_norm": 0.023069499823679177, "learning_rate": 0.00019135760871768294, "loss": 0.4826, "step": 29175 }, { "epoch": 1.4814001599167417, "grad_norm": 0.020513414900472458, "learning_rate": 0.00019118335572310347, "loss": 0.4897, "step": 29180 }, { "epoch": 1.4816539960147732, "grad_norm": 0.02458185146292343, "learning_rate": 0.00019100916334769107, "loss": 0.4503, "step": 29185 }, { "epoch": 1.4819078321128047, "grad_norm": 0.020161767636643235, "learning_rate": 0.00019083503162563908, "loss": 0.4888, "step": 29190 }, { "epoch": 1.4821616682108363, "grad_norm": 0.021529664850411454, "learning_rate": 0.0001906609605911283, "loss": 0.4678, "step": 29195 }, { "epoch": 1.4824155043088678, "grad_norm": 0.022419770519300376, "learning_rate": 0.00019048695027832862, "loss": 0.4569, "step": 29200 }, { "epoch": 1.4826693404068991, "grad_norm": 0.02111471529503639, "learning_rate": 0.00019031300072139685, "loss": 0.4865, "step": 29205 }, { "epoch": 1.4829231765049307, "grad_norm": 0.022270474614644514, "learning_rate": 0.00019013911195447887, "loss": 0.4909, "step": 29210 }, { "epoch": 1.4831770126029622, "grad_norm": 0.0246869876351486, "learning_rate": 0.0001899652840117077, "loss": 0.4963, "step": 29215 }, { "epoch": 1.4834308487009937, "grad_norm": 0.02576348095397173, "learning_rate": 0.0001897915169272053, "loss": 0.4913, "step": 29220 }, { "epoch": 1.4836846847990253, "grad_norm": 0.024597752477760727, "learning_rate": 0.000189617810735081, "loss": 0.5088, "step": 29225 }, { "epoch": 1.4839385208970568, "grad_norm": 0.02344812114499017, "learning_rate": 0.0001894441654694327, "loss": 0.4355, "step": 29230 }, { "epoch": 1.4841923569950883, "grad_norm": 0.021559453697928024, "learning_rate": 0.00018927058116434588, "loss": 0.4764, "step": 29235 }, { "epoch": 1.4844461930931199, "grad_norm": 0.02036859797483484, "learning_rate": 0.00018909705785389452, "loss": 0.4684, "step": 29240 }, { "epoch": 1.4847000291911514, "grad_norm": 0.02205258982868522, "learning_rate": 0.00018892359557214, "loss": 0.4725, "step": 29245 }, { "epoch": 1.4849538652891827, "grad_norm": 0.020563901133827663, "learning_rate": 0.00018875019435313255, "loss": 0.4731, "step": 29250 }, { "epoch": 1.4852077013872143, "grad_norm": 0.028685312406026363, "learning_rate": 0.0001885768542309096, "loss": 0.4728, "step": 29255 }, { "epoch": 1.4854615374852458, "grad_norm": 0.02454713847924293, "learning_rate": 0.0001884035752394971, "loss": 0.4763, "step": 29260 }, { "epoch": 1.4857153735832773, "grad_norm": 0.02365382394464072, "learning_rate": 0.000188230357412909, "loss": 0.4331, "step": 29265 }, { "epoch": 1.4859692096813089, "grad_norm": 0.021080550215192822, "learning_rate": 0.00018805720078514677, "loss": 0.4587, "step": 29270 }, { "epoch": 1.4862230457793402, "grad_norm": 0.022858405035791375, "learning_rate": 0.0001878841053902005, "loss": 0.4602, "step": 29275 }, { "epoch": 1.4864768818773717, "grad_norm": 0.026334553802682143, "learning_rate": 0.00018771107126204771, "loss": 0.4749, "step": 29280 }, { "epoch": 1.4867307179754032, "grad_norm": 0.024924349203167863, "learning_rate": 0.00018753809843465442, "loss": 0.4775, "step": 29285 }, { "epoch": 1.4869845540734348, "grad_norm": 0.031850516565785435, "learning_rate": 0.00018736518694197396, "loss": 0.4589, "step": 29290 }, { "epoch": 1.4872383901714663, "grad_norm": 0.028274802301810807, "learning_rate": 0.0001871923368179484, "loss": 0.4538, "step": 29295 }, { "epoch": 1.4874922262694978, "grad_norm": 0.021991555650211984, "learning_rate": 0.000187019548096507, "loss": 0.4906, "step": 29300 }, { "epoch": 1.4877460623675294, "grad_norm": 0.027104083276996482, "learning_rate": 0.00018684682081156762, "loss": 0.4922, "step": 29305 }, { "epoch": 1.487999898465561, "grad_norm": 0.02050509919684562, "learning_rate": 0.00018667415499703545, "loss": 0.4614, "step": 29310 }, { "epoch": 1.4882537345635922, "grad_norm": 0.021216527675499326, "learning_rate": 0.00018650155068680407, "loss": 0.4525, "step": 29315 }, { "epoch": 1.4885075706616238, "grad_norm": 0.022730076917583136, "learning_rate": 0.00018632900791475492, "loss": 0.4685, "step": 29320 }, { "epoch": 1.4887614067596553, "grad_norm": 0.023575223517873986, "learning_rate": 0.0001861565267147574, "loss": 0.4376, "step": 29325 }, { "epoch": 1.4890152428576868, "grad_norm": 0.02277744320446659, "learning_rate": 0.0001859841071206684, "loss": 0.4398, "step": 29330 }, { "epoch": 1.4892690789557184, "grad_norm": 0.021608319308460473, "learning_rate": 0.0001858117491663333, "loss": 0.489, "step": 29335 }, { "epoch": 1.4895229150537497, "grad_norm": 0.020073745176878363, "learning_rate": 0.0001856394528855848, "loss": 0.4597, "step": 29340 }, { "epoch": 1.4897767511517812, "grad_norm": 0.022010323279037017, "learning_rate": 0.00018546721831224424, "loss": 0.4762, "step": 29345 }, { "epoch": 1.4900305872498127, "grad_norm": 0.02437069862496661, "learning_rate": 0.00018529504548011995, "loss": 0.4831, "step": 29350 }, { "epoch": 1.4902844233478443, "grad_norm": 0.021983892566894717, "learning_rate": 0.00018512293442300893, "loss": 0.4737, "step": 29355 }, { "epoch": 1.4905382594458758, "grad_norm": 0.03613783598950718, "learning_rate": 0.00018495088517469545, "loss": 0.4716, "step": 29360 }, { "epoch": 1.4907920955439073, "grad_norm": 0.02504114958866562, "learning_rate": 0.00018477889776895225, "loss": 0.453, "step": 29365 }, { "epoch": 1.4910459316419389, "grad_norm": 0.023647519356946433, "learning_rate": 0.0001846069722395392, "loss": 0.4599, "step": 29370 }, { "epoch": 1.4912997677399704, "grad_norm": 0.02647181372921064, "learning_rate": 0.00018443510862020467, "loss": 0.4836, "step": 29375 }, { "epoch": 1.4915536038380017, "grad_norm": 0.0222009899568302, "learning_rate": 0.0001842633069446848, "loss": 0.4592, "step": 29380 }, { "epoch": 1.4918074399360333, "grad_norm": 0.02282816694246597, "learning_rate": 0.00018409156724670295, "loss": 0.5007, "step": 29385 }, { "epoch": 1.4920612760340648, "grad_norm": 0.02159121780004676, "learning_rate": 0.00018391988955997126, "loss": 0.4567, "step": 29390 }, { "epoch": 1.4923151121320963, "grad_norm": 0.022260535759933305, "learning_rate": 0.00018374827391818877, "loss": 0.4663, "step": 29395 }, { "epoch": 1.4925689482301279, "grad_norm": 0.022850427122847246, "learning_rate": 0.00018357672035504313, "loss": 0.4874, "step": 29400 }, { "epoch": 1.4928227843281592, "grad_norm": 0.019789375321462733, "learning_rate": 0.00018340522890420907, "loss": 0.4172, "step": 29405 }, { "epoch": 1.4930766204261907, "grad_norm": 0.025613999903597914, "learning_rate": 0.00018323379959934993, "loss": 0.4852, "step": 29410 }, { "epoch": 1.4933304565242222, "grad_norm": 0.027238755553218558, "learning_rate": 0.0001830624324741161, "loss": 0.4733, "step": 29415 }, { "epoch": 1.4935842926222538, "grad_norm": 0.025748721906667212, "learning_rate": 0.00018289112756214633, "loss": 0.4633, "step": 29420 }, { "epoch": 1.4938381287202853, "grad_norm": 0.022524685723581257, "learning_rate": 0.0001827198848970666, "loss": 0.4573, "step": 29425 }, { "epoch": 1.4940919648183169, "grad_norm": 0.02722240287350484, "learning_rate": 0.00018254870451249138, "loss": 0.4754, "step": 29430 }, { "epoch": 1.4943458009163484, "grad_norm": 0.024092764906284942, "learning_rate": 0.000182377586442022, "loss": 0.4882, "step": 29435 }, { "epoch": 1.49459963701438, "grad_norm": 0.023881198609627088, "learning_rate": 0.00018220653071924876, "loss": 0.4624, "step": 29440 }, { "epoch": 1.4948534731124112, "grad_norm": 0.027775639716056417, "learning_rate": 0.0001820355373777486, "loss": 0.438, "step": 29445 }, { "epoch": 1.4951073092104428, "grad_norm": 0.028210032309733676, "learning_rate": 0.0001818646064510868, "loss": 0.485, "step": 29450 }, { "epoch": 1.4953611453084743, "grad_norm": 0.024018687894484327, "learning_rate": 0.00018169373797281618, "loss": 0.4624, "step": 29455 }, { "epoch": 1.4956149814065058, "grad_norm": 0.02242820966339362, "learning_rate": 0.0001815229319764775, "loss": 0.4483, "step": 29460 }, { "epoch": 1.4958688175045374, "grad_norm": 0.024756677687856363, "learning_rate": 0.00018135218849559887, "loss": 0.4973, "step": 29465 }, { "epoch": 1.4961226536025687, "grad_norm": 0.026600698958258437, "learning_rate": 0.00018118150756369673, "loss": 0.4575, "step": 29470 }, { "epoch": 1.4963764897006002, "grad_norm": 0.021303694946705722, "learning_rate": 0.00018101088921427456, "loss": 0.4561, "step": 29475 }, { "epoch": 1.4966303257986318, "grad_norm": 0.02617735975797147, "learning_rate": 0.00018084033348082418, "loss": 0.4593, "step": 29480 }, { "epoch": 1.4968841618966633, "grad_norm": 0.021397946565801608, "learning_rate": 0.00018066984039682456, "loss": 0.4405, "step": 29485 }, { "epoch": 1.4971379979946948, "grad_norm": 0.024482557930807926, "learning_rate": 0.00018049940999574288, "loss": 0.4788, "step": 29490 }, { "epoch": 1.4973918340927264, "grad_norm": 0.025781729735713393, "learning_rate": 0.00018032904231103354, "loss": 0.4797, "step": 29495 }, { "epoch": 1.497645670190758, "grad_norm": 0.01949959677459056, "learning_rate": 0.00018015873737613897, "loss": 0.4558, "step": 29500 }, { "epoch": 1.4978995062887894, "grad_norm": 0.022386748122245725, "learning_rate": 0.0001799884952244894, "loss": 0.4935, "step": 29505 }, { "epoch": 1.498153342386821, "grad_norm": 0.021221757126598573, "learning_rate": 0.00017981831588950216, "loss": 0.4462, "step": 29510 }, { "epoch": 1.4984071784848523, "grad_norm": 0.022333608494739168, "learning_rate": 0.00017964819940458293, "loss": 0.4621, "step": 29515 }, { "epoch": 1.4986610145828838, "grad_norm": 0.024666056084257013, "learning_rate": 0.00017947814580312438, "loss": 0.4549, "step": 29520 }, { "epoch": 1.4989148506809153, "grad_norm": 0.02090549438169576, "learning_rate": 0.00017930815511850757, "loss": 0.4689, "step": 29525 }, { "epoch": 1.4991686867789469, "grad_norm": 0.03565612070546386, "learning_rate": 0.00017913822738410042, "loss": 0.4928, "step": 29530 }, { "epoch": 1.4994225228769784, "grad_norm": 0.023113435548243647, "learning_rate": 0.00017896836263325928, "loss": 0.4889, "step": 29535 }, { "epoch": 1.4996763589750097, "grad_norm": 0.023728923010015324, "learning_rate": 0.0001787985608993274, "loss": 0.4956, "step": 29540 }, { "epoch": 1.4999301950730413, "grad_norm": 0.025393929885262224, "learning_rate": 0.00017862882221563635, "loss": 0.4454, "step": 29545 }, { "epoch": 1.5001840311710728, "grad_norm": 0.028049319465082344, "learning_rate": 0.00017845914661550466, "loss": 0.4871, "step": 29550 }, { "epoch": 1.5004378672691043, "grad_norm": 0.031110725136696264, "learning_rate": 0.00017828953413223897, "loss": 0.4707, "step": 29555 }, { "epoch": 1.5006917033671359, "grad_norm": 0.023067267396166907, "learning_rate": 0.00017811998479913337, "loss": 0.4852, "step": 29560 }, { "epoch": 1.5009455394651674, "grad_norm": 0.022599329066948542, "learning_rate": 0.0001779504986494697, "loss": 0.4629, "step": 29565 }, { "epoch": 1.501199375563199, "grad_norm": 0.024939817004019073, "learning_rate": 0.00017778107571651692, "loss": 0.486, "step": 29570 }, { "epoch": 1.5014532116612305, "grad_norm": 0.0293017112366802, "learning_rate": 0.00017761171603353226, "loss": 0.4563, "step": 29575 }, { "epoch": 1.501707047759262, "grad_norm": 0.029238115277750512, "learning_rate": 0.00017744241963375986, "loss": 0.4879, "step": 29580 }, { "epoch": 1.5019608838572933, "grad_norm": 0.02734864802953103, "learning_rate": 0.00017727318655043196, "loss": 0.4643, "step": 29585 }, { "epoch": 1.5022147199553249, "grad_norm": 0.023307569346119706, "learning_rate": 0.00017710401681676803, "loss": 0.4675, "step": 29590 }, { "epoch": 1.5024685560533564, "grad_norm": 0.03538931937461218, "learning_rate": 0.00017693491046597544, "loss": 0.4816, "step": 29595 }, { "epoch": 1.5027223921513877, "grad_norm": 0.02593067797767141, "learning_rate": 0.0001767658675312486, "loss": 0.4854, "step": 29600 }, { "epoch": 1.5029762282494192, "grad_norm": 0.038262040752877625, "learning_rate": 0.00017659688804577022, "loss": 0.4869, "step": 29605 }, { "epoch": 1.5032300643474508, "grad_norm": 0.03034656256994557, "learning_rate": 0.00017642797204270972, "loss": 0.4745, "step": 29610 }, { "epoch": 1.5034839004454823, "grad_norm": 0.02925654062689158, "learning_rate": 0.00017625911955522467, "loss": 0.4796, "step": 29615 }, { "epoch": 1.5037377365435138, "grad_norm": 0.35865233957359255, "learning_rate": 0.00017609033061646013, "loss": 0.4897, "step": 29620 }, { "epoch": 1.5039915726415454, "grad_norm": 0.024975856991066987, "learning_rate": 0.0001759216052595482, "loss": 0.4771, "step": 29625 }, { "epoch": 1.504245408739577, "grad_norm": 0.027727973964852774, "learning_rate": 0.00017575294351760912, "loss": 0.4449, "step": 29630 }, { "epoch": 1.5044992448376084, "grad_norm": 0.03183878239195353, "learning_rate": 0.00017558434542375002, "loss": 0.4803, "step": 29635 }, { "epoch": 1.50475308093564, "grad_norm": 0.021762613260108758, "learning_rate": 0.0001754158110110663, "loss": 0.4505, "step": 29640 }, { "epoch": 1.5050069170336715, "grad_norm": 0.02265057874561824, "learning_rate": 0.00017524734031263995, "loss": 0.4783, "step": 29645 }, { "epoch": 1.5052607531317028, "grad_norm": 0.024018928533084835, "learning_rate": 0.00017507893336154136, "loss": 0.4687, "step": 29650 }, { "epoch": 1.5055145892297344, "grad_norm": 0.025100700637922822, "learning_rate": 0.00017491059019082757, "loss": 0.4687, "step": 29655 }, { "epoch": 1.505768425327766, "grad_norm": 0.025226901808710973, "learning_rate": 0.00017474231083354386, "loss": 0.4664, "step": 29660 }, { "epoch": 1.5060222614257972, "grad_norm": 0.019733370421100523, "learning_rate": 0.00017457409532272233, "loss": 0.4593, "step": 29665 }, { "epoch": 1.5062760975238287, "grad_norm": 0.036127873751405706, "learning_rate": 0.00017440594369138318, "loss": 0.4612, "step": 29670 }, { "epoch": 1.5065299336218603, "grad_norm": 0.027988146551036864, "learning_rate": 0.00017423785597253322, "loss": 0.4712, "step": 29675 }, { "epoch": 1.5067837697198918, "grad_norm": 0.022722614037556717, "learning_rate": 0.00017406983219916784, "loss": 0.4818, "step": 29680 }, { "epoch": 1.5070376058179233, "grad_norm": 0.021733007504925076, "learning_rate": 0.00017390187240426885, "loss": 0.4636, "step": 29685 }, { "epoch": 1.5072914419159549, "grad_norm": 0.022966860647516654, "learning_rate": 0.00017373397662080625, "loss": 0.4507, "step": 29690 }, { "epoch": 1.5075452780139864, "grad_norm": 0.02313533592545971, "learning_rate": 0.0001735661448817368, "loss": 0.4839, "step": 29695 }, { "epoch": 1.507799114112018, "grad_norm": 0.02643057262905096, "learning_rate": 0.0001733983772200053, "loss": 0.4706, "step": 29700 }, { "epoch": 1.5080529502100495, "grad_norm": 0.021347818597730134, "learning_rate": 0.00017323067366854344, "loss": 0.481, "step": 29705 }, { "epoch": 1.508306786308081, "grad_norm": 0.02075350561553384, "learning_rate": 0.00017306303426027094, "loss": 0.5027, "step": 29710 }, { "epoch": 1.5085606224061123, "grad_norm": 0.044968810989294526, "learning_rate": 0.00017289545902809416, "loss": 0.4437, "step": 29715 }, { "epoch": 1.5088144585041439, "grad_norm": 0.02712313510234019, "learning_rate": 0.00017272794800490772, "loss": 0.4846, "step": 29720 }, { "epoch": 1.5090682946021754, "grad_norm": 0.030525162317820355, "learning_rate": 0.00017256050122359278, "loss": 0.4566, "step": 29725 }, { "epoch": 1.5093221307002067, "grad_norm": 0.023955850748629902, "learning_rate": 0.00017239311871701868, "loss": 0.453, "step": 29730 }, { "epoch": 1.5095759667982382, "grad_norm": 0.02040802810169976, "learning_rate": 0.00017222580051804147, "loss": 0.4381, "step": 29735 }, { "epoch": 1.5098298028962698, "grad_norm": 0.022363301802030486, "learning_rate": 0.000172058546659505, "loss": 0.4707, "step": 29740 }, { "epoch": 1.5100836389943013, "grad_norm": 0.020755527638949292, "learning_rate": 0.00017189135717424054, "loss": 0.4495, "step": 29745 }, { "epoch": 1.5103374750923328, "grad_norm": 0.022821381482327964, "learning_rate": 0.0001717242320950662, "loss": 0.4443, "step": 29750 }, { "epoch": 1.5105913111903644, "grad_norm": 0.02519971954586667, "learning_rate": 0.00017155717145478822, "loss": 0.4724, "step": 29755 }, { "epoch": 1.510845147288396, "grad_norm": 0.031004863317514574, "learning_rate": 0.00017139017528619932, "loss": 0.4501, "step": 29760 }, { "epoch": 1.5110989833864275, "grad_norm": 0.022610866791681877, "learning_rate": 0.0001712232436220804, "loss": 0.4789, "step": 29765 }, { "epoch": 1.511352819484459, "grad_norm": 0.027520059296275306, "learning_rate": 0.000171056376495199, "loss": 0.4893, "step": 29770 }, { "epoch": 1.5116066555824905, "grad_norm": 0.02082044707695213, "learning_rate": 0.00017088957393831066, "loss": 0.4471, "step": 29775 }, { "epoch": 1.5118604916805218, "grad_norm": 0.02144250442393683, "learning_rate": 0.0001707228359841575, "loss": 0.4981, "step": 29780 }, { "epoch": 1.5121143277785534, "grad_norm": 0.021818336086735435, "learning_rate": 0.0001705561626654697, "loss": 0.4511, "step": 29785 }, { "epoch": 1.512368163876585, "grad_norm": 0.023425182235900163, "learning_rate": 0.00017038955401496404, "loss": 0.4725, "step": 29790 }, { "epoch": 1.5126219999746164, "grad_norm": 0.0227877965017971, "learning_rate": 0.00017022301006534512, "loss": 0.4842, "step": 29795 }, { "epoch": 1.5128758360726478, "grad_norm": 0.02442465002826424, "learning_rate": 0.00017005653084930483, "loss": 0.485, "step": 29800 }, { "epoch": 1.5131296721706793, "grad_norm": 0.020723345058742938, "learning_rate": 0.00016989011639952222, "loss": 0.4655, "step": 29805 }, { "epoch": 1.5133835082687108, "grad_norm": 0.020479347433989627, "learning_rate": 0.00016972376674866336, "loss": 0.473, "step": 29810 }, { "epoch": 1.5136373443667424, "grad_norm": 0.02383837108874991, "learning_rate": 0.00016955748192938215, "loss": 0.4885, "step": 29815 }, { "epoch": 1.513891180464774, "grad_norm": 0.02189974689590864, "learning_rate": 0.00016939126197431916, "loss": 0.4831, "step": 29820 }, { "epoch": 1.5141450165628054, "grad_norm": 0.022326421553039454, "learning_rate": 0.00016922510691610288, "loss": 0.4914, "step": 29825 }, { "epoch": 1.514398852660837, "grad_norm": 0.025686926161214048, "learning_rate": 0.00016905901678734836, "loss": 0.4565, "step": 29830 }, { "epoch": 1.5146526887588685, "grad_norm": 0.02510185893652824, "learning_rate": 0.00016889299162065863, "loss": 0.4412, "step": 29835 }, { "epoch": 1.5149065248569, "grad_norm": 0.019743756304140575, "learning_rate": 0.00016872703144862322, "loss": 0.4503, "step": 29840 }, { "epoch": 1.5151603609549316, "grad_norm": 0.023099741532777593, "learning_rate": 0.0001685611363038197, "loss": 0.4646, "step": 29845 }, { "epoch": 1.5154141970529629, "grad_norm": 0.02167102495604313, "learning_rate": 0.000168395306218812, "loss": 0.4297, "step": 29850 }, { "epoch": 1.5156680331509944, "grad_norm": 0.021199600428507007, "learning_rate": 0.00016822954122615202, "loss": 0.5018, "step": 29855 }, { "epoch": 1.515921869249026, "grad_norm": 0.03525431444628682, "learning_rate": 0.0001680638413583787, "loss": 0.4627, "step": 29860 }, { "epoch": 1.5161757053470573, "grad_norm": 0.021847874025035, "learning_rate": 0.00016789820664801785, "loss": 0.4601, "step": 29865 }, { "epoch": 1.5164295414450888, "grad_norm": 0.023339409719603202, "learning_rate": 0.00016773263712758298, "loss": 0.4853, "step": 29870 }, { "epoch": 1.5166833775431203, "grad_norm": 0.028528438177320366, "learning_rate": 0.00016756713282957425, "loss": 0.4764, "step": 29875 }, { "epoch": 1.5169372136411519, "grad_norm": 0.02968109811483413, "learning_rate": 0.00016740169378647967, "loss": 0.4622, "step": 29880 }, { "epoch": 1.5171910497391834, "grad_norm": 0.020421612487212134, "learning_rate": 0.00016723632003077382, "loss": 0.4608, "step": 29885 }, { "epoch": 1.517444885837215, "grad_norm": 0.022429618618365205, "learning_rate": 0.000167071011594919, "loss": 0.4365, "step": 29890 }, { "epoch": 1.5176987219352465, "grad_norm": 0.022662076262398696, "learning_rate": 0.00016690576851136407, "loss": 0.4606, "step": 29895 }, { "epoch": 1.517952558033278, "grad_norm": 0.024997243449074575, "learning_rate": 0.00016674059081254588, "loss": 0.494, "step": 29900 }, { "epoch": 1.5182063941313095, "grad_norm": 0.02074484540640106, "learning_rate": 0.00016657547853088755, "loss": 0.4665, "step": 29905 }, { "epoch": 1.518460230229341, "grad_norm": 0.02155076942870292, "learning_rate": 0.00016641043169880016, "loss": 0.4733, "step": 29910 }, { "epoch": 1.5187140663273724, "grad_norm": 0.021665837981174028, "learning_rate": 0.00016624545034868126, "loss": 0.4754, "step": 29915 }, { "epoch": 1.518967902425404, "grad_norm": 0.021946502878706562, "learning_rate": 0.00016608053451291606, "loss": 0.4603, "step": 29920 }, { "epoch": 1.5192217385234354, "grad_norm": 0.021175079987024562, "learning_rate": 0.0001659156842238766, "loss": 0.4527, "step": 29925 }, { "epoch": 1.5194755746214668, "grad_norm": 0.0237496786913426, "learning_rate": 0.00016575089951392246, "loss": 0.4646, "step": 29930 }, { "epoch": 1.5197294107194983, "grad_norm": 0.023737688121588916, "learning_rate": 0.0001655861804153997, "loss": 0.4924, "step": 29935 }, { "epoch": 1.5199832468175298, "grad_norm": 0.021122130099155718, "learning_rate": 0.00016542152696064216, "loss": 0.4394, "step": 29940 }, { "epoch": 1.5202370829155614, "grad_norm": 0.02227799640020005, "learning_rate": 0.00016525693918197017, "loss": 0.4538, "step": 29945 }, { "epoch": 1.520490919013593, "grad_norm": 0.02210238996254023, "learning_rate": 0.00016509241711169182, "loss": 0.4354, "step": 29950 }, { "epoch": 1.5207447551116244, "grad_norm": 0.023706052350875976, "learning_rate": 0.00016492796078210165, "loss": 0.4673, "step": 29955 }, { "epoch": 1.520998591209656, "grad_norm": 0.024032972742291287, "learning_rate": 0.00016476357022548194, "loss": 0.4675, "step": 29960 }, { "epoch": 1.5212524273076875, "grad_norm": 0.022993782010873133, "learning_rate": 0.0001645992454741016, "loss": 0.467, "step": 29965 }, { "epoch": 1.521506263405719, "grad_norm": 0.02190020611794009, "learning_rate": 0.0001644349865602165, "loss": 0.4702, "step": 29970 }, { "epoch": 1.5217600995037506, "grad_norm": 0.028275273809187658, "learning_rate": 0.00016427079351607031, "loss": 0.4871, "step": 29975 }, { "epoch": 1.5220139356017819, "grad_norm": 0.021445956898613706, "learning_rate": 0.00016410666637389272, "loss": 0.4798, "step": 29980 }, { "epoch": 1.5222677716998134, "grad_norm": 0.02438123499531666, "learning_rate": 0.00016394260516590175, "loss": 0.4827, "step": 29985 }, { "epoch": 1.522521607797845, "grad_norm": 0.022720865331639653, "learning_rate": 0.00016377860992430128, "loss": 0.4744, "step": 29990 }, { "epoch": 1.5227754438958763, "grad_norm": 0.022386692990421366, "learning_rate": 0.00016361468068128314, "loss": 0.487, "step": 29995 }, { "epoch": 1.5230292799939078, "grad_norm": 0.022169321428591356, "learning_rate": 0.00016345081746902546, "loss": 0.455, "step": 30000 }, { "epoch": 1.5232831160919393, "grad_norm": 0.023182632494306774, "learning_rate": 0.0001632870203196941, "loss": 0.4439, "step": 30005 }, { "epoch": 1.5235369521899709, "grad_norm": 0.02371609367353566, "learning_rate": 0.00016312328926544134, "loss": 0.4749, "step": 30010 }, { "epoch": 1.5237907882880024, "grad_norm": 0.03987679265817027, "learning_rate": 0.00016295962433840705, "loss": 0.4789, "step": 30015 }, { "epoch": 1.524044624386034, "grad_norm": 0.02301138402117863, "learning_rate": 0.0001627960255707175, "loss": 0.4964, "step": 30020 }, { "epoch": 1.5242984604840655, "grad_norm": 0.03265353278900402, "learning_rate": 0.0001626324929944867, "loss": 0.4898, "step": 30025 }, { "epoch": 1.524552296582097, "grad_norm": 0.022592094290072112, "learning_rate": 0.00016246902664181483, "loss": 0.4581, "step": 30030 }, { "epoch": 1.5248061326801285, "grad_norm": 0.022149041415701717, "learning_rate": 0.00016230562654478997, "loss": 0.501, "step": 30035 }, { "epoch": 1.52505996877816, "grad_norm": 0.027689045530166045, "learning_rate": 0.00016214229273548626, "loss": 0.4852, "step": 30040 }, { "epoch": 1.5253138048761914, "grad_norm": 0.021429952552435644, "learning_rate": 0.00016197902524596586, "loss": 0.4657, "step": 30045 }, { "epoch": 1.525567640974223, "grad_norm": 0.020301150588238986, "learning_rate": 0.0001618158241082771, "loss": 0.4545, "step": 30050 }, { "epoch": 1.5258214770722545, "grad_norm": 0.031778032501045626, "learning_rate": 0.00016165268935445544, "loss": 0.4439, "step": 30055 }, { "epoch": 1.526075313170286, "grad_norm": 0.035429628853980294, "learning_rate": 0.00016148962101652364, "loss": 0.4669, "step": 30060 }, { "epoch": 1.5263291492683173, "grad_norm": 0.02204480950917167, "learning_rate": 0.00016132661912649093, "loss": 0.4762, "step": 30065 }, { "epoch": 1.5265829853663488, "grad_norm": 0.10463008791075984, "learning_rate": 0.0001611636837163541, "loss": 0.4746, "step": 30070 }, { "epoch": 1.5268368214643804, "grad_norm": 0.022253586897096424, "learning_rate": 0.0001610008148180962, "loss": 0.4731, "step": 30075 }, { "epoch": 1.527090657562412, "grad_norm": 0.02649746556621996, "learning_rate": 0.0001608380124636879, "loss": 0.4716, "step": 30080 }, { "epoch": 1.5273444936604434, "grad_norm": 0.02103303584052773, "learning_rate": 0.00016067527668508624, "loss": 0.4537, "step": 30085 }, { "epoch": 1.527598329758475, "grad_norm": 0.022990234812475127, "learning_rate": 0.00016051260751423575, "loss": 0.4653, "step": 30090 }, { "epoch": 1.5278521658565065, "grad_norm": 0.023544638654335974, "learning_rate": 0.00016035000498306712, "loss": 0.4698, "step": 30095 }, { "epoch": 1.528106001954538, "grad_norm": 0.02200543924407069, "learning_rate": 0.00016018746912349873, "loss": 0.4672, "step": 30100 }, { "epoch": 1.5283598380525696, "grad_norm": 0.024788258547969233, "learning_rate": 0.00016002499996743553, "loss": 0.4542, "step": 30105 }, { "epoch": 1.5286136741506011, "grad_norm": 0.020208891949950916, "learning_rate": 0.00015986259754676956, "loss": 0.4441, "step": 30110 }, { "epoch": 1.5288675102486324, "grad_norm": 0.020897749391169118, "learning_rate": 0.00015970026189337922, "loss": 0.4426, "step": 30115 }, { "epoch": 1.529121346346664, "grad_norm": 0.029396390617214736, "learning_rate": 0.00015953799303913057, "loss": 0.4663, "step": 30120 }, { "epoch": 1.5293751824446955, "grad_norm": 0.022145565505644067, "learning_rate": 0.0001593757910158759, "loss": 0.461, "step": 30125 }, { "epoch": 1.5296290185427268, "grad_norm": 0.025670850634170336, "learning_rate": 0.00015921365585545483, "loss": 0.4842, "step": 30130 }, { "epoch": 1.5298828546407583, "grad_norm": 0.02368956140515389, "learning_rate": 0.00015905158758969351, "loss": 0.47, "step": 30135 }, { "epoch": 1.5301366907387899, "grad_norm": 0.029291043930741913, "learning_rate": 0.0001588895862504054, "loss": 0.4698, "step": 30140 }, { "epoch": 1.5303905268368214, "grad_norm": 0.025337071026864683, "learning_rate": 0.00015872765186939025, "loss": 0.4856, "step": 30145 }, { "epoch": 1.530644362934853, "grad_norm": 0.026591156717338086, "learning_rate": 0.00015856578447843523, "loss": 0.4747, "step": 30150 }, { "epoch": 1.5308981990328845, "grad_norm": 0.022063128074845867, "learning_rate": 0.0001584039841093139, "loss": 0.4591, "step": 30155 }, { "epoch": 1.531152035130916, "grad_norm": 0.021335741188654174, "learning_rate": 0.00015824225079378684, "loss": 0.4508, "step": 30160 }, { "epoch": 1.5314058712289476, "grad_norm": 0.0223720420156148, "learning_rate": 0.00015808058456360185, "loss": 0.476, "step": 30165 }, { "epoch": 1.531659707326979, "grad_norm": 0.020840588861711914, "learning_rate": 0.00015791898545049277, "loss": 0.4812, "step": 30170 }, { "epoch": 1.5319135434250106, "grad_norm": 0.022298891763324967, "learning_rate": 0.0001577574534861811, "loss": 0.4683, "step": 30175 }, { "epoch": 1.532167379523042, "grad_norm": 0.023587454451723652, "learning_rate": 0.00015759598870237435, "loss": 0.4414, "step": 30180 }, { "epoch": 1.5324212156210735, "grad_norm": 0.021926001890396517, "learning_rate": 0.00015743459113076757, "loss": 0.4848, "step": 30185 }, { "epoch": 1.532675051719105, "grad_norm": 0.02060956296914585, "learning_rate": 0.0001572732608030421, "loss": 0.4925, "step": 30190 }, { "epoch": 1.5329288878171363, "grad_norm": 0.023321532056538852, "learning_rate": 0.0001571119977508665, "loss": 0.4832, "step": 30195 }, { "epoch": 1.5331827239151679, "grad_norm": 0.020455538805839175, "learning_rate": 0.00015695080200589555, "loss": 0.46, "step": 30200 }, { "epoch": 1.5334365600131994, "grad_norm": 0.027072650225216113, "learning_rate": 0.0001567896735997716, "loss": 0.5015, "step": 30205 }, { "epoch": 1.533690396111231, "grad_norm": 0.025146547764616688, "learning_rate": 0.00015662861256412293, "loss": 0.4592, "step": 30210 }, { "epoch": 1.5339442322092625, "grad_norm": 0.036222624314755626, "learning_rate": 0.0001564676189305654, "loss": 0.4494, "step": 30215 }, { "epoch": 1.534198068307294, "grad_norm": 0.021401020225457784, "learning_rate": 0.00015630669273070075, "loss": 0.4658, "step": 30220 }, { "epoch": 1.5344519044053255, "grad_norm": 0.030566491748286028, "learning_rate": 0.00015614583399611864, "loss": 0.4616, "step": 30225 }, { "epoch": 1.534705740503357, "grad_norm": 0.02095090812110531, "learning_rate": 0.00015598504275839443, "loss": 0.4697, "step": 30230 }, { "epoch": 1.5349595766013886, "grad_norm": 0.022932148457458463, "learning_rate": 0.00015582431904909082, "loss": 0.4771, "step": 30235 }, { "epoch": 1.5352134126994201, "grad_norm": 0.03407518278595503, "learning_rate": 0.00015566366289975682, "loss": 0.4951, "step": 30240 }, { "epoch": 1.5354672487974514, "grad_norm": 0.025253916128979353, "learning_rate": 0.00015550307434192878, "loss": 0.4665, "step": 30245 }, { "epoch": 1.535721084895483, "grad_norm": 0.02181284888478828, "learning_rate": 0.00015534255340712906, "loss": 0.4712, "step": 30250 }, { "epoch": 1.5359749209935145, "grad_norm": 0.026511357983416556, "learning_rate": 0.00015518210012686746, "loss": 0.457, "step": 30255 }, { "epoch": 1.5362287570915458, "grad_norm": 0.02225675314670062, "learning_rate": 0.00015502171453263985, "loss": 0.4655, "step": 30260 }, { "epoch": 1.5364825931895774, "grad_norm": 0.02579060282785478, "learning_rate": 0.0001548613966559294, "loss": 0.4537, "step": 30265 }, { "epoch": 1.536736429287609, "grad_norm": 0.022733734170467795, "learning_rate": 0.00015470114652820548, "loss": 0.4893, "step": 30270 }, { "epoch": 1.5369902653856404, "grad_norm": 0.033369886202445666, "learning_rate": 0.0001545409641809246, "loss": 0.4329, "step": 30275 }, { "epoch": 1.537244101483672, "grad_norm": 0.02399529836464429, "learning_rate": 0.00015438084964552952, "loss": 0.4608, "step": 30280 }, { "epoch": 1.5374979375817035, "grad_norm": 0.026731534341703136, "learning_rate": 0.0001542208029534501, "loss": 0.4834, "step": 30285 }, { "epoch": 1.537751773679735, "grad_norm": 0.02160422482777416, "learning_rate": 0.00015406082413610273, "loss": 0.4872, "step": 30290 }, { "epoch": 1.5380056097777666, "grad_norm": 0.0316642181786694, "learning_rate": 0.0001539009132248903, "loss": 0.4739, "step": 30295 }, { "epoch": 1.538259445875798, "grad_norm": 0.021390203234012684, "learning_rate": 0.0001537410702512027, "loss": 0.485, "step": 30300 }, { "epoch": 1.5385132819738296, "grad_norm": 0.021910472645039536, "learning_rate": 0.00015358129524641612, "loss": 0.4836, "step": 30305 }, { "epoch": 1.538767118071861, "grad_norm": 0.026086465805670835, "learning_rate": 0.00015342158824189383, "loss": 0.4645, "step": 30310 }, { "epoch": 1.5390209541698925, "grad_norm": 0.02206117837334602, "learning_rate": 0.00015326194926898524, "loss": 0.4429, "step": 30315 }, { "epoch": 1.539274790267924, "grad_norm": 0.04439473835947078, "learning_rate": 0.00015310237835902696, "loss": 0.4955, "step": 30320 }, { "epoch": 1.5395286263659556, "grad_norm": 0.022276905328733724, "learning_rate": 0.0001529428755433417, "loss": 0.4728, "step": 30325 }, { "epoch": 1.5397824624639869, "grad_norm": 0.022985726029175126, "learning_rate": 0.00015278344085323936, "loss": 0.4855, "step": 30330 }, { "epoch": 1.5400362985620184, "grad_norm": 0.02518413121712412, "learning_rate": 0.00015262407432001585, "loss": 0.4615, "step": 30335 }, { "epoch": 1.54029013466005, "grad_norm": 0.02425031078779939, "learning_rate": 0.00015246477597495418, "loss": 0.4743, "step": 30340 }, { "epoch": 1.5405439707580815, "grad_norm": 0.02735453587465954, "learning_rate": 0.00015230554584932382, "loss": 0.5191, "step": 30345 }, { "epoch": 1.540797806856113, "grad_norm": 0.019919301710259858, "learning_rate": 0.00015214638397438108, "loss": 0.4306, "step": 30350 }, { "epoch": 1.5410516429541445, "grad_norm": 0.0254003448653861, "learning_rate": 0.00015198729038136822, "loss": 0.4859, "step": 30355 }, { "epoch": 1.541305479052176, "grad_norm": 0.025573678483990125, "learning_rate": 0.00015182826510151486, "loss": 0.4723, "step": 30360 }, { "epoch": 1.5415593151502076, "grad_norm": 0.027652411805370227, "learning_rate": 0.00015166930816603658, "loss": 0.455, "step": 30365 }, { "epoch": 1.5418131512482391, "grad_norm": 0.020242159971136965, "learning_rate": 0.00015151041960613615, "loss": 0.4484, "step": 30370 }, { "epoch": 1.5420669873462707, "grad_norm": 0.020441851093310592, "learning_rate": 0.0001513515994530023, "loss": 0.4777, "step": 30375 }, { "epoch": 1.542320823444302, "grad_norm": 0.02264328559702477, "learning_rate": 0.00015119284773781088, "loss": 0.4692, "step": 30380 }, { "epoch": 1.5425746595423335, "grad_norm": 0.026801259654149444, "learning_rate": 0.00015103416449172385, "loss": 0.4879, "step": 30385 }, { "epoch": 1.542828495640365, "grad_norm": 0.029775650566010867, "learning_rate": 0.0001508755497458902, "loss": 0.4733, "step": 30390 }, { "epoch": 1.5430823317383964, "grad_norm": 0.023702035657112406, "learning_rate": 0.00015071700353144486, "loss": 0.4844, "step": 30395 }, { "epoch": 1.543336167836428, "grad_norm": 0.02932424299604861, "learning_rate": 0.00015055852587950985, "loss": 0.4498, "step": 30400 }, { "epoch": 1.5435900039344594, "grad_norm": 0.024319591793915026, "learning_rate": 0.0001504001168211937, "loss": 0.485, "step": 30405 }, { "epoch": 1.543843840032491, "grad_norm": 0.02366358061890231, "learning_rate": 0.00015024177638759106, "loss": 0.4566, "step": 30410 }, { "epoch": 1.5440976761305225, "grad_norm": 0.024118758618433097, "learning_rate": 0.00015008350460978358, "loss": 0.4397, "step": 30415 }, { "epoch": 1.544351512228554, "grad_norm": 0.020666341449150978, "learning_rate": 0.00014992530151883898, "loss": 0.4599, "step": 30420 }, { "epoch": 1.5446053483265856, "grad_norm": 0.02778511686784231, "learning_rate": 0.000149767167145812, "loss": 0.454, "step": 30425 }, { "epoch": 1.5448591844246171, "grad_norm": 0.02389674933505474, "learning_rate": 0.0001496091015217434, "loss": 0.463, "step": 30430 }, { "epoch": 1.5451130205226487, "grad_norm": 0.027551341882567568, "learning_rate": 0.00014945110467766087, "loss": 0.4622, "step": 30435 }, { "epoch": 1.5453668566206802, "grad_norm": 0.02501687597581702, "learning_rate": 0.0001492931766445782, "loss": 0.4764, "step": 30440 }, { "epoch": 1.5456206927187115, "grad_norm": 0.0233086153289092, "learning_rate": 0.0001491353174534961, "loss": 0.4686, "step": 30445 }, { "epoch": 1.545874528816743, "grad_norm": 0.02184154260928588, "learning_rate": 0.0001489775271354013, "loss": 0.4702, "step": 30450 }, { "epoch": 1.5461283649147746, "grad_norm": 0.024678224162576597, "learning_rate": 0.00014881980572126752, "loss": 0.483, "step": 30455 }, { "epoch": 1.5463822010128059, "grad_norm": 0.023783148168890343, "learning_rate": 0.00014866215324205423, "loss": 0.4683, "step": 30460 }, { "epoch": 1.5466360371108374, "grad_norm": 0.022198329720484546, "learning_rate": 0.00014850456972870845, "loss": 0.458, "step": 30465 }, { "epoch": 1.546889873208869, "grad_norm": 0.02239881749730953, "learning_rate": 0.00014834705521216262, "loss": 0.4715, "step": 30470 }, { "epoch": 1.5471437093069005, "grad_norm": 0.022857676745027398, "learning_rate": 0.0001481896097233363, "loss": 0.4553, "step": 30475 }, { "epoch": 1.547397545404932, "grad_norm": 0.021961872351366773, "learning_rate": 0.00014803223329313493, "loss": 0.458, "step": 30480 }, { "epoch": 1.5476513815029636, "grad_norm": 0.025052913119013042, "learning_rate": 0.00014787492595245107, "loss": 0.4809, "step": 30485 }, { "epoch": 1.547905217600995, "grad_norm": 0.023440120967819812, "learning_rate": 0.00014771768773216298, "loss": 0.4511, "step": 30490 }, { "epoch": 1.5481590536990266, "grad_norm": 0.021273780361796304, "learning_rate": 0.00014756051866313618, "loss": 0.4716, "step": 30495 }, { "epoch": 1.5484128897970582, "grad_norm": 0.17776653515516164, "learning_rate": 0.00014740341877622181, "loss": 0.4469, "step": 30500 }, { "epoch": 1.5486667258950897, "grad_norm": 0.028505550422634855, "learning_rate": 0.0001472463881022581, "loss": 0.4391, "step": 30505 }, { "epoch": 1.548920561993121, "grad_norm": 0.021992503155247888, "learning_rate": 0.00014708942667206903, "loss": 0.4806, "step": 30510 }, { "epoch": 1.5491743980911525, "grad_norm": 0.022741796670375448, "learning_rate": 0.0001469325345164657, "loss": 0.4632, "step": 30515 }, { "epoch": 1.549428234189184, "grad_norm": 0.020106646227831544, "learning_rate": 0.00014677571166624498, "loss": 0.4321, "step": 30520 }, { "epoch": 1.5496820702872154, "grad_norm": 0.02404819367108056, "learning_rate": 0.0001466189581521905, "loss": 0.5009, "step": 30525 }, { "epoch": 1.549935906385247, "grad_norm": 0.020931172832830407, "learning_rate": 0.00014646227400507238, "loss": 0.4553, "step": 30530 }, { "epoch": 1.5501897424832785, "grad_norm": 0.021373194488072105, "learning_rate": 0.00014630565925564666, "loss": 0.4716, "step": 30535 }, { "epoch": 1.55044357858131, "grad_norm": 0.0197633540678443, "learning_rate": 0.0001461491139346563, "loss": 0.4438, "step": 30540 }, { "epoch": 1.5506974146793415, "grad_norm": 0.02187526961268043, "learning_rate": 0.00014599263807283004, "loss": 0.475, "step": 30545 }, { "epoch": 1.550951250777373, "grad_norm": 0.026271464926687128, "learning_rate": 0.00014583623170088368, "loss": 0.4536, "step": 30550 }, { "epoch": 1.5512050868754046, "grad_norm": 0.02494946006654002, "learning_rate": 0.00014567989484951866, "loss": 0.4895, "step": 30555 }, { "epoch": 1.5514589229734361, "grad_norm": 0.021477409162667542, "learning_rate": 0.00014552362754942345, "loss": 0.4711, "step": 30560 }, { "epoch": 1.5517127590714677, "grad_norm": 0.02423951069240377, "learning_rate": 0.00014536742983127222, "loss": 0.4594, "step": 30565 }, { "epoch": 1.5519665951694992, "grad_norm": 0.022676565651745723, "learning_rate": 0.0001452113017257261, "loss": 0.4663, "step": 30570 }, { "epoch": 1.5522204312675305, "grad_norm": 0.02291640098589712, "learning_rate": 0.000145055243263432, "loss": 0.4895, "step": 30575 }, { "epoch": 1.552474267365562, "grad_norm": 0.028475726341260384, "learning_rate": 0.0001448992544750235, "loss": 0.4935, "step": 30580 }, { "epoch": 1.5527281034635936, "grad_norm": 0.027950998531424066, "learning_rate": 0.0001447433353911205, "loss": 0.4825, "step": 30585 }, { "epoch": 1.5529819395616251, "grad_norm": 0.03974053562564052, "learning_rate": 0.00014458748604232924, "loss": 0.4758, "step": 30590 }, { "epoch": 1.5532357756596564, "grad_norm": 0.022027661241062403, "learning_rate": 0.00014443170645924192, "loss": 0.4816, "step": 30595 }, { "epoch": 1.553489611757688, "grad_norm": 0.02238767269393634, "learning_rate": 0.0001442759966724375, "loss": 0.4531, "step": 30600 }, { "epoch": 1.5537434478557195, "grad_norm": 0.026046907786261127, "learning_rate": 0.0001441203567124808, "loss": 0.459, "step": 30605 }, { "epoch": 1.553997283953751, "grad_norm": 0.023211596978539077, "learning_rate": 0.00014396478660992353, "loss": 0.4597, "step": 30610 }, { "epoch": 1.5542511200517826, "grad_norm": 0.02283234851673069, "learning_rate": 0.00014380928639530282, "loss": 0.4729, "step": 30615 }, { "epoch": 1.554504956149814, "grad_norm": 0.022351336504833237, "learning_rate": 0.00014365385609914312, "loss": 0.4719, "step": 30620 }, { "epoch": 1.5547587922478456, "grad_norm": 0.022992716838049543, "learning_rate": 0.00014349849575195423, "loss": 0.444, "step": 30625 }, { "epoch": 1.5550126283458772, "grad_norm": 0.024021395143192816, "learning_rate": 0.00014334320538423285, "loss": 0.4633, "step": 30630 }, { "epoch": 1.5552664644439087, "grad_norm": 0.023643090769376898, "learning_rate": 0.00014318798502646146, "loss": 0.4528, "step": 30635 }, { "epoch": 1.55552030054194, "grad_norm": 0.01987202638142388, "learning_rate": 0.00014303283470910923, "loss": 0.4648, "step": 30640 }, { "epoch": 1.5557741366399715, "grad_norm": 0.022557897730457903, "learning_rate": 0.00014287775446263147, "loss": 0.4736, "step": 30645 }, { "epoch": 1.556027972738003, "grad_norm": 0.045642258711147084, "learning_rate": 0.0001427227443174694, "loss": 0.4524, "step": 30650 }, { "epoch": 1.5562818088360346, "grad_norm": 0.02528878503814024, "learning_rate": 0.00014256780430405103, "loss": 0.4454, "step": 30655 }, { "epoch": 1.556535644934066, "grad_norm": 0.026569740501743554, "learning_rate": 0.00014241293445279, "loss": 0.4783, "step": 30660 }, { "epoch": 1.5567894810320975, "grad_norm": 0.021501409027796143, "learning_rate": 0.00014225813479408684, "loss": 0.4639, "step": 30665 }, { "epoch": 1.557043317130129, "grad_norm": 0.02015158186890378, "learning_rate": 0.0001421034053583276, "loss": 0.452, "step": 30670 }, { "epoch": 1.5572971532281605, "grad_norm": 0.02290562338028314, "learning_rate": 0.00014194874617588522, "loss": 0.4564, "step": 30675 }, { "epoch": 1.557550989326192, "grad_norm": 0.021262535192917962, "learning_rate": 0.0001417941572771182, "loss": 0.4404, "step": 30680 }, { "epoch": 1.5578048254242236, "grad_norm": 0.023855140132822632, "learning_rate": 0.0001416396386923719, "loss": 0.4657, "step": 30685 }, { "epoch": 1.5580586615222551, "grad_norm": 0.026001147130948372, "learning_rate": 0.00014148519045197722, "loss": 0.4983, "step": 30690 }, { "epoch": 1.5583124976202867, "grad_norm": 0.025029246692496336, "learning_rate": 0.00014133081258625192, "loss": 0.4728, "step": 30695 }, { "epoch": 1.5585663337183182, "grad_norm": 0.02861011211966918, "learning_rate": 0.00014117650512549912, "loss": 0.4667, "step": 30700 }, { "epoch": 1.5588201698163497, "grad_norm": 0.020989857959052866, "learning_rate": 0.00014102226810000919, "loss": 0.4785, "step": 30705 }, { "epoch": 1.559074005914381, "grad_norm": 0.02520491934250732, "learning_rate": 0.0001408681015400577, "loss": 0.4715, "step": 30710 }, { "epoch": 1.5593278420124126, "grad_norm": 0.02884493504793044, "learning_rate": 0.000140714005475907, "loss": 0.4626, "step": 30715 }, { "epoch": 1.5595816781104441, "grad_norm": 0.023290080100767288, "learning_rate": 0.00014055997993780512, "loss": 0.4761, "step": 30720 }, { "epoch": 1.5598355142084754, "grad_norm": 0.024842802813832925, "learning_rate": 0.0001404060249559868, "loss": 0.4583, "step": 30725 }, { "epoch": 1.560089350306507, "grad_norm": 0.02281559318997292, "learning_rate": 0.00014025214056067237, "loss": 0.4806, "step": 30730 }, { "epoch": 1.5603431864045385, "grad_norm": 0.033399438534170926, "learning_rate": 0.00014009832678206887, "loss": 0.4709, "step": 30735 }, { "epoch": 1.56059702250257, "grad_norm": 0.028247300578566486, "learning_rate": 0.00013994458365036879, "loss": 0.4727, "step": 30740 }, { "epoch": 1.5608508586006016, "grad_norm": 0.02286113664149876, "learning_rate": 0.0001397909111957515, "loss": 0.4916, "step": 30745 }, { "epoch": 1.561104694698633, "grad_norm": 0.02910635163937602, "learning_rate": 0.00013963730944838181, "loss": 0.4586, "step": 30750 }, { "epoch": 1.5613585307966646, "grad_norm": 0.028491477035486, "learning_rate": 0.00013948377843841137, "loss": 0.4695, "step": 30755 }, { "epoch": 1.5616123668946962, "grad_norm": 0.0224890085130081, "learning_rate": 0.00013933031819597714, "loss": 0.4666, "step": 30760 }, { "epoch": 1.5618662029927277, "grad_norm": 0.019158984174170664, "learning_rate": 0.00013917692875120276, "loss": 0.4534, "step": 30765 }, { "epoch": 1.5621200390907592, "grad_norm": 0.023081415384837618, "learning_rate": 0.00013902361013419807, "loss": 0.4889, "step": 30770 }, { "epoch": 1.5623738751887906, "grad_norm": 0.023710781179949143, "learning_rate": 0.0001388703623750583, "loss": 0.4508, "step": 30775 }, { "epoch": 1.562627711286822, "grad_norm": 0.02229210338649228, "learning_rate": 0.00013871718550386564, "loss": 0.4581, "step": 30780 }, { "epoch": 1.5628815473848536, "grad_norm": 0.02255486124550216, "learning_rate": 0.00013856407955068755, "loss": 0.468, "step": 30785 }, { "epoch": 1.563135383482885, "grad_norm": 0.028951039267342597, "learning_rate": 0.0001384110445455784, "loss": 0.4481, "step": 30790 }, { "epoch": 1.5633892195809165, "grad_norm": 0.03206682793554021, "learning_rate": 0.00013825808051857774, "loss": 0.4628, "step": 30795 }, { "epoch": 1.563643055678948, "grad_norm": 0.03315618731467156, "learning_rate": 0.00013810518749971207, "loss": 0.4913, "step": 30800 }, { "epoch": 1.5638968917769795, "grad_norm": 0.020986799281081246, "learning_rate": 0.00013795236551899316, "loss": 0.4788, "step": 30805 }, { "epoch": 1.564150727875011, "grad_norm": 0.027439058381662006, "learning_rate": 0.0001377996146064195, "loss": 0.5025, "step": 30810 }, { "epoch": 1.5644045639730426, "grad_norm": 0.03575554930996507, "learning_rate": 0.00013764693479197503, "loss": 0.4559, "step": 30815 }, { "epoch": 1.5646584000710742, "grad_norm": 0.022347257231482485, "learning_rate": 0.00013749432610563045, "loss": 0.464, "step": 30820 }, { "epoch": 1.5649122361691057, "grad_norm": 0.022141752765691802, "learning_rate": 0.00013734178857734147, "loss": 0.4646, "step": 30825 }, { "epoch": 1.5651660722671372, "grad_norm": 0.0205841238198598, "learning_rate": 0.0001371893222370511, "loss": 0.4786, "step": 30830 }, { "epoch": 1.5654199083651688, "grad_norm": 0.02330750400737154, "learning_rate": 0.00013703692711468734, "loss": 0.479, "step": 30835 }, { "epoch": 1.5656737444632, "grad_norm": 0.02083094398512009, "learning_rate": 0.00013688460324016484, "loss": 0.4422, "step": 30840 }, { "epoch": 1.5659275805612316, "grad_norm": 0.02046271028364385, "learning_rate": 0.00013673235064338375, "loss": 0.4481, "step": 30845 }, { "epoch": 1.5661814166592631, "grad_norm": 0.020193980043060795, "learning_rate": 0.00013658016935423067, "loss": 0.4385, "step": 30850 }, { "epoch": 1.5664352527572944, "grad_norm": 0.023275239337087233, "learning_rate": 0.0001364280594025779, "loss": 0.4721, "step": 30855 }, { "epoch": 1.566689088855326, "grad_norm": 0.021162190591902365, "learning_rate": 0.00013627602081828412, "loss": 0.4614, "step": 30860 }, { "epoch": 1.5669429249533575, "grad_norm": 0.020039658211474635, "learning_rate": 0.00013612405363119334, "loss": 0.4461, "step": 30865 }, { "epoch": 1.567196761051389, "grad_norm": 0.023617453564502285, "learning_rate": 0.00013597215787113638, "loss": 0.4713, "step": 30870 }, { "epoch": 1.5674505971494206, "grad_norm": 0.025261328277977876, "learning_rate": 0.00013582033356792923, "loss": 0.464, "step": 30875 }, { "epoch": 1.5677044332474521, "grad_norm": 0.022210536243253937, "learning_rate": 0.00013566858075137462, "loss": 0.4461, "step": 30880 }, { "epoch": 1.5679582693454837, "grad_norm": 0.02139787157750851, "learning_rate": 0.00013551689945126056, "loss": 0.4619, "step": 30885 }, { "epoch": 1.5682121054435152, "grad_norm": 0.02347373800866568, "learning_rate": 0.0001353652896973614, "loss": 0.4681, "step": 30890 }, { "epoch": 1.5684659415415467, "grad_norm": 0.027527125241354457, "learning_rate": 0.00013521375151943766, "loss": 0.4738, "step": 30895 }, { "epoch": 1.5687197776395783, "grad_norm": 0.023524626568918166, "learning_rate": 0.0001350622849472351, "loss": 0.4821, "step": 30900 }, { "epoch": 1.5689736137376096, "grad_norm": 0.02466639732929348, "learning_rate": 0.00013491089001048628, "loss": 0.4721, "step": 30905 }, { "epoch": 1.569227449835641, "grad_norm": 0.027029307313648266, "learning_rate": 0.00013475956673890887, "loss": 0.4678, "step": 30910 }, { "epoch": 1.5694812859336726, "grad_norm": 0.03282760997469555, "learning_rate": 0.0001346083151622072, "loss": 0.4394, "step": 30915 }, { "epoch": 1.5697351220317042, "grad_norm": 0.02789891853127172, "learning_rate": 0.00013445713531007092, "loss": 0.4628, "step": 30920 }, { "epoch": 1.5699889581297355, "grad_norm": 0.02426468888804551, "learning_rate": 0.00013430602721217617, "loss": 0.48, "step": 30925 }, { "epoch": 1.570242794227767, "grad_norm": 0.021681960512083976, "learning_rate": 0.0001341549908981844, "loss": 0.4885, "step": 30930 }, { "epoch": 1.5704966303257986, "grad_norm": 0.022832994313996206, "learning_rate": 0.00013400402639774362, "loss": 0.4431, "step": 30935 }, { "epoch": 1.57075046642383, "grad_norm": 0.024825658736222323, "learning_rate": 0.00013385313374048708, "loss": 0.4614, "step": 30940 }, { "epoch": 1.5710043025218616, "grad_norm": 0.021382123283392958, "learning_rate": 0.0001337023129560344, "loss": 0.4473, "step": 30945 }, { "epoch": 1.5712581386198932, "grad_norm": 0.022778600002653724, "learning_rate": 0.000133551564073991, "loss": 0.4876, "step": 30950 }, { "epoch": 1.5715119747179247, "grad_norm": 0.022378373834901225, "learning_rate": 0.0001334008871239482, "loss": 0.4689, "step": 30955 }, { "epoch": 1.5717658108159562, "grad_norm": 0.022126622701371928, "learning_rate": 0.0001332502821354829, "loss": 0.4686, "step": 30960 }, { "epoch": 1.5720196469139878, "grad_norm": 0.02328246824343477, "learning_rate": 0.00013309974913815843, "loss": 0.49, "step": 30965 }, { "epoch": 1.5722734830120193, "grad_norm": 0.022513981649263024, "learning_rate": 0.0001329492881615233, "loss": 0.4663, "step": 30970 }, { "epoch": 1.5725273191100506, "grad_norm": 0.02648229761719871, "learning_rate": 0.00013279889923511256, "loss": 0.4824, "step": 30975 }, { "epoch": 1.5727811552080821, "grad_norm": 0.031697592758533163, "learning_rate": 0.00013264858238844652, "loss": 0.4597, "step": 30980 }, { "epoch": 1.5730349913061137, "grad_norm": 0.023692598750396664, "learning_rate": 0.0001324983376510319, "loss": 0.4792, "step": 30985 }, { "epoch": 1.573288827404145, "grad_norm": 0.02253078704005018, "learning_rate": 0.0001323481650523608, "loss": 0.4632, "step": 30990 }, { "epoch": 1.5735426635021765, "grad_norm": 0.023678013208962723, "learning_rate": 0.00013219806462191154, "loss": 0.4776, "step": 30995 }, { "epoch": 1.573796499600208, "grad_norm": 0.022573287303128707, "learning_rate": 0.00013204803638914791, "loss": 0.5042, "step": 31000 }, { "epoch": 1.5740503356982396, "grad_norm": 0.022353046325857568, "learning_rate": 0.00013189808038351953, "loss": 0.468, "step": 31005 }, { "epoch": 1.5743041717962711, "grad_norm": 0.019324948043265216, "learning_rate": 0.00013174819663446254, "loss": 0.4637, "step": 31010 }, { "epoch": 1.5745580078943027, "grad_norm": 0.02228322129344453, "learning_rate": 0.00013159838517139795, "loss": 0.4464, "step": 31015 }, { "epoch": 1.5748118439923342, "grad_norm": 0.020602212019198058, "learning_rate": 0.00013144864602373325, "loss": 0.4768, "step": 31020 }, { "epoch": 1.5750656800903657, "grad_norm": 0.021221567403693256, "learning_rate": 0.0001312989792208612, "loss": 0.4386, "step": 31025 }, { "epoch": 1.5753195161883973, "grad_norm": 0.022718156061845098, "learning_rate": 0.00013114938479216105, "loss": 0.4555, "step": 31030 }, { "epoch": 1.5755733522864288, "grad_norm": 0.026040312386550257, "learning_rate": 0.000130999862766997, "loss": 0.4637, "step": 31035 }, { "epoch": 1.5758271883844601, "grad_norm": 0.025384578710037565, "learning_rate": 0.00013085041317471984, "loss": 0.4709, "step": 31040 }, { "epoch": 1.5760810244824917, "grad_norm": 0.019114058763165305, "learning_rate": 0.00013070103604466548, "loss": 0.43, "step": 31045 }, { "epoch": 1.5763348605805232, "grad_norm": 0.02267175025832458, "learning_rate": 0.00013055173140615623, "loss": 0.4853, "step": 31050 }, { "epoch": 1.5765886966785545, "grad_norm": 0.02231164473017753, "learning_rate": 0.00013040249928849952, "loss": 0.4755, "step": 31055 }, { "epoch": 1.576842532776586, "grad_norm": 0.025173835816743373, "learning_rate": 0.00013025333972098912, "loss": 0.4666, "step": 31060 }, { "epoch": 1.5770963688746176, "grad_norm": 0.022930909710894763, "learning_rate": 0.00013010425273290394, "loss": 0.4614, "step": 31065 }, { "epoch": 1.577350204972649, "grad_norm": 0.019310788047425583, "learning_rate": 0.00012995523835350958, "loss": 0.4661, "step": 31070 }, { "epoch": 1.5776040410706806, "grad_norm": 0.033091726187777254, "learning_rate": 0.0001298062966120564, "loss": 0.4882, "step": 31075 }, { "epoch": 1.5778578771687122, "grad_norm": 0.02326961130506038, "learning_rate": 0.00012965742753778115, "loss": 0.4549, "step": 31080 }, { "epoch": 1.5781117132667437, "grad_norm": 0.03266197407987793, "learning_rate": 0.00012950863115990602, "loss": 0.4458, "step": 31085 }, { "epoch": 1.5783655493647752, "grad_norm": 0.021876108652836432, "learning_rate": 0.00012935990750763876, "loss": 0.4695, "step": 31090 }, { "epoch": 1.5786193854628068, "grad_norm": 0.026393027361750386, "learning_rate": 0.00012921125661017347, "loss": 0.4486, "step": 31095 }, { "epoch": 1.5788732215608383, "grad_norm": 0.02184839617602591, "learning_rate": 0.0001290626784966892, "loss": 0.4432, "step": 31100 }, { "epoch": 1.5791270576588696, "grad_norm": 0.029331145194564957, "learning_rate": 0.00012891417319635146, "loss": 0.4636, "step": 31105 }, { "epoch": 1.5793808937569012, "grad_norm": 0.02774728403803806, "learning_rate": 0.0001287657407383107, "loss": 0.4598, "step": 31110 }, { "epoch": 1.5796347298549327, "grad_norm": 0.02128441451715541, "learning_rate": 0.0001286173811517039, "loss": 0.461, "step": 31115 }, { "epoch": 1.579888565952964, "grad_norm": 0.026163707221929008, "learning_rate": 0.00012846909446565297, "loss": 0.4585, "step": 31120 }, { "epoch": 1.5801424020509955, "grad_norm": 0.022788567101665227, "learning_rate": 0.00012832088070926595, "loss": 0.4514, "step": 31125 }, { "epoch": 1.580396238149027, "grad_norm": 0.023031836651392787, "learning_rate": 0.00012817273991163648, "loss": 0.4727, "step": 31130 }, { "epoch": 1.5806500742470586, "grad_norm": 0.022560904451555855, "learning_rate": 0.00012802467210184398, "loss": 0.4728, "step": 31135 }, { "epoch": 1.5809039103450901, "grad_norm": 0.024809637462144523, "learning_rate": 0.00012787667730895325, "loss": 0.4804, "step": 31140 }, { "epoch": 1.5811577464431217, "grad_norm": 0.027218959388466347, "learning_rate": 0.00012772875556201507, "loss": 0.4705, "step": 31145 }, { "epoch": 1.5814115825411532, "grad_norm": 0.022561455510123646, "learning_rate": 0.0001275809068900655, "loss": 0.45, "step": 31150 }, { "epoch": 1.5816654186391848, "grad_norm": 0.022204515235270998, "learning_rate": 0.00012743313132212685, "loss": 0.4892, "step": 31155 }, { "epoch": 1.5819192547372163, "grad_norm": 0.022075257403621297, "learning_rate": 0.00012728542888720633, "loss": 0.4829, "step": 31160 }, { "epoch": 1.5821730908352478, "grad_norm": 0.028454740968265037, "learning_rate": 0.0001271377996142976, "loss": 0.4713, "step": 31165 }, { "epoch": 1.5824269269332791, "grad_norm": 0.02252943808074146, "learning_rate": 0.00012699024353237921, "loss": 0.4869, "step": 31170 }, { "epoch": 1.5826807630313107, "grad_norm": 0.02494152110893949, "learning_rate": 0.0001268427606704159, "loss": 0.46, "step": 31175 }, { "epoch": 1.5829345991293422, "grad_norm": 0.02655046766353095, "learning_rate": 0.00012669535105735763, "loss": 0.4563, "step": 31180 }, { "epoch": 1.5831884352273737, "grad_norm": 0.023971988910522334, "learning_rate": 0.0001265480147221403, "loss": 0.4794, "step": 31185 }, { "epoch": 1.583442271325405, "grad_norm": 0.02428090384726735, "learning_rate": 0.00012640075169368536, "loss": 0.4667, "step": 31190 }, { "epoch": 1.5836961074234366, "grad_norm": 0.02616066326968095, "learning_rate": 0.0001262535620008996, "loss": 0.4711, "step": 31195 }, { "epoch": 1.5839499435214681, "grad_norm": 0.029430410330922516, "learning_rate": 0.00012610644567267592, "loss": 0.4481, "step": 31200 }, { "epoch": 1.5842037796194997, "grad_norm": 0.023433715884341892, "learning_rate": 0.0001259594027378922, "loss": 0.4922, "step": 31205 }, { "epoch": 1.5844576157175312, "grad_norm": 0.022517731868072016, "learning_rate": 0.00012581243322541252, "loss": 0.5017, "step": 31210 }, { "epoch": 1.5847114518155627, "grad_norm": 0.019566396686039718, "learning_rate": 0.000125665537164086, "loss": 0.4659, "step": 31215 }, { "epoch": 1.5849652879135943, "grad_norm": 0.02130331252828402, "learning_rate": 0.00012551871458274787, "loss": 0.4998, "step": 31220 }, { "epoch": 1.5852191240116258, "grad_norm": 0.021977116905423498, "learning_rate": 0.0001253719655102184, "loss": 0.4663, "step": 31225 }, { "epoch": 1.5854729601096573, "grad_norm": 0.025769773014901368, "learning_rate": 0.0001252252899753039, "loss": 0.4722, "step": 31230 }, { "epoch": 1.5857267962076889, "grad_norm": 0.028231473801800827, "learning_rate": 0.00012507868800679594, "loss": 0.4624, "step": 31235 }, { "epoch": 1.5859806323057202, "grad_norm": 0.022650562195746557, "learning_rate": 0.00012493215963347188, "loss": 0.4796, "step": 31240 }, { "epoch": 1.5862344684037517, "grad_norm": 0.029622948175144603, "learning_rate": 0.00012478570488409413, "loss": 0.4543, "step": 31245 }, { "epoch": 1.5864883045017832, "grad_norm": 0.021229634657799406, "learning_rate": 0.00012463932378741166, "loss": 0.4534, "step": 31250 }, { "epoch": 1.5867421405998146, "grad_norm": 0.02759828595048276, "learning_rate": 0.00012449301637215782, "loss": 0.4761, "step": 31255 }, { "epoch": 1.586995976697846, "grad_norm": 0.022925858589321915, "learning_rate": 0.0001243467826670524, "loss": 0.4405, "step": 31260 }, { "epoch": 1.5872498127958776, "grad_norm": 0.023105250881910747, "learning_rate": 0.00012420062270079995, "loss": 0.4712, "step": 31265 }, { "epoch": 1.5875036488939092, "grad_norm": 0.021868594752397733, "learning_rate": 0.00012405453650209136, "loss": 0.4677, "step": 31270 }, { "epoch": 1.5877574849919407, "grad_norm": 0.03077895247333048, "learning_rate": 0.00012390852409960223, "loss": 0.4539, "step": 31275 }, { "epoch": 1.5880113210899722, "grad_norm": 0.02329117852026229, "learning_rate": 0.00012376258552199444, "loss": 0.4749, "step": 31280 }, { "epoch": 1.5882651571880038, "grad_norm": 0.021664079596118983, "learning_rate": 0.00012361672079791469, "loss": 0.4691, "step": 31285 }, { "epoch": 1.5885189932860353, "grad_norm": 0.025983796005873985, "learning_rate": 0.00012347092995599574, "loss": 0.4481, "step": 31290 }, { "epoch": 1.5887728293840668, "grad_norm": 0.027325008699016377, "learning_rate": 0.00012332521302485533, "loss": 0.4636, "step": 31295 }, { "epoch": 1.5890266654820984, "grad_norm": 0.020269269921609292, "learning_rate": 0.00012317957003309726, "loss": 0.425, "step": 31300 }, { "epoch": 1.5892805015801297, "grad_norm": 0.019317983071545985, "learning_rate": 0.00012303400100931029, "loss": 0.4336, "step": 31305 }, { "epoch": 1.5895343376781612, "grad_norm": 0.02161940503170865, "learning_rate": 0.00012288850598206902, "loss": 0.4696, "step": 31310 }, { "epoch": 1.5897881737761927, "grad_norm": 0.026885333685387532, "learning_rate": 0.00012274308497993346, "loss": 0.4598, "step": 31315 }, { "epoch": 1.590042009874224, "grad_norm": 0.023792733517593017, "learning_rate": 0.0001225977380314488, "loss": 0.4553, "step": 31320 }, { "epoch": 1.5902958459722556, "grad_norm": 0.02653728675273625, "learning_rate": 0.00012245246516514626, "loss": 0.4675, "step": 31325 }, { "epoch": 1.5905496820702871, "grad_norm": 0.021186203668840244, "learning_rate": 0.00012230726640954183, "loss": 0.4436, "step": 31330 }, { "epoch": 1.5908035181683187, "grad_norm": 0.025039321378471157, "learning_rate": 0.0001221621417931375, "loss": 0.4697, "step": 31335 }, { "epoch": 1.5910573542663502, "grad_norm": 0.024695574034159236, "learning_rate": 0.00012201709134442041, "loss": 0.4479, "step": 31340 }, { "epoch": 1.5913111903643817, "grad_norm": 0.020718266810303008, "learning_rate": 0.00012187211509186341, "loss": 0.479, "step": 31345 }, { "epoch": 1.5915650264624133, "grad_norm": 0.02115176797212826, "learning_rate": 0.00012172721306392437, "loss": 0.4714, "step": 31350 }, { "epoch": 1.5918188625604448, "grad_norm": 0.02176260766701053, "learning_rate": 0.00012158238528904707, "loss": 0.4647, "step": 31355 }, { "epoch": 1.5920726986584763, "grad_norm": 0.02570121404643556, "learning_rate": 0.00012143763179566026, "loss": 0.456, "step": 31360 }, { "epoch": 1.5923265347565079, "grad_norm": 0.02737621105346301, "learning_rate": 0.00012129295261217843, "loss": 0.483, "step": 31365 }, { "epoch": 1.5925803708545392, "grad_norm": 0.026854830654727595, "learning_rate": 0.0001211483477670014, "loss": 0.457, "step": 31370 }, { "epoch": 1.5928342069525707, "grad_norm": 0.02062145184869095, "learning_rate": 0.0001210038172885145, "loss": 0.4471, "step": 31375 }, { "epoch": 1.5930880430506023, "grad_norm": 0.02532668196949466, "learning_rate": 0.00012085936120508811, "loss": 0.4475, "step": 31380 }, { "epoch": 1.5933418791486336, "grad_norm": 0.020702904030837858, "learning_rate": 0.00012071497954507843, "loss": 0.4668, "step": 31385 }, { "epoch": 1.593595715246665, "grad_norm": 0.031141316623061212, "learning_rate": 0.00012057067233682667, "loss": 0.4714, "step": 31390 }, { "epoch": 1.5938495513446966, "grad_norm": 0.028963886369253525, "learning_rate": 0.00012042643960865985, "loss": 0.4677, "step": 31395 }, { "epoch": 1.5941033874427282, "grad_norm": 0.02100153262498024, "learning_rate": 0.00012028228138888986, "loss": 0.4844, "step": 31400 }, { "epoch": 1.5943572235407597, "grad_norm": 0.021293170094263958, "learning_rate": 0.00012013819770581458, "loss": 0.4344, "step": 31405 }, { "epoch": 1.5946110596387912, "grad_norm": 0.022079676785804837, "learning_rate": 0.00011999418858771649, "loss": 0.472, "step": 31410 }, { "epoch": 1.5948648957368228, "grad_norm": 0.020399139507724554, "learning_rate": 0.00011985025406286432, "loss": 0.4412, "step": 31415 }, { "epoch": 1.5951187318348543, "grad_norm": 0.020737921370516353, "learning_rate": 0.00011970639415951129, "loss": 0.4782, "step": 31420 }, { "epoch": 1.5953725679328858, "grad_norm": 0.021206624778793808, "learning_rate": 0.00011956260890589655, "loss": 0.459, "step": 31425 }, { "epoch": 1.5956264040309174, "grad_norm": 0.0245333591767397, "learning_rate": 0.00011941889833024461, "loss": 0.4675, "step": 31430 }, { "epoch": 1.5958802401289487, "grad_norm": 0.025273714859497986, "learning_rate": 0.0001192752624607648, "loss": 0.4753, "step": 31435 }, { "epoch": 1.5961340762269802, "grad_norm": 0.02473632336996924, "learning_rate": 0.00011913170132565248, "loss": 0.4207, "step": 31440 }, { "epoch": 1.5963879123250118, "grad_norm": 0.024196910552343232, "learning_rate": 0.00011898821495308764, "loss": 0.4838, "step": 31445 }, { "epoch": 1.5966417484230433, "grad_norm": 0.024546317457647595, "learning_rate": 0.00011884480337123621, "loss": 0.4766, "step": 31450 }, { "epoch": 1.5968955845210746, "grad_norm": 0.021580443853018175, "learning_rate": 0.00011870146660824899, "loss": 0.4618, "step": 31455 }, { "epoch": 1.5971494206191061, "grad_norm": 0.021030168327500064, "learning_rate": 0.00011855820469226242, "loss": 0.4697, "step": 31460 }, { "epoch": 1.5974032567171377, "grad_norm": 0.02542968053225913, "learning_rate": 0.00011841501765139795, "loss": 0.4583, "step": 31465 }, { "epoch": 1.5976570928151692, "grad_norm": 0.0245457635447329, "learning_rate": 0.00011827190551376265, "loss": 0.469, "step": 31470 }, { "epoch": 1.5979109289132007, "grad_norm": 0.022086900914002925, "learning_rate": 0.00011812886830744846, "loss": 0.404, "step": 31475 }, { "epoch": 1.5981647650112323, "grad_norm": 0.024647282768808704, "learning_rate": 0.00011798590606053322, "loss": 0.4778, "step": 31480 }, { "epoch": 1.5984186011092638, "grad_norm": 0.02520833814662268, "learning_rate": 0.00011784301880107917, "loss": 0.4682, "step": 31485 }, { "epoch": 1.5986724372072953, "grad_norm": 0.030999706879264646, "learning_rate": 0.00011770020655713509, "loss": 0.4543, "step": 31490 }, { "epoch": 1.5989262733053269, "grad_norm": 0.028404831004490545, "learning_rate": 0.00011755746935673372, "loss": 0.4685, "step": 31495 }, { "epoch": 1.5991801094033582, "grad_norm": 0.02067687233055602, "learning_rate": 0.00011741480722789405, "loss": 0.4439, "step": 31500 }, { "epoch": 1.5994339455013897, "grad_norm": 0.03005348926340442, "learning_rate": 0.00011727222019861966, "loss": 0.4475, "step": 31505 }, { "epoch": 1.5996877815994213, "grad_norm": 0.025221995139169794, "learning_rate": 0.0001171297082968999, "loss": 0.4601, "step": 31510 }, { "epoch": 1.5999416176974528, "grad_norm": 0.024617531922327757, "learning_rate": 0.00011698727155070888, "loss": 0.485, "step": 31515 }, { "epoch": 1.6001954537954841, "grad_norm": 0.02522929219903767, "learning_rate": 0.0001168449099880065, "loss": 0.4684, "step": 31520 }, { "epoch": 1.6004492898935156, "grad_norm": 0.02188538172758521, "learning_rate": 0.0001167026236367374, "loss": 0.4667, "step": 31525 }, { "epoch": 1.6007031259915472, "grad_norm": 0.03015421394199565, "learning_rate": 0.00011656041252483185, "loss": 0.4609, "step": 31530 }, { "epoch": 1.6009569620895787, "grad_norm": 0.02771728039572498, "learning_rate": 0.00011641827668020504, "loss": 0.4537, "step": 31535 }, { "epoch": 1.6012107981876103, "grad_norm": 0.024448943269020997, "learning_rate": 0.00011627621613075772, "loss": 0.4735, "step": 31540 }, { "epoch": 1.6014646342856418, "grad_norm": 0.024047651615317647, "learning_rate": 0.00011613423090437536, "loss": 0.4797, "step": 31545 }, { "epoch": 1.6017184703836733, "grad_norm": 0.022908547869978178, "learning_rate": 0.0001159923210289292, "loss": 0.4419, "step": 31550 }, { "epoch": 1.6019723064817049, "grad_norm": 0.027921713893488884, "learning_rate": 0.00011585048653227548, "loss": 0.4529, "step": 31555 }, { "epoch": 1.6022261425797364, "grad_norm": 0.022455295711872506, "learning_rate": 0.00011570872744225541, "loss": 0.4689, "step": 31560 }, { "epoch": 1.602479978677768, "grad_norm": 0.029028863638509785, "learning_rate": 0.0001155670437866958, "loss": 0.4693, "step": 31565 }, { "epoch": 1.6027338147757992, "grad_norm": 0.026146904467255937, "learning_rate": 0.00011542543559340817, "loss": 0.4802, "step": 31570 }, { "epoch": 1.6029876508738308, "grad_norm": 0.021361160108741662, "learning_rate": 0.0001152839028901898, "loss": 0.4735, "step": 31575 }, { "epoch": 1.6032414869718623, "grad_norm": 0.022332608927233596, "learning_rate": 0.00011514244570482263, "loss": 0.4405, "step": 31580 }, { "epoch": 1.6034953230698936, "grad_norm": 0.023861647792640446, "learning_rate": 0.00011500106406507416, "loss": 0.4384, "step": 31585 }, { "epoch": 1.6037491591679252, "grad_norm": 0.03455619002140877, "learning_rate": 0.00011485975799869675, "loss": 0.4781, "step": 31590 }, { "epoch": 1.6040029952659567, "grad_norm": 0.032908225955371004, "learning_rate": 0.00011471852753342826, "loss": 0.4617, "step": 31595 }, { "epoch": 1.6042568313639882, "grad_norm": 0.0248333675995091, "learning_rate": 0.00011457737269699125, "loss": 0.4736, "step": 31600 }, { "epoch": 1.6045106674620198, "grad_norm": 0.023654932219431005, "learning_rate": 0.00011443629351709394, "loss": 0.4893, "step": 31605 }, { "epoch": 1.6047645035600513, "grad_norm": 0.019152490721215262, "learning_rate": 0.00011429529002142941, "loss": 0.4619, "step": 31610 }, { "epoch": 1.6050183396580828, "grad_norm": 0.024857396949531627, "learning_rate": 0.00011415436223767606, "loss": 0.4452, "step": 31615 }, { "epoch": 1.6052721757561144, "grad_norm": 0.02790240391915016, "learning_rate": 0.00011401351019349704, "loss": 0.4561, "step": 31620 }, { "epoch": 1.605526011854146, "grad_norm": 0.024084282543615547, "learning_rate": 0.00011387273391654118, "loss": 0.4774, "step": 31625 }, { "epoch": 1.6057798479521774, "grad_norm": 0.02920146578824623, "learning_rate": 0.00011373203343444194, "loss": 0.4789, "step": 31630 }, { "epoch": 1.6060336840502087, "grad_norm": 0.029127523345097956, "learning_rate": 0.00011359140877481833, "loss": 0.464, "step": 31635 }, { "epoch": 1.6062875201482403, "grad_norm": 0.022090138726677838, "learning_rate": 0.00011345085996527405, "loss": 0.4806, "step": 31640 }, { "epoch": 1.6065413562462718, "grad_norm": 0.026141716192201573, "learning_rate": 0.00011331038703339836, "loss": 0.4589, "step": 31645 }, { "epoch": 1.6067951923443031, "grad_norm": 0.02162959028748172, "learning_rate": 0.00011316999000676514, "loss": 0.4543, "step": 31650 }, { "epoch": 1.6070490284423347, "grad_norm": 0.023063059924878233, "learning_rate": 0.00011302966891293392, "loss": 0.47, "step": 31655 }, { "epoch": 1.6073028645403662, "grad_norm": 0.027292009363410075, "learning_rate": 0.00011288942377944872, "loss": 0.4909, "step": 31660 }, { "epoch": 1.6075567006383977, "grad_norm": 0.04681074491676968, "learning_rate": 0.00011274925463383912, "loss": 0.4656, "step": 31665 }, { "epoch": 1.6078105367364293, "grad_norm": 0.01979398122113408, "learning_rate": 0.00011260916150361977, "loss": 0.4488, "step": 31670 }, { "epoch": 1.6080643728344608, "grad_norm": 0.020250869022994698, "learning_rate": 0.00011246914441628992, "loss": 0.4782, "step": 31675 }, { "epoch": 1.6083182089324923, "grad_norm": 0.019872795940748027, "learning_rate": 0.00011232920339933461, "loss": 0.4777, "step": 31680 }, { "epoch": 1.6085720450305239, "grad_norm": 0.02316643741156545, "learning_rate": 0.00011218933848022317, "loss": 0.493, "step": 31685 }, { "epoch": 1.6088258811285554, "grad_norm": 0.02234673533736629, "learning_rate": 0.00011204954968641074, "loss": 0.4629, "step": 31690 }, { "epoch": 1.609079717226587, "grad_norm": 0.022362390983495437, "learning_rate": 0.00011190983704533685, "loss": 0.4572, "step": 31695 }, { "epoch": 1.6093335533246182, "grad_norm": 0.021690554415178736, "learning_rate": 0.00011177020058442672, "loss": 0.4685, "step": 31700 }, { "epoch": 1.6095873894226498, "grad_norm": 0.02776519862681794, "learning_rate": 0.00011163064033108994, "loss": 0.4662, "step": 31705 }, { "epoch": 1.6098412255206813, "grad_norm": 0.022607748514862483, "learning_rate": 0.00011149115631272183, "loss": 0.4853, "step": 31710 }, { "epoch": 1.6100950616187126, "grad_norm": 0.041421241284070646, "learning_rate": 0.00011135174855670205, "loss": 0.458, "step": 31715 }, { "epoch": 1.6103488977167442, "grad_norm": 0.022255670798694553, "learning_rate": 0.00011121241709039604, "loss": 0.4626, "step": 31720 }, { "epoch": 1.6106027338147757, "grad_norm": 0.02393315473991221, "learning_rate": 0.00011107316194115352, "loss": 0.482, "step": 31725 }, { "epoch": 1.6108565699128072, "grad_norm": 0.022345117574356848, "learning_rate": 0.00011093398313630975, "loss": 0.4787, "step": 31730 }, { "epoch": 1.6111104060108388, "grad_norm": 0.021278319102440692, "learning_rate": 0.00011079488070318477, "loss": 0.4639, "step": 31735 }, { "epoch": 1.6113642421088703, "grad_norm": 0.02130312138742558, "learning_rate": 0.00011065585466908395, "loss": 0.4675, "step": 31740 }, { "epoch": 1.6116180782069018, "grad_norm": 0.020984294776622912, "learning_rate": 0.00011051690506129702, "loss": 0.4446, "step": 31745 }, { "epoch": 1.6118719143049334, "grad_norm": 0.023292694738733775, "learning_rate": 0.00011037803190709945, "loss": 0.4559, "step": 31750 }, { "epoch": 1.612125750402965, "grad_norm": 0.021942067180995492, "learning_rate": 0.00011023923523375102, "loss": 0.4573, "step": 31755 }, { "epoch": 1.6123795865009964, "grad_norm": 0.02815465580690162, "learning_rate": 0.00011010051506849711, "loss": 0.4792, "step": 31760 }, { "epoch": 1.6126334225990278, "grad_norm": 0.021597418410303274, "learning_rate": 0.0001099618714385675, "loss": 0.4643, "step": 31765 }, { "epoch": 1.6128872586970593, "grad_norm": 0.02364408717712364, "learning_rate": 0.0001098233043711776, "loss": 0.4706, "step": 31770 }, { "epoch": 1.6131410947950908, "grad_norm": 0.027433611435712636, "learning_rate": 0.00010968481389352708, "loss": 0.4131, "step": 31775 }, { "epoch": 1.6133949308931224, "grad_norm": 0.02152935633696659, "learning_rate": 0.00010954640003280125, "loss": 0.44, "step": 31780 }, { "epoch": 1.6136487669911537, "grad_norm": 0.026076111440461344, "learning_rate": 0.00010940806281616977, "loss": 0.4446, "step": 31785 }, { "epoch": 1.6139026030891852, "grad_norm": 0.023558985051178333, "learning_rate": 0.00010926980227078765, "loss": 0.4795, "step": 31790 }, { "epoch": 1.6141564391872167, "grad_norm": 0.025471211297723347, "learning_rate": 0.00010913161842379493, "loss": 0.442, "step": 31795 }, { "epoch": 1.6144102752852483, "grad_norm": 0.020061384357153312, "learning_rate": 0.00010899351130231611, "loss": 0.4324, "step": 31800 }, { "epoch": 1.6146641113832798, "grad_norm": 0.032744538736248614, "learning_rate": 0.00010885548093346126, "loss": 0.468, "step": 31805 }, { "epoch": 1.6149179474813113, "grad_norm": 0.02071627920287155, "learning_rate": 0.00010871752734432466, "loss": 0.4439, "step": 31810 }, { "epoch": 1.6151717835793429, "grad_norm": 0.031839601183315996, "learning_rate": 0.00010857965056198633, "loss": 0.4447, "step": 31815 }, { "epoch": 1.6154256196773744, "grad_norm": 0.024140957539309883, "learning_rate": 0.00010844185061351036, "loss": 0.4768, "step": 31820 }, { "epoch": 1.615679455775406, "grad_norm": 0.020597217507985967, "learning_rate": 0.00010830412752594659, "loss": 0.4638, "step": 31825 }, { "epoch": 1.6159332918734375, "grad_norm": 0.02099597835732562, "learning_rate": 0.00010816648132632912, "loss": 0.4407, "step": 31830 }, { "epoch": 1.6161871279714688, "grad_norm": 0.022374131730080137, "learning_rate": 0.00010802891204167736, "loss": 0.4437, "step": 31835 }, { "epoch": 1.6164409640695003, "grad_norm": 0.022866381042039967, "learning_rate": 0.0001078914196989953, "loss": 0.4499, "step": 31840 }, { "epoch": 1.6166948001675319, "grad_norm": 0.023954437468624835, "learning_rate": 0.00010775400432527228, "loss": 0.462, "step": 31845 }, { "epoch": 1.6169486362655632, "grad_norm": 0.023454406216757356, "learning_rate": 0.00010761666594748176, "loss": 0.4746, "step": 31850 }, { "epoch": 1.6172024723635947, "grad_norm": 0.023827431315221397, "learning_rate": 0.00010747940459258321, "loss": 0.4376, "step": 31855 }, { "epoch": 1.6174563084616262, "grad_norm": 0.025096747648666724, "learning_rate": 0.00010734222028751989, "loss": 0.4879, "step": 31860 }, { "epoch": 1.6177101445596578, "grad_norm": 0.021230019197699266, "learning_rate": 0.00010720511305922065, "loss": 0.4286, "step": 31865 }, { "epoch": 1.6179639806576893, "grad_norm": 0.024734269364125625, "learning_rate": 0.00010706808293459875, "loss": 0.4792, "step": 31870 }, { "epoch": 1.6182178167557209, "grad_norm": 0.02274730432603367, "learning_rate": 0.00010693112994055277, "loss": 0.4631, "step": 31875 }, { "epoch": 1.6184716528537524, "grad_norm": 0.023273183427376207, "learning_rate": 0.00010679425410396559, "loss": 0.4611, "step": 31880 }, { "epoch": 1.618725488951784, "grad_norm": 0.020775817214814158, "learning_rate": 0.00010665745545170557, "loss": 0.4473, "step": 31885 }, { "epoch": 1.6189793250498155, "grad_norm": 0.02042396438999102, "learning_rate": 0.00010652073401062529, "loss": 0.4245, "step": 31890 }, { "epoch": 1.619233161147847, "grad_norm": 0.02282973946779475, "learning_rate": 0.00010638408980756281, "loss": 0.4685, "step": 31895 }, { "epoch": 1.6194869972458783, "grad_norm": 0.020201540964168187, "learning_rate": 0.00010624752286934037, "loss": 0.4285, "step": 31900 }, { "epoch": 1.6197408333439098, "grad_norm": 0.02332813243361064, "learning_rate": 0.00010611103322276571, "loss": 0.47, "step": 31905 }, { "epoch": 1.6199946694419414, "grad_norm": 0.0210065909715921, "learning_rate": 0.00010597462089463078, "loss": 0.4695, "step": 31910 }, { "epoch": 1.6202485055399727, "grad_norm": 0.02246075990254701, "learning_rate": 0.00010583828591171273, "loss": 0.4382, "step": 31915 }, { "epoch": 1.6205023416380042, "grad_norm": 0.0283987877904691, "learning_rate": 0.00010570202830077363, "loss": 0.4513, "step": 31920 }, { "epoch": 1.6207561777360358, "grad_norm": 0.023181166904655805, "learning_rate": 0.0001055658480885599, "loss": 0.454, "step": 31925 }, { "epoch": 1.6210100138340673, "grad_norm": 0.02263188362017243, "learning_rate": 0.00010542974530180327, "loss": 0.4693, "step": 31930 }, { "epoch": 1.6212638499320988, "grad_norm": 0.020696164165394288, "learning_rate": 0.00010529371996721976, "loss": 0.4531, "step": 31935 }, { "epoch": 1.6215176860301304, "grad_norm": 0.022616240854955654, "learning_rate": 0.00010515777211151079, "loss": 0.4457, "step": 31940 }, { "epoch": 1.621771522128162, "grad_norm": 0.023467651643862778, "learning_rate": 0.00010502190176136195, "loss": 0.4472, "step": 31945 }, { "epoch": 1.6220253582261934, "grad_norm": 0.024225187265112066, "learning_rate": 0.00010488610894344414, "loss": 0.4586, "step": 31950 }, { "epoch": 1.622279194324225, "grad_norm": 0.02003294507179616, "learning_rate": 0.00010475039368441258, "loss": 0.4476, "step": 31955 }, { "epoch": 1.6225330304222565, "grad_norm": 0.02746040189534053, "learning_rate": 0.0001046147560109078, "loss": 0.4355, "step": 31960 }, { "epoch": 1.6227868665202878, "grad_norm": 0.023121259899165414, "learning_rate": 0.00010447919594955452, "loss": 0.4772, "step": 31965 }, { "epoch": 1.6230407026183193, "grad_norm": 0.021015795475823218, "learning_rate": 0.00010434371352696259, "loss": 0.4599, "step": 31970 }, { "epoch": 1.6232945387163509, "grad_norm": 0.020515866606887798, "learning_rate": 0.00010420830876972653, "loss": 0.4425, "step": 31975 }, { "epoch": 1.6235483748143822, "grad_norm": 0.02337407247961481, "learning_rate": 0.0001040729817044258, "loss": 0.4713, "step": 31980 }, { "epoch": 1.6238022109124137, "grad_norm": 0.02089505650167657, "learning_rate": 0.00010393773235762416, "loss": 0.4621, "step": 31985 }, { "epoch": 1.6240560470104453, "grad_norm": 0.023288518470522646, "learning_rate": 0.00010380256075587063, "loss": 0.4926, "step": 31990 }, { "epoch": 1.6243098831084768, "grad_norm": 0.020663246760806137, "learning_rate": 0.00010366746692569845, "loss": 0.4301, "step": 31995 }, { "epoch": 1.6245637192065083, "grad_norm": 0.021804964895316997, "learning_rate": 0.00010353245089362612, "loss": 0.4608, "step": 32000 }, { "epoch": 1.6248175553045399, "grad_norm": 0.02226540859695977, "learning_rate": 0.00010339751268615639, "loss": 0.4374, "step": 32005 }, { "epoch": 1.6250713914025714, "grad_norm": 0.02024366952881605, "learning_rate": 0.00010326265232977717, "loss": 0.4543, "step": 32010 }, { "epoch": 1.625325227500603, "grad_norm": 0.022002203748430427, "learning_rate": 0.00010312786985096067, "loss": 0.4619, "step": 32015 }, { "epoch": 1.6255790635986345, "grad_norm": 0.020301191207565898, "learning_rate": 0.00010299316527616426, "loss": 0.4779, "step": 32020 }, { "epoch": 1.625832899696666, "grad_norm": 0.021523853309415912, "learning_rate": 0.00010285853863182948, "loss": 0.4441, "step": 32025 }, { "epoch": 1.6260867357946973, "grad_norm": 0.020392254627019237, "learning_rate": 0.00010272398994438303, "loss": 0.4482, "step": 32030 }, { "epoch": 1.6263405718927288, "grad_norm": 0.028102974962649765, "learning_rate": 0.00010258951924023625, "loss": 0.4494, "step": 32035 }, { "epoch": 1.6265944079907604, "grad_norm": 0.028653290930459173, "learning_rate": 0.00010245512654578487, "loss": 0.6634, "step": 32040 }, { "epoch": 1.626848244088792, "grad_norm": 0.03496966873027593, "learning_rate": 0.00010232081188740971, "loss": 0.4563, "step": 32045 }, { "epoch": 1.6271020801868232, "grad_norm": 0.0444008021284147, "learning_rate": 0.0001021865752914758, "loss": 0.4782, "step": 32050 }, { "epoch": 1.6273559162848548, "grad_norm": 0.028458235576396235, "learning_rate": 0.00010205241678433341, "loss": 0.4633, "step": 32055 }, { "epoch": 1.6276097523828863, "grad_norm": 0.033779463143654556, "learning_rate": 0.00010191833639231695, "loss": 0.473, "step": 32060 }, { "epoch": 1.6278635884809178, "grad_norm": 0.02289380406880714, "learning_rate": 0.00010178433414174593, "loss": 0.4981, "step": 32065 }, { "epoch": 1.6281174245789494, "grad_norm": 0.023008365122818897, "learning_rate": 0.00010165041005892412, "loss": 0.4632, "step": 32070 }, { "epoch": 1.628371260676981, "grad_norm": 0.024659825812692805, "learning_rate": 0.00010151656417014033, "loss": 0.4615, "step": 32075 }, { "epoch": 1.6286250967750124, "grad_norm": 0.023409476356552144, "learning_rate": 0.00010138279650166765, "loss": 0.5097, "step": 32080 }, { "epoch": 1.628878932873044, "grad_norm": 0.021110774308924826, "learning_rate": 0.00010124910707976426, "loss": 0.4515, "step": 32085 }, { "epoch": 1.6291327689710755, "grad_norm": 0.02965948111333057, "learning_rate": 0.00010111549593067226, "loss": 0.4821, "step": 32090 }, { "epoch": 1.629386605069107, "grad_norm": 0.02391127207510598, "learning_rate": 0.00010098196308061953, "loss": 0.4448, "step": 32095 }, { "epoch": 1.6296404411671384, "grad_norm": 0.02681195362674087, "learning_rate": 0.00010084850855581734, "loss": 0.433, "step": 32100 }, { "epoch": 1.6298942772651699, "grad_norm": 0.021012222244902096, "learning_rate": 0.00010071513238246255, "loss": 0.4676, "step": 32105 }, { "epoch": 1.6301481133632014, "grad_norm": 0.02751151777371785, "learning_rate": 0.00010058183458673587, "loss": 0.4638, "step": 32110 }, { "epoch": 1.6304019494612327, "grad_norm": 0.02956936482420226, "learning_rate": 0.0001004486151948033, "loss": 0.473, "step": 32115 }, { "epoch": 1.6306557855592643, "grad_norm": 0.034062325657136044, "learning_rate": 0.00010031547423281501, "loss": 0.4819, "step": 32120 }, { "epoch": 1.6309096216572958, "grad_norm": 0.024178056660624028, "learning_rate": 0.00010018241172690578, "loss": 0.4669, "step": 32125 }, { "epoch": 1.6311634577553273, "grad_norm": 0.021402418062271347, "learning_rate": 0.00010004942770319536, "loss": 0.4613, "step": 32130 }, { "epoch": 1.6314172938533589, "grad_norm": 0.02076067212479062, "learning_rate": 9.991652218778762e-05, "loss": 0.4463, "step": 32135 }, { "epoch": 1.6316711299513904, "grad_norm": 0.028173969272268517, "learning_rate": 9.97836952067715e-05, "loss": 0.4744, "step": 32140 }, { "epoch": 1.631924966049422, "grad_norm": 0.022830320689664698, "learning_rate": 9.965094678621994e-05, "loss": 0.4921, "step": 32145 }, { "epoch": 1.6321788021474535, "grad_norm": 0.022425906549558932, "learning_rate": 9.951827695219107e-05, "loss": 0.4392, "step": 32150 }, { "epoch": 1.632432638245485, "grad_norm": 0.029061636424618946, "learning_rate": 9.938568573072715e-05, "loss": 0.4671, "step": 32155 }, { "epoch": 1.6326864743435165, "grad_norm": 0.028166002445137558, "learning_rate": 9.925317314785548e-05, "loss": 0.4338, "step": 32160 }, { "epoch": 1.6329403104415479, "grad_norm": 0.02300375901450975, "learning_rate": 9.91207392295872e-05, "loss": 0.4764, "step": 32165 }, { "epoch": 1.6331941465395794, "grad_norm": 0.022728515736743903, "learning_rate": 9.898838400191879e-05, "loss": 0.4725, "step": 32170 }, { "epoch": 1.633447982637611, "grad_norm": 0.02413443910577726, "learning_rate": 9.885610749083063e-05, "loss": 0.468, "step": 32175 }, { "epoch": 1.6337018187356422, "grad_norm": 0.02197648542631456, "learning_rate": 9.872390972228823e-05, "loss": 0.4635, "step": 32180 }, { "epoch": 1.6339556548336738, "grad_norm": 0.02750430405496458, "learning_rate": 9.8591790722241e-05, "loss": 0.4992, "step": 32185 }, { "epoch": 1.6342094909317053, "grad_norm": 0.022925967015699954, "learning_rate": 9.84597505166236e-05, "loss": 0.4749, "step": 32190 }, { "epoch": 1.6344633270297368, "grad_norm": 0.023864232586490966, "learning_rate": 9.832778913135454e-05, "loss": 0.4432, "step": 32195 }, { "epoch": 1.6347171631277684, "grad_norm": 0.023429582390276676, "learning_rate": 9.819590659233746e-05, "loss": 0.4689, "step": 32200 }, { "epoch": 1.6349709992258, "grad_norm": 0.023005759239922634, "learning_rate": 9.806410292546003e-05, "loss": 0.4647, "step": 32205 }, { "epoch": 1.6352248353238314, "grad_norm": 0.033322292665807295, "learning_rate": 9.793237815659473e-05, "loss": 0.4787, "step": 32210 }, { "epoch": 1.635478671421863, "grad_norm": 0.02676499418010649, "learning_rate": 9.780073231159864e-05, "loss": 0.4879, "step": 32215 }, { "epoch": 1.6357325075198945, "grad_norm": 0.023068752325228373, "learning_rate": 9.766916541631288e-05, "loss": 0.4579, "step": 32220 }, { "epoch": 1.635986343617926, "grad_norm": 0.02443617843704049, "learning_rate": 9.753767749656361e-05, "loss": 0.4236, "step": 32225 }, { "epoch": 1.6362401797159574, "grad_norm": 0.022098127052591947, "learning_rate": 9.740626857816109e-05, "loss": 0.4596, "step": 32230 }, { "epoch": 1.636494015813989, "grad_norm": 0.025000807984976732, "learning_rate": 9.727493868690046e-05, "loss": 0.4547, "step": 32235 }, { "epoch": 1.6367478519120204, "grad_norm": 0.02155154892913524, "learning_rate": 9.714368784856081e-05, "loss": 0.4733, "step": 32240 }, { "epoch": 1.6370016880100517, "grad_norm": 0.02097638201730721, "learning_rate": 9.701251608890638e-05, "loss": 0.4939, "step": 32245 }, { "epoch": 1.6372555241080833, "grad_norm": 0.024844539480766835, "learning_rate": 9.688142343368517e-05, "loss": 0.4623, "step": 32250 }, { "epoch": 1.6375093602061148, "grad_norm": 0.022892093225189144, "learning_rate": 9.675040990863032e-05, "loss": 0.4643, "step": 32255 }, { "epoch": 1.6377631963041464, "grad_norm": 0.021736869039684987, "learning_rate": 9.661947553945893e-05, "loss": 0.4592, "step": 32260 }, { "epoch": 1.6380170324021779, "grad_norm": 0.030318571487825602, "learning_rate": 9.648862035187289e-05, "loss": 0.4798, "step": 32265 }, { "epoch": 1.6382708685002094, "grad_norm": 0.023079937986626196, "learning_rate": 9.635784437155815e-05, "loss": 0.4786, "step": 32270 }, { "epoch": 1.638524704598241, "grad_norm": 0.020531984789138997, "learning_rate": 9.622714762418588e-05, "loss": 0.4466, "step": 32275 }, { "epoch": 1.6387785406962725, "grad_norm": 0.02520449611053328, "learning_rate": 9.609653013541076e-05, "loss": 0.4799, "step": 32280 }, { "epoch": 1.639032376794304, "grad_norm": 0.02276790959983237, "learning_rate": 9.596599193087263e-05, "loss": 0.4371, "step": 32285 }, { "epoch": 1.6392862128923356, "grad_norm": 0.023814420038553278, "learning_rate": 9.583553303619524e-05, "loss": 0.4874, "step": 32290 }, { "epoch": 1.6395400489903669, "grad_norm": 0.032769714816805816, "learning_rate": 9.570515347698727e-05, "loss": 0.4549, "step": 32295 }, { "epoch": 1.6397938850883984, "grad_norm": 0.02497036630007797, "learning_rate": 9.557485327884136e-05, "loss": 0.4573, "step": 32300 }, { "epoch": 1.64004772118643, "grad_norm": 0.020083465356179893, "learning_rate": 9.544463246733503e-05, "loss": 0.4504, "step": 32305 }, { "epoch": 1.6403015572844615, "grad_norm": 0.022338564551145575, "learning_rate": 9.531449106802964e-05, "loss": 0.4702, "step": 32310 }, { "epoch": 1.6405553933824928, "grad_norm": 0.021589374085527493, "learning_rate": 9.518442910647168e-05, "loss": 0.4527, "step": 32315 }, { "epoch": 1.6408092294805243, "grad_norm": 0.02577068006041862, "learning_rate": 9.50544466081913e-05, "loss": 0.4341, "step": 32320 }, { "epoch": 1.6410630655785559, "grad_norm": 0.024543726750310888, "learning_rate": 9.492454359870379e-05, "loss": 0.4649, "step": 32325 }, { "epoch": 1.6413169016765874, "grad_norm": 0.029648235764047823, "learning_rate": 9.479472010350803e-05, "loss": 0.4673, "step": 32330 }, { "epoch": 1.641570737774619, "grad_norm": 0.021346922529075836, "learning_rate": 9.466497614808806e-05, "loss": 0.4419, "step": 32335 }, { "epoch": 1.6418245738726505, "grad_norm": 0.02170707567899142, "learning_rate": 9.453531175791191e-05, "loss": 0.4657, "step": 32340 }, { "epoch": 1.642078409970682, "grad_norm": 0.025382331522061787, "learning_rate": 9.440572695843192e-05, "loss": 0.4543, "step": 32345 }, { "epoch": 1.6423322460687135, "grad_norm": 0.01955537085234093, "learning_rate": 9.427622177508521e-05, "loss": 0.4287, "step": 32350 }, { "epoch": 1.642586082166745, "grad_norm": 0.021772913466288676, "learning_rate": 9.414679623329264e-05, "loss": 0.46, "step": 32355 }, { "epoch": 1.6428399182647766, "grad_norm": 0.02173314708203356, "learning_rate": 9.40174503584601e-05, "loss": 0.4652, "step": 32360 }, { "epoch": 1.643093754362808, "grad_norm": 0.02028675485464349, "learning_rate": 9.388818417597733e-05, "loss": 0.4648, "step": 32365 }, { "epoch": 1.6433475904608394, "grad_norm": 0.024655003878005108, "learning_rate": 9.375899771121888e-05, "loss": 0.4126, "step": 32370 }, { "epoch": 1.643601426558871, "grad_norm": 0.022638057624033838, "learning_rate": 9.362989098954306e-05, "loss": 0.4586, "step": 32375 }, { "epoch": 1.6438552626569023, "grad_norm": 0.020410137630603085, "learning_rate": 9.350086403629326e-05, "loss": 0.4371, "step": 32380 }, { "epoch": 1.6441090987549338, "grad_norm": 0.02664816125540432, "learning_rate": 9.337191687679648e-05, "loss": 0.4442, "step": 32385 }, { "epoch": 1.6443629348529654, "grad_norm": 0.0234300596390381, "learning_rate": 9.324304953636458e-05, "loss": 0.464, "step": 32390 }, { "epoch": 1.644616770950997, "grad_norm": 0.022576297788024068, "learning_rate": 9.311426204029355e-05, "loss": 0.4434, "step": 32395 }, { "epoch": 1.6448706070490284, "grad_norm": 0.022495995573104423, "learning_rate": 9.298555441386392e-05, "loss": 0.495, "step": 32400 }, { "epoch": 1.64512444314706, "grad_norm": 0.020474348549872002, "learning_rate": 9.285692668233997e-05, "loss": 0.4833, "step": 32405 }, { "epoch": 1.6453782792450915, "grad_norm": 0.024333096423502235, "learning_rate": 9.272837887097108e-05, "loss": 0.4754, "step": 32410 }, { "epoch": 1.645632115343123, "grad_norm": 0.020618548983006132, "learning_rate": 9.259991100499021e-05, "loss": 0.4721, "step": 32415 }, { "epoch": 1.6458859514411546, "grad_norm": 0.020652302792662478, "learning_rate": 9.247152310961527e-05, "loss": 0.4418, "step": 32420 }, { "epoch": 1.646139787539186, "grad_norm": 0.02297733570270565, "learning_rate": 9.234321521004786e-05, "loss": 0.4864, "step": 32425 }, { "epoch": 1.6463936236372174, "grad_norm": 0.019973132791394135, "learning_rate": 9.221498733147443e-05, "loss": 0.455, "step": 32430 }, { "epoch": 1.646647459735249, "grad_norm": 0.0213492601346557, "learning_rate": 9.208683949906526e-05, "loss": 0.4294, "step": 32435 }, { "epoch": 1.6469012958332805, "grad_norm": 0.020742705287425788, "learning_rate": 9.195877173797534e-05, "loss": 0.4499, "step": 32440 }, { "epoch": 1.6471551319313118, "grad_norm": 0.01942087181042954, "learning_rate": 9.18307840733435e-05, "loss": 0.456, "step": 32445 }, { "epoch": 1.6474089680293433, "grad_norm": 0.031308669135702176, "learning_rate": 9.170287653029325e-05, "loss": 0.4647, "step": 32450 }, { "epoch": 1.6476628041273749, "grad_norm": 0.02130109350045376, "learning_rate": 9.157504913393228e-05, "loss": 0.4667, "step": 32455 }, { "epoch": 1.6479166402254064, "grad_norm": 0.027077864769477565, "learning_rate": 9.14473019093522e-05, "loss": 0.4551, "step": 32460 }, { "epoch": 1.648170476323438, "grad_norm": 0.023508906357244588, "learning_rate": 9.131963488162942e-05, "loss": 0.4568, "step": 32465 }, { "epoch": 1.6484243124214695, "grad_norm": 0.021416094495555104, "learning_rate": 9.119204807582415e-05, "loss": 0.4501, "step": 32470 }, { "epoch": 1.648678148519501, "grad_norm": 0.026119351851691987, "learning_rate": 9.106454151698118e-05, "loss": 0.4582, "step": 32475 }, { "epoch": 1.6489319846175325, "grad_norm": 0.0208804764024078, "learning_rate": 9.093711523012933e-05, "loss": 0.4448, "step": 32480 }, { "epoch": 1.649185820715564, "grad_norm": 0.024577116363089128, "learning_rate": 9.080976924028177e-05, "loss": 0.4357, "step": 32485 }, { "epoch": 1.6494396568135956, "grad_norm": 0.022644134972793314, "learning_rate": 9.068250357243585e-05, "loss": 0.459, "step": 32490 }, { "epoch": 1.649693492911627, "grad_norm": 0.024600197563246825, "learning_rate": 9.055531825157332e-05, "loss": 0.4454, "step": 32495 }, { "epoch": 1.6499473290096585, "grad_norm": 0.021916792555752632, "learning_rate": 9.042821330265976e-05, "loss": 0.4652, "step": 32500 }, { "epoch": 1.65020116510769, "grad_norm": 0.022504308117136322, "learning_rate": 9.030118875064553e-05, "loss": 0.464, "step": 32505 }, { "epoch": 1.6504550012057213, "grad_norm": 0.023410283807235067, "learning_rate": 9.017424462046453e-05, "loss": 0.4625, "step": 32510 }, { "epoch": 1.6507088373037528, "grad_norm": 0.023827231525166772, "learning_rate": 9.00473809370358e-05, "loss": 0.4476, "step": 32515 }, { "epoch": 1.6509626734017844, "grad_norm": 0.024423379173870094, "learning_rate": 8.992059772526163e-05, "loss": 0.4765, "step": 32520 }, { "epoch": 1.651216509499816, "grad_norm": 0.022352780343270956, "learning_rate": 8.979389501002916e-05, "loss": 0.4578, "step": 32525 }, { "epoch": 1.6514703455978474, "grad_norm": 0.021988070837923635, "learning_rate": 8.966727281620929e-05, "loss": 0.4696, "step": 32530 }, { "epoch": 1.651724181695879, "grad_norm": 0.020573849908693122, "learning_rate": 8.954073116865757e-05, "loss": 0.4873, "step": 32535 }, { "epoch": 1.6519780177939105, "grad_norm": 0.023883137882499617, "learning_rate": 8.941427009221325e-05, "loss": 0.4686, "step": 32540 }, { "epoch": 1.652231853891942, "grad_norm": 0.027468298536276997, "learning_rate": 8.928788961170025e-05, "loss": 0.4925, "step": 32545 }, { "epoch": 1.6524856899899736, "grad_norm": 0.027088296741393195, "learning_rate": 8.916158975192618e-05, "loss": 0.4534, "step": 32550 }, { "epoch": 1.6527395260880051, "grad_norm": 0.02137783275444411, "learning_rate": 8.903537053768329e-05, "loss": 0.4541, "step": 32555 }, { "epoch": 1.6529933621860364, "grad_norm": 0.02449402913442447, "learning_rate": 8.890923199374756e-05, "loss": 0.4558, "step": 32560 }, { "epoch": 1.653247198284068, "grad_norm": 0.022228205017642978, "learning_rate": 8.878317414487964e-05, "loss": 0.4577, "step": 32565 }, { "epoch": 1.6535010343820995, "grad_norm": 0.023535911694491006, "learning_rate": 8.865719701582376e-05, "loss": 0.4531, "step": 32570 }, { "epoch": 1.653754870480131, "grad_norm": 0.021119661452584808, "learning_rate": 8.85313006313087e-05, "loss": 0.4574, "step": 32575 }, { "epoch": 1.6540087065781623, "grad_norm": 0.022839968865042536, "learning_rate": 8.84054850160475e-05, "loss": 0.4713, "step": 32580 }, { "epoch": 1.6542625426761939, "grad_norm": 0.022416424195115817, "learning_rate": 8.827975019473688e-05, "loss": 0.4826, "step": 32585 }, { "epoch": 1.6545163787742254, "grad_norm": 0.021476787913188244, "learning_rate": 8.815409619205811e-05, "loss": 0.4419, "step": 32590 }, { "epoch": 1.654770214872257, "grad_norm": 0.028305345188139118, "learning_rate": 8.802852303267634e-05, "loss": 0.465, "step": 32595 }, { "epoch": 1.6550240509702885, "grad_norm": 0.04040707057128688, "learning_rate": 8.790303074124106e-05, "loss": 0.5121, "step": 32600 }, { "epoch": 1.65527788706832, "grad_norm": 0.04087985900665181, "learning_rate": 8.77776193423856e-05, "loss": 0.4659, "step": 32605 }, { "epoch": 1.6555317231663516, "grad_norm": 0.028685293599005325, "learning_rate": 8.765228886072785e-05, "loss": 0.4659, "step": 32610 }, { "epoch": 1.655785559264383, "grad_norm": 0.022967724296628123, "learning_rate": 8.75270393208693e-05, "loss": 0.4558, "step": 32615 }, { "epoch": 1.6560393953624146, "grad_norm": 0.02385629905389606, "learning_rate": 8.740187074739609e-05, "loss": 0.4281, "step": 32620 }, { "epoch": 1.656293231460446, "grad_norm": 0.024276473538365772, "learning_rate": 8.727678316487786e-05, "loss": 0.4542, "step": 32625 }, { "epoch": 1.6565470675584775, "grad_norm": 0.02582996339953851, "learning_rate": 8.7151776597869e-05, "loss": 0.4921, "step": 32630 }, { "epoch": 1.656800903656509, "grad_norm": 0.019009514698150008, "learning_rate": 8.702685107090725e-05, "loss": 0.4833, "step": 32635 }, { "epoch": 1.6570547397545405, "grad_norm": 0.02262802830227219, "learning_rate": 8.690200660851539e-05, "loss": 0.4611, "step": 32640 }, { "epoch": 1.6573085758525719, "grad_norm": 0.022327220330587837, "learning_rate": 8.677724323519937e-05, "loss": 0.4822, "step": 32645 }, { "epoch": 1.6575624119506034, "grad_norm": 0.026247061121182514, "learning_rate": 8.665256097544994e-05, "loss": 0.483, "step": 32650 }, { "epoch": 1.657816248048635, "grad_norm": 0.021890433127245045, "learning_rate": 8.65279598537413e-05, "loss": 0.4744, "step": 32655 }, { "epoch": 1.6580700841466665, "grad_norm": 0.023375317295645848, "learning_rate": 8.640343989453225e-05, "loss": 0.4488, "step": 32660 }, { "epoch": 1.658323920244698, "grad_norm": 0.025314102237468532, "learning_rate": 8.627900112226522e-05, "loss": 0.4704, "step": 32665 }, { "epoch": 1.6585777563427295, "grad_norm": 0.020441167533399623, "learning_rate": 8.61546435613672e-05, "loss": 0.4608, "step": 32670 }, { "epoch": 1.658831592440761, "grad_norm": 0.022593790331337692, "learning_rate": 8.603036723624868e-05, "loss": 0.4543, "step": 32675 }, { "epoch": 1.6590854285387926, "grad_norm": 0.023141528922392886, "learning_rate": 8.590617217130469e-05, "loss": 0.4867, "step": 32680 }, { "epoch": 1.6593392646368241, "grad_norm": 0.019506697724557195, "learning_rate": 8.578205839091397e-05, "loss": 0.4562, "step": 32685 }, { "epoch": 1.6595931007348557, "grad_norm": 0.021711834234721967, "learning_rate": 8.565802591943955e-05, "loss": 0.4485, "step": 32690 }, { "epoch": 1.659846936832887, "grad_norm": 0.021652695537899707, "learning_rate": 8.55340747812282e-05, "loss": 0.4609, "step": 32695 }, { "epoch": 1.6601007729309185, "grad_norm": 0.021747456684810037, "learning_rate": 8.541020500061109e-05, "loss": 0.4705, "step": 32700 }, { "epoch": 1.66035460902895, "grad_norm": 0.022086611000210216, "learning_rate": 8.528641660190323e-05, "loss": 0.478, "step": 32705 }, { "epoch": 1.6606084451269814, "grad_norm": 0.022136451230823195, "learning_rate": 8.516270960940353e-05, "loss": 0.4541, "step": 32710 }, { "epoch": 1.660862281225013, "grad_norm": 0.025866562335245584, "learning_rate": 8.50390840473953e-05, "loss": 0.4446, "step": 32715 }, { "epoch": 1.6611161173230444, "grad_norm": 0.0228248045581071, "learning_rate": 8.491553994014528e-05, "loss": 0.4186, "step": 32720 }, { "epoch": 1.661369953421076, "grad_norm": 0.021908437534819313, "learning_rate": 8.479207731190491e-05, "loss": 0.444, "step": 32725 }, { "epoch": 1.6616237895191075, "grad_norm": 0.022820980730176514, "learning_rate": 8.466869618690898e-05, "loss": 0.4496, "step": 32730 }, { "epoch": 1.661877625617139, "grad_norm": 0.02083612493092523, "learning_rate": 8.454539658937688e-05, "loss": 0.4318, "step": 32735 }, { "epoch": 1.6621314617151706, "grad_norm": 0.02223274504726005, "learning_rate": 8.442217854351142e-05, "loss": 0.4465, "step": 32740 }, { "epoch": 1.662385297813202, "grad_norm": 0.027608070343403793, "learning_rate": 8.429904207349997e-05, "loss": 0.4895, "step": 32745 }, { "epoch": 1.6626391339112336, "grad_norm": 0.023017127806108626, "learning_rate": 8.417598720351333e-05, "loss": 0.4566, "step": 32750 }, { "epoch": 1.6628929700092652, "grad_norm": 0.024694217741001077, "learning_rate": 8.40530139577067e-05, "loss": 0.4202, "step": 32755 }, { "epoch": 1.6631468061072965, "grad_norm": 0.021747446327509346, "learning_rate": 8.393012236021908e-05, "loss": 0.442, "step": 32760 }, { "epoch": 1.663400642205328, "grad_norm": 0.02285407342505394, "learning_rate": 8.380731243517365e-05, "loss": 0.4849, "step": 32765 }, { "epoch": 1.6636544783033596, "grad_norm": 0.01880950444315991, "learning_rate": 8.368458420667707e-05, "loss": 0.4462, "step": 32770 }, { "epoch": 1.6639083144013909, "grad_norm": 0.02321735100250034, "learning_rate": 8.356193769882064e-05, "loss": 0.4659, "step": 32775 }, { "epoch": 1.6641621504994224, "grad_norm": 0.021688726089099976, "learning_rate": 8.343937293567888e-05, "loss": 0.4518, "step": 32780 }, { "epoch": 1.664415986597454, "grad_norm": 0.022255121531889792, "learning_rate": 8.331688994131098e-05, "loss": 0.4709, "step": 32785 }, { "epoch": 1.6646698226954855, "grad_norm": 0.0234424879295081, "learning_rate": 8.319448873975948e-05, "loss": 0.4485, "step": 32790 }, { "epoch": 1.664923658793517, "grad_norm": 0.01971346035102544, "learning_rate": 8.307216935505135e-05, "loss": 0.4471, "step": 32795 }, { "epoch": 1.6651774948915485, "grad_norm": 0.027744043981377947, "learning_rate": 8.294993181119703e-05, "loss": 0.4878, "step": 32800 }, { "epoch": 1.66543133098958, "grad_norm": 0.02538544426279172, "learning_rate": 8.282777613219139e-05, "loss": 0.4614, "step": 32805 }, { "epoch": 1.6656851670876116, "grad_norm": 0.024257509182234863, "learning_rate": 8.270570234201274e-05, "loss": 0.456, "step": 32810 }, { "epoch": 1.6659390031856431, "grad_norm": 0.022358059693099067, "learning_rate": 8.25837104646237e-05, "loss": 0.4957, "step": 32815 }, { "epoch": 1.6661928392836747, "grad_norm": 0.021859775564087938, "learning_rate": 8.246180052397078e-05, "loss": 0.4784, "step": 32820 }, { "epoch": 1.666446675381706, "grad_norm": 0.020567147747974097, "learning_rate": 8.233997254398401e-05, "loss": 0.4608, "step": 32825 }, { "epoch": 1.6667005114797375, "grad_norm": 0.023002497460944293, "learning_rate": 8.221822654857786e-05, "loss": 0.441, "step": 32830 }, { "epoch": 1.666954347577769, "grad_norm": 0.024727893994143192, "learning_rate": 8.209656256165027e-05, "loss": 0.4698, "step": 32835 }, { "epoch": 1.6672081836758004, "grad_norm": 0.020590000782565322, "learning_rate": 8.197498060708347e-05, "loss": 0.4686, "step": 32840 }, { "epoch": 1.667462019773832, "grad_norm": 0.026579843381741665, "learning_rate": 8.185348070874316e-05, "loss": 0.4612, "step": 32845 }, { "epoch": 1.6677158558718634, "grad_norm": 0.01927314772150972, "learning_rate": 8.173206289047947e-05, "loss": 0.4627, "step": 32850 }, { "epoch": 1.667969691969895, "grad_norm": 0.028426534307149153, "learning_rate": 8.161072717612578e-05, "loss": 0.4565, "step": 32855 }, { "epoch": 1.6682235280679265, "grad_norm": 0.028414912943784405, "learning_rate": 8.148947358949992e-05, "loss": 0.4608, "step": 32860 }, { "epoch": 1.668477364165958, "grad_norm": 0.02128169255167679, "learning_rate": 8.136830215440322e-05, "loss": 0.4669, "step": 32865 }, { "epoch": 1.6687312002639896, "grad_norm": 0.020873791873976224, "learning_rate": 8.124721289462122e-05, "loss": 0.4489, "step": 32870 }, { "epoch": 1.6689850363620211, "grad_norm": 0.03135989480369304, "learning_rate": 8.112620583392272e-05, "loss": 0.4529, "step": 32875 }, { "epoch": 1.6692388724600526, "grad_norm": 0.02000361275840586, "learning_rate": 8.100528099606135e-05, "loss": 0.4683, "step": 32880 }, { "epoch": 1.6694927085580842, "grad_norm": 0.03133371354490263, "learning_rate": 8.088443840477371e-05, "loss": 0.4594, "step": 32885 }, { "epoch": 1.6697465446561155, "grad_norm": 0.020707782157897564, "learning_rate": 8.076367808378083e-05, "loss": 0.4547, "step": 32890 }, { "epoch": 1.670000380754147, "grad_norm": 0.022147898036397162, "learning_rate": 8.064300005678705e-05, "loss": 0.4866, "step": 32895 }, { "epoch": 1.6702542168521786, "grad_norm": 0.02198563076624078, "learning_rate": 8.052240434748114e-05, "loss": 0.4789, "step": 32900 }, { "epoch": 1.67050805295021, "grad_norm": 0.39720820061969236, "learning_rate": 8.04018909795352e-05, "loss": 0.438, "step": 32905 }, { "epoch": 1.6707618890482414, "grad_norm": 0.022207069306431813, "learning_rate": 8.028145997660569e-05, "loss": 0.4757, "step": 32910 }, { "epoch": 1.671015725146273, "grad_norm": 0.024892721760228, "learning_rate": 8.016111136233229e-05, "loss": 0.4694, "step": 32915 }, { "epoch": 1.6712695612443045, "grad_norm": 0.026240771643922845, "learning_rate": 8.00408451603391e-05, "loss": 0.483, "step": 32920 }, { "epoch": 1.671523397342336, "grad_norm": 0.027364163639244488, "learning_rate": 7.992066139423359e-05, "loss": 0.4495, "step": 32925 }, { "epoch": 1.6717772334403675, "grad_norm": 0.022658845760252785, "learning_rate": 7.980056008760744e-05, "loss": 0.4604, "step": 32930 }, { "epoch": 1.672031069538399, "grad_norm": 0.020613403322029124, "learning_rate": 7.968054126403568e-05, "loss": 0.4436, "step": 32935 }, { "epoch": 1.6722849056364306, "grad_norm": 0.020569339169032044, "learning_rate": 7.956060494707757e-05, "loss": 0.4611, "step": 32940 }, { "epoch": 1.6725387417344622, "grad_norm": 0.02209932289337912, "learning_rate": 7.944075116027604e-05, "loss": 0.4665, "step": 32945 }, { "epoch": 1.6727925778324937, "grad_norm": 0.022726524271639435, "learning_rate": 7.93209799271577e-05, "loss": 0.4564, "step": 32950 }, { "epoch": 1.6730464139305252, "grad_norm": 0.021952171929792236, "learning_rate": 7.920129127123316e-05, "loss": 0.4485, "step": 32955 }, { "epoch": 1.6733002500285565, "grad_norm": 0.02051579303808435, "learning_rate": 7.908168521599646e-05, "loss": 0.4584, "step": 32960 }, { "epoch": 1.673554086126588, "grad_norm": 0.02294166980769807, "learning_rate": 7.896216178492599e-05, "loss": 0.4563, "step": 32965 }, { "epoch": 1.6738079222246196, "grad_norm": 0.02490011470275378, "learning_rate": 7.884272100148332e-05, "loss": 0.486, "step": 32970 }, { "epoch": 1.674061758322651, "grad_norm": 0.02500987816795483, "learning_rate": 7.872336288911436e-05, "loss": 0.4521, "step": 32975 }, { "epoch": 1.6743155944206825, "grad_norm": 0.025048858782017916, "learning_rate": 7.86040874712482e-05, "loss": 0.4468, "step": 32980 }, { "epoch": 1.674569430518714, "grad_norm": 0.028012775399430874, "learning_rate": 7.848489477129828e-05, "loss": 0.4734, "step": 32985 }, { "epoch": 1.6748232666167455, "grad_norm": 0.02640398235527484, "learning_rate": 7.836578481266132e-05, "loss": 0.4504, "step": 32990 }, { "epoch": 1.675077102714777, "grad_norm": 0.023020229066731457, "learning_rate": 7.824675761871814e-05, "loss": 0.4716, "step": 32995 }, { "epoch": 1.6753309388128086, "grad_norm": 0.02729357114542021, "learning_rate": 7.812781321283319e-05, "loss": 0.4414, "step": 33000 }, { "epoch": 1.6755847749108401, "grad_norm": 0.02331912928699373, "learning_rate": 7.800895161835469e-05, "loss": 0.5048, "step": 33005 }, { "epoch": 1.6758386110088717, "grad_norm": 0.020302890548499142, "learning_rate": 7.789017285861439e-05, "loss": 0.4686, "step": 33010 }, { "epoch": 1.6760924471069032, "grad_norm": 0.030570223530975774, "learning_rate": 7.777147695692827e-05, "loss": 0.4768, "step": 33015 }, { "epoch": 1.6763462832049347, "grad_norm": 0.021681113676065226, "learning_rate": 7.765286393659543e-05, "loss": 0.4306, "step": 33020 }, { "epoch": 1.676600119302966, "grad_norm": 0.02236935979091485, "learning_rate": 7.75343338208993e-05, "loss": 0.4734, "step": 33025 }, { "epoch": 1.6768539554009976, "grad_norm": 0.025693461163144155, "learning_rate": 7.741588663310644e-05, "loss": 0.4494, "step": 33030 }, { "epoch": 1.677107791499029, "grad_norm": 0.02791486982571019, "learning_rate": 7.729752239646776e-05, "loss": 0.4523, "step": 33035 }, { "epoch": 1.6773616275970604, "grad_norm": 0.02226158895252821, "learning_rate": 7.717924113421732e-05, "loss": 0.4467, "step": 33040 }, { "epoch": 1.677615463695092, "grad_norm": 0.02192720169085694, "learning_rate": 7.706104286957333e-05, "loss": 0.4695, "step": 33045 }, { "epoch": 1.6778692997931235, "grad_norm": 0.022241780707175234, "learning_rate": 7.694292762573729e-05, "loss": 0.4432, "step": 33050 }, { "epoch": 1.678123135891155, "grad_norm": 0.025844348165324875, "learning_rate": 7.682489542589483e-05, "loss": 0.5004, "step": 33055 }, { "epoch": 1.6783769719891866, "grad_norm": 0.03152479859049114, "learning_rate": 7.670694629321511e-05, "loss": 0.4711, "step": 33060 }, { "epoch": 1.678630808087218, "grad_norm": 0.020936664773053695, "learning_rate": 7.658908025085076e-05, "loss": 0.4632, "step": 33065 }, { "epoch": 1.6788846441852496, "grad_norm": 0.021671379054153835, "learning_rate": 7.647129732193859e-05, "loss": 0.4327, "step": 33070 }, { "epoch": 1.6791384802832812, "grad_norm": 0.023902127255322248, "learning_rate": 7.635359752959841e-05, "loss": 0.4704, "step": 33075 }, { "epoch": 1.6793923163813127, "grad_norm": 0.024510636050162397, "learning_rate": 7.623598089693446e-05, "loss": 0.4771, "step": 33080 }, { "epoch": 1.6796461524793442, "grad_norm": 0.023726057600865935, "learning_rate": 7.611844744703406e-05, "loss": 0.471, "step": 33085 }, { "epoch": 1.6798999885773755, "grad_norm": 0.022957493744582655, "learning_rate": 7.600099720296866e-05, "loss": 0.4646, "step": 33090 }, { "epoch": 1.680153824675407, "grad_norm": 0.022157047872468763, "learning_rate": 7.588363018779288e-05, "loss": 0.4636, "step": 33095 }, { "epoch": 1.6804076607734386, "grad_norm": 0.020795019876852835, "learning_rate": 7.576634642454555e-05, "loss": 0.4648, "step": 33100 }, { "epoch": 1.68066149687147, "grad_norm": 0.02546400520836241, "learning_rate": 7.564914593624866e-05, "loss": 0.4771, "step": 33105 }, { "epoch": 1.6809153329695015, "grad_norm": 0.035868249174773666, "learning_rate": 7.553202874590825e-05, "loss": 0.4593, "step": 33110 }, { "epoch": 1.681169169067533, "grad_norm": 0.029253511552575565, "learning_rate": 7.54149948765136e-05, "loss": 0.4585, "step": 33115 }, { "epoch": 1.6814230051655645, "grad_norm": 0.021167662879100695, "learning_rate": 7.529804435103831e-05, "loss": 0.4654, "step": 33120 }, { "epoch": 1.681676841263596, "grad_norm": 0.02498414369849199, "learning_rate": 7.518117719243878e-05, "loss": 0.4542, "step": 33125 }, { "epoch": 1.6819306773616276, "grad_norm": 0.022195501835238156, "learning_rate": 7.506439342365573e-05, "loss": 0.4739, "step": 33130 }, { "epoch": 1.6821845134596591, "grad_norm": 0.021987713178080383, "learning_rate": 7.494769306761296e-05, "loss": 0.4555, "step": 33135 }, { "epoch": 1.6824383495576907, "grad_norm": 0.030374528516733935, "learning_rate": 7.483107614721846e-05, "loss": 0.4736, "step": 33140 }, { "epoch": 1.6826921856557222, "grad_norm": 0.02149051980457975, "learning_rate": 7.471454268536338e-05, "loss": 0.45, "step": 33145 }, { "epoch": 1.6829460217537537, "grad_norm": 0.022265453269349957, "learning_rate": 7.459809270492252e-05, "loss": 0.4573, "step": 33150 }, { "epoch": 1.683199857851785, "grad_norm": 0.03210156404072531, "learning_rate": 7.448172622875477e-05, "loss": 0.462, "step": 33155 }, { "epoch": 1.6834536939498166, "grad_norm": 0.027726727483721063, "learning_rate": 7.436544327970191e-05, "loss": 0.4343, "step": 33160 }, { "epoch": 1.6837075300478481, "grad_norm": 0.026983110267163848, "learning_rate": 7.424924388059007e-05, "loss": 0.4746, "step": 33165 }, { "epoch": 1.6839613661458797, "grad_norm": 0.022811847390947975, "learning_rate": 7.413312805422834e-05, "loss": 0.475, "step": 33170 }, { "epoch": 1.684215202243911, "grad_norm": 0.022500674142276494, "learning_rate": 7.40170958234097e-05, "loss": 0.4579, "step": 33175 }, { "epoch": 1.6844690383419425, "grad_norm": 0.021612375802658696, "learning_rate": 7.390114721091084e-05, "loss": 0.4414, "step": 33180 }, { "epoch": 1.684722874439974, "grad_norm": 0.0248775111800712, "learning_rate": 7.378528223949194e-05, "loss": 0.5039, "step": 33185 }, { "epoch": 1.6849767105380056, "grad_norm": 0.02388192498972222, "learning_rate": 7.366950093189651e-05, "loss": 0.4526, "step": 33190 }, { "epoch": 1.685230546636037, "grad_norm": 0.030723879806664092, "learning_rate": 7.355380331085205e-05, "loss": 0.4692, "step": 33195 }, { "epoch": 1.6854843827340686, "grad_norm": 0.018952594935604992, "learning_rate": 7.343818939906915e-05, "loss": 0.4533, "step": 33200 }, { "epoch": 1.6857382188321002, "grad_norm": 0.021896935877159233, "learning_rate": 7.332265921924258e-05, "loss": 0.4908, "step": 33205 }, { "epoch": 1.6859920549301317, "grad_norm": 0.024915232475315307, "learning_rate": 7.320721279405002e-05, "loss": 0.4696, "step": 33210 }, { "epoch": 1.6862458910281632, "grad_norm": 0.023173557196539376, "learning_rate": 7.309185014615333e-05, "loss": 0.4438, "step": 33215 }, { "epoch": 1.6864997271261948, "grad_norm": 0.023554891519709124, "learning_rate": 7.29765712981973e-05, "loss": 0.4373, "step": 33220 }, { "epoch": 1.686753563224226, "grad_norm": 0.019733917611659817, "learning_rate": 7.286137627281092e-05, "loss": 0.4432, "step": 33225 }, { "epoch": 1.6870073993222576, "grad_norm": 0.02160655258996359, "learning_rate": 7.274626509260612e-05, "loss": 0.4774, "step": 33230 }, { "epoch": 1.6872612354202892, "grad_norm": 0.02405968481850796, "learning_rate": 7.263123778017877e-05, "loss": 0.4548, "step": 33235 }, { "epoch": 1.6875150715183205, "grad_norm": 0.024128327278038663, "learning_rate": 7.251629435810825e-05, "loss": 0.4642, "step": 33240 }, { "epoch": 1.687768907616352, "grad_norm": 0.024693827881499066, "learning_rate": 7.240143484895718e-05, "loss": 0.4463, "step": 33245 }, { "epoch": 1.6880227437143835, "grad_norm": 0.03310653970465261, "learning_rate": 7.228665927527217e-05, "loss": 0.4786, "step": 33250 }, { "epoch": 1.688276579812415, "grad_norm": 0.024219301603149616, "learning_rate": 7.217196765958278e-05, "loss": 0.4417, "step": 33255 }, { "epoch": 1.6885304159104466, "grad_norm": 0.020412058664254072, "learning_rate": 7.205736002440272e-05, "loss": 0.4324, "step": 33260 }, { "epoch": 1.6887842520084781, "grad_norm": 0.020652073502858727, "learning_rate": 7.19428363922286e-05, "loss": 0.4505, "step": 33265 }, { "epoch": 1.6890380881065097, "grad_norm": 0.020752974694944956, "learning_rate": 7.18283967855411e-05, "loss": 0.4729, "step": 33270 }, { "epoch": 1.6892919242045412, "grad_norm": 0.026444187207730747, "learning_rate": 7.171404122680391e-05, "loss": 0.4503, "step": 33275 }, { "epoch": 1.6895457603025728, "grad_norm": 0.024097349847725474, "learning_rate": 7.159976973846466e-05, "loss": 0.4576, "step": 33280 }, { "epoch": 1.6897995964006043, "grad_norm": 0.02500842430792348, "learning_rate": 7.14855823429541e-05, "loss": 0.4684, "step": 33285 }, { "epoch": 1.6900534324986356, "grad_norm": 0.022174452172228635, "learning_rate": 7.137147906268682e-05, "loss": 0.4605, "step": 33290 }, { "epoch": 1.6903072685966671, "grad_norm": 0.02242173750304481, "learning_rate": 7.125745992006044e-05, "loss": 0.4677, "step": 33295 }, { "epoch": 1.6905611046946987, "grad_norm": 0.023083145288370695, "learning_rate": 7.114352493745674e-05, "loss": 0.4695, "step": 33300 }, { "epoch": 1.69081494079273, "grad_norm": 0.02381655314266417, "learning_rate": 7.102967413724027e-05, "loss": 0.4685, "step": 33305 }, { "epoch": 1.6910687768907615, "grad_norm": 0.024080193683219564, "learning_rate": 7.091590754175963e-05, "loss": 0.445, "step": 33310 }, { "epoch": 1.691322612988793, "grad_norm": 0.021690243710150815, "learning_rate": 7.080222517334639e-05, "loss": 0.4125, "step": 33315 }, { "epoch": 1.6915764490868246, "grad_norm": 0.021918256209021103, "learning_rate": 7.068862705431601e-05, "loss": 0.4648, "step": 33320 }, { "epoch": 1.6918302851848561, "grad_norm": 0.022452404320154225, "learning_rate": 7.057511320696708e-05, "loss": 0.4451, "step": 33325 }, { "epoch": 1.6920841212828877, "grad_norm": 0.020322901723341114, "learning_rate": 7.046168365358202e-05, "loss": 0.4244, "step": 33330 }, { "epoch": 1.6923379573809192, "grad_norm": 0.026597028609778042, "learning_rate": 7.034833841642624e-05, "loss": 0.4772, "step": 33335 }, { "epoch": 1.6925917934789507, "grad_norm": 0.02102494226253916, "learning_rate": 7.023507751774905e-05, "loss": 0.4323, "step": 33340 }, { "epoch": 1.6928456295769823, "grad_norm": 0.021714702487951786, "learning_rate": 7.012190097978282e-05, "loss": 0.449, "step": 33345 }, { "epoch": 1.6930994656750138, "grad_norm": 0.02158282445558592, "learning_rate": 7.000880882474375e-05, "loss": 0.447, "step": 33350 }, { "epoch": 1.693353301773045, "grad_norm": 0.02078501840382419, "learning_rate": 6.989580107483102e-05, "loss": 0.452, "step": 33355 }, { "epoch": 1.6936071378710766, "grad_norm": 0.022296558827429345, "learning_rate": 6.978287775222758e-05, "loss": 0.4745, "step": 33360 }, { "epoch": 1.6938609739691082, "grad_norm": 0.022424807973195095, "learning_rate": 6.967003887909989e-05, "loss": 0.4309, "step": 33365 }, { "epoch": 1.6941148100671395, "grad_norm": 0.022647524848265772, "learning_rate": 6.95572844775974e-05, "loss": 0.4513, "step": 33370 }, { "epoch": 1.694368646165171, "grad_norm": 0.02604844884193192, "learning_rate": 6.944461456985346e-05, "loss": 0.4496, "step": 33375 }, { "epoch": 1.6946224822632026, "grad_norm": 0.02231782508277886, "learning_rate": 6.933202917798443e-05, "loss": 0.4345, "step": 33380 }, { "epoch": 1.694876318361234, "grad_norm": 0.027331402316067194, "learning_rate": 6.92195283240904e-05, "loss": 0.452, "step": 33385 }, { "epoch": 1.6951301544592656, "grad_norm": 0.02244181112322746, "learning_rate": 6.910711203025455e-05, "loss": 0.4548, "step": 33390 }, { "epoch": 1.6953839905572972, "grad_norm": 0.031539164754729246, "learning_rate": 6.89947803185439e-05, "loss": 0.4876, "step": 33395 }, { "epoch": 1.6956378266553287, "grad_norm": 0.029750827597486304, "learning_rate": 6.888253321100829e-05, "loss": 0.4631, "step": 33400 }, { "epoch": 1.6958916627533602, "grad_norm": 0.02054220426689229, "learning_rate": 6.877037072968157e-05, "loss": 0.433, "step": 33405 }, { "epoch": 1.6961454988513918, "grad_norm": 0.022921757508354648, "learning_rate": 6.865829289658044e-05, "loss": 0.4793, "step": 33410 }, { "epoch": 1.6963993349494233, "grad_norm": 0.02272022241679228, "learning_rate": 6.85462997337053e-05, "loss": 0.4618, "step": 33415 }, { "epoch": 1.6966531710474546, "grad_norm": 0.021335577126739804, "learning_rate": 6.843439126303985e-05, "loss": 0.4481, "step": 33420 }, { "epoch": 1.6969070071454861, "grad_norm": 0.024313607070824157, "learning_rate": 6.83225675065513e-05, "loss": 0.4399, "step": 33425 }, { "epoch": 1.6971608432435177, "grad_norm": 0.02095148113976031, "learning_rate": 6.821082848618988e-05, "loss": 0.4599, "step": 33430 }, { "epoch": 1.6974146793415492, "grad_norm": 0.023138372142731362, "learning_rate": 6.809917422388961e-05, "loss": 0.4756, "step": 33435 }, { "epoch": 1.6976685154395805, "grad_norm": 0.022979590715020475, "learning_rate": 6.798760474156745e-05, "loss": 0.4845, "step": 33440 }, { "epoch": 1.697922351537612, "grad_norm": 0.02503552519860586, "learning_rate": 6.787612006112409e-05, "loss": 0.4802, "step": 33445 }, { "epoch": 1.6981761876356436, "grad_norm": 0.02551716298674583, "learning_rate": 6.77647202044433e-05, "loss": 0.4388, "step": 33450 }, { "epoch": 1.6984300237336751, "grad_norm": 0.02430410214676648, "learning_rate": 6.765340519339252e-05, "loss": 0.4553, "step": 33455 }, { "epoch": 1.6986838598317067, "grad_norm": 0.02781257765021097, "learning_rate": 6.754217504982202e-05, "loss": 0.4474, "step": 33460 }, { "epoch": 1.6989376959297382, "grad_norm": 0.021901755276533954, "learning_rate": 6.743102979556604e-05, "loss": 0.4726, "step": 33465 }, { "epoch": 1.6991915320277697, "grad_norm": 0.02314556208967541, "learning_rate": 6.731996945244162e-05, "loss": 0.435, "step": 33470 }, { "epoch": 1.6994453681258013, "grad_norm": 0.0216630417947033, "learning_rate": 6.720899404224934e-05, "loss": 0.4521, "step": 33475 }, { "epoch": 1.6996992042238328, "grad_norm": 0.02078810198958909, "learning_rate": 6.709810358677337e-05, "loss": 0.4387, "step": 33480 }, { "epoch": 1.6999530403218641, "grad_norm": 0.021858124288052016, "learning_rate": 6.698729810778065e-05, "loss": 0.4292, "step": 33485 }, { "epoch": 1.7002068764198957, "grad_norm": 0.02220940183114807, "learning_rate": 6.687657762702203e-05, "loss": 0.4541, "step": 33490 }, { "epoch": 1.7004607125179272, "grad_norm": 0.021898318644871732, "learning_rate": 6.67659421662311e-05, "loss": 0.4709, "step": 33495 }, { "epoch": 1.7007145486159587, "grad_norm": 0.019784763924519774, "learning_rate": 6.665539174712532e-05, "loss": 0.4626, "step": 33500 }, { "epoch": 1.70096838471399, "grad_norm": 0.022919389047645698, "learning_rate": 6.654492639140492e-05, "loss": 0.4507, "step": 33505 }, { "epoch": 1.7012222208120216, "grad_norm": 0.02271121221914657, "learning_rate": 6.643454612075395e-05, "loss": 0.4652, "step": 33510 }, { "epoch": 1.701476056910053, "grad_norm": 0.022198266733550638, "learning_rate": 6.632425095683925e-05, "loss": 0.4125, "step": 33515 }, { "epoch": 1.7017298930080846, "grad_norm": 0.023454135381851583, "learning_rate": 6.62140409213115e-05, "loss": 0.4537, "step": 33520 }, { "epoch": 1.7019837291061162, "grad_norm": 0.02311814200716394, "learning_rate": 6.610391603580412e-05, "loss": 0.4415, "step": 33525 }, { "epoch": 1.7022375652041477, "grad_norm": 0.02338916064340492, "learning_rate": 6.599387632193426e-05, "loss": 0.468, "step": 33530 }, { "epoch": 1.7024914013021792, "grad_norm": 0.022293680542931123, "learning_rate": 6.588392180130198e-05, "loss": 0.4832, "step": 33535 }, { "epoch": 1.7027452374002108, "grad_norm": 0.02228526819113989, "learning_rate": 6.577405249549096e-05, "loss": 0.4406, "step": 33540 }, { "epoch": 1.7029990734982423, "grad_norm": 0.02669841047779239, "learning_rate": 6.566426842606793e-05, "loss": 0.4683, "step": 33545 }, { "epoch": 1.7032529095962738, "grad_norm": 0.020687449495207438, "learning_rate": 6.555456961458311e-05, "loss": 0.4788, "step": 33550 }, { "epoch": 1.7035067456943052, "grad_norm": 0.02316737056094181, "learning_rate": 6.544495608256957e-05, "loss": 0.4479, "step": 33555 }, { "epoch": 1.7037605817923367, "grad_norm": 0.02015658685414917, "learning_rate": 6.533542785154412e-05, "loss": 0.4675, "step": 33560 }, { "epoch": 1.7040144178903682, "grad_norm": 0.024630709903064985, "learning_rate": 6.522598494300647e-05, "loss": 0.4612, "step": 33565 }, { "epoch": 1.7042682539883995, "grad_norm": 0.021211217998291603, "learning_rate": 6.511662737843981e-05, "loss": 0.4552, "step": 33570 }, { "epoch": 1.704522090086431, "grad_norm": 0.022897383797772494, "learning_rate": 6.500735517931033e-05, "loss": 0.4553, "step": 33575 }, { "epoch": 1.7047759261844626, "grad_norm": 0.021403658562759068, "learning_rate": 6.489816836706786e-05, "loss": 0.4417, "step": 33580 }, { "epoch": 1.7050297622824941, "grad_norm": 0.024079482108871682, "learning_rate": 6.478906696314496e-05, "loss": 0.4456, "step": 33585 }, { "epoch": 1.7052835983805257, "grad_norm": 0.02474012090426629, "learning_rate": 6.468005098895797e-05, "loss": 0.4528, "step": 33590 }, { "epoch": 1.7055374344785572, "grad_norm": 0.025738626621193236, "learning_rate": 6.457112046590585e-05, "loss": 0.5034, "step": 33595 }, { "epoch": 1.7057912705765887, "grad_norm": 0.0219847722352076, "learning_rate": 6.446227541537136e-05, "loss": 0.4519, "step": 33600 }, { "epoch": 1.7060451066746203, "grad_norm": 0.023838238873203427, "learning_rate": 6.43535158587203e-05, "loss": 0.4685, "step": 33605 }, { "epoch": 1.7062989427726518, "grad_norm": 0.02133132469936017, "learning_rate": 6.424484181730134e-05, "loss": 0.4578, "step": 33610 }, { "epoch": 1.7065527788706834, "grad_norm": 0.020615296500899355, "learning_rate": 6.413625331244698e-05, "loss": 0.4472, "step": 33615 }, { "epoch": 1.7068066149687147, "grad_norm": 0.02205130832162417, "learning_rate": 6.402775036547231e-05, "loss": 0.4635, "step": 33620 }, { "epoch": 1.7070604510667462, "grad_norm": 0.022030504089429213, "learning_rate": 6.391933299767622e-05, "loss": 0.4566, "step": 33625 }, { "epoch": 1.7073142871647777, "grad_norm": 0.026096405850323693, "learning_rate": 6.381100123034017e-05, "loss": 0.4668, "step": 33630 }, { "epoch": 1.707568123262809, "grad_norm": 0.021626998107609114, "learning_rate": 6.370275508472945e-05, "loss": 0.4256, "step": 33635 }, { "epoch": 1.7078219593608406, "grad_norm": 0.020387795966162997, "learning_rate": 6.359459458209194e-05, "loss": 0.4587, "step": 33640 }, { "epoch": 1.7080757954588721, "grad_norm": 0.03128743884752836, "learning_rate": 6.348651974365932e-05, "loss": 0.4348, "step": 33645 }, { "epoch": 1.7083296315569036, "grad_norm": 0.022169939171472575, "learning_rate": 6.337853059064586e-05, "loss": 0.4487, "step": 33650 }, { "epoch": 1.7085834676549352, "grad_norm": 0.021740231589529504, "learning_rate": 6.327062714424946e-05, "loss": 0.4785, "step": 33655 }, { "epoch": 1.7088373037529667, "grad_norm": 0.023768265517867246, "learning_rate": 6.31628094256509e-05, "loss": 0.4787, "step": 33660 }, { "epoch": 1.7090911398509983, "grad_norm": 0.024960047338923673, "learning_rate": 6.305507745601446e-05, "loss": 0.4519, "step": 33665 }, { "epoch": 1.7093449759490298, "grad_norm": 0.02652925712341185, "learning_rate": 6.294743125648722e-05, "loss": 0.432, "step": 33670 }, { "epoch": 1.7095988120470613, "grad_norm": 0.021458655669676602, "learning_rate": 6.28398708481997e-05, "loss": 0.4734, "step": 33675 }, { "epoch": 1.7098526481450929, "grad_norm": 0.02287235529136257, "learning_rate": 6.273239625226534e-05, "loss": 0.4538, "step": 33680 }, { "epoch": 1.7101064842431242, "grad_norm": 0.02228112960841006, "learning_rate": 6.262500748978106e-05, "loss": 0.4456, "step": 33685 }, { "epoch": 1.7103603203411557, "grad_norm": 0.021753237335259395, "learning_rate": 6.251770458182654e-05, "loss": 0.4251, "step": 33690 }, { "epoch": 1.7106141564391872, "grad_norm": 0.02618127618582834, "learning_rate": 6.241048754946493e-05, "loss": 0.4458, "step": 33695 }, { "epoch": 1.7108679925372186, "grad_norm": 0.022092349146015322, "learning_rate": 6.23033564137423e-05, "loss": 0.4643, "step": 33700 }, { "epoch": 1.71112182863525, "grad_norm": 0.02659549264775889, "learning_rate": 6.219631119568814e-05, "loss": 0.4926, "step": 33705 }, { "epoch": 1.7113756647332816, "grad_norm": 0.021634033709659647, "learning_rate": 6.208935191631465e-05, "loss": 0.4538, "step": 33710 }, { "epoch": 1.7116295008313132, "grad_norm": 0.020836889426777406, "learning_rate": 6.19824785966176e-05, "loss": 0.4703, "step": 33715 }, { "epoch": 1.7118833369293447, "grad_norm": 0.01971938523489474, "learning_rate": 6.187569125757553e-05, "loss": 0.4523, "step": 33720 }, { "epoch": 1.7121371730273762, "grad_norm": 0.021843066134417412, "learning_rate": 6.176898992015034e-05, "loss": 0.4278, "step": 33725 }, { "epoch": 1.7123910091254078, "grad_norm": 0.020570604910276645, "learning_rate": 6.166237460528706e-05, "loss": 0.4728, "step": 33730 }, { "epoch": 1.7126448452234393, "grad_norm": 0.019151004234894124, "learning_rate": 6.155584533391356e-05, "loss": 0.4478, "step": 33735 }, { "epoch": 1.7128986813214708, "grad_norm": 0.020268446809525022, "learning_rate": 6.144940212694122e-05, "loss": 0.4503, "step": 33740 }, { "epoch": 1.7131525174195024, "grad_norm": 0.024437006226734212, "learning_rate": 6.134304500526411e-05, "loss": 0.4756, "step": 33745 }, { "epoch": 1.7134063535175337, "grad_norm": 0.02201855727722671, "learning_rate": 6.123677398975974e-05, "loss": 0.4507, "step": 33750 }, { "epoch": 1.7136601896155652, "grad_norm": 0.021612750422246748, "learning_rate": 6.11305891012885e-05, "loss": 0.4466, "step": 33755 }, { "epoch": 1.7139140257135967, "grad_norm": 0.02317623568444332, "learning_rate": 6.1024490360694016e-05, "loss": 0.4361, "step": 33760 }, { "epoch": 1.7141678618116283, "grad_norm": 0.022813604709884243, "learning_rate": 6.091847778880283e-05, "loss": 0.4513, "step": 33765 }, { "epoch": 1.7144216979096596, "grad_norm": 0.02326852728426279, "learning_rate": 6.081255140642483e-05, "loss": 0.4761, "step": 33770 }, { "epoch": 1.7146755340076911, "grad_norm": 0.02386406329427358, "learning_rate": 6.0706711234352674e-05, "loss": 0.4465, "step": 33775 }, { "epoch": 1.7149293701057227, "grad_norm": 0.022526987576066877, "learning_rate": 6.06009572933624e-05, "loss": 0.4818, "step": 33780 }, { "epoch": 1.7151832062037542, "grad_norm": 0.024915335276817413, "learning_rate": 6.0495289604212853e-05, "loss": 0.4499, "step": 33785 }, { "epoch": 1.7154370423017857, "grad_norm": 0.02046552164736613, "learning_rate": 6.038970818764633e-05, "loss": 0.4566, "step": 33790 }, { "epoch": 1.7156908783998173, "grad_norm": 0.02288153652110328, "learning_rate": 6.0284213064387586e-05, "loss": 0.4612, "step": 33795 }, { "epoch": 1.7159447144978488, "grad_norm": 0.02442365282755549, "learning_rate": 6.0178804255145106e-05, "loss": 0.4732, "step": 33800 }, { "epoch": 1.7161985505958803, "grad_norm": 0.025400195295964864, "learning_rate": 6.007348178060984e-05, "loss": 0.4789, "step": 33805 }, { "epoch": 1.7164523866939119, "grad_norm": 0.021023893448474346, "learning_rate": 5.996824566145631e-05, "loss": 0.454, "step": 33810 }, { "epoch": 1.7167062227919434, "grad_norm": 0.02088473207737844, "learning_rate": 5.98630959183416e-05, "loss": 0.4452, "step": 33815 }, { "epoch": 1.7169600588899747, "grad_norm": 0.021319092582090367, "learning_rate": 5.975803257190632e-05, "loss": 0.452, "step": 33820 }, { "epoch": 1.7172138949880063, "grad_norm": 0.02336522341312864, "learning_rate": 5.965305564277368e-05, "loss": 0.4185, "step": 33825 }, { "epoch": 1.7174677310860378, "grad_norm": 0.02176603784713292, "learning_rate": 5.954816515155026e-05, "loss": 0.4783, "step": 33830 }, { "epoch": 1.717721567184069, "grad_norm": 0.021689798405552758, "learning_rate": 5.944336111882542e-05, "loss": 0.4443, "step": 33835 }, { "epoch": 1.7179754032821006, "grad_norm": 0.021388359751042172, "learning_rate": 5.933864356517177e-05, "loss": 0.4269, "step": 33840 }, { "epoch": 1.7182292393801322, "grad_norm": 0.021256453398072323, "learning_rate": 5.923401251114485e-05, "loss": 0.4655, "step": 33845 }, { "epoch": 1.7184830754781637, "grad_norm": 0.02151794556558625, "learning_rate": 5.9129467977283135e-05, "loss": 0.4529, "step": 33850 }, { "epoch": 1.7187369115761952, "grad_norm": 0.029363161811987813, "learning_rate": 5.902500998410831e-05, "loss": 0.4275, "step": 33855 }, { "epoch": 1.7189907476742268, "grad_norm": 0.02440315247691098, "learning_rate": 5.892063855212476e-05, "loss": 0.4658, "step": 33860 }, { "epoch": 1.7192445837722583, "grad_norm": 0.018975284982618558, "learning_rate": 5.881635370182037e-05, "loss": 0.4563, "step": 33865 }, { "epoch": 1.7194984198702898, "grad_norm": 0.021764078280474785, "learning_rate": 5.8712155453665426e-05, "loss": 0.4476, "step": 33870 }, { "epoch": 1.7197522559683214, "grad_norm": 0.020822416739688775, "learning_rate": 5.8608043828113744e-05, "loss": 0.45, "step": 33875 }, { "epoch": 1.720006092066353, "grad_norm": 0.022089565418501236, "learning_rate": 5.8504018845601804e-05, "loss": 0.4551, "step": 33880 }, { "epoch": 1.7202599281643842, "grad_norm": 0.02359465062305091, "learning_rate": 5.840008052654927e-05, "loss": 0.4634, "step": 33885 }, { "epoch": 1.7205137642624158, "grad_norm": 0.029544919593911603, "learning_rate": 5.8296228891358604e-05, "loss": 0.4532, "step": 33890 }, { "epoch": 1.7207676003604473, "grad_norm": 0.026839978594084928, "learning_rate": 5.81924639604155e-05, "loss": 0.4486, "step": 33895 }, { "epoch": 1.7210214364584786, "grad_norm": 0.02088653909861625, "learning_rate": 5.808878575408827e-05, "loss": 0.4447, "step": 33900 }, { "epoch": 1.7212752725565101, "grad_norm": 0.022797571162386343, "learning_rate": 5.798519429272875e-05, "loss": 0.4544, "step": 33905 }, { "epoch": 1.7215291086545417, "grad_norm": 0.023109589614580234, "learning_rate": 5.7881689596671226e-05, "loss": 0.4803, "step": 33910 }, { "epoch": 1.7217829447525732, "grad_norm": 0.020498287608990536, "learning_rate": 5.777827168623323e-05, "loss": 0.4638, "step": 33915 }, { "epoch": 1.7220367808506047, "grad_norm": 0.021809212367703504, "learning_rate": 5.767494058171507e-05, "loss": 0.4412, "step": 33920 }, { "epoch": 1.7222906169486363, "grad_norm": 0.01961469054428933, "learning_rate": 5.757169630340031e-05, "loss": 0.4349, "step": 33925 }, { "epoch": 1.7225444530466678, "grad_norm": 0.021210553875649015, "learning_rate": 5.7468538871555064e-05, "loss": 0.4451, "step": 33930 }, { "epoch": 1.7227982891446993, "grad_norm": 0.023689032069760745, "learning_rate": 5.736546830642886e-05, "loss": 0.4785, "step": 33935 }, { "epoch": 1.7230521252427309, "grad_norm": 0.022017968208859488, "learning_rate": 5.726248462825373e-05, "loss": 0.4336, "step": 33940 }, { "epoch": 1.7233059613407624, "grad_norm": 0.021437511540021992, "learning_rate": 5.715958785724501e-05, "loss": 0.4562, "step": 33945 }, { "epoch": 1.7235597974387937, "grad_norm": 0.021235793273451938, "learning_rate": 5.705677801360065e-05, "loss": 0.4432, "step": 33950 }, { "epoch": 1.7238136335368253, "grad_norm": 0.02204282859521283, "learning_rate": 5.69540551175019e-05, "loss": 0.4808, "step": 33955 }, { "epoch": 1.7240674696348568, "grad_norm": 0.022958015134989514, "learning_rate": 5.6851419189112575e-05, "loss": 0.4472, "step": 33960 }, { "epoch": 1.724321305732888, "grad_norm": 0.022808401192192258, "learning_rate": 5.6748870248579666e-05, "loss": 0.4495, "step": 33965 }, { "epoch": 1.7245751418309196, "grad_norm": 0.02509495051395949, "learning_rate": 5.6646408316033185e-05, "loss": 0.4662, "step": 33970 }, { "epoch": 1.7248289779289512, "grad_norm": 0.022427454109016656, "learning_rate": 5.654403341158565e-05, "loss": 0.4887, "step": 33975 }, { "epoch": 1.7250828140269827, "grad_norm": 0.02867990044631863, "learning_rate": 5.644174555533288e-05, "loss": 0.4325, "step": 33980 }, { "epoch": 1.7253366501250142, "grad_norm": 0.02527782595636069, "learning_rate": 5.633954476735337e-05, "loss": 0.455, "step": 33985 }, { "epoch": 1.7255904862230458, "grad_norm": 0.02978500394864718, "learning_rate": 5.623743106770879e-05, "loss": 0.4693, "step": 33990 }, { "epoch": 1.7258443223210773, "grad_norm": 0.021989421251067075, "learning_rate": 5.6135404476443384e-05, "loss": 0.4501, "step": 33995 }, { "epoch": 1.7260981584191089, "grad_norm": 0.02414972943152398, "learning_rate": 5.603346501358458e-05, "loss": 0.5015, "step": 34000 }, { "epoch": 1.7263519945171404, "grad_norm": 0.020959704894745596, "learning_rate": 5.593161269914249e-05, "loss": 0.4601, "step": 34005 }, { "epoch": 1.726605830615172, "grad_norm": 0.01883199147720315, "learning_rate": 5.5829847553110326e-05, "loss": 0.4297, "step": 34010 }, { "epoch": 1.7268596667132032, "grad_norm": 0.02135197011443074, "learning_rate": 5.572816959546389e-05, "loss": 0.4358, "step": 34015 }, { "epoch": 1.7271135028112348, "grad_norm": 0.02107503468536846, "learning_rate": 5.562657884616223e-05, "loss": 0.4669, "step": 34020 }, { "epoch": 1.7273673389092663, "grad_norm": 0.023771596018301224, "learning_rate": 5.5525075325147054e-05, "loss": 0.4587, "step": 34025 }, { "epoch": 1.7276211750072978, "grad_norm": 0.021587351694255002, "learning_rate": 5.542365905234309e-05, "loss": 0.4481, "step": 34030 }, { "epoch": 1.7278750111053292, "grad_norm": 0.020770474747904666, "learning_rate": 5.532233004765763e-05, "loss": 0.4439, "step": 34035 }, { "epoch": 1.7281288472033607, "grad_norm": 0.01967096579673539, "learning_rate": 5.5221088330981274e-05, "loss": 0.4629, "step": 34040 }, { "epoch": 1.7283826833013922, "grad_norm": 0.02081313356039374, "learning_rate": 5.5119933922187115e-05, "loss": 0.4447, "step": 34045 }, { "epoch": 1.7286365193994238, "grad_norm": 0.030103240651800995, "learning_rate": 5.501886684113139e-05, "loss": 0.448, "step": 34050 }, { "epoch": 1.7288903554974553, "grad_norm": 0.021992967904753014, "learning_rate": 5.491788710765289e-05, "loss": 0.4657, "step": 34055 }, { "epoch": 1.7291441915954868, "grad_norm": 0.024640038006979705, "learning_rate": 5.481699474157364e-05, "loss": 0.4678, "step": 34060 }, { "epoch": 1.7293980276935184, "grad_norm": 0.02259317629920808, "learning_rate": 5.4716189762698044e-05, "loss": 0.4526, "step": 34065 }, { "epoch": 1.72965186379155, "grad_norm": 0.021047781618106692, "learning_rate": 5.461547219081392e-05, "loss": 0.4498, "step": 34070 }, { "epoch": 1.7299056998895814, "grad_norm": 0.024835964339130236, "learning_rate": 5.4514842045691346e-05, "loss": 0.4491, "step": 34075 }, { "epoch": 1.730159535987613, "grad_norm": 0.023902080014017398, "learning_rate": 5.441429934708369e-05, "loss": 0.4293, "step": 34080 }, { "epoch": 1.7304133720856443, "grad_norm": 0.022346544604734297, "learning_rate": 5.431384411472701e-05, "loss": 0.475, "step": 34085 }, { "epoch": 1.7306672081836758, "grad_norm": 0.022715026835179515, "learning_rate": 5.421347636834001e-05, "loss": 0.4446, "step": 34090 }, { "epoch": 1.7309210442817073, "grad_norm": 0.022933158827406614, "learning_rate": 5.411319612762455e-05, "loss": 0.4259, "step": 34095 }, { "epoch": 1.7311748803797387, "grad_norm": 0.024421520760028334, "learning_rate": 5.4013003412265004e-05, "loss": 0.4691, "step": 34100 }, { "epoch": 1.7314287164777702, "grad_norm": 0.02420532978107086, "learning_rate": 5.3912898241928796e-05, "loss": 0.4405, "step": 34105 }, { "epoch": 1.7316825525758017, "grad_norm": 0.02170211562927459, "learning_rate": 5.3812880636265935e-05, "loss": 0.4729, "step": 34110 }, { "epoch": 1.7319363886738333, "grad_norm": 0.02370624185573575, "learning_rate": 5.371295061490961e-05, "loss": 0.4741, "step": 34115 }, { "epoch": 1.7321902247718648, "grad_norm": 0.021244344362853385, "learning_rate": 5.3613108197475335e-05, "loss": 0.4529, "step": 34120 }, { "epoch": 1.7324440608698963, "grad_norm": 0.02100785970567331, "learning_rate": 5.3513353403561895e-05, "loss": 0.462, "step": 34125 }, { "epoch": 1.7326978969679279, "grad_norm": 0.019808061436518504, "learning_rate": 5.3413686252750445e-05, "loss": 0.4525, "step": 34130 }, { "epoch": 1.7329517330659594, "grad_norm": 0.02187199410993585, "learning_rate": 5.3314106764605354e-05, "loss": 0.4645, "step": 34135 }, { "epoch": 1.733205569163991, "grad_norm": 0.024006944977098717, "learning_rate": 5.32146149586733e-05, "loss": 0.4742, "step": 34140 }, { "epoch": 1.7334594052620225, "grad_norm": 0.019976476111784883, "learning_rate": 5.3115210854484394e-05, "loss": 0.4409, "step": 34145 }, { "epoch": 1.7337132413600538, "grad_norm": 0.026385558195068118, "learning_rate": 5.301589447155092e-05, "loss": 0.4456, "step": 34150 }, { "epoch": 1.7339670774580853, "grad_norm": 0.021897238374891215, "learning_rate": 5.2916665829368324e-05, "loss": 0.4438, "step": 34155 }, { "epoch": 1.7342209135561168, "grad_norm": 0.024811420195424226, "learning_rate": 5.281752494741454e-05, "loss": 0.4627, "step": 34160 }, { "epoch": 1.7344747496541482, "grad_norm": 0.024672823583302583, "learning_rate": 5.2718471845150604e-05, "loss": 0.4759, "step": 34165 }, { "epoch": 1.7347285857521797, "grad_norm": 0.02181462463091039, "learning_rate": 5.261950654201997e-05, "loss": 0.4559, "step": 34170 }, { "epoch": 1.7349824218502112, "grad_norm": 0.024684395145582377, "learning_rate": 5.252062905744926e-05, "loss": 0.4422, "step": 34175 }, { "epoch": 1.7352362579482428, "grad_norm": 0.02230800553146116, "learning_rate": 5.2421839410847436e-05, "loss": 0.4637, "step": 34180 }, { "epoch": 1.7354900940462743, "grad_norm": 0.023147228626355657, "learning_rate": 5.2323137621606345e-05, "loss": 0.456, "step": 34185 }, { "epoch": 1.7357439301443058, "grad_norm": 0.02281138844618608, "learning_rate": 5.2224523709100914e-05, "loss": 0.4666, "step": 34190 }, { "epoch": 1.7359977662423374, "grad_norm": 0.022744297929036067, "learning_rate": 5.212599769268833e-05, "loss": 0.4577, "step": 34195 }, { "epoch": 1.736251602340369, "grad_norm": 0.026435757976267977, "learning_rate": 5.202755959170885e-05, "loss": 0.4792, "step": 34200 }, { "epoch": 1.7365054384384004, "grad_norm": 0.02327427177805453, "learning_rate": 5.1929209425485346e-05, "loss": 0.454, "step": 34205 }, { "epoch": 1.736759274536432, "grad_norm": 0.019262280027333455, "learning_rate": 5.1830947213323656e-05, "loss": 0.4344, "step": 34210 }, { "epoch": 1.7370131106344633, "grad_norm": 0.02020587242748104, "learning_rate": 5.17327729745119e-05, "loss": 0.4687, "step": 34215 }, { "epoch": 1.7372669467324948, "grad_norm": 0.022404415922131816, "learning_rate": 5.163468672832139e-05, "loss": 0.444, "step": 34220 }, { "epoch": 1.7375207828305264, "grad_norm": 0.022425367350743124, "learning_rate": 5.1536688494005835e-05, "loss": 0.4682, "step": 34225 }, { "epoch": 1.7377746189285577, "grad_norm": 0.022763870580745405, "learning_rate": 5.14387782908019e-05, "loss": 0.4601, "step": 34230 }, { "epoch": 1.7380284550265892, "grad_norm": 0.023042118858273286, "learning_rate": 5.134095613792872e-05, "loss": 0.4637, "step": 34235 }, { "epoch": 1.7382822911246207, "grad_norm": 0.02604482240900869, "learning_rate": 5.124322205458848e-05, "loss": 0.4382, "step": 34240 }, { "epoch": 1.7385361272226523, "grad_norm": 0.021731629118018583, "learning_rate": 5.1145576059965726e-05, "loss": 0.4654, "step": 34245 }, { "epoch": 1.7387899633206838, "grad_norm": 0.019413019416783566, "learning_rate": 5.1048018173228015e-05, "loss": 0.4529, "step": 34250 }, { "epoch": 1.7390437994187153, "grad_norm": 0.02664943630561449, "learning_rate": 5.0950548413525365e-05, "loss": 0.4562, "step": 34255 }, { "epoch": 1.7392976355167469, "grad_norm": 0.02087198766602968, "learning_rate": 5.085316679999064e-05, "loss": 0.4422, "step": 34260 }, { "epoch": 1.7395514716147784, "grad_norm": 0.021876909463953986, "learning_rate": 5.075587335173948e-05, "loss": 0.4453, "step": 34265 }, { "epoch": 1.73980530771281, "grad_norm": 0.02240544876246522, "learning_rate": 5.06586680878699e-05, "loss": 0.4302, "step": 34270 }, { "epoch": 1.7400591438108415, "grad_norm": 0.020994222583535893, "learning_rate": 5.056155102746302e-05, "loss": 0.4592, "step": 34275 }, { "epoch": 1.7403129799088728, "grad_norm": 0.022701990879272254, "learning_rate": 5.0464522189582194e-05, "loss": 0.4851, "step": 34280 }, { "epoch": 1.7405668160069043, "grad_norm": 0.027194326586476105, "learning_rate": 5.036758159327398e-05, "loss": 0.4818, "step": 34285 }, { "epoch": 1.7408206521049359, "grad_norm": 0.02078698225293263, "learning_rate": 5.027072925756709e-05, "loss": 0.4662, "step": 34290 }, { "epoch": 1.7410744882029674, "grad_norm": 0.02420724997035512, "learning_rate": 5.017396520147333e-05, "loss": 0.4687, "step": 34295 }, { "epoch": 1.7413283243009987, "grad_norm": 0.031193300288096932, "learning_rate": 5.007728944398682e-05, "loss": 0.4567, "step": 34300 }, { "epoch": 1.7415821603990302, "grad_norm": 0.020794031282449752, "learning_rate": 4.9980702004084724e-05, "loss": 0.4529, "step": 34305 }, { "epoch": 1.7418359964970618, "grad_norm": 0.022257297830772214, "learning_rate": 4.9884202900726486e-05, "loss": 0.4428, "step": 34310 }, { "epoch": 1.7420898325950933, "grad_norm": 0.019932049025810045, "learning_rate": 4.978779215285456e-05, "loss": 0.4386, "step": 34315 }, { "epoch": 1.7423436686931248, "grad_norm": 0.026391941731528173, "learning_rate": 4.9691469779393706e-05, "loss": 0.464, "step": 34320 }, { "epoch": 1.7425975047911564, "grad_norm": 0.022393195253757603, "learning_rate": 4.959523579925179e-05, "loss": 0.4803, "step": 34325 }, { "epoch": 1.742851340889188, "grad_norm": 0.029090252728671346, "learning_rate": 4.949909023131888e-05, "loss": 0.4668, "step": 34330 }, { "epoch": 1.7431051769872195, "grad_norm": 0.024897631086516752, "learning_rate": 4.940303309446798e-05, "loss": 0.4674, "step": 34335 }, { "epoch": 1.743359013085251, "grad_norm": 0.020804078169891622, "learning_rate": 4.9307064407554445e-05, "loss": 0.4664, "step": 34340 }, { "epoch": 1.7436128491832825, "grad_norm": 0.021704515629220097, "learning_rate": 4.921118418941667e-05, "loss": 0.4632, "step": 34345 }, { "epoch": 1.7438666852813138, "grad_norm": 0.024256706115496882, "learning_rate": 4.911539245887525e-05, "loss": 0.4219, "step": 34350 }, { "epoch": 1.7441205213793454, "grad_norm": 0.02298081741126669, "learning_rate": 4.901968923473382e-05, "loss": 0.4591, "step": 34355 }, { "epoch": 1.744374357477377, "grad_norm": 0.03464863168116285, "learning_rate": 4.8924074535778294e-05, "loss": 0.446, "step": 34360 }, { "epoch": 1.7446281935754082, "grad_norm": 0.02157577331468465, "learning_rate": 4.882854838077755e-05, "loss": 0.4657, "step": 34365 }, { "epoch": 1.7448820296734397, "grad_norm": 0.019964492489978992, "learning_rate": 4.873311078848264e-05, "loss": 0.4643, "step": 34370 }, { "epoch": 1.7451358657714713, "grad_norm": 0.022047232259986137, "learning_rate": 4.863776177762769e-05, "loss": 0.4639, "step": 34375 }, { "epoch": 1.7453897018695028, "grad_norm": 0.03244136132931666, "learning_rate": 4.854250136692912e-05, "loss": 0.4822, "step": 34380 }, { "epoch": 1.7456435379675344, "grad_norm": 0.026966628720723723, "learning_rate": 4.844732957508607e-05, "loss": 0.4612, "step": 34385 }, { "epoch": 1.7458973740655659, "grad_norm": 0.021335951919062863, "learning_rate": 4.8352246420780456e-05, "loss": 0.44, "step": 34390 }, { "epoch": 1.7461512101635974, "grad_norm": 0.021305484071550175, "learning_rate": 4.825725192267638e-05, "loss": 0.4573, "step": 34395 }, { "epoch": 1.746405046261629, "grad_norm": 0.021326881921091657, "learning_rate": 4.816234609942105e-05, "loss": 0.4524, "step": 34400 }, { "epoch": 1.7466588823596605, "grad_norm": 0.02188831813845589, "learning_rate": 4.806752896964373e-05, "loss": 0.4707, "step": 34405 }, { "epoch": 1.746912718457692, "grad_norm": 0.02064699692483605, "learning_rate": 4.79728005519568e-05, "loss": 0.4565, "step": 34410 }, { "epoch": 1.7471665545557233, "grad_norm": 0.021389953513019002, "learning_rate": 4.787816086495478e-05, "loss": 0.47, "step": 34415 }, { "epoch": 1.7474203906537549, "grad_norm": 0.019822070124622355, "learning_rate": 4.7783609927215145e-05, "loss": 0.4244, "step": 34420 }, { "epoch": 1.7476742267517864, "grad_norm": 0.01981125932792789, "learning_rate": 4.7689147757297605e-05, "loss": 0.4369, "step": 34425 }, { "epoch": 1.7479280628498177, "grad_norm": 0.028737375632746403, "learning_rate": 4.7594774373744766e-05, "loss": 0.4647, "step": 34430 }, { "epoch": 1.7481818989478493, "grad_norm": 0.031205219363603577, "learning_rate": 4.750048979508148e-05, "loss": 0.4611, "step": 34435 }, { "epoch": 1.7484357350458808, "grad_norm": 0.022939345811528095, "learning_rate": 4.7406294039815553e-05, "loss": 0.4711, "step": 34440 }, { "epoch": 1.7486895711439123, "grad_norm": 0.027141259222797177, "learning_rate": 4.731218712643681e-05, "loss": 0.4422, "step": 34445 }, { "epoch": 1.7489434072419439, "grad_norm": 0.027268634228652108, "learning_rate": 4.721816907341836e-05, "loss": 0.4768, "step": 34450 }, { "epoch": 1.7491972433399754, "grad_norm": 0.023701231135006304, "learning_rate": 4.712423989921527e-05, "loss": 0.4631, "step": 34455 }, { "epoch": 1.749451079438007, "grad_norm": 0.02347054862041871, "learning_rate": 4.703039962226541e-05, "loss": 0.4619, "step": 34460 }, { "epoch": 1.7497049155360385, "grad_norm": 0.028567040630409428, "learning_rate": 4.693664826098909e-05, "loss": 0.4311, "step": 34465 }, { "epoch": 1.74995875163407, "grad_norm": 0.02148438393708741, "learning_rate": 4.684298583378943e-05, "loss": 0.4438, "step": 34470 }, { "epoch": 1.7502125877321015, "grad_norm": 0.02216116188881408, "learning_rate": 4.674941235905161e-05, "loss": 0.4676, "step": 34475 }, { "epoch": 1.7504664238301328, "grad_norm": 0.022665906180228006, "learning_rate": 4.6655927855143886e-05, "loss": 0.4453, "step": 34480 }, { "epoch": 1.7507202599281644, "grad_norm": 0.022100912241708407, "learning_rate": 4.656253234041663e-05, "loss": 0.4854, "step": 34485 }, { "epoch": 1.750974096026196, "grad_norm": 0.019646671652917273, "learning_rate": 4.646922583320307e-05, "loss": 0.4738, "step": 34490 }, { "epoch": 1.7512279321242272, "grad_norm": 0.02278953025792618, "learning_rate": 4.637600835181866e-05, "loss": 0.469, "step": 34495 }, { "epoch": 1.7514817682222588, "grad_norm": 0.022089585019886025, "learning_rate": 4.6282879914561646e-05, "loss": 0.4497, "step": 34500 }, { "epoch": 1.7517356043202903, "grad_norm": 0.023445723973391685, "learning_rate": 4.6189840539712534e-05, "loss": 0.4551, "step": 34505 }, { "epoch": 1.7519894404183218, "grad_norm": 0.02715037290748338, "learning_rate": 4.609689024553459e-05, "loss": 0.462, "step": 34510 }, { "epoch": 1.7522432765163534, "grad_norm": 0.02174226702680522, "learning_rate": 4.600402905027357e-05, "loss": 0.4662, "step": 34515 }, { "epoch": 1.752497112614385, "grad_norm": 0.021279501901226617, "learning_rate": 4.5911256972157476e-05, "loss": 0.4485, "step": 34520 }, { "epoch": 1.7527509487124164, "grad_norm": 0.023627373642052033, "learning_rate": 4.581857402939721e-05, "loss": 0.4522, "step": 34525 }, { "epoch": 1.753004784810448, "grad_norm": 0.0215796445898145, "learning_rate": 4.572598024018571e-05, "loss": 0.4708, "step": 34530 }, { "epoch": 1.7532586209084795, "grad_norm": 0.022638555616672124, "learning_rate": 4.563347562269898e-05, "loss": 0.4391, "step": 34535 }, { "epoch": 1.753512457006511, "grad_norm": 0.02165756905100213, "learning_rate": 4.5541060195094965e-05, "loss": 0.4557, "step": 34540 }, { "epoch": 1.7537662931045424, "grad_norm": 0.021506773610349895, "learning_rate": 4.5448733975514524e-05, "loss": 0.4493, "step": 34545 }, { "epoch": 1.7540201292025739, "grad_norm": 0.021106012563365185, "learning_rate": 4.535649698208066e-05, "loss": 0.4566, "step": 34550 }, { "epoch": 1.7542739653006054, "grad_norm": 0.02090198796312075, "learning_rate": 4.526434923289924e-05, "loss": 0.4457, "step": 34555 }, { "epoch": 1.754527801398637, "grad_norm": 0.02136853005412373, "learning_rate": 4.517229074605822e-05, "loss": 0.4747, "step": 34560 }, { "epoch": 1.7547816374966683, "grad_norm": 0.02157527210244312, "learning_rate": 4.508032153962832e-05, "loss": 0.4704, "step": 34565 }, { "epoch": 1.7550354735946998, "grad_norm": 0.023144728143769867, "learning_rate": 4.49884416316626e-05, "loss": 0.4704, "step": 34570 }, { "epoch": 1.7552893096927313, "grad_norm": 0.020386305284952208, "learning_rate": 4.489665104019675e-05, "loss": 0.4557, "step": 34575 }, { "epoch": 1.7555431457907629, "grad_norm": 0.022166335060348437, "learning_rate": 4.4804949783248564e-05, "loss": 0.46, "step": 34580 }, { "epoch": 1.7557969818887944, "grad_norm": 0.02094210104633068, "learning_rate": 4.471333787881881e-05, "loss": 0.4561, "step": 34585 }, { "epoch": 1.756050817986826, "grad_norm": 0.02140439011369098, "learning_rate": 4.4621815344890235e-05, "loss": 0.4253, "step": 34590 }, { "epoch": 1.7563046540848575, "grad_norm": 0.023342207225477016, "learning_rate": 4.453038219942845e-05, "loss": 0.4902, "step": 34595 }, { "epoch": 1.756558490182889, "grad_norm": 0.019041730139629988, "learning_rate": 4.443903846038111e-05, "loss": 0.4525, "step": 34600 }, { "epoch": 1.7568123262809205, "grad_norm": 0.024800524118674366, "learning_rate": 4.4347784145678695e-05, "loss": 0.4606, "step": 34605 }, { "epoch": 1.7570661623789519, "grad_norm": 0.02916453322026752, "learning_rate": 4.425661927323388e-05, "loss": 0.4643, "step": 34610 }, { "epoch": 1.7573199984769834, "grad_norm": 0.024905988994358454, "learning_rate": 4.416554386094196e-05, "loss": 0.437, "step": 34615 }, { "epoch": 1.757573834575015, "grad_norm": 0.030406574388976055, "learning_rate": 4.407455792668047e-05, "loss": 0.4883, "step": 34620 }, { "epoch": 1.7578276706730465, "grad_norm": 0.020939906014040045, "learning_rate": 4.3983661488309565e-05, "loss": 0.4282, "step": 34625 }, { "epoch": 1.7580815067710778, "grad_norm": 0.022365655581909887, "learning_rate": 4.389285456367181e-05, "loss": 0.478, "step": 34630 }, { "epoch": 1.7583353428691093, "grad_norm": 0.023054617223912062, "learning_rate": 4.380213717059206e-05, "loss": 0.4749, "step": 34635 }, { "epoch": 1.7585891789671408, "grad_norm": 0.021279493374970884, "learning_rate": 4.371150932687784e-05, "loss": 0.4535, "step": 34640 }, { "epoch": 1.7588430150651724, "grad_norm": 0.021608983124179647, "learning_rate": 4.3620971050318706e-05, "loss": 0.452, "step": 34645 }, { "epoch": 1.759096851163204, "grad_norm": 0.02125252639391023, "learning_rate": 4.3530522358687045e-05, "loss": 0.4427, "step": 34650 }, { "epoch": 1.7593506872612354, "grad_norm": 0.021716546978771272, "learning_rate": 4.3440163269737374e-05, "loss": 0.4551, "step": 34655 }, { "epoch": 1.759604523359267, "grad_norm": 0.022723743581024202, "learning_rate": 4.334989380120691e-05, "loss": 0.4373, "step": 34660 }, { "epoch": 1.7598583594572985, "grad_norm": 0.023783058996820775, "learning_rate": 4.3259713970814904e-05, "loss": 0.471, "step": 34665 }, { "epoch": 1.76011219555533, "grad_norm": 0.022842675520249336, "learning_rate": 4.316962379626333e-05, "loss": 0.4575, "step": 34670 }, { "epoch": 1.7603660316533616, "grad_norm": 0.024246326137658224, "learning_rate": 4.3079623295236345e-05, "loss": 0.4028, "step": 34675 }, { "epoch": 1.760619867751393, "grad_norm": 0.025535918723743688, "learning_rate": 4.298971248540068e-05, "loss": 0.4636, "step": 34680 }, { "epoch": 1.7608737038494244, "grad_norm": 0.022390889197142872, "learning_rate": 4.2899891384405196e-05, "loss": 0.4283, "step": 34685 }, { "epoch": 1.761127539947456, "grad_norm": 0.025131303601364605, "learning_rate": 4.281016000988169e-05, "loss": 0.4524, "step": 34690 }, { "epoch": 1.7613813760454873, "grad_norm": 0.02186743401814803, "learning_rate": 4.2720518379443684e-05, "loss": 0.4497, "step": 34695 }, { "epoch": 1.7616352121435188, "grad_norm": 0.02114715992999132, "learning_rate": 4.263096651068754e-05, "loss": 0.4662, "step": 34700 }, { "epoch": 1.7618890482415503, "grad_norm": 0.028404232206161735, "learning_rate": 4.254150442119164e-05, "loss": 0.4742, "step": 34705 }, { "epoch": 1.7621428843395819, "grad_norm": 0.02630860958767224, "learning_rate": 4.2452132128517226e-05, "loss": 0.4296, "step": 34710 }, { "epoch": 1.7623967204376134, "grad_norm": 0.024673175731050732, "learning_rate": 4.236284965020737e-05, "loss": 0.4308, "step": 34715 }, { "epoch": 1.762650556535645, "grad_norm": 0.02180317292962621, "learning_rate": 4.227365700378799e-05, "loss": 0.4469, "step": 34720 }, { "epoch": 1.7629043926336765, "grad_norm": 0.0214905144095388, "learning_rate": 4.2184554206767034e-05, "loss": 0.4723, "step": 34725 }, { "epoch": 1.763158228731708, "grad_norm": 0.021720539302091973, "learning_rate": 4.209554127663495e-05, "loss": 0.4465, "step": 34730 }, { "epoch": 1.7634120648297396, "grad_norm": 0.02368170581782568, "learning_rate": 4.200661823086454e-05, "loss": 0.4699, "step": 34735 }, { "epoch": 1.763665900927771, "grad_norm": 0.022332063931247034, "learning_rate": 4.191778508691102e-05, "loss": 0.4846, "step": 34740 }, { "epoch": 1.7639197370258024, "grad_norm": 0.021873430868819217, "learning_rate": 4.182904186221176e-05, "loss": 0.4475, "step": 34745 }, { "epoch": 1.764173573123834, "grad_norm": 0.018510023864084377, "learning_rate": 4.174038857418666e-05, "loss": 0.4087, "step": 34750 }, { "epoch": 1.7644274092218655, "grad_norm": 0.020083902539849405, "learning_rate": 4.165182524023803e-05, "loss": 0.4578, "step": 34755 }, { "epoch": 1.7646812453198968, "grad_norm": 0.01956729272999206, "learning_rate": 4.156335187775029e-05, "loss": 0.432, "step": 34760 }, { "epoch": 1.7649350814179283, "grad_norm": 0.033417341393678385, "learning_rate": 4.1474968504090385e-05, "loss": 0.4514, "step": 34765 }, { "epoch": 1.7651889175159599, "grad_norm": 0.027037751978040633, "learning_rate": 4.1386675136607434e-05, "loss": 0.4497, "step": 34770 }, { "epoch": 1.7654427536139914, "grad_norm": 0.02221486375997609, "learning_rate": 4.129847179263318e-05, "loss": 0.4261, "step": 34775 }, { "epoch": 1.765696589712023, "grad_norm": 0.02085106880362318, "learning_rate": 4.121035848948124e-05, "loss": 0.4639, "step": 34780 }, { "epoch": 1.7659504258100545, "grad_norm": 0.023019324461147312, "learning_rate": 4.112233524444803e-05, "loss": 0.4712, "step": 34785 }, { "epoch": 1.766204261908086, "grad_norm": 0.019887284736623528, "learning_rate": 4.103440207481196e-05, "loss": 0.4421, "step": 34790 }, { "epoch": 1.7664580980061175, "grad_norm": 0.0244559527736879, "learning_rate": 4.094655899783395e-05, "loss": 0.4429, "step": 34795 }, { "epoch": 1.766711934104149, "grad_norm": 0.023135446169896694, "learning_rate": 4.085880603075703e-05, "loss": 0.435, "step": 34800 }, { "epoch": 1.7669657702021806, "grad_norm": 0.020742048909337243, "learning_rate": 4.077114319080671e-05, "loss": 0.4307, "step": 34805 }, { "epoch": 1.767219606300212, "grad_norm": 0.02392298449389485, "learning_rate": 4.068357049519089e-05, "loss": 0.4433, "step": 34810 }, { "epoch": 1.7674734423982434, "grad_norm": 0.023622721434002313, "learning_rate": 4.0596087961099595e-05, "loss": 0.4809, "step": 34815 }, { "epoch": 1.767727278496275, "grad_norm": 0.02007693135153689, "learning_rate": 4.0508695605705136e-05, "loss": 0.443, "step": 34820 }, { "epoch": 1.7679811145943063, "grad_norm": 0.020604137444585325, "learning_rate": 4.042139344616236e-05, "loss": 0.45, "step": 34825 }, { "epoch": 1.7682349506923378, "grad_norm": 0.021241405566437443, "learning_rate": 4.033418149960799e-05, "loss": 0.4537, "step": 34830 }, { "epoch": 1.7684887867903694, "grad_norm": 0.02142638288944903, "learning_rate": 4.0247059783161565e-05, "loss": 0.4272, "step": 34835 }, { "epoch": 1.768742622888401, "grad_norm": 0.022520249913372568, "learning_rate": 4.0160028313924456e-05, "loss": 0.4789, "step": 34840 }, { "epoch": 1.7689964589864324, "grad_norm": 0.021175421917149957, "learning_rate": 4.007308710898061e-05, "loss": 0.4452, "step": 34845 }, { "epoch": 1.769250295084464, "grad_norm": 0.020501338142536445, "learning_rate": 3.998623618539604e-05, "loss": 0.4764, "step": 34850 }, { "epoch": 1.7695041311824955, "grad_norm": 0.019904516522167997, "learning_rate": 3.9899475560219336e-05, "loss": 0.4533, "step": 34855 }, { "epoch": 1.769757967280527, "grad_norm": 0.023716865857408845, "learning_rate": 3.981280525048098e-05, "loss": 0.4717, "step": 34860 }, { "epoch": 1.7700118033785586, "grad_norm": 0.020968802334100606, "learning_rate": 3.972622527319397e-05, "loss": 0.4447, "step": 34865 }, { "epoch": 1.77026563947659, "grad_norm": 0.0220213821940423, "learning_rate": 3.963973564535361e-05, "loss": 0.4418, "step": 34870 }, { "epoch": 1.7705194755746214, "grad_norm": 0.020738385170455637, "learning_rate": 3.955333638393732e-05, "loss": 0.434, "step": 34875 }, { "epoch": 1.770773311672653, "grad_norm": 0.023096359415643572, "learning_rate": 3.9467027505904916e-05, "loss": 0.4299, "step": 34880 }, { "epoch": 1.7710271477706845, "grad_norm": 0.0199432213339235, "learning_rate": 3.938080902819824e-05, "loss": 0.4624, "step": 34885 }, { "epoch": 1.771280983868716, "grad_norm": 0.026960951902493344, "learning_rate": 3.929468096774175e-05, "loss": 0.4648, "step": 34890 }, { "epoch": 1.7715348199667473, "grad_norm": 0.025299529980017853, "learning_rate": 3.92086433414417e-05, "loss": 0.4428, "step": 34895 }, { "epoch": 1.7717886560647789, "grad_norm": 0.02174184002171596, "learning_rate": 3.9122696166187186e-05, "loss": 0.4534, "step": 34900 }, { "epoch": 1.7720424921628104, "grad_norm": 0.024345802063285356, "learning_rate": 3.903683945884884e-05, "loss": 0.4454, "step": 34905 }, { "epoch": 1.772296328260842, "grad_norm": 0.023452333877164416, "learning_rate": 3.895107323628022e-05, "loss": 0.4538, "step": 34910 }, { "epoch": 1.7725501643588735, "grad_norm": 0.019713235902259953, "learning_rate": 3.8865397515316645e-05, "loss": 0.442, "step": 34915 }, { "epoch": 1.772804000456905, "grad_norm": 0.01862175117732225, "learning_rate": 3.8779812312775885e-05, "loss": 0.4352, "step": 34920 }, { "epoch": 1.7730578365549365, "grad_norm": 0.02407320867951667, "learning_rate": 3.869431764545772e-05, "loss": 0.4435, "step": 34925 }, { "epoch": 1.773311672652968, "grad_norm": 0.02675629662340058, "learning_rate": 3.860891353014462e-05, "loss": 0.4357, "step": 34930 }, { "epoch": 1.7735655087509996, "grad_norm": 0.027161122772953582, "learning_rate": 3.8523599983600776e-05, "loss": 0.4311, "step": 34935 }, { "epoch": 1.7738193448490311, "grad_norm": 0.023763895197796703, "learning_rate": 3.843837702257291e-05, "loss": 0.4361, "step": 34940 }, { "epoch": 1.7740731809470625, "grad_norm": 0.02520329920417153, "learning_rate": 3.835324466378981e-05, "loss": 0.4608, "step": 34945 }, { "epoch": 1.774327017045094, "grad_norm": 0.026335133730435882, "learning_rate": 3.82682029239626e-05, "loss": 0.4425, "step": 34950 }, { "epoch": 1.7745808531431255, "grad_norm": 0.02077693291797904, "learning_rate": 3.8183251819784436e-05, "loss": 0.4415, "step": 34955 }, { "epoch": 1.7748346892411568, "grad_norm": 0.021994750743455038, "learning_rate": 3.8098391367930976e-05, "loss": 0.4816, "step": 34960 }, { "epoch": 1.7750885253391884, "grad_norm": 0.022292288456564727, "learning_rate": 3.8013621585059665e-05, "loss": 0.4741, "step": 34965 }, { "epoch": 1.77534236143722, "grad_norm": 0.022199822217541125, "learning_rate": 3.7928942487810594e-05, "loss": 0.4602, "step": 34970 }, { "epoch": 1.7755961975352514, "grad_norm": 0.026501730743718647, "learning_rate": 3.7844354092805735e-05, "loss": 0.451, "step": 34975 }, { "epoch": 1.775850033633283, "grad_norm": 0.02266914903426037, "learning_rate": 3.775985641664942e-05, "loss": 0.4661, "step": 34980 }, { "epoch": 1.7761038697313145, "grad_norm": 0.01942723439600507, "learning_rate": 3.767544947592805e-05, "loss": 0.4289, "step": 34985 }, { "epoch": 1.776357705829346, "grad_norm": 0.021825648976358702, "learning_rate": 3.759113328721036e-05, "loss": 0.433, "step": 34990 }, { "epoch": 1.7766115419273776, "grad_norm": 0.023665285889932587, "learning_rate": 3.750690786704725e-05, "loss": 0.4506, "step": 34995 }, { "epoch": 1.7768653780254091, "grad_norm": 0.022461997989559473, "learning_rate": 3.742277323197158e-05, "loss": 0.4552, "step": 35000 }, { "epoch": 1.7771192141234406, "grad_norm": 0.024581088277719675, "learning_rate": 3.733872939849875e-05, "loss": 0.4634, "step": 35005 }, { "epoch": 1.777373050221472, "grad_norm": 0.026586649366082245, "learning_rate": 3.725477638312591e-05, "loss": 0.4461, "step": 35010 }, { "epoch": 1.7776268863195035, "grad_norm": 0.029002282206786262, "learning_rate": 3.717091420233293e-05, "loss": 0.4392, "step": 35015 }, { "epoch": 1.777880722417535, "grad_norm": 0.025326290885756917, "learning_rate": 3.708714287258125e-05, "loss": 0.4314, "step": 35020 }, { "epoch": 1.7781345585155663, "grad_norm": 0.025316121879115406, "learning_rate": 3.700346241031494e-05, "loss": 0.4434, "step": 35025 }, { "epoch": 1.7783883946135979, "grad_norm": 0.02063467888087572, "learning_rate": 3.691987283195991e-05, "loss": 0.4665, "step": 35030 }, { "epoch": 1.7786422307116294, "grad_norm": 0.022082244394205426, "learning_rate": 3.68363741539246e-05, "loss": 0.4657, "step": 35035 }, { "epoch": 1.778896066809661, "grad_norm": 0.024010484572527937, "learning_rate": 3.675296639259912e-05, "loss": 0.4751, "step": 35040 }, { "epoch": 1.7791499029076925, "grad_norm": 0.020949059566116365, "learning_rate": 3.66696495643562e-05, "loss": 0.4485, "step": 35045 }, { "epoch": 1.779403739005724, "grad_norm": 0.02321244261448484, "learning_rate": 3.6586423685550374e-05, "loss": 0.4597, "step": 35050 }, { "epoch": 1.7796575751037556, "grad_norm": 0.02673174516429816, "learning_rate": 3.6503288772518626e-05, "loss": 0.4677, "step": 35055 }, { "epoch": 1.779911411201787, "grad_norm": 0.020598141046026204, "learning_rate": 3.64202448415798e-05, "loss": 0.4393, "step": 35060 }, { "epoch": 1.7801652472998186, "grad_norm": 0.02209046479234219, "learning_rate": 3.6337291909035065e-05, "loss": 0.4413, "step": 35065 }, { "epoch": 1.7804190833978502, "grad_norm": 0.027347973082832013, "learning_rate": 3.625442999116763e-05, "loss": 0.4273, "step": 35070 }, { "epoch": 1.7806729194958815, "grad_norm": 0.02196378431429979, "learning_rate": 3.6171659104242914e-05, "loss": 0.4579, "step": 35075 }, { "epoch": 1.780926755593913, "grad_norm": 0.03402444971932555, "learning_rate": 3.608897926450838e-05, "loss": 0.4532, "step": 35080 }, { "epoch": 1.7811805916919445, "grad_norm": 0.025631046835950563, "learning_rate": 3.600639048819371e-05, "loss": 0.4314, "step": 35085 }, { "epoch": 1.7814344277899758, "grad_norm": 0.023888969294360688, "learning_rate": 3.592389279151065e-05, "loss": 0.4677, "step": 35090 }, { "epoch": 1.7816882638880074, "grad_norm": 0.020867066218234958, "learning_rate": 3.584148619065314e-05, "loss": 0.4444, "step": 35095 }, { "epoch": 1.781942099986039, "grad_norm": 0.023266323128579264, "learning_rate": 3.575917070179702e-05, "loss": 0.4331, "step": 35100 }, { "epoch": 1.7821959360840705, "grad_norm": 0.02512759163952341, "learning_rate": 3.567694634110058e-05, "loss": 0.4822, "step": 35105 }, { "epoch": 1.782449772182102, "grad_norm": 0.021294948101753024, "learning_rate": 3.559481312470403e-05, "loss": 0.4744, "step": 35110 }, { "epoch": 1.7827036082801335, "grad_norm": 0.021426087304758333, "learning_rate": 3.551277106872963e-05, "loss": 0.4677, "step": 35115 }, { "epoch": 1.782957444378165, "grad_norm": 0.02346702339510721, "learning_rate": 3.5430820189281954e-05, "loss": 0.4692, "step": 35120 }, { "epoch": 1.7832112804761966, "grad_norm": 0.020925318483005197, "learning_rate": 3.53489605024474e-05, "loss": 0.4804, "step": 35125 }, { "epoch": 1.7834651165742281, "grad_norm": 0.024026448655141502, "learning_rate": 3.526719202429474e-05, "loss": 0.4871, "step": 35130 }, { "epoch": 1.7837189526722597, "grad_norm": 0.02135052995935449, "learning_rate": 3.518551477087462e-05, "loss": 0.4451, "step": 35135 }, { "epoch": 1.783972788770291, "grad_norm": 0.03284714110137373, "learning_rate": 3.5103928758219995e-05, "loss": 0.4731, "step": 35140 }, { "epoch": 1.7842266248683225, "grad_norm": 0.021457290842253285, "learning_rate": 3.5022434002345615e-05, "loss": 0.4752, "step": 35145 }, { "epoch": 1.784480460966354, "grad_norm": 0.030062390522041568, "learning_rate": 3.4941030519248685e-05, "loss": 0.4536, "step": 35150 }, { "epoch": 1.7847342970643856, "grad_norm": 0.02160896181203855, "learning_rate": 3.485971832490814e-05, "loss": 0.4567, "step": 35155 }, { "epoch": 1.784988133162417, "grad_norm": 0.022948769188133194, "learning_rate": 3.477849743528533e-05, "loss": 0.45, "step": 35160 }, { "epoch": 1.7852419692604484, "grad_norm": 0.023052274795903466, "learning_rate": 3.469736786632327e-05, "loss": 0.4513, "step": 35165 }, { "epoch": 1.78549580535848, "grad_norm": 0.02387145233098405, "learning_rate": 3.461632963394756e-05, "loss": 0.4534, "step": 35170 }, { "epoch": 1.7857496414565115, "grad_norm": 0.02398398740621941, "learning_rate": 3.453538275406542e-05, "loss": 0.454, "step": 35175 }, { "epoch": 1.786003477554543, "grad_norm": 0.024153894469605582, "learning_rate": 3.445452724256648e-05, "loss": 0.4466, "step": 35180 }, { "epoch": 1.7862573136525746, "grad_norm": 0.02474750346228495, "learning_rate": 3.437376311532209e-05, "loss": 0.4586, "step": 35185 }, { "epoch": 1.786511149750606, "grad_norm": 0.021820142241289246, "learning_rate": 3.4293090388185955e-05, "loss": 0.4259, "step": 35190 }, { "epoch": 1.7867649858486376, "grad_norm": 0.021702335087422333, "learning_rate": 3.421250907699369e-05, "loss": 0.4538, "step": 35195 }, { "epoch": 1.7870188219466692, "grad_norm": 0.025281488724625104, "learning_rate": 3.413201919756304e-05, "loss": 0.4311, "step": 35200 }, { "epoch": 1.7872726580447007, "grad_norm": 0.022103622641024265, "learning_rate": 3.4051620765693734e-05, "loss": 0.4543, "step": 35205 }, { "epoch": 1.787526494142732, "grad_norm": 0.020184392908854817, "learning_rate": 3.3971313797167555e-05, "loss": 0.4551, "step": 35210 }, { "epoch": 1.7877803302407635, "grad_norm": 0.024327540101643993, "learning_rate": 3.389109830774845e-05, "loss": 0.462, "step": 35215 }, { "epoch": 1.788034166338795, "grad_norm": 0.018786966111353623, "learning_rate": 3.38109743131822e-05, "loss": 0.4489, "step": 35220 }, { "epoch": 1.7882880024368264, "grad_norm": 0.024298565666928316, "learning_rate": 3.373094182919678e-05, "loss": 0.4377, "step": 35225 }, { "epoch": 1.788541838534858, "grad_norm": 0.025256385511008395, "learning_rate": 3.3651000871502245e-05, "loss": 0.4503, "step": 35230 }, { "epoch": 1.7887956746328895, "grad_norm": 0.02722898777164149, "learning_rate": 3.357115145579059e-05, "loss": 0.4697, "step": 35235 }, { "epoch": 1.789049510730921, "grad_norm": 0.022525515289120692, "learning_rate": 3.3491393597735786e-05, "loss": 0.4664, "step": 35240 }, { "epoch": 1.7893033468289525, "grad_norm": 0.02377488171334522, "learning_rate": 3.341172731299402e-05, "loss": 0.4654, "step": 35245 }, { "epoch": 1.789557182926984, "grad_norm": 0.020576451658282147, "learning_rate": 3.3332152617203237e-05, "loss": 0.4412, "step": 35250 }, { "epoch": 1.7898110190250156, "grad_norm": 0.019862554906292372, "learning_rate": 3.325266952598366e-05, "loss": 0.4613, "step": 35255 }, { "epoch": 1.7900648551230471, "grad_norm": 0.025920687548760694, "learning_rate": 3.317327805493736e-05, "loss": 0.4409, "step": 35260 }, { "epoch": 1.7903186912210787, "grad_norm": 0.025050850533418417, "learning_rate": 3.3093978219648605e-05, "loss": 0.487, "step": 35265 }, { "epoch": 1.7905725273191102, "grad_norm": 0.021263072671365613, "learning_rate": 3.3014770035683315e-05, "loss": 0.449, "step": 35270 }, { "epoch": 1.7908263634171415, "grad_norm": 0.021022969296222715, "learning_rate": 3.293565351858996e-05, "loss": 0.4434, "step": 35275 }, { "epoch": 1.791080199515173, "grad_norm": 0.02133416611869445, "learning_rate": 3.285662868389849e-05, "loss": 0.4612, "step": 35280 }, { "epoch": 1.7913340356132046, "grad_norm": 0.021363627371597606, "learning_rate": 3.2777695547121236e-05, "loss": 0.4463, "step": 35285 }, { "epoch": 1.791587871711236, "grad_norm": 0.023294959319734358, "learning_rate": 3.269885412375223e-05, "loss": 0.4843, "step": 35290 }, { "epoch": 1.7918417078092674, "grad_norm": 0.020389213955970452, "learning_rate": 3.262010442926772e-05, "loss": 0.4228, "step": 35295 }, { "epoch": 1.792095543907299, "grad_norm": 0.020902326598321523, "learning_rate": 3.254144647912599e-05, "loss": 0.4467, "step": 35300 }, { "epoch": 1.7923493800053305, "grad_norm": 0.029630242897729194, "learning_rate": 3.246288028876704e-05, "loss": 0.4303, "step": 35305 }, { "epoch": 1.792603216103362, "grad_norm": 0.02027047342735473, "learning_rate": 3.2384405873613134e-05, "loss": 0.4566, "step": 35310 }, { "epoch": 1.7928570522013936, "grad_norm": 0.02183801483079023, "learning_rate": 3.2306023249068285e-05, "loss": 0.4414, "step": 35315 }, { "epoch": 1.793110888299425, "grad_norm": 0.022104767365956915, "learning_rate": 3.22277324305188e-05, "loss": 0.4563, "step": 35320 }, { "epoch": 1.7933647243974566, "grad_norm": 0.02103106803838072, "learning_rate": 3.214953343333255e-05, "loss": 0.4399, "step": 35325 }, { "epoch": 1.7936185604954882, "grad_norm": 0.025480846033686194, "learning_rate": 3.20714262728598e-05, "loss": 0.4418, "step": 35330 }, { "epoch": 1.7938723965935197, "grad_norm": 0.026910726059110684, "learning_rate": 3.1993410964432424e-05, "loss": 0.4787, "step": 35335 }, { "epoch": 1.794126232691551, "grad_norm": 0.03201969008252627, "learning_rate": 3.1915487523364596e-05, "loss": 0.4484, "step": 35340 }, { "epoch": 1.7943800687895826, "grad_norm": 0.030531459636693972, "learning_rate": 3.18376559649522e-05, "loss": 0.4485, "step": 35345 }, { "epoch": 1.794633904887614, "grad_norm": 0.02274451247134649, "learning_rate": 3.175991630447322e-05, "loss": 0.4718, "step": 35350 }, { "epoch": 1.7948877409856454, "grad_norm": 0.02435117079584466, "learning_rate": 3.1682268557187535e-05, "loss": 0.436, "step": 35355 }, { "epoch": 1.795141577083677, "grad_norm": 0.024073899530536718, "learning_rate": 3.160471273833709e-05, "loss": 0.4625, "step": 35360 }, { "epoch": 1.7953954131817085, "grad_norm": 0.023077987822385543, "learning_rate": 3.152724886314562e-05, "loss": 0.4367, "step": 35365 }, { "epoch": 1.79564924927974, "grad_norm": 0.024014102288831315, "learning_rate": 3.1449876946819e-05, "loss": 0.4771, "step": 35370 }, { "epoch": 1.7959030853777715, "grad_norm": 0.022994443638095826, "learning_rate": 3.137259700454481e-05, "loss": 0.4728, "step": 35375 }, { "epoch": 1.796156921475803, "grad_norm": 0.019915951884140817, "learning_rate": 3.129540905149281e-05, "loss": 0.4655, "step": 35380 }, { "epoch": 1.7964107575738346, "grad_norm": 0.022563204784642146, "learning_rate": 3.121831310281459e-05, "loss": 0.4764, "step": 35385 }, { "epoch": 1.7966645936718662, "grad_norm": 0.02124227375276603, "learning_rate": 3.114130917364372e-05, "loss": 0.4419, "step": 35390 }, { "epoch": 1.7969184297698977, "grad_norm": 0.023126356296767688, "learning_rate": 3.10643972790956e-05, "loss": 0.4736, "step": 35395 }, { "epoch": 1.7971722658679292, "grad_norm": 0.026053218593494146, "learning_rate": 3.098757743426778e-05, "loss": 0.4739, "step": 35400 }, { "epoch": 1.7974261019659605, "grad_norm": 0.023364066512373553, "learning_rate": 3.0910849654239456e-05, "loss": 0.4504, "step": 35405 }, { "epoch": 1.797679938063992, "grad_norm": 0.027082071431975455, "learning_rate": 3.0834213954072046e-05, "loss": 0.4452, "step": 35410 }, { "epoch": 1.7979337741620236, "grad_norm": 0.02380009827362938, "learning_rate": 3.0757670348808774e-05, "loss": 0.4494, "step": 35415 }, { "epoch": 1.7981876102600551, "grad_norm": 0.033607420854026, "learning_rate": 3.0681218853474636e-05, "loss": 0.4235, "step": 35420 }, { "epoch": 1.7984414463580864, "grad_norm": 0.02321159683766678, "learning_rate": 3.0604859483076785e-05, "loss": 0.4543, "step": 35425 }, { "epoch": 1.798695282456118, "grad_norm": 0.03605534174130652, "learning_rate": 3.0528592252604126e-05, "loss": 0.4683, "step": 35430 }, { "epoch": 1.7989491185541495, "grad_norm": 0.024039033525210644, "learning_rate": 3.045241717702757e-05, "loss": 0.4711, "step": 35435 }, { "epoch": 1.799202954652181, "grad_norm": 0.022549638573687806, "learning_rate": 3.0376334271299878e-05, "loss": 0.467, "step": 35440 }, { "epoch": 1.7994567907502126, "grad_norm": 0.021025752959848737, "learning_rate": 3.0300343550355767e-05, "loss": 0.4471, "step": 35445 }, { "epoch": 1.7997106268482441, "grad_norm": 0.02042717498913915, "learning_rate": 3.0224445029111812e-05, "loss": 0.4593, "step": 35450 }, { "epoch": 1.7999644629462757, "grad_norm": 0.01959098368966413, "learning_rate": 3.0148638722466593e-05, "loss": 0.4566, "step": 35455 }, { "epoch": 1.8002182990443072, "grad_norm": 0.0388203089794261, "learning_rate": 3.007292464530037e-05, "loss": 0.4778, "step": 35460 }, { "epoch": 1.8004721351423387, "grad_norm": 0.022497742943539102, "learning_rate": 2.9997302812475592e-05, "loss": 0.4565, "step": 35465 }, { "epoch": 1.80072597124037, "grad_norm": 0.023761047149701668, "learning_rate": 2.9921773238836215e-05, "loss": 0.4653, "step": 35470 }, { "epoch": 1.8009798073384016, "grad_norm": 0.021599826773256652, "learning_rate": 2.9846335939208602e-05, "loss": 0.437, "step": 35475 }, { "epoch": 1.801233643436433, "grad_norm": 0.02511013717130302, "learning_rate": 2.9770990928400575e-05, "loss": 0.4416, "step": 35480 }, { "epoch": 1.8014874795344646, "grad_norm": 0.02204277337871591, "learning_rate": 2.969573822120203e-05, "loss": 0.4594, "step": 35485 }, { "epoch": 1.801741315632496, "grad_norm": 0.022231649077976815, "learning_rate": 2.9620577832384643e-05, "loss": 0.4725, "step": 35490 }, { "epoch": 1.8019951517305275, "grad_norm": 0.02225484721568446, "learning_rate": 2.9545509776702062e-05, "loss": 0.448, "step": 35495 }, { "epoch": 1.802248987828559, "grad_norm": 0.022235212450463348, "learning_rate": 2.947053406888972e-05, "loss": 0.4285, "step": 35500 }, { "epoch": 1.8025028239265906, "grad_norm": 0.020744906938457715, "learning_rate": 2.939565072366507e-05, "loss": 0.4364, "step": 35505 }, { "epoch": 1.802756660024622, "grad_norm": 0.02218934079642219, "learning_rate": 2.9320859755727238e-05, "loss": 0.4372, "step": 35510 }, { "epoch": 1.8030104961226536, "grad_norm": 0.021558249002831046, "learning_rate": 2.9246161179757425e-05, "loss": 0.4534, "step": 35515 }, { "epoch": 1.8032643322206852, "grad_norm": 0.02400375345216303, "learning_rate": 2.9171555010418404e-05, "loss": 0.43, "step": 35520 }, { "epoch": 1.8035181683187167, "grad_norm": 0.0224200032122597, "learning_rate": 2.909704126235524e-05, "loss": 0.4536, "step": 35525 }, { "epoch": 1.8037720044167482, "grad_norm": 0.023048378811341624, "learning_rate": 2.9022619950194395e-05, "loss": 0.4554, "step": 35530 }, { "epoch": 1.8040258405147798, "grad_norm": 0.026029749704731063, "learning_rate": 2.8948291088544522e-05, "loss": 0.4645, "step": 35535 }, { "epoch": 1.804279676612811, "grad_norm": 0.019985537420418396, "learning_rate": 2.8874054691996054e-05, "loss": 0.445, "step": 35540 }, { "epoch": 1.8045335127108426, "grad_norm": 0.022666606334347597, "learning_rate": 2.8799910775121008e-05, "loss": 0.4507, "step": 35545 }, { "epoch": 1.8047873488088741, "grad_norm": 0.020489511769417657, "learning_rate": 2.8725859352473737e-05, "loss": 0.4697, "step": 35550 }, { "epoch": 1.8050411849069055, "grad_norm": 0.02213113768181123, "learning_rate": 2.865190043858995e-05, "loss": 0.4638, "step": 35555 }, { "epoch": 1.805295021004937, "grad_norm": 0.028290545411715884, "learning_rate": 2.8578034047987587e-05, "loss": 0.4496, "step": 35560 }, { "epoch": 1.8055488571029685, "grad_norm": 0.02059738323425534, "learning_rate": 2.8504260195166055e-05, "loss": 0.4598, "step": 35565 }, { "epoch": 1.805802693201, "grad_norm": 0.02560738205998867, "learning_rate": 2.8430578894606985e-05, "loss": 0.4699, "step": 35570 }, { "epoch": 1.8060565292990316, "grad_norm": 0.02077551935432808, "learning_rate": 2.8356990160773534e-05, "loss": 0.4605, "step": 35575 }, { "epoch": 1.8063103653970631, "grad_norm": 0.02698118825215807, "learning_rate": 2.8283494008110867e-05, "loss": 0.4439, "step": 35580 }, { "epoch": 1.8065642014950947, "grad_norm": 0.021234911775022253, "learning_rate": 2.821009045104578e-05, "loss": 0.4599, "step": 35585 }, { "epoch": 1.8068180375931262, "grad_norm": 0.023435049130171297, "learning_rate": 2.8136779503987186e-05, "loss": 0.4547, "step": 35590 }, { "epoch": 1.8070718736911577, "grad_norm": 0.022261106315884807, "learning_rate": 2.8063561181325526e-05, "loss": 0.4291, "step": 35595 }, { "epoch": 1.8073257097891893, "grad_norm": 0.025676143757082985, "learning_rate": 2.7990435497433408e-05, "loss": 0.4872, "step": 35600 }, { "epoch": 1.8075795458872206, "grad_norm": 0.02382218211439765, "learning_rate": 2.79174024666648e-05, "loss": 0.4365, "step": 35605 }, { "epoch": 1.8078333819852521, "grad_norm": 0.022263485578407526, "learning_rate": 2.7844462103355838e-05, "loss": 0.4421, "step": 35610 }, { "epoch": 1.8080872180832837, "grad_norm": 0.025275647713049482, "learning_rate": 2.7771614421824297e-05, "loss": 0.471, "step": 35615 }, { "epoch": 1.808341054181315, "grad_norm": 0.019588771701611427, "learning_rate": 2.769885943636996e-05, "loss": 0.4582, "step": 35620 }, { "epoch": 1.8085948902793465, "grad_norm": 0.020818530567243063, "learning_rate": 2.7626197161274014e-05, "loss": 0.4502, "step": 35625 }, { "epoch": 1.808848726377378, "grad_norm": 0.022388154883143465, "learning_rate": 2.7553627610799938e-05, "loss": 0.4305, "step": 35630 }, { "epoch": 1.8091025624754096, "grad_norm": 0.0278131432647123, "learning_rate": 2.748115079919261e-05, "loss": 0.4423, "step": 35635 }, { "epoch": 1.809356398573441, "grad_norm": 0.0214926109026721, "learning_rate": 2.7408766740678994e-05, "loss": 0.4498, "step": 35640 }, { "epoch": 1.8096102346714726, "grad_norm": 0.020383587231959203, "learning_rate": 2.73364754494676e-05, "loss": 0.4468, "step": 35645 }, { "epoch": 1.8098640707695042, "grad_norm": 0.023441844532656696, "learning_rate": 2.7264276939748923e-05, "loss": 0.4376, "step": 35650 }, { "epoch": 1.8101179068675357, "grad_norm": 0.021868685315593866, "learning_rate": 2.7192171225695172e-05, "loss": 0.4606, "step": 35655 }, { "epoch": 1.8103717429655672, "grad_norm": 0.023969847865051304, "learning_rate": 2.712015832146031e-05, "loss": 0.4675, "step": 35660 }, { "epoch": 1.8106255790635988, "grad_norm": 0.022850546221913993, "learning_rate": 2.7048238241180133e-05, "loss": 0.4327, "step": 35665 }, { "epoch": 1.81087941516163, "grad_norm": 0.02192908510076702, "learning_rate": 2.6976410998972134e-05, "loss": 0.4755, "step": 35670 }, { "epoch": 1.8111332512596616, "grad_norm": 0.023714661280836886, "learning_rate": 2.690467660893575e-05, "loss": 0.4509, "step": 35675 }, { "epoch": 1.8113870873576932, "grad_norm": 0.023176233911916783, "learning_rate": 2.6833035085152003e-05, "loss": 0.4615, "step": 35680 }, { "epoch": 1.8116409234557245, "grad_norm": 0.023676313879505324, "learning_rate": 2.6761486441683802e-05, "loss": 0.4545, "step": 35685 }, { "epoch": 1.811894759553756, "grad_norm": 0.03016670003145498, "learning_rate": 2.669003069257575e-05, "loss": 0.4299, "step": 35690 }, { "epoch": 1.8121485956517875, "grad_norm": 0.022836140384253037, "learning_rate": 2.661866785185435e-05, "loss": 0.4544, "step": 35695 }, { "epoch": 1.812402431749819, "grad_norm": 0.022515164600397174, "learning_rate": 2.6547397933527562e-05, "loss": 0.4482, "step": 35700 }, { "epoch": 1.8126562678478506, "grad_norm": 0.023784234188635057, "learning_rate": 2.6476220951585582e-05, "loss": 0.4566, "step": 35705 }, { "epoch": 1.8129101039458821, "grad_norm": 0.021185807136934295, "learning_rate": 2.640513691999985e-05, "loss": 0.454, "step": 35710 }, { "epoch": 1.8131639400439137, "grad_norm": 0.019955400348529193, "learning_rate": 2.6334145852724035e-05, "loss": 0.4142, "step": 35715 }, { "epoch": 1.8134177761419452, "grad_norm": 0.032252200455131774, "learning_rate": 2.6263247763693153e-05, "loss": 0.4394, "step": 35720 }, { "epoch": 1.8136716122399767, "grad_norm": 0.025647649030433494, "learning_rate": 2.61924426668243e-05, "loss": 0.4871, "step": 35725 }, { "epoch": 1.8139254483380083, "grad_norm": 0.02352222162068727, "learning_rate": 2.6121730576015967e-05, "loss": 0.4726, "step": 35730 }, { "epoch": 1.8141792844360396, "grad_norm": 0.023536332582882832, "learning_rate": 2.605111150514883e-05, "loss": 0.4894, "step": 35735 }, { "epoch": 1.8144331205340711, "grad_norm": 0.020218263139566366, "learning_rate": 2.5980585468084795e-05, "loss": 0.4571, "step": 35740 }, { "epoch": 1.8146869566321027, "grad_norm": 0.022759174498022658, "learning_rate": 2.5910152478668015e-05, "loss": 0.4687, "step": 35745 }, { "epoch": 1.8149407927301342, "grad_norm": 0.021047033486352878, "learning_rate": 2.5839812550723928e-05, "loss": 0.4586, "step": 35750 }, { "epoch": 1.8151946288281655, "grad_norm": 0.023654540393796105, "learning_rate": 2.5769565698060047e-05, "loss": 0.4613, "step": 35755 }, { "epoch": 1.815448464926197, "grad_norm": 0.021804276959855963, "learning_rate": 2.56994119344654e-05, "loss": 0.4454, "step": 35760 }, { "epoch": 1.8157023010242286, "grad_norm": 0.02108201902634222, "learning_rate": 2.562935127371091e-05, "loss": 0.4555, "step": 35765 }, { "epoch": 1.8159561371222601, "grad_norm": 0.02148487710666091, "learning_rate": 2.5559383729549025e-05, "loss": 0.4611, "step": 35770 }, { "epoch": 1.8162099732202917, "grad_norm": 0.029051791837414652, "learning_rate": 2.5489509315714087e-05, "loss": 0.4538, "step": 35775 }, { "epoch": 1.8164638093183232, "grad_norm": 0.026776408987169115, "learning_rate": 2.5419728045922186e-05, "loss": 0.4421, "step": 35780 }, { "epoch": 1.8167176454163547, "grad_norm": 0.02023071442340706, "learning_rate": 2.5350039933870805e-05, "loss": 0.4225, "step": 35785 }, { "epoch": 1.8169714815143863, "grad_norm": 0.02079097271610684, "learning_rate": 2.5280444993239616e-05, "loss": 0.4471, "step": 35790 }, { "epoch": 1.8172253176124178, "grad_norm": 0.02335976147958995, "learning_rate": 2.5210943237689575e-05, "loss": 0.4645, "step": 35795 }, { "epoch": 1.8174791537104493, "grad_norm": 0.025662477366910293, "learning_rate": 2.514153468086372e-05, "loss": 0.4523, "step": 35800 }, { "epoch": 1.8177329898084806, "grad_norm": 0.02096423059117386, "learning_rate": 2.507221933638637e-05, "loss": 0.4512, "step": 35805 }, { "epoch": 1.8179868259065122, "grad_norm": 0.021587834721081906, "learning_rate": 2.5002997217863975e-05, "loss": 0.4739, "step": 35810 }, { "epoch": 1.8182406620045437, "grad_norm": 0.02346278616646674, "learning_rate": 2.4933868338884392e-05, "loss": 0.4391, "step": 35815 }, { "epoch": 1.818494498102575, "grad_norm": 0.02239295958313847, "learning_rate": 2.4864832713017316e-05, "loss": 0.4868, "step": 35820 }, { "epoch": 1.8187483342006066, "grad_norm": 0.022180419791008956, "learning_rate": 2.479589035381402e-05, "loss": 0.4144, "step": 35825 }, { "epoch": 1.819002170298638, "grad_norm": 0.02094838835196554, "learning_rate": 2.472704127480768e-05, "loss": 0.4537, "step": 35830 }, { "epoch": 1.8192560063966696, "grad_norm": 0.02255615727366666, "learning_rate": 2.4658285489512876e-05, "loss": 0.4575, "step": 35835 }, { "epoch": 1.8195098424947012, "grad_norm": 0.024813587982619647, "learning_rate": 2.45896230114262e-05, "loss": 0.4705, "step": 35840 }, { "epoch": 1.8197636785927327, "grad_norm": 0.024876925814751656, "learning_rate": 2.4521053854025587e-05, "loss": 0.443, "step": 35845 }, { "epoch": 1.8200175146907642, "grad_norm": 0.022425131114226664, "learning_rate": 2.4452578030771e-05, "loss": 0.4437, "step": 35850 }, { "epoch": 1.8202713507887958, "grad_norm": 0.02983896975643497, "learning_rate": 2.4384195555103685e-05, "loss": 0.4525, "step": 35855 }, { "epoch": 1.8205251868868273, "grad_norm": 0.022587211482833724, "learning_rate": 2.4315906440446956e-05, "loss": 0.4341, "step": 35860 }, { "epoch": 1.8207790229848588, "grad_norm": 0.024206758618488585, "learning_rate": 2.4247710700205484e-05, "loss": 0.4649, "step": 35865 }, { "epoch": 1.8210328590828901, "grad_norm": 0.024478336072376698, "learning_rate": 2.4179608347765948e-05, "loss": 0.5033, "step": 35870 }, { "epoch": 1.8212866951809217, "grad_norm": 0.02070813235705416, "learning_rate": 2.4111599396496263e-05, "loss": 0.4639, "step": 35875 }, { "epoch": 1.8215405312789532, "grad_norm": 0.021774866578754614, "learning_rate": 2.404368385974648e-05, "loss": 0.4698, "step": 35880 }, { "epoch": 1.8217943673769845, "grad_norm": 0.0212623746339756, "learning_rate": 2.3975861750847872e-05, "loss": 0.4637, "step": 35885 }, { "epoch": 1.822048203475016, "grad_norm": 0.02128933031223815, "learning_rate": 2.3908133083113627e-05, "loss": 0.4293, "step": 35890 }, { "epoch": 1.8223020395730476, "grad_norm": 0.021642236447562767, "learning_rate": 2.3840497869838718e-05, "loss": 0.4228, "step": 35895 }, { "epoch": 1.8225558756710791, "grad_norm": 0.020495462348286645, "learning_rate": 2.3772956124299416e-05, "loss": 0.4498, "step": 35900 }, { "epoch": 1.8228097117691107, "grad_norm": 0.019689106987887187, "learning_rate": 2.3705507859753896e-05, "loss": 0.4314, "step": 35905 }, { "epoch": 1.8230635478671422, "grad_norm": 0.0217185094138313, "learning_rate": 2.3638153089441893e-05, "loss": 0.4535, "step": 35910 }, { "epoch": 1.8233173839651737, "grad_norm": 0.02237290814529761, "learning_rate": 2.357089182658484e-05, "loss": 0.4633, "step": 35915 }, { "epoch": 1.8235712200632053, "grad_norm": 0.02187816074803507, "learning_rate": 2.350372408438578e-05, "loss": 0.4592, "step": 35920 }, { "epoch": 1.8238250561612368, "grad_norm": 0.022184120130896715, "learning_rate": 2.343664987602939e-05, "loss": 0.4646, "step": 35925 }, { "epoch": 1.8240788922592683, "grad_norm": 0.019843601924149395, "learning_rate": 2.3369669214681977e-05, "loss": 0.4541, "step": 35930 }, { "epoch": 1.8243327283572996, "grad_norm": 0.022400869249771683, "learning_rate": 2.3302782113491628e-05, "loss": 0.4757, "step": 35935 }, { "epoch": 1.8245865644553312, "grad_norm": 0.02151836218094333, "learning_rate": 2.3235988585587784e-05, "loss": 0.4753, "step": 35940 }, { "epoch": 1.8248404005533627, "grad_norm": 0.02301572497170209, "learning_rate": 2.31692886440818e-05, "loss": 0.4432, "step": 35945 }, { "epoch": 1.825094236651394, "grad_norm": 0.019813493528431883, "learning_rate": 2.3102682302066412e-05, "loss": 0.4371, "step": 35950 }, { "epoch": 1.8253480727494256, "grad_norm": 0.01992449587850107, "learning_rate": 2.303616957261634e-05, "loss": 0.4603, "step": 35955 }, { "epoch": 1.825601908847457, "grad_norm": 0.023016076431343672, "learning_rate": 2.2969750468787466e-05, "loss": 0.4326, "step": 35960 }, { "epoch": 1.8258557449454886, "grad_norm": 0.022191758185042577, "learning_rate": 2.290342500361775e-05, "loss": 0.4524, "step": 35965 }, { "epoch": 1.8261095810435202, "grad_norm": 0.025349303799634692, "learning_rate": 2.2837193190126282e-05, "loss": 0.4524, "step": 35970 }, { "epoch": 1.8263634171415517, "grad_norm": 0.020969407434920425, "learning_rate": 2.2771055041314327e-05, "loss": 0.4363, "step": 35975 }, { "epoch": 1.8266172532395832, "grad_norm": 0.023006042793577052, "learning_rate": 2.270501057016422e-05, "loss": 0.4669, "step": 35980 }, { "epoch": 1.8268710893376148, "grad_norm": 0.028873927723596555, "learning_rate": 2.263905978964037e-05, "loss": 0.4695, "step": 35985 }, { "epoch": 1.8271249254356463, "grad_norm": 0.024116668482079474, "learning_rate": 2.2573202712688367e-05, "loss": 0.4777, "step": 35990 }, { "epoch": 1.8273787615336778, "grad_norm": 0.024030296769852598, "learning_rate": 2.250743935223587e-05, "loss": 0.4963, "step": 35995 }, { "epoch": 1.8276325976317092, "grad_norm": 0.026746408895009968, "learning_rate": 2.2441769721191662e-05, "loss": 0.4449, "step": 36000 }, { "epoch": 1.8278864337297407, "grad_norm": 0.023888483474343483, "learning_rate": 2.23761938324466e-05, "loss": 0.4392, "step": 36005 }, { "epoch": 1.8281402698277722, "grad_norm": 0.035099541692213014, "learning_rate": 2.2310711698872665e-05, "loss": 0.4504, "step": 36010 }, { "epoch": 1.8283941059258038, "grad_norm": 0.02112342338748428, "learning_rate": 2.224532333332385e-05, "loss": 0.4618, "step": 36015 }, { "epoch": 1.828647942023835, "grad_norm": 0.020712955873280085, "learning_rate": 2.2180028748635506e-05, "loss": 0.4715, "step": 36020 }, { "epoch": 1.8289017781218666, "grad_norm": 0.02460324591191274, "learning_rate": 2.2114827957624595e-05, "loss": 0.4728, "step": 36025 }, { "epoch": 1.8291556142198981, "grad_norm": 0.02098638010680304, "learning_rate": 2.2049720973089825e-05, "loss": 0.4343, "step": 36030 }, { "epoch": 1.8294094503179297, "grad_norm": 0.021605128746199135, "learning_rate": 2.19847078078112e-05, "loss": 0.475, "step": 36035 }, { "epoch": 1.8296632864159612, "grad_norm": 0.020744240541715135, "learning_rate": 2.1919788474550673e-05, "loss": 0.4333, "step": 36040 }, { "epoch": 1.8299171225139927, "grad_norm": 0.021445317479955133, "learning_rate": 2.185496298605144e-05, "loss": 0.4572, "step": 36045 }, { "epoch": 1.8301709586120243, "grad_norm": 0.025771996958530503, "learning_rate": 2.1790231355038493e-05, "loss": 0.4197, "step": 36050 }, { "epoch": 1.8304247947100558, "grad_norm": 0.022223270474904383, "learning_rate": 2.172559359421822e-05, "loss": 0.4581, "step": 36055 }, { "epoch": 1.8306786308080873, "grad_norm": 0.020030488071074923, "learning_rate": 2.166104971627886e-05, "loss": 0.4306, "step": 36060 }, { "epoch": 1.8309324669061189, "grad_norm": 0.022278981780526303, "learning_rate": 2.1596599733889888e-05, "loss": 0.4406, "step": 36065 }, { "epoch": 1.8311863030041502, "grad_norm": 0.0221757803883364, "learning_rate": 2.1532243659702634e-05, "loss": 0.4834, "step": 36070 }, { "epoch": 1.8314401391021817, "grad_norm": 0.023275974115444426, "learning_rate": 2.146798150634982e-05, "loss": 0.4563, "step": 36075 }, { "epoch": 1.8316939752002133, "grad_norm": 0.023221039443217652, "learning_rate": 2.140381328644586e-05, "loss": 0.4463, "step": 36080 }, { "epoch": 1.8319478112982446, "grad_norm": 0.020569710112120266, "learning_rate": 2.133973901258651e-05, "loss": 0.4703, "step": 36085 }, { "epoch": 1.8322016473962761, "grad_norm": 0.021785715748550184, "learning_rate": 2.1275758697349434e-05, "loss": 0.4624, "step": 36090 }, { "epoch": 1.8324554834943076, "grad_norm": 0.02146143080616713, "learning_rate": 2.1211872353293417e-05, "loss": 0.4476, "step": 36095 }, { "epoch": 1.8327093195923392, "grad_norm": 0.020350789547040154, "learning_rate": 2.11480799929592e-05, "loss": 0.4511, "step": 36100 }, { "epoch": 1.8329631556903707, "grad_norm": 0.02933647592875351, "learning_rate": 2.1084381628868833e-05, "loss": 0.4251, "step": 36105 }, { "epoch": 1.8332169917884023, "grad_norm": 0.02113714142733342, "learning_rate": 2.1020777273526025e-05, "loss": 0.4418, "step": 36110 }, { "epoch": 1.8334708278864338, "grad_norm": 0.021965707443001067, "learning_rate": 2.0957266939415965e-05, "loss": 0.4415, "step": 36115 }, { "epoch": 1.8337246639844653, "grad_norm": 0.02268716397256464, "learning_rate": 2.0893850639005453e-05, "loss": 0.457, "step": 36120 }, { "epoch": 1.8339785000824969, "grad_norm": 0.025935687476320603, "learning_rate": 2.0830528384742697e-05, "loss": 0.4502, "step": 36125 }, { "epoch": 1.8342323361805284, "grad_norm": 0.02355894973776656, "learning_rate": 2.076730018905759e-05, "loss": 0.4344, "step": 36130 }, { "epoch": 1.8344861722785597, "grad_norm": 0.022212368167510682, "learning_rate": 2.0704166064361596e-05, "loss": 0.4486, "step": 36135 }, { "epoch": 1.8347400083765912, "grad_norm": 0.02141121626813041, "learning_rate": 2.0641126023047518e-05, "loss": 0.4587, "step": 36140 }, { "epoch": 1.8349938444746228, "grad_norm": 0.02163591724705281, "learning_rate": 2.0578180077489905e-05, "loss": 0.4409, "step": 36145 }, { "epoch": 1.835247680572654, "grad_norm": 0.023066084860677204, "learning_rate": 2.0515328240044594e-05, "loss": 0.4499, "step": 36150 }, { "epoch": 1.8355015166706856, "grad_norm": 0.022242330077506877, "learning_rate": 2.0452570523049217e-05, "loss": 0.4362, "step": 36155 }, { "epoch": 1.8357553527687172, "grad_norm": 0.020051623391778836, "learning_rate": 2.03899069388227e-05, "loss": 0.4427, "step": 36160 }, { "epoch": 1.8360091888667487, "grad_norm": 0.029608579217612622, "learning_rate": 2.03273374996657e-05, "loss": 0.4502, "step": 36165 }, { "epoch": 1.8362630249647802, "grad_norm": 0.023737953896778508, "learning_rate": 2.026486221786017e-05, "loss": 0.4767, "step": 36170 }, { "epoch": 1.8365168610628118, "grad_norm": 0.023375619205875876, "learning_rate": 2.02024811056698e-05, "loss": 0.4497, "step": 36175 }, { "epoch": 1.8367706971608433, "grad_norm": 0.027714733020031652, "learning_rate": 2.0140194175339575e-05, "loss": 0.4693, "step": 36180 }, { "epoch": 1.8370245332588748, "grad_norm": 0.02427169464452961, "learning_rate": 2.0078001439096218e-05, "loss": 0.4364, "step": 36185 }, { "epoch": 1.8372783693569064, "grad_norm": 0.020026240969129284, "learning_rate": 2.001590290914779e-05, "loss": 0.4272, "step": 36190 }, { "epoch": 1.837532205454938, "grad_norm": 0.022495330500840523, "learning_rate": 1.9953898597683927e-05, "loss": 0.4843, "step": 36195 }, { "epoch": 1.8377860415529692, "grad_norm": 0.020360142174855975, "learning_rate": 1.989198851687579e-05, "loss": 0.4429, "step": 36200 }, { "epoch": 1.8380398776510007, "grad_norm": 0.023413422223116345, "learning_rate": 1.9830172678876103e-05, "loss": 0.468, "step": 36205 }, { "epoch": 1.8382937137490323, "grad_norm": 0.02298430069711076, "learning_rate": 1.9768451095818818e-05, "loss": 0.4567, "step": 36210 }, { "epoch": 1.8385475498470636, "grad_norm": 0.02280937325321792, "learning_rate": 1.9706823779819692e-05, "loss": 0.442, "step": 36215 }, { "epoch": 1.8388013859450951, "grad_norm": 0.021396894421407745, "learning_rate": 1.964529074297583e-05, "loss": 0.4653, "step": 36220 }, { "epoch": 1.8390552220431267, "grad_norm": 0.020024815824637394, "learning_rate": 1.9583851997365954e-05, "loss": 0.4385, "step": 36225 }, { "epoch": 1.8393090581411582, "grad_norm": 0.02174390016299535, "learning_rate": 1.952250755505003e-05, "loss": 0.4606, "step": 36230 }, { "epoch": 1.8395628942391897, "grad_norm": 0.022477525249405196, "learning_rate": 1.9461257428069755e-05, "loss": 0.4434, "step": 36235 }, { "epoch": 1.8398167303372213, "grad_norm": 0.0245485389384089, "learning_rate": 1.9400101628448242e-05, "loss": 0.4237, "step": 36240 }, { "epoch": 1.8400705664352528, "grad_norm": 0.021224620960195745, "learning_rate": 1.9339040168189937e-05, "loss": 0.4724, "step": 36245 }, { "epoch": 1.8403244025332843, "grad_norm": 0.023242450571638745, "learning_rate": 1.927807305928109e-05, "loss": 0.4638, "step": 36250 }, { "epoch": 1.8405782386313159, "grad_norm": 0.020915924336955997, "learning_rate": 1.921720031368901e-05, "loss": 0.4405, "step": 36255 }, { "epoch": 1.8408320747293474, "grad_norm": 0.02248775188519345, "learning_rate": 1.9156421943362924e-05, "loss": 0.4786, "step": 36260 }, { "epoch": 1.8410859108273787, "grad_norm": 0.02148666149767411, "learning_rate": 1.9095737960233228e-05, "loss": 0.4436, "step": 36265 }, { "epoch": 1.8413397469254102, "grad_norm": 0.02416587138910249, "learning_rate": 1.903514837621201e-05, "loss": 0.4714, "step": 36270 }, { "epoch": 1.8415935830234418, "grad_norm": 0.023157173410389604, "learning_rate": 1.897465320319247e-05, "loss": 0.4338, "step": 36275 }, { "epoch": 1.8418474191214733, "grad_norm": 0.029233910498072987, "learning_rate": 1.891425245304973e-05, "loss": 0.4753, "step": 36280 }, { "epoch": 1.8421012552195046, "grad_norm": 0.02283659947545247, "learning_rate": 1.8853946137639966e-05, "loss": 0.4239, "step": 36285 }, { "epoch": 1.8423550913175362, "grad_norm": 0.019978195457050724, "learning_rate": 1.879373426880121e-05, "loss": 0.4456, "step": 36290 }, { "epoch": 1.8426089274155677, "grad_norm": 0.021687663942728044, "learning_rate": 1.8733616858352564e-05, "loss": 0.4522, "step": 36295 }, { "epoch": 1.8428627635135992, "grad_norm": 0.019528780287524306, "learning_rate": 1.8673593918094923e-05, "loss": 0.4439, "step": 36300 }, { "epoch": 1.8431165996116308, "grad_norm": 0.02155234027892195, "learning_rate": 1.8613665459810357e-05, "loss": 0.4574, "step": 36305 }, { "epoch": 1.8433704357096623, "grad_norm": 0.0261456620147658, "learning_rate": 1.8553831495262685e-05, "loss": 0.4648, "step": 36310 }, { "epoch": 1.8436242718076938, "grad_norm": 0.026088651944962685, "learning_rate": 1.849409203619673e-05, "loss": 0.4534, "step": 36315 }, { "epoch": 1.8438781079057254, "grad_norm": 0.02423314397669894, "learning_rate": 1.8434447094339446e-05, "loss": 0.4652, "step": 36320 }, { "epoch": 1.844131944003757, "grad_norm": 0.023517519839947224, "learning_rate": 1.837489668139858e-05, "loss": 0.4425, "step": 36325 }, { "epoch": 1.8443857801017884, "grad_norm": 0.02513005496856768, "learning_rate": 1.8315440809063554e-05, "loss": 0.4472, "step": 36330 }, { "epoch": 1.8446396161998198, "grad_norm": 0.021229880550278774, "learning_rate": 1.8256079489005485e-05, "loss": 0.4364, "step": 36335 }, { "epoch": 1.8448934522978513, "grad_norm": 0.01841576739055947, "learning_rate": 1.8196812732876434e-05, "loss": 0.4649, "step": 36340 }, { "epoch": 1.8451472883958828, "grad_norm": 0.022976094393190677, "learning_rate": 1.8137640552310374e-05, "loss": 0.4356, "step": 36345 }, { "epoch": 1.8454011244939141, "grad_norm": 0.02206953417030893, "learning_rate": 1.807856295892235e-05, "loss": 0.4636, "step": 36350 }, { "epoch": 1.8456549605919457, "grad_norm": 0.02132833882982846, "learning_rate": 1.801957996430914e-05, "loss": 0.4605, "step": 36355 }, { "epoch": 1.8459087966899772, "grad_norm": 0.025669102961975235, "learning_rate": 1.7960691580048705e-05, "loss": 0.4491, "step": 36360 }, { "epoch": 1.8461626327880087, "grad_norm": 0.0256888308875704, "learning_rate": 1.7901897817700685e-05, "loss": 0.4626, "step": 36365 }, { "epoch": 1.8464164688860403, "grad_norm": 0.022264542254564815, "learning_rate": 1.7843198688805793e-05, "loss": 0.4803, "step": 36370 }, { "epoch": 1.8466703049840718, "grad_norm": 0.023955069464813527, "learning_rate": 1.7784594204886485e-05, "loss": 0.4728, "step": 36375 }, { "epoch": 1.8469241410821033, "grad_norm": 0.028377687030451647, "learning_rate": 1.772608437744655e-05, "loss": 0.4523, "step": 36380 }, { "epoch": 1.8471779771801349, "grad_norm": 0.02256742111449494, "learning_rate": 1.7667669217971195e-05, "loss": 0.4608, "step": 36385 }, { "epoch": 1.8474318132781664, "grad_norm": 0.020948702040254757, "learning_rate": 1.7609348737926968e-05, "loss": 0.4561, "step": 36390 }, { "epoch": 1.847685649376198, "grad_norm": 0.02215357030539792, "learning_rate": 1.7551122948761932e-05, "loss": 0.4575, "step": 36395 }, { "epoch": 1.8479394854742293, "grad_norm": 0.021140043797980356, "learning_rate": 1.7492991861905394e-05, "loss": 0.4541, "step": 36400 }, { "epoch": 1.8481933215722608, "grad_norm": 0.02126277301347097, "learning_rate": 1.7434955488768445e-05, "loss": 0.4592, "step": 36405 }, { "epoch": 1.8484471576702923, "grad_norm": 0.020846498654345385, "learning_rate": 1.7377013840743083e-05, "loss": 0.4505, "step": 36410 }, { "epoch": 1.8487009937683236, "grad_norm": 0.024918017001830127, "learning_rate": 1.73191669292031e-05, "loss": 0.4629, "step": 36415 }, { "epoch": 1.8489548298663552, "grad_norm": 0.02250400370693143, "learning_rate": 1.726141476550347e-05, "loss": 0.4666, "step": 36420 }, { "epoch": 1.8492086659643867, "grad_norm": 0.019638524615499656, "learning_rate": 1.720375736098079e-05, "loss": 0.4416, "step": 36425 }, { "epoch": 1.8494625020624182, "grad_norm": 0.022038205053431632, "learning_rate": 1.7146194726952778e-05, "loss": 0.4335, "step": 36430 }, { "epoch": 1.8497163381604498, "grad_norm": 0.022686280035144835, "learning_rate": 1.708872687471874e-05, "loss": 0.4578, "step": 36435 }, { "epoch": 1.8499701742584813, "grad_norm": 0.025248657754128545, "learning_rate": 1.7031353815559425e-05, "loss": 0.4814, "step": 36440 }, { "epoch": 1.8502240103565128, "grad_norm": 0.021988387262798826, "learning_rate": 1.697407556073671e-05, "loss": 0.4446, "step": 36445 }, { "epoch": 1.8504778464545444, "grad_norm": 0.021771081888713024, "learning_rate": 1.6916892121494166e-05, "loss": 0.4603, "step": 36450 }, { "epoch": 1.850731682552576, "grad_norm": 0.019874477923159228, "learning_rate": 1.6859803509056527e-05, "loss": 0.4329, "step": 36455 }, { "epoch": 1.8509855186506075, "grad_norm": 0.02171798205791076, "learning_rate": 1.680280973463011e-05, "loss": 0.449, "step": 36460 }, { "epoch": 1.8512393547486388, "grad_norm": 0.02199931724655234, "learning_rate": 1.674591080940241e-05, "loss": 0.4585, "step": 36465 }, { "epoch": 1.8514931908466703, "grad_norm": 0.02577647787558014, "learning_rate": 1.6689106744542437e-05, "loss": 0.4542, "step": 36470 }, { "epoch": 1.8517470269447018, "grad_norm": 0.023435984326753145, "learning_rate": 1.6632397551200496e-05, "loss": 0.4436, "step": 36475 }, { "epoch": 1.8520008630427331, "grad_norm": 0.020355449487897765, "learning_rate": 1.6575783240508458e-05, "loss": 0.4355, "step": 36480 }, { "epoch": 1.8522546991407647, "grad_norm": 0.0207668042498274, "learning_rate": 1.6519263823579213e-05, "loss": 0.4557, "step": 36485 }, { "epoch": 1.8525085352387962, "grad_norm": 0.0236812491259185, "learning_rate": 1.6462839311507494e-05, "loss": 0.4519, "step": 36490 }, { "epoch": 1.8527623713368278, "grad_norm": 0.026595024576517964, "learning_rate": 1.640650971536889e-05, "loss": 0.4644, "step": 36495 }, { "epoch": 1.8530162074348593, "grad_norm": 0.022762821590263934, "learning_rate": 1.635027504622083e-05, "loss": 0.4323, "step": 36500 }, { "epoch": 1.8532700435328908, "grad_norm": 0.025364089662246653, "learning_rate": 1.6294135315101765e-05, "loss": 0.4326, "step": 36505 }, { "epoch": 1.8535238796309224, "grad_norm": 0.021616350106393015, "learning_rate": 1.6238090533031825e-05, "loss": 0.4613, "step": 36510 }, { "epoch": 1.853777715728954, "grad_norm": 0.022173841898150175, "learning_rate": 1.6182140711012095e-05, "loss": 0.4623, "step": 36515 }, { "epoch": 1.8540315518269854, "grad_norm": 0.021041461117188336, "learning_rate": 1.6126285860025403e-05, "loss": 0.4418, "step": 36520 }, { "epoch": 1.854285387925017, "grad_norm": 0.03205910168306328, "learning_rate": 1.6070525991035646e-05, "loss": 0.4613, "step": 36525 }, { "epoch": 1.8545392240230483, "grad_norm": 0.02175375824381657, "learning_rate": 1.6014861114988343e-05, "loss": 0.4306, "step": 36530 }, { "epoch": 1.8547930601210798, "grad_norm": 0.024123083015002662, "learning_rate": 1.5959291242810146e-05, "loss": 0.4818, "step": 36535 }, { "epoch": 1.8550468962191113, "grad_norm": 0.022298076921305057, "learning_rate": 1.590381638540922e-05, "loss": 0.4215, "step": 36540 }, { "epoch": 1.8553007323171429, "grad_norm": 0.026828035039684898, "learning_rate": 1.5848436553674905e-05, "loss": 0.456, "step": 36545 }, { "epoch": 1.8555545684151742, "grad_norm": 0.02438509358051607, "learning_rate": 1.5793151758478064e-05, "loss": 0.4759, "step": 36550 }, { "epoch": 1.8558084045132057, "grad_norm": 0.022150780639142196, "learning_rate": 1.5737962010670738e-05, "loss": 0.43, "step": 36555 }, { "epoch": 1.8560622406112373, "grad_norm": 0.02587280793684847, "learning_rate": 1.5682867321086482e-05, "loss": 0.4301, "step": 36560 }, { "epoch": 1.8563160767092688, "grad_norm": 0.02196586338846343, "learning_rate": 1.5627867700540144e-05, "loss": 0.4392, "step": 36565 }, { "epoch": 1.8565699128073003, "grad_norm": 0.02236881612109358, "learning_rate": 1.557296315982776e-05, "loss": 0.4533, "step": 36570 }, { "epoch": 1.8568237489053319, "grad_norm": 0.021640783844340757, "learning_rate": 1.5518153709726922e-05, "loss": 0.4356, "step": 36575 }, { "epoch": 1.8570775850033634, "grad_norm": 0.021850601092974337, "learning_rate": 1.5463439360996367e-05, "loss": 0.4601, "step": 36580 }, { "epoch": 1.857331421101395, "grad_norm": 0.022863672714919226, "learning_rate": 1.5408820124376277e-05, "loss": 0.4623, "step": 36585 }, { "epoch": 1.8575852571994265, "grad_norm": 0.02052988156416512, "learning_rate": 1.535429601058813e-05, "loss": 0.4182, "step": 36590 }, { "epoch": 1.8578390932974578, "grad_norm": 0.01930622632761717, "learning_rate": 1.5299867030334813e-05, "loss": 0.4422, "step": 36595 }, { "epoch": 1.8580929293954893, "grad_norm": 0.020532303166287503, "learning_rate": 1.5245533194300387e-05, "loss": 0.4742, "step": 36600 }, { "epoch": 1.8583467654935208, "grad_norm": 0.021453897011547724, "learning_rate": 1.5191294513150322e-05, "loss": 0.4438, "step": 36605 }, { "epoch": 1.8586006015915524, "grad_norm": 0.021567947200895245, "learning_rate": 1.5137150997531379e-05, "loss": 0.4645, "step": 36610 }, { "epoch": 1.8588544376895837, "grad_norm": 0.02120114038014877, "learning_rate": 1.5083102658071667e-05, "loss": 0.4417, "step": 36615 }, { "epoch": 1.8591082737876152, "grad_norm": 0.02211469006670524, "learning_rate": 1.5029149505380647e-05, "loss": 0.4773, "step": 36620 }, { "epoch": 1.8593621098856468, "grad_norm": 0.023182166431144506, "learning_rate": 1.4975291550049063e-05, "loss": 0.4537, "step": 36625 }, { "epoch": 1.8596159459836783, "grad_norm": 0.019178819226592782, "learning_rate": 1.492152880264891e-05, "loss": 0.4491, "step": 36630 }, { "epoch": 1.8598697820817098, "grad_norm": 0.026391292018557753, "learning_rate": 1.4867861273733629e-05, "loss": 0.4437, "step": 36635 }, { "epoch": 1.8601236181797414, "grad_norm": 0.02545968689149914, "learning_rate": 1.4814288973837742e-05, "loss": 0.4414, "step": 36640 }, { "epoch": 1.860377454277773, "grad_norm": 0.02262304039898802, "learning_rate": 1.4760811913477389e-05, "loss": 0.4643, "step": 36645 }, { "epoch": 1.8606312903758044, "grad_norm": 0.022296739978009437, "learning_rate": 1.4707430103149732e-05, "loss": 0.4651, "step": 36650 }, { "epoch": 1.860885126473836, "grad_norm": 0.025642319286203506, "learning_rate": 1.4654143553333387e-05, "loss": 0.4729, "step": 36655 }, { "epoch": 1.8611389625718675, "grad_norm": 0.02273311192918183, "learning_rate": 1.4600952274488265e-05, "loss": 0.4587, "step": 36660 }, { "epoch": 1.8613927986698988, "grad_norm": 0.02116679503489843, "learning_rate": 1.4547856277055571e-05, "loss": 0.4557, "step": 36665 }, { "epoch": 1.8616466347679304, "grad_norm": 0.021316010404225146, "learning_rate": 1.4494855571457633e-05, "loss": 0.4697, "step": 36670 }, { "epoch": 1.8619004708659619, "grad_norm": 0.02350879271004433, "learning_rate": 1.4441950168098406e-05, "loss": 0.4464, "step": 36675 }, { "epoch": 1.8621543069639932, "grad_norm": 0.023258720770063683, "learning_rate": 1.4389140077362916e-05, "loss": 0.4375, "step": 36680 }, { "epoch": 1.8624081430620247, "grad_norm": 0.021273891527511624, "learning_rate": 1.433642530961743e-05, "loss": 0.4552, "step": 36685 }, { "epoch": 1.8626619791600563, "grad_norm": 0.02136026789458103, "learning_rate": 1.4283805875209721e-05, "loss": 0.4587, "step": 36690 }, { "epoch": 1.8629158152580878, "grad_norm": 0.02214731165919245, "learning_rate": 1.4231281784468587e-05, "loss": 0.4264, "step": 36695 }, { "epoch": 1.8631696513561193, "grad_norm": 0.02350780328623469, "learning_rate": 1.4178853047704388e-05, "loss": 0.4499, "step": 36700 }, { "epoch": 1.8634234874541509, "grad_norm": 0.03141213231106275, "learning_rate": 1.412651967520845e-05, "loss": 0.4694, "step": 36705 }, { "epoch": 1.8636773235521824, "grad_norm": 0.023866885124020652, "learning_rate": 1.4074281677253719e-05, "loss": 0.4456, "step": 36710 }, { "epoch": 1.863931159650214, "grad_norm": 0.020431070665186554, "learning_rate": 1.4022139064094164e-05, "loss": 0.4463, "step": 36715 }, { "epoch": 1.8641849957482455, "grad_norm": 0.021642398766930707, "learning_rate": 1.3970091845965205e-05, "loss": 0.4466, "step": 36720 }, { "epoch": 1.864438831846277, "grad_norm": 0.02411974345336584, "learning_rate": 1.3918140033083338e-05, "loss": 0.4958, "step": 36725 }, { "epoch": 1.8646926679443083, "grad_norm": 0.024224467721648102, "learning_rate": 1.3866283635646515e-05, "loss": 0.4691, "step": 36730 }, { "epoch": 1.8649465040423399, "grad_norm": 0.02057402120977364, "learning_rate": 1.3814522663833761e-05, "loss": 0.4473, "step": 36735 }, { "epoch": 1.8652003401403714, "grad_norm": 0.021182543447772897, "learning_rate": 1.3762857127805727e-05, "loss": 0.4408, "step": 36740 }, { "epoch": 1.8654541762384027, "grad_norm": 0.025082497648913876, "learning_rate": 1.3711287037703913e-05, "loss": 0.4422, "step": 36745 }, { "epoch": 1.8657080123364342, "grad_norm": 0.023947880929159032, "learning_rate": 1.3659812403651439e-05, "loss": 0.464, "step": 36750 }, { "epoch": 1.8659618484344658, "grad_norm": 0.0255745124388733, "learning_rate": 1.3608433235752282e-05, "loss": 0.4467, "step": 36755 }, { "epoch": 1.8662156845324973, "grad_norm": 0.02307802412282208, "learning_rate": 1.355714954409215e-05, "loss": 0.4477, "step": 36760 }, { "epoch": 1.8664695206305288, "grad_norm": 0.02612322796667025, "learning_rate": 1.3505961338737604e-05, "loss": 0.4463, "step": 36765 }, { "epoch": 1.8667233567285604, "grad_norm": 0.029078431865484707, "learning_rate": 1.3454868629736771e-05, "loss": 0.4497, "step": 36770 }, { "epoch": 1.866977192826592, "grad_norm": 0.025822399311491818, "learning_rate": 1.3403871427118798e-05, "loss": 0.4527, "step": 36775 }, { "epoch": 1.8672310289246234, "grad_norm": 0.022291834000361617, "learning_rate": 1.3352969740894228e-05, "loss": 0.4711, "step": 36780 }, { "epoch": 1.867484865022655, "grad_norm": 0.02647393100605352, "learning_rate": 1.3302163581054793e-05, "loss": 0.4574, "step": 36785 }, { "epoch": 1.8677387011206865, "grad_norm": 0.023446096336786612, "learning_rate": 1.3251452957573517e-05, "loss": 0.4738, "step": 36790 }, { "epoch": 1.8679925372187178, "grad_norm": 0.02127291076715947, "learning_rate": 1.3200837880404548e-05, "loss": 0.4877, "step": 36795 }, { "epoch": 1.8682463733167494, "grad_norm": 0.024906256232495056, "learning_rate": 1.3150318359483437e-05, "loss": 0.447, "step": 36800 }, { "epoch": 1.868500209414781, "grad_norm": 0.02057976417447868, "learning_rate": 1.3099894404726976e-05, "loss": 0.4736, "step": 36805 }, { "epoch": 1.8687540455128122, "grad_norm": 0.02492191020600024, "learning_rate": 1.3049566026033022e-05, "loss": 0.4711, "step": 36810 }, { "epoch": 1.8690078816108437, "grad_norm": 0.02420601274081891, "learning_rate": 1.2999333233280896e-05, "loss": 0.4365, "step": 36815 }, { "epoch": 1.8692617177088753, "grad_norm": 0.020254390572854473, "learning_rate": 1.294919603633088e-05, "loss": 0.439, "step": 36820 }, { "epoch": 1.8695155538069068, "grad_norm": 0.02474683239453854, "learning_rate": 1.2899154445024874e-05, "loss": 0.4541, "step": 36825 }, { "epoch": 1.8697693899049384, "grad_norm": 0.03001458417928382, "learning_rate": 1.2849208469185636e-05, "loss": 0.4811, "step": 36830 }, { "epoch": 1.8700232260029699, "grad_norm": 0.021337265901394033, "learning_rate": 1.2799358118617377e-05, "loss": 0.4643, "step": 36835 }, { "epoch": 1.8702770621010014, "grad_norm": 0.02208717085882638, "learning_rate": 1.2749603403105437e-05, "loss": 0.4623, "step": 36840 }, { "epoch": 1.870530898199033, "grad_norm": 0.02100059123832777, "learning_rate": 1.2699944332416502e-05, "loss": 0.4391, "step": 36845 }, { "epoch": 1.8707847342970645, "grad_norm": 0.023998206914377488, "learning_rate": 1.2650380916298222e-05, "loss": 0.4945, "step": 36850 }, { "epoch": 1.871038570395096, "grad_norm": 0.022644141112716502, "learning_rate": 1.2600913164479811e-05, "loss": 0.443, "step": 36855 }, { "epoch": 1.8712924064931273, "grad_norm": 0.019765022307224444, "learning_rate": 1.2551541086671447e-05, "loss": 0.4424, "step": 36860 }, { "epoch": 1.8715462425911589, "grad_norm": 0.021694636824496228, "learning_rate": 1.2502264692564768e-05, "loss": 0.4575, "step": 36865 }, { "epoch": 1.8718000786891904, "grad_norm": 0.020689782081024555, "learning_rate": 1.2453083991832258e-05, "loss": 0.4464, "step": 36870 }, { "epoch": 1.872053914787222, "grad_norm": 0.02173020264000965, "learning_rate": 1.2403998994128085e-05, "loss": 0.4299, "step": 36875 }, { "epoch": 1.8723077508852533, "grad_norm": 0.023926802677357032, "learning_rate": 1.2355009709087205e-05, "loss": 0.4544, "step": 36880 }, { "epoch": 1.8725615869832848, "grad_norm": 0.024662532571645124, "learning_rate": 1.2306116146326096e-05, "loss": 0.4503, "step": 36885 }, { "epoch": 1.8728154230813163, "grad_norm": 0.022866472003658402, "learning_rate": 1.225731831544219e-05, "loss": 0.4684, "step": 36890 }, { "epoch": 1.8730692591793479, "grad_norm": 0.02737229931132882, "learning_rate": 1.220861622601438e-05, "loss": 0.4691, "step": 36895 }, { "epoch": 1.8733230952773794, "grad_norm": 0.02045519299338699, "learning_rate": 1.2160009887602575e-05, "loss": 0.439, "step": 36900 }, { "epoch": 1.873576931375411, "grad_norm": 0.02105021426485451, "learning_rate": 1.2111499309747975e-05, "loss": 0.4512, "step": 36905 }, { "epoch": 1.8738307674734425, "grad_norm": 0.023234909473193354, "learning_rate": 1.2063084501972966e-05, "loss": 0.4605, "step": 36910 }, { "epoch": 1.874084603571474, "grad_norm": 0.02495377423784143, "learning_rate": 1.2014765473781053e-05, "loss": 0.4709, "step": 36915 }, { "epoch": 1.8743384396695055, "grad_norm": 0.01982317350160172, "learning_rate": 1.1966542234657208e-05, "loss": 0.4205, "step": 36920 }, { "epoch": 1.874592275767537, "grad_norm": 0.023080934785440023, "learning_rate": 1.1918414794067244e-05, "loss": 0.4355, "step": 36925 }, { "epoch": 1.8748461118655684, "grad_norm": 0.022797451618634154, "learning_rate": 1.1870383161458497e-05, "loss": 0.4465, "step": 36930 }, { "epoch": 1.8750999479636, "grad_norm": 0.0241812112755731, "learning_rate": 1.182244734625909e-05, "loss": 0.4426, "step": 36935 }, { "epoch": 1.8753537840616314, "grad_norm": 0.023434936541055627, "learning_rate": 1.1774607357878886e-05, "loss": 0.4484, "step": 36940 }, { "epoch": 1.8756076201596628, "grad_norm": 0.02410828643170135, "learning_rate": 1.1726863205708372e-05, "loss": 0.4408, "step": 36945 }, { "epoch": 1.8758614562576943, "grad_norm": 0.022411527527885312, "learning_rate": 1.1679214899119605e-05, "loss": 0.4561, "step": 36950 }, { "epoch": 1.8761152923557258, "grad_norm": 0.021883107784838412, "learning_rate": 1.1631662447465719e-05, "loss": 0.4449, "step": 36955 }, { "epoch": 1.8763691284537574, "grad_norm": 0.02541344155482163, "learning_rate": 1.1584205860081021e-05, "loss": 0.4598, "step": 36960 }, { "epoch": 1.876622964551789, "grad_norm": 0.02445820739790313, "learning_rate": 1.153684514628095e-05, "loss": 0.4213, "step": 36965 }, { "epoch": 1.8768768006498204, "grad_norm": 0.022274003744240302, "learning_rate": 1.1489580315362292e-05, "loss": 0.4674, "step": 36970 }, { "epoch": 1.877130636747852, "grad_norm": 0.02302479732283661, "learning_rate": 1.1442411376602679e-05, "loss": 0.4447, "step": 36975 }, { "epoch": 1.8773844728458835, "grad_norm": 0.023099210723607017, "learning_rate": 1.139533833926143e-05, "loss": 0.4334, "step": 36980 }, { "epoch": 1.877638308943915, "grad_norm": 0.02557626233760811, "learning_rate": 1.1348361212578484e-05, "loss": 0.4435, "step": 36985 }, { "epoch": 1.8778921450419466, "grad_norm": 0.02197010518349448, "learning_rate": 1.1301480005775412e-05, "loss": 0.4739, "step": 36990 }, { "epoch": 1.8781459811399779, "grad_norm": 0.025263565398403336, "learning_rate": 1.1254694728054626e-05, "loss": 0.4556, "step": 36995 }, { "epoch": 1.8783998172380094, "grad_norm": 0.021885371029893786, "learning_rate": 1.1208005388599951e-05, "loss": 0.4652, "step": 37000 }, { "epoch": 1.878653653336041, "grad_norm": 0.023486472540742757, "learning_rate": 1.1161411996576165e-05, "loss": 0.4667, "step": 37005 }, { "epoch": 1.8789074894340723, "grad_norm": 0.022717357060794766, "learning_rate": 1.1114914561129396e-05, "loss": 0.4579, "step": 37010 }, { "epoch": 1.8791613255321038, "grad_norm": 0.021029880026985255, "learning_rate": 1.106851309138679e-05, "loss": 0.4903, "step": 37015 }, { "epoch": 1.8794151616301353, "grad_norm": 0.020524349147354494, "learning_rate": 1.1022207596456835e-05, "loss": 0.4325, "step": 37020 }, { "epoch": 1.8796689977281669, "grad_norm": 0.026662156059636153, "learning_rate": 1.0975998085428984e-05, "loss": 0.4224, "step": 37025 }, { "epoch": 1.8799228338261984, "grad_norm": 0.02148292116153404, "learning_rate": 1.0929884567373927e-05, "loss": 0.4643, "step": 37030 }, { "epoch": 1.88017666992423, "grad_norm": 0.030321978223338382, "learning_rate": 1.0883867051343533e-05, "loss": 0.4631, "step": 37035 }, { "epoch": 1.8804305060222615, "grad_norm": 0.02090326478040996, "learning_rate": 1.0837945546370798e-05, "loss": 0.4325, "step": 37040 }, { "epoch": 1.880684342120293, "grad_norm": 0.02145454006132866, "learning_rate": 1.0792120061469956e-05, "loss": 0.4412, "step": 37045 }, { "epoch": 1.8809381782183245, "grad_norm": 0.022566197322495922, "learning_rate": 1.0746390605636259e-05, "loss": 0.463, "step": 37050 }, { "epoch": 1.881192014316356, "grad_norm": 0.027247127025454444, "learning_rate": 1.0700757187846188e-05, "loss": 0.4749, "step": 37055 }, { "epoch": 1.8814458504143874, "grad_norm": 0.02154384889356073, "learning_rate": 1.065521981705736e-05, "loss": 0.429, "step": 37060 }, { "epoch": 1.881699686512419, "grad_norm": 0.026084199584670894, "learning_rate": 1.0609778502208512e-05, "loss": 0.4497, "step": 37065 }, { "epoch": 1.8819535226104505, "grad_norm": 0.020525637238970808, "learning_rate": 1.0564433252219507e-05, "loss": 0.4537, "step": 37070 }, { "epoch": 1.8822073587084818, "grad_norm": 0.020483461060641535, "learning_rate": 1.0519184075991505e-05, "loss": 0.4505, "step": 37075 }, { "epoch": 1.8824611948065133, "grad_norm": 0.02326068817822505, "learning_rate": 1.0474030982406624e-05, "loss": 0.4732, "step": 37080 }, { "epoch": 1.8827150309045448, "grad_norm": 0.019163506578638276, "learning_rate": 1.0428973980328216e-05, "loss": 0.441, "step": 37085 }, { "epoch": 1.8829688670025764, "grad_norm": 0.018667241598925556, "learning_rate": 1.038401307860065e-05, "loss": 0.435, "step": 37090 }, { "epoch": 1.883222703100608, "grad_norm": 0.02123912140480303, "learning_rate": 1.0339148286049705e-05, "loss": 0.4478, "step": 37095 }, { "epoch": 1.8834765391986394, "grad_norm": 0.021748725396268195, "learning_rate": 1.0294379611481885e-05, "loss": 0.4382, "step": 37100 }, { "epoch": 1.883730375296671, "grad_norm": 0.021313373453137146, "learning_rate": 1.0249707063685277e-05, "loss": 0.4929, "step": 37105 }, { "epoch": 1.8839842113947025, "grad_norm": 0.020814925335928592, "learning_rate": 1.0205130651428806e-05, "loss": 0.4589, "step": 37110 }, { "epoch": 1.884238047492734, "grad_norm": 0.019883791376370075, "learning_rate": 1.0160650383462588e-05, "loss": 0.4311, "step": 37115 }, { "epoch": 1.8844918835907656, "grad_norm": 0.025265388503289, "learning_rate": 1.0116266268517805e-05, "loss": 0.4132, "step": 37120 }, { "epoch": 1.884745719688797, "grad_norm": 0.024819525773685097, "learning_rate": 1.0071978315306984e-05, "loss": 0.4526, "step": 37125 }, { "epoch": 1.8849995557868284, "grad_norm": 0.02126190229536664, "learning_rate": 1.0027786532523508e-05, "loss": 0.4589, "step": 37130 }, { "epoch": 1.88525339188486, "grad_norm": 0.022454913487583175, "learning_rate": 9.983690928842105e-06, "loss": 0.4721, "step": 37135 }, { "epoch": 1.8855072279828915, "grad_norm": 0.03277420429629867, "learning_rate": 9.939691512918404e-06, "loss": 0.4367, "step": 37140 }, { "epoch": 1.8857610640809228, "grad_norm": 0.022196508671986213, "learning_rate": 9.895788293389385e-06, "loss": 0.4626, "step": 37145 }, { "epoch": 1.8860149001789543, "grad_norm": 0.022168660914226923, "learning_rate": 9.851981278872878e-06, "loss": 0.4689, "step": 37150 }, { "epoch": 1.8862687362769859, "grad_norm": 0.02563443384862334, "learning_rate": 9.808270477968173e-06, "loss": 0.4455, "step": 37155 }, { "epoch": 1.8865225723750174, "grad_norm": 0.021376673277626792, "learning_rate": 9.764655899255347e-06, "loss": 0.4679, "step": 37160 }, { "epoch": 1.886776408473049, "grad_norm": 0.026018940765089024, "learning_rate": 9.721137551295778e-06, "loss": 0.4601, "step": 37165 }, { "epoch": 1.8870302445710805, "grad_norm": 0.023643143077169092, "learning_rate": 9.677715442631962e-06, "loss": 0.4454, "step": 37170 }, { "epoch": 1.887284080669112, "grad_norm": 0.025955682817119883, "learning_rate": 9.63438958178725e-06, "loss": 0.47, "step": 37175 }, { "epoch": 1.8875379167671436, "grad_norm": 0.02096935393978991, "learning_rate": 9.591159977266506e-06, "loss": 0.4425, "step": 37180 }, { "epoch": 1.887791752865175, "grad_norm": 0.018616468472140536, "learning_rate": 9.54802663755533e-06, "loss": 0.4415, "step": 37185 }, { "epoch": 1.8880455889632066, "grad_norm": 0.02545228535890788, "learning_rate": 9.504989571120726e-06, "loss": 0.4677, "step": 37190 }, { "epoch": 1.888299425061238, "grad_norm": 0.022999000833790844, "learning_rate": 9.462048786410492e-06, "loss": 0.4482, "step": 37195 }, { "epoch": 1.8885532611592695, "grad_norm": 0.02212084208825238, "learning_rate": 9.419204291853834e-06, "loss": 0.4339, "step": 37200 }, { "epoch": 1.888807097257301, "grad_norm": 0.024284937751072763, "learning_rate": 9.376456095860798e-06, "loss": 0.4601, "step": 37205 }, { "epoch": 1.8890609333553323, "grad_norm": 0.02382615118490866, "learning_rate": 9.333804206822726e-06, "loss": 0.4606, "step": 37210 }, { "epoch": 1.8893147694533639, "grad_norm": 0.021486485098027707, "learning_rate": 9.291248633111927e-06, "loss": 0.436, "step": 37215 }, { "epoch": 1.8895686055513954, "grad_norm": 0.02341752435747304, "learning_rate": 9.248789383081879e-06, "loss": 0.4725, "step": 37220 }, { "epoch": 1.889822441649427, "grad_norm": 0.027777059793671188, "learning_rate": 9.206426465067031e-06, "loss": 0.4456, "step": 37225 }, { "epoch": 1.8900762777474585, "grad_norm": 0.021359684770520977, "learning_rate": 9.164159887383172e-06, "loss": 0.4423, "step": 37230 }, { "epoch": 1.89033011384549, "grad_norm": 0.02349495122091391, "learning_rate": 9.12198965832689e-06, "loss": 0.46, "step": 37235 }, { "epoch": 1.8905839499435215, "grad_norm": 0.022308573567443774, "learning_rate": 9.079915786176063e-06, "loss": 0.4566, "step": 37240 }, { "epoch": 1.890837786041553, "grad_norm": 0.020770175767240735, "learning_rate": 9.037938279189528e-06, "loss": 0.4211, "step": 37245 }, { "epoch": 1.8910916221395846, "grad_norm": 0.0222815680616627, "learning_rate": 8.996057145607306e-06, "loss": 0.4519, "step": 37250 }, { "epoch": 1.8913454582376161, "grad_norm": 0.02139157051540678, "learning_rate": 8.95427239365043e-06, "loss": 0.4423, "step": 37255 }, { "epoch": 1.8915992943356474, "grad_norm": 0.021039178876140388, "learning_rate": 8.912584031521065e-06, "loss": 0.4599, "step": 37260 }, { "epoch": 1.891853130433679, "grad_norm": 0.031078033789477582, "learning_rate": 8.870992067402384e-06, "loss": 0.4544, "step": 37265 }, { "epoch": 1.8921069665317105, "grad_norm": 0.02651545626840905, "learning_rate": 8.82949650945869e-06, "loss": 0.4506, "step": 37270 }, { "epoch": 1.8923608026297418, "grad_norm": 0.018840248290589404, "learning_rate": 8.788097365835358e-06, "loss": 0.4068, "step": 37275 }, { "epoch": 1.8926146387277734, "grad_norm": 0.02098145796800753, "learning_rate": 8.746794644658828e-06, "loss": 0.4254, "step": 37280 }, { "epoch": 1.892868474825805, "grad_norm": 0.023778470835364184, "learning_rate": 8.705588354036676e-06, "loss": 0.4727, "step": 37285 }, { "epoch": 1.8931223109238364, "grad_norm": 0.019800277404283806, "learning_rate": 8.664478502057427e-06, "loss": 0.4189, "step": 37290 }, { "epoch": 1.893376147021868, "grad_norm": 0.022580776759673696, "learning_rate": 8.623465096790794e-06, "loss": 0.4688, "step": 37295 }, { "epoch": 1.8936299831198995, "grad_norm": 0.024504692780699923, "learning_rate": 8.582548146287395e-06, "loss": 0.4415, "step": 37300 }, { "epoch": 1.893883819217931, "grad_norm": 0.023773581217463372, "learning_rate": 8.541727658579191e-06, "loss": 0.4546, "step": 37305 }, { "epoch": 1.8941376553159626, "grad_norm": 0.02457580274560056, "learning_rate": 8.501003641678885e-06, "loss": 0.4493, "step": 37310 }, { "epoch": 1.894391491413994, "grad_norm": 0.023069551301224907, "learning_rate": 8.460376103580526e-06, "loss": 0.4494, "step": 37315 }, { "epoch": 1.8946453275120256, "grad_norm": 0.02501756577031343, "learning_rate": 8.419845052258956e-06, "loss": 0.4785, "step": 37320 }, { "epoch": 1.894899163610057, "grad_norm": 0.025329116897176897, "learning_rate": 8.37941049567037e-06, "loss": 0.4603, "step": 37325 }, { "epoch": 1.8951529997080885, "grad_norm": 0.023975660111064893, "learning_rate": 8.339072441751749e-06, "loss": 0.4487, "step": 37330 }, { "epoch": 1.89540683580612, "grad_norm": 0.022681786439959096, "learning_rate": 8.298830898421316e-06, "loss": 0.4756, "step": 37335 }, { "epoch": 1.8956606719041513, "grad_norm": 0.019839744977402773, "learning_rate": 8.258685873578198e-06, "loss": 0.4743, "step": 37340 }, { "epoch": 1.8959145080021829, "grad_norm": 0.029110595946668913, "learning_rate": 8.218637375102866e-06, "loss": 0.4219, "step": 37345 }, { "epoch": 1.8961683441002144, "grad_norm": 0.02468212485542408, "learning_rate": 8.178685410856424e-06, "loss": 0.4636, "step": 37350 }, { "epoch": 1.896422180198246, "grad_norm": 0.023021032923387984, "learning_rate": 8.138829988681318e-06, "loss": 0.4565, "step": 37355 }, { "epoch": 1.8966760162962775, "grad_norm": 0.021806816479458407, "learning_rate": 8.09907111640107e-06, "loss": 0.4547, "step": 37360 }, { "epoch": 1.896929852394309, "grad_norm": 0.028433865062280395, "learning_rate": 8.059408801819934e-06, "loss": 0.4506, "step": 37365 }, { "epoch": 1.8971836884923405, "grad_norm": 0.024983454984375437, "learning_rate": 8.01984305272363e-06, "loss": 0.4492, "step": 37370 }, { "epoch": 1.897437524590372, "grad_norm": 0.022957101384112662, "learning_rate": 7.98037387687861e-06, "loss": 0.4389, "step": 37375 }, { "epoch": 1.8976913606884036, "grad_norm": 0.018439604142735946, "learning_rate": 7.941001282032512e-06, "loss": 0.4117, "step": 37380 }, { "epoch": 1.8979451967864351, "grad_norm": 0.027207353586784774, "learning_rate": 7.90172527591393e-06, "loss": 0.4506, "step": 37385 }, { "epoch": 1.8981990328844665, "grad_norm": 0.027055180572066115, "learning_rate": 7.862545866232585e-06, "loss": 0.4689, "step": 37390 }, { "epoch": 1.898452868982498, "grad_norm": 0.026896364230939135, "learning_rate": 7.823463060679215e-06, "loss": 0.4432, "step": 37395 }, { "epoch": 1.8987067050805295, "grad_norm": 0.02423512351795487, "learning_rate": 7.784476866925571e-06, "loss": 0.4585, "step": 37400 }, { "epoch": 1.898960541178561, "grad_norm": 0.02515565929962076, "learning_rate": 7.745587292624423e-06, "loss": 0.4585, "step": 37405 }, { "epoch": 1.8992143772765924, "grad_norm": 0.020797485192393404, "learning_rate": 7.706794345409662e-06, "loss": 0.4633, "step": 37410 }, { "epoch": 1.899468213374624, "grad_norm": 0.021221061620204006, "learning_rate": 7.668098032896086e-06, "loss": 0.4474, "step": 37415 }, { "epoch": 1.8997220494726554, "grad_norm": 0.02165854297022104, "learning_rate": 7.629498362679621e-06, "loss": 0.4516, "step": 37420 }, { "epoch": 1.899975885570687, "grad_norm": 0.021157573658216487, "learning_rate": 7.590995342337148e-06, "loss": 0.451, "step": 37425 }, { "epoch": 1.9002297216687185, "grad_norm": 0.022283635644036758, "learning_rate": 7.552588979426733e-06, "loss": 0.4755, "step": 37430 }, { "epoch": 1.90048355776675, "grad_norm": 0.024413711078674752, "learning_rate": 7.514279281487179e-06, "loss": 0.4585, "step": 37435 }, { "epoch": 1.9007373938647816, "grad_norm": 0.022093763977813735, "learning_rate": 7.476066256038638e-06, "loss": 0.4473, "step": 37440 }, { "epoch": 1.9009912299628131, "grad_norm": 0.021626286158038993, "learning_rate": 7.437949910581998e-06, "loss": 0.4354, "step": 37445 }, { "epoch": 1.9012450660608446, "grad_norm": 0.0273165842843997, "learning_rate": 7.399930252599496e-06, "loss": 0.4737, "step": 37450 }, { "epoch": 1.901498902158876, "grad_norm": 0.023472801346320262, "learning_rate": 7.362007289553996e-06, "loss": 0.4606, "step": 37455 }, { "epoch": 1.9017527382569075, "grad_norm": 0.025848419421606466, "learning_rate": 7.324181028889709e-06, "loss": 0.4439, "step": 37460 }, { "epoch": 1.902006574354939, "grad_norm": 0.022098946912295272, "learning_rate": 7.286451478031753e-06, "loss": 0.4525, "step": 37465 }, { "epoch": 1.9022604104529706, "grad_norm": 0.020132077745634438, "learning_rate": 7.2488186443862015e-06, "loss": 0.4505, "step": 37470 }, { "epoch": 1.9025142465510019, "grad_norm": 0.021696897376979055, "learning_rate": 7.211282535340202e-06, "loss": 0.4805, "step": 37475 }, { "epoch": 1.9027680826490334, "grad_norm": 0.027185595143177976, "learning_rate": 7.173843158261861e-06, "loss": 0.4553, "step": 37480 }, { "epoch": 1.903021918747065, "grad_norm": 0.02056624187081856, "learning_rate": 7.136500520500466e-06, "loss": 0.4245, "step": 37485 }, { "epoch": 1.9032757548450965, "grad_norm": 0.020926738183475128, "learning_rate": 7.0992546293860425e-06, "loss": 0.4228, "step": 37490 }, { "epoch": 1.903529590943128, "grad_norm": 0.028740551485723114, "learning_rate": 7.062105492229909e-06, "loss": 0.459, "step": 37495 }, { "epoch": 1.9037834270411595, "grad_norm": 0.053682388754172104, "learning_rate": 7.02505311632412e-06, "loss": 0.4424, "step": 37500 }, { "epoch": 1.904037263139191, "grad_norm": 0.02173561656109306, "learning_rate": 6.988097508942026e-06, "loss": 0.4822, "step": 37505 }, { "epoch": 1.9042910992372226, "grad_norm": 0.020845652238898626, "learning_rate": 6.951238677337657e-06, "loss": 0.4551, "step": 37510 }, { "epoch": 1.9045449353352542, "grad_norm": 0.022442971026077695, "learning_rate": 6.914476628746391e-06, "loss": 0.4574, "step": 37515 }, { "epoch": 1.9047987714332857, "grad_norm": 0.019100573728677607, "learning_rate": 6.8778113703842345e-06, "loss": 0.441, "step": 37520 }, { "epoch": 1.905052607531317, "grad_norm": 0.022661668431873734, "learning_rate": 6.8412429094485975e-06, "loss": 0.4653, "step": 37525 }, { "epoch": 1.9053064436293485, "grad_norm": 0.022521358187594164, "learning_rate": 6.80477125311757e-06, "loss": 0.459, "step": 37530 }, { "epoch": 1.90556027972738, "grad_norm": 0.02587584279617253, "learning_rate": 6.768396408550426e-06, "loss": 0.434, "step": 37535 }, { "epoch": 1.9058141158254114, "grad_norm": 0.02722351063157932, "learning_rate": 6.732118382887287e-06, "loss": 0.4388, "step": 37540 }, { "epoch": 1.906067951923443, "grad_norm": 0.02166094929641842, "learning_rate": 6.695937183249401e-06, "loss": 0.4502, "step": 37545 }, { "epoch": 1.9063217880214745, "grad_norm": 0.02157001533468999, "learning_rate": 6.6598528167389205e-06, "loss": 0.4463, "step": 37550 }, { "epoch": 1.906575624119506, "grad_norm": 0.02241970910672534, "learning_rate": 6.623865290439068e-06, "loss": 0.4456, "step": 37555 }, { "epoch": 1.9068294602175375, "grad_norm": 0.021738008804449923, "learning_rate": 6.587974611413972e-06, "loss": 0.4634, "step": 37560 }, { "epoch": 1.907083296315569, "grad_norm": 0.021239611101420534, "learning_rate": 6.552180786708828e-06, "loss": 0.4402, "step": 37565 }, { "epoch": 1.9073371324136006, "grad_norm": 0.02101083088232794, "learning_rate": 6.516483823349795e-06, "loss": 0.4725, "step": 37570 }, { "epoch": 1.9075909685116321, "grad_norm": 0.025164203687074636, "learning_rate": 6.480883728343989e-06, "loss": 0.4614, "step": 37575 }, { "epoch": 1.9078448046096637, "grad_norm": 0.023582969514340343, "learning_rate": 6.445380508679488e-06, "loss": 0.4138, "step": 37580 }, { "epoch": 1.9080986407076952, "grad_norm": 0.018915533270275835, "learning_rate": 6.4099741713254945e-06, "loss": 0.432, "step": 37585 }, { "epoch": 1.9083524768057265, "grad_norm": 0.02275092358064681, "learning_rate": 6.374664723232004e-06, "loss": 0.4535, "step": 37590 }, { "epoch": 1.908606312903758, "grad_norm": 0.022085053146593946, "learning_rate": 6.33945217133014e-06, "loss": 0.4422, "step": 37595 }, { "epoch": 1.9088601490017896, "grad_norm": 0.020867827787237652, "learning_rate": 6.304336522531928e-06, "loss": 0.462, "step": 37600 }, { "epoch": 1.9091139850998209, "grad_norm": 0.024270037364154493, "learning_rate": 6.26931778373041e-06, "loss": 0.4475, "step": 37605 }, { "epoch": 1.9093678211978524, "grad_norm": 0.019787562403379675, "learning_rate": 6.234395961799588e-06, "loss": 0.4427, "step": 37610 }, { "epoch": 1.909621657295884, "grad_norm": 0.021795151563263483, "learning_rate": 6.199571063594423e-06, "loss": 0.4609, "step": 37615 }, { "epoch": 1.9098754933939155, "grad_norm": 0.025237377838803695, "learning_rate": 6.164843095950889e-06, "loss": 0.4763, "step": 37620 }, { "epoch": 1.910129329491947, "grad_norm": 0.0213538500394289, "learning_rate": 6.13021206568587e-06, "loss": 0.4668, "step": 37625 }, { "epoch": 1.9103831655899786, "grad_norm": 0.019996110614226716, "learning_rate": 6.095677979597314e-06, "loss": 0.4321, "step": 37630 }, { "epoch": 1.91063700168801, "grad_norm": 0.0239910603152135, "learning_rate": 6.0612408444640775e-06, "loss": 0.4418, "step": 37635 }, { "epoch": 1.9108908377860416, "grad_norm": 0.01982225962400516, "learning_rate": 6.026900667045976e-06, "loss": 0.452, "step": 37640 }, { "epoch": 1.9111446738840732, "grad_norm": 0.020978657467764606, "learning_rate": 5.992657454083839e-06, "loss": 0.4343, "step": 37645 }, { "epoch": 1.9113985099821047, "grad_norm": 0.021612862899188023, "learning_rate": 5.958511212299455e-06, "loss": 0.4522, "step": 37650 }, { "epoch": 1.911652346080136, "grad_norm": 0.02111789726848896, "learning_rate": 5.9244619483955206e-06, "loss": 0.4635, "step": 37655 }, { "epoch": 1.9119061821781675, "grad_norm": 0.026163308653677195, "learning_rate": 5.890509669055799e-06, "loss": 0.4711, "step": 37660 }, { "epoch": 1.912160018276199, "grad_norm": 0.021019925153574526, "learning_rate": 5.856654380944848e-06, "loss": 0.4837, "step": 37665 }, { "epoch": 1.9124138543742304, "grad_norm": 0.027602896310674144, "learning_rate": 5.822896090708407e-06, "loss": 0.4239, "step": 37670 }, { "epoch": 1.912667690472262, "grad_norm": 0.021167535487248926, "learning_rate": 5.789234804972954e-06, "loss": 0.4552, "step": 37675 }, { "epoch": 1.9129215265702935, "grad_norm": 0.023086996155861482, "learning_rate": 5.755670530346146e-06, "loss": 0.4514, "step": 37680 }, { "epoch": 1.913175362668325, "grad_norm": 0.022681878507942586, "learning_rate": 5.722203273416326e-06, "loss": 0.4658, "step": 37685 }, { "epoch": 1.9134291987663565, "grad_norm": 0.02359410198455559, "learning_rate": 5.6888330407531275e-06, "loss": 0.4964, "step": 37690 }, { "epoch": 1.913683034864388, "grad_norm": 0.03200500971381236, "learning_rate": 5.6555598389068656e-06, "loss": 0.4776, "step": 37695 }, { "epoch": 1.9139368709624196, "grad_norm": 0.022712836996455054, "learning_rate": 5.622383674408871e-06, "loss": 0.4376, "step": 37700 }, { "epoch": 1.9141907070604511, "grad_norm": 0.025060341466893926, "learning_rate": 5.589304553771546e-06, "loss": 0.458, "step": 37705 }, { "epoch": 1.9144445431584827, "grad_norm": 0.030278170291182304, "learning_rate": 5.556322483488086e-06, "loss": 0.4499, "step": 37710 }, { "epoch": 1.9146983792565142, "grad_norm": 0.02740154808328099, "learning_rate": 5.523437470032755e-06, "loss": 0.4415, "step": 37715 }, { "epoch": 1.9149522153545455, "grad_norm": 0.02153574679548554, "learning_rate": 5.4906495198607246e-06, "loss": 0.4241, "step": 37720 }, { "epoch": 1.915206051452577, "grad_norm": 0.017912518417006917, "learning_rate": 5.457958639408067e-06, "loss": 0.4443, "step": 37725 }, { "epoch": 1.9154598875506086, "grad_norm": 0.02149002570361707, "learning_rate": 5.425364835091817e-06, "loss": 0.4269, "step": 37730 }, { "epoch": 1.9157137236486401, "grad_norm": 0.020655007242153935, "learning_rate": 5.392868113310023e-06, "loss": 0.4742, "step": 37735 }, { "epoch": 1.9159675597466714, "grad_norm": 0.02053084592337424, "learning_rate": 5.3604684804416385e-06, "loss": 0.4562, "step": 37740 }, { "epoch": 1.916221395844703, "grad_norm": 0.0241087565783307, "learning_rate": 5.328165942846519e-06, "loss": 0.4665, "step": 37745 }, { "epoch": 1.9164752319427345, "grad_norm": 0.022566883002394126, "learning_rate": 5.2959605068654825e-06, "loss": 0.4356, "step": 37750 }, { "epoch": 1.916729068040766, "grad_norm": 0.02242988197114318, "learning_rate": 5.263852178820305e-06, "loss": 0.4291, "step": 37755 }, { "epoch": 1.9169829041387976, "grad_norm": 0.020849242044697184, "learning_rate": 5.231840965013668e-06, "loss": 0.4608, "step": 37760 }, { "epoch": 1.917236740236829, "grad_norm": 0.02353852633397079, "learning_rate": 5.199926871729321e-06, "loss": 0.476, "step": 37765 }, { "epoch": 1.9174905763348606, "grad_norm": 0.022019434675896826, "learning_rate": 5.1681099052317545e-06, "loss": 0.4421, "step": 37770 }, { "epoch": 1.9177444124328922, "grad_norm": 0.02349710133273863, "learning_rate": 5.136390071766472e-06, "loss": 0.4404, "step": 37775 }, { "epoch": 1.9179982485309237, "grad_norm": 0.023170632986613243, "learning_rate": 5.104767377559938e-06, "loss": 0.4633, "step": 37780 }, { "epoch": 1.9182520846289552, "grad_norm": 0.020601169329142837, "learning_rate": 5.073241828819519e-06, "loss": 0.4633, "step": 37785 }, { "epoch": 1.9185059207269866, "grad_norm": 0.020705639764346858, "learning_rate": 5.041813431733544e-06, "loss": 0.441, "step": 37790 }, { "epoch": 1.918759756825018, "grad_norm": 0.02556826389734915, "learning_rate": 5.010482192471244e-06, "loss": 0.4671, "step": 37795 }, { "epoch": 1.9190135929230496, "grad_norm": 0.025036129806181157, "learning_rate": 4.9792481171828105e-06, "loss": 0.4469, "step": 37800 }, { "epoch": 1.919267429021081, "grad_norm": 0.022980559401410746, "learning_rate": 4.948111211999284e-06, "loss": 0.454, "step": 37805 }, { "epoch": 1.9195212651191125, "grad_norm": 0.02211175879102414, "learning_rate": 4.917071483032665e-06, "loss": 0.4545, "step": 37810 }, { "epoch": 1.919775101217144, "grad_norm": 0.021321225074512732, "learning_rate": 4.886128936375966e-06, "loss": 0.4651, "step": 37815 }, { "epoch": 1.9200289373151755, "grad_norm": 0.021856946004611112, "learning_rate": 4.855283578103054e-06, "loss": 0.4445, "step": 37820 }, { "epoch": 1.920282773413207, "grad_norm": 0.020187327224539992, "learning_rate": 4.824535414268638e-06, "loss": 0.4632, "step": 37825 }, { "epoch": 1.9205366095112386, "grad_norm": 0.023155717064296796, "learning_rate": 4.793884450908559e-06, "loss": 0.4565, "step": 37830 }, { "epoch": 1.9207904456092701, "grad_norm": 0.023520053434749763, "learning_rate": 4.763330694039281e-06, "loss": 0.4615, "step": 37835 }, { "epoch": 1.9210442817073017, "grad_norm": 0.02152645989454147, "learning_rate": 4.7328741496585615e-06, "loss": 0.4672, "step": 37840 }, { "epoch": 1.9212981178053332, "grad_norm": 0.020920871384161287, "learning_rate": 4.7025148237446745e-06, "loss": 0.462, "step": 37845 }, { "epoch": 1.9215519539033648, "grad_norm": 0.02171301262060699, "learning_rate": 4.672252722257076e-06, "loss": 0.4595, "step": 37850 }, { "epoch": 1.921805790001396, "grad_norm": 0.02813041922076858, "learning_rate": 4.642087851136123e-06, "loss": 0.4462, "step": 37855 }, { "epoch": 1.9220596260994276, "grad_norm": 0.025961513788743708, "learning_rate": 4.61202021630297e-06, "loss": 0.4425, "step": 37860 }, { "epoch": 1.9223134621974591, "grad_norm": 0.031507528395826975, "learning_rate": 4.582049823659673e-06, "loss": 0.4624, "step": 37865 }, { "epoch": 1.9225672982954904, "grad_norm": 0.022048534282361677, "learning_rate": 4.55217667908947e-06, "loss": 0.467, "step": 37870 }, { "epoch": 1.922821134393522, "grad_norm": 0.02306257762735943, "learning_rate": 4.522400788456115e-06, "loss": 0.4674, "step": 37875 }, { "epoch": 1.9230749704915535, "grad_norm": 0.02387832694700697, "learning_rate": 4.492722157604545e-06, "loss": 0.4673, "step": 37880 }, { "epoch": 1.923328806589585, "grad_norm": 0.020366760561243185, "learning_rate": 4.463140792360487e-06, "loss": 0.4333, "step": 37885 }, { "epoch": 1.9235826426876166, "grad_norm": 0.02130836354004238, "learning_rate": 4.433656698530741e-06, "loss": 0.4599, "step": 37890 }, { "epoch": 1.9238364787856481, "grad_norm": 0.0206503846500878, "learning_rate": 4.404269881902734e-06, "loss": 0.4508, "step": 37895 }, { "epoch": 1.9240903148836797, "grad_norm": 0.02300274852597502, "learning_rate": 4.374980348245072e-06, "loss": 0.4446, "step": 37900 }, { "epoch": 1.9243441509817112, "grad_norm": 0.02138956368703685, "learning_rate": 4.345788103307047e-06, "loss": 0.4589, "step": 37905 }, { "epoch": 1.9245979870797427, "grad_norm": 0.02227297400993538, "learning_rate": 4.316693152819018e-06, "loss": 0.4654, "step": 37910 }, { "epoch": 1.9248518231777743, "grad_norm": 0.022468692731550267, "learning_rate": 4.287695502492139e-06, "loss": 0.4457, "step": 37915 }, { "epoch": 1.9251056592758056, "grad_norm": 0.02069126082899075, "learning_rate": 4.25879515801858e-06, "loss": 0.4567, "step": 37920 }, { "epoch": 1.925359495373837, "grad_norm": 0.019826909262503828, "learning_rate": 4.229992125071192e-06, "loss": 0.4453, "step": 37925 }, { "epoch": 1.9256133314718686, "grad_norm": 0.020987878267737438, "learning_rate": 4.201286409304006e-06, "loss": 0.4454, "step": 37930 }, { "epoch": 1.9258671675699, "grad_norm": 0.020420855142907778, "learning_rate": 4.172678016351683e-06, "loss": 0.433, "step": 37935 }, { "epoch": 1.9261210036679315, "grad_norm": 0.02284228322904161, "learning_rate": 4.1441669518300086e-06, "loss": 0.4631, "step": 37940 }, { "epoch": 1.926374839765963, "grad_norm": 0.0222567772359531, "learning_rate": 4.115753221335561e-06, "loss": 0.4523, "step": 37945 }, { "epoch": 1.9266286758639946, "grad_norm": 0.023189951687417994, "learning_rate": 4.087436830445768e-06, "loss": 0.4458, "step": 37950 }, { "epoch": 1.926882511962026, "grad_norm": 0.021658902190008947, "learning_rate": 4.059217784719016e-06, "loss": 0.4459, "step": 37955 }, { "epoch": 1.9271363480600576, "grad_norm": 0.024217930442230858, "learning_rate": 4.0310960896945415e-06, "loss": 0.4538, "step": 37960 }, { "epoch": 1.9273901841580892, "grad_norm": 0.022206278190921094, "learning_rate": 4.003071750892595e-06, "loss": 0.4552, "step": 37965 }, { "epoch": 1.9276440202561207, "grad_norm": 0.025828497485879003, "learning_rate": 3.9751447738140545e-06, "loss": 0.4645, "step": 37970 }, { "epoch": 1.9278978563541522, "grad_norm": 0.019836230031236992, "learning_rate": 3.9473151639409235e-06, "loss": 0.4585, "step": 37975 }, { "epoch": 1.9281516924521838, "grad_norm": 0.020662066999979745, "learning_rate": 3.919582926735999e-06, "loss": 0.454, "step": 37980 }, { "epoch": 1.928405528550215, "grad_norm": 0.022723364615094117, "learning_rate": 3.891948067643036e-06, "loss": 0.4606, "step": 37985 }, { "epoch": 1.9286593646482466, "grad_norm": 0.023952908845065374, "learning_rate": 3.864410592086587e-06, "loss": 0.4559, "step": 37990 }, { "epoch": 1.9289132007462781, "grad_norm": 0.025670760204192115, "learning_rate": 3.836970505472104e-06, "loss": 0.4263, "step": 37995 }, { "epoch": 1.9291670368443097, "grad_norm": 0.02416027669647824, "learning_rate": 3.8096278131859452e-06, "loss": 0.4594, "step": 38000 }, { "epoch": 1.929420872942341, "grad_norm": 0.02400148828175322, "learning_rate": 3.7823825205953177e-06, "loss": 0.4519, "step": 38005 }, { "epoch": 1.9296747090403725, "grad_norm": 0.020877060418883157, "learning_rate": 3.755234633048388e-06, "loss": 0.4399, "step": 38010 }, { "epoch": 1.929928545138404, "grad_norm": 0.021079166677847018, "learning_rate": 3.7281841558741147e-06, "loss": 0.4526, "step": 38015 }, { "epoch": 1.9301823812364356, "grad_norm": 0.02979341790679115, "learning_rate": 3.7012310943824178e-06, "loss": 0.4559, "step": 38020 }, { "epoch": 1.9304362173344671, "grad_norm": 0.021472840172437824, "learning_rate": 3.6743754538640093e-06, "loss": 0.4546, "step": 38025 }, { "epoch": 1.9306900534324987, "grad_norm": 0.024965054424825203, "learning_rate": 3.6476172395905615e-06, "loss": 0.4608, "step": 38030 }, { "epoch": 1.9309438895305302, "grad_norm": 0.030534410696193816, "learning_rate": 3.6209564568144837e-06, "loss": 0.4537, "step": 38035 }, { "epoch": 1.9311977256285617, "grad_norm": 0.020565392136713392, "learning_rate": 3.5943931107692563e-06, "loss": 0.4551, "step": 38040 }, { "epoch": 1.9314515617265933, "grad_norm": 0.02990769064293206, "learning_rate": 3.567927206669097e-06, "loss": 0.4534, "step": 38045 }, { "epoch": 1.9317053978246248, "grad_norm": 0.020744114829942508, "learning_rate": 3.5415587497090727e-06, "loss": 0.4606, "step": 38050 }, { "epoch": 1.9319592339226561, "grad_norm": 0.02147231946067232, "learning_rate": 3.515287745065321e-06, "loss": 0.4542, "step": 38055 }, { "epoch": 1.9322130700206877, "grad_norm": 0.02382770440065348, "learning_rate": 3.4891141978945497e-06, "loss": 0.4489, "step": 38060 }, { "epoch": 1.9324669061187192, "grad_norm": 0.02168451146145817, "learning_rate": 3.463038113334538e-06, "loss": 0.4486, "step": 38065 }, { "epoch": 1.9327207422167505, "grad_norm": 0.019029522910771698, "learning_rate": 3.437059496503969e-06, "loss": 0.4532, "step": 38070 }, { "epoch": 1.932974578314782, "grad_norm": 0.02297240339975973, "learning_rate": 3.4111783525022646e-06, "loss": 0.4704, "step": 38075 }, { "epoch": 1.9332284144128136, "grad_norm": 0.029039053764296118, "learning_rate": 3.3853946864097486e-06, "loss": 0.4711, "step": 38080 }, { "epoch": 1.933482250510845, "grad_norm": 0.021336084269481418, "learning_rate": 3.3597085032876505e-06, "loss": 0.4505, "step": 38085 }, { "epoch": 1.9337360866088766, "grad_norm": 0.026153460382982597, "learning_rate": 3.3341198081780487e-06, "loss": 0.4829, "step": 38090 }, { "epoch": 1.9339899227069082, "grad_norm": 0.026443914139286612, "learning_rate": 3.3086286061038697e-06, "loss": 0.46, "step": 38095 }, { "epoch": 1.9342437588049397, "grad_norm": 0.022842151943259267, "learning_rate": 3.283234902068888e-06, "loss": 0.4416, "step": 38100 }, { "epoch": 1.9344975949029712, "grad_norm": 0.020825476467275915, "learning_rate": 3.2579387010577277e-06, "loss": 0.4656, "step": 38105 }, { "epoch": 1.9347514310010028, "grad_norm": 0.021769918904403884, "learning_rate": 3.2327400080359725e-06, "loss": 0.4631, "step": 38110 }, { "epoch": 1.9350052670990343, "grad_norm": 0.02082151349347851, "learning_rate": 3.207638827949999e-06, "loss": 0.4353, "step": 38115 }, { "epoch": 1.9352591031970656, "grad_norm": 0.02513216757499985, "learning_rate": 3.1826351657270323e-06, "loss": 0.4638, "step": 38120 }, { "epoch": 1.9355129392950972, "grad_norm": 0.022971752053904795, "learning_rate": 3.1577290262750912e-06, "loss": 0.455, "step": 38125 }, { "epoch": 1.9357667753931287, "grad_norm": 0.023390667458557612, "learning_rate": 3.1329204144832647e-06, "loss": 0.4503, "step": 38130 }, { "epoch": 1.93602061149116, "grad_norm": 0.027462875293432002, "learning_rate": 3.108209335221268e-06, "loss": 0.4698, "step": 38135 }, { "epoch": 1.9362744475891915, "grad_norm": 0.022880416789241798, "learning_rate": 3.0835957933397774e-06, "loss": 0.4613, "step": 38140 }, { "epoch": 1.936528283687223, "grad_norm": 0.021014225306591475, "learning_rate": 3.0590797936703164e-06, "loss": 0.5413, "step": 38145 }, { "epoch": 1.9367821197852546, "grad_norm": 0.022815293753873547, "learning_rate": 3.034661341025258e-06, "loss": 0.4312, "step": 38150 }, { "epoch": 1.9370359558832861, "grad_norm": 0.021813853966513634, "learning_rate": 3.010340440197823e-06, "loss": 0.455, "step": 38155 }, { "epoch": 1.9372897919813177, "grad_norm": 0.027654537630323616, "learning_rate": 2.986117095962082e-06, "loss": 0.4457, "step": 38160 }, { "epoch": 1.9375436280793492, "grad_norm": 0.020417251873714362, "learning_rate": 2.961991313072898e-06, "loss": 0.4311, "step": 38165 }, { "epoch": 1.9377974641773807, "grad_norm": 0.02126948120782193, "learning_rate": 2.9379630962661496e-06, "loss": 0.4697, "step": 38170 }, { "epoch": 1.9380513002754123, "grad_norm": 0.020504217551342806, "learning_rate": 2.914032450258397e-06, "loss": 0.4503, "step": 38175 }, { "epoch": 1.9383051363734438, "grad_norm": 0.021169339004345845, "learning_rate": 2.890199379747105e-06, "loss": 0.4436, "step": 38180 }, { "epoch": 1.9385589724714751, "grad_norm": 0.027548188156958732, "learning_rate": 2.8664638894105867e-06, "loss": 0.4585, "step": 38185 }, { "epoch": 1.9388128085695067, "grad_norm": 0.02785876405422749, "learning_rate": 2.8428259839079486e-06, "loss": 0.4681, "step": 38190 }, { "epoch": 1.9390666446675382, "grad_norm": 0.025887282870096888, "learning_rate": 2.819285667879312e-06, "loss": 0.4748, "step": 38195 }, { "epoch": 1.9393204807655695, "grad_norm": 0.021934926759944635, "learning_rate": 2.7958429459454817e-06, "loss": 0.4409, "step": 38200 }, { "epoch": 1.939574316863601, "grad_norm": 0.02102254390453021, "learning_rate": 2.7724978227081086e-06, "loss": 0.443, "step": 38205 }, { "epoch": 1.9398281529616326, "grad_norm": 0.025739303931824997, "learning_rate": 2.7492503027496953e-06, "loss": 0.4346, "step": 38210 }, { "epoch": 1.9400819890596641, "grad_norm": 0.020415985192957675, "learning_rate": 2.726100390633757e-06, "loss": 0.4278, "step": 38215 }, { "epoch": 1.9403358251576956, "grad_norm": 0.027445653367167453, "learning_rate": 2.7030480909043254e-06, "loss": 0.4677, "step": 38220 }, { "epoch": 1.9405896612557272, "grad_norm": 0.022194649303696412, "learning_rate": 2.680093408086559e-06, "loss": 0.4588, "step": 38225 }, { "epoch": 1.9408434973537587, "grad_norm": 0.03334788531458536, "learning_rate": 2.6572363466863534e-06, "loss": 0.48, "step": 38230 }, { "epoch": 1.9410973334517903, "grad_norm": 0.02165643766225538, "learning_rate": 2.6344769111903975e-06, "loss": 0.4869, "step": 38235 }, { "epoch": 1.9413511695498218, "grad_norm": 0.02482685913614366, "learning_rate": 2.6118151060662842e-06, "loss": 0.4267, "step": 38240 }, { "epoch": 1.9416050056478533, "grad_norm": 0.02207024691983292, "learning_rate": 2.589250935762344e-06, "loss": 0.4325, "step": 38245 }, { "epoch": 1.9418588417458846, "grad_norm": 0.021736842944571975, "learning_rate": 2.566784404707867e-06, "loss": 0.444, "step": 38250 }, { "epoch": 1.9421126778439162, "grad_norm": 0.033892642112711965, "learning_rate": 2.5444155173129368e-06, "loss": 0.4529, "step": 38255 }, { "epoch": 1.9423665139419477, "grad_norm": 0.021770668922768112, "learning_rate": 2.52214427796843e-06, "loss": 0.4422, "step": 38260 }, { "epoch": 1.9426203500399792, "grad_norm": 0.02338794948994561, "learning_rate": 2.499970691046127e-06, "loss": 0.4577, "step": 38265 }, { "epoch": 1.9428741861380106, "grad_norm": 0.0211563138278504, "learning_rate": 2.4778947608984915e-06, "loss": 0.4691, "step": 38270 }, { "epoch": 1.943128022236042, "grad_norm": 0.021156732461866418, "learning_rate": 2.4559164918590005e-06, "loss": 0.4505, "step": 38275 }, { "epoch": 1.9433818583340736, "grad_norm": 0.029251366916451853, "learning_rate": 2.4340358882418144e-06, "loss": 0.436, "step": 38280 }, { "epoch": 1.9436356944321052, "grad_norm": 0.020800337122009502, "learning_rate": 2.412252954342109e-06, "loss": 0.4635, "step": 38285 }, { "epoch": 1.9438895305301367, "grad_norm": 0.020582842457094158, "learning_rate": 2.3905676944356303e-06, "loss": 0.4379, "step": 38290 }, { "epoch": 1.9441433666281682, "grad_norm": 0.021231715330240406, "learning_rate": 2.36898011277914e-06, "loss": 0.4536, "step": 38295 }, { "epoch": 1.9443972027261998, "grad_norm": 0.019707268959821594, "learning_rate": 2.3474902136101927e-06, "loss": 0.4483, "step": 38300 }, { "epoch": 1.9446510388242313, "grad_norm": 0.021565038396861153, "learning_rate": 2.3260980011470258e-06, "loss": 0.4508, "step": 38305 }, { "epoch": 1.9449048749222628, "grad_norm": 0.022307843520456463, "learning_rate": 2.304803479589057e-06, "loss": 0.4469, "step": 38310 }, { "epoch": 1.9451587110202944, "grad_norm": 0.021662290602322585, "learning_rate": 2.2836066531161104e-06, "loss": 0.4584, "step": 38315 }, { "epoch": 1.9454125471183257, "grad_norm": 0.022824581912878204, "learning_rate": 2.2625075258890793e-06, "loss": 0.4524, "step": 38320 }, { "epoch": 1.9456663832163572, "grad_norm": 0.02383320091907116, "learning_rate": 2.2415061020495954e-06, "loss": 0.4545, "step": 38325 }, { "epoch": 1.9459202193143887, "grad_norm": 0.02259973050640163, "learning_rate": 2.2206023857201386e-06, "loss": 0.4395, "step": 38330 }, { "epoch": 1.94617405541242, "grad_norm": 0.02451602070891827, "learning_rate": 2.199796381004038e-06, "loss": 0.4463, "step": 38335 }, { "epoch": 1.9464278915104516, "grad_norm": 0.02350649241970694, "learning_rate": 2.1790880919853595e-06, "loss": 0.4571, "step": 38340 }, { "epoch": 1.9466817276084831, "grad_norm": 0.024863563292605854, "learning_rate": 2.1584775227290745e-06, "loss": 0.4537, "step": 38345 }, { "epoch": 1.9469355637065147, "grad_norm": 0.026608746542194056, "learning_rate": 2.1379646772808903e-06, "loss": 0.4525, "step": 38350 }, { "epoch": 1.9471893998045462, "grad_norm": 0.021420118326157717, "learning_rate": 2.11754955966742e-06, "loss": 0.4788, "step": 38355 }, { "epoch": 1.9474432359025777, "grad_norm": 0.021686725777661547, "learning_rate": 2.0972321738960687e-06, "loss": 0.4555, "step": 38360 }, { "epoch": 1.9476970720006093, "grad_norm": 0.020239535964856954, "learning_rate": 2.0770125239549797e-06, "loss": 0.4275, "step": 38365 }, { "epoch": 1.9479509080986408, "grad_norm": 0.026279491549929514, "learning_rate": 2.0568906138132002e-06, "loss": 0.4728, "step": 38370 }, { "epoch": 1.9482047441966723, "grad_norm": 0.03912414768353642, "learning_rate": 2.0368664474205157e-06, "loss": 0.4767, "step": 38375 }, { "epoch": 1.9484585802947039, "grad_norm": 0.02377861368004799, "learning_rate": 2.01694002870767e-06, "loss": 0.4395, "step": 38380 }, { "epoch": 1.9487124163927352, "grad_norm": 0.022829057653313226, "learning_rate": 1.997111361586035e-06, "loss": 0.4665, "step": 38385 }, { "epoch": 1.9489662524907667, "grad_norm": 0.02234878999035154, "learning_rate": 1.9773804499478854e-06, "loss": 0.4479, "step": 38390 }, { "epoch": 1.9492200885887982, "grad_norm": 0.020446858573344908, "learning_rate": 1.957747297666346e-06, "loss": 0.4451, "step": 38395 }, { "epoch": 1.9494739246868296, "grad_norm": 0.022150719815722406, "learning_rate": 1.9382119085952777e-06, "loss": 0.467, "step": 38400 }, { "epoch": 1.949727760784861, "grad_norm": 0.02395305631634785, "learning_rate": 1.9187742865693915e-06, "loss": 0.4446, "step": 38405 }, { "epoch": 1.9499815968828926, "grad_norm": 0.025226111441204044, "learning_rate": 1.899434435404135e-06, "loss": 0.4212, "step": 38410 }, { "epoch": 1.9502354329809242, "grad_norm": 0.026080819122294808, "learning_rate": 1.8801923588959157e-06, "loss": 0.43, "step": 38415 }, { "epoch": 1.9504892690789557, "grad_norm": 0.027147258519150435, "learning_rate": 1.8610480608218239e-06, "loss": 0.4371, "step": 38420 }, { "epoch": 1.9507431051769872, "grad_norm": 0.02461769872993647, "learning_rate": 1.842001544939742e-06, "loss": 0.47, "step": 38425 }, { "epoch": 1.9509969412750188, "grad_norm": 0.020951902456712867, "learning_rate": 1.8230528149884573e-06, "loss": 0.4701, "step": 38430 }, { "epoch": 1.9512507773730503, "grad_norm": 0.026972795277747482, "learning_rate": 1.80420187468755e-06, "loss": 0.4404, "step": 38435 }, { "epoch": 1.9515046134710818, "grad_norm": 0.021207557562487, "learning_rate": 1.7854487277372822e-06, "loss": 0.4366, "step": 38440 }, { "epoch": 1.9517584495691134, "grad_norm": 0.02498355979793343, "learning_rate": 1.7667933778188206e-06, "loss": 0.4516, "step": 38445 }, { "epoch": 1.9520122856671447, "grad_norm": 0.02192218125038996, "learning_rate": 1.7482358285941803e-06, "loss": 0.4627, "step": 38450 }, { "epoch": 1.9522661217651762, "grad_norm": 0.029378815413494033, "learning_rate": 1.729776083706003e-06, "loss": 0.4461, "step": 38455 }, { "epoch": 1.9525199578632078, "grad_norm": 0.024581468794183266, "learning_rate": 1.7114141467779454e-06, "loss": 0.454, "step": 38460 }, { "epoch": 1.952773793961239, "grad_norm": 0.025894600364455185, "learning_rate": 1.693150021414347e-06, "loss": 0.4724, "step": 38465 }, { "epoch": 1.9530276300592706, "grad_norm": 0.02278420673605267, "learning_rate": 1.6749837112003398e-06, "loss": 0.4594, "step": 38470 }, { "epoch": 1.9532814661573021, "grad_norm": 0.02509311215738138, "learning_rate": 1.656915219701849e-06, "loss": 0.4309, "step": 38475 }, { "epoch": 1.9535353022553337, "grad_norm": 0.02132393990165603, "learning_rate": 1.6389445504657041e-06, "loss": 0.4423, "step": 38480 }, { "epoch": 1.9537891383533652, "grad_norm": 0.02171837487424093, "learning_rate": 1.621071707019417e-06, "loss": 0.4358, "step": 38485 }, { "epoch": 1.9540429744513967, "grad_norm": 0.024981217751365643, "learning_rate": 1.6032966928713477e-06, "loss": 0.4675, "step": 38490 }, { "epoch": 1.9542968105494283, "grad_norm": 0.020893534776536122, "learning_rate": 1.5856195115105943e-06, "loss": 0.4433, "step": 38495 }, { "epoch": 1.9545506466474598, "grad_norm": 0.02068830461015465, "learning_rate": 1.5680401664072141e-06, "loss": 0.4629, "step": 38500 }, { "epoch": 1.9548044827454913, "grad_norm": 0.020953616835626587, "learning_rate": 1.5505586610118361e-06, "loss": 0.4689, "step": 38505 }, { "epoch": 1.9550583188435229, "grad_norm": 0.02440341051189393, "learning_rate": 1.5331749987560484e-06, "loss": 0.461, "step": 38510 }, { "epoch": 1.9553121549415542, "grad_norm": 0.029598548396459087, "learning_rate": 1.5158891830521215e-06, "loss": 0.431, "step": 38515 }, { "epoch": 1.9555659910395857, "grad_norm": 0.02717560220840279, "learning_rate": 1.4987012172932301e-06, "loss": 0.4535, "step": 38520 }, { "epoch": 1.9558198271376173, "grad_norm": 0.02370839493798561, "learning_rate": 1.481611104853231e-06, "loss": 0.4735, "step": 38525 }, { "epoch": 1.9560736632356488, "grad_norm": 0.022848184848146453, "learning_rate": 1.4646188490869405e-06, "loss": 0.4754, "step": 38530 }, { "epoch": 1.95632749933368, "grad_norm": 0.022225861131520797, "learning_rate": 1.4477244533297463e-06, "loss": 0.4466, "step": 38535 }, { "epoch": 1.9565813354317116, "grad_norm": 0.022152077073412883, "learning_rate": 1.4309279208979398e-06, "loss": 0.4698, "step": 38540 }, { "epoch": 1.9568351715297432, "grad_norm": 0.02524987360341103, "learning_rate": 1.414229255088606e-06, "loss": 0.4294, "step": 38545 }, { "epoch": 1.9570890076277747, "grad_norm": 0.023994085753204972, "learning_rate": 1.3976284591796783e-06, "loss": 0.4485, "step": 38550 }, { "epoch": 1.9573428437258062, "grad_norm": 0.021722894693704954, "learning_rate": 1.381125536429717e-06, "loss": 0.4456, "step": 38555 }, { "epoch": 1.9575966798238378, "grad_norm": 0.024344062652594294, "learning_rate": 1.3647204900782417e-06, "loss": 0.4338, "step": 38560 }, { "epoch": 1.9578505159218693, "grad_norm": 0.021929490605915293, "learning_rate": 1.3484133233454544e-06, "loss": 0.4643, "step": 38565 }, { "epoch": 1.9581043520199009, "grad_norm": 0.02177586782728187, "learning_rate": 1.3322040394323498e-06, "loss": 0.4649, "step": 38570 }, { "epoch": 1.9583581881179324, "grad_norm": 0.020581067246549605, "learning_rate": 1.3160926415207163e-06, "loss": 0.4485, "step": 38575 }, { "epoch": 1.9586120242159637, "grad_norm": 0.0215530463340169, "learning_rate": 1.300079132773191e-06, "loss": 0.4468, "step": 38580 }, { "epoch": 1.9588658603139952, "grad_norm": 0.024185145615631558, "learning_rate": 1.2841635163330922e-06, "loss": 0.4886, "step": 38585 }, { "epoch": 1.9591196964120268, "grad_norm": 0.02437935153018092, "learning_rate": 1.268345795324588e-06, "loss": 0.469, "step": 38590 }, { "epoch": 1.9593735325100583, "grad_norm": 0.023546706097560363, "learning_rate": 1.252625972852639e-06, "loss": 0.4888, "step": 38595 }, { "epoch": 1.9596273686080896, "grad_norm": 0.021139727672287015, "learning_rate": 1.237004052002999e-06, "loss": 0.4337, "step": 38600 }, { "epoch": 1.9598812047061211, "grad_norm": 0.03423369843513143, "learning_rate": 1.221480035842104e-06, "loss": 0.4259, "step": 38605 }, { "epoch": 1.9601350408041527, "grad_norm": 0.021383194616096283, "learning_rate": 1.2060539274172944e-06, "loss": 0.4357, "step": 38610 }, { "epoch": 1.9603888769021842, "grad_norm": 0.020286977515740528, "learning_rate": 1.1907257297566477e-06, "loss": 0.4446, "step": 38615 }, { "epoch": 1.9606427130002158, "grad_norm": 0.02403465823618508, "learning_rate": 1.1754954458689238e-06, "loss": 0.4564, "step": 38620 }, { "epoch": 1.9608965490982473, "grad_norm": 0.02086613709755337, "learning_rate": 1.1603630787438424e-06, "loss": 0.4625, "step": 38625 }, { "epoch": 1.9611503851962788, "grad_norm": 0.02145705596640427, "learning_rate": 1.1453286313517498e-06, "loss": 0.4622, "step": 38630 }, { "epoch": 1.9614042212943104, "grad_norm": 0.0198366436003919, "learning_rate": 1.130392106643896e-06, "loss": 0.4296, "step": 38635 }, { "epoch": 1.961658057392342, "grad_norm": 0.021595570641432995, "learning_rate": 1.1155535075522138e-06, "loss": 0.4395, "step": 38640 }, { "epoch": 1.9619118934903734, "grad_norm": 0.027632249043748877, "learning_rate": 1.1008128369894288e-06, "loss": 0.4571, "step": 38645 }, { "epoch": 1.9621657295884047, "grad_norm": 0.0211592206862124, "learning_rate": 1.0861700978490596e-06, "loss": 0.4516, "step": 38650 }, { "epoch": 1.9624195656864363, "grad_norm": 0.020641952024909045, "learning_rate": 1.0716252930054737e-06, "loss": 0.4525, "step": 38655 }, { "epoch": 1.9626734017844678, "grad_norm": 0.02239625096519365, "learning_rate": 1.0571784253136652e-06, "loss": 0.4538, "step": 38660 }, { "epoch": 1.9629272378824991, "grad_norm": 0.020935572931792722, "learning_rate": 1.0428294976094766e-06, "loss": 0.4726, "step": 38665 }, { "epoch": 1.9631810739805307, "grad_norm": 0.023797580850831307, "learning_rate": 1.0285785127095993e-06, "loss": 0.4591, "step": 38670 }, { "epoch": 1.9634349100785622, "grad_norm": 0.021663436246309797, "learning_rate": 1.0144254734113511e-06, "loss": 0.439, "step": 38675 }, { "epoch": 1.9636887461765937, "grad_norm": 0.02399748053716495, "learning_rate": 1.00037038249301e-06, "loss": 0.467, "step": 38680 }, { "epoch": 1.9639425822746253, "grad_norm": 0.02209544242205649, "learning_rate": 9.864132427134243e-07, "loss": 0.4573, "step": 38685 }, { "epoch": 1.9641964183726568, "grad_norm": 0.02180806139548943, "learning_rate": 9.725540568122915e-07, "loss": 0.4393, "step": 38690 }, { "epoch": 1.9644502544706883, "grad_norm": 0.025112167834570206, "learning_rate": 9.587928275102132e-07, "loss": 0.4367, "step": 38695 }, { "epoch": 1.9647040905687199, "grad_norm": 0.019834862643478415, "learning_rate": 9.451295575083618e-07, "loss": 0.4682, "step": 38700 }, { "epoch": 1.9649579266667514, "grad_norm": 0.019828424235274936, "learning_rate": 9.315642494888144e-07, "loss": 0.424, "step": 38705 }, { "epoch": 1.965211762764783, "grad_norm": 0.023011146520013428, "learning_rate": 9.180969061143851e-07, "loss": 0.4488, "step": 38710 }, { "epoch": 1.9654655988628142, "grad_norm": 0.021083449030767323, "learning_rate": 9.047275300285706e-07, "loss": 0.4656, "step": 38715 }, { "epoch": 1.9657194349608458, "grad_norm": 0.028229164879771167, "learning_rate": 8.914561238557717e-07, "loss": 0.4336, "step": 38720 }, { "epoch": 1.9659732710588773, "grad_norm": 0.023561467883931243, "learning_rate": 8.78282690201071e-07, "loss": 0.4416, "step": 38725 }, { "epoch": 1.9662271071569086, "grad_norm": 0.0231890769370734, "learning_rate": 8.652072316503446e-07, "loss": 0.4391, "step": 38730 }, { "epoch": 1.9664809432549402, "grad_norm": 0.023528700345852093, "learning_rate": 8.52229750770317e-07, "loss": 0.4776, "step": 38735 }, { "epoch": 1.9667347793529717, "grad_norm": 0.022550152791175105, "learning_rate": 8.39350250108284e-07, "loss": 0.4482, "step": 38740 }, { "epoch": 1.9669886154510032, "grad_norm": 0.018641810249880507, "learning_rate": 8.265687321925009e-07, "loss": 0.4285, "step": 38745 }, { "epoch": 1.9672424515490348, "grad_norm": 0.02746049418696759, "learning_rate": 8.138851995319608e-07, "loss": 0.4298, "step": 38750 }, { "epoch": 1.9674962876470663, "grad_norm": 0.023113644657525075, "learning_rate": 8.012996546162277e-07, "loss": 0.4573, "step": 38755 }, { "epoch": 1.9677501237450978, "grad_norm": 0.02536365354716488, "learning_rate": 7.888120999159365e-07, "loss": 0.4669, "step": 38760 }, { "epoch": 1.9680039598431294, "grad_norm": 0.031051578737683758, "learning_rate": 7.764225378822377e-07, "loss": 0.4395, "step": 38765 }, { "epoch": 1.968257795941161, "grad_norm": 0.022412814259141482, "learning_rate": 7.641309709471855e-07, "loss": 0.4616, "step": 38770 }, { "epoch": 1.9685116320391924, "grad_norm": 0.029004560100377053, "learning_rate": 7.51937401523517e-07, "loss": 0.4527, "step": 38775 }, { "epoch": 1.9687654681372238, "grad_norm": 0.03144197827525452, "learning_rate": 7.398418320048173e-07, "loss": 0.4587, "step": 38780 }, { "epoch": 1.9690193042352553, "grad_norm": 0.020675890106864256, "learning_rate": 7.278442647653538e-07, "loss": 0.4294, "step": 38785 }, { "epoch": 1.9692731403332868, "grad_norm": 0.022404177027982874, "learning_rate": 7.159447021601872e-07, "loss": 0.4414, "step": 38790 }, { "epoch": 1.9695269764313181, "grad_norm": 0.025733305949821667, "learning_rate": 7.041431465251713e-07, "loss": 0.4477, "step": 38795 }, { "epoch": 1.9697808125293497, "grad_norm": 0.027375264236320213, "learning_rate": 6.924396001768418e-07, "loss": 0.4665, "step": 38800 }, { "epoch": 1.9700346486273812, "grad_norm": 0.025948495378095977, "learning_rate": 6.808340654125833e-07, "loss": 0.4424, "step": 38805 }, { "epoch": 1.9702884847254127, "grad_norm": 0.023666766635241906, "learning_rate": 6.693265445105179e-07, "loss": 0.4416, "step": 38810 }, { "epoch": 1.9705423208234443, "grad_norm": 0.02132357302519558, "learning_rate": 6.579170397294498e-07, "loss": 0.4253, "step": 38815 }, { "epoch": 1.9707961569214758, "grad_norm": 0.030128825836747055, "learning_rate": 6.466055533090875e-07, "loss": 0.4195, "step": 38820 }, { "epoch": 1.9710499930195073, "grad_norm": 0.02602764759746211, "learning_rate": 6.35392087469766e-07, "loss": 0.442, "step": 38825 }, { "epoch": 1.9713038291175389, "grad_norm": 0.021881181041248757, "learning_rate": 6.24276644412669e-07, "loss": 0.473, "step": 38830 }, { "epoch": 1.9715576652155704, "grad_norm": 0.02185523352546431, "learning_rate": 6.132592263196623e-07, "loss": 0.43, "step": 38835 }, { "epoch": 1.971811501313602, "grad_norm": 0.021680403099536016, "learning_rate": 6.023398353534604e-07, "loss": 0.4553, "step": 38840 }, { "epoch": 1.9720653374116333, "grad_norm": 0.021974257862993504, "learning_rate": 5.915184736574597e-07, "loss": 0.4562, "step": 38845 }, { "epoch": 1.9723191735096648, "grad_norm": 0.02096949718491153, "learning_rate": 5.807951433557946e-07, "loss": 0.4408, "step": 38850 }, { "epoch": 1.9725730096076963, "grad_norm": 0.025939230078918414, "learning_rate": 5.701698465534477e-07, "loss": 0.456, "step": 38855 }, { "epoch": 1.9728268457057279, "grad_norm": 0.02828624799294119, "learning_rate": 5.596425853361397e-07, "loss": 0.4327, "step": 38860 }, { "epoch": 1.9730806818037592, "grad_norm": 0.022004046110511963, "learning_rate": 5.492133617702733e-07, "loss": 0.4511, "step": 38865 }, { "epoch": 1.9733345179017907, "grad_norm": 0.024590538540810992, "learning_rate": 5.388821779030994e-07, "loss": 0.4676, "step": 38870 }, { "epoch": 1.9735883539998222, "grad_norm": 0.021191903724915806, "learning_rate": 5.286490357624962e-07, "loss": 0.4506, "step": 38875 }, { "epoch": 1.9738421900978538, "grad_norm": 0.023563631620566922, "learning_rate": 5.185139373572456e-07, "loss": 0.4451, "step": 38880 }, { "epoch": 1.9740960261958853, "grad_norm": 0.023529886583346264, "learning_rate": 5.084768846768117e-07, "loss": 0.4457, "step": 38885 }, { "epoch": 1.9743498622939168, "grad_norm": 0.023884990638485995, "learning_rate": 4.985378796913964e-07, "loss": 0.4755, "step": 38890 }, { "epoch": 1.9746036983919484, "grad_norm": 0.02209835269662074, "learning_rate": 4.886969243519391e-07, "loss": 0.4252, "step": 38895 }, { "epoch": 1.97485753448998, "grad_norm": 0.019159617450172008, "learning_rate": 4.789540205902831e-07, "loss": 0.4405, "step": 38900 }, { "epoch": 1.9751113705880115, "grad_norm": 0.02540563968338361, "learning_rate": 4.6930917031878796e-07, "loss": 0.4234, "step": 38905 }, { "epoch": 1.975365206686043, "grad_norm": 0.025584914989898507, "learning_rate": 4.597623754307723e-07, "loss": 0.4422, "step": 38910 }, { "epoch": 1.9756190427840743, "grad_norm": 0.02296434045482897, "learning_rate": 4.5031363780023705e-07, "loss": 0.4342, "step": 38915 }, { "epoch": 1.9758728788821058, "grad_norm": 0.023745961805210634, "learning_rate": 4.4096295928186534e-07, "loss": 0.4681, "step": 38920 }, { "epoch": 1.9761267149801374, "grad_norm": 0.023731819927084494, "learning_rate": 4.3171034171113346e-07, "loss": 0.4512, "step": 38925 }, { "epoch": 1.9763805510781687, "grad_norm": 0.022054760692146153, "learning_rate": 4.225557869043661e-07, "loss": 0.4677, "step": 38930 }, { "epoch": 1.9766343871762002, "grad_norm": 0.023331327072565897, "learning_rate": 4.134992966584594e-07, "loss": 0.4595, "step": 38935 }, { "epoch": 1.9768882232742317, "grad_norm": 0.022221322234984905, "learning_rate": 4.0454087275121344e-07, "loss": 0.4274, "step": 38940 }, { "epoch": 1.9771420593722633, "grad_norm": 0.028686436657490943, "learning_rate": 3.956805169411659e-07, "loss": 0.4756, "step": 38945 }, { "epoch": 1.9773958954702948, "grad_norm": 0.021950488480769904, "learning_rate": 3.8691823096748126e-07, "loss": 0.4263, "step": 38950 }, { "epoch": 1.9776497315683264, "grad_norm": 0.021704365417338094, "learning_rate": 3.7825401655017246e-07, "loss": 0.4912, "step": 38955 }, { "epoch": 1.9779035676663579, "grad_norm": 0.029412474606072804, "learning_rate": 3.6968787538999016e-07, "loss": 0.446, "step": 38960 }, { "epoch": 1.9781574037643894, "grad_norm": 0.021330314142649562, "learning_rate": 3.6121980916842265e-07, "loss": 0.4515, "step": 38965 }, { "epoch": 1.978411239862421, "grad_norm": 0.02002666201390011, "learning_rate": 3.528498195476959e-07, "loss": 0.4289, "step": 38970 }, { "epoch": 1.9786650759604525, "grad_norm": 0.022747671492206325, "learning_rate": 3.445779081708844e-07, "loss": 0.4598, "step": 38975 }, { "epoch": 1.9789189120584838, "grad_norm": 0.023336518609922925, "learning_rate": 3.3640407666157835e-07, "loss": 0.4739, "step": 38980 }, { "epoch": 1.9791727481565153, "grad_norm": 0.03376020799973892, "learning_rate": 3.283283266243831e-07, "loss": 0.4483, "step": 38985 }, { "epoch": 1.9794265842545469, "grad_norm": 0.02132524288935295, "learning_rate": 3.203506596444194e-07, "loss": 0.463, "step": 38990 }, { "epoch": 1.9796804203525782, "grad_norm": 0.020945363773474973, "learning_rate": 3.1247107728776815e-07, "loss": 0.46, "step": 38995 }, { "epoch": 1.9799342564506097, "grad_norm": 0.019771955108709015, "learning_rate": 3.046895811011363e-07, "loss": 0.4515, "step": 39000 }, { "epoch": 1.9801880925486413, "grad_norm": 0.02332612793278335, "learning_rate": 2.970061726119133e-07, "loss": 0.434, "step": 39005 }, { "epoch": 1.9804419286466728, "grad_norm": 0.022591788630783205, "learning_rate": 2.894208533283371e-07, "loss": 0.4332, "step": 39010 }, { "epoch": 1.9806957647447043, "grad_norm": 0.020037110918590357, "learning_rate": 2.8193362473943885e-07, "loss": 0.4296, "step": 39015 }, { "epoch": 1.9809496008427359, "grad_norm": 0.023998278576397646, "learning_rate": 2.7454448831487624e-07, "loss": 0.4527, "step": 39020 }, { "epoch": 1.9812034369407674, "grad_norm": 0.02707422698823243, "learning_rate": 2.672534455051001e-07, "loss": 0.4571, "step": 39025 }, { "epoch": 1.981457273038799, "grad_norm": 0.025204251574380124, "learning_rate": 2.60060497741299e-07, "loss": 0.461, "step": 39030 }, { "epoch": 1.9817111091368305, "grad_norm": 0.024377692549952947, "learning_rate": 2.529656464354546e-07, "loss": 0.4683, "step": 39035 }, { "epoch": 1.981964945234862, "grad_norm": 0.02538043216380801, "learning_rate": 2.459688929802306e-07, "loss": 0.4442, "step": 39040 }, { "epoch": 1.9822187813328933, "grad_norm": 0.02464209180856968, "learning_rate": 2.3907023874897295e-07, "loss": 0.4245, "step": 39045 }, { "epoch": 1.9824726174309248, "grad_norm": 0.023932076143716494, "learning_rate": 2.3226968509598712e-07, "loss": 0.4507, "step": 39050 }, { "epoch": 1.9827264535289564, "grad_norm": 0.020196673491678915, "learning_rate": 2.2556723335609431e-07, "loss": 0.4407, "step": 39055 }, { "epoch": 1.9829802896269877, "grad_norm": 0.022940306957792315, "learning_rate": 2.1896288484496428e-07, "loss": 0.4575, "step": 39060 }, { "epoch": 1.9832341257250192, "grad_norm": 0.028907148506462126, "learning_rate": 2.1245664085906002e-07, "loss": 0.4506, "step": 39065 }, { "epoch": 1.9834879618230508, "grad_norm": 0.023945110409690495, "learning_rate": 2.0604850267547104e-07, "loss": 0.4592, "step": 39070 }, { "epoch": 1.9837417979210823, "grad_norm": 0.028605852547885884, "learning_rate": 1.9973847155208003e-07, "loss": 0.4588, "step": 39075 }, { "epoch": 1.9839956340191138, "grad_norm": 0.02150438379068988, "learning_rate": 1.935265487275073e-07, "loss": 0.4286, "step": 39080 }, { "epoch": 1.9842494701171454, "grad_norm": 0.021743607443387786, "learning_rate": 1.8741273542116633e-07, "loss": 0.4728, "step": 39085 }, { "epoch": 1.984503306215177, "grad_norm": 0.024759023733061148, "learning_rate": 1.8139703283315267e-07, "loss": 0.4691, "step": 39090 }, { "epoch": 1.9847571423132084, "grad_norm": 0.021978062996421237, "learning_rate": 1.7547944214429957e-07, "loss": 0.4413, "step": 39095 }, { "epoch": 1.98501097841124, "grad_norm": 0.02156291605727596, "learning_rate": 1.6965996451623334e-07, "loss": 0.4424, "step": 39100 }, { "epoch": 1.9852648145092715, "grad_norm": 0.019974654547251697, "learning_rate": 1.6393860109120695e-07, "loss": 0.4581, "step": 39105 }, { "epoch": 1.9855186506073028, "grad_norm": 0.023303362723310548, "learning_rate": 1.5831535299243304e-07, "loss": 0.4222, "step": 39110 }, { "epoch": 1.9857724867053343, "grad_norm": 0.021376371336891062, "learning_rate": 1.5279022132358434e-07, "loss": 0.4265, "step": 39115 }, { "epoch": 1.9860263228033659, "grad_norm": 0.07893795121247377, "learning_rate": 1.473632071692932e-07, "loss": 0.4357, "step": 39120 }, { "epoch": 1.9862801589013974, "grad_norm": 0.02242228809324414, "learning_rate": 1.4203431159487413e-07, "loss": 0.4276, "step": 39125 }, { "epoch": 1.9865339949994287, "grad_norm": 0.024029115581471198, "learning_rate": 1.3680353564632375e-07, "loss": 0.4291, "step": 39130 }, { "epoch": 1.9867878310974603, "grad_norm": 0.02328166827725145, "learning_rate": 1.3167088035037632e-07, "loss": 0.4332, "step": 39135 }, { "epoch": 1.9870416671954918, "grad_norm": 0.02357094301240424, "learning_rate": 1.266363467146703e-07, "loss": 0.4545, "step": 39140 }, { "epoch": 1.9872955032935233, "grad_norm": 0.023516549978020867, "learning_rate": 1.216999357273596e-07, "loss": 0.4384, "step": 39145 }, { "epoch": 1.9875493393915549, "grad_norm": 0.025546553659660538, "learning_rate": 1.1686164835744695e-07, "loss": 0.4447, "step": 39150 }, { "epoch": 1.9878031754895864, "grad_norm": 0.018763562747325255, "learning_rate": 1.121214855546726e-07, "loss": 0.4391, "step": 39155 }, { "epoch": 1.988057011587618, "grad_norm": 0.02484040778673898, "learning_rate": 1.074794482495145e-07, "loss": 0.4554, "step": 39160 }, { "epoch": 1.9883108476856495, "grad_norm": 0.020917177794273913, "learning_rate": 1.0293553735318817e-07, "loss": 0.4487, "step": 39165 }, { "epoch": 1.988564683783681, "grad_norm": 0.02066839479802247, "learning_rate": 9.84897537576468e-08, "loss": 0.4241, "step": 39170 }, { "epoch": 1.9888185198817125, "grad_norm": 0.023515586475883914, "learning_rate": 9.414209833552567e-08, "loss": 0.4709, "step": 39175 }, { "epoch": 1.9890723559797439, "grad_norm": 0.024226686054511816, "learning_rate": 8.989257194030876e-08, "loss": 0.4775, "step": 39180 }, { "epoch": 1.9893261920777754, "grad_norm": 0.023019658882310785, "learning_rate": 8.57411754061621e-08, "loss": 0.4539, "step": 39185 }, { "epoch": 1.989580028175807, "grad_norm": 0.02633296041160311, "learning_rate": 8.168790954793392e-08, "loss": 0.4702, "step": 39190 }, { "epoch": 1.9898338642738382, "grad_norm": 0.02531198180541627, "learning_rate": 7.773277516126553e-08, "loss": 0.4541, "step": 39195 }, { "epoch": 1.9900877003718698, "grad_norm": 0.025611896685111196, "learning_rate": 7.38757730225359e-08, "loss": 0.4324, "step": 39200 }, { "epoch": 1.9903415364699013, "grad_norm": 0.02262366655843535, "learning_rate": 7.01169038888616e-08, "loss": 0.4531, "step": 39205 }, { "epoch": 1.9905953725679328, "grad_norm": 0.022614701857616906, "learning_rate": 6.64561684981524e-08, "loss": 0.4504, "step": 39210 }, { "epoch": 1.9908492086659644, "grad_norm": 0.024008714004990543, "learning_rate": 6.289356756888908e-08, "loss": 0.4487, "step": 39215 }, { "epoch": 1.991103044763996, "grad_norm": 0.020603992655022652, "learning_rate": 5.9429101800401174e-08, "loss": 0.4555, "step": 39220 }, { "epoch": 1.9913568808620274, "grad_norm": 0.020889931974897115, "learning_rate": 5.606277187286679e-08, "loss": 0.463, "step": 39225 }, { "epoch": 1.991610716960059, "grad_norm": 0.0313564646039055, "learning_rate": 5.2794578446924145e-08, "loss": 0.4804, "step": 39230 }, { "epoch": 1.9918645530580905, "grad_norm": 0.021498099912898545, "learning_rate": 4.962452216417113e-08, "loss": 0.4825, "step": 39235 }, { "epoch": 1.992118389156122, "grad_norm": 0.02066023063054367, "learning_rate": 4.655260364694325e-08, "loss": 0.4463, "step": 39240 }, { "epoch": 1.9923722252541534, "grad_norm": 0.02131649131072014, "learning_rate": 4.357882349809161e-08, "loss": 0.4666, "step": 39245 }, { "epoch": 1.992626061352185, "grad_norm": 0.02195382258423416, "learning_rate": 4.0703182301482514e-08, "loss": 0.4372, "step": 39250 }, { "epoch": 1.9928798974502164, "grad_norm": 0.020715639914265123, "learning_rate": 3.792568062155333e-08, "loss": 0.4769, "step": 39255 }, { "epoch": 1.9931337335482477, "grad_norm": 0.0206383390200722, "learning_rate": 3.524631900347908e-08, "loss": 0.4327, "step": 39260 }, { "epoch": 1.9933875696462793, "grad_norm": 0.021381354938705657, "learning_rate": 3.266509797328343e-08, "loss": 0.4715, "step": 39265 }, { "epoch": 1.9936414057443108, "grad_norm": 0.021713933568330897, "learning_rate": 3.018201803756115e-08, "loss": 0.4428, "step": 39270 }, { "epoch": 1.9938952418423423, "grad_norm": 0.019288162544681065, "learning_rate": 2.7797079683755666e-08, "loss": 0.4647, "step": 39275 }, { "epoch": 1.9941490779403739, "grad_norm": 0.027710930933847345, "learning_rate": 2.5510283379992505e-08, "loss": 0.4898, "step": 39280 }, { "epoch": 1.9944029140384054, "grad_norm": 0.021943033021282254, "learning_rate": 2.3321629575245862e-08, "loss": 0.4366, "step": 39285 }, { "epoch": 1.994656750136437, "grad_norm": 0.02816046395571235, "learning_rate": 2.1231118699061024e-08, "loss": 0.4599, "step": 39290 }, { "epoch": 1.9949105862344685, "grad_norm": 0.021547960303644455, "learning_rate": 1.9238751161831936e-08, "loss": 0.425, "step": 39295 }, { "epoch": 1.9951644223325, "grad_norm": 0.020328451114982965, "learning_rate": 1.7344527354634655e-08, "loss": 0.4201, "step": 39300 }, { "epoch": 1.9954182584305316, "grad_norm": 0.02182918824731763, "learning_rate": 1.554844764928287e-08, "loss": 0.4637, "step": 39305 }, { "epoch": 1.9956720945285629, "grad_norm": 0.02193159918916923, "learning_rate": 1.3850512398383419e-08, "loss": 0.4707, "step": 39310 }, { "epoch": 1.9959259306265944, "grad_norm": 0.02149565745718913, "learning_rate": 1.225072193516974e-08, "loss": 0.4406, "step": 39315 }, { "epoch": 1.996179766724626, "grad_norm": 0.020693611586431505, "learning_rate": 1.0749076573723927e-08, "loss": 0.4555, "step": 39320 }, { "epoch": 1.9964336028226572, "grad_norm": 0.0239210359312069, "learning_rate": 9.34557660875468e-09, "loss": 0.4554, "step": 39325 }, { "epoch": 1.9966874389206888, "grad_norm": 0.02080480579062484, "learning_rate": 8.040222315819357e-09, "loss": 0.4551, "step": 39330 }, { "epoch": 1.9969412750187203, "grad_norm": 0.023148989474884817, "learning_rate": 6.833013951157429e-09, "loss": 0.4525, "step": 39335 }, { "epoch": 1.9971951111167519, "grad_norm": 0.023818805572660067, "learning_rate": 5.7239517516904925e-09, "loss": 0.4586, "step": 39340 }, { "epoch": 1.9974489472147834, "grad_norm": 0.029865648276199377, "learning_rate": 4.713035935188792e-09, "loss": 0.4767, "step": 39345 }, { "epoch": 1.997702783312815, "grad_norm": 0.020425051877142286, "learning_rate": 3.800266699993671e-09, "loss": 0.444, "step": 39350 }, { "epoch": 1.9979566194108465, "grad_norm": 0.022269852971449254, "learning_rate": 2.9856442253506366e-09, "loss": 0.4632, "step": 39355 }, { "epoch": 1.998210455508878, "grad_norm": 0.02273454463601086, "learning_rate": 2.2691686711318048e-09, "loss": 0.4404, "step": 39360 }, { "epoch": 1.9984642916069095, "grad_norm": 0.021280763318387155, "learning_rate": 1.6508401780024329e-09, "loss": 0.4621, "step": 39365 }, { "epoch": 1.998718127704941, "grad_norm": 0.025913136491658925, "learning_rate": 1.1306588673098972e-09, "loss": 0.4594, "step": 39370 }, { "epoch": 1.9989719638029724, "grad_norm": 0.02119584869294317, "learning_rate": 7.08624841194716e-10, "loss": 0.456, "step": 39375 }, { "epoch": 1.999225799901004, "grad_norm": 0.01982643960905116, "learning_rate": 3.8473818242401594e-10, "loss": 0.436, "step": 39380 }, { "epoch": 1.9994796359990354, "grad_norm": 0.024448883021925015, "learning_rate": 1.5899895472459848e-10, "loss": 0.4621, "step": 39385 }, { "epoch": 1.999733472097067, "grad_norm": 0.027796596702240602, "learning_rate": 3.140720228334004e-11, "loss": 0.4499, "step": 39390 }, { "epoch": 1.9999365409754921, "step": 39394, "total_flos": 3.655598871565828e+18, "train_loss": 0.5567794406680932, "train_runtime": 148446.9136, "train_samples_per_second": 2.123, "train_steps_per_second": 0.265 } ], "logging_steps": 5, "max_steps": 39394, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.655598871565828e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }