{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1660,
"global_step": 33185,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.0134096730450506e-05,
"grad_norm": 8.0,
"learning_rate": 2e-06,
"loss": 1.334,
"step": 1
},
{
"epoch": 0.0030134096730450506,
"grad_norm": 0.34765625,
"learning_rate": 0.0002,
"loss": 0.9372,
"step": 100
},
{
"epoch": 0.006026819346090101,
"grad_norm": 1.2734375,
"learning_rate": 0.0004,
"loss": 0.6477,
"step": 200
},
{
"epoch": 0.009040229019135152,
"grad_norm": 0.59375,
"learning_rate": 0.0006,
"loss": 0.5967,
"step": 300
},
{
"epoch": 0.012053638692180202,
"grad_norm": 1.40625,
"learning_rate": 0.0008,
"loss": 0.5828,
"step": 400
},
{
"epoch": 0.015067048365225252,
"grad_norm": 0.453125,
"learning_rate": 0.001,
"loss": 0.5774,
"step": 500
},
{
"epoch": 0.018080458038270304,
"grad_norm": 0.890625,
"learning_rate": 0.0012,
"loss": 0.557,
"step": 600
},
{
"epoch": 0.021093867711315353,
"grad_norm": 0.396484375,
"learning_rate": 0.0014,
"loss": 0.5573,
"step": 700
},
{
"epoch": 0.024107277384360404,
"grad_norm": 0.416015625,
"learning_rate": 0.0016,
"loss": 0.5577,
"step": 800
},
{
"epoch": 0.027120687057405453,
"grad_norm": 0.375,
"learning_rate": 0.0018000000000000002,
"loss": 0.563,
"step": 900
},
{
"epoch": 0.030134096730450505,
"grad_norm": 0.58984375,
"learning_rate": 0.002,
"loss": 0.568,
"step": 1000
},
{
"epoch": 0.03314750640349556,
"grad_norm": 0.33203125,
"learning_rate": 0.0019999571252319053,
"loss": 0.5969,
"step": 1100
},
{
"epoch": 0.03616091607654061,
"grad_norm": 0.5234375,
"learning_rate": 0.0019998285050126107,
"loss": 0.6365,
"step": 1200
},
{
"epoch": 0.03917432574958565,
"grad_norm": 0.302734375,
"learning_rate": 0.0019996141515967,
"loss": 0.6351,
"step": 1300
},
{
"epoch": 0.042187735422630705,
"grad_norm": 0.375,
"learning_rate": 0.001999314085407178,
"loss": 0.5537,
"step": 1400
},
{
"epoch": 0.04520114509567576,
"grad_norm": 0.271484375,
"learning_rate": 0.0019989283350335314,
"loss": 0.5484,
"step": 1500
},
{
"epoch": 0.04821455476872081,
"grad_norm": 0.375,
"learning_rate": 0.0019984569372289993,
"loss": 0.5583,
"step": 1600
},
{
"epoch": 0.050022600572547836,
"eval_peoplespeech-clean-transcription_loss": 4.166935443878174,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 15.2594,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.194,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.066,
"step": 1660
},
{
"epoch": 0.05122796444176586,
"grad_norm": 0.2578125,
"learning_rate": 0.0019978999369070737,
"loss": 0.5722,
"step": 1700
},
{
"epoch": 0.054241374114810906,
"grad_norm": 0.3203125,
"learning_rate": 0.001997257387137221,
"loss": 0.5699,
"step": 1800
},
{
"epoch": 0.05725478378785596,
"grad_norm": 0.27734375,
"learning_rate": 0.0019965293491398237,
"loss": 0.595,
"step": 1900
},
{
"epoch": 0.06026819346090101,
"grad_norm": 0.455078125,
"learning_rate": 0.001995715892280349,
"loss": 0.561,
"step": 2000
},
{
"epoch": 0.06328160313394605,
"grad_norm": 0.26171875,
"learning_rate": 0.00199481709406274,
"loss": 0.5553,
"step": 2100
},
{
"epoch": 0.06629501280699111,
"grad_norm": 0.27734375,
"learning_rate": 0.0019938330401220307,
"loss": 0.5668,
"step": 2200
},
{
"epoch": 0.06930842248003616,
"grad_norm": 0.318359375,
"learning_rate": 0.0019927638242161864,
"loss": 0.574,
"step": 2300
},
{
"epoch": 0.07232183215308122,
"grad_norm": 0.1767578125,
"learning_rate": 0.001991609548217171,
"loss": 0.6076,
"step": 2400
},
{
"epoch": 0.07533524182612626,
"grad_norm": 0.18359375,
"learning_rate": 0.001990370322101242,
"loss": 0.6369,
"step": 2500
},
{
"epoch": 0.0783486514991713,
"grad_norm": 0.314453125,
"learning_rate": 0.001989046263938472,
"loss": 0.6106,
"step": 2600
},
{
"epoch": 0.08136206117221637,
"grad_norm": 0.240234375,
"learning_rate": 0.0019876374998814973,
"loss": 0.5993,
"step": 2700
},
{
"epoch": 0.08437547084526141,
"grad_norm": 0.51953125,
"learning_rate": 0.0019861441641535007,
"loss": 0.5933,
"step": 2800
},
{
"epoch": 0.08738888051830647,
"grad_norm": 0.609375,
"learning_rate": 0.001984566399035423,
"loss": 0.5937,
"step": 2900
},
{
"epoch": 0.09040229019135151,
"grad_norm": 0.21484375,
"learning_rate": 0.001982904354852404,
"loss": 0.5881,
"step": 3000
},
{
"epoch": 0.09341569986439656,
"grad_norm": 0.26171875,
"learning_rate": 0.0019811581899594646,
"loss": 0.5817,
"step": 3100
},
{
"epoch": 0.09642910953744162,
"grad_norm": 0.2138671875,
"learning_rate": 0.0019793280707264154,
"loss": 0.588,
"step": 3200
},
{
"epoch": 0.09944251921048666,
"grad_norm": 0.23828125,
"learning_rate": 0.0019774141715220065,
"loss": 0.5813,
"step": 3300
},
{
"epoch": 0.10004520114509567,
"eval_peoplespeech-clean-transcription_loss": 4.31866455078125,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.293,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.478,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.07,
"step": 3320
},
{
"epoch": 0.10245592888353172,
"grad_norm": 0.2080078125,
"learning_rate": 0.0019754166746973156,
"loss": 0.5784,
"step": 3400
},
{
"epoch": 0.10546933855657677,
"grad_norm": 0.2119140625,
"learning_rate": 0.0019733357705683705,
"loss": 0.582,
"step": 3500
},
{
"epoch": 0.10848274822962181,
"grad_norm": 0.248046875,
"learning_rate": 0.001971171657398021,
"loss": 0.5877,
"step": 3600
},
{
"epoch": 0.11149615790266687,
"grad_norm": 0.255859375,
"learning_rate": 0.001968924541377045,
"loss": 0.5788,
"step": 3700
},
{
"epoch": 0.11450956757571192,
"grad_norm": 0.271484375,
"learning_rate": 0.001966594636604506,
"loss": 0.5739,
"step": 3800
},
{
"epoch": 0.11752297724875697,
"grad_norm": 0.26171875,
"learning_rate": 0.001964182165067352,
"loss": 0.5748,
"step": 3900
},
{
"epoch": 0.12053638692180202,
"grad_norm": 7.28125,
"learning_rate": 0.001961687356619266,
"loss": 0.5746,
"step": 4000
},
{
"epoch": 0.12354979659484706,
"grad_norm": 0.25,
"learning_rate": 0.001959110448958769,
"loss": 0.5877,
"step": 4100
},
{
"epoch": 0.1265632062678921,
"grad_norm": 0.2421875,
"learning_rate": 0.001956451687606567,
"loss": 0.5652,
"step": 4200
},
{
"epoch": 0.12957661594093717,
"grad_norm": 0.26171875,
"learning_rate": 0.0019537113258821636,
"loss": 0.5842,
"step": 4300
},
{
"epoch": 0.13259002561398223,
"grad_norm": 0.2158203125,
"learning_rate": 0.001950889624879722,
"loss": 0.5687,
"step": 4400
},
{
"epoch": 0.13560343528702729,
"grad_norm": 0.32421875,
"learning_rate": 0.0019479868534431892,
"loss": 0.5715,
"step": 4500
},
{
"epoch": 0.13861684496007232,
"grad_norm": 0.251953125,
"learning_rate": 0.001945003288140681,
"loss": 0.5811,
"step": 4600
},
{
"epoch": 0.14163025463311738,
"grad_norm": 0.44921875,
"learning_rate": 0.0019419392132381317,
"loss": 0.5936,
"step": 4700
},
{
"epoch": 0.14464366430616243,
"grad_norm": 0.24609375,
"learning_rate": 0.0019387949206722099,
"loss": 0.5861,
"step": 4800
},
{
"epoch": 0.14765707397920746,
"grad_norm": 0.255859375,
"learning_rate": 0.0019355707100225034,
"loss": 0.5867,
"step": 4900
},
{
"epoch": 0.1500678017176435,
"eval_peoplespeech-clean-transcription_loss": 4.08488130569458,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.7738,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.332,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
"step": 4980
},
{
"epoch": 0.15067048365225252,
"grad_norm": 0.22265625,
"learning_rate": 0.0019322668884829768,
"loss": 0.5827,
"step": 5000
},
{
"epoch": 0.15368389332529758,
"grad_norm": 0.267578125,
"learning_rate": 0.0019288837708327019,
"loss": 0.5829,
"step": 5100
},
{
"epoch": 0.1566973029983426,
"grad_norm": 0.2236328125,
"learning_rate": 0.0019254216794058665,
"loss": 0.574,
"step": 5200
},
{
"epoch": 0.15971071267138767,
"grad_norm": 0.232421875,
"learning_rate": 0.0019218809440610645,
"loss": 0.5907,
"step": 5300
},
{
"epoch": 0.16272412234443273,
"grad_norm": 0.234375,
"learning_rate": 0.0019182619021498664,
"loss": 0.5736,
"step": 5400
},
{
"epoch": 0.1657375320174778,
"grad_norm": 0.2451171875,
"learning_rate": 0.001914564898484678,
"loss": 0.586,
"step": 5500
},
{
"epoch": 0.16875094169052282,
"grad_norm": 0.2080078125,
"learning_rate": 0.0019107902853058875,
"loss": 0.583,
"step": 5600
},
{
"epoch": 0.17176435136356788,
"grad_norm": 0.2314453125,
"learning_rate": 0.0019069384222483061,
"loss": 0.589,
"step": 5700
},
{
"epoch": 0.17477776103661294,
"grad_norm": 0.390625,
"learning_rate": 0.0019030096763069007,
"loss": 0.569,
"step": 5800
},
{
"epoch": 0.17779117070965797,
"grad_norm": 0.24609375,
"learning_rate": 0.0018990044218018295,
"loss": 0.5914,
"step": 5900
},
{
"epoch": 0.18080458038270303,
"grad_norm": 0.205078125,
"learning_rate": 0.0018949230403427768,
"loss": 0.5936,
"step": 6000
},
{
"epoch": 0.1838179900557481,
"grad_norm": 0.2353515625,
"learning_rate": 0.0018907659207925951,
"loss": 0.5959,
"step": 6100
},
{
"epoch": 0.18683139972879312,
"grad_norm": 0.220703125,
"learning_rate": 0.0018865334592302553,
"loss": 0.5734,
"step": 6200
},
{
"epoch": 0.18984480940183818,
"grad_norm": 0.2431640625,
"learning_rate": 0.0018822260589131075,
"loss": 0.5815,
"step": 6300
},
{
"epoch": 0.19285821907488324,
"grad_norm": 0.2158203125,
"learning_rate": 0.0018778441302384629,
"loss": 0.58,
"step": 6400
},
{
"epoch": 0.1958716287479283,
"grad_norm": 0.2734375,
"learning_rate": 0.0018733880907044892,
"loss": 0.5807,
"step": 6500
},
{
"epoch": 0.19888503842097333,
"grad_norm": 0.2216796875,
"learning_rate": 0.0018688583648704348,
"loss": 0.5741,
"step": 6600
},
{
"epoch": 0.20009040229019134,
"eval_peoplespeech-clean-transcription_loss": 4.054948806762695,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.5864,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.388,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 6640
},
{
"epoch": 0.20189844809401838,
"grad_norm": 0.263671875,
"learning_rate": 0.0018642553843161765,
"loss": 0.5808,
"step": 6700
},
{
"epoch": 0.20491185776706344,
"grad_norm": 0.1962890625,
"learning_rate": 0.0018595795876011011,
"loss": 0.572,
"step": 6800
},
{
"epoch": 0.20792526744010847,
"grad_norm": 0.224609375,
"learning_rate": 0.001854831420222319,
"loss": 0.5738,
"step": 6900
},
{
"epoch": 0.21093867711315353,
"grad_norm": 0.18359375,
"learning_rate": 0.001850011334572219,
"loss": 0.5631,
"step": 7000
},
{
"epoch": 0.2139520867861986,
"grad_norm": 0.2001953125,
"learning_rate": 0.0018451197898953675,
"loss": 0.5656,
"step": 7100
},
{
"epoch": 0.21696549645924362,
"grad_norm": 0.1708984375,
"learning_rate": 0.0018401572522447499,
"loss": 0.5501,
"step": 7200
},
{
"epoch": 0.21997890613228868,
"grad_norm": 0.1845703125,
"learning_rate": 0.0018351241944373684,
"loss": 0.5487,
"step": 7300
},
{
"epoch": 0.22299231580533374,
"grad_norm": 0.14453125,
"learning_rate": 0.0018300210960091926,
"loss": 0.535,
"step": 7400
},
{
"epoch": 0.2260057254783788,
"grad_norm": 0.111328125,
"learning_rate": 0.0018248484431694705,
"loss": 0.5265,
"step": 7500
},
{
"epoch": 0.22901913515142383,
"grad_norm": 0.05126953125,
"learning_rate": 0.0018196067287544043,
"loss": 0.4819,
"step": 7600
},
{
"epoch": 0.2320325448244689,
"grad_norm": 0.04296875,
"learning_rate": 0.0018142964521801936,
"loss": 0.4168,
"step": 7700
},
{
"epoch": 0.23504595449751395,
"grad_norm": 0.033447265625,
"learning_rate": 0.001808918119395454,
"loss": 0.3548,
"step": 7800
},
{
"epoch": 0.23805936417055898,
"grad_norm": 0.03271484375,
"learning_rate": 0.0018034722428330089,
"loss": 0.3206,
"step": 7900
},
{
"epoch": 0.24107277384360404,
"grad_norm": 0.0274658203125,
"learning_rate": 0.0017979593413610688,
"loss": 0.3043,
"step": 8000
},
{
"epoch": 0.2440861835166491,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0017923799402337944,
"loss": 0.2899,
"step": 8100
},
{
"epoch": 0.24709959318969413,
"grad_norm": 0.028076171875,
"learning_rate": 0.0017867345710412504,
"loss": 0.2772,
"step": 8200
},
{
"epoch": 0.2501130028627392,
"grad_norm": 0.030029296875,
"learning_rate": 0.00178102377165876,
"loss": 0.2692,
"step": 8300
},
{
"epoch": 0.2501130028627392,
"eval_peoplespeech-clean-transcription_loss": 1.788245677947998,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7068,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.669,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 8300
},
{
"epoch": 0.2531264125357842,
"grad_norm": 0.0262451171875,
"learning_rate": 0.0017752480861956536,
"loss": 0.2649,
"step": 8400
},
{
"epoch": 0.2561398222088293,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0017694080649434314,
"loss": 0.2574,
"step": 8500
},
{
"epoch": 0.25915323188187434,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0017635042643233307,
"loss": 0.2522,
"step": 8600
},
{
"epoch": 0.26216664155491937,
"grad_norm": 0.02880859375,
"learning_rate": 0.0017575372468333127,
"loss": 0.2487,
"step": 8700
},
{
"epoch": 0.26518005122796445,
"grad_norm": 0.0230712890625,
"learning_rate": 0.001751507580994468,
"loss": 0.2407,
"step": 8800
},
{
"epoch": 0.2681934609010095,
"grad_norm": 0.031005859375,
"learning_rate": 0.0017454158412968522,
"loss": 0.238,
"step": 8900
},
{
"epoch": 0.27120687057405457,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0017392626081447465,
"loss": 0.2347,
"step": 9000
},
{
"epoch": 0.2742202802470996,
"grad_norm": 0.0306396484375,
"learning_rate": 0.0017330484678013609,
"loss": 0.2343,
"step": 9100
},
{
"epoch": 0.27723368992014463,
"grad_norm": 0.0262451171875,
"learning_rate": 0.0017267740123329753,
"loss": 0.2324,
"step": 9200
},
{
"epoch": 0.2802470995931897,
"grad_norm": 0.02880859375,
"learning_rate": 0.0017204398395525308,
"loss": 0.2294,
"step": 9300
},
{
"epoch": 0.28326050926623475,
"grad_norm": 0.025390625,
"learning_rate": 0.0017140465529626692,
"loss": 0.2278,
"step": 9400
},
{
"epoch": 0.2862739189392798,
"grad_norm": 0.027587890625,
"learning_rate": 0.0017075947616982349,
"loss": 0.2247,
"step": 9500
},
{
"epoch": 0.28928732861232487,
"grad_norm": 0.024658203125,
"learning_rate": 0.001701085080468237,
"loss": 0.2204,
"step": 9600
},
{
"epoch": 0.2923007382853699,
"grad_norm": 0.030029296875,
"learning_rate": 0.0016945181294972828,
"loss": 0.2201,
"step": 9700
},
{
"epoch": 0.29531414795841493,
"grad_norm": 0.025634765625,
"learning_rate": 0.0016878945344664831,
"loss": 0.2196,
"step": 9800
},
{
"epoch": 0.29832755763146,
"grad_norm": 0.0279541015625,
"learning_rate": 0.0016812149264538402,
"loss": 0.2163,
"step": 9900
},
{
"epoch": 0.300135603435287,
"eval_peoplespeech-clean-transcription_loss": 1.6014918088912964,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7355,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.659,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 9960
},
{
"epoch": 0.30134096730450505,
"grad_norm": 0.0257568359375,
"learning_rate": 0.0016744799418741193,
"loss": 0.2143,
"step": 10000
},
{
"epoch": 0.3043543769775501,
"grad_norm": 0.027099609375,
"learning_rate": 0.001667690222418214,
"loss": 0.214,
"step": 10100
},
{
"epoch": 0.30736778665059517,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0016608464149920064,
"loss": 0.2111,
"step": 10200
},
{
"epoch": 0.3103811963236402,
"grad_norm": 0.02734375,
"learning_rate": 0.0016539491716547332,
"loss": 0.2124,
"step": 10300
},
{
"epoch": 0.3133946059966852,
"grad_norm": 0.025146484375,
"learning_rate": 0.0016469991495568573,
"loss": 0.2071,
"step": 10400
},
{
"epoch": 0.3164080156697303,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0016399970108774587,
"loss": 0.2106,
"step": 10500
},
{
"epoch": 0.31942142534277534,
"grad_norm": 0.025634765625,
"learning_rate": 0.001632943422761141,
"loss": 0.2075,
"step": 10600
},
{
"epoch": 0.3224348350158204,
"grad_norm": 0.0269775390625,
"learning_rate": 0.0016258390572544716,
"loss": 0.2065,
"step": 10700
},
{
"epoch": 0.32544824468886546,
"grad_norm": 0.024169921875,
"learning_rate": 0.001618684591241946,
"loss": 0.2065,
"step": 10800
},
{
"epoch": 0.3284616543619105,
"grad_norm": 0.026123046875,
"learning_rate": 0.0016114807063815008,
"loss": 0.2055,
"step": 10900
},
{
"epoch": 0.3314750640349556,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0016042280890395642,
"loss": 0.2043,
"step": 11000
},
{
"epoch": 0.3344884737080006,
"grad_norm": 0.02685546875,
"learning_rate": 0.0015969274302256621,
"loss": 0.2006,
"step": 11100
},
{
"epoch": 0.33750188338104564,
"grad_norm": 0.0245361328125,
"learning_rate": 0.00158957942552658,
"loss": 0.2021,
"step": 11200
},
{
"epoch": 0.34051529305409073,
"grad_norm": 0.02783203125,
"learning_rate": 0.00158218477504009,
"loss": 0.2042,
"step": 11300
},
{
"epoch": 0.34352870272713576,
"grad_norm": 0.0257568359375,
"learning_rate": 0.0015747441833082476,
"loss": 0.2043,
"step": 11400
},
{
"epoch": 0.3465421124001808,
"grad_norm": 0.0263671875,
"learning_rate": 0.0015672583592502632,
"loss": 0.1991,
"step": 11500
},
{
"epoch": 0.3495555220732259,
"grad_norm": 0.0281982421875,
"learning_rate": 0.0015597280160949602,
"loss": 0.1994,
"step": 11600
},
{
"epoch": 0.3501582040078349,
"eval_peoplespeech-clean-transcription_loss": 1.5406583547592163,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.5635,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.395,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 11620
},
{
"epoch": 0.3525689317462709,
"grad_norm": 0.0257568359375,
"learning_rate": 0.0015521538713128204,
"loss": 0.2,
"step": 11700
},
{
"epoch": 0.35558234141931594,
"grad_norm": 0.0244140625,
"learning_rate": 0.001544536646547623,
"loss": 0.1978,
"step": 11800
},
{
"epoch": 0.358595751092361,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0015368770675476915,
"loss": 0.1974,
"step": 11900
},
{
"epoch": 0.36160916076540606,
"grad_norm": 0.025390625,
"learning_rate": 0.001529175864096744,
"loss": 0.1963,
"step": 12000
},
{
"epoch": 0.3646225704384511,
"grad_norm": 0.027587890625,
"learning_rate": 0.0015214337699443632,
"loss": 0.1958,
"step": 12100
},
{
"epoch": 0.3676359801114962,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0015136515227360855,
"loss": 0.1974,
"step": 12200
},
{
"epoch": 0.3706493897845412,
"grad_norm": 0.029052734375,
"learning_rate": 0.0015058298639431193,
"loss": 0.1974,
"step": 12300
},
{
"epoch": 0.37366279945758624,
"grad_norm": 0.0245361328125,
"learning_rate": 0.0014979695387917036,
"loss": 0.1924,
"step": 12400
},
{
"epoch": 0.3766762091306313,
"grad_norm": 0.024169921875,
"learning_rate": 0.0014900712961920999,
"loss": 0.1925,
"step": 12500
},
{
"epoch": 0.37968961880367635,
"grad_norm": 0.021484375,
"learning_rate": 0.0014821358886672414,
"loss": 0.1935,
"step": 12600
},
{
"epoch": 0.3827030284767214,
"grad_norm": 0.0252685546875,
"learning_rate": 0.0014741640722810332,
"loss": 0.1925,
"step": 12700
},
{
"epoch": 0.38571643814976647,
"grad_norm": 0.024658203125,
"learning_rate": 0.0014661566065663168,
"loss": 0.1936,
"step": 12800
},
{
"epoch": 0.3887298478228115,
"grad_norm": 0.0263671875,
"learning_rate": 0.0014581142544525052,
"loss": 0.1928,
"step": 12900
},
{
"epoch": 0.3917432574958566,
"grad_norm": 0.025390625,
"learning_rate": 0.0014500377821928911,
"loss": 0.1927,
"step": 13000
},
{
"epoch": 0.3947566671689016,
"grad_norm": 0.0245361328125,
"learning_rate": 0.0014419279592916417,
"loss": 0.1931,
"step": 13100
},
{
"epoch": 0.39777007684194665,
"grad_norm": 0.025390625,
"learning_rate": 0.001433785558430481,
"loss": 0.1903,
"step": 13200
},
{
"epoch": 0.4001808045803827,
"eval_peoplespeech-clean-transcription_loss": 1.5337910652160645,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.9866,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.27,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.067,
"step": 13280
},
{
"epoch": 0.40078348651499174,
"grad_norm": 0.0257568359375,
"learning_rate": 0.0014256113553950739,
"loss": 0.1917,
"step": 13300
},
{
"epoch": 0.40379689618803677,
"grad_norm": 0.023681640625,
"learning_rate": 0.0014174061290011075,
"loss": 0.1893,
"step": 13400
},
{
"epoch": 0.4068103058610818,
"grad_norm": 0.02685546875,
"learning_rate": 0.0014091706610200902,
"loss": 0.1909,
"step": 13500
},
{
"epoch": 0.4098237155341269,
"grad_norm": 0.02294921875,
"learning_rate": 0.0014009057361048665,
"loss": 0.19,
"step": 13600
},
{
"epoch": 0.4128371252071719,
"grad_norm": 0.0274658203125,
"learning_rate": 0.001392612141714856,
"loss": 0.1913,
"step": 13700
},
{
"epoch": 0.41585053488021695,
"grad_norm": 0.02294921875,
"learning_rate": 0.0013842906680410286,
"loss": 0.1898,
"step": 13800
},
{
"epoch": 0.41886394455326204,
"grad_norm": 0.026611328125,
"learning_rate": 0.0013759421079306145,
"loss": 0.1892,
"step": 13900
},
{
"epoch": 0.42187735422630707,
"grad_norm": 0.0252685546875,
"learning_rate": 0.001367567256811567,
"loss": 0.1893,
"step": 14000
},
{
"epoch": 0.4248907638993521,
"grad_norm": 0.02783203125,
"learning_rate": 0.0013591669126167736,
"loss": 0.1898,
"step": 14100
},
{
"epoch": 0.4279041735723972,
"grad_norm": 0.0235595703125,
"learning_rate": 0.001350741875708033,
"loss": 0.1874,
"step": 14200
},
{
"epoch": 0.4309175832454422,
"grad_norm": 0.029541015625,
"learning_rate": 0.0013422929487997973,
"loss": 0.188,
"step": 14300
},
{
"epoch": 0.43393099291848725,
"grad_norm": 0.0252685546875,
"learning_rate": 0.0013338209368826933,
"loss": 0.1879,
"step": 14400
},
{
"epoch": 0.43694440259153233,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0013253266471468235,
"loss": 0.1865,
"step": 14500
},
{
"epoch": 0.43995781226457736,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0013168108889048602,
"loss": 0.1859,
"step": 14600
},
{
"epoch": 0.4429712219376224,
"grad_norm": 0.02685546875,
"learning_rate": 0.0013082744735149366,
"loss": 0.1872,
"step": 14700
},
{
"epoch": 0.4459846316106675,
"grad_norm": 0.0223388671875,
"learning_rate": 0.0012997182143033416,
"loss": 0.1867,
"step": 14800
},
{
"epoch": 0.4489980412837125,
"grad_norm": 0.0279541015625,
"learning_rate": 0.00129114292648703,
"loss": 0.1867,
"step": 14900
},
{
"epoch": 0.45020340515293056,
"eval_peoplespeech-clean-transcription_loss": 1.5441259145736694,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.9015,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.295,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.067,
"step": 14940
},
{
"epoch": 0.4520114509567576,
"grad_norm": 0.0211181640625,
"learning_rate": 0.001282549427095949,
"loss": 0.1866,
"step": 15000
},
{
"epoch": 0.45502486062980263,
"grad_norm": 0.029541015625,
"learning_rate": 0.0012739385348951955,
"loss": 0.1852,
"step": 15100
},
{
"epoch": 0.45803827030284766,
"grad_norm": 0.026123046875,
"learning_rate": 0.0012653110703070055,
"loss": 0.1849,
"step": 15200
},
{
"epoch": 0.46105167997589275,
"grad_norm": 0.030029296875,
"learning_rate": 0.001256667855332587,
"loss": 0.1846,
"step": 15300
},
{
"epoch": 0.4640650896489378,
"grad_norm": 0.0225830078125,
"learning_rate": 0.0012480097134738009,
"loss": 0.185,
"step": 15400
},
{
"epoch": 0.4670784993219828,
"grad_norm": 0.02490234375,
"learning_rate": 0.0012393374696547015,
"loss": 0.1861,
"step": 15500
},
{
"epoch": 0.4700919089950279,
"grad_norm": 0.0223388671875,
"learning_rate": 0.0012306519501429395,
"loss": 0.1877,
"step": 15600
},
{
"epoch": 0.4731053186680729,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0012219539824710357,
"loss": 0.1859,
"step": 15700
},
{
"epoch": 0.47611872834111796,
"grad_norm": 0.02392578125,
"learning_rate": 0.0012132443953575397,
"loss": 0.1847,
"step": 15800
},
{
"epoch": 0.47913213801416304,
"grad_norm": 0.0267333984375,
"learning_rate": 0.0012045240186280676,
"loss": 0.1853,
"step": 15900
},
{
"epoch": 0.4821455476872081,
"grad_norm": 0.0211181640625,
"learning_rate": 0.0011957936831362426,
"loss": 0.185,
"step": 16000
},
{
"epoch": 0.4851589573602531,
"grad_norm": 0.029541015625,
"learning_rate": 0.0011870542206845298,
"loss": 0.1849,
"step": 16100
},
{
"epoch": 0.4881723670332982,
"grad_norm": 0.0213623046875,
"learning_rate": 0.001178306463944987,
"loss": 0.1835,
"step": 16200
},
{
"epoch": 0.4911857767063432,
"grad_norm": 0.026123046875,
"learning_rate": 0.0011695512463799286,
"loss": 0.1837,
"step": 16300
},
{
"epoch": 0.49419918637938826,
"grad_norm": 0.0234375,
"learning_rate": 0.0011607894021625166,
"loss": 0.1847,
"step": 16400
},
{
"epoch": 0.49721259605243334,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0011520217660972811,
"loss": 0.1853,
"step": 16500
},
{
"epoch": 0.5002260057254784,
"grad_norm": 0.02392578125,
"learning_rate": 0.0011432491735405852,
"loss": 0.1827,
"step": 16600
},
{
"epoch": 0.5002260057254784,
"eval_peoplespeech-clean-transcription_loss": 1.5211623907089233,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.7246,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.346,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
"step": 16600
},
{
"epoch": 0.5032394153985235,
"grad_norm": 0.0291748046875,
"learning_rate": 0.0011344724603210318,
"loss": 0.1818,
"step": 16700
},
{
"epoch": 0.5062528250715684,
"grad_norm": 0.025146484375,
"learning_rate": 0.0011256924626598297,
"loss": 0.1831,
"step": 16800
},
{
"epoch": 0.5092662347446135,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0011169100170911204,
"loss": 0.184,
"step": 16900
},
{
"epoch": 0.5122796444176586,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0011081259603822747,
"loss": 0.1833,
"step": 17000
},
{
"epoch": 0.5152930540907036,
"grad_norm": 0.0294189453125,
"learning_rate": 0.0010993411294541694,
"loss": 0.1841,
"step": 17100
},
{
"epoch": 0.5183064637637487,
"grad_norm": 0.0223388671875,
"learning_rate": 0.001090556361301446,
"loss": 0.1849,
"step": 17200
},
{
"epoch": 0.5213198734367938,
"grad_norm": 0.0274658203125,
"learning_rate": 0.0010817724929127646,
"loss": 0.1831,
"step": 17300
},
{
"epoch": 0.5243332831098387,
"grad_norm": 0.021484375,
"learning_rate": 0.00107299036119106,
"loss": 0.1822,
"step": 17400
},
{
"epoch": 0.5273466927828838,
"grad_norm": 0.02880859375,
"learning_rate": 0.0010642108028738003,
"loss": 0.1819,
"step": 17500
},
{
"epoch": 0.5303601024559289,
"grad_norm": 0.021728515625,
"learning_rate": 0.0010554346544532672,
"loss": 0.1839,
"step": 17600
},
{
"epoch": 0.5333735121289739,
"grad_norm": 0.031494140625,
"learning_rate": 0.0010466627520968577,
"loss": 0.1858,
"step": 17700
},
{
"epoch": 0.536386921802019,
"grad_norm": 0.02197265625,
"learning_rate": 0.001037895931567414,
"loss": 0.1837,
"step": 17800
},
{
"epoch": 0.539400331475064,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0010291350281435962,
"loss": 0.1819,
"step": 17900
},
{
"epoch": 0.5424137411481091,
"grad_norm": 0.024169921875,
"learning_rate": 0.0010203808765402993,
"loss": 0.1835,
"step": 18000
},
{
"epoch": 0.5454271508211541,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0010116343108291233,
"loss": 0.1828,
"step": 18100
},
{
"epoch": 0.5484405604941992,
"grad_norm": 0.0234375,
"learning_rate": 0.0010028961643589044,
"loss": 0.1802,
"step": 18200
},
{
"epoch": 0.5502486062980262,
"eval_peoplespeech-clean-transcription_loss": 1.5098843574523926,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.7248,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.346,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
"step": 18260
},
{
"epoch": 0.5514539701672443,
"grad_norm": 0.0277099609375,
"learning_rate": 0.0009941672696763173,
"loss": 0.1835,
"step": 18300
},
{
"epoch": 0.5544673798402893,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0009854484584465506,
"loss": 0.1815,
"step": 18400
},
{
"epoch": 0.5574807895133344,
"grad_norm": 0.03076171875,
"learning_rate": 0.0009767405613740716,
"loss": 0.1817,
"step": 18500
},
{
"epoch": 0.5604941991863794,
"grad_norm": 0.0230712890625,
"learning_rate": 0.0009680444081234734,
"loss": 0.1822,
"step": 18600
},
{
"epoch": 0.5635076088594244,
"grad_norm": 0.0299072265625,
"learning_rate": 0.0009593608272404317,
"loss": 0.183,
"step": 18700
},
{
"epoch": 0.5665210185324695,
"grad_norm": 0.0213623046875,
"learning_rate": 0.0009506906460727618,
"loss": 0.1813,
"step": 18800
},
{
"epoch": 0.5695344282055146,
"grad_norm": 0.0277099609375,
"learning_rate": 0.0009420346906915895,
"loss": 0.1826,
"step": 18900
},
{
"epoch": 0.5725478378785596,
"grad_norm": 0.021728515625,
"learning_rate": 0.0009333937858126477,
"loss": 0.1799,
"step": 19000
},
{
"epoch": 0.5755612475516046,
"grad_norm": 0.0303955078125,
"learning_rate": 0.0009247687547176979,
"loss": 0.1819,
"step": 19100
},
{
"epoch": 0.5785746572246497,
"grad_norm": 0.021728515625,
"learning_rate": 0.0009161604191760915,
"loss": 0.1804,
"step": 19200
},
{
"epoch": 0.5815880668976947,
"grad_norm": 0.028564453125,
"learning_rate": 0.000907569599366473,
"loss": 0.181,
"step": 19300
},
{
"epoch": 0.5846014765707398,
"grad_norm": 0.0223388671875,
"learning_rate": 0.000898997113798635,
"loss": 0.1798,
"step": 19400
},
{
"epoch": 0.5876148862437849,
"grad_norm": 0.0279541015625,
"learning_rate": 0.0008904437792355364,
"loss": 0.179,
"step": 19500
},
{
"epoch": 0.5906282959168299,
"grad_norm": 0.02392578125,
"learning_rate": 0.0008819104106154776,
"loss": 0.1808,
"step": 19600
},
{
"epoch": 0.593641705589875,
"grad_norm": 0.028076171875,
"learning_rate": 0.0008733978209744609,
"loss": 0.1802,
"step": 19700
},
{
"epoch": 0.59665511526292,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0008649068213687225,
"loss": 0.1813,
"step": 19800
},
{
"epoch": 0.599668524935965,
"grad_norm": 0.0279541015625,
"learning_rate": 0.0008564382207974612,
"loss": 0.1807,
"step": 19900
},
{
"epoch": 0.600271206870574,
"eval_peoplespeech-clean-transcription_loss": 1.5082225799560547,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.0089,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.569,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.071,
"step": 19920
},
{
"epoch": 0.6026819346090101,
"grad_norm": 0.0234375,
"learning_rate": 0.0008479928261257557,
"loss": 0.1807,
"step": 20000
},
{
"epoch": 0.6056953442820552,
"grad_norm": 0.0284423828125,
"learning_rate": 0.0008395714420076905,
"loss": 0.1813,
"step": 20100
},
{
"epoch": 0.6087087539551002,
"grad_norm": 0.0223388671875,
"learning_rate": 0.0008311748708096898,
"loss": 0.1794,
"step": 20200
},
{
"epoch": 0.6117221636281452,
"grad_norm": 0.033935546875,
"learning_rate": 0.0008228039125340721,
"loss": 0.1809,
"step": 20300
},
{
"epoch": 0.6147355733011903,
"grad_norm": 0.0234375,
"learning_rate": 0.0008144593647428254,
"loss": 0.1796,
"step": 20400
},
{
"epoch": 0.6177489829742353,
"grad_norm": 0.0281982421875,
"learning_rate": 0.0008061420224816187,
"loss": 0.1807,
"step": 20500
},
{
"epoch": 0.6207623926472804,
"grad_norm": 0.0242919921875,
"learning_rate": 0.0007978526782040547,
"loss": 0.1806,
"step": 20600
},
{
"epoch": 0.6237758023203255,
"grad_norm": 0.0308837890625,
"learning_rate": 0.0007895921216961628,
"loss": 0.1802,
"step": 20700
},
{
"epoch": 0.6267892119933705,
"grad_norm": 0.0244140625,
"learning_rate": 0.0007813611400011535,
"loss": 0.1806,
"step": 20800
},
{
"epoch": 0.6298026216664155,
"grad_norm": 0.0322265625,
"learning_rate": 0.0007731605173444294,
"loss": 0.1799,
"step": 20900
},
{
"epoch": 0.6328160313394606,
"grad_norm": 0.0228271484375,
"learning_rate": 0.0007649910350588683,
"loss": 0.1797,
"step": 21000
},
{
"epoch": 0.6358294410125056,
"grad_norm": 0.025146484375,
"learning_rate": 0.000756853471510377,
"loss": 0.1779,
"step": 21100
},
{
"epoch": 0.6388428506855507,
"grad_norm": 0.0240478515625,
"learning_rate": 0.0007487486020237337,
"loss": 0.1786,
"step": 21200
},
{
"epoch": 0.6418562603585958,
"grad_norm": 0.0302734375,
"learning_rate": 0.0007406771988087153,
"loss": 0.1795,
"step": 21300
},
{
"epoch": 0.6448696700316408,
"grad_norm": 0.02392578125,
"learning_rate": 0.0007326400308865245,
"loss": 0.1827,
"step": 21400
},
{
"epoch": 0.6478830797046858,
"grad_norm": 0.02978515625,
"learning_rate": 0.0007246378640165184,
"loss": 0.1797,
"step": 21500
},
{
"epoch": 0.6502938074431219,
"eval_peoplespeech-clean-transcription_loss": 1.4861868619918823,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.4228,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.437,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069,
"step": 21580
},
{
"epoch": 0.6508964893777309,
"grad_norm": 0.0218505859375,
"learning_rate": 0.0007166714606232492,
"loss": 0.1812,
"step": 21600
},
{
"epoch": 0.6539098990507759,
"grad_norm": 0.0291748046875,
"learning_rate": 0.0007087415797238248,
"loss": 0.1826,
"step": 21700
},
{
"epoch": 0.656923308723821,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0007008489768555886,
"loss": 0.18,
"step": 21800
},
{
"epoch": 0.6599367183968661,
"grad_norm": 0.0286865234375,
"learning_rate": 0.0006929944040041347,
"loss": 0.1786,
"step": 21900
},
{
"epoch": 0.6629501280699112,
"grad_norm": 0.02294921875,
"learning_rate": 0.0006851786095316618,
"loss": 0.1805,
"step": 22000
},
{
"epoch": 0.6659635377429561,
"grad_norm": 0.0257568359375,
"learning_rate": 0.000677402338105672,
"loss": 0.1786,
"step": 22100
},
{
"epoch": 0.6689769474160012,
"grad_norm": 0.024658203125,
"learning_rate": 0.0006696663306280182,
"loss": 0.181,
"step": 22200
},
{
"epoch": 0.6719903570890463,
"grad_norm": 0.026123046875,
"learning_rate": 0.0006619713241643147,
"loss": 0.1797,
"step": 22300
},
{
"epoch": 0.6750037667620913,
"grad_norm": 0.0263671875,
"learning_rate": 0.0006543180518737122,
"loss": 0.1793,
"step": 22400
},
{
"epoch": 0.6780171764351364,
"grad_norm": 0.02685546875,
"learning_rate": 0.0006467072429390431,
"loss": 0.178,
"step": 22500
},
{
"epoch": 0.6810305861081815,
"grad_norm": 0.02392578125,
"learning_rate": 0.0006391396224973473,
"loss": 0.1793,
"step": 22600
},
{
"epoch": 0.6840439957812264,
"grad_norm": 0.026123046875,
"learning_rate": 0.0006316159115707838,
"loss": 0.1793,
"step": 22700
},
{
"epoch": 0.6870574054542715,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0006241368269979337,
"loss": 0.177,
"step": 22800
},
{
"epoch": 0.6900708151273166,
"grad_norm": 0.028076171875,
"learning_rate": 0.0006167030813654996,
"loss": 0.1781,
"step": 22900
},
{
"epoch": 0.6930842248003616,
"grad_norm": 0.02685546875,
"learning_rate": 0.0006093153829404155,
"loss": 0.1782,
"step": 23000
},
{
"epoch": 0.6960976344734067,
"grad_norm": 0.0296630859375,
"learning_rate": 0.0006019744356023627,
"loss": 0.179,
"step": 23100
},
{
"epoch": 0.6991110441464518,
"grad_norm": 0.0250244140625,
"learning_rate": 0.0005946809387767075,
"loss": 0.1788,
"step": 23200
},
{
"epoch": 0.7003164080156697,
"eval_peoplespeech-clean-transcription_loss": 1.503227710723877,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 14.6551,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.367,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
"step": 23240
},
{
"epoch": 0.7021244538194967,
"grad_norm": 0.03125,
"learning_rate": 0.000587435587367861,
"loss": 0.1789,
"step": 23300
},
{
"epoch": 0.7051378634925418,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0005802390716930713,
"loss": 0.1771,
"step": 23400
},
{
"epoch": 0.7081512731655869,
"grad_norm": 0.0311279296875,
"learning_rate": 0.0005730920774166495,
"loss": 0.1793,
"step": 23500
},
{
"epoch": 0.7111646828386319,
"grad_norm": 0.02587890625,
"learning_rate": 0.0005659952854846461,
"loss": 0.1773,
"step": 23600
},
{
"epoch": 0.714178092511677,
"grad_norm": 0.0299072265625,
"learning_rate": 0.0005589493720599683,
"loss": 0.1785,
"step": 23700
},
{
"epoch": 0.717191502184722,
"grad_norm": 0.02587890625,
"learning_rate": 0.0005519550084579583,
"loss": 0.1776,
"step": 23800
},
{
"epoch": 0.720204911857767,
"grad_norm": 0.0279541015625,
"learning_rate": 0.0005450128610824328,
"loss": 0.179,
"step": 23900
},
{
"epoch": 0.7232183215308121,
"grad_norm": 0.0224609375,
"learning_rate": 0.0005381235913621889,
"loss": 0.1779,
"step": 24000
},
{
"epoch": 0.7262317312038572,
"grad_norm": 0.02783203125,
"learning_rate": 0.0005312878556879856,
"loss": 0.1776,
"step": 24100
},
{
"epoch": 0.7292451408769022,
"grad_norm": 0.0267333984375,
"learning_rate": 0.0005245063053500047,
"loss": 0.1796,
"step": 24200
},
{
"epoch": 0.7322585505499473,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0005177795864757979,
"loss": 0.179,
"step": 24300
},
{
"epoch": 0.7352719602229923,
"grad_norm": 0.024169921875,
"learning_rate": 0.0005111083399687246,
"loss": 0.179,
"step": 24400
},
{
"epoch": 0.7382853698960373,
"grad_norm": 0.0264892578125,
"learning_rate": 0.0005044932014468884,
"loss": 0.178,
"step": 24500
},
{
"epoch": 0.7412987795690824,
"grad_norm": 0.0238037109375,
"learning_rate": 0.0004979348011825788,
"loss": 0.1797,
"step": 24600
},
{
"epoch": 0.7443121892421275,
"grad_norm": 0.0264892578125,
"learning_rate": 0.000491433764042219,
"loss": 0.1793,
"step": 24700
},
{
"epoch": 0.7473255989151725,
"grad_norm": 0.025390625,
"learning_rate": 0.0004849907094268304,
"loss": 0.1788,
"step": 24800
},
{
"epoch": 0.7503390085882176,
"grad_norm": 0.02490234375,
"learning_rate": 0.0004786062512130186,
"loss": 0.1784,
"step": 24900
},
{
"epoch": 0.7503390085882176,
"eval_peoplespeech-clean-transcription_loss": 1.5016210079193115,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.8104,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.634,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 24900
},
{
"epoch": 0.7533524182612626,
"grad_norm": 0.0234375,
"learning_rate": 0.00047228099769448437,
"loss": 0.1769,
"step": 25000
},
{
"epoch": 0.7563658279343076,
"grad_norm": 0.0277099609375,
"learning_rate": 0.00046601555152406694,
"loss": 0.1781,
"step": 25100
},
{
"epoch": 0.7593792376073527,
"grad_norm": 0.0238037109375,
"learning_rate": 0.0004598105096563256,
"loss": 0.1773,
"step": 25200
},
{
"epoch": 0.7623926472803978,
"grad_norm": 0.0262451171875,
"learning_rate": 0.00045366646329066243,
"loss": 0.1782,
"step": 25300
},
{
"epoch": 0.7654060569534428,
"grad_norm": 0.0242919921875,
"learning_rate": 0.0004475839978149959,
"loss": 0.1768,
"step": 25400
},
{
"epoch": 0.7684194666264879,
"grad_norm": 0.0269775390625,
"learning_rate": 0.00044156369274998554,
"loss": 0.1776,
"step": 25500
},
{
"epoch": 0.7714328762995329,
"grad_norm": 0.0262451171875,
"learning_rate": 0.00043560612169381583,
"loss": 0.1763,
"step": 25600
},
{
"epoch": 0.7744462859725779,
"grad_norm": 0.0260009765625,
"learning_rate": 0.00042971185226754895,
"loss": 0.1775,
"step": 25700
},
{
"epoch": 0.777459695645623,
"grad_norm": 0.0252685546875,
"learning_rate": 0.00042388144606103926,
"loss": 0.1791,
"step": 25800
},
{
"epoch": 0.7804731053186681,
"grad_norm": 0.0277099609375,
"learning_rate": 0.00041811545857942936,
"loss": 0.1802,
"step": 25900
},
{
"epoch": 0.7834865149917132,
"grad_norm": 0.02392578125,
"learning_rate": 0.00041241443919022124,
"loss": 0.1768,
"step": 26000
},
{
"epoch": 0.7864999246647582,
"grad_norm": 0.02978515625,
"learning_rate": 0.0004067789310709359,
"loss": 0.181,
"step": 26100
},
{
"epoch": 0.7895133343378032,
"grad_norm": 0.02490234375,
"learning_rate": 0.0004012094711573591,
"loss": 0.1794,
"step": 26200
},
{
"epoch": 0.7925267440108483,
"grad_norm": 0.027587890625,
"learning_rate": 0.0003957065900923845,
"loss": 0.1784,
"step": 26300
},
{
"epoch": 0.7955401536838933,
"grad_norm": 0.0245361328125,
"learning_rate": 0.00039027081217545554,
"loss": 0.1777,
"step": 26400
},
{
"epoch": 0.7985535633569384,
"grad_norm": 0.02880859375,
"learning_rate": 0.0003849026553126118,
"loss": 0.1762,
"step": 26500
},
{
"epoch": 0.8003616091607654,
"eval_peoplespeech-clean-transcription_loss": 1.4861080646514893,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7251,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.663,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 26560
},
{
"epoch": 0.8015669730299835,
"grad_norm": 0.0257568359375,
"learning_rate": 0.0003796026309671429,
"loss": 0.1792,
"step": 26600
},
{
"epoch": 0.8045803827030285,
"grad_norm": 0.0267333984375,
"learning_rate": 0.0003743712441108592,
"loss": 0.1795,
"step": 26700
},
{
"epoch": 0.8075937923760735,
"grad_norm": 0.0250244140625,
"learning_rate": 0.00036920899317597976,
"loss": 0.1764,
"step": 26800
},
{
"epoch": 0.8106072020491186,
"grad_norm": 0.0269775390625,
"learning_rate": 0.00036411637000764133,
"loss": 0.18,
"step": 26900
},
{
"epoch": 0.8136206117221636,
"grad_norm": 0.0240478515625,
"learning_rate": 0.00035909385981703777,
"loss": 0.1783,
"step": 27000
},
{
"epoch": 0.8166340213952087,
"grad_norm": 0.02587890625,
"learning_rate": 0.0003541419411351909,
"loss": 0.1801,
"step": 27100
},
{
"epoch": 0.8196474310682538,
"grad_norm": 0.02734375,
"learning_rate": 0.0003492610857673564,
"loss": 0.1781,
"step": 27200
},
{
"epoch": 0.8226608407412987,
"grad_norm": 0.0263671875,
"learning_rate": 0.000344451758748072,
"loss": 0.1772,
"step": 27300
},
{
"epoch": 0.8256742504143438,
"grad_norm": 0.0279541015625,
"learning_rate": 0.00033971441829685036,
"loss": 0.1762,
"step": 27400
},
{
"epoch": 0.8286876600873889,
"grad_norm": 0.0274658203125,
"learning_rate": 0.0003350495157745207,
"loss": 0.1785,
"step": 27500
},
{
"epoch": 0.8317010697604339,
"grad_norm": 0.0245361328125,
"learning_rate": 0.00033045749564022497,
"loss": 0.1778,
"step": 27600
},
{
"epoch": 0.834714479433479,
"grad_norm": 0.0289306640625,
"learning_rate": 0.00032593879540907076,
"loss": 0.1773,
"step": 27700
},
{
"epoch": 0.8377278891065241,
"grad_norm": 0.022705078125,
"learning_rate": 0.0003214938456104454,
"loss": 0.1785,
"step": 27800
},
{
"epoch": 0.840741298779569,
"grad_norm": 0.0274658203125,
"learning_rate": 0.0003171230697469978,
"loss": 0.179,
"step": 27900
},
{
"epoch": 0.8437547084526141,
"grad_norm": 0.026611328125,
"learning_rate": 0.00031282688425428686,
"loss": 0.1778,
"step": 28000
},
{
"epoch": 0.8467681181256592,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0003086056984611053,
"loss": 0.1782,
"step": 28100
},
{
"epoch": 0.8497815277987042,
"grad_norm": 0.0257568359375,
"learning_rate": 0.00030445991455047927,
"loss": 0.1764,
"step": 28200
},
{
"epoch": 0.8503842097333132,
"eval_peoplespeech-clean-transcription_loss": 1.485144853591919,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.3295,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.801,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.075,
"step": 28220
},
{
"epoch": 0.8527949374717493,
"grad_norm": 0.0279541015625,
"learning_rate": 0.000300389927521351,
"loss": 0.178,
"step": 28300
},
{
"epoch": 0.8558083471447944,
"grad_norm": 0.0255126953125,
"learning_rate": 0.0002963961251509423,
"loss": 0.1794,
"step": 28400
},
{
"epoch": 0.8588217568178393,
"grad_norm": 0.0289306640625,
"learning_rate": 0.0002924788879578099,
"loss": 0.1774,
"step": 28500
},
{
"epoch": 0.8618351664908844,
"grad_norm": 0.02734375,
"learning_rate": 0.00028863858916559,
"loss": 0.178,
"step": 28600
},
{
"epoch": 0.8648485761639295,
"grad_norm": 0.0247802734375,
"learning_rate": 0.0002848755946674383,
"loss": 0.1756,
"step": 28700
},
{
"epoch": 0.8678619858369745,
"grad_norm": 0.0264892578125,
"learning_rate": 0.00028119026299116905,
"loss": 0.1783,
"step": 28800
},
{
"epoch": 0.8708753955100196,
"grad_norm": 0.029052734375,
"learning_rate": 0.0002775829452650956,
"loss": 0.1789,
"step": 28900
},
{
"epoch": 0.8738888051830647,
"grad_norm": 0.0262451171875,
"learning_rate": 0.00027405398518457575,
"loss": 0.1763,
"step": 29000
},
{
"epoch": 0.8769022148561096,
"grad_norm": 0.0281982421875,
"learning_rate": 0.0002706037189792652,
"loss": 0.1771,
"step": 29100
},
{
"epoch": 0.8799156245291547,
"grad_norm": 0.0263671875,
"learning_rate": 0.00026723247538108254,
"loss": 0.1795,
"step": 29200
},
{
"epoch": 0.8829290342021998,
"grad_norm": 0.0277099609375,
"learning_rate": 0.00026394057559288856,
"loss": 0.1779,
"step": 29300
},
{
"epoch": 0.8859424438752448,
"grad_norm": 0.02685546875,
"learning_rate": 0.00026072833325788375,
"loss": 0.1779,
"step": 29400
},
{
"epoch": 0.8889558535482899,
"grad_norm": 0.02587890625,
"learning_rate": 0.0002575960544297239,
"loss": 0.1783,
"step": 29500
},
{
"epoch": 0.891969263221335,
"grad_norm": 0.0235595703125,
"learning_rate": 0.0002545440375433609,
"loss": 0.1766,
"step": 29600
},
{
"epoch": 0.8949826728943799,
"grad_norm": 0.030517578125,
"learning_rate": 0.0002515725733866084,
"loss": 0.1782,
"step": 29700
},
{
"epoch": 0.897996082567425,
"grad_norm": 0.024658203125,
"learning_rate": 0.000248681945072437,
"loss": 0.1769,
"step": 29800
},
{
"epoch": 0.9004068103058611,
"eval_peoplespeech-clean-transcription_loss": 1.481724500656128,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.9492,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.588,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
"step": 29880
},
{
"epoch": 0.9010094922404701,
"grad_norm": 0.0272216796875,
"learning_rate": 0.000245872428011999,
"loss": 0.1763,
"step": 29900
},
{
"epoch": 0.9040229019135152,
"grad_norm": 0.0255126953125,
"learning_rate": 0.00024314428988838856,
"loss": 0.1764,
"step": 30000
},
{
"epoch": 0.9070363115865602,
"grad_norm": 0.0274658203125,
"learning_rate": 0.000240497790631138,
"loss": 0.1754,
"step": 30100
},
{
"epoch": 0.9100497212596053,
"grad_norm": 0.02587890625,
"learning_rate": 0.00023793318239145138,
"loss": 0.1781,
"step": 30200
},
{
"epoch": 0.9130631309326503,
"grad_norm": 0.0299072265625,
"learning_rate": 0.00023545070951818084,
"loss": 0.1776,
"step": 30300
},
{
"epoch": 0.9160765406056953,
"grad_norm": 0.02392578125,
"learning_rate": 0.00023305060853454597,
"loss": 0.1789,
"step": 30400
},
{
"epoch": 0.9190899502787404,
"grad_norm": 0.0277099609375,
"learning_rate": 0.00023073310811559807,
"loss": 0.1793,
"step": 30500
},
{
"epoch": 0.9221033599517855,
"grad_norm": 0.02685546875,
"learning_rate": 0.00022849842906643277,
"loss": 0.1793,
"step": 30600
},
{
"epoch": 0.9251167696248305,
"grad_norm": 0.024169921875,
"learning_rate": 0.00022634678430115206,
"loss": 0.177,
"step": 30700
},
{
"epoch": 0.9281301792978756,
"grad_norm": 0.0247802734375,
"learning_rate": 0.0002242783788225793,
"loss": 0.1788,
"step": 30800
},
{
"epoch": 0.9311435889709206,
"grad_norm": 0.0257568359375,
"learning_rate": 0.00022229340970272572,
"loss": 0.1777,
"step": 30900
},
{
"epoch": 0.9341569986439656,
"grad_norm": 0.02392578125,
"learning_rate": 0.00022039206606401526,
"loss": 0.1776,
"step": 31000
},
{
"epoch": 0.9371704083170107,
"grad_norm": 0.02880859375,
"learning_rate": 0.0002185745290612646,
"loss": 0.1774,
"step": 31100
},
{
"epoch": 0.9401838179900558,
"grad_norm": 0.0286865234375,
"learning_rate": 0.00021684097186442405,
"loss": 0.1786,
"step": 31200
},
{
"epoch": 0.9431972276631008,
"grad_norm": 0.026123046875,
"learning_rate": 0.0002151915596420774,
"loss": 0.1777,
"step": 31300
},
{
"epoch": 0.9462106373361459,
"grad_norm": 0.0252685546875,
"learning_rate": 0.0002136264495457057,
"loss": 0.1781,
"step": 31400
},
{
"epoch": 0.9492240470091909,
"grad_norm": 0.028076171875,
"learning_rate": 0.00021214579069471447,
"loss": 0.1772,
"step": 31500
},
{
"epoch": 0.9504294108784089,
"eval_peoplespeech-clean-transcription_loss": 1.4812726974487305,
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062,
"eval_peoplespeech-clean-transcription_runtime": 13.7554,
"eval_peoplespeech-clean-transcription_samples_per_second": 4.653,
"eval_peoplespeech-clean-transcription_steps_per_second": 0.073,
"step": 31540
},
{
"epoch": 0.9522374566822359,
"grad_norm": 0.0230712890625,
"learning_rate": 0.0002107497241622257,
"loss": 0.1767,
"step": 31600
},
{
"epoch": 0.955250866355281,
"grad_norm": 0.02685546875,
"learning_rate": 0.00020943838296163657,
"loss": 0.1788,
"step": 31700
},
{
"epoch": 0.9582642760283261,
"grad_norm": 0.026123046875,
"learning_rate": 0.00020821189203394706,
"loss": 0.1783,
"step": 31800
},
{
"epoch": 0.9612776857013711,
"grad_norm": 0.0238037109375,
"learning_rate": 0.00020707036823585488,
"loss": 0.1753,
"step": 31900
},
{
"epoch": 0.9642910953744162,
"grad_norm": 0.027587890625,
"learning_rate": 0.00020601392032862275,
"loss": 0.1752,
"step": 32000
},
{
"epoch": 0.9673045050474612,
"grad_norm": 0.0255126953125,
"learning_rate": 0.00020504264896771505,
"loss": 0.177,
"step": 32100
},
{
"epoch": 0.9703179147205062,
"grad_norm": 0.025634765625,
"learning_rate": 0.00020415664669320817,
"loss": 0.1786,
"step": 32200
},
{
"epoch": 0.9733313243935513,
"grad_norm": 0.0244140625,
"learning_rate": 0.00020335599792097327,
"loss": 0.1764,
"step": 32300
},
{
"epoch": 0.9763447340665964,
"grad_norm": 0.0240478515625,
"learning_rate": 0.00020264077893463362,
"loss": 0.1781,
"step": 32400
},
{
"epoch": 0.9793581437396414,
"grad_norm": 0.0277099609375,
"learning_rate": 0.00020201105787829627,
"loss": 0.1761,
"step": 32500
},
{
"epoch": 0.9823715534126864,
"grad_norm": 0.027099609375,
"learning_rate": 0.00020146689475005947,
"loss": 0.1786,
"step": 32600
},
{
"epoch": 0.9853849630857315,
"grad_norm": 0.0242919921875,
"learning_rate": 0.00020100834139629646,
"loss": 0.1776,
"step": 32700
},
{
"epoch": 0.9883983727587765,
"grad_norm": 0.02685546875,
"learning_rate": 0.00020063544150671555,
"loss": 0.1765,
"step": 32800
},
{
"epoch": 0.9914117824318216,
"grad_norm": 0.026123046875,
"learning_rate": 0.00020034823061019724,
"loss": 0.1768,
"step": 32900
},
{
"epoch": 0.9944251921048667,
"grad_norm": 0.026611328125,
"learning_rate": 0.00020014673607140958,
"loss": 0.1771,
"step": 33000
},
{
"epoch": 0.9974386017779117,
"grad_norm": 0.02490234375,
"learning_rate": 0.00020003097708820057,
"loss": 0.1781,
"step": 33100
}
],
"logging_steps": 100,
"max_steps": 33185,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 8297,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.732431512509769e+19,
"train_batch_size": 672,
"trial_name": null,
"trial_params": null
}