{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13135968705307038, "eval_steps": 200, "global_step": 172400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.43682417860277e-05, "grad_norm": 0.4972322881221771, "learning_rate": 1.9999999990147362e-05, "loss": 1.9714, "step": 100 }, { "epoch": 0.0001487364835720554, "grad_norm": 0.6138768792152405, "learning_rate": 1.9999999958487906e-05, "loss": 1.6983, "step": 200 }, { "epoch": 0.0002231047253580831, "grad_norm": 0.92356276512146, "learning_rate": 1.999999990499435e-05, "loss": 1.6566, "step": 300 }, { "epoch": 0.0002974729671441108, "grad_norm": 0.5427595376968384, "learning_rate": 1.9999999829666684e-05, "loss": 1.6238, "step": 400 }, { "epoch": 0.00037184120893013847, "grad_norm": 1.5316662788391113, "learning_rate": 1.9999999732504913e-05, "loss": 1.6102, "step": 500 }, { "epoch": 0.0004462094507161662, "grad_norm": 0.477271169424057, "learning_rate": 1.999999961350904e-05, "loss": 1.5936, "step": 600 }, { "epoch": 0.0005205776925021939, "grad_norm": 1.1669890880584717, "learning_rate": 1.9999999472679058e-05, "loss": 1.6411, "step": 700 }, { "epoch": 0.0005949459342882216, "grad_norm": 0.6108381748199463, "learning_rate": 1.9999999310014972e-05, "loss": 1.5256, "step": 800 }, { "epoch": 0.0006693141760742492, "grad_norm": 0.6316787004470825, "learning_rate": 1.9999999125516783e-05, "loss": 1.6363, "step": 900 }, { "epoch": 0.0007436824178602769, "grad_norm": 0.8376529216766357, "learning_rate": 1.999999891918449e-05, "loss": 1.5394, "step": 1000 }, { "epoch": 0.0008180506596463047, "grad_norm": 1.0385313034057617, "learning_rate": 1.9999998691018094e-05, "loss": 1.638, "step": 1100 }, { "epoch": 0.0008924189014323324, "grad_norm": 0.8692856431007385, "learning_rate": 1.9999998441017593e-05, "loss": 1.7291, "step": 1200 }, { "epoch": 0.00096678714321836, "grad_norm": 1.059537410736084, "learning_rate": 1.9999998169182993e-05, "loss": 1.6251, "step": 1300 }, { "epoch": 0.0010411553850043877, "grad_norm": 0.3969714045524597, "learning_rate": 1.999999787551429e-05, "loss": 1.5324, "step": 1400 }, { "epoch": 0.0011155236267904154, "grad_norm": 0.6760269403457642, "learning_rate": 1.9999997560011483e-05, "loss": 1.5262, "step": 1500 }, { "epoch": 0.0011898918685764432, "grad_norm": 0.6536991596221924, "learning_rate": 1.9999997222674577e-05, "loss": 1.6578, "step": 1600 }, { "epoch": 0.0012642601103624708, "grad_norm": 1.2318875789642334, "learning_rate": 1.999999686350357e-05, "loss": 1.5478, "step": 1700 }, { "epoch": 0.0013386283521484984, "grad_norm": 1.3172451257705688, "learning_rate": 1.999999648249847e-05, "loss": 1.5926, "step": 1800 }, { "epoch": 0.0014129965939345263, "grad_norm": 1.4219484329223633, "learning_rate": 1.9999996079659265e-05, "loss": 1.5595, "step": 1900 }, { "epoch": 0.0014873648357205539, "grad_norm": 0.6480392813682556, "learning_rate": 1.9999995654985968e-05, "loss": 1.5321, "step": 2000 }, { "epoch": 0.0015617330775065815, "grad_norm": 0.5489968061447144, "learning_rate": 1.999999520847857e-05, "loss": 1.5744, "step": 2100 }, { "epoch": 0.0016361013192926093, "grad_norm": 0.7695141434669495, "learning_rate": 1.999999474013708e-05, "loss": 1.5263, "step": 2200 }, { "epoch": 0.001710469561078637, "grad_norm": 0.7596250176429749, "learning_rate": 1.9999994249961495e-05, "loss": 1.5586, "step": 2300 }, { "epoch": 0.0017848378028646648, "grad_norm": 0.8226674795150757, "learning_rate": 1.9999993737951816e-05, "loss": 1.6021, "step": 2400 }, { "epoch": 0.0018592060446506924, "grad_norm": 0.5418084859848022, "learning_rate": 1.9999993204108044e-05, "loss": 1.6234, "step": 2500 }, { "epoch": 0.00193357428643672, "grad_norm": 0.5253565907478333, "learning_rate": 1.9999992648430182e-05, "loss": 1.5487, "step": 2600 }, { "epoch": 0.0020079425282227476, "grad_norm": 1.0812253952026367, "learning_rate": 1.999999207091823e-05, "loss": 1.6161, "step": 2700 }, { "epoch": 0.0020823107700087755, "grad_norm": 0.6357698440551758, "learning_rate": 1.999999147157219e-05, "loss": 1.5693, "step": 2800 }, { "epoch": 0.0021566790117948033, "grad_norm": 0.9794847369194031, "learning_rate": 1.9999990850392064e-05, "loss": 1.5337, "step": 2900 }, { "epoch": 0.0022310472535808307, "grad_norm": 0.5611212849617004, "learning_rate": 1.9999990207377848e-05, "loss": 1.6034, "step": 3000 }, { "epoch": 0.0023054154953668585, "grad_norm": 0.8199095129966736, "learning_rate": 1.999998954252955e-05, "loss": 1.5291, "step": 3100 }, { "epoch": 0.0023797837371528864, "grad_norm": 0.6310203075408936, "learning_rate": 1.999998885584717e-05, "loss": 1.5337, "step": 3200 }, { "epoch": 0.0024541519789389138, "grad_norm": 0.8682138919830322, "learning_rate": 1.9999988147330707e-05, "loss": 1.5383, "step": 3300 }, { "epoch": 0.0025285202207249416, "grad_norm": 0.6630149483680725, "learning_rate": 1.9999987416980167e-05, "loss": 1.6387, "step": 3400 }, { "epoch": 0.0026028884625109694, "grad_norm": 0.5285632014274597, "learning_rate": 1.9999986664795547e-05, "loss": 1.5507, "step": 3500 }, { "epoch": 0.002677256704296997, "grad_norm": 0.5242965221405029, "learning_rate": 1.9999985890776846e-05, "loss": 1.6422, "step": 3600 }, { "epoch": 0.0027516249460830247, "grad_norm": 0.4600646495819092, "learning_rate": 1.9999985094924076e-05, "loss": 1.6473, "step": 3700 }, { "epoch": 0.0028259931878690525, "grad_norm": 0.6593307256698608, "learning_rate": 1.999998427723723e-05, "loss": 1.5936, "step": 3800 }, { "epoch": 0.00290036142965508, "grad_norm": 0.3825130760669708, "learning_rate": 1.9999983437716315e-05, "loss": 1.5509, "step": 3900 }, { "epoch": 0.0029747296714411077, "grad_norm": 0.46043556928634644, "learning_rate": 1.999998257636133e-05, "loss": 1.5043, "step": 4000 }, { "epoch": 0.0030490979132271356, "grad_norm": 0.751379132270813, "learning_rate": 1.999998169317227e-05, "loss": 1.5258, "step": 4100 }, { "epoch": 0.003123466155013163, "grad_norm": 0.5719695687294006, "learning_rate": 1.9999980788149155e-05, "loss": 1.669, "step": 4200 }, { "epoch": 0.003197834396799191, "grad_norm": 0.5489699244499207, "learning_rate": 1.999997986129197e-05, "loss": 1.5581, "step": 4300 }, { "epoch": 0.0032722026385852187, "grad_norm": 0.5944995880126953, "learning_rate": 1.9999978912600722e-05, "loss": 1.5717, "step": 4400 }, { "epoch": 0.003346570880371246, "grad_norm": 0.4564272165298462, "learning_rate": 1.9999977942075416e-05, "loss": 1.5178, "step": 4500 }, { "epoch": 0.003420939122157274, "grad_norm": 1.082127571105957, "learning_rate": 1.9999976949716057e-05, "loss": 1.6077, "step": 4600 }, { "epoch": 0.0034953073639433017, "grad_norm": 0.7081079483032227, "learning_rate": 1.9999975935522635e-05, "loss": 1.6147, "step": 4700 }, { "epoch": 0.0035696756057293296, "grad_norm": 1.084369421005249, "learning_rate": 1.9999974899495163e-05, "loss": 1.5796, "step": 4800 }, { "epoch": 0.003644043847515357, "grad_norm": 0.5583994388580322, "learning_rate": 1.999997384163364e-05, "loss": 1.5099, "step": 4900 }, { "epoch": 0.003718412089301385, "grad_norm": 0.563099205493927, "learning_rate": 1.999997276193807e-05, "loss": 1.5222, "step": 5000 }, { "epoch": 0.0037927803310874126, "grad_norm": 0.6037421822547913, "learning_rate": 1.9999971660408454e-05, "loss": 1.5916, "step": 5100 }, { "epoch": 0.00386714857287344, "grad_norm": 0.5209466218948364, "learning_rate": 1.9999970537044787e-05, "loss": 1.6196, "step": 5200 }, { "epoch": 0.003941516814659467, "grad_norm": 1.0418217182159424, "learning_rate": 1.9999969391847088e-05, "loss": 1.601, "step": 5300 }, { "epoch": 0.004015885056445495, "grad_norm": 1.235737681388855, "learning_rate": 1.9999968224815345e-05, "loss": 1.4994, "step": 5400 }, { "epoch": 0.004090253298231523, "grad_norm": 1.1249513626098633, "learning_rate": 1.9999967035949567e-05, "loss": 1.5871, "step": 5500 }, { "epoch": 0.004164621540017551, "grad_norm": 0.8271663784980774, "learning_rate": 1.9999965825249753e-05, "loss": 1.5734, "step": 5600 }, { "epoch": 0.004238989781803579, "grad_norm": 0.6501545906066895, "learning_rate": 1.999996459271591e-05, "loss": 1.5859, "step": 5700 }, { "epoch": 0.004313358023589607, "grad_norm": 0.6576992273330688, "learning_rate": 1.9999963338348036e-05, "loss": 1.5457, "step": 5800 }, { "epoch": 0.004387726265375634, "grad_norm": 0.5684088468551636, "learning_rate": 1.9999962062146138e-05, "loss": 1.5518, "step": 5900 }, { "epoch": 0.004462094507161661, "grad_norm": 0.6332255005836487, "learning_rate": 1.9999960764110216e-05, "loss": 1.586, "step": 6000 }, { "epoch": 0.004536462748947689, "grad_norm": 0.4564649164676666, "learning_rate": 1.9999959444240276e-05, "loss": 1.5249, "step": 6100 }, { "epoch": 0.004610830990733717, "grad_norm": 0.5801929235458374, "learning_rate": 1.9999958102536316e-05, "loss": 1.5849, "step": 6200 }, { "epoch": 0.004685199232519745, "grad_norm": 0.8843029737472534, "learning_rate": 1.9999956738998345e-05, "loss": 1.5055, "step": 6300 }, { "epoch": 0.004759567474305773, "grad_norm": 0.7232934832572937, "learning_rate": 1.999995535362636e-05, "loss": 1.5706, "step": 6400 }, { "epoch": 0.004833935716091801, "grad_norm": 0.7958771586418152, "learning_rate": 1.9999953946420368e-05, "loss": 1.5943, "step": 6500 }, { "epoch": 0.0049083039578778275, "grad_norm": 0.7699094414710999, "learning_rate": 1.999995251738037e-05, "loss": 1.6173, "step": 6600 }, { "epoch": 0.004982672199663855, "grad_norm": 0.43996062874794006, "learning_rate": 1.9999951066506368e-05, "loss": 1.5154, "step": 6700 }, { "epoch": 0.005057040441449883, "grad_norm": 0.773326575756073, "learning_rate": 1.9999949593798372e-05, "loss": 1.5791, "step": 6800 }, { "epoch": 0.005131408683235911, "grad_norm": 0.42401251196861267, "learning_rate": 1.9999948099256374e-05, "loss": 1.5429, "step": 6900 }, { "epoch": 0.005205776925021939, "grad_norm": 0.44549378752708435, "learning_rate": 1.999994658288039e-05, "loss": 1.605, "step": 7000 }, { "epoch": 0.005280145166807967, "grad_norm": 0.5648560523986816, "learning_rate": 1.999994504467041e-05, "loss": 1.5536, "step": 7100 }, { "epoch": 0.005354513408593994, "grad_norm": 1.0245320796966553, "learning_rate": 1.999994348462645e-05, "loss": 1.5509, "step": 7200 }, { "epoch": 0.0054288816503800215, "grad_norm": 0.9695309996604919, "learning_rate": 1.9999941902748505e-05, "loss": 1.5892, "step": 7300 }, { "epoch": 0.005503249892166049, "grad_norm": 0.9779026508331299, "learning_rate": 1.9999940299036584e-05, "loss": 1.5901, "step": 7400 }, { "epoch": 0.005577618133952077, "grad_norm": 0.7186980247497559, "learning_rate": 1.999993867349068e-05, "loss": 1.5402, "step": 7500 }, { "epoch": 0.005651986375738105, "grad_norm": 0.751449704170227, "learning_rate": 1.9999937026110813e-05, "loss": 1.5217, "step": 7600 }, { "epoch": 0.005726354617524133, "grad_norm": 0.7808834314346313, "learning_rate": 1.999993535689697e-05, "loss": 1.5254, "step": 7700 }, { "epoch": 0.00580072285931016, "grad_norm": 0.529984176158905, "learning_rate": 1.999993366584917e-05, "loss": 1.6134, "step": 7800 }, { "epoch": 0.005875091101096188, "grad_norm": 0.8374336361885071, "learning_rate": 1.9999931952967404e-05, "loss": 1.4759, "step": 7900 }, { "epoch": 0.0059494593428822155, "grad_norm": 0.3483956754207611, "learning_rate": 1.9999930218251683e-05, "loss": 1.5905, "step": 8000 }, { "epoch": 0.006023827584668243, "grad_norm": 0.8897103667259216, "learning_rate": 1.9999928461702004e-05, "loss": 1.6492, "step": 8100 }, { "epoch": 0.006098195826454271, "grad_norm": 0.5743923783302307, "learning_rate": 1.999992668331838e-05, "loss": 1.5322, "step": 8200 }, { "epoch": 0.006172564068240299, "grad_norm": 1.3532215356826782, "learning_rate": 1.999992488310081e-05, "loss": 1.5155, "step": 8300 }, { "epoch": 0.006246932310026326, "grad_norm": 1.118270754814148, "learning_rate": 1.9999923061049298e-05, "loss": 1.517, "step": 8400 }, { "epoch": 0.006321300551812354, "grad_norm": 1.0752383470535278, "learning_rate": 1.9999921217163847e-05, "loss": 1.5654, "step": 8500 }, { "epoch": 0.006395668793598382, "grad_norm": 0.4761950671672821, "learning_rate": 1.999991935144446e-05, "loss": 1.588, "step": 8600 }, { "epoch": 0.0064700370353844095, "grad_norm": 0.4377930164337158, "learning_rate": 1.9999917463891147e-05, "loss": 1.5932, "step": 8700 }, { "epoch": 0.006544405277170437, "grad_norm": 0.5289610624313354, "learning_rate": 1.9999915554503908e-05, "loss": 1.5362, "step": 8800 }, { "epoch": 0.006618773518956465, "grad_norm": 0.6469466090202332, "learning_rate": 1.9999913623282747e-05, "loss": 1.5515, "step": 8900 }, { "epoch": 0.006693141760742492, "grad_norm": 0.8052897453308105, "learning_rate": 1.999991167022767e-05, "loss": 1.4691, "step": 9000 }, { "epoch": 0.00676751000252852, "grad_norm": 0.4677363932132721, "learning_rate": 1.999990969533868e-05, "loss": 1.5955, "step": 9100 }, { "epoch": 0.006841878244314548, "grad_norm": 0.9299643039703369, "learning_rate": 1.9999907698615777e-05, "loss": 1.5657, "step": 9200 }, { "epoch": 0.006916246486100576, "grad_norm": 0.5175402164459229, "learning_rate": 1.9999905680058974e-05, "loss": 1.5471, "step": 9300 }, { "epoch": 0.0069906147278866035, "grad_norm": 0.6280660033226013, "learning_rate": 1.999990363966827e-05, "loss": 1.5465, "step": 9400 }, { "epoch": 0.007064982969672631, "grad_norm": 0.5920536518096924, "learning_rate": 1.999990157744367e-05, "loss": 1.5587, "step": 9500 }, { "epoch": 0.007139351211458659, "grad_norm": 0.6226286292076111, "learning_rate": 1.999989949338518e-05, "loss": 1.5399, "step": 9600 }, { "epoch": 0.007213719453244686, "grad_norm": 0.757337749004364, "learning_rate": 1.9999897387492803e-05, "loss": 1.5142, "step": 9700 }, { "epoch": 0.007288087695030714, "grad_norm": 0.5596433281898499, "learning_rate": 1.9999895259766547e-05, "loss": 1.4845, "step": 9800 }, { "epoch": 0.007362455936816742, "grad_norm": 0.8564650416374207, "learning_rate": 1.999989311020641e-05, "loss": 1.5007, "step": 9900 }, { "epoch": 0.00743682417860277, "grad_norm": 0.7305134534835815, "learning_rate": 1.99998909388124e-05, "loss": 1.4579, "step": 10000 }, { "epoch": 0.007511192420388797, "grad_norm": 0.5316299200057983, "learning_rate": 1.9999888745584525e-05, "loss": 1.5686, "step": 10100 }, { "epoch": 0.007585560662174825, "grad_norm": 0.8033043742179871, "learning_rate": 1.9999886530522786e-05, "loss": 1.5322, "step": 10200 }, { "epoch": 0.007659928903960852, "grad_norm": 0.8600965738296509, "learning_rate": 1.999988429362719e-05, "loss": 1.5171, "step": 10300 }, { "epoch": 0.00773429714574688, "grad_norm": 0.7395327091217041, "learning_rate": 1.9999882034897743e-05, "loss": 1.5651, "step": 10400 }, { "epoch": 0.007808665387532908, "grad_norm": 0.7305371761322021, "learning_rate": 1.9999879754334445e-05, "loss": 1.5098, "step": 10500 }, { "epoch": 0.007883033629318935, "grad_norm": 0.6956737637519836, "learning_rate": 1.99998774519373e-05, "loss": 1.6477, "step": 10600 }, { "epoch": 0.007957401871104964, "grad_norm": 0.8382702469825745, "learning_rate": 1.9999875127706324e-05, "loss": 1.5233, "step": 10700 }, { "epoch": 0.00803177011289099, "grad_norm": 0.37894684076309204, "learning_rate": 1.999987278164151e-05, "loss": 1.5008, "step": 10800 }, { "epoch": 0.00810613835467702, "grad_norm": 0.5010106563568115, "learning_rate": 1.9999870413742868e-05, "loss": 1.5424, "step": 10900 }, { "epoch": 0.008180506596463046, "grad_norm": 0.6536372900009155, "learning_rate": 1.9999868024010403e-05, "loss": 1.5774, "step": 11000 }, { "epoch": 0.008254874838249075, "grad_norm": 0.43751344084739685, "learning_rate": 1.9999865612444122e-05, "loss": 1.5887, "step": 11100 }, { "epoch": 0.008329243080035102, "grad_norm": 0.4979201853275299, "learning_rate": 1.999986317904403e-05, "loss": 1.5715, "step": 11200 }, { "epoch": 0.008403611321821129, "grad_norm": 0.513481855392456, "learning_rate": 1.9999860723810127e-05, "loss": 1.5182, "step": 11300 }, { "epoch": 0.008477979563607158, "grad_norm": 0.5014403462409973, "learning_rate": 1.9999858246742425e-05, "loss": 1.5185, "step": 11400 }, { "epoch": 0.008552347805393185, "grad_norm": 0.5066354274749756, "learning_rate": 1.9999855747840925e-05, "loss": 1.5964, "step": 11500 }, { "epoch": 0.008626716047179213, "grad_norm": 0.7306295037269592, "learning_rate": 1.999985322710564e-05, "loss": 1.6196, "step": 11600 }, { "epoch": 0.00870108428896524, "grad_norm": 0.3036212623119354, "learning_rate": 1.9999850684536562e-05, "loss": 1.5308, "step": 11700 }, { "epoch": 0.008775452530751267, "grad_norm": 0.51576167345047, "learning_rate": 1.999984812013371e-05, "loss": 1.5954, "step": 11800 }, { "epoch": 0.008849820772537296, "grad_norm": 0.7507824301719666, "learning_rate": 1.999984553389708e-05, "loss": 1.5184, "step": 11900 }, { "epoch": 0.008924189014323323, "grad_norm": 0.43882057070732117, "learning_rate": 1.999984292582668e-05, "loss": 1.5507, "step": 12000 }, { "epoch": 0.008998557256109352, "grad_norm": 1.0746114253997803, "learning_rate": 1.9999840295922518e-05, "loss": 1.5196, "step": 12100 }, { "epoch": 0.009072925497895378, "grad_norm": 0.6190723180770874, "learning_rate": 1.99998376441846e-05, "loss": 1.5683, "step": 12200 }, { "epoch": 0.009147293739681407, "grad_norm": 0.7086498141288757, "learning_rate": 1.9999834970612934e-05, "loss": 1.6125, "step": 12300 }, { "epoch": 0.009221661981467434, "grad_norm": 0.9270760416984558, "learning_rate": 1.999983227520752e-05, "loss": 1.6108, "step": 12400 }, { "epoch": 0.009296030223253461, "grad_norm": 0.47269493341445923, "learning_rate": 1.9999829557968365e-05, "loss": 1.5784, "step": 12500 }, { "epoch": 0.00937039846503949, "grad_norm": 0.888103723526001, "learning_rate": 1.9999826818895477e-05, "loss": 1.5406, "step": 12600 }, { "epoch": 0.009444766706825517, "grad_norm": 0.44103074073791504, "learning_rate": 1.9999824057988865e-05, "loss": 1.6345, "step": 12700 }, { "epoch": 0.009519134948611545, "grad_norm": 0.8790387511253357, "learning_rate": 1.999982127524853e-05, "loss": 1.5069, "step": 12800 }, { "epoch": 0.009593503190397572, "grad_norm": 0.7071767449378967, "learning_rate": 1.9999818470674474e-05, "loss": 1.5656, "step": 12900 }, { "epoch": 0.009667871432183601, "grad_norm": 0.36154705286026, "learning_rate": 1.9999815644266713e-05, "loss": 1.5022, "step": 13000 }, { "epoch": 0.009742239673969628, "grad_norm": 0.8780633807182312, "learning_rate": 1.9999812796025247e-05, "loss": 1.5585, "step": 13100 }, { "epoch": 0.009816607915755655, "grad_norm": 0.45413634181022644, "learning_rate": 1.9999809925950084e-05, "loss": 1.5732, "step": 13200 }, { "epoch": 0.009890976157541684, "grad_norm": 0.6584810614585876, "learning_rate": 1.999980703404123e-05, "loss": 1.5572, "step": 13300 }, { "epoch": 0.00996534439932771, "grad_norm": 0.4910299479961395, "learning_rate": 1.9999804120298694e-05, "loss": 1.5544, "step": 13400 }, { "epoch": 0.01003971264111374, "grad_norm": 0.4675331115722656, "learning_rate": 1.9999801184722477e-05, "loss": 1.5055, "step": 13500 }, { "epoch": 0.010114080882899766, "grad_norm": 0.7481106519699097, "learning_rate": 1.999979822731259e-05, "loss": 1.5148, "step": 13600 }, { "epoch": 0.010188449124685793, "grad_norm": 0.5710633993148804, "learning_rate": 1.9999795248069036e-05, "loss": 1.6359, "step": 13700 }, { "epoch": 0.010262817366471822, "grad_norm": 0.5404725074768066, "learning_rate": 1.999979224699183e-05, "loss": 1.5919, "step": 13800 }, { "epoch": 0.010337185608257849, "grad_norm": 0.5491372346878052, "learning_rate": 1.9999789224080965e-05, "loss": 1.6065, "step": 13900 }, { "epoch": 0.010411553850043878, "grad_norm": 0.3632746934890747, "learning_rate": 1.9999786179336454e-05, "loss": 1.5333, "step": 14000 }, { "epoch": 0.010485922091829905, "grad_norm": 0.43190881609916687, "learning_rate": 1.9999783112758305e-05, "loss": 1.5359, "step": 14100 }, { "epoch": 0.010560290333615933, "grad_norm": 0.6655808687210083, "learning_rate": 1.9999780024346525e-05, "loss": 1.6012, "step": 14200 }, { "epoch": 0.01063465857540196, "grad_norm": 0.7489643692970276, "learning_rate": 1.999977691410112e-05, "loss": 1.5545, "step": 14300 }, { "epoch": 0.010709026817187987, "grad_norm": 0.4741237759590149, "learning_rate": 1.9999773782022095e-05, "loss": 1.5303, "step": 14400 }, { "epoch": 0.010783395058974016, "grad_norm": 0.7895578145980835, "learning_rate": 1.9999770628109458e-05, "loss": 1.5997, "step": 14500 }, { "epoch": 0.010857763300760043, "grad_norm": 0.6510291695594788, "learning_rate": 1.9999767452363215e-05, "loss": 1.483, "step": 14600 }, { "epoch": 0.010932131542546072, "grad_norm": 0.5989207029342651, "learning_rate": 1.9999764254783376e-05, "loss": 1.5073, "step": 14700 }, { "epoch": 0.011006499784332099, "grad_norm": 0.5995681881904602, "learning_rate": 1.9999761035369946e-05, "loss": 1.5439, "step": 14800 }, { "epoch": 0.011080868026118126, "grad_norm": 0.6359573602676392, "learning_rate": 1.9999757794122933e-05, "loss": 1.5821, "step": 14900 }, { "epoch": 0.011155236267904154, "grad_norm": 0.404085636138916, "learning_rate": 1.9999754531042338e-05, "loss": 1.556, "step": 15000 }, { "epoch": 0.011229604509690181, "grad_norm": 0.660020112991333, "learning_rate": 1.9999751246128175e-05, "loss": 1.5713, "step": 15100 }, { "epoch": 0.01130397275147621, "grad_norm": 0.7031283378601074, "learning_rate": 1.9999747939380453e-05, "loss": 1.4647, "step": 15200 }, { "epoch": 0.011378340993262237, "grad_norm": 0.5159358978271484, "learning_rate": 1.9999744610799173e-05, "loss": 1.5298, "step": 15300 }, { "epoch": 0.011452709235048266, "grad_norm": 0.5451757907867432, "learning_rate": 1.9999741260384345e-05, "loss": 1.6068, "step": 15400 }, { "epoch": 0.011527077476834293, "grad_norm": 0.9550883769989014, "learning_rate": 1.9999737888135975e-05, "loss": 1.5665, "step": 15500 }, { "epoch": 0.01160144571862032, "grad_norm": 0.3711983859539032, "learning_rate": 1.999973449405407e-05, "loss": 1.5189, "step": 15600 }, { "epoch": 0.011675813960406348, "grad_norm": 1.052902340888977, "learning_rate": 1.9999731078138643e-05, "loss": 1.6834, "step": 15700 }, { "epoch": 0.011750182202192375, "grad_norm": 0.6009785532951355, "learning_rate": 1.9999727640389697e-05, "loss": 1.5497, "step": 15800 }, { "epoch": 0.011824550443978404, "grad_norm": 0.5357051491737366, "learning_rate": 1.999972418080724e-05, "loss": 1.5163, "step": 15900 }, { "epoch": 0.011898918685764431, "grad_norm": 0.5712498426437378, "learning_rate": 1.9999720699391275e-05, "loss": 1.5611, "step": 16000 }, { "epoch": 0.01197328692755046, "grad_norm": 0.5744183659553528, "learning_rate": 1.999971719614182e-05, "loss": 1.5405, "step": 16100 }, { "epoch": 0.012047655169336487, "grad_norm": 0.42877888679504395, "learning_rate": 1.9999713671058874e-05, "loss": 1.5595, "step": 16200 }, { "epoch": 0.012122023411122514, "grad_norm": 0.7209616303443909, "learning_rate": 1.9999710124142445e-05, "loss": 1.5457, "step": 16300 }, { "epoch": 0.012196391652908542, "grad_norm": 0.7052120566368103, "learning_rate": 1.999970655539255e-05, "loss": 1.5724, "step": 16400 }, { "epoch": 0.01227075989469457, "grad_norm": 0.45960021018981934, "learning_rate": 1.9999702964809182e-05, "loss": 1.5479, "step": 16500 }, { "epoch": 0.012345128136480598, "grad_norm": 0.4394296407699585, "learning_rate": 1.9999699352392362e-05, "loss": 1.55, "step": 16600 }, { "epoch": 0.012419496378266625, "grad_norm": 1.227424144744873, "learning_rate": 1.999969571814209e-05, "loss": 1.5187, "step": 16700 }, { "epoch": 0.012493864620052652, "grad_norm": 0.8249584436416626, "learning_rate": 1.9999692062058376e-05, "loss": 1.5677, "step": 16800 }, { "epoch": 0.01256823286183868, "grad_norm": 0.8973199725151062, "learning_rate": 1.999968838414123e-05, "loss": 1.5501, "step": 16900 }, { "epoch": 0.012642601103624708, "grad_norm": 0.716529905796051, "learning_rate": 1.999968468439066e-05, "loss": 1.6245, "step": 17000 }, { "epoch": 0.012716969345410736, "grad_norm": 0.5941506624221802, "learning_rate": 1.999968096280667e-05, "loss": 1.4951, "step": 17100 }, { "epoch": 0.012791337587196763, "grad_norm": 1.8864718675613403, "learning_rate": 1.999967721938927e-05, "loss": 1.5397, "step": 17200 }, { "epoch": 0.012865705828982792, "grad_norm": 0.6418184638023376, "learning_rate": 1.999967345413847e-05, "loss": 1.4693, "step": 17300 }, { "epoch": 0.012940074070768819, "grad_norm": 0.6764699220657349, "learning_rate": 1.999966966705428e-05, "loss": 1.4635, "step": 17400 }, { "epoch": 0.013014442312554846, "grad_norm": 0.7185351848602295, "learning_rate": 1.9999665858136704e-05, "loss": 1.5252, "step": 17500 }, { "epoch": 0.013088810554340875, "grad_norm": 0.42110446095466614, "learning_rate": 1.9999662027385748e-05, "loss": 1.5908, "step": 17600 }, { "epoch": 0.013163178796126902, "grad_norm": 0.6807708144187927, "learning_rate": 1.999965817480143e-05, "loss": 1.6466, "step": 17700 }, { "epoch": 0.01323754703791293, "grad_norm": 0.5771286487579346, "learning_rate": 1.999965430038375e-05, "loss": 1.5361, "step": 17800 }, { "epoch": 0.013311915279698957, "grad_norm": 0.5322648882865906, "learning_rate": 1.9999650404132715e-05, "loss": 1.5638, "step": 17900 }, { "epoch": 0.013386283521484984, "grad_norm": 0.865608274936676, "learning_rate": 1.9999646486048342e-05, "loss": 1.4568, "step": 18000 }, { "epoch": 0.013460651763271013, "grad_norm": 0.7592107057571411, "learning_rate": 1.9999642546130634e-05, "loss": 1.5669, "step": 18100 }, { "epoch": 0.01353502000505704, "grad_norm": 0.673466145992279, "learning_rate": 1.9999638584379602e-05, "loss": 1.5141, "step": 18200 }, { "epoch": 0.013609388246843069, "grad_norm": 0.574698269367218, "learning_rate": 1.9999634600795252e-05, "loss": 1.5703, "step": 18300 }, { "epoch": 0.013683756488629096, "grad_norm": 0.6722753643989563, "learning_rate": 1.9999630595377595e-05, "loss": 1.5843, "step": 18400 }, { "epoch": 0.013758124730415124, "grad_norm": 0.9738336801528931, "learning_rate": 1.9999626568126636e-05, "loss": 1.4878, "step": 18500 }, { "epoch": 0.013832492972201151, "grad_norm": 0.5274741649627686, "learning_rate": 1.999962251904239e-05, "loss": 1.5254, "step": 18600 }, { "epoch": 0.013906861213987178, "grad_norm": 1.725870966911316, "learning_rate": 1.999961844812486e-05, "loss": 1.4785, "step": 18700 }, { "epoch": 0.013981229455773207, "grad_norm": 0.6889399886131287, "learning_rate": 1.9999614355374058e-05, "loss": 1.5437, "step": 18800 }, { "epoch": 0.014055597697559234, "grad_norm": 0.576836884021759, "learning_rate": 1.9999610240789994e-05, "loss": 1.4949, "step": 18900 }, { "epoch": 0.014129965939345263, "grad_norm": 0.3870568871498108, "learning_rate": 1.9999606104372674e-05, "loss": 1.5376, "step": 19000 }, { "epoch": 0.01420433418113129, "grad_norm": 1.1045247316360474, "learning_rate": 1.9999601946122107e-05, "loss": 1.6825, "step": 19100 }, { "epoch": 0.014278702422917318, "grad_norm": 0.49821707606315613, "learning_rate": 1.9999597766038304e-05, "loss": 1.4909, "step": 19200 }, { "epoch": 0.014353070664703345, "grad_norm": 0.4011678695678711, "learning_rate": 1.9999593564121275e-05, "loss": 1.4673, "step": 19300 }, { "epoch": 0.014427438906489372, "grad_norm": 0.46667736768722534, "learning_rate": 1.9999589340371026e-05, "loss": 1.5537, "step": 19400 }, { "epoch": 0.014501807148275401, "grad_norm": 0.4063940942287445, "learning_rate": 1.9999585094787567e-05, "loss": 1.4736, "step": 19500 }, { "epoch": 0.014576175390061428, "grad_norm": 0.5824026465415955, "learning_rate": 1.9999580827370906e-05, "loss": 1.6191, "step": 19600 }, { "epoch": 0.014650543631847457, "grad_norm": 0.5595284104347229, "learning_rate": 1.999957653812106e-05, "loss": 1.5294, "step": 19700 }, { "epoch": 0.014724911873633484, "grad_norm": 0.6950704455375671, "learning_rate": 1.9999572227038028e-05, "loss": 1.5015, "step": 19800 }, { "epoch": 0.01479928011541951, "grad_norm": 0.4345974028110504, "learning_rate": 1.999956789412183e-05, "loss": 1.4955, "step": 19900 }, { "epoch": 0.01487364835720554, "grad_norm": 0.475046306848526, "learning_rate": 1.9999563539372464e-05, "loss": 1.5663, "step": 20000 }, { "epoch": 0.014948016598991566, "grad_norm": 0.3211815357208252, "learning_rate": 1.9999559162789946e-05, "loss": 1.5379, "step": 20100 }, { "epoch": 0.015022384840777595, "grad_norm": 0.7868314981460571, "learning_rate": 1.9999554764374287e-05, "loss": 1.542, "step": 20200 }, { "epoch": 0.015096753082563622, "grad_norm": 0.3961299955844879, "learning_rate": 1.9999550344125492e-05, "loss": 1.4359, "step": 20300 }, { "epoch": 0.01517112132434965, "grad_norm": 0.7971549034118652, "learning_rate": 1.9999545902043577e-05, "loss": 1.5363, "step": 20400 }, { "epoch": 0.015245489566135677, "grad_norm": 1.1090092658996582, "learning_rate": 1.9999541438128543e-05, "loss": 1.5565, "step": 20500 }, { "epoch": 0.015319857807921704, "grad_norm": 1.0558898448944092, "learning_rate": 1.9999536952380406e-05, "loss": 1.5504, "step": 20600 }, { "epoch": 0.015394226049707733, "grad_norm": 0.5869760513305664, "learning_rate": 1.9999532444799174e-05, "loss": 1.5023, "step": 20700 }, { "epoch": 0.01546859429149376, "grad_norm": 0.5132299065589905, "learning_rate": 1.9999527915384858e-05, "loss": 1.472, "step": 20800 }, { "epoch": 0.015542962533279789, "grad_norm": 0.8405370116233826, "learning_rate": 1.999952336413747e-05, "loss": 1.479, "step": 20900 }, { "epoch": 0.015617330775065816, "grad_norm": 1.0692424774169922, "learning_rate": 1.9999518791057012e-05, "loss": 1.5734, "step": 21000 }, { "epoch": 0.015691699016851843, "grad_norm": 0.39929547905921936, "learning_rate": 1.99995141961435e-05, "loss": 1.5499, "step": 21100 }, { "epoch": 0.01576606725863787, "grad_norm": 0.5001465082168579, "learning_rate": 1.999950957939694e-05, "loss": 1.5728, "step": 21200 }, { "epoch": 0.0158404355004239, "grad_norm": 0.4564245045185089, "learning_rate": 1.999950494081735e-05, "loss": 1.4685, "step": 21300 }, { "epoch": 0.015914803742209927, "grad_norm": 0.945813775062561, "learning_rate": 1.999950028040473e-05, "loss": 1.5309, "step": 21400 }, { "epoch": 0.015989171983995954, "grad_norm": 0.5529621839523315, "learning_rate": 1.9999495598159102e-05, "loss": 1.5244, "step": 21500 }, { "epoch": 0.01606354022578198, "grad_norm": 0.7338210940361023, "learning_rate": 1.9999490894080467e-05, "loss": 1.6339, "step": 21600 }, { "epoch": 0.016137908467568008, "grad_norm": 1.0055419206619263, "learning_rate": 1.999948616816884e-05, "loss": 1.613, "step": 21700 }, { "epoch": 0.01621227670935404, "grad_norm": 0.5460941195487976, "learning_rate": 1.9999481420424223e-05, "loss": 1.5819, "step": 21800 }, { "epoch": 0.016286644951140065, "grad_norm": 1.005537509918213, "learning_rate": 1.9999476650846637e-05, "loss": 1.5636, "step": 21900 }, { "epoch": 0.016361013192926092, "grad_norm": 0.8599165678024292, "learning_rate": 1.9999471859436082e-05, "loss": 1.4977, "step": 22000 }, { "epoch": 0.01643538143471212, "grad_norm": 0.41388291120529175, "learning_rate": 1.9999467046192583e-05, "loss": 1.4243, "step": 22100 }, { "epoch": 0.01650974967649815, "grad_norm": 0.4443175494670868, "learning_rate": 1.9999462211116135e-05, "loss": 1.5419, "step": 22200 }, { "epoch": 0.016584117918284177, "grad_norm": 0.9959002733230591, "learning_rate": 1.999945735420676e-05, "loss": 1.588, "step": 22300 }, { "epoch": 0.016658486160070204, "grad_norm": 0.7721849679946899, "learning_rate": 1.999945247546446e-05, "loss": 1.4806, "step": 22400 }, { "epoch": 0.01673285440185623, "grad_norm": 0.5781850814819336, "learning_rate": 1.9999447574889253e-05, "loss": 1.5864, "step": 22500 }, { "epoch": 0.016807222643642258, "grad_norm": 0.6155378222465515, "learning_rate": 1.9999442652481143e-05, "loss": 1.6002, "step": 22600 }, { "epoch": 0.016881590885428288, "grad_norm": 0.8101166486740112, "learning_rate": 1.9999437708240146e-05, "loss": 1.5385, "step": 22700 }, { "epoch": 0.016955959127214315, "grad_norm": 0.8044368624687195, "learning_rate": 1.999943274216627e-05, "loss": 1.4563, "step": 22800 }, { "epoch": 0.017030327369000342, "grad_norm": 0.3784123361110687, "learning_rate": 1.9999427754259527e-05, "loss": 1.5844, "step": 22900 }, { "epoch": 0.01710469561078637, "grad_norm": 0.8152732253074646, "learning_rate": 1.9999422744519928e-05, "loss": 1.53, "step": 23000 }, { "epoch": 0.017179063852572396, "grad_norm": 0.8851474523544312, "learning_rate": 1.9999417712947486e-05, "loss": 1.5828, "step": 23100 }, { "epoch": 0.017253432094358426, "grad_norm": 0.8275689482688904, "learning_rate": 1.9999412659542208e-05, "loss": 1.5057, "step": 23200 }, { "epoch": 0.017327800336144453, "grad_norm": 0.5356424450874329, "learning_rate": 1.9999407584304106e-05, "loss": 1.5621, "step": 23300 }, { "epoch": 0.01740216857793048, "grad_norm": 0.35889101028442383, "learning_rate": 1.999940248723319e-05, "loss": 1.5884, "step": 23400 }, { "epoch": 0.017476536819716507, "grad_norm": 0.5190862417221069, "learning_rate": 1.9999397368329477e-05, "loss": 1.6021, "step": 23500 }, { "epoch": 0.017550905061502534, "grad_norm": 0.5140055418014526, "learning_rate": 1.9999392227592967e-05, "loss": 1.5474, "step": 23600 }, { "epoch": 0.017625273303288565, "grad_norm": 0.607276201248169, "learning_rate": 1.9999387065023685e-05, "loss": 1.5002, "step": 23700 }, { "epoch": 0.01769964154507459, "grad_norm": 0.7513449192047119, "learning_rate": 1.9999381880621634e-05, "loss": 1.5098, "step": 23800 }, { "epoch": 0.01777400978686062, "grad_norm": 0.7328070402145386, "learning_rate": 1.9999376674386824e-05, "loss": 1.5768, "step": 23900 }, { "epoch": 0.017848378028646646, "grad_norm": 0.817368745803833, "learning_rate": 1.9999371446319272e-05, "loss": 1.5178, "step": 24000 }, { "epoch": 0.017922746270432676, "grad_norm": 0.844530463218689, "learning_rate": 1.999936619641899e-05, "loss": 1.5331, "step": 24100 }, { "epoch": 0.017997114512218703, "grad_norm": 0.8772881627082825, "learning_rate": 1.9999360924685978e-05, "loss": 1.5564, "step": 24200 }, { "epoch": 0.01807148275400473, "grad_norm": 0.37944692373275757, "learning_rate": 1.999935563112026e-05, "loss": 1.4823, "step": 24300 }, { "epoch": 0.018145850995790757, "grad_norm": 0.34085753560066223, "learning_rate": 1.999935031572184e-05, "loss": 1.4741, "step": 24400 }, { "epoch": 0.018220219237576784, "grad_norm": 0.8616833686828613, "learning_rate": 1.9999344978490737e-05, "loss": 1.4642, "step": 24500 }, { "epoch": 0.018294587479362814, "grad_norm": 0.9431029558181763, "learning_rate": 1.9999339619426958e-05, "loss": 1.5507, "step": 24600 }, { "epoch": 0.01836895572114884, "grad_norm": 0.5803475975990295, "learning_rate": 1.9999334238530512e-05, "loss": 1.5617, "step": 24700 }, { "epoch": 0.01844332396293487, "grad_norm": 0.7339209318161011, "learning_rate": 1.9999328835801416e-05, "loss": 1.4881, "step": 24800 }, { "epoch": 0.018517692204720895, "grad_norm": 0.7969409823417664, "learning_rate": 1.9999323411239676e-05, "loss": 1.5438, "step": 24900 }, { "epoch": 0.018592060446506922, "grad_norm": 0.6049161553382874, "learning_rate": 1.9999317964845313e-05, "loss": 1.5352, "step": 25000 }, { "epoch": 0.018666428688292953, "grad_norm": 0.625723659992218, "learning_rate": 1.999931249661833e-05, "loss": 1.4817, "step": 25100 }, { "epoch": 0.01874079693007898, "grad_norm": 0.8167730569839478, "learning_rate": 1.9999307006558745e-05, "loss": 1.4863, "step": 25200 }, { "epoch": 0.018815165171865007, "grad_norm": 0.41490304470062256, "learning_rate": 1.9999301494666566e-05, "loss": 1.4653, "step": 25300 }, { "epoch": 0.018889533413651034, "grad_norm": 0.7005138397216797, "learning_rate": 1.9999295960941802e-05, "loss": 1.5417, "step": 25400 }, { "epoch": 0.01896390165543706, "grad_norm": 0.4145418405532837, "learning_rate": 1.9999290405384476e-05, "loss": 1.5818, "step": 25500 }, { "epoch": 0.01903826989722309, "grad_norm": 0.9620917439460754, "learning_rate": 1.999928482799459e-05, "loss": 1.5717, "step": 25600 }, { "epoch": 0.019112638139009118, "grad_norm": 0.518038272857666, "learning_rate": 1.999927922877216e-05, "loss": 1.4603, "step": 25700 }, { "epoch": 0.019187006380795145, "grad_norm": 1.0701864957809448, "learning_rate": 1.9999273607717198e-05, "loss": 1.5095, "step": 25800 }, { "epoch": 0.019261374622581172, "grad_norm": 1.2206807136535645, "learning_rate": 1.9999267964829717e-05, "loss": 1.5099, "step": 25900 }, { "epoch": 0.019335742864367202, "grad_norm": 0.4838850796222687, "learning_rate": 1.999926230010973e-05, "loss": 1.5856, "step": 26000 }, { "epoch": 0.01941011110615323, "grad_norm": 0.3916015625, "learning_rate": 1.9999256613557243e-05, "loss": 1.5198, "step": 26100 }, { "epoch": 0.019484479347939256, "grad_norm": 0.4921341836452484, "learning_rate": 1.9999250905172276e-05, "loss": 1.5517, "step": 26200 }, { "epoch": 0.019558847589725283, "grad_norm": 0.4124142527580261, "learning_rate": 1.999924517495484e-05, "loss": 1.6267, "step": 26300 }, { "epoch": 0.01963321583151131, "grad_norm": 0.6755162477493286, "learning_rate": 1.9999239422904946e-05, "loss": 1.5408, "step": 26400 }, { "epoch": 0.01970758407329734, "grad_norm": 0.8833709359169006, "learning_rate": 1.9999233649022604e-05, "loss": 1.5334, "step": 26500 }, { "epoch": 0.019781952315083368, "grad_norm": 0.5344982147216797, "learning_rate": 1.9999227853307832e-05, "loss": 1.5532, "step": 26600 }, { "epoch": 0.019856320556869395, "grad_norm": 0.5524909496307373, "learning_rate": 1.999922203576064e-05, "loss": 1.5672, "step": 26700 }, { "epoch": 0.01993068879865542, "grad_norm": 0.6802098751068115, "learning_rate": 1.999921619638104e-05, "loss": 1.57, "step": 26800 }, { "epoch": 0.02000505704044145, "grad_norm": 0.6773833632469177, "learning_rate": 1.9999210335169047e-05, "loss": 1.562, "step": 26900 }, { "epoch": 0.02007942528222748, "grad_norm": 0.5428286194801331, "learning_rate": 1.999920445212467e-05, "loss": 1.5683, "step": 27000 }, { "epoch": 0.020153793524013506, "grad_norm": 0.5180791020393372, "learning_rate": 1.9999198547247927e-05, "loss": 1.6216, "step": 27100 }, { "epoch": 0.020228161765799533, "grad_norm": 0.6695342659950256, "learning_rate": 1.9999192620538825e-05, "loss": 1.4601, "step": 27200 }, { "epoch": 0.02030253000758556, "grad_norm": 1.2745898962020874, "learning_rate": 1.999918667199738e-05, "loss": 1.5002, "step": 27300 }, { "epoch": 0.020376898249371587, "grad_norm": 0.5011482834815979, "learning_rate": 1.999918070162361e-05, "loss": 1.5048, "step": 27400 }, { "epoch": 0.020451266491157617, "grad_norm": 0.4430767297744751, "learning_rate": 1.999917470941752e-05, "loss": 1.5609, "step": 27500 }, { "epoch": 0.020525634732943644, "grad_norm": 0.540259838104248, "learning_rate": 1.9999168695379124e-05, "loss": 1.5115, "step": 27600 }, { "epoch": 0.02060000297472967, "grad_norm": 0.4208228886127472, "learning_rate": 1.999916265950844e-05, "loss": 1.5318, "step": 27700 }, { "epoch": 0.020674371216515698, "grad_norm": 0.5492777824401855, "learning_rate": 1.9999156601805477e-05, "loss": 1.5001, "step": 27800 }, { "epoch": 0.020748739458301725, "grad_norm": 0.8193747997283936, "learning_rate": 1.999915052227025e-05, "loss": 1.5795, "step": 27900 }, { "epoch": 0.020823107700087756, "grad_norm": 0.6221509575843811, "learning_rate": 1.999914442090277e-05, "loss": 1.5414, "step": 28000 }, { "epoch": 0.020897475941873783, "grad_norm": 0.8481204509735107, "learning_rate": 1.9999138297703055e-05, "loss": 1.5481, "step": 28100 }, { "epoch": 0.02097184418365981, "grad_norm": 0.8506454229354858, "learning_rate": 1.9999132152671116e-05, "loss": 1.536, "step": 28200 }, { "epoch": 0.021046212425445836, "grad_norm": 0.6849836111068726, "learning_rate": 1.9999125985806964e-05, "loss": 1.5236, "step": 28300 }, { "epoch": 0.021120580667231867, "grad_norm": 0.6328344345092773, "learning_rate": 1.999911979711062e-05, "loss": 1.4921, "step": 28400 }, { "epoch": 0.021194948909017894, "grad_norm": 0.44376102089881897, "learning_rate": 1.9999113586582085e-05, "loss": 1.5039, "step": 28500 }, { "epoch": 0.02126931715080392, "grad_norm": 0.6041997075080872, "learning_rate": 1.9999107354221385e-05, "loss": 1.5522, "step": 28600 }, { "epoch": 0.021343685392589948, "grad_norm": 0.5901020169258118, "learning_rate": 1.9999101100028522e-05, "loss": 1.5321, "step": 28700 }, { "epoch": 0.021418053634375975, "grad_norm": 1.058334231376648, "learning_rate": 1.999909482400352e-05, "loss": 1.5725, "step": 28800 }, { "epoch": 0.021492421876162005, "grad_norm": 0.9694022536277771, "learning_rate": 1.9999088526146387e-05, "loss": 1.545, "step": 28900 }, { "epoch": 0.021566790117948032, "grad_norm": 0.6166462898254395, "learning_rate": 1.999908220645714e-05, "loss": 1.481, "step": 29000 }, { "epoch": 0.02164115835973406, "grad_norm": 0.4764333963394165, "learning_rate": 1.999907586493579e-05, "loss": 1.5559, "step": 29100 }, { "epoch": 0.021715526601520086, "grad_norm": 0.5028481483459473, "learning_rate": 1.9999069501582352e-05, "loss": 1.5451, "step": 29200 }, { "epoch": 0.021789894843306113, "grad_norm": 0.7064079642295837, "learning_rate": 1.9999063116396844e-05, "loss": 1.5065, "step": 29300 }, { "epoch": 0.021864263085092144, "grad_norm": 0.8854705691337585, "learning_rate": 1.9999056709379268e-05, "loss": 1.5331, "step": 29400 }, { "epoch": 0.02193863132687817, "grad_norm": 1.1931555271148682, "learning_rate": 1.999905028052965e-05, "loss": 1.5629, "step": 29500 }, { "epoch": 0.022012999568664197, "grad_norm": 0.4196559190750122, "learning_rate": 1.9999043829848e-05, "loss": 1.5969, "step": 29600 }, { "epoch": 0.022087367810450224, "grad_norm": 0.661222517490387, "learning_rate": 1.999903735733433e-05, "loss": 1.4522, "step": 29700 }, { "epoch": 0.02216173605223625, "grad_norm": 1.0771206617355347, "learning_rate": 1.9999030862988658e-05, "loss": 1.6346, "step": 29800 }, { "epoch": 0.022236104294022282, "grad_norm": 0.4439813196659088, "learning_rate": 1.9999024346810995e-05, "loss": 1.4533, "step": 29900 }, { "epoch": 0.02231047253580831, "grad_norm": 0.3492225706577301, "learning_rate": 1.999901780880136e-05, "loss": 1.4831, "step": 30000 }, { "epoch": 0.022384840777594336, "grad_norm": 0.6220123171806335, "learning_rate": 1.9999011248959757e-05, "loss": 1.5165, "step": 30100 }, { "epoch": 0.022459209019380363, "grad_norm": 0.467629998922348, "learning_rate": 1.9999004667286214e-05, "loss": 1.5315, "step": 30200 }, { "epoch": 0.022533577261166393, "grad_norm": 0.6271420121192932, "learning_rate": 1.9998998063780735e-05, "loss": 1.5861, "step": 30300 }, { "epoch": 0.02260794550295242, "grad_norm": 0.404694139957428, "learning_rate": 1.9998991438443337e-05, "loss": 1.455, "step": 30400 }, { "epoch": 0.022682313744738447, "grad_norm": 0.6882662177085876, "learning_rate": 1.9998984791274038e-05, "loss": 1.5077, "step": 30500 }, { "epoch": 0.022756681986524474, "grad_norm": 0.4796554744243622, "learning_rate": 1.9998978122272844e-05, "loss": 1.4934, "step": 30600 }, { "epoch": 0.0228310502283105, "grad_norm": 0.7510641813278198, "learning_rate": 1.9998971431439783e-05, "loss": 1.5009, "step": 30700 }, { "epoch": 0.02290541847009653, "grad_norm": 0.5859106779098511, "learning_rate": 1.9998964718774857e-05, "loss": 1.529, "step": 30800 }, { "epoch": 0.02297978671188256, "grad_norm": 0.5376309156417847, "learning_rate": 1.999895798427809e-05, "loss": 1.5485, "step": 30900 }, { "epoch": 0.023054154953668585, "grad_norm": 0.18692457675933838, "learning_rate": 1.9998951227949487e-05, "loss": 1.4897, "step": 31000 }, { "epoch": 0.023128523195454612, "grad_norm": 0.4838975667953491, "learning_rate": 1.999894444978907e-05, "loss": 1.5244, "step": 31100 }, { "epoch": 0.02320289143724064, "grad_norm": 0.8973916172981262, "learning_rate": 1.9998937649796854e-05, "loss": 1.554, "step": 31200 }, { "epoch": 0.02327725967902667, "grad_norm": 0.5681875944137573, "learning_rate": 1.999893082797285e-05, "loss": 1.5503, "step": 31300 }, { "epoch": 0.023351627920812697, "grad_norm": 0.469427227973938, "learning_rate": 1.9998923984317075e-05, "loss": 1.4381, "step": 31400 }, { "epoch": 0.023425996162598724, "grad_norm": 0.43099313974380493, "learning_rate": 1.9998917118829543e-05, "loss": 1.5112, "step": 31500 }, { "epoch": 0.02350036440438475, "grad_norm": 0.8290247917175293, "learning_rate": 1.999891023151027e-05, "loss": 1.467, "step": 31600 }, { "epoch": 0.023574732646170778, "grad_norm": 0.7134198546409607, "learning_rate": 1.999890332235927e-05, "loss": 1.5312, "step": 31700 }, { "epoch": 0.023649100887956808, "grad_norm": 0.4312078356742859, "learning_rate": 1.999889639137656e-05, "loss": 1.5417, "step": 31800 }, { "epoch": 0.023723469129742835, "grad_norm": 0.5288392305374146, "learning_rate": 1.9998889438562153e-05, "loss": 1.5432, "step": 31900 }, { "epoch": 0.023797837371528862, "grad_norm": 0.5819665789604187, "learning_rate": 1.9998882463916062e-05, "loss": 1.5703, "step": 32000 }, { "epoch": 0.02387220561331489, "grad_norm": 0.6748378276824951, "learning_rate": 1.999887546743831e-05, "loss": 1.5674, "step": 32100 }, { "epoch": 0.02394657385510092, "grad_norm": 0.5860730409622192, "learning_rate": 1.9998868449128905e-05, "loss": 1.5775, "step": 32200 }, { "epoch": 0.024020942096886946, "grad_norm": 1.1641826629638672, "learning_rate": 1.9998861408987866e-05, "loss": 1.6354, "step": 32300 }, { "epoch": 0.024095310338672973, "grad_norm": 0.6446713209152222, "learning_rate": 1.9998854347015206e-05, "loss": 1.5508, "step": 32400 }, { "epoch": 0.024169678580459, "grad_norm": 0.8211930990219116, "learning_rate": 1.9998847263210942e-05, "loss": 1.4797, "step": 32500 }, { "epoch": 0.024244046822245027, "grad_norm": 0.9733643531799316, "learning_rate": 1.9998840157575093e-05, "loss": 1.5375, "step": 32600 }, { "epoch": 0.024318415064031058, "grad_norm": 0.7882494330406189, "learning_rate": 1.9998833030107663e-05, "loss": 1.5167, "step": 32700 }, { "epoch": 0.024392783305817085, "grad_norm": 0.8701611757278442, "learning_rate": 1.999882588080868e-05, "loss": 1.578, "step": 32800 }, { "epoch": 0.02446715154760311, "grad_norm": 0.5390304923057556, "learning_rate": 1.9998818709678157e-05, "loss": 1.4868, "step": 32900 }, { "epoch": 0.02454151978938914, "grad_norm": 0.8778117299079895, "learning_rate": 1.9998811516716104e-05, "loss": 1.4611, "step": 33000 }, { "epoch": 0.024615888031175166, "grad_norm": 1.267151951789856, "learning_rate": 1.999880430192254e-05, "loss": 1.4868, "step": 33100 }, { "epoch": 0.024690256272961196, "grad_norm": 0.6846994161605835, "learning_rate": 1.9998797065297483e-05, "loss": 1.5047, "step": 33200 }, { "epoch": 0.024764624514747223, "grad_norm": 0.6609792113304138, "learning_rate": 1.9998789806840945e-05, "loss": 1.5189, "step": 33300 }, { "epoch": 0.02483899275653325, "grad_norm": 0.5603302717208862, "learning_rate": 1.9998782526552946e-05, "loss": 1.5095, "step": 33400 }, { "epoch": 0.024913360998319277, "grad_norm": 0.7241900563240051, "learning_rate": 1.9998775224433493e-05, "loss": 1.5106, "step": 33500 }, { "epoch": 0.024987729240105304, "grad_norm": 1.149263620376587, "learning_rate": 1.9998767900482616e-05, "loss": 1.5778, "step": 33600 }, { "epoch": 0.025062097481891334, "grad_norm": 0.6764651536941528, "learning_rate": 1.9998760554700318e-05, "loss": 1.4944, "step": 33700 }, { "epoch": 0.02513646572367736, "grad_norm": 0.6464880704879761, "learning_rate": 1.999875318708662e-05, "loss": 1.5719, "step": 33800 }, { "epoch": 0.025210833965463388, "grad_norm": 0.6596807241439819, "learning_rate": 1.9998745797641543e-05, "loss": 1.6179, "step": 33900 }, { "epoch": 0.025285202207249415, "grad_norm": 0.8761606812477112, "learning_rate": 1.9998738386365096e-05, "loss": 1.5256, "step": 34000 }, { "epoch": 0.025359570449035442, "grad_norm": 0.43756160140037537, "learning_rate": 1.9998730953257297e-05, "loss": 1.5477, "step": 34100 }, { "epoch": 0.025433938690821473, "grad_norm": 0.4515778720378876, "learning_rate": 1.9998723498318165e-05, "loss": 1.5666, "step": 34200 }, { "epoch": 0.0255083069326075, "grad_norm": 0.5726724863052368, "learning_rate": 1.9998716021547714e-05, "loss": 1.4878, "step": 34300 }, { "epoch": 0.025582675174393527, "grad_norm": 0.5104209184646606, "learning_rate": 1.999870852294596e-05, "loss": 1.5678, "step": 34400 }, { "epoch": 0.025657043416179554, "grad_norm": 0.7009900808334351, "learning_rate": 1.999870100251292e-05, "loss": 1.4896, "step": 34500 }, { "epoch": 0.025731411657965584, "grad_norm": 0.46048620343208313, "learning_rate": 1.9998693460248613e-05, "loss": 1.5144, "step": 34600 }, { "epoch": 0.02580577989975161, "grad_norm": 0.6157929301261902, "learning_rate": 1.999868589615305e-05, "loss": 1.5178, "step": 34700 }, { "epoch": 0.025880148141537638, "grad_norm": 0.5260864496231079, "learning_rate": 1.9998678310226253e-05, "loss": 1.5046, "step": 34800 }, { "epoch": 0.025954516383323665, "grad_norm": 0.5624649524688721, "learning_rate": 1.999867070246823e-05, "loss": 1.5418, "step": 34900 }, { "epoch": 0.026028884625109692, "grad_norm": 0.5242325663566589, "learning_rate": 1.999866307287901e-05, "loss": 1.4936, "step": 35000 }, { "epoch": 0.026103252866895722, "grad_norm": 0.42132341861724854, "learning_rate": 1.9998655421458603e-05, "loss": 1.5528, "step": 35100 }, { "epoch": 0.02617762110868175, "grad_norm": 1.2333385944366455, "learning_rate": 1.9998647748207022e-05, "loss": 1.5343, "step": 35200 }, { "epoch": 0.026251989350467776, "grad_norm": 0.4847305417060852, "learning_rate": 1.9998640053124288e-05, "loss": 1.5256, "step": 35300 }, { "epoch": 0.026326357592253803, "grad_norm": 0.4797114133834839, "learning_rate": 1.999863233621042e-05, "loss": 1.5394, "step": 35400 }, { "epoch": 0.02640072583403983, "grad_norm": 0.8396820425987244, "learning_rate": 1.999862459746543e-05, "loss": 1.5643, "step": 35500 }, { "epoch": 0.02647509407582586, "grad_norm": 0.4638078808784485, "learning_rate": 1.999861683688934e-05, "loss": 1.5372, "step": 35600 }, { "epoch": 0.026549462317611888, "grad_norm": 0.44567036628723145, "learning_rate": 1.9998609054482162e-05, "loss": 1.5471, "step": 35700 }, { "epoch": 0.026623830559397915, "grad_norm": 0.7429941892623901, "learning_rate": 1.9998601250243915e-05, "loss": 1.5551, "step": 35800 }, { "epoch": 0.02669819880118394, "grad_norm": 0.4555191695690155, "learning_rate": 1.9998593424174618e-05, "loss": 1.6057, "step": 35900 }, { "epoch": 0.02677256704296997, "grad_norm": 1.1227898597717285, "learning_rate": 1.9998585576274286e-05, "loss": 1.5223, "step": 36000 }, { "epoch": 0.026846935284756, "grad_norm": 0.5287070870399475, "learning_rate": 1.9998577706542937e-05, "loss": 1.4566, "step": 36100 }, { "epoch": 0.026921303526542026, "grad_norm": 0.43527650833129883, "learning_rate": 1.9998569814980587e-05, "loss": 1.5472, "step": 36200 }, { "epoch": 0.026995671768328053, "grad_norm": 0.8627545237541199, "learning_rate": 1.999856190158725e-05, "loss": 1.5259, "step": 36300 }, { "epoch": 0.02707004001011408, "grad_norm": 0.5693374276161194, "learning_rate": 1.9998553966362952e-05, "loss": 1.5403, "step": 36400 }, { "epoch": 0.02714440825190011, "grad_norm": 0.43485864996910095, "learning_rate": 1.9998546009307707e-05, "loss": 1.55, "step": 36500 }, { "epoch": 0.027218776493686137, "grad_norm": 0.7903422713279724, "learning_rate": 1.9998538030421526e-05, "loss": 1.5793, "step": 36600 }, { "epoch": 0.027293144735472164, "grad_norm": 0.5814279317855835, "learning_rate": 1.9998530029704436e-05, "loss": 1.5068, "step": 36700 }, { "epoch": 0.02736751297725819, "grad_norm": 0.5183308124542236, "learning_rate": 1.9998522007156444e-05, "loss": 1.4984, "step": 36800 }, { "epoch": 0.027441881219044218, "grad_norm": 0.5316556692123413, "learning_rate": 1.9998513962777578e-05, "loss": 1.5973, "step": 36900 }, { "epoch": 0.02751624946083025, "grad_norm": 0.6409894824028015, "learning_rate": 1.999850589656785e-05, "loss": 1.5491, "step": 37000 }, { "epoch": 0.027590617702616275, "grad_norm": 0.7894346117973328, "learning_rate": 1.9998497808527273e-05, "loss": 1.5117, "step": 37100 }, { "epoch": 0.027664985944402302, "grad_norm": 0.6969322562217712, "learning_rate": 1.9998489698655877e-05, "loss": 1.5079, "step": 37200 }, { "epoch": 0.02773935418618833, "grad_norm": 1.1727479696273804, "learning_rate": 1.9998481566953673e-05, "loss": 1.4889, "step": 37300 }, { "epoch": 0.027813722427974356, "grad_norm": 0.7132461071014404, "learning_rate": 1.9998473413420672e-05, "loss": 1.5284, "step": 37400 }, { "epoch": 0.027888090669760387, "grad_norm": 0.3298719525337219, "learning_rate": 1.9998465238056905e-05, "loss": 1.5616, "step": 37500 }, { "epoch": 0.027962458911546414, "grad_norm": 0.6609339714050293, "learning_rate": 1.999845704086238e-05, "loss": 1.6174, "step": 37600 }, { "epoch": 0.02803682715333244, "grad_norm": 0.5007957220077515, "learning_rate": 1.9998448821837118e-05, "loss": 1.5016, "step": 37700 }, { "epoch": 0.028111195395118468, "grad_norm": 0.9311910271644592, "learning_rate": 1.9998440580981136e-05, "loss": 1.5351, "step": 37800 }, { "epoch": 0.028185563636904495, "grad_norm": 0.7796390056610107, "learning_rate": 1.9998432318294455e-05, "loss": 1.5461, "step": 37900 }, { "epoch": 0.028259931878690525, "grad_norm": 0.8507175445556641, "learning_rate": 1.9998424033777093e-05, "loss": 1.5247, "step": 38000 }, { "epoch": 0.028334300120476552, "grad_norm": 0.3990893065929413, "learning_rate": 1.9998415727429065e-05, "loss": 1.5629, "step": 38100 }, { "epoch": 0.02840866836226258, "grad_norm": 0.852613091468811, "learning_rate": 1.9998407399250386e-05, "loss": 1.5216, "step": 38200 }, { "epoch": 0.028483036604048606, "grad_norm": 0.4536173343658447, "learning_rate": 1.9998399049241083e-05, "loss": 1.5953, "step": 38300 }, { "epoch": 0.028557404845834636, "grad_norm": 0.5260401964187622, "learning_rate": 1.999839067740117e-05, "loss": 1.616, "step": 38400 }, { "epoch": 0.028631773087620663, "grad_norm": 0.6179829835891724, "learning_rate": 1.9998382283730663e-05, "loss": 1.5295, "step": 38500 }, { "epoch": 0.02870614132940669, "grad_norm": 0.5114478468894958, "learning_rate": 1.9998373868229582e-05, "loss": 1.5425, "step": 38600 }, { "epoch": 0.028780509571192717, "grad_norm": 0.593675971031189, "learning_rate": 1.9998365430897948e-05, "loss": 1.5474, "step": 38700 }, { "epoch": 0.028854877812978744, "grad_norm": 0.4959476888179779, "learning_rate": 1.999835697173577e-05, "loss": 1.5775, "step": 38800 }, { "epoch": 0.028929246054764775, "grad_norm": 0.6730287671089172, "learning_rate": 1.9998348490743082e-05, "loss": 1.6572, "step": 38900 }, { "epoch": 0.029003614296550802, "grad_norm": 0.9137376546859741, "learning_rate": 1.999833998791989e-05, "loss": 1.5007, "step": 39000 }, { "epoch": 0.02907798253833683, "grad_norm": 1.2021700143814087, "learning_rate": 1.999833146326622e-05, "loss": 1.5095, "step": 39100 }, { "epoch": 0.029152350780122856, "grad_norm": 0.5708747506141663, "learning_rate": 1.9998322916782083e-05, "loss": 1.5644, "step": 39200 }, { "epoch": 0.029226719021908883, "grad_norm": 0.6767252087593079, "learning_rate": 1.9998314348467508e-05, "loss": 1.5248, "step": 39300 }, { "epoch": 0.029301087263694913, "grad_norm": 0.4881773889064789, "learning_rate": 1.9998305758322504e-05, "loss": 1.4889, "step": 39400 }, { "epoch": 0.02937545550548094, "grad_norm": 0.4517097771167755, "learning_rate": 1.9998297146347093e-05, "loss": 1.5388, "step": 39500 }, { "epoch": 0.029449823747266967, "grad_norm": 0.6027237176895142, "learning_rate": 1.9998288512541295e-05, "loss": 1.5702, "step": 39600 }, { "epoch": 0.029524191989052994, "grad_norm": 0.4435807764530182, "learning_rate": 1.9998279856905127e-05, "loss": 1.5708, "step": 39700 }, { "epoch": 0.02959856023083902, "grad_norm": 0.5487297773361206, "learning_rate": 1.999827117943861e-05, "loss": 1.5184, "step": 39800 }, { "epoch": 0.02967292847262505, "grad_norm": 0.8344607949256897, "learning_rate": 1.9998262480141762e-05, "loss": 1.5454, "step": 39900 }, { "epoch": 0.02974729671441108, "grad_norm": 0.8898949027061462, "learning_rate": 1.9998253759014602e-05, "loss": 1.5409, "step": 40000 }, { "epoch": 0.029821664956197105, "grad_norm": 0.897030770778656, "learning_rate": 1.9998245016057147e-05, "loss": 1.5316, "step": 40100 }, { "epoch": 0.029896033197983132, "grad_norm": 0.6615723371505737, "learning_rate": 1.999823625126942e-05, "loss": 1.6079, "step": 40200 }, { "epoch": 0.02997040143976916, "grad_norm": 0.41309353709220886, "learning_rate": 1.9998227464651438e-05, "loss": 1.5077, "step": 40300 }, { "epoch": 0.03004476968155519, "grad_norm": 0.7121081352233887, "learning_rate": 1.9998218656203218e-05, "loss": 1.5346, "step": 40400 }, { "epoch": 0.030119137923341217, "grad_norm": 0.7162127494812012, "learning_rate": 1.9998209825924784e-05, "loss": 1.5369, "step": 40500 }, { "epoch": 0.030193506165127244, "grad_norm": 0.5943055748939514, "learning_rate": 1.9998200973816152e-05, "loss": 1.5852, "step": 40600 }, { "epoch": 0.03026787440691327, "grad_norm": 0.6746940612792969, "learning_rate": 1.9998192099877344e-05, "loss": 1.5407, "step": 40700 }, { "epoch": 0.0303422426486993, "grad_norm": 0.9628979563713074, "learning_rate": 1.9998183204108375e-05, "loss": 1.4937, "step": 40800 }, { "epoch": 0.030416610890485328, "grad_norm": 0.3971594274044037, "learning_rate": 1.9998174286509267e-05, "loss": 1.5628, "step": 40900 }, { "epoch": 0.030490979132271355, "grad_norm": 0.553767204284668, "learning_rate": 1.9998165347080043e-05, "loss": 1.5182, "step": 41000 }, { "epoch": 0.030565347374057382, "grad_norm": 0.4197104573249817, "learning_rate": 1.9998156385820716e-05, "loss": 1.4853, "step": 41100 }, { "epoch": 0.03063971561584341, "grad_norm": 0.7118240594863892, "learning_rate": 1.9998147402731308e-05, "loss": 1.4737, "step": 41200 }, { "epoch": 0.03071408385762944, "grad_norm": 0.7333774566650391, "learning_rate": 1.999813839781184e-05, "loss": 1.5183, "step": 41300 }, { "epoch": 0.030788452099415466, "grad_norm": 0.509201169013977, "learning_rate": 1.9998129371062332e-05, "loss": 1.4873, "step": 41400 }, { "epoch": 0.030862820341201493, "grad_norm": 0.3249990940093994, "learning_rate": 1.9998120322482803e-05, "loss": 1.4316, "step": 41500 }, { "epoch": 0.03093718858298752, "grad_norm": 0.5361568331718445, "learning_rate": 1.9998111252073272e-05, "loss": 1.5113, "step": 41600 }, { "epoch": 0.031011556824773547, "grad_norm": 1.3092052936553955, "learning_rate": 1.9998102159833758e-05, "loss": 1.5448, "step": 41700 }, { "epoch": 0.031085925066559578, "grad_norm": 0.6276385188102722, "learning_rate": 1.999809304576428e-05, "loss": 1.6223, "step": 41800 }, { "epoch": 0.031160293308345605, "grad_norm": 0.7364848256111145, "learning_rate": 1.9998083909864863e-05, "loss": 1.4543, "step": 41900 }, { "epoch": 0.03123466155013163, "grad_norm": 0.3654361367225647, "learning_rate": 1.9998074752135523e-05, "loss": 1.6071, "step": 42000 }, { "epoch": 0.03130902979191766, "grad_norm": 1.1972397565841675, "learning_rate": 1.999806557257628e-05, "loss": 1.4909, "step": 42100 }, { "epoch": 0.031383398033703686, "grad_norm": 0.5845790505409241, "learning_rate": 1.9998056371187155e-05, "loss": 1.5687, "step": 42200 }, { "epoch": 0.03145776627548971, "grad_norm": 0.7037214636802673, "learning_rate": 1.9998047147968168e-05, "loss": 1.5561, "step": 42300 }, { "epoch": 0.03153213451727574, "grad_norm": 0.5212551951408386, "learning_rate": 1.999803790291934e-05, "loss": 1.5063, "step": 42400 }, { "epoch": 0.03160650275906177, "grad_norm": 0.6110777854919434, "learning_rate": 1.999802863604069e-05, "loss": 1.4896, "step": 42500 }, { "epoch": 0.0316808710008478, "grad_norm": 0.6877493858337402, "learning_rate": 1.999801934733224e-05, "loss": 1.5779, "step": 42600 }, { "epoch": 0.03175523924263383, "grad_norm": 0.4461131989955902, "learning_rate": 1.9998010036794005e-05, "loss": 1.5973, "step": 42700 }, { "epoch": 0.031829607484419854, "grad_norm": 1.0050228834152222, "learning_rate": 1.999800070442601e-05, "loss": 1.5149, "step": 42800 }, { "epoch": 0.03190397572620588, "grad_norm": 0.46876657009124756, "learning_rate": 1.9997991350228275e-05, "loss": 1.6122, "step": 42900 }, { "epoch": 0.03197834396799191, "grad_norm": 0.4919954240322113, "learning_rate": 1.999798197420082e-05, "loss": 1.5411, "step": 43000 }, { "epoch": 0.032052712209777935, "grad_norm": 0.9554354548454285, "learning_rate": 1.9997972576343668e-05, "loss": 1.579, "step": 43100 }, { "epoch": 0.03212708045156396, "grad_norm": 1.0671650171279907, "learning_rate": 1.9997963156656835e-05, "loss": 1.5999, "step": 43200 }, { "epoch": 0.03220144869334999, "grad_norm": 0.8465139269828796, "learning_rate": 1.999795371514034e-05, "loss": 1.5703, "step": 43300 }, { "epoch": 0.032275816935136016, "grad_norm": 0.7047709822654724, "learning_rate": 1.9997944251794212e-05, "loss": 1.5814, "step": 43400 }, { "epoch": 0.03235018517692205, "grad_norm": 0.7836155891418457, "learning_rate": 1.9997934766618465e-05, "loss": 1.5464, "step": 43500 }, { "epoch": 0.03242455341870808, "grad_norm": 0.7335034012794495, "learning_rate": 1.9997925259613124e-05, "loss": 1.5278, "step": 43600 }, { "epoch": 0.032498921660494104, "grad_norm": 0.834950864315033, "learning_rate": 1.9997915730778202e-05, "loss": 1.559, "step": 43700 }, { "epoch": 0.03257328990228013, "grad_norm": 0.7446547150611877, "learning_rate": 1.9997906180113726e-05, "loss": 1.6256, "step": 43800 }, { "epoch": 0.03264765814406616, "grad_norm": 0.5306852459907532, "learning_rate": 1.9997896607619718e-05, "loss": 1.44, "step": 43900 }, { "epoch": 0.032722026385852185, "grad_norm": 0.500023365020752, "learning_rate": 1.9997887013296196e-05, "loss": 1.5355, "step": 44000 }, { "epoch": 0.03279639462763821, "grad_norm": 0.6218491196632385, "learning_rate": 1.9997877397143182e-05, "loss": 1.5741, "step": 44100 }, { "epoch": 0.03287076286942424, "grad_norm": 0.3754362463951111, "learning_rate": 1.9997867759160696e-05, "loss": 1.6125, "step": 44200 }, { "epoch": 0.032945131111210266, "grad_norm": 0.9419918656349182, "learning_rate": 1.999785809934876e-05, "loss": 1.4781, "step": 44300 }, { "epoch": 0.0330194993529963, "grad_norm": 0.503409743309021, "learning_rate": 1.9997848417707394e-05, "loss": 1.532, "step": 44400 }, { "epoch": 0.03309386759478233, "grad_norm": 0.6554058194160461, "learning_rate": 1.999783871423662e-05, "loss": 1.5704, "step": 44500 }, { "epoch": 0.033168235836568354, "grad_norm": 0.9691445231437683, "learning_rate": 1.9997828988936462e-05, "loss": 1.5278, "step": 44600 }, { "epoch": 0.03324260407835438, "grad_norm": 0.43620389699935913, "learning_rate": 1.999781924180694e-05, "loss": 1.5436, "step": 44700 }, { "epoch": 0.03331697232014041, "grad_norm": 0.6035354137420654, "learning_rate": 1.999780947284807e-05, "loss": 1.5899, "step": 44800 }, { "epoch": 0.033391340561926434, "grad_norm": 0.3441450595855713, "learning_rate": 1.9997799682059875e-05, "loss": 1.5443, "step": 44900 }, { "epoch": 0.03346570880371246, "grad_norm": 0.5419406294822693, "learning_rate": 1.999778986944238e-05, "loss": 1.4789, "step": 45000 }, { "epoch": 0.03354007704549849, "grad_norm": 0.7912573218345642, "learning_rate": 1.9997780034995605e-05, "loss": 1.4816, "step": 45100 }, { "epoch": 0.033614445287284515, "grad_norm": 0.8978769779205322, "learning_rate": 1.9997770178719573e-05, "loss": 1.5124, "step": 45200 }, { "epoch": 0.03368881352907054, "grad_norm": 0.6722145080566406, "learning_rate": 1.99977603006143e-05, "loss": 1.5229, "step": 45300 }, { "epoch": 0.033763181770856576, "grad_norm": 0.4918314218521118, "learning_rate": 1.9997750400679815e-05, "loss": 1.562, "step": 45400 }, { "epoch": 0.0338375500126426, "grad_norm": 0.9343436360359192, "learning_rate": 1.9997740478916138e-05, "loss": 1.6147, "step": 45500 }, { "epoch": 0.03391191825442863, "grad_norm": 1.0771671533584595, "learning_rate": 1.9997730535323287e-05, "loss": 1.4441, "step": 45600 }, { "epoch": 0.03398628649621466, "grad_norm": 0.666222095489502, "learning_rate": 1.999772056990128e-05, "loss": 1.536, "step": 45700 }, { "epoch": 0.034060654738000684, "grad_norm": 0.5621564388275146, "learning_rate": 1.9997710582650153e-05, "loss": 1.5652, "step": 45800 }, { "epoch": 0.03413502297978671, "grad_norm": 0.6601430177688599, "learning_rate": 1.9997700573569912e-05, "loss": 1.5309, "step": 45900 }, { "epoch": 0.03420939122157274, "grad_norm": 0.7411925792694092, "learning_rate": 1.9997690542660585e-05, "loss": 1.4918, "step": 46000 }, { "epoch": 0.034283759463358765, "grad_norm": 0.5674101710319519, "learning_rate": 1.99976804899222e-05, "loss": 1.5383, "step": 46100 }, { "epoch": 0.03435812770514479, "grad_norm": 0.8503201007843018, "learning_rate": 1.999767041535477e-05, "loss": 1.5298, "step": 46200 }, { "epoch": 0.034432495946930826, "grad_norm": 0.8432891368865967, "learning_rate": 1.999766031895832e-05, "loss": 1.5419, "step": 46300 }, { "epoch": 0.03450686418871685, "grad_norm": 0.41764137148857117, "learning_rate": 1.9997650200732876e-05, "loss": 1.5313, "step": 46400 }, { "epoch": 0.03458123243050288, "grad_norm": 1.0963222980499268, "learning_rate": 1.9997640060678455e-05, "loss": 1.5593, "step": 46500 }, { "epoch": 0.03465560067228891, "grad_norm": 0.5194404721260071, "learning_rate": 1.9997629898795082e-05, "loss": 1.6352, "step": 46600 }, { "epoch": 0.034729968914074934, "grad_norm": 0.6641316413879395, "learning_rate": 1.9997619715082777e-05, "loss": 1.552, "step": 46700 }, { "epoch": 0.03480433715586096, "grad_norm": 1.1054824590682983, "learning_rate": 1.999760950954156e-05, "loss": 1.5176, "step": 46800 }, { "epoch": 0.03487870539764699, "grad_norm": 0.44163691997528076, "learning_rate": 1.9997599282171466e-05, "loss": 1.5985, "step": 46900 }, { "epoch": 0.034953073639433015, "grad_norm": 0.8304015398025513, "learning_rate": 1.99975890329725e-05, "loss": 1.554, "step": 47000 }, { "epoch": 0.03502744188121904, "grad_norm": 0.8395280838012695, "learning_rate": 1.9997578761944693e-05, "loss": 1.6185, "step": 47100 }, { "epoch": 0.03510181012300507, "grad_norm": 0.8857927322387695, "learning_rate": 1.9997568469088068e-05, "loss": 1.5108, "step": 47200 }, { "epoch": 0.0351761783647911, "grad_norm": 0.6471524834632874, "learning_rate": 1.999755815440265e-05, "loss": 1.4693, "step": 47300 }, { "epoch": 0.03525054660657713, "grad_norm": 0.6664785146713257, "learning_rate": 1.9997547817888453e-05, "loss": 1.5391, "step": 47400 }, { "epoch": 0.035324914848363156, "grad_norm": 0.4979814887046814, "learning_rate": 1.9997537459545505e-05, "loss": 1.5367, "step": 47500 }, { "epoch": 0.03539928309014918, "grad_norm": 0.5753507614135742, "learning_rate": 1.9997527079373828e-05, "loss": 1.547, "step": 47600 }, { "epoch": 0.03547365133193521, "grad_norm": 0.5349861979484558, "learning_rate": 1.9997516677373444e-05, "loss": 1.5317, "step": 47700 }, { "epoch": 0.03554801957372124, "grad_norm": 0.49024057388305664, "learning_rate": 1.9997506253544377e-05, "loss": 1.4412, "step": 47800 }, { "epoch": 0.035622387815507264, "grad_norm": 0.4172305166721344, "learning_rate": 1.9997495807886648e-05, "loss": 1.4767, "step": 47900 }, { "epoch": 0.03569675605729329, "grad_norm": 0.8609969615936279, "learning_rate": 1.9997485340400283e-05, "loss": 1.5191, "step": 48000 }, { "epoch": 0.03577112429907932, "grad_norm": 0.46808651089668274, "learning_rate": 1.9997474851085304e-05, "loss": 1.5835, "step": 48100 }, { "epoch": 0.03584549254086535, "grad_norm": 1.0232137441635132, "learning_rate": 1.999746433994173e-05, "loss": 1.5322, "step": 48200 }, { "epoch": 0.03591986078265138, "grad_norm": 0.6745514273643494, "learning_rate": 1.9997453806969588e-05, "loss": 1.5706, "step": 48300 }, { "epoch": 0.035994229024437406, "grad_norm": 1.114139437675476, "learning_rate": 1.99974432521689e-05, "loss": 1.5895, "step": 48400 }, { "epoch": 0.03606859726622343, "grad_norm": 0.7316710948944092, "learning_rate": 1.9997432675539686e-05, "loss": 1.4594, "step": 48500 }, { "epoch": 0.03614296550800946, "grad_norm": 0.4518311023712158, "learning_rate": 1.9997422077081973e-05, "loss": 1.5374, "step": 48600 }, { "epoch": 0.03621733374979549, "grad_norm": 0.6804157495498657, "learning_rate": 1.999741145679578e-05, "loss": 1.5915, "step": 48700 }, { "epoch": 0.036291701991581514, "grad_norm": 0.4367609918117523, "learning_rate": 1.999740081468114e-05, "loss": 1.5092, "step": 48800 }, { "epoch": 0.03636607023336754, "grad_norm": 0.7415800094604492, "learning_rate": 1.9997390150738063e-05, "loss": 1.4548, "step": 48900 }, { "epoch": 0.03644043847515357, "grad_norm": 0.4981917440891266, "learning_rate": 1.999737946496658e-05, "loss": 1.53, "step": 49000 }, { "epoch": 0.036514806716939595, "grad_norm": 0.42718860507011414, "learning_rate": 1.9997368757366712e-05, "loss": 1.4911, "step": 49100 }, { "epoch": 0.03658917495872563, "grad_norm": 0.5294268131256104, "learning_rate": 1.999735802793849e-05, "loss": 1.5203, "step": 49200 }, { "epoch": 0.036663543200511656, "grad_norm": 0.5844396948814392, "learning_rate": 1.999734727668192e-05, "loss": 1.5442, "step": 49300 }, { "epoch": 0.03673791144229768, "grad_norm": 0.7835099697113037, "learning_rate": 1.9997336503597043e-05, "loss": 1.4786, "step": 49400 }, { "epoch": 0.03681227968408371, "grad_norm": 0.6045675873756409, "learning_rate": 1.9997325708683875e-05, "loss": 1.5386, "step": 49500 }, { "epoch": 0.03688664792586974, "grad_norm": 0.651782751083374, "learning_rate": 1.9997314891942442e-05, "loss": 1.4999, "step": 49600 }, { "epoch": 0.036961016167655764, "grad_norm": 0.45741456747055054, "learning_rate": 1.9997304053372762e-05, "loss": 1.4946, "step": 49700 }, { "epoch": 0.03703538440944179, "grad_norm": 0.5363433957099915, "learning_rate": 1.999729319297486e-05, "loss": 1.5421, "step": 49800 }, { "epoch": 0.03710975265122782, "grad_norm": 0.45601820945739746, "learning_rate": 1.9997282310748768e-05, "loss": 1.5149, "step": 49900 }, { "epoch": 0.037184120893013844, "grad_norm": 0.6308805346488953, "learning_rate": 1.9997271406694504e-05, "loss": 1.5464, "step": 50000 }, { "epoch": 0.03725848913479988, "grad_norm": 0.6946158409118652, "learning_rate": 1.999726048081209e-05, "loss": 1.5865, "step": 50100 }, { "epoch": 0.037332857376585905, "grad_norm": 0.4899694323539734, "learning_rate": 1.9997249533101554e-05, "loss": 1.5839, "step": 50200 }, { "epoch": 0.03740722561837193, "grad_norm": 0.7726057767868042, "learning_rate": 1.9997238563562912e-05, "loss": 1.4571, "step": 50300 }, { "epoch": 0.03748159386015796, "grad_norm": 0.9151437878608704, "learning_rate": 1.9997227572196197e-05, "loss": 1.4459, "step": 50400 }, { "epoch": 0.037555962101943986, "grad_norm": 0.6182152032852173, "learning_rate": 1.9997216559001433e-05, "loss": 1.4586, "step": 50500 }, { "epoch": 0.03763033034373001, "grad_norm": 0.7595764398574829, "learning_rate": 1.9997205523978636e-05, "loss": 1.5382, "step": 50600 }, { "epoch": 0.03770469858551604, "grad_norm": 0.8368933796882629, "learning_rate": 1.9997194467127838e-05, "loss": 1.636, "step": 50700 }, { "epoch": 0.03777906682730207, "grad_norm": 1.3371498584747314, "learning_rate": 1.9997183388449055e-05, "loss": 1.5225, "step": 50800 }, { "epoch": 0.037853435069088094, "grad_norm": 0.6440578103065491, "learning_rate": 1.999717228794232e-05, "loss": 1.5165, "step": 50900 }, { "epoch": 0.03792780331087412, "grad_norm": 0.5950276255607605, "learning_rate": 1.999716116560765e-05, "loss": 1.552, "step": 51000 }, { "epoch": 0.038002171552660155, "grad_norm": 0.7176305651664734, "learning_rate": 1.9997150021445074e-05, "loss": 1.5019, "step": 51100 }, { "epoch": 0.03807653979444618, "grad_norm": 0.7437789440155029, "learning_rate": 1.999713885545462e-05, "loss": 1.5632, "step": 51200 }, { "epoch": 0.03815090803623221, "grad_norm": 0.48256799578666687, "learning_rate": 1.9997127667636298e-05, "loss": 1.4943, "step": 51300 }, { "epoch": 0.038225276278018236, "grad_norm": 0.8726604580879211, "learning_rate": 1.9997116457990148e-05, "loss": 1.6079, "step": 51400 }, { "epoch": 0.03829964451980426, "grad_norm": 0.6188727617263794, "learning_rate": 1.9997105226516186e-05, "loss": 1.5845, "step": 51500 }, { "epoch": 0.03837401276159029, "grad_norm": 0.5416398048400879, "learning_rate": 1.9997093973214442e-05, "loss": 1.5297, "step": 51600 }, { "epoch": 0.03844838100337632, "grad_norm": 0.8704063296318054, "learning_rate": 1.999708269808493e-05, "loss": 1.5324, "step": 51700 }, { "epoch": 0.038522749245162344, "grad_norm": 0.9990126490592957, "learning_rate": 1.9997071401127688e-05, "loss": 1.5435, "step": 51800 }, { "epoch": 0.03859711748694837, "grad_norm": 0.402971476316452, "learning_rate": 1.9997060082342732e-05, "loss": 1.5755, "step": 51900 }, { "epoch": 0.038671485728734405, "grad_norm": 0.7084416747093201, "learning_rate": 1.9997048741730092e-05, "loss": 1.5153, "step": 52000 }, { "epoch": 0.03874585397052043, "grad_norm": 0.5382058620452881, "learning_rate": 1.9997037379289786e-05, "loss": 1.5378, "step": 52100 }, { "epoch": 0.03882022221230646, "grad_norm": 0.5846664905548096, "learning_rate": 1.9997025995021845e-05, "loss": 1.5118, "step": 52200 }, { "epoch": 0.038894590454092486, "grad_norm": 0.6125427484512329, "learning_rate": 1.999701458892629e-05, "loss": 1.5567, "step": 52300 }, { "epoch": 0.03896895869587851, "grad_norm": 0.4352121949195862, "learning_rate": 1.999700316100315e-05, "loss": 1.5665, "step": 52400 }, { "epoch": 0.03904332693766454, "grad_norm": 0.4972393810749054, "learning_rate": 1.9996991711252448e-05, "loss": 1.5728, "step": 52500 }, { "epoch": 0.039117695179450566, "grad_norm": 7.361449241638184, "learning_rate": 1.9996980239674207e-05, "loss": 1.4814, "step": 52600 }, { "epoch": 0.03919206342123659, "grad_norm": 0.4652218818664551, "learning_rate": 1.9996968746268452e-05, "loss": 1.5553, "step": 52700 }, { "epoch": 0.03926643166302262, "grad_norm": 0.7898038029670715, "learning_rate": 1.9996957231035213e-05, "loss": 1.5251, "step": 52800 }, { "epoch": 0.03934079990480865, "grad_norm": 0.5735042095184326, "learning_rate": 1.999694569397451e-05, "loss": 1.474, "step": 52900 }, { "epoch": 0.03941516814659468, "grad_norm": 0.6072685122489929, "learning_rate": 1.9996934135086367e-05, "loss": 1.6186, "step": 53000 }, { "epoch": 0.03948953638838071, "grad_norm": 0.5961938500404358, "learning_rate": 1.9996922554370818e-05, "loss": 1.5622, "step": 53100 }, { "epoch": 0.039563904630166735, "grad_norm": 0.6281226277351379, "learning_rate": 1.999691095182788e-05, "loss": 1.5508, "step": 53200 }, { "epoch": 0.03963827287195276, "grad_norm": 0.9202520847320557, "learning_rate": 1.9996899327457576e-05, "loss": 1.5623, "step": 53300 }, { "epoch": 0.03971264111373879, "grad_norm": 0.4792959988117218, "learning_rate": 1.9996887681259946e-05, "loss": 1.5363, "step": 53400 }, { "epoch": 0.039787009355524816, "grad_norm": 0.4955751299858093, "learning_rate": 1.9996876013234997e-05, "loss": 1.5026, "step": 53500 }, { "epoch": 0.03986137759731084, "grad_norm": 1.114710807800293, "learning_rate": 1.9996864323382766e-05, "loss": 1.5397, "step": 53600 }, { "epoch": 0.03993574583909687, "grad_norm": 0.5231966972351074, "learning_rate": 1.9996852611703278e-05, "loss": 1.4971, "step": 53700 }, { "epoch": 0.0400101140808829, "grad_norm": 0.8232663869857788, "learning_rate": 1.9996840878196554e-05, "loss": 1.5054, "step": 53800 }, { "epoch": 0.04008448232266893, "grad_norm": 0.5382178425788879, "learning_rate": 1.999682912286262e-05, "loss": 1.47, "step": 53900 }, { "epoch": 0.04015885056445496, "grad_norm": 0.8355742692947388, "learning_rate": 1.999681734570151e-05, "loss": 1.3635, "step": 54000 }, { "epoch": 0.040233218806240985, "grad_norm": 0.715268611907959, "learning_rate": 1.9996805546713237e-05, "loss": 1.5606, "step": 54100 }, { "epoch": 0.04030758704802701, "grad_norm": 0.9210833311080933, "learning_rate": 1.9996793725897836e-05, "loss": 1.5702, "step": 54200 }, { "epoch": 0.04038195528981304, "grad_norm": 0.5435687899589539, "learning_rate": 1.9996781883255328e-05, "loss": 1.5468, "step": 54300 }, { "epoch": 0.040456323531599066, "grad_norm": 0.517410159111023, "learning_rate": 1.9996770018785743e-05, "loss": 1.597, "step": 54400 }, { "epoch": 0.04053069177338509, "grad_norm": 0.5302937030792236, "learning_rate": 1.9996758132489102e-05, "loss": 1.4796, "step": 54500 }, { "epoch": 0.04060506001517112, "grad_norm": 0.8928558230400085, "learning_rate": 1.9996746224365435e-05, "loss": 1.5311, "step": 54600 }, { "epoch": 0.04067942825695715, "grad_norm": 0.9816573262214661, "learning_rate": 1.9996734294414765e-05, "loss": 1.5409, "step": 54700 }, { "epoch": 0.040753796498743174, "grad_norm": 0.5399038791656494, "learning_rate": 1.999672234263712e-05, "loss": 1.4705, "step": 54800 }, { "epoch": 0.04082816474052921, "grad_norm": 0.815388560295105, "learning_rate": 1.9996710369032528e-05, "loss": 1.5708, "step": 54900 }, { "epoch": 0.040902532982315234, "grad_norm": 0.765061616897583, "learning_rate": 1.999669837360101e-05, "loss": 1.4828, "step": 55000 }, { "epoch": 0.04097690122410126, "grad_norm": 0.8168923258781433, "learning_rate": 1.99966863563426e-05, "loss": 1.5167, "step": 55100 }, { "epoch": 0.04105126946588729, "grad_norm": 0.6376170516014099, "learning_rate": 1.9996674317257315e-05, "loss": 1.5286, "step": 55200 }, { "epoch": 0.041125637707673315, "grad_norm": 0.7510162591934204, "learning_rate": 1.9996662256345184e-05, "loss": 1.5737, "step": 55300 }, { "epoch": 0.04120000594945934, "grad_norm": 0.5505034327507019, "learning_rate": 1.9996650173606234e-05, "loss": 1.5388, "step": 55400 }, { "epoch": 0.04127437419124537, "grad_norm": 0.49698886275291443, "learning_rate": 1.99966380690405e-05, "loss": 1.5832, "step": 55500 }, { "epoch": 0.041348742433031396, "grad_norm": 0.5520877242088318, "learning_rate": 1.9996625942647994e-05, "loss": 1.5643, "step": 55600 }, { "epoch": 0.04142311067481742, "grad_norm": 0.6185612082481384, "learning_rate": 1.999661379442875e-05, "loss": 1.5087, "step": 55700 }, { "epoch": 0.04149747891660345, "grad_norm": 0.8302799463272095, "learning_rate": 1.9996601624382795e-05, "loss": 1.6283, "step": 55800 }, { "epoch": 0.041571847158389484, "grad_norm": 0.9720719456672668, "learning_rate": 1.9996589432510155e-05, "loss": 1.5064, "step": 55900 }, { "epoch": 0.04164621540017551, "grad_norm": 0.40555909276008606, "learning_rate": 1.9996577218810855e-05, "loss": 1.5138, "step": 56000 }, { "epoch": 0.04172058364196154, "grad_norm": 0.9815244674682617, "learning_rate": 1.9996564983284918e-05, "loss": 1.4913, "step": 56100 }, { "epoch": 0.041794951883747565, "grad_norm": 1.1608703136444092, "learning_rate": 1.9996552725932382e-05, "loss": 1.4939, "step": 56200 }, { "epoch": 0.04186932012553359, "grad_norm": 0.38927561044692993, "learning_rate": 1.9996540446753264e-05, "loss": 1.5115, "step": 56300 }, { "epoch": 0.04194368836731962, "grad_norm": 0.7470927834510803, "learning_rate": 1.9996528145747594e-05, "loss": 1.5539, "step": 56400 }, { "epoch": 0.042018056609105646, "grad_norm": 0.46110135316848755, "learning_rate": 1.99965158229154e-05, "loss": 1.4602, "step": 56500 }, { "epoch": 0.04209242485089167, "grad_norm": 0.5916282534599304, "learning_rate": 1.9996503478256705e-05, "loss": 1.5721, "step": 56600 }, { "epoch": 0.0421667930926777, "grad_norm": 0.5567501187324524, "learning_rate": 1.999649111177154e-05, "loss": 1.482, "step": 56700 }, { "epoch": 0.042241161334463734, "grad_norm": 0.8147956728935242, "learning_rate": 1.9996478723459928e-05, "loss": 1.5518, "step": 56800 }, { "epoch": 0.04231552957624976, "grad_norm": 1.0146034955978394, "learning_rate": 1.9996466313321906e-05, "loss": 1.52, "step": 56900 }, { "epoch": 0.04238989781803579, "grad_norm": 0.267634779214859, "learning_rate": 1.9996453881357486e-05, "loss": 1.598, "step": 57000 }, { "epoch": 0.042464266059821815, "grad_norm": 0.7316186428070068, "learning_rate": 1.9996441427566707e-05, "loss": 1.5306, "step": 57100 }, { "epoch": 0.04253863430160784, "grad_norm": 1.078633189201355, "learning_rate": 1.999642895194959e-05, "loss": 1.4847, "step": 57200 }, { "epoch": 0.04261300254339387, "grad_norm": 0.5316028594970703, "learning_rate": 1.9996416454506164e-05, "loss": 1.5928, "step": 57300 }, { "epoch": 0.042687370785179896, "grad_norm": 0.573113739490509, "learning_rate": 1.999640393523646e-05, "loss": 1.5609, "step": 57400 }, { "epoch": 0.04276173902696592, "grad_norm": 0.45106610655784607, "learning_rate": 1.9996391394140496e-05, "loss": 1.5693, "step": 57500 }, { "epoch": 0.04283610726875195, "grad_norm": 0.6011554002761841, "learning_rate": 1.9996378831218307e-05, "loss": 1.5968, "step": 57600 }, { "epoch": 0.042910475510537976, "grad_norm": 0.7017727494239807, "learning_rate": 1.9996366246469922e-05, "loss": 1.4973, "step": 57700 }, { "epoch": 0.04298484375232401, "grad_norm": 0.8994572758674622, "learning_rate": 1.9996353639895365e-05, "loss": 1.4624, "step": 57800 }, { "epoch": 0.04305921199411004, "grad_norm": 0.8928848505020142, "learning_rate": 1.9996341011494663e-05, "loss": 1.5755, "step": 57900 }, { "epoch": 0.043133580235896064, "grad_norm": 0.7762150168418884, "learning_rate": 1.999632836126784e-05, "loss": 1.4074, "step": 58000 }, { "epoch": 0.04320794847768209, "grad_norm": 0.5097442865371704, "learning_rate": 1.9996315689214932e-05, "loss": 1.5281, "step": 58100 }, { "epoch": 0.04328231671946812, "grad_norm": 0.803718626499176, "learning_rate": 1.999630299533596e-05, "loss": 1.499, "step": 58200 }, { "epoch": 0.043356684961254145, "grad_norm": 0.5989664196968079, "learning_rate": 1.9996290279630956e-05, "loss": 1.5286, "step": 58300 }, { "epoch": 0.04343105320304017, "grad_norm": 0.45334750413894653, "learning_rate": 1.999627754209995e-05, "loss": 1.5598, "step": 58400 }, { "epoch": 0.0435054214448262, "grad_norm": 0.9461644887924194, "learning_rate": 1.999626478274296e-05, "loss": 1.4569, "step": 58500 }, { "epoch": 0.043579789686612226, "grad_norm": 0.5558738112449646, "learning_rate": 1.999625200156002e-05, "loss": 1.5329, "step": 58600 }, { "epoch": 0.04365415792839826, "grad_norm": 0.49125516414642334, "learning_rate": 1.999623919855116e-05, "loss": 1.4805, "step": 58700 }, { "epoch": 0.04372852617018429, "grad_norm": 0.6038479208946228, "learning_rate": 1.9996226373716406e-05, "loss": 1.5589, "step": 58800 }, { "epoch": 0.043802894411970314, "grad_norm": 0.4560091197490692, "learning_rate": 1.9996213527055784e-05, "loss": 1.4538, "step": 58900 }, { "epoch": 0.04387726265375634, "grad_norm": 0.6255136728286743, "learning_rate": 1.9996200658569323e-05, "loss": 1.5959, "step": 59000 }, { "epoch": 0.04395163089554237, "grad_norm": 0.8603237867355347, "learning_rate": 1.999618776825705e-05, "loss": 1.4769, "step": 59100 }, { "epoch": 0.044025999137328395, "grad_norm": 1.027685523033142, "learning_rate": 1.9996174856119e-05, "loss": 1.485, "step": 59200 }, { "epoch": 0.04410036737911442, "grad_norm": 0.6371426582336426, "learning_rate": 1.999616192215519e-05, "loss": 1.5713, "step": 59300 }, { "epoch": 0.04417473562090045, "grad_norm": 0.8155677318572998, "learning_rate": 1.9996148966365664e-05, "loss": 1.5755, "step": 59400 }, { "epoch": 0.044249103862686476, "grad_norm": 0.9418515563011169, "learning_rate": 1.9996135988750432e-05, "loss": 1.51, "step": 59500 }, { "epoch": 0.0443234721044725, "grad_norm": 0.529082179069519, "learning_rate": 1.9996122989309536e-05, "loss": 1.5254, "step": 59600 }, { "epoch": 0.04439784034625854, "grad_norm": 0.5595930218696594, "learning_rate": 1.9996109968042992e-05, "loss": 1.5515, "step": 59700 }, { "epoch": 0.044472208588044564, "grad_norm": 0.8503856062889099, "learning_rate": 1.9996096924950843e-05, "loss": 1.5123, "step": 59800 }, { "epoch": 0.04454657682983059, "grad_norm": 0.6979494690895081, "learning_rate": 1.9996083860033107e-05, "loss": 1.5213, "step": 59900 }, { "epoch": 0.04462094507161662, "grad_norm": 0.5807011723518372, "learning_rate": 1.9996070773289816e-05, "loss": 1.5411, "step": 60000 }, { "epoch": 0.044695313313402645, "grad_norm": 0.6768651604652405, "learning_rate": 1.9996057664721e-05, "loss": 1.5252, "step": 60100 }, { "epoch": 0.04476968155518867, "grad_norm": 0.3594638407230377, "learning_rate": 1.9996044534326682e-05, "loss": 1.5126, "step": 60200 }, { "epoch": 0.0448440497969747, "grad_norm": 0.4025649130344391, "learning_rate": 1.9996031382106897e-05, "loss": 1.561, "step": 60300 }, { "epoch": 0.044918418038760725, "grad_norm": 0.8125213980674744, "learning_rate": 1.9996018208061675e-05, "loss": 1.5445, "step": 60400 }, { "epoch": 0.04499278628054675, "grad_norm": 0.5969058275222778, "learning_rate": 1.9996005012191037e-05, "loss": 1.582, "step": 60500 }, { "epoch": 0.045067154522332786, "grad_norm": 1.1144986152648926, "learning_rate": 1.9995991794495016e-05, "loss": 1.5563, "step": 60600 }, { "epoch": 0.04514152276411881, "grad_norm": 0.8091686367988586, "learning_rate": 1.999597855497364e-05, "loss": 1.5199, "step": 60700 }, { "epoch": 0.04521589100590484, "grad_norm": 1.3050564527511597, "learning_rate": 1.999596529362694e-05, "loss": 1.5034, "step": 60800 }, { "epoch": 0.04529025924769087, "grad_norm": 0.5470508933067322, "learning_rate": 1.9995952010454943e-05, "loss": 1.5684, "step": 60900 }, { "epoch": 0.045364627489476894, "grad_norm": 0.9612744450569153, "learning_rate": 1.9995938705457682e-05, "loss": 1.6064, "step": 61000 }, { "epoch": 0.04543899573126292, "grad_norm": 0.9011774659156799, "learning_rate": 1.9995925378635177e-05, "loss": 1.4553, "step": 61100 }, { "epoch": 0.04551336397304895, "grad_norm": 1.70448637008667, "learning_rate": 1.9995912029987466e-05, "loss": 1.4507, "step": 61200 }, { "epoch": 0.045587732214834975, "grad_norm": 1.7321926355361938, "learning_rate": 1.999589865951457e-05, "loss": 1.5071, "step": 61300 }, { "epoch": 0.045662100456621, "grad_norm": 0.5415388941764832, "learning_rate": 1.999588526721653e-05, "loss": 1.5518, "step": 61400 }, { "epoch": 0.04573646869840703, "grad_norm": 0.8833714127540588, "learning_rate": 1.9995871853093366e-05, "loss": 1.5299, "step": 61500 }, { "epoch": 0.04581083694019306, "grad_norm": 0.49804380536079407, "learning_rate": 1.999585841714511e-05, "loss": 1.5215, "step": 61600 }, { "epoch": 0.04588520518197909, "grad_norm": 1.3999980688095093, "learning_rate": 1.999584495937179e-05, "loss": 1.5028, "step": 61700 }, { "epoch": 0.04595957342376512, "grad_norm": 1.1679743528366089, "learning_rate": 1.9995831479773438e-05, "loss": 1.4767, "step": 61800 }, { "epoch": 0.046033941665551144, "grad_norm": 0.7388249635696411, "learning_rate": 1.999581797835008e-05, "loss": 1.558, "step": 61900 }, { "epoch": 0.04610830990733717, "grad_norm": 0.6812136769294739, "learning_rate": 1.9995804455101746e-05, "loss": 1.4495, "step": 62000 }, { "epoch": 0.0461826781491232, "grad_norm": 1.3702300786972046, "learning_rate": 1.999579091002847e-05, "loss": 1.4212, "step": 62100 }, { "epoch": 0.046257046390909225, "grad_norm": 0.42544421553611755, "learning_rate": 1.999577734313028e-05, "loss": 1.5603, "step": 62200 }, { "epoch": 0.04633141463269525, "grad_norm": 0.6235955357551575, "learning_rate": 1.99957637544072e-05, "loss": 1.5164, "step": 62300 }, { "epoch": 0.04640578287448128, "grad_norm": 0.30019888281822205, "learning_rate": 1.9995750143859262e-05, "loss": 1.4764, "step": 62400 }, { "epoch": 0.04648015111626731, "grad_norm": 0.509626567363739, "learning_rate": 1.99957365114865e-05, "loss": 1.535, "step": 62500 }, { "epoch": 0.04655451935805334, "grad_norm": 0.726915717124939, "learning_rate": 1.9995722857288943e-05, "loss": 1.5428, "step": 62600 }, { "epoch": 0.046628887599839366, "grad_norm": 0.5223472714424133, "learning_rate": 1.9995709181266613e-05, "loss": 1.548, "step": 62700 }, { "epoch": 0.04670325584162539, "grad_norm": 0.5914735794067383, "learning_rate": 1.9995695483419554e-05, "loss": 1.5433, "step": 62800 }, { "epoch": 0.04677762408341142, "grad_norm": 1.1892948150634766, "learning_rate": 1.999568176374778e-05, "loss": 1.5964, "step": 62900 }, { "epoch": 0.04685199232519745, "grad_norm": 0.47329986095428467, "learning_rate": 1.9995668022251333e-05, "loss": 1.4587, "step": 63000 }, { "epoch": 0.046926360566983474, "grad_norm": 0.7776244878768921, "learning_rate": 1.9995654258930237e-05, "loss": 1.5118, "step": 63100 }, { "epoch": 0.0470007288087695, "grad_norm": 0.4600290358066559, "learning_rate": 1.9995640473784526e-05, "loss": 1.5327, "step": 63200 }, { "epoch": 0.04707509705055553, "grad_norm": 0.785589873790741, "learning_rate": 1.9995626666814226e-05, "loss": 1.5346, "step": 63300 }, { "epoch": 0.047149465292341555, "grad_norm": 0.34471455216407776, "learning_rate": 1.999561283801937e-05, "loss": 1.5669, "step": 63400 }, { "epoch": 0.04722383353412759, "grad_norm": 0.8968401551246643, "learning_rate": 1.9995598987399988e-05, "loss": 1.4522, "step": 63500 }, { "epoch": 0.047298201775913616, "grad_norm": 0.5577977895736694, "learning_rate": 1.9995585114956104e-05, "loss": 1.5894, "step": 63600 }, { "epoch": 0.04737257001769964, "grad_norm": 0.8406354188919067, "learning_rate": 1.999557122068776e-05, "loss": 1.5585, "step": 63700 }, { "epoch": 0.04744693825948567, "grad_norm": 0.6812056303024292, "learning_rate": 1.9995557304594977e-05, "loss": 1.5531, "step": 63800 }, { "epoch": 0.0475213065012717, "grad_norm": 0.6341506242752075, "learning_rate": 1.999554336667779e-05, "loss": 1.5064, "step": 63900 }, { "epoch": 0.047595674743057724, "grad_norm": 0.7291605472564697, "learning_rate": 1.999552940693623e-05, "loss": 1.4924, "step": 64000 }, { "epoch": 0.04767004298484375, "grad_norm": 0.5496443510055542, "learning_rate": 1.9995515425370317e-05, "loss": 1.5276, "step": 64100 }, { "epoch": 0.04774441122662978, "grad_norm": 0.49453896284103394, "learning_rate": 1.9995501421980096e-05, "loss": 1.4673, "step": 64200 }, { "epoch": 0.047818779468415805, "grad_norm": 0.5134396553039551, "learning_rate": 1.999548739676559e-05, "loss": 1.5857, "step": 64300 }, { "epoch": 0.04789314771020184, "grad_norm": 1.035983681678772, "learning_rate": 1.9995473349726834e-05, "loss": 1.4617, "step": 64400 }, { "epoch": 0.047967515951987866, "grad_norm": 0.4110111594200134, "learning_rate": 1.999545928086385e-05, "loss": 1.599, "step": 64500 }, { "epoch": 0.04804188419377389, "grad_norm": 0.6466584205627441, "learning_rate": 1.999544519017668e-05, "loss": 1.5212, "step": 64600 }, { "epoch": 0.04811625243555992, "grad_norm": 0.501596212387085, "learning_rate": 1.9995431077665345e-05, "loss": 1.5215, "step": 64700 }, { "epoch": 0.04819062067734595, "grad_norm": 0.547459065914154, "learning_rate": 1.9995416943329882e-05, "loss": 1.5414, "step": 64800 }, { "epoch": 0.048264988919131974, "grad_norm": 0.9374314546585083, "learning_rate": 1.999540278717032e-05, "loss": 1.5104, "step": 64900 }, { "epoch": 0.048339357160918, "grad_norm": 0.5237802267074585, "learning_rate": 1.999538860918669e-05, "loss": 1.5065, "step": 65000 }, { "epoch": 0.04841372540270403, "grad_norm": 0.534058690071106, "learning_rate": 1.9995374409379023e-05, "loss": 1.5237, "step": 65100 }, { "epoch": 0.048488093644490055, "grad_norm": 0.5253255367279053, "learning_rate": 1.999536018774735e-05, "loss": 1.5591, "step": 65200 }, { "epoch": 0.04856246188627608, "grad_norm": 0.6362668871879578, "learning_rate": 1.99953459442917e-05, "loss": 1.5155, "step": 65300 }, { "epoch": 0.048636830128062115, "grad_norm": 0.4244192838668823, "learning_rate": 1.999533167901211e-05, "loss": 1.5238, "step": 65400 }, { "epoch": 0.04871119836984814, "grad_norm": 0.7062031030654907, "learning_rate": 1.99953173919086e-05, "loss": 1.637, "step": 65500 }, { "epoch": 0.04878556661163417, "grad_norm": 0.5232000946998596, "learning_rate": 1.9995303082981215e-05, "loss": 1.4824, "step": 65600 }, { "epoch": 0.048859934853420196, "grad_norm": 0.6280112862586975, "learning_rate": 1.9995288752229976e-05, "loss": 1.5882, "step": 65700 }, { "epoch": 0.04893430309520622, "grad_norm": 1.1615891456604004, "learning_rate": 1.999527439965492e-05, "loss": 1.4839, "step": 65800 }, { "epoch": 0.04900867133699225, "grad_norm": 0.6920228600502014, "learning_rate": 1.9995260025256075e-05, "loss": 1.5071, "step": 65900 }, { "epoch": 0.04908303957877828, "grad_norm": 0.7031546235084534, "learning_rate": 1.999524562903347e-05, "loss": 1.4983, "step": 66000 }, { "epoch": 0.049157407820564304, "grad_norm": 0.4306289553642273, "learning_rate": 1.999523121098714e-05, "loss": 1.519, "step": 66100 }, { "epoch": 0.04923177606235033, "grad_norm": 0.533328652381897, "learning_rate": 1.9995216771117123e-05, "loss": 1.5628, "step": 66200 }, { "epoch": 0.049306144304136365, "grad_norm": 0.6325706839561462, "learning_rate": 1.999520230942344e-05, "loss": 1.4369, "step": 66300 }, { "epoch": 0.04938051254592239, "grad_norm": 0.43968090415000916, "learning_rate": 1.9995187825906125e-05, "loss": 1.4506, "step": 66400 }, { "epoch": 0.04945488078770842, "grad_norm": 1.3659377098083496, "learning_rate": 1.9995173320565217e-05, "loss": 1.4786, "step": 66500 }, { "epoch": 0.049529249029494446, "grad_norm": 0.7602143883705139, "learning_rate": 1.9995158793400735e-05, "loss": 1.5922, "step": 66600 }, { "epoch": 0.04960361727128047, "grad_norm": 0.4866381287574768, "learning_rate": 1.999514424441272e-05, "loss": 1.5279, "step": 66700 }, { "epoch": 0.0496779855130665, "grad_norm": 0.9451634287834167, "learning_rate": 1.9995129673601203e-05, "loss": 1.5125, "step": 66800 }, { "epoch": 0.04975235375485253, "grad_norm": 0.49570247530937195, "learning_rate": 1.999511508096621e-05, "loss": 1.556, "step": 66900 }, { "epoch": 0.049826721996638554, "grad_norm": 0.5554278492927551, "learning_rate": 1.999510046650778e-05, "loss": 1.5644, "step": 67000 }, { "epoch": 0.04990109023842458, "grad_norm": 1.070966124534607, "learning_rate": 1.9995085830225943e-05, "loss": 1.4394, "step": 67100 }, { "epoch": 0.04997545848021061, "grad_norm": 0.47984740138053894, "learning_rate": 1.999507117212073e-05, "loss": 1.5499, "step": 67200 }, { "epoch": 0.05004982672199664, "grad_norm": 0.6557255983352661, "learning_rate": 1.999505649219217e-05, "loss": 1.5894, "step": 67300 }, { "epoch": 0.05012419496378267, "grad_norm": 0.4384523332118988, "learning_rate": 1.99950417904403e-05, "loss": 1.5454, "step": 67400 }, { "epoch": 0.050198563205568696, "grad_norm": 1.0821648836135864, "learning_rate": 1.9995027066865148e-05, "loss": 1.5872, "step": 67500 }, { "epoch": 0.05027293144735472, "grad_norm": 0.8576905727386475, "learning_rate": 1.9995012321466747e-05, "loss": 1.5024, "step": 67600 }, { "epoch": 0.05034729968914075, "grad_norm": 0.7904402613639832, "learning_rate": 1.9994997554245136e-05, "loss": 1.4259, "step": 67700 }, { "epoch": 0.050421667930926777, "grad_norm": 1.2858697175979614, "learning_rate": 1.9994982765200337e-05, "loss": 1.5907, "step": 67800 }, { "epoch": 0.050496036172712803, "grad_norm": 0.35552987456321716, "learning_rate": 1.9994967954332388e-05, "loss": 1.5593, "step": 67900 }, { "epoch": 0.05057040441449883, "grad_norm": 0.5848758220672607, "learning_rate": 1.999495312164132e-05, "loss": 1.5435, "step": 68000 }, { "epoch": 0.05064477265628486, "grad_norm": 1.5355236530303955, "learning_rate": 1.999493826712717e-05, "loss": 1.5301, "step": 68100 }, { "epoch": 0.050719140898070884, "grad_norm": 0.595832109451294, "learning_rate": 1.999492339078996e-05, "loss": 1.5225, "step": 68200 }, { "epoch": 0.05079350913985692, "grad_norm": 0.47388339042663574, "learning_rate": 1.999490849262973e-05, "loss": 1.5252, "step": 68300 }, { "epoch": 0.050867877381642945, "grad_norm": 0.48052307963371277, "learning_rate": 1.999489357264651e-05, "loss": 1.5274, "step": 68400 }, { "epoch": 0.05094224562342897, "grad_norm": 0.7523823380470276, "learning_rate": 1.9994878630840334e-05, "loss": 1.5485, "step": 68500 }, { "epoch": 0.051016613865215, "grad_norm": 0.5487205982208252, "learning_rate": 1.9994863667211237e-05, "loss": 1.5851, "step": 68600 }, { "epoch": 0.051090982107001026, "grad_norm": 0.899217963218689, "learning_rate": 1.999484868175925e-05, "loss": 1.5519, "step": 68700 }, { "epoch": 0.05116535034878705, "grad_norm": 0.6217190623283386, "learning_rate": 1.9994833674484398e-05, "loss": 1.465, "step": 68800 }, { "epoch": 0.05123971859057308, "grad_norm": 0.5816856026649475, "learning_rate": 1.9994818645386725e-05, "loss": 1.4822, "step": 68900 }, { "epoch": 0.05131408683235911, "grad_norm": 0.5480476021766663, "learning_rate": 1.999480359446626e-05, "loss": 1.5958, "step": 69000 }, { "epoch": 0.051388455074145134, "grad_norm": 0.6178867220878601, "learning_rate": 1.9994788521723033e-05, "loss": 1.4214, "step": 69100 }, { "epoch": 0.05146282331593117, "grad_norm": 0.639522135257721, "learning_rate": 1.999477342715708e-05, "loss": 1.5462, "step": 69200 }, { "epoch": 0.051537191557717195, "grad_norm": 0.8950421810150146, "learning_rate": 1.9994758310768432e-05, "loss": 1.5562, "step": 69300 }, { "epoch": 0.05161155979950322, "grad_norm": 0.9787744283676147, "learning_rate": 1.9994743172557123e-05, "loss": 1.5684, "step": 69400 }, { "epoch": 0.05168592804128925, "grad_norm": 0.34816843271255493, "learning_rate": 1.999472801252319e-05, "loss": 1.5693, "step": 69500 }, { "epoch": 0.051760296283075276, "grad_norm": 0.8306708931922913, "learning_rate": 1.9994712830666658e-05, "loss": 1.6258, "step": 69600 }, { "epoch": 0.0518346645248613, "grad_norm": 0.6776370406150818, "learning_rate": 1.9994697626987562e-05, "loss": 1.432, "step": 69700 }, { "epoch": 0.05190903276664733, "grad_norm": 0.3862565755844116, "learning_rate": 1.999468240148594e-05, "loss": 1.5958, "step": 69800 }, { "epoch": 0.05198340100843336, "grad_norm": 0.42656075954437256, "learning_rate": 1.9994667154161826e-05, "loss": 1.5192, "step": 69900 }, { "epoch": 0.052057769250219384, "grad_norm": 0.7187511920928955, "learning_rate": 1.9994651885015246e-05, "loss": 1.4779, "step": 70000 }, { "epoch": 0.05213213749200541, "grad_norm": 0.7468114495277405, "learning_rate": 1.9994636594046237e-05, "loss": 1.4672, "step": 70100 }, { "epoch": 0.052206505733791445, "grad_norm": 0.5125714540481567, "learning_rate": 1.9994621281254834e-05, "loss": 1.5607, "step": 70200 }, { "epoch": 0.05228087397557747, "grad_norm": 0.6202149987220764, "learning_rate": 1.999460594664107e-05, "loss": 1.5402, "step": 70300 }, { "epoch": 0.0523552422173635, "grad_norm": 1.1004749536514282, "learning_rate": 1.9994590590204974e-05, "loss": 1.449, "step": 70400 }, { "epoch": 0.052429610459149525, "grad_norm": 0.5230892896652222, "learning_rate": 1.9994575211946588e-05, "loss": 1.5675, "step": 70500 }, { "epoch": 0.05250397870093555, "grad_norm": 0.4736848771572113, "learning_rate": 1.9994559811865936e-05, "loss": 1.4462, "step": 70600 }, { "epoch": 0.05257834694272158, "grad_norm": 0.722038209438324, "learning_rate": 1.9994544389963063e-05, "loss": 1.5297, "step": 70700 }, { "epoch": 0.052652715184507606, "grad_norm": 0.38121601939201355, "learning_rate": 1.999452894623799e-05, "loss": 1.4976, "step": 70800 }, { "epoch": 0.05272708342629363, "grad_norm": 0.7381113767623901, "learning_rate": 1.999451348069076e-05, "loss": 1.6052, "step": 70900 }, { "epoch": 0.05280145166807966, "grad_norm": 0.48022809624671936, "learning_rate": 1.99944979933214e-05, "loss": 1.5308, "step": 71000 }, { "epoch": 0.052875819909865694, "grad_norm": 0.6746697425842285, "learning_rate": 1.9994482484129952e-05, "loss": 1.5776, "step": 71100 }, { "epoch": 0.05295018815165172, "grad_norm": 1.0217593908309937, "learning_rate": 1.999446695311644e-05, "loss": 1.5206, "step": 71200 }, { "epoch": 0.05302455639343775, "grad_norm": 0.9935411810874939, "learning_rate": 1.999445140028091e-05, "loss": 1.5362, "step": 71300 }, { "epoch": 0.053098924635223775, "grad_norm": 0.6215861439704895, "learning_rate": 1.9994435825623382e-05, "loss": 1.5598, "step": 71400 }, { "epoch": 0.0531732928770098, "grad_norm": 0.48527583479881287, "learning_rate": 1.99944202291439e-05, "loss": 1.5116, "step": 71500 }, { "epoch": 0.05324766111879583, "grad_norm": 1.3961418867111206, "learning_rate": 1.9994404610842496e-05, "loss": 1.5574, "step": 71600 }, { "epoch": 0.053322029360581856, "grad_norm": 1.0021171569824219, "learning_rate": 1.9994388970719202e-05, "loss": 1.5676, "step": 71700 }, { "epoch": 0.05339639760236788, "grad_norm": 0.5348053574562073, "learning_rate": 1.9994373308774052e-05, "loss": 1.4911, "step": 71800 }, { "epoch": 0.05347076584415391, "grad_norm": 0.5527310967445374, "learning_rate": 1.9994357625007087e-05, "loss": 1.5595, "step": 71900 }, { "epoch": 0.05354513408593994, "grad_norm": 1.1981103420257568, "learning_rate": 1.999434191941833e-05, "loss": 1.5239, "step": 72000 }, { "epoch": 0.05361950232772597, "grad_norm": 0.507123589515686, "learning_rate": 1.999432619200782e-05, "loss": 1.5324, "step": 72100 }, { "epoch": 0.053693870569512, "grad_norm": 0.4210796356201172, "learning_rate": 1.99943104427756e-05, "loss": 1.5681, "step": 72200 }, { "epoch": 0.053768238811298025, "grad_norm": 0.574341893196106, "learning_rate": 1.999429467172169e-05, "loss": 1.6575, "step": 72300 }, { "epoch": 0.05384260705308405, "grad_norm": 0.5402580499649048, "learning_rate": 1.9994278878846135e-05, "loss": 1.5097, "step": 72400 }, { "epoch": 0.05391697529487008, "grad_norm": 0.5868122577667236, "learning_rate": 1.9994263064148964e-05, "loss": 1.5158, "step": 72500 }, { "epoch": 0.053991343536656106, "grad_norm": 0.5461186170578003, "learning_rate": 1.9994247227630216e-05, "loss": 1.4676, "step": 72600 }, { "epoch": 0.05406571177844213, "grad_norm": 0.56854248046875, "learning_rate": 1.999423136928992e-05, "loss": 1.5803, "step": 72700 }, { "epoch": 0.05414008002022816, "grad_norm": 0.5925450325012207, "learning_rate": 1.9994215489128113e-05, "loss": 1.5622, "step": 72800 }, { "epoch": 0.05421444826201419, "grad_norm": 0.9310332536697388, "learning_rate": 1.999419958714483e-05, "loss": 1.5552, "step": 72900 }, { "epoch": 0.05428881650380022, "grad_norm": 0.6535036563873291, "learning_rate": 1.9994183663340106e-05, "loss": 1.5079, "step": 73000 }, { "epoch": 0.05436318474558625, "grad_norm": 0.759397029876709, "learning_rate": 1.9994167717713976e-05, "loss": 1.4763, "step": 73100 }, { "epoch": 0.054437552987372274, "grad_norm": 0.8042114973068237, "learning_rate": 1.999415175026648e-05, "loss": 1.5085, "step": 73200 }, { "epoch": 0.0545119212291583, "grad_norm": 0.5362099409103394, "learning_rate": 1.999413576099764e-05, "loss": 1.4936, "step": 73300 }, { "epoch": 0.05458628947094433, "grad_norm": 0.5755407214164734, "learning_rate": 1.9994119749907502e-05, "loss": 1.5056, "step": 73400 }, { "epoch": 0.054660657712730355, "grad_norm": 0.595206081867218, "learning_rate": 1.9994103716996097e-05, "loss": 1.4753, "step": 73500 }, { "epoch": 0.05473502595451638, "grad_norm": 0.8156918287277222, "learning_rate": 1.9994087662263457e-05, "loss": 1.5586, "step": 73600 }, { "epoch": 0.05480939419630241, "grad_norm": 0.739098310470581, "learning_rate": 1.999407158570962e-05, "loss": 1.5223, "step": 73700 }, { "epoch": 0.054883762438088436, "grad_norm": 1.348789095878601, "learning_rate": 1.999405548733463e-05, "loss": 1.5179, "step": 73800 }, { "epoch": 0.05495813067987446, "grad_norm": 0.671525776386261, "learning_rate": 1.99940393671385e-05, "loss": 1.5044, "step": 73900 }, { "epoch": 0.0550324989216605, "grad_norm": 1.034043312072754, "learning_rate": 1.9994023225121288e-05, "loss": 1.4223, "step": 74000 }, { "epoch": 0.055106867163446524, "grad_norm": 1.1060287952423096, "learning_rate": 1.9994007061283018e-05, "loss": 1.5573, "step": 74100 }, { "epoch": 0.05518123540523255, "grad_norm": 0.8618998527526855, "learning_rate": 1.999399087562373e-05, "loss": 1.4898, "step": 74200 }, { "epoch": 0.05525560364701858, "grad_norm": 0.714076817035675, "learning_rate": 1.9993974668143452e-05, "loss": 1.451, "step": 74300 }, { "epoch": 0.055329971888804605, "grad_norm": 0.4572731554508209, "learning_rate": 1.9993958438842224e-05, "loss": 1.5303, "step": 74400 }, { "epoch": 0.05540434013059063, "grad_norm": 0.496499627828598, "learning_rate": 1.9993942187720082e-05, "loss": 1.6219, "step": 74500 }, { "epoch": 0.05547870837237666, "grad_norm": 0.4714408218860626, "learning_rate": 1.9993925914777064e-05, "loss": 1.5501, "step": 74600 }, { "epoch": 0.055553076614162686, "grad_norm": 0.5283282995223999, "learning_rate": 1.9993909620013203e-05, "loss": 1.5221, "step": 74700 }, { "epoch": 0.05562744485594871, "grad_norm": 0.8781616687774658, "learning_rate": 1.999389330342853e-05, "loss": 1.5478, "step": 74800 }, { "epoch": 0.05570181309773475, "grad_norm": 0.5995525121688843, "learning_rate": 1.9993876965023084e-05, "loss": 1.5069, "step": 74900 }, { "epoch": 0.055776181339520774, "grad_norm": 0.533664882183075, "learning_rate": 1.9993860604796905e-05, "loss": 1.514, "step": 75000 }, { "epoch": 0.0558505495813068, "grad_norm": 1.012466311454773, "learning_rate": 1.9993844222750023e-05, "loss": 1.473, "step": 75100 }, { "epoch": 0.05592491782309283, "grad_norm": 0.7862614393234253, "learning_rate": 1.9993827818882473e-05, "loss": 1.4832, "step": 75200 }, { "epoch": 0.055999286064878855, "grad_norm": 0.7203556299209595, "learning_rate": 1.9993811393194302e-05, "loss": 1.5157, "step": 75300 }, { "epoch": 0.05607365430666488, "grad_norm": 1.1218525171279907, "learning_rate": 1.9993794945685528e-05, "loss": 1.4169, "step": 75400 }, { "epoch": 0.05614802254845091, "grad_norm": 0.46560999751091003, "learning_rate": 1.99937784763562e-05, "loss": 1.4688, "step": 75500 }, { "epoch": 0.056222390790236935, "grad_norm": 0.9627271294593811, "learning_rate": 1.999376198520635e-05, "loss": 1.5489, "step": 75600 }, { "epoch": 0.05629675903202296, "grad_norm": 0.9937626719474792, "learning_rate": 1.9993745472236018e-05, "loss": 1.5759, "step": 75700 }, { "epoch": 0.05637112727380899, "grad_norm": 0.6520542502403259, "learning_rate": 1.9993728937445232e-05, "loss": 1.4653, "step": 75800 }, { "epoch": 0.05644549551559502, "grad_norm": 1.1701862812042236, "learning_rate": 1.9993712380834034e-05, "loss": 1.4875, "step": 75900 }, { "epoch": 0.05651986375738105, "grad_norm": 0.9439906477928162, "learning_rate": 1.999369580240246e-05, "loss": 1.5524, "step": 76000 }, { "epoch": 0.05659423199916708, "grad_norm": 1.1177873611450195, "learning_rate": 1.9993679202150543e-05, "loss": 1.558, "step": 76100 }, { "epoch": 0.056668600240953104, "grad_norm": 0.4650721549987793, "learning_rate": 1.9993662580078317e-05, "loss": 1.5035, "step": 76200 }, { "epoch": 0.05674296848273913, "grad_norm": 0.5230388045310974, "learning_rate": 1.999364593618583e-05, "loss": 1.5027, "step": 76300 }, { "epoch": 0.05681733672452516, "grad_norm": 0.6694977879524231, "learning_rate": 1.9993629270473108e-05, "loss": 1.4642, "step": 76400 }, { "epoch": 0.056891704966311185, "grad_norm": 0.6857712268829346, "learning_rate": 1.999361258294019e-05, "loss": 1.6462, "step": 76500 }, { "epoch": 0.05696607320809721, "grad_norm": 0.708351731300354, "learning_rate": 1.9993595873587112e-05, "loss": 1.4773, "step": 76600 }, { "epoch": 0.05704044144988324, "grad_norm": 0.451820969581604, "learning_rate": 1.999357914241391e-05, "loss": 1.5512, "step": 76700 }, { "epoch": 0.05711480969166927, "grad_norm": 0.9653975963592529, "learning_rate": 1.9993562389420623e-05, "loss": 1.5231, "step": 76800 }, { "epoch": 0.0571891779334553, "grad_norm": 1.0675276517868042, "learning_rate": 1.9993545614607287e-05, "loss": 1.5519, "step": 76900 }, { "epoch": 0.05726354617524133, "grad_norm": 0.6132591366767883, "learning_rate": 1.9993528817973938e-05, "loss": 1.5634, "step": 77000 }, { "epoch": 0.057337914417027354, "grad_norm": 0.6499157547950745, "learning_rate": 1.999351199952061e-05, "loss": 1.4993, "step": 77100 }, { "epoch": 0.05741228265881338, "grad_norm": 0.8147251605987549, "learning_rate": 1.999349515924734e-05, "loss": 1.5336, "step": 77200 }, { "epoch": 0.05748665090059941, "grad_norm": 0.601445198059082, "learning_rate": 1.9993478297154175e-05, "loss": 1.5546, "step": 77300 }, { "epoch": 0.057561019142385435, "grad_norm": 0.7941200137138367, "learning_rate": 1.9993461413241138e-05, "loss": 1.5751, "step": 77400 }, { "epoch": 0.05763538738417146, "grad_norm": 0.5446432828903198, "learning_rate": 1.9993444507508272e-05, "loss": 1.5638, "step": 77500 }, { "epoch": 0.05770975562595749, "grad_norm": 1.241955280303955, "learning_rate": 1.9993427579955617e-05, "loss": 1.5663, "step": 77600 }, { "epoch": 0.057784123867743516, "grad_norm": 0.47920486330986023, "learning_rate": 1.99934106305832e-05, "loss": 1.4326, "step": 77700 }, { "epoch": 0.05785849210952955, "grad_norm": 0.8999041318893433, "learning_rate": 1.9993393659391068e-05, "loss": 1.5711, "step": 77800 }, { "epoch": 0.05793286035131558, "grad_norm": 0.6789896488189697, "learning_rate": 1.9993376666379256e-05, "loss": 1.5342, "step": 77900 }, { "epoch": 0.058007228593101604, "grad_norm": 0.5044109225273132, "learning_rate": 1.9993359651547798e-05, "loss": 1.4873, "step": 78000 }, { "epoch": 0.05808159683488763, "grad_norm": 0.7116490006446838, "learning_rate": 1.9993342614896733e-05, "loss": 1.453, "step": 78100 }, { "epoch": 0.05815596507667366, "grad_norm": 0.5207152962684631, "learning_rate": 1.9993325556426096e-05, "loss": 1.4711, "step": 78200 }, { "epoch": 0.058230333318459684, "grad_norm": 0.8057217001914978, "learning_rate": 1.999330847613593e-05, "loss": 1.5021, "step": 78300 }, { "epoch": 0.05830470156024571, "grad_norm": 1.1154026985168457, "learning_rate": 1.9993291374026266e-05, "loss": 1.4475, "step": 78400 }, { "epoch": 0.05837906980203174, "grad_norm": 0.4721396267414093, "learning_rate": 1.9993274250097146e-05, "loss": 1.5285, "step": 78500 }, { "epoch": 0.058453438043817765, "grad_norm": 1.0618817806243896, "learning_rate": 1.9993257104348604e-05, "loss": 1.5323, "step": 78600 }, { "epoch": 0.0585278062856038, "grad_norm": 1.1249905824661255, "learning_rate": 1.999323993678068e-05, "loss": 1.5252, "step": 78700 }, { "epoch": 0.058602174527389826, "grad_norm": 0.48599275946617126, "learning_rate": 1.999322274739341e-05, "loss": 1.5124, "step": 78800 }, { "epoch": 0.05867654276917585, "grad_norm": 0.5065784454345703, "learning_rate": 1.999320553618683e-05, "loss": 1.5858, "step": 78900 }, { "epoch": 0.05875091101096188, "grad_norm": 0.854963481426239, "learning_rate": 1.999318830316098e-05, "loss": 1.513, "step": 79000 }, { "epoch": 0.05882527925274791, "grad_norm": 0.556955099105835, "learning_rate": 1.9993171048315895e-05, "loss": 1.514, "step": 79100 }, { "epoch": 0.058899647494533934, "grad_norm": 0.6691248416900635, "learning_rate": 1.9993153771651618e-05, "loss": 1.4574, "step": 79200 }, { "epoch": 0.05897401573631996, "grad_norm": 0.5654352903366089, "learning_rate": 1.999313647316818e-05, "loss": 1.5046, "step": 79300 }, { "epoch": 0.05904838397810599, "grad_norm": 0.9016973972320557, "learning_rate": 1.9993119152865624e-05, "loss": 1.5465, "step": 79400 }, { "epoch": 0.059122752219892015, "grad_norm": 0.4756191670894623, "learning_rate": 1.9993101810743985e-05, "loss": 1.4944, "step": 79500 }, { "epoch": 0.05919712046167804, "grad_norm": 0.44962599873542786, "learning_rate": 1.9993084446803303e-05, "loss": 1.4853, "step": 79600 }, { "epoch": 0.059271488703464076, "grad_norm": 0.5768176913261414, "learning_rate": 1.9993067061043614e-05, "loss": 1.5246, "step": 79700 }, { "epoch": 0.0593458569452501, "grad_norm": 0.6383886933326721, "learning_rate": 1.9993049653464957e-05, "loss": 1.5407, "step": 79800 }, { "epoch": 0.05942022518703613, "grad_norm": 0.5047423243522644, "learning_rate": 1.999303222406737e-05, "loss": 1.5593, "step": 79900 }, { "epoch": 0.05949459342882216, "grad_norm": 0.5224947333335876, "learning_rate": 1.999301477285089e-05, "loss": 1.5501, "step": 80000 }, { "epoch": 0.059568961670608184, "grad_norm": 0.8568351864814758, "learning_rate": 1.9992997299815557e-05, "loss": 1.5291, "step": 80100 }, { "epoch": 0.05964332991239421, "grad_norm": 0.5065781474113464, "learning_rate": 1.9992979804961406e-05, "loss": 1.4743, "step": 80200 }, { "epoch": 0.05971769815418024, "grad_norm": 0.7506331205368042, "learning_rate": 1.999296228828848e-05, "loss": 1.575, "step": 80300 }, { "epoch": 0.059792066395966265, "grad_norm": 0.7313674092292786, "learning_rate": 1.999294474979681e-05, "loss": 1.4892, "step": 80400 }, { "epoch": 0.05986643463775229, "grad_norm": 0.6475706100463867, "learning_rate": 1.999292718948644e-05, "loss": 1.4716, "step": 80500 }, { "epoch": 0.05994080287953832, "grad_norm": 0.4502275586128235, "learning_rate": 1.999290960735741e-05, "loss": 1.5449, "step": 80600 }, { "epoch": 0.06001517112132435, "grad_norm": 0.7036411762237549, "learning_rate": 1.9992892003409753e-05, "loss": 1.4786, "step": 80700 }, { "epoch": 0.06008953936311038, "grad_norm": 0.5732350945472717, "learning_rate": 1.999287437764351e-05, "loss": 1.5548, "step": 80800 }, { "epoch": 0.060163907604896406, "grad_norm": 0.6757441759109497, "learning_rate": 1.999285673005872e-05, "loss": 1.4276, "step": 80900 }, { "epoch": 0.06023827584668243, "grad_norm": 0.8502363562583923, "learning_rate": 1.999283906065542e-05, "loss": 1.5078, "step": 81000 }, { "epoch": 0.06031264408846846, "grad_norm": 0.9248318672180176, "learning_rate": 1.9992821369433654e-05, "loss": 1.5352, "step": 81100 }, { "epoch": 0.06038701233025449, "grad_norm": 0.3702896535396576, "learning_rate": 1.999280365639345e-05, "loss": 1.5567, "step": 81200 }, { "epoch": 0.060461380572040514, "grad_norm": 0.8454656004905701, "learning_rate": 1.9992785921534853e-05, "loss": 1.5327, "step": 81300 }, { "epoch": 0.06053574881382654, "grad_norm": 1.452540397644043, "learning_rate": 1.9992768164857906e-05, "loss": 1.473, "step": 81400 }, { "epoch": 0.06061011705561257, "grad_norm": 0.5796297192573547, "learning_rate": 1.999275038636264e-05, "loss": 1.4551, "step": 81500 }, { "epoch": 0.0606844852973986, "grad_norm": 0.5252229571342468, "learning_rate": 1.9992732586049096e-05, "loss": 1.4727, "step": 81600 }, { "epoch": 0.06075885353918463, "grad_norm": 1.0359326601028442, "learning_rate": 1.999271476391732e-05, "loss": 1.5017, "step": 81700 }, { "epoch": 0.060833221780970656, "grad_norm": 0.49495527148246765, "learning_rate": 1.9992696919967337e-05, "loss": 1.5521, "step": 81800 }, { "epoch": 0.06090759002275668, "grad_norm": 0.548267662525177, "learning_rate": 1.9992679054199197e-05, "loss": 1.508, "step": 81900 }, { "epoch": 0.06098195826454271, "grad_norm": 0.5110555291175842, "learning_rate": 1.999266116661294e-05, "loss": 1.5644, "step": 82000 }, { "epoch": 0.06105632650632874, "grad_norm": 0.803193211555481, "learning_rate": 1.9992643257208595e-05, "loss": 1.4233, "step": 82100 }, { "epoch": 0.061130694748114764, "grad_norm": 1.2153749465942383, "learning_rate": 1.9992625325986207e-05, "loss": 1.5741, "step": 82200 }, { "epoch": 0.06120506298990079, "grad_norm": 0.48382624983787537, "learning_rate": 1.999260737294582e-05, "loss": 1.5272, "step": 82300 }, { "epoch": 0.06127943123168682, "grad_norm": 0.4789665937423706, "learning_rate": 1.9992589398087466e-05, "loss": 1.4757, "step": 82400 }, { "epoch": 0.061353799473472845, "grad_norm": 0.5505409240722656, "learning_rate": 1.9992571401411183e-05, "loss": 1.4968, "step": 82500 }, { "epoch": 0.06142816771525888, "grad_norm": 0.7146855592727661, "learning_rate": 1.999255338291702e-05, "loss": 1.4641, "step": 82600 }, { "epoch": 0.061502535957044906, "grad_norm": 1.2916581630706787, "learning_rate": 1.9992535342605008e-05, "loss": 1.4884, "step": 82700 }, { "epoch": 0.06157690419883093, "grad_norm": 0.5506526231765747, "learning_rate": 1.9992517280475186e-05, "loss": 1.4925, "step": 82800 }, { "epoch": 0.06165127244061696, "grad_norm": 1.0735949277877808, "learning_rate": 1.9992499196527598e-05, "loss": 1.456, "step": 82900 }, { "epoch": 0.06172564068240299, "grad_norm": 0.5877838134765625, "learning_rate": 1.9992481090762284e-05, "loss": 1.4362, "step": 83000 }, { "epoch": 0.061800008924189014, "grad_norm": 0.6066355109214783, "learning_rate": 1.9992462963179275e-05, "loss": 1.5472, "step": 83100 }, { "epoch": 0.06187437716597504, "grad_norm": 0.5328536629676819, "learning_rate": 1.9992444813778622e-05, "loss": 1.5712, "step": 83200 }, { "epoch": 0.06194874540776107, "grad_norm": 0.685464084148407, "learning_rate": 1.9992426642560356e-05, "loss": 1.531, "step": 83300 }, { "epoch": 0.062023113649547094, "grad_norm": 0.6651979684829712, "learning_rate": 1.999240844952452e-05, "loss": 1.5326, "step": 83400 }, { "epoch": 0.06209748189133313, "grad_norm": 0.9877690076828003, "learning_rate": 1.9992390234671157e-05, "loss": 1.5223, "step": 83500 }, { "epoch": 0.062171850133119155, "grad_norm": 0.4471887946128845, "learning_rate": 1.9992371998000303e-05, "loss": 1.5093, "step": 83600 }, { "epoch": 0.06224621837490518, "grad_norm": 0.8113996386528015, "learning_rate": 1.9992353739511994e-05, "loss": 1.4959, "step": 83700 }, { "epoch": 0.06232058661669121, "grad_norm": 0.5820923447608948, "learning_rate": 1.999233545920628e-05, "loss": 1.5134, "step": 83800 }, { "epoch": 0.062394954858477236, "grad_norm": 0.623708188533783, "learning_rate": 1.999231715708319e-05, "loss": 1.4944, "step": 83900 }, { "epoch": 0.06246932310026326, "grad_norm": 0.5685898065567017, "learning_rate": 1.9992298833142772e-05, "loss": 1.5297, "step": 84000 }, { "epoch": 0.06254369134204929, "grad_norm": 0.5108596682548523, "learning_rate": 1.999228048738506e-05, "loss": 1.4644, "step": 84100 }, { "epoch": 0.06261805958383532, "grad_norm": 0.636935293674469, "learning_rate": 1.99922621198101e-05, "loss": 1.5105, "step": 84200 }, { "epoch": 0.06269242782562134, "grad_norm": 0.7226575613021851, "learning_rate": 1.9992243730417926e-05, "loss": 1.5828, "step": 84300 }, { "epoch": 0.06276679606740737, "grad_norm": 0.7858364582061768, "learning_rate": 1.9992225319208584e-05, "loss": 1.5216, "step": 84400 }, { "epoch": 0.0628411643091934, "grad_norm": 0.5035095810890198, "learning_rate": 1.999220688618211e-05, "loss": 1.5342, "step": 84500 }, { "epoch": 0.06291553255097942, "grad_norm": 0.515177845954895, "learning_rate": 1.9992188431338547e-05, "loss": 1.5137, "step": 84600 }, { "epoch": 0.06298990079276545, "grad_norm": 0.6190256476402283, "learning_rate": 1.9992169954677933e-05, "loss": 1.4787, "step": 84700 }, { "epoch": 0.06306426903455148, "grad_norm": 0.42270639538764954, "learning_rate": 1.999215145620031e-05, "loss": 1.5532, "step": 84800 }, { "epoch": 0.06313863727633752, "grad_norm": 0.9928336143493652, "learning_rate": 1.999213293590572e-05, "loss": 1.5139, "step": 84900 }, { "epoch": 0.06321300551812355, "grad_norm": 0.6874875426292419, "learning_rate": 1.99921143937942e-05, "loss": 1.5843, "step": 85000 }, { "epoch": 0.06328737375990957, "grad_norm": 1.4590458869934082, "learning_rate": 1.9992095829865786e-05, "loss": 1.4197, "step": 85100 }, { "epoch": 0.0633617420016956, "grad_norm": 0.7171260714530945, "learning_rate": 1.999207724412053e-05, "loss": 1.5444, "step": 85200 }, { "epoch": 0.06343611024348163, "grad_norm": 0.7907926440238953, "learning_rate": 1.9992058636558466e-05, "loss": 1.4923, "step": 85300 }, { "epoch": 0.06351047848526765, "grad_norm": 0.5244536399841309, "learning_rate": 1.9992040007179635e-05, "loss": 1.4754, "step": 85400 }, { "epoch": 0.06358484672705368, "grad_norm": 0.7662790417671204, "learning_rate": 1.999202135598408e-05, "loss": 1.4759, "step": 85500 }, { "epoch": 0.06365921496883971, "grad_norm": 0.5479734539985657, "learning_rate": 1.9992002682971837e-05, "loss": 1.5631, "step": 85600 }, { "epoch": 0.06373358321062574, "grad_norm": 0.5378610491752625, "learning_rate": 1.9991983988142952e-05, "loss": 1.4574, "step": 85700 }, { "epoch": 0.06380795145241176, "grad_norm": 0.5130261182785034, "learning_rate": 1.9991965271497463e-05, "loss": 1.4096, "step": 85800 }, { "epoch": 0.06388231969419779, "grad_norm": 0.3913695812225342, "learning_rate": 1.9991946533035408e-05, "loss": 1.4662, "step": 85900 }, { "epoch": 0.06395668793598382, "grad_norm": 0.6080545783042908, "learning_rate": 1.9991927772756833e-05, "loss": 1.5168, "step": 86000 }, { "epoch": 0.06403105617776984, "grad_norm": 0.4266175627708435, "learning_rate": 1.9991908990661782e-05, "loss": 1.5904, "step": 86100 }, { "epoch": 0.06410542441955587, "grad_norm": 0.7378482818603516, "learning_rate": 1.9991890186750284e-05, "loss": 1.5445, "step": 86200 }, { "epoch": 0.0641797926613419, "grad_norm": 0.4735576808452606, "learning_rate": 1.999187136102239e-05, "loss": 1.5184, "step": 86300 }, { "epoch": 0.06425416090312792, "grad_norm": 1.1487786769866943, "learning_rate": 1.999185251347814e-05, "loss": 1.5623, "step": 86400 }, { "epoch": 0.06432852914491395, "grad_norm": 0.6790218353271484, "learning_rate": 1.9991833644117573e-05, "loss": 1.4743, "step": 86500 }, { "epoch": 0.06440289738669998, "grad_norm": 0.6615635752677917, "learning_rate": 1.9991814752940728e-05, "loss": 1.5226, "step": 86600 }, { "epoch": 0.064477265628486, "grad_norm": 0.5001498460769653, "learning_rate": 1.9991795839947652e-05, "loss": 1.5801, "step": 86700 }, { "epoch": 0.06455163387027203, "grad_norm": 0.880649983882904, "learning_rate": 1.9991776905138382e-05, "loss": 1.5611, "step": 86800 }, { "epoch": 0.06462600211205807, "grad_norm": 0.6761185526847839, "learning_rate": 1.9991757948512962e-05, "loss": 1.5622, "step": 86900 }, { "epoch": 0.0647003703538441, "grad_norm": 0.48481419682502747, "learning_rate": 1.999173897007143e-05, "loss": 1.4518, "step": 87000 }, { "epoch": 0.06477473859563013, "grad_norm": 0.5701479315757751, "learning_rate": 1.999171996981383e-05, "loss": 1.5306, "step": 87100 }, { "epoch": 0.06484910683741615, "grad_norm": 0.7284945845603943, "learning_rate": 1.99917009477402e-05, "loss": 1.4337, "step": 87200 }, { "epoch": 0.06492347507920218, "grad_norm": 0.7202057242393494, "learning_rate": 1.999168190385059e-05, "loss": 1.5605, "step": 87300 }, { "epoch": 0.06499784332098821, "grad_norm": 0.4802098274230957, "learning_rate": 1.9991662838145034e-05, "loss": 1.5428, "step": 87400 }, { "epoch": 0.06507221156277423, "grad_norm": 0.5068057775497437, "learning_rate": 1.9991643750623574e-05, "loss": 1.4441, "step": 87500 }, { "epoch": 0.06514657980456026, "grad_norm": 0.8798725605010986, "learning_rate": 1.9991624641286255e-05, "loss": 1.5766, "step": 87600 }, { "epoch": 0.06522094804634629, "grad_norm": 0.49363136291503906, "learning_rate": 1.9991605510133115e-05, "loss": 1.6196, "step": 87700 }, { "epoch": 0.06529531628813232, "grad_norm": 0.5400691628456116, "learning_rate": 1.99915863571642e-05, "loss": 1.5392, "step": 87800 }, { "epoch": 0.06536968452991834, "grad_norm": 0.5299246311187744, "learning_rate": 1.9991567182379546e-05, "loss": 1.5645, "step": 87900 }, { "epoch": 0.06544405277170437, "grad_norm": 0.6503016352653503, "learning_rate": 1.9991547985779202e-05, "loss": 1.4476, "step": 88000 }, { "epoch": 0.0655184210134904, "grad_norm": 0.5769862532615662, "learning_rate": 1.9991528767363207e-05, "loss": 1.5248, "step": 88100 }, { "epoch": 0.06559278925527642, "grad_norm": 0.8062888383865356, "learning_rate": 1.99915095271316e-05, "loss": 1.6394, "step": 88200 }, { "epoch": 0.06566715749706245, "grad_norm": 0.4004135727882385, "learning_rate": 1.999149026508443e-05, "loss": 1.5317, "step": 88300 }, { "epoch": 0.06574152573884848, "grad_norm": 0.6382884383201599, "learning_rate": 1.9991470981221727e-05, "loss": 1.5602, "step": 88400 }, { "epoch": 0.0658158939806345, "grad_norm": 0.535750150680542, "learning_rate": 1.9991451675543544e-05, "loss": 1.5113, "step": 88500 }, { "epoch": 0.06589026222242053, "grad_norm": 1.1604392528533936, "learning_rate": 1.999143234804992e-05, "loss": 1.4996, "step": 88600 }, { "epoch": 0.06596463046420656, "grad_norm": 0.7842492461204529, "learning_rate": 1.99914129987409e-05, "loss": 1.5054, "step": 88700 }, { "epoch": 0.0660389987059926, "grad_norm": 0.7460685968399048, "learning_rate": 1.999139362761652e-05, "loss": 1.433, "step": 88800 }, { "epoch": 0.06611336694777863, "grad_norm": 0.7984693050384521, "learning_rate": 1.9991374234676826e-05, "loss": 1.5551, "step": 88900 }, { "epoch": 0.06618773518956465, "grad_norm": 0.6733551621437073, "learning_rate": 1.999135481992186e-05, "loss": 1.4334, "step": 89000 }, { "epoch": 0.06626210343135068, "grad_norm": 0.8035016059875488, "learning_rate": 1.999133538335166e-05, "loss": 1.4872, "step": 89100 }, { "epoch": 0.06633647167313671, "grad_norm": 0.4339046776294708, "learning_rate": 1.9991315924966277e-05, "loss": 1.4869, "step": 89200 }, { "epoch": 0.06641083991492273, "grad_norm": 0.6680594086647034, "learning_rate": 1.9991296444765747e-05, "loss": 1.5103, "step": 89300 }, { "epoch": 0.06648520815670876, "grad_norm": 0.697487473487854, "learning_rate": 1.9991276942750117e-05, "loss": 1.4239, "step": 89400 }, { "epoch": 0.06655957639849479, "grad_norm": 0.587734043598175, "learning_rate": 1.9991257418919424e-05, "loss": 1.5856, "step": 89500 }, { "epoch": 0.06663394464028081, "grad_norm": 0.8574571013450623, "learning_rate": 1.999123787327372e-05, "loss": 1.4818, "step": 89600 }, { "epoch": 0.06670831288206684, "grad_norm": 1.0861676931381226, "learning_rate": 1.9991218305813035e-05, "loss": 1.4883, "step": 89700 }, { "epoch": 0.06678268112385287, "grad_norm": 1.0139306783676147, "learning_rate": 1.9991198716537422e-05, "loss": 1.5099, "step": 89800 }, { "epoch": 0.0668570493656389, "grad_norm": 0.6741511225700378, "learning_rate": 1.999117910544692e-05, "loss": 1.4746, "step": 89900 }, { "epoch": 0.06693141760742492, "grad_norm": 0.9702801704406738, "learning_rate": 1.999115947254157e-05, "loss": 1.5166, "step": 90000 }, { "epoch": 0.06700578584921095, "grad_norm": 0.7757803797721863, "learning_rate": 1.9991139817821416e-05, "loss": 1.5031, "step": 90100 }, { "epoch": 0.06708015409099698, "grad_norm": 0.7200698256492615, "learning_rate": 1.9991120141286502e-05, "loss": 1.5834, "step": 90200 }, { "epoch": 0.067154522332783, "grad_norm": 0.7415780425071716, "learning_rate": 1.999110044293687e-05, "loss": 1.5689, "step": 90300 }, { "epoch": 0.06722889057456903, "grad_norm": 0.5777677297592163, "learning_rate": 1.9991080722772564e-05, "loss": 1.5139, "step": 90400 }, { "epoch": 0.06730325881635506, "grad_norm": 0.6991866827011108, "learning_rate": 1.999106098079363e-05, "loss": 1.5073, "step": 90500 }, { "epoch": 0.06737762705814108, "grad_norm": 0.6112390160560608, "learning_rate": 1.9991041217000105e-05, "loss": 1.4773, "step": 90600 }, { "epoch": 0.06745199529992713, "grad_norm": 0.8287676572799683, "learning_rate": 1.9991021431392033e-05, "loss": 1.5425, "step": 90700 }, { "epoch": 0.06752636354171315, "grad_norm": 0.8582881689071655, "learning_rate": 1.999100162396946e-05, "loss": 1.5581, "step": 90800 }, { "epoch": 0.06760073178349918, "grad_norm": 0.5585276484489441, "learning_rate": 1.999098179473243e-05, "loss": 1.5015, "step": 90900 }, { "epoch": 0.0676751000252852, "grad_norm": 0.4237435460090637, "learning_rate": 1.9990961943680984e-05, "loss": 1.523, "step": 91000 }, { "epoch": 0.06774946826707123, "grad_norm": 0.5455594658851624, "learning_rate": 1.999094207081517e-05, "loss": 1.5448, "step": 91100 }, { "epoch": 0.06782383650885726, "grad_norm": 0.48855817317962646, "learning_rate": 1.999092217613502e-05, "loss": 1.4535, "step": 91200 }, { "epoch": 0.06789820475064329, "grad_norm": 0.5199916958808899, "learning_rate": 1.999090225964059e-05, "loss": 1.4921, "step": 91300 }, { "epoch": 0.06797257299242931, "grad_norm": 0.5790271162986755, "learning_rate": 1.9990882321331916e-05, "loss": 1.5773, "step": 91400 }, { "epoch": 0.06804694123421534, "grad_norm": 0.5524342656135559, "learning_rate": 1.9990862361209043e-05, "loss": 1.4619, "step": 91500 }, { "epoch": 0.06812130947600137, "grad_norm": 0.7153291702270508, "learning_rate": 1.999084237927202e-05, "loss": 1.6042, "step": 91600 }, { "epoch": 0.0681956777177874, "grad_norm": 0.957635223865509, "learning_rate": 1.9990822375520882e-05, "loss": 1.538, "step": 91700 }, { "epoch": 0.06827004595957342, "grad_norm": 0.38240477442741394, "learning_rate": 1.9990802349955678e-05, "loss": 1.5937, "step": 91800 }, { "epoch": 0.06834441420135945, "grad_norm": 0.8961233496665955, "learning_rate": 1.999078230257645e-05, "loss": 1.5119, "step": 91900 }, { "epoch": 0.06841878244314548, "grad_norm": 0.47433900833129883, "learning_rate": 1.999076223338324e-05, "loss": 1.5449, "step": 92000 }, { "epoch": 0.0684931506849315, "grad_norm": 0.8222399353981018, "learning_rate": 1.9990742142376098e-05, "loss": 1.5334, "step": 92100 }, { "epoch": 0.06856751892671753, "grad_norm": 0.464373916387558, "learning_rate": 1.999072202955506e-05, "loss": 1.5003, "step": 92200 }, { "epoch": 0.06864188716850356, "grad_norm": 0.8799763321876526, "learning_rate": 1.9990701894920176e-05, "loss": 1.581, "step": 92300 }, { "epoch": 0.06871625541028958, "grad_norm": 0.9567086100578308, "learning_rate": 1.999068173847149e-05, "loss": 1.4373, "step": 92400 }, { "epoch": 0.06879062365207561, "grad_norm": 0.440479576587677, "learning_rate": 1.999066156020904e-05, "loss": 1.5571, "step": 92500 }, { "epoch": 0.06886499189386165, "grad_norm": 0.7486180663108826, "learning_rate": 1.9990641360132876e-05, "loss": 1.4437, "step": 92600 }, { "epoch": 0.06893936013564768, "grad_norm": 0.7576742172241211, "learning_rate": 1.9990621138243037e-05, "loss": 1.5306, "step": 92700 }, { "epoch": 0.0690137283774337, "grad_norm": 0.6755186915397644, "learning_rate": 1.9990600894539574e-05, "loss": 1.5769, "step": 92800 }, { "epoch": 0.06908809661921973, "grad_norm": 0.6093853712081909, "learning_rate": 1.9990580629022526e-05, "loss": 1.5777, "step": 92900 }, { "epoch": 0.06916246486100576, "grad_norm": 0.5788242220878601, "learning_rate": 1.9990560341691938e-05, "loss": 1.494, "step": 93000 }, { "epoch": 0.06923683310279179, "grad_norm": 0.828676700592041, "learning_rate": 1.9990540032547855e-05, "loss": 1.5651, "step": 93100 }, { "epoch": 0.06931120134457781, "grad_norm": 0.5612863302230835, "learning_rate": 1.9990519701590322e-05, "loss": 1.5584, "step": 93200 }, { "epoch": 0.06938556958636384, "grad_norm": 0.965107262134552, "learning_rate": 1.999049934881938e-05, "loss": 1.497, "step": 93300 }, { "epoch": 0.06945993782814987, "grad_norm": 0.46939852833747864, "learning_rate": 1.9990478974235078e-05, "loss": 1.5716, "step": 93400 }, { "epoch": 0.0695343060699359, "grad_norm": 0.4986964464187622, "learning_rate": 1.999045857783746e-05, "loss": 1.5762, "step": 93500 }, { "epoch": 0.06960867431172192, "grad_norm": 0.4267128109931946, "learning_rate": 1.9990438159626566e-05, "loss": 1.5101, "step": 93600 }, { "epoch": 0.06968304255350795, "grad_norm": 0.411811888217926, "learning_rate": 1.9990417719602445e-05, "loss": 1.5623, "step": 93700 }, { "epoch": 0.06975741079529398, "grad_norm": 0.8761053681373596, "learning_rate": 1.999039725776514e-05, "loss": 1.4294, "step": 93800 }, { "epoch": 0.06983177903708, "grad_norm": 0.9531000852584839, "learning_rate": 1.99903767741147e-05, "loss": 1.4925, "step": 93900 }, { "epoch": 0.06990614727886603, "grad_norm": 0.516830325126648, "learning_rate": 1.999035626865116e-05, "loss": 1.5802, "step": 94000 }, { "epoch": 0.06998051552065206, "grad_norm": 0.47061294317245483, "learning_rate": 1.9990335741374572e-05, "loss": 1.5668, "step": 94100 }, { "epoch": 0.07005488376243808, "grad_norm": 0.7790777683258057, "learning_rate": 1.9990315192284978e-05, "loss": 1.5568, "step": 94200 }, { "epoch": 0.07012925200422411, "grad_norm": 0.75156170129776, "learning_rate": 1.9990294621382426e-05, "loss": 1.5217, "step": 94300 }, { "epoch": 0.07020362024601014, "grad_norm": 1.195028305053711, "learning_rate": 1.999027402866696e-05, "loss": 1.5662, "step": 94400 }, { "epoch": 0.07027798848779618, "grad_norm": 0.6215851306915283, "learning_rate": 1.999025341413862e-05, "loss": 1.5208, "step": 94500 }, { "epoch": 0.0703523567295822, "grad_norm": 0.509843647480011, "learning_rate": 1.9990232777797458e-05, "loss": 1.489, "step": 94600 }, { "epoch": 0.07042672497136823, "grad_norm": 1.2951029539108276, "learning_rate": 1.9990212119643516e-05, "loss": 1.4729, "step": 94700 }, { "epoch": 0.07050109321315426, "grad_norm": 0.5028135776519775, "learning_rate": 1.9990191439676838e-05, "loss": 1.5579, "step": 94800 }, { "epoch": 0.07057546145494029, "grad_norm": 0.7202877998352051, "learning_rate": 1.9990170737897473e-05, "loss": 1.5282, "step": 94900 }, { "epoch": 0.07064982969672631, "grad_norm": 0.9731516242027283, "learning_rate": 1.9990150014305462e-05, "loss": 1.5194, "step": 95000 }, { "epoch": 0.07072419793851234, "grad_norm": 0.7444689273834229, "learning_rate": 1.9990129268900848e-05, "loss": 1.5198, "step": 95100 }, { "epoch": 0.07079856618029837, "grad_norm": 0.9299377202987671, "learning_rate": 1.9990108501683685e-05, "loss": 1.5393, "step": 95200 }, { "epoch": 0.0708729344220844, "grad_norm": 0.6611402630805969, "learning_rate": 1.999008771265401e-05, "loss": 1.5351, "step": 95300 }, { "epoch": 0.07094730266387042, "grad_norm": 0.4772530496120453, "learning_rate": 1.9990066901811876e-05, "loss": 1.5243, "step": 95400 }, { "epoch": 0.07102167090565645, "grad_norm": 0.42998188734054565, "learning_rate": 1.9990046069157322e-05, "loss": 1.5877, "step": 95500 }, { "epoch": 0.07109603914744247, "grad_norm": 0.7415347099304199, "learning_rate": 1.9990025214690396e-05, "loss": 1.5633, "step": 95600 }, { "epoch": 0.0711704073892285, "grad_norm": 0.657112717628479, "learning_rate": 1.999000433841114e-05, "loss": 1.4555, "step": 95700 }, { "epoch": 0.07124477563101453, "grad_norm": 0.9188429713249207, "learning_rate": 1.998998344031961e-05, "loss": 1.4329, "step": 95800 }, { "epoch": 0.07131914387280056, "grad_norm": 0.8823667168617249, "learning_rate": 1.9989962520415836e-05, "loss": 1.4754, "step": 95900 }, { "epoch": 0.07139351211458658, "grad_norm": 0.7276200652122498, "learning_rate": 1.9989941578699878e-05, "loss": 1.5286, "step": 96000 }, { "epoch": 0.07146788035637261, "grad_norm": 0.941512405872345, "learning_rate": 1.998992061517177e-05, "loss": 1.5087, "step": 96100 }, { "epoch": 0.07154224859815864, "grad_norm": 1.0310442447662354, "learning_rate": 1.998989962983157e-05, "loss": 1.5895, "step": 96200 }, { "epoch": 0.07161661683994466, "grad_norm": 1.3620883226394653, "learning_rate": 1.9989878622679317e-05, "loss": 1.474, "step": 96300 }, { "epoch": 0.0716909850817307, "grad_norm": 0.5119801163673401, "learning_rate": 1.998985759371505e-05, "loss": 1.5112, "step": 96400 }, { "epoch": 0.07176535332351673, "grad_norm": 0.8966123461723328, "learning_rate": 1.998983654293883e-05, "loss": 1.4903, "step": 96500 }, { "epoch": 0.07183972156530276, "grad_norm": 0.5336944460868835, "learning_rate": 1.998981547035069e-05, "loss": 1.5673, "step": 96600 }, { "epoch": 0.07191408980708879, "grad_norm": 1.2533961534500122, "learning_rate": 1.9989794375950688e-05, "loss": 1.5039, "step": 96700 }, { "epoch": 0.07198845804887481, "grad_norm": 1.3317081928253174, "learning_rate": 1.9989773259738858e-05, "loss": 1.567, "step": 96800 }, { "epoch": 0.07206282629066084, "grad_norm": 0.49700722098350525, "learning_rate": 1.998975212171525e-05, "loss": 1.542, "step": 96900 }, { "epoch": 0.07213719453244687, "grad_norm": 0.5809246301651001, "learning_rate": 1.9989730961879913e-05, "loss": 1.5097, "step": 97000 }, { "epoch": 0.07221156277423289, "grad_norm": 0.6107625365257263, "learning_rate": 1.9989709780232894e-05, "loss": 1.536, "step": 97100 }, { "epoch": 0.07228593101601892, "grad_norm": 0.5271338820457458, "learning_rate": 1.9989688576774234e-05, "loss": 1.5819, "step": 97200 }, { "epoch": 0.07236029925780495, "grad_norm": 0.6692411303520203, "learning_rate": 1.9989667351503988e-05, "loss": 1.4833, "step": 97300 }, { "epoch": 0.07243466749959097, "grad_norm": 1.0627728700637817, "learning_rate": 1.998964610442219e-05, "loss": 1.5404, "step": 97400 }, { "epoch": 0.072509035741377, "grad_norm": 0.5696298480033875, "learning_rate": 1.9989624835528896e-05, "loss": 1.4491, "step": 97500 }, { "epoch": 0.07258340398316303, "grad_norm": 0.5105301141738892, "learning_rate": 1.998960354482415e-05, "loss": 1.5188, "step": 97600 }, { "epoch": 0.07265777222494905, "grad_norm": 0.53251713514328, "learning_rate": 1.9989582232307998e-05, "loss": 1.5367, "step": 97700 }, { "epoch": 0.07273214046673508, "grad_norm": 0.6559078693389893, "learning_rate": 1.9989560897980485e-05, "loss": 1.4773, "step": 97800 }, { "epoch": 0.07280650870852111, "grad_norm": 0.39833974838256836, "learning_rate": 1.998953954184166e-05, "loss": 1.6063, "step": 97900 }, { "epoch": 0.07288087695030714, "grad_norm": 1.0479645729064941, "learning_rate": 1.9989518163891566e-05, "loss": 1.565, "step": 98000 }, { "epoch": 0.07295524519209316, "grad_norm": 0.7905478477478027, "learning_rate": 1.9989496764130253e-05, "loss": 1.5266, "step": 98100 }, { "epoch": 0.07302961343387919, "grad_norm": 0.4569951295852661, "learning_rate": 1.998947534255777e-05, "loss": 1.5295, "step": 98200 }, { "epoch": 0.07310398167566523, "grad_norm": 0.5308849215507507, "learning_rate": 1.9989453899174158e-05, "loss": 1.5203, "step": 98300 }, { "epoch": 0.07317834991745126, "grad_norm": 0.906802773475647, "learning_rate": 1.998943243397947e-05, "loss": 1.556, "step": 98400 }, { "epoch": 0.07325271815923728, "grad_norm": 0.5071494579315186, "learning_rate": 1.9989410946973747e-05, "loss": 1.5627, "step": 98500 }, { "epoch": 0.07332708640102331, "grad_norm": 0.5252199172973633, "learning_rate": 1.9989389438157037e-05, "loss": 1.5181, "step": 98600 }, { "epoch": 0.07340145464280934, "grad_norm": 0.5738980174064636, "learning_rate": 1.9989367907529394e-05, "loss": 1.6101, "step": 98700 }, { "epoch": 0.07347582288459537, "grad_norm": 0.6898683309555054, "learning_rate": 1.9989346355090853e-05, "loss": 1.579, "step": 98800 }, { "epoch": 0.07355019112638139, "grad_norm": 0.5396860241889954, "learning_rate": 1.998932478084147e-05, "loss": 1.5645, "step": 98900 }, { "epoch": 0.07362455936816742, "grad_norm": 0.5482293367385864, "learning_rate": 1.998930318478129e-05, "loss": 1.5453, "step": 99000 }, { "epoch": 0.07369892760995345, "grad_norm": 0.8394240736961365, "learning_rate": 1.9989281566910363e-05, "loss": 1.5025, "step": 99100 }, { "epoch": 0.07377329585173947, "grad_norm": 0.9409950971603394, "learning_rate": 1.9989259927228725e-05, "loss": 1.5489, "step": 99200 }, { "epoch": 0.0738476640935255, "grad_norm": 0.5597321391105652, "learning_rate": 1.9989238265736437e-05, "loss": 1.5994, "step": 99300 }, { "epoch": 0.07392203233531153, "grad_norm": 0.5139235258102417, "learning_rate": 1.9989216582433538e-05, "loss": 1.5478, "step": 99400 }, { "epoch": 0.07399640057709755, "grad_norm": 0.6312362551689148, "learning_rate": 1.998919487732008e-05, "loss": 1.4989, "step": 99500 }, { "epoch": 0.07407076881888358, "grad_norm": 0.6924223303794861, "learning_rate": 1.9989173150396105e-05, "loss": 1.4491, "step": 99600 }, { "epoch": 0.07414513706066961, "grad_norm": 0.5490585565567017, "learning_rate": 1.9989151401661666e-05, "loss": 1.538, "step": 99700 }, { "epoch": 0.07421950530245564, "grad_norm": 0.630455732345581, "learning_rate": 1.998912963111681e-05, "loss": 1.5286, "step": 99800 }, { "epoch": 0.07429387354424166, "grad_norm": 0.8591504693031311, "learning_rate": 1.998910783876158e-05, "loss": 1.5612, "step": 99900 }, { "epoch": 0.07436824178602769, "grad_norm": 1.0016669034957886, "learning_rate": 1.9989086024596027e-05, "loss": 1.5154, "step": 100000 }, { "epoch": 0.07444261002781372, "grad_norm": 0.6513885259628296, "learning_rate": 1.9989064188620197e-05, "loss": 1.5446, "step": 100100 }, { "epoch": 0.07451697826959976, "grad_norm": 0.6838514804840088, "learning_rate": 1.998904233083414e-05, "loss": 1.5336, "step": 100200 }, { "epoch": 0.07459134651138578, "grad_norm": 0.46571242809295654, "learning_rate": 1.9989020451237903e-05, "loss": 1.4838, "step": 100300 }, { "epoch": 0.07466571475317181, "grad_norm": 0.9936356544494629, "learning_rate": 1.998899854983153e-05, "loss": 1.5929, "step": 100400 }, { "epoch": 0.07474008299495784, "grad_norm": 0.6591018438339233, "learning_rate": 1.9988976626615075e-05, "loss": 1.54, "step": 100500 }, { "epoch": 0.07481445123674386, "grad_norm": 0.8453909754753113, "learning_rate": 1.998895468158858e-05, "loss": 1.5191, "step": 100600 }, { "epoch": 0.07488881947852989, "grad_norm": 0.6555935144424438, "learning_rate": 1.9988932714752095e-05, "loss": 1.5734, "step": 100700 }, { "epoch": 0.07496318772031592, "grad_norm": 0.6445733308792114, "learning_rate": 1.998891072610567e-05, "loss": 1.5516, "step": 100800 }, { "epoch": 0.07503755596210195, "grad_norm": 0.534389078617096, "learning_rate": 1.9988888715649357e-05, "loss": 1.5441, "step": 100900 }, { "epoch": 0.07511192420388797, "grad_norm": 1.068562388420105, "learning_rate": 1.998886668338319e-05, "loss": 1.4998, "step": 101000 }, { "epoch": 0.075186292445674, "grad_norm": 0.6331286430358887, "learning_rate": 1.998884462930723e-05, "loss": 1.5633, "step": 101100 }, { "epoch": 0.07526066068746003, "grad_norm": 1.3566038608551025, "learning_rate": 1.998882255342152e-05, "loss": 1.4621, "step": 101200 }, { "epoch": 0.07533502892924605, "grad_norm": 0.9672004580497742, "learning_rate": 1.998880045572611e-05, "loss": 1.5249, "step": 101300 }, { "epoch": 0.07540939717103208, "grad_norm": 0.36732280254364014, "learning_rate": 1.9988778336221045e-05, "loss": 1.574, "step": 101400 }, { "epoch": 0.07548376541281811, "grad_norm": 0.4788234829902649, "learning_rate": 1.998875619490638e-05, "loss": 1.5418, "step": 101500 }, { "epoch": 0.07555813365460413, "grad_norm": 0.8955681324005127, "learning_rate": 1.9988734031782157e-05, "loss": 1.5568, "step": 101600 }, { "epoch": 0.07563250189639016, "grad_norm": 0.8049163222312927, "learning_rate": 1.9988711846848427e-05, "loss": 1.4838, "step": 101700 }, { "epoch": 0.07570687013817619, "grad_norm": 0.7558008432388306, "learning_rate": 1.9988689640105235e-05, "loss": 1.4955, "step": 101800 }, { "epoch": 0.07578123837996222, "grad_norm": 0.4749026596546173, "learning_rate": 1.9988667411552635e-05, "loss": 1.5929, "step": 101900 }, { "epoch": 0.07585560662174824, "grad_norm": 0.6597522497177124, "learning_rate": 1.998864516119067e-05, "loss": 1.5584, "step": 102000 }, { "epoch": 0.07592997486353428, "grad_norm": 0.7412188053131104, "learning_rate": 1.9988622889019395e-05, "loss": 1.5842, "step": 102100 }, { "epoch": 0.07600434310532031, "grad_norm": 0.5564984679222107, "learning_rate": 1.9988600595038853e-05, "loss": 1.5764, "step": 102200 }, { "epoch": 0.07607871134710634, "grad_norm": 1.0488529205322266, "learning_rate": 1.9988578279249097e-05, "loss": 1.458, "step": 102300 }, { "epoch": 0.07615307958889236, "grad_norm": 1.40269136428833, "learning_rate": 1.998855594165017e-05, "loss": 1.4588, "step": 102400 }, { "epoch": 0.07622744783067839, "grad_norm": 0.8488138318061829, "learning_rate": 1.9988533582242127e-05, "loss": 1.522, "step": 102500 }, { "epoch": 0.07630181607246442, "grad_norm": 0.5191701054573059, "learning_rate": 1.9988511201025015e-05, "loss": 1.5036, "step": 102600 }, { "epoch": 0.07637618431425044, "grad_norm": 0.6648279428482056, "learning_rate": 1.9988488797998878e-05, "loss": 1.4929, "step": 102700 }, { "epoch": 0.07645055255603647, "grad_norm": 1.8600202798843384, "learning_rate": 1.9988466373163774e-05, "loss": 1.5692, "step": 102800 }, { "epoch": 0.0765249207978225, "grad_norm": 0.7583739757537842, "learning_rate": 1.9988443926519743e-05, "loss": 1.5145, "step": 102900 }, { "epoch": 0.07659928903960853, "grad_norm": 0.6128048300743103, "learning_rate": 1.998842145806684e-05, "loss": 1.5729, "step": 103000 }, { "epoch": 0.07667365728139455, "grad_norm": 0.7574602365493774, "learning_rate": 1.998839896780511e-05, "loss": 1.4356, "step": 103100 }, { "epoch": 0.07674802552318058, "grad_norm": 1.4134727716445923, "learning_rate": 1.9988376455734606e-05, "loss": 1.5048, "step": 103200 }, { "epoch": 0.0768223937649666, "grad_norm": 0.7592337727546692, "learning_rate": 1.9988353921855374e-05, "loss": 1.4988, "step": 103300 }, { "epoch": 0.07689676200675263, "grad_norm": 0.522486686706543, "learning_rate": 1.9988331366167465e-05, "loss": 1.5654, "step": 103400 }, { "epoch": 0.07697113024853866, "grad_norm": 0.6535342335700989, "learning_rate": 1.9988308788670925e-05, "loss": 1.4593, "step": 103500 }, { "epoch": 0.07704549849032469, "grad_norm": 0.6663926243782043, "learning_rate": 1.9988286189365808e-05, "loss": 1.477, "step": 103600 }, { "epoch": 0.07711986673211071, "grad_norm": 0.5006215572357178, "learning_rate": 1.998826356825216e-05, "loss": 1.5326, "step": 103700 }, { "epoch": 0.07719423497389674, "grad_norm": 0.6826842427253723, "learning_rate": 1.9988240925330032e-05, "loss": 1.5102, "step": 103800 }, { "epoch": 0.07726860321568277, "grad_norm": 0.2680438756942749, "learning_rate": 1.9988218260599477e-05, "loss": 1.4773, "step": 103900 }, { "epoch": 0.07734297145746881, "grad_norm": 0.9159733057022095, "learning_rate": 1.9988195574060536e-05, "loss": 1.4984, "step": 104000 }, { "epoch": 0.07741733969925484, "grad_norm": 1.0930269956588745, "learning_rate": 1.9988172865713266e-05, "loss": 1.4196, "step": 104100 }, { "epoch": 0.07749170794104086, "grad_norm": 0.6656064391136169, "learning_rate": 1.998815013555771e-05, "loss": 1.5282, "step": 104200 }, { "epoch": 0.07756607618282689, "grad_norm": 0.6679131388664246, "learning_rate": 1.9988127383593923e-05, "loss": 1.4922, "step": 104300 }, { "epoch": 0.07764044442461292, "grad_norm": 0.5231404304504395, "learning_rate": 1.9988104609821953e-05, "loss": 1.4648, "step": 104400 }, { "epoch": 0.07771481266639894, "grad_norm": 0.6543662548065186, "learning_rate": 1.998808181424185e-05, "loss": 1.5349, "step": 104500 }, { "epoch": 0.07778918090818497, "grad_norm": 0.4422987997531891, "learning_rate": 1.9988058996853666e-05, "loss": 1.5031, "step": 104600 }, { "epoch": 0.077863549149971, "grad_norm": 0.74057537317276, "learning_rate": 1.9988036157657444e-05, "loss": 1.5373, "step": 104700 }, { "epoch": 0.07793791739175703, "grad_norm": 0.8893790245056152, "learning_rate": 1.998801329665324e-05, "loss": 1.5177, "step": 104800 }, { "epoch": 0.07801228563354305, "grad_norm": 0.898235559463501, "learning_rate": 1.9987990413841103e-05, "loss": 1.5938, "step": 104900 }, { "epoch": 0.07808665387532908, "grad_norm": 0.566254198551178, "learning_rate": 1.9987967509221082e-05, "loss": 1.4581, "step": 105000 }, { "epoch": 0.0781610221171151, "grad_norm": 0.6054997444152832, "learning_rate": 1.9987944582793226e-05, "loss": 1.5248, "step": 105100 }, { "epoch": 0.07823539035890113, "grad_norm": 0.6898595690727234, "learning_rate": 1.9987921634557588e-05, "loss": 1.5482, "step": 105200 }, { "epoch": 0.07830975860068716, "grad_norm": 0.7741703391075134, "learning_rate": 1.9987898664514213e-05, "loss": 1.5175, "step": 105300 }, { "epoch": 0.07838412684247319, "grad_norm": 0.649459958076477, "learning_rate": 1.9987875672663155e-05, "loss": 1.5702, "step": 105400 }, { "epoch": 0.07845849508425921, "grad_norm": 1.0062605142593384, "learning_rate": 1.9987852659004465e-05, "loss": 1.5077, "step": 105500 }, { "epoch": 0.07853286332604524, "grad_norm": 0.5658386945724487, "learning_rate": 1.9987829623538193e-05, "loss": 1.5682, "step": 105600 }, { "epoch": 0.07860723156783127, "grad_norm": 0.5370775461196899, "learning_rate": 1.9987806566264383e-05, "loss": 1.5635, "step": 105700 }, { "epoch": 0.0786815998096173, "grad_norm": 0.5227326154708862, "learning_rate": 1.9987783487183097e-05, "loss": 1.4999, "step": 105800 }, { "epoch": 0.07875596805140334, "grad_norm": 0.7286719083786011, "learning_rate": 1.9987760386294376e-05, "loss": 1.5865, "step": 105900 }, { "epoch": 0.07883033629318936, "grad_norm": 0.48839372396469116, "learning_rate": 1.9987737263598272e-05, "loss": 1.545, "step": 106000 }, { "epoch": 0.07890470453497539, "grad_norm": 0.5450071692466736, "learning_rate": 1.9987714119094835e-05, "loss": 1.5127, "step": 106100 }, { "epoch": 0.07897907277676142, "grad_norm": 0.9064304828643799, "learning_rate": 1.998769095278412e-05, "loss": 1.5651, "step": 106200 }, { "epoch": 0.07905344101854744, "grad_norm": 0.5823491811752319, "learning_rate": 1.9987667764666177e-05, "loss": 1.4713, "step": 106300 }, { "epoch": 0.07912780926033347, "grad_norm": 0.5959489345550537, "learning_rate": 1.9987644554741053e-05, "loss": 1.5232, "step": 106400 }, { "epoch": 0.0792021775021195, "grad_norm": 0.9435197114944458, "learning_rate": 1.99876213230088e-05, "loss": 1.5014, "step": 106500 }, { "epoch": 0.07927654574390552, "grad_norm": 0.7861834764480591, "learning_rate": 1.998759806946947e-05, "loss": 1.5232, "step": 106600 }, { "epoch": 0.07935091398569155, "grad_norm": 0.5803347229957581, "learning_rate": 1.9987574794123113e-05, "loss": 1.5872, "step": 106700 }, { "epoch": 0.07942528222747758, "grad_norm": 0.5119001269340515, "learning_rate": 1.9987551496969776e-05, "loss": 1.5878, "step": 106800 }, { "epoch": 0.0794996504692636, "grad_norm": 0.4545336961746216, "learning_rate": 1.9987528178009518e-05, "loss": 1.5802, "step": 106900 }, { "epoch": 0.07957401871104963, "grad_norm": 0.5515133738517761, "learning_rate": 1.998750483724238e-05, "loss": 1.452, "step": 107000 }, { "epoch": 0.07964838695283566, "grad_norm": 1.0692124366760254, "learning_rate": 1.998748147466842e-05, "loss": 1.4221, "step": 107100 }, { "epoch": 0.07972275519462169, "grad_norm": 0.5913635492324829, "learning_rate": 1.998745809028769e-05, "loss": 1.5008, "step": 107200 }, { "epoch": 0.07979712343640771, "grad_norm": 0.5644622445106506, "learning_rate": 1.9987434684100235e-05, "loss": 1.4898, "step": 107300 }, { "epoch": 0.07987149167819374, "grad_norm": 1.2461758852005005, "learning_rate": 1.998741125610611e-05, "loss": 1.4811, "step": 107400 }, { "epoch": 0.07994585991997977, "grad_norm": 1.1428260803222656, "learning_rate": 1.9987387806305366e-05, "loss": 1.5147, "step": 107500 }, { "epoch": 0.0800202281617658, "grad_norm": 0.8803263306617737, "learning_rate": 1.9987364334698055e-05, "loss": 1.6093, "step": 107600 }, { "epoch": 0.08009459640355182, "grad_norm": 0.583833634853363, "learning_rate": 1.998734084128423e-05, "loss": 1.5074, "step": 107700 }, { "epoch": 0.08016896464533786, "grad_norm": 0.423572301864624, "learning_rate": 1.9987317326063934e-05, "loss": 1.4774, "step": 107800 }, { "epoch": 0.08024333288712389, "grad_norm": 0.735720157623291, "learning_rate": 1.9987293789037223e-05, "loss": 1.5029, "step": 107900 }, { "epoch": 0.08031770112890992, "grad_norm": 0.6082938313484192, "learning_rate": 1.9987270230204153e-05, "loss": 1.4228, "step": 108000 }, { "epoch": 0.08039206937069594, "grad_norm": 0.392838716506958, "learning_rate": 1.998724664956477e-05, "loss": 1.537, "step": 108100 }, { "epoch": 0.08046643761248197, "grad_norm": 1.8458749055862427, "learning_rate": 1.9987223047119125e-05, "loss": 1.4024, "step": 108200 }, { "epoch": 0.080540805854268, "grad_norm": 0.548555850982666, "learning_rate": 1.9987199422867275e-05, "loss": 1.4865, "step": 108300 }, { "epoch": 0.08061517409605402, "grad_norm": 1.6752604246139526, "learning_rate": 1.9987175776809263e-05, "loss": 1.5053, "step": 108400 }, { "epoch": 0.08068954233784005, "grad_norm": 0.6298259496688843, "learning_rate": 1.998715210894515e-05, "loss": 1.6348, "step": 108500 }, { "epoch": 0.08076391057962608, "grad_norm": 0.5875239968299866, "learning_rate": 1.9987128419274984e-05, "loss": 1.5597, "step": 108600 }, { "epoch": 0.0808382788214121, "grad_norm": 0.738935649394989, "learning_rate": 1.9987104707798815e-05, "loss": 1.6012, "step": 108700 }, { "epoch": 0.08091264706319813, "grad_norm": 0.5535106062889099, "learning_rate": 1.9987080974516697e-05, "loss": 1.528, "step": 108800 }, { "epoch": 0.08098701530498416, "grad_norm": 0.719341516494751, "learning_rate": 1.998705721942868e-05, "loss": 1.5549, "step": 108900 }, { "epoch": 0.08106138354677019, "grad_norm": 0.665989875793457, "learning_rate": 1.9987033442534813e-05, "loss": 1.5212, "step": 109000 }, { "epoch": 0.08113575178855621, "grad_norm": 0.5183430314064026, "learning_rate": 1.9987009643835156e-05, "loss": 1.4446, "step": 109100 }, { "epoch": 0.08121012003034224, "grad_norm": 0.8556896448135376, "learning_rate": 1.9986985823329753e-05, "loss": 1.6025, "step": 109200 }, { "epoch": 0.08128448827212827, "grad_norm": 0.615251898765564, "learning_rate": 1.9986961981018666e-05, "loss": 1.4273, "step": 109300 }, { "epoch": 0.0813588565139143, "grad_norm": 0.8173052072525024, "learning_rate": 1.998693811690193e-05, "loss": 1.4829, "step": 109400 }, { "epoch": 0.08143322475570032, "grad_norm": 0.5940482020378113, "learning_rate": 1.998691423097962e-05, "loss": 1.5261, "step": 109500 }, { "epoch": 0.08150759299748635, "grad_norm": 1.1598200798034668, "learning_rate": 1.9986890323251767e-05, "loss": 1.4685, "step": 109600 }, { "epoch": 0.08158196123927237, "grad_norm": 0.5181326270103455, "learning_rate": 1.9986866393718434e-05, "loss": 1.5543, "step": 109700 }, { "epoch": 0.08165632948105842, "grad_norm": 1.0629653930664062, "learning_rate": 1.9986842442379672e-05, "loss": 1.4274, "step": 109800 }, { "epoch": 0.08173069772284444, "grad_norm": 0.992058515548706, "learning_rate": 1.998681846923553e-05, "loss": 1.4632, "step": 109900 }, { "epoch": 0.08180506596463047, "grad_norm": 0.6007574796676636, "learning_rate": 1.998679447428607e-05, "loss": 1.53, "step": 110000 }, { "epoch": 0.0818794342064165, "grad_norm": 1.6865825653076172, "learning_rate": 1.998677045753133e-05, "loss": 1.5762, "step": 110100 }, { "epoch": 0.08195380244820252, "grad_norm": 0.7208903431892395, "learning_rate": 1.998674641897137e-05, "loss": 1.4674, "step": 110200 }, { "epoch": 0.08202817068998855, "grad_norm": 0.5151882171630859, "learning_rate": 1.9986722358606245e-05, "loss": 1.5002, "step": 110300 }, { "epoch": 0.08210253893177458, "grad_norm": 0.9637214541435242, "learning_rate": 1.9986698276436006e-05, "loss": 1.5462, "step": 110400 }, { "epoch": 0.0821769071735606, "grad_norm": 0.5407575964927673, "learning_rate": 1.99866741724607e-05, "loss": 1.4805, "step": 110500 }, { "epoch": 0.08225127541534663, "grad_norm": 0.5322892069816589, "learning_rate": 1.998665004668039e-05, "loss": 1.5482, "step": 110600 }, { "epoch": 0.08232564365713266, "grad_norm": 0.4940236806869507, "learning_rate": 1.9986625899095117e-05, "loss": 1.5099, "step": 110700 }, { "epoch": 0.08240001189891868, "grad_norm": 1.2844667434692383, "learning_rate": 1.998660172970494e-05, "loss": 1.5486, "step": 110800 }, { "epoch": 0.08247438014070471, "grad_norm": 0.4709852337837219, "learning_rate": 1.9986577538509914e-05, "loss": 1.5261, "step": 110900 }, { "epoch": 0.08254874838249074, "grad_norm": 0.8458433151245117, "learning_rate": 1.9986553325510087e-05, "loss": 1.5417, "step": 111000 }, { "epoch": 0.08262311662427677, "grad_norm": 0.6459673047065735, "learning_rate": 1.9986529090705514e-05, "loss": 1.5017, "step": 111100 }, { "epoch": 0.08269748486606279, "grad_norm": 0.8816545009613037, "learning_rate": 1.9986504834096244e-05, "loss": 1.5322, "step": 111200 }, { "epoch": 0.08277185310784882, "grad_norm": 0.5154295563697815, "learning_rate": 1.998648055568234e-05, "loss": 1.3878, "step": 111300 }, { "epoch": 0.08284622134963485, "grad_norm": 0.5381690859794617, "learning_rate": 1.9986456255463845e-05, "loss": 1.4996, "step": 111400 }, { "epoch": 0.08292058959142087, "grad_norm": 0.8961411118507385, "learning_rate": 1.9986431933440818e-05, "loss": 1.4467, "step": 111500 }, { "epoch": 0.0829949578332069, "grad_norm": 0.6580142378807068, "learning_rate": 1.9986407589613307e-05, "loss": 1.5176, "step": 111600 }, { "epoch": 0.08306932607499294, "grad_norm": 0.8758597373962402, "learning_rate": 1.9986383223981372e-05, "loss": 1.5797, "step": 111700 }, { "epoch": 0.08314369431677897, "grad_norm": 1.072066307067871, "learning_rate": 1.998635883654506e-05, "loss": 1.5229, "step": 111800 }, { "epoch": 0.083218062558565, "grad_norm": 0.8203564882278442, "learning_rate": 1.9986334427304427e-05, "loss": 1.4577, "step": 111900 }, { "epoch": 0.08329243080035102, "grad_norm": 0.691500723361969, "learning_rate": 1.9986309996259522e-05, "loss": 1.4495, "step": 112000 }, { "epoch": 0.08336679904213705, "grad_norm": 0.4605492651462555, "learning_rate": 1.9986285543410407e-05, "loss": 1.5124, "step": 112100 }, { "epoch": 0.08344116728392308, "grad_norm": 0.6596329212188721, "learning_rate": 1.998626106875713e-05, "loss": 1.5502, "step": 112200 }, { "epoch": 0.0835155355257091, "grad_norm": 0.9638049006462097, "learning_rate": 1.9986236572299746e-05, "loss": 1.513, "step": 112300 }, { "epoch": 0.08358990376749513, "grad_norm": 0.520232081413269, "learning_rate": 1.99862120540383e-05, "loss": 1.5246, "step": 112400 }, { "epoch": 0.08366427200928116, "grad_norm": 1.2472305297851562, "learning_rate": 1.998618751397286e-05, "loss": 1.5571, "step": 112500 }, { "epoch": 0.08373864025106718, "grad_norm": 1.4931305646896362, "learning_rate": 1.9986162952103472e-05, "loss": 1.6035, "step": 112600 }, { "epoch": 0.08381300849285321, "grad_norm": 0.6507347226142883, "learning_rate": 1.998613836843019e-05, "loss": 1.5832, "step": 112700 }, { "epoch": 0.08388737673463924, "grad_norm": 0.43901050090789795, "learning_rate": 1.9986113762953063e-05, "loss": 1.4757, "step": 112800 }, { "epoch": 0.08396174497642526, "grad_norm": 0.6376226544380188, "learning_rate": 1.9986089135672152e-05, "loss": 1.5299, "step": 112900 }, { "epoch": 0.08403611321821129, "grad_norm": 0.5019524693489075, "learning_rate": 1.9986064486587512e-05, "loss": 1.5358, "step": 113000 }, { "epoch": 0.08411048145999732, "grad_norm": 1.2650564908981323, "learning_rate": 1.998603981569919e-05, "loss": 1.4145, "step": 113100 }, { "epoch": 0.08418484970178335, "grad_norm": 0.5619797706604004, "learning_rate": 1.9986015123007242e-05, "loss": 1.5037, "step": 113200 }, { "epoch": 0.08425921794356937, "grad_norm": 0.6230933666229248, "learning_rate": 1.9985990408511723e-05, "loss": 1.5106, "step": 113300 }, { "epoch": 0.0843335861853554, "grad_norm": 0.5318901538848877, "learning_rate": 1.998596567221269e-05, "loss": 1.4836, "step": 113400 }, { "epoch": 0.08440795442714143, "grad_norm": 0.5024573802947998, "learning_rate": 1.9985940914110192e-05, "loss": 1.4847, "step": 113500 }, { "epoch": 0.08448232266892747, "grad_norm": 0.5011327266693115, "learning_rate": 1.9985916134204285e-05, "loss": 1.5174, "step": 113600 }, { "epoch": 0.0845566909107135, "grad_norm": 0.6056029200553894, "learning_rate": 1.998589133249502e-05, "loss": 1.4732, "step": 113700 }, { "epoch": 0.08463105915249952, "grad_norm": 0.485939085483551, "learning_rate": 1.9985866508982462e-05, "loss": 1.4688, "step": 113800 }, { "epoch": 0.08470542739428555, "grad_norm": 1.0872737169265747, "learning_rate": 1.998584166366665e-05, "loss": 1.5332, "step": 113900 }, { "epoch": 0.08477979563607158, "grad_norm": 0.6303906440734863, "learning_rate": 1.998581679654765e-05, "loss": 1.6084, "step": 114000 }, { "epoch": 0.0848541638778576, "grad_norm": 0.598016619682312, "learning_rate": 1.998579190762551e-05, "loss": 1.5643, "step": 114100 }, { "epoch": 0.08492853211964363, "grad_norm": 0.47130709886550903, "learning_rate": 1.9985766996900287e-05, "loss": 1.533, "step": 114200 }, { "epoch": 0.08500290036142966, "grad_norm": 0.7291996479034424, "learning_rate": 1.9985742064372036e-05, "loss": 1.5193, "step": 114300 }, { "epoch": 0.08507726860321568, "grad_norm": 0.8178292512893677, "learning_rate": 1.998571711004081e-05, "loss": 1.4678, "step": 114400 }, { "epoch": 0.08515163684500171, "grad_norm": 0.537441611289978, "learning_rate": 1.998569213390666e-05, "loss": 1.5549, "step": 114500 }, { "epoch": 0.08522600508678774, "grad_norm": 0.7284172177314758, "learning_rate": 1.9985667135969642e-05, "loss": 1.5368, "step": 114600 }, { "epoch": 0.08530037332857376, "grad_norm": 0.46331995725631714, "learning_rate": 1.998564211622982e-05, "loss": 1.4837, "step": 114700 }, { "epoch": 0.08537474157035979, "grad_norm": 0.5781813859939575, "learning_rate": 1.998561707468724e-05, "loss": 1.5139, "step": 114800 }, { "epoch": 0.08544910981214582, "grad_norm": 0.633541464805603, "learning_rate": 1.9985592011341953e-05, "loss": 1.5016, "step": 114900 }, { "epoch": 0.08552347805393185, "grad_norm": 0.5329047441482544, "learning_rate": 1.998556692619402e-05, "loss": 1.4913, "step": 115000 }, { "epoch": 0.08559784629571787, "grad_norm": 1.094671368598938, "learning_rate": 1.99855418192435e-05, "loss": 1.5134, "step": 115100 }, { "epoch": 0.0856722145375039, "grad_norm": 1.0344128608703613, "learning_rate": 1.9985516690490438e-05, "loss": 1.502, "step": 115200 }, { "epoch": 0.08574658277928993, "grad_norm": 0.6620404124259949, "learning_rate": 1.9985491539934893e-05, "loss": 1.5916, "step": 115300 }, { "epoch": 0.08582095102107595, "grad_norm": 0.6009657979011536, "learning_rate": 1.998546636757692e-05, "loss": 1.4489, "step": 115400 }, { "epoch": 0.085895319262862, "grad_norm": 0.6658787131309509, "learning_rate": 1.9985441173416578e-05, "loss": 1.546, "step": 115500 }, { "epoch": 0.08596968750464802, "grad_norm": 0.6819202303886414, "learning_rate": 1.9985415957453915e-05, "loss": 1.5703, "step": 115600 }, { "epoch": 0.08604405574643405, "grad_norm": 0.7331560254096985, "learning_rate": 1.998539071968899e-05, "loss": 1.5963, "step": 115700 }, { "epoch": 0.08611842398822007, "grad_norm": 1.3560550212860107, "learning_rate": 1.9985365460121854e-05, "loss": 1.5081, "step": 115800 }, { "epoch": 0.0861927922300061, "grad_norm": 0.35963961482048035, "learning_rate": 1.998534017875257e-05, "loss": 1.4743, "step": 115900 }, { "epoch": 0.08626716047179213, "grad_norm": 0.5468844771385193, "learning_rate": 1.9985314875581188e-05, "loss": 1.4833, "step": 116000 }, { "epoch": 0.08634152871357816, "grad_norm": 1.0857586860656738, "learning_rate": 1.9985289550607763e-05, "loss": 1.5037, "step": 116100 }, { "epoch": 0.08641589695536418, "grad_norm": 0.776099443435669, "learning_rate": 1.9985264203832348e-05, "loss": 1.46, "step": 116200 }, { "epoch": 0.08649026519715021, "grad_norm": 0.42475637793540955, "learning_rate": 1.9985238835255005e-05, "loss": 1.5133, "step": 116300 }, { "epoch": 0.08656463343893624, "grad_norm": 0.9143125414848328, "learning_rate": 1.9985213444875784e-05, "loss": 1.5373, "step": 116400 }, { "epoch": 0.08663900168072226, "grad_norm": 0.3708833158016205, "learning_rate": 1.9985188032694744e-05, "loss": 1.4839, "step": 116500 }, { "epoch": 0.08671336992250829, "grad_norm": 0.5340797901153564, "learning_rate": 1.998516259871194e-05, "loss": 1.5894, "step": 116600 }, { "epoch": 0.08678773816429432, "grad_norm": 0.596332311630249, "learning_rate": 1.9985137142927425e-05, "loss": 1.5077, "step": 116700 }, { "epoch": 0.08686210640608034, "grad_norm": 1.4712780714035034, "learning_rate": 1.9985111665341256e-05, "loss": 1.5066, "step": 116800 }, { "epoch": 0.08693647464786637, "grad_norm": 0.5092660784721375, "learning_rate": 1.9985086165953485e-05, "loss": 1.5264, "step": 116900 }, { "epoch": 0.0870108428896524, "grad_norm": 1.2285597324371338, "learning_rate": 1.9985060644764173e-05, "loss": 1.547, "step": 117000 }, { "epoch": 0.08708521113143843, "grad_norm": 0.6086132526397705, "learning_rate": 1.9985035101773375e-05, "loss": 1.4669, "step": 117100 }, { "epoch": 0.08715957937322445, "grad_norm": 0.5280595421791077, "learning_rate": 1.998500953698115e-05, "loss": 1.5262, "step": 117200 }, { "epoch": 0.08723394761501048, "grad_norm": 0.8597710132598877, "learning_rate": 1.998498395038754e-05, "loss": 1.4795, "step": 117300 }, { "epoch": 0.08730831585679652, "grad_norm": 0.44925370812416077, "learning_rate": 1.9984958341992616e-05, "loss": 1.5654, "step": 117400 }, { "epoch": 0.08738268409858255, "grad_norm": 0.4251132309436798, "learning_rate": 1.998493271179643e-05, "loss": 1.5327, "step": 117500 }, { "epoch": 0.08745705234036857, "grad_norm": 0.8036742806434631, "learning_rate": 1.9984907059799033e-05, "loss": 1.5112, "step": 117600 }, { "epoch": 0.0875314205821546, "grad_norm": 1.1122610569000244, "learning_rate": 1.9984881386000487e-05, "loss": 1.4798, "step": 117700 }, { "epoch": 0.08760578882394063, "grad_norm": 0.6719459891319275, "learning_rate": 1.998485569040084e-05, "loss": 1.508, "step": 117800 }, { "epoch": 0.08768015706572665, "grad_norm": 1.9517614841461182, "learning_rate": 1.998482997300016e-05, "loss": 1.4953, "step": 117900 }, { "epoch": 0.08775452530751268, "grad_norm": 0.9113404750823975, "learning_rate": 1.9984804233798495e-05, "loss": 1.4989, "step": 118000 }, { "epoch": 0.08782889354929871, "grad_norm": 0.5168036818504333, "learning_rate": 1.99847784727959e-05, "loss": 1.4715, "step": 118100 }, { "epoch": 0.08790326179108474, "grad_norm": 0.7708849310874939, "learning_rate": 1.9984752689992437e-05, "loss": 1.567, "step": 118200 }, { "epoch": 0.08797763003287076, "grad_norm": 1.164646863937378, "learning_rate": 1.998472688538816e-05, "loss": 1.4157, "step": 118300 }, { "epoch": 0.08805199827465679, "grad_norm": 0.5568968057632446, "learning_rate": 1.998470105898312e-05, "loss": 1.5998, "step": 118400 }, { "epoch": 0.08812636651644282, "grad_norm": 1.1717958450317383, "learning_rate": 1.9984675210777383e-05, "loss": 1.5579, "step": 118500 }, { "epoch": 0.08820073475822884, "grad_norm": 0.43212440609931946, "learning_rate": 1.9984649340770996e-05, "loss": 1.4959, "step": 118600 }, { "epoch": 0.08827510300001487, "grad_norm": 1.6434980630874634, "learning_rate": 1.998462344896402e-05, "loss": 1.4855, "step": 118700 }, { "epoch": 0.0883494712418009, "grad_norm": 0.7168663144111633, "learning_rate": 1.9984597535356515e-05, "loss": 1.5425, "step": 118800 }, { "epoch": 0.08842383948358692, "grad_norm": 0.9087129831314087, "learning_rate": 1.9984571599948533e-05, "loss": 1.547, "step": 118900 }, { "epoch": 0.08849820772537295, "grad_norm": 0.7705495357513428, "learning_rate": 1.998454564274013e-05, "loss": 1.5327, "step": 119000 }, { "epoch": 0.08857257596715898, "grad_norm": 0.7294080853462219, "learning_rate": 1.998451966373137e-05, "loss": 1.4922, "step": 119100 }, { "epoch": 0.088646944208945, "grad_norm": 0.511020839214325, "learning_rate": 1.99844936629223e-05, "loss": 1.5746, "step": 119200 }, { "epoch": 0.08872131245073105, "grad_norm": 1.0015604496002197, "learning_rate": 1.998446764031298e-05, "loss": 1.5573, "step": 119300 }, { "epoch": 0.08879568069251707, "grad_norm": 0.7985163331031799, "learning_rate": 1.9984441595903467e-05, "loss": 1.5865, "step": 119400 }, { "epoch": 0.0888700489343031, "grad_norm": 0.8295354843139648, "learning_rate": 1.998441552969382e-05, "loss": 1.5697, "step": 119500 }, { "epoch": 0.08894441717608913, "grad_norm": 0.6399853825569153, "learning_rate": 1.9984389441684095e-05, "loss": 1.4873, "step": 119600 }, { "epoch": 0.08901878541787515, "grad_norm": 0.5377764105796814, "learning_rate": 1.998436333187435e-05, "loss": 1.5265, "step": 119700 }, { "epoch": 0.08909315365966118, "grad_norm": 0.5614314675331116, "learning_rate": 1.998433720026464e-05, "loss": 1.5525, "step": 119800 }, { "epoch": 0.08916752190144721, "grad_norm": 1.1182438135147095, "learning_rate": 1.9984311046855018e-05, "loss": 1.4349, "step": 119900 }, { "epoch": 0.08924189014323324, "grad_norm": 0.9546105265617371, "learning_rate": 1.998428487164555e-05, "loss": 1.5165, "step": 120000 }, { "epoch": 0.08931625838501926, "grad_norm": 0.8639363050460815, "learning_rate": 1.9984258674636287e-05, "loss": 1.4879, "step": 120100 }, { "epoch": 0.08939062662680529, "grad_norm": 0.5587357878684998, "learning_rate": 1.998423245582729e-05, "loss": 1.5287, "step": 120200 }, { "epoch": 0.08946499486859132, "grad_norm": 0.9583162069320679, "learning_rate": 1.998420621521861e-05, "loss": 1.5035, "step": 120300 }, { "epoch": 0.08953936311037734, "grad_norm": 1.1195632219314575, "learning_rate": 1.9984179952810313e-05, "loss": 1.5373, "step": 120400 }, { "epoch": 0.08961373135216337, "grad_norm": 0.8882973790168762, "learning_rate": 1.998415366860245e-05, "loss": 1.4206, "step": 120500 }, { "epoch": 0.0896880995939494, "grad_norm": 0.8588703274726868, "learning_rate": 1.9984127362595077e-05, "loss": 1.4862, "step": 120600 }, { "epoch": 0.08976246783573542, "grad_norm": 1.1539242267608643, "learning_rate": 1.998410103478826e-05, "loss": 1.3643, "step": 120700 }, { "epoch": 0.08983683607752145, "grad_norm": 0.6719908714294434, "learning_rate": 1.998407468518205e-05, "loss": 1.4989, "step": 120800 }, { "epoch": 0.08991120431930748, "grad_norm": 0.49642276763916016, "learning_rate": 1.9984048313776503e-05, "loss": 1.4509, "step": 120900 }, { "epoch": 0.0899855725610935, "grad_norm": 0.5524992942810059, "learning_rate": 1.998402192057168e-05, "loss": 1.613, "step": 121000 }, { "epoch": 0.09005994080287953, "grad_norm": 0.6267023086547852, "learning_rate": 1.998399550556764e-05, "loss": 1.5736, "step": 121100 }, { "epoch": 0.09013430904466557, "grad_norm": 0.545433521270752, "learning_rate": 1.998396906876444e-05, "loss": 1.5411, "step": 121200 }, { "epoch": 0.0902086772864516, "grad_norm": 0.6862292885780334, "learning_rate": 1.9983942610162134e-05, "loss": 1.46, "step": 121300 }, { "epoch": 0.09028304552823763, "grad_norm": 0.40539294481277466, "learning_rate": 1.998391612976078e-05, "loss": 1.5194, "step": 121400 }, { "epoch": 0.09035741377002365, "grad_norm": 0.5084177851676941, "learning_rate": 1.998388962756044e-05, "loss": 1.5075, "step": 121500 }, { "epoch": 0.09043178201180968, "grad_norm": 0.6504777669906616, "learning_rate": 1.998386310356117e-05, "loss": 1.5427, "step": 121600 }, { "epoch": 0.09050615025359571, "grad_norm": 0.5811581015586853, "learning_rate": 1.998383655776303e-05, "loss": 1.4964, "step": 121700 }, { "epoch": 0.09058051849538173, "grad_norm": 0.43835243582725525, "learning_rate": 1.998380999016607e-05, "loss": 1.5913, "step": 121800 }, { "epoch": 0.09065488673716776, "grad_norm": 0.8307141065597534, "learning_rate": 1.9983783400770356e-05, "loss": 1.4709, "step": 121900 }, { "epoch": 0.09072925497895379, "grad_norm": 0.9118626117706299, "learning_rate": 1.9983756789575944e-05, "loss": 1.532, "step": 122000 }, { "epoch": 0.09080362322073982, "grad_norm": 0.7729747295379639, "learning_rate": 1.9983730156582892e-05, "loss": 1.4918, "step": 122100 }, { "epoch": 0.09087799146252584, "grad_norm": 1.1108375787734985, "learning_rate": 1.9983703501791257e-05, "loss": 1.4941, "step": 122200 }, { "epoch": 0.09095235970431187, "grad_norm": 0.5792679190635681, "learning_rate": 1.99836768252011e-05, "loss": 1.5624, "step": 122300 }, { "epoch": 0.0910267279460979, "grad_norm": 0.42307305335998535, "learning_rate": 1.9983650126812473e-05, "loss": 1.5294, "step": 122400 }, { "epoch": 0.09110109618788392, "grad_norm": 0.5456382036209106, "learning_rate": 1.9983623406625447e-05, "loss": 1.4705, "step": 122500 }, { "epoch": 0.09117546442966995, "grad_norm": 0.5862551331520081, "learning_rate": 1.9983596664640067e-05, "loss": 1.4577, "step": 122600 }, { "epoch": 0.09124983267145598, "grad_norm": 0.4586878716945648, "learning_rate": 1.9983569900856394e-05, "loss": 1.4504, "step": 122700 }, { "epoch": 0.091324200913242, "grad_norm": 0.4806223511695862, "learning_rate": 1.9983543115274493e-05, "loss": 1.4395, "step": 122800 }, { "epoch": 0.09139856915502803, "grad_norm": 0.6813477873802185, "learning_rate": 1.9983516307894416e-05, "loss": 1.5316, "step": 122900 }, { "epoch": 0.09147293739681406, "grad_norm": 0.7039106488227844, "learning_rate": 1.998348947871622e-05, "loss": 1.542, "step": 123000 }, { "epoch": 0.0915473056386001, "grad_norm": 0.8664810061454773, "learning_rate": 1.9983462627739975e-05, "loss": 1.4581, "step": 123100 }, { "epoch": 0.09162167388038613, "grad_norm": 0.6690797805786133, "learning_rate": 1.998343575496573e-05, "loss": 1.4828, "step": 123200 }, { "epoch": 0.09169604212217215, "grad_norm": 0.6566073894500732, "learning_rate": 1.998340886039354e-05, "loss": 1.5607, "step": 123300 }, { "epoch": 0.09177041036395818, "grad_norm": 0.848997950553894, "learning_rate": 1.998338194402348e-05, "loss": 1.4686, "step": 123400 }, { "epoch": 0.0918447786057442, "grad_norm": 0.8229367136955261, "learning_rate": 1.9983355005855592e-05, "loss": 1.5175, "step": 123500 }, { "epoch": 0.09191914684753023, "grad_norm": 0.49279895424842834, "learning_rate": 1.998332804588994e-05, "loss": 1.5399, "step": 123600 }, { "epoch": 0.09199351508931626, "grad_norm": 0.35640183091163635, "learning_rate": 1.9983301064126586e-05, "loss": 1.4768, "step": 123700 }, { "epoch": 0.09206788333110229, "grad_norm": 0.8930822014808655, "learning_rate": 1.9983274060565584e-05, "loss": 1.4929, "step": 123800 }, { "epoch": 0.09214225157288831, "grad_norm": 0.5881476402282715, "learning_rate": 1.9983247035207e-05, "loss": 1.4338, "step": 123900 }, { "epoch": 0.09221661981467434, "grad_norm": 0.5559602379798889, "learning_rate": 1.9983219988050886e-05, "loss": 1.4945, "step": 124000 }, { "epoch": 0.09229098805646037, "grad_norm": 0.8542813062667847, "learning_rate": 1.9983192919097304e-05, "loss": 1.557, "step": 124100 }, { "epoch": 0.0923653562982464, "grad_norm": 1.3162521123886108, "learning_rate": 1.9983165828346312e-05, "loss": 1.5121, "step": 124200 }, { "epoch": 0.09243972454003242, "grad_norm": 0.5321398973464966, "learning_rate": 1.9983138715797975e-05, "loss": 1.5154, "step": 124300 }, { "epoch": 0.09251409278181845, "grad_norm": 1.0732853412628174, "learning_rate": 1.998311158145234e-05, "loss": 1.532, "step": 124400 }, { "epoch": 0.09258846102360448, "grad_norm": 0.4347415566444397, "learning_rate": 1.998308442530948e-05, "loss": 1.5407, "step": 124500 }, { "epoch": 0.0926628292653905, "grad_norm": 0.55421382188797, "learning_rate": 1.9983057247369448e-05, "loss": 1.4489, "step": 124600 }, { "epoch": 0.09273719750717653, "grad_norm": 1.5156093835830688, "learning_rate": 1.99830300476323e-05, "loss": 1.4935, "step": 124700 }, { "epoch": 0.09281156574896256, "grad_norm": 0.5327034592628479, "learning_rate": 1.9983002826098098e-05, "loss": 1.5105, "step": 124800 }, { "epoch": 0.09288593399074858, "grad_norm": 0.7998518347740173, "learning_rate": 1.9982975582766904e-05, "loss": 1.5284, "step": 124900 }, { "epoch": 0.09296030223253463, "grad_norm": 0.9221441149711609, "learning_rate": 1.9982948317638774e-05, "loss": 1.5323, "step": 125000 }, { "epoch": 0.09303467047432065, "grad_norm": 0.4025695025920868, "learning_rate": 1.9982921030713766e-05, "loss": 1.5202, "step": 125100 }, { "epoch": 0.09310903871610668, "grad_norm": 0.8448640704154968, "learning_rate": 1.998289372199195e-05, "loss": 1.4325, "step": 125200 }, { "epoch": 0.0931834069578927, "grad_norm": 0.9149714112281799, "learning_rate": 1.998286639147337e-05, "loss": 1.502, "step": 125300 }, { "epoch": 0.09325777519967873, "grad_norm": 0.7809398174285889, "learning_rate": 1.99828390391581e-05, "loss": 1.5191, "step": 125400 }, { "epoch": 0.09333214344146476, "grad_norm": 0.6308737993240356, "learning_rate": 1.9982811665046192e-05, "loss": 1.4752, "step": 125500 }, { "epoch": 0.09340651168325079, "grad_norm": 0.6594569087028503, "learning_rate": 1.9982784269137706e-05, "loss": 1.5764, "step": 125600 }, { "epoch": 0.09348087992503681, "grad_norm": 0.8560895919799805, "learning_rate": 1.9982756851432708e-05, "loss": 1.4774, "step": 125700 }, { "epoch": 0.09355524816682284, "grad_norm": 0.5117990970611572, "learning_rate": 1.998272941193125e-05, "loss": 1.496, "step": 125800 }, { "epoch": 0.09362961640860887, "grad_norm": 0.7814161777496338, "learning_rate": 1.998270195063339e-05, "loss": 1.4153, "step": 125900 }, { "epoch": 0.0937039846503949, "grad_norm": 0.5360785722732544, "learning_rate": 1.99826744675392e-05, "loss": 1.5625, "step": 126000 }, { "epoch": 0.09377835289218092, "grad_norm": 0.7857993841171265, "learning_rate": 1.998264696264873e-05, "loss": 1.4259, "step": 126100 }, { "epoch": 0.09385272113396695, "grad_norm": 0.957544207572937, "learning_rate": 1.9982619435962043e-05, "loss": 1.5603, "step": 126200 }, { "epoch": 0.09392708937575298, "grad_norm": 0.49994486570358276, "learning_rate": 1.99825918874792e-05, "loss": 1.4487, "step": 126300 }, { "epoch": 0.094001457617539, "grad_norm": 0.716764509677887, "learning_rate": 1.9982564317200258e-05, "loss": 1.5488, "step": 126400 }, { "epoch": 0.09407582585932503, "grad_norm": 0.9095839858055115, "learning_rate": 1.998253672512528e-05, "loss": 1.471, "step": 126500 }, { "epoch": 0.09415019410111106, "grad_norm": 0.4523424506187439, "learning_rate": 1.998250911125433e-05, "loss": 1.5348, "step": 126600 }, { "epoch": 0.09422456234289708, "grad_norm": 0.8814778923988342, "learning_rate": 1.998248147558746e-05, "loss": 1.4824, "step": 126700 }, { "epoch": 0.09429893058468311, "grad_norm": 0.7840068936347961, "learning_rate": 1.9982453818124735e-05, "loss": 1.5554, "step": 126800 }, { "epoch": 0.09437329882646915, "grad_norm": 0.7583872675895691, "learning_rate": 1.9982426138866215e-05, "loss": 1.5863, "step": 126900 }, { "epoch": 0.09444766706825518, "grad_norm": 0.5518081188201904, "learning_rate": 1.9982398437811962e-05, "loss": 1.4955, "step": 127000 }, { "epoch": 0.0945220353100412, "grad_norm": 0.9458171725273132, "learning_rate": 1.998237071496203e-05, "loss": 1.5217, "step": 127100 }, { "epoch": 0.09459640355182723, "grad_norm": 0.9077417850494385, "learning_rate": 1.9982342970316487e-05, "loss": 1.5508, "step": 127200 }, { "epoch": 0.09467077179361326, "grad_norm": 0.3265109658241272, "learning_rate": 1.998231520387539e-05, "loss": 1.4698, "step": 127300 }, { "epoch": 0.09474514003539929, "grad_norm": 1.0498961210250854, "learning_rate": 1.99822874156388e-05, "loss": 1.4445, "step": 127400 }, { "epoch": 0.09481950827718531, "grad_norm": 0.7805377244949341, "learning_rate": 1.998225960560678e-05, "loss": 1.571, "step": 127500 }, { "epoch": 0.09489387651897134, "grad_norm": 0.8346536159515381, "learning_rate": 1.9982231773779384e-05, "loss": 1.5079, "step": 127600 }, { "epoch": 0.09496824476075737, "grad_norm": 0.6046540141105652, "learning_rate": 1.9982203920156683e-05, "loss": 1.5863, "step": 127700 }, { "epoch": 0.0950426130025434, "grad_norm": 0.6633752584457397, "learning_rate": 1.9982176044738733e-05, "loss": 1.5784, "step": 127800 }, { "epoch": 0.09511698124432942, "grad_norm": 0.7215563058853149, "learning_rate": 1.998214814752559e-05, "loss": 1.4398, "step": 127900 }, { "epoch": 0.09519134948611545, "grad_norm": 0.44109031558036804, "learning_rate": 1.998212022851732e-05, "loss": 1.4743, "step": 128000 }, { "epoch": 0.09526571772790147, "grad_norm": 0.649109959602356, "learning_rate": 1.9982092287713986e-05, "loss": 1.5396, "step": 128100 }, { "epoch": 0.0953400859696875, "grad_norm": 0.5660013556480408, "learning_rate": 1.998206432511564e-05, "loss": 1.4467, "step": 128200 }, { "epoch": 0.09541445421147353, "grad_norm": 1.5541423559188843, "learning_rate": 1.9982036340722355e-05, "loss": 1.4874, "step": 128300 }, { "epoch": 0.09548882245325956, "grad_norm": 0.49873071908950806, "learning_rate": 1.9982008334534187e-05, "loss": 1.4261, "step": 128400 }, { "epoch": 0.09556319069504558, "grad_norm": 0.45706143975257874, "learning_rate": 1.9981980306551193e-05, "loss": 1.4335, "step": 128500 }, { "epoch": 0.09563755893683161, "grad_norm": 0.7130033373832703, "learning_rate": 1.9981952256773435e-05, "loss": 1.4309, "step": 128600 }, { "epoch": 0.09571192717861764, "grad_norm": 0.4876111149787903, "learning_rate": 1.9981924185200984e-05, "loss": 1.4684, "step": 128700 }, { "epoch": 0.09578629542040368, "grad_norm": 0.8933620452880859, "learning_rate": 1.9981896091833887e-05, "loss": 1.5919, "step": 128800 }, { "epoch": 0.0958606636621897, "grad_norm": 0.4852176010608673, "learning_rate": 1.9981867976672216e-05, "loss": 1.5647, "step": 128900 }, { "epoch": 0.09593503190397573, "grad_norm": 0.5144751071929932, "learning_rate": 1.9981839839716032e-05, "loss": 1.6141, "step": 129000 }, { "epoch": 0.09600940014576176, "grad_norm": 0.622891902923584, "learning_rate": 1.9981811680965388e-05, "loss": 1.527, "step": 129100 }, { "epoch": 0.09608376838754779, "grad_norm": 0.497362345457077, "learning_rate": 1.9981783500420353e-05, "loss": 1.4895, "step": 129200 }, { "epoch": 0.09615813662933381, "grad_norm": 0.7410217523574829, "learning_rate": 1.9981755298080983e-05, "loss": 1.4822, "step": 129300 }, { "epoch": 0.09623250487111984, "grad_norm": 0.8928683996200562, "learning_rate": 1.9981727073947348e-05, "loss": 1.4708, "step": 129400 }, { "epoch": 0.09630687311290587, "grad_norm": 1.2203540802001953, "learning_rate": 1.9981698828019504e-05, "loss": 1.4761, "step": 129500 }, { "epoch": 0.0963812413546919, "grad_norm": 0.7035814523696899, "learning_rate": 1.998167056029751e-05, "loss": 1.4523, "step": 129600 }, { "epoch": 0.09645560959647792, "grad_norm": 0.512542188167572, "learning_rate": 1.998164227078143e-05, "loss": 1.4805, "step": 129700 }, { "epoch": 0.09652997783826395, "grad_norm": 0.4476942718029022, "learning_rate": 1.998161395947133e-05, "loss": 1.5065, "step": 129800 }, { "epoch": 0.09660434608004997, "grad_norm": 0.6084230542182922, "learning_rate": 1.998158562636727e-05, "loss": 1.4865, "step": 129900 }, { "epoch": 0.096678714321836, "grad_norm": 0.7835870981216431, "learning_rate": 1.9981557271469304e-05, "loss": 1.5774, "step": 130000 }, { "epoch": 0.09675308256362203, "grad_norm": 0.8037684559822083, "learning_rate": 1.9981528894777506e-05, "loss": 1.4525, "step": 130100 }, { "epoch": 0.09682745080540806, "grad_norm": 0.8425838947296143, "learning_rate": 1.9981500496291927e-05, "loss": 1.5334, "step": 130200 }, { "epoch": 0.09690181904719408, "grad_norm": 0.47409191727638245, "learning_rate": 1.9981472076012637e-05, "loss": 1.5051, "step": 130300 }, { "epoch": 0.09697618728898011, "grad_norm": 1.0278313159942627, "learning_rate": 1.9981443633939698e-05, "loss": 1.4938, "step": 130400 }, { "epoch": 0.09705055553076614, "grad_norm": 0.913601279258728, "learning_rate": 1.9981415170073166e-05, "loss": 1.4603, "step": 130500 }, { "epoch": 0.09712492377255216, "grad_norm": 0.8872955441474915, "learning_rate": 1.9981386684413108e-05, "loss": 1.4791, "step": 130600 }, { "epoch": 0.0971992920143382, "grad_norm": 0.5807453393936157, "learning_rate": 1.9981358176959582e-05, "loss": 1.3754, "step": 130700 }, { "epoch": 0.09727366025612423, "grad_norm": 0.6025647521018982, "learning_rate": 1.9981329647712658e-05, "loss": 1.558, "step": 130800 }, { "epoch": 0.09734802849791026, "grad_norm": 0.7837553024291992, "learning_rate": 1.9981301096672386e-05, "loss": 1.5855, "step": 130900 }, { "epoch": 0.09742239673969628, "grad_norm": 1.1049782037734985, "learning_rate": 1.998127252383884e-05, "loss": 1.4892, "step": 131000 }, { "epoch": 0.09749676498148231, "grad_norm": 0.5563526153564453, "learning_rate": 1.998124392921208e-05, "loss": 1.5171, "step": 131100 }, { "epoch": 0.09757113322326834, "grad_norm": 0.9444433450698853, "learning_rate": 1.9981215312792163e-05, "loss": 1.5936, "step": 131200 }, { "epoch": 0.09764550146505437, "grad_norm": 0.6223033666610718, "learning_rate": 1.9981186674579156e-05, "loss": 1.5535, "step": 131300 }, { "epoch": 0.09771986970684039, "grad_norm": 0.6166280508041382, "learning_rate": 1.998115801457312e-05, "loss": 1.4878, "step": 131400 }, { "epoch": 0.09779423794862642, "grad_norm": 1.0396345853805542, "learning_rate": 1.998112933277412e-05, "loss": 1.4261, "step": 131500 }, { "epoch": 0.09786860619041245, "grad_norm": 0.49584099650382996, "learning_rate": 1.9981100629182217e-05, "loss": 1.5413, "step": 131600 }, { "epoch": 0.09794297443219847, "grad_norm": 0.7018377780914307, "learning_rate": 1.9981071903797468e-05, "loss": 1.4654, "step": 131700 }, { "epoch": 0.0980173426739845, "grad_norm": 0.4426519572734833, "learning_rate": 1.9981043156619944e-05, "loss": 1.5245, "step": 131800 }, { "epoch": 0.09809171091577053, "grad_norm": 0.6294909715652466, "learning_rate": 1.9981014387649706e-05, "loss": 1.4861, "step": 131900 }, { "epoch": 0.09816607915755655, "grad_norm": 0.8408529758453369, "learning_rate": 1.9980985596886814e-05, "loss": 1.4676, "step": 132000 }, { "epoch": 0.09824044739934258, "grad_norm": 0.5221405029296875, "learning_rate": 1.9980956784331334e-05, "loss": 1.4444, "step": 132100 }, { "epoch": 0.09831481564112861, "grad_norm": 0.8916333913803101, "learning_rate": 1.9980927949983325e-05, "loss": 1.5329, "step": 132200 }, { "epoch": 0.09838918388291464, "grad_norm": 1.105298399925232, "learning_rate": 1.9980899093842856e-05, "loss": 1.4539, "step": 132300 }, { "epoch": 0.09846355212470066, "grad_norm": 0.5145508646965027, "learning_rate": 1.9980870215909984e-05, "loss": 1.5408, "step": 132400 }, { "epoch": 0.09853792036648669, "grad_norm": 0.8506743907928467, "learning_rate": 1.9980841316184774e-05, "loss": 1.4746, "step": 132500 }, { "epoch": 0.09861228860827273, "grad_norm": 0.6306768655776978, "learning_rate": 1.9980812394667287e-05, "loss": 1.5414, "step": 132600 }, { "epoch": 0.09868665685005876, "grad_norm": 1.2590394020080566, "learning_rate": 1.9980783451357593e-05, "loss": 1.527, "step": 132700 }, { "epoch": 0.09876102509184478, "grad_norm": 0.6099645495414734, "learning_rate": 1.998075448625575e-05, "loss": 1.5848, "step": 132800 }, { "epoch": 0.09883539333363081, "grad_norm": 0.5420452356338501, "learning_rate": 1.998072549936182e-05, "loss": 1.5003, "step": 132900 }, { "epoch": 0.09890976157541684, "grad_norm": 1.1183674335479736, "learning_rate": 1.9980696490675872e-05, "loss": 1.5375, "step": 133000 }, { "epoch": 0.09898412981720286, "grad_norm": 0.4385927617549896, "learning_rate": 1.9980667460197964e-05, "loss": 1.4722, "step": 133100 }, { "epoch": 0.09905849805898889, "grad_norm": 0.6260892152786255, "learning_rate": 1.9980638407928157e-05, "loss": 1.5212, "step": 133200 }, { "epoch": 0.09913286630077492, "grad_norm": 0.741334855556488, "learning_rate": 1.9980609333866524e-05, "loss": 1.4541, "step": 133300 }, { "epoch": 0.09920723454256095, "grad_norm": 1.1915795803070068, "learning_rate": 1.9980580238013123e-05, "loss": 1.5277, "step": 133400 }, { "epoch": 0.09928160278434697, "grad_norm": 1.1445544958114624, "learning_rate": 1.9980551120368012e-05, "loss": 1.5128, "step": 133500 }, { "epoch": 0.099355971026133, "grad_norm": 0.6334720849990845, "learning_rate": 1.9980521980931265e-05, "loss": 1.516, "step": 133600 }, { "epoch": 0.09943033926791903, "grad_norm": 0.6501947641372681, "learning_rate": 1.998049281970294e-05, "loss": 1.4401, "step": 133700 }, { "epoch": 0.09950470750970505, "grad_norm": 0.6056668758392334, "learning_rate": 1.9980463636683098e-05, "loss": 1.4852, "step": 133800 }, { "epoch": 0.09957907575149108, "grad_norm": 0.6787620186805725, "learning_rate": 1.998043443187181e-05, "loss": 1.538, "step": 133900 }, { "epoch": 0.09965344399327711, "grad_norm": 0.6022368669509888, "learning_rate": 1.9980405205269134e-05, "loss": 1.4987, "step": 134000 }, { "epoch": 0.09972781223506313, "grad_norm": 1.595755696296692, "learning_rate": 1.9980375956875134e-05, "loss": 1.44, "step": 134100 }, { "epoch": 0.09980218047684916, "grad_norm": 0.6521275043487549, "learning_rate": 1.998034668668988e-05, "loss": 1.4595, "step": 134200 }, { "epoch": 0.09987654871863519, "grad_norm": 0.538126528263092, "learning_rate": 1.998031739471343e-05, "loss": 1.4877, "step": 134300 }, { "epoch": 0.09995091696042122, "grad_norm": 1.2950618267059326, "learning_rate": 1.9980288080945846e-05, "loss": 1.5058, "step": 134400 }, { "epoch": 0.10002528520220724, "grad_norm": 0.5475849509239197, "learning_rate": 1.99802587453872e-05, "loss": 1.5014, "step": 134500 }, { "epoch": 0.10009965344399328, "grad_norm": 0.4150042235851288, "learning_rate": 1.9980229388037548e-05, "loss": 1.5063, "step": 134600 }, { "epoch": 0.10017402168577931, "grad_norm": 0.563135027885437, "learning_rate": 1.998020000889696e-05, "loss": 1.4624, "step": 134700 }, { "epoch": 0.10024838992756534, "grad_norm": 0.7071889042854309, "learning_rate": 1.9980170607965498e-05, "loss": 1.5548, "step": 134800 }, { "epoch": 0.10032275816935136, "grad_norm": 1.2695132493972778, "learning_rate": 1.9980141185243223e-05, "loss": 1.5628, "step": 134900 }, { "epoch": 0.10039712641113739, "grad_norm": 0.6120099425315857, "learning_rate": 1.9980111740730203e-05, "loss": 1.4447, "step": 135000 }, { "epoch": 0.10047149465292342, "grad_norm": 1.125084400177002, "learning_rate": 1.99800822744265e-05, "loss": 1.4967, "step": 135100 }, { "epoch": 0.10054586289470945, "grad_norm": 0.7774704098701477, "learning_rate": 1.9980052786332185e-05, "loss": 1.5234, "step": 135200 }, { "epoch": 0.10062023113649547, "grad_norm": 0.6094503998756409, "learning_rate": 1.998002327644731e-05, "loss": 1.4926, "step": 135300 }, { "epoch": 0.1006945993782815, "grad_norm": 1.2310646772384644, "learning_rate": 1.9979993744771953e-05, "loss": 1.5139, "step": 135400 }, { "epoch": 0.10076896762006753, "grad_norm": 0.5697330832481384, "learning_rate": 1.997996419130617e-05, "loss": 1.5998, "step": 135500 }, { "epoch": 0.10084333586185355, "grad_norm": 0.7427160739898682, "learning_rate": 1.997993461605003e-05, "loss": 1.6151, "step": 135600 }, { "epoch": 0.10091770410363958, "grad_norm": 0.4527893662452698, "learning_rate": 1.997990501900359e-05, "loss": 1.5575, "step": 135700 }, { "epoch": 0.10099207234542561, "grad_norm": 0.8954161405563354, "learning_rate": 1.9979875400166923e-05, "loss": 1.5633, "step": 135800 }, { "epoch": 0.10106644058721163, "grad_norm": 1.1898400783538818, "learning_rate": 1.997984575954009e-05, "loss": 1.471, "step": 135900 }, { "epoch": 0.10114080882899766, "grad_norm": 2.3043880462646484, "learning_rate": 1.997981609712315e-05, "loss": 1.5336, "step": 136000 }, { "epoch": 0.10121517707078369, "grad_norm": 0.6364002227783203, "learning_rate": 1.9979786412916185e-05, "loss": 1.5073, "step": 136100 }, { "epoch": 0.10128954531256971, "grad_norm": 0.8354958891868591, "learning_rate": 1.997975670691924e-05, "loss": 1.5383, "step": 136200 }, { "epoch": 0.10136391355435574, "grad_norm": 0.45337924361228943, "learning_rate": 1.9979726979132393e-05, "loss": 1.5351, "step": 136300 }, { "epoch": 0.10143828179614177, "grad_norm": 0.5469792485237122, "learning_rate": 1.9979697229555706e-05, "loss": 1.5258, "step": 136400 }, { "epoch": 0.10151265003792781, "grad_norm": 0.7817328572273254, "learning_rate": 1.997966745818924e-05, "loss": 1.5394, "step": 136500 }, { "epoch": 0.10158701827971384, "grad_norm": 0.849381148815155, "learning_rate": 1.997963766503306e-05, "loss": 1.4665, "step": 136600 }, { "epoch": 0.10166138652149986, "grad_norm": 0.5767590403556824, "learning_rate": 1.9979607850087236e-05, "loss": 1.4661, "step": 136700 }, { "epoch": 0.10173575476328589, "grad_norm": 0.6021513342857361, "learning_rate": 1.9979578013351832e-05, "loss": 1.5922, "step": 136800 }, { "epoch": 0.10181012300507192, "grad_norm": 0.4987846910953522, "learning_rate": 1.9979548154826908e-05, "loss": 1.5105, "step": 136900 }, { "epoch": 0.10188449124685794, "grad_norm": 0.5042586326599121, "learning_rate": 1.9979518274512536e-05, "loss": 1.4829, "step": 137000 }, { "epoch": 0.10195885948864397, "grad_norm": 0.6552058458328247, "learning_rate": 1.9979488372408777e-05, "loss": 1.5268, "step": 137100 }, { "epoch": 0.10203322773043, "grad_norm": 0.7282898426055908, "learning_rate": 1.9979458448515698e-05, "loss": 1.5446, "step": 137200 }, { "epoch": 0.10210759597221603, "grad_norm": 0.5449129343032837, "learning_rate": 1.9979428502833364e-05, "loss": 1.5878, "step": 137300 }, { "epoch": 0.10218196421400205, "grad_norm": 0.546790599822998, "learning_rate": 1.9979398535361843e-05, "loss": 1.5163, "step": 137400 }, { "epoch": 0.10225633245578808, "grad_norm": 0.7172774076461792, "learning_rate": 1.9979368546101196e-05, "loss": 1.5215, "step": 137500 }, { "epoch": 0.1023307006975741, "grad_norm": 1.1463347673416138, "learning_rate": 1.9979338535051486e-05, "loss": 1.5133, "step": 137600 }, { "epoch": 0.10240506893936013, "grad_norm": 0.5050210356712341, "learning_rate": 1.9979308502212786e-05, "loss": 1.5303, "step": 137700 }, { "epoch": 0.10247943718114616, "grad_norm": 0.7340983748435974, "learning_rate": 1.997927844758516e-05, "loss": 1.4665, "step": 137800 }, { "epoch": 0.10255380542293219, "grad_norm": 0.606488823890686, "learning_rate": 1.997924837116867e-05, "loss": 1.5614, "step": 137900 }, { "epoch": 0.10262817366471821, "grad_norm": 0.6133073568344116, "learning_rate": 1.997921827296338e-05, "loss": 1.5019, "step": 138000 }, { "epoch": 0.10270254190650424, "grad_norm": 1.193038821220398, "learning_rate": 1.9979188152969363e-05, "loss": 1.5643, "step": 138100 }, { "epoch": 0.10277691014829027, "grad_norm": 0.7540208697319031, "learning_rate": 1.9979158011186678e-05, "loss": 1.4914, "step": 138200 }, { "epoch": 0.1028512783900763, "grad_norm": 0.8702605366706848, "learning_rate": 1.9979127847615398e-05, "loss": 1.5142, "step": 138300 }, { "epoch": 0.10292564663186234, "grad_norm": 0.5384430885314941, "learning_rate": 1.997909766225558e-05, "loss": 1.5681, "step": 138400 }, { "epoch": 0.10300001487364836, "grad_norm": 0.6199005246162415, "learning_rate": 1.9979067455107302e-05, "loss": 1.505, "step": 138500 }, { "epoch": 0.10307438311543439, "grad_norm": 1.689314603805542, "learning_rate": 1.9979037226170615e-05, "loss": 1.4517, "step": 138600 }, { "epoch": 0.10314875135722042, "grad_norm": 0.6729611754417419, "learning_rate": 1.9979006975445598e-05, "loss": 1.6216, "step": 138700 }, { "epoch": 0.10322311959900644, "grad_norm": 0.9998461008071899, "learning_rate": 1.9978976702932308e-05, "loss": 1.5475, "step": 138800 }, { "epoch": 0.10329748784079247, "grad_norm": 0.6057224273681641, "learning_rate": 1.9978946408630813e-05, "loss": 1.4879, "step": 138900 }, { "epoch": 0.1033718560825785, "grad_norm": 0.5094553232192993, "learning_rate": 1.9978916092541185e-05, "loss": 1.4579, "step": 139000 }, { "epoch": 0.10344622432436452, "grad_norm": 0.8264681100845337, "learning_rate": 1.9978885754663488e-05, "loss": 1.5781, "step": 139100 }, { "epoch": 0.10352059256615055, "grad_norm": 0.6943295001983643, "learning_rate": 1.9978855394997782e-05, "loss": 1.3963, "step": 139200 }, { "epoch": 0.10359496080793658, "grad_norm": 0.49598583579063416, "learning_rate": 1.997882501354414e-05, "loss": 1.5092, "step": 139300 }, { "epoch": 0.1036693290497226, "grad_norm": 0.5692517757415771, "learning_rate": 1.997879461030262e-05, "loss": 1.4334, "step": 139400 }, { "epoch": 0.10374369729150863, "grad_norm": 0.7206512689590454, "learning_rate": 1.99787641852733e-05, "loss": 1.5224, "step": 139500 }, { "epoch": 0.10381806553329466, "grad_norm": 0.49679264426231384, "learning_rate": 1.9978733738456238e-05, "loss": 1.466, "step": 139600 }, { "epoch": 0.10389243377508069, "grad_norm": 0.9801473021507263, "learning_rate": 1.9978703269851504e-05, "loss": 1.5136, "step": 139700 }, { "epoch": 0.10396680201686671, "grad_norm": 0.7889474630355835, "learning_rate": 1.9978672779459165e-05, "loss": 1.5334, "step": 139800 }, { "epoch": 0.10404117025865274, "grad_norm": 0.9572933316230774, "learning_rate": 1.9978642267279285e-05, "loss": 1.4699, "step": 139900 }, { "epoch": 0.10411553850043877, "grad_norm": 0.7226629257202148, "learning_rate": 1.9978611733311936e-05, "loss": 1.4448, "step": 140000 }, { "epoch": 0.1041899067422248, "grad_norm": 0.8302561640739441, "learning_rate": 1.997858117755717e-05, "loss": 1.4462, "step": 140100 }, { "epoch": 0.10426427498401082, "grad_norm": 0.5382084250450134, "learning_rate": 1.9978550600015075e-05, "loss": 1.4627, "step": 140200 }, { "epoch": 0.10433864322579686, "grad_norm": 0.5332793593406677, "learning_rate": 1.99785200006857e-05, "loss": 1.5637, "step": 140300 }, { "epoch": 0.10441301146758289, "grad_norm": 0.4548000395298004, "learning_rate": 1.9978489379569126e-05, "loss": 1.5765, "step": 140400 }, { "epoch": 0.10448737970936892, "grad_norm": 0.6508228182792664, "learning_rate": 1.997845873666541e-05, "loss": 1.5092, "step": 140500 }, { "epoch": 0.10456174795115494, "grad_norm": 0.5362776517868042, "learning_rate": 1.9978428071974617e-05, "loss": 1.5831, "step": 140600 }, { "epoch": 0.10463611619294097, "grad_norm": 0.5363385081291199, "learning_rate": 1.9978397385496824e-05, "loss": 1.5171, "step": 140700 }, { "epoch": 0.104710484434727, "grad_norm": 1.126382827758789, "learning_rate": 1.997836667723209e-05, "loss": 1.529, "step": 140800 }, { "epoch": 0.10478485267651302, "grad_norm": 0.9231337308883667, "learning_rate": 1.9978335947180482e-05, "loss": 1.5892, "step": 140900 }, { "epoch": 0.10485922091829905, "grad_norm": 0.6200318932533264, "learning_rate": 1.9978305195342073e-05, "loss": 1.5113, "step": 141000 }, { "epoch": 0.10493358916008508, "grad_norm": 0.6148139238357544, "learning_rate": 1.997827442171693e-05, "loss": 1.5312, "step": 141100 }, { "epoch": 0.1050079574018711, "grad_norm": 0.7615642547607422, "learning_rate": 1.997824362630511e-05, "loss": 1.4914, "step": 141200 }, { "epoch": 0.10508232564365713, "grad_norm": 0.5407326221466064, "learning_rate": 1.9978212809106692e-05, "loss": 1.4971, "step": 141300 }, { "epoch": 0.10515669388544316, "grad_norm": 1.0571643114089966, "learning_rate": 1.9978181970121738e-05, "loss": 1.5615, "step": 141400 }, { "epoch": 0.10523106212722919, "grad_norm": 0.6023507118225098, "learning_rate": 1.9978151109350318e-05, "loss": 1.5049, "step": 141500 }, { "epoch": 0.10530543036901521, "grad_norm": 1.0330872535705566, "learning_rate": 1.997812022679249e-05, "loss": 1.4868, "step": 141600 }, { "epoch": 0.10537979861080124, "grad_norm": 0.7617365121841431, "learning_rate": 1.9978089322448334e-05, "loss": 1.5329, "step": 141700 }, { "epoch": 0.10545416685258727, "grad_norm": 0.46278420090675354, "learning_rate": 1.9978058396317913e-05, "loss": 1.463, "step": 141800 }, { "epoch": 0.1055285350943733, "grad_norm": 0.4801762104034424, "learning_rate": 1.9978027448401293e-05, "loss": 1.5022, "step": 141900 }, { "epoch": 0.10560290333615932, "grad_norm": 1.0246363878250122, "learning_rate": 1.997799647869854e-05, "loss": 1.5205, "step": 142000 }, { "epoch": 0.10567727157794535, "grad_norm": 1.078803539276123, "learning_rate": 1.9977965487209724e-05, "loss": 1.5032, "step": 142100 }, { "epoch": 0.10575163981973139, "grad_norm": 0.4779006838798523, "learning_rate": 1.9977934473934914e-05, "loss": 1.5492, "step": 142200 }, { "epoch": 0.10582600806151742, "grad_norm": 0.9549064636230469, "learning_rate": 1.997790343887418e-05, "loss": 1.498, "step": 142300 }, { "epoch": 0.10590037630330344, "grad_norm": 0.45103341341018677, "learning_rate": 1.997787238202758e-05, "loss": 1.5953, "step": 142400 }, { "epoch": 0.10597474454508947, "grad_norm": 1.304785132408142, "learning_rate": 1.997784130339519e-05, "loss": 1.4869, "step": 142500 }, { "epoch": 0.1060491127868755, "grad_norm": 1.2593719959259033, "learning_rate": 1.9977810202977078e-05, "loss": 1.4804, "step": 142600 }, { "epoch": 0.10612348102866152, "grad_norm": 0.48552241921424866, "learning_rate": 1.9977779080773305e-05, "loss": 1.4952, "step": 142700 }, { "epoch": 0.10619784927044755, "grad_norm": 1.041162133216858, "learning_rate": 1.9977747936783947e-05, "loss": 1.5418, "step": 142800 }, { "epoch": 0.10627221751223358, "grad_norm": 0.9710960388183594, "learning_rate": 1.9977716771009068e-05, "loss": 1.5366, "step": 142900 }, { "epoch": 0.1063465857540196, "grad_norm": 0.7851547598838806, "learning_rate": 1.9977685583448738e-05, "loss": 1.5176, "step": 143000 }, { "epoch": 0.10642095399580563, "grad_norm": 0.9707911610603333, "learning_rate": 1.9977654374103022e-05, "loss": 1.545, "step": 143100 }, { "epoch": 0.10649532223759166, "grad_norm": 0.6335864663124084, "learning_rate": 1.997762314297199e-05, "loss": 1.508, "step": 143200 }, { "epoch": 0.10656969047937769, "grad_norm": 1.0375920534133911, "learning_rate": 1.997759189005571e-05, "loss": 1.4879, "step": 143300 }, { "epoch": 0.10664405872116371, "grad_norm": 1.596803903579712, "learning_rate": 1.997756061535425e-05, "loss": 1.4983, "step": 143400 }, { "epoch": 0.10671842696294974, "grad_norm": 0.5105230212211609, "learning_rate": 1.997752931886768e-05, "loss": 1.521, "step": 143500 }, { "epoch": 0.10679279520473577, "grad_norm": 1.2617032527923584, "learning_rate": 1.9977498000596065e-05, "loss": 1.4863, "step": 143600 }, { "epoch": 0.10686716344652179, "grad_norm": 0.5550446510314941, "learning_rate": 1.9977466660539474e-05, "loss": 1.5622, "step": 143700 }, { "epoch": 0.10694153168830782, "grad_norm": 1.1208547353744507, "learning_rate": 1.9977435298697982e-05, "loss": 1.4477, "step": 143800 }, { "epoch": 0.10701589993009385, "grad_norm": 0.9976388812065125, "learning_rate": 1.9977403915071646e-05, "loss": 1.4833, "step": 143900 }, { "epoch": 0.10709026817187987, "grad_norm": 0.5002219676971436, "learning_rate": 1.9977372509660547e-05, "loss": 1.6139, "step": 144000 }, { "epoch": 0.10716463641366591, "grad_norm": 0.9400573968887329, "learning_rate": 1.997734108246474e-05, "loss": 1.555, "step": 144100 }, { "epoch": 0.10723900465545194, "grad_norm": 0.6720722317695618, "learning_rate": 1.9977309633484307e-05, "loss": 1.5196, "step": 144200 }, { "epoch": 0.10731337289723797, "grad_norm": 0.6347479820251465, "learning_rate": 1.9977278162719307e-05, "loss": 1.5292, "step": 144300 }, { "epoch": 0.107387741139024, "grad_norm": 0.5623081922531128, "learning_rate": 1.9977246670169817e-05, "loss": 1.4804, "step": 144400 }, { "epoch": 0.10746210938081002, "grad_norm": 0.760469377040863, "learning_rate": 1.9977215155835895e-05, "loss": 1.6078, "step": 144500 }, { "epoch": 0.10753647762259605, "grad_norm": 0.9557989239692688, "learning_rate": 1.9977183619717618e-05, "loss": 1.499, "step": 144600 }, { "epoch": 0.10761084586438208, "grad_norm": 0.36508235335350037, "learning_rate": 1.9977152061815056e-05, "loss": 1.5939, "step": 144700 }, { "epoch": 0.1076852141061681, "grad_norm": 0.8143123984336853, "learning_rate": 1.997712048212827e-05, "loss": 1.5223, "step": 144800 }, { "epoch": 0.10775958234795413, "grad_norm": 0.5532248020172119, "learning_rate": 1.997708888065734e-05, "loss": 1.4799, "step": 144900 }, { "epoch": 0.10783395058974016, "grad_norm": 0.9180570244789124, "learning_rate": 1.9977057257402323e-05, "loss": 1.5015, "step": 145000 }, { "epoch": 0.10790831883152618, "grad_norm": 0.7075524926185608, "learning_rate": 1.9977025612363293e-05, "loss": 1.5023, "step": 145100 }, { "epoch": 0.10798268707331221, "grad_norm": 0.5107302665710449, "learning_rate": 1.997699394554032e-05, "loss": 1.5081, "step": 145200 }, { "epoch": 0.10805705531509824, "grad_norm": 0.7115252017974854, "learning_rate": 1.9976962256933477e-05, "loss": 1.4696, "step": 145300 }, { "epoch": 0.10813142355688427, "grad_norm": 0.4716932773590088, "learning_rate": 1.9976930546542826e-05, "loss": 1.4365, "step": 145400 }, { "epoch": 0.10820579179867029, "grad_norm": 0.6276602745056152, "learning_rate": 1.997689881436844e-05, "loss": 1.6188, "step": 145500 }, { "epoch": 0.10828016004045632, "grad_norm": 0.7316505312919617, "learning_rate": 1.9976867060410385e-05, "loss": 1.4963, "step": 145600 }, { "epoch": 0.10835452828224235, "grad_norm": 0.6350568532943726, "learning_rate": 1.9976835284668736e-05, "loss": 1.4833, "step": 145700 }, { "epoch": 0.10842889652402837, "grad_norm": 0.4657195508480072, "learning_rate": 1.997680348714356e-05, "loss": 1.5455, "step": 145800 }, { "epoch": 0.1085032647658144, "grad_norm": 0.6384357810020447, "learning_rate": 1.9976771667834923e-05, "loss": 1.4852, "step": 145900 }, { "epoch": 0.10857763300760044, "grad_norm": 0.6129224300384521, "learning_rate": 1.9976739826742898e-05, "loss": 1.5368, "step": 146000 }, { "epoch": 0.10865200124938647, "grad_norm": 0.5207227468490601, "learning_rate": 1.9976707963867553e-05, "loss": 1.5254, "step": 146100 }, { "epoch": 0.1087263694911725, "grad_norm": 0.5846359133720398, "learning_rate": 1.997667607920896e-05, "loss": 1.5261, "step": 146200 }, { "epoch": 0.10880073773295852, "grad_norm": 0.6170707941055298, "learning_rate": 1.9976644172767184e-05, "loss": 1.3581, "step": 146300 }, { "epoch": 0.10887510597474455, "grad_norm": 1.1348438262939453, "learning_rate": 1.9976612244542305e-05, "loss": 1.4225, "step": 146400 }, { "epoch": 0.10894947421653058, "grad_norm": 0.8619507551193237, "learning_rate": 1.9976580294534375e-05, "loss": 1.5115, "step": 146500 }, { "epoch": 0.1090238424583166, "grad_norm": 0.8449547290802002, "learning_rate": 1.9976548322743482e-05, "loss": 1.4859, "step": 146600 }, { "epoch": 0.10909821070010263, "grad_norm": 0.8082287907600403, "learning_rate": 1.9976516329169683e-05, "loss": 1.506, "step": 146700 }, { "epoch": 0.10917257894188866, "grad_norm": 0.9964908957481384, "learning_rate": 1.9976484313813056e-05, "loss": 1.5532, "step": 146800 }, { "epoch": 0.10924694718367468, "grad_norm": 0.4375709891319275, "learning_rate": 1.9976452276673666e-05, "loss": 1.5083, "step": 146900 }, { "epoch": 0.10932131542546071, "grad_norm": 1.0198872089385986, "learning_rate": 1.9976420217751583e-05, "loss": 1.5624, "step": 147000 }, { "epoch": 0.10939568366724674, "grad_norm": 0.5535147190093994, "learning_rate": 1.997638813704688e-05, "loss": 1.5204, "step": 147100 }, { "epoch": 0.10947005190903276, "grad_norm": 1.118886113166809, "learning_rate": 1.997635603455963e-05, "loss": 1.4608, "step": 147200 }, { "epoch": 0.10954442015081879, "grad_norm": 0.5374003052711487, "learning_rate": 1.9976323910289893e-05, "loss": 1.5399, "step": 147300 }, { "epoch": 0.10961878839260482, "grad_norm": 0.4818219840526581, "learning_rate": 1.9976291764237743e-05, "loss": 1.4356, "step": 147400 }, { "epoch": 0.10969315663439085, "grad_norm": 0.7664726376533508, "learning_rate": 1.997625959640326e-05, "loss": 1.581, "step": 147500 }, { "epoch": 0.10976752487617687, "grad_norm": 1.124408483505249, "learning_rate": 1.99762274067865e-05, "loss": 1.5478, "step": 147600 }, { "epoch": 0.1098418931179629, "grad_norm": 0.5269792675971985, "learning_rate": 1.9976195195387536e-05, "loss": 1.5336, "step": 147700 }, { "epoch": 0.10991626135974893, "grad_norm": 0.5320454835891724, "learning_rate": 1.997616296220645e-05, "loss": 1.4693, "step": 147800 }, { "epoch": 0.10999062960153497, "grad_norm": 0.7873321175575256, "learning_rate": 1.99761307072433e-05, "loss": 1.5363, "step": 147900 }, { "epoch": 0.110064997843321, "grad_norm": 0.8107410073280334, "learning_rate": 1.9976098430498164e-05, "loss": 1.4201, "step": 148000 }, { "epoch": 0.11013936608510702, "grad_norm": 0.5935773849487305, "learning_rate": 1.9976066131971105e-05, "loss": 1.4594, "step": 148100 }, { "epoch": 0.11021373432689305, "grad_norm": 0.9844542741775513, "learning_rate": 1.99760338116622e-05, "loss": 1.5579, "step": 148200 }, { "epoch": 0.11028810256867908, "grad_norm": 0.49384772777557373, "learning_rate": 1.9976001469571518e-05, "loss": 1.4472, "step": 148300 }, { "epoch": 0.1103624708104651, "grad_norm": 0.7531874775886536, "learning_rate": 1.9975969105699126e-05, "loss": 1.5922, "step": 148400 }, { "epoch": 0.11043683905225113, "grad_norm": 0.538556694984436, "learning_rate": 1.99759367200451e-05, "loss": 1.474, "step": 148500 }, { "epoch": 0.11051120729403716, "grad_norm": 1.0285570621490479, "learning_rate": 1.9975904312609507e-05, "loss": 1.5131, "step": 148600 }, { "epoch": 0.11058557553582318, "grad_norm": 0.5939409136772156, "learning_rate": 1.997587188339242e-05, "loss": 1.4839, "step": 148700 }, { "epoch": 0.11065994377760921, "grad_norm": 0.6015550494194031, "learning_rate": 1.9975839432393907e-05, "loss": 1.5108, "step": 148800 }, { "epoch": 0.11073431201939524, "grad_norm": 0.4747854769229889, "learning_rate": 1.997580695961404e-05, "loss": 1.6441, "step": 148900 }, { "epoch": 0.11080868026118126, "grad_norm": 0.7376397252082825, "learning_rate": 1.9975774465052894e-05, "loss": 1.5324, "step": 149000 }, { "epoch": 0.11088304850296729, "grad_norm": 0.9207439422607422, "learning_rate": 1.9975741948710533e-05, "loss": 1.4951, "step": 149100 }, { "epoch": 0.11095741674475332, "grad_norm": 0.5748550295829773, "learning_rate": 1.9975709410587035e-05, "loss": 1.5087, "step": 149200 }, { "epoch": 0.11103178498653934, "grad_norm": 0.9244334697723389, "learning_rate": 1.9975676850682463e-05, "loss": 1.5484, "step": 149300 }, { "epoch": 0.11110615322832537, "grad_norm": 1.3170803785324097, "learning_rate": 1.9975644268996898e-05, "loss": 1.5486, "step": 149400 }, { "epoch": 0.1111805214701114, "grad_norm": 1.0309258699417114, "learning_rate": 1.99756116655304e-05, "loss": 1.5327, "step": 149500 }, { "epoch": 0.11125488971189743, "grad_norm": 0.45877984166145325, "learning_rate": 1.9975579040283052e-05, "loss": 1.4387, "step": 149600 }, { "epoch": 0.11132925795368345, "grad_norm": 0.550827145576477, "learning_rate": 1.9975546393254915e-05, "loss": 1.5481, "step": 149700 }, { "epoch": 0.1114036261954695, "grad_norm": 0.9413292407989502, "learning_rate": 1.9975513724446064e-05, "loss": 1.5409, "step": 149800 }, { "epoch": 0.11147799443725552, "grad_norm": 0.7036787271499634, "learning_rate": 1.9975481033856574e-05, "loss": 1.5372, "step": 149900 }, { "epoch": 0.11155236267904155, "grad_norm": 0.5225023031234741, "learning_rate": 1.997544832148651e-05, "loss": 1.5228, "step": 150000 }, { "epoch": 0.11436826581592728, "grad_norm": 0.5988050699234009, "learning_rate": 1.9974193704861967e-05, "loss": 1.4864, "step": 150100 }, { "epoch": 0.1144444605299952, "grad_norm": 0.6303549408912659, "learning_rate": 1.9974159321613837e-05, "loss": 1.5625, "step": 150200 }, { "epoch": 0.1145206552440631, "grad_norm": 1.3569958209991455, "learning_rate": 1.9974124915505174e-05, "loss": 1.5084, "step": 150300 }, { "epoch": 0.114596849958131, "grad_norm": 1.1625386476516724, "learning_rate": 1.9974090486536052e-05, "loss": 1.4318, "step": 150400 }, { "epoch": 0.11467304467219891, "grad_norm": 0.4321073889732361, "learning_rate": 1.9974056034706554e-05, "loss": 1.4302, "step": 150500 }, { "epoch": 0.11474923938626681, "grad_norm": 0.6575265526771545, "learning_rate": 1.9974021560016758e-05, "loss": 1.5129, "step": 150600 }, { "epoch": 0.11482543410033472, "grad_norm": 0.48436152935028076, "learning_rate": 1.9973987062466742e-05, "loss": 1.5491, "step": 150700 }, { "epoch": 0.11490162881440263, "grad_norm": 0.7429853677749634, "learning_rate": 1.997395254205659e-05, "loss": 1.5227, "step": 150800 }, { "epoch": 0.11497782352847054, "grad_norm": 0.5442507266998291, "learning_rate": 1.9973917998786372e-05, "loss": 1.5192, "step": 150900 }, { "epoch": 0.11505401824253844, "grad_norm": 0.8323450088500977, "learning_rate": 1.9973883432656175e-05, "loss": 1.517, "step": 151000 }, { "epoch": 0.11513021295660635, "grad_norm": 1.3172342777252197, "learning_rate": 1.9973848843666076e-05, "loss": 1.5363, "step": 151100 }, { "epoch": 0.11520640767067425, "grad_norm": 0.5425537824630737, "learning_rate": 1.997381423181615e-05, "loss": 1.6003, "step": 151200 }, { "epoch": 0.11528260238474217, "grad_norm": 0.7289454340934753, "learning_rate": 1.9973779597106485e-05, "loss": 1.5291, "step": 151300 }, { "epoch": 0.11535879709881007, "grad_norm": 0.7875185012817383, "learning_rate": 1.9973744939537153e-05, "loss": 1.5457, "step": 151400 }, { "epoch": 0.11543499181287797, "grad_norm": 0.5126000046730042, "learning_rate": 1.9973710259108235e-05, "loss": 1.4877, "step": 151500 }, { "epoch": 0.11551118652694588, "grad_norm": 0.510183572769165, "learning_rate": 1.997367555581981e-05, "loss": 1.5575, "step": 151600 }, { "epoch": 0.11558738124101378, "grad_norm": 1.4079546928405762, "learning_rate": 1.9973640829671964e-05, "loss": 1.4595, "step": 151700 }, { "epoch": 0.1156635759550817, "grad_norm": 0.5035670399665833, "learning_rate": 1.9973606080664768e-05, "loss": 1.4636, "step": 151800 }, { "epoch": 0.1157397706691496, "grad_norm": 0.8873507976531982, "learning_rate": 1.9973571308798308e-05, "loss": 1.4749, "step": 151900 }, { "epoch": 0.1158159653832175, "grad_norm": 0.5963075160980225, "learning_rate": 1.9973536514072657e-05, "loss": 1.4988, "step": 152000 }, { "epoch": 0.11589216009728541, "grad_norm": 1.832845687866211, "learning_rate": 1.99735016964879e-05, "loss": 1.5538, "step": 152100 }, { "epoch": 0.11596835481135331, "grad_norm": 0.6566179394721985, "learning_rate": 1.9973466856044115e-05, "loss": 1.4901, "step": 152200 }, { "epoch": 0.11604454952542122, "grad_norm": 0.6193752288818359, "learning_rate": 1.9973431992741387e-05, "loss": 1.5294, "step": 152300 }, { "epoch": 0.11612074423948913, "grad_norm": 0.9751369953155518, "learning_rate": 1.9973397106579787e-05, "loss": 1.5526, "step": 152400 }, { "epoch": 0.11619693895355704, "grad_norm": 0.3981841504573822, "learning_rate": 1.9973362197559397e-05, "loss": 1.5374, "step": 152500 }, { "epoch": 0.11627313366762494, "grad_norm": 0.5284987688064575, "learning_rate": 1.9973327265680305e-05, "loss": 1.6299, "step": 152600 }, { "epoch": 0.11634932838169285, "grad_norm": 0.6333740949630737, "learning_rate": 1.9973292310942582e-05, "loss": 1.5475, "step": 152700 }, { "epoch": 0.11642552309576075, "grad_norm": 0.5120219588279724, "learning_rate": 1.997325733334631e-05, "loss": 1.4809, "step": 152800 }, { "epoch": 0.11650171780982867, "grad_norm": 0.7208262085914612, "learning_rate": 1.9973222332891572e-05, "loss": 1.5097, "step": 152900 }, { "epoch": 0.11657791252389657, "grad_norm": 0.5504062175750732, "learning_rate": 1.9973187309578445e-05, "loss": 1.6047, "step": 153000 }, { "epoch": 0.11665410723796447, "grad_norm": 0.8021326661109924, "learning_rate": 1.9973152263407015e-05, "loss": 1.4719, "step": 153100 }, { "epoch": 0.11673030195203238, "grad_norm": 0.42202094197273254, "learning_rate": 1.9973117194377355e-05, "loss": 1.5432, "step": 153200 }, { "epoch": 0.11680649666610028, "grad_norm": 0.9435470700263977, "learning_rate": 1.9973082102489548e-05, "loss": 1.4289, "step": 153300 }, { "epoch": 0.11688269138016819, "grad_norm": 0.5273865461349487, "learning_rate": 1.9973046987743676e-05, "loss": 1.5468, "step": 153400 }, { "epoch": 0.1169588860942361, "grad_norm": 0.6325235962867737, "learning_rate": 1.997301185013982e-05, "loss": 1.5376, "step": 153500 }, { "epoch": 0.117035080808304, "grad_norm": 0.8950610756874084, "learning_rate": 1.997297668967806e-05, "loss": 1.593, "step": 153600 }, { "epoch": 0.11711127552237191, "grad_norm": 0.729888916015625, "learning_rate": 1.9972941506358473e-05, "loss": 1.5777, "step": 153700 }, { "epoch": 0.11718747023643981, "grad_norm": 0.5422887802124023, "learning_rate": 1.9972906300181144e-05, "loss": 1.4567, "step": 153800 }, { "epoch": 0.11726366495050772, "grad_norm": 0.6046540141105652, "learning_rate": 1.997287107114615e-05, "loss": 1.5062, "step": 153900 }, { "epoch": 0.11733985966457564, "grad_norm": 1.0326173305511475, "learning_rate": 1.997283581925358e-05, "loss": 1.5147, "step": 154000 }, { "epoch": 0.11741605437864354, "grad_norm": 0.6134566068649292, "learning_rate": 1.99728005445035e-05, "loss": 1.5819, "step": 154100 }, { "epoch": 0.11749224909271144, "grad_norm": 0.8338958024978638, "learning_rate": 1.9972765246896003e-05, "loss": 1.5301, "step": 154200 }, { "epoch": 0.11756844380677935, "grad_norm": 1.287925362586975, "learning_rate": 1.9972729926431168e-05, "loss": 1.463, "step": 154300 }, { "epoch": 0.11764463852084725, "grad_norm": 0.8597989678382874, "learning_rate": 1.9972694583109075e-05, "loss": 1.5948, "step": 154400 }, { "epoch": 0.11772083323491517, "grad_norm": 0.4852593243122101, "learning_rate": 1.99726592169298e-05, "loss": 1.5409, "step": 154500 }, { "epoch": 0.11779702794898307, "grad_norm": 0.586125373840332, "learning_rate": 1.997262382789343e-05, "loss": 1.4794, "step": 154600 }, { "epoch": 0.11787322266305097, "grad_norm": 0.5508261322975159, "learning_rate": 1.9972588416000045e-05, "loss": 1.4357, "step": 154700 }, { "epoch": 0.11794941737711888, "grad_norm": 0.4626682698726654, "learning_rate": 1.9972552981249727e-05, "loss": 1.5084, "step": 154800 }, { "epoch": 0.11802561209118678, "grad_norm": 1.1998405456542969, "learning_rate": 1.9972517523642553e-05, "loss": 1.5276, "step": 154900 }, { "epoch": 0.11810180680525469, "grad_norm": 0.5986573100090027, "learning_rate": 1.997248204317861e-05, "loss": 1.4622, "step": 155000 }, { "epoch": 0.1181780015193226, "grad_norm": 0.6109502911567688, "learning_rate": 1.9972446539857975e-05, "loss": 1.5406, "step": 155100 }, { "epoch": 0.11825419623339051, "grad_norm": 0.4338931739330292, "learning_rate": 1.997241101368073e-05, "loss": 1.5146, "step": 155200 }, { "epoch": 0.11833039094745841, "grad_norm": 1.1213306188583374, "learning_rate": 1.997237546464696e-05, "loss": 1.5319, "step": 155300 }, { "epoch": 0.11840658566152631, "grad_norm": 0.5361282229423523, "learning_rate": 1.997233989275674e-05, "loss": 1.544, "step": 155400 }, { "epoch": 0.11848278037559422, "grad_norm": 0.6806920170783997, "learning_rate": 1.9972304298010158e-05, "loss": 1.6291, "step": 155500 }, { "epoch": 0.11855897508966214, "grad_norm": 0.5343062877655029, "learning_rate": 1.9972268680407293e-05, "loss": 1.5001, "step": 155600 }, { "epoch": 0.11863516980373004, "grad_norm": 0.451333612203598, "learning_rate": 1.9972233039948227e-05, "loss": 1.542, "step": 155700 }, { "epoch": 0.11871136451779794, "grad_norm": 0.6382780075073242, "learning_rate": 1.9972197376633037e-05, "loss": 1.5129, "step": 155800 }, { "epoch": 0.11878755923186585, "grad_norm": 0.42809566855430603, "learning_rate": 1.9972161690461812e-05, "loss": 1.5017, "step": 155900 }, { "epoch": 0.11886375394593375, "grad_norm": 0.577850341796875, "learning_rate": 1.9972125981434633e-05, "loss": 1.478, "step": 156000 }, { "epoch": 0.11893994866000165, "grad_norm": 0.5123291611671448, "learning_rate": 1.9972090249551574e-05, "loss": 1.5562, "step": 156100 }, { "epoch": 0.11901614337406957, "grad_norm": 0.8542094826698303, "learning_rate": 1.9972054494812726e-05, "loss": 1.5455, "step": 156200 }, { "epoch": 0.11909233808813748, "grad_norm": 0.7365400791168213, "learning_rate": 1.9972018717218166e-05, "loss": 1.5988, "step": 156300 }, { "epoch": 0.11916853280220538, "grad_norm": 0.6100090146064758, "learning_rate": 1.997198291676798e-05, "loss": 1.516, "step": 156400 }, { "epoch": 0.11924472751627328, "grad_norm": 0.4397481083869934, "learning_rate": 1.9971947093462242e-05, "loss": 1.5471, "step": 156500 }, { "epoch": 0.11932092223034119, "grad_norm": 1.1678694486618042, "learning_rate": 1.9971911247301045e-05, "loss": 1.551, "step": 156600 }, { "epoch": 0.1193971169444091, "grad_norm": 0.6791478991508484, "learning_rate": 1.9971875378284463e-05, "loss": 1.478, "step": 156700 }, { "epoch": 0.11947331165847701, "grad_norm": 0.6726185083389282, "learning_rate": 1.997183948641258e-05, "loss": 1.5454, "step": 156800 }, { "epoch": 0.11954950637254491, "grad_norm": 1.0643079280853271, "learning_rate": 1.9971803571685483e-05, "loss": 1.4491, "step": 156900 }, { "epoch": 0.11962570108661282, "grad_norm": 0.5203260183334351, "learning_rate": 1.9971767634103247e-05, "loss": 1.498, "step": 157000 }, { "epoch": 0.11970189580068072, "grad_norm": 0.8281683325767517, "learning_rate": 1.997173167366596e-05, "loss": 1.5104, "step": 157100 }, { "epoch": 0.11977809051474864, "grad_norm": 0.6222112774848938, "learning_rate": 1.99716956903737e-05, "loss": 1.6362, "step": 157200 }, { "epoch": 0.11985428522881654, "grad_norm": 0.63880455493927, "learning_rate": 1.9971659684226555e-05, "loss": 1.5631, "step": 157300 }, { "epoch": 0.11993047994288444, "grad_norm": 1.1247291564941406, "learning_rate": 1.99716236552246e-05, "loss": 1.5373, "step": 157400 }, { "epoch": 0.12000667465695235, "grad_norm": 0.8027576208114624, "learning_rate": 1.9971587603367926e-05, "loss": 1.5959, "step": 157500 }, { "epoch": 0.12008286937102025, "grad_norm": 0.5778335928916931, "learning_rate": 1.997155152865661e-05, "loss": 1.5341, "step": 157600 }, { "epoch": 0.12015906408508815, "grad_norm": 0.46868038177490234, "learning_rate": 1.9971515431090735e-05, "loss": 1.5303, "step": 157700 }, { "epoch": 0.12023525879915607, "grad_norm": 1.1025221347808838, "learning_rate": 1.9971479310670383e-05, "loss": 1.5247, "step": 157800 }, { "epoch": 0.12031145351322398, "grad_norm": 0.5243988037109375, "learning_rate": 1.9971443167395643e-05, "loss": 1.4942, "step": 157900 }, { "epoch": 0.12038764822729188, "grad_norm": 0.5254366993904114, "learning_rate": 1.997140700126659e-05, "loss": 1.5076, "step": 158000 }, { "epoch": 0.12046384294135978, "grad_norm": 1.0118021965026855, "learning_rate": 1.9971370812283313e-05, "loss": 1.5534, "step": 158100 }, { "epoch": 0.12054003765542769, "grad_norm": 1.2398056983947754, "learning_rate": 1.9971334600445894e-05, "loss": 1.4548, "step": 158200 }, { "epoch": 0.1206162323694956, "grad_norm": 0.5367677211761475, "learning_rate": 1.997129836575441e-05, "loss": 1.534, "step": 158300 }, { "epoch": 0.12069242708356351, "grad_norm": 0.5356037616729736, "learning_rate": 1.997126210820895e-05, "loss": 1.4956, "step": 158400 }, { "epoch": 0.12076862179763141, "grad_norm": 0.3639816641807556, "learning_rate": 1.9971225827809597e-05, "loss": 1.5126, "step": 158500 }, { "epoch": 0.12084481651169932, "grad_norm": 0.5793240666389465, "learning_rate": 1.997118952455643e-05, "loss": 1.4941, "step": 158600 }, { "epoch": 0.12092101122576722, "grad_norm": 1.010176181793213, "learning_rate": 1.9971153198449537e-05, "loss": 1.459, "step": 158700 }, { "epoch": 0.12099720593983512, "grad_norm": 0.5604076981544495, "learning_rate": 1.9971116849488997e-05, "loss": 1.506, "step": 158800 }, { "epoch": 0.12107340065390304, "grad_norm": 0.555228054523468, "learning_rate": 1.99710804776749e-05, "loss": 1.4672, "step": 158900 }, { "epoch": 0.12114959536797094, "grad_norm": 0.5489868521690369, "learning_rate": 1.997104408300732e-05, "loss": 1.494, "step": 159000 }, { "epoch": 0.12122579008203885, "grad_norm": 1.0183372497558594, "learning_rate": 1.9971007665486348e-05, "loss": 1.4679, "step": 159100 }, { "epoch": 0.12130198479610675, "grad_norm": 1.1157522201538086, "learning_rate": 1.9970971225112063e-05, "loss": 1.505, "step": 159200 }, { "epoch": 0.12137817951017466, "grad_norm": 0.47557663917541504, "learning_rate": 1.9970934761884552e-05, "loss": 1.5367, "step": 159300 }, { "epoch": 0.12145437422424257, "grad_norm": 0.6817649602890015, "learning_rate": 1.9970898275803896e-05, "loss": 1.4863, "step": 159400 }, { "epoch": 0.12153056893831048, "grad_norm": 0.7250083088874817, "learning_rate": 1.9970861766870178e-05, "loss": 1.569, "step": 159500 }, { "epoch": 0.12160676365237838, "grad_norm": 0.7831650972366333, "learning_rate": 1.9970825235083485e-05, "loss": 1.5117, "step": 159600 }, { "epoch": 0.12168295836644628, "grad_norm": 0.6908311247825623, "learning_rate": 1.9970788680443897e-05, "loss": 1.4854, "step": 159700 }, { "epoch": 0.12175915308051419, "grad_norm": 0.9241918325424194, "learning_rate": 1.9970752102951502e-05, "loss": 1.5293, "step": 159800 }, { "epoch": 0.1218353477945821, "grad_norm": 0.9859775304794312, "learning_rate": 1.9970715502606375e-05, "loss": 1.5101, "step": 159900 }, { "epoch": 0.12191154250865001, "grad_norm": 0.7927902340888977, "learning_rate": 1.9970678879408615e-05, "loss": 1.4263, "step": 160000 }, { "epoch": 0.12198773722271791, "grad_norm": 0.5331178307533264, "learning_rate": 1.997064223335829e-05, "loss": 1.5712, "step": 160100 }, { "epoch": 0.12206393193678582, "grad_norm": 0.5178743600845337, "learning_rate": 1.9970605564455497e-05, "loss": 1.6199, "step": 160200 }, { "epoch": 0.12214012665085372, "grad_norm": 0.42395228147506714, "learning_rate": 1.997056887270031e-05, "loss": 1.5975, "step": 160300 }, { "epoch": 0.12221632136492162, "grad_norm": 0.6175497174263, "learning_rate": 1.997053215809282e-05, "loss": 1.5727, "step": 160400 }, { "epoch": 0.12229251607898954, "grad_norm": 2.8108174800872803, "learning_rate": 1.997049542063311e-05, "loss": 1.5663, "step": 160500 }, { "epoch": 0.12236871079305744, "grad_norm": 0.6319596171379089, "learning_rate": 1.997045866032126e-05, "loss": 1.5432, "step": 160600 }, { "epoch": 0.12244490550712535, "grad_norm": 1.1011673212051392, "learning_rate": 1.9970421877157357e-05, "loss": 1.4975, "step": 160700 }, { "epoch": 0.12252110022119325, "grad_norm": 0.6303352117538452, "learning_rate": 1.9970385071141488e-05, "loss": 1.5822, "step": 160800 }, { "epoch": 0.12259729493526116, "grad_norm": 0.9685313105583191, "learning_rate": 1.9970348242273733e-05, "loss": 1.5925, "step": 160900 }, { "epoch": 0.12267348964932907, "grad_norm": 1.5857430696487427, "learning_rate": 1.9970311390554177e-05, "loss": 1.5182, "step": 161000 }, { "epoch": 0.12274968436339698, "grad_norm": 0.6541360020637512, "learning_rate": 1.9970274515982906e-05, "loss": 1.5493, "step": 161100 }, { "epoch": 0.12282587907746488, "grad_norm": 0.6853638291358948, "learning_rate": 1.997023761856e-05, "loss": 1.5404, "step": 161200 }, { "epoch": 0.12290207379153278, "grad_norm": 0.5925423502922058, "learning_rate": 1.9970200698285556e-05, "loss": 1.4905, "step": 161300 }, { "epoch": 0.12297826850560069, "grad_norm": 1.2050243616104126, "learning_rate": 1.9970163755159647e-05, "loss": 1.4973, "step": 161400 }, { "epoch": 0.1230544632196686, "grad_norm": 0.4524605870246887, "learning_rate": 1.9970126789182357e-05, "loss": 1.4202, "step": 161500 }, { "epoch": 0.12313065793373651, "grad_norm": 0.5339109301567078, "learning_rate": 1.9970089800353782e-05, "loss": 1.49, "step": 161600 }, { "epoch": 0.12320685264780441, "grad_norm": 0.7778879404067993, "learning_rate": 1.9970052788673992e-05, "loss": 1.545, "step": 161700 }, { "epoch": 0.12328304736187232, "grad_norm": 0.9982026815414429, "learning_rate": 1.997001575414308e-05, "loss": 1.5049, "step": 161800 }, { "epoch": 0.12335924207594022, "grad_norm": 0.6620715856552124, "learning_rate": 1.9969978696761134e-05, "loss": 1.5412, "step": 161900 }, { "epoch": 0.12343543679000812, "grad_norm": 0.5564178824424744, "learning_rate": 1.9969941616528234e-05, "loss": 1.5256, "step": 162000 }, { "epoch": 0.12351163150407604, "grad_norm": 0.5725895762443542, "learning_rate": 1.9969904513444465e-05, "loss": 1.5294, "step": 162100 }, { "epoch": 0.12358782621814395, "grad_norm": 1.0373424291610718, "learning_rate": 1.9969867387509913e-05, "loss": 1.4766, "step": 162200 }, { "epoch": 0.12366402093221185, "grad_norm": 0.4091905355453491, "learning_rate": 1.9969830238724664e-05, "loss": 1.6414, "step": 162300 }, { "epoch": 0.12374021564627975, "grad_norm": 0.5299661755561829, "learning_rate": 1.99697930670888e-05, "loss": 1.4541, "step": 162400 }, { "epoch": 0.12381641036034766, "grad_norm": 0.5670778155326843, "learning_rate": 1.9969755872602412e-05, "loss": 1.4952, "step": 162500 }, { "epoch": 0.12389260507441557, "grad_norm": 0.5143985748291016, "learning_rate": 1.996971865526558e-05, "loss": 1.4435, "step": 162600 }, { "epoch": 0.12396879978848348, "grad_norm": 1.0555393695831299, "learning_rate": 1.9969681415078393e-05, "loss": 1.5321, "step": 162700 }, { "epoch": 0.12404499450255138, "grad_norm": 1.1085857152938843, "learning_rate": 1.9969644152040933e-05, "loss": 1.486, "step": 162800 }, { "epoch": 0.12412118921661928, "grad_norm": 0.726034939289093, "learning_rate": 1.9969606866153285e-05, "loss": 1.5553, "step": 162900 }, { "epoch": 0.12419738393068719, "grad_norm": 1.0111762285232544, "learning_rate": 1.9969569557415538e-05, "loss": 1.4017, "step": 163000 }, { "epoch": 0.12427357864475509, "grad_norm": 0.9694646000862122, "learning_rate": 1.9969532225827776e-05, "loss": 1.517, "step": 163100 }, { "epoch": 0.12434977335882301, "grad_norm": 0.5885933041572571, "learning_rate": 1.9969494871390084e-05, "loss": 1.5469, "step": 163200 }, { "epoch": 0.12442596807289091, "grad_norm": 1.0851953029632568, "learning_rate": 1.9969457494102546e-05, "loss": 1.5137, "step": 163300 }, { "epoch": 0.12450216278695882, "grad_norm": 0.9169263243675232, "learning_rate": 1.996942009396525e-05, "loss": 1.6183, "step": 163400 }, { "epoch": 0.12457835750102672, "grad_norm": 0.7932730913162231, "learning_rate": 1.9969382670978287e-05, "loss": 1.5962, "step": 163500 }, { "epoch": 0.12465455221509462, "grad_norm": 0.8161057233810425, "learning_rate": 1.9969345225141732e-05, "loss": 1.5505, "step": 163600 }, { "epoch": 0.12473074692916254, "grad_norm": 0.548876166343689, "learning_rate": 1.9969307756455677e-05, "loss": 1.5297, "step": 163700 }, { "epoch": 0.12480694164323045, "grad_norm": 1.436046838760376, "learning_rate": 1.9969270264920207e-05, "loss": 1.4394, "step": 163800 }, { "epoch": 0.12488313635729835, "grad_norm": 0.47205379605293274, "learning_rate": 1.996923275053541e-05, "loss": 1.5296, "step": 163900 }, { "epoch": 0.12495933107136625, "grad_norm": 1.0708388090133667, "learning_rate": 1.9969195213301363e-05, "loss": 1.5846, "step": 164000 }, { "epoch": 0.12503552578543417, "grad_norm": 0.5648390054702759, "learning_rate": 1.9969157653218166e-05, "loss": 1.4987, "step": 164100 }, { "epoch": 0.12511172049950206, "grad_norm": 0.5198548436164856, "learning_rate": 1.9969120070285896e-05, "loss": 1.5782, "step": 164200 }, { "epoch": 0.12518791521356998, "grad_norm": 0.8251690864562988, "learning_rate": 1.9969082464504638e-05, "loss": 1.432, "step": 164300 }, { "epoch": 0.12526410992763787, "grad_norm": 0.5741057991981506, "learning_rate": 1.9969044835874483e-05, "loss": 1.5602, "step": 164400 }, { "epoch": 0.12534030464170579, "grad_norm": 0.7008639574050903, "learning_rate": 1.9969007184395516e-05, "loss": 1.4793, "step": 164500 }, { "epoch": 0.1254164993557737, "grad_norm": 0.6717759370803833, "learning_rate": 1.996896951006782e-05, "loss": 1.6313, "step": 164600 }, { "epoch": 0.1254926940698416, "grad_norm": 0.5328442454338074, "learning_rate": 1.9968931812891488e-05, "loss": 1.456, "step": 164700 }, { "epoch": 0.1255688887839095, "grad_norm": 0.6115724444389343, "learning_rate": 1.9968894092866603e-05, "loss": 1.5155, "step": 164800 }, { "epoch": 0.1256450834979774, "grad_norm": 0.6921473145484924, "learning_rate": 1.9968856349993246e-05, "loss": 1.4825, "step": 164900 }, { "epoch": 0.12572127821204532, "grad_norm": 0.5495848059654236, "learning_rate": 1.9968818584271513e-05, "loss": 1.5661, "step": 165000 }, { "epoch": 0.12579747292611324, "grad_norm": 0.42934536933898926, "learning_rate": 1.9968780795701483e-05, "loss": 1.5129, "step": 165100 }, { "epoch": 0.12587366764018112, "grad_norm": 0.8496658205986023, "learning_rate": 1.9968742984283248e-05, "loss": 1.4722, "step": 165200 }, { "epoch": 0.12594986235424904, "grad_norm": 0.6336967945098877, "learning_rate": 1.996870515001689e-05, "loss": 1.5512, "step": 165300 }, { "epoch": 0.12602605706831693, "grad_norm": 1.8016417026519775, "learning_rate": 1.99686672929025e-05, "loss": 1.5887, "step": 165400 }, { "epoch": 0.12610225178238485, "grad_norm": 0.4638715982437134, "learning_rate": 1.9968629412940163e-05, "loss": 1.465, "step": 165500 }, { "epoch": 0.12617844649645277, "grad_norm": 0.49076393246650696, "learning_rate": 1.9968591510129962e-05, "loss": 1.453, "step": 165600 }, { "epoch": 0.12625464121052066, "grad_norm": 1.2023130655288696, "learning_rate": 1.9968553584471994e-05, "loss": 1.5668, "step": 165700 }, { "epoch": 0.12633083592458857, "grad_norm": 0.3392730951309204, "learning_rate": 1.996851563596633e-05, "loss": 1.4765, "step": 165800 }, { "epoch": 0.12640703063865646, "grad_norm": 0.6124337911605835, "learning_rate": 1.9968477664613073e-05, "loss": 1.4623, "step": 165900 }, { "epoch": 0.12648322535272438, "grad_norm": 0.9165328145027161, "learning_rate": 1.9968439670412304e-05, "loss": 1.5158, "step": 166000 }, { "epoch": 0.1265594200667923, "grad_norm": 0.8542162179946899, "learning_rate": 1.9968401653364108e-05, "loss": 1.5929, "step": 166100 }, { "epoch": 0.1266356147808602, "grad_norm": 0.5667473673820496, "learning_rate": 1.996836361346857e-05, "loss": 1.5773, "step": 166200 }, { "epoch": 0.1267118094949281, "grad_norm": 0.5652046799659729, "learning_rate": 1.9968325550725787e-05, "loss": 1.4871, "step": 166300 }, { "epoch": 0.126788004208996, "grad_norm": 0.7593627572059631, "learning_rate": 1.9968287465135835e-05, "loss": 1.5537, "step": 166400 }, { "epoch": 0.12686419892306391, "grad_norm": 0.5405901074409485, "learning_rate": 1.996824935669881e-05, "loss": 1.598, "step": 166500 }, { "epoch": 0.1269403936371318, "grad_norm": 0.5542206168174744, "learning_rate": 1.9968211225414793e-05, "loss": 1.4142, "step": 166600 }, { "epoch": 0.12701658835119972, "grad_norm": 0.6430229544639587, "learning_rate": 1.9968173071283873e-05, "loss": 1.5918, "step": 166700 }, { "epoch": 0.12709278306526764, "grad_norm": 0.8698261380195618, "learning_rate": 1.9968134894306143e-05, "loss": 1.4926, "step": 166800 }, { "epoch": 0.12716897777933553, "grad_norm": 0.8542900085449219, "learning_rate": 1.9968096694481685e-05, "loss": 1.552, "step": 166900 }, { "epoch": 0.12724517249340345, "grad_norm": 1.6189239025115967, "learning_rate": 1.9968058471810584e-05, "loss": 1.5779, "step": 167000 }, { "epoch": 0.12732136720747134, "grad_norm": 1.0285084247589111, "learning_rate": 1.9968020226292933e-05, "loss": 1.5457, "step": 167100 }, { "epoch": 0.12739756192153925, "grad_norm": 1.0399446487426758, "learning_rate": 1.9967981957928815e-05, "loss": 1.47, "step": 167200 }, { "epoch": 0.12747375663560717, "grad_norm": 0.5055843591690063, "learning_rate": 1.9967943666718325e-05, "loss": 1.5169, "step": 167300 }, { "epoch": 0.12754995134967506, "grad_norm": 0.47481849789619446, "learning_rate": 1.9967905352661544e-05, "loss": 1.4915, "step": 167400 }, { "epoch": 0.12762614606374298, "grad_norm": 0.8346291184425354, "learning_rate": 1.9967867015758566e-05, "loss": 1.53, "step": 167500 }, { "epoch": 0.12770234077781087, "grad_norm": 1.12838613986969, "learning_rate": 1.9967828656009473e-05, "loss": 1.5401, "step": 167600 }, { "epoch": 0.1277785354918788, "grad_norm": 1.228826880455017, "learning_rate": 1.9967790273414353e-05, "loss": 1.5334, "step": 167700 }, { "epoch": 0.1278547302059467, "grad_norm": 0.9010761976242065, "learning_rate": 1.9967751867973297e-05, "loss": 1.5041, "step": 167800 }, { "epoch": 0.1279309249200146, "grad_norm": 0.7046778798103333, "learning_rate": 1.9967713439686388e-05, "loss": 1.5213, "step": 167900 }, { "epoch": 0.1280071196340825, "grad_norm": 0.5042731761932373, "learning_rate": 1.9967674988553726e-05, "loss": 1.5017, "step": 168000 }, { "epoch": 0.1280833143481504, "grad_norm": 1.1500872373580933, "learning_rate": 1.9967636514575382e-05, "loss": 1.5048, "step": 168100 }, { "epoch": 0.12815950906221832, "grad_norm": 0.8680156469345093, "learning_rate": 1.996759801775146e-05, "loss": 1.5039, "step": 168200 }, { "epoch": 0.12823570377628624, "grad_norm": 0.509965181350708, "learning_rate": 1.996755949808204e-05, "loss": 1.5892, "step": 168300 }, { "epoch": 0.12831189849035413, "grad_norm": 0.6812742948532104, "learning_rate": 1.9967520955567212e-05, "loss": 1.5074, "step": 168400 }, { "epoch": 0.12838809320442204, "grad_norm": 0.7015091776847839, "learning_rate": 1.996748239020706e-05, "loss": 1.479, "step": 168500 }, { "epoch": 0.12846428791848993, "grad_norm": 1.2768102884292603, "learning_rate": 1.9967443802001683e-05, "loss": 1.5052, "step": 168600 }, { "epoch": 0.12854048263255785, "grad_norm": 0.8296292424201965, "learning_rate": 1.996740519095116e-05, "loss": 1.5507, "step": 168700 }, { "epoch": 0.12861667734662577, "grad_norm": 0.6702276468276978, "learning_rate": 1.9967366557055583e-05, "loss": 1.5176, "step": 168800 }, { "epoch": 0.12869287206069366, "grad_norm": 1.0063743591308594, "learning_rate": 1.9967327900315038e-05, "loss": 1.6116, "step": 168900 }, { "epoch": 0.12876906677476158, "grad_norm": 0.8738806843757629, "learning_rate": 1.9967289220729618e-05, "loss": 1.4699, "step": 169000 }, { "epoch": 0.12884526148882947, "grad_norm": 0.6948505640029907, "learning_rate": 1.9967250518299407e-05, "loss": 1.5045, "step": 169100 }, { "epoch": 0.12892145620289738, "grad_norm": 0.7673490643501282, "learning_rate": 1.9967211793024498e-05, "loss": 1.5626, "step": 169200 }, { "epoch": 0.12899765091696527, "grad_norm": 0.46138235926628113, "learning_rate": 1.9967173044904978e-05, "loss": 1.444, "step": 169300 }, { "epoch": 0.1290738456310332, "grad_norm": 0.6339269876480103, "learning_rate": 1.9967134273940935e-05, "loss": 1.3973, "step": 169400 }, { "epoch": 0.1291500403451011, "grad_norm": 0.49346569180488586, "learning_rate": 1.9967095480132458e-05, "loss": 1.5075, "step": 169500 }, { "epoch": 0.129226235059169, "grad_norm": 0.6539453864097595, "learning_rate": 1.9967056663479634e-05, "loss": 1.4849, "step": 169600 }, { "epoch": 0.12930242977323692, "grad_norm": 1.1009365320205688, "learning_rate": 1.9967017823982555e-05, "loss": 1.4938, "step": 169700 }, { "epoch": 0.1293786244873048, "grad_norm": 1.0660845041275024, "learning_rate": 1.9966978961641313e-05, "loss": 1.541, "step": 169800 }, { "epoch": 0.12945481920137272, "grad_norm": 1.467763900756836, "learning_rate": 1.996694007645599e-05, "loss": 1.5815, "step": 169900 }, { "epoch": 0.12953101391544064, "grad_norm": 0.9566108584403992, "learning_rate": 1.9966901168426677e-05, "loss": 1.5023, "step": 170000 }, { "epoch": 0.12960720862950853, "grad_norm": 1.4857321977615356, "learning_rate": 1.996686223755347e-05, "loss": 1.5056, "step": 170100 }, { "epoch": 0.12968340334357645, "grad_norm": 0.5650294423103333, "learning_rate": 1.996682328383645e-05, "loss": 1.5012, "step": 170200 }, { "epoch": 0.12975959805764434, "grad_norm": 0.43829429149627686, "learning_rate": 1.996678430727571e-05, "loss": 1.5582, "step": 170300 }, { "epoch": 0.12983579277171226, "grad_norm": 0.732196033000946, "learning_rate": 1.9966745307871336e-05, "loss": 1.4943, "step": 170400 }, { "epoch": 0.12991198748578017, "grad_norm": 0.7197682857513428, "learning_rate": 1.9966706285623423e-05, "loss": 1.5642, "step": 170500 }, { "epoch": 0.12998818219984806, "grad_norm": 0.5568147897720337, "learning_rate": 1.9966667240532057e-05, "loss": 1.4854, "step": 170600 }, { "epoch": 0.13006437691391598, "grad_norm": 0.7193698287010193, "learning_rate": 1.9966628172597328e-05, "loss": 1.5598, "step": 170700 }, { "epoch": 0.13014057162798387, "grad_norm": 0.6384301781654358, "learning_rate": 1.996658908181932e-05, "loss": 1.5478, "step": 170800 }, { "epoch": 0.1302167663420518, "grad_norm": 0.6799852848052979, "learning_rate": 1.9966549968198134e-05, "loss": 1.5287, "step": 170900 }, { "epoch": 0.1302929610561197, "grad_norm": 0.8556188941001892, "learning_rate": 1.9966510831733852e-05, "loss": 1.532, "step": 171000 }, { "epoch": 0.1303691557701876, "grad_norm": 1.057260513305664, "learning_rate": 1.9966471672426565e-05, "loss": 1.4975, "step": 171100 }, { "epoch": 0.1304453504842555, "grad_norm": 0.4696371257305145, "learning_rate": 1.9966432490276365e-05, "loss": 1.5812, "step": 171200 }, { "epoch": 0.1305215451983234, "grad_norm": 1.1697537899017334, "learning_rate": 1.9966393285283336e-05, "loss": 1.5089, "step": 171300 }, { "epoch": 0.13059773991239132, "grad_norm": 0.43043676018714905, "learning_rate": 1.9966354057447574e-05, "loss": 1.5434, "step": 171400 }, { "epoch": 0.13067393462645924, "grad_norm": 0.6243734955787659, "learning_rate": 1.9966314806769166e-05, "loss": 1.5369, "step": 171500 }, { "epoch": 0.13075012934052713, "grad_norm": 1.4543743133544922, "learning_rate": 1.9966275533248203e-05, "loss": 1.5123, "step": 171600 }, { "epoch": 0.13082632405459504, "grad_norm": 0.6289095878601074, "learning_rate": 1.9966236236884775e-05, "loss": 1.5214, "step": 171700 }, { "epoch": 0.13090251876866293, "grad_norm": 0.7222109436988831, "learning_rate": 1.9966196917678972e-05, "loss": 1.5727, "step": 171800 }, { "epoch": 0.13097871348273085, "grad_norm": 0.59749835729599, "learning_rate": 1.9966157575630883e-05, "loss": 1.4823, "step": 171900 }, { "epoch": 0.13105490819679874, "grad_norm": 0.6601287126541138, "learning_rate": 1.9966118210740602e-05, "loss": 1.4965, "step": 172000 }, { "epoch": 0.13113110291086666, "grad_norm": 0.6995854377746582, "learning_rate": 1.996607882300821e-05, "loss": 1.6288, "step": 172100 }, { "epoch": 0.13120729762493458, "grad_norm": 0.5496791005134583, "learning_rate": 1.9966039412433808e-05, "loss": 1.5919, "step": 172200 }, { "epoch": 0.13128349233900247, "grad_norm": 0.8692256808280945, "learning_rate": 1.996599997901748e-05, "loss": 1.6002, "step": 172300 }, { "epoch": 0.13135968705307038, "grad_norm": 0.9191569685935974, "learning_rate": 1.996596052275932e-05, "loss": 1.5559, "step": 172400 } ], "logging_steps": 100, "max_steps": 6562135, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3486437306994196e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }