{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999806054964023, "eval_steps": 500, "global_step": 12890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 7.519832075963106, "learning_rate": 5.16795865633075e-08, "loss": 1.0553, "step": 1 }, { "epoch": 0.0, "grad_norm": 5.888495993953021, "learning_rate": 1.03359173126615e-07, "loss": 1.0691, "step": 2 }, { "epoch": 0.0, "grad_norm": 7.777014625287366, "learning_rate": 1.5503875968992249e-07, "loss": 1.0115, "step": 3 }, { "epoch": 0.0, "grad_norm": 8.288166885816697, "learning_rate": 2.0671834625323e-07, "loss": 1.0531, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.728059218918297, "learning_rate": 2.583979328165375e-07, "loss": 1.0227, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.903468059340747, "learning_rate": 3.1007751937984497e-07, "loss": 1.071, "step": 6 }, { "epoch": 0.0, "grad_norm": 7.673274081631383, "learning_rate": 3.6175710594315246e-07, "loss": 1.1014, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.127045187299337, "learning_rate": 4.1343669250646e-07, "loss": 1.0198, "step": 8 }, { "epoch": 0.0, "grad_norm": 8.42937660527831, "learning_rate": 4.651162790697675e-07, "loss": 1.0276, "step": 9 }, { "epoch": 0.0, "grad_norm": 7.625662352763816, "learning_rate": 5.16795865633075e-07, "loss": 0.9806, "step": 10 }, { "epoch": 0.0, "grad_norm": 6.336066643354788, "learning_rate": 5.684754521963825e-07, "loss": 1.0316, "step": 11 }, { "epoch": 0.0, "grad_norm": 6.462067474116861, "learning_rate": 6.201550387596899e-07, "loss": 1.0035, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.403141428987006, "learning_rate": 6.718346253229975e-07, "loss": 1.0206, "step": 13 }, { "epoch": 0.0, "grad_norm": 6.128595347934199, "learning_rate": 7.235142118863049e-07, "loss": 0.9459, "step": 14 }, { "epoch": 0.0, "grad_norm": 5.395879777195401, "learning_rate": 7.751937984496125e-07, "loss": 0.9791, "step": 15 }, { "epoch": 0.0, "grad_norm": 6.316994153124984, "learning_rate": 8.2687338501292e-07, "loss": 0.8712, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.630215579253789, "learning_rate": 8.785529715762274e-07, "loss": 0.9682, "step": 17 }, { "epoch": 0.0, "grad_norm": 5.294689706674586, "learning_rate": 9.30232558139535e-07, "loss": 0.9027, "step": 18 }, { "epoch": 0.0, "grad_norm": 4.422215964176046, "learning_rate": 9.819121447028424e-07, "loss": 0.9334, "step": 19 }, { "epoch": 0.0, "grad_norm": 3.3414770939835425, "learning_rate": 1.03359173126615e-06, "loss": 0.9431, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.195632731985836, "learning_rate": 1.0852713178294575e-06, "loss": 0.934, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.612591981865614, "learning_rate": 1.136950904392765e-06, "loss": 0.9314, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.6314997357443173, "learning_rate": 1.1886304909560723e-06, "loss": 0.8186, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.764521836778098, "learning_rate": 1.2403100775193799e-06, "loss": 0.8507, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.9395074680919357, "learning_rate": 1.2919896640826874e-06, "loss": 0.8617, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.609294136301998, "learning_rate": 1.343669250645995e-06, "loss": 0.8471, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.4551541628476996, "learning_rate": 1.3953488372093025e-06, "loss": 0.8961, "step": 27 }, { "epoch": 0.0, "grad_norm": 2.71237206223252, "learning_rate": 1.4470284237726098e-06, "loss": 0.8651, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.414596350445245, "learning_rate": 1.4987080103359176e-06, "loss": 0.8452, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.522648076681847, "learning_rate": 1.550387596899225e-06, "loss": 0.8308, "step": 30 }, { "epoch": 0.0, "grad_norm": 2.482017790337925, "learning_rate": 1.6020671834625322e-06, "loss": 0.7951, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.694296117241535, "learning_rate": 1.65374677002584e-06, "loss": 0.8104, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.2735440468482206, "learning_rate": 1.7054263565891473e-06, "loss": 0.878, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.460717666737549, "learning_rate": 1.7571059431524549e-06, "loss": 0.7748, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.615230363724519, "learning_rate": 1.8087855297157624e-06, "loss": 0.8503, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.0739543624168046, "learning_rate": 1.86046511627907e-06, "loss": 0.7561, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.098811848895224, "learning_rate": 1.9121447028423773e-06, "loss": 0.7309, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.4512717663597585, "learning_rate": 1.963824289405685e-06, "loss": 0.8263, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.2516002262721893, "learning_rate": 2.0155038759689923e-06, "loss": 0.8068, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.596235532100443, "learning_rate": 2.0671834625323e-06, "loss": 0.8449, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.545090219663501, "learning_rate": 2.1188630490956074e-06, "loss": 0.8061, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.363567341283558, "learning_rate": 2.170542635658915e-06, "loss": 0.8206, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.163689095682903, "learning_rate": 2.222222222222222e-06, "loss": 0.7695, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.2080585620688122, "learning_rate": 2.27390180878553e-06, "loss": 0.7675, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.3873360143137075, "learning_rate": 2.3255813953488376e-06, "loss": 0.7691, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.494632490226066, "learning_rate": 2.3772609819121447e-06, "loss": 0.7957, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.244123945182519, "learning_rate": 2.4289405684754527e-06, "loss": 0.7594, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.277922201876151, "learning_rate": 2.4806201550387598e-06, "loss": 0.7248, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.0354233345018833, "learning_rate": 2.5322997416020673e-06, "loss": 0.7247, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.1956830238083183, "learning_rate": 2.583979328165375e-06, "loss": 0.7942, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.1086478781492746, "learning_rate": 2.635658914728683e-06, "loss": 0.7626, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.212343665165485, "learning_rate": 2.68733850129199e-06, "loss": 0.7707, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.185894513201737, "learning_rate": 2.7390180878552975e-06, "loss": 0.7752, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.391570359350091, "learning_rate": 2.790697674418605e-06, "loss": 0.7444, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.625443466510659, "learning_rate": 2.842377260981912e-06, "loss": 0.7962, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.8983609533379908, "learning_rate": 2.8940568475452197e-06, "loss": 0.7051, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.1952030249616508, "learning_rate": 2.9457364341085276e-06, "loss": 0.7571, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.077316241532325, "learning_rate": 2.997416020671835e-06, "loss": 0.7287, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.227086591108584, "learning_rate": 3.0490956072351423e-06, "loss": 0.7853, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.2554648050155466, "learning_rate": 3.10077519379845e-06, "loss": 0.7785, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.1596438471020365, "learning_rate": 3.1524547803617574e-06, "loss": 0.8295, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.282697453560096, "learning_rate": 3.2041343669250645e-06, "loss": 0.7639, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.758532025047504, "learning_rate": 3.2558139534883724e-06, "loss": 0.687, "step": 63 }, { "epoch": 0.0, "grad_norm": 2.2299768870913597, "learning_rate": 3.30749354005168e-06, "loss": 0.7668, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.111093569844413, "learning_rate": 3.3591731266149875e-06, "loss": 0.8231, "step": 65 }, { "epoch": 0.01, "grad_norm": 2.1534840631827485, "learning_rate": 3.4108527131782946e-06, "loss": 0.7143, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.216368458223642, "learning_rate": 3.462532299741602e-06, "loss": 0.7461, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.3280494216837937, "learning_rate": 3.5142118863049097e-06, "loss": 0.7476, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.241321356552836, "learning_rate": 3.5658914728682177e-06, "loss": 0.7498, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.1221608661794136, "learning_rate": 3.617571059431525e-06, "loss": 0.7522, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.0474193518921426, "learning_rate": 3.6692506459948323e-06, "loss": 0.8124, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.9518444288280057, "learning_rate": 3.72093023255814e-06, "loss": 0.7243, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.1636466718184812, "learning_rate": 3.772609819121447e-06, "loss": 0.7377, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.856977439681704, "learning_rate": 3.8242894056847545e-06, "loss": 0.7054, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.0053756710099635, "learning_rate": 3.875968992248063e-06, "loss": 0.7338, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.158365731996466, "learning_rate": 3.92764857881137e-06, "loss": 0.7236, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.4102976060464267, "learning_rate": 3.979328165374677e-06, "loss": 0.7754, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.17610689256887, "learning_rate": 4.031007751937985e-06, "loss": 0.7185, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.0942309382339497, "learning_rate": 4.082687338501292e-06, "loss": 0.6833, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.2703418958195645, "learning_rate": 4.1343669250646e-06, "loss": 0.7626, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.3007004210388113, "learning_rate": 4.186046511627907e-06, "loss": 0.7403, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.8774740106252445, "learning_rate": 4.237726098191215e-06, "loss": 0.7147, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.5136139217593367, "learning_rate": 4.289405684754522e-06, "loss": 0.7195, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.1863608527720064, "learning_rate": 4.34108527131783e-06, "loss": 0.7224, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.2834091419898668, "learning_rate": 4.3927648578811375e-06, "loss": 0.7566, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.386091234092043, "learning_rate": 4.444444444444444e-06, "loss": 0.7626, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.3674971878373423, "learning_rate": 4.4961240310077525e-06, "loss": 0.6586, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.172996505874935, "learning_rate": 4.54780361757106e-06, "loss": 0.6604, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.9070537311083688, "learning_rate": 4.599483204134368e-06, "loss": 0.6725, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.9524328607599153, "learning_rate": 4.651162790697675e-06, "loss": 0.7146, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.0347277873038725, "learning_rate": 4.702842377260982e-06, "loss": 0.7531, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.209982622000607, "learning_rate": 4.754521963824289e-06, "loss": 0.7188, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.9692459087380176, "learning_rate": 4.806201550387598e-06, "loss": 0.6813, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.1049024850181413, "learning_rate": 4.857881136950905e-06, "loss": 0.743, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.1798186450732397, "learning_rate": 4.909560723514212e-06, "loss": 0.7513, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.1645572248346174, "learning_rate": 4.9612403100775195e-06, "loss": 0.7533, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.0228318887762877, "learning_rate": 5.012919896640828e-06, "loss": 0.6592, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.206621682303021, "learning_rate": 5.064599483204135e-06, "loss": 0.7562, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.1311466852020433, "learning_rate": 5.116279069767442e-06, "loss": 0.7061, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.9809595587086415, "learning_rate": 5.16795865633075e-06, "loss": 0.7214, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.9387466972711545, "learning_rate": 5.219638242894057e-06, "loss": 0.7502, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.8229280598602298, "learning_rate": 5.271317829457366e-06, "loss": 0.6942, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.8532957824955443, "learning_rate": 5.322997416020672e-06, "loss": 0.732, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.9006885008912018, "learning_rate": 5.37467700258398e-06, "loss": 0.6513, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.9933017382249472, "learning_rate": 5.4263565891472865e-06, "loss": 0.7436, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.0787408819589306, "learning_rate": 5.478036175710595e-06, "loss": 0.6851, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.9692236315472937, "learning_rate": 5.529715762273902e-06, "loss": 0.7102, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.0958519249073144, "learning_rate": 5.58139534883721e-06, "loss": 0.6779, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.1179506397493193, "learning_rate": 5.6330749354005176e-06, "loss": 0.7571, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.9953179391474662, "learning_rate": 5.684754521963824e-06, "loss": 0.6988, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.209611906437212, "learning_rate": 5.736434108527133e-06, "loss": 0.7277, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.9401680204051799, "learning_rate": 5.788113695090439e-06, "loss": 0.6803, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.1188809424704016, "learning_rate": 5.839793281653747e-06, "loss": 0.7199, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.0943100592990525, "learning_rate": 5.891472868217055e-06, "loss": 0.6849, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.2713837146185636, "learning_rate": 5.943152454780362e-06, "loss": 0.7577, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.127209132176853, "learning_rate": 5.99483204134367e-06, "loss": 0.7871, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.034930723731037, "learning_rate": 6.046511627906977e-06, "loss": 0.7148, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.136418574307478, "learning_rate": 6.0981912144702846e-06, "loss": 0.6921, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.007500412728349, "learning_rate": 6.149870801033592e-06, "loss": 0.6471, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.0923841205992715, "learning_rate": 6.2015503875969e-06, "loss": 0.7567, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.266590622953997, "learning_rate": 6.253229974160208e-06, "loss": 0.6969, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.061399223889906, "learning_rate": 6.304909560723515e-06, "loss": 0.7352, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.9181609203148584, "learning_rate": 6.356589147286822e-06, "loss": 0.6856, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.1843507167792544, "learning_rate": 6.408268733850129e-06, "loss": 0.6914, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.226492897418139, "learning_rate": 6.459948320413437e-06, "loss": 0.7682, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.9479308374334845, "learning_rate": 6.511627906976745e-06, "loss": 0.6866, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.9634312430850824, "learning_rate": 6.563307493540052e-06, "loss": 0.6599, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.0373930546127754, "learning_rate": 6.61498708010336e-06, "loss": 0.6946, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.9244260526837411, "learning_rate": 6.666666666666667e-06, "loss": 0.7157, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.883453956330838, "learning_rate": 6.718346253229975e-06, "loss": 0.6764, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.0045545692999402, "learning_rate": 6.7700258397932826e-06, "loss": 0.6809, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.9606349757302202, "learning_rate": 6.821705426356589e-06, "loss": 0.7321, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.8672674952014412, "learning_rate": 6.873385012919898e-06, "loss": 0.6396, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.8517558322844003, "learning_rate": 6.925064599483204e-06, "loss": 0.6908, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.0440099335072635, "learning_rate": 6.976744186046513e-06, "loss": 0.7415, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.979158649093718, "learning_rate": 7.028423772609819e-06, "loss": 0.6901, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.9189054916948147, "learning_rate": 7.080103359173127e-06, "loss": 0.6229, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.8979967649651344, "learning_rate": 7.131782945736435e-06, "loss": 0.6727, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.089207860872488, "learning_rate": 7.183462532299742e-06, "loss": 0.6474, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.05975957356314, "learning_rate": 7.23514211886305e-06, "loss": 0.7032, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.1621687001374346, "learning_rate": 7.286821705426357e-06, "loss": 0.7346, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.029956346991461, "learning_rate": 7.338501291989665e-06, "loss": 0.7053, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.9328938768505268, "learning_rate": 7.390180878552973e-06, "loss": 0.6723, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.4819536225184136, "learning_rate": 7.44186046511628e-06, "loss": 0.73, "step": 144 }, { "epoch": 0.01, "grad_norm": 2.2634611122156376, "learning_rate": 7.493540051679587e-06, "loss": 0.7043, "step": 145 }, { "epoch": 0.01, "grad_norm": 2.03845568283746, "learning_rate": 7.545219638242894e-06, "loss": 0.7747, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.170724138477728, "learning_rate": 7.596899224806202e-06, "loss": 0.7935, "step": 147 }, { "epoch": 0.01, "grad_norm": 2.0504183784244647, "learning_rate": 7.648578811369509e-06, "loss": 0.6595, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.8734228494690135, "learning_rate": 7.700258397932817e-06, "loss": 0.7251, "step": 149 }, { "epoch": 0.01, "grad_norm": 2.1235225253454257, "learning_rate": 7.751937984496126e-06, "loss": 0.6747, "step": 150 }, { "epoch": 0.01, "grad_norm": 2.1341870248884347, "learning_rate": 7.803617571059433e-06, "loss": 0.6682, "step": 151 }, { "epoch": 0.01, "grad_norm": 2.1759810022127875, "learning_rate": 7.85529715762274e-06, "loss": 0.6936, "step": 152 }, { "epoch": 0.01, "grad_norm": 2.1956170030421633, "learning_rate": 7.906976744186048e-06, "loss": 0.7492, "step": 153 }, { "epoch": 0.01, "grad_norm": 2.483555592654495, "learning_rate": 7.958656330749354e-06, "loss": 0.7305, "step": 154 }, { "epoch": 0.01, "grad_norm": 2.116766405634247, "learning_rate": 8.010335917312663e-06, "loss": 0.6669, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.9162855045739617, "learning_rate": 8.06201550387597e-06, "loss": 0.7261, "step": 156 }, { "epoch": 0.01, "grad_norm": 2.054963532098712, "learning_rate": 8.113695090439278e-06, "loss": 0.7314, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.028434092710246, "learning_rate": 8.165374677002584e-06, "loss": 0.7294, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.048480270719039, "learning_rate": 8.217054263565893e-06, "loss": 0.7383, "step": 159 }, { "epoch": 0.01, "grad_norm": 2.2176711992410225, "learning_rate": 8.2687338501292e-06, "loss": 0.7124, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.069919880039789, "learning_rate": 8.320413436692508e-06, "loss": 0.7016, "step": 161 }, { "epoch": 0.01, "grad_norm": 2.205058554739053, "learning_rate": 8.372093023255815e-06, "loss": 0.6351, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.1284296744738613, "learning_rate": 8.423772609819121e-06, "loss": 0.6765, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.9087997643879109, "learning_rate": 8.47545219638243e-06, "loss": 0.7027, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.1452848850815083, "learning_rate": 8.527131782945736e-06, "loss": 0.7407, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.9991766307667835, "learning_rate": 8.578811369509045e-06, "loss": 0.6794, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.9138380269362316, "learning_rate": 8.630490956072353e-06, "loss": 0.6835, "step": 167 }, { "epoch": 0.01, "grad_norm": 2.003442543319759, "learning_rate": 8.68217054263566e-06, "loss": 0.6293, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.95394526232576, "learning_rate": 8.733850129198968e-06, "loss": 0.6773, "step": 169 }, { "epoch": 0.01, "grad_norm": 2.088166035828683, "learning_rate": 8.785529715762275e-06, "loss": 0.7069, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.9835559025534641, "learning_rate": 8.837209302325582e-06, "loss": 0.7081, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.9845261403594612, "learning_rate": 8.888888888888888e-06, "loss": 0.6452, "step": 172 }, { "epoch": 0.01, "grad_norm": 2.1896583808017813, "learning_rate": 8.940568475452197e-06, "loss": 0.6978, "step": 173 }, { "epoch": 0.01, "grad_norm": 2.034374240576255, "learning_rate": 8.992248062015505e-06, "loss": 0.7193, "step": 174 }, { "epoch": 0.01, "grad_norm": 2.254205482228632, "learning_rate": 9.043927648578812e-06, "loss": 0.7259, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.9883989999974647, "learning_rate": 9.09560723514212e-06, "loss": 0.6599, "step": 176 }, { "epoch": 0.01, "grad_norm": 2.0406451751506136, "learning_rate": 9.147286821705427e-06, "loss": 0.6291, "step": 177 }, { "epoch": 0.01, "grad_norm": 2.0446696740745978, "learning_rate": 9.198966408268735e-06, "loss": 0.643, "step": 178 }, { "epoch": 0.01, "grad_norm": 2.0482308359887362, "learning_rate": 9.250645994832042e-06, "loss": 0.7337, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.9553371871357939, "learning_rate": 9.30232558139535e-06, "loss": 0.6478, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.924814844515946, "learning_rate": 9.354005167958657e-06, "loss": 0.6263, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.047266915344053, "learning_rate": 9.405684754521964e-06, "loss": 0.6979, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.842052712981622, "learning_rate": 9.457364341085272e-06, "loss": 0.6163, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.2294991384925043, "learning_rate": 9.509043927648579e-06, "loss": 0.6639, "step": 184 }, { "epoch": 0.01, "grad_norm": 2.1086576018925034, "learning_rate": 9.560723514211887e-06, "loss": 0.6987, "step": 185 }, { "epoch": 0.01, "grad_norm": 2.413299538742816, "learning_rate": 9.612403100775196e-06, "loss": 0.6821, "step": 186 }, { "epoch": 0.01, "grad_norm": 2.2113895021622576, "learning_rate": 9.664082687338502e-06, "loss": 0.6325, "step": 187 }, { "epoch": 0.01, "grad_norm": 2.312092100717908, "learning_rate": 9.71576227390181e-06, "loss": 0.7312, "step": 188 }, { "epoch": 0.01, "grad_norm": 2.0110742814407456, "learning_rate": 9.767441860465117e-06, "loss": 0.7209, "step": 189 }, { "epoch": 0.01, "grad_norm": 2.003722660213722, "learning_rate": 9.819121447028424e-06, "loss": 0.7655, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.066364251847548, "learning_rate": 9.870801033591732e-06, "loss": 0.6983, "step": 191 }, { "epoch": 0.01, "grad_norm": 2.160208896319642, "learning_rate": 9.922480620155039e-06, "loss": 0.7163, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.969679234720519, "learning_rate": 9.974160206718347e-06, "loss": 0.7713, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.7571093021627038, "learning_rate": 1.0025839793281656e-05, "loss": 0.6597, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.9672538279531169, "learning_rate": 1.0077519379844963e-05, "loss": 0.7103, "step": 195 }, { "epoch": 0.02, "grad_norm": 2.1541642868451882, "learning_rate": 1.012919896640827e-05, "loss": 0.7753, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.8365651457895535, "learning_rate": 1.0180878552971578e-05, "loss": 0.7249, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.9360304766258072, "learning_rate": 1.0232558139534884e-05, "loss": 0.6853, "step": 198 }, { "epoch": 0.02, "grad_norm": 2.096884473617233, "learning_rate": 1.0284237726098191e-05, "loss": 0.7369, "step": 199 }, { "epoch": 0.02, "grad_norm": 1.8273847988781062, "learning_rate": 1.03359173126615e-05, "loss": 0.7152, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.8844761759607553, "learning_rate": 1.0387596899224808e-05, "loss": 0.6734, "step": 201 }, { "epoch": 0.02, "grad_norm": 1.914089218264573, "learning_rate": 1.0439276485788114e-05, "loss": 0.7143, "step": 202 }, { "epoch": 0.02, "grad_norm": 2.116071906845655, "learning_rate": 1.0490956072351421e-05, "loss": 0.7001, "step": 203 }, { "epoch": 0.02, "grad_norm": 2.0417825964367657, "learning_rate": 1.0542635658914731e-05, "loss": 0.7866, "step": 204 }, { "epoch": 0.02, "grad_norm": 2.044203084483132, "learning_rate": 1.0594315245478038e-05, "loss": 0.6179, "step": 205 }, { "epoch": 0.02, "grad_norm": 2.0420830230723035, "learning_rate": 1.0645994832041345e-05, "loss": 0.7124, "step": 206 }, { "epoch": 0.02, "grad_norm": 2.1051199487865997, "learning_rate": 1.0697674418604651e-05, "loss": 0.7409, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.8155533295501942, "learning_rate": 1.074935400516796e-05, "loss": 0.6941, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.9123865380632588, "learning_rate": 1.0801033591731266e-05, "loss": 0.7245, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.9133835814631164, "learning_rate": 1.0852713178294573e-05, "loss": 0.6549, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.7395522368006386, "learning_rate": 1.0904392764857883e-05, "loss": 0.7039, "step": 211 }, { "epoch": 0.02, "grad_norm": 1.8978653033690394, "learning_rate": 1.095607235142119e-05, "loss": 0.6929, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.0391466846845905, "learning_rate": 1.1007751937984497e-05, "loss": 0.7141, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.7644765714244997, "learning_rate": 1.1059431524547803e-05, "loss": 0.6726, "step": 214 }, { "epoch": 0.02, "grad_norm": 1.834831131693005, "learning_rate": 1.1111111111111113e-05, "loss": 0.6791, "step": 215 }, { "epoch": 0.02, "grad_norm": 1.9981262847543069, "learning_rate": 1.116279069767442e-05, "loss": 0.6801, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.8607705913420696, "learning_rate": 1.1214470284237727e-05, "loss": 0.6662, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.9942224020634212, "learning_rate": 1.1266149870801035e-05, "loss": 0.7746, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.9566979647943363, "learning_rate": 1.1317829457364342e-05, "loss": 0.7071, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.0894906289355366, "learning_rate": 1.1369509043927648e-05, "loss": 0.7398, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.902596020274279, "learning_rate": 1.1421188630490959e-05, "loss": 0.6448, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.8516721411325656, "learning_rate": 1.1472868217054265e-05, "loss": 0.6533, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.9422069717376376, "learning_rate": 1.1524547803617572e-05, "loss": 0.7712, "step": 223 }, { "epoch": 0.02, "grad_norm": 2.029117344982057, "learning_rate": 1.1576227390180879e-05, "loss": 0.6953, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.9938398502397154, "learning_rate": 1.1627906976744187e-05, "loss": 0.6658, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.007778419769434, "learning_rate": 1.1679586563307494e-05, "loss": 0.6895, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.8139338248104935, "learning_rate": 1.1731266149870802e-05, "loss": 0.6562, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.80716525321881, "learning_rate": 1.178294573643411e-05, "loss": 0.7093, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.1476574400631026, "learning_rate": 1.1834625322997417e-05, "loss": 0.7744, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.0661453630430664, "learning_rate": 1.1886304909560724e-05, "loss": 0.7099, "step": 230 }, { "epoch": 0.02, "grad_norm": 2.1125412525008986, "learning_rate": 1.193798449612403e-05, "loss": 0.7794, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.8573275922237884, "learning_rate": 1.198966408268734e-05, "loss": 0.6658, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.8576416784089278, "learning_rate": 1.2041343669250647e-05, "loss": 0.6768, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.922463427767487, "learning_rate": 1.2093023255813954e-05, "loss": 0.7218, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.9144802338323745, "learning_rate": 1.2144702842377262e-05, "loss": 0.7292, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.8917235028415234, "learning_rate": 1.2196382428940569e-05, "loss": 0.6994, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.7192218479626784, "learning_rate": 1.2248062015503876e-05, "loss": 0.6272, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.059429194615707, "learning_rate": 1.2299741602067184e-05, "loss": 0.6372, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.8141280130390114, "learning_rate": 1.2351421188630493e-05, "loss": 0.7174, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.8352882698287405, "learning_rate": 1.24031007751938e-05, "loss": 0.6856, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.8149697807255358, "learning_rate": 1.2454780361757106e-05, "loss": 0.6677, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.960900294950215, "learning_rate": 1.2506459948320416e-05, "loss": 0.7238, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.898006562980343, "learning_rate": 1.2558139534883723e-05, "loss": 0.7101, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.8680187593575954, "learning_rate": 1.260981912144703e-05, "loss": 0.744, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.9090085316303016, "learning_rate": 1.2661498708010338e-05, "loss": 0.6506, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.595526658646269, "learning_rate": 1.2713178294573645e-05, "loss": 0.6481, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.6954086909274888, "learning_rate": 1.2764857881136951e-05, "loss": 0.6882, "step": 247 }, { "epoch": 0.02, "grad_norm": 2.0163576435791515, "learning_rate": 1.2816537467700258e-05, "loss": 0.7297, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.895852106301838, "learning_rate": 1.2868217054263568e-05, "loss": 0.7747, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.9365275003503144, "learning_rate": 1.2919896640826875e-05, "loss": 0.7125, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.708399523251377, "learning_rate": 1.2971576227390181e-05, "loss": 0.6563, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.7895994088720608, "learning_rate": 1.302325581395349e-05, "loss": 0.6928, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.8279126687923155, "learning_rate": 1.3074935400516796e-05, "loss": 0.7071, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.9455123859953396, "learning_rate": 1.3126614987080105e-05, "loss": 0.7194, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.8876212984229028, "learning_rate": 1.3178294573643412e-05, "loss": 0.7188, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.7747709663808793, "learning_rate": 1.322997416020672e-05, "loss": 0.6819, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.821885113172778, "learning_rate": 1.3281653746770027e-05, "loss": 0.694, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.91953029112429, "learning_rate": 1.3333333333333333e-05, "loss": 0.7012, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.8682867030853527, "learning_rate": 1.3385012919896643e-05, "loss": 0.7418, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.7094975512995536, "learning_rate": 1.343669250645995e-05, "loss": 0.669, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.9776725818158296, "learning_rate": 1.3488372093023257e-05, "loss": 0.702, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.7692678899762055, "learning_rate": 1.3540051679586565e-05, "loss": 0.684, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.8705074530572576, "learning_rate": 1.3591731266149872e-05, "loss": 0.6216, "step": 263 }, { "epoch": 0.02, "grad_norm": 2.034166560357307, "learning_rate": 1.3643410852713179e-05, "loss": 0.7239, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.9058190781105089, "learning_rate": 1.3695090439276487e-05, "loss": 0.7189, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.7473392695345875, "learning_rate": 1.3746770025839795e-05, "loss": 0.6897, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.8400541861475124, "learning_rate": 1.3798449612403102e-05, "loss": 0.6924, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.6957248590566456, "learning_rate": 1.3850129198966409e-05, "loss": 0.6723, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.8766758740788159, "learning_rate": 1.3901808785529717e-05, "loss": 0.7572, "step": 269 }, { "epoch": 0.02, "grad_norm": 2.1911608988724023, "learning_rate": 1.3953488372093025e-05, "loss": 0.8032, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.7159125748885473, "learning_rate": 1.4005167958656332e-05, "loss": 0.6395, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.7821755263454584, "learning_rate": 1.4056847545219639e-05, "loss": 0.6888, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.8221975888290423, "learning_rate": 1.4108527131782947e-05, "loss": 0.727, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.7422530499772497, "learning_rate": 1.4160206718346254e-05, "loss": 0.6506, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.7871411930441092, "learning_rate": 1.421188630490956e-05, "loss": 0.731, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.7210350106250718, "learning_rate": 1.426356589147287e-05, "loss": 0.7012, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.7810746491438123, "learning_rate": 1.4315245478036177e-05, "loss": 0.6611, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.8839496644943539, "learning_rate": 1.4366925064599484e-05, "loss": 0.6647, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.8778032963553069, "learning_rate": 1.441860465116279e-05, "loss": 0.6531, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.8287017434168242, "learning_rate": 1.44702842377261e-05, "loss": 0.7095, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.8776542949642527, "learning_rate": 1.4521963824289408e-05, "loss": 0.7421, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.9099247641999344, "learning_rate": 1.4573643410852714e-05, "loss": 0.6702, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.8216094934275595, "learning_rate": 1.4625322997416023e-05, "loss": 0.6371, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.8987344266291748, "learning_rate": 1.467700258397933e-05, "loss": 0.6714, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.7156933741329128, "learning_rate": 1.4728682170542636e-05, "loss": 0.7229, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.8087905546016818, "learning_rate": 1.4780361757105946e-05, "loss": 0.7129, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.9409079574230799, "learning_rate": 1.4832041343669253e-05, "loss": 0.7056, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.6063497311990749, "learning_rate": 1.488372093023256e-05, "loss": 0.6908, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.9730694328654426, "learning_rate": 1.4935400516795866e-05, "loss": 0.6481, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.5952757469673713, "learning_rate": 1.4987080103359175e-05, "loss": 0.6512, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.98427725723922, "learning_rate": 1.5038759689922481e-05, "loss": 0.7126, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.9416110815297036, "learning_rate": 1.5090439276485788e-05, "loss": 0.7322, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.8932289917607776, "learning_rate": 1.5142118863049098e-05, "loss": 0.6591, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.7060463724441401, "learning_rate": 1.5193798449612405e-05, "loss": 0.6618, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.796732158788336, "learning_rate": 1.5245478036175711e-05, "loss": 0.6555, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.6835650023375834, "learning_rate": 1.5297157622739018e-05, "loss": 0.6566, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.1006441626998402, "learning_rate": 1.5348837209302328e-05, "loss": 0.7364, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.8399146971417395, "learning_rate": 1.5400516795865635e-05, "loss": 0.7592, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.972446906435267, "learning_rate": 1.545219638242894e-05, "loss": 0.7628, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.082671161958546, "learning_rate": 1.550387596899225e-05, "loss": 0.7714, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.8677547981650926, "learning_rate": 1.555555555555556e-05, "loss": 0.754, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.9676800802722405, "learning_rate": 1.5607235142118865e-05, "loss": 0.6773, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.9592992010467447, "learning_rate": 1.5658914728682172e-05, "loss": 0.6701, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.7348913163105608, "learning_rate": 1.571059431524548e-05, "loss": 0.6879, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.609107874950825, "learning_rate": 1.5762273901808785e-05, "loss": 0.6654, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.6397025285151703, "learning_rate": 1.5813953488372095e-05, "loss": 0.6972, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.7851211911688332, "learning_rate": 1.5865633074935402e-05, "loss": 0.6876, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.9905356346683476, "learning_rate": 1.591731266149871e-05, "loss": 0.7228, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.8863281098328457, "learning_rate": 1.5968992248062015e-05, "loss": 0.7152, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.690116408136723, "learning_rate": 1.6020671834625325e-05, "loss": 0.7461, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.6336463368807541, "learning_rate": 1.6072351421188632e-05, "loss": 0.6484, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.7284714170503186, "learning_rate": 1.612403100775194e-05, "loss": 0.6475, "step": 312 }, { "epoch": 0.02, "grad_norm": 2.018403496545159, "learning_rate": 1.6175710594315245e-05, "loss": 0.7395, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.6333717228147262, "learning_rate": 1.6227390180878555e-05, "loss": 0.6498, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.6210224917273397, "learning_rate": 1.6279069767441862e-05, "loss": 0.657, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.9938309416887994, "learning_rate": 1.633074935400517e-05, "loss": 0.7661, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.8170192545498876, "learning_rate": 1.638242894056848e-05, "loss": 0.6766, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.8045935668279347, "learning_rate": 1.6434108527131786e-05, "loss": 0.6594, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.8042382015409661, "learning_rate": 1.6485788113695092e-05, "loss": 0.6845, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.5535666100814085, "learning_rate": 1.65374677002584e-05, "loss": 0.6375, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.6997108185464844, "learning_rate": 1.6589147286821706e-05, "loss": 0.6859, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.7942718225337062, "learning_rate": 1.6640826873385016e-05, "loss": 0.7387, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.6747243255820246, "learning_rate": 1.6692506459948323e-05, "loss": 0.7287, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.6454662283784893, "learning_rate": 1.674418604651163e-05, "loss": 0.6235, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.9309376362153705, "learning_rate": 1.6795865633074936e-05, "loss": 0.7316, "step": 325 }, { "epoch": 0.03, "grad_norm": 1.7199744977834737, "learning_rate": 1.6847545219638243e-05, "loss": 0.6656, "step": 326 }, { "epoch": 0.03, "grad_norm": 1.860879882125513, "learning_rate": 1.689922480620155e-05, "loss": 0.7451, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.6897752046902168, "learning_rate": 1.695090439276486e-05, "loss": 0.6836, "step": 328 }, { "epoch": 0.03, "grad_norm": 1.8385185117767382, "learning_rate": 1.7002583979328166e-05, "loss": 0.7228, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.7139177537723818, "learning_rate": 1.7054263565891473e-05, "loss": 0.6212, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.9065972308984798, "learning_rate": 1.7105943152454783e-05, "loss": 0.736, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.597571037277131, "learning_rate": 1.715762273901809e-05, "loss": 0.7313, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.5904093867911444, "learning_rate": 1.7209302325581396e-05, "loss": 0.6848, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.7389702620812566, "learning_rate": 1.7260981912144706e-05, "loss": 0.7166, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.7843425073020225, "learning_rate": 1.7312661498708013e-05, "loss": 0.7396, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.6234645925689897, "learning_rate": 1.736434108527132e-05, "loss": 0.6724, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.7248537747976185, "learning_rate": 1.7416020671834626e-05, "loss": 0.6683, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.7138102899654486, "learning_rate": 1.7467700258397936e-05, "loss": 0.6655, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.9091086293921518, "learning_rate": 1.7519379844961243e-05, "loss": 0.6957, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.6963941929137416, "learning_rate": 1.757105943152455e-05, "loss": 0.6796, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.7328281549790534, "learning_rate": 1.7622739018087857e-05, "loss": 0.7055, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.589815631589682, "learning_rate": 1.7674418604651163e-05, "loss": 0.7067, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.736638216321355, "learning_rate": 1.772609819121447e-05, "loss": 0.7284, "step": 343 }, { "epoch": 0.03, "grad_norm": 1.6392960751987011, "learning_rate": 1.7777777777777777e-05, "loss": 0.6809, "step": 344 }, { "epoch": 0.03, "grad_norm": 1.5876232640027388, "learning_rate": 1.7829457364341087e-05, "loss": 0.7757, "step": 345 }, { "epoch": 0.03, "grad_norm": 1.5829662432851723, "learning_rate": 1.7881136950904393e-05, "loss": 0.7285, "step": 346 }, { "epoch": 0.03, "grad_norm": 1.5552442311025254, "learning_rate": 1.79328165374677e-05, "loss": 0.6617, "step": 347 }, { "epoch": 0.03, "grad_norm": 1.888963836541859, "learning_rate": 1.798449612403101e-05, "loss": 0.6744, "step": 348 }, { "epoch": 0.03, "grad_norm": 1.7183686266782514, "learning_rate": 1.8036175710594317e-05, "loss": 0.7592, "step": 349 }, { "epoch": 0.03, "grad_norm": 1.754612294162425, "learning_rate": 1.8087855297157624e-05, "loss": 0.7025, "step": 350 }, { "epoch": 0.03, "grad_norm": 1.735550490673282, "learning_rate": 1.813953488372093e-05, "loss": 0.7654, "step": 351 }, { "epoch": 0.03, "grad_norm": 1.8993842507501426, "learning_rate": 1.819121447028424e-05, "loss": 0.7315, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.7647160578883143, "learning_rate": 1.8242894056847547e-05, "loss": 0.7498, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.5622315748437299, "learning_rate": 1.8294573643410854e-05, "loss": 0.6246, "step": 354 }, { "epoch": 0.03, "grad_norm": 1.6830378984478318, "learning_rate": 1.8346253229974164e-05, "loss": 0.6808, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.6788587788029756, "learning_rate": 1.839793281653747e-05, "loss": 0.6637, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.6853859693123143, "learning_rate": 1.8449612403100777e-05, "loss": 0.7021, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.7256901120754673, "learning_rate": 1.8501291989664084e-05, "loss": 0.6518, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.693381902243467, "learning_rate": 1.855297157622739e-05, "loss": 0.7455, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.743409280064828, "learning_rate": 1.86046511627907e-05, "loss": 0.7063, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.8010066423013003, "learning_rate": 1.8656330749354007e-05, "loss": 0.7634, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.7266054450170285, "learning_rate": 1.8708010335917314e-05, "loss": 0.6595, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.7604733821001817, "learning_rate": 1.875968992248062e-05, "loss": 0.6977, "step": 363 }, { "epoch": 0.03, "grad_norm": 1.5710357215214774, "learning_rate": 1.8811369509043927e-05, "loss": 0.7089, "step": 364 }, { "epoch": 0.03, "grad_norm": 1.7716215810245508, "learning_rate": 1.8863049095607237e-05, "loss": 0.6371, "step": 365 }, { "epoch": 0.03, "grad_norm": 1.5191226988578574, "learning_rate": 1.8914728682170544e-05, "loss": 0.6987, "step": 366 }, { "epoch": 0.03, "grad_norm": 1.6351020494163706, "learning_rate": 1.896640826873385e-05, "loss": 0.7209, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.427810270324823, "learning_rate": 1.9018087855297158e-05, "loss": 0.672, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.5550164876229366, "learning_rate": 1.9069767441860468e-05, "loss": 0.7001, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.6459538701338634, "learning_rate": 1.9121447028423774e-05, "loss": 0.8093, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.6916808859066244, "learning_rate": 1.917312661498708e-05, "loss": 0.8031, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.792743477559064, "learning_rate": 1.922480620155039e-05, "loss": 0.7294, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.6043612704787404, "learning_rate": 1.9276485788113698e-05, "loss": 0.6933, "step": 373 }, { "epoch": 0.03, "grad_norm": 1.7592731426018393, "learning_rate": 1.9328165374677004e-05, "loss": 0.7709, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.6771718193388752, "learning_rate": 1.937984496124031e-05, "loss": 0.6986, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.6994093514015336, "learning_rate": 1.943152454780362e-05, "loss": 0.7417, "step": 376 }, { "epoch": 0.03, "grad_norm": 1.7915316538248756, "learning_rate": 1.9483204134366928e-05, "loss": 0.7313, "step": 377 }, { "epoch": 0.03, "grad_norm": 2.0338506629382898, "learning_rate": 1.9534883720930235e-05, "loss": 0.7696, "step": 378 }, { "epoch": 0.03, "grad_norm": 1.960364695017877, "learning_rate": 1.958656330749354e-05, "loss": 0.7206, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.8772124271125357, "learning_rate": 1.9638242894056848e-05, "loss": 0.7129, "step": 380 }, { "epoch": 0.03, "grad_norm": 1.9636413049756116, "learning_rate": 1.9689922480620155e-05, "loss": 0.7082, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.6527663247417765, "learning_rate": 1.9741602067183465e-05, "loss": 0.6949, "step": 382 }, { "epoch": 0.03, "grad_norm": 1.6633158696747927, "learning_rate": 1.979328165374677e-05, "loss": 0.6722, "step": 383 }, { "epoch": 0.03, "grad_norm": 1.721959810193357, "learning_rate": 1.9844961240310078e-05, "loss": 0.7567, "step": 384 }, { "epoch": 0.03, "grad_norm": 1.7364744247060135, "learning_rate": 1.9896640826873385e-05, "loss": 0.7176, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.7954586915735413, "learning_rate": 1.9948320413436695e-05, "loss": 0.7535, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.7346229719620336, "learning_rate": 2e-05, "loss": 0.7229, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.7528736498703117, "learning_rate": 1.9999999684324205e-05, "loss": 0.7176, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.5477324912304453, "learning_rate": 1.9999998737296837e-05, "loss": 0.6282, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.7132943002853476, "learning_rate": 1.9999997158917953e-05, "loss": 0.7502, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.6280519729022618, "learning_rate": 1.9999994949187657e-05, "loss": 0.6189, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.5954083061779611, "learning_rate": 1.9999992108106083e-05, "loss": 0.7113, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.573154667243791, "learning_rate": 1.9999988635673414e-05, "loss": 0.6588, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.7079942025481558, "learning_rate": 1.9999984531889875e-05, "loss": 0.6982, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.6879653995321409, "learning_rate": 1.9999979796755715e-05, "loss": 0.666, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.688187141390456, "learning_rate": 1.999997443027124e-05, "loss": 0.6362, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.610216531823565, "learning_rate": 1.9999968432436785e-05, "loss": 0.6575, "step": 397 }, { "epoch": 0.03, "grad_norm": 2.3236145772969543, "learning_rate": 1.9999961803252726e-05, "loss": 0.7453, "step": 398 }, { "epoch": 0.03, "grad_norm": 2.6347959388024034, "learning_rate": 1.999995454271949e-05, "loss": 0.7709, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.6901873630291864, "learning_rate": 1.999994665083753e-05, "loss": 0.7524, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.59903856836198, "learning_rate": 1.9999938127607342e-05, "loss": 0.6619, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.5768222530583047, "learning_rate": 1.9999928973029472e-05, "loss": 0.6928, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.6138677754701747, "learning_rate": 1.999991918710449e-05, "loss": 0.7036, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.557050816039296, "learning_rate": 1.999990876983302e-05, "loss": 0.6806, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.8214136157770862, "learning_rate": 1.999989772121571e-05, "loss": 0.7474, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.776652912399176, "learning_rate": 1.999988604125327e-05, "loss": 0.7904, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.6106579305815039, "learning_rate": 1.9999873729946432e-05, "loss": 0.7247, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.614877571449385, "learning_rate": 1.999986078729597e-05, "loss": 0.6763, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.5951139560039433, "learning_rate": 1.9999847213302703e-05, "loss": 0.6638, "step": 409 }, { "epoch": 0.03, "grad_norm": 2.9395387757454956, "learning_rate": 1.999983300796749e-05, "loss": 0.6751, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.664135711921985, "learning_rate": 1.999981817129123e-05, "loss": 0.7085, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.6385503821637333, "learning_rate": 1.9999802703274854e-05, "loss": 0.7145, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.801231013667197, "learning_rate": 1.9999786603919343e-05, "loss": 0.7448, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.7969883758602014, "learning_rate": 1.9999769873225706e-05, "loss": 0.7637, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.736104635438498, "learning_rate": 1.999975251119501e-05, "loss": 0.7366, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.5335612079356686, "learning_rate": 1.9999734517828345e-05, "loss": 0.6593, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.6600035114710792, "learning_rate": 1.999971589312685e-05, "loss": 0.7751, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.7123815982846584, "learning_rate": 1.9999696637091698e-05, "loss": 0.7392, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.630631665076908, "learning_rate": 1.9999676749724103e-05, "loss": 0.7332, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.6337467422978231, "learning_rate": 1.9999656231025323e-05, "loss": 0.6824, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.5854489931811897, "learning_rate": 1.9999635080996655e-05, "loss": 0.698, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.674222207610974, "learning_rate": 1.9999613299639433e-05, "loss": 0.7406, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.6331969178252201, "learning_rate": 1.9999590886955033e-05, "loss": 0.7627, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.7665659329947683, "learning_rate": 1.999956784294487e-05, "loss": 0.7273, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.642254846355217, "learning_rate": 1.9999544167610396e-05, "loss": 0.6495, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.6550105513498534, "learning_rate": 1.999951986095311e-05, "loss": 0.7321, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.6336809249086557, "learning_rate": 1.9999494922974544e-05, "loss": 0.7034, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.5903316564562422, "learning_rate": 1.9999469353676272e-05, "loss": 0.7146, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.652225354870139, "learning_rate": 1.999944315305991e-05, "loss": 0.7124, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.6051610944242776, "learning_rate": 1.999941632112711e-05, "loss": 0.7083, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.6624895281927967, "learning_rate": 1.9999388857879568e-05, "loss": 0.6793, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.5903397519817577, "learning_rate": 1.9999360763319015e-05, "loss": 0.7418, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.6496411308973509, "learning_rate": 1.999933203744723e-05, "loss": 0.699, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.450748921056225, "learning_rate": 1.9999302680266023e-05, "loss": 0.6681, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.4417140394303158, "learning_rate": 1.9999272691777246e-05, "loss": 0.6271, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.6257541080465774, "learning_rate": 1.99992420719828e-05, "loss": 0.6785, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.4910585616627743, "learning_rate": 1.999921082088461e-05, "loss": 0.7389, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.6186063135659114, "learning_rate": 1.999917893848465e-05, "loss": 0.6467, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.6937318104122674, "learning_rate": 1.9999146424784938e-05, "loss": 0.7253, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.5915691938396197, "learning_rate": 1.9999113279787517e-05, "loss": 0.7449, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.7866388420647545, "learning_rate": 1.9999079503494496e-05, "loss": 0.7856, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.6184684361257564, "learning_rate": 1.9999045095907988e-05, "loss": 0.7174, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.3692111923599486, "learning_rate": 1.9999010057030183e-05, "loss": 0.661, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.647713814205106, "learning_rate": 1.9998974386863276e-05, "loss": 0.6867, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.9018199285631465, "learning_rate": 1.9998938085409534e-05, "loss": 0.731, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.562980883031733, "learning_rate": 1.9998901152671243e-05, "loss": 0.6987, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.6468384566767702, "learning_rate": 1.9998863588650732e-05, "loss": 0.7059, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.7768572801799298, "learning_rate": 1.9998825393350375e-05, "loss": 0.7415, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.6625920635919154, "learning_rate": 1.999878656677259e-05, "loss": 0.7469, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.6094740309384488, "learning_rate": 1.9998747108919815e-05, "loss": 0.6855, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.991797913997878, "learning_rate": 1.999870701979455e-05, "loss": 0.7273, "step": 451 }, { "epoch": 0.04, "grad_norm": 1.7667346269503605, "learning_rate": 1.9998666299399326e-05, "loss": 0.7284, "step": 452 }, { "epoch": 0.04, "grad_norm": 1.766153239672727, "learning_rate": 1.999862494773671e-05, "loss": 0.7407, "step": 453 }, { "epoch": 0.04, "grad_norm": 1.656801599742351, "learning_rate": 1.9998582964809317e-05, "loss": 0.7124, "step": 454 }, { "epoch": 0.04, "grad_norm": 1.7539647693684355, "learning_rate": 1.9998540350619793e-05, "loss": 0.7933, "step": 455 }, { "epoch": 0.04, "grad_norm": 1.668483824343145, "learning_rate": 1.9998497105170833e-05, "loss": 0.6857, "step": 456 }, { "epoch": 0.04, "grad_norm": 1.7266829090508202, "learning_rate": 1.9998453228465165e-05, "loss": 0.7264, "step": 457 }, { "epoch": 0.04, "grad_norm": 1.504589213075702, "learning_rate": 1.999840872050556e-05, "loss": 0.6976, "step": 458 }, { "epoch": 0.04, "grad_norm": 1.5411625201048247, "learning_rate": 1.999836358129483e-05, "loss": 0.6803, "step": 459 }, { "epoch": 0.04, "grad_norm": 1.6495119182136446, "learning_rate": 1.9998317810835815e-05, "loss": 0.6846, "step": 460 }, { "epoch": 0.04, "grad_norm": 1.5180157554522513, "learning_rate": 1.9998271409131417e-05, "loss": 0.7037, "step": 461 }, { "epoch": 0.04, "grad_norm": 1.7127400835580084, "learning_rate": 1.999822437618456e-05, "loss": 0.7223, "step": 462 }, { "epoch": 0.04, "grad_norm": 1.608780167659147, "learning_rate": 1.9998176711998215e-05, "loss": 0.7638, "step": 463 }, { "epoch": 0.04, "grad_norm": 1.5538310083092766, "learning_rate": 1.999812841657539e-05, "loss": 0.7472, "step": 464 }, { "epoch": 0.04, "grad_norm": 1.6357233849199189, "learning_rate": 1.9998079489919134e-05, "loss": 0.7257, "step": 465 }, { "epoch": 0.04, "grad_norm": 1.5671861186454064, "learning_rate": 1.9998029932032534e-05, "loss": 0.6688, "step": 466 }, { "epoch": 0.04, "grad_norm": 1.5890134850728637, "learning_rate": 1.9997979742918723e-05, "loss": 0.7639, "step": 467 }, { "epoch": 0.04, "grad_norm": 1.5196065614370386, "learning_rate": 1.999792892258087e-05, "loss": 0.6138, "step": 468 }, { "epoch": 0.04, "grad_norm": 1.5383873247623239, "learning_rate": 1.9997877471022182e-05, "loss": 0.6591, "step": 469 }, { "epoch": 0.04, "grad_norm": 1.645732015396448, "learning_rate": 1.9997825388245905e-05, "loss": 0.7215, "step": 470 }, { "epoch": 0.04, "grad_norm": 1.524508212479353, "learning_rate": 1.999777267425533e-05, "loss": 0.6901, "step": 471 }, { "epoch": 0.04, "grad_norm": 1.481353656344975, "learning_rate": 1.9997719329053782e-05, "loss": 0.7021, "step": 472 }, { "epoch": 0.04, "grad_norm": 1.6202897985297207, "learning_rate": 1.999766535264463e-05, "loss": 0.7857, "step": 473 }, { "epoch": 0.04, "grad_norm": 1.5237698705940714, "learning_rate": 1.9997610745031292e-05, "loss": 0.6699, "step": 474 }, { "epoch": 0.04, "grad_norm": 1.6958931486685858, "learning_rate": 1.99975555062172e-05, "loss": 0.7847, "step": 475 }, { "epoch": 0.04, "grad_norm": 1.8245844668413667, "learning_rate": 1.9997499636205847e-05, "loss": 0.7416, "step": 476 }, { "epoch": 0.04, "grad_norm": 1.7432899579231305, "learning_rate": 1.9997443135000765e-05, "loss": 0.7082, "step": 477 }, { "epoch": 0.04, "grad_norm": 1.6527655313431695, "learning_rate": 1.9997386002605515e-05, "loss": 0.7221, "step": 478 }, { "epoch": 0.04, "grad_norm": 1.526372576391039, "learning_rate": 1.999732823902371e-05, "loss": 0.7254, "step": 479 }, { "epoch": 0.04, "grad_norm": 1.50463548295059, "learning_rate": 1.9997269844258993e-05, "loss": 0.6663, "step": 480 }, { "epoch": 0.04, "grad_norm": 1.5833940828949773, "learning_rate": 1.999721081831505e-05, "loss": 0.6769, "step": 481 }, { "epoch": 0.04, "grad_norm": 1.5093683309782873, "learning_rate": 1.9997151161195613e-05, "loss": 0.6292, "step": 482 }, { "epoch": 0.04, "grad_norm": 1.5451827377593428, "learning_rate": 1.9997090872904442e-05, "loss": 0.7566, "step": 483 }, { "epoch": 0.04, "grad_norm": 1.652419356832569, "learning_rate": 1.9997029953445345e-05, "loss": 0.6829, "step": 484 }, { "epoch": 0.04, "grad_norm": 1.6704165791129075, "learning_rate": 1.999696840282217e-05, "loss": 0.678, "step": 485 }, { "epoch": 0.04, "grad_norm": 1.6164923754204596, "learning_rate": 1.9996906221038802e-05, "loss": 0.7532, "step": 486 }, { "epoch": 0.04, "grad_norm": 1.5693936672642468, "learning_rate": 1.999684340809917e-05, "loss": 0.6923, "step": 487 }, { "epoch": 0.04, "grad_norm": 1.678340069789809, "learning_rate": 1.9996779964007232e-05, "loss": 0.6951, "step": 488 }, { "epoch": 0.04, "grad_norm": 1.4737192056559285, "learning_rate": 1.9996715888767e-05, "loss": 0.6884, "step": 489 }, { "epoch": 0.04, "grad_norm": 1.4676956288112966, "learning_rate": 1.9996651182382518e-05, "loss": 0.7117, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.4944319696547814, "learning_rate": 1.999658584485787e-05, "loss": 0.6733, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.5298826379579882, "learning_rate": 1.9996519876197185e-05, "loss": 0.7322, "step": 492 }, { "epoch": 0.04, "grad_norm": 1.5099953776408808, "learning_rate": 1.999645327640462e-05, "loss": 0.7466, "step": 493 }, { "epoch": 0.04, "grad_norm": 1.4888009192841576, "learning_rate": 1.999638604548439e-05, "loss": 0.725, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.4929981526224776, "learning_rate": 1.9996318183440732e-05, "loss": 0.7063, "step": 495 }, { "epoch": 0.04, "grad_norm": 1.598305493345369, "learning_rate": 1.9996249690277934e-05, "loss": 0.7423, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.4226248304572933, "learning_rate": 1.999618056600032e-05, "loss": 0.6704, "step": 497 }, { "epoch": 0.04, "grad_norm": 1.7489322402155776, "learning_rate": 1.999611081061225e-05, "loss": 0.725, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.5527406704373163, "learning_rate": 1.999604042411813e-05, "loss": 0.6761, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.769345305317264, "learning_rate": 1.9995969406522412e-05, "loss": 0.7771, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.6652858338714835, "learning_rate": 1.9995897757829564e-05, "loss": 0.7055, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.663801217498132, "learning_rate": 1.9995825478044126e-05, "loss": 0.7385, "step": 502 }, { "epoch": 0.04, "grad_norm": 1.7078167048871067, "learning_rate": 1.999575256717065e-05, "loss": 0.7001, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.5668344269206198, "learning_rate": 1.9995679025213747e-05, "loss": 0.6665, "step": 504 }, { "epoch": 0.04, "grad_norm": 1.6364168239147343, "learning_rate": 1.9995604852178055e-05, "loss": 0.7096, "step": 505 }, { "epoch": 0.04, "grad_norm": 1.5765966436282393, "learning_rate": 1.9995530048068253e-05, "loss": 0.7309, "step": 506 }, { "epoch": 0.04, "grad_norm": 1.5192936807178008, "learning_rate": 1.9995454612889076e-05, "loss": 0.6978, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.6293084908077977, "learning_rate": 1.9995378546645274e-05, "loss": 0.6854, "step": 508 }, { "epoch": 0.04, "grad_norm": 1.667735456460159, "learning_rate": 1.999530184934166e-05, "loss": 0.7553, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.5729917377805436, "learning_rate": 1.9995224520983068e-05, "loss": 0.6877, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.564234495661959, "learning_rate": 1.9995146561574384e-05, "loss": 0.6676, "step": 511 }, { "epoch": 0.04, "grad_norm": 1.5438824616101234, "learning_rate": 1.9995067971120527e-05, "loss": 0.6212, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.8259770743360872, "learning_rate": 1.9994988749626464e-05, "loss": 0.702, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.5362335385464132, "learning_rate": 1.9994908897097195e-05, "loss": 0.7317, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.6953742372021292, "learning_rate": 1.999482841353776e-05, "loss": 0.7131, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.6076065272604732, "learning_rate": 1.999474729895324e-05, "loss": 0.7146, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.477019869898655, "learning_rate": 1.9994665553348757e-05, "loss": 0.6224, "step": 517 }, { "epoch": 0.04, "grad_norm": 1.5236802910247287, "learning_rate": 1.999458317672947e-05, "loss": 0.633, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.4541173028446257, "learning_rate": 1.9994500169100583e-05, "loss": 0.7029, "step": 519 }, { "epoch": 0.04, "grad_norm": 1.4766634951974353, "learning_rate": 1.9994416530467336e-05, "loss": 0.6331, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.688122387304618, "learning_rate": 1.9994332260835007e-05, "loss": 0.7227, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.4646164374410906, "learning_rate": 1.9994247360208924e-05, "loss": 0.675, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.5011274551065308, "learning_rate": 1.9994161828594435e-05, "loss": 0.6439, "step": 523 }, { "epoch": 0.04, "grad_norm": 1.6908090431319895, "learning_rate": 1.9994075665996952e-05, "loss": 0.6839, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.5981954023945975, "learning_rate": 1.9993988872421902e-05, "loss": 0.7136, "step": 525 }, { "epoch": 0.04, "grad_norm": 1.5478585709374062, "learning_rate": 1.999390144787478e-05, "loss": 0.7035, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.4579428104492471, "learning_rate": 1.9993813392361095e-05, "loss": 0.6551, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.5187776947636051, "learning_rate": 1.999372470588641e-05, "loss": 0.6541, "step": 528 }, { "epoch": 0.04, "grad_norm": 1.4904409842799748, "learning_rate": 1.999363538845632e-05, "loss": 0.6434, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.619864565403804, "learning_rate": 1.9993545440076473e-05, "loss": 0.7437, "step": 530 }, { "epoch": 0.04, "grad_norm": 1.6021414105917995, "learning_rate": 1.9993454860752538e-05, "loss": 0.6774, "step": 531 }, { "epoch": 0.04, "grad_norm": 1.5584038257804533, "learning_rate": 1.999336365049024e-05, "loss": 0.6537, "step": 532 }, { "epoch": 0.04, "grad_norm": 1.5625273892863119, "learning_rate": 1.9993271809295337e-05, "loss": 0.7039, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.4000568497559387, "learning_rate": 1.9993179337173624e-05, "loss": 0.6784, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.828352824303991, "learning_rate": 1.9993086234130944e-05, "loss": 0.732, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.6990779423744238, "learning_rate": 1.999299250017317e-05, "loss": 0.6641, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.604924316166706, "learning_rate": 1.9992898135306223e-05, "loss": 0.7196, "step": 537 }, { "epoch": 0.04, "grad_norm": 1.6965750638194799, "learning_rate": 1.999280313953606e-05, "loss": 0.6855, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.5202492241075667, "learning_rate": 1.999270751286868e-05, "loss": 0.6212, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.6066269606647534, "learning_rate": 1.9992611255310115e-05, "loss": 0.7084, "step": 540 }, { "epoch": 0.04, "grad_norm": 1.6071333597296182, "learning_rate": 1.9992514366866453e-05, "loss": 0.6485, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.5036425867189884, "learning_rate": 1.9992416847543802e-05, "loss": 0.7262, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.5939623373361334, "learning_rate": 1.9992318697348318e-05, "loss": 0.6811, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.8675489514738624, "learning_rate": 1.9992219916286205e-05, "loss": 0.7211, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.497878083787526, "learning_rate": 1.9992120504363694e-05, "loss": 0.6811, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.4579373321565814, "learning_rate": 1.9992020461587063e-05, "loss": 0.6915, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.591533990226762, "learning_rate": 1.9991919787962627e-05, "loss": 0.6991, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.5554914839734313, "learning_rate": 1.9991818483496747e-05, "loss": 0.6941, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.587951508841042, "learning_rate": 1.999171654819581e-05, "loss": 0.667, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.4680807333841988, "learning_rate": 1.999161398206626e-05, "loss": 0.6474, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.5560024968546469, "learning_rate": 1.999151078511457e-05, "loss": 0.6836, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.6578572949425674, "learning_rate": 1.999140695734725e-05, "loss": 0.6271, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.5658728245412266, "learning_rate": 1.9991302498770867e-05, "loss": 0.7176, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.5784429097767325, "learning_rate": 1.9991197409392004e-05, "loss": 0.6789, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.6372971707589818, "learning_rate": 1.9991091689217303e-05, "loss": 0.703, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.421081573115393, "learning_rate": 1.9990985338253434e-05, "loss": 0.7214, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.5860319062495565, "learning_rate": 1.9990878356507116e-05, "loss": 0.6939, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.6161604121398174, "learning_rate": 1.99907707439851e-05, "loss": 0.7171, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.587284662254865, "learning_rate": 1.9990662500694183e-05, "loss": 0.7268, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.5060141319381881, "learning_rate": 1.9990553626641194e-05, "loss": 0.698, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.4562455173145112, "learning_rate": 1.9990444121833016e-05, "loss": 0.658, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.4167032611084793, "learning_rate": 1.9990333986276552e-05, "loss": 0.6858, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.5886033676755311, "learning_rate": 1.999022321997876e-05, "loss": 0.6974, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.5288863331350755, "learning_rate": 1.9990111822946634e-05, "loss": 0.6808, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.2469602818591705, "learning_rate": 1.9989999795187206e-05, "loss": 0.6634, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.4832806217100252, "learning_rate": 1.998988713670755e-05, "loss": 0.6709, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.5277468254344768, "learning_rate": 1.998977384751478e-05, "loss": 0.7142, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.5811186996046123, "learning_rate": 1.9989659927616044e-05, "loss": 0.7135, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.6281699293384213, "learning_rate": 1.9989545377018538e-05, "loss": 0.6752, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.5158983750917991, "learning_rate": 1.9989430195729494e-05, "loss": 0.6805, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.6764062814021865, "learning_rate": 1.998931438375618e-05, "loss": 0.7161, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.4806756753167973, "learning_rate": 1.998919794110591e-05, "loss": 0.7263, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.4557835859341586, "learning_rate": 1.998908086778604e-05, "loss": 0.7002, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.5920415959315477, "learning_rate": 1.9988963163803958e-05, "loss": 0.7437, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.5396736596865324, "learning_rate": 1.9988844829167092e-05, "loss": 0.6971, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.6295908854475456, "learning_rate": 1.9988725863882922e-05, "loss": 0.7321, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.4903184459565266, "learning_rate": 1.998860626795895e-05, "loss": 0.69, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.5427135401330314, "learning_rate": 1.998848604140273e-05, "loss": 0.6935, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.4066609100175793, "learning_rate": 1.998836518422185e-05, "loss": 0.6507, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.5534434492894746, "learning_rate": 1.9988243696423947e-05, "loss": 0.6572, "step": 580 }, { "epoch": 0.05, "grad_norm": 1.5255378525100314, "learning_rate": 1.9988121578016683e-05, "loss": 0.693, "step": 581 }, { "epoch": 0.05, "grad_norm": 1.7531413086049314, "learning_rate": 1.9987998829007775e-05, "loss": 0.7463, "step": 582 }, { "epoch": 0.05, "grad_norm": 1.501387987108143, "learning_rate": 1.9987875449404965e-05, "loss": 0.6806, "step": 583 }, { "epoch": 0.05, "grad_norm": 1.5181521553761232, "learning_rate": 1.998775143921605e-05, "loss": 0.69, "step": 584 }, { "epoch": 0.05, "grad_norm": 1.7491486385822463, "learning_rate": 1.9987626798448858e-05, "loss": 0.6683, "step": 585 }, { "epoch": 0.05, "grad_norm": 1.5476621686426533, "learning_rate": 1.9987501527111253e-05, "loss": 0.6665, "step": 586 }, { "epoch": 0.05, "grad_norm": 1.5106475741270504, "learning_rate": 1.9987375625211155e-05, "loss": 0.6797, "step": 587 }, { "epoch": 0.05, "grad_norm": 1.5416137926811995, "learning_rate": 1.99872490927565e-05, "loss": 0.6635, "step": 588 }, { "epoch": 0.05, "grad_norm": 1.5484727256917714, "learning_rate": 1.9987121929755284e-05, "loss": 0.6813, "step": 589 }, { "epoch": 0.05, "grad_norm": 1.4053969444944152, "learning_rate": 1.9986994136215533e-05, "loss": 0.6086, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.4333779487023979, "learning_rate": 1.9986865712145316e-05, "loss": 0.6764, "step": 591 }, { "epoch": 0.05, "grad_norm": 1.4560090025214762, "learning_rate": 1.9986736657552742e-05, "loss": 0.6517, "step": 592 }, { "epoch": 0.05, "grad_norm": 2.267194743443912, "learning_rate": 1.9986606972445956e-05, "loss": 0.7425, "step": 593 }, { "epoch": 0.05, "grad_norm": 1.579606748605446, "learning_rate": 1.998647665683315e-05, "loss": 0.7427, "step": 594 }, { "epoch": 0.05, "grad_norm": 1.5422188397192407, "learning_rate": 1.998634571072255e-05, "loss": 0.701, "step": 595 }, { "epoch": 0.05, "grad_norm": 1.6001968143530119, "learning_rate": 1.998621413412242e-05, "loss": 0.6989, "step": 596 }, { "epoch": 0.05, "grad_norm": 1.5119614517202604, "learning_rate": 1.998608192704107e-05, "loss": 0.6976, "step": 597 }, { "epoch": 0.05, "grad_norm": 1.607026098945014, "learning_rate": 1.9985949089486847e-05, "loss": 0.6309, "step": 598 }, { "epoch": 0.05, "grad_norm": 1.4701575981050437, "learning_rate": 1.998581562146814e-05, "loss": 0.7231, "step": 599 }, { "epoch": 0.05, "grad_norm": 1.6280420878966146, "learning_rate": 1.998568152299337e-05, "loss": 0.7428, "step": 600 }, { "epoch": 0.05, "grad_norm": 1.3699178843009088, "learning_rate": 1.9985546794071006e-05, "loss": 0.6128, "step": 601 }, { "epoch": 0.05, "grad_norm": 1.5123627151102557, "learning_rate": 1.9985411434709553e-05, "loss": 0.6496, "step": 602 }, { "epoch": 0.05, "grad_norm": 1.4369175601743995, "learning_rate": 1.998527544491756e-05, "loss": 0.6871, "step": 603 }, { "epoch": 0.05, "grad_norm": 1.5328196051974403, "learning_rate": 1.998513882470361e-05, "loss": 0.6637, "step": 604 }, { "epoch": 0.05, "grad_norm": 1.4953447264703434, "learning_rate": 1.998500157407633e-05, "loss": 0.6365, "step": 605 }, { "epoch": 0.05, "grad_norm": 1.4521188841982653, "learning_rate": 1.9984863693044385e-05, "loss": 0.6142, "step": 606 }, { "epoch": 0.05, "grad_norm": 1.5928050212897833, "learning_rate": 1.998472518161648e-05, "loss": 0.7354, "step": 607 }, { "epoch": 0.05, "grad_norm": 1.5919485944279186, "learning_rate": 1.998458603980136e-05, "loss": 0.7091, "step": 608 }, { "epoch": 0.05, "grad_norm": 1.3083587891031876, "learning_rate": 1.998444626760781e-05, "loss": 0.6477, "step": 609 }, { "epoch": 0.05, "grad_norm": 1.3466313984866822, "learning_rate": 1.9984305865044654e-05, "loss": 0.6531, "step": 610 }, { "epoch": 0.05, "grad_norm": 1.5911592107315577, "learning_rate": 1.9984164832120755e-05, "loss": 0.7384, "step": 611 }, { "epoch": 0.05, "grad_norm": 1.6353324900870023, "learning_rate": 1.998402316884502e-05, "loss": 0.7327, "step": 612 }, { "epoch": 0.05, "grad_norm": 1.653308631779101, "learning_rate": 1.998388087522639e-05, "loss": 0.671, "step": 613 }, { "epoch": 0.05, "grad_norm": 1.4320412526715238, "learning_rate": 1.9983737951273854e-05, "loss": 0.6804, "step": 614 }, { "epoch": 0.05, "grad_norm": 1.5341690422480376, "learning_rate": 1.9983594396996428e-05, "loss": 0.705, "step": 615 }, { "epoch": 0.05, "grad_norm": 1.6323730023277883, "learning_rate": 1.998345021240318e-05, "loss": 0.6864, "step": 616 }, { "epoch": 0.05, "grad_norm": 1.5669000089982936, "learning_rate": 1.9983305397503214e-05, "loss": 0.636, "step": 617 }, { "epoch": 0.05, "grad_norm": 1.501495490032769, "learning_rate": 1.9983159952305668e-05, "loss": 0.6606, "step": 618 }, { "epoch": 0.05, "grad_norm": 1.5710918712936695, "learning_rate": 1.998301387681973e-05, "loss": 0.64, "step": 619 }, { "epoch": 0.05, "grad_norm": 1.4704143752378123, "learning_rate": 1.9982867171054622e-05, "loss": 0.6388, "step": 620 }, { "epoch": 0.05, "grad_norm": 1.5505054142084593, "learning_rate": 1.9982719835019604e-05, "loss": 0.7019, "step": 621 }, { "epoch": 0.05, "grad_norm": 1.5782432889513178, "learning_rate": 1.9982571868723975e-05, "loss": 0.7124, "step": 622 }, { "epoch": 0.05, "grad_norm": 1.5165856846299397, "learning_rate": 1.9982423272177087e-05, "loss": 0.6638, "step": 623 }, { "epoch": 0.05, "grad_norm": 1.621152872133489, "learning_rate": 1.998227404538831e-05, "loss": 0.7407, "step": 624 }, { "epoch": 0.05, "grad_norm": 1.5111365170462379, "learning_rate": 1.998212418836707e-05, "loss": 0.7225, "step": 625 }, { "epoch": 0.05, "grad_norm": 1.5483996651316276, "learning_rate": 1.998197370112283e-05, "loss": 0.7263, "step": 626 }, { "epoch": 0.05, "grad_norm": 1.399157163115521, "learning_rate": 1.9981822583665094e-05, "loss": 0.6447, "step": 627 }, { "epoch": 0.05, "grad_norm": 1.4471568943214923, "learning_rate": 1.9981670836003396e-05, "loss": 0.6718, "step": 628 }, { "epoch": 0.05, "grad_norm": 1.4901968575050888, "learning_rate": 1.998151845814732e-05, "loss": 0.6279, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.5009167571801751, "learning_rate": 1.9981365450106484e-05, "loss": 0.6916, "step": 630 }, { "epoch": 0.05, "grad_norm": 1.597806816187955, "learning_rate": 1.9981211811890554e-05, "loss": 0.6974, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.576796548519489, "learning_rate": 1.998105754350922e-05, "loss": 0.7309, "step": 632 }, { "epoch": 0.05, "grad_norm": 1.5228990434634961, "learning_rate": 1.9980902644972234e-05, "loss": 0.7332, "step": 633 }, { "epoch": 0.05, "grad_norm": 1.5724283264516041, "learning_rate": 1.998074711628937e-05, "loss": 0.6653, "step": 634 }, { "epoch": 0.05, "grad_norm": 1.5764066197560027, "learning_rate": 1.9980590957470437e-05, "loss": 0.6611, "step": 635 }, { "epoch": 0.05, "grad_norm": 1.624152035648111, "learning_rate": 1.9980434168525315e-05, "loss": 0.6929, "step": 636 }, { "epoch": 0.05, "grad_norm": 1.4293267467178583, "learning_rate": 1.9980276749463886e-05, "loss": 0.6679, "step": 637 }, { "epoch": 0.05, "grad_norm": 1.5297459592159572, "learning_rate": 1.9980118700296095e-05, "loss": 0.647, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.5407633411746564, "learning_rate": 1.997996002103192e-05, "loss": 0.6684, "step": 639 }, { "epoch": 0.05, "grad_norm": 1.4553574671565237, "learning_rate": 1.997980071168138e-05, "loss": 0.6923, "step": 640 }, { "epoch": 0.05, "grad_norm": 1.5706140788465575, "learning_rate": 1.9979640772254528e-05, "loss": 0.7113, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.500585441782913, "learning_rate": 1.997948020276147e-05, "loss": 0.6686, "step": 642 }, { "epoch": 0.05, "grad_norm": 1.6194846382554953, "learning_rate": 1.9979319003212337e-05, "loss": 0.696, "step": 643 }, { "epoch": 0.05, "grad_norm": 1.6542761214754882, "learning_rate": 1.997915717361731e-05, "loss": 0.7594, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.5234030695227856, "learning_rate": 1.9978994713986606e-05, "loss": 0.7039, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.5943618048911075, "learning_rate": 1.9978831624330483e-05, "loss": 0.6635, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.4703588399364698, "learning_rate": 1.997866790465923e-05, "loss": 0.7, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.484639435353238, "learning_rate": 1.997850355498319e-05, "loss": 0.6823, "step": 648 }, { "epoch": 0.05, "grad_norm": 1.4989152800964898, "learning_rate": 1.9978338575312742e-05, "loss": 0.7432, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.4212172108057681, "learning_rate": 1.9978172965658297e-05, "loss": 0.6346, "step": 650 }, { "epoch": 0.05, "grad_norm": 1.5397759347630786, "learning_rate": 1.997800672603031e-05, "loss": 0.7287, "step": 651 }, { "epoch": 0.05, "grad_norm": 1.471766681289549, "learning_rate": 1.997783985643928e-05, "loss": 0.6427, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.406277168859184, "learning_rate": 1.997767235689574e-05, "loss": 0.6361, "step": 653 }, { "epoch": 0.05, "grad_norm": 1.540609366625842, "learning_rate": 1.9977504227410268e-05, "loss": 0.7265, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.4956908318409674, "learning_rate": 1.9977335467993474e-05, "loss": 0.7131, "step": 655 }, { "epoch": 0.05, "grad_norm": 1.7909151171788398, "learning_rate": 1.997716607865602e-05, "loss": 0.6775, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.7897552082035544, "learning_rate": 1.9976996059408595e-05, "loss": 0.7351, "step": 657 }, { "epoch": 0.05, "grad_norm": 1.4937434048686618, "learning_rate": 1.997682541026193e-05, "loss": 0.657, "step": 658 }, { "epoch": 0.05, "grad_norm": 1.5901092382609006, "learning_rate": 1.997665413122681e-05, "loss": 0.7749, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.4276446980963735, "learning_rate": 1.9976482222314034e-05, "loss": 0.7159, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.5174070283919316, "learning_rate": 1.997630968353447e-05, "loss": 0.6609, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.586215741784857, "learning_rate": 1.9976136514899e-05, "loss": 0.6905, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.5148969651741977, "learning_rate": 1.9975962716418565e-05, "loss": 0.7307, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.4337425050813954, "learning_rate": 1.9975788288104132e-05, "loss": 0.712, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.3863645719164037, "learning_rate": 1.997561322996672e-05, "loss": 0.643, "step": 665 }, { "epoch": 0.05, "grad_norm": 1.6304058974927758, "learning_rate": 1.9975437542017372e-05, "loss": 0.7391, "step": 666 }, { "epoch": 0.05, "grad_norm": 1.5533666319136317, "learning_rate": 1.9975261224267187e-05, "loss": 0.6778, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.6759539606330542, "learning_rate": 1.9975084276727298e-05, "loss": 0.812, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.5050305412209861, "learning_rate": 1.9974906699408874e-05, "loss": 0.6326, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.428899912874864, "learning_rate": 1.9974728492323122e-05, "loss": 0.6581, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.6176892667761549, "learning_rate": 1.9974549655481302e-05, "loss": 0.7051, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.6651281249090957, "learning_rate": 1.9974370188894698e-05, "loss": 0.745, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.4779479777687863, "learning_rate": 1.9974190092574645e-05, "loss": 0.6708, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.4294174856552997, "learning_rate": 1.997400936653251e-05, "loss": 0.6468, "step": 674 }, { "epoch": 0.05, "grad_norm": 1.585851807933539, "learning_rate": 1.9973828010779702e-05, "loss": 0.7352, "step": 675 }, { "epoch": 0.05, "grad_norm": 1.5257996073144429, "learning_rate": 1.9973646025327678e-05, "loss": 0.686, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.4566771048747251, "learning_rate": 1.9973463410187922e-05, "loss": 0.6547, "step": 677 }, { "epoch": 0.05, "grad_norm": 1.4923679582215776, "learning_rate": 1.9973280165371964e-05, "loss": 0.7021, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.619787806989808, "learning_rate": 1.9973096290891374e-05, "loss": 0.756, "step": 679 }, { "epoch": 0.05, "grad_norm": 1.5322144254418917, "learning_rate": 1.997291178675776e-05, "loss": 0.6807, "step": 680 }, { "epoch": 0.05, "grad_norm": 1.4520212719360082, "learning_rate": 1.997272665298277e-05, "loss": 0.6949, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.3920593151329614, "learning_rate": 1.99725408895781e-05, "loss": 0.6489, "step": 682 }, { "epoch": 0.05, "grad_norm": 1.383893468736846, "learning_rate": 1.9972354496555467e-05, "loss": 0.7454, "step": 683 }, { "epoch": 0.05, "grad_norm": 1.5615148872107998, "learning_rate": 1.997216747392664e-05, "loss": 0.7026, "step": 684 }, { "epoch": 0.05, "grad_norm": 1.4900843793543455, "learning_rate": 1.9971979821703437e-05, "loss": 0.7047, "step": 685 }, { "epoch": 0.05, "grad_norm": 1.4819541062470392, "learning_rate": 1.99717915398977e-05, "loss": 0.6941, "step": 686 }, { "epoch": 0.05, "grad_norm": 1.4353172481897687, "learning_rate": 1.9971602628521312e-05, "loss": 0.6921, "step": 687 }, { "epoch": 0.05, "grad_norm": 1.4927750474281518, "learning_rate": 1.9971413087586207e-05, "loss": 0.6746, "step": 688 }, { "epoch": 0.05, "grad_norm": 1.5006879182754358, "learning_rate": 1.9971222917104344e-05, "loss": 0.6387, "step": 689 }, { "epoch": 0.05, "grad_norm": 1.5965338684131045, "learning_rate": 1.9971032117087736e-05, "loss": 0.6591, "step": 690 }, { "epoch": 0.05, "grad_norm": 1.5332447216441298, "learning_rate": 1.9970840687548425e-05, "loss": 0.6956, "step": 691 }, { "epoch": 0.05, "grad_norm": 1.5038912685688457, "learning_rate": 1.99706486284985e-05, "loss": 0.7015, "step": 692 }, { "epoch": 0.05, "grad_norm": 1.3650321756140882, "learning_rate": 1.9970455939950085e-05, "loss": 0.6381, "step": 693 }, { "epoch": 0.05, "grad_norm": 1.449135667419262, "learning_rate": 1.9970262621915348e-05, "loss": 0.6802, "step": 694 }, { "epoch": 0.05, "grad_norm": 1.511772371666896, "learning_rate": 1.9970068674406487e-05, "loss": 0.7023, "step": 695 }, { "epoch": 0.05, "grad_norm": 1.403623331865085, "learning_rate": 1.9969874097435754e-05, "loss": 0.6135, "step": 696 }, { "epoch": 0.05, "grad_norm": 1.6073687001143515, "learning_rate": 1.996967889101543e-05, "loss": 0.6853, "step": 697 }, { "epoch": 0.05, "grad_norm": 1.4207683910097149, "learning_rate": 1.9969483055157846e-05, "loss": 0.6793, "step": 698 }, { "epoch": 0.05, "grad_norm": 1.5201503404503816, "learning_rate": 1.9969286589875358e-05, "loss": 0.6931, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.413567050526946, "learning_rate": 1.9969089495180372e-05, "loss": 0.7178, "step": 700 }, { "epoch": 0.05, "grad_norm": 1.458848571585027, "learning_rate": 1.9968891771085334e-05, "loss": 0.6818, "step": 701 }, { "epoch": 0.05, "grad_norm": 1.5337122369546061, "learning_rate": 1.996869341760272e-05, "loss": 0.765, "step": 702 }, { "epoch": 0.05, "grad_norm": 1.423220909214009, "learning_rate": 1.9968494434745065e-05, "loss": 0.6991, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.4333970769118678, "learning_rate": 1.9968294822524923e-05, "loss": 0.6886, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.4869486933144944, "learning_rate": 1.99680945809549e-05, "loss": 0.7294, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.447119989931308, "learning_rate": 1.9967893710047638e-05, "loss": 0.6627, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.54846110090175, "learning_rate": 1.9967692209815818e-05, "loss": 0.7537, "step": 707 }, { "epoch": 0.05, "grad_norm": 1.429511220935334, "learning_rate": 1.996749008027216e-05, "loss": 0.6193, "step": 708 }, { "epoch": 0.06, "grad_norm": 1.5018334310035786, "learning_rate": 1.9967287321429432e-05, "loss": 0.6831, "step": 709 }, { "epoch": 0.06, "grad_norm": 1.5922078918409035, "learning_rate": 1.996708393330043e-05, "loss": 0.7098, "step": 710 }, { "epoch": 0.06, "grad_norm": 1.4807718012098625, "learning_rate": 1.9966879915897996e-05, "loss": 0.6938, "step": 711 }, { "epoch": 0.06, "grad_norm": 1.4376444536688462, "learning_rate": 1.996667526923501e-05, "loss": 0.7138, "step": 712 }, { "epoch": 0.06, "grad_norm": 1.3909481830310941, "learning_rate": 1.996646999332439e-05, "loss": 0.662, "step": 713 }, { "epoch": 0.06, "grad_norm": 1.4544940365026182, "learning_rate": 1.9966264088179105e-05, "loss": 0.6851, "step": 714 }, { "epoch": 0.06, "grad_norm": 1.3572577288062038, "learning_rate": 1.9966057553812144e-05, "loss": 0.7007, "step": 715 }, { "epoch": 0.06, "grad_norm": 1.496013748778949, "learning_rate": 1.9965850390236554e-05, "loss": 0.6698, "step": 716 }, { "epoch": 0.06, "grad_norm": 1.488084995416177, "learning_rate": 1.9965642597465412e-05, "loss": 0.7788, "step": 717 }, { "epoch": 0.06, "grad_norm": 1.4330696166458783, "learning_rate": 1.9965434175511837e-05, "loss": 0.6799, "step": 718 }, { "epoch": 0.06, "grad_norm": 1.4924156453944606, "learning_rate": 1.9965225124388982e-05, "loss": 0.6159, "step": 719 }, { "epoch": 0.06, "grad_norm": 1.533901488044266, "learning_rate": 1.9965015444110058e-05, "loss": 0.6911, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.5045640175797126, "learning_rate": 1.9964805134688294e-05, "loss": 0.6553, "step": 721 }, { "epoch": 0.06, "grad_norm": 1.365117451432009, "learning_rate": 1.996459419613697e-05, "loss": 0.6434, "step": 722 }, { "epoch": 0.06, "grad_norm": 1.48307486364579, "learning_rate": 1.9964382628469403e-05, "loss": 0.6738, "step": 723 }, { "epoch": 0.06, "grad_norm": 1.4180241513529275, "learning_rate": 1.9964170431698953e-05, "loss": 0.6418, "step": 724 }, { "epoch": 0.06, "grad_norm": 1.545127420942442, "learning_rate": 1.9963957605839014e-05, "loss": 0.7338, "step": 725 }, { "epoch": 0.06, "grad_norm": 1.4687238650330188, "learning_rate": 1.9963744150903026e-05, "loss": 0.6834, "step": 726 }, { "epoch": 0.06, "grad_norm": 1.47330215819562, "learning_rate": 1.996353006690446e-05, "loss": 0.7379, "step": 727 }, { "epoch": 0.06, "grad_norm": 1.319754126942464, "learning_rate": 1.996331535385684e-05, "loss": 0.6928, "step": 728 }, { "epoch": 0.06, "grad_norm": 1.3881803734601454, "learning_rate": 1.9963100011773716e-05, "loss": 0.6286, "step": 729 }, { "epoch": 0.06, "grad_norm": 1.5061512230106704, "learning_rate": 1.9962884040668686e-05, "loss": 0.6475, "step": 730 }, { "epoch": 0.06, "grad_norm": 1.4580290704034682, "learning_rate": 1.9962667440555383e-05, "loss": 0.6568, "step": 731 }, { "epoch": 0.06, "grad_norm": 1.5634361514433177, "learning_rate": 1.9962450211447485e-05, "loss": 0.7047, "step": 732 }, { "epoch": 0.06, "grad_norm": 1.478199450047416, "learning_rate": 1.9962232353358707e-05, "loss": 0.6478, "step": 733 }, { "epoch": 0.06, "grad_norm": 1.433620691695921, "learning_rate": 1.99620138663028e-05, "loss": 0.6053, "step": 734 }, { "epoch": 0.06, "grad_norm": 1.5141799172616108, "learning_rate": 1.9961794750293558e-05, "loss": 0.6871, "step": 735 }, { "epoch": 0.06, "grad_norm": 1.5598473729761482, "learning_rate": 1.9961575005344822e-05, "loss": 0.7024, "step": 736 }, { "epoch": 0.06, "grad_norm": 1.3240783076797853, "learning_rate": 1.996135463147046e-05, "loss": 0.6251, "step": 737 }, { "epoch": 0.06, "grad_norm": 1.350872767863367, "learning_rate": 1.9961133628684382e-05, "loss": 0.6219, "step": 738 }, { "epoch": 0.06, "grad_norm": 1.6001908546105366, "learning_rate": 1.996091199700055e-05, "loss": 0.7471, "step": 739 }, { "epoch": 0.06, "grad_norm": 1.3954985012974443, "learning_rate": 1.9960689736432952e-05, "loss": 0.6987, "step": 740 }, { "epoch": 0.06, "grad_norm": 1.412274109661438, "learning_rate": 1.996046684699562e-05, "loss": 0.672, "step": 741 }, { "epoch": 0.06, "grad_norm": 1.5066987504890756, "learning_rate": 1.9960243328702628e-05, "loss": 0.6547, "step": 742 }, { "epoch": 0.06, "grad_norm": 1.4124857083163054, "learning_rate": 1.9960019181568082e-05, "loss": 0.6698, "step": 743 }, { "epoch": 0.06, "grad_norm": 1.397114388863728, "learning_rate": 1.995979440560614e-05, "loss": 0.7203, "step": 744 }, { "epoch": 0.06, "grad_norm": 1.417821030501763, "learning_rate": 1.9959569000830993e-05, "loss": 0.7162, "step": 745 }, { "epoch": 0.06, "grad_norm": 1.409900420169347, "learning_rate": 1.995934296725687e-05, "loss": 0.6632, "step": 746 }, { "epoch": 0.06, "grad_norm": 1.5140070980868126, "learning_rate": 1.9959116304898045e-05, "loss": 0.7177, "step": 747 }, { "epoch": 0.06, "grad_norm": 1.5410523694278575, "learning_rate": 1.995888901376882e-05, "loss": 0.6075, "step": 748 }, { "epoch": 0.06, "grad_norm": 1.4903310841869246, "learning_rate": 1.9958661093883552e-05, "loss": 0.6364, "step": 749 }, { "epoch": 0.06, "grad_norm": 1.5639198718362435, "learning_rate": 1.995843254525663e-05, "loss": 0.7242, "step": 750 }, { "epoch": 0.06, "grad_norm": 1.4392599858767008, "learning_rate": 1.9958203367902482e-05, "loss": 0.657, "step": 751 }, { "epoch": 0.06, "grad_norm": 1.372316950558404, "learning_rate": 1.995797356183558e-05, "loss": 0.6053, "step": 752 }, { "epoch": 0.06, "grad_norm": 1.4731749574770556, "learning_rate": 1.9957743127070427e-05, "loss": 0.6608, "step": 753 }, { "epoch": 0.06, "grad_norm": 1.5437065581722909, "learning_rate": 1.995751206362158e-05, "loss": 0.6605, "step": 754 }, { "epoch": 0.06, "grad_norm": 1.4941770062972664, "learning_rate": 1.9957280371503617e-05, "loss": 0.7149, "step": 755 }, { "epoch": 0.06, "grad_norm": 1.5275310586871569, "learning_rate": 1.9957048050731175e-05, "loss": 0.6705, "step": 756 }, { "epoch": 0.06, "grad_norm": 1.3402932972687553, "learning_rate": 1.9956815101318916e-05, "loss": 0.6265, "step": 757 }, { "epoch": 0.06, "grad_norm": 1.4207912968358263, "learning_rate": 1.995658152328155e-05, "loss": 0.708, "step": 758 }, { "epoch": 0.06, "grad_norm": 1.4574596603595302, "learning_rate": 1.9956347316633824e-05, "loss": 0.6417, "step": 759 }, { "epoch": 0.06, "grad_norm": 1.3379912178481204, "learning_rate": 1.995611248139052e-05, "loss": 0.6392, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.4350808561453696, "learning_rate": 1.995587701756647e-05, "loss": 0.6429, "step": 761 }, { "epoch": 0.06, "grad_norm": 1.4937766834892385, "learning_rate": 1.9955640925176543e-05, "loss": 0.7354, "step": 762 }, { "epoch": 0.06, "grad_norm": 1.457003268466653, "learning_rate": 1.995540420423564e-05, "loss": 0.6066, "step": 763 }, { "epoch": 0.06, "grad_norm": 1.5086050371945936, "learning_rate": 1.99551668547587e-05, "loss": 0.7269, "step": 764 }, { "epoch": 0.06, "grad_norm": 1.578153931003182, "learning_rate": 1.9954928876760718e-05, "loss": 0.7003, "step": 765 }, { "epoch": 0.06, "grad_norm": 1.5626557844703117, "learning_rate": 1.9954690270256717e-05, "loss": 0.741, "step": 766 }, { "epoch": 0.06, "grad_norm": 1.3792139417876308, "learning_rate": 1.9954451035261754e-05, "loss": 0.6957, "step": 767 }, { "epoch": 0.06, "grad_norm": 1.4373695480199493, "learning_rate": 1.9954211171790946e-05, "loss": 0.6691, "step": 768 }, { "epoch": 0.06, "grad_norm": 1.5044349434688922, "learning_rate": 1.9953970679859425e-05, "loss": 0.7092, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.4607424682339447, "learning_rate": 1.9953729559482383e-05, "loss": 0.6683, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.4423507449253523, "learning_rate": 1.9953487810675036e-05, "loss": 0.7058, "step": 771 }, { "epoch": 0.06, "grad_norm": 1.2487904895866315, "learning_rate": 1.995324543345265e-05, "loss": 0.6222, "step": 772 }, { "epoch": 0.06, "grad_norm": 1.401285308678884, "learning_rate": 1.995300242783053e-05, "loss": 0.6654, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.3711359609860783, "learning_rate": 1.9952758793824016e-05, "loss": 0.6917, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.5597283767544046, "learning_rate": 1.995251453144849e-05, "loss": 0.7009, "step": 775 }, { "epoch": 0.06, "grad_norm": 1.412779674017897, "learning_rate": 1.995226964071937e-05, "loss": 0.7018, "step": 776 }, { "epoch": 0.06, "grad_norm": 1.3907166782976859, "learning_rate": 1.9952024121652122e-05, "loss": 0.6661, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.3705489769068018, "learning_rate": 1.9951777974262247e-05, "loss": 0.7182, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.4892289956228386, "learning_rate": 1.9951531198565287e-05, "loss": 0.7256, "step": 779 }, { "epoch": 0.06, "grad_norm": 1.4639856698997271, "learning_rate": 1.9951283794576814e-05, "loss": 0.6965, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.421904008171216, "learning_rate": 1.9951035762312453e-05, "loss": 0.6684, "step": 781 }, { "epoch": 0.06, "grad_norm": 1.4630548973187436, "learning_rate": 1.995078710178787e-05, "loss": 0.7054, "step": 782 }, { "epoch": 0.06, "grad_norm": 1.4092327274484135, "learning_rate": 1.9950537813018753e-05, "loss": 0.6435, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.5645408272419137, "learning_rate": 1.9950287896020846e-05, "loss": 0.6852, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.4678855949590572, "learning_rate": 1.995003735080993e-05, "loss": 0.7105, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.5484468584903797, "learning_rate": 1.9949786177401816e-05, "loss": 0.7102, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.4400869394919489, "learning_rate": 1.994953437581237e-05, "loss": 0.6719, "step": 787 }, { "epoch": 0.06, "grad_norm": 1.5901981493001038, "learning_rate": 1.9949281946057482e-05, "loss": 0.724, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.4651656547342995, "learning_rate": 1.99490288881531e-05, "loss": 0.6788, "step": 789 }, { "epoch": 0.06, "grad_norm": 1.3357217179420204, "learning_rate": 1.994877520211519e-05, "loss": 0.7071, "step": 790 }, { "epoch": 0.06, "grad_norm": 1.4526680668886418, "learning_rate": 1.9948520887959772e-05, "loss": 0.6997, "step": 791 }, { "epoch": 0.06, "grad_norm": 1.4834590294261867, "learning_rate": 1.9948265945702905e-05, "loss": 0.6164, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.429815151164952, "learning_rate": 1.994801037536068e-05, "loss": 0.7065, "step": 793 }, { "epoch": 0.06, "grad_norm": 1.4708895419495984, "learning_rate": 1.9947754176949238e-05, "loss": 0.7069, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.3637496449638764, "learning_rate": 1.9947497350484748e-05, "loss": 0.6075, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.487163694816019, "learning_rate": 1.994723989598343e-05, "loss": 0.6359, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.314120518404876, "learning_rate": 1.9946981813461534e-05, "loss": 0.6578, "step": 797 }, { "epoch": 0.06, "grad_norm": 1.4543325677508736, "learning_rate": 1.994672310293536e-05, "loss": 0.7004, "step": 798 }, { "epoch": 0.06, "grad_norm": 1.4491741656825412, "learning_rate": 1.9946463764421236e-05, "loss": 0.7365, "step": 799 }, { "epoch": 0.06, "grad_norm": 1.5457196014736971, "learning_rate": 1.994620379793554e-05, "loss": 0.7442, "step": 800 }, { "epoch": 0.06, "grad_norm": 1.5056932013786395, "learning_rate": 1.9945943203494677e-05, "loss": 0.6771, "step": 801 }, { "epoch": 0.06, "grad_norm": 1.4700297198243129, "learning_rate": 1.994568198111511e-05, "loss": 0.5979, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.419040999628735, "learning_rate": 1.9945420130813327e-05, "loss": 0.7499, "step": 803 }, { "epoch": 0.06, "grad_norm": 1.4389450439499267, "learning_rate": 1.9945157652605854e-05, "loss": 0.6901, "step": 804 }, { "epoch": 0.06, "grad_norm": 1.480339024479065, "learning_rate": 1.9944894546509276e-05, "loss": 0.7266, "step": 805 }, { "epoch": 0.06, "grad_norm": 1.4854144823540691, "learning_rate": 1.9944630812540188e-05, "loss": 0.6809, "step": 806 }, { "epoch": 0.06, "grad_norm": 1.62295579712116, "learning_rate": 1.9944366450715256e-05, "loss": 0.695, "step": 807 }, { "epoch": 0.06, "grad_norm": 1.3998409453367233, "learning_rate": 1.994410146105116e-05, "loss": 0.6088, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.3946552381773603, "learning_rate": 1.9943835843564635e-05, "loss": 0.6593, "step": 809 }, { "epoch": 0.06, "grad_norm": 1.3762848226522915, "learning_rate": 1.994356959827245e-05, "loss": 0.7106, "step": 810 }, { "epoch": 0.06, "grad_norm": 1.3314331591335313, "learning_rate": 1.9943302725191416e-05, "loss": 0.6667, "step": 811 }, { "epoch": 0.06, "grad_norm": 1.543037432337072, "learning_rate": 1.9943035224338375e-05, "loss": 0.7705, "step": 812 }, { "epoch": 0.06, "grad_norm": 1.315013884917083, "learning_rate": 1.9942767095730225e-05, "loss": 0.6175, "step": 813 }, { "epoch": 0.06, "grad_norm": 1.404428489151962, "learning_rate": 1.994249833938389e-05, "loss": 0.69, "step": 814 }, { "epoch": 0.06, "grad_norm": 1.3887899914605013, "learning_rate": 1.9942228955316342e-05, "loss": 0.6877, "step": 815 }, { "epoch": 0.06, "grad_norm": 1.5660911490464346, "learning_rate": 1.994195894354458e-05, "loss": 0.7207, "step": 816 }, { "epoch": 0.06, "grad_norm": 1.315405717275919, "learning_rate": 1.9941688304085654e-05, "loss": 0.6147, "step": 817 }, { "epoch": 0.06, "grad_norm": 1.606085517269158, "learning_rate": 1.994141703695666e-05, "loss": 0.6923, "step": 818 }, { "epoch": 0.06, "grad_norm": 1.495239492007301, "learning_rate": 1.994114514217471e-05, "loss": 0.6948, "step": 819 }, { "epoch": 0.06, "grad_norm": 1.4803970037704692, "learning_rate": 1.994087261975698e-05, "loss": 0.7193, "step": 820 }, { "epoch": 0.06, "grad_norm": 1.4623073744465764, "learning_rate": 1.9940599469720675e-05, "loss": 0.6743, "step": 821 }, { "epoch": 0.06, "grad_norm": 1.3700775118988122, "learning_rate": 1.994032569208304e-05, "loss": 0.6744, "step": 822 }, { "epoch": 0.06, "grad_norm": 1.4744522920524408, "learning_rate": 1.9940051286861357e-05, "loss": 0.6349, "step": 823 }, { "epoch": 0.06, "grad_norm": 1.3809143228133267, "learning_rate": 1.993977625407295e-05, "loss": 0.7152, "step": 824 }, { "epoch": 0.06, "grad_norm": 1.3811249861954153, "learning_rate": 1.993950059373519e-05, "loss": 0.6776, "step": 825 }, { "epoch": 0.06, "grad_norm": 1.387332447348377, "learning_rate": 1.9939224305865474e-05, "loss": 0.6825, "step": 826 }, { "epoch": 0.06, "grad_norm": 1.4376772895619236, "learning_rate": 1.993894739048125e-05, "loss": 0.7792, "step": 827 }, { "epoch": 0.06, "grad_norm": 1.4289809185059477, "learning_rate": 1.9938669847599996e-05, "loss": 0.7142, "step": 828 }, { "epoch": 0.06, "grad_norm": 1.429826656719188, "learning_rate": 1.993839167723924e-05, "loss": 0.7495, "step": 829 }, { "epoch": 0.06, "grad_norm": 1.4351355138452868, "learning_rate": 1.993811287941654e-05, "loss": 0.698, "step": 830 }, { "epoch": 0.06, "grad_norm": 1.6273453000519578, "learning_rate": 1.9937833454149503e-05, "loss": 0.6459, "step": 831 }, { "epoch": 0.06, "grad_norm": 1.4846628010469245, "learning_rate": 1.9937553401455763e-05, "loss": 0.6972, "step": 832 }, { "epoch": 0.06, "grad_norm": 1.3958818749864583, "learning_rate": 1.993727272135301e-05, "loss": 0.6921, "step": 833 }, { "epoch": 0.06, "grad_norm": 1.493055560484105, "learning_rate": 1.993699141385896e-05, "loss": 0.7243, "step": 834 }, { "epoch": 0.06, "grad_norm": 1.5428751856768164, "learning_rate": 1.9936709478991368e-05, "loss": 0.6991, "step": 835 }, { "epoch": 0.06, "grad_norm": 1.433996824721222, "learning_rate": 1.9936426916768047e-05, "loss": 0.6577, "step": 836 }, { "epoch": 0.06, "grad_norm": 1.406986721759708, "learning_rate": 1.9936143727206826e-05, "loss": 0.6756, "step": 837 }, { "epoch": 0.07, "grad_norm": 1.4491021862605051, "learning_rate": 1.993585991032559e-05, "loss": 0.6922, "step": 838 }, { "epoch": 0.07, "grad_norm": 1.3861849334966856, "learning_rate": 1.9935575466142256e-05, "loss": 0.6396, "step": 839 }, { "epoch": 0.07, "grad_norm": 1.4831036393419366, "learning_rate": 1.993529039467478e-05, "loss": 0.7079, "step": 840 }, { "epoch": 0.07, "grad_norm": 1.262685069125964, "learning_rate": 1.9935004695941164e-05, "loss": 0.6484, "step": 841 }, { "epoch": 0.07, "grad_norm": 1.4497742937620992, "learning_rate": 1.9934718369959438e-05, "loss": 0.7075, "step": 842 }, { "epoch": 0.07, "grad_norm": 1.375661474123054, "learning_rate": 1.993443141674769e-05, "loss": 0.6859, "step": 843 }, { "epoch": 0.07, "grad_norm": 1.5053634915575964, "learning_rate": 1.9934143836324032e-05, "loss": 0.6376, "step": 844 }, { "epoch": 0.07, "grad_norm": 1.4212393545055704, "learning_rate": 1.9933855628706616e-05, "loss": 0.7012, "step": 845 }, { "epoch": 0.07, "grad_norm": 1.4160093951910373, "learning_rate": 1.9933566793913646e-05, "loss": 0.6748, "step": 846 }, { "epoch": 0.07, "grad_norm": 1.441844015660788, "learning_rate": 1.9933277331963354e-05, "loss": 0.7426, "step": 847 }, { "epoch": 0.07, "grad_norm": 1.4391726009568306, "learning_rate": 1.9932987242874014e-05, "loss": 0.6829, "step": 848 }, { "epoch": 0.07, "grad_norm": 1.4962067324852635, "learning_rate": 1.993269652666394e-05, "loss": 0.6847, "step": 849 }, { "epoch": 0.07, "grad_norm": 1.5302027799827125, "learning_rate": 1.9932405183351492e-05, "loss": 0.7161, "step": 850 }, { "epoch": 0.07, "grad_norm": 1.3747106594485758, "learning_rate": 1.993211321295506e-05, "loss": 0.6486, "step": 851 }, { "epoch": 0.07, "grad_norm": 1.3602210295105055, "learning_rate": 1.9931820615493075e-05, "loss": 0.706, "step": 852 }, { "epoch": 0.07, "grad_norm": 1.5689407354264588, "learning_rate": 1.9931527390984016e-05, "loss": 0.7051, "step": 853 }, { "epoch": 0.07, "grad_norm": 1.4528060891282195, "learning_rate": 1.993123353944639e-05, "loss": 0.7376, "step": 854 }, { "epoch": 0.07, "grad_norm": 1.585080897315511, "learning_rate": 1.9930939060898754e-05, "loss": 0.6934, "step": 855 }, { "epoch": 0.07, "grad_norm": 1.3884297735294173, "learning_rate": 1.99306439553597e-05, "loss": 0.6519, "step": 856 }, { "epoch": 0.07, "grad_norm": 1.270608063832158, "learning_rate": 1.9930348222847856e-05, "loss": 0.6779, "step": 857 }, { "epoch": 0.07, "grad_norm": 1.2991058740998236, "learning_rate": 1.9930051863381893e-05, "loss": 0.6505, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.3009620976098495, "learning_rate": 1.9929754876980523e-05, "loss": 0.5922, "step": 859 }, { "epoch": 0.07, "grad_norm": 1.3634349222659414, "learning_rate": 1.99294572636625e-05, "loss": 0.6267, "step": 860 }, { "epoch": 0.07, "grad_norm": 1.3665605688005118, "learning_rate": 1.992915902344661e-05, "loss": 0.6439, "step": 861 }, { "epoch": 0.07, "grad_norm": 1.4513939832550313, "learning_rate": 1.9928860156351683e-05, "loss": 0.7101, "step": 862 }, { "epoch": 0.07, "grad_norm": 1.3809437597840335, "learning_rate": 1.9928560662396585e-05, "loss": 0.6228, "step": 863 }, { "epoch": 0.07, "grad_norm": 1.5473122797487913, "learning_rate": 1.9928260541600233e-05, "loss": 0.6886, "step": 864 }, { "epoch": 0.07, "grad_norm": 1.5049679662140467, "learning_rate": 1.9927959793981567e-05, "loss": 0.7393, "step": 865 }, { "epoch": 0.07, "grad_norm": 1.4529344167118305, "learning_rate": 1.9927658419559577e-05, "loss": 0.6844, "step": 866 }, { "epoch": 0.07, "grad_norm": 1.490682991867091, "learning_rate": 1.992735641835329e-05, "loss": 0.7413, "step": 867 }, { "epoch": 0.07, "grad_norm": 1.5769126692244164, "learning_rate": 1.9927053790381777e-05, "loss": 0.7244, "step": 868 }, { "epoch": 0.07, "grad_norm": 1.6577558329799882, "learning_rate": 1.9926750535664138e-05, "loss": 0.6856, "step": 869 }, { "epoch": 0.07, "grad_norm": 1.474557554837615, "learning_rate": 1.9926446654219526e-05, "loss": 0.6884, "step": 870 }, { "epoch": 0.07, "grad_norm": 1.3771862941846595, "learning_rate": 1.9926142146067122e-05, "loss": 0.6543, "step": 871 }, { "epoch": 0.07, "grad_norm": 1.4503339810333056, "learning_rate": 1.9925837011226154e-05, "loss": 0.6353, "step": 872 }, { "epoch": 0.07, "grad_norm": 1.4502429890100663, "learning_rate": 1.9925531249715883e-05, "loss": 0.7419, "step": 873 }, { "epoch": 0.07, "grad_norm": 1.4198792253094388, "learning_rate": 1.9925224861555614e-05, "loss": 0.694, "step": 874 }, { "epoch": 0.07, "grad_norm": 1.459053742624924, "learning_rate": 1.992491784676469e-05, "loss": 0.6742, "step": 875 }, { "epoch": 0.07, "grad_norm": 1.4462206487553946, "learning_rate": 1.99246102053625e-05, "loss": 0.6917, "step": 876 }, { "epoch": 0.07, "grad_norm": 1.4885586857962874, "learning_rate": 1.9924301937368463e-05, "loss": 0.7073, "step": 877 }, { "epoch": 0.07, "grad_norm": 1.514222981202289, "learning_rate": 1.992399304280204e-05, "loss": 0.7174, "step": 878 }, { "epoch": 0.07, "grad_norm": 1.572562129335279, "learning_rate": 1.992368352168274e-05, "loss": 0.7322, "step": 879 }, { "epoch": 0.07, "grad_norm": 1.4745071880984189, "learning_rate": 1.9923373374030098e-05, "loss": 0.6473, "step": 880 }, { "epoch": 0.07, "grad_norm": 1.4927841511558657, "learning_rate": 1.9923062599863694e-05, "loss": 0.7284, "step": 881 }, { "epoch": 0.07, "grad_norm": 1.4004572768689971, "learning_rate": 1.9922751199203153e-05, "loss": 0.694, "step": 882 }, { "epoch": 0.07, "grad_norm": 1.3811435865604569, "learning_rate": 1.9922439172068134e-05, "loss": 0.6778, "step": 883 }, { "epoch": 0.07, "grad_norm": 1.6417994337404593, "learning_rate": 1.9922126518478338e-05, "loss": 0.7517, "step": 884 }, { "epoch": 0.07, "grad_norm": 1.3503645510662545, "learning_rate": 1.9921813238453505e-05, "loss": 0.6337, "step": 885 }, { "epoch": 0.07, "grad_norm": 1.4140744287820188, "learning_rate": 1.992149933201341e-05, "loss": 0.6708, "step": 886 }, { "epoch": 0.07, "grad_norm": 1.3250569079432117, "learning_rate": 1.9921184799177874e-05, "loss": 0.5896, "step": 887 }, { "epoch": 0.07, "grad_norm": 1.6496578237440829, "learning_rate": 1.9920869639966754e-05, "loss": 0.7237, "step": 888 }, { "epoch": 0.07, "grad_norm": 1.530620211885439, "learning_rate": 1.9920553854399954e-05, "loss": 0.6639, "step": 889 }, { "epoch": 0.07, "grad_norm": 1.3179543106020766, "learning_rate": 1.9920237442497402e-05, "loss": 0.6178, "step": 890 }, { "epoch": 0.07, "grad_norm": 1.386334433155643, "learning_rate": 1.9919920404279078e-05, "loss": 0.6786, "step": 891 }, { "epoch": 0.07, "grad_norm": 1.4601150890608667, "learning_rate": 1.9919602739765e-05, "loss": 0.679, "step": 892 }, { "epoch": 0.07, "grad_norm": 1.4218650230644563, "learning_rate": 1.991928444897522e-05, "loss": 0.6331, "step": 893 }, { "epoch": 0.07, "grad_norm": 1.5476835045299449, "learning_rate": 1.991896553192984e-05, "loss": 0.663, "step": 894 }, { "epoch": 0.07, "grad_norm": 1.3503831778782487, "learning_rate": 1.9918645988648988e-05, "loss": 0.6662, "step": 895 }, { "epoch": 0.07, "grad_norm": 1.488689696966759, "learning_rate": 1.991832581915284e-05, "loss": 0.7097, "step": 896 }, { "epoch": 0.07, "grad_norm": 1.5004022376840858, "learning_rate": 1.991800502346162e-05, "loss": 0.6784, "step": 897 }, { "epoch": 0.07, "grad_norm": 1.5633060664962397, "learning_rate": 1.9917683601595563e-05, "loss": 0.7051, "step": 898 }, { "epoch": 0.07, "grad_norm": 1.4859099633265669, "learning_rate": 1.9917361553574974e-05, "loss": 0.7132, "step": 899 }, { "epoch": 0.07, "grad_norm": 1.2765667619949221, "learning_rate": 1.9917038879420184e-05, "loss": 0.6212, "step": 900 }, { "epoch": 0.07, "grad_norm": 1.4555703373919575, "learning_rate": 1.9916715579151567e-05, "loss": 0.7504, "step": 901 }, { "epoch": 0.07, "grad_norm": 1.4165845267474468, "learning_rate": 1.9916391652789526e-05, "loss": 0.659, "step": 902 }, { "epoch": 0.07, "grad_norm": 1.4645597055337136, "learning_rate": 1.9916067100354523e-05, "loss": 0.7273, "step": 903 }, { "epoch": 0.07, "grad_norm": 1.3920263023686752, "learning_rate": 1.9915741921867043e-05, "loss": 0.6225, "step": 904 }, { "epoch": 0.07, "grad_norm": 1.4166586632595999, "learning_rate": 1.9915416117347615e-05, "loss": 0.6823, "step": 905 }, { "epoch": 0.07, "grad_norm": 1.3831162630917924, "learning_rate": 1.9915089686816813e-05, "loss": 0.7372, "step": 906 }, { "epoch": 0.07, "grad_norm": 1.4050313119351863, "learning_rate": 1.991476263029524e-05, "loss": 0.6783, "step": 907 }, { "epoch": 0.07, "grad_norm": 1.4914840717903826, "learning_rate": 1.9914434947803552e-05, "loss": 0.6823, "step": 908 }, { "epoch": 0.07, "grad_norm": 1.487423066145206, "learning_rate": 1.9914106639362437e-05, "loss": 0.7083, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.5068998589001918, "learning_rate": 1.9913777704992613e-05, "loss": 0.7593, "step": 910 }, { "epoch": 0.07, "grad_norm": 1.3678746267863946, "learning_rate": 1.991344814471486e-05, "loss": 0.6488, "step": 911 }, { "epoch": 0.07, "grad_norm": 1.3588297397128217, "learning_rate": 1.9913117958549975e-05, "loss": 0.651, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.40965020988121, "learning_rate": 1.9912787146518812e-05, "loss": 0.6457, "step": 913 }, { "epoch": 0.07, "grad_norm": 1.4230327713356739, "learning_rate": 1.991245570864225e-05, "loss": 0.6117, "step": 914 }, { "epoch": 0.07, "grad_norm": 1.3945193250130232, "learning_rate": 1.9912123644941218e-05, "loss": 0.6508, "step": 915 }, { "epoch": 0.07, "grad_norm": 1.493799826457363, "learning_rate": 1.9911790955436682e-05, "loss": 0.7071, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.4551092566929722, "learning_rate": 1.9911457640149642e-05, "loss": 0.6186, "step": 917 }, { "epoch": 0.07, "grad_norm": 1.4483474621158612, "learning_rate": 1.991112369910115e-05, "loss": 0.652, "step": 918 }, { "epoch": 0.07, "grad_norm": 1.3635141780327364, "learning_rate": 1.9910789132312278e-05, "loss": 0.6612, "step": 919 }, { "epoch": 0.07, "grad_norm": 1.4896489464596532, "learning_rate": 1.9910453939804156e-05, "loss": 0.6837, "step": 920 }, { "epoch": 0.07, "grad_norm": 1.4126587955708032, "learning_rate": 1.991011812159795e-05, "loss": 0.6654, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.8112980375902783, "learning_rate": 1.9909781677714854e-05, "loss": 0.6903, "step": 922 }, { "epoch": 0.07, "grad_norm": 1.541463383168267, "learning_rate": 1.9909444608176117e-05, "loss": 0.6427, "step": 923 }, { "epoch": 0.07, "grad_norm": 1.3863376577333875, "learning_rate": 1.9909106913003013e-05, "loss": 0.6712, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.5331110641150918, "learning_rate": 1.9908768592216862e-05, "loss": 0.7042, "step": 925 }, { "epoch": 0.07, "grad_norm": 1.4753000827000902, "learning_rate": 1.990842964583903e-05, "loss": 0.6313, "step": 926 }, { "epoch": 0.07, "grad_norm": 1.440826382080631, "learning_rate": 1.9908090073890915e-05, "loss": 0.7069, "step": 927 }, { "epoch": 0.07, "grad_norm": 1.3562332328076356, "learning_rate": 1.9907749876393955e-05, "loss": 0.654, "step": 928 }, { "epoch": 0.07, "grad_norm": 1.4162225401085708, "learning_rate": 1.9907409053369625e-05, "loss": 0.6433, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.3852365228465156, "learning_rate": 1.990706760483945e-05, "loss": 0.6739, "step": 930 }, { "epoch": 0.07, "grad_norm": 1.4268325900953065, "learning_rate": 1.990672553082498e-05, "loss": 0.7006, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.4229980895993992, "learning_rate": 1.9906382831347815e-05, "loss": 0.7259, "step": 932 }, { "epoch": 0.07, "grad_norm": 1.429748533853472, "learning_rate": 1.9906039506429594e-05, "loss": 0.7383, "step": 933 }, { "epoch": 0.07, "grad_norm": 1.3603332036320621, "learning_rate": 1.990569555609199e-05, "loss": 0.6462, "step": 934 }, { "epoch": 0.07, "grad_norm": 1.5254504867315368, "learning_rate": 1.9905350980356717e-05, "loss": 0.7234, "step": 935 }, { "epoch": 0.07, "grad_norm": 1.3442592543002676, "learning_rate": 1.990500577924553e-05, "loss": 0.6751, "step": 936 }, { "epoch": 0.07, "grad_norm": 1.3598834435219844, "learning_rate": 1.990465995278023e-05, "loss": 0.7251, "step": 937 }, { "epoch": 0.07, "grad_norm": 1.3867053823767008, "learning_rate": 1.9904313500982645e-05, "loss": 0.7055, "step": 938 }, { "epoch": 0.07, "grad_norm": 1.2992096992952753, "learning_rate": 1.9903966423874648e-05, "loss": 0.6868, "step": 939 }, { "epoch": 0.07, "grad_norm": 1.4236460963139852, "learning_rate": 1.9903618721478154e-05, "loss": 0.6872, "step": 940 }, { "epoch": 0.07, "grad_norm": 1.3349318706972957, "learning_rate": 1.9903270393815112e-05, "loss": 0.6536, "step": 941 }, { "epoch": 0.07, "grad_norm": 1.5064617688746478, "learning_rate": 1.9902921440907517e-05, "loss": 0.7324, "step": 942 }, { "epoch": 0.07, "grad_norm": 1.4943537940256149, "learning_rate": 1.99025718627774e-05, "loss": 0.7533, "step": 943 }, { "epoch": 0.07, "grad_norm": 1.326285490806938, "learning_rate": 1.990222165944683e-05, "loss": 0.6096, "step": 944 }, { "epoch": 0.07, "grad_norm": 1.4308625275631561, "learning_rate": 1.9901870830937916e-05, "loss": 0.7198, "step": 945 }, { "epoch": 0.07, "grad_norm": 1.4860911036221065, "learning_rate": 1.9901519377272813e-05, "loss": 0.7275, "step": 946 }, { "epoch": 0.07, "grad_norm": 1.3758716421418185, "learning_rate": 1.9901167298473704e-05, "loss": 0.6426, "step": 947 }, { "epoch": 0.07, "grad_norm": 1.3939826377734255, "learning_rate": 1.990081459456282e-05, "loss": 0.6446, "step": 948 }, { "epoch": 0.07, "grad_norm": 1.4762685170251857, "learning_rate": 1.990046126556243e-05, "loss": 0.6721, "step": 949 }, { "epoch": 0.07, "grad_norm": 1.316357122121463, "learning_rate": 1.9900107311494844e-05, "loss": 0.6468, "step": 950 }, { "epoch": 0.07, "grad_norm": 1.4429846084789009, "learning_rate": 1.98997527323824e-05, "loss": 0.7011, "step": 951 }, { "epoch": 0.07, "grad_norm": 1.4064551733598605, "learning_rate": 1.9899397528247496e-05, "loss": 0.687, "step": 952 }, { "epoch": 0.07, "grad_norm": 1.4627218518349863, "learning_rate": 1.9899041699112547e-05, "loss": 0.6897, "step": 953 }, { "epoch": 0.07, "grad_norm": 1.3036922382056675, "learning_rate": 1.9898685245000023e-05, "loss": 0.6743, "step": 954 }, { "epoch": 0.07, "grad_norm": 1.487256195219612, "learning_rate": 1.9898328165932432e-05, "loss": 0.6504, "step": 955 }, { "epoch": 0.07, "grad_norm": 1.488864574107309, "learning_rate": 1.989797046193231e-05, "loss": 0.6602, "step": 956 }, { "epoch": 0.07, "grad_norm": 1.4369606996708997, "learning_rate": 1.9897612133022252e-05, "loss": 0.6301, "step": 957 }, { "epoch": 0.07, "grad_norm": 1.3805988934778417, "learning_rate": 1.9897253179224872e-05, "loss": 0.6489, "step": 958 }, { "epoch": 0.07, "grad_norm": 1.24645412101121, "learning_rate": 1.9896893600562836e-05, "loss": 0.6412, "step": 959 }, { "epoch": 0.07, "grad_norm": 1.5830254339029253, "learning_rate": 1.9896533397058846e-05, "loss": 0.7216, "step": 960 }, { "epoch": 0.07, "grad_norm": 1.4225778205113302, "learning_rate": 1.9896172568735642e-05, "loss": 0.6761, "step": 961 }, { "epoch": 0.07, "grad_norm": 1.425753825720842, "learning_rate": 1.989581111561601e-05, "loss": 0.6642, "step": 962 }, { "epoch": 0.07, "grad_norm": 1.3112592736453563, "learning_rate": 1.989544903772276e-05, "loss": 0.6485, "step": 963 }, { "epoch": 0.07, "grad_norm": 1.3623783652231403, "learning_rate": 1.9895086335078767e-05, "loss": 0.6011, "step": 964 }, { "epoch": 0.07, "grad_norm": 1.446612129472585, "learning_rate": 1.9894723007706916e-05, "loss": 0.7153, "step": 965 }, { "epoch": 0.07, "grad_norm": 1.330918549301755, "learning_rate": 1.989435905563015e-05, "loss": 0.679, "step": 966 }, { "epoch": 0.08, "grad_norm": 1.3840427421904582, "learning_rate": 1.9893994478871456e-05, "loss": 0.6383, "step": 967 }, { "epoch": 0.08, "grad_norm": 1.3659906434678413, "learning_rate": 1.989362927745384e-05, "loss": 0.7313, "step": 968 }, { "epoch": 0.08, "grad_norm": 1.4293000576987829, "learning_rate": 1.989326345140036e-05, "loss": 0.6224, "step": 969 }, { "epoch": 0.08, "grad_norm": 1.3549320796539817, "learning_rate": 1.9892897000734122e-05, "loss": 0.6781, "step": 970 }, { "epoch": 0.08, "grad_norm": 1.7309566869049375, "learning_rate": 1.989252992547825e-05, "loss": 0.5915, "step": 971 }, { "epoch": 0.08, "grad_norm": 1.471031932372707, "learning_rate": 1.989216222565593e-05, "loss": 0.6853, "step": 972 }, { "epoch": 0.08, "grad_norm": 1.29330896240718, "learning_rate": 1.9891793901290367e-05, "loss": 0.5932, "step": 973 }, { "epoch": 0.08, "grad_norm": 1.39955222598446, "learning_rate": 1.9891424952404825e-05, "loss": 0.6759, "step": 974 }, { "epoch": 0.08, "grad_norm": 1.3091365314821897, "learning_rate": 1.989105537902259e-05, "loss": 0.6615, "step": 975 }, { "epoch": 0.08, "grad_norm": 1.3550494862067861, "learning_rate": 1.9890685181166996e-05, "loss": 0.6901, "step": 976 }, { "epoch": 0.08, "grad_norm": 1.4487513690774159, "learning_rate": 1.989031435886142e-05, "loss": 0.6796, "step": 977 }, { "epoch": 0.08, "grad_norm": 1.5008200152433027, "learning_rate": 1.988994291212927e-05, "loss": 0.6849, "step": 978 }, { "epoch": 0.08, "grad_norm": 1.474959214503317, "learning_rate": 1.9889570840994e-05, "loss": 0.6223, "step": 979 }, { "epoch": 0.08, "grad_norm": 1.4913596208415856, "learning_rate": 1.98891981454791e-05, "loss": 0.7232, "step": 980 }, { "epoch": 0.08, "grad_norm": 1.2386380232194563, "learning_rate": 1.9888824825608097e-05, "loss": 0.6471, "step": 981 }, { "epoch": 0.08, "grad_norm": 1.5064282165451475, "learning_rate": 1.9888450881404563e-05, "loss": 0.7048, "step": 982 }, { "epoch": 0.08, "grad_norm": 1.4884845264413473, "learning_rate": 1.988807631289211e-05, "loss": 0.7184, "step": 983 }, { "epoch": 0.08, "grad_norm": 1.3160290729563668, "learning_rate": 1.988770112009438e-05, "loss": 0.6145, "step": 984 }, { "epoch": 0.08, "grad_norm": 1.4277086581772478, "learning_rate": 1.9887325303035063e-05, "loss": 0.7313, "step": 985 }, { "epoch": 0.08, "grad_norm": 1.3675087687544234, "learning_rate": 1.9886948861737892e-05, "loss": 0.6608, "step": 986 }, { "epoch": 0.08, "grad_norm": 1.531677342164226, "learning_rate": 1.988657179622663e-05, "loss": 0.7273, "step": 987 }, { "epoch": 0.08, "grad_norm": 1.4073944097587059, "learning_rate": 1.9886194106525082e-05, "loss": 0.7083, "step": 988 }, { "epoch": 0.08, "grad_norm": 1.3219467522376414, "learning_rate": 1.988581579265709e-05, "loss": 0.6774, "step": 989 }, { "epoch": 0.08, "grad_norm": 1.4440426583527428, "learning_rate": 1.9885436854646546e-05, "loss": 0.6863, "step": 990 }, { "epoch": 0.08, "grad_norm": 1.439198527083775, "learning_rate": 1.9885057292517373e-05, "loss": 0.6843, "step": 991 }, { "epoch": 0.08, "grad_norm": 1.2199872901129378, "learning_rate": 1.9884677106293528e-05, "loss": 0.643, "step": 992 }, { "epoch": 0.08, "grad_norm": 1.3182833275230958, "learning_rate": 1.9884296295999022e-05, "loss": 0.6841, "step": 993 }, { "epoch": 0.08, "grad_norm": 1.3048183809522353, "learning_rate": 1.9883914861657895e-05, "loss": 0.6242, "step": 994 }, { "epoch": 0.08, "grad_norm": 1.4444066688700568, "learning_rate": 1.988353280329423e-05, "loss": 0.7121, "step": 995 }, { "epoch": 0.08, "grad_norm": 1.382637012790591, "learning_rate": 1.9883150120932144e-05, "loss": 0.6451, "step": 996 }, { "epoch": 0.08, "grad_norm": 1.3578731966664257, "learning_rate": 1.98827668145958e-05, "loss": 0.6442, "step": 997 }, { "epoch": 0.08, "grad_norm": 1.3944573476320203, "learning_rate": 1.9882382884309402e-05, "loss": 0.7148, "step": 998 }, { "epoch": 0.08, "grad_norm": 1.3060491161019383, "learning_rate": 1.9881998330097184e-05, "loss": 0.6531, "step": 999 }, { "epoch": 0.08, "grad_norm": 1.4090578657062638, "learning_rate": 1.9881613151983425e-05, "loss": 0.7014, "step": 1000 }, { "epoch": 0.08, "grad_norm": 1.3586187341727907, "learning_rate": 1.9881227349992448e-05, "loss": 0.6605, "step": 1001 }, { "epoch": 0.08, "grad_norm": 1.459980044032585, "learning_rate": 1.9880840924148606e-05, "loss": 0.7058, "step": 1002 }, { "epoch": 0.08, "grad_norm": 1.2951328571759797, "learning_rate": 1.9880453874476302e-05, "loss": 0.6291, "step": 1003 }, { "epoch": 0.08, "grad_norm": 1.3904379815348435, "learning_rate": 1.9880066200999963e-05, "loss": 0.6767, "step": 1004 }, { "epoch": 0.08, "grad_norm": 1.5176683789372114, "learning_rate": 1.9879677903744076e-05, "loss": 0.7051, "step": 1005 }, { "epoch": 0.08, "grad_norm": 1.4120921955672878, "learning_rate": 1.9879288982733146e-05, "loss": 0.6749, "step": 1006 }, { "epoch": 0.08, "grad_norm": 1.3639943790795412, "learning_rate": 1.9878899437991736e-05, "loss": 0.6627, "step": 1007 }, { "epoch": 0.08, "grad_norm": 1.333326945686934, "learning_rate": 1.987850926954443e-05, "loss": 0.6463, "step": 1008 }, { "epoch": 0.08, "grad_norm": 1.433813426469793, "learning_rate": 1.987811847741587e-05, "loss": 0.6474, "step": 1009 }, { "epoch": 0.08, "grad_norm": 1.4291352419646783, "learning_rate": 1.9877727061630732e-05, "loss": 0.6823, "step": 1010 }, { "epoch": 0.08, "grad_norm": 1.318844808994173, "learning_rate": 1.9877335022213716e-05, "loss": 0.6547, "step": 1011 }, { "epoch": 0.08, "grad_norm": 1.3555024772207713, "learning_rate": 1.987694235918958e-05, "loss": 0.6181, "step": 1012 }, { "epoch": 0.08, "grad_norm": 1.3196808246332612, "learning_rate": 1.9876549072583116e-05, "loss": 0.6266, "step": 1013 }, { "epoch": 0.08, "grad_norm": 1.2748886452460877, "learning_rate": 1.9876155162419155e-05, "loss": 0.6764, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.5086678564280764, "learning_rate": 1.987576062872256e-05, "loss": 0.6756, "step": 1015 }, { "epoch": 0.08, "grad_norm": 1.4816374581843959, "learning_rate": 1.9875365471518248e-05, "loss": 0.672, "step": 1016 }, { "epoch": 0.08, "grad_norm": 1.3192516782835608, "learning_rate": 1.987496969083116e-05, "loss": 0.6507, "step": 1017 }, { "epoch": 0.08, "grad_norm": 1.453259615661739, "learning_rate": 1.987457328668629e-05, "loss": 0.6735, "step": 1018 }, { "epoch": 0.08, "grad_norm": 1.4272838460945279, "learning_rate": 1.987417625910866e-05, "loss": 0.6806, "step": 1019 }, { "epoch": 0.08, "grad_norm": 1.4206384165273391, "learning_rate": 1.987377860812334e-05, "loss": 0.7037, "step": 1020 }, { "epoch": 0.08, "grad_norm": 1.6001073413843292, "learning_rate": 1.9873380333755437e-05, "loss": 0.684, "step": 1021 }, { "epoch": 0.08, "grad_norm": 1.3913517028294793, "learning_rate": 1.987298143603009e-05, "loss": 0.7063, "step": 1022 }, { "epoch": 0.08, "grad_norm": 1.2660852702051149, "learning_rate": 1.9872581914972486e-05, "loss": 0.583, "step": 1023 }, { "epoch": 0.08, "grad_norm": 1.3821509982728633, "learning_rate": 1.9872181770607852e-05, "loss": 0.6473, "step": 1024 }, { "epoch": 0.08, "grad_norm": 1.4060382683621004, "learning_rate": 1.987178100296145e-05, "loss": 0.6761, "step": 1025 }, { "epoch": 0.08, "grad_norm": 1.4819007731641538, "learning_rate": 1.987137961205858e-05, "loss": 0.7451, "step": 1026 }, { "epoch": 0.08, "grad_norm": 1.3173853009584309, "learning_rate": 1.9870977597924586e-05, "loss": 0.6535, "step": 1027 }, { "epoch": 0.08, "grad_norm": 1.3595085078482159, "learning_rate": 1.987057496058485e-05, "loss": 0.6398, "step": 1028 }, { "epoch": 0.08, "grad_norm": 1.306198615564732, "learning_rate": 1.987017170006479e-05, "loss": 0.615, "step": 1029 }, { "epoch": 0.08, "grad_norm": 1.4706134743488104, "learning_rate": 1.9869767816389867e-05, "loss": 0.702, "step": 1030 }, { "epoch": 0.08, "grad_norm": 1.3656120160007177, "learning_rate": 1.986936330958558e-05, "loss": 0.6992, "step": 1031 }, { "epoch": 0.08, "grad_norm": 1.498139499153313, "learning_rate": 1.986895817967747e-05, "loss": 0.6588, "step": 1032 }, { "epoch": 0.08, "grad_norm": 1.5089207662733624, "learning_rate": 1.9868552426691113e-05, "loss": 0.6933, "step": 1033 }, { "epoch": 0.08, "grad_norm": 1.4074484063240944, "learning_rate": 1.9868146050652123e-05, "loss": 0.6611, "step": 1034 }, { "epoch": 0.08, "grad_norm": 1.4470945352387474, "learning_rate": 1.9867739051586163e-05, "loss": 0.6358, "step": 1035 }, { "epoch": 0.08, "grad_norm": 1.29824660053539, "learning_rate": 1.9867331429518922e-05, "loss": 0.6703, "step": 1036 }, { "epoch": 0.08, "grad_norm": 1.3373726614826371, "learning_rate": 1.9866923184476143e-05, "loss": 0.652, "step": 1037 }, { "epoch": 0.08, "grad_norm": 1.6000545194397107, "learning_rate": 1.9866514316483597e-05, "loss": 0.7112, "step": 1038 }, { "epoch": 0.08, "grad_norm": 1.4534900985973662, "learning_rate": 1.9866104825567096e-05, "loss": 0.6566, "step": 1039 }, { "epoch": 0.08, "grad_norm": 1.4244360862043022, "learning_rate": 1.9865694711752498e-05, "loss": 0.6325, "step": 1040 }, { "epoch": 0.08, "grad_norm": 1.4065804411161942, "learning_rate": 1.986528397506569e-05, "loss": 0.6604, "step": 1041 }, { "epoch": 0.08, "grad_norm": 1.2632019489902635, "learning_rate": 1.9864872615532605e-05, "loss": 0.6187, "step": 1042 }, { "epoch": 0.08, "grad_norm": 1.401284585571879, "learning_rate": 1.9864460633179215e-05, "loss": 0.6613, "step": 1043 }, { "epoch": 0.08, "grad_norm": 1.6324542807096245, "learning_rate": 1.9864048028031535e-05, "loss": 0.6717, "step": 1044 }, { "epoch": 0.08, "grad_norm": 1.4310049855100742, "learning_rate": 1.9863634800115606e-05, "loss": 0.6598, "step": 1045 }, { "epoch": 0.08, "grad_norm": 1.420662667026436, "learning_rate": 1.9863220949457528e-05, "loss": 0.6392, "step": 1046 }, { "epoch": 0.08, "grad_norm": 1.423003283536424, "learning_rate": 1.9862806476083422e-05, "loss": 0.6136, "step": 1047 }, { "epoch": 0.08, "grad_norm": 1.3952984662861372, "learning_rate": 1.9862391380019458e-05, "loss": 0.7323, "step": 1048 }, { "epoch": 0.08, "grad_norm": 1.4141107624732887, "learning_rate": 1.9861975661291845e-05, "loss": 0.6745, "step": 1049 }, { "epoch": 0.08, "grad_norm": 1.3056460759501103, "learning_rate": 1.9861559319926825e-05, "loss": 0.6908, "step": 1050 }, { "epoch": 0.08, "grad_norm": 1.309622289207852, "learning_rate": 1.9861142355950685e-05, "loss": 0.6436, "step": 1051 }, { "epoch": 0.08, "grad_norm": 1.333428642720928, "learning_rate": 1.9860724769389754e-05, "loss": 0.6094, "step": 1052 }, { "epoch": 0.08, "grad_norm": 1.4631047620518394, "learning_rate": 1.986030656027039e-05, "loss": 0.6917, "step": 1053 }, { "epoch": 0.08, "grad_norm": 1.3177733524650974, "learning_rate": 1.9859887728619002e-05, "loss": 0.6302, "step": 1054 }, { "epoch": 0.08, "grad_norm": 1.44428839614962, "learning_rate": 1.9859468274462034e-05, "loss": 0.647, "step": 1055 }, { "epoch": 0.08, "grad_norm": 1.386861702441382, "learning_rate": 1.9859048197825963e-05, "loss": 0.6952, "step": 1056 }, { "epoch": 0.08, "grad_norm": 1.4660066185056326, "learning_rate": 1.9858627498737313e-05, "loss": 0.7264, "step": 1057 }, { "epoch": 0.08, "grad_norm": 1.3794503151745687, "learning_rate": 1.9858206177222646e-05, "loss": 0.5743, "step": 1058 }, { "epoch": 0.08, "grad_norm": 1.447135888589788, "learning_rate": 1.9857784233308562e-05, "loss": 0.6745, "step": 1059 }, { "epoch": 0.08, "grad_norm": 1.3665941967396986, "learning_rate": 1.9857361667021696e-05, "loss": 0.5913, "step": 1060 }, { "epoch": 0.08, "grad_norm": 1.3165333400696055, "learning_rate": 1.9856938478388735e-05, "loss": 0.6582, "step": 1061 }, { "epoch": 0.08, "grad_norm": 1.3262803675188708, "learning_rate": 1.9856514667436393e-05, "loss": 0.6925, "step": 1062 }, { "epoch": 0.08, "grad_norm": 1.5519308062200847, "learning_rate": 1.9856090234191424e-05, "loss": 0.6948, "step": 1063 }, { "epoch": 0.08, "grad_norm": 1.5297323218413756, "learning_rate": 1.985566517868063e-05, "loss": 0.6706, "step": 1064 }, { "epoch": 0.08, "grad_norm": 1.3614990915133625, "learning_rate": 1.9855239500930846e-05, "loss": 0.7215, "step": 1065 }, { "epoch": 0.08, "grad_norm": 1.426998340089791, "learning_rate": 1.9854813200968942e-05, "loss": 0.6469, "step": 1066 }, { "epoch": 0.08, "grad_norm": 1.4413188297878274, "learning_rate": 1.985438627882184e-05, "loss": 0.6302, "step": 1067 }, { "epoch": 0.08, "grad_norm": 1.2690913450583652, "learning_rate": 1.9853958734516487e-05, "loss": 0.5659, "step": 1068 }, { "epoch": 0.08, "grad_norm": 1.5116095293988812, "learning_rate": 1.9853530568079882e-05, "loss": 0.7077, "step": 1069 }, { "epoch": 0.08, "grad_norm": 1.3332191607150499, "learning_rate": 1.9853101779539052e-05, "loss": 0.6324, "step": 1070 }, { "epoch": 0.08, "grad_norm": 1.3016776813589728, "learning_rate": 1.9852672368921074e-05, "loss": 0.6219, "step": 1071 }, { "epoch": 0.08, "grad_norm": 1.4503435977428494, "learning_rate": 1.9852242336253057e-05, "loss": 0.6758, "step": 1072 }, { "epoch": 0.08, "grad_norm": 1.4257728889951766, "learning_rate": 1.985181168156215e-05, "loss": 0.6645, "step": 1073 }, { "epoch": 0.08, "grad_norm": 1.3685159822840158, "learning_rate": 1.985138040487554e-05, "loss": 0.6428, "step": 1074 }, { "epoch": 0.08, "grad_norm": 1.4001830730629623, "learning_rate": 1.985094850622046e-05, "loss": 0.6404, "step": 1075 }, { "epoch": 0.08, "grad_norm": 1.5185190472606231, "learning_rate": 1.985051598562418e-05, "loss": 0.6866, "step": 1076 }, { "epoch": 0.08, "grad_norm": 1.4118465110332843, "learning_rate": 1.9850082843114e-05, "loss": 0.621, "step": 1077 }, { "epoch": 0.08, "grad_norm": 1.4080094245248436, "learning_rate": 1.984964907871727e-05, "loss": 0.7097, "step": 1078 }, { "epoch": 0.08, "grad_norm": 1.454160669891905, "learning_rate": 1.9849214692461375e-05, "loss": 0.6783, "step": 1079 }, { "epoch": 0.08, "grad_norm": 1.3085695634571022, "learning_rate": 1.9848779684373746e-05, "loss": 0.671, "step": 1080 }, { "epoch": 0.08, "grad_norm": 1.4117438765349613, "learning_rate": 1.9848344054481838e-05, "loss": 0.6425, "step": 1081 }, { "epoch": 0.08, "grad_norm": 1.4725912469334606, "learning_rate": 1.9847907802813165e-05, "loss": 0.672, "step": 1082 }, { "epoch": 0.08, "grad_norm": 1.4635007678343672, "learning_rate": 1.9847470929395257e-05, "loss": 0.7695, "step": 1083 }, { "epoch": 0.08, "grad_norm": 1.4571029195468161, "learning_rate": 1.9847033434255708e-05, "loss": 0.7051, "step": 1084 }, { "epoch": 0.08, "grad_norm": 1.309554473303262, "learning_rate": 1.984659531742213e-05, "loss": 0.6515, "step": 1085 }, { "epoch": 0.08, "grad_norm": 1.2763513098548585, "learning_rate": 1.984615657892219e-05, "loss": 0.6556, "step": 1086 }, { "epoch": 0.08, "grad_norm": 1.3691668324268231, "learning_rate": 1.9845717218783588e-05, "loss": 0.6564, "step": 1087 }, { "epoch": 0.08, "grad_norm": 1.2869796571791097, "learning_rate": 1.9845277237034057e-05, "loss": 0.6352, "step": 1088 }, { "epoch": 0.08, "grad_norm": 1.3807977341594648, "learning_rate": 1.9844836633701383e-05, "loss": 0.6988, "step": 1089 }, { "epoch": 0.08, "grad_norm": 1.3444731341252991, "learning_rate": 1.9844395408813376e-05, "loss": 0.6743, "step": 1090 }, { "epoch": 0.08, "grad_norm": 1.307205420254287, "learning_rate": 1.9843953562397896e-05, "loss": 0.6507, "step": 1091 }, { "epoch": 0.08, "grad_norm": 1.384103764814393, "learning_rate": 1.984351109448284e-05, "loss": 0.7041, "step": 1092 }, { "epoch": 0.08, "grad_norm": 1.311451084075518, "learning_rate": 1.9843068005096146e-05, "loss": 0.6769, "step": 1093 }, { "epoch": 0.08, "grad_norm": 1.475281093774608, "learning_rate": 1.9842624294265783e-05, "loss": 0.6757, "step": 1094 }, { "epoch": 0.08, "grad_norm": 1.2847458204330213, "learning_rate": 1.9842179962019768e-05, "loss": 0.629, "step": 1095 }, { "epoch": 0.09, "grad_norm": 1.477745510687581, "learning_rate": 1.9841735008386154e-05, "loss": 0.7262, "step": 1096 }, { "epoch": 0.09, "grad_norm": 1.419998775535378, "learning_rate": 1.984128943339303e-05, "loss": 0.6711, "step": 1097 }, { "epoch": 0.09, "grad_norm": 1.3256467698183303, "learning_rate": 1.984084323706853e-05, "loss": 0.6787, "step": 1098 }, { "epoch": 0.09, "grad_norm": 1.4531825884820975, "learning_rate": 1.9840396419440825e-05, "loss": 0.6695, "step": 1099 }, { "epoch": 0.09, "grad_norm": 1.3048689481914824, "learning_rate": 1.9839948980538124e-05, "loss": 0.6897, "step": 1100 }, { "epoch": 0.09, "grad_norm": 1.4104249650644398, "learning_rate": 1.9839500920388676e-05, "loss": 0.6653, "step": 1101 }, { "epoch": 0.09, "grad_norm": 1.4111668988503194, "learning_rate": 1.983905223902077e-05, "loss": 0.7027, "step": 1102 }, { "epoch": 0.09, "grad_norm": 1.36188376428691, "learning_rate": 1.9838602936462732e-05, "loss": 0.6676, "step": 1103 }, { "epoch": 0.09, "grad_norm": 1.2403181873747628, "learning_rate": 1.983815301274293e-05, "loss": 0.6282, "step": 1104 }, { "epoch": 0.09, "grad_norm": 1.3271663122818982, "learning_rate": 1.9837702467889772e-05, "loss": 0.6733, "step": 1105 }, { "epoch": 0.09, "grad_norm": 1.4194330898476917, "learning_rate": 1.9837251301931704e-05, "loss": 0.6505, "step": 1106 }, { "epoch": 0.09, "grad_norm": 1.2082126107692968, "learning_rate": 1.9836799514897204e-05, "loss": 0.6707, "step": 1107 }, { "epoch": 0.09, "grad_norm": 1.3665789312408116, "learning_rate": 1.98363471068148e-05, "loss": 0.6748, "step": 1108 }, { "epoch": 0.09, "grad_norm": 1.3399140809398349, "learning_rate": 1.9835894077713055e-05, "loss": 0.6376, "step": 1109 }, { "epoch": 0.09, "grad_norm": 1.3799549469987997, "learning_rate": 1.9835440427620568e-05, "loss": 0.677, "step": 1110 }, { "epoch": 0.09, "grad_norm": 1.2417224517340832, "learning_rate": 1.9834986156565984e-05, "loss": 0.6271, "step": 1111 }, { "epoch": 0.09, "grad_norm": 1.2371500907728634, "learning_rate": 1.9834531264577984e-05, "loss": 0.6623, "step": 1112 }, { "epoch": 0.09, "grad_norm": 1.415355533780316, "learning_rate": 1.9834075751685283e-05, "loss": 0.6715, "step": 1113 }, { "epoch": 0.09, "grad_norm": 1.555496312141032, "learning_rate": 1.9833619617916645e-05, "loss": 0.6729, "step": 1114 }, { "epoch": 0.09, "grad_norm": 1.52222421700903, "learning_rate": 1.9833162863300863e-05, "loss": 0.6321, "step": 1115 }, { "epoch": 0.09, "grad_norm": 1.4989224378128634, "learning_rate": 1.983270548786678e-05, "loss": 0.7222, "step": 1116 }, { "epoch": 0.09, "grad_norm": 1.4995699901928754, "learning_rate": 1.9832247491643267e-05, "loss": 0.7012, "step": 1117 }, { "epoch": 0.09, "grad_norm": 1.333947780213303, "learning_rate": 1.983178887465924e-05, "loss": 0.6509, "step": 1118 }, { "epoch": 0.09, "grad_norm": 1.647961588495242, "learning_rate": 1.983132963694366e-05, "loss": 0.8132, "step": 1119 }, { "epoch": 0.09, "grad_norm": 1.5941082420883146, "learning_rate": 1.9830869778525517e-05, "loss": 0.6881, "step": 1120 }, { "epoch": 0.09, "grad_norm": 1.4562004932289037, "learning_rate": 1.9830409299433845e-05, "loss": 0.6922, "step": 1121 }, { "epoch": 0.09, "grad_norm": 1.3253266867481204, "learning_rate": 1.9829948199697713e-05, "loss": 0.6131, "step": 1122 }, { "epoch": 0.09, "grad_norm": 1.3194267868511274, "learning_rate": 1.9829486479346237e-05, "loss": 0.6601, "step": 1123 }, { "epoch": 0.09, "grad_norm": 1.3032366057614475, "learning_rate": 1.9829024138408567e-05, "loss": 0.6463, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.3526579105754335, "learning_rate": 1.982856117691389e-05, "loss": 0.6676, "step": 1125 }, { "epoch": 0.09, "grad_norm": 1.4044047647130808, "learning_rate": 1.9828097594891435e-05, "loss": 0.6607, "step": 1126 }, { "epoch": 0.09, "grad_norm": 1.4112826680270278, "learning_rate": 1.982763339237048e-05, "loss": 0.5893, "step": 1127 }, { "epoch": 0.09, "grad_norm": 1.4209828354982046, "learning_rate": 1.982716856938032e-05, "loss": 0.6859, "step": 1128 }, { "epoch": 0.09, "grad_norm": 1.3437338983325133, "learning_rate": 1.9826703125950302e-05, "loss": 0.6214, "step": 1129 }, { "epoch": 0.09, "grad_norm": 1.3298890459774835, "learning_rate": 1.982623706210982e-05, "loss": 0.6727, "step": 1130 }, { "epoch": 0.09, "grad_norm": 1.3315551443910691, "learning_rate": 1.98257703778883e-05, "loss": 0.6581, "step": 1131 }, { "epoch": 0.09, "grad_norm": 1.2907969972106836, "learning_rate": 1.9825303073315197e-05, "loss": 0.6066, "step": 1132 }, { "epoch": 0.09, "grad_norm": 1.2909279936396814, "learning_rate": 1.9824835148420023e-05, "loss": 0.5687, "step": 1133 }, { "epoch": 0.09, "grad_norm": 1.424877894339963, "learning_rate": 1.982436660323231e-05, "loss": 0.7123, "step": 1134 }, { "epoch": 0.09, "grad_norm": 1.4897768210633964, "learning_rate": 1.9823897437781654e-05, "loss": 0.7402, "step": 1135 }, { "epoch": 0.09, "grad_norm": 1.4471998109666606, "learning_rate": 1.9823427652097663e-05, "loss": 0.6692, "step": 1136 }, { "epoch": 0.09, "grad_norm": 1.4191902721067329, "learning_rate": 1.9822957246210003e-05, "loss": 0.6479, "step": 1137 }, { "epoch": 0.09, "grad_norm": 1.4409011741597924, "learning_rate": 1.9822486220148376e-05, "loss": 0.6955, "step": 1138 }, { "epoch": 0.09, "grad_norm": 1.3597189369504, "learning_rate": 1.9822014573942514e-05, "loss": 0.6567, "step": 1139 }, { "epoch": 0.09, "grad_norm": 1.4575559269691574, "learning_rate": 1.98215423076222e-05, "loss": 0.6694, "step": 1140 }, { "epoch": 0.09, "grad_norm": 1.3492958599027343, "learning_rate": 1.982106942121724e-05, "loss": 0.6675, "step": 1141 }, { "epoch": 0.09, "grad_norm": 1.3921489723089153, "learning_rate": 1.9820595914757505e-05, "loss": 0.649, "step": 1142 }, { "epoch": 0.09, "grad_norm": 1.3440147848735216, "learning_rate": 1.9820121788272873e-05, "loss": 0.6429, "step": 1143 }, { "epoch": 0.09, "grad_norm": 1.3360572002837734, "learning_rate": 1.9819647041793298e-05, "loss": 0.7376, "step": 1144 }, { "epoch": 0.09, "grad_norm": 1.2438325844955456, "learning_rate": 1.9819171675348736e-05, "loss": 0.6687, "step": 1145 }, { "epoch": 0.09, "grad_norm": 1.3711687377246966, "learning_rate": 1.9818695688969207e-05, "loss": 0.7174, "step": 1146 }, { "epoch": 0.09, "grad_norm": 1.3686320929061715, "learning_rate": 1.9818219082684762e-05, "loss": 0.6525, "step": 1147 }, { "epoch": 0.09, "grad_norm": 1.4988193633991862, "learning_rate": 1.9817741856525494e-05, "loss": 0.6038, "step": 1148 }, { "epoch": 0.09, "grad_norm": 1.430496903897148, "learning_rate": 1.9817264010521524e-05, "loss": 0.678, "step": 1149 }, { "epoch": 0.09, "grad_norm": 1.3119959998883932, "learning_rate": 1.981678554470303e-05, "loss": 0.6368, "step": 1150 }, { "epoch": 0.09, "grad_norm": 1.5116578713315048, "learning_rate": 1.9816306459100218e-05, "loss": 0.7125, "step": 1151 }, { "epoch": 0.09, "grad_norm": 1.3733899486933328, "learning_rate": 1.981582675374333e-05, "loss": 0.7114, "step": 1152 }, { "epoch": 0.09, "grad_norm": 1.4422723912540691, "learning_rate": 1.9815346428662655e-05, "loss": 0.6551, "step": 1153 }, { "epoch": 0.09, "grad_norm": 1.2559756022015318, "learning_rate": 1.9814865483888527e-05, "loss": 0.6528, "step": 1154 }, { "epoch": 0.09, "grad_norm": 1.374362104007575, "learning_rate": 1.9814383919451298e-05, "loss": 0.6326, "step": 1155 }, { "epoch": 0.09, "grad_norm": 1.340186606267172, "learning_rate": 1.9813901735381373e-05, "loss": 0.6691, "step": 1156 }, { "epoch": 0.09, "grad_norm": 1.4115398097246712, "learning_rate": 1.9813418931709203e-05, "loss": 0.6561, "step": 1157 }, { "epoch": 0.09, "grad_norm": 1.382567173807268, "learning_rate": 1.9812935508465263e-05, "loss": 0.6679, "step": 1158 }, { "epoch": 0.09, "grad_norm": 1.4407357824929055, "learning_rate": 1.981245146568008e-05, "loss": 0.694, "step": 1159 }, { "epoch": 0.09, "grad_norm": 1.187108276406847, "learning_rate": 1.9811966803384208e-05, "loss": 0.6749, "step": 1160 }, { "epoch": 0.09, "grad_norm": 1.3297259835076982, "learning_rate": 1.9811481521608245e-05, "loss": 0.6108, "step": 1161 }, { "epoch": 0.09, "grad_norm": 1.335911756122454, "learning_rate": 1.981099562038284e-05, "loss": 0.6605, "step": 1162 }, { "epoch": 0.09, "grad_norm": 1.3371802455832802, "learning_rate": 1.981050909973866e-05, "loss": 0.6176, "step": 1163 }, { "epoch": 0.09, "grad_norm": 1.3062167314007176, "learning_rate": 1.981002195970642e-05, "loss": 0.5866, "step": 1164 }, { "epoch": 0.09, "grad_norm": 1.32331668883329, "learning_rate": 1.980953420031689e-05, "loss": 0.6782, "step": 1165 }, { "epoch": 0.09, "grad_norm": 1.202468742630543, "learning_rate": 1.980904582160085e-05, "loss": 0.5988, "step": 1166 }, { "epoch": 0.09, "grad_norm": 1.2650604401726164, "learning_rate": 1.980855682358914e-05, "loss": 0.6952, "step": 1167 }, { "epoch": 0.09, "grad_norm": 1.3592897585747172, "learning_rate": 1.9808067206312632e-05, "loss": 0.6597, "step": 1168 }, { "epoch": 0.09, "grad_norm": 1.4574664491223743, "learning_rate": 1.980757696980224e-05, "loss": 0.5817, "step": 1169 }, { "epoch": 0.09, "grad_norm": 1.4828517124791651, "learning_rate": 1.980708611408891e-05, "loss": 0.739, "step": 1170 }, { "epoch": 0.09, "grad_norm": 1.2886534995682364, "learning_rate": 1.9806594639203637e-05, "loss": 0.6297, "step": 1171 }, { "epoch": 0.09, "grad_norm": 1.5097725733588492, "learning_rate": 1.980610254517745e-05, "loss": 0.7327, "step": 1172 }, { "epoch": 0.09, "grad_norm": 1.3970631500448782, "learning_rate": 1.980560983204142e-05, "loss": 0.7241, "step": 1173 }, { "epoch": 0.09, "grad_norm": 1.215500730620227, "learning_rate": 1.9805116499826646e-05, "loss": 0.6264, "step": 1174 }, { "epoch": 0.09, "grad_norm": 1.360247978434702, "learning_rate": 1.980462254856428e-05, "loss": 0.6927, "step": 1175 }, { "epoch": 0.09, "grad_norm": 1.2213126407984554, "learning_rate": 1.9804127978285506e-05, "loss": 0.6612, "step": 1176 }, { "epoch": 0.09, "grad_norm": 1.4275996905255162, "learning_rate": 1.9803632789021553e-05, "loss": 0.7202, "step": 1177 }, { "epoch": 0.09, "grad_norm": 1.2579692482275089, "learning_rate": 1.9803136980803687e-05, "loss": 0.6404, "step": 1178 }, { "epoch": 0.09, "grad_norm": 1.2835999779483394, "learning_rate": 1.9802640553663196e-05, "loss": 0.6448, "step": 1179 }, { "epoch": 0.09, "grad_norm": 1.3310670915055065, "learning_rate": 1.980214350763144e-05, "loss": 0.696, "step": 1180 }, { "epoch": 0.09, "grad_norm": 1.346129904785629, "learning_rate": 1.9801645842739785e-05, "loss": 0.7355, "step": 1181 }, { "epoch": 0.09, "grad_norm": 1.406778321098541, "learning_rate": 1.980114755901966e-05, "loss": 0.7006, "step": 1182 }, { "epoch": 0.09, "grad_norm": 1.3086238117465867, "learning_rate": 1.9800648656502524e-05, "loss": 0.6399, "step": 1183 }, { "epoch": 0.09, "grad_norm": 1.5989505962304749, "learning_rate": 1.9800149135219873e-05, "loss": 0.6373, "step": 1184 }, { "epoch": 0.09, "grad_norm": 1.4296688224531342, "learning_rate": 1.9799648995203246e-05, "loss": 0.6997, "step": 1185 }, { "epoch": 0.09, "grad_norm": 1.382837887978123, "learning_rate": 1.979914823648422e-05, "loss": 0.663, "step": 1186 }, { "epoch": 0.09, "grad_norm": 1.2721115417218372, "learning_rate": 1.9798646859094405e-05, "loss": 0.6164, "step": 1187 }, { "epoch": 0.09, "grad_norm": 1.3852775283966918, "learning_rate": 1.979814486306546e-05, "loss": 0.6401, "step": 1188 }, { "epoch": 0.09, "grad_norm": 1.3031675885237042, "learning_rate": 1.9797642248429078e-05, "loss": 0.6483, "step": 1189 }, { "epoch": 0.09, "grad_norm": 1.3489120161237507, "learning_rate": 1.979713901521699e-05, "loss": 0.673, "step": 1190 }, { "epoch": 0.09, "grad_norm": 1.2828958221770725, "learning_rate": 1.9796635163460976e-05, "loss": 0.6076, "step": 1191 }, { "epoch": 0.09, "grad_norm": 1.3223242697395923, "learning_rate": 1.9796130693192835e-05, "loss": 0.6755, "step": 1192 }, { "epoch": 0.09, "grad_norm": 1.4134175637100281, "learning_rate": 1.979562560444442e-05, "loss": 0.6333, "step": 1193 }, { "epoch": 0.09, "grad_norm": 1.336011427233703, "learning_rate": 1.9795119897247627e-05, "loss": 0.6573, "step": 1194 }, { "epoch": 0.09, "grad_norm": 1.4294319966790596, "learning_rate": 1.9794613571634378e-05, "loss": 0.6447, "step": 1195 }, { "epoch": 0.09, "grad_norm": 1.3986745026150444, "learning_rate": 1.979410662763664e-05, "loss": 0.7732, "step": 1196 }, { "epoch": 0.09, "grad_norm": 1.4299542954617028, "learning_rate": 1.9793599065286417e-05, "loss": 0.6712, "step": 1197 }, { "epoch": 0.09, "grad_norm": 1.3742971357811897, "learning_rate": 1.9793090884615756e-05, "loss": 0.665, "step": 1198 }, { "epoch": 0.09, "grad_norm": 1.287680376250613, "learning_rate": 1.9792582085656744e-05, "loss": 0.6295, "step": 1199 }, { "epoch": 0.09, "grad_norm": 1.2653865825119586, "learning_rate": 1.97920726684415e-05, "loss": 0.6411, "step": 1200 }, { "epoch": 0.09, "grad_norm": 1.364137528181507, "learning_rate": 1.9791562633002194e-05, "loss": 0.6543, "step": 1201 }, { "epoch": 0.09, "grad_norm": 1.4053051635469254, "learning_rate": 1.9791051979371017e-05, "loss": 0.7137, "step": 1202 }, { "epoch": 0.09, "grad_norm": 1.4748010371873694, "learning_rate": 1.9790540707580213e-05, "loss": 0.6858, "step": 1203 }, { "epoch": 0.09, "grad_norm": 1.3118999108452385, "learning_rate": 1.9790028817662065e-05, "loss": 0.6154, "step": 1204 }, { "epoch": 0.09, "grad_norm": 1.3993442089378476, "learning_rate": 1.978951630964888e-05, "loss": 0.6687, "step": 1205 }, { "epoch": 0.09, "grad_norm": 1.4223187770618666, "learning_rate": 1.978900318357303e-05, "loss": 0.6454, "step": 1206 }, { "epoch": 0.09, "grad_norm": 1.3651858690892422, "learning_rate": 1.9788489439466902e-05, "loss": 0.6481, "step": 1207 }, { "epoch": 0.09, "grad_norm": 1.400000226497632, "learning_rate": 1.9787975077362937e-05, "loss": 0.6561, "step": 1208 }, { "epoch": 0.09, "grad_norm": 1.3360728145132874, "learning_rate": 1.9787460097293603e-05, "loss": 0.6034, "step": 1209 }, { "epoch": 0.09, "grad_norm": 1.323599926777985, "learning_rate": 1.9786944499291417e-05, "loss": 0.6412, "step": 1210 }, { "epoch": 0.09, "grad_norm": 1.4149072979326822, "learning_rate": 1.9786428283388927e-05, "loss": 0.688, "step": 1211 }, { "epoch": 0.09, "grad_norm": 1.414805685928908, "learning_rate": 1.9785911449618732e-05, "loss": 0.7068, "step": 1212 }, { "epoch": 0.09, "grad_norm": 1.4754153846721507, "learning_rate": 1.978539399801346e-05, "loss": 0.6837, "step": 1213 }, { "epoch": 0.09, "grad_norm": 1.3753551111292275, "learning_rate": 1.9784875928605777e-05, "loss": 0.6223, "step": 1214 }, { "epoch": 0.09, "grad_norm": 1.4251710186780597, "learning_rate": 1.978435724142839e-05, "loss": 0.7228, "step": 1215 }, { "epoch": 0.09, "grad_norm": 1.3832231763082845, "learning_rate": 1.9783837936514053e-05, "loss": 0.6726, "step": 1216 }, { "epoch": 0.09, "grad_norm": 1.4500479986204673, "learning_rate": 1.9783318013895552e-05, "loss": 0.6548, "step": 1217 }, { "epoch": 0.09, "grad_norm": 1.3005747588250538, "learning_rate": 1.9782797473605708e-05, "loss": 0.6567, "step": 1218 }, { "epoch": 0.09, "grad_norm": 1.5355761393007654, "learning_rate": 1.978227631567738e-05, "loss": 0.7227, "step": 1219 }, { "epoch": 0.09, "grad_norm": 1.4102631842732851, "learning_rate": 1.9781754540143486e-05, "loss": 0.742, "step": 1220 }, { "epoch": 0.09, "grad_norm": 1.3439314520106866, "learning_rate": 1.9781232147036958e-05, "loss": 0.7207, "step": 1221 }, { "epoch": 0.09, "grad_norm": 1.3944073790343496, "learning_rate": 1.978070913639078e-05, "loss": 0.676, "step": 1222 }, { "epoch": 0.09, "grad_norm": 1.3960356731748198, "learning_rate": 1.9780185508237976e-05, "loss": 0.706, "step": 1223 }, { "epoch": 0.09, "grad_norm": 1.399362907885586, "learning_rate": 1.9779661262611598e-05, "loss": 0.6458, "step": 1224 }, { "epoch": 0.1, "grad_norm": 1.4304060667545093, "learning_rate": 1.9779136399544747e-05, "loss": 0.7019, "step": 1225 }, { "epoch": 0.1, "grad_norm": 1.4063710054682113, "learning_rate": 1.9778610919070563e-05, "loss": 0.6423, "step": 1226 }, { "epoch": 0.1, "grad_norm": 1.3243160155817348, "learning_rate": 1.977808482122222e-05, "loss": 0.7011, "step": 1227 }, { "epoch": 0.1, "grad_norm": 1.4000265800132696, "learning_rate": 1.9777558106032936e-05, "loss": 0.6679, "step": 1228 }, { "epoch": 0.1, "grad_norm": 1.3050823413697417, "learning_rate": 1.9777030773535962e-05, "loss": 0.619, "step": 1229 }, { "epoch": 0.1, "grad_norm": 1.2243187664490291, "learning_rate": 1.977650282376459e-05, "loss": 0.5934, "step": 1230 }, { "epoch": 0.1, "grad_norm": 1.4588698989508349, "learning_rate": 1.977597425675216e-05, "loss": 0.6878, "step": 1231 }, { "epoch": 0.1, "grad_norm": 1.415595556630849, "learning_rate": 1.9775445072532035e-05, "loss": 0.6864, "step": 1232 }, { "epoch": 0.1, "grad_norm": 1.4319039762205037, "learning_rate": 1.9774915271137625e-05, "loss": 0.7228, "step": 1233 }, { "epoch": 0.1, "grad_norm": 1.2616836968149805, "learning_rate": 1.9774384852602387e-05, "loss": 0.6162, "step": 1234 }, { "epoch": 0.1, "grad_norm": 1.4017493813399216, "learning_rate": 1.97738538169598e-05, "loss": 0.7778, "step": 1235 }, { "epoch": 0.1, "grad_norm": 1.4409173896294583, "learning_rate": 1.9773322164243394e-05, "loss": 0.6846, "step": 1236 }, { "epoch": 0.1, "grad_norm": 1.3741735228642569, "learning_rate": 1.9772789894486743e-05, "loss": 0.6303, "step": 1237 }, { "epoch": 0.1, "grad_norm": 1.343105560843702, "learning_rate": 1.9772257007723442e-05, "loss": 0.6178, "step": 1238 }, { "epoch": 0.1, "grad_norm": 1.2393720377291753, "learning_rate": 1.9771723503987133e-05, "loss": 0.5986, "step": 1239 }, { "epoch": 0.1, "grad_norm": 1.3828590083517824, "learning_rate": 1.9771189383311507e-05, "loss": 0.7265, "step": 1240 }, { "epoch": 0.1, "grad_norm": 1.3367191727156558, "learning_rate": 1.9770654645730286e-05, "loss": 0.6923, "step": 1241 }, { "epoch": 0.1, "grad_norm": 1.3043303314880903, "learning_rate": 1.9770119291277223e-05, "loss": 0.6321, "step": 1242 }, { "epoch": 0.1, "grad_norm": 1.3399698626274916, "learning_rate": 1.9769583319986125e-05, "loss": 0.6622, "step": 1243 }, { "epoch": 0.1, "grad_norm": 1.2371714338823765, "learning_rate": 1.9769046731890825e-05, "loss": 0.601, "step": 1244 }, { "epoch": 0.1, "grad_norm": 1.417887199305655, "learning_rate": 1.9768509527025204e-05, "loss": 0.6164, "step": 1245 }, { "epoch": 0.1, "grad_norm": 1.3190168083716396, "learning_rate": 1.9767971705423176e-05, "loss": 0.6261, "step": 1246 }, { "epoch": 0.1, "grad_norm": 1.3498563637085053, "learning_rate": 1.97674332671187e-05, "loss": 0.6424, "step": 1247 }, { "epoch": 0.1, "grad_norm": 1.430250296795552, "learning_rate": 1.9766894212145767e-05, "loss": 0.6578, "step": 1248 }, { "epoch": 0.1, "grad_norm": 1.3691569502832655, "learning_rate": 1.9766354540538414e-05, "loss": 0.6788, "step": 1249 }, { "epoch": 0.1, "grad_norm": 1.339045161487761, "learning_rate": 1.976581425233071e-05, "loss": 0.6605, "step": 1250 }, { "epoch": 0.1, "grad_norm": 1.2840355621802468, "learning_rate": 1.976527334755677e-05, "loss": 0.6267, "step": 1251 }, { "epoch": 0.1, "grad_norm": 1.2673447310319617, "learning_rate": 1.976473182625074e-05, "loss": 0.6337, "step": 1252 }, { "epoch": 0.1, "grad_norm": 1.346546899612652, "learning_rate": 1.9764189688446803e-05, "loss": 0.6606, "step": 1253 }, { "epoch": 0.1, "grad_norm": 1.4501943260728969, "learning_rate": 1.9763646934179204e-05, "loss": 0.7229, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.2845667730685473, "learning_rate": 1.97631035634822e-05, "loss": 0.6559, "step": 1255 }, { "epoch": 0.1, "grad_norm": 1.397157648152016, "learning_rate": 1.976255957639009e-05, "loss": 0.6663, "step": 1256 }, { "epoch": 0.1, "grad_norm": 1.3866889628045302, "learning_rate": 1.976201497293723e-05, "loss": 0.64, "step": 1257 }, { "epoch": 0.1, "grad_norm": 1.2593359872535244, "learning_rate": 1.9761469753158e-05, "loss": 0.6902, "step": 1258 }, { "epoch": 0.1, "grad_norm": 1.3070455930006084, "learning_rate": 1.976092391708682e-05, "loss": 0.7115, "step": 1259 }, { "epoch": 0.1, "grad_norm": 1.2864158046760956, "learning_rate": 1.9760377464758154e-05, "loss": 0.6545, "step": 1260 }, { "epoch": 0.1, "grad_norm": 1.4093333455969654, "learning_rate": 1.9759830396206504e-05, "loss": 0.7259, "step": 1261 }, { "epoch": 0.1, "grad_norm": 1.2496139884021689, "learning_rate": 1.9759282711466405e-05, "loss": 0.6229, "step": 1262 }, { "epoch": 0.1, "grad_norm": 1.4022444580969724, "learning_rate": 1.9758734410572435e-05, "loss": 0.7355, "step": 1263 }, { "epoch": 0.1, "grad_norm": 1.3869299929195074, "learning_rate": 1.9758185493559216e-05, "loss": 0.6824, "step": 1264 }, { "epoch": 0.1, "grad_norm": 1.442074669818633, "learning_rate": 1.97576359604614e-05, "loss": 0.6732, "step": 1265 }, { "epoch": 0.1, "grad_norm": 1.4624501113859736, "learning_rate": 1.9757085811313686e-05, "loss": 0.6482, "step": 1266 }, { "epoch": 0.1, "grad_norm": 1.2907830517955654, "learning_rate": 1.97565350461508e-05, "loss": 0.6272, "step": 1267 }, { "epoch": 0.1, "grad_norm": 1.3325046358153128, "learning_rate": 1.9755983665007524e-05, "loss": 0.6327, "step": 1268 }, { "epoch": 0.1, "grad_norm": 1.4107812568951001, "learning_rate": 1.975543166791866e-05, "loss": 0.6973, "step": 1269 }, { "epoch": 0.1, "grad_norm": 1.3711360479280659, "learning_rate": 1.9754879054919067e-05, "loss": 0.6563, "step": 1270 }, { "epoch": 0.1, "grad_norm": 1.3825662253523505, "learning_rate": 1.975432582604363e-05, "loss": 0.6227, "step": 1271 }, { "epoch": 0.1, "grad_norm": 1.3599147821121875, "learning_rate": 1.9753771981327278e-05, "loss": 0.655, "step": 1272 }, { "epoch": 0.1, "grad_norm": 1.3522012336867513, "learning_rate": 1.9753217520804976e-05, "loss": 0.6367, "step": 1273 }, { "epoch": 0.1, "grad_norm": 1.3559167215127172, "learning_rate": 1.9752662444511738e-05, "loss": 0.6678, "step": 1274 }, { "epoch": 0.1, "grad_norm": 1.3938523464515515, "learning_rate": 1.9752106752482596e-05, "loss": 0.6849, "step": 1275 }, { "epoch": 0.1, "grad_norm": 1.2887214440660997, "learning_rate": 1.9751550444752642e-05, "loss": 0.6558, "step": 1276 }, { "epoch": 0.1, "grad_norm": 1.506260047974055, "learning_rate": 1.9750993521356997e-05, "loss": 0.6281, "step": 1277 }, { "epoch": 0.1, "grad_norm": 1.2844096510916296, "learning_rate": 1.9750435982330823e-05, "loss": 0.6187, "step": 1278 }, { "epoch": 0.1, "grad_norm": 1.3041757276583863, "learning_rate": 1.9749877827709323e-05, "loss": 0.6785, "step": 1279 }, { "epoch": 0.1, "grad_norm": 1.3923368311810609, "learning_rate": 1.974931905752773e-05, "loss": 0.6603, "step": 1280 }, { "epoch": 0.1, "grad_norm": 1.4022437354845803, "learning_rate": 1.9748759671821323e-05, "loss": 0.6251, "step": 1281 }, { "epoch": 0.1, "grad_norm": 1.2979961742380008, "learning_rate": 1.9748199670625423e-05, "loss": 0.6155, "step": 1282 }, { "epoch": 0.1, "grad_norm": 1.4421464213661455, "learning_rate": 1.9747639053975386e-05, "loss": 0.69, "step": 1283 }, { "epoch": 0.1, "grad_norm": 1.2610511548937215, "learning_rate": 1.9747077821906602e-05, "loss": 0.5852, "step": 1284 }, { "epoch": 0.1, "grad_norm": 1.2882226693533523, "learning_rate": 1.9746515974454508e-05, "loss": 0.6923, "step": 1285 }, { "epoch": 0.1, "grad_norm": 1.409332753497726, "learning_rate": 1.9745953511654573e-05, "loss": 0.7094, "step": 1286 }, { "epoch": 0.1, "grad_norm": 1.2603695390366016, "learning_rate": 1.974539043354231e-05, "loss": 0.5916, "step": 1287 }, { "epoch": 0.1, "grad_norm": 1.4142534327297618, "learning_rate": 1.9744826740153272e-05, "loss": 0.7057, "step": 1288 }, { "epoch": 0.1, "grad_norm": 1.284603707400155, "learning_rate": 1.9744262431523045e-05, "loss": 0.6134, "step": 1289 }, { "epoch": 0.1, "grad_norm": 1.2263677162962707, "learning_rate": 1.9743697507687253e-05, "loss": 0.6522, "step": 1290 }, { "epoch": 0.1, "grad_norm": 1.3702936256004872, "learning_rate": 1.974313196868157e-05, "loss": 0.6236, "step": 1291 }, { "epoch": 0.1, "grad_norm": 1.3339402734663026, "learning_rate": 1.97425658145417e-05, "loss": 0.6803, "step": 1292 }, { "epoch": 0.1, "grad_norm": 1.3788075614388753, "learning_rate": 1.974199904530338e-05, "loss": 0.6654, "step": 1293 }, { "epoch": 0.1, "grad_norm": 1.3536417408326378, "learning_rate": 1.9741431661002403e-05, "loss": 0.6845, "step": 1294 }, { "epoch": 0.1, "grad_norm": 1.3127354456120703, "learning_rate": 1.974086366167458e-05, "loss": 0.6953, "step": 1295 }, { "epoch": 0.1, "grad_norm": 1.4802084016990065, "learning_rate": 1.9740295047355784e-05, "loss": 0.6945, "step": 1296 }, { "epoch": 0.1, "grad_norm": 1.2733851697033904, "learning_rate": 1.9739725818081905e-05, "loss": 0.6042, "step": 1297 }, { "epoch": 0.1, "grad_norm": 1.3102079541644611, "learning_rate": 1.9739155973888887e-05, "loss": 0.6492, "step": 1298 }, { "epoch": 0.1, "grad_norm": 1.3442931297411025, "learning_rate": 1.97385855148127e-05, "loss": 0.6496, "step": 1299 }, { "epoch": 0.1, "grad_norm": 1.322587710754856, "learning_rate": 1.9738014440889368e-05, "loss": 0.6683, "step": 1300 }, { "epoch": 0.1, "grad_norm": 1.4261772546429026, "learning_rate": 1.9737442752154944e-05, "loss": 0.6877, "step": 1301 }, { "epoch": 0.1, "grad_norm": 1.4571349897092112, "learning_rate": 1.973687044864552e-05, "loss": 0.6411, "step": 1302 }, { "epoch": 0.1, "grad_norm": 1.2512433544062485, "learning_rate": 1.9736297530397227e-05, "loss": 0.6064, "step": 1303 }, { "epoch": 0.1, "grad_norm": 1.3203760165674958, "learning_rate": 1.973572399744624e-05, "loss": 0.6353, "step": 1304 }, { "epoch": 0.1, "grad_norm": 1.3100388021036906, "learning_rate": 1.9735149849828767e-05, "loss": 0.6889, "step": 1305 }, { "epoch": 0.1, "grad_norm": 1.5577264729490756, "learning_rate": 1.973457508758106e-05, "loss": 0.7482, "step": 1306 }, { "epoch": 0.1, "grad_norm": 1.399440512237503, "learning_rate": 1.9733999710739398e-05, "loss": 0.6147, "step": 1307 }, { "epoch": 0.1, "grad_norm": 1.429925283867604, "learning_rate": 1.9733423719340118e-05, "loss": 0.7181, "step": 1308 }, { "epoch": 0.1, "grad_norm": 1.2958207038844536, "learning_rate": 1.973284711341958e-05, "loss": 0.6567, "step": 1309 }, { "epoch": 0.1, "grad_norm": 1.5082136025863881, "learning_rate": 1.9732269893014188e-05, "loss": 0.7189, "step": 1310 }, { "epoch": 0.1, "grad_norm": 1.2778157053492332, "learning_rate": 1.9731692058160387e-05, "loss": 0.6148, "step": 1311 }, { "epoch": 0.1, "grad_norm": 1.3436301865501994, "learning_rate": 1.9731113608894656e-05, "loss": 0.6477, "step": 1312 }, { "epoch": 0.1, "grad_norm": 1.4867034317967422, "learning_rate": 1.9730534545253516e-05, "loss": 0.6589, "step": 1313 }, { "epoch": 0.1, "grad_norm": 1.5054852012127375, "learning_rate": 1.9729954867273528e-05, "loss": 0.7017, "step": 1314 }, { "epoch": 0.1, "grad_norm": 1.334658763650278, "learning_rate": 1.9729374574991288e-05, "loss": 0.6586, "step": 1315 }, { "epoch": 0.1, "grad_norm": 1.4171479006179815, "learning_rate": 1.9728793668443437e-05, "loss": 0.6929, "step": 1316 }, { "epoch": 0.1, "grad_norm": 1.3340001028538426, "learning_rate": 1.9728212147666647e-05, "loss": 0.6363, "step": 1317 }, { "epoch": 0.1, "grad_norm": 1.4973064716598479, "learning_rate": 1.9727630012697633e-05, "loss": 0.6183, "step": 1318 }, { "epoch": 0.1, "grad_norm": 1.3671159562056532, "learning_rate": 1.9727047263573148e-05, "loss": 0.6643, "step": 1319 }, { "epoch": 0.1, "grad_norm": 1.3051263219707756, "learning_rate": 1.9726463900329985e-05, "loss": 0.6885, "step": 1320 }, { "epoch": 0.1, "grad_norm": 1.3832130498565052, "learning_rate": 1.972587992300497e-05, "loss": 0.6614, "step": 1321 }, { "epoch": 0.1, "grad_norm": 1.427180191799803, "learning_rate": 1.972529533163498e-05, "loss": 0.6415, "step": 1322 }, { "epoch": 0.1, "grad_norm": 1.402858374056527, "learning_rate": 1.972471012625692e-05, "loss": 0.6604, "step": 1323 }, { "epoch": 0.1, "grad_norm": 1.3144097285596168, "learning_rate": 1.9724124306907737e-05, "loss": 0.7101, "step": 1324 }, { "epoch": 0.1, "grad_norm": 1.4888946790883095, "learning_rate": 1.9723537873624418e-05, "loss": 0.7099, "step": 1325 }, { "epoch": 0.1, "grad_norm": 1.3923004858063108, "learning_rate": 1.9722950826443983e-05, "loss": 0.635, "step": 1326 }, { "epoch": 0.1, "grad_norm": 1.3574628864926497, "learning_rate": 1.97223631654035e-05, "loss": 0.6596, "step": 1327 }, { "epoch": 0.1, "grad_norm": 1.3104516348648454, "learning_rate": 1.972177489054007e-05, "loss": 0.6785, "step": 1328 }, { "epoch": 0.1, "grad_norm": 1.494565576696541, "learning_rate": 1.972118600189083e-05, "loss": 0.7091, "step": 1329 }, { "epoch": 0.1, "grad_norm": 1.2904097943384203, "learning_rate": 1.9720596499492965e-05, "loss": 0.6412, "step": 1330 }, { "epoch": 0.1, "grad_norm": 1.2696502160304957, "learning_rate": 1.9720006383383694e-05, "loss": 0.6497, "step": 1331 }, { "epoch": 0.1, "grad_norm": 1.3764578286946552, "learning_rate": 1.971941565360027e-05, "loss": 0.6016, "step": 1332 }, { "epoch": 0.1, "grad_norm": 1.3056387716966713, "learning_rate": 1.971882431017999e-05, "loss": 0.6291, "step": 1333 }, { "epoch": 0.1, "grad_norm": 1.3281560108827877, "learning_rate": 1.9718232353160187e-05, "loss": 0.6634, "step": 1334 }, { "epoch": 0.1, "grad_norm": 1.3282381346144063, "learning_rate": 1.971763978257824e-05, "loss": 0.5961, "step": 1335 }, { "epoch": 0.1, "grad_norm": 1.3181828133361073, "learning_rate": 1.9717046598471553e-05, "loss": 0.6113, "step": 1336 }, { "epoch": 0.1, "grad_norm": 1.2592238096444863, "learning_rate": 1.9716452800877582e-05, "loss": 0.6382, "step": 1337 }, { "epoch": 0.1, "grad_norm": 1.3874714272796158, "learning_rate": 1.9715858389833816e-05, "loss": 0.6665, "step": 1338 }, { "epoch": 0.1, "grad_norm": 1.4367131277184888, "learning_rate": 1.9715263365377782e-05, "loss": 0.6465, "step": 1339 }, { "epoch": 0.1, "grad_norm": 1.3441409052543936, "learning_rate": 1.9714667727547047e-05, "loss": 0.6306, "step": 1340 }, { "epoch": 0.1, "grad_norm": 1.2598321468325648, "learning_rate": 1.9714071476379216e-05, "loss": 0.6457, "step": 1341 }, { "epoch": 0.1, "grad_norm": 1.2656911667847015, "learning_rate": 1.971347461191194e-05, "loss": 0.6455, "step": 1342 }, { "epoch": 0.1, "grad_norm": 1.393301114039878, "learning_rate": 1.9712877134182893e-05, "loss": 0.6345, "step": 1343 }, { "epoch": 0.1, "grad_norm": 1.2886923518639681, "learning_rate": 1.97122790432298e-05, "loss": 0.6618, "step": 1344 }, { "epoch": 0.1, "grad_norm": 1.3950811418568103, "learning_rate": 1.9711680339090425e-05, "loss": 0.7288, "step": 1345 }, { "epoch": 0.1, "grad_norm": 1.4447502073441947, "learning_rate": 1.9711081021802563e-05, "loss": 0.7497, "step": 1346 }, { "epoch": 0.1, "grad_norm": 1.2850127444395998, "learning_rate": 1.9710481091404056e-05, "loss": 0.6443, "step": 1347 }, { "epoch": 0.1, "grad_norm": 1.4118943004462583, "learning_rate": 1.9709880547932774e-05, "loss": 0.6737, "step": 1348 }, { "epoch": 0.1, "grad_norm": 1.3070703549643141, "learning_rate": 1.9709279391426638e-05, "loss": 0.649, "step": 1349 }, { "epoch": 0.1, "grad_norm": 1.2349974113313673, "learning_rate": 1.97086776219236e-05, "loss": 0.6706, "step": 1350 }, { "epoch": 0.1, "grad_norm": 1.3275052139122363, "learning_rate": 1.9708075239461656e-05, "loss": 0.5965, "step": 1351 }, { "epoch": 0.1, "grad_norm": 1.1952545550893299, "learning_rate": 1.9707472244078833e-05, "loss": 0.6654, "step": 1352 }, { "epoch": 0.1, "grad_norm": 1.2581092058034615, "learning_rate": 1.9706868635813205e-05, "loss": 0.6484, "step": 1353 }, { "epoch": 0.11, "grad_norm": 1.3635989368244346, "learning_rate": 1.970626441470288e-05, "loss": 0.7099, "step": 1354 }, { "epoch": 0.11, "grad_norm": 1.352673773822253, "learning_rate": 1.9705659580785997e-05, "loss": 0.7159, "step": 1355 }, { "epoch": 0.11, "grad_norm": 1.4107267963827326, "learning_rate": 1.9705054134100758e-05, "loss": 0.65, "step": 1356 }, { "epoch": 0.11, "grad_norm": 1.417609891853523, "learning_rate": 1.9704448074685377e-05, "loss": 0.6832, "step": 1357 }, { "epoch": 0.11, "grad_norm": 1.2732380435815898, "learning_rate": 1.9703841402578122e-05, "loss": 0.6452, "step": 1358 }, { "epoch": 0.11, "grad_norm": 1.281590346745653, "learning_rate": 1.9703234117817293e-05, "loss": 0.628, "step": 1359 }, { "epoch": 0.11, "grad_norm": 1.3332260903303768, "learning_rate": 1.9702626220441234e-05, "loss": 0.5842, "step": 1360 }, { "epoch": 0.11, "grad_norm": 1.4164228603527387, "learning_rate": 1.970201771048832e-05, "loss": 0.6811, "step": 1361 }, { "epoch": 0.11, "grad_norm": 1.337874987434274, "learning_rate": 1.9701408587996976e-05, "loss": 0.6857, "step": 1362 }, { "epoch": 0.11, "grad_norm": 1.3376260983215107, "learning_rate": 1.9700798853005652e-05, "loss": 0.6693, "step": 1363 }, { "epoch": 0.11, "grad_norm": 1.46019224037812, "learning_rate": 1.9700188505552847e-05, "loss": 0.6573, "step": 1364 }, { "epoch": 0.11, "grad_norm": 1.3934939076482433, "learning_rate": 1.9699577545677097e-05, "loss": 0.6704, "step": 1365 }, { "epoch": 0.11, "grad_norm": 1.3225548116650585, "learning_rate": 1.969896597341697e-05, "loss": 0.6682, "step": 1366 }, { "epoch": 0.11, "grad_norm": 1.3609855144102638, "learning_rate": 1.9698353788811083e-05, "loss": 0.666, "step": 1367 }, { "epoch": 0.11, "grad_norm": 1.1670668063388954, "learning_rate": 1.9697740991898087e-05, "loss": 0.6495, "step": 1368 }, { "epoch": 0.11, "grad_norm": 1.3525111667221432, "learning_rate": 1.9697127582716664e-05, "loss": 0.6399, "step": 1369 }, { "epoch": 0.11, "grad_norm": 1.3967121925460386, "learning_rate": 1.9696513561305548e-05, "loss": 0.7391, "step": 1370 }, { "epoch": 0.11, "grad_norm": 1.2661433159340119, "learning_rate": 1.96958989277035e-05, "loss": 0.6653, "step": 1371 }, { "epoch": 0.11, "grad_norm": 1.4694807791268958, "learning_rate": 1.9695283681949337e-05, "loss": 0.6338, "step": 1372 }, { "epoch": 0.11, "grad_norm": 1.3232123677740102, "learning_rate": 1.9694667824081888e-05, "loss": 0.6726, "step": 1373 }, { "epoch": 0.11, "grad_norm": 1.4554458460743576, "learning_rate": 1.969405135414004e-05, "loss": 0.6913, "step": 1374 }, { "epoch": 0.11, "grad_norm": 1.3616768213887198, "learning_rate": 1.969343427216272e-05, "loss": 0.6915, "step": 1375 }, { "epoch": 0.11, "grad_norm": 1.1213424353628918, "learning_rate": 1.969281657818888e-05, "loss": 0.579, "step": 1376 }, { "epoch": 0.11, "grad_norm": 1.3023375402896349, "learning_rate": 1.9692198272257523e-05, "loss": 0.6318, "step": 1377 }, { "epoch": 0.11, "grad_norm": 1.2886569685708371, "learning_rate": 1.969157935440768e-05, "loss": 0.6205, "step": 1378 }, { "epoch": 0.11, "grad_norm": 1.2081973668037393, "learning_rate": 1.9690959824678436e-05, "loss": 0.6209, "step": 1379 }, { "epoch": 0.11, "grad_norm": 1.4683630413264368, "learning_rate": 1.9690339683108894e-05, "loss": 0.6566, "step": 1380 }, { "epoch": 0.11, "grad_norm": 1.3190817881106844, "learning_rate": 1.9689718929738215e-05, "loss": 0.6452, "step": 1381 }, { "epoch": 0.11, "grad_norm": 1.4568964100642856, "learning_rate": 1.968909756460559e-05, "loss": 0.618, "step": 1382 }, { "epoch": 0.11, "grad_norm": 1.3458943224257083, "learning_rate": 1.968847558775024e-05, "loss": 0.6899, "step": 1383 }, { "epoch": 0.11, "grad_norm": 1.3247938229636835, "learning_rate": 1.9687852999211446e-05, "loss": 0.6129, "step": 1384 }, { "epoch": 0.11, "grad_norm": 1.2653562001308303, "learning_rate": 1.9687229799028506e-05, "loss": 0.6127, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.3502354805046861, "learning_rate": 1.968660598724077e-05, "loss": 0.5831, "step": 1386 }, { "epoch": 0.11, "grad_norm": 1.3013737334422124, "learning_rate": 1.9685981563887623e-05, "loss": 0.6494, "step": 1387 }, { "epoch": 0.11, "grad_norm": 1.2750670882881496, "learning_rate": 1.9685356529008487e-05, "loss": 0.6792, "step": 1388 }, { "epoch": 0.11, "grad_norm": 1.363217109111308, "learning_rate": 1.9684730882642822e-05, "loss": 0.6512, "step": 1389 }, { "epoch": 0.11, "grad_norm": 1.4343266497672076, "learning_rate": 1.9684104624830128e-05, "loss": 0.6691, "step": 1390 }, { "epoch": 0.11, "grad_norm": 1.427636181002575, "learning_rate": 1.9683477755609946e-05, "loss": 0.6198, "step": 1391 }, { "epoch": 0.11, "grad_norm": 1.3264707248245353, "learning_rate": 1.9682850275021858e-05, "loss": 0.6036, "step": 1392 }, { "epoch": 0.11, "grad_norm": 1.327145114021547, "learning_rate": 1.968222218310547e-05, "loss": 0.6261, "step": 1393 }, { "epoch": 0.11, "grad_norm": 1.4332285735563004, "learning_rate": 1.9681593479900444e-05, "loss": 0.6474, "step": 1394 }, { "epoch": 0.11, "grad_norm": 1.3317978282465646, "learning_rate": 1.9680964165446475e-05, "loss": 0.6205, "step": 1395 }, { "epoch": 0.11, "grad_norm": 1.3282284864610847, "learning_rate": 1.9680334239783286e-05, "loss": 0.6482, "step": 1396 }, { "epoch": 0.11, "grad_norm": 1.2872462105842508, "learning_rate": 1.9679703702950653e-05, "loss": 0.5826, "step": 1397 }, { "epoch": 0.11, "grad_norm": 1.317884074115748, "learning_rate": 1.9679072554988387e-05, "loss": 0.6478, "step": 1398 }, { "epoch": 0.11, "grad_norm": 1.477304100823874, "learning_rate": 1.9678440795936332e-05, "loss": 0.727, "step": 1399 }, { "epoch": 0.11, "grad_norm": 1.2830047686362602, "learning_rate": 1.9677808425834374e-05, "loss": 0.6241, "step": 1400 }, { "epoch": 0.11, "grad_norm": 1.4408850412410226, "learning_rate": 1.967717544472244e-05, "loss": 0.7432, "step": 1401 }, { "epoch": 0.11, "grad_norm": 1.374670162520849, "learning_rate": 1.967654185264049e-05, "loss": 0.7134, "step": 1402 }, { "epoch": 0.11, "grad_norm": 1.4369771255233479, "learning_rate": 1.9675907649628532e-05, "loss": 0.6893, "step": 1403 }, { "epoch": 0.11, "grad_norm": 1.370569938740028, "learning_rate": 1.9675272835726604e-05, "loss": 0.6755, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.3160468270194257, "learning_rate": 1.967463741097478e-05, "loss": 0.6197, "step": 1405 }, { "epoch": 0.11, "grad_norm": 1.2821117852374126, "learning_rate": 1.9674001375413182e-05, "loss": 0.6588, "step": 1406 }, { "epoch": 0.11, "grad_norm": 1.3724511972029891, "learning_rate": 1.9673364729081968e-05, "loss": 0.6658, "step": 1407 }, { "epoch": 0.11, "grad_norm": 1.3909350703159726, "learning_rate": 1.967272747202133e-05, "loss": 0.6973, "step": 1408 }, { "epoch": 0.11, "grad_norm": 1.4262784744081567, "learning_rate": 1.9672089604271503e-05, "loss": 0.6834, "step": 1409 }, { "epoch": 0.11, "grad_norm": 1.402619358702424, "learning_rate": 1.9671451125872757e-05, "loss": 0.6341, "step": 1410 }, { "epoch": 0.11, "grad_norm": 1.2839877489194766, "learning_rate": 1.9670812036865402e-05, "loss": 0.5866, "step": 1411 }, { "epoch": 0.11, "grad_norm": 1.3311213184900068, "learning_rate": 1.967017233728979e-05, "loss": 0.7219, "step": 1412 }, { "epoch": 0.11, "grad_norm": 1.291405092436173, "learning_rate": 1.9669532027186308e-05, "loss": 0.666, "step": 1413 }, { "epoch": 0.11, "grad_norm": 1.3878211998532612, "learning_rate": 1.966889110659538e-05, "loss": 0.6766, "step": 1414 }, { "epoch": 0.11, "grad_norm": 1.3578105123173922, "learning_rate": 1.9668249575557468e-05, "loss": 0.6407, "step": 1415 }, { "epoch": 0.11, "grad_norm": 1.5231094398376817, "learning_rate": 1.966760743411308e-05, "loss": 0.701, "step": 1416 }, { "epoch": 0.11, "grad_norm": 1.33094488238159, "learning_rate": 1.9666964682302757e-05, "loss": 0.6604, "step": 1417 }, { "epoch": 0.11, "grad_norm": 1.4229809159629738, "learning_rate": 1.9666321320167083e-05, "loss": 0.6916, "step": 1418 }, { "epoch": 0.11, "grad_norm": 1.353501312928819, "learning_rate": 1.9665677347746667e-05, "loss": 0.6485, "step": 1419 }, { "epoch": 0.11, "grad_norm": 1.4050817511317932, "learning_rate": 1.9665032765082174e-05, "loss": 0.6948, "step": 1420 }, { "epoch": 0.11, "grad_norm": 1.3017059338728252, "learning_rate": 1.96643875722143e-05, "loss": 0.6522, "step": 1421 }, { "epoch": 0.11, "grad_norm": 1.4904558609624012, "learning_rate": 1.966374176918377e-05, "loss": 0.666, "step": 1422 }, { "epoch": 0.11, "grad_norm": 1.2718702630182106, "learning_rate": 1.966309535603137e-05, "loss": 0.6637, "step": 1423 }, { "epoch": 0.11, "grad_norm": 1.3190037035857545, "learning_rate": 1.9662448332797905e-05, "loss": 0.6748, "step": 1424 }, { "epoch": 0.11, "grad_norm": 1.2997543818184787, "learning_rate": 1.9661800699524218e-05, "loss": 0.6677, "step": 1425 }, { "epoch": 0.11, "grad_norm": 1.263777243512033, "learning_rate": 1.966115245625121e-05, "loss": 0.6109, "step": 1426 }, { "epoch": 0.11, "grad_norm": 1.397406810935863, "learning_rate": 1.96605036030198e-05, "loss": 0.6861, "step": 1427 }, { "epoch": 0.11, "grad_norm": 1.34304680278459, "learning_rate": 1.9659854139870963e-05, "loss": 0.6652, "step": 1428 }, { "epoch": 0.11, "grad_norm": 1.3773783575027967, "learning_rate": 1.965920406684569e-05, "loss": 0.6398, "step": 1429 }, { "epoch": 0.11, "grad_norm": 1.2313327722756415, "learning_rate": 1.965855338398503e-05, "loss": 0.5613, "step": 1430 }, { "epoch": 0.11, "grad_norm": 1.2920659996394597, "learning_rate": 1.965790209133006e-05, "loss": 0.5961, "step": 1431 }, { "epoch": 0.11, "grad_norm": 1.3497886704234359, "learning_rate": 1.9657250188921906e-05, "loss": 0.6261, "step": 1432 }, { "epoch": 0.11, "grad_norm": 1.2567181772877916, "learning_rate": 1.9656597676801726e-05, "loss": 0.5708, "step": 1433 }, { "epoch": 0.11, "grad_norm": 1.28523026922812, "learning_rate": 1.9655944555010705e-05, "loss": 0.6334, "step": 1434 }, { "epoch": 0.11, "grad_norm": 1.3842284717039726, "learning_rate": 1.9655290823590095e-05, "loss": 0.6412, "step": 1435 }, { "epoch": 0.11, "grad_norm": 1.5027082154882883, "learning_rate": 1.9654636482581157e-05, "loss": 0.6968, "step": 1436 }, { "epoch": 0.11, "grad_norm": 1.2456872450401624, "learning_rate": 1.9653981532025206e-05, "loss": 0.6489, "step": 1437 }, { "epoch": 0.11, "grad_norm": 1.4398361793687466, "learning_rate": 1.9653325971963594e-05, "loss": 0.6881, "step": 1438 }, { "epoch": 0.11, "grad_norm": 1.363192448825361, "learning_rate": 1.9652669802437704e-05, "loss": 0.6535, "step": 1439 }, { "epoch": 0.11, "grad_norm": 1.5079080818201034, "learning_rate": 1.9652013023488972e-05, "loss": 0.7191, "step": 1440 }, { "epoch": 0.11, "grad_norm": 1.3701218423472024, "learning_rate": 1.9651355635158863e-05, "loss": 0.6231, "step": 1441 }, { "epoch": 0.11, "grad_norm": 1.4454980679541667, "learning_rate": 1.965069763748887e-05, "loss": 0.7062, "step": 1442 }, { "epoch": 0.11, "grad_norm": 1.4891703995862617, "learning_rate": 1.9650039030520552e-05, "loss": 0.6624, "step": 1443 }, { "epoch": 0.11, "grad_norm": 1.3884134172801812, "learning_rate": 1.964937981429548e-05, "loss": 0.6836, "step": 1444 }, { "epoch": 0.11, "grad_norm": 1.370083820048184, "learning_rate": 1.9648719988855278e-05, "loss": 0.6647, "step": 1445 }, { "epoch": 0.11, "grad_norm": 1.4064895002079651, "learning_rate": 1.9648059554241596e-05, "loss": 0.6579, "step": 1446 }, { "epoch": 0.11, "grad_norm": 1.3813042470368648, "learning_rate": 1.9647398510496144e-05, "loss": 0.6635, "step": 1447 }, { "epoch": 0.11, "grad_norm": 1.5169813241934325, "learning_rate": 1.9646736857660644e-05, "loss": 0.6605, "step": 1448 }, { "epoch": 0.11, "grad_norm": 1.326361663676536, "learning_rate": 1.964607459577688e-05, "loss": 0.6462, "step": 1449 }, { "epoch": 0.11, "grad_norm": 1.4212967252138153, "learning_rate": 1.964541172488666e-05, "loss": 0.6866, "step": 1450 }, { "epoch": 0.11, "grad_norm": 1.4149328261697203, "learning_rate": 1.964474824503183e-05, "loss": 0.6247, "step": 1451 }, { "epoch": 0.11, "grad_norm": 1.3792144171678165, "learning_rate": 1.9644084156254285e-05, "loss": 0.5731, "step": 1452 }, { "epoch": 0.11, "grad_norm": 1.379319774661222, "learning_rate": 1.9643419458595948e-05, "loss": 0.6721, "step": 1453 }, { "epoch": 0.11, "grad_norm": 1.2561116061677613, "learning_rate": 1.964275415209879e-05, "loss": 0.6117, "step": 1454 }, { "epoch": 0.11, "grad_norm": 1.3790784085356942, "learning_rate": 1.964208823680481e-05, "loss": 0.6504, "step": 1455 }, { "epoch": 0.11, "grad_norm": 1.297836544733584, "learning_rate": 1.9641421712756055e-05, "loss": 0.6207, "step": 1456 }, { "epoch": 0.11, "grad_norm": 1.2937489716898936, "learning_rate": 1.9640754579994607e-05, "loss": 0.6751, "step": 1457 }, { "epoch": 0.11, "grad_norm": 1.2338673960324187, "learning_rate": 1.9640086838562577e-05, "loss": 0.6105, "step": 1458 }, { "epoch": 0.11, "grad_norm": 1.3032162074089928, "learning_rate": 1.9639418488502132e-05, "loss": 0.6655, "step": 1459 }, { "epoch": 0.11, "grad_norm": 2.5097029266244295, "learning_rate": 1.9638749529855463e-05, "loss": 0.672, "step": 1460 }, { "epoch": 0.11, "grad_norm": 1.33194649614095, "learning_rate": 1.9638079962664807e-05, "loss": 0.6004, "step": 1461 }, { "epoch": 0.11, "grad_norm": 1.4529644456991018, "learning_rate": 1.963740978697244e-05, "loss": 0.6921, "step": 1462 }, { "epoch": 0.11, "grad_norm": 1.3697615887297365, "learning_rate": 1.9636739002820665e-05, "loss": 0.6528, "step": 1463 }, { "epoch": 0.11, "grad_norm": 1.4559886157513002, "learning_rate": 1.9636067610251842e-05, "loss": 0.7375, "step": 1464 }, { "epoch": 0.11, "grad_norm": 1.4152314639081294, "learning_rate": 1.9635395609308354e-05, "loss": 0.7021, "step": 1465 }, { "epoch": 0.11, "grad_norm": 1.3959513894255413, "learning_rate": 1.963472300003263e-05, "loss": 0.6407, "step": 1466 }, { "epoch": 0.11, "grad_norm": 1.3531217456100126, "learning_rate": 1.9634049782467132e-05, "loss": 0.6784, "step": 1467 }, { "epoch": 0.11, "grad_norm": 1.3059475233684217, "learning_rate": 1.9633375956654368e-05, "loss": 0.6873, "step": 1468 }, { "epoch": 0.11, "grad_norm": 1.3286261566671012, "learning_rate": 1.9632701522636878e-05, "loss": 0.6732, "step": 1469 }, { "epoch": 0.11, "grad_norm": 1.3085873276282465, "learning_rate": 1.9632026480457243e-05, "loss": 0.6367, "step": 1470 }, { "epoch": 0.11, "grad_norm": 1.2821963002337313, "learning_rate": 1.963135083015808e-05, "loss": 0.6455, "step": 1471 }, { "epoch": 0.11, "grad_norm": 1.4285298647964901, "learning_rate": 1.963067457178205e-05, "loss": 0.7056, "step": 1472 }, { "epoch": 0.11, "grad_norm": 1.3905576625336742, "learning_rate": 1.9629997705371845e-05, "loss": 0.5912, "step": 1473 }, { "epoch": 0.11, "grad_norm": 1.318258052747074, "learning_rate": 1.96293202309702e-05, "loss": 0.7171, "step": 1474 }, { "epoch": 0.11, "grad_norm": 1.41699597327307, "learning_rate": 1.962864214861989e-05, "loss": 0.6417, "step": 1475 }, { "epoch": 0.11, "grad_norm": 1.2461805163941169, "learning_rate": 1.9627963458363722e-05, "loss": 0.5996, "step": 1476 }, { "epoch": 0.11, "grad_norm": 1.3295378855213416, "learning_rate": 1.962728416024455e-05, "loss": 0.6438, "step": 1477 }, { "epoch": 0.11, "grad_norm": 1.3806184507243149, "learning_rate": 1.9626604254305258e-05, "loss": 0.6045, "step": 1478 }, { "epoch": 0.11, "grad_norm": 1.4109708171641018, "learning_rate": 1.9625923740588767e-05, "loss": 0.6211, "step": 1479 }, { "epoch": 0.11, "grad_norm": 1.4741973503431585, "learning_rate": 1.9625242619138053e-05, "loss": 0.7101, "step": 1480 }, { "epoch": 0.11, "grad_norm": 1.5061128356021916, "learning_rate": 1.962456088999611e-05, "loss": 0.642, "step": 1481 }, { "epoch": 0.11, "grad_norm": 1.3892191520727235, "learning_rate": 1.9623878553205986e-05, "loss": 0.6583, "step": 1482 }, { "epoch": 0.12, "grad_norm": 1.2544597224204415, "learning_rate": 1.9623195608810753e-05, "loss": 0.6542, "step": 1483 }, { "epoch": 0.12, "grad_norm": 1.4103987213225255, "learning_rate": 1.9622512056853533e-05, "loss": 0.6287, "step": 1484 }, { "epoch": 0.12, "grad_norm": 1.2931652092708912, "learning_rate": 1.962182789737748e-05, "loss": 0.6521, "step": 1485 }, { "epoch": 0.12, "grad_norm": 1.405806535097205, "learning_rate": 1.962114313042579e-05, "loss": 0.7069, "step": 1486 }, { "epoch": 0.12, "grad_norm": 1.3555031807783708, "learning_rate": 1.9620457756041695e-05, "loss": 0.6509, "step": 1487 }, { "epoch": 0.12, "grad_norm": 1.2078097897359783, "learning_rate": 1.961977177426847e-05, "loss": 0.6376, "step": 1488 }, { "epoch": 0.12, "grad_norm": 1.3442926420117547, "learning_rate": 1.9619085185149417e-05, "loss": 0.6412, "step": 1489 }, { "epoch": 0.12, "grad_norm": 1.514891614152846, "learning_rate": 1.9618397988727892e-05, "loss": 0.7223, "step": 1490 }, { "epoch": 0.12, "grad_norm": 1.2858533736715312, "learning_rate": 1.9617710185047274e-05, "loss": 0.66, "step": 1491 }, { "epoch": 0.12, "grad_norm": 1.4054810859007343, "learning_rate": 1.9617021774150996e-05, "loss": 0.7262, "step": 1492 }, { "epoch": 0.12, "grad_norm": 1.4371497515361626, "learning_rate": 1.961633275608251e-05, "loss": 0.7114, "step": 1493 }, { "epoch": 0.12, "grad_norm": 1.2273913456206889, "learning_rate": 1.9615643130885327e-05, "loss": 0.6181, "step": 1494 }, { "epoch": 0.12, "grad_norm": 1.267532089173306, "learning_rate": 1.9614952898602983e-05, "loss": 0.6302, "step": 1495 }, { "epoch": 0.12, "grad_norm": 1.3370799929271104, "learning_rate": 1.9614262059279054e-05, "loss": 0.6239, "step": 1496 }, { "epoch": 0.12, "grad_norm": 1.4962116722864225, "learning_rate": 1.9613570612957164e-05, "loss": 0.686, "step": 1497 }, { "epoch": 0.12, "grad_norm": 1.2704701399733982, "learning_rate": 1.9612878559680953e-05, "loss": 0.6545, "step": 1498 }, { "epoch": 0.12, "grad_norm": 1.1557092690986264, "learning_rate": 1.9612185899494127e-05, "loss": 0.6234, "step": 1499 }, { "epoch": 0.12, "grad_norm": 1.4162674883766881, "learning_rate": 1.9611492632440415e-05, "loss": 0.6503, "step": 1500 }, { "epoch": 0.12, "grad_norm": 1.4207065518253712, "learning_rate": 1.961079875856358e-05, "loss": 0.69, "step": 1501 }, { "epoch": 0.12, "grad_norm": 1.2116551395446886, "learning_rate": 1.961010427790744e-05, "loss": 0.6274, "step": 1502 }, { "epoch": 0.12, "grad_norm": 1.3271790670042098, "learning_rate": 1.960940919051583e-05, "loss": 0.6931, "step": 1503 }, { "epoch": 0.12, "grad_norm": 1.4489016121632021, "learning_rate": 1.9608713496432637e-05, "loss": 0.672, "step": 1504 }, { "epoch": 0.12, "grad_norm": 1.43767007568, "learning_rate": 1.9608017195701795e-05, "loss": 0.6606, "step": 1505 }, { "epoch": 0.12, "grad_norm": 1.3455544709371796, "learning_rate": 1.960732028836725e-05, "loss": 0.6673, "step": 1506 }, { "epoch": 0.12, "grad_norm": 1.3500249418850947, "learning_rate": 1.960662277447301e-05, "loss": 0.6585, "step": 1507 }, { "epoch": 0.12, "grad_norm": 1.4347901717905132, "learning_rate": 1.960592465406311e-05, "loss": 0.7402, "step": 1508 }, { "epoch": 0.12, "grad_norm": 1.393893953766652, "learning_rate": 1.960522592718163e-05, "loss": 0.6408, "step": 1509 }, { "epoch": 0.12, "grad_norm": 1.0830572889221242, "learning_rate": 1.9604526593872678e-05, "loss": 0.5447, "step": 1510 }, { "epoch": 0.12, "grad_norm": 1.2706019654745004, "learning_rate": 1.960382665418041e-05, "loss": 0.6652, "step": 1511 }, { "epoch": 0.12, "grad_norm": 1.2725235169041942, "learning_rate": 1.9603126108149014e-05, "loss": 0.6438, "step": 1512 }, { "epoch": 0.12, "grad_norm": 1.2505317987260216, "learning_rate": 1.9602424955822725e-05, "loss": 0.5812, "step": 1513 }, { "epoch": 0.12, "grad_norm": 1.2459194814863555, "learning_rate": 1.9601723197245803e-05, "loss": 0.6672, "step": 1514 }, { "epoch": 0.12, "grad_norm": 1.3984968716256014, "learning_rate": 1.960102083246256e-05, "loss": 0.7122, "step": 1515 }, { "epoch": 0.12, "grad_norm": 1.450813505916241, "learning_rate": 1.9600317861517335e-05, "loss": 0.6872, "step": 1516 }, { "epoch": 0.12, "grad_norm": 1.3646439351998105, "learning_rate": 1.959961428445451e-05, "loss": 0.6125, "step": 1517 }, { "epoch": 0.12, "grad_norm": 1.2954163678793544, "learning_rate": 1.9598910101318514e-05, "loss": 0.5886, "step": 1518 }, { "epoch": 0.12, "grad_norm": 1.25640629417667, "learning_rate": 1.9598205312153795e-05, "loss": 0.6207, "step": 1519 }, { "epoch": 0.12, "grad_norm": 1.4745581207462595, "learning_rate": 1.9597499917004856e-05, "loss": 0.7358, "step": 1520 }, { "epoch": 0.12, "grad_norm": 1.319866172696702, "learning_rate": 1.9596793915916227e-05, "loss": 0.6598, "step": 1521 }, { "epoch": 0.12, "grad_norm": 1.3102367051135526, "learning_rate": 1.959608730893249e-05, "loss": 0.6806, "step": 1522 }, { "epoch": 0.12, "grad_norm": 1.2986111796759294, "learning_rate": 1.959538009609825e-05, "loss": 0.6256, "step": 1523 }, { "epoch": 0.12, "grad_norm": 1.312147865287336, "learning_rate": 1.959467227745816e-05, "loss": 0.6063, "step": 1524 }, { "epoch": 0.12, "grad_norm": 1.2882306738480376, "learning_rate": 1.9593963853056905e-05, "loss": 0.6839, "step": 1525 }, { "epoch": 0.12, "grad_norm": 1.4693887011734195, "learning_rate": 1.9593254822939215e-05, "loss": 0.6933, "step": 1526 }, { "epoch": 0.12, "grad_norm": 1.400339846574915, "learning_rate": 1.9592545187149855e-05, "loss": 0.6879, "step": 1527 }, { "epoch": 0.12, "grad_norm": 1.3867996836940029, "learning_rate": 1.9591834945733625e-05, "loss": 0.6391, "step": 1528 }, { "epoch": 0.12, "grad_norm": 1.4110075264722555, "learning_rate": 1.9591124098735363e-05, "loss": 0.6457, "step": 1529 }, { "epoch": 0.12, "grad_norm": 1.3716469977132435, "learning_rate": 1.959041264619996e-05, "loss": 0.6833, "step": 1530 }, { "epoch": 0.12, "grad_norm": 1.3963892337427741, "learning_rate": 1.958970058817232e-05, "loss": 0.639, "step": 1531 }, { "epoch": 0.12, "grad_norm": 1.5121028126421716, "learning_rate": 1.958898792469741e-05, "loss": 0.7052, "step": 1532 }, { "epoch": 0.12, "grad_norm": 1.5256500609643444, "learning_rate": 1.958827465582022e-05, "loss": 0.7058, "step": 1533 }, { "epoch": 0.12, "grad_norm": 1.4835974815285566, "learning_rate": 1.958756078158578e-05, "loss": 0.7067, "step": 1534 }, { "epoch": 0.12, "grad_norm": 1.349144111517021, "learning_rate": 1.958684630203916e-05, "loss": 0.624, "step": 1535 }, { "epoch": 0.12, "grad_norm": 1.4271914680146813, "learning_rate": 1.958613121722548e-05, "loss": 0.663, "step": 1536 }, { "epoch": 0.12, "grad_norm": 1.3336491757067224, "learning_rate": 1.958541552718987e-05, "loss": 0.6791, "step": 1537 }, { "epoch": 0.12, "grad_norm": 1.5672743525071198, "learning_rate": 1.9584699231977526e-05, "loss": 0.633, "step": 1538 }, { "epoch": 0.12, "grad_norm": 1.2150944392859622, "learning_rate": 1.9583982331633668e-05, "loss": 0.5963, "step": 1539 }, { "epoch": 0.12, "grad_norm": 1.2103470377939822, "learning_rate": 1.9583264826203564e-05, "loss": 0.5991, "step": 1540 }, { "epoch": 0.12, "grad_norm": 1.49303671753505, "learning_rate": 1.9582546715732503e-05, "loss": 0.6851, "step": 1541 }, { "epoch": 0.12, "grad_norm": 1.1446403477460383, "learning_rate": 1.958182800026583e-05, "loss": 0.576, "step": 1542 }, { "epoch": 0.12, "grad_norm": 1.3180844620774201, "learning_rate": 1.9581108679848918e-05, "loss": 0.623, "step": 1543 }, { "epoch": 0.12, "grad_norm": 1.2114147107357678, "learning_rate": 1.9580388754527185e-05, "loss": 0.598, "step": 1544 }, { "epoch": 0.12, "grad_norm": 1.1869258998665362, "learning_rate": 1.957966822434608e-05, "loss": 0.6084, "step": 1545 }, { "epoch": 0.12, "grad_norm": 1.2582476319355758, "learning_rate": 1.9578947089351095e-05, "loss": 0.6042, "step": 1546 }, { "epoch": 0.12, "grad_norm": 1.2811762160478832, "learning_rate": 1.9578225349587763e-05, "loss": 0.5874, "step": 1547 }, { "epoch": 0.12, "grad_norm": 1.3432276398008005, "learning_rate": 1.9577503005101644e-05, "loss": 0.6726, "step": 1548 }, { "epoch": 0.12, "grad_norm": 1.3882739735049308, "learning_rate": 1.9576780055938348e-05, "loss": 0.6434, "step": 1549 }, { "epoch": 0.12, "grad_norm": 1.3250526345792901, "learning_rate": 1.957605650214352e-05, "loss": 0.692, "step": 1550 }, { "epoch": 0.12, "grad_norm": 1.3034620183130825, "learning_rate": 1.9575332343762832e-05, "loss": 0.6672, "step": 1551 }, { "epoch": 0.12, "grad_norm": 1.2811228060972017, "learning_rate": 1.9574607580842018e-05, "loss": 0.6272, "step": 1552 }, { "epoch": 0.12, "grad_norm": 1.3208527672879262, "learning_rate": 1.9573882213426824e-05, "loss": 0.5954, "step": 1553 }, { "epoch": 0.12, "grad_norm": 1.2306002592018632, "learning_rate": 1.9573156241563053e-05, "loss": 0.6036, "step": 1554 }, { "epoch": 0.12, "grad_norm": 1.2603278273282519, "learning_rate": 1.957242966529654e-05, "loss": 0.6039, "step": 1555 }, { "epoch": 0.12, "grad_norm": 1.2573860819884088, "learning_rate": 1.9571702484673153e-05, "loss": 0.6227, "step": 1556 }, { "epoch": 0.12, "grad_norm": 1.2413642602520683, "learning_rate": 1.9570974699738805e-05, "loss": 0.5891, "step": 1557 }, { "epoch": 0.12, "grad_norm": 1.3526493179057293, "learning_rate": 1.9570246310539443e-05, "loss": 0.6823, "step": 1558 }, { "epoch": 0.12, "grad_norm": 1.4106626578777612, "learning_rate": 1.9569517317121058e-05, "loss": 0.6279, "step": 1559 }, { "epoch": 0.12, "grad_norm": 1.233501656533108, "learning_rate": 1.9568787719529673e-05, "loss": 0.6215, "step": 1560 }, { "epoch": 0.12, "grad_norm": 1.4274298347024477, "learning_rate": 1.9568057517811345e-05, "loss": 0.674, "step": 1561 }, { "epoch": 0.12, "grad_norm": 1.2834856485828359, "learning_rate": 1.9567326712012188e-05, "loss": 0.6489, "step": 1562 }, { "epoch": 0.12, "grad_norm": 1.3149857370161504, "learning_rate": 1.956659530217833e-05, "loss": 0.6282, "step": 1563 }, { "epoch": 0.12, "grad_norm": 1.3475865000490472, "learning_rate": 1.9565863288355955e-05, "loss": 0.6411, "step": 1564 }, { "epoch": 0.12, "grad_norm": 1.2486982242821192, "learning_rate": 1.956513067059128e-05, "loss": 0.647, "step": 1565 }, { "epoch": 0.12, "grad_norm": 1.347892629928924, "learning_rate": 1.9564397448930552e-05, "loss": 0.7104, "step": 1566 }, { "epoch": 0.12, "grad_norm": 1.2879110568979681, "learning_rate": 1.9563663623420067e-05, "loss": 0.6506, "step": 1567 }, { "epoch": 0.12, "grad_norm": 1.301153914305594, "learning_rate": 1.956292919410616e-05, "loss": 0.6711, "step": 1568 }, { "epoch": 0.12, "grad_norm": 1.3308354712898856, "learning_rate": 1.9562194161035194e-05, "loss": 0.6517, "step": 1569 }, { "epoch": 0.12, "grad_norm": 1.3553894635408312, "learning_rate": 1.956145852425357e-05, "loss": 0.616, "step": 1570 }, { "epoch": 0.12, "grad_norm": 1.3778268193075993, "learning_rate": 1.9560722283807744e-05, "loss": 0.6846, "step": 1571 }, { "epoch": 0.12, "grad_norm": 1.38099633040828, "learning_rate": 1.955998543974419e-05, "loss": 0.6395, "step": 1572 }, { "epoch": 0.12, "grad_norm": 1.275608762631205, "learning_rate": 1.955924799210944e-05, "loss": 0.5865, "step": 1573 }, { "epoch": 0.12, "grad_norm": 1.2699009283726723, "learning_rate": 1.9558509940950036e-05, "loss": 0.6153, "step": 1574 }, { "epoch": 0.12, "grad_norm": 1.2834906640569594, "learning_rate": 1.955777128631259e-05, "loss": 0.6268, "step": 1575 }, { "epoch": 0.12, "grad_norm": 1.1848884777916726, "learning_rate": 1.9557032028243728e-05, "loss": 0.6072, "step": 1576 }, { "epoch": 0.12, "grad_norm": 1.4470417297399003, "learning_rate": 1.9556292166790128e-05, "loss": 0.6804, "step": 1577 }, { "epoch": 0.12, "grad_norm": 1.3033755896239438, "learning_rate": 1.9555551701998496e-05, "loss": 0.5977, "step": 1578 }, { "epoch": 0.12, "grad_norm": 1.2017973473151253, "learning_rate": 1.9554810633915587e-05, "loss": 0.5935, "step": 1579 }, { "epoch": 0.12, "grad_norm": 1.2440387199024459, "learning_rate": 1.955406896258819e-05, "loss": 0.6438, "step": 1580 }, { "epoch": 0.12, "grad_norm": 1.308410905048871, "learning_rate": 1.955332668806312e-05, "loss": 0.649, "step": 1581 }, { "epoch": 0.12, "grad_norm": 1.4653000591365797, "learning_rate": 1.9552583810387253e-05, "loss": 0.6331, "step": 1582 }, { "epoch": 0.12, "grad_norm": 1.303129076322405, "learning_rate": 1.9551840329607483e-05, "loss": 0.6694, "step": 1583 }, { "epoch": 0.12, "grad_norm": 1.287249822290956, "learning_rate": 1.9551096245770752e-05, "loss": 0.6592, "step": 1584 }, { "epoch": 0.12, "grad_norm": 1.329040167159197, "learning_rate": 1.9550351558924036e-05, "loss": 0.7047, "step": 1585 }, { "epoch": 0.12, "grad_norm": 1.3599395894910644, "learning_rate": 1.9549606269114358e-05, "loss": 0.5875, "step": 1586 }, { "epoch": 0.12, "grad_norm": 1.360770112527667, "learning_rate": 1.9548860376388762e-05, "loss": 0.6684, "step": 1587 }, { "epoch": 0.12, "grad_norm": 1.401916863968186, "learning_rate": 1.9548113880794348e-05, "loss": 0.7019, "step": 1588 }, { "epoch": 0.12, "grad_norm": 1.432876116256592, "learning_rate": 1.954736678237824e-05, "loss": 0.6843, "step": 1589 }, { "epoch": 0.12, "grad_norm": 1.3406597355252898, "learning_rate": 1.954661908118761e-05, "loss": 0.6047, "step": 1590 }, { "epoch": 0.12, "grad_norm": 1.3159802027549536, "learning_rate": 1.9545870777269664e-05, "loss": 0.6297, "step": 1591 }, { "epoch": 0.12, "grad_norm": 1.3958866574174782, "learning_rate": 1.9545121870671642e-05, "loss": 0.6356, "step": 1592 }, { "epoch": 0.12, "grad_norm": 1.3212703413105757, "learning_rate": 1.9544372361440836e-05, "loss": 0.676, "step": 1593 }, { "epoch": 0.12, "grad_norm": 1.4324014055231864, "learning_rate": 1.9543622249624557e-05, "loss": 0.7143, "step": 1594 }, { "epoch": 0.12, "grad_norm": 1.3510361898051917, "learning_rate": 1.9542871535270168e-05, "loss": 0.5936, "step": 1595 }, { "epoch": 0.12, "grad_norm": 1.433319481331774, "learning_rate": 1.9542120218425062e-05, "loss": 0.7274, "step": 1596 }, { "epoch": 0.12, "grad_norm": 1.4207225782495838, "learning_rate": 1.954136829913668e-05, "loss": 0.7697, "step": 1597 }, { "epoch": 0.12, "grad_norm": 1.172925452859515, "learning_rate": 1.954061577745249e-05, "loss": 0.6404, "step": 1598 }, { "epoch": 0.12, "grad_norm": 1.3739292570695558, "learning_rate": 1.9539862653419998e-05, "loss": 0.707, "step": 1599 }, { "epoch": 0.12, "grad_norm": 1.2791762433433942, "learning_rate": 1.9539108927086762e-05, "loss": 0.6349, "step": 1600 }, { "epoch": 0.12, "grad_norm": 1.41234982291409, "learning_rate": 1.9538354598500358e-05, "loss": 0.6279, "step": 1601 }, { "epoch": 0.12, "grad_norm": 1.3472062396164377, "learning_rate": 1.9537599667708423e-05, "loss": 0.6537, "step": 1602 }, { "epoch": 0.12, "grad_norm": 1.3598745020295593, "learning_rate": 1.953684413475861e-05, "loss": 0.6575, "step": 1603 }, { "epoch": 0.12, "grad_norm": 1.3492193030978572, "learning_rate": 1.9536087999698624e-05, "loss": 0.6921, "step": 1604 }, { "epoch": 0.12, "grad_norm": 1.3350143366126004, "learning_rate": 1.9535331262576203e-05, "loss": 0.6351, "step": 1605 }, { "epoch": 0.12, "grad_norm": 1.3007999453189176, "learning_rate": 1.9534573923439124e-05, "loss": 0.6497, "step": 1606 }, { "epoch": 0.12, "grad_norm": 1.4242711261068313, "learning_rate": 1.95338159823352e-05, "loss": 0.6587, "step": 1607 }, { "epoch": 0.12, "grad_norm": 1.3295646942483512, "learning_rate": 1.9533057439312286e-05, "loss": 0.6443, "step": 1608 }, { "epoch": 0.12, "grad_norm": 1.285179253925957, "learning_rate": 1.9532298294418272e-05, "loss": 0.66, "step": 1609 }, { "epoch": 0.12, "grad_norm": 1.4129158133786655, "learning_rate": 1.9531538547701087e-05, "loss": 0.6448, "step": 1610 }, { "epoch": 0.12, "grad_norm": 1.3419376393850617, "learning_rate": 1.9530778199208698e-05, "loss": 0.6247, "step": 1611 }, { "epoch": 0.13, "grad_norm": 1.3834339189536926, "learning_rate": 1.9530017248989107e-05, "loss": 0.6187, "step": 1612 }, { "epoch": 0.13, "grad_norm": 1.4451985082264414, "learning_rate": 1.9529255697090358e-05, "loss": 0.6755, "step": 1613 }, { "epoch": 0.13, "grad_norm": 1.349543347655055, "learning_rate": 1.9528493543560533e-05, "loss": 0.6281, "step": 1614 }, { "epoch": 0.13, "grad_norm": 1.3757705696726843, "learning_rate": 1.9527730788447752e-05, "loss": 0.6459, "step": 1615 }, { "epoch": 0.13, "grad_norm": 1.2725048277188138, "learning_rate": 1.952696743180017e-05, "loss": 0.6539, "step": 1616 }, { "epoch": 0.13, "grad_norm": 1.4875827862438296, "learning_rate": 1.952620347366598e-05, "loss": 0.6947, "step": 1617 }, { "epoch": 0.13, "grad_norm": 1.494804284799022, "learning_rate": 1.952543891409341e-05, "loss": 0.6922, "step": 1618 }, { "epoch": 0.13, "grad_norm": 1.3048225835436218, "learning_rate": 1.9524673753130745e-05, "loss": 0.6317, "step": 1619 }, { "epoch": 0.13, "grad_norm": 1.456086780749599, "learning_rate": 1.952390799082628e-05, "loss": 0.717, "step": 1620 }, { "epoch": 0.13, "grad_norm": 1.3076789505198712, "learning_rate": 1.952314162722837e-05, "loss": 0.67, "step": 1621 }, { "epoch": 0.13, "grad_norm": 1.2647147025049956, "learning_rate": 1.9522374662385396e-05, "loss": 0.5876, "step": 1622 }, { "epoch": 0.13, "grad_norm": 1.495839149584217, "learning_rate": 1.952160709634578e-05, "loss": 0.7636, "step": 1623 }, { "epoch": 0.13, "grad_norm": 1.2971599219911996, "learning_rate": 1.952083892915798e-05, "loss": 0.6187, "step": 1624 }, { "epoch": 0.13, "grad_norm": 1.3695968324982255, "learning_rate": 1.95200701608705e-05, "loss": 0.6537, "step": 1625 }, { "epoch": 0.13, "grad_norm": 1.3637421274351307, "learning_rate": 1.9519300791531874e-05, "loss": 0.6864, "step": 1626 }, { "epoch": 0.13, "grad_norm": 1.2084629493560946, "learning_rate": 1.9518530821190675e-05, "loss": 0.6336, "step": 1627 }, { "epoch": 0.13, "grad_norm": 1.3328254099821017, "learning_rate": 1.9517760249895518e-05, "loss": 0.6069, "step": 1628 }, { "epoch": 0.13, "grad_norm": 1.226836107194319, "learning_rate": 1.951698907769505e-05, "loss": 0.6174, "step": 1629 }, { "epoch": 0.13, "grad_norm": 1.2896108674708435, "learning_rate": 1.951621730463796e-05, "loss": 0.6248, "step": 1630 }, { "epoch": 0.13, "grad_norm": 1.2564568648184522, "learning_rate": 1.9515444930772972e-05, "loss": 0.6375, "step": 1631 }, { "epoch": 0.13, "grad_norm": 1.3202292963480275, "learning_rate": 1.9514671956148855e-05, "loss": 0.6724, "step": 1632 }, { "epoch": 0.13, "grad_norm": 1.2474507085507114, "learning_rate": 1.951389838081441e-05, "loss": 0.6047, "step": 1633 }, { "epoch": 0.13, "grad_norm": 1.5826478109888913, "learning_rate": 1.951312420481847e-05, "loss": 0.7121, "step": 1634 }, { "epoch": 0.13, "grad_norm": 1.3152930649662653, "learning_rate": 1.951234942820992e-05, "loss": 0.6369, "step": 1635 }, { "epoch": 0.13, "grad_norm": 1.3218693780441315, "learning_rate": 1.9511574051037672e-05, "loss": 0.6749, "step": 1636 }, { "epoch": 0.13, "grad_norm": 1.2525439125632163, "learning_rate": 1.9510798073350686e-05, "loss": 0.6181, "step": 1637 }, { "epoch": 0.13, "grad_norm": 1.546422487597453, "learning_rate": 1.9510021495197943e-05, "loss": 0.6291, "step": 1638 }, { "epoch": 0.13, "grad_norm": 1.3831334145794631, "learning_rate": 1.950924431662848e-05, "loss": 0.7096, "step": 1639 }, { "epoch": 0.13, "grad_norm": 1.2982931082047735, "learning_rate": 1.9508466537691363e-05, "loss": 0.6685, "step": 1640 }, { "epoch": 0.13, "grad_norm": 1.2439292836006322, "learning_rate": 1.9507688158435693e-05, "loss": 0.6459, "step": 1641 }, { "epoch": 0.13, "grad_norm": 1.218063356690392, "learning_rate": 1.950690917891062e-05, "loss": 0.6122, "step": 1642 }, { "epoch": 0.13, "grad_norm": 1.2613990307746286, "learning_rate": 1.950612959916532e-05, "loss": 0.6282, "step": 1643 }, { "epoch": 0.13, "grad_norm": 1.2630363646925478, "learning_rate": 1.9505349419249015e-05, "loss": 0.6726, "step": 1644 }, { "epoch": 0.13, "grad_norm": 1.4247964880996937, "learning_rate": 1.9504568639210956e-05, "loss": 0.6326, "step": 1645 }, { "epoch": 0.13, "grad_norm": 1.3253855558164094, "learning_rate": 1.9503787259100445e-05, "loss": 0.6852, "step": 1646 }, { "epoch": 0.13, "grad_norm": 1.4069105504279644, "learning_rate": 1.9503005278966808e-05, "loss": 0.6913, "step": 1647 }, { "epoch": 0.13, "grad_norm": 1.3431000579321597, "learning_rate": 1.9502222698859422e-05, "loss": 0.6527, "step": 1648 }, { "epoch": 0.13, "grad_norm": 1.298649320986588, "learning_rate": 1.950143951882769e-05, "loss": 0.6676, "step": 1649 }, { "epoch": 0.13, "grad_norm": 1.2262062964783422, "learning_rate": 1.9500655738921058e-05, "loss": 0.5631, "step": 1650 }, { "epoch": 0.13, "grad_norm": 1.2051690807670927, "learning_rate": 1.9499871359189017e-05, "loss": 0.643, "step": 1651 }, { "epoch": 0.13, "grad_norm": 1.2912285379581527, "learning_rate": 1.949908637968108e-05, "loss": 0.6451, "step": 1652 }, { "epoch": 0.13, "grad_norm": 1.413028106876958, "learning_rate": 1.9498300800446815e-05, "loss": 0.6963, "step": 1653 }, { "epoch": 0.13, "grad_norm": 1.3135889622126984, "learning_rate": 1.9497514621535815e-05, "loss": 0.6241, "step": 1654 }, { "epoch": 0.13, "grad_norm": 1.2908234561438021, "learning_rate": 1.9496727842997713e-05, "loss": 0.6443, "step": 1655 }, { "epoch": 0.13, "grad_norm": 1.2428083487737023, "learning_rate": 1.949594046488219e-05, "loss": 0.6417, "step": 1656 }, { "epoch": 0.13, "grad_norm": 1.1918627255332315, "learning_rate": 1.949515248723895e-05, "loss": 0.5729, "step": 1657 }, { "epoch": 0.13, "grad_norm": 1.3364460295152696, "learning_rate": 1.9494363910117745e-05, "loss": 0.6751, "step": 1658 }, { "epoch": 0.13, "grad_norm": 1.3817888339476168, "learning_rate": 1.9493574733568362e-05, "loss": 0.6299, "step": 1659 }, { "epoch": 0.13, "grad_norm": 1.336331850262456, "learning_rate": 1.9492784957640624e-05, "loss": 0.6401, "step": 1660 }, { "epoch": 0.13, "grad_norm": 1.5394468650506685, "learning_rate": 1.94919945823844e-05, "loss": 0.6854, "step": 1661 }, { "epoch": 0.13, "grad_norm": 1.3283612658213446, "learning_rate": 1.949120360784958e-05, "loss": 0.6765, "step": 1662 }, { "epoch": 0.13, "grad_norm": 1.3156635668395658, "learning_rate": 1.9490412034086112e-05, "loss": 0.6036, "step": 1663 }, { "epoch": 0.13, "grad_norm": 1.352655310747795, "learning_rate": 1.9489619861143966e-05, "loss": 0.6887, "step": 1664 }, { "epoch": 0.13, "grad_norm": 1.3882399690809386, "learning_rate": 1.948882708907316e-05, "loss": 0.6455, "step": 1665 }, { "epoch": 0.13, "grad_norm": 1.4230400594191541, "learning_rate": 1.9488033717923744e-05, "loss": 0.6678, "step": 1666 }, { "epoch": 0.13, "grad_norm": 1.4792071681678847, "learning_rate": 1.9487239747745807e-05, "loss": 0.6669, "step": 1667 }, { "epoch": 0.13, "grad_norm": 1.0823281772440976, "learning_rate": 1.9486445178589477e-05, "loss": 0.5852, "step": 1668 }, { "epoch": 0.13, "grad_norm": 1.30532944470769, "learning_rate": 1.9485650010504918e-05, "loss": 0.6312, "step": 1669 }, { "epoch": 0.13, "grad_norm": 1.207124132452917, "learning_rate": 1.9484854243542336e-05, "loss": 0.5761, "step": 1670 }, { "epoch": 0.13, "grad_norm": 1.4307943759977249, "learning_rate": 1.948405787775197e-05, "loss": 0.6544, "step": 1671 }, { "epoch": 0.13, "grad_norm": 1.2773409035560555, "learning_rate": 1.94832609131841e-05, "loss": 0.6917, "step": 1672 }, { "epoch": 0.13, "grad_norm": 1.3656208326332877, "learning_rate": 1.948246334988904e-05, "loss": 0.6261, "step": 1673 }, { "epoch": 0.13, "grad_norm": 1.3661389498937764, "learning_rate": 1.9481665187917147e-05, "loss": 0.6416, "step": 1674 }, { "epoch": 0.13, "grad_norm": 1.3336420248240874, "learning_rate": 1.9480866427318807e-05, "loss": 0.638, "step": 1675 }, { "epoch": 0.13, "grad_norm": 1.3478522116782916, "learning_rate": 1.9480067068144458e-05, "loss": 0.6766, "step": 1676 }, { "epoch": 0.13, "grad_norm": 1.3819711140541093, "learning_rate": 1.9479267110444566e-05, "loss": 0.6582, "step": 1677 }, { "epoch": 0.13, "grad_norm": 1.3340384834006052, "learning_rate": 1.9478466554269633e-05, "loss": 0.5807, "step": 1678 }, { "epoch": 0.13, "grad_norm": 1.2321214506212914, "learning_rate": 1.94776653996702e-05, "loss": 0.6527, "step": 1679 }, { "epoch": 0.13, "grad_norm": 1.42304382910286, "learning_rate": 1.947686364669686e-05, "loss": 0.7393, "step": 1680 }, { "epoch": 0.13, "grad_norm": 1.351307883562319, "learning_rate": 1.9476061295400218e-05, "loss": 0.6712, "step": 1681 }, { "epoch": 0.13, "grad_norm": 1.2371071625458785, "learning_rate": 1.947525834583094e-05, "loss": 0.6414, "step": 1682 }, { "epoch": 0.13, "grad_norm": 1.3435047724319267, "learning_rate": 1.9474454798039713e-05, "loss": 0.6386, "step": 1683 }, { "epoch": 0.13, "grad_norm": 1.4039604622335615, "learning_rate": 1.9473650652077275e-05, "loss": 0.6874, "step": 1684 }, { "epoch": 0.13, "grad_norm": 1.3784077205135106, "learning_rate": 1.9472845907994397e-05, "loss": 0.6332, "step": 1685 }, { "epoch": 0.13, "grad_norm": 1.2000282860442137, "learning_rate": 1.9472040565841877e-05, "loss": 0.5581, "step": 1686 }, { "epoch": 0.13, "grad_norm": 1.4381181797494187, "learning_rate": 1.947123462567057e-05, "loss": 0.6511, "step": 1687 }, { "epoch": 0.13, "grad_norm": 1.412294747622676, "learning_rate": 1.9470428087531354e-05, "loss": 0.6266, "step": 1688 }, { "epoch": 0.13, "grad_norm": 1.3290537559998392, "learning_rate": 1.9469620951475154e-05, "loss": 0.6643, "step": 1689 }, { "epoch": 0.13, "grad_norm": 1.2301033567407134, "learning_rate": 1.9468813217552926e-05, "loss": 0.6454, "step": 1690 }, { "epoch": 0.13, "grad_norm": 1.2913551518923005, "learning_rate": 1.9468004885815667e-05, "loss": 0.6124, "step": 1691 }, { "epoch": 0.13, "grad_norm": 1.2024786563018028, "learning_rate": 1.946719595631441e-05, "loss": 0.5772, "step": 1692 }, { "epoch": 0.13, "grad_norm": 1.4841037703217972, "learning_rate": 1.946638642910023e-05, "loss": 0.6568, "step": 1693 }, { "epoch": 0.13, "grad_norm": 1.3363512971057134, "learning_rate": 1.9465576304224233e-05, "loss": 0.6735, "step": 1694 }, { "epoch": 0.13, "grad_norm": 1.3300614545004312, "learning_rate": 1.9464765581737567e-05, "loss": 0.6477, "step": 1695 }, { "epoch": 0.13, "grad_norm": 1.3342649910313202, "learning_rate": 1.946395426169142e-05, "loss": 0.6188, "step": 1696 }, { "epoch": 0.13, "grad_norm": 1.356269709711965, "learning_rate": 1.9463142344137013e-05, "loss": 0.6254, "step": 1697 }, { "epoch": 0.13, "grad_norm": 1.2737688908846687, "learning_rate": 1.9462329829125604e-05, "loss": 0.6352, "step": 1698 }, { "epoch": 0.13, "grad_norm": 1.2928672819861524, "learning_rate": 1.9461516716708496e-05, "loss": 0.6746, "step": 1699 }, { "epoch": 0.13, "grad_norm": 1.2462527852232945, "learning_rate": 1.9460703006937023e-05, "loss": 0.6072, "step": 1700 }, { "epoch": 0.13, "grad_norm": 1.3600213717436305, "learning_rate": 1.9459888699862555e-05, "loss": 0.6699, "step": 1701 }, { "epoch": 0.13, "grad_norm": 1.304672241121708, "learning_rate": 1.9459073795536513e-05, "loss": 0.6614, "step": 1702 }, { "epoch": 0.13, "grad_norm": 1.260479344892915, "learning_rate": 1.9458258294010335e-05, "loss": 0.5953, "step": 1703 }, { "epoch": 0.13, "grad_norm": 1.4027928135425936, "learning_rate": 1.9457442195335514e-05, "loss": 0.6853, "step": 1704 }, { "epoch": 0.13, "grad_norm": 1.243047116900357, "learning_rate": 1.945662549956357e-05, "loss": 0.6072, "step": 1705 }, { "epoch": 0.13, "grad_norm": 1.2468337011548778, "learning_rate": 1.945580820674607e-05, "loss": 0.6176, "step": 1706 }, { "epoch": 0.13, "grad_norm": 1.4600280542160484, "learning_rate": 1.9454990316934618e-05, "loss": 0.6828, "step": 1707 }, { "epoch": 0.13, "grad_norm": 1.4361574288889705, "learning_rate": 1.9454171830180842e-05, "loss": 0.6607, "step": 1708 }, { "epoch": 0.13, "grad_norm": 1.2911049123754939, "learning_rate": 1.945335274653642e-05, "loss": 0.656, "step": 1709 }, { "epoch": 0.13, "grad_norm": 1.220834953819128, "learning_rate": 1.9452533066053067e-05, "loss": 0.5929, "step": 1710 }, { "epoch": 0.13, "grad_norm": 1.2237992474131756, "learning_rate": 1.9451712788782534e-05, "loss": 0.5865, "step": 1711 }, { "epoch": 0.13, "grad_norm": 1.386744969079819, "learning_rate": 1.9450891914776605e-05, "loss": 0.6868, "step": 1712 }, { "epoch": 0.13, "grad_norm": 1.2609752436664927, "learning_rate": 1.9450070444087113e-05, "loss": 0.6056, "step": 1713 }, { "epoch": 0.13, "grad_norm": 1.2892210429403175, "learning_rate": 1.9449248376765918e-05, "loss": 0.5985, "step": 1714 }, { "epoch": 0.13, "grad_norm": 1.4446119930628774, "learning_rate": 1.9448425712864917e-05, "loss": 0.6983, "step": 1715 }, { "epoch": 0.13, "grad_norm": 1.4259015594306956, "learning_rate": 1.9447602452436058e-05, "loss": 0.6322, "step": 1716 }, { "epoch": 0.13, "grad_norm": 1.226230503514461, "learning_rate": 1.944677859553131e-05, "loss": 0.6451, "step": 1717 }, { "epoch": 0.13, "grad_norm": 1.283456066174709, "learning_rate": 1.944595414220269e-05, "loss": 0.6641, "step": 1718 }, { "epoch": 0.13, "grad_norm": 1.3642209811962993, "learning_rate": 1.9445129092502253e-05, "loss": 0.6395, "step": 1719 }, { "epoch": 0.13, "grad_norm": 1.245708441879969, "learning_rate": 1.9444303446482086e-05, "loss": 0.6451, "step": 1720 }, { "epoch": 0.13, "grad_norm": 1.3245842353327342, "learning_rate": 1.9443477204194315e-05, "loss": 0.6655, "step": 1721 }, { "epoch": 0.13, "grad_norm": 1.2715552056854047, "learning_rate": 1.9442650365691102e-05, "loss": 0.6386, "step": 1722 }, { "epoch": 0.13, "grad_norm": 1.3462189459517546, "learning_rate": 1.944182293102466e-05, "loss": 0.6391, "step": 1723 }, { "epoch": 0.13, "grad_norm": 1.2734904366004214, "learning_rate": 1.944099490024722e-05, "loss": 0.6149, "step": 1724 }, { "epoch": 0.13, "grad_norm": 1.3659556043232803, "learning_rate": 1.9440166273411062e-05, "loss": 0.6347, "step": 1725 }, { "epoch": 0.13, "grad_norm": 1.2292438525949423, "learning_rate": 1.9439337050568504e-05, "loss": 0.5649, "step": 1726 }, { "epoch": 0.13, "grad_norm": 1.2587119257149153, "learning_rate": 1.9438507231771897e-05, "loss": 0.6533, "step": 1727 }, { "epoch": 0.13, "grad_norm": 1.3552559457439861, "learning_rate": 1.9437676817073635e-05, "loss": 0.6966, "step": 1728 }, { "epoch": 0.13, "grad_norm": 1.317781448848758, "learning_rate": 1.9436845806526137e-05, "loss": 0.6053, "step": 1729 }, { "epoch": 0.13, "grad_norm": 1.218933091715898, "learning_rate": 1.943601420018188e-05, "loss": 0.6241, "step": 1730 }, { "epoch": 0.13, "grad_norm": 1.2819330674164047, "learning_rate": 1.9435181998093363e-05, "loss": 0.5859, "step": 1731 }, { "epoch": 0.13, "grad_norm": 1.2979205868796004, "learning_rate": 1.9434349200313126e-05, "loss": 0.6189, "step": 1732 }, { "epoch": 0.13, "grad_norm": 1.2631881236027205, "learning_rate": 1.9433515806893754e-05, "loss": 0.6205, "step": 1733 }, { "epoch": 0.13, "grad_norm": 1.211010789191571, "learning_rate": 1.9432681817887854e-05, "loss": 0.6586, "step": 1734 }, { "epoch": 0.13, "grad_norm": 1.3550807166603973, "learning_rate": 1.943184723334808e-05, "loss": 0.6747, "step": 1735 }, { "epoch": 0.13, "grad_norm": 1.325161803460716, "learning_rate": 1.9431012053327135e-05, "loss": 0.6424, "step": 1736 }, { "epoch": 0.13, "grad_norm": 1.2578420694796821, "learning_rate": 1.943017627787774e-05, "loss": 0.6117, "step": 1737 }, { "epoch": 0.13, "grad_norm": 1.2846426821764276, "learning_rate": 1.9429339907052666e-05, "loss": 0.6048, "step": 1738 }, { "epoch": 0.13, "grad_norm": 1.3351920652636022, "learning_rate": 1.942850294090471e-05, "loss": 0.6545, "step": 1739 }, { "epoch": 0.13, "grad_norm": 1.2275681473253612, "learning_rate": 1.942766537948672e-05, "loss": 0.5942, "step": 1740 }, { "epoch": 0.14, "grad_norm": 1.448747913138571, "learning_rate": 1.9426827222851576e-05, "loss": 0.6672, "step": 1741 }, { "epoch": 0.14, "grad_norm": 1.3551998256423143, "learning_rate": 1.942598847105219e-05, "loss": 0.7107, "step": 1742 }, { "epoch": 0.14, "grad_norm": 1.2442038144244327, "learning_rate": 1.9425149124141524e-05, "loss": 0.584, "step": 1743 }, { "epoch": 0.14, "grad_norm": 1.4410549655837623, "learning_rate": 1.9424309182172563e-05, "loss": 0.6376, "step": 1744 }, { "epoch": 0.14, "grad_norm": 1.3702917116995883, "learning_rate": 1.9423468645198342e-05, "loss": 0.6489, "step": 1745 }, { "epoch": 0.14, "grad_norm": 1.3973419757274244, "learning_rate": 1.9422627513271927e-05, "loss": 0.6646, "step": 1746 }, { "epoch": 0.14, "grad_norm": 1.2005454313967445, "learning_rate": 1.9421785786446422e-05, "loss": 0.6064, "step": 1747 }, { "epoch": 0.14, "grad_norm": 1.4382653065076993, "learning_rate": 1.942094346477497e-05, "loss": 0.633, "step": 1748 }, { "epoch": 0.14, "grad_norm": 1.2985161657638071, "learning_rate": 1.9420100548310753e-05, "loss": 0.5816, "step": 1749 }, { "epoch": 0.14, "grad_norm": 1.2276991418873875, "learning_rate": 1.9419257037106983e-05, "loss": 0.6184, "step": 1750 }, { "epoch": 0.14, "grad_norm": 1.321233168813278, "learning_rate": 1.9418412931216926e-05, "loss": 0.6215, "step": 1751 }, { "epoch": 0.14, "grad_norm": 1.2310511104066357, "learning_rate": 1.9417568230693863e-05, "loss": 0.6148, "step": 1752 }, { "epoch": 0.14, "grad_norm": 1.201918951174184, "learning_rate": 1.941672293559113e-05, "loss": 0.6409, "step": 1753 }, { "epoch": 0.14, "grad_norm": 1.2820192679357278, "learning_rate": 1.9415877045962093e-05, "loss": 0.6141, "step": 1754 }, { "epoch": 0.14, "grad_norm": 1.4418860983287094, "learning_rate": 1.941503056186016e-05, "loss": 0.6538, "step": 1755 }, { "epoch": 0.14, "grad_norm": 1.5504665103409105, "learning_rate": 1.9414183483338777e-05, "loss": 0.7011, "step": 1756 }, { "epoch": 0.14, "grad_norm": 1.3230350121055532, "learning_rate": 1.9413335810451412e-05, "loss": 0.6637, "step": 1757 }, { "epoch": 0.14, "grad_norm": 1.2218971211413936, "learning_rate": 1.9412487543251596e-05, "loss": 0.701, "step": 1758 }, { "epoch": 0.14, "grad_norm": 1.2913811378322062, "learning_rate": 1.941163868179288e-05, "loss": 0.6714, "step": 1759 }, { "epoch": 0.14, "grad_norm": 1.2373744399981528, "learning_rate": 1.9410789226128853e-05, "loss": 0.6462, "step": 1760 }, { "epoch": 0.14, "grad_norm": 1.385578385095075, "learning_rate": 1.940993917631315e-05, "loss": 0.6733, "step": 1761 }, { "epoch": 0.14, "grad_norm": 1.385045420111922, "learning_rate": 1.9409088532399436e-05, "loss": 0.6381, "step": 1762 }, { "epoch": 0.14, "grad_norm": 1.2244025486197705, "learning_rate": 1.9408237294441422e-05, "loss": 0.6123, "step": 1763 }, { "epoch": 0.14, "grad_norm": 1.3612472969918086, "learning_rate": 1.9407385462492846e-05, "loss": 0.6183, "step": 1764 }, { "epoch": 0.14, "grad_norm": 1.3186129403867841, "learning_rate": 1.9406533036607488e-05, "loss": 0.6606, "step": 1765 }, { "epoch": 0.14, "grad_norm": 1.2598968674065567, "learning_rate": 1.940568001683917e-05, "loss": 0.6348, "step": 1766 }, { "epoch": 0.14, "grad_norm": 1.3952888119385922, "learning_rate": 1.9404826403241746e-05, "loss": 0.6662, "step": 1767 }, { "epoch": 0.14, "grad_norm": 1.206270589306881, "learning_rate": 1.940397219586911e-05, "loss": 0.5745, "step": 1768 }, { "epoch": 0.14, "grad_norm": 1.2370139294474551, "learning_rate": 1.940311739477519e-05, "loss": 0.591, "step": 1769 }, { "epoch": 0.14, "grad_norm": 1.3182917824831164, "learning_rate": 1.940226200001395e-05, "loss": 0.6754, "step": 1770 }, { "epoch": 0.14, "grad_norm": 1.412846585178781, "learning_rate": 1.940140601163941e-05, "loss": 0.6588, "step": 1771 }, { "epoch": 0.14, "grad_norm": 1.3167438470375483, "learning_rate": 1.9400549429705597e-05, "loss": 0.6503, "step": 1772 }, { "epoch": 0.14, "grad_norm": 1.3200466633264807, "learning_rate": 1.93996922542666e-05, "loss": 0.6787, "step": 1773 }, { "epoch": 0.14, "grad_norm": 1.230184564621934, "learning_rate": 1.9398834485376534e-05, "loss": 0.6585, "step": 1774 }, { "epoch": 0.14, "grad_norm": 1.303266882547969, "learning_rate": 1.9397976123089558e-05, "loss": 0.6916, "step": 1775 }, { "epoch": 0.14, "grad_norm": 1.202594541057109, "learning_rate": 1.9397117167459858e-05, "loss": 0.6339, "step": 1776 }, { "epoch": 0.14, "grad_norm": 1.2912817145530682, "learning_rate": 1.939625761854167e-05, "loss": 0.6583, "step": 1777 }, { "epoch": 0.14, "grad_norm": 1.2899321425872985, "learning_rate": 1.9395397476389265e-05, "loss": 0.5833, "step": 1778 }, { "epoch": 0.14, "grad_norm": 1.1868743754260211, "learning_rate": 1.939453674105694e-05, "loss": 0.6209, "step": 1779 }, { "epoch": 0.14, "grad_norm": 1.3231568256167092, "learning_rate": 1.9393675412599037e-05, "loss": 0.6849, "step": 1780 }, { "epoch": 0.14, "grad_norm": 1.2318035333767627, "learning_rate": 1.9392813491069944e-05, "loss": 0.5975, "step": 1781 }, { "epoch": 0.14, "grad_norm": 1.35401319465519, "learning_rate": 1.9391950976524075e-05, "loss": 0.6275, "step": 1782 }, { "epoch": 0.14, "grad_norm": 1.3256753657807507, "learning_rate": 1.9391087869015884e-05, "loss": 0.6601, "step": 1783 }, { "epoch": 0.14, "grad_norm": 1.431278365074837, "learning_rate": 1.9390224168599864e-05, "loss": 0.7256, "step": 1784 }, { "epoch": 0.14, "grad_norm": 1.3296045028657328, "learning_rate": 1.9389359875330548e-05, "loss": 0.6998, "step": 1785 }, { "epoch": 0.14, "grad_norm": 1.3195173589179643, "learning_rate": 1.9388494989262498e-05, "loss": 0.6338, "step": 1786 }, { "epoch": 0.14, "grad_norm": 1.2677220055692577, "learning_rate": 1.9387629510450318e-05, "loss": 0.6223, "step": 1787 }, { "epoch": 0.14, "grad_norm": 1.2939126525211937, "learning_rate": 1.9386763438948656e-05, "loss": 0.6535, "step": 1788 }, { "epoch": 0.14, "grad_norm": 1.379490153281343, "learning_rate": 1.938589677481219e-05, "loss": 0.6291, "step": 1789 }, { "epoch": 0.14, "grad_norm": 1.3741811568261744, "learning_rate": 1.9385029518095635e-05, "loss": 0.6691, "step": 1790 }, { "epoch": 0.14, "grad_norm": 1.2312748282006032, "learning_rate": 1.9384161668853746e-05, "loss": 0.5783, "step": 1791 }, { "epoch": 0.14, "grad_norm": 1.3642264426030555, "learning_rate": 1.9383293227141316e-05, "loss": 0.6339, "step": 1792 }, { "epoch": 0.14, "grad_norm": 1.1622154892630587, "learning_rate": 1.9382424193013172e-05, "loss": 0.5877, "step": 1793 }, { "epoch": 0.14, "grad_norm": 1.3390573134260653, "learning_rate": 1.938155456652418e-05, "loss": 0.627, "step": 1794 }, { "epoch": 0.14, "grad_norm": 1.3374954419637717, "learning_rate": 1.938068434772925e-05, "loss": 0.6592, "step": 1795 }, { "epoch": 0.14, "grad_norm": 1.3315944458845368, "learning_rate": 1.9379813536683314e-05, "loss": 0.709, "step": 1796 }, { "epoch": 0.14, "grad_norm": 1.3022277395916675, "learning_rate": 1.9378942133441357e-05, "loss": 0.683, "step": 1797 }, { "epoch": 0.14, "grad_norm": 1.357890008573226, "learning_rate": 1.9378070138058396e-05, "loss": 0.6939, "step": 1798 }, { "epoch": 0.14, "grad_norm": 1.3082880830330057, "learning_rate": 1.9377197550589484e-05, "loss": 0.6154, "step": 1799 }, { "epoch": 0.14, "grad_norm": 1.3562562617144207, "learning_rate": 1.9376324371089707e-05, "loss": 0.6646, "step": 1800 }, { "epoch": 0.14, "grad_norm": 1.3542960863135862, "learning_rate": 1.93754505996142e-05, "loss": 0.672, "step": 1801 }, { "epoch": 0.14, "grad_norm": 1.2775218501154013, "learning_rate": 1.9374576236218123e-05, "loss": 0.631, "step": 1802 }, { "epoch": 0.14, "grad_norm": 1.1758775639928287, "learning_rate": 1.9373701280956685e-05, "loss": 0.5294, "step": 1803 }, { "epoch": 0.14, "grad_norm": 1.2517750058397301, "learning_rate": 1.937282573388512e-05, "loss": 0.6509, "step": 1804 }, { "epoch": 0.14, "grad_norm": 1.1802088109840623, "learning_rate": 1.937194959505871e-05, "loss": 0.6129, "step": 1805 }, { "epoch": 0.14, "grad_norm": 1.3102995273813116, "learning_rate": 1.937107286453277e-05, "loss": 0.6104, "step": 1806 }, { "epoch": 0.14, "grad_norm": 1.3008657891922533, "learning_rate": 1.9370195542362653e-05, "loss": 0.6734, "step": 1807 }, { "epoch": 0.14, "grad_norm": 1.2025573183860743, "learning_rate": 1.9369317628603744e-05, "loss": 0.599, "step": 1808 }, { "epoch": 0.14, "grad_norm": 1.3371318363907538, "learning_rate": 1.936843912331148e-05, "loss": 0.6318, "step": 1809 }, { "epoch": 0.14, "grad_norm": 1.284488678516641, "learning_rate": 1.936756002654131e-05, "loss": 0.6081, "step": 1810 }, { "epoch": 0.14, "grad_norm": 1.217664577632365, "learning_rate": 1.9366680338348755e-05, "loss": 0.5891, "step": 1811 }, { "epoch": 0.14, "grad_norm": 1.186817876757914, "learning_rate": 1.9365800058789338e-05, "loss": 0.5698, "step": 1812 }, { "epoch": 0.14, "grad_norm": 1.2439156752613698, "learning_rate": 1.9364919187918644e-05, "loss": 0.6373, "step": 1813 }, { "epoch": 0.14, "grad_norm": 1.3006422638744117, "learning_rate": 1.9364037725792287e-05, "loss": 0.6787, "step": 1814 }, { "epoch": 0.14, "grad_norm": 1.2967798588795363, "learning_rate": 1.9363155672465913e-05, "loss": 0.6798, "step": 1815 }, { "epoch": 0.14, "grad_norm": 1.2913093636706705, "learning_rate": 1.9362273027995217e-05, "loss": 0.585, "step": 1816 }, { "epoch": 0.14, "grad_norm": 1.2658277749464204, "learning_rate": 1.9361389792435918e-05, "loss": 0.6483, "step": 1817 }, { "epoch": 0.14, "grad_norm": 1.2531786081924103, "learning_rate": 1.9360505965843784e-05, "loss": 0.6083, "step": 1818 }, { "epoch": 0.14, "grad_norm": 1.1847368517965333, "learning_rate": 1.9359621548274616e-05, "loss": 0.6145, "step": 1819 }, { "epoch": 0.14, "grad_norm": 1.2516421022472517, "learning_rate": 1.935873653978425e-05, "loss": 0.6707, "step": 1820 }, { "epoch": 0.14, "grad_norm": 1.3681852786960234, "learning_rate": 1.935785094042856e-05, "loss": 0.697, "step": 1821 }, { "epoch": 0.14, "grad_norm": 1.351966136130191, "learning_rate": 1.9356964750263464e-05, "loss": 0.633, "step": 1822 }, { "epoch": 0.14, "grad_norm": 1.2821431187296615, "learning_rate": 1.9356077969344904e-05, "loss": 0.6545, "step": 1823 }, { "epoch": 0.14, "grad_norm": 1.31030953499522, "learning_rate": 1.9355190597728874e-05, "loss": 0.6607, "step": 1824 }, { "epoch": 0.14, "grad_norm": 1.2535364669346227, "learning_rate": 1.9354302635471392e-05, "loss": 0.5522, "step": 1825 }, { "epoch": 0.14, "grad_norm": 1.3087551643685043, "learning_rate": 1.9353414082628523e-05, "loss": 0.6121, "step": 1826 }, { "epoch": 0.14, "grad_norm": 1.4113829288775175, "learning_rate": 1.9352524939256367e-05, "loss": 0.637, "step": 1827 }, { "epoch": 0.14, "grad_norm": 1.4263114883849193, "learning_rate": 1.935163520541106e-05, "loss": 0.63, "step": 1828 }, { "epoch": 0.14, "grad_norm": 1.2431996858082133, "learning_rate": 1.935074488114877e-05, "loss": 0.6011, "step": 1829 }, { "epoch": 0.14, "grad_norm": 1.3271925401692968, "learning_rate": 1.9349853966525715e-05, "loss": 0.7236, "step": 1830 }, { "epoch": 0.14, "grad_norm": 1.2365950406265822, "learning_rate": 1.934896246159814e-05, "loss": 0.64, "step": 1831 }, { "epoch": 0.14, "grad_norm": 1.2583187814001624, "learning_rate": 1.9348070366422333e-05, "loss": 0.6623, "step": 1832 }, { "epoch": 0.14, "grad_norm": 1.2761617902410398, "learning_rate": 1.934717768105461e-05, "loss": 0.5737, "step": 1833 }, { "epoch": 0.14, "grad_norm": 1.3179462152803372, "learning_rate": 1.9346284405551333e-05, "loss": 0.6379, "step": 1834 }, { "epoch": 0.14, "grad_norm": 1.2759810712271338, "learning_rate": 1.9345390539968907e-05, "loss": 0.6245, "step": 1835 }, { "epoch": 0.14, "grad_norm": 1.455047975778258, "learning_rate": 1.934449608436376e-05, "loss": 0.6904, "step": 1836 }, { "epoch": 0.14, "grad_norm": 1.3158179533536536, "learning_rate": 1.934360103879236e-05, "loss": 0.6602, "step": 1837 }, { "epoch": 0.14, "grad_norm": 1.1974933152619458, "learning_rate": 1.934270540331122e-05, "loss": 0.6181, "step": 1838 }, { "epoch": 0.14, "grad_norm": 1.404012086134293, "learning_rate": 1.934180917797689e-05, "loss": 0.5962, "step": 1839 }, { "epoch": 0.14, "grad_norm": 1.2609085930556203, "learning_rate": 1.9340912362845946e-05, "loss": 0.613, "step": 1840 }, { "epoch": 0.14, "grad_norm": 1.2277558953841405, "learning_rate": 1.934001495797501e-05, "loss": 0.6423, "step": 1841 }, { "epoch": 0.14, "grad_norm": 1.369381914630601, "learning_rate": 1.9339116963420746e-05, "loss": 0.6398, "step": 1842 }, { "epoch": 0.14, "grad_norm": 1.249633306123298, "learning_rate": 1.9338218379239842e-05, "loss": 0.6147, "step": 1843 }, { "epoch": 0.14, "grad_norm": 1.3382573202992287, "learning_rate": 1.9337319205489035e-05, "loss": 0.6244, "step": 1844 }, { "epoch": 0.14, "grad_norm": 1.1796217135850182, "learning_rate": 1.933641944222509e-05, "loss": 0.6093, "step": 1845 }, { "epoch": 0.14, "grad_norm": 1.260914927378649, "learning_rate": 1.9335519089504816e-05, "loss": 0.5725, "step": 1846 }, { "epoch": 0.14, "grad_norm": 1.4294441724818887, "learning_rate": 1.933461814738506e-05, "loss": 0.7315, "step": 1847 }, { "epoch": 0.14, "grad_norm": 1.607083587461845, "learning_rate": 1.9333716615922696e-05, "loss": 0.6928, "step": 1848 }, { "epoch": 0.14, "grad_norm": 1.2153931874773367, "learning_rate": 1.933281449517465e-05, "loss": 0.5779, "step": 1849 }, { "epoch": 0.14, "grad_norm": 1.3373316578675485, "learning_rate": 1.933191178519787e-05, "loss": 0.7226, "step": 1850 }, { "epoch": 0.14, "grad_norm": 1.370734925878231, "learning_rate": 1.9331008486049355e-05, "loss": 0.684, "step": 1851 }, { "epoch": 0.14, "grad_norm": 1.4186321629729854, "learning_rate": 1.933010459778613e-05, "loss": 0.6748, "step": 1852 }, { "epoch": 0.14, "grad_norm": 1.187836047357897, "learning_rate": 1.9329200120465268e-05, "loss": 0.6663, "step": 1853 }, { "epoch": 0.14, "grad_norm": 1.2605960445076303, "learning_rate": 1.932829505414387e-05, "loss": 0.552, "step": 1854 }, { "epoch": 0.14, "grad_norm": 1.337893476274334, "learning_rate": 1.9327389398879078e-05, "loss": 0.6848, "step": 1855 }, { "epoch": 0.14, "grad_norm": 1.4702630062063125, "learning_rate": 1.9326483154728066e-05, "loss": 0.676, "step": 1856 }, { "epoch": 0.14, "grad_norm": 1.341186162348423, "learning_rate": 1.9325576321748058e-05, "loss": 0.7112, "step": 1857 }, { "epoch": 0.14, "grad_norm": 1.2551070313578359, "learning_rate": 1.9324668899996305e-05, "loss": 0.6449, "step": 1858 }, { "epoch": 0.14, "grad_norm": 1.140458708882373, "learning_rate": 1.932376088953009e-05, "loss": 0.6111, "step": 1859 }, { "epoch": 0.14, "grad_norm": 1.2644659315703413, "learning_rate": 1.932285229040675e-05, "loss": 0.631, "step": 1860 }, { "epoch": 0.14, "grad_norm": 1.348570450011457, "learning_rate": 1.9321943102683642e-05, "loss": 0.6777, "step": 1861 }, { "epoch": 0.14, "grad_norm": 1.4518527131950292, "learning_rate": 1.9321033326418174e-05, "loss": 0.6513, "step": 1862 }, { "epoch": 0.14, "grad_norm": 1.328931451431817, "learning_rate": 1.932012296166778e-05, "loss": 0.6438, "step": 1863 }, { "epoch": 0.14, "grad_norm": 1.4358748495985703, "learning_rate": 1.931921200848994e-05, "loss": 0.6652, "step": 1864 }, { "epoch": 0.14, "grad_norm": 1.3893122531108628, "learning_rate": 1.931830046694216e-05, "loss": 0.6307, "step": 1865 }, { "epoch": 0.14, "grad_norm": 1.244040492652964, "learning_rate": 1.9317388337082002e-05, "loss": 0.5596, "step": 1866 }, { "epoch": 0.14, "grad_norm": 1.2629449513505753, "learning_rate": 1.9316475618967045e-05, "loss": 0.677, "step": 1867 }, { "epoch": 0.14, "grad_norm": 1.2777468077504603, "learning_rate": 1.9315562312654912e-05, "loss": 0.643, "step": 1868 }, { "epoch": 0.14, "grad_norm": 1.381660714514366, "learning_rate": 1.931464841820327e-05, "loss": 0.6704, "step": 1869 }, { "epoch": 0.15, "grad_norm": 1.1949993546137005, "learning_rate": 1.9313733935669817e-05, "loss": 0.5983, "step": 1870 }, { "epoch": 0.15, "grad_norm": 1.3454682653193897, "learning_rate": 1.9312818865112288e-05, "loss": 0.6332, "step": 1871 }, { "epoch": 0.15, "grad_norm": 1.2397029188585333, "learning_rate": 1.9311903206588455e-05, "loss": 0.5938, "step": 1872 }, { "epoch": 0.15, "grad_norm": 1.2834543014255149, "learning_rate": 1.9310986960156132e-05, "loss": 0.5874, "step": 1873 }, { "epoch": 0.15, "grad_norm": 1.257630945217229, "learning_rate": 1.931007012587316e-05, "loss": 0.5996, "step": 1874 }, { "epoch": 0.15, "grad_norm": 1.409777772787156, "learning_rate": 1.930915270379743e-05, "loss": 0.6249, "step": 1875 }, { "epoch": 0.15, "grad_norm": 1.3882650001961634, "learning_rate": 1.930823469398686e-05, "loss": 0.7336, "step": 1876 }, { "epoch": 0.15, "grad_norm": 1.3206648948843052, "learning_rate": 1.9307316096499412e-05, "loss": 0.6249, "step": 1877 }, { "epoch": 0.15, "grad_norm": 1.2694585221806376, "learning_rate": 1.9306396911393076e-05, "loss": 0.5818, "step": 1878 }, { "epoch": 0.15, "grad_norm": 1.3495558467247493, "learning_rate": 1.930547713872589e-05, "loss": 0.6692, "step": 1879 }, { "epoch": 0.15, "grad_norm": 1.1109811143857988, "learning_rate": 1.930455677855592e-05, "loss": 0.6271, "step": 1880 }, { "epoch": 0.15, "grad_norm": 1.3275951001573685, "learning_rate": 1.930363583094128e-05, "loss": 0.6553, "step": 1881 }, { "epoch": 0.15, "grad_norm": 1.4158037119179134, "learning_rate": 1.9302714295940107e-05, "loss": 0.638, "step": 1882 }, { "epoch": 0.15, "grad_norm": 1.2703026408696196, "learning_rate": 1.9301792173610584e-05, "loss": 0.6525, "step": 1883 }, { "epoch": 0.15, "grad_norm": 1.331144468396001, "learning_rate": 1.930086946401093e-05, "loss": 0.628, "step": 1884 }, { "epoch": 0.15, "grad_norm": 1.1939521977809402, "learning_rate": 1.9299946167199405e-05, "loss": 0.6167, "step": 1885 }, { "epoch": 0.15, "grad_norm": 1.2909777198570933, "learning_rate": 1.9299022283234288e-05, "loss": 0.6617, "step": 1886 }, { "epoch": 0.15, "grad_norm": 1.3197922895029108, "learning_rate": 1.9298097812173926e-05, "loss": 0.6428, "step": 1887 }, { "epoch": 0.15, "grad_norm": 1.254458439536501, "learning_rate": 1.9297172754076677e-05, "loss": 0.5862, "step": 1888 }, { "epoch": 0.15, "grad_norm": 1.264577879833715, "learning_rate": 1.929624710900094e-05, "loss": 0.6829, "step": 1889 }, { "epoch": 0.15, "grad_norm": 1.2316664422327095, "learning_rate": 1.9295320877005163e-05, "loss": 0.62, "step": 1890 }, { "epoch": 0.15, "grad_norm": 1.3698721984088331, "learning_rate": 1.9294394058147822e-05, "loss": 0.6855, "step": 1891 }, { "epoch": 0.15, "grad_norm": 1.2304175956628696, "learning_rate": 1.929346665248743e-05, "loss": 0.5959, "step": 1892 }, { "epoch": 0.15, "grad_norm": 1.3053276638678097, "learning_rate": 1.9292538660082543e-05, "loss": 0.6279, "step": 1893 }, { "epoch": 0.15, "grad_norm": 1.1651293071318936, "learning_rate": 1.9291610080991743e-05, "loss": 0.6432, "step": 1894 }, { "epoch": 0.15, "grad_norm": 1.2475735956473255, "learning_rate": 1.9290680915273662e-05, "loss": 0.6422, "step": 1895 }, { "epoch": 0.15, "grad_norm": 1.279957421682411, "learning_rate": 1.928975116298696e-05, "loss": 0.637, "step": 1896 }, { "epoch": 0.15, "grad_norm": 1.3006048226063103, "learning_rate": 1.928882082419034e-05, "loss": 0.6616, "step": 1897 }, { "epoch": 0.15, "grad_norm": 1.244368791243786, "learning_rate": 1.9287889898942537e-05, "loss": 0.7127, "step": 1898 }, { "epoch": 0.15, "grad_norm": 1.269686363721072, "learning_rate": 1.9286958387302327e-05, "loss": 0.61, "step": 1899 }, { "epoch": 0.15, "grad_norm": 1.3600335115623554, "learning_rate": 1.928602628932852e-05, "loss": 0.6265, "step": 1900 }, { "epoch": 0.15, "grad_norm": 1.2977633363747276, "learning_rate": 1.928509360507996e-05, "loss": 0.6553, "step": 1901 }, { "epoch": 0.15, "grad_norm": 1.243239383258666, "learning_rate": 1.928416033461554e-05, "loss": 0.6289, "step": 1902 }, { "epoch": 0.15, "grad_norm": 1.2940684364328232, "learning_rate": 1.9283226477994173e-05, "loss": 0.5865, "step": 1903 }, { "epoch": 0.15, "grad_norm": 1.2638571366081577, "learning_rate": 1.928229203527483e-05, "loss": 0.6266, "step": 1904 }, { "epoch": 0.15, "grad_norm": 1.2164493756498795, "learning_rate": 1.9281357006516496e-05, "loss": 0.6113, "step": 1905 }, { "epoch": 0.15, "grad_norm": 1.3877431175378758, "learning_rate": 1.928042139177821e-05, "loss": 0.7048, "step": 1906 }, { "epoch": 0.15, "grad_norm": 1.2450651988227195, "learning_rate": 1.9279485191119042e-05, "loss": 0.58, "step": 1907 }, { "epoch": 0.15, "grad_norm": 1.356594574572605, "learning_rate": 1.9278548404598094e-05, "loss": 0.646, "step": 1908 }, { "epoch": 0.15, "grad_norm": 1.3247773559216494, "learning_rate": 1.9277611032274517e-05, "loss": 0.6741, "step": 1909 }, { "epoch": 0.15, "grad_norm": 1.3930549821676028, "learning_rate": 1.927667307420749e-05, "loss": 0.6613, "step": 1910 }, { "epoch": 0.15, "grad_norm": 1.2741597941387712, "learning_rate": 1.927573453045623e-05, "loss": 0.6702, "step": 1911 }, { "epoch": 0.15, "grad_norm": 1.3242502124390838, "learning_rate": 1.9274795401079987e-05, "loss": 0.6474, "step": 1912 }, { "epoch": 0.15, "grad_norm": 1.3227753551232575, "learning_rate": 1.9273855686138065e-05, "loss": 0.6436, "step": 1913 }, { "epoch": 0.15, "grad_norm": 1.2362758149324338, "learning_rate": 1.927291538568979e-05, "loss": 0.6379, "step": 1914 }, { "epoch": 0.15, "grad_norm": 1.2282747114332857, "learning_rate": 1.9271974499794518e-05, "loss": 0.6129, "step": 1915 }, { "epoch": 0.15, "grad_norm": 1.2991242264835519, "learning_rate": 1.927103302851166e-05, "loss": 0.6573, "step": 1916 }, { "epoch": 0.15, "grad_norm": 1.257815959285931, "learning_rate": 1.9270090971900653e-05, "loss": 0.6114, "step": 1917 }, { "epoch": 0.15, "grad_norm": 1.321936021122329, "learning_rate": 1.926914833002098e-05, "loss": 0.6245, "step": 1918 }, { "epoch": 0.15, "grad_norm": 1.3837403024335373, "learning_rate": 1.9268205102932152e-05, "loss": 0.6319, "step": 1919 }, { "epoch": 0.15, "grad_norm": 1.297540562828054, "learning_rate": 1.9267261290693712e-05, "loss": 0.6771, "step": 1920 }, { "epoch": 0.15, "grad_norm": 1.425844708428341, "learning_rate": 1.926631689336526e-05, "loss": 0.6402, "step": 1921 }, { "epoch": 0.15, "grad_norm": 1.2936495460911377, "learning_rate": 1.926537191100641e-05, "loss": 0.6137, "step": 1922 }, { "epoch": 0.15, "grad_norm": 1.2040072587481967, "learning_rate": 1.9264426343676828e-05, "loss": 0.6284, "step": 1923 }, { "epoch": 0.15, "grad_norm": 1.1990212144223669, "learning_rate": 1.9263480191436214e-05, "loss": 0.5934, "step": 1924 }, { "epoch": 0.15, "grad_norm": 1.2670900789528925, "learning_rate": 1.9262533454344303e-05, "loss": 0.6694, "step": 1925 }, { "epoch": 0.15, "grad_norm": 1.2493116867869654, "learning_rate": 1.9261586132460866e-05, "loss": 0.5705, "step": 1926 }, { "epoch": 0.15, "grad_norm": 1.3729645228209606, "learning_rate": 1.9260638225845713e-05, "loss": 0.5774, "step": 1927 }, { "epoch": 0.15, "grad_norm": 1.2335169260311833, "learning_rate": 1.925968973455869e-05, "loss": 0.5917, "step": 1928 }, { "epoch": 0.15, "grad_norm": 1.2142230346274214, "learning_rate": 1.9258740658659683e-05, "loss": 0.6177, "step": 1929 }, { "epoch": 0.15, "grad_norm": 1.3049324856390505, "learning_rate": 1.9257790998208606e-05, "loss": 0.697, "step": 1930 }, { "epoch": 0.15, "grad_norm": 1.321748392882289, "learning_rate": 1.925684075326542e-05, "loss": 0.6988, "step": 1931 }, { "epoch": 0.15, "grad_norm": 1.2409835835307796, "learning_rate": 1.9255889923890118e-05, "loss": 0.5907, "step": 1932 }, { "epoch": 0.15, "grad_norm": 1.2417434282491842, "learning_rate": 1.9254938510142735e-05, "loss": 0.652, "step": 1933 }, { "epoch": 0.15, "grad_norm": 1.2861551038103585, "learning_rate": 1.925398651208333e-05, "loss": 0.6211, "step": 1934 }, { "epoch": 0.15, "grad_norm": 1.4243427701804547, "learning_rate": 1.925303392977201e-05, "loss": 0.6335, "step": 1935 }, { "epoch": 0.15, "grad_norm": 1.464250286683883, "learning_rate": 1.9252080763268924e-05, "loss": 0.7058, "step": 1936 }, { "epoch": 0.15, "grad_norm": 1.2647075860172492, "learning_rate": 1.9251127012634242e-05, "loss": 0.6536, "step": 1937 }, { "epoch": 0.15, "grad_norm": 1.2409793088388401, "learning_rate": 1.9250172677928184e-05, "loss": 0.5969, "step": 1938 }, { "epoch": 0.15, "grad_norm": 1.3630910046591633, "learning_rate": 1.9249217759211e-05, "loss": 0.6866, "step": 1939 }, { "epoch": 0.15, "grad_norm": 1.155029529094975, "learning_rate": 1.924826225654298e-05, "loss": 0.636, "step": 1940 }, { "epoch": 0.15, "grad_norm": 1.228628374757012, "learning_rate": 1.9247306169984446e-05, "loss": 0.5917, "step": 1941 }, { "epoch": 0.15, "grad_norm": 1.3851029989868728, "learning_rate": 1.9246349499595767e-05, "loss": 0.7361, "step": 1942 }, { "epoch": 0.15, "grad_norm": 1.1385088062808584, "learning_rate": 1.9245392245437336e-05, "loss": 0.618, "step": 1943 }, { "epoch": 0.15, "grad_norm": 1.2306579444392938, "learning_rate": 1.9244434407569596e-05, "loss": 0.5768, "step": 1944 }, { "epoch": 0.15, "grad_norm": 1.2922190541262908, "learning_rate": 1.9243475986053014e-05, "loss": 0.6444, "step": 1945 }, { "epoch": 0.15, "grad_norm": 1.1461193536916188, "learning_rate": 1.9242516980948105e-05, "loss": 0.5994, "step": 1946 }, { "epoch": 0.15, "grad_norm": 1.2921450200876514, "learning_rate": 1.9241557392315413e-05, "loss": 0.6232, "step": 1947 }, { "epoch": 0.15, "grad_norm": 1.3033486537976007, "learning_rate": 1.9240597220215524e-05, "loss": 0.5464, "step": 1948 }, { "epoch": 0.15, "grad_norm": 1.2579126258946092, "learning_rate": 1.9239636464709054e-05, "loss": 0.6244, "step": 1949 }, { "epoch": 0.15, "grad_norm": 1.269045458061357, "learning_rate": 1.9238675125856666e-05, "loss": 0.6237, "step": 1950 }, { "epoch": 0.15, "grad_norm": 1.2859785701455273, "learning_rate": 1.923771320371905e-05, "loss": 0.6317, "step": 1951 }, { "epoch": 0.15, "grad_norm": 1.1734743013465252, "learning_rate": 1.9236750698356942e-05, "loss": 0.6007, "step": 1952 }, { "epoch": 0.15, "grad_norm": 1.301467531180983, "learning_rate": 1.9235787609831105e-05, "loss": 0.7346, "step": 1953 }, { "epoch": 0.15, "grad_norm": 1.2911615563353838, "learning_rate": 1.9234823938202346e-05, "loss": 0.6401, "step": 1954 }, { "epoch": 0.15, "grad_norm": 1.3560117573833217, "learning_rate": 1.923385968353151e-05, "loss": 0.6993, "step": 1955 }, { "epoch": 0.15, "grad_norm": 1.2909150192604506, "learning_rate": 1.9232894845879465e-05, "loss": 0.5995, "step": 1956 }, { "epoch": 0.15, "grad_norm": 1.3388528969166906, "learning_rate": 1.9231929425307136e-05, "loss": 0.7416, "step": 1957 }, { "epoch": 0.15, "grad_norm": 1.231763854487558, "learning_rate": 1.9230963421875474e-05, "loss": 0.6337, "step": 1958 }, { "epoch": 0.15, "grad_norm": 1.2729127422882038, "learning_rate": 1.9229996835645463e-05, "loss": 0.691, "step": 1959 }, { "epoch": 0.15, "grad_norm": 1.305057633029852, "learning_rate": 1.9229029666678133e-05, "loss": 0.6141, "step": 1960 }, { "epoch": 0.15, "grad_norm": 1.3600797470637918, "learning_rate": 1.922806191503454e-05, "loss": 0.6625, "step": 1961 }, { "epoch": 0.15, "grad_norm": 1.3920475402420074, "learning_rate": 1.9227093580775796e-05, "loss": 0.6639, "step": 1962 }, { "epoch": 0.15, "grad_norm": 1.2714037891639316, "learning_rate": 1.9226124663963023e-05, "loss": 0.6009, "step": 1963 }, { "epoch": 0.15, "grad_norm": 1.1792031613870424, "learning_rate": 1.92251551646574e-05, "loss": 0.6305, "step": 1964 }, { "epoch": 0.15, "grad_norm": 1.2501618757337087, "learning_rate": 1.9224185082920138e-05, "loss": 0.6193, "step": 1965 }, { "epoch": 0.15, "grad_norm": 1.1921083976531102, "learning_rate": 1.922321441881248e-05, "loss": 0.5905, "step": 1966 }, { "epoch": 0.15, "grad_norm": 1.3003995409791924, "learning_rate": 1.9222243172395706e-05, "loss": 0.6846, "step": 1967 }, { "epoch": 0.15, "grad_norm": 1.3237465435699782, "learning_rate": 1.9221271343731146e-05, "loss": 0.6144, "step": 1968 }, { "epoch": 0.15, "grad_norm": 1.2862052926022414, "learning_rate": 1.922029893288015e-05, "loss": 0.5887, "step": 1969 }, { "epoch": 0.15, "grad_norm": 1.2031862813031797, "learning_rate": 1.921932593990411e-05, "loss": 0.5889, "step": 1970 }, { "epoch": 0.15, "grad_norm": 1.4197782210312542, "learning_rate": 1.9218352364864457e-05, "loss": 0.7039, "step": 1971 }, { "epoch": 0.15, "grad_norm": 1.2854706168123515, "learning_rate": 1.921737820782266e-05, "loss": 0.6811, "step": 1972 }, { "epoch": 0.15, "grad_norm": 1.214310507561309, "learning_rate": 1.921640346884022e-05, "loss": 0.5814, "step": 1973 }, { "epoch": 0.15, "grad_norm": 1.3339426416708104, "learning_rate": 1.9215428147978684e-05, "loss": 0.7675, "step": 1974 }, { "epoch": 0.15, "grad_norm": 1.2380011208580008, "learning_rate": 1.9214452245299616e-05, "loss": 0.609, "step": 1975 }, { "epoch": 0.15, "grad_norm": 1.35708987340541, "learning_rate": 1.9213475760864645e-05, "loss": 0.6503, "step": 1976 }, { "epoch": 0.15, "grad_norm": 1.1958519210877196, "learning_rate": 1.9212498694735408e-05, "loss": 0.5885, "step": 1977 }, { "epoch": 0.15, "grad_norm": 1.1480532671635908, "learning_rate": 1.92115210469736e-05, "loss": 0.6093, "step": 1978 }, { "epoch": 0.15, "grad_norm": 1.2542909406428344, "learning_rate": 1.9210542817640945e-05, "loss": 0.6319, "step": 1979 }, { "epoch": 0.15, "grad_norm": 1.3220536528360614, "learning_rate": 1.92095640067992e-05, "loss": 0.6659, "step": 1980 }, { "epoch": 0.15, "grad_norm": 1.4459513309197338, "learning_rate": 1.9208584614510163e-05, "loss": 0.6722, "step": 1981 }, { "epoch": 0.15, "grad_norm": 1.264629349141053, "learning_rate": 1.9207604640835675e-05, "loss": 0.623, "step": 1982 }, { "epoch": 0.15, "grad_norm": 1.223744989293315, "learning_rate": 1.9206624085837593e-05, "loss": 0.6286, "step": 1983 }, { "epoch": 0.15, "grad_norm": 1.2420079803302555, "learning_rate": 1.9205642949577835e-05, "loss": 0.6352, "step": 1984 }, { "epoch": 0.15, "grad_norm": 1.3823857484070743, "learning_rate": 1.9204661232118343e-05, "loss": 0.6804, "step": 1985 }, { "epoch": 0.15, "grad_norm": 1.2243581997944102, "learning_rate": 1.92036789335211e-05, "loss": 0.5816, "step": 1986 }, { "epoch": 0.15, "grad_norm": 1.3132825062851852, "learning_rate": 1.920269605384812e-05, "loss": 0.6605, "step": 1987 }, { "epoch": 0.15, "grad_norm": 1.3311862894492688, "learning_rate": 1.9201712593161458e-05, "loss": 0.641, "step": 1988 }, { "epoch": 0.15, "grad_norm": 1.3230289301459177, "learning_rate": 1.9200728551523204e-05, "loss": 0.6296, "step": 1989 }, { "epoch": 0.15, "grad_norm": 1.2198748654931182, "learning_rate": 1.919974392899549e-05, "loss": 0.5966, "step": 1990 }, { "epoch": 0.15, "grad_norm": 1.2078477388006041, "learning_rate": 1.9198758725640476e-05, "loss": 0.6012, "step": 1991 }, { "epoch": 0.15, "grad_norm": 1.3875581162112265, "learning_rate": 1.9197772941520365e-05, "loss": 0.6671, "step": 1992 }, { "epoch": 0.15, "grad_norm": 1.3004054995986967, "learning_rate": 1.9196786576697392e-05, "loss": 0.6817, "step": 1993 }, { "epoch": 0.15, "grad_norm": 1.3269488793036517, "learning_rate": 1.9195799631233835e-05, "loss": 0.6484, "step": 1994 }, { "epoch": 0.15, "grad_norm": 1.4012339246169776, "learning_rate": 1.9194812105192003e-05, "loss": 0.7139, "step": 1995 }, { "epoch": 0.15, "grad_norm": 1.1605691913422316, "learning_rate": 1.9193823998634242e-05, "loss": 0.5975, "step": 1996 }, { "epoch": 0.15, "grad_norm": 1.467215690301346, "learning_rate": 1.919283531162294e-05, "loss": 0.7013, "step": 1997 }, { "epoch": 0.16, "grad_norm": 1.324162844937865, "learning_rate": 1.9191846044220514e-05, "loss": 0.6951, "step": 1998 }, { "epoch": 0.16, "grad_norm": 1.2823524384258291, "learning_rate": 1.9190856196489424e-05, "loss": 0.6955, "step": 1999 }, { "epoch": 0.16, "grad_norm": 1.2663233208022417, "learning_rate": 1.9189865768492168e-05, "loss": 0.5796, "step": 2000 }, { "epoch": 0.16, "grad_norm": 1.196024065834504, "learning_rate": 1.918887476029127e-05, "loss": 0.5946, "step": 2001 }, { "epoch": 0.16, "grad_norm": 1.3400009997207556, "learning_rate": 1.9187883171949298e-05, "loss": 0.6037, "step": 2002 }, { "epoch": 0.16, "grad_norm": 1.33373118465062, "learning_rate": 1.9186891003528857e-05, "loss": 0.6111, "step": 2003 }, { "epoch": 0.16, "grad_norm": 1.121303684901418, "learning_rate": 1.918589825509259e-05, "loss": 0.5529, "step": 2004 }, { "epoch": 0.16, "grad_norm": 1.2073844713818416, "learning_rate": 1.9184904926703176e-05, "loss": 0.638, "step": 2005 }, { "epoch": 0.16, "grad_norm": 1.3220809739976749, "learning_rate": 1.9183911018423324e-05, "loss": 0.6488, "step": 2006 }, { "epoch": 0.16, "grad_norm": 1.3576937835383587, "learning_rate": 1.9182916530315788e-05, "loss": 0.64, "step": 2007 }, { "epoch": 0.16, "grad_norm": 1.1506137599116333, "learning_rate": 1.9181921462443354e-05, "loss": 0.6083, "step": 2008 }, { "epoch": 0.16, "grad_norm": 1.170778397236292, "learning_rate": 1.9180925814868843e-05, "loss": 0.5477, "step": 2009 }, { "epoch": 0.16, "grad_norm": 1.3602206789518763, "learning_rate": 1.917992958765512e-05, "loss": 0.6149, "step": 2010 }, { "epoch": 0.16, "grad_norm": 1.1666669959113247, "learning_rate": 1.917893278086508e-05, "loss": 0.5715, "step": 2011 }, { "epoch": 0.16, "grad_norm": 1.4121472787584541, "learning_rate": 1.9177935394561652e-05, "loss": 0.6676, "step": 2012 }, { "epoch": 0.16, "grad_norm": 1.2042105100277287, "learning_rate": 1.9176937428807818e-05, "loss": 0.5866, "step": 2013 }, { "epoch": 0.16, "grad_norm": 1.2997939570075012, "learning_rate": 1.9175938883666574e-05, "loss": 0.634, "step": 2014 }, { "epoch": 0.16, "grad_norm": 1.219849457364017, "learning_rate": 1.9174939759200965e-05, "loss": 0.6235, "step": 2015 }, { "epoch": 0.16, "grad_norm": 1.2043239020062682, "learning_rate": 1.9173940055474074e-05, "loss": 0.6389, "step": 2016 }, { "epoch": 0.16, "grad_norm": 1.3267832991276054, "learning_rate": 1.9172939772549014e-05, "loss": 0.634, "step": 2017 }, { "epoch": 0.16, "grad_norm": 1.3324048365809513, "learning_rate": 1.9171938910488945e-05, "loss": 0.6531, "step": 2018 }, { "epoch": 0.16, "grad_norm": 1.358209746341622, "learning_rate": 1.917093746935705e-05, "loss": 0.6804, "step": 2019 }, { "epoch": 0.16, "grad_norm": 1.3244731287406715, "learning_rate": 1.916993544921655e-05, "loss": 0.6695, "step": 2020 }, { "epoch": 0.16, "grad_norm": 1.4140043299391518, "learning_rate": 1.9168932850130723e-05, "loss": 0.6657, "step": 2021 }, { "epoch": 0.16, "grad_norm": 1.2142736931213822, "learning_rate": 1.9167929672162856e-05, "loss": 0.604, "step": 2022 }, { "epoch": 0.16, "grad_norm": 1.2253866208381643, "learning_rate": 1.9166925915376288e-05, "loss": 0.6437, "step": 2023 }, { "epoch": 0.16, "grad_norm": 1.3530651405256198, "learning_rate": 1.9165921579834395e-05, "loss": 0.6696, "step": 2024 }, { "epoch": 0.16, "grad_norm": 1.2925642596711908, "learning_rate": 1.916491666560058e-05, "loss": 0.6431, "step": 2025 }, { "epoch": 0.16, "grad_norm": 1.5131163925310542, "learning_rate": 1.916391117273829e-05, "loss": 0.6546, "step": 2026 }, { "epoch": 0.16, "grad_norm": 1.270793674454692, "learning_rate": 1.916290510131101e-05, "loss": 0.6366, "step": 2027 }, { "epoch": 0.16, "grad_norm": 1.2982945314141887, "learning_rate": 1.9161898451382257e-05, "loss": 0.6531, "step": 2028 }, { "epoch": 0.16, "grad_norm": 1.3241880970335667, "learning_rate": 1.9160891223015586e-05, "loss": 0.6709, "step": 2029 }, { "epoch": 0.16, "grad_norm": 1.2966803439104615, "learning_rate": 1.9159883416274585e-05, "loss": 0.6174, "step": 2030 }, { "epoch": 0.16, "grad_norm": 1.174956326991961, "learning_rate": 1.915887503122289e-05, "loss": 0.604, "step": 2031 }, { "epoch": 0.16, "grad_norm": 1.3852026590059048, "learning_rate": 1.9157866067924157e-05, "loss": 0.6698, "step": 2032 }, { "epoch": 0.16, "grad_norm": 1.341433768859375, "learning_rate": 1.9156856526442092e-05, "loss": 0.6054, "step": 2033 }, { "epoch": 0.16, "grad_norm": 1.3624354583432348, "learning_rate": 1.915584640684043e-05, "loss": 0.6264, "step": 2034 }, { "epoch": 0.16, "grad_norm": 1.2086475007742867, "learning_rate": 1.9154835709182947e-05, "loss": 0.6339, "step": 2035 }, { "epoch": 0.16, "grad_norm": 1.3800078576320352, "learning_rate": 1.9153824433533453e-05, "loss": 0.622, "step": 2036 }, { "epoch": 0.16, "grad_norm": 1.2407871246102744, "learning_rate": 1.9152812579955795e-05, "loss": 0.6017, "step": 2037 }, { "epoch": 0.16, "grad_norm": 1.3450462831099477, "learning_rate": 1.915180014851386e-05, "loss": 0.6193, "step": 2038 }, { "epoch": 0.16, "grad_norm": 1.3559467890857104, "learning_rate": 1.915078713927156e-05, "loss": 0.6751, "step": 2039 }, { "epoch": 0.16, "grad_norm": 1.2963046922613652, "learning_rate": 1.9149773552292855e-05, "loss": 0.631, "step": 2040 }, { "epoch": 0.16, "grad_norm": 1.2528847309470341, "learning_rate": 1.9148759387641745e-05, "loss": 0.554, "step": 2041 }, { "epoch": 0.16, "grad_norm": 1.1992937493247826, "learning_rate": 1.914774464538225e-05, "loss": 0.5803, "step": 2042 }, { "epoch": 0.16, "grad_norm": 1.3610252360849864, "learning_rate": 1.914672932557844e-05, "loss": 0.6453, "step": 2043 }, { "epoch": 0.16, "grad_norm": 1.4820835295340933, "learning_rate": 1.9145713428294415e-05, "loss": 0.7304, "step": 2044 }, { "epoch": 0.16, "grad_norm": 1.37786458467916, "learning_rate": 1.9144696953594316e-05, "loss": 0.7114, "step": 2045 }, { "epoch": 0.16, "grad_norm": 1.207204565549222, "learning_rate": 1.9143679901542316e-05, "loss": 0.6328, "step": 2046 }, { "epoch": 0.16, "grad_norm": 1.4232089314528413, "learning_rate": 1.914266227220263e-05, "loss": 0.6826, "step": 2047 }, { "epoch": 0.16, "grad_norm": 1.3197123952581942, "learning_rate": 1.9141644065639507e-05, "loss": 0.655, "step": 2048 }, { "epoch": 0.16, "grad_norm": 1.2843356309783234, "learning_rate": 1.914062528191723e-05, "loss": 0.6741, "step": 2049 }, { "epoch": 0.16, "grad_norm": 1.209933646250681, "learning_rate": 1.9139605921100116e-05, "loss": 0.6104, "step": 2050 }, { "epoch": 0.16, "grad_norm": 1.2265959814082021, "learning_rate": 1.9138585983252527e-05, "loss": 0.6151, "step": 2051 }, { "epoch": 0.16, "grad_norm": 1.243258128836326, "learning_rate": 1.913756546843886e-05, "loss": 0.5764, "step": 2052 }, { "epoch": 0.16, "grad_norm": 1.288175104046891, "learning_rate": 1.9136544376723537e-05, "loss": 0.6096, "step": 2053 }, { "epoch": 0.16, "grad_norm": 1.2952533832675945, "learning_rate": 1.9135522708171034e-05, "loss": 0.6438, "step": 2054 }, { "epoch": 0.16, "grad_norm": 1.2134435010943878, "learning_rate": 1.9134500462845844e-05, "loss": 0.5902, "step": 2055 }, { "epoch": 0.16, "grad_norm": 1.2717059948707234, "learning_rate": 1.9133477640812513e-05, "loss": 0.5554, "step": 2056 }, { "epoch": 0.16, "grad_norm": 1.3004287379538864, "learning_rate": 1.9132454242135618e-05, "loss": 0.6017, "step": 2057 }, { "epoch": 0.16, "grad_norm": 1.2478443154497603, "learning_rate": 1.9131430266879766e-05, "loss": 0.5813, "step": 2058 }, { "epoch": 0.16, "grad_norm": 1.451664180089235, "learning_rate": 1.913040571510961e-05, "loss": 0.6311, "step": 2059 }, { "epoch": 0.16, "grad_norm": 1.2436437647702974, "learning_rate": 1.9129380586889836e-05, "loss": 0.585, "step": 2060 }, { "epoch": 0.16, "grad_norm": 1.231593365886224, "learning_rate": 1.9128354882285166e-05, "loss": 0.6128, "step": 2061 }, { "epoch": 0.16, "grad_norm": 1.1896508965398727, "learning_rate": 1.9127328601360354e-05, "loss": 0.5924, "step": 2062 }, { "epoch": 0.16, "grad_norm": 1.3441368255960633, "learning_rate": 1.91263017441802e-05, "loss": 0.67, "step": 2063 }, { "epoch": 0.16, "grad_norm": 1.273881799619276, "learning_rate": 1.9125274310809524e-05, "loss": 0.6737, "step": 2064 }, { "epoch": 0.16, "grad_norm": 1.2376626572493064, "learning_rate": 1.9124246301313206e-05, "loss": 0.584, "step": 2065 }, { "epoch": 0.16, "grad_norm": 1.366355555680691, "learning_rate": 1.9123217715756142e-05, "loss": 0.6793, "step": 2066 }, { "epoch": 0.16, "grad_norm": 1.3335146879997093, "learning_rate": 1.9122188554203275e-05, "loss": 0.6198, "step": 2067 }, { "epoch": 0.16, "grad_norm": 1.3164377180464333, "learning_rate": 1.9121158816719577e-05, "loss": 0.6492, "step": 2068 }, { "epoch": 0.16, "grad_norm": 1.3035112663838624, "learning_rate": 1.912012850337007e-05, "loss": 0.5759, "step": 2069 }, { "epoch": 0.16, "grad_norm": 1.4558794720825778, "learning_rate": 1.9119097614219795e-05, "loss": 0.6725, "step": 2070 }, { "epoch": 0.16, "grad_norm": 1.202374707394164, "learning_rate": 1.911806614933384e-05, "loss": 0.6078, "step": 2071 }, { "epoch": 0.16, "grad_norm": 1.217059111771495, "learning_rate": 1.9117034108777323e-05, "loss": 0.6125, "step": 2072 }, { "epoch": 0.16, "grad_norm": 1.2498669553525101, "learning_rate": 1.9116001492615403e-05, "loss": 0.6607, "step": 2073 }, { "epoch": 0.16, "grad_norm": 1.425125932984774, "learning_rate": 1.911496830091328e-05, "loss": 0.7054, "step": 2074 }, { "epoch": 0.16, "grad_norm": 1.273080834365659, "learning_rate": 1.911393453373618e-05, "loss": 0.6738, "step": 2075 }, { "epoch": 0.16, "grad_norm": 1.3463576985158532, "learning_rate": 1.9112900191149374e-05, "loss": 0.6432, "step": 2076 }, { "epoch": 0.16, "grad_norm": 1.3587925419468845, "learning_rate": 1.911186527321816e-05, "loss": 0.6974, "step": 2077 }, { "epoch": 0.16, "grad_norm": 1.3472790618695383, "learning_rate": 1.911082978000788e-05, "loss": 0.5873, "step": 2078 }, { "epoch": 0.16, "grad_norm": 1.2716541089095745, "learning_rate": 1.910979371158391e-05, "loss": 0.6436, "step": 2079 }, { "epoch": 0.16, "grad_norm": 1.3235672780148966, "learning_rate": 1.910875706801166e-05, "loss": 0.601, "step": 2080 }, { "epoch": 0.16, "grad_norm": 1.3177046895133842, "learning_rate": 1.9107719849356588e-05, "loss": 0.6769, "step": 2081 }, { "epoch": 0.16, "grad_norm": 1.272032635641989, "learning_rate": 1.9106682055684168e-05, "loss": 0.6449, "step": 2082 }, { "epoch": 0.16, "grad_norm": 1.250553247089158, "learning_rate": 1.9105643687059926e-05, "loss": 0.6294, "step": 2083 }, { "epoch": 0.16, "grad_norm": 1.1390032801258525, "learning_rate": 1.9104604743549422e-05, "loss": 0.5746, "step": 2084 }, { "epoch": 0.16, "grad_norm": 1.1694509739868435, "learning_rate": 1.9103565225218243e-05, "loss": 0.6365, "step": 2085 }, { "epoch": 0.16, "grad_norm": 1.2251363834054254, "learning_rate": 1.9102525132132028e-05, "loss": 0.6455, "step": 2086 }, { "epoch": 0.16, "grad_norm": 1.3180199758436981, "learning_rate": 1.910148446435643e-05, "loss": 0.6392, "step": 2087 }, { "epoch": 0.16, "grad_norm": 1.3194633778111564, "learning_rate": 1.910044322195717e-05, "loss": 0.562, "step": 2088 }, { "epoch": 0.16, "grad_norm": 1.288310299667858, "learning_rate": 1.9099401404999972e-05, "loss": 0.6323, "step": 2089 }, { "epoch": 0.16, "grad_norm": 1.3034362732235119, "learning_rate": 1.9098359013550617e-05, "loss": 0.6591, "step": 2090 }, { "epoch": 0.16, "grad_norm": 1.2375088758824635, "learning_rate": 1.9097316047674915e-05, "loss": 0.6036, "step": 2091 }, { "epoch": 0.16, "grad_norm": 1.2155822275673247, "learning_rate": 1.9096272507438715e-05, "loss": 0.6065, "step": 2092 }, { "epoch": 0.16, "grad_norm": 1.3515791699037465, "learning_rate": 1.9095228392907904e-05, "loss": 0.6858, "step": 2093 }, { "epoch": 0.16, "grad_norm": 1.2961803552206335, "learning_rate": 1.90941837041484e-05, "loss": 0.669, "step": 2094 }, { "epoch": 0.16, "grad_norm": 1.2780059591133508, "learning_rate": 1.9093138441226156e-05, "loss": 0.5898, "step": 2095 }, { "epoch": 0.16, "grad_norm": 1.8588883861136467, "learning_rate": 1.9092092604207166e-05, "loss": 0.7157, "step": 2096 }, { "epoch": 0.16, "grad_norm": 1.175396675761132, "learning_rate": 1.9091046193157464e-05, "loss": 0.6107, "step": 2097 }, { "epoch": 0.16, "grad_norm": 1.3872467325364235, "learning_rate": 1.9089999208143113e-05, "loss": 0.6484, "step": 2098 }, { "epoch": 0.16, "grad_norm": 1.3398876127397998, "learning_rate": 1.908895164923021e-05, "loss": 0.6737, "step": 2099 }, { "epoch": 0.16, "grad_norm": 1.3554141338679262, "learning_rate": 1.9087903516484898e-05, "loss": 0.6957, "step": 2100 }, { "epoch": 0.16, "grad_norm": 1.2921149901188436, "learning_rate": 1.908685480997335e-05, "loss": 0.6873, "step": 2101 }, { "epoch": 0.16, "grad_norm": 1.6798620088990228, "learning_rate": 1.9085805529761778e-05, "loss": 0.6263, "step": 2102 }, { "epoch": 0.16, "grad_norm": 1.1164184804557442, "learning_rate": 1.9084755675916423e-05, "loss": 0.5341, "step": 2103 }, { "epoch": 0.16, "grad_norm": 1.306438937965065, "learning_rate": 1.9083705248503575e-05, "loss": 0.6658, "step": 2104 }, { "epoch": 0.16, "grad_norm": 1.1967670840224751, "learning_rate": 1.9082654247589543e-05, "loss": 0.6296, "step": 2105 }, { "epoch": 0.16, "grad_norm": 1.3573343591814218, "learning_rate": 1.9081602673240695e-05, "loss": 0.6393, "step": 2106 }, { "epoch": 0.16, "grad_norm": 1.2865097663677707, "learning_rate": 1.9080550525523413e-05, "loss": 0.6403, "step": 2107 }, { "epoch": 0.16, "grad_norm": 1.1711245868025382, "learning_rate": 1.9079497804504122e-05, "loss": 0.5849, "step": 2108 }, { "epoch": 0.16, "grad_norm": 1.3722672616377143, "learning_rate": 1.9078444510249294e-05, "loss": 0.6448, "step": 2109 }, { "epoch": 0.16, "grad_norm": 1.3890895725010728, "learning_rate": 1.9077390642825427e-05, "loss": 0.692, "step": 2110 }, { "epoch": 0.16, "grad_norm": 1.2972426295575574, "learning_rate": 1.9076336202299055e-05, "loss": 0.6433, "step": 2111 }, { "epoch": 0.16, "grad_norm": 1.245771073324085, "learning_rate": 1.907528118873675e-05, "loss": 0.6449, "step": 2112 }, { "epoch": 0.16, "grad_norm": 1.2418793589991117, "learning_rate": 1.907422560220512e-05, "loss": 0.6897, "step": 2113 }, { "epoch": 0.16, "grad_norm": 1.222195767106704, "learning_rate": 1.9073169442770814e-05, "loss": 0.6081, "step": 2114 }, { "epoch": 0.16, "grad_norm": 1.2152395309000437, "learning_rate": 1.907211271050051e-05, "loss": 0.6626, "step": 2115 }, { "epoch": 0.16, "grad_norm": 1.26159558713706, "learning_rate": 1.907105540546092e-05, "loss": 0.69, "step": 2116 }, { "epoch": 0.16, "grad_norm": 1.2454860245675936, "learning_rate": 1.9069997527718803e-05, "loss": 0.6623, "step": 2117 }, { "epoch": 0.16, "grad_norm": 1.206306956236214, "learning_rate": 1.906893907734095e-05, "loss": 0.6518, "step": 2118 }, { "epoch": 0.16, "grad_norm": 1.2104302605255737, "learning_rate": 1.9067880054394182e-05, "loss": 0.6932, "step": 2119 }, { "epoch": 0.16, "grad_norm": 1.1725419753151858, "learning_rate": 1.906682045894536e-05, "loss": 0.6345, "step": 2120 }, { "epoch": 0.16, "grad_norm": 1.3535865666017106, "learning_rate": 1.9065760291061385e-05, "loss": 0.6849, "step": 2121 }, { "epoch": 0.16, "grad_norm": 1.2504353718734038, "learning_rate": 1.9064699550809193e-05, "loss": 0.6639, "step": 2122 }, { "epoch": 0.16, "grad_norm": 1.2486380309806338, "learning_rate": 1.9063638238255747e-05, "loss": 0.6013, "step": 2123 }, { "epoch": 0.16, "grad_norm": 1.2627985440780807, "learning_rate": 1.9062576353468055e-05, "loss": 0.6623, "step": 2124 }, { "epoch": 0.16, "grad_norm": 1.2502061197093575, "learning_rate": 1.9061513896513166e-05, "loss": 0.6208, "step": 2125 }, { "epoch": 0.16, "grad_norm": 1.5639339733382052, "learning_rate": 1.9060450867458152e-05, "loss": 0.7525, "step": 2126 }, { "epoch": 0.17, "grad_norm": 1.2891186326260917, "learning_rate": 1.9059387266370127e-05, "loss": 0.6546, "step": 2127 }, { "epoch": 0.17, "grad_norm": 1.555948484082821, "learning_rate": 1.9058323093316247e-05, "loss": 0.6851, "step": 2128 }, { "epoch": 0.17, "grad_norm": 1.2131105675052543, "learning_rate": 1.905725834836369e-05, "loss": 0.6762, "step": 2129 }, { "epoch": 0.17, "grad_norm": 1.3119491147663953, "learning_rate": 1.905619303157969e-05, "loss": 0.6108, "step": 2130 }, { "epoch": 0.17, "grad_norm": 1.3226456652430132, "learning_rate": 1.9055127143031495e-05, "loss": 0.6548, "step": 2131 }, { "epoch": 0.17, "grad_norm": 1.3480355876051007, "learning_rate": 1.905406068278641e-05, "loss": 0.7146, "step": 2132 }, { "epoch": 0.17, "grad_norm": 1.3381445427954821, "learning_rate": 1.9052993650911758e-05, "loss": 0.6786, "step": 2133 }, { "epoch": 0.17, "grad_norm": 1.2937735274382405, "learning_rate": 1.905192604747491e-05, "loss": 0.6446, "step": 2134 }, { "epoch": 0.17, "grad_norm": 1.2406057207353547, "learning_rate": 1.905085787254327e-05, "loss": 0.6253, "step": 2135 }, { "epoch": 0.17, "grad_norm": 1.2631125295590657, "learning_rate": 1.9049789126184275e-05, "loss": 0.6205, "step": 2136 }, { "epoch": 0.17, "grad_norm": 1.1535828216543516, "learning_rate": 1.90487198084654e-05, "loss": 0.6474, "step": 2137 }, { "epoch": 0.17, "grad_norm": 1.3129414315623575, "learning_rate": 1.904764991945416e-05, "loss": 0.6226, "step": 2138 }, { "epoch": 0.17, "grad_norm": 1.1583938827515148, "learning_rate": 1.9046579459218103e-05, "loss": 0.6406, "step": 2139 }, { "epoch": 0.17, "grad_norm": 1.2867256483994411, "learning_rate": 1.904550842782481e-05, "loss": 0.6571, "step": 2140 }, { "epoch": 0.17, "grad_norm": 1.2765504199125304, "learning_rate": 1.90444368253419e-05, "loss": 0.64, "step": 2141 }, { "epoch": 0.17, "grad_norm": 1.2056213836061969, "learning_rate": 1.9043364651837026e-05, "loss": 0.6632, "step": 2142 }, { "epoch": 0.17, "grad_norm": 1.2662644360426416, "learning_rate": 1.9042291907377886e-05, "loss": 0.674, "step": 2143 }, { "epoch": 0.17, "grad_norm": 1.2909196826644382, "learning_rate": 1.904121859203221e-05, "loss": 0.6332, "step": 2144 }, { "epoch": 0.17, "grad_norm": 1.1797502008410858, "learning_rate": 1.9040144705867755e-05, "loss": 0.5879, "step": 2145 }, { "epoch": 0.17, "grad_norm": 1.0404876457958412, "learning_rate": 1.9039070248952324e-05, "loss": 0.6045, "step": 2146 }, { "epoch": 0.17, "grad_norm": 1.2474719232063831, "learning_rate": 1.9037995221353754e-05, "loss": 0.6385, "step": 2147 }, { "epoch": 0.17, "grad_norm": 1.285709853202506, "learning_rate": 1.9036919623139916e-05, "loss": 0.611, "step": 2148 }, { "epoch": 0.17, "grad_norm": 1.22290587390684, "learning_rate": 1.9035843454378715e-05, "loss": 0.5861, "step": 2149 }, { "epoch": 0.17, "grad_norm": 1.3315507128287138, "learning_rate": 1.90347667151381e-05, "loss": 0.5779, "step": 2150 }, { "epoch": 0.17, "grad_norm": 1.3597209534050894, "learning_rate": 1.903368940548605e-05, "loss": 0.6834, "step": 2151 }, { "epoch": 0.17, "grad_norm": 1.1954379047108512, "learning_rate": 1.9032611525490575e-05, "loss": 0.6113, "step": 2152 }, { "epoch": 0.17, "grad_norm": 1.270987371062318, "learning_rate": 1.9031533075219737e-05, "loss": 0.6587, "step": 2153 }, { "epoch": 0.17, "grad_norm": 1.2807978320656799, "learning_rate": 1.903045405474162e-05, "loss": 0.582, "step": 2154 }, { "epoch": 0.17, "grad_norm": 1.2917413792485852, "learning_rate": 1.9029374464124344e-05, "loss": 0.651, "step": 2155 }, { "epoch": 0.17, "grad_norm": 1.3435415061839777, "learning_rate": 1.902829430343607e-05, "loss": 0.6278, "step": 2156 }, { "epoch": 0.17, "grad_norm": 1.1928243733335564, "learning_rate": 1.9027213572745006e-05, "loss": 0.59, "step": 2157 }, { "epoch": 0.17, "grad_norm": 1.393836738042806, "learning_rate": 1.9026132272119368e-05, "loss": 0.6737, "step": 2158 }, { "epoch": 0.17, "grad_norm": 1.3304006938809796, "learning_rate": 1.9025050401627433e-05, "loss": 0.6413, "step": 2159 }, { "epoch": 0.17, "grad_norm": 1.3030657711336548, "learning_rate": 1.90239679613375e-05, "loss": 0.6559, "step": 2160 }, { "epoch": 0.17, "grad_norm": 1.2609678697296773, "learning_rate": 1.9022884951317916e-05, "loss": 0.5928, "step": 2161 }, { "epoch": 0.17, "grad_norm": 1.2555601436864992, "learning_rate": 1.9021801371637055e-05, "loss": 0.6179, "step": 2162 }, { "epoch": 0.17, "grad_norm": 1.2895793918843141, "learning_rate": 1.9020717222363322e-05, "loss": 0.6373, "step": 2163 }, { "epoch": 0.17, "grad_norm": 1.2674462671571585, "learning_rate": 1.9019632503565174e-05, "loss": 0.583, "step": 2164 }, { "epoch": 0.17, "grad_norm": 1.25094044593771, "learning_rate": 1.9018547215311086e-05, "loss": 0.6169, "step": 2165 }, { "epoch": 0.17, "grad_norm": 1.1661626317374814, "learning_rate": 1.9017461357669588e-05, "loss": 0.6119, "step": 2166 }, { "epoch": 0.17, "grad_norm": 1.3096499607292253, "learning_rate": 1.9016374930709223e-05, "loss": 0.6436, "step": 2167 }, { "epoch": 0.17, "grad_norm": 1.2719579889839332, "learning_rate": 1.9015287934498598e-05, "loss": 0.6431, "step": 2168 }, { "epoch": 0.17, "grad_norm": 1.3271048274113608, "learning_rate": 1.901420036910633e-05, "loss": 0.6619, "step": 2169 }, { "epoch": 0.17, "grad_norm": 1.2545936102790916, "learning_rate": 1.9013112234601084e-05, "loss": 0.6566, "step": 2170 }, { "epoch": 0.17, "grad_norm": 1.3744451964086848, "learning_rate": 1.9012023531051565e-05, "loss": 0.6734, "step": 2171 }, { "epoch": 0.17, "grad_norm": 1.2590793366678148, "learning_rate": 1.90109342585265e-05, "loss": 0.6081, "step": 2172 }, { "epoch": 0.17, "grad_norm": 1.2601475809346592, "learning_rate": 1.900984441709467e-05, "loss": 0.6688, "step": 2173 }, { "epoch": 0.17, "grad_norm": 1.1182569173713621, "learning_rate": 1.9008754006824874e-05, "loss": 0.5623, "step": 2174 }, { "epoch": 0.17, "grad_norm": 1.3822473351681779, "learning_rate": 1.900766302778596e-05, "loss": 0.6451, "step": 2175 }, { "epoch": 0.17, "grad_norm": 1.2689384132633361, "learning_rate": 1.9006571480046805e-05, "loss": 0.61, "step": 2176 }, { "epoch": 0.17, "grad_norm": 1.3104545003567227, "learning_rate": 1.9005479363676322e-05, "loss": 0.6262, "step": 2177 }, { "epoch": 0.17, "grad_norm": 1.3082689024414194, "learning_rate": 1.9004386678743468e-05, "loss": 0.6298, "step": 2178 }, { "epoch": 0.17, "grad_norm": 1.3352777737155743, "learning_rate": 1.9003293425317224e-05, "loss": 0.6366, "step": 2179 }, { "epoch": 0.17, "grad_norm": 1.3554340544995567, "learning_rate": 1.9002199603466617e-05, "loss": 0.6191, "step": 2180 }, { "epoch": 0.17, "grad_norm": 1.2130912577964308, "learning_rate": 1.90011052132607e-05, "loss": 0.6114, "step": 2181 }, { "epoch": 0.17, "grad_norm": 1.311628006596029, "learning_rate": 1.9000010254768576e-05, "loss": 0.6532, "step": 2182 }, { "epoch": 0.17, "grad_norm": 1.1812256644027996, "learning_rate": 1.899891472805937e-05, "loss": 0.5825, "step": 2183 }, { "epoch": 0.17, "grad_norm": 1.2512998498214598, "learning_rate": 1.899781863320225e-05, "loss": 0.6545, "step": 2184 }, { "epoch": 0.17, "grad_norm": 1.2153521881332408, "learning_rate": 1.8996721970266417e-05, "loss": 0.6306, "step": 2185 }, { "epoch": 0.17, "grad_norm": 1.2840863444123651, "learning_rate": 1.8995624739321104e-05, "loss": 0.6233, "step": 2186 }, { "epoch": 0.17, "grad_norm": 1.2849655241738704, "learning_rate": 1.8994526940435593e-05, "loss": 0.6324, "step": 2187 }, { "epoch": 0.17, "grad_norm": 1.2846121984343022, "learning_rate": 1.8993428573679193e-05, "loss": 0.6277, "step": 2188 }, { "epoch": 0.17, "grad_norm": 1.3546217764956616, "learning_rate": 1.8992329639121243e-05, "loss": 0.5932, "step": 2189 }, { "epoch": 0.17, "grad_norm": 1.221647974177459, "learning_rate": 1.8991230136831133e-05, "loss": 0.6211, "step": 2190 }, { "epoch": 0.17, "grad_norm": 1.2044068480845211, "learning_rate": 1.899013006687827e-05, "loss": 0.6546, "step": 2191 }, { "epoch": 0.17, "grad_norm": 1.2214997888659351, "learning_rate": 1.8989029429332117e-05, "loss": 0.6488, "step": 2192 }, { "epoch": 0.17, "grad_norm": 1.217256805053041, "learning_rate": 1.898792822426216e-05, "loss": 0.6382, "step": 2193 }, { "epoch": 0.17, "grad_norm": 1.3706500430413289, "learning_rate": 1.8986826451737918e-05, "loss": 0.6574, "step": 2194 }, { "epoch": 0.17, "grad_norm": 1.3003808399037404, "learning_rate": 1.898572411182896e-05, "loss": 0.7057, "step": 2195 }, { "epoch": 0.17, "grad_norm": 1.3763865069357932, "learning_rate": 1.898462120460488e-05, "loss": 0.6532, "step": 2196 }, { "epoch": 0.17, "grad_norm": 1.3425234586204093, "learning_rate": 1.8983517730135304e-05, "loss": 0.5741, "step": 2197 }, { "epoch": 0.17, "grad_norm": 1.2425613320639064, "learning_rate": 1.8982413688489906e-05, "loss": 0.6441, "step": 2198 }, { "epoch": 0.17, "grad_norm": 1.2721903492036613, "learning_rate": 1.898130907973839e-05, "loss": 0.6709, "step": 2199 }, { "epoch": 0.17, "grad_norm": 1.2513695843207595, "learning_rate": 1.8980203903950495e-05, "loss": 0.6058, "step": 2200 }, { "epoch": 0.17, "grad_norm": 1.3495815953267751, "learning_rate": 1.8979098161195995e-05, "loss": 0.7001, "step": 2201 }, { "epoch": 0.17, "grad_norm": 1.237918981273691, "learning_rate": 1.89779918515447e-05, "loss": 0.5909, "step": 2202 }, { "epoch": 0.17, "grad_norm": 1.236287241375756, "learning_rate": 1.8976884975066464e-05, "loss": 0.5965, "step": 2203 }, { "epoch": 0.17, "grad_norm": 1.272863996116338, "learning_rate": 1.897577753183116e-05, "loss": 0.6082, "step": 2204 }, { "epoch": 0.17, "grad_norm": 1.2897863494282011, "learning_rate": 1.8974669521908714e-05, "loss": 0.6891, "step": 2205 }, { "epoch": 0.17, "grad_norm": 1.1921505963574597, "learning_rate": 1.8973560945369076e-05, "loss": 0.6242, "step": 2206 }, { "epoch": 0.17, "grad_norm": 1.1876158908711067, "learning_rate": 1.897245180228224e-05, "loss": 0.6502, "step": 2207 }, { "epoch": 0.17, "grad_norm": 1.2537498495419412, "learning_rate": 1.897134209271823e-05, "loss": 0.6333, "step": 2208 }, { "epoch": 0.17, "grad_norm": 1.1121322859938865, "learning_rate": 1.8970231816747103e-05, "loss": 0.5998, "step": 2209 }, { "epoch": 0.17, "grad_norm": 1.2086086398035403, "learning_rate": 1.8969120974438967e-05, "loss": 0.5792, "step": 2210 }, { "epoch": 0.17, "grad_norm": 1.405265208969785, "learning_rate": 1.896800956586395e-05, "loss": 0.7066, "step": 2211 }, { "epoch": 0.17, "grad_norm": 1.2142595069926199, "learning_rate": 1.8966897591092213e-05, "loss": 0.6043, "step": 2212 }, { "epoch": 0.17, "grad_norm": 1.3413202808094546, "learning_rate": 1.8965785050193976e-05, "loss": 0.6512, "step": 2213 }, { "epoch": 0.17, "grad_norm": 1.207949144300272, "learning_rate": 1.8964671943239467e-05, "loss": 0.6276, "step": 2214 }, { "epoch": 0.17, "grad_norm": 1.1663273533623015, "learning_rate": 1.8963558270298965e-05, "loss": 0.6386, "step": 2215 }, { "epoch": 0.17, "grad_norm": 1.1964878960239114, "learning_rate": 1.8962444031442788e-05, "loss": 0.6284, "step": 2216 }, { "epoch": 0.17, "grad_norm": 1.3188564691457447, "learning_rate": 1.8961329226741277e-05, "loss": 0.6064, "step": 2217 }, { "epoch": 0.17, "grad_norm": 1.2804359082705383, "learning_rate": 1.8960213856264818e-05, "loss": 0.6543, "step": 2218 }, { "epoch": 0.17, "grad_norm": 1.2107124365572628, "learning_rate": 1.8959097920083828e-05, "loss": 0.5967, "step": 2219 }, { "epoch": 0.17, "grad_norm": 1.4385573603358472, "learning_rate": 1.8957981418268764e-05, "loss": 0.6438, "step": 2220 }, { "epoch": 0.17, "grad_norm": 1.1374573814875977, "learning_rate": 1.8956864350890117e-05, "loss": 0.5845, "step": 2221 }, { "epoch": 0.17, "grad_norm": 1.428789617701718, "learning_rate": 1.8955746718018413e-05, "loss": 0.657, "step": 2222 }, { "epoch": 0.17, "grad_norm": 1.2356568453075751, "learning_rate": 1.895462851972421e-05, "loss": 0.6111, "step": 2223 }, { "epoch": 0.17, "grad_norm": 1.1890877599451777, "learning_rate": 1.895350975607811e-05, "loss": 0.5742, "step": 2224 }, { "epoch": 0.17, "grad_norm": 1.2546147041533051, "learning_rate": 1.8952390427150747e-05, "loss": 0.6602, "step": 2225 }, { "epoch": 0.17, "grad_norm": 1.269079227670734, "learning_rate": 1.8951270533012786e-05, "loss": 0.6248, "step": 2226 }, { "epoch": 0.17, "grad_norm": 1.3069233723850515, "learning_rate": 1.8950150073734937e-05, "loss": 0.6711, "step": 2227 }, { "epoch": 0.17, "grad_norm": 1.3370850302563713, "learning_rate": 1.8949029049387933e-05, "loss": 0.6741, "step": 2228 }, { "epoch": 0.17, "grad_norm": 1.3543312510706706, "learning_rate": 1.8947907460042558e-05, "loss": 0.5988, "step": 2229 }, { "epoch": 0.17, "grad_norm": 1.3573715529959696, "learning_rate": 1.8946785305769616e-05, "loss": 0.6502, "step": 2230 }, { "epoch": 0.17, "grad_norm": 1.2871090045285887, "learning_rate": 1.894566258663996e-05, "loss": 0.6669, "step": 2231 }, { "epoch": 0.17, "grad_norm": 1.20930375670513, "learning_rate": 1.894453930272447e-05, "loss": 0.5849, "step": 2232 }, { "epoch": 0.17, "grad_norm": 1.2904696558089934, "learning_rate": 1.8943415454094068e-05, "loss": 0.5952, "step": 2233 }, { "epoch": 0.17, "grad_norm": 1.3465359218946358, "learning_rate": 1.894229104081971e-05, "loss": 0.6687, "step": 2234 }, { "epoch": 0.17, "grad_norm": 1.3892760431259157, "learning_rate": 1.8941166062972374e-05, "loss": 0.6687, "step": 2235 }, { "epoch": 0.17, "grad_norm": 1.172477719760186, "learning_rate": 1.89400405206231e-05, "loss": 0.5805, "step": 2236 }, { "epoch": 0.17, "grad_norm": 1.329783089022568, "learning_rate": 1.893891441384294e-05, "loss": 0.6709, "step": 2237 }, { "epoch": 0.17, "grad_norm": 1.1965713354797212, "learning_rate": 1.8937787742703e-05, "loss": 0.5919, "step": 2238 }, { "epoch": 0.17, "grad_norm": 1.1834011109959846, "learning_rate": 1.8936660507274403e-05, "loss": 0.6151, "step": 2239 }, { "epoch": 0.17, "grad_norm": 1.2269391010191004, "learning_rate": 1.8935532707628322e-05, "loss": 0.6235, "step": 2240 }, { "epoch": 0.17, "grad_norm": 1.398545969587041, "learning_rate": 1.8934404343835956e-05, "loss": 0.7054, "step": 2241 }, { "epoch": 0.17, "grad_norm": 1.2660875299426408, "learning_rate": 1.893327541596855e-05, "loss": 0.675, "step": 2242 }, { "epoch": 0.17, "grad_norm": 1.350481736803621, "learning_rate": 1.893214592409738e-05, "loss": 0.6923, "step": 2243 }, { "epoch": 0.17, "grad_norm": 1.257509230547599, "learning_rate": 1.8931015868293755e-05, "loss": 0.6031, "step": 2244 }, { "epoch": 0.17, "grad_norm": 1.2712820803365117, "learning_rate": 1.8929885248629017e-05, "loss": 0.6541, "step": 2245 }, { "epoch": 0.17, "grad_norm": 1.3138848673432098, "learning_rate": 1.8928754065174552e-05, "loss": 0.6442, "step": 2246 }, { "epoch": 0.17, "grad_norm": 1.1687879464420263, "learning_rate": 1.8927622318001778e-05, "loss": 0.6104, "step": 2247 }, { "epoch": 0.17, "grad_norm": 1.1142975033670908, "learning_rate": 1.8926490007182147e-05, "loss": 0.5627, "step": 2248 }, { "epoch": 0.17, "grad_norm": 1.2697622705217075, "learning_rate": 1.8925357132787142e-05, "loss": 0.6007, "step": 2249 }, { "epoch": 0.17, "grad_norm": 1.1813173466613234, "learning_rate": 1.8924223694888297e-05, "loss": 0.5944, "step": 2250 }, { "epoch": 0.17, "grad_norm": 1.5465209247445078, "learning_rate": 1.8923089693557165e-05, "loss": 0.6431, "step": 2251 }, { "epoch": 0.17, "grad_norm": 1.2957509696437204, "learning_rate": 1.8921955128865343e-05, "loss": 0.6926, "step": 2252 }, { "epoch": 0.17, "grad_norm": 1.2173761302434725, "learning_rate": 1.8920820000884467e-05, "loss": 0.5983, "step": 2253 }, { "epoch": 0.17, "grad_norm": 1.177543156474835, "learning_rate": 1.8919684309686193e-05, "loss": 0.5842, "step": 2254 }, { "epoch": 0.17, "grad_norm": 1.412638542672742, "learning_rate": 1.8918548055342232e-05, "loss": 0.6856, "step": 2255 }, { "epoch": 0.18, "grad_norm": 1.3146443334211353, "learning_rate": 1.891741123792432e-05, "loss": 0.6706, "step": 2256 }, { "epoch": 0.18, "grad_norm": 1.2936881561977949, "learning_rate": 1.891627385750423e-05, "loss": 0.6069, "step": 2257 }, { "epoch": 0.18, "grad_norm": 1.334859491161905, "learning_rate": 1.8915135914153766e-05, "loss": 0.7092, "step": 2258 }, { "epoch": 0.18, "grad_norm": 1.3024241751567636, "learning_rate": 1.8913997407944776e-05, "loss": 0.6325, "step": 2259 }, { "epoch": 0.18, "grad_norm": 1.251937128164181, "learning_rate": 1.891285833894914e-05, "loss": 0.6681, "step": 2260 }, { "epoch": 0.18, "grad_norm": 1.2178926631316538, "learning_rate": 1.8911718707238772e-05, "loss": 0.6195, "step": 2261 }, { "epoch": 0.18, "grad_norm": 1.358931765150668, "learning_rate": 1.8910578512885624e-05, "loss": 0.6746, "step": 2262 }, { "epoch": 0.18, "grad_norm": 1.2550699888524264, "learning_rate": 1.8909437755961683e-05, "loss": 0.5919, "step": 2263 }, { "epoch": 0.18, "grad_norm": 1.1356355940178202, "learning_rate": 1.890829643653897e-05, "loss": 0.642, "step": 2264 }, { "epoch": 0.18, "grad_norm": 1.2172656189673872, "learning_rate": 1.890715455468954e-05, "loss": 0.628, "step": 2265 }, { "epoch": 0.18, "grad_norm": 1.258034491052529, "learning_rate": 1.890601211048549e-05, "loss": 0.6401, "step": 2266 }, { "epoch": 0.18, "grad_norm": 1.333558937695525, "learning_rate": 1.8904869103998947e-05, "loss": 0.691, "step": 2267 }, { "epoch": 0.18, "grad_norm": 1.2932833378180306, "learning_rate": 1.8903725535302073e-05, "loss": 0.6411, "step": 2268 }, { "epoch": 0.18, "grad_norm": 1.2341741084549087, "learning_rate": 1.8902581404467067e-05, "loss": 0.6379, "step": 2269 }, { "epoch": 0.18, "grad_norm": 1.2396277198003882, "learning_rate": 1.890143671156617e-05, "loss": 0.5427, "step": 2270 }, { "epoch": 0.18, "grad_norm": 1.2724096912946752, "learning_rate": 1.8900291456671645e-05, "loss": 0.5593, "step": 2271 }, { "epoch": 0.18, "grad_norm": 1.2147699560585856, "learning_rate": 1.8899145639855803e-05, "loss": 0.6228, "step": 2272 }, { "epoch": 0.18, "grad_norm": 1.3003643845683441, "learning_rate": 1.889799926119098e-05, "loss": 0.5676, "step": 2273 }, { "epoch": 0.18, "grad_norm": 1.385570168655623, "learning_rate": 1.8896852320749558e-05, "loss": 0.6653, "step": 2274 }, { "epoch": 0.18, "grad_norm": 1.2159537000907827, "learning_rate": 1.8895704818603947e-05, "loss": 0.6018, "step": 2275 }, { "epoch": 0.18, "grad_norm": 1.346308468228186, "learning_rate": 1.8894556754826597e-05, "loss": 0.6397, "step": 2276 }, { "epoch": 0.18, "grad_norm": 1.331505903980833, "learning_rate": 1.889340812948999e-05, "loss": 0.6712, "step": 2277 }, { "epoch": 0.18, "grad_norm": 1.337518080473291, "learning_rate": 1.8892258942666637e-05, "loss": 0.6874, "step": 2278 }, { "epoch": 0.18, "grad_norm": 1.3073913971853512, "learning_rate": 1.8891109194429102e-05, "loss": 0.6233, "step": 2279 }, { "epoch": 0.18, "grad_norm": 1.2000252065395383, "learning_rate": 1.8889958884849972e-05, "loss": 0.6291, "step": 2280 }, { "epoch": 0.18, "grad_norm": 1.2879512273930918, "learning_rate": 1.8888808014001874e-05, "loss": 0.6618, "step": 2281 }, { "epoch": 0.18, "grad_norm": 1.3074045271872268, "learning_rate": 1.8887656581957464e-05, "loss": 0.7366, "step": 2282 }, { "epoch": 0.18, "grad_norm": 1.334403646550239, "learning_rate": 1.888650458878944e-05, "loss": 0.6571, "step": 2283 }, { "epoch": 0.18, "grad_norm": 1.1870009728984494, "learning_rate": 1.888535203457053e-05, "loss": 0.5928, "step": 2284 }, { "epoch": 0.18, "grad_norm": 1.2679478087138498, "learning_rate": 1.8884198919373507e-05, "loss": 0.6118, "step": 2285 }, { "epoch": 0.18, "grad_norm": 1.2155057323953022, "learning_rate": 1.8883045243271168e-05, "loss": 0.6207, "step": 2286 }, { "epoch": 0.18, "grad_norm": 1.3155058546323852, "learning_rate": 1.8881891006336354e-05, "loss": 0.6429, "step": 2287 }, { "epoch": 0.18, "grad_norm": 1.3744120641327664, "learning_rate": 1.8880736208641937e-05, "loss": 0.6492, "step": 2288 }, { "epoch": 0.18, "grad_norm": 1.2442458750522478, "learning_rate": 1.887958085026082e-05, "loss": 0.6134, "step": 2289 }, { "epoch": 0.18, "grad_norm": 1.3569818675489023, "learning_rate": 1.8878424931265957e-05, "loss": 0.6853, "step": 2290 }, { "epoch": 0.18, "grad_norm": 1.3909016880420917, "learning_rate": 1.887726845173032e-05, "loss": 0.691, "step": 2291 }, { "epoch": 0.18, "grad_norm": 1.1964597494621914, "learning_rate": 1.887611141172692e-05, "loss": 0.6377, "step": 2292 }, { "epoch": 0.18, "grad_norm": 1.2263453102985502, "learning_rate": 1.8874953811328817e-05, "loss": 0.6161, "step": 2293 }, { "epoch": 0.18, "grad_norm": 1.3458127889255969, "learning_rate": 1.8873795650609092e-05, "loss": 0.7277, "step": 2294 }, { "epoch": 0.18, "grad_norm": 1.2911352889889798, "learning_rate": 1.8872636929640863e-05, "loss": 0.6348, "step": 2295 }, { "epoch": 0.18, "grad_norm": 1.3019295970440106, "learning_rate": 1.8871477648497285e-05, "loss": 0.674, "step": 2296 }, { "epoch": 0.18, "grad_norm": 1.2017848489944882, "learning_rate": 1.8870317807251555e-05, "loss": 0.6484, "step": 2297 }, { "epoch": 0.18, "grad_norm": 1.1973057004310674, "learning_rate": 1.8869157405976896e-05, "loss": 0.6222, "step": 2298 }, { "epoch": 0.18, "grad_norm": 1.2511482210332108, "learning_rate": 1.8867996444746574e-05, "loss": 0.598, "step": 2299 }, { "epoch": 0.18, "grad_norm": 1.2101373797842334, "learning_rate": 1.886683492363388e-05, "loss": 0.6136, "step": 2300 }, { "epoch": 0.18, "grad_norm": 1.2807353777254566, "learning_rate": 1.886567284271215e-05, "loss": 0.6396, "step": 2301 }, { "epoch": 0.18, "grad_norm": 1.2229585608996965, "learning_rate": 1.886451020205476e-05, "loss": 0.6031, "step": 2302 }, { "epoch": 0.18, "grad_norm": 1.4295672590211297, "learning_rate": 1.8863347001735098e-05, "loss": 0.6923, "step": 2303 }, { "epoch": 0.18, "grad_norm": 1.24686252712044, "learning_rate": 1.8862183241826613e-05, "loss": 0.6227, "step": 2304 }, { "epoch": 0.18, "grad_norm": 1.2491685009095252, "learning_rate": 1.8861018922402776e-05, "loss": 0.6716, "step": 2305 }, { "epoch": 0.18, "grad_norm": 1.323084342522879, "learning_rate": 1.88598540435371e-05, "loss": 0.6527, "step": 2306 }, { "epoch": 0.18, "grad_norm": 1.317985425118884, "learning_rate": 1.8858688605303127e-05, "loss": 0.6852, "step": 2307 }, { "epoch": 0.18, "grad_norm": 1.5015658312074862, "learning_rate": 1.8857522607774438e-05, "loss": 0.6642, "step": 2308 }, { "epoch": 0.18, "grad_norm": 1.2061895009419612, "learning_rate": 1.8856356051024646e-05, "loss": 0.6094, "step": 2309 }, { "epoch": 0.18, "grad_norm": 1.1317907526773971, "learning_rate": 1.8855188935127405e-05, "loss": 0.5303, "step": 2310 }, { "epoch": 0.18, "grad_norm": 1.247687823923802, "learning_rate": 1.88540212601564e-05, "loss": 0.6095, "step": 2311 }, { "epoch": 0.18, "grad_norm": 1.2911394899537405, "learning_rate": 1.8852853026185348e-05, "loss": 0.6758, "step": 2312 }, { "epoch": 0.18, "grad_norm": 1.2499499310956306, "learning_rate": 1.8851684233288016e-05, "loss": 0.5879, "step": 2313 }, { "epoch": 0.18, "grad_norm": 1.4953969423373774, "learning_rate": 1.8850514881538186e-05, "loss": 0.6632, "step": 2314 }, { "epoch": 0.18, "grad_norm": 1.1763402174141706, "learning_rate": 1.884934497100969e-05, "loss": 0.5729, "step": 2315 }, { "epoch": 0.18, "grad_norm": 1.1899228225653142, "learning_rate": 1.8848174501776388e-05, "loss": 0.6233, "step": 2316 }, { "epoch": 0.18, "grad_norm": 1.3122335799344167, "learning_rate": 1.8847003473912182e-05, "loss": 0.5731, "step": 2317 }, { "epoch": 0.18, "grad_norm": 1.2591900597416277, "learning_rate": 1.8845831887490998e-05, "loss": 0.6578, "step": 2318 }, { "epoch": 0.18, "grad_norm": 1.2976041835051901, "learning_rate": 1.8844659742586813e-05, "loss": 0.6085, "step": 2319 }, { "epoch": 0.18, "grad_norm": 1.1819868100845572, "learning_rate": 1.8843487039273626e-05, "loss": 0.61, "step": 2320 }, { "epoch": 0.18, "grad_norm": 1.3166800647166592, "learning_rate": 1.884231377762547e-05, "loss": 0.6964, "step": 2321 }, { "epoch": 0.18, "grad_norm": 1.3207310571794146, "learning_rate": 1.884113995771643e-05, "loss": 0.6904, "step": 2322 }, { "epoch": 0.18, "grad_norm": 1.2725567726705949, "learning_rate": 1.883996557962061e-05, "loss": 0.6333, "step": 2323 }, { "epoch": 0.18, "grad_norm": 1.2219432665396297, "learning_rate": 1.8838790643412152e-05, "loss": 0.6419, "step": 2324 }, { "epoch": 0.18, "grad_norm": 1.351336112935263, "learning_rate": 1.883761514916524e-05, "loss": 0.6341, "step": 2325 }, { "epoch": 0.18, "grad_norm": 1.3525928252514245, "learning_rate": 1.8836439096954086e-05, "loss": 0.6883, "step": 2326 }, { "epoch": 0.18, "grad_norm": 1.2945120552771745, "learning_rate": 1.8835262486852944e-05, "loss": 0.6079, "step": 2327 }, { "epoch": 0.18, "grad_norm": 1.2097145550165276, "learning_rate": 1.8834085318936096e-05, "loss": 0.5955, "step": 2328 }, { "epoch": 0.18, "grad_norm": 1.2838586442210311, "learning_rate": 1.8832907593277868e-05, "loss": 0.6218, "step": 2329 }, { "epoch": 0.18, "grad_norm": 1.2159462001845165, "learning_rate": 1.8831729309952605e-05, "loss": 0.6233, "step": 2330 }, { "epoch": 0.18, "grad_norm": 1.3050101789172515, "learning_rate": 1.883055046903471e-05, "loss": 0.6007, "step": 2331 }, { "epoch": 0.18, "grad_norm": 1.257966500089446, "learning_rate": 1.8829371070598604e-05, "loss": 0.5754, "step": 2332 }, { "epoch": 0.18, "grad_norm": 1.2911298415637713, "learning_rate": 1.8828191114718747e-05, "loss": 0.6959, "step": 2333 }, { "epoch": 0.18, "grad_norm": 1.2363132277038633, "learning_rate": 1.8827010601469634e-05, "loss": 0.5912, "step": 2334 }, { "epoch": 0.18, "grad_norm": 1.3521765488211308, "learning_rate": 1.882582953092581e-05, "loss": 0.6586, "step": 2335 }, { "epoch": 0.18, "grad_norm": 1.117424466270734, "learning_rate": 1.8824647903161824e-05, "loss": 0.5572, "step": 2336 }, { "epoch": 0.18, "grad_norm": 1.2016136783569273, "learning_rate": 1.882346571825229e-05, "loss": 0.6283, "step": 2337 }, { "epoch": 0.18, "grad_norm": 1.2291891559332284, "learning_rate": 1.882228297627184e-05, "loss": 0.6335, "step": 2338 }, { "epoch": 0.18, "grad_norm": 1.3391902653423522, "learning_rate": 1.882109967729515e-05, "loss": 0.6329, "step": 2339 }, { "epoch": 0.18, "grad_norm": 1.2842046118808663, "learning_rate": 1.881991582139693e-05, "loss": 0.5719, "step": 2340 }, { "epoch": 0.18, "grad_norm": 1.338455192454326, "learning_rate": 1.8818731408651914e-05, "loss": 0.69, "step": 2341 }, { "epoch": 0.18, "grad_norm": 1.3343924349286822, "learning_rate": 1.8817546439134883e-05, "loss": 0.6427, "step": 2342 }, { "epoch": 0.18, "grad_norm": 1.3439261742365523, "learning_rate": 1.881636091292066e-05, "loss": 0.6574, "step": 2343 }, { "epoch": 0.18, "grad_norm": 1.379812704520425, "learning_rate": 1.8815174830084084e-05, "loss": 0.6522, "step": 2344 }, { "epoch": 0.18, "grad_norm": 1.329980384495578, "learning_rate": 1.881398819070004e-05, "loss": 0.6854, "step": 2345 }, { "epoch": 0.18, "grad_norm": 1.31596865299829, "learning_rate": 1.8812800994843446e-05, "loss": 0.6494, "step": 2346 }, { "epoch": 0.18, "grad_norm": 1.3828628013643132, "learning_rate": 1.8811613242589257e-05, "loss": 0.6334, "step": 2347 }, { "epoch": 0.18, "grad_norm": 1.3122544285966038, "learning_rate": 1.8810424934012464e-05, "loss": 0.6726, "step": 2348 }, { "epoch": 0.18, "grad_norm": 1.4490422143888047, "learning_rate": 1.880923606918809e-05, "loss": 0.6331, "step": 2349 }, { "epoch": 0.18, "grad_norm": 1.2305817567309718, "learning_rate": 1.8808046648191193e-05, "loss": 0.6084, "step": 2350 }, { "epoch": 0.18, "grad_norm": 1.406349771986899, "learning_rate": 1.8806856671096866e-05, "loss": 0.6066, "step": 2351 }, { "epoch": 0.18, "grad_norm": 1.4005646537639251, "learning_rate": 1.880566613798024e-05, "loss": 0.7023, "step": 2352 }, { "epoch": 0.18, "grad_norm": 1.153702842848037, "learning_rate": 1.880447504891648e-05, "loss": 0.5712, "step": 2353 }, { "epoch": 0.18, "grad_norm": 1.1825560499621932, "learning_rate": 1.8803283403980784e-05, "loss": 0.6223, "step": 2354 }, { "epoch": 0.18, "grad_norm": 1.3774579793263662, "learning_rate": 1.8802091203248388e-05, "loss": 0.6601, "step": 2355 }, { "epoch": 0.18, "grad_norm": 1.1526030167983166, "learning_rate": 1.880089844679456e-05, "loss": 0.5768, "step": 2356 }, { "epoch": 0.18, "grad_norm": 1.30984746743837, "learning_rate": 1.879970513469461e-05, "loss": 0.6519, "step": 2357 }, { "epoch": 0.18, "grad_norm": 1.3555014218836876, "learning_rate": 1.8798511267023874e-05, "loss": 0.6513, "step": 2358 }, { "epoch": 0.18, "grad_norm": 1.2281907566317205, "learning_rate": 1.8797316843857723e-05, "loss": 0.566, "step": 2359 }, { "epoch": 0.18, "grad_norm": 1.382401701690258, "learning_rate": 1.8796121865271578e-05, "loss": 0.6631, "step": 2360 }, { "epoch": 0.18, "grad_norm": 1.1713213820878148, "learning_rate": 1.8794926331340874e-05, "loss": 0.5769, "step": 2361 }, { "epoch": 0.18, "grad_norm": 1.3037626906277462, "learning_rate": 1.8793730242141093e-05, "loss": 0.6527, "step": 2362 }, { "epoch": 0.18, "grad_norm": 1.2611793333432209, "learning_rate": 1.8792533597747756e-05, "loss": 0.6272, "step": 2363 }, { "epoch": 0.18, "grad_norm": 1.297287840797995, "learning_rate": 1.879133639823641e-05, "loss": 0.5832, "step": 2364 }, { "epoch": 0.18, "grad_norm": 1.3728048835749782, "learning_rate": 1.8790138643682633e-05, "loss": 0.6323, "step": 2365 }, { "epoch": 0.18, "grad_norm": 1.1537831772221736, "learning_rate": 1.878894033416206e-05, "loss": 0.581, "step": 2366 }, { "epoch": 0.18, "grad_norm": 1.3256932604245857, "learning_rate": 1.8787741469750332e-05, "loss": 0.6431, "step": 2367 }, { "epoch": 0.18, "grad_norm": 1.2715987990498159, "learning_rate": 1.8786542050523152e-05, "loss": 0.6142, "step": 2368 }, { "epoch": 0.18, "grad_norm": 1.3999320201036762, "learning_rate": 1.8785342076556236e-05, "loss": 0.6424, "step": 2369 }, { "epoch": 0.18, "grad_norm": 1.3710288877441834, "learning_rate": 1.878414154792535e-05, "loss": 0.7017, "step": 2370 }, { "epoch": 0.18, "grad_norm": 1.368623731180025, "learning_rate": 1.878294046470629e-05, "loss": 0.6635, "step": 2371 }, { "epoch": 0.18, "grad_norm": 1.3156077965995685, "learning_rate": 1.878173882697488e-05, "loss": 0.5893, "step": 2372 }, { "epoch": 0.18, "grad_norm": 1.2967169332118074, "learning_rate": 1.8780536634806995e-05, "loss": 0.626, "step": 2373 }, { "epoch": 0.18, "grad_norm": 1.1316281671862123, "learning_rate": 1.8779333888278524e-05, "loss": 0.5864, "step": 2374 }, { "epoch": 0.18, "grad_norm": 1.2839720119325606, "learning_rate": 1.8778130587465414e-05, "loss": 0.6651, "step": 2375 }, { "epoch": 0.18, "grad_norm": 1.1422180129957835, "learning_rate": 1.877692673244363e-05, "loss": 0.6014, "step": 2376 }, { "epoch": 0.18, "grad_norm": 1.275558343815793, "learning_rate": 1.877572232328918e-05, "loss": 0.5993, "step": 2377 }, { "epoch": 0.18, "grad_norm": 1.1297881697546033, "learning_rate": 1.8774517360078098e-05, "loss": 0.5735, "step": 2378 }, { "epoch": 0.18, "grad_norm": 1.2213090781190319, "learning_rate": 1.877331184288647e-05, "loss": 0.5763, "step": 2379 }, { "epoch": 0.18, "grad_norm": 1.2156443518346145, "learning_rate": 1.8772105771790397e-05, "loss": 0.5928, "step": 2380 }, { "epoch": 0.18, "grad_norm": 1.2991646466943985, "learning_rate": 1.877089914686603e-05, "loss": 0.6339, "step": 2381 }, { "epoch": 0.18, "grad_norm": 1.2946729694115477, "learning_rate": 1.8769691968189548e-05, "loss": 0.638, "step": 2382 }, { "epoch": 0.18, "grad_norm": 1.2586145627783092, "learning_rate": 1.876848423583717e-05, "loss": 0.5576, "step": 2383 }, { "epoch": 0.18, "grad_norm": 1.2176354031006298, "learning_rate": 1.8767275949885136e-05, "loss": 0.6671, "step": 2384 }, { "epoch": 0.19, "grad_norm": 1.2821997402201015, "learning_rate": 1.876606711040974e-05, "loss": 0.6781, "step": 2385 }, { "epoch": 0.19, "grad_norm": 1.226713912268256, "learning_rate": 1.8764857717487304e-05, "loss": 0.607, "step": 2386 }, { "epoch": 0.19, "grad_norm": 1.2588977751979855, "learning_rate": 1.8763647771194177e-05, "loss": 0.6237, "step": 2387 }, { "epoch": 0.19, "grad_norm": 1.2773613884784183, "learning_rate": 1.8762437271606752e-05, "loss": 0.6581, "step": 2388 }, { "epoch": 0.19, "grad_norm": 1.2282736438359942, "learning_rate": 1.8761226218801455e-05, "loss": 0.5936, "step": 2389 }, { "epoch": 0.19, "grad_norm": 1.267149442766819, "learning_rate": 1.8760014612854746e-05, "loss": 0.6818, "step": 2390 }, { "epoch": 0.19, "grad_norm": 1.32343568407675, "learning_rate": 1.8758802453843112e-05, "loss": 0.6849, "step": 2391 }, { "epoch": 0.19, "grad_norm": 1.1820451028419297, "learning_rate": 1.8757589741843095e-05, "loss": 0.6348, "step": 2392 }, { "epoch": 0.19, "grad_norm": 1.258630758886283, "learning_rate": 1.8756376476931252e-05, "loss": 0.6219, "step": 2393 }, { "epoch": 0.19, "grad_norm": 1.2469574138586004, "learning_rate": 1.8755162659184186e-05, "loss": 0.6137, "step": 2394 }, { "epoch": 0.19, "grad_norm": 1.2272257864127885, "learning_rate": 1.8753948288678533e-05, "loss": 0.6553, "step": 2395 }, { "epoch": 0.19, "grad_norm": 1.2942705314323744, "learning_rate": 1.8752733365490957e-05, "loss": 0.631, "step": 2396 }, { "epoch": 0.19, "grad_norm": 1.2651904737741642, "learning_rate": 1.875151788969817e-05, "loss": 0.6386, "step": 2397 }, { "epoch": 0.19, "grad_norm": 1.2810628335597505, "learning_rate": 1.8750301861376903e-05, "loss": 0.699, "step": 2398 }, { "epoch": 0.19, "grad_norm": 1.2614042758252852, "learning_rate": 1.8749085280603935e-05, "loss": 0.6301, "step": 2399 }, { "epoch": 0.19, "grad_norm": 1.2883318593278739, "learning_rate": 1.8747868147456072e-05, "loss": 0.63, "step": 2400 }, { "epoch": 0.19, "grad_norm": 1.2328140916318495, "learning_rate": 1.874665046201016e-05, "loss": 0.632, "step": 2401 }, { "epoch": 0.19, "grad_norm": 1.3114522657589867, "learning_rate": 1.8745432224343082e-05, "loss": 0.6699, "step": 2402 }, { "epoch": 0.19, "grad_norm": 1.2232823338042742, "learning_rate": 1.874421343453174e-05, "loss": 0.5913, "step": 2403 }, { "epoch": 0.19, "grad_norm": 1.3775435676538899, "learning_rate": 1.8742994092653096e-05, "loss": 0.6376, "step": 2404 }, { "epoch": 0.19, "grad_norm": 1.1533099245900462, "learning_rate": 1.874177419878412e-05, "loss": 0.5181, "step": 2405 }, { "epoch": 0.19, "grad_norm": 1.2966567166019398, "learning_rate": 1.8740553753001844e-05, "loss": 0.6079, "step": 2406 }, { "epoch": 0.19, "grad_norm": 1.179352415886464, "learning_rate": 1.873933275538331e-05, "loss": 0.5939, "step": 2407 }, { "epoch": 0.19, "grad_norm": 1.2199868015450879, "learning_rate": 1.8738111206005615e-05, "loss": 0.5642, "step": 2408 }, { "epoch": 0.19, "grad_norm": 1.2386872982627628, "learning_rate": 1.8736889104945874e-05, "loss": 0.6102, "step": 2409 }, { "epoch": 0.19, "grad_norm": 1.3689206718048275, "learning_rate": 1.8735666452281246e-05, "loss": 0.677, "step": 2410 }, { "epoch": 0.19, "grad_norm": 1.320869689384897, "learning_rate": 1.8734443248088926e-05, "loss": 0.6654, "step": 2411 }, { "epoch": 0.19, "grad_norm": 1.3809177758629052, "learning_rate": 1.873321949244614e-05, "loss": 0.6641, "step": 2412 }, { "epoch": 0.19, "grad_norm": 1.3519685168466142, "learning_rate": 1.8731995185430148e-05, "loss": 0.5993, "step": 2413 }, { "epoch": 0.19, "grad_norm": 1.2307249256673876, "learning_rate": 1.8730770327118254e-05, "loss": 0.6285, "step": 2414 }, { "epoch": 0.19, "grad_norm": 1.2267889797780407, "learning_rate": 1.8729544917587778e-05, "loss": 0.6415, "step": 2415 }, { "epoch": 0.19, "grad_norm": 1.2773785600964451, "learning_rate": 1.8728318956916096e-05, "loss": 0.5713, "step": 2416 }, { "epoch": 0.19, "grad_norm": 1.2343488762903918, "learning_rate": 1.8727092445180605e-05, "loss": 0.5605, "step": 2417 }, { "epoch": 0.19, "grad_norm": 1.4341248404244182, "learning_rate": 1.8725865382458744e-05, "loss": 0.6518, "step": 2418 }, { "epoch": 0.19, "grad_norm": 1.3182418205691848, "learning_rate": 1.8724637768827982e-05, "loss": 0.632, "step": 2419 }, { "epoch": 0.19, "grad_norm": 1.1458106067600007, "learning_rate": 1.8723409604365823e-05, "loss": 0.5761, "step": 2420 }, { "epoch": 0.19, "grad_norm": 1.330170525969723, "learning_rate": 1.872218088914981e-05, "loss": 0.6906, "step": 2421 }, { "epoch": 0.19, "grad_norm": 1.2021378269548495, "learning_rate": 1.8720951623257514e-05, "loss": 0.6223, "step": 2422 }, { "epoch": 0.19, "grad_norm": 1.1857673150989387, "learning_rate": 1.871972180676655e-05, "loss": 0.5929, "step": 2423 }, { "epoch": 0.19, "grad_norm": 1.2485561137851693, "learning_rate": 1.8718491439754562e-05, "loss": 0.6092, "step": 2424 }, { "epoch": 0.19, "grad_norm": 1.275888342955678, "learning_rate": 1.8717260522299226e-05, "loss": 0.6699, "step": 2425 }, { "epoch": 0.19, "grad_norm": 1.2705311754733652, "learning_rate": 1.8716029054478257e-05, "loss": 0.6836, "step": 2426 }, { "epoch": 0.19, "grad_norm": 1.2274701593038857, "learning_rate": 1.8714797036369407e-05, "loss": 0.6395, "step": 2427 }, { "epoch": 0.19, "grad_norm": 1.1634775522588696, "learning_rate": 1.8713564468050455e-05, "loss": 0.5808, "step": 2428 }, { "epoch": 0.19, "grad_norm": 1.237808955764608, "learning_rate": 1.8712331349599227e-05, "loss": 0.6695, "step": 2429 }, { "epoch": 0.19, "grad_norm": 1.2227718798283174, "learning_rate": 1.8711097681093568e-05, "loss": 0.6011, "step": 2430 }, { "epoch": 0.19, "grad_norm": 1.1968562809448942, "learning_rate": 1.8709863462611368e-05, "loss": 0.6742, "step": 2431 }, { "epoch": 0.19, "grad_norm": 1.4422217235864745, "learning_rate": 1.8708628694230554e-05, "loss": 0.6482, "step": 2432 }, { "epoch": 0.19, "grad_norm": 1.2474815747996928, "learning_rate": 1.8707393376029076e-05, "loss": 0.6287, "step": 2433 }, { "epoch": 0.19, "grad_norm": 1.1562748983640754, "learning_rate": 1.8706157508084934e-05, "loss": 0.6359, "step": 2434 }, { "epoch": 0.19, "grad_norm": 1.2451794178688973, "learning_rate": 1.8704921090476148e-05, "loss": 0.6593, "step": 2435 }, { "epoch": 0.19, "grad_norm": 1.232030017298225, "learning_rate": 1.870368412328078e-05, "loss": 0.6008, "step": 2436 }, { "epoch": 0.19, "grad_norm": 1.139302623749709, "learning_rate": 1.870244660657693e-05, "loss": 0.5903, "step": 2437 }, { "epoch": 0.19, "grad_norm": 1.2632565412776657, "learning_rate": 1.870120854044273e-05, "loss": 0.6103, "step": 2438 }, { "epoch": 0.19, "grad_norm": 1.3128410532024872, "learning_rate": 1.869996992495634e-05, "loss": 0.6331, "step": 2439 }, { "epoch": 0.19, "grad_norm": 1.3096785418841905, "learning_rate": 1.8698730760195963e-05, "loss": 0.6391, "step": 2440 }, { "epoch": 0.19, "grad_norm": 1.2535515874861367, "learning_rate": 1.869749104623983e-05, "loss": 0.6135, "step": 2441 }, { "epoch": 0.19, "grad_norm": 1.2570058000926945, "learning_rate": 1.8696250783166218e-05, "loss": 0.6095, "step": 2442 }, { "epoch": 0.19, "grad_norm": 1.2589870678916961, "learning_rate": 1.869500997105343e-05, "loss": 0.6358, "step": 2443 }, { "epoch": 0.19, "grad_norm": 1.1986070992874267, "learning_rate": 1.8693768609979796e-05, "loss": 0.5585, "step": 2444 }, { "epoch": 0.19, "grad_norm": 1.1968443286441857, "learning_rate": 1.86925267000237e-05, "loss": 0.6206, "step": 2445 }, { "epoch": 0.19, "grad_norm": 1.22023404073097, "learning_rate": 1.8691284241263547e-05, "loss": 0.6005, "step": 2446 }, { "epoch": 0.19, "grad_norm": 1.2934050500950776, "learning_rate": 1.8690041233777774e-05, "loss": 0.662, "step": 2447 }, { "epoch": 0.19, "grad_norm": 1.2180060047571653, "learning_rate": 1.8688797677644865e-05, "loss": 0.5908, "step": 2448 }, { "epoch": 0.19, "grad_norm": 1.2152569426915234, "learning_rate": 1.868755357294333e-05, "loss": 0.662, "step": 2449 }, { "epoch": 0.19, "grad_norm": 1.4536301749942062, "learning_rate": 1.8686308919751718e-05, "loss": 0.6754, "step": 2450 }, { "epoch": 0.19, "grad_norm": 1.2810053708144795, "learning_rate": 1.8685063718148608e-05, "loss": 0.5837, "step": 2451 }, { "epoch": 0.19, "grad_norm": 1.3048902799440902, "learning_rate": 1.8683817968212613e-05, "loss": 0.713, "step": 2452 }, { "epoch": 0.19, "grad_norm": 1.275016180571564, "learning_rate": 1.868257167002239e-05, "loss": 0.6437, "step": 2453 }, { "epoch": 0.19, "grad_norm": 1.1846074965664053, "learning_rate": 1.8681324823656623e-05, "loss": 0.5569, "step": 2454 }, { "epoch": 0.19, "grad_norm": 1.2770317699898526, "learning_rate": 1.8680077429194025e-05, "loss": 0.6058, "step": 2455 }, { "epoch": 0.19, "grad_norm": 1.3043310626480014, "learning_rate": 1.8678829486713362e-05, "loss": 0.7058, "step": 2456 }, { "epoch": 0.19, "grad_norm": 1.462291151605452, "learning_rate": 1.867758099629341e-05, "loss": 0.6179, "step": 2457 }, { "epoch": 0.19, "grad_norm": 1.2707998656988748, "learning_rate": 1.8676331958013004e-05, "loss": 0.6079, "step": 2458 }, { "epoch": 0.19, "grad_norm": 1.2663154602452986, "learning_rate": 1.8675082371950996e-05, "loss": 0.6044, "step": 2459 }, { "epoch": 0.19, "grad_norm": 1.1410610462878796, "learning_rate": 1.867383223818628e-05, "loss": 0.5609, "step": 2460 }, { "epoch": 0.19, "grad_norm": 1.3634408677011673, "learning_rate": 1.8672581556797785e-05, "loss": 0.6513, "step": 2461 }, { "epoch": 0.19, "grad_norm": 1.2271815396157595, "learning_rate": 1.867133032786447e-05, "loss": 0.6535, "step": 2462 }, { "epoch": 0.19, "grad_norm": 1.2678103476701896, "learning_rate": 1.8670078551465336e-05, "loss": 0.6123, "step": 2463 }, { "epoch": 0.19, "grad_norm": 1.2029035290083356, "learning_rate": 1.8668826227679408e-05, "loss": 0.5776, "step": 2464 }, { "epoch": 0.19, "grad_norm": 1.2136313714966078, "learning_rate": 1.8667573356585756e-05, "loss": 0.6221, "step": 2465 }, { "epoch": 0.19, "grad_norm": 1.3528623559497148, "learning_rate": 1.866631993826348e-05, "loss": 0.6203, "step": 2466 }, { "epoch": 0.19, "grad_norm": 1.220765818702567, "learning_rate": 1.8665065972791715e-05, "loss": 0.6093, "step": 2467 }, { "epoch": 0.19, "grad_norm": 1.3541523370229156, "learning_rate": 1.8663811460249625e-05, "loss": 0.6144, "step": 2468 }, { "epoch": 0.19, "grad_norm": 1.2202700403619804, "learning_rate": 1.8662556400716423e-05, "loss": 0.6005, "step": 2469 }, { "epoch": 0.19, "grad_norm": 1.277062901410712, "learning_rate": 1.866130079427134e-05, "loss": 0.5709, "step": 2470 }, { "epoch": 0.19, "grad_norm": 1.2106963378968074, "learning_rate": 1.866004464099365e-05, "loss": 0.6306, "step": 2471 }, { "epoch": 0.19, "grad_norm": 1.270141361058738, "learning_rate": 1.8658787940962666e-05, "loss": 0.6145, "step": 2472 }, { "epoch": 0.19, "grad_norm": 1.2725296060531797, "learning_rate": 1.8657530694257722e-05, "loss": 0.6286, "step": 2473 }, { "epoch": 0.19, "grad_norm": 1.1505708666597803, "learning_rate": 1.8656272900958202e-05, "loss": 0.5983, "step": 2474 }, { "epoch": 0.19, "grad_norm": 1.211748503781468, "learning_rate": 1.865501456114351e-05, "loss": 0.6194, "step": 2475 }, { "epoch": 0.19, "grad_norm": 1.246196395807835, "learning_rate": 1.8653755674893095e-05, "loss": 0.6014, "step": 2476 }, { "epoch": 0.19, "grad_norm": 1.2684632489357968, "learning_rate": 1.865249624228644e-05, "loss": 0.6523, "step": 2477 }, { "epoch": 0.19, "grad_norm": 1.2319233365110043, "learning_rate": 1.8651236263403052e-05, "loss": 0.6185, "step": 2478 }, { "epoch": 0.19, "grad_norm": 1.2574923090007528, "learning_rate": 1.8649975738322487e-05, "loss": 0.5813, "step": 2479 }, { "epoch": 0.19, "grad_norm": 1.2989334866804474, "learning_rate": 1.8648714667124323e-05, "loss": 0.6515, "step": 2480 }, { "epoch": 0.19, "grad_norm": 1.1623842571434297, "learning_rate": 1.8647453049888186e-05, "loss": 0.5784, "step": 2481 }, { "epoch": 0.19, "grad_norm": 1.2289409537971798, "learning_rate": 1.8646190886693717e-05, "loss": 0.584, "step": 2482 }, { "epoch": 0.19, "grad_norm": 1.148413158340434, "learning_rate": 1.8644928177620612e-05, "loss": 0.571, "step": 2483 }, { "epoch": 0.19, "grad_norm": 1.2768933730033598, "learning_rate": 1.864366492274859e-05, "loss": 0.6365, "step": 2484 }, { "epoch": 0.19, "grad_norm": 1.3055138626385328, "learning_rate": 1.86424011221574e-05, "loss": 0.5909, "step": 2485 }, { "epoch": 0.19, "grad_norm": 1.2036296232092925, "learning_rate": 1.8641136775926844e-05, "loss": 0.6286, "step": 2486 }, { "epoch": 0.19, "grad_norm": 1.1856331958799164, "learning_rate": 1.863987188413674e-05, "loss": 0.5756, "step": 2487 }, { "epoch": 0.19, "grad_norm": 1.134590962767589, "learning_rate": 1.8638606446866947e-05, "loss": 0.4757, "step": 2488 }, { "epoch": 0.19, "grad_norm": 1.2513143781626266, "learning_rate": 1.8637340464197357e-05, "loss": 0.6306, "step": 2489 }, { "epoch": 0.19, "grad_norm": 1.1773843577266503, "learning_rate": 1.8636073936207907e-05, "loss": 0.5852, "step": 2490 }, { "epoch": 0.19, "grad_norm": 1.1947211005544542, "learning_rate": 1.863480686297855e-05, "loss": 0.66, "step": 2491 }, { "epoch": 0.19, "grad_norm": 1.2703404591241307, "learning_rate": 1.8633539244589285e-05, "loss": 0.5839, "step": 2492 }, { "epoch": 0.19, "grad_norm": 1.310701864205226, "learning_rate": 1.8632271081120144e-05, "loss": 0.5919, "step": 2493 }, { "epoch": 0.19, "grad_norm": 1.2219158526759577, "learning_rate": 1.8631002372651194e-05, "loss": 0.6121, "step": 2494 }, { "epoch": 0.19, "grad_norm": 1.25539051279771, "learning_rate": 1.8629733119262536e-05, "loss": 0.61, "step": 2495 }, { "epoch": 0.19, "grad_norm": 1.1989805500657778, "learning_rate": 1.86284633210343e-05, "loss": 0.5397, "step": 2496 }, { "epoch": 0.19, "grad_norm": 1.220097066347891, "learning_rate": 1.862719297804666e-05, "loss": 0.6262, "step": 2497 }, { "epoch": 0.19, "grad_norm": 1.3601523621145881, "learning_rate": 1.8625922090379812e-05, "loss": 0.6197, "step": 2498 }, { "epoch": 0.19, "grad_norm": 1.261808173727957, "learning_rate": 1.8624650658114003e-05, "loss": 0.6342, "step": 2499 }, { "epoch": 0.19, "grad_norm": 1.097432232461151, "learning_rate": 1.86233786813295e-05, "loss": 0.5538, "step": 2500 }, { "epoch": 0.19, "grad_norm": 1.4307810452386007, "learning_rate": 1.862210616010661e-05, "loss": 0.7265, "step": 2501 }, { "epoch": 0.19, "grad_norm": 1.2982976532874413, "learning_rate": 1.862083309452567e-05, "loss": 0.6277, "step": 2502 }, { "epoch": 0.19, "grad_norm": 1.2739126804975092, "learning_rate": 1.8619559484667063e-05, "loss": 0.6756, "step": 2503 }, { "epoch": 0.19, "grad_norm": 1.2373585918821108, "learning_rate": 1.8618285330611193e-05, "loss": 0.6096, "step": 2504 }, { "epoch": 0.19, "grad_norm": 1.3507258741987285, "learning_rate": 1.8617010632438508e-05, "loss": 0.6555, "step": 2505 }, { "epoch": 0.19, "grad_norm": 1.116901761307012, "learning_rate": 1.8615735390229483e-05, "loss": 0.6206, "step": 2506 }, { "epoch": 0.19, "grad_norm": 1.1878597818752807, "learning_rate": 1.8614459604064634e-05, "loss": 0.5414, "step": 2507 }, { "epoch": 0.19, "grad_norm": 1.2555094894512393, "learning_rate": 1.86131832740245e-05, "loss": 0.6433, "step": 2508 }, { "epoch": 0.19, "grad_norm": 1.2274999741284276, "learning_rate": 1.861190640018967e-05, "loss": 0.6259, "step": 2509 }, { "epoch": 0.19, "grad_norm": 1.2973945220528185, "learning_rate": 1.8610628982640758e-05, "loss": 0.6293, "step": 2510 }, { "epoch": 0.19, "grad_norm": 1.1606728787424836, "learning_rate": 1.8609351021458412e-05, "loss": 0.5604, "step": 2511 }, { "epoch": 0.19, "grad_norm": 1.3960318732571626, "learning_rate": 1.8608072516723317e-05, "loss": 0.6043, "step": 2512 }, { "epoch": 0.19, "grad_norm": 1.3159074602432916, "learning_rate": 1.8606793468516197e-05, "loss": 0.625, "step": 2513 }, { "epoch": 0.2, "grad_norm": 1.2911882386111526, "learning_rate": 1.8605513876917793e-05, "loss": 0.6286, "step": 2514 }, { "epoch": 0.2, "grad_norm": 1.1841470161774408, "learning_rate": 1.8604233742008903e-05, "loss": 0.6274, "step": 2515 }, { "epoch": 0.2, "grad_norm": 1.158395169114741, "learning_rate": 1.860295306387035e-05, "loss": 0.6291, "step": 2516 }, { "epoch": 0.2, "grad_norm": 1.1611165911004209, "learning_rate": 1.8601671842582977e-05, "loss": 0.5685, "step": 2517 }, { "epoch": 0.2, "grad_norm": 1.2891511828810491, "learning_rate": 1.8600390078227682e-05, "loss": 0.6666, "step": 2518 }, { "epoch": 0.2, "grad_norm": 1.2628273832165804, "learning_rate": 1.8599107770885392e-05, "loss": 0.5996, "step": 2519 }, { "epoch": 0.2, "grad_norm": 1.2303213848697214, "learning_rate": 1.8597824920637063e-05, "loss": 0.6127, "step": 2520 }, { "epoch": 0.2, "grad_norm": 1.2173923364147705, "learning_rate": 1.8596541527563686e-05, "loss": 0.5605, "step": 2521 }, { "epoch": 0.2, "grad_norm": 1.1828597912222343, "learning_rate": 1.859525759174629e-05, "loss": 0.6037, "step": 2522 }, { "epoch": 0.2, "grad_norm": 1.3533218927714155, "learning_rate": 1.8593973113265938e-05, "loss": 0.5849, "step": 2523 }, { "epoch": 0.2, "grad_norm": 1.1939991735874045, "learning_rate": 1.8592688092203727e-05, "loss": 0.5959, "step": 2524 }, { "epoch": 0.2, "grad_norm": 1.3305723641638398, "learning_rate": 1.8591402528640782e-05, "loss": 0.6473, "step": 2525 }, { "epoch": 0.2, "grad_norm": 1.2634680464205919, "learning_rate": 1.8590116422658267e-05, "loss": 0.5907, "step": 2526 }, { "epoch": 0.2, "grad_norm": 1.2424357903850305, "learning_rate": 1.8588829774337388e-05, "loss": 0.6681, "step": 2527 }, { "epoch": 0.2, "grad_norm": 1.290836708494584, "learning_rate": 1.858754258375937e-05, "loss": 0.6205, "step": 2528 }, { "epoch": 0.2, "grad_norm": 1.1172271934873748, "learning_rate": 1.858625485100548e-05, "loss": 0.5876, "step": 2529 }, { "epoch": 0.2, "grad_norm": 1.3565895657496478, "learning_rate": 1.8584966576157028e-05, "loss": 0.5838, "step": 2530 }, { "epoch": 0.2, "grad_norm": 1.1605733513420058, "learning_rate": 1.8583677759295337e-05, "loss": 0.5603, "step": 2531 }, { "epoch": 0.2, "grad_norm": 1.1943168732189107, "learning_rate": 1.8582388400501786e-05, "loss": 0.5914, "step": 2532 }, { "epoch": 0.2, "grad_norm": 1.340174775895629, "learning_rate": 1.8581098499857776e-05, "loss": 0.636, "step": 2533 }, { "epoch": 0.2, "grad_norm": 1.195819871718148, "learning_rate": 1.8579808057444744e-05, "loss": 0.6409, "step": 2534 }, { "epoch": 0.2, "grad_norm": 1.3152215533853757, "learning_rate": 1.8578517073344168e-05, "loss": 0.5781, "step": 2535 }, { "epoch": 0.2, "grad_norm": 1.1787502784769128, "learning_rate": 1.8577225547637545e-05, "loss": 0.6261, "step": 2536 }, { "epoch": 0.2, "grad_norm": 1.2210191485460118, "learning_rate": 1.857593348040642e-05, "loss": 0.6121, "step": 2537 }, { "epoch": 0.2, "grad_norm": 1.2325777901325448, "learning_rate": 1.857464087173237e-05, "loss": 0.6224, "step": 2538 }, { "epoch": 0.2, "grad_norm": 1.2258120628359654, "learning_rate": 1.8573347721697007e-05, "loss": 0.5919, "step": 2539 }, { "epoch": 0.2, "grad_norm": 1.2111445588315997, "learning_rate": 1.8572054030381963e-05, "loss": 0.5985, "step": 2540 }, { "epoch": 0.2, "grad_norm": 1.2401532003916247, "learning_rate": 1.8570759797868925e-05, "loss": 0.6971, "step": 2541 }, { "epoch": 0.2, "grad_norm": 1.1244443474885986, "learning_rate": 1.8569465024239603e-05, "loss": 0.5731, "step": 2542 }, { "epoch": 0.2, "grad_norm": 1.1371780463882148, "learning_rate": 1.8568169709575737e-05, "loss": 0.6314, "step": 2543 }, { "epoch": 0.2, "grad_norm": 1.2121776506260318, "learning_rate": 1.856687385395911e-05, "loss": 0.6172, "step": 2544 }, { "epoch": 0.2, "grad_norm": 1.359451423338358, "learning_rate": 1.8565577457471545e-05, "loss": 0.6485, "step": 2545 }, { "epoch": 0.2, "grad_norm": 1.2032117936175215, "learning_rate": 1.8564280520194876e-05, "loss": 0.5711, "step": 2546 }, { "epoch": 0.2, "grad_norm": 1.418571743290778, "learning_rate": 1.8562983042210998e-05, "loss": 0.6426, "step": 2547 }, { "epoch": 0.2, "grad_norm": 1.3211135691324105, "learning_rate": 1.8561685023601815e-05, "loss": 0.5697, "step": 2548 }, { "epoch": 0.2, "grad_norm": 1.3007259414415762, "learning_rate": 1.8560386464449288e-05, "loss": 0.6598, "step": 2549 }, { "epoch": 0.2, "grad_norm": 1.1542516915761523, "learning_rate": 1.8559087364835397e-05, "loss": 0.6174, "step": 2550 }, { "epoch": 0.2, "grad_norm": 1.3336154321428684, "learning_rate": 1.855778772484216e-05, "loss": 0.6085, "step": 2551 }, { "epoch": 0.2, "grad_norm": 1.269227023719383, "learning_rate": 1.8556487544551633e-05, "loss": 0.5854, "step": 2552 }, { "epoch": 0.2, "grad_norm": 1.3994029883873589, "learning_rate": 1.8555186824045903e-05, "loss": 0.6521, "step": 2553 }, { "epoch": 0.2, "grad_norm": 1.3745360458687568, "learning_rate": 1.8553885563407086e-05, "loss": 0.6457, "step": 2554 }, { "epoch": 0.2, "grad_norm": 1.1461255423433991, "learning_rate": 1.8552583762717346e-05, "loss": 0.5572, "step": 2555 }, { "epoch": 0.2, "grad_norm": 1.2501790395308439, "learning_rate": 1.8551281422058863e-05, "loss": 0.6272, "step": 2556 }, { "epoch": 0.2, "grad_norm": 1.299685959297986, "learning_rate": 1.8549978541513865e-05, "loss": 0.6544, "step": 2557 }, { "epoch": 0.2, "grad_norm": 1.1721466766790667, "learning_rate": 1.8548675121164613e-05, "loss": 0.625, "step": 2558 }, { "epoch": 0.2, "grad_norm": 1.2346274685102632, "learning_rate": 1.8547371161093397e-05, "loss": 0.6582, "step": 2559 }, { "epoch": 0.2, "grad_norm": 1.145131237055101, "learning_rate": 1.8546066661382535e-05, "loss": 0.5911, "step": 2560 }, { "epoch": 0.2, "grad_norm": 1.1776541560800873, "learning_rate": 1.8544761622114396e-05, "loss": 0.6055, "step": 2561 }, { "epoch": 0.2, "grad_norm": 1.3578178870968904, "learning_rate": 1.854345604337137e-05, "loss": 0.6334, "step": 2562 }, { "epoch": 0.2, "grad_norm": 1.1676885976947013, "learning_rate": 1.8542149925235885e-05, "loss": 0.5701, "step": 2563 }, { "epoch": 0.2, "grad_norm": 1.2746782401745258, "learning_rate": 1.8540843267790406e-05, "loss": 0.6239, "step": 2564 }, { "epoch": 0.2, "grad_norm": 1.1632292152382684, "learning_rate": 1.8539536071117424e-05, "loss": 0.5816, "step": 2565 }, { "epoch": 0.2, "grad_norm": 1.1965253573232237, "learning_rate": 1.853822833529947e-05, "loss": 0.5795, "step": 2566 }, { "epoch": 0.2, "grad_norm": 1.1052932869597845, "learning_rate": 1.853692006041911e-05, "loss": 0.589, "step": 2567 }, { "epoch": 0.2, "grad_norm": 1.2843409679943978, "learning_rate": 1.8535611246558947e-05, "loss": 0.6422, "step": 2568 }, { "epoch": 0.2, "grad_norm": 1.2733504844845251, "learning_rate": 1.8534301893801606e-05, "loss": 0.6569, "step": 2569 }, { "epoch": 0.2, "grad_norm": 1.2016262776645703, "learning_rate": 1.8532992002229755e-05, "loss": 0.6028, "step": 2570 }, { "epoch": 0.2, "grad_norm": 1.420376753324155, "learning_rate": 1.853168157192609e-05, "loss": 0.6932, "step": 2571 }, { "epoch": 0.2, "grad_norm": 1.1569254165454455, "learning_rate": 1.8530370602973356e-05, "loss": 0.5704, "step": 2572 }, { "epoch": 0.2, "grad_norm": 1.3356085924525085, "learning_rate": 1.852905909545431e-05, "loss": 0.6361, "step": 2573 }, { "epoch": 0.2, "grad_norm": 1.1938464580132024, "learning_rate": 1.8527747049451763e-05, "loss": 0.6223, "step": 2574 }, { "epoch": 0.2, "grad_norm": 1.2820819386766096, "learning_rate": 1.8526434465048544e-05, "loss": 0.6493, "step": 2575 }, { "epoch": 0.2, "grad_norm": 1.2741980592508404, "learning_rate": 1.852512134232753e-05, "loss": 0.6682, "step": 2576 }, { "epoch": 0.2, "grad_norm": 1.1854992123218688, "learning_rate": 1.8523807681371622e-05, "loss": 0.6324, "step": 2577 }, { "epoch": 0.2, "grad_norm": 1.340573081765185, "learning_rate": 1.8522493482263753e-05, "loss": 0.6448, "step": 2578 }, { "epoch": 0.2, "grad_norm": 1.214321993438132, "learning_rate": 1.8521178745086906e-05, "loss": 0.6302, "step": 2579 }, { "epoch": 0.2, "grad_norm": 1.1534556051903955, "learning_rate": 1.8519863469924078e-05, "loss": 0.5599, "step": 2580 }, { "epoch": 0.2, "grad_norm": 1.2511316422190768, "learning_rate": 1.8518547656858314e-05, "loss": 0.6031, "step": 2581 }, { "epoch": 0.2, "grad_norm": 1.309353963459886, "learning_rate": 1.8517231305972686e-05, "loss": 0.6538, "step": 2582 }, { "epoch": 0.2, "grad_norm": 1.1116357789807634, "learning_rate": 1.85159144173503e-05, "loss": 0.5839, "step": 2583 }, { "epoch": 0.2, "grad_norm": 1.3377808463173022, "learning_rate": 1.8514596991074305e-05, "loss": 0.6171, "step": 2584 }, { "epoch": 0.2, "grad_norm": 1.2714148530250609, "learning_rate": 1.8513279027227868e-05, "loss": 0.6274, "step": 2585 }, { "epoch": 0.2, "grad_norm": 1.1845584878229116, "learning_rate": 1.8511960525894207e-05, "loss": 0.5645, "step": 2586 }, { "epoch": 0.2, "grad_norm": 1.2666671135968541, "learning_rate": 1.851064148715656e-05, "loss": 0.6043, "step": 2587 }, { "epoch": 0.2, "grad_norm": 1.1955396149731525, "learning_rate": 1.8509321911098206e-05, "loss": 0.5603, "step": 2588 }, { "epoch": 0.2, "grad_norm": 1.3599628186051949, "learning_rate": 1.850800179780246e-05, "loss": 0.664, "step": 2589 }, { "epoch": 0.2, "grad_norm": 1.1808500309106609, "learning_rate": 1.8506681147352662e-05, "loss": 0.5671, "step": 2590 }, { "epoch": 0.2, "grad_norm": 1.1913907409659108, "learning_rate": 1.8505359959832198e-05, "loss": 0.6267, "step": 2591 }, { "epoch": 0.2, "grad_norm": 1.2191746289889973, "learning_rate": 1.8504038235324474e-05, "loss": 0.644, "step": 2592 }, { "epoch": 0.2, "grad_norm": 1.1284596969039795, "learning_rate": 1.8502715973912942e-05, "loss": 0.5789, "step": 2593 }, { "epoch": 0.2, "grad_norm": 1.272040367154707, "learning_rate": 1.8501393175681084e-05, "loss": 0.6216, "step": 2594 }, { "epoch": 0.2, "grad_norm": 1.2028242453648108, "learning_rate": 1.8500069840712412e-05, "loss": 0.5937, "step": 2595 }, { "epoch": 0.2, "grad_norm": 1.1447960348743038, "learning_rate": 1.849874596909048e-05, "loss": 0.5663, "step": 2596 }, { "epoch": 0.2, "grad_norm": 1.221488224093012, "learning_rate": 1.849742156089886e-05, "loss": 0.6107, "step": 2597 }, { "epoch": 0.2, "grad_norm": 1.3577475616764714, "learning_rate": 1.8496096616221182e-05, "loss": 0.65, "step": 2598 }, { "epoch": 0.2, "grad_norm": 1.2733652761373986, "learning_rate": 1.8494771135141086e-05, "loss": 0.5847, "step": 2599 }, { "epoch": 0.2, "grad_norm": 1.3544261341449815, "learning_rate": 1.8493445117742267e-05, "loss": 0.7011, "step": 2600 }, { "epoch": 0.2, "grad_norm": 1.2235235970750573, "learning_rate": 1.8492118564108433e-05, "loss": 0.6319, "step": 2601 }, { "epoch": 0.2, "grad_norm": 1.2630315039534614, "learning_rate": 1.849079147432334e-05, "loss": 0.6244, "step": 2602 }, { "epoch": 0.2, "grad_norm": 1.2065669280837659, "learning_rate": 1.8489463848470777e-05, "loss": 0.5747, "step": 2603 }, { "epoch": 0.2, "grad_norm": 1.2684779096220387, "learning_rate": 1.848813568663456e-05, "loss": 0.6744, "step": 2604 }, { "epoch": 0.2, "grad_norm": 1.2708911830471694, "learning_rate": 1.8486806988898545e-05, "loss": 0.6292, "step": 2605 }, { "epoch": 0.2, "grad_norm": 1.387470954728476, "learning_rate": 1.848547775534662e-05, "loss": 0.6168, "step": 2606 }, { "epoch": 0.2, "grad_norm": 1.21128894259088, "learning_rate": 1.8484147986062702e-05, "loss": 0.5725, "step": 2607 }, { "epoch": 0.2, "grad_norm": 1.234802606614738, "learning_rate": 1.8482817681130752e-05, "loss": 0.6094, "step": 2608 }, { "epoch": 0.2, "grad_norm": 1.2190169017705899, "learning_rate": 1.8481486840634753e-05, "loss": 0.6704, "step": 2609 }, { "epoch": 0.2, "grad_norm": 1.258088123143036, "learning_rate": 1.848015546465873e-05, "loss": 0.6575, "step": 2610 }, { "epoch": 0.2, "grad_norm": 1.215394756802738, "learning_rate": 1.8478823553286745e-05, "loss": 0.5818, "step": 2611 }, { "epoch": 0.2, "grad_norm": 1.2669526189391536, "learning_rate": 1.847749110660288e-05, "loss": 0.6307, "step": 2612 }, { "epoch": 0.2, "grad_norm": 1.091879143797114, "learning_rate": 1.847615812469127e-05, "loss": 0.571, "step": 2613 }, { "epoch": 0.2, "grad_norm": 1.1244845269013442, "learning_rate": 1.847482460763606e-05, "loss": 0.5366, "step": 2614 }, { "epoch": 0.2, "grad_norm": 1.1921238973507857, "learning_rate": 1.847349055552145e-05, "loss": 0.5979, "step": 2615 }, { "epoch": 0.2, "grad_norm": 1.2410866039559778, "learning_rate": 1.8472155968431664e-05, "loss": 0.6689, "step": 2616 }, { "epoch": 0.2, "grad_norm": 1.356489912690882, "learning_rate": 1.847082084645096e-05, "loss": 0.7003, "step": 2617 }, { "epoch": 0.2, "grad_norm": 1.2204231124157234, "learning_rate": 1.8469485189663635e-05, "loss": 0.6102, "step": 2618 }, { "epoch": 0.2, "grad_norm": 1.2619691015774441, "learning_rate": 1.846814899815401e-05, "loss": 0.6076, "step": 2619 }, { "epoch": 0.2, "grad_norm": 1.1410021746194294, "learning_rate": 1.846681227200645e-05, "loss": 0.6001, "step": 2620 }, { "epoch": 0.2, "grad_norm": 1.271208514921696, "learning_rate": 1.846547501130535e-05, "loss": 0.6767, "step": 2621 }, { "epoch": 0.2, "grad_norm": 1.0982250219744067, "learning_rate": 1.8464137216135133e-05, "loss": 0.5341, "step": 2622 }, { "epoch": 0.2, "grad_norm": 1.265605549603989, "learning_rate": 1.846279888658027e-05, "loss": 0.6375, "step": 2623 }, { "epoch": 0.2, "grad_norm": 1.2173059173294758, "learning_rate": 1.8461460022725247e-05, "loss": 0.6639, "step": 2624 }, { "epoch": 0.2, "grad_norm": 1.282403101392959, "learning_rate": 1.8460120624654595e-05, "loss": 0.5959, "step": 2625 }, { "epoch": 0.2, "grad_norm": 1.2232820901783792, "learning_rate": 1.8458780692452882e-05, "loss": 0.6204, "step": 2626 }, { "epoch": 0.2, "grad_norm": 1.2671190556152214, "learning_rate": 1.8457440226204702e-05, "loss": 0.6453, "step": 2627 }, { "epoch": 0.2, "grad_norm": 1.2586802457027126, "learning_rate": 1.8456099225994687e-05, "loss": 0.6056, "step": 2628 }, { "epoch": 0.2, "grad_norm": 1.2801579263984155, "learning_rate": 1.84547576919075e-05, "loss": 0.6207, "step": 2629 }, { "epoch": 0.2, "grad_norm": 1.2125485931569533, "learning_rate": 1.845341562402784e-05, "loss": 0.6263, "step": 2630 }, { "epoch": 0.2, "grad_norm": 1.2536828623449645, "learning_rate": 1.8452073022440436e-05, "loss": 0.6718, "step": 2631 }, { "epoch": 0.2, "grad_norm": 1.29967642021547, "learning_rate": 1.8450729887230053e-05, "loss": 0.6593, "step": 2632 }, { "epoch": 0.2, "grad_norm": 1.2974322856660658, "learning_rate": 1.8449386218481495e-05, "loss": 0.6644, "step": 2633 }, { "epoch": 0.2, "grad_norm": 1.3146686348945504, "learning_rate": 1.8448042016279592e-05, "loss": 0.6526, "step": 2634 }, { "epoch": 0.2, "grad_norm": 1.327386987620711, "learning_rate": 1.844669728070921e-05, "loss": 0.6544, "step": 2635 }, { "epoch": 0.2, "grad_norm": 1.12063802036055, "learning_rate": 1.844535201185525e-05, "loss": 0.5968, "step": 2636 }, { "epoch": 0.2, "grad_norm": 1.3718246294302483, "learning_rate": 1.8444006209802646e-05, "loss": 0.6414, "step": 2637 }, { "epoch": 0.2, "grad_norm": 1.0747232258264159, "learning_rate": 1.8442659874636365e-05, "loss": 0.5564, "step": 2638 }, { "epoch": 0.2, "grad_norm": 1.1955376705933867, "learning_rate": 1.8441313006441402e-05, "loss": 0.5549, "step": 2639 }, { "epoch": 0.2, "grad_norm": 1.2364534191022152, "learning_rate": 1.8439965605302803e-05, "loss": 0.5558, "step": 2640 }, { "epoch": 0.2, "grad_norm": 1.229463342111611, "learning_rate": 1.843861767130563e-05, "loss": 0.6207, "step": 2641 }, { "epoch": 0.2, "grad_norm": 1.301853276592066, "learning_rate": 1.8437269204534983e-05, "loss": 0.6566, "step": 2642 }, { "epoch": 0.21, "grad_norm": 1.0536674370328925, "learning_rate": 1.8435920205076003e-05, "loss": 0.5349, "step": 2643 }, { "epoch": 0.21, "grad_norm": 1.2264069379575435, "learning_rate": 1.843457067301386e-05, "loss": 0.6085, "step": 2644 }, { "epoch": 0.21, "grad_norm": 1.3700773813850702, "learning_rate": 1.8433220608433747e-05, "loss": 0.6298, "step": 2645 }, { "epoch": 0.21, "grad_norm": 1.3546180804108368, "learning_rate": 1.8431870011420908e-05, "loss": 0.6848, "step": 2646 }, { "epoch": 0.21, "grad_norm": 1.2270667624960336, "learning_rate": 1.8430518882060618e-05, "loss": 0.6283, "step": 2647 }, { "epoch": 0.21, "grad_norm": 1.2408093178608892, "learning_rate": 1.8429167220438172e-05, "loss": 0.6122, "step": 2648 }, { "epoch": 0.21, "grad_norm": 1.224188676086163, "learning_rate": 1.842781502663891e-05, "loss": 0.6537, "step": 2649 }, { "epoch": 0.21, "grad_norm": 1.2322450925141506, "learning_rate": 1.8426462300748206e-05, "loss": 0.6884, "step": 2650 }, { "epoch": 0.21, "grad_norm": 1.2752650901621032, "learning_rate": 1.842510904285146e-05, "loss": 0.6021, "step": 2651 }, { "epoch": 0.21, "grad_norm": 1.227564942687233, "learning_rate": 1.8423755253034113e-05, "loss": 0.6353, "step": 2652 }, { "epoch": 0.21, "grad_norm": 1.2917440093890669, "learning_rate": 1.8422400931381635e-05, "loss": 0.6622, "step": 2653 }, { "epoch": 0.21, "grad_norm": 1.3524081721131431, "learning_rate": 1.8421046077979535e-05, "loss": 0.6717, "step": 2654 }, { "epoch": 0.21, "grad_norm": 1.243483916531045, "learning_rate": 1.8419690692913346e-05, "loss": 0.6144, "step": 2655 }, { "epoch": 0.21, "grad_norm": 1.332726126730903, "learning_rate": 1.8418334776268643e-05, "loss": 0.6453, "step": 2656 }, { "epoch": 0.21, "grad_norm": 1.246272202870776, "learning_rate": 1.8416978328131036e-05, "loss": 0.6248, "step": 2657 }, { "epoch": 0.21, "grad_norm": 1.239141604579045, "learning_rate": 1.841562134858616e-05, "loss": 0.6275, "step": 2658 }, { "epoch": 0.21, "grad_norm": 1.2189441917449733, "learning_rate": 1.8414263837719688e-05, "loss": 0.5938, "step": 2659 }, { "epoch": 0.21, "grad_norm": 1.2622618554492198, "learning_rate": 1.8412905795617327e-05, "loss": 0.6258, "step": 2660 }, { "epoch": 0.21, "grad_norm": 1.2496346416584134, "learning_rate": 1.8411547222364824e-05, "loss": 0.6185, "step": 2661 }, { "epoch": 0.21, "grad_norm": 1.2873301569937172, "learning_rate": 1.8410188118047942e-05, "loss": 0.6237, "step": 2662 }, { "epoch": 0.21, "grad_norm": 1.1988220055586474, "learning_rate": 1.8408828482752495e-05, "loss": 0.6378, "step": 2663 }, { "epoch": 0.21, "grad_norm": 1.2618972132457877, "learning_rate": 1.8407468316564322e-05, "loss": 0.6317, "step": 2664 }, { "epoch": 0.21, "grad_norm": 1.140429022688034, "learning_rate": 1.84061076195693e-05, "loss": 0.6108, "step": 2665 }, { "epoch": 0.21, "grad_norm": 1.221287262581384, "learning_rate": 1.8404746391853328e-05, "loss": 0.6259, "step": 2666 }, { "epoch": 0.21, "grad_norm": 1.2791333276005448, "learning_rate": 1.840338463350236e-05, "loss": 0.6019, "step": 2667 }, { "epoch": 0.21, "grad_norm": 1.307468078179842, "learning_rate": 1.840202234460236e-05, "loss": 0.6018, "step": 2668 }, { "epoch": 0.21, "grad_norm": 1.222592483173903, "learning_rate": 1.8400659525239343e-05, "loss": 0.6686, "step": 2669 }, { "epoch": 0.21, "grad_norm": 1.2743021364134406, "learning_rate": 1.8399296175499346e-05, "loss": 0.6374, "step": 2670 }, { "epoch": 0.21, "grad_norm": 1.0510973827060548, "learning_rate": 1.8397932295468446e-05, "loss": 0.5676, "step": 2671 }, { "epoch": 0.21, "grad_norm": 1.2944416979777424, "learning_rate": 1.8396567885232753e-05, "loss": 0.6113, "step": 2672 }, { "epoch": 0.21, "grad_norm": 1.1775259969153802, "learning_rate": 1.839520294487841e-05, "loss": 0.6515, "step": 2673 }, { "epoch": 0.21, "grad_norm": 1.3056745621480876, "learning_rate": 1.8393837474491595e-05, "loss": 0.6288, "step": 2674 }, { "epoch": 0.21, "grad_norm": 1.243677505220659, "learning_rate": 1.839247147415851e-05, "loss": 0.6424, "step": 2675 }, { "epoch": 0.21, "grad_norm": 1.3332101298074706, "learning_rate": 1.83911049439654e-05, "loss": 0.7071, "step": 2676 }, { "epoch": 0.21, "grad_norm": 1.1380195646766325, "learning_rate": 1.8389737883998545e-05, "loss": 0.6372, "step": 2677 }, { "epoch": 0.21, "grad_norm": 1.2390205751424068, "learning_rate": 1.838837029434425e-05, "loss": 0.6337, "step": 2678 }, { "epoch": 0.21, "grad_norm": 1.4461415155684236, "learning_rate": 1.8387002175088863e-05, "loss": 0.6239, "step": 2679 }, { "epoch": 0.21, "grad_norm": 1.2419222662521607, "learning_rate": 1.8385633526318758e-05, "loss": 0.624, "step": 2680 }, { "epoch": 0.21, "grad_norm": 1.1862414868836182, "learning_rate": 1.838426434812034e-05, "loss": 0.6165, "step": 2681 }, { "epoch": 0.21, "grad_norm": 1.2945934586895842, "learning_rate": 1.838289464058006e-05, "loss": 0.6867, "step": 2682 }, { "epoch": 0.21, "grad_norm": 1.2284431375487235, "learning_rate": 1.838152440378439e-05, "loss": 0.6384, "step": 2683 }, { "epoch": 0.21, "grad_norm": 1.261263502443776, "learning_rate": 1.8380153637819843e-05, "loss": 0.6266, "step": 2684 }, { "epoch": 0.21, "grad_norm": 1.1933221639522478, "learning_rate": 1.8378782342772958e-05, "loss": 0.5394, "step": 2685 }, { "epoch": 0.21, "grad_norm": 1.1678693339926849, "learning_rate": 1.8377410518730318e-05, "loss": 0.5672, "step": 2686 }, { "epoch": 0.21, "grad_norm": 1.2024879255104957, "learning_rate": 1.8376038165778526e-05, "loss": 0.6657, "step": 2687 }, { "epoch": 0.21, "grad_norm": 1.1051933565602114, "learning_rate": 1.8374665284004235e-05, "loss": 0.5627, "step": 2688 }, { "epoch": 0.21, "grad_norm": 1.2497066153504524, "learning_rate": 1.8373291873494112e-05, "loss": 0.6754, "step": 2689 }, { "epoch": 0.21, "grad_norm": 1.2209087717402778, "learning_rate": 1.8371917934334877e-05, "loss": 0.6259, "step": 2690 }, { "epoch": 0.21, "grad_norm": 1.2217406235583503, "learning_rate": 1.8370543466613267e-05, "loss": 0.5533, "step": 2691 }, { "epoch": 0.21, "grad_norm": 1.0858125683391366, "learning_rate": 1.8369168470416064e-05, "loss": 0.5388, "step": 2692 }, { "epoch": 0.21, "grad_norm": 1.2870784865798823, "learning_rate": 1.8367792945830073e-05, "loss": 0.7085, "step": 2693 }, { "epoch": 0.21, "grad_norm": 1.1734353422305082, "learning_rate": 1.836641689294214e-05, "loss": 0.6266, "step": 2694 }, { "epoch": 0.21, "grad_norm": 1.3411617191694807, "learning_rate": 1.8365040311839148e-05, "loss": 0.618, "step": 2695 }, { "epoch": 0.21, "grad_norm": 1.3621934633415456, "learning_rate": 1.8363663202608e-05, "loss": 0.6862, "step": 2696 }, { "epoch": 0.21, "grad_norm": 1.2500355238635081, "learning_rate": 1.8362285565335645e-05, "loss": 0.618, "step": 2697 }, { "epoch": 0.21, "grad_norm": 1.3374980712591507, "learning_rate": 1.836090740010906e-05, "loss": 0.6414, "step": 2698 }, { "epoch": 0.21, "grad_norm": 1.3028346170408762, "learning_rate": 1.835952870701525e-05, "loss": 0.6513, "step": 2699 }, { "epoch": 0.21, "grad_norm": 1.2482191274390388, "learning_rate": 1.8358149486141265e-05, "loss": 0.6213, "step": 2700 }, { "epoch": 0.21, "grad_norm": 1.2262618552907405, "learning_rate": 1.835676973757418e-05, "loss": 0.5418, "step": 2701 }, { "epoch": 0.21, "grad_norm": 1.1754886098266861, "learning_rate": 1.835538946140111e-05, "loss": 0.5945, "step": 2702 }, { "epoch": 0.21, "grad_norm": 1.2011351541007982, "learning_rate": 1.835400865770919e-05, "loss": 0.6356, "step": 2703 }, { "epoch": 0.21, "grad_norm": 1.258339623357618, "learning_rate": 1.8352627326585606e-05, "loss": 0.6318, "step": 2704 }, { "epoch": 0.21, "grad_norm": 1.2204543203473468, "learning_rate": 1.8351245468117564e-05, "loss": 0.6116, "step": 2705 }, { "epoch": 0.21, "grad_norm": 1.1386343424714571, "learning_rate": 1.834986308239231e-05, "loss": 0.686, "step": 2706 }, { "epoch": 0.21, "grad_norm": 1.1711072822995203, "learning_rate": 1.8348480169497114e-05, "loss": 0.573, "step": 2707 }, { "epoch": 0.21, "grad_norm": 1.1923948595092952, "learning_rate": 1.8347096729519298e-05, "loss": 0.5769, "step": 2708 }, { "epoch": 0.21, "grad_norm": 1.2338145951634476, "learning_rate": 1.83457127625462e-05, "loss": 0.5593, "step": 2709 }, { "epoch": 0.21, "grad_norm": 1.3051236731291675, "learning_rate": 1.8344328268665193e-05, "loss": 0.6239, "step": 2710 }, { "epoch": 0.21, "grad_norm": 1.3311047506118519, "learning_rate": 1.83429432479637e-05, "loss": 0.632, "step": 2711 }, { "epoch": 0.21, "grad_norm": 1.278516178847769, "learning_rate": 1.834155770052915e-05, "loss": 0.5806, "step": 2712 }, { "epoch": 0.21, "grad_norm": 1.323828580647444, "learning_rate": 1.8340171626449025e-05, "loss": 0.5996, "step": 2713 }, { "epoch": 0.21, "grad_norm": 1.1035350696572261, "learning_rate": 1.8338785025810836e-05, "loss": 0.5794, "step": 2714 }, { "epoch": 0.21, "grad_norm": 1.3069138405207896, "learning_rate": 1.8337397898702127e-05, "loss": 0.6397, "step": 2715 }, { "epoch": 0.21, "grad_norm": 1.2692549184689246, "learning_rate": 1.833601024521047e-05, "loss": 0.5903, "step": 2716 }, { "epoch": 0.21, "grad_norm": 1.2384900900729578, "learning_rate": 1.8334622065423482e-05, "loss": 0.6285, "step": 2717 }, { "epoch": 0.21, "grad_norm": 1.11634362634413, "learning_rate": 1.8333233359428798e-05, "loss": 0.6394, "step": 2718 }, { "epoch": 0.21, "grad_norm": 1.2987121530255736, "learning_rate": 1.8331844127314104e-05, "loss": 0.6591, "step": 2719 }, { "epoch": 0.21, "grad_norm": 1.1814799554216615, "learning_rate": 1.83304543691671e-05, "loss": 0.588, "step": 2720 }, { "epoch": 0.21, "grad_norm": 1.3095366767358483, "learning_rate": 1.832906408507553e-05, "loss": 0.6206, "step": 2721 }, { "epoch": 0.21, "grad_norm": 1.314702864752759, "learning_rate": 1.8327673275127177e-05, "loss": 0.669, "step": 2722 }, { "epoch": 0.21, "grad_norm": 1.191442370258049, "learning_rate": 1.832628193940984e-05, "loss": 0.6409, "step": 2723 }, { "epoch": 0.21, "grad_norm": 1.3098278091190558, "learning_rate": 1.832489007801137e-05, "loss": 0.6143, "step": 2724 }, { "epoch": 0.21, "grad_norm": 1.1939614832622507, "learning_rate": 1.832349769101964e-05, "loss": 0.5463, "step": 2725 }, { "epoch": 0.21, "grad_norm": 1.284174071325053, "learning_rate": 1.832210477852255e-05, "loss": 0.6731, "step": 2726 }, { "epoch": 0.21, "grad_norm": 1.1573902384579537, "learning_rate": 1.8320711340608058e-05, "loss": 0.6316, "step": 2727 }, { "epoch": 0.21, "grad_norm": 1.210314633549534, "learning_rate": 1.8319317377364124e-05, "loss": 0.6184, "step": 2728 }, { "epoch": 0.21, "grad_norm": 1.216356519247249, "learning_rate": 1.8317922888878763e-05, "loss": 0.6378, "step": 2729 }, { "epoch": 0.21, "grad_norm": 1.2886793549763733, "learning_rate": 1.8316527875240016e-05, "loss": 0.5714, "step": 2730 }, { "epoch": 0.21, "grad_norm": 1.1853238293453052, "learning_rate": 1.8315132336535956e-05, "loss": 0.5943, "step": 2731 }, { "epoch": 0.21, "grad_norm": 1.4009815283516742, "learning_rate": 1.8313736272854692e-05, "loss": 0.5872, "step": 2732 }, { "epoch": 0.21, "grad_norm": 1.3185053991757636, "learning_rate": 1.8312339684284363e-05, "loss": 0.7169, "step": 2733 }, { "epoch": 0.21, "grad_norm": 1.1847111931889096, "learning_rate": 1.831094257091314e-05, "loss": 0.6049, "step": 2734 }, { "epoch": 0.21, "grad_norm": 1.2837781851403587, "learning_rate": 1.8309544932829238e-05, "loss": 0.6198, "step": 2735 }, { "epoch": 0.21, "grad_norm": 1.2952410964703984, "learning_rate": 1.8308146770120893e-05, "loss": 0.6702, "step": 2736 }, { "epoch": 0.21, "grad_norm": 1.3519706771227036, "learning_rate": 1.8306748082876377e-05, "loss": 0.6492, "step": 2737 }, { "epoch": 0.21, "grad_norm": 1.2180031664514333, "learning_rate": 1.8305348871183998e-05, "loss": 0.6082, "step": 2738 }, { "epoch": 0.21, "grad_norm": 1.2314869376762534, "learning_rate": 1.8303949135132096e-05, "loss": 0.6197, "step": 2739 }, { "epoch": 0.21, "grad_norm": 1.2433844982968294, "learning_rate": 1.8302548874809042e-05, "loss": 0.6288, "step": 2740 }, { "epoch": 0.21, "grad_norm": 1.209795357841027, "learning_rate": 1.8301148090303238e-05, "loss": 0.625, "step": 2741 }, { "epoch": 0.21, "grad_norm": 1.0811275192303724, "learning_rate": 1.829974678170313e-05, "loss": 0.5735, "step": 2742 }, { "epoch": 0.21, "grad_norm": 1.1913268516224023, "learning_rate": 1.8298344949097186e-05, "loss": 0.5761, "step": 2743 }, { "epoch": 0.21, "grad_norm": 1.2868632195532392, "learning_rate": 1.8296942592573912e-05, "loss": 0.5971, "step": 2744 }, { "epoch": 0.21, "grad_norm": 1.2610298378187574, "learning_rate": 1.8295539712221844e-05, "loss": 0.5866, "step": 2745 }, { "epoch": 0.21, "grad_norm": 1.14555682689655, "learning_rate": 1.8294136308129556e-05, "loss": 0.6317, "step": 2746 }, { "epoch": 0.21, "grad_norm": 1.1480329151219886, "learning_rate": 1.829273238038565e-05, "loss": 0.6175, "step": 2747 }, { "epoch": 0.21, "grad_norm": 1.3364857223610094, "learning_rate": 1.8291327929078764e-05, "loss": 0.6465, "step": 2748 }, { "epoch": 0.21, "grad_norm": 1.218675855679861, "learning_rate": 1.828992295429757e-05, "loss": 0.5915, "step": 2749 }, { "epoch": 0.21, "grad_norm": 1.069779256261487, "learning_rate": 1.828851745613077e-05, "loss": 0.5412, "step": 2750 }, { "epoch": 0.21, "grad_norm": 1.2703041892817974, "learning_rate": 1.8287111434667096e-05, "loss": 0.598, "step": 2751 }, { "epoch": 0.21, "grad_norm": 1.3296854613094637, "learning_rate": 1.828570488999532e-05, "loss": 0.6106, "step": 2752 }, { "epoch": 0.21, "grad_norm": 1.2935112682245427, "learning_rate": 1.8284297822204252e-05, "loss": 0.6521, "step": 2753 }, { "epoch": 0.21, "grad_norm": 1.323029876230364, "learning_rate": 1.8282890231382716e-05, "loss": 0.6468, "step": 2754 }, { "epoch": 0.21, "grad_norm": 1.2037016242300043, "learning_rate": 1.828148211761959e-05, "loss": 0.6173, "step": 2755 }, { "epoch": 0.21, "grad_norm": 1.2792445514258015, "learning_rate": 1.828007348100377e-05, "loss": 0.6499, "step": 2756 }, { "epoch": 0.21, "grad_norm": 1.3389547084302056, "learning_rate": 1.827866432162419e-05, "loss": 0.6689, "step": 2757 }, { "epoch": 0.21, "grad_norm": 1.187141063050096, "learning_rate": 1.8277254639569817e-05, "loss": 0.6608, "step": 2758 }, { "epoch": 0.21, "grad_norm": 1.3522860844756028, "learning_rate": 1.827584443492966e-05, "loss": 0.5747, "step": 2759 }, { "epoch": 0.21, "grad_norm": 1.2429242138703271, "learning_rate": 1.827443370779274e-05, "loss": 0.669, "step": 2760 }, { "epoch": 0.21, "grad_norm": 1.1528596919165448, "learning_rate": 1.827302245824813e-05, "loss": 0.5587, "step": 2761 }, { "epoch": 0.21, "grad_norm": 1.2648710194900268, "learning_rate": 1.827161068638493e-05, "loss": 0.6612, "step": 2762 }, { "epoch": 0.21, "grad_norm": 1.3514441289498784, "learning_rate": 1.8270198392292276e-05, "loss": 0.7449, "step": 2763 }, { "epoch": 0.21, "grad_norm": 1.1608384820495767, "learning_rate": 1.8268785576059324e-05, "loss": 0.5977, "step": 2764 }, { "epoch": 0.21, "grad_norm": 1.3438503760174543, "learning_rate": 1.8267372237775278e-05, "loss": 0.6776, "step": 2765 }, { "epoch": 0.21, "grad_norm": 1.1346874111619902, "learning_rate": 1.826595837752937e-05, "loss": 0.613, "step": 2766 }, { "epoch": 0.21, "grad_norm": 1.2276459786188239, "learning_rate": 1.826454399541086e-05, "loss": 0.6094, "step": 2767 }, { "epoch": 0.21, "grad_norm": 1.1437264695378795, "learning_rate": 1.826312909150905e-05, "loss": 0.5423, "step": 2768 }, { "epoch": 0.21, "grad_norm": 1.1743204667939153, "learning_rate": 1.8261713665913268e-05, "loss": 0.6243, "step": 2769 }, { "epoch": 0.21, "grad_norm": 1.3523323644801177, "learning_rate": 1.8260297718712877e-05, "loss": 0.6125, "step": 2770 }, { "epoch": 0.21, "grad_norm": 1.23234212046008, "learning_rate": 1.8258881249997275e-05, "loss": 0.6303, "step": 2771 }, { "epoch": 0.22, "grad_norm": 1.2842448983116175, "learning_rate": 1.825746425985589e-05, "loss": 0.6576, "step": 2772 }, { "epoch": 0.22, "grad_norm": 1.4215279459532615, "learning_rate": 1.825604674837818e-05, "loss": 0.658, "step": 2773 }, { "epoch": 0.22, "grad_norm": 1.340826047036285, "learning_rate": 1.8254628715653647e-05, "loss": 0.6525, "step": 2774 }, { "epoch": 0.22, "grad_norm": 1.1641832839464328, "learning_rate": 1.825321016177181e-05, "loss": 0.6072, "step": 2775 }, { "epoch": 0.22, "grad_norm": 1.2655579820008127, "learning_rate": 1.825179108682224e-05, "loss": 0.663, "step": 2776 }, { "epoch": 0.22, "grad_norm": 1.2396023318760847, "learning_rate": 1.8250371490894524e-05, "loss": 0.6118, "step": 2777 }, { "epoch": 0.22, "grad_norm": 1.0523134426858805, "learning_rate": 1.8248951374078288e-05, "loss": 0.577, "step": 2778 }, { "epoch": 0.22, "grad_norm": 1.1822575243530766, "learning_rate": 1.8247530736463194e-05, "loss": 0.6171, "step": 2779 }, { "epoch": 0.22, "grad_norm": 1.3799827199351595, "learning_rate": 1.8246109578138932e-05, "loss": 0.6211, "step": 2780 }, { "epoch": 0.22, "grad_norm": 1.223821407802544, "learning_rate": 1.8244687899195224e-05, "loss": 0.651, "step": 2781 }, { "epoch": 0.22, "grad_norm": 1.1692815948648125, "learning_rate": 1.824326569972184e-05, "loss": 0.5911, "step": 2782 }, { "epoch": 0.22, "grad_norm": 1.267942778767351, "learning_rate": 1.8241842979808554e-05, "loss": 0.6236, "step": 2783 }, { "epoch": 0.22, "grad_norm": 1.221052537882822, "learning_rate": 1.8240419739545206e-05, "loss": 0.578, "step": 2784 }, { "epoch": 0.22, "grad_norm": 1.2035246841599823, "learning_rate": 1.8238995979021642e-05, "loss": 0.5699, "step": 2785 }, { "epoch": 0.22, "grad_norm": 1.2816396911487473, "learning_rate": 1.8237571698327752e-05, "loss": 0.5972, "step": 2786 }, { "epoch": 0.22, "grad_norm": 1.2643952222970967, "learning_rate": 1.8236146897553464e-05, "loss": 0.6583, "step": 2787 }, { "epoch": 0.22, "grad_norm": 1.3673255632716947, "learning_rate": 1.8234721576788726e-05, "loss": 0.6039, "step": 2788 }, { "epoch": 0.22, "grad_norm": 1.2348573442454402, "learning_rate": 1.823329573612353e-05, "loss": 0.6543, "step": 2789 }, { "epoch": 0.22, "grad_norm": 1.1745171325817638, "learning_rate": 1.8231869375647897e-05, "loss": 0.565, "step": 2790 }, { "epoch": 0.22, "grad_norm": 1.2424715784917095, "learning_rate": 1.823044249545188e-05, "loss": 0.6605, "step": 2791 }, { "epoch": 0.22, "grad_norm": 1.2302056409702453, "learning_rate": 1.8229015095625567e-05, "loss": 0.6465, "step": 2792 }, { "epoch": 0.22, "grad_norm": 1.252995478150778, "learning_rate": 1.822758717625907e-05, "loss": 0.6635, "step": 2793 }, { "epoch": 0.22, "grad_norm": 1.2159450237244342, "learning_rate": 1.8226158737442547e-05, "loss": 0.6204, "step": 2794 }, { "epoch": 0.22, "grad_norm": 1.1720020988204702, "learning_rate": 1.8224729779266183e-05, "loss": 0.5526, "step": 2795 }, { "epoch": 0.22, "grad_norm": 1.1946464627716764, "learning_rate": 1.8223300301820193e-05, "loss": 0.6274, "step": 2796 }, { "epoch": 0.22, "grad_norm": 1.1868871311975935, "learning_rate": 1.8221870305194828e-05, "loss": 0.5913, "step": 2797 }, { "epoch": 0.22, "grad_norm": 1.3490003434443054, "learning_rate": 1.8220439789480375e-05, "loss": 0.6588, "step": 2798 }, { "epoch": 0.22, "grad_norm": 1.2190805622670569, "learning_rate": 1.8219008754767144e-05, "loss": 0.6279, "step": 2799 }, { "epoch": 0.22, "grad_norm": 1.3299475786447579, "learning_rate": 1.8217577201145483e-05, "loss": 0.6658, "step": 2800 }, { "epoch": 0.22, "grad_norm": 1.1969145467016507, "learning_rate": 1.821614512870578e-05, "loss": 0.6126, "step": 2801 }, { "epoch": 0.22, "grad_norm": 1.1769735684599172, "learning_rate": 1.821471253753844e-05, "loss": 0.6276, "step": 2802 }, { "epoch": 0.22, "grad_norm": 1.4148715744779188, "learning_rate": 1.821327942773392e-05, "loss": 0.6855, "step": 2803 }, { "epoch": 0.22, "grad_norm": 1.3296521999097788, "learning_rate": 1.8211845799382693e-05, "loss": 0.6663, "step": 2804 }, { "epoch": 0.22, "grad_norm": 1.2144700239054251, "learning_rate": 1.8210411652575275e-05, "loss": 0.6591, "step": 2805 }, { "epoch": 0.22, "grad_norm": 1.3841934205573372, "learning_rate": 1.8208976987402206e-05, "loss": 0.6926, "step": 2806 }, { "epoch": 0.22, "grad_norm": 1.2289015219973962, "learning_rate": 1.8207541803954068e-05, "loss": 0.6341, "step": 2807 }, { "epoch": 0.22, "grad_norm": 1.2868690555800195, "learning_rate": 1.8206106102321473e-05, "loss": 0.5873, "step": 2808 }, { "epoch": 0.22, "grad_norm": 1.2454855460021577, "learning_rate": 1.820466988259506e-05, "loss": 0.6953, "step": 2809 }, { "epoch": 0.22, "grad_norm": 1.0982308292361256, "learning_rate": 1.820323314486551e-05, "loss": 0.5464, "step": 2810 }, { "epoch": 0.22, "grad_norm": 1.1006471875806687, "learning_rate": 1.8201795889223525e-05, "loss": 0.5914, "step": 2811 }, { "epoch": 0.22, "grad_norm": 1.2006455671781824, "learning_rate": 1.8200358115759847e-05, "loss": 0.5893, "step": 2812 }, { "epoch": 0.22, "grad_norm": 1.3924243301706416, "learning_rate": 1.8198919824565258e-05, "loss": 0.6627, "step": 2813 }, { "epoch": 0.22, "grad_norm": 1.4486124667564348, "learning_rate": 1.8197481015730555e-05, "loss": 0.6678, "step": 2814 }, { "epoch": 0.22, "grad_norm": 1.2937493402599183, "learning_rate": 1.8196041689346585e-05, "loss": 0.6645, "step": 2815 }, { "epoch": 0.22, "grad_norm": 1.3723180363975969, "learning_rate": 1.819460184550422e-05, "loss": 0.6615, "step": 2816 }, { "epoch": 0.22, "grad_norm": 1.2960667676916562, "learning_rate": 1.8193161484294358e-05, "loss": 0.6251, "step": 2817 }, { "epoch": 0.22, "grad_norm": 1.2350914240214055, "learning_rate": 1.8191720605807937e-05, "loss": 0.621, "step": 2818 }, { "epoch": 0.22, "grad_norm": 1.1791714177299912, "learning_rate": 1.819027921013594e-05, "loss": 0.5851, "step": 2819 }, { "epoch": 0.22, "grad_norm": 1.4171525271641732, "learning_rate": 1.8188837297369353e-05, "loss": 0.6497, "step": 2820 }, { "epoch": 0.22, "grad_norm": 1.250284305665552, "learning_rate": 1.818739486759922e-05, "loss": 0.6881, "step": 2821 }, { "epoch": 0.22, "grad_norm": 1.3425361118396884, "learning_rate": 1.8185951920916607e-05, "loss": 0.655, "step": 2822 }, { "epoch": 0.22, "grad_norm": 1.2578889337856087, "learning_rate": 1.8184508457412615e-05, "loss": 0.6892, "step": 2823 }, { "epoch": 0.22, "grad_norm": 1.2068249177344736, "learning_rate": 1.818306447717838e-05, "loss": 0.6273, "step": 2824 }, { "epoch": 0.22, "grad_norm": 1.2200117182559702, "learning_rate": 1.8181619980305065e-05, "loss": 0.5969, "step": 2825 }, { "epoch": 0.22, "grad_norm": 1.2984507076909155, "learning_rate": 1.818017496688387e-05, "loss": 0.5938, "step": 2826 }, { "epoch": 0.22, "grad_norm": 1.2019157277396073, "learning_rate": 1.817872943700602e-05, "loss": 0.6362, "step": 2827 }, { "epoch": 0.22, "grad_norm": 1.2322419967809393, "learning_rate": 1.817728339076279e-05, "loss": 0.6147, "step": 2828 }, { "epoch": 0.22, "grad_norm": 1.356758493475954, "learning_rate": 1.8175836828245467e-05, "loss": 0.6504, "step": 2829 }, { "epoch": 0.22, "grad_norm": 1.2897517817870001, "learning_rate": 1.8174389749545384e-05, "loss": 0.7105, "step": 2830 }, { "epoch": 0.22, "grad_norm": 1.2157061297255327, "learning_rate": 1.81729421547539e-05, "loss": 0.6304, "step": 2831 }, { "epoch": 0.22, "grad_norm": 1.2331171024493035, "learning_rate": 1.8171494043962413e-05, "loss": 0.6363, "step": 2832 }, { "epoch": 0.22, "grad_norm": 1.2129381941406479, "learning_rate": 1.817004541726235e-05, "loss": 0.6248, "step": 2833 }, { "epoch": 0.22, "grad_norm": 1.3303364462262077, "learning_rate": 1.8168596274745165e-05, "loss": 0.6512, "step": 2834 }, { "epoch": 0.22, "grad_norm": 1.1810256240319497, "learning_rate": 1.8167146616502353e-05, "loss": 0.6057, "step": 2835 }, { "epoch": 0.22, "grad_norm": 1.2015725560753407, "learning_rate": 1.816569644262544e-05, "loss": 0.6193, "step": 2836 }, { "epoch": 0.22, "grad_norm": 1.1208913268126142, "learning_rate": 1.8164245753205977e-05, "loss": 0.5856, "step": 2837 }, { "epoch": 0.22, "grad_norm": 1.1176206909308237, "learning_rate": 1.816279454833556e-05, "loss": 0.5683, "step": 2838 }, { "epoch": 0.22, "grad_norm": 1.3175210731746485, "learning_rate": 1.8161342828105806e-05, "loss": 0.6368, "step": 2839 }, { "epoch": 0.22, "grad_norm": 1.1667704479288525, "learning_rate": 1.8159890592608378e-05, "loss": 0.5324, "step": 2840 }, { "epoch": 0.22, "grad_norm": 1.0988289212991194, "learning_rate": 1.8158437841934954e-05, "loss": 0.5394, "step": 2841 }, { "epoch": 0.22, "grad_norm": 1.2842564549000774, "learning_rate": 1.8156984576177258e-05, "loss": 0.6238, "step": 2842 }, { "epoch": 0.22, "grad_norm": 1.2009700450512546, "learning_rate": 1.815553079542704e-05, "loss": 0.6667, "step": 2843 }, { "epoch": 0.22, "grad_norm": 1.341760404443746, "learning_rate": 1.8154076499776087e-05, "loss": 0.6925, "step": 2844 }, { "epoch": 0.22, "grad_norm": 1.303661834134969, "learning_rate": 1.8152621689316216e-05, "loss": 0.6543, "step": 2845 }, { "epoch": 0.22, "grad_norm": 1.1934411353340328, "learning_rate": 1.815116636413927e-05, "loss": 0.5741, "step": 2846 }, { "epoch": 0.22, "grad_norm": 1.2367567913207473, "learning_rate": 1.8149710524337143e-05, "loss": 0.5939, "step": 2847 }, { "epoch": 0.22, "grad_norm": 1.3560427459010786, "learning_rate": 1.8148254170001743e-05, "loss": 0.6495, "step": 2848 }, { "epoch": 0.22, "grad_norm": 1.263134377740612, "learning_rate": 1.8146797301225017e-05, "loss": 0.6687, "step": 2849 }, { "epoch": 0.22, "grad_norm": 1.173790701017271, "learning_rate": 1.8145339918098944e-05, "loss": 0.6114, "step": 2850 }, { "epoch": 0.22, "grad_norm": 1.1777002636475393, "learning_rate": 1.8143882020715537e-05, "loss": 0.5727, "step": 2851 }, { "epoch": 0.22, "grad_norm": 1.1990217115331079, "learning_rate": 1.8142423609166845e-05, "loss": 0.6531, "step": 2852 }, { "epoch": 0.22, "grad_norm": 1.2039315628866218, "learning_rate": 1.8140964683544936e-05, "loss": 0.626, "step": 2853 }, { "epoch": 0.22, "grad_norm": 1.2090947558373764, "learning_rate": 1.813950524394193e-05, "loss": 0.613, "step": 2854 }, { "epoch": 0.22, "grad_norm": 1.0763227907347506, "learning_rate": 1.813804529044996e-05, "loss": 0.5894, "step": 2855 }, { "epoch": 0.22, "grad_norm": 1.103214621212828, "learning_rate": 1.8136584823161205e-05, "loss": 0.5624, "step": 2856 }, { "epoch": 0.22, "grad_norm": 1.279610398872814, "learning_rate": 1.8135123842167873e-05, "loss": 0.5917, "step": 2857 }, { "epoch": 0.22, "grad_norm": 1.1060580243278069, "learning_rate": 1.8133662347562197e-05, "loss": 0.5864, "step": 2858 }, { "epoch": 0.22, "grad_norm": 1.159203391728596, "learning_rate": 1.8132200339436455e-05, "loss": 0.5723, "step": 2859 }, { "epoch": 0.22, "grad_norm": 1.2723583961795326, "learning_rate": 1.813073781788295e-05, "loss": 0.6118, "step": 2860 }, { "epoch": 0.22, "grad_norm": 1.2273011140077157, "learning_rate": 1.8129274782994016e-05, "loss": 0.6806, "step": 2861 }, { "epoch": 0.22, "grad_norm": 1.2921420217350321, "learning_rate": 1.8127811234862026e-05, "loss": 0.6283, "step": 2862 }, { "epoch": 0.22, "grad_norm": 1.2323564369977353, "learning_rate": 1.8126347173579373e-05, "loss": 0.6112, "step": 2863 }, { "epoch": 0.22, "grad_norm": 1.207304938931562, "learning_rate": 1.8124882599238504e-05, "loss": 0.6128, "step": 2864 }, { "epoch": 0.22, "grad_norm": 1.1578066373977807, "learning_rate": 1.812341751193187e-05, "loss": 0.6047, "step": 2865 }, { "epoch": 0.22, "grad_norm": 1.4005586956898046, "learning_rate": 1.812195191175198e-05, "loss": 0.6594, "step": 2866 }, { "epoch": 0.22, "grad_norm": 1.228123540125166, "learning_rate": 1.8120485798791364e-05, "loss": 0.6249, "step": 2867 }, { "epoch": 0.22, "grad_norm": 1.1568415391597346, "learning_rate": 1.8119019173142583e-05, "loss": 0.6069, "step": 2868 }, { "epoch": 0.22, "grad_norm": 1.1911612837300505, "learning_rate": 1.811755203489823e-05, "loss": 0.6288, "step": 2869 }, { "epoch": 0.22, "grad_norm": 1.343495943759691, "learning_rate": 1.811608438415094e-05, "loss": 0.6332, "step": 2870 }, { "epoch": 0.22, "grad_norm": 1.2407355309741295, "learning_rate": 1.8114616220993367e-05, "loss": 0.6265, "step": 2871 }, { "epoch": 0.22, "grad_norm": 1.2773169185141842, "learning_rate": 1.8113147545518207e-05, "loss": 0.626, "step": 2872 }, { "epoch": 0.22, "grad_norm": 1.252796050011533, "learning_rate": 1.8111678357818184e-05, "loss": 0.6127, "step": 2873 }, { "epoch": 0.22, "grad_norm": 1.19824671414118, "learning_rate": 1.8110208657986056e-05, "loss": 0.6131, "step": 2874 }, { "epoch": 0.22, "grad_norm": 1.1883097949289403, "learning_rate": 1.810873844611461e-05, "loss": 0.5985, "step": 2875 }, { "epoch": 0.22, "grad_norm": 1.2814209521617395, "learning_rate": 1.810726772229667e-05, "loss": 0.6164, "step": 2876 }, { "epoch": 0.22, "grad_norm": 1.2923911841681872, "learning_rate": 1.8105796486625095e-05, "loss": 0.6373, "step": 2877 }, { "epoch": 0.22, "grad_norm": 1.2575262940617395, "learning_rate": 1.8104324739192766e-05, "loss": 0.6791, "step": 2878 }, { "epoch": 0.22, "grad_norm": 1.3457072884947838, "learning_rate": 1.81028524800926e-05, "loss": 0.6028, "step": 2879 }, { "epoch": 0.22, "grad_norm": 1.2840021859266266, "learning_rate": 1.8101379709417556e-05, "loss": 0.6638, "step": 2880 }, { "epoch": 0.22, "grad_norm": 1.2326226169186743, "learning_rate": 1.809990642726061e-05, "loss": 0.6518, "step": 2881 }, { "epoch": 0.22, "grad_norm": 1.1997694807368846, "learning_rate": 1.809843263371478e-05, "loss": 0.5913, "step": 2882 }, { "epoch": 0.22, "grad_norm": 1.2629495764488314, "learning_rate": 1.809695832887312e-05, "loss": 0.6485, "step": 2883 }, { "epoch": 0.22, "grad_norm": 1.099901448516766, "learning_rate": 1.8095483512828705e-05, "loss": 0.6097, "step": 2884 }, { "epoch": 0.22, "grad_norm": 1.231177328840679, "learning_rate": 1.809400818567465e-05, "loss": 0.6192, "step": 2885 }, { "epoch": 0.22, "grad_norm": 1.3685103637782334, "learning_rate": 1.809253234750409e-05, "loss": 0.641, "step": 2886 }, { "epoch": 0.22, "grad_norm": 1.1997628235875848, "learning_rate": 1.809105599841022e-05, "loss": 0.6401, "step": 2887 }, { "epoch": 0.22, "grad_norm": 1.0968343026199854, "learning_rate": 1.8089579138486234e-05, "loss": 0.5575, "step": 2888 }, { "epoch": 0.22, "grad_norm": 1.2138150879903193, "learning_rate": 1.8088101767825385e-05, "loss": 0.6557, "step": 2889 }, { "epoch": 0.22, "grad_norm": 1.2339204361674816, "learning_rate": 1.8086623886520942e-05, "loss": 0.6035, "step": 2890 }, { "epoch": 0.22, "grad_norm": 1.2712415237669306, "learning_rate": 1.8085145494666208e-05, "loss": 0.6331, "step": 2891 }, { "epoch": 0.22, "grad_norm": 1.2377801115733877, "learning_rate": 1.808366659235453e-05, "loss": 0.5811, "step": 2892 }, { "epoch": 0.22, "grad_norm": 1.3286606101312584, "learning_rate": 1.8082187179679272e-05, "loss": 0.6674, "step": 2893 }, { "epoch": 0.22, "grad_norm": 1.0994433316610714, "learning_rate": 1.8080707256733837e-05, "loss": 0.566, "step": 2894 }, { "epoch": 0.22, "grad_norm": 1.223931181045098, "learning_rate": 1.8079226823611665e-05, "loss": 0.6318, "step": 2895 }, { "epoch": 0.22, "grad_norm": 1.255745984975725, "learning_rate": 1.807774588040622e-05, "loss": 0.6282, "step": 2896 }, { "epoch": 0.22, "grad_norm": 1.329411993834681, "learning_rate": 1.8076264427210997e-05, "loss": 0.6458, "step": 2897 }, { "epoch": 0.22, "grad_norm": 1.3125825129412299, "learning_rate": 1.8074782464119536e-05, "loss": 0.6372, "step": 2898 }, { "epoch": 0.22, "grad_norm": 1.267016881705333, "learning_rate": 1.8073299991225398e-05, "loss": 0.5849, "step": 2899 }, { "epoch": 0.22, "grad_norm": 1.2533980435971066, "learning_rate": 1.8071817008622177e-05, "loss": 0.628, "step": 2900 }, { "epoch": 0.23, "grad_norm": 1.2003904700783439, "learning_rate": 1.8070333516403505e-05, "loss": 0.5822, "step": 2901 }, { "epoch": 0.23, "grad_norm": 1.2487166015160502, "learning_rate": 1.806884951466304e-05, "loss": 0.6062, "step": 2902 }, { "epoch": 0.23, "grad_norm": 1.3581030584143734, "learning_rate": 1.8067365003494475e-05, "loss": 0.6601, "step": 2903 }, { "epoch": 0.23, "grad_norm": 1.2631206459902808, "learning_rate": 1.8065879982991536e-05, "loss": 0.5929, "step": 2904 }, { "epoch": 0.23, "grad_norm": 1.2821168990644012, "learning_rate": 1.8064394453247977e-05, "loss": 0.5959, "step": 2905 }, { "epoch": 0.23, "grad_norm": 1.2204427945062277, "learning_rate": 1.8062908414357592e-05, "loss": 0.5873, "step": 2906 }, { "epoch": 0.23, "grad_norm": 1.248558500725858, "learning_rate": 1.80614218664142e-05, "loss": 0.623, "step": 2907 }, { "epoch": 0.23, "grad_norm": 1.2202886503382502, "learning_rate": 1.8059934809511654e-05, "loss": 0.5808, "step": 2908 }, { "epoch": 0.23, "grad_norm": 1.2979357414415547, "learning_rate": 1.805844724374384e-05, "loss": 0.6001, "step": 2909 }, { "epoch": 0.23, "grad_norm": 1.2011587746999037, "learning_rate": 1.805695916920467e-05, "loss": 0.5736, "step": 2910 }, { "epoch": 0.23, "grad_norm": 1.3875532191612105, "learning_rate": 1.8055470585988108e-05, "loss": 0.6179, "step": 2911 }, { "epoch": 0.23, "grad_norm": 1.2902609138404146, "learning_rate": 1.805398149418812e-05, "loss": 0.6515, "step": 2912 }, { "epoch": 0.23, "grad_norm": 1.3347383884311403, "learning_rate": 1.805249189389873e-05, "loss": 0.6505, "step": 2913 }, { "epoch": 0.23, "grad_norm": 1.3294166118671473, "learning_rate": 1.805100178521398e-05, "loss": 0.6658, "step": 2914 }, { "epoch": 0.23, "grad_norm": 1.2300501035549545, "learning_rate": 1.804951116822795e-05, "loss": 0.6171, "step": 2915 }, { "epoch": 0.23, "grad_norm": 1.2054727115430035, "learning_rate": 1.804802004303475e-05, "loss": 0.6213, "step": 2916 }, { "epoch": 0.23, "grad_norm": 1.3563128654571248, "learning_rate": 1.8046528409728523e-05, "loss": 0.6245, "step": 2917 }, { "epoch": 0.23, "grad_norm": 1.3001366928567972, "learning_rate": 1.8045036268403443e-05, "loss": 0.6708, "step": 2918 }, { "epoch": 0.23, "grad_norm": 1.1874836870378034, "learning_rate": 1.8043543619153717e-05, "loss": 0.6237, "step": 2919 }, { "epoch": 0.23, "grad_norm": 1.256463885722255, "learning_rate": 1.804205046207358e-05, "loss": 0.6283, "step": 2920 }, { "epoch": 0.23, "grad_norm": 1.2557813937595423, "learning_rate": 1.8040556797257308e-05, "loss": 0.5638, "step": 2921 }, { "epoch": 0.23, "grad_norm": 1.2026769125661274, "learning_rate": 1.80390626247992e-05, "loss": 0.6409, "step": 2922 }, { "epoch": 0.23, "grad_norm": 1.2871283152088284, "learning_rate": 1.80375679447936e-05, "loss": 0.6575, "step": 2923 }, { "epoch": 0.23, "grad_norm": 1.2586342159232753, "learning_rate": 1.803607275733486e-05, "loss": 0.6328, "step": 2924 }, { "epoch": 0.23, "grad_norm": 1.1032225092944932, "learning_rate": 1.8034577062517383e-05, "loss": 0.5829, "step": 2925 }, { "epoch": 0.23, "grad_norm": 1.3104410370388593, "learning_rate": 1.803308086043561e-05, "loss": 0.6742, "step": 2926 }, { "epoch": 0.23, "grad_norm": 1.2051548863822021, "learning_rate": 1.8031584151183995e-05, "loss": 0.5936, "step": 2927 }, { "epoch": 0.23, "grad_norm": 1.2517796722089043, "learning_rate": 1.8030086934857034e-05, "loss": 0.6459, "step": 2928 }, { "epoch": 0.23, "grad_norm": 1.2320767507333181, "learning_rate": 1.8028589211549256e-05, "loss": 0.603, "step": 2929 }, { "epoch": 0.23, "grad_norm": 1.2086792100061583, "learning_rate": 1.8027090981355217e-05, "loss": 0.6299, "step": 2930 }, { "epoch": 0.23, "grad_norm": 1.2700898804736247, "learning_rate": 1.8025592244369514e-05, "loss": 0.6605, "step": 2931 }, { "epoch": 0.23, "grad_norm": 1.206916927121023, "learning_rate": 1.802409300068676e-05, "loss": 0.614, "step": 2932 }, { "epoch": 0.23, "grad_norm": 1.168172608002909, "learning_rate": 1.8022593250401625e-05, "loss": 0.625, "step": 2933 }, { "epoch": 0.23, "grad_norm": 1.1834166240034456, "learning_rate": 1.8021092993608777e-05, "loss": 0.5964, "step": 2934 }, { "epoch": 0.23, "grad_norm": 1.2788494696389363, "learning_rate": 1.801959223040295e-05, "loss": 0.6234, "step": 2935 }, { "epoch": 0.23, "grad_norm": 1.187926767860014, "learning_rate": 1.801809096087889e-05, "loss": 0.6049, "step": 2936 }, { "epoch": 0.23, "grad_norm": 1.267593407208171, "learning_rate": 1.801658918513138e-05, "loss": 0.6388, "step": 2937 }, { "epoch": 0.23, "grad_norm": 1.136411330349278, "learning_rate": 1.801508690325523e-05, "loss": 0.5344, "step": 2938 }, { "epoch": 0.23, "grad_norm": 1.1600192724468503, "learning_rate": 1.8013584115345297e-05, "loss": 0.5305, "step": 2939 }, { "epoch": 0.23, "grad_norm": 1.2332262899152213, "learning_rate": 1.8012080821496454e-05, "loss": 0.583, "step": 2940 }, { "epoch": 0.23, "grad_norm": 1.3254157313839539, "learning_rate": 1.8010577021803608e-05, "loss": 0.6546, "step": 2941 }, { "epoch": 0.23, "grad_norm": 1.2185395866102802, "learning_rate": 1.8009072716361707e-05, "loss": 0.6275, "step": 2942 }, { "epoch": 0.23, "grad_norm": 1.271840316711964, "learning_rate": 1.8007567905265727e-05, "loss": 0.613, "step": 2943 }, { "epoch": 0.23, "grad_norm": 1.169847132567219, "learning_rate": 1.8006062588610666e-05, "loss": 0.5647, "step": 2944 }, { "epoch": 0.23, "grad_norm": 1.288062800386875, "learning_rate": 1.8004556766491573e-05, "loss": 0.6258, "step": 2945 }, { "epoch": 0.23, "grad_norm": 1.1649247630632729, "learning_rate": 1.800305043900351e-05, "loss": 0.5184, "step": 2946 }, { "epoch": 0.23, "grad_norm": 1.1514814868520415, "learning_rate": 1.8001543606241583e-05, "loss": 0.6081, "step": 2947 }, { "epoch": 0.23, "grad_norm": 1.1776919127949836, "learning_rate": 1.8000036268300927e-05, "loss": 0.5703, "step": 2948 }, { "epoch": 0.23, "grad_norm": 1.3064339193428178, "learning_rate": 1.7998528425276703e-05, "loss": 0.6157, "step": 2949 }, { "epoch": 0.23, "grad_norm": 1.2702490083374884, "learning_rate": 1.7997020077264118e-05, "loss": 0.6398, "step": 2950 }, { "epoch": 0.23, "grad_norm": 1.2168450261809818, "learning_rate": 1.799551122435839e-05, "loss": 0.6088, "step": 2951 }, { "epoch": 0.23, "grad_norm": 1.150140032328074, "learning_rate": 1.7994001866654794e-05, "loss": 0.5849, "step": 2952 }, { "epoch": 0.23, "grad_norm": 1.0685143506555084, "learning_rate": 1.7992492004248615e-05, "loss": 0.5281, "step": 2953 }, { "epoch": 0.23, "grad_norm": 1.2763959068563853, "learning_rate": 1.7990981637235174e-05, "loss": 0.607, "step": 2954 }, { "epoch": 0.23, "grad_norm": 1.1506804277702987, "learning_rate": 1.798947076570984e-05, "loss": 0.5811, "step": 2955 }, { "epoch": 0.23, "grad_norm": 1.1526404564556179, "learning_rate": 1.7987959389767993e-05, "loss": 0.6009, "step": 2956 }, { "epoch": 0.23, "grad_norm": 1.2005724892360312, "learning_rate": 1.798644750950506e-05, "loss": 0.6374, "step": 2957 }, { "epoch": 0.23, "grad_norm": 1.14599489749152, "learning_rate": 1.798493512501649e-05, "loss": 0.5896, "step": 2958 }, { "epoch": 0.23, "grad_norm": 1.1765210431864042, "learning_rate": 1.798342223639777e-05, "loss": 0.6396, "step": 2959 }, { "epoch": 0.23, "grad_norm": 1.2504256954119262, "learning_rate": 1.7981908843744413e-05, "loss": 0.574, "step": 2960 }, { "epoch": 0.23, "grad_norm": 1.2331037614852411, "learning_rate": 1.7980394947151972e-05, "loss": 0.5922, "step": 2961 }, { "epoch": 0.23, "grad_norm": 1.329365364258588, "learning_rate": 1.797888054671602e-05, "loss": 0.6719, "step": 2962 }, { "epoch": 0.23, "grad_norm": 1.2572771437637662, "learning_rate": 1.7977365642532176e-05, "loss": 0.6657, "step": 2963 }, { "epoch": 0.23, "grad_norm": 1.2879552999079014, "learning_rate": 1.7975850234696084e-05, "loss": 0.6882, "step": 2964 }, { "epoch": 0.23, "grad_norm": 1.2407461477170283, "learning_rate": 1.7974334323303414e-05, "loss": 0.6057, "step": 2965 }, { "epoch": 0.23, "grad_norm": 1.2215059859776771, "learning_rate": 1.7972817908449875e-05, "loss": 0.6278, "step": 2966 }, { "epoch": 0.23, "grad_norm": 1.167622696761847, "learning_rate": 1.7971300990231208e-05, "loss": 0.6075, "step": 2967 }, { "epoch": 0.23, "grad_norm": 1.1778253169082262, "learning_rate": 1.7969783568743185e-05, "loss": 0.6018, "step": 2968 }, { "epoch": 0.23, "grad_norm": 1.2269449305997007, "learning_rate": 1.7968265644081603e-05, "loss": 0.5796, "step": 2969 }, { "epoch": 0.23, "grad_norm": 1.31491470734766, "learning_rate": 1.7966747216342305e-05, "loss": 0.6621, "step": 2970 }, { "epoch": 0.23, "grad_norm": 1.266386627101717, "learning_rate": 1.7965228285621148e-05, "loss": 0.6113, "step": 2971 }, { "epoch": 0.23, "grad_norm": 1.1610754719171927, "learning_rate": 1.7963708852014034e-05, "loss": 0.5741, "step": 2972 }, { "epoch": 0.23, "grad_norm": 1.2845976290844119, "learning_rate": 1.7962188915616896e-05, "loss": 0.6657, "step": 2973 }, { "epoch": 0.23, "grad_norm": 1.130304124467323, "learning_rate": 1.7960668476525688e-05, "loss": 0.6081, "step": 2974 }, { "epoch": 0.23, "grad_norm": 1.2494207470577738, "learning_rate": 1.795914753483641e-05, "loss": 0.5918, "step": 2975 }, { "epoch": 0.23, "grad_norm": 1.2301678486406489, "learning_rate": 1.7957626090645085e-05, "loss": 0.619, "step": 2976 }, { "epoch": 0.23, "grad_norm": 1.1322886867302662, "learning_rate": 1.7956104144047768e-05, "loss": 0.5837, "step": 2977 }, { "epoch": 0.23, "grad_norm": 1.1486553063449865, "learning_rate": 1.795458169514055e-05, "loss": 0.5828, "step": 2978 }, { "epoch": 0.23, "grad_norm": 1.160789137302683, "learning_rate": 1.7953058744019546e-05, "loss": 0.5474, "step": 2979 }, { "epoch": 0.23, "grad_norm": 1.2342475632181233, "learning_rate": 1.7951535290780913e-05, "loss": 0.5618, "step": 2980 }, { "epoch": 0.23, "grad_norm": 1.270766376336555, "learning_rate": 1.7950011335520832e-05, "loss": 0.5931, "step": 2981 }, { "epoch": 0.23, "grad_norm": 1.2507334941298718, "learning_rate": 1.7948486878335522e-05, "loss": 0.5865, "step": 2982 }, { "epoch": 0.23, "grad_norm": 1.5868925779795653, "learning_rate": 1.7946961919321225e-05, "loss": 0.6711, "step": 2983 }, { "epoch": 0.23, "grad_norm": 1.1474935064360614, "learning_rate": 1.7945436458574216e-05, "loss": 0.5709, "step": 2984 }, { "epoch": 0.23, "grad_norm": 1.1264940513524888, "learning_rate": 1.7943910496190816e-05, "loss": 0.584, "step": 2985 }, { "epoch": 0.23, "grad_norm": 1.2317083986789328, "learning_rate": 1.794238403226736e-05, "loss": 0.6154, "step": 2986 }, { "epoch": 0.23, "grad_norm": 1.1544663874415577, "learning_rate": 1.7940857066900223e-05, "loss": 0.5924, "step": 2987 }, { "epoch": 0.23, "grad_norm": 1.277005445377626, "learning_rate": 1.7939329600185807e-05, "loss": 0.6846, "step": 2988 }, { "epoch": 0.23, "grad_norm": 1.1453012213470386, "learning_rate": 1.7937801632220556e-05, "loss": 0.5462, "step": 2989 }, { "epoch": 0.23, "grad_norm": 1.2065896025614506, "learning_rate": 1.793627316310093e-05, "loss": 0.589, "step": 2990 }, { "epoch": 0.23, "grad_norm": 1.1848137236710226, "learning_rate": 1.7934744192923436e-05, "loss": 0.6013, "step": 2991 }, { "epoch": 0.23, "grad_norm": 1.2619593246270835, "learning_rate": 1.7933214721784602e-05, "loss": 0.6333, "step": 2992 }, { "epoch": 0.23, "grad_norm": 1.2150648596606142, "learning_rate": 1.7931684749780994e-05, "loss": 0.6105, "step": 2993 }, { "epoch": 0.23, "grad_norm": 1.2555330840400099, "learning_rate": 1.7930154277009207e-05, "loss": 0.654, "step": 2994 }, { "epoch": 0.23, "grad_norm": 1.1743399064533986, "learning_rate": 1.792862330356586e-05, "loss": 0.6053, "step": 2995 }, { "epoch": 0.23, "grad_norm": 1.164000387902867, "learning_rate": 1.7927091829547624e-05, "loss": 0.541, "step": 2996 }, { "epoch": 0.23, "grad_norm": 1.2705627476443653, "learning_rate": 1.792555985505118e-05, "loss": 0.6016, "step": 2997 }, { "epoch": 0.23, "grad_norm": 1.2389304688427403, "learning_rate": 1.7924027380173253e-05, "loss": 0.5954, "step": 2998 }, { "epoch": 0.23, "grad_norm": 1.1266852577827493, "learning_rate": 1.7922494405010593e-05, "loss": 0.691, "step": 2999 }, { "epoch": 0.23, "grad_norm": 1.2202408304305663, "learning_rate": 1.792096092965999e-05, "loss": 0.6233, "step": 3000 }, { "epoch": 0.23, "grad_norm": 1.2500432960641452, "learning_rate": 1.7919426954218252e-05, "loss": 0.6002, "step": 3001 }, { "epoch": 0.23, "grad_norm": 1.2753983828905995, "learning_rate": 1.7917892478782234e-05, "loss": 0.6494, "step": 3002 }, { "epoch": 0.23, "grad_norm": 1.1316247962000014, "learning_rate": 1.791635750344881e-05, "loss": 0.5839, "step": 3003 }, { "epoch": 0.23, "grad_norm": 1.3513382742223259, "learning_rate": 1.79148220283149e-05, "loss": 0.6534, "step": 3004 }, { "epoch": 0.23, "grad_norm": 1.3354423006704952, "learning_rate": 1.7913286053477434e-05, "loss": 0.689, "step": 3005 }, { "epoch": 0.23, "grad_norm": 1.293549790301838, "learning_rate": 1.7911749579033394e-05, "loss": 0.7231, "step": 3006 }, { "epoch": 0.23, "grad_norm": 1.1181166193196503, "learning_rate": 1.7910212605079788e-05, "loss": 0.5189, "step": 3007 }, { "epoch": 0.23, "grad_norm": 1.2468709883469986, "learning_rate": 1.7908675131713642e-05, "loss": 0.5978, "step": 3008 }, { "epoch": 0.23, "grad_norm": 1.1993187480574705, "learning_rate": 1.7907137159032036e-05, "loss": 0.6734, "step": 3009 }, { "epoch": 0.23, "grad_norm": 1.0643164870731645, "learning_rate": 1.7905598687132064e-05, "loss": 0.5736, "step": 3010 }, { "epoch": 0.23, "grad_norm": 1.2758891371306944, "learning_rate": 1.790405971611086e-05, "loss": 0.589, "step": 3011 }, { "epoch": 0.23, "grad_norm": 1.2234611421251935, "learning_rate": 1.790252024606559e-05, "loss": 0.6108, "step": 3012 }, { "epoch": 0.23, "grad_norm": 1.1952096234100136, "learning_rate": 1.7900980277093438e-05, "loss": 0.6469, "step": 3013 }, { "epoch": 0.23, "grad_norm": 1.3264603448541694, "learning_rate": 1.7899439809291643e-05, "loss": 0.5981, "step": 3014 }, { "epoch": 0.23, "grad_norm": 1.1788633891126337, "learning_rate": 1.7897898842757455e-05, "loss": 0.577, "step": 3015 }, { "epoch": 0.23, "grad_norm": 1.1995112655702147, "learning_rate": 1.7896357377588165e-05, "loss": 0.5554, "step": 3016 }, { "epoch": 0.23, "grad_norm": 1.3007924763879526, "learning_rate": 1.7894815413881096e-05, "loss": 0.693, "step": 3017 }, { "epoch": 0.23, "grad_norm": 1.31782970950039, "learning_rate": 1.7893272951733596e-05, "loss": 0.6413, "step": 3018 }, { "epoch": 0.23, "grad_norm": 1.345985061942385, "learning_rate": 1.789172999124305e-05, "loss": 0.6494, "step": 3019 }, { "epoch": 0.23, "grad_norm": 1.153175855306034, "learning_rate": 1.7890186532506875e-05, "loss": 0.5905, "step": 3020 }, { "epoch": 0.23, "grad_norm": 1.2546485771327105, "learning_rate": 1.788864257562252e-05, "loss": 0.6302, "step": 3021 }, { "epoch": 0.23, "grad_norm": 1.2414322962588247, "learning_rate": 1.788709812068745e-05, "loss": 0.6255, "step": 3022 }, { "epoch": 0.23, "grad_norm": 1.1241496899291201, "learning_rate": 1.788555316779919e-05, "loss": 0.6019, "step": 3023 }, { "epoch": 0.23, "grad_norm": 1.2547422575254765, "learning_rate": 1.7884007717055273e-05, "loss": 0.6121, "step": 3024 }, { "epoch": 0.23, "grad_norm": 1.2316432131356856, "learning_rate": 1.7882461768553275e-05, "loss": 0.5705, "step": 3025 }, { "epoch": 0.23, "grad_norm": 1.209348460455779, "learning_rate": 1.7880915322390794e-05, "loss": 0.6658, "step": 3026 }, { "epoch": 0.23, "grad_norm": 1.331915484009588, "learning_rate": 1.787936837866547e-05, "loss": 0.6434, "step": 3027 }, { "epoch": 0.23, "grad_norm": 1.3494130501057997, "learning_rate": 1.7877820937474966e-05, "loss": 0.6342, "step": 3028 }, { "epoch": 0.23, "grad_norm": 1.2896999285879898, "learning_rate": 1.7876272998916987e-05, "loss": 0.6691, "step": 3029 }, { "epoch": 0.24, "grad_norm": 1.2085400875830674, "learning_rate": 1.787472456308925e-05, "loss": 0.5967, "step": 3030 }, { "epoch": 0.24, "grad_norm": 1.1905624653571254, "learning_rate": 1.787317563008953e-05, "loss": 0.6734, "step": 3031 }, { "epoch": 0.24, "grad_norm": 1.2800279697700183, "learning_rate": 1.7871626200015607e-05, "loss": 0.7052, "step": 3032 }, { "epoch": 0.24, "grad_norm": 1.1663746752237185, "learning_rate": 1.7870076272965313e-05, "loss": 0.5192, "step": 3033 }, { "epoch": 0.24, "grad_norm": 1.2075594638217475, "learning_rate": 1.78685258490365e-05, "loss": 0.5684, "step": 3034 }, { "epoch": 0.24, "grad_norm": 1.1376299794310991, "learning_rate": 1.7866974928327052e-05, "loss": 0.5841, "step": 3035 }, { "epoch": 0.24, "grad_norm": 1.2071017149075514, "learning_rate": 1.7865423510934888e-05, "loss": 0.5715, "step": 3036 }, { "epoch": 0.24, "grad_norm": 1.0675487616165151, "learning_rate": 1.7863871596957963e-05, "loss": 0.5698, "step": 3037 }, { "epoch": 0.24, "grad_norm": 1.3310970039386116, "learning_rate": 1.7862319186494245e-05, "loss": 0.6288, "step": 3038 }, { "epoch": 0.24, "grad_norm": 1.137937174616564, "learning_rate": 1.7860766279641758e-05, "loss": 0.6085, "step": 3039 }, { "epoch": 0.24, "grad_norm": 1.1180286575356846, "learning_rate": 1.7859212876498536e-05, "loss": 0.5583, "step": 3040 }, { "epoch": 0.24, "grad_norm": 1.184991143833337, "learning_rate": 1.7857658977162655e-05, "loss": 0.5962, "step": 3041 }, { "epoch": 0.24, "grad_norm": 1.1759104610176012, "learning_rate": 1.7856104581732227e-05, "loss": 0.6277, "step": 3042 }, { "epoch": 0.24, "grad_norm": 1.216858006590385, "learning_rate": 1.785454969030538e-05, "loss": 0.6217, "step": 3043 }, { "epoch": 0.24, "grad_norm": 1.218961110537572, "learning_rate": 1.785299430298029e-05, "loss": 0.5992, "step": 3044 }, { "epoch": 0.24, "grad_norm": 1.1718555703141884, "learning_rate": 1.7851438419855157e-05, "loss": 0.5835, "step": 3045 }, { "epoch": 0.24, "grad_norm": 1.0695880749405033, "learning_rate": 1.7849882041028203e-05, "loss": 0.539, "step": 3046 }, { "epoch": 0.24, "grad_norm": 1.0857060137680985, "learning_rate": 1.7848325166597698e-05, "loss": 0.555, "step": 3047 }, { "epoch": 0.24, "grad_norm": 1.255971900561838, "learning_rate": 1.7846767796661934e-05, "loss": 0.6104, "step": 3048 }, { "epoch": 0.24, "grad_norm": 1.21615372954044, "learning_rate": 1.7845209931319232e-05, "loss": 0.657, "step": 3049 }, { "epoch": 0.24, "grad_norm": 1.1972051857259582, "learning_rate": 1.7843651570667953e-05, "loss": 0.5963, "step": 3050 }, { "epoch": 0.24, "grad_norm": 1.1667782128460005, "learning_rate": 1.784209271480648e-05, "loss": 0.6278, "step": 3051 }, { "epoch": 0.24, "grad_norm": 1.2684358536737081, "learning_rate": 1.7840533363833238e-05, "loss": 0.5818, "step": 3052 }, { "epoch": 0.24, "grad_norm": 1.3054852359627946, "learning_rate": 1.783897351784667e-05, "loss": 0.6701, "step": 3053 }, { "epoch": 0.24, "grad_norm": 1.2808090474629414, "learning_rate": 1.7837413176945263e-05, "loss": 0.5995, "step": 3054 }, { "epoch": 0.24, "grad_norm": 1.26955472484306, "learning_rate": 1.7835852341227523e-05, "loss": 0.6308, "step": 3055 }, { "epoch": 0.24, "grad_norm": 1.3128604848001724, "learning_rate": 1.7834291010791998e-05, "loss": 0.6654, "step": 3056 }, { "epoch": 0.24, "grad_norm": 1.185201025703545, "learning_rate": 1.783272918573726e-05, "loss": 0.6088, "step": 3057 }, { "epoch": 0.24, "grad_norm": 1.1497198198923226, "learning_rate": 1.7831166866161923e-05, "loss": 0.625, "step": 3058 }, { "epoch": 0.24, "grad_norm": 1.135485055012956, "learning_rate": 1.7829604052164616e-05, "loss": 0.5678, "step": 3059 }, { "epoch": 0.24, "grad_norm": 1.0077649482331934, "learning_rate": 1.7828040743844008e-05, "loss": 0.5576, "step": 3060 }, { "epoch": 0.24, "grad_norm": 1.1692780265785083, "learning_rate": 1.7826476941298797e-05, "loss": 0.5841, "step": 3061 }, { "epoch": 0.24, "grad_norm": 1.2531621513662476, "learning_rate": 1.782491264462772e-05, "loss": 0.5952, "step": 3062 }, { "epoch": 0.24, "grad_norm": 1.1774960809792259, "learning_rate": 1.782334785392954e-05, "loss": 0.6002, "step": 3063 }, { "epoch": 0.24, "grad_norm": 1.3198974227785316, "learning_rate": 1.7821782569303045e-05, "loss": 0.6301, "step": 3064 }, { "epoch": 0.24, "grad_norm": 1.2412421509954603, "learning_rate": 1.782021679084706e-05, "loss": 0.6383, "step": 3065 }, { "epoch": 0.24, "grad_norm": 1.2281711016439567, "learning_rate": 1.781865051866044e-05, "loss": 0.5641, "step": 3066 }, { "epoch": 0.24, "grad_norm": 1.2609974124860723, "learning_rate": 1.781708375284208e-05, "loss": 0.6578, "step": 3067 }, { "epoch": 0.24, "grad_norm": 1.2377115377091337, "learning_rate": 1.7815516493490888e-05, "loss": 0.5483, "step": 3068 }, { "epoch": 0.24, "grad_norm": 1.1886722901914275, "learning_rate": 1.7813948740705816e-05, "loss": 0.5809, "step": 3069 }, { "epoch": 0.24, "grad_norm": 1.3063399763010588, "learning_rate": 1.7812380494585846e-05, "loss": 0.6425, "step": 3070 }, { "epoch": 0.24, "grad_norm": 1.2197979554172487, "learning_rate": 1.781081175522999e-05, "loss": 0.6027, "step": 3071 }, { "epoch": 0.24, "grad_norm": 1.3226034840008223, "learning_rate": 1.780924252273729e-05, "loss": 0.6882, "step": 3072 }, { "epoch": 0.24, "grad_norm": 1.2641265381387161, "learning_rate": 1.780767279720682e-05, "loss": 0.593, "step": 3073 }, { "epoch": 0.24, "grad_norm": 1.1856570247654552, "learning_rate": 1.780610257873768e-05, "loss": 0.5957, "step": 3074 }, { "epoch": 0.24, "grad_norm": 1.2788455545590587, "learning_rate": 1.7804531867429013e-05, "loss": 0.5838, "step": 3075 }, { "epoch": 0.24, "grad_norm": 1.0892644183290483, "learning_rate": 1.780296066337998e-05, "loss": 0.5805, "step": 3076 }, { "epoch": 0.24, "grad_norm": 1.2363920029564532, "learning_rate": 1.7801388966689784e-05, "loss": 0.6094, "step": 3077 }, { "epoch": 0.24, "grad_norm": 1.1601366754286944, "learning_rate": 1.7799816777457653e-05, "loss": 0.5863, "step": 3078 }, { "epoch": 0.24, "grad_norm": 1.1786872210325052, "learning_rate": 1.7798244095782847e-05, "loss": 0.5889, "step": 3079 }, { "epoch": 0.24, "grad_norm": 1.2907588547575215, "learning_rate": 1.779667092176466e-05, "loss": 0.6372, "step": 3080 }, { "epoch": 0.24, "grad_norm": 1.2001794700725412, "learning_rate": 1.779509725550241e-05, "loss": 0.604, "step": 3081 }, { "epoch": 0.24, "grad_norm": 1.2630275398417172, "learning_rate": 1.7793523097095452e-05, "loss": 0.5937, "step": 3082 }, { "epoch": 0.24, "grad_norm": 1.2784739868954929, "learning_rate": 1.7791948446643173e-05, "loss": 0.563, "step": 3083 }, { "epoch": 0.24, "grad_norm": 1.1527263975885382, "learning_rate": 1.7790373304244986e-05, "loss": 0.5368, "step": 3084 }, { "epoch": 0.24, "grad_norm": 1.2456382469012621, "learning_rate": 1.778879767000034e-05, "loss": 0.6421, "step": 3085 }, { "epoch": 0.24, "grad_norm": 1.2194838637457066, "learning_rate": 1.7787221544008715e-05, "loss": 0.5656, "step": 3086 }, { "epoch": 0.24, "grad_norm": 1.273948192582068, "learning_rate": 1.7785644926369616e-05, "loss": 0.6647, "step": 3087 }, { "epoch": 0.24, "grad_norm": 1.2184152387927174, "learning_rate": 1.778406781718258e-05, "loss": 0.6336, "step": 3088 }, { "epoch": 0.24, "grad_norm": 1.3443164962034504, "learning_rate": 1.7782490216547182e-05, "loss": 0.5989, "step": 3089 }, { "epoch": 0.24, "grad_norm": 1.2599534005676034, "learning_rate": 1.778091212456303e-05, "loss": 0.6655, "step": 3090 }, { "epoch": 0.24, "grad_norm": 1.2018116805466454, "learning_rate": 1.7779333541329745e-05, "loss": 0.6561, "step": 3091 }, { "epoch": 0.24, "grad_norm": 1.184220201748367, "learning_rate": 1.7777754466947002e-05, "loss": 0.5243, "step": 3092 }, { "epoch": 0.24, "grad_norm": 1.1420561290916702, "learning_rate": 1.7776174901514493e-05, "loss": 0.5828, "step": 3093 }, { "epoch": 0.24, "grad_norm": 1.1784786068087671, "learning_rate": 1.777459484513194e-05, "loss": 0.5923, "step": 3094 }, { "epoch": 0.24, "grad_norm": 1.2604503574545003, "learning_rate": 1.77730142978991e-05, "loss": 0.5698, "step": 3095 }, { "epoch": 0.24, "grad_norm": 1.2245177214910639, "learning_rate": 1.7771433259915767e-05, "loss": 0.6243, "step": 3096 }, { "epoch": 0.24, "grad_norm": 1.1310737156374882, "learning_rate": 1.7769851731281758e-05, "loss": 0.5655, "step": 3097 }, { "epoch": 0.24, "grad_norm": 1.2791102149427254, "learning_rate": 1.7768269712096922e-05, "loss": 0.5306, "step": 3098 }, { "epoch": 0.24, "grad_norm": 1.3072608650297313, "learning_rate": 1.7766687202461137e-05, "loss": 0.6488, "step": 3099 }, { "epoch": 0.24, "grad_norm": 1.3758335188078605, "learning_rate": 1.776510420247432e-05, "loss": 0.7007, "step": 3100 }, { "epoch": 0.24, "grad_norm": 1.2715708150940994, "learning_rate": 1.7763520712236414e-05, "loss": 0.6378, "step": 3101 }, { "epoch": 0.24, "grad_norm": 1.2376673286673203, "learning_rate": 1.776193673184739e-05, "loss": 0.6697, "step": 3102 }, { "epoch": 0.24, "grad_norm": 1.293950425623742, "learning_rate": 1.776035226140725e-05, "loss": 0.6214, "step": 3103 }, { "epoch": 0.24, "grad_norm": 1.1915457221770092, "learning_rate": 1.7758767301016034e-05, "loss": 0.6003, "step": 3104 }, { "epoch": 0.24, "grad_norm": 1.3364921890612806, "learning_rate": 1.7757181850773812e-05, "loss": 0.6393, "step": 3105 }, { "epoch": 0.24, "grad_norm": 1.243742012113291, "learning_rate": 1.7755595910780677e-05, "loss": 0.625, "step": 3106 }, { "epoch": 0.24, "grad_norm": 1.1462533643334123, "learning_rate": 1.7754009481136758e-05, "loss": 0.5895, "step": 3107 }, { "epoch": 0.24, "grad_norm": 1.2663942518848426, "learning_rate": 1.7752422561942216e-05, "loss": 0.6783, "step": 3108 }, { "epoch": 0.24, "grad_norm": 1.2311750534406798, "learning_rate": 1.7750835153297238e-05, "loss": 0.6015, "step": 3109 }, { "epoch": 0.24, "grad_norm": 1.1748801291985216, "learning_rate": 1.774924725530205e-05, "loss": 0.579, "step": 3110 }, { "epoch": 0.24, "grad_norm": 1.225639919596761, "learning_rate": 1.77476588680569e-05, "loss": 0.6048, "step": 3111 }, { "epoch": 0.24, "grad_norm": 1.0920129879434115, "learning_rate": 1.7746069991662076e-05, "loss": 0.588, "step": 3112 }, { "epoch": 0.24, "grad_norm": 1.2202342849654233, "learning_rate": 1.7744480626217886e-05, "loss": 0.6283, "step": 3113 }, { "epoch": 0.24, "grad_norm": 1.2688035963041178, "learning_rate": 1.774289077182468e-05, "loss": 0.6084, "step": 3114 }, { "epoch": 0.24, "grad_norm": 1.2619185629018865, "learning_rate": 1.7741300428582827e-05, "loss": 0.5455, "step": 3115 }, { "epoch": 0.24, "grad_norm": 1.106099141066414, "learning_rate": 1.7739709596592742e-05, "loss": 0.5378, "step": 3116 }, { "epoch": 0.24, "grad_norm": 1.2088591922171792, "learning_rate": 1.773811827595486e-05, "loss": 0.5974, "step": 3117 }, { "epoch": 0.24, "grad_norm": 1.2637047977079623, "learning_rate": 1.7736526466769645e-05, "loss": 0.6625, "step": 3118 }, { "epoch": 0.24, "grad_norm": 1.3631647710582162, "learning_rate": 1.77349341691376e-05, "loss": 0.6746, "step": 3119 }, { "epoch": 0.24, "grad_norm": 1.2091894022151295, "learning_rate": 1.7733341383159254e-05, "loss": 0.5602, "step": 3120 }, { "epoch": 0.24, "grad_norm": 1.3337131743991497, "learning_rate": 1.773174810893517e-05, "loss": 0.6713, "step": 3121 }, { "epoch": 0.24, "grad_norm": 1.1644816316111655, "learning_rate": 1.7730154346565932e-05, "loss": 0.6071, "step": 3122 }, { "epoch": 0.24, "grad_norm": 1.2607845000857336, "learning_rate": 1.772856009615217e-05, "loss": 0.6426, "step": 3123 }, { "epoch": 0.24, "grad_norm": 1.1358324508682223, "learning_rate": 1.7726965357794536e-05, "loss": 0.5585, "step": 3124 }, { "epoch": 0.24, "grad_norm": 1.2403745557005192, "learning_rate": 1.7725370131593713e-05, "loss": 0.6094, "step": 3125 }, { "epoch": 0.24, "grad_norm": 1.174291027532417, "learning_rate": 1.7723774417650415e-05, "loss": 0.602, "step": 3126 }, { "epoch": 0.24, "grad_norm": 1.2481000288985398, "learning_rate": 1.772217821606539e-05, "loss": 0.6371, "step": 3127 }, { "epoch": 0.24, "grad_norm": 1.3395270148383536, "learning_rate": 1.7720581526939412e-05, "loss": 0.6196, "step": 3128 }, { "epoch": 0.24, "grad_norm": 1.2598934138341693, "learning_rate": 1.771898435037329e-05, "loss": 0.5804, "step": 3129 }, { "epoch": 0.24, "grad_norm": 1.3108867539960758, "learning_rate": 1.7717386686467856e-05, "loss": 0.5992, "step": 3130 }, { "epoch": 0.24, "grad_norm": 1.355990526543217, "learning_rate": 1.771578853532399e-05, "loss": 0.6602, "step": 3131 }, { "epoch": 0.24, "grad_norm": 1.2479598562741128, "learning_rate": 1.7714189897042583e-05, "loss": 0.6633, "step": 3132 }, { "epoch": 0.24, "grad_norm": 1.3442980070248483, "learning_rate": 1.7712590771724572e-05, "loss": 0.6271, "step": 3133 }, { "epoch": 0.24, "grad_norm": 1.2984706300624225, "learning_rate": 1.771099115947091e-05, "loss": 0.6151, "step": 3134 }, { "epoch": 0.24, "grad_norm": 1.2461555967857512, "learning_rate": 1.7709391060382592e-05, "loss": 0.6417, "step": 3135 }, { "epoch": 0.24, "grad_norm": 1.3102342030781724, "learning_rate": 1.7707790474560644e-05, "loss": 0.632, "step": 3136 }, { "epoch": 0.24, "grad_norm": 1.1246453361949431, "learning_rate": 1.7706189402106113e-05, "loss": 0.5425, "step": 3137 }, { "epoch": 0.24, "grad_norm": 1.2233872346064572, "learning_rate": 1.770458784312009e-05, "loss": 0.6269, "step": 3138 }, { "epoch": 0.24, "grad_norm": 1.2542860460125764, "learning_rate": 1.7702985797703682e-05, "loss": 0.6131, "step": 3139 }, { "epoch": 0.24, "grad_norm": 1.2888452982702112, "learning_rate": 1.7701383265958042e-05, "loss": 0.6223, "step": 3140 }, { "epoch": 0.24, "grad_norm": 1.1345702641678044, "learning_rate": 1.7699780247984343e-05, "loss": 0.6009, "step": 3141 }, { "epoch": 0.24, "grad_norm": 1.23611269937668, "learning_rate": 1.7698176743883786e-05, "loss": 0.6078, "step": 3142 }, { "epoch": 0.24, "grad_norm": 1.2875427072349082, "learning_rate": 1.769657275375762e-05, "loss": 0.681, "step": 3143 }, { "epoch": 0.24, "grad_norm": 1.1695176383062844, "learning_rate": 1.7694968277707102e-05, "loss": 0.582, "step": 3144 }, { "epoch": 0.24, "grad_norm": 1.1835082876240008, "learning_rate": 1.769336331583354e-05, "loss": 0.6119, "step": 3145 }, { "epoch": 0.24, "grad_norm": 1.199357413063099, "learning_rate": 1.7691757868238256e-05, "loss": 0.6433, "step": 3146 }, { "epoch": 0.24, "grad_norm": 1.1096665979692646, "learning_rate": 1.7690151935022616e-05, "loss": 0.5449, "step": 3147 }, { "epoch": 0.24, "grad_norm": 1.2888041844340898, "learning_rate": 1.7688545516288006e-05, "loss": 0.6345, "step": 3148 }, { "epoch": 0.24, "grad_norm": 1.388267146926027, "learning_rate": 1.768693861213585e-05, "loss": 0.6499, "step": 3149 }, { "epoch": 0.24, "grad_norm": 1.085520053462167, "learning_rate": 1.76853312226676e-05, "loss": 0.55, "step": 3150 }, { "epoch": 0.24, "grad_norm": 1.1869019457995977, "learning_rate": 1.768372334798474e-05, "loss": 0.59, "step": 3151 }, { "epoch": 0.24, "grad_norm": 1.2856650229507738, "learning_rate": 1.7682114988188782e-05, "loss": 0.5742, "step": 3152 }, { "epoch": 0.24, "grad_norm": 1.292939661165538, "learning_rate": 1.768050614338127e-05, "loss": 0.6041, "step": 3153 }, { "epoch": 0.24, "grad_norm": 1.0942013490462523, "learning_rate": 1.767889681366378e-05, "loss": 0.5807, "step": 3154 }, { "epoch": 0.24, "grad_norm": 1.208738434508418, "learning_rate": 1.7677286999137916e-05, "loss": 0.5863, "step": 3155 }, { "epoch": 0.24, "grad_norm": 1.2480825022118012, "learning_rate": 1.767567669990531e-05, "loss": 0.5904, "step": 3156 }, { "epoch": 0.24, "grad_norm": 1.2206445786741504, "learning_rate": 1.767406591606764e-05, "loss": 0.5646, "step": 3157 }, { "epoch": 0.24, "grad_norm": 1.236043791896361, "learning_rate": 1.767245464772659e-05, "loss": 0.5646, "step": 3158 }, { "epoch": 0.25, "grad_norm": 1.17166435891725, "learning_rate": 1.7670842894983898e-05, "loss": 0.6143, "step": 3159 }, { "epoch": 0.25, "grad_norm": 1.1660721092664443, "learning_rate": 1.7669230657941314e-05, "loss": 0.6108, "step": 3160 }, { "epoch": 0.25, "grad_norm": 1.3440181109848686, "learning_rate": 1.7667617936700634e-05, "loss": 0.6317, "step": 3161 }, { "epoch": 0.25, "grad_norm": 1.2503476136379494, "learning_rate": 1.766600473136367e-05, "loss": 0.6245, "step": 3162 }, { "epoch": 0.25, "grad_norm": 1.2463578090888154, "learning_rate": 1.7664391042032277e-05, "loss": 0.6237, "step": 3163 }, { "epoch": 0.25, "grad_norm": 1.3054172964588686, "learning_rate": 1.766277686880834e-05, "loss": 0.6322, "step": 3164 }, { "epoch": 0.25, "grad_norm": 1.2412940116831916, "learning_rate": 1.7661162211793757e-05, "loss": 0.6475, "step": 3165 }, { "epoch": 0.25, "grad_norm": 1.227847355721397, "learning_rate": 1.765954707109048e-05, "loss": 0.6049, "step": 3166 }, { "epoch": 0.25, "grad_norm": 1.2741661093648053, "learning_rate": 1.7657931446800475e-05, "loss": 0.6355, "step": 3167 }, { "epoch": 0.25, "grad_norm": 1.2351243364239475, "learning_rate": 1.7656315339025754e-05, "loss": 0.6611, "step": 3168 }, { "epoch": 0.25, "grad_norm": 1.293660650070407, "learning_rate": 1.765469874786834e-05, "loss": 0.5834, "step": 3169 }, { "epoch": 0.25, "grad_norm": 1.28034629585367, "learning_rate": 1.7653081673430302e-05, "loss": 0.6123, "step": 3170 }, { "epoch": 0.25, "grad_norm": 1.161525137082055, "learning_rate": 1.7651464115813738e-05, "loss": 0.5789, "step": 3171 }, { "epoch": 0.25, "grad_norm": 1.3447536446386683, "learning_rate": 1.764984607512076e-05, "loss": 0.6256, "step": 3172 }, { "epoch": 0.25, "grad_norm": 1.189310450523712, "learning_rate": 1.7648227551453535e-05, "loss": 0.5704, "step": 3173 }, { "epoch": 0.25, "grad_norm": 1.1945091490013942, "learning_rate": 1.7646608544914245e-05, "loss": 0.6375, "step": 3174 }, { "epoch": 0.25, "grad_norm": 1.1590937620355648, "learning_rate": 1.764498905560511e-05, "loss": 0.6081, "step": 3175 }, { "epoch": 0.25, "grad_norm": 1.2789708311708203, "learning_rate": 1.764336908362837e-05, "loss": 0.6274, "step": 3176 }, { "epoch": 0.25, "grad_norm": 1.224904050767292, "learning_rate": 1.76417486290863e-05, "loss": 0.5833, "step": 3177 }, { "epoch": 0.25, "grad_norm": 1.2411030768253288, "learning_rate": 1.764012769208122e-05, "loss": 0.6002, "step": 3178 }, { "epoch": 0.25, "grad_norm": 1.1292730349356888, "learning_rate": 1.7638506272715458e-05, "loss": 0.5581, "step": 3179 }, { "epoch": 0.25, "grad_norm": 1.231072656087877, "learning_rate": 1.7636884371091385e-05, "loss": 0.6251, "step": 3180 }, { "epoch": 0.25, "grad_norm": 1.1978900519468991, "learning_rate": 1.76352619873114e-05, "loss": 0.6132, "step": 3181 }, { "epoch": 0.25, "grad_norm": 1.121095185494916, "learning_rate": 1.7633639121477935e-05, "loss": 0.5977, "step": 3182 }, { "epoch": 0.25, "grad_norm": 1.1506382104217876, "learning_rate": 1.7632015773693444e-05, "loss": 0.6299, "step": 3183 }, { "epoch": 0.25, "grad_norm": 1.1663758505802897, "learning_rate": 1.7630391944060424e-05, "loss": 0.6218, "step": 3184 }, { "epoch": 0.25, "grad_norm": 1.2367705266238813, "learning_rate": 1.7628767632681393e-05, "loss": 0.622, "step": 3185 }, { "epoch": 0.25, "grad_norm": 1.2750108045232404, "learning_rate": 1.7627142839658903e-05, "loss": 0.6443, "step": 3186 }, { "epoch": 0.25, "grad_norm": 1.267974838458455, "learning_rate": 1.7625517565095536e-05, "loss": 0.6656, "step": 3187 }, { "epoch": 0.25, "grad_norm": 1.2187938682290484, "learning_rate": 1.76238918090939e-05, "loss": 0.613, "step": 3188 }, { "epoch": 0.25, "grad_norm": 1.2599129524134998, "learning_rate": 1.762226557175664e-05, "loss": 0.6017, "step": 3189 }, { "epoch": 0.25, "grad_norm": 1.1959538453328042, "learning_rate": 1.762063885318643e-05, "loss": 0.581, "step": 3190 }, { "epoch": 0.25, "grad_norm": 1.2414900063071381, "learning_rate": 1.7619011653485968e-05, "loss": 0.5808, "step": 3191 }, { "epoch": 0.25, "grad_norm": 1.2167196233973314, "learning_rate": 1.7617383972758e-05, "loss": 0.6234, "step": 3192 }, { "epoch": 0.25, "grad_norm": 1.1507774441015806, "learning_rate": 1.761575581110527e-05, "loss": 0.5854, "step": 3193 }, { "epoch": 0.25, "grad_norm": 1.314947842916153, "learning_rate": 1.7614127168630594e-05, "loss": 0.6175, "step": 3194 }, { "epoch": 0.25, "grad_norm": 1.123485446378992, "learning_rate": 1.7612498045436778e-05, "loss": 0.5849, "step": 3195 }, { "epoch": 0.25, "grad_norm": 1.1255848212222526, "learning_rate": 1.761086844162669e-05, "loss": 0.5876, "step": 3196 }, { "epoch": 0.25, "grad_norm": 1.1940461475457604, "learning_rate": 1.7609238357303208e-05, "loss": 0.6501, "step": 3197 }, { "epoch": 0.25, "grad_norm": 1.233503347784292, "learning_rate": 1.760760779256925e-05, "loss": 0.6747, "step": 3198 }, { "epoch": 0.25, "grad_norm": 1.3401509369349782, "learning_rate": 1.7605976747527763e-05, "loss": 0.6246, "step": 3199 }, { "epoch": 0.25, "grad_norm": 1.2139890061843788, "learning_rate": 1.760434522228172e-05, "loss": 0.5566, "step": 3200 }, { "epoch": 0.25, "grad_norm": 1.2199347190901457, "learning_rate": 1.7602713216934132e-05, "loss": 0.6276, "step": 3201 }, { "epoch": 0.25, "grad_norm": 1.1978119790593345, "learning_rate": 1.7601080731588034e-05, "loss": 0.611, "step": 3202 }, { "epoch": 0.25, "grad_norm": 1.2223582041599093, "learning_rate": 1.7599447766346494e-05, "loss": 0.6594, "step": 3203 }, { "epoch": 0.25, "grad_norm": 1.2071774094861256, "learning_rate": 1.7597814321312605e-05, "loss": 0.6109, "step": 3204 }, { "epoch": 0.25, "grad_norm": 1.1836781487416035, "learning_rate": 1.75961803965895e-05, "loss": 0.5251, "step": 3205 }, { "epoch": 0.25, "grad_norm": 1.3086132902138354, "learning_rate": 1.7594545992280336e-05, "loss": 0.5832, "step": 3206 }, { "epoch": 0.25, "grad_norm": 1.1603592717457671, "learning_rate": 1.75929111084883e-05, "loss": 0.6271, "step": 3207 }, { "epoch": 0.25, "grad_norm": 1.247102621999723, "learning_rate": 1.7591275745316613e-05, "loss": 0.6509, "step": 3208 }, { "epoch": 0.25, "grad_norm": 1.1599715885431923, "learning_rate": 1.7589639902868524e-05, "loss": 0.5862, "step": 3209 }, { "epoch": 0.25, "grad_norm": 1.2619552626834056, "learning_rate": 1.7588003581247307e-05, "loss": 0.6285, "step": 3210 }, { "epoch": 0.25, "grad_norm": 1.265983719086781, "learning_rate": 1.7586366780556275e-05, "loss": 0.6405, "step": 3211 }, { "epoch": 0.25, "grad_norm": 1.1950286827475802, "learning_rate": 1.7584729500898768e-05, "loss": 0.5574, "step": 3212 }, { "epoch": 0.25, "grad_norm": 1.270312447448204, "learning_rate": 1.7583091742378157e-05, "loss": 0.6536, "step": 3213 }, { "epoch": 0.25, "grad_norm": 1.1762339078671982, "learning_rate": 1.7581453505097838e-05, "loss": 0.6238, "step": 3214 }, { "epoch": 0.25, "grad_norm": 1.248898402227216, "learning_rate": 1.7579814789161246e-05, "loss": 0.6491, "step": 3215 }, { "epoch": 0.25, "grad_norm": 1.2109732837927323, "learning_rate": 1.757817559467184e-05, "loss": 0.5537, "step": 3216 }, { "epoch": 0.25, "grad_norm": 1.1729884135299256, "learning_rate": 1.757653592173311e-05, "loss": 0.6663, "step": 3217 }, { "epoch": 0.25, "grad_norm": 1.2556395627549655, "learning_rate": 1.7574895770448578e-05, "loss": 0.5797, "step": 3218 }, { "epoch": 0.25, "grad_norm": 1.195463881536923, "learning_rate": 1.7573255140921793e-05, "loss": 0.6031, "step": 3219 }, { "epoch": 0.25, "grad_norm": 1.2584785451321943, "learning_rate": 1.7571614033256338e-05, "loss": 0.6481, "step": 3220 }, { "epoch": 0.25, "grad_norm": 1.1928259223820412, "learning_rate": 1.7569972447555827e-05, "loss": 0.5866, "step": 3221 }, { "epoch": 0.25, "grad_norm": 1.2776793059584677, "learning_rate": 1.7568330383923902e-05, "loss": 0.6228, "step": 3222 }, { "epoch": 0.25, "grad_norm": 1.1727822924128048, "learning_rate": 1.7566687842464224e-05, "loss": 0.5913, "step": 3223 }, { "epoch": 0.25, "grad_norm": 1.2486822812159255, "learning_rate": 1.756504482328051e-05, "loss": 0.6199, "step": 3224 }, { "epoch": 0.25, "grad_norm": 1.269318689011064, "learning_rate": 1.7563401326476484e-05, "loss": 0.5851, "step": 3225 }, { "epoch": 0.25, "grad_norm": 1.2689404330600882, "learning_rate": 1.7561757352155914e-05, "loss": 0.6424, "step": 3226 }, { "epoch": 0.25, "grad_norm": 1.2317719355357146, "learning_rate": 1.7560112900422583e-05, "loss": 0.6589, "step": 3227 }, { "epoch": 0.25, "grad_norm": 1.3138338305062551, "learning_rate": 1.7558467971380323e-05, "loss": 0.5873, "step": 3228 }, { "epoch": 0.25, "grad_norm": 1.1362952003679108, "learning_rate": 1.755682256513298e-05, "loss": 0.5645, "step": 3229 }, { "epoch": 0.25, "grad_norm": 1.1628404898018452, "learning_rate": 1.755517668178444e-05, "loss": 0.6042, "step": 3230 }, { "epoch": 0.25, "grad_norm": 1.1885953168381038, "learning_rate": 1.7553530321438625e-05, "loss": 0.5527, "step": 3231 }, { "epoch": 0.25, "grad_norm": 1.2439340273177062, "learning_rate": 1.755188348419946e-05, "loss": 0.6827, "step": 3232 }, { "epoch": 0.25, "grad_norm": 1.3462039364413905, "learning_rate": 1.755023617017093e-05, "loss": 0.6536, "step": 3233 }, { "epoch": 0.25, "grad_norm": 1.3549725946420983, "learning_rate": 1.7548588379457042e-05, "loss": 0.6188, "step": 3234 }, { "epoch": 0.25, "grad_norm": 1.3044959287422724, "learning_rate": 1.754694011216182e-05, "loss": 0.6692, "step": 3235 }, { "epoch": 0.25, "grad_norm": 1.127275232465399, "learning_rate": 1.754529136838933e-05, "loss": 0.5306, "step": 3236 }, { "epoch": 0.25, "grad_norm": 1.1800491587049007, "learning_rate": 1.7543642148243672e-05, "loss": 0.6129, "step": 3237 }, { "epoch": 0.25, "grad_norm": 1.2309093352613774, "learning_rate": 1.754199245182896e-05, "loss": 0.623, "step": 3238 }, { "epoch": 0.25, "grad_norm": 1.1864438128343366, "learning_rate": 1.7540342279249355e-05, "loss": 0.5829, "step": 3239 }, { "epoch": 0.25, "grad_norm": 1.1907681116316102, "learning_rate": 1.753869163060904e-05, "loss": 0.6444, "step": 3240 }, { "epoch": 0.25, "grad_norm": 1.1033147308321871, "learning_rate": 1.753704050601223e-05, "loss": 0.6013, "step": 3241 }, { "epoch": 0.25, "grad_norm": 1.119433033608672, "learning_rate": 1.7535388905563163e-05, "loss": 0.5642, "step": 3242 }, { "epoch": 0.25, "grad_norm": 1.1216088683289283, "learning_rate": 1.7533736829366116e-05, "loss": 0.5404, "step": 3243 }, { "epoch": 0.25, "grad_norm": 1.3375103263812862, "learning_rate": 1.7532084277525396e-05, "loss": 0.6429, "step": 3244 }, { "epoch": 0.25, "grad_norm": 1.2044718251208995, "learning_rate": 1.7530431250145335e-05, "loss": 0.63, "step": 3245 }, { "epoch": 0.25, "grad_norm": 1.224135409051966, "learning_rate": 1.7528777747330296e-05, "loss": 0.6245, "step": 3246 }, { "epoch": 0.25, "grad_norm": 1.1316154205917992, "learning_rate": 1.7527123769184676e-05, "loss": 0.5946, "step": 3247 }, { "epoch": 0.25, "grad_norm": 1.238204713014442, "learning_rate": 1.75254693158129e-05, "loss": 0.5939, "step": 3248 }, { "epoch": 0.25, "grad_norm": 1.1776740468327798, "learning_rate": 1.7523814387319413e-05, "loss": 0.5823, "step": 3249 }, { "epoch": 0.25, "grad_norm": 1.1853791421449613, "learning_rate": 1.752215898380871e-05, "loss": 0.5543, "step": 3250 }, { "epoch": 0.25, "grad_norm": 1.3081901269439555, "learning_rate": 1.7520503105385303e-05, "loss": 0.6349, "step": 3251 }, { "epoch": 0.25, "grad_norm": 1.2068558846396564, "learning_rate": 1.7518846752153732e-05, "loss": 0.6398, "step": 3252 }, { "epoch": 0.25, "grad_norm": 1.1487598615519865, "learning_rate": 1.7517189924218573e-05, "loss": 0.5859, "step": 3253 }, { "epoch": 0.25, "grad_norm": 1.2408946765128526, "learning_rate": 1.751553262168443e-05, "loss": 0.6284, "step": 3254 }, { "epoch": 0.25, "grad_norm": 1.252214425323239, "learning_rate": 1.751387484465594e-05, "loss": 0.602, "step": 3255 }, { "epoch": 0.25, "grad_norm": 1.2074338371205104, "learning_rate": 1.751221659323776e-05, "loss": 0.592, "step": 3256 }, { "epoch": 0.25, "grad_norm": 1.1190562058276843, "learning_rate": 1.7510557867534594e-05, "loss": 0.5727, "step": 3257 }, { "epoch": 0.25, "grad_norm": 1.1997168186954494, "learning_rate": 1.7508898667651158e-05, "loss": 0.6496, "step": 3258 }, { "epoch": 0.25, "grad_norm": 1.188626507944179, "learning_rate": 1.7507238993692207e-05, "loss": 0.6299, "step": 3259 }, { "epoch": 0.25, "grad_norm": 1.1822751698064238, "learning_rate": 1.750557884576253e-05, "loss": 0.6066, "step": 3260 }, { "epoch": 0.25, "grad_norm": 1.3028565768414537, "learning_rate": 1.7503918223966932e-05, "loss": 0.6039, "step": 3261 }, { "epoch": 0.25, "grad_norm": 1.1614333294057304, "learning_rate": 1.7502257128410265e-05, "loss": 0.6381, "step": 3262 }, { "epoch": 0.25, "grad_norm": 1.1393542069270943, "learning_rate": 1.75005955591974e-05, "loss": 0.5498, "step": 3263 }, { "epoch": 0.25, "grad_norm": 1.231385340731515, "learning_rate": 1.7498933516433233e-05, "loss": 0.6238, "step": 3264 }, { "epoch": 0.25, "grad_norm": 1.1913341062621896, "learning_rate": 1.749727100022271e-05, "loss": 0.6228, "step": 3265 }, { "epoch": 0.25, "grad_norm": 1.2985829057310687, "learning_rate": 1.7495608010670783e-05, "loss": 0.684, "step": 3266 }, { "epoch": 0.25, "grad_norm": 1.1821546713263802, "learning_rate": 1.7493944547882454e-05, "loss": 0.5988, "step": 3267 }, { "epoch": 0.25, "grad_norm": 1.2065675208859925, "learning_rate": 1.749228061196274e-05, "loss": 0.5963, "step": 3268 }, { "epoch": 0.25, "grad_norm": 1.1694908303288254, "learning_rate": 1.7490616203016696e-05, "loss": 0.6042, "step": 3269 }, { "epoch": 0.25, "grad_norm": 1.201746807671075, "learning_rate": 1.7488951321149405e-05, "loss": 0.6107, "step": 3270 }, { "epoch": 0.25, "grad_norm": 1.2412208298619856, "learning_rate": 1.7487285966465982e-05, "loss": 0.6231, "step": 3271 }, { "epoch": 0.25, "grad_norm": 1.1691350822686961, "learning_rate": 1.7485620139071564e-05, "loss": 0.5284, "step": 3272 }, { "epoch": 0.25, "grad_norm": 1.1960277536674158, "learning_rate": 1.7483953839071324e-05, "loss": 0.614, "step": 3273 }, { "epoch": 0.25, "grad_norm": 1.251885184647968, "learning_rate": 1.748228706657047e-05, "loss": 0.6663, "step": 3274 }, { "epoch": 0.25, "grad_norm": 1.1440939735182525, "learning_rate": 1.7480619821674226e-05, "loss": 0.5752, "step": 3275 }, { "epoch": 0.25, "grad_norm": 1.1080195783544828, "learning_rate": 1.747895210448786e-05, "loss": 0.5984, "step": 3276 }, { "epoch": 0.25, "grad_norm": 1.2818116143456204, "learning_rate": 1.747728391511666e-05, "loss": 0.6211, "step": 3277 }, { "epoch": 0.25, "grad_norm": 1.1357567245851952, "learning_rate": 1.747561525366595e-05, "loss": 0.5827, "step": 3278 }, { "epoch": 0.25, "grad_norm": 1.277899245118622, "learning_rate": 1.747394612024108e-05, "loss": 0.6351, "step": 3279 }, { "epoch": 0.25, "grad_norm": 1.2762655204554059, "learning_rate": 1.747227651494743e-05, "loss": 0.618, "step": 3280 }, { "epoch": 0.25, "grad_norm": 1.2271707083784782, "learning_rate": 1.747060643789041e-05, "loss": 0.5917, "step": 3281 }, { "epoch": 0.25, "grad_norm": 1.242839809901325, "learning_rate": 1.7468935889175466e-05, "loss": 0.5971, "step": 3282 }, { "epoch": 0.25, "grad_norm": 1.2811186188118213, "learning_rate": 1.7467264868908064e-05, "loss": 0.6199, "step": 3283 }, { "epoch": 0.25, "grad_norm": 1.1961908541480428, "learning_rate": 1.7465593377193704e-05, "loss": 0.6229, "step": 3284 }, { "epoch": 0.25, "grad_norm": 1.355854518203587, "learning_rate": 1.7463921414137916e-05, "loss": 0.6819, "step": 3285 }, { "epoch": 0.25, "grad_norm": 1.2883341263097878, "learning_rate": 1.746224897984626e-05, "loss": 0.5962, "step": 3286 }, { "epoch": 0.25, "grad_norm": 1.2496667894658595, "learning_rate": 1.7460576074424327e-05, "loss": 0.6196, "step": 3287 }, { "epoch": 0.26, "grad_norm": 1.3655899741702564, "learning_rate": 1.745890269797774e-05, "loss": 0.6604, "step": 3288 }, { "epoch": 0.26, "grad_norm": 1.0832631504115473, "learning_rate": 1.7457228850612132e-05, "loss": 0.6096, "step": 3289 }, { "epoch": 0.26, "grad_norm": 1.1328132366309567, "learning_rate": 1.7455554532433198e-05, "loss": 0.6114, "step": 3290 }, { "epoch": 0.26, "grad_norm": 1.136917568791836, "learning_rate": 1.745387974354664e-05, "loss": 0.5363, "step": 3291 }, { "epoch": 0.26, "grad_norm": 1.2180852789002845, "learning_rate": 1.74522044840582e-05, "loss": 0.5695, "step": 3292 }, { "epoch": 0.26, "grad_norm": 1.145202179678769, "learning_rate": 1.7450528754073638e-05, "loss": 0.5848, "step": 3293 }, { "epoch": 0.26, "grad_norm": 1.2232268832993016, "learning_rate": 1.744885255369876e-05, "loss": 0.5984, "step": 3294 }, { "epoch": 0.26, "grad_norm": 1.1450380108283569, "learning_rate": 1.7447175883039386e-05, "loss": 0.6363, "step": 3295 }, { "epoch": 0.26, "grad_norm": 1.3036661318962648, "learning_rate": 1.744549874220138e-05, "loss": 0.6107, "step": 3296 }, { "epoch": 0.26, "grad_norm": 1.2884760129336736, "learning_rate": 1.744382113129062e-05, "loss": 0.6404, "step": 3297 }, { "epoch": 0.26, "grad_norm": 1.2740058800637986, "learning_rate": 1.744214305041303e-05, "loss": 0.6113, "step": 3298 }, { "epoch": 0.26, "grad_norm": 1.1967480086642135, "learning_rate": 1.7440464499674553e-05, "loss": 0.6188, "step": 3299 }, { "epoch": 0.26, "grad_norm": 1.3453225980550259, "learning_rate": 1.7438785479181164e-05, "loss": 0.6536, "step": 3300 }, { "epoch": 0.26, "grad_norm": 1.2878115045682026, "learning_rate": 1.7437105989038868e-05, "loss": 0.6355, "step": 3301 }, { "epoch": 0.26, "grad_norm": 1.171885833690157, "learning_rate": 1.74354260293537e-05, "loss": 0.5886, "step": 3302 }, { "epoch": 0.26, "grad_norm": 1.4211772841721313, "learning_rate": 1.7433745600231726e-05, "loss": 0.6045, "step": 3303 }, { "epoch": 0.26, "grad_norm": 1.3650233551794055, "learning_rate": 1.743206470177904e-05, "loss": 0.6654, "step": 3304 }, { "epoch": 0.26, "grad_norm": 1.1674912921654297, "learning_rate": 1.743038333410176e-05, "loss": 0.6003, "step": 3305 }, { "epoch": 0.26, "grad_norm": 1.2374112415272294, "learning_rate": 1.7428701497306048e-05, "loss": 0.6522, "step": 3306 }, { "epoch": 0.26, "grad_norm": 1.2440135657377303, "learning_rate": 1.7427019191498086e-05, "loss": 0.576, "step": 3307 }, { "epoch": 0.26, "grad_norm": 1.1372783109504145, "learning_rate": 1.7425336416784082e-05, "loss": 0.621, "step": 3308 }, { "epoch": 0.26, "grad_norm": 1.322163114419776, "learning_rate": 1.7423653173270278e-05, "loss": 0.6274, "step": 3309 }, { "epoch": 0.26, "grad_norm": 1.201498393517673, "learning_rate": 1.742196946106295e-05, "loss": 0.5946, "step": 3310 }, { "epoch": 0.26, "grad_norm": 1.160962835885136, "learning_rate": 1.74202852802684e-05, "loss": 0.594, "step": 3311 }, { "epoch": 0.26, "grad_norm": 1.2198264430056878, "learning_rate": 1.741860063099295e-05, "loss": 0.5602, "step": 3312 }, { "epoch": 0.26, "grad_norm": 1.215235116603408, "learning_rate": 1.7416915513342973e-05, "loss": 0.5971, "step": 3313 }, { "epoch": 0.26, "grad_norm": 1.2221801123204383, "learning_rate": 1.7415229927424853e-05, "loss": 0.6551, "step": 3314 }, { "epoch": 0.26, "grad_norm": 1.10844995320124, "learning_rate": 1.741354387334501e-05, "loss": 0.5566, "step": 3315 }, { "epoch": 0.26, "grad_norm": 1.2094471281558543, "learning_rate": 1.741185735120989e-05, "loss": 0.6702, "step": 3316 }, { "epoch": 0.26, "grad_norm": 1.2872892725812624, "learning_rate": 1.7410170361125978e-05, "loss": 0.6408, "step": 3317 }, { "epoch": 0.26, "grad_norm": 1.125605685054057, "learning_rate": 1.740848290319978e-05, "loss": 0.5682, "step": 3318 }, { "epoch": 0.26, "grad_norm": 1.3357768268893488, "learning_rate": 1.7406794977537832e-05, "loss": 0.6534, "step": 3319 }, { "epoch": 0.26, "grad_norm": 1.2486897754370647, "learning_rate": 1.7405106584246705e-05, "loss": 0.6187, "step": 3320 }, { "epoch": 0.26, "grad_norm": 1.1170026186038942, "learning_rate": 1.740341772343299e-05, "loss": 0.5665, "step": 3321 }, { "epoch": 0.26, "grad_norm": 1.1826839664420352, "learning_rate": 1.7401728395203323e-05, "loss": 0.5946, "step": 3322 }, { "epoch": 0.26, "grad_norm": 1.081077954490754, "learning_rate": 1.7400038599664354e-05, "loss": 0.5587, "step": 3323 }, { "epoch": 0.26, "grad_norm": 1.3094195137560254, "learning_rate": 1.7398348336922764e-05, "loss": 0.6296, "step": 3324 }, { "epoch": 0.26, "grad_norm": 1.2678910678799522, "learning_rate": 1.7396657607085276e-05, "loss": 0.6614, "step": 3325 }, { "epoch": 0.26, "grad_norm": 1.2309330624308885, "learning_rate": 1.739496641025863e-05, "loss": 0.6511, "step": 3326 }, { "epoch": 0.26, "grad_norm": 1.1810842670252255, "learning_rate": 1.7393274746549605e-05, "loss": 0.6624, "step": 3327 }, { "epoch": 0.26, "grad_norm": 1.0927437649568406, "learning_rate": 1.7391582616064998e-05, "loss": 0.5657, "step": 3328 }, { "epoch": 0.26, "grad_norm": 1.1824833158642718, "learning_rate": 1.7389890018911647e-05, "loss": 0.6251, "step": 3329 }, { "epoch": 0.26, "grad_norm": 1.2882377066578126, "learning_rate": 1.738819695519641e-05, "loss": 0.6431, "step": 3330 }, { "epoch": 0.26, "grad_norm": 1.274624464417439, "learning_rate": 1.7386503425026183e-05, "loss": 0.6202, "step": 3331 }, { "epoch": 0.26, "grad_norm": 1.204040278277466, "learning_rate": 1.7384809428507884e-05, "loss": 0.5557, "step": 3332 }, { "epoch": 0.26, "grad_norm": 1.197070854237926, "learning_rate": 1.7383114965748465e-05, "loss": 0.6166, "step": 3333 }, { "epoch": 0.26, "grad_norm": 1.3553243774569124, "learning_rate": 1.738142003685491e-05, "loss": 0.7191, "step": 3334 }, { "epoch": 0.26, "grad_norm": 1.1880538552308022, "learning_rate": 1.737972464193422e-05, "loss": 0.5912, "step": 3335 }, { "epoch": 0.26, "grad_norm": 1.2519953061524662, "learning_rate": 1.7378028781093443e-05, "loss": 0.6046, "step": 3336 }, { "epoch": 0.26, "grad_norm": 1.2172224301772756, "learning_rate": 1.7376332454439643e-05, "loss": 0.555, "step": 3337 }, { "epoch": 0.26, "grad_norm": 1.2924892769669403, "learning_rate": 1.7374635662079915e-05, "loss": 0.6675, "step": 3338 }, { "epoch": 0.26, "grad_norm": 1.3047546095349118, "learning_rate": 1.7372938404121393e-05, "loss": 0.6306, "step": 3339 }, { "epoch": 0.26, "grad_norm": 1.3009045058498545, "learning_rate": 1.7371240680671228e-05, "loss": 0.6191, "step": 3340 }, { "epoch": 0.26, "grad_norm": 1.2138590854610696, "learning_rate": 1.7369542491836608e-05, "loss": 0.5735, "step": 3341 }, { "epoch": 0.26, "grad_norm": 1.2044767242374326, "learning_rate": 1.736784383772475e-05, "loss": 0.6026, "step": 3342 }, { "epoch": 0.26, "grad_norm": 1.1745416944596863, "learning_rate": 1.7366144718442893e-05, "loss": 0.6085, "step": 3343 }, { "epoch": 0.26, "grad_norm": 1.2627075855576206, "learning_rate": 1.736444513409832e-05, "loss": 0.6771, "step": 3344 }, { "epoch": 0.26, "grad_norm": 1.1491306833927326, "learning_rate": 1.736274508479833e-05, "loss": 0.5387, "step": 3345 }, { "epoch": 0.26, "grad_norm": 1.1292567253562806, "learning_rate": 1.7361044570650256e-05, "loss": 0.557, "step": 3346 }, { "epoch": 0.26, "grad_norm": 1.3335490151641733, "learning_rate": 1.735934359176146e-05, "loss": 0.5979, "step": 3347 }, { "epoch": 0.26, "grad_norm": 1.1763961552817495, "learning_rate": 1.7357642148239334e-05, "loss": 0.5747, "step": 3348 }, { "epoch": 0.26, "grad_norm": 1.1725573778245444, "learning_rate": 1.73559402401913e-05, "loss": 0.5861, "step": 3349 }, { "epoch": 0.26, "grad_norm": 1.1778224829870367, "learning_rate": 1.7354237867724805e-05, "loss": 0.5585, "step": 3350 }, { "epoch": 0.26, "grad_norm": 1.1484453888706514, "learning_rate": 1.7352535030947334e-05, "loss": 0.5919, "step": 3351 }, { "epoch": 0.26, "grad_norm": 1.2300903707350819, "learning_rate": 1.735083172996639e-05, "loss": 0.6022, "step": 3352 }, { "epoch": 0.26, "grad_norm": 1.2197658022454867, "learning_rate": 1.7349127964889508e-05, "loss": 0.6532, "step": 3353 }, { "epoch": 0.26, "grad_norm": 1.231761676950544, "learning_rate": 1.7347423735824266e-05, "loss": 0.6267, "step": 3354 }, { "epoch": 0.26, "grad_norm": 1.2859242008675675, "learning_rate": 1.734571904287826e-05, "loss": 0.6163, "step": 3355 }, { "epoch": 0.26, "grad_norm": 1.178086514438827, "learning_rate": 1.7344013886159104e-05, "loss": 0.617, "step": 3356 }, { "epoch": 0.26, "grad_norm": 1.3033751323140503, "learning_rate": 1.7342308265774467e-05, "loss": 0.6533, "step": 3357 }, { "epoch": 0.26, "grad_norm": 1.2567069366100936, "learning_rate": 1.7340602181832028e-05, "loss": 0.6165, "step": 3358 }, { "epoch": 0.26, "grad_norm": 1.2007666920143658, "learning_rate": 1.7338895634439496e-05, "loss": 0.6301, "step": 3359 }, { "epoch": 0.26, "grad_norm": 1.2706204480991743, "learning_rate": 1.733718862370462e-05, "loss": 0.6285, "step": 3360 }, { "epoch": 0.26, "grad_norm": 1.3147399498714427, "learning_rate": 1.7335481149735173e-05, "loss": 0.6519, "step": 3361 }, { "epoch": 0.26, "grad_norm": 1.228480934417426, "learning_rate": 1.7333773212638957e-05, "loss": 0.5986, "step": 3362 }, { "epoch": 0.26, "grad_norm": 1.1688612777855818, "learning_rate": 1.73320648125238e-05, "loss": 0.5989, "step": 3363 }, { "epoch": 0.26, "grad_norm": 1.1706408296035187, "learning_rate": 1.733035594949756e-05, "loss": 0.5714, "step": 3364 }, { "epoch": 0.26, "grad_norm": 1.184445367499233, "learning_rate": 1.732864662366813e-05, "loss": 0.5902, "step": 3365 }, { "epoch": 0.26, "grad_norm": 1.2335445652748194, "learning_rate": 1.7326936835143427e-05, "loss": 0.5393, "step": 3366 }, { "epoch": 0.26, "grad_norm": 1.2273502128717442, "learning_rate": 1.73252265840314e-05, "loss": 0.6452, "step": 3367 }, { "epoch": 0.26, "grad_norm": 1.2024560034429803, "learning_rate": 1.7323515870440027e-05, "loss": 0.626, "step": 3368 }, { "epoch": 0.26, "grad_norm": 1.094234086947422, "learning_rate": 1.7321804694477314e-05, "loss": 0.5842, "step": 3369 }, { "epoch": 0.26, "grad_norm": 1.2529117050723875, "learning_rate": 1.7320093056251293e-05, "loss": 0.6102, "step": 3370 }, { "epoch": 0.26, "grad_norm": 1.1553890528349962, "learning_rate": 1.7318380955870032e-05, "loss": 0.5824, "step": 3371 }, { "epoch": 0.26, "grad_norm": 1.1919226856024154, "learning_rate": 1.7316668393441622e-05, "loss": 0.6054, "step": 3372 }, { "epoch": 0.26, "grad_norm": 1.3072211967208809, "learning_rate": 1.731495536907419e-05, "loss": 0.6519, "step": 3373 }, { "epoch": 0.26, "grad_norm": 1.3214918207990012, "learning_rate": 1.7313241882875883e-05, "loss": 0.6443, "step": 3374 }, { "epoch": 0.26, "grad_norm": 1.15111137052502, "learning_rate": 1.7311527934954885e-05, "loss": 0.5764, "step": 3375 }, { "epoch": 0.26, "grad_norm": 1.2135490067987205, "learning_rate": 1.7309813525419403e-05, "loss": 0.6, "step": 3376 }, { "epoch": 0.26, "grad_norm": 1.0840930781116456, "learning_rate": 1.730809865437768e-05, "loss": 0.5167, "step": 3377 }, { "epoch": 0.26, "grad_norm": 1.2688477069834467, "learning_rate": 1.7306383321937986e-05, "loss": 0.6351, "step": 3378 }, { "epoch": 0.26, "grad_norm": 1.141371495777199, "learning_rate": 1.730466752820862e-05, "loss": 0.6126, "step": 3379 }, { "epoch": 0.26, "grad_norm": 1.304618468142981, "learning_rate": 1.7302951273297904e-05, "loss": 0.6449, "step": 3380 }, { "epoch": 0.26, "grad_norm": 1.1519705992596048, "learning_rate": 1.7301234557314194e-05, "loss": 0.5883, "step": 3381 }, { "epoch": 0.26, "grad_norm": 1.200634248331523, "learning_rate": 1.7299517380365877e-05, "loss": 0.6639, "step": 3382 }, { "epoch": 0.26, "grad_norm": 1.2580156339968729, "learning_rate": 1.7297799742561367e-05, "loss": 0.5991, "step": 3383 }, { "epoch": 0.26, "grad_norm": 1.2387512951487094, "learning_rate": 1.729608164400911e-05, "loss": 0.5469, "step": 3384 }, { "epoch": 0.26, "grad_norm": 1.2410634551228308, "learning_rate": 1.7294363084817573e-05, "loss": 0.6314, "step": 3385 }, { "epoch": 0.26, "grad_norm": 1.2780469539476047, "learning_rate": 1.7292644065095263e-05, "loss": 0.6622, "step": 3386 }, { "epoch": 0.26, "grad_norm": 1.2191027595386779, "learning_rate": 1.7290924584950704e-05, "loss": 0.5835, "step": 3387 }, { "epoch": 0.26, "grad_norm": 1.1113196508423067, "learning_rate": 1.7289204644492463e-05, "loss": 0.6007, "step": 3388 }, { "epoch": 0.26, "grad_norm": 1.1969095668332939, "learning_rate": 1.7287484243829126e-05, "loss": 0.6494, "step": 3389 }, { "epoch": 0.26, "grad_norm": 1.1119038945732729, "learning_rate": 1.728576338306931e-05, "loss": 0.6113, "step": 3390 }, { "epoch": 0.26, "grad_norm": 1.302236069939979, "learning_rate": 1.7284042062321663e-05, "loss": 0.6146, "step": 3391 }, { "epoch": 0.26, "grad_norm": 1.1797731886304792, "learning_rate": 1.7282320281694857e-05, "loss": 0.5641, "step": 3392 }, { "epoch": 0.26, "grad_norm": 1.254508851136797, "learning_rate": 1.72805980412976e-05, "loss": 0.597, "step": 3393 }, { "epoch": 0.26, "grad_norm": 1.3392448310061567, "learning_rate": 1.7278875341238627e-05, "loss": 0.7139, "step": 3394 }, { "epoch": 0.26, "grad_norm": 1.3181460059541965, "learning_rate": 1.7277152181626703e-05, "loss": 0.6158, "step": 3395 }, { "epoch": 0.26, "grad_norm": 1.2433147473944648, "learning_rate": 1.727542856257061e-05, "loss": 0.6183, "step": 3396 }, { "epoch": 0.26, "grad_norm": 1.2530361496017832, "learning_rate": 1.727370448417918e-05, "loss": 0.6041, "step": 3397 }, { "epoch": 0.26, "grad_norm": 1.244108094848589, "learning_rate": 1.7271979946561256e-05, "loss": 0.611, "step": 3398 }, { "epoch": 0.26, "grad_norm": 1.2718321153313898, "learning_rate": 1.7270254949825722e-05, "loss": 0.602, "step": 3399 }, { "epoch": 0.26, "grad_norm": 1.1197390462650894, "learning_rate": 1.726852949408148e-05, "loss": 0.548, "step": 3400 }, { "epoch": 0.26, "grad_norm": 1.1633571561562022, "learning_rate": 1.7266803579437472e-05, "loss": 0.6289, "step": 3401 }, { "epoch": 0.26, "grad_norm": 1.2020785747590357, "learning_rate": 1.7265077206002664e-05, "loss": 0.6174, "step": 3402 }, { "epoch": 0.26, "grad_norm": 1.1912704640539116, "learning_rate": 1.7263350373886046e-05, "loss": 0.5959, "step": 3403 }, { "epoch": 0.26, "grad_norm": 1.212634810592529, "learning_rate": 1.726162308319665e-05, "loss": 0.6457, "step": 3404 }, { "epoch": 0.26, "grad_norm": 1.1948400822978884, "learning_rate": 1.7259895334043516e-05, "loss": 0.6085, "step": 3405 }, { "epoch": 0.26, "grad_norm": 1.269153010436185, "learning_rate": 1.725816712653574e-05, "loss": 0.6412, "step": 3406 }, { "epoch": 0.26, "grad_norm": 1.176745402796988, "learning_rate": 1.7256438460782427e-05, "loss": 0.6127, "step": 3407 }, { "epoch": 0.26, "grad_norm": 1.1200628026316155, "learning_rate": 1.725470933689271e-05, "loss": 0.488, "step": 3408 }, { "epoch": 0.26, "grad_norm": 1.2441838854300658, "learning_rate": 1.7252979754975765e-05, "loss": 0.6186, "step": 3409 }, { "epoch": 0.26, "grad_norm": 1.190531375037393, "learning_rate": 1.725124971514079e-05, "loss": 0.6223, "step": 3410 }, { "epoch": 0.26, "grad_norm": 1.231457122576695, "learning_rate": 1.7249519217497007e-05, "loss": 0.6121, "step": 3411 }, { "epoch": 0.26, "grad_norm": 1.1829266572436101, "learning_rate": 1.7247788262153673e-05, "loss": 0.6121, "step": 3412 }, { "epoch": 0.26, "grad_norm": 1.290336949899096, "learning_rate": 1.724605684922007e-05, "loss": 0.6457, "step": 3413 }, { "epoch": 0.26, "grad_norm": 1.253085714661441, "learning_rate": 1.7244324978805516e-05, "loss": 0.6617, "step": 3414 }, { "epoch": 0.26, "grad_norm": 1.4160945054807461, "learning_rate": 1.7242592651019353e-05, "loss": 0.6496, "step": 3415 }, { "epoch": 0.27, "grad_norm": 1.1085313624865156, "learning_rate": 1.7240859865970948e-05, "loss": 0.5781, "step": 3416 }, { "epoch": 0.27, "grad_norm": 1.1939958289311587, "learning_rate": 1.7239126623769703e-05, "loss": 0.6262, "step": 3417 }, { "epoch": 0.27, "grad_norm": 1.2282199716125397, "learning_rate": 1.7237392924525037e-05, "loss": 0.6242, "step": 3418 }, { "epoch": 0.27, "grad_norm": 1.2968295905194807, "learning_rate": 1.7235658768346422e-05, "loss": 0.6765, "step": 3419 }, { "epoch": 0.27, "grad_norm": 1.2039937932529343, "learning_rate": 1.723392415534334e-05, "loss": 0.551, "step": 3420 }, { "epoch": 0.27, "grad_norm": 1.182970392732156, "learning_rate": 1.72321890856253e-05, "loss": 0.5846, "step": 3421 }, { "epoch": 0.27, "grad_norm": 1.2279794368917152, "learning_rate": 1.723045355930185e-05, "loss": 0.5978, "step": 3422 }, { "epoch": 0.27, "grad_norm": 1.1248688621383214, "learning_rate": 1.7228717576482563e-05, "loss": 0.5783, "step": 3423 }, { "epoch": 0.27, "grad_norm": 1.3525211704874793, "learning_rate": 1.722698113727704e-05, "loss": 0.6685, "step": 3424 }, { "epoch": 0.27, "grad_norm": 1.1919329370220593, "learning_rate": 1.7225244241794916e-05, "loss": 0.528, "step": 3425 }, { "epoch": 0.27, "grad_norm": 1.2538218722370853, "learning_rate": 1.7223506890145842e-05, "loss": 0.6043, "step": 3426 }, { "epoch": 0.27, "grad_norm": 1.1875692648513518, "learning_rate": 1.7221769082439508e-05, "loss": 0.5975, "step": 3427 }, { "epoch": 0.27, "grad_norm": 1.2182915143056035, "learning_rate": 1.7220030818785635e-05, "loss": 0.6354, "step": 3428 }, { "epoch": 0.27, "grad_norm": 1.2227457519532332, "learning_rate": 1.721829209929396e-05, "loss": 0.6272, "step": 3429 }, { "epoch": 0.27, "grad_norm": 1.271587361795596, "learning_rate": 1.721655292407427e-05, "loss": 0.684, "step": 3430 }, { "epoch": 0.27, "grad_norm": 1.1728294046486247, "learning_rate": 1.721481329323636e-05, "loss": 0.5759, "step": 3431 }, { "epoch": 0.27, "grad_norm": 1.2359590627378376, "learning_rate": 1.7213073206890063e-05, "loss": 0.6133, "step": 3432 }, { "epoch": 0.27, "grad_norm": 1.2561826868195167, "learning_rate": 1.721133266514524e-05, "loss": 0.6566, "step": 3433 }, { "epoch": 0.27, "grad_norm": 1.1822220309864055, "learning_rate": 1.720959166811178e-05, "loss": 0.6348, "step": 3434 }, { "epoch": 0.27, "grad_norm": 1.3352234476945937, "learning_rate": 1.72078502158996e-05, "loss": 0.6142, "step": 3435 }, { "epoch": 0.27, "grad_norm": 1.2966064266947603, "learning_rate": 1.720610830861865e-05, "loss": 0.6492, "step": 3436 }, { "epoch": 0.27, "grad_norm": 1.0881068564386256, "learning_rate": 1.7204365946378906e-05, "loss": 0.5589, "step": 3437 }, { "epoch": 0.27, "grad_norm": 1.13118300476648, "learning_rate": 1.7202623129290367e-05, "loss": 0.5947, "step": 3438 }, { "epoch": 0.27, "grad_norm": 1.2141565176007922, "learning_rate": 1.720087985746307e-05, "loss": 0.5997, "step": 3439 }, { "epoch": 0.27, "grad_norm": 1.2549494506716794, "learning_rate": 1.7199136131007074e-05, "loss": 0.5939, "step": 3440 }, { "epoch": 0.27, "grad_norm": 1.1754788234755198, "learning_rate": 1.719739195003247e-05, "loss": 0.6065, "step": 3441 }, { "epoch": 0.27, "grad_norm": 1.1796197934962993, "learning_rate": 1.7195647314649383e-05, "loss": 0.599, "step": 3442 }, { "epoch": 0.27, "grad_norm": 1.2069171740503573, "learning_rate": 1.7193902224967956e-05, "loss": 0.6181, "step": 3443 }, { "epoch": 0.27, "grad_norm": 1.3393779868174052, "learning_rate": 1.7192156681098364e-05, "loss": 0.5985, "step": 3444 }, { "epoch": 0.27, "grad_norm": 1.1614857771614773, "learning_rate": 1.7190410683150816e-05, "loss": 0.5979, "step": 3445 }, { "epoch": 0.27, "grad_norm": 1.221602207962269, "learning_rate": 1.7188664231235544e-05, "loss": 0.5734, "step": 3446 }, { "epoch": 0.27, "grad_norm": 1.2435657844030366, "learning_rate": 1.7186917325462808e-05, "loss": 0.6279, "step": 3447 }, { "epoch": 0.27, "grad_norm": 1.252003209009415, "learning_rate": 1.71851699659429e-05, "loss": 0.6782, "step": 3448 }, { "epoch": 0.27, "grad_norm": 1.168796054932257, "learning_rate": 1.7183422152786145e-05, "loss": 0.5885, "step": 3449 }, { "epoch": 0.27, "grad_norm": 1.1936584697205102, "learning_rate": 1.718167388610289e-05, "loss": 0.5842, "step": 3450 }, { "epoch": 0.27, "grad_norm": 1.1738174107930588, "learning_rate": 1.7179925166003506e-05, "loss": 0.5997, "step": 3451 }, { "epoch": 0.27, "grad_norm": 1.1548514284630382, "learning_rate": 1.71781759925984e-05, "loss": 0.5791, "step": 3452 }, { "epoch": 0.27, "grad_norm": 1.23274374263663, "learning_rate": 1.7176426365998015e-05, "loss": 0.6286, "step": 3453 }, { "epoch": 0.27, "grad_norm": 1.1556862668290826, "learning_rate": 1.7174676286312807e-05, "loss": 0.5293, "step": 3454 }, { "epoch": 0.27, "grad_norm": 1.2823091641597584, "learning_rate": 1.717292575365327e-05, "loss": 0.6569, "step": 3455 }, { "epoch": 0.27, "grad_norm": 1.2788120428492529, "learning_rate": 1.717117476812992e-05, "loss": 0.6217, "step": 3456 }, { "epoch": 0.27, "grad_norm": 1.3612913020009727, "learning_rate": 1.7169423329853307e-05, "loss": 0.687, "step": 3457 }, { "epoch": 0.27, "grad_norm": 1.3048652482160341, "learning_rate": 1.7167671438934014e-05, "loss": 0.5577, "step": 3458 }, { "epoch": 0.27, "grad_norm": 1.127044674015843, "learning_rate": 1.7165919095482636e-05, "loss": 0.5872, "step": 3459 }, { "epoch": 0.27, "grad_norm": 1.1676144780267974, "learning_rate": 1.716416629960982e-05, "loss": 0.6488, "step": 3460 }, { "epoch": 0.27, "grad_norm": 1.365012744382953, "learning_rate": 1.7162413051426222e-05, "loss": 0.6613, "step": 3461 }, { "epoch": 0.27, "grad_norm": 1.0879256895156963, "learning_rate": 1.7160659351042533e-05, "loss": 0.5603, "step": 3462 }, { "epoch": 0.27, "grad_norm": 1.1720538193645633, "learning_rate": 1.7158905198569476e-05, "loss": 0.5483, "step": 3463 }, { "epoch": 0.27, "grad_norm": 1.23895635162207, "learning_rate": 1.7157150594117805e-05, "loss": 0.6116, "step": 3464 }, { "epoch": 0.27, "grad_norm": 1.1681729651696895, "learning_rate": 1.7155395537798282e-05, "loss": 0.5921, "step": 3465 }, { "epoch": 0.27, "grad_norm": 1.2154166781050915, "learning_rate": 1.7153640029721726e-05, "loss": 0.6222, "step": 3466 }, { "epoch": 0.27, "grad_norm": 1.2614323908151381, "learning_rate": 1.7151884069998966e-05, "loss": 0.6172, "step": 3467 }, { "epoch": 0.27, "grad_norm": 1.1960065235546904, "learning_rate": 1.7150127658740868e-05, "loss": 0.5752, "step": 3468 }, { "epoch": 0.27, "grad_norm": 1.2162324383086238, "learning_rate": 1.7148370796058316e-05, "loss": 0.6484, "step": 3469 }, { "epoch": 0.27, "grad_norm": 1.0929767054826567, "learning_rate": 1.714661348206224e-05, "loss": 0.5337, "step": 3470 }, { "epoch": 0.27, "grad_norm": 1.2129818303067432, "learning_rate": 1.714485571686358e-05, "loss": 0.6408, "step": 3471 }, { "epoch": 0.27, "grad_norm": 1.2663385710849338, "learning_rate": 1.7143097500573314e-05, "loss": 0.6163, "step": 3472 }, { "epoch": 0.27, "grad_norm": 1.3020229427320296, "learning_rate": 1.7141338833302454e-05, "loss": 0.6458, "step": 3473 }, { "epoch": 0.27, "grad_norm": 1.2019619955951104, "learning_rate": 1.713957971516203e-05, "loss": 0.6255, "step": 3474 }, { "epoch": 0.27, "grad_norm": 1.2143210117435042, "learning_rate": 1.71378201462631e-05, "loss": 0.5798, "step": 3475 }, { "epoch": 0.27, "grad_norm": 1.2721459795989332, "learning_rate": 1.7136060126716756e-05, "loss": 0.6014, "step": 3476 }, { "epoch": 0.27, "grad_norm": 1.1761004752814077, "learning_rate": 1.7134299656634124e-05, "loss": 0.5792, "step": 3477 }, { "epoch": 0.27, "grad_norm": 1.1795035717802151, "learning_rate": 1.7132538736126342e-05, "loss": 0.6121, "step": 3478 }, { "epoch": 0.27, "grad_norm": 1.2644927529202392, "learning_rate": 1.713077736530459e-05, "loss": 0.6117, "step": 3479 }, { "epoch": 0.27, "grad_norm": 1.2594005913417883, "learning_rate": 1.712901554428008e-05, "loss": 0.6772, "step": 3480 }, { "epoch": 0.27, "grad_norm": 1.3499656902474506, "learning_rate": 1.7127253273164032e-05, "loss": 0.6002, "step": 3481 }, { "epoch": 0.27, "grad_norm": 1.2425175834040978, "learning_rate": 1.7125490552067713e-05, "loss": 0.6348, "step": 3482 }, { "epoch": 0.27, "grad_norm": 1.1711106414289294, "learning_rate": 1.7123727381102417e-05, "loss": 0.6374, "step": 3483 }, { "epoch": 0.27, "grad_norm": 1.1554935145738767, "learning_rate": 1.7121963760379453e-05, "loss": 0.5657, "step": 3484 }, { "epoch": 0.27, "grad_norm": 1.6971287995949367, "learning_rate": 1.7120199690010176e-05, "loss": 0.6261, "step": 3485 }, { "epoch": 0.27, "grad_norm": 1.1980711077219164, "learning_rate": 1.7118435170105955e-05, "loss": 0.6241, "step": 3486 }, { "epoch": 0.27, "grad_norm": 1.2369071484397642, "learning_rate": 1.7116670200778192e-05, "loss": 0.6096, "step": 3487 }, { "epoch": 0.27, "grad_norm": 1.143335805219271, "learning_rate": 1.711490478213833e-05, "loss": 0.5794, "step": 3488 }, { "epoch": 0.27, "grad_norm": 1.3044845057656975, "learning_rate": 1.7113138914297817e-05, "loss": 0.5988, "step": 3489 }, { "epoch": 0.27, "grad_norm": 1.2158888464306223, "learning_rate": 1.7111372597368143e-05, "loss": 0.6161, "step": 3490 }, { "epoch": 0.27, "grad_norm": 1.1621666132831512, "learning_rate": 1.7109605831460833e-05, "loss": 0.597, "step": 3491 }, { "epoch": 0.27, "grad_norm": 1.2148988472075475, "learning_rate": 1.710783861668742e-05, "loss": 0.5849, "step": 3492 }, { "epoch": 0.27, "grad_norm": 1.2343771246396866, "learning_rate": 1.7106070953159487e-05, "loss": 0.6387, "step": 3493 }, { "epoch": 0.27, "grad_norm": 1.1944441789188138, "learning_rate": 1.710430284098863e-05, "loss": 0.5823, "step": 3494 }, { "epoch": 0.27, "grad_norm": 1.1677784844503332, "learning_rate": 1.7102534280286483e-05, "loss": 0.5873, "step": 3495 }, { "epoch": 0.27, "grad_norm": 1.1186957149015377, "learning_rate": 1.71007652711647e-05, "loss": 0.5679, "step": 3496 }, { "epoch": 0.27, "grad_norm": 1.2166223293656484, "learning_rate": 1.7098995813734974e-05, "loss": 0.6354, "step": 3497 }, { "epoch": 0.27, "grad_norm": 1.2219242915286959, "learning_rate": 1.7097225908109015e-05, "loss": 0.6316, "step": 3498 }, { "epoch": 0.27, "grad_norm": 1.1823154508688276, "learning_rate": 1.7095455554398564e-05, "loss": 0.5865, "step": 3499 }, { "epoch": 0.27, "grad_norm": 1.2132324130318173, "learning_rate": 1.70936847527154e-05, "loss": 0.5596, "step": 3500 }, { "epoch": 0.27, "grad_norm": 1.2081526697959808, "learning_rate": 1.709191350317132e-05, "loss": 0.6299, "step": 3501 }, { "epoch": 0.27, "grad_norm": 1.1080447535927218, "learning_rate": 1.709014180587815e-05, "loss": 0.5576, "step": 3502 }, { "epoch": 0.27, "grad_norm": 1.2654575189991, "learning_rate": 1.7088369660947743e-05, "loss": 0.6246, "step": 3503 }, { "epoch": 0.27, "grad_norm": 1.1062880536447204, "learning_rate": 1.7086597068491994e-05, "loss": 0.5676, "step": 3504 }, { "epoch": 0.27, "grad_norm": 1.1743979697343756, "learning_rate": 1.7084824028622807e-05, "loss": 0.6279, "step": 3505 }, { "epoch": 0.27, "grad_norm": 1.2516906272653687, "learning_rate": 1.708305054145213e-05, "loss": 0.6408, "step": 3506 }, { "epoch": 0.27, "grad_norm": 1.112168194012932, "learning_rate": 1.7081276607091925e-05, "loss": 0.5922, "step": 3507 }, { "epoch": 0.27, "grad_norm": 1.2045648059631897, "learning_rate": 1.7079502225654192e-05, "loss": 0.5833, "step": 3508 }, { "epoch": 0.27, "grad_norm": 1.2075480123416586, "learning_rate": 1.707772739725096e-05, "loss": 0.525, "step": 3509 }, { "epoch": 0.27, "grad_norm": 1.178969006545972, "learning_rate": 1.7075952121994282e-05, "loss": 0.5985, "step": 3510 }, { "epoch": 0.27, "grad_norm": 1.2524471171287925, "learning_rate": 1.707417639999624e-05, "loss": 0.6069, "step": 3511 }, { "epoch": 0.27, "grad_norm": 1.2340928250120227, "learning_rate": 1.707240023136894e-05, "loss": 0.5795, "step": 3512 }, { "epoch": 0.27, "grad_norm": 1.1014666684718406, "learning_rate": 1.7070623616224528e-05, "loss": 0.578, "step": 3513 }, { "epoch": 0.27, "grad_norm": 1.1604010840601888, "learning_rate": 1.7068846554675166e-05, "loss": 0.5926, "step": 3514 }, { "epoch": 0.27, "grad_norm": 1.293871745771315, "learning_rate": 1.706706904683305e-05, "loss": 0.6135, "step": 3515 }, { "epoch": 0.27, "grad_norm": 1.1432762165279406, "learning_rate": 1.7065291092810406e-05, "loss": 0.5393, "step": 3516 }, { "epoch": 0.27, "grad_norm": 1.262780890990764, "learning_rate": 1.7063512692719482e-05, "loss": 0.6187, "step": 3517 }, { "epoch": 0.27, "grad_norm": 1.1681728120982255, "learning_rate": 1.7061733846672562e-05, "loss": 0.5414, "step": 3518 }, { "epoch": 0.27, "grad_norm": 1.1907958921806925, "learning_rate": 1.7059954554781945e-05, "loss": 0.5693, "step": 3519 }, { "epoch": 0.27, "grad_norm": 1.2319715254391224, "learning_rate": 1.7058174817159973e-05, "loss": 0.6132, "step": 3520 }, { "epoch": 0.27, "grad_norm": 1.240147577086982, "learning_rate": 1.7056394633919012e-05, "loss": 0.6046, "step": 3521 }, { "epoch": 0.27, "grad_norm": 1.5087442632153663, "learning_rate": 1.705461400517145e-05, "loss": 0.5853, "step": 3522 }, { "epoch": 0.27, "grad_norm": 1.4098506184169337, "learning_rate": 1.705283293102971e-05, "loss": 0.5993, "step": 3523 }, { "epoch": 0.27, "grad_norm": 1.2521401680706161, "learning_rate": 1.7051051411606238e-05, "loss": 0.5966, "step": 3524 }, { "epoch": 0.27, "grad_norm": 1.2914691076656413, "learning_rate": 1.7049269447013515e-05, "loss": 0.6186, "step": 3525 }, { "epoch": 0.27, "grad_norm": 1.3109645945280441, "learning_rate": 1.704748703736404e-05, "loss": 0.6218, "step": 3526 }, { "epoch": 0.27, "grad_norm": 1.2406136000563492, "learning_rate": 1.7045704182770346e-05, "loss": 0.5841, "step": 3527 }, { "epoch": 0.27, "grad_norm": 1.267768974807635, "learning_rate": 1.7043920883344998e-05, "loss": 0.5947, "step": 3528 }, { "epoch": 0.27, "grad_norm": 1.2891440163471761, "learning_rate": 1.7042137139200583e-05, "loss": 0.6555, "step": 3529 }, { "epoch": 0.27, "grad_norm": 1.2079516608248553, "learning_rate": 1.7040352950449716e-05, "loss": 0.6532, "step": 3530 }, { "epoch": 0.27, "grad_norm": 1.1711107432205794, "learning_rate": 1.7038568317205045e-05, "loss": 0.5984, "step": 3531 }, { "epoch": 0.27, "grad_norm": 1.2320295335059828, "learning_rate": 1.7036783239579243e-05, "loss": 0.6114, "step": 3532 }, { "epoch": 0.27, "grad_norm": 1.3518348816285979, "learning_rate": 1.703499771768501e-05, "loss": 0.6722, "step": 3533 }, { "epoch": 0.27, "grad_norm": 1.419602474930434, "learning_rate": 1.7033211751635074e-05, "loss": 0.6368, "step": 3534 }, { "epoch": 0.27, "grad_norm": 20.15933993999819, "learning_rate": 1.7031425341542193e-05, "loss": 0.6554, "step": 3535 }, { "epoch": 0.27, "grad_norm": 1.2699663561021641, "learning_rate": 1.7029638487519155e-05, "loss": 0.6151, "step": 3536 }, { "epoch": 0.27, "grad_norm": 1.3049581098592968, "learning_rate": 1.702785118967877e-05, "loss": 0.622, "step": 3537 }, { "epoch": 0.27, "grad_norm": 1.2158346274885192, "learning_rate": 1.702606344813388e-05, "loss": 0.5997, "step": 3538 }, { "epoch": 0.27, "grad_norm": 1.1389321083820854, "learning_rate": 1.7024275262997358e-05, "loss": 0.5108, "step": 3539 }, { "epoch": 0.27, "grad_norm": 1.6805761780887818, "learning_rate": 1.702248663438209e-05, "loss": 0.6208, "step": 3540 }, { "epoch": 0.27, "grad_norm": 1.3455553125886481, "learning_rate": 1.7020697562401017e-05, "loss": 0.5675, "step": 3541 }, { "epoch": 0.27, "grad_norm": 1.0953874592292865, "learning_rate": 1.7018908047167083e-05, "loss": 0.5658, "step": 3542 }, { "epoch": 0.27, "grad_norm": 1.1857180526808158, "learning_rate": 1.7017118088793267e-05, "loss": 0.609, "step": 3543 }, { "epoch": 0.27, "grad_norm": 1.150018343571857, "learning_rate": 1.701532768739259e-05, "loss": 0.5614, "step": 3544 }, { "epoch": 0.28, "grad_norm": 1.2616653667293758, "learning_rate": 1.7013536843078077e-05, "loss": 0.6334, "step": 3545 }, { "epoch": 0.28, "grad_norm": 1.2122241168430332, "learning_rate": 1.70117455559628e-05, "loss": 0.5925, "step": 3546 }, { "epoch": 0.28, "grad_norm": 1.171793820430402, "learning_rate": 1.700995382615985e-05, "loss": 0.5664, "step": 3547 }, { "epoch": 0.28, "grad_norm": 1.1670900439489724, "learning_rate": 1.7008161653782344e-05, "loss": 0.5695, "step": 3548 }, { "epoch": 0.28, "grad_norm": 1.1869272055250546, "learning_rate": 1.7006369038943443e-05, "loss": 0.5299, "step": 3549 }, { "epoch": 0.28, "grad_norm": 1.1249377975221864, "learning_rate": 1.700457598175631e-05, "loss": 0.5286, "step": 3550 }, { "epoch": 0.28, "grad_norm": 1.2016604042595487, "learning_rate": 1.700278248233416e-05, "loss": 0.6292, "step": 3551 }, { "epoch": 0.28, "grad_norm": 1.1327456224365782, "learning_rate": 1.700098854079022e-05, "loss": 0.5653, "step": 3552 }, { "epoch": 0.28, "grad_norm": 1.146229860301497, "learning_rate": 1.6999194157237753e-05, "loss": 0.5406, "step": 3553 }, { "epoch": 0.28, "grad_norm": 1.2107167196510564, "learning_rate": 1.699739933179005e-05, "loss": 0.6163, "step": 3554 }, { "epoch": 0.28, "grad_norm": 1.128816013623664, "learning_rate": 1.6995604064560426e-05, "loss": 0.5878, "step": 3555 }, { "epoch": 0.28, "grad_norm": 1.2362328563461915, "learning_rate": 1.699380835566222e-05, "loss": 0.567, "step": 3556 }, { "epoch": 0.28, "grad_norm": 1.1748535450505353, "learning_rate": 1.6992012205208814e-05, "loss": 0.6064, "step": 3557 }, { "epoch": 0.28, "grad_norm": 1.2243381911815883, "learning_rate": 1.6990215613313602e-05, "loss": 0.5744, "step": 3558 }, { "epoch": 0.28, "grad_norm": 1.2039026001634172, "learning_rate": 1.6988418580090013e-05, "loss": 0.6304, "step": 3559 }, { "epoch": 0.28, "grad_norm": 1.1472622313821053, "learning_rate": 1.6986621105651505e-05, "loss": 0.5126, "step": 3560 }, { "epoch": 0.28, "grad_norm": 1.2402514358908967, "learning_rate": 1.6984823190111558e-05, "loss": 0.6169, "step": 3561 }, { "epoch": 0.28, "grad_norm": 1.2778025978322296, "learning_rate": 1.698302483358369e-05, "loss": 0.5921, "step": 3562 }, { "epoch": 0.28, "grad_norm": 1.1746282659089236, "learning_rate": 1.6981226036181433e-05, "loss": 0.6314, "step": 3563 }, { "epoch": 0.28, "grad_norm": 1.2409812300505223, "learning_rate": 1.6979426798018356e-05, "loss": 0.5945, "step": 3564 }, { "epoch": 0.28, "grad_norm": 1.140078322162819, "learning_rate": 1.697762711920806e-05, "loss": 0.5977, "step": 3565 }, { "epoch": 0.28, "grad_norm": 1.327162854114233, "learning_rate": 1.697582699986416e-05, "loss": 0.6625, "step": 3566 }, { "epoch": 0.28, "grad_norm": 1.195544500836556, "learning_rate": 1.697402644010032e-05, "loss": 0.59, "step": 3567 }, { "epoch": 0.28, "grad_norm": 1.2330646561611454, "learning_rate": 1.6972225440030203e-05, "loss": 0.5952, "step": 3568 }, { "epoch": 0.28, "grad_norm": 1.3666670326294448, "learning_rate": 1.697042399976752e-05, "loss": 0.6384, "step": 3569 }, { "epoch": 0.28, "grad_norm": 1.1439809680636073, "learning_rate": 1.6968622119426013e-05, "loss": 0.5658, "step": 3570 }, { "epoch": 0.28, "grad_norm": 1.349977611426743, "learning_rate": 1.696681979911943e-05, "loss": 0.6397, "step": 3571 }, { "epoch": 0.28, "grad_norm": 1.1624509165256394, "learning_rate": 1.696501703896158e-05, "loss": 0.6571, "step": 3572 }, { "epoch": 0.28, "grad_norm": 1.1602184651451246, "learning_rate": 1.6963213839066263e-05, "loss": 0.5956, "step": 3573 }, { "epoch": 0.28, "grad_norm": 1.1053391774942984, "learning_rate": 1.696141019954733e-05, "loss": 0.5249, "step": 3574 }, { "epoch": 0.28, "grad_norm": 1.093012588418014, "learning_rate": 1.6959606120518656e-05, "loss": 0.5847, "step": 3575 }, { "epoch": 0.28, "grad_norm": 1.136895077587339, "learning_rate": 1.695780160209414e-05, "loss": 0.5928, "step": 3576 }, { "epoch": 0.28, "grad_norm": 1.148027878974376, "learning_rate": 1.6955996644387715e-05, "loss": 0.6394, "step": 3577 }, { "epoch": 0.28, "grad_norm": 1.2532928963150862, "learning_rate": 1.695419124751333e-05, "loss": 0.6068, "step": 3578 }, { "epoch": 0.28, "grad_norm": 1.1299222710131411, "learning_rate": 1.6952385411584974e-05, "loss": 0.6031, "step": 3579 }, { "epoch": 0.28, "grad_norm": 1.193586362238359, "learning_rate": 1.695057913671666e-05, "loss": 0.5865, "step": 3580 }, { "epoch": 0.28, "grad_norm": 1.2034823890066315, "learning_rate": 1.694877242302242e-05, "loss": 0.5917, "step": 3581 }, { "epoch": 0.28, "grad_norm": 1.2284215457475098, "learning_rate": 1.694696527061633e-05, "loss": 0.6301, "step": 3582 }, { "epoch": 0.28, "grad_norm": 1.1804113630361248, "learning_rate": 1.6945157679612478e-05, "loss": 0.5869, "step": 3583 }, { "epoch": 0.28, "grad_norm": 1.251924511467942, "learning_rate": 1.694334965012499e-05, "loss": 0.6105, "step": 3584 }, { "epoch": 0.28, "grad_norm": 1.2661377139075032, "learning_rate": 1.6941541182268015e-05, "loss": 0.5938, "step": 3585 }, { "epoch": 0.28, "grad_norm": 1.2739834697465422, "learning_rate": 1.6939732276155733e-05, "loss": 0.5946, "step": 3586 }, { "epoch": 0.28, "grad_norm": 1.1314964278936366, "learning_rate": 1.6937922931902348e-05, "loss": 0.5699, "step": 3587 }, { "epoch": 0.28, "grad_norm": 1.1397968578040283, "learning_rate": 1.6936113149622093e-05, "loss": 0.5665, "step": 3588 }, { "epoch": 0.28, "grad_norm": 1.1232114986339181, "learning_rate": 1.6934302929429226e-05, "loss": 0.5357, "step": 3589 }, { "epoch": 0.28, "grad_norm": 1.2606855478720311, "learning_rate": 1.6932492271438046e-05, "loss": 0.6386, "step": 3590 }, { "epoch": 0.28, "grad_norm": 1.2709180094957568, "learning_rate": 1.6930681175762855e-05, "loss": 0.6314, "step": 3591 }, { "epoch": 0.28, "grad_norm": 1.1488307876322585, "learning_rate": 1.692886964251801e-05, "loss": 0.5412, "step": 3592 }, { "epoch": 0.28, "grad_norm": 1.1900822020436885, "learning_rate": 1.6927057671817872e-05, "loss": 0.5871, "step": 3593 }, { "epoch": 0.28, "grad_norm": 1.1997646120790852, "learning_rate": 1.6925245263776842e-05, "loss": 0.6134, "step": 3594 }, { "epoch": 0.28, "grad_norm": 1.15087450404533, "learning_rate": 1.6923432418509356e-05, "loss": 0.6305, "step": 3595 }, { "epoch": 0.28, "grad_norm": 1.150878543716134, "learning_rate": 1.6921619136129856e-05, "loss": 0.6098, "step": 3596 }, { "epoch": 0.28, "grad_norm": 1.2434921131474457, "learning_rate": 1.691980541675283e-05, "loss": 0.6404, "step": 3597 }, { "epoch": 0.28, "grad_norm": 1.3097038456982897, "learning_rate": 1.6917991260492787e-05, "loss": 0.611, "step": 3598 }, { "epoch": 0.28, "grad_norm": 1.229576829052618, "learning_rate": 1.691617666746426e-05, "loss": 0.5951, "step": 3599 }, { "epoch": 0.28, "grad_norm": 1.2509677478679255, "learning_rate": 1.691436163778182e-05, "loss": 0.6254, "step": 3600 }, { "epoch": 0.28, "grad_norm": 1.2207993126166072, "learning_rate": 1.691254617156006e-05, "loss": 0.6067, "step": 3601 }, { "epoch": 0.28, "grad_norm": 1.2126412496190742, "learning_rate": 1.6910730268913593e-05, "loss": 0.6069, "step": 3602 }, { "epoch": 0.28, "grad_norm": 1.1482466454767337, "learning_rate": 1.690891392995707e-05, "loss": 0.5591, "step": 3603 }, { "epoch": 0.28, "grad_norm": 1.2779331072552105, "learning_rate": 1.6907097154805162e-05, "loss": 0.6097, "step": 3604 }, { "epoch": 0.28, "grad_norm": 1.1215292054998223, "learning_rate": 1.690527994357258e-05, "loss": 0.5335, "step": 3605 }, { "epoch": 0.28, "grad_norm": 1.2097671267358798, "learning_rate": 1.6903462296374048e-05, "loss": 0.5955, "step": 3606 }, { "epoch": 0.28, "grad_norm": 1.265338205880517, "learning_rate": 1.690164421332432e-05, "loss": 0.6472, "step": 3607 }, { "epoch": 0.28, "grad_norm": 1.1631976505570645, "learning_rate": 1.689982569453819e-05, "loss": 0.6159, "step": 3608 }, { "epoch": 0.28, "grad_norm": 1.139486554701297, "learning_rate": 1.689800674013046e-05, "loss": 0.5496, "step": 3609 }, { "epoch": 0.28, "grad_norm": 1.2219905808362046, "learning_rate": 1.6896187350215977e-05, "loss": 0.5765, "step": 3610 }, { "epoch": 0.28, "grad_norm": 1.128260021636023, "learning_rate": 1.689436752490961e-05, "loss": 0.5796, "step": 3611 }, { "epoch": 0.28, "grad_norm": 1.1476668282703644, "learning_rate": 1.689254726432625e-05, "loss": 0.5473, "step": 3612 }, { "epoch": 0.28, "grad_norm": 1.2214486492947931, "learning_rate": 1.689072656858082e-05, "loss": 0.5615, "step": 3613 }, { "epoch": 0.28, "grad_norm": 1.2743865145340358, "learning_rate": 1.6888905437788268e-05, "loss": 0.6311, "step": 3614 }, { "epoch": 0.28, "grad_norm": 1.1860746060654277, "learning_rate": 1.6887083872063574e-05, "loss": 0.5759, "step": 3615 }, { "epoch": 0.28, "grad_norm": 1.2199671120979143, "learning_rate": 1.6885261871521746e-05, "loss": 0.6144, "step": 3616 }, { "epoch": 0.28, "grad_norm": 1.1535508380066237, "learning_rate": 1.688343943627781e-05, "loss": 0.5686, "step": 3617 }, { "epoch": 0.28, "grad_norm": 1.1471007995347282, "learning_rate": 1.6881616566446827e-05, "loss": 0.5784, "step": 3618 }, { "epoch": 0.28, "grad_norm": 1.2844511376156666, "learning_rate": 1.6879793262143888e-05, "loss": 0.6096, "step": 3619 }, { "epoch": 0.28, "grad_norm": 1.2784397661016051, "learning_rate": 1.6877969523484107e-05, "loss": 0.6085, "step": 3620 }, { "epoch": 0.28, "grad_norm": 1.2485378774076965, "learning_rate": 1.6876145350582623e-05, "loss": 0.6199, "step": 3621 }, { "epoch": 0.28, "grad_norm": 1.4544783821951675, "learning_rate": 1.6874320743554605e-05, "loss": 0.6546, "step": 3622 }, { "epoch": 0.28, "grad_norm": 1.262795192842514, "learning_rate": 1.6872495702515253e-05, "loss": 0.6071, "step": 3623 }, { "epoch": 0.28, "grad_norm": 1.1761441098587606, "learning_rate": 1.6870670227579788e-05, "loss": 0.6048, "step": 3624 }, { "epoch": 0.28, "grad_norm": 1.1147398912989954, "learning_rate": 1.6868844318863466e-05, "loss": 0.5831, "step": 3625 }, { "epoch": 0.28, "grad_norm": 1.2985301199332893, "learning_rate": 1.6867017976481563e-05, "loss": 0.6552, "step": 3626 }, { "epoch": 0.28, "grad_norm": 1.1591833897108108, "learning_rate": 1.6865191200549387e-05, "loss": 0.619, "step": 3627 }, { "epoch": 0.28, "grad_norm": 1.3361089049911703, "learning_rate": 1.686336399118227e-05, "loss": 0.6389, "step": 3628 }, { "epoch": 0.28, "grad_norm": 1.1874962856836957, "learning_rate": 1.686153634849557e-05, "loss": 0.5827, "step": 3629 }, { "epoch": 0.28, "grad_norm": 1.1986359413308774, "learning_rate": 1.6859708272604685e-05, "loss": 0.5449, "step": 3630 }, { "epoch": 0.28, "grad_norm": 1.1426136500386732, "learning_rate": 1.6857879763625023e-05, "loss": 0.5697, "step": 3631 }, { "epoch": 0.28, "grad_norm": 1.2424993543412126, "learning_rate": 1.6856050821672028e-05, "loss": 0.6041, "step": 3632 }, { "epoch": 0.28, "grad_norm": 1.279995187511934, "learning_rate": 1.6854221446861175e-05, "loss": 0.6209, "step": 3633 }, { "epoch": 0.28, "grad_norm": 1.2880656694114747, "learning_rate": 1.6852391639307956e-05, "loss": 0.5896, "step": 3634 }, { "epoch": 0.28, "grad_norm": 1.1408451860665263, "learning_rate": 1.6850561399127902e-05, "loss": 0.5795, "step": 3635 }, { "epoch": 0.28, "grad_norm": 1.2170524512454841, "learning_rate": 1.6848730726436562e-05, "loss": 0.6073, "step": 3636 }, { "epoch": 0.28, "grad_norm": 1.0355150860397122, "learning_rate": 1.6846899621349516e-05, "loss": 0.5646, "step": 3637 }, { "epoch": 0.28, "grad_norm": 1.245254042341054, "learning_rate": 1.6845068083982373e-05, "loss": 0.6033, "step": 3638 }, { "epoch": 0.28, "grad_norm": 1.0928416158426182, "learning_rate": 1.684323611445076e-05, "loss": 0.5769, "step": 3639 }, { "epoch": 0.28, "grad_norm": 1.1603488954835428, "learning_rate": 1.684140371287035e-05, "loss": 0.5192, "step": 3640 }, { "epoch": 0.28, "grad_norm": 1.26295590053536, "learning_rate": 1.6839570879356827e-05, "loss": 0.6128, "step": 3641 }, { "epoch": 0.28, "grad_norm": 1.0958204019651383, "learning_rate": 1.6837737614025904e-05, "loss": 0.5739, "step": 3642 }, { "epoch": 0.28, "grad_norm": 1.138981929069691, "learning_rate": 1.683590391699333e-05, "loss": 0.586, "step": 3643 }, { "epoch": 0.28, "grad_norm": 1.2919229386528515, "learning_rate": 1.683406978837487e-05, "loss": 0.5916, "step": 3644 }, { "epoch": 0.28, "grad_norm": 1.2962799085736711, "learning_rate": 1.683223522828633e-05, "loss": 0.6505, "step": 3645 }, { "epoch": 0.28, "grad_norm": 1.0693247337276943, "learning_rate": 1.6830400236843525e-05, "loss": 0.5842, "step": 3646 }, { "epoch": 0.28, "grad_norm": 1.2289810148220723, "learning_rate": 1.6828564814162318e-05, "loss": 0.5976, "step": 3647 }, { "epoch": 0.28, "grad_norm": 1.1618429439528235, "learning_rate": 1.682672896035858e-05, "loss": 0.5479, "step": 3648 }, { "epoch": 0.28, "grad_norm": 1.1851398705775793, "learning_rate": 1.682489267554822e-05, "loss": 0.6044, "step": 3649 }, { "epoch": 0.28, "grad_norm": 1.190137994843335, "learning_rate": 1.6823055959847177e-05, "loss": 0.5815, "step": 3650 }, { "epoch": 0.28, "grad_norm": 1.242280968562384, "learning_rate": 1.6821218813371407e-05, "loss": 0.5952, "step": 3651 }, { "epoch": 0.28, "grad_norm": 1.2239719417167219, "learning_rate": 1.68193812362369e-05, "loss": 0.6235, "step": 3652 }, { "epoch": 0.28, "grad_norm": 1.249072780037141, "learning_rate": 1.6817543228559675e-05, "loss": 0.6111, "step": 3653 }, { "epoch": 0.28, "grad_norm": 1.255633344228541, "learning_rate": 1.6815704790455768e-05, "loss": 0.5582, "step": 3654 }, { "epoch": 0.28, "grad_norm": 1.1690479510762544, "learning_rate": 1.6813865922041258e-05, "loss": 0.6173, "step": 3655 }, { "epoch": 0.28, "grad_norm": 1.1983124231274709, "learning_rate": 1.6812026623432233e-05, "loss": 0.5681, "step": 3656 }, { "epoch": 0.28, "grad_norm": 1.1530439932470744, "learning_rate": 1.6810186894744825e-05, "loss": 0.5886, "step": 3657 }, { "epoch": 0.28, "grad_norm": 1.1777359438960062, "learning_rate": 1.680834673609518e-05, "loss": 0.6041, "step": 3658 }, { "epoch": 0.28, "grad_norm": 1.2249689137154018, "learning_rate": 1.680650614759948e-05, "loss": 0.6975, "step": 3659 }, { "epoch": 0.28, "grad_norm": 1.2726232815963103, "learning_rate": 1.6804665129373928e-05, "loss": 0.583, "step": 3660 }, { "epoch": 0.28, "grad_norm": 1.1454927747541033, "learning_rate": 1.6802823681534765e-05, "loss": 0.5201, "step": 3661 }, { "epoch": 0.28, "grad_norm": 1.1847175324260097, "learning_rate": 1.680098180419824e-05, "loss": 0.587, "step": 3662 }, { "epoch": 0.28, "grad_norm": 1.212679489850876, "learning_rate": 1.6799139497480644e-05, "loss": 0.6453, "step": 3663 }, { "epoch": 0.28, "grad_norm": 1.1576328256579598, "learning_rate": 1.6797296761498295e-05, "loss": 0.5263, "step": 3664 }, { "epoch": 0.28, "grad_norm": 1.2011842804555024, "learning_rate": 1.6795453596367533e-05, "loss": 0.6175, "step": 3665 }, { "epoch": 0.28, "grad_norm": 1.2626533471824568, "learning_rate": 1.6793610002204724e-05, "loss": 0.6154, "step": 3666 }, { "epoch": 0.28, "grad_norm": 1.2283046037807088, "learning_rate": 1.6791765979126267e-05, "loss": 0.6373, "step": 3667 }, { "epoch": 0.28, "grad_norm": 1.1343824885517877, "learning_rate": 1.6789921527248578e-05, "loss": 0.5828, "step": 3668 }, { "epoch": 0.28, "grad_norm": 1.1749861046801389, "learning_rate": 1.6788076646688117e-05, "loss": 0.6208, "step": 3669 }, { "epoch": 0.28, "grad_norm": 1.1817832167161415, "learning_rate": 1.6786231337561352e-05, "loss": 0.5823, "step": 3670 }, { "epoch": 0.28, "grad_norm": 1.2651603222452013, "learning_rate": 1.6784385599984794e-05, "loss": 0.6719, "step": 3671 }, { "epoch": 0.28, "grad_norm": 1.241392012863191, "learning_rate": 1.678253943407497e-05, "loss": 0.566, "step": 3672 }, { "epoch": 0.28, "grad_norm": 1.2266400063682432, "learning_rate": 1.6780692839948433e-05, "loss": 0.6127, "step": 3673 }, { "epoch": 0.29, "grad_norm": 1.2001393912737548, "learning_rate": 1.6778845817721778e-05, "loss": 0.5785, "step": 3674 }, { "epoch": 0.29, "grad_norm": 1.2902381391063253, "learning_rate": 1.677699836751161e-05, "loss": 0.6447, "step": 3675 }, { "epoch": 0.29, "grad_norm": 1.23759219856898, "learning_rate": 1.677515048943457e-05, "loss": 0.5944, "step": 3676 }, { "epoch": 0.29, "grad_norm": 1.2434291751695066, "learning_rate": 1.6773302183607327e-05, "loss": 0.6044, "step": 3677 }, { "epoch": 0.29, "grad_norm": 1.1192022443121712, "learning_rate": 1.6771453450146568e-05, "loss": 0.58, "step": 3678 }, { "epoch": 0.29, "grad_norm": 1.1291153657996924, "learning_rate": 1.676960428916902e-05, "loss": 0.5502, "step": 3679 }, { "epoch": 0.29, "grad_norm": 1.1483491617079862, "learning_rate": 1.6767754700791425e-05, "loss": 0.5985, "step": 3680 }, { "epoch": 0.29, "grad_norm": 1.3670887938699179, "learning_rate": 1.676590468513056e-05, "loss": 0.7028, "step": 3681 }, { "epoch": 0.29, "grad_norm": 1.197612270436084, "learning_rate": 1.6764054242303223e-05, "loss": 0.5851, "step": 3682 }, { "epoch": 0.29, "grad_norm": 1.1398866432282182, "learning_rate": 1.6762203372426243e-05, "loss": 0.5857, "step": 3683 }, { "epoch": 0.29, "grad_norm": 1.248080496417242, "learning_rate": 1.6760352075616476e-05, "loss": 0.5475, "step": 3684 }, { "epoch": 0.29, "grad_norm": 1.2040667625528025, "learning_rate": 1.6758500351990808e-05, "loss": 0.6096, "step": 3685 }, { "epoch": 0.29, "grad_norm": 1.2591083083313643, "learning_rate": 1.675664820166614e-05, "loss": 0.6257, "step": 3686 }, { "epoch": 0.29, "grad_norm": 1.265467410225235, "learning_rate": 1.6754795624759414e-05, "loss": 0.5845, "step": 3687 }, { "epoch": 0.29, "grad_norm": 1.1726670195449058, "learning_rate": 1.6752942621387583e-05, "loss": 0.605, "step": 3688 }, { "epoch": 0.29, "grad_norm": 1.303488357345625, "learning_rate": 1.6751089191667648e-05, "loss": 0.5594, "step": 3689 }, { "epoch": 0.29, "grad_norm": 1.0873268625400927, "learning_rate": 1.6749235335716624e-05, "loss": 0.6074, "step": 3690 }, { "epoch": 0.29, "grad_norm": 1.2006165748312496, "learning_rate": 1.674738105365155e-05, "loss": 0.5984, "step": 3691 }, { "epoch": 0.29, "grad_norm": 1.1164834531727266, "learning_rate": 1.67455263455895e-05, "loss": 0.593, "step": 3692 }, { "epoch": 0.29, "grad_norm": 1.1351784564141443, "learning_rate": 1.6743671211647564e-05, "loss": 0.5658, "step": 3693 }, { "epoch": 0.29, "grad_norm": 1.1342483891220712, "learning_rate": 1.6741815651942873e-05, "loss": 0.5802, "step": 3694 }, { "epoch": 0.29, "grad_norm": 1.1397256833189546, "learning_rate": 1.673995966659258e-05, "loss": 0.5906, "step": 3695 }, { "epoch": 0.29, "grad_norm": 1.3077400725258972, "learning_rate": 1.673810325571386e-05, "loss": 0.6365, "step": 3696 }, { "epoch": 0.29, "grad_norm": 1.2617434094738549, "learning_rate": 1.6736246419423915e-05, "loss": 0.661, "step": 3697 }, { "epoch": 0.29, "grad_norm": 1.0985808364441236, "learning_rate": 1.6734389157839975e-05, "loss": 0.5632, "step": 3698 }, { "epoch": 0.29, "grad_norm": 1.2235864385416373, "learning_rate": 1.673253147107931e-05, "loss": 0.6572, "step": 3699 }, { "epoch": 0.29, "grad_norm": 1.2304493251659951, "learning_rate": 1.6730673359259194e-05, "loss": 0.618, "step": 3700 }, { "epoch": 0.29, "grad_norm": 1.1962495010996768, "learning_rate": 1.6728814822496944e-05, "loss": 0.5747, "step": 3701 }, { "epoch": 0.29, "grad_norm": 1.2330223108022649, "learning_rate": 1.6726955860909903e-05, "loss": 0.5644, "step": 3702 }, { "epoch": 0.29, "grad_norm": 1.186116717923621, "learning_rate": 1.6725096474615423e-05, "loss": 0.5529, "step": 3703 }, { "epoch": 0.29, "grad_norm": 1.1317288707684572, "learning_rate": 1.6723236663730912e-05, "loss": 0.5652, "step": 3704 }, { "epoch": 0.29, "grad_norm": 1.1843244858892736, "learning_rate": 1.672137642837378e-05, "loss": 0.6114, "step": 3705 }, { "epoch": 0.29, "grad_norm": 1.2262129558913495, "learning_rate": 1.6719515768661477e-05, "loss": 0.6509, "step": 3706 }, { "epoch": 0.29, "grad_norm": 1.2056599948010467, "learning_rate": 1.6717654684711475e-05, "loss": 0.6074, "step": 3707 }, { "epoch": 0.29, "grad_norm": 1.1820732396667126, "learning_rate": 1.6715793176641275e-05, "loss": 0.6061, "step": 3708 }, { "epoch": 0.29, "grad_norm": 1.096374034324032, "learning_rate": 1.67139312445684e-05, "loss": 0.5708, "step": 3709 }, { "epoch": 0.29, "grad_norm": 1.0450719422577202, "learning_rate": 1.671206888861041e-05, "loss": 0.5936, "step": 3710 }, { "epoch": 0.29, "grad_norm": 1.2613089164530626, "learning_rate": 1.6710206108884884e-05, "loss": 0.5955, "step": 3711 }, { "epoch": 0.29, "grad_norm": 1.2456495874622497, "learning_rate": 1.6708342905509424e-05, "loss": 0.6818, "step": 3712 }, { "epoch": 0.29, "grad_norm": 1.2108561826905424, "learning_rate": 1.670647927860166e-05, "loss": 0.5932, "step": 3713 }, { "epoch": 0.29, "grad_norm": 1.344744558220863, "learning_rate": 1.6704615228279262e-05, "loss": 0.6318, "step": 3714 }, { "epoch": 0.29, "grad_norm": 1.22814940799599, "learning_rate": 1.6702750754659917e-05, "loss": 0.6323, "step": 3715 }, { "epoch": 0.29, "grad_norm": 1.4351414114362013, "learning_rate": 1.6700885857861332e-05, "loss": 0.6453, "step": 3716 }, { "epoch": 0.29, "grad_norm": 1.204848899864932, "learning_rate": 1.6699020538001252e-05, "loss": 0.533, "step": 3717 }, { "epoch": 0.29, "grad_norm": 1.3624670444667628, "learning_rate": 1.6697154795197442e-05, "loss": 0.6595, "step": 3718 }, { "epoch": 0.29, "grad_norm": 1.2947579534698466, "learning_rate": 1.6695288629567694e-05, "loss": 0.5823, "step": 3719 }, { "epoch": 0.29, "grad_norm": 1.1919116839808113, "learning_rate": 1.6693422041229836e-05, "loss": 0.6098, "step": 3720 }, { "epoch": 0.29, "grad_norm": 1.2553045729257943, "learning_rate": 1.669155503030171e-05, "loss": 0.5954, "step": 3721 }, { "epoch": 0.29, "grad_norm": 1.177709576043585, "learning_rate": 1.6689687596901192e-05, "loss": 0.5744, "step": 3722 }, { "epoch": 0.29, "grad_norm": 1.2484855537571784, "learning_rate": 1.668781974114618e-05, "loss": 0.6338, "step": 3723 }, { "epoch": 0.29, "grad_norm": 1.1868125029667411, "learning_rate": 1.6685951463154602e-05, "loss": 0.5801, "step": 3724 }, { "epoch": 0.29, "grad_norm": 1.2722151336629768, "learning_rate": 1.6684082763044415e-05, "loss": 0.6033, "step": 3725 }, { "epoch": 0.29, "grad_norm": 1.2374335434919408, "learning_rate": 1.66822136409336e-05, "loss": 0.6073, "step": 3726 }, { "epoch": 0.29, "grad_norm": 1.354431855082625, "learning_rate": 1.6680344096940157e-05, "loss": 0.6317, "step": 3727 }, { "epoch": 0.29, "grad_norm": 1.164652476478852, "learning_rate": 1.667847413118213e-05, "loss": 0.5498, "step": 3728 }, { "epoch": 0.29, "grad_norm": 1.0840897792398052, "learning_rate": 1.667660374377757e-05, "loss": 0.5425, "step": 3729 }, { "epoch": 0.29, "grad_norm": 1.2490513539717667, "learning_rate": 1.6674732934844574e-05, "loss": 0.6148, "step": 3730 }, { "epoch": 0.29, "grad_norm": 1.1622798506131633, "learning_rate": 1.6672861704501247e-05, "loss": 0.5757, "step": 3731 }, { "epoch": 0.29, "grad_norm": 1.2165724055908238, "learning_rate": 1.6670990052865738e-05, "loss": 0.5865, "step": 3732 }, { "epoch": 0.29, "grad_norm": 1.2117058070147622, "learning_rate": 1.66691179800562e-05, "loss": 0.5735, "step": 3733 }, { "epoch": 0.29, "grad_norm": 1.1343691423753561, "learning_rate": 1.6667245486190845e-05, "loss": 0.574, "step": 3734 }, { "epoch": 0.29, "grad_norm": 1.2180225940325038, "learning_rate": 1.6665372571387882e-05, "loss": 0.5949, "step": 3735 }, { "epoch": 0.29, "grad_norm": 1.3253238084378547, "learning_rate": 1.6663499235765557e-05, "loss": 0.6714, "step": 3736 }, { "epoch": 0.29, "grad_norm": 1.2749582583008534, "learning_rate": 1.6661625479442147e-05, "loss": 0.5866, "step": 3737 }, { "epoch": 0.29, "grad_norm": 1.2200862210714658, "learning_rate": 1.6659751302535952e-05, "loss": 0.6255, "step": 3738 }, { "epoch": 0.29, "grad_norm": 1.097326045751031, "learning_rate": 1.6657876705165296e-05, "loss": 0.5646, "step": 3739 }, { "epoch": 0.29, "grad_norm": 1.1692795558454008, "learning_rate": 1.6656001687448532e-05, "loss": 0.5607, "step": 3740 }, { "epoch": 0.29, "grad_norm": 1.1575621299465007, "learning_rate": 1.6654126249504042e-05, "loss": 0.6271, "step": 3741 }, { "epoch": 0.29, "grad_norm": 1.203734998758077, "learning_rate": 1.665225039145024e-05, "loss": 0.6097, "step": 3742 }, { "epoch": 0.29, "grad_norm": 1.2704827601596627, "learning_rate": 1.6650374113405536e-05, "loss": 0.6201, "step": 3743 }, { "epoch": 0.29, "grad_norm": 1.2017453693184021, "learning_rate": 1.664849741548841e-05, "loss": 0.5835, "step": 3744 }, { "epoch": 0.29, "grad_norm": 1.176355266178128, "learning_rate": 1.664662029781734e-05, "loss": 0.5748, "step": 3745 }, { "epoch": 0.29, "grad_norm": 1.287058573112449, "learning_rate": 1.6644742760510837e-05, "loss": 0.6824, "step": 3746 }, { "epoch": 0.29, "grad_norm": 1.13325965211748, "learning_rate": 1.6642864803687443e-05, "loss": 0.5917, "step": 3747 }, { "epoch": 0.29, "grad_norm": 1.1196763385852007, "learning_rate": 1.664098642746572e-05, "loss": 0.5421, "step": 3748 }, { "epoch": 0.29, "grad_norm": 1.1415380194268863, "learning_rate": 1.663910763196426e-05, "loss": 0.6158, "step": 3749 }, { "epoch": 0.29, "grad_norm": 1.157947994648923, "learning_rate": 1.663722841730168e-05, "loss": 0.5582, "step": 3750 }, { "epoch": 0.29, "grad_norm": 1.1579000710177414, "learning_rate": 1.663534878359663e-05, "loss": 0.6173, "step": 3751 }, { "epoch": 0.29, "grad_norm": 1.2256714323703495, "learning_rate": 1.6633468730967778e-05, "loss": 0.625, "step": 3752 }, { "epoch": 0.29, "grad_norm": 1.2527372906766623, "learning_rate": 1.663158825953382e-05, "loss": 0.6353, "step": 3753 }, { "epoch": 0.29, "grad_norm": 1.3130623203226444, "learning_rate": 1.662970736941348e-05, "loss": 0.5916, "step": 3754 }, { "epoch": 0.29, "grad_norm": 1.3642249134113675, "learning_rate": 1.662782606072551e-05, "loss": 0.6803, "step": 3755 }, { "epoch": 0.29, "grad_norm": 1.3849915399534007, "learning_rate": 1.6625944333588686e-05, "loss": 0.5918, "step": 3756 }, { "epoch": 0.29, "grad_norm": 1.1508884356706663, "learning_rate": 1.6624062188121808e-05, "loss": 0.5591, "step": 3757 }, { "epoch": 0.29, "grad_norm": 1.1644823482087894, "learning_rate": 1.662217962444371e-05, "loss": 0.5957, "step": 3758 }, { "epoch": 0.29, "grad_norm": 1.2755559606713933, "learning_rate": 1.6620296642673248e-05, "loss": 0.6065, "step": 3759 }, { "epoch": 0.29, "grad_norm": 1.2164102739232026, "learning_rate": 1.6618413242929302e-05, "loss": 0.575, "step": 3760 }, { "epoch": 0.29, "grad_norm": 1.1921120975992539, "learning_rate": 1.661652942533078e-05, "loss": 0.5584, "step": 3761 }, { "epoch": 0.29, "grad_norm": 1.0089075807300032, "learning_rate": 1.661464518999662e-05, "loss": 0.5146, "step": 3762 }, { "epoch": 0.29, "grad_norm": 1.2827660964583378, "learning_rate": 1.6612760537045782e-05, "loss": 0.5962, "step": 3763 }, { "epoch": 0.29, "grad_norm": 1.0936471073254517, "learning_rate": 1.6610875466597252e-05, "loss": 0.5608, "step": 3764 }, { "epoch": 0.29, "grad_norm": 1.3741402539100864, "learning_rate": 1.660898997877005e-05, "loss": 0.6879, "step": 3765 }, { "epoch": 0.29, "grad_norm": 1.0385118904296284, "learning_rate": 1.660710407368321e-05, "loss": 0.5169, "step": 3766 }, { "epoch": 0.29, "grad_norm": 1.1351121908355077, "learning_rate": 1.66052177514558e-05, "loss": 0.5849, "step": 3767 }, { "epoch": 0.29, "grad_norm": 1.081335291502842, "learning_rate": 1.660333101220692e-05, "loss": 0.5491, "step": 3768 }, { "epoch": 0.29, "grad_norm": 1.1485172685181053, "learning_rate": 1.660144385605568e-05, "loss": 0.5717, "step": 3769 }, { "epoch": 0.29, "grad_norm": 1.2980034296581175, "learning_rate": 1.659955628312123e-05, "loss": 0.6213, "step": 3770 }, { "epoch": 0.29, "grad_norm": 1.1887873648337839, "learning_rate": 1.6597668293522745e-05, "loss": 0.6154, "step": 3771 }, { "epoch": 0.29, "grad_norm": 1.1951785043646104, "learning_rate": 1.659577988737942e-05, "loss": 0.6031, "step": 3772 }, { "epoch": 0.29, "grad_norm": 1.1354199622336807, "learning_rate": 1.659389106481048e-05, "loss": 0.5819, "step": 3773 }, { "epoch": 0.29, "grad_norm": 1.2259338128362396, "learning_rate": 1.659200182593518e-05, "loss": 0.5552, "step": 3774 }, { "epoch": 0.29, "grad_norm": 1.1859512267995729, "learning_rate": 1.6590112170872792e-05, "loss": 0.5845, "step": 3775 }, { "epoch": 0.29, "grad_norm": 1.2402792134026883, "learning_rate": 1.6588222099742624e-05, "loss": 0.5888, "step": 3776 }, { "epoch": 0.29, "grad_norm": 1.1730735244797954, "learning_rate": 1.6586331612664005e-05, "loss": 0.5874, "step": 3777 }, { "epoch": 0.29, "grad_norm": 1.2310936688160352, "learning_rate": 1.658444070975629e-05, "loss": 0.6599, "step": 3778 }, { "epoch": 0.29, "grad_norm": 1.2799764677209153, "learning_rate": 1.658254939113886e-05, "loss": 0.6245, "step": 3779 }, { "epoch": 0.29, "grad_norm": 1.0665915192021374, "learning_rate": 1.658065765693112e-05, "loss": 0.5781, "step": 3780 }, { "epoch": 0.29, "grad_norm": 1.1837711516024327, "learning_rate": 1.657876550725252e-05, "loss": 0.6121, "step": 3781 }, { "epoch": 0.29, "grad_norm": 1.2186858820555029, "learning_rate": 1.6576872942222504e-05, "loss": 0.6413, "step": 3782 }, { "epoch": 0.29, "grad_norm": 1.2648061293154886, "learning_rate": 1.6574979961960572e-05, "loss": 0.5985, "step": 3783 }, { "epoch": 0.29, "grad_norm": 1.221544143878446, "learning_rate": 1.657308656658623e-05, "loss": 0.6057, "step": 3784 }, { "epoch": 0.29, "grad_norm": 1.122637758786287, "learning_rate": 1.6571192756219024e-05, "loss": 0.567, "step": 3785 }, { "epoch": 0.29, "grad_norm": 1.2191033462452205, "learning_rate": 1.6569298530978516e-05, "loss": 0.6446, "step": 3786 }, { "epoch": 0.29, "grad_norm": 1.2007567642088104, "learning_rate": 1.6567403890984292e-05, "loss": 0.6187, "step": 3787 }, { "epoch": 0.29, "grad_norm": 1.1891290132263785, "learning_rate": 1.6565508836355983e-05, "loss": 0.5518, "step": 3788 }, { "epoch": 0.29, "grad_norm": 1.25788282114907, "learning_rate": 1.6563613367213225e-05, "loss": 0.5823, "step": 3789 }, { "epoch": 0.29, "grad_norm": 1.2776215044232617, "learning_rate": 1.6561717483675695e-05, "loss": 0.6181, "step": 3790 }, { "epoch": 0.29, "grad_norm": 1.2788741716522585, "learning_rate": 1.6559821185863082e-05, "loss": 0.5985, "step": 3791 }, { "epoch": 0.29, "grad_norm": 1.2411446662456593, "learning_rate": 1.6557924473895115e-05, "loss": 0.6061, "step": 3792 }, { "epoch": 0.29, "grad_norm": 1.2374640815993796, "learning_rate": 1.6556027347891542e-05, "loss": 0.5734, "step": 3793 }, { "epoch": 0.29, "grad_norm": 1.346408963394928, "learning_rate": 1.6554129807972135e-05, "loss": 0.6389, "step": 3794 }, { "epoch": 0.29, "grad_norm": 1.1061710243267628, "learning_rate": 1.6552231854256704e-05, "loss": 0.5716, "step": 3795 }, { "epoch": 0.29, "grad_norm": 1.2882410379753582, "learning_rate": 1.6550333486865068e-05, "loss": 0.6523, "step": 3796 }, { "epoch": 0.29, "grad_norm": 1.2567514244422102, "learning_rate": 1.654843470591708e-05, "loss": 0.6178, "step": 3797 }, { "epoch": 0.29, "grad_norm": 1.1786103035761748, "learning_rate": 1.654653551153263e-05, "loss": 0.5534, "step": 3798 }, { "epoch": 0.29, "grad_norm": 1.1545824965867795, "learning_rate": 1.6544635903831616e-05, "loss": 0.6195, "step": 3799 }, { "epoch": 0.29, "grad_norm": 1.1544990685196097, "learning_rate": 1.6542735882933967e-05, "loss": 0.612, "step": 3800 }, { "epoch": 0.29, "grad_norm": 1.218692876015038, "learning_rate": 1.6540835448959648e-05, "loss": 0.559, "step": 3801 }, { "epoch": 0.29, "grad_norm": 1.2635524404411844, "learning_rate": 1.653893460202864e-05, "loss": 0.6071, "step": 3802 }, { "epoch": 0.3, "grad_norm": 1.2577346398559268, "learning_rate": 1.6537033342260957e-05, "loss": 0.6566, "step": 3803 }, { "epoch": 0.3, "grad_norm": 1.1768399664981797, "learning_rate": 1.653513166977663e-05, "loss": 0.596, "step": 3804 }, { "epoch": 0.3, "grad_norm": 1.1698678693581561, "learning_rate": 1.6533229584695726e-05, "loss": 0.5922, "step": 3805 }, { "epoch": 0.3, "grad_norm": 1.2614141515821948, "learning_rate": 1.653132708713833e-05, "loss": 0.6109, "step": 3806 }, { "epoch": 0.3, "grad_norm": 1.3486700253114035, "learning_rate": 1.6529424177224558e-05, "loss": 0.6106, "step": 3807 }, { "epoch": 0.3, "grad_norm": 1.3491381472580792, "learning_rate": 1.652752085507455e-05, "loss": 0.6157, "step": 3808 }, { "epoch": 0.3, "grad_norm": 1.1173841930181507, "learning_rate": 1.6525617120808474e-05, "loss": 0.5551, "step": 3809 }, { "epoch": 0.3, "grad_norm": 1.181219205505385, "learning_rate": 1.6523712974546522e-05, "loss": 0.6224, "step": 3810 }, { "epoch": 0.3, "grad_norm": 1.0870230209699292, "learning_rate": 1.652180841640891e-05, "loss": 0.5827, "step": 3811 }, { "epoch": 0.3, "grad_norm": 1.1779341646848465, "learning_rate": 1.6519903446515884e-05, "loss": 0.6034, "step": 3812 }, { "epoch": 0.3, "grad_norm": 1.2670920076149634, "learning_rate": 1.6517998064987713e-05, "loss": 0.5926, "step": 3813 }, { "epoch": 0.3, "grad_norm": 1.1594448284497245, "learning_rate": 1.6516092271944702e-05, "loss": 0.5928, "step": 3814 }, { "epoch": 0.3, "grad_norm": 1.2274106731659973, "learning_rate": 1.6514186067507164e-05, "loss": 0.6147, "step": 3815 }, { "epoch": 0.3, "grad_norm": 1.307674164559868, "learning_rate": 1.651227945179545e-05, "loss": 0.6301, "step": 3816 }, { "epoch": 0.3, "grad_norm": 1.0382097803781347, "learning_rate": 1.6510372424929938e-05, "loss": 0.5734, "step": 3817 }, { "epoch": 0.3, "grad_norm": 1.2147871292510144, "learning_rate": 1.650846498703102e-05, "loss": 0.6246, "step": 3818 }, { "epoch": 0.3, "grad_norm": 1.1487076110822814, "learning_rate": 1.6506557138219136e-05, "loss": 0.5224, "step": 3819 }, { "epoch": 0.3, "grad_norm": 1.3060120123943155, "learning_rate": 1.6504648878614726e-05, "loss": 0.5835, "step": 3820 }, { "epoch": 0.3, "grad_norm": 1.0709540678801464, "learning_rate": 1.6502740208338273e-05, "loss": 0.5895, "step": 3821 }, { "epoch": 0.3, "grad_norm": 1.2362763934890642, "learning_rate": 1.650083112751028e-05, "loss": 0.6449, "step": 3822 }, { "epoch": 0.3, "grad_norm": 1.1390627783839091, "learning_rate": 1.6498921636251278e-05, "loss": 0.5832, "step": 3823 }, { "epoch": 0.3, "grad_norm": 1.10270155865618, "learning_rate": 1.6497011734681824e-05, "loss": 0.5772, "step": 3824 }, { "epoch": 0.3, "grad_norm": 1.3061348202679655, "learning_rate": 1.64951014229225e-05, "loss": 0.6041, "step": 3825 }, { "epoch": 0.3, "grad_norm": 1.150218024525958, "learning_rate": 1.6493190701093913e-05, "loss": 0.5507, "step": 3826 }, { "epoch": 0.3, "grad_norm": 1.1654001525338058, "learning_rate": 1.6491279569316697e-05, "loss": 0.5608, "step": 3827 }, { "epoch": 0.3, "grad_norm": 1.2742798248566691, "learning_rate": 1.6489368027711507e-05, "loss": 0.5654, "step": 3828 }, { "epoch": 0.3, "grad_norm": 1.2060025964522365, "learning_rate": 1.648745607639904e-05, "loss": 0.6007, "step": 3829 }, { "epoch": 0.3, "grad_norm": 1.2790790867516384, "learning_rate": 1.6485543715499994e-05, "loss": 0.6137, "step": 3830 }, { "epoch": 0.3, "grad_norm": 1.211047505950281, "learning_rate": 1.6483630945135112e-05, "loss": 0.632, "step": 3831 }, { "epoch": 0.3, "grad_norm": 1.1557007077692802, "learning_rate": 1.648171776542516e-05, "loss": 0.5673, "step": 3832 }, { "epoch": 0.3, "grad_norm": 1.1948993442552436, "learning_rate": 1.647980417649092e-05, "loss": 0.6027, "step": 3833 }, { "epoch": 0.3, "grad_norm": 1.250139705480305, "learning_rate": 1.6477890178453216e-05, "loss": 0.5999, "step": 3834 }, { "epoch": 0.3, "grad_norm": 1.0587319146675518, "learning_rate": 1.6475975771432883e-05, "loss": 0.5594, "step": 3835 }, { "epoch": 0.3, "grad_norm": 1.1433898129336177, "learning_rate": 1.6474060955550783e-05, "loss": 0.5648, "step": 3836 }, { "epoch": 0.3, "grad_norm": 1.1182726411699961, "learning_rate": 1.647214573092782e-05, "loss": 0.5938, "step": 3837 }, { "epoch": 0.3, "grad_norm": 1.2268435891124432, "learning_rate": 1.64702300976849e-05, "loss": 0.656, "step": 3838 }, { "epoch": 0.3, "grad_norm": 1.1606531074666053, "learning_rate": 1.6468314055942977e-05, "loss": 0.5799, "step": 3839 }, { "epoch": 0.3, "grad_norm": 1.1225013535795143, "learning_rate": 1.646639760582301e-05, "loss": 0.6052, "step": 3840 }, { "epoch": 0.3, "grad_norm": 1.3084316324048235, "learning_rate": 1.6464480747446e-05, "loss": 0.6426, "step": 3841 }, { "epoch": 0.3, "grad_norm": 1.2054317209090708, "learning_rate": 1.646256348093297e-05, "loss": 0.5734, "step": 3842 }, { "epoch": 0.3, "grad_norm": 1.2244733768371143, "learning_rate": 1.6460645806404967e-05, "loss": 0.5561, "step": 3843 }, { "epoch": 0.3, "grad_norm": 1.147653221090019, "learning_rate": 1.645872772398306e-05, "loss": 0.6205, "step": 3844 }, { "epoch": 0.3, "grad_norm": 1.1980938932373515, "learning_rate": 1.645680923378835e-05, "loss": 0.5896, "step": 3845 }, { "epoch": 0.3, "grad_norm": 1.0626539231050376, "learning_rate": 1.6454890335941957e-05, "loss": 0.5643, "step": 3846 }, { "epoch": 0.3, "grad_norm": 1.2680301653736579, "learning_rate": 1.645297103056504e-05, "loss": 0.6094, "step": 3847 }, { "epoch": 0.3, "grad_norm": 1.213017946892014, "learning_rate": 1.6451051317778764e-05, "loss": 0.6437, "step": 3848 }, { "epoch": 0.3, "grad_norm": 1.265063502711842, "learning_rate": 1.6449131197704337e-05, "loss": 0.6289, "step": 3849 }, { "epoch": 0.3, "grad_norm": 1.218225366206749, "learning_rate": 1.6447210670462985e-05, "loss": 0.636, "step": 3850 }, { "epoch": 0.3, "grad_norm": 1.20334291342482, "learning_rate": 1.644528973617596e-05, "loss": 0.6317, "step": 3851 }, { "epoch": 0.3, "grad_norm": 1.1452117042892074, "learning_rate": 1.644336839496454e-05, "loss": 0.5835, "step": 3852 }, { "epoch": 0.3, "grad_norm": 1.3163676270385396, "learning_rate": 1.6441446646950027e-05, "loss": 0.6678, "step": 3853 }, { "epoch": 0.3, "grad_norm": 1.1686447891777894, "learning_rate": 1.643952449225376e-05, "loss": 0.5849, "step": 3854 }, { "epoch": 0.3, "grad_norm": 1.1595691261330412, "learning_rate": 1.6437601930997083e-05, "loss": 0.5801, "step": 3855 }, { "epoch": 0.3, "grad_norm": 1.2232351181993006, "learning_rate": 1.643567896330138e-05, "loss": 0.5258, "step": 3856 }, { "epoch": 0.3, "grad_norm": 1.235197300382614, "learning_rate": 1.6433755589288067e-05, "loss": 0.6101, "step": 3857 }, { "epoch": 0.3, "grad_norm": 1.1154672142453195, "learning_rate": 1.643183180907857e-05, "loss": 0.5405, "step": 3858 }, { "epoch": 0.3, "grad_norm": 1.10278393279012, "learning_rate": 1.642990762279434e-05, "loss": 0.5437, "step": 3859 }, { "epoch": 0.3, "grad_norm": 1.223961958643096, "learning_rate": 1.6427983030556872e-05, "loss": 0.6144, "step": 3860 }, { "epoch": 0.3, "grad_norm": 1.3024641268543844, "learning_rate": 1.6426058032487675e-05, "loss": 0.6771, "step": 3861 }, { "epoch": 0.3, "grad_norm": 1.1650502667204066, "learning_rate": 1.6424132628708276e-05, "loss": 0.6146, "step": 3862 }, { "epoch": 0.3, "grad_norm": 1.1345842909251438, "learning_rate": 1.642220681934024e-05, "loss": 0.55, "step": 3863 }, { "epoch": 0.3, "grad_norm": 1.1848294697157156, "learning_rate": 1.6420280604505152e-05, "loss": 0.543, "step": 3864 }, { "epoch": 0.3, "grad_norm": 1.2912866074283447, "learning_rate": 1.6418353984324628e-05, "loss": 0.657, "step": 3865 }, { "epoch": 0.3, "grad_norm": 1.1422981114715618, "learning_rate": 1.6416426958920303e-05, "loss": 0.612, "step": 3866 }, { "epoch": 0.3, "grad_norm": 1.1744473009657208, "learning_rate": 1.6414499528413838e-05, "loss": 0.5618, "step": 3867 }, { "epoch": 0.3, "grad_norm": 1.1696305206713205, "learning_rate": 1.6412571692926924e-05, "loss": 0.5347, "step": 3868 }, { "epoch": 0.3, "grad_norm": 1.2394894262480667, "learning_rate": 1.6410643452581274e-05, "loss": 0.5943, "step": 3869 }, { "epoch": 0.3, "grad_norm": 1.1992516528657549, "learning_rate": 1.640871480749863e-05, "loss": 0.6021, "step": 3870 }, { "epoch": 0.3, "grad_norm": 1.2168749543882105, "learning_rate": 1.6406785757800753e-05, "loss": 0.6033, "step": 3871 }, { "epoch": 0.3, "grad_norm": 1.2110106907537241, "learning_rate": 1.6404856303609435e-05, "loss": 0.6042, "step": 3872 }, { "epoch": 0.3, "grad_norm": 1.0960628034773212, "learning_rate": 1.64029264450465e-05, "loss": 0.5802, "step": 3873 }, { "epoch": 0.3, "grad_norm": 1.1151483238679345, "learning_rate": 1.640099618223378e-05, "loss": 0.601, "step": 3874 }, { "epoch": 0.3, "grad_norm": 1.2044190222668023, "learning_rate": 1.6399065515293145e-05, "loss": 0.6263, "step": 3875 }, { "epoch": 0.3, "grad_norm": 1.1307449414690194, "learning_rate": 1.6397134444346487e-05, "loss": 0.5414, "step": 3876 }, { "epoch": 0.3, "grad_norm": 1.253980640331903, "learning_rate": 1.6395202969515735e-05, "loss": 0.6337, "step": 3877 }, { "epoch": 0.3, "grad_norm": 1.2784498832436724, "learning_rate": 1.639327109092282e-05, "loss": 0.614, "step": 3878 }, { "epoch": 0.3, "grad_norm": 1.3343225723437493, "learning_rate": 1.639133880868971e-05, "loss": 0.6371, "step": 3879 }, { "epoch": 0.3, "grad_norm": 1.1959331123026353, "learning_rate": 1.6389406122938415e-05, "loss": 0.5897, "step": 3880 }, { "epoch": 0.3, "grad_norm": 1.174967385914844, "learning_rate": 1.6387473033790944e-05, "loss": 0.5861, "step": 3881 }, { "epoch": 0.3, "grad_norm": 1.1046235320955742, "learning_rate": 1.6385539541369346e-05, "loss": 0.551, "step": 3882 }, { "epoch": 0.3, "grad_norm": 1.2528323033842708, "learning_rate": 1.6383605645795687e-05, "loss": 0.6035, "step": 3883 }, { "epoch": 0.3, "grad_norm": 1.2644852580836072, "learning_rate": 1.6381671347192073e-05, "loss": 0.5991, "step": 3884 }, { "epoch": 0.3, "grad_norm": 1.2052585955101447, "learning_rate": 1.6379736645680622e-05, "loss": 0.561, "step": 3885 }, { "epoch": 0.3, "grad_norm": 1.3026870650318307, "learning_rate": 1.6377801541383477e-05, "loss": 0.6556, "step": 3886 }, { "epoch": 0.3, "grad_norm": 1.350319042300315, "learning_rate": 1.637586603442282e-05, "loss": 0.6403, "step": 3887 }, { "epoch": 0.3, "grad_norm": 1.1535409688829779, "learning_rate": 1.6373930124920844e-05, "loss": 0.5629, "step": 3888 }, { "epoch": 0.3, "grad_norm": 1.300501337950479, "learning_rate": 1.6371993812999773e-05, "loss": 0.5918, "step": 3889 }, { "epoch": 0.3, "grad_norm": 1.4052594829022873, "learning_rate": 1.637005709878186e-05, "loss": 0.6102, "step": 3890 }, { "epoch": 0.3, "grad_norm": 1.2750333370263558, "learning_rate": 1.6368119982389373e-05, "loss": 0.6711, "step": 3891 }, { "epoch": 0.3, "grad_norm": 1.2093439260822887, "learning_rate": 1.6366182463944618e-05, "loss": 0.6573, "step": 3892 }, { "epoch": 0.3, "grad_norm": 1.2163617625152745, "learning_rate": 1.636424454356992e-05, "loss": 0.5821, "step": 3893 }, { "epoch": 0.3, "grad_norm": 1.1146755120969853, "learning_rate": 1.636230622138763e-05, "loss": 0.6064, "step": 3894 }, { "epoch": 0.3, "grad_norm": 1.179718977148219, "learning_rate": 1.636036749752012e-05, "loss": 0.5514, "step": 3895 }, { "epoch": 0.3, "grad_norm": 1.3208943726949323, "learning_rate": 1.63584283720898e-05, "loss": 0.6838, "step": 3896 }, { "epoch": 0.3, "grad_norm": 1.3103264567866675, "learning_rate": 1.6356488845219086e-05, "loss": 0.5689, "step": 3897 }, { "epoch": 0.3, "grad_norm": 1.3054659228748, "learning_rate": 1.6354548917030437e-05, "loss": 0.5876, "step": 3898 }, { "epoch": 0.3, "grad_norm": 1.1243838106226864, "learning_rate": 1.635260858764633e-05, "loss": 0.5186, "step": 3899 }, { "epoch": 0.3, "grad_norm": 1.2194864542209984, "learning_rate": 1.6350667857189268e-05, "loss": 0.6127, "step": 3900 }, { "epoch": 0.3, "grad_norm": 1.275604744151813, "learning_rate": 1.634872672578178e-05, "loss": 0.6614, "step": 3901 }, { "epoch": 0.3, "grad_norm": 1.179660064176127, "learning_rate": 1.6346785193546418e-05, "loss": 0.5828, "step": 3902 }, { "epoch": 0.3, "grad_norm": 1.1594623584172743, "learning_rate": 1.634484326060576e-05, "loss": 0.5918, "step": 3903 }, { "epoch": 0.3, "grad_norm": 1.2652293399193577, "learning_rate": 1.6342900927082415e-05, "loss": 0.5914, "step": 3904 }, { "epoch": 0.3, "grad_norm": 1.2085145891103632, "learning_rate": 1.634095819309901e-05, "loss": 0.5946, "step": 3905 }, { "epoch": 0.3, "grad_norm": 1.2495752566641438, "learning_rate": 1.6339015058778196e-05, "loss": 0.5987, "step": 3906 }, { "epoch": 0.3, "grad_norm": 1.3488035321859129, "learning_rate": 1.6337071524242657e-05, "loss": 0.6658, "step": 3907 }, { "epoch": 0.3, "grad_norm": 1.0699372016446838, "learning_rate": 1.6335127589615097e-05, "loss": 0.5674, "step": 3908 }, { "epoch": 0.3, "grad_norm": 1.1791559499696191, "learning_rate": 1.6333183255018247e-05, "loss": 0.5647, "step": 3909 }, { "epoch": 0.3, "grad_norm": 1.2328335275536846, "learning_rate": 1.6331238520574866e-05, "loss": 0.6384, "step": 3910 }, { "epoch": 0.3, "grad_norm": 1.1918939311499923, "learning_rate": 1.632929338640773e-05, "loss": 0.6079, "step": 3911 }, { "epoch": 0.3, "grad_norm": 1.3075585213250527, "learning_rate": 1.632734785263965e-05, "loss": 0.6057, "step": 3912 }, { "epoch": 0.3, "grad_norm": 1.167633161527098, "learning_rate": 1.6325401919393455e-05, "loss": 0.5506, "step": 3913 }, { "epoch": 0.3, "grad_norm": 1.248550910238651, "learning_rate": 1.6323455586792e-05, "loss": 0.6152, "step": 3914 }, { "epoch": 0.3, "grad_norm": 1.0655156143741384, "learning_rate": 1.632150885495817e-05, "loss": 0.5355, "step": 3915 }, { "epoch": 0.3, "grad_norm": 1.1881358803256417, "learning_rate": 1.631956172401487e-05, "loss": 0.6435, "step": 3916 }, { "epoch": 0.3, "grad_norm": 1.2162741430695418, "learning_rate": 1.6317614194085033e-05, "loss": 0.5917, "step": 3917 }, { "epoch": 0.3, "grad_norm": 1.5600653945225398, "learning_rate": 1.631566626529162e-05, "loss": 0.5381, "step": 3918 }, { "epoch": 0.3, "grad_norm": 1.076173093487003, "learning_rate": 1.6313717937757612e-05, "loss": 0.5388, "step": 3919 }, { "epoch": 0.3, "grad_norm": 1.1156762845943378, "learning_rate": 1.6311769211606012e-05, "loss": 0.5769, "step": 3920 }, { "epoch": 0.3, "grad_norm": 1.27629685737159, "learning_rate": 1.630982008695986e-05, "loss": 0.6195, "step": 3921 }, { "epoch": 0.3, "grad_norm": 1.2025496853657713, "learning_rate": 1.6307870563942212e-05, "loss": 0.5878, "step": 3922 }, { "epoch": 0.3, "grad_norm": 1.2169623346635952, "learning_rate": 1.630592064267615e-05, "loss": 0.5994, "step": 3923 }, { "epoch": 0.3, "grad_norm": 1.2369774533979363, "learning_rate": 1.6303970323284784e-05, "loss": 0.633, "step": 3924 }, { "epoch": 0.3, "grad_norm": 1.119677669429823, "learning_rate": 1.630201960589125e-05, "loss": 0.5875, "step": 3925 }, { "epoch": 0.3, "grad_norm": 1.1193954949816354, "learning_rate": 1.63000684906187e-05, "loss": 0.5378, "step": 3926 }, { "epoch": 0.3, "grad_norm": 1.1732046594741175, "learning_rate": 1.6298116977590323e-05, "loss": 0.5885, "step": 3927 }, { "epoch": 0.3, "grad_norm": 1.1824317994723597, "learning_rate": 1.6296165066929327e-05, "loss": 0.5893, "step": 3928 }, { "epoch": 0.3, "grad_norm": 1.1695816487860884, "learning_rate": 1.6294212758758945e-05, "loss": 0.6354, "step": 3929 }, { "epoch": 0.3, "grad_norm": 1.1942416611624476, "learning_rate": 1.629226005320244e-05, "loss": 0.6118, "step": 3930 }, { "epoch": 0.3, "grad_norm": 1.1750110625699934, "learning_rate": 1.6290306950383095e-05, "loss": 0.6372, "step": 3931 }, { "epoch": 0.31, "grad_norm": 1.2056301837056256, "learning_rate": 1.6288353450424216e-05, "loss": 0.5981, "step": 3932 }, { "epoch": 0.31, "grad_norm": 1.2117346324162617, "learning_rate": 1.628639955344914e-05, "loss": 0.5877, "step": 3933 }, { "epoch": 0.31, "grad_norm": 1.21289696438372, "learning_rate": 1.628444525958123e-05, "loss": 0.6124, "step": 3934 }, { "epoch": 0.31, "grad_norm": 1.1563761358794684, "learning_rate": 1.6282490568943867e-05, "loss": 0.5792, "step": 3935 }, { "epoch": 0.31, "grad_norm": 1.254263996179238, "learning_rate": 1.6280535481660454e-05, "loss": 0.642, "step": 3936 }, { "epoch": 0.31, "grad_norm": 1.2512793669555577, "learning_rate": 1.627857999785444e-05, "loss": 0.5883, "step": 3937 }, { "epoch": 0.31, "grad_norm": 1.1928629990067652, "learning_rate": 1.6276624117649273e-05, "loss": 0.6174, "step": 3938 }, { "epoch": 0.31, "grad_norm": 1.2099964546120145, "learning_rate": 1.6274667841168445e-05, "loss": 0.572, "step": 3939 }, { "epoch": 0.31, "grad_norm": 1.1618369929277412, "learning_rate": 1.6272711168535465e-05, "loss": 0.6543, "step": 3940 }, { "epoch": 0.31, "grad_norm": 1.2139831634874838, "learning_rate": 1.6270754099873866e-05, "loss": 0.572, "step": 3941 }, { "epoch": 0.31, "grad_norm": 1.2067460892991102, "learning_rate": 1.62687966353072e-05, "loss": 0.6673, "step": 3942 }, { "epoch": 0.31, "grad_norm": 1.1363861541250981, "learning_rate": 1.6266838774959065e-05, "loss": 0.5476, "step": 3943 }, { "epoch": 0.31, "grad_norm": 1.159579149559187, "learning_rate": 1.6264880518953068e-05, "loss": 0.5904, "step": 3944 }, { "epoch": 0.31, "grad_norm": 1.0840394702003568, "learning_rate": 1.626292186741284e-05, "loss": 0.5894, "step": 3945 }, { "epoch": 0.31, "grad_norm": 1.3137632829918364, "learning_rate": 1.626096282046204e-05, "loss": 0.6124, "step": 3946 }, { "epoch": 0.31, "grad_norm": 1.199484929170182, "learning_rate": 1.6259003378224358e-05, "loss": 0.5361, "step": 3947 }, { "epoch": 0.31, "grad_norm": 1.126221523454137, "learning_rate": 1.6257043540823498e-05, "loss": 0.5546, "step": 3948 }, { "epoch": 0.31, "grad_norm": 1.1876042972740064, "learning_rate": 1.6255083308383195e-05, "loss": 0.5831, "step": 3949 }, { "epoch": 0.31, "grad_norm": 1.0887787239849271, "learning_rate": 1.6253122681027214e-05, "loss": 0.6001, "step": 3950 }, { "epoch": 0.31, "grad_norm": 1.2446340304149863, "learning_rate": 1.6251161658879338e-05, "loss": 0.6371, "step": 3951 }, { "epoch": 0.31, "grad_norm": 1.2377869976433886, "learning_rate": 1.6249200242063368e-05, "loss": 0.5563, "step": 3952 }, { "epoch": 0.31, "grad_norm": 1.1588291065492486, "learning_rate": 1.624723843070315e-05, "loss": 0.638, "step": 3953 }, { "epoch": 0.31, "grad_norm": 1.111704462620387, "learning_rate": 1.6245276224922538e-05, "loss": 0.602, "step": 3954 }, { "epoch": 0.31, "grad_norm": 1.234620564818056, "learning_rate": 1.6243313624845417e-05, "loss": 0.5974, "step": 3955 }, { "epoch": 0.31, "grad_norm": 1.2223304582604837, "learning_rate": 1.6241350630595697e-05, "loss": 0.5917, "step": 3956 }, { "epoch": 0.31, "grad_norm": 1.1517162101364646, "learning_rate": 1.623938724229731e-05, "loss": 0.5855, "step": 3957 }, { "epoch": 0.31, "grad_norm": 1.271761909349612, "learning_rate": 1.623742346007421e-05, "loss": 0.6164, "step": 3958 }, { "epoch": 0.31, "grad_norm": 1.2746847866335282, "learning_rate": 1.6235459284050393e-05, "loss": 0.6274, "step": 3959 }, { "epoch": 0.31, "grad_norm": 1.2029244887482289, "learning_rate": 1.6233494714349854e-05, "loss": 0.6257, "step": 3960 }, { "epoch": 0.31, "grad_norm": 1.1819769262524185, "learning_rate": 1.623152975109664e-05, "loss": 0.6301, "step": 3961 }, { "epoch": 0.31, "grad_norm": 1.2891098476153031, "learning_rate": 1.6229564394414796e-05, "loss": 0.6508, "step": 3962 }, { "epoch": 0.31, "grad_norm": 1.2121481965142733, "learning_rate": 1.6227598644428417e-05, "loss": 0.5883, "step": 3963 }, { "epoch": 0.31, "grad_norm": 1.2397736421989687, "learning_rate": 1.6225632501261603e-05, "loss": 0.6003, "step": 3964 }, { "epoch": 0.31, "grad_norm": 1.241069074277615, "learning_rate": 1.622366596503849e-05, "loss": 0.5807, "step": 3965 }, { "epoch": 0.31, "grad_norm": 1.2458192050519732, "learning_rate": 1.622169903588323e-05, "loss": 0.581, "step": 3966 }, { "epoch": 0.31, "grad_norm": 1.15965748350523, "learning_rate": 1.6219731713920017e-05, "loss": 0.583, "step": 3967 }, { "epoch": 0.31, "grad_norm": 1.2442792638549236, "learning_rate": 1.621776399927305e-05, "loss": 0.5626, "step": 3968 }, { "epoch": 0.31, "grad_norm": 1.1158338762604407, "learning_rate": 1.6215795892066556e-05, "loss": 0.5716, "step": 3969 }, { "epoch": 0.31, "grad_norm": 1.216933486152108, "learning_rate": 1.6213827392424802e-05, "loss": 0.5675, "step": 3970 }, { "epoch": 0.31, "grad_norm": 1.3129663547147643, "learning_rate": 1.6211858500472068e-05, "loss": 0.6168, "step": 3971 }, { "epoch": 0.31, "grad_norm": 1.5181623632828118, "learning_rate": 1.6209889216332657e-05, "loss": 0.5845, "step": 3972 }, { "epoch": 0.31, "grad_norm": 1.2594047561802864, "learning_rate": 1.62079195401309e-05, "loss": 0.6096, "step": 3973 }, { "epoch": 0.31, "grad_norm": 1.1115418883893513, "learning_rate": 1.6205949471991154e-05, "loss": 0.6022, "step": 3974 }, { "epoch": 0.31, "grad_norm": 1.2668267655000156, "learning_rate": 1.6203979012037797e-05, "loss": 0.6232, "step": 3975 }, { "epoch": 0.31, "grad_norm": 1.1391563366410193, "learning_rate": 1.620200816039524e-05, "loss": 0.5601, "step": 3976 }, { "epoch": 0.31, "grad_norm": 1.2115418925487111, "learning_rate": 1.6200036917187908e-05, "loss": 0.6136, "step": 3977 }, { "epoch": 0.31, "grad_norm": 1.2694049948487522, "learning_rate": 1.6198065282540258e-05, "loss": 0.6167, "step": 3978 }, { "epoch": 0.31, "grad_norm": 1.2722178510223396, "learning_rate": 1.619609325657677e-05, "loss": 0.6318, "step": 3979 }, { "epoch": 0.31, "grad_norm": 1.3864098863092906, "learning_rate": 1.619412083942195e-05, "loss": 0.6064, "step": 3980 }, { "epoch": 0.31, "grad_norm": 1.1347166172284997, "learning_rate": 1.6192148031200315e-05, "loss": 0.5682, "step": 3981 }, { "epoch": 0.31, "grad_norm": 1.1315261377340633, "learning_rate": 1.6190174832036434e-05, "loss": 0.5535, "step": 3982 }, { "epoch": 0.31, "grad_norm": 1.119340436051421, "learning_rate": 1.6188201242054875e-05, "loss": 0.5955, "step": 3983 }, { "epoch": 0.31, "grad_norm": 1.2052149764493065, "learning_rate": 1.6186227261380247e-05, "loss": 0.6179, "step": 3984 }, { "epoch": 0.31, "grad_norm": 1.170635228802279, "learning_rate": 1.6184252890137175e-05, "loss": 0.6167, "step": 3985 }, { "epoch": 0.31, "grad_norm": 1.1688227768541013, "learning_rate": 1.618227812845031e-05, "loss": 0.5885, "step": 3986 }, { "epoch": 0.31, "grad_norm": 1.2806564444855968, "learning_rate": 1.6180302976444332e-05, "loss": 0.6695, "step": 3987 }, { "epoch": 0.31, "grad_norm": 1.1579952471574613, "learning_rate": 1.617832743424394e-05, "loss": 0.5816, "step": 3988 }, { "epoch": 0.31, "grad_norm": 1.1821707553065879, "learning_rate": 1.6176351501973865e-05, "loss": 0.6079, "step": 3989 }, { "epoch": 0.31, "grad_norm": 1.191993693626286, "learning_rate": 1.6174375179758854e-05, "loss": 0.6214, "step": 3990 }, { "epoch": 0.31, "grad_norm": 1.2450894221781705, "learning_rate": 1.6172398467723674e-05, "loss": 0.5836, "step": 3991 }, { "epoch": 0.31, "grad_norm": 1.2226072551429032, "learning_rate": 1.617042136599314e-05, "loss": 0.6354, "step": 3992 }, { "epoch": 0.31, "grad_norm": 1.1979623979325693, "learning_rate": 1.6168443874692066e-05, "loss": 0.5994, "step": 3993 }, { "epoch": 0.31, "grad_norm": 1.4221004265466641, "learning_rate": 1.616646599394531e-05, "loss": 0.5728, "step": 3994 }, { "epoch": 0.31, "grad_norm": 1.2873378892337566, "learning_rate": 1.616448772387774e-05, "loss": 0.6059, "step": 3995 }, { "epoch": 0.31, "grad_norm": 1.061620235855083, "learning_rate": 1.6162509064614253e-05, "loss": 0.5424, "step": 3996 }, { "epoch": 0.31, "grad_norm": 1.1789231508331548, "learning_rate": 1.6160530016279774e-05, "loss": 0.4952, "step": 3997 }, { "epoch": 0.31, "grad_norm": 1.1910281222279882, "learning_rate": 1.6158550578999252e-05, "loss": 0.6121, "step": 3998 }, { "epoch": 0.31, "grad_norm": 1.125059867961342, "learning_rate": 1.6156570752897658e-05, "loss": 0.5408, "step": 3999 }, { "epoch": 0.31, "grad_norm": 1.2594834595730304, "learning_rate": 1.6154590538099993e-05, "loss": 0.5299, "step": 4000 }, { "epoch": 0.31, "grad_norm": 1.153355816739578, "learning_rate": 1.6152609934731266e-05, "loss": 0.5875, "step": 4001 }, { "epoch": 0.31, "grad_norm": 1.139540431012989, "learning_rate": 1.615062894291654e-05, "loss": 0.5971, "step": 4002 }, { "epoch": 0.31, "grad_norm": 1.1937285755891243, "learning_rate": 1.6148647562780865e-05, "loss": 0.5594, "step": 4003 }, { "epoch": 0.31, "grad_norm": 1.1929602821913936, "learning_rate": 1.6146665794449352e-05, "loss": 0.626, "step": 4004 }, { "epoch": 0.31, "grad_norm": 1.2669126765290302, "learning_rate": 1.6144683638047115e-05, "loss": 0.6363, "step": 4005 }, { "epoch": 0.31, "grad_norm": 1.1527505964626341, "learning_rate": 1.6142701093699296e-05, "loss": 0.5758, "step": 4006 }, { "epoch": 0.31, "grad_norm": 1.2021356949181223, "learning_rate": 1.6140718161531066e-05, "loss": 0.5874, "step": 4007 }, { "epoch": 0.31, "grad_norm": 1.2117869689351568, "learning_rate": 1.613873484166762e-05, "loss": 0.6391, "step": 4008 }, { "epoch": 0.31, "grad_norm": 1.1044450744780192, "learning_rate": 1.6136751134234163e-05, "loss": 0.5521, "step": 4009 }, { "epoch": 0.31, "grad_norm": 1.2154085863952253, "learning_rate": 1.6134767039355953e-05, "loss": 0.6514, "step": 4010 }, { "epoch": 0.31, "grad_norm": 1.178474914636623, "learning_rate": 1.6132782557158245e-05, "loss": 0.5812, "step": 4011 }, { "epoch": 0.31, "grad_norm": 1.1189564391888203, "learning_rate": 1.6130797687766335e-05, "loss": 0.544, "step": 4012 }, { "epoch": 0.31, "grad_norm": 1.1246021944951656, "learning_rate": 1.6128812431305534e-05, "loss": 0.5812, "step": 4013 }, { "epoch": 0.31, "grad_norm": 1.175541647390319, "learning_rate": 1.6126826787901185e-05, "loss": 0.6081, "step": 4014 }, { "epoch": 0.31, "grad_norm": 1.1366678337937643, "learning_rate": 1.6124840757678653e-05, "loss": 0.6106, "step": 4015 }, { "epoch": 0.31, "grad_norm": 1.1244202815591704, "learning_rate": 1.612285434076332e-05, "loss": 0.5615, "step": 4016 }, { "epoch": 0.31, "grad_norm": 1.2184845072779211, "learning_rate": 1.6120867537280608e-05, "loss": 0.6, "step": 4017 }, { "epoch": 0.31, "grad_norm": 1.1816462240795806, "learning_rate": 1.611888034735594e-05, "loss": 0.6147, "step": 4018 }, { "epoch": 0.31, "grad_norm": 1.2339105818886065, "learning_rate": 1.6116892771114793e-05, "loss": 0.633, "step": 4019 }, { "epoch": 0.31, "grad_norm": 1.0915302094260146, "learning_rate": 1.6114904808682644e-05, "loss": 0.5481, "step": 4020 }, { "epoch": 0.31, "grad_norm": 1.1885559506885552, "learning_rate": 1.6112916460185008e-05, "loss": 0.6277, "step": 4021 }, { "epoch": 0.31, "grad_norm": 1.2741424855810366, "learning_rate": 1.6110927725747413e-05, "loss": 0.6197, "step": 4022 }, { "epoch": 0.31, "grad_norm": 1.1242694602064187, "learning_rate": 1.610893860549543e-05, "loss": 0.5296, "step": 4023 }, { "epoch": 0.31, "grad_norm": 1.1756699214710347, "learning_rate": 1.610694909955463e-05, "loss": 0.5201, "step": 4024 }, { "epoch": 0.31, "grad_norm": 1.2186917022064503, "learning_rate": 1.6104959208050625e-05, "loss": 0.6423, "step": 4025 }, { "epoch": 0.31, "grad_norm": 1.1961654412370195, "learning_rate": 1.610296893110905e-05, "loss": 0.586, "step": 4026 }, { "epoch": 0.31, "grad_norm": 1.1615095369601849, "learning_rate": 1.6100978268855552e-05, "loss": 0.5639, "step": 4027 }, { "epoch": 0.31, "grad_norm": 1.2014112280579905, "learning_rate": 1.609898722141583e-05, "loss": 0.6249, "step": 4028 }, { "epoch": 0.31, "grad_norm": 1.1315937194478678, "learning_rate": 1.609699578891557e-05, "loss": 0.5543, "step": 4029 }, { "epoch": 0.31, "grad_norm": 1.3114084064089124, "learning_rate": 1.6095003971480514e-05, "loss": 0.6376, "step": 4030 }, { "epoch": 0.31, "grad_norm": 1.265077448950562, "learning_rate": 1.609301176923641e-05, "loss": 0.6216, "step": 4031 }, { "epoch": 0.31, "grad_norm": 1.1475924022928314, "learning_rate": 1.6091019182309033e-05, "loss": 0.5656, "step": 4032 }, { "epoch": 0.31, "grad_norm": 1.1737722170903604, "learning_rate": 1.6089026210824196e-05, "loss": 0.6254, "step": 4033 }, { "epoch": 0.31, "grad_norm": 1.1438404381021985, "learning_rate": 1.6087032854907715e-05, "loss": 0.5805, "step": 4034 }, { "epoch": 0.31, "grad_norm": 1.0830969674895723, "learning_rate": 1.6085039114685448e-05, "loss": 0.5331, "step": 4035 }, { "epoch": 0.31, "grad_norm": 1.1128636698004142, "learning_rate": 1.6083044990283263e-05, "loss": 0.5696, "step": 4036 }, { "epoch": 0.31, "grad_norm": 1.194368126498595, "learning_rate": 1.6081050481827066e-05, "loss": 0.6, "step": 4037 }, { "epoch": 0.31, "grad_norm": 1.1239262861197168, "learning_rate": 1.6079055589442778e-05, "loss": 0.6219, "step": 4038 }, { "epoch": 0.31, "grad_norm": 1.2518657588379594, "learning_rate": 1.6077060313256348e-05, "loss": 0.6307, "step": 4039 }, { "epoch": 0.31, "grad_norm": 1.090109953277923, "learning_rate": 1.6075064653393748e-05, "loss": 0.5383, "step": 4040 }, { "epoch": 0.31, "grad_norm": 1.2473465412676827, "learning_rate": 1.6073068609980968e-05, "loss": 0.5926, "step": 4041 }, { "epoch": 0.31, "grad_norm": 1.3225680615338957, "learning_rate": 1.6071072183144033e-05, "loss": 0.5962, "step": 4042 }, { "epoch": 0.31, "grad_norm": 1.2329081257718428, "learning_rate": 1.6069075373008997e-05, "loss": 0.597, "step": 4043 }, { "epoch": 0.31, "grad_norm": 1.1676300986786838, "learning_rate": 1.6067078179701913e-05, "loss": 0.5808, "step": 4044 }, { "epoch": 0.31, "grad_norm": 1.1526814628616309, "learning_rate": 1.6065080603348885e-05, "loss": 0.6044, "step": 4045 }, { "epoch": 0.31, "grad_norm": 1.2083597125539236, "learning_rate": 1.6063082644076026e-05, "loss": 0.6071, "step": 4046 }, { "epoch": 0.31, "grad_norm": 1.2155312313208602, "learning_rate": 1.6061084302009473e-05, "loss": 0.6456, "step": 4047 }, { "epoch": 0.31, "grad_norm": 1.1809232189145735, "learning_rate": 1.6059085577275402e-05, "loss": 0.6001, "step": 4048 }, { "epoch": 0.31, "grad_norm": 1.3228249655752855, "learning_rate": 1.605708647e-05, "loss": 0.5856, "step": 4049 }, { "epoch": 0.31, "grad_norm": 1.174252146273254, "learning_rate": 1.6055086980309475e-05, "loss": 0.6324, "step": 4050 }, { "epoch": 0.31, "grad_norm": 1.333198326944949, "learning_rate": 1.605308710833007e-05, "loss": 0.5949, "step": 4051 }, { "epoch": 0.31, "grad_norm": 1.1584392647823685, "learning_rate": 1.6051086854188046e-05, "loss": 0.5239, "step": 4052 }, { "epoch": 0.31, "grad_norm": 1.042404193455187, "learning_rate": 1.604908621800969e-05, "loss": 0.5189, "step": 4053 }, { "epoch": 0.31, "grad_norm": 1.1160018604939856, "learning_rate": 1.604708519992131e-05, "loss": 0.5914, "step": 4054 }, { "epoch": 0.31, "grad_norm": 1.246749179390805, "learning_rate": 1.6045083800049243e-05, "loss": 0.5528, "step": 4055 }, { "epoch": 0.31, "grad_norm": 1.3248094349702548, "learning_rate": 1.604308201851985e-05, "loss": 0.638, "step": 4056 }, { "epoch": 0.31, "grad_norm": 1.1998209978746488, "learning_rate": 1.6041079855459506e-05, "loss": 0.6108, "step": 4057 }, { "epoch": 0.31, "grad_norm": 1.2605346225158425, "learning_rate": 1.603907731099463e-05, "loss": 0.5983, "step": 4058 }, { "epoch": 0.31, "grad_norm": 1.2750747079092335, "learning_rate": 1.603707438525164e-05, "loss": 0.6677, "step": 4059 }, { "epoch": 0.31, "grad_norm": 1.2016606522690463, "learning_rate": 1.6035071078356998e-05, "loss": 0.5742, "step": 4060 }, { "epoch": 0.32, "grad_norm": 1.1839374509025111, "learning_rate": 1.603306739043718e-05, "loss": 0.5602, "step": 4061 }, { "epoch": 0.32, "grad_norm": 1.1035169213211413, "learning_rate": 1.6031063321618697e-05, "loss": 0.5843, "step": 4062 }, { "epoch": 0.32, "grad_norm": 1.200744999652961, "learning_rate": 1.6029058872028063e-05, "loss": 0.6201, "step": 4063 }, { "epoch": 0.32, "grad_norm": 1.152203101933758, "learning_rate": 1.6027054041791842e-05, "loss": 0.6164, "step": 4064 }, { "epoch": 0.32, "grad_norm": 1.256626826083971, "learning_rate": 1.6025048831036603e-05, "loss": 0.638, "step": 4065 }, { "epoch": 0.32, "grad_norm": 1.1813671457644892, "learning_rate": 1.6023043239888946e-05, "loss": 0.6082, "step": 4066 }, { "epoch": 0.32, "grad_norm": 1.2103966274123723, "learning_rate": 1.602103726847549e-05, "loss": 0.5989, "step": 4067 }, { "epoch": 0.32, "grad_norm": 1.2175937939181174, "learning_rate": 1.6019030916922892e-05, "loss": 0.6199, "step": 4068 }, { "epoch": 0.32, "grad_norm": 1.1165489025553181, "learning_rate": 1.6017024185357817e-05, "loss": 0.5692, "step": 4069 }, { "epoch": 0.32, "grad_norm": 1.250109190940164, "learning_rate": 1.601501707390696e-05, "loss": 0.5884, "step": 4070 }, { "epoch": 0.32, "grad_norm": 1.3070057813462548, "learning_rate": 1.6013009582697043e-05, "loss": 0.6139, "step": 4071 }, { "epoch": 0.32, "grad_norm": 1.2741008037379273, "learning_rate": 1.601100171185481e-05, "loss": 0.6513, "step": 4072 }, { "epoch": 0.32, "grad_norm": 1.0050610383320062, "learning_rate": 1.6008993461507022e-05, "loss": 0.4993, "step": 4073 }, { "epoch": 0.32, "grad_norm": 1.1282934089558068, "learning_rate": 1.6006984831780476e-05, "loss": 0.5376, "step": 4074 }, { "epoch": 0.32, "grad_norm": 1.2381305783419083, "learning_rate": 1.6004975822801986e-05, "loss": 0.6253, "step": 4075 }, { "epoch": 0.32, "grad_norm": 1.2529711697877712, "learning_rate": 1.600296643469839e-05, "loss": 0.6211, "step": 4076 }, { "epoch": 0.32, "grad_norm": 1.146491861400174, "learning_rate": 1.6000956667596554e-05, "loss": 0.5507, "step": 4077 }, { "epoch": 0.32, "grad_norm": 1.0868085482674297, "learning_rate": 1.599894652162336e-05, "loss": 0.557, "step": 4078 }, { "epoch": 0.32, "grad_norm": 1.2270822577911655, "learning_rate": 1.5996935996905722e-05, "loss": 0.5649, "step": 4079 }, { "epoch": 0.32, "grad_norm": 1.1749864597757853, "learning_rate": 1.5994925093570578e-05, "loss": 0.6118, "step": 4080 }, { "epoch": 0.32, "grad_norm": 1.2632427164876006, "learning_rate": 1.599291381174488e-05, "loss": 0.5955, "step": 4081 }, { "epoch": 0.32, "grad_norm": 1.183447246486196, "learning_rate": 1.5990902151555612e-05, "loss": 0.5786, "step": 4082 }, { "epoch": 0.32, "grad_norm": 1.124024498058253, "learning_rate": 1.5988890113129786e-05, "loss": 0.5757, "step": 4083 }, { "epoch": 0.32, "grad_norm": 1.1925538586303988, "learning_rate": 1.5986877696594425e-05, "loss": 0.6069, "step": 4084 }, { "epoch": 0.32, "grad_norm": 1.2013483181511755, "learning_rate": 1.598486490207659e-05, "loss": 0.6026, "step": 4085 }, { "epoch": 0.32, "grad_norm": 1.0765093431789332, "learning_rate": 1.598285172970335e-05, "loss": 0.5528, "step": 4086 }, { "epoch": 0.32, "grad_norm": 1.2788229027901965, "learning_rate": 1.598083817960182e-05, "loss": 0.6258, "step": 4087 }, { "epoch": 0.32, "grad_norm": 1.3049319831983297, "learning_rate": 1.5978824251899117e-05, "loss": 0.6535, "step": 4088 }, { "epoch": 0.32, "grad_norm": 1.2464490043286351, "learning_rate": 1.597680994672239e-05, "loss": 0.6026, "step": 4089 }, { "epoch": 0.32, "grad_norm": 1.1429909506445404, "learning_rate": 1.5974795264198814e-05, "loss": 0.6242, "step": 4090 }, { "epoch": 0.32, "grad_norm": 1.2879980605359609, "learning_rate": 1.5972780204455592e-05, "loss": 0.6501, "step": 4091 }, { "epoch": 0.32, "grad_norm": 1.1755463121475838, "learning_rate": 1.5970764767619933e-05, "loss": 0.5684, "step": 4092 }, { "epoch": 0.32, "grad_norm": 1.196094880206311, "learning_rate": 1.5968748953819095e-05, "loss": 0.6099, "step": 4093 }, { "epoch": 0.32, "grad_norm": 1.2817598584493335, "learning_rate": 1.596673276318034e-05, "loss": 0.5943, "step": 4094 }, { "epoch": 0.32, "grad_norm": 1.2085586809077369, "learning_rate": 1.5964716195830958e-05, "loss": 0.6354, "step": 4095 }, { "epoch": 0.32, "grad_norm": 1.35159720668442, "learning_rate": 1.5962699251898274e-05, "loss": 0.6055, "step": 4096 }, { "epoch": 0.32, "grad_norm": 1.133824705209295, "learning_rate": 1.596068193150962e-05, "loss": 0.5596, "step": 4097 }, { "epoch": 0.32, "grad_norm": 1.1371854368119316, "learning_rate": 1.595866423479236e-05, "loss": 0.5844, "step": 4098 }, { "epoch": 0.32, "grad_norm": 1.1801612357528344, "learning_rate": 1.595664616187389e-05, "loss": 0.5828, "step": 4099 }, { "epoch": 0.32, "grad_norm": 1.2468891056955147, "learning_rate": 1.595462771288161e-05, "loss": 0.6108, "step": 4100 }, { "epoch": 0.32, "grad_norm": 1.2033901170205261, "learning_rate": 1.5952608887942967e-05, "loss": 0.6022, "step": 4101 }, { "epoch": 0.32, "grad_norm": 1.2634317679962785, "learning_rate": 1.5950589687185405e-05, "loss": 0.6315, "step": 4102 }, { "epoch": 0.32, "grad_norm": 1.168779021938064, "learning_rate": 1.594857011073642e-05, "loss": 0.56, "step": 4103 }, { "epoch": 0.32, "grad_norm": 1.0974620497969982, "learning_rate": 1.5946550158723516e-05, "loss": 0.5621, "step": 4104 }, { "epoch": 0.32, "grad_norm": 1.0945140349881082, "learning_rate": 1.5944529831274213e-05, "loss": 0.5609, "step": 4105 }, { "epoch": 0.32, "grad_norm": 1.1688990635594247, "learning_rate": 1.5942509128516077e-05, "loss": 0.5748, "step": 4106 }, { "epoch": 0.32, "grad_norm": 1.2062149003214337, "learning_rate": 1.594048805057668e-05, "loss": 0.6537, "step": 4107 }, { "epoch": 0.32, "grad_norm": 1.290696605330871, "learning_rate": 1.5938466597583625e-05, "loss": 0.6344, "step": 4108 }, { "epoch": 0.32, "grad_norm": 1.2284935492880682, "learning_rate": 1.5936444769664533e-05, "loss": 0.5891, "step": 4109 }, { "epoch": 0.32, "grad_norm": 1.2105269197466848, "learning_rate": 1.593442256694705e-05, "loss": 0.631, "step": 4110 }, { "epoch": 0.32, "grad_norm": 1.196250198667185, "learning_rate": 1.593239998955886e-05, "loss": 0.593, "step": 4111 }, { "epoch": 0.32, "grad_norm": 1.285429487777566, "learning_rate": 1.593037703762765e-05, "loss": 0.6019, "step": 4112 }, { "epoch": 0.32, "grad_norm": 1.222874533573527, "learning_rate": 1.5928353711281138e-05, "loss": 0.5621, "step": 4113 }, { "epoch": 0.32, "grad_norm": 1.1762218473594288, "learning_rate": 1.5926330010647074e-05, "loss": 0.6045, "step": 4114 }, { "epoch": 0.32, "grad_norm": 1.0863788174540303, "learning_rate": 1.5924305935853218e-05, "loss": 0.5308, "step": 4115 }, { "epoch": 0.32, "grad_norm": 1.2315550837648002, "learning_rate": 1.5922281487027363e-05, "loss": 0.6585, "step": 4116 }, { "epoch": 0.32, "grad_norm": 1.156076057340976, "learning_rate": 1.5920256664297326e-05, "loss": 0.6323, "step": 4117 }, { "epoch": 0.32, "grad_norm": 1.0992176720153686, "learning_rate": 1.5918231467790938e-05, "loss": 0.576, "step": 4118 }, { "epoch": 0.32, "grad_norm": 1.254389399439678, "learning_rate": 1.5916205897636063e-05, "loss": 0.6041, "step": 4119 }, { "epoch": 0.32, "grad_norm": 1.159064758825171, "learning_rate": 1.5914179953960584e-05, "loss": 0.6353, "step": 4120 }, { "epoch": 0.32, "grad_norm": 1.172107927697421, "learning_rate": 1.5912153636892416e-05, "loss": 0.5914, "step": 4121 }, { "epoch": 0.32, "grad_norm": 1.1033789085836685, "learning_rate": 1.5910126946559484e-05, "loss": 0.5793, "step": 4122 }, { "epoch": 0.32, "grad_norm": 1.1070690097439773, "learning_rate": 1.5908099883089746e-05, "loss": 0.5886, "step": 4123 }, { "epoch": 0.32, "grad_norm": 1.2302395077253894, "learning_rate": 1.590607244661118e-05, "loss": 0.5626, "step": 4124 }, { "epoch": 0.32, "grad_norm": 1.2806528607265886, "learning_rate": 1.5904044637251793e-05, "loss": 0.6355, "step": 4125 }, { "epoch": 0.32, "grad_norm": 1.1804876077333568, "learning_rate": 1.5902016455139603e-05, "loss": 0.6016, "step": 4126 }, { "epoch": 0.32, "grad_norm": 1.1975104873452846, "learning_rate": 1.589998790040266e-05, "loss": 0.5445, "step": 4127 }, { "epoch": 0.32, "grad_norm": 1.251928082243929, "learning_rate": 1.589795897316905e-05, "loss": 0.572, "step": 4128 }, { "epoch": 0.32, "grad_norm": 1.20924101094676, "learning_rate": 1.5895929673566858e-05, "loss": 0.6552, "step": 4129 }, { "epoch": 0.32, "grad_norm": 1.1965935019987255, "learning_rate": 1.5893900001724204e-05, "loss": 0.5841, "step": 4130 }, { "epoch": 0.32, "grad_norm": 1.2484053453642063, "learning_rate": 1.5891869957769232e-05, "loss": 0.6036, "step": 4131 }, { "epoch": 0.32, "grad_norm": 1.2915145210161616, "learning_rate": 1.5889839541830115e-05, "loss": 0.5962, "step": 4132 }, { "epoch": 0.32, "grad_norm": 1.182488104455969, "learning_rate": 1.5887808754035037e-05, "loss": 0.6002, "step": 4133 }, { "epoch": 0.32, "grad_norm": 1.1066614194155773, "learning_rate": 1.588577759451222e-05, "loss": 0.5645, "step": 4134 }, { "epoch": 0.32, "grad_norm": 1.3685779585342646, "learning_rate": 1.5883746063389897e-05, "loss": 0.5852, "step": 4135 }, { "epoch": 0.32, "grad_norm": 1.19871947988535, "learning_rate": 1.5881714160796326e-05, "loss": 0.6218, "step": 4136 }, { "epoch": 0.32, "grad_norm": 1.2420773727255203, "learning_rate": 1.5879681886859794e-05, "loss": 0.6229, "step": 4137 }, { "epoch": 0.32, "grad_norm": 1.2308886099309224, "learning_rate": 1.5877649241708613e-05, "loss": 0.5782, "step": 4138 }, { "epoch": 0.32, "grad_norm": 1.1947508845177341, "learning_rate": 1.5875616225471105e-05, "loss": 0.5959, "step": 4139 }, { "epoch": 0.32, "grad_norm": 1.1290108919319388, "learning_rate": 1.5873582838275637e-05, "loss": 0.5318, "step": 4140 }, { "epoch": 0.32, "grad_norm": 1.1401085925666197, "learning_rate": 1.5871549080250577e-05, "loss": 0.5707, "step": 4141 }, { "epoch": 0.32, "grad_norm": 1.1563511881497714, "learning_rate": 1.586951495152433e-05, "loss": 0.5712, "step": 4142 }, { "epoch": 0.32, "grad_norm": 1.203907947180144, "learning_rate": 1.5867480452225323e-05, "loss": 0.6535, "step": 4143 }, { "epoch": 0.32, "grad_norm": 1.2168094150637931, "learning_rate": 1.5865445582482002e-05, "loss": 0.5632, "step": 4144 }, { "epoch": 0.32, "grad_norm": 1.2416731051250345, "learning_rate": 1.586341034242284e-05, "loss": 0.6188, "step": 4145 }, { "epoch": 0.32, "grad_norm": 1.2532720655488314, "learning_rate": 1.5861374732176332e-05, "loss": 0.6122, "step": 4146 }, { "epoch": 0.32, "grad_norm": 1.2004172831441842, "learning_rate": 1.5859338751870998e-05, "loss": 0.6108, "step": 4147 }, { "epoch": 0.32, "grad_norm": 1.0671746695823765, "learning_rate": 1.5857302401635373e-05, "loss": 0.5586, "step": 4148 }, { "epoch": 0.32, "grad_norm": 1.0990102433273106, "learning_rate": 1.5855265681598032e-05, "loss": 0.5914, "step": 4149 }, { "epoch": 0.32, "grad_norm": 1.32162428478069, "learning_rate": 1.585322859188756e-05, "loss": 0.5657, "step": 4150 }, { "epoch": 0.32, "grad_norm": 1.1037112442418504, "learning_rate": 1.5851191132632563e-05, "loss": 0.591, "step": 4151 }, { "epoch": 0.32, "grad_norm": 1.2173754937429158, "learning_rate": 1.584915330396169e-05, "loss": 0.6426, "step": 4152 }, { "epoch": 0.32, "grad_norm": 1.138650203649102, "learning_rate": 1.5847115106003585e-05, "loss": 0.5402, "step": 4153 }, { "epoch": 0.32, "grad_norm": 1.2264169497295956, "learning_rate": 1.5845076538886934e-05, "loss": 0.5583, "step": 4154 }, { "epoch": 0.32, "grad_norm": 1.2103888960961207, "learning_rate": 1.584303760274045e-05, "loss": 0.5968, "step": 4155 }, { "epoch": 0.32, "grad_norm": 1.100631374446816, "learning_rate": 1.5840998297692854e-05, "loss": 0.5782, "step": 4156 }, { "epoch": 0.32, "grad_norm": 1.3140736637253614, "learning_rate": 1.5838958623872902e-05, "loss": 0.582, "step": 4157 }, { "epoch": 0.32, "grad_norm": 1.2300131302419608, "learning_rate": 1.5836918581409365e-05, "loss": 0.558, "step": 4158 }, { "epoch": 0.32, "grad_norm": 1.1115516478035843, "learning_rate": 1.583487817043104e-05, "loss": 0.5221, "step": 4159 }, { "epoch": 0.32, "grad_norm": 1.1952403925669344, "learning_rate": 1.583283739106676e-05, "loss": 0.6187, "step": 4160 }, { "epoch": 0.32, "grad_norm": 1.197104911534478, "learning_rate": 1.5830796243445357e-05, "loss": 0.5644, "step": 4161 }, { "epoch": 0.32, "grad_norm": 1.0958404726924815, "learning_rate": 1.5828754727695703e-05, "loss": 0.5338, "step": 4162 }, { "epoch": 0.32, "grad_norm": 1.1056213273612223, "learning_rate": 1.5826712843946693e-05, "loss": 0.5319, "step": 4163 }, { "epoch": 0.32, "grad_norm": 1.1494895113001469, "learning_rate": 1.582467059232724e-05, "loss": 0.5539, "step": 4164 }, { "epoch": 0.32, "grad_norm": 1.1224026897365564, "learning_rate": 1.582262797296628e-05, "loss": 0.5406, "step": 4165 }, { "epoch": 0.32, "grad_norm": 1.29688478948736, "learning_rate": 1.5820584985992777e-05, "loss": 0.5881, "step": 4166 }, { "epoch": 0.32, "grad_norm": 1.1768997803204808, "learning_rate": 1.581854163153571e-05, "loss": 0.5997, "step": 4167 }, { "epoch": 0.32, "grad_norm": 1.1589427214087622, "learning_rate": 1.581649790972409e-05, "loss": 0.5597, "step": 4168 }, { "epoch": 0.32, "grad_norm": 1.134272456723087, "learning_rate": 1.581445382068695e-05, "loss": 0.561, "step": 4169 }, { "epoch": 0.32, "grad_norm": 1.1222380536812362, "learning_rate": 1.5812409364553344e-05, "loss": 0.6166, "step": 4170 }, { "epoch": 0.32, "grad_norm": 1.2356243329970122, "learning_rate": 1.5810364541452342e-05, "loss": 0.6328, "step": 4171 }, { "epoch": 0.32, "grad_norm": 1.1618939881314763, "learning_rate": 1.580831935151305e-05, "loss": 0.6192, "step": 4172 }, { "epoch": 0.32, "grad_norm": 1.1489546253816612, "learning_rate": 1.5806273794864592e-05, "loss": 0.5828, "step": 4173 }, { "epoch": 0.32, "grad_norm": 1.1931558738298869, "learning_rate": 1.5804227871636114e-05, "loss": 0.6067, "step": 4174 }, { "epoch": 0.32, "grad_norm": 1.224700340287395, "learning_rate": 1.5802181581956782e-05, "loss": 0.6024, "step": 4175 }, { "epoch": 0.32, "grad_norm": 1.197164658836633, "learning_rate": 1.5800134925955792e-05, "loss": 0.6414, "step": 4176 }, { "epoch": 0.32, "grad_norm": 1.0887383217957185, "learning_rate": 1.579808790376236e-05, "loss": 0.6058, "step": 4177 }, { "epoch": 0.32, "grad_norm": 1.259549996307871, "learning_rate": 1.5796040515505724e-05, "loss": 0.6651, "step": 4178 }, { "epoch": 0.32, "grad_norm": 1.0770380373394608, "learning_rate": 1.5793992761315147e-05, "loss": 0.5502, "step": 4179 }, { "epoch": 0.32, "grad_norm": 1.2953728398348576, "learning_rate": 1.5791944641319914e-05, "loss": 0.6397, "step": 4180 }, { "epoch": 0.32, "grad_norm": 1.2637665372647529, "learning_rate": 1.5789896155649333e-05, "loss": 0.5922, "step": 4181 }, { "epoch": 0.32, "grad_norm": 1.2070424594790734, "learning_rate": 1.578784730443273e-05, "loss": 0.5758, "step": 4182 }, { "epoch": 0.32, "grad_norm": 1.290383557880392, "learning_rate": 1.5785798087799476e-05, "loss": 0.5818, "step": 4183 }, { "epoch": 0.32, "grad_norm": 1.2466818160311268, "learning_rate": 1.5783748505878932e-05, "loss": 0.6033, "step": 4184 }, { "epoch": 0.32, "grad_norm": 1.371210468247492, "learning_rate": 1.5781698558800503e-05, "loss": 0.6262, "step": 4185 }, { "epoch": 0.32, "grad_norm": 1.185993694838671, "learning_rate": 1.577964824669362e-05, "loss": 0.6076, "step": 4186 }, { "epoch": 0.32, "grad_norm": 1.190057510140332, "learning_rate": 1.577759756968772e-05, "loss": 0.5602, "step": 4187 }, { "epoch": 0.32, "grad_norm": 1.1809004554076394, "learning_rate": 1.577554652791228e-05, "loss": 0.5978, "step": 4188 }, { "epoch": 0.32, "grad_norm": 1.238725889261915, "learning_rate": 1.5773495121496787e-05, "loss": 0.5387, "step": 4189 }, { "epoch": 0.33, "grad_norm": 1.12641822642419, "learning_rate": 1.5771443350570756e-05, "loss": 0.6105, "step": 4190 }, { "epoch": 0.33, "grad_norm": 1.2080687474368461, "learning_rate": 1.5769391215263737e-05, "loss": 0.5477, "step": 4191 }, { "epoch": 0.33, "grad_norm": 1.189125554622287, "learning_rate": 1.576733871570528e-05, "loss": 0.6163, "step": 4192 }, { "epoch": 0.33, "grad_norm": 1.1753070166413515, "learning_rate": 1.576528585202498e-05, "loss": 0.6237, "step": 4193 }, { "epoch": 0.33, "grad_norm": 1.2476602591281176, "learning_rate": 1.576323262435243e-05, "loss": 0.5966, "step": 4194 }, { "epoch": 0.33, "grad_norm": 1.1575786071187284, "learning_rate": 1.5761179032817275e-05, "loss": 0.56, "step": 4195 }, { "epoch": 0.33, "grad_norm": 1.247796787284363, "learning_rate": 1.5759125077549164e-05, "loss": 0.5504, "step": 4196 }, { "epoch": 0.33, "grad_norm": 1.2886518344436713, "learning_rate": 1.5757070758677775e-05, "loss": 0.6028, "step": 4197 }, { "epoch": 0.33, "grad_norm": 1.1983164521002307, "learning_rate": 1.5755016076332805e-05, "loss": 0.5647, "step": 4198 }, { "epoch": 0.33, "grad_norm": 1.1704388783240482, "learning_rate": 1.5752961030643978e-05, "loss": 0.5942, "step": 4199 }, { "epoch": 0.33, "grad_norm": 1.1912501999013279, "learning_rate": 1.5750905621741037e-05, "loss": 0.6125, "step": 4200 }, { "epoch": 0.33, "grad_norm": 1.135771156525432, "learning_rate": 1.5748849849753757e-05, "loss": 0.5413, "step": 4201 }, { "epoch": 0.33, "grad_norm": 1.084404226563342, "learning_rate": 1.5746793714811925e-05, "loss": 0.5594, "step": 4202 }, { "epoch": 0.33, "grad_norm": 1.177701124035044, "learning_rate": 1.5744737217045355e-05, "loss": 0.5573, "step": 4203 }, { "epoch": 0.33, "grad_norm": 1.1667078669403046, "learning_rate": 1.574268035658389e-05, "loss": 0.5941, "step": 4204 }, { "epoch": 0.33, "grad_norm": 1.3080694714100287, "learning_rate": 1.574062313355738e-05, "loss": 0.6122, "step": 4205 }, { "epoch": 0.33, "grad_norm": 1.1308704434974841, "learning_rate": 1.5738565548095718e-05, "loss": 0.5881, "step": 4206 }, { "epoch": 0.33, "grad_norm": 1.2058847160877186, "learning_rate": 1.5736507600328804e-05, "loss": 0.6081, "step": 4207 }, { "epoch": 0.33, "grad_norm": 1.3394470072306475, "learning_rate": 1.5734449290386568e-05, "loss": 0.6675, "step": 4208 }, { "epoch": 0.33, "grad_norm": 1.3144116331356501, "learning_rate": 1.5732390618398966e-05, "loss": 0.6165, "step": 4209 }, { "epoch": 0.33, "grad_norm": 1.1398986698591642, "learning_rate": 1.5730331584495965e-05, "loss": 0.6137, "step": 4210 }, { "epoch": 0.33, "grad_norm": 1.1406860335233333, "learning_rate": 1.5728272188807564e-05, "loss": 0.5765, "step": 4211 }, { "epoch": 0.33, "grad_norm": 1.1929319025031162, "learning_rate": 1.572621243146379e-05, "loss": 0.5761, "step": 4212 }, { "epoch": 0.33, "grad_norm": 1.2316240004056611, "learning_rate": 1.5724152312594683e-05, "loss": 0.5776, "step": 4213 }, { "epoch": 0.33, "grad_norm": 1.2885305983433712, "learning_rate": 1.5722091832330307e-05, "loss": 0.6087, "step": 4214 }, { "epoch": 0.33, "grad_norm": 1.2260871014512607, "learning_rate": 1.572003099080075e-05, "loss": 0.5777, "step": 4215 }, { "epoch": 0.33, "grad_norm": 1.0597741559392169, "learning_rate": 1.5717969788136123e-05, "loss": 0.5589, "step": 4216 }, { "epoch": 0.33, "grad_norm": 1.2328517544954958, "learning_rate": 1.571590822446657e-05, "loss": 0.5962, "step": 4217 }, { "epoch": 0.33, "grad_norm": 1.180643818242358, "learning_rate": 1.571384629992223e-05, "loss": 0.6047, "step": 4218 }, { "epoch": 0.33, "grad_norm": 1.153029208869553, "learning_rate": 1.57117840146333e-05, "loss": 0.6125, "step": 4219 }, { "epoch": 0.33, "grad_norm": 1.122415912681638, "learning_rate": 1.5709721368729977e-05, "loss": 0.5619, "step": 4220 }, { "epoch": 0.33, "grad_norm": 1.2249023963035894, "learning_rate": 1.5707658362342484e-05, "loss": 0.6708, "step": 4221 }, { "epoch": 0.33, "grad_norm": 1.343392124869427, "learning_rate": 1.570559499560107e-05, "loss": 0.5913, "step": 4222 }, { "epoch": 0.33, "grad_norm": 1.2005444880849245, "learning_rate": 1.570353126863601e-05, "loss": 0.6078, "step": 4223 }, { "epoch": 0.33, "grad_norm": 1.1056571233814998, "learning_rate": 1.570146718157759e-05, "loss": 0.5583, "step": 4224 }, { "epoch": 0.33, "grad_norm": 1.1242107165740816, "learning_rate": 1.5699402734556133e-05, "loss": 0.5937, "step": 4225 }, { "epoch": 0.33, "grad_norm": 1.1543452580311813, "learning_rate": 1.5697337927701977e-05, "loss": 0.5595, "step": 4226 }, { "epoch": 0.33, "grad_norm": 1.267246008861621, "learning_rate": 1.5695272761145486e-05, "loss": 0.6382, "step": 4227 }, { "epoch": 0.33, "grad_norm": 1.1132376143620815, "learning_rate": 1.569320723501704e-05, "loss": 0.587, "step": 4228 }, { "epoch": 0.33, "grad_norm": 1.164809838357185, "learning_rate": 1.5691141349447046e-05, "loss": 0.5429, "step": 4229 }, { "epoch": 0.33, "grad_norm": 1.2153910296516017, "learning_rate": 1.5689075104565936e-05, "loss": 0.6058, "step": 4230 }, { "epoch": 0.33, "grad_norm": 1.1422446783237805, "learning_rate": 1.5687008500504165e-05, "loss": 0.6221, "step": 4231 }, { "epoch": 0.33, "grad_norm": 1.1590503597986488, "learning_rate": 1.5684941537392205e-05, "loss": 0.6127, "step": 4232 }, { "epoch": 0.33, "grad_norm": 1.2456341795863013, "learning_rate": 1.5682874215360557e-05, "loss": 0.5934, "step": 4233 }, { "epoch": 0.33, "grad_norm": 1.2216283115040354, "learning_rate": 1.568080653453974e-05, "loss": 0.5776, "step": 4234 }, { "epoch": 0.33, "grad_norm": 1.0893601195587912, "learning_rate": 1.5678738495060292e-05, "loss": 0.5819, "step": 4235 }, { "epoch": 0.33, "grad_norm": 1.0774837743152603, "learning_rate": 1.567667009705279e-05, "loss": 0.509, "step": 4236 }, { "epoch": 0.33, "grad_norm": 1.0842760947906658, "learning_rate": 1.567460134064782e-05, "loss": 0.5292, "step": 4237 }, { "epoch": 0.33, "grad_norm": 1.1248018832071582, "learning_rate": 1.5672532225975983e-05, "loss": 0.5691, "step": 4238 }, { "epoch": 0.33, "grad_norm": 1.189464800797708, "learning_rate": 1.5670462753167922e-05, "loss": 0.5998, "step": 4239 }, { "epoch": 0.33, "grad_norm": 1.3506471036684864, "learning_rate": 1.566839292235429e-05, "loss": 0.6264, "step": 4240 }, { "epoch": 0.33, "grad_norm": 1.1231247319309805, "learning_rate": 1.5666322733665773e-05, "loss": 0.6178, "step": 4241 }, { "epoch": 0.33, "grad_norm": 1.1648143414086092, "learning_rate": 1.5664252187233066e-05, "loss": 0.5785, "step": 4242 }, { "epoch": 0.33, "grad_norm": 1.2548386859185994, "learning_rate": 1.5662181283186894e-05, "loss": 0.6065, "step": 4243 }, { "epoch": 0.33, "grad_norm": 1.2017569752874238, "learning_rate": 1.5660110021658002e-05, "loss": 0.5535, "step": 4244 }, { "epoch": 0.33, "grad_norm": 1.1170711323347944, "learning_rate": 1.5658038402777165e-05, "loss": 0.5489, "step": 4245 }, { "epoch": 0.33, "grad_norm": 1.1981973678551041, "learning_rate": 1.5655966426675172e-05, "loss": 0.6382, "step": 4246 }, { "epoch": 0.33, "grad_norm": 1.2080716090830148, "learning_rate": 1.5653894093482835e-05, "loss": 0.5793, "step": 4247 }, { "epoch": 0.33, "grad_norm": 1.1014751102061386, "learning_rate": 1.5651821403330996e-05, "loss": 0.5579, "step": 4248 }, { "epoch": 0.33, "grad_norm": 1.1856763288279222, "learning_rate": 1.5649748356350513e-05, "loss": 0.6111, "step": 4249 }, { "epoch": 0.33, "grad_norm": 1.111876608818996, "learning_rate": 1.5647674952672265e-05, "loss": 0.5396, "step": 4250 }, { "epoch": 0.33, "grad_norm": 1.1950902794609333, "learning_rate": 1.564560119242716e-05, "loss": 0.681, "step": 4251 }, { "epoch": 0.33, "grad_norm": 1.2280012306182362, "learning_rate": 1.5643527075746125e-05, "loss": 0.5947, "step": 4252 }, { "epoch": 0.33, "grad_norm": 1.1791713166341768, "learning_rate": 1.564145260276011e-05, "loss": 0.5775, "step": 4253 }, { "epoch": 0.33, "grad_norm": 1.2863204460019095, "learning_rate": 1.563937777360008e-05, "loss": 0.6008, "step": 4254 }, { "epoch": 0.33, "grad_norm": 1.15050471052642, "learning_rate": 1.563730258839704e-05, "loss": 0.6132, "step": 4255 }, { "epoch": 0.33, "grad_norm": 1.1993216305816103, "learning_rate": 1.5635227047282005e-05, "loss": 0.5771, "step": 4256 }, { "epoch": 0.33, "grad_norm": 1.1615289344484334, "learning_rate": 1.563315115038601e-05, "loss": 0.561, "step": 4257 }, { "epoch": 0.33, "grad_norm": 1.1735187954431585, "learning_rate": 1.563107489784012e-05, "loss": 0.5682, "step": 4258 }, { "epoch": 0.33, "grad_norm": 1.2188851697253185, "learning_rate": 1.562899828977542e-05, "loss": 0.5923, "step": 4259 }, { "epoch": 0.33, "grad_norm": 1.1471939100867332, "learning_rate": 1.5626921326323016e-05, "loss": 0.5777, "step": 4260 }, { "epoch": 0.33, "grad_norm": 1.1356480330638674, "learning_rate": 1.5624844007614037e-05, "loss": 0.6542, "step": 4261 }, { "epoch": 0.33, "grad_norm": 1.295458192074889, "learning_rate": 1.5622766333779637e-05, "loss": 0.6568, "step": 4262 }, { "epoch": 0.33, "grad_norm": 1.1669861095062695, "learning_rate": 1.5620688304950985e-05, "loss": 0.6234, "step": 4263 }, { "epoch": 0.33, "grad_norm": 1.186047317989056, "learning_rate": 1.5618609921259286e-05, "loss": 0.5964, "step": 4264 }, { "epoch": 0.33, "grad_norm": 1.238075503997241, "learning_rate": 1.561653118283575e-05, "loss": 0.5791, "step": 4265 }, { "epoch": 0.33, "grad_norm": 1.1897992912898259, "learning_rate": 1.5614452089811628e-05, "loss": 0.6058, "step": 4266 }, { "epoch": 0.33, "grad_norm": 1.0708700245509777, "learning_rate": 1.5612372642318176e-05, "loss": 0.5759, "step": 4267 }, { "epoch": 0.33, "grad_norm": 1.1604674978212233, "learning_rate": 1.5610292840486684e-05, "loss": 0.5637, "step": 4268 }, { "epoch": 0.33, "grad_norm": 1.1496032714681725, "learning_rate": 1.5608212684448453e-05, "loss": 0.6144, "step": 4269 }, { "epoch": 0.33, "grad_norm": 1.2419530299361183, "learning_rate": 1.560613217433483e-05, "loss": 0.6095, "step": 4270 }, { "epoch": 0.33, "grad_norm": 1.2836035534791388, "learning_rate": 1.5604051310277152e-05, "loss": 0.6439, "step": 4271 }, { "epoch": 0.33, "grad_norm": 1.1591084176371425, "learning_rate": 1.5601970092406807e-05, "loss": 0.5686, "step": 4272 }, { "epoch": 0.33, "grad_norm": 1.13279150581637, "learning_rate": 1.5599888520855186e-05, "loss": 0.5598, "step": 4273 }, { "epoch": 0.33, "grad_norm": 1.203480606040116, "learning_rate": 1.559780659575371e-05, "loss": 0.588, "step": 4274 }, { "epoch": 0.33, "grad_norm": 1.130988816938756, "learning_rate": 1.559572431723382e-05, "loss": 0.5794, "step": 4275 }, { "epoch": 0.33, "grad_norm": 1.2944949728027186, "learning_rate": 1.559364168542699e-05, "loss": 0.6655, "step": 4276 }, { "epoch": 0.33, "grad_norm": 1.1272260047928933, "learning_rate": 1.55915587004647e-05, "loss": 0.5523, "step": 4277 }, { "epoch": 0.33, "grad_norm": 1.2182135746373812, "learning_rate": 1.5589475362478458e-05, "loss": 0.5855, "step": 4278 }, { "epoch": 0.33, "grad_norm": 1.3181940271744912, "learning_rate": 1.5587391671599798e-05, "loss": 0.6113, "step": 4279 }, { "epoch": 0.33, "grad_norm": 1.2505427612686963, "learning_rate": 1.5585307627960277e-05, "loss": 0.5963, "step": 4280 }, { "epoch": 0.33, "grad_norm": 1.0647500840075304, "learning_rate": 1.5583223231691466e-05, "loss": 0.5443, "step": 4281 }, { "epoch": 0.33, "grad_norm": 1.0774165604007833, "learning_rate": 1.558113848292497e-05, "loss": 0.5454, "step": 4282 }, { "epoch": 0.33, "grad_norm": 1.109701994938504, "learning_rate": 1.5579053381792406e-05, "loss": 0.5765, "step": 4283 }, { "epoch": 0.33, "grad_norm": 1.1624594794153704, "learning_rate": 1.5576967928425414e-05, "loss": 0.5757, "step": 4284 }, { "epoch": 0.33, "grad_norm": 1.281872388817533, "learning_rate": 1.5574882122955668e-05, "loss": 0.5887, "step": 4285 }, { "epoch": 0.33, "grad_norm": 1.1957303482441077, "learning_rate": 1.557279596551485e-05, "loss": 0.5552, "step": 4286 }, { "epoch": 0.33, "grad_norm": 1.1569548855346417, "learning_rate": 1.557070945623467e-05, "loss": 0.6048, "step": 4287 }, { "epoch": 0.33, "grad_norm": 1.1554446121800024, "learning_rate": 1.556862259524686e-05, "loss": 0.5987, "step": 4288 }, { "epoch": 0.33, "grad_norm": 1.0879999239409643, "learning_rate": 1.556653538268318e-05, "loss": 0.5325, "step": 4289 }, { "epoch": 0.33, "grad_norm": 1.2413542249856198, "learning_rate": 1.5564447818675397e-05, "loss": 0.5897, "step": 4290 }, { "epoch": 0.33, "grad_norm": 1.1876448492750868, "learning_rate": 1.5562359903355315e-05, "loss": 0.5331, "step": 4291 }, { "epoch": 0.33, "grad_norm": 1.1003812584361574, "learning_rate": 1.5560271636854757e-05, "loss": 0.5632, "step": 4292 }, { "epoch": 0.33, "grad_norm": 1.1683119457824958, "learning_rate": 1.555818301930556e-05, "loss": 0.5694, "step": 4293 }, { "epoch": 0.33, "grad_norm": 1.112776524755778, "learning_rate": 1.555609405083959e-05, "loss": 0.5432, "step": 4294 }, { "epoch": 0.33, "grad_norm": 1.3055725293825904, "learning_rate": 1.5554004731588745e-05, "loss": 0.6332, "step": 4295 }, { "epoch": 0.33, "grad_norm": 1.1394203827357794, "learning_rate": 1.555191506168492e-05, "loss": 0.5719, "step": 4296 }, { "epoch": 0.33, "grad_norm": 1.3082075317709538, "learning_rate": 1.5549825041260052e-05, "loss": 0.624, "step": 4297 }, { "epoch": 0.33, "grad_norm": 1.341341388358419, "learning_rate": 1.5547734670446103e-05, "loss": 0.6568, "step": 4298 }, { "epoch": 0.33, "grad_norm": 1.2121439184785832, "learning_rate": 1.554564394937504e-05, "loss": 0.6046, "step": 4299 }, { "epoch": 0.33, "grad_norm": 1.3055558199014239, "learning_rate": 1.5543552878178857e-05, "loss": 0.6063, "step": 4300 }, { "epoch": 0.33, "grad_norm": 1.2305411181285864, "learning_rate": 1.5541461456989583e-05, "loss": 0.649, "step": 4301 }, { "epoch": 0.33, "grad_norm": 1.192278983229602, "learning_rate": 1.553936968593926e-05, "loss": 0.6031, "step": 4302 }, { "epoch": 0.33, "grad_norm": 1.0950641366822178, "learning_rate": 1.5537277565159944e-05, "loss": 0.6649, "step": 4303 }, { "epoch": 0.33, "grad_norm": 1.1334477715122804, "learning_rate": 1.5535185094783728e-05, "loss": 0.5492, "step": 4304 }, { "epoch": 0.33, "grad_norm": 1.1961000129640387, "learning_rate": 1.5533092274942724e-05, "loss": 0.6009, "step": 4305 }, { "epoch": 0.33, "grad_norm": 1.266119118767877, "learning_rate": 1.553099910576905e-05, "loss": 0.5972, "step": 4306 }, { "epoch": 0.33, "grad_norm": 1.2080482223277254, "learning_rate": 1.5528905587394872e-05, "loss": 0.5804, "step": 4307 }, { "epoch": 0.33, "grad_norm": 1.205405958892666, "learning_rate": 1.5526811719952356e-05, "loss": 0.6101, "step": 4308 }, { "epoch": 0.33, "grad_norm": 1.1561293667935484, "learning_rate": 1.55247175035737e-05, "loss": 0.6169, "step": 4309 }, { "epoch": 0.33, "grad_norm": 1.2087806443725497, "learning_rate": 1.5522622938391132e-05, "loss": 0.6186, "step": 4310 }, { "epoch": 0.33, "grad_norm": 1.2466818160311268, "learning_rate": 1.552052802453688e-05, "loss": 0.6416, "step": 4311 }, { "epoch": 0.33, "grad_norm": 1.2414732024933746, "learning_rate": 1.551843276214321e-05, "loss": 0.5562, "step": 4312 }, { "epoch": 0.33, "grad_norm": 1.3189507860397496, "learning_rate": 1.551633715134241e-05, "loss": 0.669, "step": 4313 }, { "epoch": 0.33, "grad_norm": 1.2859813047714257, "learning_rate": 1.5514241192266786e-05, "loss": 0.6141, "step": 4314 }, { "epoch": 0.33, "grad_norm": 1.0726381637926647, "learning_rate": 1.5512144885048664e-05, "loss": 0.6105, "step": 4315 }, { "epoch": 0.33, "grad_norm": 1.2531460748360561, "learning_rate": 1.5510048229820398e-05, "loss": 0.5823, "step": 4316 }, { "epoch": 0.33, "grad_norm": 1.2201612078360424, "learning_rate": 1.5507951226714356e-05, "loss": 0.5963, "step": 4317 }, { "epoch": 0.33, "grad_norm": 1.2030143686928403, "learning_rate": 1.550585387586294e-05, "loss": 0.6031, "step": 4318 }, { "epoch": 0.34, "grad_norm": 1.1650962591720035, "learning_rate": 1.550375617739856e-05, "loss": 0.6219, "step": 4319 }, { "epoch": 0.34, "grad_norm": 1.2110246196294916, "learning_rate": 1.550165813145366e-05, "loss": 0.5959, "step": 4320 }, { "epoch": 0.34, "grad_norm": 1.2279511869868345, "learning_rate": 1.5499559738160693e-05, "loss": 0.6043, "step": 4321 }, { "epoch": 0.34, "grad_norm": 1.063445568070926, "learning_rate": 1.549746099765215e-05, "loss": 0.5189, "step": 4322 }, { "epoch": 0.34, "grad_norm": 1.1293839232741927, "learning_rate": 1.5495361910060527e-05, "loss": 0.5382, "step": 4323 }, { "epoch": 0.34, "grad_norm": 1.1155763759755608, "learning_rate": 1.5493262475518353e-05, "loss": 0.6106, "step": 4324 }, { "epoch": 0.34, "grad_norm": 1.1945891341960149, "learning_rate": 1.5491162694158182e-05, "loss": 0.5521, "step": 4325 }, { "epoch": 0.34, "grad_norm": 1.3651839480275945, "learning_rate": 1.548906256611258e-05, "loss": 0.609, "step": 4326 }, { "epoch": 0.34, "grad_norm": 1.2565710445133467, "learning_rate": 1.5486962091514133e-05, "loss": 0.6111, "step": 4327 }, { "epoch": 0.34, "grad_norm": 1.1392565840530247, "learning_rate": 1.5484861270495464e-05, "loss": 0.5218, "step": 4328 }, { "epoch": 0.34, "grad_norm": 1.2287283078022297, "learning_rate": 1.5482760103189203e-05, "loss": 0.6049, "step": 4329 }, { "epoch": 0.34, "grad_norm": 1.2041847713829785, "learning_rate": 1.548065858972801e-05, "loss": 0.5831, "step": 4330 }, { "epoch": 0.34, "grad_norm": 1.1843491966637465, "learning_rate": 1.5478556730244564e-05, "loss": 0.5631, "step": 4331 }, { "epoch": 0.34, "grad_norm": 1.0813510560897177, "learning_rate": 1.5476454524871566e-05, "loss": 0.5709, "step": 4332 }, { "epoch": 0.34, "grad_norm": 1.194321614364365, "learning_rate": 1.5474351973741742e-05, "loss": 0.6366, "step": 4333 }, { "epoch": 0.34, "grad_norm": 1.2448822158801993, "learning_rate": 1.547224907698783e-05, "loss": 0.6267, "step": 4334 }, { "epoch": 0.34, "grad_norm": 1.1498157768304422, "learning_rate": 1.54701458347426e-05, "loss": 0.5494, "step": 4335 }, { "epoch": 0.34, "grad_norm": 1.0574603329877628, "learning_rate": 1.5468042247138844e-05, "loss": 0.5413, "step": 4336 }, { "epoch": 0.34, "grad_norm": 1.1500462273967669, "learning_rate": 1.5465938314309367e-05, "loss": 0.5746, "step": 4337 }, { "epoch": 0.34, "grad_norm": 1.1411223176558392, "learning_rate": 1.546383403638701e-05, "loss": 0.5566, "step": 4338 }, { "epoch": 0.34, "grad_norm": 1.225857137029262, "learning_rate": 1.5461729413504613e-05, "loss": 0.6297, "step": 4339 }, { "epoch": 0.34, "grad_norm": 1.0602756666311752, "learning_rate": 1.5459624445795062e-05, "loss": 0.5592, "step": 4340 }, { "epoch": 0.34, "grad_norm": 1.113962172689552, "learning_rate": 1.545751913339125e-05, "loss": 0.5249, "step": 4341 }, { "epoch": 0.34, "grad_norm": 1.2147959610840169, "learning_rate": 1.54554134764261e-05, "loss": 0.6144, "step": 4342 }, { "epoch": 0.34, "grad_norm": 1.189400657661709, "learning_rate": 1.5453307475032552e-05, "loss": 0.5544, "step": 4343 }, { "epoch": 0.34, "grad_norm": 1.1880256091931498, "learning_rate": 1.5451201129343566e-05, "loss": 0.5899, "step": 4344 }, { "epoch": 0.34, "grad_norm": 1.1813865703774582, "learning_rate": 1.544909443949213e-05, "loss": 0.6005, "step": 4345 }, { "epoch": 0.34, "grad_norm": 1.0560182441305859, "learning_rate": 1.5446987405611248e-05, "loss": 0.5167, "step": 4346 }, { "epoch": 0.34, "grad_norm": 1.1708212119908372, "learning_rate": 1.5444880027833947e-05, "loss": 0.5678, "step": 4347 }, { "epoch": 0.34, "grad_norm": 1.2678871189618748, "learning_rate": 1.5442772306293277e-05, "loss": 0.6456, "step": 4348 }, { "epoch": 0.34, "grad_norm": 1.081308006097804, "learning_rate": 1.5440664241122312e-05, "loss": 0.5323, "step": 4349 }, { "epoch": 0.34, "grad_norm": 1.2158441380383915, "learning_rate": 1.5438555832454143e-05, "loss": 0.6012, "step": 4350 }, { "epoch": 0.34, "grad_norm": 1.1261988715655107, "learning_rate": 1.5436447080421887e-05, "loss": 0.6277, "step": 4351 }, { "epoch": 0.34, "grad_norm": 1.1096401166454524, "learning_rate": 1.5434337985158674e-05, "loss": 0.5559, "step": 4352 }, { "epoch": 0.34, "grad_norm": 1.2222174201254479, "learning_rate": 1.5432228546797668e-05, "loss": 0.5718, "step": 4353 }, { "epoch": 0.34, "grad_norm": 1.2757120238640933, "learning_rate": 1.5430118765472052e-05, "loss": 0.582, "step": 4354 }, { "epoch": 0.34, "grad_norm": 1.1499292973884567, "learning_rate": 1.5428008641315018e-05, "loss": 0.5528, "step": 4355 }, { "epoch": 0.34, "grad_norm": 1.1802928457727033, "learning_rate": 1.5425898174459794e-05, "loss": 0.5814, "step": 4356 }, { "epoch": 0.34, "grad_norm": 1.0787393782038581, "learning_rate": 1.5423787365039627e-05, "loss": 0.533, "step": 4357 }, { "epoch": 0.34, "grad_norm": 1.2340041460588937, "learning_rate": 1.5421676213187774e-05, "loss": 0.6392, "step": 4358 }, { "epoch": 0.34, "grad_norm": 1.2280317606002826, "learning_rate": 1.5419564719037536e-05, "loss": 0.6096, "step": 4359 }, { "epoch": 0.34, "grad_norm": 1.248507371458292, "learning_rate": 1.5417452882722214e-05, "loss": 0.6251, "step": 4360 }, { "epoch": 0.34, "grad_norm": 1.226481441079024, "learning_rate": 1.541534070437514e-05, "loss": 0.6905, "step": 4361 }, { "epoch": 0.34, "grad_norm": 1.1224117174669375, "learning_rate": 1.541322818412967e-05, "loss": 0.5736, "step": 4362 }, { "epoch": 0.34, "grad_norm": 1.1324407033154424, "learning_rate": 1.5411115322119176e-05, "loss": 0.5572, "step": 4363 }, { "epoch": 0.34, "grad_norm": 1.25963105658449, "learning_rate": 1.5409002118477053e-05, "loss": 0.6141, "step": 4364 }, { "epoch": 0.34, "grad_norm": 1.2681653465594676, "learning_rate": 1.540688857333672e-05, "loss": 0.599, "step": 4365 }, { "epoch": 0.34, "grad_norm": 1.214661121440196, "learning_rate": 1.5404774686831615e-05, "loss": 0.6299, "step": 4366 }, { "epoch": 0.34, "grad_norm": 1.21888746806788, "learning_rate": 1.54026604590952e-05, "loss": 0.5957, "step": 4367 }, { "epoch": 0.34, "grad_norm": 1.1919438884412488, "learning_rate": 1.540054589026095e-05, "loss": 0.5919, "step": 4368 }, { "epoch": 0.34, "grad_norm": 1.2466692895827691, "learning_rate": 1.5398430980462382e-05, "loss": 0.5912, "step": 4369 }, { "epoch": 0.34, "grad_norm": 1.1907434339516478, "learning_rate": 1.5396315729833015e-05, "loss": 0.5307, "step": 4370 }, { "epoch": 0.34, "grad_norm": 1.1197120046742197, "learning_rate": 1.5394200138506393e-05, "loss": 0.5414, "step": 4371 }, { "epoch": 0.34, "grad_norm": 1.1677851708092752, "learning_rate": 1.5392084206616084e-05, "loss": 0.5809, "step": 4372 }, { "epoch": 0.34, "grad_norm": 1.1437043206937372, "learning_rate": 1.5389967934295677e-05, "loss": 0.5991, "step": 4373 }, { "epoch": 0.34, "grad_norm": 1.0779447543084313, "learning_rate": 1.5387851321678788e-05, "loss": 0.5676, "step": 4374 }, { "epoch": 0.34, "grad_norm": 1.2527462355953578, "learning_rate": 1.538573436889905e-05, "loss": 0.6305, "step": 4375 }, { "epoch": 0.34, "grad_norm": 1.1712196043257672, "learning_rate": 1.5383617076090114e-05, "loss": 0.6145, "step": 4376 }, { "epoch": 0.34, "grad_norm": 1.1639391430516126, "learning_rate": 1.5381499443385653e-05, "loss": 0.5949, "step": 4377 }, { "epoch": 0.34, "grad_norm": 1.1495743917624666, "learning_rate": 1.537938147091937e-05, "loss": 0.5887, "step": 4378 }, { "epoch": 0.34, "grad_norm": 1.3407043273504264, "learning_rate": 1.537726315882498e-05, "loss": 0.6772, "step": 4379 }, { "epoch": 0.34, "grad_norm": 1.250110859723331, "learning_rate": 1.5375144507236222e-05, "loss": 0.5646, "step": 4380 }, { "epoch": 0.34, "grad_norm": 1.2708810526325784, "learning_rate": 1.537302551628686e-05, "loss": 0.6346, "step": 4381 }, { "epoch": 0.34, "grad_norm": 1.0891656988169294, "learning_rate": 1.5370906186110677e-05, "loss": 0.5442, "step": 4382 }, { "epoch": 0.34, "grad_norm": 1.2144545149281407, "learning_rate": 1.536878651684148e-05, "loss": 0.5854, "step": 4383 }, { "epoch": 0.34, "grad_norm": 1.2673010853991138, "learning_rate": 1.5366666508613083e-05, "loss": 0.6416, "step": 4384 }, { "epoch": 0.34, "grad_norm": 1.1962630537670147, "learning_rate": 1.536454616155935e-05, "loss": 0.5655, "step": 4385 }, { "epoch": 0.34, "grad_norm": 1.2514959443834761, "learning_rate": 1.5362425475814133e-05, "loss": 0.5454, "step": 4386 }, { "epoch": 0.34, "grad_norm": 1.1835336701406634, "learning_rate": 1.5360304451511333e-05, "loss": 0.5463, "step": 4387 }, { "epoch": 0.34, "grad_norm": 1.2501789441770663, "learning_rate": 1.5358183088784853e-05, "loss": 0.5776, "step": 4388 }, { "epoch": 0.34, "grad_norm": 1.2462837767926083, "learning_rate": 1.5356061387768634e-05, "loss": 0.5915, "step": 4389 }, { "epoch": 0.34, "grad_norm": 1.1739182524903378, "learning_rate": 1.535393934859663e-05, "loss": 0.524, "step": 4390 }, { "epoch": 0.34, "grad_norm": 1.230764153761204, "learning_rate": 1.5351816971402803e-05, "loss": 0.6316, "step": 4391 }, { "epoch": 0.34, "grad_norm": 1.097198119213983, "learning_rate": 1.5349694256321162e-05, "loss": 0.5255, "step": 4392 }, { "epoch": 0.34, "grad_norm": 1.0370746613923232, "learning_rate": 1.5347571203485723e-05, "loss": 0.5667, "step": 4393 }, { "epoch": 0.34, "grad_norm": 1.0880541584370695, "learning_rate": 1.5345447813030526e-05, "loss": 0.5446, "step": 4394 }, { "epoch": 0.34, "grad_norm": 1.2309443932035438, "learning_rate": 1.5343324085089628e-05, "loss": 0.6333, "step": 4395 }, { "epoch": 0.34, "grad_norm": 1.1745729542887466, "learning_rate": 1.534120001979711e-05, "loss": 0.5948, "step": 4396 }, { "epoch": 0.34, "grad_norm": 1.2211402537642444, "learning_rate": 1.533907561728708e-05, "loss": 0.5645, "step": 4397 }, { "epoch": 0.34, "grad_norm": 1.1916706729812678, "learning_rate": 1.533695087769366e-05, "loss": 0.5642, "step": 4398 }, { "epoch": 0.34, "grad_norm": 1.2430540217432164, "learning_rate": 1.5334825801150998e-05, "loss": 0.6135, "step": 4399 }, { "epoch": 0.34, "grad_norm": 1.2546036346644387, "learning_rate": 1.5332700387793255e-05, "loss": 0.6196, "step": 4400 }, { "epoch": 0.34, "grad_norm": 1.2020119311427646, "learning_rate": 1.5330574637754627e-05, "loss": 0.5501, "step": 4401 }, { "epoch": 0.34, "grad_norm": 1.234787932269557, "learning_rate": 1.5328448551169318e-05, "loss": 0.6314, "step": 4402 }, { "epoch": 0.34, "grad_norm": 1.063317657452814, "learning_rate": 1.532632212817156e-05, "loss": 0.5592, "step": 4403 }, { "epoch": 0.34, "grad_norm": 1.1959654078281263, "learning_rate": 1.532419536889561e-05, "loss": 0.5993, "step": 4404 }, { "epoch": 0.34, "grad_norm": 1.2815247683027342, "learning_rate": 1.5322068273475737e-05, "loss": 0.5572, "step": 4405 }, { "epoch": 0.34, "grad_norm": 1.2080824142031188, "learning_rate": 1.531994084204623e-05, "loss": 0.5759, "step": 4406 }, { "epoch": 0.34, "grad_norm": 1.131741036755641, "learning_rate": 1.5317813074741415e-05, "loss": 0.5293, "step": 4407 }, { "epoch": 0.34, "grad_norm": 1.2769330965737027, "learning_rate": 1.531568497169562e-05, "loss": 0.5712, "step": 4408 }, { "epoch": 0.34, "grad_norm": 1.2140100200214425, "learning_rate": 1.5313556533043212e-05, "loss": 0.6009, "step": 4409 }, { "epoch": 0.34, "grad_norm": 1.2692971351141003, "learning_rate": 1.5311427758918564e-05, "loss": 0.6094, "step": 4410 }, { "epoch": 0.34, "grad_norm": 1.1314977448357104, "learning_rate": 1.5309298649456075e-05, "loss": 0.5783, "step": 4411 }, { "epoch": 0.34, "grad_norm": 1.164858705768735, "learning_rate": 1.5307169204790174e-05, "loss": 0.6357, "step": 4412 }, { "epoch": 0.34, "grad_norm": 1.1666847238959757, "learning_rate": 1.5305039425055302e-05, "loss": 0.5311, "step": 4413 }, { "epoch": 0.34, "grad_norm": 1.1897781003297598, "learning_rate": 1.5302909310385916e-05, "loss": 0.5929, "step": 4414 }, { "epoch": 0.34, "grad_norm": 1.1927447195485912, "learning_rate": 1.530077886091651e-05, "loss": 0.5851, "step": 4415 }, { "epoch": 0.34, "grad_norm": 1.1762167292074122, "learning_rate": 1.5298648076781583e-05, "loss": 0.6102, "step": 4416 }, { "epoch": 0.34, "grad_norm": 1.0890326541001754, "learning_rate": 1.5296516958115666e-05, "loss": 0.5465, "step": 4417 }, { "epoch": 0.34, "grad_norm": 1.2519195123644502, "learning_rate": 1.5294385505053305e-05, "loss": 0.5832, "step": 4418 }, { "epoch": 0.34, "grad_norm": 1.232027646714422, "learning_rate": 1.5292253717729072e-05, "loss": 0.6447, "step": 4419 }, { "epoch": 0.34, "grad_norm": 1.2657436268160225, "learning_rate": 1.529012159627756e-05, "loss": 0.6693, "step": 4420 }, { "epoch": 0.34, "grad_norm": 1.210744042478969, "learning_rate": 1.5287989140833376e-05, "loss": 0.5477, "step": 4421 }, { "epoch": 0.34, "grad_norm": 1.0917837725622568, "learning_rate": 1.5285856351531157e-05, "loss": 0.58, "step": 4422 }, { "epoch": 0.34, "grad_norm": 1.148882046769742, "learning_rate": 1.5283723228505552e-05, "loss": 0.5378, "step": 4423 }, { "epoch": 0.34, "grad_norm": 1.083430518168269, "learning_rate": 1.5281589771891244e-05, "loss": 0.4977, "step": 4424 }, { "epoch": 0.34, "grad_norm": 1.1574323125208323, "learning_rate": 1.527945598182292e-05, "loss": 0.5491, "step": 4425 }, { "epoch": 0.34, "grad_norm": 1.1496577105923107, "learning_rate": 1.5277321858435303e-05, "loss": 0.5835, "step": 4426 }, { "epoch": 0.34, "grad_norm": 1.2745523545806017, "learning_rate": 1.527518740186313e-05, "loss": 0.6344, "step": 4427 }, { "epoch": 0.34, "grad_norm": 1.2802791638977342, "learning_rate": 1.527305261224116e-05, "loss": 0.615, "step": 4428 }, { "epoch": 0.34, "grad_norm": 1.148809671107842, "learning_rate": 1.5270917489704173e-05, "loss": 0.5432, "step": 4429 }, { "epoch": 0.34, "grad_norm": 1.1369891287553175, "learning_rate": 1.5268782034386972e-05, "loss": 0.6384, "step": 4430 }, { "epoch": 0.34, "grad_norm": 1.0942323438559076, "learning_rate": 1.5266646246424374e-05, "loss": 0.5141, "step": 4431 }, { "epoch": 0.34, "grad_norm": 1.1011347921080916, "learning_rate": 1.5264510125951228e-05, "loss": 0.563, "step": 4432 }, { "epoch": 0.34, "grad_norm": 1.088406178683373, "learning_rate": 1.5262373673102396e-05, "loss": 0.5599, "step": 4433 }, { "epoch": 0.34, "grad_norm": 1.302010536702029, "learning_rate": 1.5260236888012766e-05, "loss": 0.6163, "step": 4434 }, { "epoch": 0.34, "grad_norm": 1.1073075492822713, "learning_rate": 1.5258099770817242e-05, "loss": 0.5581, "step": 4435 }, { "epoch": 0.34, "grad_norm": 1.146683579771115, "learning_rate": 1.525596232165075e-05, "loss": 0.5596, "step": 4436 }, { "epoch": 0.34, "grad_norm": 1.170456752876025, "learning_rate": 1.5253824540648237e-05, "loss": 0.5416, "step": 4437 }, { "epoch": 0.34, "grad_norm": 1.1076545807229496, "learning_rate": 1.5251686427944679e-05, "loss": 0.5367, "step": 4438 }, { "epoch": 0.34, "grad_norm": 1.1734350882554954, "learning_rate": 1.524954798367506e-05, "loss": 0.563, "step": 4439 }, { "epoch": 0.34, "grad_norm": 1.1514843856011263, "learning_rate": 1.5247409207974394e-05, "loss": 0.588, "step": 4440 }, { "epoch": 0.34, "grad_norm": 1.2255016525892417, "learning_rate": 1.5245270100977707e-05, "loss": 0.578, "step": 4441 }, { "epoch": 0.34, "grad_norm": 1.185576336414464, "learning_rate": 1.5243130662820058e-05, "loss": 0.5966, "step": 4442 }, { "epoch": 0.34, "grad_norm": 1.0944559816106305, "learning_rate": 1.5240990893636522e-05, "loss": 0.6105, "step": 4443 }, { "epoch": 0.34, "grad_norm": 1.1557597074503254, "learning_rate": 1.523885079356219e-05, "loss": 0.6533, "step": 4444 }, { "epoch": 0.34, "grad_norm": 1.1560541451026403, "learning_rate": 1.5236710362732178e-05, "loss": 0.5969, "step": 4445 }, { "epoch": 0.34, "grad_norm": 1.0838196225445664, "learning_rate": 1.5234569601281623e-05, "loss": 0.5379, "step": 4446 }, { "epoch": 0.34, "grad_norm": 1.2456933218234425, "learning_rate": 1.523242850934568e-05, "loss": 0.5772, "step": 4447 }, { "epoch": 0.35, "grad_norm": 1.1409657113174614, "learning_rate": 1.5230287087059532e-05, "loss": 0.5509, "step": 4448 }, { "epoch": 0.35, "grad_norm": 1.100200054010176, "learning_rate": 1.5228145334558377e-05, "loss": 0.5919, "step": 4449 }, { "epoch": 0.35, "grad_norm": 1.2097921553927675, "learning_rate": 1.5226003251977432e-05, "loss": 0.6066, "step": 4450 }, { "epoch": 0.35, "grad_norm": 1.2463753122060177, "learning_rate": 1.5223860839451935e-05, "loss": 0.5836, "step": 4451 }, { "epoch": 0.35, "grad_norm": 1.081399065006784, "learning_rate": 1.5221718097117157e-05, "loss": 0.5625, "step": 4452 }, { "epoch": 0.35, "grad_norm": 1.176278246906274, "learning_rate": 1.5219575025108373e-05, "loss": 0.5509, "step": 4453 }, { "epoch": 0.35, "grad_norm": 1.153852089842175, "learning_rate": 1.521743162356089e-05, "loss": 0.5421, "step": 4454 }, { "epoch": 0.35, "grad_norm": 1.238385457849631, "learning_rate": 1.521528789261003e-05, "loss": 0.6216, "step": 4455 }, { "epoch": 0.35, "grad_norm": 1.128811472573839, "learning_rate": 1.5213143832391133e-05, "loss": 0.5805, "step": 4456 }, { "epoch": 0.35, "grad_norm": 1.2818857337059608, "learning_rate": 1.5210999443039573e-05, "loss": 0.5872, "step": 4457 }, { "epoch": 0.35, "grad_norm": 1.3346661323796543, "learning_rate": 1.5208854724690734e-05, "loss": 0.6064, "step": 4458 }, { "epoch": 0.35, "grad_norm": 1.055467105528188, "learning_rate": 1.5206709677480022e-05, "loss": 0.5526, "step": 4459 }, { "epoch": 0.35, "grad_norm": 1.1356789988736544, "learning_rate": 1.5204564301542863e-05, "loss": 0.5718, "step": 4460 }, { "epoch": 0.35, "grad_norm": 1.159306070175738, "learning_rate": 1.520241859701471e-05, "loss": 0.6041, "step": 4461 }, { "epoch": 0.35, "grad_norm": 1.1956427124274813, "learning_rate": 1.5200272564031026e-05, "loss": 0.5949, "step": 4462 }, { "epoch": 0.35, "grad_norm": 1.2159479648725056, "learning_rate": 1.5198126202727311e-05, "loss": 0.6024, "step": 4463 }, { "epoch": 0.35, "grad_norm": 1.1560298091417576, "learning_rate": 1.5195979513239064e-05, "loss": 0.5788, "step": 4464 }, { "epoch": 0.35, "grad_norm": 1.1778144872440648, "learning_rate": 1.5193832495701825e-05, "loss": 0.5278, "step": 4465 }, { "epoch": 0.35, "grad_norm": 1.168228732871617, "learning_rate": 1.5191685150251146e-05, "loss": 0.586, "step": 4466 }, { "epoch": 0.35, "grad_norm": 1.2033463806984266, "learning_rate": 1.5189537477022595e-05, "loss": 0.6291, "step": 4467 }, { "epoch": 0.35, "grad_norm": 1.0862332497063598, "learning_rate": 1.518738947615177e-05, "loss": 0.5285, "step": 4468 }, { "epoch": 0.35, "grad_norm": 1.1921114976087672, "learning_rate": 1.5185241147774283e-05, "loss": 0.6343, "step": 4469 }, { "epoch": 0.35, "grad_norm": 1.0100118016035045, "learning_rate": 1.5183092492025772e-05, "loss": 0.5398, "step": 4470 }, { "epoch": 0.35, "grad_norm": 1.2192429254861237, "learning_rate": 1.518094350904189e-05, "loss": 0.5893, "step": 4471 }, { "epoch": 0.35, "grad_norm": 1.0759556270937056, "learning_rate": 1.5178794198958313e-05, "loss": 0.5805, "step": 4472 }, { "epoch": 0.35, "grad_norm": 1.2030424114466285, "learning_rate": 1.517664456191074e-05, "loss": 0.6144, "step": 4473 }, { "epoch": 0.35, "grad_norm": 1.2887942873192972, "learning_rate": 1.5174494598034889e-05, "loss": 0.6429, "step": 4474 }, { "epoch": 0.35, "grad_norm": 1.2304358583928532, "learning_rate": 1.5172344307466493e-05, "loss": 0.6387, "step": 4475 }, { "epoch": 0.35, "grad_norm": 1.210997106254138, "learning_rate": 1.517019369034132e-05, "loss": 0.6773, "step": 4476 }, { "epoch": 0.35, "grad_norm": 1.1317197066917521, "learning_rate": 1.516804274679514e-05, "loss": 0.573, "step": 4477 }, { "epoch": 0.35, "grad_norm": 1.226489799920522, "learning_rate": 1.5165891476963763e-05, "loss": 0.5784, "step": 4478 }, { "epoch": 0.35, "grad_norm": 1.098668673459862, "learning_rate": 1.5163739880983002e-05, "loss": 0.5126, "step": 4479 }, { "epoch": 0.35, "grad_norm": 1.2200222222070949, "learning_rate": 1.5161587958988699e-05, "loss": 0.6562, "step": 4480 }, { "epoch": 0.35, "grad_norm": 1.1494085656928357, "learning_rate": 1.515943571111672e-05, "loss": 0.6214, "step": 4481 }, { "epoch": 0.35, "grad_norm": 1.0634176554973584, "learning_rate": 1.5157283137502944e-05, "loss": 0.5118, "step": 4482 }, { "epoch": 0.35, "grad_norm": 1.1172460260600112, "learning_rate": 1.5155130238283277e-05, "loss": 0.5477, "step": 4483 }, { "epoch": 0.35, "grad_norm": 1.119030479350138, "learning_rate": 1.5152977013593643e-05, "loss": 0.5482, "step": 4484 }, { "epoch": 0.35, "grad_norm": 1.2970108972149006, "learning_rate": 1.5150823463569979e-05, "loss": 0.5596, "step": 4485 }, { "epoch": 0.35, "grad_norm": 1.1490856599276307, "learning_rate": 1.514866958834826e-05, "loss": 0.5687, "step": 4486 }, { "epoch": 0.35, "grad_norm": 1.1965998280984946, "learning_rate": 1.5146515388064463e-05, "loss": 0.5549, "step": 4487 }, { "epoch": 0.35, "grad_norm": 1.1337593593862998, "learning_rate": 1.5144360862854597e-05, "loss": 0.564, "step": 4488 }, { "epoch": 0.35, "grad_norm": 1.1684703961639018, "learning_rate": 1.5142206012854693e-05, "loss": 0.6251, "step": 4489 }, { "epoch": 0.35, "grad_norm": 1.1579033140327963, "learning_rate": 1.5140050838200786e-05, "loss": 0.6207, "step": 4490 }, { "epoch": 0.35, "grad_norm": 1.3167438470375483, "learning_rate": 1.5137895339028955e-05, "loss": 0.599, "step": 4491 }, { "epoch": 0.35, "grad_norm": 1.2358226737250553, "learning_rate": 1.5135739515475281e-05, "loss": 0.523, "step": 4492 }, { "epoch": 0.35, "grad_norm": 1.2380342447755142, "learning_rate": 1.5133583367675878e-05, "loss": 0.5884, "step": 4493 }, { "epoch": 0.35, "grad_norm": 1.2101236377161075, "learning_rate": 1.5131426895766868e-05, "loss": 0.5678, "step": 4494 }, { "epoch": 0.35, "grad_norm": 1.2288445303830322, "learning_rate": 1.5129270099884403e-05, "loss": 0.5841, "step": 4495 }, { "epoch": 0.35, "grad_norm": 1.54702295693919, "learning_rate": 1.5127112980164655e-05, "loss": 0.6066, "step": 4496 }, { "epoch": 0.35, "grad_norm": 1.113243236227367, "learning_rate": 1.512495553674381e-05, "loss": 0.591, "step": 4497 }, { "epoch": 0.35, "grad_norm": 1.1566739207341692, "learning_rate": 1.5122797769758081e-05, "loss": 0.5457, "step": 4498 }, { "epoch": 0.35, "grad_norm": 1.0650606154264042, "learning_rate": 1.5120639679343702e-05, "loss": 0.5679, "step": 4499 }, { "epoch": 0.35, "grad_norm": 1.1669668027279647, "learning_rate": 1.5118481265636917e-05, "loss": 0.5665, "step": 4500 }, { "epoch": 0.35, "grad_norm": 1.2619587106131802, "learning_rate": 1.5116322528774005e-05, "loss": 0.5949, "step": 4501 }, { "epoch": 0.35, "grad_norm": 1.2086465144706862, "learning_rate": 1.5114163468891252e-05, "loss": 0.5736, "step": 4502 }, { "epoch": 0.35, "grad_norm": 1.182223795596568, "learning_rate": 1.5112004086124976e-05, "loss": 0.5736, "step": 4503 }, { "epoch": 0.35, "grad_norm": 1.1786402418021102, "learning_rate": 1.5109844380611506e-05, "loss": 0.5935, "step": 4504 }, { "epoch": 0.35, "grad_norm": 1.2747217267363526, "learning_rate": 1.51076843524872e-05, "loss": 0.5938, "step": 4505 }, { "epoch": 0.35, "grad_norm": 1.2519558863378772, "learning_rate": 1.5105524001888425e-05, "loss": 0.6118, "step": 4506 }, { "epoch": 0.35, "grad_norm": 1.2270405317698923, "learning_rate": 1.510336332895158e-05, "loss": 0.6242, "step": 4507 }, { "epoch": 0.35, "grad_norm": 1.1195539473517495, "learning_rate": 1.5101202333813078e-05, "loss": 0.5322, "step": 4508 }, { "epoch": 0.35, "grad_norm": 1.1557193775745187, "learning_rate": 1.5099041016609355e-05, "loss": 0.5888, "step": 4509 }, { "epoch": 0.35, "grad_norm": 1.1709091784165144, "learning_rate": 1.5096879377476864e-05, "loss": 0.5435, "step": 4510 }, { "epoch": 0.35, "grad_norm": 1.1386916615253955, "learning_rate": 1.509471741655208e-05, "loss": 0.5176, "step": 4511 }, { "epoch": 0.35, "grad_norm": 1.200871326093473, "learning_rate": 1.5092555133971502e-05, "loss": 0.6216, "step": 4512 }, { "epoch": 0.35, "grad_norm": 1.2124918652291095, "learning_rate": 1.5090392529871645e-05, "loss": 0.5986, "step": 4513 }, { "epoch": 0.35, "grad_norm": 1.1511681200994655, "learning_rate": 1.5088229604389045e-05, "loss": 0.5848, "step": 4514 }, { "epoch": 0.35, "grad_norm": 1.2295573901278383, "learning_rate": 1.5086066357660255e-05, "loss": 0.5798, "step": 4515 }, { "epoch": 0.35, "grad_norm": 1.0988644504068925, "learning_rate": 1.5083902789821854e-05, "loss": 0.6043, "step": 4516 }, { "epoch": 0.35, "grad_norm": 1.225616819440978, "learning_rate": 1.5081738901010446e-05, "loss": 0.5765, "step": 4517 }, { "epoch": 0.35, "grad_norm": 1.1822751698064238, "learning_rate": 1.507957469136264e-05, "loss": 0.6063, "step": 4518 }, { "epoch": 0.35, "grad_norm": 1.113194244732716, "learning_rate": 1.5077410161015078e-05, "loss": 0.5454, "step": 4519 }, { "epoch": 0.35, "grad_norm": 1.2642688785506542, "learning_rate": 1.5075245310104414e-05, "loss": 0.6169, "step": 4520 }, { "epoch": 0.35, "grad_norm": 1.1296981621076856, "learning_rate": 1.507308013876733e-05, "loss": 0.5632, "step": 4521 }, { "epoch": 0.35, "grad_norm": 1.2368674405152718, "learning_rate": 1.5070914647140522e-05, "loss": 0.6307, "step": 4522 }, { "epoch": 0.35, "grad_norm": 1.2112597129125278, "learning_rate": 1.5068748835360713e-05, "loss": 0.5861, "step": 4523 }, { "epoch": 0.35, "grad_norm": 1.219331749208178, "learning_rate": 1.5066582703564638e-05, "loss": 0.6005, "step": 4524 }, { "epoch": 0.35, "grad_norm": 1.2052131960459438, "learning_rate": 1.5064416251889053e-05, "loss": 0.6435, "step": 4525 }, { "epoch": 0.35, "grad_norm": 1.123783566543255, "learning_rate": 1.5062249480470742e-05, "loss": 0.5214, "step": 4526 }, { "epoch": 0.35, "grad_norm": 1.1489162355505493, "learning_rate": 1.5060082389446509e-05, "loss": 0.5995, "step": 4527 }, { "epoch": 0.35, "grad_norm": 1.1174787128477695, "learning_rate": 1.5057914978953166e-05, "loss": 0.5736, "step": 4528 }, { "epoch": 0.35, "grad_norm": 1.1073052884852255, "learning_rate": 1.5055747249127552e-05, "loss": 0.611, "step": 4529 }, { "epoch": 0.35, "grad_norm": 1.0962024985518966, "learning_rate": 1.5053579200106531e-05, "loss": 0.5769, "step": 4530 }, { "epoch": 0.35, "grad_norm": 1.175217756425074, "learning_rate": 1.505141083202698e-05, "loss": 0.5486, "step": 4531 }, { "epoch": 0.35, "grad_norm": 1.1204931797124718, "learning_rate": 1.5049242145025806e-05, "loss": 0.6081, "step": 4532 }, { "epoch": 0.35, "grad_norm": 1.1347182981303137, "learning_rate": 1.5047073139239922e-05, "loss": 0.6092, "step": 4533 }, { "epoch": 0.35, "grad_norm": 1.2278380352344656, "learning_rate": 1.5044903814806273e-05, "loss": 0.5842, "step": 4534 }, { "epoch": 0.35, "grad_norm": 1.2165225777590694, "learning_rate": 1.5042734171861815e-05, "loss": 0.5976, "step": 4535 }, { "epoch": 0.35, "grad_norm": 1.194584843173679, "learning_rate": 1.5040564210543532e-05, "loss": 0.5858, "step": 4536 }, { "epoch": 0.35, "grad_norm": 1.0587298879323264, "learning_rate": 1.5038393930988426e-05, "loss": 0.5562, "step": 4537 }, { "epoch": 0.35, "grad_norm": 1.2387121756405723, "learning_rate": 1.5036223333333517e-05, "loss": 0.6154, "step": 4538 }, { "epoch": 0.35, "grad_norm": 1.1080072056987036, "learning_rate": 1.5034052417715846e-05, "loss": 0.6061, "step": 4539 }, { "epoch": 0.35, "grad_norm": 1.07299707475603, "learning_rate": 1.503188118427247e-05, "loss": 0.4944, "step": 4540 }, { "epoch": 0.35, "grad_norm": 1.1760255173328118, "learning_rate": 1.5029709633140476e-05, "loss": 0.5106, "step": 4541 }, { "epoch": 0.35, "grad_norm": 1.1956653448178554, "learning_rate": 1.5027537764456963e-05, "loss": 0.5875, "step": 4542 }, { "epoch": 0.35, "grad_norm": 1.1648337862035085, "learning_rate": 1.5025365578359053e-05, "loss": 0.5466, "step": 4543 }, { "epoch": 0.35, "grad_norm": 1.237878679848045, "learning_rate": 1.5023193074983886e-05, "loss": 0.6518, "step": 4544 }, { "epoch": 0.35, "grad_norm": 1.07618068130684, "learning_rate": 1.5021020254468623e-05, "loss": 0.5112, "step": 4545 }, { "epoch": 0.35, "grad_norm": 1.1895439727114845, "learning_rate": 1.5018847116950445e-05, "loss": 0.6323, "step": 4546 }, { "epoch": 0.35, "grad_norm": 1.193688529863557, "learning_rate": 1.5016673662566558e-05, "loss": 0.5816, "step": 4547 }, { "epoch": 0.35, "grad_norm": 1.2262444539387036, "learning_rate": 1.501449989145418e-05, "loss": 0.6077, "step": 4548 }, { "epoch": 0.35, "grad_norm": 1.1452765528097446, "learning_rate": 1.501232580375055e-05, "loss": 0.6007, "step": 4549 }, { "epoch": 0.35, "grad_norm": 1.173722857485436, "learning_rate": 1.5010151399592934e-05, "loss": 0.5908, "step": 4550 }, { "epoch": 0.35, "grad_norm": 1.285188204939091, "learning_rate": 1.500797667911861e-05, "loss": 0.6214, "step": 4551 }, { "epoch": 0.35, "grad_norm": 1.233078384253019, "learning_rate": 1.5005801642464879e-05, "loss": 0.57, "step": 4552 }, { "epoch": 0.35, "grad_norm": 1.1913349568032714, "learning_rate": 1.5003626289769066e-05, "loss": 0.6218, "step": 4553 }, { "epoch": 0.35, "grad_norm": 1.1941912010496085, "learning_rate": 1.5001450621168507e-05, "loss": 0.5583, "step": 4554 }, { "epoch": 0.35, "grad_norm": 1.332888106675841, "learning_rate": 1.4999274636800572e-05, "loss": 0.6261, "step": 4555 }, { "epoch": 0.35, "grad_norm": 1.1712951751595282, "learning_rate": 1.4997098336802631e-05, "loss": 0.5899, "step": 4556 }, { "epoch": 0.35, "grad_norm": 1.0782279919068292, "learning_rate": 1.4994921721312092e-05, "loss": 0.5431, "step": 4557 }, { "epoch": 0.35, "grad_norm": 1.128441790961086, "learning_rate": 1.4992744790466376e-05, "loss": 0.5064, "step": 4558 }, { "epoch": 0.35, "grad_norm": 1.3008492025319331, "learning_rate": 1.4990567544402918e-05, "loss": 0.6312, "step": 4559 }, { "epoch": 0.35, "grad_norm": 1.18534670904363, "learning_rate": 1.4988389983259188e-05, "loss": 0.537, "step": 4560 }, { "epoch": 0.35, "grad_norm": 1.1484538485862017, "learning_rate": 1.4986212107172658e-05, "loss": 0.5871, "step": 4561 }, { "epoch": 0.35, "grad_norm": 1.1217149349349158, "learning_rate": 1.4984033916280833e-05, "loss": 0.5938, "step": 4562 }, { "epoch": 0.35, "grad_norm": 1.1819202943350071, "learning_rate": 1.4981855410721236e-05, "loss": 0.5871, "step": 4563 }, { "epoch": 0.35, "grad_norm": 1.1638147999457185, "learning_rate": 1.4979676590631398e-05, "loss": 0.6018, "step": 4564 }, { "epoch": 0.35, "grad_norm": 1.1778014813942903, "learning_rate": 1.4977497456148891e-05, "loss": 0.5923, "step": 4565 }, { "epoch": 0.35, "grad_norm": 1.099815986153892, "learning_rate": 1.4975318007411284e-05, "loss": 0.507, "step": 4566 }, { "epoch": 0.35, "grad_norm": 1.0380142775713128, "learning_rate": 1.4973138244556184e-05, "loss": 0.482, "step": 4567 }, { "epoch": 0.35, "grad_norm": 1.217544203705804, "learning_rate": 1.497095816772121e-05, "loss": 0.6523, "step": 4568 }, { "epoch": 0.35, "grad_norm": 1.1779864849552317, "learning_rate": 1.4968777777043997e-05, "loss": 0.5472, "step": 4569 }, { "epoch": 0.35, "grad_norm": 1.189498424450415, "learning_rate": 1.496659707266221e-05, "loss": 0.538, "step": 4570 }, { "epoch": 0.35, "grad_norm": 1.19365597299684, "learning_rate": 1.4964416054713525e-05, "loss": 0.5788, "step": 4571 }, { "epoch": 0.35, "grad_norm": 1.2080286836787093, "learning_rate": 1.4962234723335642e-05, "loss": 0.5859, "step": 4572 }, { "epoch": 0.35, "grad_norm": 1.1123784738236282, "learning_rate": 1.4960053078666278e-05, "loss": 0.5111, "step": 4573 }, { "epoch": 0.35, "grad_norm": 1.134893886126242, "learning_rate": 1.4957871120843172e-05, "loss": 0.5944, "step": 4574 }, { "epoch": 0.35, "grad_norm": 1.1983506729523272, "learning_rate": 1.4955688850004087e-05, "loss": 0.5461, "step": 4575 }, { "epoch": 0.35, "grad_norm": 1.2331431072469832, "learning_rate": 1.495350626628679e-05, "loss": 0.6081, "step": 4576 }, { "epoch": 0.36, "grad_norm": 1.217480756619316, "learning_rate": 1.4951323369829091e-05, "loss": 0.5024, "step": 4577 }, { "epoch": 0.36, "grad_norm": 1.130900275194096, "learning_rate": 1.4949140160768803e-05, "loss": 0.5241, "step": 4578 }, { "epoch": 0.36, "grad_norm": 1.0714250893763317, "learning_rate": 1.4946956639243757e-05, "loss": 0.5332, "step": 4579 }, { "epoch": 0.36, "grad_norm": 1.1859485128140161, "learning_rate": 1.4944772805391821e-05, "loss": 0.5833, "step": 4580 }, { "epoch": 0.36, "grad_norm": 1.1704996812405288, "learning_rate": 1.4942588659350863e-05, "loss": 0.5798, "step": 4581 }, { "epoch": 0.36, "grad_norm": 1.1671876879261687, "learning_rate": 1.4940404201258782e-05, "loss": 0.5772, "step": 4582 }, { "epoch": 0.36, "grad_norm": 1.2348541102553379, "learning_rate": 1.4938219431253499e-05, "loss": 0.6385, "step": 4583 }, { "epoch": 0.36, "grad_norm": 1.1598463061886657, "learning_rate": 1.4936034349472941e-05, "loss": 0.5616, "step": 4584 }, { "epoch": 0.36, "grad_norm": 1.1316569781743098, "learning_rate": 1.4933848956055068e-05, "loss": 0.5603, "step": 4585 }, { "epoch": 0.36, "grad_norm": 1.1578602789983201, "learning_rate": 1.4931663251137856e-05, "loss": 0.5509, "step": 4586 }, { "epoch": 0.36, "grad_norm": 1.0901857885334052, "learning_rate": 1.4929477234859299e-05, "loss": 0.5413, "step": 4587 }, { "epoch": 0.36, "grad_norm": 1.1502188536504823, "learning_rate": 1.4927290907357415e-05, "loss": 0.5587, "step": 4588 }, { "epoch": 0.36, "grad_norm": 1.2573429439594714, "learning_rate": 1.4925104268770227e-05, "loss": 0.6477, "step": 4589 }, { "epoch": 0.36, "grad_norm": 1.0957070416875185, "learning_rate": 1.4922917319235804e-05, "loss": 0.5368, "step": 4590 }, { "epoch": 0.36, "grad_norm": 1.1902997491283291, "learning_rate": 1.4920730058892205e-05, "loss": 0.585, "step": 4591 }, { "epoch": 0.36, "grad_norm": 1.2319059183623426, "learning_rate": 1.4918542487877535e-05, "loss": 0.592, "step": 4592 }, { "epoch": 0.36, "grad_norm": 1.1735340327596926, "learning_rate": 1.49163546063299e-05, "loss": 0.5087, "step": 4593 }, { "epoch": 0.36, "grad_norm": 1.2132498045000901, "learning_rate": 1.4914166414387433e-05, "loss": 0.5851, "step": 4594 }, { "epoch": 0.36, "grad_norm": 1.2265314960813345, "learning_rate": 1.4911977912188284e-05, "loss": 0.5568, "step": 4595 }, { "epoch": 0.36, "grad_norm": 1.0343460078944768, "learning_rate": 1.490978909987063e-05, "loss": 0.5331, "step": 4596 }, { "epoch": 0.36, "grad_norm": 1.2338777820307305, "learning_rate": 1.4907599977572659e-05, "loss": 0.586, "step": 4597 }, { "epoch": 0.36, "grad_norm": 1.1826844200219562, "learning_rate": 1.490541054543258e-05, "loss": 0.5681, "step": 4598 }, { "epoch": 0.36, "grad_norm": 1.2097961954030196, "learning_rate": 1.4903220803588627e-05, "loss": 0.5671, "step": 4599 }, { "epoch": 0.36, "grad_norm": 1.2305487228320586, "learning_rate": 1.4901030752179044e-05, "loss": 0.6165, "step": 4600 }, { "epoch": 0.36, "grad_norm": 1.2404697945702465, "learning_rate": 1.4898840391342107e-05, "loss": 0.5561, "step": 4601 }, { "epoch": 0.36, "grad_norm": 1.1278644757596286, "learning_rate": 1.4896649721216101e-05, "loss": 0.5668, "step": 4602 }, { "epoch": 0.36, "grad_norm": 1.0960607370093876, "learning_rate": 1.4894458741939333e-05, "loss": 0.585, "step": 4603 }, { "epoch": 0.36, "grad_norm": 1.1829674199794296, "learning_rate": 1.4892267453650133e-05, "loss": 0.5935, "step": 4604 }, { "epoch": 0.36, "grad_norm": 1.0172343487945301, "learning_rate": 1.4890075856486848e-05, "loss": 0.4999, "step": 4605 }, { "epoch": 0.36, "grad_norm": 1.2785751986238711, "learning_rate": 1.4887883950587845e-05, "loss": 0.5755, "step": 4606 }, { "epoch": 0.36, "grad_norm": 1.2564671115297674, "learning_rate": 1.488569173609151e-05, "loss": 0.5793, "step": 4607 }, { "epoch": 0.36, "grad_norm": 1.2780503118279807, "learning_rate": 1.488349921313625e-05, "loss": 0.5999, "step": 4608 }, { "epoch": 0.36, "grad_norm": 1.1265165914845163, "learning_rate": 1.4881306381860485e-05, "loss": 0.5823, "step": 4609 }, { "epoch": 0.36, "grad_norm": 1.1812260176227336, "learning_rate": 1.4879113242402668e-05, "loss": 0.5505, "step": 4610 }, { "epoch": 0.36, "grad_norm": 1.1186908663673416, "learning_rate": 1.4876919794901256e-05, "loss": 0.5666, "step": 4611 }, { "epoch": 0.36, "grad_norm": 1.1935460620456722, "learning_rate": 1.487472603949474e-05, "loss": 0.5874, "step": 4612 }, { "epoch": 0.36, "grad_norm": 1.2067492998309621, "learning_rate": 1.4872531976321619e-05, "loss": 0.655, "step": 4613 }, { "epoch": 0.36, "grad_norm": 1.104979174145839, "learning_rate": 1.4870337605520408e-05, "loss": 0.5547, "step": 4614 }, { "epoch": 0.36, "grad_norm": 1.0634184962482276, "learning_rate": 1.4868142927229662e-05, "loss": 0.5519, "step": 4615 }, { "epoch": 0.36, "grad_norm": 1.1808072264447613, "learning_rate": 1.4865947941587938e-05, "loss": 0.6282, "step": 4616 }, { "epoch": 0.36, "grad_norm": 1.2570182235355767, "learning_rate": 1.4863752648733812e-05, "loss": 0.5545, "step": 4617 }, { "epoch": 0.36, "grad_norm": 1.2856973361230202, "learning_rate": 1.486155704880589e-05, "loss": 0.675, "step": 4618 }, { "epoch": 0.36, "grad_norm": 1.3064167646339222, "learning_rate": 1.4859361141942788e-05, "loss": 0.6257, "step": 4619 }, { "epoch": 0.36, "grad_norm": 1.1636661136275848, "learning_rate": 1.4857164928283143e-05, "loss": 0.5525, "step": 4620 }, { "epoch": 0.36, "grad_norm": 1.2044451023162963, "learning_rate": 1.4854968407965621e-05, "loss": 0.589, "step": 4621 }, { "epoch": 0.36, "grad_norm": 1.4738054319768479, "learning_rate": 1.4852771581128895e-05, "loss": 0.6303, "step": 4622 }, { "epoch": 0.36, "grad_norm": 1.1010681558175648, "learning_rate": 1.4850574447911661e-05, "loss": 0.5883, "step": 4623 }, { "epoch": 0.36, "grad_norm": 1.1235184450659916, "learning_rate": 1.4848377008452635e-05, "loss": 0.5364, "step": 4624 }, { "epoch": 0.36, "grad_norm": 1.2718744338875525, "learning_rate": 1.4846179262890554e-05, "loss": 0.6048, "step": 4625 }, { "epoch": 0.36, "grad_norm": 1.236600294482748, "learning_rate": 1.4843981211364175e-05, "loss": 0.6051, "step": 4626 }, { "epoch": 0.36, "grad_norm": 1.1642824515615444, "learning_rate": 1.484178285401227e-05, "loss": 0.5463, "step": 4627 }, { "epoch": 0.36, "grad_norm": 1.2228783353986021, "learning_rate": 1.4839584190973633e-05, "loss": 0.5762, "step": 4628 }, { "epoch": 0.36, "grad_norm": 1.1734859837498324, "learning_rate": 1.4837385222387078e-05, "loss": 0.5542, "step": 4629 }, { "epoch": 0.36, "grad_norm": 1.0824503723820524, "learning_rate": 1.4835185948391433e-05, "loss": 0.6399, "step": 4630 }, { "epoch": 0.36, "grad_norm": 1.1092326650513769, "learning_rate": 1.4832986369125558e-05, "loss": 0.5604, "step": 4631 }, { "epoch": 0.36, "grad_norm": 1.3168809527247678, "learning_rate": 1.4830786484728315e-05, "loss": 0.6606, "step": 4632 }, { "epoch": 0.36, "grad_norm": 1.1675323385460907, "learning_rate": 1.4828586295338597e-05, "loss": 0.5924, "step": 4633 }, { "epoch": 0.36, "grad_norm": 1.1717783061507685, "learning_rate": 1.4826385801095315e-05, "loss": 0.5686, "step": 4634 }, { "epoch": 0.36, "grad_norm": 1.197491174956039, "learning_rate": 1.4824185002137396e-05, "loss": 0.5732, "step": 4635 }, { "epoch": 0.36, "grad_norm": 1.130614997140509, "learning_rate": 1.4821983898603791e-05, "loss": 0.5606, "step": 4636 }, { "epoch": 0.36, "grad_norm": 1.1701476527237085, "learning_rate": 1.4819782490633463e-05, "loss": 0.5743, "step": 4637 }, { "epoch": 0.36, "grad_norm": 1.1435780380301737, "learning_rate": 1.4817580778365396e-05, "loss": 0.5634, "step": 4638 }, { "epoch": 0.36, "grad_norm": 1.2038043692601252, "learning_rate": 1.4815378761938603e-05, "loss": 0.6005, "step": 4639 }, { "epoch": 0.36, "grad_norm": 1.1275125632252232, "learning_rate": 1.4813176441492104e-05, "loss": 0.5623, "step": 4640 }, { "epoch": 0.36, "grad_norm": 1.1594266813126217, "learning_rate": 1.4810973817164941e-05, "loss": 0.5531, "step": 4641 }, { "epoch": 0.36, "grad_norm": 1.2656768505636797, "learning_rate": 1.4808770889096184e-05, "loss": 0.6042, "step": 4642 }, { "epoch": 0.36, "grad_norm": 1.1705646564105916, "learning_rate": 1.4806567657424908e-05, "loss": 0.5571, "step": 4643 }, { "epoch": 0.36, "grad_norm": 1.1688436338075159, "learning_rate": 1.4804364122290217e-05, "loss": 0.5717, "step": 4644 }, { "epoch": 0.36, "grad_norm": 1.200153346992501, "learning_rate": 1.4802160283831233e-05, "loss": 0.5803, "step": 4645 }, { "epoch": 0.36, "grad_norm": 1.2668605942511664, "learning_rate": 1.4799956142187094e-05, "loss": 0.5679, "step": 4646 }, { "epoch": 0.36, "grad_norm": 1.3013203279983399, "learning_rate": 1.479775169749696e-05, "loss": 0.6632, "step": 4647 }, { "epoch": 0.36, "grad_norm": 1.156828297517851, "learning_rate": 1.4795546949900006e-05, "loss": 0.5417, "step": 4648 }, { "epoch": 0.36, "grad_norm": 1.1721538466257775, "learning_rate": 1.4793341899535434e-05, "loss": 0.5572, "step": 4649 }, { "epoch": 0.36, "grad_norm": 1.2127220540378292, "learning_rate": 1.4791136546542454e-05, "loss": 0.5855, "step": 4650 }, { "epoch": 0.36, "grad_norm": 1.2433614881223698, "learning_rate": 1.4788930891060307e-05, "loss": 0.6232, "step": 4651 }, { "epoch": 0.36, "grad_norm": 1.1928735921164484, "learning_rate": 1.4786724933228247e-05, "loss": 0.6087, "step": 4652 }, { "epoch": 0.36, "grad_norm": 1.23211027580133, "learning_rate": 1.4784518673185542e-05, "loss": 0.6004, "step": 4653 }, { "epoch": 0.36, "grad_norm": 1.157972907976523, "learning_rate": 1.478231211107149e-05, "loss": 0.5712, "step": 4654 }, { "epoch": 0.36, "grad_norm": 1.1700557574818624, "learning_rate": 1.47801052470254e-05, "loss": 0.6394, "step": 4655 }, { "epoch": 0.36, "grad_norm": 1.2900370295333148, "learning_rate": 1.4777898081186606e-05, "loss": 0.6146, "step": 4656 }, { "epoch": 0.36, "grad_norm": 1.2731685705505695, "learning_rate": 1.4775690613694453e-05, "loss": 0.6077, "step": 4657 }, { "epoch": 0.36, "grad_norm": 1.286985631618011, "learning_rate": 1.4773482844688313e-05, "loss": 0.6205, "step": 4658 }, { "epoch": 0.36, "grad_norm": 1.0994185016086377, "learning_rate": 1.4771274774307573e-05, "loss": 0.5765, "step": 4659 }, { "epoch": 0.36, "grad_norm": 1.2076429280106338, "learning_rate": 1.4769066402691641e-05, "loss": 0.6024, "step": 4660 }, { "epoch": 0.36, "grad_norm": 1.2295042102519795, "learning_rate": 1.476685772997994e-05, "loss": 0.6074, "step": 4661 }, { "epoch": 0.36, "grad_norm": 1.1001036161692197, "learning_rate": 1.476464875631192e-05, "loss": 0.5811, "step": 4662 }, { "epoch": 0.36, "grad_norm": 1.3983361830850933, "learning_rate": 1.4762439481827038e-05, "loss": 0.6713, "step": 4663 }, { "epoch": 0.36, "grad_norm": 1.1104922511634667, "learning_rate": 1.4760229906664782e-05, "loss": 0.5738, "step": 4664 }, { "epoch": 0.36, "grad_norm": 1.1672524389460859, "learning_rate": 1.4758020030964653e-05, "loss": 0.6111, "step": 4665 }, { "epoch": 0.36, "grad_norm": 1.2111903265080481, "learning_rate": 1.4755809854866172e-05, "loss": 0.6154, "step": 4666 }, { "epoch": 0.36, "grad_norm": 1.0708957948292044, "learning_rate": 1.4753599378508876e-05, "loss": 0.5897, "step": 4667 }, { "epoch": 0.36, "grad_norm": 1.1909672162711045, "learning_rate": 1.4751388602032326e-05, "loss": 0.585, "step": 4668 }, { "epoch": 0.36, "grad_norm": 1.3186552492787873, "learning_rate": 1.4749177525576102e-05, "loss": 0.5289, "step": 4669 }, { "epoch": 0.36, "grad_norm": 1.2254133247313488, "learning_rate": 1.4746966149279796e-05, "loss": 0.5357, "step": 4670 }, { "epoch": 0.36, "grad_norm": 1.212856765249875, "learning_rate": 1.4744754473283024e-05, "loss": 0.6066, "step": 4671 }, { "epoch": 0.36, "grad_norm": 1.2209873204334436, "learning_rate": 1.4742542497725428e-05, "loss": 0.5713, "step": 4672 }, { "epoch": 0.36, "grad_norm": 1.1066103052145848, "learning_rate": 1.4740330222746653e-05, "loss": 0.5507, "step": 4673 }, { "epoch": 0.36, "grad_norm": 1.056525938117344, "learning_rate": 1.4738117648486375e-05, "loss": 0.5389, "step": 4674 }, { "epoch": 0.36, "grad_norm": 1.242217777291066, "learning_rate": 1.473590477508428e-05, "loss": 0.6362, "step": 4675 }, { "epoch": 0.36, "grad_norm": 1.1351115607163516, "learning_rate": 1.4733691602680088e-05, "loss": 0.61, "step": 4676 }, { "epoch": 0.36, "grad_norm": 1.10468385687717, "learning_rate": 1.4731478131413519e-05, "loss": 0.5112, "step": 4677 }, { "epoch": 0.36, "grad_norm": 1.2354500350605704, "learning_rate": 1.4729264361424325e-05, "loss": 0.5876, "step": 4678 }, { "epoch": 0.36, "grad_norm": 1.1107857022348495, "learning_rate": 1.4727050292852272e-05, "loss": 0.6043, "step": 4679 }, { "epoch": 0.36, "grad_norm": 1.1257512445309525, "learning_rate": 1.4724835925837146e-05, "loss": 0.5526, "step": 4680 }, { "epoch": 0.36, "grad_norm": 1.3035727667008492, "learning_rate": 1.4722621260518752e-05, "loss": 0.5747, "step": 4681 }, { "epoch": 0.36, "grad_norm": 1.0787942440114195, "learning_rate": 1.4720406297036913e-05, "loss": 0.5057, "step": 4682 }, { "epoch": 0.36, "grad_norm": 1.2150950279279153, "learning_rate": 1.4718191035531468e-05, "loss": 0.5375, "step": 4683 }, { "epoch": 0.36, "grad_norm": 1.2089244229884872, "learning_rate": 1.471597547614228e-05, "loss": 0.5637, "step": 4684 }, { "epoch": 0.36, "grad_norm": 1.140700925625432, "learning_rate": 1.471375961900923e-05, "loss": 0.5832, "step": 4685 }, { "epoch": 0.36, "grad_norm": 1.1614544216939748, "learning_rate": 1.4711543464272218e-05, "loss": 0.5573, "step": 4686 }, { "epoch": 0.36, "grad_norm": 1.2904417577530933, "learning_rate": 1.4709327012071157e-05, "loss": 0.5754, "step": 4687 }, { "epoch": 0.36, "grad_norm": 1.1884137202052194, "learning_rate": 1.4707110262545983e-05, "loss": 0.5735, "step": 4688 }, { "epoch": 0.36, "grad_norm": 1.3137574303268986, "learning_rate": 1.4704893215836653e-05, "loss": 0.6075, "step": 4689 }, { "epoch": 0.36, "grad_norm": 1.1424801515486995, "learning_rate": 1.4702675872083141e-05, "loss": 0.591, "step": 4690 }, { "epoch": 0.36, "grad_norm": 1.0066994723496627, "learning_rate": 1.470045823142544e-05, "loss": 0.5258, "step": 4691 }, { "epoch": 0.36, "grad_norm": 1.0826355386328324, "learning_rate": 1.469824029400356e-05, "loss": 0.554, "step": 4692 }, { "epoch": 0.36, "grad_norm": 1.171799059632538, "learning_rate": 1.4696022059957527e-05, "loss": 0.6025, "step": 4693 }, { "epoch": 0.36, "grad_norm": 1.1649420059078481, "learning_rate": 1.4693803529427393e-05, "loss": 0.5831, "step": 4694 }, { "epoch": 0.36, "grad_norm": 1.2874050234975287, "learning_rate": 1.469158470255323e-05, "loss": 0.5842, "step": 4695 }, { "epoch": 0.36, "grad_norm": 1.1068601445084842, "learning_rate": 1.4689365579475117e-05, "loss": 0.5143, "step": 4696 }, { "epoch": 0.36, "grad_norm": 1.2106024001596387, "learning_rate": 1.4687146160333162e-05, "loss": 0.582, "step": 4697 }, { "epoch": 0.36, "grad_norm": 1.1885202943628286, "learning_rate": 1.4684926445267485e-05, "loss": 0.5579, "step": 4698 }, { "epoch": 0.36, "grad_norm": 1.265046258163542, "learning_rate": 1.4682706434418229e-05, "loss": 0.6054, "step": 4699 }, { "epoch": 0.36, "grad_norm": 1.1653156064217927, "learning_rate": 1.468048612792556e-05, "loss": 0.6027, "step": 4700 }, { "epoch": 0.36, "grad_norm": 1.2136577446995616, "learning_rate": 1.467826552592965e-05, "loss": 0.584, "step": 4701 }, { "epoch": 0.36, "grad_norm": 1.1824699581112545, "learning_rate": 1.4676044628570707e-05, "loss": 0.5522, "step": 4702 }, { "epoch": 0.36, "grad_norm": 0.9981257338452493, "learning_rate": 1.4673823435988933e-05, "loss": 0.4888, "step": 4703 }, { "epoch": 0.36, "grad_norm": 1.2072119716422292, "learning_rate": 1.4671601948324577e-05, "loss": 0.5952, "step": 4704 }, { "epoch": 0.37, "grad_norm": 1.1882638983929963, "learning_rate": 1.4669380165717889e-05, "loss": 0.6087, "step": 4705 }, { "epoch": 0.37, "grad_norm": 1.216398121752131, "learning_rate": 1.4667158088309137e-05, "loss": 0.6106, "step": 4706 }, { "epoch": 0.37, "grad_norm": 1.187912919381898, "learning_rate": 1.4664935716238615e-05, "loss": 0.5989, "step": 4707 }, { "epoch": 0.37, "grad_norm": 1.1497304476083048, "learning_rate": 1.4662713049646637e-05, "loss": 0.6008, "step": 4708 }, { "epoch": 0.37, "grad_norm": 1.152337905093095, "learning_rate": 1.4660490088673525e-05, "loss": 0.56, "step": 4709 }, { "epoch": 0.37, "grad_norm": 1.1848559306485862, "learning_rate": 1.4658266833459629e-05, "loss": 0.5928, "step": 4710 }, { "epoch": 0.37, "grad_norm": 1.2017497835676911, "learning_rate": 1.4656043284145316e-05, "loss": 0.5771, "step": 4711 }, { "epoch": 0.37, "grad_norm": 1.1954164646787175, "learning_rate": 1.4653819440870965e-05, "loss": 0.5681, "step": 4712 }, { "epoch": 0.37, "grad_norm": 1.0869511326366013, "learning_rate": 1.4651595303776986e-05, "loss": 0.5519, "step": 4713 }, { "epoch": 0.37, "grad_norm": 1.0623309618054928, "learning_rate": 1.4649370873003794e-05, "loss": 0.5548, "step": 4714 }, { "epoch": 0.37, "grad_norm": 1.2850538404295495, "learning_rate": 1.4647146148691831e-05, "loss": 0.6135, "step": 4715 }, { "epoch": 0.37, "grad_norm": 1.1221894017593557, "learning_rate": 1.4644921130981558e-05, "loss": 0.5488, "step": 4716 }, { "epoch": 0.37, "grad_norm": 1.1878619395351635, "learning_rate": 1.4642695820013446e-05, "loss": 0.5899, "step": 4717 }, { "epoch": 0.37, "grad_norm": 1.2727229448034345, "learning_rate": 1.4640470215927998e-05, "loss": 0.6163, "step": 4718 }, { "epoch": 0.37, "grad_norm": 1.28863749578199, "learning_rate": 1.463824431886572e-05, "loss": 0.602, "step": 4719 }, { "epoch": 0.37, "grad_norm": 1.12852978616599, "learning_rate": 1.4636018128967149e-05, "loss": 0.5947, "step": 4720 }, { "epoch": 0.37, "grad_norm": 1.2562087835126583, "learning_rate": 1.4633791646372837e-05, "loss": 0.6434, "step": 4721 }, { "epoch": 0.37, "grad_norm": 1.373995761009685, "learning_rate": 1.4631564871223346e-05, "loss": 0.5608, "step": 4722 }, { "epoch": 0.37, "grad_norm": 1.223947982203224, "learning_rate": 1.4629337803659274e-05, "loss": 0.5634, "step": 4723 }, { "epoch": 0.37, "grad_norm": 1.1323658029570405, "learning_rate": 1.4627110443821217e-05, "loss": 0.6322, "step": 4724 }, { "epoch": 0.37, "grad_norm": 1.1576216011355829, "learning_rate": 1.462488279184981e-05, "loss": 0.5958, "step": 4725 }, { "epoch": 0.37, "grad_norm": 1.1545356722938178, "learning_rate": 1.4622654847885688e-05, "loss": 0.5311, "step": 4726 }, { "epoch": 0.37, "grad_norm": 1.1210583405435337, "learning_rate": 1.4620426612069519e-05, "loss": 0.5077, "step": 4727 }, { "epoch": 0.37, "grad_norm": 1.1855321942949153, "learning_rate": 1.4618198084541977e-05, "loss": 0.6277, "step": 4728 }, { "epoch": 0.37, "grad_norm": 1.137618190787337, "learning_rate": 1.4615969265443762e-05, "loss": 0.5715, "step": 4729 }, { "epoch": 0.37, "grad_norm": 1.088656308405371, "learning_rate": 1.4613740154915594e-05, "loss": 0.497, "step": 4730 }, { "epoch": 0.37, "grad_norm": 1.1511257652986857, "learning_rate": 1.4611510753098208e-05, "loss": 0.6052, "step": 4731 }, { "epoch": 0.37, "grad_norm": 1.2175385738896742, "learning_rate": 1.4609281060132352e-05, "loss": 0.5224, "step": 4732 }, { "epoch": 0.37, "grad_norm": 1.3204040495603833, "learning_rate": 1.4607051076158805e-05, "loss": 0.6724, "step": 4733 }, { "epoch": 0.37, "grad_norm": 1.2755219886974971, "learning_rate": 1.4604820801318351e-05, "loss": 0.5868, "step": 4734 }, { "epoch": 0.37, "grad_norm": 1.0580284608174968, "learning_rate": 1.4602590235751806e-05, "loss": 0.5534, "step": 4735 }, { "epoch": 0.37, "grad_norm": 1.1920072946805294, "learning_rate": 1.4600359379599992e-05, "loss": 0.5588, "step": 4736 }, { "epoch": 0.37, "grad_norm": 1.2067158110939797, "learning_rate": 1.4598128233003754e-05, "loss": 0.6165, "step": 4737 }, { "epoch": 0.37, "grad_norm": 1.1458799988462935, "learning_rate": 1.4595896796103959e-05, "loss": 0.5861, "step": 4738 }, { "epoch": 0.37, "grad_norm": 1.2734813565618301, "learning_rate": 1.4593665069041484e-05, "loss": 0.5554, "step": 4739 }, { "epoch": 0.37, "grad_norm": 1.0933440953884168, "learning_rate": 1.4591433051957237e-05, "loss": 0.5109, "step": 4740 }, { "epoch": 0.37, "grad_norm": 1.079024064401115, "learning_rate": 1.4589200744992134e-05, "loss": 0.5394, "step": 4741 }, { "epoch": 0.37, "grad_norm": 1.1387230156591666, "learning_rate": 1.4586968148287106e-05, "loss": 0.5086, "step": 4742 }, { "epoch": 0.37, "grad_norm": 1.1098926035158136, "learning_rate": 1.4584735261983118e-05, "loss": 0.5804, "step": 4743 }, { "epoch": 0.37, "grad_norm": 1.2295573901278383, "learning_rate": 1.4582502086221136e-05, "loss": 0.5668, "step": 4744 }, { "epoch": 0.37, "grad_norm": 1.1761754791295764, "learning_rate": 1.4580268621142155e-05, "loss": 0.5156, "step": 4745 }, { "epoch": 0.37, "grad_norm": 1.1185849925845492, "learning_rate": 1.4578034866887186e-05, "loss": 0.5891, "step": 4746 }, { "epoch": 0.37, "grad_norm": 1.169121979865239, "learning_rate": 1.4575800823597255e-05, "loss": 0.5588, "step": 4747 }, { "epoch": 0.37, "grad_norm": 1.2061909339951176, "learning_rate": 1.4573566491413409e-05, "loss": 0.6009, "step": 4748 }, { "epoch": 0.37, "grad_norm": 1.2298542244132797, "learning_rate": 1.4571331870476716e-05, "loss": 0.6244, "step": 4749 }, { "epoch": 0.37, "grad_norm": 1.2345690514344285, "learning_rate": 1.4569096960928255e-05, "loss": 0.5856, "step": 4750 }, { "epoch": 0.37, "grad_norm": 1.28085516451661, "learning_rate": 1.4566861762909133e-05, "loss": 0.596, "step": 4751 }, { "epoch": 0.37, "grad_norm": 1.1875397023038996, "learning_rate": 1.456462627656046e-05, "loss": 0.5694, "step": 4752 }, { "epoch": 0.37, "grad_norm": 1.2369710446810522, "learning_rate": 1.4562390502023384e-05, "loss": 0.6166, "step": 4753 }, { "epoch": 0.37, "grad_norm": 1.2610003903575175, "learning_rate": 1.4560154439439056e-05, "loss": 0.6153, "step": 4754 }, { "epoch": 0.37, "grad_norm": 1.1656802923674987, "learning_rate": 1.4557918088948652e-05, "loss": 0.5819, "step": 4755 }, { "epoch": 0.37, "grad_norm": 1.0626824166276674, "learning_rate": 1.4555681450693365e-05, "loss": 0.5174, "step": 4756 }, { "epoch": 0.37, "grad_norm": 1.0902383288288213, "learning_rate": 1.45534445248144e-05, "loss": 0.5509, "step": 4757 }, { "epoch": 0.37, "grad_norm": 1.2193939757324839, "learning_rate": 1.4551207311452991e-05, "loss": 0.5316, "step": 4758 }, { "epoch": 0.37, "grad_norm": 1.202331281154888, "learning_rate": 1.4548969810750382e-05, "loss": 0.6397, "step": 4759 }, { "epoch": 0.37, "grad_norm": 1.1999417588882453, "learning_rate": 1.454673202284784e-05, "loss": 0.5375, "step": 4760 }, { "epoch": 0.37, "grad_norm": 1.1422843881574958, "learning_rate": 1.4544493947886648e-05, "loss": 0.5942, "step": 4761 }, { "epoch": 0.37, "grad_norm": 1.162060956186703, "learning_rate": 1.4542255586008105e-05, "loss": 0.5818, "step": 4762 }, { "epoch": 0.37, "grad_norm": 1.1087109902281118, "learning_rate": 1.4540016937353531e-05, "loss": 0.5732, "step": 4763 }, { "epoch": 0.37, "grad_norm": 1.1766382686945234, "learning_rate": 1.4537778002064268e-05, "loss": 0.5504, "step": 4764 }, { "epoch": 0.37, "grad_norm": 1.1572959396233062, "learning_rate": 1.4535538780281666e-05, "loss": 0.5931, "step": 4765 }, { "epoch": 0.37, "grad_norm": 1.191828718482068, "learning_rate": 1.4533299272147103e-05, "loss": 0.5953, "step": 4766 }, { "epoch": 0.37, "grad_norm": 1.1207880008610662, "learning_rate": 1.4531059477801965e-05, "loss": 0.5145, "step": 4767 }, { "epoch": 0.37, "grad_norm": 1.1240205739877187, "learning_rate": 1.4528819397387663e-05, "loss": 0.4981, "step": 4768 }, { "epoch": 0.37, "grad_norm": 1.1965528547645943, "learning_rate": 1.4526579031045631e-05, "loss": 0.6224, "step": 4769 }, { "epoch": 0.37, "grad_norm": 1.2179431689930071, "learning_rate": 1.452433837891731e-05, "loss": 0.6308, "step": 4770 }, { "epoch": 0.37, "grad_norm": 1.1330384127846598, "learning_rate": 1.4522097441144166e-05, "loss": 0.5509, "step": 4771 }, { "epoch": 0.37, "grad_norm": 1.1060478931011515, "learning_rate": 1.4519856217867676e-05, "loss": 0.5299, "step": 4772 }, { "epoch": 0.37, "grad_norm": 1.1436129586922932, "learning_rate": 1.4517614709229345e-05, "loss": 0.5858, "step": 4773 }, { "epoch": 0.37, "grad_norm": 1.175322433628499, "learning_rate": 1.451537291537069e-05, "loss": 0.5401, "step": 4774 }, { "epoch": 0.37, "grad_norm": 1.2540039784978272, "learning_rate": 1.4513130836433247e-05, "loss": 0.6495, "step": 4775 }, { "epoch": 0.37, "grad_norm": 1.2930560125770585, "learning_rate": 1.451088847255857e-05, "loss": 0.6149, "step": 4776 }, { "epoch": 0.37, "grad_norm": 1.211144066696648, "learning_rate": 1.4508645823888228e-05, "loss": 0.6141, "step": 4777 }, { "epoch": 0.37, "grad_norm": 1.1302796559601922, "learning_rate": 1.4506402890563813e-05, "loss": 0.5816, "step": 4778 }, { "epoch": 0.37, "grad_norm": 1.2926972902345264, "learning_rate": 1.4504159672726937e-05, "loss": 0.6145, "step": 4779 }, { "epoch": 0.37, "grad_norm": 1.201346978551093, "learning_rate": 1.4501916170519221e-05, "loss": 0.5835, "step": 4780 }, { "epoch": 0.37, "grad_norm": 1.2820652949655547, "learning_rate": 1.4499672384082312e-05, "loss": 0.616, "step": 4781 }, { "epoch": 0.37, "grad_norm": 1.250056027110474, "learning_rate": 1.4497428313557866e-05, "loss": 0.6359, "step": 4782 }, { "epoch": 0.37, "grad_norm": 1.2056339904703068, "learning_rate": 1.449518395908757e-05, "loss": 0.581, "step": 4783 }, { "epoch": 0.37, "grad_norm": 1.0803945122823422, "learning_rate": 1.4492939320813117e-05, "loss": 0.5485, "step": 4784 }, { "epoch": 0.37, "grad_norm": 1.097617749390821, "learning_rate": 1.4490694398876228e-05, "loss": 0.5644, "step": 4785 }, { "epoch": 0.37, "grad_norm": 1.2280019586870559, "learning_rate": 1.448844919341863e-05, "loss": 0.5822, "step": 4786 }, { "epoch": 0.37, "grad_norm": 1.1724804649271054, "learning_rate": 1.4486203704582075e-05, "loss": 0.5991, "step": 4787 }, { "epoch": 0.37, "grad_norm": 1.2577953455922122, "learning_rate": 1.4483957932508338e-05, "loss": 0.6295, "step": 4788 }, { "epoch": 0.37, "grad_norm": 1.3325087958183344, "learning_rate": 1.4481711877339202e-05, "loss": 0.66, "step": 4789 }, { "epoch": 0.37, "grad_norm": 1.1686819699321465, "learning_rate": 1.447946553921647e-05, "loss": 0.5606, "step": 4790 }, { "epoch": 0.37, "grad_norm": 1.1877135034669553, "learning_rate": 1.4477218918281967e-05, "loss": 0.5913, "step": 4791 }, { "epoch": 0.37, "grad_norm": 1.0846278574643111, "learning_rate": 1.4474972014677537e-05, "loss": 0.5443, "step": 4792 }, { "epoch": 0.37, "grad_norm": 1.1014566573569242, "learning_rate": 1.4472724828545035e-05, "loss": 0.5775, "step": 4793 }, { "epoch": 0.37, "grad_norm": 1.0747883344345852, "learning_rate": 1.447047736002634e-05, "loss": 0.5655, "step": 4794 }, { "epoch": 0.37, "grad_norm": 1.2638569951254384, "learning_rate": 1.4468229609263343e-05, "loss": 0.592, "step": 4795 }, { "epoch": 0.37, "grad_norm": 1.0124472569044305, "learning_rate": 1.4465981576397957e-05, "loss": 0.5514, "step": 4796 }, { "epoch": 0.37, "grad_norm": 1.3247691223238744, "learning_rate": 1.4463733261572114e-05, "loss": 0.6501, "step": 4797 }, { "epoch": 0.37, "grad_norm": 1.1495173042491573, "learning_rate": 1.4461484664927758e-05, "loss": 0.6023, "step": 4798 }, { "epoch": 0.37, "grad_norm": 1.1215567347223456, "learning_rate": 1.4459235786606861e-05, "loss": 0.5399, "step": 4799 }, { "epoch": 0.37, "grad_norm": 1.2231811274309439, "learning_rate": 1.44569866267514e-05, "loss": 0.5888, "step": 4800 }, { "epoch": 0.37, "grad_norm": 1.2151362812078543, "learning_rate": 1.4454737185503375e-05, "loss": 0.5955, "step": 4801 }, { "epoch": 0.37, "grad_norm": 1.082043864593238, "learning_rate": 1.4452487463004815e-05, "loss": 0.5976, "step": 4802 }, { "epoch": 0.37, "grad_norm": 1.0975717532396327, "learning_rate": 1.4450237459397742e-05, "loss": 0.5471, "step": 4803 }, { "epoch": 0.37, "grad_norm": 1.2611125517280033, "learning_rate": 1.4447987174824225e-05, "loss": 0.5977, "step": 4804 }, { "epoch": 0.37, "grad_norm": 1.042119970235621, "learning_rate": 1.4445736609426324e-05, "loss": 0.5044, "step": 4805 }, { "epoch": 0.37, "grad_norm": 1.2066174634277038, "learning_rate": 1.4443485763346135e-05, "loss": 0.6184, "step": 4806 }, { "epoch": 0.37, "grad_norm": 1.081636763180403, "learning_rate": 1.4441234636725767e-05, "loss": 0.5276, "step": 4807 }, { "epoch": 0.37, "grad_norm": 1.1733889654817238, "learning_rate": 1.4438983229707338e-05, "loss": 0.5471, "step": 4808 }, { "epoch": 0.37, "grad_norm": 1.165812258980968, "learning_rate": 1.4436731542433e-05, "loss": 0.5866, "step": 4809 }, { "epoch": 0.37, "grad_norm": 1.277588847032594, "learning_rate": 1.4434479575044908e-05, "loss": 0.5726, "step": 4810 }, { "epoch": 0.37, "grad_norm": 1.3050374001591714, "learning_rate": 1.443222732768524e-05, "loss": 0.6454, "step": 4811 }, { "epoch": 0.37, "grad_norm": 1.185846331764837, "learning_rate": 1.4429974800496194e-05, "loss": 0.563, "step": 4812 }, { "epoch": 0.37, "grad_norm": 1.205603239378219, "learning_rate": 1.4427721993619983e-05, "loss": 0.5707, "step": 4813 }, { "epoch": 0.37, "grad_norm": 1.2266679949252222, "learning_rate": 1.4425468907198843e-05, "loss": 0.5924, "step": 4814 }, { "epoch": 0.37, "grad_norm": 1.149111387837983, "learning_rate": 1.4423215541375013e-05, "loss": 0.5273, "step": 4815 }, { "epoch": 0.37, "grad_norm": 1.1426260652836573, "learning_rate": 1.4420961896290764e-05, "loss": 0.5476, "step": 4816 }, { "epoch": 0.37, "grad_norm": 1.303687711930291, "learning_rate": 1.4418707972088386e-05, "loss": 0.7074, "step": 4817 }, { "epoch": 0.37, "grad_norm": 1.138059840926854, "learning_rate": 1.4416453768910173e-05, "loss": 0.5787, "step": 4818 }, { "epoch": 0.37, "grad_norm": 1.1330971721886915, "learning_rate": 1.4414199286898449e-05, "loss": 0.5522, "step": 4819 }, { "epoch": 0.37, "grad_norm": 1.2630363646925478, "learning_rate": 1.4411944526195551e-05, "loss": 0.608, "step": 4820 }, { "epoch": 0.37, "grad_norm": 1.1907944906550014, "learning_rate": 1.4409689486943829e-05, "loss": 0.5857, "step": 4821 }, { "epoch": 0.37, "grad_norm": 1.2335958315328632, "learning_rate": 1.4407434169285664e-05, "loss": 0.5955, "step": 4822 }, { "epoch": 0.37, "grad_norm": 1.1670974492439778, "learning_rate": 1.4405178573363435e-05, "loss": 0.5905, "step": 4823 }, { "epoch": 0.37, "grad_norm": 1.194352506209174, "learning_rate": 1.4402922699319557e-05, "loss": 0.5869, "step": 4824 }, { "epoch": 0.37, "grad_norm": 1.102264180816152, "learning_rate": 1.4400666547296456e-05, "loss": 0.5743, "step": 4825 }, { "epoch": 0.37, "grad_norm": 1.0537866206384232, "learning_rate": 1.4398410117436566e-05, "loss": 0.4935, "step": 4826 }, { "epoch": 0.37, "grad_norm": 1.1581988535604466, "learning_rate": 1.4396153409882356e-05, "loss": 0.5872, "step": 4827 }, { "epoch": 0.37, "grad_norm": 1.1849326941936331, "learning_rate": 1.4393896424776296e-05, "loss": 0.6047, "step": 4828 }, { "epoch": 0.37, "grad_norm": 1.1805735917053748, "learning_rate": 1.439163916226089e-05, "loss": 0.5915, "step": 4829 }, { "epoch": 0.37, "grad_norm": 1.1801905790891936, "learning_rate": 1.4389381622478644e-05, "loss": 0.5934, "step": 4830 }, { "epoch": 0.37, "grad_norm": 1.1340017456027796, "learning_rate": 1.438712380557209e-05, "loss": 0.5643, "step": 4831 }, { "epoch": 0.37, "grad_norm": 1.1279649869341102, "learning_rate": 1.4384865711683778e-05, "loss": 0.5166, "step": 4832 }, { "epoch": 0.37, "grad_norm": 1.099863189078922, "learning_rate": 1.4382607340956265e-05, "loss": 0.5906, "step": 4833 }, { "epoch": 0.38, "grad_norm": 1.1926667596270755, "learning_rate": 1.4380348693532144e-05, "loss": 0.6094, "step": 4834 }, { "epoch": 0.38, "grad_norm": 1.2270666167713262, "learning_rate": 1.4378089769554009e-05, "loss": 0.6197, "step": 4835 }, { "epoch": 0.38, "grad_norm": 1.2928307220450972, "learning_rate": 1.4375830569164478e-05, "loss": 0.6853, "step": 4836 }, { "epoch": 0.38, "grad_norm": 1.1981011566511857, "learning_rate": 1.4373571092506189e-05, "loss": 0.5279, "step": 4837 }, { "epoch": 0.38, "grad_norm": 1.0756776093512608, "learning_rate": 1.437131133972179e-05, "loss": 0.4926, "step": 4838 }, { "epoch": 0.38, "grad_norm": 1.200482029263595, "learning_rate": 1.4369051310953954e-05, "loss": 0.5534, "step": 4839 }, { "epoch": 0.38, "grad_norm": 1.1552876256360114, "learning_rate": 1.436679100634537e-05, "loss": 0.5862, "step": 4840 }, { "epoch": 0.38, "grad_norm": 1.2420653277122962, "learning_rate": 1.4364530426038734e-05, "loss": 0.6291, "step": 4841 }, { "epoch": 0.38, "grad_norm": 1.114312908605802, "learning_rate": 1.436226957017678e-05, "loss": 0.5336, "step": 4842 }, { "epoch": 0.38, "grad_norm": 1.1131179422093747, "learning_rate": 1.436000843890224e-05, "loss": 0.5634, "step": 4843 }, { "epoch": 0.38, "grad_norm": 1.042571545805598, "learning_rate": 1.435774703235787e-05, "loss": 0.5255, "step": 4844 }, { "epoch": 0.38, "grad_norm": 1.1318485762883639, "learning_rate": 1.4355485350686449e-05, "loss": 0.5397, "step": 4845 }, { "epoch": 0.38, "grad_norm": 1.2434428847034074, "learning_rate": 1.4353223394030767e-05, "loss": 0.6402, "step": 4846 }, { "epoch": 0.38, "grad_norm": 1.1766315819874225, "learning_rate": 1.4350961162533627e-05, "loss": 0.5766, "step": 4847 }, { "epoch": 0.38, "grad_norm": 1.2268830384716451, "learning_rate": 1.434869865633787e-05, "loss": 0.5781, "step": 4848 }, { "epoch": 0.38, "grad_norm": 1.2116030433056135, "learning_rate": 1.4346435875586324e-05, "loss": 0.6044, "step": 4849 }, { "epoch": 0.38, "grad_norm": 1.1843054114358138, "learning_rate": 1.434417282042186e-05, "loss": 0.5525, "step": 4850 }, { "epoch": 0.38, "grad_norm": 1.1525653690658018, "learning_rate": 1.434190949098735e-05, "loss": 0.5536, "step": 4851 }, { "epoch": 0.38, "grad_norm": 1.2550317104482467, "learning_rate": 1.4339645887425693e-05, "loss": 0.6192, "step": 4852 }, { "epoch": 0.38, "grad_norm": 1.2371649779919311, "learning_rate": 1.4337382009879806e-05, "loss": 0.5949, "step": 4853 }, { "epoch": 0.38, "grad_norm": 1.1263838839364222, "learning_rate": 1.433511785849261e-05, "loss": 0.5938, "step": 4854 }, { "epoch": 0.38, "grad_norm": 1.234956628447132, "learning_rate": 1.433285343340706e-05, "loss": 0.627, "step": 4855 }, { "epoch": 0.38, "grad_norm": 1.0569472229484256, "learning_rate": 1.4330588734766113e-05, "loss": 0.5622, "step": 4856 }, { "epoch": 0.38, "grad_norm": 1.2879857045262912, "learning_rate": 1.432832376271276e-05, "loss": 0.5922, "step": 4857 }, { "epoch": 0.38, "grad_norm": 1.107385759252496, "learning_rate": 1.4326058517389998e-05, "loss": 0.5996, "step": 4858 }, { "epoch": 0.38, "grad_norm": 1.2098262487143545, "learning_rate": 1.432379299894084e-05, "loss": 0.5246, "step": 4859 }, { "epoch": 0.38, "grad_norm": 1.183172220133152, "learning_rate": 1.4321527207508324e-05, "loss": 0.6075, "step": 4860 }, { "epoch": 0.38, "grad_norm": 1.2062395579622367, "learning_rate": 1.4319261143235496e-05, "loss": 0.6014, "step": 4861 }, { "epoch": 0.38, "grad_norm": 1.2227280080779541, "learning_rate": 1.431699480626543e-05, "loss": 0.6177, "step": 4862 }, { "epoch": 0.38, "grad_norm": 1.1390181942291235, "learning_rate": 1.431472819674121e-05, "loss": 0.6039, "step": 4863 }, { "epoch": 0.38, "grad_norm": 1.2663982054580196, "learning_rate": 1.4312461314805936e-05, "loss": 0.6207, "step": 4864 }, { "epoch": 0.38, "grad_norm": 1.099522588906349, "learning_rate": 1.431019416060273e-05, "loss": 0.5091, "step": 4865 }, { "epoch": 0.38, "grad_norm": 1.1178514868272384, "learning_rate": 1.430792673427473e-05, "loss": 0.5329, "step": 4866 }, { "epoch": 0.38, "grad_norm": 1.1970950529430149, "learning_rate": 1.4305659035965087e-05, "loss": 0.5721, "step": 4867 }, { "epoch": 0.38, "grad_norm": 1.1873983038472684, "learning_rate": 1.430339106581698e-05, "loss": 0.5644, "step": 4868 }, { "epoch": 0.38, "grad_norm": 1.2634657819980597, "learning_rate": 1.4301122823973588e-05, "loss": 0.5808, "step": 4869 }, { "epoch": 0.38, "grad_norm": 1.1288353392825703, "learning_rate": 1.429885431057812e-05, "loss": 0.5662, "step": 4870 }, { "epoch": 0.38, "grad_norm": 1.1901173608272502, "learning_rate": 1.4296585525773803e-05, "loss": 0.6045, "step": 4871 }, { "epoch": 0.38, "grad_norm": 1.1990183808872086, "learning_rate": 1.4294316469703873e-05, "loss": 0.5554, "step": 4872 }, { "epoch": 0.38, "grad_norm": 1.162629954960051, "learning_rate": 1.429204714251159e-05, "loss": 0.5556, "step": 4873 }, { "epoch": 0.38, "grad_norm": 1.0618223946727976, "learning_rate": 1.4289777544340227e-05, "loss": 0.6083, "step": 4874 }, { "epoch": 0.38, "grad_norm": 1.2307555818030167, "learning_rate": 1.4287507675333075e-05, "loss": 0.6497, "step": 4875 }, { "epoch": 0.38, "grad_norm": 1.1254117530076173, "learning_rate": 1.4285237535633442e-05, "loss": 0.5986, "step": 4876 }, { "epoch": 0.38, "grad_norm": 1.1017090179182312, "learning_rate": 1.4282967125384652e-05, "loss": 0.5943, "step": 4877 }, { "epoch": 0.38, "grad_norm": 1.1277232059550661, "learning_rate": 1.4280696444730057e-05, "loss": 0.5673, "step": 4878 }, { "epoch": 0.38, "grad_norm": 1.217047553776633, "learning_rate": 1.4278425493813004e-05, "loss": 0.6337, "step": 4879 }, { "epoch": 0.38, "grad_norm": 1.1508302739015464, "learning_rate": 1.4276154272776876e-05, "loss": 0.5765, "step": 4880 }, { "epoch": 0.38, "grad_norm": 1.1430070642501653, "learning_rate": 1.4273882781765069e-05, "loss": 0.5336, "step": 4881 }, { "epoch": 0.38, "grad_norm": 1.0953040390667381, "learning_rate": 1.427161102092099e-05, "loss": 0.5424, "step": 4882 }, { "epoch": 0.38, "grad_norm": 1.2456077177893994, "learning_rate": 1.4269338990388072e-05, "loss": 0.5926, "step": 4883 }, { "epoch": 0.38, "grad_norm": 1.157212140601062, "learning_rate": 1.4267066690309754e-05, "loss": 0.5766, "step": 4884 }, { "epoch": 0.38, "grad_norm": 1.2686799936758892, "learning_rate": 1.4264794120829499e-05, "loss": 0.6392, "step": 4885 }, { "epoch": 0.38, "grad_norm": 1.3063366455106868, "learning_rate": 1.4262521282090791e-05, "loss": 0.6272, "step": 4886 }, { "epoch": 0.38, "grad_norm": 1.0878681064103706, "learning_rate": 1.4260248174237121e-05, "loss": 0.6112, "step": 4887 }, { "epoch": 0.38, "grad_norm": 1.1843401377838811, "learning_rate": 1.4257974797412006e-05, "loss": 0.6256, "step": 4888 }, { "epoch": 0.38, "grad_norm": 1.2296357256642696, "learning_rate": 1.4255701151758972e-05, "loss": 0.6256, "step": 4889 }, { "epoch": 0.38, "grad_norm": 1.0200358344309548, "learning_rate": 1.4253427237421567e-05, "loss": 0.5213, "step": 4890 }, { "epoch": 0.38, "grad_norm": 1.2829383797628846, "learning_rate": 1.4251153054543357e-05, "loss": 0.6187, "step": 4891 }, { "epoch": 0.38, "grad_norm": 1.0860861806834865, "learning_rate": 1.4248878603267922e-05, "loss": 0.5749, "step": 4892 }, { "epoch": 0.38, "grad_norm": 1.1488703735842798, "learning_rate": 1.4246603883738859e-05, "loss": 0.6043, "step": 4893 }, { "epoch": 0.38, "grad_norm": 1.1044169568142737, "learning_rate": 1.4244328896099782e-05, "loss": 0.5685, "step": 4894 }, { "epoch": 0.38, "grad_norm": 1.1651589778762321, "learning_rate": 1.4242053640494322e-05, "loss": 0.5881, "step": 4895 }, { "epoch": 0.38, "grad_norm": 1.1452661960291812, "learning_rate": 1.4239778117066132e-05, "loss": 0.5642, "step": 4896 }, { "epoch": 0.38, "grad_norm": 1.048361903732733, "learning_rate": 1.4237502325958876e-05, "loss": 0.5737, "step": 4897 }, { "epoch": 0.38, "grad_norm": 1.2277367189186532, "learning_rate": 1.4235226267316234e-05, "loss": 0.5551, "step": 4898 }, { "epoch": 0.38, "grad_norm": 1.3070099313004027, "learning_rate": 1.4232949941281908e-05, "loss": 0.6612, "step": 4899 }, { "epoch": 0.38, "grad_norm": 1.2529759268430862, "learning_rate": 1.4230673347999612e-05, "loss": 0.627, "step": 4900 }, { "epoch": 0.38, "grad_norm": 1.2075407563907425, "learning_rate": 1.4228396487613081e-05, "loss": 0.6432, "step": 4901 }, { "epoch": 0.38, "grad_norm": 1.125172972627046, "learning_rate": 1.4226119360266065e-05, "loss": 0.5594, "step": 4902 }, { "epoch": 0.38, "grad_norm": 1.3071368861676242, "learning_rate": 1.4223841966102327e-05, "loss": 0.6807, "step": 4903 }, { "epoch": 0.38, "grad_norm": 1.0771641526930442, "learning_rate": 1.4221564305265657e-05, "loss": 0.56, "step": 4904 }, { "epoch": 0.38, "grad_norm": 1.1093863365776122, "learning_rate": 1.4219286377899848e-05, "loss": 0.5597, "step": 4905 }, { "epoch": 0.38, "grad_norm": 1.1987146069629033, "learning_rate": 1.4217008184148727e-05, "loss": 0.5724, "step": 4906 }, { "epoch": 0.38, "grad_norm": 1.3141783473854975, "learning_rate": 1.4214729724156118e-05, "loss": 0.6102, "step": 4907 }, { "epoch": 0.38, "grad_norm": 1.2733609697245454, "learning_rate": 1.4212450998065876e-05, "loss": 0.6545, "step": 4908 }, { "epoch": 0.38, "grad_norm": 1.1138952870765229, "learning_rate": 1.4210172006021872e-05, "loss": 0.5782, "step": 4909 }, { "epoch": 0.38, "grad_norm": 1.0836727759843126, "learning_rate": 1.4207892748167985e-05, "loss": 0.5971, "step": 4910 }, { "epoch": 0.38, "grad_norm": 1.1116477359225532, "learning_rate": 1.420561322464812e-05, "loss": 0.5668, "step": 4911 }, { "epoch": 0.38, "grad_norm": 1.188494466679667, "learning_rate": 1.4203333435606196e-05, "loss": 0.5782, "step": 4912 }, { "epoch": 0.38, "grad_norm": 1.276618728426569, "learning_rate": 1.4201053381186141e-05, "loss": 0.5927, "step": 4913 }, { "epoch": 0.38, "grad_norm": 1.1233440503553442, "learning_rate": 1.4198773061531917e-05, "loss": 0.5638, "step": 4914 }, { "epoch": 0.38, "grad_norm": 0.9997992612106881, "learning_rate": 1.4196492476787482e-05, "loss": 0.5276, "step": 4915 }, { "epoch": 0.38, "grad_norm": 1.2454187843217115, "learning_rate": 1.419421162709683e-05, "loss": 0.6094, "step": 4916 }, { "epoch": 0.38, "grad_norm": 1.1780997701113802, "learning_rate": 1.4191930512603956e-05, "loss": 0.5681, "step": 4917 }, { "epoch": 0.38, "grad_norm": 1.295072060645785, "learning_rate": 1.4189649133452881e-05, "loss": 0.5976, "step": 4918 }, { "epoch": 0.38, "grad_norm": 1.0245101200011653, "learning_rate": 1.4187367489787642e-05, "loss": 0.5122, "step": 4919 }, { "epoch": 0.38, "grad_norm": 1.2923113024446344, "learning_rate": 1.4185085581752289e-05, "loss": 0.5874, "step": 4920 }, { "epoch": 0.38, "grad_norm": 1.1116119719513675, "learning_rate": 1.4182803409490891e-05, "loss": 0.59, "step": 4921 }, { "epoch": 0.38, "grad_norm": 1.260335346887879, "learning_rate": 1.4180520973147534e-05, "loss": 0.6102, "step": 4922 }, { "epoch": 0.38, "grad_norm": 1.2737362283068865, "learning_rate": 1.4178238272866316e-05, "loss": 0.5511, "step": 4923 }, { "epoch": 0.38, "grad_norm": 1.1566248105818988, "learning_rate": 1.4175955308791363e-05, "loss": 0.5751, "step": 4924 }, { "epoch": 0.38, "grad_norm": 1.2336226476240444, "learning_rate": 1.4173672081066806e-05, "loss": 0.569, "step": 4925 }, { "epoch": 0.38, "grad_norm": 1.2475597881751865, "learning_rate": 1.4171388589836792e-05, "loss": 0.5867, "step": 4926 }, { "epoch": 0.38, "grad_norm": 1.0598683584090418, "learning_rate": 1.4169104835245502e-05, "loss": 0.5513, "step": 4927 }, { "epoch": 0.38, "grad_norm": 1.2302143621122017, "learning_rate": 1.416682081743711e-05, "loss": 0.6471, "step": 4928 }, { "epoch": 0.38, "grad_norm": 1.239611515825394, "learning_rate": 1.4164536536555824e-05, "loss": 0.5612, "step": 4929 }, { "epoch": 0.38, "grad_norm": 1.1506194063679558, "learning_rate": 1.4162251992745858e-05, "loss": 0.5476, "step": 4930 }, { "epoch": 0.38, "grad_norm": 1.1681051015205106, "learning_rate": 1.4159967186151446e-05, "loss": 0.6005, "step": 4931 }, { "epoch": 0.38, "grad_norm": 1.3163056377595899, "learning_rate": 1.4157682116916851e-05, "loss": 0.6112, "step": 4932 }, { "epoch": 0.38, "grad_norm": 1.256711584666228, "learning_rate": 1.415539678518633e-05, "loss": 0.6149, "step": 4933 }, { "epoch": 0.38, "grad_norm": 1.202299007899473, "learning_rate": 1.415311119110417e-05, "loss": 0.6107, "step": 4934 }, { "epoch": 0.38, "grad_norm": 1.3072290392866028, "learning_rate": 1.415082533481467e-05, "loss": 0.6436, "step": 4935 }, { "epoch": 0.38, "grad_norm": 1.1667086843462804, "learning_rate": 1.4148539216462153e-05, "loss": 0.6443, "step": 4936 }, { "epoch": 0.38, "grad_norm": 1.1747074229628005, "learning_rate": 1.4146252836190958e-05, "loss": 0.5495, "step": 4937 }, { "epoch": 0.38, "grad_norm": 1.2530183589774615, "learning_rate": 1.4143966194145424e-05, "loss": 0.6058, "step": 4938 }, { "epoch": 0.38, "grad_norm": 1.0756300654449973, "learning_rate": 1.4141679290469925e-05, "loss": 0.5889, "step": 4939 }, { "epoch": 0.38, "grad_norm": 1.251629482577088, "learning_rate": 1.4139392125308842e-05, "loss": 0.643, "step": 4940 }, { "epoch": 0.38, "grad_norm": 1.1757323803044788, "learning_rate": 1.413710469880658e-05, "loss": 0.5421, "step": 4941 }, { "epoch": 0.38, "grad_norm": 1.2961164807056123, "learning_rate": 1.4134817011107555e-05, "loss": 0.5945, "step": 4942 }, { "epoch": 0.38, "grad_norm": 1.155718913411746, "learning_rate": 1.4132529062356197e-05, "loss": 0.5825, "step": 4943 }, { "epoch": 0.38, "grad_norm": 1.1206681776265388, "learning_rate": 1.4130240852696958e-05, "loss": 0.5698, "step": 4944 }, { "epoch": 0.38, "grad_norm": 1.0634337417486805, "learning_rate": 1.4127952382274305e-05, "loss": 0.5843, "step": 4945 }, { "epoch": 0.38, "grad_norm": 1.1516916790043479, "learning_rate": 1.4125663651232725e-05, "loss": 0.5607, "step": 4946 }, { "epoch": 0.38, "grad_norm": 1.188723084860073, "learning_rate": 1.4123374659716708e-05, "loss": 0.5515, "step": 4947 }, { "epoch": 0.38, "grad_norm": 1.1545604527515752, "learning_rate": 1.4121085407870776e-05, "loss": 0.5645, "step": 4948 }, { "epoch": 0.38, "grad_norm": 1.04304921272021, "learning_rate": 1.411879589583946e-05, "loss": 0.5281, "step": 4949 }, { "epoch": 0.38, "grad_norm": 1.142491942195733, "learning_rate": 1.4116506123767308e-05, "loss": 0.5592, "step": 4950 }, { "epoch": 0.38, "grad_norm": 1.1670457133383783, "learning_rate": 1.411421609179889e-05, "loss": 0.5679, "step": 4951 }, { "epoch": 0.38, "grad_norm": 1.1221702272237901, "learning_rate": 1.411192580007878e-05, "loss": 0.5746, "step": 4952 }, { "epoch": 0.38, "grad_norm": 1.189275167648375, "learning_rate": 1.410963524875158e-05, "loss": 0.5174, "step": 4953 }, { "epoch": 0.38, "grad_norm": 1.2529479075271828, "learning_rate": 1.4107344437961902e-05, "loss": 0.5678, "step": 4954 }, { "epoch": 0.38, "grad_norm": 1.2215801536941462, "learning_rate": 1.4105053367854382e-05, "loss": 0.6142, "step": 4955 }, { "epoch": 0.38, "grad_norm": 1.1335327489570246, "learning_rate": 1.4102762038573663e-05, "loss": 0.5655, "step": 4956 }, { "epoch": 0.38, "grad_norm": 1.2208438883684272, "learning_rate": 1.410047045026441e-05, "loss": 0.6312, "step": 4957 }, { "epoch": 0.38, "grad_norm": 1.177590736246212, "learning_rate": 1.40981786030713e-05, "loss": 0.5922, "step": 4958 }, { "epoch": 0.38, "grad_norm": 1.097529556727991, "learning_rate": 1.4095886497139029e-05, "loss": 0.5457, "step": 4959 }, { "epoch": 0.38, "grad_norm": 1.1193182306681342, "learning_rate": 1.4093594132612314e-05, "loss": 0.4991, "step": 4960 }, { "epoch": 0.38, "grad_norm": 1.1690475941767988, "learning_rate": 1.4091301509635882e-05, "loss": 0.533, "step": 4961 }, { "epoch": 0.38, "grad_norm": 1.168178067540347, "learning_rate": 1.4089008628354477e-05, "loss": 0.6165, "step": 4962 }, { "epoch": 0.39, "grad_norm": 1.1656709861377175, "learning_rate": 1.4086715488912858e-05, "loss": 0.6242, "step": 4963 }, { "epoch": 0.39, "grad_norm": 1.2235963272537176, "learning_rate": 1.4084422091455808e-05, "loss": 0.6116, "step": 4964 }, { "epoch": 0.39, "grad_norm": 1.2199280254051401, "learning_rate": 1.408212843612812e-05, "loss": 0.5957, "step": 4965 }, { "epoch": 0.39, "grad_norm": 1.090983625325818, "learning_rate": 1.40798345230746e-05, "loss": 0.571, "step": 4966 }, { "epoch": 0.39, "grad_norm": 1.1602652142149223, "learning_rate": 1.407754035244008e-05, "loss": 0.5741, "step": 4967 }, { "epoch": 0.39, "grad_norm": 1.1282554783484182, "learning_rate": 1.4075245924369397e-05, "loss": 0.5207, "step": 4968 }, { "epoch": 0.39, "grad_norm": 1.1144929950266353, "learning_rate": 1.4072951239007414e-05, "loss": 0.5441, "step": 4969 }, { "epoch": 0.39, "grad_norm": 1.2034443518147127, "learning_rate": 1.4070656296499006e-05, "loss": 0.6181, "step": 4970 }, { "epoch": 0.39, "grad_norm": 1.2046234410691394, "learning_rate": 1.4068361096989064e-05, "loss": 0.5711, "step": 4971 }, { "epoch": 0.39, "grad_norm": 1.270449403186937, "learning_rate": 1.4066065640622499e-05, "loss": 0.6, "step": 4972 }, { "epoch": 0.39, "grad_norm": 1.1815090137675432, "learning_rate": 1.4063769927544228e-05, "loss": 0.5679, "step": 4973 }, { "epoch": 0.39, "grad_norm": 1.1709912846319916, "learning_rate": 1.4061473957899194e-05, "loss": 0.5673, "step": 4974 }, { "epoch": 0.39, "grad_norm": 1.194542281384923, "learning_rate": 1.4059177731832358e-05, "loss": 0.6002, "step": 4975 }, { "epoch": 0.39, "grad_norm": 1.1589470929666976, "learning_rate": 1.4056881249488692e-05, "loss": 0.5775, "step": 4976 }, { "epoch": 0.39, "grad_norm": 1.1443728184327646, "learning_rate": 1.4054584511013178e-05, "loss": 0.5174, "step": 4977 }, { "epoch": 0.39, "grad_norm": 1.202087053704193, "learning_rate": 1.4052287516550824e-05, "loss": 0.5972, "step": 4978 }, { "epoch": 0.39, "grad_norm": 1.2615736650601708, "learning_rate": 1.4049990266246652e-05, "loss": 0.6203, "step": 4979 }, { "epoch": 0.39, "grad_norm": 1.174427761579736, "learning_rate": 1.4047692760245702e-05, "loss": 0.5885, "step": 4980 }, { "epoch": 0.39, "grad_norm": 1.1007120623751325, "learning_rate": 1.4045394998693022e-05, "loss": 0.5585, "step": 4981 }, { "epoch": 0.39, "grad_norm": 1.2990526048376556, "learning_rate": 1.4043096981733687e-05, "loss": 0.6313, "step": 4982 }, { "epoch": 0.39, "grad_norm": 1.195364857443232, "learning_rate": 1.4040798709512777e-05, "loss": 0.5521, "step": 4983 }, { "epoch": 0.39, "grad_norm": 1.148793327585384, "learning_rate": 1.4038500182175399e-05, "loss": 0.5952, "step": 4984 }, { "epoch": 0.39, "grad_norm": 1.2284884548366712, "learning_rate": 1.4036201399866669e-05, "loss": 0.5935, "step": 4985 }, { "epoch": 0.39, "grad_norm": 1.197719817642587, "learning_rate": 1.4033902362731719e-05, "loss": 0.5765, "step": 4986 }, { "epoch": 0.39, "grad_norm": 1.1187142030753594, "learning_rate": 1.40316030709157e-05, "loss": 0.5516, "step": 4987 }, { "epoch": 0.39, "grad_norm": 1.1827435351162627, "learning_rate": 1.402930352456378e-05, "loss": 0.5638, "step": 4988 }, { "epoch": 0.39, "grad_norm": 1.2784737537869166, "learning_rate": 1.402700372382114e-05, "loss": 0.5931, "step": 4989 }, { "epoch": 0.39, "grad_norm": 1.1319007624575315, "learning_rate": 1.4024703668832978e-05, "loss": 0.5451, "step": 4990 }, { "epoch": 0.39, "grad_norm": 1.2729598477662063, "learning_rate": 1.402240335974451e-05, "loss": 0.5577, "step": 4991 }, { "epoch": 0.39, "grad_norm": 1.2129385381255733, "learning_rate": 1.4020102796700962e-05, "loss": 0.5971, "step": 4992 }, { "epoch": 0.39, "grad_norm": 1.1671036287990175, "learning_rate": 1.4017801979847586e-05, "loss": 0.6157, "step": 4993 }, { "epoch": 0.39, "grad_norm": 1.1993560712393094, "learning_rate": 1.401550090932964e-05, "loss": 0.5694, "step": 4994 }, { "epoch": 0.39, "grad_norm": 1.1787565992055329, "learning_rate": 1.4013199585292405e-05, "loss": 0.5989, "step": 4995 }, { "epoch": 0.39, "grad_norm": 1.1017810253070914, "learning_rate": 1.4010898007881177e-05, "loss": 0.5756, "step": 4996 }, { "epoch": 0.39, "grad_norm": 1.0318563008361465, "learning_rate": 1.400859617724126e-05, "loss": 0.5273, "step": 4997 }, { "epoch": 0.39, "grad_norm": 1.0754525607086431, "learning_rate": 1.4006294093517989e-05, "loss": 0.5224, "step": 4998 }, { "epoch": 0.39, "grad_norm": 1.0694144720911443, "learning_rate": 1.4003991756856699e-05, "loss": 0.5834, "step": 4999 }, { "epoch": 0.39, "grad_norm": 1.250856392275336, "learning_rate": 1.4001689167402752e-05, "loss": 0.5918, "step": 5000 }, { "epoch": 0.39, "grad_norm": 1.1297402122936293, "learning_rate": 1.3999386325301525e-05, "loss": 0.6011, "step": 5001 }, { "epoch": 0.39, "grad_norm": 1.10318884948437, "learning_rate": 1.39970832306984e-05, "loss": 0.5746, "step": 5002 }, { "epoch": 0.39, "grad_norm": 1.1938638822548917, "learning_rate": 1.3994779883738794e-05, "loss": 0.6109, "step": 5003 }, { "epoch": 0.39, "grad_norm": 1.1559941807834713, "learning_rate": 1.399247628456812e-05, "loss": 0.5714, "step": 5004 }, { "epoch": 0.39, "grad_norm": 1.1841538114518846, "learning_rate": 1.3990172433331819e-05, "loss": 0.5876, "step": 5005 }, { "epoch": 0.39, "grad_norm": 1.182314745079797, "learning_rate": 1.398786833017535e-05, "loss": 0.5583, "step": 5006 }, { "epoch": 0.39, "grad_norm": 1.0280871584343054, "learning_rate": 1.3985563975244175e-05, "loss": 0.536, "step": 5007 }, { "epoch": 0.39, "grad_norm": 1.1811960944738003, "learning_rate": 1.3983259368683784e-05, "loss": 0.5858, "step": 5008 }, { "epoch": 0.39, "grad_norm": 1.1201477312681183, "learning_rate": 1.3980954510639677e-05, "loss": 0.5802, "step": 5009 }, { "epoch": 0.39, "grad_norm": 1.16789270874303, "learning_rate": 1.3978649401257375e-05, "loss": 0.6132, "step": 5010 }, { "epoch": 0.39, "grad_norm": 1.271520611177799, "learning_rate": 1.3976344040682409e-05, "loss": 0.6627, "step": 5011 }, { "epoch": 0.39, "grad_norm": 1.1646887099758994, "learning_rate": 1.3974038429060326e-05, "loss": 0.5353, "step": 5012 }, { "epoch": 0.39, "grad_norm": 1.0602171439755919, "learning_rate": 1.3971732566536695e-05, "loss": 0.526, "step": 5013 }, { "epoch": 0.39, "grad_norm": 1.1034120224112942, "learning_rate": 1.3969426453257096e-05, "loss": 0.5937, "step": 5014 }, { "epoch": 0.39, "grad_norm": 1.2315280290574246, "learning_rate": 1.3967120089367128e-05, "loss": 0.5864, "step": 5015 }, { "epoch": 0.39, "grad_norm": 1.0512289915582456, "learning_rate": 1.3964813475012398e-05, "loss": 0.5216, "step": 5016 }, { "epoch": 0.39, "grad_norm": 1.198551750456754, "learning_rate": 1.3962506610338537e-05, "loss": 0.5415, "step": 5017 }, { "epoch": 0.39, "grad_norm": 1.160523173559806, "learning_rate": 1.3960199495491192e-05, "loss": 0.5936, "step": 5018 }, { "epoch": 0.39, "grad_norm": 1.0838572934551802, "learning_rate": 1.3957892130616018e-05, "loss": 0.5861, "step": 5019 }, { "epoch": 0.39, "grad_norm": 1.2284209634914356, "learning_rate": 1.3955584515858696e-05, "loss": 0.6109, "step": 5020 }, { "epoch": 0.39, "grad_norm": 1.1760741721234578, "learning_rate": 1.3953276651364918e-05, "loss": 0.5597, "step": 5021 }, { "epoch": 0.39, "grad_norm": 1.2884355812326782, "learning_rate": 1.3950968537280383e-05, "loss": 0.5995, "step": 5022 }, { "epoch": 0.39, "grad_norm": 1.1295864603488763, "learning_rate": 1.3948660173750823e-05, "loss": 0.5355, "step": 5023 }, { "epoch": 0.39, "grad_norm": 1.1063434927706548, "learning_rate": 1.3946351560921974e-05, "loss": 0.5495, "step": 5024 }, { "epoch": 0.39, "grad_norm": 1.2446574001709683, "learning_rate": 1.3944042698939591e-05, "loss": 0.6116, "step": 5025 }, { "epoch": 0.39, "grad_norm": 1.1797739464620633, "learning_rate": 1.3941733587949442e-05, "loss": 0.6027, "step": 5026 }, { "epoch": 0.39, "grad_norm": 1.1064585645375302, "learning_rate": 1.3939424228097314e-05, "loss": 0.5717, "step": 5027 }, { "epoch": 0.39, "grad_norm": 1.1813293551477768, "learning_rate": 1.3937114619529012e-05, "loss": 0.5832, "step": 5028 }, { "epoch": 0.39, "grad_norm": 1.114436838142661, "learning_rate": 1.3934804762390351e-05, "loss": 0.5662, "step": 5029 }, { "epoch": 0.39, "grad_norm": 1.1960966741823895, "learning_rate": 1.3932494656827165e-05, "loss": 0.5762, "step": 5030 }, { "epoch": 0.39, "grad_norm": 1.1692603888891244, "learning_rate": 1.3930184302985302e-05, "loss": 0.5788, "step": 5031 }, { "epoch": 0.39, "grad_norm": 1.1236667680309351, "learning_rate": 1.3927873701010625e-05, "loss": 0.5405, "step": 5032 }, { "epoch": 0.39, "grad_norm": 1.2086853742237078, "learning_rate": 1.3925562851049017e-05, "loss": 0.6183, "step": 5033 }, { "epoch": 0.39, "grad_norm": 1.1783311132349865, "learning_rate": 1.3923251753246371e-05, "loss": 0.5797, "step": 5034 }, { "epoch": 0.39, "grad_norm": 1.2271953820892694, "learning_rate": 1.3920940407748607e-05, "loss": 0.6282, "step": 5035 }, { "epoch": 0.39, "grad_norm": 1.2846407798636061, "learning_rate": 1.3918628814701643e-05, "loss": 0.6647, "step": 5036 }, { "epoch": 0.39, "grad_norm": 1.1136105234513498, "learning_rate": 1.391631697425142e-05, "loss": 0.5859, "step": 5037 }, { "epoch": 0.39, "grad_norm": 1.1289616871458, "learning_rate": 1.3914004886543904e-05, "loss": 0.586, "step": 5038 }, { "epoch": 0.39, "grad_norm": 1.097468512828539, "learning_rate": 1.3911692551725065e-05, "loss": 0.4819, "step": 5039 }, { "epoch": 0.39, "grad_norm": 1.2165046941162556, "learning_rate": 1.3909379969940894e-05, "loss": 0.516, "step": 5040 }, { "epoch": 0.39, "grad_norm": 1.192142846707376, "learning_rate": 1.3907067141337397e-05, "loss": 0.5959, "step": 5041 }, { "epoch": 0.39, "grad_norm": 1.0752386072296287, "learning_rate": 1.3904754066060593e-05, "loss": 0.5593, "step": 5042 }, { "epoch": 0.39, "grad_norm": 1.1220058226748795, "learning_rate": 1.3902440744256516e-05, "loss": 0.5524, "step": 5043 }, { "epoch": 0.39, "grad_norm": 1.0781451237224033, "learning_rate": 1.3900127176071226e-05, "loss": 0.5714, "step": 5044 }, { "epoch": 0.39, "grad_norm": 1.1659100099715796, "learning_rate": 1.3897813361650783e-05, "loss": 0.5683, "step": 5045 }, { "epoch": 0.39, "grad_norm": 1.1880990073598108, "learning_rate": 1.3895499301141273e-05, "loss": 0.5649, "step": 5046 }, { "epoch": 0.39, "grad_norm": 1.134200935565453, "learning_rate": 1.389318499468879e-05, "loss": 0.59, "step": 5047 }, { "epoch": 0.39, "grad_norm": 1.2836453910929442, "learning_rate": 1.3890870442439455e-05, "loss": 0.559, "step": 5048 }, { "epoch": 0.39, "grad_norm": 1.1648478578921058, "learning_rate": 1.3888555644539397e-05, "loss": 0.6468, "step": 5049 }, { "epoch": 0.39, "grad_norm": 1.1400754466940612, "learning_rate": 1.3886240601134757e-05, "loss": 0.5487, "step": 5050 }, { "epoch": 0.39, "grad_norm": 1.0193843808844685, "learning_rate": 1.3883925312371698e-05, "loss": 0.5642, "step": 5051 }, { "epoch": 0.39, "grad_norm": 1.0672951930207055, "learning_rate": 1.3881609778396394e-05, "loss": 0.5826, "step": 5052 }, { "epoch": 0.39, "grad_norm": 1.172555192002202, "learning_rate": 1.3879293999355037e-05, "loss": 0.5745, "step": 5053 }, { "epoch": 0.39, "grad_norm": 1.1489998096070826, "learning_rate": 1.387697797539384e-05, "loss": 0.5357, "step": 5054 }, { "epoch": 0.39, "grad_norm": 1.2987461150242523, "learning_rate": 1.3874661706659018e-05, "loss": 0.5867, "step": 5055 }, { "epoch": 0.39, "grad_norm": 1.1839521010382115, "learning_rate": 1.3872345193296812e-05, "loss": 0.6301, "step": 5056 }, { "epoch": 0.39, "grad_norm": 1.0940891285074374, "learning_rate": 1.3870028435453476e-05, "loss": 0.5202, "step": 5057 }, { "epoch": 0.39, "grad_norm": 1.1252981426633528, "learning_rate": 1.3867711433275275e-05, "loss": 0.6074, "step": 5058 }, { "epoch": 0.39, "grad_norm": 1.1649098224693075, "learning_rate": 1.38653941869085e-05, "loss": 0.5616, "step": 5059 }, { "epoch": 0.39, "grad_norm": 1.1961083349613146, "learning_rate": 1.3863076696499447e-05, "loss": 0.5461, "step": 5060 }, { "epoch": 0.39, "grad_norm": 1.0949045354781428, "learning_rate": 1.3860758962194432e-05, "loss": 0.5873, "step": 5061 }, { "epoch": 0.39, "grad_norm": 1.3233252918005483, "learning_rate": 1.385844098413978e-05, "loss": 0.605, "step": 5062 }, { "epoch": 0.39, "grad_norm": 1.225336081276207, "learning_rate": 1.3856122762481845e-05, "loss": 0.5775, "step": 5063 }, { "epoch": 0.39, "grad_norm": 1.189911251445215, "learning_rate": 1.3853804297366986e-05, "loss": 0.5683, "step": 5064 }, { "epoch": 0.39, "grad_norm": 1.1688029904515929, "learning_rate": 1.3851485588941578e-05, "loss": 0.569, "step": 5065 }, { "epoch": 0.39, "grad_norm": 1.1842199500863406, "learning_rate": 1.3849166637352011e-05, "loss": 0.5554, "step": 5066 }, { "epoch": 0.39, "grad_norm": 1.1851311697974825, "learning_rate": 1.38468474427447e-05, "loss": 0.5569, "step": 5067 }, { "epoch": 0.39, "grad_norm": 1.0676355228564274, "learning_rate": 1.3844528005266057e-05, "loss": 0.5444, "step": 5068 }, { "epoch": 0.39, "grad_norm": 1.0443988953007817, "learning_rate": 1.3842208325062532e-05, "loss": 0.5232, "step": 5069 }, { "epoch": 0.39, "grad_norm": 1.1501182142948272, "learning_rate": 1.383988840228057e-05, "loss": 0.553, "step": 5070 }, { "epoch": 0.39, "grad_norm": 1.0773497848950297, "learning_rate": 1.3837568237066641e-05, "loss": 0.5317, "step": 5071 }, { "epoch": 0.39, "grad_norm": 1.0995766887244798, "learning_rate": 1.383524782956723e-05, "loss": 0.5444, "step": 5072 }, { "epoch": 0.39, "grad_norm": 1.1433419046738715, "learning_rate": 1.3832927179928834e-05, "loss": 0.5872, "step": 5073 }, { "epoch": 0.39, "grad_norm": 1.2043275644238283, "learning_rate": 1.3830606288297976e-05, "loss": 0.5952, "step": 5074 }, { "epoch": 0.39, "grad_norm": 1.2522080945881557, "learning_rate": 1.3828285154821175e-05, "loss": 0.5521, "step": 5075 }, { "epoch": 0.39, "grad_norm": 1.1262906935746209, "learning_rate": 1.3825963779644981e-05, "loss": 0.575, "step": 5076 }, { "epoch": 0.39, "grad_norm": 1.1679750780425355, "learning_rate": 1.3823642162915958e-05, "loss": 0.4945, "step": 5077 }, { "epoch": 0.39, "grad_norm": 1.2046123575210388, "learning_rate": 1.3821320304780671e-05, "loss": 0.5832, "step": 5078 }, { "epoch": 0.39, "grad_norm": 1.0860870038864228, "learning_rate": 1.3818998205385726e-05, "loss": 0.512, "step": 5079 }, { "epoch": 0.39, "grad_norm": 1.0958168664330807, "learning_rate": 1.3816675864877715e-05, "loss": 0.5357, "step": 5080 }, { "epoch": 0.39, "grad_norm": 1.14363766311371, "learning_rate": 1.3814353283403266e-05, "loss": 0.5657, "step": 5081 }, { "epoch": 0.39, "grad_norm": 1.0858749261844856, "learning_rate": 1.3812030461109017e-05, "loss": 0.4903, "step": 5082 }, { "epoch": 0.39, "grad_norm": 1.2557622180809869, "learning_rate": 1.3809707398141615e-05, "loss": 0.6093, "step": 5083 }, { "epoch": 0.39, "grad_norm": 1.277200346663464, "learning_rate": 1.3807384094647728e-05, "loss": 0.6032, "step": 5084 }, { "epoch": 0.39, "grad_norm": 1.2659834836783417, "learning_rate": 1.3805060550774045e-05, "loss": 0.5976, "step": 5085 }, { "epoch": 0.39, "grad_norm": 1.1807909219771489, "learning_rate": 1.3802736766667253e-05, "loss": 0.5401, "step": 5086 }, { "epoch": 0.39, "grad_norm": 1.2161290768674509, "learning_rate": 1.3800412742474074e-05, "loss": 0.6131, "step": 5087 }, { "epoch": 0.39, "grad_norm": 1.2514854664615915, "learning_rate": 1.3798088478341226e-05, "loss": 0.6247, "step": 5088 }, { "epoch": 0.39, "grad_norm": 1.2108117315138005, "learning_rate": 1.3795763974415459e-05, "loss": 0.5639, "step": 5089 }, { "epoch": 0.39, "grad_norm": 1.171089417676693, "learning_rate": 1.379343923084353e-05, "loss": 0.5985, "step": 5090 }, { "epoch": 0.39, "grad_norm": 1.0987285114569603, "learning_rate": 1.379111424777221e-05, "loss": 0.5674, "step": 5091 }, { "epoch": 0.4, "grad_norm": 1.2470046394280194, "learning_rate": 1.3788789025348288e-05, "loss": 0.5939, "step": 5092 }, { "epoch": 0.4, "grad_norm": 1.2564369404183413, "learning_rate": 1.3786463563718564e-05, "loss": 0.6035, "step": 5093 }, { "epoch": 0.4, "grad_norm": 1.3448791638942845, "learning_rate": 1.3784137863029864e-05, "loss": 0.6392, "step": 5094 }, { "epoch": 0.4, "grad_norm": 1.3296999848941775, "learning_rate": 1.3781811923429018e-05, "loss": 0.6535, "step": 5095 }, { "epoch": 0.4, "grad_norm": 1.1156698201897324, "learning_rate": 1.3779485745062869e-05, "loss": 0.5705, "step": 5096 }, { "epoch": 0.4, "grad_norm": 1.1670516888760163, "learning_rate": 1.3777159328078289e-05, "loss": 0.621, "step": 5097 }, { "epoch": 0.4, "grad_norm": 1.1223376879341824, "learning_rate": 1.377483267262215e-05, "loss": 0.5483, "step": 5098 }, { "epoch": 0.4, "grad_norm": 1.0578393817230047, "learning_rate": 1.3772505778841351e-05, "loss": 0.532, "step": 5099 }, { "epoch": 0.4, "grad_norm": 1.1010552719752902, "learning_rate": 1.37701786468828e-05, "loss": 0.5705, "step": 5100 }, { "epoch": 0.4, "grad_norm": 1.2344446283331352, "learning_rate": 1.3767851276893415e-05, "loss": 0.5687, "step": 5101 }, { "epoch": 0.4, "grad_norm": 1.1179940573375722, "learning_rate": 1.3765523669020144e-05, "loss": 0.528, "step": 5102 }, { "epoch": 0.4, "grad_norm": 1.2326976147245252, "learning_rate": 1.3763195823409932e-05, "loss": 0.5715, "step": 5103 }, { "epoch": 0.4, "grad_norm": 1.1652079841062606, "learning_rate": 1.3760867740209755e-05, "loss": 0.5853, "step": 5104 }, { "epoch": 0.4, "grad_norm": 1.096963303970221, "learning_rate": 1.3758539419566595e-05, "loss": 0.5749, "step": 5105 }, { "epoch": 0.4, "grad_norm": 1.0746071408136593, "learning_rate": 1.3756210861627446e-05, "loss": 0.5525, "step": 5106 }, { "epoch": 0.4, "grad_norm": 1.0965460324729155, "learning_rate": 1.3753882066539328e-05, "loss": 0.5335, "step": 5107 }, { "epoch": 0.4, "grad_norm": 1.1645182287116982, "learning_rate": 1.3751553034449267e-05, "loss": 0.6002, "step": 5108 }, { "epoch": 0.4, "grad_norm": 1.157325038663377, "learning_rate": 1.374922376550431e-05, "loss": 0.5593, "step": 5109 }, { "epoch": 0.4, "grad_norm": 1.1870787023791176, "learning_rate": 1.3746894259851513e-05, "loss": 0.5456, "step": 5110 }, { "epoch": 0.4, "grad_norm": 1.302207233891866, "learning_rate": 1.3744564517637947e-05, "loss": 0.5702, "step": 5111 }, { "epoch": 0.4, "grad_norm": 1.1542231863805963, "learning_rate": 1.3742234539010703e-05, "loss": 0.5469, "step": 5112 }, { "epoch": 0.4, "grad_norm": 1.1879417701957935, "learning_rate": 1.3739904324116888e-05, "loss": 0.5793, "step": 5113 }, { "epoch": 0.4, "grad_norm": 1.1136928935026065, "learning_rate": 1.3737573873103615e-05, "loss": 0.5757, "step": 5114 }, { "epoch": 0.4, "grad_norm": 1.2274395182328943, "learning_rate": 1.3735243186118026e-05, "loss": 0.5635, "step": 5115 }, { "epoch": 0.4, "grad_norm": 1.1150440915546074, "learning_rate": 1.3732912263307256e-05, "loss": 0.5287, "step": 5116 }, { "epoch": 0.4, "grad_norm": 1.225364099616156, "learning_rate": 1.3730581104818477e-05, "loss": 0.5333, "step": 5117 }, { "epoch": 0.4, "grad_norm": 1.1317513066430256, "learning_rate": 1.372824971079887e-05, "loss": 0.5736, "step": 5118 }, { "epoch": 0.4, "grad_norm": 1.209555988420983, "learning_rate": 1.3725918081395619e-05, "loss": 0.5854, "step": 5119 }, { "epoch": 0.4, "grad_norm": 1.1674513675599836, "learning_rate": 1.372358621675594e-05, "loss": 0.5563, "step": 5120 }, { "epoch": 0.4, "grad_norm": 1.223065390091859, "learning_rate": 1.3721254117027049e-05, "loss": 0.6077, "step": 5121 }, { "epoch": 0.4, "grad_norm": 1.1131230827542071, "learning_rate": 1.3718921782356187e-05, "loss": 0.5505, "step": 5122 }, { "epoch": 0.4, "grad_norm": 1.270391835718321, "learning_rate": 1.3716589212890608e-05, "loss": 0.6009, "step": 5123 }, { "epoch": 0.4, "grad_norm": 1.2485479026743516, "learning_rate": 1.3714256408777575e-05, "loss": 0.6012, "step": 5124 }, { "epoch": 0.4, "grad_norm": 1.208180445283391, "learning_rate": 1.3711923370164373e-05, "loss": 0.6001, "step": 5125 }, { "epoch": 0.4, "grad_norm": 1.2076025046206034, "learning_rate": 1.3709590097198295e-05, "loss": 0.5781, "step": 5126 }, { "epoch": 0.4, "grad_norm": 1.0729987412451536, "learning_rate": 1.3707256590026656e-05, "loss": 0.517, "step": 5127 }, { "epoch": 0.4, "grad_norm": 1.181185144329341, "learning_rate": 1.3704922848796782e-05, "loss": 0.5925, "step": 5128 }, { "epoch": 0.4, "grad_norm": 1.14652955260288, "learning_rate": 1.3702588873656015e-05, "loss": 0.5673, "step": 5129 }, { "epoch": 0.4, "grad_norm": 1.2077652264383187, "learning_rate": 1.370025466475171e-05, "loss": 0.6318, "step": 5130 }, { "epoch": 0.4, "grad_norm": 1.2391190447418405, "learning_rate": 1.3697920222231233e-05, "loss": 0.5819, "step": 5131 }, { "epoch": 0.4, "grad_norm": 1.0633947868187816, "learning_rate": 1.3695585546241977e-05, "loss": 0.5433, "step": 5132 }, { "epoch": 0.4, "grad_norm": 1.1979733937418402, "learning_rate": 1.3693250636931339e-05, "loss": 0.571, "step": 5133 }, { "epoch": 0.4, "grad_norm": 1.278536318603655, "learning_rate": 1.3690915494446732e-05, "loss": 0.5792, "step": 5134 }, { "epoch": 0.4, "grad_norm": 1.2121630957620921, "learning_rate": 1.3688580118935591e-05, "loss": 0.5875, "step": 5135 }, { "epoch": 0.4, "grad_norm": 1.1831097510272532, "learning_rate": 1.3686244510545353e-05, "loss": 0.5541, "step": 5136 }, { "epoch": 0.4, "grad_norm": 1.1002999503653201, "learning_rate": 1.3683908669423479e-05, "loss": 0.5572, "step": 5137 }, { "epoch": 0.4, "grad_norm": 1.1298810188984791, "learning_rate": 1.3681572595717446e-05, "loss": 0.584, "step": 5138 }, { "epoch": 0.4, "grad_norm": 1.0699068400568583, "learning_rate": 1.3679236289574742e-05, "loss": 0.5415, "step": 5139 }, { "epoch": 0.4, "grad_norm": 1.1055363610825557, "learning_rate": 1.3676899751142866e-05, "loss": 0.5655, "step": 5140 }, { "epoch": 0.4, "grad_norm": 1.1047699677630374, "learning_rate": 1.3674562980569342e-05, "loss": 0.6065, "step": 5141 }, { "epoch": 0.4, "grad_norm": 1.1178692424855816, "learning_rate": 1.3672225978001694e-05, "loss": 0.5032, "step": 5142 }, { "epoch": 0.4, "grad_norm": 1.1723398939514398, "learning_rate": 1.3669888743587478e-05, "loss": 0.6144, "step": 5143 }, { "epoch": 0.4, "grad_norm": 1.1890604407634775, "learning_rate": 1.3667551277474252e-05, "loss": 0.5509, "step": 5144 }, { "epoch": 0.4, "grad_norm": 1.2264492687272461, "learning_rate": 1.3665213579809588e-05, "loss": 0.6048, "step": 5145 }, { "epoch": 0.4, "grad_norm": 1.1425296088457548, "learning_rate": 1.3662875650741082e-05, "loss": 0.558, "step": 5146 }, { "epoch": 0.4, "grad_norm": 1.1903742588773563, "learning_rate": 1.3660537490416337e-05, "loss": 0.6399, "step": 5147 }, { "epoch": 0.4, "grad_norm": 1.059610364306384, "learning_rate": 1.3658199098982979e-05, "loss": 0.5719, "step": 5148 }, { "epoch": 0.4, "grad_norm": 1.3932991034046338, "learning_rate": 1.3655860476588636e-05, "loss": 0.6588, "step": 5149 }, { "epoch": 0.4, "grad_norm": 1.3501578733180393, "learning_rate": 1.3653521623380956e-05, "loss": 0.6182, "step": 5150 }, { "epoch": 0.4, "grad_norm": 1.133479691386181, "learning_rate": 1.365118253950761e-05, "loss": 0.5656, "step": 5151 }, { "epoch": 0.4, "grad_norm": 1.1529509932565465, "learning_rate": 1.364884322511627e-05, "loss": 0.5785, "step": 5152 }, { "epoch": 0.4, "grad_norm": 1.1952192980763319, "learning_rate": 1.3646503680354633e-05, "loss": 0.5223, "step": 5153 }, { "epoch": 0.4, "grad_norm": 1.0270377163819115, "learning_rate": 1.3644163905370406e-05, "loss": 0.5162, "step": 5154 }, { "epoch": 0.4, "grad_norm": 1.081539276323409, "learning_rate": 1.3641823900311308e-05, "loss": 0.4914, "step": 5155 }, { "epoch": 0.4, "grad_norm": 1.2481224741597374, "learning_rate": 1.3639483665325082e-05, "loss": 0.5705, "step": 5156 }, { "epoch": 0.4, "grad_norm": 1.239927671461129, "learning_rate": 1.363714320055947e-05, "loss": 0.6092, "step": 5157 }, { "epoch": 0.4, "grad_norm": 1.1684606020434887, "learning_rate": 1.3634802506162246e-05, "loss": 0.5975, "step": 5158 }, { "epoch": 0.4, "grad_norm": 1.159317484043346, "learning_rate": 1.3632461582281184e-05, "loss": 0.536, "step": 5159 }, { "epoch": 0.4, "grad_norm": 1.2963703966280629, "learning_rate": 1.3630120429064082e-05, "loss": 0.6333, "step": 5160 }, { "epoch": 0.4, "grad_norm": 1.1223450698642972, "learning_rate": 1.3627779046658752e-05, "loss": 0.5367, "step": 5161 }, { "epoch": 0.4, "grad_norm": 1.152854780257862, "learning_rate": 1.3625437435213009e-05, "loss": 0.5699, "step": 5162 }, { "epoch": 0.4, "grad_norm": 1.1573887964800986, "learning_rate": 1.3623095594874698e-05, "loss": 0.5517, "step": 5163 }, { "epoch": 0.4, "grad_norm": 1.1705242255766188, "learning_rate": 1.362075352579167e-05, "loss": 0.5387, "step": 5164 }, { "epoch": 0.4, "grad_norm": 1.2282702954566522, "learning_rate": 1.361841122811179e-05, "loss": 0.5727, "step": 5165 }, { "epoch": 0.4, "grad_norm": 1.1651345251451357, "learning_rate": 1.361606870198294e-05, "loss": 0.6193, "step": 5166 }, { "epoch": 0.4, "grad_norm": 1.2741724245595392, "learning_rate": 1.3613725947553018e-05, "loss": 0.5958, "step": 5167 }, { "epoch": 0.4, "grad_norm": 1.1994403547378543, "learning_rate": 1.3611382964969933e-05, "loss": 0.6221, "step": 5168 }, { "epoch": 0.4, "grad_norm": 1.2577098543921341, "learning_rate": 1.360903975438161e-05, "loss": 0.5529, "step": 5169 }, { "epoch": 0.4, "grad_norm": 1.2031053318546336, "learning_rate": 1.3606696315935983e-05, "loss": 0.5918, "step": 5170 }, { "epoch": 0.4, "grad_norm": 1.2037690655712658, "learning_rate": 1.3604352649781012e-05, "loss": 0.5967, "step": 5171 }, { "epoch": 0.4, "grad_norm": 1.1690285253903214, "learning_rate": 1.3602008756064661e-05, "loss": 0.6055, "step": 5172 }, { "epoch": 0.4, "grad_norm": 1.1573498108979243, "learning_rate": 1.3599664634934918e-05, "loss": 0.566, "step": 5173 }, { "epoch": 0.4, "grad_norm": 1.1630579050627143, "learning_rate": 1.359732028653977e-05, "loss": 0.5949, "step": 5174 }, { "epoch": 0.4, "grad_norm": 1.1540691841814061, "learning_rate": 1.3594975711027231e-05, "loss": 0.596, "step": 5175 }, { "epoch": 0.4, "grad_norm": 1.1927771513740384, "learning_rate": 1.3592630908545332e-05, "loss": 0.5648, "step": 5176 }, { "epoch": 0.4, "grad_norm": 1.0961138113718683, "learning_rate": 1.3590285879242107e-05, "loss": 0.5902, "step": 5177 }, { "epoch": 0.4, "grad_norm": 1.1605104361725986, "learning_rate": 1.358794062326561e-05, "loss": 0.5967, "step": 5178 }, { "epoch": 0.4, "grad_norm": 1.0735656972680843, "learning_rate": 1.3585595140763912e-05, "loss": 0.5982, "step": 5179 }, { "epoch": 0.4, "grad_norm": 1.1756079156294366, "learning_rate": 1.3583249431885091e-05, "loss": 0.6237, "step": 5180 }, { "epoch": 0.4, "grad_norm": 1.137341253500891, "learning_rate": 1.3580903496777248e-05, "loss": 0.5724, "step": 5181 }, { "epoch": 0.4, "grad_norm": 1.2936730440259925, "learning_rate": 1.3578557335588492e-05, "loss": 0.6853, "step": 5182 }, { "epoch": 0.4, "grad_norm": 1.160855528862785, "learning_rate": 1.3576210948466948e-05, "loss": 0.5354, "step": 5183 }, { "epoch": 0.4, "grad_norm": 1.1667562972561536, "learning_rate": 1.3573864335560756e-05, "loss": 0.5645, "step": 5184 }, { "epoch": 0.4, "grad_norm": 1.1011772293658901, "learning_rate": 1.357151749701807e-05, "loss": 0.5405, "step": 5185 }, { "epoch": 0.4, "grad_norm": 1.1593123426754495, "learning_rate": 1.3569170432987058e-05, "loss": 0.5793, "step": 5186 }, { "epoch": 0.4, "grad_norm": 1.141263025633567, "learning_rate": 1.35668231436159e-05, "loss": 0.5594, "step": 5187 }, { "epoch": 0.4, "grad_norm": 1.1514834020977904, "learning_rate": 1.3564475629052798e-05, "loss": 0.5516, "step": 5188 }, { "epoch": 0.4, "grad_norm": 1.1484035045756322, "learning_rate": 1.3562127889445958e-05, "loss": 0.6041, "step": 5189 }, { "epoch": 0.4, "grad_norm": 1.286528947053089, "learning_rate": 1.3559779924943603e-05, "loss": 0.6325, "step": 5190 }, { "epoch": 0.4, "grad_norm": 1.3521997790553795, "learning_rate": 1.355743173569398e-05, "loss": 0.6333, "step": 5191 }, { "epoch": 0.4, "grad_norm": 1.2142113514577728, "learning_rate": 1.3555083321845335e-05, "loss": 0.5448, "step": 5192 }, { "epoch": 0.4, "grad_norm": 1.1258377029273912, "learning_rate": 1.355273468354594e-05, "loss": 0.5584, "step": 5193 }, { "epoch": 0.4, "grad_norm": 1.1216301781064957, "learning_rate": 1.3550385820944074e-05, "loss": 0.5458, "step": 5194 }, { "epoch": 0.4, "grad_norm": 1.2637951184980942, "learning_rate": 1.3548036734188033e-05, "loss": 0.587, "step": 5195 }, { "epoch": 0.4, "grad_norm": 1.2355631650851207, "learning_rate": 1.3545687423426128e-05, "loss": 0.6126, "step": 5196 }, { "epoch": 0.4, "grad_norm": 1.085701951202441, "learning_rate": 1.354333788880668e-05, "loss": 0.4982, "step": 5197 }, { "epoch": 0.4, "grad_norm": 1.1498156731535774, "learning_rate": 1.3540988130478034e-05, "loss": 0.5754, "step": 5198 }, { "epoch": 0.4, "grad_norm": 1.1721203356472376, "learning_rate": 1.3538638148588537e-05, "loss": 0.5825, "step": 5199 }, { "epoch": 0.4, "grad_norm": 1.2816878710325035, "learning_rate": 1.3536287943286555e-05, "loss": 0.5741, "step": 5200 }, { "epoch": 0.4, "grad_norm": 1.2869727564360933, "learning_rate": 1.353393751472047e-05, "loss": 0.6197, "step": 5201 }, { "epoch": 0.4, "grad_norm": 1.1260877754494374, "learning_rate": 1.3531586863038678e-05, "loss": 0.5892, "step": 5202 }, { "epoch": 0.4, "grad_norm": 1.2899028004392399, "learning_rate": 1.3529235988389588e-05, "loss": 0.5654, "step": 5203 }, { "epoch": 0.4, "grad_norm": 1.1726781000570679, "learning_rate": 1.3526884890921621e-05, "loss": 0.6146, "step": 5204 }, { "epoch": 0.4, "grad_norm": 1.2676549103554768, "learning_rate": 1.3524533570783214e-05, "loss": 0.6235, "step": 5205 }, { "epoch": 0.4, "grad_norm": 1.1817948674189769, "learning_rate": 1.3522182028122817e-05, "loss": 0.5903, "step": 5206 }, { "epoch": 0.4, "grad_norm": 1.1638322128774148, "learning_rate": 1.3519830263088899e-05, "loss": 0.555, "step": 5207 }, { "epoch": 0.4, "grad_norm": 1.7848086362996562, "learning_rate": 1.3517478275829935e-05, "loss": 0.5924, "step": 5208 }, { "epoch": 0.4, "grad_norm": 1.1323558018315305, "learning_rate": 1.3515126066494422e-05, "loss": 0.5676, "step": 5209 }, { "epoch": 0.4, "grad_norm": 1.297026889592874, "learning_rate": 1.351277363523086e-05, "loss": 0.5744, "step": 5210 }, { "epoch": 0.4, "grad_norm": 1.2595048974681344, "learning_rate": 1.3510420982187777e-05, "loss": 0.6017, "step": 5211 }, { "epoch": 0.4, "grad_norm": 1.185826527827758, "learning_rate": 1.350806810751371e-05, "loss": 0.5712, "step": 5212 }, { "epoch": 0.4, "grad_norm": 1.202157957161739, "learning_rate": 1.3505715011357202e-05, "loss": 0.6262, "step": 5213 }, { "epoch": 0.4, "grad_norm": 1.173455253700427, "learning_rate": 1.3503361693866817e-05, "loss": 0.5991, "step": 5214 }, { "epoch": 0.4, "grad_norm": 1.1796111025293248, "learning_rate": 1.350100815519113e-05, "loss": 0.562, "step": 5215 }, { "epoch": 0.4, "grad_norm": 1.2338232907959152, "learning_rate": 1.349865439547874e-05, "loss": 0.6106, "step": 5216 }, { "epoch": 0.4, "grad_norm": 1.1161664551743316, "learning_rate": 1.3496300414878247e-05, "loss": 0.5736, "step": 5217 }, { "epoch": 0.4, "grad_norm": 1.2048698752332165, "learning_rate": 1.3493946213538269e-05, "loss": 0.5271, "step": 5218 }, { "epoch": 0.4, "grad_norm": 1.165472981048526, "learning_rate": 1.3491591791607443e-05, "loss": 0.5496, "step": 5219 }, { "epoch": 0.4, "grad_norm": 1.1799031243725278, "learning_rate": 1.3489237149234409e-05, "loss": 0.5648, "step": 5220 }, { "epoch": 0.41, "grad_norm": 1.318597435826858, "learning_rate": 1.3486882286567831e-05, "loss": 0.563, "step": 5221 }, { "epoch": 0.41, "grad_norm": 1.2489523789128933, "learning_rate": 1.3484527203756388e-05, "loss": 0.5671, "step": 5222 }, { "epoch": 0.41, "grad_norm": 1.2314839368385135, "learning_rate": 1.3482171900948765e-05, "loss": 0.5908, "step": 5223 }, { "epoch": 0.41, "grad_norm": 1.2438119786601036, "learning_rate": 1.3479816378293662e-05, "loss": 0.6246, "step": 5224 }, { "epoch": 0.41, "grad_norm": 1.0880687300732523, "learning_rate": 1.3477460635939799e-05, "loss": 0.5579, "step": 5225 }, { "epoch": 0.41, "grad_norm": 1.138819323459468, "learning_rate": 1.3475104674035902e-05, "loss": 0.5878, "step": 5226 }, { "epoch": 0.41, "grad_norm": 1.2050894515681492, "learning_rate": 1.347274849273072e-05, "loss": 0.5829, "step": 5227 }, { "epoch": 0.41, "grad_norm": 1.2568860166304936, "learning_rate": 1.3470392092173008e-05, "loss": 0.6067, "step": 5228 }, { "epoch": 0.41, "grad_norm": 1.251285178407395, "learning_rate": 1.3468035472511539e-05, "loss": 0.6112, "step": 5229 }, { "epoch": 0.41, "grad_norm": 1.165477021261257, "learning_rate": 1.3465678633895096e-05, "loss": 0.5608, "step": 5230 }, { "epoch": 0.41, "grad_norm": 1.229551427503285, "learning_rate": 1.3463321576472482e-05, "loss": 0.5796, "step": 5231 }, { "epoch": 0.41, "grad_norm": 1.1709672082044502, "learning_rate": 1.346096430039251e-05, "loss": 0.5179, "step": 5232 }, { "epoch": 0.41, "grad_norm": 1.1369282113279262, "learning_rate": 1.3458606805804005e-05, "loss": 0.5679, "step": 5233 }, { "epoch": 0.41, "grad_norm": 1.1793896128482513, "learning_rate": 1.3456249092855805e-05, "loss": 0.6115, "step": 5234 }, { "epoch": 0.41, "grad_norm": 1.2023482354212447, "learning_rate": 1.345389116169677e-05, "loss": 0.6598, "step": 5235 }, { "epoch": 0.41, "grad_norm": 1.2172925499769187, "learning_rate": 1.3451533012475765e-05, "loss": 0.5765, "step": 5236 }, { "epoch": 0.41, "grad_norm": 1.149692757654883, "learning_rate": 1.3449174645341675e-05, "loss": 0.5298, "step": 5237 }, { "epoch": 0.41, "grad_norm": 1.1926994434452942, "learning_rate": 1.3446816060443395e-05, "loss": 0.5698, "step": 5238 }, { "epoch": 0.41, "grad_norm": 1.0959700261040861, "learning_rate": 1.344445725792983e-05, "loss": 0.5416, "step": 5239 }, { "epoch": 0.41, "grad_norm": 1.2075576868750588, "learning_rate": 1.3442098237949913e-05, "loss": 0.5785, "step": 5240 }, { "epoch": 0.41, "grad_norm": 1.178225033912204, "learning_rate": 1.3439739000652569e-05, "loss": 0.5528, "step": 5241 }, { "epoch": 0.41, "grad_norm": 1.169026842835405, "learning_rate": 1.343737954618676e-05, "loss": 0.6329, "step": 5242 }, { "epoch": 0.41, "grad_norm": 1.1740442670473006, "learning_rate": 1.3435019874701444e-05, "loss": 0.5754, "step": 5243 }, { "epoch": 0.41, "grad_norm": 1.1784416339875603, "learning_rate": 1.3432659986345602e-05, "loss": 0.5779, "step": 5244 }, { "epoch": 0.41, "grad_norm": 1.2582109187541184, "learning_rate": 1.3430299881268223e-05, "loss": 0.6197, "step": 5245 }, { "epoch": 0.41, "grad_norm": 1.0791516046442569, "learning_rate": 1.3427939559618314e-05, "loss": 0.5075, "step": 5246 }, { "epoch": 0.41, "grad_norm": 1.141248349776647, "learning_rate": 1.3425579021544896e-05, "loss": 0.5397, "step": 5247 }, { "epoch": 0.41, "grad_norm": 1.1102728635464842, "learning_rate": 1.3423218267197005e-05, "loss": 0.5453, "step": 5248 }, { "epoch": 0.41, "grad_norm": 1.2214566521934216, "learning_rate": 1.3420857296723674e-05, "loss": 0.5963, "step": 5249 }, { "epoch": 0.41, "grad_norm": 1.2928796835472165, "learning_rate": 1.341849611027398e-05, "loss": 0.6264, "step": 5250 }, { "epoch": 0.41, "grad_norm": 1.1855863410627516, "learning_rate": 1.3416134707996987e-05, "loss": 0.5444, "step": 5251 }, { "epoch": 0.41, "grad_norm": 1.1259374421495798, "learning_rate": 1.3413773090041786e-05, "loss": 0.5365, "step": 5252 }, { "epoch": 0.41, "grad_norm": 1.2129149503618208, "learning_rate": 1.3411411256557476e-05, "loss": 0.5945, "step": 5253 }, { "epoch": 0.41, "grad_norm": 1.1577300831319854, "learning_rate": 1.3409049207693172e-05, "loss": 0.5292, "step": 5254 }, { "epoch": 0.41, "grad_norm": 1.033072335342668, "learning_rate": 1.3406686943598003e-05, "loss": 0.527, "step": 5255 }, { "epoch": 0.41, "grad_norm": 1.2454299833050129, "learning_rate": 1.3404324464421113e-05, "loss": 0.6421, "step": 5256 }, { "epoch": 0.41, "grad_norm": 1.0841114416480866, "learning_rate": 1.3401961770311655e-05, "loss": 0.5374, "step": 5257 }, { "epoch": 0.41, "grad_norm": 1.1010238196094357, "learning_rate": 1.3399598861418798e-05, "loss": 0.5386, "step": 5258 }, { "epoch": 0.41, "grad_norm": 1.2353411888923391, "learning_rate": 1.3397235737891726e-05, "loss": 0.6201, "step": 5259 }, { "epoch": 0.41, "grad_norm": 1.173044816103312, "learning_rate": 1.3394872399879634e-05, "loss": 0.5306, "step": 5260 }, { "epoch": 0.41, "grad_norm": 1.3148521055884241, "learning_rate": 1.339250884753173e-05, "loss": 0.5534, "step": 5261 }, { "epoch": 0.41, "grad_norm": 1.1990808165311708, "learning_rate": 1.3390145080997243e-05, "loss": 0.5092, "step": 5262 }, { "epoch": 0.41, "grad_norm": 1.111259096277895, "learning_rate": 1.3387781100425407e-05, "loss": 0.577, "step": 5263 }, { "epoch": 0.41, "grad_norm": 1.1874351483756493, "learning_rate": 1.3385416905965469e-05, "loss": 0.5754, "step": 5264 }, { "epoch": 0.41, "grad_norm": 1.1739894356227907, "learning_rate": 1.3383052497766695e-05, "loss": 0.5689, "step": 5265 }, { "epoch": 0.41, "grad_norm": 1.182610131391522, "learning_rate": 1.3380687875978362e-05, "loss": 0.5888, "step": 5266 }, { "epoch": 0.41, "grad_norm": 1.1114020297880585, "learning_rate": 1.3378323040749764e-05, "loss": 0.5546, "step": 5267 }, { "epoch": 0.41, "grad_norm": 1.3920953242723582, "learning_rate": 1.3375957992230201e-05, "loss": 0.604, "step": 5268 }, { "epoch": 0.41, "grad_norm": 1.099141375347361, "learning_rate": 1.3373592730568992e-05, "loss": 0.5828, "step": 5269 }, { "epoch": 0.41, "grad_norm": 1.1208625049883711, "learning_rate": 1.337122725591547e-05, "loss": 0.5746, "step": 5270 }, { "epoch": 0.41, "grad_norm": 1.187791487405812, "learning_rate": 1.3368861568418974e-05, "loss": 0.6015, "step": 5271 }, { "epoch": 0.41, "grad_norm": 1.2439445209305997, "learning_rate": 1.3366495668228869e-05, "loss": 0.5979, "step": 5272 }, { "epoch": 0.41, "grad_norm": 1.2590533468141456, "learning_rate": 1.3364129555494524e-05, "loss": 0.5534, "step": 5273 }, { "epoch": 0.41, "grad_norm": 1.2511264017436263, "learning_rate": 1.3361763230365323e-05, "loss": 0.6046, "step": 5274 }, { "epoch": 0.41, "grad_norm": 1.0830822188991507, "learning_rate": 1.3359396692990667e-05, "loss": 0.5522, "step": 5275 }, { "epoch": 0.41, "grad_norm": 1.2390720958747032, "learning_rate": 1.3357029943519961e-05, "loss": 0.6104, "step": 5276 }, { "epoch": 0.41, "grad_norm": 1.150432592380982, "learning_rate": 1.335466298210264e-05, "loss": 0.5587, "step": 5277 }, { "epoch": 0.41, "grad_norm": 1.2277011809806118, "learning_rate": 1.3352295808888134e-05, "loss": 0.5624, "step": 5278 }, { "epoch": 0.41, "grad_norm": 1.1422140470621214, "learning_rate": 1.3349928424025897e-05, "loss": 0.5466, "step": 5279 }, { "epoch": 0.41, "grad_norm": 1.09349253212638, "learning_rate": 1.3347560827665398e-05, "loss": 0.5071, "step": 5280 }, { "epoch": 0.41, "grad_norm": 1.223498216001818, "learning_rate": 1.3345193019956111e-05, "loss": 0.6364, "step": 5281 }, { "epoch": 0.41, "grad_norm": 1.289006966059338, "learning_rate": 1.334282500104753e-05, "loss": 0.5651, "step": 5282 }, { "epoch": 0.41, "grad_norm": 1.18752660219612, "learning_rate": 1.334045677108916e-05, "loss": 0.6393, "step": 5283 }, { "epoch": 0.41, "grad_norm": 1.158542525431692, "learning_rate": 1.3338088330230518e-05, "loss": 0.5726, "step": 5284 }, { "epoch": 0.41, "grad_norm": 1.197256464713122, "learning_rate": 1.3335719678621139e-05, "loss": 0.5801, "step": 5285 }, { "epoch": 0.41, "grad_norm": 1.1925979907483184, "learning_rate": 1.3333350816410568e-05, "loss": 0.5056, "step": 5286 }, { "epoch": 0.41, "grad_norm": 1.1867715207308893, "learning_rate": 1.3330981743748362e-05, "loss": 0.5801, "step": 5287 }, { "epoch": 0.41, "grad_norm": 1.229042995287446, "learning_rate": 1.3328612460784091e-05, "loss": 0.5919, "step": 5288 }, { "epoch": 0.41, "grad_norm": 1.2551172890891202, "learning_rate": 1.3326242967667342e-05, "loss": 0.5564, "step": 5289 }, { "epoch": 0.41, "grad_norm": 1.1319927011564592, "learning_rate": 1.3323873264547713e-05, "loss": 0.5559, "step": 5290 }, { "epoch": 0.41, "grad_norm": 1.097894554470465, "learning_rate": 1.3321503351574819e-05, "loss": 0.5697, "step": 5291 }, { "epoch": 0.41, "grad_norm": 1.251819954631812, "learning_rate": 1.331913322889828e-05, "loss": 0.5668, "step": 5292 }, { "epoch": 0.41, "grad_norm": 1.1211037452597765, "learning_rate": 1.3316762896667737e-05, "loss": 0.5977, "step": 5293 }, { "epoch": 0.41, "grad_norm": 1.1597795484971984, "learning_rate": 1.3314392355032837e-05, "loss": 0.6342, "step": 5294 }, { "epoch": 0.41, "grad_norm": 1.2404239059166946, "learning_rate": 1.3312021604143247e-05, "loss": 0.5782, "step": 5295 }, { "epoch": 0.41, "grad_norm": 1.1565144210754121, "learning_rate": 1.3309650644148648e-05, "loss": 0.5637, "step": 5296 }, { "epoch": 0.41, "grad_norm": 1.1266292324750615, "learning_rate": 1.3307279475198729e-05, "loss": 0.5848, "step": 5297 }, { "epoch": 0.41, "grad_norm": 1.2360664560822057, "learning_rate": 1.3304908097443194e-05, "loss": 0.5793, "step": 5298 }, { "epoch": 0.41, "grad_norm": 1.162907738090381, "learning_rate": 1.3302536511031755e-05, "loss": 0.5645, "step": 5299 }, { "epoch": 0.41, "grad_norm": 1.1384226295574347, "learning_rate": 1.3300164716114151e-05, "loss": 0.5372, "step": 5300 }, { "epoch": 0.41, "grad_norm": 1.1184305065157014, "learning_rate": 1.3297792712840121e-05, "loss": 0.5599, "step": 5301 }, { "epoch": 0.41, "grad_norm": 1.0984647223842157, "learning_rate": 1.3295420501359424e-05, "loss": 0.5585, "step": 5302 }, { "epoch": 0.41, "grad_norm": 1.303668966582131, "learning_rate": 1.3293048081821827e-05, "loss": 0.5829, "step": 5303 }, { "epoch": 0.41, "grad_norm": 1.3968011243431138, "learning_rate": 1.3290675454377114e-05, "loss": 0.6195, "step": 5304 }, { "epoch": 0.41, "grad_norm": 1.2358054069987254, "learning_rate": 1.3288302619175082e-05, "loss": 0.562, "step": 5305 }, { "epoch": 0.41, "grad_norm": 1.1645383950140562, "learning_rate": 1.3285929576365541e-05, "loss": 0.5729, "step": 5306 }, { "epoch": 0.41, "grad_norm": 1.176074374847541, "learning_rate": 1.3283556326098315e-05, "loss": 0.5978, "step": 5307 }, { "epoch": 0.41, "grad_norm": 1.0618518647653676, "learning_rate": 1.3281182868523235e-05, "loss": 0.5393, "step": 5308 }, { "epoch": 0.41, "grad_norm": 1.1610846609746257, "learning_rate": 1.327880920379015e-05, "loss": 0.5998, "step": 5309 }, { "epoch": 0.41, "grad_norm": 1.11523271817754, "learning_rate": 1.3276435332048924e-05, "loss": 0.5493, "step": 5310 }, { "epoch": 0.41, "grad_norm": 1.141820672825066, "learning_rate": 1.3274061253449434e-05, "loss": 0.5584, "step": 5311 }, { "epoch": 0.41, "grad_norm": 1.1834210058875512, "learning_rate": 1.3271686968141566e-05, "loss": 0.5643, "step": 5312 }, { "epoch": 0.41, "grad_norm": 1.14208984375, "learning_rate": 1.3269312476275216e-05, "loss": 0.5913, "step": 5313 }, { "epoch": 0.41, "grad_norm": 1.3027190017365033, "learning_rate": 1.3266937778000304e-05, "loss": 0.5957, "step": 5314 }, { "epoch": 0.41, "grad_norm": 1.251638435412809, "learning_rate": 1.3264562873466753e-05, "loss": 0.5749, "step": 5315 }, { "epoch": 0.41, "grad_norm": 1.1748023029154155, "learning_rate": 1.3262187762824507e-05, "loss": 0.5329, "step": 5316 }, { "epoch": 0.41, "grad_norm": 1.083255887319949, "learning_rate": 1.3259812446223519e-05, "loss": 0.529, "step": 5317 }, { "epoch": 0.41, "grad_norm": 1.2883288983658967, "learning_rate": 1.325743692381375e-05, "loss": 0.5766, "step": 5318 }, { "epoch": 0.41, "grad_norm": 1.259754080312896, "learning_rate": 1.3255061195745182e-05, "loss": 0.5885, "step": 5319 }, { "epoch": 0.41, "grad_norm": 1.1492570820113677, "learning_rate": 1.3252685262167803e-05, "loss": 0.5734, "step": 5320 }, { "epoch": 0.41, "grad_norm": 1.2455881940779543, "learning_rate": 1.3250309123231629e-05, "loss": 0.576, "step": 5321 }, { "epoch": 0.41, "grad_norm": 1.1059794510006804, "learning_rate": 1.3247932779086668e-05, "loss": 0.57, "step": 5322 }, { "epoch": 0.41, "grad_norm": 1.2401644949863038, "learning_rate": 1.324555622988295e-05, "loss": 0.5872, "step": 5323 }, { "epoch": 0.41, "grad_norm": 1.210066549819751, "learning_rate": 1.3243179475770527e-05, "loss": 0.6203, "step": 5324 }, { "epoch": 0.41, "grad_norm": 1.2145093844079853, "learning_rate": 1.324080251689945e-05, "loss": 0.5659, "step": 5325 }, { "epoch": 0.41, "grad_norm": 1.1001292434539458, "learning_rate": 1.3238425353419788e-05, "loss": 0.5789, "step": 5326 }, { "epoch": 0.41, "grad_norm": 1.1445031276541822, "learning_rate": 1.3236047985481628e-05, "loss": 0.5317, "step": 5327 }, { "epoch": 0.41, "grad_norm": 1.0828071869338616, "learning_rate": 1.3233670413235059e-05, "loss": 0.5317, "step": 5328 }, { "epoch": 0.41, "grad_norm": 1.2630660478292075, "learning_rate": 1.3231292636830198e-05, "loss": 0.5649, "step": 5329 }, { "epoch": 0.41, "grad_norm": 1.1830154869633025, "learning_rate": 1.3228914656417156e-05, "loss": 0.5365, "step": 5330 }, { "epoch": 0.41, "grad_norm": 1.2508666372133053, "learning_rate": 1.3226536472146076e-05, "loss": 0.5445, "step": 5331 }, { "epoch": 0.41, "grad_norm": 1.0856833400654309, "learning_rate": 1.3224158084167104e-05, "loss": 0.5069, "step": 5332 }, { "epoch": 0.41, "grad_norm": 1.23172819078547, "learning_rate": 1.3221779492630393e-05, "loss": 0.632, "step": 5333 }, { "epoch": 0.41, "grad_norm": 1.2798880339617424, "learning_rate": 1.3219400697686125e-05, "loss": 0.5986, "step": 5334 }, { "epoch": 0.41, "grad_norm": 1.200395584468608, "learning_rate": 1.3217021699484476e-05, "loss": 0.5474, "step": 5335 }, { "epoch": 0.41, "grad_norm": 1.1187671617438322, "learning_rate": 1.3214642498175654e-05, "loss": 0.5266, "step": 5336 }, { "epoch": 0.41, "grad_norm": 1.244319693286147, "learning_rate": 1.3212263093909865e-05, "loss": 0.5722, "step": 5337 }, { "epoch": 0.41, "grad_norm": 1.2204440643076186, "learning_rate": 1.320988348683733e-05, "loss": 0.5923, "step": 5338 }, { "epoch": 0.41, "grad_norm": 1.070028893980839, "learning_rate": 1.3207503677108294e-05, "loss": 0.5591, "step": 5339 }, { "epoch": 0.41, "grad_norm": 1.2247816144168766, "learning_rate": 1.3205123664872999e-05, "loss": 0.6196, "step": 5340 }, { "epoch": 0.41, "grad_norm": 1.2793022330368473, "learning_rate": 1.3202743450281714e-05, "loss": 0.5531, "step": 5341 }, { "epoch": 0.41, "grad_norm": 1.1453216219300684, "learning_rate": 1.3200363033484709e-05, "loss": 0.552, "step": 5342 }, { "epoch": 0.41, "grad_norm": 1.1078184786744194, "learning_rate": 1.3197982414632272e-05, "loss": 0.5344, "step": 5343 }, { "epoch": 0.41, "grad_norm": 1.2137208023252337, "learning_rate": 1.319560159387471e-05, "loss": 0.6294, "step": 5344 }, { "epoch": 0.41, "grad_norm": 1.0982733787029875, "learning_rate": 1.3193220571362328e-05, "loss": 0.561, "step": 5345 }, { "epoch": 0.41, "grad_norm": 1.1329781772872085, "learning_rate": 1.319083934724546e-05, "loss": 0.5656, "step": 5346 }, { "epoch": 0.41, "grad_norm": 1.2756605344661, "learning_rate": 1.3188457921674443e-05, "loss": 0.6146, "step": 5347 }, { "epoch": 0.41, "grad_norm": 1.3251142596516732, "learning_rate": 1.3186076294799624e-05, "loss": 0.6147, "step": 5348 }, { "epoch": 0.41, "grad_norm": 1.1498387928630491, "learning_rate": 1.318369446677137e-05, "loss": 0.5602, "step": 5349 }, { "epoch": 0.42, "grad_norm": 1.2273020853198169, "learning_rate": 1.3181312437740059e-05, "loss": 0.5581, "step": 5350 }, { "epoch": 0.42, "grad_norm": 1.2477606742217344, "learning_rate": 1.3178930207856082e-05, "loss": 0.6612, "step": 5351 }, { "epoch": 0.42, "grad_norm": 1.1590274752634329, "learning_rate": 1.3176547777269838e-05, "loss": 0.5668, "step": 5352 }, { "epoch": 0.42, "grad_norm": 1.1709371756030678, "learning_rate": 1.3174165146131746e-05, "loss": 0.5459, "step": 5353 }, { "epoch": 0.42, "grad_norm": 1.1294575965118923, "learning_rate": 1.317178231459223e-05, "loss": 0.5451, "step": 5354 }, { "epoch": 0.42, "grad_norm": 1.1875674579935054, "learning_rate": 1.3169399282801731e-05, "loss": 0.5504, "step": 5355 }, { "epoch": 0.42, "grad_norm": 1.1710273219959833, "learning_rate": 1.3167016050910709e-05, "loss": 0.5638, "step": 5356 }, { "epoch": 0.42, "grad_norm": 1.1267822770801894, "learning_rate": 1.316463261906962e-05, "loss": 0.5479, "step": 5357 }, { "epoch": 0.42, "grad_norm": 1.0659092454120829, "learning_rate": 1.3162248987428945e-05, "loss": 0.5677, "step": 5358 }, { "epoch": 0.42, "grad_norm": 1.077452684943488, "learning_rate": 1.3159865156139177e-05, "loss": 0.5328, "step": 5359 }, { "epoch": 0.42, "grad_norm": 1.1878566708238993, "learning_rate": 1.3157481125350819e-05, "loss": 0.5938, "step": 5360 }, { "epoch": 0.42, "grad_norm": 1.136143699516451, "learning_rate": 1.315509689521439e-05, "loss": 0.5728, "step": 5361 }, { "epoch": 0.42, "grad_norm": 1.2602971811117896, "learning_rate": 1.3152712465880415e-05, "loss": 0.5448, "step": 5362 }, { "epoch": 0.42, "grad_norm": 1.1265945260711152, "learning_rate": 1.3150327837499433e-05, "loss": 0.5356, "step": 5363 }, { "epoch": 0.42, "grad_norm": 1.1729767261710076, "learning_rate": 1.3147943010222001e-05, "loss": 0.6101, "step": 5364 }, { "epoch": 0.42, "grad_norm": 1.1346307828696363, "learning_rate": 1.3145557984198689e-05, "loss": 0.5428, "step": 5365 }, { "epoch": 0.42, "grad_norm": 1.3409336650636199, "learning_rate": 1.3143172759580072e-05, "loss": 0.6448, "step": 5366 }, { "epoch": 0.42, "grad_norm": 1.1121967052262058, "learning_rate": 1.3140787336516743e-05, "loss": 0.5277, "step": 5367 }, { "epoch": 0.42, "grad_norm": 1.2291394516254261, "learning_rate": 1.3138401715159301e-05, "loss": 0.5759, "step": 5368 }, { "epoch": 0.42, "grad_norm": 1.1726890788098938, "learning_rate": 1.313601589565837e-05, "loss": 0.6022, "step": 5369 }, { "epoch": 0.42, "grad_norm": 1.1335142921452013, "learning_rate": 1.3133629878164573e-05, "loss": 0.5853, "step": 5370 }, { "epoch": 0.42, "grad_norm": 1.1698067278867612, "learning_rate": 1.3131243662828558e-05, "loss": 0.5806, "step": 5371 }, { "epoch": 0.42, "grad_norm": 1.2313468585184584, "learning_rate": 1.3128857249800975e-05, "loss": 0.5683, "step": 5372 }, { "epoch": 0.42, "grad_norm": 1.251265409833926, "learning_rate": 1.3126470639232487e-05, "loss": 0.5452, "step": 5373 }, { "epoch": 0.42, "grad_norm": 1.3091814685634704, "learning_rate": 1.312408383127378e-05, "loss": 0.6112, "step": 5374 }, { "epoch": 0.42, "grad_norm": 1.237139924963708, "learning_rate": 1.3121696826075542e-05, "loss": 0.5878, "step": 5375 }, { "epoch": 0.42, "grad_norm": 1.1172732340321048, "learning_rate": 1.3119309623788479e-05, "loss": 0.5318, "step": 5376 }, { "epoch": 0.42, "grad_norm": 1.0744853295824257, "learning_rate": 1.3116922224563306e-05, "loss": 0.5582, "step": 5377 }, { "epoch": 0.42, "grad_norm": 1.2414673451105471, "learning_rate": 1.311453462855075e-05, "loss": 0.5564, "step": 5378 }, { "epoch": 0.42, "grad_norm": 1.154450537043692, "learning_rate": 1.3112146835901552e-05, "loss": 0.6046, "step": 5379 }, { "epoch": 0.42, "grad_norm": 1.0977557805777245, "learning_rate": 1.3109758846766472e-05, "loss": 0.5358, "step": 5380 }, { "epoch": 0.42, "grad_norm": 1.1126979437474374, "learning_rate": 1.310737066129627e-05, "loss": 0.555, "step": 5381 }, { "epoch": 0.42, "grad_norm": 1.1137912050753107, "learning_rate": 1.3104982279641727e-05, "loss": 0.5925, "step": 5382 }, { "epoch": 0.42, "grad_norm": 1.1289658580209436, "learning_rate": 1.3102593701953632e-05, "loss": 0.5432, "step": 5383 }, { "epoch": 0.42, "grad_norm": 1.2640440686064511, "learning_rate": 1.3100204928382787e-05, "loss": 0.6193, "step": 5384 }, { "epoch": 0.42, "grad_norm": 1.1640846711165926, "learning_rate": 1.3097815959080013e-05, "loss": 0.5981, "step": 5385 }, { "epoch": 0.42, "grad_norm": 1.2061643481110838, "learning_rate": 1.3095426794196136e-05, "loss": 0.5771, "step": 5386 }, { "epoch": 0.42, "grad_norm": 1.1018096430150346, "learning_rate": 1.3093037433881995e-05, "loss": 0.5421, "step": 5387 }, { "epoch": 0.42, "grad_norm": 1.1087404504857186, "learning_rate": 1.3090647878288441e-05, "loss": 0.5428, "step": 5388 }, { "epoch": 0.42, "grad_norm": 1.328570481928475, "learning_rate": 1.3088258127566338e-05, "loss": 0.6409, "step": 5389 }, { "epoch": 0.42, "grad_norm": 1.2403104503457993, "learning_rate": 1.3085868181866571e-05, "loss": 0.598, "step": 5390 }, { "epoch": 0.42, "grad_norm": 1.2386832081238086, "learning_rate": 1.3083478041340023e-05, "loss": 0.5799, "step": 5391 }, { "epoch": 0.42, "grad_norm": 1.1597264068335127, "learning_rate": 1.3081087706137596e-05, "loss": 0.608, "step": 5392 }, { "epoch": 0.42, "grad_norm": 1.1772125758332481, "learning_rate": 1.307869717641021e-05, "loss": 0.559, "step": 5393 }, { "epoch": 0.42, "grad_norm": 1.0811032057766397, "learning_rate": 1.3076306452308782e-05, "loss": 0.5869, "step": 5394 }, { "epoch": 0.42, "grad_norm": 1.205162403986662, "learning_rate": 1.3073915533984262e-05, "loss": 0.5905, "step": 5395 }, { "epoch": 0.42, "grad_norm": 1.2341183262763087, "learning_rate": 1.307152442158759e-05, "loss": 0.613, "step": 5396 }, { "epoch": 0.42, "grad_norm": 1.1664420263548732, "learning_rate": 1.3069133115269734e-05, "loss": 0.5878, "step": 5397 }, { "epoch": 0.42, "grad_norm": 1.0628185355719757, "learning_rate": 1.3066741615181675e-05, "loss": 0.5476, "step": 5398 }, { "epoch": 0.42, "grad_norm": 1.218004585605126, "learning_rate": 1.306434992147439e-05, "loss": 0.6091, "step": 5399 }, { "epoch": 0.42, "grad_norm": 1.090991984274113, "learning_rate": 1.306195803429889e-05, "loss": 0.4964, "step": 5400 }, { "epoch": 0.42, "grad_norm": 1.113897588008314, "learning_rate": 1.3059565953806177e-05, "loss": 0.5366, "step": 5401 }, { "epoch": 0.42, "grad_norm": 1.2068524274488157, "learning_rate": 1.305717368014728e-05, "loss": 0.6072, "step": 5402 }, { "epoch": 0.42, "grad_norm": 1.210839987489532, "learning_rate": 1.3054781213473238e-05, "loss": 0.5776, "step": 5403 }, { "epoch": 0.42, "grad_norm": 1.1703435427695652, "learning_rate": 1.3052388553935096e-05, "loss": 0.5825, "step": 5404 }, { "epoch": 0.42, "grad_norm": 1.168563844267776, "learning_rate": 1.304999570168392e-05, "loss": 0.5624, "step": 5405 }, { "epoch": 0.42, "grad_norm": 1.2287186544306676, "learning_rate": 1.3047602656870775e-05, "loss": 0.5939, "step": 5406 }, { "epoch": 0.42, "grad_norm": 1.2171640592186117, "learning_rate": 1.3045209419646749e-05, "loss": 0.594, "step": 5407 }, { "epoch": 0.42, "grad_norm": 1.1286022476537598, "learning_rate": 1.3042815990162944e-05, "loss": 0.573, "step": 5408 }, { "epoch": 0.42, "grad_norm": 1.1666646968734098, "learning_rate": 1.3040422368570466e-05, "loss": 0.5778, "step": 5409 }, { "epoch": 0.42, "grad_norm": 1.1808447813898302, "learning_rate": 1.3038028555020444e-05, "loss": 0.5682, "step": 5410 }, { "epoch": 0.42, "grad_norm": 1.1902489716674234, "learning_rate": 1.3035634549663995e-05, "loss": 0.5291, "step": 5411 }, { "epoch": 0.42, "grad_norm": 1.1010248481863851, "learning_rate": 1.3033240352652281e-05, "loss": 0.5467, "step": 5412 }, { "epoch": 0.42, "grad_norm": 1.274650557633351, "learning_rate": 1.3030845964136452e-05, "loss": 0.6078, "step": 5413 }, { "epoch": 0.42, "grad_norm": 1.1680105960966254, "learning_rate": 1.3028451384267679e-05, "loss": 0.5619, "step": 5414 }, { "epoch": 0.42, "grad_norm": 1.1307461011483286, "learning_rate": 1.3026056613197151e-05, "loss": 0.5746, "step": 5415 }, { "epoch": 0.42, "grad_norm": 1.1679891118855061, "learning_rate": 1.3023661651076051e-05, "loss": 0.5522, "step": 5416 }, { "epoch": 0.42, "grad_norm": 1.2030811549532923, "learning_rate": 1.3021266498055592e-05, "loss": 0.5574, "step": 5417 }, { "epoch": 0.42, "grad_norm": 1.2734235517053876, "learning_rate": 1.3018871154286991e-05, "loss": 0.6376, "step": 5418 }, { "epoch": 0.42, "grad_norm": 1.1515076270933733, "learning_rate": 1.3016475619921477e-05, "loss": 0.5837, "step": 5419 }, { "epoch": 0.42, "grad_norm": 1.2972100985439499, "learning_rate": 1.3014079895110299e-05, "loss": 0.6417, "step": 5420 }, { "epoch": 0.42, "grad_norm": 1.1155738647880546, "learning_rate": 1.3011683980004705e-05, "loss": 0.5376, "step": 5421 }, { "epoch": 0.42, "grad_norm": 1.1662838625719312, "learning_rate": 1.3009287874755963e-05, "loss": 0.6136, "step": 5422 }, { "epoch": 0.42, "grad_norm": 1.1438499740515875, "learning_rate": 1.3006891579515351e-05, "loss": 0.528, "step": 5423 }, { "epoch": 0.42, "grad_norm": 1.1405953181337083, "learning_rate": 1.3004495094434157e-05, "loss": 0.571, "step": 5424 }, { "epoch": 0.42, "grad_norm": 1.1564098453865295, "learning_rate": 1.3002098419663692e-05, "loss": 0.5731, "step": 5425 }, { "epoch": 0.42, "grad_norm": 1.0746658781804426, "learning_rate": 1.2999701555355264e-05, "loss": 0.5333, "step": 5426 }, { "epoch": 0.42, "grad_norm": 1.1580312257157257, "learning_rate": 1.2997304501660197e-05, "loss": 0.5383, "step": 5427 }, { "epoch": 0.42, "grad_norm": 1.1553913227213064, "learning_rate": 1.2994907258729835e-05, "loss": 0.5779, "step": 5428 }, { "epoch": 0.42, "grad_norm": 1.1828487053070678, "learning_rate": 1.2992509826715525e-05, "loss": 0.5551, "step": 5429 }, { "epoch": 0.42, "grad_norm": 1.202424030957592, "learning_rate": 1.2990112205768632e-05, "loss": 0.6345, "step": 5430 }, { "epoch": 0.42, "grad_norm": 1.2214735849829235, "learning_rate": 1.2987714396040527e-05, "loss": 0.6044, "step": 5431 }, { "epoch": 0.42, "grad_norm": 1.1452872738005164, "learning_rate": 1.2985316397682597e-05, "loss": 0.5728, "step": 5432 }, { "epoch": 0.42, "grad_norm": 1.097056486325496, "learning_rate": 1.2982918210846243e-05, "loss": 0.542, "step": 5433 }, { "epoch": 0.42, "grad_norm": 1.077531125798468, "learning_rate": 1.298051983568287e-05, "loss": 0.5364, "step": 5434 }, { "epoch": 0.42, "grad_norm": 1.2556836611923696, "learning_rate": 1.2978121272343904e-05, "loss": 0.5945, "step": 5435 }, { "epoch": 0.42, "grad_norm": 1.165231003022694, "learning_rate": 1.2975722520980777e-05, "loss": 0.5832, "step": 5436 }, { "epoch": 0.42, "grad_norm": 1.2382334510410542, "learning_rate": 1.2973323581744935e-05, "loss": 0.5825, "step": 5437 }, { "epoch": 0.42, "grad_norm": 1.2646124286017335, "learning_rate": 1.2970924454787834e-05, "loss": 0.5928, "step": 5438 }, { "epoch": 0.42, "grad_norm": 1.168861634741927, "learning_rate": 1.2968525140260946e-05, "loss": 0.6037, "step": 5439 }, { "epoch": 0.42, "grad_norm": 1.072208034726781, "learning_rate": 1.2966125638315745e-05, "loss": 0.5404, "step": 5440 }, { "epoch": 0.42, "grad_norm": 1.0987807517581774, "learning_rate": 1.2963725949103734e-05, "loss": 0.5726, "step": 5441 }, { "epoch": 0.42, "grad_norm": 1.1791842062806022, "learning_rate": 1.2961326072776412e-05, "loss": 0.5786, "step": 5442 }, { "epoch": 0.42, "grad_norm": 1.2925609394958855, "learning_rate": 1.2958926009485297e-05, "loss": 0.5977, "step": 5443 }, { "epoch": 0.42, "grad_norm": 1.2435406686032486, "learning_rate": 1.2956525759381917e-05, "loss": 0.6286, "step": 5444 }, { "epoch": 0.42, "grad_norm": 1.1867145148999685, "learning_rate": 1.295412532261781e-05, "loss": 0.542, "step": 5445 }, { "epoch": 0.42, "grad_norm": 1.0957595348287095, "learning_rate": 1.2951724699344532e-05, "loss": 0.5126, "step": 5446 }, { "epoch": 0.42, "grad_norm": 1.2980519664571566, "learning_rate": 1.2949323889713643e-05, "loss": 0.6512, "step": 5447 }, { "epoch": 0.42, "grad_norm": 1.2518532366486848, "learning_rate": 1.2946922893876722e-05, "loss": 0.5531, "step": 5448 }, { "epoch": 0.42, "grad_norm": 1.0681618572413925, "learning_rate": 1.2944521711985357e-05, "loss": 0.5069, "step": 5449 }, { "epoch": 0.42, "grad_norm": 1.2524750048794988, "learning_rate": 1.2942120344191141e-05, "loss": 0.5952, "step": 5450 }, { "epoch": 0.42, "grad_norm": 1.1794976087920306, "learning_rate": 1.293971879064569e-05, "loss": 0.5681, "step": 5451 }, { "epoch": 0.42, "grad_norm": 1.0510496909846871, "learning_rate": 1.2937317051500622e-05, "loss": 0.5169, "step": 5452 }, { "epoch": 0.42, "grad_norm": 1.2114186469301964, "learning_rate": 1.2934915126907575e-05, "loss": 0.5952, "step": 5453 }, { "epoch": 0.42, "grad_norm": 1.1396633431604761, "learning_rate": 1.2932513017018197e-05, "loss": 0.5781, "step": 5454 }, { "epoch": 0.42, "grad_norm": 1.1030882421318362, "learning_rate": 1.293011072198414e-05, "loss": 0.5656, "step": 5455 }, { "epoch": 0.42, "grad_norm": 1.1908544544583044, "learning_rate": 1.2927708241957077e-05, "loss": 0.5282, "step": 5456 }, { "epoch": 0.42, "grad_norm": 1.1783158873669861, "learning_rate": 1.2925305577088687e-05, "loss": 0.5524, "step": 5457 }, { "epoch": 0.42, "grad_norm": 1.1286463982909718, "learning_rate": 1.2922902727530663e-05, "loss": 0.5551, "step": 5458 }, { "epoch": 0.42, "grad_norm": 1.1902179733162859, "learning_rate": 1.2920499693434712e-05, "loss": 0.6409, "step": 5459 }, { "epoch": 0.42, "grad_norm": 1.2854356548982162, "learning_rate": 1.2918096474952544e-05, "loss": 0.6034, "step": 5460 }, { "epoch": 0.42, "grad_norm": 1.156583892451261, "learning_rate": 1.2915693072235893e-05, "loss": 0.5793, "step": 5461 }, { "epoch": 0.42, "grad_norm": 1.2416669126591449, "learning_rate": 1.2913289485436492e-05, "loss": 0.5335, "step": 5462 }, { "epoch": 0.42, "grad_norm": 1.1119004637883199, "learning_rate": 1.2910885714706093e-05, "loss": 0.5481, "step": 5463 }, { "epoch": 0.42, "grad_norm": 1.2679661419948003, "learning_rate": 1.290848176019647e-05, "loss": 0.6347, "step": 5464 }, { "epoch": 0.42, "grad_norm": 1.2130405990586628, "learning_rate": 1.290607762205938e-05, "loss": 0.6087, "step": 5465 }, { "epoch": 0.42, "grad_norm": 1.1688536286948465, "learning_rate": 1.2903673300446623e-05, "loss": 0.5916, "step": 5466 }, { "epoch": 0.42, "grad_norm": 1.303191646637205, "learning_rate": 1.2901268795509982e-05, "loss": 0.6474, "step": 5467 }, { "epoch": 0.42, "grad_norm": 1.100066284870186, "learning_rate": 1.2898864107401275e-05, "loss": 0.5632, "step": 5468 }, { "epoch": 0.42, "grad_norm": 1.2421815500176878, "learning_rate": 1.2896459236272325e-05, "loss": 0.5645, "step": 5469 }, { "epoch": 0.42, "grad_norm": 1.1206938133238493, "learning_rate": 1.2894054182274956e-05, "loss": 0.5103, "step": 5470 }, { "epoch": 0.42, "grad_norm": 1.1077585398199128, "learning_rate": 1.2891648945561017e-05, "loss": 0.5008, "step": 5471 }, { "epoch": 0.42, "grad_norm": 1.185392969941593, "learning_rate": 1.2889243526282357e-05, "loss": 0.564, "step": 5472 }, { "epoch": 0.42, "grad_norm": 1.1988177296873601, "learning_rate": 1.288683792459085e-05, "loss": 0.5892, "step": 5473 }, { "epoch": 0.42, "grad_norm": 1.2340225972157672, "learning_rate": 1.2884432140638375e-05, "loss": 0.6147, "step": 5474 }, { "epoch": 0.42, "grad_norm": 1.2384168869486223, "learning_rate": 1.2882026174576812e-05, "loss": 0.5768, "step": 5475 }, { "epoch": 0.42, "grad_norm": 1.1702327665458885, "learning_rate": 1.2879620026558067e-05, "loss": 0.5321, "step": 5476 }, { "epoch": 0.42, "grad_norm": 1.0680964005035958, "learning_rate": 1.2877213696734052e-05, "loss": 0.5685, "step": 5477 }, { "epoch": 0.42, "grad_norm": 1.133198376408608, "learning_rate": 1.2874807185256692e-05, "loss": 0.5319, "step": 5478 }, { "epoch": 0.43, "grad_norm": 1.2608793791241737, "learning_rate": 1.2872400492277928e-05, "loss": 0.5705, "step": 5479 }, { "epoch": 0.43, "grad_norm": 1.3037042624770598, "learning_rate": 1.2869993617949696e-05, "loss": 0.6176, "step": 5480 }, { "epoch": 0.43, "grad_norm": 1.2906044725166501, "learning_rate": 1.2867586562423962e-05, "loss": 0.5741, "step": 5481 }, { "epoch": 0.43, "grad_norm": 1.1569897115541745, "learning_rate": 1.2865179325852693e-05, "loss": 0.5376, "step": 5482 }, { "epoch": 0.43, "grad_norm": 1.2820102018108113, "learning_rate": 1.2862771908387867e-05, "loss": 0.6216, "step": 5483 }, { "epoch": 0.43, "grad_norm": 1.2591156931585912, "learning_rate": 1.2860364310181488e-05, "loss": 0.5675, "step": 5484 }, { "epoch": 0.43, "grad_norm": 1.1636384537255418, "learning_rate": 1.2857956531385548e-05, "loss": 0.579, "step": 5485 }, { "epoch": 0.43, "grad_norm": 1.1578285679493767, "learning_rate": 1.2855548572152066e-05, "loss": 0.5645, "step": 5486 }, { "epoch": 0.43, "grad_norm": 1.1476531172177928, "learning_rate": 1.2853140432633074e-05, "loss": 0.503, "step": 5487 }, { "epoch": 0.43, "grad_norm": 1.2773189717260567, "learning_rate": 1.2850732112980602e-05, "loss": 0.5681, "step": 5488 }, { "epoch": 0.43, "grad_norm": 1.2145730357881368, "learning_rate": 1.2848323613346708e-05, "loss": 0.5966, "step": 5489 }, { "epoch": 0.43, "grad_norm": 1.1647737109736245, "learning_rate": 1.2845914933883443e-05, "loss": 0.6117, "step": 5490 }, { "epoch": 0.43, "grad_norm": 1.1003217811702242, "learning_rate": 1.2843506074742888e-05, "loss": 0.5412, "step": 5491 }, { "epoch": 0.43, "grad_norm": 1.202108870598096, "learning_rate": 1.2841097036077125e-05, "loss": 0.615, "step": 5492 }, { "epoch": 0.43, "grad_norm": 1.1819249339173739, "learning_rate": 1.283868781803825e-05, "loss": 0.5932, "step": 5493 }, { "epoch": 0.43, "grad_norm": 1.0883979641669264, "learning_rate": 1.2836278420778366e-05, "loss": 0.5207, "step": 5494 }, { "epoch": 0.43, "grad_norm": 1.1347918351473658, "learning_rate": 1.2833868844449588e-05, "loss": 0.5725, "step": 5495 }, { "epoch": 0.43, "grad_norm": 1.232446201767509, "learning_rate": 1.283145908920405e-05, "loss": 0.592, "step": 5496 }, { "epoch": 0.43, "grad_norm": 1.3793116937930812, "learning_rate": 1.2829049155193896e-05, "loss": 0.6154, "step": 5497 }, { "epoch": 0.43, "grad_norm": 1.246839533327724, "learning_rate": 1.282663904257127e-05, "loss": 0.6238, "step": 5498 }, { "epoch": 0.43, "grad_norm": 1.0381274497630815, "learning_rate": 1.2824228751488339e-05, "loss": 0.505, "step": 5499 }, { "epoch": 0.43, "grad_norm": 1.212783194435812, "learning_rate": 1.2821818282097273e-05, "loss": 0.5273, "step": 5500 }, { "epoch": 0.43, "grad_norm": 1.1216896945427937, "learning_rate": 1.281940763455026e-05, "loss": 0.5618, "step": 5501 }, { "epoch": 0.43, "grad_norm": 1.3120100378402657, "learning_rate": 1.28169968089995e-05, "loss": 0.5818, "step": 5502 }, { "epoch": 0.43, "grad_norm": 1.1351507849670044, "learning_rate": 1.2814585805597197e-05, "loss": 0.5136, "step": 5503 }, { "epoch": 0.43, "grad_norm": 1.2382589633031131, "learning_rate": 1.2812174624495569e-05, "loss": 0.5979, "step": 5504 }, { "epoch": 0.43, "grad_norm": 1.1588280264088582, "learning_rate": 1.2809763265846851e-05, "loss": 0.5037, "step": 5505 }, { "epoch": 0.43, "grad_norm": 1.1998168249197858, "learning_rate": 1.2807351729803277e-05, "loss": 0.5475, "step": 5506 }, { "epoch": 0.43, "grad_norm": 1.2553727555384517, "learning_rate": 1.280494001651711e-05, "loss": 0.6203, "step": 5507 }, { "epoch": 0.43, "grad_norm": 1.2009800703537514, "learning_rate": 1.2802528126140604e-05, "loss": 0.6095, "step": 5508 }, { "epoch": 0.43, "grad_norm": 1.2123136514830408, "learning_rate": 1.2800116058826037e-05, "loss": 0.6154, "step": 5509 }, { "epoch": 0.43, "grad_norm": 1.1539214632326678, "learning_rate": 1.2797703814725702e-05, "loss": 0.576, "step": 5510 }, { "epoch": 0.43, "grad_norm": 1.1659747297057943, "learning_rate": 1.2795291393991885e-05, "loss": 0.6005, "step": 5511 }, { "epoch": 0.43, "grad_norm": 1.1356948488428176, "learning_rate": 1.2792878796776904e-05, "loss": 0.6047, "step": 5512 }, { "epoch": 0.43, "grad_norm": 1.085219605630756, "learning_rate": 1.2790466023233075e-05, "loss": 0.5306, "step": 5513 }, { "epoch": 0.43, "grad_norm": 1.2194775586036035, "learning_rate": 1.2788053073512728e-05, "loss": 0.6436, "step": 5514 }, { "epoch": 0.43, "grad_norm": 1.1882131342579478, "learning_rate": 1.2785639947768207e-05, "loss": 0.5168, "step": 5515 }, { "epoch": 0.43, "grad_norm": 1.133888943369136, "learning_rate": 1.2783226646151863e-05, "loss": 0.5642, "step": 5516 }, { "epoch": 0.43, "grad_norm": 1.153933860123063, "learning_rate": 1.2780813168816062e-05, "loss": 0.5221, "step": 5517 }, { "epoch": 0.43, "grad_norm": 1.1556269023543317, "learning_rate": 1.2778399515913177e-05, "loss": 0.5666, "step": 5518 }, { "epoch": 0.43, "grad_norm": 1.175304886646964, "learning_rate": 1.2775985687595598e-05, "loss": 0.5668, "step": 5519 }, { "epoch": 0.43, "grad_norm": 1.1536244145709227, "learning_rate": 1.277357168401572e-05, "loss": 0.4967, "step": 5520 }, { "epoch": 0.43, "grad_norm": 1.1527478560173565, "learning_rate": 1.2771157505325951e-05, "loss": 0.5899, "step": 5521 }, { "epoch": 0.43, "grad_norm": 1.133982560553497, "learning_rate": 1.2768743151678711e-05, "loss": 0.5354, "step": 5522 }, { "epoch": 0.43, "grad_norm": 1.19549359713714, "learning_rate": 1.2766328623226434e-05, "loss": 0.59, "step": 5523 }, { "epoch": 0.43, "grad_norm": 1.160543820156621, "learning_rate": 1.2763913920121554e-05, "loss": 0.5791, "step": 5524 }, { "epoch": 0.43, "grad_norm": 1.2247717352747132, "learning_rate": 1.2761499042516531e-05, "loss": 0.5581, "step": 5525 }, { "epoch": 0.43, "grad_norm": 1.0866881595916071, "learning_rate": 1.2759083990563825e-05, "loss": 0.4941, "step": 5526 }, { "epoch": 0.43, "grad_norm": 1.1619143538764027, "learning_rate": 1.2756668764415913e-05, "loss": 0.5733, "step": 5527 }, { "epoch": 0.43, "grad_norm": 1.0770966974802965, "learning_rate": 1.275425336422528e-05, "loss": 0.5467, "step": 5528 }, { "epoch": 0.43, "grad_norm": 1.1545138340745242, "learning_rate": 1.2751837790144419e-05, "loss": 0.6053, "step": 5529 }, { "epoch": 0.43, "grad_norm": 1.1765427768872843, "learning_rate": 1.2749422042325846e-05, "loss": 0.5294, "step": 5530 }, { "epoch": 0.43, "grad_norm": 1.301201921320522, "learning_rate": 1.2747006120922068e-05, "loss": 0.6116, "step": 5531 }, { "epoch": 0.43, "grad_norm": 1.1333565819450089, "learning_rate": 1.2744590026085622e-05, "loss": 0.5351, "step": 5532 }, { "epoch": 0.43, "grad_norm": 1.2013959970184338, "learning_rate": 1.2742173757969052e-05, "loss": 0.5668, "step": 5533 }, { "epoch": 0.43, "grad_norm": 1.1777583130891491, "learning_rate": 1.2739757316724901e-05, "loss": 0.521, "step": 5534 }, { "epoch": 0.43, "grad_norm": 1.215725790125941, "learning_rate": 1.2737340702505737e-05, "loss": 0.5632, "step": 5535 }, { "epoch": 0.43, "grad_norm": 1.22141585638205, "learning_rate": 1.273492391546413e-05, "loss": 0.5806, "step": 5536 }, { "epoch": 0.43, "grad_norm": 1.1856712012169837, "learning_rate": 1.2732506955752665e-05, "loss": 0.5805, "step": 5537 }, { "epoch": 0.43, "grad_norm": 1.1348465646054706, "learning_rate": 1.2730089823523943e-05, "loss": 0.564, "step": 5538 }, { "epoch": 0.43, "grad_norm": 1.3781617832248785, "learning_rate": 1.2727672518930561e-05, "loss": 0.6869, "step": 5539 }, { "epoch": 0.43, "grad_norm": 1.1141266942087853, "learning_rate": 1.2725255042125142e-05, "loss": 0.5721, "step": 5540 }, { "epoch": 0.43, "grad_norm": 1.1295708412728245, "learning_rate": 1.2722837393260308e-05, "loss": 0.5473, "step": 5541 }, { "epoch": 0.43, "grad_norm": 1.19208789774343, "learning_rate": 1.2720419572488705e-05, "loss": 0.5541, "step": 5542 }, { "epoch": 0.43, "grad_norm": 1.2233611198737826, "learning_rate": 1.2718001579962978e-05, "loss": 0.5988, "step": 5543 }, { "epoch": 0.43, "grad_norm": 1.2402776275061307, "learning_rate": 1.2715583415835788e-05, "loss": 0.6028, "step": 5544 }, { "epoch": 0.43, "grad_norm": 1.0926991455351278, "learning_rate": 1.2713165080259805e-05, "loss": 0.5535, "step": 5545 }, { "epoch": 0.43, "grad_norm": 1.1117348622208707, "learning_rate": 1.2710746573387716e-05, "loss": 0.5454, "step": 5546 }, { "epoch": 0.43, "grad_norm": 1.1983621128448163, "learning_rate": 1.2708327895372208e-05, "loss": 0.5822, "step": 5547 }, { "epoch": 0.43, "grad_norm": 1.0697513975137425, "learning_rate": 1.2705909046365987e-05, "loss": 0.5298, "step": 5548 }, { "epoch": 0.43, "grad_norm": 1.1547100534675536, "learning_rate": 1.2703490026521766e-05, "loss": 0.521, "step": 5549 }, { "epoch": 0.43, "grad_norm": 1.1326668481255966, "learning_rate": 1.2701070835992273e-05, "loss": 0.5192, "step": 5550 }, { "epoch": 0.43, "grad_norm": 1.0812013383620682, "learning_rate": 1.2698651474930239e-05, "loss": 0.567, "step": 5551 }, { "epoch": 0.43, "grad_norm": 1.1635503986592208, "learning_rate": 1.269623194348842e-05, "loss": 0.586, "step": 5552 }, { "epoch": 0.43, "grad_norm": 1.1103739471997414, "learning_rate": 1.2693812241819565e-05, "loss": 0.508, "step": 5553 }, { "epoch": 0.43, "grad_norm": 1.3577002370274034, "learning_rate": 1.2691392370076443e-05, "loss": 0.6356, "step": 5554 }, { "epoch": 0.43, "grad_norm": 1.099004982535233, "learning_rate": 1.2688972328411836e-05, "loss": 0.5487, "step": 5555 }, { "epoch": 0.43, "grad_norm": 1.31773268886971, "learning_rate": 1.2686552116978535e-05, "loss": 0.6177, "step": 5556 }, { "epoch": 0.43, "grad_norm": 1.1867376691403835, "learning_rate": 1.2684131735929337e-05, "loss": 0.5549, "step": 5557 }, { "epoch": 0.43, "grad_norm": 1.1766972316486015, "learning_rate": 1.2681711185417053e-05, "loss": 0.5532, "step": 5558 }, { "epoch": 0.43, "grad_norm": 1.1879420210693632, "learning_rate": 1.2679290465594507e-05, "loss": 0.5639, "step": 5559 }, { "epoch": 0.43, "grad_norm": 1.1284058197569167, "learning_rate": 1.2676869576614527e-05, "loss": 0.5732, "step": 5560 }, { "epoch": 0.43, "grad_norm": 1.1950623992366058, "learning_rate": 1.2674448518629964e-05, "loss": 0.5867, "step": 5561 }, { "epoch": 0.43, "grad_norm": 1.2503843193526816, "learning_rate": 1.2672027291793669e-05, "loss": 0.6329, "step": 5562 }, { "epoch": 0.43, "grad_norm": 1.1781078651052352, "learning_rate": 1.2669605896258503e-05, "loss": 0.5366, "step": 5563 }, { "epoch": 0.43, "grad_norm": 1.1897859655764778, "learning_rate": 1.2667184332177342e-05, "loss": 0.5648, "step": 5564 }, { "epoch": 0.43, "grad_norm": 1.254836690923467, "learning_rate": 1.2664762599703073e-05, "loss": 0.6093, "step": 5565 }, { "epoch": 0.43, "grad_norm": 1.2439129439922414, "learning_rate": 1.2662340698988595e-05, "loss": 0.5668, "step": 5566 }, { "epoch": 0.43, "grad_norm": 1.142804037077906, "learning_rate": 1.2659918630186814e-05, "loss": 0.5681, "step": 5567 }, { "epoch": 0.43, "grad_norm": 1.1572250173051393, "learning_rate": 1.2657496393450646e-05, "loss": 0.5647, "step": 5568 }, { "epoch": 0.43, "grad_norm": 1.1247183129196163, "learning_rate": 1.2655073988933016e-05, "loss": 0.5784, "step": 5569 }, { "epoch": 0.43, "grad_norm": 1.1487916672780292, "learning_rate": 1.2652651416786867e-05, "loss": 0.5301, "step": 5570 }, { "epoch": 0.43, "grad_norm": 1.2677035277042292, "learning_rate": 1.2650228677165153e-05, "loss": 0.593, "step": 5571 }, { "epoch": 0.43, "grad_norm": 1.145158302991299, "learning_rate": 1.2647805770220826e-05, "loss": 0.5562, "step": 5572 }, { "epoch": 0.43, "grad_norm": 1.1512460944226481, "learning_rate": 1.264538269610686e-05, "loss": 0.5712, "step": 5573 }, { "epoch": 0.43, "grad_norm": 1.1257718934961527, "learning_rate": 1.2642959454976236e-05, "loss": 0.5677, "step": 5574 }, { "epoch": 0.43, "grad_norm": 1.2000249581920426, "learning_rate": 1.2640536046981943e-05, "loss": 0.5582, "step": 5575 }, { "epoch": 0.43, "grad_norm": 1.2453488123253045, "learning_rate": 1.263811247227699e-05, "loss": 0.5999, "step": 5576 }, { "epoch": 0.43, "grad_norm": 1.2416045542463858, "learning_rate": 1.2635688731014386e-05, "loss": 0.5865, "step": 5577 }, { "epoch": 0.43, "grad_norm": 1.110429880136358, "learning_rate": 1.2633264823347156e-05, "loss": 0.5823, "step": 5578 }, { "epoch": 0.43, "grad_norm": 1.2268713786945737, "learning_rate": 1.2630840749428327e-05, "loss": 0.5936, "step": 5579 }, { "epoch": 0.43, "grad_norm": 1.1897675798547742, "learning_rate": 1.2628416509410947e-05, "loss": 0.6204, "step": 5580 }, { "epoch": 0.43, "grad_norm": 1.1945103465734335, "learning_rate": 1.2625992103448077e-05, "loss": 0.584, "step": 5581 }, { "epoch": 0.43, "grad_norm": 1.129620547177039, "learning_rate": 1.2623567531692774e-05, "loss": 0.5422, "step": 5582 }, { "epoch": 0.43, "grad_norm": 1.106649139296138, "learning_rate": 1.2621142794298118e-05, "loss": 0.5495, "step": 5583 }, { "epoch": 0.43, "grad_norm": 1.2286843091571198, "learning_rate": 1.2618717891417194e-05, "loss": 0.5841, "step": 5584 }, { "epoch": 0.43, "grad_norm": 1.2267494302192081, "learning_rate": 1.2616292823203098e-05, "loss": 0.5958, "step": 5585 }, { "epoch": 0.43, "grad_norm": 1.1938057172464853, "learning_rate": 1.2613867589808939e-05, "loss": 0.5393, "step": 5586 }, { "epoch": 0.43, "grad_norm": 1.253108260882443, "learning_rate": 1.2611442191387836e-05, "loss": 0.5691, "step": 5587 }, { "epoch": 0.43, "grad_norm": 1.2112076488763577, "learning_rate": 1.2609016628092907e-05, "loss": 0.5426, "step": 5588 }, { "epoch": 0.43, "grad_norm": 1.1524016802176051, "learning_rate": 1.2606590900077303e-05, "loss": 0.5564, "step": 5589 }, { "epoch": 0.43, "grad_norm": 1.1014883679274898, "learning_rate": 1.2604165007494164e-05, "loss": 0.5093, "step": 5590 }, { "epoch": 0.43, "grad_norm": 1.9059208445048783, "learning_rate": 1.2601738950496654e-05, "loss": 0.5726, "step": 5591 }, { "epoch": 0.43, "grad_norm": 1.0752109453691523, "learning_rate": 1.2599312729237943e-05, "loss": 0.5657, "step": 5592 }, { "epoch": 0.43, "grad_norm": 1.1366389925003257, "learning_rate": 1.2596886343871204e-05, "loss": 0.5124, "step": 5593 }, { "epoch": 0.43, "grad_norm": 1.1415359830661342, "learning_rate": 1.2594459794549636e-05, "loss": 0.5544, "step": 5594 }, { "epoch": 0.43, "grad_norm": 1.2531306640040374, "learning_rate": 1.2592033081426434e-05, "loss": 0.5962, "step": 5595 }, { "epoch": 0.43, "grad_norm": 1.0777766452752768, "learning_rate": 1.2589606204654809e-05, "loss": 0.5125, "step": 5596 }, { "epoch": 0.43, "grad_norm": 1.1206248828145364, "learning_rate": 1.2587179164387987e-05, "loss": 0.5679, "step": 5597 }, { "epoch": 0.43, "grad_norm": 1.157364849085201, "learning_rate": 1.2584751960779192e-05, "loss": 0.5925, "step": 5598 }, { "epoch": 0.43, "grad_norm": 1.1152819407886694, "learning_rate": 1.2582324593981673e-05, "loss": 0.5686, "step": 5599 }, { "epoch": 0.43, "grad_norm": 1.298006047109851, "learning_rate": 1.2579897064148678e-05, "loss": 0.5408, "step": 5600 }, { "epoch": 0.43, "grad_norm": 1.162330619203509, "learning_rate": 1.2577469371433473e-05, "loss": 0.5657, "step": 5601 }, { "epoch": 0.43, "grad_norm": 1.1326334318498235, "learning_rate": 1.2575041515989328e-05, "loss": 0.5385, "step": 5602 }, { "epoch": 0.43, "grad_norm": 1.2027095721500707, "learning_rate": 1.2572613497969524e-05, "loss": 0.5648, "step": 5603 }, { "epoch": 0.43, "grad_norm": 1.2390831598048317, "learning_rate": 1.257018531752736e-05, "loss": 0.5448, "step": 5604 }, { "epoch": 0.43, "grad_norm": 1.2188423806415503, "learning_rate": 1.2567756974816134e-05, "loss": 0.5245, "step": 5605 }, { "epoch": 0.43, "grad_norm": 1.0975876104539766, "learning_rate": 1.2565328469989165e-05, "loss": 0.5449, "step": 5606 }, { "epoch": 0.43, "grad_norm": 1.1708930924574363, "learning_rate": 1.2562899803199773e-05, "loss": 0.626, "step": 5607 }, { "epoch": 0.44, "grad_norm": 1.2815041638863705, "learning_rate": 1.2560470974601294e-05, "loss": 0.6245, "step": 5608 }, { "epoch": 0.44, "grad_norm": 1.2052114156399512, "learning_rate": 1.255804198434707e-05, "loss": 0.5591, "step": 5609 }, { "epoch": 0.44, "grad_norm": 1.1299411029995607, "learning_rate": 1.2555612832590458e-05, "loss": 0.5619, "step": 5610 }, { "epoch": 0.44, "grad_norm": 1.3317409881521767, "learning_rate": 1.2553183519484826e-05, "loss": 0.6291, "step": 5611 }, { "epoch": 0.44, "grad_norm": 1.0300141645587158, "learning_rate": 1.2550754045183544e-05, "loss": 0.5636, "step": 5612 }, { "epoch": 0.44, "grad_norm": 1.0895233230160215, "learning_rate": 1.2548324409839999e-05, "loss": 0.5437, "step": 5613 }, { "epoch": 0.44, "grad_norm": 1.2064299829641423, "learning_rate": 1.2545894613607585e-05, "loss": 0.6266, "step": 5614 }, { "epoch": 0.44, "grad_norm": 1.2018781864879817, "learning_rate": 1.254346465663971e-05, "loss": 0.6192, "step": 5615 }, { "epoch": 0.44, "grad_norm": 1.288902272756921, "learning_rate": 1.254103453908979e-05, "loss": 0.6152, "step": 5616 }, { "epoch": 0.44, "grad_norm": 1.189199335994593, "learning_rate": 1.2538604261111247e-05, "loss": 0.6015, "step": 5617 }, { "epoch": 0.44, "grad_norm": 1.0927091823364878, "learning_rate": 1.253617382285752e-05, "loss": 0.5194, "step": 5618 }, { "epoch": 0.44, "grad_norm": 1.1990401541959328, "learning_rate": 1.2533743224482055e-05, "loss": 0.5738, "step": 5619 }, { "epoch": 0.44, "grad_norm": 1.2292325546364247, "learning_rate": 1.253131246613831e-05, "loss": 0.5667, "step": 5620 }, { "epoch": 0.44, "grad_norm": 1.1838606731718777, "learning_rate": 1.2528881547979748e-05, "loss": 0.5645, "step": 5621 }, { "epoch": 0.44, "grad_norm": 1.133026471154515, "learning_rate": 1.2526450470159845e-05, "loss": 0.5647, "step": 5622 }, { "epoch": 0.44, "grad_norm": 1.0842670244002264, "learning_rate": 1.2524019232832089e-05, "loss": 0.5356, "step": 5623 }, { "epoch": 0.44, "grad_norm": 1.1829979533212456, "learning_rate": 1.252158783614998e-05, "loss": 0.6011, "step": 5624 }, { "epoch": 0.44, "grad_norm": 1.2348087852342216, "learning_rate": 1.2519156280267017e-05, "loss": 0.602, "step": 5625 }, { "epoch": 0.44, "grad_norm": 1.1313194169301475, "learning_rate": 1.2516724565336724e-05, "loss": 0.5417, "step": 5626 }, { "epoch": 0.44, "grad_norm": 1.2771990399531177, "learning_rate": 1.2514292691512624e-05, "loss": 0.634, "step": 5627 }, { "epoch": 0.44, "grad_norm": 1.2768444053506123, "learning_rate": 1.2511860658948252e-05, "loss": 0.5424, "step": 5628 }, { "epoch": 0.44, "grad_norm": 1.0379950985140463, "learning_rate": 1.250942846779716e-05, "loss": 0.5099, "step": 5629 }, { "epoch": 0.44, "grad_norm": 1.1584306721760087, "learning_rate": 1.2506996118212897e-05, "loss": 0.5554, "step": 5630 }, { "epoch": 0.44, "grad_norm": 1.2251718011391377, "learning_rate": 1.250456361034904e-05, "loss": 0.6173, "step": 5631 }, { "epoch": 0.44, "grad_norm": 1.2020889379061062, "learning_rate": 1.2502130944359161e-05, "loss": 0.5743, "step": 5632 }, { "epoch": 0.44, "grad_norm": 1.221823460499267, "learning_rate": 1.249969812039684e-05, "loss": 0.5637, "step": 5633 }, { "epoch": 0.44, "grad_norm": 1.079684139086382, "learning_rate": 1.2497265138615686e-05, "loss": 0.538, "step": 5634 }, { "epoch": 0.44, "grad_norm": 1.2561091386763716, "learning_rate": 1.2494831999169296e-05, "loss": 0.5183, "step": 5635 }, { "epoch": 0.44, "grad_norm": 1.2243682283191835, "learning_rate": 1.2492398702211293e-05, "loss": 0.5708, "step": 5636 }, { "epoch": 0.44, "grad_norm": 1.1549289476742823, "learning_rate": 1.2489965247895302e-05, "loss": 0.5946, "step": 5637 }, { "epoch": 0.44, "grad_norm": 1.0833222987762312, "learning_rate": 1.2487531636374954e-05, "loss": 0.5671, "step": 5638 }, { "epoch": 0.44, "grad_norm": 1.261160712356664, "learning_rate": 1.24850978678039e-05, "loss": 0.5592, "step": 5639 }, { "epoch": 0.44, "grad_norm": 1.2041147792861158, "learning_rate": 1.2482663942335798e-05, "loss": 0.5817, "step": 5640 }, { "epoch": 0.44, "grad_norm": 1.0903270019196591, "learning_rate": 1.2480229860124313e-05, "loss": 0.5514, "step": 5641 }, { "epoch": 0.44, "grad_norm": 1.2564350428399476, "learning_rate": 1.2477795621323121e-05, "loss": 0.5771, "step": 5642 }, { "epoch": 0.44, "grad_norm": 1.164034849516926, "learning_rate": 1.2475361226085907e-05, "loss": 0.5226, "step": 5643 }, { "epoch": 0.44, "grad_norm": 1.2202677934690032, "learning_rate": 1.2472926674566366e-05, "loss": 0.5313, "step": 5644 }, { "epoch": 0.44, "grad_norm": 1.1276619465438138, "learning_rate": 1.2470491966918205e-05, "loss": 0.5251, "step": 5645 }, { "epoch": 0.44, "grad_norm": 1.206963299563885, "learning_rate": 1.2468057103295144e-05, "loss": 0.5663, "step": 5646 }, { "epoch": 0.44, "grad_norm": 1.1818964405424834, "learning_rate": 1.2465622083850903e-05, "loss": 0.5999, "step": 5647 }, { "epoch": 0.44, "grad_norm": 1.3265614450043886, "learning_rate": 1.2463186908739217e-05, "loss": 0.593, "step": 5648 }, { "epoch": 0.44, "grad_norm": 1.1676976836663495, "learning_rate": 1.2460751578113832e-05, "loss": 0.5849, "step": 5649 }, { "epoch": 0.44, "grad_norm": 1.1147956586488383, "learning_rate": 1.2458316092128509e-05, "loss": 0.5933, "step": 5650 }, { "epoch": 0.44, "grad_norm": 1.1741671715205786, "learning_rate": 1.2455880450937006e-05, "loss": 0.5776, "step": 5651 }, { "epoch": 0.44, "grad_norm": 1.1617512640029863, "learning_rate": 1.2453444654693099e-05, "loss": 0.5895, "step": 5652 }, { "epoch": 0.44, "grad_norm": 1.1563366522409184, "learning_rate": 1.245100870355057e-05, "loss": 0.5467, "step": 5653 }, { "epoch": 0.44, "grad_norm": 1.1359980025307461, "learning_rate": 1.2448572597663218e-05, "loss": 0.5751, "step": 5654 }, { "epoch": 0.44, "grad_norm": 1.0829127057541117, "learning_rate": 1.2446136337184847e-05, "loss": 0.5796, "step": 5655 }, { "epoch": 0.44, "grad_norm": 1.1313118828133837, "learning_rate": 1.2443699922269268e-05, "loss": 0.5241, "step": 5656 }, { "epoch": 0.44, "grad_norm": 1.2017290513347676, "learning_rate": 1.2441263353070305e-05, "loss": 0.6428, "step": 5657 }, { "epoch": 0.44, "grad_norm": 1.2297537527692084, "learning_rate": 1.243882662974179e-05, "loss": 0.5594, "step": 5658 }, { "epoch": 0.44, "grad_norm": 1.2139172716311857, "learning_rate": 1.2436389752437565e-05, "loss": 0.5995, "step": 5659 }, { "epoch": 0.44, "grad_norm": 1.1963637970916101, "learning_rate": 1.243395272131149e-05, "loss": 0.5927, "step": 5660 }, { "epoch": 0.44, "grad_norm": 1.1619556998575276, "learning_rate": 1.243151553651742e-05, "loss": 0.5908, "step": 5661 }, { "epoch": 0.44, "grad_norm": 1.136419512502045, "learning_rate": 1.2429078198209227e-05, "loss": 0.6033, "step": 5662 }, { "epoch": 0.44, "grad_norm": 1.2015794512243771, "learning_rate": 1.2426640706540796e-05, "loss": 0.6205, "step": 5663 }, { "epoch": 0.44, "grad_norm": 1.2196878101799054, "learning_rate": 1.2424203061666018e-05, "loss": 0.6196, "step": 5664 }, { "epoch": 0.44, "grad_norm": 1.1001492897510252, "learning_rate": 1.2421765263738795e-05, "loss": 0.5458, "step": 5665 }, { "epoch": 0.44, "grad_norm": 1.1485767409842995, "learning_rate": 1.2419327312913034e-05, "loss": 0.5601, "step": 5666 }, { "epoch": 0.44, "grad_norm": 1.2697973353963758, "learning_rate": 1.2416889209342658e-05, "loss": 0.6374, "step": 5667 }, { "epoch": 0.44, "grad_norm": 1.1694339505316522, "learning_rate": 1.2414450953181598e-05, "loss": 0.5717, "step": 5668 }, { "epoch": 0.44, "grad_norm": 1.2132129578735729, "learning_rate": 1.2412012544583791e-05, "loss": 0.5539, "step": 5669 }, { "epoch": 0.44, "grad_norm": 1.1065480924377782, "learning_rate": 1.2409573983703189e-05, "loss": 0.5327, "step": 5670 }, { "epoch": 0.44, "grad_norm": 1.2059210452271647, "learning_rate": 1.2407135270693748e-05, "loss": 0.5784, "step": 5671 }, { "epoch": 0.44, "grad_norm": 1.1985990930109207, "learning_rate": 1.240469640570944e-05, "loss": 0.5868, "step": 5672 }, { "epoch": 0.44, "grad_norm": 1.2559529650804542, "learning_rate": 1.240225738890424e-05, "loss": 0.6066, "step": 5673 }, { "epoch": 0.44, "grad_norm": 1.0678661819017123, "learning_rate": 1.2399818220432136e-05, "loss": 0.5225, "step": 5674 }, { "epoch": 0.44, "grad_norm": 1.1958938880147316, "learning_rate": 1.239737890044713e-05, "loss": 0.5535, "step": 5675 }, { "epoch": 0.44, "grad_norm": 1.1642319216742743, "learning_rate": 1.2394939429103224e-05, "loss": 0.5682, "step": 5676 }, { "epoch": 0.44, "grad_norm": 1.1415536314053027, "learning_rate": 1.2392499806554433e-05, "loss": 0.5845, "step": 5677 }, { "epoch": 0.44, "grad_norm": 1.1686232146174917, "learning_rate": 1.2390060032954787e-05, "loss": 0.5718, "step": 5678 }, { "epoch": 0.44, "grad_norm": 1.214916950731231, "learning_rate": 1.2387620108458318e-05, "loss": 0.5272, "step": 5679 }, { "epoch": 0.44, "grad_norm": 1.1916406119550509, "learning_rate": 1.2385180033219077e-05, "loss": 0.5934, "step": 5680 }, { "epoch": 0.44, "grad_norm": 1.1582562850923244, "learning_rate": 1.2382739807391113e-05, "loss": 0.5916, "step": 5681 }, { "epoch": 0.44, "grad_norm": 1.1741218389848078, "learning_rate": 1.238029943112849e-05, "loss": 0.51, "step": 5682 }, { "epoch": 0.44, "grad_norm": 0.9802493575642978, "learning_rate": 1.2377858904585284e-05, "loss": 0.5269, "step": 5683 }, { "epoch": 0.44, "grad_norm": 1.252618336217392, "learning_rate": 1.237541822791558e-05, "loss": 0.5678, "step": 5684 }, { "epoch": 0.44, "grad_norm": 1.0191928111788477, "learning_rate": 1.2372977401273465e-05, "loss": 0.535, "step": 5685 }, { "epoch": 0.44, "grad_norm": 1.1349284437704643, "learning_rate": 1.2370536424813044e-05, "loss": 0.5501, "step": 5686 }, { "epoch": 0.44, "grad_norm": 1.1382052739851187, "learning_rate": 1.2368095298688428e-05, "loss": 0.535, "step": 5687 }, { "epoch": 0.44, "grad_norm": 1.134629259434978, "learning_rate": 1.236565402305374e-05, "loss": 0.5907, "step": 5688 }, { "epoch": 0.44, "grad_norm": 1.217449668298233, "learning_rate": 1.2363212598063103e-05, "loss": 0.6254, "step": 5689 }, { "epoch": 0.44, "grad_norm": 1.2451542388788717, "learning_rate": 1.2360771023870668e-05, "loss": 0.6002, "step": 5690 }, { "epoch": 0.44, "grad_norm": 1.2742246289705255, "learning_rate": 1.2358329300630576e-05, "loss": 0.6187, "step": 5691 }, { "epoch": 0.44, "grad_norm": 1.1497961298975805, "learning_rate": 1.2355887428496986e-05, "loss": 0.5844, "step": 5692 }, { "epoch": 0.44, "grad_norm": 1.109832991502645, "learning_rate": 1.2353445407624071e-05, "loss": 0.5787, "step": 5693 }, { "epoch": 0.44, "grad_norm": 1.0901601462002743, "learning_rate": 1.2351003238166004e-05, "loss": 0.5423, "step": 5694 }, { "epoch": 0.44, "grad_norm": 1.1664810146482412, "learning_rate": 1.2348560920276973e-05, "loss": 0.5297, "step": 5695 }, { "epoch": 0.44, "grad_norm": 1.1852418107975102, "learning_rate": 1.2346118454111176e-05, "loss": 0.5865, "step": 5696 }, { "epoch": 0.44, "grad_norm": 1.2308784892572304, "learning_rate": 1.2343675839822813e-05, "loss": 0.5934, "step": 5697 }, { "epoch": 0.44, "grad_norm": 1.1731898751304157, "learning_rate": 1.2341233077566104e-05, "loss": 0.5783, "step": 5698 }, { "epoch": 0.44, "grad_norm": 1.158076416083538, "learning_rate": 1.2338790167495272e-05, "loss": 0.5773, "step": 5699 }, { "epoch": 0.44, "grad_norm": 1.0918072476516998, "learning_rate": 1.2336347109764551e-05, "loss": 0.5666, "step": 5700 }, { "epoch": 0.44, "grad_norm": 1.172473907017691, "learning_rate": 1.2333903904528182e-05, "loss": 0.5694, "step": 5701 }, { "epoch": 0.44, "grad_norm": 1.1775588985056196, "learning_rate": 1.2331460551940417e-05, "loss": 0.5915, "step": 5702 }, { "epoch": 0.44, "grad_norm": 0.9588980703889227, "learning_rate": 1.232901705215552e-05, "loss": 0.5136, "step": 5703 }, { "epoch": 0.44, "grad_norm": 1.2558507844859523, "learning_rate": 1.232657340532776e-05, "loss": 0.599, "step": 5704 }, { "epoch": 0.44, "grad_norm": 1.279749852711989, "learning_rate": 1.2324129611611417e-05, "loss": 0.6208, "step": 5705 }, { "epoch": 0.44, "grad_norm": 1.218997245553889, "learning_rate": 1.2321685671160784e-05, "loss": 0.5686, "step": 5706 }, { "epoch": 0.44, "grad_norm": 1.0930544140002052, "learning_rate": 1.231924158413015e-05, "loss": 0.5025, "step": 5707 }, { "epoch": 0.44, "grad_norm": 1.2123553927207038, "learning_rate": 1.2316797350673834e-05, "loss": 0.5576, "step": 5708 }, { "epoch": 0.44, "grad_norm": 1.299455534185615, "learning_rate": 1.2314352970946146e-05, "loss": 0.5765, "step": 5709 }, { "epoch": 0.44, "grad_norm": 1.1586965503731155, "learning_rate": 1.2311908445101414e-05, "loss": 0.6, "step": 5710 }, { "epoch": 0.44, "grad_norm": 1.284343845332993, "learning_rate": 1.2309463773293977e-05, "loss": 0.6017, "step": 5711 }, { "epoch": 0.44, "grad_norm": 1.0505608536689617, "learning_rate": 1.2307018955678174e-05, "loss": 0.5418, "step": 5712 }, { "epoch": 0.44, "grad_norm": 1.1649264003769886, "learning_rate": 1.2304573992408363e-05, "loss": 0.5433, "step": 5713 }, { "epoch": 0.44, "grad_norm": 1.1021096784667377, "learning_rate": 1.2302128883638904e-05, "loss": 0.5567, "step": 5714 }, { "epoch": 0.44, "grad_norm": 1.1644419621262367, "learning_rate": 1.2299683629524175e-05, "loss": 0.5814, "step": 5715 }, { "epoch": 0.44, "grad_norm": 1.3517347457586624, "learning_rate": 1.2297238230218551e-05, "loss": 0.6268, "step": 5716 }, { "epoch": 0.44, "grad_norm": 1.2117459951420755, "learning_rate": 1.2294792685876424e-05, "loss": 0.5515, "step": 5717 }, { "epoch": 0.44, "grad_norm": 1.1427577733368208, "learning_rate": 1.2292346996652198e-05, "loss": 0.5639, "step": 5718 }, { "epoch": 0.44, "grad_norm": 1.1971142223518676, "learning_rate": 1.2289901162700276e-05, "loss": 0.5631, "step": 5719 }, { "epoch": 0.44, "grad_norm": 1.2168086802979947, "learning_rate": 1.228745518417508e-05, "loss": 0.622, "step": 5720 }, { "epoch": 0.44, "grad_norm": 1.2120541254176294, "learning_rate": 1.228500906123104e-05, "loss": 0.5595, "step": 5721 }, { "epoch": 0.44, "grad_norm": 1.2924462498716816, "learning_rate": 1.2282562794022586e-05, "loss": 0.6143, "step": 5722 }, { "epoch": 0.44, "grad_norm": 1.2596763401772133, "learning_rate": 1.2280116382704166e-05, "loss": 0.5961, "step": 5723 }, { "epoch": 0.44, "grad_norm": 1.3843915296820637, "learning_rate": 1.2277669827430234e-05, "loss": 0.6031, "step": 5724 }, { "epoch": 0.44, "grad_norm": 1.093212976770467, "learning_rate": 1.2275223128355258e-05, "loss": 0.5511, "step": 5725 }, { "epoch": 0.44, "grad_norm": 1.1682795490631581, "learning_rate": 1.2272776285633708e-05, "loss": 0.5524, "step": 5726 }, { "epoch": 0.44, "grad_norm": 1.2210305713166165, "learning_rate": 1.2270329299420061e-05, "loss": 0.6075, "step": 5727 }, { "epoch": 0.44, "grad_norm": 1.4010380694072542, "learning_rate": 1.2267882169868813e-05, "loss": 0.5523, "step": 5728 }, { "epoch": 0.44, "grad_norm": 1.1338893113350184, "learning_rate": 1.2265434897134462e-05, "loss": 0.5625, "step": 5729 }, { "epoch": 0.44, "grad_norm": 1.1550434622080412, "learning_rate": 1.2262987481371523e-05, "loss": 0.5506, "step": 5730 }, { "epoch": 0.44, "grad_norm": 1.0366448405524578, "learning_rate": 1.2260539922734505e-05, "loss": 0.5239, "step": 5731 }, { "epoch": 0.44, "grad_norm": 1.378430119513949, "learning_rate": 1.2258092221377938e-05, "loss": 0.6496, "step": 5732 }, { "epoch": 0.44, "grad_norm": 1.2256839302675868, "learning_rate": 1.2255644377456357e-05, "loss": 0.5612, "step": 5733 }, { "epoch": 0.44, "grad_norm": 1.279751902020504, "learning_rate": 1.2253196391124313e-05, "loss": 0.6126, "step": 5734 }, { "epoch": 0.44, "grad_norm": 1.1559703591846446, "learning_rate": 1.2250748262536357e-05, "loss": 0.5656, "step": 5735 }, { "epoch": 0.44, "grad_norm": 1.1631600895200256, "learning_rate": 1.2248299991847048e-05, "loss": 0.4909, "step": 5736 }, { "epoch": 0.45, "grad_norm": 1.1719034827267767, "learning_rate": 1.2245851579210958e-05, "loss": 0.5376, "step": 5737 }, { "epoch": 0.45, "grad_norm": 1.1778271387111043, "learning_rate": 1.2243403024782675e-05, "loss": 0.5458, "step": 5738 }, { "epoch": 0.45, "grad_norm": 1.2291002201719787, "learning_rate": 1.2240954328716783e-05, "loss": 0.5324, "step": 5739 }, { "epoch": 0.45, "grad_norm": 1.1940587767857695, "learning_rate": 1.2238505491167884e-05, "loss": 0.5983, "step": 5740 }, { "epoch": 0.45, "grad_norm": 1.1452531848655838, "learning_rate": 1.2236056512290584e-05, "loss": 0.5081, "step": 5741 }, { "epoch": 0.45, "grad_norm": 1.1813157825217495, "learning_rate": 1.2233607392239497e-05, "loss": 0.5692, "step": 5742 }, { "epoch": 0.45, "grad_norm": 1.171500082441743, "learning_rate": 1.2231158131169251e-05, "loss": 0.5278, "step": 5743 }, { "epoch": 0.45, "grad_norm": 1.1124261617266489, "learning_rate": 1.2228708729234487e-05, "loss": 0.5186, "step": 5744 }, { "epoch": 0.45, "grad_norm": 1.0741828496309291, "learning_rate": 1.222625918658984e-05, "loss": 0.5729, "step": 5745 }, { "epoch": 0.45, "grad_norm": 1.3032568665891962, "learning_rate": 1.2223809503389962e-05, "loss": 0.5913, "step": 5746 }, { "epoch": 0.45, "grad_norm": 1.172448285066508, "learning_rate": 1.2221359679789518e-05, "loss": 0.6391, "step": 5747 }, { "epoch": 0.45, "grad_norm": 1.1912769184893888, "learning_rate": 1.2218909715943174e-05, "loss": 0.5524, "step": 5748 }, { "epoch": 0.45, "grad_norm": 1.190448413479408, "learning_rate": 1.2216459612005616e-05, "loss": 0.5552, "step": 5749 }, { "epoch": 0.45, "grad_norm": 1.1883309117560483, "learning_rate": 1.2214009368131525e-05, "loss": 0.5022, "step": 5750 }, { "epoch": 0.45, "grad_norm": 1.273073905103042, "learning_rate": 1.22115589844756e-05, "loss": 0.5961, "step": 5751 }, { "epoch": 0.45, "grad_norm": 1.3431829098678116, "learning_rate": 1.2209108461192546e-05, "loss": 0.5901, "step": 5752 }, { "epoch": 0.45, "grad_norm": 1.135715736874924, "learning_rate": 1.2206657798437078e-05, "loss": 0.541, "step": 5753 }, { "epoch": 0.45, "grad_norm": 1.1776193338008218, "learning_rate": 1.2204206996363918e-05, "loss": 0.5781, "step": 5754 }, { "epoch": 0.45, "grad_norm": 1.2144509321299104, "learning_rate": 1.2201756055127798e-05, "loss": 0.5979, "step": 5755 }, { "epoch": 0.45, "grad_norm": 1.0928416158426182, "learning_rate": 1.219930497488346e-05, "loss": 0.573, "step": 5756 }, { "epoch": 0.45, "grad_norm": 1.1879574246050468, "learning_rate": 1.2196853755785649e-05, "loss": 0.5774, "step": 5757 }, { "epoch": 0.45, "grad_norm": 1.2330869400639546, "learning_rate": 1.2194402397989128e-05, "loss": 0.5453, "step": 5758 }, { "epoch": 0.45, "grad_norm": 1.1897570091885952, "learning_rate": 1.2191950901648664e-05, "loss": 0.527, "step": 5759 }, { "epoch": 0.45, "grad_norm": 1.080463637245422, "learning_rate": 1.2189499266919028e-05, "loss": 0.5033, "step": 5760 }, { "epoch": 0.45, "grad_norm": 1.1655886074245116, "learning_rate": 1.2187047493955006e-05, "loss": 0.5931, "step": 5761 }, { "epoch": 0.45, "grad_norm": 1.174902197643059, "learning_rate": 1.2184595582911394e-05, "loss": 0.5324, "step": 5762 }, { "epoch": 0.45, "grad_norm": 1.2115452379635694, "learning_rate": 1.218214353394299e-05, "loss": 0.5163, "step": 5763 }, { "epoch": 0.45, "grad_norm": 1.1526727756237607, "learning_rate": 1.2179691347204609e-05, "loss": 0.4868, "step": 5764 }, { "epoch": 0.45, "grad_norm": 1.2146941457791842, "learning_rate": 1.2177239022851068e-05, "loss": 0.5646, "step": 5765 }, { "epoch": 0.45, "grad_norm": 1.1720135416153417, "learning_rate": 1.2174786561037191e-05, "loss": 0.5774, "step": 5766 }, { "epoch": 0.45, "grad_norm": 1.2000503390403146, "learning_rate": 1.2172333961917819e-05, "loss": 0.5609, "step": 5767 }, { "epoch": 0.45, "grad_norm": 1.1779116470253348, "learning_rate": 1.2169881225647797e-05, "loss": 0.5655, "step": 5768 }, { "epoch": 0.45, "grad_norm": 1.1407609100890166, "learning_rate": 1.2167428352381977e-05, "loss": 0.5076, "step": 5769 }, { "epoch": 0.45, "grad_norm": 1.0374528276680997, "learning_rate": 1.2164975342275227e-05, "loss": 0.514, "step": 5770 }, { "epoch": 0.45, "grad_norm": 1.1877798955226615, "learning_rate": 1.2162522195482408e-05, "loss": 0.5438, "step": 5771 }, { "epoch": 0.45, "grad_norm": 1.1487102573896208, "learning_rate": 1.2160068912158408e-05, "loss": 0.544, "step": 5772 }, { "epoch": 0.45, "grad_norm": 1.2242168180624513, "learning_rate": 1.2157615492458113e-05, "loss": 0.5536, "step": 5773 }, { "epoch": 0.45, "grad_norm": 1.101818082121585, "learning_rate": 1.2155161936536422e-05, "loss": 0.5893, "step": 5774 }, { "epoch": 0.45, "grad_norm": 1.083124538009906, "learning_rate": 1.2152708244548237e-05, "loss": 0.5432, "step": 5775 }, { "epoch": 0.45, "grad_norm": 1.1948722577151858, "learning_rate": 1.2150254416648473e-05, "loss": 0.541, "step": 5776 }, { "epoch": 0.45, "grad_norm": 1.2256619980755752, "learning_rate": 1.2147800452992055e-05, "loss": 0.5762, "step": 5777 }, { "epoch": 0.45, "grad_norm": 1.1837845953747075, "learning_rate": 1.2145346353733913e-05, "loss": 0.5393, "step": 5778 }, { "epoch": 0.45, "grad_norm": 1.1906830140245028, "learning_rate": 1.2142892119028988e-05, "loss": 0.5614, "step": 5779 }, { "epoch": 0.45, "grad_norm": 1.1461272065129666, "learning_rate": 1.2140437749032227e-05, "loss": 0.5559, "step": 5780 }, { "epoch": 0.45, "grad_norm": 1.1644388396965029, "learning_rate": 1.2137983243898585e-05, "loss": 0.565, "step": 5781 }, { "epoch": 0.45, "grad_norm": 1.2571805705259522, "learning_rate": 1.2135528603783036e-05, "loss": 0.5691, "step": 5782 }, { "epoch": 0.45, "grad_norm": 1.0542582450763196, "learning_rate": 1.2133073828840543e-05, "loss": 0.5131, "step": 5783 }, { "epoch": 0.45, "grad_norm": 1.1099808341596908, "learning_rate": 1.2130618919226099e-05, "loss": 0.5439, "step": 5784 }, { "epoch": 0.45, "grad_norm": 1.2504267917635397, "learning_rate": 1.2128163875094687e-05, "loss": 0.614, "step": 5785 }, { "epoch": 0.45, "grad_norm": 1.2536251430391578, "learning_rate": 1.2125708696601309e-05, "loss": 0.64, "step": 5786 }, { "epoch": 0.45, "grad_norm": 1.2027608146871105, "learning_rate": 1.2123253383900974e-05, "loss": 0.579, "step": 5787 }, { "epoch": 0.45, "grad_norm": 1.2219459005795537, "learning_rate": 1.2120797937148699e-05, "loss": 0.608, "step": 5788 }, { "epoch": 0.45, "grad_norm": 1.1487539984101123, "learning_rate": 1.2118342356499508e-05, "loss": 0.5553, "step": 5789 }, { "epoch": 0.45, "grad_norm": 1.0749243620943467, "learning_rate": 1.2115886642108437e-05, "loss": 0.5285, "step": 5790 }, { "epoch": 0.45, "grad_norm": 1.0901008767511062, "learning_rate": 1.2113430794130522e-05, "loss": 0.5143, "step": 5791 }, { "epoch": 0.45, "grad_norm": 1.109954843633605, "learning_rate": 1.2110974812720819e-05, "loss": 0.5127, "step": 5792 }, { "epoch": 0.45, "grad_norm": 1.1113190608668968, "learning_rate": 1.2108518698034384e-05, "loss": 0.5305, "step": 5793 }, { "epoch": 0.45, "grad_norm": 1.1599832527826188, "learning_rate": 1.2106062450226287e-05, "loss": 0.5953, "step": 5794 }, { "epoch": 0.45, "grad_norm": 1.1504900489419883, "learning_rate": 1.2103606069451601e-05, "loss": 0.5835, "step": 5795 }, { "epoch": 0.45, "grad_norm": 1.2313824848601616, "learning_rate": 1.210114955586541e-05, "loss": 0.6057, "step": 5796 }, { "epoch": 0.45, "grad_norm": 1.2464898893831475, "learning_rate": 1.2098692909622808e-05, "loss": 0.5481, "step": 5797 }, { "epoch": 0.45, "grad_norm": 1.2924023911974978, "learning_rate": 1.2096236130878894e-05, "loss": 0.5437, "step": 5798 }, { "epoch": 0.45, "grad_norm": 1.1411081623293935, "learning_rate": 1.2093779219788777e-05, "loss": 0.5064, "step": 5799 }, { "epoch": 0.45, "grad_norm": 1.062769631225352, "learning_rate": 1.2091322176507579e-05, "loss": 0.4804, "step": 5800 }, { "epoch": 0.45, "grad_norm": 1.0799403180303564, "learning_rate": 1.2088865001190418e-05, "loss": 0.5354, "step": 5801 }, { "epoch": 0.45, "grad_norm": 1.2463022374537462, "learning_rate": 1.2086407693992434e-05, "loss": 0.6065, "step": 5802 }, { "epoch": 0.45, "grad_norm": 1.106121396278204, "learning_rate": 1.2083950255068766e-05, "loss": 0.5653, "step": 5803 }, { "epoch": 0.45, "grad_norm": 1.1750484476855685, "learning_rate": 1.208149268457457e-05, "loss": 0.5781, "step": 5804 }, { "epoch": 0.45, "grad_norm": 1.173305147430331, "learning_rate": 1.2079034982665001e-05, "loss": 0.5533, "step": 5805 }, { "epoch": 0.45, "grad_norm": 1.1877700097147526, "learning_rate": 1.2076577149495226e-05, "loss": 0.575, "step": 5806 }, { "epoch": 0.45, "grad_norm": 1.226780185698904, "learning_rate": 1.207411918522042e-05, "loss": 0.5951, "step": 5807 }, { "epoch": 0.45, "grad_norm": 1.1669546464444174, "learning_rate": 1.2071661089995772e-05, "loss": 0.5916, "step": 5808 }, { "epoch": 0.45, "grad_norm": 1.147030493981939, "learning_rate": 1.2069202863976471e-05, "loss": 0.5472, "step": 5809 }, { "epoch": 0.45, "grad_norm": 1.1124389138756328, "learning_rate": 1.2066744507317718e-05, "loss": 0.566, "step": 5810 }, { "epoch": 0.45, "grad_norm": 1.2951715151052539, "learning_rate": 1.2064286020174718e-05, "loss": 0.5823, "step": 5811 }, { "epoch": 0.45, "grad_norm": 1.2010264238822317, "learning_rate": 1.2061827402702691e-05, "loss": 0.5236, "step": 5812 }, { "epoch": 0.45, "grad_norm": 1.1301946448743838, "learning_rate": 1.2059368655056864e-05, "loss": 0.5331, "step": 5813 }, { "epoch": 0.45, "grad_norm": 1.1961510403477786, "learning_rate": 1.2056909777392471e-05, "loss": 0.6108, "step": 5814 }, { "epoch": 0.45, "grad_norm": 1.18671762894558, "learning_rate": 1.2054450769864752e-05, "loss": 0.5675, "step": 5815 }, { "epoch": 0.45, "grad_norm": 1.1760007330249915, "learning_rate": 1.2051991632628952e-05, "loss": 0.5775, "step": 5816 }, { "epoch": 0.45, "grad_norm": 1.16608463255676, "learning_rate": 1.2049532365840333e-05, "loss": 0.6161, "step": 5817 }, { "epoch": 0.45, "grad_norm": 1.1621810249872406, "learning_rate": 1.2047072969654165e-05, "loss": 0.5382, "step": 5818 }, { "epoch": 0.45, "grad_norm": 1.1802851192660717, "learning_rate": 1.204461344422572e-05, "loss": 0.5502, "step": 5819 }, { "epoch": 0.45, "grad_norm": 1.1400551613629568, "learning_rate": 1.2042153789710278e-05, "loss": 0.5125, "step": 5820 }, { "epoch": 0.45, "grad_norm": 1.1618702361946471, "learning_rate": 1.2039694006263129e-05, "loss": 0.5481, "step": 5821 }, { "epoch": 0.45, "grad_norm": 1.1806254416081747, "learning_rate": 1.2037234094039573e-05, "loss": 0.5591, "step": 5822 }, { "epoch": 0.45, "grad_norm": 1.2368320203564345, "learning_rate": 1.2034774053194922e-05, "loss": 0.5767, "step": 5823 }, { "epoch": 0.45, "grad_norm": 1.1410431813820914, "learning_rate": 1.2032313883884485e-05, "loss": 0.5382, "step": 5824 }, { "epoch": 0.45, "grad_norm": 1.222155678748634, "learning_rate": 1.202985358626359e-05, "loss": 0.5459, "step": 5825 }, { "epoch": 0.45, "grad_norm": 1.202775978887758, "learning_rate": 1.2027393160487561e-05, "loss": 0.5589, "step": 5826 }, { "epoch": 0.45, "grad_norm": 1.1627100826474233, "learning_rate": 1.2024932606711741e-05, "loss": 0.5983, "step": 5827 }, { "epoch": 0.45, "grad_norm": 1.1437455432575716, "learning_rate": 1.2022471925091483e-05, "loss": 0.5652, "step": 5828 }, { "epoch": 0.45, "grad_norm": 1.13861978977748, "learning_rate": 1.2020011115782135e-05, "loss": 0.5256, "step": 5829 }, { "epoch": 0.45, "grad_norm": 1.0433482076317901, "learning_rate": 1.2017550178939064e-05, "loss": 0.5304, "step": 5830 }, { "epoch": 0.45, "grad_norm": 1.1142188156485253, "learning_rate": 1.2015089114717642e-05, "loss": 0.5115, "step": 5831 }, { "epoch": 0.45, "grad_norm": 1.1799556099090744, "learning_rate": 1.2012627923273246e-05, "loss": 0.5378, "step": 5832 }, { "epoch": 0.45, "grad_norm": 1.2779304020486262, "learning_rate": 1.2010166604761266e-05, "loss": 0.5505, "step": 5833 }, { "epoch": 0.45, "grad_norm": 1.2344693015087467, "learning_rate": 1.20077051593371e-05, "loss": 0.5774, "step": 5834 }, { "epoch": 0.45, "grad_norm": 1.1387612780484628, "learning_rate": 1.2005243587156143e-05, "loss": 0.5335, "step": 5835 }, { "epoch": 0.45, "grad_norm": 1.1495471186992472, "learning_rate": 1.2002781888373818e-05, "loss": 0.5972, "step": 5836 }, { "epoch": 0.45, "grad_norm": 1.1760604374854247, "learning_rate": 1.2000320063145536e-05, "loss": 0.6117, "step": 5837 }, { "epoch": 0.45, "grad_norm": 1.1046612489594707, "learning_rate": 1.199785811162673e-05, "loss": 0.5099, "step": 5838 }, { "epoch": 0.45, "grad_norm": 1.1250570070869994, "learning_rate": 1.1995396033972834e-05, "loss": 0.5631, "step": 5839 }, { "epoch": 0.45, "grad_norm": 1.110835765789218, "learning_rate": 1.1992933830339288e-05, "loss": 0.502, "step": 5840 }, { "epoch": 0.45, "grad_norm": 1.1313335893634813, "learning_rate": 1.1990471500881551e-05, "loss": 0.593, "step": 5841 }, { "epoch": 0.45, "grad_norm": 1.1774903609251823, "learning_rate": 1.1988009045755077e-05, "loss": 0.5985, "step": 5842 }, { "epoch": 0.45, "grad_norm": 1.2243414042655396, "learning_rate": 1.1985546465115336e-05, "loss": 0.5613, "step": 5843 }, { "epoch": 0.45, "grad_norm": 1.1738508733236663, "learning_rate": 1.1983083759117804e-05, "loss": 0.5925, "step": 5844 }, { "epoch": 0.45, "grad_norm": 1.0072078813318732, "learning_rate": 1.1980620927917958e-05, "loss": 0.5195, "step": 5845 }, { "epoch": 0.45, "grad_norm": 1.1427341452470745, "learning_rate": 1.19781579716713e-05, "loss": 0.5703, "step": 5846 }, { "epoch": 0.45, "grad_norm": 1.3303544573880208, "learning_rate": 1.1975694890533318e-05, "loss": 0.5964, "step": 5847 }, { "epoch": 0.45, "grad_norm": 1.2233942504097892, "learning_rate": 1.1973231684659527e-05, "loss": 0.6063, "step": 5848 }, { "epoch": 0.45, "grad_norm": 1.2731503121842511, "learning_rate": 1.1970768354205443e-05, "loss": 0.5583, "step": 5849 }, { "epoch": 0.45, "grad_norm": 1.0754297816765037, "learning_rate": 1.1968304899326577e-05, "loss": 0.5909, "step": 5850 }, { "epoch": 0.45, "grad_norm": 1.1399600559899563, "learning_rate": 1.1965841320178473e-05, "loss": 0.5293, "step": 5851 }, { "epoch": 0.45, "grad_norm": 1.2077981432299578, "learning_rate": 1.1963377616916662e-05, "loss": 0.5339, "step": 5852 }, { "epoch": 0.45, "grad_norm": 1.2131592089208965, "learning_rate": 1.1960913789696694e-05, "loss": 0.5758, "step": 5853 }, { "epoch": 0.45, "grad_norm": 1.215401769700864, "learning_rate": 1.1958449838674122e-05, "loss": 0.5389, "step": 5854 }, { "epoch": 0.45, "grad_norm": 1.1799737444074103, "learning_rate": 1.1955985764004504e-05, "loss": 0.5464, "step": 5855 }, { "epoch": 0.45, "grad_norm": 1.2863515843054454, "learning_rate": 1.1953521565843415e-05, "loss": 0.6042, "step": 5856 }, { "epoch": 0.45, "grad_norm": 1.204111561729822, "learning_rate": 1.195105724434643e-05, "loss": 0.5337, "step": 5857 }, { "epoch": 0.45, "grad_norm": 1.1542259233230394, "learning_rate": 1.1948592799669136e-05, "loss": 0.5513, "step": 5858 }, { "epoch": 0.45, "grad_norm": 1.0744879367980187, "learning_rate": 1.1946128231967127e-05, "loss": 0.5253, "step": 5859 }, { "epoch": 0.45, "grad_norm": 1.0442689941592997, "learning_rate": 1.1943663541395998e-05, "loss": 0.5464, "step": 5860 }, { "epoch": 0.45, "grad_norm": 1.1750569187744138, "learning_rate": 1.1941198728111364e-05, "loss": 0.571, "step": 5861 }, { "epoch": 0.45, "grad_norm": 1.1717132456724335, "learning_rate": 1.1938733792268837e-05, "loss": 0.5556, "step": 5862 }, { "epoch": 0.45, "grad_norm": 1.1851471630897263, "learning_rate": 1.1936268734024048e-05, "loss": 0.5668, "step": 5863 }, { "epoch": 0.45, "grad_norm": 1.1747150339368848, "learning_rate": 1.1933803553532622e-05, "loss": 0.5544, "step": 5864 }, { "epoch": 0.45, "grad_norm": 1.1452167007744813, "learning_rate": 1.1931338250950197e-05, "loss": 0.5183, "step": 5865 }, { "epoch": 0.46, "grad_norm": 1.194493181259474, "learning_rate": 1.1928872826432427e-05, "loss": 0.5388, "step": 5866 }, { "epoch": 0.46, "grad_norm": 1.1237559327508897, "learning_rate": 1.1926407280134962e-05, "loss": 0.5561, "step": 5867 }, { "epoch": 0.46, "grad_norm": 1.3663842593692355, "learning_rate": 1.1923941612213468e-05, "loss": 0.5684, "step": 5868 }, { "epoch": 0.46, "grad_norm": 1.2856317817238363, "learning_rate": 1.1921475822823613e-05, "loss": 0.6646, "step": 5869 }, { "epoch": 0.46, "grad_norm": 1.0807219478258843, "learning_rate": 1.1919009912121075e-05, "loss": 0.5468, "step": 5870 }, { "epoch": 0.46, "grad_norm": 1.1001585542676102, "learning_rate": 1.1916543880261541e-05, "loss": 0.5462, "step": 5871 }, { "epoch": 0.46, "grad_norm": 1.210953103241838, "learning_rate": 1.1914077727400706e-05, "loss": 0.5531, "step": 5872 }, { "epoch": 0.46, "grad_norm": 1.1054588290466927, "learning_rate": 1.1911611453694267e-05, "loss": 0.5374, "step": 5873 }, { "epoch": 0.46, "grad_norm": 1.14059197364965, "learning_rate": 1.1909145059297935e-05, "loss": 0.527, "step": 5874 }, { "epoch": 0.46, "grad_norm": 1.1699562131399075, "learning_rate": 1.1906678544367423e-05, "loss": 0.5134, "step": 5875 }, { "epoch": 0.46, "grad_norm": 1.2421120194828592, "learning_rate": 1.190421190905846e-05, "loss": 0.5647, "step": 5876 }, { "epoch": 0.46, "grad_norm": 1.2952461124428827, "learning_rate": 1.1901745153526773e-05, "loss": 0.5676, "step": 5877 }, { "epoch": 0.46, "grad_norm": 1.2217136442373169, "learning_rate": 1.1899278277928103e-05, "loss": 0.594, "step": 5878 }, { "epoch": 0.46, "grad_norm": 1.1674616296413667, "learning_rate": 1.1896811282418199e-05, "loss": 0.5698, "step": 5879 }, { "epoch": 0.46, "grad_norm": 1.213094598938523, "learning_rate": 1.1894344167152809e-05, "loss": 0.6098, "step": 5880 }, { "epoch": 0.46, "grad_norm": 1.238448026479051, "learning_rate": 1.1891876932287701e-05, "loss": 0.6178, "step": 5881 }, { "epoch": 0.46, "grad_norm": 1.2836003494325223, "learning_rate": 1.1889409577978639e-05, "loss": 0.6002, "step": 5882 }, { "epoch": 0.46, "grad_norm": 1.1942288841247801, "learning_rate": 1.1886942104381403e-05, "loss": 0.5484, "step": 5883 }, { "epoch": 0.46, "grad_norm": 1.3050263929803976, "learning_rate": 1.1884474511651778e-05, "loss": 0.5789, "step": 5884 }, { "epoch": 0.46, "grad_norm": 1.208662739062639, "learning_rate": 1.1882006799945551e-05, "loss": 0.6153, "step": 5885 }, { "epoch": 0.46, "grad_norm": 1.0800346379553527, "learning_rate": 1.1879538969418526e-05, "loss": 0.568, "step": 5886 }, { "epoch": 0.46, "grad_norm": 1.1419719424801138, "learning_rate": 1.1877071020226512e-05, "loss": 0.5483, "step": 5887 }, { "epoch": 0.46, "grad_norm": 1.2478569733996008, "learning_rate": 1.1874602952525317e-05, "loss": 0.5996, "step": 5888 }, { "epoch": 0.46, "grad_norm": 1.3204928844856738, "learning_rate": 1.1872134766470769e-05, "loss": 0.6373, "step": 5889 }, { "epoch": 0.46, "grad_norm": 1.1594939219721387, "learning_rate": 1.1869666462218693e-05, "loss": 0.5458, "step": 5890 }, { "epoch": 0.46, "grad_norm": 1.0793814593874516, "learning_rate": 1.1867198039924923e-05, "loss": 0.516, "step": 5891 }, { "epoch": 0.46, "grad_norm": 1.1567645088079015, "learning_rate": 1.1864729499745312e-05, "loss": 0.5673, "step": 5892 }, { "epoch": 0.46, "grad_norm": 1.0312249729703138, "learning_rate": 1.1862260841835706e-05, "loss": 0.5082, "step": 5893 }, { "epoch": 0.46, "grad_norm": 1.086463690964292, "learning_rate": 1.1859792066351964e-05, "loss": 0.5152, "step": 5894 }, { "epoch": 0.46, "grad_norm": 1.121048344878929, "learning_rate": 1.1857323173449956e-05, "loss": 0.5682, "step": 5895 }, { "epoch": 0.46, "grad_norm": 1.3202165196310283, "learning_rate": 1.1854854163285548e-05, "loss": 0.6055, "step": 5896 }, { "epoch": 0.46, "grad_norm": 1.186041136624872, "learning_rate": 1.185238503601463e-05, "loss": 0.5791, "step": 5897 }, { "epoch": 0.46, "grad_norm": 1.22254216938579, "learning_rate": 1.1849915791793091e-05, "loss": 0.5845, "step": 5898 }, { "epoch": 0.46, "grad_norm": 1.1606132044113173, "learning_rate": 1.1847446430776822e-05, "loss": 0.5692, "step": 5899 }, { "epoch": 0.46, "grad_norm": 1.1930835361379735, "learning_rate": 1.1844976953121725e-05, "loss": 0.5913, "step": 5900 }, { "epoch": 0.46, "grad_norm": 1.1986993416592568, "learning_rate": 1.1842507358983715e-05, "loss": 0.6328, "step": 5901 }, { "epoch": 0.46, "grad_norm": 1.1882834610677058, "learning_rate": 1.1840037648518712e-05, "loss": 0.5788, "step": 5902 }, { "epoch": 0.46, "grad_norm": 1.1837200940875108, "learning_rate": 1.1837567821882638e-05, "loss": 0.5791, "step": 5903 }, { "epoch": 0.46, "grad_norm": 1.1003249772070514, "learning_rate": 1.1835097879231427e-05, "loss": 0.5656, "step": 5904 }, { "epoch": 0.46, "grad_norm": 1.2701610235574328, "learning_rate": 1.1832627820721017e-05, "loss": 0.5365, "step": 5905 }, { "epoch": 0.46, "grad_norm": 1.143477804594884, "learning_rate": 1.1830157646507358e-05, "loss": 0.5089, "step": 5906 }, { "epoch": 0.46, "grad_norm": 1.2677134484235162, "learning_rate": 1.1827687356746406e-05, "loss": 0.5629, "step": 5907 }, { "epoch": 0.46, "grad_norm": 1.1531484089778088, "learning_rate": 1.182521695159412e-05, "loss": 0.539, "step": 5908 }, { "epoch": 0.46, "grad_norm": 1.1058788280332412, "learning_rate": 1.1822746431206473e-05, "loss": 0.5695, "step": 5909 }, { "epoch": 0.46, "grad_norm": 1.1808895025217803, "learning_rate": 1.1820275795739438e-05, "loss": 0.5691, "step": 5910 }, { "epoch": 0.46, "grad_norm": 1.2212088307964457, "learning_rate": 1.1817805045349e-05, "loss": 0.6193, "step": 5911 }, { "epoch": 0.46, "grad_norm": 1.1192837768393247, "learning_rate": 1.1815334180191153e-05, "loss": 0.5424, "step": 5912 }, { "epoch": 0.46, "grad_norm": 1.2534083153145257, "learning_rate": 1.1812863200421894e-05, "loss": 0.5966, "step": 5913 }, { "epoch": 0.46, "grad_norm": 1.2925274144701855, "learning_rate": 1.1810392106197224e-05, "loss": 0.6072, "step": 5914 }, { "epoch": 0.46, "grad_norm": 1.1839332723082427, "learning_rate": 1.1807920897673162e-05, "loss": 0.55, "step": 5915 }, { "epoch": 0.46, "grad_norm": 1.1519622688538522, "learning_rate": 1.1805449575005726e-05, "loss": 0.5678, "step": 5916 }, { "epoch": 0.46, "grad_norm": 1.1629580179970105, "learning_rate": 1.1802978138350945e-05, "loss": 0.5458, "step": 5917 }, { "epoch": 0.46, "grad_norm": 1.1714943839902012, "learning_rate": 1.1800506587864851e-05, "loss": 0.5418, "step": 5918 }, { "epoch": 0.46, "grad_norm": 1.243544838628027, "learning_rate": 1.1798034923703486e-05, "loss": 0.6272, "step": 5919 }, { "epoch": 0.46, "grad_norm": 1.1981541881876347, "learning_rate": 1.17955631460229e-05, "loss": 0.519, "step": 5920 }, { "epoch": 0.46, "grad_norm": 1.1811447237927264, "learning_rate": 1.1793091254979148e-05, "loss": 0.5514, "step": 5921 }, { "epoch": 0.46, "grad_norm": 1.1235853413593588, "learning_rate": 1.1790619250728295e-05, "loss": 0.5534, "step": 5922 }, { "epoch": 0.46, "grad_norm": 1.1624878851966867, "learning_rate": 1.178814713342641e-05, "loss": 0.5502, "step": 5923 }, { "epoch": 0.46, "grad_norm": 1.114220260000697, "learning_rate": 1.1785674903229572e-05, "loss": 0.55, "step": 5924 }, { "epoch": 0.46, "grad_norm": 1.2299317171945692, "learning_rate": 1.1783202560293863e-05, "loss": 0.5908, "step": 5925 }, { "epoch": 0.46, "grad_norm": 1.234587301032679, "learning_rate": 1.1780730104775374e-05, "loss": 0.5998, "step": 5926 }, { "epoch": 0.46, "grad_norm": 1.0639864117141906, "learning_rate": 1.1778257536830211e-05, "loss": 0.5353, "step": 5927 }, { "epoch": 0.46, "grad_norm": 1.1595802289999118, "learning_rate": 1.1775784856614473e-05, "loss": 0.5223, "step": 5928 }, { "epoch": 0.46, "grad_norm": 1.125575077574416, "learning_rate": 1.1773312064284275e-05, "loss": 0.5552, "step": 5929 }, { "epoch": 0.46, "grad_norm": 1.1473486273162008, "learning_rate": 1.1770839159995738e-05, "loss": 0.5426, "step": 5930 }, { "epoch": 0.46, "grad_norm": 1.132527966775059, "learning_rate": 1.1768366143904986e-05, "loss": 0.5297, "step": 5931 }, { "epoch": 0.46, "grad_norm": 1.0903657599637857, "learning_rate": 1.1765893016168158e-05, "loss": 0.5232, "step": 5932 }, { "epoch": 0.46, "grad_norm": 1.1708886636893996, "learning_rate": 1.1763419776941395e-05, "loss": 0.616, "step": 5933 }, { "epoch": 0.46, "grad_norm": 1.2104614798850823, "learning_rate": 1.1760946426380838e-05, "loss": 0.5964, "step": 5934 }, { "epoch": 0.46, "grad_norm": 1.1134263328915142, "learning_rate": 1.1758472964642651e-05, "loss": 0.5122, "step": 5935 }, { "epoch": 0.46, "grad_norm": 1.1541368919048283, "learning_rate": 1.1755999391882993e-05, "loss": 0.5348, "step": 5936 }, { "epoch": 0.46, "grad_norm": 1.1571005708378703, "learning_rate": 1.1753525708258034e-05, "loss": 0.5326, "step": 5937 }, { "epoch": 0.46, "grad_norm": 1.1534524013452785, "learning_rate": 1.175105191392395e-05, "loss": 0.6112, "step": 5938 }, { "epoch": 0.46, "grad_norm": 1.207627183285773, "learning_rate": 1.1748578009036925e-05, "loss": 0.5764, "step": 5939 }, { "epoch": 0.46, "grad_norm": 1.0799936327040143, "learning_rate": 1.1746103993753146e-05, "loss": 0.4969, "step": 5940 }, { "epoch": 0.46, "grad_norm": 1.0990050910054272, "learning_rate": 1.1743629868228815e-05, "loss": 0.5508, "step": 5941 }, { "epoch": 0.46, "grad_norm": 1.0899727584339496, "learning_rate": 1.1741155632620135e-05, "loss": 0.5599, "step": 5942 }, { "epoch": 0.46, "grad_norm": 1.192224390501563, "learning_rate": 1.1738681287083318e-05, "loss": 0.5345, "step": 5943 }, { "epoch": 0.46, "grad_norm": 2.0077912444536987, "learning_rate": 1.1736206831774576e-05, "loss": 0.5608, "step": 5944 }, { "epoch": 0.46, "grad_norm": 0.9353286074428118, "learning_rate": 1.1733732266850144e-05, "loss": 0.4908, "step": 5945 }, { "epoch": 0.46, "grad_norm": 1.0984879461780905, "learning_rate": 1.1731257592466248e-05, "loss": 0.555, "step": 5946 }, { "epoch": 0.46, "grad_norm": 1.0896925735902507, "learning_rate": 1.1728782808779126e-05, "loss": 0.5817, "step": 5947 }, { "epoch": 0.46, "grad_norm": 1.1967637471018489, "learning_rate": 1.172630791594503e-05, "loss": 0.5933, "step": 5948 }, { "epoch": 0.46, "grad_norm": 1.0948715454249143, "learning_rate": 1.1723832914120203e-05, "loss": 0.5489, "step": 5949 }, { "epoch": 0.46, "grad_norm": 1.12736728403082, "learning_rate": 1.1721357803460915e-05, "loss": 0.5393, "step": 5950 }, { "epoch": 0.46, "grad_norm": 1.1518496213653773, "learning_rate": 1.1718882584123425e-05, "loss": 0.5458, "step": 5951 }, { "epoch": 0.46, "grad_norm": 1.1689298114051585, "learning_rate": 1.1716407256264014e-05, "loss": 0.5602, "step": 5952 }, { "epoch": 0.46, "grad_norm": 1.1658066349821727, "learning_rate": 1.1713931820038952e-05, "loss": 0.5317, "step": 5953 }, { "epoch": 0.46, "grad_norm": 1.1068523900519198, "learning_rate": 1.1711456275604534e-05, "loss": 0.595, "step": 5954 }, { "epoch": 0.46, "grad_norm": 1.0445035578410105, "learning_rate": 1.170898062311705e-05, "loss": 0.5791, "step": 5955 }, { "epoch": 0.46, "grad_norm": 1.0967450143680333, "learning_rate": 1.1706504862732801e-05, "loss": 0.527, "step": 5956 }, { "epoch": 0.46, "grad_norm": 1.2187901514726485, "learning_rate": 1.17040289946081e-05, "loss": 0.6348, "step": 5957 }, { "epoch": 0.46, "grad_norm": 1.1074112718400848, "learning_rate": 1.1701553018899255e-05, "loss": 0.4888, "step": 5958 }, { "epoch": 0.46, "grad_norm": 1.1410849179622504, "learning_rate": 1.1699076935762585e-05, "loss": 0.5592, "step": 5959 }, { "epoch": 0.46, "grad_norm": 1.206214653248334, "learning_rate": 1.1696600745354427e-05, "loss": 0.5846, "step": 5960 }, { "epoch": 0.46, "grad_norm": 1.1250317357143491, "learning_rate": 1.1694124447831108e-05, "loss": 0.5303, "step": 5961 }, { "epoch": 0.46, "grad_norm": 1.008943676986175, "learning_rate": 1.1691648043348972e-05, "loss": 0.4977, "step": 5962 }, { "epoch": 0.46, "grad_norm": 1.1748748022704938, "learning_rate": 1.1689171532064371e-05, "loss": 0.5849, "step": 5963 }, { "epoch": 0.46, "grad_norm": 1.223466988277587, "learning_rate": 1.1686694914133652e-05, "loss": 0.5684, "step": 5964 }, { "epoch": 0.46, "grad_norm": 1.276509750565055, "learning_rate": 1.1684218189713183e-05, "loss": 0.5751, "step": 5965 }, { "epoch": 0.46, "grad_norm": 1.2072021955898635, "learning_rate": 1.1681741358959328e-05, "loss": 0.5492, "step": 5966 }, { "epoch": 0.46, "grad_norm": 1.2206299294461564, "learning_rate": 1.1679264422028469e-05, "loss": 0.6172, "step": 5967 }, { "epoch": 0.46, "grad_norm": 1.2630238588696714, "learning_rate": 1.167678737907698e-05, "loss": 0.5606, "step": 5968 }, { "epoch": 0.46, "grad_norm": 1.0384634485254356, "learning_rate": 1.1674310230261251e-05, "loss": 0.5534, "step": 5969 }, { "epoch": 0.46, "grad_norm": 1.0826442373090766, "learning_rate": 1.167183297573768e-05, "loss": 0.5247, "step": 5970 }, { "epoch": 0.46, "grad_norm": 1.135455501253906, "learning_rate": 1.166935561566267e-05, "loss": 0.5342, "step": 5971 }, { "epoch": 0.46, "grad_norm": 1.1690433113748349, "learning_rate": 1.1666878150192626e-05, "loss": 0.5679, "step": 5972 }, { "epoch": 0.46, "grad_norm": 1.1313390686209732, "learning_rate": 1.1664400579483965e-05, "loss": 0.5665, "step": 5973 }, { "epoch": 0.46, "grad_norm": 1.1735206747228935, "learning_rate": 1.1661922903693107e-05, "loss": 0.5417, "step": 5974 }, { "epoch": 0.46, "grad_norm": 1.1505834032586726, "learning_rate": 1.165944512297648e-05, "loss": 0.5898, "step": 5975 }, { "epoch": 0.46, "grad_norm": 1.0794256905921384, "learning_rate": 1.1656967237490524e-05, "loss": 0.5122, "step": 5976 }, { "epoch": 0.46, "grad_norm": 1.1225912480128515, "learning_rate": 1.1654489247391678e-05, "loss": 0.5448, "step": 5977 }, { "epoch": 0.46, "grad_norm": 1.2004672332956667, "learning_rate": 1.1652011152836388e-05, "loss": 0.5809, "step": 5978 }, { "epoch": 0.46, "grad_norm": 1.1600084306969605, "learning_rate": 1.1649532953981111e-05, "loss": 0.6212, "step": 5979 }, { "epoch": 0.46, "grad_norm": 1.1847955123296074, "learning_rate": 1.1647054650982306e-05, "loss": 0.575, "step": 5980 }, { "epoch": 0.46, "grad_norm": 1.089012512636066, "learning_rate": 1.1644576243996446e-05, "loss": 0.5012, "step": 5981 }, { "epoch": 0.46, "grad_norm": 1.2342043891531973, "learning_rate": 1.1642097733180003e-05, "loss": 0.5971, "step": 5982 }, { "epoch": 0.46, "grad_norm": 1.2085579411257474, "learning_rate": 1.1639619118689456e-05, "loss": 0.5863, "step": 5983 }, { "epoch": 0.46, "grad_norm": 1.1374002622537998, "learning_rate": 1.1637140400681296e-05, "loss": 0.5664, "step": 5984 }, { "epoch": 0.46, "grad_norm": 1.1734640410529102, "learning_rate": 1.1634661579312012e-05, "loss": 0.5435, "step": 5985 }, { "epoch": 0.46, "grad_norm": 1.1797157435794052, "learning_rate": 1.1632182654738116e-05, "loss": 0.5925, "step": 5986 }, { "epoch": 0.46, "grad_norm": 1.2278237145567825, "learning_rate": 1.1629703627116104e-05, "loss": 0.6195, "step": 5987 }, { "epoch": 0.46, "grad_norm": 1.0704874048930837, "learning_rate": 1.1627224496602496e-05, "loss": 0.5312, "step": 5988 }, { "epoch": 0.46, "grad_norm": 1.2364541421940813, "learning_rate": 1.1624745263353808e-05, "loss": 0.6087, "step": 5989 }, { "epoch": 0.46, "grad_norm": 1.1730046231958415, "learning_rate": 1.1622265927526566e-05, "loss": 0.5496, "step": 5990 }, { "epoch": 0.46, "grad_norm": 1.035689594416083, "learning_rate": 1.1619786489277312e-05, "loss": 0.5185, "step": 5991 }, { "epoch": 0.46, "grad_norm": 1.1143388509744039, "learning_rate": 1.1617306948762576e-05, "loss": 0.5245, "step": 5992 }, { "epoch": 0.46, "grad_norm": 1.2708955916283018, "learning_rate": 1.1614827306138912e-05, "loss": 0.5783, "step": 5993 }, { "epoch": 0.47, "grad_norm": 1.113705631147262, "learning_rate": 1.1612347561562865e-05, "loss": 0.5612, "step": 5994 }, { "epoch": 0.47, "grad_norm": 1.1133749403764557, "learning_rate": 1.1609867715190997e-05, "loss": 0.5667, "step": 5995 }, { "epoch": 0.47, "grad_norm": 1.1728270160479073, "learning_rate": 1.1607387767179881e-05, "loss": 0.5273, "step": 5996 }, { "epoch": 0.47, "grad_norm": 1.132064361825146, "learning_rate": 1.1604907717686075e-05, "loss": 0.5189, "step": 5997 }, { "epoch": 0.47, "grad_norm": 1.1384956655373737, "learning_rate": 1.1602427566866166e-05, "loss": 0.5398, "step": 5998 }, { "epoch": 0.47, "grad_norm": 1.2144947102550459, "learning_rate": 1.1599947314876738e-05, "loss": 0.5539, "step": 5999 }, { "epoch": 0.47, "grad_norm": 1.119428294759224, "learning_rate": 1.1597466961874381e-05, "loss": 0.5131, "step": 6000 }, { "epoch": 0.47, "grad_norm": 1.2082339717307802, "learning_rate": 1.1594986508015698e-05, "loss": 0.555, "step": 6001 }, { "epoch": 0.47, "grad_norm": 1.1382908911679626, "learning_rate": 1.1592505953457282e-05, "loss": 0.6189, "step": 6002 }, { "epoch": 0.47, "grad_norm": 1.3104182491484984, "learning_rate": 1.1590025298355749e-05, "loss": 0.5667, "step": 6003 }, { "epoch": 0.47, "grad_norm": 1.3662180924245277, "learning_rate": 1.1587544542867716e-05, "loss": 0.6686, "step": 6004 }, { "epoch": 0.47, "grad_norm": 1.151598103980875, "learning_rate": 1.1585063687149807e-05, "loss": 0.5373, "step": 6005 }, { "epoch": 0.47, "grad_norm": 1.1814977133846232, "learning_rate": 1.158258273135865e-05, "loss": 0.5538, "step": 6006 }, { "epoch": 0.47, "grad_norm": 1.224022391516502, "learning_rate": 1.158010167565088e-05, "loss": 0.6384, "step": 6007 }, { "epoch": 0.47, "grad_norm": 1.1989991424977446, "learning_rate": 1.1577620520183135e-05, "loss": 0.5239, "step": 6008 }, { "epoch": 0.47, "grad_norm": 1.1996822393430697, "learning_rate": 1.1575139265112072e-05, "loss": 0.5065, "step": 6009 }, { "epoch": 0.47, "grad_norm": 1.1306896444705934, "learning_rate": 1.1572657910594336e-05, "loss": 0.5249, "step": 6010 }, { "epoch": 0.47, "grad_norm": 1.1442817702513723, "learning_rate": 1.1570176456786597e-05, "loss": 0.5541, "step": 6011 }, { "epoch": 0.47, "grad_norm": 1.2164129199443567, "learning_rate": 1.1567694903845515e-05, "loss": 0.5739, "step": 6012 }, { "epoch": 0.47, "grad_norm": 1.2389645781914767, "learning_rate": 1.1565213251927764e-05, "loss": 0.5953, "step": 6013 }, { "epoch": 0.47, "grad_norm": 1.3176629381565335, "learning_rate": 1.1562731501190027e-05, "loss": 0.6181, "step": 6014 }, { "epoch": 0.47, "grad_norm": 1.080050587087489, "learning_rate": 1.1560249651788985e-05, "loss": 0.4979, "step": 6015 }, { "epoch": 0.47, "grad_norm": 1.213336610728508, "learning_rate": 1.155776770388134e-05, "loss": 0.5931, "step": 6016 }, { "epoch": 0.47, "grad_norm": 1.2257029929626926, "learning_rate": 1.1555285657623776e-05, "loss": 0.5298, "step": 6017 }, { "epoch": 0.47, "grad_norm": 1.199148766280429, "learning_rate": 1.1552803513173004e-05, "loss": 0.5782, "step": 6018 }, { "epoch": 0.47, "grad_norm": 1.1464487099502134, "learning_rate": 1.1550321270685739e-05, "loss": 0.5308, "step": 6019 }, { "epoch": 0.47, "grad_norm": 1.2900742230760318, "learning_rate": 1.1547838930318689e-05, "loss": 0.5616, "step": 6020 }, { "epoch": 0.47, "grad_norm": 1.181885143863875, "learning_rate": 1.1545356492228585e-05, "loss": 0.5739, "step": 6021 }, { "epoch": 0.47, "grad_norm": 1.1470877052572075, "learning_rate": 1.1542873956572151e-05, "loss": 0.5653, "step": 6022 }, { "epoch": 0.47, "grad_norm": 1.0975241802219553, "learning_rate": 1.1540391323506124e-05, "loss": 0.5422, "step": 6023 }, { "epoch": 0.47, "grad_norm": 1.2671880606804111, "learning_rate": 1.1537908593187246e-05, "loss": 0.5618, "step": 6024 }, { "epoch": 0.47, "grad_norm": 1.2110152188863899, "learning_rate": 1.1535425765772262e-05, "loss": 0.5546, "step": 6025 }, { "epoch": 0.47, "grad_norm": 1.1214674482991491, "learning_rate": 1.1532942841417931e-05, "loss": 0.5415, "step": 6026 }, { "epoch": 0.47, "grad_norm": 1.3119752379819598, "learning_rate": 1.1530459820281008e-05, "loss": 0.6052, "step": 6027 }, { "epoch": 0.47, "grad_norm": 1.1237423542853127, "learning_rate": 1.1527976702518257e-05, "loss": 0.5355, "step": 6028 }, { "epoch": 0.47, "grad_norm": 1.1256897189583506, "learning_rate": 1.1525493488286458e-05, "loss": 0.5348, "step": 6029 }, { "epoch": 0.47, "grad_norm": 1.2458725018335104, "learning_rate": 1.1523010177742382e-05, "loss": 0.565, "step": 6030 }, { "epoch": 0.47, "grad_norm": 1.1306324996404287, "learning_rate": 1.152052677104282e-05, "loss": 0.5562, "step": 6031 }, { "epoch": 0.47, "grad_norm": 1.1892979212134052, "learning_rate": 1.1518043268344554e-05, "loss": 0.6081, "step": 6032 }, { "epoch": 0.47, "grad_norm": 1.2624901459564288, "learning_rate": 1.1515559669804386e-05, "loss": 0.6001, "step": 6033 }, { "epoch": 0.47, "grad_norm": 1.1884519876965736, "learning_rate": 1.1513075975579116e-05, "loss": 0.5533, "step": 6034 }, { "epoch": 0.47, "grad_norm": 1.0712219606733848, "learning_rate": 1.1510592185825553e-05, "loss": 0.4657, "step": 6035 }, { "epoch": 0.47, "grad_norm": 1.229893674131583, "learning_rate": 1.150810830070051e-05, "loss": 0.5969, "step": 6036 }, { "epoch": 0.47, "grad_norm": 1.1520841144377467, "learning_rate": 1.1505624320360813e-05, "loss": 0.5821, "step": 6037 }, { "epoch": 0.47, "grad_norm": 1.1657764694346948, "learning_rate": 1.1503140244963283e-05, "loss": 0.5733, "step": 6038 }, { "epoch": 0.47, "grad_norm": 1.1349233494726247, "learning_rate": 1.1500656074664756e-05, "loss": 0.5414, "step": 6039 }, { "epoch": 0.47, "grad_norm": 1.1861377732920508, "learning_rate": 1.1498171809622067e-05, "loss": 0.5422, "step": 6040 }, { "epoch": 0.47, "grad_norm": 1.150809763796626, "learning_rate": 1.1495687449992059e-05, "loss": 0.4986, "step": 6041 }, { "epoch": 0.47, "grad_norm": 1.1371039298375025, "learning_rate": 1.149320299593159e-05, "loss": 0.5916, "step": 6042 }, { "epoch": 0.47, "grad_norm": 1.1367008692852423, "learning_rate": 1.149071844759751e-05, "loss": 0.5277, "step": 6043 }, { "epoch": 0.47, "grad_norm": 1.1330809702580076, "learning_rate": 1.1488233805146685e-05, "loss": 0.5555, "step": 6044 }, { "epoch": 0.47, "grad_norm": 1.186683273538549, "learning_rate": 1.1485749068735982e-05, "loss": 0.5772, "step": 6045 }, { "epoch": 0.47, "grad_norm": 1.1466717802440118, "learning_rate": 1.148326423852227e-05, "loss": 0.5352, "step": 6046 }, { "epoch": 0.47, "grad_norm": 1.167547807143518, "learning_rate": 1.1480779314662438e-05, "loss": 0.5804, "step": 6047 }, { "epoch": 0.47, "grad_norm": 1.290652733382439, "learning_rate": 1.1478294297313366e-05, "loss": 0.6415, "step": 6048 }, { "epoch": 0.47, "grad_norm": 1.1767032088278035, "learning_rate": 1.1475809186631947e-05, "loss": 0.6049, "step": 6049 }, { "epoch": 0.47, "grad_norm": 1.129735094600574, "learning_rate": 1.1473323982775085e-05, "loss": 0.5539, "step": 6050 }, { "epoch": 0.47, "grad_norm": 1.177364715207251, "learning_rate": 1.1470838685899675e-05, "loss": 0.532, "step": 6051 }, { "epoch": 0.47, "grad_norm": 1.1061015121030253, "learning_rate": 1.146835329616263e-05, "loss": 0.5021, "step": 6052 }, { "epoch": 0.47, "grad_norm": 1.1822799592411755, "learning_rate": 1.1465867813720865e-05, "loss": 0.5967, "step": 6053 }, { "epoch": 0.47, "grad_norm": 1.1386216743094857, "learning_rate": 1.1463382238731305e-05, "loss": 0.5561, "step": 6054 }, { "epoch": 0.47, "grad_norm": 1.1723453849243168, "learning_rate": 1.1460896571350875e-05, "loss": 0.5998, "step": 6055 }, { "epoch": 0.47, "grad_norm": 1.210706331891644, "learning_rate": 1.1458410811736503e-05, "loss": 0.5615, "step": 6056 }, { "epoch": 0.47, "grad_norm": 1.1605290286021857, "learning_rate": 1.1455924960045136e-05, "loss": 0.5542, "step": 6057 }, { "epoch": 0.47, "grad_norm": 1.3127881142599516, "learning_rate": 1.145343901643371e-05, "loss": 0.5599, "step": 6058 }, { "epoch": 0.47, "grad_norm": 1.1901311836542792, "learning_rate": 1.1450952981059182e-05, "loss": 0.5495, "step": 6059 }, { "epoch": 0.47, "grad_norm": 1.2635045124995357, "learning_rate": 1.144846685407851e-05, "loss": 0.623, "step": 6060 }, { "epoch": 0.47, "grad_norm": 1.236935868425668, "learning_rate": 1.1445980635648649e-05, "loss": 0.5437, "step": 6061 }, { "epoch": 0.47, "grad_norm": 1.1302637827876, "learning_rate": 1.1443494325926572e-05, "loss": 0.5638, "step": 6062 }, { "epoch": 0.47, "grad_norm": 1.2087317774443476, "learning_rate": 1.1441007925069248e-05, "loss": 0.5703, "step": 6063 }, { "epoch": 0.47, "grad_norm": 1.2696397470915577, "learning_rate": 1.1438521433233657e-05, "loss": 0.5883, "step": 6064 }, { "epoch": 0.47, "grad_norm": 1.0951124697019443, "learning_rate": 1.1436034850576794e-05, "loss": 0.5382, "step": 6065 }, { "epoch": 0.47, "grad_norm": 1.0863121539069758, "learning_rate": 1.1433548177255638e-05, "loss": 0.5102, "step": 6066 }, { "epoch": 0.47, "grad_norm": 1.2123883815396206, "learning_rate": 1.143106141342719e-05, "loss": 0.5828, "step": 6067 }, { "epoch": 0.47, "grad_norm": 1.1480736707615016, "learning_rate": 1.1428574559248448e-05, "loss": 0.5817, "step": 6068 }, { "epoch": 0.47, "grad_norm": 1.1037466701919598, "learning_rate": 1.1426087614876424e-05, "loss": 0.5306, "step": 6069 }, { "epoch": 0.47, "grad_norm": 1.171892750923502, "learning_rate": 1.1423600580468137e-05, "loss": 0.5238, "step": 6070 }, { "epoch": 0.47, "grad_norm": 1.21161450565429, "learning_rate": 1.1421113456180597e-05, "loss": 0.5793, "step": 6071 }, { "epoch": 0.47, "grad_norm": 1.145258909795778, "learning_rate": 1.1418626242170833e-05, "loss": 0.5717, "step": 6072 }, { "epoch": 0.47, "grad_norm": 1.1455312793145906, "learning_rate": 1.1416138938595874e-05, "loss": 0.5468, "step": 6073 }, { "epoch": 0.47, "grad_norm": 1.1801996193160358, "learning_rate": 1.1413651545612758e-05, "loss": 0.566, "step": 6074 }, { "epoch": 0.47, "grad_norm": 1.1543277536254237, "learning_rate": 1.1411164063378529e-05, "loss": 0.5614, "step": 6075 }, { "epoch": 0.47, "grad_norm": 1.2549773303108371, "learning_rate": 1.1408676492050229e-05, "loss": 0.5937, "step": 6076 }, { "epoch": 0.47, "grad_norm": 1.0943050610967415, "learning_rate": 1.1406188831784912e-05, "loss": 0.5143, "step": 6077 }, { "epoch": 0.47, "grad_norm": 1.2521838185404046, "learning_rate": 1.1403701082739644e-05, "loss": 0.6241, "step": 6078 }, { "epoch": 0.47, "grad_norm": 1.2241330231799306, "learning_rate": 1.1401213245071481e-05, "loss": 0.591, "step": 6079 }, { "epoch": 0.47, "grad_norm": 1.1258022838483297, "learning_rate": 1.1398725318937503e-05, "loss": 0.5129, "step": 6080 }, { "epoch": 0.47, "grad_norm": 1.251965408145916, "learning_rate": 1.1396237304494772e-05, "loss": 0.5122, "step": 6081 }, { "epoch": 0.47, "grad_norm": 1.1887100980782717, "learning_rate": 1.1393749201900377e-05, "loss": 0.5747, "step": 6082 }, { "epoch": 0.47, "grad_norm": 1.1874656672282982, "learning_rate": 1.1391261011311407e-05, "loss": 0.513, "step": 6083 }, { "epoch": 0.47, "grad_norm": 1.0929043361614448, "learning_rate": 1.138877273288495e-05, "loss": 0.5162, "step": 6084 }, { "epoch": 0.47, "grad_norm": 1.0594942549027224, "learning_rate": 1.1386284366778106e-05, "loss": 0.506, "step": 6085 }, { "epoch": 0.47, "grad_norm": 1.237677345669188, "learning_rate": 1.1383795913147978e-05, "loss": 0.5822, "step": 6086 }, { "epoch": 0.47, "grad_norm": 1.0246579413587495, "learning_rate": 1.138130737215167e-05, "loss": 0.4975, "step": 6087 }, { "epoch": 0.47, "grad_norm": 1.1503271010885996, "learning_rate": 1.1378818743946308e-05, "loss": 0.5731, "step": 6088 }, { "epoch": 0.47, "grad_norm": 1.3168991931780432, "learning_rate": 1.1376330028689e-05, "loss": 0.54, "step": 6089 }, { "epoch": 0.47, "grad_norm": 1.1430622346810895, "learning_rate": 1.137384122653688e-05, "loss": 0.516, "step": 6090 }, { "epoch": 0.47, "grad_norm": 1.0391458463584249, "learning_rate": 1.137135233764707e-05, "loss": 0.5263, "step": 6091 }, { "epoch": 0.47, "grad_norm": 1.2606152412214606, "learning_rate": 1.1368863362176713e-05, "loss": 0.5872, "step": 6092 }, { "epoch": 0.47, "grad_norm": 1.2150401848948964, "learning_rate": 1.1366374300282954e-05, "loss": 0.5402, "step": 6093 }, { "epoch": 0.47, "grad_norm": 1.2435444551780566, "learning_rate": 1.1363885152122933e-05, "loss": 0.5731, "step": 6094 }, { "epoch": 0.47, "grad_norm": 1.1744543553654667, "learning_rate": 1.1361395917853808e-05, "loss": 0.5379, "step": 6095 }, { "epoch": 0.47, "grad_norm": 1.2422188329043826, "learning_rate": 1.1358906597632731e-05, "loss": 0.5761, "step": 6096 }, { "epoch": 0.47, "grad_norm": 1.169244535132021, "learning_rate": 1.135641719161687e-05, "loss": 0.5983, "step": 6097 }, { "epoch": 0.47, "grad_norm": 1.1300913256478453, "learning_rate": 1.1353927699963396e-05, "loss": 0.5251, "step": 6098 }, { "epoch": 0.47, "grad_norm": 1.2876857456920567, "learning_rate": 1.135143812282948e-05, "loss": 0.5808, "step": 6099 }, { "epoch": 0.47, "grad_norm": 1.1864677761507028, "learning_rate": 1.1348948460372302e-05, "loss": 0.5515, "step": 6100 }, { "epoch": 0.47, "grad_norm": 1.0960707430284047, "learning_rate": 1.1346458712749049e-05, "loss": 0.5575, "step": 6101 }, { "epoch": 0.47, "grad_norm": 1.1661438735643315, "learning_rate": 1.1343968880116907e-05, "loss": 0.5612, "step": 6102 }, { "epoch": 0.47, "grad_norm": 1.1487139414543932, "learning_rate": 1.1341478962633081e-05, "loss": 0.5233, "step": 6103 }, { "epoch": 0.47, "grad_norm": 1.0363604756585434, "learning_rate": 1.1338988960454763e-05, "loss": 0.4851, "step": 6104 }, { "epoch": 0.47, "grad_norm": 1.1794222603017963, "learning_rate": 1.1336498873739166e-05, "loss": 0.5996, "step": 6105 }, { "epoch": 0.47, "grad_norm": 1.2842968789355962, "learning_rate": 1.13340087026435e-05, "loss": 0.6117, "step": 6106 }, { "epoch": 0.47, "grad_norm": 1.1565401383397298, "learning_rate": 1.1331518447324978e-05, "loss": 0.5492, "step": 6107 }, { "epoch": 0.47, "grad_norm": 1.2901046239579763, "learning_rate": 1.1329028107940832e-05, "loss": 0.6147, "step": 6108 }, { "epoch": 0.47, "grad_norm": 1.0247321057884973, "learning_rate": 1.1326537684648282e-05, "loss": 0.5042, "step": 6109 }, { "epoch": 0.47, "grad_norm": 1.1642414441915516, "learning_rate": 1.1324047177604565e-05, "loss": 0.5156, "step": 6110 }, { "epoch": 0.47, "grad_norm": 1.2482839726279775, "learning_rate": 1.1321556586966917e-05, "loss": 0.5936, "step": 6111 }, { "epoch": 0.47, "grad_norm": 1.1575613060817325, "learning_rate": 1.1319065912892584e-05, "loss": 0.5384, "step": 6112 }, { "epoch": 0.47, "grad_norm": 1.2922929455541055, "learning_rate": 1.1316575155538816e-05, "loss": 0.6094, "step": 6113 }, { "epoch": 0.47, "grad_norm": 1.1511815821792062, "learning_rate": 1.1314084315062863e-05, "loss": 0.5347, "step": 6114 }, { "epoch": 0.47, "grad_norm": 1.1563639713512337, "learning_rate": 1.131159339162199e-05, "loss": 0.5689, "step": 6115 }, { "epoch": 0.47, "grad_norm": 1.1636079758498936, "learning_rate": 1.1309102385373459e-05, "loss": 0.5629, "step": 6116 }, { "epoch": 0.47, "grad_norm": 1.2872494518599655, "learning_rate": 1.1306611296474536e-05, "loss": 0.5658, "step": 6117 }, { "epoch": 0.47, "grad_norm": 1.1639668982246267, "learning_rate": 1.1304120125082504e-05, "loss": 0.5931, "step": 6118 }, { "epoch": 0.47, "grad_norm": 1.1237137116714113, "learning_rate": 1.1301628871354641e-05, "loss": 0.5275, "step": 6119 }, { "epoch": 0.47, "grad_norm": 1.1171285973873313, "learning_rate": 1.129913753544823e-05, "loss": 0.5402, "step": 6120 }, { "epoch": 0.47, "grad_norm": 1.2248233687287509, "learning_rate": 1.1296646117520567e-05, "loss": 0.5971, "step": 6121 }, { "epoch": 0.47, "grad_norm": 1.144457557571205, "learning_rate": 1.1294154617728942e-05, "loss": 0.5616, "step": 6122 }, { "epoch": 0.48, "grad_norm": 1.128843206737472, "learning_rate": 1.1291663036230658e-05, "loss": 0.6098, "step": 6123 }, { "epoch": 0.48, "grad_norm": 1.1859439392317106, "learning_rate": 1.1289171373183026e-05, "loss": 0.5655, "step": 6124 }, { "epoch": 0.48, "grad_norm": 1.2119237021801355, "learning_rate": 1.1286679628743349e-05, "loss": 0.5533, "step": 6125 }, { "epoch": 0.48, "grad_norm": 1.2164796565746279, "learning_rate": 1.1284187803068953e-05, "loss": 0.6083, "step": 6126 }, { "epoch": 0.48, "grad_norm": 1.1327797194374818, "learning_rate": 1.1281695896317153e-05, "loss": 0.5423, "step": 6127 }, { "epoch": 0.48, "grad_norm": 1.2823281287869477, "learning_rate": 1.127920390864528e-05, "loss": 0.5764, "step": 6128 }, { "epoch": 0.48, "grad_norm": 1.070036748183764, "learning_rate": 1.1276711840210663e-05, "loss": 0.5204, "step": 6129 }, { "epoch": 0.48, "grad_norm": 1.2320117781992606, "learning_rate": 1.127421969117064e-05, "loss": 0.5419, "step": 6130 }, { "epoch": 0.48, "grad_norm": 1.1750960270215633, "learning_rate": 1.1271727461682558e-05, "loss": 0.5746, "step": 6131 }, { "epoch": 0.48, "grad_norm": 1.2556764935216203, "learning_rate": 1.1269235151903754e-05, "loss": 0.5761, "step": 6132 }, { "epoch": 0.48, "grad_norm": 1.1858653814328417, "learning_rate": 1.126674276199159e-05, "loss": 0.5399, "step": 6133 }, { "epoch": 0.48, "grad_norm": 1.120311610435654, "learning_rate": 1.1264250292103423e-05, "loss": 0.4987, "step": 6134 }, { "epoch": 0.48, "grad_norm": 1.2430094272903576, "learning_rate": 1.1261757742396606e-05, "loss": 0.6163, "step": 6135 }, { "epoch": 0.48, "grad_norm": 1.1713331877357887, "learning_rate": 1.1259265113028517e-05, "loss": 0.567, "step": 6136 }, { "epoch": 0.48, "grad_norm": 1.2023741125247693, "learning_rate": 1.1256772404156521e-05, "loss": 0.5412, "step": 6137 }, { "epoch": 0.48, "grad_norm": 1.1715327462452352, "learning_rate": 1.1254279615938001e-05, "loss": 0.603, "step": 6138 }, { "epoch": 0.48, "grad_norm": 1.1307029813630516, "learning_rate": 1.1251786748530342e-05, "loss": 0.5531, "step": 6139 }, { "epoch": 0.48, "grad_norm": 1.1911774962897999, "learning_rate": 1.124929380209092e-05, "loss": 0.5529, "step": 6140 }, { "epoch": 0.48, "grad_norm": 1.1671098593916343, "learning_rate": 1.124680077677714e-05, "loss": 0.5478, "step": 6141 }, { "epoch": 0.48, "grad_norm": 1.209040528011753, "learning_rate": 1.124430767274639e-05, "loss": 0.6081, "step": 6142 }, { "epoch": 0.48, "grad_norm": 1.1465392221462014, "learning_rate": 1.124181449015608e-05, "loss": 0.58, "step": 6143 }, { "epoch": 0.48, "grad_norm": 1.131175627819122, "learning_rate": 1.1239321229163615e-05, "loss": 0.5669, "step": 6144 }, { "epoch": 0.48, "grad_norm": 1.2186527213374134, "learning_rate": 1.1236827889926402e-05, "loss": 0.5652, "step": 6145 }, { "epoch": 0.48, "grad_norm": 1.2058958867967815, "learning_rate": 1.1234334472601868e-05, "loss": 0.607, "step": 6146 }, { "epoch": 0.48, "grad_norm": 1.2508591560343088, "learning_rate": 1.1231840977347427e-05, "loss": 0.5752, "step": 6147 }, { "epoch": 0.48, "grad_norm": 1.3113149561034705, "learning_rate": 1.1229347404320515e-05, "loss": 0.6231, "step": 6148 }, { "epoch": 0.48, "grad_norm": 1.1797706119994518, "learning_rate": 1.1226853753678555e-05, "loss": 0.5521, "step": 6149 }, { "epoch": 0.48, "grad_norm": 1.1573637675782535, "learning_rate": 1.1224360025578987e-05, "loss": 0.5661, "step": 6150 }, { "epoch": 0.48, "grad_norm": 1.0177478612110338, "learning_rate": 1.1221866220179254e-05, "loss": 0.5118, "step": 6151 }, { "epoch": 0.48, "grad_norm": 1.2031172714976002, "learning_rate": 1.1219372337636802e-05, "loss": 0.5805, "step": 6152 }, { "epoch": 0.48, "grad_norm": 1.1654660768551133, "learning_rate": 1.1216878378109085e-05, "loss": 0.591, "step": 6153 }, { "epoch": 0.48, "grad_norm": 1.278893000791008, "learning_rate": 1.1214384341753557e-05, "loss": 0.5893, "step": 6154 }, { "epoch": 0.48, "grad_norm": 1.0944814688384625, "learning_rate": 1.1211890228727679e-05, "loss": 0.5593, "step": 6155 }, { "epoch": 0.48, "grad_norm": 1.2107200673460006, "learning_rate": 1.120939603918892e-05, "loss": 0.5401, "step": 6156 }, { "epoch": 0.48, "grad_norm": 1.1891111186009367, "learning_rate": 1.1206901773294749e-05, "loss": 0.5467, "step": 6157 }, { "epoch": 0.48, "grad_norm": 1.2391264524948702, "learning_rate": 1.1204407431202642e-05, "loss": 0.5943, "step": 6158 }, { "epoch": 0.48, "grad_norm": 1.1448040009037483, "learning_rate": 1.120191301307008e-05, "loss": 0.553, "step": 6159 }, { "epoch": 0.48, "grad_norm": 1.0451598279734644, "learning_rate": 1.1199418519054549e-05, "loss": 0.4847, "step": 6160 }, { "epoch": 0.48, "grad_norm": 1.1327962939977638, "learning_rate": 1.1196923949313537e-05, "loss": 0.5197, "step": 6161 }, { "epoch": 0.48, "grad_norm": 1.1831119173498421, "learning_rate": 1.1194429304004541e-05, "loss": 0.5999, "step": 6162 }, { "epoch": 0.48, "grad_norm": 1.211938997637693, "learning_rate": 1.1191934583285063e-05, "loss": 0.5342, "step": 6163 }, { "epoch": 0.48, "grad_norm": 1.0937776834526711, "learning_rate": 1.1189439787312603e-05, "loss": 0.5601, "step": 6164 }, { "epoch": 0.48, "grad_norm": 1.1182546787108651, "learning_rate": 1.1186944916244673e-05, "loss": 0.5559, "step": 6165 }, { "epoch": 0.48, "grad_norm": 1.4341959922639842, "learning_rate": 1.1184449970238787e-05, "loss": 0.5554, "step": 6166 }, { "epoch": 0.48, "grad_norm": 1.1721113856638508, "learning_rate": 1.1181954949452463e-05, "loss": 0.5801, "step": 6167 }, { "epoch": 0.48, "grad_norm": 1.1589267265091858, "learning_rate": 1.1179459854043227e-05, "loss": 0.5391, "step": 6168 }, { "epoch": 0.48, "grad_norm": 1.1216112597206436, "learning_rate": 1.1176964684168603e-05, "loss": 0.5426, "step": 6169 }, { "epoch": 0.48, "grad_norm": 1.1056470962998983, "learning_rate": 1.1174469439986126e-05, "loss": 0.5618, "step": 6170 }, { "epoch": 0.48, "grad_norm": 1.2247803004468978, "learning_rate": 1.1171974121653333e-05, "loss": 0.5663, "step": 6171 }, { "epoch": 0.48, "grad_norm": 1.1264885485287592, "learning_rate": 1.116947872932777e-05, "loss": 0.5633, "step": 6172 }, { "epoch": 0.48, "grad_norm": 1.1898257919829878, "learning_rate": 1.1166983263166979e-05, "loss": 0.5433, "step": 6173 }, { "epoch": 0.48, "grad_norm": 1.1917158381225335, "learning_rate": 1.1164487723328516e-05, "loss": 0.5849, "step": 6174 }, { "epoch": 0.48, "grad_norm": 1.0908833675891934, "learning_rate": 1.1161992109969932e-05, "loss": 0.5728, "step": 6175 }, { "epoch": 0.48, "grad_norm": 1.2274191228012494, "learning_rate": 1.115949642324879e-05, "loss": 0.6096, "step": 6176 }, { "epoch": 0.48, "grad_norm": 1.140941001282802, "learning_rate": 1.1157000663322662e-05, "loss": 0.5523, "step": 6177 }, { "epoch": 0.48, "grad_norm": 1.2186166250168242, "learning_rate": 1.115450483034911e-05, "loss": 0.5749, "step": 6178 }, { "epoch": 0.48, "grad_norm": 1.1872916289770912, "learning_rate": 1.115200892448571e-05, "loss": 0.5628, "step": 6179 }, { "epoch": 0.48, "grad_norm": 1.1816432479955392, "learning_rate": 1.1149512945890044e-05, "loss": 0.5599, "step": 6180 }, { "epoch": 0.48, "grad_norm": 1.1784970674959776, "learning_rate": 1.1147016894719695e-05, "loss": 0.5754, "step": 6181 }, { "epoch": 0.48, "grad_norm": 1.1310563253615806, "learning_rate": 1.1144520771132252e-05, "loss": 0.534, "step": 6182 }, { "epoch": 0.48, "grad_norm": 1.1835764767233026, "learning_rate": 1.1142024575285308e-05, "loss": 0.5458, "step": 6183 }, { "epoch": 0.48, "grad_norm": 1.1392449168798588, "learning_rate": 1.1139528307336463e-05, "loss": 0.5747, "step": 6184 }, { "epoch": 0.48, "grad_norm": 1.158187634523703, "learning_rate": 1.1137031967443312e-05, "loss": 0.5877, "step": 6185 }, { "epoch": 0.48, "grad_norm": 1.2891116046222504, "learning_rate": 1.1134535555763466e-05, "loss": 0.5957, "step": 6186 }, { "epoch": 0.48, "grad_norm": 1.1423768999322064, "learning_rate": 1.113203907245454e-05, "loss": 0.5516, "step": 6187 }, { "epoch": 0.48, "grad_norm": 1.1826018152096238, "learning_rate": 1.1129542517674147e-05, "loss": 0.57, "step": 6188 }, { "epoch": 0.48, "grad_norm": 1.1678530020117521, "learning_rate": 1.1127045891579906e-05, "loss": 0.4987, "step": 6189 }, { "epoch": 0.48, "grad_norm": 1.1482155514031898, "learning_rate": 1.1124549194329445e-05, "loss": 0.5555, "step": 6190 }, { "epoch": 0.48, "grad_norm": 1.1638159778875492, "learning_rate": 1.112205242608039e-05, "loss": 0.5227, "step": 6191 }, { "epoch": 0.48, "grad_norm": 1.3314113572883042, "learning_rate": 1.1119555586990376e-05, "loss": 0.6453, "step": 6192 }, { "epoch": 0.48, "grad_norm": 1.1504087592413548, "learning_rate": 1.1117058677217043e-05, "loss": 0.5506, "step": 6193 }, { "epoch": 0.48, "grad_norm": 1.1109482261687162, "learning_rate": 1.111456169691803e-05, "loss": 0.5282, "step": 6194 }, { "epoch": 0.48, "grad_norm": 1.2655304767491267, "learning_rate": 1.1112064646250988e-05, "loss": 0.5556, "step": 6195 }, { "epoch": 0.48, "grad_norm": 1.1314574457140631, "learning_rate": 1.1109567525373569e-05, "loss": 0.4926, "step": 6196 }, { "epoch": 0.48, "grad_norm": 1.2467107889370879, "learning_rate": 1.1107070334443426e-05, "loss": 0.6065, "step": 6197 }, { "epoch": 0.48, "grad_norm": 1.098095083247121, "learning_rate": 1.1104573073618222e-05, "loss": 0.5326, "step": 6198 }, { "epoch": 0.48, "grad_norm": 1.0949034467133698, "learning_rate": 1.1102075743055618e-05, "loss": 0.5128, "step": 6199 }, { "epoch": 0.48, "grad_norm": 1.2051167536005194, "learning_rate": 1.1099578342913289e-05, "loss": 0.5695, "step": 6200 }, { "epoch": 0.48, "grad_norm": 1.1850271075403587, "learning_rate": 1.1097080873348905e-05, "loss": 0.598, "step": 6201 }, { "epoch": 0.48, "grad_norm": 1.270252574525389, "learning_rate": 1.1094583334520146e-05, "loss": 0.6388, "step": 6202 }, { "epoch": 0.48, "grad_norm": 1.1029827620576405, "learning_rate": 1.1092085726584693e-05, "loss": 0.5326, "step": 6203 }, { "epoch": 0.48, "grad_norm": 1.1215251664064685, "learning_rate": 1.1089588049700234e-05, "loss": 0.5437, "step": 6204 }, { "epoch": 0.48, "grad_norm": 1.155188459816482, "learning_rate": 1.1087090304024462e-05, "loss": 0.544, "step": 6205 }, { "epoch": 0.48, "grad_norm": 1.1474209912428457, "learning_rate": 1.1084592489715067e-05, "loss": 0.5899, "step": 6206 }, { "epoch": 0.48, "grad_norm": 1.2369202556301464, "learning_rate": 1.1082094606929754e-05, "loss": 0.5627, "step": 6207 }, { "epoch": 0.48, "grad_norm": 1.2608004792179852, "learning_rate": 1.1079596655826227e-05, "loss": 0.5612, "step": 6208 }, { "epoch": 0.48, "grad_norm": 1.1596070604892992, "learning_rate": 1.1077098636562191e-05, "loss": 0.5742, "step": 6209 }, { "epoch": 0.48, "grad_norm": 1.1999462791180133, "learning_rate": 1.1074600549295363e-05, "loss": 0.5375, "step": 6210 }, { "epoch": 0.48, "grad_norm": 1.1406132945675063, "learning_rate": 1.1072102394183456e-05, "loss": 0.6112, "step": 6211 }, { "epoch": 0.48, "grad_norm": 1.1389317420454288, "learning_rate": 1.1069604171384194e-05, "loss": 0.5766, "step": 6212 }, { "epoch": 0.48, "grad_norm": 1.21567475087168, "learning_rate": 1.1067105881055303e-05, "loss": 0.61, "step": 6213 }, { "epoch": 0.48, "grad_norm": 1.1913558198855603, "learning_rate": 1.106460752335451e-05, "loss": 0.5465, "step": 6214 }, { "epoch": 0.48, "grad_norm": 1.2962121763385255, "learning_rate": 1.1062109098439555e-05, "loss": 0.5838, "step": 6215 }, { "epoch": 0.48, "grad_norm": 1.1993759996530284, "learning_rate": 1.105961060646817e-05, "loss": 0.5563, "step": 6216 }, { "epoch": 0.48, "grad_norm": 1.2047749884634675, "learning_rate": 1.1057112047598102e-05, "loss": 0.5677, "step": 6217 }, { "epoch": 0.48, "grad_norm": 1.0570390271043815, "learning_rate": 1.1054613421987098e-05, "loss": 0.5383, "step": 6218 }, { "epoch": 0.48, "grad_norm": 1.188914761281565, "learning_rate": 1.1052114729792902e-05, "loss": 0.5815, "step": 6219 }, { "epoch": 0.48, "grad_norm": 1.2046005811394422, "learning_rate": 1.104961597117328e-05, "loss": 0.5691, "step": 6220 }, { "epoch": 0.48, "grad_norm": 1.1500618275317596, "learning_rate": 1.1047117146285984e-05, "loss": 0.5111, "step": 6221 }, { "epoch": 0.48, "grad_norm": 1.0534887212105184, "learning_rate": 1.1044618255288781e-05, "loss": 0.5623, "step": 6222 }, { "epoch": 0.48, "grad_norm": 1.1762681124161223, "learning_rate": 1.104211929833944e-05, "loss": 0.54, "step": 6223 }, { "epoch": 0.48, "grad_norm": 1.0849735729673213, "learning_rate": 1.1039620275595729e-05, "loss": 0.5644, "step": 6224 }, { "epoch": 0.48, "grad_norm": 1.082915182598213, "learning_rate": 1.1037121187215427e-05, "loss": 0.5286, "step": 6225 }, { "epoch": 0.48, "grad_norm": 1.1481968114964451, "learning_rate": 1.1034622033356311e-05, "loss": 0.5686, "step": 6226 }, { "epoch": 0.48, "grad_norm": 1.1205564799851595, "learning_rate": 1.103212281417617e-05, "loss": 0.5383, "step": 6227 }, { "epoch": 0.48, "grad_norm": 1.0921265133487799, "learning_rate": 1.1029623529832793e-05, "loss": 0.5392, "step": 6228 }, { "epoch": 0.48, "grad_norm": 1.04841506183565, "learning_rate": 1.1027124180483965e-05, "loss": 0.5416, "step": 6229 }, { "epoch": 0.48, "grad_norm": 1.168596029084298, "learning_rate": 1.1024624766287492e-05, "loss": 0.5397, "step": 6230 }, { "epoch": 0.48, "grad_norm": 1.1717812055563395, "learning_rate": 1.1022125287401172e-05, "loss": 0.5511, "step": 6231 }, { "epoch": 0.48, "grad_norm": 1.248813256541239, "learning_rate": 1.1019625743982807e-05, "loss": 0.5685, "step": 6232 }, { "epoch": 0.48, "grad_norm": 1.1908274761257898, "learning_rate": 1.101712613619021e-05, "loss": 0.5442, "step": 6233 }, { "epoch": 0.48, "grad_norm": 1.1641961347436167, "learning_rate": 1.1014626464181191e-05, "loss": 0.543, "step": 6234 }, { "epoch": 0.48, "grad_norm": 1.084253556104743, "learning_rate": 1.1012126728113567e-05, "loss": 0.4856, "step": 6235 }, { "epoch": 0.48, "grad_norm": 1.1528492998561048, "learning_rate": 1.1009626928145163e-05, "loss": 0.6098, "step": 6236 }, { "epoch": 0.48, "grad_norm": 1.146757700895703, "learning_rate": 1.1007127064433802e-05, "loss": 0.5844, "step": 6237 }, { "epoch": 0.48, "grad_norm": 1.1120951440916955, "learning_rate": 1.1004627137137314e-05, "loss": 0.5116, "step": 6238 }, { "epoch": 0.48, "grad_norm": 1.1138632340672925, "learning_rate": 1.1002127146413531e-05, "loss": 0.5143, "step": 6239 }, { "epoch": 0.48, "grad_norm": 1.2878599163787527, "learning_rate": 1.0999627092420291e-05, "loss": 0.5805, "step": 6240 }, { "epoch": 0.48, "grad_norm": 1.2106485329675938, "learning_rate": 1.0997126975315433e-05, "loss": 0.5232, "step": 6241 }, { "epoch": 0.48, "grad_norm": 1.0727876320192133, "learning_rate": 1.0994626795256806e-05, "loss": 0.5055, "step": 6242 }, { "epoch": 0.48, "grad_norm": 1.1713025538685444, "learning_rate": 1.0992126552402261e-05, "loss": 0.5475, "step": 6243 }, { "epoch": 0.48, "grad_norm": 1.263902646060917, "learning_rate": 1.0989626246909642e-05, "loss": 0.5828, "step": 6244 }, { "epoch": 0.48, "grad_norm": 1.2439657474659542, "learning_rate": 1.0987125878936814e-05, "loss": 0.5851, "step": 6245 }, { "epoch": 0.48, "grad_norm": 1.193978556378137, "learning_rate": 1.0984625448641639e-05, "loss": 0.5448, "step": 6246 }, { "epoch": 0.48, "grad_norm": 1.191254002583395, "learning_rate": 1.0982124956181979e-05, "loss": 0.5728, "step": 6247 }, { "epoch": 0.48, "grad_norm": 1.1800802726435635, "learning_rate": 1.0979624401715702e-05, "loss": 0.5526, "step": 6248 }, { "epoch": 0.48, "grad_norm": 1.193635449730372, "learning_rate": 1.0977123785400684e-05, "loss": 0.5814, "step": 6249 }, { "epoch": 0.48, "grad_norm": 1.2722000006762226, "learning_rate": 1.0974623107394797e-05, "loss": 0.5663, "step": 6250 }, { "epoch": 0.48, "grad_norm": 1.2157040705130804, "learning_rate": 1.097212236785593e-05, "loss": 0.5649, "step": 6251 }, { "epoch": 0.49, "grad_norm": 1.282610519807765, "learning_rate": 1.0969621566941959e-05, "loss": 0.5823, "step": 6252 }, { "epoch": 0.49, "grad_norm": 1.0865906322437042, "learning_rate": 1.0967120704810776e-05, "loss": 0.5579, "step": 6253 }, { "epoch": 0.49, "grad_norm": 1.1548739828581531, "learning_rate": 1.0964619781620271e-05, "loss": 0.5986, "step": 6254 }, { "epoch": 0.49, "grad_norm": 1.2150745233985072, "learning_rate": 1.0962118797528344e-05, "loss": 0.5374, "step": 6255 }, { "epoch": 0.49, "grad_norm": 1.15572927966926, "learning_rate": 1.0959617752692897e-05, "loss": 0.5211, "step": 6256 }, { "epoch": 0.49, "grad_norm": 1.1692748151115242, "learning_rate": 1.0957116647271829e-05, "loss": 0.5543, "step": 6257 }, { "epoch": 0.49, "grad_norm": 1.0414781527011072, "learning_rate": 1.0954615481423047e-05, "loss": 0.5168, "step": 6258 }, { "epoch": 0.49, "grad_norm": 1.1498510782594316, "learning_rate": 1.0952114255304465e-05, "loss": 0.5256, "step": 6259 }, { "epoch": 0.49, "grad_norm": 1.1825853338763397, "learning_rate": 1.0949612969073995e-05, "loss": 0.5309, "step": 6260 }, { "epoch": 0.49, "grad_norm": 1.2336561789971732, "learning_rate": 1.0947111622889563e-05, "loss": 0.6284, "step": 6261 }, { "epoch": 0.49, "grad_norm": 1.1557986950972743, "learning_rate": 1.0944610216909086e-05, "loss": 0.5068, "step": 6262 }, { "epoch": 0.49, "grad_norm": 1.30853617527261, "learning_rate": 1.0942108751290494e-05, "loss": 0.599, "step": 6263 }, { "epoch": 0.49, "grad_norm": 1.068262461951388, "learning_rate": 1.0939607226191716e-05, "loss": 0.5627, "step": 6264 }, { "epoch": 0.49, "grad_norm": 1.1874667213193926, "learning_rate": 1.0937105641770682e-05, "loss": 0.5241, "step": 6265 }, { "epoch": 0.49, "grad_norm": 1.1902940405277826, "learning_rate": 1.0934603998185338e-05, "loss": 0.5698, "step": 6266 }, { "epoch": 0.49, "grad_norm": 1.0901840936430078, "learning_rate": 1.0932102295593621e-05, "loss": 0.5125, "step": 6267 }, { "epoch": 0.49, "grad_norm": 1.1855604494628034, "learning_rate": 1.0929600534153477e-05, "loss": 0.5043, "step": 6268 }, { "epoch": 0.49, "grad_norm": 1.1662671506383175, "learning_rate": 1.0927098714022854e-05, "loss": 0.5761, "step": 6269 }, { "epoch": 0.49, "grad_norm": 1.1207815659396392, "learning_rate": 1.0924596835359706e-05, "loss": 0.5716, "step": 6270 }, { "epoch": 0.49, "grad_norm": 1.0947266713941182, "learning_rate": 1.092209489832199e-05, "loss": 0.5675, "step": 6271 }, { "epoch": 0.49, "grad_norm": 1.327508087492767, "learning_rate": 1.0919592903067668e-05, "loss": 0.6027, "step": 6272 }, { "epoch": 0.49, "grad_norm": 1.1924033073394782, "learning_rate": 1.09170908497547e-05, "loss": 0.5869, "step": 6273 }, { "epoch": 0.49, "grad_norm": 1.0611016664755974, "learning_rate": 1.0914588738541055e-05, "loss": 0.5745, "step": 6274 }, { "epoch": 0.49, "grad_norm": 1.0868997495793626, "learning_rate": 1.0912086569584702e-05, "loss": 0.537, "step": 6275 }, { "epoch": 0.49, "grad_norm": 1.1296068281438767, "learning_rate": 1.090958434304362e-05, "loss": 0.5627, "step": 6276 }, { "epoch": 0.49, "grad_norm": 1.2191840645953926, "learning_rate": 1.0907082059075786e-05, "loss": 0.5763, "step": 6277 }, { "epoch": 0.49, "grad_norm": 1.052527922307498, "learning_rate": 1.090457971783918e-05, "loss": 0.53, "step": 6278 }, { "epoch": 0.49, "grad_norm": 1.2259940998937324, "learning_rate": 1.0902077319491792e-05, "loss": 0.5499, "step": 6279 }, { "epoch": 0.49, "grad_norm": 1.2058268837711255, "learning_rate": 1.0899574864191607e-05, "loss": 0.5682, "step": 6280 }, { "epoch": 0.49, "grad_norm": 1.2088781750798747, "learning_rate": 1.089707235209662e-05, "loss": 0.603, "step": 6281 }, { "epoch": 0.49, "grad_norm": 1.2780784337297917, "learning_rate": 1.089456978336483e-05, "loss": 0.6067, "step": 6282 }, { "epoch": 0.49, "grad_norm": 1.174701790810231, "learning_rate": 1.089206715815423e-05, "loss": 0.5304, "step": 6283 }, { "epoch": 0.49, "grad_norm": 1.1634918452666847, "learning_rate": 1.0889564476622829e-05, "loss": 0.5746, "step": 6284 }, { "epoch": 0.49, "grad_norm": 1.1119609834254902, "learning_rate": 1.0887061738928632e-05, "loss": 0.4633, "step": 6285 }, { "epoch": 0.49, "grad_norm": 1.1644985739083769, "learning_rate": 1.0884558945229652e-05, "loss": 0.5484, "step": 6286 }, { "epoch": 0.49, "grad_norm": 1.0371188003485314, "learning_rate": 1.0882056095683905e-05, "loss": 0.5112, "step": 6287 }, { "epoch": 0.49, "grad_norm": 1.0866193757464007, "learning_rate": 1.0879553190449402e-05, "loss": 0.5504, "step": 6288 }, { "epoch": 0.49, "grad_norm": 1.2967099923558516, "learning_rate": 1.0877050229684169e-05, "loss": 0.5938, "step": 6289 }, { "epoch": 0.49, "grad_norm": 1.1989126404982595, "learning_rate": 1.087454721354623e-05, "loss": 0.5652, "step": 6290 }, { "epoch": 0.49, "grad_norm": 1.1253676343619814, "learning_rate": 1.0872044142193614e-05, "loss": 0.5584, "step": 6291 }, { "epoch": 0.49, "grad_norm": 1.1026007987241102, "learning_rate": 1.0869541015784353e-05, "loss": 0.5484, "step": 6292 }, { "epoch": 0.49, "grad_norm": 1.0925516921150882, "learning_rate": 1.0867037834476477e-05, "loss": 0.5529, "step": 6293 }, { "epoch": 0.49, "grad_norm": 1.1207733228044954, "learning_rate": 1.0864534598428034e-05, "loss": 0.5793, "step": 6294 }, { "epoch": 0.49, "grad_norm": 1.2878947199211705, "learning_rate": 1.0862031307797059e-05, "loss": 0.5352, "step": 6295 }, { "epoch": 0.49, "grad_norm": 1.162445891566968, "learning_rate": 1.08595279627416e-05, "loss": 0.5072, "step": 6296 }, { "epoch": 0.49, "grad_norm": 1.2525412001086693, "learning_rate": 1.085702456341971e-05, "loss": 0.5884, "step": 6297 }, { "epoch": 0.49, "grad_norm": 1.1914150550391847, "learning_rate": 1.085452110998943e-05, "loss": 0.5651, "step": 6298 }, { "epoch": 0.49, "grad_norm": 1.179156051066751, "learning_rate": 1.0852017602608831e-05, "loss": 0.5558, "step": 6299 }, { "epoch": 0.49, "grad_norm": 1.1901141555110957, "learning_rate": 1.084951404143596e-05, "loss": 0.5582, "step": 6300 }, { "epoch": 0.49, "grad_norm": 1.1525455621414233, "learning_rate": 1.0847010426628888e-05, "loss": 0.5806, "step": 6301 }, { "epoch": 0.49, "grad_norm": 1.245375375345019, "learning_rate": 1.0844506758345676e-05, "loss": 0.5989, "step": 6302 }, { "epoch": 0.49, "grad_norm": 1.2113059683073955, "learning_rate": 1.0842003036744395e-05, "loss": 0.605, "step": 6303 }, { "epoch": 0.49, "grad_norm": 1.1326986321408101, "learning_rate": 1.0839499261983119e-05, "loss": 0.5538, "step": 6304 }, { "epoch": 0.49, "grad_norm": 1.1612838762349662, "learning_rate": 1.0836995434219924e-05, "loss": 0.5823, "step": 6305 }, { "epoch": 0.49, "grad_norm": 1.0554097845970336, "learning_rate": 1.0834491553612889e-05, "loss": 0.5296, "step": 6306 }, { "epoch": 0.49, "grad_norm": 1.127822514185915, "learning_rate": 1.0831987620320096e-05, "loss": 0.5667, "step": 6307 }, { "epoch": 0.49, "grad_norm": 1.2216793459762962, "learning_rate": 1.0829483634499632e-05, "loss": 0.6178, "step": 6308 }, { "epoch": 0.49, "grad_norm": 1.0998509956356397, "learning_rate": 1.0826979596309586e-05, "loss": 0.5163, "step": 6309 }, { "epoch": 0.49, "grad_norm": 1.2600095054479847, "learning_rate": 1.082447550590805e-05, "loss": 0.5673, "step": 6310 }, { "epoch": 0.49, "grad_norm": 1.112431626951256, "learning_rate": 1.0821971363453125e-05, "loss": 0.5487, "step": 6311 }, { "epoch": 0.49, "grad_norm": 1.133429418492436, "learning_rate": 1.0819467169102906e-05, "loss": 0.5242, "step": 6312 }, { "epoch": 0.49, "grad_norm": 1.1067683799566865, "learning_rate": 1.0816962923015495e-05, "loss": 0.5654, "step": 6313 }, { "epoch": 0.49, "grad_norm": 1.1814836382029623, "learning_rate": 1.0814458625349002e-05, "loss": 0.6146, "step": 6314 }, { "epoch": 0.49, "grad_norm": 1.197842482329181, "learning_rate": 1.0811954276261532e-05, "loss": 0.5133, "step": 6315 }, { "epoch": 0.49, "grad_norm": 1.2046544645655877, "learning_rate": 1.08094498759112e-05, "loss": 0.5578, "step": 6316 }, { "epoch": 0.49, "grad_norm": 1.2002400078765783, "learning_rate": 1.0806945424456124e-05, "loss": 0.5531, "step": 6317 }, { "epoch": 0.49, "grad_norm": 1.2547361770631573, "learning_rate": 1.0804440922054415e-05, "loss": 0.6205, "step": 6318 }, { "epoch": 0.49, "grad_norm": 1.0131846526726394, "learning_rate": 1.0801936368864203e-05, "loss": 0.4707, "step": 6319 }, { "epoch": 0.49, "grad_norm": 1.2048835782836915, "learning_rate": 1.0799431765043609e-05, "loss": 0.5665, "step": 6320 }, { "epoch": 0.49, "grad_norm": 1.12954234645605, "learning_rate": 1.0796927110750766e-05, "loss": 0.5378, "step": 6321 }, { "epoch": 0.49, "grad_norm": 1.1835233963304486, "learning_rate": 1.0794422406143802e-05, "loss": 0.5567, "step": 6322 }, { "epoch": 0.49, "grad_norm": 1.1189268750219665, "learning_rate": 1.079191765138085e-05, "loss": 0.5382, "step": 6323 }, { "epoch": 0.49, "grad_norm": 1.14656578697717, "learning_rate": 1.0789412846620052e-05, "loss": 0.561, "step": 6324 }, { "epoch": 0.49, "grad_norm": 1.1947010944939058, "learning_rate": 1.078690799201955e-05, "loss": 0.5726, "step": 6325 }, { "epoch": 0.49, "grad_norm": 1.1365450696890864, "learning_rate": 1.0784403087737487e-05, "loss": 0.5449, "step": 6326 }, { "epoch": 0.49, "grad_norm": 1.1423639080370231, "learning_rate": 1.078189813393201e-05, "loss": 0.53, "step": 6327 }, { "epoch": 0.49, "grad_norm": 1.0225251768785275, "learning_rate": 1.0779393130761267e-05, "loss": 0.5306, "step": 6328 }, { "epoch": 0.49, "grad_norm": 1.118557230362184, "learning_rate": 1.0776888078383415e-05, "loss": 0.51, "step": 6329 }, { "epoch": 0.49, "grad_norm": 1.182975532049757, "learning_rate": 1.0774382976956613e-05, "loss": 0.5317, "step": 6330 }, { "epoch": 0.49, "grad_norm": 1.1382466433362692, "learning_rate": 1.0771877826639016e-05, "loss": 0.5587, "step": 6331 }, { "epoch": 0.49, "grad_norm": 1.1963473558820212, "learning_rate": 1.0769372627588792e-05, "loss": 0.5636, "step": 6332 }, { "epoch": 0.49, "grad_norm": 1.0906775235095851, "learning_rate": 1.0766867379964101e-05, "loss": 0.5241, "step": 6333 }, { "epoch": 0.49, "grad_norm": 1.1583652736642014, "learning_rate": 1.0764362083923117e-05, "loss": 0.5167, "step": 6334 }, { "epoch": 0.49, "grad_norm": 1.2013230143416829, "learning_rate": 1.0761856739624012e-05, "loss": 0.5582, "step": 6335 }, { "epoch": 0.49, "grad_norm": 1.1879429743884447, "learning_rate": 1.0759351347224961e-05, "loss": 0.5507, "step": 6336 }, { "epoch": 0.49, "grad_norm": 1.153090102745418, "learning_rate": 1.0756845906884141e-05, "loss": 0.5871, "step": 6337 }, { "epoch": 0.49, "grad_norm": 1.1621570737650624, "learning_rate": 1.0754340418759734e-05, "loss": 0.556, "step": 6338 }, { "epoch": 0.49, "grad_norm": 1.1537395752077297, "learning_rate": 1.0751834883009922e-05, "loss": 0.5413, "step": 6339 }, { "epoch": 0.49, "grad_norm": 1.2374101818126393, "learning_rate": 1.0749329299792898e-05, "loss": 0.6011, "step": 6340 }, { "epoch": 0.49, "grad_norm": 1.1537678340471695, "learning_rate": 1.074682366926685e-05, "loss": 0.5396, "step": 6341 }, { "epoch": 0.49, "grad_norm": 1.1881184724149834, "learning_rate": 1.074431799158997e-05, "loss": 0.5643, "step": 6342 }, { "epoch": 0.49, "grad_norm": 1.1101484154808001, "learning_rate": 1.0741812266920453e-05, "loss": 0.5376, "step": 6343 }, { "epoch": 0.49, "grad_norm": 1.2491237907270207, "learning_rate": 1.0739306495416502e-05, "loss": 0.578, "step": 6344 }, { "epoch": 0.49, "grad_norm": 1.1629287011291416, "learning_rate": 1.0736800677236316e-05, "loss": 0.5562, "step": 6345 }, { "epoch": 0.49, "grad_norm": 1.1693825728529563, "learning_rate": 1.0734294812538105e-05, "loss": 0.601, "step": 6346 }, { "epoch": 0.49, "grad_norm": 1.247201552688239, "learning_rate": 1.0731788901480071e-05, "loss": 0.5603, "step": 6347 }, { "epoch": 0.49, "grad_norm": 1.1101095427339125, "learning_rate": 1.072928294422043e-05, "loss": 0.5114, "step": 6348 }, { "epoch": 0.49, "grad_norm": 1.1596878596155982, "learning_rate": 1.0726776940917391e-05, "loss": 0.585, "step": 6349 }, { "epoch": 0.49, "grad_norm": 1.1022584488792138, "learning_rate": 1.0724270891729178e-05, "loss": 0.5236, "step": 6350 }, { "epoch": 0.49, "grad_norm": 1.0470160204291132, "learning_rate": 1.0721764796814005e-05, "loss": 0.4791, "step": 6351 }, { "epoch": 0.49, "grad_norm": 1.113496297846422, "learning_rate": 1.0719258656330095e-05, "loss": 0.5885, "step": 6352 }, { "epoch": 0.49, "grad_norm": 1.2683157868410164, "learning_rate": 1.071675247043568e-05, "loss": 0.6067, "step": 6353 }, { "epoch": 0.49, "grad_norm": 1.1515656511323173, "learning_rate": 1.0714246239288977e-05, "loss": 0.5735, "step": 6354 }, { "epoch": 0.49, "grad_norm": 1.187368988037953, "learning_rate": 1.0711739963048229e-05, "loss": 0.5395, "step": 6355 }, { "epoch": 0.49, "grad_norm": 1.1191194275983907, "learning_rate": 1.0709233641871663e-05, "loss": 0.567, "step": 6356 }, { "epoch": 0.49, "grad_norm": 1.2201148484844067, "learning_rate": 1.0706727275917519e-05, "loss": 0.583, "step": 6357 }, { "epoch": 0.49, "grad_norm": 1.1183576524556678, "learning_rate": 1.0704220865344036e-05, "loss": 0.5268, "step": 6358 }, { "epoch": 0.49, "grad_norm": 1.1074173538680858, "learning_rate": 1.0701714410309454e-05, "loss": 0.5301, "step": 6359 }, { "epoch": 0.49, "grad_norm": 1.2581336517718718, "learning_rate": 1.0699207910972022e-05, "loss": 0.6065, "step": 6360 }, { "epoch": 0.49, "grad_norm": 1.198044291898656, "learning_rate": 1.069670136748999e-05, "loss": 0.5631, "step": 6361 }, { "epoch": 0.49, "grad_norm": 1.2029244887482289, "learning_rate": 1.0694194780021603e-05, "loss": 0.567, "step": 6362 }, { "epoch": 0.49, "grad_norm": 1.1244606738611367, "learning_rate": 1.069168814872512e-05, "loss": 0.5642, "step": 6363 }, { "epoch": 0.49, "grad_norm": 1.1542943964123136, "learning_rate": 1.0689181473758793e-05, "loss": 0.5415, "step": 6364 }, { "epoch": 0.49, "grad_norm": 1.106997830916901, "learning_rate": 1.0686674755280886e-05, "loss": 0.5411, "step": 6365 }, { "epoch": 0.49, "grad_norm": 1.2113052794102166, "learning_rate": 1.068416799344966e-05, "loss": 0.5413, "step": 6366 }, { "epoch": 0.49, "grad_norm": 1.190706491560063, "learning_rate": 1.0681661188423373e-05, "loss": 0.5769, "step": 6367 }, { "epoch": 0.49, "grad_norm": 1.1948511567253493, "learning_rate": 1.0679154340360305e-05, "loss": 0.5415, "step": 6368 }, { "epoch": 0.49, "grad_norm": 1.2501847607441907, "learning_rate": 1.0676647449418713e-05, "loss": 0.5796, "step": 6369 }, { "epoch": 0.49, "grad_norm": 1.1075798339141731, "learning_rate": 1.067414051575688e-05, "loss": 0.5408, "step": 6370 }, { "epoch": 0.49, "grad_norm": 1.1636644745406046, "learning_rate": 1.0671633539533082e-05, "loss": 0.5405, "step": 6371 }, { "epoch": 0.49, "grad_norm": 1.2107047072577046, "learning_rate": 1.0669126520905588e-05, "loss": 0.5397, "step": 6372 }, { "epoch": 0.49, "grad_norm": 1.2012178737857797, "learning_rate": 1.0666619460032688e-05, "loss": 0.5712, "step": 6373 }, { "epoch": 0.49, "grad_norm": 1.1286669942893288, "learning_rate": 1.0664112357072658e-05, "loss": 0.4973, "step": 6374 }, { "epoch": 0.49, "grad_norm": 1.1936473842275321, "learning_rate": 1.0661605212183791e-05, "loss": 0.5289, "step": 6375 }, { "epoch": 0.49, "grad_norm": 1.1477808729509356, "learning_rate": 1.0659098025524374e-05, "loss": 0.5972, "step": 6376 }, { "epoch": 0.49, "grad_norm": 1.1864971142270393, "learning_rate": 1.0656590797252697e-05, "loss": 0.5544, "step": 6377 }, { "epoch": 0.49, "grad_norm": 1.2993287921302266, "learning_rate": 1.0654083527527056e-05, "loss": 0.593, "step": 6378 }, { "epoch": 0.49, "grad_norm": 1.2390407315010037, "learning_rate": 1.0651576216505747e-05, "loss": 0.5953, "step": 6379 }, { "epoch": 0.49, "grad_norm": 1.2142161130993105, "learning_rate": 1.0649068864347072e-05, "loss": 0.5534, "step": 6380 }, { "epoch": 0.5, "grad_norm": 1.2344725365069458, "learning_rate": 1.0646561471209328e-05, "loss": 0.5721, "step": 6381 }, { "epoch": 0.5, "grad_norm": 1.2690621785922231, "learning_rate": 1.0644054037250825e-05, "loss": 0.577, "step": 6382 }, { "epoch": 0.5, "grad_norm": 1.1976319790226606, "learning_rate": 1.0641546562629865e-05, "loss": 0.5779, "step": 6383 }, { "epoch": 0.5, "grad_norm": 1.260962481056273, "learning_rate": 1.0639039047504763e-05, "loss": 0.5964, "step": 6384 }, { "epoch": 0.5, "grad_norm": 1.1301798780290067, "learning_rate": 1.0636531492033826e-05, "loss": 0.5542, "step": 6385 }, { "epoch": 0.5, "grad_norm": 1.2013236097315585, "learning_rate": 1.0634023896375376e-05, "loss": 0.5422, "step": 6386 }, { "epoch": 0.5, "grad_norm": 1.219123245208269, "learning_rate": 1.0631516260687722e-05, "loss": 0.592, "step": 6387 }, { "epoch": 0.5, "grad_norm": 1.3344785967239943, "learning_rate": 1.062900858512919e-05, "loss": 0.5912, "step": 6388 }, { "epoch": 0.5, "grad_norm": 1.261559491100791, "learning_rate": 1.06265008698581e-05, "loss": 0.6231, "step": 6389 }, { "epoch": 0.5, "grad_norm": 1.128106120789023, "learning_rate": 1.0623993115032781e-05, "loss": 0.5315, "step": 6390 }, { "epoch": 0.5, "grad_norm": 1.1808282250558813, "learning_rate": 1.0621485320811552e-05, "loss": 0.5405, "step": 6391 }, { "epoch": 0.5, "grad_norm": 1.1439660665574554, "learning_rate": 1.0618977487352752e-05, "loss": 0.5359, "step": 6392 }, { "epoch": 0.5, "grad_norm": 1.2414900063071381, "learning_rate": 1.061646961481471e-05, "loss": 0.6137, "step": 6393 }, { "epoch": 0.5, "grad_norm": 1.136545541682416, "learning_rate": 1.0613961703355758e-05, "loss": 0.5369, "step": 6394 }, { "epoch": 0.5, "grad_norm": 1.221299414908821, "learning_rate": 1.0611453753134237e-05, "loss": 0.6023, "step": 6395 }, { "epoch": 0.5, "grad_norm": 1.0786747843665214, "learning_rate": 1.060894576430849e-05, "loss": 0.5066, "step": 6396 }, { "epoch": 0.5, "grad_norm": 1.2116762430493864, "learning_rate": 1.0606437737036849e-05, "loss": 0.6093, "step": 6397 }, { "epoch": 0.5, "grad_norm": 1.2845438509769538, "learning_rate": 1.0603929671477669e-05, "loss": 0.6411, "step": 6398 }, { "epoch": 0.5, "grad_norm": 1.1755855055182531, "learning_rate": 1.0601421567789289e-05, "loss": 0.5427, "step": 6399 }, { "epoch": 0.5, "grad_norm": 1.14649404492435, "learning_rate": 1.0598913426130067e-05, "loss": 0.5598, "step": 6400 }, { "epoch": 0.5, "grad_norm": 1.2899880525494083, "learning_rate": 1.0596405246658348e-05, "loss": 0.575, "step": 6401 }, { "epoch": 0.5, "grad_norm": 1.112014317310567, "learning_rate": 1.0593897029532487e-05, "loss": 0.5334, "step": 6402 }, { "epoch": 0.5, "grad_norm": 1.0481667591163069, "learning_rate": 1.0591388774910847e-05, "loss": 0.544, "step": 6403 }, { "epoch": 0.5, "grad_norm": 1.1072609328666656, "learning_rate": 1.0588880482951778e-05, "loss": 0.5097, "step": 6404 }, { "epoch": 0.5, "grad_norm": 1.1279984886686873, "learning_rate": 1.0586372153813649e-05, "loss": 0.574, "step": 6405 }, { "epoch": 0.5, "grad_norm": 1.1469497905740156, "learning_rate": 1.058386378765482e-05, "loss": 0.5426, "step": 6406 }, { "epoch": 0.5, "grad_norm": 1.216356617252466, "learning_rate": 1.0581355384633655e-05, "loss": 0.5958, "step": 6407 }, { "epoch": 0.5, "grad_norm": 1.1625914527442378, "learning_rate": 1.0578846944908528e-05, "loss": 0.4933, "step": 6408 }, { "epoch": 0.5, "grad_norm": 1.1409076706208394, "learning_rate": 1.0576338468637805e-05, "loss": 0.591, "step": 6409 }, { "epoch": 0.5, "grad_norm": 1.1989841293799026, "learning_rate": 1.0573829955979864e-05, "loss": 0.5128, "step": 6410 }, { "epoch": 0.5, "grad_norm": 1.2182205713084933, "learning_rate": 1.0571321407093076e-05, "loss": 0.6106, "step": 6411 }, { "epoch": 0.5, "grad_norm": 1.5133196414770653, "learning_rate": 1.0568812822135819e-05, "loss": 0.5609, "step": 6412 }, { "epoch": 0.5, "grad_norm": 1.1435371742894647, "learning_rate": 1.0566304201266473e-05, "loss": 0.5382, "step": 6413 }, { "epoch": 0.5, "grad_norm": 1.212682488069999, "learning_rate": 1.0563795544643422e-05, "loss": 0.5654, "step": 6414 }, { "epoch": 0.5, "grad_norm": 1.1751436044311612, "learning_rate": 1.0561286852425052e-05, "loss": 0.556, "step": 6415 }, { "epoch": 0.5, "grad_norm": 1.297821159379831, "learning_rate": 1.0558778124769747e-05, "loss": 0.5435, "step": 6416 }, { "epoch": 0.5, "grad_norm": 1.2000484516375873, "learning_rate": 1.0556269361835891e-05, "loss": 0.554, "step": 6417 }, { "epoch": 0.5, "grad_norm": 1.1933590253713942, "learning_rate": 1.0553760563781883e-05, "loss": 0.5305, "step": 6418 }, { "epoch": 0.5, "grad_norm": 1.1562032947901797, "learning_rate": 1.0551251730766114e-05, "loss": 0.5307, "step": 6419 }, { "epoch": 0.5, "grad_norm": 1.3157086434131653, "learning_rate": 1.054874286294698e-05, "loss": 0.5819, "step": 6420 }, { "epoch": 0.5, "grad_norm": 1.0745875609779298, "learning_rate": 1.0546233960482876e-05, "loss": 0.523, "step": 6421 }, { "epoch": 0.5, "grad_norm": 1.101834419182634, "learning_rate": 1.0543725023532205e-05, "loss": 0.5474, "step": 6422 }, { "epoch": 0.5, "grad_norm": 1.2312347933996854, "learning_rate": 1.0541216052253366e-05, "loss": 0.5225, "step": 6423 }, { "epoch": 0.5, "grad_norm": 1.143481974647742, "learning_rate": 1.0538707046804768e-05, "loss": 0.5907, "step": 6424 }, { "epoch": 0.5, "grad_norm": 1.3307627345838706, "learning_rate": 1.0536198007344816e-05, "loss": 0.5076, "step": 6425 }, { "epoch": 0.5, "grad_norm": 1.1568975439610916, "learning_rate": 1.0533688934031916e-05, "loss": 0.5882, "step": 6426 }, { "epoch": 0.5, "grad_norm": 1.1576403429377826, "learning_rate": 1.0531179827024478e-05, "loss": 0.5685, "step": 6427 }, { "epoch": 0.5, "grad_norm": 1.322009694336566, "learning_rate": 1.0528670686480918e-05, "loss": 0.5823, "step": 6428 }, { "epoch": 0.5, "grad_norm": 1.1246501060601453, "learning_rate": 1.052616151255965e-05, "loss": 0.5357, "step": 6429 }, { "epoch": 0.5, "grad_norm": 1.1874247577320272, "learning_rate": 1.0523652305419092e-05, "loss": 0.5853, "step": 6430 }, { "epoch": 0.5, "grad_norm": 1.2036285337522425, "learning_rate": 1.0521143065217664e-05, "loss": 0.6928, "step": 6431 }, { "epoch": 0.5, "grad_norm": 1.0968589738071735, "learning_rate": 1.0518633792113782e-05, "loss": 0.5713, "step": 6432 }, { "epoch": 0.5, "grad_norm": 1.1880031824647297, "learning_rate": 1.051612448626587e-05, "loss": 0.5931, "step": 6433 }, { "epoch": 0.5, "grad_norm": 1.1100371090811751, "learning_rate": 1.0513615147832364e-05, "loss": 0.525, "step": 6434 }, { "epoch": 0.5, "grad_norm": 1.1496380609581494, "learning_rate": 1.051110577697168e-05, "loss": 0.607, "step": 6435 }, { "epoch": 0.5, "grad_norm": 1.0676011876875386, "learning_rate": 1.050859637384225e-05, "loss": 0.5838, "step": 6436 }, { "epoch": 0.5, "grad_norm": 1.0737795955145297, "learning_rate": 1.050608693860251e-05, "loss": 0.496, "step": 6437 }, { "epoch": 0.5, "grad_norm": 1.1503433191906365, "learning_rate": 1.0503577471410889e-05, "loss": 0.5445, "step": 6438 }, { "epoch": 0.5, "grad_norm": 1.1423082342667716, "learning_rate": 1.0501067972425824e-05, "loss": 0.5432, "step": 6439 }, { "epoch": 0.5, "grad_norm": 1.1628046087885573, "learning_rate": 1.0498558441805753e-05, "loss": 0.5342, "step": 6440 }, { "epoch": 0.5, "grad_norm": 1.110299920296153, "learning_rate": 1.0496048879709116e-05, "loss": 0.5762, "step": 6441 }, { "epoch": 0.5, "grad_norm": 1.175374261622373, "learning_rate": 1.0493539286294352e-05, "loss": 0.5589, "step": 6442 }, { "epoch": 0.5, "grad_norm": 1.1793212827448147, "learning_rate": 1.0491029661719907e-05, "loss": 0.5008, "step": 6443 }, { "epoch": 0.5, "grad_norm": 2.108561267016032, "learning_rate": 1.0488520006144227e-05, "loss": 0.5186, "step": 6444 }, { "epoch": 0.5, "grad_norm": 1.2138178378789635, "learning_rate": 1.0486010319725759e-05, "loss": 0.6598, "step": 6445 }, { "epoch": 0.5, "grad_norm": 1.1794840150876267, "learning_rate": 1.0483500602622951e-05, "loss": 0.5719, "step": 6446 }, { "epoch": 0.5, "grad_norm": 1.1307855822480315, "learning_rate": 1.0480990854994257e-05, "loss": 0.542, "step": 6447 }, { "epoch": 0.5, "grad_norm": 1.1702967888423044, "learning_rate": 1.0478481076998127e-05, "loss": 0.5754, "step": 6448 }, { "epoch": 0.5, "grad_norm": 1.2183695957529324, "learning_rate": 1.0475971268793019e-05, "loss": 0.5705, "step": 6449 }, { "epoch": 0.5, "grad_norm": 1.1231250503531536, "learning_rate": 1.0473461430537388e-05, "loss": 0.5615, "step": 6450 }, { "epoch": 0.5, "grad_norm": 1.1772062974466484, "learning_rate": 1.0470951562389695e-05, "loss": 0.5459, "step": 6451 }, { "epoch": 0.5, "grad_norm": 1.1594341869618447, "learning_rate": 1.04684416645084e-05, "loss": 0.5658, "step": 6452 }, { "epoch": 0.5, "grad_norm": 1.1167754900747189, "learning_rate": 1.0465931737051964e-05, "loss": 0.5336, "step": 6453 }, { "epoch": 0.5, "grad_norm": 1.1716011744699424, "learning_rate": 1.0463421780178857e-05, "loss": 0.5337, "step": 6454 }, { "epoch": 0.5, "grad_norm": 1.2492031895644735, "learning_rate": 1.0460911794047542e-05, "loss": 0.5967, "step": 6455 }, { "epoch": 0.5, "grad_norm": 1.158638832002312, "learning_rate": 1.0458401778816482e-05, "loss": 0.5402, "step": 6456 }, { "epoch": 0.5, "grad_norm": 1.105887990653393, "learning_rate": 1.0455891734644158e-05, "loss": 0.5824, "step": 6457 }, { "epoch": 0.5, "grad_norm": 1.2536555719994473, "learning_rate": 1.0453381661689035e-05, "loss": 0.5849, "step": 6458 }, { "epoch": 0.5, "grad_norm": 1.2694543433736711, "learning_rate": 1.045087156010959e-05, "loss": 0.6392, "step": 6459 }, { "epoch": 0.5, "grad_norm": 1.0417108272092714, "learning_rate": 1.0448361430064296e-05, "loss": 0.5086, "step": 6460 }, { "epoch": 0.5, "grad_norm": 1.2602374472239826, "learning_rate": 1.044585127171163e-05, "loss": 0.5603, "step": 6461 }, { "epoch": 0.5, "grad_norm": 1.1535407105278217, "learning_rate": 1.0443341085210077e-05, "loss": 0.535, "step": 6462 }, { "epoch": 0.5, "grad_norm": 1.1858109458173658, "learning_rate": 1.0440830870718108e-05, "loss": 0.5779, "step": 6463 }, { "epoch": 0.5, "grad_norm": 1.2357698599338334, "learning_rate": 1.0438320628394219e-05, "loss": 0.4938, "step": 6464 }, { "epoch": 0.5, "grad_norm": 1.1479697280408172, "learning_rate": 1.0435810358396882e-05, "loss": 0.5555, "step": 6465 }, { "epoch": 0.5, "grad_norm": 1.1795676972648603, "learning_rate": 1.043330006088459e-05, "loss": 0.5988, "step": 6466 }, { "epoch": 0.5, "grad_norm": 1.2177928687459614, "learning_rate": 1.0430789736015829e-05, "loss": 0.6048, "step": 6467 }, { "epoch": 0.5, "grad_norm": 1.1370087872973356, "learning_rate": 1.0428279383949089e-05, "loss": 0.5043, "step": 6468 }, { "epoch": 0.5, "grad_norm": 1.164688454093573, "learning_rate": 1.0425769004842865e-05, "loss": 0.5256, "step": 6469 }, { "epoch": 0.5, "grad_norm": 1.2612197408836483, "learning_rate": 1.0423258598855645e-05, "loss": 0.5819, "step": 6470 }, { "epoch": 0.5, "grad_norm": 1.1981013058989534, "learning_rate": 1.0420748166145926e-05, "loss": 0.6174, "step": 6471 }, { "epoch": 0.5, "grad_norm": 1.3651490191625855, "learning_rate": 1.0418237706872206e-05, "loss": 0.596, "step": 6472 }, { "epoch": 0.5, "grad_norm": 1.0885770265716346, "learning_rate": 1.0415727221192977e-05, "loss": 0.5212, "step": 6473 }, { "epoch": 0.5, "grad_norm": 1.0616219202030865, "learning_rate": 1.041321670926675e-05, "loss": 0.4994, "step": 6474 }, { "epoch": 0.5, "grad_norm": 1.1637901141078357, "learning_rate": 1.0410706171252017e-05, "loss": 0.5353, "step": 6475 }, { "epoch": 0.5, "grad_norm": 1.2454489352011935, "learning_rate": 1.0408195607307283e-05, "loss": 0.559, "step": 6476 }, { "epoch": 0.5, "grad_norm": 1.1409057376215426, "learning_rate": 1.0405685017591057e-05, "loss": 0.5341, "step": 6477 }, { "epoch": 0.5, "grad_norm": 1.17630221462805, "learning_rate": 1.040317440226184e-05, "loss": 0.5392, "step": 6478 }, { "epoch": 0.5, "grad_norm": 1.2302056409702453, "learning_rate": 1.0400663761478145e-05, "loss": 0.5698, "step": 6479 }, { "epoch": 0.5, "grad_norm": 1.1188492587080476, "learning_rate": 1.039815309539848e-05, "loss": 0.5897, "step": 6480 }, { "epoch": 0.5, "grad_norm": 1.1265768021139977, "learning_rate": 1.0395642404181355e-05, "loss": 0.5319, "step": 6481 }, { "epoch": 0.5, "grad_norm": 1.2868430711621264, "learning_rate": 1.0393131687985283e-05, "loss": 0.5368, "step": 6482 }, { "epoch": 0.5, "grad_norm": 1.2184903773075373, "learning_rate": 1.039062094696878e-05, "loss": 0.5443, "step": 6483 }, { "epoch": 0.5, "grad_norm": 1.1159259101823455, "learning_rate": 1.038811018129036e-05, "loss": 0.4983, "step": 6484 }, { "epoch": 0.5, "grad_norm": 1.1391438312562574, "learning_rate": 1.0385599391108546e-05, "loss": 0.5673, "step": 6485 }, { "epoch": 0.5, "grad_norm": 1.1781463661583662, "learning_rate": 1.0383088576581847e-05, "loss": 0.5897, "step": 6486 }, { "epoch": 0.5, "grad_norm": 1.1682227633546378, "learning_rate": 1.0380577737868795e-05, "loss": 0.5534, "step": 6487 }, { "epoch": 0.5, "grad_norm": 1.0834568699067162, "learning_rate": 1.0378066875127904e-05, "loss": 0.5223, "step": 6488 }, { "epoch": 0.5, "grad_norm": 1.211989997495161, "learning_rate": 1.03755559885177e-05, "loss": 0.5408, "step": 6489 }, { "epoch": 0.5, "grad_norm": 1.2030624274293904, "learning_rate": 1.0373045078196713e-05, "loss": 0.5529, "step": 6490 }, { "epoch": 0.5, "grad_norm": 1.1346852574474686, "learning_rate": 1.037053414432346e-05, "loss": 0.5373, "step": 6491 }, { "epoch": 0.5, "grad_norm": 0.9813056504814902, "learning_rate": 1.0368023187056477e-05, "loss": 0.4685, "step": 6492 }, { "epoch": 0.5, "grad_norm": 1.224564303552805, "learning_rate": 1.0365512206554294e-05, "loss": 0.5302, "step": 6493 }, { "epoch": 0.5, "grad_norm": 1.2173315254374182, "learning_rate": 1.0363001202975439e-05, "loss": 0.5652, "step": 6494 }, { "epoch": 0.5, "grad_norm": 1.1619547765142086, "learning_rate": 1.0360490176478443e-05, "loss": 0.5281, "step": 6495 }, { "epoch": 0.5, "grad_norm": 1.3244909046216498, "learning_rate": 1.0357979127221842e-05, "loss": 0.618, "step": 6496 }, { "epoch": 0.5, "grad_norm": 1.1800158214316623, "learning_rate": 1.0355468055364171e-05, "loss": 0.5515, "step": 6497 }, { "epoch": 0.5, "grad_norm": 1.1716459940628152, "learning_rate": 1.0352956961063972e-05, "loss": 0.5988, "step": 6498 }, { "epoch": 0.5, "grad_norm": 1.2110682755272661, "learning_rate": 1.0350445844479775e-05, "loss": 0.5478, "step": 6499 }, { "epoch": 0.5, "grad_norm": 1.2359333100953198, "learning_rate": 1.0347934705770126e-05, "loss": 0.6416, "step": 6500 }, { "epoch": 0.5, "grad_norm": 1.0897027474800642, "learning_rate": 1.0345423545093563e-05, "loss": 0.5335, "step": 6501 }, { "epoch": 0.5, "grad_norm": 1.228188135984859, "learning_rate": 1.0342912362608628e-05, "loss": 0.5646, "step": 6502 }, { "epoch": 0.5, "grad_norm": 1.122441667770039, "learning_rate": 1.0340401158473869e-05, "loss": 0.5241, "step": 6503 }, { "epoch": 0.5, "grad_norm": 1.106340314120338, "learning_rate": 1.0337889932847828e-05, "loss": 0.5667, "step": 6504 }, { "epoch": 0.5, "grad_norm": 1.1356259366054169, "learning_rate": 1.0335378685889053e-05, "loss": 0.5444, "step": 6505 }, { "epoch": 0.5, "grad_norm": 1.2017887671328018, "learning_rate": 1.033286741775609e-05, "loss": 0.537, "step": 6506 }, { "epoch": 0.5, "grad_norm": 1.2231736231102452, "learning_rate": 1.0330356128607489e-05, "loss": 0.5794, "step": 6507 }, { "epoch": 0.5, "grad_norm": 1.1158043361957681, "learning_rate": 1.0327844818601802e-05, "loss": 0.5099, "step": 6508 }, { "epoch": 0.5, "grad_norm": 1.1514681835706855, "learning_rate": 1.0325333487897579e-05, "loss": 0.5287, "step": 6509 }, { "epoch": 0.51, "grad_norm": 1.2631800076050756, "learning_rate": 1.0322822136653376e-05, "loss": 0.5816, "step": 6510 }, { "epoch": 0.51, "grad_norm": 1.2516436737444387, "learning_rate": 1.0320310765027746e-05, "loss": 0.5551, "step": 6511 }, { "epoch": 0.51, "grad_norm": 1.0623838697808952, "learning_rate": 1.0317799373179242e-05, "loss": 0.5489, "step": 6512 }, { "epoch": 0.51, "grad_norm": 1.1943054444856593, "learning_rate": 1.0315287961266427e-05, "loss": 0.6022, "step": 6513 }, { "epoch": 0.51, "grad_norm": 1.1144732067332557, "learning_rate": 1.0312776529447857e-05, "loss": 0.5251, "step": 6514 }, { "epoch": 0.51, "grad_norm": 1.0348766021144762, "learning_rate": 1.031026507788209e-05, "loss": 0.5296, "step": 6515 }, { "epoch": 0.51, "grad_norm": 1.0781295333988135, "learning_rate": 1.0307753606727685e-05, "loss": 0.5198, "step": 6516 }, { "epoch": 0.51, "grad_norm": 1.2003042669791053, "learning_rate": 1.0305242116143209e-05, "loss": 0.537, "step": 6517 }, { "epoch": 0.51, "grad_norm": 1.1016393864630574, "learning_rate": 1.0302730606287226e-05, "loss": 0.5368, "step": 6518 }, { "epoch": 0.51, "grad_norm": 1.153430594292088, "learning_rate": 1.0300219077318294e-05, "loss": 0.5304, "step": 6519 }, { "epoch": 0.51, "grad_norm": 1.2458989580066322, "learning_rate": 1.0297707529394984e-05, "loss": 0.5492, "step": 6520 }, { "epoch": 0.51, "grad_norm": 1.1302176913153803, "learning_rate": 1.0295195962675864e-05, "loss": 0.5189, "step": 6521 }, { "epoch": 0.51, "grad_norm": 1.1454132639442505, "learning_rate": 1.0292684377319495e-05, "loss": 0.5302, "step": 6522 }, { "epoch": 0.51, "grad_norm": 1.1407558940874796, "learning_rate": 1.0290172773484455e-05, "loss": 0.5018, "step": 6523 }, { "epoch": 0.51, "grad_norm": 1.0986633567802637, "learning_rate": 1.0287661151329312e-05, "loss": 0.5305, "step": 6524 }, { "epoch": 0.51, "grad_norm": 1.069648369561804, "learning_rate": 1.0285149511012632e-05, "loss": 0.5605, "step": 6525 }, { "epoch": 0.51, "grad_norm": 1.2477564705169022, "learning_rate": 1.0282637852692996e-05, "loss": 0.5725, "step": 6526 }, { "epoch": 0.51, "grad_norm": 1.2772713736931434, "learning_rate": 1.028012617652897e-05, "loss": 0.5525, "step": 6527 }, { "epoch": 0.51, "grad_norm": 1.1024198509891407, "learning_rate": 1.0277614482679136e-05, "loss": 0.5512, "step": 6528 }, { "epoch": 0.51, "grad_norm": 1.1801470436192658, "learning_rate": 1.027510277130207e-05, "loss": 0.5535, "step": 6529 }, { "epoch": 0.51, "grad_norm": 1.1889222813063238, "learning_rate": 1.027259104255634e-05, "loss": 0.5709, "step": 6530 }, { "epoch": 0.51, "grad_norm": 1.1782939334435116, "learning_rate": 1.027007929660054e-05, "loss": 0.5268, "step": 6531 }, { "epoch": 0.51, "grad_norm": 1.1985508055758203, "learning_rate": 1.0267567533593233e-05, "loss": 0.5682, "step": 6532 }, { "epoch": 0.51, "grad_norm": 1.0923754092476652, "learning_rate": 1.026505575369301e-05, "loss": 0.4851, "step": 6533 }, { "epoch": 0.51, "grad_norm": 0.9899474862649464, "learning_rate": 1.0262543957058451e-05, "loss": 0.5371, "step": 6534 }, { "epoch": 0.51, "grad_norm": 1.1415644917861016, "learning_rate": 1.0260032143848134e-05, "loss": 0.5193, "step": 6535 }, { "epoch": 0.51, "grad_norm": 1.1370661358751857, "learning_rate": 1.0257520314220647e-05, "loss": 0.54, "step": 6536 }, { "epoch": 0.51, "grad_norm": 1.0558569182361355, "learning_rate": 1.0255008468334575e-05, "loss": 0.5174, "step": 6537 }, { "epoch": 0.51, "grad_norm": 1.1331114801945867, "learning_rate": 1.0252496606348503e-05, "loss": 0.5731, "step": 6538 }, { "epoch": 0.51, "grad_norm": 1.216038009579673, "learning_rate": 1.024998472842102e-05, "loss": 0.5394, "step": 6539 }, { "epoch": 0.51, "grad_norm": 1.0205418047194605, "learning_rate": 1.0247472834710708e-05, "loss": 0.5132, "step": 6540 }, { "epoch": 0.51, "grad_norm": 1.1261061422619703, "learning_rate": 1.024496092537616e-05, "loss": 0.518, "step": 6541 }, { "epoch": 0.51, "grad_norm": 1.2768046790209342, "learning_rate": 1.0242449000575963e-05, "loss": 0.6223, "step": 6542 }, { "epoch": 0.51, "grad_norm": 1.143730638684103, "learning_rate": 1.0239937060468713e-05, "loss": 0.5265, "step": 6543 }, { "epoch": 0.51, "grad_norm": 1.105533126196717, "learning_rate": 1.0237425105212996e-05, "loss": 0.5309, "step": 6544 }, { "epoch": 0.51, "grad_norm": 1.1376822147515908, "learning_rate": 1.0234913134967409e-05, "loss": 0.5072, "step": 6545 }, { "epoch": 0.51, "grad_norm": 1.3780283956771728, "learning_rate": 1.0232401149890544e-05, "loss": 0.6105, "step": 6546 }, { "epoch": 0.51, "grad_norm": 1.1719564282419253, "learning_rate": 1.0229889150140991e-05, "loss": 0.5812, "step": 6547 }, { "epoch": 0.51, "grad_norm": 1.1801045167024367, "learning_rate": 1.0227377135877354e-05, "loss": 0.6039, "step": 6548 }, { "epoch": 0.51, "grad_norm": 1.3057707441253552, "learning_rate": 1.0224865107258225e-05, "loss": 0.5544, "step": 6549 }, { "epoch": 0.51, "grad_norm": 1.206555615385461, "learning_rate": 1.0222353064442201e-05, "loss": 0.5313, "step": 6550 }, { "epoch": 0.51, "grad_norm": 1.1341920016726996, "learning_rate": 1.0219841007587881e-05, "loss": 0.4894, "step": 6551 }, { "epoch": 0.51, "grad_norm": 1.1782227574272128, "learning_rate": 1.0217328936853864e-05, "loss": 0.5468, "step": 6552 }, { "epoch": 0.51, "grad_norm": 1.0071137245799828, "learning_rate": 1.021481685239875e-05, "loss": 0.5132, "step": 6553 }, { "epoch": 0.51, "grad_norm": 1.2673552189287915, "learning_rate": 1.0212304754381139e-05, "loss": 0.5952, "step": 6554 }, { "epoch": 0.51, "grad_norm": 1.3368441087632277, "learning_rate": 1.0209792642959634e-05, "loss": 0.5348, "step": 6555 }, { "epoch": 0.51, "grad_norm": 1.0995009048652482, "learning_rate": 1.0207280518292837e-05, "loss": 0.5244, "step": 6556 }, { "epoch": 0.51, "grad_norm": 1.1372245371197143, "learning_rate": 1.0204768380539352e-05, "loss": 0.5116, "step": 6557 }, { "epoch": 0.51, "grad_norm": 1.155992015203863, "learning_rate": 1.0202256229857781e-05, "loss": 0.4794, "step": 6558 }, { "epoch": 0.51, "grad_norm": 1.254772184371548, "learning_rate": 1.0199744066406735e-05, "loss": 0.5829, "step": 6559 }, { "epoch": 0.51, "grad_norm": 1.09942245927465, "learning_rate": 1.0197231890344809e-05, "loss": 0.544, "step": 6560 }, { "epoch": 0.51, "grad_norm": 1.2019369526575665, "learning_rate": 1.0194719701830622e-05, "loss": 0.5544, "step": 6561 }, { "epoch": 0.51, "grad_norm": 1.126106724490322, "learning_rate": 1.0192207501022771e-05, "loss": 0.5929, "step": 6562 }, { "epoch": 0.51, "grad_norm": 1.2013904899826602, "learning_rate": 1.0189695288079873e-05, "loss": 0.5481, "step": 6563 }, { "epoch": 0.51, "grad_norm": 1.2206449693178236, "learning_rate": 1.0187183063160531e-05, "loss": 0.6079, "step": 6564 }, { "epoch": 0.51, "grad_norm": 1.114337941664481, "learning_rate": 1.0184670826423355e-05, "loss": 0.5575, "step": 6565 }, { "epoch": 0.51, "grad_norm": 1.2035699987463646, "learning_rate": 1.018215857802696e-05, "loss": 0.5743, "step": 6566 }, { "epoch": 0.51, "grad_norm": 1.1147611185098123, "learning_rate": 1.017964631812995e-05, "loss": 0.552, "step": 6567 }, { "epoch": 0.51, "grad_norm": 1.2153862726131532, "learning_rate": 1.0177134046890944e-05, "loss": 0.5884, "step": 6568 }, { "epoch": 0.51, "grad_norm": 1.1977840626617977, "learning_rate": 1.0174621764468553e-05, "loss": 0.5469, "step": 6569 }, { "epoch": 0.51, "grad_norm": 1.0818145204384482, "learning_rate": 1.0172109471021385e-05, "loss": 0.5288, "step": 6570 }, { "epoch": 0.51, "grad_norm": 1.2089178655568527, "learning_rate": 1.0169597166708061e-05, "loss": 0.5069, "step": 6571 }, { "epoch": 0.51, "grad_norm": 1.1449184348693413, "learning_rate": 1.0167084851687193e-05, "loss": 0.5405, "step": 6572 }, { "epoch": 0.51, "grad_norm": 1.3064836484759563, "learning_rate": 1.0164572526117396e-05, "loss": 0.5443, "step": 6573 }, { "epoch": 0.51, "grad_norm": 1.2010027014219982, "learning_rate": 1.0162060190157285e-05, "loss": 0.5643, "step": 6574 }, { "epoch": 0.51, "grad_norm": 1.2232356054697529, "learning_rate": 1.015954784396548e-05, "loss": 0.5998, "step": 6575 }, { "epoch": 0.51, "grad_norm": 1.2996876102859292, "learning_rate": 1.0157035487700592e-05, "loss": 0.6302, "step": 6576 }, { "epoch": 0.51, "grad_norm": 1.1514674588739016, "learning_rate": 1.0154523121521249e-05, "loss": 0.5674, "step": 6577 }, { "epoch": 0.51, "grad_norm": 1.0995095785330056, "learning_rate": 1.0152010745586064e-05, "loss": 0.5496, "step": 6578 }, { "epoch": 0.51, "grad_norm": 1.1704674978457041, "learning_rate": 1.0149498360053656e-05, "loss": 0.5491, "step": 6579 }, { "epoch": 0.51, "grad_norm": 1.2343202892685265, "learning_rate": 1.014698596508264e-05, "loss": 0.5643, "step": 6580 }, { "epoch": 0.51, "grad_norm": 1.1821688393580443, "learning_rate": 1.0144473560831645e-05, "loss": 0.5698, "step": 6581 }, { "epoch": 0.51, "grad_norm": 1.1871477407699609, "learning_rate": 1.0141961147459289e-05, "loss": 0.5708, "step": 6582 }, { "epoch": 0.51, "grad_norm": 1.1251248184473497, "learning_rate": 1.0139448725124194e-05, "loss": 0.5394, "step": 6583 }, { "epoch": 0.51, "grad_norm": 1.2159984044547656, "learning_rate": 1.0136936293984983e-05, "loss": 0.5143, "step": 6584 }, { "epoch": 0.51, "grad_norm": 1.1610665394676354, "learning_rate": 1.0134423854200274e-05, "loss": 0.5475, "step": 6585 }, { "epoch": 0.51, "grad_norm": 1.0504687103266939, "learning_rate": 1.0131911405928694e-05, "loss": 0.4593, "step": 6586 }, { "epoch": 0.51, "grad_norm": 1.0737200880047597, "learning_rate": 1.0129398949328868e-05, "loss": 0.5123, "step": 6587 }, { "epoch": 0.51, "grad_norm": 1.230050927325156, "learning_rate": 1.0126886484559417e-05, "loss": 0.5036, "step": 6588 }, { "epoch": 0.51, "grad_norm": 1.101732011021924, "learning_rate": 1.012437401177897e-05, "loss": 0.5352, "step": 6589 }, { "epoch": 0.51, "grad_norm": 1.1523324222331206, "learning_rate": 1.0121861531146147e-05, "loss": 0.5841, "step": 6590 }, { "epoch": 0.51, "grad_norm": 1.2120752219755637, "learning_rate": 1.0119349042819578e-05, "loss": 0.5782, "step": 6591 }, { "epoch": 0.51, "grad_norm": 1.2562836542888172, "learning_rate": 1.0116836546957891e-05, "loss": 0.5675, "step": 6592 }, { "epoch": 0.51, "grad_norm": 1.190674203571873, "learning_rate": 1.0114324043719705e-05, "loss": 0.5856, "step": 6593 }, { "epoch": 0.51, "grad_norm": 1.1906225910312582, "learning_rate": 1.0111811533263656e-05, "loss": 0.5644, "step": 6594 }, { "epoch": 0.51, "grad_norm": 1.2795944217278874, "learning_rate": 1.0109299015748364e-05, "loss": 0.5661, "step": 6595 }, { "epoch": 0.51, "grad_norm": 1.3449736944980009, "learning_rate": 1.0106786491332464e-05, "loss": 0.5914, "step": 6596 }, { "epoch": 0.51, "grad_norm": 1.2152631225913308, "learning_rate": 1.0104273960174584e-05, "loss": 0.5965, "step": 6597 }, { "epoch": 0.51, "grad_norm": 1.1265948964194838, "learning_rate": 1.0101761422433348e-05, "loss": 0.5187, "step": 6598 }, { "epoch": 0.51, "grad_norm": 1.2651839253000516, "learning_rate": 1.0099248878267387e-05, "loss": 0.5677, "step": 6599 }, { "epoch": 0.51, "grad_norm": 1.195605971269191, "learning_rate": 1.0096736327835335e-05, "loss": 0.5369, "step": 6600 }, { "epoch": 0.51, "grad_norm": 1.1175418105187145, "learning_rate": 1.0094223771295817e-05, "loss": 0.5727, "step": 6601 }, { "epoch": 0.51, "grad_norm": 1.2082615973565014, "learning_rate": 1.0091711208807471e-05, "loss": 0.5648, "step": 6602 }, { "epoch": 0.51, "grad_norm": 1.1560729638736833, "learning_rate": 1.008919864052892e-05, "loss": 0.5104, "step": 6603 }, { "epoch": 0.51, "grad_norm": 1.1925774992545393, "learning_rate": 1.0086686066618795e-05, "loss": 0.5291, "step": 6604 }, { "epoch": 0.51, "grad_norm": 1.1949278268606622, "learning_rate": 1.0084173487235737e-05, "loss": 0.6123, "step": 6605 }, { "epoch": 0.51, "grad_norm": 1.095876588197386, "learning_rate": 1.008166090253837e-05, "loss": 0.5442, "step": 6606 }, { "epoch": 0.51, "grad_norm": 1.2621101268410695, "learning_rate": 1.0079148312685334e-05, "loss": 0.5848, "step": 6607 }, { "epoch": 0.51, "grad_norm": 1.172862997007255, "learning_rate": 1.007663571783525e-05, "loss": 0.5713, "step": 6608 }, { "epoch": 0.51, "grad_norm": 1.1865492076194548, "learning_rate": 1.007412311814676e-05, "loss": 0.5255, "step": 6609 }, { "epoch": 0.51, "grad_norm": 1.195853815113784, "learning_rate": 1.0071610513778494e-05, "loss": 0.5771, "step": 6610 }, { "epoch": 0.51, "grad_norm": 1.0966509359230607, "learning_rate": 1.0069097904889087e-05, "loss": 0.5368, "step": 6611 }, { "epoch": 0.51, "grad_norm": 1.1946093916049472, "learning_rate": 1.0066585291637176e-05, "loss": 0.6046, "step": 6612 }, { "epoch": 0.51, "grad_norm": 1.1394684558735477, "learning_rate": 1.006407267418139e-05, "loss": 0.5404, "step": 6613 }, { "epoch": 0.51, "grad_norm": 1.1732227967134585, "learning_rate": 1.0061560052680363e-05, "loss": 0.5562, "step": 6614 }, { "epoch": 0.51, "grad_norm": 1.1894899559895769, "learning_rate": 1.0059047427292736e-05, "loss": 0.5676, "step": 6615 }, { "epoch": 0.51, "grad_norm": 1.2033830836535635, "learning_rate": 1.0056534798177138e-05, "loss": 0.5798, "step": 6616 }, { "epoch": 0.51, "grad_norm": 1.2348429118958113, "learning_rate": 1.005402216549221e-05, "loss": 0.5515, "step": 6617 }, { "epoch": 0.51, "grad_norm": 1.103960173869838, "learning_rate": 1.005150952939658e-05, "loss": 0.5235, "step": 6618 }, { "epoch": 0.51, "grad_norm": 1.1810135619881144, "learning_rate": 1.0048996890048886e-05, "loss": 0.527, "step": 6619 }, { "epoch": 0.51, "grad_norm": 1.0970790336361398, "learning_rate": 1.004648424760777e-05, "loss": 0.4899, "step": 6620 }, { "epoch": 0.51, "grad_norm": 1.1840508211092986, "learning_rate": 1.0043971602231862e-05, "loss": 0.5785, "step": 6621 }, { "epoch": 0.51, "grad_norm": 1.1611851198367176, "learning_rate": 1.0041458954079801e-05, "loss": 0.5741, "step": 6622 }, { "epoch": 0.51, "grad_norm": 1.105816358499722, "learning_rate": 1.003894630331022e-05, "loss": 0.5021, "step": 6623 }, { "epoch": 0.51, "grad_norm": 1.1876195546250112, "learning_rate": 1.0036433650081759e-05, "loss": 0.518, "step": 6624 }, { "epoch": 0.51, "grad_norm": 1.235508699939931, "learning_rate": 1.0033920994553054e-05, "loss": 0.5556, "step": 6625 }, { "epoch": 0.51, "grad_norm": 1.1087737267330544, "learning_rate": 1.003140833688274e-05, "loss": 0.4528, "step": 6626 }, { "epoch": 0.51, "grad_norm": 1.1096194897809544, "learning_rate": 1.0028895677229458e-05, "loss": 0.5449, "step": 6627 }, { "epoch": 0.51, "grad_norm": 1.111613634170427, "learning_rate": 1.0026383015751844e-05, "loss": 0.5611, "step": 6628 }, { "epoch": 0.51, "grad_norm": 1.209598071203132, "learning_rate": 1.0023870352608529e-05, "loss": 0.5912, "step": 6629 }, { "epoch": 0.51, "grad_norm": 1.1306951268485905, "learning_rate": 1.002135768795816e-05, "loss": 0.4876, "step": 6630 }, { "epoch": 0.51, "grad_norm": 1.1687027273970612, "learning_rate": 1.0018845021959368e-05, "loss": 0.5534, "step": 6631 }, { "epoch": 0.51, "grad_norm": 1.1685012573085252, "learning_rate": 1.0016332354770792e-05, "loss": 0.5429, "step": 6632 }, { "epoch": 0.51, "grad_norm": 1.1147397843598823, "learning_rate": 1.0013819686551074e-05, "loss": 0.5024, "step": 6633 }, { "epoch": 0.51, "grad_norm": 1.1629865141043234, "learning_rate": 1.0011307017458843e-05, "loss": 0.5447, "step": 6634 }, { "epoch": 0.51, "grad_norm": 1.1114728193164345, "learning_rate": 1.0008794347652744e-05, "loss": 0.5397, "step": 6635 }, { "epoch": 0.51, "grad_norm": 1.0910426282972538, "learning_rate": 1.0006281677291411e-05, "loss": 0.5077, "step": 6636 }, { "epoch": 0.51, "grad_norm": 1.199703106312897, "learning_rate": 1.0003769006533486e-05, "loss": 0.5656, "step": 6637 }, { "epoch": 0.51, "grad_norm": 1.1136969074819794, "learning_rate": 1.0001256335537604e-05, "loss": 0.5386, "step": 6638 }, { "epoch": 0.52, "grad_norm": 1.123661887901538, "learning_rate": 9.998743664462401e-06, "loss": 0.5245, "step": 6639 }, { "epoch": 0.52, "grad_norm": 1.1360052956941233, "learning_rate": 9.996230993466517e-06, "loss": 0.5185, "step": 6640 }, { "epoch": 0.52, "grad_norm": 1.1896329095578912, "learning_rate": 9.993718322708592e-06, "loss": 0.5385, "step": 6641 }, { "epoch": 0.52, "grad_norm": 1.1937608808251252, "learning_rate": 9.99120565234726e-06, "loss": 0.5775, "step": 6642 }, { "epoch": 0.52, "grad_norm": 1.1417846532866414, "learning_rate": 9.988692982541159e-06, "loss": 0.5625, "step": 6643 }, { "epoch": 0.52, "grad_norm": 1.2271778482631197, "learning_rate": 9.986180313448933e-06, "loss": 0.558, "step": 6644 }, { "epoch": 0.52, "grad_norm": 1.1966580067235342, "learning_rate": 9.98366764522921e-06, "loss": 0.5977, "step": 6645 }, { "epoch": 0.52, "grad_norm": 1.1479942867854638, "learning_rate": 9.981154978040636e-06, "loss": 0.5413, "step": 6646 }, { "epoch": 0.52, "grad_norm": 1.1383323621303572, "learning_rate": 9.978642312041843e-06, "loss": 0.6055, "step": 6647 }, { "epoch": 0.52, "grad_norm": 1.176632899069099, "learning_rate": 9.976129647391471e-06, "loss": 0.5126, "step": 6648 }, { "epoch": 0.52, "grad_norm": 1.1551517219305074, "learning_rate": 9.973616984248162e-06, "loss": 0.5925, "step": 6649 }, { "epoch": 0.52, "grad_norm": 1.2088785202200736, "learning_rate": 9.971104322770545e-06, "loss": 0.6194, "step": 6650 }, { "epoch": 0.52, "grad_norm": 1.1984971947953453, "learning_rate": 9.968591663117263e-06, "loss": 0.485, "step": 6651 }, { "epoch": 0.52, "grad_norm": 1.1998027659293227, "learning_rate": 9.966079005446949e-06, "loss": 0.5658, "step": 6652 }, { "epoch": 0.52, "grad_norm": 1.2112554317388005, "learning_rate": 9.963566349918243e-06, "loss": 0.5284, "step": 6653 }, { "epoch": 0.52, "grad_norm": 1.0795774213265117, "learning_rate": 9.961053696689785e-06, "loss": 0.464, "step": 6654 }, { "epoch": 0.52, "grad_norm": 1.3131545569076983, "learning_rate": 9.958541045920204e-06, "loss": 0.5983, "step": 6655 }, { "epoch": 0.52, "grad_norm": 1.0439433141534018, "learning_rate": 9.956028397768143e-06, "loss": 0.5037, "step": 6656 }, { "epoch": 0.52, "grad_norm": 1.0522319323655531, "learning_rate": 9.953515752392233e-06, "loss": 0.5325, "step": 6657 }, { "epoch": 0.52, "grad_norm": 1.0810909110102431, "learning_rate": 9.951003109951114e-06, "loss": 0.5279, "step": 6658 }, { "epoch": 0.52, "grad_norm": 1.109893784982816, "learning_rate": 9.948490470603425e-06, "loss": 0.5258, "step": 6659 }, { "epoch": 0.52, "grad_norm": 1.1533490467287377, "learning_rate": 9.945977834507796e-06, "loss": 0.5382, "step": 6660 }, { "epoch": 0.52, "grad_norm": 1.0864777353272719, "learning_rate": 9.943465201822865e-06, "loss": 0.5156, "step": 6661 }, { "epoch": 0.52, "grad_norm": 1.116589899887985, "learning_rate": 9.940952572707267e-06, "loss": 0.5135, "step": 6662 }, { "epoch": 0.52, "grad_norm": 1.1224579701700543, "learning_rate": 9.938439947319639e-06, "loss": 0.5442, "step": 6663 }, { "epoch": 0.52, "grad_norm": 1.2587810601926352, "learning_rate": 9.935927325818616e-06, "loss": 0.5419, "step": 6664 }, { "epoch": 0.52, "grad_norm": 1.0638526832876256, "learning_rate": 9.933414708362829e-06, "loss": 0.548, "step": 6665 }, { "epoch": 0.52, "grad_norm": 1.3376120172812929, "learning_rate": 9.930902095110916e-06, "loss": 0.5877, "step": 6666 }, { "epoch": 0.52, "grad_norm": 1.0784191891208117, "learning_rate": 9.928389486221507e-06, "loss": 0.49, "step": 6667 }, { "epoch": 0.52, "grad_norm": 1.232844695817907, "learning_rate": 9.925876881853242e-06, "loss": 0.5598, "step": 6668 }, { "epoch": 0.52, "grad_norm": 1.2627350106898854, "learning_rate": 9.92336428216475e-06, "loss": 0.6332, "step": 6669 }, { "epoch": 0.52, "grad_norm": 1.175360670940941, "learning_rate": 9.920851687314673e-06, "loss": 0.5429, "step": 6670 }, { "epoch": 0.52, "grad_norm": 1.2129608477966873, "learning_rate": 9.918339097461631e-06, "loss": 0.5597, "step": 6671 }, { "epoch": 0.52, "grad_norm": 1.2944959397412539, "learning_rate": 9.915826512764265e-06, "loss": 0.5731, "step": 6672 }, { "epoch": 0.52, "grad_norm": 1.1980466799740528, "learning_rate": 9.913313933381203e-06, "loss": 0.5604, "step": 6673 }, { "epoch": 0.52, "grad_norm": 1.1586571972503563, "learning_rate": 9.91080135947108e-06, "loss": 0.5591, "step": 6674 }, { "epoch": 0.52, "grad_norm": 1.2079291106038474, "learning_rate": 9.908288791192532e-06, "loss": 0.6064, "step": 6675 }, { "epoch": 0.52, "grad_norm": 1.1589997046587932, "learning_rate": 9.905776228704185e-06, "loss": 0.5198, "step": 6676 }, { "epoch": 0.52, "grad_norm": 1.0285197442273248, "learning_rate": 9.903263672164668e-06, "loss": 0.5242, "step": 6677 }, { "epoch": 0.52, "grad_norm": 1.135374657556637, "learning_rate": 9.900751121732613e-06, "loss": 0.5524, "step": 6678 }, { "epoch": 0.52, "grad_norm": 1.1864209040542244, "learning_rate": 9.898238577566654e-06, "loss": 0.5433, "step": 6679 }, { "epoch": 0.52, "grad_norm": 1.193954943495299, "learning_rate": 9.895726039825421e-06, "loss": 0.5374, "step": 6680 }, { "epoch": 0.52, "grad_norm": 1.1034898603014098, "learning_rate": 9.89321350866754e-06, "loss": 0.5038, "step": 6681 }, { "epoch": 0.52, "grad_norm": 1.1904055035131913, "learning_rate": 9.890700984251638e-06, "loss": 0.5619, "step": 6682 }, { "epoch": 0.52, "grad_norm": 1.0613637907671543, "learning_rate": 9.888188466736347e-06, "loss": 0.5205, "step": 6683 }, { "epoch": 0.52, "grad_norm": 1.1402003921928328, "learning_rate": 9.885675956280295e-06, "loss": 0.5368, "step": 6684 }, { "epoch": 0.52, "grad_norm": 1.2505179762526284, "learning_rate": 9.883163453042115e-06, "loss": 0.6086, "step": 6685 }, { "epoch": 0.52, "grad_norm": 1.2176526827315106, "learning_rate": 9.880650957180427e-06, "loss": 0.5601, "step": 6686 }, { "epoch": 0.52, "grad_norm": 1.1829621798547385, "learning_rate": 9.878138468853856e-06, "loss": 0.5446, "step": 6687 }, { "epoch": 0.52, "grad_norm": 1.1524636933572054, "learning_rate": 9.875625988221033e-06, "loss": 0.5235, "step": 6688 }, { "epoch": 0.52, "grad_norm": 1.104295249265777, "learning_rate": 9.873113515440583e-06, "loss": 0.542, "step": 6689 }, { "epoch": 0.52, "grad_norm": 1.1996496960823801, "learning_rate": 9.870601050671137e-06, "loss": 0.5884, "step": 6690 }, { "epoch": 0.52, "grad_norm": 1.158612852652886, "learning_rate": 9.86808859407131e-06, "loss": 0.5379, "step": 6691 }, { "epoch": 0.52, "grad_norm": 1.2031040437532907, "learning_rate": 9.865576145799729e-06, "loss": 0.5425, "step": 6692 }, { "epoch": 0.52, "grad_norm": 1.1316717784150023, "learning_rate": 9.86306370601502e-06, "loss": 0.517, "step": 6693 }, { "epoch": 0.52, "grad_norm": 1.325490470293655, "learning_rate": 9.860551274875806e-06, "loss": 0.5813, "step": 6694 }, { "epoch": 0.52, "grad_norm": 1.2344710880013376, "learning_rate": 9.858038852540713e-06, "loss": 0.5207, "step": 6695 }, { "epoch": 0.52, "grad_norm": 1.1529321752253516, "learning_rate": 9.855526439168359e-06, "loss": 0.5231, "step": 6696 }, { "epoch": 0.52, "grad_norm": 1.1133799726649902, "learning_rate": 9.853014034917361e-06, "loss": 0.5354, "step": 6697 }, { "epoch": 0.52, "grad_norm": 1.2422150422887546, "learning_rate": 9.850501639946349e-06, "loss": 0.5913, "step": 6698 }, { "epoch": 0.52, "grad_norm": 1.2543223038397229, "learning_rate": 9.847989254413938e-06, "loss": 0.565, "step": 6699 }, { "epoch": 0.52, "grad_norm": 1.172135947102419, "learning_rate": 9.845476878478754e-06, "loss": 0.5242, "step": 6700 }, { "epoch": 0.52, "grad_norm": 1.3321386389079346, "learning_rate": 9.84296451229941e-06, "loss": 0.608, "step": 6701 }, { "epoch": 0.52, "grad_norm": 1.127404557225205, "learning_rate": 9.840452156034523e-06, "loss": 0.5198, "step": 6702 }, { "epoch": 0.52, "grad_norm": 1.128997799022276, "learning_rate": 9.837939809842717e-06, "loss": 0.4907, "step": 6703 }, { "epoch": 0.52, "grad_norm": 1.0543452519967167, "learning_rate": 9.835427473882606e-06, "loss": 0.5482, "step": 6704 }, { "epoch": 0.52, "grad_norm": 1.1739258685808915, "learning_rate": 9.83291514831281e-06, "loss": 0.5725, "step": 6705 }, { "epoch": 0.52, "grad_norm": 1.18268084177563, "learning_rate": 9.830402833291942e-06, "loss": 0.5413, "step": 6706 }, { "epoch": 0.52, "grad_norm": 1.1920730975895073, "learning_rate": 9.827890528978617e-06, "loss": 0.574, "step": 6707 }, { "epoch": 0.52, "grad_norm": 1.1162062917697777, "learning_rate": 9.82537823553145e-06, "loss": 0.541, "step": 6708 }, { "epoch": 0.52, "grad_norm": 1.2010820060543905, "learning_rate": 9.822865953109055e-06, "loss": 0.5556, "step": 6709 }, { "epoch": 0.52, "grad_norm": 1.1876201568848452, "learning_rate": 9.820353681870052e-06, "loss": 0.5575, "step": 6710 }, { "epoch": 0.52, "grad_norm": 1.1585664999310887, "learning_rate": 9.817841421973046e-06, "loss": 0.534, "step": 6711 }, { "epoch": 0.52, "grad_norm": 1.1678580037051753, "learning_rate": 9.815329173576648e-06, "loss": 0.5664, "step": 6712 }, { "epoch": 0.52, "grad_norm": 1.1033214296945408, "learning_rate": 9.812816936839472e-06, "loss": 0.528, "step": 6713 }, { "epoch": 0.52, "grad_norm": 1.1152892090900546, "learning_rate": 9.810304711920127e-06, "loss": 0.5323, "step": 6714 }, { "epoch": 0.52, "grad_norm": 1.1848273065334767, "learning_rate": 9.80779249897723e-06, "loss": 0.5652, "step": 6715 }, { "epoch": 0.52, "grad_norm": 1.1234445415164733, "learning_rate": 9.805280298169383e-06, "loss": 0.562, "step": 6716 }, { "epoch": 0.52, "grad_norm": 1.0949469964598548, "learning_rate": 9.802768109655192e-06, "loss": 0.5287, "step": 6717 }, { "epoch": 0.52, "grad_norm": 1.250375881900646, "learning_rate": 9.800255933593269e-06, "loss": 0.5525, "step": 6718 }, { "epoch": 0.52, "grad_norm": 1.0301054758860864, "learning_rate": 9.797743770142219e-06, "loss": 0.5037, "step": 6719 }, { "epoch": 0.52, "grad_norm": 1.1472910653846946, "learning_rate": 9.795231619460652e-06, "loss": 0.5301, "step": 6720 }, { "epoch": 0.52, "grad_norm": 1.0766018598694587, "learning_rate": 9.792719481707168e-06, "loss": 0.4882, "step": 6721 }, { "epoch": 0.52, "grad_norm": 1.1165987077122363, "learning_rate": 9.79020735704037e-06, "loss": 0.506, "step": 6722 }, { "epoch": 0.52, "grad_norm": 1.0982313176959915, "learning_rate": 9.787695245618864e-06, "loss": 0.5402, "step": 6723 }, { "epoch": 0.52, "grad_norm": 1.104722974496296, "learning_rate": 9.785183147601252e-06, "loss": 0.534, "step": 6724 }, { "epoch": 0.52, "grad_norm": 1.1942002850255082, "learning_rate": 9.782671063146138e-06, "loss": 0.5297, "step": 6725 }, { "epoch": 0.52, "grad_norm": 1.2304632762000023, "learning_rate": 9.780158992412124e-06, "loss": 0.5548, "step": 6726 }, { "epoch": 0.52, "grad_norm": 1.038458110594458, "learning_rate": 9.777646935557802e-06, "loss": 0.5134, "step": 6727 }, { "epoch": 0.52, "grad_norm": 1.2464058225166397, "learning_rate": 9.775134892741778e-06, "loss": 0.5645, "step": 6728 }, { "epoch": 0.52, "grad_norm": 1.1757873332224278, "learning_rate": 9.772622864122645e-06, "loss": 0.5481, "step": 6729 }, { "epoch": 0.52, "grad_norm": 1.0662611977994314, "learning_rate": 9.770110849859009e-06, "loss": 0.5524, "step": 6730 }, { "epoch": 0.52, "grad_norm": 1.1548211832051936, "learning_rate": 9.767598850109463e-06, "loss": 0.5567, "step": 6731 }, { "epoch": 0.52, "grad_norm": 1.1617516231439333, "learning_rate": 9.765086865032596e-06, "loss": 0.4977, "step": 6732 }, { "epoch": 0.52, "grad_norm": 1.3047199359448107, "learning_rate": 9.762574894787006e-06, "loss": 0.5427, "step": 6733 }, { "epoch": 0.52, "grad_norm": 1.203732126802435, "learning_rate": 9.760062939531289e-06, "loss": 0.5459, "step": 6734 }, { "epoch": 0.52, "grad_norm": 1.1284393084038036, "learning_rate": 9.757550999424038e-06, "loss": 0.5466, "step": 6735 }, { "epoch": 0.52, "grad_norm": 1.1869552266193888, "learning_rate": 9.755039074623846e-06, "loss": 0.5742, "step": 6736 }, { "epoch": 0.52, "grad_norm": 1.632009865642161, "learning_rate": 9.752527165289297e-06, "loss": 0.6006, "step": 6737 }, { "epoch": 0.52, "grad_norm": 1.2485300480958519, "learning_rate": 9.750015271578982e-06, "loss": 0.5613, "step": 6738 }, { "epoch": 0.52, "grad_norm": 1.1568116035587794, "learning_rate": 9.747503393651499e-06, "loss": 0.5488, "step": 6739 }, { "epoch": 0.52, "grad_norm": 1.0118085313234109, "learning_rate": 9.744991531665425e-06, "loss": 0.5363, "step": 6740 }, { "epoch": 0.52, "grad_norm": 1.1239115960152977, "learning_rate": 9.742479685779356e-06, "loss": 0.5595, "step": 6741 }, { "epoch": 0.52, "grad_norm": 1.262332306479604, "learning_rate": 9.739967856151868e-06, "loss": 0.5743, "step": 6742 }, { "epoch": 0.52, "grad_norm": 1.251172660091367, "learning_rate": 9.737456042941552e-06, "loss": 0.5425, "step": 6743 }, { "epoch": 0.52, "grad_norm": 1.156354538599304, "learning_rate": 9.734944246306994e-06, "loss": 0.4942, "step": 6744 }, { "epoch": 0.52, "grad_norm": 1.2061475463208424, "learning_rate": 9.732432466406769e-06, "loss": 0.5433, "step": 6745 }, { "epoch": 0.52, "grad_norm": 1.1490041152442163, "learning_rate": 9.729920703399468e-06, "loss": 0.5294, "step": 6746 }, { "epoch": 0.52, "grad_norm": 1.170693730395212, "learning_rate": 9.727408957443661e-06, "loss": 0.5873, "step": 6747 }, { "epoch": 0.52, "grad_norm": 1.1811728316032652, "learning_rate": 9.724897228697933e-06, "loss": 0.5533, "step": 6748 }, { "epoch": 0.52, "grad_norm": 1.2222789633228246, "learning_rate": 9.722385517320866e-06, "loss": 0.5175, "step": 6749 }, { "epoch": 0.52, "grad_norm": 1.0903614960997197, "learning_rate": 9.71987382347103e-06, "loss": 0.5097, "step": 6750 }, { "epoch": 0.52, "grad_norm": 1.103902455274103, "learning_rate": 9.717362147307009e-06, "loss": 0.5288, "step": 6751 }, { "epoch": 0.52, "grad_norm": 1.1689437827958349, "learning_rate": 9.714850488987371e-06, "loss": 0.531, "step": 6752 }, { "epoch": 0.52, "grad_norm": 1.1734801425627166, "learning_rate": 9.712338848670691e-06, "loss": 0.5627, "step": 6753 }, { "epoch": 0.52, "grad_norm": 1.108512381860031, "learning_rate": 9.709827226515547e-06, "loss": 0.4915, "step": 6754 }, { "epoch": 0.52, "grad_norm": 1.0919641356395131, "learning_rate": 9.707315622680505e-06, "loss": 0.4829, "step": 6755 }, { "epoch": 0.52, "grad_norm": 1.157064923936836, "learning_rate": 9.70480403732414e-06, "loss": 0.5811, "step": 6756 }, { "epoch": 0.52, "grad_norm": 1.182154419224635, "learning_rate": 9.702292470605017e-06, "loss": 0.562, "step": 6757 }, { "epoch": 0.52, "grad_norm": 1.1521575260014056, "learning_rate": 9.699780922681707e-06, "loss": 0.6059, "step": 6758 }, { "epoch": 0.52, "grad_norm": 1.1294052446766776, "learning_rate": 9.697269393712779e-06, "loss": 0.5141, "step": 6759 }, { "epoch": 0.52, "grad_norm": 1.1084767318162467, "learning_rate": 9.694757883856791e-06, "loss": 0.4807, "step": 6760 }, { "epoch": 0.52, "grad_norm": 1.081503012717782, "learning_rate": 9.69224639327232e-06, "loss": 0.5312, "step": 6761 }, { "epoch": 0.52, "grad_norm": 1.1142215973621492, "learning_rate": 9.689734922117915e-06, "loss": 0.4868, "step": 6762 }, { "epoch": 0.52, "grad_norm": 1.1839190750464534, "learning_rate": 9.687223470552146e-06, "loss": 0.545, "step": 6763 }, { "epoch": 0.52, "grad_norm": 1.2167807588688702, "learning_rate": 9.684712038733575e-06, "loss": 0.547, "step": 6764 }, { "epoch": 0.52, "grad_norm": 1.1254207036382493, "learning_rate": 9.682200626820758e-06, "loss": 0.5596, "step": 6765 }, { "epoch": 0.52, "grad_norm": 1.1276042253994945, "learning_rate": 9.679689234972259e-06, "loss": 0.5294, "step": 6766 }, { "epoch": 0.52, "grad_norm": 1.1455863802113557, "learning_rate": 9.677177863346627e-06, "loss": 0.5391, "step": 6767 }, { "epoch": 0.53, "grad_norm": 1.1438606041965453, "learning_rate": 9.674666512102423e-06, "loss": 0.4781, "step": 6768 }, { "epoch": 0.53, "grad_norm": 1.190637609460096, "learning_rate": 9.672155181398201e-06, "loss": 0.5484, "step": 6769 }, { "epoch": 0.53, "grad_norm": 1.0915448438700075, "learning_rate": 9.669643871392513e-06, "loss": 0.4701, "step": 6770 }, { "epoch": 0.53, "grad_norm": 1.32897414942817, "learning_rate": 9.667132582243916e-06, "loss": 0.5755, "step": 6771 }, { "epoch": 0.53, "grad_norm": 1.2611725277353434, "learning_rate": 9.664621314110952e-06, "loss": 0.5244, "step": 6772 }, { "epoch": 0.53, "grad_norm": 1.109609498505618, "learning_rate": 9.662110067152173e-06, "loss": 0.4966, "step": 6773 }, { "epoch": 0.53, "grad_norm": 1.1127352797923649, "learning_rate": 9.659598841526133e-06, "loss": 0.5074, "step": 6774 }, { "epoch": 0.53, "grad_norm": 1.0763404561840126, "learning_rate": 9.657087637391372e-06, "loss": 0.4972, "step": 6775 }, { "epoch": 0.53, "grad_norm": 1.1935854134266628, "learning_rate": 9.654576454906437e-06, "loss": 0.5213, "step": 6776 }, { "epoch": 0.53, "grad_norm": 1.2369850667177, "learning_rate": 9.652065294229877e-06, "loss": 0.5326, "step": 6777 }, { "epoch": 0.53, "grad_norm": 1.1852745987826399, "learning_rate": 9.649554155520227e-06, "loss": 0.5173, "step": 6778 }, { "epoch": 0.53, "grad_norm": 1.0512529753689384, "learning_rate": 9.647043038936033e-06, "loss": 0.5297, "step": 6779 }, { "epoch": 0.53, "grad_norm": 1.1237311625360409, "learning_rate": 9.644531944635829e-06, "loss": 0.5414, "step": 6780 }, { "epoch": 0.53, "grad_norm": 1.3321954172942962, "learning_rate": 9.64202087277816e-06, "loss": 0.604, "step": 6781 }, { "epoch": 0.53, "grad_norm": 1.098598361004965, "learning_rate": 9.639509823521562e-06, "loss": 0.5335, "step": 6782 }, { "epoch": 0.53, "grad_norm": 1.2298731740059234, "learning_rate": 9.636998797024566e-06, "loss": 0.5467, "step": 6783 }, { "epoch": 0.53, "grad_norm": 1.092405582861112, "learning_rate": 9.634487793445711e-06, "loss": 0.552, "step": 6784 }, { "epoch": 0.53, "grad_norm": 1.0590108352445429, "learning_rate": 9.631976812943523e-06, "loss": 0.468, "step": 6785 }, { "epoch": 0.53, "grad_norm": 1.1622103607153123, "learning_rate": 9.62946585567654e-06, "loss": 0.5303, "step": 6786 }, { "epoch": 0.53, "grad_norm": 1.2147668648581655, "learning_rate": 9.626954921803294e-06, "loss": 0.5373, "step": 6787 }, { "epoch": 0.53, "grad_norm": 1.1345811913944404, "learning_rate": 9.624444011482302e-06, "loss": 0.5027, "step": 6788 }, { "epoch": 0.53, "grad_norm": 1.3148610812590216, "learning_rate": 9.6219331248721e-06, "loss": 0.6111, "step": 6789 }, { "epoch": 0.53, "grad_norm": 1.120634775879702, "learning_rate": 9.619422262131208e-06, "loss": 0.4998, "step": 6790 }, { "epoch": 0.53, "grad_norm": 1.18938006096794, "learning_rate": 9.616911423418153e-06, "loss": 0.5491, "step": 6791 }, { "epoch": 0.53, "grad_norm": 1.3028522764096522, "learning_rate": 9.61440060889146e-06, "loss": 0.6272, "step": 6792 }, { "epoch": 0.53, "grad_norm": 1.0668678262890638, "learning_rate": 9.611889818709643e-06, "loss": 0.5332, "step": 6793 }, { "epoch": 0.53, "grad_norm": 1.1850208202577182, "learning_rate": 9.609379053031224e-06, "loss": 0.5584, "step": 6794 }, { "epoch": 0.53, "grad_norm": 1.1292958362829233, "learning_rate": 9.60686831201472e-06, "loss": 0.5175, "step": 6795 }, { "epoch": 0.53, "grad_norm": 1.1613028155615366, "learning_rate": 9.604357595818647e-06, "loss": 0.5409, "step": 6796 }, { "epoch": 0.53, "grad_norm": 1.1523969217737962, "learning_rate": 9.601846904601526e-06, "loss": 0.608, "step": 6797 }, { "epoch": 0.53, "grad_norm": 1.1970531779198983, "learning_rate": 9.599336238521859e-06, "loss": 0.5111, "step": 6798 }, { "epoch": 0.53, "grad_norm": 1.1778648898810051, "learning_rate": 9.596825597738164e-06, "loss": 0.5495, "step": 6799 }, { "epoch": 0.53, "grad_norm": 1.1870283896916536, "learning_rate": 9.594314982408947e-06, "loss": 0.5553, "step": 6800 }, { "epoch": 0.53, "grad_norm": 1.1901931840700481, "learning_rate": 9.591804392692719e-06, "loss": 0.5627, "step": 6801 }, { "epoch": 0.53, "grad_norm": 1.1053506093874272, "learning_rate": 9.589293828747988e-06, "loss": 0.5174, "step": 6802 }, { "epoch": 0.53, "grad_norm": 1.2732401969977203, "learning_rate": 9.586783290733254e-06, "loss": 0.5677, "step": 6803 }, { "epoch": 0.53, "grad_norm": 1.1656762528593017, "learning_rate": 9.584272778807026e-06, "loss": 0.5585, "step": 6804 }, { "epoch": 0.53, "grad_norm": 1.1832376580222328, "learning_rate": 9.581762293127798e-06, "loss": 0.5162, "step": 6805 }, { "epoch": 0.53, "grad_norm": 1.0895200952932398, "learning_rate": 9.579251833854076e-06, "loss": 0.5438, "step": 6806 }, { "epoch": 0.53, "grad_norm": 1.126061786162506, "learning_rate": 9.57674140114436e-06, "loss": 0.5528, "step": 6807 }, { "epoch": 0.53, "grad_norm": 1.2020671206504352, "learning_rate": 9.574230995157137e-06, "loss": 0.5021, "step": 6808 }, { "epoch": 0.53, "grad_norm": 1.2667014642168681, "learning_rate": 9.571720616050913e-06, "loss": 0.5896, "step": 6809 }, { "epoch": 0.53, "grad_norm": 1.1196191107699502, "learning_rate": 9.569210263984173e-06, "loss": 0.517, "step": 6810 }, { "epoch": 0.53, "grad_norm": 1.0555079906744835, "learning_rate": 9.566699939115412e-06, "loss": 0.5122, "step": 6811 }, { "epoch": 0.53, "grad_norm": 1.1921523962689797, "learning_rate": 9.564189641603123e-06, "loss": 0.5579, "step": 6812 }, { "epoch": 0.53, "grad_norm": 1.2114677498807633, "learning_rate": 9.561679371605786e-06, "loss": 0.5154, "step": 6813 }, { "epoch": 0.53, "grad_norm": 1.132669584532855, "learning_rate": 9.559169129281893e-06, "loss": 0.5237, "step": 6814 }, { "epoch": 0.53, "grad_norm": 1.1251716482822274, "learning_rate": 9.556658914789926e-06, "loss": 0.5107, "step": 6815 }, { "epoch": 0.53, "grad_norm": 1.1155361962971095, "learning_rate": 9.554148728288371e-06, "loss": 0.488, "step": 6816 }, { "epoch": 0.53, "grad_norm": 1.2365542622316654, "learning_rate": 9.551638569935708e-06, "loss": 0.5538, "step": 6817 }, { "epoch": 0.53, "grad_norm": 1.1413754124167022, "learning_rate": 9.549128439890413e-06, "loss": 0.5174, "step": 6818 }, { "epoch": 0.53, "grad_norm": 1.210704461100857, "learning_rate": 9.546618338310968e-06, "loss": 0.5515, "step": 6819 }, { "epoch": 0.53, "grad_norm": 1.158660643917127, "learning_rate": 9.544108265355843e-06, "loss": 0.5085, "step": 6820 }, { "epoch": 0.53, "grad_norm": 1.2350867911201326, "learning_rate": 9.541598221183516e-06, "loss": 0.5753, "step": 6821 }, { "epoch": 0.53, "grad_norm": 1.192111247612642, "learning_rate": 9.539088205952463e-06, "loss": 0.4856, "step": 6822 }, { "epoch": 0.53, "grad_norm": 1.1520995835071648, "learning_rate": 9.536578219821146e-06, "loss": 0.4988, "step": 6823 }, { "epoch": 0.53, "grad_norm": 1.298774477202078, "learning_rate": 9.53406826294804e-06, "loss": 0.5703, "step": 6824 }, { "epoch": 0.53, "grad_norm": 1.22550797537454, "learning_rate": 9.531558335491602e-06, "loss": 0.531, "step": 6825 }, { "epoch": 0.53, "grad_norm": 1.168792587157156, "learning_rate": 9.529048437610307e-06, "loss": 0.5501, "step": 6826 }, { "epoch": 0.53, "grad_norm": 1.2242872675434198, "learning_rate": 9.526538569462617e-06, "loss": 0.5519, "step": 6827 }, { "epoch": 0.53, "grad_norm": 1.2389060288447225, "learning_rate": 9.524028731206984e-06, "loss": 0.5388, "step": 6828 }, { "epoch": 0.53, "grad_norm": 1.1037690808384706, "learning_rate": 9.521518923001877e-06, "loss": 0.446, "step": 6829 }, { "epoch": 0.53, "grad_norm": 1.1709760651296448, "learning_rate": 9.519009145005747e-06, "loss": 0.5549, "step": 6830 }, { "epoch": 0.53, "grad_norm": 1.1214836585648127, "learning_rate": 9.516499397377049e-06, "loss": 0.5331, "step": 6831 }, { "epoch": 0.53, "grad_norm": 1.1658898673939344, "learning_rate": 9.513989680274241e-06, "loss": 0.5721, "step": 6832 }, { "epoch": 0.53, "grad_norm": 1.1407743904838614, "learning_rate": 9.511479993855776e-06, "loss": 0.5538, "step": 6833 }, { "epoch": 0.53, "grad_norm": 1.2103429011302314, "learning_rate": 9.508970338280097e-06, "loss": 0.5871, "step": 6834 }, { "epoch": 0.53, "grad_norm": 1.1783403194783333, "learning_rate": 9.50646071370565e-06, "loss": 0.563, "step": 6835 }, { "epoch": 0.53, "grad_norm": 1.25163257797966, "learning_rate": 9.503951120290886e-06, "loss": 0.5629, "step": 6836 }, { "epoch": 0.53, "grad_norm": 1.0621521324202032, "learning_rate": 9.501441558194247e-06, "loss": 0.5365, "step": 6837 }, { "epoch": 0.53, "grad_norm": 1.3286018413147922, "learning_rate": 9.498932027574179e-06, "loss": 0.5943, "step": 6838 }, { "epoch": 0.53, "grad_norm": 1.140774860727318, "learning_rate": 9.496422528589115e-06, "loss": 0.5767, "step": 6839 }, { "epoch": 0.53, "grad_norm": 1.2509644125855384, "learning_rate": 9.493913061397493e-06, "loss": 0.5278, "step": 6840 }, { "epoch": 0.53, "grad_norm": 1.0820306440551384, "learning_rate": 9.49140362615775e-06, "loss": 0.5561, "step": 6841 }, { "epoch": 0.53, "grad_norm": 1.1487220359777455, "learning_rate": 9.488894223028322e-06, "loss": 0.5719, "step": 6842 }, { "epoch": 0.53, "grad_norm": 1.2308671578778951, "learning_rate": 9.48638485216764e-06, "loss": 0.5494, "step": 6843 }, { "epoch": 0.53, "grad_norm": 1.1264529382599893, "learning_rate": 9.483875513734131e-06, "loss": 0.5611, "step": 6844 }, { "epoch": 0.53, "grad_norm": 1.1681477081067346, "learning_rate": 9.481366207886223e-06, "loss": 0.5515, "step": 6845 }, { "epoch": 0.53, "grad_norm": 1.1517450878882316, "learning_rate": 9.47885693478234e-06, "loss": 0.5623, "step": 6846 }, { "epoch": 0.53, "grad_norm": 1.1045334164912133, "learning_rate": 9.476347694580911e-06, "loss": 0.5322, "step": 6847 }, { "epoch": 0.53, "grad_norm": 1.156335363587699, "learning_rate": 9.473838487440354e-06, "loss": 0.5257, "step": 6848 }, { "epoch": 0.53, "grad_norm": 1.1668430376706829, "learning_rate": 9.471329313519086e-06, "loss": 0.5719, "step": 6849 }, { "epoch": 0.53, "grad_norm": 1.1390214910045442, "learning_rate": 9.468820172975525e-06, "loss": 0.5465, "step": 6850 }, { "epoch": 0.53, "grad_norm": 1.3029624361872645, "learning_rate": 9.466311065968088e-06, "loss": 0.5726, "step": 6851 }, { "epoch": 0.53, "grad_norm": 1.0708037870811096, "learning_rate": 9.463801992655187e-06, "loss": 0.5075, "step": 6852 }, { "epoch": 0.53, "grad_norm": 1.1286208376062532, "learning_rate": 9.461292953195234e-06, "loss": 0.5303, "step": 6853 }, { "epoch": 0.53, "grad_norm": 1.1378655695096467, "learning_rate": 9.458783947746635e-06, "loss": 0.4978, "step": 6854 }, { "epoch": 0.53, "grad_norm": 1.2296301027350895, "learning_rate": 9.456274976467798e-06, "loss": 0.5611, "step": 6855 }, { "epoch": 0.53, "grad_norm": 1.2008565845572483, "learning_rate": 9.453766039517126e-06, "loss": 0.5897, "step": 6856 }, { "epoch": 0.53, "grad_norm": 1.1358637789766757, "learning_rate": 9.451257137053022e-06, "loss": 0.5175, "step": 6857 }, { "epoch": 0.53, "grad_norm": 1.0870034454417437, "learning_rate": 9.448748269233889e-06, "loss": 0.5717, "step": 6858 }, { "epoch": 0.53, "grad_norm": 1.1680117698069887, "learning_rate": 9.44623943621812e-06, "loss": 0.562, "step": 6859 }, { "epoch": 0.53, "grad_norm": 1.2131268797933095, "learning_rate": 9.443730638164112e-06, "loss": 0.5597, "step": 6860 }, { "epoch": 0.53, "grad_norm": 1.161619195218998, "learning_rate": 9.441221875230257e-06, "loss": 0.5318, "step": 6861 }, { "epoch": 0.53, "grad_norm": 1.193824889545189, "learning_rate": 9.438713147574951e-06, "loss": 0.5891, "step": 6862 }, { "epoch": 0.53, "grad_norm": 1.2061538717281686, "learning_rate": 9.43620445535658e-06, "loss": 0.5521, "step": 6863 }, { "epoch": 0.53, "grad_norm": 1.1358672423326084, "learning_rate": 9.43369579873353e-06, "loss": 0.5489, "step": 6864 }, { "epoch": 0.53, "grad_norm": 1.112326014768923, "learning_rate": 9.431187177864184e-06, "loss": 0.5347, "step": 6865 }, { "epoch": 0.53, "grad_norm": 1.2957539596452956, "learning_rate": 9.428678592906925e-06, "loss": 0.5118, "step": 6866 }, { "epoch": 0.53, "grad_norm": 1.2199467383155636, "learning_rate": 9.426170044020139e-06, "loss": 0.5734, "step": 6867 }, { "epoch": 0.53, "grad_norm": 1.2289500234171722, "learning_rate": 9.423661531362197e-06, "loss": 0.5753, "step": 6868 }, { "epoch": 0.53, "grad_norm": 1.159313576605824, "learning_rate": 9.421153055091477e-06, "loss": 0.5575, "step": 6869 }, { "epoch": 0.53, "grad_norm": 1.133401967289168, "learning_rate": 9.418644615366346e-06, "loss": 0.5535, "step": 6870 }, { "epoch": 0.53, "grad_norm": 1.0925077195407658, "learning_rate": 9.416136212345183e-06, "loss": 0.5117, "step": 6871 }, { "epoch": 0.53, "grad_norm": 1.1616052383553008, "learning_rate": 9.413627846186354e-06, "loss": 0.5611, "step": 6872 }, { "epoch": 0.53, "grad_norm": 1.192798388994563, "learning_rate": 9.411119517048226e-06, "loss": 0.5265, "step": 6873 }, { "epoch": 0.53, "grad_norm": 1.172370195666303, "learning_rate": 9.40861122508916e-06, "loss": 0.5269, "step": 6874 }, { "epoch": 0.53, "grad_norm": 1.13235438061179, "learning_rate": 9.406102970467515e-06, "loss": 0.5494, "step": 6875 }, { "epoch": 0.53, "grad_norm": 1.1302768082977968, "learning_rate": 9.403594753341655e-06, "loss": 0.5299, "step": 6876 }, { "epoch": 0.53, "grad_norm": 1.1358724898214763, "learning_rate": 9.401086573869937e-06, "loss": 0.5687, "step": 6877 }, { "epoch": 0.53, "grad_norm": 1.1984956033440617, "learning_rate": 9.398578432210713e-06, "loss": 0.5353, "step": 6878 }, { "epoch": 0.53, "grad_norm": 1.262870664288372, "learning_rate": 9.396070328522336e-06, "loss": 0.5797, "step": 6879 }, { "epoch": 0.53, "grad_norm": 1.2578837688466031, "learning_rate": 9.393562262963154e-06, "loss": 0.5696, "step": 6880 }, { "epoch": 0.53, "grad_norm": 1.0445816769692662, "learning_rate": 9.391054235691515e-06, "loss": 0.4845, "step": 6881 }, { "epoch": 0.53, "grad_norm": 1.1939792552724164, "learning_rate": 9.388546246865764e-06, "loss": 0.5524, "step": 6882 }, { "epoch": 0.53, "grad_norm": 1.185410267031021, "learning_rate": 9.386038296644245e-06, "loss": 0.5581, "step": 6883 }, { "epoch": 0.53, "grad_norm": 1.1838205957358947, "learning_rate": 9.383530385185295e-06, "loss": 0.5594, "step": 6884 }, { "epoch": 0.53, "grad_norm": 1.2021331662184975, "learning_rate": 9.381022512647251e-06, "loss": 0.5972, "step": 6885 }, { "epoch": 0.53, "grad_norm": 1.2612453552856213, "learning_rate": 9.37851467918845e-06, "loss": 0.5922, "step": 6886 }, { "epoch": 0.53, "grad_norm": 1.115468977588695, "learning_rate": 9.376006884967224e-06, "loss": 0.473, "step": 6887 }, { "epoch": 0.53, "grad_norm": 1.140491111944837, "learning_rate": 9.373499130141901e-06, "loss": 0.5621, "step": 6888 }, { "epoch": 0.53, "grad_norm": 1.0999414948597301, "learning_rate": 9.370991414870814e-06, "loss": 0.5277, "step": 6889 }, { "epoch": 0.53, "grad_norm": 1.1706286605557767, "learning_rate": 9.368483739312281e-06, "loss": 0.5493, "step": 6890 }, { "epoch": 0.53, "grad_norm": 1.1421705252538015, "learning_rate": 9.365976103624628e-06, "loss": 0.554, "step": 6891 }, { "epoch": 0.53, "grad_norm": 1.174512007001139, "learning_rate": 9.363468507966175e-06, "loss": 0.5224, "step": 6892 }, { "epoch": 0.53, "grad_norm": 1.1151110151645698, "learning_rate": 9.360960952495239e-06, "loss": 0.5403, "step": 6893 }, { "epoch": 0.53, "grad_norm": 1.2198132498656216, "learning_rate": 9.35845343737014e-06, "loss": 0.5749, "step": 6894 }, { "epoch": 0.53, "grad_norm": 1.2073646751536584, "learning_rate": 9.355945962749179e-06, "loss": 0.5631, "step": 6895 }, { "epoch": 0.53, "grad_norm": 1.1083731627418414, "learning_rate": 9.353438528790673e-06, "loss": 0.4935, "step": 6896 }, { "epoch": 0.54, "grad_norm": 1.1268531054138002, "learning_rate": 9.350931135652932e-06, "loss": 0.5463, "step": 6897 }, { "epoch": 0.54, "grad_norm": 1.2076929741427251, "learning_rate": 9.348423783494253e-06, "loss": 0.5921, "step": 6898 }, { "epoch": 0.54, "grad_norm": 1.1776943421400163, "learning_rate": 9.345916472472947e-06, "loss": 0.5425, "step": 6899 }, { "epoch": 0.54, "grad_norm": 1.2562159481462871, "learning_rate": 9.343409202747306e-06, "loss": 0.591, "step": 6900 }, { "epoch": 0.54, "grad_norm": 1.0744898228649296, "learning_rate": 9.340901974475627e-06, "loss": 0.5436, "step": 6901 }, { "epoch": 0.54, "grad_norm": 1.2216132837387694, "learning_rate": 9.33839478781621e-06, "loss": 0.5736, "step": 6902 }, { "epoch": 0.54, "grad_norm": 1.1346967088582525, "learning_rate": 9.335887642927342e-06, "loss": 0.5508, "step": 6903 }, { "epoch": 0.54, "grad_norm": 1.2722628271618521, "learning_rate": 9.333380539967319e-06, "loss": 0.5887, "step": 6904 }, { "epoch": 0.54, "grad_norm": 1.1576385408546024, "learning_rate": 9.330873479094415e-06, "loss": 0.5157, "step": 6905 }, { "epoch": 0.54, "grad_norm": 1.0669097829466203, "learning_rate": 9.328366460466921e-06, "loss": 0.5398, "step": 6906 }, { "epoch": 0.54, "grad_norm": 1.140139228115547, "learning_rate": 9.325859484243121e-06, "loss": 0.552, "step": 6907 }, { "epoch": 0.54, "grad_norm": 1.1109227948080684, "learning_rate": 9.323352550581285e-06, "loss": 0.4995, "step": 6908 }, { "epoch": 0.54, "grad_norm": 1.323131688983571, "learning_rate": 9.320845659639702e-06, "loss": 0.6106, "step": 6909 }, { "epoch": 0.54, "grad_norm": 1.1660169540729968, "learning_rate": 9.31833881157663e-06, "loss": 0.5586, "step": 6910 }, { "epoch": 0.54, "grad_norm": 1.2727689802005524, "learning_rate": 9.315832006550345e-06, "loss": 0.5651, "step": 6911 }, { "epoch": 0.54, "grad_norm": 1.2606625698015634, "learning_rate": 9.313325244719117e-06, "loss": 0.5756, "step": 6912 }, { "epoch": 0.54, "grad_norm": 1.1544550805024634, "learning_rate": 9.310818526241209e-06, "loss": 0.515, "step": 6913 }, { "epoch": 0.54, "grad_norm": 1.2779881896128449, "learning_rate": 9.308311851274885e-06, "loss": 0.5994, "step": 6914 }, { "epoch": 0.54, "grad_norm": 1.2377965321386084, "learning_rate": 9.3058052199784e-06, "loss": 0.5549, "step": 6915 }, { "epoch": 0.54, "grad_norm": 1.1371878478617607, "learning_rate": 9.303298632510014e-06, "loss": 0.537, "step": 6916 }, { "epoch": 0.54, "grad_norm": 1.149749162518121, "learning_rate": 9.30079208902798e-06, "loss": 0.5475, "step": 6917 }, { "epoch": 0.54, "grad_norm": 1.3233202471354304, "learning_rate": 9.298285589690548e-06, "loss": 0.5925, "step": 6918 }, { "epoch": 0.54, "grad_norm": 1.063030951733851, "learning_rate": 9.29577913465597e-06, "loss": 0.5224, "step": 6919 }, { "epoch": 0.54, "grad_norm": 1.1598342808374553, "learning_rate": 9.293272724082484e-06, "loss": 0.5138, "step": 6920 }, { "epoch": 0.54, "grad_norm": 1.150487044071225, "learning_rate": 9.290766358128338e-06, "loss": 0.4975, "step": 6921 }, { "epoch": 0.54, "grad_norm": 1.1169438129695117, "learning_rate": 9.288260036951774e-06, "loss": 0.5267, "step": 6922 }, { "epoch": 0.54, "grad_norm": 1.2041056216033064, "learning_rate": 9.285753760711023e-06, "loss": 0.5532, "step": 6923 }, { "epoch": 0.54, "grad_norm": 1.1674193043563388, "learning_rate": 9.283247529564326e-06, "loss": 0.5646, "step": 6924 }, { "epoch": 0.54, "grad_norm": 1.1768027395955605, "learning_rate": 9.280741343669908e-06, "loss": 0.545, "step": 6925 }, { "epoch": 0.54, "grad_norm": 1.081877494381608, "learning_rate": 9.278235203185999e-06, "loss": 0.4693, "step": 6926 }, { "epoch": 0.54, "grad_norm": 1.1036741969862582, "learning_rate": 9.275729108270825e-06, "loss": 0.5205, "step": 6927 }, { "epoch": 0.54, "grad_norm": 1.1665832058254102, "learning_rate": 9.27322305908261e-06, "loss": 0.4809, "step": 6928 }, { "epoch": 0.54, "grad_norm": 1.2345403246684257, "learning_rate": 9.270717055779575e-06, "loss": 0.5734, "step": 6929 }, { "epoch": 0.54, "grad_norm": 1.1085644837079052, "learning_rate": 9.268211098519932e-06, "loss": 0.4978, "step": 6930 }, { "epoch": 0.54, "grad_norm": 1.0934605351649394, "learning_rate": 9.265705187461898e-06, "loss": 0.5376, "step": 6931 }, { "epoch": 0.54, "grad_norm": 1.117967133484211, "learning_rate": 9.263199322763687e-06, "loss": 0.5437, "step": 6932 }, { "epoch": 0.54, "grad_norm": 1.1633346125223145, "learning_rate": 9.2606935045835e-06, "loss": 0.5395, "step": 6933 }, { "epoch": 0.54, "grad_norm": 1.2807320268849065, "learning_rate": 9.258187733079552e-06, "loss": 0.5728, "step": 6934 }, { "epoch": 0.54, "grad_norm": 1.1802597173877796, "learning_rate": 9.255682008410034e-06, "loss": 0.5462, "step": 6935 }, { "epoch": 0.54, "grad_norm": 1.0465943828657278, "learning_rate": 9.253176330733153e-06, "loss": 0.4721, "step": 6936 }, { "epoch": 0.54, "grad_norm": 1.0977897698589962, "learning_rate": 9.250670700207103e-06, "loss": 0.4856, "step": 6937 }, { "epoch": 0.54, "grad_norm": 1.1913492658151363, "learning_rate": 9.248165116990078e-06, "loss": 0.5302, "step": 6938 }, { "epoch": 0.54, "grad_norm": 1.2471735470191296, "learning_rate": 9.245659581240271e-06, "loss": 0.5617, "step": 6939 }, { "epoch": 0.54, "grad_norm": 1.0122175128723463, "learning_rate": 9.243154093115862e-06, "loss": 0.4811, "step": 6940 }, { "epoch": 0.54, "grad_norm": 1.1170565121561231, "learning_rate": 9.240648652775042e-06, "loss": 0.5079, "step": 6941 }, { "epoch": 0.54, "grad_norm": 1.2224205691816439, "learning_rate": 9.23814326037599e-06, "loss": 0.5465, "step": 6942 }, { "epoch": 0.54, "grad_norm": 1.1123657209817601, "learning_rate": 9.235637916076884e-06, "loss": 0.5586, "step": 6943 }, { "epoch": 0.54, "grad_norm": 1.0958176823261065, "learning_rate": 9.233132620035899e-06, "loss": 0.5004, "step": 6944 }, { "epoch": 0.54, "grad_norm": 1.1805004830074655, "learning_rate": 9.230627372411213e-06, "loss": 0.5226, "step": 6945 }, { "epoch": 0.54, "grad_norm": 1.1487981009556634, "learning_rate": 9.228122173360986e-06, "loss": 0.4874, "step": 6946 }, { "epoch": 0.54, "grad_norm": 1.1204726462508574, "learning_rate": 9.22561702304339e-06, "loss": 0.5522, "step": 6947 }, { "epoch": 0.54, "grad_norm": 1.189242941039336, "learning_rate": 9.223111921616585e-06, "loss": 0.5627, "step": 6948 }, { "epoch": 0.54, "grad_norm": 1.184960108540278, "learning_rate": 9.220606869238733e-06, "loss": 0.5342, "step": 6949 }, { "epoch": 0.54, "grad_norm": 1.2431689529742278, "learning_rate": 9.218101866067995e-06, "loss": 0.5962, "step": 6950 }, { "epoch": 0.54, "grad_norm": 1.1778757696852609, "learning_rate": 9.215596912262515e-06, "loss": 0.5381, "step": 6951 }, { "epoch": 0.54, "grad_norm": 1.2069500645861622, "learning_rate": 9.213092007980453e-06, "loss": 0.5376, "step": 6952 }, { "epoch": 0.54, "grad_norm": 1.151024324910753, "learning_rate": 9.21058715337995e-06, "loss": 0.5071, "step": 6953 }, { "epoch": 0.54, "grad_norm": 1.0645189177452357, "learning_rate": 9.208082348619149e-06, "loss": 0.5426, "step": 6954 }, { "epoch": 0.54, "grad_norm": 1.1235549440925061, "learning_rate": 9.205577593856203e-06, "loss": 0.5517, "step": 6955 }, { "epoch": 0.54, "grad_norm": 1.1496807297893243, "learning_rate": 9.203072889249237e-06, "loss": 0.5198, "step": 6956 }, { "epoch": 0.54, "grad_norm": 1.0767754110292076, "learning_rate": 9.200568234956393e-06, "loss": 0.5457, "step": 6957 }, { "epoch": 0.54, "grad_norm": 1.1517957515849504, "learning_rate": 9.198063631135799e-06, "loss": 0.5566, "step": 6958 }, { "epoch": 0.54, "grad_norm": 1.173953184552595, "learning_rate": 9.195559077945586e-06, "loss": 0.564, "step": 6959 }, { "epoch": 0.54, "grad_norm": 1.221985215389513, "learning_rate": 9.193054575543883e-06, "loss": 0.5874, "step": 6960 }, { "epoch": 0.54, "grad_norm": 1.2115907445276382, "learning_rate": 9.190550124088802e-06, "loss": 0.5805, "step": 6961 }, { "epoch": 0.54, "grad_norm": 1.2395495348684942, "learning_rate": 9.188045723738471e-06, "loss": 0.5715, "step": 6962 }, { "epoch": 0.54, "grad_norm": 1.0463720793240696, "learning_rate": 9.185541374651001e-06, "loss": 0.5656, "step": 6963 }, { "epoch": 0.54, "grad_norm": 1.2738552227489512, "learning_rate": 9.183037076984505e-06, "loss": 0.5581, "step": 6964 }, { "epoch": 0.54, "grad_norm": 1.1700438370602617, "learning_rate": 9.180532830897099e-06, "loss": 0.5691, "step": 6965 }, { "epoch": 0.54, "grad_norm": 1.1466987058595464, "learning_rate": 9.178028636546879e-06, "loss": 0.5725, "step": 6966 }, { "epoch": 0.54, "grad_norm": 1.0806897933712363, "learning_rate": 9.175524494091951e-06, "loss": 0.5095, "step": 6967 }, { "epoch": 0.54, "grad_norm": 1.10736918113791, "learning_rate": 9.173020403690417e-06, "loss": 0.502, "step": 6968 }, { "epoch": 0.54, "grad_norm": 1.1199586551562242, "learning_rate": 9.17051636550037e-06, "loss": 0.5631, "step": 6969 }, { "epoch": 0.54, "grad_norm": 1.0493294141272902, "learning_rate": 9.168012379679909e-06, "loss": 0.5044, "step": 6970 }, { "epoch": 0.54, "grad_norm": 1.2162419457477667, "learning_rate": 9.165508446387114e-06, "loss": 0.5426, "step": 6971 }, { "epoch": 0.54, "grad_norm": 1.2508817900374918, "learning_rate": 9.16300456578008e-06, "loss": 0.571, "step": 6972 }, { "epoch": 0.54, "grad_norm": 1.1812503390841527, "learning_rate": 9.160500738016883e-06, "loss": 0.5793, "step": 6973 }, { "epoch": 0.54, "grad_norm": 1.274405737371036, "learning_rate": 9.157996963255605e-06, "loss": 0.5297, "step": 6974 }, { "epoch": 0.54, "grad_norm": 1.009623061839108, "learning_rate": 9.155493241654327e-06, "loss": 0.4643, "step": 6975 }, { "epoch": 0.54, "grad_norm": 1.2002514794214814, "learning_rate": 9.152989573371115e-06, "loss": 0.5486, "step": 6976 }, { "epoch": 0.54, "grad_norm": 1.2772244272290916, "learning_rate": 9.150485958564043e-06, "loss": 0.603, "step": 6977 }, { "epoch": 0.54, "grad_norm": 1.1033836623426903, "learning_rate": 9.147982397391172e-06, "loss": 0.5454, "step": 6978 }, { "epoch": 0.54, "grad_norm": 1.2115514367960964, "learning_rate": 9.145478890010568e-06, "loss": 0.571, "step": 6979 }, { "epoch": 0.54, "grad_norm": 1.2421910987657923, "learning_rate": 9.142975436580297e-06, "loss": 0.5611, "step": 6980 }, { "epoch": 0.54, "grad_norm": 1.1870311514214298, "learning_rate": 9.140472037258403e-06, "loss": 0.5261, "step": 6981 }, { "epoch": 0.54, "grad_norm": 1.1489866850955477, "learning_rate": 9.137968692202945e-06, "loss": 0.5315, "step": 6982 }, { "epoch": 0.54, "grad_norm": 1.0719251526423996, "learning_rate": 9.13546540157197e-06, "loss": 0.5379, "step": 6983 }, { "epoch": 0.54, "grad_norm": 1.3033732116107446, "learning_rate": 9.132962165523523e-06, "loss": 0.5357, "step": 6984 }, { "epoch": 0.54, "grad_norm": 1.210522537503795, "learning_rate": 9.130458984215652e-06, "loss": 0.5308, "step": 6985 }, { "epoch": 0.54, "grad_norm": 1.2480797800612613, "learning_rate": 9.12795585780639e-06, "loss": 0.5878, "step": 6986 }, { "epoch": 0.54, "grad_norm": 1.14679730643311, "learning_rate": 9.125452786453774e-06, "loss": 0.5225, "step": 6987 }, { "epoch": 0.54, "grad_norm": 1.221749307553021, "learning_rate": 9.122949770315834e-06, "loss": 0.5507, "step": 6988 }, { "epoch": 0.54, "grad_norm": 1.3355400431152264, "learning_rate": 9.1204468095506e-06, "loss": 0.6395, "step": 6989 }, { "epoch": 0.54, "grad_norm": 1.282097931312729, "learning_rate": 9.1179439043161e-06, "loss": 0.5805, "step": 6990 }, { "epoch": 0.54, "grad_norm": 1.1581507859136013, "learning_rate": 9.11544105477035e-06, "loss": 0.5557, "step": 6991 }, { "epoch": 0.54, "grad_norm": 1.2234400470263787, "learning_rate": 9.11293826107137e-06, "loss": 0.6168, "step": 6992 }, { "epoch": 0.54, "grad_norm": 1.1379268034170729, "learning_rate": 9.110435523377173e-06, "loss": 0.5406, "step": 6993 }, { "epoch": 0.54, "grad_norm": 1.0996749614446293, "learning_rate": 9.10793284184577e-06, "loss": 0.5462, "step": 6994 }, { "epoch": 0.54, "grad_norm": 1.0379292898330597, "learning_rate": 9.105430216635174e-06, "loss": 0.5362, "step": 6995 }, { "epoch": 0.54, "grad_norm": 1.1427434817912512, "learning_rate": 9.102927647903382e-06, "loss": 0.5539, "step": 6996 }, { "epoch": 0.54, "grad_norm": 1.1408329081807391, "learning_rate": 9.100425135808396e-06, "loss": 0.5236, "step": 6997 }, { "epoch": 0.54, "grad_norm": 1.0264476740935105, "learning_rate": 9.09792268050821e-06, "loss": 0.4896, "step": 6998 }, { "epoch": 0.54, "grad_norm": 1.1323937003666193, "learning_rate": 9.09542028216082e-06, "loss": 0.487, "step": 6999 }, { "epoch": 0.54, "grad_norm": 1.1424002745362227, "learning_rate": 9.092917940924216e-06, "loss": 0.5429, "step": 7000 }, { "epoch": 0.54, "grad_norm": 1.4034000764289825, "learning_rate": 9.090415656956382e-06, "loss": 0.5719, "step": 7001 }, { "epoch": 0.54, "grad_norm": 1.0883032189292592, "learning_rate": 9.0879134304153e-06, "loss": 0.5291, "step": 7002 }, { "epoch": 0.54, "grad_norm": 1.1988742594513302, "learning_rate": 9.085411261458948e-06, "loss": 0.554, "step": 7003 }, { "epoch": 0.54, "grad_norm": 1.155488149855366, "learning_rate": 9.082909150245302e-06, "loss": 0.5646, "step": 7004 }, { "epoch": 0.54, "grad_norm": 1.2430118248822817, "learning_rate": 9.080407096932334e-06, "loss": 0.5543, "step": 7005 }, { "epoch": 0.54, "grad_norm": 1.143835331386233, "learning_rate": 9.077905101678011e-06, "loss": 0.5131, "step": 7006 }, { "epoch": 0.54, "grad_norm": 1.1471499017442481, "learning_rate": 9.075403164640296e-06, "loss": 0.5024, "step": 7007 }, { "epoch": 0.54, "grad_norm": 1.138680826089791, "learning_rate": 9.072901285977148e-06, "loss": 0.52, "step": 7008 }, { "epoch": 0.54, "grad_norm": 1.2051025585992894, "learning_rate": 9.070399465846524e-06, "loss": 0.5342, "step": 7009 }, { "epoch": 0.54, "grad_norm": 1.1450505038954624, "learning_rate": 9.06789770440638e-06, "loss": 0.5494, "step": 7010 }, { "epoch": 0.54, "grad_norm": 1.1068477588922272, "learning_rate": 9.065396001814665e-06, "loss": 0.5358, "step": 7011 }, { "epoch": 0.54, "grad_norm": 1.2966130003492022, "learning_rate": 9.06289435822932e-06, "loss": 0.6066, "step": 7012 }, { "epoch": 0.54, "grad_norm": 1.228411889965284, "learning_rate": 9.06039277380829e-06, "loss": 0.5751, "step": 7013 }, { "epoch": 0.54, "grad_norm": 1.0887813517176645, "learning_rate": 9.057891248709508e-06, "loss": 0.5133, "step": 7014 }, { "epoch": 0.54, "grad_norm": 1.391514911296203, "learning_rate": 9.055389783090916e-06, "loss": 0.5703, "step": 7015 }, { "epoch": 0.54, "grad_norm": 1.1722422723627295, "learning_rate": 9.05288837711044e-06, "loss": 0.532, "step": 7016 }, { "epoch": 0.54, "grad_norm": 1.085709692023892, "learning_rate": 9.050387030926008e-06, "loss": 0.5014, "step": 7017 }, { "epoch": 0.54, "grad_norm": 1.1962447177686633, "learning_rate": 9.047885744695539e-06, "loss": 0.5419, "step": 7018 }, { "epoch": 0.54, "grad_norm": 1.1480357706597895, "learning_rate": 9.045384518576956e-06, "loss": 0.5024, "step": 7019 }, { "epoch": 0.54, "grad_norm": 1.20420491686902, "learning_rate": 9.042883352728176e-06, "loss": 0.5586, "step": 7020 }, { "epoch": 0.54, "grad_norm": 1.1029276944252733, "learning_rate": 9.040382247307107e-06, "loss": 0.5225, "step": 7021 }, { "epoch": 0.54, "grad_norm": 1.311248409608471, "learning_rate": 9.037881202471657e-06, "loss": 0.6048, "step": 7022 }, { "epoch": 0.54, "grad_norm": 1.1548144733989967, "learning_rate": 9.03538021837973e-06, "loss": 0.5379, "step": 7023 }, { "epoch": 0.54, "grad_norm": 1.1492036612302654, "learning_rate": 9.032879295189226e-06, "loss": 0.5451, "step": 7024 }, { "epoch": 0.54, "grad_norm": 1.2538196854702794, "learning_rate": 9.030378433058045e-06, "loss": 0.6014, "step": 7025 }, { "epoch": 0.55, "grad_norm": 1.1552907212061672, "learning_rate": 9.027877632144076e-06, "loss": 0.5065, "step": 7026 }, { "epoch": 0.55, "grad_norm": 1.1356965282967875, "learning_rate": 9.025376892605205e-06, "loss": 0.5296, "step": 7027 }, { "epoch": 0.55, "grad_norm": 1.2273073303918798, "learning_rate": 9.02287621459932e-06, "loss": 0.5217, "step": 7028 }, { "epoch": 0.55, "grad_norm": 1.1045813351649905, "learning_rate": 9.0203755982843e-06, "loss": 0.5463, "step": 7029 }, { "epoch": 0.55, "grad_norm": 1.2339777729839565, "learning_rate": 9.017875043818024e-06, "loss": 0.5377, "step": 7030 }, { "epoch": 0.55, "grad_norm": 1.0785493982892813, "learning_rate": 9.015374551358364e-06, "loss": 0.5226, "step": 7031 }, { "epoch": 0.55, "grad_norm": 1.1916739741459232, "learning_rate": 9.012874121063189e-06, "loss": 0.5848, "step": 7032 }, { "epoch": 0.55, "grad_norm": 1.204853896389094, "learning_rate": 9.01037375309036e-06, "loss": 0.5662, "step": 7033 }, { "epoch": 0.55, "grad_norm": 1.2784209302874183, "learning_rate": 9.007873447597744e-06, "loss": 0.5707, "step": 7034 }, { "epoch": 0.55, "grad_norm": 1.143587107080752, "learning_rate": 9.005373204743196e-06, "loss": 0.5527, "step": 7035 }, { "epoch": 0.55, "grad_norm": 1.0765289987962836, "learning_rate": 9.00287302468457e-06, "loss": 0.4727, "step": 7036 }, { "epoch": 0.55, "grad_norm": 1.1779686234607318, "learning_rate": 9.000372907579716e-06, "loss": 0.5093, "step": 7037 }, { "epoch": 0.55, "grad_norm": 1.199891687511814, "learning_rate": 8.997872853586474e-06, "loss": 0.5887, "step": 7038 }, { "epoch": 0.55, "grad_norm": 1.1354772860708937, "learning_rate": 8.995372862862687e-06, "loss": 0.4775, "step": 7039 }, { "epoch": 0.55, "grad_norm": 1.1514607812892144, "learning_rate": 8.9928729355662e-06, "loss": 0.5055, "step": 7040 }, { "epoch": 0.55, "grad_norm": 1.2774361393264921, "learning_rate": 8.990373071854842e-06, "loss": 0.6212, "step": 7041 }, { "epoch": 0.55, "grad_norm": 1.2539147112314308, "learning_rate": 8.987873271886436e-06, "loss": 0.567, "step": 7042 }, { "epoch": 0.55, "grad_norm": 1.0432130334641108, "learning_rate": 8.985373535818814e-06, "loss": 0.5036, "step": 7043 }, { "epoch": 0.55, "grad_norm": 1.2721665950248542, "learning_rate": 8.982873863809793e-06, "loss": 0.5396, "step": 7044 }, { "epoch": 0.55, "grad_norm": 1.1076160509604467, "learning_rate": 8.980374256017196e-06, "loss": 0.4962, "step": 7045 }, { "epoch": 0.55, "grad_norm": 1.2018725824753307, "learning_rate": 8.977874712598833e-06, "loss": 0.5538, "step": 7046 }, { "epoch": 0.55, "grad_norm": 1.137887360567805, "learning_rate": 8.975375233712511e-06, "loss": 0.5593, "step": 7047 }, { "epoch": 0.55, "grad_norm": 1.1979539395490664, "learning_rate": 8.972875819516037e-06, "loss": 0.5256, "step": 7048 }, { "epoch": 0.55, "grad_norm": 1.271288269202131, "learning_rate": 8.97037647016721e-06, "loss": 0.56, "step": 7049 }, { "epoch": 0.55, "grad_norm": 1.1684079062261288, "learning_rate": 8.967877185823833e-06, "loss": 0.5269, "step": 7050 }, { "epoch": 0.55, "grad_norm": 1.1487415975070363, "learning_rate": 8.96537796664369e-06, "loss": 0.5491, "step": 7051 }, { "epoch": 0.55, "grad_norm": 1.0950324578025312, "learning_rate": 8.96287881278458e-06, "loss": 0.5636, "step": 7052 }, { "epoch": 0.55, "grad_norm": 1.1393221377415492, "learning_rate": 8.960379724404275e-06, "loss": 0.5054, "step": 7053 }, { "epoch": 0.55, "grad_norm": 1.2108883755471758, "learning_rate": 8.957880701660563e-06, "loss": 0.5465, "step": 7054 }, { "epoch": 0.55, "grad_norm": 1.2598750577015054, "learning_rate": 8.955381744711222e-06, "loss": 0.5956, "step": 7055 }, { "epoch": 0.55, "grad_norm": 1.2598406155762636, "learning_rate": 8.952882853714017e-06, "loss": 0.5896, "step": 7056 }, { "epoch": 0.55, "grad_norm": 1.1970730948793176, "learning_rate": 8.950384028826724e-06, "loss": 0.56, "step": 7057 }, { "epoch": 0.55, "grad_norm": 1.226250432643367, "learning_rate": 8.9478852702071e-06, "loss": 0.6067, "step": 7058 }, { "epoch": 0.55, "grad_norm": 1.1200959553656014, "learning_rate": 8.945386578012906e-06, "loss": 0.532, "step": 7059 }, { "epoch": 0.55, "grad_norm": 1.0510084622711193, "learning_rate": 8.942887952401901e-06, "loss": 0.4692, "step": 7060 }, { "epoch": 0.55, "grad_norm": 1.1454068632921695, "learning_rate": 8.94038939353183e-06, "loss": 0.5528, "step": 7061 }, { "epoch": 0.55, "grad_norm": 1.1907246124969835, "learning_rate": 8.937890901560452e-06, "loss": 0.5423, "step": 7062 }, { "epoch": 0.55, "grad_norm": 1.2802901045051178, "learning_rate": 8.935392476645493e-06, "loss": 0.5969, "step": 7063 }, { "epoch": 0.55, "grad_norm": 1.1649678952782836, "learning_rate": 8.932894118944699e-06, "loss": 0.5564, "step": 7064 }, { "epoch": 0.55, "grad_norm": 1.1245136269252156, "learning_rate": 8.930395828615808e-06, "loss": 0.5204, "step": 7065 }, { "epoch": 0.55, "grad_norm": 1.158987413382461, "learning_rate": 8.927897605816546e-06, "loss": 0.5468, "step": 7066 }, { "epoch": 0.55, "grad_norm": 1.1992520007764307, "learning_rate": 8.925399450704642e-06, "loss": 0.5747, "step": 7067 }, { "epoch": 0.55, "grad_norm": 1.0992629486286078, "learning_rate": 8.922901363437812e-06, "loss": 0.5121, "step": 7068 }, { "epoch": 0.55, "grad_norm": 1.087809205218736, "learning_rate": 8.920403344173776e-06, "loss": 0.5131, "step": 7069 }, { "epoch": 0.55, "grad_norm": 1.15644850186857, "learning_rate": 8.917905393070249e-06, "loss": 0.4964, "step": 7070 }, { "epoch": 0.55, "grad_norm": 1.1522958519242956, "learning_rate": 8.915407510284933e-06, "loss": 0.4904, "step": 7071 }, { "epoch": 0.55, "grad_norm": 1.0675906915130946, "learning_rate": 8.912909695975543e-06, "loss": 0.561, "step": 7072 }, { "epoch": 0.55, "grad_norm": 1.1408533886655317, "learning_rate": 8.910411950299769e-06, "loss": 0.4897, "step": 7073 }, { "epoch": 0.55, "grad_norm": 1.0958324770809256, "learning_rate": 8.907914273415308e-06, "loss": 0.527, "step": 7074 }, { "epoch": 0.55, "grad_norm": 1.2821511146910982, "learning_rate": 8.905416665479856e-06, "loss": 0.5092, "step": 7075 }, { "epoch": 0.55, "grad_norm": 1.2290909091858149, "learning_rate": 8.902919126651096e-06, "loss": 0.5701, "step": 7076 }, { "epoch": 0.55, "grad_norm": 1.2122036129224039, "learning_rate": 8.900421657086716e-06, "loss": 0.5234, "step": 7077 }, { "epoch": 0.55, "grad_norm": 1.1998593983137111, "learning_rate": 8.897924256944386e-06, "loss": 0.5974, "step": 7078 }, { "epoch": 0.55, "grad_norm": 1.1215766637571554, "learning_rate": 8.895426926381782e-06, "loss": 0.5151, "step": 7079 }, { "epoch": 0.55, "grad_norm": 1.086803447787699, "learning_rate": 8.892929665556577e-06, "loss": 0.5216, "step": 7080 }, { "epoch": 0.55, "grad_norm": 1.0911354968811233, "learning_rate": 8.890432474626433e-06, "loss": 0.4995, "step": 7081 }, { "epoch": 0.55, "grad_norm": 1.1227341722431547, "learning_rate": 8.887935353749017e-06, "loss": 0.5727, "step": 7082 }, { "epoch": 0.55, "grad_norm": 1.3099574939733143, "learning_rate": 8.885438303081972e-06, "loss": 0.5852, "step": 7083 }, { "epoch": 0.55, "grad_norm": 1.2274453454368328, "learning_rate": 8.88294132278296e-06, "loss": 0.5978, "step": 7084 }, { "epoch": 0.55, "grad_norm": 1.1841064450608139, "learning_rate": 8.880444413009627e-06, "loss": 0.5602, "step": 7085 }, { "epoch": 0.55, "grad_norm": 1.3034456933330383, "learning_rate": 8.877947573919612e-06, "loss": 0.5742, "step": 7086 }, { "epoch": 0.55, "grad_norm": 1.2213081020406638, "learning_rate": 8.87545080567056e-06, "loss": 0.5324, "step": 7087 }, { "epoch": 0.55, "grad_norm": 1.2171063221022014, "learning_rate": 8.872954108420096e-06, "loss": 0.5474, "step": 7088 }, { "epoch": 0.55, "grad_norm": 1.0927276738106495, "learning_rate": 8.870457482325854e-06, "loss": 0.534, "step": 7089 }, { "epoch": 0.55, "grad_norm": 1.1417210159916185, "learning_rate": 8.867960927545461e-06, "loss": 0.52, "step": 7090 }, { "epoch": 0.55, "grad_norm": 1.1728181731007823, "learning_rate": 8.865464444236534e-06, "loss": 0.5447, "step": 7091 }, { "epoch": 0.55, "grad_norm": 1.076641554898837, "learning_rate": 8.862968032556694e-06, "loss": 0.4897, "step": 7092 }, { "epoch": 0.55, "grad_norm": 1.1660198166866136, "learning_rate": 8.860471692663542e-06, "loss": 0.5194, "step": 7093 }, { "epoch": 0.55, "grad_norm": 1.1576782375229382, "learning_rate": 8.857975424714694e-06, "loss": 0.5384, "step": 7094 }, { "epoch": 0.55, "grad_norm": 1.2249639019221212, "learning_rate": 8.855479228867751e-06, "loss": 0.5313, "step": 7095 }, { "epoch": 0.55, "grad_norm": 1.208790999371075, "learning_rate": 8.852983105280307e-06, "loss": 0.5283, "step": 7096 }, { "epoch": 0.55, "grad_norm": 1.0993255194653686, "learning_rate": 8.850487054109961e-06, "loss": 0.5268, "step": 7097 }, { "epoch": 0.55, "grad_norm": 1.1716331232314943, "learning_rate": 8.847991075514294e-06, "loss": 0.5317, "step": 7098 }, { "epoch": 0.55, "grad_norm": 1.1810950162426344, "learning_rate": 8.845495169650894e-06, "loss": 0.5561, "step": 7099 }, { "epoch": 0.55, "grad_norm": 1.268011786011996, "learning_rate": 8.842999336677342e-06, "loss": 0.591, "step": 7100 }, { "epoch": 0.55, "grad_norm": 1.239070171702857, "learning_rate": 8.84050357675121e-06, "loss": 0.5664, "step": 7101 }, { "epoch": 0.55, "grad_norm": 1.2000389152415756, "learning_rate": 8.838007890030074e-06, "loss": 0.5522, "step": 7102 }, { "epoch": 0.55, "grad_norm": 1.1787176124111955, "learning_rate": 8.83551227667149e-06, "loss": 0.5402, "step": 7103 }, { "epoch": 0.55, "grad_norm": 1.2485091856044537, "learning_rate": 8.833016736833023e-06, "loss": 0.5624, "step": 7104 }, { "epoch": 0.55, "grad_norm": 1.257974507578442, "learning_rate": 8.830521270672233e-06, "loss": 0.5433, "step": 7105 }, { "epoch": 0.55, "grad_norm": 1.1308969020381792, "learning_rate": 8.828025878346667e-06, "loss": 0.5292, "step": 7106 }, { "epoch": 0.55, "grad_norm": 1.1160898218001667, "learning_rate": 8.825530560013876e-06, "loss": 0.4983, "step": 7107 }, { "epoch": 0.55, "grad_norm": 1.1349720857355323, "learning_rate": 8.8230353158314e-06, "loss": 0.578, "step": 7108 }, { "epoch": 0.55, "grad_norm": 1.091982475978547, "learning_rate": 8.820540145956776e-06, "loss": 0.465, "step": 7109 }, { "epoch": 0.55, "grad_norm": 1.2091692904797267, "learning_rate": 8.818045050547539e-06, "loss": 0.5271, "step": 7110 }, { "epoch": 0.55, "grad_norm": 1.1360981613784953, "learning_rate": 8.815550029761215e-06, "loss": 0.5331, "step": 7111 }, { "epoch": 0.55, "grad_norm": 1.138158090142865, "learning_rate": 8.813055083755327e-06, "loss": 0.5017, "step": 7112 }, { "epoch": 0.55, "grad_norm": 1.1919370875717665, "learning_rate": 8.8105602126874e-06, "loss": 0.5528, "step": 7113 }, { "epoch": 0.55, "grad_norm": 1.064462924177229, "learning_rate": 8.808065416714938e-06, "loss": 0.4861, "step": 7114 }, { "epoch": 0.55, "grad_norm": 1.1121106334340165, "learning_rate": 8.805570695995462e-06, "loss": 0.4776, "step": 7115 }, { "epoch": 0.55, "grad_norm": 1.1453971841893755, "learning_rate": 8.803076050686465e-06, "loss": 0.542, "step": 7116 }, { "epoch": 0.55, "grad_norm": 1.1840272116014923, "learning_rate": 8.800581480945453e-06, "loss": 0.5305, "step": 7117 }, { "epoch": 0.55, "grad_norm": 1.3058131952361969, "learning_rate": 8.798086986929923e-06, "loss": 0.5811, "step": 7118 }, { "epoch": 0.55, "grad_norm": 1.1610344539527588, "learning_rate": 8.795592568797362e-06, "loss": 0.5349, "step": 7119 }, { "epoch": 0.55, "grad_norm": 1.1838873571444342, "learning_rate": 8.793098226705255e-06, "loss": 0.5324, "step": 7120 }, { "epoch": 0.55, "grad_norm": 1.2025679252299515, "learning_rate": 8.790603960811082e-06, "loss": 0.5428, "step": 7121 }, { "epoch": 0.55, "grad_norm": 1.1145408063719777, "learning_rate": 8.78810977127232e-06, "loss": 0.5201, "step": 7122 }, { "epoch": 0.55, "grad_norm": 1.1309701078024728, "learning_rate": 8.785615658246448e-06, "loss": 0.5456, "step": 7123 }, { "epoch": 0.55, "grad_norm": 1.1486368330710166, "learning_rate": 8.783121621890917e-06, "loss": 0.5575, "step": 7124 }, { "epoch": 0.55, "grad_norm": 1.256711584666228, "learning_rate": 8.780627662363201e-06, "loss": 0.58, "step": 7125 }, { "epoch": 0.55, "grad_norm": 1.1477685134647573, "learning_rate": 8.77813377982075e-06, "loss": 0.5745, "step": 7126 }, { "epoch": 0.55, "grad_norm": 1.1382409355025513, "learning_rate": 8.775639974421015e-06, "loss": 0.596, "step": 7127 }, { "epoch": 0.55, "grad_norm": 1.1452729617758346, "learning_rate": 8.773146246321452e-06, "loss": 0.5872, "step": 7128 }, { "epoch": 0.55, "grad_norm": 1.1428474304518808, "learning_rate": 8.77065259567949e-06, "loss": 0.547, "step": 7129 }, { "epoch": 0.55, "grad_norm": 1.28632688687355, "learning_rate": 8.768159022652574e-06, "loss": 0.6348, "step": 7130 }, { "epoch": 0.55, "grad_norm": 1.2055499422193672, "learning_rate": 8.765665527398133e-06, "loss": 0.5535, "step": 7131 }, { "epoch": 0.55, "grad_norm": 1.0953780455302633, "learning_rate": 8.763172110073596e-06, "loss": 0.5218, "step": 7132 }, { "epoch": 0.55, "grad_norm": 1.0788903215186887, "learning_rate": 8.760678770836391e-06, "loss": 0.519, "step": 7133 }, { "epoch": 0.55, "grad_norm": 1.1224003000310658, "learning_rate": 8.758185509843921e-06, "loss": 0.4871, "step": 7134 }, { "epoch": 0.55, "grad_norm": 1.0695437155443668, "learning_rate": 8.755692327253612e-06, "loss": 0.5097, "step": 7135 }, { "epoch": 0.55, "grad_norm": 1.1795728008770674, "learning_rate": 8.753199223222863e-06, "loss": 0.5573, "step": 7136 }, { "epoch": 0.55, "grad_norm": 1.2191648510814888, "learning_rate": 8.75070619790908e-06, "loss": 0.5931, "step": 7137 }, { "epoch": 0.55, "grad_norm": 1.1634564454172729, "learning_rate": 8.748213251469663e-06, "loss": 0.5397, "step": 7138 }, { "epoch": 0.55, "grad_norm": 1.1976217266165836, "learning_rate": 8.745720384062002e-06, "loss": 0.5591, "step": 7139 }, { "epoch": 0.55, "grad_norm": 1.046246183199284, "learning_rate": 8.74322759584348e-06, "loss": 0.4605, "step": 7140 }, { "epoch": 0.55, "grad_norm": 1.1382486855817477, "learning_rate": 8.740734886971485e-06, "loss": 0.528, "step": 7141 }, { "epoch": 0.55, "grad_norm": 1.1312567189053966, "learning_rate": 8.738242257603394e-06, "loss": 0.5534, "step": 7142 }, { "epoch": 0.55, "grad_norm": 1.0714343797435453, "learning_rate": 8.735749707896584e-06, "loss": 0.4893, "step": 7143 }, { "epoch": 0.55, "grad_norm": 1.27354393262686, "learning_rate": 8.733257238008414e-06, "loss": 0.6157, "step": 7144 }, { "epoch": 0.55, "grad_norm": 1.151990519550838, "learning_rate": 8.730764848096247e-06, "loss": 0.5211, "step": 7145 }, { "epoch": 0.55, "grad_norm": 1.1484824450774165, "learning_rate": 8.728272538317447e-06, "loss": 0.5343, "step": 7146 }, { "epoch": 0.55, "grad_norm": 1.2753813248159431, "learning_rate": 8.725780308829358e-06, "loss": 0.5678, "step": 7147 }, { "epoch": 0.55, "grad_norm": 1.0761916475504654, "learning_rate": 8.72328815978934e-06, "loss": 0.5449, "step": 7148 }, { "epoch": 0.55, "grad_norm": 1.2135464036491619, "learning_rate": 8.720796091354725e-06, "loss": 0.536, "step": 7149 }, { "epoch": 0.55, "grad_norm": 1.290066830656032, "learning_rate": 8.71830410368285e-06, "loss": 0.5861, "step": 7150 }, { "epoch": 0.55, "grad_norm": 1.1461821747559466, "learning_rate": 8.715812196931049e-06, "loss": 0.5073, "step": 7151 }, { "epoch": 0.55, "grad_norm": 1.1372245371197143, "learning_rate": 8.713320371256651e-06, "loss": 0.5703, "step": 7152 }, { "epoch": 0.55, "grad_norm": 1.227331661405224, "learning_rate": 8.71082862681698e-06, "loss": 0.5832, "step": 7153 }, { "epoch": 0.55, "grad_norm": 1.2050269810042098, "learning_rate": 8.708336963769345e-06, "loss": 0.5622, "step": 7154 }, { "epoch": 0.56, "grad_norm": 1.1607089283578502, "learning_rate": 8.705845382271063e-06, "loss": 0.5249, "step": 7155 }, { "epoch": 0.56, "grad_norm": 1.2006896500902056, "learning_rate": 8.703353882479436e-06, "loss": 0.6039, "step": 7156 }, { "epoch": 0.56, "grad_norm": 1.1997359958951372, "learning_rate": 8.70086246455177e-06, "loss": 0.523, "step": 7157 }, { "epoch": 0.56, "grad_norm": 1.149052876755601, "learning_rate": 8.698371128645364e-06, "loss": 0.57, "step": 7158 }, { "epoch": 0.56, "grad_norm": 1.2437832257610775, "learning_rate": 8.6958798749175e-06, "loss": 0.5645, "step": 7159 }, { "epoch": 0.56, "grad_norm": 1.2376319314007216, "learning_rate": 8.693388703525467e-06, "loss": 0.5769, "step": 7160 }, { "epoch": 0.56, "grad_norm": 1.1158212163284837, "learning_rate": 8.690897614626546e-06, "loss": 0.5515, "step": 7161 }, { "epoch": 0.56, "grad_norm": 1.3077068454810339, "learning_rate": 8.688406608378012e-06, "loss": 0.5694, "step": 7162 }, { "epoch": 0.56, "grad_norm": 1.1859861562445195, "learning_rate": 8.685915684937138e-06, "loss": 0.5276, "step": 7163 }, { "epoch": 0.56, "grad_norm": 1.1966382322855935, "learning_rate": 8.68342484446119e-06, "loss": 0.5182, "step": 7164 }, { "epoch": 0.56, "grad_norm": 1.1723239293101348, "learning_rate": 8.68093408710742e-06, "loss": 0.5786, "step": 7165 }, { "epoch": 0.56, "grad_norm": 1.2801214225368167, "learning_rate": 8.678443413033085e-06, "loss": 0.5472, "step": 7166 }, { "epoch": 0.56, "grad_norm": 1.1055663911542983, "learning_rate": 8.675952822395437e-06, "loss": 0.4865, "step": 7167 }, { "epoch": 0.56, "grad_norm": 1.2392195266569208, "learning_rate": 8.67346231535172e-06, "loss": 0.5382, "step": 7168 }, { "epoch": 0.56, "grad_norm": 1.1553552103647624, "learning_rate": 8.670971892059173e-06, "loss": 0.5446, "step": 7169 }, { "epoch": 0.56, "grad_norm": 1.0555646286680438, "learning_rate": 8.668481552675024e-06, "loss": 0.4894, "step": 7170 }, { "epoch": 0.56, "grad_norm": 1.169164906258231, "learning_rate": 8.665991297356503e-06, "loss": 0.5314, "step": 7171 }, { "epoch": 0.56, "grad_norm": 1.2251974394366452, "learning_rate": 8.663501126260836e-06, "loss": 0.5657, "step": 7172 }, { "epoch": 0.56, "grad_norm": 1.0571001502656456, "learning_rate": 8.661011039545238e-06, "loss": 0.5271, "step": 7173 }, { "epoch": 0.56, "grad_norm": 1.1893826668953762, "learning_rate": 8.658521037366926e-06, "loss": 0.5124, "step": 7174 }, { "epoch": 0.56, "grad_norm": 1.0664217322486766, "learning_rate": 8.656031119883095e-06, "loss": 0.5068, "step": 7175 }, { "epoch": 0.56, "grad_norm": 1.2050477059604079, "learning_rate": 8.653541287250954e-06, "loss": 0.5621, "step": 7176 }, { "epoch": 0.56, "grad_norm": 1.0894080490903262, "learning_rate": 8.6510515396277e-06, "loss": 0.4987, "step": 7177 }, { "epoch": 0.56, "grad_norm": 1.0904926296231676, "learning_rate": 8.648561877170522e-06, "loss": 0.4856, "step": 7178 }, { "epoch": 0.56, "grad_norm": 1.0408307281677367, "learning_rate": 8.64607230003661e-06, "loss": 0.4872, "step": 7179 }, { "epoch": 0.56, "grad_norm": 1.1827934757451357, "learning_rate": 8.643582808383133e-06, "loss": 0.5246, "step": 7180 }, { "epoch": 0.56, "grad_norm": 1.3385979112418176, "learning_rate": 8.641093402367272e-06, "loss": 0.55, "step": 7181 }, { "epoch": 0.56, "grad_norm": 1.1946439681128436, "learning_rate": 8.638604082146195e-06, "loss": 0.6115, "step": 7182 }, { "epoch": 0.56, "grad_norm": 1.2237109915075493, "learning_rate": 8.636114847877068e-06, "loss": 0.5065, "step": 7183 }, { "epoch": 0.56, "grad_norm": 1.3110380204664942, "learning_rate": 8.633625699717051e-06, "loss": 0.5248, "step": 7184 }, { "epoch": 0.56, "grad_norm": 1.0426367756428179, "learning_rate": 8.631136637823288e-06, "loss": 0.5071, "step": 7185 }, { "epoch": 0.56, "grad_norm": 1.1508867783857837, "learning_rate": 8.628647662352932e-06, "loss": 0.5257, "step": 7186 }, { "epoch": 0.56, "grad_norm": 1.1011325186374112, "learning_rate": 8.626158773463124e-06, "loss": 0.539, "step": 7187 }, { "epoch": 0.56, "grad_norm": 1.1338877869041564, "learning_rate": 8.623669971311002e-06, "loss": 0.4981, "step": 7188 }, { "epoch": 0.56, "grad_norm": 1.2576198548823194, "learning_rate": 8.621181256053699e-06, "loss": 0.5641, "step": 7189 }, { "epoch": 0.56, "grad_norm": 1.163963057603609, "learning_rate": 8.618692627848331e-06, "loss": 0.5258, "step": 7190 }, { "epoch": 0.56, "grad_norm": 1.2466958722775159, "learning_rate": 8.616204086852026e-06, "loss": 0.5606, "step": 7191 }, { "epoch": 0.56, "grad_norm": 1.2311160368099745, "learning_rate": 8.613715633221895e-06, "loss": 0.5969, "step": 7192 }, { "epoch": 0.56, "grad_norm": 1.1809099949351327, "learning_rate": 8.611227267115052e-06, "loss": 0.5437, "step": 7193 }, { "epoch": 0.56, "grad_norm": 1.2797272169496263, "learning_rate": 8.608738988688598e-06, "loss": 0.576, "step": 7194 }, { "epoch": 0.56, "grad_norm": 1.1589875162388705, "learning_rate": 8.606250798099626e-06, "loss": 0.5715, "step": 7195 }, { "epoch": 0.56, "grad_norm": 1.2012791531426092, "learning_rate": 8.603762695505231e-06, "loss": 0.5358, "step": 7196 }, { "epoch": 0.56, "grad_norm": 1.1834082631220921, "learning_rate": 8.601274681062502e-06, "loss": 0.5103, "step": 7197 }, { "epoch": 0.56, "grad_norm": 1.204446092060373, "learning_rate": 8.598786754928519e-06, "loss": 0.4844, "step": 7198 }, { "epoch": 0.56, "grad_norm": 1.1348855354130147, "learning_rate": 8.596298917260361e-06, "loss": 0.5131, "step": 7199 }, { "epoch": 0.56, "grad_norm": 1.2509721313683964, "learning_rate": 8.59381116821509e-06, "loss": 0.5928, "step": 7200 }, { "epoch": 0.56, "grad_norm": 1.2113007523618666, "learning_rate": 8.591323507949773e-06, "loss": 0.5475, "step": 7201 }, { "epoch": 0.56, "grad_norm": 1.113959764877169, "learning_rate": 8.588835936621473e-06, "loss": 0.5209, "step": 7202 }, { "epoch": 0.56, "grad_norm": 1.149132861902752, "learning_rate": 8.586348454387244e-06, "loss": 0.5237, "step": 7203 }, { "epoch": 0.56, "grad_norm": 1.1629394131473179, "learning_rate": 8.583861061404131e-06, "loss": 0.552, "step": 7204 }, { "epoch": 0.56, "grad_norm": 1.0557785042725218, "learning_rate": 8.581373757829172e-06, "loss": 0.5328, "step": 7205 }, { "epoch": 0.56, "grad_norm": 1.2137346509911722, "learning_rate": 8.578886543819406e-06, "loss": 0.5114, "step": 7206 }, { "epoch": 0.56, "grad_norm": 1.0513011114370965, "learning_rate": 8.576399419531865e-06, "loss": 0.4742, "step": 7207 }, { "epoch": 0.56, "grad_norm": 1.2870882579593583, "learning_rate": 8.573912385123576e-06, "loss": 0.5477, "step": 7208 }, { "epoch": 0.56, "grad_norm": 1.2033791707062595, "learning_rate": 8.571425440751557e-06, "loss": 0.5532, "step": 7209 }, { "epoch": 0.56, "grad_norm": 1.1112153275941123, "learning_rate": 8.568938586572816e-06, "loss": 0.544, "step": 7210 }, { "epoch": 0.56, "grad_norm": 1.269166113355937, "learning_rate": 8.566451822744367e-06, "loss": 0.5632, "step": 7211 }, { "epoch": 0.56, "grad_norm": 1.103377341999504, "learning_rate": 8.563965149423207e-06, "loss": 0.5588, "step": 7212 }, { "epoch": 0.56, "grad_norm": 1.1829084168500636, "learning_rate": 8.561478566766341e-06, "loss": 0.5701, "step": 7213 }, { "epoch": 0.56, "grad_norm": 1.1282394182083846, "learning_rate": 8.558992074930757e-06, "loss": 0.555, "step": 7214 }, { "epoch": 0.56, "grad_norm": 1.1897374206442264, "learning_rate": 8.556505674073435e-06, "loss": 0.5654, "step": 7215 }, { "epoch": 0.56, "grad_norm": 1.1373050396863185, "learning_rate": 8.554019364351354e-06, "loss": 0.5329, "step": 7216 }, { "epoch": 0.56, "grad_norm": 1.2656010284037458, "learning_rate": 8.551533145921493e-06, "loss": 0.5823, "step": 7217 }, { "epoch": 0.56, "grad_norm": 1.0771660894087123, "learning_rate": 8.54904701894082e-06, "loss": 0.5265, "step": 7218 }, { "epoch": 0.56, "grad_norm": 1.1263101154842148, "learning_rate": 8.54656098356629e-06, "loss": 0.5315, "step": 7219 }, { "epoch": 0.56, "grad_norm": 1.190866566980221, "learning_rate": 8.54407503995487e-06, "loss": 0.6044, "step": 7220 }, { "epoch": 0.56, "grad_norm": 1.1645208390903092, "learning_rate": 8.541589188263499e-06, "loss": 0.5599, "step": 7221 }, { "epoch": 0.56, "grad_norm": 1.3026463880665522, "learning_rate": 8.53910342864913e-06, "loss": 0.5796, "step": 7222 }, { "epoch": 0.56, "grad_norm": 1.1717749998023268, "learning_rate": 8.536617761268697e-06, "loss": 0.5296, "step": 7223 }, { "epoch": 0.56, "grad_norm": 1.1187839438973723, "learning_rate": 8.534132186279134e-06, "loss": 0.4973, "step": 7224 }, { "epoch": 0.56, "grad_norm": 1.1452520919211118, "learning_rate": 8.531646703837375e-06, "loss": 0.515, "step": 7225 }, { "epoch": 0.56, "grad_norm": 1.149693120562664, "learning_rate": 8.52916131410033e-06, "loss": 0.5897, "step": 7226 }, { "epoch": 0.56, "grad_norm": 1.2401958790008578, "learning_rate": 8.526676017224917e-06, "loss": 0.4929, "step": 7227 }, { "epoch": 0.56, "grad_norm": 1.0882690976704232, "learning_rate": 8.524190813368055e-06, "loss": 0.5132, "step": 7228 }, { "epoch": 0.56, "grad_norm": 1.1196918295186495, "learning_rate": 8.521705702686636e-06, "loss": 0.5099, "step": 7229 }, { "epoch": 0.56, "grad_norm": 1.0947322794265588, "learning_rate": 8.519220685337567e-06, "loss": 0.4871, "step": 7230 }, { "epoch": 0.56, "grad_norm": 1.1736105718868504, "learning_rate": 8.516735761477734e-06, "loss": 0.5403, "step": 7231 }, { "epoch": 0.56, "grad_norm": 1.1571712432386958, "learning_rate": 8.514250931264023e-06, "loss": 0.5147, "step": 7232 }, { "epoch": 0.56, "grad_norm": 1.207567213253128, "learning_rate": 8.51176619485332e-06, "loss": 0.5414, "step": 7233 }, { "epoch": 0.56, "grad_norm": 1.1109181806169475, "learning_rate": 8.509281552402492e-06, "loss": 0.4779, "step": 7234 }, { "epoch": 0.56, "grad_norm": 1.1273065868869139, "learning_rate": 8.506797004068414e-06, "loss": 0.5445, "step": 7235 }, { "epoch": 0.56, "grad_norm": 1.1870942175381705, "learning_rate": 8.504312550007943e-06, "loss": 0.5392, "step": 7236 }, { "epoch": 0.56, "grad_norm": 1.144063964859484, "learning_rate": 8.501828190377936e-06, "loss": 0.5915, "step": 7237 }, { "epoch": 0.56, "grad_norm": 1.2614229877202074, "learning_rate": 8.499343925335249e-06, "loss": 0.5357, "step": 7238 }, { "epoch": 0.56, "grad_norm": 1.0709019729246994, "learning_rate": 8.496859755036719e-06, "loss": 0.5138, "step": 7239 }, { "epoch": 0.56, "grad_norm": 1.2774763126006197, "learning_rate": 8.49437567963919e-06, "loss": 0.5745, "step": 7240 }, { "epoch": 0.56, "grad_norm": 1.0626795560919822, "learning_rate": 8.491891699299491e-06, "loss": 0.5294, "step": 7241 }, { "epoch": 0.56, "grad_norm": 1.0893981460152122, "learning_rate": 8.48940781417445e-06, "loss": 0.5322, "step": 7242 }, { "epoch": 0.56, "grad_norm": 1.17873657502087, "learning_rate": 8.486924024420887e-06, "loss": 0.5115, "step": 7243 }, { "epoch": 0.56, "grad_norm": 1.1926663098435129, "learning_rate": 8.484440330195615e-06, "loss": 0.528, "step": 7244 }, { "epoch": 0.56, "grad_norm": 1.1950658406590542, "learning_rate": 8.481956731655451e-06, "loss": 0.4774, "step": 7245 }, { "epoch": 0.56, "grad_norm": 1.1552145677724501, "learning_rate": 8.479473228957185e-06, "loss": 0.5308, "step": 7246 }, { "epoch": 0.56, "grad_norm": 1.1907529947362159, "learning_rate": 8.47698982225762e-06, "loss": 0.5542, "step": 7247 }, { "epoch": 0.56, "grad_norm": 1.1525258065931892, "learning_rate": 8.474506511713543e-06, "loss": 0.5582, "step": 7248 }, { "epoch": 0.56, "grad_norm": 1.2058761155597932, "learning_rate": 8.472023297481741e-06, "loss": 0.5131, "step": 7249 }, { "epoch": 0.56, "grad_norm": 1.1127829524045973, "learning_rate": 8.469540179718997e-06, "loss": 0.5099, "step": 7250 }, { "epoch": 0.56, "grad_norm": 1.2010304933785343, "learning_rate": 8.467057158582072e-06, "loss": 0.5843, "step": 7251 }, { "epoch": 0.56, "grad_norm": 1.3161147612735844, "learning_rate": 8.46457423422774e-06, "loss": 0.5905, "step": 7252 }, { "epoch": 0.56, "grad_norm": 1.128347872542259, "learning_rate": 8.462091406812759e-06, "loss": 0.5235, "step": 7253 }, { "epoch": 0.56, "grad_norm": 1.0829132561644017, "learning_rate": 8.459608676493878e-06, "loss": 0.4717, "step": 7254 }, { "epoch": 0.56, "grad_norm": 1.1453541476183215, "learning_rate": 8.457126043427855e-06, "loss": 0.5432, "step": 7255 }, { "epoch": 0.56, "grad_norm": 1.2237496164260901, "learning_rate": 8.45464350777142e-06, "loss": 0.5754, "step": 7256 }, { "epoch": 0.56, "grad_norm": 1.1572886776248892, "learning_rate": 8.452161069681315e-06, "loss": 0.5131, "step": 7257 }, { "epoch": 0.56, "grad_norm": 1.2030709985280958, "learning_rate": 8.449678729314266e-06, "loss": 0.5472, "step": 7258 }, { "epoch": 0.56, "grad_norm": 1.1487454371315013, "learning_rate": 8.447196486826996e-06, "loss": 0.533, "step": 7259 }, { "epoch": 0.56, "grad_norm": 1.0968306073283058, "learning_rate": 8.44471434237623e-06, "loss": 0.4957, "step": 7260 }, { "epoch": 0.56, "grad_norm": 1.175836099251446, "learning_rate": 8.442232296118667e-06, "loss": 0.5089, "step": 7261 }, { "epoch": 0.56, "grad_norm": 1.1631159678441056, "learning_rate": 8.439750348211016e-06, "loss": 0.5248, "step": 7262 }, { "epoch": 0.56, "grad_norm": 1.183753377391988, "learning_rate": 8.437268498809975e-06, "loss": 0.5329, "step": 7263 }, { "epoch": 0.56, "grad_norm": 1.2399476688632751, "learning_rate": 8.434786748072237e-06, "loss": 0.6115, "step": 7264 }, { "epoch": 0.56, "grad_norm": 1.0345711836073948, "learning_rate": 8.43230509615449e-06, "loss": 0.5, "step": 7265 }, { "epoch": 0.56, "grad_norm": 1.2119532601090794, "learning_rate": 8.429823543213406e-06, "loss": 0.5383, "step": 7266 }, { "epoch": 0.56, "grad_norm": 1.0496316172745663, "learning_rate": 8.427342089405667e-06, "loss": 0.4923, "step": 7267 }, { "epoch": 0.56, "grad_norm": 1.192263985476433, "learning_rate": 8.424860734887932e-06, "loss": 0.5526, "step": 7268 }, { "epoch": 0.56, "grad_norm": 1.2567423657469419, "learning_rate": 8.422379479816865e-06, "loss": 0.5885, "step": 7269 }, { "epoch": 0.56, "grad_norm": 1.2610462865025873, "learning_rate": 8.419898324349122e-06, "loss": 0.492, "step": 7270 }, { "epoch": 0.56, "grad_norm": 1.1361697729511353, "learning_rate": 8.417417268641354e-06, "loss": 0.5552, "step": 7271 }, { "epoch": 0.56, "grad_norm": 1.1669624101359166, "learning_rate": 8.414936312850196e-06, "loss": 0.5574, "step": 7272 }, { "epoch": 0.56, "grad_norm": 1.2420983432467272, "learning_rate": 8.412455457132285e-06, "loss": 0.5897, "step": 7273 }, { "epoch": 0.56, "grad_norm": 1.2690424990680642, "learning_rate": 8.409974701644251e-06, "loss": 0.5629, "step": 7274 }, { "epoch": 0.56, "grad_norm": 1.097771146469405, "learning_rate": 8.40749404654272e-06, "loss": 0.5428, "step": 7275 }, { "epoch": 0.56, "grad_norm": 1.1377758865311753, "learning_rate": 8.405013491984307e-06, "loss": 0.5241, "step": 7276 }, { "epoch": 0.56, "grad_norm": 1.2046186909895835, "learning_rate": 8.40253303812562e-06, "loss": 0.5155, "step": 7277 }, { "epoch": 0.56, "grad_norm": 1.18020295256656, "learning_rate": 8.400052685123263e-06, "loss": 0.5699, "step": 7278 }, { "epoch": 0.56, "grad_norm": 1.1504131632282575, "learning_rate": 8.397572433133836e-06, "loss": 0.5481, "step": 7279 }, { "epoch": 0.56, "grad_norm": 1.2071228980174176, "learning_rate": 8.395092282313927e-06, "loss": 0.5788, "step": 7280 }, { "epoch": 0.56, "grad_norm": 1.0344017878172516, "learning_rate": 8.392612232820125e-06, "loss": 0.4855, "step": 7281 }, { "epoch": 0.56, "grad_norm": 1.1339002976899328, "learning_rate": 8.390132284809005e-06, "loss": 0.538, "step": 7282 }, { "epoch": 0.57, "grad_norm": 1.1394780807194689, "learning_rate": 8.387652438437138e-06, "loss": 0.536, "step": 7283 }, { "epoch": 0.57, "grad_norm": 1.139242196266246, "learning_rate": 8.385172693861092e-06, "loss": 0.5599, "step": 7284 }, { "epoch": 0.57, "grad_norm": 1.1531086080569763, "learning_rate": 8.382693051237424e-06, "loss": 0.5404, "step": 7285 }, { "epoch": 0.57, "grad_norm": 1.1701272774612959, "learning_rate": 8.380213510722692e-06, "loss": 0.5402, "step": 7286 }, { "epoch": 0.57, "grad_norm": 1.3134095809477861, "learning_rate": 8.377734072473437e-06, "loss": 0.6476, "step": 7287 }, { "epoch": 0.57, "grad_norm": 1.1960375213861296, "learning_rate": 8.375254736646197e-06, "loss": 0.5625, "step": 7288 }, { "epoch": 0.57, "grad_norm": 1.157223678134591, "learning_rate": 8.372775503397507e-06, "loss": 0.5332, "step": 7289 }, { "epoch": 0.57, "grad_norm": 1.0540106402842708, "learning_rate": 8.370296372883898e-06, "loss": 0.5107, "step": 7290 }, { "epoch": 0.57, "grad_norm": 1.1678402424927279, "learning_rate": 8.367817345261888e-06, "loss": 0.5454, "step": 7291 }, { "epoch": 0.57, "grad_norm": 1.2041928890149463, "learning_rate": 8.36533842068799e-06, "loss": 0.5472, "step": 7292 }, { "epoch": 0.57, "grad_norm": 1.1474760533412618, "learning_rate": 8.362859599318708e-06, "loss": 0.5468, "step": 7293 }, { "epoch": 0.57, "grad_norm": 1.2105314496889517, "learning_rate": 8.360380881310545e-06, "loss": 0.5101, "step": 7294 }, { "epoch": 0.57, "grad_norm": 1.192729827598338, "learning_rate": 8.357902266819999e-06, "loss": 0.5766, "step": 7295 }, { "epoch": 0.57, "grad_norm": 1.3202619373639282, "learning_rate": 8.355423756003557e-06, "loss": 0.5666, "step": 7296 }, { "epoch": 0.57, "grad_norm": 1.123303777087027, "learning_rate": 8.352945349017699e-06, "loss": 0.5174, "step": 7297 }, { "epoch": 0.57, "grad_norm": 1.0945743179288843, "learning_rate": 8.350467046018892e-06, "loss": 0.5087, "step": 7298 }, { "epoch": 0.57, "grad_norm": 1.1474797933124947, "learning_rate": 8.347988847163615e-06, "loss": 0.5414, "step": 7299 }, { "epoch": 0.57, "grad_norm": 1.0913709652548347, "learning_rate": 8.345510752608324e-06, "loss": 0.503, "step": 7300 }, { "epoch": 0.57, "grad_norm": 1.1080336722549131, "learning_rate": 8.343032762509478e-06, "loss": 0.5209, "step": 7301 }, { "epoch": 0.57, "grad_norm": 1.13434097827514, "learning_rate": 8.340554877023523e-06, "loss": 0.5619, "step": 7302 }, { "epoch": 0.57, "grad_norm": 1.0580863722066831, "learning_rate": 8.338077096306895e-06, "loss": 0.4432, "step": 7303 }, { "epoch": 0.57, "grad_norm": 1.112115242677624, "learning_rate": 8.335599420516036e-06, "loss": 0.5087, "step": 7304 }, { "epoch": 0.57, "grad_norm": 1.1741007711972185, "learning_rate": 8.333121849807374e-06, "loss": 0.5509, "step": 7305 }, { "epoch": 0.57, "grad_norm": 1.229404582689245, "learning_rate": 8.330644384337334e-06, "loss": 0.5892, "step": 7306 }, { "epoch": 0.57, "grad_norm": 1.163419353826195, "learning_rate": 8.328167024262323e-06, "loss": 0.5503, "step": 7307 }, { "epoch": 0.57, "grad_norm": 1.2644014448718197, "learning_rate": 8.32568976973875e-06, "loss": 0.5323, "step": 7308 }, { "epoch": 0.57, "grad_norm": 1.204033595237437, "learning_rate": 8.323212620923023e-06, "loss": 0.5449, "step": 7309 }, { "epoch": 0.57, "grad_norm": 1.1461771824794256, "learning_rate": 8.320735577971533e-06, "loss": 0.5388, "step": 7310 }, { "epoch": 0.57, "grad_norm": 1.2193195772700351, "learning_rate": 8.318258641040674e-06, "loss": 0.5264, "step": 7311 }, { "epoch": 0.57, "grad_norm": 1.2366653633282179, "learning_rate": 8.31578181028682e-06, "loss": 0.5255, "step": 7312 }, { "epoch": 0.57, "grad_norm": 1.0600838396816714, "learning_rate": 8.31330508586635e-06, "loss": 0.5186, "step": 7313 }, { "epoch": 0.57, "grad_norm": 1.3015407143700979, "learning_rate": 8.31082846793563e-06, "loss": 0.6232, "step": 7314 }, { "epoch": 0.57, "grad_norm": 1.07671999948839, "learning_rate": 8.308351956651026e-06, "loss": 0.5044, "step": 7315 }, { "epoch": 0.57, "grad_norm": 1.5728134811947052, "learning_rate": 8.305875552168894e-06, "loss": 0.5224, "step": 7316 }, { "epoch": 0.57, "grad_norm": 1.1416076190092765, "learning_rate": 8.303399254645578e-06, "loss": 0.4906, "step": 7317 }, { "epoch": 0.57, "grad_norm": 1.1407239688107764, "learning_rate": 8.300923064237417e-06, "loss": 0.5019, "step": 7318 }, { "epoch": 0.57, "grad_norm": 1.2180392809517295, "learning_rate": 8.298446981100749e-06, "loss": 0.5711, "step": 7319 }, { "epoch": 0.57, "grad_norm": 1.1200144287440175, "learning_rate": 8.295971005391902e-06, "loss": 0.5164, "step": 7320 }, { "epoch": 0.57, "grad_norm": 1.2108949223077898, "learning_rate": 8.2934951372672e-06, "loss": 0.5398, "step": 7321 }, { "epoch": 0.57, "grad_norm": 1.2023682629600465, "learning_rate": 8.291019376882955e-06, "loss": 0.5501, "step": 7322 }, { "epoch": 0.57, "grad_norm": 1.190400246059741, "learning_rate": 8.28854372439547e-06, "loss": 0.5521, "step": 7323 }, { "epoch": 0.57, "grad_norm": 1.1737444398725967, "learning_rate": 8.28606817996105e-06, "loss": 0.5256, "step": 7324 }, { "epoch": 0.57, "grad_norm": 1.237148163611668, "learning_rate": 8.283592743735988e-06, "loss": 0.5686, "step": 7325 }, { "epoch": 0.57, "grad_norm": 1.2128477227183603, "learning_rate": 8.281117415876574e-06, "loss": 0.5371, "step": 7326 }, { "epoch": 0.57, "grad_norm": 1.254491461501668, "learning_rate": 8.27864219653909e-06, "loss": 0.533, "step": 7327 }, { "epoch": 0.57, "grad_norm": 1.067887559440243, "learning_rate": 8.276167085879798e-06, "loss": 0.5263, "step": 7328 }, { "epoch": 0.57, "grad_norm": 1.2076692345693945, "learning_rate": 8.273692084054974e-06, "loss": 0.555, "step": 7329 }, { "epoch": 0.57, "grad_norm": 1.268412593081061, "learning_rate": 8.271217191220874e-06, "loss": 0.6188, "step": 7330 }, { "epoch": 0.57, "grad_norm": 1.2103028141562966, "learning_rate": 8.268742407533754e-06, "loss": 0.58, "step": 7331 }, { "epoch": 0.57, "grad_norm": 1.1120631463934354, "learning_rate": 8.26626773314986e-06, "loss": 0.5115, "step": 7332 }, { "epoch": 0.57, "grad_norm": 1.1895547958041968, "learning_rate": 8.263793168225425e-06, "loss": 0.5811, "step": 7333 }, { "epoch": 0.57, "grad_norm": 1.2021514124023913, "learning_rate": 8.261318712916685e-06, "loss": 0.5762, "step": 7334 }, { "epoch": 0.57, "grad_norm": 1.1191333284522993, "learning_rate": 8.258844367379866e-06, "loss": 0.4982, "step": 7335 }, { "epoch": 0.57, "grad_norm": 1.140360239733101, "learning_rate": 8.256370131771185e-06, "loss": 0.5017, "step": 7336 }, { "epoch": 0.57, "grad_norm": 1.246165641217594, "learning_rate": 8.253896006246858e-06, "loss": 0.5016, "step": 7337 }, { "epoch": 0.57, "grad_norm": 1.086722275641157, "learning_rate": 8.25142199096308e-06, "loss": 0.5661, "step": 7338 }, { "epoch": 0.57, "grad_norm": 1.1408019255240671, "learning_rate": 8.248948086076052e-06, "loss": 0.5342, "step": 7339 }, { "epoch": 0.57, "grad_norm": 1.1338437877922387, "learning_rate": 8.24647429174197e-06, "loss": 0.5049, "step": 7340 }, { "epoch": 0.57, "grad_norm": 1.1914391685065533, "learning_rate": 8.244000608117009e-06, "loss": 0.5201, "step": 7341 }, { "epoch": 0.57, "grad_norm": 1.178209199603273, "learning_rate": 8.241527035357354e-06, "loss": 0.5986, "step": 7342 }, { "epoch": 0.57, "grad_norm": 1.106167791296938, "learning_rate": 8.239053573619164e-06, "loss": 0.5394, "step": 7343 }, { "epoch": 0.57, "grad_norm": 1.0767330084479267, "learning_rate": 8.23658022305861e-06, "loss": 0.4816, "step": 7344 }, { "epoch": 0.57, "grad_norm": 1.1955369726101355, "learning_rate": 8.234106983831846e-06, "loss": 0.5693, "step": 7345 }, { "epoch": 0.57, "grad_norm": 1.1524646243046868, "learning_rate": 8.231633856095015e-06, "loss": 0.5273, "step": 7346 }, { "epoch": 0.57, "grad_norm": 1.2637895532261076, "learning_rate": 8.22916084000427e-06, "loss": 0.5234, "step": 7347 }, { "epoch": 0.57, "grad_norm": 1.1900154375629686, "learning_rate": 8.22668793571573e-06, "loss": 0.5517, "step": 7348 }, { "epoch": 0.57, "grad_norm": 1.1714373979501782, "learning_rate": 8.22421514338553e-06, "loss": 0.5775, "step": 7349 }, { "epoch": 0.57, "grad_norm": 1.0615243920512325, "learning_rate": 8.221742463169794e-06, "loss": 0.5222, "step": 7350 }, { "epoch": 0.57, "grad_norm": 1.2751697389898624, "learning_rate": 8.219269895224627e-06, "loss": 0.533, "step": 7351 }, { "epoch": 0.57, "grad_norm": 1.132269367371601, "learning_rate": 8.216797439706142e-06, "loss": 0.5235, "step": 7352 }, { "epoch": 0.57, "grad_norm": 1.1282529953811817, "learning_rate": 8.214325096770433e-06, "loss": 0.4858, "step": 7353 }, { "epoch": 0.57, "grad_norm": 1.1682322023204603, "learning_rate": 8.211852866573591e-06, "loss": 0.5311, "step": 7354 }, { "epoch": 0.57, "grad_norm": 1.2195835197630178, "learning_rate": 8.209380749271708e-06, "loss": 0.5723, "step": 7355 }, { "epoch": 0.57, "grad_norm": 1.2121316251885463, "learning_rate": 8.206908745020852e-06, "loss": 0.5515, "step": 7356 }, { "epoch": 0.57, "grad_norm": 1.3240574651302344, "learning_rate": 8.204436853977105e-06, "loss": 0.5828, "step": 7357 }, { "epoch": 0.57, "grad_norm": 1.1867401804191295, "learning_rate": 8.201965076296518e-06, "loss": 0.6011, "step": 7358 }, { "epoch": 0.57, "grad_norm": 1.1582329217123883, "learning_rate": 8.199493412135152e-06, "loss": 0.5356, "step": 7359 }, { "epoch": 0.57, "grad_norm": 1.2143649908842091, "learning_rate": 8.197021861649059e-06, "loss": 0.5619, "step": 7360 }, { "epoch": 0.57, "grad_norm": 1.2721781676289097, "learning_rate": 8.194550424994274e-06, "loss": 0.586, "step": 7361 }, { "epoch": 0.57, "grad_norm": 1.2765637737440574, "learning_rate": 8.192079102326842e-06, "loss": 0.5512, "step": 7362 }, { "epoch": 0.57, "grad_norm": 1.2384502885164446, "learning_rate": 8.189607893802779e-06, "loss": 0.5519, "step": 7363 }, { "epoch": 0.57, "grad_norm": 1.2799023309340236, "learning_rate": 8.187136799578111e-06, "loss": 0.6006, "step": 7364 }, { "epoch": 0.57, "grad_norm": 1.2247462340665136, "learning_rate": 8.18466581980885e-06, "loss": 0.566, "step": 7365 }, { "epoch": 0.57, "grad_norm": 1.1231214946337613, "learning_rate": 8.182194954651e-06, "loss": 0.5144, "step": 7366 }, { "epoch": 0.57, "grad_norm": 1.143971433206162, "learning_rate": 8.179724204260567e-06, "loss": 0.5299, "step": 7367 }, { "epoch": 0.57, "grad_norm": 1.1346617764409792, "learning_rate": 8.17725356879353e-06, "loss": 0.5151, "step": 7368 }, { "epoch": 0.57, "grad_norm": 1.131069763325533, "learning_rate": 8.174783048405882e-06, "loss": 0.548, "step": 7369 }, { "epoch": 0.57, "grad_norm": 1.220352781756787, "learning_rate": 8.172312643253597e-06, "loss": 0.5244, "step": 7370 }, { "epoch": 0.57, "grad_norm": 1.2574127699399982, "learning_rate": 8.169842353492644e-06, "loss": 0.5885, "step": 7371 }, { "epoch": 0.57, "grad_norm": 1.1841725360063617, "learning_rate": 8.167372179278988e-06, "loss": 0.5662, "step": 7372 }, { "epoch": 0.57, "grad_norm": 1.137280826622926, "learning_rate": 8.164902120768578e-06, "loss": 0.5198, "step": 7373 }, { "epoch": 0.57, "grad_norm": 1.263724324524932, "learning_rate": 8.162432178117365e-06, "loss": 0.6203, "step": 7374 }, { "epoch": 0.57, "grad_norm": 1.1550726695959108, "learning_rate": 8.159962351481291e-06, "loss": 0.5361, "step": 7375 }, { "epoch": 0.57, "grad_norm": 1.1247691871378398, "learning_rate": 8.157492641016285e-06, "loss": 0.5336, "step": 7376 }, { "epoch": 0.57, "grad_norm": 1.0661411165407342, "learning_rate": 8.15502304687828e-06, "loss": 0.4792, "step": 7377 }, { "epoch": 0.57, "grad_norm": 1.106380558391724, "learning_rate": 8.152553569223183e-06, "loss": 0.5188, "step": 7378 }, { "epoch": 0.57, "grad_norm": 1.3108831619440742, "learning_rate": 8.150084208206912e-06, "loss": 0.5872, "step": 7379 }, { "epoch": 0.57, "grad_norm": 1.1911717418571253, "learning_rate": 8.147614963985371e-06, "loss": 0.5382, "step": 7380 }, { "epoch": 0.57, "grad_norm": 1.1009492722898906, "learning_rate": 8.145145836714452e-06, "loss": 0.4933, "step": 7381 }, { "epoch": 0.57, "grad_norm": 1.2646844453729982, "learning_rate": 8.142676826550046e-06, "loss": 0.5811, "step": 7382 }, { "epoch": 0.57, "grad_norm": 1.1039165477447856, "learning_rate": 8.14020793364804e-06, "loss": 0.535, "step": 7383 }, { "epoch": 0.57, "grad_norm": 1.1592916227317223, "learning_rate": 8.137739158164297e-06, "loss": 0.5634, "step": 7384 }, { "epoch": 0.57, "grad_norm": 1.2085740682704509, "learning_rate": 8.135270500254692e-06, "loss": 0.5594, "step": 7385 }, { "epoch": 0.57, "grad_norm": 1.1477347578874182, "learning_rate": 8.132801960075077e-06, "loss": 0.5134, "step": 7386 }, { "epoch": 0.57, "grad_norm": 1.151745191391416, "learning_rate": 8.130333537781309e-06, "loss": 0.5551, "step": 7387 }, { "epoch": 0.57, "grad_norm": 1.1227198912567786, "learning_rate": 8.127865233529235e-06, "loss": 0.5144, "step": 7388 }, { "epoch": 0.57, "grad_norm": 1.147854196241342, "learning_rate": 8.125397047474684e-06, "loss": 0.5008, "step": 7389 }, { "epoch": 0.57, "grad_norm": 1.2012506722757004, "learning_rate": 8.122928979773491e-06, "loss": 0.5657, "step": 7390 }, { "epoch": 0.57, "grad_norm": 1.1194352166673986, "learning_rate": 8.120461030581474e-06, "loss": 0.4959, "step": 7391 }, { "epoch": 0.57, "grad_norm": 1.2512962772527103, "learning_rate": 8.117993200054449e-06, "loss": 0.563, "step": 7392 }, { "epoch": 0.57, "grad_norm": 1.107005261295845, "learning_rate": 8.115525488348227e-06, "loss": 0.4826, "step": 7393 }, { "epoch": 0.57, "grad_norm": 1.0240724064775895, "learning_rate": 8.1130578956186e-06, "loss": 0.4592, "step": 7394 }, { "epoch": 0.57, "grad_norm": 1.1889458938749113, "learning_rate": 8.110590422021365e-06, "loss": 0.5709, "step": 7395 }, { "epoch": 0.57, "grad_norm": 1.2442022335302863, "learning_rate": 8.108123067712302e-06, "loss": 0.5585, "step": 7396 }, { "epoch": 0.57, "grad_norm": 1.2719706412681977, "learning_rate": 8.105655832847193e-06, "loss": 0.5882, "step": 7397 }, { "epoch": 0.57, "grad_norm": 1.2631577355296888, "learning_rate": 8.103188717581808e-06, "loss": 0.5847, "step": 7398 }, { "epoch": 0.57, "grad_norm": 1.2090703043189124, "learning_rate": 8.100721722071899e-06, "loss": 0.5252, "step": 7399 }, { "epoch": 0.57, "grad_norm": 1.2898948063210043, "learning_rate": 8.09825484647323e-06, "loss": 0.5635, "step": 7400 }, { "epoch": 0.57, "grad_norm": 1.212812043376425, "learning_rate": 8.095788090941543e-06, "loss": 0.4894, "step": 7401 }, { "epoch": 0.57, "grad_norm": 1.1037216129303111, "learning_rate": 8.093321455632578e-06, "loss": 0.5261, "step": 7402 }, { "epoch": 0.57, "grad_norm": 1.217128751264944, "learning_rate": 8.09085494070207e-06, "loss": 0.5909, "step": 7403 }, { "epoch": 0.57, "grad_norm": 1.1675637860405172, "learning_rate": 8.088388546305737e-06, "loss": 0.5218, "step": 7404 }, { "epoch": 0.57, "grad_norm": 1.1547107761292843, "learning_rate": 8.085922272599297e-06, "loss": 0.4862, "step": 7405 }, { "epoch": 0.57, "grad_norm": 1.1765822410193867, "learning_rate": 8.08345611973846e-06, "loss": 0.5368, "step": 7406 }, { "epoch": 0.57, "grad_norm": 1.1904028497538757, "learning_rate": 8.080990087878925e-06, "loss": 0.5099, "step": 7407 }, { "epoch": 0.57, "grad_norm": 1.1265825161366, "learning_rate": 8.078524177176392e-06, "loss": 0.5165, "step": 7408 }, { "epoch": 0.57, "grad_norm": 1.108522759409965, "learning_rate": 8.076058387786536e-06, "loss": 0.4836, "step": 7409 }, { "epoch": 0.57, "grad_norm": 1.1494970299813894, "learning_rate": 8.073592719865041e-06, "loss": 0.4565, "step": 7410 }, { "epoch": 0.57, "grad_norm": 1.2419361843938974, "learning_rate": 8.071127173567576e-06, "loss": 0.573, "step": 7411 }, { "epoch": 0.58, "grad_norm": 1.1678633626386437, "learning_rate": 8.068661749049805e-06, "loss": 0.5478, "step": 7412 }, { "epoch": 0.58, "grad_norm": 1.2158832579714967, "learning_rate": 8.066196446467385e-06, "loss": 0.5682, "step": 7413 }, { "epoch": 0.58, "grad_norm": 1.144539946955945, "learning_rate": 8.063731265975955e-06, "loss": 0.58, "step": 7414 }, { "epoch": 0.58, "grad_norm": 1.2938475604290853, "learning_rate": 8.061266207731165e-06, "loss": 0.5421, "step": 7415 }, { "epoch": 0.58, "grad_norm": 1.2323920825192576, "learning_rate": 8.058801271888637e-06, "loss": 0.5627, "step": 7416 }, { "epoch": 0.58, "grad_norm": 1.2274845812231219, "learning_rate": 8.056336458604002e-06, "loss": 0.5575, "step": 7417 }, { "epoch": 0.58, "grad_norm": 1.1157593568738184, "learning_rate": 8.053871768032878e-06, "loss": 0.5078, "step": 7418 }, { "epoch": 0.58, "grad_norm": 1.1072647548406465, "learning_rate": 8.051407200330866e-06, "loss": 0.5665, "step": 7419 }, { "epoch": 0.58, "grad_norm": 1.1389622522517782, "learning_rate": 8.048942755653573e-06, "loss": 0.4904, "step": 7420 }, { "epoch": 0.58, "grad_norm": 1.1711855067977095, "learning_rate": 8.046478434156588e-06, "loss": 0.5653, "step": 7421 }, { "epoch": 0.58, "grad_norm": 1.287530902380584, "learning_rate": 8.044014235995496e-06, "loss": 0.5378, "step": 7422 }, { "epoch": 0.58, "grad_norm": 1.1900621680842285, "learning_rate": 8.041550161325884e-06, "loss": 0.5696, "step": 7423 }, { "epoch": 0.58, "grad_norm": 1.1822198630331715, "learning_rate": 8.039086210303308e-06, "loss": 0.5236, "step": 7424 }, { "epoch": 0.58, "grad_norm": 1.107481939715341, "learning_rate": 8.036622383083341e-06, "loss": 0.5087, "step": 7425 }, { "epoch": 0.58, "grad_norm": 1.188553693990787, "learning_rate": 8.034158679821529e-06, "loss": 0.5522, "step": 7426 }, { "epoch": 0.58, "grad_norm": 1.2020325593235863, "learning_rate": 8.031695100673423e-06, "loss": 0.5276, "step": 7427 }, { "epoch": 0.58, "grad_norm": 1.094103456333685, "learning_rate": 8.029231645794564e-06, "loss": 0.5193, "step": 7428 }, { "epoch": 0.58, "grad_norm": 1.1391752775899786, "learning_rate": 8.026768315340475e-06, "loss": 0.5384, "step": 7429 }, { "epoch": 0.58, "grad_norm": 1.1254403524061307, "learning_rate": 8.024305109466685e-06, "loss": 0.5018, "step": 7430 }, { "epoch": 0.58, "grad_norm": 1.3492063149573306, "learning_rate": 8.021842028328703e-06, "loss": 0.621, "step": 7431 }, { "epoch": 0.58, "grad_norm": 1.3184535466221496, "learning_rate": 8.01937907208204e-06, "loss": 0.5571, "step": 7432 }, { "epoch": 0.58, "grad_norm": 1.1071027669372246, "learning_rate": 8.016916240882202e-06, "loss": 0.5073, "step": 7433 }, { "epoch": 0.58, "grad_norm": 1.125009324776893, "learning_rate": 8.014453534884666e-06, "loss": 0.5359, "step": 7434 }, { "epoch": 0.58, "grad_norm": 1.1636805579817773, "learning_rate": 8.011990954244926e-06, "loss": 0.55, "step": 7435 }, { "epoch": 0.58, "grad_norm": 1.1802868867720506, "learning_rate": 8.00952849911845e-06, "loss": 0.4918, "step": 7436 }, { "epoch": 0.58, "grad_norm": 1.0579360664354809, "learning_rate": 8.007066169660711e-06, "loss": 0.5026, "step": 7437 }, { "epoch": 0.58, "grad_norm": 1.1517000113673141, "learning_rate": 8.004603966027167e-06, "loss": 0.5381, "step": 7438 }, { "epoch": 0.58, "grad_norm": 1.2140958391126289, "learning_rate": 8.002141888373274e-06, "loss": 0.6047, "step": 7439 }, { "epoch": 0.58, "grad_norm": 1.206806297697629, "learning_rate": 7.999679936854467e-06, "loss": 0.6038, "step": 7440 }, { "epoch": 0.58, "grad_norm": 1.123099682437124, "learning_rate": 7.997218111626186e-06, "loss": 0.5236, "step": 7441 }, { "epoch": 0.58, "grad_norm": 1.1292954668203181, "learning_rate": 7.994756412843857e-06, "loss": 0.5644, "step": 7442 }, { "epoch": 0.58, "grad_norm": 1.243385073545735, "learning_rate": 7.992294840662904e-06, "loss": 0.521, "step": 7443 }, { "epoch": 0.58, "grad_norm": 1.1395898066772119, "learning_rate": 7.989833395238736e-06, "loss": 0.5561, "step": 7444 }, { "epoch": 0.58, "grad_norm": 1.1119903575801624, "learning_rate": 7.987372076726758e-06, "loss": 0.5033, "step": 7445 }, { "epoch": 0.58, "grad_norm": 1.1777031484737517, "learning_rate": 7.984910885282361e-06, "loss": 0.604, "step": 7446 }, { "epoch": 0.58, "grad_norm": 1.1155423943206333, "learning_rate": 7.982449821060936e-06, "loss": 0.4984, "step": 7447 }, { "epoch": 0.58, "grad_norm": 1.1368127108932402, "learning_rate": 7.979988884217867e-06, "loss": 0.5662, "step": 7448 }, { "epoch": 0.58, "grad_norm": 1.2103154707521777, "learning_rate": 7.97752807490852e-06, "loss": 0.5269, "step": 7449 }, { "epoch": 0.58, "grad_norm": 1.2905908944932365, "learning_rate": 7.97506739328826e-06, "loss": 0.5872, "step": 7450 }, { "epoch": 0.58, "grad_norm": 1.1726942123581245, "learning_rate": 7.972606839512442e-06, "loss": 0.5305, "step": 7451 }, { "epoch": 0.58, "grad_norm": 1.2484401507104799, "learning_rate": 7.970146413736414e-06, "loss": 0.5875, "step": 7452 }, { "epoch": 0.58, "grad_norm": 1.1762299552774345, "learning_rate": 7.967686116115517e-06, "loss": 0.5538, "step": 7453 }, { "epoch": 0.58, "grad_norm": 1.3523709740149084, "learning_rate": 7.965225946805081e-06, "loss": 0.5547, "step": 7454 }, { "epoch": 0.58, "grad_norm": 1.1544494528065696, "learning_rate": 7.962765905960428e-06, "loss": 0.5179, "step": 7455 }, { "epoch": 0.58, "grad_norm": 1.1057632108033297, "learning_rate": 7.960305993736874e-06, "loss": 0.5098, "step": 7456 }, { "epoch": 0.58, "grad_norm": 1.2342755724301664, "learning_rate": 7.957846210289725e-06, "loss": 0.6316, "step": 7457 }, { "epoch": 0.58, "grad_norm": 1.2917100017210297, "learning_rate": 7.955386555774284e-06, "loss": 0.5408, "step": 7458 }, { "epoch": 0.58, "grad_norm": 1.2149208755788323, "learning_rate": 7.952927030345836e-06, "loss": 0.5383, "step": 7459 }, { "epoch": 0.58, "grad_norm": 1.2376940083869323, "learning_rate": 7.950467634159669e-06, "loss": 0.6077, "step": 7460 }, { "epoch": 0.58, "grad_norm": 1.34088503576488, "learning_rate": 7.94800836737105e-06, "loss": 0.5636, "step": 7461 }, { "epoch": 0.58, "grad_norm": 1.1595330410814864, "learning_rate": 7.945549230135251e-06, "loss": 0.5838, "step": 7462 }, { "epoch": 0.58, "grad_norm": 1.1809877215465043, "learning_rate": 7.94309022260753e-06, "loss": 0.5433, "step": 7463 }, { "epoch": 0.58, "grad_norm": 1.119626989749777, "learning_rate": 7.940631344943137e-06, "loss": 0.4819, "step": 7464 }, { "epoch": 0.58, "grad_norm": 1.2423304349607722, "learning_rate": 7.93817259729731e-06, "loss": 0.5268, "step": 7465 }, { "epoch": 0.58, "grad_norm": 1.1871286614706904, "learning_rate": 7.935713979825285e-06, "loss": 0.5322, "step": 7466 }, { "epoch": 0.58, "grad_norm": 1.2254199884536756, "learning_rate": 7.933255492682287e-06, "loss": 0.5376, "step": 7467 }, { "epoch": 0.58, "grad_norm": 1.0613239736128324, "learning_rate": 7.93079713602353e-06, "loss": 0.4779, "step": 7468 }, { "epoch": 0.58, "grad_norm": 1.1185703389062083, "learning_rate": 7.92833891000423e-06, "loss": 0.5153, "step": 7469 }, { "epoch": 0.58, "grad_norm": 1.1915358175896347, "learning_rate": 7.925880814779583e-06, "loss": 0.5657, "step": 7470 }, { "epoch": 0.58, "grad_norm": 1.0711968103154779, "learning_rate": 7.923422850504777e-06, "loss": 0.5555, "step": 7471 }, { "epoch": 0.58, "grad_norm": 1.2811896612334384, "learning_rate": 7.920965017335002e-06, "loss": 0.5162, "step": 7472 }, { "epoch": 0.58, "grad_norm": 1.2086162345584948, "learning_rate": 7.918507315425432e-06, "loss": 0.5665, "step": 7473 }, { "epoch": 0.58, "grad_norm": 1.1265145279700568, "learning_rate": 7.916049744931236e-06, "loss": 0.5498, "step": 7474 }, { "epoch": 0.58, "grad_norm": 1.1811039486164794, "learning_rate": 7.91359230600757e-06, "loss": 0.5302, "step": 7475 }, { "epoch": 0.58, "grad_norm": 1.0706571594859717, "learning_rate": 7.911134998809585e-06, "loss": 0.521, "step": 7476 }, { "epoch": 0.58, "grad_norm": 1.0910320298504563, "learning_rate": 7.908677823492424e-06, "loss": 0.5727, "step": 7477 }, { "epoch": 0.58, "grad_norm": 1.159279694457198, "learning_rate": 7.906220780211225e-06, "loss": 0.5083, "step": 7478 }, { "epoch": 0.58, "grad_norm": 1.2002011726552926, "learning_rate": 7.90376386912111e-06, "loss": 0.5253, "step": 7479 }, { "epoch": 0.58, "grad_norm": 1.268780953165951, "learning_rate": 7.901307090377197e-06, "loss": 0.5538, "step": 7480 }, { "epoch": 0.58, "grad_norm": 1.1283256859475594, "learning_rate": 7.898850444134592e-06, "loss": 0.5385, "step": 7481 }, { "epoch": 0.58, "grad_norm": 1.1390025474982035, "learning_rate": 7.8963939305484e-06, "loss": 0.5325, "step": 7482 }, { "epoch": 0.58, "grad_norm": 1.0603178841397687, "learning_rate": 7.893937549773716e-06, "loss": 0.5175, "step": 7483 }, { "epoch": 0.58, "grad_norm": 1.0824843466774459, "learning_rate": 7.891481301965618e-06, "loss": 0.5412, "step": 7484 }, { "epoch": 0.58, "grad_norm": 1.3265900661807983, "learning_rate": 7.889025187279185e-06, "loss": 0.5421, "step": 7485 }, { "epoch": 0.58, "grad_norm": 1.2074165099761256, "learning_rate": 7.886569205869481e-06, "loss": 0.5717, "step": 7486 }, { "epoch": 0.58, "grad_norm": 1.0868792395484372, "learning_rate": 7.884113357891566e-06, "loss": 0.5636, "step": 7487 }, { "epoch": 0.58, "grad_norm": 1.2002539624254245, "learning_rate": 7.881657643500495e-06, "loss": 0.6008, "step": 7488 }, { "epoch": 0.58, "grad_norm": 1.2799451277564378, "learning_rate": 7.879202062851303e-06, "loss": 0.5787, "step": 7489 }, { "epoch": 0.58, "grad_norm": 1.1200855786121509, "learning_rate": 7.876746616099031e-06, "loss": 0.4872, "step": 7490 }, { "epoch": 0.58, "grad_norm": 1.0010039535124273, "learning_rate": 7.874291303398696e-06, "loss": 0.5124, "step": 7491 }, { "epoch": 0.58, "grad_norm": 1.03330788324661, "learning_rate": 7.871836124905316e-06, "loss": 0.4874, "step": 7492 }, { "epoch": 0.58, "grad_norm": 1.1881740062706498, "learning_rate": 7.869381080773906e-06, "loss": 0.477, "step": 7493 }, { "epoch": 0.58, "grad_norm": 1.1375168558066568, "learning_rate": 7.866926171159458e-06, "loss": 0.5021, "step": 7494 }, { "epoch": 0.58, "grad_norm": 1.023417232400103, "learning_rate": 7.86447139621697e-06, "loss": 0.5129, "step": 7495 }, { "epoch": 0.58, "grad_norm": 1.0473909387738125, "learning_rate": 7.862016756101417e-06, "loss": 0.5049, "step": 7496 }, { "epoch": 0.58, "grad_norm": 1.2152867138246457, "learning_rate": 7.859562250967776e-06, "loss": 0.5431, "step": 7497 }, { "epoch": 0.58, "grad_norm": 1.178171611238214, "learning_rate": 7.857107880971015e-06, "loss": 0.529, "step": 7498 }, { "epoch": 0.58, "grad_norm": 1.1518408243591383, "learning_rate": 7.854653646266089e-06, "loss": 0.496, "step": 7499 }, { "epoch": 0.58, "grad_norm": 1.138098596961056, "learning_rate": 7.85219954700795e-06, "loss": 0.5358, "step": 7500 }, { "epoch": 0.58, "grad_norm": 1.1159691736516886, "learning_rate": 7.84974558335153e-06, "loss": 0.4809, "step": 7501 }, { "epoch": 0.58, "grad_norm": 1.2103259603597454, "learning_rate": 7.847291755451766e-06, "loss": 0.5712, "step": 7502 }, { "epoch": 0.58, "grad_norm": 1.2463382493036335, "learning_rate": 7.844838063463582e-06, "loss": 0.5461, "step": 7503 }, { "epoch": 0.58, "grad_norm": 1.197938216748105, "learning_rate": 7.842384507541889e-06, "loss": 0.549, "step": 7504 }, { "epoch": 0.58, "grad_norm": 1.303995005773564, "learning_rate": 7.839931087841595e-06, "loss": 0.569, "step": 7505 }, { "epoch": 0.58, "grad_norm": 1.2191946734542334, "learning_rate": 7.837477804517595e-06, "loss": 0.5447, "step": 7506 }, { "epoch": 0.58, "grad_norm": 1.103673440906869, "learning_rate": 7.835024657724778e-06, "loss": 0.5132, "step": 7507 }, { "epoch": 0.58, "grad_norm": 1.2174770848117127, "learning_rate": 7.832571647618024e-06, "loss": 0.5735, "step": 7508 }, { "epoch": 0.58, "grad_norm": 1.1819323471253134, "learning_rate": 7.830118774352205e-06, "loss": 0.513, "step": 7509 }, { "epoch": 0.58, "grad_norm": 1.2180005238849785, "learning_rate": 7.827666038082185e-06, "loss": 0.5557, "step": 7510 }, { "epoch": 0.58, "grad_norm": 1.1654775326795914, "learning_rate": 7.825213438962812e-06, "loss": 0.4963, "step": 7511 }, { "epoch": 0.58, "grad_norm": 1.0044061032353246, "learning_rate": 7.822760977148936e-06, "loss": 0.4914, "step": 7512 }, { "epoch": 0.58, "grad_norm": 1.105309734432696, "learning_rate": 7.820308652795393e-06, "loss": 0.5098, "step": 7513 }, { "epoch": 0.58, "grad_norm": 1.0785365217672074, "learning_rate": 7.81785646605701e-06, "loss": 0.5751, "step": 7514 }, { "epoch": 0.58, "grad_norm": 1.2183900937497338, "learning_rate": 7.81540441708861e-06, "loss": 0.4992, "step": 7515 }, { "epoch": 0.58, "grad_norm": 1.2433800880463783, "learning_rate": 7.812952506044996e-06, "loss": 0.5612, "step": 7516 }, { "epoch": 0.58, "grad_norm": 1.1851945884704642, "learning_rate": 7.810500733080974e-06, "loss": 0.5334, "step": 7517 }, { "epoch": 0.58, "grad_norm": 1.103708868070359, "learning_rate": 7.80804909835134e-06, "loss": 0.5132, "step": 7518 }, { "epoch": 0.58, "grad_norm": 1.2963949486997477, "learning_rate": 7.805597602010873e-06, "loss": 0.6022, "step": 7519 }, { "epoch": 0.58, "grad_norm": 1.2372414829456624, "learning_rate": 7.803146244214355e-06, "loss": 0.567, "step": 7520 }, { "epoch": 0.58, "grad_norm": 1.1047388369705629, "learning_rate": 7.800695025116546e-06, "loss": 0.4538, "step": 7521 }, { "epoch": 0.58, "grad_norm": 1.1197263240231736, "learning_rate": 7.798243944872204e-06, "loss": 0.5379, "step": 7522 }, { "epoch": 0.58, "grad_norm": 1.0979699063406962, "learning_rate": 7.795793003636085e-06, "loss": 0.5311, "step": 7523 }, { "epoch": 0.58, "grad_norm": 1.167235025972854, "learning_rate": 7.793342201562923e-06, "loss": 0.5486, "step": 7524 }, { "epoch": 0.58, "grad_norm": 1.1880808965452814, "learning_rate": 7.790891538807459e-06, "loss": 0.5442, "step": 7525 }, { "epoch": 0.58, "grad_norm": 1.1404507125011358, "learning_rate": 7.788441015524403e-06, "loss": 0.5421, "step": 7526 }, { "epoch": 0.58, "grad_norm": 1.1725054761518772, "learning_rate": 7.785990631868478e-06, "loss": 0.5171, "step": 7527 }, { "epoch": 0.58, "grad_norm": 1.2991012400819353, "learning_rate": 7.783540387994387e-06, "loss": 0.5217, "step": 7528 }, { "epoch": 0.58, "grad_norm": 1.1395736970557997, "learning_rate": 7.781090284056827e-06, "loss": 0.54, "step": 7529 }, { "epoch": 0.58, "grad_norm": 1.1179872864493376, "learning_rate": 7.778640320210487e-06, "loss": 0.4949, "step": 7530 }, { "epoch": 0.58, "grad_norm": 1.1500296423035712, "learning_rate": 7.776190496610043e-06, "loss": 0.5148, "step": 7531 }, { "epoch": 0.58, "grad_norm": 1.2500664216514035, "learning_rate": 7.773740813410165e-06, "loss": 0.587, "step": 7532 }, { "epoch": 0.58, "grad_norm": 1.1721832887130992, "learning_rate": 7.771291270765518e-06, "loss": 0.4816, "step": 7533 }, { "epoch": 0.58, "grad_norm": 1.199138278311463, "learning_rate": 7.768841868830747e-06, "loss": 0.524, "step": 7534 }, { "epoch": 0.58, "grad_norm": 1.2628818973011953, "learning_rate": 7.766392607760508e-06, "loss": 0.5892, "step": 7535 }, { "epoch": 0.58, "grad_norm": 1.1327899799081569, "learning_rate": 7.76394348770942e-06, "loss": 0.4763, "step": 7536 }, { "epoch": 0.58, "grad_norm": 1.068678618160993, "learning_rate": 7.76149450883212e-06, "loss": 0.5075, "step": 7537 }, { "epoch": 0.58, "grad_norm": 1.143850443030654, "learning_rate": 7.759045671283219e-06, "loss": 0.5496, "step": 7538 }, { "epoch": 0.58, "grad_norm": 1.1350588920203621, "learning_rate": 7.756596975217327e-06, "loss": 0.5346, "step": 7539 }, { "epoch": 0.58, "grad_norm": 1.1705322201973778, "learning_rate": 7.754148420789047e-06, "loss": 0.5059, "step": 7540 }, { "epoch": 0.59, "grad_norm": 1.2948119518177412, "learning_rate": 7.751700008152959e-06, "loss": 0.578, "step": 7541 }, { "epoch": 0.59, "grad_norm": 1.0714561867561079, "learning_rate": 7.749251737463648e-06, "loss": 0.5123, "step": 7542 }, { "epoch": 0.59, "grad_norm": 1.2774052503053566, "learning_rate": 7.74680360887569e-06, "loss": 0.5184, "step": 7543 }, { "epoch": 0.59, "grad_norm": 1.1999482163541297, "learning_rate": 7.744355622543643e-06, "loss": 0.5446, "step": 7544 }, { "epoch": 0.59, "grad_norm": 1.3017220517259611, "learning_rate": 7.741907778622064e-06, "loss": 0.578, "step": 7545 }, { "epoch": 0.59, "grad_norm": 1.0748018104255457, "learning_rate": 7.739460077265502e-06, "loss": 0.514, "step": 7546 }, { "epoch": 0.59, "grad_norm": 1.2338023245555976, "learning_rate": 7.737012518628482e-06, "loss": 0.4957, "step": 7547 }, { "epoch": 0.59, "grad_norm": 1.218626602968466, "learning_rate": 7.73456510286554e-06, "loss": 0.5326, "step": 7548 }, { "epoch": 0.59, "grad_norm": 1.3630973014133738, "learning_rate": 7.732117830131189e-06, "loss": 0.5607, "step": 7549 }, { "epoch": 0.59, "grad_norm": 1.120810496205146, "learning_rate": 7.72967070057994e-06, "loss": 0.5164, "step": 7550 }, { "epoch": 0.59, "grad_norm": 1.2480994558227565, "learning_rate": 7.727223714366299e-06, "loss": 0.553, "step": 7551 }, { "epoch": 0.59, "grad_norm": 1.2262898524785526, "learning_rate": 7.724776871644745e-06, "loss": 0.5761, "step": 7552 }, { "epoch": 0.59, "grad_norm": 1.1602232428837291, "learning_rate": 7.722330172569767e-06, "loss": 0.5497, "step": 7553 }, { "epoch": 0.59, "grad_norm": 1.2029902890575266, "learning_rate": 7.719883617295835e-06, "loss": 0.5727, "step": 7554 }, { "epoch": 0.59, "grad_norm": 1.2652610443875372, "learning_rate": 7.717437205977414e-06, "loss": 0.5182, "step": 7555 }, { "epoch": 0.59, "grad_norm": 1.1500395415598474, "learning_rate": 7.714990938768964e-06, "loss": 0.5374, "step": 7556 }, { "epoch": 0.59, "grad_norm": 1.1985369803906565, "learning_rate": 7.712544815824921e-06, "loss": 0.5478, "step": 7557 }, { "epoch": 0.59, "grad_norm": 1.1611700284622768, "learning_rate": 7.710098837299726e-06, "loss": 0.5886, "step": 7558 }, { "epoch": 0.59, "grad_norm": 1.1845211009396022, "learning_rate": 7.707653003347805e-06, "loss": 0.5464, "step": 7559 }, { "epoch": 0.59, "grad_norm": 1.322771434871977, "learning_rate": 7.705207314123577e-06, "loss": 0.6593, "step": 7560 }, { "epoch": 0.59, "grad_norm": 1.111559154986994, "learning_rate": 7.702761769781454e-06, "loss": 0.4898, "step": 7561 }, { "epoch": 0.59, "grad_norm": 1.1570569392925638, "learning_rate": 7.700316370475828e-06, "loss": 0.4695, "step": 7562 }, { "epoch": 0.59, "grad_norm": 1.2980350224071178, "learning_rate": 7.697871116361099e-06, "loss": 0.5757, "step": 7563 }, { "epoch": 0.59, "grad_norm": 1.1888964122215295, "learning_rate": 7.69542600759164e-06, "loss": 0.5171, "step": 7564 }, { "epoch": 0.59, "grad_norm": 1.1658745812969207, "learning_rate": 7.692981044321826e-06, "loss": 0.5855, "step": 7565 }, { "epoch": 0.59, "grad_norm": 1.2530777234983987, "learning_rate": 7.690536226706028e-06, "loss": 0.5078, "step": 7566 }, { "epoch": 0.59, "grad_norm": 1.2676092534879186, "learning_rate": 7.688091554898587e-06, "loss": 0.5521, "step": 7567 }, { "epoch": 0.59, "grad_norm": 1.2287932599234257, "learning_rate": 7.685647029053857e-06, "loss": 0.5336, "step": 7568 }, { "epoch": 0.59, "grad_norm": 1.2328067909949798, "learning_rate": 7.683202649326169e-06, "loss": 0.5133, "step": 7569 }, { "epoch": 0.59, "grad_norm": 1.1685160499649005, "learning_rate": 7.68075841586985e-06, "loss": 0.5128, "step": 7570 }, { "epoch": 0.59, "grad_norm": 1.1003401447128849, "learning_rate": 7.678314328839223e-06, "loss": 0.5109, "step": 7571 }, { "epoch": 0.59, "grad_norm": 1.2123902005690965, "learning_rate": 7.675870388388586e-06, "loss": 0.5613, "step": 7572 }, { "epoch": 0.59, "grad_norm": 1.1539914522631276, "learning_rate": 7.673426594672243e-06, "loss": 0.4736, "step": 7573 }, { "epoch": 0.59, "grad_norm": 1.170372622973876, "learning_rate": 7.670982947844482e-06, "loss": 0.522, "step": 7574 }, { "epoch": 0.59, "grad_norm": 1.0466722320501818, "learning_rate": 7.668539448059585e-06, "loss": 0.4948, "step": 7575 }, { "epoch": 0.59, "grad_norm": 1.0414242971105694, "learning_rate": 7.666096095471823e-06, "loss": 0.4884, "step": 7576 }, { "epoch": 0.59, "grad_norm": 1.192964429166772, "learning_rate": 7.663652890235452e-06, "loss": 0.5673, "step": 7577 }, { "epoch": 0.59, "grad_norm": 1.1525280304021817, "learning_rate": 7.661209832504731e-06, "loss": 0.5482, "step": 7578 }, { "epoch": 0.59, "grad_norm": 1.1793854181408703, "learning_rate": 7.658766922433898e-06, "loss": 0.499, "step": 7579 }, { "epoch": 0.59, "grad_norm": 1.2662259311779778, "learning_rate": 7.656324160177187e-06, "loss": 0.52, "step": 7580 }, { "epoch": 0.59, "grad_norm": 1.2434548205031937, "learning_rate": 7.653881545888829e-06, "loss": 0.5092, "step": 7581 }, { "epoch": 0.59, "grad_norm": 1.2173877830410018, "learning_rate": 7.65143907972303e-06, "loss": 0.5567, "step": 7582 }, { "epoch": 0.59, "grad_norm": 1.2011819978613083, "learning_rate": 7.648996761834e-06, "loss": 0.5432, "step": 7583 }, { "epoch": 0.59, "grad_norm": 1.1359903945046632, "learning_rate": 7.64655459237593e-06, "loss": 0.5661, "step": 7584 }, { "epoch": 0.59, "grad_norm": 1.2234511548835636, "learning_rate": 7.644112571503014e-06, "loss": 0.5574, "step": 7585 }, { "epoch": 0.59, "grad_norm": 1.1583801443031645, "learning_rate": 7.641670699369429e-06, "loss": 0.5709, "step": 7586 }, { "epoch": 0.59, "grad_norm": 1.1141114468943734, "learning_rate": 7.639228976129337e-06, "loss": 0.5244, "step": 7587 }, { "epoch": 0.59, "grad_norm": 1.1627786199918313, "learning_rate": 7.636787401936899e-06, "loss": 0.5644, "step": 7588 }, { "epoch": 0.59, "grad_norm": 1.124749897600938, "learning_rate": 7.634345976946265e-06, "loss": 0.5353, "step": 7589 }, { "epoch": 0.59, "grad_norm": 1.0454095860942514, "learning_rate": 7.631904701311574e-06, "loss": 0.4875, "step": 7590 }, { "epoch": 0.59, "grad_norm": 1.0700293953146631, "learning_rate": 7.62946357518696e-06, "loss": 0.5329, "step": 7591 }, { "epoch": 0.59, "grad_norm": 1.1372642650099527, "learning_rate": 7.627022598726539e-06, "loss": 0.5555, "step": 7592 }, { "epoch": 0.59, "grad_norm": 1.0988647758590586, "learning_rate": 7.624581772084425e-06, "loss": 0.4828, "step": 7593 }, { "epoch": 0.59, "grad_norm": 1.1854360110688413, "learning_rate": 7.622141095414717e-06, "loss": 0.5528, "step": 7594 }, { "epoch": 0.59, "grad_norm": 1.0399068399115785, "learning_rate": 7.619700568871511e-06, "loss": 0.4943, "step": 7595 }, { "epoch": 0.59, "grad_norm": 1.2005507933656396, "learning_rate": 7.617260192608892e-06, "loss": 0.5032, "step": 7596 }, { "epoch": 0.59, "grad_norm": 1.2502070732260793, "learning_rate": 7.614819966780926e-06, "loss": 0.5761, "step": 7597 }, { "epoch": 0.59, "grad_norm": 1.2954981285502771, "learning_rate": 7.6123798915416845e-06, "loss": 0.5198, "step": 7598 }, { "epoch": 0.59, "grad_norm": 1.1211739753967689, "learning_rate": 7.609939967045217e-06, "loss": 0.5431, "step": 7599 }, { "epoch": 0.59, "grad_norm": 1.1132347766478696, "learning_rate": 7.60750019344557e-06, "loss": 0.5113, "step": 7600 }, { "epoch": 0.59, "grad_norm": 1.0685703550461578, "learning_rate": 7.605060570896781e-06, "loss": 0.5003, "step": 7601 }, { "epoch": 0.59, "grad_norm": 1.253303549358541, "learning_rate": 7.602621099552874e-06, "loss": 0.5236, "step": 7602 }, { "epoch": 0.59, "grad_norm": 1.1137780937992867, "learning_rate": 7.600181779567867e-06, "loss": 0.5163, "step": 7603 }, { "epoch": 0.59, "grad_norm": 1.2922049856790265, "learning_rate": 7.597742611095762e-06, "loss": 0.5713, "step": 7604 }, { "epoch": 0.59, "grad_norm": 1.0113709194283491, "learning_rate": 7.595303594290562e-06, "loss": 0.509, "step": 7605 }, { "epoch": 0.59, "grad_norm": 1.2947273395954355, "learning_rate": 7.592864729306253e-06, "loss": 0.5899, "step": 7606 }, { "epoch": 0.59, "grad_norm": 1.1870841753871757, "learning_rate": 7.5904260162968145e-06, "loss": 0.5193, "step": 7607 }, { "epoch": 0.59, "grad_norm": 1.249563808629943, "learning_rate": 7.5879874554162124e-06, "loss": 0.5446, "step": 7608 }, { "epoch": 0.59, "grad_norm": 1.1636638598823918, "learning_rate": 7.585549046818405e-06, "loss": 0.5522, "step": 7609 }, { "epoch": 0.59, "grad_norm": 1.1918571244386105, "learning_rate": 7.583110790657342e-06, "loss": 0.5686, "step": 7610 }, { "epoch": 0.59, "grad_norm": 1.2336789353609325, "learning_rate": 7.580672687086967e-06, "loss": 0.5429, "step": 7611 }, { "epoch": 0.59, "grad_norm": 1.1284992579946584, "learning_rate": 7.578234736261208e-06, "loss": 0.4427, "step": 7612 }, { "epoch": 0.59, "grad_norm": 1.1799169658282695, "learning_rate": 7.575796938333986e-06, "loss": 0.5426, "step": 7613 }, { "epoch": 0.59, "grad_norm": 1.1395142254746995, "learning_rate": 7.573359293459206e-06, "loss": 0.4955, "step": 7614 }, { "epoch": 0.59, "grad_norm": 1.0797373008007733, "learning_rate": 7.5709218017907734e-06, "loss": 0.4721, "step": 7615 }, { "epoch": 0.59, "grad_norm": 1.157151824211684, "learning_rate": 7.568484463482584e-06, "loss": 0.5152, "step": 7616 }, { "epoch": 0.59, "grad_norm": 1.2051507318964612, "learning_rate": 7.566047278688514e-06, "loss": 0.5205, "step": 7617 }, { "epoch": 0.59, "grad_norm": 1.1926528162577588, "learning_rate": 7.563610247562437e-06, "loss": 0.5057, "step": 7618 }, { "epoch": 0.59, "grad_norm": 1.1477989964946236, "learning_rate": 7.561173370258215e-06, "loss": 0.5426, "step": 7619 }, { "epoch": 0.59, "grad_norm": 1.160051231897204, "learning_rate": 7.558736646929699e-06, "loss": 0.5412, "step": 7620 }, { "epoch": 0.59, "grad_norm": 1.2793791067470277, "learning_rate": 7.556300077730735e-06, "loss": 0.5875, "step": 7621 }, { "epoch": 0.59, "grad_norm": 1.1494886816495486, "learning_rate": 7.553863662815156e-06, "loss": 0.5226, "step": 7622 }, { "epoch": 0.59, "grad_norm": 1.1578831866021364, "learning_rate": 7.551427402336784e-06, "loss": 0.4992, "step": 7623 }, { "epoch": 0.59, "grad_norm": 1.2472773942353286, "learning_rate": 7.548991296449431e-06, "loss": 0.547, "step": 7624 }, { "epoch": 0.59, "grad_norm": 1.1410487707247865, "learning_rate": 7.546555345306904e-06, "loss": 0.5297, "step": 7625 }, { "epoch": 0.59, "grad_norm": 1.2668888704418546, "learning_rate": 7.544119549062998e-06, "loss": 0.5275, "step": 7626 }, { "epoch": 0.59, "grad_norm": 1.3520743663159378, "learning_rate": 7.541683907871494e-06, "loss": 0.5752, "step": 7627 }, { "epoch": 0.59, "grad_norm": 1.1963845724787652, "learning_rate": 7.539248421886169e-06, "loss": 0.5619, "step": 7628 }, { "epoch": 0.59, "grad_norm": 1.2333444566912413, "learning_rate": 7.536813091260786e-06, "loss": 0.5273, "step": 7629 }, { "epoch": 0.59, "grad_norm": 1.1379288462365693, "learning_rate": 7.5343779161491e-06, "loss": 0.5302, "step": 7630 }, { "epoch": 0.59, "grad_norm": 1.259515639940586, "learning_rate": 7.531942896704859e-06, "loss": 0.5402, "step": 7631 }, { "epoch": 0.59, "grad_norm": 1.1645478126600004, "learning_rate": 7.529508033081796e-06, "loss": 0.5634, "step": 7632 }, { "epoch": 0.59, "grad_norm": 1.154329870691558, "learning_rate": 7.5270733254336385e-06, "loss": 0.5316, "step": 7633 }, { "epoch": 0.59, "grad_norm": 1.1067942298959335, "learning_rate": 7.524638773914097e-06, "loss": 0.5262, "step": 7634 }, { "epoch": 0.59, "grad_norm": 1.1012922455979763, "learning_rate": 7.522204378676882e-06, "loss": 0.4972, "step": 7635 }, { "epoch": 0.59, "grad_norm": 1.127135052202406, "learning_rate": 7.5197701398756895e-06, "loss": 0.5698, "step": 7636 }, { "epoch": 0.59, "grad_norm": 1.0509799925318029, "learning_rate": 7.517336057664204e-06, "loss": 0.434, "step": 7637 }, { "epoch": 0.59, "grad_norm": 1.13333891114564, "learning_rate": 7.514902132196104e-06, "loss": 0.4974, "step": 7638 }, { "epoch": 0.59, "grad_norm": 1.1210752479010047, "learning_rate": 7.5124683636250495e-06, "loss": 0.4977, "step": 7639 }, { "epoch": 0.59, "grad_norm": 1.102641125424338, "learning_rate": 7.5100347521047025e-06, "loss": 0.4417, "step": 7640 }, { "epoch": 0.59, "grad_norm": 1.1227709091132043, "learning_rate": 7.5076012977887095e-06, "loss": 0.5072, "step": 7641 }, { "epoch": 0.59, "grad_norm": 1.1782862950009478, "learning_rate": 7.505168000830708e-06, "loss": 0.5614, "step": 7642 }, { "epoch": 0.59, "grad_norm": 1.1371494276985863, "learning_rate": 7.502734861384319e-06, "loss": 0.5376, "step": 7643 }, { "epoch": 0.59, "grad_norm": 1.2617740678028007, "learning_rate": 7.500301879603161e-06, "loss": 0.5574, "step": 7644 }, { "epoch": 0.59, "grad_norm": 1.2344276803279728, "learning_rate": 7.497869055640843e-06, "loss": 0.5284, "step": 7645 }, { "epoch": 0.59, "grad_norm": 1.1017987694555291, "learning_rate": 7.495436389650962e-06, "loss": 0.5071, "step": 7646 }, { "epoch": 0.59, "grad_norm": 1.2738314996164046, "learning_rate": 7.493003881787106e-06, "loss": 0.5595, "step": 7647 }, { "epoch": 0.59, "grad_norm": 1.0699546382771965, "learning_rate": 7.490571532202846e-06, "loss": 0.4724, "step": 7648 }, { "epoch": 0.59, "grad_norm": 1.2542344373893626, "learning_rate": 7.488139341051751e-06, "loss": 0.5412, "step": 7649 }, { "epoch": 0.59, "grad_norm": 1.2142250472629428, "learning_rate": 7.48570730848738e-06, "loss": 0.5147, "step": 7650 }, { "epoch": 0.59, "grad_norm": 1.1245022838360708, "learning_rate": 7.48327543466328e-06, "loss": 0.5037, "step": 7651 }, { "epoch": 0.59, "grad_norm": 1.0819864091636586, "learning_rate": 7.480843719732989e-06, "loss": 0.4783, "step": 7652 }, { "epoch": 0.59, "grad_norm": 1.2437783377021394, "learning_rate": 7.478412163850026e-06, "loss": 0.511, "step": 7653 }, { "epoch": 0.59, "grad_norm": 1.213420758206791, "learning_rate": 7.475980767167914e-06, "loss": 0.5529, "step": 7654 }, { "epoch": 0.59, "grad_norm": 1.102253690267413, "learning_rate": 7.473549529840157e-06, "loss": 0.4977, "step": 7655 }, { "epoch": 0.59, "grad_norm": 1.2417763563430688, "learning_rate": 7.471118452020256e-06, "loss": 0.576, "step": 7656 }, { "epoch": 0.59, "grad_norm": 1.2196627891123033, "learning_rate": 7.4686875338616914e-06, "loss": 0.5549, "step": 7657 }, { "epoch": 0.59, "grad_norm": 1.1907378275920073, "learning_rate": 7.466256775517948e-06, "loss": 0.547, "step": 7658 }, { "epoch": 0.59, "grad_norm": 1.1468677302241865, "learning_rate": 7.463826177142483e-06, "loss": 0.4931, "step": 7659 }, { "epoch": 0.59, "grad_norm": 1.1262522721347055, "learning_rate": 7.461395738888755e-06, "loss": 0.4846, "step": 7660 }, { "epoch": 0.59, "grad_norm": 1.1475780151375445, "learning_rate": 7.458965460910214e-06, "loss": 0.5383, "step": 7661 }, { "epoch": 0.59, "grad_norm": 1.1554937209087068, "learning_rate": 7.456535343360292e-06, "loss": 0.5361, "step": 7662 }, { "epoch": 0.59, "grad_norm": 1.028826727433479, "learning_rate": 7.454105386392419e-06, "loss": 0.4948, "step": 7663 }, { "epoch": 0.59, "grad_norm": 1.1928663468393321, "learning_rate": 7.451675590160006e-06, "loss": 0.5492, "step": 7664 }, { "epoch": 0.59, "grad_norm": 1.2017612407008635, "learning_rate": 7.449245954816459e-06, "loss": 0.5441, "step": 7665 }, { "epoch": 0.59, "grad_norm": 1.0750432959420864, "learning_rate": 7.446816480515177e-06, "loss": 0.5057, "step": 7666 }, { "epoch": 0.59, "grad_norm": 1.1238838593018492, "learning_rate": 7.444387167409541e-06, "loss": 0.4991, "step": 7667 }, { "epoch": 0.59, "grad_norm": 1.2414968237895796, "learning_rate": 7.441958015652934e-06, "loss": 0.5342, "step": 7668 }, { "epoch": 0.59, "grad_norm": 1.188644660590536, "learning_rate": 7.4395290253987105e-06, "loss": 0.5184, "step": 7669 }, { "epoch": 0.6, "grad_norm": 1.170451762287841, "learning_rate": 7.43710019680023e-06, "loss": 0.5241, "step": 7670 }, { "epoch": 0.6, "grad_norm": 1.2056543589290634, "learning_rate": 7.434671530010839e-06, "loss": 0.5559, "step": 7671 }, { "epoch": 0.6, "grad_norm": 1.2776988524557864, "learning_rate": 7.4322430251838674e-06, "loss": 0.4768, "step": 7672 }, { "epoch": 0.6, "grad_norm": 1.2197474286239363, "learning_rate": 7.4298146824726445e-06, "loss": 0.5505, "step": 7673 }, { "epoch": 0.6, "grad_norm": 1.1540999656068396, "learning_rate": 7.427386502030478e-06, "loss": 0.4792, "step": 7674 }, { "epoch": 0.6, "grad_norm": 1.1754700004776857, "learning_rate": 7.424958484010675e-06, "loss": 0.5453, "step": 7675 }, { "epoch": 0.6, "grad_norm": 1.2249731956196317, "learning_rate": 7.42253062856653e-06, "loss": 0.526, "step": 7676 }, { "epoch": 0.6, "grad_norm": 1.231689816069384, "learning_rate": 7.420102935851324e-06, "loss": 0.5738, "step": 7677 }, { "epoch": 0.6, "grad_norm": 1.189048861251189, "learning_rate": 7.417675406018332e-06, "loss": 0.5025, "step": 7678 }, { "epoch": 0.6, "grad_norm": 1.143413792375082, "learning_rate": 7.4152480392208105e-06, "loss": 0.5292, "step": 7679 }, { "epoch": 0.6, "grad_norm": 1.170508032483189, "learning_rate": 7.412820835612016e-06, "loss": 0.4964, "step": 7680 }, { "epoch": 0.6, "grad_norm": 1.1980959329676009, "learning_rate": 7.410393795345193e-06, "loss": 0.5273, "step": 7681 }, { "epoch": 0.6, "grad_norm": 1.1215090630344111, "learning_rate": 7.407966918573568e-06, "loss": 0.5034, "step": 7682 }, { "epoch": 0.6, "grad_norm": 1.1825853338763397, "learning_rate": 7.40554020545037e-06, "loss": 0.5244, "step": 7683 }, { "epoch": 0.6, "grad_norm": 1.1462150920348926, "learning_rate": 7.403113656128799e-06, "loss": 0.5359, "step": 7684 }, { "epoch": 0.6, "grad_norm": 1.1330780770317208, "learning_rate": 7.400687270762061e-06, "loss": 0.5165, "step": 7685 }, { "epoch": 0.6, "grad_norm": 1.2432446569791638, "learning_rate": 7.398261049503348e-06, "loss": 0.5654, "step": 7686 }, { "epoch": 0.6, "grad_norm": 1.188720678054079, "learning_rate": 7.395834992505837e-06, "loss": 0.5894, "step": 7687 }, { "epoch": 0.6, "grad_norm": 1.203485707298391, "learning_rate": 7.393409099922703e-06, "loss": 0.5521, "step": 7688 }, { "epoch": 0.6, "grad_norm": 1.3112134986211703, "learning_rate": 7.390983371907095e-06, "loss": 0.6046, "step": 7689 }, { "epoch": 0.6, "grad_norm": 1.3880458016025612, "learning_rate": 7.388557808612169e-06, "loss": 0.6076, "step": 7690 }, { "epoch": 0.6, "grad_norm": 1.1220474174427975, "learning_rate": 7.3861324101910635e-06, "loss": 0.5573, "step": 7691 }, { "epoch": 0.6, "grad_norm": 1.2300879964132578, "learning_rate": 7.383707176796903e-06, "loss": 0.5501, "step": 7692 }, { "epoch": 0.6, "grad_norm": 1.1069855545293743, "learning_rate": 7.38128210858281e-06, "loss": 0.5312, "step": 7693 }, { "epoch": 0.6, "grad_norm": 1.0154173418642058, "learning_rate": 7.378857205701885e-06, "loss": 0.4921, "step": 7694 }, { "epoch": 0.6, "grad_norm": 1.230866092529174, "learning_rate": 7.376432468307227e-06, "loss": 0.5444, "step": 7695 }, { "epoch": 0.6, "grad_norm": 1.1537083191952524, "learning_rate": 7.3740078965519266e-06, "loss": 0.5298, "step": 7696 }, { "epoch": 0.6, "grad_norm": 1.1363854198101881, "learning_rate": 7.371583490589053e-06, "loss": 0.5462, "step": 7697 }, { "epoch": 0.6, "grad_norm": 1.105611569510098, "learning_rate": 7.369159250571678e-06, "loss": 0.5163, "step": 7698 }, { "epoch": 0.6, "grad_norm": 1.089266661850344, "learning_rate": 7.3667351766528505e-06, "loss": 0.5066, "step": 7699 }, { "epoch": 0.6, "grad_norm": 1.076758140205487, "learning_rate": 7.364311268985616e-06, "loss": 0.5309, "step": 7700 }, { "epoch": 0.6, "grad_norm": 1.1024403422297548, "learning_rate": 7.36188752772301e-06, "loss": 0.4984, "step": 7701 }, { "epoch": 0.6, "grad_norm": 1.2113851396661608, "learning_rate": 7.359463953018056e-06, "loss": 0.4815, "step": 7702 }, { "epoch": 0.6, "grad_norm": 1.2617883810664767, "learning_rate": 7.357040545023768e-06, "loss": 0.5286, "step": 7703 }, { "epoch": 0.6, "grad_norm": 1.1882840629910512, "learning_rate": 7.354617303893143e-06, "loss": 0.5431, "step": 7704 }, { "epoch": 0.6, "grad_norm": 1.17086377064702, "learning_rate": 7.352194229779177e-06, "loss": 0.5774, "step": 7705 }, { "epoch": 0.6, "grad_norm": 1.2108643048222572, "learning_rate": 7.349771322834851e-06, "loss": 0.5097, "step": 7706 }, { "epoch": 0.6, "grad_norm": 1.096815445633583, "learning_rate": 7.347348583213133e-06, "loss": 0.5229, "step": 7707 }, { "epoch": 0.6, "grad_norm": 1.146123046084517, "learning_rate": 7.344926011066985e-06, "loss": 0.4909, "step": 7708 }, { "epoch": 0.6, "grad_norm": 1.1098479216587311, "learning_rate": 7.34250360654936e-06, "loss": 0.5004, "step": 7709 }, { "epoch": 0.6, "grad_norm": 1.1521967907317086, "learning_rate": 7.3400813698131905e-06, "loss": 0.587, "step": 7710 }, { "epoch": 0.6, "grad_norm": 1.1912626086085143, "learning_rate": 7.337659301011408e-06, "loss": 0.5536, "step": 7711 }, { "epoch": 0.6, "grad_norm": 1.2622791380364655, "learning_rate": 7.335237400296927e-06, "loss": 0.5728, "step": 7712 }, { "epoch": 0.6, "grad_norm": 1.1782423350543982, "learning_rate": 7.332815667822659e-06, "loss": 0.6221, "step": 7713 }, { "epoch": 0.6, "grad_norm": 1.2107529530336583, "learning_rate": 7.3303941037415024e-06, "loss": 0.5269, "step": 7714 }, { "epoch": 0.6, "grad_norm": 1.2087518964592812, "learning_rate": 7.327972708206334e-06, "loss": 0.5397, "step": 7715 }, { "epoch": 0.6, "grad_norm": 1.237342936249032, "learning_rate": 7.325551481370038e-06, "loss": 0.5233, "step": 7716 }, { "epoch": 0.6, "grad_norm": 1.168068769886466, "learning_rate": 7.323130423385473e-06, "loss": 0.5806, "step": 7717 }, { "epoch": 0.6, "grad_norm": 1.1706644545065703, "learning_rate": 7.320709534405494e-06, "loss": 0.4631, "step": 7718 }, { "epoch": 0.6, "grad_norm": 1.2245851845914941, "learning_rate": 7.318288814582951e-06, "loss": 0.506, "step": 7719 }, { "epoch": 0.6, "grad_norm": 1.1291926987428518, "learning_rate": 7.315868264070667e-06, "loss": 0.5415, "step": 7720 }, { "epoch": 0.6, "grad_norm": 1.1108798713598735, "learning_rate": 7.313447883021469e-06, "loss": 0.5213, "step": 7721 }, { "epoch": 0.6, "grad_norm": 1.2668293532667123, "learning_rate": 7.3110276715881645e-06, "loss": 0.5411, "step": 7722 }, { "epoch": 0.6, "grad_norm": 1.316964277401386, "learning_rate": 7.308607629923557e-06, "loss": 0.5327, "step": 7723 }, { "epoch": 0.6, "grad_norm": 1.0794143706686312, "learning_rate": 7.3061877581804395e-06, "loss": 0.563, "step": 7724 }, { "epoch": 0.6, "grad_norm": 1.2034152783066068, "learning_rate": 7.303768056511583e-06, "loss": 0.5531, "step": 7725 }, { "epoch": 0.6, "grad_norm": 1.272269479747504, "learning_rate": 7.301348525069762e-06, "loss": 0.5658, "step": 7726 }, { "epoch": 0.6, "grad_norm": 1.2227362463377844, "learning_rate": 7.29892916400773e-06, "loss": 0.49, "step": 7727 }, { "epoch": 0.6, "grad_norm": 1.136811504971153, "learning_rate": 7.296509973478235e-06, "loss": 0.4989, "step": 7728 }, { "epoch": 0.6, "grad_norm": 1.2317801616979487, "learning_rate": 7.2940909536340184e-06, "loss": 0.5197, "step": 7729 }, { "epoch": 0.6, "grad_norm": 1.1958420521665303, "learning_rate": 7.291672104627796e-06, "loss": 0.531, "step": 7730 }, { "epoch": 0.6, "grad_norm": 1.1883224349750479, "learning_rate": 7.289253426612288e-06, "loss": 0.5429, "step": 7731 }, { "epoch": 0.6, "grad_norm": 1.1940057131121007, "learning_rate": 7.286834919740195e-06, "loss": 0.5843, "step": 7732 }, { "epoch": 0.6, "grad_norm": 1.1652537657262145, "learning_rate": 7.2844165841642135e-06, "loss": 0.5263, "step": 7733 }, { "epoch": 0.6, "grad_norm": 1.1133999945238846, "learning_rate": 7.281998420037026e-06, "loss": 0.5187, "step": 7734 }, { "epoch": 0.6, "grad_norm": 1.178772931811347, "learning_rate": 7.279580427511297e-06, "loss": 0.5398, "step": 7735 }, { "epoch": 0.6, "grad_norm": 1.156470200474254, "learning_rate": 7.277162606739694e-06, "loss": 0.4845, "step": 7736 }, { "epoch": 0.6, "grad_norm": 1.2183695957529324, "learning_rate": 7.274744957874862e-06, "loss": 0.5565, "step": 7737 }, { "epoch": 0.6, "grad_norm": 1.1619184064631618, "learning_rate": 7.27232748106944e-06, "loss": 0.4968, "step": 7738 }, { "epoch": 0.6, "grad_norm": 1.2777065030320225, "learning_rate": 7.269910176476062e-06, "loss": 0.5697, "step": 7739 }, { "epoch": 0.6, "grad_norm": 1.0787274985325965, "learning_rate": 7.267493044247338e-06, "loss": 0.5213, "step": 7740 }, { "epoch": 0.6, "grad_norm": 1.240664478125496, "learning_rate": 7.265076084535874e-06, "loss": 0.5802, "step": 7741 }, { "epoch": 0.6, "grad_norm": 1.1855375738894567, "learning_rate": 7.262659297494266e-06, "loss": 0.497, "step": 7742 }, { "epoch": 0.6, "grad_norm": 1.1670912696562183, "learning_rate": 7.2602426832751005e-06, "loss": 0.4886, "step": 7743 }, { "epoch": 0.6, "grad_norm": 1.2025837857565378, "learning_rate": 7.2578262420309534e-06, "loss": 0.5525, "step": 7744 }, { "epoch": 0.6, "grad_norm": 1.168066371550638, "learning_rate": 7.25540997391438e-06, "loss": 0.5167, "step": 7745 }, { "epoch": 0.6, "grad_norm": 1.1812902514346169, "learning_rate": 7.252993879077936e-06, "loss": 0.5359, "step": 7746 }, { "epoch": 0.6, "grad_norm": 1.1309621497380071, "learning_rate": 7.250577957674159e-06, "loss": 0.4906, "step": 7747 }, { "epoch": 0.6, "grad_norm": 1.1578495714685226, "learning_rate": 7.24816220985558e-06, "loss": 0.4922, "step": 7748 }, { "epoch": 0.6, "grad_norm": 1.1195265286033935, "learning_rate": 7.245746635774724e-06, "loss": 0.4856, "step": 7749 }, { "epoch": 0.6, "grad_norm": 1.2022404577392567, "learning_rate": 7.2433312355840916e-06, "loss": 0.5518, "step": 7750 }, { "epoch": 0.6, "grad_norm": 1.1430677098621624, "learning_rate": 7.2409160094361774e-06, "loss": 0.5409, "step": 7751 }, { "epoch": 0.6, "grad_norm": 1.2245437142079847, "learning_rate": 7.2385009574834705e-06, "loss": 0.5524, "step": 7752 }, { "epoch": 0.6, "grad_norm": 1.120899196849831, "learning_rate": 7.236086079878446e-06, "loss": 0.4832, "step": 7753 }, { "epoch": 0.6, "grad_norm": 1.1531149142684882, "learning_rate": 7.2336713767735725e-06, "loss": 0.5234, "step": 7754 }, { "epoch": 0.6, "grad_norm": 1.161670095300928, "learning_rate": 7.231256848321293e-06, "loss": 0.4833, "step": 7755 }, { "epoch": 0.6, "grad_norm": 1.1041531771909487, "learning_rate": 7.228842494674053e-06, "loss": 0.4979, "step": 7756 }, { "epoch": 0.6, "grad_norm": 1.1242680287644997, "learning_rate": 7.226428315984283e-06, "loss": 0.5051, "step": 7757 }, { "epoch": 0.6, "grad_norm": 1.2279510413670842, "learning_rate": 7.224014312404404e-06, "loss": 0.5773, "step": 7758 }, { "epoch": 0.6, "grad_norm": 1.1478084476273744, "learning_rate": 7.221600484086826e-06, "loss": 0.5261, "step": 7759 }, { "epoch": 0.6, "grad_norm": 1.1540163993562722, "learning_rate": 7.219186831183944e-06, "loss": 0.5333, "step": 7760 }, { "epoch": 0.6, "grad_norm": 1.2380839771328047, "learning_rate": 7.216773353848141e-06, "loss": 0.522, "step": 7761 }, { "epoch": 0.6, "grad_norm": 1.1390615748439636, "learning_rate": 7.214360052231797e-06, "loss": 0.4815, "step": 7762 }, { "epoch": 0.6, "grad_norm": 1.103024101505859, "learning_rate": 7.211946926487274e-06, "loss": 0.5089, "step": 7763 }, { "epoch": 0.6, "grad_norm": 1.0285385784133456, "learning_rate": 7.209533976766928e-06, "loss": 0.5471, "step": 7764 }, { "epoch": 0.6, "grad_norm": 1.1227130957794027, "learning_rate": 7.207121203223102e-06, "loss": 0.5557, "step": 7765 }, { "epoch": 0.6, "grad_norm": 1.1430226561636287, "learning_rate": 7.204708606008119e-06, "loss": 0.5567, "step": 7766 }, { "epoch": 0.6, "grad_norm": 1.1506392982502094, "learning_rate": 7.202296185274302e-06, "loss": 0.4934, "step": 7767 }, { "epoch": 0.6, "grad_norm": 1.0627155085395406, "learning_rate": 7.199883941173963e-06, "loss": 0.5273, "step": 7768 }, { "epoch": 0.6, "grad_norm": 1.1614702791474936, "learning_rate": 7.197471873859399e-06, "loss": 0.5168, "step": 7769 }, { "epoch": 0.6, "grad_norm": 1.2146122457335764, "learning_rate": 7.195059983482897e-06, "loss": 0.5171, "step": 7770 }, { "epoch": 0.6, "grad_norm": 1.1708842349046111, "learning_rate": 7.192648270196726e-06, "loss": 0.5207, "step": 7771 }, { "epoch": 0.6, "grad_norm": 1.1807842083074005, "learning_rate": 7.1902367341531536e-06, "loss": 0.5832, "step": 7772 }, { "epoch": 0.6, "grad_norm": 1.245341202274705, "learning_rate": 7.187825375504431e-06, "loss": 0.542, "step": 7773 }, { "epoch": 0.6, "grad_norm": 1.217804811221884, "learning_rate": 7.185414194402805e-06, "loss": 0.569, "step": 7774 }, { "epoch": 0.6, "grad_norm": 1.2187131729430711, "learning_rate": 7.183003191000505e-06, "loss": 0.5518, "step": 7775 }, { "epoch": 0.6, "grad_norm": 1.2335917728304344, "learning_rate": 7.180592365449742e-06, "loss": 0.5684, "step": 7776 }, { "epoch": 0.6, "grad_norm": 1.1324912306254484, "learning_rate": 7.17818171790273e-06, "loss": 0.4846, "step": 7777 }, { "epoch": 0.6, "grad_norm": 1.1520112673321037, "learning_rate": 7.175771248511664e-06, "loss": 0.5322, "step": 7778 }, { "epoch": 0.6, "grad_norm": 1.2145664597784722, "learning_rate": 7.173360957428732e-06, "loss": 0.5466, "step": 7779 }, { "epoch": 0.6, "grad_norm": 1.2196088845604676, "learning_rate": 7.170950844806109e-06, "loss": 0.5069, "step": 7780 }, { "epoch": 0.6, "grad_norm": 1.1349496610124286, "learning_rate": 7.168540910795951e-06, "loss": 0.5332, "step": 7781 }, { "epoch": 0.6, "grad_norm": 1.1940255311475914, "learning_rate": 7.166131155550414e-06, "loss": 0.5538, "step": 7782 }, { "epoch": 0.6, "grad_norm": 1.190497279888636, "learning_rate": 7.163721579221638e-06, "loss": 0.5228, "step": 7783 }, { "epoch": 0.6, "grad_norm": 1.0679824415220855, "learning_rate": 7.161312181961754e-06, "loss": 0.4807, "step": 7784 }, { "epoch": 0.6, "grad_norm": 1.1247656366160539, "learning_rate": 7.158902963922879e-06, "loss": 0.5591, "step": 7785 }, { "epoch": 0.6, "grad_norm": 1.1692783834076288, "learning_rate": 7.156493925257114e-06, "loss": 0.5372, "step": 7786 }, { "epoch": 0.6, "grad_norm": 1.2981125315920066, "learning_rate": 7.154085066116558e-06, "loss": 0.5805, "step": 7787 }, { "epoch": 0.6, "grad_norm": 1.2085272151137993, "learning_rate": 7.1516763866532955e-06, "loss": 0.531, "step": 7788 }, { "epoch": 0.6, "grad_norm": 1.0897060840576522, "learning_rate": 7.1492678870194e-06, "loss": 0.4923, "step": 7789 }, { "epoch": 0.6, "grad_norm": 1.1414289384763234, "learning_rate": 7.146859567366933e-06, "loss": 0.4814, "step": 7790 }, { "epoch": 0.6, "grad_norm": 1.1963225441743155, "learning_rate": 7.144451427847937e-06, "loss": 0.5589, "step": 7791 }, { "epoch": 0.6, "grad_norm": 1.1827548739909985, "learning_rate": 7.142043468614455e-06, "loss": 0.5394, "step": 7792 }, { "epoch": 0.6, "grad_norm": 1.1961650924286023, "learning_rate": 7.139635689818514e-06, "loss": 0.5794, "step": 7793 }, { "epoch": 0.6, "grad_norm": 1.1906948279382121, "learning_rate": 7.137228091612132e-06, "loss": 0.5365, "step": 7794 }, { "epoch": 0.6, "grad_norm": 1.090560677279326, "learning_rate": 7.134820674147313e-06, "loss": 0.5128, "step": 7795 }, { "epoch": 0.6, "grad_norm": 1.149568792022806, "learning_rate": 7.132413437576043e-06, "loss": 0.5389, "step": 7796 }, { "epoch": 0.6, "grad_norm": 1.202005484763658, "learning_rate": 7.130006382050305e-06, "loss": 0.4747, "step": 7797 }, { "epoch": 0.6, "grad_norm": 1.1965656069866897, "learning_rate": 7.127599507722074e-06, "loss": 0.5338, "step": 7798 }, { "epoch": 0.61, "grad_norm": 1.3071009077536964, "learning_rate": 7.1251928147433065e-06, "loss": 0.5717, "step": 7799 }, { "epoch": 0.61, "grad_norm": 1.0713810842072236, "learning_rate": 7.122786303265952e-06, "loss": 0.4685, "step": 7800 }, { "epoch": 0.61, "grad_norm": 1.2521506405139966, "learning_rate": 7.1203799734419375e-06, "loss": 0.5251, "step": 7801 }, { "epoch": 0.61, "grad_norm": 1.140593907180692, "learning_rate": 7.117973825423193e-06, "loss": 0.5511, "step": 7802 }, { "epoch": 0.61, "grad_norm": 1.2983479695031779, "learning_rate": 7.115567859361629e-06, "loss": 0.5779, "step": 7803 }, { "epoch": 0.61, "grad_norm": 1.2454885131048963, "learning_rate": 7.1131620754091505e-06, "loss": 0.5042, "step": 7804 }, { "epoch": 0.61, "grad_norm": 1.321252928110738, "learning_rate": 7.110756473717646e-06, "loss": 0.5228, "step": 7805 }, { "epoch": 0.61, "grad_norm": 1.13213143757669, "learning_rate": 7.108351054438988e-06, "loss": 0.5296, "step": 7806 }, { "epoch": 0.61, "grad_norm": 1.249380101035593, "learning_rate": 7.105945817725046e-06, "loss": 0.4867, "step": 7807 }, { "epoch": 0.61, "grad_norm": 1.1128109658071885, "learning_rate": 7.103540763727678e-06, "loss": 0.5154, "step": 7808 }, { "epoch": 0.61, "grad_norm": 1.1646007343191, "learning_rate": 7.101135892598725e-06, "loss": 0.5527, "step": 7809 }, { "epoch": 0.61, "grad_norm": 1.2326544347269293, "learning_rate": 7.098731204490023e-06, "loss": 0.5327, "step": 7810 }, { "epoch": 0.61, "grad_norm": 1.1507014063673973, "learning_rate": 7.096326699553384e-06, "loss": 0.5387, "step": 7811 }, { "epoch": 0.61, "grad_norm": 1.233105646630224, "learning_rate": 7.093922377940622e-06, "loss": 0.5323, "step": 7812 }, { "epoch": 0.61, "grad_norm": 1.1774480417859856, "learning_rate": 7.091518239803533e-06, "loss": 0.5185, "step": 7813 }, { "epoch": 0.61, "grad_norm": 1.2019897653712068, "learning_rate": 7.089114285293906e-06, "loss": 0.5744, "step": 7814 }, { "epoch": 0.61, "grad_norm": 1.1609124182228903, "learning_rate": 7.086710514563513e-06, "loss": 0.5477, "step": 7815 }, { "epoch": 0.61, "grad_norm": 1.1763191386871017, "learning_rate": 7.084306927764113e-06, "loss": 0.5639, "step": 7816 }, { "epoch": 0.61, "grad_norm": 1.2453669518181323, "learning_rate": 7.08190352504746e-06, "loss": 0.545, "step": 7817 }, { "epoch": 0.61, "grad_norm": 1.2543348013799753, "learning_rate": 7.079500306565291e-06, "loss": 0.5758, "step": 7818 }, { "epoch": 0.61, "grad_norm": 1.1122521177868907, "learning_rate": 7.07709727246934e-06, "loss": 0.5454, "step": 7819 }, { "epoch": 0.61, "grad_norm": 1.2221274405391953, "learning_rate": 7.074694422911315e-06, "loss": 0.5464, "step": 7820 }, { "epoch": 0.61, "grad_norm": 1.19652939231342, "learning_rate": 7.072291758042926e-06, "loss": 0.5225, "step": 7821 }, { "epoch": 0.61, "grad_norm": 1.1074683769998526, "learning_rate": 7.0698892780158625e-06, "loss": 0.5044, "step": 7822 }, { "epoch": 0.61, "grad_norm": 1.1954839246906357, "learning_rate": 7.067486982981804e-06, "loss": 0.5531, "step": 7823 }, { "epoch": 0.61, "grad_norm": 1.0720141173187567, "learning_rate": 7.065084873092425e-06, "loss": 0.517, "step": 7824 }, { "epoch": 0.61, "grad_norm": 1.2354404824562912, "learning_rate": 7.062682948499377e-06, "loss": 0.5565, "step": 7825 }, { "epoch": 0.61, "grad_norm": 1.254465234055037, "learning_rate": 7.060281209354315e-06, "loss": 0.5384, "step": 7826 }, { "epoch": 0.61, "grad_norm": 1.2194844502688962, "learning_rate": 7.057879655808862e-06, "loss": 0.5459, "step": 7827 }, { "epoch": 0.61, "grad_norm": 1.2146731437774914, "learning_rate": 7.055478288014646e-06, "loss": 0.5148, "step": 7828 }, { "epoch": 0.61, "grad_norm": 1.1701081753305558, "learning_rate": 7.053077106123278e-06, "loss": 0.5894, "step": 7829 }, { "epoch": 0.61, "grad_norm": 1.2052994931275567, "learning_rate": 7.0506761102863565e-06, "loss": 0.5449, "step": 7830 }, { "epoch": 0.61, "grad_norm": 1.196580999136246, "learning_rate": 7.048275300655472e-06, "loss": 0.521, "step": 7831 }, { "epoch": 0.61, "grad_norm": 1.13749348565217, "learning_rate": 7.0458746773821915e-06, "loss": 0.514, "step": 7832 }, { "epoch": 0.61, "grad_norm": 1.1666929491806517, "learning_rate": 7.043474240618086e-06, "loss": 0.5396, "step": 7833 }, { "epoch": 0.61, "grad_norm": 1.0910625683948953, "learning_rate": 7.0410739905147065e-06, "loss": 0.5458, "step": 7834 }, { "epoch": 0.61, "grad_norm": 1.204570545897053, "learning_rate": 7.03867392722359e-06, "loss": 0.5585, "step": 7835 }, { "epoch": 0.61, "grad_norm": 1.2072295979608385, "learning_rate": 7.0362740508962705e-06, "loss": 0.541, "step": 7836 }, { "epoch": 0.61, "grad_norm": 1.0771549670798806, "learning_rate": 7.033874361684256e-06, "loss": 0.51, "step": 7837 }, { "epoch": 0.61, "grad_norm": 1.1567993405588204, "learning_rate": 7.031474859739058e-06, "loss": 0.5428, "step": 7838 }, { "epoch": 0.61, "grad_norm": 1.1498365638530126, "learning_rate": 7.02907554521217e-06, "loss": 0.5344, "step": 7839 }, { "epoch": 0.61, "grad_norm": 1.0391374718715964, "learning_rate": 7.026676418255068e-06, "loss": 0.5073, "step": 7840 }, { "epoch": 0.61, "grad_norm": 1.3187526540141947, "learning_rate": 7.024277479019227e-06, "loss": 0.6021, "step": 7841 }, { "epoch": 0.61, "grad_norm": 1.2710415351707958, "learning_rate": 7.021878727656097e-06, "loss": 0.5852, "step": 7842 }, { "epoch": 0.61, "grad_norm": 1.2183561422262938, "learning_rate": 7.0194801643171324e-06, "loss": 0.5688, "step": 7843 }, { "epoch": 0.61, "grad_norm": 1.0695383655431216, "learning_rate": 7.017081789153759e-06, "loss": 0.5091, "step": 7844 }, { "epoch": 0.61, "grad_norm": 1.1687122644870616, "learning_rate": 7.014683602317402e-06, "loss": 0.5178, "step": 7845 }, { "epoch": 0.61, "grad_norm": 1.0709712653379195, "learning_rate": 7.012285603959477e-06, "loss": 0.4918, "step": 7846 }, { "epoch": 0.61, "grad_norm": 1.1266700217142505, "learning_rate": 7.009887794231371e-06, "loss": 0.5317, "step": 7847 }, { "epoch": 0.61, "grad_norm": 1.191408151093514, "learning_rate": 7.0074901732844795e-06, "loss": 0.5349, "step": 7848 }, { "epoch": 0.61, "grad_norm": 1.0818673570915007, "learning_rate": 7.005092741270168e-06, "loss": 0.4733, "step": 7849 }, { "epoch": 0.61, "grad_norm": 1.1800136494251199, "learning_rate": 7.0026954983398045e-06, "loss": 0.5287, "step": 7850 }, { "epoch": 0.61, "grad_norm": 1.2905621677279835, "learning_rate": 7.0002984446447416e-06, "loss": 0.5673, "step": 7851 }, { "epoch": 0.61, "grad_norm": 1.1364672405524396, "learning_rate": 6.997901580336312e-06, "loss": 0.5333, "step": 7852 }, { "epoch": 0.61, "grad_norm": 1.1678508584222989, "learning_rate": 6.995504905565844e-06, "loss": 0.5066, "step": 7853 }, { "epoch": 0.61, "grad_norm": 1.0731681541337377, "learning_rate": 6.993108420484652e-06, "loss": 0.4843, "step": 7854 }, { "epoch": 0.61, "grad_norm": 1.1146248723545837, "learning_rate": 6.990712125244039e-06, "loss": 0.5617, "step": 7855 }, { "epoch": 0.61, "grad_norm": 1.2610995541872534, "learning_rate": 6.9883160199952996e-06, "loss": 0.5487, "step": 7856 }, { "epoch": 0.61, "grad_norm": 1.1544294200517597, "learning_rate": 6.985920104889703e-06, "loss": 0.5068, "step": 7857 }, { "epoch": 0.61, "grad_norm": 1.4344190669718466, "learning_rate": 6.983524380078523e-06, "loss": 0.5083, "step": 7858 }, { "epoch": 0.61, "grad_norm": 1.167468675197255, "learning_rate": 6.9811288457130115e-06, "loss": 0.5143, "step": 7859 }, { "epoch": 0.61, "grad_norm": 1.2997469985965413, "learning_rate": 6.9787335019444105e-06, "loss": 0.5593, "step": 7860 }, { "epoch": 0.61, "grad_norm": 1.2287374760095242, "learning_rate": 6.976338348923955e-06, "loss": 0.5573, "step": 7861 }, { "epoch": 0.61, "grad_norm": 1.194006911189079, "learning_rate": 6.973943386802855e-06, "loss": 0.4866, "step": 7862 }, { "epoch": 0.61, "grad_norm": 1.1653919181246282, "learning_rate": 6.971548615732324e-06, "loss": 0.525, "step": 7863 }, { "epoch": 0.61, "grad_norm": 1.1742388979261293, "learning_rate": 6.9691540358635504e-06, "loss": 0.5382, "step": 7864 }, { "epoch": 0.61, "grad_norm": 1.177622320054089, "learning_rate": 6.96675964734772e-06, "loss": 0.516, "step": 7865 }, { "epoch": 0.61, "grad_norm": 1.2093866076639326, "learning_rate": 6.964365450336008e-06, "loss": 0.5821, "step": 7866 }, { "epoch": 0.61, "grad_norm": 1.100729336404497, "learning_rate": 6.961971444979563e-06, "loss": 0.486, "step": 7867 }, { "epoch": 0.61, "grad_norm": 1.0935850291497822, "learning_rate": 6.959577631429535e-06, "loss": 0.5151, "step": 7868 }, { "epoch": 0.61, "grad_norm": 1.2244038629951342, "learning_rate": 6.9571840098370566e-06, "loss": 0.5779, "step": 7869 }, { "epoch": 0.61, "grad_norm": 1.031875478465976, "learning_rate": 6.95479058035325e-06, "loss": 0.4658, "step": 7870 }, { "epoch": 0.61, "grad_norm": 1.0577686092224674, "learning_rate": 6.952397343129232e-06, "loss": 0.4849, "step": 7871 }, { "epoch": 0.61, "grad_norm": 1.1623036967423783, "learning_rate": 6.950004298316086e-06, "loss": 0.5704, "step": 7872 }, { "epoch": 0.61, "grad_norm": 1.1364622055999398, "learning_rate": 6.947611446064908e-06, "loss": 0.5005, "step": 7873 }, { "epoch": 0.61, "grad_norm": 1.1683675027894271, "learning_rate": 6.945218786526764e-06, "loss": 0.5791, "step": 7874 }, { "epoch": 0.61, "grad_norm": 1.1427361794726965, "learning_rate": 6.94282631985272e-06, "loss": 0.5238, "step": 7875 }, { "epoch": 0.61, "grad_norm": 1.2391517058655634, "learning_rate": 6.940434046193824e-06, "loss": 0.5186, "step": 7876 }, { "epoch": 0.61, "grad_norm": 1.1726582262199163, "learning_rate": 6.9380419657011146e-06, "loss": 0.5252, "step": 7877 }, { "epoch": 0.61, "grad_norm": 1.2253517930283762, "learning_rate": 6.9356500785256135e-06, "loss": 0.55, "step": 7878 }, { "epoch": 0.61, "grad_norm": 1.1347181930740233, "learning_rate": 6.933258384818329e-06, "loss": 0.4905, "step": 7879 }, { "epoch": 0.61, "grad_norm": 1.2204177400778107, "learning_rate": 6.930866884730265e-06, "loss": 0.5451, "step": 7880 }, { "epoch": 0.61, "grad_norm": 1.1676987556021625, "learning_rate": 6.92847557841241e-06, "loss": 0.5668, "step": 7881 }, { "epoch": 0.61, "grad_norm": 1.1790499448575396, "learning_rate": 6.926084466015744e-06, "loss": 0.5273, "step": 7882 }, { "epoch": 0.61, "grad_norm": 1.1865000781361859, "learning_rate": 6.92369354769122e-06, "loss": 0.4774, "step": 7883 }, { "epoch": 0.61, "grad_norm": 1.347984915198981, "learning_rate": 6.921302823589793e-06, "loss": 0.5311, "step": 7884 }, { "epoch": 0.61, "grad_norm": 1.0963504395531058, "learning_rate": 6.918912293862403e-06, "loss": 0.4951, "step": 7885 }, { "epoch": 0.61, "grad_norm": 1.1679240444647572, "learning_rate": 6.916521958659977e-06, "loss": 0.5576, "step": 7886 }, { "epoch": 0.61, "grad_norm": 1.154004674806167, "learning_rate": 6.914131818133432e-06, "loss": 0.5403, "step": 7887 }, { "epoch": 0.61, "grad_norm": 1.1454715966941336, "learning_rate": 6.911741872433664e-06, "loss": 0.5142, "step": 7888 }, { "epoch": 0.61, "grad_norm": 1.1530003632795518, "learning_rate": 6.909352121711563e-06, "loss": 0.5231, "step": 7889 }, { "epoch": 0.61, "grad_norm": 1.1927150354099902, "learning_rate": 6.906962566118009e-06, "loss": 0.5091, "step": 7890 }, { "epoch": 0.61, "grad_norm": 1.2525211658326436, "learning_rate": 6.904573205803864e-06, "loss": 0.5635, "step": 7891 }, { "epoch": 0.61, "grad_norm": 1.2196444139388871, "learning_rate": 6.902184040919989e-06, "loss": 0.5389, "step": 7892 }, { "epoch": 0.61, "grad_norm": 1.1768412833466393, "learning_rate": 6.899795071617214e-06, "loss": 0.5252, "step": 7893 }, { "epoch": 0.61, "grad_norm": 1.0752564567898892, "learning_rate": 6.897406298046372e-06, "loss": 0.5161, "step": 7894 }, { "epoch": 0.61, "grad_norm": 1.2337644008425996, "learning_rate": 6.895017720358275e-06, "loss": 0.5328, "step": 7895 }, { "epoch": 0.61, "grad_norm": 1.1659156334717273, "learning_rate": 6.892629338703731e-06, "loss": 0.4627, "step": 7896 }, { "epoch": 0.61, "grad_norm": 1.114672998879971, "learning_rate": 6.890241153233532e-06, "loss": 0.51, "step": 7897 }, { "epoch": 0.61, "grad_norm": 1.0345112068413678, "learning_rate": 6.88785316409845e-06, "loss": 0.4291, "step": 7898 }, { "epoch": 0.61, "grad_norm": 1.2609283995270413, "learning_rate": 6.885465371449254e-06, "loss": 0.5425, "step": 7899 }, { "epoch": 0.61, "grad_norm": 1.1207460402538525, "learning_rate": 6.883077775436697e-06, "loss": 0.5067, "step": 7900 }, { "epoch": 0.61, "grad_norm": 1.0903690945125262, "learning_rate": 6.880690376211522e-06, "loss": 0.5209, "step": 7901 }, { "epoch": 0.61, "grad_norm": 1.1704934686920958, "learning_rate": 6.878303173924461e-06, "loss": 0.5338, "step": 7902 }, { "epoch": 0.61, "grad_norm": 1.2291753359127904, "learning_rate": 6.875916168726225e-06, "loss": 0.5307, "step": 7903 }, { "epoch": 0.61, "grad_norm": 1.0942555485341354, "learning_rate": 6.8735293607675145e-06, "loss": 0.465, "step": 7904 }, { "epoch": 0.61, "grad_norm": 1.1536306146288118, "learning_rate": 6.8711427501990295e-06, "loss": 0.5163, "step": 7905 }, { "epoch": 0.61, "grad_norm": 1.1635876909725476, "learning_rate": 6.8687563371714425e-06, "loss": 0.5227, "step": 7906 }, { "epoch": 0.61, "grad_norm": 1.165310031175236, "learning_rate": 6.866370121835428e-06, "loss": 0.4596, "step": 7907 }, { "epoch": 0.61, "grad_norm": 1.1714660948382547, "learning_rate": 6.863984104341634e-06, "loss": 0.5202, "step": 7908 }, { "epoch": 0.61, "grad_norm": 1.1461271025024393, "learning_rate": 6.861598284840701e-06, "loss": 0.5232, "step": 7909 }, { "epoch": 0.61, "grad_norm": 1.0713591643967781, "learning_rate": 6.859212663483261e-06, "loss": 0.5011, "step": 7910 }, { "epoch": 0.61, "grad_norm": 1.2168841139379114, "learning_rate": 6.8568272404199275e-06, "loss": 0.5276, "step": 7911 }, { "epoch": 0.61, "grad_norm": 1.3146629222751425, "learning_rate": 6.854442015801312e-06, "loss": 0.5626, "step": 7912 }, { "epoch": 0.61, "grad_norm": 1.1516127514582708, "learning_rate": 6.852056989778001e-06, "loss": 0.5152, "step": 7913 }, { "epoch": 0.61, "grad_norm": 1.1675119176415645, "learning_rate": 6.849672162500569e-06, "loss": 0.5379, "step": 7914 }, { "epoch": 0.61, "grad_norm": 1.1914884945357567, "learning_rate": 6.847287534119589e-06, "loss": 0.574, "step": 7915 }, { "epoch": 0.61, "grad_norm": 1.0768226276045214, "learning_rate": 6.844903104785611e-06, "loss": 0.5135, "step": 7916 }, { "epoch": 0.61, "grad_norm": 1.1621777939089832, "learning_rate": 6.842518874649182e-06, "loss": 0.5081, "step": 7917 }, { "epoch": 0.61, "grad_norm": 1.0835137522678033, "learning_rate": 6.840134843860826e-06, "loss": 0.5088, "step": 7918 }, { "epoch": 0.61, "grad_norm": 1.2174607328940494, "learning_rate": 6.837751012571059e-06, "loss": 0.5342, "step": 7919 }, { "epoch": 0.61, "grad_norm": 1.1238253076628746, "learning_rate": 6.835367380930384e-06, "loss": 0.531, "step": 7920 }, { "epoch": 0.61, "grad_norm": 1.2086033629002453, "learning_rate": 6.832983949089293e-06, "loss": 0.5187, "step": 7921 }, { "epoch": 0.61, "grad_norm": 1.1637631741934549, "learning_rate": 6.83060071719827e-06, "loss": 0.5124, "step": 7922 }, { "epoch": 0.61, "grad_norm": 1.31596208544439, "learning_rate": 6.828217685407774e-06, "loss": 0.5818, "step": 7923 }, { "epoch": 0.61, "grad_norm": 1.1935038627398995, "learning_rate": 6.825834853868259e-06, "loss": 0.5797, "step": 7924 }, { "epoch": 0.61, "grad_norm": 1.4366428473431188, "learning_rate": 6.823452222730162e-06, "loss": 0.5467, "step": 7925 }, { "epoch": 0.61, "grad_norm": 1.0911741169592177, "learning_rate": 6.82106979214392e-06, "loss": 0.4954, "step": 7926 }, { "epoch": 0.61, "grad_norm": 1.1608370443550886, "learning_rate": 6.8186875622599434e-06, "loss": 0.5174, "step": 7927 }, { "epoch": 0.62, "grad_norm": 1.1283981605418782, "learning_rate": 6.8163055332286344e-06, "loss": 0.4814, "step": 7928 }, { "epoch": 0.62, "grad_norm": 1.1954016558738603, "learning_rate": 6.81392370520038e-06, "loss": 0.4782, "step": 7929 }, { "epoch": 0.62, "grad_norm": 1.2694381444975502, "learning_rate": 6.81154207832556e-06, "loss": 0.5992, "step": 7930 }, { "epoch": 0.62, "grad_norm": 1.1908383376058862, "learning_rate": 6.809160652754539e-06, "loss": 0.534, "step": 7931 }, { "epoch": 0.62, "grad_norm": 1.288604007447456, "learning_rate": 6.806779428637671e-06, "loss": 0.5975, "step": 7932 }, { "epoch": 0.62, "grad_norm": 1.227494584205629, "learning_rate": 6.804398406125295e-06, "loss": 0.5705, "step": 7933 }, { "epoch": 0.62, "grad_norm": 1.2062536407573883, "learning_rate": 6.802017585367728e-06, "loss": 0.541, "step": 7934 }, { "epoch": 0.62, "grad_norm": 1.229015109314627, "learning_rate": 6.799636966515293e-06, "loss": 0.5743, "step": 7935 }, { "epoch": 0.62, "grad_norm": 1.2845677010798349, "learning_rate": 6.797256549718287e-06, "loss": 0.5599, "step": 7936 }, { "epoch": 0.62, "grad_norm": 1.1578203311711954, "learning_rate": 6.794876335127002e-06, "loss": 0.5248, "step": 7937 }, { "epoch": 0.62, "grad_norm": 1.2289615664730857, "learning_rate": 6.79249632289171e-06, "loss": 0.5245, "step": 7938 }, { "epoch": 0.62, "grad_norm": 1.2706730329795046, "learning_rate": 6.790116513162672e-06, "loss": 0.5255, "step": 7939 }, { "epoch": 0.62, "grad_norm": 1.2013442497314895, "learning_rate": 6.787736906090139e-06, "loss": 0.5322, "step": 7940 }, { "epoch": 0.62, "grad_norm": 1.2509952874312156, "learning_rate": 6.7853575018243504e-06, "loss": 0.5498, "step": 7941 }, { "epoch": 0.62, "grad_norm": 1.1785243786672281, "learning_rate": 6.7829783005155235e-06, "loss": 0.5028, "step": 7942 }, { "epoch": 0.62, "grad_norm": 1.2342227890232524, "learning_rate": 6.780599302313882e-06, "loss": 0.5354, "step": 7943 }, { "epoch": 0.62, "grad_norm": 1.2008684969237855, "learning_rate": 6.778220507369609e-06, "loss": 0.5699, "step": 7944 }, { "epoch": 0.62, "grad_norm": 1.0941171847212174, "learning_rate": 6.7758419158328995e-06, "loss": 0.5166, "step": 7945 }, { "epoch": 0.62, "grad_norm": 1.1501688978310545, "learning_rate": 6.773463527853925e-06, "loss": 0.4776, "step": 7946 }, { "epoch": 0.62, "grad_norm": 1.1178643903667111, "learning_rate": 6.7710853435828435e-06, "loss": 0.5202, "step": 7947 }, { "epoch": 0.62, "grad_norm": 1.1799499017781123, "learning_rate": 6.768707363169809e-06, "loss": 0.4878, "step": 7948 }, { "epoch": 0.62, "grad_norm": 1.1902226306322088, "learning_rate": 6.766329586764944e-06, "loss": 0.5255, "step": 7949 }, { "epoch": 0.62, "grad_norm": 1.127517003779116, "learning_rate": 6.7639520145183754e-06, "loss": 0.4937, "step": 7950 }, { "epoch": 0.62, "grad_norm": 1.1889673503246165, "learning_rate": 6.761574646580215e-06, "loss": 0.5909, "step": 7951 }, { "epoch": 0.62, "grad_norm": 1.207067001455262, "learning_rate": 6.759197483100553e-06, "loss": 0.5352, "step": 7952 }, { "epoch": 0.62, "grad_norm": 1.1140449982668799, "learning_rate": 6.756820524229477e-06, "loss": 0.49, "step": 7953 }, { "epoch": 0.62, "grad_norm": 1.3042881520069007, "learning_rate": 6.754443770117052e-06, "loss": 0.607, "step": 7954 }, { "epoch": 0.62, "grad_norm": 1.1481597460710176, "learning_rate": 6.7520672209133355e-06, "loss": 0.5398, "step": 7955 }, { "epoch": 0.62, "grad_norm": 1.1262324787601778, "learning_rate": 6.749690876768374e-06, "loss": 0.4918, "step": 7956 }, { "epoch": 0.62, "grad_norm": 1.2529525695281212, "learning_rate": 6.747314737832196e-06, "loss": 0.5323, "step": 7957 }, { "epoch": 0.62, "grad_norm": 1.1282482407478194, "learning_rate": 6.744938804254823e-06, "loss": 0.483, "step": 7958 }, { "epoch": 0.62, "grad_norm": 1.380644440278784, "learning_rate": 6.742563076186255e-06, "loss": 0.6295, "step": 7959 }, { "epoch": 0.62, "grad_norm": 1.180658559731154, "learning_rate": 6.740187553776485e-06, "loss": 0.5184, "step": 7960 }, { "epoch": 0.62, "grad_norm": 1.167019359320177, "learning_rate": 6.737812237175494e-06, "loss": 0.5266, "step": 7961 }, { "epoch": 0.62, "grad_norm": 1.2478725926550733, "learning_rate": 6.735437126533246e-06, "loss": 0.5071, "step": 7962 }, { "epoch": 0.62, "grad_norm": 1.1053018612466925, "learning_rate": 6.7330622219997e-06, "loss": 0.4912, "step": 7963 }, { "epoch": 0.62, "grad_norm": 1.1683931122005797, "learning_rate": 6.730687523724787e-06, "loss": 0.5673, "step": 7964 }, { "epoch": 0.62, "grad_norm": 1.0695282227589669, "learning_rate": 6.728313031858437e-06, "loss": 0.4512, "step": 7965 }, { "epoch": 0.62, "grad_norm": 1.1487810827622318, "learning_rate": 6.725938746550569e-06, "loss": 0.4804, "step": 7966 }, { "epoch": 0.62, "grad_norm": 1.2762308201109906, "learning_rate": 6.723564667951076e-06, "loss": 0.6093, "step": 7967 }, { "epoch": 0.62, "grad_norm": 1.1882297883436632, "learning_rate": 6.7211907962098535e-06, "loss": 0.5255, "step": 7968 }, { "epoch": 0.62, "grad_norm": 1.170393248600238, "learning_rate": 6.718817131476769e-06, "loss": 0.5315, "step": 7969 }, { "epoch": 0.62, "grad_norm": 1.1443256284604553, "learning_rate": 6.7164436739016896e-06, "loss": 0.5166, "step": 7970 }, { "epoch": 0.62, "grad_norm": 1.211035004658763, "learning_rate": 6.714070423634461e-06, "loss": 0.5379, "step": 7971 }, { "epoch": 0.62, "grad_norm": 1.2546069602722156, "learning_rate": 6.7116973808249185e-06, "loss": 0.5592, "step": 7972 }, { "epoch": 0.62, "grad_norm": 1.261041985287015, "learning_rate": 6.709324545622891e-06, "loss": 0.5618, "step": 7973 }, { "epoch": 0.62, "grad_norm": 1.253423199644568, "learning_rate": 6.706951918178177e-06, "loss": 0.5893, "step": 7974 }, { "epoch": 0.62, "grad_norm": 1.064863325402281, "learning_rate": 6.70457949864058e-06, "loss": 0.4969, "step": 7975 }, { "epoch": 0.62, "grad_norm": 1.0972214784633085, "learning_rate": 6.702207287159882e-06, "loss": 0.4627, "step": 7976 }, { "epoch": 0.62, "grad_norm": 1.2453620221140367, "learning_rate": 6.699835283885851e-06, "loss": 0.5873, "step": 7977 }, { "epoch": 0.62, "grad_norm": 1.2013890512033891, "learning_rate": 6.697463488968249e-06, "loss": 0.4977, "step": 7978 }, { "epoch": 0.62, "grad_norm": 1.1240667074642812, "learning_rate": 6.695091902556812e-06, "loss": 0.5582, "step": 7979 }, { "epoch": 0.62, "grad_norm": 1.1377490640802794, "learning_rate": 6.692720524801273e-06, "loss": 0.5416, "step": 7980 }, { "epoch": 0.62, "grad_norm": 1.164344087887588, "learning_rate": 6.690349355851353e-06, "loss": 0.5033, "step": 7981 }, { "epoch": 0.62, "grad_norm": 1.1565506518462345, "learning_rate": 6.687978395856753e-06, "loss": 0.5019, "step": 7982 }, { "epoch": 0.62, "grad_norm": 1.1000411437749689, "learning_rate": 6.6856076449671644e-06, "loss": 0.468, "step": 7983 }, { "epoch": 0.62, "grad_norm": 1.229097504474967, "learning_rate": 6.683237103332268e-06, "loss": 0.5227, "step": 7984 }, { "epoch": 0.62, "grad_norm": 1.1920046944909812, "learning_rate": 6.6808667711017224e-06, "loss": 0.5343, "step": 7985 }, { "epoch": 0.62, "grad_norm": 1.185502882617292, "learning_rate": 6.6784966484251845e-06, "loss": 0.4656, "step": 7986 }, { "epoch": 0.62, "grad_norm": 1.0356141427207093, "learning_rate": 6.676126735452286e-06, "loss": 0.4529, "step": 7987 }, { "epoch": 0.62, "grad_norm": 1.0815745467490792, "learning_rate": 6.673757032332657e-06, "loss": 0.4856, "step": 7988 }, { "epoch": 0.62, "grad_norm": 1.1164200287403079, "learning_rate": 6.671387539215912e-06, "loss": 0.5345, "step": 7989 }, { "epoch": 0.62, "grad_norm": 1.2616897910370313, "learning_rate": 6.66901825625164e-06, "loss": 0.5522, "step": 7990 }, { "epoch": 0.62, "grad_norm": 1.2044423805158917, "learning_rate": 6.666649183589435e-06, "loss": 0.5221, "step": 7991 }, { "epoch": 0.62, "grad_norm": 1.242394915843983, "learning_rate": 6.664280321378862e-06, "loss": 0.513, "step": 7992 }, { "epoch": 0.62, "grad_norm": 1.290600315994042, "learning_rate": 6.661911669769481e-06, "loss": 0.5323, "step": 7993 }, { "epoch": 0.62, "grad_norm": 1.19674048800859, "learning_rate": 6.659543228910844e-06, "loss": 0.4963, "step": 7994 }, { "epoch": 0.62, "grad_norm": 1.183839778698033, "learning_rate": 6.657174998952474e-06, "loss": 0.5644, "step": 7995 }, { "epoch": 0.62, "grad_norm": 1.1439531969186578, "learning_rate": 6.654806980043893e-06, "loss": 0.5814, "step": 7996 }, { "epoch": 0.62, "grad_norm": 1.2879789017287988, "learning_rate": 6.6524391723346045e-06, "loss": 0.5661, "step": 7997 }, { "epoch": 0.62, "grad_norm": 1.1458643417786534, "learning_rate": 6.650071575974104e-06, "loss": 0.5082, "step": 7998 }, { "epoch": 0.62, "grad_norm": 1.183028082810128, "learning_rate": 6.64770419111187e-06, "loss": 0.5384, "step": 7999 }, { "epoch": 0.62, "grad_norm": 1.3180446672989186, "learning_rate": 6.645337017897364e-06, "loss": 0.6019, "step": 8000 }, { "epoch": 0.62, "grad_norm": 1.1009780198933996, "learning_rate": 6.64297005648004e-06, "loss": 0.54, "step": 8001 }, { "epoch": 0.62, "grad_norm": 1.1427698219097382, "learning_rate": 6.640603307009337e-06, "loss": 0.5164, "step": 8002 }, { "epoch": 0.62, "grad_norm": 1.146831609336497, "learning_rate": 6.638236769634677e-06, "loss": 0.4894, "step": 8003 }, { "epoch": 0.62, "grad_norm": 1.1889526617221147, "learning_rate": 6.635870444505479e-06, "loss": 0.5086, "step": 8004 }, { "epoch": 0.62, "grad_norm": 1.435674088219694, "learning_rate": 6.633504331771133e-06, "loss": 0.5725, "step": 8005 }, { "epoch": 0.62, "grad_norm": 1.1108984896092007, "learning_rate": 6.631138431581028e-06, "loss": 0.5025, "step": 8006 }, { "epoch": 0.62, "grad_norm": 1.1144714418143478, "learning_rate": 6.628772744084534e-06, "loss": 0.5254, "step": 8007 }, { "epoch": 0.62, "grad_norm": 1.2043269705190347, "learning_rate": 6.62640726943101e-06, "loss": 0.5023, "step": 8008 }, { "epoch": 0.62, "grad_norm": 1.1540997073768196, "learning_rate": 6.624042007769804e-06, "loss": 0.5433, "step": 8009 }, { "epoch": 0.62, "grad_norm": 1.1627167468896367, "learning_rate": 6.621676959250239e-06, "loss": 0.5499, "step": 8010 }, { "epoch": 0.62, "grad_norm": 1.2574048062766159, "learning_rate": 6.619312124021641e-06, "loss": 0.5617, "step": 8011 }, { "epoch": 0.62, "grad_norm": 1.07319820129949, "learning_rate": 6.6169475022333075e-06, "loss": 0.5464, "step": 8012 }, { "epoch": 0.62, "grad_norm": 1.224178694779988, "learning_rate": 6.614583094034533e-06, "loss": 0.5208, "step": 8013 }, { "epoch": 0.62, "grad_norm": 1.146619226690463, "learning_rate": 6.612218899574598e-06, "loss": 0.539, "step": 8014 }, { "epoch": 0.62, "grad_norm": 1.0959674156071852, "learning_rate": 6.6098549190027584e-06, "loss": 0.4975, "step": 8015 }, { "epoch": 0.62, "grad_norm": 1.1302351999678646, "learning_rate": 6.60749115246827e-06, "loss": 0.5423, "step": 8016 }, { "epoch": 0.62, "grad_norm": 1.1742665619294352, "learning_rate": 6.605127600120368e-06, "loss": 0.4982, "step": 8017 }, { "epoch": 0.62, "grad_norm": 1.115350827418697, "learning_rate": 6.602764262108274e-06, "loss": 0.5443, "step": 8018 }, { "epoch": 0.62, "grad_norm": 1.1259885788976922, "learning_rate": 6.600401138581205e-06, "loss": 0.532, "step": 8019 }, { "epoch": 0.62, "grad_norm": 1.1503542520313583, "learning_rate": 6.598038229688347e-06, "loss": 0.5526, "step": 8020 }, { "epoch": 0.62, "grad_norm": 1.2370709301558145, "learning_rate": 6.5956755355788895e-06, "loss": 0.6149, "step": 8021 }, { "epoch": 0.62, "grad_norm": 1.0876145620403093, "learning_rate": 6.5933130564019976e-06, "loss": 0.5004, "step": 8022 }, { "epoch": 0.62, "grad_norm": 1.1713440264371344, "learning_rate": 6.590950792306829e-06, "loss": 0.5335, "step": 8023 }, { "epoch": 0.62, "grad_norm": 1.180905452312604, "learning_rate": 6.5885887434425275e-06, "loss": 0.5437, "step": 8024 }, { "epoch": 0.62, "grad_norm": 1.204417686082076, "learning_rate": 6.586226909958218e-06, "loss": 0.5385, "step": 8025 }, { "epoch": 0.62, "grad_norm": 1.0626486505977926, "learning_rate": 6.583865292003015e-06, "loss": 0.5315, "step": 8026 }, { "epoch": 0.62, "grad_norm": 1.0937478746666014, "learning_rate": 6.581503889726022e-06, "loss": 0.5229, "step": 8027 }, { "epoch": 0.62, "grad_norm": 1.2301821904819097, "learning_rate": 6.579142703276325e-06, "loss": 0.5037, "step": 8028 }, { "epoch": 0.62, "grad_norm": 1.099661844468642, "learning_rate": 6.576781732803001e-06, "loss": 0.5408, "step": 8029 }, { "epoch": 0.62, "grad_norm": 1.131593245389108, "learning_rate": 6.574420978455105e-06, "loss": 0.538, "step": 8030 }, { "epoch": 0.62, "grad_norm": 1.1688908537987979, "learning_rate": 6.572060440381688e-06, "loss": 0.5571, "step": 8031 }, { "epoch": 0.62, "grad_norm": 1.103406080362045, "learning_rate": 6.569700118731779e-06, "loss": 0.4678, "step": 8032 }, { "epoch": 0.62, "grad_norm": 1.1022211364856964, "learning_rate": 6.5673400136544e-06, "loss": 0.5141, "step": 8033 }, { "epoch": 0.62, "grad_norm": 1.1820127799638995, "learning_rate": 6.564980125298559e-06, "loss": 0.5604, "step": 8034 }, { "epoch": 0.62, "grad_norm": 1.158854000934489, "learning_rate": 6.562620453813242e-06, "loss": 0.5273, "step": 8035 }, { "epoch": 0.62, "grad_norm": 1.2873508996645737, "learning_rate": 6.560260999347432e-06, "loss": 0.4882, "step": 8036 }, { "epoch": 0.62, "grad_norm": 1.1934890801449982, "learning_rate": 6.557901762050091e-06, "loss": 0.5406, "step": 8037 }, { "epoch": 0.62, "grad_norm": 1.2175066547877735, "learning_rate": 6.555542742070169e-06, "loss": 0.5582, "step": 8038 }, { "epoch": 0.62, "grad_norm": 1.2787965684393074, "learning_rate": 6.553183939556608e-06, "loss": 0.5725, "step": 8039 }, { "epoch": 0.62, "grad_norm": 1.1375518053435603, "learning_rate": 6.550825354658328e-06, "loss": 0.5195, "step": 8040 }, { "epoch": 0.62, "grad_norm": 1.1587409946457963, "learning_rate": 6.548466987524238e-06, "loss": 0.5572, "step": 8041 }, { "epoch": 0.62, "grad_norm": 1.3383814002051737, "learning_rate": 6.546108838303233e-06, "loss": 0.517, "step": 8042 }, { "epoch": 0.62, "grad_norm": 1.1342899553350756, "learning_rate": 6.543750907144196e-06, "loss": 0.5599, "step": 8043 }, { "epoch": 0.62, "grad_norm": 1.2611076835738857, "learning_rate": 6.5413931941959994e-06, "loss": 0.564, "step": 8044 }, { "epoch": 0.62, "grad_norm": 1.1934999174030916, "learning_rate": 6.539035699607494e-06, "loss": 0.538, "step": 8045 }, { "epoch": 0.62, "grad_norm": 1.1913859881569846, "learning_rate": 6.53667842352752e-06, "loss": 0.5237, "step": 8046 }, { "epoch": 0.62, "grad_norm": 1.142456987268308, "learning_rate": 6.534321366104905e-06, "loss": 0.5534, "step": 8047 }, { "epoch": 0.62, "grad_norm": 1.2650165743740918, "learning_rate": 6.531964527488463e-06, "loss": 0.5439, "step": 8048 }, { "epoch": 0.62, "grad_norm": 1.177078241432902, "learning_rate": 6.529607907826994e-06, "loss": 0.5396, "step": 8049 }, { "epoch": 0.62, "grad_norm": 1.293195952307599, "learning_rate": 6.527251507269283e-06, "loss": 0.598, "step": 8050 }, { "epoch": 0.62, "grad_norm": 1.2534863966366128, "learning_rate": 6.524895325964102e-06, "loss": 0.5722, "step": 8051 }, { "epoch": 0.62, "grad_norm": 1.1097887906184716, "learning_rate": 6.522539364060205e-06, "loss": 0.5223, "step": 8052 }, { "epoch": 0.62, "grad_norm": 1.1559349352500576, "learning_rate": 6.5201836217063395e-06, "loss": 0.5119, "step": 8053 }, { "epoch": 0.62, "grad_norm": 1.160296653189349, "learning_rate": 6.517828099051238e-06, "loss": 0.5649, "step": 8054 }, { "epoch": 0.62, "grad_norm": 1.0796615045401898, "learning_rate": 6.515472796243615e-06, "loss": 0.4854, "step": 8055 }, { "epoch": 0.62, "grad_norm": 1.180626754234388, "learning_rate": 6.513117713432171e-06, "loss": 0.5219, "step": 8056 }, { "epoch": 0.63, "grad_norm": 1.2019430522725991, "learning_rate": 6.510762850765594e-06, "loss": 0.4822, "step": 8057 }, { "epoch": 0.63, "grad_norm": 1.3580501282615738, "learning_rate": 6.50840820839256e-06, "loss": 0.5589, "step": 8058 }, { "epoch": 0.63, "grad_norm": 1.0983115307134994, "learning_rate": 6.506053786461732e-06, "loss": 0.5241, "step": 8059 }, { "epoch": 0.63, "grad_norm": 1.1142515003450155, "learning_rate": 6.5036995851217565e-06, "loss": 0.5246, "step": 8060 }, { "epoch": 0.63, "grad_norm": 1.135803588362028, "learning_rate": 6.501345604521263e-06, "loss": 0.5331, "step": 8061 }, { "epoch": 0.63, "grad_norm": 1.1589763048364812, "learning_rate": 6.49899184480887e-06, "loss": 0.5037, "step": 8062 }, { "epoch": 0.63, "grad_norm": 1.1659016769169397, "learning_rate": 6.496638306133186e-06, "loss": 0.5319, "step": 8063 }, { "epoch": 0.63, "grad_norm": 1.1396461885588618, "learning_rate": 6.494284988642803e-06, "loss": 0.489, "step": 8064 }, { "epoch": 0.63, "grad_norm": 1.190222981182132, "learning_rate": 6.491931892486294e-06, "loss": 0.5518, "step": 8065 }, { "epoch": 0.63, "grad_norm": 1.1799287865049926, "learning_rate": 6.489579017812224e-06, "loss": 0.5291, "step": 8066 }, { "epoch": 0.63, "grad_norm": 1.2032853552234715, "learning_rate": 6.487226364769141e-06, "loss": 0.5397, "step": 8067 }, { "epoch": 0.63, "grad_norm": 1.1988851971805627, "learning_rate": 6.484873933505581e-06, "loss": 0.5361, "step": 8068 }, { "epoch": 0.63, "grad_norm": 1.1916125009582101, "learning_rate": 6.482521724170068e-06, "loss": 0.5381, "step": 8069 }, { "epoch": 0.63, "grad_norm": 1.1859034295898843, "learning_rate": 6.480169736911104e-06, "loss": 0.5412, "step": 8070 }, { "epoch": 0.63, "grad_norm": 1.1138086508604188, "learning_rate": 6.477817971877186e-06, "loss": 0.5013, "step": 8071 }, { "epoch": 0.63, "grad_norm": 1.2226249032679162, "learning_rate": 6.47546642921679e-06, "loss": 0.4959, "step": 8072 }, { "epoch": 0.63, "grad_norm": 1.1791949222715465, "learning_rate": 6.4731151090783814e-06, "loss": 0.498, "step": 8073 }, { "epoch": 0.63, "grad_norm": 1.1192722209899926, "learning_rate": 6.470764011610415e-06, "loss": 0.544, "step": 8074 }, { "epoch": 0.63, "grad_norm": 1.255417480561332, "learning_rate": 6.468413136961325e-06, "loss": 0.5461, "step": 8075 }, { "epoch": 0.63, "grad_norm": 1.2249791805288721, "learning_rate": 6.4660624852795336e-06, "loss": 0.5201, "step": 8076 }, { "epoch": 0.63, "grad_norm": 1.2620239361126833, "learning_rate": 6.463712056713449e-06, "loss": 0.568, "step": 8077 }, { "epoch": 0.63, "grad_norm": 1.201600682082281, "learning_rate": 6.461361851411466e-06, "loss": 0.5274, "step": 8078 }, { "epoch": 0.63, "grad_norm": 1.294499162864488, "learning_rate": 6.45901186952197e-06, "loss": 0.5912, "step": 8079 }, { "epoch": 0.63, "grad_norm": 1.1582935934774032, "learning_rate": 6.456662111193322e-06, "loss": 0.5443, "step": 8080 }, { "epoch": 0.63, "grad_norm": 1.2203371521710187, "learning_rate": 6.454312576573878e-06, "loss": 0.567, "step": 8081 }, { "epoch": 0.63, "grad_norm": 1.172150388784612, "learning_rate": 6.451963265811971e-06, "loss": 0.4928, "step": 8082 }, { "epoch": 0.63, "grad_norm": 1.1801006780929693, "learning_rate": 6.449614179055929e-06, "loss": 0.563, "step": 8083 }, { "epoch": 0.63, "grad_norm": 1.1425055065064067, "learning_rate": 6.447265316454063e-06, "loss": 0.5077, "step": 8084 }, { "epoch": 0.63, "grad_norm": 1.1370770915358563, "learning_rate": 6.444916678154667e-06, "loss": 0.4995, "step": 8085 }, { "epoch": 0.63, "grad_norm": 1.104175687549137, "learning_rate": 6.442568264306024e-06, "loss": 0.5123, "step": 8086 }, { "epoch": 0.63, "grad_norm": 1.2270560273962663, "learning_rate": 6.440220075056398e-06, "loss": 0.5398, "step": 8087 }, { "epoch": 0.63, "grad_norm": 1.241972082880874, "learning_rate": 6.437872110554044e-06, "loss": 0.5909, "step": 8088 }, { "epoch": 0.63, "grad_norm": 1.1515595952363562, "learning_rate": 6.4355243709472045e-06, "loss": 0.5595, "step": 8089 }, { "epoch": 0.63, "grad_norm": 1.0947170342227233, "learning_rate": 6.433176856384103e-06, "loss": 0.5011, "step": 8090 }, { "epoch": 0.63, "grad_norm": 1.1940678118517964, "learning_rate": 6.430829567012946e-06, "loss": 0.5401, "step": 8091 }, { "epoch": 0.63, "grad_norm": 1.2344310120054822, "learning_rate": 6.428482502981933e-06, "loss": 0.5629, "step": 8092 }, { "epoch": 0.63, "grad_norm": 1.1614183438976375, "learning_rate": 6.426135664439246e-06, "loss": 0.5458, "step": 8093 }, { "epoch": 0.63, "grad_norm": 1.0956054752784061, "learning_rate": 6.423789051533056e-06, "loss": 0.5425, "step": 8094 }, { "epoch": 0.63, "grad_norm": 1.221843266379924, "learning_rate": 6.4214426644115104e-06, "loss": 0.5249, "step": 8095 }, { "epoch": 0.63, "grad_norm": 1.1928426120026385, "learning_rate": 6.419096503222757e-06, "loss": 0.5446, "step": 8096 }, { "epoch": 0.63, "grad_norm": 1.228508492890292, "learning_rate": 6.416750568114911e-06, "loss": 0.5188, "step": 8097 }, { "epoch": 0.63, "grad_norm": 1.192796490116827, "learning_rate": 6.414404859236091e-06, "loss": 0.5588, "step": 8098 }, { "epoch": 0.63, "grad_norm": 1.2827298526880522, "learning_rate": 6.412059376734392e-06, "loss": 0.5426, "step": 8099 }, { "epoch": 0.63, "grad_norm": 1.1082316672273647, "learning_rate": 6.409714120757895e-06, "loss": 0.5164, "step": 8100 }, { "epoch": 0.63, "grad_norm": 1.2100529547208914, "learning_rate": 6.407369091454672e-06, "loss": 0.5197, "step": 8101 }, { "epoch": 0.63, "grad_norm": 1.2260628915838525, "learning_rate": 6.40502428897277e-06, "loss": 0.5549, "step": 8102 }, { "epoch": 0.63, "grad_norm": 1.1489381801651146, "learning_rate": 6.402679713460234e-06, "loss": 0.5005, "step": 8103 }, { "epoch": 0.63, "grad_norm": 1.174123412707984, "learning_rate": 6.400335365065087e-06, "loss": 0.5305, "step": 8104 }, { "epoch": 0.63, "grad_norm": 1.2181456118738843, "learning_rate": 6.397991243935339e-06, "loss": 0.5451, "step": 8105 }, { "epoch": 0.63, "grad_norm": 1.1271746069052053, "learning_rate": 6.395647350218992e-06, "loss": 0.4685, "step": 8106 }, { "epoch": 0.63, "grad_norm": 1.1139567149740126, "learning_rate": 6.393303684064019e-06, "loss": 0.5267, "step": 8107 }, { "epoch": 0.63, "grad_norm": 1.0985159985640889, "learning_rate": 6.390960245618394e-06, "loss": 0.4817, "step": 8108 }, { "epoch": 0.63, "grad_norm": 1.1378639980229739, "learning_rate": 6.388617035030069e-06, "loss": 0.577, "step": 8109 }, { "epoch": 0.63, "grad_norm": 1.2483777964117484, "learning_rate": 6.386274052446982e-06, "loss": 0.5169, "step": 8110 }, { "epoch": 0.63, "grad_norm": 1.1303506869951976, "learning_rate": 6.383931298017063e-06, "loss": 0.4857, "step": 8111 }, { "epoch": 0.63, "grad_norm": 1.1579573114726083, "learning_rate": 6.381588771888213e-06, "loss": 0.512, "step": 8112 }, { "epoch": 0.63, "grad_norm": 1.0717487583389012, "learning_rate": 6.379246474208332e-06, "loss": 0.5091, "step": 8113 }, { "epoch": 0.63, "grad_norm": 1.1624755795282637, "learning_rate": 6.376904405125304e-06, "loss": 0.5323, "step": 8114 }, { "epoch": 0.63, "grad_norm": 1.143481140638387, "learning_rate": 6.374562564786993e-06, "loss": 0.5195, "step": 8115 }, { "epoch": 0.63, "grad_norm": 1.1233260628684765, "learning_rate": 6.372220953341254e-06, "loss": 0.5013, "step": 8116 }, { "epoch": 0.63, "grad_norm": 1.080642911234163, "learning_rate": 6.36987957093592e-06, "loss": 0.5109, "step": 8117 }, { "epoch": 0.63, "grad_norm": 1.1621251209378372, "learning_rate": 6.367538417718817e-06, "loss": 0.4952, "step": 8118 }, { "epoch": 0.63, "grad_norm": 1.2936428652278116, "learning_rate": 6.365197493837757e-06, "loss": 0.5466, "step": 8119 }, { "epoch": 0.63, "grad_norm": 1.0631700534681894, "learning_rate": 6.362856799440531e-06, "loss": 0.517, "step": 8120 }, { "epoch": 0.63, "grad_norm": 1.1429910549404527, "learning_rate": 6.360516334674924e-06, "loss": 0.518, "step": 8121 }, { "epoch": 0.63, "grad_norm": 1.2255141522179076, "learning_rate": 6.358176099688693e-06, "loss": 0.5635, "step": 8122 }, { "epoch": 0.63, "grad_norm": 1.1898499877525728, "learning_rate": 6.355836094629596e-06, "loss": 0.5652, "step": 8123 }, { "epoch": 0.63, "grad_norm": 1.2327496898204358, "learning_rate": 6.353496319645369e-06, "loss": 0.5372, "step": 8124 }, { "epoch": 0.63, "grad_norm": 1.2067161568526497, "learning_rate": 6.351156774883731e-06, "loss": 0.554, "step": 8125 }, { "epoch": 0.63, "grad_norm": 1.1636667282846072, "learning_rate": 6.348817460492396e-06, "loss": 0.4822, "step": 8126 }, { "epoch": 0.63, "grad_norm": 1.1580437330213513, "learning_rate": 6.346478376619046e-06, "loss": 0.4633, "step": 8127 }, { "epoch": 0.63, "grad_norm": 1.1926301766764904, "learning_rate": 6.344139523411368e-06, "loss": 0.514, "step": 8128 }, { "epoch": 0.63, "grad_norm": 1.1422670120137954, "learning_rate": 6.341800901017024e-06, "loss": 0.5179, "step": 8129 }, { "epoch": 0.63, "grad_norm": 1.086437467019316, "learning_rate": 6.339462509583663e-06, "loss": 0.4955, "step": 8130 }, { "epoch": 0.63, "grad_norm": 1.1578803038724266, "learning_rate": 6.337124349258923e-06, "loss": 0.5505, "step": 8131 }, { "epoch": 0.63, "grad_norm": 1.2396365189028185, "learning_rate": 6.334786420190415e-06, "loss": 0.5078, "step": 8132 }, { "epoch": 0.63, "grad_norm": 1.1917749553553898, "learning_rate": 6.3324487225257526e-06, "loss": 0.5211, "step": 8133 }, { "epoch": 0.63, "grad_norm": 1.1814016053131546, "learning_rate": 6.330111256412525e-06, "loss": 0.5064, "step": 8134 }, { "epoch": 0.63, "grad_norm": 1.173899516697287, "learning_rate": 6.3277740219983066e-06, "loss": 0.5611, "step": 8135 }, { "epoch": 0.63, "grad_norm": 1.0959501209081624, "learning_rate": 6.325437019430665e-06, "loss": 0.4615, "step": 8136 }, { "epoch": 0.63, "grad_norm": 1.1833449501935949, "learning_rate": 6.323100248857137e-06, "loss": 0.5287, "step": 8137 }, { "epoch": 0.63, "grad_norm": 1.1935617927660638, "learning_rate": 6.320763710425262e-06, "loss": 0.5522, "step": 8138 }, { "epoch": 0.63, "grad_norm": 1.246307593863828, "learning_rate": 6.318427404282557e-06, "loss": 0.5709, "step": 8139 }, { "epoch": 0.63, "grad_norm": 1.1493925936888927, "learning_rate": 6.316091330576523e-06, "loss": 0.5546, "step": 8140 }, { "epoch": 0.63, "grad_norm": 1.2851754508859623, "learning_rate": 6.313755489454654e-06, "loss": 0.5924, "step": 8141 }, { "epoch": 0.63, "grad_norm": 1.2259033764697531, "learning_rate": 6.311419881064416e-06, "loss": 0.5237, "step": 8142 }, { "epoch": 0.63, "grad_norm": 1.1730304870578598, "learning_rate": 6.309084505553269e-06, "loss": 0.5347, "step": 8143 }, { "epoch": 0.63, "grad_norm": 1.2374726552543034, "learning_rate": 6.306749363068665e-06, "loss": 0.5426, "step": 8144 }, { "epoch": 0.63, "grad_norm": 1.1704444800648626, "learning_rate": 6.304414453758024e-06, "loss": 0.5033, "step": 8145 }, { "epoch": 0.63, "grad_norm": 1.1944052051535528, "learning_rate": 6.3020797777687705e-06, "loss": 0.5466, "step": 8146 }, { "epoch": 0.63, "grad_norm": 1.2234306442438918, "learning_rate": 6.299745335248295e-06, "loss": 0.4611, "step": 8147 }, { "epoch": 0.63, "grad_norm": 1.1135698446354996, "learning_rate": 6.297411126343988e-06, "loss": 0.4788, "step": 8148 }, { "epoch": 0.63, "grad_norm": 1.2336560340508356, "learning_rate": 6.295077151203221e-06, "loss": 0.5432, "step": 8149 }, { "epoch": 0.63, "grad_norm": 1.0181511786498405, "learning_rate": 6.292743409973345e-06, "loss": 0.4528, "step": 8150 }, { "epoch": 0.63, "grad_norm": 1.2724951317175053, "learning_rate": 6.290409902801706e-06, "loss": 0.5483, "step": 8151 }, { "epoch": 0.63, "grad_norm": 1.1709874670496545, "learning_rate": 6.288076629835633e-06, "loss": 0.5352, "step": 8152 }, { "epoch": 0.63, "grad_norm": 1.2000504880456666, "learning_rate": 6.285743591222428e-06, "loss": 0.5182, "step": 8153 }, { "epoch": 0.63, "grad_norm": 1.1198374125281498, "learning_rate": 6.283410787109396e-06, "loss": 0.5429, "step": 8154 }, { "epoch": 0.63, "grad_norm": 1.1693575458049734, "learning_rate": 6.2810782176438145e-06, "loss": 0.5388, "step": 8155 }, { "epoch": 0.63, "grad_norm": 1.2620790044022467, "learning_rate": 6.278745882972952e-06, "loss": 0.5676, "step": 8156 }, { "epoch": 0.63, "grad_norm": 1.1385941388927983, "learning_rate": 6.276413783244064e-06, "loss": 0.5279, "step": 8157 }, { "epoch": 0.63, "grad_norm": 1.2475136347543503, "learning_rate": 6.274081918604382e-06, "loss": 0.5334, "step": 8158 }, { "epoch": 0.63, "grad_norm": 1.1627673938822398, "learning_rate": 6.271750289201134e-06, "loss": 0.5334, "step": 8159 }, { "epoch": 0.63, "grad_norm": 1.1870628355342407, "learning_rate": 6.269418895181523e-06, "loss": 0.5608, "step": 8160 }, { "epoch": 0.63, "grad_norm": 1.285418637282944, "learning_rate": 6.267087736692744e-06, "loss": 0.5837, "step": 8161 }, { "epoch": 0.63, "grad_norm": 1.0852448703949569, "learning_rate": 6.26475681388198e-06, "loss": 0.5261, "step": 8162 }, { "epoch": 0.63, "grad_norm": 1.1479191551019983, "learning_rate": 6.262426126896386e-06, "loss": 0.5194, "step": 8163 }, { "epoch": 0.63, "grad_norm": 1.198075286757198, "learning_rate": 6.260095675883116e-06, "loss": 0.5426, "step": 8164 }, { "epoch": 0.63, "grad_norm": 1.5790903896674928, "learning_rate": 6.257765460989298e-06, "loss": 0.493, "step": 8165 }, { "epoch": 0.63, "grad_norm": 1.193184747733611, "learning_rate": 6.255435482362056e-06, "loss": 0.5195, "step": 8166 }, { "epoch": 0.63, "grad_norm": 1.140465032778158, "learning_rate": 6.253105740148493e-06, "loss": 0.4906, "step": 8167 }, { "epoch": 0.63, "grad_norm": 1.0894346392223409, "learning_rate": 6.2507762344956925e-06, "loss": 0.4609, "step": 8168 }, { "epoch": 0.63, "grad_norm": 1.1849297263639302, "learning_rate": 6.248446965550735e-06, "loss": 0.513, "step": 8169 }, { "epoch": 0.63, "grad_norm": 1.0231387452150094, "learning_rate": 6.246117933460673e-06, "loss": 0.4737, "step": 8170 }, { "epoch": 0.63, "grad_norm": 1.376445660520037, "learning_rate": 6.2437891383725535e-06, "loss": 0.5844, "step": 8171 }, { "epoch": 0.63, "grad_norm": 1.1341401311520276, "learning_rate": 6.241460580433411e-06, "loss": 0.4763, "step": 8172 }, { "epoch": 0.63, "grad_norm": 1.2005624605300265, "learning_rate": 6.239132259790248e-06, "loss": 0.5166, "step": 8173 }, { "epoch": 0.63, "grad_norm": 1.1994956625652882, "learning_rate": 6.23680417659007e-06, "loss": 0.555, "step": 8174 }, { "epoch": 0.63, "grad_norm": 1.1545582844827547, "learning_rate": 6.234476330979859e-06, "loss": 0.5358, "step": 8175 }, { "epoch": 0.63, "grad_norm": 1.1117929247561533, "learning_rate": 6.232148723106586e-06, "loss": 0.517, "step": 8176 }, { "epoch": 0.63, "grad_norm": 1.1711607218520421, "learning_rate": 6.2298213531172055e-06, "loss": 0.5025, "step": 8177 }, { "epoch": 0.63, "grad_norm": 1.1826246971692047, "learning_rate": 6.227494221158652e-06, "loss": 0.4513, "step": 8178 }, { "epoch": 0.63, "grad_norm": 1.1192390971109658, "learning_rate": 6.225167327377852e-06, "loss": 0.5284, "step": 8179 }, { "epoch": 0.63, "grad_norm": 1.165086027415438, "learning_rate": 6.222840671921715e-06, "loss": 0.472, "step": 8180 }, { "epoch": 0.63, "grad_norm": 1.1796114562327191, "learning_rate": 6.220514254937131e-06, "loss": 0.497, "step": 8181 }, { "epoch": 0.63, "grad_norm": 1.2233615096500101, "learning_rate": 6.218188076570988e-06, "loss": 0.5247, "step": 8182 }, { "epoch": 0.63, "grad_norm": 1.1942694108202858, "learning_rate": 6.215862136970139e-06, "loss": 0.5362, "step": 8183 }, { "epoch": 0.63, "grad_norm": 1.2695039716871048, "learning_rate": 6.213536436281438e-06, "loss": 0.573, "step": 8184 }, { "epoch": 0.63, "grad_norm": 1.2133352843642704, "learning_rate": 6.211210974651716e-06, "loss": 0.4941, "step": 8185 }, { "epoch": 0.64, "grad_norm": 1.2585189446655267, "learning_rate": 6.208885752227791e-06, "loss": 0.5347, "step": 8186 }, { "epoch": 0.64, "grad_norm": 1.1384544623025161, "learning_rate": 6.2065607691564736e-06, "loss": 0.4804, "step": 8187 }, { "epoch": 0.64, "grad_norm": 1.228707060582671, "learning_rate": 6.204236025584542e-06, "loss": 0.5652, "step": 8188 }, { "epoch": 0.64, "grad_norm": 1.2145456027958572, "learning_rate": 6.201911521658777e-06, "loss": 0.4816, "step": 8189 }, { "epoch": 0.64, "grad_norm": 1.1635301127780793, "learning_rate": 6.19958725752593e-06, "loss": 0.541, "step": 8190 }, { "epoch": 0.64, "grad_norm": 1.1273108696322753, "learning_rate": 6.197263233332747e-06, "loss": 0.5061, "step": 8191 }, { "epoch": 0.64, "grad_norm": 1.1827566378061876, "learning_rate": 6.19493944922596e-06, "loss": 0.5096, "step": 8192 }, { "epoch": 0.64, "grad_norm": 1.2844155910788315, "learning_rate": 6.192615905352273e-06, "loss": 0.521, "step": 8193 }, { "epoch": 0.64, "grad_norm": 1.1024948395069822, "learning_rate": 6.190292601858389e-06, "loss": 0.4921, "step": 8194 }, { "epoch": 0.64, "grad_norm": 1.1057808371480182, "learning_rate": 6.1879695388909865e-06, "loss": 0.441, "step": 8195 }, { "epoch": 0.64, "grad_norm": 1.2051472698140704, "learning_rate": 6.185646716596735e-06, "loss": 0.4912, "step": 8196 }, { "epoch": 0.64, "grad_norm": 1.262531408542438, "learning_rate": 6.183324135122289e-06, "loss": 0.5507, "step": 8197 }, { "epoch": 0.64, "grad_norm": 1.2005207561015714, "learning_rate": 6.181001794614279e-06, "loss": 0.5171, "step": 8198 }, { "epoch": 0.64, "grad_norm": 1.2581858584318575, "learning_rate": 6.17867969521933e-06, "loss": 0.5506, "step": 8199 }, { "epoch": 0.64, "grad_norm": 1.0402499419425246, "learning_rate": 6.176357837084046e-06, "loss": 0.5074, "step": 8200 }, { "epoch": 0.64, "grad_norm": 1.339137789413524, "learning_rate": 6.17403622035502e-06, "loss": 0.5281, "step": 8201 }, { "epoch": 0.64, "grad_norm": 1.2314929877301088, "learning_rate": 6.1717148451788265e-06, "loss": 0.5521, "step": 8202 }, { "epoch": 0.64, "grad_norm": 1.2629923814013706, "learning_rate": 6.169393711702027e-06, "loss": 0.5692, "step": 8203 }, { "epoch": 0.64, "grad_norm": 1.1138745784765145, "learning_rate": 6.167072820071167e-06, "loss": 0.5313, "step": 8204 }, { "epoch": 0.64, "grad_norm": 1.1767591294403938, "learning_rate": 6.164752170432773e-06, "loss": 0.5292, "step": 8205 }, { "epoch": 0.64, "grad_norm": 1.1553288476317225, "learning_rate": 6.162431762933361e-06, "loss": 0.531, "step": 8206 }, { "epoch": 0.64, "grad_norm": 1.1536558795201628, "learning_rate": 6.160111597719433e-06, "loss": 0.5361, "step": 8207 }, { "epoch": 0.64, "grad_norm": 1.3086055470404578, "learning_rate": 6.157791674937471e-06, "loss": 0.5402, "step": 8208 }, { "epoch": 0.64, "grad_norm": 1.2474205582729367, "learning_rate": 6.1554719947339435e-06, "loss": 0.5066, "step": 8209 }, { "epoch": 0.64, "grad_norm": 1.1854479275533065, "learning_rate": 6.153152557255303e-06, "loss": 0.5158, "step": 8210 }, { "epoch": 0.64, "grad_norm": 1.2329209854103622, "learning_rate": 6.150833362647988e-06, "loss": 0.5584, "step": 8211 }, { "epoch": 0.64, "grad_norm": 1.3002949856818755, "learning_rate": 6.148514411058424e-06, "loss": 0.5073, "step": 8212 }, { "epoch": 0.64, "grad_norm": 1.1295832415704656, "learning_rate": 6.146195702633018e-06, "loss": 0.5404, "step": 8213 }, { "epoch": 0.64, "grad_norm": 1.1528537462217887, "learning_rate": 6.143877237518158e-06, "loss": 0.562, "step": 8214 }, { "epoch": 0.64, "grad_norm": 1.2224765925906589, "learning_rate": 6.141559015860221e-06, "loss": 0.5699, "step": 8215 }, { "epoch": 0.64, "grad_norm": 1.1697335067698311, "learning_rate": 6.1392410378055725e-06, "loss": 0.5119, "step": 8216 }, { "epoch": 0.64, "grad_norm": 1.2611346236084933, "learning_rate": 6.136923303500556e-06, "loss": 0.5468, "step": 8217 }, { "epoch": 0.64, "grad_norm": 1.3625432068859702, "learning_rate": 6.134605813091503e-06, "loss": 0.5605, "step": 8218 }, { "epoch": 0.64, "grad_norm": 1.0981414374226928, "learning_rate": 6.132288566724728e-06, "loss": 0.5418, "step": 8219 }, { "epoch": 0.64, "grad_norm": 1.1336824956815332, "learning_rate": 6.129971564546529e-06, "loss": 0.4852, "step": 8220 }, { "epoch": 0.64, "grad_norm": 1.0734901315371415, "learning_rate": 6.127654806703189e-06, "loss": 0.493, "step": 8221 }, { "epoch": 0.64, "grad_norm": 1.1775994421239755, "learning_rate": 6.125338293340985e-06, "loss": 0.5664, "step": 8222 }, { "epoch": 0.64, "grad_norm": 1.0850686089406505, "learning_rate": 6.123022024606165e-06, "loss": 0.5001, "step": 8223 }, { "epoch": 0.64, "grad_norm": 1.152421334451245, "learning_rate": 6.120706000644965e-06, "loss": 0.5164, "step": 8224 }, { "epoch": 0.64, "grad_norm": 1.1588395992892193, "learning_rate": 6.11839022160361e-06, "loss": 0.5319, "step": 8225 }, { "epoch": 0.64, "grad_norm": 1.2549046139734341, "learning_rate": 6.116074687628305e-06, "loss": 0.5439, "step": 8226 }, { "epoch": 0.64, "grad_norm": 1.197136279239915, "learning_rate": 6.113759398865247e-06, "loss": 0.5232, "step": 8227 }, { "epoch": 0.64, "grad_norm": 1.2277228825486712, "learning_rate": 6.111444355460608e-06, "loss": 0.512, "step": 8228 }, { "epoch": 0.64, "grad_norm": 1.1986172438094187, "learning_rate": 6.109129557560547e-06, "loss": 0.5357, "step": 8229 }, { "epoch": 0.64, "grad_norm": 1.2094532391772521, "learning_rate": 6.106815005311211e-06, "loss": 0.5506, "step": 8230 }, { "epoch": 0.64, "grad_norm": 1.3012841887659479, "learning_rate": 6.104500698858731e-06, "loss": 0.5308, "step": 8231 }, { "epoch": 0.64, "grad_norm": 1.177776835674133, "learning_rate": 6.1021866383492205e-06, "loss": 0.5641, "step": 8232 }, { "epoch": 0.64, "grad_norm": 1.1547059755821678, "learning_rate": 6.0998728239287784e-06, "loss": 0.4707, "step": 8233 }, { "epoch": 0.64, "grad_norm": 1.1894349846920798, "learning_rate": 6.097559255743486e-06, "loss": 0.5333, "step": 8234 }, { "epoch": 0.64, "grad_norm": 1.167801861019125, "learning_rate": 6.095245933939411e-06, "loss": 0.5321, "step": 8235 }, { "epoch": 0.64, "grad_norm": 1.2140268111905477, "learning_rate": 6.092932858662604e-06, "loss": 0.5507, "step": 8236 }, { "epoch": 0.64, "grad_norm": 1.243314124172889, "learning_rate": 6.0906200300591074e-06, "loss": 0.5647, "step": 8237 }, { "epoch": 0.64, "grad_norm": 1.0433527207528352, "learning_rate": 6.088307448274937e-06, "loss": 0.4527, "step": 8238 }, { "epoch": 0.64, "grad_norm": 1.2909960954490376, "learning_rate": 6.0859951134561e-06, "loss": 0.5848, "step": 8239 }, { "epoch": 0.64, "grad_norm": 1.0789159554743135, "learning_rate": 6.083683025748584e-06, "loss": 0.4831, "step": 8240 }, { "epoch": 0.64, "grad_norm": 1.2012878858167118, "learning_rate": 6.081371185298361e-06, "loss": 0.4837, "step": 8241 }, { "epoch": 0.64, "grad_norm": 1.304273345451697, "learning_rate": 6.079059592251398e-06, "loss": 0.5552, "step": 8242 }, { "epoch": 0.64, "grad_norm": 1.1499823216447809, "learning_rate": 6.076748246753632e-06, "loss": 0.5411, "step": 8243 }, { "epoch": 0.64, "grad_norm": 1.202401030070231, "learning_rate": 6.074437148950987e-06, "loss": 0.5333, "step": 8244 }, { "epoch": 0.64, "grad_norm": 1.1568132523555825, "learning_rate": 6.072126298989378e-06, "loss": 0.5068, "step": 8245 }, { "epoch": 0.64, "grad_norm": 1.0978312505591579, "learning_rate": 6.069815697014701e-06, "loss": 0.4789, "step": 8246 }, { "epoch": 0.64, "grad_norm": 1.1675491344723636, "learning_rate": 6.067505343172839e-06, "loss": 0.5373, "step": 8247 }, { "epoch": 0.64, "grad_norm": 1.2823501608692853, "learning_rate": 6.065195237609655e-06, "loss": 0.5246, "step": 8248 }, { "epoch": 0.64, "grad_norm": 1.2692917818019045, "learning_rate": 6.062885380470992e-06, "loss": 0.524, "step": 8249 }, { "epoch": 0.64, "grad_norm": 1.2624052559833838, "learning_rate": 6.0605757719026884e-06, "loss": 0.5993, "step": 8250 }, { "epoch": 0.64, "grad_norm": 1.1384870794716562, "learning_rate": 6.058266412050561e-06, "loss": 0.5219, "step": 8251 }, { "epoch": 0.64, "grad_norm": 1.1366436595956209, "learning_rate": 6.055957301060413e-06, "loss": 0.5149, "step": 8252 }, { "epoch": 0.64, "grad_norm": 1.1804160085492914, "learning_rate": 6.053648439078033e-06, "loss": 0.5218, "step": 8253 }, { "epoch": 0.64, "grad_norm": 1.1742472733353682, "learning_rate": 6.05133982624918e-06, "loss": 0.5603, "step": 8254 }, { "epoch": 0.64, "grad_norm": 1.1696696065175627, "learning_rate": 6.04903146271962e-06, "loss": 0.5343, "step": 8255 }, { "epoch": 0.64, "grad_norm": 1.0823302148606238, "learning_rate": 6.046723348635086e-06, "loss": 0.5017, "step": 8256 }, { "epoch": 0.64, "grad_norm": 1.7344631825616788, "learning_rate": 6.044415484141306e-06, "loss": 0.5411, "step": 8257 }, { "epoch": 0.64, "grad_norm": 1.2492929843319336, "learning_rate": 6.042107869383982e-06, "loss": 0.535, "step": 8258 }, { "epoch": 0.64, "grad_norm": 1.2330886318839456, "learning_rate": 6.039800504508813e-06, "loss": 0.504, "step": 8259 }, { "epoch": 0.64, "grad_norm": 1.1370313286738691, "learning_rate": 6.0374933896614665e-06, "loss": 0.5477, "step": 8260 }, { "epoch": 0.64, "grad_norm": 1.235705515698876, "learning_rate": 6.035186524987605e-06, "loss": 0.5588, "step": 8261 }, { "epoch": 0.64, "grad_norm": 1.2104577867851762, "learning_rate": 6.032879910632876e-06, "loss": 0.5505, "step": 8262 }, { "epoch": 0.64, "grad_norm": 1.1999942123750353, "learning_rate": 6.030573546742904e-06, "loss": 0.5093, "step": 8263 }, { "epoch": 0.64, "grad_norm": 1.2554652899437484, "learning_rate": 6.028267433463309e-06, "loss": 0.5354, "step": 8264 }, { "epoch": 0.64, "grad_norm": 1.1396285106809467, "learning_rate": 6.025961570939676e-06, "loss": 0.482, "step": 8265 }, { "epoch": 0.64, "grad_norm": 1.2477806893948147, "learning_rate": 6.023655959317594e-06, "loss": 0.5834, "step": 8266 }, { "epoch": 0.64, "grad_norm": 1.178961776932395, "learning_rate": 6.021350598742628e-06, "loss": 0.5392, "step": 8267 }, { "epoch": 0.64, "grad_norm": 1.2412358603387617, "learning_rate": 6.019045489360325e-06, "loss": 0.5755, "step": 8268 }, { "epoch": 0.64, "grad_norm": 1.1502549718074888, "learning_rate": 6.016740631316221e-06, "loss": 0.5257, "step": 8269 }, { "epoch": 0.64, "grad_norm": 1.2423343691581445, "learning_rate": 6.01443602475583e-06, "loss": 0.5144, "step": 8270 }, { "epoch": 0.64, "grad_norm": 1.192894328356021, "learning_rate": 6.0121316698246535e-06, "loss": 0.5078, "step": 8271 }, { "epoch": 0.64, "grad_norm": 1.2212386032556652, "learning_rate": 6.009827566668183e-06, "loss": 0.5382, "step": 8272 }, { "epoch": 0.64, "grad_norm": 1.1543874947078763, "learning_rate": 6.007523715431882e-06, "loss": 0.5206, "step": 8273 }, { "epoch": 0.64, "grad_norm": 1.2207255369036356, "learning_rate": 6.0052201162612125e-06, "loss": 0.5538, "step": 8274 }, { "epoch": 0.64, "grad_norm": 1.0982657807048068, "learning_rate": 6.002916769301601e-06, "loss": 0.4986, "step": 8275 }, { "epoch": 0.64, "grad_norm": 1.2048283693748971, "learning_rate": 6.000613674698478e-06, "loss": 0.5504, "step": 8276 }, { "epoch": 0.64, "grad_norm": 1.1398957939372207, "learning_rate": 5.99831083259725e-06, "loss": 0.497, "step": 8277 }, { "epoch": 0.64, "grad_norm": 1.192742370830532, "learning_rate": 5.996008243143302e-06, "loss": 0.5538, "step": 8278 }, { "epoch": 0.64, "grad_norm": 1.1809412376100377, "learning_rate": 5.993705906482016e-06, "loss": 0.483, "step": 8279 }, { "epoch": 0.64, "grad_norm": 1.2401758375395675, "learning_rate": 5.991403822758741e-06, "loss": 0.5735, "step": 8280 }, { "epoch": 0.64, "grad_norm": 1.2014972029108515, "learning_rate": 5.9891019921188264e-06, "loss": 0.5472, "step": 8281 }, { "epoch": 0.64, "grad_norm": 1.1871760076944107, "learning_rate": 5.986800414707596e-06, "loss": 0.5072, "step": 8282 }, { "epoch": 0.64, "grad_norm": 1.065242873333841, "learning_rate": 5.984499090670361e-06, "loss": 0.4722, "step": 8283 }, { "epoch": 0.64, "grad_norm": 1.2389210392943288, "learning_rate": 5.982198020152419e-06, "loss": 0.4703, "step": 8284 }, { "epoch": 0.64, "grad_norm": 1.2349325441942252, "learning_rate": 5.979897203299041e-06, "loss": 0.5288, "step": 8285 }, { "epoch": 0.64, "grad_norm": 1.1463681215681572, "learning_rate": 5.977596640255494e-06, "loss": 0.5446, "step": 8286 }, { "epoch": 0.64, "grad_norm": 1.200546275411955, "learning_rate": 5.975296331167025e-06, "loss": 0.5458, "step": 8287 }, { "epoch": 0.64, "grad_norm": 1.229241719082832, "learning_rate": 5.972996276178862e-06, "loss": 0.5615, "step": 8288 }, { "epoch": 0.64, "grad_norm": 1.0795875801190369, "learning_rate": 5.970696475436224e-06, "loss": 0.5118, "step": 8289 }, { "epoch": 0.64, "grad_norm": 1.1201241051510997, "learning_rate": 5.968396929084303e-06, "loss": 0.5111, "step": 8290 }, { "epoch": 0.64, "grad_norm": 1.21812701808866, "learning_rate": 5.966097637268284e-06, "loss": 0.5223, "step": 8291 }, { "epoch": 0.64, "grad_norm": 1.1651956048564998, "learning_rate": 5.963798600133334e-06, "loss": 0.5376, "step": 8292 }, { "epoch": 0.64, "grad_norm": 1.0966688174272066, "learning_rate": 5.961499817824603e-06, "loss": 0.4903, "step": 8293 }, { "epoch": 0.64, "grad_norm": 1.1281372407793313, "learning_rate": 5.959201290487227e-06, "loss": 0.5058, "step": 8294 }, { "epoch": 0.64, "grad_norm": 1.0451609115279918, "learning_rate": 5.956903018266317e-06, "loss": 0.46, "step": 8295 }, { "epoch": 0.64, "grad_norm": 1.02968762271863, "learning_rate": 5.954605001306979e-06, "loss": 0.4942, "step": 8296 }, { "epoch": 0.64, "grad_norm": 1.1389752829484674, "learning_rate": 5.952307239754302e-06, "loss": 0.5019, "step": 8297 }, { "epoch": 0.64, "grad_norm": 1.0504699018870116, "learning_rate": 5.950009733753348e-06, "loss": 0.505, "step": 8298 }, { "epoch": 0.64, "grad_norm": 1.2733212751463376, "learning_rate": 5.94771248344918e-06, "loss": 0.6072, "step": 8299 }, { "epoch": 0.64, "grad_norm": 1.1068717222418305, "learning_rate": 5.945415488986827e-06, "loss": 0.5147, "step": 8300 }, { "epoch": 0.64, "grad_norm": 1.140142573927685, "learning_rate": 5.943118750511312e-06, "loss": 0.5457, "step": 8301 }, { "epoch": 0.64, "grad_norm": 1.1281983687194435, "learning_rate": 5.940822268167643e-06, "loss": 0.5293, "step": 8302 }, { "epoch": 0.64, "grad_norm": 1.1480746052687518, "learning_rate": 5.938526042100805e-06, "loss": 0.5418, "step": 8303 }, { "epoch": 0.64, "grad_norm": 1.0439502227068225, "learning_rate": 5.936230072455777e-06, "loss": 0.5033, "step": 8304 }, { "epoch": 0.64, "grad_norm": 1.1160185240059313, "learning_rate": 5.933934359377506e-06, "loss": 0.5487, "step": 8305 }, { "epoch": 0.64, "grad_norm": 1.130777095772815, "learning_rate": 5.931638903010936e-06, "loss": 0.489, "step": 8306 }, { "epoch": 0.64, "grad_norm": 1.2304808116503323, "learning_rate": 5.929343703500996e-06, "loss": 0.6044, "step": 8307 }, { "epoch": 0.64, "grad_norm": 1.2536175356836792, "learning_rate": 5.927048760992589e-06, "loss": 0.5763, "step": 8308 }, { "epoch": 0.64, "grad_norm": 1.3269634328401938, "learning_rate": 5.9247540756306075e-06, "loss": 0.5933, "step": 8309 }, { "epoch": 0.64, "grad_norm": 1.0907472536488314, "learning_rate": 5.922459647559926e-06, "loss": 0.5136, "step": 8310 }, { "epoch": 0.64, "grad_norm": 1.2105952609874586, "learning_rate": 5.920165476925402e-06, "loss": 0.5953, "step": 8311 }, { "epoch": 0.64, "grad_norm": 1.2333478879499373, "learning_rate": 5.917871563871884e-06, "loss": 0.5153, "step": 8312 }, { "epoch": 0.64, "grad_norm": 1.1397103077912958, "learning_rate": 5.915577908544194e-06, "loss": 0.5051, "step": 8313 }, { "epoch": 0.64, "grad_norm": 1.2077586627129169, "learning_rate": 5.913284511087142e-06, "loss": 0.5548, "step": 8314 }, { "epoch": 0.65, "grad_norm": 1.1768030941430447, "learning_rate": 5.910991371645527e-06, "loss": 0.5132, "step": 8315 }, { "epoch": 0.65, "grad_norm": 1.077438135694472, "learning_rate": 5.90869849036412e-06, "loss": 0.4878, "step": 8316 }, { "epoch": 0.65, "grad_norm": 1.2826787379381503, "learning_rate": 5.906405867387688e-06, "loss": 0.5751, "step": 8317 }, { "epoch": 0.65, "grad_norm": 1.2665143126331475, "learning_rate": 5.904113502860971e-06, "loss": 0.5401, "step": 8318 }, { "epoch": 0.65, "grad_norm": 1.1841015623389202, "learning_rate": 5.901821396928702e-06, "loss": 0.5724, "step": 8319 }, { "epoch": 0.65, "grad_norm": 1.1058476206573513, "learning_rate": 5.899529549735594e-06, "loss": 0.4697, "step": 8320 }, { "epoch": 0.65, "grad_norm": 1.2409869456376905, "learning_rate": 5.897237961426339e-06, "loss": 0.5898, "step": 8321 }, { "epoch": 0.65, "grad_norm": 1.2756947363669284, "learning_rate": 5.894946632145619e-06, "loss": 0.5793, "step": 8322 }, { "epoch": 0.65, "grad_norm": 1.1509343207981138, "learning_rate": 5.892655562038098e-06, "loss": 0.5017, "step": 8323 }, { "epoch": 0.65, "grad_norm": 1.2297489543521822, "learning_rate": 5.8903647512484205e-06, "loss": 0.5716, "step": 8324 }, { "epoch": 0.65, "grad_norm": 1.2654204497638248, "learning_rate": 5.888074199921223e-06, "loss": 0.5429, "step": 8325 }, { "epoch": 0.65, "grad_norm": 1.17487368614918, "learning_rate": 5.885783908201114e-06, "loss": 0.5171, "step": 8326 }, { "epoch": 0.65, "grad_norm": 1.1666095186042373, "learning_rate": 5.883493876232693e-06, "loss": 0.5151, "step": 8327 }, { "epoch": 0.65, "grad_norm": 1.2161169218864893, "learning_rate": 5.8812041041605426e-06, "loss": 0.5609, "step": 8328 }, { "epoch": 0.65, "grad_norm": 1.1686488184250539, "learning_rate": 5.878914592129226e-06, "loss": 0.5094, "step": 8329 }, { "epoch": 0.65, "grad_norm": 1.1911274067741016, "learning_rate": 5.876625340283296e-06, "loss": 0.5063, "step": 8330 }, { "epoch": 0.65, "grad_norm": 1.1299172596473543, "learning_rate": 5.87433634876728e-06, "loss": 0.501, "step": 8331 }, { "epoch": 0.65, "grad_norm": 1.2161870074058212, "learning_rate": 5.872047617725697e-06, "loss": 0.5331, "step": 8332 }, { "epoch": 0.65, "grad_norm": 1.0527753662734873, "learning_rate": 5.869759147303042e-06, "loss": 0.4686, "step": 8333 }, { "epoch": 0.65, "grad_norm": 1.2055201284024752, "learning_rate": 5.867470937643804e-06, "loss": 0.4812, "step": 8334 }, { "epoch": 0.65, "grad_norm": 1.1050213558843327, "learning_rate": 5.865182988892449e-06, "loss": 0.5358, "step": 8335 }, { "epoch": 0.65, "grad_norm": 1.1816902087188583, "learning_rate": 5.862895301193421e-06, "loss": 0.5774, "step": 8336 }, { "epoch": 0.65, "grad_norm": 1.118904128725864, "learning_rate": 5.86060787469116e-06, "loss": 0.4766, "step": 8337 }, { "epoch": 0.65, "grad_norm": 1.1479559167612174, "learning_rate": 5.858320709530077e-06, "loss": 0.5263, "step": 8338 }, { "epoch": 0.65, "grad_norm": 1.284340364680649, "learning_rate": 5.8560338058545775e-06, "loss": 0.5256, "step": 8339 }, { "epoch": 0.65, "grad_norm": 1.149892909827278, "learning_rate": 5.853747163809047e-06, "loss": 0.5303, "step": 8340 }, { "epoch": 0.65, "grad_norm": 1.0886447559560994, "learning_rate": 5.851460783537848e-06, "loss": 0.5177, "step": 8341 }, { "epoch": 0.65, "grad_norm": 1.2905010173073816, "learning_rate": 5.8491746651853305e-06, "loss": 0.5882, "step": 8342 }, { "epoch": 0.65, "grad_norm": 1.1416802424066883, "learning_rate": 5.846888808895833e-06, "loss": 0.4971, "step": 8343 }, { "epoch": 0.65, "grad_norm": 1.2945659257553124, "learning_rate": 5.8446032148136725e-06, "loss": 0.5462, "step": 8344 }, { "epoch": 0.65, "grad_norm": 1.1650222303864493, "learning_rate": 5.842317883083153e-06, "loss": 0.4941, "step": 8345 }, { "epoch": 0.65, "grad_norm": 1.2245997378321456, "learning_rate": 5.840032813848555e-06, "loss": 0.516, "step": 8346 }, { "epoch": 0.65, "grad_norm": 1.061853155817389, "learning_rate": 5.837748007254146e-06, "loss": 0.508, "step": 8347 }, { "epoch": 0.65, "grad_norm": 1.1234121242835344, "learning_rate": 5.835463463444179e-06, "loss": 0.5253, "step": 8348 }, { "epoch": 0.65, "grad_norm": 1.0684471303545413, "learning_rate": 5.833179182562891e-06, "loss": 0.4648, "step": 8349 }, { "epoch": 0.65, "grad_norm": 1.184203289941239, "learning_rate": 5.830895164754502e-06, "loss": 0.5707, "step": 8350 }, { "epoch": 0.65, "grad_norm": 1.118113900604512, "learning_rate": 5.828611410163207e-06, "loss": 0.453, "step": 8351 }, { "epoch": 0.65, "grad_norm": 1.121939416592712, "learning_rate": 5.826327918933197e-06, "loss": 0.5192, "step": 8352 }, { "epoch": 0.65, "grad_norm": 1.119115592849685, "learning_rate": 5.824044691208641e-06, "loss": 0.5349, "step": 8353 }, { "epoch": 0.65, "grad_norm": 1.1414705565168812, "learning_rate": 5.821761727133686e-06, "loss": 0.495, "step": 8354 }, { "epoch": 0.65, "grad_norm": 1.2759335167136183, "learning_rate": 5.8194790268524725e-06, "loss": 0.5713, "step": 8355 }, { "epoch": 0.65, "grad_norm": 1.2192126643033732, "learning_rate": 5.817196590509113e-06, "loss": 0.5445, "step": 8356 }, { "epoch": 0.65, "grad_norm": 1.4275029069166099, "learning_rate": 5.814914418247714e-06, "loss": 0.5057, "step": 8357 }, { "epoch": 0.65, "grad_norm": 1.2192946464150107, "learning_rate": 5.812632510212359e-06, "loss": 0.548, "step": 8358 }, { "epoch": 0.65, "grad_norm": 1.169197329424752, "learning_rate": 5.810350866547119e-06, "loss": 0.5311, "step": 8359 }, { "epoch": 0.65, "grad_norm": 1.1895552968709187, "learning_rate": 5.808069487396048e-06, "loss": 0.4849, "step": 8360 }, { "epoch": 0.65, "grad_norm": 1.0370600054640842, "learning_rate": 5.805788372903174e-06, "loss": 0.496, "step": 8361 }, { "epoch": 0.65, "grad_norm": 1.133862607215138, "learning_rate": 5.80350752321252e-06, "loss": 0.5086, "step": 8362 }, { "epoch": 0.65, "grad_norm": 1.1106481634074084, "learning_rate": 5.801226938468089e-06, "loss": 0.4932, "step": 8363 }, { "epoch": 0.65, "grad_norm": 1.128475331359232, "learning_rate": 5.798946618813861e-06, "loss": 0.4842, "step": 8364 }, { "epoch": 0.65, "grad_norm": 1.1648247290631113, "learning_rate": 5.796666564393811e-06, "loss": 0.5588, "step": 8365 }, { "epoch": 0.65, "grad_norm": 1.1704829785764537, "learning_rate": 5.7943867753518845e-06, "loss": 0.5009, "step": 8366 }, { "epoch": 0.65, "grad_norm": 1.0497571073883374, "learning_rate": 5.792107251832018e-06, "loss": 0.4907, "step": 8367 }, { "epoch": 0.65, "grad_norm": 1.202705756129117, "learning_rate": 5.789827993978131e-06, "loss": 0.5751, "step": 8368 }, { "epoch": 0.65, "grad_norm": 1.2167405900750052, "learning_rate": 5.787549001934125e-06, "loss": 0.5303, "step": 8369 }, { "epoch": 0.65, "grad_norm": 1.196261658646584, "learning_rate": 5.785270275843883e-06, "loss": 0.5101, "step": 8370 }, { "epoch": 0.65, "grad_norm": 1.207802436658382, "learning_rate": 5.7829918158512774e-06, "loss": 0.5534, "step": 8371 }, { "epoch": 0.65, "grad_norm": 1.1272561444394045, "learning_rate": 5.7807136221001515e-06, "loss": 0.5044, "step": 8372 }, { "epoch": 0.65, "grad_norm": 1.1363927104871914, "learning_rate": 5.778435694734348e-06, "loss": 0.5429, "step": 8373 }, { "epoch": 0.65, "grad_norm": 1.1579923647067174, "learning_rate": 5.776158033897674e-06, "loss": 0.5115, "step": 8374 }, { "epoch": 0.65, "grad_norm": 1.3101519516912479, "learning_rate": 5.773880639733938e-06, "loss": 0.623, "step": 8375 }, { "epoch": 0.65, "grad_norm": 1.2101153135917795, "learning_rate": 5.771603512386923e-06, "loss": 0.535, "step": 8376 }, { "epoch": 0.65, "grad_norm": 1.0191385967311948, "learning_rate": 5.769326652000391e-06, "loss": 0.4757, "step": 8377 }, { "epoch": 0.65, "grad_norm": 1.160298707994621, "learning_rate": 5.7670500587180935e-06, "loss": 0.5258, "step": 8378 }, { "epoch": 0.65, "grad_norm": 1.0811363403028997, "learning_rate": 5.764773732683766e-06, "loss": 0.5088, "step": 8379 }, { "epoch": 0.65, "grad_norm": 1.2227004655636986, "learning_rate": 5.7624976740411244e-06, "loss": 0.5118, "step": 8380 }, { "epoch": 0.65, "grad_norm": 1.195517079916166, "learning_rate": 5.76022188293387e-06, "loss": 0.5365, "step": 8381 }, { "epoch": 0.65, "grad_norm": 1.1263613940665935, "learning_rate": 5.757946359505679e-06, "loss": 0.5192, "step": 8382 }, { "epoch": 0.65, "grad_norm": 1.2229793231429569, "learning_rate": 5.755671103900225e-06, "loss": 0.5266, "step": 8383 }, { "epoch": 0.65, "grad_norm": 1.245085592430017, "learning_rate": 5.753396116261148e-06, "loss": 0.5082, "step": 8384 }, { "epoch": 0.65, "grad_norm": 1.1785393995416413, "learning_rate": 5.751121396732082e-06, "loss": 0.4906, "step": 8385 }, { "epoch": 0.65, "grad_norm": 1.2330567769407892, "learning_rate": 5.7488469454566484e-06, "loss": 0.5328, "step": 8386 }, { "epoch": 0.65, "grad_norm": 1.175403217363587, "learning_rate": 5.746572762578437e-06, "loss": 0.5163, "step": 8387 }, { "epoch": 0.65, "grad_norm": 1.206452956444843, "learning_rate": 5.744298848241032e-06, "loss": 0.5356, "step": 8388 }, { "epoch": 0.65, "grad_norm": 1.2145841265894468, "learning_rate": 5.742025202587997e-06, "loss": 0.5426, "step": 8389 }, { "epoch": 0.65, "grad_norm": 1.2460494078396926, "learning_rate": 5.739751825762878e-06, "loss": 0.6032, "step": 8390 }, { "epoch": 0.65, "grad_norm": 1.0768910409149992, "learning_rate": 5.7374787179092106e-06, "loss": 0.5589, "step": 8391 }, { "epoch": 0.65, "grad_norm": 1.1603555732867057, "learning_rate": 5.7352058791705e-06, "loss": 0.5156, "step": 8392 }, { "epoch": 0.65, "grad_norm": 1.1952020930742986, "learning_rate": 5.732933309690251e-06, "loss": 0.5132, "step": 8393 }, { "epoch": 0.65, "grad_norm": 1.2150487696117778, "learning_rate": 5.730661009611931e-06, "loss": 0.5564, "step": 8394 }, { "epoch": 0.65, "grad_norm": 1.0558316841403654, "learning_rate": 5.7283889790790096e-06, "loss": 0.4844, "step": 8395 }, { "epoch": 0.65, "grad_norm": 1.1781275458270426, "learning_rate": 5.7261172182349344e-06, "loss": 0.574, "step": 8396 }, { "epoch": 0.65, "grad_norm": 1.2799412160279555, "learning_rate": 5.723845727223125e-06, "loss": 0.5643, "step": 8397 }, { "epoch": 0.65, "grad_norm": 1.057899726190161, "learning_rate": 5.721574506186998e-06, "loss": 0.4961, "step": 8398 }, { "epoch": 0.65, "grad_norm": 1.0321424264100818, "learning_rate": 5.719303555269946e-06, "loss": 0.4315, "step": 8399 }, { "epoch": 0.65, "grad_norm": 1.1565343146405827, "learning_rate": 5.717032874615345e-06, "loss": 0.5138, "step": 8400 }, { "epoch": 0.65, "grad_norm": 1.1410248460563566, "learning_rate": 5.714762464366561e-06, "loss": 0.4653, "step": 8401 }, { "epoch": 0.65, "grad_norm": 1.128930220299961, "learning_rate": 5.712492324666927e-06, "loss": 0.5215, "step": 8402 }, { "epoch": 0.65, "grad_norm": 1.2231399992898826, "learning_rate": 5.7102224556597775e-06, "loss": 0.5664, "step": 8403 }, { "epoch": 0.65, "grad_norm": 1.2105101292693339, "learning_rate": 5.7079528574884125e-06, "loss": 0.5789, "step": 8404 }, { "epoch": 0.65, "grad_norm": 1.1467437710639632, "learning_rate": 5.7056835302961266e-06, "loss": 0.4865, "step": 8405 }, { "epoch": 0.65, "grad_norm": 1.1429240950063595, "learning_rate": 5.703414474226201e-06, "loss": 0.5174, "step": 8406 }, { "epoch": 0.65, "grad_norm": 1.1908314303166476, "learning_rate": 5.701145689421882e-06, "loss": 0.5352, "step": 8407 }, { "epoch": 0.65, "grad_norm": 1.2017686307418005, "learning_rate": 5.698877176026415e-06, "loss": 0.5329, "step": 8408 }, { "epoch": 0.65, "grad_norm": 1.210155012747007, "learning_rate": 5.696608934183023e-06, "loss": 0.5465, "step": 8409 }, { "epoch": 0.65, "grad_norm": 1.186342227112146, "learning_rate": 5.694340964034911e-06, "loss": 0.5734, "step": 8410 }, { "epoch": 0.65, "grad_norm": 1.1401453969490392, "learning_rate": 5.692073265725273e-06, "loss": 0.484, "step": 8411 }, { "epoch": 0.65, "grad_norm": 1.0952495105170241, "learning_rate": 5.689805839397271e-06, "loss": 0.5017, "step": 8412 }, { "epoch": 0.65, "grad_norm": 1.2199979896997868, "learning_rate": 5.687538685194069e-06, "loss": 0.4978, "step": 8413 }, { "epoch": 0.65, "grad_norm": 1.077079431808361, "learning_rate": 5.685271803258794e-06, "loss": 0.4802, "step": 8414 }, { "epoch": 0.65, "grad_norm": 1.2055374333610978, "learning_rate": 5.683005193734572e-06, "loss": 0.5326, "step": 8415 }, { "epoch": 0.65, "grad_norm": 1.1562690733290126, "learning_rate": 5.680738856764508e-06, "loss": 0.5369, "step": 8416 }, { "epoch": 0.65, "grad_norm": 1.1738617395548328, "learning_rate": 5.67847279249168e-06, "loss": 0.5227, "step": 8417 }, { "epoch": 0.65, "grad_norm": 1.2168957224660126, "learning_rate": 5.676207001059163e-06, "loss": 0.5432, "step": 8418 }, { "epoch": 0.65, "grad_norm": 1.0244021346831507, "learning_rate": 5.673941482610004e-06, "loss": 0.4651, "step": 8419 }, { "epoch": 0.65, "grad_norm": 1.1248860301462438, "learning_rate": 5.67167623728724e-06, "loss": 0.4723, "step": 8420 }, { "epoch": 0.65, "grad_norm": 1.0739386175931611, "learning_rate": 5.6694112652338895e-06, "loss": 0.5137, "step": 8421 }, { "epoch": 0.65, "grad_norm": 1.059763301027072, "learning_rate": 5.667146566592945e-06, "loss": 0.4813, "step": 8422 }, { "epoch": 0.65, "grad_norm": 1.20870554332382, "learning_rate": 5.6648821415073965e-06, "loss": 0.49, "step": 8423 }, { "epoch": 0.65, "grad_norm": 1.0993197179864558, "learning_rate": 5.662617990120201e-06, "loss": 0.4769, "step": 8424 }, { "epoch": 0.65, "grad_norm": 1.270928749296342, "learning_rate": 5.660354112574309e-06, "loss": 0.5251, "step": 8425 }, { "epoch": 0.65, "grad_norm": 1.147109373087845, "learning_rate": 5.658090509012651e-06, "loss": 0.5149, "step": 8426 }, { "epoch": 0.65, "grad_norm": 1.2236796717945462, "learning_rate": 5.655827179578145e-06, "loss": 0.5439, "step": 8427 }, { "epoch": 0.65, "grad_norm": 1.1690986296927226, "learning_rate": 5.653564124413678e-06, "loss": 0.4971, "step": 8428 }, { "epoch": 0.65, "grad_norm": 1.1824483837612974, "learning_rate": 5.651301343662132e-06, "loss": 0.5388, "step": 8429 }, { "epoch": 0.65, "grad_norm": 1.0618730827255858, "learning_rate": 5.649038837466369e-06, "loss": 0.4805, "step": 8430 }, { "epoch": 0.65, "grad_norm": 1.1365684069022948, "learning_rate": 5.646776605969237e-06, "loss": 0.5238, "step": 8431 }, { "epoch": 0.65, "grad_norm": 1.1641473931201003, "learning_rate": 5.644514649313554e-06, "loss": 0.5511, "step": 8432 }, { "epoch": 0.65, "grad_norm": 1.2852973744554859, "learning_rate": 5.642252967642134e-06, "loss": 0.531, "step": 8433 }, { "epoch": 0.65, "grad_norm": 1.2137818433078988, "learning_rate": 5.639991561097767e-06, "loss": 0.5753, "step": 8434 }, { "epoch": 0.65, "grad_norm": 1.1422237009555516, "learning_rate": 5.637730429823224e-06, "loss": 0.5511, "step": 8435 }, { "epoch": 0.65, "grad_norm": 1.1509193021654671, "learning_rate": 5.6354695739612665e-06, "loss": 0.4956, "step": 8436 }, { "epoch": 0.65, "grad_norm": 1.1055873631982116, "learning_rate": 5.6332089936546375e-06, "loss": 0.5111, "step": 8437 }, { "epoch": 0.65, "grad_norm": 1.1053391774942984, "learning_rate": 5.6309486890460494e-06, "loss": 0.5161, "step": 8438 }, { "epoch": 0.65, "grad_norm": 1.1290814748817206, "learning_rate": 5.628688660278212e-06, "loss": 0.5155, "step": 8439 }, { "epoch": 0.65, "grad_norm": 1.1467821816452228, "learning_rate": 5.6264289074938126e-06, "loss": 0.5373, "step": 8440 }, { "epoch": 0.65, "grad_norm": 1.32883142869552, "learning_rate": 5.624169430835524e-06, "loss": 0.5678, "step": 8441 }, { "epoch": 0.65, "grad_norm": 1.2227426809163227, "learning_rate": 5.621910230445993e-06, "loss": 0.5664, "step": 8442 }, { "epoch": 0.65, "grad_norm": 1.2090601982041547, "learning_rate": 5.619651306467861e-06, "loss": 0.5866, "step": 8443 }, { "epoch": 0.66, "grad_norm": 1.195964809770765, "learning_rate": 5.617392659043737e-06, "loss": 0.4943, "step": 8444 }, { "epoch": 0.66, "grad_norm": 1.077583453405085, "learning_rate": 5.615134288316227e-06, "loss": 0.551, "step": 8445 }, { "epoch": 0.66, "grad_norm": 1.1009499761008235, "learning_rate": 5.612876194427911e-06, "loss": 0.526, "step": 8446 }, { "epoch": 0.66, "grad_norm": 1.0947278692287028, "learning_rate": 5.61061837752136e-06, "loss": 0.489, "step": 8447 }, { "epoch": 0.66, "grad_norm": 1.1213544482655873, "learning_rate": 5.608360837739113e-06, "loss": 0.5151, "step": 8448 }, { "epoch": 0.66, "grad_norm": 1.2054608941208498, "learning_rate": 5.6061035752237035e-06, "loss": 0.4951, "step": 8449 }, { "epoch": 0.66, "grad_norm": 1.1468636764258395, "learning_rate": 5.6038465901176455e-06, "loss": 0.5374, "step": 8450 }, { "epoch": 0.66, "grad_norm": 1.1641627530956273, "learning_rate": 5.601589882563436e-06, "loss": 0.493, "step": 8451 }, { "epoch": 0.66, "grad_norm": 1.1911011852486217, "learning_rate": 5.599333452703548e-06, "loss": 0.5355, "step": 8452 }, { "epoch": 0.66, "grad_norm": 1.1434821831499857, "learning_rate": 5.5970773006804465e-06, "loss": 0.5303, "step": 8453 }, { "epoch": 0.66, "grad_norm": 1.1601964256445938, "learning_rate": 5.594821426636567e-06, "loss": 0.5065, "step": 8454 }, { "epoch": 0.66, "grad_norm": 1.0777001580464676, "learning_rate": 5.5925658307143405e-06, "loss": 0.4361, "step": 8455 }, { "epoch": 0.66, "grad_norm": 1.1149810663677027, "learning_rate": 5.590310513056171e-06, "loss": 0.5034, "step": 8456 }, { "epoch": 0.66, "grad_norm": 1.0965889734402938, "learning_rate": 5.588055473804453e-06, "loss": 0.5395, "step": 8457 }, { "epoch": 0.66, "grad_norm": 1.2157905054077165, "learning_rate": 5.585800713101552e-06, "loss": 0.5396, "step": 8458 }, { "epoch": 0.66, "grad_norm": 1.1496163370930719, "learning_rate": 5.583546231089827e-06, "loss": 0.5204, "step": 8459 }, { "epoch": 0.66, "grad_norm": 1.1547193447981894, "learning_rate": 5.581292027911614e-06, "loss": 0.5335, "step": 8460 }, { "epoch": 0.66, "grad_norm": 1.1413997475023059, "learning_rate": 5.579038103709238e-06, "loss": 0.5011, "step": 8461 }, { "epoch": 0.66, "grad_norm": 1.1848394806704596, "learning_rate": 5.576784458624991e-06, "loss": 0.5875, "step": 8462 }, { "epoch": 0.66, "grad_norm": 1.1876907195479356, "learning_rate": 5.5745310928011656e-06, "loss": 0.5729, "step": 8463 }, { "epoch": 0.66, "grad_norm": 1.2527726893428552, "learning_rate": 5.57227800638002e-06, "loss": 0.5458, "step": 8464 }, { "epoch": 0.66, "grad_norm": 1.1716997651371692, "learning_rate": 5.570025199503808e-06, "loss": 0.5917, "step": 8465 }, { "epoch": 0.66, "grad_norm": 1.1851059220855302, "learning_rate": 5.567772672314762e-06, "loss": 0.5326, "step": 8466 }, { "epoch": 0.66, "grad_norm": 1.1172082538944497, "learning_rate": 5.565520424955097e-06, "loss": 0.4709, "step": 8467 }, { "epoch": 0.66, "grad_norm": 1.2249893985990217, "learning_rate": 5.563268457567004e-06, "loss": 0.5497, "step": 8468 }, { "epoch": 0.66, "grad_norm": 1.1785875459700161, "learning_rate": 5.561016770292662e-06, "loss": 0.5436, "step": 8469 }, { "epoch": 0.66, "grad_norm": 1.2230458476751565, "learning_rate": 5.558765363274234e-06, "loss": 0.5536, "step": 8470 }, { "epoch": 0.66, "grad_norm": 1.1308483063941799, "learning_rate": 5.556514236653867e-06, "loss": 0.5109, "step": 8471 }, { "epoch": 0.66, "grad_norm": 1.1662427211423936, "learning_rate": 5.5542633905736775e-06, "loss": 0.5295, "step": 8472 }, { "epoch": 0.66, "grad_norm": 1.1748412674356221, "learning_rate": 5.552012825175781e-06, "loss": 0.5335, "step": 8473 }, { "epoch": 0.66, "grad_norm": 1.2096217729218548, "learning_rate": 5.549762540602261e-06, "loss": 0.5262, "step": 8474 }, { "epoch": 0.66, "grad_norm": 1.2318628073852937, "learning_rate": 5.54751253699519e-06, "loss": 0.5159, "step": 8475 }, { "epoch": 0.66, "grad_norm": 1.1905554563382166, "learning_rate": 5.545262814496625e-06, "loss": 0.5302, "step": 8476 }, { "epoch": 0.66, "grad_norm": 1.1472164591618066, "learning_rate": 5.543013373248601e-06, "loss": 0.5139, "step": 8477 }, { "epoch": 0.66, "grad_norm": 1.238370537171197, "learning_rate": 5.540764213393144e-06, "loss": 0.5426, "step": 8478 }, { "epoch": 0.66, "grad_norm": 1.1846252579615788, "learning_rate": 5.538515335072243e-06, "loss": 0.5121, "step": 8479 }, { "epoch": 0.66, "grad_norm": 1.2225057003457815, "learning_rate": 5.536266738427886e-06, "loss": 0.5435, "step": 8480 }, { "epoch": 0.66, "grad_norm": 1.2056883715105913, "learning_rate": 5.534018423602047e-06, "loss": 0.5828, "step": 8481 }, { "epoch": 0.66, "grad_norm": 1.2752135827182332, "learning_rate": 5.531770390736659e-06, "loss": 0.5625, "step": 8482 }, { "epoch": 0.66, "grad_norm": 1.1836965283378318, "learning_rate": 5.529522639973666e-06, "loss": 0.5268, "step": 8483 }, { "epoch": 0.66, "grad_norm": 1.223927625987873, "learning_rate": 5.527275171454969e-06, "loss": 0.4761, "step": 8484 }, { "epoch": 0.66, "grad_norm": 1.2690409491164432, "learning_rate": 5.525027985322464e-06, "loss": 0.5811, "step": 8485 }, { "epoch": 0.66, "grad_norm": 1.160096394956788, "learning_rate": 5.5227810817180325e-06, "loss": 0.478, "step": 8486 }, { "epoch": 0.66, "grad_norm": 1.2689591747417468, "learning_rate": 5.520534460783531e-06, "loss": 0.5614, "step": 8487 }, { "epoch": 0.66, "grad_norm": 1.1784244875112335, "learning_rate": 5.5182881226608035e-06, "loss": 0.5059, "step": 8488 }, { "epoch": 0.66, "grad_norm": 1.1366005013432052, "learning_rate": 5.516042067491665e-06, "loss": 0.5046, "step": 8489 }, { "epoch": 0.66, "grad_norm": 1.1004874753191654, "learning_rate": 5.513796295417925e-06, "loss": 0.486, "step": 8490 }, { "epoch": 0.66, "grad_norm": 1.1285325326045819, "learning_rate": 5.511550806581374e-06, "loss": 0.5122, "step": 8491 }, { "epoch": 0.66, "grad_norm": 1.1319279867794765, "learning_rate": 5.5093056011237755e-06, "loss": 0.4847, "step": 8492 }, { "epoch": 0.66, "grad_norm": 1.1975101389286498, "learning_rate": 5.507060679186886e-06, "loss": 0.5649, "step": 8493 }, { "epoch": 0.66, "grad_norm": 1.0671562938679564, "learning_rate": 5.504816040912433e-06, "loss": 0.5085, "step": 8494 }, { "epoch": 0.66, "grad_norm": 1.0458487419624567, "learning_rate": 5.5025716864421356e-06, "loss": 0.4772, "step": 8495 }, { "epoch": 0.66, "grad_norm": 1.240019195392685, "learning_rate": 5.500327615917691e-06, "loss": 0.5743, "step": 8496 }, { "epoch": 0.66, "grad_norm": 1.2476582526547406, "learning_rate": 5.498083829480778e-06, "loss": 0.5587, "step": 8497 }, { "epoch": 0.66, "grad_norm": 1.0980409647138942, "learning_rate": 5.495840327273065e-06, "loss": 0.5549, "step": 8498 }, { "epoch": 0.66, "grad_norm": 1.0337226585453367, "learning_rate": 5.493597109436186e-06, "loss": 0.456, "step": 8499 }, { "epoch": 0.66, "grad_norm": 1.113755724000618, "learning_rate": 5.491354176111773e-06, "loss": 0.5262, "step": 8500 }, { "epoch": 0.66, "grad_norm": 1.0530288086380135, "learning_rate": 5.489111527441435e-06, "loss": 0.5012, "step": 8501 }, { "epoch": 0.66, "grad_norm": 1.2221671396664937, "learning_rate": 5.486869163566756e-06, "loss": 0.5657, "step": 8502 }, { "epoch": 0.66, "grad_norm": 1.070779461872384, "learning_rate": 5.4846270846293145e-06, "loss": 0.4805, "step": 8503 }, { "epoch": 0.66, "grad_norm": 1.157434938880406, "learning_rate": 5.4823852907706585e-06, "loss": 0.4983, "step": 8504 }, { "epoch": 0.66, "grad_norm": 1.1750843606249153, "learning_rate": 5.480143782132327e-06, "loss": 0.5561, "step": 8505 }, { "epoch": 0.66, "grad_norm": 1.20566795418941, "learning_rate": 5.477902558855837e-06, "loss": 0.5203, "step": 8506 }, { "epoch": 0.66, "grad_norm": 1.1302033466750618, "learning_rate": 5.475661621082689e-06, "loss": 0.5136, "step": 8507 }, { "epoch": 0.66, "grad_norm": 1.2599388299202012, "learning_rate": 5.4734209689543705e-06, "loss": 0.5404, "step": 8508 }, { "epoch": 0.66, "grad_norm": 1.1136368568398085, "learning_rate": 5.471180602612336e-06, "loss": 0.453, "step": 8509 }, { "epoch": 0.66, "grad_norm": 1.1352565314149756, "learning_rate": 5.468940522198036e-06, "loss": 0.4893, "step": 8510 }, { "epoch": 0.66, "grad_norm": 1.2023863568604916, "learning_rate": 5.4667007278529015e-06, "loss": 0.496, "step": 8511 }, { "epoch": 0.66, "grad_norm": 1.1360211411107353, "learning_rate": 5.464461219718336e-06, "loss": 0.4935, "step": 8512 }, { "epoch": 0.66, "grad_norm": 1.1070736399781234, "learning_rate": 5.462221997935737e-06, "loss": 0.4995, "step": 8513 }, { "epoch": 0.66, "grad_norm": 1.260995616306245, "learning_rate": 5.45998306264647e-06, "loss": 0.4525, "step": 8514 }, { "epoch": 0.66, "grad_norm": 1.2252535791101584, "learning_rate": 5.457744413991897e-06, "loss": 0.5422, "step": 8515 }, { "epoch": 0.66, "grad_norm": 1.2797914902909833, "learning_rate": 5.455506052113354e-06, "loss": 0.5789, "step": 8516 }, { "epoch": 0.66, "grad_norm": 1.1960791329674854, "learning_rate": 5.453267977152161e-06, "loss": 0.5278, "step": 8517 }, { "epoch": 0.66, "grad_norm": 1.2269451734982912, "learning_rate": 5.4510301892496224e-06, "loss": 0.5339, "step": 8518 }, { "epoch": 0.66, "grad_norm": 1.1615682929066196, "learning_rate": 5.448792688547012e-06, "loss": 0.4901, "step": 8519 }, { "epoch": 0.66, "grad_norm": 1.1604961064448818, "learning_rate": 5.446555475185602e-06, "loss": 0.5481, "step": 8520 }, { "epoch": 0.66, "grad_norm": 1.2052080526436777, "learning_rate": 5.444318549306641e-06, "loss": 0.5326, "step": 8521 }, { "epoch": 0.66, "grad_norm": 1.240185257547323, "learning_rate": 5.44208191105135e-06, "loss": 0.5421, "step": 8522 }, { "epoch": 0.66, "grad_norm": 1.1496226624613037, "learning_rate": 5.439845560560948e-06, "loss": 0.5259, "step": 8523 }, { "epoch": 0.66, "grad_norm": 1.2448385966670596, "learning_rate": 5.437609497976619e-06, "loss": 0.5128, "step": 8524 }, { "epoch": 0.66, "grad_norm": 1.1222563772753282, "learning_rate": 5.435373723439541e-06, "loss": 0.4668, "step": 8525 }, { "epoch": 0.66, "grad_norm": 1.2988610284982853, "learning_rate": 5.43313823709087e-06, "loss": 0.5933, "step": 8526 }, { "epoch": 0.66, "grad_norm": 0.9877893180987131, "learning_rate": 5.430903039071744e-06, "loss": 0.4621, "step": 8527 }, { "epoch": 0.66, "grad_norm": 1.171360716823358, "learning_rate": 5.428668129523288e-06, "loss": 0.5426, "step": 8528 }, { "epoch": 0.66, "grad_norm": 1.1813154293286559, "learning_rate": 5.426433508586593e-06, "loss": 0.51, "step": 8529 }, { "epoch": 0.66, "grad_norm": 1.2110931788632529, "learning_rate": 5.4241991764027464e-06, "loss": 0.5136, "step": 8530 }, { "epoch": 0.66, "grad_norm": 1.1125414615577245, "learning_rate": 5.421965133112818e-06, "loss": 0.5046, "step": 8531 }, { "epoch": 0.66, "grad_norm": 1.2388047035525844, "learning_rate": 5.419731378857849e-06, "loss": 0.5183, "step": 8532 }, { "epoch": 0.66, "grad_norm": 1.2087526361225658, "learning_rate": 5.417497913778866e-06, "loss": 0.5537, "step": 8533 }, { "epoch": 0.66, "grad_norm": 1.1528444398553923, "learning_rate": 5.4152647380168876e-06, "loss": 0.5384, "step": 8534 }, { "epoch": 0.66, "grad_norm": 1.2111791554175395, "learning_rate": 5.413031851712895e-06, "loss": 0.5742, "step": 8535 }, { "epoch": 0.66, "grad_norm": 1.2373031459639174, "learning_rate": 5.410799255007868e-06, "loss": 0.5383, "step": 8536 }, { "epoch": 0.66, "grad_norm": 1.2333312148428242, "learning_rate": 5.408566948042762e-06, "loss": 0.5188, "step": 8537 }, { "epoch": 0.66, "grad_norm": 1.103125903423685, "learning_rate": 5.406334930958513e-06, "loss": 0.4999, "step": 8538 }, { "epoch": 0.66, "grad_norm": 1.2777737235037205, "learning_rate": 5.404103203896044e-06, "loss": 0.5057, "step": 8539 }, { "epoch": 0.66, "grad_norm": 1.2015502333790002, "learning_rate": 5.401871766996247e-06, "loss": 0.5329, "step": 8540 }, { "epoch": 0.66, "grad_norm": 1.23725940407713, "learning_rate": 5.399640620400013e-06, "loss": 0.5443, "step": 8541 }, { "epoch": 0.66, "grad_norm": 1.2897518742150835, "learning_rate": 5.397409764248197e-06, "loss": 0.5461, "step": 8542 }, { "epoch": 0.66, "grad_norm": 1.1129164784120786, "learning_rate": 5.395179198681648e-06, "loss": 0.5037, "step": 8543 }, { "epoch": 0.66, "grad_norm": 1.149926083718583, "learning_rate": 5.392948923841199e-06, "loss": 0.5661, "step": 8544 }, { "epoch": 0.66, "grad_norm": 1.1224459159752647, "learning_rate": 5.39071893986765e-06, "loss": 0.4964, "step": 8545 }, { "epoch": 0.66, "grad_norm": 1.277206413515417, "learning_rate": 5.388489246901796e-06, "loss": 0.5627, "step": 8546 }, { "epoch": 0.66, "grad_norm": 1.2132288266245856, "learning_rate": 5.386259845084405e-06, "loss": 0.5584, "step": 8547 }, { "epoch": 0.66, "grad_norm": 1.1921675454165848, "learning_rate": 5.384030734556236e-06, "loss": 0.5132, "step": 8548 }, { "epoch": 0.66, "grad_norm": 1.2253165264301842, "learning_rate": 5.381801915458026e-06, "loss": 0.5458, "step": 8549 }, { "epoch": 0.66, "grad_norm": 1.1537922693777483, "learning_rate": 5.379573387930484e-06, "loss": 0.4989, "step": 8550 }, { "epoch": 0.66, "grad_norm": 1.1037426200265723, "learning_rate": 5.377345152114315e-06, "loss": 0.4588, "step": 8551 }, { "epoch": 0.66, "grad_norm": 1.2108923134525391, "learning_rate": 5.3751172081501935e-06, "loss": 0.517, "step": 8552 }, { "epoch": 0.66, "grad_norm": 1.3200071986320028, "learning_rate": 5.372889556178782e-06, "loss": 0.6122, "step": 8553 }, { "epoch": 0.66, "grad_norm": 1.1628269064056198, "learning_rate": 5.370662196340732e-06, "loss": 0.5179, "step": 8554 }, { "epoch": 0.66, "grad_norm": 1.228707448663035, "learning_rate": 5.368435128776657e-06, "loss": 0.5027, "step": 8555 }, { "epoch": 0.66, "grad_norm": 1.142716410819025, "learning_rate": 5.366208353627167e-06, "loss": 0.4519, "step": 8556 }, { "epoch": 0.66, "grad_norm": 1.3166599199065854, "learning_rate": 5.363981871032852e-06, "loss": 0.5132, "step": 8557 }, { "epoch": 0.66, "grad_norm": 1.1404255731857622, "learning_rate": 5.36175568113428e-06, "loss": 0.4961, "step": 8558 }, { "epoch": 0.66, "grad_norm": 1.214787227382845, "learning_rate": 5.359529784072006e-06, "loss": 0.6058, "step": 8559 }, { "epoch": 0.66, "grad_norm": 1.2205537506263417, "learning_rate": 5.357304179986553e-06, "loss": 0.5143, "step": 8560 }, { "epoch": 0.66, "grad_norm": 1.1882664064462138, "learning_rate": 5.355078869018446e-06, "loss": 0.5136, "step": 8561 }, { "epoch": 0.66, "grad_norm": 1.1278910576616812, "learning_rate": 5.3528538513081705e-06, "loss": 0.5075, "step": 8562 }, { "epoch": 0.66, "grad_norm": 1.177618017822402, "learning_rate": 5.350629126996207e-06, "loss": 0.571, "step": 8563 }, { "epoch": 0.66, "grad_norm": 1.219897732356841, "learning_rate": 5.34840469622302e-06, "loss": 0.5951, "step": 8564 }, { "epoch": 0.66, "grad_norm": 1.1129376868449574, "learning_rate": 5.346180559129037e-06, "loss": 0.5193, "step": 8565 }, { "epoch": 0.66, "grad_norm": 1.2290367876821564, "learning_rate": 5.3439567158546865e-06, "loss": 0.5916, "step": 8566 }, { "epoch": 0.66, "grad_norm": 1.1429654500083193, "learning_rate": 5.341733166540372e-06, "loss": 0.5337, "step": 8567 }, { "epoch": 0.66, "grad_norm": 1.3126618875846319, "learning_rate": 5.339509911326475e-06, "loss": 0.5541, "step": 8568 }, { "epoch": 0.66, "grad_norm": 1.2042186274861924, "learning_rate": 5.337286950353366e-06, "loss": 0.5425, "step": 8569 }, { "epoch": 0.66, "grad_norm": 1.1255786255429039, "learning_rate": 5.3350642837613845e-06, "loss": 0.555, "step": 8570 }, { "epoch": 0.66, "grad_norm": 1.1246557238754393, "learning_rate": 5.332841911690867e-06, "loss": 0.5074, "step": 8571 }, { "epoch": 0.66, "grad_norm": 1.1832620892546886, "learning_rate": 5.330619834282116e-06, "loss": 0.5007, "step": 8572 }, { "epoch": 0.67, "grad_norm": 1.231502183755045, "learning_rate": 5.328398051675423e-06, "loss": 0.5083, "step": 8573 }, { "epoch": 0.67, "grad_norm": 1.100667711823135, "learning_rate": 5.32617656401107e-06, "loss": 0.454, "step": 8574 }, { "epoch": 0.67, "grad_norm": 1.0903712264317869, "learning_rate": 5.323955371429299e-06, "loss": 0.4832, "step": 8575 }, { "epoch": 0.67, "grad_norm": 1.1942476004420102, "learning_rate": 5.32173447407035e-06, "loss": 0.5588, "step": 8576 }, { "epoch": 0.67, "grad_norm": 1.232772704667899, "learning_rate": 5.319513872074442e-06, "loss": 0.5315, "step": 8577 }, { "epoch": 0.67, "grad_norm": 1.1489232910751808, "learning_rate": 5.31729356558177e-06, "loss": 0.4915, "step": 8578 }, { "epoch": 0.67, "grad_norm": 1.219642165893346, "learning_rate": 5.31507355473252e-06, "loss": 0.534, "step": 8579 }, { "epoch": 0.67, "grad_norm": 1.1251108856655108, "learning_rate": 5.312853839666843e-06, "loss": 0.4948, "step": 8580 }, { "epoch": 0.67, "grad_norm": 1.256947047873108, "learning_rate": 5.31063442052489e-06, "loss": 0.5913, "step": 8581 }, { "epoch": 0.67, "grad_norm": 1.2139167806203217, "learning_rate": 5.308415297446774e-06, "loss": 0.5521, "step": 8582 }, { "epoch": 0.67, "grad_norm": 1.0642537061444397, "learning_rate": 5.306196470572606e-06, "loss": 0.4871, "step": 8583 }, { "epoch": 0.67, "grad_norm": 1.1650686332230558, "learning_rate": 5.303977940042477e-06, "loss": 0.5077, "step": 8584 }, { "epoch": 0.67, "grad_norm": 1.2050793120689047, "learning_rate": 5.301759705996446e-06, "loss": 0.5008, "step": 8585 }, { "epoch": 0.67, "grad_norm": 1.1789422112523302, "learning_rate": 5.299541768574563e-06, "loss": 0.5113, "step": 8586 }, { "epoch": 0.67, "grad_norm": 1.0785134762774005, "learning_rate": 5.297324127916858e-06, "loss": 0.5204, "step": 8587 }, { "epoch": 0.67, "grad_norm": 1.1194235026438761, "learning_rate": 5.2951067841633465e-06, "loss": 0.4781, "step": 8588 }, { "epoch": 0.67, "grad_norm": 1.1258937146823271, "learning_rate": 5.292889737454019e-06, "loss": 0.497, "step": 8589 }, { "epoch": 0.67, "grad_norm": 1.1190385755310723, "learning_rate": 5.29067298792885e-06, "loss": 0.5278, "step": 8590 }, { "epoch": 0.67, "grad_norm": 1.2700620979235986, "learning_rate": 5.288456535727786e-06, "loss": 0.4954, "step": 8591 }, { "epoch": 0.67, "grad_norm": 1.1660641351829342, "learning_rate": 5.286240380990772e-06, "loss": 0.5957, "step": 8592 }, { "epoch": 0.67, "grad_norm": 1.1979560790281831, "learning_rate": 5.284024523857721e-06, "loss": 0.5393, "step": 8593 }, { "epoch": 0.67, "grad_norm": 1.2155599660074705, "learning_rate": 5.281808964468534e-06, "loss": 0.5384, "step": 8594 }, { "epoch": 0.67, "grad_norm": 1.1032186733166185, "learning_rate": 5.2795937029630905e-06, "loss": 0.5244, "step": 8595 }, { "epoch": 0.67, "grad_norm": 1.1402868004639521, "learning_rate": 5.277378739481249e-06, "loss": 0.4849, "step": 8596 }, { "epoch": 0.67, "grad_norm": 1.1043772886432794, "learning_rate": 5.275164074162854e-06, "loss": 0.4652, "step": 8597 }, { "epoch": 0.67, "grad_norm": 1.1901302320879987, "learning_rate": 5.2729497071477276e-06, "loss": 0.4694, "step": 8598 }, { "epoch": 0.67, "grad_norm": 1.1750360706555412, "learning_rate": 5.270735638575677e-06, "loss": 0.5124, "step": 8599 }, { "epoch": 0.67, "grad_norm": 1.1766057972837987, "learning_rate": 5.268521868586487e-06, "loss": 0.5406, "step": 8600 }, { "epoch": 0.67, "grad_norm": 1.2372698579506118, "learning_rate": 5.266308397319918e-06, "loss": 0.5525, "step": 8601 }, { "epoch": 0.67, "grad_norm": 1.219008198310753, "learning_rate": 5.264095224915722e-06, "loss": 0.5339, "step": 8602 }, { "epoch": 0.67, "grad_norm": 1.2359080391903958, "learning_rate": 5.261882351513629e-06, "loss": 0.4405, "step": 8603 }, { "epoch": 0.67, "grad_norm": 1.2587329032214187, "learning_rate": 5.259669777253349e-06, "loss": 0.5463, "step": 8604 }, { "epoch": 0.67, "grad_norm": 1.1094577382356248, "learning_rate": 5.257457502274577e-06, "loss": 0.4718, "step": 8605 }, { "epoch": 0.67, "grad_norm": 1.1723870239649903, "learning_rate": 5.255245526716976e-06, "loss": 0.5151, "step": 8606 }, { "epoch": 0.67, "grad_norm": 1.1580931947461246, "learning_rate": 5.253033850720206e-06, "loss": 0.5603, "step": 8607 }, { "epoch": 0.67, "grad_norm": 1.2049124184908713, "learning_rate": 5.2508224744239e-06, "loss": 0.5852, "step": 8608 }, { "epoch": 0.67, "grad_norm": 1.172163253936146, "learning_rate": 5.2486113979676765e-06, "loss": 0.5342, "step": 8609 }, { "epoch": 0.67, "grad_norm": 1.23633598337192, "learning_rate": 5.24640062149113e-06, "loss": 0.5674, "step": 8610 }, { "epoch": 0.67, "grad_norm": 1.15119312837603, "learning_rate": 5.244190145133834e-06, "loss": 0.4746, "step": 8611 }, { "epoch": 0.67, "grad_norm": 1.1033367180748352, "learning_rate": 5.241979969035351e-06, "loss": 0.495, "step": 8612 }, { "epoch": 0.67, "grad_norm": 1.1158579671266355, "learning_rate": 5.239770093335219e-06, "loss": 0.5072, "step": 8613 }, { "epoch": 0.67, "grad_norm": 1.1623490799593086, "learning_rate": 5.237560518172963e-06, "loss": 0.5323, "step": 8614 }, { "epoch": 0.67, "grad_norm": 1.1601295339674342, "learning_rate": 5.235351243688085e-06, "loss": 0.5236, "step": 8615 }, { "epoch": 0.67, "grad_norm": 1.1915982951488509, "learning_rate": 5.233142270020062e-06, "loss": 0.5056, "step": 8616 }, { "epoch": 0.67, "grad_norm": 1.1061701621864002, "learning_rate": 5.230933597308361e-06, "loss": 0.4866, "step": 8617 }, { "epoch": 0.67, "grad_norm": 1.1430898710414628, "learning_rate": 5.228725225692426e-06, "loss": 0.46, "step": 8618 }, { "epoch": 0.67, "grad_norm": 1.0734983490823276, "learning_rate": 5.22651715531169e-06, "loss": 0.4287, "step": 8619 }, { "epoch": 0.67, "grad_norm": 1.2872246327407593, "learning_rate": 5.224309386305553e-06, "loss": 0.5998, "step": 8620 }, { "epoch": 0.67, "grad_norm": 1.129060992026886, "learning_rate": 5.222101918813399e-06, "loss": 0.5322, "step": 8621 }, { "epoch": 0.67, "grad_norm": 1.2066310478366338, "learning_rate": 5.219894752974602e-06, "loss": 0.5578, "step": 8622 }, { "epoch": 0.67, "grad_norm": 1.1879721756865282, "learning_rate": 5.217687888928512e-06, "loss": 0.5333, "step": 8623 }, { "epoch": 0.67, "grad_norm": 1.119153993004121, "learning_rate": 5.215481326814459e-06, "loss": 0.5662, "step": 8624 }, { "epoch": 0.67, "grad_norm": 1.029380433022499, "learning_rate": 5.213275066771759e-06, "loss": 0.4834, "step": 8625 }, { "epoch": 0.67, "grad_norm": 1.138691295111383, "learning_rate": 5.211069108939695e-06, "loss": 0.5219, "step": 8626 }, { "epoch": 0.67, "grad_norm": 1.1652682927023097, "learning_rate": 5.2088634534575465e-06, "loss": 0.5716, "step": 8627 }, { "epoch": 0.67, "grad_norm": 1.18220100671412, "learning_rate": 5.206658100464568e-06, "loss": 0.5451, "step": 8628 }, { "epoch": 0.67, "grad_norm": 1.170097223301615, "learning_rate": 5.204453050099997e-06, "loss": 0.5406, "step": 8629 }, { "epoch": 0.67, "grad_norm": 1.113602280758709, "learning_rate": 5.202248302503047e-06, "loss": 0.4961, "step": 8630 }, { "epoch": 0.67, "grad_norm": 1.3095343099107075, "learning_rate": 5.200043857812911e-06, "loss": 0.5421, "step": 8631 }, { "epoch": 0.67, "grad_norm": 1.2304595946925025, "learning_rate": 5.19783971616877e-06, "loss": 0.5503, "step": 8632 }, { "epoch": 0.67, "grad_norm": 1.163510902337313, "learning_rate": 5.195635877709783e-06, "loss": 0.4635, "step": 8633 }, { "epoch": 0.67, "grad_norm": 1.0673613690092136, "learning_rate": 5.193432342575093e-06, "loss": 0.4947, "step": 8634 }, { "epoch": 0.67, "grad_norm": 1.0948594597110899, "learning_rate": 5.191229110903819e-06, "loss": 0.5048, "step": 8635 }, { "epoch": 0.67, "grad_norm": 1.1360119591909523, "learning_rate": 5.189026182835059e-06, "loss": 0.5391, "step": 8636 }, { "epoch": 0.67, "grad_norm": 1.129882073958542, "learning_rate": 5.186823558507897e-06, "loss": 0.5073, "step": 8637 }, { "epoch": 0.67, "grad_norm": 1.1633678642292653, "learning_rate": 5.184621238061397e-06, "loss": 0.5447, "step": 8638 }, { "epoch": 0.67, "grad_norm": 1.1809202410084578, "learning_rate": 5.182419221634605e-06, "loss": 0.5435, "step": 8639 }, { "epoch": 0.67, "grad_norm": 1.160708671598167, "learning_rate": 5.180217509366544e-06, "loss": 0.5088, "step": 8640 }, { "epoch": 0.67, "grad_norm": 1.1131224401874014, "learning_rate": 5.178016101396215e-06, "loss": 0.5037, "step": 8641 }, { "epoch": 0.67, "grad_norm": 1.2057839773200891, "learning_rate": 5.175814997862606e-06, "loss": 0.5706, "step": 8642 }, { "epoch": 0.67, "grad_norm": 1.1948588888097982, "learning_rate": 5.173614198904686e-06, "loss": 0.5504, "step": 8643 }, { "epoch": 0.67, "grad_norm": 1.1293594876075312, "learning_rate": 5.171413704661403e-06, "loss": 0.5333, "step": 8644 }, { "epoch": 0.67, "grad_norm": 1.2648609822296937, "learning_rate": 5.169213515271686e-06, "loss": 0.5791, "step": 8645 }, { "epoch": 0.67, "grad_norm": 1.088862315624611, "learning_rate": 5.167013630874447e-06, "loss": 0.4896, "step": 8646 }, { "epoch": 0.67, "grad_norm": 1.2043051938076799, "learning_rate": 5.164814051608567e-06, "loss": 0.5396, "step": 8647 }, { "epoch": 0.67, "grad_norm": 1.2169972556309723, "learning_rate": 5.162614777612924e-06, "loss": 0.5411, "step": 8648 }, { "epoch": 0.67, "grad_norm": 1.1343408731839044, "learning_rate": 5.16041580902637e-06, "loss": 0.547, "step": 8649 }, { "epoch": 0.67, "grad_norm": 1.1817151258958554, "learning_rate": 5.158217145987732e-06, "loss": 0.5318, "step": 8650 }, { "epoch": 0.67, "grad_norm": 1.097955574682945, "learning_rate": 5.156018788635831e-06, "loss": 0.5265, "step": 8651 }, { "epoch": 0.67, "grad_norm": 1.198500278226194, "learning_rate": 5.153820737109449e-06, "loss": 0.519, "step": 8652 }, { "epoch": 0.67, "grad_norm": 1.212108120070904, "learning_rate": 5.151622991547368e-06, "loss": 0.5715, "step": 8653 }, { "epoch": 0.67, "grad_norm": 1.1006655456942376, "learning_rate": 5.149425552088342e-06, "loss": 0.4883, "step": 8654 }, { "epoch": 0.67, "grad_norm": 1.0808006477462726, "learning_rate": 5.1472284188711065e-06, "loss": 0.497, "step": 8655 }, { "epoch": 0.67, "grad_norm": 1.151524553230149, "learning_rate": 5.145031592034382e-06, "loss": 0.5578, "step": 8656 }, { "epoch": 0.67, "grad_norm": 1.1521271582897061, "learning_rate": 5.1428350717168575e-06, "loss": 0.4992, "step": 8657 }, { "epoch": 0.67, "grad_norm": 1.1410046820816142, "learning_rate": 5.140638858057214e-06, "loss": 0.5163, "step": 8658 }, { "epoch": 0.67, "grad_norm": 1.2454646803703624, "learning_rate": 5.138442951194115e-06, "loss": 0.5194, "step": 8659 }, { "epoch": 0.67, "grad_norm": 1.1634886178286548, "learning_rate": 5.136247351266191e-06, "loss": 0.4653, "step": 8660 }, { "epoch": 0.67, "grad_norm": 1.1077306138780623, "learning_rate": 5.134052058412069e-06, "loss": 0.4784, "step": 8661 }, { "epoch": 0.67, "grad_norm": 1.219274505822627, "learning_rate": 5.131857072770341e-06, "loss": 0.5251, "step": 8662 }, { "epoch": 0.67, "grad_norm": 1.1778286568780172, "learning_rate": 5.129662394479593e-06, "loss": 0.5141, "step": 8663 }, { "epoch": 0.67, "grad_norm": 1.2318246304120248, "learning_rate": 5.1274680236783855e-06, "loss": 0.5268, "step": 8664 }, { "epoch": 0.67, "grad_norm": 1.0200855604451335, "learning_rate": 5.125273960505261e-06, "loss": 0.4815, "step": 8665 }, { "epoch": 0.67, "grad_norm": 1.1186225584103466, "learning_rate": 5.123080205098745e-06, "loss": 0.5091, "step": 8666 }, { "epoch": 0.67, "grad_norm": 1.196980229226421, "learning_rate": 5.120886757597334e-06, "loss": 0.5641, "step": 8667 }, { "epoch": 0.67, "grad_norm": 1.0676452928246292, "learning_rate": 5.118693618139514e-06, "loss": 0.4918, "step": 8668 }, { "epoch": 0.67, "grad_norm": 1.1282094633231983, "learning_rate": 5.116500786863755e-06, "loss": 0.5331, "step": 8669 }, { "epoch": 0.67, "grad_norm": 1.1163498198847663, "learning_rate": 5.114308263908493e-06, "loss": 0.5015, "step": 8670 }, { "epoch": 0.67, "grad_norm": 1.2569282219239768, "learning_rate": 5.11211604941216e-06, "loss": 0.5146, "step": 8671 }, { "epoch": 0.67, "grad_norm": 1.024830459901714, "learning_rate": 5.109924143513156e-06, "loss": 0.4596, "step": 8672 }, { "epoch": 0.67, "grad_norm": 1.1936340515387955, "learning_rate": 5.107732546349871e-06, "loss": 0.5151, "step": 8673 }, { "epoch": 0.67, "grad_norm": 1.217341415996466, "learning_rate": 5.105541258060669e-06, "loss": 0.5312, "step": 8674 }, { "epoch": 0.67, "grad_norm": 1.1673112121957276, "learning_rate": 5.103350278783901e-06, "loss": 0.5564, "step": 8675 }, { "epoch": 0.67, "grad_norm": 1.2313955056436072, "learning_rate": 5.101159608657896e-06, "loss": 0.4974, "step": 8676 }, { "epoch": 0.67, "grad_norm": 1.094801206876865, "learning_rate": 5.098969247820958e-06, "loss": 0.5384, "step": 8677 }, { "epoch": 0.67, "grad_norm": 1.2621264669948749, "learning_rate": 5.096779196411375e-06, "loss": 0.5435, "step": 8678 }, { "epoch": 0.67, "grad_norm": 1.1854095630846337, "learning_rate": 5.094589454567423e-06, "loss": 0.4999, "step": 8679 }, { "epoch": 0.67, "grad_norm": 1.2592366845443836, "learning_rate": 5.092400022427344e-06, "loss": 0.5658, "step": 8680 }, { "epoch": 0.67, "grad_norm": 1.1612638073973949, "learning_rate": 5.090210900129375e-06, "loss": 0.4336, "step": 8681 }, { "epoch": 0.67, "grad_norm": 1.1419678713025245, "learning_rate": 5.088022087811719e-06, "loss": 0.5373, "step": 8682 }, { "epoch": 0.67, "grad_norm": 1.1215963267244438, "learning_rate": 5.0858335856125715e-06, "loss": 0.4958, "step": 8683 }, { "epoch": 0.67, "grad_norm": 1.0964654728558387, "learning_rate": 5.083645393670103e-06, "loss": 0.4404, "step": 8684 }, { "epoch": 0.67, "grad_norm": 1.107117626236695, "learning_rate": 5.081457512122466e-06, "loss": 0.4801, "step": 8685 }, { "epoch": 0.67, "grad_norm": 1.1830450114170197, "learning_rate": 5.0792699411077976e-06, "loss": 0.4896, "step": 8686 }, { "epoch": 0.67, "grad_norm": 1.2074752040552232, "learning_rate": 5.077082680764201e-06, "loss": 0.5487, "step": 8687 }, { "epoch": 0.67, "grad_norm": 1.2304433668636838, "learning_rate": 5.074895731229772e-06, "loss": 0.5596, "step": 8688 }, { "epoch": 0.67, "grad_norm": 1.1889513582878708, "learning_rate": 5.0727090926425915e-06, "loss": 0.5316, "step": 8689 }, { "epoch": 0.67, "grad_norm": 1.333391540965462, "learning_rate": 5.070522765140703e-06, "loss": 0.6063, "step": 8690 }, { "epoch": 0.67, "grad_norm": 1.2302187711104398, "learning_rate": 5.068336748862148e-06, "loss": 0.5517, "step": 8691 }, { "epoch": 0.67, "grad_norm": 1.15457893449672, "learning_rate": 5.066151043944936e-06, "loss": 0.5197, "step": 8692 }, { "epoch": 0.67, "grad_norm": 1.3483178767499935, "learning_rate": 5.063965650527063e-06, "loss": 0.4857, "step": 8693 }, { "epoch": 0.67, "grad_norm": 1.1334459835520658, "learning_rate": 5.0617805687465064e-06, "loss": 0.5382, "step": 8694 }, { "epoch": 0.67, "grad_norm": 1.3725335801816276, "learning_rate": 5.059595798741218e-06, "loss": 0.5671, "step": 8695 }, { "epoch": 0.67, "grad_norm": 1.2457734177555044, "learning_rate": 5.0574113406491365e-06, "loss": 0.5113, "step": 8696 }, { "epoch": 0.67, "grad_norm": 1.216837531768351, "learning_rate": 5.055227194608183e-06, "loss": 0.5538, "step": 8697 }, { "epoch": 0.67, "grad_norm": 1.1249549644780816, "learning_rate": 5.053043360756247e-06, "loss": 0.5618, "step": 8698 }, { "epoch": 0.67, "grad_norm": 1.2482107708552286, "learning_rate": 5.050859839231203e-06, "loss": 0.4982, "step": 8699 }, { "epoch": 0.67, "grad_norm": 1.2062114412934317, "learning_rate": 5.0486766301709115e-06, "loss": 0.5312, "step": 8700 }, { "epoch": 0.68, "grad_norm": 1.1889998851837025, "learning_rate": 5.046493733713209e-06, "loss": 0.5058, "step": 8701 }, { "epoch": 0.68, "grad_norm": 1.2844974951328014, "learning_rate": 5.04431114999592e-06, "loss": 0.5222, "step": 8702 }, { "epoch": 0.68, "grad_norm": 1.2528252621288747, "learning_rate": 5.0421288791568305e-06, "loss": 0.5214, "step": 8703 }, { "epoch": 0.68, "grad_norm": 1.1496130188531966, "learning_rate": 5.0399469213337234e-06, "loss": 0.5005, "step": 8704 }, { "epoch": 0.68, "grad_norm": 1.2741424855810366, "learning_rate": 5.037765276664359e-06, "loss": 0.5238, "step": 8705 }, { "epoch": 0.68, "grad_norm": 1.1740731032874818, "learning_rate": 5.035583945286474e-06, "loss": 0.5199, "step": 8706 }, { "epoch": 0.68, "grad_norm": 1.2949348552622904, "learning_rate": 5.033402927337792e-06, "loss": 0.5813, "step": 8707 }, { "epoch": 0.68, "grad_norm": 1.2079318738920737, "learning_rate": 5.031222222956007e-06, "loss": 0.5605, "step": 8708 }, { "epoch": 0.68, "grad_norm": 1.186743746425818, "learning_rate": 5.029041832278794e-06, "loss": 0.5521, "step": 8709 }, { "epoch": 0.68, "grad_norm": 1.1617586520229715, "learning_rate": 5.026861755443817e-06, "loss": 0.4885, "step": 8710 }, { "epoch": 0.68, "grad_norm": 1.225214028637384, "learning_rate": 5.024681992588717e-06, "loss": 0.535, "step": 8711 }, { "epoch": 0.68, "grad_norm": 1.0467486518794695, "learning_rate": 5.022502543851116e-06, "loss": 0.49, "step": 8712 }, { "epoch": 0.68, "grad_norm": 1.1716284937674728, "learning_rate": 5.020323409368604e-06, "loss": 0.5247, "step": 8713 }, { "epoch": 0.68, "grad_norm": 1.105648390121989, "learning_rate": 5.018144589278768e-06, "loss": 0.4994, "step": 8714 }, { "epoch": 0.68, "grad_norm": 1.0612254070941622, "learning_rate": 5.015966083719166e-06, "loss": 0.4692, "step": 8715 }, { "epoch": 0.68, "grad_norm": 1.2910441108365716, "learning_rate": 5.013787892827341e-06, "loss": 0.5977, "step": 8716 }, { "epoch": 0.68, "grad_norm": 1.1590311779604443, "learning_rate": 5.011610016740815e-06, "loss": 0.5083, "step": 8717 }, { "epoch": 0.68, "grad_norm": 1.2803542564832306, "learning_rate": 5.009432455597085e-06, "loss": 0.5506, "step": 8718 }, { "epoch": 0.68, "grad_norm": 1.31318151854397, "learning_rate": 5.007255209533629e-06, "loss": 0.5438, "step": 8719 }, { "epoch": 0.68, "grad_norm": 1.226707692876925, "learning_rate": 5.00507827868791e-06, "loss": 0.5622, "step": 8720 }, { "epoch": 0.68, "grad_norm": 1.1978867181544943, "learning_rate": 5.00290166319737e-06, "loss": 0.5242, "step": 8721 }, { "epoch": 0.68, "grad_norm": 1.1723747205196022, "learning_rate": 5.000725363199433e-06, "loss": 0.4761, "step": 8722 }, { "epoch": 0.68, "grad_norm": 1.1578956955068445, "learning_rate": 4.998549378831494e-06, "loss": 0.556, "step": 8723 }, { "epoch": 0.68, "grad_norm": 1.1441949864656908, "learning_rate": 4.996373710230937e-06, "loss": 0.4635, "step": 8724 }, { "epoch": 0.68, "grad_norm": 1.087636209047633, "learning_rate": 4.994198357535122e-06, "loss": 0.463, "step": 8725 }, { "epoch": 0.68, "grad_norm": 1.1115647317189916, "learning_rate": 4.992023320881391e-06, "loss": 0.5276, "step": 8726 }, { "epoch": 0.68, "grad_norm": 1.1090953098236815, "learning_rate": 4.989848600407069e-06, "loss": 0.5932, "step": 8727 }, { "epoch": 0.68, "grad_norm": 1.2186306626018797, "learning_rate": 4.987674196249454e-06, "loss": 0.5357, "step": 8728 }, { "epoch": 0.68, "grad_norm": 1.113205649502695, "learning_rate": 4.9855001085458246e-06, "loss": 0.5084, "step": 8729 }, { "epoch": 0.68, "grad_norm": 1.35781143416702, "learning_rate": 4.983326337433444e-06, "loss": 0.5489, "step": 8730 }, { "epoch": 0.68, "grad_norm": 1.1932374482996237, "learning_rate": 4.981152883049555e-06, "loss": 0.557, "step": 8731 }, { "epoch": 0.68, "grad_norm": 1.2348166532752183, "learning_rate": 4.978979745531382e-06, "loss": 0.5066, "step": 8732 }, { "epoch": 0.68, "grad_norm": 1.163448453406435, "learning_rate": 4.976806925016117e-06, "loss": 0.5448, "step": 8733 }, { "epoch": 0.68, "grad_norm": 1.245600827102656, "learning_rate": 4.974634421640949e-06, "loss": 0.5817, "step": 8734 }, { "epoch": 0.68, "grad_norm": 1.2253249905063204, "learning_rate": 4.972462235543038e-06, "loss": 0.5275, "step": 8735 }, { "epoch": 0.68, "grad_norm": 1.2104947665171129, "learning_rate": 4.970290366859523e-06, "loss": 0.5017, "step": 8736 }, { "epoch": 0.68, "grad_norm": 1.1282664667335658, "learning_rate": 4.968118815727532e-06, "loss": 0.521, "step": 8737 }, { "epoch": 0.68, "grad_norm": 1.1283011217088237, "learning_rate": 4.965947582284161e-06, "loss": 0.4936, "step": 8738 }, { "epoch": 0.68, "grad_norm": 1.1676207569413988, "learning_rate": 4.963776666666487e-06, "loss": 0.5043, "step": 8739 }, { "epoch": 0.68, "grad_norm": 1.193596249810098, "learning_rate": 4.961606069011576e-06, "loss": 0.4662, "step": 8740 }, { "epoch": 0.68, "grad_norm": 1.233221166682347, "learning_rate": 4.959435789456468e-06, "loss": 0.5592, "step": 8741 }, { "epoch": 0.68, "grad_norm": 1.144769689310219, "learning_rate": 4.957265828138189e-06, "loss": 0.5146, "step": 8742 }, { "epoch": 0.68, "grad_norm": 1.1987768595434038, "learning_rate": 4.955096185193732e-06, "loss": 0.5294, "step": 8743 }, { "epoch": 0.68, "grad_norm": 1.2877682286081171, "learning_rate": 4.95292686076008e-06, "loss": 0.5436, "step": 8744 }, { "epoch": 0.68, "grad_norm": 1.2587809654904685, "learning_rate": 4.950757854974195e-06, "loss": 0.5423, "step": 8745 }, { "epoch": 0.68, "grad_norm": 1.2094043501414709, "learning_rate": 4.948589167973018e-06, "loss": 0.5208, "step": 8746 }, { "epoch": 0.68, "grad_norm": 1.4005826555053236, "learning_rate": 4.946420799893472e-06, "loss": 0.489, "step": 8747 }, { "epoch": 0.68, "grad_norm": 1.0945550408184055, "learning_rate": 4.944252750872455e-06, "loss": 0.5158, "step": 8748 }, { "epoch": 0.68, "grad_norm": 1.208639758269382, "learning_rate": 4.94208502104684e-06, "loss": 0.5094, "step": 8749 }, { "epoch": 0.68, "grad_norm": 1.2441755496502385, "learning_rate": 4.9399176105534954e-06, "loss": 0.5618, "step": 8750 }, { "epoch": 0.68, "grad_norm": 1.1204868494885198, "learning_rate": 4.937750519529258e-06, "loss": 0.4654, "step": 8751 }, { "epoch": 0.68, "grad_norm": 1.0849894494980843, "learning_rate": 4.935583748110947e-06, "loss": 0.484, "step": 8752 }, { "epoch": 0.68, "grad_norm": 1.1989675749625017, "learning_rate": 4.933417296435367e-06, "loss": 0.5035, "step": 8753 }, { "epoch": 0.68, "grad_norm": 1.1804838208613058, "learning_rate": 4.931251164639289e-06, "loss": 0.4876, "step": 8754 }, { "epoch": 0.68, "grad_norm": 1.2706302522249588, "learning_rate": 4.929085352859478e-06, "loss": 0.5765, "step": 8755 }, { "epoch": 0.68, "grad_norm": 1.110206238858068, "learning_rate": 4.92691986123267e-06, "loss": 0.4961, "step": 8756 }, { "epoch": 0.68, "grad_norm": 1.1098562459391241, "learning_rate": 4.924754689895589e-06, "loss": 0.4791, "step": 8757 }, { "epoch": 0.68, "grad_norm": 1.1283116342119364, "learning_rate": 4.922589838984929e-06, "loss": 0.5065, "step": 8758 }, { "epoch": 0.68, "grad_norm": 1.198320530799597, "learning_rate": 4.920425308637365e-06, "loss": 0.5839, "step": 8759 }, { "epoch": 0.68, "grad_norm": 1.2064124437757886, "learning_rate": 4.918261098989557e-06, "loss": 0.5306, "step": 8760 }, { "epoch": 0.68, "grad_norm": 1.1800092548880279, "learning_rate": 4.9160972101781455e-06, "loss": 0.5327, "step": 8761 }, { "epoch": 0.68, "grad_norm": 1.1973206848129216, "learning_rate": 4.913933642339747e-06, "loss": 0.522, "step": 8762 }, { "epoch": 0.68, "grad_norm": 1.1384549335042284, "learning_rate": 4.911770395610961e-06, "loss": 0.4791, "step": 8763 }, { "epoch": 0.68, "grad_norm": 1.0463926997794826, "learning_rate": 4.909607470128358e-06, "loss": 0.4773, "step": 8764 }, { "epoch": 0.68, "grad_norm": 1.2602889519082285, "learning_rate": 4.907444866028499e-06, "loss": 0.492, "step": 8765 }, { "epoch": 0.68, "grad_norm": 1.1354075731852413, "learning_rate": 4.90528258344792e-06, "loss": 0.5507, "step": 8766 }, { "epoch": 0.68, "grad_norm": 1.1896138200121311, "learning_rate": 4.90312062252314e-06, "loss": 0.5114, "step": 8767 }, { "epoch": 0.68, "grad_norm": 1.1859678121319162, "learning_rate": 4.900958983390651e-06, "loss": 0.4944, "step": 8768 }, { "epoch": 0.68, "grad_norm": 1.3874209922771823, "learning_rate": 4.898797666186926e-06, "loss": 0.6106, "step": 8769 }, { "epoch": 0.68, "grad_norm": 1.1651552946524932, "learning_rate": 4.896636671048424e-06, "loss": 0.514, "step": 8770 }, { "epoch": 0.68, "grad_norm": 1.2597598053553805, "learning_rate": 4.894475998111578e-06, "loss": 0.4905, "step": 8771 }, { "epoch": 0.68, "grad_norm": 1.1090669875969148, "learning_rate": 4.892315647512802e-06, "loss": 0.4978, "step": 8772 }, { "epoch": 0.68, "grad_norm": 1.3318073610327208, "learning_rate": 4.890155619388497e-06, "loss": 0.5835, "step": 8773 }, { "epoch": 0.68, "grad_norm": 1.1785955364684588, "learning_rate": 4.887995913875025e-06, "loss": 0.5015, "step": 8774 }, { "epoch": 0.68, "grad_norm": 1.270471594350697, "learning_rate": 4.8858365311087475e-06, "loss": 0.5193, "step": 8775 }, { "epoch": 0.68, "grad_norm": 1.1723017614858855, "learning_rate": 4.883677471225995e-06, "loss": 0.5309, "step": 8776 }, { "epoch": 0.68, "grad_norm": 1.0159863342732784, "learning_rate": 4.881518734363084e-06, "loss": 0.4668, "step": 8777 }, { "epoch": 0.68, "grad_norm": 1.1852034396569366, "learning_rate": 4.8793603206563034e-06, "loss": 0.5226, "step": 8778 }, { "epoch": 0.68, "grad_norm": 1.1410620387744448, "learning_rate": 4.8772022302419206e-06, "loss": 0.5536, "step": 8779 }, { "epoch": 0.68, "grad_norm": 1.0879014732052814, "learning_rate": 4.875044463256192e-06, "loss": 0.5078, "step": 8780 }, { "epoch": 0.68, "grad_norm": 1.1385734083678445, "learning_rate": 4.872887019835347e-06, "loss": 0.5632, "step": 8781 }, { "epoch": 0.68, "grad_norm": 1.1599100795785098, "learning_rate": 4.870729900115597e-06, "loss": 0.5211, "step": 8782 }, { "epoch": 0.68, "grad_norm": 1.1990562602246906, "learning_rate": 4.868573104233137e-06, "loss": 0.4641, "step": 8783 }, { "epoch": 0.68, "grad_norm": 1.1820757608548764, "learning_rate": 4.8664166323241255e-06, "loss": 0.5194, "step": 8784 }, { "epoch": 0.68, "grad_norm": 1.151315676610267, "learning_rate": 4.864260484524719e-06, "loss": 0.5376, "step": 8785 }, { "epoch": 0.68, "grad_norm": 1.2034011127653805, "learning_rate": 4.862104660971045e-06, "loss": 0.4728, "step": 8786 }, { "epoch": 0.68, "grad_norm": 1.233055375111033, "learning_rate": 4.859949161799216e-06, "loss": 0.5638, "step": 8787 }, { "epoch": 0.68, "grad_norm": 1.1503347179253807, "learning_rate": 4.857793987145315e-06, "loss": 0.5237, "step": 8788 }, { "epoch": 0.68, "grad_norm": 1.191761251621735, "learning_rate": 4.855639137145406e-06, "loss": 0.5091, "step": 8789 }, { "epoch": 0.68, "grad_norm": 1.1556485648096828, "learning_rate": 4.853484611935541e-06, "loss": 0.5012, "step": 8790 }, { "epoch": 0.68, "grad_norm": 1.2088060386157224, "learning_rate": 4.8513304116517435e-06, "loss": 0.5373, "step": 8791 }, { "epoch": 0.68, "grad_norm": 1.1651812815934823, "learning_rate": 4.8491765364300205e-06, "loss": 0.5175, "step": 8792 }, { "epoch": 0.68, "grad_norm": 1.3091126736814878, "learning_rate": 4.847022986406362e-06, "loss": 0.5356, "step": 8793 }, { "epoch": 0.68, "grad_norm": 1.1208746293885075, "learning_rate": 4.844869761716725e-06, "loss": 0.4709, "step": 8794 }, { "epoch": 0.68, "grad_norm": 1.241821523051576, "learning_rate": 4.842716862497056e-06, "loss": 0.51, "step": 8795 }, { "epoch": 0.68, "grad_norm": 1.1208106025650384, "learning_rate": 4.840564288883284e-06, "loss": 0.4849, "step": 8796 }, { "epoch": 0.68, "grad_norm": 1.3080502876128315, "learning_rate": 4.838412041011304e-06, "loss": 0.571, "step": 8797 }, { "epoch": 0.68, "grad_norm": 1.146630402969278, "learning_rate": 4.836260119017005e-06, "loss": 0.5836, "step": 8798 }, { "epoch": 0.68, "grad_norm": 1.2412763888962388, "learning_rate": 4.8341085230362425e-06, "loss": 0.4665, "step": 8799 }, { "epoch": 0.68, "grad_norm": 1.1507169976202056, "learning_rate": 4.831957253204862e-06, "loss": 0.5157, "step": 8800 }, { "epoch": 0.68, "grad_norm": 1.011558549488498, "learning_rate": 4.829806309658683e-06, "loss": 0.4807, "step": 8801 }, { "epoch": 0.68, "grad_norm": 1.1604659569417641, "learning_rate": 4.827655692533506e-06, "loss": 0.4749, "step": 8802 }, { "epoch": 0.68, "grad_norm": 1.110122321494262, "learning_rate": 4.825505401965116e-06, "loss": 0.4615, "step": 8803 }, { "epoch": 0.68, "grad_norm": 1.1882902326877618, "learning_rate": 4.823355438089262e-06, "loss": 0.4703, "step": 8804 }, { "epoch": 0.68, "grad_norm": 1.2340727810348104, "learning_rate": 4.821205801041688e-06, "loss": 0.5237, "step": 8805 }, { "epoch": 0.68, "grad_norm": 1.2388076866567943, "learning_rate": 4.819056490958115e-06, "loss": 0.514, "step": 8806 }, { "epoch": 0.68, "grad_norm": 1.1694750816296748, "learning_rate": 4.816907507974231e-06, "loss": 0.5062, "step": 8807 }, { "epoch": 0.68, "grad_norm": 1.3178097179547208, "learning_rate": 4.814758852225717e-06, "loss": 0.5395, "step": 8808 }, { "epoch": 0.68, "grad_norm": 1.3070496516234882, "learning_rate": 4.8126105238482345e-06, "loss": 0.5812, "step": 8809 }, { "epoch": 0.68, "grad_norm": 1.2633119804688122, "learning_rate": 4.810462522977408e-06, "loss": 0.5497, "step": 8810 }, { "epoch": 0.68, "grad_norm": 1.1145987762141907, "learning_rate": 4.808314849748858e-06, "loss": 0.5404, "step": 8811 }, { "epoch": 0.68, "grad_norm": 1.2025891386370484, "learning_rate": 4.806167504298175e-06, "loss": 0.5238, "step": 8812 }, { "epoch": 0.68, "grad_norm": 1.16903076879311, "learning_rate": 4.8040204867609355e-06, "loss": 0.499, "step": 8813 }, { "epoch": 0.68, "grad_norm": 1.3988739270357924, "learning_rate": 4.801873797272694e-06, "loss": 0.61, "step": 8814 }, { "epoch": 0.68, "grad_norm": 1.1930018512203053, "learning_rate": 4.799727435968975e-06, "loss": 0.553, "step": 8815 }, { "epoch": 0.68, "grad_norm": 1.133625501717757, "learning_rate": 4.797581402985296e-06, "loss": 0.4546, "step": 8816 }, { "epoch": 0.68, "grad_norm": 1.2365295824552878, "learning_rate": 4.795435698457141e-06, "loss": 0.5232, "step": 8817 }, { "epoch": 0.68, "grad_norm": 1.20709993728702, "learning_rate": 4.793290322519981e-06, "loss": 0.5143, "step": 8818 }, { "epoch": 0.68, "grad_norm": 1.108837857297423, "learning_rate": 4.791145275309271e-06, "loss": 0.5246, "step": 8819 }, { "epoch": 0.68, "grad_norm": 1.1630115757511255, "learning_rate": 4.7890005569604305e-06, "loss": 0.4973, "step": 8820 }, { "epoch": 0.68, "grad_norm": 1.2096021118616156, "learning_rate": 4.786856167608869e-06, "loss": 0.4928, "step": 8821 }, { "epoch": 0.68, "grad_norm": 1.292562830152313, "learning_rate": 4.784712107389975e-06, "loss": 0.523, "step": 8822 }, { "epoch": 0.68, "grad_norm": 1.1632057467745005, "learning_rate": 4.7825683764391114e-06, "loss": 0.526, "step": 8823 }, { "epoch": 0.68, "grad_norm": 1.175446877824334, "learning_rate": 4.780424974891629e-06, "loss": 0.5266, "step": 8824 }, { "epoch": 0.68, "grad_norm": 1.1399257031707766, "learning_rate": 4.778281902882844e-06, "loss": 0.4544, "step": 8825 }, { "epoch": 0.68, "grad_norm": 1.3028203429720102, "learning_rate": 4.776139160548068e-06, "loss": 0.5362, "step": 8826 }, { "epoch": 0.68, "grad_norm": 1.3395818337550256, "learning_rate": 4.773996748022573e-06, "loss": 0.5655, "step": 8827 }, { "epoch": 0.68, "grad_norm": 1.2426064222723974, "learning_rate": 4.771854665441626e-06, "loss": 0.5484, "step": 8828 }, { "epoch": 0.68, "grad_norm": 1.1789879651122728, "learning_rate": 4.769712912940472e-06, "loss": 0.5561, "step": 8829 }, { "epoch": 0.69, "grad_norm": 1.1415945139358161, "learning_rate": 4.767571490654322e-06, "loss": 0.4989, "step": 8830 }, { "epoch": 0.69, "grad_norm": 1.2775170911478029, "learning_rate": 4.76543039871838e-06, "loss": 0.542, "step": 8831 }, { "epoch": 0.69, "grad_norm": 1.1832591172348037, "learning_rate": 4.7632896372678235e-06, "loss": 0.5082, "step": 8832 }, { "epoch": 0.69, "grad_norm": 1.179159741106128, "learning_rate": 4.761149206437811e-06, "loss": 0.5019, "step": 8833 }, { "epoch": 0.69, "grad_norm": 1.17375779536282, "learning_rate": 4.759009106363482e-06, "loss": 0.5362, "step": 8834 }, { "epoch": 0.69, "grad_norm": 1.1856792947888322, "learning_rate": 4.756869337179942e-06, "loss": 0.5244, "step": 8835 }, { "epoch": 0.69, "grad_norm": 1.247044550331615, "learning_rate": 4.754729899022298e-06, "loss": 0.5349, "step": 8836 }, { "epoch": 0.69, "grad_norm": 1.1721126061201548, "learning_rate": 4.752590792025612e-06, "loss": 0.4768, "step": 8837 }, { "epoch": 0.69, "grad_norm": 1.2372419646999036, "learning_rate": 4.750452016324943e-06, "loss": 0.5551, "step": 8838 }, { "epoch": 0.69, "grad_norm": 1.201077837480701, "learning_rate": 4.748313572055326e-06, "loss": 0.5171, "step": 8839 }, { "epoch": 0.69, "grad_norm": 1.0790756015330216, "learning_rate": 4.746175459351765e-06, "loss": 0.4984, "step": 8840 }, { "epoch": 0.69, "grad_norm": 1.1799470224449369, "learning_rate": 4.744037678349254e-06, "loss": 0.5413, "step": 8841 }, { "epoch": 0.69, "grad_norm": 1.1348939911662688, "learning_rate": 4.74190022918276e-06, "loss": 0.514, "step": 8842 }, { "epoch": 0.69, "grad_norm": 1.059176351551484, "learning_rate": 4.7397631119872335e-06, "loss": 0.5196, "step": 8843 }, { "epoch": 0.69, "grad_norm": 1.1550405207870562, "learning_rate": 4.737626326897604e-06, "loss": 0.516, "step": 8844 }, { "epoch": 0.69, "grad_norm": 1.1138553678289136, "learning_rate": 4.735489874048772e-06, "loss": 0.5321, "step": 8845 }, { "epoch": 0.69, "grad_norm": 1.1132218729882424, "learning_rate": 4.733353753575629e-06, "loss": 0.4097, "step": 8846 }, { "epoch": 0.69, "grad_norm": 1.4144234381054221, "learning_rate": 4.731217965613033e-06, "loss": 0.5674, "step": 8847 }, { "epoch": 0.69, "grad_norm": 1.1599987706638268, "learning_rate": 4.729082510295829e-06, "loss": 0.5178, "step": 8848 }, { "epoch": 0.69, "grad_norm": 1.2147427728520006, "learning_rate": 4.726947387758845e-06, "loss": 0.4921, "step": 8849 }, { "epoch": 0.69, "grad_norm": 1.2836594604835394, "learning_rate": 4.724812598136873e-06, "loss": 0.5406, "step": 8850 }, { "epoch": 0.69, "grad_norm": 1.155998821297543, "learning_rate": 4.722678141564698e-06, "loss": 0.521, "step": 8851 }, { "epoch": 0.69, "grad_norm": 1.0403337661068388, "learning_rate": 4.720544018177081e-06, "loss": 0.4557, "step": 8852 }, { "epoch": 0.69, "grad_norm": 1.257351998327825, "learning_rate": 4.718410228108757e-06, "loss": 0.5171, "step": 8853 }, { "epoch": 0.69, "grad_norm": 1.2262320589703215, "learning_rate": 4.716276771494449e-06, "loss": 0.4803, "step": 8854 }, { "epoch": 0.69, "grad_norm": 1.2557203533296322, "learning_rate": 4.714143648468845e-06, "loss": 0.5221, "step": 8855 }, { "epoch": 0.69, "grad_norm": 1.1901707481051114, "learning_rate": 4.712010859166628e-06, "loss": 0.5721, "step": 8856 }, { "epoch": 0.69, "grad_norm": 1.277576810240922, "learning_rate": 4.709878403722445e-06, "loss": 0.5115, "step": 8857 }, { "epoch": 0.69, "grad_norm": 1.1938510013213617, "learning_rate": 4.70774628227093e-06, "loss": 0.5465, "step": 8858 }, { "epoch": 0.69, "grad_norm": 1.17692839466426, "learning_rate": 4.7056144949467005e-06, "loss": 0.5193, "step": 8859 }, { "epoch": 0.69, "grad_norm": 1.1323715404049608, "learning_rate": 4.70348304188434e-06, "loss": 0.5282, "step": 8860 }, { "epoch": 0.69, "grad_norm": 1.1541353425734135, "learning_rate": 4.7013519232184225e-06, "loss": 0.5003, "step": 8861 }, { "epoch": 0.69, "grad_norm": 1.3676767400311258, "learning_rate": 4.699221139083494e-06, "loss": 0.5499, "step": 8862 }, { "epoch": 0.69, "grad_norm": 1.2077386258568266, "learning_rate": 4.697090689614084e-06, "loss": 0.5235, "step": 8863 }, { "epoch": 0.69, "grad_norm": 1.2054689042690176, "learning_rate": 4.6949605749446995e-06, "loss": 0.5145, "step": 8864 }, { "epoch": 0.69, "grad_norm": 1.2237714367743355, "learning_rate": 4.692830795209826e-06, "loss": 0.5602, "step": 8865 }, { "epoch": 0.69, "grad_norm": 1.1963040596158168, "learning_rate": 4.690701350543927e-06, "loss": 0.5039, "step": 8866 }, { "epoch": 0.69, "grad_norm": 1.225439298451685, "learning_rate": 4.6885722410814396e-06, "loss": 0.5547, "step": 8867 }, { "epoch": 0.69, "grad_norm": 1.2616693823636926, "learning_rate": 4.68644346695679e-06, "loss": 0.5171, "step": 8868 }, { "epoch": 0.69, "grad_norm": 1.3255002283119859, "learning_rate": 4.684315028304379e-06, "loss": 0.5759, "step": 8869 }, { "epoch": 0.69, "grad_norm": 1.2304056787220903, "learning_rate": 4.68218692525859e-06, "loss": 0.5558, "step": 8870 }, { "epoch": 0.69, "grad_norm": 1.2182677855560362, "learning_rate": 4.680059157953772e-06, "loss": 0.5184, "step": 8871 }, { "epoch": 0.69, "grad_norm": 1.218183875033662, "learning_rate": 4.6779317265242675e-06, "loss": 0.522, "step": 8872 }, { "epoch": 0.69, "grad_norm": 1.226259230521611, "learning_rate": 4.6758046311043915e-06, "loss": 0.574, "step": 8873 }, { "epoch": 0.69, "grad_norm": 1.2420230972611777, "learning_rate": 4.673677871828437e-06, "loss": 0.5738, "step": 8874 }, { "epoch": 0.69, "grad_norm": 1.2252305689658154, "learning_rate": 4.671551448830684e-06, "loss": 0.5285, "step": 8875 }, { "epoch": 0.69, "grad_norm": 1.2394194561424576, "learning_rate": 4.669425362245378e-06, "loss": 0.5243, "step": 8876 }, { "epoch": 0.69, "grad_norm": 1.2249773801933952, "learning_rate": 4.667299612206747e-06, "loss": 0.5395, "step": 8877 }, { "epoch": 0.69, "grad_norm": 1.2668172142426282, "learning_rate": 4.665174198849006e-06, "loss": 0.541, "step": 8878 }, { "epoch": 0.69, "grad_norm": 1.2793023728114212, "learning_rate": 4.6630491223063415e-06, "loss": 0.5331, "step": 8879 }, { "epoch": 0.69, "grad_norm": 1.1886959077259043, "learning_rate": 4.660924382712923e-06, "loss": 0.5406, "step": 8880 }, { "epoch": 0.69, "grad_norm": 1.137707467075631, "learning_rate": 4.658799980202893e-06, "loss": 0.4997, "step": 8881 }, { "epoch": 0.69, "grad_norm": 1.2855292709383201, "learning_rate": 4.656675914910376e-06, "loss": 0.5292, "step": 8882 }, { "epoch": 0.69, "grad_norm": 1.2334257411275467, "learning_rate": 4.654552186969477e-06, "loss": 0.5588, "step": 8883 }, { "epoch": 0.69, "grad_norm": 1.1992880338355738, "learning_rate": 4.6524287965142765e-06, "loss": 0.5029, "step": 8884 }, { "epoch": 0.69, "grad_norm": 1.2504801781573172, "learning_rate": 4.6503057436788405e-06, "loss": 0.5656, "step": 8885 }, { "epoch": 0.69, "grad_norm": 1.2238899320062235, "learning_rate": 4.648183028597203e-06, "loss": 0.513, "step": 8886 }, { "epoch": 0.69, "grad_norm": 1.2239236813232504, "learning_rate": 4.646060651403379e-06, "loss": 0.4781, "step": 8887 }, { "epoch": 0.69, "grad_norm": 1.2229128924717194, "learning_rate": 4.643938612231369e-06, "loss": 0.5468, "step": 8888 }, { "epoch": 0.69, "grad_norm": 1.1652624614730998, "learning_rate": 4.641816911215148e-06, "loss": 0.489, "step": 8889 }, { "epoch": 0.69, "grad_norm": 1.1466826441302262, "learning_rate": 4.639695548488673e-06, "loss": 0.482, "step": 8890 }, { "epoch": 0.69, "grad_norm": 1.181788209888564, "learning_rate": 4.63757452418587e-06, "loss": 0.5463, "step": 8891 }, { "epoch": 0.69, "grad_norm": 1.2867847548453584, "learning_rate": 4.635453838440654e-06, "loss": 0.5273, "step": 8892 }, { "epoch": 0.69, "grad_norm": 1.089022255021199, "learning_rate": 4.633333491386915e-06, "loss": 0.4706, "step": 8893 }, { "epoch": 0.69, "grad_norm": 1.1810854782095423, "learning_rate": 4.631213483158525e-06, "loss": 0.475, "step": 8894 }, { "epoch": 0.69, "grad_norm": 1.1169802066582672, "learning_rate": 4.6290938138893225e-06, "loss": 0.5131, "step": 8895 }, { "epoch": 0.69, "grad_norm": 1.2482594292634268, "learning_rate": 4.626974483713142e-06, "loss": 0.5399, "step": 8896 }, { "epoch": 0.69, "grad_norm": 1.1119416861132356, "learning_rate": 4.62485549276378e-06, "loss": 0.5233, "step": 8897 }, { "epoch": 0.69, "grad_norm": 1.2294986352001556, "learning_rate": 4.622736841175023e-06, "loss": 0.5361, "step": 8898 }, { "epoch": 0.69, "grad_norm": 1.146167405874918, "learning_rate": 4.620618529080632e-06, "loss": 0.5348, "step": 8899 }, { "epoch": 0.69, "grad_norm": 1.1541456197662716, "learning_rate": 4.61850055661435e-06, "loss": 0.4828, "step": 8900 }, { "epoch": 0.69, "grad_norm": 1.208325528525216, "learning_rate": 4.61638292390989e-06, "loss": 0.5133, "step": 8901 }, { "epoch": 0.69, "grad_norm": 1.2489954249601358, "learning_rate": 4.614265631100952e-06, "loss": 0.5533, "step": 8902 }, { "epoch": 0.69, "grad_norm": 1.311399679814041, "learning_rate": 4.612148678321211e-06, "loss": 0.5022, "step": 8903 }, { "epoch": 0.69, "grad_norm": 1.1329007344577136, "learning_rate": 4.610032065704325e-06, "loss": 0.5266, "step": 8904 }, { "epoch": 0.69, "grad_norm": 1.213177092749023, "learning_rate": 4.607915793383921e-06, "loss": 0.5389, "step": 8905 }, { "epoch": 0.69, "grad_norm": 1.2040836923824745, "learning_rate": 4.605799861493615e-06, "loss": 0.4958, "step": 8906 }, { "epoch": 0.69, "grad_norm": 1.0646999807832698, "learning_rate": 4.60368427016699e-06, "loss": 0.5065, "step": 8907 }, { "epoch": 0.69, "grad_norm": 1.1537965571308, "learning_rate": 4.601569019537619e-06, "loss": 0.5029, "step": 8908 }, { "epoch": 0.69, "grad_norm": 1.199606916499002, "learning_rate": 4.599454109739047e-06, "loss": 0.4962, "step": 8909 }, { "epoch": 0.69, "grad_norm": 1.193114659867559, "learning_rate": 4.597339540904806e-06, "loss": 0.5113, "step": 8910 }, { "epoch": 0.69, "grad_norm": 1.2220495991950413, "learning_rate": 4.595225313168389e-06, "loss": 0.5851, "step": 8911 }, { "epoch": 0.69, "grad_norm": 1.2444790510113326, "learning_rate": 4.5931114266632826e-06, "loss": 0.5201, "step": 8912 }, { "epoch": 0.69, "grad_norm": 1.1684836078941885, "learning_rate": 4.590997881522948e-06, "loss": 0.4916, "step": 8913 }, { "epoch": 0.69, "grad_norm": 1.1607906777531398, "learning_rate": 4.588884677880828e-06, "loss": 0.4283, "step": 8914 }, { "epoch": 0.69, "grad_norm": 1.1203906682408602, "learning_rate": 4.586771815870332e-06, "loss": 0.5341, "step": 8915 }, { "epoch": 0.69, "grad_norm": 1.1136149123926693, "learning_rate": 4.5846592956248636e-06, "loss": 0.5302, "step": 8916 }, { "epoch": 0.69, "grad_norm": 1.1012851014101719, "learning_rate": 4.582547117277789e-06, "loss": 0.45, "step": 8917 }, { "epoch": 0.69, "grad_norm": 1.1800675443132653, "learning_rate": 4.580435280962466e-06, "loss": 0.5559, "step": 8918 }, { "epoch": 0.69, "grad_norm": 1.06715070848172, "learning_rate": 4.578323786812225e-06, "loss": 0.495, "step": 8919 }, { "epoch": 0.69, "grad_norm": 1.2273294760045803, "learning_rate": 4.5762126349603755e-06, "loss": 0.5142, "step": 8920 }, { "epoch": 0.69, "grad_norm": 1.1333739895166621, "learning_rate": 4.574101825540209e-06, "loss": 0.4715, "step": 8921 }, { "epoch": 0.69, "grad_norm": 1.241579062723198, "learning_rate": 4.571991358684984e-06, "loss": 0.5607, "step": 8922 }, { "epoch": 0.69, "grad_norm": 1.1906023158518209, "learning_rate": 4.569881234527951e-06, "loss": 0.5334, "step": 8923 }, { "epoch": 0.69, "grad_norm": 1.2194152874648232, "learning_rate": 4.5677714532023335e-06, "loss": 0.5325, "step": 8924 }, { "epoch": 0.69, "grad_norm": 1.1392613450624556, "learning_rate": 4.565662014841328e-06, "loss": 0.5424, "step": 8925 }, { "epoch": 0.69, "grad_norm": 1.2520186813258727, "learning_rate": 4.563552919578119e-06, "loss": 0.4893, "step": 8926 }, { "epoch": 0.69, "grad_norm": 1.1390535686545693, "learning_rate": 4.561444167545861e-06, "loss": 0.5007, "step": 8927 }, { "epoch": 0.69, "grad_norm": 1.1405801633618593, "learning_rate": 4.559335758877691e-06, "loss": 0.5332, "step": 8928 }, { "epoch": 0.69, "grad_norm": 1.1148411045698017, "learning_rate": 4.557227693706724e-06, "loss": 0.5014, "step": 8929 }, { "epoch": 0.69, "grad_norm": 1.0837685859960804, "learning_rate": 4.555119972166055e-06, "loss": 0.4503, "step": 8930 }, { "epoch": 0.69, "grad_norm": 1.194008957901136, "learning_rate": 4.553012594388757e-06, "loss": 0.4709, "step": 8931 }, { "epoch": 0.69, "grad_norm": 1.1867817664225282, "learning_rate": 4.550905560507873e-06, "loss": 0.4792, "step": 8932 }, { "epoch": 0.69, "grad_norm": 1.126105083664196, "learning_rate": 4.548798870656434e-06, "loss": 0.4946, "step": 8933 }, { "epoch": 0.69, "grad_norm": 1.207783930392495, "learning_rate": 4.546692524967452e-06, "loss": 0.4909, "step": 8934 }, { "epoch": 0.69, "grad_norm": 1.2831898867128917, "learning_rate": 4.544586523573902e-06, "loss": 0.5173, "step": 8935 }, { "epoch": 0.69, "grad_norm": 1.1978771645487492, "learning_rate": 4.542480866608754e-06, "loss": 0.4947, "step": 8936 }, { "epoch": 0.69, "grad_norm": 1.2085342185432035, "learning_rate": 4.540375554204944e-06, "loss": 0.4989, "step": 8937 }, { "epoch": 0.69, "grad_norm": 1.3025025214319115, "learning_rate": 4.5382705864953915e-06, "loss": 0.5308, "step": 8938 }, { "epoch": 0.69, "grad_norm": 1.1459573447426146, "learning_rate": 4.536165963612995e-06, "loss": 0.5085, "step": 8939 }, { "epoch": 0.69, "grad_norm": 1.1297496562230431, "learning_rate": 4.534061685690633e-06, "loss": 0.4663, "step": 8940 }, { "epoch": 0.69, "grad_norm": 1.1866729265397942, "learning_rate": 4.531957752861161e-06, "loss": 0.5364, "step": 8941 }, { "epoch": 0.69, "grad_norm": 1.0903828152533144, "learning_rate": 4.5298541652574016e-06, "loss": 0.4813, "step": 8942 }, { "epoch": 0.69, "grad_norm": 1.130244481553155, "learning_rate": 4.527750923012172e-06, "loss": 0.5402, "step": 8943 }, { "epoch": 0.69, "grad_norm": 1.2343152671557724, "learning_rate": 4.525648026258264e-06, "loss": 0.5803, "step": 8944 }, { "epoch": 0.69, "grad_norm": 1.2548806750802566, "learning_rate": 4.5235454751284355e-06, "loss": 0.4966, "step": 8945 }, { "epoch": 0.69, "grad_norm": 1.2473058754399593, "learning_rate": 4.52144326975544e-06, "loss": 0.5163, "step": 8946 }, { "epoch": 0.69, "grad_norm": 1.2336144337481356, "learning_rate": 4.5193414102719935e-06, "loss": 0.5444, "step": 8947 }, { "epoch": 0.69, "grad_norm": 1.1298813881696133, "learning_rate": 4.5172398968108e-06, "loss": 0.4969, "step": 8948 }, { "epoch": 0.69, "grad_norm": 1.1866582597656203, "learning_rate": 4.515138729504539e-06, "loss": 0.5538, "step": 8949 }, { "epoch": 0.69, "grad_norm": 1.1886517811661692, "learning_rate": 4.513037908485868e-06, "loss": 0.496, "step": 8950 }, { "epoch": 0.69, "grad_norm": 1.1325533340223817, "learning_rate": 4.5109374338874254e-06, "loss": 0.5065, "step": 8951 }, { "epoch": 0.69, "grad_norm": 1.1962072975086617, "learning_rate": 4.508837305841821e-06, "loss": 0.4837, "step": 8952 }, { "epoch": 0.69, "grad_norm": 1.0876591160444244, "learning_rate": 4.506737524481647e-06, "loss": 0.4869, "step": 8953 }, { "epoch": 0.69, "grad_norm": 1.183129650818177, "learning_rate": 4.504638089939478e-06, "loss": 0.5118, "step": 8954 }, { "epoch": 0.69, "grad_norm": 1.1534481123128846, "learning_rate": 4.502539002347856e-06, "loss": 0.4832, "step": 8955 }, { "epoch": 0.69, "grad_norm": 1.0818674672799473, "learning_rate": 4.500440261839313e-06, "loss": 0.5361, "step": 8956 }, { "epoch": 0.69, "grad_norm": 1.1698920703078248, "learning_rate": 4.498341868546347e-06, "loss": 0.4734, "step": 8957 }, { "epoch": 0.69, "grad_norm": 1.1398021917892027, "learning_rate": 4.496243822601443e-06, "loss": 0.5307, "step": 8958 }, { "epoch": 0.7, "grad_norm": 1.116735567035923, "learning_rate": 4.494146124137062e-06, "loss": 0.5238, "step": 8959 }, { "epoch": 0.7, "grad_norm": 1.103240014150979, "learning_rate": 4.4920487732856425e-06, "loss": 0.4728, "step": 8960 }, { "epoch": 0.7, "grad_norm": 1.1926066870319436, "learning_rate": 4.489951770179606e-06, "loss": 0.5121, "step": 8961 }, { "epoch": 0.7, "grad_norm": 1.2199523570185355, "learning_rate": 4.487855114951337e-06, "loss": 0.503, "step": 8962 }, { "epoch": 0.7, "grad_norm": 1.20279337288249, "learning_rate": 4.485758807733215e-06, "loss": 0.5601, "step": 8963 }, { "epoch": 0.7, "grad_norm": 1.368473298483761, "learning_rate": 4.483662848657593e-06, "loss": 0.5541, "step": 8964 }, { "epoch": 0.7, "grad_norm": 1.294734705405784, "learning_rate": 4.481567237856792e-06, "loss": 0.5298, "step": 8965 }, { "epoch": 0.7, "grad_norm": 1.166117090254619, "learning_rate": 4.479471975463125e-06, "loss": 0.4939, "step": 8966 }, { "epoch": 0.7, "grad_norm": 1.1123919230908959, "learning_rate": 4.477377061608873e-06, "loss": 0.5006, "step": 8967 }, { "epoch": 0.7, "grad_norm": 1.2363939312962728, "learning_rate": 4.475282496426298e-06, "loss": 0.4813, "step": 8968 }, { "epoch": 0.7, "grad_norm": 1.2289990079082709, "learning_rate": 4.473188280047644e-06, "loss": 0.5464, "step": 8969 }, { "epoch": 0.7, "grad_norm": 1.2900006666107822, "learning_rate": 4.471094412605128e-06, "loss": 0.4877, "step": 8970 }, { "epoch": 0.7, "grad_norm": 1.2796262360790707, "learning_rate": 4.469000894230947e-06, "loss": 0.5291, "step": 8971 }, { "epoch": 0.7, "grad_norm": 1.438104419519546, "learning_rate": 4.46690772505728e-06, "loss": 0.5639, "step": 8972 }, { "epoch": 0.7, "grad_norm": 1.143021456793221, "learning_rate": 4.464814905216271e-06, "loss": 0.48, "step": 8973 }, { "epoch": 0.7, "grad_norm": 1.2237088483490053, "learning_rate": 4.46272243484006e-06, "loss": 0.5276, "step": 8974 }, { "epoch": 0.7, "grad_norm": 1.1318541583749597, "learning_rate": 4.4606303140607456e-06, "loss": 0.514, "step": 8975 }, { "epoch": 0.7, "grad_norm": 1.2253948412242925, "learning_rate": 4.458538543010418e-06, "loss": 0.5543, "step": 8976 }, { "epoch": 0.7, "grad_norm": 1.119878662042073, "learning_rate": 4.456447121821147e-06, "loss": 0.5048, "step": 8977 }, { "epoch": 0.7, "grad_norm": 1.1276534894180283, "learning_rate": 4.454356050624966e-06, "loss": 0.493, "step": 8978 }, { "epoch": 0.7, "grad_norm": 1.234279966921267, "learning_rate": 4.4522653295539e-06, "loss": 0.5141, "step": 8979 }, { "epoch": 0.7, "grad_norm": 1.161801799173484, "learning_rate": 4.450174958739945e-06, "loss": 0.4985, "step": 8980 }, { "epoch": 0.7, "grad_norm": 1.2165572663769564, "learning_rate": 4.448084938315079e-06, "loss": 0.5171, "step": 8981 }, { "epoch": 0.7, "grad_norm": 1.2023938917743457, "learning_rate": 4.4459952684112596e-06, "loss": 0.5307, "step": 8982 }, { "epoch": 0.7, "grad_norm": 1.1567017987070833, "learning_rate": 4.443905949160409e-06, "loss": 0.5185, "step": 8983 }, { "epoch": 0.7, "grad_norm": 1.1106023849048148, "learning_rate": 4.441816980694446e-06, "loss": 0.5237, "step": 8984 }, { "epoch": 0.7, "grad_norm": 1.1334221613424291, "learning_rate": 4.439728363145248e-06, "loss": 0.4591, "step": 8985 }, { "epoch": 0.7, "grad_norm": 1.1269532307801617, "learning_rate": 4.437640096644687e-06, "loss": 0.4717, "step": 8986 }, { "epoch": 0.7, "grad_norm": 1.128895162244155, "learning_rate": 4.435552181324608e-06, "loss": 0.5089, "step": 8987 }, { "epoch": 0.7, "grad_norm": 1.3376907087890968, "learning_rate": 4.433464617316825e-06, "loss": 0.5502, "step": 8988 }, { "epoch": 0.7, "grad_norm": 1.2668222016076862, "learning_rate": 4.431377404753141e-06, "loss": 0.5537, "step": 8989 }, { "epoch": 0.7, "grad_norm": 1.1287194331731123, "learning_rate": 4.429290543765331e-06, "loss": 0.4858, "step": 8990 }, { "epoch": 0.7, "grad_norm": 1.198636538055483, "learning_rate": 4.42720403448515e-06, "loss": 0.4949, "step": 8991 }, { "epoch": 0.7, "grad_norm": 1.2931505519136397, "learning_rate": 4.4251178770443334e-06, "loss": 0.5091, "step": 8992 }, { "epoch": 0.7, "grad_norm": 1.286485210966762, "learning_rate": 4.4230320715745855e-06, "loss": 0.5279, "step": 8993 }, { "epoch": 0.7, "grad_norm": 1.1815235426724866, "learning_rate": 4.420946618207599e-06, "loss": 0.5304, "step": 8994 }, { "epoch": 0.7, "grad_norm": 1.0962344154807344, "learning_rate": 4.418861517075034e-06, "loss": 0.4803, "step": 8995 }, { "epoch": 0.7, "grad_norm": 1.137327208337767, "learning_rate": 4.416776768308535e-06, "loss": 0.4889, "step": 8996 }, { "epoch": 0.7, "grad_norm": 1.2428856093571454, "learning_rate": 4.4146923720397285e-06, "loss": 0.5146, "step": 8997 }, { "epoch": 0.7, "grad_norm": 1.2433412100531351, "learning_rate": 4.412608328400205e-06, "loss": 0.5488, "step": 8998 }, { "epoch": 0.7, "grad_norm": 1.1965893676004902, "learning_rate": 4.410524637521545e-06, "loss": 0.489, "step": 8999 }, { "epoch": 0.7, "grad_norm": 1.1757382103022482, "learning_rate": 4.408441299535302e-06, "loss": 0.5419, "step": 9000 }, { "epoch": 0.7, "grad_norm": 1.1486985824584184, "learning_rate": 4.406358314573009e-06, "loss": 0.525, "step": 9001 }, { "epoch": 0.7, "grad_norm": 1.2258168280393857, "learning_rate": 4.404275682766179e-06, "loss": 0.5373, "step": 9002 }, { "epoch": 0.7, "grad_norm": 1.227782741932641, "learning_rate": 4.402193404246291e-06, "loss": 0.5119, "step": 9003 }, { "epoch": 0.7, "grad_norm": 1.3380673925194606, "learning_rate": 4.400111479144818e-06, "loss": 0.5753, "step": 9004 }, { "epoch": 0.7, "grad_norm": 1.3106562970981084, "learning_rate": 4.398029907593197e-06, "loss": 0.5248, "step": 9005 }, { "epoch": 0.7, "grad_norm": 1.226884593100216, "learning_rate": 4.395948689722847e-06, "loss": 0.5076, "step": 9006 }, { "epoch": 0.7, "grad_norm": 1.1029174263584895, "learning_rate": 4.393867825665176e-06, "loss": 0.4753, "step": 9007 }, { "epoch": 0.7, "grad_norm": 1.2616214773751981, "learning_rate": 4.391787315551548e-06, "loss": 0.5051, "step": 9008 }, { "epoch": 0.7, "grad_norm": 1.2207651351437323, "learning_rate": 4.389707159513321e-06, "loss": 0.5314, "step": 9009 }, { "epoch": 0.7, "grad_norm": 1.2251088463829816, "learning_rate": 4.387627357681827e-06, "loss": 0.5467, "step": 9010 }, { "epoch": 0.7, "grad_norm": 1.343512536291729, "learning_rate": 4.3855479101883735e-06, "loss": 0.4916, "step": 9011 }, { "epoch": 0.7, "grad_norm": 1.367318762881241, "learning_rate": 4.383468817164251e-06, "loss": 0.5804, "step": 9012 }, { "epoch": 0.7, "grad_norm": 1.1305766172162948, "learning_rate": 4.3813900787407175e-06, "loss": 0.5422, "step": 9013 }, { "epoch": 0.7, "grad_norm": 1.2445914561046298, "learning_rate": 4.379311695049018e-06, "loss": 0.5362, "step": 9014 }, { "epoch": 0.7, "grad_norm": 1.1317631037319846, "learning_rate": 4.377233666220368e-06, "loss": 0.478, "step": 9015 }, { "epoch": 0.7, "grad_norm": 1.1509479409748156, "learning_rate": 4.375155992385965e-06, "loss": 0.5206, "step": 9016 }, { "epoch": 0.7, "grad_norm": 1.2466173178209359, "learning_rate": 4.373078673676988e-06, "loss": 0.5729, "step": 9017 }, { "epoch": 0.7, "grad_norm": 1.1610129946655845, "learning_rate": 4.371001710224583e-06, "loss": 0.5047, "step": 9018 }, { "epoch": 0.7, "grad_norm": 1.3180130567265718, "learning_rate": 4.368925102159881e-06, "loss": 0.6043, "step": 9019 }, { "epoch": 0.7, "grad_norm": 1.2063429267943142, "learning_rate": 4.36684884961399e-06, "loss": 0.5395, "step": 9020 }, { "epoch": 0.7, "grad_norm": 1.063843830959475, "learning_rate": 4.364772952717995e-06, "loss": 0.4738, "step": 9021 }, { "epoch": 0.7, "grad_norm": 1.2126084642957016, "learning_rate": 4.362697411602961e-06, "loss": 0.5186, "step": 9022 }, { "epoch": 0.7, "grad_norm": 1.3467054468337225, "learning_rate": 4.36062222639992e-06, "loss": 0.5187, "step": 9023 }, { "epoch": 0.7, "grad_norm": 1.008991350422317, "learning_rate": 4.358547397239896e-06, "loss": 0.4546, "step": 9024 }, { "epoch": 0.7, "grad_norm": 1.1955933583724399, "learning_rate": 4.356472924253878e-06, "loss": 0.4827, "step": 9025 }, { "epoch": 0.7, "grad_norm": 1.167043466119726, "learning_rate": 4.354398807572841e-06, "loss": 0.5093, "step": 9026 }, { "epoch": 0.7, "grad_norm": 1.1582693045687293, "learning_rate": 4.352325047327735e-06, "loss": 0.4977, "step": 9027 }, { "epoch": 0.7, "grad_norm": 1.1859898752902895, "learning_rate": 4.350251643649491e-06, "loss": 0.5098, "step": 9028 }, { "epoch": 0.7, "grad_norm": 1.1979494118015914, "learning_rate": 4.348178596669006e-06, "loss": 0.5576, "step": 9029 }, { "epoch": 0.7, "grad_norm": 1.1964071908354001, "learning_rate": 4.346105906517165e-06, "loss": 0.5045, "step": 9030 }, { "epoch": 0.7, "grad_norm": 1.0921139606390746, "learning_rate": 4.344033573324829e-06, "loss": 0.5427, "step": 9031 }, { "epoch": 0.7, "grad_norm": 1.1642329456046006, "learning_rate": 4.341961597222837e-06, "loss": 0.517, "step": 9032 }, { "epoch": 0.7, "grad_norm": 1.2031739955685403, "learning_rate": 4.339889978341998e-06, "loss": 0.5285, "step": 9033 }, { "epoch": 0.7, "grad_norm": 1.1498781367708157, "learning_rate": 4.337818716813112e-06, "loss": 0.4556, "step": 9034 }, { "epoch": 0.7, "grad_norm": 1.1543747412532068, "learning_rate": 4.3357478127669376e-06, "loss": 0.5595, "step": 9035 }, { "epoch": 0.7, "grad_norm": 1.1339921793995993, "learning_rate": 4.333677266334228e-06, "loss": 0.5066, "step": 9036 }, { "epoch": 0.7, "grad_norm": 1.143158072634017, "learning_rate": 4.331607077645708e-06, "loss": 0.5123, "step": 9037 }, { "epoch": 0.7, "grad_norm": 1.130208145821317, "learning_rate": 4.329537246832081e-06, "loss": 0.5337, "step": 9038 }, { "epoch": 0.7, "grad_norm": 1.2730728282547898, "learning_rate": 4.32746777402402e-06, "loss": 0.569, "step": 9039 }, { "epoch": 0.7, "grad_norm": 1.2290372726524488, "learning_rate": 4.325398659352185e-06, "loss": 0.5115, "step": 9040 }, { "epoch": 0.7, "grad_norm": 1.1891656036390024, "learning_rate": 4.32332990294721e-06, "loss": 0.5049, "step": 9041 }, { "epoch": 0.7, "grad_norm": 1.1179078987243145, "learning_rate": 4.321261504939709e-06, "loss": 0.4646, "step": 9042 }, { "epoch": 0.7, "grad_norm": 1.1863923176972653, "learning_rate": 4.319193465460264e-06, "loss": 0.4984, "step": 9043 }, { "epoch": 0.7, "grad_norm": 1.1185951168319976, "learning_rate": 4.317125784639447e-06, "loss": 0.5301, "step": 9044 }, { "epoch": 0.7, "grad_norm": 1.176268315106781, "learning_rate": 4.315058462607798e-06, "loss": 0.5121, "step": 9045 }, { "epoch": 0.7, "grad_norm": 1.2470900997781682, "learning_rate": 4.312991499495838e-06, "loss": 0.4866, "step": 9046 }, { "epoch": 0.7, "grad_norm": 1.1629646808184575, "learning_rate": 4.3109248954340644e-06, "loss": 0.5193, "step": 9047 }, { "epoch": 0.7, "grad_norm": 1.2061735395796462, "learning_rate": 4.3088586505529584e-06, "loss": 0.5191, "step": 9048 }, { "epoch": 0.7, "grad_norm": 1.2681183920021164, "learning_rate": 4.306792764982964e-06, "loss": 0.5095, "step": 9049 }, { "epoch": 0.7, "grad_norm": 1.2479141952838317, "learning_rate": 4.304727238854517e-06, "loss": 0.5393, "step": 9050 }, { "epoch": 0.7, "grad_norm": 1.1450066734519282, "learning_rate": 4.302662072298022e-06, "loss": 0.5013, "step": 9051 }, { "epoch": 0.7, "grad_norm": 1.0914050441508638, "learning_rate": 4.300597265443869e-06, "loss": 0.4534, "step": 9052 }, { "epoch": 0.7, "grad_norm": 1.2837500488073235, "learning_rate": 4.298532818422411e-06, "loss": 0.5182, "step": 9053 }, { "epoch": 0.7, "grad_norm": 1.2041853653579049, "learning_rate": 4.296468731363996e-06, "loss": 0.5444, "step": 9054 }, { "epoch": 0.7, "grad_norm": 1.21620430761527, "learning_rate": 4.294405004398933e-06, "loss": 0.5009, "step": 9055 }, { "epoch": 0.7, "grad_norm": 1.0903579975321351, "learning_rate": 4.292341637657519e-06, "loss": 0.4994, "step": 9056 }, { "epoch": 0.7, "grad_norm": 1.2316055133144848, "learning_rate": 4.290278631270025e-06, "loss": 0.5261, "step": 9057 }, { "epoch": 0.7, "grad_norm": 1.0141801974465687, "learning_rate": 4.288215985366702e-06, "loss": 0.4315, "step": 9058 }, { "epoch": 0.7, "grad_norm": 1.0767958367420276, "learning_rate": 4.286153700077771e-06, "loss": 0.5066, "step": 9059 }, { "epoch": 0.7, "grad_norm": 1.177301988576125, "learning_rate": 4.284091775533436e-06, "loss": 0.5125, "step": 9060 }, { "epoch": 0.7, "grad_norm": 1.17649758655059, "learning_rate": 4.282030211863876e-06, "loss": 0.4898, "step": 9061 }, { "epoch": 0.7, "grad_norm": 1.182321550885035, "learning_rate": 4.279969009199254e-06, "loss": 0.5145, "step": 9062 }, { "epoch": 0.7, "grad_norm": 1.051888488755576, "learning_rate": 4.277908167669696e-06, "loss": 0.4722, "step": 9063 }, { "epoch": 0.7, "grad_norm": 1.1668018137901053, "learning_rate": 4.275847687405323e-06, "loss": 0.4904, "step": 9064 }, { "epoch": 0.7, "grad_norm": 1.2407152580271852, "learning_rate": 4.273787568536212e-06, "loss": 0.5195, "step": 9065 }, { "epoch": 0.7, "grad_norm": 1.1544091804206766, "learning_rate": 4.271727811192437e-06, "loss": 0.5086, "step": 9066 }, { "epoch": 0.7, "grad_norm": 1.2598565593694218, "learning_rate": 4.269668415504038e-06, "loss": 0.5537, "step": 9067 }, { "epoch": 0.7, "grad_norm": 1.2668573949086492, "learning_rate": 4.2676093816010415e-06, "loss": 0.5703, "step": 9068 }, { "epoch": 0.7, "grad_norm": 1.1430526921599626, "learning_rate": 4.265550709613435e-06, "loss": 0.507, "step": 9069 }, { "epoch": 0.7, "grad_norm": 1.252846718803969, "learning_rate": 4.263492399671198e-06, "loss": 0.5123, "step": 9070 }, { "epoch": 0.7, "grad_norm": 1.3555196703050318, "learning_rate": 4.261434451904284e-06, "loss": 0.5459, "step": 9071 }, { "epoch": 0.7, "grad_norm": 1.229745028360689, "learning_rate": 4.259376866442623e-06, "loss": 0.4873, "step": 9072 }, { "epoch": 0.7, "grad_norm": 1.255695955313812, "learning_rate": 4.2573196434161135e-06, "loss": 0.5207, "step": 9073 }, { "epoch": 0.7, "grad_norm": 1.1803718754360313, "learning_rate": 4.255262782954648e-06, "loss": 0.5578, "step": 9074 }, { "epoch": 0.7, "grad_norm": 1.2344349713786555, "learning_rate": 4.253206285188079e-06, "loss": 0.5368, "step": 9075 }, { "epoch": 0.7, "grad_norm": 1.4108657953876795, "learning_rate": 4.251150150246245e-06, "loss": 0.575, "step": 9076 }, { "epoch": 0.7, "grad_norm": 1.1229832374123374, "learning_rate": 4.249094378258962e-06, "loss": 0.4783, "step": 9077 }, { "epoch": 0.7, "grad_norm": 1.3340768628431836, "learning_rate": 4.247038969356027e-06, "loss": 0.4533, "step": 9078 }, { "epoch": 0.7, "grad_norm": 1.1259958839578421, "learning_rate": 4.244983923667199e-06, "loss": 0.4871, "step": 9079 }, { "epoch": 0.7, "grad_norm": 1.2534941474403605, "learning_rate": 4.242929241322228e-06, "loss": 0.5428, "step": 9080 }, { "epoch": 0.7, "grad_norm": 1.213416091686869, "learning_rate": 4.2408749224508365e-06, "loss": 0.5219, "step": 9081 }, { "epoch": 0.7, "grad_norm": 1.2258433280673946, "learning_rate": 4.238820967182727e-06, "loss": 0.5277, "step": 9082 }, { "epoch": 0.7, "grad_norm": 1.2000336503238085, "learning_rate": 4.236767375647572e-06, "loss": 0.5672, "step": 9083 }, { "epoch": 0.7, "grad_norm": 1.2368155869858863, "learning_rate": 4.234714147975029e-06, "loss": 0.4728, "step": 9084 }, { "epoch": 0.7, "grad_norm": 1.2055063337003815, "learning_rate": 4.2326612842947225e-06, "loss": 0.4983, "step": 9085 }, { "epoch": 0.7, "grad_norm": 1.1654620366044424, "learning_rate": 4.230608784736267e-06, "loss": 0.4837, "step": 9086 }, { "epoch": 0.7, "grad_norm": 1.060445313482052, "learning_rate": 4.228556649429243e-06, "loss": 0.4678, "step": 9087 }, { "epoch": 0.71, "grad_norm": 1.2776551406629741, "learning_rate": 4.226504878503215e-06, "loss": 0.5178, "step": 9088 }, { "epoch": 0.71, "grad_norm": 1.299215066965357, "learning_rate": 4.224453472087725e-06, "loss": 0.5927, "step": 9089 }, { "epoch": 0.71, "grad_norm": 1.2028548692915406, "learning_rate": 4.2224024303122826e-06, "loss": 0.5724, "step": 9090 }, { "epoch": 0.71, "grad_norm": 1.1625284929782964, "learning_rate": 4.220351753306382e-06, "loss": 0.5354, "step": 9091 }, { "epoch": 0.71, "grad_norm": 1.2092231674892457, "learning_rate": 4.218301441199499e-06, "loss": 0.498, "step": 9092 }, { "epoch": 0.71, "grad_norm": 1.2556249895186018, "learning_rate": 4.216251494121071e-06, "loss": 0.5134, "step": 9093 }, { "epoch": 0.71, "grad_norm": 1.190240959246921, "learning_rate": 4.2142019122005295e-06, "loss": 0.5008, "step": 9094 }, { "epoch": 0.71, "grad_norm": 1.2247433627140112, "learning_rate": 4.21215269556727e-06, "loss": 0.5458, "step": 9095 }, { "epoch": 0.71, "grad_norm": 1.1163885821258495, "learning_rate": 4.210103844350671e-06, "loss": 0.5175, "step": 9096 }, { "epoch": 0.71, "grad_norm": 1.226696031433189, "learning_rate": 4.208055358680089e-06, "loss": 0.5964, "step": 9097 }, { "epoch": 0.71, "grad_norm": 1.1163335884639174, "learning_rate": 4.2060072386848535e-06, "loss": 0.5178, "step": 9098 }, { "epoch": 0.71, "grad_norm": 1.1274637159781098, "learning_rate": 4.20395948449428e-06, "loss": 0.542, "step": 9099 }, { "epoch": 0.71, "grad_norm": 1.1440689663572732, "learning_rate": 4.201912096237643e-06, "loss": 0.5457, "step": 9100 }, { "epoch": 0.71, "grad_norm": 1.169601422015864, "learning_rate": 4.1998650740442096e-06, "loss": 0.5159, "step": 9101 }, { "epoch": 0.71, "grad_norm": 1.284009938207532, "learning_rate": 4.197818418043221e-06, "loss": 0.5242, "step": 9102 }, { "epoch": 0.71, "grad_norm": 1.236423482727802, "learning_rate": 4.19577212836389e-06, "loss": 0.5448, "step": 9103 }, { "epoch": 0.71, "grad_norm": 1.31498469448973, "learning_rate": 4.193726205135412e-06, "loss": 0.5721, "step": 9104 }, { "epoch": 0.71, "grad_norm": 1.2688654635652281, "learning_rate": 4.191680648486952e-06, "loss": 0.5855, "step": 9105 }, { "epoch": 0.71, "grad_norm": 1.2228991964848057, "learning_rate": 4.18963545854766e-06, "loss": 0.5339, "step": 9106 }, { "epoch": 0.71, "grad_norm": 1.2019483088265752, "learning_rate": 4.187590635446659e-06, "loss": 0.4596, "step": 9107 }, { "epoch": 0.71, "grad_norm": 1.2065018168663766, "learning_rate": 4.185546179313049e-06, "loss": 0.5293, "step": 9108 }, { "epoch": 0.71, "grad_norm": 1.1991801302279743, "learning_rate": 4.183502090275911e-06, "loss": 0.536, "step": 9109 }, { "epoch": 0.71, "grad_norm": 1.115730402479264, "learning_rate": 4.181458368464293e-06, "loss": 0.5061, "step": 9110 }, { "epoch": 0.71, "grad_norm": 1.1519502646849555, "learning_rate": 4.179415014007227e-06, "loss": 0.5176, "step": 9111 }, { "epoch": 0.71, "grad_norm": 1.3001728181410255, "learning_rate": 4.177372027033724e-06, "loss": 0.5684, "step": 9112 }, { "epoch": 0.71, "grad_norm": 1.3123773335854758, "learning_rate": 4.175329407672763e-06, "loss": 0.5427, "step": 9113 }, { "epoch": 0.71, "grad_norm": 1.2562943768610535, "learning_rate": 4.1732871560533105e-06, "loss": 0.5222, "step": 9114 }, { "epoch": 0.71, "grad_norm": 1.1608950128392148, "learning_rate": 4.1712452723043e-06, "loss": 0.5034, "step": 9115 }, { "epoch": 0.71, "grad_norm": 1.1486048673782692, "learning_rate": 4.169203756554646e-06, "loss": 0.5158, "step": 9116 }, { "epoch": 0.71, "grad_norm": 1.2299901122184236, "learning_rate": 4.167162608933243e-06, "loss": 0.5481, "step": 9117 }, { "epoch": 0.71, "grad_norm": 1.2585822172956644, "learning_rate": 4.1651218295689576e-06, "loss": 0.5697, "step": 9118 }, { "epoch": 0.71, "grad_norm": 1.0749486489562008, "learning_rate": 4.163081418590639e-06, "loss": 0.4492, "step": 9119 }, { "epoch": 0.71, "grad_norm": 1.214341185408686, "learning_rate": 4.1610413761271005e-06, "loss": 0.4727, "step": 9120 }, { "epoch": 0.71, "grad_norm": 1.228963118472335, "learning_rate": 4.159001702307146e-06, "loss": 0.5474, "step": 9121 }, { "epoch": 0.71, "grad_norm": 1.1330237356091868, "learning_rate": 4.156962397259553e-06, "loss": 0.5053, "step": 9122 }, { "epoch": 0.71, "grad_norm": 1.2549235652733375, "learning_rate": 4.154923461113066e-06, "loss": 0.5257, "step": 9123 }, { "epoch": 0.71, "grad_norm": 1.2300608125245454, "learning_rate": 4.152884893996421e-06, "loss": 0.5583, "step": 9124 }, { "epoch": 0.71, "grad_norm": 1.4199510909777995, "learning_rate": 4.1508466960383165e-06, "loss": 0.5789, "step": 9125 }, { "epoch": 0.71, "grad_norm": 1.2658145432943335, "learning_rate": 4.148808867367438e-06, "loss": 0.5786, "step": 9126 }, { "epoch": 0.71, "grad_norm": 1.32282018935476, "learning_rate": 4.146771408112443e-06, "loss": 0.5474, "step": 9127 }, { "epoch": 0.71, "grad_norm": 1.1603524912284828, "learning_rate": 4.144734318401969e-06, "loss": 0.4861, "step": 9128 }, { "epoch": 0.71, "grad_norm": 1.3406056276002114, "learning_rate": 4.14269759836463e-06, "loss": 0.5396, "step": 9129 }, { "epoch": 0.71, "grad_norm": 1.1821412090179717, "learning_rate": 4.1406612481290066e-06, "loss": 0.5314, "step": 9130 }, { "epoch": 0.71, "grad_norm": 1.1814413107502952, "learning_rate": 4.138625267823669e-06, "loss": 0.527, "step": 9131 }, { "epoch": 0.71, "grad_norm": 1.1621023482422803, "learning_rate": 4.136589657577164e-06, "loss": 0.5375, "step": 9132 }, { "epoch": 0.71, "grad_norm": 1.0597174054546443, "learning_rate": 4.134554417518001e-06, "loss": 0.4648, "step": 9133 }, { "epoch": 0.71, "grad_norm": 1.2100368965450459, "learning_rate": 4.132519547774678e-06, "loss": 0.4732, "step": 9134 }, { "epoch": 0.71, "grad_norm": 1.1344728601033054, "learning_rate": 4.130485048475673e-06, "loss": 0.4928, "step": 9135 }, { "epoch": 0.71, "grad_norm": 1.1242668624030703, "learning_rate": 4.128450919749426e-06, "loss": 0.5065, "step": 9136 }, { "epoch": 0.71, "grad_norm": 1.183035841785023, "learning_rate": 4.1264171617243655e-06, "loss": 0.4838, "step": 9137 }, { "epoch": 0.71, "grad_norm": 1.303411122112108, "learning_rate": 4.124383774528893e-06, "loss": 0.5825, "step": 9138 }, { "epoch": 0.71, "grad_norm": 1.0176304311360211, "learning_rate": 4.122350758291387e-06, "loss": 0.449, "step": 9139 }, { "epoch": 0.71, "grad_norm": 1.0302434112367764, "learning_rate": 4.120318113140207e-06, "loss": 0.4652, "step": 9140 }, { "epoch": 0.71, "grad_norm": 1.2378909581991357, "learning_rate": 4.118285839203675e-06, "loss": 0.5182, "step": 9141 }, { "epoch": 0.71, "grad_norm": 0.9856735021138948, "learning_rate": 4.116253936610107e-06, "loss": 0.453, "step": 9142 }, { "epoch": 0.71, "grad_norm": 1.2048638893752568, "learning_rate": 4.114222405487781e-06, "loss": 0.4992, "step": 9143 }, { "epoch": 0.71, "grad_norm": 1.297629217501277, "learning_rate": 4.112191245964962e-06, "loss": 0.5592, "step": 9144 }, { "epoch": 0.71, "grad_norm": 1.156058527583534, "learning_rate": 4.110160458169888e-06, "loss": 0.4844, "step": 9145 }, { "epoch": 0.71, "grad_norm": 1.3217858214541571, "learning_rate": 4.10813004223077e-06, "loss": 0.5343, "step": 9146 }, { "epoch": 0.71, "grad_norm": 1.253360379913028, "learning_rate": 4.106099998275801e-06, "loss": 0.5238, "step": 9147 }, { "epoch": 0.71, "grad_norm": 1.2262744929893845, "learning_rate": 4.104070326433146e-06, "loss": 0.5616, "step": 9148 }, { "epoch": 0.71, "grad_norm": 1.2276122345279603, "learning_rate": 4.102041026830952e-06, "loss": 0.5136, "step": 9149 }, { "epoch": 0.71, "grad_norm": 1.212738027509996, "learning_rate": 4.100012099597339e-06, "loss": 0.5255, "step": 9150 }, { "epoch": 0.71, "grad_norm": 1.1425299740288044, "learning_rate": 4.0979835448604e-06, "loss": 0.5023, "step": 9151 }, { "epoch": 0.71, "grad_norm": 1.3176413608263036, "learning_rate": 4.095955362748214e-06, "loss": 0.5592, "step": 9152 }, { "epoch": 0.71, "grad_norm": 1.127117178097935, "learning_rate": 4.093927553388822e-06, "loss": 0.5445, "step": 9153 }, { "epoch": 0.71, "grad_norm": 1.1204352488501264, "learning_rate": 4.091900116910256e-06, "loss": 0.4846, "step": 9154 }, { "epoch": 0.71, "grad_norm": 1.1651226055315875, "learning_rate": 4.089873053440521e-06, "loss": 0.4943, "step": 9155 }, { "epoch": 0.71, "grad_norm": 1.2361898480506883, "learning_rate": 4.087846363107588e-06, "loss": 0.5198, "step": 9156 }, { "epoch": 0.71, "grad_norm": 1.1158977613207686, "learning_rate": 4.085820046039417e-06, "loss": 0.5175, "step": 9157 }, { "epoch": 0.71, "grad_norm": 1.2050143677895648, "learning_rate": 4.083794102363939e-06, "loss": 0.5262, "step": 9158 }, { "epoch": 0.71, "grad_norm": 1.1567126714496982, "learning_rate": 4.081768532209064e-06, "loss": 0.5194, "step": 9159 }, { "epoch": 0.71, "grad_norm": 1.2458617852490823, "learning_rate": 4.079743335702679e-06, "loss": 0.5365, "step": 9160 }, { "epoch": 0.71, "grad_norm": 1.2723189514059556, "learning_rate": 4.077718512972638e-06, "loss": 0.5193, "step": 9161 }, { "epoch": 0.71, "grad_norm": 1.1843739572485206, "learning_rate": 4.075694064146786e-06, "loss": 0.5017, "step": 9162 }, { "epoch": 0.71, "grad_norm": 1.090513126352009, "learning_rate": 4.07366998935293e-06, "loss": 0.5133, "step": 9163 }, { "epoch": 0.71, "grad_norm": 1.1978434277688532, "learning_rate": 4.071646288718863e-06, "loss": 0.5584, "step": 9164 }, { "epoch": 0.71, "grad_norm": 1.280004733568618, "learning_rate": 4.069622962372355e-06, "loss": 0.5757, "step": 9165 }, { "epoch": 0.71, "grad_norm": 1.147629849602213, "learning_rate": 4.067600010441143e-06, "loss": 0.5448, "step": 9166 }, { "epoch": 0.71, "grad_norm": 1.1799589438488978, "learning_rate": 4.06557743305295e-06, "loss": 0.5137, "step": 9167 }, { "epoch": 0.71, "grad_norm": 1.066421508679898, "learning_rate": 4.06355523033547e-06, "loss": 0.4865, "step": 9168 }, { "epoch": 0.71, "grad_norm": 1.2605736322107632, "learning_rate": 4.0615334024163775e-06, "loss": 0.5355, "step": 9169 }, { "epoch": 0.71, "grad_norm": 1.3299224359080555, "learning_rate": 4.059511949423322e-06, "loss": 0.5626, "step": 9170 }, { "epoch": 0.71, "grad_norm": 1.1347075823385895, "learning_rate": 4.0574908714839245e-06, "loss": 0.4856, "step": 9171 }, { "epoch": 0.71, "grad_norm": 1.2916612727555072, "learning_rate": 4.05547016872579e-06, "loss": 0.5258, "step": 9172 }, { "epoch": 0.71, "grad_norm": 1.2433949486104487, "learning_rate": 4.05344984127649e-06, "loss": 0.5682, "step": 9173 }, { "epoch": 0.71, "grad_norm": 1.1087062593108779, "learning_rate": 4.051429889263582e-06, "loss": 0.5214, "step": 9174 }, { "epoch": 0.71, "grad_norm": 1.1999989807601414, "learning_rate": 4.049410312814598e-06, "loss": 0.5506, "step": 9175 }, { "epoch": 0.71, "grad_norm": 1.1401144478887495, "learning_rate": 4.0473911120570396e-06, "loss": 0.5099, "step": 9176 }, { "epoch": 0.71, "grad_norm": 1.1448725689436217, "learning_rate": 4.045372287118391e-06, "loss": 0.4827, "step": 9177 }, { "epoch": 0.71, "grad_norm": 1.2162708106617781, "learning_rate": 4.043353838126113e-06, "loss": 0.533, "step": 9178 }, { "epoch": 0.71, "grad_norm": 1.1807670453691057, "learning_rate": 4.041335765207638e-06, "loss": 0.4799, "step": 9179 }, { "epoch": 0.71, "grad_norm": 1.1492767381592752, "learning_rate": 4.039318068490383e-06, "loss": 0.5103, "step": 9180 }, { "epoch": 0.71, "grad_norm": 1.1679734960351158, "learning_rate": 4.037300748101728e-06, "loss": 0.5272, "step": 9181 }, { "epoch": 0.71, "grad_norm": 1.1251872754517631, "learning_rate": 4.0352838041690435e-06, "loss": 0.52, "step": 9182 }, { "epoch": 0.71, "grad_norm": 1.0471188844736952, "learning_rate": 4.033267236819664e-06, "loss": 0.4527, "step": 9183 }, { "epoch": 0.71, "grad_norm": 1.2601790348648976, "learning_rate": 4.031251046180906e-06, "loss": 0.5334, "step": 9184 }, { "epoch": 0.71, "grad_norm": 1.1409762116141424, "learning_rate": 4.029235232380069e-06, "loss": 0.4469, "step": 9185 }, { "epoch": 0.71, "grad_norm": 1.2055794092143792, "learning_rate": 4.027219795544413e-06, "loss": 0.5227, "step": 9186 }, { "epoch": 0.71, "grad_norm": 1.1711845398384093, "learning_rate": 4.025204735801187e-06, "loss": 0.5013, "step": 9187 }, { "epoch": 0.71, "grad_norm": 1.2251611953867116, "learning_rate": 4.023190053277612e-06, "loss": 0.5534, "step": 9188 }, { "epoch": 0.71, "grad_norm": 1.3760132957304065, "learning_rate": 4.021175748100885e-06, "loss": 0.546, "step": 9189 }, { "epoch": 0.71, "grad_norm": 1.2826929108683494, "learning_rate": 4.019161820398183e-06, "loss": 0.5515, "step": 9190 }, { "epoch": 0.71, "grad_norm": 1.2477387001536355, "learning_rate": 4.017148270296652e-06, "loss": 0.5109, "step": 9191 }, { "epoch": 0.71, "grad_norm": 1.1340588257751902, "learning_rate": 4.015135097923416e-06, "loss": 0.4834, "step": 9192 }, { "epoch": 0.71, "grad_norm": 1.1923391723145023, "learning_rate": 4.013122303405579e-06, "loss": 0.5053, "step": 9193 }, { "epoch": 0.71, "grad_norm": 1.0859296510261816, "learning_rate": 4.011109886870218e-06, "loss": 0.4916, "step": 9194 }, { "epoch": 0.71, "grad_norm": 1.188481577703572, "learning_rate": 4.009097848444389e-06, "loss": 0.4899, "step": 9195 }, { "epoch": 0.71, "grad_norm": 1.2902014122683947, "learning_rate": 4.007086188255125e-06, "loss": 0.5312, "step": 9196 }, { "epoch": 0.71, "grad_norm": 1.113974104660785, "learning_rate": 4.005074906429426e-06, "loss": 0.4828, "step": 9197 }, { "epoch": 0.71, "grad_norm": 1.149717331437313, "learning_rate": 4.003064003094278e-06, "loss": 0.5427, "step": 9198 }, { "epoch": 0.71, "grad_norm": 1.2601682980476445, "learning_rate": 4.00105347837664e-06, "loss": 0.4983, "step": 9199 }, { "epoch": 0.71, "grad_norm": 1.2905802721304305, "learning_rate": 3.99904333240345e-06, "loss": 0.5543, "step": 9200 }, { "epoch": 0.71, "grad_norm": 1.235720757964405, "learning_rate": 3.9970335653016146e-06, "loss": 0.5093, "step": 9201 }, { "epoch": 0.71, "grad_norm": 1.1723006937589986, "learning_rate": 3.995024177198018e-06, "loss": 0.5109, "step": 9202 }, { "epoch": 0.71, "grad_norm": 1.2080630241241732, "learning_rate": 3.993015168219527e-06, "loss": 0.5561, "step": 9203 }, { "epoch": 0.71, "grad_norm": 1.1754927677336082, "learning_rate": 3.991006538492981e-06, "loss": 0.5244, "step": 9204 }, { "epoch": 0.71, "grad_norm": 1.0976940433607125, "learning_rate": 3.9889982881451924e-06, "loss": 0.4569, "step": 9205 }, { "epoch": 0.71, "grad_norm": 1.2162140603160938, "learning_rate": 3.98699041730296e-06, "loss": 0.5393, "step": 9206 }, { "epoch": 0.71, "grad_norm": 1.292645094085734, "learning_rate": 3.9849829260930416e-06, "loss": 0.5385, "step": 9207 }, { "epoch": 0.71, "grad_norm": 1.175047382454505, "learning_rate": 3.982975814642185e-06, "loss": 0.4959, "step": 9208 }, { "epoch": 0.71, "grad_norm": 1.1190184414991386, "learning_rate": 3.980969083077108e-06, "loss": 0.4977, "step": 9209 }, { "epoch": 0.71, "grad_norm": 1.242665756574865, "learning_rate": 3.9789627315245115e-06, "loss": 0.5725, "step": 9210 }, { "epoch": 0.71, "grad_norm": 1.2050901440186441, "learning_rate": 3.976956760111061e-06, "loss": 0.4904, "step": 9211 }, { "epoch": 0.71, "grad_norm": 1.2056426916015859, "learning_rate": 3.9749511689634025e-06, "loss": 0.4892, "step": 9212 }, { "epoch": 0.71, "grad_norm": 1.13164091363705, "learning_rate": 3.972945958208162e-06, "loss": 0.5123, "step": 9213 }, { "epoch": 0.71, "grad_norm": 1.222067840656587, "learning_rate": 3.9709411279719375e-06, "loss": 0.5824, "step": 9214 }, { "epoch": 0.71, "grad_norm": 1.193208575671176, "learning_rate": 3.968936678381307e-06, "loss": 0.4893, "step": 9215 }, { "epoch": 0.71, "grad_norm": 1.1840454347598417, "learning_rate": 3.966932609562822e-06, "loss": 0.5458, "step": 9216 }, { "epoch": 0.72, "grad_norm": 1.1467082700186808, "learning_rate": 3.964928921643006e-06, "loss": 0.4631, "step": 9217 }, { "epoch": 0.72, "grad_norm": 1.2393138442963556, "learning_rate": 3.962925614748363e-06, "loss": 0.5464, "step": 9218 }, { "epoch": 0.72, "grad_norm": 1.3700323098960099, "learning_rate": 3.960922689005373e-06, "loss": 0.4768, "step": 9219 }, { "epoch": 0.72, "grad_norm": 1.1401455537833538, "learning_rate": 3.958920144540496e-06, "loss": 0.4996, "step": 9220 }, { "epoch": 0.72, "grad_norm": 1.1672620900129118, "learning_rate": 3.956917981480156e-06, "loss": 0.5197, "step": 9221 }, { "epoch": 0.72, "grad_norm": 1.3190673735491731, "learning_rate": 3.95491619995076e-06, "loss": 0.5718, "step": 9222 }, { "epoch": 0.72, "grad_norm": 1.1202619171428119, "learning_rate": 3.952914800078693e-06, "loss": 0.4645, "step": 9223 }, { "epoch": 0.72, "grad_norm": 1.0950828605375833, "learning_rate": 3.950913781990313e-06, "loss": 0.4546, "step": 9224 }, { "epoch": 0.72, "grad_norm": 1.1518040831950365, "learning_rate": 3.948913145811956e-06, "loss": 0.461, "step": 9225 }, { "epoch": 0.72, "grad_norm": 1.2472318516288499, "learning_rate": 3.946912891669934e-06, "loss": 0.5452, "step": 9226 }, { "epoch": 0.72, "grad_norm": 1.2215676138110405, "learning_rate": 3.9449130196905275e-06, "loss": 0.5369, "step": 9227 }, { "epoch": 0.72, "grad_norm": 1.3032043158454456, "learning_rate": 3.942913530000002e-06, "loss": 0.555, "step": 9228 }, { "epoch": 0.72, "grad_norm": 1.1938240407780623, "learning_rate": 3.940914422724597e-06, "loss": 0.5054, "step": 9229 }, { "epoch": 0.72, "grad_norm": 1.1565339538794939, "learning_rate": 3.938915697990528e-06, "loss": 0.5335, "step": 9230 }, { "epoch": 0.72, "grad_norm": 1.1029229387009631, "learning_rate": 3.936917355923982e-06, "loss": 0.5061, "step": 9231 }, { "epoch": 0.72, "grad_norm": 1.8393031660460972, "learning_rate": 3.934919396651121e-06, "loss": 0.4768, "step": 9232 }, { "epoch": 0.72, "grad_norm": 1.0909862477478638, "learning_rate": 3.932921820298091e-06, "loss": 0.4682, "step": 9233 }, { "epoch": 0.72, "grad_norm": 1.1714377541215373, "learning_rate": 3.930924626991008e-06, "loss": 0.5307, "step": 9234 }, { "epoch": 0.72, "grad_norm": 1.0746758060867339, "learning_rate": 3.9289278168559665e-06, "loss": 0.4797, "step": 9235 }, { "epoch": 0.72, "grad_norm": 1.224787356935798, "learning_rate": 3.926931390019036e-06, "loss": 0.524, "step": 9236 }, { "epoch": 0.72, "grad_norm": 1.2695473537890891, "learning_rate": 3.9249353466062575e-06, "loss": 0.479, "step": 9237 }, { "epoch": 0.72, "grad_norm": 1.0773588581976077, "learning_rate": 3.922939686743655e-06, "loss": 0.4738, "step": 9238 }, { "epoch": 0.72, "grad_norm": 1.1997496085399793, "learning_rate": 3.920944410557222e-06, "loss": 0.489, "step": 9239 }, { "epoch": 0.72, "grad_norm": 1.3874839713055724, "learning_rate": 3.918949518172936e-06, "loss": 0.5511, "step": 9240 }, { "epoch": 0.72, "grad_norm": 1.2583054234186295, "learning_rate": 3.916955009716741e-06, "loss": 0.5154, "step": 9241 }, { "epoch": 0.72, "grad_norm": 1.2227689063482987, "learning_rate": 3.914960885314557e-06, "loss": 0.5448, "step": 9242 }, { "epoch": 0.72, "grad_norm": 1.16173659043508, "learning_rate": 3.912967145092288e-06, "loss": 0.4664, "step": 9243 }, { "epoch": 0.72, "grad_norm": 1.1343483871827158, "learning_rate": 3.910973789175807e-06, "loss": 0.4785, "step": 9244 }, { "epoch": 0.72, "grad_norm": 1.2999481190819453, "learning_rate": 3.908980817690966e-06, "loss": 0.5031, "step": 9245 }, { "epoch": 0.72, "grad_norm": 1.166828990043494, "learning_rate": 3.906988230763592e-06, "loss": 0.5191, "step": 9246 }, { "epoch": 0.72, "grad_norm": 1.2320738964646103, "learning_rate": 3.90499602851949e-06, "loss": 0.5247, "step": 9247 }, { "epoch": 0.72, "grad_norm": 1.0787309243116863, "learning_rate": 3.903004211084431e-06, "loss": 0.4801, "step": 9248 }, { "epoch": 0.72, "grad_norm": 1.2106633029721567, "learning_rate": 3.901012778584172e-06, "loss": 0.5072, "step": 9249 }, { "epoch": 0.72, "grad_norm": 1.07588100483352, "learning_rate": 3.8990217311444475e-06, "loss": 0.4942, "step": 9250 }, { "epoch": 0.72, "grad_norm": 1.087377677264296, "learning_rate": 3.897031068890954e-06, "loss": 0.4642, "step": 9251 }, { "epoch": 0.72, "grad_norm": 1.2158418829662845, "learning_rate": 3.89504079194938e-06, "loss": 0.5453, "step": 9252 }, { "epoch": 0.72, "grad_norm": 1.2622034423304875, "learning_rate": 3.893050900445375e-06, "loss": 0.5472, "step": 9253 }, { "epoch": 0.72, "grad_norm": 1.170307178765335, "learning_rate": 3.891061394504575e-06, "loss": 0.4958, "step": 9254 }, { "epoch": 0.72, "grad_norm": 1.2740703952480115, "learning_rate": 3.889072274252586e-06, "loss": 0.5372, "step": 9255 }, { "epoch": 0.72, "grad_norm": 1.0767860390907344, "learning_rate": 3.887083539814993e-06, "loss": 0.462, "step": 9256 }, { "epoch": 0.72, "grad_norm": 1.1228507492638438, "learning_rate": 3.885095191317357e-06, "loss": 0.5027, "step": 9257 }, { "epoch": 0.72, "grad_norm": 1.275574558425168, "learning_rate": 3.883107228885209e-06, "loss": 0.5603, "step": 9258 }, { "epoch": 0.72, "grad_norm": 1.2504615885109909, "learning_rate": 3.881119652644059e-06, "loss": 0.5159, "step": 9259 }, { "epoch": 0.72, "grad_norm": 1.2307179032223476, "learning_rate": 3.8791324627193996e-06, "loss": 0.5248, "step": 9260 }, { "epoch": 0.72, "grad_norm": 1.2152022050641655, "learning_rate": 3.877145659236682e-06, "loss": 0.5123, "step": 9261 }, { "epoch": 0.72, "grad_norm": 1.0877914520480494, "learning_rate": 3.875159242321353e-06, "loss": 0.4731, "step": 9262 }, { "epoch": 0.72, "grad_norm": 1.258201728448149, "learning_rate": 3.873173212098818e-06, "loss": 0.5634, "step": 9263 }, { "epoch": 0.72, "grad_norm": 1.1698818295460138, "learning_rate": 3.871187568694468e-06, "loss": 0.5068, "step": 9264 }, { "epoch": 0.72, "grad_norm": 1.157321330507798, "learning_rate": 3.869202312233668e-06, "loss": 0.4622, "step": 9265 }, { "epoch": 0.72, "grad_norm": 1.1319227210042582, "learning_rate": 3.8672174428417555e-06, "loss": 0.4614, "step": 9266 }, { "epoch": 0.72, "grad_norm": 1.148809100385579, "learning_rate": 3.865232960644051e-06, "loss": 0.4694, "step": 9267 }, { "epoch": 0.72, "grad_norm": 1.1638486524944238, "learning_rate": 3.8632488657658375e-06, "loss": 0.5079, "step": 9268 }, { "epoch": 0.72, "grad_norm": 1.2258697789002149, "learning_rate": 3.861265158332383e-06, "loss": 0.508, "step": 9269 }, { "epoch": 0.72, "grad_norm": 1.2231359546231242, "learning_rate": 3.859281838468937e-06, "loss": 0.494, "step": 9270 }, { "epoch": 0.72, "grad_norm": 1.0851564411468009, "learning_rate": 3.857298906300705e-06, "loss": 0.5212, "step": 9271 }, { "epoch": 0.72, "grad_norm": 1.1181771756531849, "learning_rate": 3.85531636195289e-06, "loss": 0.4744, "step": 9272 }, { "epoch": 0.72, "grad_norm": 1.174684995672424, "learning_rate": 3.8533342055506505e-06, "loss": 0.5124, "step": 9273 }, { "epoch": 0.72, "grad_norm": 1.2072320172393856, "learning_rate": 3.851352437219137e-06, "loss": 0.5216, "step": 9274 }, { "epoch": 0.72, "grad_norm": 1.2233539577134949, "learning_rate": 3.849371057083465e-06, "loss": 0.4894, "step": 9275 }, { "epoch": 0.72, "grad_norm": 1.0967186558255064, "learning_rate": 3.8473900652687336e-06, "loss": 0.4987, "step": 9276 }, { "epoch": 0.72, "grad_norm": 1.1696979900748865, "learning_rate": 3.845409461900012e-06, "loss": 0.523, "step": 9277 }, { "epoch": 0.72, "grad_norm": 1.2535209658025441, "learning_rate": 3.843429247102343e-06, "loss": 0.5261, "step": 9278 }, { "epoch": 0.72, "grad_norm": 1.2239009383792616, "learning_rate": 3.841449421000748e-06, "loss": 0.5455, "step": 9279 }, { "epoch": 0.72, "grad_norm": 1.2086710731908556, "learning_rate": 3.839469983720229e-06, "loss": 0.5539, "step": 9280 }, { "epoch": 0.72, "grad_norm": 1.2962919556201942, "learning_rate": 3.837490935385751e-06, "loss": 0.521, "step": 9281 }, { "epoch": 0.72, "grad_norm": 1.156273609642032, "learning_rate": 3.835512276122267e-06, "loss": 0.4906, "step": 9282 }, { "epoch": 0.72, "grad_norm": 1.156572348535469, "learning_rate": 3.833534006054694e-06, "loss": 0.5028, "step": 9283 }, { "epoch": 0.72, "grad_norm": 1.282507442207382, "learning_rate": 3.8315561253079344e-06, "loss": 0.5582, "step": 9284 }, { "epoch": 0.72, "grad_norm": 1.1559097717375064, "learning_rate": 3.829578634006862e-06, "loss": 0.4893, "step": 9285 }, { "epoch": 0.72, "grad_norm": 1.3159027042105154, "learning_rate": 3.827601532276325e-06, "loss": 0.5633, "step": 9286 }, { "epoch": 0.72, "grad_norm": 1.0551041521847013, "learning_rate": 3.825624820241153e-06, "loss": 0.471, "step": 9287 }, { "epoch": 0.72, "grad_norm": 1.2604334281038085, "learning_rate": 3.823648498026138e-06, "loss": 0.5181, "step": 9288 }, { "epoch": 0.72, "grad_norm": 1.0864020801487757, "learning_rate": 3.821672565756058e-06, "loss": 0.4641, "step": 9289 }, { "epoch": 0.72, "grad_norm": 1.2304104745779894, "learning_rate": 3.81969702355567e-06, "loss": 0.5294, "step": 9290 }, { "epoch": 0.72, "grad_norm": 1.1659333728808234, "learning_rate": 3.8177218715496915e-06, "loss": 0.5302, "step": 9291 }, { "epoch": 0.72, "grad_norm": 1.2711768181215732, "learning_rate": 3.8157471098628295e-06, "loss": 0.5153, "step": 9292 }, { "epoch": 0.72, "grad_norm": 1.210046895986327, "learning_rate": 3.8137727386197564e-06, "loss": 0.4897, "step": 9293 }, { "epoch": 0.72, "grad_norm": 1.110144496052831, "learning_rate": 3.8117987579451275e-06, "loss": 0.5018, "step": 9294 }, { "epoch": 0.72, "grad_norm": 1.1120689885879511, "learning_rate": 3.809825167963569e-06, "loss": 0.4739, "step": 9295 }, { "epoch": 0.72, "grad_norm": 1.2287830249776177, "learning_rate": 3.807851968799685e-06, "loss": 0.5394, "step": 9296 }, { "epoch": 0.72, "grad_norm": 1.255823019162969, "learning_rate": 3.8058791605780577e-06, "loss": 0.5251, "step": 9297 }, { "epoch": 0.72, "grad_norm": 1.2025825962242997, "learning_rate": 3.8039067434232324e-06, "loss": 0.5203, "step": 9298 }, { "epoch": 0.72, "grad_norm": 1.182422625084933, "learning_rate": 3.8019347174597454e-06, "loss": 0.5131, "step": 9299 }, { "epoch": 0.72, "grad_norm": 1.1508865194348052, "learning_rate": 3.7999630828120947e-06, "loss": 0.4717, "step": 9300 }, { "epoch": 0.72, "grad_norm": 1.2077238694374473, "learning_rate": 3.797991839604762e-06, "loss": 0.4728, "step": 9301 }, { "epoch": 0.72, "grad_norm": 1.1981971191287568, "learning_rate": 3.7960209879622025e-06, "loss": 0.5582, "step": 9302 }, { "epoch": 0.72, "grad_norm": 1.2085117285131313, "learning_rate": 3.79405052800885e-06, "loss": 0.5187, "step": 9303 }, { "epoch": 0.72, "grad_norm": 1.2688084819074847, "learning_rate": 3.792080459869103e-06, "loss": 0.5462, "step": 9304 }, { "epoch": 0.72, "grad_norm": 1.157197615506893, "learning_rate": 3.7901107836673444e-06, "loss": 0.4585, "step": 9305 }, { "epoch": 0.72, "grad_norm": 1.3535944047380466, "learning_rate": 3.788141499527932e-06, "loss": 0.5848, "step": 9306 }, { "epoch": 0.72, "grad_norm": 1.2523252793994752, "learning_rate": 3.7861726075751948e-06, "loss": 0.5391, "step": 9307 }, { "epoch": 0.72, "grad_norm": 1.130220644601081, "learning_rate": 3.7842041079334446e-06, "loss": 0.5103, "step": 9308 }, { "epoch": 0.72, "grad_norm": 1.1395874007092592, "learning_rate": 3.7822360007269564e-06, "loss": 0.4914, "step": 9309 }, { "epoch": 0.72, "grad_norm": 1.225183720338488, "learning_rate": 3.780268286079988e-06, "loss": 0.5236, "step": 9310 }, { "epoch": 0.72, "grad_norm": 1.2772257339134647, "learning_rate": 3.77830096411677e-06, "loss": 0.5128, "step": 9311 }, { "epoch": 0.72, "grad_norm": 1.1542111540849496, "learning_rate": 3.776334034961513e-06, "loss": 0.4983, "step": 9312 }, { "epoch": 0.72, "grad_norm": 1.269094350904054, "learning_rate": 3.7743674987384017e-06, "loss": 0.5637, "step": 9313 }, { "epoch": 0.72, "grad_norm": 1.1873309366443683, "learning_rate": 3.7724013555715867e-06, "loss": 0.4959, "step": 9314 }, { "epoch": 0.72, "grad_norm": 1.1918699769112768, "learning_rate": 3.7704356055852043e-06, "loss": 0.4748, "step": 9315 }, { "epoch": 0.72, "grad_norm": 1.225826018022242, "learning_rate": 3.768470248903362e-06, "loss": 0.549, "step": 9316 }, { "epoch": 0.72, "grad_norm": 1.2046000863309725, "learning_rate": 3.766505285650144e-06, "loss": 0.5535, "step": 9317 }, { "epoch": 0.72, "grad_norm": 1.3875861235687084, "learning_rate": 3.7645407159496104e-06, "loss": 0.4658, "step": 9318 }, { "epoch": 0.72, "grad_norm": 1.1329643936925746, "learning_rate": 3.762576539925793e-06, "loss": 0.4972, "step": 9319 }, { "epoch": 0.72, "grad_norm": 1.1519205641777288, "learning_rate": 3.7606127577026965e-06, "loss": 0.4955, "step": 9320 }, { "epoch": 0.72, "grad_norm": 1.2569953206809021, "learning_rate": 3.7586493694043068e-06, "loss": 0.5415, "step": 9321 }, { "epoch": 0.72, "grad_norm": 1.1525568878134829, "learning_rate": 3.7566863751545833e-06, "loss": 0.5547, "step": 9322 }, { "epoch": 0.72, "grad_norm": 1.26928328219844, "learning_rate": 3.7547237750774647e-06, "loss": 0.6111, "step": 9323 }, { "epoch": 0.72, "grad_norm": 1.2869445046862316, "learning_rate": 3.7527615692968513e-06, "loss": 0.5336, "step": 9324 }, { "epoch": 0.72, "grad_norm": 1.2728515868503767, "learning_rate": 3.7507997579366317e-06, "loss": 0.5836, "step": 9325 }, { "epoch": 0.72, "grad_norm": 1.2223000296959026, "learning_rate": 3.7488383411206654e-06, "loss": 0.4893, "step": 9326 }, { "epoch": 0.72, "grad_norm": 1.1165511446358587, "learning_rate": 3.7468773189727857e-06, "loss": 0.4623, "step": 9327 }, { "epoch": 0.72, "grad_norm": 1.2386484173370365, "learning_rate": 3.744916691616807e-06, "loss": 0.551, "step": 9328 }, { "epoch": 0.72, "grad_norm": 1.1764325844011927, "learning_rate": 3.742956459176508e-06, "loss": 0.5148, "step": 9329 }, { "epoch": 0.72, "grad_norm": 1.2103421624387882, "learning_rate": 3.7409966217756477e-06, "loss": 0.4758, "step": 9330 }, { "epoch": 0.72, "grad_norm": 1.1721515583496773, "learning_rate": 3.739037179537962e-06, "loss": 0.5186, "step": 9331 }, { "epoch": 0.72, "grad_norm": 1.2036209075252815, "learning_rate": 3.737078132587163e-06, "loss": 0.4936, "step": 9332 }, { "epoch": 0.72, "grad_norm": 1.2664356225929865, "learning_rate": 3.735119481046936e-06, "loss": 0.5838, "step": 9333 }, { "epoch": 0.72, "grad_norm": 1.1488216043265833, "learning_rate": 3.7331612250409354e-06, "loss": 0.5309, "step": 9334 }, { "epoch": 0.72, "grad_norm": 1.2473215971686313, "learning_rate": 3.7312033646928004e-06, "loss": 0.5032, "step": 9335 }, { "epoch": 0.72, "grad_norm": 1.1328500675680737, "learning_rate": 3.7292459001261383e-06, "loss": 0.5307, "step": 9336 }, { "epoch": 0.72, "grad_norm": 1.1376216488021684, "learning_rate": 3.7272888314645363e-06, "loss": 0.4674, "step": 9337 }, { "epoch": 0.72, "grad_norm": 1.0640978859102415, "learning_rate": 3.725332158831556e-06, "loss": 0.4742, "step": 9338 }, { "epoch": 0.72, "grad_norm": 1.1563694350958473, "learning_rate": 3.7233758823507303e-06, "loss": 0.5684, "step": 9339 }, { "epoch": 0.72, "grad_norm": 1.2073625523468285, "learning_rate": 3.7214200021455647e-06, "loss": 0.5513, "step": 9340 }, { "epoch": 0.72, "grad_norm": 1.0869069334826291, "learning_rate": 3.719464518339547e-06, "loss": 0.4856, "step": 9341 }, { "epoch": 0.72, "grad_norm": 1.206105936173928, "learning_rate": 3.7175094310561375e-06, "loss": 0.5413, "step": 9342 }, { "epoch": 0.72, "grad_norm": 1.1111378699630299, "learning_rate": 3.7155547404187754e-06, "loss": 0.498, "step": 9343 }, { "epoch": 0.72, "grad_norm": 1.177880273388775, "learning_rate": 3.7136004465508624e-06, "loss": 0.5172, "step": 9344 }, { "epoch": 0.72, "grad_norm": 1.2648978322502944, "learning_rate": 3.711646549575786e-06, "loss": 0.5064, "step": 9345 }, { "epoch": 0.73, "grad_norm": 1.184756623638608, "learning_rate": 3.709693049616907e-06, "loss": 0.546, "step": 9346 }, { "epoch": 0.73, "grad_norm": 1.1899467659110678, "learning_rate": 3.7077399467975594e-06, "loss": 0.5459, "step": 9347 }, { "epoch": 0.73, "grad_norm": 1.0921243302791952, "learning_rate": 3.7057872412410566e-06, "loss": 0.5071, "step": 9348 }, { "epoch": 0.73, "grad_norm": 1.1971435484638682, "learning_rate": 3.703834933070679e-06, "loss": 0.5243, "step": 9349 }, { "epoch": 0.73, "grad_norm": 1.1763220775707472, "learning_rate": 3.7018830224096824e-06, "loss": 0.5178, "step": 9350 }, { "epoch": 0.73, "grad_norm": 1.102764582988765, "learning_rate": 3.6999315093813048e-06, "loss": 0.475, "step": 9351 }, { "epoch": 0.73, "grad_norm": 1.2368553929560284, "learning_rate": 3.6979803941087546e-06, "loss": 0.5676, "step": 9352 }, { "epoch": 0.73, "grad_norm": 1.1846536353887762, "learning_rate": 3.696029676715216e-06, "loss": 0.5264, "step": 9353 }, { "epoch": 0.73, "grad_norm": 1.159076072220522, "learning_rate": 3.694079357323853e-06, "loss": 0.5438, "step": 9354 }, { "epoch": 0.73, "grad_norm": 1.1696867794164079, "learning_rate": 3.69212943605779e-06, "loss": 0.5435, "step": 9355 }, { "epoch": 0.73, "grad_norm": 1.3939657052937475, "learning_rate": 3.6901799130401393e-06, "loss": 0.5959, "step": 9356 }, { "epoch": 0.73, "grad_norm": 1.1136429048675198, "learning_rate": 3.688230788393986e-06, "loss": 0.5107, "step": 9357 }, { "epoch": 0.73, "grad_norm": 1.1645852265771934, "learning_rate": 3.6862820622423913e-06, "loss": 0.4796, "step": 9358 }, { "epoch": 0.73, "grad_norm": 1.2764804267413663, "learning_rate": 3.684333734708384e-06, "loss": 0.5308, "step": 9359 }, { "epoch": 0.73, "grad_norm": 1.157024845618142, "learning_rate": 3.68238580591497e-06, "loss": 0.4817, "step": 9360 }, { "epoch": 0.73, "grad_norm": 1.194352506209174, "learning_rate": 3.680438275985133e-06, "loss": 0.536, "step": 9361 }, { "epoch": 0.73, "grad_norm": 1.2729384959961452, "learning_rate": 3.6784911450418337e-06, "loss": 0.5242, "step": 9362 }, { "epoch": 0.73, "grad_norm": 1.2697319928428525, "learning_rate": 3.676544413208002e-06, "loss": 0.5193, "step": 9363 }, { "epoch": 0.73, "grad_norm": 1.1208878703598866, "learning_rate": 3.6745980806065507e-06, "loss": 0.5217, "step": 9364 }, { "epoch": 0.73, "grad_norm": 1.2064323544404632, "learning_rate": 3.6726521473603525e-06, "loss": 0.5193, "step": 9365 }, { "epoch": 0.73, "grad_norm": 1.1178337308868647, "learning_rate": 3.670706613592271e-06, "loss": 0.4714, "step": 9366 }, { "epoch": 0.73, "grad_norm": 1.2075579336733664, "learning_rate": 3.6687614794251348e-06, "loss": 0.4973, "step": 9367 }, { "epoch": 0.73, "grad_norm": 1.2252114989204077, "learning_rate": 3.6668167449817548e-06, "loss": 0.5328, "step": 9368 }, { "epoch": 0.73, "grad_norm": 1.0726376636781376, "learning_rate": 3.6648724103849086e-06, "loss": 0.5084, "step": 9369 }, { "epoch": 0.73, "grad_norm": 1.2401961193041842, "learning_rate": 3.662928475757348e-06, "loss": 0.54, "step": 9370 }, { "epoch": 0.73, "grad_norm": 1.196323989046907, "learning_rate": 3.6609849412218092e-06, "loss": 0.5113, "step": 9371 }, { "epoch": 0.73, "grad_norm": 1.068363894046801, "learning_rate": 3.6590418069009947e-06, "loss": 0.5007, "step": 9372 }, { "epoch": 0.73, "grad_norm": 1.1908152130453231, "learning_rate": 3.657099072917587e-06, "loss": 0.4856, "step": 9373 }, { "epoch": 0.73, "grad_norm": 1.080447418377442, "learning_rate": 3.6551567393942422e-06, "loss": 0.5003, "step": 9374 }, { "epoch": 0.73, "grad_norm": 1.3184209964620741, "learning_rate": 3.6532148064535855e-06, "loss": 0.5416, "step": 9375 }, { "epoch": 0.73, "grad_norm": 1.1106599699835906, "learning_rate": 3.6512732742182223e-06, "loss": 0.4943, "step": 9376 }, { "epoch": 0.73, "grad_norm": 1.2009596722491962, "learning_rate": 3.649332142810732e-06, "loss": 0.5597, "step": 9377 }, { "epoch": 0.73, "grad_norm": 1.1328043970278625, "learning_rate": 3.6473914123536725e-06, "loss": 0.4907, "step": 9378 }, { "epoch": 0.73, "grad_norm": 1.1424279269100082, "learning_rate": 3.645451082969569e-06, "loss": 0.4784, "step": 9379 }, { "epoch": 0.73, "grad_norm": 1.282054323015519, "learning_rate": 3.64351115478092e-06, "loss": 0.5729, "step": 9380 }, { "epoch": 0.73, "grad_norm": 1.1911864030959958, "learning_rate": 3.6415716279102065e-06, "loss": 0.4772, "step": 9381 }, { "epoch": 0.73, "grad_norm": 1.2475869729832347, "learning_rate": 3.6396325024798817e-06, "loss": 0.5345, "step": 9382 }, { "epoch": 0.73, "grad_norm": 1.1954470789327094, "learning_rate": 3.6376937786123722e-06, "loss": 0.4915, "step": 9383 }, { "epoch": 0.73, "grad_norm": 1.1322935823192142, "learning_rate": 3.6357554564300824e-06, "loss": 0.4622, "step": 9384 }, { "epoch": 0.73, "grad_norm": 1.1771800696308359, "learning_rate": 3.6338175360553827e-06, "loss": 0.59, "step": 9385 }, { "epoch": 0.73, "grad_norm": 1.1472558929996926, "learning_rate": 3.6318800176106283e-06, "loss": 0.524, "step": 9386 }, { "epoch": 0.73, "grad_norm": 1.2529121808013093, "learning_rate": 3.629942901218142e-06, "loss": 0.5223, "step": 9387 }, { "epoch": 0.73, "grad_norm": 1.1593039107827388, "learning_rate": 3.6280061870002303e-06, "loss": 0.5269, "step": 9388 }, { "epoch": 0.73, "grad_norm": 1.1656866328331474, "learning_rate": 3.6260698750791624e-06, "loss": 0.4952, "step": 9389 }, { "epoch": 0.73, "grad_norm": 1.239932045920407, "learning_rate": 3.6241339655771844e-06, "loss": 0.5095, "step": 9390 }, { "epoch": 0.73, "grad_norm": 1.131180264763077, "learning_rate": 3.6221984586165247e-06, "loss": 0.4894, "step": 9391 }, { "epoch": 0.73, "grad_norm": 1.1253704944466592, "learning_rate": 3.620263354319382e-06, "loss": 0.5021, "step": 9392 }, { "epoch": 0.73, "grad_norm": 1.2463284932080216, "learning_rate": 3.6183286528079287e-06, "loss": 0.5387, "step": 9393 }, { "epoch": 0.73, "grad_norm": 1.1575249524650533, "learning_rate": 3.6163943542043156e-06, "loss": 0.464, "step": 9394 }, { "epoch": 0.73, "grad_norm": 1.2383408396729982, "learning_rate": 3.61446045863066e-06, "loss": 0.5171, "step": 9395 }, { "epoch": 0.73, "grad_norm": 1.238486769314854, "learning_rate": 3.612526966209059e-06, "loss": 0.529, "step": 9396 }, { "epoch": 0.73, "grad_norm": 1.2254701840879783, "learning_rate": 3.6105938770615902e-06, "loss": 0.5012, "step": 9397 }, { "epoch": 0.73, "grad_norm": 1.1532174112150761, "learning_rate": 3.608661191310291e-06, "loss": 0.4944, "step": 9398 }, { "epoch": 0.73, "grad_norm": 1.2398912811302558, "learning_rate": 3.6067289090771883e-06, "loss": 0.5186, "step": 9399 }, { "epoch": 0.73, "grad_norm": 1.2043771544409003, "learning_rate": 3.6047970304842727e-06, "loss": 0.5218, "step": 9400 }, { "epoch": 0.73, "grad_norm": 1.207110405459133, "learning_rate": 3.6028655556535142e-06, "loss": 0.4893, "step": 9401 }, { "epoch": 0.73, "grad_norm": 1.232905756876433, "learning_rate": 3.600934484706858e-06, "loss": 0.506, "step": 9402 }, { "epoch": 0.73, "grad_norm": 1.203818579569143, "learning_rate": 3.5990038177662234e-06, "loss": 0.5199, "step": 9403 }, { "epoch": 0.73, "grad_norm": 1.1479331225942453, "learning_rate": 3.5970735549535065e-06, "loss": 0.4979, "step": 9404 }, { "epoch": 0.73, "grad_norm": 2.278866293278656, "learning_rate": 3.5951436963905663e-06, "loss": 0.5025, "step": 9405 }, { "epoch": 0.73, "grad_norm": 1.2012205036523376, "learning_rate": 3.59321424219925e-06, "loss": 0.5143, "step": 9406 }, { "epoch": 0.73, "grad_norm": 1.2108788260734868, "learning_rate": 3.591285192501376e-06, "loss": 0.5444, "step": 9407 }, { "epoch": 0.73, "grad_norm": 1.1544723764603242, "learning_rate": 3.58935654741873e-06, "loss": 0.4352, "step": 9408 }, { "epoch": 0.73, "grad_norm": 1.148496509515144, "learning_rate": 3.5874283070730787e-06, "loss": 0.4986, "step": 9409 }, { "epoch": 0.73, "grad_norm": 1.1626005272827842, "learning_rate": 3.585500471586166e-06, "loss": 0.4759, "step": 9410 }, { "epoch": 0.73, "grad_norm": 1.2613769635207994, "learning_rate": 3.583573041079701e-06, "loss": 0.5354, "step": 9411 }, { "epoch": 0.73, "grad_norm": 1.2848791038105478, "learning_rate": 3.581646015675374e-06, "loss": 0.5735, "step": 9412 }, { "epoch": 0.73, "grad_norm": 1.2028704782746054, "learning_rate": 3.579719395494847e-06, "loss": 0.5234, "step": 9413 }, { "epoch": 0.73, "grad_norm": 1.266301056942487, "learning_rate": 3.577793180659761e-06, "loss": 0.5607, "step": 9414 }, { "epoch": 0.73, "grad_norm": 1.194764504072646, "learning_rate": 3.575867371291728e-06, "loss": 0.5197, "step": 9415 }, { "epoch": 0.73, "grad_norm": 1.2372307879532023, "learning_rate": 3.5739419675123275e-06, "loss": 0.5199, "step": 9416 }, { "epoch": 0.73, "grad_norm": 1.2479295750089943, "learning_rate": 3.5720169694431294e-06, "loss": 0.5336, "step": 9417 }, { "epoch": 0.73, "grad_norm": 1.1309637835173993, "learning_rate": 3.5700923772056606e-06, "loss": 0.5064, "step": 9418 }, { "epoch": 0.73, "grad_norm": 1.1952385973056323, "learning_rate": 3.5681681909214338e-06, "loss": 0.5145, "step": 9419 }, { "epoch": 0.73, "grad_norm": 1.1421187039594531, "learning_rate": 3.5662444107119365e-06, "loss": 0.5244, "step": 9420 }, { "epoch": 0.73, "grad_norm": 1.2065019650749584, "learning_rate": 3.5643210366986205e-06, "loss": 0.4596, "step": 9421 }, { "epoch": 0.73, "grad_norm": 1.217841616804464, "learning_rate": 3.5623980690029202e-06, "loss": 0.5398, "step": 9422 }, { "epoch": 0.73, "grad_norm": 1.185905238983062, "learning_rate": 3.560475507746244e-06, "loss": 0.4767, "step": 9423 }, { "epoch": 0.73, "grad_norm": 1.2575858723497246, "learning_rate": 3.5585533530499726e-06, "loss": 0.585, "step": 9424 }, { "epoch": 0.73, "grad_norm": 1.1313473928267659, "learning_rate": 3.556631605035464e-06, "loss": 0.4843, "step": 9425 }, { "epoch": 0.73, "grad_norm": 1.1414290429149618, "learning_rate": 3.554710263824043e-06, "loss": 0.5394, "step": 9426 }, { "epoch": 0.73, "grad_norm": 1.2171740490778613, "learning_rate": 3.5527893295370196e-06, "loss": 0.5175, "step": 9427 }, { "epoch": 0.73, "grad_norm": 1.1237341328689883, "learning_rate": 3.550868802295666e-06, "loss": 0.4835, "step": 9428 }, { "epoch": 0.73, "grad_norm": 1.2128348468234456, "learning_rate": 3.548948682221238e-06, "loss": 0.5069, "step": 9429 }, { "epoch": 0.73, "grad_norm": 1.1712848957770694, "learning_rate": 3.547028969434966e-06, "loss": 0.5175, "step": 9430 }, { "epoch": 0.73, "grad_norm": 1.1700183147235923, "learning_rate": 3.545109664058044e-06, "loss": 0.4947, "step": 9431 }, { "epoch": 0.73, "grad_norm": 1.1560020696463091, "learning_rate": 3.5431907662116528e-06, "loss": 0.4864, "step": 9432 }, { "epoch": 0.73, "grad_norm": 1.1559592732082835, "learning_rate": 3.5412722760169403e-06, "loss": 0.4897, "step": 9433 }, { "epoch": 0.73, "grad_norm": 1.2422307804652986, "learning_rate": 3.5393541935950327e-06, "loss": 0.4787, "step": 9434 }, { "epoch": 0.73, "grad_norm": 1.1150600744751, "learning_rate": 3.53743651906703e-06, "loss": 0.4936, "step": 9435 }, { "epoch": 0.73, "grad_norm": 1.2099919226668443, "learning_rate": 3.5355192525539996e-06, "loss": 0.5095, "step": 9436 }, { "epoch": 0.73, "grad_norm": 1.1187619938553792, "learning_rate": 3.5336023941769947e-06, "loss": 0.4575, "step": 9437 }, { "epoch": 0.73, "grad_norm": 1.186138376304257, "learning_rate": 3.5316859440570284e-06, "loss": 0.5227, "step": 9438 }, { "epoch": 0.73, "grad_norm": 1.2782591359979427, "learning_rate": 3.5297699023151013e-06, "loss": 0.5308, "step": 9439 }, { "epoch": 0.73, "grad_norm": 1.1651090487413547, "learning_rate": 3.527854269072186e-06, "loss": 0.5115, "step": 9440 }, { "epoch": 0.73, "grad_norm": 1.3675091174447633, "learning_rate": 3.525939044449218e-06, "loss": 0.5431, "step": 9441 }, { "epoch": 0.73, "grad_norm": 1.346361284464671, "learning_rate": 3.524024228567121e-06, "loss": 0.5604, "step": 9442 }, { "epoch": 0.73, "grad_norm": 1.3062517759320174, "learning_rate": 3.5221098215467852e-06, "loss": 0.561, "step": 9443 }, { "epoch": 0.73, "grad_norm": 1.0930372368035426, "learning_rate": 3.520195823509078e-06, "loss": 0.4577, "step": 9444 }, { "epoch": 0.73, "grad_norm": 1.2473986259965422, "learning_rate": 3.518282234574845e-06, "loss": 0.5526, "step": 9445 }, { "epoch": 0.73, "grad_norm": 1.1495726288844041, "learning_rate": 3.5163690548648898e-06, "loss": 0.4712, "step": 9446 }, { "epoch": 0.73, "grad_norm": 1.2018343455500333, "learning_rate": 3.514456284500012e-06, "loss": 0.5224, "step": 9447 }, { "epoch": 0.73, "grad_norm": 1.2390626674041059, "learning_rate": 3.5125439236009674e-06, "loss": 0.4882, "step": 9448 }, { "epoch": 0.73, "grad_norm": 1.2288681034053315, "learning_rate": 3.510631972288494e-06, "loss": 0.5326, "step": 9449 }, { "epoch": 0.73, "grad_norm": 1.220903059798556, "learning_rate": 3.508720430683309e-06, "loss": 0.5283, "step": 9450 }, { "epoch": 0.73, "grad_norm": 1.289878263421879, "learning_rate": 3.5068092989060907e-06, "loss": 0.5671, "step": 9451 }, { "epoch": 0.73, "grad_norm": 1.2561371349287989, "learning_rate": 3.504898577077502e-06, "loss": 0.5717, "step": 9452 }, { "epoch": 0.73, "grad_norm": 1.1130103065993753, "learning_rate": 3.502988265318176e-06, "loss": 0.5147, "step": 9453 }, { "epoch": 0.73, "grad_norm": 1.1680463682174476, "learning_rate": 3.501078363748721e-06, "loss": 0.5658, "step": 9454 }, { "epoch": 0.73, "grad_norm": 1.1959511042076074, "learning_rate": 3.4991688724897223e-06, "loss": 0.5251, "step": 9455 }, { "epoch": 0.73, "grad_norm": 1.2124657616254628, "learning_rate": 3.49725979166173e-06, "loss": 0.5339, "step": 9456 }, { "epoch": 0.73, "grad_norm": 1.2430410271807268, "learning_rate": 3.4953511213852785e-06, "loss": 0.5359, "step": 9457 }, { "epoch": 0.73, "grad_norm": 1.2150701085024065, "learning_rate": 3.493442861780868e-06, "loss": 0.5163, "step": 9458 }, { "epoch": 0.73, "grad_norm": 1.179041755224835, "learning_rate": 3.4915350129689798e-06, "loss": 0.5008, "step": 9459 }, { "epoch": 0.73, "grad_norm": 1.2507684253080418, "learning_rate": 3.489627575070068e-06, "loss": 0.5181, "step": 9460 }, { "epoch": 0.73, "grad_norm": 1.153933860123063, "learning_rate": 3.487720548204553e-06, "loss": 0.4959, "step": 9461 }, { "epoch": 0.73, "grad_norm": 1.1792388972523369, "learning_rate": 3.4858139324928388e-06, "loss": 0.5336, "step": 9462 }, { "epoch": 0.73, "grad_norm": 1.2206944335628083, "learning_rate": 3.4839077280553e-06, "loss": 0.5073, "step": 9463 }, { "epoch": 0.73, "grad_norm": 1.0341639531048983, "learning_rate": 3.4820019350122847e-06, "loss": 0.462, "step": 9464 }, { "epoch": 0.73, "grad_norm": 1.1788765349286616, "learning_rate": 3.4800965534841158e-06, "loss": 0.538, "step": 9465 }, { "epoch": 0.73, "grad_norm": 1.1516457722963354, "learning_rate": 3.4781915835910927e-06, "loss": 0.4807, "step": 9466 }, { "epoch": 0.73, "grad_norm": 1.1233892565729153, "learning_rate": 3.476287025453484e-06, "loss": 0.5268, "step": 9467 }, { "epoch": 0.73, "grad_norm": 1.2700723287306177, "learning_rate": 3.474382879191529e-06, "loss": 0.5311, "step": 9468 }, { "epoch": 0.73, "grad_norm": 1.0271704349893325, "learning_rate": 3.472479144925451e-06, "loss": 0.4595, "step": 9469 }, { "epoch": 0.73, "grad_norm": 1.3595503770670667, "learning_rate": 3.4705758227754426e-06, "loss": 0.5599, "step": 9470 }, { "epoch": 0.73, "grad_norm": 1.125043974122935, "learning_rate": 3.4686729128616726e-06, "loss": 0.5038, "step": 9471 }, { "epoch": 0.73, "grad_norm": 1.0696727204924688, "learning_rate": 3.4667704153042758e-06, "loss": 0.4675, "step": 9472 }, { "epoch": 0.73, "grad_norm": 1.1897369697532185, "learning_rate": 3.46486833022337e-06, "loss": 0.5165, "step": 9473 }, { "epoch": 0.73, "grad_norm": 1.167925728608404, "learning_rate": 3.462966657739042e-06, "loss": 0.5232, "step": 9474 }, { "epoch": 0.74, "grad_norm": 1.187137699071726, "learning_rate": 3.461065397971357e-06, "loss": 0.4585, "step": 9475 }, { "epoch": 0.74, "grad_norm": 1.2564532594742266, "learning_rate": 3.4591645510403528e-06, "loss": 0.6012, "step": 9476 }, { "epoch": 0.74, "grad_norm": 1.213434954141269, "learning_rate": 3.457264117066037e-06, "loss": 0.4945, "step": 9477 }, { "epoch": 0.74, "grad_norm": 1.0629648986683833, "learning_rate": 3.45536409616839e-06, "loss": 0.4273, "step": 9478 }, { "epoch": 0.74, "grad_norm": 1.2954013218813911, "learning_rate": 3.453464488467373e-06, "loss": 0.5355, "step": 9479 }, { "epoch": 0.74, "grad_norm": 1.1615639825277282, "learning_rate": 3.4515652940829192e-06, "loss": 0.452, "step": 9480 }, { "epoch": 0.74, "grad_norm": 1.1676074333507098, "learning_rate": 3.449666513134937e-06, "loss": 0.4498, "step": 9481 }, { "epoch": 0.74, "grad_norm": 1.1809245312098, "learning_rate": 3.4477681457433e-06, "loss": 0.5256, "step": 9482 }, { "epoch": 0.74, "grad_norm": 1.2467951698848505, "learning_rate": 3.4458701920278646e-06, "loss": 0.5342, "step": 9483 }, { "epoch": 0.74, "grad_norm": 1.1513420276950452, "learning_rate": 3.4439726521084595e-06, "loss": 0.5352, "step": 9484 }, { "epoch": 0.74, "grad_norm": 1.2094466846127767, "learning_rate": 3.4420755261048843e-06, "loss": 0.4968, "step": 9485 }, { "epoch": 0.74, "grad_norm": 1.2738476894154822, "learning_rate": 3.4401788141369196e-06, "loss": 0.5073, "step": 9486 }, { "epoch": 0.74, "grad_norm": 1.28192293126791, "learning_rate": 3.4382825163243106e-06, "loss": 0.5609, "step": 9487 }, { "epoch": 0.74, "grad_norm": 1.0975383545901471, "learning_rate": 3.4363866327867768e-06, "loss": 0.5158, "step": 9488 }, { "epoch": 0.74, "grad_norm": 1.360212221697563, "learning_rate": 3.434491163644019e-06, "loss": 0.5538, "step": 9489 }, { "epoch": 0.74, "grad_norm": 1.0967503946937076, "learning_rate": 3.432596109015708e-06, "loss": 0.5207, "step": 9490 }, { "epoch": 0.74, "grad_norm": 1.2211018879937154, "learning_rate": 3.430701469021491e-06, "loss": 0.5025, "step": 9491 }, { "epoch": 0.74, "grad_norm": 1.1892160263902438, "learning_rate": 3.4288072437809794e-06, "loss": 0.498, "step": 9492 }, { "epoch": 0.74, "grad_norm": 1.2119828664966084, "learning_rate": 3.4269134334137698e-06, "loss": 0.5662, "step": 9493 }, { "epoch": 0.74, "grad_norm": 1.2544732163794503, "learning_rate": 3.4250200380394284e-06, "loss": 0.5173, "step": 9494 }, { "epoch": 0.74, "grad_norm": 1.2514662725862404, "learning_rate": 3.4231270577774976e-06, "loss": 0.5239, "step": 9495 }, { "epoch": 0.74, "grad_norm": 1.1575979160064678, "learning_rate": 3.421234492747484e-06, "loss": 0.5165, "step": 9496 }, { "epoch": 0.74, "grad_norm": 1.305615078243322, "learning_rate": 3.419342343068882e-06, "loss": 0.5665, "step": 9497 }, { "epoch": 0.74, "grad_norm": 1.2087566796071885, "learning_rate": 3.417450608861147e-06, "loss": 0.4663, "step": 9498 }, { "epoch": 0.74, "grad_norm": 1.1930274814020905, "learning_rate": 3.4155592902437162e-06, "loss": 0.5326, "step": 9499 }, { "epoch": 0.74, "grad_norm": 1.2406117743644756, "learning_rate": 3.4136683873359987e-06, "loss": 0.5467, "step": 9500 }, { "epoch": 0.74, "grad_norm": 1.2319065473553306, "learning_rate": 3.4117779002573803e-06, "loss": 0.4889, "step": 9501 }, { "epoch": 0.74, "grad_norm": 1.2642127269600925, "learning_rate": 3.40988782912721e-06, "loss": 0.5367, "step": 9502 }, { "epoch": 0.74, "grad_norm": 1.2914795842611744, "learning_rate": 3.4079981740648215e-06, "loss": 0.5291, "step": 9503 }, { "epoch": 0.74, "grad_norm": 1.2091311364153738, "learning_rate": 3.406108935189519e-06, "loss": 0.5409, "step": 9504 }, { "epoch": 0.74, "grad_norm": 1.199552160381181, "learning_rate": 3.404220112620583e-06, "loss": 0.446, "step": 9505 }, { "epoch": 0.74, "grad_norm": 1.2489497541059247, "learning_rate": 3.402331706477258e-06, "loss": 0.5278, "step": 9506 }, { "epoch": 0.74, "grad_norm": 1.151139124316079, "learning_rate": 3.400443716878774e-06, "loss": 0.4736, "step": 9507 }, { "epoch": 0.74, "grad_norm": 1.2149511455399715, "learning_rate": 3.398556143944325e-06, "loss": 0.5619, "step": 9508 }, { "epoch": 0.74, "grad_norm": 1.2118997503844922, "learning_rate": 3.3966689877930857e-06, "loss": 0.5061, "step": 9509 }, { "epoch": 0.74, "grad_norm": 1.1959842464819288, "learning_rate": 3.394782248544202e-06, "loss": 0.5532, "step": 9510 }, { "epoch": 0.74, "grad_norm": 1.2025265877494704, "learning_rate": 3.392895926316795e-06, "loss": 0.5236, "step": 9511 }, { "epoch": 0.74, "grad_norm": 1.0676761653365265, "learning_rate": 3.3910100212299547e-06, "loss": 0.4885, "step": 9512 }, { "epoch": 0.74, "grad_norm": 1.189237227365878, "learning_rate": 3.3891245334027487e-06, "loss": 0.5269, "step": 9513 }, { "epoch": 0.74, "grad_norm": 1.3206982472331639, "learning_rate": 3.387239462954219e-06, "loss": 0.5313, "step": 9514 }, { "epoch": 0.74, "grad_norm": 1.2133309613892842, "learning_rate": 3.385354810003383e-06, "loss": 0.4849, "step": 9515 }, { "epoch": 0.74, "grad_norm": 1.2180806302503446, "learning_rate": 3.383470574669222e-06, "loss": 0.5528, "step": 9516 }, { "epoch": 0.74, "grad_norm": 1.217348221829534, "learning_rate": 3.3815867570707028e-06, "loss": 0.4838, "step": 9517 }, { "epoch": 0.74, "grad_norm": 1.3387243638491388, "learning_rate": 3.3797033573267546e-06, "loss": 0.5698, "step": 9518 }, { "epoch": 0.74, "grad_norm": 1.2024565982718625, "learning_rate": 3.377820375556291e-06, "loss": 0.5233, "step": 9519 }, { "epoch": 0.74, "grad_norm": 1.2012118201088033, "learning_rate": 3.3759378118781917e-06, "loss": 0.5467, "step": 9520 }, { "epoch": 0.74, "grad_norm": 1.265468587747006, "learning_rate": 3.3740556664113145e-06, "loss": 0.5555, "step": 9521 }, { "epoch": 0.74, "grad_norm": 1.1315449430277773, "learning_rate": 3.372173939274492e-06, "loss": 0.5347, "step": 9522 }, { "epoch": 0.74, "grad_norm": 1.2533361737408477, "learning_rate": 3.3702926305865202e-06, "loss": 0.5456, "step": 9523 }, { "epoch": 0.74, "grad_norm": 1.1186749886027787, "learning_rate": 3.36841174046618e-06, "loss": 0.4308, "step": 9524 }, { "epoch": 0.74, "grad_norm": 1.1082871166909192, "learning_rate": 3.3665312690322238e-06, "loss": 0.4934, "step": 9525 }, { "epoch": 0.74, "grad_norm": 1.2138847171809726, "learning_rate": 3.3646512164033696e-06, "loss": 0.5159, "step": 9526 }, { "epoch": 0.74, "grad_norm": 1.1476236690625292, "learning_rate": 3.362771582698321e-06, "loss": 0.4966, "step": 9527 }, { "epoch": 0.74, "grad_norm": 1.263644941889011, "learning_rate": 3.3608923680357432e-06, "loss": 0.5217, "step": 9528 }, { "epoch": 0.74, "grad_norm": 1.1583409348016596, "learning_rate": 3.359013572534283e-06, "loss": 0.5223, "step": 9529 }, { "epoch": 0.74, "grad_norm": 1.1760657083617911, "learning_rate": 3.3571351963125596e-06, "loss": 0.5018, "step": 9530 }, { "epoch": 0.74, "grad_norm": 1.207479005003192, "learning_rate": 3.3552572394891635e-06, "loss": 0.4748, "step": 9531 }, { "epoch": 0.74, "grad_norm": 1.155805244480373, "learning_rate": 3.353379702182664e-06, "loss": 0.4969, "step": 9532 }, { "epoch": 0.74, "grad_norm": 1.1666799726181558, "learning_rate": 3.3515025845115923e-06, "loss": 0.4845, "step": 9533 }, { "epoch": 0.74, "grad_norm": 1.2978001707076032, "learning_rate": 3.349625886594464e-06, "loss": 0.51, "step": 9534 }, { "epoch": 0.74, "grad_norm": 1.2741658754681031, "learning_rate": 3.3477496085497685e-06, "loss": 0.5663, "step": 9535 }, { "epoch": 0.74, "grad_norm": 1.2649210632332923, "learning_rate": 3.345873750495957e-06, "loss": 0.5349, "step": 9536 }, { "epoch": 0.74, "grad_norm": 1.2388800487906175, "learning_rate": 3.343998312551471e-06, "loss": 0.5229, "step": 9537 }, { "epoch": 0.74, "grad_norm": 1.1441923297153567, "learning_rate": 3.3421232948347084e-06, "loss": 0.5154, "step": 9538 }, { "epoch": 0.74, "grad_norm": 1.0550261907354799, "learning_rate": 3.3402486974640522e-06, "loss": 0.4497, "step": 9539 }, { "epoch": 0.74, "grad_norm": 1.1793390732121094, "learning_rate": 3.3383745205578555e-06, "loss": 0.534, "step": 9540 }, { "epoch": 0.74, "grad_norm": 1.2234965596370864, "learning_rate": 3.3365007642344447e-06, "loss": 0.5412, "step": 9541 }, { "epoch": 0.74, "grad_norm": 1.2635890455973273, "learning_rate": 3.334627428612124e-06, "loss": 0.4643, "step": 9542 }, { "epoch": 0.74, "grad_norm": 1.2149688067572466, "learning_rate": 3.3327545138091576e-06, "loss": 0.5446, "step": 9543 }, { "epoch": 0.74, "grad_norm": 1.1730049788913282, "learning_rate": 3.3308820199437987e-06, "loss": 0.5009, "step": 9544 }, { "epoch": 0.74, "grad_norm": 1.2804875314858442, "learning_rate": 3.3290099471342687e-06, "loss": 0.5835, "step": 9545 }, { "epoch": 0.74, "grad_norm": 1.2764840222145815, "learning_rate": 3.3271382954987554e-06, "loss": 0.5901, "step": 9546 }, { "epoch": 0.74, "grad_norm": 1.1999281484232625, "learning_rate": 3.325267065155431e-06, "loss": 0.5276, "step": 9547 }, { "epoch": 0.74, "grad_norm": 1.1760959647623928, "learning_rate": 3.323396256222432e-06, "loss": 0.5212, "step": 9548 }, { "epoch": 0.74, "grad_norm": 1.1815716179488593, "learning_rate": 3.321525868817873e-06, "loss": 0.5038, "step": 9549 }, { "epoch": 0.74, "grad_norm": 1.3645097067191698, "learning_rate": 3.319655903059843e-06, "loss": 0.5188, "step": 9550 }, { "epoch": 0.74, "grad_norm": 1.2459354599055037, "learning_rate": 3.3177863590664027e-06, "loss": 0.5191, "step": 9551 }, { "epoch": 0.74, "grad_norm": 1.3007752931017413, "learning_rate": 3.315917236955587e-06, "loss": 0.5706, "step": 9552 }, { "epoch": 0.74, "grad_norm": 1.2057969779382451, "learning_rate": 3.314048536845399e-06, "loss": 0.537, "step": 9553 }, { "epoch": 0.74, "grad_norm": 1.2124738729749147, "learning_rate": 3.312180258853822e-06, "loss": 0.5345, "step": 9554 }, { "epoch": 0.74, "grad_norm": 1.0849620362421304, "learning_rate": 3.3103124030988133e-06, "loss": 0.4574, "step": 9555 }, { "epoch": 0.74, "grad_norm": 1.1479840064446605, "learning_rate": 3.308444969698292e-06, "loss": 0.5157, "step": 9556 }, { "epoch": 0.74, "grad_norm": 1.1782256409741256, "learning_rate": 3.3065779587701686e-06, "loss": 0.5117, "step": 9557 }, { "epoch": 0.74, "grad_norm": 1.195709461792309, "learning_rate": 3.3047113704323085e-06, "loss": 0.4774, "step": 9558 }, { "epoch": 0.74, "grad_norm": 1.176182725869622, "learning_rate": 3.302845204802563e-06, "loss": 0.5136, "step": 9559 }, { "epoch": 0.74, "grad_norm": 1.3085574927929533, "learning_rate": 3.300979461998751e-06, "loss": 0.5196, "step": 9560 }, { "epoch": 0.74, "grad_norm": 1.2593984615895066, "learning_rate": 3.2991141421386696e-06, "loss": 0.5173, "step": 9561 }, { "epoch": 0.74, "grad_norm": 1.2253522794571168, "learning_rate": 3.2972492453400873e-06, "loss": 0.5662, "step": 9562 }, { "epoch": 0.74, "grad_norm": 1.2340296491557334, "learning_rate": 3.2953847717207375e-06, "loss": 0.5348, "step": 9563 }, { "epoch": 0.74, "grad_norm": 1.1331738125975577, "learning_rate": 3.29352072139834e-06, "loss": 0.5102, "step": 9564 }, { "epoch": 0.74, "grad_norm": 1.257181281696201, "learning_rate": 3.291657094490582e-06, "loss": 0.5326, "step": 9565 }, { "epoch": 0.74, "grad_norm": 1.2155917891253631, "learning_rate": 3.2897938911151196e-06, "loss": 0.5237, "step": 9566 }, { "epoch": 0.74, "grad_norm": 1.195306316683864, "learning_rate": 3.287931111389593e-06, "loss": 0.5074, "step": 9567 }, { "epoch": 0.74, "grad_norm": 1.289391400542176, "learning_rate": 3.2860687554316006e-06, "loss": 0.5756, "step": 9568 }, { "epoch": 0.74, "grad_norm": 1.1225045926652168, "learning_rate": 3.2842068233587275e-06, "loss": 0.4464, "step": 9569 }, { "epoch": 0.74, "grad_norm": 1.0891565049701193, "learning_rate": 3.2823453152885266e-06, "loss": 0.4572, "step": 9570 }, { "epoch": 0.74, "grad_norm": 1.2095725457558757, "learning_rate": 3.280484231338524e-06, "loss": 0.5182, "step": 9571 }, { "epoch": 0.74, "grad_norm": 1.1536845020619162, "learning_rate": 3.2786235716262236e-06, "loss": 0.5242, "step": 9572 }, { "epoch": 0.74, "grad_norm": 1.2496519080910367, "learning_rate": 3.276763336269092e-06, "loss": 0.507, "step": 9573 }, { "epoch": 0.74, "grad_norm": 1.1976518863566135, "learning_rate": 3.2749035253845773e-06, "loss": 0.5128, "step": 9574 }, { "epoch": 0.74, "grad_norm": 1.3234836935113166, "learning_rate": 3.273044139090105e-06, "loss": 0.5381, "step": 9575 }, { "epoch": 0.74, "grad_norm": 1.2696282921895148, "learning_rate": 3.271185177503058e-06, "loss": 0.5333, "step": 9576 }, { "epoch": 0.74, "grad_norm": 1.1917294423479057, "learning_rate": 3.2693266407408064e-06, "loss": 0.4782, "step": 9577 }, { "epoch": 0.74, "grad_norm": 1.1496572439821997, "learning_rate": 3.267468528920694e-06, "loss": 0.5007, "step": 9578 }, { "epoch": 0.74, "grad_norm": 1.0720940680225253, "learning_rate": 3.265610842160025e-06, "loss": 0.4496, "step": 9579 }, { "epoch": 0.74, "grad_norm": 1.2297235563583229, "learning_rate": 3.263753580576089e-06, "loss": 0.5162, "step": 9580 }, { "epoch": 0.74, "grad_norm": 1.224461450723111, "learning_rate": 3.261896744286144e-06, "loss": 0.498, "step": 9581 }, { "epoch": 0.74, "grad_norm": 1.1557453703922704, "learning_rate": 3.2600403334074205e-06, "loss": 0.5217, "step": 9582 }, { "epoch": 0.74, "grad_norm": 1.1602727144397882, "learning_rate": 3.2581843480571285e-06, "loss": 0.5032, "step": 9583 }, { "epoch": 0.74, "grad_norm": 1.2238037769319816, "learning_rate": 3.2563287883524376e-06, "loss": 0.5503, "step": 9584 }, { "epoch": 0.74, "grad_norm": 1.1477382373543588, "learning_rate": 3.254473654410507e-06, "loss": 0.5313, "step": 9585 }, { "epoch": 0.74, "grad_norm": 1.227598882293441, "learning_rate": 3.2526189463484536e-06, "loss": 0.5105, "step": 9586 }, { "epoch": 0.74, "grad_norm": 1.2740430737703992, "learning_rate": 3.250764664283378e-06, "loss": 0.5357, "step": 9587 }, { "epoch": 0.74, "grad_norm": 1.1937142451658767, "learning_rate": 3.248910808332354e-06, "loss": 0.468, "step": 9588 }, { "epoch": 0.74, "grad_norm": 1.2481086250037112, "learning_rate": 3.2470573786124184e-06, "loss": 0.511, "step": 9589 }, { "epoch": 0.74, "grad_norm": 1.140881914442385, "learning_rate": 3.2452043752405914e-06, "loss": 0.4756, "step": 9590 }, { "epoch": 0.74, "grad_norm": 1.1246766049428927, "learning_rate": 3.2433517983338627e-06, "loss": 0.4674, "step": 9591 }, { "epoch": 0.74, "grad_norm": 1.1718132511400805, "learning_rate": 3.241499648009193e-06, "loss": 0.5062, "step": 9592 }, { "epoch": 0.74, "grad_norm": 1.2260687253305598, "learning_rate": 3.2396479243835243e-06, "loss": 0.4923, "step": 9593 }, { "epoch": 0.74, "grad_norm": 1.3285709754295572, "learning_rate": 3.237796627573757e-06, "loss": 0.5492, "step": 9594 }, { "epoch": 0.74, "grad_norm": 1.2990738026868491, "learning_rate": 3.2359457576967812e-06, "loss": 0.5298, "step": 9595 }, { "epoch": 0.74, "grad_norm": 1.1824105772922286, "learning_rate": 3.2340953148694444e-06, "loss": 0.4678, "step": 9596 }, { "epoch": 0.74, "grad_norm": 1.1391189247253153, "learning_rate": 3.2322452992085775e-06, "loss": 0.4816, "step": 9597 }, { "epoch": 0.74, "grad_norm": 1.1966098402037928, "learning_rate": 3.2303957108309846e-06, "loss": 0.4823, "step": 9598 }, { "epoch": 0.74, "grad_norm": 1.1605868583989596, "learning_rate": 3.2285465498534343e-06, "loss": 0.5056, "step": 9599 }, { "epoch": 0.74, "grad_norm": 1.1648864901995553, "learning_rate": 3.2266978163926765e-06, "loss": 0.4967, "step": 9600 }, { "epoch": 0.74, "grad_norm": 1.053097975296423, "learning_rate": 3.2248495105654308e-06, "loss": 0.4267, "step": 9601 }, { "epoch": 0.74, "grad_norm": 1.269275299093629, "learning_rate": 3.2230016324883906e-06, "loss": 0.5859, "step": 9602 }, { "epoch": 0.74, "grad_norm": 1.260502326303444, "learning_rate": 3.2211541822782255e-06, "loss": 0.5236, "step": 9603 }, { "epoch": 0.75, "grad_norm": 1.2566710795992027, "learning_rate": 3.2193071600515678e-06, "loss": 0.5346, "step": 9604 }, { "epoch": 0.75, "grad_norm": 1.1875216833443867, "learning_rate": 3.2174605659250367e-06, "loss": 0.5626, "step": 9605 }, { "epoch": 0.75, "grad_norm": 1.1233260628684765, "learning_rate": 3.21561440001521e-06, "loss": 0.4808, "step": 9606 }, { "epoch": 0.75, "grad_norm": 1.2106343535936837, "learning_rate": 3.213768662438649e-06, "loss": 0.5286, "step": 9607 }, { "epoch": 0.75, "grad_norm": 1.2317500149645764, "learning_rate": 3.2119233533118864e-06, "loss": 0.5129, "step": 9608 }, { "epoch": 0.75, "grad_norm": 1.273805857548278, "learning_rate": 3.2100784727514235e-06, "loss": 0.5209, "step": 9609 }, { "epoch": 0.75, "grad_norm": 1.2752639216859851, "learning_rate": 3.2082340208737363e-06, "loss": 0.4705, "step": 9610 }, { "epoch": 0.75, "grad_norm": 1.2152096114644306, "learning_rate": 3.206389997795277e-06, "loss": 0.5098, "step": 9611 }, { "epoch": 0.75, "grad_norm": 1.139154557640684, "learning_rate": 3.204546403632468e-06, "loss": 0.4852, "step": 9612 }, { "epoch": 0.75, "grad_norm": 1.2513026126009654, "learning_rate": 3.2027032385017065e-06, "loss": 0.5046, "step": 9613 }, { "epoch": 0.75, "grad_norm": 1.2896581949537642, "learning_rate": 3.2008605025193564e-06, "loss": 0.5802, "step": 9614 }, { "epoch": 0.75, "grad_norm": 1.2939197926433892, "learning_rate": 3.199018195801765e-06, "loss": 0.5221, "step": 9615 }, { "epoch": 0.75, "grad_norm": 1.300807230921914, "learning_rate": 3.1971763184652404e-06, "loss": 0.527, "step": 9616 }, { "epoch": 0.75, "grad_norm": 1.2733365352367334, "learning_rate": 3.1953348706260723e-06, "loss": 0.5559, "step": 9617 }, { "epoch": 0.75, "grad_norm": 1.218033066202077, "learning_rate": 3.1934938524005243e-06, "loss": 0.5444, "step": 9618 }, { "epoch": 0.75, "grad_norm": 1.2230639280764783, "learning_rate": 3.1916532639048237e-06, "loss": 0.527, "step": 9619 }, { "epoch": 0.75, "grad_norm": 1.244658214273411, "learning_rate": 3.1898131052551784e-06, "loss": 0.5449, "step": 9620 }, { "epoch": 0.75, "grad_norm": 1.2124241716722137, "learning_rate": 3.187973376567769e-06, "loss": 0.4891, "step": 9621 }, { "epoch": 0.75, "grad_norm": 1.2611029099287843, "learning_rate": 3.1861340779587444e-06, "loss": 0.5272, "step": 9622 }, { "epoch": 0.75, "grad_norm": 1.234839919213569, "learning_rate": 3.1842952095442335e-06, "loss": 0.5283, "step": 9623 }, { "epoch": 0.75, "grad_norm": 1.155577180325893, "learning_rate": 3.182456771440329e-06, "loss": 0.5187, "step": 9624 }, { "epoch": 0.75, "grad_norm": 1.1982110974693967, "learning_rate": 3.1806187637631035e-06, "loss": 0.4872, "step": 9625 }, { "epoch": 0.75, "grad_norm": 1.3175504336032817, "learning_rate": 3.178781186628597e-06, "loss": 0.5188, "step": 9626 }, { "epoch": 0.75, "grad_norm": 1.132336957365549, "learning_rate": 3.176944040152826e-06, "loss": 0.4636, "step": 9627 }, { "epoch": 0.75, "grad_norm": 1.1718656921017068, "learning_rate": 3.1751073244517817e-06, "loss": 0.4784, "step": 9628 }, { "epoch": 0.75, "grad_norm": 1.2166551534987615, "learning_rate": 3.1732710396414257e-06, "loss": 0.5338, "step": 9629 }, { "epoch": 0.75, "grad_norm": 1.220696923812374, "learning_rate": 3.1714351858376867e-06, "loss": 0.498, "step": 9630 }, { "epoch": 0.75, "grad_norm": 1.2440496917502224, "learning_rate": 3.1695997631564756e-06, "loss": 0.5264, "step": 9631 }, { "epoch": 0.75, "grad_norm": 1.2735555395170235, "learning_rate": 3.167764771713673e-06, "loss": 0.5668, "step": 9632 }, { "epoch": 0.75, "grad_norm": 1.2009856785310227, "learning_rate": 3.165930211625131e-06, "loss": 0.4957, "step": 9633 }, { "epoch": 0.75, "grad_norm": 1.1969525922120872, "learning_rate": 3.1640960830066723e-06, "loss": 0.5027, "step": 9634 }, { "epoch": 0.75, "grad_norm": 1.141580573314053, "learning_rate": 3.1622623859740998e-06, "loss": 0.4756, "step": 9635 }, { "epoch": 0.75, "grad_norm": 1.2348034272144184, "learning_rate": 3.160429120643177e-06, "loss": 0.5102, "step": 9636 }, { "epoch": 0.75, "grad_norm": 1.1220948537827005, "learning_rate": 3.1585962871296514e-06, "loss": 0.5148, "step": 9637 }, { "epoch": 0.75, "grad_norm": 1.1861523460012184, "learning_rate": 3.15676388554924e-06, "loss": 0.5513, "step": 9638 }, { "epoch": 0.75, "grad_norm": 1.1555888889214319, "learning_rate": 3.154931916017633e-06, "loss": 0.5081, "step": 9639 }, { "epoch": 0.75, "grad_norm": 1.2182523249262076, "learning_rate": 3.1531003786504877e-06, "loss": 0.5487, "step": 9640 }, { "epoch": 0.75, "grad_norm": 1.1503821795271751, "learning_rate": 3.151269273563441e-06, "loss": 0.4865, "step": 9641 }, { "epoch": 0.75, "grad_norm": 1.1729633617874444, "learning_rate": 3.149438600872099e-06, "loss": 0.5045, "step": 9642 }, { "epoch": 0.75, "grad_norm": 1.0840679514515028, "learning_rate": 3.147608360692046e-06, "loss": 0.4947, "step": 9643 }, { "epoch": 0.75, "grad_norm": 1.1614280947639186, "learning_rate": 3.1457785531388275e-06, "loss": 0.5187, "step": 9644 }, { "epoch": 0.75, "grad_norm": 1.1710159713639752, "learning_rate": 3.1439491783279754e-06, "loss": 0.5399, "step": 9645 }, { "epoch": 0.75, "grad_norm": 1.2505672121589768, "learning_rate": 3.1421202363749816e-06, "loss": 0.5327, "step": 9646 }, { "epoch": 0.75, "grad_norm": 1.210515496338738, "learning_rate": 3.1402917273953183e-06, "loss": 0.554, "step": 9647 }, { "epoch": 0.75, "grad_norm": 1.2018282453832836, "learning_rate": 3.13846365150443e-06, "loss": 0.5085, "step": 9648 }, { "epoch": 0.75, "grad_norm": 1.1599178390345357, "learning_rate": 3.136636008817736e-06, "loss": 0.5033, "step": 9649 }, { "epoch": 0.75, "grad_norm": 1.1208079967447666, "learning_rate": 3.134808799450617e-06, "loss": 0.4635, "step": 9650 }, { "epoch": 0.75, "grad_norm": 1.1668448255383752, "learning_rate": 3.1329820235184392e-06, "loss": 0.5017, "step": 9651 }, { "epoch": 0.75, "grad_norm": 1.1973444304426357, "learning_rate": 3.131155681136535e-06, "loss": 0.5014, "step": 9652 }, { "epoch": 0.75, "grad_norm": 1.2872798268471883, "learning_rate": 3.129329772420214e-06, "loss": 0.573, "step": 9653 }, { "epoch": 0.75, "grad_norm": 1.1669371268718376, "learning_rate": 3.12750429748475e-06, "loss": 0.4799, "step": 9654 }, { "epoch": 0.75, "grad_norm": 1.1191617154835367, "learning_rate": 3.1256792564453997e-06, "loss": 0.4965, "step": 9655 }, { "epoch": 0.75, "grad_norm": 1.1682877631195296, "learning_rate": 3.1238546494173814e-06, "loss": 0.4773, "step": 9656 }, { "epoch": 0.75, "grad_norm": 1.2555377364417983, "learning_rate": 3.122030476515896e-06, "loss": 0.5172, "step": 9657 }, { "epoch": 0.75, "grad_norm": 1.2261297835471494, "learning_rate": 3.120206737856112e-06, "loss": 0.4972, "step": 9658 }, { "epoch": 0.75, "grad_norm": 1.2282230774832554, "learning_rate": 3.1183834335531748e-06, "loss": 0.4974, "step": 9659 }, { "epoch": 0.75, "grad_norm": 1.2060289387908014, "learning_rate": 3.116560563722193e-06, "loss": 0.4786, "step": 9660 }, { "epoch": 0.75, "grad_norm": 1.1947679463531546, "learning_rate": 3.1147381284782562e-06, "loss": 0.545, "step": 9661 }, { "epoch": 0.75, "grad_norm": 1.4195915583058847, "learning_rate": 3.112916127936425e-06, "loss": 0.5533, "step": 9662 }, { "epoch": 0.75, "grad_norm": 1.1560191877779025, "learning_rate": 3.111094562211735e-06, "loss": 0.4798, "step": 9663 }, { "epoch": 0.75, "grad_norm": 1.1619429781738808, "learning_rate": 3.109273431419183e-06, "loss": 0.4808, "step": 9664 }, { "epoch": 0.75, "grad_norm": 1.1951582566018901, "learning_rate": 3.107452735673755e-06, "loss": 0.5223, "step": 9665 }, { "epoch": 0.75, "grad_norm": 1.32950197532596, "learning_rate": 3.1056324750903934e-06, "loss": 0.5584, "step": 9666 }, { "epoch": 0.75, "grad_norm": 1.2145058017716204, "learning_rate": 3.1038126497840225e-06, "loss": 0.5298, "step": 9667 }, { "epoch": 0.75, "grad_norm": 1.1613504961298915, "learning_rate": 3.1019932598695402e-06, "loss": 0.4781, "step": 9668 }, { "epoch": 0.75, "grad_norm": 1.2345857561035964, "learning_rate": 3.100174305461815e-06, "loss": 0.4983, "step": 9669 }, { "epoch": 0.75, "grad_norm": 1.2016430930701383, "learning_rate": 3.0983557866756818e-06, "loss": 0.5158, "step": 9670 }, { "epoch": 0.75, "grad_norm": 1.1028421963427857, "learning_rate": 3.096537703625955e-06, "loss": 0.5089, "step": 9671 }, { "epoch": 0.75, "grad_norm": 1.2161646098852747, "learning_rate": 3.0947200564274206e-06, "loss": 0.4954, "step": 9672 }, { "epoch": 0.75, "grad_norm": 1.351248864624224, "learning_rate": 3.092902845194837e-06, "loss": 0.5527, "step": 9673 }, { "epoch": 0.75, "grad_norm": 1.076700790226702, "learning_rate": 3.0910860700429315e-06, "loss": 0.4599, "step": 9674 }, { "epoch": 0.75, "grad_norm": 1.1896682820272197, "learning_rate": 3.0892697310864107e-06, "loss": 0.4777, "step": 9675 }, { "epoch": 0.75, "grad_norm": 1.1848758514172655, "learning_rate": 3.0874538284399424e-06, "loss": 0.531, "step": 9676 }, { "epoch": 0.75, "grad_norm": 1.0745385820927151, "learning_rate": 3.0856383622181785e-06, "loss": 0.5203, "step": 9677 }, { "epoch": 0.75, "grad_norm": 1.1340142551313297, "learning_rate": 3.083823332535738e-06, "loss": 0.5117, "step": 9678 }, { "epoch": 0.75, "grad_norm": 1.2985881382932598, "learning_rate": 3.0820087395072172e-06, "loss": 0.5805, "step": 9679 }, { "epoch": 0.75, "grad_norm": 1.2501920552532362, "learning_rate": 3.0801945832471736e-06, "loss": 0.5804, "step": 9680 }, { "epoch": 0.75, "grad_norm": 1.2163620075272463, "learning_rate": 3.078380863870146e-06, "loss": 0.4998, "step": 9681 }, { "epoch": 0.75, "grad_norm": 1.114146595652614, "learning_rate": 3.076567581490647e-06, "loss": 0.4944, "step": 9682 }, { "epoch": 0.75, "grad_norm": 1.2460728945526496, "learning_rate": 3.0747547362231588e-06, "loss": 0.5611, "step": 9683 }, { "epoch": 0.75, "grad_norm": 1.2827749249139446, "learning_rate": 3.072942328182131e-06, "loss": 0.54, "step": 9684 }, { "epoch": 0.75, "grad_norm": 1.2000758683539983, "learning_rate": 3.071130357481996e-06, "loss": 0.5028, "step": 9685 }, { "epoch": 0.75, "grad_norm": 1.2700625672284762, "learning_rate": 3.0693188242371464e-06, "loss": 0.5593, "step": 9686 }, { "epoch": 0.75, "grad_norm": 1.2973007973873976, "learning_rate": 3.067507728561958e-06, "loss": 0.4982, "step": 9687 }, { "epoch": 0.75, "grad_norm": 1.0810890364595844, "learning_rate": 3.065697070570772e-06, "loss": 0.4986, "step": 9688 }, { "epoch": 0.75, "grad_norm": 1.2541923315436387, "learning_rate": 3.0638868503779075e-06, "loss": 0.5283, "step": 9689 }, { "epoch": 0.75, "grad_norm": 1.2070078924690613, "learning_rate": 3.0620770680976554e-06, "loss": 0.536, "step": 9690 }, { "epoch": 0.75, "grad_norm": 1.1986306702506266, "learning_rate": 3.060267723844269e-06, "loss": 0.4594, "step": 9691 }, { "epoch": 0.75, "grad_norm": 1.2195843506028903, "learning_rate": 3.058458817731985e-06, "loss": 0.5094, "step": 9692 }, { "epoch": 0.75, "grad_norm": 1.2256512506719115, "learning_rate": 3.0566503498750135e-06, "loss": 0.5288, "step": 9693 }, { "epoch": 0.75, "grad_norm": 1.198845373423006, "learning_rate": 3.0548423203875245e-06, "loss": 0.4933, "step": 9694 }, { "epoch": 0.75, "grad_norm": 1.224672063412419, "learning_rate": 3.0530347293836758e-06, "loss": 0.4683, "step": 9695 }, { "epoch": 0.75, "grad_norm": 1.269420020292581, "learning_rate": 3.0512275769775834e-06, "loss": 0.5721, "step": 9696 }, { "epoch": 0.75, "grad_norm": 1.1948019196331745, "learning_rate": 3.0494208632833443e-06, "loss": 0.5035, "step": 9697 }, { "epoch": 0.75, "grad_norm": 1.156843548578526, "learning_rate": 3.0476145884150265e-06, "loss": 0.4974, "step": 9698 }, { "epoch": 0.75, "grad_norm": 1.2688441368470538, "learning_rate": 3.0458087524866697e-06, "loss": 0.5456, "step": 9699 }, { "epoch": 0.75, "grad_norm": 1.1903322976434654, "learning_rate": 3.0440033556122883e-06, "loss": 0.5462, "step": 9700 }, { "epoch": 0.75, "grad_norm": 1.0663380583594542, "learning_rate": 3.04219839790586e-06, "loss": 0.4863, "step": 9701 }, { "epoch": 0.75, "grad_norm": 1.020107880852447, "learning_rate": 3.0403938794813448e-06, "loss": 0.4864, "step": 9702 }, { "epoch": 0.75, "grad_norm": 1.1512526179313922, "learning_rate": 3.0385898004526725e-06, "loss": 0.4166, "step": 9703 }, { "epoch": 0.75, "grad_norm": 1.1410519049236627, "learning_rate": 3.0367861609337412e-06, "loss": 0.4846, "step": 9704 }, { "epoch": 0.75, "grad_norm": 1.1613091286017547, "learning_rate": 3.0349829610384274e-06, "loss": 0.4816, "step": 9705 }, { "epoch": 0.75, "grad_norm": 1.112162727493802, "learning_rate": 3.03318020088057e-06, "loss": 0.5055, "step": 9706 }, { "epoch": 0.75, "grad_norm": 1.1611980038141168, "learning_rate": 3.031377880573991e-06, "loss": 0.4837, "step": 9707 }, { "epoch": 0.75, "grad_norm": 1.1704282858681387, "learning_rate": 3.0295760002324804e-06, "loss": 0.5137, "step": 9708 }, { "epoch": 0.75, "grad_norm": 1.1833039486061843, "learning_rate": 3.0277745599697996e-06, "loss": 0.5305, "step": 9709 }, { "epoch": 0.75, "grad_norm": 1.1505943338181377, "learning_rate": 3.025973559899685e-06, "loss": 0.51, "step": 9710 }, { "epoch": 0.75, "grad_norm": 1.2589359360530683, "learning_rate": 3.0241730001358383e-06, "loss": 0.517, "step": 9711 }, { "epoch": 0.75, "grad_norm": 1.2993040202517023, "learning_rate": 3.0223728807919406e-06, "loss": 0.6323, "step": 9712 }, { "epoch": 0.75, "grad_norm": 1.2725565384780952, "learning_rate": 3.020573201981646e-06, "loss": 0.5138, "step": 9713 }, { "epoch": 0.75, "grad_norm": 1.2987660328643698, "learning_rate": 3.018773963818571e-06, "loss": 0.5545, "step": 9714 }, { "epoch": 0.75, "grad_norm": 1.2787221767944847, "learning_rate": 3.016975166416317e-06, "loss": 0.5264, "step": 9715 }, { "epoch": 0.75, "grad_norm": 1.2301707073318102, "learning_rate": 3.015176809888445e-06, "loss": 0.5101, "step": 9716 }, { "epoch": 0.75, "grad_norm": 1.309659381656543, "learning_rate": 3.0133788943484987e-06, "loss": 0.505, "step": 9717 }, { "epoch": 0.75, "grad_norm": 1.1884990805982245, "learning_rate": 3.011581419909988e-06, "loss": 0.5427, "step": 9718 }, { "epoch": 0.75, "grad_norm": 1.1365239871204527, "learning_rate": 3.0097843866863985e-06, "loss": 0.5214, "step": 9719 }, { "epoch": 0.75, "grad_norm": 1.1672198596232213, "learning_rate": 3.0079877947911883e-06, "loss": 0.4931, "step": 9720 }, { "epoch": 0.75, "grad_norm": 1.2158417849195762, "learning_rate": 3.0061916443377805e-06, "loss": 0.4731, "step": 9721 }, { "epoch": 0.75, "grad_norm": 1.2957583756350022, "learning_rate": 3.004395935439577e-06, "loss": 0.5218, "step": 9722 }, { "epoch": 0.75, "grad_norm": 1.2692954445968987, "learning_rate": 3.002600668209953e-06, "loss": 0.5631, "step": 9723 }, { "epoch": 0.75, "grad_norm": 1.2135708140964503, "learning_rate": 3.000805842762248e-06, "loss": 0.5186, "step": 9724 }, { "epoch": 0.75, "grad_norm": 1.1757931629477214, "learning_rate": 2.999011459209784e-06, "loss": 0.4773, "step": 9725 }, { "epoch": 0.75, "grad_norm": 1.2630064448949543, "learning_rate": 2.9972175176658448e-06, "loss": 0.5139, "step": 9726 }, { "epoch": 0.75, "grad_norm": 1.2904038357530452, "learning_rate": 2.995424018243692e-06, "loss": 0.5303, "step": 9727 }, { "epoch": 0.75, "grad_norm": 1.0653556707110878, "learning_rate": 2.9936309610565606e-06, "loss": 0.4723, "step": 9728 }, { "epoch": 0.75, "grad_norm": 1.2497233084575268, "learning_rate": 2.9918383462176547e-06, "loss": 0.4913, "step": 9729 }, { "epoch": 0.75, "grad_norm": 1.2938495874084577, "learning_rate": 2.9900461738401545e-06, "loss": 0.5679, "step": 9730 }, { "epoch": 0.75, "grad_norm": 1.2638106351015859, "learning_rate": 2.988254444037203e-06, "loss": 0.519, "step": 9731 }, { "epoch": 0.75, "grad_norm": 1.204300392979703, "learning_rate": 2.9864631569219237e-06, "loss": 0.4885, "step": 9732 }, { "epoch": 0.76, "grad_norm": 1.1726332690829093, "learning_rate": 2.984672312607414e-06, "loss": 0.4703, "step": 9733 }, { "epoch": 0.76, "grad_norm": 1.2400835561287917, "learning_rate": 2.982881911206733e-06, "loss": 0.5033, "step": 9734 }, { "epoch": 0.76, "grad_norm": 1.241585735696034, "learning_rate": 2.981091952832923e-06, "loss": 0.4944, "step": 9735 }, { "epoch": 0.76, "grad_norm": 1.2364526960099262, "learning_rate": 2.9793024375989877e-06, "loss": 0.5357, "step": 9736 }, { "epoch": 0.76, "grad_norm": 1.2138513270943172, "learning_rate": 2.9775133656179113e-06, "loss": 0.5658, "step": 9737 }, { "epoch": 0.76, "grad_norm": 1.2376171461202323, "learning_rate": 2.975724737002648e-06, "loss": 0.557, "step": 9738 }, { "epoch": 0.76, "grad_norm": 1.1327460960130638, "learning_rate": 2.973936551866121e-06, "loss": 0.4955, "step": 9739 }, { "epoch": 0.76, "grad_norm": 1.077405219368041, "learning_rate": 2.97214881032123e-06, "loss": 0.4842, "step": 9740 }, { "epoch": 0.76, "grad_norm": 1.423243440533533, "learning_rate": 2.9703615124808484e-06, "loss": 0.605, "step": 9741 }, { "epoch": 0.76, "grad_norm": 1.0908098211782857, "learning_rate": 2.9685746584578078e-06, "loss": 0.4956, "step": 9742 }, { "epoch": 0.76, "grad_norm": 1.2265318848500568, "learning_rate": 2.966788248364929e-06, "loss": 0.5175, "step": 9743 }, { "epoch": 0.76, "grad_norm": 1.1097582839996938, "learning_rate": 2.9650022823149925e-06, "loss": 0.3929, "step": 9744 }, { "epoch": 0.76, "grad_norm": 1.162524596333507, "learning_rate": 2.9632167604207586e-06, "loss": 0.4391, "step": 9745 }, { "epoch": 0.76, "grad_norm": 1.2255626904231984, "learning_rate": 2.9614316827949574e-06, "loss": 0.4882, "step": 9746 }, { "epoch": 0.76, "grad_norm": 1.1049093172138844, "learning_rate": 2.959647049550286e-06, "loss": 0.4703, "step": 9747 }, { "epoch": 0.76, "grad_norm": 1.2790026610334093, "learning_rate": 2.9578628607994187e-06, "loss": 0.521, "step": 9748 }, { "epoch": 0.76, "grad_norm": 1.2820677589922698, "learning_rate": 2.956079116655003e-06, "loss": 0.5336, "step": 9749 }, { "epoch": 0.76, "grad_norm": 1.3268876089610115, "learning_rate": 2.9542958172296533e-06, "loss": 0.5115, "step": 9750 }, { "epoch": 0.76, "grad_norm": 1.1877541521048391, "learning_rate": 2.9525129626359637e-06, "loss": 0.5069, "step": 9751 }, { "epoch": 0.76, "grad_norm": 1.2870209219239013, "learning_rate": 2.950730552986487e-06, "loss": 0.523, "step": 9752 }, { "epoch": 0.76, "grad_norm": 1.210638390793444, "learning_rate": 2.948948588393764e-06, "loss": 0.4904, "step": 9753 }, { "epoch": 0.76, "grad_norm": 1.2198304986459967, "learning_rate": 2.9471670689702927e-06, "loss": 0.5251, "step": 9754 }, { "epoch": 0.76, "grad_norm": 1.1133374116350807, "learning_rate": 2.945385994828551e-06, "loss": 0.4778, "step": 9755 }, { "epoch": 0.76, "grad_norm": 1.266781925721349, "learning_rate": 2.9436053660809914e-06, "loss": 0.5526, "step": 9756 }, { "epoch": 0.76, "grad_norm": 1.276616954225686, "learning_rate": 2.941825182840029e-06, "loss": 0.5367, "step": 9757 }, { "epoch": 0.76, "grad_norm": 1.0861925882108026, "learning_rate": 2.940045445218057e-06, "loss": 0.4439, "step": 9758 }, { "epoch": 0.76, "grad_norm": 1.223251246748148, "learning_rate": 2.9382661533274424e-06, "loss": 0.5487, "step": 9759 }, { "epoch": 0.76, "grad_norm": 1.2432689637472019, "learning_rate": 2.936487307280518e-06, "loss": 0.5079, "step": 9760 }, { "epoch": 0.76, "grad_norm": 1.341238735894706, "learning_rate": 2.9347089071895963e-06, "loss": 0.5597, "step": 9761 }, { "epoch": 0.76, "grad_norm": 1.199765804405271, "learning_rate": 2.9329309531669505e-06, "loss": 0.4765, "step": 9762 }, { "epoch": 0.76, "grad_norm": 1.2067155641234408, "learning_rate": 2.931153445324837e-06, "loss": 0.529, "step": 9763 }, { "epoch": 0.76, "grad_norm": 1.2344159952430254, "learning_rate": 2.929376383775475e-06, "loss": 0.5688, "step": 9764 }, { "epoch": 0.76, "grad_norm": 1.1055547997510957, "learning_rate": 2.927599768631061e-06, "loss": 0.5002, "step": 9765 }, { "epoch": 0.76, "grad_norm": 1.223891636539499, "learning_rate": 2.9258236000037656e-06, "loss": 0.5093, "step": 9766 }, { "epoch": 0.76, "grad_norm": 1.0899147912938831, "learning_rate": 2.9240478780057214e-06, "loss": 0.5142, "step": 9767 }, { "epoch": 0.76, "grad_norm": 1.2348902146195369, "learning_rate": 2.9222726027490413e-06, "loss": 0.53, "step": 9768 }, { "epoch": 0.76, "grad_norm": 1.276223674431669, "learning_rate": 2.9204977743458084e-06, "loss": 0.5536, "step": 9769 }, { "epoch": 0.76, "grad_norm": 1.21900057050834, "learning_rate": 2.918723392908076e-06, "loss": 0.5431, "step": 9770 }, { "epoch": 0.76, "grad_norm": 1.2101023101113764, "learning_rate": 2.916949458547874e-06, "loss": 0.5313, "step": 9771 }, { "epoch": 0.76, "grad_norm": 1.1394050030882883, "learning_rate": 2.9151759713771933e-06, "loss": 0.5097, "step": 9772 }, { "epoch": 0.76, "grad_norm": 1.2700705453848662, "learning_rate": 2.9134029315080094e-06, "loss": 0.5033, "step": 9773 }, { "epoch": 0.76, "grad_norm": 1.2294417682275165, "learning_rate": 2.911630339052257e-06, "loss": 0.5199, "step": 9774 }, { "epoch": 0.76, "grad_norm": 1.14783249054286, "learning_rate": 2.909858194121853e-06, "loss": 0.5145, "step": 9775 }, { "epoch": 0.76, "grad_norm": 1.1075335518639304, "learning_rate": 2.908086496828685e-06, "loss": 0.4875, "step": 9776 }, { "epoch": 0.76, "grad_norm": 1.1784986353766367, "learning_rate": 2.906315247284602e-06, "loss": 0.4948, "step": 9777 }, { "epoch": 0.76, "grad_norm": 1.1317728994356206, "learning_rate": 2.904544445601436e-06, "loss": 0.482, "step": 9778 }, { "epoch": 0.76, "grad_norm": 1.2665373257220667, "learning_rate": 2.9027740918909873e-06, "loss": 0.5572, "step": 9779 }, { "epoch": 0.76, "grad_norm": 1.0476162691616189, "learning_rate": 2.9010041862650273e-06, "loss": 0.4903, "step": 9780 }, { "epoch": 0.76, "grad_norm": 1.1946357357017348, "learning_rate": 2.899234728835302e-06, "loss": 0.4706, "step": 9781 }, { "epoch": 0.76, "grad_norm": 1.1590615704847174, "learning_rate": 2.8974657197135203e-06, "loss": 0.5094, "step": 9782 }, { "epoch": 0.76, "grad_norm": 1.3795955329216594, "learning_rate": 2.895697159011375e-06, "loss": 0.534, "step": 9783 }, { "epoch": 0.76, "grad_norm": 1.1630723569703318, "learning_rate": 2.893929046840518e-06, "loss": 0.4747, "step": 9784 }, { "epoch": 0.76, "grad_norm": 1.1057504355705519, "learning_rate": 2.892161383312583e-06, "loss": 0.5357, "step": 9785 }, { "epoch": 0.76, "grad_norm": 1.1968714203544453, "learning_rate": 2.8903941685391745e-06, "loss": 0.4947, "step": 9786 }, { "epoch": 0.76, "grad_norm": 1.2296795933237805, "learning_rate": 2.8886274026318593e-06, "loss": 0.5358, "step": 9787 }, { "epoch": 0.76, "grad_norm": 1.2436282841152793, "learning_rate": 2.886861085702186e-06, "loss": 0.5366, "step": 9788 }, { "epoch": 0.76, "grad_norm": 1.2671993965314863, "learning_rate": 2.885095217861672e-06, "loss": 0.5508, "step": 9789 }, { "epoch": 0.76, "grad_norm": 1.2071877782368567, "learning_rate": 2.8833297992218055e-06, "loss": 0.5654, "step": 9790 }, { "epoch": 0.76, "grad_norm": 1.2480367501912277, "learning_rate": 2.881564829894048e-06, "loss": 0.5357, "step": 9791 }, { "epoch": 0.76, "grad_norm": 1.2153232033126546, "learning_rate": 2.8798003099898297e-06, "loss": 0.4976, "step": 9792 }, { "epoch": 0.76, "grad_norm": 1.1746961078901785, "learning_rate": 2.8780362396205495e-06, "loss": 0.4913, "step": 9793 }, { "epoch": 0.76, "grad_norm": 1.0602920254125476, "learning_rate": 2.8762726188975876e-06, "loss": 0.4982, "step": 9794 }, { "epoch": 0.76, "grad_norm": 1.1111228498527537, "learning_rate": 2.874509447932288e-06, "loss": 0.4899, "step": 9795 }, { "epoch": 0.76, "grad_norm": 1.1744743510596547, "learning_rate": 2.872746726835969e-06, "loss": 0.5362, "step": 9796 }, { "epoch": 0.76, "grad_norm": 1.2808508832858856, "learning_rate": 2.870984455719924e-06, "loss": 0.5181, "step": 9797 }, { "epoch": 0.76, "grad_norm": 1.2362708971901422, "learning_rate": 2.8692226346954087e-06, "loss": 0.4979, "step": 9798 }, { "epoch": 0.76, "grad_norm": 1.2511913820878486, "learning_rate": 2.8674612638736576e-06, "loss": 0.5314, "step": 9799 }, { "epoch": 0.76, "grad_norm": 1.3125190733477405, "learning_rate": 2.865700343365877e-06, "loss": 0.56, "step": 9800 }, { "epoch": 0.76, "grad_norm": 1.2053376201279824, "learning_rate": 2.8639398732832448e-06, "loss": 0.5058, "step": 9801 }, { "epoch": 0.76, "grad_norm": 1.3062222071707097, "learning_rate": 2.862179853736905e-06, "loss": 0.5553, "step": 9802 }, { "epoch": 0.76, "grad_norm": 1.1668391043521174, "learning_rate": 2.860420284837975e-06, "loss": 0.5037, "step": 9803 }, { "epoch": 0.76, "grad_norm": 1.2755488112114801, "learning_rate": 2.858661166697547e-06, "loss": 0.5874, "step": 9804 }, { "epoch": 0.76, "grad_norm": 1.2417344040710159, "learning_rate": 2.8569024994266848e-06, "loss": 0.5099, "step": 9805 }, { "epoch": 0.76, "grad_norm": 1.0850638298737119, "learning_rate": 2.855144283136421e-06, "loss": 0.491, "step": 9806 }, { "epoch": 0.76, "grad_norm": 1.1009858698618142, "learning_rate": 2.853386517937764e-06, "loss": 0.4518, "step": 9807 }, { "epoch": 0.76, "grad_norm": 1.1702659241151738, "learning_rate": 2.8516292039416847e-06, "loss": 0.4775, "step": 9808 }, { "epoch": 0.76, "grad_norm": 1.1523150424290927, "learning_rate": 2.8498723412591357e-06, "loss": 0.4938, "step": 9809 }, { "epoch": 0.76, "grad_norm": 1.1364860689792136, "learning_rate": 2.848115930001034e-06, "loss": 0.4626, "step": 9810 }, { "epoch": 0.76, "grad_norm": 1.2122917232370285, "learning_rate": 2.8463599702782764e-06, "loss": 0.5119, "step": 9811 }, { "epoch": 0.76, "grad_norm": 1.2063386281714108, "learning_rate": 2.8446044622017223e-06, "loss": 0.4895, "step": 9812 }, { "epoch": 0.76, "grad_norm": 1.276436206262035, "learning_rate": 2.842849405882202e-06, "loss": 0.5402, "step": 9813 }, { "epoch": 0.76, "grad_norm": 1.0930694097448517, "learning_rate": 2.841094801430524e-06, "loss": 0.482, "step": 9814 }, { "epoch": 0.76, "grad_norm": 1.1627584744302293, "learning_rate": 2.839340648957467e-06, "loss": 0.5275, "step": 9815 }, { "epoch": 0.76, "grad_norm": 1.4099065078690134, "learning_rate": 2.8375869485737782e-06, "loss": 0.6152, "step": 9816 }, { "epoch": 0.76, "grad_norm": 1.3101427617797599, "learning_rate": 2.8358337003901826e-06, "loss": 0.5267, "step": 9817 }, { "epoch": 0.76, "grad_norm": 1.0559675572642508, "learning_rate": 2.8340809045173646e-06, "loss": 0.4663, "step": 9818 }, { "epoch": 0.76, "grad_norm": 1.266785266410155, "learning_rate": 2.83232856106599e-06, "loss": 0.5083, "step": 9819 }, { "epoch": 0.76, "grad_norm": 1.1709059714233228, "learning_rate": 2.830576670146694e-06, "loss": 0.4736, "step": 9820 }, { "epoch": 0.76, "grad_norm": 1.2864326237671966, "learning_rate": 2.828825231870085e-06, "loss": 0.5468, "step": 9821 }, { "epoch": 0.76, "grad_norm": 1.312573067584006, "learning_rate": 2.827074246346737e-06, "loss": 0.5718, "step": 9822 }, { "epoch": 0.76, "grad_norm": 1.201206212987339, "learning_rate": 2.825323713687197e-06, "loss": 0.4914, "step": 9823 }, { "epoch": 0.76, "grad_norm": 1.2015729033140081, "learning_rate": 2.823573634001987e-06, "loss": 0.4981, "step": 9824 }, { "epoch": 0.76, "grad_norm": 1.2069773739145497, "learning_rate": 2.821824007401599e-06, "loss": 0.5177, "step": 9825 }, { "epoch": 0.76, "grad_norm": 1.305339216452509, "learning_rate": 2.8200748339964966e-06, "loss": 0.5806, "step": 9826 }, { "epoch": 0.76, "grad_norm": 1.1048882244072646, "learning_rate": 2.818326113897115e-06, "loss": 0.4816, "step": 9827 }, { "epoch": 0.76, "grad_norm": 0.9766718688756859, "learning_rate": 2.8165778472138572e-06, "loss": 0.4057, "step": 9828 }, { "epoch": 0.76, "grad_norm": 1.307712315011487, "learning_rate": 2.8148300340571e-06, "loss": 0.5007, "step": 9829 }, { "epoch": 0.76, "grad_norm": 1.1727222177552468, "learning_rate": 2.813082674537194e-06, "loss": 0.4607, "step": 9830 }, { "epoch": 0.76, "grad_norm": 1.2639675354621294, "learning_rate": 2.8113357687644615e-06, "loss": 0.5698, "step": 9831 }, { "epoch": 0.76, "grad_norm": 1.1162285590595595, "learning_rate": 2.8095893168491907e-06, "loss": 0.4915, "step": 9832 }, { "epoch": 0.76, "grad_norm": 1.2145241565367146, "learning_rate": 2.8078433189016406e-06, "loss": 0.539, "step": 9833 }, { "epoch": 0.76, "grad_norm": 1.1205390860648494, "learning_rate": 2.8060977750320485e-06, "loss": 0.4505, "step": 9834 }, { "epoch": 0.76, "grad_norm": 1.2136081409939452, "learning_rate": 2.8043526853506187e-06, "loss": 0.5186, "step": 9835 }, { "epoch": 0.76, "grad_norm": 1.2579950235750537, "learning_rate": 2.80260804996753e-06, "loss": 0.5054, "step": 9836 }, { "epoch": 0.76, "grad_norm": 1.0470881458621706, "learning_rate": 2.8008638689929314e-06, "loss": 0.4611, "step": 9837 }, { "epoch": 0.76, "grad_norm": 1.2401703585225536, "learning_rate": 2.799120142536935e-06, "loss": 0.5706, "step": 9838 }, { "epoch": 0.76, "grad_norm": 1.2317386916028414, "learning_rate": 2.7973768707096373e-06, "loss": 0.5346, "step": 9839 }, { "epoch": 0.76, "grad_norm": 1.0838109333019748, "learning_rate": 2.795634053621098e-06, "loss": 0.4016, "step": 9840 }, { "epoch": 0.76, "grad_norm": 1.273861867018515, "learning_rate": 2.793891691381353e-06, "loss": 0.4628, "step": 9841 }, { "epoch": 0.76, "grad_norm": 1.048437347639502, "learning_rate": 2.792149784100404e-06, "loss": 0.4574, "step": 9842 }, { "epoch": 0.76, "grad_norm": 1.1666399180661071, "learning_rate": 2.790408331888225e-06, "loss": 0.5149, "step": 9843 }, { "epoch": 0.76, "grad_norm": 1.1567957337693864, "learning_rate": 2.788667334854764e-06, "loss": 0.4376, "step": 9844 }, { "epoch": 0.76, "grad_norm": 1.1276946115965643, "learning_rate": 2.78692679310994e-06, "loss": 0.4975, "step": 9845 }, { "epoch": 0.76, "grad_norm": 1.1262328492276015, "learning_rate": 2.7851867067636407e-06, "loss": 0.4986, "step": 9846 }, { "epoch": 0.76, "grad_norm": 1.166700254819657, "learning_rate": 2.783447075925729e-06, "loss": 0.4907, "step": 9847 }, { "epoch": 0.76, "grad_norm": 1.2237907727845774, "learning_rate": 2.78170790070604e-06, "loss": 0.5076, "step": 9848 }, { "epoch": 0.76, "grad_norm": 1.2771501773601277, "learning_rate": 2.779969181214368e-06, "loss": 0.5389, "step": 9849 }, { "epoch": 0.76, "grad_norm": 1.1612377327885974, "learning_rate": 2.7782309175604937e-06, "loss": 0.4894, "step": 9850 }, { "epoch": 0.76, "grad_norm": 1.2163480417660855, "learning_rate": 2.7764931098541627e-06, "loss": 0.5454, "step": 9851 }, { "epoch": 0.76, "grad_norm": 1.1415034530159667, "learning_rate": 2.7747557582050878e-06, "loss": 0.5224, "step": 9852 }, { "epoch": 0.76, "grad_norm": 1.1984250303015094, "learning_rate": 2.7730188627229617e-06, "loss": 0.5258, "step": 9853 }, { "epoch": 0.76, "grad_norm": 1.2364348113927364, "learning_rate": 2.7712824235174384e-06, "loss": 0.5428, "step": 9854 }, { "epoch": 0.76, "grad_norm": 1.1589806248392363, "learning_rate": 2.769546440698151e-06, "loss": 0.4916, "step": 9855 }, { "epoch": 0.76, "grad_norm": 1.23084618970854, "learning_rate": 2.767810914374701e-06, "loss": 0.5467, "step": 9856 }, { "epoch": 0.76, "grad_norm": 1.2831385115754264, "learning_rate": 2.7660758446566616e-06, "loss": 0.5767, "step": 9857 }, { "epoch": 0.76, "grad_norm": 1.2239159380557223, "learning_rate": 2.7643412316535788e-06, "loss": 0.4891, "step": 9858 }, { "epoch": 0.76, "grad_norm": 1.2424230772035525, "learning_rate": 2.7626070754749623e-06, "loss": 0.5698, "step": 9859 }, { "epoch": 0.76, "grad_norm": 1.2754815669445625, "learning_rate": 2.7608733762303007e-06, "loss": 0.5527, "step": 9860 }, { "epoch": 0.76, "grad_norm": 1.2197909189293077, "learning_rate": 2.7591401340290546e-06, "loss": 0.542, "step": 9861 }, { "epoch": 0.77, "grad_norm": 1.2989702419458093, "learning_rate": 2.7574073489806473e-06, "loss": 0.5575, "step": 9862 }, { "epoch": 0.77, "grad_norm": 1.1437430418042591, "learning_rate": 2.7556750211944848e-06, "loss": 0.5385, "step": 9863 }, { "epoch": 0.77, "grad_norm": 1.2048709635678325, "learning_rate": 2.7539431507799298e-06, "loss": 0.5523, "step": 9864 }, { "epoch": 0.77, "grad_norm": 1.2138302123007683, "learning_rate": 2.752211737846329e-06, "loss": 0.5013, "step": 9865 }, { "epoch": 0.77, "grad_norm": 1.1721150978811634, "learning_rate": 2.7504807825029946e-06, "loss": 0.5307, "step": 9866 }, { "epoch": 0.77, "grad_norm": 1.1296348464534995, "learning_rate": 2.7487502848592107e-06, "loss": 0.5224, "step": 9867 }, { "epoch": 0.77, "grad_norm": 1.0287660102015583, "learning_rate": 2.7470202450242368e-06, "loss": 0.4638, "step": 9868 }, { "epoch": 0.77, "grad_norm": 1.1097480254236276, "learning_rate": 2.745290663107292e-06, "loss": 0.5129, "step": 9869 }, { "epoch": 0.77, "grad_norm": 1.185768722566527, "learning_rate": 2.7435615392175763e-06, "loss": 0.5038, "step": 9870 }, { "epoch": 0.77, "grad_norm": 1.1983693746329496, "learning_rate": 2.741832873464262e-06, "loss": 0.533, "step": 9871 }, { "epoch": 0.77, "grad_norm": 1.2524288898162346, "learning_rate": 2.7401046659564833e-06, "loss": 0.5055, "step": 9872 }, { "epoch": 0.77, "grad_norm": 1.2555332264607286, "learning_rate": 2.7383769168033557e-06, "loss": 0.5082, "step": 9873 }, { "epoch": 0.77, "grad_norm": 1.2141966737029612, "learning_rate": 2.736649626113955e-06, "loss": 0.5693, "step": 9874 }, { "epoch": 0.77, "grad_norm": 1.147146108738421, "learning_rate": 2.7349227939973388e-06, "loss": 0.4981, "step": 9875 }, { "epoch": 0.77, "grad_norm": 1.3258379823486097, "learning_rate": 2.7331964205625282e-06, "loss": 0.5669, "step": 9876 }, { "epoch": 0.77, "grad_norm": 1.2285800061845278, "learning_rate": 2.7314705059185196e-06, "loss": 0.4827, "step": 9877 }, { "epoch": 0.77, "grad_norm": 1.1998316289083573, "learning_rate": 2.7297450501742817e-06, "loss": 0.521, "step": 9878 }, { "epoch": 0.77, "grad_norm": 1.197441449097756, "learning_rate": 2.728020053438746e-06, "loss": 0.496, "step": 9879 }, { "epoch": 0.77, "grad_norm": 1.1359180370064426, "learning_rate": 2.7262955158208215e-06, "loss": 0.4892, "step": 9880 }, { "epoch": 0.77, "grad_norm": 1.1325942256709565, "learning_rate": 2.724571437429393e-06, "loss": 0.4609, "step": 9881 }, { "epoch": 0.77, "grad_norm": 1.1403616509752306, "learning_rate": 2.722847818373302e-06, "loss": 0.5117, "step": 9882 }, { "epoch": 0.77, "grad_norm": 1.2522275626864017, "learning_rate": 2.721124658761376e-06, "loss": 0.5464, "step": 9883 }, { "epoch": 0.77, "grad_norm": 1.191775105395397, "learning_rate": 2.7194019587024024e-06, "loss": 0.5021, "step": 9884 }, { "epoch": 0.77, "grad_norm": 1.2361227289765888, "learning_rate": 2.717679718305145e-06, "loss": 0.537, "step": 9885 }, { "epoch": 0.77, "grad_norm": 1.2553181527665496, "learning_rate": 2.7159579376783397e-06, "loss": 0.5136, "step": 9886 }, { "epoch": 0.77, "grad_norm": 1.2008053103973155, "learning_rate": 2.7142366169306898e-06, "loss": 0.5227, "step": 9887 }, { "epoch": 0.77, "grad_norm": 1.2377461140509411, "learning_rate": 2.712515756170876e-06, "loss": 0.549, "step": 9888 }, { "epoch": 0.77, "grad_norm": 1.210476301412687, "learning_rate": 2.710795355507537e-06, "loss": 0.4939, "step": 9889 }, { "epoch": 0.77, "grad_norm": 1.238102319346952, "learning_rate": 2.709075415049298e-06, "loss": 0.5496, "step": 9890 }, { "epoch": 0.77, "grad_norm": 1.1124386995550088, "learning_rate": 2.7073559349047406e-06, "loss": 0.4835, "step": 9891 }, { "epoch": 0.77, "grad_norm": 1.169190906038654, "learning_rate": 2.705636915182429e-06, "loss": 0.5328, "step": 9892 }, { "epoch": 0.77, "grad_norm": 1.1445753590283219, "learning_rate": 2.7039183559908954e-06, "loss": 0.4852, "step": 9893 }, { "epoch": 0.77, "grad_norm": 1.2546000715034669, "learning_rate": 2.702200257438636e-06, "loss": 0.4741, "step": 9894 }, { "epoch": 0.77, "grad_norm": 1.1876169448288676, "learning_rate": 2.700482619634126e-06, "loss": 0.4768, "step": 9895 }, { "epoch": 0.77, "grad_norm": 1.1513088946028456, "learning_rate": 2.698765442685809e-06, "loss": 0.4838, "step": 9896 }, { "epoch": 0.77, "grad_norm": 1.1666610184033224, "learning_rate": 2.6970487267020985e-06, "loss": 0.5128, "step": 9897 }, { "epoch": 0.77, "grad_norm": 1.0987791786181917, "learning_rate": 2.695332471791384e-06, "loss": 0.5172, "step": 9898 }, { "epoch": 0.77, "grad_norm": 1.2175867446922293, "learning_rate": 2.6936166780620143e-06, "loss": 0.4417, "step": 9899 }, { "epoch": 0.77, "grad_norm": 1.1778550220646553, "learning_rate": 2.691901345622322e-06, "loss": 0.5309, "step": 9900 }, { "epoch": 0.77, "grad_norm": 1.2065606048418924, "learning_rate": 2.6901864745806004e-06, "loss": 0.533, "step": 9901 }, { "epoch": 0.77, "grad_norm": 1.2534833058119357, "learning_rate": 2.68847206504512e-06, "loss": 0.5359, "step": 9902 }, { "epoch": 0.77, "grad_norm": 1.1870311514214298, "learning_rate": 2.6867581171241207e-06, "loss": 0.5264, "step": 9903 }, { "epoch": 0.77, "grad_norm": 1.2038317994621088, "learning_rate": 2.685044630925816e-06, "loss": 0.4904, "step": 9904 }, { "epoch": 0.77, "grad_norm": 1.201498492734855, "learning_rate": 2.6833316065583805e-06, "loss": 0.4722, "step": 9905 }, { "epoch": 0.77, "grad_norm": 1.1909770755403253, "learning_rate": 2.6816190441299695e-06, "loss": 0.5363, "step": 9906 }, { "epoch": 0.77, "grad_norm": 1.3065477460800525, "learning_rate": 2.6799069437487067e-06, "loss": 0.5407, "step": 9907 }, { "epoch": 0.77, "grad_norm": 1.2176764234675306, "learning_rate": 2.678195305522686e-06, "loss": 0.4867, "step": 9908 }, { "epoch": 0.77, "grad_norm": 1.254108068083646, "learning_rate": 2.676484129559973e-06, "loss": 0.4984, "step": 9909 }, { "epoch": 0.77, "grad_norm": 1.2363606670128513, "learning_rate": 2.6747734159686012e-06, "loss": 0.5248, "step": 9910 }, { "epoch": 0.77, "grad_norm": 1.2031140512736145, "learning_rate": 2.6730631648565753e-06, "loss": 0.4716, "step": 9911 }, { "epoch": 0.77, "grad_norm": 1.1020562438423822, "learning_rate": 2.6713533763318724e-06, "loss": 0.4827, "step": 9912 }, { "epoch": 0.77, "grad_norm": 1.1987326068431798, "learning_rate": 2.6696440505024423e-06, "loss": 0.4959, "step": 9913 }, { "epoch": 0.77, "grad_norm": 1.1936124792326168, "learning_rate": 2.667935187476206e-06, "loss": 0.4999, "step": 9914 }, { "epoch": 0.77, "grad_norm": 1.2021843341639942, "learning_rate": 2.666226787361046e-06, "loss": 0.5125, "step": 9915 }, { "epoch": 0.77, "grad_norm": 1.2260604122330965, "learning_rate": 2.6645188502648266e-06, "loss": 0.5427, "step": 9916 }, { "epoch": 0.77, "grad_norm": 1.1891810915704066, "learning_rate": 2.662811376295379e-06, "loss": 0.4743, "step": 9917 }, { "epoch": 0.77, "grad_norm": 1.2013367578494354, "learning_rate": 2.661104365560504e-06, "loss": 0.545, "step": 9918 }, { "epoch": 0.77, "grad_norm": 1.2662350161721243, "learning_rate": 2.6593978181679758e-06, "loss": 0.5615, "step": 9919 }, { "epoch": 0.77, "grad_norm": 1.227243319554038, "learning_rate": 2.657691734225537e-06, "loss": 0.5301, "step": 9920 }, { "epoch": 0.77, "grad_norm": 1.3088219287128833, "learning_rate": 2.655986113840897e-06, "loss": 0.5568, "step": 9921 }, { "epoch": 0.77, "grad_norm": 1.1906285483640324, "learning_rate": 2.6542809571217445e-06, "loss": 0.4635, "step": 9922 }, { "epoch": 0.77, "grad_norm": 1.2277681778742962, "learning_rate": 2.6525762641757336e-06, "loss": 0.5062, "step": 9923 }, { "epoch": 0.77, "grad_norm": 1.1496360389450744, "learning_rate": 2.650872035110493e-06, "loss": 0.5044, "step": 9924 }, { "epoch": 0.77, "grad_norm": 1.084056020207639, "learning_rate": 2.6491682700336165e-06, "loss": 0.4536, "step": 9925 }, { "epoch": 0.77, "grad_norm": 1.1961519871243682, "learning_rate": 2.6474649690526697e-06, "loss": 0.5105, "step": 9926 }, { "epoch": 0.77, "grad_norm": 1.1817719693918878, "learning_rate": 2.645762132275196e-06, "loss": 0.4965, "step": 9927 }, { "epoch": 0.77, "grad_norm": 1.2424026878672974, "learning_rate": 2.6440597598087005e-06, "loss": 0.5254, "step": 9928 }, { "epoch": 0.77, "grad_norm": 1.1675381584384779, "learning_rate": 2.642357851760666e-06, "loss": 0.4911, "step": 9929 }, { "epoch": 0.77, "grad_norm": 1.230200408255395, "learning_rate": 2.640656408238542e-06, "loss": 0.5253, "step": 9930 }, { "epoch": 0.77, "grad_norm": 1.3127762185949048, "learning_rate": 2.6389554293497455e-06, "loss": 0.5661, "step": 9931 }, { "epoch": 0.77, "grad_norm": 1.173912311905404, "learning_rate": 2.6372549152016703e-06, "loss": 0.4749, "step": 9932 }, { "epoch": 0.77, "grad_norm": 1.1288718248022118, "learning_rate": 2.6355548659016796e-06, "loss": 0.4752, "step": 9933 }, { "epoch": 0.77, "grad_norm": 1.2205444721119403, "learning_rate": 2.633855281557108e-06, "loss": 0.4786, "step": 9934 }, { "epoch": 0.77, "grad_norm": 1.2400897565012714, "learning_rate": 2.6321561622752543e-06, "loss": 0.4998, "step": 9935 }, { "epoch": 0.77, "grad_norm": 1.226849904981837, "learning_rate": 2.6304575081633944e-06, "loss": 0.4727, "step": 9936 }, { "epoch": 0.77, "grad_norm": 1.2553298332113563, "learning_rate": 2.628759319328774e-06, "loss": 0.4812, "step": 9937 }, { "epoch": 0.77, "grad_norm": 1.1466965747045235, "learning_rate": 2.6270615958786094e-06, "loss": 0.4982, "step": 9938 }, { "epoch": 0.77, "grad_norm": 1.1735122433362208, "learning_rate": 2.625364337920088e-06, "loss": 0.527, "step": 9939 }, { "epoch": 0.77, "grad_norm": 1.1700131694345968, "learning_rate": 2.6236675455603634e-06, "loss": 0.4744, "step": 9940 }, { "epoch": 0.77, "grad_norm": 1.1405423800710872, "learning_rate": 2.6219712189065616e-06, "loss": 0.4521, "step": 9941 }, { "epoch": 0.77, "grad_norm": 1.1533850668640135, "learning_rate": 2.6202753580657813e-06, "loss": 0.5114, "step": 9942 }, { "epoch": 0.77, "grad_norm": 1.2608868953945076, "learning_rate": 2.6185799631450926e-06, "loss": 0.534, "step": 9943 }, { "epoch": 0.77, "grad_norm": 1.1812717839407174, "learning_rate": 2.6168850342515375e-06, "loss": 0.4902, "step": 9944 }, { "epoch": 0.77, "grad_norm": 1.1256638793330935, "learning_rate": 2.6151905714921187e-06, "loss": 0.489, "step": 9945 }, { "epoch": 0.77, "grad_norm": 1.3486541591670242, "learning_rate": 2.6134965749738195e-06, "loss": 0.5749, "step": 9946 }, { "epoch": 0.77, "grad_norm": 1.1043014564054794, "learning_rate": 2.61180304480359e-06, "loss": 0.4637, "step": 9947 }, { "epoch": 0.77, "grad_norm": 1.1245948273809971, "learning_rate": 2.6101099810883535e-06, "loss": 0.5029, "step": 9948 }, { "epoch": 0.77, "grad_norm": 1.3349345943416269, "learning_rate": 2.6084173839350036e-06, "loss": 0.5671, "step": 9949 }, { "epoch": 0.77, "grad_norm": 1.0676599755229847, "learning_rate": 2.6067252534503996e-06, "loss": 0.4801, "step": 9950 }, { "epoch": 0.77, "grad_norm": 1.263210064846937, "learning_rate": 2.6050335897413713e-06, "loss": 0.492, "step": 9951 }, { "epoch": 0.77, "grad_norm": 1.2609827593654122, "learning_rate": 2.6033423929147263e-06, "loss": 0.5172, "step": 9952 }, { "epoch": 0.77, "grad_norm": 1.2073606270071737, "learning_rate": 2.6016516630772372e-06, "loss": 0.4781, "step": 9953 }, { "epoch": 0.77, "grad_norm": 1.2026512402220375, "learning_rate": 2.5999614003356523e-06, "loss": 0.4969, "step": 9954 }, { "epoch": 0.77, "grad_norm": 1.1661161190925189, "learning_rate": 2.5982716047966803e-06, "loss": 0.5243, "step": 9955 }, { "epoch": 0.77, "grad_norm": 1.1935415175767379, "learning_rate": 2.59658227656701e-06, "loss": 0.4837, "step": 9956 }, { "epoch": 0.77, "grad_norm": 1.2761928961791225, "learning_rate": 2.5948934157532968e-06, "loss": 0.5632, "step": 9957 }, { "epoch": 0.77, "grad_norm": 1.129951230996921, "learning_rate": 2.5932050224621685e-06, "loss": 0.5282, "step": 9958 }, { "epoch": 0.77, "grad_norm": 1.266337394372129, "learning_rate": 2.5915170968002236e-06, "loss": 0.5493, "step": 9959 }, { "epoch": 0.77, "grad_norm": 1.108274693240437, "learning_rate": 2.589829638874026e-06, "loss": 0.5348, "step": 9960 }, { "epoch": 0.77, "grad_norm": 1.1932922444293494, "learning_rate": 2.5881426487901127e-06, "loss": 0.4623, "step": 9961 }, { "epoch": 0.77, "grad_norm": 1.2101212734735998, "learning_rate": 2.586456126654995e-06, "loss": 0.496, "step": 9962 }, { "epoch": 0.77, "grad_norm": 1.1229493737396894, "learning_rate": 2.584770072575149e-06, "loss": 0.494, "step": 9963 }, { "epoch": 0.77, "grad_norm": 1.2912410475798146, "learning_rate": 2.583084486657027e-06, "loss": 0.507, "step": 9964 }, { "epoch": 0.77, "grad_norm": 1.3017216854133336, "learning_rate": 2.5813993690070504e-06, "loss": 0.5185, "step": 9965 }, { "epoch": 0.77, "grad_norm": 1.2180455935389933, "learning_rate": 2.579714719731604e-06, "loss": 0.5493, "step": 9966 }, { "epoch": 0.77, "grad_norm": 1.266237040281112, "learning_rate": 2.5780305389370507e-06, "loss": 0.5473, "step": 9967 }, { "epoch": 0.77, "grad_norm": 1.1009691953793233, "learning_rate": 2.576346826729722e-06, "loss": 0.4465, "step": 9968 }, { "epoch": 0.77, "grad_norm": 1.2663953344120586, "learning_rate": 2.5746635832159216e-06, "loss": 0.4871, "step": 9969 }, { "epoch": 0.77, "grad_norm": 1.1452919056538333, "learning_rate": 2.572980808501919e-06, "loss": 0.4802, "step": 9970 }, { "epoch": 0.77, "grad_norm": 1.2000431867616592, "learning_rate": 2.571298502693954e-06, "loss": 0.5243, "step": 9971 }, { "epoch": 0.77, "grad_norm": 1.081226366258881, "learning_rate": 2.5696166658982413e-06, "loss": 0.4674, "step": 9972 }, { "epoch": 0.77, "grad_norm": 1.180248253538023, "learning_rate": 2.5679352982209637e-06, "loss": 0.5176, "step": 9973 }, { "epoch": 0.77, "grad_norm": 1.260633822983754, "learning_rate": 2.5662543997682756e-06, "loss": 0.5136, "step": 9974 }, { "epoch": 0.77, "grad_norm": 1.171092624167524, "learning_rate": 2.5645739706463037e-06, "loss": 0.4875, "step": 9975 }, { "epoch": 0.77, "grad_norm": 1.1673263262976492, "learning_rate": 2.5628940109611356e-06, "loss": 0.4814, "step": 9976 }, { "epoch": 0.77, "grad_norm": 1.2444236349644187, "learning_rate": 2.5612145208188376e-06, "loss": 0.573, "step": 9977 }, { "epoch": 0.77, "grad_norm": 1.3020340210795742, "learning_rate": 2.5595355003254473e-06, "loss": 0.5376, "step": 9978 }, { "epoch": 0.77, "grad_norm": 1.260800951970293, "learning_rate": 2.557856949586972e-06, "loss": 0.537, "step": 9979 }, { "epoch": 0.77, "grad_norm": 1.2257463692150925, "learning_rate": 2.5561788687093835e-06, "loss": 0.4947, "step": 9980 }, { "epoch": 0.77, "grad_norm": 1.2394408082635595, "learning_rate": 2.554501257798624e-06, "loss": 0.4917, "step": 9981 }, { "epoch": 0.77, "grad_norm": 1.0865589257423698, "learning_rate": 2.5528241169606147e-06, "loss": 0.4663, "step": 9982 }, { "epoch": 0.77, "grad_norm": 1.1127785601819211, "learning_rate": 2.551147446301242e-06, "loss": 0.4692, "step": 9983 }, { "epoch": 0.77, "grad_norm": 1.0347785692568185, "learning_rate": 2.5494712459263615e-06, "loss": 0.4479, "step": 9984 }, { "epoch": 0.77, "grad_norm": 1.279606579281968, "learning_rate": 2.547795515941803e-06, "loss": 0.5007, "step": 9985 }, { "epoch": 0.77, "grad_norm": 1.2124789855551736, "learning_rate": 2.5461202564533603e-06, "loss": 0.5167, "step": 9986 }, { "epoch": 0.77, "grad_norm": 1.2867602046919882, "learning_rate": 2.544445467566802e-06, "loss": 0.5212, "step": 9987 }, { "epoch": 0.77, "grad_norm": 1.150153040032195, "learning_rate": 2.5427711493878673e-06, "loss": 0.5471, "step": 9988 }, { "epoch": 0.77, "grad_norm": 1.2188503517504345, "learning_rate": 2.5410973020222662e-06, "loss": 0.5214, "step": 9989 }, { "epoch": 0.78, "grad_norm": 1.1628562258398445, "learning_rate": 2.539423925575676e-06, "loss": 0.5012, "step": 9990 }, { "epoch": 0.78, "grad_norm": 1.1414263275072591, "learning_rate": 2.5377510201537427e-06, "loss": 0.4883, "step": 9991 }, { "epoch": 0.78, "grad_norm": 1.2317932750547442, "learning_rate": 2.5360785858620863e-06, "loss": 0.5331, "step": 9992 }, { "epoch": 0.78, "grad_norm": 1.2076154856613535, "learning_rate": 2.534406622806298e-06, "loss": 0.504, "step": 9993 }, { "epoch": 0.78, "grad_norm": 1.160594459259228, "learning_rate": 2.532735131091937e-06, "loss": 0.5296, "step": 9994 }, { "epoch": 0.78, "grad_norm": 1.2041926415269755, "learning_rate": 2.531064110824536e-06, "loss": 0.5302, "step": 9995 }, { "epoch": 0.78, "grad_norm": 1.1928340173768832, "learning_rate": 2.52939356210959e-06, "loss": 0.5035, "step": 9996 }, { "epoch": 0.78, "grad_norm": 1.2604478511683186, "learning_rate": 2.527723485052571e-06, "loss": 0.5003, "step": 9997 }, { "epoch": 0.78, "grad_norm": 1.1923388223867433, "learning_rate": 2.526053879758924e-06, "loss": 0.4868, "step": 9998 }, { "epoch": 0.78, "grad_norm": 1.1555894562952946, "learning_rate": 2.524384746334052e-06, "loss": 0.5027, "step": 9999 }, { "epoch": 0.78, "grad_norm": 1.1767387166986665, "learning_rate": 2.5227160848833443e-06, "loss": 0.491, "step": 10000 }, { "epoch": 0.78, "grad_norm": 1.2164645162064749, "learning_rate": 2.5210478955121444e-06, "loss": 0.533, "step": 10001 }, { "epoch": 0.78, "grad_norm": 1.2196183168072343, "learning_rate": 2.5193801783257763e-06, "loss": 0.5334, "step": 10002 }, { "epoch": 0.78, "grad_norm": 1.1693086625106097, "learning_rate": 2.5177129334295336e-06, "loss": 0.4849, "step": 10003 }, { "epoch": 0.78, "grad_norm": 1.2579749813927619, "learning_rate": 2.5160461609286766e-06, "loss": 0.4923, "step": 10004 }, { "epoch": 0.78, "grad_norm": 1.3370630530965744, "learning_rate": 2.514379860928441e-06, "loss": 0.552, "step": 10005 }, { "epoch": 0.78, "grad_norm": 1.1293249178887756, "learning_rate": 2.5127140335340217e-06, "loss": 0.4526, "step": 10006 }, { "epoch": 0.78, "grad_norm": 1.1095443918631438, "learning_rate": 2.511048678850595e-06, "loss": 0.4645, "step": 10007 }, { "epoch": 0.78, "grad_norm": 1.2825226859452796, "learning_rate": 2.5093837969833067e-06, "loss": 0.5017, "step": 10008 }, { "epoch": 0.78, "grad_norm": 1.167332759943817, "learning_rate": 2.507719388037262e-06, "loss": 0.4616, "step": 10009 }, { "epoch": 0.78, "grad_norm": 1.2167484769794, "learning_rate": 2.5060554521175506e-06, "loss": 0.4895, "step": 10010 }, { "epoch": 0.78, "grad_norm": 1.0355720692508503, "learning_rate": 2.504391989329219e-06, "loss": 0.4541, "step": 10011 }, { "epoch": 0.78, "grad_norm": 1.0623148588551712, "learning_rate": 2.5027289997772942e-06, "loss": 0.4406, "step": 10012 }, { "epoch": 0.78, "grad_norm": 1.178867838482162, "learning_rate": 2.5010664835667677e-06, "loss": 0.4825, "step": 10013 }, { "epoch": 0.78, "grad_norm": 1.1962115827109674, "learning_rate": 2.499404440802604e-06, "loss": 0.4951, "step": 10014 }, { "epoch": 0.78, "grad_norm": 1.1557178303645517, "learning_rate": 2.4977428715897357e-06, "loss": 0.4658, "step": 10015 }, { "epoch": 0.78, "grad_norm": 1.1800318840536121, "learning_rate": 2.496081776033069e-06, "loss": 0.5266, "step": 10016 }, { "epoch": 0.78, "grad_norm": 1.1925639047034136, "learning_rate": 2.494421154237473e-06, "loss": 0.4502, "step": 10017 }, { "epoch": 0.78, "grad_norm": 1.181635126778918, "learning_rate": 2.4927610063077956e-06, "loss": 0.5177, "step": 10018 }, { "epoch": 0.78, "grad_norm": 1.2086557363779336, "learning_rate": 2.4911013323488454e-06, "loss": 0.5447, "step": 10019 }, { "epoch": 0.78, "grad_norm": 1.148304367133798, "learning_rate": 2.4894421324654084e-06, "loss": 0.4846, "step": 10020 }, { "epoch": 0.78, "grad_norm": 1.113414983915189, "learning_rate": 2.487783406762242e-06, "loss": 0.4754, "step": 10021 }, { "epoch": 0.78, "grad_norm": 1.2629171532093313, "learning_rate": 2.4861251553440645e-06, "loss": 0.6041, "step": 10022 }, { "epoch": 0.78, "grad_norm": 1.0953053451064958, "learning_rate": 2.4844673783155716e-06, "loss": 0.4727, "step": 10023 }, { "epoch": 0.78, "grad_norm": 1.1419672971609276, "learning_rate": 2.482810075781429e-06, "loss": 0.4655, "step": 10024 }, { "epoch": 0.78, "grad_norm": 1.307920231364143, "learning_rate": 2.4811532478462697e-06, "loss": 0.5604, "step": 10025 }, { "epoch": 0.78, "grad_norm": 1.2963349930093868, "learning_rate": 2.4794968946147012e-06, "loss": 0.5272, "step": 10026 }, { "epoch": 0.78, "grad_norm": 1.198603966402989, "learning_rate": 2.4778410161912913e-06, "loss": 0.507, "step": 10027 }, { "epoch": 0.78, "grad_norm": 1.225843765677728, "learning_rate": 2.4761856126805906e-06, "loss": 0.5515, "step": 10028 }, { "epoch": 0.78, "grad_norm": 1.119766619498295, "learning_rate": 2.4745306841871063e-06, "loss": 0.4824, "step": 10029 }, { "epoch": 0.78, "grad_norm": 1.188048487030218, "learning_rate": 2.4728762308153264e-06, "loss": 0.522, "step": 10030 }, { "epoch": 0.78, "grad_norm": 1.187309500823671, "learning_rate": 2.4712222526697083e-06, "loss": 0.5183, "step": 10031 }, { "epoch": 0.78, "grad_norm": 1.186278216606256, "learning_rate": 2.4695687498546694e-06, "loss": 0.5115, "step": 10032 }, { "epoch": 0.78, "grad_norm": 1.214572201520226, "learning_rate": 2.4679157224746076e-06, "loss": 0.4824, "step": 10033 }, { "epoch": 0.78, "grad_norm": 1.1838195383982342, "learning_rate": 2.4662631706338856e-06, "loss": 0.5137, "step": 10034 }, { "epoch": 0.78, "grad_norm": 1.1206735494705995, "learning_rate": 2.4646110944368393e-06, "loss": 0.5, "step": 10035 }, { "epoch": 0.78, "grad_norm": 1.1044551124724868, "learning_rate": 2.4629594939877754e-06, "loss": 0.4861, "step": 10036 }, { "epoch": 0.78, "grad_norm": 1.165729276681926, "learning_rate": 2.461308369390961e-06, "loss": 0.4922, "step": 10037 }, { "epoch": 0.78, "grad_norm": 1.2298576654095985, "learning_rate": 2.4596577207506477e-06, "loss": 0.5183, "step": 10038 }, { "epoch": 0.78, "grad_norm": 1.2426334756121429, "learning_rate": 2.458007548171042e-06, "loss": 0.5322, "step": 10039 }, { "epoch": 0.78, "grad_norm": 1.2618085043903091, "learning_rate": 2.4563578517563314e-06, "loss": 0.5362, "step": 10040 }, { "epoch": 0.78, "grad_norm": 1.2909303945781236, "learning_rate": 2.4547086316106727e-06, "loss": 0.5335, "step": 10041 }, { "epoch": 0.78, "grad_norm": 1.2597374728110449, "learning_rate": 2.453059887838184e-06, "loss": 0.4994, "step": 10042 }, { "epoch": 0.78, "grad_norm": 1.2404129500499466, "learning_rate": 2.451411620542962e-06, "loss": 0.5025, "step": 10043 }, { "epoch": 0.78, "grad_norm": 1.2238815067068816, "learning_rate": 2.4497638298290693e-06, "loss": 0.4968, "step": 10044 }, { "epoch": 0.78, "grad_norm": 1.1828750593807622, "learning_rate": 2.4481165158005395e-06, "loss": 0.4855, "step": 10045 }, { "epoch": 0.78, "grad_norm": 1.171897531928569, "learning_rate": 2.4464696785613805e-06, "loss": 0.4971, "step": 10046 }, { "epoch": 0.78, "grad_norm": 1.1963336547014285, "learning_rate": 2.444823318215559e-06, "loss": 0.4717, "step": 10047 }, { "epoch": 0.78, "grad_norm": 1.2407457634021282, "learning_rate": 2.4431774348670236e-06, "loss": 0.5609, "step": 10048 }, { "epoch": 0.78, "grad_norm": 1.1881292081594819, "learning_rate": 2.441532028619682e-06, "loss": 0.5203, "step": 10049 }, { "epoch": 0.78, "grad_norm": 1.2974123932914685, "learning_rate": 2.4398870995774194e-06, "loss": 0.5319, "step": 10050 }, { "epoch": 0.78, "grad_norm": 1.202422642985484, "learning_rate": 2.4382426478440925e-06, "loss": 0.5096, "step": 10051 }, { "epoch": 0.78, "grad_norm": 1.1573668575954223, "learning_rate": 2.4365986735235183e-06, "loss": 0.4958, "step": 10052 }, { "epoch": 0.78, "grad_norm": 1.3252900777735628, "learning_rate": 2.4349551767194914e-06, "loss": 0.5182, "step": 10053 }, { "epoch": 0.78, "grad_norm": 1.2774157489235207, "learning_rate": 2.433312157535774e-06, "loss": 0.5626, "step": 10054 }, { "epoch": 0.78, "grad_norm": 1.1322958458639376, "learning_rate": 2.431669616076101e-06, "loss": 0.4618, "step": 10055 }, { "epoch": 0.78, "grad_norm": 1.114822284836688, "learning_rate": 2.430027552444174e-06, "loss": 0.4799, "step": 10056 }, { "epoch": 0.78, "grad_norm": 1.2591295159239135, "learning_rate": 2.4283859667436615e-06, "loss": 0.5261, "step": 10057 }, { "epoch": 0.78, "grad_norm": 1.212643608948486, "learning_rate": 2.4267448590782093e-06, "loss": 0.5242, "step": 10058 }, { "epoch": 0.78, "grad_norm": 1.1440619329947614, "learning_rate": 2.425104229551425e-06, "loss": 0.5239, "step": 10059 }, { "epoch": 0.78, "grad_norm": 1.171990147337657, "learning_rate": 2.4234640782668917e-06, "loss": 0.5115, "step": 10060 }, { "epoch": 0.78, "grad_norm": 1.2037450010328585, "learning_rate": 2.4218244053281636e-06, "loss": 0.5091, "step": 10061 }, { "epoch": 0.78, "grad_norm": 1.1519383121338802, "learning_rate": 2.420185210838756e-06, "loss": 0.4638, "step": 10062 }, { "epoch": 0.78, "grad_norm": 1.1667125159291576, "learning_rate": 2.418546494902163e-06, "loss": 0.4872, "step": 10063 }, { "epoch": 0.78, "grad_norm": 1.3481308253502409, "learning_rate": 2.416908257621845e-06, "loss": 0.5743, "step": 10064 }, { "epoch": 0.78, "grad_norm": 1.1603950229089637, "learning_rate": 2.415270499101232e-06, "loss": 0.521, "step": 10065 }, { "epoch": 0.78, "grad_norm": 1.2866470826508272, "learning_rate": 2.413633219443725e-06, "loss": 0.5425, "step": 10066 }, { "epoch": 0.78, "grad_norm": 1.1823853218974247, "learning_rate": 2.411996418752696e-06, "loss": 0.5001, "step": 10067 }, { "epoch": 0.78, "grad_norm": 1.1507584869350262, "learning_rate": 2.410360097131482e-06, "loss": 0.4919, "step": 10068 }, { "epoch": 0.78, "grad_norm": 1.1746519628427923, "learning_rate": 2.4087242546833887e-06, "loss": 0.5151, "step": 10069 }, { "epoch": 0.78, "grad_norm": 1.1421706818100408, "learning_rate": 2.407088891511701e-06, "loss": 0.4504, "step": 10070 }, { "epoch": 0.78, "grad_norm": 1.199424750810467, "learning_rate": 2.4054540077196644e-06, "loss": 0.4759, "step": 10071 }, { "epoch": 0.78, "grad_norm": 1.1506963818987264, "learning_rate": 2.403819603410502e-06, "loss": 0.4707, "step": 10072 }, { "epoch": 0.78, "grad_norm": 1.2772116869863952, "learning_rate": 2.4021856786873964e-06, "loss": 0.5053, "step": 10073 }, { "epoch": 0.78, "grad_norm": 1.2042466421969442, "learning_rate": 2.400552233653508e-06, "loss": 0.4835, "step": 10074 }, { "epoch": 0.78, "grad_norm": 1.2620288007324452, "learning_rate": 2.398919268411967e-06, "loss": 0.53, "step": 10075 }, { "epoch": 0.78, "grad_norm": 1.144201758546483, "learning_rate": 2.3972867830658665e-06, "loss": 0.4688, "step": 10076 }, { "epoch": 0.78, "grad_norm": 1.0487533912103724, "learning_rate": 2.3956547777182805e-06, "loss": 0.4783, "step": 10077 }, { "epoch": 0.78, "grad_norm": 1.2807614860578087, "learning_rate": 2.394023252472242e-06, "loss": 0.5428, "step": 10078 }, { "epoch": 0.78, "grad_norm": 1.1751886439566128, "learning_rate": 2.392392207430754e-06, "loss": 0.4952, "step": 10079 }, { "epoch": 0.78, "grad_norm": 1.2347904423672806, "learning_rate": 2.390761642696795e-06, "loss": 0.5202, "step": 10080 }, { "epoch": 0.78, "grad_norm": 1.1340833704171946, "learning_rate": 2.3891315583733133e-06, "loss": 0.5496, "step": 10081 }, { "epoch": 0.78, "grad_norm": 1.347717151095056, "learning_rate": 2.387501954563225e-06, "loss": 0.5304, "step": 10082 }, { "epoch": 0.78, "grad_norm": 1.235878957719505, "learning_rate": 2.385872831369411e-06, "loss": 0.4912, "step": 10083 }, { "epoch": 0.78, "grad_norm": 1.2240811658326685, "learning_rate": 2.38424418889473e-06, "loss": 0.4682, "step": 10084 }, { "epoch": 0.78, "grad_norm": 1.2674457968835273, "learning_rate": 2.382616027242005e-06, "loss": 0.5136, "step": 10085 }, { "epoch": 0.78, "grad_norm": 1.2216633430442685, "learning_rate": 2.3809883465140304e-06, "loss": 0.461, "step": 10086 }, { "epoch": 0.78, "grad_norm": 1.2392178913056242, "learning_rate": 2.3793611468135734e-06, "loss": 0.5249, "step": 10087 }, { "epoch": 0.78, "grad_norm": 1.1419433395405967, "learning_rate": 2.3777344282433645e-06, "loss": 0.4885, "step": 10088 }, { "epoch": 0.78, "grad_norm": 1.1360705123327532, "learning_rate": 2.3761081909061036e-06, "loss": 0.4726, "step": 10089 }, { "epoch": 0.78, "grad_norm": 1.2173158080965867, "learning_rate": 2.374482434904467e-06, "loss": 0.5024, "step": 10090 }, { "epoch": 0.78, "grad_norm": 1.2974416115472984, "learning_rate": 2.372857160341098e-06, "loss": 0.4977, "step": 10091 }, { "epoch": 0.78, "grad_norm": 1.209564217838977, "learning_rate": 2.371232367318609e-06, "loss": 0.5027, "step": 10092 }, { "epoch": 0.78, "grad_norm": 1.1404234303078846, "learning_rate": 2.369608055939576e-06, "loss": 0.464, "step": 10093 }, { "epoch": 0.78, "grad_norm": 1.2176849895988486, "learning_rate": 2.3679842263065554e-06, "loss": 0.5141, "step": 10094 }, { "epoch": 0.78, "grad_norm": 1.1304284099770816, "learning_rate": 2.366360878522067e-06, "loss": 0.5312, "step": 10095 }, { "epoch": 0.78, "grad_norm": 1.2151739524208165, "learning_rate": 2.3647380126886033e-06, "loss": 0.4729, "step": 10096 }, { "epoch": 0.78, "grad_norm": 1.0972838397906044, "learning_rate": 2.363115628908619e-06, "loss": 0.4701, "step": 10097 }, { "epoch": 0.78, "grad_norm": 1.2570521739975058, "learning_rate": 2.3614937272845484e-06, "loss": 0.5217, "step": 10098 }, { "epoch": 0.78, "grad_norm": 1.1563948463235374, "learning_rate": 2.3598723079187848e-06, "loss": 0.4791, "step": 10099 }, { "epoch": 0.78, "grad_norm": 1.211298685660256, "learning_rate": 2.358251370913701e-06, "loss": 0.4732, "step": 10100 }, { "epoch": 0.78, "grad_norm": 1.0808573941680326, "learning_rate": 2.356630916371635e-06, "loss": 0.5037, "step": 10101 }, { "epoch": 0.78, "grad_norm": 1.2364363540114214, "learning_rate": 2.3550109443948967e-06, "loss": 0.501, "step": 10102 }, { "epoch": 0.78, "grad_norm": 1.0159774755511717, "learning_rate": 2.353391455085756e-06, "loss": 0.4007, "step": 10103 }, { "epoch": 0.78, "grad_norm": 1.1697519016499072, "learning_rate": 2.3517724485464655e-06, "loss": 0.4867, "step": 10104 }, { "epoch": 0.78, "grad_norm": 1.248692639458742, "learning_rate": 2.3501539248792406e-06, "loss": 0.502, "step": 10105 }, { "epoch": 0.78, "grad_norm": 1.1643580119213923, "learning_rate": 2.348535884186267e-06, "loss": 0.4728, "step": 10106 }, { "epoch": 0.78, "grad_norm": 1.188584585303675, "learning_rate": 2.3469183265696984e-06, "loss": 0.4422, "step": 10107 }, { "epoch": 0.78, "grad_norm": 1.2102148545458833, "learning_rate": 2.3453012521316633e-06, "loss": 0.4905, "step": 10108 }, { "epoch": 0.78, "grad_norm": 1.209539184465441, "learning_rate": 2.343684660974249e-06, "loss": 0.5305, "step": 10109 }, { "epoch": 0.78, "grad_norm": 1.075547828366324, "learning_rate": 2.3420685531995247e-06, "loss": 0.4604, "step": 10110 }, { "epoch": 0.78, "grad_norm": 1.1997112045867584, "learning_rate": 2.340452928909522e-06, "loss": 0.5389, "step": 10111 }, { "epoch": 0.78, "grad_norm": 1.2543143205549179, "learning_rate": 2.3388377882062472e-06, "loss": 0.5065, "step": 10112 }, { "epoch": 0.78, "grad_norm": 1.1929581337526975, "learning_rate": 2.337223131191666e-06, "loss": 0.4766, "step": 10113 }, { "epoch": 0.78, "grad_norm": 1.1575132634533807, "learning_rate": 2.335608957967723e-06, "loss": 0.4926, "step": 10114 }, { "epoch": 0.78, "grad_norm": 1.3458825422252627, "learning_rate": 2.33399526863633e-06, "loss": 0.5577, "step": 10115 }, { "epoch": 0.78, "grad_norm": 1.1670118513804872, "learning_rate": 2.33238206329937e-06, "loss": 0.4899, "step": 10116 }, { "epoch": 0.78, "grad_norm": 1.1514919947827544, "learning_rate": 2.3307693420586873e-06, "loss": 0.4623, "step": 10117 }, { "epoch": 0.78, "grad_norm": 1.1456185341843501, "learning_rate": 2.3291571050161066e-06, "loss": 0.4777, "step": 10118 }, { "epoch": 0.79, "grad_norm": 1.1668270999862007, "learning_rate": 2.3275453522734116e-06, "loss": 0.5206, "step": 10119 }, { "epoch": 0.79, "grad_norm": 1.2782400643708445, "learning_rate": 2.325934083932362e-06, "loss": 0.5409, "step": 10120 }, { "epoch": 0.79, "grad_norm": 1.11572265625, "learning_rate": 2.3243233000946874e-06, "loss": 0.4567, "step": 10121 }, { "epoch": 0.79, "grad_norm": 1.1744299946688732, "learning_rate": 2.3227130008620847e-06, "loss": 0.4407, "step": 10122 }, { "epoch": 0.79, "grad_norm": 1.2944843824762486, "learning_rate": 2.321103186336222e-06, "loss": 0.5814, "step": 10123 }, { "epoch": 0.79, "grad_norm": 1.3216213984071001, "learning_rate": 2.319493856618731e-06, "loss": 0.5074, "step": 10124 }, { "epoch": 0.79, "grad_norm": 1.1083881663380104, "learning_rate": 2.3178850118112185e-06, "loss": 0.4991, "step": 10125 }, { "epoch": 0.79, "grad_norm": 1.3344712269590773, "learning_rate": 2.3162766520152624e-06, "loss": 0.5435, "step": 10126 }, { "epoch": 0.79, "grad_norm": 1.136803482935145, "learning_rate": 2.3146687773324017e-06, "loss": 0.4609, "step": 10127 }, { "epoch": 0.79, "grad_norm": 1.1339425074318483, "learning_rate": 2.313061387864155e-06, "loss": 0.4937, "step": 10128 }, { "epoch": 0.79, "grad_norm": 1.2276166528717671, "learning_rate": 2.311454483711999e-06, "loss": 0.466, "step": 10129 }, { "epoch": 0.79, "grad_norm": 1.2216988614633686, "learning_rate": 2.309848064977389e-06, "loss": 0.5624, "step": 10130 }, { "epoch": 0.79, "grad_norm": 1.1410175327388248, "learning_rate": 2.3082421317617463e-06, "loss": 0.5206, "step": 10131 }, { "epoch": 0.79, "grad_norm": 1.1642994991442626, "learning_rate": 2.3066366841664633e-06, "loss": 0.5073, "step": 10132 }, { "epoch": 0.79, "grad_norm": 1.2936334658858177, "learning_rate": 2.3050317222929006e-06, "loss": 0.4881, "step": 10133 }, { "epoch": 0.79, "grad_norm": 1.218495611393429, "learning_rate": 2.3034272462423846e-06, "loss": 0.4935, "step": 10134 }, { "epoch": 0.79, "grad_norm": 1.191787958752573, "learning_rate": 2.3018232561162144e-06, "loss": 0.5166, "step": 10135 }, { "epoch": 0.79, "grad_norm": 1.2516170532681377, "learning_rate": 2.3002197520156634e-06, "loss": 0.5487, "step": 10136 }, { "epoch": 0.79, "grad_norm": 1.0504019240469935, "learning_rate": 2.2986167340419606e-06, "loss": 0.472, "step": 10137 }, { "epoch": 0.79, "grad_norm": 1.1487923936627922, "learning_rate": 2.2970142022963216e-06, "loss": 0.4904, "step": 10138 }, { "epoch": 0.79, "grad_norm": 1.2419663718273535, "learning_rate": 2.295412156879915e-06, "loss": 0.5323, "step": 10139 }, { "epoch": 0.79, "grad_norm": 1.2529952403021525, "learning_rate": 2.2938105978938897e-06, "loss": 0.5181, "step": 10140 }, { "epoch": 0.79, "grad_norm": 1.1726901461831587, "learning_rate": 2.2922095254393594e-06, "loss": 0.4811, "step": 10141 }, { "epoch": 0.79, "grad_norm": 1.2230176787545148, "learning_rate": 2.290608939617408e-06, "loss": 0.5283, "step": 10142 }, { "epoch": 0.79, "grad_norm": 1.2528957204756141, "learning_rate": 2.2890088405290935e-06, "loss": 0.4803, "step": 10143 }, { "epoch": 0.79, "grad_norm": 1.1980383216893382, "learning_rate": 2.2874092282754313e-06, "loss": 0.5198, "step": 10144 }, { "epoch": 0.79, "grad_norm": 1.1562906722055037, "learning_rate": 2.2858101029574164e-06, "loss": 0.4644, "step": 10145 }, { "epoch": 0.79, "grad_norm": 1.1054367761739514, "learning_rate": 2.284211464676013e-06, "loss": 0.4602, "step": 10146 }, { "epoch": 0.79, "grad_norm": 1.1415859511770745, "learning_rate": 2.2826133135321437e-06, "loss": 0.4697, "step": 10147 }, { "epoch": 0.79, "grad_norm": 1.2473425750937144, "learning_rate": 2.2810156496267165e-06, "loss": 0.5214, "step": 10148 }, { "epoch": 0.79, "grad_norm": 1.2849709977316985, "learning_rate": 2.2794184730605926e-06, "loss": 0.5002, "step": 10149 }, { "epoch": 0.79, "grad_norm": 1.230826867684282, "learning_rate": 2.277821783934614e-06, "loss": 0.5676, "step": 10150 }, { "epoch": 0.79, "grad_norm": 1.1926225300983107, "learning_rate": 2.276225582349587e-06, "loss": 0.4976, "step": 10151 }, { "epoch": 0.79, "grad_norm": 1.2403045874718333, "learning_rate": 2.2746298684062884e-06, "loss": 0.5158, "step": 10152 }, { "epoch": 0.79, "grad_norm": 1.2616096662010758, "learning_rate": 2.273034642205467e-06, "loss": 0.5316, "step": 10153 }, { "epoch": 0.79, "grad_norm": 1.1504857488658893, "learning_rate": 2.2714399038478317e-06, "loss": 0.4866, "step": 10154 }, { "epoch": 0.79, "grad_norm": 1.1106666245439514, "learning_rate": 2.2698456534340694e-06, "loss": 0.4931, "step": 10155 }, { "epoch": 0.79, "grad_norm": 1.2648908110375974, "learning_rate": 2.2682518910648353e-06, "loss": 0.5406, "step": 10156 }, { "epoch": 0.79, "grad_norm": 1.0745793517621873, "learning_rate": 2.266658616840748e-06, "loss": 0.4133, "step": 10157 }, { "epoch": 0.79, "grad_norm": 1.27854405740696, "learning_rate": 2.265065830862404e-06, "loss": 0.5401, "step": 10158 }, { "epoch": 0.79, "grad_norm": 1.3219401692948791, "learning_rate": 2.2634735332303583e-06, "loss": 0.5505, "step": 10159 }, { "epoch": 0.79, "grad_norm": 1.2212095141069266, "learning_rate": 2.261881724045143e-06, "loss": 0.4825, "step": 10160 }, { "epoch": 0.79, "grad_norm": 1.091805118536494, "learning_rate": 2.2602904034072583e-06, "loss": 0.4823, "step": 10161 }, { "epoch": 0.79, "grad_norm": 1.2066577716274733, "learning_rate": 2.2586995714171712e-06, "loss": 0.5196, "step": 10162 }, { "epoch": 0.79, "grad_norm": 1.2136639327423837, "learning_rate": 2.257109228175324e-06, "loss": 0.5029, "step": 10163 }, { "epoch": 0.79, "grad_norm": 1.1878321333750181, "learning_rate": 2.2555193737821156e-06, "loss": 0.5324, "step": 10164 }, { "epoch": 0.79, "grad_norm": 1.3371091021872668, "learning_rate": 2.253930008337927e-06, "loss": 0.5146, "step": 10165 }, { "epoch": 0.79, "grad_norm": 1.2784012549574402, "learning_rate": 2.252341131943102e-06, "loss": 0.5207, "step": 10166 }, { "epoch": 0.79, "grad_norm": 1.1155664914814352, "learning_rate": 2.250752744697953e-06, "loss": 0.541, "step": 10167 }, { "epoch": 0.79, "grad_norm": 1.193134892354234, "learning_rate": 2.249164846702766e-06, "loss": 0.4344, "step": 10168 }, { "epoch": 0.79, "grad_norm": 1.227594852317764, "learning_rate": 2.247577438057789e-06, "loss": 0.5493, "step": 10169 }, { "epoch": 0.79, "grad_norm": 1.2751448249842616, "learning_rate": 2.2459905188632446e-06, "loss": 0.5256, "step": 10170 }, { "epoch": 0.79, "grad_norm": 1.2167122261888477, "learning_rate": 2.244404089219325e-06, "loss": 0.5444, "step": 10171 }, { "epoch": 0.79, "grad_norm": 1.2273949392074224, "learning_rate": 2.242818149226189e-06, "loss": 0.5473, "step": 10172 }, { "epoch": 0.79, "grad_norm": 1.2131404895766427, "learning_rate": 2.2412326989839673e-06, "loss": 0.489, "step": 10173 }, { "epoch": 0.79, "grad_norm": 1.165382046986016, "learning_rate": 2.2396477385927527e-06, "loss": 0.4718, "step": 10174 }, { "epoch": 0.79, "grad_norm": 1.4393539083958824, "learning_rate": 2.238063268152615e-06, "loss": 0.5621, "step": 10175 }, { "epoch": 0.79, "grad_norm": 1.1952756489577088, "learning_rate": 2.236479287763591e-06, "loss": 0.5411, "step": 10176 }, { "epoch": 0.79, "grad_norm": 1.2702662761015613, "learning_rate": 2.2348957975256825e-06, "loss": 0.52, "step": 10177 }, { "epoch": 0.79, "grad_norm": 1.2513012788461038, "learning_rate": 2.2333127975388646e-06, "loss": 0.5292, "step": 10178 }, { "epoch": 0.79, "grad_norm": 1.2521675866459296, "learning_rate": 2.2317302879030833e-06, "loss": 0.5621, "step": 10179 }, { "epoch": 0.79, "grad_norm": 1.13786158840586, "learning_rate": 2.230148268718245e-06, "loss": 0.4824, "step": 10180 }, { "epoch": 0.79, "grad_norm": 1.173078046620181, "learning_rate": 2.228566740084234e-06, "loss": 0.4786, "step": 10181 }, { "epoch": 0.79, "grad_norm": 1.1558363406511902, "learning_rate": 2.2269857021009e-06, "loss": 0.5146, "step": 10182 }, { "epoch": 0.79, "grad_norm": 1.1292032029456813, "learning_rate": 2.225405154868062e-06, "loss": 0.5, "step": 10183 }, { "epoch": 0.79, "grad_norm": 1.1289229342273814, "learning_rate": 2.2238250984855112e-06, "loss": 0.4808, "step": 10184 }, { "epoch": 0.79, "grad_norm": 1.2538177839308264, "learning_rate": 2.2222455330529993e-06, "loss": 0.5224, "step": 10185 }, { "epoch": 0.79, "grad_norm": 1.3138769737753615, "learning_rate": 2.2206664586702566e-06, "loss": 0.5375, "step": 10186 }, { "epoch": 0.79, "grad_norm": 1.1931142602100457, "learning_rate": 2.219087875436974e-06, "loss": 0.4765, "step": 10187 }, { "epoch": 0.79, "grad_norm": 1.145986211577279, "learning_rate": 2.2175097834528183e-06, "loss": 0.4783, "step": 10188 }, { "epoch": 0.79, "grad_norm": 1.100529883378034, "learning_rate": 2.2159321828174252e-06, "loss": 0.4952, "step": 10189 }, { "epoch": 0.79, "grad_norm": 1.1984984878479574, "learning_rate": 2.214355073630391e-06, "loss": 0.464, "step": 10190 }, { "epoch": 0.79, "grad_norm": 1.195610408185424, "learning_rate": 2.212778455991289e-06, "loss": 0.4916, "step": 10191 }, { "epoch": 0.79, "grad_norm": 1.2169761464422881, "learning_rate": 2.211202329999661e-06, "loss": 0.4983, "step": 10192 }, { "epoch": 0.79, "grad_norm": 1.2559834800137961, "learning_rate": 2.2096266957550138e-06, "loss": 0.5574, "step": 10193 }, { "epoch": 0.79, "grad_norm": 1.1271198750963534, "learning_rate": 2.208051553356829e-06, "loss": 0.4698, "step": 10194 }, { "epoch": 0.79, "grad_norm": 1.1662122091076532, "learning_rate": 2.2064769029045497e-06, "loss": 0.497, "step": 10195 }, { "epoch": 0.79, "grad_norm": 1.2296533698613163, "learning_rate": 2.2049027444975934e-06, "loss": 0.5144, "step": 10196 }, { "epoch": 0.79, "grad_norm": 1.2364310030446166, "learning_rate": 2.2033290782353434e-06, "loss": 0.5392, "step": 10197 }, { "epoch": 0.79, "grad_norm": 1.1779016277935124, "learning_rate": 2.2017559042171534e-06, "loss": 0.5042, "step": 10198 }, { "epoch": 0.79, "grad_norm": 1.2021413968683448, "learning_rate": 2.2001832225423493e-06, "loss": 0.5654, "step": 10199 }, { "epoch": 0.79, "grad_norm": 1.1834056440386747, "learning_rate": 2.1986110333102175e-06, "loss": 0.4727, "step": 10200 }, { "epoch": 0.79, "grad_norm": 1.331240198161974, "learning_rate": 2.1970393366200216e-06, "loss": 0.479, "step": 10201 }, { "epoch": 0.79, "grad_norm": 1.1756101971778818, "learning_rate": 2.195468132570989e-06, "loss": 0.4601, "step": 10202 }, { "epoch": 0.79, "grad_norm": 1.0827746540187844, "learning_rate": 2.193897421262321e-06, "loss": 0.4897, "step": 10203 }, { "epoch": 0.79, "grad_norm": 1.1487147716645416, "learning_rate": 2.1923272027931853e-06, "loss": 0.4507, "step": 10204 }, { "epoch": 0.79, "grad_norm": 1.2662310620894188, "learning_rate": 2.1907574772627116e-06, "loss": 0.531, "step": 10205 }, { "epoch": 0.79, "grad_norm": 1.2257299331027671, "learning_rate": 2.189188244770013e-06, "loss": 0.4869, "step": 10206 }, { "epoch": 0.79, "grad_norm": 1.1055489770468852, "learning_rate": 2.187619505414156e-06, "loss": 0.5016, "step": 10207 }, { "epoch": 0.79, "grad_norm": 1.2545303265283958, "learning_rate": 2.186051259294185e-06, "loss": 0.52, "step": 10208 }, { "epoch": 0.79, "grad_norm": 1.1895532424960171, "learning_rate": 2.1844835065091165e-06, "loss": 0.5058, "step": 10209 }, { "epoch": 0.79, "grad_norm": 1.190883634415636, "learning_rate": 2.1829162471579234e-06, "loss": 0.5163, "step": 10210 }, { "epoch": 0.79, "grad_norm": 1.0893931123734641, "learning_rate": 2.18134948133956e-06, "loss": 0.4837, "step": 10211 }, { "epoch": 0.79, "grad_norm": 1.1292518166017984, "learning_rate": 2.1797832091529414e-06, "loss": 0.5445, "step": 10212 }, { "epoch": 0.79, "grad_norm": 1.1243821142712682, "learning_rate": 2.178217430696956e-06, "loss": 0.5161, "step": 10213 }, { "epoch": 0.79, "grad_norm": 1.0830331838953589, "learning_rate": 2.1766521460704627e-06, "loss": 0.4431, "step": 10214 }, { "epoch": 0.79, "grad_norm": 1.2119092918136107, "learning_rate": 2.1750873553722796e-06, "loss": 0.4775, "step": 10215 }, { "epoch": 0.79, "grad_norm": 1.0401988305247927, "learning_rate": 2.1735230587012057e-06, "loss": 0.4159, "step": 10216 }, { "epoch": 0.79, "grad_norm": 1.318567737079112, "learning_rate": 2.1719592561559978e-06, "loss": 0.5214, "step": 10217 }, { "epoch": 0.79, "grad_norm": 1.0961440995847413, "learning_rate": 2.1703959478353886e-06, "loss": 0.4706, "step": 10218 }, { "epoch": 0.79, "grad_norm": 1.2243958793600707, "learning_rate": 2.168833133838082e-06, "loss": 0.4916, "step": 10219 }, { "epoch": 0.79, "grad_norm": 1.2505341818956475, "learning_rate": 2.167270814262741e-06, "loss": 0.4605, "step": 10220 }, { "epoch": 0.79, "grad_norm": 1.3083759639055283, "learning_rate": 2.165708989208004e-06, "loss": 0.4989, "step": 10221 }, { "epoch": 0.79, "grad_norm": 1.321453932846483, "learning_rate": 2.1641476587724784e-06, "loss": 0.5485, "step": 10222 }, { "epoch": 0.79, "grad_norm": 1.3265343958525244, "learning_rate": 2.16258682305474e-06, "loss": 0.5625, "step": 10223 }, { "epoch": 0.79, "grad_norm": 1.1665851984643831, "learning_rate": 2.1610264821533323e-06, "loss": 0.5387, "step": 10224 }, { "epoch": 0.79, "grad_norm": 1.1498525296899968, "learning_rate": 2.159466636166765e-06, "loss": 0.508, "step": 10225 }, { "epoch": 0.79, "grad_norm": 1.2169015511788683, "learning_rate": 2.1579072851935222e-06, "loss": 0.5239, "step": 10226 }, { "epoch": 0.79, "grad_norm": 1.3006873111197723, "learning_rate": 2.1563484293320503e-06, "loss": 0.5655, "step": 10227 }, { "epoch": 0.79, "grad_norm": 1.2583264077019156, "learning_rate": 2.154790068680771e-06, "loss": 0.4888, "step": 10228 }, { "epoch": 0.79, "grad_norm": 1.3641088209119965, "learning_rate": 2.1532322033380725e-06, "loss": 0.5536, "step": 10229 }, { "epoch": 0.79, "grad_norm": 1.2896213591623045, "learning_rate": 2.1516748334023065e-06, "loss": 0.5742, "step": 10230 }, { "epoch": 0.79, "grad_norm": 1.266656996455841, "learning_rate": 2.1501179589717993e-06, "loss": 0.558, "step": 10231 }, { "epoch": 0.79, "grad_norm": 1.23049209814778, "learning_rate": 2.148561580144847e-06, "loss": 0.5469, "step": 10232 }, { "epoch": 0.79, "grad_norm": 1.083509461439479, "learning_rate": 2.1470056970197085e-06, "loss": 0.489, "step": 10233 }, { "epoch": 0.79, "grad_norm": 1.1973030619656757, "learning_rate": 2.14545030969462e-06, "loss": 0.5326, "step": 10234 }, { "epoch": 0.79, "grad_norm": 1.1525056369668465, "learning_rate": 2.143895418267775e-06, "loss": 0.532, "step": 10235 }, { "epoch": 0.79, "grad_norm": 1.2896594890402306, "learning_rate": 2.1423410228373477e-06, "loss": 0.5312, "step": 10236 }, { "epoch": 0.79, "grad_norm": 1.1524543838410315, "learning_rate": 2.1407871235014675e-06, "loss": 0.4639, "step": 10237 }, { "epoch": 0.79, "grad_norm": 1.3704034094412325, "learning_rate": 2.139233720358246e-06, "loss": 0.5722, "step": 10238 }, { "epoch": 0.79, "grad_norm": 1.2197110714932773, "learning_rate": 2.137680813505756e-06, "loss": 0.5238, "step": 10239 }, { "epoch": 0.79, "grad_norm": 1.23459961211722, "learning_rate": 2.1361284030420416e-06, "loss": 0.5142, "step": 10240 }, { "epoch": 0.79, "grad_norm": 1.1625394650395113, "learning_rate": 2.1345764890651123e-06, "loss": 0.4782, "step": 10241 }, { "epoch": 0.79, "grad_norm": 1.232935440257451, "learning_rate": 2.1330250716729484e-06, "loss": 0.4937, "step": 10242 }, { "epoch": 0.79, "grad_norm": 1.0461478483585889, "learning_rate": 2.1314741509635007e-06, "loss": 0.4287, "step": 10243 }, { "epoch": 0.79, "grad_norm": 1.117240317636678, "learning_rate": 2.1299237270346885e-06, "loss": 0.4419, "step": 10244 }, { "epoch": 0.79, "grad_norm": 1.0641729983681847, "learning_rate": 2.1283737999843922e-06, "loss": 0.4996, "step": 10245 }, { "epoch": 0.79, "grad_norm": 1.2339668081880149, "learning_rate": 2.126824369910474e-06, "loss": 0.5177, "step": 10246 }, { "epoch": 0.79, "grad_norm": 1.0542395311486665, "learning_rate": 2.125275436910751e-06, "loss": 0.4682, "step": 10247 }, { "epoch": 0.8, "grad_norm": 1.286338332083872, "learning_rate": 2.123727001083017e-06, "loss": 0.5072, "step": 10248 }, { "epoch": 0.8, "grad_norm": 1.1217494203631044, "learning_rate": 2.1221790625250336e-06, "loss": 0.477, "step": 10249 }, { "epoch": 0.8, "grad_norm": 1.1588988506260418, "learning_rate": 2.1206316213345334e-06, "loss": 0.5209, "step": 10250 }, { "epoch": 0.8, "grad_norm": 1.3158661048308031, "learning_rate": 2.119084677609209e-06, "loss": 0.4853, "step": 10251 }, { "epoch": 0.8, "grad_norm": 1.2928356090579884, "learning_rate": 2.1175382314467285e-06, "loss": 0.4804, "step": 10252 }, { "epoch": 0.8, "grad_norm": 1.230745653754229, "learning_rate": 2.1159922829447267e-06, "loss": 0.4912, "step": 10253 }, { "epoch": 0.8, "grad_norm": 1.186337956503034, "learning_rate": 2.1144468322008125e-06, "loss": 0.5167, "step": 10254 }, { "epoch": 0.8, "grad_norm": 1.1992578655412016, "learning_rate": 2.11290187931255e-06, "loss": 0.5538, "step": 10255 }, { "epoch": 0.8, "grad_norm": 1.1897909752606575, "learning_rate": 2.111357424377487e-06, "loss": 0.5504, "step": 10256 }, { "epoch": 0.8, "grad_norm": 1.3010672406670374, "learning_rate": 2.109813467493127e-06, "loss": 0.5576, "step": 10257 }, { "epoch": 0.8, "grad_norm": 1.2899843560977982, "learning_rate": 2.1082700087569517e-06, "loss": 0.585, "step": 10258 }, { "epoch": 0.8, "grad_norm": 1.2554375160951472, "learning_rate": 2.106727048266406e-06, "loss": 0.5399, "step": 10259 }, { "epoch": 0.8, "grad_norm": 1.2978699326201804, "learning_rate": 2.105184586118908e-06, "loss": 0.5437, "step": 10260 }, { "epoch": 0.8, "grad_norm": 1.1338721745114873, "learning_rate": 2.1036426224118366e-06, "loss": 0.4785, "step": 10261 }, { "epoch": 0.8, "grad_norm": 1.2578823473000358, "learning_rate": 2.1021011572425466e-06, "loss": 0.4409, "step": 10262 }, { "epoch": 0.8, "grad_norm": 1.1481951503265189, "learning_rate": 2.1005601907083585e-06, "loss": 0.4557, "step": 10263 }, { "epoch": 0.8, "grad_norm": 1.1707571674610573, "learning_rate": 2.0990197229065636e-06, "loss": 0.4925, "step": 10264 }, { "epoch": 0.8, "grad_norm": 1.3843083453869778, "learning_rate": 2.097479753934415e-06, "loss": 0.526, "step": 10265 }, { "epoch": 0.8, "grad_norm": 1.0259899894493696, "learning_rate": 2.095940283889143e-06, "loss": 0.4505, "step": 10266 }, { "epoch": 0.8, "grad_norm": 1.3241290846993647, "learning_rate": 2.0944013128679385e-06, "loss": 0.5724, "step": 10267 }, { "epoch": 0.8, "grad_norm": 1.1714965209127777, "learning_rate": 2.092862840967966e-06, "loss": 0.5172, "step": 10268 }, { "epoch": 0.8, "grad_norm": 1.181992508378791, "learning_rate": 2.0913248682863583e-06, "loss": 0.4796, "step": 10269 }, { "epoch": 0.8, "grad_norm": 1.1627365343375886, "learning_rate": 2.0897873949202175e-06, "loss": 0.53, "step": 10270 }, { "epoch": 0.8, "grad_norm": 1.1728276259038721, "learning_rate": 2.088250420966608e-06, "loss": 0.4762, "step": 10271 }, { "epoch": 0.8, "grad_norm": 1.196731224089679, "learning_rate": 2.086713946522567e-06, "loss": 0.5219, "step": 10272 }, { "epoch": 0.8, "grad_norm": 1.1620583402823577, "learning_rate": 2.085177971685103e-06, "loss": 0.5089, "step": 10273 }, { "epoch": 0.8, "grad_norm": 1.1714205560094304, "learning_rate": 2.0836424965511913e-06, "loss": 0.4839, "step": 10274 }, { "epoch": 0.8, "grad_norm": 1.1311805282252312, "learning_rate": 2.082107521217769e-06, "loss": 0.456, "step": 10275 }, { "epoch": 0.8, "grad_norm": 1.253755221674534, "learning_rate": 2.080573045781753e-06, "loss": 0.4884, "step": 10276 }, { "epoch": 0.8, "grad_norm": 1.0926340677330009, "learning_rate": 2.0790390703400164e-06, "loss": 0.4614, "step": 10277 }, { "epoch": 0.8, "grad_norm": 1.240784050189275, "learning_rate": 2.0775055949894096e-06, "loss": 0.5123, "step": 10278 }, { "epoch": 0.8, "grad_norm": 1.2754458171591807, "learning_rate": 2.0759726198267495e-06, "loss": 0.535, "step": 10279 }, { "epoch": 0.8, "grad_norm": 1.2359064959122852, "learning_rate": 2.0744401449488238e-06, "loss": 0.5527, "step": 10280 }, { "epoch": 0.8, "grad_norm": 1.095415482189708, "learning_rate": 2.072908170452379e-06, "loss": 0.4655, "step": 10281 }, { "epoch": 0.8, "grad_norm": 1.235275278261656, "learning_rate": 2.07137669643414e-06, "loss": 0.5061, "step": 10282 }, { "epoch": 0.8, "grad_norm": 1.1767587748796635, "learning_rate": 2.0698457229907966e-06, "loss": 0.4943, "step": 10283 }, { "epoch": 0.8, "grad_norm": 1.1228389647051331, "learning_rate": 2.0683152502190095e-06, "loss": 0.5053, "step": 10284 }, { "epoch": 0.8, "grad_norm": 1.1843336958715984, "learning_rate": 2.0667852782153996e-06, "loss": 0.4801, "step": 10285 }, { "epoch": 0.8, "grad_norm": 1.2250850064113754, "learning_rate": 2.0652558070765682e-06, "loss": 0.5531, "step": 10286 }, { "epoch": 0.8, "grad_norm": 1.1554970738445292, "learning_rate": 2.0637268368990727e-06, "loss": 0.4522, "step": 10287 }, { "epoch": 0.8, "grad_norm": 1.1650304674085714, "learning_rate": 2.0621983677794486e-06, "loss": 0.5014, "step": 10288 }, { "epoch": 0.8, "grad_norm": 1.2780542293439354, "learning_rate": 2.0606703998141942e-06, "loss": 0.501, "step": 10289 }, { "epoch": 0.8, "grad_norm": 1.1134729588397931, "learning_rate": 2.0591429330997793e-06, "loss": 0.465, "step": 10290 }, { "epoch": 0.8, "grad_norm": 1.095193836352529, "learning_rate": 2.0576159677326437e-06, "loss": 0.468, "step": 10291 }, { "epoch": 0.8, "grad_norm": 1.2125889008169002, "learning_rate": 2.0560895038091865e-06, "loss": 0.4905, "step": 10292 }, { "epoch": 0.8, "grad_norm": 1.2807207177333237, "learning_rate": 2.0545635414257835e-06, "loss": 0.5412, "step": 10293 }, { "epoch": 0.8, "grad_norm": 1.1189182986039712, "learning_rate": 2.053038080678781e-06, "loss": 0.4888, "step": 10294 }, { "epoch": 0.8, "grad_norm": 1.2092515098746894, "learning_rate": 2.051513121664481e-06, "loss": 0.5595, "step": 10295 }, { "epoch": 0.8, "grad_norm": 1.3039500727983295, "learning_rate": 2.04998866447917e-06, "loss": 0.5679, "step": 10296 }, { "epoch": 0.8, "grad_norm": 1.185788276139838, "learning_rate": 2.048464709219089e-06, "loss": 0.5664, "step": 10297 }, { "epoch": 0.8, "grad_norm": 1.1052751674931791, "learning_rate": 2.046941255980456e-06, "loss": 0.5147, "step": 10298 }, { "epoch": 0.8, "grad_norm": 1.33807064432412, "learning_rate": 2.0454183048594524e-06, "loss": 0.5219, "step": 10299 }, { "epoch": 0.8, "grad_norm": 1.2498117782026792, "learning_rate": 2.0438958559522314e-06, "loss": 0.5476, "step": 10300 }, { "epoch": 0.8, "grad_norm": 1.2121789290854608, "learning_rate": 2.042373909354917e-06, "loss": 0.4863, "step": 10301 }, { "epoch": 0.8, "grad_norm": 1.2257827418399094, "learning_rate": 2.040852465163591e-06, "loss": 0.4721, "step": 10302 }, { "epoch": 0.8, "grad_norm": 1.2895661728332928, "learning_rate": 2.0393315234743116e-06, "loss": 0.4944, "step": 10303 }, { "epoch": 0.8, "grad_norm": 1.182525152378456, "learning_rate": 2.0378110843831077e-06, "loss": 0.53, "step": 10304 }, { "epoch": 0.8, "grad_norm": 1.2645572349624634, "learning_rate": 2.036291147985967e-06, "loss": 0.5323, "step": 10305 }, { "epoch": 0.8, "grad_norm": 1.1360488962913127, "learning_rate": 2.034771714378857e-06, "loss": 0.4909, "step": 10306 }, { "epoch": 0.8, "grad_norm": 1.1664564363346435, "learning_rate": 2.0332527836577e-06, "loss": 0.5156, "step": 10307 }, { "epoch": 0.8, "grad_norm": 1.1656944561021052, "learning_rate": 2.031734355918399e-06, "loss": 0.5147, "step": 10308 }, { "epoch": 0.8, "grad_norm": 1.2040978498935313, "learning_rate": 2.0302164312568175e-06, "loss": 0.4824, "step": 10309 }, { "epoch": 0.8, "grad_norm": 1.0852949587972098, "learning_rate": 2.028699009768792e-06, "loss": 0.4967, "step": 10310 }, { "epoch": 0.8, "grad_norm": 1.133489367084751, "learning_rate": 2.0271820915501273e-06, "loss": 0.4768, "step": 10311 }, { "epoch": 0.8, "grad_norm": 1.2591906751055966, "learning_rate": 2.025665676696589e-06, "loss": 0.5354, "step": 10312 }, { "epoch": 0.8, "grad_norm": 1.2247059371818034, "learning_rate": 2.0241497653039178e-06, "loss": 0.5186, "step": 10313 }, { "epoch": 0.8, "grad_norm": 1.214661710292141, "learning_rate": 2.0226343574678255e-06, "loss": 0.514, "step": 10314 }, { "epoch": 0.8, "grad_norm": 1.1841975519518038, "learning_rate": 2.0211194532839807e-06, "loss": 0.5078, "step": 10315 }, { "epoch": 0.8, "grad_norm": 1.1738868736760906, "learning_rate": 2.019605052848034e-06, "loss": 0.4821, "step": 10316 }, { "epoch": 0.8, "grad_norm": 1.1561511744974924, "learning_rate": 2.0180911562555904e-06, "loss": 0.5153, "step": 10317 }, { "epoch": 0.8, "grad_norm": 1.144552809997178, "learning_rate": 2.016577763602233e-06, "loss": 0.4528, "step": 10318 }, { "epoch": 0.8, "grad_norm": 1.1583653765758537, "learning_rate": 2.015064874983511e-06, "loss": 0.5034, "step": 10319 }, { "epoch": 0.8, "grad_norm": 1.1670531189146744, "learning_rate": 2.01355249049494e-06, "loss": 0.4809, "step": 10320 }, { "epoch": 0.8, "grad_norm": 1.149081562082276, "learning_rate": 2.012040610232008e-06, "loss": 0.4941, "step": 10321 }, { "epoch": 0.8, "grad_norm": 1.2771470504638869, "learning_rate": 2.0105292342901617e-06, "loss": 0.5262, "step": 10322 }, { "epoch": 0.8, "grad_norm": 1.1730759125785757, "learning_rate": 2.009018362764825e-06, "loss": 0.5145, "step": 10323 }, { "epoch": 0.8, "grad_norm": 1.2472225326324355, "learning_rate": 2.007507995751391e-06, "loss": 0.533, "step": 10324 }, { "epoch": 0.8, "grad_norm": 1.3087537069914892, "learning_rate": 2.005998133345208e-06, "loss": 0.531, "step": 10325 }, { "epoch": 0.8, "grad_norm": 1.1604079156587876, "learning_rate": 2.004488775641611e-06, "loss": 0.5015, "step": 10326 }, { "epoch": 0.8, "grad_norm": 1.405106779161079, "learning_rate": 2.002979922735886e-06, "loss": 0.5834, "step": 10327 }, { "epoch": 0.8, "grad_norm": 1.2972890353825752, "learning_rate": 2.001471574723298e-06, "loss": 0.549, "step": 10328 }, { "epoch": 0.8, "grad_norm": 1.1045161479910992, "learning_rate": 1.999963731699076e-06, "loss": 0.4414, "step": 10329 }, { "epoch": 0.8, "grad_norm": 1.1388594144144728, "learning_rate": 1.9984563937584177e-06, "loss": 0.4868, "step": 10330 }, { "epoch": 0.8, "grad_norm": 1.1982704414201537, "learning_rate": 1.996949560996494e-06, "loss": 0.5345, "step": 10331 }, { "epoch": 0.8, "grad_norm": 1.3591611408953175, "learning_rate": 1.9954432335084307e-06, "loss": 0.5535, "step": 10332 }, { "epoch": 0.8, "grad_norm": 1.1413411021978321, "learning_rate": 1.9939374113893353e-06, "loss": 0.4713, "step": 10333 }, { "epoch": 0.8, "grad_norm": 1.2577695661526864, "learning_rate": 1.992432094734279e-06, "loss": 0.5294, "step": 10334 }, { "epoch": 0.8, "grad_norm": 1.152397645886078, "learning_rate": 1.9909272836382955e-06, "loss": 0.4752, "step": 10335 }, { "epoch": 0.8, "grad_norm": 1.2084929865016871, "learning_rate": 1.9894229781963957e-06, "loss": 0.5307, "step": 10336 }, { "epoch": 0.8, "grad_norm": 1.184222466704197, "learning_rate": 1.9879191785035513e-06, "loss": 0.5068, "step": 10337 }, { "epoch": 0.8, "grad_norm": 1.288310299667858, "learning_rate": 1.9864158846547054e-06, "loss": 0.5117, "step": 10338 }, { "epoch": 0.8, "grad_norm": 1.1521021185522449, "learning_rate": 1.9849130967447693e-06, "loss": 0.4821, "step": 10339 }, { "epoch": 0.8, "grad_norm": 1.3211294502113327, "learning_rate": 1.9834108148686225e-06, "loss": 0.5224, "step": 10340 }, { "epoch": 0.8, "grad_norm": 1.126520877233239, "learning_rate": 1.9819090391211104e-06, "loss": 0.498, "step": 10341 }, { "epoch": 0.8, "grad_norm": 1.2187549884400615, "learning_rate": 1.9804077695970513e-06, "loss": 0.5028, "step": 10342 }, { "epoch": 0.8, "grad_norm": 1.3013953056887446, "learning_rate": 1.978907006391223e-06, "loss": 0.5243, "step": 10343 }, { "epoch": 0.8, "grad_norm": 1.222376002164524, "learning_rate": 1.977406749598382e-06, "loss": 0.5209, "step": 10344 }, { "epoch": 0.8, "grad_norm": 1.2206317361937802, "learning_rate": 1.9759069993132405e-06, "loss": 0.4998, "step": 10345 }, { "epoch": 0.8, "grad_norm": 1.1608321664498062, "learning_rate": 1.9744077556304885e-06, "loss": 0.4799, "step": 10346 }, { "epoch": 0.8, "grad_norm": 1.1636473152101807, "learning_rate": 1.9729090186447853e-06, "loss": 0.4657, "step": 10347 }, { "epoch": 0.8, "grad_norm": 1.2029562992869192, "learning_rate": 1.9714107884507474e-06, "loss": 0.5271, "step": 10348 }, { "epoch": 0.8, "grad_norm": 1.2204066534511777, "learning_rate": 1.9699130651429676e-06, "loss": 0.4997, "step": 10349 }, { "epoch": 0.8, "grad_norm": 1.1497398828640724, "learning_rate": 1.9684158488160065e-06, "loss": 0.5064, "step": 10350 }, { "epoch": 0.8, "grad_norm": 1.1631250382904588, "learning_rate": 1.9669191395643906e-06, "loss": 0.4446, "step": 10351 }, { "epoch": 0.8, "grad_norm": 1.1349997615393839, "learning_rate": 1.965422937482616e-06, "loss": 0.4859, "step": 10352 }, { "epoch": 0.8, "grad_norm": 1.0774769701344173, "learning_rate": 1.9639272426651435e-06, "loss": 0.4684, "step": 10353 }, { "epoch": 0.8, "grad_norm": 1.1835434402499982, "learning_rate": 1.962432055206406e-06, "loss": 0.4571, "step": 10354 }, { "epoch": 0.8, "grad_norm": 1.1445839514983547, "learning_rate": 1.9609373752008e-06, "loss": 0.5004, "step": 10355 }, { "epoch": 0.8, "grad_norm": 1.2275289142101145, "learning_rate": 1.9594432027426925e-06, "loss": 0.5227, "step": 10356 }, { "epoch": 0.8, "grad_norm": 1.2593028556662307, "learning_rate": 1.9579495379264223e-06, "loss": 0.4979, "step": 10357 }, { "epoch": 0.8, "grad_norm": 1.2159801699784512, "learning_rate": 1.9564563808462867e-06, "loss": 0.5156, "step": 10358 }, { "epoch": 0.8, "grad_norm": 1.2114540229036244, "learning_rate": 1.9549637315965587e-06, "loss": 0.5387, "step": 10359 }, { "epoch": 0.8, "grad_norm": 1.2060288399463288, "learning_rate": 1.9534715902714775e-06, "loss": 0.5223, "step": 10360 }, { "epoch": 0.8, "grad_norm": 1.2462118923899952, "learning_rate": 1.9519799569652498e-06, "loss": 0.5481, "step": 10361 }, { "epoch": 0.8, "grad_norm": 1.1920891977481711, "learning_rate": 1.9504888317720515e-06, "loss": 0.4916, "step": 10362 }, { "epoch": 0.8, "grad_norm": 1.337029930649945, "learning_rate": 1.948998214786022e-06, "loss": 0.5658, "step": 10363 }, { "epoch": 0.8, "grad_norm": 1.1651741198959462, "learning_rate": 1.9475081061012746e-06, "loss": 0.4734, "step": 10364 }, { "epoch": 0.8, "grad_norm": 1.0222954821134806, "learning_rate": 1.946018505811883e-06, "loss": 0.4284, "step": 10365 }, { "epoch": 0.8, "grad_norm": 1.1425155231247648, "learning_rate": 1.9445294140118965e-06, "loss": 0.4887, "step": 10366 }, { "epoch": 0.8, "grad_norm": 1.1424013180341783, "learning_rate": 1.9430408307953317e-06, "loss": 0.5349, "step": 10367 }, { "epoch": 0.8, "grad_norm": 1.3395171365157932, "learning_rate": 1.9415527562561655e-06, "loss": 0.5672, "step": 10368 }, { "epoch": 0.8, "grad_norm": 1.1631960620520938, "learning_rate": 1.9400651904883492e-06, "loss": 0.5056, "step": 10369 }, { "epoch": 0.8, "grad_norm": 1.1770906476268992, "learning_rate": 1.9385781335858014e-06, "loss": 0.4884, "step": 10370 }, { "epoch": 0.8, "grad_norm": 1.2783768235575272, "learning_rate": 1.937091585642408e-06, "loss": 0.5395, "step": 10371 }, { "epoch": 0.8, "grad_norm": 1.1769348264652402, "learning_rate": 1.935605546752023e-06, "loss": 0.5312, "step": 10372 }, { "epoch": 0.8, "grad_norm": 1.2309911194333034, "learning_rate": 1.934120017008465e-06, "loss": 0.5031, "step": 10373 }, { "epoch": 0.8, "grad_norm": 1.2335654392581403, "learning_rate": 1.932634996505528e-06, "loss": 0.4825, "step": 10374 }, { "epoch": 0.8, "grad_norm": 1.1826541305807252, "learning_rate": 1.931150485336962e-06, "loss": 0.5027, "step": 10375 }, { "epoch": 0.8, "grad_norm": 1.1497857619876597, "learning_rate": 1.9296664835964975e-06, "loss": 0.4593, "step": 10376 }, { "epoch": 0.81, "grad_norm": 1.2023675689420044, "learning_rate": 1.928182991377826e-06, "loss": 0.5154, "step": 10377 }, { "epoch": 0.81, "grad_norm": 1.1732740570307119, "learning_rate": 1.926700008774606e-06, "loss": 0.467, "step": 10378 }, { "epoch": 0.81, "grad_norm": 1.2388419918389142, "learning_rate": 1.9252175358804657e-06, "loss": 0.4995, "step": 10379 }, { "epoch": 0.81, "grad_norm": 1.2624740465725812, "learning_rate": 1.9237355727890037e-06, "loss": 0.525, "step": 10380 }, { "epoch": 0.81, "grad_norm": 1.2348135639890256, "learning_rate": 1.922254119593784e-06, "loss": 0.4428, "step": 10381 }, { "epoch": 0.81, "grad_norm": 1.1873731545412778, "learning_rate": 1.9207731763883388e-06, "loss": 0.515, "step": 10382 }, { "epoch": 0.81, "grad_norm": 1.393612855741153, "learning_rate": 1.9192927432661645e-06, "loss": 0.5578, "step": 10383 }, { "epoch": 0.81, "grad_norm": 1.1886440588497955, "learning_rate": 1.9178128203207324e-06, "loss": 0.5003, "step": 10384 }, { "epoch": 0.81, "grad_norm": 1.2326494541819835, "learning_rate": 1.916333407645472e-06, "loss": 0.5637, "step": 10385 }, { "epoch": 0.81, "grad_norm": 1.233964441325004, "learning_rate": 1.914854505333791e-06, "loss": 0.4889, "step": 10386 }, { "epoch": 0.81, "grad_norm": 1.2268538402378648, "learning_rate": 1.9133761134790618e-06, "loss": 0.5438, "step": 10387 }, { "epoch": 0.81, "grad_norm": 1.0855906090260632, "learning_rate": 1.9118982321746173e-06, "loss": 0.4048, "step": 10388 }, { "epoch": 0.81, "grad_norm": 1.2265365014692144, "learning_rate": 1.9104208615137654e-06, "loss": 0.4727, "step": 10389 }, { "epoch": 0.81, "grad_norm": 1.0774274034228972, "learning_rate": 1.908944001589782e-06, "loss": 0.469, "step": 10390 }, { "epoch": 0.81, "grad_norm": 1.158155572182002, "learning_rate": 1.907467652495909e-06, "loss": 0.4746, "step": 10391 }, { "epoch": 0.81, "grad_norm": 1.191380384821055, "learning_rate": 1.9059918143253564e-06, "loss": 0.4869, "step": 10392 }, { "epoch": 0.81, "grad_norm": 1.2269686372754622, "learning_rate": 1.9045164871713007e-06, "loss": 0.4924, "step": 10393 }, { "epoch": 0.81, "grad_norm": 1.1213116583568241, "learning_rate": 1.903041671126884e-06, "loss": 0.4862, "step": 10394 }, { "epoch": 0.81, "grad_norm": 1.2229470099216209, "learning_rate": 1.9015673662852207e-06, "loss": 0.5175, "step": 10395 }, { "epoch": 0.81, "grad_norm": 1.3604078205905663, "learning_rate": 1.900093572739392e-06, "loss": 0.5074, "step": 10396 }, { "epoch": 0.81, "grad_norm": 1.1943786064504713, "learning_rate": 1.898620290582447e-06, "loss": 0.474, "step": 10397 }, { "epoch": 0.81, "grad_norm": 1.2919564332289357, "learning_rate": 1.897147519907403e-06, "loss": 0.5327, "step": 10398 }, { "epoch": 0.81, "grad_norm": 1.187736387315714, "learning_rate": 1.8956752608072382e-06, "loss": 0.5016, "step": 10399 }, { "epoch": 0.81, "grad_norm": 1.1687796339062262, "learning_rate": 1.894203513374907e-06, "loss": 0.5086, "step": 10400 }, { "epoch": 0.81, "grad_norm": 1.3468612316162474, "learning_rate": 1.8927322777033285e-06, "loss": 0.5067, "step": 10401 }, { "epoch": 0.81, "grad_norm": 1.2436114612654876, "learning_rate": 1.8912615538853919e-06, "loss": 0.5292, "step": 10402 }, { "epoch": 0.81, "grad_norm": 1.2027217139543736, "learning_rate": 1.8897913420139492e-06, "loss": 0.4489, "step": 10403 }, { "epoch": 0.81, "grad_norm": 1.1082299999357392, "learning_rate": 1.8883216421818196e-06, "loss": 0.4796, "step": 10404 }, { "epoch": 0.81, "grad_norm": 1.1507077257710712, "learning_rate": 1.8868524544817957e-06, "loss": 0.5064, "step": 10405 }, { "epoch": 0.81, "grad_norm": 1.1087962507989588, "learning_rate": 1.8853837790066343e-06, "loss": 0.511, "step": 10406 }, { "epoch": 0.81, "grad_norm": 1.2132909240140473, "learning_rate": 1.8839156158490612e-06, "loss": 0.5222, "step": 10407 }, { "epoch": 0.81, "grad_norm": 1.2021377277904344, "learning_rate": 1.8824479651017712e-06, "loss": 0.4608, "step": 10408 }, { "epoch": 0.81, "grad_norm": 1.183529137588327, "learning_rate": 1.8809808268574192e-06, "loss": 0.5101, "step": 10409 }, { "epoch": 0.81, "grad_norm": 1.1541069894412248, "learning_rate": 1.8795142012086364e-06, "loss": 0.4803, "step": 10410 }, { "epoch": 0.81, "grad_norm": 1.2045538208410849, "learning_rate": 1.8780480882480189e-06, "loss": 0.5278, "step": 10411 }, { "epoch": 0.81, "grad_norm": 1.3228246952236407, "learning_rate": 1.8765824880681317e-06, "loss": 0.5211, "step": 10412 }, { "epoch": 0.81, "grad_norm": 1.2195136783172147, "learning_rate": 1.8751174007615026e-06, "loss": 0.4967, "step": 10413 }, { "epoch": 0.81, "grad_norm": 1.2479047858554162, "learning_rate": 1.8736528264206289e-06, "loss": 0.5097, "step": 10414 }, { "epoch": 0.81, "grad_norm": 1.158638832002312, "learning_rate": 1.872188765137979e-06, "loss": 0.5247, "step": 10415 }, { "epoch": 0.81, "grad_norm": 1.10435893825904, "learning_rate": 1.8707252170059864e-06, "loss": 0.4589, "step": 10416 }, { "epoch": 0.81, "grad_norm": 1.214204184415717, "learning_rate": 1.869262182117052e-06, "loss": 0.4974, "step": 10417 }, { "epoch": 0.81, "grad_norm": 1.146734571044995, "learning_rate": 1.8677996605635473e-06, "loss": 0.4902, "step": 10418 }, { "epoch": 0.81, "grad_norm": 1.1523582845514786, "learning_rate": 1.866337652437805e-06, "loss": 0.5315, "step": 10419 }, { "epoch": 0.81, "grad_norm": 1.1235626893729556, "learning_rate": 1.8648761578321296e-06, "loss": 0.4829, "step": 10420 }, { "epoch": 0.81, "grad_norm": 1.1653543766647423, "learning_rate": 1.8634151768387954e-06, "loss": 0.4622, "step": 10421 }, { "epoch": 0.81, "grad_norm": 1.0817316515014181, "learning_rate": 1.8619547095500423e-06, "loss": 0.4459, "step": 10422 }, { "epoch": 0.81, "grad_norm": 1.241798531914272, "learning_rate": 1.8604947560580756e-06, "loss": 0.4962, "step": 10423 }, { "epoch": 0.81, "grad_norm": 1.1386425609970265, "learning_rate": 1.8590353164550656e-06, "loss": 0.5111, "step": 10424 }, { "epoch": 0.81, "grad_norm": 1.244623207387527, "learning_rate": 1.857576390833159e-06, "loss": 0.5167, "step": 10425 }, { "epoch": 0.81, "grad_norm": 1.172029001925191, "learning_rate": 1.8561179792844642e-06, "loss": 0.5243, "step": 10426 }, { "epoch": 0.81, "grad_norm": 1.2720301053185297, "learning_rate": 1.8546600819010575e-06, "loss": 0.5693, "step": 10427 }, { "epoch": 0.81, "grad_norm": 1.156249742250156, "learning_rate": 1.8532026987749874e-06, "loss": 0.4858, "step": 10428 }, { "epoch": 0.81, "grad_norm": 1.2187584607735416, "learning_rate": 1.8517458299982604e-06, "loss": 0.519, "step": 10429 }, { "epoch": 0.81, "grad_norm": 1.2533085429414896, "learning_rate": 1.8502894756628587e-06, "loss": 0.5415, "step": 10430 }, { "epoch": 0.81, "grad_norm": 1.1305972307716194, "learning_rate": 1.8488336358607296e-06, "loss": 0.453, "step": 10431 }, { "epoch": 0.81, "grad_norm": 1.2000558522260178, "learning_rate": 1.8473783106837896e-06, "loss": 0.4781, "step": 10432 }, { "epoch": 0.81, "grad_norm": 1.1443368271399246, "learning_rate": 1.8459235002239183e-06, "loss": 0.4921, "step": 10433 }, { "epoch": 0.81, "grad_norm": 1.2608846263364868, "learning_rate": 1.844469204572964e-06, "loss": 0.557, "step": 10434 }, { "epoch": 0.81, "grad_norm": 1.1178104824424413, "learning_rate": 1.843015423822746e-06, "loss": 0.4885, "step": 10435 }, { "epoch": 0.81, "grad_norm": 1.1748830209493364, "learning_rate": 1.841562158065049e-06, "loss": 0.5095, "step": 10436 }, { "epoch": 0.81, "grad_norm": 1.165367367987973, "learning_rate": 1.8401094073916237e-06, "loss": 0.4664, "step": 10437 }, { "epoch": 0.81, "grad_norm": 1.295025483326214, "learning_rate": 1.8386571718941947e-06, "loss": 0.5833, "step": 10438 }, { "epoch": 0.81, "grad_norm": 1.1498459464009567, "learning_rate": 1.8372054516644422e-06, "loss": 0.4455, "step": 10439 }, { "epoch": 0.81, "grad_norm": 1.1600520539936359, "learning_rate": 1.8357542467940249e-06, "loss": 0.4456, "step": 10440 }, { "epoch": 0.81, "grad_norm": 1.1858258241284776, "learning_rate": 1.8343035573745637e-06, "loss": 0.4609, "step": 10441 }, { "epoch": 0.81, "grad_norm": 1.2969841968570677, "learning_rate": 1.8328533834976503e-06, "loss": 0.538, "step": 10442 }, { "epoch": 0.81, "grad_norm": 1.247649653446576, "learning_rate": 1.83140372525484e-06, "loss": 0.5145, "step": 10443 }, { "epoch": 0.81, "grad_norm": 1.1970741903024715, "learning_rate": 1.8299545827376552e-06, "loss": 0.5031, "step": 10444 }, { "epoch": 0.81, "grad_norm": 1.1042433778038725, "learning_rate": 1.8285059560375883e-06, "loss": 0.5108, "step": 10445 }, { "epoch": 0.81, "grad_norm": 1.154651619607953, "learning_rate": 1.8270578452461007e-06, "loss": 0.4967, "step": 10446 }, { "epoch": 0.81, "grad_norm": 1.1493458691172969, "learning_rate": 1.825610250454618e-06, "loss": 0.5224, "step": 10447 }, { "epoch": 0.81, "grad_norm": 1.2318701620021442, "learning_rate": 1.824163171754536e-06, "loss": 0.4985, "step": 10448 }, { "epoch": 0.81, "grad_norm": 1.0756030232180962, "learning_rate": 1.8227166092372138e-06, "loss": 0.4431, "step": 10449 }, { "epoch": 0.81, "grad_norm": 1.1653508986481198, "learning_rate": 1.8212705629939798e-06, "loss": 0.5148, "step": 10450 }, { "epoch": 0.81, "grad_norm": 1.4137273992052395, "learning_rate": 1.8198250331161327e-06, "loss": 0.5678, "step": 10451 }, { "epoch": 0.81, "grad_norm": 1.2140237180978994, "learning_rate": 1.8183800196949375e-06, "loss": 0.5141, "step": 10452 }, { "epoch": 0.81, "grad_norm": 1.1900106792611889, "learning_rate": 1.8169355228216211e-06, "loss": 0.524, "step": 10453 }, { "epoch": 0.81, "grad_norm": 1.2498308544158938, "learning_rate": 1.8154915425873865e-06, "loss": 0.4813, "step": 10454 }, { "epoch": 0.81, "grad_norm": 1.2530844303708175, "learning_rate": 1.8140480790833958e-06, "loss": 0.5208, "step": 10455 }, { "epoch": 0.81, "grad_norm": 1.1372842856635434, "learning_rate": 1.8126051324007821e-06, "loss": 0.4889, "step": 10456 }, { "epoch": 0.81, "grad_norm": 1.1247584825455763, "learning_rate": 1.8111627026306488e-06, "loss": 0.4374, "step": 10457 }, { "epoch": 0.81, "grad_norm": 1.2168712807526494, "learning_rate": 1.8097207898640633e-06, "loss": 0.4463, "step": 10458 }, { "epoch": 0.81, "grad_norm": 1.215735693748371, "learning_rate": 1.808279394192063e-06, "loss": 0.5298, "step": 10459 }, { "epoch": 0.81, "grad_norm": 1.3411277646325948, "learning_rate": 1.8068385157056446e-06, "loss": 0.5594, "step": 10460 }, { "epoch": 0.81, "grad_norm": 1.203137880727011, "learning_rate": 1.8053981544957832e-06, "loss": 0.5018, "step": 10461 }, { "epoch": 0.81, "grad_norm": 1.3381786175693573, "learning_rate": 1.8039583106534164e-06, "loss": 0.5225, "step": 10462 }, { "epoch": 0.81, "grad_norm": 1.1489099062930568, "learning_rate": 1.8025189842694458e-06, "loss": 0.5054, "step": 10463 }, { "epoch": 0.81, "grad_norm": 1.280028575116321, "learning_rate": 1.8010801754347473e-06, "loss": 0.5046, "step": 10464 }, { "epoch": 0.81, "grad_norm": 1.1998320760058416, "learning_rate": 1.7996418842401552e-06, "loss": 0.5023, "step": 10465 }, { "epoch": 0.81, "grad_norm": 1.277502440881357, "learning_rate": 1.7982041107764803e-06, "loss": 0.5216, "step": 10466 }, { "epoch": 0.81, "grad_norm": 1.2627140052633197, "learning_rate": 1.796766855134494e-06, "loss": 0.5333, "step": 10467 }, { "epoch": 0.81, "grad_norm": 1.3363706990641024, "learning_rate": 1.795330117404941e-06, "loss": 0.4937, "step": 10468 }, { "epoch": 0.81, "grad_norm": 1.1386407811939736, "learning_rate": 1.7938938976785302e-06, "loss": 0.5339, "step": 10469 }, { "epoch": 0.81, "grad_norm": 1.1885433131258651, "learning_rate": 1.792458196045932e-06, "loss": 0.4912, "step": 10470 }, { "epoch": 0.81, "grad_norm": 1.1723792453554376, "learning_rate": 1.7910230125977945e-06, "loss": 0.483, "step": 10471 }, { "epoch": 0.81, "grad_norm": 1.3337188948031438, "learning_rate": 1.789588347424729e-06, "loss": 0.5345, "step": 10472 }, { "epoch": 0.81, "grad_norm": 1.1924109553234687, "learning_rate": 1.7881542006173091e-06, "loss": 0.5484, "step": 10473 }, { "epoch": 0.81, "grad_norm": 1.2019992862896063, "learning_rate": 1.786720572266084e-06, "loss": 0.4812, "step": 10474 }, { "epoch": 0.81, "grad_norm": 1.1503896923741443, "learning_rate": 1.7852874624615624e-06, "loss": 0.5159, "step": 10475 }, { "epoch": 0.81, "grad_norm": 1.1572777072855462, "learning_rate": 1.783854871294225e-06, "loss": 0.4836, "step": 10476 }, { "epoch": 0.81, "grad_norm": 1.1532732302287767, "learning_rate": 1.7824227988545194e-06, "loss": 0.509, "step": 10477 }, { "epoch": 0.81, "grad_norm": 1.3525361980311499, "learning_rate": 1.7809912452328592e-06, "loss": 0.5405, "step": 10478 }, { "epoch": 0.81, "grad_norm": 1.25077342901221, "learning_rate": 1.7795602105196297e-06, "loss": 0.543, "step": 10479 }, { "epoch": 0.81, "grad_norm": 1.1658554094872522, "learning_rate": 1.778129694805173e-06, "loss": 0.4991, "step": 10480 }, { "epoch": 0.81, "grad_norm": 1.216405128865592, "learning_rate": 1.776699698179808e-06, "loss": 0.4853, "step": 10481 }, { "epoch": 0.81, "grad_norm": 1.162459735788342, "learning_rate": 1.7752702207338202e-06, "loss": 0.5001, "step": 10482 }, { "epoch": 0.81, "grad_norm": 1.1763861231425468, "learning_rate": 1.7738412625574542e-06, "loss": 0.4947, "step": 10483 }, { "epoch": 0.81, "grad_norm": 1.3161269437790237, "learning_rate": 1.7724128237409344e-06, "loss": 0.5357, "step": 10484 }, { "epoch": 0.81, "grad_norm": 1.297538541614623, "learning_rate": 1.7709849043744387e-06, "loss": 0.5203, "step": 10485 }, { "epoch": 0.81, "grad_norm": 1.199755321830302, "learning_rate": 1.7695575045481218e-06, "loss": 0.4795, "step": 10486 }, { "epoch": 0.81, "grad_norm": 1.1301971763142185, "learning_rate": 1.7681306243521035e-06, "loss": 0.4911, "step": 10487 }, { "epoch": 0.81, "grad_norm": 1.0538742320723389, "learning_rate": 1.7667042638764697e-06, "loss": 0.4589, "step": 10488 }, { "epoch": 0.81, "grad_norm": 1.0407850858339627, "learning_rate": 1.7652784232112763e-06, "loss": 0.4434, "step": 10489 }, { "epoch": 0.81, "grad_norm": 1.1943022005021457, "learning_rate": 1.76385310244654e-06, "loss": 0.5133, "step": 10490 }, { "epoch": 0.81, "grad_norm": 1.3291034908687103, "learning_rate": 1.762428301672251e-06, "loss": 0.5734, "step": 10491 }, { "epoch": 0.81, "grad_norm": 1.3266406122761054, "learning_rate": 1.761004020978363e-06, "loss": 0.6024, "step": 10492 }, { "epoch": 0.81, "grad_norm": 1.1645543640209564, "learning_rate": 1.7595802604547974e-06, "loss": 0.496, "step": 10493 }, { "epoch": 0.81, "grad_norm": 1.2184970299735831, "learning_rate": 1.7581570201914478e-06, "loss": 0.5359, "step": 10494 }, { "epoch": 0.81, "grad_norm": 1.1513206983762794, "learning_rate": 1.7567343002781656e-06, "loss": 0.5129, "step": 10495 }, { "epoch": 0.81, "grad_norm": 1.330582264097016, "learning_rate": 1.7553121008047768e-06, "loss": 0.5321, "step": 10496 }, { "epoch": 0.81, "grad_norm": 1.209381827007413, "learning_rate": 1.753890421861072e-06, "loss": 0.5503, "step": 10497 }, { "epoch": 0.81, "grad_norm": 1.2934417781312084, "learning_rate": 1.752469263536809e-06, "loss": 0.5061, "step": 10498 }, { "epoch": 0.81, "grad_norm": 1.2345777900323305, "learning_rate": 1.7510486259217151e-06, "loss": 0.5093, "step": 10499 }, { "epoch": 0.81, "grad_norm": 1.2739734574941015, "learning_rate": 1.7496285091054788e-06, "loss": 0.5533, "step": 10500 }, { "epoch": 0.81, "grad_norm": 1.1736133144039462, "learning_rate": 1.7482089131777635e-06, "loss": 0.4895, "step": 10501 }, { "epoch": 0.81, "grad_norm": 1.244689484981838, "learning_rate": 1.74678983822819e-06, "loss": 0.4912, "step": 10502 }, { "epoch": 0.81, "grad_norm": 1.3912718479329513, "learning_rate": 1.7453712843463554e-06, "loss": 0.5747, "step": 10503 }, { "epoch": 0.81, "grad_norm": 1.2169156085479538, "learning_rate": 1.7439532516218226e-06, "loss": 0.5029, "step": 10504 }, { "epoch": 0.81, "grad_norm": 1.1270013068359976, "learning_rate": 1.7425357401441134e-06, "loss": 0.4715, "step": 10505 }, { "epoch": 0.82, "grad_norm": 1.253966570677824, "learning_rate": 1.7411187500027272e-06, "loss": 0.5125, "step": 10506 }, { "epoch": 0.82, "grad_norm": 1.2157143175119676, "learning_rate": 1.7397022812871234e-06, "loss": 0.5049, "step": 10507 }, { "epoch": 0.82, "grad_norm": 1.2118043811480281, "learning_rate": 1.7382863340867316e-06, "loss": 0.5172, "step": 10508 }, { "epoch": 0.82, "grad_norm": 1.3806981878883626, "learning_rate": 1.7368709084909496e-06, "loss": 0.5133, "step": 10509 }, { "epoch": 0.82, "grad_norm": 1.2219046333017223, "learning_rate": 1.735456004589141e-06, "loss": 0.4581, "step": 10510 }, { "epoch": 0.82, "grad_norm": 1.217999594091714, "learning_rate": 1.7340416224706346e-06, "loss": 0.4843, "step": 10511 }, { "epoch": 0.82, "grad_norm": 1.162585044129144, "learning_rate": 1.7326277622247245e-06, "loss": 0.5077, "step": 10512 }, { "epoch": 0.82, "grad_norm": 1.092069042587265, "learning_rate": 1.7312144239406781e-06, "loss": 0.4631, "step": 10513 }, { "epoch": 0.82, "grad_norm": 1.3889924228013881, "learning_rate": 1.7298016077077273e-06, "loss": 0.5579, "step": 10514 }, { "epoch": 0.82, "grad_norm": 1.1529110821220014, "learning_rate": 1.728389313615071e-06, "loss": 0.4757, "step": 10515 }, { "epoch": 0.82, "grad_norm": 1.31379771791104, "learning_rate": 1.7269775417518708e-06, "loss": 0.5394, "step": 10516 }, { "epoch": 0.82, "grad_norm": 1.2688102670270993, "learning_rate": 1.7255662922072613e-06, "loss": 0.505, "step": 10517 }, { "epoch": 0.82, "grad_norm": 1.2353242531875206, "learning_rate": 1.7241555650703433e-06, "loss": 0.5218, "step": 10518 }, { "epoch": 0.82, "grad_norm": 1.267278321347615, "learning_rate": 1.7227453604301814e-06, "loss": 0.4969, "step": 10519 }, { "epoch": 0.82, "grad_norm": 1.1978121781043962, "learning_rate": 1.7213356783758128e-06, "loss": 0.4724, "step": 10520 }, { "epoch": 0.82, "grad_norm": 1.210791006763986, "learning_rate": 1.7199265189962345e-06, "loss": 0.5214, "step": 10521 }, { "epoch": 0.82, "grad_norm": 1.239744314858782, "learning_rate": 1.7185178823804127e-06, "loss": 0.553, "step": 10522 }, { "epoch": 0.82, "grad_norm": 1.2265063716413458, "learning_rate": 1.7171097686172832e-06, "loss": 0.5414, "step": 10523 }, { "epoch": 0.82, "grad_norm": 1.1923382225103458, "learning_rate": 1.7157021777957494e-06, "loss": 0.4642, "step": 10524 }, { "epoch": 0.82, "grad_norm": 1.131043308828589, "learning_rate": 1.7142951100046802e-06, "loss": 0.4543, "step": 10525 }, { "epoch": 0.82, "grad_norm": 1.1441874329436754, "learning_rate": 1.7128885653329065e-06, "loss": 0.4569, "step": 10526 }, { "epoch": 0.82, "grad_norm": 1.1432271043531488, "learning_rate": 1.711482543869234e-06, "loss": 0.5425, "step": 10527 }, { "epoch": 0.82, "grad_norm": 1.0816328506493416, "learning_rate": 1.7100770457024307e-06, "loss": 0.5059, "step": 10528 }, { "epoch": 0.82, "grad_norm": 1.4127508582100923, "learning_rate": 1.7086720709212357e-06, "loss": 0.5717, "step": 10529 }, { "epoch": 0.82, "grad_norm": 1.156142100892866, "learning_rate": 1.7072676196143512e-06, "loss": 0.5183, "step": 10530 }, { "epoch": 0.82, "grad_norm": 1.1123369997604466, "learning_rate": 1.7058636918704474e-06, "loss": 0.4447, "step": 10531 }, { "epoch": 0.82, "grad_norm": 1.188770768699168, "learning_rate": 1.704460287778159e-06, "loss": 0.4777, "step": 10532 }, { "epoch": 0.82, "grad_norm": 1.3072897263876002, "learning_rate": 1.7030574074260908e-06, "loss": 0.5636, "step": 10533 }, { "epoch": 0.82, "grad_norm": 1.143892650359272, "learning_rate": 1.7016550509028162e-06, "loss": 0.4896, "step": 10534 }, { "epoch": 0.82, "grad_norm": 1.130228977044135, "learning_rate": 1.7002532182968734e-06, "loss": 0.4782, "step": 10535 }, { "epoch": 0.82, "grad_norm": 1.1603531590084593, "learning_rate": 1.6988519096967647e-06, "loss": 0.5064, "step": 10536 }, { "epoch": 0.82, "grad_norm": 1.1335112948567967, "learning_rate": 1.697451125190962e-06, "loss": 0.508, "step": 10537 }, { "epoch": 0.82, "grad_norm": 1.2376035646971673, "learning_rate": 1.6960508648679064e-06, "loss": 0.5099, "step": 10538 }, { "epoch": 0.82, "grad_norm": 1.2352067101031956, "learning_rate": 1.6946511288160017e-06, "loss": 0.5089, "step": 10539 }, { "epoch": 0.82, "grad_norm": 1.1461286626593559, "learning_rate": 1.693251917123624e-06, "loss": 0.4921, "step": 10540 }, { "epoch": 0.82, "grad_norm": 1.3278851460806542, "learning_rate": 1.6918532298791102e-06, "loss": 0.5413, "step": 10541 }, { "epoch": 0.82, "grad_norm": 1.2688491162450473, "learning_rate": 1.6904550671707632e-06, "loss": 0.4886, "step": 10542 }, { "epoch": 0.82, "grad_norm": 1.3579935969312724, "learning_rate": 1.68905742908686e-06, "loss": 0.5718, "step": 10543 }, { "epoch": 0.82, "grad_norm": 1.2583445495880312, "learning_rate": 1.6876603157156402e-06, "loss": 0.5128, "step": 10544 }, { "epoch": 0.82, "grad_norm": 1.1596382603102533, "learning_rate": 1.686263727145313e-06, "loss": 0.4761, "step": 10545 }, { "epoch": 0.82, "grad_norm": 1.4515619382662144, "learning_rate": 1.684867663464047e-06, "loss": 0.4988, "step": 10546 }, { "epoch": 0.82, "grad_norm": 1.1546457863836639, "learning_rate": 1.6834721247599871e-06, "loss": 0.5124, "step": 10547 }, { "epoch": 0.82, "grad_norm": 1.371756542887517, "learning_rate": 1.6820771111212386e-06, "loss": 0.5701, "step": 10548 }, { "epoch": 0.82, "grad_norm": 1.2650184590798446, "learning_rate": 1.6806826226358775e-06, "loss": 0.4859, "step": 10549 }, { "epoch": 0.82, "grad_norm": 1.297595134400607, "learning_rate": 1.6792886593919454e-06, "loss": 0.5629, "step": 10550 }, { "epoch": 0.82, "grad_norm": 1.1903674490401843, "learning_rate": 1.6778952214774513e-06, "loss": 0.4868, "step": 10551 }, { "epoch": 0.82, "grad_norm": 1.2320864261712654, "learning_rate": 1.6765023089803645e-06, "loss": 0.4971, "step": 10552 }, { "epoch": 0.82, "grad_norm": 1.2018654410408118, "learning_rate": 1.6751099219886314e-06, "loss": 0.4749, "step": 10553 }, { "epoch": 0.82, "grad_norm": 1.1420793536920715, "learning_rate": 1.6737180605901592e-06, "loss": 0.4877, "step": 10554 }, { "epoch": 0.82, "grad_norm": 1.2102358354142642, "learning_rate": 1.672326724872827e-06, "loss": 0.4966, "step": 10555 }, { "epoch": 0.82, "grad_norm": 1.1361701401783366, "learning_rate": 1.6709359149244708e-06, "loss": 0.4591, "step": 10556 }, { "epoch": 0.82, "grad_norm": 1.195401406565811, "learning_rate": 1.6695456308329027e-06, "loss": 0.4591, "step": 10557 }, { "epoch": 0.82, "grad_norm": 1.2573925288634176, "learning_rate": 1.6681558726858983e-06, "loss": 0.5205, "step": 10558 }, { "epoch": 0.82, "grad_norm": 1.2768040721461484, "learning_rate": 1.6667666405712002e-06, "loss": 0.4884, "step": 10559 }, { "epoch": 0.82, "grad_norm": 1.193325860135069, "learning_rate": 1.6653779345765209e-06, "loss": 0.5381, "step": 10560 }, { "epoch": 0.82, "grad_norm": 1.0951323356961833, "learning_rate": 1.6639897547895334e-06, "loss": 0.4846, "step": 10561 }, { "epoch": 0.82, "grad_norm": 1.2313968125547885, "learning_rate": 1.662602101297879e-06, "loss": 0.4823, "step": 10562 }, { "epoch": 0.82, "grad_norm": 1.1681266856500292, "learning_rate": 1.661214974189168e-06, "loss": 0.4634, "step": 10563 }, { "epoch": 0.82, "grad_norm": 1.2898688366470343, "learning_rate": 1.6598283735509791e-06, "loss": 0.5213, "step": 10564 }, { "epoch": 0.82, "grad_norm": 1.2660283988159908, "learning_rate": 1.6584422994708539e-06, "loss": 0.5922, "step": 10565 }, { "epoch": 0.82, "grad_norm": 1.2480377053653755, "learning_rate": 1.6570567520363058e-06, "loss": 0.5292, "step": 10566 }, { "epoch": 0.82, "grad_norm": 1.1522369335258873, "learning_rate": 1.6556717313348058e-06, "loss": 0.4968, "step": 10567 }, { "epoch": 0.82, "grad_norm": 1.2200183626262613, "learning_rate": 1.654287237453801e-06, "loss": 0.5661, "step": 10568 }, { "epoch": 0.82, "grad_norm": 1.1331459343938992, "learning_rate": 1.6529032704807012e-06, "loss": 0.4938, "step": 10569 }, { "epoch": 0.82, "grad_norm": 1.2208820669626403, "learning_rate": 1.6515198305028868e-06, "loss": 0.5376, "step": 10570 }, { "epoch": 0.82, "grad_norm": 1.313192139642635, "learning_rate": 1.6501369176076964e-06, "loss": 0.5588, "step": 10571 }, { "epoch": 0.82, "grad_norm": 1.126480452979602, "learning_rate": 1.6487545318824405e-06, "loss": 0.4844, "step": 10572 }, { "epoch": 0.82, "grad_norm": 1.0932797102067502, "learning_rate": 1.6473726734143969e-06, "loss": 0.4108, "step": 10573 }, { "epoch": 0.82, "grad_norm": 1.1992780937894623, "learning_rate": 1.6459913422908113e-06, "loss": 0.5026, "step": 10574 }, { "epoch": 0.82, "grad_norm": 1.1143017826214296, "learning_rate": 1.6446105385988932e-06, "loss": 0.479, "step": 10575 }, { "epoch": 0.82, "grad_norm": 1.0995762550691859, "learning_rate": 1.6432302624258211e-06, "loss": 0.4681, "step": 10576 }, { "epoch": 0.82, "grad_norm": 1.1709815115963567, "learning_rate": 1.641850513858737e-06, "loss": 0.4255, "step": 10577 }, { "epoch": 0.82, "grad_norm": 1.1894593387435193, "learning_rate": 1.640471292984751e-06, "loss": 0.5529, "step": 10578 }, { "epoch": 0.82, "grad_norm": 1.1155630185302747, "learning_rate": 1.6390925998909423e-06, "loss": 0.4771, "step": 10579 }, { "epoch": 0.82, "grad_norm": 1.2387560586944755, "learning_rate": 1.637714434664357e-06, "loss": 0.5498, "step": 10580 }, { "epoch": 0.82, "grad_norm": 1.172648314589578, "learning_rate": 1.6363367973920031e-06, "loss": 0.4916, "step": 10581 }, { "epoch": 0.82, "grad_norm": 1.1733047918258428, "learning_rate": 1.6349596881608555e-06, "loss": 0.5129, "step": 10582 }, { "epoch": 0.82, "grad_norm": 1.0808998555280984, "learning_rate": 1.6335831070578612e-06, "loss": 0.4442, "step": 10583 }, { "epoch": 0.82, "grad_norm": 1.420055189000459, "learning_rate": 1.6322070541699298e-06, "loss": 0.5632, "step": 10584 }, { "epoch": 0.82, "grad_norm": 1.4057960625309385, "learning_rate": 1.6308315295839395e-06, "loss": 0.5387, "step": 10585 }, { "epoch": 0.82, "grad_norm": 1.0657794493674142, "learning_rate": 1.6294565333867362e-06, "loss": 0.4544, "step": 10586 }, { "epoch": 0.82, "grad_norm": 1.0917078299199416, "learning_rate": 1.6280820656651252e-06, "loss": 0.4467, "step": 10587 }, { "epoch": 0.82, "grad_norm": 1.1970082639601753, "learning_rate": 1.6267081265058882e-06, "loss": 0.5093, "step": 10588 }, { "epoch": 0.82, "grad_norm": 1.0844817249205754, "learning_rate": 1.6253347159957666e-06, "loss": 0.4324, "step": 10589 }, { "epoch": 0.82, "grad_norm": 1.3469163273181914, "learning_rate": 1.6239618342214746e-06, "loss": 0.4984, "step": 10590 }, { "epoch": 0.82, "grad_norm": 1.1597337563576289, "learning_rate": 1.6225894812696875e-06, "loss": 0.5273, "step": 10591 }, { "epoch": 0.82, "grad_norm": 1.1174359345473404, "learning_rate": 1.6212176572270445e-06, "loss": 0.5066, "step": 10592 }, { "epoch": 0.82, "grad_norm": 1.2679483728187548, "learning_rate": 1.6198463621801607e-06, "loss": 0.5363, "step": 10593 }, { "epoch": 0.82, "grad_norm": 1.1787600376676683, "learning_rate": 1.6184755962156118e-06, "loss": 0.4586, "step": 10594 }, { "epoch": 0.82, "grad_norm": 1.1610539107147957, "learning_rate": 1.6171053594199403e-06, "loss": 0.4825, "step": 10595 }, { "epoch": 0.82, "grad_norm": 1.1591404536227137, "learning_rate": 1.6157356518796608e-06, "loss": 0.4964, "step": 10596 }, { "epoch": 0.82, "grad_norm": 1.2739170786222964, "learning_rate": 1.6143664736812449e-06, "loss": 0.5388, "step": 10597 }, { "epoch": 0.82, "grad_norm": 1.1487295596822786, "learning_rate": 1.6129978249111366e-06, "loss": 0.5308, "step": 10598 }, { "epoch": 0.82, "grad_norm": 1.053988585426585, "learning_rate": 1.6116297056557507e-06, "loss": 0.4454, "step": 10599 }, { "epoch": 0.82, "grad_norm": 1.140154493303633, "learning_rate": 1.6102621160014563e-06, "loss": 0.4721, "step": 10600 }, { "epoch": 0.82, "grad_norm": 1.0979149130144294, "learning_rate": 1.6088950560346017e-06, "loss": 0.5351, "step": 10601 }, { "epoch": 0.82, "grad_norm": 1.3602556029114805, "learning_rate": 1.607528525841493e-06, "loss": 0.5462, "step": 10602 }, { "epoch": 0.82, "grad_norm": 1.2487420905304725, "learning_rate": 1.6061625255084079e-06, "loss": 0.4939, "step": 10603 }, { "epoch": 0.82, "grad_norm": 1.264048925451849, "learning_rate": 1.6047970551215898e-06, "loss": 0.5182, "step": 10604 }, { "epoch": 0.82, "grad_norm": 1.3149038735128054, "learning_rate": 1.603432114767246e-06, "loss": 0.5147, "step": 10605 }, { "epoch": 0.82, "grad_norm": 1.1976032123405373, "learning_rate": 1.6020677045315558e-06, "loss": 0.4532, "step": 10606 }, { "epoch": 0.82, "grad_norm": 1.2281628998399992, "learning_rate": 1.600703824500658e-06, "loss": 0.5406, "step": 10607 }, { "epoch": 0.82, "grad_norm": 1.2557731824280873, "learning_rate": 1.5993404747606612e-06, "loss": 0.5092, "step": 10608 }, { "epoch": 0.82, "grad_norm": 1.2202247597855143, "learning_rate": 1.5979776553976444e-06, "loss": 0.5009, "step": 10609 }, { "epoch": 0.82, "grad_norm": 1.260890109886381, "learning_rate": 1.5966153664976447e-06, "loss": 0.5499, "step": 10610 }, { "epoch": 0.82, "grad_norm": 1.1915798373477975, "learning_rate": 1.5952536081466752e-06, "loss": 0.4717, "step": 10611 }, { "epoch": 0.82, "grad_norm": 1.2130895872219338, "learning_rate": 1.593892380430706e-06, "loss": 0.4822, "step": 10612 }, { "epoch": 0.82, "grad_norm": 1.1936408427396823, "learning_rate": 1.5925316834356797e-06, "loss": 0.4967, "step": 10613 }, { "epoch": 0.82, "grad_norm": 1.113920918118061, "learning_rate": 1.5911715172475062e-06, "loss": 0.4447, "step": 10614 }, { "epoch": 0.82, "grad_norm": 1.354397397069576, "learning_rate": 1.589811881952058e-06, "loss": 0.5498, "step": 10615 }, { "epoch": 0.82, "grad_norm": 1.1929171128205, "learning_rate": 1.5884527776351765e-06, "loss": 0.4722, "step": 10616 }, { "epoch": 0.82, "grad_norm": 1.1839754603297938, "learning_rate": 1.587094204382672e-06, "loss": 0.5038, "step": 10617 }, { "epoch": 0.82, "grad_norm": 1.1277828236536438, "learning_rate": 1.5857361622803124e-06, "loss": 0.4634, "step": 10618 }, { "epoch": 0.82, "grad_norm": 1.2086732430136258, "learning_rate": 1.5843786514138438e-06, "loss": 0.5452, "step": 10619 }, { "epoch": 0.82, "grad_norm": 1.243875041024383, "learning_rate": 1.5830216718689674e-06, "loss": 0.4856, "step": 10620 }, { "epoch": 0.82, "grad_norm": 1.1107088586063996, "learning_rate": 1.5816652237313579e-06, "loss": 0.4601, "step": 10621 }, { "epoch": 0.82, "grad_norm": 1.141746753241139, "learning_rate": 1.5803093070866582e-06, "loss": 0.4985, "step": 10622 }, { "epoch": 0.82, "grad_norm": 1.138993494274794, "learning_rate": 1.5789539220204698e-06, "loss": 0.4817, "step": 10623 }, { "epoch": 0.82, "grad_norm": 1.2651400637830874, "learning_rate": 1.5775990686183672e-06, "loss": 0.5021, "step": 10624 }, { "epoch": 0.82, "grad_norm": 1.303035626902938, "learning_rate": 1.5762447469658892e-06, "loss": 0.5787, "step": 10625 }, { "epoch": 0.82, "grad_norm": 1.141180399666159, "learning_rate": 1.5748909571485415e-06, "loss": 0.4397, "step": 10626 }, { "epoch": 0.82, "grad_norm": 1.2237553637877017, "learning_rate": 1.573537699251797e-06, "loss": 0.4826, "step": 10627 }, { "epoch": 0.82, "grad_norm": 1.1775421947362295, "learning_rate": 1.5721849733610905e-06, "loss": 0.5236, "step": 10628 }, { "epoch": 0.82, "grad_norm": 1.0834415210597546, "learning_rate": 1.5708327795618317e-06, "loss": 0.4447, "step": 10629 }, { "epoch": 0.82, "grad_norm": 1.165043001895713, "learning_rate": 1.5694811179393853e-06, "loss": 0.5152, "step": 10630 }, { "epoch": 0.82, "grad_norm": 1.2859387551877466, "learning_rate": 1.5681299885790912e-06, "loss": 0.4722, "step": 10631 }, { "epoch": 0.82, "grad_norm": 1.2709186191811892, "learning_rate": 1.5667793915662566e-06, "loss": 0.5463, "step": 10632 }, { "epoch": 0.82, "grad_norm": 1.161564803553512, "learning_rate": 1.5654293269861464e-06, "loss": 0.4952, "step": 10633 }, { "epoch": 0.82, "grad_norm": 1.295246480586887, "learning_rate": 1.5640797949239983e-06, "loss": 0.5319, "step": 10634 }, { "epoch": 0.83, "grad_norm": 1.2390392883358687, "learning_rate": 1.5627307954650174e-06, "loss": 0.4864, "step": 10635 }, { "epoch": 0.83, "grad_norm": 1.1896794045976744, "learning_rate": 1.5613823286943718e-06, "loss": 0.5396, "step": 10636 }, { "epoch": 0.83, "grad_norm": 1.2407145374187092, "learning_rate": 1.5600343946971997e-06, "loss": 0.542, "step": 10637 }, { "epoch": 0.83, "grad_norm": 1.1586477831679753, "learning_rate": 1.558686993558599e-06, "loss": 0.493, "step": 10638 }, { "epoch": 0.83, "grad_norm": 1.1317715828137198, "learning_rate": 1.557340125363641e-06, "loss": 0.4916, "step": 10639 }, { "epoch": 0.83, "grad_norm": 1.250607247672225, "learning_rate": 1.5559937901973575e-06, "loss": 0.5091, "step": 10640 }, { "epoch": 0.83, "grad_norm": 1.2454636275081175, "learning_rate": 1.554647988144752e-06, "loss": 0.4796, "step": 10641 }, { "epoch": 0.83, "grad_norm": 1.1786074715424588, "learning_rate": 1.5533027192907924e-06, "loss": 0.5, "step": 10642 }, { "epoch": 0.83, "grad_norm": 1.3496931963670329, "learning_rate": 1.5519579837204103e-06, "loss": 0.515, "step": 10643 }, { "epoch": 0.83, "grad_norm": 1.155420314967855, "learning_rate": 1.5506137815185063e-06, "loss": 0.4891, "step": 10644 }, { "epoch": 0.83, "grad_norm": 1.1867319434049721, "learning_rate": 1.5492701127699472e-06, "loss": 0.4803, "step": 10645 }, { "epoch": 0.83, "grad_norm": 1.0859875564564818, "learning_rate": 1.5479269775595652e-06, "loss": 0.4627, "step": 10646 }, { "epoch": 0.83, "grad_norm": 1.3417575169639346, "learning_rate": 1.5465843759721633e-06, "loss": 0.4812, "step": 10647 }, { "epoch": 0.83, "grad_norm": 1.0905783307165362, "learning_rate": 1.5452423080925017e-06, "loss": 0.4835, "step": 10648 }, { "epoch": 0.83, "grad_norm": 1.1912132732257752, "learning_rate": 1.5439007740053158e-06, "loss": 0.5001, "step": 10649 }, { "epoch": 0.83, "grad_norm": 1.1660853481692872, "learning_rate": 1.542559773795299e-06, "loss": 0.5076, "step": 10650 }, { "epoch": 0.83, "grad_norm": 1.2455843658633547, "learning_rate": 1.5412193075471193e-06, "loss": 0.5132, "step": 10651 }, { "epoch": 0.83, "grad_norm": 1.2122231826191785, "learning_rate": 1.5398793753454079e-06, "loss": 0.5049, "step": 10652 }, { "epoch": 0.83, "grad_norm": 1.2730598591847297, "learning_rate": 1.5385399772747578e-06, "loss": 0.5382, "step": 10653 }, { "epoch": 0.83, "grad_norm": 1.2040212191394304, "learning_rate": 1.537201113419735e-06, "loss": 0.5187, "step": 10654 }, { "epoch": 0.83, "grad_norm": 1.2002762595706624, "learning_rate": 1.5358627838648676e-06, "loss": 0.5191, "step": 10655 }, { "epoch": 0.83, "grad_norm": 1.1650171141943202, "learning_rate": 1.5345249886946512e-06, "loss": 0.5007, "step": 10656 }, { "epoch": 0.83, "grad_norm": 1.1814721862289355, "learning_rate": 1.5331877279935515e-06, "loss": 0.5262, "step": 10657 }, { "epoch": 0.83, "grad_norm": 1.287374744063012, "learning_rate": 1.5318510018459908e-06, "loss": 0.5571, "step": 10658 }, { "epoch": 0.83, "grad_norm": 1.0429881804867251, "learning_rate": 1.5305148103363698e-06, "loss": 0.4691, "step": 10659 }, { "epoch": 0.83, "grad_norm": 1.2026368178732705, "learning_rate": 1.5291791535490419e-06, "loss": 0.5112, "step": 10660 }, { "epoch": 0.83, "grad_norm": 1.1618844463750302, "learning_rate": 1.5278440315683385e-06, "loss": 0.4974, "step": 10661 }, { "epoch": 0.83, "grad_norm": 1.279191247204793, "learning_rate": 1.5265094444785544e-06, "loss": 0.5216, "step": 10662 }, { "epoch": 0.83, "grad_norm": 1.2477094167179268, "learning_rate": 1.5251753923639435e-06, "loss": 0.535, "step": 10663 }, { "epoch": 0.83, "grad_norm": 1.1094570398217969, "learning_rate": 1.5238418753087347e-06, "loss": 0.4709, "step": 10664 }, { "epoch": 0.83, "grad_norm": 1.150298602272318, "learning_rate": 1.52250889339712e-06, "loss": 0.5148, "step": 10665 }, { "epoch": 0.83, "grad_norm": 1.2391737841051351, "learning_rate": 1.5211764467132562e-06, "loss": 0.5111, "step": 10666 }, { "epoch": 0.83, "grad_norm": 1.2618902698063414, "learning_rate": 1.5198445353412705e-06, "loss": 0.5339, "step": 10667 }, { "epoch": 0.83, "grad_norm": 1.2754243668069645, "learning_rate": 1.5185131593652492e-06, "loss": 0.5474, "step": 10668 }, { "epoch": 0.83, "grad_norm": 1.1565161733720979, "learning_rate": 1.5171823188692537e-06, "loss": 0.4839, "step": 10669 }, { "epoch": 0.83, "grad_norm": 1.2418106755116487, "learning_rate": 1.515852013937301e-06, "loss": 0.51, "step": 10670 }, { "epoch": 0.83, "grad_norm": 1.1223652504326402, "learning_rate": 1.5145222446533835e-06, "loss": 0.5, "step": 10671 }, { "epoch": 0.83, "grad_norm": 1.2221621163838106, "learning_rate": 1.5131930111014558e-06, "loss": 0.4724, "step": 10672 }, { "epoch": 0.83, "grad_norm": 1.2141082107009922, "learning_rate": 1.5118643133654421e-06, "loss": 0.544, "step": 10673 }, { "epoch": 0.83, "grad_norm": 1.2580655714118167, "learning_rate": 1.5105361515292248e-06, "loss": 0.4804, "step": 10674 }, { "epoch": 0.83, "grad_norm": 1.2292671270319728, "learning_rate": 1.5092085256766597e-06, "loss": 0.5168, "step": 10675 }, { "epoch": 0.83, "grad_norm": 1.0648835877924034, "learning_rate": 1.5078814358915673e-06, "loss": 0.4423, "step": 10676 }, { "epoch": 0.83, "grad_norm": 1.1668641343348982, "learning_rate": 1.5065548822577336e-06, "loss": 0.4548, "step": 10677 }, { "epoch": 0.83, "grad_norm": 1.1516953535320928, "learning_rate": 1.5052288648589131e-06, "loss": 0.4911, "step": 10678 }, { "epoch": 0.83, "grad_norm": 1.219413625554132, "learning_rate": 1.503903383778822e-06, "loss": 0.5264, "step": 10679 }, { "epoch": 0.83, "grad_norm": 1.2380641903451444, "learning_rate": 1.502578439101141e-06, "loss": 0.5702, "step": 10680 }, { "epoch": 0.83, "grad_norm": 1.1643114271843449, "learning_rate": 1.5012540309095247e-06, "loss": 0.4866, "step": 10681 }, { "epoch": 0.83, "grad_norm": 1.199663160653029, "learning_rate": 1.4999301592875891e-06, "loss": 0.529, "step": 10682 }, { "epoch": 0.83, "grad_norm": 1.1609746441861055, "learning_rate": 1.4986068243189188e-06, "loss": 0.4953, "step": 10683 }, { "epoch": 0.83, "grad_norm": 1.2624824031698814, "learning_rate": 1.4972840260870603e-06, "loss": 0.5146, "step": 10684 }, { "epoch": 0.83, "grad_norm": 1.2543424519040083, "learning_rate": 1.4959617646755276e-06, "loss": 0.5037, "step": 10685 }, { "epoch": 0.83, "grad_norm": 1.2434565940861557, "learning_rate": 1.494640040167805e-06, "loss": 0.5153, "step": 10686 }, { "epoch": 0.83, "grad_norm": 1.2467298647897496, "learning_rate": 1.4933188526473385e-06, "loss": 0.4857, "step": 10687 }, { "epoch": 0.83, "grad_norm": 1.161222334122599, "learning_rate": 1.4919982021975432e-06, "loss": 0.4957, "step": 10688 }, { "epoch": 0.83, "grad_norm": 1.101285588515358, "learning_rate": 1.490678088901798e-06, "loss": 0.467, "step": 10689 }, { "epoch": 0.83, "grad_norm": 1.1081466859437221, "learning_rate": 1.489358512843444e-06, "loss": 0.472, "step": 10690 }, { "epoch": 0.83, "grad_norm": 1.142412900797478, "learning_rate": 1.4880394741057968e-06, "loss": 0.4944, "step": 10691 }, { "epoch": 0.83, "grad_norm": 1.178900854301871, "learning_rate": 1.4867209727721332e-06, "loss": 0.4587, "step": 10692 }, { "epoch": 0.83, "grad_norm": 1.22633194426983, "learning_rate": 1.4854030089257e-06, "loss": 0.5165, "step": 10693 }, { "epoch": 0.83, "grad_norm": 1.154534123497548, "learning_rate": 1.4840855826497013e-06, "loss": 0.5071, "step": 10694 }, { "epoch": 0.83, "grad_norm": 1.2420594731218504, "learning_rate": 1.4827686940273178e-06, "loss": 0.4821, "step": 10695 }, { "epoch": 0.83, "grad_norm": 1.2937649583182145, "learning_rate": 1.4814523431416882e-06, "loss": 0.5263, "step": 10696 }, { "epoch": 0.83, "grad_norm": 1.2260895807480467, "learning_rate": 1.4801365300759253e-06, "loss": 0.4687, "step": 10697 }, { "epoch": 0.83, "grad_norm": 1.3067974914450806, "learning_rate": 1.4788212549130964e-06, "loss": 0.5591, "step": 10698 }, { "epoch": 0.83, "grad_norm": 1.3168350562730655, "learning_rate": 1.4775065177362492e-06, "loss": 0.5635, "step": 10699 }, { "epoch": 0.83, "grad_norm": 1.1561035888352202, "learning_rate": 1.4761923186283822e-06, "loss": 0.4237, "step": 10700 }, { "epoch": 0.83, "grad_norm": 1.1750204470052563, "learning_rate": 1.4748786576724716e-06, "loss": 0.4973, "step": 10701 }, { "epoch": 0.83, "grad_norm": 1.3664606832716801, "learning_rate": 1.4735655349514555e-06, "loss": 0.5479, "step": 10702 }, { "epoch": 0.83, "grad_norm": 1.28754793837104, "learning_rate": 1.4722529505482396e-06, "loss": 0.5688, "step": 10703 }, { "epoch": 0.83, "grad_norm": 1.3797582312281547, "learning_rate": 1.47094090454569e-06, "loss": 0.5157, "step": 10704 }, { "epoch": 0.83, "grad_norm": 1.2582705119958655, "learning_rate": 1.4696293970266463e-06, "loss": 0.5462, "step": 10705 }, { "epoch": 0.83, "grad_norm": 1.1306954431388179, "learning_rate": 1.4683184280739082e-06, "loss": 0.4743, "step": 10706 }, { "epoch": 0.83, "grad_norm": 1.2099052705805882, "learning_rate": 1.467007997770249e-06, "loss": 0.493, "step": 10707 }, { "epoch": 0.83, "grad_norm": 1.1512392602309827, "learning_rate": 1.4656981061983966e-06, "loss": 0.5054, "step": 10708 }, { "epoch": 0.83, "grad_norm": 1.2088871979984055, "learning_rate": 1.464388753441056e-06, "loss": 0.5698, "step": 10709 }, { "epoch": 0.83, "grad_norm": 1.2783778959379315, "learning_rate": 1.4630799395808893e-06, "loss": 0.577, "step": 10710 }, { "epoch": 0.83, "grad_norm": 1.1232985239455786, "learning_rate": 1.4617716647005298e-06, "loss": 0.4855, "step": 10711 }, { "epoch": 0.83, "grad_norm": 1.2508608238169414, "learning_rate": 1.4604639288825773e-06, "loss": 0.4672, "step": 10712 }, { "epoch": 0.83, "grad_norm": 1.1214602200395811, "learning_rate": 1.4591567322095978e-06, "loss": 0.4392, "step": 10713 }, { "epoch": 0.83, "grad_norm": 1.1259987424467393, "learning_rate": 1.4578500747641167e-06, "loss": 0.5118, "step": 10714 }, { "epoch": 0.83, "grad_norm": 1.2225311020247238, "learning_rate": 1.456543956628631e-06, "loss": 0.4707, "step": 10715 }, { "epoch": 0.83, "grad_norm": 1.2100539398781158, "learning_rate": 1.455238377885605e-06, "loss": 0.4967, "step": 10716 }, { "epoch": 0.83, "grad_norm": 1.137771224080233, "learning_rate": 1.453933338617467e-06, "loss": 0.5059, "step": 10717 }, { "epoch": 0.83, "grad_norm": 1.2057733493341938, "learning_rate": 1.4526288389066068e-06, "loss": 0.4884, "step": 10718 }, { "epoch": 0.83, "grad_norm": 1.2189737750292187, "learning_rate": 1.4513248788353894e-06, "loss": 0.4715, "step": 10719 }, { "epoch": 0.83, "grad_norm": 1.1831893985590594, "learning_rate": 1.4500214584861349e-06, "loss": 0.5097, "step": 10720 }, { "epoch": 0.83, "grad_norm": 1.23139942637299, "learning_rate": 1.4487185779411382e-06, "loss": 0.4887, "step": 10721 }, { "epoch": 0.83, "grad_norm": 1.3156255563476547, "learning_rate": 1.4474162372826562e-06, "loss": 0.5304, "step": 10722 }, { "epoch": 0.83, "grad_norm": 1.2479036872880824, "learning_rate": 1.446114436592916e-06, "loss": 0.5598, "step": 10723 }, { "epoch": 0.83, "grad_norm": 1.1843007811791368, "learning_rate": 1.4448131759540996e-06, "loss": 0.5057, "step": 10724 }, { "epoch": 0.83, "grad_norm": 1.2374767012281316, "learning_rate": 1.4435124554483671e-06, "loss": 0.519, "step": 10725 }, { "epoch": 0.83, "grad_norm": 1.240530624451376, "learning_rate": 1.4422122751578394e-06, "loss": 0.5137, "step": 10726 }, { "epoch": 0.83, "grad_norm": 1.2339787390406027, "learning_rate": 1.4409126351646052e-06, "loss": 0.5416, "step": 10727 }, { "epoch": 0.83, "grad_norm": 1.2254550575192762, "learning_rate": 1.4396135355507135e-06, "loss": 0.5091, "step": 10728 }, { "epoch": 0.83, "grad_norm": 1.1919957437951312, "learning_rate": 1.4383149763981863e-06, "loss": 0.5135, "step": 10729 }, { "epoch": 0.83, "grad_norm": 1.1402930207704394, "learning_rate": 1.4370169577890059e-06, "loss": 0.5007, "step": 10730 }, { "epoch": 0.83, "grad_norm": 1.2649005653292626, "learning_rate": 1.4357194798051244e-06, "loss": 0.5077, "step": 10731 }, { "epoch": 0.83, "grad_norm": 1.2354540876582532, "learning_rate": 1.4344225425284565e-06, "loss": 0.5043, "step": 10732 }, { "epoch": 0.83, "grad_norm": 1.1919058330770496, "learning_rate": 1.4331261460408874e-06, "loss": 0.485, "step": 10733 }, { "epoch": 0.83, "grad_norm": 1.2504249803994862, "learning_rate": 1.4318302904242654e-06, "loss": 0.4858, "step": 10734 }, { "epoch": 0.83, "grad_norm": 1.1635475299702271, "learning_rate": 1.4305349757604014e-06, "loss": 0.5002, "step": 10735 }, { "epoch": 0.83, "grad_norm": 1.2957456336237656, "learning_rate": 1.429240202131077e-06, "loss": 0.4974, "step": 10736 }, { "epoch": 0.83, "grad_norm": 1.11815931827255, "learning_rate": 1.4279459696180398e-06, "loss": 0.4841, "step": 10737 }, { "epoch": 0.83, "grad_norm": 1.3639096011608889, "learning_rate": 1.4266522783029978e-06, "loss": 0.5187, "step": 10738 }, { "epoch": 0.83, "grad_norm": 1.2202920694631334, "learning_rate": 1.4253591282676316e-06, "loss": 0.5205, "step": 10739 }, { "epoch": 0.83, "grad_norm": 1.1667114430872203, "learning_rate": 1.424066519593581e-06, "loss": 0.5079, "step": 10740 }, { "epoch": 0.83, "grad_norm": 1.1430497198830127, "learning_rate": 1.4227744523624575e-06, "loss": 0.4168, "step": 10741 }, { "epoch": 0.83, "grad_norm": 1.3526177228508531, "learning_rate": 1.421482926655835e-06, "loss": 0.5473, "step": 10742 }, { "epoch": 0.83, "grad_norm": 1.2059125932393542, "learning_rate": 1.4201919425552557e-06, "loss": 0.478, "step": 10743 }, { "epoch": 0.83, "grad_norm": 1.1257928597502185, "learning_rate": 1.4189015001422257e-06, "loss": 0.4847, "step": 10744 }, { "epoch": 0.83, "grad_norm": 1.2357940725639691, "learning_rate": 1.4176115994982153e-06, "loss": 0.4943, "step": 10745 }, { "epoch": 0.83, "grad_norm": 1.134541737306114, "learning_rate": 1.416322240704664e-06, "loss": 0.4747, "step": 10746 }, { "epoch": 0.83, "grad_norm": 1.2373541601537699, "learning_rate": 1.4150334238429776e-06, "loss": 0.5243, "step": 10747 }, { "epoch": 0.83, "grad_norm": 1.1749621608403984, "learning_rate": 1.413745148994522e-06, "loss": 0.4926, "step": 10748 }, { "epoch": 0.83, "grad_norm": 1.211961030626305, "learning_rate": 1.4124574162406356e-06, "loss": 0.5169, "step": 10749 }, { "epoch": 0.83, "grad_norm": 1.1651775984402475, "learning_rate": 1.411170225662617e-06, "loss": 0.4731, "step": 10750 }, { "epoch": 0.83, "grad_norm": 1.2407771327141737, "learning_rate": 1.409883577341734e-06, "loss": 0.5455, "step": 10751 }, { "epoch": 0.83, "grad_norm": 1.365969654997289, "learning_rate": 1.4085974713592199e-06, "loss": 0.5209, "step": 10752 }, { "epoch": 0.83, "grad_norm": 1.2404837770581318, "learning_rate": 1.4073119077962738e-06, "loss": 0.5274, "step": 10753 }, { "epoch": 0.83, "grad_norm": 1.0377283350613595, "learning_rate": 1.4060268867340621e-06, "loss": 0.4271, "step": 10754 }, { "epoch": 0.83, "grad_norm": 1.1906535788482806, "learning_rate": 1.4047424082537086e-06, "loss": 0.5017, "step": 10755 }, { "epoch": 0.83, "grad_norm": 1.2003521859407196, "learning_rate": 1.4034584724363131e-06, "loss": 0.5402, "step": 10756 }, { "epoch": 0.83, "grad_norm": 1.1219848387776727, "learning_rate": 1.4021750793629397e-06, "loss": 0.4741, "step": 10757 }, { "epoch": 0.83, "grad_norm": 1.196548222087757, "learning_rate": 1.4008922291146087e-06, "loss": 0.4337, "step": 10758 }, { "epoch": 0.83, "grad_norm": 1.1567767721771123, "learning_rate": 1.3996099217723202e-06, "loss": 0.4881, "step": 10759 }, { "epoch": 0.83, "grad_norm": 1.1268658529831788, "learning_rate": 1.3983281574170271e-06, "loss": 0.4501, "step": 10760 }, { "epoch": 0.83, "grad_norm": 1.2337646423986006, "learning_rate": 1.3970469361296557e-06, "loss": 0.4722, "step": 10761 }, { "epoch": 0.83, "grad_norm": 1.251927130037995, "learning_rate": 1.3957662579910969e-06, "loss": 0.4932, "step": 10762 }, { "epoch": 0.83, "grad_norm": 1.2588248118317495, "learning_rate": 1.394486123082206e-06, "loss": 0.4983, "step": 10763 }, { "epoch": 0.84, "grad_norm": 1.3571947964431077, "learning_rate": 1.3932065314838071e-06, "loss": 0.5309, "step": 10764 }, { "epoch": 0.84, "grad_norm": 1.1875720253233086, "learning_rate": 1.3919274832766838e-06, "loss": 0.4871, "step": 10765 }, { "epoch": 0.84, "grad_norm": 1.2049814244180048, "learning_rate": 1.390648978541589e-06, "loss": 0.489, "step": 10766 }, { "epoch": 0.84, "grad_norm": 1.1682337329563806, "learning_rate": 1.3893710173592457e-06, "loss": 0.5027, "step": 10767 }, { "epoch": 0.84, "grad_norm": 1.2058632146528905, "learning_rate": 1.3880935998103317e-06, "loss": 0.5019, "step": 10768 }, { "epoch": 0.84, "grad_norm": 1.1302106244932704, "learning_rate": 1.386816725975504e-06, "loss": 0.4566, "step": 10769 }, { "epoch": 0.84, "grad_norm": 1.2500268456437313, "learning_rate": 1.385540395935372e-06, "loss": 0.5164, "step": 10770 }, { "epoch": 0.84, "grad_norm": 1.2638334144509422, "learning_rate": 1.3842646097705193e-06, "loss": 0.4984, "step": 10771 }, { "epoch": 0.84, "grad_norm": 1.2933552328611988, "learning_rate": 1.3829893675614924e-06, "loss": 0.4955, "step": 10772 }, { "epoch": 0.84, "grad_norm": 1.0794999573898838, "learning_rate": 1.381714669388805e-06, "loss": 0.471, "step": 10773 }, { "epoch": 0.84, "grad_norm": 1.0502251497201327, "learning_rate": 1.380440515332938e-06, "loss": 0.4248, "step": 10774 }, { "epoch": 0.84, "grad_norm": 1.2664682852007916, "learning_rate": 1.3791669054743295e-06, "loss": 0.5356, "step": 10775 }, { "epoch": 0.84, "grad_norm": 1.2468282991999549, "learning_rate": 1.3778938398933927e-06, "loss": 0.5251, "step": 10776 }, { "epoch": 0.84, "grad_norm": 1.1621070156554307, "learning_rate": 1.3766213186705036e-06, "loss": 0.4653, "step": 10777 }, { "epoch": 0.84, "grad_norm": 1.294201496602907, "learning_rate": 1.3753493418859987e-06, "loss": 0.5818, "step": 10778 }, { "epoch": 0.84, "grad_norm": 1.2030049549010255, "learning_rate": 1.3740779096201883e-06, "loss": 0.5253, "step": 10779 }, { "epoch": 0.84, "grad_norm": 1.2743483953986934, "learning_rate": 1.372807021953345e-06, "loss": 0.5098, "step": 10780 }, { "epoch": 0.84, "grad_norm": 1.2291839188911362, "learning_rate": 1.3715366789657025e-06, "loss": 0.5083, "step": 10781 }, { "epoch": 0.84, "grad_norm": 1.2275395480620062, "learning_rate": 1.3702668807374664e-06, "loss": 0.5127, "step": 10782 }, { "epoch": 0.84, "grad_norm": 1.235526212024401, "learning_rate": 1.368997627348806e-06, "loss": 0.5532, "step": 10783 }, { "epoch": 0.84, "grad_norm": 1.149829151067626, "learning_rate": 1.367728918879856e-06, "loss": 0.4838, "step": 10784 }, { "epoch": 0.84, "grad_norm": 1.2707991621499186, "learning_rate": 1.3664607554107178e-06, "loss": 0.5214, "step": 10785 }, { "epoch": 0.84, "grad_norm": 1.1669629719800076, "learning_rate": 1.3651931370214533e-06, "loss": 0.4473, "step": 10786 }, { "epoch": 0.84, "grad_norm": 1.1423184613295985, "learning_rate": 1.3639260637920971e-06, "loss": 0.4627, "step": 10787 }, { "epoch": 0.84, "grad_norm": 1.1849397364716565, "learning_rate": 1.3626595358026429e-06, "loss": 0.4989, "step": 10788 }, { "epoch": 0.84, "grad_norm": 1.3661531295707572, "learning_rate": 1.3613935531330558e-06, "loss": 0.5729, "step": 10789 }, { "epoch": 0.84, "grad_norm": 1.1020821502236913, "learning_rate": 1.360128115863265e-06, "loss": 0.4742, "step": 10790 }, { "epoch": 0.84, "grad_norm": 1.198002599315339, "learning_rate": 1.3588632240731591e-06, "loss": 0.531, "step": 10791 }, { "epoch": 0.84, "grad_norm": 1.2487884849292366, "learning_rate": 1.3575988778426008e-06, "loss": 0.4717, "step": 10792 }, { "epoch": 0.84, "grad_norm": 1.2853957303823997, "learning_rate": 1.356335077251415e-06, "loss": 0.5587, "step": 10793 }, { "epoch": 0.84, "grad_norm": 1.16087185659976, "learning_rate": 1.35507182237939e-06, "loss": 0.4719, "step": 10794 }, { "epoch": 0.84, "grad_norm": 1.0618740369625115, "learning_rate": 1.3538091133062858e-06, "loss": 0.4866, "step": 10795 }, { "epoch": 0.84, "grad_norm": 1.4409372450580755, "learning_rate": 1.3525469501118183e-06, "loss": 0.5358, "step": 10796 }, { "epoch": 0.84, "grad_norm": 1.2967474541507673, "learning_rate": 1.3512853328756792e-06, "loss": 0.5178, "step": 10797 }, { "epoch": 0.84, "grad_norm": 1.1611695151467718, "learning_rate": 1.350024261677516e-06, "loss": 0.4664, "step": 10798 }, { "epoch": 0.84, "grad_norm": 1.1657886380037057, "learning_rate": 1.3487637365969497e-06, "loss": 0.5048, "step": 10799 }, { "epoch": 0.84, "grad_norm": 1.3451269768405352, "learning_rate": 1.347503757713564e-06, "loss": 0.5801, "step": 10800 }, { "epoch": 0.84, "grad_norm": 1.1820919467548117, "learning_rate": 1.3462443251069069e-06, "loss": 0.4986, "step": 10801 }, { "epoch": 0.84, "grad_norm": 1.175646904148628, "learning_rate": 1.344985438856492e-06, "loss": 0.4639, "step": 10802 }, { "epoch": 0.84, "grad_norm": 1.2158919347785935, "learning_rate": 1.3437270990418005e-06, "loss": 0.4836, "step": 10803 }, { "epoch": 0.84, "grad_norm": 1.1412074548451747, "learning_rate": 1.3424693057422778e-06, "loss": 0.4691, "step": 10804 }, { "epoch": 0.84, "grad_norm": 1.2210774328468081, "learning_rate": 1.3412120590373368e-06, "loss": 0.4906, "step": 10805 }, { "epoch": 0.84, "grad_norm": 1.3015930575187398, "learning_rate": 1.3399553590063496e-06, "loss": 0.4965, "step": 10806 }, { "epoch": 0.84, "grad_norm": 1.2948082691375216, "learning_rate": 1.338699205728663e-06, "loss": 0.5724, "step": 10807 }, { "epoch": 0.84, "grad_norm": 1.1802022455142054, "learning_rate": 1.3374435992835798e-06, "loss": 0.4926, "step": 10808 }, { "epoch": 0.84, "grad_norm": 1.1665733958608424, "learning_rate": 1.3361885397503749e-06, "loss": 0.5317, "step": 10809 }, { "epoch": 0.84, "grad_norm": 1.2625637000603989, "learning_rate": 1.334934027208289e-06, "loss": 0.5471, "step": 10810 }, { "epoch": 0.84, "grad_norm": 1.3277707244485306, "learning_rate": 1.333680061736522e-06, "loss": 0.5281, "step": 10811 }, { "epoch": 0.84, "grad_norm": 1.2786571970924778, "learning_rate": 1.3324266434142452e-06, "loss": 0.5368, "step": 10812 }, { "epoch": 0.84, "grad_norm": 1.3105343677361685, "learning_rate": 1.331173772320593e-06, "loss": 0.5353, "step": 10813 }, { "epoch": 0.84, "grad_norm": 1.213867531221668, "learning_rate": 1.3299214485346657e-06, "loss": 0.536, "step": 10814 }, { "epoch": 0.84, "grad_norm": 1.1527844636919835, "learning_rate": 1.3286696721355308e-06, "loss": 0.5245, "step": 10815 }, { "epoch": 0.84, "grad_norm": 1.3641731820313083, "learning_rate": 1.3274184432022163e-06, "loss": 0.5001, "step": 10816 }, { "epoch": 0.84, "grad_norm": 1.1386973670998044, "learning_rate": 1.3261677618137225e-06, "loss": 0.5058, "step": 10817 }, { "epoch": 0.84, "grad_norm": 1.208461765610928, "learning_rate": 1.3249176280490062e-06, "loss": 0.5306, "step": 10818 }, { "epoch": 0.84, "grad_norm": 1.19888057351629, "learning_rate": 1.3236680419869974e-06, "loss": 0.5333, "step": 10819 }, { "epoch": 0.84, "grad_norm": 1.17742789410155, "learning_rate": 1.322419003706592e-06, "loss": 0.5125, "step": 10820 }, { "epoch": 0.84, "grad_norm": 1.4860746591124063, "learning_rate": 1.3211705132866425e-06, "loss": 0.5186, "step": 10821 }, { "epoch": 0.84, "grad_norm": 1.2032560301930184, "learning_rate": 1.319922570805976e-06, "loss": 0.4705, "step": 10822 }, { "epoch": 0.84, "grad_norm": 1.0833945379329721, "learning_rate": 1.3186751763433803e-06, "loss": 0.4589, "step": 10823 }, { "epoch": 0.84, "grad_norm": 1.1397373455706485, "learning_rate": 1.3174283299776103e-06, "loss": 0.4907, "step": 10824 }, { "epoch": 0.84, "grad_norm": 1.2937090272911513, "learning_rate": 1.3161820317873886e-06, "loss": 0.5254, "step": 10825 }, { "epoch": 0.84, "grad_norm": 1.2711673933288596, "learning_rate": 1.3149362818513955e-06, "loss": 0.4953, "step": 10826 }, { "epoch": 0.84, "grad_norm": 1.2915378731946339, "learning_rate": 1.3136910802482862e-06, "loss": 0.5293, "step": 10827 }, { "epoch": 0.84, "grad_norm": 1.238818656720174, "learning_rate": 1.3124464270566727e-06, "loss": 0.4607, "step": 10828 }, { "epoch": 0.84, "grad_norm": 1.2065017180606452, "learning_rate": 1.3112023223551374e-06, "loss": 0.5077, "step": 10829 }, { "epoch": 0.84, "grad_norm": 1.1675112029034365, "learning_rate": 1.3099587662222302e-06, "loss": 0.4655, "step": 10830 }, { "epoch": 0.84, "grad_norm": 1.1661638073077676, "learning_rate": 1.3087157587364596e-06, "loss": 0.4585, "step": 10831 }, { "epoch": 0.84, "grad_norm": 1.175343530230575, "learning_rate": 1.3074732999763029e-06, "loss": 0.5753, "step": 10832 }, { "epoch": 0.84, "grad_norm": 1.1335883279114707, "learning_rate": 1.306231390020205e-06, "loss": 0.4821, "step": 10833 }, { "epoch": 0.84, "grad_norm": 1.2057703833679223, "learning_rate": 1.3049900289465733e-06, "loss": 0.5463, "step": 10834 }, { "epoch": 0.84, "grad_norm": 1.0682319411687036, "learning_rate": 1.303749216833784e-06, "loss": 0.456, "step": 10835 }, { "epoch": 0.84, "grad_norm": 1.247203846640096, "learning_rate": 1.30250895376017e-06, "loss": 0.5179, "step": 10836 }, { "epoch": 0.84, "grad_norm": 1.1588513777910028, "learning_rate": 1.3012692398040416e-06, "loss": 0.4801, "step": 10837 }, { "epoch": 0.84, "grad_norm": 1.2390458787762963, "learning_rate": 1.3000300750436645e-06, "loss": 0.573, "step": 10838 }, { "epoch": 0.84, "grad_norm": 1.1628762671955695, "learning_rate": 1.2987914595572738e-06, "loss": 0.5235, "step": 10839 }, { "epoch": 0.84, "grad_norm": 1.2102574069137875, "learning_rate": 1.297553393423071e-06, "loss": 0.5248, "step": 10840 }, { "epoch": 0.84, "grad_norm": 1.1891242513775562, "learning_rate": 1.296315876719223e-06, "loss": 0.499, "step": 10841 }, { "epoch": 0.84, "grad_norm": 1.3099195909210037, "learning_rate": 1.295078909523857e-06, "loss": 0.5159, "step": 10842 }, { "epoch": 0.84, "grad_norm": 1.1284373012258375, "learning_rate": 1.2938424919150705e-06, "loss": 0.5088, "step": 10843 }, { "epoch": 0.84, "grad_norm": 1.188858610259817, "learning_rate": 1.2926066239709256e-06, "loss": 0.5223, "step": 10844 }, { "epoch": 0.84, "grad_norm": 1.190536481715684, "learning_rate": 1.2913713057694498e-06, "loss": 0.5015, "step": 10845 }, { "epoch": 0.84, "grad_norm": 1.2773474363687967, "learning_rate": 1.2901365373886331e-06, "loss": 0.5389, "step": 10846 }, { "epoch": 0.84, "grad_norm": 1.2536104988387609, "learning_rate": 1.2889023189064364e-06, "loss": 0.5022, "step": 10847 }, { "epoch": 0.84, "grad_norm": 1.208849429486005, "learning_rate": 1.2876686504007764e-06, "loss": 0.4989, "step": 10848 }, { "epoch": 0.84, "grad_norm": 1.1443225032280204, "learning_rate": 1.2864355319495448e-06, "loss": 0.4371, "step": 10849 }, { "epoch": 0.84, "grad_norm": 1.1924101055498921, "learning_rate": 1.285202963630594e-06, "loss": 0.5226, "step": 10850 }, { "epoch": 0.84, "grad_norm": 1.2944129645776967, "learning_rate": 1.2839709455217453e-06, "loss": 0.5451, "step": 10851 }, { "epoch": 0.84, "grad_norm": 1.2238177063148723, "learning_rate": 1.2827394777007774e-06, "loss": 0.5202, "step": 10852 }, { "epoch": 0.84, "grad_norm": 1.1125773027972965, "learning_rate": 1.2815085602454401e-06, "loss": 0.4273, "step": 10853 }, { "epoch": 0.84, "grad_norm": 1.2329779819679016, "learning_rate": 1.2802781932334495e-06, "loss": 0.5011, "step": 10854 }, { "epoch": 0.84, "grad_norm": 1.157375715598917, "learning_rate": 1.2790483767424878e-06, "loss": 0.4952, "step": 10855 }, { "epoch": 0.84, "grad_norm": 1.310867293140858, "learning_rate": 1.2778191108501925e-06, "loss": 0.5292, "step": 10856 }, { "epoch": 0.84, "grad_norm": 1.193135142135876, "learning_rate": 1.2765903956341807e-06, "loss": 0.4734, "step": 10857 }, { "epoch": 0.84, "grad_norm": 1.15281770948524, "learning_rate": 1.2753622311720203e-06, "loss": 0.4928, "step": 10858 }, { "epoch": 0.84, "grad_norm": 1.2165344837097434, "learning_rate": 1.2741346175412571e-06, "loss": 0.4891, "step": 10859 }, { "epoch": 0.84, "grad_norm": 1.3436418977768463, "learning_rate": 1.2729075548193947e-06, "loss": 0.6186, "step": 10860 }, { "epoch": 0.84, "grad_norm": 1.1532568982784446, "learning_rate": 1.2716810430839056e-06, "loss": 0.5101, "step": 10861 }, { "epoch": 0.84, "grad_norm": 1.2762731329215957, "learning_rate": 1.270455082412223e-06, "loss": 0.5155, "step": 10862 }, { "epoch": 0.84, "grad_norm": 1.1895692264413007, "learning_rate": 1.2692296728817487e-06, "loss": 0.5011, "step": 10863 }, { "epoch": 0.84, "grad_norm": 1.2093423489049155, "learning_rate": 1.268004814569851e-06, "loss": 0.5906, "step": 10864 }, { "epoch": 0.84, "grad_norm": 1.1630280269679925, "learning_rate": 1.266780507553863e-06, "loss": 0.4933, "step": 10865 }, { "epoch": 0.84, "grad_norm": 1.017427107428367, "learning_rate": 1.2655567519110756e-06, "loss": 0.4223, "step": 10866 }, { "epoch": 0.84, "grad_norm": 1.1516943184550408, "learning_rate": 1.2643335477187567e-06, "loss": 0.5277, "step": 10867 }, { "epoch": 0.84, "grad_norm": 1.1864273346336496, "learning_rate": 1.2631108950541303e-06, "loss": 0.4524, "step": 10868 }, { "epoch": 0.84, "grad_norm": 1.1194909630905223, "learning_rate": 1.261888793994388e-06, "loss": 0.5079, "step": 10869 }, { "epoch": 0.84, "grad_norm": 1.1862825376753299, "learning_rate": 1.260667244616689e-06, "loss": 0.4726, "step": 10870 }, { "epoch": 0.84, "grad_norm": 1.122631546842427, "learning_rate": 1.2594462469981582e-06, "loss": 0.4809, "step": 10871 }, { "epoch": 0.84, "grad_norm": 1.098077659216121, "learning_rate": 1.2582258012158799e-06, "loss": 0.4574, "step": 10872 }, { "epoch": 0.84, "grad_norm": 1.1962322112607366, "learning_rate": 1.2570059073469076e-06, "loss": 0.4667, "step": 10873 }, { "epoch": 0.84, "grad_norm": 1.2463609175838648, "learning_rate": 1.25578656546826e-06, "loss": 0.5413, "step": 10874 }, { "epoch": 0.84, "grad_norm": 1.2412484896401919, "learning_rate": 1.254567775656923e-06, "loss": 0.4892, "step": 10875 }, { "epoch": 0.84, "grad_norm": 1.1532129145658276, "learning_rate": 1.2533495379898407e-06, "loss": 0.4851, "step": 10876 }, { "epoch": 0.84, "grad_norm": 1.1250474707866498, "learning_rate": 1.252131852543932e-06, "loss": 0.471, "step": 10877 }, { "epoch": 0.84, "grad_norm": 1.241355425380615, "learning_rate": 1.250914719396069e-06, "loss": 0.5385, "step": 10878 }, { "epoch": 0.84, "grad_norm": 1.1449373846132629, "learning_rate": 1.2496981386231e-06, "loss": 0.472, "step": 10879 }, { "epoch": 0.84, "grad_norm": 1.2023882901652603, "learning_rate": 1.2484821103018329e-06, "loss": 0.5339, "step": 10880 }, { "epoch": 0.84, "grad_norm": 1.168978914496552, "learning_rate": 1.2472666345090435e-06, "loss": 0.4872, "step": 10881 }, { "epoch": 0.84, "grad_norm": 1.106973278005702, "learning_rate": 1.2460517113214688e-06, "loss": 0.4774, "step": 10882 }, { "epoch": 0.84, "grad_norm": 1.0929780143034311, "learning_rate": 1.2448373408158133e-06, "loss": 0.4846, "step": 10883 }, { "epoch": 0.84, "grad_norm": 1.2446006511296048, "learning_rate": 1.2436235230687466e-06, "loss": 0.5218, "step": 10884 }, { "epoch": 0.84, "grad_norm": 1.138021712080953, "learning_rate": 1.2424102581569064e-06, "loss": 0.4933, "step": 10885 }, { "epoch": 0.84, "grad_norm": 1.1251463794844778, "learning_rate": 1.2411975461568881e-06, "loss": 0.493, "step": 10886 }, { "epoch": 0.84, "grad_norm": 1.2287749242728667, "learning_rate": 1.2399853871452605e-06, "loss": 0.514, "step": 10887 }, { "epoch": 0.84, "grad_norm": 1.206398955719468, "learning_rate": 1.2387737811985479e-06, "loss": 0.4976, "step": 10888 }, { "epoch": 0.84, "grad_norm": 1.208941580611243, "learning_rate": 1.23756272839325e-06, "loss": 0.5029, "step": 10889 }, { "epoch": 0.84, "grad_norm": 1.255789984748273, "learning_rate": 1.2363522288058238e-06, "loss": 0.5212, "step": 10890 }, { "epoch": 0.84, "grad_norm": 1.258270038292831, "learning_rate": 1.2351422825126969e-06, "loss": 0.4995, "step": 10891 }, { "epoch": 0.84, "grad_norm": 1.2292192200024226, "learning_rate": 1.2339328895902603e-06, "loss": 0.5207, "step": 10892 }, { "epoch": 0.85, "grad_norm": 1.1894136870161656, "learning_rate": 1.2327240501148651e-06, "loss": 0.4845, "step": 10893 }, { "epoch": 0.85, "grad_norm": 1.2650543621881953, "learning_rate": 1.2315157641628338e-06, "loss": 0.5372, "step": 10894 }, { "epoch": 0.85, "grad_norm": 1.198015684375401, "learning_rate": 1.2303080318104533e-06, "loss": 0.4852, "step": 10895 }, { "epoch": 0.85, "grad_norm": 1.1911042878256393, "learning_rate": 1.229100853133971e-06, "loss": 0.4934, "step": 10896 }, { "epoch": 0.85, "grad_norm": 1.2255815118352613, "learning_rate": 1.2278942282096063e-06, "loss": 0.4829, "step": 10897 }, { "epoch": 0.85, "grad_norm": 1.2046207196716854, "learning_rate": 1.2266881571135337e-06, "loss": 0.5593, "step": 10898 }, { "epoch": 0.85, "grad_norm": 1.1641985410524214, "learning_rate": 1.2254826399219032e-06, "loss": 0.473, "step": 10899 }, { "epoch": 0.85, "grad_norm": 1.309079527007699, "learning_rate": 1.2242776767108233e-06, "loss": 0.5477, "step": 10900 }, { "epoch": 0.85, "grad_norm": 1.2956451191423466, "learning_rate": 1.2230732675563705e-06, "loss": 0.5157, "step": 10901 }, { "epoch": 0.85, "grad_norm": 1.318197147136756, "learning_rate": 1.2218694125345887e-06, "loss": 0.5097, "step": 10902 }, { "epoch": 0.85, "grad_norm": 1.2899736363282295, "learning_rate": 1.2206661117214768e-06, "loss": 0.5074, "step": 10903 }, { "epoch": 0.85, "grad_norm": 1.1875261002734072, "learning_rate": 1.2194633651930089e-06, "loss": 0.447, "step": 10904 }, { "epoch": 0.85, "grad_norm": 1.1686532556812843, "learning_rate": 1.2182611730251225e-06, "loss": 0.4576, "step": 10905 }, { "epoch": 0.85, "grad_norm": 1.1182941211206503, "learning_rate": 1.2170595352937142e-06, "loss": 0.4488, "step": 10906 }, { "epoch": 0.85, "grad_norm": 1.26196277254576, "learning_rate": 1.215858452074653e-06, "loss": 0.4595, "step": 10907 }, { "epoch": 0.85, "grad_norm": 1.26661469181406, "learning_rate": 1.214657923443766e-06, "loss": 0.5371, "step": 10908 }, { "epoch": 0.85, "grad_norm": 1.1408964382818523, "learning_rate": 1.2134579494768507e-06, "loss": 0.4682, "step": 10909 }, { "epoch": 0.85, "grad_norm": 1.1716665972731792, "learning_rate": 1.2122585302496682e-06, "loss": 0.4829, "step": 10910 }, { "epoch": 0.85, "grad_norm": 1.2747453397855322, "learning_rate": 1.2110596658379426e-06, "loss": 0.5306, "step": 10911 }, { "epoch": 0.85, "grad_norm": 1.2727065065318452, "learning_rate": 1.2098613563173678e-06, "loss": 0.5048, "step": 10912 }, { "epoch": 0.85, "grad_norm": 1.0808364385963682, "learning_rate": 1.2086636017635955e-06, "loss": 0.4559, "step": 10913 }, { "epoch": 0.85, "grad_norm": 1.1184747389258711, "learning_rate": 1.2074664022522464e-06, "loss": 0.4905, "step": 10914 }, { "epoch": 0.85, "grad_norm": 1.1918389707099555, "learning_rate": 1.2062697578589089e-06, "loss": 0.4776, "step": 10915 }, { "epoch": 0.85, "grad_norm": 1.1282726476531821, "learning_rate": 1.2050736686591292e-06, "loss": 0.4776, "step": 10916 }, { "epoch": 0.85, "grad_norm": 1.1985809916668981, "learning_rate": 1.2038781347284266e-06, "loss": 0.4855, "step": 10917 }, { "epoch": 0.85, "grad_norm": 1.2888363726557173, "learning_rate": 1.202683156142278e-06, "loss": 0.5117, "step": 10918 }, { "epoch": 0.85, "grad_norm": 1.3553973352269555, "learning_rate": 1.2014887329761293e-06, "loss": 0.4993, "step": 10919 }, { "epoch": 0.85, "grad_norm": 1.2015193777692599, "learning_rate": 1.2002948653053915e-06, "loss": 0.5126, "step": 10920 }, { "epoch": 0.85, "grad_norm": 1.2364997925378338, "learning_rate": 1.1991015532054395e-06, "loss": 0.5, "step": 10921 }, { "epoch": 0.85, "grad_norm": 1.1539568973240086, "learning_rate": 1.1979087967516146e-06, "loss": 0.4963, "step": 10922 }, { "epoch": 0.85, "grad_norm": 1.2573862241992435, "learning_rate": 1.1967165960192185e-06, "loss": 0.5441, "step": 10923 }, { "epoch": 0.85, "grad_norm": 1.2326866385415258, "learning_rate": 1.1955249510835232e-06, "loss": 0.4765, "step": 10924 }, { "epoch": 0.85, "grad_norm": 1.2356714128606008, "learning_rate": 1.1943338620197642e-06, "loss": 0.4759, "step": 10925 }, { "epoch": 0.85, "grad_norm": 1.3008147455891728, "learning_rate": 1.193143328903138e-06, "loss": 0.484, "step": 10926 }, { "epoch": 0.85, "grad_norm": 1.2964257070996899, "learning_rate": 1.1919533518088121e-06, "loss": 0.5137, "step": 10927 }, { "epoch": 0.85, "grad_norm": 1.148413054536943, "learning_rate": 1.1907639308119134e-06, "loss": 0.4726, "step": 10928 }, { "epoch": 0.85, "grad_norm": 1.2314015561466816, "learning_rate": 1.1895750659875372e-06, "loss": 0.5329, "step": 10929 }, { "epoch": 0.85, "grad_norm": 1.2561606228287285, "learning_rate": 1.1883867574107433e-06, "loss": 0.5213, "step": 10930 }, { "epoch": 0.85, "grad_norm": 1.160572632330765, "learning_rate": 1.1871990051565551e-06, "loss": 0.4701, "step": 10931 }, { "epoch": 0.85, "grad_norm": 1.1548002794497303, "learning_rate": 1.186011809299964e-06, "loss": 0.535, "step": 10932 }, { "epoch": 0.85, "grad_norm": 1.091084146994098, "learning_rate": 1.1848251699159185e-06, "loss": 0.437, "step": 10933 }, { "epoch": 0.85, "grad_norm": 1.3319150364987997, "learning_rate": 1.1836390870793414e-06, "loss": 0.5169, "step": 10934 }, { "epoch": 0.85, "grad_norm": 1.301156158947863, "learning_rate": 1.1824535608651177e-06, "loss": 0.5143, "step": 10935 }, { "epoch": 0.85, "grad_norm": 1.2165011173561442, "learning_rate": 1.1812685913480904e-06, "loss": 0.5389, "step": 10936 }, { "epoch": 0.85, "grad_norm": 1.2067531524579094, "learning_rate": 1.180084178603077e-06, "loss": 0.4974, "step": 10937 }, { "epoch": 0.85, "grad_norm": 1.1960559103952126, "learning_rate": 1.1789003227048533e-06, "loss": 0.4589, "step": 10938 }, { "epoch": 0.85, "grad_norm": 1.2521339797674484, "learning_rate": 1.1777170237281633e-06, "loss": 0.5686, "step": 10939 }, { "epoch": 0.85, "grad_norm": 1.2981094092735477, "learning_rate": 1.1765342817477133e-06, "loss": 0.5404, "step": 10940 }, { "epoch": 0.85, "grad_norm": 1.216255912727184, "learning_rate": 1.1753520968381782e-06, "loss": 0.4981, "step": 10941 }, { "epoch": 0.85, "grad_norm": 1.1948853770552794, "learning_rate": 1.1741704690741961e-06, "loss": 0.5024, "step": 10942 }, { "epoch": 0.85, "grad_norm": 1.2560423722135725, "learning_rate": 1.1729893985303653e-06, "loss": 0.537, "step": 10943 }, { "epoch": 0.85, "grad_norm": 1.2590150948332368, "learning_rate": 1.171808885281256e-06, "loss": 0.501, "step": 10944 }, { "epoch": 0.85, "grad_norm": 1.240003236874078, "learning_rate": 1.1706289294014005e-06, "loss": 0.5355, "step": 10945 }, { "epoch": 0.85, "grad_norm": 1.139649012809811, "learning_rate": 1.169449530965292e-06, "loss": 0.4487, "step": 10946 }, { "epoch": 0.85, "grad_norm": 1.2459014457191502, "learning_rate": 1.168270690047395e-06, "loss": 0.5409, "step": 10947 }, { "epoch": 0.85, "grad_norm": 1.178618799636493, "learning_rate": 1.1670924067221367e-06, "loss": 0.499, "step": 10948 }, { "epoch": 0.85, "grad_norm": 1.146812431021523, "learning_rate": 1.1659146810639043e-06, "loss": 0.5009, "step": 10949 }, { "epoch": 0.85, "grad_norm": 1.2949706193544122, "learning_rate": 1.1647375131470562e-06, "loss": 0.4855, "step": 10950 }, { "epoch": 0.85, "grad_norm": 1.2314065901425824, "learning_rate": 1.1635609030459127e-06, "loss": 0.532, "step": 10951 }, { "epoch": 0.85, "grad_norm": 1.1257868769953112, "learning_rate": 1.1623848508347603e-06, "loss": 0.4624, "step": 10952 }, { "epoch": 0.85, "grad_norm": 1.134018880468286, "learning_rate": 1.1612093565878502e-06, "loss": 0.5205, "step": 10953 }, { "epoch": 0.85, "grad_norm": 1.3056232956746885, "learning_rate": 1.1600344203793922e-06, "loss": 0.585, "step": 10954 }, { "epoch": 0.85, "grad_norm": 1.0941822287873288, "learning_rate": 1.1588600422835728e-06, "loss": 0.4613, "step": 10955 }, { "epoch": 0.85, "grad_norm": 1.3033279370702722, "learning_rate": 1.157686222374531e-06, "loss": 0.5306, "step": 10956 }, { "epoch": 0.85, "grad_norm": 1.1631717730284945, "learning_rate": 1.156512960726378e-06, "loss": 0.5017, "step": 10957 }, { "epoch": 0.85, "grad_norm": 1.1042728492975822, "learning_rate": 1.15534025741319e-06, "loss": 0.5094, "step": 10958 }, { "epoch": 0.85, "grad_norm": 1.1033958167470623, "learning_rate": 1.1541681125090031e-06, "loss": 0.4616, "step": 10959 }, { "epoch": 0.85, "grad_norm": 1.2065354597474753, "learning_rate": 1.1529965260878207e-06, "loss": 0.5443, "step": 10960 }, { "epoch": 0.85, "grad_norm": 1.2799111791430533, "learning_rate": 1.1518254982236121e-06, "loss": 0.512, "step": 10961 }, { "epoch": 0.85, "grad_norm": 1.2412995337584116, "learning_rate": 1.1506550289903107e-06, "loss": 0.5096, "step": 10962 }, { "epoch": 0.85, "grad_norm": 1.1974353763289312, "learning_rate": 1.149485118461816e-06, "loss": 0.4898, "step": 10963 }, { "epoch": 0.85, "grad_norm": 1.1782457750173052, "learning_rate": 1.148315766711986e-06, "loss": 0.4912, "step": 10964 }, { "epoch": 0.85, "grad_norm": 1.2835463437911176, "learning_rate": 1.1471469738146534e-06, "loss": 0.5307, "step": 10965 }, { "epoch": 0.85, "grad_norm": 1.1431393541005277, "learning_rate": 1.145978739843604e-06, "loss": 0.4886, "step": 10966 }, { "epoch": 0.85, "grad_norm": 1.3437348741966413, "learning_rate": 1.1448110648725974e-06, "loss": 0.4795, "step": 10967 }, { "epoch": 0.85, "grad_norm": 1.1581387429572234, "learning_rate": 1.1436439489753581e-06, "loss": 0.4731, "step": 10968 }, { "epoch": 0.85, "grad_norm": 1.1961365894541909, "learning_rate": 1.1424773922255662e-06, "loss": 0.488, "step": 10969 }, { "epoch": 0.85, "grad_norm": 1.3464965695813065, "learning_rate": 1.1413113946968756e-06, "loss": 0.5224, "step": 10970 }, { "epoch": 0.85, "grad_norm": 1.2091670229591636, "learning_rate": 1.1401459564629013e-06, "loss": 0.5662, "step": 10971 }, { "epoch": 0.85, "grad_norm": 1.235892316954457, "learning_rate": 1.1389810775972244e-06, "loss": 0.5629, "step": 10972 }, { "epoch": 0.85, "grad_norm": 1.2497525447049591, "learning_rate": 1.1378167581733901e-06, "loss": 0.5027, "step": 10973 }, { "epoch": 0.85, "grad_norm": 1.2061069245551066, "learning_rate": 1.1366529982649043e-06, "loss": 0.5051, "step": 10974 }, { "epoch": 0.85, "grad_norm": 1.2388547898823448, "learning_rate": 1.1354897979452472e-06, "loss": 0.5195, "step": 10975 }, { "epoch": 0.85, "grad_norm": 1.1407296642202176, "learning_rate": 1.13432715728785e-06, "loss": 0.4769, "step": 10976 }, { "epoch": 0.85, "grad_norm": 1.212526570845022, "learning_rate": 1.1331650763661217e-06, "loss": 0.5651, "step": 10977 }, { "epoch": 0.85, "grad_norm": 1.2109669343385894, "learning_rate": 1.1320035552534304e-06, "loss": 0.5194, "step": 10978 }, { "epoch": 0.85, "grad_norm": 1.152942876725815, "learning_rate": 1.130842594023106e-06, "loss": 0.452, "step": 10979 }, { "epoch": 0.85, "grad_norm": 1.0769316108334046, "learning_rate": 1.129682192748447e-06, "loss": 0.5106, "step": 10980 }, { "epoch": 0.85, "grad_norm": 1.2568610721615041, "learning_rate": 1.1285223515027155e-06, "loss": 0.5194, "step": 10981 }, { "epoch": 0.85, "grad_norm": 1.2569610841655474, "learning_rate": 1.12736307035914e-06, "loss": 0.5669, "step": 10982 }, { "epoch": 0.85, "grad_norm": 1.2924073720682197, "learning_rate": 1.1262043493909113e-06, "loss": 0.5731, "step": 10983 }, { "epoch": 0.85, "grad_norm": 1.1220376430765475, "learning_rate": 1.125046188671184e-06, "loss": 0.5025, "step": 10984 }, { "epoch": 0.85, "grad_norm": 1.255659689682778, "learning_rate": 1.123888588273081e-06, "loss": 0.5235, "step": 10985 }, { "epoch": 0.85, "grad_norm": 1.2200607196362419, "learning_rate": 1.1227315482696844e-06, "loss": 0.4696, "step": 10986 }, { "epoch": 0.85, "grad_norm": 1.2064723724003361, "learning_rate": 1.1215750687340455e-06, "loss": 0.4811, "step": 10987 }, { "epoch": 0.85, "grad_norm": 1.1083721947609209, "learning_rate": 1.1204191497391815e-06, "loss": 0.4939, "step": 10988 }, { "epoch": 0.85, "grad_norm": 1.2110291477100763, "learning_rate": 1.119263791358066e-06, "loss": 0.5515, "step": 10989 }, { "epoch": 0.85, "grad_norm": 1.2576024608694756, "learning_rate": 1.118108993663647e-06, "loss": 0.546, "step": 10990 }, { "epoch": 0.85, "grad_norm": 1.1645289772919507, "learning_rate": 1.1169547567288319e-06, "loss": 0.4856, "step": 10991 }, { "epoch": 0.85, "grad_norm": 1.0279983349723543, "learning_rate": 1.115801080626493e-06, "loss": 0.4487, "step": 10992 }, { "epoch": 0.85, "grad_norm": 1.210358265810039, "learning_rate": 1.1146479654294706e-06, "loss": 0.5128, "step": 10993 }, { "epoch": 0.85, "grad_norm": 1.2835304620781385, "learning_rate": 1.1134954112105645e-06, "loss": 0.5464, "step": 10994 }, { "epoch": 0.85, "grad_norm": 1.2349588968796155, "learning_rate": 1.1123434180425396e-06, "loss": 0.494, "step": 10995 }, { "epoch": 0.85, "grad_norm": 1.2615550971410479, "learning_rate": 1.1111919859981291e-06, "loss": 0.5084, "step": 10996 }, { "epoch": 0.85, "grad_norm": 1.3699009589433255, "learning_rate": 1.1100411151500279e-06, "loss": 0.5049, "step": 10997 }, { "epoch": 0.85, "grad_norm": 1.1878724267099376, "learning_rate": 1.1088908055709003e-06, "loss": 0.564, "step": 10998 }, { "epoch": 0.85, "grad_norm": 1.2212558807106515, "learning_rate": 1.1077410573333659e-06, "loss": 0.5244, "step": 10999 }, { "epoch": 0.85, "grad_norm": 1.1960268067924829, "learning_rate": 1.1065918705100164e-06, "loss": 0.4708, "step": 11000 }, { "epoch": 0.85, "grad_norm": 1.236982464702292, "learning_rate": 1.1054432451734053e-06, "loss": 0.5139, "step": 11001 }, { "epoch": 0.85, "grad_norm": 1.2934956471031154, "learning_rate": 1.1042951813960535e-06, "loss": 0.499, "step": 11002 }, { "epoch": 0.85, "grad_norm": 1.185914738251953, "learning_rate": 1.1031476792504436e-06, "loss": 0.4702, "step": 11003 }, { "epoch": 0.85, "grad_norm": 1.3610791015537416, "learning_rate": 1.1020007388090227e-06, "loss": 0.5566, "step": 11004 }, { "epoch": 0.85, "grad_norm": 1.1509418300409455, "learning_rate": 1.1008543601442012e-06, "loss": 0.4898, "step": 11005 }, { "epoch": 0.85, "grad_norm": 1.0690610488752403, "learning_rate": 1.0997085433283572e-06, "loss": 0.4547, "step": 11006 }, { "epoch": 0.85, "grad_norm": 1.172408224227052, "learning_rate": 1.0985632884338327e-06, "loss": 0.5034, "step": 11007 }, { "epoch": 0.85, "grad_norm": 1.152837149815908, "learning_rate": 1.097418595532933e-06, "loss": 0.5033, "step": 11008 }, { "epoch": 0.85, "grad_norm": 1.18503761980242, "learning_rate": 1.0962744646979296e-06, "loss": 0.5057, "step": 11009 }, { "epoch": 0.85, "grad_norm": 1.3533746114708962, "learning_rate": 1.0951308960010554e-06, "loss": 0.5416, "step": 11010 }, { "epoch": 0.85, "grad_norm": 1.1421855545549224, "learning_rate": 1.0939878895145107e-06, "loss": 0.4833, "step": 11011 }, { "epoch": 0.85, "grad_norm": 1.2718850250223959, "learning_rate": 1.0928454453104597e-06, "loss": 0.5475, "step": 11012 }, { "epoch": 0.85, "grad_norm": 1.1922615858184182, "learning_rate": 1.0917035634610328e-06, "loss": 0.5016, "step": 11013 }, { "epoch": 0.85, "grad_norm": 1.230251571623262, "learning_rate": 1.09056224403832e-06, "loss": 0.5077, "step": 11014 }, { "epoch": 0.85, "grad_norm": 1.129394795128725, "learning_rate": 1.0894214871143783e-06, "loss": 0.4975, "step": 11015 }, { "epoch": 0.85, "grad_norm": 1.1328901066998234, "learning_rate": 1.0882812927612297e-06, "loss": 0.4733, "step": 11016 }, { "epoch": 0.85, "grad_norm": 1.2175706390160521, "learning_rate": 1.0871416610508622e-06, "loss": 0.5217, "step": 11017 }, { "epoch": 0.85, "grad_norm": 1.2137954457473987, "learning_rate": 1.0860025920552252e-06, "loss": 0.4689, "step": 11018 }, { "epoch": 0.85, "grad_norm": 1.2838029780316345, "learning_rate": 1.0848640858462378e-06, "loss": 0.5279, "step": 11019 }, { "epoch": 0.85, "grad_norm": 1.1396354668017237, "learning_rate": 1.0837261424957735e-06, "loss": 0.4426, "step": 11020 }, { "epoch": 0.85, "grad_norm": 1.2505302258315805, "learning_rate": 1.0825887620756804e-06, "loss": 0.5382, "step": 11021 }, { "epoch": 0.86, "grad_norm": 1.226496360599236, "learning_rate": 1.0814519446577665e-06, "loss": 0.5071, "step": 11022 }, { "epoch": 0.86, "grad_norm": 1.2339909112895453, "learning_rate": 1.0803156903138069e-06, "loss": 0.5321, "step": 11023 }, { "epoch": 0.86, "grad_norm": 1.1292351372335896, "learning_rate": 1.0791799991155371e-06, "loss": 0.4608, "step": 11024 }, { "epoch": 0.86, "grad_norm": 1.178304910456379, "learning_rate": 1.078044871134658e-06, "loss": 0.4722, "step": 11025 }, { "epoch": 0.86, "grad_norm": 1.0947160541666423, "learning_rate": 1.0769103064428366e-06, "loss": 0.4624, "step": 11026 }, { "epoch": 0.86, "grad_norm": 1.1775900276254219, "learning_rate": 1.0757763051117055e-06, "loss": 0.5176, "step": 11027 }, { "epoch": 0.86, "grad_norm": 1.2413996471231832, "learning_rate": 1.0746428672128583e-06, "loss": 0.4981, "step": 11028 }, { "epoch": 0.86, "grad_norm": 1.1791926476589687, "learning_rate": 1.0735099928178584e-06, "loss": 0.4937, "step": 11029 }, { "epoch": 0.86, "grad_norm": 1.4016056083930608, "learning_rate": 1.0723776819982257e-06, "loss": 0.5165, "step": 11030 }, { "epoch": 0.86, "grad_norm": 1.3500335071078888, "learning_rate": 1.0712459348254488e-06, "loss": 0.5668, "step": 11031 }, { "epoch": 0.86, "grad_norm": 1.297593848228336, "learning_rate": 1.070114751370984e-06, "loss": 0.5351, "step": 11032 }, { "epoch": 0.86, "grad_norm": 1.2029376193684853, "learning_rate": 1.0689841317062478e-06, "loss": 0.5146, "step": 11033 }, { "epoch": 0.86, "grad_norm": 1.2583092603001815, "learning_rate": 1.0678540759026224e-06, "loss": 0.5115, "step": 11034 }, { "epoch": 0.86, "grad_norm": 1.1310975346117904, "learning_rate": 1.06672458403145e-06, "loss": 0.4644, "step": 11035 }, { "epoch": 0.86, "grad_norm": 1.155860061877357, "learning_rate": 1.0655956561640456e-06, "loss": 0.4531, "step": 11036 }, { "epoch": 0.86, "grad_norm": 1.2350248243948958, "learning_rate": 1.0644672923716815e-06, "loss": 0.5187, "step": 11037 }, { "epoch": 0.86, "grad_norm": 1.1229920481691575, "learning_rate": 1.063339492725599e-06, "loss": 0.4594, "step": 11038 }, { "epoch": 0.86, "grad_norm": 1.2306060714389633, "learning_rate": 1.062212257297004e-06, "loss": 0.5002, "step": 11039 }, { "epoch": 0.86, "grad_norm": 1.1198946291879468, "learning_rate": 1.061085586157059e-06, "loss": 0.4899, "step": 11040 }, { "epoch": 0.86, "grad_norm": 1.106094345090458, "learning_rate": 1.0599594793769007e-06, "loss": 0.4546, "step": 11041 }, { "epoch": 0.86, "grad_norm": 1.0821773829745938, "learning_rate": 1.0588339370276246e-06, "loss": 0.4569, "step": 11042 }, { "epoch": 0.86, "grad_norm": 1.1525582324064279, "learning_rate": 1.0577089591802946e-06, "loss": 0.5114, "step": 11043 }, { "epoch": 0.86, "grad_norm": 1.2049383889272243, "learning_rate": 1.0565845459059343e-06, "loss": 0.5388, "step": 11044 }, { "epoch": 0.86, "grad_norm": 1.2112109460036242, "learning_rate": 1.055460697275531e-06, "loss": 0.4769, "step": 11045 }, { "epoch": 0.86, "grad_norm": 1.2140897514593778, "learning_rate": 1.0543374133600414e-06, "loss": 0.5327, "step": 11046 }, { "epoch": 0.86, "grad_norm": 1.2406017810560912, "learning_rate": 1.0532146942303856e-06, "loss": 0.4614, "step": 11047 }, { "epoch": 0.86, "grad_norm": 1.1999057454604056, "learning_rate": 1.0520925399574445e-06, "loss": 0.5037, "step": 11048 }, { "epoch": 0.86, "grad_norm": 1.3304978659238906, "learning_rate": 1.050970950612069e-06, "loss": 0.5186, "step": 11049 }, { "epoch": 0.86, "grad_norm": 1.313559785496592, "learning_rate": 1.0498499262650664e-06, "loss": 0.5166, "step": 11050 }, { "epoch": 0.86, "grad_norm": 1.4039201721961587, "learning_rate": 1.048729466987214e-06, "loss": 0.5652, "step": 11051 }, { "epoch": 0.86, "grad_norm": 1.1895264350836758, "learning_rate": 1.047609572849253e-06, "loss": 0.4987, "step": 11052 }, { "epoch": 0.86, "grad_norm": 1.2395595366600698, "learning_rate": 1.0464902439218905e-06, "loss": 0.4588, "step": 11053 }, { "epoch": 0.86, "grad_norm": 1.1489206452585214, "learning_rate": 1.0453714802757908e-06, "loss": 0.4597, "step": 11054 }, { "epoch": 0.86, "grad_norm": 1.131600882978287, "learning_rate": 1.0442532819815908e-06, "loss": 0.4819, "step": 11055 }, { "epoch": 0.86, "grad_norm": 1.239627960213853, "learning_rate": 1.0431356491098854e-06, "loss": 0.5627, "step": 11056 }, { "epoch": 0.86, "grad_norm": 1.228975776893027, "learning_rate": 1.0420185817312377e-06, "loss": 0.5407, "step": 11057 }, { "epoch": 0.86, "grad_norm": 1.3174200031525334, "learning_rate": 1.040902079916174e-06, "loss": 0.4887, "step": 11058 }, { "epoch": 0.86, "grad_norm": 1.124350678295888, "learning_rate": 1.0397861437351842e-06, "loss": 0.5002, "step": 11059 }, { "epoch": 0.86, "grad_norm": 1.4144751858090445, "learning_rate": 1.0386707732587265e-06, "loss": 0.5842, "step": 11060 }, { "epoch": 0.86, "grad_norm": 1.2034571300679713, "learning_rate": 1.0375559685572145e-06, "loss": 0.4942, "step": 11061 }, { "epoch": 0.86, "grad_norm": 1.2950542031362913, "learning_rate": 1.0364417297010354e-06, "loss": 0.5092, "step": 11062 }, { "epoch": 0.86, "grad_norm": 1.300741796496622, "learning_rate": 1.0353280567605373e-06, "loss": 0.4402, "step": 11063 }, { "epoch": 0.86, "grad_norm": 1.1856222364426632, "learning_rate": 1.0342149498060284e-06, "loss": 0.4901, "step": 11064 }, { "epoch": 0.86, "grad_norm": 1.2007141233505538, "learning_rate": 1.033102408907789e-06, "loss": 0.4733, "step": 11065 }, { "epoch": 0.86, "grad_norm": 1.2115407118118497, "learning_rate": 1.0319904341360554e-06, "loss": 0.5184, "step": 11066 }, { "epoch": 0.86, "grad_norm": 1.2610145706012588, "learning_rate": 1.0308790255610356e-06, "loss": 0.5298, "step": 11067 }, { "epoch": 0.86, "grad_norm": 1.1800056685295375, "learning_rate": 1.0297681832528971e-06, "loss": 0.4939, "step": 11068 }, { "epoch": 0.86, "grad_norm": 1.130417811703316, "learning_rate": 1.0286579072817726e-06, "loss": 0.4671, "step": 11069 }, { "epoch": 0.86, "grad_norm": 1.337200170404787, "learning_rate": 1.0275481977177638e-06, "loss": 0.5938, "step": 11070 }, { "epoch": 0.86, "grad_norm": 1.1450482655642957, "learning_rate": 1.0264390546309254e-06, "loss": 0.4816, "step": 11071 }, { "epoch": 0.86, "grad_norm": 1.179749493517386, "learning_rate": 1.0253304780912887e-06, "loss": 0.4636, "step": 11072 }, { "epoch": 0.86, "grad_norm": 1.1485406219479515, "learning_rate": 1.024222468168843e-06, "loss": 0.4796, "step": 11073 }, { "epoch": 0.86, "grad_norm": 1.2519898312524385, "learning_rate": 1.023115024933541e-06, "loss": 0.5411, "step": 11074 }, { "epoch": 0.86, "grad_norm": 1.196609391902362, "learning_rate": 1.0220081484553025e-06, "loss": 0.4884, "step": 11075 }, { "epoch": 0.86, "grad_norm": 1.20362318549424, "learning_rate": 1.0209018388040093e-06, "loss": 0.4696, "step": 11076 }, { "epoch": 0.86, "grad_norm": 1.3149237732496482, "learning_rate": 1.019796096049508e-06, "loss": 0.5093, "step": 11077 }, { "epoch": 0.86, "grad_norm": 1.1467307766653911, "learning_rate": 1.0186909202616114e-06, "loss": 0.4702, "step": 11078 }, { "epoch": 0.86, "grad_norm": 1.1576955368124666, "learning_rate": 1.017586311510095e-06, "loss": 0.4703, "step": 11079 }, { "epoch": 0.86, "grad_norm": 1.216619634807926, "learning_rate": 1.0164822698646992e-06, "loss": 0.5605, "step": 11080 }, { "epoch": 0.86, "grad_norm": 1.2133083146436283, "learning_rate": 1.0153787953951245e-06, "loss": 0.5514, "step": 11081 }, { "epoch": 0.86, "grad_norm": 1.4289492175700447, "learning_rate": 1.0142758881710413e-06, "loss": 0.5101, "step": 11082 }, { "epoch": 0.86, "grad_norm": 1.2952194217237716, "learning_rate": 1.013173548262083e-06, "loss": 0.5515, "step": 11083 }, { "epoch": 0.86, "grad_norm": 1.1800721911799206, "learning_rate": 1.0120717757378428e-06, "loss": 0.4534, "step": 11084 }, { "epoch": 0.86, "grad_norm": 1.3216230670926006, "learning_rate": 1.0109705706678862e-06, "loss": 0.5408, "step": 11085 }, { "epoch": 0.86, "grad_norm": 1.2886630277396456, "learning_rate": 1.009869933121731e-06, "loss": 0.4884, "step": 11086 }, { "epoch": 0.86, "grad_norm": 1.2858279713444503, "learning_rate": 1.0087698631688713e-06, "loss": 0.5274, "step": 11087 }, { "epoch": 0.86, "grad_norm": 1.1717983475093516, "learning_rate": 1.0076703608787575e-06, "loss": 0.5071, "step": 11088 }, { "epoch": 0.86, "grad_norm": 1.311829577382489, "learning_rate": 1.0065714263208092e-06, "loss": 0.5615, "step": 11089 }, { "epoch": 0.86, "grad_norm": 1.238725889261915, "learning_rate": 1.005473059564408e-06, "loss": 0.4752, "step": 11090 }, { "epoch": 0.86, "grad_norm": 1.1352940706100734, "learning_rate": 1.004375260678897e-06, "loss": 0.4839, "step": 11091 }, { "epoch": 0.86, "grad_norm": 1.2168244041892375, "learning_rate": 1.0032780297335886e-06, "loss": 0.4637, "step": 11092 }, { "epoch": 0.86, "grad_norm": 1.1732857414278512, "learning_rate": 1.002181366797753e-06, "loss": 0.5696, "step": 11093 }, { "epoch": 0.86, "grad_norm": 1.2030735252565226, "learning_rate": 1.0010852719406306e-06, "loss": 0.5126, "step": 11094 }, { "epoch": 0.86, "grad_norm": 1.1032855578928529, "learning_rate": 9.999897452314256e-07, "loss": 0.4557, "step": 11095 }, { "epoch": 0.86, "grad_norm": 1.2141840575645526, "learning_rate": 9.988947867392995e-07, "loss": 0.4811, "step": 11096 }, { "epoch": 0.86, "grad_norm": 1.1192433574718834, "learning_rate": 9.978003965333849e-07, "loss": 0.4859, "step": 11097 }, { "epoch": 0.86, "grad_norm": 1.2169969127932458, "learning_rate": 9.967065746827764e-07, "loss": 0.5249, "step": 11098 }, { "epoch": 0.86, "grad_norm": 1.2591108172839338, "learning_rate": 9.956133212565332e-07, "loss": 0.4623, "step": 11099 }, { "epoch": 0.86, "grad_norm": 1.2404433667571837, "learning_rate": 9.945206363236804e-07, "loss": 0.5624, "step": 11100 }, { "epoch": 0.86, "grad_norm": 1.139843346306126, "learning_rate": 9.93428519953199e-07, "loss": 0.5046, "step": 11101 }, { "epoch": 0.86, "grad_norm": 1.0546810573804961, "learning_rate": 9.92336972214044e-07, "loss": 0.4624, "step": 11102 }, { "epoch": 0.86, "grad_norm": 1.2546564157704336, "learning_rate": 9.912459931751296e-07, "loss": 0.466, "step": 11103 }, { "epoch": 0.86, "grad_norm": 1.2656281082680292, "learning_rate": 9.901555829053333e-07, "loss": 0.5338, "step": 11104 }, { "epoch": 0.86, "grad_norm": 1.3375466900100528, "learning_rate": 9.890657414735017e-07, "loss": 0.5899, "step": 11105 }, { "epoch": 0.86, "grad_norm": 1.2502162269496075, "learning_rate": 9.879764689484383e-07, "loss": 0.5204, "step": 11106 }, { "epoch": 0.86, "grad_norm": 1.153685432024286, "learning_rate": 9.868877653989161e-07, "loss": 0.4511, "step": 11107 }, { "epoch": 0.86, "grad_norm": 1.1333907657451077, "learning_rate": 9.857996308936713e-07, "loss": 0.4797, "step": 11108 }, { "epoch": 0.86, "grad_norm": 1.2702463806136817, "learning_rate": 9.847120655014032e-07, "loss": 0.5111, "step": 11109 }, { "epoch": 0.86, "grad_norm": 1.1980189183035805, "learning_rate": 9.836250692907745e-07, "loss": 0.471, "step": 11110 }, { "epoch": 0.86, "grad_norm": 1.1265073850062017, "learning_rate": 9.825386423304162e-07, "loss": 0.4891, "step": 11111 }, { "epoch": 0.86, "grad_norm": 1.192428000653777, "learning_rate": 9.814527846889165e-07, "loss": 0.5272, "step": 11112 }, { "epoch": 0.86, "grad_norm": 1.0941625634431469, "learning_rate": 9.80367496434831e-07, "loss": 0.4911, "step": 11113 }, { "epoch": 0.86, "grad_norm": 1.319738138985596, "learning_rate": 9.792827776366797e-07, "loss": 0.5725, "step": 11114 }, { "epoch": 0.86, "grad_norm": 1.1911902560196133, "learning_rate": 9.781986283629484e-07, "loss": 0.4553, "step": 11115 }, { "epoch": 0.86, "grad_norm": 1.2118604034914062, "learning_rate": 9.771150486820857e-07, "loss": 0.4901, "step": 11116 }, { "epoch": 0.86, "grad_norm": 1.1574768053371496, "learning_rate": 9.760320386625e-07, "loss": 0.4629, "step": 11117 }, { "epoch": 0.86, "grad_norm": 1.1492088996885166, "learning_rate": 9.749495983725688e-07, "loss": 0.3864, "step": 11118 }, { "epoch": 0.86, "grad_norm": 1.292519113779314, "learning_rate": 9.73867727880633e-07, "loss": 0.4772, "step": 11119 }, { "epoch": 0.86, "grad_norm": 1.204174277777636, "learning_rate": 9.72786427254996e-07, "loss": 0.5069, "step": 11120 }, { "epoch": 0.86, "grad_norm": 1.1778397394371976, "learning_rate": 9.717056965639281e-07, "loss": 0.4645, "step": 11121 }, { "epoch": 0.86, "grad_norm": 1.3041749050054863, "learning_rate": 9.7062553587566e-07, "loss": 0.5567, "step": 11122 }, { "epoch": 0.86, "grad_norm": 1.21181072621425, "learning_rate": 9.695459452583843e-07, "loss": 0.5382, "step": 11123 }, { "epoch": 0.86, "grad_norm": 1.22384610033517, "learning_rate": 9.684669247802647e-07, "loss": 0.5001, "step": 11124 }, { "epoch": 0.86, "grad_norm": 1.136770869892159, "learning_rate": 9.673884745094253e-07, "loss": 0.4699, "step": 11125 }, { "epoch": 0.86, "grad_norm": 1.175447841277305, "learning_rate": 9.66310594513955e-07, "loss": 0.4949, "step": 11126 }, { "epoch": 0.86, "grad_norm": 1.231113180313904, "learning_rate": 9.652332848619027e-07, "loss": 0.5281, "step": 11127 }, { "epoch": 0.86, "grad_norm": 1.2594876241375021, "learning_rate": 9.641565456212864e-07, "loss": 0.4885, "step": 11128 }, { "epoch": 0.86, "grad_norm": 1.2586171674296198, "learning_rate": 9.63080376860086e-07, "loss": 0.4656, "step": 11129 }, { "epoch": 0.86, "grad_norm": 1.2159352198461497, "learning_rate": 9.620047786462461e-07, "loss": 0.4531, "step": 11130 }, { "epoch": 0.86, "grad_norm": 1.1642756426974885, "learning_rate": 9.609297510476767e-07, "loss": 0.4431, "step": 11131 }, { "epoch": 0.86, "grad_norm": 1.1896148721997954, "learning_rate": 9.59855294132247e-07, "loss": 0.4788, "step": 11132 }, { "epoch": 0.86, "grad_norm": 1.0920543605876847, "learning_rate": 9.587814079677915e-07, "loss": 0.4649, "step": 11133 }, { "epoch": 0.86, "grad_norm": 1.2793039103307267, "learning_rate": 9.577080926221127e-07, "loss": 0.5307, "step": 11134 }, { "epoch": 0.86, "grad_norm": 1.2251348265584732, "learning_rate": 9.566353481629742e-07, "loss": 0.4814, "step": 11135 }, { "epoch": 0.86, "grad_norm": 1.0523110637388777, "learning_rate": 9.55563174658105e-07, "loss": 0.4391, "step": 11136 }, { "epoch": 0.86, "grad_norm": 1.2141699685702245, "learning_rate": 9.544915721751946e-07, "loss": 0.5207, "step": 11137 }, { "epoch": 0.86, "grad_norm": 1.1176559959783576, "learning_rate": 9.534205407818997e-07, "loss": 0.5235, "step": 11138 }, { "epoch": 0.86, "grad_norm": 1.3219456701123153, "learning_rate": 9.523500805458408e-07, "loss": 0.5502, "step": 11139 }, { "epoch": 0.86, "grad_norm": 1.1973584685179508, "learning_rate": 9.512801915346004e-07, "loss": 0.5235, "step": 11140 }, { "epoch": 0.86, "grad_norm": 1.2014360338980115, "learning_rate": 9.502108738157279e-07, "loss": 0.4954, "step": 11141 }, { "epoch": 0.86, "grad_norm": 1.2806681265394901, "learning_rate": 9.491421274567348e-07, "loss": 0.5363, "step": 11142 }, { "epoch": 0.86, "grad_norm": 1.293565410769431, "learning_rate": 9.480739525250938e-07, "loss": 0.4993, "step": 11143 }, { "epoch": 0.86, "grad_norm": 1.213010920212061, "learning_rate": 9.470063490882453e-07, "loss": 0.4709, "step": 11144 }, { "epoch": 0.86, "grad_norm": 1.332143202744742, "learning_rate": 9.459393172135934e-07, "loss": 0.5443, "step": 11145 }, { "epoch": 0.86, "grad_norm": 1.1991046764282272, "learning_rate": 9.448728569685073e-07, "loss": 0.4954, "step": 11146 }, { "epoch": 0.86, "grad_norm": 1.1676084032724046, "learning_rate": 9.438069684203144e-07, "loss": 0.4936, "step": 11147 }, { "epoch": 0.86, "grad_norm": 1.3055712967232531, "learning_rate": 9.427416516363108e-07, "loss": 0.5068, "step": 11148 }, { "epoch": 0.86, "grad_norm": 1.1371821347136535, "learning_rate": 9.416769066837561e-07, "loss": 0.527, "step": 11149 }, { "epoch": 0.86, "grad_norm": 1.213476312975917, "learning_rate": 9.406127336298731e-07, "loss": 0.5763, "step": 11150 }, { "epoch": 0.87, "grad_norm": 1.1345408967247246, "learning_rate": 9.395491325418505e-07, "loss": 0.4662, "step": 11151 }, { "epoch": 0.87, "grad_norm": 1.124287485615347, "learning_rate": 9.384861034868376e-07, "loss": 0.4687, "step": 11152 }, { "epoch": 0.87, "grad_norm": 1.0808653902659178, "learning_rate": 9.374236465319453e-07, "loss": 0.4809, "step": 11153 }, { "epoch": 0.87, "grad_norm": 1.186661725560806, "learning_rate": 9.363617617442555e-07, "loss": 0.453, "step": 11154 }, { "epoch": 0.87, "grad_norm": 1.1402385004589048, "learning_rate": 9.353004491908102e-07, "loss": 0.4664, "step": 11155 }, { "epoch": 0.87, "grad_norm": 1.204701072527355, "learning_rate": 9.342397089386168e-07, "loss": 0.4924, "step": 11156 }, { "epoch": 0.87, "grad_norm": 1.1756724056925454, "learning_rate": 9.331795410546418e-07, "loss": 0.4596, "step": 11157 }, { "epoch": 0.87, "grad_norm": 1.2837324052475951, "learning_rate": 9.321199456058205e-07, "loss": 0.504, "step": 11158 }, { "epoch": 0.87, "grad_norm": 1.1566139370133486, "learning_rate": 9.310609226590516e-07, "loss": 0.5008, "step": 11159 }, { "epoch": 0.87, "grad_norm": 1.1717083621756874, "learning_rate": 9.300024722811973e-07, "loss": 0.5294, "step": 11160 }, { "epoch": 0.87, "grad_norm": 1.1600006718469187, "learning_rate": 9.289445945390829e-07, "loss": 0.4973, "step": 11161 }, { "epoch": 0.87, "grad_norm": 1.264032704458381, "learning_rate": 9.278872894994962e-07, "loss": 0.5283, "step": 11162 }, { "epoch": 0.87, "grad_norm": 1.167258515553021, "learning_rate": 9.268305572291892e-07, "loss": 0.4497, "step": 11163 }, { "epoch": 0.87, "grad_norm": 1.2316437938684506, "learning_rate": 9.257743977948808e-07, "loss": 0.4425, "step": 11164 }, { "epoch": 0.87, "grad_norm": 1.1928585518713626, "learning_rate": 9.247188112632522e-07, "loss": 0.5146, "step": 11165 }, { "epoch": 0.87, "grad_norm": 1.2915489491790895, "learning_rate": 9.236637977009466e-07, "loss": 0.5629, "step": 11166 }, { "epoch": 0.87, "grad_norm": 1.1797096806139902, "learning_rate": 9.226093571745753e-07, "loss": 0.4874, "step": 11167 }, { "epoch": 0.87, "grad_norm": 1.2036613160736935, "learning_rate": 9.215554897507062e-07, "loss": 0.5092, "step": 11168 }, { "epoch": 0.87, "grad_norm": 1.246713992170833, "learning_rate": 9.205021954958781e-07, "loss": 0.4835, "step": 11169 }, { "epoch": 0.87, "grad_norm": 1.2867357930373173, "learning_rate": 9.194494744765902e-07, "loss": 0.5126, "step": 11170 }, { "epoch": 0.87, "grad_norm": 1.396936175481047, "learning_rate": 9.183973267593083e-07, "loss": 0.5147, "step": 11171 }, { "epoch": 0.87, "grad_norm": 1.2231216276236363, "learning_rate": 9.173457524104579e-07, "loss": 0.5107, "step": 11172 }, { "epoch": 0.87, "grad_norm": 1.1079604573303279, "learning_rate": 9.162947514964283e-07, "loss": 0.4855, "step": 11173 }, { "epoch": 0.87, "grad_norm": 1.2296632582566496, "learning_rate": 9.152443240835774e-07, "loss": 0.4597, "step": 11174 }, { "epoch": 0.87, "grad_norm": 1.1822916050489412, "learning_rate": 9.141944702382233e-07, "loss": 0.4939, "step": 11175 }, { "epoch": 0.87, "grad_norm": 1.15621984287834, "learning_rate": 9.131451900266497e-07, "loss": 0.5091, "step": 11176 }, { "epoch": 0.87, "grad_norm": 1.2923908152108374, "learning_rate": 9.120964835151025e-07, "loss": 0.5664, "step": 11177 }, { "epoch": 0.87, "grad_norm": 1.1683436783909869, "learning_rate": 9.110483507697909e-07, "loss": 0.4956, "step": 11178 }, { "epoch": 0.87, "grad_norm": 1.143494380465078, "learning_rate": 9.100007918568898e-07, "loss": 0.459, "step": 11179 }, { "epoch": 0.87, "grad_norm": 1.2018140611360189, "learning_rate": 9.089538068425375e-07, "loss": 0.5266, "step": 11180 }, { "epoch": 0.87, "grad_norm": 1.1874766598465667, "learning_rate": 9.079073957928353e-07, "loss": 0.4988, "step": 11181 }, { "epoch": 0.87, "grad_norm": 1.208609330237781, "learning_rate": 9.068615587738495e-07, "loss": 0.5083, "step": 11182 }, { "epoch": 0.87, "grad_norm": 1.2322878999026405, "learning_rate": 9.058162958516059e-07, "loss": 0.4758, "step": 11183 }, { "epoch": 0.87, "grad_norm": 1.2208875348988535, "learning_rate": 9.047716070920987e-07, "loss": 0.4875, "step": 11184 }, { "epoch": 0.87, "grad_norm": 1.2170034756700947, "learning_rate": 9.037274925612849e-07, "loss": 0.5116, "step": 11185 }, { "epoch": 0.87, "grad_norm": 1.2934713164851488, "learning_rate": 9.026839523250863e-07, "loss": 0.5082, "step": 11186 }, { "epoch": 0.87, "grad_norm": 1.3123901866205288, "learning_rate": 9.016409864493869e-07, "loss": 0.545, "step": 11187 }, { "epoch": 0.87, "grad_norm": 1.26818132669297, "learning_rate": 9.005985950000318e-07, "loss": 0.5059, "step": 11188 }, { "epoch": 0.87, "grad_norm": 1.1834738390520687, "learning_rate": 8.995567780428338e-07, "loss": 0.4754, "step": 11189 }, { "epoch": 0.87, "grad_norm": 1.1995655266695722, "learning_rate": 8.985155356435704e-07, "loss": 0.5217, "step": 11190 }, { "epoch": 0.87, "grad_norm": 1.2144283062695491, "learning_rate": 8.974748678679768e-07, "loss": 0.4439, "step": 11191 }, { "epoch": 0.87, "grad_norm": 1.1772206769277709, "learning_rate": 8.964347747817603e-07, "loss": 0.529, "step": 11192 }, { "epoch": 0.87, "grad_norm": 1.199213729997, "learning_rate": 8.953952564505819e-07, "loss": 0.4795, "step": 11193 }, { "epoch": 0.87, "grad_norm": 1.1775321723608596, "learning_rate": 8.943563129400756e-07, "loss": 0.4613, "step": 11194 }, { "epoch": 0.87, "grad_norm": 1.1814972089007225, "learning_rate": 8.933179443158336e-07, "loss": 0.5584, "step": 11195 }, { "epoch": 0.87, "grad_norm": 1.2351378003358497, "learning_rate": 8.922801506434131e-07, "loss": 0.5025, "step": 11196 }, { "epoch": 0.87, "grad_norm": 1.3081507601983309, "learning_rate": 8.912429319883398e-07, "loss": 0.4902, "step": 11197 }, { "epoch": 0.87, "grad_norm": 1.1408358862332597, "learning_rate": 8.902062884160922e-07, "loss": 0.4499, "step": 11198 }, { "epoch": 0.87, "grad_norm": 1.1992552313707423, "learning_rate": 8.891702199921226e-07, "loss": 0.4653, "step": 11199 }, { "epoch": 0.87, "grad_norm": 1.2486846679154369, "learning_rate": 8.881347267818441e-07, "loss": 0.4959, "step": 11200 }, { "epoch": 0.87, "grad_norm": 1.136180055267908, "learning_rate": 8.8709980885063e-07, "loss": 0.4721, "step": 11201 }, { "epoch": 0.87, "grad_norm": 1.2635046068476634, "learning_rate": 8.860654662638235e-07, "loss": 0.5093, "step": 11202 }, { "epoch": 0.87, "grad_norm": 1.206876627576729, "learning_rate": 8.850316990867236e-07, "loss": 0.4926, "step": 11203 }, { "epoch": 0.87, "grad_norm": 1.2964802796246564, "learning_rate": 8.839985073845991e-07, "loss": 0.5135, "step": 11204 }, { "epoch": 0.87, "grad_norm": 1.2375531387479795, "learning_rate": 8.829658912226813e-07, "loss": 0.5201, "step": 11205 }, { "epoch": 0.87, "grad_norm": 1.1901414505052301, "learning_rate": 8.819338506661646e-07, "loss": 0.4904, "step": 11206 }, { "epoch": 0.87, "grad_norm": 1.2286828538277166, "learning_rate": 8.80902385780209e-07, "loss": 0.4874, "step": 11207 }, { "epoch": 0.87, "grad_norm": 1.3429025594877946, "learning_rate": 8.798714966299327e-07, "loss": 0.5427, "step": 11208 }, { "epoch": 0.87, "grad_norm": 1.1468429394640147, "learning_rate": 8.788411832804223e-07, "loss": 0.5172, "step": 11209 }, { "epoch": 0.87, "grad_norm": 1.304672697976943, "learning_rate": 8.77811445796728e-07, "loss": 0.501, "step": 11210 }, { "epoch": 0.87, "grad_norm": 1.1765707919979842, "learning_rate": 8.767822842438601e-07, "loss": 0.4648, "step": 11211 }, { "epoch": 0.87, "grad_norm": 1.1916922304216908, "learning_rate": 8.757536986867987e-07, "loss": 0.5106, "step": 11212 }, { "epoch": 0.87, "grad_norm": 1.1705712250162306, "learning_rate": 8.747256891904787e-07, "loss": 0.4985, "step": 11213 }, { "epoch": 0.87, "grad_norm": 1.2584556214908422, "learning_rate": 8.736982558198059e-07, "loss": 0.4938, "step": 11214 }, { "epoch": 0.87, "grad_norm": 1.1309118176358817, "learning_rate": 8.726713986396484e-07, "loss": 0.4599, "step": 11215 }, { "epoch": 0.87, "grad_norm": 1.2241311242172317, "learning_rate": 8.716451177148355e-07, "loss": 0.4867, "step": 11216 }, { "epoch": 0.87, "grad_norm": 1.108570451874717, "learning_rate": 8.706194131101653e-07, "loss": 0.4942, "step": 11217 }, { "epoch": 0.87, "grad_norm": 1.2683000433801723, "learning_rate": 8.695942848903905e-07, "loss": 0.5085, "step": 11218 }, { "epoch": 0.87, "grad_norm": 1.168563844267776, "learning_rate": 8.685697331202348e-07, "loss": 0.4712, "step": 11219 }, { "epoch": 0.87, "grad_norm": 1.1786564748803117, "learning_rate": 8.675457578643865e-07, "loss": 0.4629, "step": 11220 }, { "epoch": 0.87, "grad_norm": 1.2282432169230988, "learning_rate": 8.665223591874894e-07, "loss": 0.5262, "step": 11221 }, { "epoch": 0.87, "grad_norm": 1.1806366493696432, "learning_rate": 8.654995371541585e-07, "loss": 0.5021, "step": 11222 }, { "epoch": 0.87, "grad_norm": 1.2780642562645959, "learning_rate": 8.644772918289723e-07, "loss": 0.5358, "step": 11223 }, { "epoch": 0.87, "grad_norm": 1.2631192303374286, "learning_rate": 8.634556232764646e-07, "loss": 0.5371, "step": 11224 }, { "epoch": 0.87, "grad_norm": 1.2665903624757802, "learning_rate": 8.624345315611427e-07, "loss": 0.5115, "step": 11225 }, { "epoch": 0.87, "grad_norm": 1.2488369776436161, "learning_rate": 8.614140167474716e-07, "loss": 0.5275, "step": 11226 }, { "epoch": 0.87, "grad_norm": 1.2711884935141544, "learning_rate": 8.603940788998832e-07, "loss": 0.5312, "step": 11227 }, { "epoch": 0.87, "grad_norm": 1.2271019302193817, "learning_rate": 8.593747180827728e-07, "loss": 0.5376, "step": 11228 }, { "epoch": 0.87, "grad_norm": 1.2257255079579459, "learning_rate": 8.58355934360493e-07, "loss": 0.536, "step": 11229 }, { "epoch": 0.87, "grad_norm": 1.148538598002825, "learning_rate": 8.573377277973704e-07, "loss": 0.451, "step": 11230 }, { "epoch": 0.87, "grad_norm": 1.2276454930989134, "learning_rate": 8.563200984576847e-07, "loss": 0.525, "step": 11231 }, { "epoch": 0.87, "grad_norm": 1.1447443845019263, "learning_rate": 8.553030464056867e-07, "loss": 0.4689, "step": 11232 }, { "epoch": 0.87, "grad_norm": 1.1342899553350756, "learning_rate": 8.542865717055904e-07, "loss": 0.485, "step": 11233 }, { "epoch": 0.87, "grad_norm": 1.17823768097095, "learning_rate": 8.532706744215657e-07, "loss": 0.5005, "step": 11234 }, { "epoch": 0.87, "grad_norm": 1.26999996042627, "learning_rate": 8.522553546177536e-07, "loss": 0.506, "step": 11235 }, { "epoch": 0.87, "grad_norm": 1.209582204096564, "learning_rate": 8.512406123582583e-07, "loss": 0.4593, "step": 11236 }, { "epoch": 0.87, "grad_norm": 1.2588033623545312, "learning_rate": 8.502264477071442e-07, "loss": 0.538, "step": 11237 }, { "epoch": 0.87, "grad_norm": 1.3303412402657289, "learning_rate": 8.492128607284434e-07, "loss": 0.5367, "step": 11238 }, { "epoch": 0.87, "grad_norm": 1.206029630701883, "learning_rate": 8.481998514861434e-07, "loss": 0.4401, "step": 11239 }, { "epoch": 0.87, "grad_norm": 1.0809843321307926, "learning_rate": 8.471874200442066e-07, "loss": 0.4749, "step": 11240 }, { "epoch": 0.87, "grad_norm": 1.2413176363892566, "learning_rate": 8.461755664665483e-07, "loss": 0.524, "step": 11241 }, { "epoch": 0.87, "grad_norm": 1.144646752684332, "learning_rate": 8.451642908170544e-07, "loss": 0.4626, "step": 11242 }, { "epoch": 0.87, "grad_norm": 1.156760077474445, "learning_rate": 8.441535931595735e-07, "loss": 0.5196, "step": 11243 }, { "epoch": 0.87, "grad_norm": 1.30150829078651, "learning_rate": 8.431434735579113e-07, "loss": 0.5514, "step": 11244 }, { "epoch": 0.87, "grad_norm": 1.2666890416344163, "learning_rate": 8.421339320758459e-07, "loss": 0.5174, "step": 11245 }, { "epoch": 0.87, "grad_norm": 1.1652637914051374, "learning_rate": 8.411249687771128e-07, "loss": 0.4734, "step": 11246 }, { "epoch": 0.87, "grad_norm": 1.2255565138244648, "learning_rate": 8.401165837254144e-07, "loss": 0.4757, "step": 11247 }, { "epoch": 0.87, "grad_norm": 1.1574821608386052, "learning_rate": 8.391087769844164e-07, "loss": 0.4636, "step": 11248 }, { "epoch": 0.87, "grad_norm": 1.239983192299173, "learning_rate": 8.381015486177446e-07, "loss": 0.5103, "step": 11249 }, { "epoch": 0.87, "grad_norm": 1.1932116727685784, "learning_rate": 8.370948986889915e-07, "loss": 0.4919, "step": 11250 }, { "epoch": 0.87, "grad_norm": 1.1838144027447353, "learning_rate": 8.360888272617107e-07, "loss": 0.5026, "step": 11251 }, { "epoch": 0.87, "grad_norm": 1.6026362099686522, "learning_rate": 8.350833343994225e-07, "loss": 0.4943, "step": 11252 }, { "epoch": 0.87, "grad_norm": 1.2231686039609668, "learning_rate": 8.340784201656094e-07, "loss": 0.5212, "step": 11253 }, { "epoch": 0.87, "grad_norm": 1.226911167227712, "learning_rate": 8.330740846237128e-07, "loss": 0.4926, "step": 11254 }, { "epoch": 0.87, "grad_norm": 1.3333723638702852, "learning_rate": 8.320703278371456e-07, "loss": 0.5012, "step": 11255 }, { "epoch": 0.87, "grad_norm": 1.1544069086032005, "learning_rate": 8.31067149869279e-07, "loss": 0.4843, "step": 11256 }, { "epoch": 0.87, "grad_norm": 1.2797548828270313, "learning_rate": 8.300645507834481e-07, "loss": 0.5283, "step": 11257 }, { "epoch": 0.87, "grad_norm": 1.1862572642225533, "learning_rate": 8.290625306429545e-07, "loss": 0.4402, "step": 11258 }, { "epoch": 0.87, "grad_norm": 1.2463512573123212, "learning_rate": 8.280610895110575e-07, "loss": 0.5039, "step": 11259 }, { "epoch": 0.87, "grad_norm": 1.2776910152326302, "learning_rate": 8.270602274509864e-07, "loss": 0.5541, "step": 11260 }, { "epoch": 0.87, "grad_norm": 1.2480170734411393, "learning_rate": 8.260599445259276e-07, "loss": 0.5114, "step": 11261 }, { "epoch": 0.87, "grad_norm": 1.196292500393657, "learning_rate": 8.250602407990361e-07, "loss": 0.4747, "step": 11262 }, { "epoch": 0.87, "grad_norm": 1.1423940657036784, "learning_rate": 8.2406111633343e-07, "loss": 0.4607, "step": 11263 }, { "epoch": 0.87, "grad_norm": 1.1827345143338315, "learning_rate": 8.230625711921858e-07, "loss": 0.5077, "step": 11264 }, { "epoch": 0.87, "grad_norm": 1.3033689128835662, "learning_rate": 8.220646054383475e-07, "loss": 0.5258, "step": 11265 }, { "epoch": 0.87, "grad_norm": 1.171313291079321, "learning_rate": 8.210672191349222e-07, "loss": 0.466, "step": 11266 }, { "epoch": 0.87, "grad_norm": 1.1957569168787214, "learning_rate": 8.20070412344881e-07, "loss": 0.4996, "step": 11267 }, { "epoch": 0.87, "grad_norm": 1.2988797514557535, "learning_rate": 8.19074185131159e-07, "loss": 0.5458, "step": 11268 }, { "epoch": 0.87, "grad_norm": 1.1304129607189908, "learning_rate": 8.180785375566491e-07, "loss": 0.4761, "step": 11269 }, { "epoch": 0.87, "grad_norm": 1.1488017847387322, "learning_rate": 8.170834696842156e-07, "loss": 0.4788, "step": 11270 }, { "epoch": 0.87, "grad_norm": 1.1078762084548042, "learning_rate": 8.160889815766782e-07, "loss": 0.4699, "step": 11271 }, { "epoch": 0.87, "grad_norm": 1.1308643821908522, "learning_rate": 8.150950732968255e-07, "loss": 0.4765, "step": 11272 }, { "epoch": 0.87, "grad_norm": 1.298289756769988, "learning_rate": 8.141017449074096e-07, "loss": 0.497, "step": 11273 }, { "epoch": 0.87, "grad_norm": 1.2390328421777432, "learning_rate": 8.131089964711447e-07, "loss": 0.4963, "step": 11274 }, { "epoch": 0.87, "grad_norm": 1.1554863444156502, "learning_rate": 8.121168280507053e-07, "loss": 0.507, "step": 11275 }, { "epoch": 0.87, "grad_norm": 1.170073994473048, "learning_rate": 8.111252397087344e-07, "loss": 0.5472, "step": 11276 }, { "epoch": 0.87, "grad_norm": 1.2045351162134736, "learning_rate": 8.101342315078342e-07, "loss": 0.5229, "step": 11277 }, { "epoch": 0.87, "grad_norm": 1.2169387269342302, "learning_rate": 8.091438035105747e-07, "loss": 0.4822, "step": 11278 }, { "epoch": 0.88, "grad_norm": 1.243795733353835, "learning_rate": 8.08153955779487e-07, "loss": 0.5003, "step": 11279 }, { "epoch": 0.88, "grad_norm": 1.1594836921844616, "learning_rate": 8.071646883770634e-07, "loss": 0.4696, "step": 11280 }, { "epoch": 0.88, "grad_norm": 1.2186486617774832, "learning_rate": 8.061760013657605e-07, "loss": 0.5129, "step": 11281 }, { "epoch": 0.88, "grad_norm": 1.1750611289335768, "learning_rate": 8.051878948080006e-07, "loss": 0.5128, "step": 11282 }, { "epoch": 0.88, "grad_norm": 1.2175840033155843, "learning_rate": 8.042003687661671e-07, "loss": 0.5118, "step": 11283 }, { "epoch": 0.88, "grad_norm": 1.33371138676787, "learning_rate": 8.032134233026101e-07, "loss": 0.5344, "step": 11284 }, { "epoch": 0.88, "grad_norm": 1.1675892088827573, "learning_rate": 8.022270584796376e-07, "loss": 0.4784, "step": 11285 }, { "epoch": 0.88, "grad_norm": 1.1941431846004729, "learning_rate": 8.012412743595255e-07, "loss": 0.5054, "step": 11286 }, { "epoch": 0.88, "grad_norm": 1.2484860311726615, "learning_rate": 8.002560710045115e-07, "loss": 0.5308, "step": 11287 }, { "epoch": 0.88, "grad_norm": 1.2217283292580965, "learning_rate": 7.992714484767949e-07, "loss": 0.5337, "step": 11288 }, { "epoch": 0.88, "grad_norm": 1.147829634499345, "learning_rate": 7.982874068385438e-07, "loss": 0.4634, "step": 11289 }, { "epoch": 0.88, "grad_norm": 1.218583706979347, "learning_rate": 7.973039461518827e-07, "loss": 0.4937, "step": 11290 }, { "epoch": 0.88, "grad_norm": 1.302723302608259, "learning_rate": 7.963210664789022e-07, "loss": 0.5007, "step": 11291 }, { "epoch": 0.88, "grad_norm": 1.1091457721128348, "learning_rate": 7.953387678816571e-07, "loss": 0.4481, "step": 11292 }, { "epoch": 0.88, "grad_norm": 1.177266700213032, "learning_rate": 7.943570504221654e-07, "loss": 0.4766, "step": 11293 }, { "epoch": 0.88, "grad_norm": 1.0073171773452696, "learning_rate": 7.933759141624098e-07, "loss": 0.4559, "step": 11294 }, { "epoch": 0.88, "grad_norm": 1.1714233036537556, "learning_rate": 7.923953591643308e-07, "loss": 0.4675, "step": 11295 }, { "epoch": 0.88, "grad_norm": 1.1046513207539383, "learning_rate": 7.914153854898376e-07, "loss": 0.4606, "step": 11296 }, { "epoch": 0.88, "grad_norm": 1.1417194498116632, "learning_rate": 7.90435993200801e-07, "loss": 0.4495, "step": 11297 }, { "epoch": 0.88, "grad_norm": 1.271493422421848, "learning_rate": 7.89457182359058e-07, "loss": 0.516, "step": 11298 }, { "epoch": 0.88, "grad_norm": 1.261224750383257, "learning_rate": 7.884789530264004e-07, "loss": 0.507, "step": 11299 }, { "epoch": 0.88, "grad_norm": 1.2302893613810355, "learning_rate": 7.875013052645941e-07, "loss": 0.5111, "step": 11300 }, { "epoch": 0.88, "grad_norm": 1.0920654403305488, "learning_rate": 7.865242391353589e-07, "loss": 0.4337, "step": 11301 }, { "epoch": 0.88, "grad_norm": 1.0906805838600118, "learning_rate": 7.855477547003831e-07, "loss": 0.4428, "step": 11302 }, { "epoch": 0.88, "grad_norm": 1.2393748751903324, "learning_rate": 7.845718520213186e-07, "loss": 0.5743, "step": 11303 }, { "epoch": 0.88, "grad_norm": 1.1909548044861884, "learning_rate": 7.835965311597804e-07, "loss": 0.4726, "step": 11304 }, { "epoch": 0.88, "grad_norm": 1.138015269855835, "learning_rate": 7.826217921773416e-07, "loss": 0.4813, "step": 11305 }, { "epoch": 0.88, "grad_norm": 1.2169900070413333, "learning_rate": 7.81647635135544e-07, "loss": 0.501, "step": 11306 }, { "epoch": 0.88, "grad_norm": 1.3549569782592175, "learning_rate": 7.806740600958918e-07, "loss": 0.5692, "step": 11307 }, { "epoch": 0.88, "grad_norm": 1.1757004415428356, "learning_rate": 7.797010671198534e-07, "loss": 0.4679, "step": 11308 }, { "epoch": 0.88, "grad_norm": 1.242894433351586, "learning_rate": 7.787286562688556e-07, "loss": 0.4583, "step": 11309 }, { "epoch": 0.88, "grad_norm": 1.2404504782967962, "learning_rate": 7.777568276042946e-07, "loss": 0.5195, "step": 11310 }, { "epoch": 0.88, "grad_norm": 1.1420621831903188, "learning_rate": 7.767855811875236e-07, "loss": 0.4584, "step": 11311 }, { "epoch": 0.88, "grad_norm": 1.3082915455326396, "learning_rate": 7.758149170798656e-07, "loss": 0.5237, "step": 11312 }, { "epoch": 0.88, "grad_norm": 1.1417436731546178, "learning_rate": 7.748448353426019e-07, "loss": 0.4933, "step": 11313 }, { "epoch": 0.88, "grad_norm": 1.2384882131238193, "learning_rate": 7.7387533603698e-07, "loss": 0.5721, "step": 11314 }, { "epoch": 0.88, "grad_norm": 1.2964811531341784, "learning_rate": 7.729064192242075e-07, "loss": 0.5366, "step": 11315 }, { "epoch": 0.88, "grad_norm": 1.1619825277902553, "learning_rate": 7.71938084965459e-07, "loss": 0.4861, "step": 11316 }, { "epoch": 0.88, "grad_norm": 1.1922181911692673, "learning_rate": 7.709703333218698e-07, "loss": 0.498, "step": 11317 }, { "epoch": 0.88, "grad_norm": 1.2431795489357234, "learning_rate": 7.700031643545402e-07, "loss": 0.5175, "step": 11318 }, { "epoch": 0.88, "grad_norm": 1.2842321348902581, "learning_rate": 7.690365781245291e-07, "loss": 0.532, "step": 11319 }, { "epoch": 0.88, "grad_norm": 1.1670132814679615, "learning_rate": 7.680705746928663e-07, "loss": 0.485, "step": 11320 }, { "epoch": 0.88, "grad_norm": 1.1312991326553257, "learning_rate": 7.671051541205376e-07, "loss": 0.482, "step": 11321 }, { "epoch": 0.88, "grad_norm": 1.234337818213493, "learning_rate": 7.661403164684955e-07, "loss": 0.5314, "step": 11322 }, { "epoch": 0.88, "grad_norm": 1.0515430057526833, "learning_rate": 7.651760617976556e-07, "loss": 0.4569, "step": 11323 }, { "epoch": 0.88, "grad_norm": 1.062439187497396, "learning_rate": 7.64212390168898e-07, "loss": 0.4803, "step": 11324 }, { "epoch": 0.88, "grad_norm": 1.1316083624984197, "learning_rate": 7.63249301643062e-07, "loss": 0.4445, "step": 11325 }, { "epoch": 0.88, "grad_norm": 1.0981880610189232, "learning_rate": 7.622867962809521e-07, "loss": 0.4418, "step": 11326 }, { "epoch": 0.88, "grad_norm": 1.4834911726794078, "learning_rate": 7.613248741433365e-07, "loss": 0.5421, "step": 11327 }, { "epoch": 0.88, "grad_norm": 1.297139519980659, "learning_rate": 7.603635352909489e-07, "loss": 0.5212, "step": 11328 }, { "epoch": 0.88, "grad_norm": 1.2572005779620647, "learning_rate": 7.594027797844805e-07, "loss": 0.5428, "step": 11329 }, { "epoch": 0.88, "grad_norm": 1.1773678033570112, "learning_rate": 7.584426076845908e-07, "loss": 0.5424, "step": 11330 }, { "epoch": 0.88, "grad_norm": 1.211774475272321, "learning_rate": 7.574830190518978e-07, "loss": 0.5016, "step": 11331 }, { "epoch": 0.88, "grad_norm": 1.1955396149731525, "learning_rate": 7.565240139469877e-07, "loss": 0.5345, "step": 11332 }, { "epoch": 0.88, "grad_norm": 1.1980447894147563, "learning_rate": 7.555655924304062e-07, "loss": 0.5159, "step": 11333 }, { "epoch": 0.88, "grad_norm": 1.16401743961654, "learning_rate": 7.54607754562664e-07, "loss": 0.5148, "step": 11334 }, { "epoch": 0.88, "grad_norm": 1.2553314000920262, "learning_rate": 7.536505004042361e-07, "loss": 0.5302, "step": 11335 }, { "epoch": 0.88, "grad_norm": 1.2720684813502816, "learning_rate": 7.526938300155539e-07, "loss": 0.5237, "step": 11336 }, { "epoch": 0.88, "grad_norm": 1.1479334860583175, "learning_rate": 7.517377434570217e-07, "loss": 0.4627, "step": 11337 }, { "epoch": 0.88, "grad_norm": 1.2516650078140692, "learning_rate": 7.50782240789002e-07, "loss": 0.4981, "step": 11338 }, { "epoch": 0.88, "grad_norm": 1.2217910191390557, "learning_rate": 7.498273220718167e-07, "loss": 0.5245, "step": 11339 }, { "epoch": 0.88, "grad_norm": 1.290836708494584, "learning_rate": 7.488729873657586e-07, "loss": 0.5503, "step": 11340 }, { "epoch": 0.88, "grad_norm": 1.343059761731569, "learning_rate": 7.479192367310773e-07, "loss": 0.4808, "step": 11341 }, { "epoch": 0.88, "grad_norm": 1.221843266379924, "learning_rate": 7.46966070227989e-07, "loss": 0.4829, "step": 11342 }, { "epoch": 0.88, "grad_norm": 1.2567819199887884, "learning_rate": 7.460134879166725e-07, "loss": 0.5545, "step": 11343 }, { "epoch": 0.88, "grad_norm": 1.1358596334306892, "learning_rate": 7.450614898572683e-07, "loss": 0.5319, "step": 11344 }, { "epoch": 0.88, "grad_norm": 1.2651083562830525, "learning_rate": 7.44110076109883e-07, "loss": 0.4735, "step": 11345 }, { "epoch": 0.88, "grad_norm": 1.1864908849701237, "learning_rate": 7.431592467345816e-07, "loss": 0.4745, "step": 11346 }, { "epoch": 0.88, "grad_norm": 1.2582502373464548, "learning_rate": 7.422090017913952e-07, "loss": 0.5295, "step": 11347 }, { "epoch": 0.88, "grad_norm": 1.2223549858629155, "learning_rate": 7.412593413403202e-07, "loss": 0.4948, "step": 11348 }, { "epoch": 0.88, "grad_norm": 1.2842134304472528, "learning_rate": 7.403102654413108e-07, "loss": 0.5468, "step": 11349 }, { "epoch": 0.88, "grad_norm": 1.1531296975471728, "learning_rate": 7.393617741542891e-07, "loss": 0.4962, "step": 11350 }, { "epoch": 0.88, "grad_norm": 1.197141158586891, "learning_rate": 7.384138675391362e-07, "loss": 0.434, "step": 11351 }, { "epoch": 0.88, "grad_norm": 1.2516980082367521, "learning_rate": 7.374665456556984e-07, "loss": 0.519, "step": 11352 }, { "epoch": 0.88, "grad_norm": 1.252221089220329, "learning_rate": 7.365198085637871e-07, "loss": 0.5258, "step": 11353 }, { "epoch": 0.88, "grad_norm": 1.3108531065409659, "learning_rate": 7.35573656323173e-07, "loss": 0.4826, "step": 11354 }, { "epoch": 0.88, "grad_norm": 1.2326545797910435, "learning_rate": 7.346280889935931e-07, "loss": 0.4995, "step": 11355 }, { "epoch": 0.88, "grad_norm": 1.1569810051475684, "learning_rate": 7.336831066347438e-07, "loss": 0.4898, "step": 11356 }, { "epoch": 0.88, "grad_norm": 1.1640434519598215, "learning_rate": 7.327387093062887e-07, "loss": 0.4469, "step": 11357 }, { "epoch": 0.88, "grad_norm": 1.1720978080017563, "learning_rate": 7.317948970678524e-07, "loss": 0.483, "step": 11358 }, { "epoch": 0.88, "grad_norm": 1.1839182191777131, "learning_rate": 7.308516699790202e-07, "loss": 0.4855, "step": 11359 }, { "epoch": 0.88, "grad_norm": 1.253123529295403, "learning_rate": 7.29909028099347e-07, "loss": 0.4706, "step": 11360 }, { "epoch": 0.88, "grad_norm": 1.188400027867705, "learning_rate": 7.289669714883419e-07, "loss": 0.5155, "step": 11361 }, { "epoch": 0.88, "grad_norm": 1.222107639263959, "learning_rate": 7.280255002054848e-07, "loss": 0.5247, "step": 11362 }, { "epoch": 0.88, "grad_norm": 1.186555034703382, "learning_rate": 7.270846143102139e-07, "loss": 0.4576, "step": 11363 }, { "epoch": 0.88, "grad_norm": 1.2188331869052162, "learning_rate": 7.26144313861934e-07, "loss": 0.5334, "step": 11364 }, { "epoch": 0.88, "grad_norm": 1.3326202611707463, "learning_rate": 7.252045989200118e-07, "loss": 0.5555, "step": 11365 }, { "epoch": 0.88, "grad_norm": 1.3235553441280918, "learning_rate": 7.242654695437734e-07, "loss": 0.5047, "step": 11366 }, { "epoch": 0.88, "grad_norm": 1.1612001596848196, "learning_rate": 7.233269257925124e-07, "loss": 0.4436, "step": 11367 }, { "epoch": 0.88, "grad_norm": 1.2319523178292437, "learning_rate": 7.223889677254858e-07, "loss": 0.4354, "step": 11368 }, { "epoch": 0.88, "grad_norm": 1.183256397074468, "learning_rate": 7.214515954019086e-07, "loss": 0.5105, "step": 11369 }, { "epoch": 0.88, "grad_norm": 1.231004725390614, "learning_rate": 7.205148088809632e-07, "loss": 0.476, "step": 11370 }, { "epoch": 0.88, "grad_norm": 1.2966763907331895, "learning_rate": 7.195786082217937e-07, "loss": 0.5428, "step": 11371 }, { "epoch": 0.88, "grad_norm": 1.314128138369065, "learning_rate": 7.18642993483507e-07, "loss": 0.5137, "step": 11372 }, { "epoch": 0.88, "grad_norm": 1.1548414156155586, "learning_rate": 7.177079647251728e-07, "loss": 0.4577, "step": 11373 }, { "epoch": 0.88, "grad_norm": 1.254703779103097, "learning_rate": 7.167735220058258e-07, "loss": 0.4953, "step": 11374 }, { "epoch": 0.88, "grad_norm": 1.0821017025482853, "learning_rate": 7.158396653844635e-07, "loss": 0.4574, "step": 11375 }, { "epoch": 0.88, "grad_norm": 1.2697560742182812, "learning_rate": 7.149063949200408e-07, "loss": 0.4829, "step": 11376 }, { "epoch": 0.88, "grad_norm": 1.234020713471559, "learning_rate": 7.139737106714817e-07, "loss": 0.4451, "step": 11377 }, { "epoch": 0.88, "grad_norm": 1.2116252299853332, "learning_rate": 7.130416126976747e-07, "loss": 0.4725, "step": 11378 }, { "epoch": 0.88, "grad_norm": 1.2450852573264932, "learning_rate": 7.121101010574626e-07, "loss": 0.5025, "step": 11379 }, { "epoch": 0.88, "grad_norm": 1.2100191140832603, "learning_rate": 7.111791758096609e-07, "loss": 0.4665, "step": 11380 }, { "epoch": 0.88, "grad_norm": 1.1004965203423887, "learning_rate": 7.102488370130411e-07, "loss": 0.5057, "step": 11381 }, { "epoch": 0.88, "grad_norm": 1.15057159192129, "learning_rate": 7.093190847263398e-07, "loss": 0.487, "step": 11382 }, { "epoch": 0.88, "grad_norm": 1.2147374244599554, "learning_rate": 7.083899190082588e-07, "loss": 0.4996, "step": 11383 }, { "epoch": 0.88, "grad_norm": 1.2225960421134476, "learning_rate": 7.074613399174601e-07, "loss": 0.4965, "step": 11384 }, { "epoch": 0.88, "grad_norm": 1.282002809384581, "learning_rate": 7.065333475125713e-07, "loss": 0.4787, "step": 11385 }, { "epoch": 0.88, "grad_norm": 1.2490485862126859, "learning_rate": 7.05605941852181e-07, "loss": 0.544, "step": 11386 }, { "epoch": 0.88, "grad_norm": 1.2084481524581705, "learning_rate": 7.046791229948391e-07, "loss": 0.46, "step": 11387 }, { "epoch": 0.88, "grad_norm": 1.161967754550203, "learning_rate": 7.037528909990632e-07, "loss": 0.4995, "step": 11388 }, { "epoch": 0.88, "grad_norm": 1.2209253215743834, "learning_rate": 7.028272459233277e-07, "loss": 0.5006, "step": 11389 }, { "epoch": 0.88, "grad_norm": 1.216686017174321, "learning_rate": 7.019021878260757e-07, "loss": 0.5209, "step": 11390 }, { "epoch": 0.88, "grad_norm": 1.0741391239132188, "learning_rate": 7.009777167657117e-07, "loss": 0.4743, "step": 11391 }, { "epoch": 0.88, "grad_norm": 1.2367567913207473, "learning_rate": 7.000538328006001e-07, "loss": 0.502, "step": 11392 }, { "epoch": 0.88, "grad_norm": 1.2878968488307398, "learning_rate": 6.99130535989071e-07, "loss": 0.4929, "step": 11393 }, { "epoch": 0.88, "grad_norm": 1.2053002349094968, "learning_rate": 6.982078263894176e-07, "loss": 0.52, "step": 11394 }, { "epoch": 0.88, "grad_norm": 1.0935505821449385, "learning_rate": 6.972857040598945e-07, "loss": 0.4696, "step": 11395 }, { "epoch": 0.88, "grad_norm": 1.2263248966687565, "learning_rate": 6.96364169058722e-07, "loss": 0.5011, "step": 11396 }, { "epoch": 0.88, "grad_norm": 1.3132745183242291, "learning_rate": 6.954432214440798e-07, "loss": 0.5406, "step": 11397 }, { "epoch": 0.88, "grad_norm": 1.1288686039861384, "learning_rate": 6.945228612741129e-07, "loss": 0.4861, "step": 11398 }, { "epoch": 0.88, "grad_norm": 1.2181872022081492, "learning_rate": 6.936030886069256e-07, "loss": 0.5083, "step": 11399 }, { "epoch": 0.88, "grad_norm": 1.1324142282102119, "learning_rate": 6.926839035005905e-07, "loss": 0.4333, "step": 11400 }, { "epoch": 0.88, "grad_norm": 0.9944093112552583, "learning_rate": 6.917653060131413e-07, "loss": 0.4189, "step": 11401 }, { "epoch": 0.88, "grad_norm": 1.0954491087959555, "learning_rate": 6.908472962025714e-07, "loss": 0.4524, "step": 11402 }, { "epoch": 0.88, "grad_norm": 1.415816341715057, "learning_rate": 6.899298741268412e-07, "loss": 0.531, "step": 11403 }, { "epoch": 0.88, "grad_norm": 1.1697057355579827, "learning_rate": 6.89013039843871e-07, "loss": 0.4878, "step": 11404 }, { "epoch": 0.88, "grad_norm": 1.2172217446285696, "learning_rate": 6.880967934115457e-07, "loss": 0.5402, "step": 11405 }, { "epoch": 0.88, "grad_norm": 1.1613533702458352, "learning_rate": 6.871811348877144e-07, "loss": 0.5041, "step": 11406 }, { "epoch": 0.88, "grad_norm": 1.2884818416554964, "learning_rate": 6.862660643301855e-07, "loss": 0.5085, "step": 11407 }, { "epoch": 0.89, "grad_norm": 1.0959561033820955, "learning_rate": 6.853515817967327e-07, "loss": 0.4653, "step": 11408 }, { "epoch": 0.89, "grad_norm": 1.234250074414797, "learning_rate": 6.844376873450908e-07, "loss": 0.5013, "step": 11409 }, { "epoch": 0.89, "grad_norm": 1.1417181446600595, "learning_rate": 6.835243810329595e-07, "loss": 0.5016, "step": 11410 }, { "epoch": 0.89, "grad_norm": 1.1355340296199523, "learning_rate": 6.826116629180024e-07, "loss": 0.48, "step": 11411 }, { "epoch": 0.89, "grad_norm": 1.1304767073197668, "learning_rate": 6.816995330578413e-07, "loss": 0.4767, "step": 11412 }, { "epoch": 0.89, "grad_norm": 1.3036671834739988, "learning_rate": 6.807879915100646e-07, "loss": 0.5729, "step": 11413 }, { "epoch": 0.89, "grad_norm": 1.2731069124010923, "learning_rate": 6.798770383322218e-07, "loss": 0.5003, "step": 11414 }, { "epoch": 0.89, "grad_norm": 1.1115256403832667, "learning_rate": 6.789666735818279e-07, "loss": 0.452, "step": 11415 }, { "epoch": 0.89, "grad_norm": 1.2920868508050043, "learning_rate": 6.780568973163604e-07, "loss": 0.5029, "step": 11416 }, { "epoch": 0.89, "grad_norm": 1.0904333782458138, "learning_rate": 6.771477095932533e-07, "loss": 0.4334, "step": 11417 }, { "epoch": 0.89, "grad_norm": 1.1416562787625226, "learning_rate": 6.762391104699129e-07, "loss": 0.4941, "step": 11418 }, { "epoch": 0.89, "grad_norm": 1.265655517210204, "learning_rate": 6.753311000036999e-07, "loss": 0.54, "step": 11419 }, { "epoch": 0.89, "grad_norm": 1.1419123348152156, "learning_rate": 6.744236782519431e-07, "loss": 0.5165, "step": 11420 }, { "epoch": 0.89, "grad_norm": 1.1049265795692982, "learning_rate": 6.735168452719354e-07, "loss": 0.5151, "step": 11421 }, { "epoch": 0.89, "grad_norm": 1.2633155662391082, "learning_rate": 6.726106011209266e-07, "loss": 0.5072, "step": 11422 }, { "epoch": 0.89, "grad_norm": 1.2010426521572934, "learning_rate": 6.71704945856132e-07, "loss": 0.5102, "step": 11423 }, { "epoch": 0.89, "grad_norm": 1.3141656025503459, "learning_rate": 6.707998795347326e-07, "loss": 0.5289, "step": 11424 }, { "epoch": 0.89, "grad_norm": 1.2124094722813237, "learning_rate": 6.698954022138692e-07, "loss": 0.5087, "step": 11425 }, { "epoch": 0.89, "grad_norm": 1.2292741577648272, "learning_rate": 6.689915139506475e-07, "loss": 0.5242, "step": 11426 }, { "epoch": 0.89, "grad_norm": 1.2817797612335684, "learning_rate": 6.680882148021318e-07, "loss": 0.5443, "step": 11427 }, { "epoch": 0.89, "grad_norm": 1.2314015561466816, "learning_rate": 6.67185504825355e-07, "loss": 0.4935, "step": 11428 }, { "epoch": 0.89, "grad_norm": 1.2000672758635287, "learning_rate": 6.662833840773064e-07, "loss": 0.4746, "step": 11429 }, { "epoch": 0.89, "grad_norm": 1.2415612999588386, "learning_rate": 6.653818526149436e-07, "loss": 0.5062, "step": 11430 }, { "epoch": 0.89, "grad_norm": 1.2564989422157289, "learning_rate": 6.644809104951866e-07, "loss": 0.496, "step": 11431 }, { "epoch": 0.89, "grad_norm": 1.169186215924927, "learning_rate": 6.635805577749133e-07, "loss": 0.4704, "step": 11432 }, { "epoch": 0.89, "grad_norm": 1.2325190341090977, "learning_rate": 6.626807945109681e-07, "loss": 0.5077, "step": 11433 }, { "epoch": 0.89, "grad_norm": 1.1688490902106825, "learning_rate": 6.61781620760159e-07, "loss": 0.5123, "step": 11434 }, { "epoch": 0.89, "grad_norm": 1.349198230439379, "learning_rate": 6.608830365792551e-07, "loss": 0.5181, "step": 11435 }, { "epoch": 0.89, "grad_norm": 1.2908356002892785, "learning_rate": 6.59985042024991e-07, "loss": 0.5258, "step": 11436 }, { "epoch": 0.89, "grad_norm": 1.3881548685006135, "learning_rate": 6.590876371540567e-07, "loss": 0.5603, "step": 11437 }, { "epoch": 0.89, "grad_norm": 1.184198961285118, "learning_rate": 6.581908220231137e-07, "loss": 0.4849, "step": 11438 }, { "epoch": 0.89, "grad_norm": 1.1663623083566974, "learning_rate": 6.572945966887812e-07, "loss": 0.4801, "step": 11439 }, { "epoch": 0.89, "grad_norm": 1.2436056139538596, "learning_rate": 6.563989612076416e-07, "loss": 0.5004, "step": 11440 }, { "epoch": 0.89, "grad_norm": 1.2734395126607705, "learning_rate": 6.55503915636243e-07, "loss": 0.4826, "step": 11441 }, { "epoch": 0.89, "grad_norm": 1.2667629636165563, "learning_rate": 6.546094600310949e-07, "loss": 0.5281, "step": 11442 }, { "epoch": 0.89, "grad_norm": 1.2414451636234511, "learning_rate": 6.537155944486662e-07, "loss": 0.5047, "step": 11443 }, { "epoch": 0.89, "grad_norm": 1.306529634833789, "learning_rate": 6.52822318945392e-07, "loss": 0.5592, "step": 11444 }, { "epoch": 0.89, "grad_norm": 1.172634488996047, "learning_rate": 6.519296335776703e-07, "loss": 0.5198, "step": 11445 }, { "epoch": 0.89, "grad_norm": 1.2214905175376964, "learning_rate": 6.510375384018619e-07, "loss": 0.4983, "step": 11446 }, { "epoch": 0.89, "grad_norm": 1.1749153370487948, "learning_rate": 6.50146033474287e-07, "loss": 0.473, "step": 11447 }, { "epoch": 0.89, "grad_norm": 1.2645459225613036, "learning_rate": 6.492551188512331e-07, "loss": 0.5368, "step": 11448 }, { "epoch": 0.89, "grad_norm": 1.2899787651946117, "learning_rate": 6.483647945889449e-07, "loss": 0.528, "step": 11449 }, { "epoch": 0.89, "grad_norm": 1.3149651129703641, "learning_rate": 6.474750607436364e-07, "loss": 0.5215, "step": 11450 }, { "epoch": 0.89, "grad_norm": 1.1656019540916982, "learning_rate": 6.465859173714784e-07, "loss": 0.484, "step": 11451 }, { "epoch": 0.89, "grad_norm": 1.3168021492525686, "learning_rate": 6.456973645286113e-07, "loss": 0.4718, "step": 11452 }, { "epoch": 0.89, "grad_norm": 1.0959935202963917, "learning_rate": 6.448094022711304e-07, "loss": 0.4295, "step": 11453 }, { "epoch": 0.89, "grad_norm": 1.2052575569794224, "learning_rate": 6.439220306550975e-07, "loss": 0.4882, "step": 11454 }, { "epoch": 0.89, "grad_norm": 1.2282212819027056, "learning_rate": 6.430352497365377e-07, "loss": 0.5026, "step": 11455 }, { "epoch": 0.89, "grad_norm": 1.2250761027927906, "learning_rate": 6.421490595714408e-07, "loss": 0.4626, "step": 11456 }, { "epoch": 0.89, "grad_norm": 1.0775728332084265, "learning_rate": 6.412634602157519e-07, "loss": 0.5068, "step": 11457 }, { "epoch": 0.89, "grad_norm": 1.0999708106763129, "learning_rate": 6.403784517253863e-07, "loss": 0.4958, "step": 11458 }, { "epoch": 0.89, "grad_norm": 1.1620736252871733, "learning_rate": 6.394940341562173e-07, "loss": 0.4906, "step": 11459 }, { "epoch": 0.89, "grad_norm": 1.1828295062718412, "learning_rate": 6.386102075640843e-07, "loss": 0.4696, "step": 11460 }, { "epoch": 0.89, "grad_norm": 1.2605761382468663, "learning_rate": 6.377269720047863e-07, "loss": 0.5236, "step": 11461 }, { "epoch": 0.89, "grad_norm": 1.2409199419318881, "learning_rate": 6.368443275340897e-07, "loss": 0.5308, "step": 11462 }, { "epoch": 0.89, "grad_norm": 1.2605009077099896, "learning_rate": 6.359622742077165e-07, "loss": 0.5165, "step": 11463 }, { "epoch": 0.89, "grad_norm": 1.1387573524179118, "learning_rate": 6.350808120813567e-07, "loss": 0.4512, "step": 11464 }, { "epoch": 0.89, "grad_norm": 1.3529922772657272, "learning_rate": 6.341999412106625e-07, "loss": 0.5448, "step": 11465 }, { "epoch": 0.89, "grad_norm": 1.1773897239249582, "learning_rate": 6.333196616512493e-07, "loss": 0.5101, "step": 11466 }, { "epoch": 0.89, "grad_norm": 1.2891223315600924, "learning_rate": 6.324399734586884e-07, "loss": 0.5234, "step": 11467 }, { "epoch": 0.89, "grad_norm": 1.2360339063234378, "learning_rate": 6.315608766885251e-07, "loss": 0.4961, "step": 11468 }, { "epoch": 0.89, "grad_norm": 1.1634495292572455, "learning_rate": 6.306823713962563e-07, "loss": 0.4774, "step": 11469 }, { "epoch": 0.89, "grad_norm": 1.2369934509523381, "learning_rate": 6.298044576373485e-07, "loss": 0.5346, "step": 11470 }, { "epoch": 0.89, "grad_norm": 1.1100206242612793, "learning_rate": 6.289271354672299e-07, "loss": 0.4141, "step": 11471 }, { "epoch": 0.89, "grad_norm": 1.1974501599828717, "learning_rate": 6.280504049412905e-07, "loss": 0.5232, "step": 11472 }, { "epoch": 0.89, "grad_norm": 1.2707960665298836, "learning_rate": 6.271742661148806e-07, "loss": 0.4956, "step": 11473 }, { "epoch": 0.89, "grad_norm": 1.1678222258144697, "learning_rate": 6.262987190433168e-07, "loss": 0.4848, "step": 11474 }, { "epoch": 0.89, "grad_norm": 1.3655513455498731, "learning_rate": 6.254237637818772e-07, "loss": 0.4862, "step": 11475 }, { "epoch": 0.89, "grad_norm": 1.2370121466272352, "learning_rate": 6.245494003858022e-07, "loss": 0.5202, "step": 11476 }, { "epoch": 0.89, "grad_norm": 1.1478723186273423, "learning_rate": 6.236756289102941e-07, "loss": 0.4699, "step": 11477 }, { "epoch": 0.89, "grad_norm": 1.331742151831977, "learning_rate": 6.2280244941052e-07, "loss": 0.4812, "step": 11478 }, { "epoch": 0.89, "grad_norm": 1.2106345997647863, "learning_rate": 6.219298619416059e-07, "loss": 0.5125, "step": 11479 }, { "epoch": 0.89, "grad_norm": 1.185470000387417, "learning_rate": 6.210578665586442e-07, "loss": 0.5052, "step": 11480 }, { "epoch": 0.89, "grad_norm": 1.212580691926514, "learning_rate": 6.201864633166877e-07, "loss": 0.5194, "step": 11481 }, { "epoch": 0.89, "grad_norm": 1.2551719005990398, "learning_rate": 6.193156522707555e-07, "loss": 0.486, "step": 11482 }, { "epoch": 0.89, "grad_norm": 1.2418040037475517, "learning_rate": 6.184454334758227e-07, "loss": 0.5533, "step": 11483 }, { "epoch": 0.89, "grad_norm": 1.2998216084812626, "learning_rate": 6.175758069868321e-07, "loss": 0.5332, "step": 11484 }, { "epoch": 0.89, "grad_norm": 1.230697416826139, "learning_rate": 6.167067728586873e-07, "loss": 0.5078, "step": 11485 }, { "epoch": 0.89, "grad_norm": 1.2159069842350292, "learning_rate": 6.158383311462568e-07, "loss": 0.5583, "step": 11486 }, { "epoch": 0.89, "grad_norm": 1.2315289970357401, "learning_rate": 6.149704819043667e-07, "loss": 0.5096, "step": 11487 }, { "epoch": 0.89, "grad_norm": 1.2246629134404277, "learning_rate": 6.141032251878132e-07, "loss": 0.4753, "step": 11488 }, { "epoch": 0.89, "grad_norm": 1.302875059386743, "learning_rate": 6.132365610513457e-07, "loss": 0.5786, "step": 11489 }, { "epoch": 0.89, "grad_norm": 1.2691977194838018, "learning_rate": 6.123704895496829e-07, "loss": 0.4784, "step": 11490 }, { "epoch": 0.89, "grad_norm": 1.1189072184138322, "learning_rate": 6.115050107375053e-07, "loss": 0.4325, "step": 11491 }, { "epoch": 0.89, "grad_norm": 1.2353978325543034, "learning_rate": 6.106401246694549e-07, "loss": 0.4636, "step": 11492 }, { "epoch": 0.89, "grad_norm": 1.2985534836510755, "learning_rate": 6.097758314001379e-07, "loss": 0.5072, "step": 11493 }, { "epoch": 0.89, "grad_norm": 1.2031456090970258, "learning_rate": 6.089121309841173e-07, "loss": 0.4634, "step": 11494 }, { "epoch": 0.89, "grad_norm": 1.1414363535958993, "learning_rate": 6.080490234759262e-07, "loss": 0.458, "step": 11495 }, { "epoch": 0.89, "grad_norm": 1.2074600495073577, "learning_rate": 6.071865089300577e-07, "loss": 0.4909, "step": 11496 }, { "epoch": 0.89, "grad_norm": 1.059273983350523, "learning_rate": 6.063245874009638e-07, "loss": 0.4521, "step": 11497 }, { "epoch": 0.89, "grad_norm": 1.1210650397156736, "learning_rate": 6.054632589430654e-07, "loss": 0.484, "step": 11498 }, { "epoch": 0.89, "grad_norm": 1.2262033313266074, "learning_rate": 6.0460252361074e-07, "loss": 0.5371, "step": 11499 }, { "epoch": 0.89, "grad_norm": 1.251673198476342, "learning_rate": 6.037423814583299e-07, "loss": 0.5448, "step": 11500 }, { "epoch": 0.89, "grad_norm": 1.2883652622087698, "learning_rate": 6.028828325401426e-07, "loss": 0.5224, "step": 11501 }, { "epoch": 0.89, "grad_norm": 1.2830469509641087, "learning_rate": 6.020238769104447e-07, "loss": 0.4772, "step": 11502 }, { "epoch": 0.89, "grad_norm": 1.263750312646663, "learning_rate": 6.011655146234674e-07, "loss": 0.5337, "step": 11503 }, { "epoch": 0.89, "grad_norm": 1.1592515699792645, "learning_rate": 6.003077457334017e-07, "loss": 0.4858, "step": 11504 }, { "epoch": 0.89, "grad_norm": 1.2008153370751402, "learning_rate": 5.994505702944042e-07, "loss": 0.5066, "step": 11505 }, { "epoch": 0.89, "grad_norm": 1.2612124628959334, "learning_rate": 5.98593988360594e-07, "loss": 0.5015, "step": 11506 }, { "epoch": 0.89, "grad_norm": 1.147990340806951, "learning_rate": 5.977379999860488e-07, "loss": 0.4738, "step": 11507 }, { "epoch": 0.89, "grad_norm": 1.114273164795503, "learning_rate": 5.968826052248145e-07, "loss": 0.4502, "step": 11508 }, { "epoch": 0.89, "grad_norm": 1.2976836934825, "learning_rate": 5.960278041308931e-07, "loss": 0.4902, "step": 11509 }, { "epoch": 0.89, "grad_norm": 1.2014418880015592, "learning_rate": 5.951735967582551e-07, "loss": 0.5329, "step": 11510 }, { "epoch": 0.89, "grad_norm": 1.2742328617289833, "learning_rate": 5.943199831608304e-07, "loss": 0.5202, "step": 11511 }, { "epoch": 0.89, "grad_norm": 1.2587320982219765, "learning_rate": 5.934669633925116e-07, "loss": 0.4809, "step": 11512 }, { "epoch": 0.89, "grad_norm": 1.16595699092592, "learning_rate": 5.926145375071568e-07, "loss": 0.4664, "step": 11513 }, { "epoch": 0.89, "grad_norm": 1.1908485983601393, "learning_rate": 5.917627055585807e-07, "loss": 0.4664, "step": 11514 }, { "epoch": 0.89, "grad_norm": 1.157351613430679, "learning_rate": 5.909114676005645e-07, "loss": 0.462, "step": 11515 }, { "epoch": 0.89, "grad_norm": 1.277930635256316, "learning_rate": 5.900608236868532e-07, "loss": 0.5313, "step": 11516 }, { "epoch": 0.89, "grad_norm": 1.4648454589833781, "learning_rate": 5.892107738711505e-07, "loss": 0.5294, "step": 11517 }, { "epoch": 0.89, "grad_norm": 1.2096183236361553, "learning_rate": 5.883613182071257e-07, "loss": 0.5709, "step": 11518 }, { "epoch": 0.89, "grad_norm": 1.1865059054613942, "learning_rate": 5.875124567484069e-07, "loss": 0.5131, "step": 11519 }, { "epoch": 0.89, "grad_norm": 1.148821137376876, "learning_rate": 5.866641895485892e-07, "loss": 0.4299, "step": 11520 }, { "epoch": 0.89, "grad_norm": 1.1251920430196, "learning_rate": 5.858165166612273e-07, "loss": 0.4955, "step": 11521 }, { "epoch": 0.89, "grad_norm": 1.195691366552233, "learning_rate": 5.849694381398396e-07, "loss": 0.544, "step": 11522 }, { "epoch": 0.89, "grad_norm": 1.1546910060060207, "learning_rate": 5.841229540379079e-07, "loss": 0.5319, "step": 11523 }, { "epoch": 0.89, "grad_norm": 1.1479977135452735, "learning_rate": 5.832770644088726e-07, "loss": 0.5128, "step": 11524 }, { "epoch": 0.89, "grad_norm": 1.2128555857930658, "learning_rate": 5.824317693061387e-07, "loss": 0.4753, "step": 11525 }, { "epoch": 0.89, "grad_norm": 1.241292619155997, "learning_rate": 5.815870687830782e-07, "loss": 0.5141, "step": 11526 }, { "epoch": 0.89, "grad_norm": 1.1653126397801563, "learning_rate": 5.807429628930172e-07, "loss": 0.4541, "step": 11527 }, { "epoch": 0.89, "grad_norm": 1.177684523506373, "learning_rate": 5.798994516892509e-07, "loss": 0.5136, "step": 11528 }, { "epoch": 0.89, "grad_norm": 1.0739360090439753, "learning_rate": 5.790565352250322e-07, "loss": 0.4287, "step": 11529 }, { "epoch": 0.89, "grad_norm": 1.301586646397559, "learning_rate": 5.782142135535806e-07, "loss": 0.4968, "step": 11530 }, { "epoch": 0.89, "grad_norm": 1.1061639655327724, "learning_rate": 5.77372486728075e-07, "loss": 0.4648, "step": 11531 }, { "epoch": 0.89, "grad_norm": 1.1582700250097908, "learning_rate": 5.765313548016593e-07, "loss": 0.5057, "step": 11532 }, { "epoch": 0.89, "grad_norm": 1.3245410808213256, "learning_rate": 5.756908178274389e-07, "loss": 0.5502, "step": 11533 }, { "epoch": 0.89, "grad_norm": 1.1218823042182844, "learning_rate": 5.748508758584792e-07, "loss": 0.4495, "step": 11534 }, { "epoch": 0.89, "grad_norm": 1.2571300289967215, "learning_rate": 5.740115289478109e-07, "loss": 0.4927, "step": 11535 }, { "epoch": 0.89, "grad_norm": 1.2030677781802523, "learning_rate": 5.731727771484275e-07, "loss": 0.5042, "step": 11536 }, { "epoch": 0.9, "grad_norm": 1.1734218814790778, "learning_rate": 5.723346205132818e-07, "loss": 0.5223, "step": 11537 }, { "epoch": 0.9, "grad_norm": 1.3448370595175896, "learning_rate": 5.714970590952939e-07, "loss": 0.5226, "step": 11538 }, { "epoch": 0.9, "grad_norm": 1.0855007253791147, "learning_rate": 5.706600929473382e-07, "loss": 0.4566, "step": 11539 }, { "epoch": 0.9, "grad_norm": 1.2138332076775586, "learning_rate": 5.698237221222614e-07, "loss": 0.5171, "step": 11540 }, { "epoch": 0.9, "grad_norm": 1.2539276881776915, "learning_rate": 5.689879466728654e-07, "loss": 0.5312, "step": 11541 }, { "epoch": 0.9, "grad_norm": 1.3131047626662606, "learning_rate": 5.681527666519182e-07, "loss": 0.5372, "step": 11542 }, { "epoch": 0.9, "grad_norm": 1.2661229319706675, "learning_rate": 5.673181821121509e-07, "loss": 0.4981, "step": 11543 }, { "epoch": 0.9, "grad_norm": 1.1311052809400346, "learning_rate": 5.664841931062504e-07, "loss": 0.5265, "step": 11544 }, { "epoch": 0.9, "grad_norm": 1.2112644861573485, "learning_rate": 5.656507996868743e-07, "loss": 0.4923, "step": 11545 }, { "epoch": 0.9, "grad_norm": 1.1716073811446706, "learning_rate": 5.648180019066385e-07, "loss": 0.5145, "step": 11546 }, { "epoch": 0.9, "grad_norm": 1.033528786806913, "learning_rate": 5.639857998181208e-07, "loss": 0.4607, "step": 11547 }, { "epoch": 0.9, "grad_norm": 1.071128589778587, "learning_rate": 5.631541934738627e-07, "loss": 0.4024, "step": 11548 }, { "epoch": 0.9, "grad_norm": 1.1293005865056507, "learning_rate": 5.623231829263698e-07, "loss": 0.4428, "step": 11549 }, { "epoch": 0.9, "grad_norm": 1.0832866450788494, "learning_rate": 5.614927682281046e-07, "loss": 0.4554, "step": 11550 }, { "epoch": 0.9, "grad_norm": 1.252620239577666, "learning_rate": 5.606629494314963e-07, "loss": 0.4963, "step": 11551 }, { "epoch": 0.9, "grad_norm": 1.3517702858025151, "learning_rate": 5.598337265889375e-07, "loss": 0.5377, "step": 11552 }, { "epoch": 0.9, "grad_norm": 1.2697677626744661, "learning_rate": 5.590050997527808e-07, "loss": 0.4895, "step": 11553 }, { "epoch": 0.9, "grad_norm": 1.0985110067025359, "learning_rate": 5.581770689753429e-07, "loss": 0.438, "step": 11554 }, { "epoch": 0.9, "grad_norm": 1.1086773895262008, "learning_rate": 5.57349634308898e-07, "loss": 0.4963, "step": 11555 }, { "epoch": 0.9, "grad_norm": 1.2357812910553831, "learning_rate": 5.565227958056896e-07, "loss": 0.4954, "step": 11556 }, { "epoch": 0.9, "grad_norm": 1.1912208788135292, "learning_rate": 5.556965535179182e-07, "loss": 0.4829, "step": 11557 }, { "epoch": 0.9, "grad_norm": 1.3335518757213265, "learning_rate": 5.548709074977487e-07, "loss": 0.5646, "step": 11558 }, { "epoch": 0.9, "grad_norm": 1.3082056181625616, "learning_rate": 5.540458577973118e-07, "loss": 0.5054, "step": 11559 }, { "epoch": 0.9, "grad_norm": 1.2630733151363325, "learning_rate": 5.532214044686923e-07, "loss": 0.4922, "step": 11560 }, { "epoch": 0.9, "grad_norm": 1.1330235251823495, "learning_rate": 5.523975475639443e-07, "loss": 0.4787, "step": 11561 }, { "epoch": 0.9, "grad_norm": 1.173797708583798, "learning_rate": 5.515742871350838e-07, "loss": 0.4792, "step": 11562 }, { "epoch": 0.9, "grad_norm": 1.1836511075565557, "learning_rate": 5.507516232340848e-07, "loss": 0.4875, "step": 11563 }, { "epoch": 0.9, "grad_norm": 1.191391841613695, "learning_rate": 5.499295559128892e-07, "loss": 0.4711, "step": 11564 }, { "epoch": 0.9, "grad_norm": 1.2340279586258522, "learning_rate": 5.491080852233955e-07, "loss": 0.5353, "step": 11565 }, { "epoch": 0.9, "grad_norm": 1.1752932223235342, "learning_rate": 5.482872112174698e-07, "loss": 0.4687, "step": 11566 }, { "epoch": 0.9, "grad_norm": 1.2523062411835026, "learning_rate": 5.474669339469351e-07, "loss": 0.4597, "step": 11567 }, { "epoch": 0.9, "grad_norm": 1.148199926183579, "learning_rate": 5.466472534635814e-07, "loss": 0.5053, "step": 11568 }, { "epoch": 0.9, "grad_norm": 1.2781162084449653, "learning_rate": 5.458281698191615e-07, "loss": 0.5407, "step": 11569 }, { "epoch": 0.9, "grad_norm": 1.2211590945282969, "learning_rate": 5.450096830653851e-07, "loss": 0.4956, "step": 11570 }, { "epoch": 0.9, "grad_norm": 1.2045974638426897, "learning_rate": 5.441917932539287e-07, "loss": 0.5199, "step": 11571 }, { "epoch": 0.9, "grad_norm": 1.1920544971354834, "learning_rate": 5.433745004364299e-07, "loss": 0.5059, "step": 11572 }, { "epoch": 0.9, "grad_norm": 1.1258671385637893, "learning_rate": 5.425578046644886e-07, "loss": 0.4482, "step": 11573 }, { "epoch": 0.9, "grad_norm": 1.3430994810127084, "learning_rate": 5.417417059896679e-07, "loss": 0.5298, "step": 11574 }, { "epoch": 0.9, "grad_norm": 1.30765711728383, "learning_rate": 5.4092620446349e-07, "loss": 0.5017, "step": 11575 }, { "epoch": 0.9, "grad_norm": 1.3845164689572027, "learning_rate": 5.401113001374459e-07, "loss": 0.5462, "step": 11576 }, { "epoch": 0.9, "grad_norm": 1.2505986687415667, "learning_rate": 5.392969930629799e-07, "loss": 0.4704, "step": 11577 }, { "epoch": 0.9, "grad_norm": 1.1860635501985373, "learning_rate": 5.384832832915055e-07, "loss": 0.4972, "step": 11578 }, { "epoch": 0.9, "grad_norm": 1.2304688468811966, "learning_rate": 5.376701708743981e-07, "loss": 0.515, "step": 11579 }, { "epoch": 0.9, "grad_norm": 1.0690601010534004, "learning_rate": 5.368576558629901e-07, "loss": 0.4622, "step": 11580 }, { "epoch": 0.9, "grad_norm": 1.2907584853335778, "learning_rate": 5.360457383085816e-07, "loss": 0.5495, "step": 11581 }, { "epoch": 0.9, "grad_norm": 1.3180428584168025, "learning_rate": 5.352344182624336e-07, "loss": 0.5582, "step": 11582 }, { "epoch": 0.9, "grad_norm": 1.2552603662029, "learning_rate": 5.344236957757687e-07, "loss": 0.5372, "step": 11583 }, { "epoch": 0.9, "grad_norm": 1.2918543781762506, "learning_rate": 5.336135708997725e-07, "loss": 0.5407, "step": 11584 }, { "epoch": 0.9, "grad_norm": 1.054848778720736, "learning_rate": 5.328040436855908e-07, "loss": 0.453, "step": 11585 }, { "epoch": 0.9, "grad_norm": 1.310663573389164, "learning_rate": 5.31995114184336e-07, "loss": 0.5397, "step": 11586 }, { "epoch": 0.9, "grad_norm": 1.2875970544621982, "learning_rate": 5.311867824470762e-07, "loss": 0.5238, "step": 11587 }, { "epoch": 0.9, "grad_norm": 1.1991838083550863, "learning_rate": 5.303790485248472e-07, "loss": 0.5118, "step": 11588 }, { "epoch": 0.9, "grad_norm": 1.1703311159820398, "learning_rate": 5.295719124686482e-07, "loss": 0.4961, "step": 11589 }, { "epoch": 0.9, "grad_norm": 1.1631919114320926, "learning_rate": 5.28765374329433e-07, "loss": 0.4735, "step": 11590 }, { "epoch": 0.9, "grad_norm": 1.2684882941722748, "learning_rate": 5.279594341581252e-07, "loss": 0.4885, "step": 11591 }, { "epoch": 0.9, "grad_norm": 1.2766723734941412, "learning_rate": 5.271540920056073e-07, "loss": 0.5179, "step": 11592 }, { "epoch": 0.9, "grad_norm": 1.1883534326142073, "learning_rate": 5.263493479227255e-07, "loss": 0.4671, "step": 11593 }, { "epoch": 0.9, "grad_norm": 1.2319199012845141, "learning_rate": 5.25545201960288e-07, "loss": 0.5094, "step": 11594 }, { "epoch": 0.9, "grad_norm": 1.2287588197191746, "learning_rate": 5.247416541690642e-07, "loss": 0.4652, "step": 11595 }, { "epoch": 0.9, "grad_norm": 1.476701517978316, "learning_rate": 5.239387045997835e-07, "loss": 0.5068, "step": 11596 }, { "epoch": 0.9, "grad_norm": 1.163370989533444, "learning_rate": 5.231363533031431e-07, "loss": 0.4676, "step": 11597 }, { "epoch": 0.9, "grad_norm": 1.2597525189331573, "learning_rate": 5.22334600329798e-07, "loss": 0.521, "step": 11598 }, { "epoch": 0.9, "grad_norm": 1.325676669669993, "learning_rate": 5.2153344573037e-07, "loss": 0.4791, "step": 11599 }, { "epoch": 0.9, "grad_norm": 1.256546378403438, "learning_rate": 5.207328895554365e-07, "loss": 0.4921, "step": 11600 }, { "epoch": 0.9, "grad_norm": 1.3392610756560481, "learning_rate": 5.199329318555424e-07, "loss": 0.5081, "step": 11601 }, { "epoch": 0.9, "grad_norm": 1.20082164093368, "learning_rate": 5.191335726811931e-07, "loss": 0.4769, "step": 11602 }, { "epoch": 0.9, "grad_norm": 1.1291575432628187, "learning_rate": 5.183348120828558e-07, "loss": 0.4957, "step": 11603 }, { "epoch": 0.9, "grad_norm": 1.1954025035208389, "learning_rate": 5.175366501109625e-07, "loss": 0.4782, "step": 11604 }, { "epoch": 0.9, "grad_norm": 1.0798469834188902, "learning_rate": 5.167390868159028e-07, "loss": 0.4801, "step": 11605 }, { "epoch": 0.9, "grad_norm": 1.3001650705440604, "learning_rate": 5.15942122248031e-07, "loss": 0.4899, "step": 11606 }, { "epoch": 0.9, "grad_norm": 1.2203706578501075, "learning_rate": 5.151457564576645e-07, "loss": 0.5215, "step": 11607 }, { "epoch": 0.9, "grad_norm": 1.1625401828342996, "learning_rate": 5.14349989495082e-07, "loss": 0.4965, "step": 11608 }, { "epoch": 0.9, "grad_norm": 1.1946233620316558, "learning_rate": 5.135548214105235e-07, "loss": 0.5228, "step": 11609 }, { "epoch": 0.9, "grad_norm": 1.171749108228106, "learning_rate": 5.127602522541942e-07, "loss": 0.4461, "step": 11610 }, { "epoch": 0.9, "grad_norm": 1.240285172511022, "learning_rate": 5.119662820762572e-07, "loss": 0.5171, "step": 11611 }, { "epoch": 0.9, "grad_norm": 1.2563639764656076, "learning_rate": 5.111729109268405e-07, "loss": 0.5591, "step": 11612 }, { "epoch": 0.9, "grad_norm": 1.2266489472266957, "learning_rate": 5.103801388560337e-07, "loss": 0.5103, "step": 11613 }, { "epoch": 0.9, "grad_norm": 1.097857853848195, "learning_rate": 5.095879659138892e-07, "loss": 0.4718, "step": 11614 }, { "epoch": 0.9, "grad_norm": 1.306902165032515, "learning_rate": 5.087963921504213e-07, "loss": 0.5301, "step": 11615 }, { "epoch": 0.9, "grad_norm": 1.3801666637344339, "learning_rate": 5.080054176156035e-07, "loss": 0.5312, "step": 11616 }, { "epoch": 0.9, "grad_norm": 1.1768228979828945, "learning_rate": 5.072150423593769e-07, "loss": 0.5405, "step": 11617 }, { "epoch": 0.9, "grad_norm": 1.097660051115249, "learning_rate": 5.064252664316405e-07, "loss": 0.4732, "step": 11618 }, { "epoch": 0.9, "grad_norm": 1.2503430849361337, "learning_rate": 5.056360898822577e-07, "loss": 0.5135, "step": 11619 }, { "epoch": 0.9, "grad_norm": 1.16105427007145, "learning_rate": 5.048475127610531e-07, "loss": 0.485, "step": 11620 }, { "epoch": 0.9, "grad_norm": 1.2480716613314087, "learning_rate": 5.040595351178134e-07, "loss": 0.4756, "step": 11621 }, { "epoch": 0.9, "grad_norm": 1.330302349801889, "learning_rate": 5.032721570022881e-07, "loss": 0.5101, "step": 11622 }, { "epoch": 0.9, "grad_norm": 1.1913059880327987, "learning_rate": 5.02485378464187e-07, "loss": 0.5096, "step": 11623 }, { "epoch": 0.9, "grad_norm": 1.1441361718629999, "learning_rate": 5.016991995531872e-07, "loss": 0.4722, "step": 11624 }, { "epoch": 0.9, "grad_norm": 1.117943141385377, "learning_rate": 5.009136203189214e-07, "loss": 0.4632, "step": 11625 }, { "epoch": 0.9, "grad_norm": 1.2158038402498605, "learning_rate": 5.001286408109862e-07, "loss": 0.5401, "step": 11626 }, { "epoch": 0.9, "grad_norm": 1.1248110506540474, "learning_rate": 4.993442610789423e-07, "loss": 0.503, "step": 11627 }, { "epoch": 0.9, "grad_norm": 1.333943356599647, "learning_rate": 4.985604811723133e-07, "loss": 0.5397, "step": 11628 }, { "epoch": 0.9, "grad_norm": 1.230414882873489, "learning_rate": 4.977773011405806e-07, "loss": 0.4974, "step": 11629 }, { "epoch": 0.9, "grad_norm": 1.2653788103443642, "learning_rate": 4.969947210331938e-07, "loss": 0.542, "step": 11630 }, { "epoch": 0.9, "grad_norm": 1.094276301597115, "learning_rate": 4.962127408995587e-07, "loss": 0.4612, "step": 11631 }, { "epoch": 0.9, "grad_norm": 1.1960067727366028, "learning_rate": 4.95431360789046e-07, "loss": 0.469, "step": 11632 }, { "epoch": 0.9, "grad_norm": 1.20365740403089, "learning_rate": 4.946505807509883e-07, "loss": 0.487, "step": 11633 }, { "epoch": 0.9, "grad_norm": 1.2335867960699443, "learning_rate": 4.938704008346818e-07, "loss": 0.5124, "step": 11634 }, { "epoch": 0.9, "grad_norm": 1.2364535155144885, "learning_rate": 4.930908210893826e-07, "loss": 0.4667, "step": 11635 }, { "epoch": 0.9, "grad_norm": 1.3809632258232114, "learning_rate": 4.92311841564308e-07, "loss": 0.562, "step": 11636 }, { "epoch": 0.9, "grad_norm": 1.1480338496624145, "learning_rate": 4.915334623086387e-07, "loss": 0.4954, "step": 11637 }, { "epoch": 0.9, "grad_norm": 1.172027832237827, "learning_rate": 4.907556833715199e-07, "loss": 0.4747, "step": 11638 }, { "epoch": 0.9, "grad_norm": 1.2427988047680911, "learning_rate": 4.899785048020567e-07, "loss": 0.4948, "step": 11639 }, { "epoch": 0.9, "grad_norm": 1.170583241904792, "learning_rate": 4.892019266493164e-07, "loss": 0.5279, "step": 11640 }, { "epoch": 0.9, "grad_norm": 1.196469314385757, "learning_rate": 4.884259489623267e-07, "loss": 0.475, "step": 11641 }, { "epoch": 0.9, "grad_norm": 1.1514619718691659, "learning_rate": 4.876505717900803e-07, "loss": 0.4724, "step": 11642 }, { "epoch": 0.9, "grad_norm": 1.2511098226403587, "learning_rate": 4.868757951815295e-07, "loss": 0.5248, "step": 11643 }, { "epoch": 0.9, "grad_norm": 1.3178662091193285, "learning_rate": 4.861016191855939e-07, "loss": 0.5234, "step": 11644 }, { "epoch": 0.9, "grad_norm": 1.2354141400436287, "learning_rate": 4.853280438511476e-07, "loss": 0.5092, "step": 11645 }, { "epoch": 0.9, "grad_norm": 1.098511440778354, "learning_rate": 4.845550692270296e-07, "loss": 0.498, "step": 11646 }, { "epoch": 0.9, "grad_norm": 1.2128744078210814, "learning_rate": 4.837826953620428e-07, "loss": 0.4699, "step": 11647 }, { "epoch": 0.9, "grad_norm": 1.1940351654893464, "learning_rate": 4.830109223049528e-07, "loss": 0.4796, "step": 11648 }, { "epoch": 0.9, "grad_norm": 1.2091029883946196, "learning_rate": 4.82239750104484e-07, "loss": 0.5288, "step": 11649 }, { "epoch": 0.9, "grad_norm": 1.1562578871174463, "learning_rate": 4.814691788093262e-07, "loss": 0.4894, "step": 11650 }, { "epoch": 0.9, "grad_norm": 1.3747158190356996, "learning_rate": 4.806992084681273e-07, "loss": 0.5272, "step": 11651 }, { "epoch": 0.9, "grad_norm": 1.2156153739803044, "learning_rate": 4.799298391295004e-07, "loss": 0.5203, "step": 11652 }, { "epoch": 0.9, "grad_norm": 1.1915433210724786, "learning_rate": 4.791610708420192e-07, "loss": 0.4939, "step": 11653 }, { "epoch": 0.9, "grad_norm": 1.3170288605826161, "learning_rate": 4.783929036542234e-07, "loss": 0.5232, "step": 11654 }, { "epoch": 0.9, "grad_norm": 1.2679209664320248, "learning_rate": 4.776253376146078e-07, "loss": 0.5041, "step": 11655 }, { "epoch": 0.9, "grad_norm": 1.1599221041438859, "learning_rate": 4.768583727716314e-07, "loss": 0.4681, "step": 11656 }, { "epoch": 0.9, "grad_norm": 1.1336253965601941, "learning_rate": 4.760920091737198e-07, "loss": 0.473, "step": 11657 }, { "epoch": 0.9, "grad_norm": 1.1305282186962131, "learning_rate": 4.7532624686925655e-07, "loss": 0.4607, "step": 11658 }, { "epoch": 0.9, "grad_norm": 1.1431852374257534, "learning_rate": 4.745610859065886e-07, "loss": 0.4501, "step": 11659 }, { "epoch": 0.9, "grad_norm": 1.3306119634546028, "learning_rate": 4.7379652633402386e-07, "loss": 0.4928, "step": 11660 }, { "epoch": 0.9, "grad_norm": 1.1978248173980799, "learning_rate": 4.730325681998338e-07, "loss": 0.5595, "step": 11661 }, { "epoch": 0.9, "grad_norm": 1.219627015913165, "learning_rate": 4.722692115522498e-07, "loss": 0.5345, "step": 11662 }, { "epoch": 0.9, "grad_norm": 1.223947982203224, "learning_rate": 4.715064564394667e-07, "loss": 0.5003, "step": 11663 }, { "epoch": 0.9, "grad_norm": 1.2324123472882256, "learning_rate": 4.707443029096437e-07, "loss": 0.457, "step": 11664 }, { "epoch": 0.9, "grad_norm": 1.0759650445315745, "learning_rate": 4.6998275101089454e-07, "loss": 0.4797, "step": 11665 }, { "epoch": 0.91, "grad_norm": 1.185037468909366, "learning_rate": 4.692218007913052e-07, "loss": 0.5174, "step": 11666 }, { "epoch": 0.91, "grad_norm": 1.2233893783284107, "learning_rate": 4.6846145229891393e-07, "loss": 0.4962, "step": 11667 }, { "epoch": 0.91, "grad_norm": 1.2070307561879552, "learning_rate": 4.677017055817279e-07, "loss": 0.498, "step": 11668 }, { "epoch": 0.91, "grad_norm": 1.306967017401899, "learning_rate": 4.6694256068771314e-07, "loss": 0.5078, "step": 11669 }, { "epoch": 0.91, "grad_norm": 1.1385053509606156, "learning_rate": 4.661840176647991e-07, "loss": 0.4506, "step": 11670 }, { "epoch": 0.91, "grad_norm": 1.260228981146994, "learning_rate": 4.654260765608776e-07, "loss": 0.5134, "step": 11671 }, { "epoch": 0.91, "grad_norm": 1.2414791558705525, "learning_rate": 4.646687374237979e-07, "loss": 0.481, "step": 11672 }, { "epoch": 0.91, "grad_norm": 1.224950569479002, "learning_rate": 4.639120003013764e-07, "loss": 0.4756, "step": 11673 }, { "epoch": 0.91, "grad_norm": 1.2187451093526673, "learning_rate": 4.6315586524139143e-07, "loss": 0.537, "step": 11674 }, { "epoch": 0.91, "grad_norm": 1.3131997649938285, "learning_rate": 4.6240033229157934e-07, "loss": 0.563, "step": 11675 }, { "epoch": 0.91, "grad_norm": 1.1431704819663746, "learning_rate": 4.61645401499643e-07, "loss": 0.4919, "step": 11676 }, { "epoch": 0.91, "grad_norm": 1.1505580190943998, "learning_rate": 4.608910729132432e-07, "loss": 0.4722, "step": 11677 }, { "epoch": 0.91, "grad_norm": 1.1179001142651064, "learning_rate": 4.6013734658000406e-07, "loss": 0.4819, "step": 11678 }, { "epoch": 0.91, "grad_norm": 1.2458995799352273, "learning_rate": 4.5938422254751425e-07, "loss": 0.5234, "step": 11679 }, { "epoch": 0.91, "grad_norm": 1.1915886411042884, "learning_rate": 4.5863170086332234e-07, "loss": 0.5037, "step": 11680 }, { "epoch": 0.91, "grad_norm": 1.1996697189869534, "learning_rate": 4.578797815749381e-07, "loss": 0.4958, "step": 11681 }, { "epoch": 0.91, "grad_norm": 1.3254755858047529, "learning_rate": 4.571284647298335e-07, "loss": 0.5179, "step": 11682 }, { "epoch": 0.91, "grad_norm": 1.4197493373562327, "learning_rate": 4.5637775037544296e-07, "loss": 0.5397, "step": 11683 }, { "epoch": 0.91, "grad_norm": 1.1658080665480766, "learning_rate": 4.556276385591663e-07, "loss": 0.4958, "step": 11684 }, { "epoch": 0.91, "grad_norm": 1.196314472784978, "learning_rate": 4.548781293283566e-07, "loss": 0.4759, "step": 11685 }, { "epoch": 0.91, "grad_norm": 1.084411152173807, "learning_rate": 4.541292227303384e-07, "loss": 0.4522, "step": 11686 }, { "epoch": 0.91, "grad_norm": 1.252107131697988, "learning_rate": 4.533809188123917e-07, "loss": 0.5192, "step": 11687 }, { "epoch": 0.91, "grad_norm": 1.241396430176583, "learning_rate": 4.526332176217618e-07, "loss": 0.5326, "step": 11688 }, { "epoch": 0.91, "grad_norm": 1.1364092848018812, "learning_rate": 4.518861192056545e-07, "loss": 0.4621, "step": 11689 }, { "epoch": 0.91, "grad_norm": 1.3141722697926925, "learning_rate": 4.511396236112386e-07, "loss": 0.4955, "step": 11690 }, { "epoch": 0.91, "grad_norm": 1.2908912864286366, "learning_rate": 4.503937308856454e-07, "loss": 0.5272, "step": 11691 }, { "epoch": 0.91, "grad_norm": 1.1712128866823428, "learning_rate": 4.4964844107596384e-07, "loss": 0.4973, "step": 11692 }, { "epoch": 0.91, "grad_norm": 1.1982866572776498, "learning_rate": 4.489037542292507e-07, "loss": 0.4444, "step": 11693 }, { "epoch": 0.91, "grad_norm": 1.172101164292414, "learning_rate": 4.481596703925195e-07, "loss": 0.4915, "step": 11694 }, { "epoch": 0.91, "grad_norm": 1.2125661419004228, "learning_rate": 4.4741618961274936e-07, "loss": 0.4956, "step": 11695 }, { "epoch": 0.91, "grad_norm": 1.153808335454769, "learning_rate": 4.4667331193688155e-07, "loss": 0.4789, "step": 11696 }, { "epoch": 0.91, "grad_norm": 1.1952670718268237, "learning_rate": 4.459310374118142e-07, "loss": 0.4465, "step": 11697 }, { "epoch": 0.91, "grad_norm": 1.3161940587573808, "learning_rate": 4.451893660844142e-07, "loss": 0.4697, "step": 11698 }, { "epoch": 0.91, "grad_norm": 1.2213759376304432, "learning_rate": 4.4444829800150524e-07, "loss": 0.4709, "step": 11699 }, { "epoch": 0.91, "grad_norm": 1.192370315473772, "learning_rate": 4.437078332098754e-07, "loss": 0.4807, "step": 11700 }, { "epoch": 0.91, "grad_norm": 1.2790178067367342, "learning_rate": 4.4296797175627517e-07, "loss": 0.5051, "step": 11701 }, { "epoch": 0.91, "grad_norm": 1.199433347903852, "learning_rate": 4.422287136874126e-07, "loss": 0.472, "step": 11702 }, { "epoch": 0.91, "grad_norm": 1.2479101353868722, "learning_rate": 4.41490059049966e-07, "loss": 0.5427, "step": 11703 }, { "epoch": 0.91, "grad_norm": 1.159155005792415, "learning_rate": 4.407520078905647e-07, "loss": 0.4453, "step": 11704 }, { "epoch": 0.91, "grad_norm": 1.225054402992305, "learning_rate": 4.4001456025580925e-07, "loss": 0.5116, "step": 11705 }, { "epoch": 0.91, "grad_norm": 1.1330927535032986, "learning_rate": 4.3927771619225787e-07, "loss": 0.4546, "step": 11706 }, { "epoch": 0.91, "grad_norm": 1.3007622794752653, "learning_rate": 4.385414757464312e-07, "loss": 0.4898, "step": 11707 }, { "epoch": 0.91, "grad_norm": 1.2646846810232406, "learning_rate": 4.37805838964811e-07, "loss": 0.5187, "step": 11708 }, { "epoch": 0.91, "grad_norm": 1.2703951669143438, "learning_rate": 4.370708058938422e-07, "loss": 0.5491, "step": 11709 }, { "epoch": 0.91, "grad_norm": 1.1870486757024492, "learning_rate": 4.363363765799322e-07, "loss": 0.5152, "step": 11710 }, { "epoch": 0.91, "grad_norm": 1.098214167197262, "learning_rate": 4.356025510694495e-07, "loss": 0.5115, "step": 11711 }, { "epoch": 0.91, "grad_norm": 1.2990648555848965, "learning_rate": 4.348693294087236e-07, "loss": 0.5695, "step": 11712 }, { "epoch": 0.91, "grad_norm": 1.1769447526402916, "learning_rate": 4.3413671164404757e-07, "loss": 0.5, "step": 11713 }, { "epoch": 0.91, "grad_norm": 1.2447782646106818, "learning_rate": 4.3340469782167214e-07, "loss": 0.476, "step": 11714 }, { "epoch": 0.91, "grad_norm": 1.2250338705401749, "learning_rate": 4.3267328798781595e-07, "loss": 0.5429, "step": 11715 }, { "epoch": 0.91, "grad_norm": 1.3735424466012531, "learning_rate": 4.319424821886553e-07, "loss": 0.5384, "step": 11716 }, { "epoch": 0.91, "grad_norm": 1.1890178817418209, "learning_rate": 4.3121228047033227e-07, "loss": 0.4781, "step": 11717 }, { "epoch": 0.91, "grad_norm": 1.157881384896909, "learning_rate": 4.3048268287894435e-07, "loss": 0.4957, "step": 11718 }, { "epoch": 0.91, "grad_norm": 1.2604530056005232, "learning_rate": 4.2975368946055805e-07, "loss": 0.4931, "step": 11719 }, { "epoch": 0.91, "grad_norm": 1.2466276932215556, "learning_rate": 4.290253002611966e-07, "loss": 0.4998, "step": 11720 }, { "epoch": 0.91, "grad_norm": 1.1932600763591998, "learning_rate": 4.282975153268476e-07, "loss": 0.4837, "step": 11721 }, { "epoch": 0.91, "grad_norm": 1.2554693254090885, "learning_rate": 4.275703347034621e-07, "loss": 0.558, "step": 11722 }, { "epoch": 0.91, "grad_norm": 1.3863700750715937, "learning_rate": 4.268437584369478e-07, "loss": 0.5683, "step": 11723 }, { "epoch": 0.91, "grad_norm": 1.2086752648904282, "learning_rate": 4.2611778657317695e-07, "loss": 0.5212, "step": 11724 }, { "epoch": 0.91, "grad_norm": 1.1653620998704188, "learning_rate": 4.25392419157985e-07, "loss": 0.4999, "step": 11725 }, { "epoch": 0.91, "grad_norm": 1.2040753265022415, "learning_rate": 4.2466765623716766e-07, "loss": 0.5015, "step": 11726 }, { "epoch": 0.91, "grad_norm": 1.3222846927141374, "learning_rate": 4.2394349785648494e-07, "loss": 0.5322, "step": 11727 }, { "epoch": 0.91, "grad_norm": 1.2089381293848507, "learning_rate": 4.232199440616536e-07, "loss": 0.4804, "step": 11728 }, { "epoch": 0.91, "grad_norm": 1.1606385740802105, "learning_rate": 4.2249699489835815e-07, "loss": 0.4857, "step": 11729 }, { "epoch": 0.91, "grad_norm": 1.3053066589064044, "learning_rate": 4.2177465041223995e-07, "loss": 0.5312, "step": 11730 }, { "epoch": 0.91, "grad_norm": 1.1614812612052423, "learning_rate": 4.2105291064890474e-07, "loss": 0.4595, "step": 11731 }, { "epoch": 0.91, "grad_norm": 1.1995305951260657, "learning_rate": 4.203317756539216e-07, "loss": 0.5121, "step": 11732 }, { "epoch": 0.91, "grad_norm": 1.2239049805179607, "learning_rate": 4.196112454728185e-07, "loss": 0.5229, "step": 11733 }, { "epoch": 0.91, "grad_norm": 1.2171196915000573, "learning_rate": 4.1889132015108467e-07, "loss": 0.5001, "step": 11734 }, { "epoch": 0.91, "grad_norm": 1.8535005751743459, "learning_rate": 4.181719997341738e-07, "loss": 0.4794, "step": 11735 }, { "epoch": 0.91, "grad_norm": 1.2263811791392332, "learning_rate": 4.1745328426749943e-07, "loss": 0.5545, "step": 11736 }, { "epoch": 0.91, "grad_norm": 1.2140691317618955, "learning_rate": 4.167351737964409e-07, "loss": 0.5407, "step": 11737 }, { "epoch": 0.91, "grad_norm": 1.247686151899819, "learning_rate": 4.1601766836633196e-07, "loss": 0.5128, "step": 11738 }, { "epoch": 0.91, "grad_norm": 1.1683219452183686, "learning_rate": 4.153007680224752e-07, "loss": 0.5228, "step": 11739 }, { "epoch": 0.91, "grad_norm": 1.2837260906515053, "learning_rate": 4.1458447281013113e-07, "loss": 0.4502, "step": 11740 }, { "epoch": 0.91, "grad_norm": 1.2071021593122753, "learning_rate": 4.138687827745236e-07, "loss": 0.4725, "step": 11741 }, { "epoch": 0.91, "grad_norm": 1.2537044469710232, "learning_rate": 4.131536979608386e-07, "loss": 0.4991, "step": 11742 }, { "epoch": 0.91, "grad_norm": 1.193695520486006, "learning_rate": 4.124392184142223e-07, "loss": 0.4982, "step": 11743 }, { "epoch": 0.91, "grad_norm": 1.1998289959975694, "learning_rate": 4.1172534417978305e-07, "loss": 0.5043, "step": 11744 }, { "epoch": 0.91, "grad_norm": 1.1520581424895415, "learning_rate": 4.1101207530259144e-07, "loss": 0.4442, "step": 11745 }, { "epoch": 0.91, "grad_norm": 1.1643199763911125, "learning_rate": 4.102994118276804e-07, "loss": 0.5122, "step": 11746 }, { "epoch": 0.91, "grad_norm": 1.1934336437833286, "learning_rate": 4.095873538000439e-07, "loss": 0.5159, "step": 11747 }, { "epoch": 0.91, "grad_norm": 1.253856051857348, "learning_rate": 4.088759012646382e-07, "loss": 0.486, "step": 11748 }, { "epoch": 0.91, "grad_norm": 1.1820389005485281, "learning_rate": 4.081650542663795e-07, "loss": 0.5113, "step": 11749 }, { "epoch": 0.91, "grad_norm": 1.1781774797564402, "learning_rate": 4.0745481285014876e-07, "loss": 0.483, "step": 11750 }, { "epoch": 0.91, "grad_norm": 1.0738288310448638, "learning_rate": 4.0674517706078556e-07, "loss": 0.4665, "step": 11751 }, { "epoch": 0.91, "grad_norm": 1.2804562971290436, "learning_rate": 4.060361469430962e-07, "loss": 0.503, "step": 11752 }, { "epoch": 0.91, "grad_norm": 1.2892818379919415, "learning_rate": 4.0532772254184394e-07, "loss": 0.5132, "step": 11753 }, { "epoch": 0.91, "grad_norm": 1.2004887817252679, "learning_rate": 4.0461990390175175e-07, "loss": 0.4391, "step": 11754 }, { "epoch": 0.91, "grad_norm": 1.2543431646833745, "learning_rate": 4.0391269106751174e-07, "loss": 0.5051, "step": 11755 }, { "epoch": 0.91, "grad_norm": 1.2769473799526383, "learning_rate": 4.032060840837726e-07, "loss": 0.4994, "step": 11756 }, { "epoch": 0.91, "grad_norm": 1.1998899985599114, "learning_rate": 4.0250008299514755e-07, "loss": 0.4979, "step": 11757 }, { "epoch": 0.91, "grad_norm": 1.2316834766254283, "learning_rate": 4.017946878462076e-07, "loss": 0.4457, "step": 11758 }, { "epoch": 0.91, "grad_norm": 1.1375048039932254, "learning_rate": 4.010898986814893e-07, "loss": 0.4964, "step": 11759 }, { "epoch": 0.91, "grad_norm": 1.299131383842934, "learning_rate": 4.0038571554548934e-07, "loss": 0.5086, "step": 11760 }, { "epoch": 0.91, "grad_norm": 1.1954543584282178, "learning_rate": 3.9968213848266655e-07, "loss": 0.4802, "step": 11761 }, { "epoch": 0.91, "grad_norm": 1.2054921432865644, "learning_rate": 3.9897916753744324e-07, "loss": 0.5338, "step": 11762 }, { "epoch": 0.91, "grad_norm": 1.2764553048407306, "learning_rate": 3.9827680275419944e-07, "loss": 0.5121, "step": 11763 }, { "epoch": 0.91, "grad_norm": 1.2053758942694601, "learning_rate": 3.9757504417727856e-07, "loss": 0.4842, "step": 11764 }, { "epoch": 0.91, "grad_norm": 1.1320032846854713, "learning_rate": 3.9687389185098733e-07, "loss": 0.4689, "step": 11765 }, { "epoch": 0.91, "grad_norm": 1.214284639398115, "learning_rate": 3.9617334581959267e-07, "loss": 0.5429, "step": 11766 }, { "epoch": 0.91, "grad_norm": 1.2946557969823145, "learning_rate": 3.9547340612732356e-07, "loss": 0.5099, "step": 11767 }, { "epoch": 0.91, "grad_norm": 1.2584743298658398, "learning_rate": 3.9477407281837246e-07, "loss": 0.5073, "step": 11768 }, { "epoch": 0.91, "grad_norm": 1.1653233810330084, "learning_rate": 3.940753459368896e-07, "loss": 0.4227, "step": 11769 }, { "epoch": 0.91, "grad_norm": 1.2595806133826835, "learning_rate": 3.9337722552698963e-07, "loss": 0.5476, "step": 11770 }, { "epoch": 0.91, "grad_norm": 1.2531475968832562, "learning_rate": 3.9267971163274966e-07, "loss": 0.5068, "step": 11771 }, { "epoch": 0.91, "grad_norm": 1.0947538401381067, "learning_rate": 3.919828042982077e-07, "loss": 0.45, "step": 11772 }, { "epoch": 0.91, "grad_norm": 1.2633416571296776, "learning_rate": 3.9128650356736297e-07, "loss": 0.4809, "step": 11773 }, { "epoch": 0.91, "grad_norm": 1.237929333279952, "learning_rate": 3.905908094841737e-07, "loss": 0.5068, "step": 11774 }, { "epoch": 0.91, "grad_norm": 1.1030452299588724, "learning_rate": 3.898957220925648e-07, "loss": 0.4438, "step": 11775 }, { "epoch": 0.91, "grad_norm": 1.2417517803556468, "learning_rate": 3.8920124143642104e-07, "loss": 0.5079, "step": 11776 }, { "epoch": 0.91, "grad_norm": 1.1804708444209424, "learning_rate": 3.885073675595874e-07, "loss": 0.4584, "step": 11777 }, { "epoch": 0.91, "grad_norm": 1.0939406092630664, "learning_rate": 3.878141005058733e-07, "loss": 0.454, "step": 11778 }, { "epoch": 0.91, "grad_norm": 1.1600049880348036, "learning_rate": 3.871214403190471e-07, "loss": 0.4961, "step": 11779 }, { "epoch": 0.91, "grad_norm": 1.2780535297883957, "learning_rate": 3.864293870428404e-07, "loss": 0.5328, "step": 11780 }, { "epoch": 0.91, "grad_norm": 1.2095696876606805, "learning_rate": 3.8573794072094495e-07, "loss": 0.5176, "step": 11781 }, { "epoch": 0.91, "grad_norm": 1.2659270785544752, "learning_rate": 3.8504710139701804e-07, "loss": 0.5117, "step": 11782 }, { "epoch": 0.91, "grad_norm": 1.319515461711487, "learning_rate": 3.843568691146748e-07, "loss": 0.5153, "step": 11783 }, { "epoch": 0.91, "grad_norm": 1.2002694562539997, "learning_rate": 3.8366724391749153e-07, "loss": 0.5164, "step": 11784 }, { "epoch": 0.91, "grad_norm": 1.2488768299903783, "learning_rate": 3.829782258490078e-07, "loss": 0.4832, "step": 11785 }, { "epoch": 0.91, "grad_norm": 1.3546710126636907, "learning_rate": 3.8228981495272654e-07, "loss": 0.5357, "step": 11786 }, { "epoch": 0.91, "grad_norm": 1.2989807039285735, "learning_rate": 3.8160201127211083e-07, "loss": 0.489, "step": 11787 }, { "epoch": 0.91, "grad_norm": 1.1660209924008844, "learning_rate": 3.809148148505848e-07, "loss": 0.477, "step": 11788 }, { "epoch": 0.91, "grad_norm": 1.2577406110418798, "learning_rate": 3.8022822573153374e-07, "loss": 0.4625, "step": 11789 }, { "epoch": 0.91, "grad_norm": 1.103991866520623, "learning_rate": 3.795422439583063e-07, "loss": 0.4281, "step": 11790 }, { "epoch": 0.91, "grad_norm": 1.1095122131596897, "learning_rate": 3.788568695742123e-07, "loss": 0.4694, "step": 11791 }, { "epoch": 0.91, "grad_norm": 1.2214688028356104, "learning_rate": 3.781721026225227e-07, "loss": 0.5078, "step": 11792 }, { "epoch": 0.91, "grad_norm": 1.2617199310202725, "learning_rate": 3.7748794314647066e-07, "loss": 0.5729, "step": 11793 }, { "epoch": 0.91, "grad_norm": 1.2734457378472044, "learning_rate": 3.768043911892505e-07, "loss": 0.539, "step": 11794 }, { "epoch": 0.92, "grad_norm": 1.40469694208657, "learning_rate": 3.7612144679401664e-07, "loss": 0.5234, "step": 11795 }, { "epoch": 0.92, "grad_norm": 1.1789927679006709, "learning_rate": 3.754391100038901e-07, "loss": 0.5356, "step": 11796 }, { "epoch": 0.92, "grad_norm": 1.2780748427419666, "learning_rate": 3.747573808619476e-07, "loss": 0.5241, "step": 11797 }, { "epoch": 0.92, "grad_norm": 1.2062431651499914, "learning_rate": 3.740762594112324e-07, "loss": 0.4773, "step": 11798 }, { "epoch": 0.92, "grad_norm": 1.195164340941025, "learning_rate": 3.733957456947457e-07, "loss": 0.5391, "step": 11799 }, { "epoch": 0.92, "grad_norm": 1.1602979888131897, "learning_rate": 3.72715839755452e-07, "loss": 0.4759, "step": 11800 }, { "epoch": 0.92, "grad_norm": 1.2576929355445683, "learning_rate": 3.720365416362792e-07, "loss": 0.5166, "step": 11801 }, { "epoch": 0.92, "grad_norm": 1.248847239147885, "learning_rate": 3.713578513801119e-07, "loss": 0.5368, "step": 11802 }, { "epoch": 0.92, "grad_norm": 1.2689911148145008, "learning_rate": 3.706797690298014e-07, "loss": 0.5151, "step": 11803 }, { "epoch": 0.92, "grad_norm": 1.1390971049009015, "learning_rate": 3.7000229462815785e-07, "loss": 0.4449, "step": 11804 }, { "epoch": 0.92, "grad_norm": 1.2716373287148128, "learning_rate": 3.6932542821795256e-07, "loss": 0.5376, "step": 11805 }, { "epoch": 0.92, "grad_norm": 1.2276580194511737, "learning_rate": 3.6864916984192143e-07, "loss": 0.4925, "step": 11806 }, { "epoch": 0.92, "grad_norm": 1.201898469819754, "learning_rate": 3.6797351954275916e-07, "loss": 0.4607, "step": 11807 }, { "epoch": 0.92, "grad_norm": 1.2896428044687338, "learning_rate": 3.6729847736312387e-07, "loss": 0.5385, "step": 11808 }, { "epoch": 0.92, "grad_norm": 1.2846397127112779, "learning_rate": 3.6662404334563363e-07, "loss": 0.5583, "step": 11809 }, { "epoch": 0.92, "grad_norm": 1.2214329361339653, "learning_rate": 3.6595021753286886e-07, "loss": 0.4906, "step": 11810 }, { "epoch": 0.92, "grad_norm": 1.2725794423005357, "learning_rate": 3.652769999673733e-07, "loss": 0.5469, "step": 11811 }, { "epoch": 0.92, "grad_norm": 1.2037241546137951, "learning_rate": 3.646043906916474e-07, "loss": 0.5198, "step": 11812 }, { "epoch": 0.92, "grad_norm": 1.360362297284121, "learning_rate": 3.639323897481606e-07, "loss": 0.5368, "step": 11813 }, { "epoch": 0.92, "grad_norm": 1.2828800717955788, "learning_rate": 3.632609971793366e-07, "loss": 0.4928, "step": 11814 }, { "epoch": 0.92, "grad_norm": 1.11524998107787, "learning_rate": 3.6259021302756383e-07, "loss": 0.4862, "step": 11815 }, { "epoch": 0.92, "grad_norm": 1.1956216250350546, "learning_rate": 3.619200373351939e-07, "loss": 0.4736, "step": 11816 }, { "epoch": 0.92, "grad_norm": 1.2944798240047037, "learning_rate": 3.612504701445385e-07, "loss": 0.4855, "step": 11817 }, { "epoch": 0.92, "grad_norm": 1.192429400259517, "learning_rate": 3.6058151149787166e-07, "loss": 0.5089, "step": 11818 }, { "epoch": 0.92, "grad_norm": 1.350053683732373, "learning_rate": 3.5991316143742515e-07, "loss": 0.4956, "step": 11819 }, { "epoch": 0.92, "grad_norm": 1.2975610504046746, "learning_rate": 3.592454200053963e-07, "loss": 0.5721, "step": 11820 }, { "epoch": 0.92, "grad_norm": 1.2104513853853138, "learning_rate": 3.585782872439458e-07, "loss": 0.5139, "step": 11821 }, { "epoch": 0.92, "grad_norm": 1.318205964381494, "learning_rate": 3.5791176319519006e-07, "loss": 0.5689, "step": 11822 }, { "epoch": 0.92, "grad_norm": 1.205212108020372, "learning_rate": 3.5724584790121084e-07, "loss": 0.4791, "step": 11823 }, { "epoch": 0.92, "grad_norm": 1.2967040627210051, "learning_rate": 3.565805414040535e-07, "loss": 0.5225, "step": 11824 }, { "epoch": 0.92, "grad_norm": 1.326910742890564, "learning_rate": 3.5591584374571773e-07, "loss": 0.5302, "step": 11825 }, { "epoch": 0.92, "grad_norm": 1.1723913962326695, "learning_rate": 3.5525175496817223e-07, "loss": 0.4868, "step": 11826 }, { "epoch": 0.92, "grad_norm": 1.1615463303330051, "learning_rate": 3.545882751133445e-07, "loss": 0.502, "step": 11827 }, { "epoch": 0.92, "grad_norm": 1.225542652839979, "learning_rate": 3.5392540422312213e-07, "loss": 0.4472, "step": 11828 }, { "epoch": 0.92, "grad_norm": 1.1211358570870824, "learning_rate": 3.5326314233935734e-07, "loss": 0.5319, "step": 11829 }, { "epoch": 0.92, "grad_norm": 1.2231918478092207, "learning_rate": 3.5260148950385985e-07, "loss": 0.4812, "step": 11830 }, { "epoch": 0.92, "grad_norm": 1.208629944449851, "learning_rate": 3.5194044575840523e-07, "loss": 0.4487, "step": 11831 }, { "epoch": 0.92, "grad_norm": 1.052311686796946, "learning_rate": 3.5128001114472674e-07, "loss": 0.4481, "step": 11832 }, { "epoch": 0.92, "grad_norm": 1.2427054710813539, "learning_rate": 3.506201857045222e-07, "loss": 0.4825, "step": 11833 }, { "epoch": 0.92, "grad_norm": 1.1732240668177534, "learning_rate": 3.4996096947945036e-07, "loss": 0.4688, "step": 11834 }, { "epoch": 0.92, "grad_norm": 1.25371243413933, "learning_rate": 3.4930236251112914e-07, "loss": 0.4958, "step": 11835 }, { "epoch": 0.92, "grad_norm": 1.1516007436460405, "learning_rate": 3.4864436484114086e-07, "loss": 0.4779, "step": 11836 }, { "epoch": 0.92, "grad_norm": 1.2052656179324965, "learning_rate": 3.4798697651102887e-07, "loss": 0.5564, "step": 11837 }, { "epoch": 0.92, "grad_norm": 1.2081124608099083, "learning_rate": 3.4733019756229557e-07, "loss": 0.4667, "step": 11838 }, { "epoch": 0.92, "grad_norm": 1.194664025022433, "learning_rate": 3.4667402803641005e-07, "loss": 0.4947, "step": 11839 }, { "epoch": 0.92, "grad_norm": 1.0570926510274017, "learning_rate": 3.460184679747969e-07, "loss": 0.4169, "step": 11840 }, { "epoch": 0.92, "grad_norm": 1.119244316050855, "learning_rate": 3.453635174188463e-07, "loss": 0.4697, "step": 11841 }, { "epoch": 0.92, "grad_norm": 1.2132183129861274, "learning_rate": 3.447091764099075e-07, "loss": 0.4992, "step": 11842 }, { "epoch": 0.92, "grad_norm": 1.2914518003282607, "learning_rate": 3.440554449892941e-07, "loss": 0.5004, "step": 11843 }, { "epoch": 0.92, "grad_norm": 1.1518716653329535, "learning_rate": 3.434023231982786e-07, "loss": 0.4562, "step": 11844 }, { "epoch": 0.92, "grad_norm": 1.2147686312593688, "learning_rate": 3.4274981107809466e-07, "loss": 0.4195, "step": 11845 }, { "epoch": 0.92, "grad_norm": 1.1839667509926344, "learning_rate": 3.4209790866994055e-07, "loss": 0.4858, "step": 11846 }, { "epoch": 0.92, "grad_norm": 1.171469554699259, "learning_rate": 3.414466160149732e-07, "loss": 0.4887, "step": 11847 }, { "epoch": 0.92, "grad_norm": 1.210771660062191, "learning_rate": 3.4079593315431315e-07, "loss": 0.4957, "step": 11848 }, { "epoch": 0.92, "grad_norm": 1.2522919147351381, "learning_rate": 3.401458601290408e-07, "loss": 0.534, "step": 11849 }, { "epoch": 0.92, "grad_norm": 1.2936390409939664, "learning_rate": 3.3949639698019896e-07, "loss": 0.5641, "step": 11850 }, { "epoch": 0.92, "grad_norm": 1.369830731703924, "learning_rate": 3.388475437487915e-07, "loss": 0.5514, "step": 11851 }, { "epoch": 0.92, "grad_norm": 1.297526230519369, "learning_rate": 3.381993004757822e-07, "loss": 0.5662, "step": 11852 }, { "epoch": 0.92, "grad_norm": 1.2452877870259023, "learning_rate": 3.3755166720210065e-07, "loss": 0.5143, "step": 11853 }, { "epoch": 0.92, "grad_norm": 1.1829554785076084, "learning_rate": 3.3690464396863407e-07, "loss": 0.5026, "step": 11854 }, { "epoch": 0.92, "grad_norm": 1.1929361994711225, "learning_rate": 3.3625823081623097e-07, "loss": 0.4908, "step": 11855 }, { "epoch": 0.92, "grad_norm": 1.153294213344485, "learning_rate": 3.3561242778570426e-07, "loss": 0.5194, "step": 11856 }, { "epoch": 0.92, "grad_norm": 1.2180884106123477, "learning_rate": 3.349672349178279e-07, "loss": 0.466, "step": 11857 }, { "epoch": 0.92, "grad_norm": 1.2845498367447854, "learning_rate": 3.343226522533338e-07, "loss": 0.558, "step": 11858 }, { "epoch": 0.92, "grad_norm": 1.172581777425249, "learning_rate": 3.3367867983292056e-07, "loss": 0.4776, "step": 11859 }, { "epoch": 0.92, "grad_norm": 1.239165414622704, "learning_rate": 3.330353176972423e-07, "loss": 0.47, "step": 11860 }, { "epoch": 0.92, "grad_norm": 1.1473822384082426, "learning_rate": 3.323925658869209e-07, "loss": 0.4775, "step": 11861 }, { "epoch": 0.92, "grad_norm": 1.1340337024437142, "learning_rate": 3.3175042444253405e-07, "loss": 0.4497, "step": 11862 }, { "epoch": 0.92, "grad_norm": 1.2249561652300989, "learning_rate": 3.311088934046247e-07, "loss": 0.5151, "step": 11863 }, { "epoch": 0.92, "grad_norm": 1.1540089617700202, "learning_rate": 3.3046797281369614e-07, "loss": 0.4877, "step": 11864 }, { "epoch": 0.92, "grad_norm": 1.2289523999375467, "learning_rate": 3.298276627102126e-07, "loss": 0.4859, "step": 11865 }, { "epoch": 0.92, "grad_norm": 1.187750940411227, "learning_rate": 3.2918796313459954e-07, "loss": 0.512, "step": 11866 }, { "epoch": 0.92, "grad_norm": 1.19838081434691, "learning_rate": 3.285488741272458e-07, "loss": 0.4784, "step": 11867 }, { "epoch": 0.92, "grad_norm": 1.2647895881502962, "learning_rate": 3.2791039572849903e-07, "loss": 0.4906, "step": 11868 }, { "epoch": 0.92, "grad_norm": 1.2120988260806527, "learning_rate": 3.272725279786715e-07, "loss": 0.5222, "step": 11869 }, { "epoch": 0.92, "grad_norm": 1.1655362931520417, "learning_rate": 3.2663527091803317e-07, "loss": 0.4923, "step": 11870 }, { "epoch": 0.92, "grad_norm": 1.300798891423282, "learning_rate": 3.2599862458681963e-07, "loss": 0.5189, "step": 11871 }, { "epoch": 0.92, "grad_norm": 1.2424211582214633, "learning_rate": 3.2536258902522323e-07, "loss": 0.507, "step": 11872 }, { "epoch": 0.92, "grad_norm": 1.2824541341748668, "learning_rate": 3.247271642734007e-07, "loss": 0.4833, "step": 11873 }, { "epoch": 0.92, "grad_norm": 1.2379879769648892, "learning_rate": 3.24092350371471e-07, "loss": 0.5265, "step": 11874 }, { "epoch": 0.92, "grad_norm": 1.242445577047439, "learning_rate": 3.234581473595122e-07, "loss": 0.4934, "step": 11875 }, { "epoch": 0.92, "grad_norm": 1.2357918056645432, "learning_rate": 3.228245552775633e-07, "loss": 0.4952, "step": 11876 }, { "epoch": 0.92, "grad_norm": 1.1593829316641642, "learning_rate": 3.22191574165629e-07, "loss": 0.5036, "step": 11877 }, { "epoch": 0.92, "grad_norm": 1.242463950818148, "learning_rate": 3.215592040636717e-07, "loss": 0.5641, "step": 11878 }, { "epoch": 0.92, "grad_norm": 1.2007545304412486, "learning_rate": 3.209274450116162e-07, "loss": 0.5184, "step": 11879 }, { "epoch": 0.92, "grad_norm": 1.2131656451641326, "learning_rate": 3.202962970493484e-07, "loss": 0.5185, "step": 11880 }, { "epoch": 0.92, "grad_norm": 1.2582918284478506, "learning_rate": 3.196657602167175e-07, "loss": 0.4557, "step": 11881 }, { "epoch": 0.92, "grad_norm": 1.2227858209660794, "learning_rate": 3.1903583455352937e-07, "loss": 0.5374, "step": 11882 }, { "epoch": 0.92, "grad_norm": 1.168920531046059, "learning_rate": 3.1840652009955563e-07, "loss": 0.4883, "step": 11883 }, { "epoch": 0.92, "grad_norm": 1.3303419571299668, "learning_rate": 3.1777781689453e-07, "loss": 0.5477, "step": 11884 }, { "epoch": 0.92, "grad_norm": 1.2804068140175722, "learning_rate": 3.1714972497814413e-07, "loss": 0.5048, "step": 11885 }, { "epoch": 0.92, "grad_norm": 1.1467945517615132, "learning_rate": 3.1652224439005287e-07, "loss": 0.4773, "step": 11886 }, { "epoch": 0.92, "grad_norm": 1.174258846529829, "learning_rate": 3.158953751698723e-07, "loss": 0.4696, "step": 11887 }, { "epoch": 0.92, "grad_norm": 1.260874510069792, "learning_rate": 3.152691173571809e-07, "loss": 0.5119, "step": 11888 }, { "epoch": 0.92, "grad_norm": 1.139834299760827, "learning_rate": 3.146434709915158e-07, "loss": 0.481, "step": 11889 }, { "epoch": 0.92, "grad_norm": 1.1358686591569918, "learning_rate": 3.1401843611237993e-07, "loss": 0.477, "step": 11890 }, { "epoch": 0.92, "grad_norm": 1.268411042359718, "learning_rate": 3.1339401275923277e-07, "loss": 0.4924, "step": 11891 }, { "epoch": 0.92, "grad_norm": 1.2053667461678765, "learning_rate": 3.127702009714961e-07, "loss": 0.49, "step": 11892 }, { "epoch": 0.92, "grad_norm": 1.159756524168175, "learning_rate": 3.121470007885574e-07, "loss": 0.4644, "step": 11893 }, { "epoch": 0.92, "grad_norm": 1.2298578108035971, "learning_rate": 3.1152441224976073e-07, "loss": 0.4833, "step": 11894 }, { "epoch": 0.92, "grad_norm": 1.2799199807218755, "learning_rate": 3.1090243539441565e-07, "loss": 0.5611, "step": 11895 }, { "epoch": 0.92, "grad_norm": 1.31700301863861, "learning_rate": 3.1028107026178756e-07, "loss": 0.5299, "step": 11896 }, { "epoch": 0.92, "grad_norm": 1.1041180881874268, "learning_rate": 3.096603168911072e-07, "loss": 0.4529, "step": 11897 }, { "epoch": 0.92, "grad_norm": 1.2636458852651218, "learning_rate": 3.090401753215677e-07, "loss": 0.5014, "step": 11898 }, { "epoch": 0.92, "grad_norm": 1.3141831096587264, "learning_rate": 3.084206455923211e-07, "loss": 0.5246, "step": 11899 }, { "epoch": 0.92, "grad_norm": 1.1742321467951986, "learning_rate": 3.078017277424805e-07, "loss": 0.5124, "step": 11900 }, { "epoch": 0.92, "grad_norm": 1.2465338811603952, "learning_rate": 3.071834218111225e-07, "loss": 0.5203, "step": 11901 }, { "epoch": 0.92, "grad_norm": 1.2592303891134542, "learning_rate": 3.0656572783728247e-07, "loss": 0.5941, "step": 11902 }, { "epoch": 0.92, "grad_norm": 1.1550183308785742, "learning_rate": 3.059486458599592e-07, "loss": 0.4972, "step": 11903 }, { "epoch": 0.92, "grad_norm": 1.3015029325741627, "learning_rate": 3.0533217591811383e-07, "loss": 0.5139, "step": 11904 }, { "epoch": 0.92, "grad_norm": 1.267953261717419, "learning_rate": 3.0471631805066626e-07, "loss": 0.4956, "step": 11905 }, { "epoch": 0.92, "grad_norm": 1.265810729162535, "learning_rate": 3.0410107229649764e-07, "loss": 0.4983, "step": 11906 }, { "epoch": 0.92, "grad_norm": 1.2888394711921056, "learning_rate": 3.034864386944525e-07, "loss": 0.5086, "step": 11907 }, { "epoch": 0.92, "grad_norm": 1.255999425313903, "learning_rate": 3.028724172833364e-07, "loss": 0.5289, "step": 11908 }, { "epoch": 0.92, "grad_norm": 1.2235600844900756, "learning_rate": 3.022590081019161e-07, "loss": 0.5266, "step": 11909 }, { "epoch": 0.92, "grad_norm": 1.3734475388230556, "learning_rate": 3.0164621118891733e-07, "loss": 0.4774, "step": 11910 }, { "epoch": 0.92, "grad_norm": 1.1895065420900075, "learning_rate": 3.010340265830314e-07, "loss": 0.5026, "step": 11911 }, { "epoch": 0.92, "grad_norm": 1.233633180631941, "learning_rate": 3.0042245432290616e-07, "loss": 0.5101, "step": 11912 }, { "epoch": 0.92, "grad_norm": 1.2901873680188227, "learning_rate": 2.998114944471542e-07, "loss": 0.5005, "step": 11913 }, { "epoch": 0.92, "grad_norm": 1.232769948710911, "learning_rate": 2.992011469943501e-07, "loss": 0.518, "step": 11914 }, { "epoch": 0.92, "grad_norm": 1.1740572637502646, "learning_rate": 2.985914120030275e-07, "loss": 0.4807, "step": 11915 }, { "epoch": 0.92, "grad_norm": 1.1624653247050696, "learning_rate": 2.979822895116802e-07, "loss": 0.465, "step": 11916 }, { "epoch": 0.92, "grad_norm": 1.1991851503716289, "learning_rate": 2.973737795587672e-07, "loss": 0.4782, "step": 11917 }, { "epoch": 0.92, "grad_norm": 1.0942294568544784, "learning_rate": 2.967658821827069e-07, "loss": 0.4484, "step": 11918 }, { "epoch": 0.92, "grad_norm": 1.2111436237750206, "learning_rate": 2.9615859742187944e-07, "loss": 0.5323, "step": 11919 }, { "epoch": 0.92, "grad_norm": 1.4270931531285191, "learning_rate": 2.955519253146233e-07, "loss": 0.5519, "step": 11920 }, { "epoch": 0.92, "grad_norm": 1.215772610996343, "learning_rate": 2.9494586589924434e-07, "loss": 0.5311, "step": 11921 }, { "epoch": 0.92, "grad_norm": 1.2044263960002966, "learning_rate": 2.9434041921400204e-07, "loss": 0.5183, "step": 11922 }, { "epoch": 0.92, "grad_norm": 1.1510258784297343, "learning_rate": 2.9373558529712466e-07, "loss": 0.4968, "step": 11923 }, { "epoch": 0.93, "grad_norm": 1.268845921916506, "learning_rate": 2.9313136418679835e-07, "loss": 0.4591, "step": 11924 }, { "epoch": 0.93, "grad_norm": 1.1582245848856378, "learning_rate": 2.9252775592116924e-07, "loss": 0.4784, "step": 11925 }, { "epoch": 0.93, "grad_norm": 1.2377338342629929, "learning_rate": 2.919247605383468e-07, "loss": 0.5133, "step": 11926 }, { "epoch": 0.93, "grad_norm": 1.3280985212491447, "learning_rate": 2.913223780764007e-07, "loss": 0.5569, "step": 11927 }, { "epoch": 0.93, "grad_norm": 1.0928988278254506, "learning_rate": 2.9072060857336384e-07, "loss": 0.4218, "step": 11928 }, { "epoch": 0.93, "grad_norm": 1.110030558139715, "learning_rate": 2.9011945206722904e-07, "loss": 0.4884, "step": 11929 }, { "epoch": 0.93, "grad_norm": 1.134434347990986, "learning_rate": 2.895189085959482e-07, "loss": 0.4737, "step": 11930 }, { "epoch": 0.93, "grad_norm": 1.1509882308961452, "learning_rate": 2.8891897819743996e-07, "loss": 0.4277, "step": 11931 }, { "epoch": 0.93, "grad_norm": 1.2279753596259868, "learning_rate": 2.883196609095773e-07, "loss": 0.5068, "step": 11932 }, { "epoch": 0.93, "grad_norm": 1.1836579056779666, "learning_rate": 2.877209567702011e-07, "loss": 0.5076, "step": 11933 }, { "epoch": 0.93, "grad_norm": 1.2130509668289684, "learning_rate": 2.871228658171088e-07, "loss": 0.4813, "step": 11934 }, { "epoch": 0.93, "grad_norm": 1.2656736482289208, "learning_rate": 2.8652538808806253e-07, "loss": 0.5475, "step": 11935 }, { "epoch": 0.93, "grad_norm": 1.1824683954963262, "learning_rate": 2.8592852362078315e-07, "loss": 0.5113, "step": 11936 }, { "epoch": 0.93, "grad_norm": 1.1837851995856066, "learning_rate": 2.85332272452955e-07, "loss": 0.484, "step": 11937 }, { "epoch": 0.93, "grad_norm": 1.1781996382082538, "learning_rate": 2.8473663462222025e-07, "loss": 0.5109, "step": 11938 }, { "epoch": 0.93, "grad_norm": 1.161489369386853, "learning_rate": 2.8414161016618757e-07, "loss": 0.474, "step": 11939 }, { "epoch": 0.93, "grad_norm": 1.19865061072465, "learning_rate": 2.8354719912242037e-07, "loss": 0.4294, "step": 11940 }, { "epoch": 0.93, "grad_norm": 1.2778090349926128, "learning_rate": 2.8295340152845076e-07, "loss": 0.5432, "step": 11941 }, { "epoch": 0.93, "grad_norm": 1.2800668510473698, "learning_rate": 2.823602174217643e-07, "loss": 0.5377, "step": 11942 }, { "epoch": 0.93, "grad_norm": 1.340754519166004, "learning_rate": 2.817676468398145e-07, "loss": 0.5583, "step": 11943 }, { "epoch": 0.93, "grad_norm": 1.2766522043336932, "learning_rate": 2.811756898200124e-07, "loss": 0.5518, "step": 11944 }, { "epoch": 0.93, "grad_norm": 1.1960889002666162, "learning_rate": 2.8058434639973155e-07, "loss": 0.4657, "step": 11945 }, { "epoch": 0.93, "grad_norm": 1.2276840427821718, "learning_rate": 2.799936166163075e-07, "loss": 0.4676, "step": 11946 }, { "epoch": 0.93, "grad_norm": 1.1255634804053662, "learning_rate": 2.79403500507035e-07, "loss": 0.4804, "step": 11947 }, { "epoch": 0.93, "grad_norm": 1.1909780264299954, "learning_rate": 2.788139981091698e-07, "loss": 0.4561, "step": 11948 }, { "epoch": 0.93, "grad_norm": 1.1905603626557855, "learning_rate": 2.782251094599331e-07, "loss": 0.4857, "step": 11949 }, { "epoch": 0.93, "grad_norm": 1.1718303926243183, "learning_rate": 2.7763683459650193e-07, "loss": 0.5005, "step": 11950 }, { "epoch": 0.93, "grad_norm": 1.2764484872879782, "learning_rate": 2.770491735560199e-07, "loss": 0.4717, "step": 11951 }, { "epoch": 0.93, "grad_norm": 1.1982047798765632, "learning_rate": 2.764621263755862e-07, "loss": 0.5143, "step": 11952 }, { "epoch": 0.93, "grad_norm": 1.194503211022297, "learning_rate": 2.758756930922646e-07, "loss": 0.4927, "step": 11953 }, { "epoch": 0.93, "grad_norm": 1.1853288578882966, "learning_rate": 2.752898737430809e-07, "loss": 0.5167, "step": 11954 }, { "epoch": 0.93, "grad_norm": 1.1788184394264087, "learning_rate": 2.747046683650201e-07, "loss": 0.4516, "step": 11955 }, { "epoch": 0.93, "grad_norm": 1.307880856494758, "learning_rate": 2.741200769950303e-07, "loss": 0.525, "step": 11956 }, { "epoch": 0.93, "grad_norm": 1.3012016922836251, "learning_rate": 2.735360996700187e-07, "loss": 0.5012, "step": 11957 }, { "epoch": 0.93, "grad_norm": 1.178059952050663, "learning_rate": 2.7295273642685473e-07, "loss": 0.5073, "step": 11958 }, { "epoch": 0.93, "grad_norm": 1.2309126764618947, "learning_rate": 2.7236998730236996e-07, "loss": 0.5447, "step": 11959 }, { "epoch": 0.93, "grad_norm": 1.1406729178618724, "learning_rate": 2.71787852333355e-07, "loss": 0.4653, "step": 11960 }, { "epoch": 0.93, "grad_norm": 1.1757647744478734, "learning_rate": 2.7120633155656606e-07, "loss": 0.4984, "step": 11961 }, { "epoch": 0.93, "grad_norm": 1.185925845766163, "learning_rate": 2.706254250087126e-07, "loss": 0.4842, "step": 11962 }, { "epoch": 0.93, "grad_norm": 1.2629092714518357, "learning_rate": 2.7004513272647415e-07, "loss": 0.4513, "step": 11963 }, { "epoch": 0.93, "grad_norm": 1.3132943973643378, "learning_rate": 2.69465454746487e-07, "loss": 0.5086, "step": 11964 }, { "epoch": 0.93, "grad_norm": 1.3004609134524585, "learning_rate": 2.688863911053474e-07, "loss": 0.5084, "step": 11965 }, { "epoch": 0.93, "grad_norm": 1.2063437667533226, "learning_rate": 2.683079418396173e-07, "loss": 0.5034, "step": 11966 }, { "epoch": 0.93, "grad_norm": 1.208598727097031, "learning_rate": 2.6773010698581516e-07, "loss": 0.5736, "step": 11967 }, { "epoch": 0.93, "grad_norm": 1.1611731083505412, "learning_rate": 2.671528865804229e-07, "loss": 0.4801, "step": 11968 }, { "epoch": 0.93, "grad_norm": 1.170193903252669, "learning_rate": 2.6657628065988483e-07, "loss": 0.4723, "step": 11969 }, { "epoch": 0.93, "grad_norm": 1.087732930285538, "learning_rate": 2.6600028926060283e-07, "loss": 0.4972, "step": 11970 }, { "epoch": 0.93, "grad_norm": 1.1423176786494544, "learning_rate": 2.6542491241894454e-07, "loss": 0.4859, "step": 11971 }, { "epoch": 0.93, "grad_norm": 1.155597451079196, "learning_rate": 2.648501501712342e-07, "loss": 0.506, "step": 11972 }, { "epoch": 0.93, "grad_norm": 1.2153282058183568, "learning_rate": 2.6427600255376164e-07, "loss": 0.4823, "step": 11973 }, { "epoch": 0.93, "grad_norm": 1.1563043839320493, "learning_rate": 2.6370246960277344e-07, "loss": 0.4881, "step": 11974 }, { "epoch": 0.93, "grad_norm": 1.167189117798201, "learning_rate": 2.631295513544818e-07, "loss": 0.4691, "step": 11975 }, { "epoch": 0.93, "grad_norm": 1.1438665445286416, "learning_rate": 2.625572478450578e-07, "loss": 0.4951, "step": 11976 }, { "epoch": 0.93, "grad_norm": 1.3229689650323093, "learning_rate": 2.619855591106324e-07, "loss": 0.546, "step": 11977 }, { "epoch": 0.93, "grad_norm": 1.215251694656694, "learning_rate": 2.614144851873002e-07, "loss": 0.4954, "step": 11978 }, { "epoch": 0.93, "grad_norm": 1.274239550806064, "learning_rate": 2.608440261111178e-07, "loss": 0.5024, "step": 11979 }, { "epoch": 0.93, "grad_norm": 1.273374076153158, "learning_rate": 2.602741819180976e-07, "loss": 0.5202, "step": 11980 }, { "epoch": 0.93, "grad_norm": 1.1657967673550882, "learning_rate": 2.597049526442197e-07, "loss": 0.5017, "step": 11981 }, { "epoch": 0.93, "grad_norm": 1.2373461637342138, "learning_rate": 2.5913633832542083e-07, "loss": 0.5082, "step": 11982 }, { "epoch": 0.93, "grad_norm": 1.1556111710310504, "learning_rate": 2.5856833899760123e-07, "loss": 0.4943, "step": 11983 }, { "epoch": 0.93, "grad_norm": 1.176614307818194, "learning_rate": 2.580009546966211e-07, "loss": 0.4251, "step": 11984 }, { "epoch": 0.93, "grad_norm": 1.2693501504694302, "learning_rate": 2.574341854583029e-07, "loss": 0.5186, "step": 11985 }, { "epoch": 0.93, "grad_norm": 1.1877954015226257, "learning_rate": 2.568680313184302e-07, "loss": 0.5017, "step": 11986 }, { "epoch": 0.93, "grad_norm": 1.2849889025969692, "learning_rate": 2.563024923127477e-07, "loss": 0.5308, "step": 11987 }, { "epoch": 0.93, "grad_norm": 1.23379657568553, "learning_rate": 2.5573756847695806e-07, "loss": 0.506, "step": 11988 }, { "epoch": 0.93, "grad_norm": 1.1914391685065533, "learning_rate": 2.551732598467305e-07, "loss": 0.5038, "step": 11989 }, { "epoch": 0.93, "grad_norm": 1.1735511998993036, "learning_rate": 2.5460956645769085e-07, "loss": 0.5073, "step": 11990 }, { "epoch": 0.93, "grad_norm": 1.0828736809514763, "learning_rate": 2.5404648834542855e-07, "loss": 0.4625, "step": 11991 }, { "epoch": 0.93, "grad_norm": 1.2085040837804049, "learning_rate": 2.534840255454962e-07, "loss": 0.5056, "step": 11992 }, { "epoch": 0.93, "grad_norm": 1.2284209634914356, "learning_rate": 2.529221780933999e-07, "loss": 0.497, "step": 11993 }, { "epoch": 0.93, "grad_norm": 1.234403972044294, "learning_rate": 2.523609460246168e-07, "loss": 0.518, "step": 11994 }, { "epoch": 0.93, "grad_norm": 1.0907697674439658, "learning_rate": 2.5180032937457744e-07, "loss": 0.4483, "step": 11995 }, { "epoch": 0.93, "grad_norm": 1.2134257685555707, "learning_rate": 2.512403281786768e-07, "loss": 0.4758, "step": 11996 }, { "epoch": 0.93, "grad_norm": 1.1679209313443497, "learning_rate": 2.5068094247227227e-07, "loss": 0.4917, "step": 11997 }, { "epoch": 0.93, "grad_norm": 1.2417213956987827, "learning_rate": 2.501221722906799e-07, "loss": 0.5062, "step": 11998 }, { "epoch": 0.93, "grad_norm": 1.1856591864339103, "learning_rate": 2.4956401766917713e-07, "loss": 0.4469, "step": 11999 }, { "epoch": 0.93, "grad_norm": 1.2145081574789132, "learning_rate": 2.4900647864300353e-07, "loss": 0.5042, "step": 12000 }, { "epoch": 0.93, "grad_norm": 1.190122719695915, "learning_rate": 2.4844955524735983e-07, "loss": 0.5048, "step": 12001 }, { "epoch": 0.93, "grad_norm": 1.1787707575146524, "learning_rate": 2.4789324751740674e-07, "loss": 0.4698, "step": 12002 }, { "epoch": 0.93, "grad_norm": 1.2061319303294689, "learning_rate": 2.4733755548826734e-07, "loss": 0.4878, "step": 12003 }, { "epoch": 0.93, "grad_norm": 1.2186741927608036, "learning_rate": 2.467824791950246e-07, "loss": 0.4798, "step": 12004 }, { "epoch": 0.93, "grad_norm": 1.2429860745031263, "learning_rate": 2.4622801867272395e-07, "loss": 0.5458, "step": 12005 }, { "epoch": 0.93, "grad_norm": 1.21187530627746, "learning_rate": 2.456741739563717e-07, "loss": 0.4979, "step": 12006 }, { "epoch": 0.93, "grad_norm": 1.2818807584502108, "learning_rate": 2.4512094508093553e-07, "loss": 0.512, "step": 12007 }, { "epoch": 0.93, "grad_norm": 1.3008086513806378, "learning_rate": 2.445683320813408e-07, "loss": 0.505, "step": 12008 }, { "epoch": 0.93, "grad_norm": 1.165823097883934, "learning_rate": 2.4401633499248065e-07, "loss": 0.5171, "step": 12009 }, { "epoch": 0.93, "grad_norm": 1.2662404765517519, "learning_rate": 2.434649538492018e-07, "loss": 0.5399, "step": 12010 }, { "epoch": 0.93, "grad_norm": 1.1070861845849567, "learning_rate": 2.4291418868631845e-07, "loss": 0.4199, "step": 12011 }, { "epoch": 0.93, "grad_norm": 1.074086573011623, "learning_rate": 2.423640395386018e-07, "loss": 0.4101, "step": 12012 }, { "epoch": 0.93, "grad_norm": 1.3085612278782843, "learning_rate": 2.418145064407862e-07, "loss": 0.5049, "step": 12013 }, { "epoch": 0.93, "grad_norm": 1.2489764791553917, "learning_rate": 2.4126558942756617e-07, "loss": 0.5287, "step": 12014 }, { "epoch": 0.93, "grad_norm": 1.248631347960901, "learning_rate": 2.407172885335984e-07, "loss": 0.479, "step": 12015 }, { "epoch": 0.93, "grad_norm": 1.241694466495158, "learning_rate": 2.401696037934997e-07, "loss": 0.5094, "step": 12016 }, { "epoch": 0.93, "grad_norm": 1.2000223257054292, "learning_rate": 2.39622535241848e-07, "loss": 0.4584, "step": 12017 }, { "epoch": 0.93, "grad_norm": 1.4087392180740177, "learning_rate": 2.3907608291318217e-07, "loss": 0.5408, "step": 12018 }, { "epoch": 0.93, "grad_norm": 1.0935510181897552, "learning_rate": 2.3853024684200363e-07, "loss": 0.45, "step": 12019 }, { "epoch": 0.93, "grad_norm": 1.2200969686431282, "learning_rate": 2.379850270627726e-07, "loss": 0.5164, "step": 12020 }, { "epoch": 0.93, "grad_norm": 1.1740251778789763, "learning_rate": 2.3744042360991149e-07, "loss": 0.4775, "step": 12021 }, { "epoch": 0.93, "grad_norm": 1.1981639883177975, "learning_rate": 2.3689643651780614e-07, "loss": 0.4422, "step": 12022 }, { "epoch": 0.93, "grad_norm": 1.311206952707571, "learning_rate": 2.363530658207991e-07, "loss": 0.5161, "step": 12023 }, { "epoch": 0.93, "grad_norm": 1.3359648049224406, "learning_rate": 2.3581031155319622e-07, "loss": 0.4915, "step": 12024 }, { "epoch": 0.93, "grad_norm": 1.2361912945423417, "learning_rate": 2.3526817374926457e-07, "loss": 0.5389, "step": 12025 }, { "epoch": 0.93, "grad_norm": 1.2609622447104212, "learning_rate": 2.3472665244323346e-07, "loss": 0.5958, "step": 12026 }, { "epoch": 0.93, "grad_norm": 1.2986885167861468, "learning_rate": 2.3418574766929215e-07, "loss": 0.4672, "step": 12027 }, { "epoch": 0.93, "grad_norm": 1.194463740175709, "learning_rate": 2.336454594615878e-07, "loss": 0.5082, "step": 12028 }, { "epoch": 0.93, "grad_norm": 1.2091560303535505, "learning_rate": 2.331057878542342e-07, "loss": 0.481, "step": 12029 }, { "epoch": 0.93, "grad_norm": 1.3155809753010863, "learning_rate": 2.3256673288130194e-07, "loss": 0.5244, "step": 12030 }, { "epoch": 0.93, "grad_norm": 1.2707554006392356, "learning_rate": 2.3202829457682597e-07, "loss": 0.5044, "step": 12031 }, { "epoch": 0.93, "grad_norm": 1.2441987842997217, "learning_rate": 2.314904729748002e-07, "loss": 0.5169, "step": 12032 }, { "epoch": 0.93, "grad_norm": 1.3247685374215392, "learning_rate": 2.309532681091786e-07, "loss": 0.5618, "step": 12033 }, { "epoch": 0.93, "grad_norm": 1.1598597703228415, "learning_rate": 2.3041668001387852e-07, "loss": 0.4502, "step": 12034 }, { "epoch": 0.93, "grad_norm": 1.146899328571442, "learning_rate": 2.2988070872277834e-07, "loss": 0.4714, "step": 12035 }, { "epoch": 0.93, "grad_norm": 1.446801980073105, "learning_rate": 2.293453542697166e-07, "loss": 0.5608, "step": 12036 }, { "epoch": 0.93, "grad_norm": 1.3620473092162897, "learning_rate": 2.2881061668849292e-07, "loss": 0.4841, "step": 12037 }, { "epoch": 0.93, "grad_norm": 1.2709417401128855, "learning_rate": 2.2827649601286693e-07, "loss": 0.559, "step": 12038 }, { "epoch": 0.93, "grad_norm": 1.1602017685922585, "learning_rate": 2.2774299227656282e-07, "loss": 0.4496, "step": 12039 }, { "epoch": 0.93, "grad_norm": 1.0776044169235615, "learning_rate": 2.272101055132603e-07, "loss": 0.4796, "step": 12040 }, { "epoch": 0.93, "grad_norm": 1.2741059029367194, "learning_rate": 2.2667783575660463e-07, "loss": 0.5094, "step": 12041 }, { "epoch": 0.93, "grad_norm": 1.2534939096863569, "learning_rate": 2.2614618304020118e-07, "loss": 0.5361, "step": 12042 }, { "epoch": 0.93, "grad_norm": 1.1791482160133449, "learning_rate": 2.2561514739761649e-07, "loss": 0.4875, "step": 12043 }, { "epoch": 0.93, "grad_norm": 1.2272947033285686, "learning_rate": 2.2508472886237586e-07, "loss": 0.5262, "step": 12044 }, { "epoch": 0.93, "grad_norm": 1.1974734052918359, "learning_rate": 2.2455492746796814e-07, "loss": 0.4956, "step": 12045 }, { "epoch": 0.93, "grad_norm": 1.1009479729454479, "learning_rate": 2.240257432478421e-07, "loss": 0.47, "step": 12046 }, { "epoch": 0.93, "grad_norm": 1.2522820146351907, "learning_rate": 2.234971762354099e-07, "loss": 0.5039, "step": 12047 }, { "epoch": 0.93, "grad_norm": 1.3355769959294632, "learning_rate": 2.229692264640404e-07, "loss": 0.5158, "step": 12048 }, { "epoch": 0.93, "grad_norm": 1.1656456237277526, "learning_rate": 2.2244189396706695e-07, "loss": 0.5079, "step": 12049 }, { "epoch": 0.93, "grad_norm": 1.2153635170376567, "learning_rate": 2.219151787777807e-07, "loss": 0.5088, "step": 12050 }, { "epoch": 0.93, "grad_norm": 1.2055785687228227, "learning_rate": 2.2138908092943834e-07, "loss": 0.5088, "step": 12051 }, { "epoch": 0.93, "grad_norm": 1.1917718545243419, "learning_rate": 2.2086360045525444e-07, "loss": 0.4959, "step": 12052 }, { "epoch": 0.94, "grad_norm": 1.3766810804509986, "learning_rate": 2.2033873738840584e-07, "loss": 0.4894, "step": 12053 }, { "epoch": 0.94, "grad_norm": 1.1321325431876532, "learning_rate": 2.1981449176202818e-07, "loss": 0.4704, "step": 12054 }, { "epoch": 0.94, "grad_norm": 1.1921969432131843, "learning_rate": 2.192908636092206e-07, "loss": 0.4869, "step": 12055 }, { "epoch": 0.94, "grad_norm": 1.2414569745944832, "learning_rate": 2.1876785296304214e-07, "loss": 0.5467, "step": 12056 }, { "epoch": 0.94, "grad_norm": 1.124624825544048, "learning_rate": 2.1824545985651535e-07, "loss": 0.4943, "step": 12057 }, { "epoch": 0.94, "grad_norm": 1.2874571080146386, "learning_rate": 2.1772368432261935e-07, "loss": 0.4755, "step": 12058 }, { "epoch": 0.94, "grad_norm": 1.155986188743347, "learning_rate": 2.1720252639429674e-07, "loss": 0.4736, "step": 12059 }, { "epoch": 0.94, "grad_norm": 1.2450519858847429, "learning_rate": 2.1668198610445114e-07, "loss": 0.4918, "step": 12060 }, { "epoch": 0.94, "grad_norm": 1.0777794104385683, "learning_rate": 2.1616206348594737e-07, "loss": 0.4503, "step": 12061 }, { "epoch": 0.94, "grad_norm": 1.2460140095132886, "learning_rate": 2.1564275857160922e-07, "loss": 0.4804, "step": 12062 }, { "epoch": 0.94, "grad_norm": 1.1207008337859716, "learning_rate": 2.151240713942271e-07, "loss": 0.4991, "step": 12063 }, { "epoch": 0.94, "grad_norm": 1.2149277930918623, "learning_rate": 2.1460600198654368e-07, "loss": 0.49, "step": 12064 }, { "epoch": 0.94, "grad_norm": 1.17303587317349, "learning_rate": 2.1408855038126953e-07, "loss": 0.4569, "step": 12065 }, { "epoch": 0.94, "grad_norm": 1.21212734709437, "learning_rate": 2.135717166110729e-07, "loss": 0.4933, "step": 12066 }, { "epoch": 0.94, "grad_norm": 1.0934133283980239, "learning_rate": 2.1305550070858773e-07, "loss": 0.4728, "step": 12067 }, { "epoch": 0.94, "grad_norm": 1.3152276261295004, "learning_rate": 2.1253990270640013e-07, "loss": 0.5402, "step": 12068 }, { "epoch": 0.94, "grad_norm": 1.2297246226968586, "learning_rate": 2.1202492263706743e-07, "loss": 0.5255, "step": 12069 }, { "epoch": 0.94, "grad_norm": 1.1621308140420006, "learning_rate": 2.1151056053309915e-07, "loss": 0.4744, "step": 12070 }, { "epoch": 0.94, "grad_norm": 1.225138426764048, "learning_rate": 2.1099681642697156e-07, "loss": 0.4856, "step": 12071 }, { "epoch": 0.94, "grad_norm": 1.1916113004738296, "learning_rate": 2.104836903511198e-07, "loss": 0.4949, "step": 12072 }, { "epoch": 0.94, "grad_norm": 1.3308733608808658, "learning_rate": 2.0997118233794023e-07, "loss": 0.4825, "step": 12073 }, { "epoch": 0.94, "grad_norm": 1.1602075225083806, "learning_rate": 2.0945929241978913e-07, "loss": 0.4781, "step": 12074 }, { "epoch": 0.94, "grad_norm": 1.176962832244029, "learning_rate": 2.089480206289851e-07, "loss": 0.5153, "step": 12075 }, { "epoch": 0.94, "grad_norm": 1.2696486668207583, "learning_rate": 2.0843736699780792e-07, "loss": 0.4938, "step": 12076 }, { "epoch": 0.94, "grad_norm": 1.2157514315170592, "learning_rate": 2.0792733155849842e-07, "loss": 0.5243, "step": 12077 }, { "epoch": 0.94, "grad_norm": 1.1521961699558692, "learning_rate": 2.074179143432564e-07, "loss": 0.4574, "step": 12078 }, { "epoch": 0.94, "grad_norm": 1.1489113589126132, "learning_rate": 2.0690911538424507e-07, "loss": 0.4659, "step": 12079 }, { "epoch": 0.94, "grad_norm": 1.195690967755979, "learning_rate": 2.0640093471358648e-07, "loss": 0.5091, "step": 12080 }, { "epoch": 0.94, "grad_norm": 1.20140458000584, "learning_rate": 2.0589337236336493e-07, "loss": 0.5135, "step": 12081 }, { "epoch": 0.94, "grad_norm": 1.3190972869775741, "learning_rate": 2.0538642836562484e-07, "loss": 0.4888, "step": 12082 }, { "epoch": 0.94, "grad_norm": 1.2585202234103547, "learning_rate": 2.0488010275237502e-07, "loss": 0.5402, "step": 12083 }, { "epoch": 0.94, "grad_norm": 1.3221546391348353, "learning_rate": 2.0437439555557993e-07, "loss": 0.5438, "step": 12084 }, { "epoch": 0.94, "grad_norm": 1.203023782410991, "learning_rate": 2.0386930680716732e-07, "loss": 0.4829, "step": 12085 }, { "epoch": 0.94, "grad_norm": 1.2283256153362836, "learning_rate": 2.0336483653902727e-07, "loss": 0.515, "step": 12086 }, { "epoch": 0.94, "grad_norm": 1.2224562118738795, "learning_rate": 2.0286098478300986e-07, "loss": 0.4931, "step": 12087 }, { "epoch": 0.94, "grad_norm": 1.1949322164137284, "learning_rate": 2.0235775157092407e-07, "loss": 0.4824, "step": 12088 }, { "epoch": 0.94, "grad_norm": 1.3721185750380827, "learning_rate": 2.0185513693454338e-07, "loss": 0.5229, "step": 12089 }, { "epoch": 0.94, "grad_norm": 1.2677016469903972, "learning_rate": 2.01353140905598e-07, "loss": 0.5052, "step": 12090 }, { "epoch": 0.94, "grad_norm": 1.088415159817075, "learning_rate": 2.0085176351578472e-07, "loss": 0.482, "step": 12091 }, { "epoch": 0.94, "grad_norm": 1.2008831390457646, "learning_rate": 2.0035100479675607e-07, "loss": 0.4701, "step": 12092 }, { "epoch": 0.94, "grad_norm": 1.3032987135771403, "learning_rate": 1.9985086478012782e-07, "loss": 0.4644, "step": 12093 }, { "epoch": 0.94, "grad_norm": 1.2269952095827388, "learning_rate": 1.9935134349747698e-07, "loss": 0.5035, "step": 12094 }, { "epoch": 0.94, "grad_norm": 1.2716009552419554, "learning_rate": 1.9885244098034052e-07, "loss": 0.5514, "step": 12095 }, { "epoch": 0.94, "grad_norm": 1.1780067748796237, "learning_rate": 1.9835415726021656e-07, "loss": 0.489, "step": 12096 }, { "epoch": 0.94, "grad_norm": 1.1294849852652744, "learning_rate": 1.9785649236856442e-07, "loss": 0.4691, "step": 12097 }, { "epoch": 0.94, "grad_norm": 1.1899185147155678, "learning_rate": 1.9735944633680448e-07, "loss": 0.4463, "step": 12098 }, { "epoch": 0.94, "grad_norm": 1.1943997157886674, "learning_rate": 1.9686301919631833e-07, "loss": 0.4662, "step": 12099 }, { "epoch": 0.94, "grad_norm": 1.3422750317915735, "learning_rate": 1.9636721097844648e-07, "loss": 0.573, "step": 12100 }, { "epoch": 0.94, "grad_norm": 1.1873344004760293, "learning_rate": 1.9587202171449272e-07, "loss": 0.4481, "step": 12101 }, { "epoch": 0.94, "grad_norm": 1.2233954684271022, "learning_rate": 1.9537745143572096e-07, "loss": 0.5118, "step": 12102 }, { "epoch": 0.94, "grad_norm": 1.3482171259825788, "learning_rate": 1.948835001733551e-07, "loss": 0.4809, "step": 12103 }, { "epoch": 0.94, "grad_norm": 1.2038698244123216, "learning_rate": 1.9439016795858357e-07, "loss": 0.5116, "step": 12104 }, { "epoch": 0.94, "grad_norm": 1.1548691829894526, "learning_rate": 1.9389745482254918e-07, "loss": 0.5095, "step": 12105 }, { "epoch": 0.94, "grad_norm": 1.187787774001014, "learning_rate": 1.9340536079636263e-07, "loss": 0.5525, "step": 12106 }, { "epoch": 0.94, "grad_norm": 1.199857212551977, "learning_rate": 1.9291388591109017e-07, "loss": 0.5203, "step": 12107 }, { "epoch": 0.94, "grad_norm": 1.2188206676633773, "learning_rate": 1.9242303019776253e-07, "loss": 0.5279, "step": 12108 }, { "epoch": 0.94, "grad_norm": 1.220417837756893, "learning_rate": 1.919327936873694e-07, "loss": 0.5136, "step": 12109 }, { "epoch": 0.94, "grad_norm": 1.1514269268919335, "learning_rate": 1.9144317641086152e-07, "loss": 0.5104, "step": 12110 }, { "epoch": 0.94, "grad_norm": 1.3972273977338436, "learning_rate": 1.9095417839915198e-07, "loss": 0.5184, "step": 12111 }, { "epoch": 0.94, "grad_norm": 1.2253323843640653, "learning_rate": 1.9046579968311274e-07, "loss": 0.4759, "step": 12112 }, { "epoch": 0.94, "grad_norm": 1.2712405860274745, "learning_rate": 1.8997804029357801e-07, "loss": 0.4777, "step": 12113 }, { "epoch": 0.94, "grad_norm": 1.2710229179843224, "learning_rate": 1.894909002613432e-07, "loss": 0.4859, "step": 12114 }, { "epoch": 0.94, "grad_norm": 1.2055270504157332, "learning_rate": 1.8900437961716257e-07, "loss": 0.5429, "step": 12115 }, { "epoch": 0.94, "grad_norm": 1.2961122958722953, "learning_rate": 1.8851847839175375e-07, "loss": 0.5402, "step": 12116 }, { "epoch": 0.94, "grad_norm": 1.28574777457356, "learning_rate": 1.8803319661579554e-07, "loss": 0.515, "step": 12117 }, { "epoch": 0.94, "grad_norm": 1.1895949806850161, "learning_rate": 1.8754853431992348e-07, "loss": 0.4661, "step": 12118 }, { "epoch": 0.94, "grad_norm": 1.2118984224476805, "learning_rate": 1.870644915347386e-07, "loss": 0.5025, "step": 12119 }, { "epoch": 0.94, "grad_norm": 1.143400447356674, "learning_rate": 1.865810682907987e-07, "loss": 0.4463, "step": 12120 }, { "epoch": 0.94, "grad_norm": 1.2652369716767213, "learning_rate": 1.8609826461862824e-07, "loss": 0.4942, "step": 12121 }, { "epoch": 0.94, "grad_norm": 1.1889098983068962, "learning_rate": 1.8561608054870615e-07, "loss": 0.4918, "step": 12122 }, { "epoch": 0.94, "grad_norm": 1.0636012597097029, "learning_rate": 1.8513451611147704e-07, "loss": 0.4285, "step": 12123 }, { "epoch": 0.94, "grad_norm": 1.2256293178951103, "learning_rate": 1.8465357133734542e-07, "loss": 0.5635, "step": 12124 }, { "epoch": 0.94, "grad_norm": 1.2966667375752512, "learning_rate": 1.8417324625667254e-07, "loss": 0.488, "step": 12125 }, { "epoch": 0.94, "grad_norm": 1.220424675273221, "learning_rate": 1.8369354089978643e-07, "loss": 0.4745, "step": 12126 }, { "epoch": 0.94, "grad_norm": 1.2795046572076876, "learning_rate": 1.832144552969728e-07, "loss": 0.5058, "step": 12127 }, { "epoch": 0.94, "grad_norm": 1.2634688484025987, "learning_rate": 1.827359894784775e-07, "loss": 0.5507, "step": 12128 }, { "epoch": 0.94, "grad_norm": 1.1381632223315028, "learning_rate": 1.8225814347451077e-07, "loss": 0.4786, "step": 12129 }, { "epoch": 0.94, "grad_norm": 1.1123390359908596, "learning_rate": 1.817809173152396e-07, "loss": 0.4237, "step": 12130 }, { "epoch": 0.94, "grad_norm": 1.1959656570186052, "learning_rate": 1.8130431103079437e-07, "loss": 0.505, "step": 12131 }, { "epoch": 0.94, "grad_norm": 1.2462354716962607, "learning_rate": 1.8082832465126544e-07, "loss": 0.5069, "step": 12132 }, { "epoch": 0.94, "grad_norm": 1.1986920818700946, "learning_rate": 1.803529582067054e-07, "loss": 0.4646, "step": 12133 }, { "epoch": 0.94, "grad_norm": 1.3195592322807088, "learning_rate": 1.7987821172712584e-07, "loss": 0.5974, "step": 12134 }, { "epoch": 0.94, "grad_norm": 1.312850950640985, "learning_rate": 1.7940408524249942e-07, "loss": 0.5147, "step": 12135 }, { "epoch": 0.94, "grad_norm": 1.2052311977810213, "learning_rate": 1.789305787827611e-07, "loss": 0.5016, "step": 12136 }, { "epoch": 0.94, "grad_norm": 1.314242704647338, "learning_rate": 1.7845769237780585e-07, "loss": 0.5303, "step": 12137 }, { "epoch": 0.94, "grad_norm": 1.251032927030242, "learning_rate": 1.779854260574887e-07, "loss": 0.5468, "step": 12138 }, { "epoch": 0.94, "grad_norm": 1.2081209467441474, "learning_rate": 1.7751377985162689e-07, "loss": 0.448, "step": 12139 }, { "epoch": 0.94, "grad_norm": 1.0903966451479938, "learning_rate": 1.770427537899966e-07, "loss": 0.4337, "step": 12140 }, { "epoch": 0.94, "grad_norm": 1.2070133738752629, "learning_rate": 1.7657234790233736e-07, "loss": 0.4701, "step": 12141 }, { "epoch": 0.94, "grad_norm": 1.1493545815011306, "learning_rate": 1.7610256221834765e-07, "loss": 0.506, "step": 12142 }, { "epoch": 0.94, "grad_norm": 1.2990020406059466, "learning_rate": 1.756333967676882e-07, "loss": 0.5221, "step": 12143 }, { "epoch": 0.94, "grad_norm": 1.2151532039675281, "learning_rate": 1.7516485157997975e-07, "loss": 0.5042, "step": 12144 }, { "epoch": 0.94, "grad_norm": 1.4123089281328949, "learning_rate": 1.746969266848031e-07, "loss": 0.5706, "step": 12145 }, { "epoch": 0.94, "grad_norm": 1.2871002058012264, "learning_rate": 1.7422962211170236e-07, "loss": 0.543, "step": 12146 }, { "epoch": 0.94, "grad_norm": 1.2048201570809265, "learning_rate": 1.737629378901795e-07, "loss": 0.5779, "step": 12147 }, { "epoch": 0.94, "grad_norm": 1.1451704304051766, "learning_rate": 1.7329687404969874e-07, "loss": 0.4568, "step": 12148 }, { "epoch": 0.94, "grad_norm": 1.0856477090044963, "learning_rate": 1.7283143061968654e-07, "loss": 0.4355, "step": 12149 }, { "epoch": 0.94, "grad_norm": 1.2774099163685264, "learning_rate": 1.7236660762952606e-07, "loss": 0.5469, "step": 12150 }, { "epoch": 0.94, "grad_norm": 1.2163923886292756, "learning_rate": 1.7190240510856605e-07, "loss": 0.4998, "step": 12151 }, { "epoch": 0.94, "grad_norm": 1.180338395783769, "learning_rate": 1.7143882308611305e-07, "loss": 0.4558, "step": 12152 }, { "epoch": 0.94, "grad_norm": 1.3467759062741584, "learning_rate": 1.7097586159143698e-07, "loss": 0.5154, "step": 12153 }, { "epoch": 0.94, "grad_norm": 1.2212399210362663, "learning_rate": 1.7051352065376448e-07, "loss": 0.4634, "step": 12154 }, { "epoch": 0.94, "grad_norm": 1.2382064941175563, "learning_rate": 1.7005180030228886e-07, "loss": 0.4759, "step": 12155 }, { "epoch": 0.94, "grad_norm": 1.17365907286405, "learning_rate": 1.695907005661568e-07, "loss": 0.501, "step": 12156 }, { "epoch": 0.94, "grad_norm": 1.2413998871934906, "learning_rate": 1.691302214744839e-07, "loss": 0.4929, "step": 12157 }, { "epoch": 0.94, "grad_norm": 1.2592669779072443, "learning_rate": 1.6867036305634022e-07, "loss": 0.5177, "step": 12158 }, { "epoch": 0.94, "grad_norm": 1.305444235260479, "learning_rate": 1.6821112534075924e-07, "loss": 0.5249, "step": 12159 }, { "epoch": 0.94, "grad_norm": 1.2793911731705891, "learning_rate": 1.6775250835673552e-07, "loss": 0.5262, "step": 12160 }, { "epoch": 0.94, "grad_norm": 1.211365458021725, "learning_rate": 1.6729451213322255e-07, "loss": 0.4999, "step": 12161 }, { "epoch": 0.94, "grad_norm": 1.2238898346042506, "learning_rate": 1.6683713669913836e-07, "loss": 0.5112, "step": 12162 }, { "epoch": 0.94, "grad_norm": 1.2393212508850167, "learning_rate": 1.6638038208335762e-07, "loss": 0.507, "step": 12163 }, { "epoch": 0.94, "grad_norm": 1.2889482852159195, "learning_rate": 1.6592424831471832e-07, "loss": 0.5763, "step": 12164 }, { "epoch": 0.94, "grad_norm": 1.204404819042177, "learning_rate": 1.6546873542201858e-07, "loss": 0.4693, "step": 12165 }, { "epoch": 0.94, "grad_norm": 1.1430678141510713, "learning_rate": 1.650138434340165e-07, "loss": 0.4736, "step": 12166 }, { "epoch": 0.94, "grad_norm": 1.1266679584808017, "learning_rate": 1.6455957237943354e-07, "loss": 0.5101, "step": 12167 }, { "epoch": 0.94, "grad_norm": 1.2498853154024294, "learning_rate": 1.6410592228694788e-07, "loss": 0.4867, "step": 12168 }, { "epoch": 0.94, "grad_norm": 1.1998884586311045, "learning_rate": 1.6365289318520216e-07, "loss": 0.4719, "step": 12169 }, { "epoch": 0.94, "grad_norm": 1.1959578822511996, "learning_rate": 1.6320048510279906e-07, "loss": 0.4996, "step": 12170 }, { "epoch": 0.94, "grad_norm": 1.3564610435721631, "learning_rate": 1.6274869806829906e-07, "loss": 0.537, "step": 12171 }, { "epoch": 0.94, "grad_norm": 1.2130775491906285, "learning_rate": 1.6229753211022825e-07, "loss": 0.4888, "step": 12172 }, { "epoch": 0.94, "grad_norm": 1.10593094616037, "learning_rate": 1.6184698725706938e-07, "loss": 0.4739, "step": 12173 }, { "epoch": 0.94, "grad_norm": 1.1251039456983958, "learning_rate": 1.613970635372686e-07, "loss": 0.4732, "step": 12174 }, { "epoch": 0.94, "grad_norm": 1.208432812816052, "learning_rate": 1.6094776097923205e-07, "loss": 0.4979, "step": 12175 }, { "epoch": 0.94, "grad_norm": 1.228300576120459, "learning_rate": 1.6049907961132595e-07, "loss": 0.5279, "step": 12176 }, { "epoch": 0.94, "grad_norm": 1.178895242183351, "learning_rate": 1.6005101946187873e-07, "loss": 0.4616, "step": 12177 }, { "epoch": 0.94, "grad_norm": 1.2592580319743523, "learning_rate": 1.596035805591778e-07, "loss": 0.5268, "step": 12178 }, { "epoch": 0.94, "grad_norm": 1.2461039862615906, "learning_rate": 1.5915676293147275e-07, "loss": 0.5007, "step": 12179 }, { "epoch": 0.94, "grad_norm": 1.1933419933395568, "learning_rate": 1.5871056660697326e-07, "loss": 0.4544, "step": 12180 }, { "epoch": 0.94, "grad_norm": 1.2475679102361281, "learning_rate": 1.58264991613849e-07, "loss": 0.5029, "step": 12181 }, { "epoch": 0.95, "grad_norm": 1.1796509187597046, "learning_rate": 1.5782003798023306e-07, "loss": 0.4684, "step": 12182 }, { "epoch": 0.95, "grad_norm": 1.1946990489675904, "learning_rate": 1.5737570573421735e-07, "loss": 0.5202, "step": 12183 }, { "epoch": 0.95, "grad_norm": 1.237524047772037, "learning_rate": 1.56931994903855e-07, "loss": 0.5776, "step": 12184 }, { "epoch": 0.95, "grad_norm": 1.1674334980487466, "learning_rate": 1.5648890551715924e-07, "loss": 0.4949, "step": 12185 }, { "epoch": 0.95, "grad_norm": 1.3034596404575625, "learning_rate": 1.5604643760210426e-07, "loss": 0.4857, "step": 12186 }, { "epoch": 0.95, "grad_norm": 1.1972559668694358, "learning_rate": 1.5560459118662663e-07, "loss": 0.5006, "step": 12187 }, { "epoch": 0.95, "grad_norm": 1.1583195799873276, "learning_rate": 1.5516336629862073e-07, "loss": 0.4864, "step": 12188 }, { "epoch": 0.95, "grad_norm": 1.302249206150199, "learning_rate": 1.5472276296594424e-07, "loss": 0.5478, "step": 12189 }, { "epoch": 0.95, "grad_norm": 1.4013798198064138, "learning_rate": 1.5428278121641494e-07, "loss": 0.5301, "step": 12190 }, { "epoch": 0.95, "grad_norm": 1.1608281100708564, "learning_rate": 1.538434210778106e-07, "loss": 0.5069, "step": 12191 }, { "epoch": 0.95, "grad_norm": 1.3397885414070871, "learning_rate": 1.5340468257787012e-07, "loss": 0.5336, "step": 12192 }, { "epoch": 0.95, "grad_norm": 1.2724435121480506, "learning_rate": 1.5296656574429469e-07, "loss": 0.5334, "step": 12193 }, { "epoch": 0.95, "grad_norm": 1.240427365643985, "learning_rate": 1.5252907060474332e-07, "loss": 0.5081, "step": 12194 }, { "epoch": 0.95, "grad_norm": 1.1448613234418503, "learning_rate": 1.5209219718683833e-07, "loss": 0.509, "step": 12195 }, { "epoch": 0.95, "grad_norm": 1.1004543276525134, "learning_rate": 1.5165594551816209e-07, "loss": 0.4182, "step": 12196 }, { "epoch": 0.95, "grad_norm": 1.2538325683241198, "learning_rate": 1.5122031562625593e-07, "loss": 0.5403, "step": 12197 }, { "epoch": 0.95, "grad_norm": 1.1693993931748687, "learning_rate": 1.5078530753862453e-07, "loss": 0.4503, "step": 12198 }, { "epoch": 0.95, "grad_norm": 1.0690122628360594, "learning_rate": 1.5035092128273144e-07, "loss": 0.4536, "step": 12199 }, { "epoch": 0.95, "grad_norm": 1.287949191130858, "learning_rate": 1.4991715688600362e-07, "loss": 0.5689, "step": 12200 }, { "epoch": 0.95, "grad_norm": 1.1418118507533332, "learning_rate": 1.494840143758236e-07, "loss": 0.4651, "step": 12201 }, { "epoch": 0.95, "grad_norm": 1.2050147140364351, "learning_rate": 1.4905149377954064e-07, "loss": 0.5358, "step": 12202 }, { "epoch": 0.95, "grad_norm": 1.2112102570523997, "learning_rate": 1.4861959512446067e-07, "loss": 0.4725, "step": 12203 }, { "epoch": 0.95, "grad_norm": 1.1682220490514175, "learning_rate": 1.4818831843785297e-07, "loss": 0.4725, "step": 12204 }, { "epoch": 0.95, "grad_norm": 1.2655820957022974, "learning_rate": 1.4775766374694466e-07, "loss": 0.5276, "step": 12205 }, { "epoch": 0.95, "grad_norm": 1.2657145244818975, "learning_rate": 1.4732763107892734e-07, "loss": 0.5174, "step": 12206 }, { "epoch": 0.95, "grad_norm": 1.193764925159402, "learning_rate": 1.4689822046094816e-07, "loss": 0.4801, "step": 12207 }, { "epoch": 0.95, "grad_norm": 1.3143625214002959, "learning_rate": 1.4646943192011986e-07, "loss": 0.5459, "step": 12208 }, { "epoch": 0.95, "grad_norm": 1.2619091161959535, "learning_rate": 1.4604126548351416e-07, "loss": 0.5323, "step": 12209 }, { "epoch": 0.95, "grad_norm": 1.3238760355600567, "learning_rate": 1.4561372117816276e-07, "loss": 0.5621, "step": 12210 }, { "epoch": 0.95, "grad_norm": 1.2289468223824649, "learning_rate": 1.451867990310596e-07, "loss": 0.4882, "step": 12211 }, { "epoch": 0.95, "grad_norm": 1.2559498328680467, "learning_rate": 1.4476049906915756e-07, "loss": 0.4945, "step": 12212 }, { "epoch": 0.95, "grad_norm": 1.1901960386156154, "learning_rate": 1.4433482131937183e-07, "loss": 0.4715, "step": 12213 }, { "epoch": 0.95, "grad_norm": 1.2658871508708545, "learning_rate": 1.439097658085764e-07, "loss": 0.5432, "step": 12214 }, { "epoch": 0.95, "grad_norm": 1.3043492044220428, "learning_rate": 1.4348533256360985e-07, "loss": 0.4938, "step": 12215 }, { "epoch": 0.95, "grad_norm": 1.274175979766508, "learning_rate": 1.430615216112674e-07, "loss": 0.4766, "step": 12216 }, { "epoch": 0.95, "grad_norm": 1.1647408064466906, "learning_rate": 1.426383329783043e-07, "loss": 0.4779, "step": 12217 }, { "epoch": 0.95, "grad_norm": 1.2916490902268833, "learning_rate": 1.4221576669144144e-07, "loss": 0.5268, "step": 12218 }, { "epoch": 0.95, "grad_norm": 1.3536018024880108, "learning_rate": 1.417938227773563e-07, "loss": 0.5474, "step": 12219 }, { "epoch": 0.95, "grad_norm": 1.2916682868865321, "learning_rate": 1.4137250126268876e-07, "loss": 0.5385, "step": 12220 }, { "epoch": 0.95, "grad_norm": 1.172899586681365, "learning_rate": 1.409518021740408e-07, "loss": 0.4714, "step": 12221 }, { "epoch": 0.95, "grad_norm": 1.2524518761745613, "learning_rate": 1.4053172553797012e-07, "loss": 0.5316, "step": 12222 }, { "epoch": 0.95, "grad_norm": 1.2400412581870919, "learning_rate": 1.4011227138099882e-07, "loss": 0.4826, "step": 12223 }, { "epoch": 0.95, "grad_norm": 1.151157712768384, "learning_rate": 1.3969343972961124e-07, "loss": 0.4675, "step": 12224 }, { "epoch": 0.95, "grad_norm": 1.2691926944908332, "learning_rate": 1.3927523061024962e-07, "loss": 0.538, "step": 12225 }, { "epoch": 0.95, "grad_norm": 1.2552149232978944, "learning_rate": 1.3885764404931835e-07, "loss": 0.5108, "step": 12226 }, { "epoch": 0.95, "grad_norm": 1.1463036467832868, "learning_rate": 1.3844068007317856e-07, "loss": 0.4776, "step": 12227 }, { "epoch": 0.95, "grad_norm": 1.2764279877148006, "learning_rate": 1.3802433870815922e-07, "loss": 0.5208, "step": 12228 }, { "epoch": 0.95, "grad_norm": 1.1551585329751224, "learning_rate": 1.3760861998054264e-07, "loss": 0.4995, "step": 12229 }, { "epoch": 0.95, "grad_norm": 1.244411468967765, "learning_rate": 1.3719352391657893e-07, "loss": 0.5064, "step": 12230 }, { "epoch": 0.95, "grad_norm": 1.115744612663472, "learning_rate": 1.3677905054247265e-07, "loss": 0.4775, "step": 12231 }, { "epoch": 0.95, "grad_norm": 1.1452109756332838, "learning_rate": 1.363651998843929e-07, "loss": 0.4523, "step": 12232 }, { "epoch": 0.95, "grad_norm": 1.2152056875493829, "learning_rate": 1.3595197196846655e-07, "loss": 0.4958, "step": 12233 }, { "epoch": 0.95, "grad_norm": 1.38415849770913, "learning_rate": 1.3553936682078494e-07, "loss": 0.5255, "step": 12234 }, { "epoch": 0.95, "grad_norm": 1.298169374841616, "learning_rate": 1.3512738446739726e-07, "loss": 0.5321, "step": 12235 }, { "epoch": 0.95, "grad_norm": 1.1781939215699344, "learning_rate": 1.347160249343149e-07, "loss": 0.4636, "step": 12236 }, { "epoch": 0.95, "grad_norm": 1.242034326862186, "learning_rate": 1.34305288247506e-07, "loss": 0.5068, "step": 12237 }, { "epoch": 0.95, "grad_norm": 1.2131952219650455, "learning_rate": 1.3389517443290535e-07, "loss": 0.5173, "step": 12238 }, { "epoch": 0.95, "grad_norm": 1.2971394280789845, "learning_rate": 1.3348568351640446e-07, "loss": 0.5145, "step": 12239 }, { "epoch": 0.95, "grad_norm": 1.3390528176668712, "learning_rate": 1.3307681552385598e-07, "loss": 0.5027, "step": 12240 }, { "epoch": 0.95, "grad_norm": 1.1539731160640494, "learning_rate": 1.3266857048107706e-07, "loss": 0.4661, "step": 12241 }, { "epoch": 0.95, "grad_norm": 1.0989461901146003, "learning_rate": 1.322609484138382e-07, "loss": 0.4582, "step": 12242 }, { "epoch": 0.95, "grad_norm": 1.2298251451866886, "learning_rate": 1.3185394934787766e-07, "loss": 0.5426, "step": 12243 }, { "epoch": 0.95, "grad_norm": 1.3323725377826037, "learning_rate": 1.3144757330888934e-07, "loss": 0.5382, "step": 12244 }, { "epoch": 0.95, "grad_norm": 1.2823948746068934, "learning_rate": 1.3104182032253164e-07, "loss": 0.5275, "step": 12245 }, { "epoch": 0.95, "grad_norm": 1.179933888555367, "learning_rate": 1.3063669041442074e-07, "loss": 0.4926, "step": 12246 }, { "epoch": 0.95, "grad_norm": 1.3084780969328043, "learning_rate": 1.302321836101339e-07, "loss": 0.5481, "step": 12247 }, { "epoch": 0.95, "grad_norm": 1.2178695139118783, "learning_rate": 1.2982829993521185e-07, "loss": 0.5642, "step": 12248 }, { "epoch": 0.95, "grad_norm": 1.1828781331446212, "learning_rate": 1.2942503941515082e-07, "loss": 0.4654, "step": 12249 }, { "epoch": 0.95, "grad_norm": 1.2747656326307972, "learning_rate": 1.2902240207541384e-07, "loss": 0.5077, "step": 12250 }, { "epoch": 0.95, "grad_norm": 1.194592227714184, "learning_rate": 1.286203879414205e-07, "loss": 0.4848, "step": 12251 }, { "epoch": 0.95, "grad_norm": 1.241201285167893, "learning_rate": 1.2821899703855057e-07, "loss": 0.5149, "step": 12252 }, { "epoch": 0.95, "grad_norm": 1.2949184227806783, "learning_rate": 1.2781822939214817e-07, "loss": 0.5862, "step": 12253 }, { "epoch": 0.95, "grad_norm": 1.0716363001585496, "learning_rate": 1.2741808502751417e-07, "loss": 0.4715, "step": 12254 }, { "epoch": 0.95, "grad_norm": 1.2851832888463306, "learning_rate": 1.2701856396991285e-07, "loss": 0.4644, "step": 12255 }, { "epoch": 0.95, "grad_norm": 1.3106029060768147, "learning_rate": 1.2661966624456733e-07, "loss": 0.4978, "step": 12256 }, { "epoch": 0.95, "grad_norm": 1.277710188354227, "learning_rate": 1.2622139187666083e-07, "loss": 0.5025, "step": 12257 }, { "epoch": 0.95, "grad_norm": 1.1920888477470342, "learning_rate": 1.2582374089134096e-07, "loss": 0.4723, "step": 12258 }, { "epoch": 0.95, "grad_norm": 1.1803714209674714, "learning_rate": 1.2542671331371214e-07, "loss": 0.4962, "step": 12259 }, { "epoch": 0.95, "grad_norm": 1.1894195000667245, "learning_rate": 1.2503030916884097e-07, "loss": 0.4883, "step": 12260 }, { "epoch": 0.95, "grad_norm": 1.3001962898689665, "learning_rate": 1.2463452848175516e-07, "loss": 0.5612, "step": 12261 }, { "epoch": 0.95, "grad_norm": 1.3101333443283396, "learning_rate": 1.2423937127744146e-07, "loss": 0.5686, "step": 12262 }, { "epoch": 0.95, "grad_norm": 1.1750593028422982, "learning_rate": 1.2384483758084765e-07, "loss": 0.4837, "step": 12263 }, { "epoch": 0.95, "grad_norm": 1.2637020148761489, "learning_rate": 1.234509274168838e-07, "loss": 0.479, "step": 12264 }, { "epoch": 0.95, "grad_norm": 1.1423764303482071, "learning_rate": 1.2305764081042003e-07, "loss": 0.4461, "step": 12265 }, { "epoch": 0.95, "grad_norm": 1.2452852502232261, "learning_rate": 1.226649777862854e-07, "loss": 0.5491, "step": 12266 }, { "epoch": 0.95, "grad_norm": 1.205153452097077, "learning_rate": 1.2227293836927112e-07, "loss": 0.5095, "step": 12267 }, { "epoch": 0.95, "grad_norm": 1.1616423365609658, "learning_rate": 1.2188152258412855e-07, "loss": 0.4445, "step": 12268 }, { "epoch": 0.95, "grad_norm": 1.328134424512747, "learning_rate": 1.2149073045557014e-07, "loss": 0.5564, "step": 12269 }, { "epoch": 0.95, "grad_norm": 1.2008558400304161, "learning_rate": 1.2110056200826725e-07, "loss": 0.4861, "step": 12270 }, { "epoch": 0.95, "grad_norm": 1.1278239411164566, "learning_rate": 1.2071101726685464e-07, "loss": 0.4918, "step": 12271 }, { "epoch": 0.95, "grad_norm": 1.3167760312658008, "learning_rate": 1.2032209625592705e-07, "loss": 0.5583, "step": 12272 }, { "epoch": 0.95, "grad_norm": 1.2127233810727611, "learning_rate": 1.199337990000371e-07, "loss": 0.5628, "step": 12273 }, { "epoch": 0.95, "grad_norm": 1.3518681704224773, "learning_rate": 1.195461255237007e-07, "loss": 0.5273, "step": 12274 }, { "epoch": 0.95, "grad_norm": 1.2611280540741643, "learning_rate": 1.1915907585139385e-07, "loss": 0.4632, "step": 12275 }, { "epoch": 0.95, "grad_norm": 1.293948168481518, "learning_rate": 1.1877265000755367e-07, "loss": 0.5143, "step": 12276 }, { "epoch": 0.95, "grad_norm": 1.1330178962499473, "learning_rate": 1.1838684801657619e-07, "loss": 0.436, "step": 12277 }, { "epoch": 0.95, "grad_norm": 1.3154319530940493, "learning_rate": 1.1800166990281858e-07, "loss": 0.5554, "step": 12278 }, { "epoch": 0.95, "grad_norm": 1.2347074616251603, "learning_rate": 1.1761711569060141e-07, "loss": 0.4567, "step": 12279 }, { "epoch": 0.95, "grad_norm": 1.1541361688837604, "learning_rate": 1.1723318540420081e-07, "loss": 0.4389, "step": 12280 }, { "epoch": 0.95, "grad_norm": 1.2898600105355889, "learning_rate": 1.1684987906785739e-07, "loss": 0.496, "step": 12281 }, { "epoch": 0.95, "grad_norm": 1.2520016379635124, "learning_rate": 1.164671967057729e-07, "loss": 0.5243, "step": 12282 }, { "epoch": 0.95, "grad_norm": 1.24934470166198, "learning_rate": 1.1608513834210578e-07, "loss": 0.4977, "step": 12283 }, { "epoch": 0.95, "grad_norm": 1.2150429320108975, "learning_rate": 1.1570370400097786e-07, "loss": 0.4987, "step": 12284 }, { "epoch": 0.95, "grad_norm": 1.1670298805693553, "learning_rate": 1.1532289370647209e-07, "loss": 0.4671, "step": 12285 }, { "epoch": 0.95, "grad_norm": 1.2199556793698574, "learning_rate": 1.1494270748263037e-07, "loss": 0.4974, "step": 12286 }, { "epoch": 0.95, "grad_norm": 1.2136574009184797, "learning_rate": 1.1456314535345569e-07, "loss": 0.4839, "step": 12287 }, { "epoch": 0.95, "grad_norm": 1.2427437454554302, "learning_rate": 1.1418420734291113e-07, "loss": 0.5363, "step": 12288 }, { "epoch": 0.95, "grad_norm": 1.1346942399858833, "learning_rate": 1.1380589347492199e-07, "loss": 0.4042, "step": 12289 }, { "epoch": 0.95, "grad_norm": 1.289938704047202, "learning_rate": 1.134282037733725e-07, "loss": 0.5281, "step": 12290 }, { "epoch": 0.95, "grad_norm": 1.1204729122307449, "learning_rate": 1.1305113826210911e-07, "loss": 0.4439, "step": 12291 }, { "epoch": 0.95, "grad_norm": 1.1737345374148167, "learning_rate": 1.1267469696493726e-07, "loss": 0.4181, "step": 12292 }, { "epoch": 0.95, "grad_norm": 1.2308029931852427, "learning_rate": 1.1229887990562349e-07, "loss": 0.4944, "step": 12293 }, { "epoch": 0.95, "grad_norm": 1.1460344773810023, "learning_rate": 1.1192368710789547e-07, "loss": 0.476, "step": 12294 }, { "epoch": 0.95, "grad_norm": 1.1794433341068746, "learning_rate": 1.1154911859543982e-07, "loss": 0.4919, "step": 12295 }, { "epoch": 0.95, "grad_norm": 1.121196888289827, "learning_rate": 1.111751743919065e-07, "loss": 0.4683, "step": 12296 }, { "epoch": 0.95, "grad_norm": 1.1420167766681575, "learning_rate": 1.1080185452090553e-07, "loss": 0.4603, "step": 12297 }, { "epoch": 0.95, "grad_norm": 1.210361959213607, "learning_rate": 1.104291590060036e-07, "loss": 0.4957, "step": 12298 }, { "epoch": 0.95, "grad_norm": 1.239055163059905, "learning_rate": 1.1005708787073189e-07, "loss": 0.53, "step": 12299 }, { "epoch": 0.95, "grad_norm": 1.1607990474982268, "learning_rate": 1.096856411385827e-07, "loss": 0.4737, "step": 12300 }, { "epoch": 0.95, "grad_norm": 1.2387028887862161, "learning_rate": 1.093148188330051e-07, "loss": 0.5061, "step": 12301 }, { "epoch": 0.95, "grad_norm": 1.3665108886575255, "learning_rate": 1.0894462097741366e-07, "loss": 0.5371, "step": 12302 }, { "epoch": 0.95, "grad_norm": 1.2483374984087723, "learning_rate": 1.0857504759517856e-07, "loss": 0.4795, "step": 12303 }, { "epoch": 0.95, "grad_norm": 1.141345384503703, "learning_rate": 1.082060987096345e-07, "loss": 0.466, "step": 12304 }, { "epoch": 0.95, "grad_norm": 1.2948525066408771, "learning_rate": 1.0783777434407394e-07, "loss": 0.5574, "step": 12305 }, { "epoch": 0.95, "grad_norm": 1.2755014742338422, "learning_rate": 1.0747007452175051e-07, "loss": 0.4813, "step": 12306 }, { "epoch": 0.95, "grad_norm": 1.36835800201321, "learning_rate": 1.071029992658823e-07, "loss": 0.5468, "step": 12307 }, { "epoch": 0.95, "grad_norm": 1.1439187555941306, "learning_rate": 1.0673654859964078e-07, "loss": 0.5287, "step": 12308 }, { "epoch": 0.95, "grad_norm": 1.1009962642166276, "learning_rate": 1.0637072254616298e-07, "loss": 0.4413, "step": 12309 }, { "epoch": 0.95, "grad_norm": 1.2153051058406228, "learning_rate": 1.0600552112854712e-07, "loss": 0.513, "step": 12310 }, { "epoch": 0.96, "grad_norm": 1.4056457492896397, "learning_rate": 1.0564094436984806e-07, "loss": 0.5283, "step": 12311 }, { "epoch": 0.96, "grad_norm": 1.207458025595689, "learning_rate": 1.0527699229308519e-07, "loss": 0.5225, "step": 12312 }, { "epoch": 0.96, "grad_norm": 1.2940130252035187, "learning_rate": 1.0491366492123567e-07, "loss": 0.5125, "step": 12313 }, { "epoch": 0.96, "grad_norm": 1.2343347277288745, "learning_rate": 1.0455096227723782e-07, "loss": 0.5343, "step": 12314 }, { "epoch": 0.96, "grad_norm": 1.2877700337307048, "learning_rate": 1.0418888438399222e-07, "loss": 0.5395, "step": 12315 }, { "epoch": 0.96, "grad_norm": 1.2487096802519029, "learning_rate": 1.0382743126435723e-07, "loss": 0.449, "step": 12316 }, { "epoch": 0.96, "grad_norm": 1.1995727811726649, "learning_rate": 1.0346660294115462e-07, "loss": 0.5099, "step": 12317 }, { "epoch": 0.96, "grad_norm": 1.214555859568537, "learning_rate": 1.0310639943716505e-07, "loss": 0.4675, "step": 12318 }, { "epoch": 0.96, "grad_norm": 1.2260081988584794, "learning_rate": 1.027468207751292e-07, "loss": 0.4819, "step": 12319 }, { "epoch": 0.96, "grad_norm": 1.1420835288510485, "learning_rate": 1.0238786697775006e-07, "loss": 0.4671, "step": 12320 }, { "epoch": 0.96, "grad_norm": 1.2172092088127224, "learning_rate": 1.0202953806768945e-07, "loss": 0.4873, "step": 12321 }, { "epoch": 0.96, "grad_norm": 1.3107450196155004, "learning_rate": 1.0167183406757042e-07, "loss": 0.518, "step": 12322 }, { "epoch": 0.96, "grad_norm": 1.177888167479233, "learning_rate": 1.0131475499997823e-07, "loss": 0.4877, "step": 12323 }, { "epoch": 0.96, "grad_norm": 1.2067505346486307, "learning_rate": 1.0095830088745595e-07, "loss": 0.5093, "step": 12324 }, { "epoch": 0.96, "grad_norm": 1.1271417152575818, "learning_rate": 1.0060247175250892e-07, "loss": 0.5121, "step": 12325 }, { "epoch": 0.96, "grad_norm": 1.2373623973753018, "learning_rate": 1.0024726761760028e-07, "loss": 0.4876, "step": 12326 }, { "epoch": 0.96, "grad_norm": 1.2337505837606269, "learning_rate": 9.989268850515876e-08, "loss": 0.4988, "step": 12327 }, { "epoch": 0.96, "grad_norm": 1.1731605599185688, "learning_rate": 9.953873443756979e-08, "loss": 0.4922, "step": 12328 }, { "epoch": 0.96, "grad_norm": 1.2897906472118852, "learning_rate": 9.918540543717992e-08, "loss": 0.547, "step": 12329 }, { "epoch": 0.96, "grad_norm": 1.202552907101029, "learning_rate": 9.883270152629686e-08, "loss": 0.5223, "step": 12330 }, { "epoch": 0.96, "grad_norm": 1.218616135899447, "learning_rate": 9.848062272718839e-08, "loss": 0.5378, "step": 12331 }, { "epoch": 0.96, "grad_norm": 1.198193089754735, "learning_rate": 9.812916906208337e-08, "loss": 0.4807, "step": 12332 }, { "epoch": 0.96, "grad_norm": 1.2650485197570398, "learning_rate": 9.777834055317181e-08, "loss": 0.5138, "step": 12333 }, { "epoch": 0.96, "grad_norm": 1.3114702635755955, "learning_rate": 9.742813722260158e-08, "loss": 0.5745, "step": 12334 }, { "epoch": 0.96, "grad_norm": 1.3081470695051902, "learning_rate": 9.707855909248387e-08, "loss": 0.5163, "step": 12335 }, { "epoch": 0.96, "grad_norm": 1.217707750740706, "learning_rate": 9.67296061848888e-08, "loss": 0.5089, "step": 12336 }, { "epoch": 0.96, "grad_norm": 1.1921186474757552, "learning_rate": 9.638127852184764e-08, "loss": 0.483, "step": 12337 }, { "epoch": 0.96, "grad_norm": 1.3254214426286584, "learning_rate": 9.60335761253528e-08, "loss": 0.5436, "step": 12338 }, { "epoch": 0.96, "grad_norm": 1.1557377376413358, "learning_rate": 9.568649901735672e-08, "loss": 0.491, "step": 12339 }, { "epoch": 0.96, "grad_norm": 1.4133291292277972, "learning_rate": 9.534004721976964e-08, "loss": 0.5685, "step": 12340 }, { "epoch": 0.96, "grad_norm": 1.3218123815688771, "learning_rate": 9.49942207544674e-08, "loss": 0.5135, "step": 12341 }, { "epoch": 0.96, "grad_norm": 1.0873486797244585, "learning_rate": 9.464901964328365e-08, "loss": 0.4766, "step": 12342 }, { "epoch": 0.96, "grad_norm": 1.391677029888986, "learning_rate": 9.430444390801208e-08, "loss": 0.5173, "step": 12343 }, { "epoch": 0.96, "grad_norm": 1.2115421385354124, "learning_rate": 9.39604935704086e-08, "loss": 0.4663, "step": 12344 }, { "epoch": 0.96, "grad_norm": 1.1313327463984348, "learning_rate": 9.361716865218584e-08, "loss": 0.4564, "step": 12345 }, { "epoch": 0.96, "grad_norm": 1.2953986071414358, "learning_rate": 9.327446917502203e-08, "loss": 0.5349, "step": 12346 }, { "epoch": 0.96, "grad_norm": 1.2836237991539876, "learning_rate": 9.29323951605532e-08, "loss": 0.4866, "step": 12347 }, { "epoch": 0.96, "grad_norm": 1.236916786093268, "learning_rate": 9.259094663037649e-08, "loss": 0.4894, "step": 12348 }, { "epoch": 0.96, "grad_norm": 1.1705139903454993, "learning_rate": 9.225012360604802e-08, "loss": 0.4518, "step": 12349 }, { "epoch": 0.96, "grad_norm": 1.1464493338381698, "learning_rate": 9.190992610908611e-08, "loss": 0.4778, "step": 12350 }, { "epoch": 0.96, "grad_norm": 1.187191370962509, "learning_rate": 9.157035416097027e-08, "loss": 0.4557, "step": 12351 }, { "epoch": 0.96, "grad_norm": 1.291838183346705, "learning_rate": 9.123140778313777e-08, "loss": 0.5257, "step": 12352 }, { "epoch": 0.96, "grad_norm": 1.244238354064877, "learning_rate": 9.08930869969904e-08, "loss": 0.4983, "step": 12353 }, { "epoch": 0.96, "grad_norm": 1.2552400904182437, "learning_rate": 9.055539182388662e-08, "loss": 0.5225, "step": 12354 }, { "epoch": 0.96, "grad_norm": 1.2814056953175004, "learning_rate": 9.021832228514715e-08, "loss": 0.483, "step": 12355 }, { "epoch": 0.96, "grad_norm": 1.2992610812100918, "learning_rate": 8.988187840205164e-08, "loss": 0.5104, "step": 12356 }, { "epoch": 0.96, "grad_norm": 1.1911457715064353, "learning_rate": 8.954606019584312e-08, "loss": 0.5231, "step": 12357 }, { "epoch": 0.96, "grad_norm": 1.1331069563593128, "learning_rate": 8.921086768772346e-08, "loss": 0.4375, "step": 12358 }, { "epoch": 0.96, "grad_norm": 1.2591847107959995, "learning_rate": 8.887630089885357e-08, "loss": 0.5229, "step": 12359 }, { "epoch": 0.96, "grad_norm": 1.2132962787824835, "learning_rate": 8.854235985035875e-08, "loss": 0.4656, "step": 12360 }, { "epoch": 0.96, "grad_norm": 1.2163328017525223, "learning_rate": 8.820904456331992e-08, "loss": 0.5026, "step": 12361 }, { "epoch": 0.96, "grad_norm": 1.2499289492441437, "learning_rate": 8.787635505878245e-08, "loss": 0.536, "step": 12362 }, { "epoch": 0.96, "grad_norm": 1.1698546223105282, "learning_rate": 8.754429135775178e-08, "loss": 0.5059, "step": 12363 }, { "epoch": 0.96, "grad_norm": 1.131999862162039, "learning_rate": 8.721285348119113e-08, "loss": 0.4584, "step": 12364 }, { "epoch": 0.96, "grad_norm": 1.3023521857855165, "learning_rate": 8.688204145002598e-08, "loss": 0.5216, "step": 12365 }, { "epoch": 0.96, "grad_norm": 1.2223730277215143, "learning_rate": 8.655185528514187e-08, "loss": 0.5182, "step": 12366 }, { "epoch": 0.96, "grad_norm": 1.218679181511168, "learning_rate": 8.622229500738655e-08, "loss": 0.4857, "step": 12367 }, { "epoch": 0.96, "grad_norm": 1.2070293241318828, "learning_rate": 8.58933606375667e-08, "loss": 0.4279, "step": 12368 }, { "epoch": 0.96, "grad_norm": 1.3088704286418436, "learning_rate": 8.556505219644795e-08, "loss": 0.5166, "step": 12369 }, { "epoch": 0.96, "grad_norm": 1.3150833682187608, "learning_rate": 8.523736970475927e-08, "loss": 0.4945, "step": 12370 }, { "epoch": 0.96, "grad_norm": 1.0519207869756797, "learning_rate": 8.491031318318854e-08, "loss": 0.4668, "step": 12371 }, { "epoch": 0.96, "grad_norm": 1.089009775995349, "learning_rate": 8.458388265238593e-08, "loss": 0.4625, "step": 12372 }, { "epoch": 0.96, "grad_norm": 1.2290739843480374, "learning_rate": 8.425807813295939e-08, "loss": 0.5232, "step": 12373 }, { "epoch": 0.96, "grad_norm": 1.071744086720638, "learning_rate": 8.393289964547912e-08, "loss": 0.4929, "step": 12374 }, { "epoch": 0.96, "grad_norm": 1.216458685399548, "learning_rate": 8.360834721047429e-08, "loss": 0.4963, "step": 12375 }, { "epoch": 0.96, "grad_norm": 1.1171888338094365, "learning_rate": 8.328442084843624e-08, "loss": 0.467, "step": 12376 }, { "epoch": 0.96, "grad_norm": 1.19275806217831, "learning_rate": 8.296112057981643e-08, "loss": 0.5244, "step": 12377 }, { "epoch": 0.96, "grad_norm": 1.1741212805663994, "learning_rate": 8.263844642502628e-08, "loss": 0.463, "step": 12378 }, { "epoch": 0.96, "grad_norm": 1.2538473525430873, "learning_rate": 8.23163984044395e-08, "loss": 0.4948, "step": 12379 }, { "epoch": 0.96, "grad_norm": 1.2948280634205498, "learning_rate": 8.19949765383854e-08, "loss": 0.5345, "step": 12380 }, { "epoch": 0.96, "grad_norm": 1.2498698166767315, "learning_rate": 8.167418084715772e-08, "loss": 0.5011, "step": 12381 }, { "epoch": 0.96, "grad_norm": 1.252073189963555, "learning_rate": 8.135401135101251e-08, "loss": 0.4759, "step": 12382 }, { "epoch": 0.96, "grad_norm": 1.2972628002141737, "learning_rate": 8.103446807016135e-08, "loss": 0.5339, "step": 12383 }, { "epoch": 0.96, "grad_norm": 1.1711803666364815, "learning_rate": 8.071555102478035e-08, "loss": 0.4632, "step": 12384 }, { "epoch": 0.96, "grad_norm": 1.1988101225763215, "learning_rate": 8.039726023500227e-08, "loss": 0.5281, "step": 12385 }, { "epoch": 0.96, "grad_norm": 1.18085457374639, "learning_rate": 8.007959572092328e-08, "loss": 0.5131, "step": 12386 }, { "epoch": 0.96, "grad_norm": 1.1791846612067254, "learning_rate": 7.976255750260065e-08, "loss": 0.4708, "step": 12387 }, { "epoch": 0.96, "grad_norm": 1.23472831594887, "learning_rate": 7.944614560004838e-08, "loss": 0.5509, "step": 12388 }, { "epoch": 0.96, "grad_norm": 1.1848678529670518, "learning_rate": 7.913036003324492e-08, "loss": 0.4916, "step": 12389 }, { "epoch": 0.96, "grad_norm": 1.253816357774344, "learning_rate": 7.881520082212657e-08, "loss": 0.5403, "step": 12390 }, { "epoch": 0.96, "grad_norm": 1.2233893783284107, "learning_rate": 7.850066798659072e-08, "loss": 0.5231, "step": 12391 }, { "epoch": 0.96, "grad_norm": 1.231055516408078, "learning_rate": 7.818676154649707e-08, "loss": 0.504, "step": 12392 }, { "epoch": 0.96, "grad_norm": 1.3453729277008863, "learning_rate": 7.787348152166197e-08, "loss": 0.5042, "step": 12393 }, { "epoch": 0.96, "grad_norm": 1.194942591656876, "learning_rate": 7.756082793186626e-08, "loss": 0.4935, "step": 12394 }, { "epoch": 0.96, "grad_norm": 1.3137209526647342, "learning_rate": 7.724880079684748e-08, "loss": 0.5091, "step": 12395 }, { "epoch": 0.96, "grad_norm": 1.159682514300226, "learning_rate": 7.693740013630768e-08, "loss": 0.5093, "step": 12396 }, { "epoch": 0.96, "grad_norm": 1.1842477332510826, "learning_rate": 7.662662596990555e-08, "loss": 0.4457, "step": 12397 }, { "epoch": 0.96, "grad_norm": 1.2793293024277577, "learning_rate": 7.631647831726207e-08, "loss": 0.5212, "step": 12398 }, { "epoch": 0.96, "grad_norm": 1.0988704170146235, "learning_rate": 7.600695719795936e-08, "loss": 0.4316, "step": 12399 }, { "epoch": 0.96, "grad_norm": 1.1589832476901958, "learning_rate": 7.569806263153734e-08, "loss": 0.4934, "step": 12400 }, { "epoch": 0.96, "grad_norm": 1.1761044283063211, "learning_rate": 7.53897946375004e-08, "loss": 0.5132, "step": 12401 }, { "epoch": 0.96, "grad_norm": 1.2647926984713764, "learning_rate": 7.508215323531076e-08, "loss": 0.4584, "step": 12402 }, { "epoch": 0.96, "grad_norm": 1.2613883043418646, "learning_rate": 7.477513844438955e-08, "loss": 0.5084, "step": 12403 }, { "epoch": 0.96, "grad_norm": 1.09665561014378, "learning_rate": 7.446875028412126e-08, "loss": 0.453, "step": 12404 }, { "epoch": 0.96, "grad_norm": 1.114006047470416, "learning_rate": 7.416298877384931e-08, "loss": 0.4832, "step": 12405 }, { "epoch": 0.96, "grad_norm": 1.170540673049112, "learning_rate": 7.385785393287936e-08, "loss": 0.4795, "step": 12406 }, { "epoch": 0.96, "grad_norm": 1.2339810092707444, "learning_rate": 7.35533457804749e-08, "loss": 0.5068, "step": 12407 }, { "epoch": 0.96, "grad_norm": 1.1914994500399467, "learning_rate": 7.324946433586055e-08, "loss": 0.4664, "step": 12408 }, { "epoch": 0.96, "grad_norm": 1.2010054806465922, "learning_rate": 7.29462096182243e-08, "loss": 0.5039, "step": 12409 }, { "epoch": 0.96, "grad_norm": 1.2048789281685088, "learning_rate": 7.264358164671082e-08, "loss": 0.5071, "step": 12410 }, { "epoch": 0.96, "grad_norm": 1.2081824679848838, "learning_rate": 7.234158044042482e-08, "loss": 0.4874, "step": 12411 }, { "epoch": 0.96, "grad_norm": 1.1325617019316214, "learning_rate": 7.204020601843665e-08, "loss": 0.4623, "step": 12412 }, { "epoch": 0.96, "grad_norm": 1.2255159517648435, "learning_rate": 7.173945839977103e-08, "loss": 0.4807, "step": 12413 }, { "epoch": 0.96, "grad_norm": 1.2583110129458865, "learning_rate": 7.143933760341615e-08, "loss": 0.5206, "step": 12414 }, { "epoch": 0.96, "grad_norm": 1.3173090162597205, "learning_rate": 7.113984364832127e-08, "loss": 0.5683, "step": 12415 }, { "epoch": 0.96, "grad_norm": 1.2134573529250456, "learning_rate": 7.08409765533935e-08, "loss": 0.4864, "step": 12416 }, { "epoch": 0.96, "grad_norm": 1.2282197289660592, "learning_rate": 7.054273633750219e-08, "loss": 0.5317, "step": 12417 }, { "epoch": 0.96, "grad_norm": 1.2343168124234116, "learning_rate": 7.024512301947783e-08, "loss": 0.5057, "step": 12418 }, { "epoch": 0.96, "grad_norm": 1.2378050071838158, "learning_rate": 6.994813661810984e-08, "loss": 0.4947, "step": 12419 }, { "epoch": 0.96, "grad_norm": 1.1777560863106833, "learning_rate": 6.965177715214878e-08, "loss": 0.4611, "step": 12420 }, { "epoch": 0.96, "grad_norm": 1.2014387625091465, "learning_rate": 6.93560446403041e-08, "loss": 0.4762, "step": 12421 }, { "epoch": 0.96, "grad_norm": 1.1815626386648932, "learning_rate": 6.906093910124756e-08, "loss": 0.4744, "step": 12422 }, { "epoch": 0.96, "grad_norm": 1.2697310539885565, "learning_rate": 6.876646055361091e-08, "loss": 0.5249, "step": 12423 }, { "epoch": 0.96, "grad_norm": 1.3944536289015632, "learning_rate": 6.847260901598595e-08, "loss": 0.634, "step": 12424 }, { "epoch": 0.96, "grad_norm": 1.2221366094939305, "learning_rate": 6.817938450692674e-08, "loss": 0.5356, "step": 12425 }, { "epoch": 0.96, "grad_norm": 1.241579302758814, "learning_rate": 6.788678704494289e-08, "loss": 0.4945, "step": 12426 }, { "epoch": 0.96, "grad_norm": 1.286607520015567, "learning_rate": 6.759481664850853e-08, "loss": 0.5496, "step": 12427 }, { "epoch": 0.96, "grad_norm": 1.2600355228835922, "learning_rate": 6.73034733360589e-08, "loss": 0.4979, "step": 12428 }, { "epoch": 0.96, "grad_norm": 1.2865756929845544, "learning_rate": 6.70127571259871e-08, "loss": 0.5709, "step": 12429 }, { "epoch": 0.96, "grad_norm": 1.259320273523649, "learning_rate": 6.672266803664729e-08, "loss": 0.4574, "step": 12430 }, { "epoch": 0.96, "grad_norm": 1.1934977699358034, "learning_rate": 6.643320608635373e-08, "loss": 0.5093, "step": 12431 }, { "epoch": 0.96, "grad_norm": 1.2332053135250418, "learning_rate": 6.614437129338402e-08, "loss": 0.526, "step": 12432 }, { "epoch": 0.96, "grad_norm": 1.2581479590756683, "learning_rate": 6.585616367597025e-08, "loss": 0.5149, "step": 12433 }, { "epoch": 0.96, "grad_norm": 1.1675335127372417, "learning_rate": 6.556858325231119e-08, "loss": 0.4959, "step": 12434 }, { "epoch": 0.96, "grad_norm": 1.2431266161724495, "learning_rate": 6.528163004056231e-08, "loss": 0.5139, "step": 12435 }, { "epoch": 0.96, "grad_norm": 1.265639410996514, "learning_rate": 6.499530405884025e-08, "loss": 0.4859, "step": 12436 }, { "epoch": 0.96, "grad_norm": 1.376424788169291, "learning_rate": 6.470960532522275e-08, "loss": 0.474, "step": 12437 }, { "epoch": 0.96, "grad_norm": 1.2535500659294578, "learning_rate": 6.44245338577465e-08, "loss": 0.5081, "step": 12438 }, { "epoch": 0.96, "grad_norm": 1.3082010619408375, "learning_rate": 6.414008967441155e-08, "loss": 0.525, "step": 12439 }, { "epoch": 0.97, "grad_norm": 1.204484097617681, "learning_rate": 6.385627279317463e-08, "loss": 0.4792, "step": 12440 }, { "epoch": 0.97, "grad_norm": 1.107075685889368, "learning_rate": 6.357308323195476e-08, "loss": 0.4474, "step": 12441 }, { "epoch": 0.97, "grad_norm": 1.349187716075802, "learning_rate": 6.329052100863209e-08, "loss": 0.5518, "step": 12442 }, { "epoch": 0.97, "grad_norm": 1.198955047143343, "learning_rate": 6.300858614104455e-08, "loss": 0.466, "step": 12443 }, { "epoch": 0.97, "grad_norm": 1.3348395760546958, "learning_rate": 6.272727864699234e-08, "loss": 0.5237, "step": 12444 }, { "epoch": 0.97, "grad_norm": 1.244967103776308, "learning_rate": 6.244659854423795e-08, "loss": 0.4893, "step": 12445 }, { "epoch": 0.97, "grad_norm": 1.1692606437709234, "learning_rate": 6.216654585050052e-08, "loss": 0.4229, "step": 12446 }, { "epoch": 0.97, "grad_norm": 1.1444910973285545, "learning_rate": 6.188712058346147e-08, "loss": 0.4876, "step": 12447 }, { "epoch": 0.97, "grad_norm": 1.179135780918476, "learning_rate": 6.160832276076223e-08, "loss": 0.5015, "step": 12448 }, { "epoch": 0.97, "grad_norm": 1.1920127950628068, "learning_rate": 6.13301524000054e-08, "loss": 0.4765, "step": 12449 }, { "epoch": 0.97, "grad_norm": 1.3099366542555526, "learning_rate": 6.105260951875247e-08, "loss": 0.5431, "step": 12450 }, { "epoch": 0.97, "grad_norm": 1.2526664903433207, "learning_rate": 6.077569413452722e-08, "loss": 0.4864, "step": 12451 }, { "epoch": 0.97, "grad_norm": 1.126515744914939, "learning_rate": 6.049940626481121e-08, "loss": 0.4542, "step": 12452 }, { "epoch": 0.97, "grad_norm": 1.1824531724944594, "learning_rate": 6.022374592704938e-08, "loss": 0.5002, "step": 12453 }, { "epoch": 0.97, "grad_norm": 1.2310202195360875, "learning_rate": 5.994871313864448e-08, "loss": 0.547, "step": 12454 }, { "epoch": 0.97, "grad_norm": 1.2322254054532231, "learning_rate": 5.96743079169615e-08, "loss": 0.5195, "step": 12455 }, { "epoch": 0.97, "grad_norm": 1.3136923233423534, "learning_rate": 5.9400530279324354e-08, "loss": 0.4679, "step": 12456 }, { "epoch": 0.97, "grad_norm": 1.2298715262268494, "learning_rate": 5.9127380243019225e-08, "loss": 0.5101, "step": 12457 }, { "epoch": 0.97, "grad_norm": 1.1989055808642282, "learning_rate": 5.88548578252901e-08, "loss": 0.5015, "step": 12458 }, { "epoch": 0.97, "grad_norm": 1.2786382246341397, "learning_rate": 5.858296304334321e-08, "loss": 0.474, "step": 12459 }, { "epoch": 0.97, "grad_norm": 1.2971925921100096, "learning_rate": 5.831169591434593e-08, "loss": 0.5081, "step": 12460 }, { "epoch": 0.97, "grad_norm": 1.189433932345285, "learning_rate": 5.804105645542235e-08, "loss": 0.4768, "step": 12461 }, { "epoch": 0.97, "grad_norm": 1.2423990897143997, "learning_rate": 5.7771044683662125e-08, "loss": 0.5466, "step": 12462 }, { "epoch": 0.97, "grad_norm": 1.3103228631986958, "learning_rate": 5.750166061611051e-08, "loss": 0.5119, "step": 12463 }, { "epoch": 0.97, "grad_norm": 1.2751552954699292, "learning_rate": 5.7232904269775014e-08, "loss": 0.5203, "step": 12464 }, { "epoch": 0.97, "grad_norm": 1.1144996801814857, "learning_rate": 5.696477566162428e-08, "loss": 0.481, "step": 12465 }, { "epoch": 0.97, "grad_norm": 1.2273057762987907, "learning_rate": 5.6697274808587e-08, "loss": 0.5185, "step": 12466 }, { "epoch": 0.97, "grad_norm": 1.158732764353542, "learning_rate": 5.6430401727550766e-08, "loss": 0.4804, "step": 12467 }, { "epoch": 0.97, "grad_norm": 1.2442779704746576, "learning_rate": 5.6164156435365435e-08, "loss": 0.5379, "step": 12468 }, { "epoch": 0.97, "grad_norm": 1.2501807082207745, "learning_rate": 5.5898538948840896e-08, "loss": 0.4778, "step": 12469 }, { "epoch": 0.97, "grad_norm": 1.1344399698951528, "learning_rate": 5.563354928474596e-08, "loss": 0.4688, "step": 12470 }, { "epoch": 0.97, "grad_norm": 1.278626570650382, "learning_rate": 5.536918745981168e-08, "loss": 0.5, "step": 12471 }, { "epoch": 0.97, "grad_norm": 1.2756372654356487, "learning_rate": 5.5105453490728047e-08, "loss": 0.5128, "step": 12472 }, { "epoch": 0.97, "grad_norm": 1.2517831481664743, "learning_rate": 5.484234739414618e-08, "loss": 0.5001, "step": 12473 }, { "epoch": 0.97, "grad_norm": 1.1978757215495914, "learning_rate": 5.4579869186676126e-08, "loss": 0.4772, "step": 12474 }, { "epoch": 0.97, "grad_norm": 1.1302540267403467, "learning_rate": 5.431801888489241e-08, "loss": 0.515, "step": 12475 }, { "epoch": 0.97, "grad_norm": 1.2754454432999216, "learning_rate": 5.405679650532403e-08, "loss": 0.5182, "step": 12476 }, { "epoch": 0.97, "grad_norm": 1.132319218014642, "learning_rate": 5.3796202064464454e-08, "loss": 0.4324, "step": 12477 }, { "epoch": 0.97, "grad_norm": 1.265646004208183, "learning_rate": 5.353623557876608e-08, "loss": 0.4767, "step": 12478 }, { "epoch": 0.97, "grad_norm": 1.2585955723394726, "learning_rate": 5.3276897064641344e-08, "loss": 0.5004, "step": 12479 }, { "epoch": 0.97, "grad_norm": 1.165475384721079, "learning_rate": 5.301818653846602e-08, "loss": 0.4736, "step": 12480 }, { "epoch": 0.97, "grad_norm": 1.2184190545321012, "learning_rate": 5.27601040165715e-08, "loss": 0.4696, "step": 12481 }, { "epoch": 0.97, "grad_norm": 1.1733015913805975, "learning_rate": 5.250264951525364e-08, "loss": 0.5176, "step": 12482 }, { "epoch": 0.97, "grad_norm": 1.1832925142492279, "learning_rate": 5.224582305076498e-08, "loss": 0.5312, "step": 12483 }, { "epoch": 0.97, "grad_norm": 1.1988818661554093, "learning_rate": 5.198962463932145e-08, "loss": 0.4946, "step": 12484 }, { "epoch": 0.97, "grad_norm": 1.2209559796714005, "learning_rate": 5.173405429709677e-08, "loss": 0.5064, "step": 12485 }, { "epoch": 0.97, "grad_norm": 1.3001314206818908, "learning_rate": 5.147911204022915e-08, "loss": 0.5079, "step": 12486 }, { "epoch": 0.97, "grad_norm": 1.2037131618198813, "learning_rate": 5.1224797884812385e-08, "loss": 0.4723, "step": 12487 }, { "epoch": 0.97, "grad_norm": 1.1876966915865976, "learning_rate": 5.097111184690251e-08, "loss": 0.5197, "step": 12488 }, { "epoch": 0.97, "grad_norm": 1.1044524680634409, "learning_rate": 5.071805394251672e-08, "loss": 0.4765, "step": 12489 }, { "epoch": 0.97, "grad_norm": 1.1593487430694185, "learning_rate": 5.046562418763223e-08, "loss": 0.5001, "step": 12490 }, { "epoch": 0.97, "grad_norm": 1.3758537936215205, "learning_rate": 5.0213822598185194e-08, "loss": 0.5038, "step": 12491 }, { "epoch": 0.97, "grad_norm": 1.1214693616541775, "learning_rate": 4.996264919007399e-08, "loss": 0.4701, "step": 12492 }, { "epoch": 0.97, "grad_norm": 1.3261214458782196, "learning_rate": 4.971210397915594e-08, "loss": 0.5007, "step": 12493 }, { "epoch": 0.97, "grad_norm": 1.1851359476904144, "learning_rate": 4.9462186981249496e-08, "loss": 0.4835, "step": 12494 }, { "epoch": 0.97, "grad_norm": 1.1880002223056367, "learning_rate": 4.9212898212133154e-08, "loss": 0.4855, "step": 12495 }, { "epoch": 0.97, "grad_norm": 1.274575222538688, "learning_rate": 4.8964237687546543e-08, "loss": 0.5616, "step": 12496 }, { "epoch": 0.97, "grad_norm": 1.316144243648743, "learning_rate": 4.871620542318711e-08, "loss": 0.5992, "step": 12497 }, { "epoch": 0.97, "grad_norm": 1.2236908748943929, "learning_rate": 4.846880143471677e-08, "loss": 0.5141, "step": 12498 }, { "epoch": 0.97, "grad_norm": 1.2364336062205494, "learning_rate": 4.822202573775303e-08, "loss": 0.5309, "step": 12499 }, { "epoch": 0.97, "grad_norm": 1.2401378203526143, "learning_rate": 4.797587834787787e-08, "loss": 0.5368, "step": 12500 }, { "epoch": 0.97, "grad_norm": 1.1809495654856321, "learning_rate": 4.773035928063108e-08, "loss": 0.4937, "step": 12501 }, { "epoch": 0.97, "grad_norm": 1.1918283684044366, "learning_rate": 4.748546855151359e-08, "loss": 0.4847, "step": 12502 }, { "epoch": 0.97, "grad_norm": 1.198322420923864, "learning_rate": 4.724120617598637e-08, "loss": 0.4441, "step": 12503 }, { "epoch": 0.97, "grad_norm": 1.226661337982573, "learning_rate": 4.699757216947154e-08, "loss": 0.4981, "step": 12504 }, { "epoch": 0.97, "grad_norm": 1.2429011951744944, "learning_rate": 4.675456654735122e-08, "loss": 0.4803, "step": 12505 }, { "epoch": 0.97, "grad_norm": 1.1273467700307132, "learning_rate": 4.6512189324966484e-08, "loss": 0.4133, "step": 12506 }, { "epoch": 0.97, "grad_norm": 1.3403173560501256, "learning_rate": 4.627044051762064e-08, "loss": 0.5396, "step": 12507 }, { "epoch": 0.97, "grad_norm": 1.3218808311532189, "learning_rate": 4.602932014057704e-08, "loss": 0.5339, "step": 12508 }, { "epoch": 0.97, "grad_norm": 1.1948304045610205, "learning_rate": 4.5788828209056836e-08, "loss": 0.4882, "step": 12509 }, { "epoch": 0.97, "grad_norm": 1.3097140854121707, "learning_rate": 4.5548964738246774e-08, "loss": 0.5099, "step": 12510 }, { "epoch": 0.97, "grad_norm": 1.2479411335596724, "learning_rate": 4.530972974328696e-08, "loss": 0.5339, "step": 12511 }, { "epoch": 0.97, "grad_norm": 1.2560117163445763, "learning_rate": 4.50711232392842e-08, "loss": 0.5056, "step": 12512 }, { "epoch": 0.97, "grad_norm": 1.3385808570359352, "learning_rate": 4.4833145241302e-08, "loss": 0.5483, "step": 12513 }, { "epoch": 0.97, "grad_norm": 1.1379624736612282, "learning_rate": 4.4595795764365015e-08, "loss": 0.4174, "step": 12514 }, { "epoch": 0.97, "grad_norm": 1.173764244537531, "learning_rate": 4.4359074823459025e-08, "loss": 0.4725, "step": 12515 }, { "epoch": 0.97, "grad_norm": 1.1221474404383212, "learning_rate": 4.412298243352875e-08, "loss": 0.4476, "step": 12516 }, { "epoch": 0.97, "grad_norm": 1.2210884157578026, "learning_rate": 4.388751860948004e-08, "loss": 0.5291, "step": 12517 }, { "epoch": 0.97, "grad_norm": 1.2707082134418177, "learning_rate": 4.3652683366178784e-08, "loss": 0.4512, "step": 12518 }, { "epoch": 0.97, "grad_norm": 1.3398238200048331, "learning_rate": 4.341847671845201e-08, "loss": 0.552, "step": 12519 }, { "epoch": 0.97, "grad_norm": 1.3016117869704498, "learning_rate": 4.318489868108677e-08, "loss": 0.5273, "step": 12520 }, { "epoch": 0.97, "grad_norm": 1.2658775925239178, "learning_rate": 4.2951949268827955e-08, "loss": 0.5208, "step": 12521 }, { "epoch": 0.97, "grad_norm": 1.1328662728010623, "learning_rate": 4.2719628496384894e-08, "loss": 0.4769, "step": 12522 }, { "epoch": 0.97, "grad_norm": 1.2399536295649827, "learning_rate": 4.248793637842474e-08, "loss": 0.4867, "step": 12523 }, { "epoch": 0.97, "grad_norm": 1.2494636815606133, "learning_rate": 4.22568729295747e-08, "loss": 0.5725, "step": 12524 }, { "epoch": 0.97, "grad_norm": 1.1977545034140378, "learning_rate": 4.202643816442309e-08, "loss": 0.5215, "step": 12525 }, { "epoch": 0.97, "grad_norm": 1.2078838115135055, "learning_rate": 4.179663209751939e-08, "loss": 0.5288, "step": 12526 }, { "epoch": 0.97, "grad_norm": 1.2131186745433011, "learning_rate": 4.156745474337198e-08, "loss": 0.4746, "step": 12527 }, { "epoch": 0.97, "grad_norm": 1.2790758713713688, "learning_rate": 4.133890611644931e-08, "loss": 0.5215, "step": 12528 }, { "epoch": 0.97, "grad_norm": 1.2232141653862378, "learning_rate": 4.111098623118204e-08, "loss": 0.4987, "step": 12529 }, { "epoch": 0.97, "grad_norm": 1.2894984577372375, "learning_rate": 4.0883695101959774e-08, "loss": 0.5045, "step": 12530 }, { "epoch": 0.97, "grad_norm": 1.2462082095777405, "learning_rate": 4.0657032743131044e-08, "loss": 0.5209, "step": 12531 }, { "epoch": 0.97, "grad_norm": 1.2267800399401552, "learning_rate": 4.0430999169007726e-08, "loss": 0.5071, "step": 12532 }, { "epoch": 0.97, "grad_norm": 1.2586559050619879, "learning_rate": 4.0205594393859513e-08, "loss": 0.5277, "step": 12533 }, { "epoch": 0.97, "grad_norm": 1.1806697671782416, "learning_rate": 3.9980818431918366e-08, "loss": 0.459, "step": 12534 }, { "epoch": 0.97, "grad_norm": 1.2617609353586483, "learning_rate": 3.975667129737515e-08, "loss": 0.5088, "step": 12535 }, { "epoch": 0.97, "grad_norm": 1.0412725020744353, "learning_rate": 3.9533153004381873e-08, "loss": 0.4463, "step": 12536 }, { "epoch": 0.97, "grad_norm": 1.2060807816003063, "learning_rate": 3.9310263567049476e-08, "loss": 0.5401, "step": 12537 }, { "epoch": 0.97, "grad_norm": 1.1969549824655619, "learning_rate": 3.9088002999450034e-08, "loss": 0.5117, "step": 12538 }, { "epoch": 0.97, "grad_norm": 1.1970245965041508, "learning_rate": 3.8866371315616766e-08, "loss": 0.4764, "step": 12539 }, { "epoch": 0.97, "grad_norm": 1.2294792919982487, "learning_rate": 3.864536852954293e-08, "loss": 0.5033, "step": 12540 }, { "epoch": 0.97, "grad_norm": 1.1885830307275689, "learning_rate": 3.842499465518068e-08, "loss": 0.5274, "step": 12541 }, { "epoch": 0.97, "grad_norm": 1.1805036134450464, "learning_rate": 3.8205249706443345e-08, "loss": 0.4782, "step": 12542 }, { "epoch": 0.97, "grad_norm": 1.3933795693284015, "learning_rate": 3.798613369720427e-08, "loss": 0.5868, "step": 12543 }, { "epoch": 0.97, "grad_norm": 1.248727436797367, "learning_rate": 3.776764664129684e-08, "loss": 0.5094, "step": 12544 }, { "epoch": 0.97, "grad_norm": 1.1594935621318927, "learning_rate": 3.7549788552517786e-08, "loss": 0.4392, "step": 12545 }, { "epoch": 0.97, "grad_norm": 1.411331532355703, "learning_rate": 3.733255944461944e-08, "loss": 0.5604, "step": 12546 }, { "epoch": 0.97, "grad_norm": 1.1500280874388213, "learning_rate": 3.711595933131751e-08, "loss": 0.5048, "step": 12547 }, { "epoch": 0.97, "grad_norm": 1.225116241545206, "learning_rate": 3.68999882262866e-08, "loss": 0.4946, "step": 12548 }, { "epoch": 0.97, "grad_norm": 1.2390220183293383, "learning_rate": 3.668464614316247e-08, "loss": 0.5018, "step": 12549 }, { "epoch": 0.97, "grad_norm": 1.3830168406053522, "learning_rate": 3.646993309554092e-08, "loss": 0.508, "step": 12550 }, { "epoch": 0.97, "grad_norm": 1.15229367939418, "learning_rate": 3.6255849096976655e-08, "loss": 0.4659, "step": 12551 }, { "epoch": 0.97, "grad_norm": 1.2691241271783298, "learning_rate": 3.6042394160987756e-08, "loss": 0.4978, "step": 12552 }, { "epoch": 0.97, "grad_norm": 1.2810706966822991, "learning_rate": 3.5829568301049e-08, "loss": 0.473, "step": 12553 }, { "epoch": 0.97, "grad_norm": 1.252853046312049, "learning_rate": 3.561737153059741e-08, "loss": 0.5371, "step": 12554 }, { "epoch": 0.97, "grad_norm": 1.3147221781602605, "learning_rate": 3.5405803863032274e-08, "loss": 0.4885, "step": 12555 }, { "epoch": 0.97, "grad_norm": 1.1985113685651188, "learning_rate": 3.519486531170735e-08, "loss": 0.5107, "step": 12556 }, { "epoch": 0.97, "grad_norm": 1.2007917097203231, "learning_rate": 3.4984555889944204e-08, "loss": 0.4907, "step": 12557 }, { "epoch": 0.97, "grad_norm": 1.3001402687545658, "learning_rate": 3.477487561101778e-08, "loss": 0.505, "step": 12558 }, { "epoch": 0.97, "grad_norm": 1.2350823994997568, "learning_rate": 3.4565824488166366e-08, "loss": 0.5286, "step": 12559 }, { "epoch": 0.97, "grad_norm": 1.16510285860732, "learning_rate": 3.435740253459052e-08, "loss": 0.5068, "step": 12560 }, { "epoch": 0.97, "grad_norm": 1.14552285004849, "learning_rate": 3.41496097634475e-08, "loss": 0.4835, "step": 12561 }, { "epoch": 0.97, "grad_norm": 1.308576486957611, "learning_rate": 3.3942446187857915e-08, "loss": 0.6124, "step": 12562 }, { "epoch": 0.97, "grad_norm": 1.180858611807922, "learning_rate": 3.373591182089797e-08, "loss": 0.5003, "step": 12563 }, { "epoch": 0.97, "grad_norm": 1.2974452867530155, "learning_rate": 3.353000667560946e-08, "loss": 0.4998, "step": 12564 }, { "epoch": 0.97, "grad_norm": 1.1624007172386073, "learning_rate": 3.3324730764991985e-08, "loss": 0.4966, "step": 12565 }, { "epoch": 0.97, "grad_norm": 1.1742607246251906, "learning_rate": 3.312008410200518e-08, "loss": 0.4783, "step": 12566 }, { "epoch": 0.97, "grad_norm": 1.3040481645576383, "learning_rate": 3.2916066699570926e-08, "loss": 0.5205, "step": 12567 }, { "epoch": 0.98, "grad_norm": 1.083866092311369, "learning_rate": 3.271267857056781e-08, "loss": 0.5162, "step": 12568 }, { "epoch": 0.98, "grad_norm": 1.1746480556724765, "learning_rate": 3.250991972783779e-08, "loss": 0.449, "step": 12569 }, { "epoch": 0.98, "grad_norm": 1.3303790095242969, "learning_rate": 3.230779018418284e-08, "loss": 0.5377, "step": 12570 }, { "epoch": 0.98, "grad_norm": 1.2407941861421647, "learning_rate": 3.210628995236276e-08, "loss": 0.5397, "step": 12571 }, { "epoch": 0.98, "grad_norm": 1.0852404765651726, "learning_rate": 3.190541904510069e-08, "loss": 0.4063, "step": 12572 }, { "epoch": 0.98, "grad_norm": 1.2172157705449325, "learning_rate": 3.170517747507762e-08, "loss": 0.4855, "step": 12573 }, { "epoch": 0.98, "grad_norm": 1.1867336008575373, "learning_rate": 3.1505565254936755e-08, "loss": 0.5503, "step": 12574 }, { "epoch": 0.98, "grad_norm": 1.2104968838310863, "learning_rate": 3.130658239728024e-08, "loss": 0.5343, "step": 12575 }, { "epoch": 0.98, "grad_norm": 1.0946370478706835, "learning_rate": 3.1108228914670245e-08, "loss": 0.4572, "step": 12576 }, { "epoch": 0.98, "grad_norm": 1.2518267634712072, "learning_rate": 3.0910504819631205e-08, "loss": 0.5355, "step": 12577 }, { "epoch": 0.98, "grad_norm": 1.2287984986304756, "learning_rate": 3.071341012464535e-08, "loss": 0.5213, "step": 12578 }, { "epoch": 0.98, "grad_norm": 1.3161426585916085, "learning_rate": 3.051694484215717e-08, "loss": 0.5149, "step": 12579 }, { "epoch": 0.98, "grad_norm": 1.2385400447508659, "learning_rate": 3.032110898457008e-08, "loss": 0.4832, "step": 12580 }, { "epoch": 0.98, "grad_norm": 1.2257748644378619, "learning_rate": 3.012590256424752e-08, "loss": 0.5006, "step": 12581 }, { "epoch": 0.98, "grad_norm": 1.2571520759736823, "learning_rate": 2.993132559351519e-08, "loss": 0.5157, "step": 12582 }, { "epoch": 0.98, "grad_norm": 1.3422654845349395, "learning_rate": 2.9737378084656597e-08, "loss": 0.4989, "step": 12583 }, { "epoch": 0.98, "grad_norm": 1.1401826183547275, "learning_rate": 2.954406004991639e-08, "loss": 0.475, "step": 12584 }, { "epoch": 0.98, "grad_norm": 1.20654203012724, "learning_rate": 2.935137150150147e-08, "loss": 0.4997, "step": 12585 }, { "epoch": 0.98, "grad_norm": 1.1993375837364884, "learning_rate": 2.915931245157544e-08, "loss": 0.5235, "step": 12586 }, { "epoch": 0.98, "grad_norm": 1.154345051491068, "learning_rate": 2.8967882912265265e-08, "loss": 0.4759, "step": 12587 }, { "epoch": 0.98, "grad_norm": 1.212403769459618, "learning_rate": 2.8777082895656837e-08, "loss": 0.4856, "step": 12588 }, { "epoch": 0.98, "grad_norm": 1.2418752793732681, "learning_rate": 2.8586912413794966e-08, "loss": 0.5041, "step": 12589 }, { "epoch": 0.98, "grad_norm": 1.3622836859940326, "learning_rate": 2.8397371478687818e-08, "loss": 0.5332, "step": 12590 }, { "epoch": 0.98, "grad_norm": 1.1319309356028986, "learning_rate": 2.820846010230138e-08, "loss": 0.4785, "step": 12591 }, { "epoch": 0.98, "grad_norm": 1.2693726895422595, "learning_rate": 2.8020178296562784e-08, "loss": 0.5224, "step": 12592 }, { "epoch": 0.98, "grad_norm": 1.3295500346937035, "learning_rate": 2.783252607335807e-08, "loss": 0.5083, "step": 12593 }, { "epoch": 0.98, "grad_norm": 1.1587727321629475, "learning_rate": 2.764550344453554e-08, "loss": 0.507, "step": 12594 }, { "epoch": 0.98, "grad_norm": 1.1748750052015278, "learning_rate": 2.7459110421903524e-08, "loss": 0.5074, "step": 12595 }, { "epoch": 0.98, "grad_norm": 1.128433709424777, "learning_rate": 2.727334701722928e-08, "loss": 0.4599, "step": 12596 }, { "epoch": 0.98, "grad_norm": 1.2057224325674472, "learning_rate": 2.70882132422412e-08, "loss": 0.4755, "step": 12597 }, { "epoch": 0.98, "grad_norm": 1.1270810588501117, "learning_rate": 2.6903709108627718e-08, "loss": 0.4236, "step": 12598 }, { "epoch": 0.98, "grad_norm": 1.3004548634194812, "learning_rate": 2.6719834628037287e-08, "loss": 0.5096, "step": 12599 }, { "epoch": 0.98, "grad_norm": 1.3358761065023312, "learning_rate": 2.6536589812079517e-08, "loss": 0.5493, "step": 12600 }, { "epoch": 0.98, "grad_norm": 1.0738701827054769, "learning_rate": 2.6353974672322923e-08, "loss": 0.482, "step": 12601 }, { "epoch": 0.98, "grad_norm": 1.2054185680097944, "learning_rate": 2.6171989220297177e-08, "loss": 0.4911, "step": 12602 }, { "epoch": 0.98, "grad_norm": 1.2355684233290045, "learning_rate": 2.5990633467491976e-08, "loss": 0.4885, "step": 12603 }, { "epoch": 0.98, "grad_norm": 1.119647272544111, "learning_rate": 2.580990742535705e-08, "loss": 0.4366, "step": 12604 }, { "epoch": 0.98, "grad_norm": 1.04430067189396, "learning_rate": 2.562981110530216e-08, "loss": 0.463, "step": 12605 }, { "epoch": 0.98, "grad_norm": 1.1836326265402604, "learning_rate": 2.545034451869821e-08, "loss": 0.5025, "step": 12606 }, { "epoch": 0.98, "grad_norm": 1.3094176019188473, "learning_rate": 2.5271507676877248e-08, "loss": 0.5762, "step": 12607 }, { "epoch": 0.98, "grad_norm": 1.229317068607476, "learning_rate": 2.5093300591128023e-08, "loss": 0.5105, "step": 12608 }, { "epoch": 0.98, "grad_norm": 1.1557713626254547, "learning_rate": 2.4915723272702642e-08, "loss": 0.4225, "step": 12609 }, { "epoch": 0.98, "grad_norm": 1.2488192226556767, "learning_rate": 2.4738775732812138e-08, "loss": 0.4733, "step": 12610 }, { "epoch": 0.98, "grad_norm": 1.272525765208751, "learning_rate": 2.4562457982628683e-08, "loss": 0.5141, "step": 12611 }, { "epoch": 0.98, "grad_norm": 1.196111075726186, "learning_rate": 2.438677003328338e-08, "loss": 0.5235, "step": 12612 }, { "epoch": 0.98, "grad_norm": 1.1794870976886063, "learning_rate": 2.4211711895868462e-08, "loss": 0.4887, "step": 12613 }, { "epoch": 0.98, "grad_norm": 1.168045653806355, "learning_rate": 2.403728358143731e-08, "loss": 0.45, "step": 12614 }, { "epoch": 0.98, "grad_norm": 1.1773530206650702, "learning_rate": 2.3863485101001114e-08, "loss": 0.483, "step": 12615 }, { "epoch": 0.98, "grad_norm": 1.14599396128836, "learning_rate": 2.3690316465533325e-08, "loss": 0.4959, "step": 12616 }, { "epoch": 0.98, "grad_norm": 1.252387056416989, "learning_rate": 2.3517777685966305e-08, "loss": 0.5047, "step": 12617 }, { "epoch": 0.98, "grad_norm": 1.2420292879512145, "learning_rate": 2.334586877319467e-08, "loss": 0.5223, "step": 12618 }, { "epoch": 0.98, "grad_norm": 1.1678464181173436, "learning_rate": 2.3174589738070853e-08, "loss": 0.4807, "step": 12619 }, { "epoch": 0.98, "grad_norm": 1.2828047088869678, "learning_rate": 2.3003940591408425e-08, "loss": 0.4784, "step": 12620 }, { "epoch": 0.98, "grad_norm": 1.144458911679828, "learning_rate": 2.28339213439821e-08, "loss": 0.5012, "step": 12621 }, { "epoch": 0.98, "grad_norm": 1.2144344903860433, "learning_rate": 2.266453200652552e-08, "loss": 0.4906, "step": 12622 }, { "epoch": 0.98, "grad_norm": 1.3063654816608978, "learning_rate": 2.2495772589733456e-08, "loss": 0.5064, "step": 12623 }, { "epoch": 0.98, "grad_norm": 1.2055968122019955, "learning_rate": 2.232764310426072e-08, "loss": 0.5038, "step": 12624 }, { "epoch": 0.98, "grad_norm": 1.242079244250955, "learning_rate": 2.2160143560721048e-08, "loss": 0.4767, "step": 12625 }, { "epoch": 0.98, "grad_norm": 1.3478167009896238, "learning_rate": 2.1993273969691532e-08, "loss": 0.5037, "step": 12626 }, { "epoch": 0.98, "grad_norm": 1.0968958165293736, "learning_rate": 2.1827034341704855e-08, "loss": 0.4541, "step": 12627 }, { "epoch": 0.98, "grad_norm": 1.2237881427154937, "learning_rate": 2.166142468725929e-08, "loss": 0.5013, "step": 12628 }, { "epoch": 0.98, "grad_norm": 1.1549132068440089, "learning_rate": 2.1496445016809808e-08, "loss": 0.4535, "step": 12629 }, { "epoch": 0.98, "grad_norm": 1.1663659877688526, "learning_rate": 2.133209534077141e-08, "loss": 0.4821, "step": 12630 }, { "epoch": 0.98, "grad_norm": 1.3071388013406051, "learning_rate": 2.1168375669521346e-08, "loss": 0.5582, "step": 12631 }, { "epoch": 0.98, "grad_norm": 1.2073892599468885, "learning_rate": 2.1005286013394688e-08, "loss": 0.4629, "step": 12632 }, { "epoch": 0.98, "grad_norm": 1.2083539906193934, "learning_rate": 2.0842826382689864e-08, "loss": 0.4557, "step": 12633 }, { "epoch": 0.98, "grad_norm": 1.154444806064512, "learning_rate": 2.068099678766311e-08, "loss": 0.4534, "step": 12634 }, { "epoch": 0.98, "grad_norm": 1.2691488306232885, "learning_rate": 2.0519797238531813e-08, "loss": 0.5131, "step": 12635 }, { "epoch": 0.98, "grad_norm": 1.1546102701874512, "learning_rate": 2.0359227745472278e-08, "loss": 0.4635, "step": 12636 }, { "epoch": 0.98, "grad_norm": 1.22282886200742, "learning_rate": 2.0199288318624165e-08, "loss": 0.5263, "step": 12637 }, { "epoch": 0.98, "grad_norm": 1.2637600757412284, "learning_rate": 2.003997896808274e-08, "loss": 0.513, "step": 12638 }, { "epoch": 0.98, "grad_norm": 1.3187990260841724, "learning_rate": 1.988129970390773e-08, "loss": 0.4691, "step": 12639 }, { "epoch": 0.98, "grad_norm": 1.220886363200298, "learning_rate": 1.972325053611779e-08, "loss": 0.4486, "step": 12640 }, { "epoch": 0.98, "grad_norm": 1.1272970696167466, "learning_rate": 1.9565831474689377e-08, "loss": 0.5075, "step": 12641 }, { "epoch": 0.98, "grad_norm": 1.2195276078262047, "learning_rate": 1.9409042529562327e-08, "loss": 0.4991, "step": 12642 }, { "epoch": 0.98, "grad_norm": 1.165288548324413, "learning_rate": 1.9252883710635383e-08, "loss": 0.5002, "step": 12643 }, { "epoch": 0.98, "grad_norm": 1.257544163104745, "learning_rate": 1.909735502776844e-08, "loss": 0.563, "step": 12644 }, { "epoch": 0.98, "grad_norm": 1.1696889196413247, "learning_rate": 1.8942456490780305e-08, "loss": 0.4523, "step": 12645 }, { "epoch": 0.98, "grad_norm": 1.3571724861538996, "learning_rate": 1.8788188109449822e-08, "loss": 0.5102, "step": 12646 }, { "epoch": 0.98, "grad_norm": 1.232620247474563, "learning_rate": 1.8634549893516983e-08, "loss": 0.4757, "step": 12647 }, { "epoch": 0.98, "grad_norm": 1.3154257453570524, "learning_rate": 1.8481541852682917e-08, "loss": 0.5532, "step": 12648 }, { "epoch": 0.98, "grad_norm": 1.1616027240495295, "learning_rate": 1.832916399660656e-08, "loss": 0.4633, "step": 12649 }, { "epoch": 0.98, "grad_norm": 1.192816877909245, "learning_rate": 1.8177416334907995e-08, "loss": 0.5134, "step": 12650 }, { "epoch": 0.98, "grad_norm": 1.1525956218050522, "learning_rate": 1.8026298877169557e-08, "loss": 0.4664, "step": 12651 }, { "epoch": 0.98, "grad_norm": 1.1901307329124517, "learning_rate": 1.7875811632930285e-08, "loss": 0.4796, "step": 12652 }, { "epoch": 0.98, "grad_norm": 1.1985796489738958, "learning_rate": 1.7725954611692577e-08, "loss": 0.4977, "step": 12653 }, { "epoch": 0.98, "grad_norm": 1.2222830108254599, "learning_rate": 1.757672782291775e-08, "loss": 0.4819, "step": 12654 }, { "epoch": 0.98, "grad_norm": 1.142257097579135, "learning_rate": 1.742813127602605e-08, "loss": 0.5035, "step": 12655 }, { "epoch": 0.98, "grad_norm": 1.11314546526645, "learning_rate": 1.728016498039886e-08, "loss": 0.4671, "step": 12656 }, { "epoch": 0.98, "grad_norm": 1.2919311509019973, "learning_rate": 1.713282894537982e-08, "loss": 0.5232, "step": 12657 }, { "epoch": 0.98, "grad_norm": 1.1539355646850773, "learning_rate": 1.6986123180270376e-08, "loss": 0.4443, "step": 12658 }, { "epoch": 0.98, "grad_norm": 1.2444507924884904, "learning_rate": 1.6840047694332007e-08, "loss": 0.4721, "step": 12659 }, { "epoch": 0.98, "grad_norm": 1.2600276704050837, "learning_rate": 1.6694602496788447e-08, "loss": 0.481, "step": 12660 }, { "epoch": 0.98, "grad_norm": 1.1196517975268374, "learning_rate": 1.6549787596821242e-08, "loss": 0.4641, "step": 12661 }, { "epoch": 0.98, "grad_norm": 1.2072081698534942, "learning_rate": 1.640560300357308e-08, "loss": 0.5326, "step": 12662 }, { "epoch": 0.98, "grad_norm": 1.2229784946094853, "learning_rate": 1.6262048726148894e-08, "loss": 0.5128, "step": 12663 }, { "epoch": 0.98, "grad_norm": 1.297595134400607, "learning_rate": 1.6119124773610328e-08, "loss": 0.4879, "step": 12664 }, { "epoch": 0.98, "grad_norm": 1.3088338147772178, "learning_rate": 1.5976831154981275e-08, "loss": 0.4948, "step": 12665 }, { "epoch": 0.98, "grad_norm": 1.2027569988287352, "learning_rate": 1.583516787924566e-08, "loss": 0.4992, "step": 12666 }, { "epoch": 0.98, "grad_norm": 1.211887946450703, "learning_rate": 1.569413495534744e-08, "loss": 0.5026, "step": 12667 }, { "epoch": 0.98, "grad_norm": 1.212820299846413, "learning_rate": 1.5553732392191712e-08, "loss": 0.5573, "step": 12668 }, { "epoch": 0.98, "grad_norm": 1.1885641750619365, "learning_rate": 1.5413960198641388e-08, "loss": 0.511, "step": 12669 }, { "epoch": 0.98, "grad_norm": 1.283391140383113, "learning_rate": 1.527481838352052e-08, "loss": 0.525, "step": 12670 }, { "epoch": 0.98, "grad_norm": 1.1577687470152433, "learning_rate": 1.513630695561541e-08, "loss": 0.5019, "step": 12671 }, { "epoch": 0.98, "grad_norm": 1.1802811802432251, "learning_rate": 1.499842592367018e-08, "loss": 0.4751, "step": 12672 }, { "epoch": 0.98, "grad_norm": 1.184738763592111, "learning_rate": 1.4861175296390084e-08, "loss": 0.4256, "step": 12673 }, { "epoch": 0.98, "grad_norm": 1.1066242554783943, "learning_rate": 1.4724555082441528e-08, "loss": 0.4851, "step": 12674 }, { "epoch": 0.98, "grad_norm": 1.2692062666224042, "learning_rate": 1.458856529044872e-08, "loss": 0.5065, "step": 12675 }, { "epoch": 0.98, "grad_norm": 1.233754738563783, "learning_rate": 1.4453205928997016e-08, "loss": 0.5212, "step": 12676 }, { "epoch": 0.98, "grad_norm": 1.2134549460580195, "learning_rate": 1.4318477006632914e-08, "loss": 0.5174, "step": 12677 }, { "epoch": 0.98, "grad_norm": 1.1839502886601159, "learning_rate": 1.418437853186294e-08, "loss": 0.488, "step": 12678 }, { "epoch": 0.98, "grad_norm": 1.243352283933351, "learning_rate": 1.405091051315366e-08, "loss": 0.4615, "step": 12679 }, { "epoch": 0.98, "grad_norm": 1.2655166297009866, "learning_rate": 1.3918072958931662e-08, "loss": 0.4869, "step": 12680 }, { "epoch": 0.98, "grad_norm": 1.2254379851869046, "learning_rate": 1.3785865877581351e-08, "loss": 0.4764, "step": 12681 }, { "epoch": 0.98, "grad_norm": 1.2155500609533092, "learning_rate": 1.3654289277452715e-08, "loss": 0.4928, "step": 12682 }, { "epoch": 0.98, "grad_norm": 1.2407765562561746, "learning_rate": 1.3523343166851332e-08, "loss": 0.5097, "step": 12683 }, { "epoch": 0.98, "grad_norm": 1.2287690548665986, "learning_rate": 1.3393027554045035e-08, "loss": 0.4988, "step": 12684 }, { "epoch": 0.98, "grad_norm": 1.2577501364465047, "learning_rate": 1.3263342447260575e-08, "loss": 0.455, "step": 12685 }, { "epoch": 0.98, "grad_norm": 1.304038154599678, "learning_rate": 1.3134287854685846e-08, "loss": 0.4923, "step": 12686 }, { "epoch": 0.98, "grad_norm": 1.106439602223515, "learning_rate": 1.3005863784468774e-08, "loss": 0.4866, "step": 12687 }, { "epoch": 0.98, "grad_norm": 1.2633800611854116, "learning_rate": 1.2878070244718433e-08, "loss": 0.5417, "step": 12688 }, { "epoch": 0.98, "grad_norm": 1.186966776340299, "learning_rate": 1.2750907243501698e-08, "loss": 0.4708, "step": 12689 }, { "epoch": 0.98, "grad_norm": 1.2494317193488416, "learning_rate": 1.2624374788848814e-08, "loss": 0.4961, "step": 12690 }, { "epoch": 0.98, "grad_norm": 1.3156602596532554, "learning_rate": 1.2498472888745616e-08, "loss": 0.4772, "step": 12691 }, { "epoch": 0.98, "grad_norm": 1.2698992386592534, "learning_rate": 1.2373201551143521e-08, "loss": 0.5202, "step": 12692 }, { "epoch": 0.98, "grad_norm": 1.1804663001084925, "learning_rate": 1.2248560783950646e-08, "loss": 0.5111, "step": 12693 }, { "epoch": 0.98, "grad_norm": 1.2526284239938883, "learning_rate": 1.2124550595036255e-08, "loss": 0.5367, "step": 12694 }, { "epoch": 0.98, "grad_norm": 1.2306195363494494, "learning_rate": 1.2001170992228528e-08, "loss": 0.4877, "step": 12695 }, { "epoch": 0.98, "grad_norm": 1.177407746072349, "learning_rate": 1.187842198331901e-08, "loss": 0.5063, "step": 12696 }, { "epoch": 0.99, "grad_norm": 1.181903248756463, "learning_rate": 1.175630357605706e-08, "loss": 0.4957, "step": 12697 }, { "epoch": 0.99, "grad_norm": 1.109665523688508, "learning_rate": 1.1634815778150954e-08, "loss": 0.5012, "step": 12698 }, { "epoch": 0.99, "grad_norm": 1.1468266198870443, "learning_rate": 1.1513958597273445e-08, "loss": 0.4636, "step": 12699 }, { "epoch": 0.99, "grad_norm": 1.3306020637423956, "learning_rate": 1.1393732041052874e-08, "loss": 0.5574, "step": 12700 }, { "epoch": 0.99, "grad_norm": 1.231717593095607, "learning_rate": 1.1274136117080946e-08, "loss": 0.501, "step": 12701 }, { "epoch": 0.99, "grad_norm": 1.2513176172452023, "learning_rate": 1.115517083290718e-08, "loss": 0.5012, "step": 12702 }, { "epoch": 0.99, "grad_norm": 1.1676995723145023, "learning_rate": 1.1036836196043344e-08, "loss": 0.496, "step": 12703 }, { "epoch": 0.99, "grad_norm": 1.2389142557482795, "learning_rate": 1.0919132213960126e-08, "loss": 0.5206, "step": 12704 }, { "epoch": 0.99, "grad_norm": 1.1331752327895728, "learning_rate": 1.0802058894089363e-08, "loss": 0.4722, "step": 12705 }, { "epoch": 0.99, "grad_norm": 1.1956660925767462, "learning_rate": 1.0685616243821806e-08, "loss": 0.4979, "step": 12706 }, { "epoch": 0.99, "grad_norm": 1.2624269747623686, "learning_rate": 1.0569804270509354e-08, "loss": 0.5209, "step": 12707 }, { "epoch": 0.99, "grad_norm": 1.2372739527617718, "learning_rate": 1.0454622981463935e-08, "loss": 0.5085, "step": 12708 }, { "epoch": 0.99, "grad_norm": 1.4619405638306442, "learning_rate": 1.034007238395751e-08, "loss": 0.5298, "step": 12709 }, { "epoch": 0.99, "grad_norm": 1.2525100778414118, "learning_rate": 1.0226152485222073e-08, "loss": 0.4629, "step": 12710 }, { "epoch": 0.99, "grad_norm": 1.2862428286457241, "learning_rate": 1.0112863292450758e-08, "loss": 0.5394, "step": 12711 }, { "epoch": 0.99, "grad_norm": 1.0701279864016118, "learning_rate": 1.0000204812794511e-08, "loss": 0.4706, "step": 12712 }, { "epoch": 0.99, "grad_norm": 1.163185608610436, "learning_rate": 9.888177053367642e-09, "loss": 0.4616, "step": 12713 }, { "epoch": 0.99, "grad_norm": 1.2559422395934243, "learning_rate": 9.776780021241161e-09, "loss": 0.524, "step": 12714 }, { "epoch": 0.99, "grad_norm": 1.1807718409210963, "learning_rate": 9.666013723450552e-09, "loss": 0.4642, "step": 12715 }, { "epoch": 0.99, "grad_norm": 1.314801106296906, "learning_rate": 9.555878166987998e-09, "loss": 0.525, "step": 12716 }, { "epoch": 0.99, "grad_norm": 1.2418551210253237, "learning_rate": 9.446373358805716e-09, "loss": 0.5123, "step": 12717 }, { "epoch": 0.99, "grad_norm": 1.2614900836471672, "learning_rate": 9.337499305819287e-09, "loss": 0.499, "step": 12718 }, { "epoch": 0.99, "grad_norm": 1.4273683676348017, "learning_rate": 9.229256014900989e-09, "loss": 0.5802, "step": 12719 }, { "epoch": 0.99, "grad_norm": 1.1766008327769735, "learning_rate": 9.121643492885358e-09, "loss": 0.4721, "step": 12720 }, { "epoch": 0.99, "grad_norm": 1.1745903599693428, "learning_rate": 9.014661746566954e-09, "loss": 0.4524, "step": 12721 }, { "epoch": 0.99, "grad_norm": 1.348019139179454, "learning_rate": 8.90831078269927e-09, "loss": 0.5121, "step": 12722 }, { "epoch": 0.99, "grad_norm": 1.301335809390326, "learning_rate": 8.80259060799804e-09, "loss": 0.5051, "step": 12723 }, { "epoch": 0.99, "grad_norm": 1.1327991353266051, "learning_rate": 8.697501229135708e-09, "loss": 0.4515, "step": 12724 }, { "epoch": 0.99, "grad_norm": 1.1477310706795962, "learning_rate": 8.593042652749184e-09, "loss": 0.4624, "step": 12725 }, { "epoch": 0.99, "grad_norm": 1.3856188213122878, "learning_rate": 8.489214885433195e-09, "loss": 0.5233, "step": 12726 }, { "epoch": 0.99, "grad_norm": 1.2141359972216665, "learning_rate": 8.386017933741386e-09, "loss": 0.4808, "step": 12727 }, { "epoch": 0.99, "grad_norm": 1.2309942183122504, "learning_rate": 8.283451804190767e-09, "loss": 0.5559, "step": 12728 }, { "epoch": 0.99, "grad_norm": 1.1809277614687603, "learning_rate": 8.18151650325616e-09, "loss": 0.4801, "step": 12729 }, { "epoch": 0.99, "grad_norm": 1.3661456252677353, "learning_rate": 8.080212037374636e-09, "loss": 0.54, "step": 12730 }, { "epoch": 0.99, "grad_norm": 1.2866423111094476, "learning_rate": 7.97953841293997e-09, "loss": 0.5094, "step": 12731 }, { "epoch": 0.99, "grad_norm": 1.2295635951048012, "learning_rate": 7.879495636308188e-09, "loss": 0.5078, "step": 12732 }, { "epoch": 0.99, "grad_norm": 1.2228714141184835, "learning_rate": 7.78008371379757e-09, "loss": 0.4787, "step": 12733 }, { "epoch": 0.99, "grad_norm": 1.2432130622646405, "learning_rate": 7.681302651683098e-09, "loss": 0.5176, "step": 12734 }, { "epoch": 0.99, "grad_norm": 1.2042672815934001, "learning_rate": 7.583152456200892e-09, "loss": 0.4908, "step": 12735 }, { "epoch": 0.99, "grad_norm": 1.2644028590890768, "learning_rate": 7.485633133549331e-09, "loss": 0.5024, "step": 12736 }, { "epoch": 0.99, "grad_norm": 1.2938829860431587, "learning_rate": 7.3887446898834865e-09, "loss": 0.4993, "step": 12737 }, { "epoch": 0.99, "grad_norm": 1.3347526784106538, "learning_rate": 7.292487131321802e-09, "loss": 0.5021, "step": 12738 }, { "epoch": 0.99, "grad_norm": 1.1829435873019603, "learning_rate": 7.1968604639416354e-09, "loss": 0.4752, "step": 12739 }, { "epoch": 0.99, "grad_norm": 1.2377566119691414, "learning_rate": 7.1018646937781596e-09, "loss": 0.4571, "step": 12740 }, { "epoch": 0.99, "grad_norm": 1.228612316796155, "learning_rate": 7.007499826832132e-09, "loss": 0.4611, "step": 12741 }, { "epoch": 0.99, "grad_norm": 1.2366804009743393, "learning_rate": 6.913765869058786e-09, "loss": 0.4754, "step": 12742 }, { "epoch": 0.99, "grad_norm": 1.182124973370936, "learning_rate": 6.820662826376723e-09, "loss": 0.5172, "step": 12743 }, { "epoch": 0.99, "grad_norm": 1.2616431624030153, "learning_rate": 6.728190704664572e-09, "loss": 0.4868, "step": 12744 }, { "epoch": 0.99, "grad_norm": 1.230429754698932, "learning_rate": 6.636349509760997e-09, "loss": 0.5769, "step": 12745 }, { "epoch": 0.99, "grad_norm": 1.1692607457236275, "learning_rate": 6.545139247462473e-09, "loss": 0.4374, "step": 12746 }, { "epoch": 0.99, "grad_norm": 1.1358010694175988, "learning_rate": 6.454559923529946e-09, "loss": 0.4582, "step": 12747 }, { "epoch": 0.99, "grad_norm": 1.2873372873245224, "learning_rate": 6.364611543679955e-09, "loss": 0.5132, "step": 12748 }, { "epoch": 0.99, "grad_norm": 1.331729172268932, "learning_rate": 6.275294113592401e-09, "loss": 0.535, "step": 12749 }, { "epoch": 0.99, "grad_norm": 1.1422743695123125, "learning_rate": 6.186607638907216e-09, "loss": 0.4593, "step": 12750 }, { "epoch": 0.99, "grad_norm": 1.2191683222476324, "learning_rate": 6.098552125222146e-09, "loss": 0.516, "step": 12751 }, { "epoch": 0.99, "grad_norm": 1.1540944394717993, "learning_rate": 6.0111275780971865e-09, "loss": 0.4492, "step": 12752 }, { "epoch": 0.99, "grad_norm": 1.2025172692722335, "learning_rate": 5.924334003052368e-09, "loss": 0.5226, "step": 12753 }, { "epoch": 0.99, "grad_norm": 1.2681666625780705, "learning_rate": 5.838171405566639e-09, "loss": 0.5018, "step": 12754 }, { "epoch": 0.99, "grad_norm": 1.0935665521729219, "learning_rate": 5.752639791080095e-09, "loss": 0.4704, "step": 12755 }, { "epoch": 0.99, "grad_norm": 1.1377924930832464, "learning_rate": 5.667739164993968e-09, "loss": 0.4579, "step": 12756 }, { "epoch": 0.99, "grad_norm": 1.3449772398203494, "learning_rate": 5.583469532666197e-09, "loss": 0.4832, "step": 12757 }, { "epoch": 0.99, "grad_norm": 1.1875041660436534, "learning_rate": 5.49983089941919e-09, "loss": 0.4417, "step": 12758 }, { "epoch": 0.99, "grad_norm": 1.1745423034253408, "learning_rate": 5.416823270532057e-09, "loss": 0.495, "step": 12759 }, { "epoch": 0.99, "grad_norm": 1.2282987806532002, "learning_rate": 5.334446651246161e-09, "loss": 0.487, "step": 12760 }, { "epoch": 0.99, "grad_norm": 1.342449889997135, "learning_rate": 5.252701046762898e-09, "loss": 0.5254, "step": 12761 }, { "epoch": 0.99, "grad_norm": 1.2715543619281244, "learning_rate": 5.171586462242584e-09, "loss": 0.4865, "step": 12762 }, { "epoch": 0.99, "grad_norm": 1.094478745872354, "learning_rate": 5.091102902806677e-09, "loss": 0.4544, "step": 12763 }, { "epoch": 0.99, "grad_norm": 1.3019438808787112, "learning_rate": 5.011250373535559e-09, "loss": 0.4927, "step": 12764 }, { "epoch": 0.99, "grad_norm": 1.298966892258081, "learning_rate": 4.932028879472972e-09, "loss": 0.5164, "step": 12765 }, { "epoch": 0.99, "grad_norm": 1.1492545406906582, "learning_rate": 4.8534384256182506e-09, "loss": 0.4609, "step": 12766 }, { "epoch": 0.99, "grad_norm": 1.1557608936018389, "learning_rate": 4.775479016934092e-09, "loss": 0.5113, "step": 12767 }, { "epoch": 0.99, "grad_norm": 1.0462360424901878, "learning_rate": 4.698150658343226e-09, "loss": 0.4544, "step": 12768 }, { "epoch": 0.99, "grad_norm": 1.2507010877989286, "learning_rate": 4.621453354726191e-09, "loss": 0.5325, "step": 12769 }, { "epoch": 0.99, "grad_norm": 1.2383028142838413, "learning_rate": 4.545387110926891e-09, "loss": 0.4787, "step": 12770 }, { "epoch": 0.99, "grad_norm": 1.1392576827492746, "learning_rate": 4.4699519317459305e-09, "loss": 0.4477, "step": 12771 }, { "epoch": 0.99, "grad_norm": 1.2822288402760853, "learning_rate": 4.395147821948387e-09, "loss": 0.5515, "step": 12772 }, { "epoch": 0.99, "grad_norm": 1.2124480147868137, "learning_rate": 4.320974786254928e-09, "loss": 0.4641, "step": 12773 }, { "epoch": 0.99, "grad_norm": 1.2341956962057914, "learning_rate": 4.247432829349585e-09, "loss": 0.5244, "step": 12774 }, { "epoch": 0.99, "grad_norm": 1.2746961025762316, "learning_rate": 4.17452195587531e-09, "loss": 0.5264, "step": 12775 }, { "epoch": 0.99, "grad_norm": 1.1431327842918724, "learning_rate": 4.102242170435089e-09, "loss": 0.4609, "step": 12776 }, { "epoch": 0.99, "grad_norm": 1.2277499240105445, "learning_rate": 4.030593477591938e-09, "loss": 0.4699, "step": 12777 }, { "epoch": 0.99, "grad_norm": 1.207457087684253, "learning_rate": 3.959575881870015e-09, "loss": 0.4785, "step": 12778 }, { "epoch": 0.99, "grad_norm": 1.2098685684230215, "learning_rate": 3.889189387752401e-09, "loss": 0.477, "step": 12779 }, { "epoch": 0.99, "grad_norm": 1.1490477413322115, "learning_rate": 3.81943399968443e-09, "loss": 0.4662, "step": 12780 }, { "epoch": 0.99, "grad_norm": 1.15566826697599, "learning_rate": 3.750309722068135e-09, "loss": 0.4696, "step": 12781 }, { "epoch": 0.99, "grad_norm": 1.1612448674346205, "learning_rate": 3.6818165592689138e-09, "loss": 0.4977, "step": 12782 }, { "epoch": 0.99, "grad_norm": 1.32667039987304, "learning_rate": 3.6139545156110845e-09, "loss": 0.5464, "step": 12783 }, { "epoch": 0.99, "grad_norm": 1.1690986296927226, "learning_rate": 3.546723595378998e-09, "loss": 0.5062, "step": 12784 }, { "epoch": 0.99, "grad_norm": 1.242103237916961, "learning_rate": 3.480123802817037e-09, "loss": 0.4797, "step": 12785 }, { "epoch": 0.99, "grad_norm": 1.1727203371959878, "learning_rate": 3.4141551421296158e-09, "loss": 0.5447, "step": 12786 }, { "epoch": 0.99, "grad_norm": 1.2458725975168847, "learning_rate": 3.348817617483402e-09, "loss": 0.5028, "step": 12787 }, { "epoch": 0.99, "grad_norm": 1.3535245644630611, "learning_rate": 3.284111233000653e-09, "loss": 0.5261, "step": 12788 }, { "epoch": 0.99, "grad_norm": 1.2896032412979295, "learning_rate": 3.2200359927692105e-09, "loss": 0.476, "step": 12789 }, { "epoch": 0.99, "grad_norm": 1.1653521261845836, "learning_rate": 3.1565919008336164e-09, "loss": 0.5147, "step": 12790 }, { "epoch": 0.99, "grad_norm": 1.1364300023553127, "learning_rate": 3.093778961199556e-09, "loss": 0.4568, "step": 12791 }, { "epoch": 0.99, "grad_norm": 1.212805506964488, "learning_rate": 3.0315971778316354e-09, "loss": 0.4889, "step": 12792 }, { "epoch": 0.99, "grad_norm": 1.2480915760040547, "learning_rate": 2.9700465546567136e-09, "loss": 0.5756, "step": 12793 }, { "epoch": 0.99, "grad_norm": 1.2684883881497169, "learning_rate": 2.909127095560571e-09, "loss": 0.5234, "step": 12794 }, { "epoch": 0.99, "grad_norm": 1.1934599638905135, "learning_rate": 2.8488388043901303e-09, "loss": 0.475, "step": 12795 }, { "epoch": 0.99, "grad_norm": 1.2552156355818238, "learning_rate": 2.7891816849501264e-09, "loss": 0.4996, "step": 12796 }, { "epoch": 0.99, "grad_norm": 1.365215077625375, "learning_rate": 2.7301557410086554e-09, "loss": 0.5418, "step": 12797 }, { "epoch": 0.99, "grad_norm": 1.1764671884866285, "learning_rate": 2.671760976291626e-09, "loss": 0.4622, "step": 12798 }, { "epoch": 0.99, "grad_norm": 1.2295036285086227, "learning_rate": 2.6139973944849795e-09, "loss": 0.469, "step": 12799 }, { "epoch": 0.99, "grad_norm": 1.1373493765571152, "learning_rate": 2.556864999236908e-09, "loss": 0.5109, "step": 12800 }, { "epoch": 0.99, "grad_norm": 1.1620364382639425, "learning_rate": 2.5003637941534153e-09, "loss": 0.4847, "step": 12801 }, { "epoch": 0.99, "grad_norm": 1.2873899764669479, "learning_rate": 2.4444937828027592e-09, "loss": 0.5283, "step": 12802 }, { "epoch": 0.99, "grad_norm": 1.285990018455841, "learning_rate": 2.389254968712118e-09, "loss": 0.4971, "step": 12803 }, { "epoch": 0.99, "grad_norm": 1.141489511282108, "learning_rate": 2.334647355368702e-09, "loss": 0.4104, "step": 12804 }, { "epoch": 0.99, "grad_norm": 1.2558244430406191, "learning_rate": 2.280670946219754e-09, "loss": 0.4653, "step": 12805 }, { "epoch": 0.99, "grad_norm": 1.2653039595715418, "learning_rate": 2.2273257446736586e-09, "loss": 0.4555, "step": 12806 }, { "epoch": 0.99, "grad_norm": 1.212344625683869, "learning_rate": 2.174611754097722e-09, "loss": 0.5237, "step": 12807 }, { "epoch": 0.99, "grad_norm": 1.325821033750812, "learning_rate": 2.122528977821503e-09, "loss": 0.5268, "step": 12808 }, { "epoch": 0.99, "grad_norm": 1.2797011340932254, "learning_rate": 2.071077419131262e-09, "loss": 0.5498, "step": 12809 }, { "epoch": 0.99, "grad_norm": 1.177253941462722, "learning_rate": 2.0202570812777323e-09, "loss": 0.4783, "step": 12810 }, { "epoch": 0.99, "grad_norm": 1.234470846583571, "learning_rate": 1.9700679674672373e-09, "loss": 0.5146, "step": 12811 }, { "epoch": 0.99, "grad_norm": 1.2242221737286103, "learning_rate": 1.9205100808694644e-09, "loss": 0.5137, "step": 12812 }, { "epoch": 0.99, "grad_norm": 1.185292249607657, "learning_rate": 1.8715834246130215e-09, "loss": 0.5357, "step": 12813 }, { "epoch": 0.99, "grad_norm": 1.2970198584854682, "learning_rate": 1.8232880017876597e-09, "loss": 0.5064, "step": 12814 }, { "epoch": 0.99, "grad_norm": 1.283950936137628, "learning_rate": 1.775623815440941e-09, "loss": 0.5173, "step": 12815 }, { "epoch": 0.99, "grad_norm": 1.1472506456238225, "learning_rate": 1.7285908685837904e-09, "loss": 0.4178, "step": 12816 }, { "epoch": 0.99, "grad_norm": 1.1838934490613877, "learning_rate": 1.6821891641860543e-09, "loss": 0.4775, "step": 12817 }, { "epoch": 0.99, "grad_norm": 1.3355287517736054, "learning_rate": 1.636418705174281e-09, "loss": 0.5806, "step": 12818 }, { "epoch": 0.99, "grad_norm": 1.2837279943197735, "learning_rate": 1.591279494441711e-09, "loss": 0.5243, "step": 12819 }, { "epoch": 0.99, "grad_norm": 1.258090871015429, "learning_rate": 1.5467715348360668e-09, "loss": 0.4976, "step": 12820 }, { "epoch": 0.99, "grad_norm": 1.1104370191663984, "learning_rate": 1.502894829167323e-09, "loss": 0.4364, "step": 12821 }, { "epoch": 0.99, "grad_norm": 1.2667553410527597, "learning_rate": 1.459649380207706e-09, "loss": 0.4724, "step": 12822 }, { "epoch": 0.99, "grad_norm": 1.313096274307284, "learning_rate": 1.4170351906850344e-09, "loss": 0.537, "step": 12823 }, { "epoch": 0.99, "grad_norm": 1.1776148290993629, "learning_rate": 1.3750522632915986e-09, "loss": 0.468, "step": 12824 }, { "epoch": 0.99, "grad_norm": 1.4211429765678127, "learning_rate": 1.3337006006763908e-09, "loss": 0.5084, "step": 12825 }, { "epoch": 1.0, "grad_norm": 1.0702948464378694, "learning_rate": 1.292980205451766e-09, "loss": 0.4506, "step": 12826 }, { "epoch": 1.0, "grad_norm": 1.3529927618590143, "learning_rate": 1.25289108018678e-09, "loss": 0.5111, "step": 12827 }, { "epoch": 1.0, "grad_norm": 1.3467819695085206, "learning_rate": 1.2134332274149619e-09, "loss": 0.5082, "step": 12828 }, { "epoch": 1.0, "grad_norm": 1.1463209617389374, "learning_rate": 1.1746066496243214e-09, "loss": 0.4773, "step": 12829 }, { "epoch": 1.0, "grad_norm": 1.2291410034000652, "learning_rate": 1.1364113492695617e-09, "loss": 0.4777, "step": 12830 }, { "epoch": 1.0, "grad_norm": 1.1750131930970078, "learning_rate": 1.0988473287598667e-09, "loss": 0.521, "step": 12831 }, { "epoch": 1.0, "grad_norm": 1.3122985321913427, "learning_rate": 1.061914590467783e-09, "loss": 0.5624, "step": 12832 }, { "epoch": 1.0, "grad_norm": 1.193020936515233, "learning_rate": 1.0256131367236688e-09, "loss": 0.5322, "step": 12833 }, { "epoch": 1.0, "grad_norm": 1.2552957412022692, "learning_rate": 9.899429698212448e-10, "loss": 0.4719, "step": 12834 }, { "epoch": 1.0, "grad_norm": 1.1478150945312688, "learning_rate": 9.549040920120433e-10, "loss": 0.4432, "step": 12835 }, { "epoch": 1.0, "grad_norm": 1.1489883451210987, "learning_rate": 9.204965055076287e-10, "loss": 0.49, "step": 12836 }, { "epoch": 1.0, "grad_norm": 1.2036015941369709, "learning_rate": 8.867202124818175e-10, "loss": 0.4963, "step": 12837 }, { "epoch": 1.0, "grad_norm": 1.1304358445277378, "learning_rate": 8.535752150651277e-10, "loss": 0.4901, "step": 12838 }, { "epoch": 1.0, "grad_norm": 1.2300702130816425, "learning_rate": 8.210615153503299e-10, "loss": 0.5489, "step": 12839 }, { "epoch": 1.0, "grad_norm": 1.1910347781544575, "learning_rate": 7.891791153924466e-10, "loss": 0.5424, "step": 12840 }, { "epoch": 1.0, "grad_norm": 1.1025158159155233, "learning_rate": 7.579280172020919e-10, "loss": 0.4601, "step": 12841 }, { "epoch": 1.0, "grad_norm": 1.13989950648964, "learning_rate": 7.273082227532424e-10, "loss": 0.5087, "step": 12842 }, { "epoch": 1.0, "grad_norm": 1.2908781731593217, "learning_rate": 6.97319733977686e-10, "loss": 0.5117, "step": 12843 }, { "epoch": 1.0, "grad_norm": 1.1354390180061806, "learning_rate": 6.679625527716837e-10, "loss": 0.4893, "step": 12844 }, { "epoch": 1.0, "grad_norm": 1.3138743425755401, "learning_rate": 6.392366809859773e-10, "loss": 0.5462, "step": 12845 }, { "epoch": 1.0, "grad_norm": 1.2061990381257566, "learning_rate": 6.111421204357814e-10, "loss": 0.4884, "step": 12846 }, { "epoch": 1.0, "grad_norm": 1.2542096302788721, "learning_rate": 5.836788728930121e-10, "loss": 0.5311, "step": 12847 }, { "epoch": 1.0, "grad_norm": 1.3202656393369088, "learning_rate": 5.568469400940579e-10, "loss": 0.5222, "step": 12848 }, { "epoch": 1.0, "grad_norm": 1.3227679652146, "learning_rate": 5.30646323730899e-10, "loss": 0.5325, "step": 12849 }, { "epoch": 1.0, "grad_norm": 1.2876945867063403, "learning_rate": 5.050770254588777e-10, "loss": 0.5751, "step": 12850 }, { "epoch": 1.0, "grad_norm": 1.187574032935247, "learning_rate": 4.801390468922584e-10, "loss": 0.4579, "step": 12851 }, { "epoch": 1.0, "grad_norm": 1.285203463258182, "learning_rate": 4.5583238960533736e-10, "loss": 0.5233, "step": 12852 }, { "epoch": 1.0, "grad_norm": 1.1133538473451257, "learning_rate": 4.321570551324428e-10, "loss": 0.4226, "step": 12853 }, { "epoch": 1.0, "grad_norm": 1.1663889838318557, "learning_rate": 4.091130449679348e-10, "loss": 0.4677, "step": 12854 }, { "epoch": 1.0, "grad_norm": 1.1724266788193352, "learning_rate": 3.8670036056731585e-10, "loss": 0.493, "step": 12855 }, { "epoch": 1.0, "grad_norm": 1.13859785568944, "learning_rate": 3.649190033461203e-10, "loss": 0.4663, "step": 12856 }, { "epoch": 1.0, "grad_norm": 1.2017358463883105, "learning_rate": 3.43768974677694e-10, "loss": 0.4671, "step": 12857 }, { "epoch": 1.0, "grad_norm": 1.3005809916172948, "learning_rate": 3.232502758998557e-10, "loss": 0.5257, "step": 12858 }, { "epoch": 1.0, "grad_norm": 1.1712508001497808, "learning_rate": 3.0336290830601523e-10, "loss": 0.4861, "step": 12859 }, { "epoch": 1.0, "grad_norm": 1.1428841466353052, "learning_rate": 2.8410687315294504e-10, "loss": 0.4977, "step": 12860 }, { "epoch": 1.0, "grad_norm": 1.3101388037287138, "learning_rate": 2.6548217165633937e-10, "loss": 0.5179, "step": 12861 }, { "epoch": 1.0, "grad_norm": 1.1900309144340138, "learning_rate": 2.474888049908142e-10, "loss": 0.4883, "step": 12862 }, { "epoch": 1.0, "grad_norm": 1.0761382552916974, "learning_rate": 2.3012677429323782e-10, "loss": 0.4621, "step": 12863 }, { "epoch": 1.0, "grad_norm": 1.1498276996075498, "learning_rate": 2.1339608066051064e-10, "loss": 0.5184, "step": 12864 }, { "epoch": 1.0, "grad_norm": 1.2162790436526083, "learning_rate": 1.9729672514845477e-10, "loss": 0.4903, "step": 12865 }, { "epoch": 1.0, "grad_norm": 1.364325050002346, "learning_rate": 1.81828708771814e-10, "loss": 0.5696, "step": 12866 }, { "epoch": 1.0, "grad_norm": 1.210383036020965, "learning_rate": 1.669920325098051e-10, "loss": 0.5131, "step": 12867 }, { "epoch": 1.0, "grad_norm": 1.2000195938735925, "learning_rate": 1.5278669729723582e-10, "loss": 0.4936, "step": 12868 }, { "epoch": 1.0, "grad_norm": 1.17990206352433, "learning_rate": 1.392127040322766e-10, "loss": 0.497, "step": 12869 }, { "epoch": 1.0, "grad_norm": 1.1404167403217738, "learning_rate": 1.2627005357090938e-10, "loss": 0.4577, "step": 12870 }, { "epoch": 1.0, "grad_norm": 1.2433683912194202, "learning_rate": 1.1395874673136853e-10, "loss": 0.5056, "step": 12871 }, { "epoch": 1.0, "grad_norm": 1.2172539160338953, "learning_rate": 1.0227878428969995e-10, "loss": 0.4555, "step": 12872 }, { "epoch": 1.0, "grad_norm": 1.2806800411938544, "learning_rate": 9.123016698309173e-11, "loss": 0.4979, "step": 12873 }, { "epoch": 1.0, "grad_norm": 1.2302234223442006, "learning_rate": 8.081289551098437e-11, "loss": 0.4998, "step": 12874 }, { "epoch": 1.0, "grad_norm": 1.265595094303915, "learning_rate": 7.102697052951968e-11, "loss": 0.4699, "step": 12875 }, { "epoch": 1.0, "grad_norm": 1.3867426050847587, "learning_rate": 6.187239265709188e-11, "loss": 0.5241, "step": 12876 }, { "epoch": 1.0, "grad_norm": 1.126374358869901, "learning_rate": 5.334916247212718e-11, "loss": 0.4993, "step": 12877 }, { "epoch": 1.0, "grad_norm": 1.2527189248455288, "learning_rate": 4.5457280510863286e-11, "loss": 0.5114, "step": 12878 }, { "epoch": 1.0, "grad_norm": 1.2873742810691888, "learning_rate": 3.8196747274010794e-11, "loss": 0.5341, "step": 12879 }, { "epoch": 1.0, "grad_norm": 1.073346480907056, "learning_rate": 3.1567563217871356e-11, "loss": 0.4682, "step": 12880 }, { "epoch": 1.0, "grad_norm": 1.216398121752131, "learning_rate": 2.5569728763219505e-11, "loss": 0.4824, "step": 12881 }, { "epoch": 1.0, "grad_norm": 1.237128506395995, "learning_rate": 2.0203244286420842e-11, "loss": 0.498, "step": 12882 }, { "epoch": 1.0, "grad_norm": 1.2252380606881614, "learning_rate": 1.546811012720362e-11, "loss": 0.5131, "step": 12883 }, { "epoch": 1.0, "grad_norm": 1.1420296681286557, "learning_rate": 1.1364326585328045e-11, "loss": 0.4729, "step": 12884 }, { "epoch": 1.0, "grad_norm": 1.1551170468989012, "learning_rate": 7.891893918365868e-12, "loss": 0.4921, "step": 12885 }, { "epoch": 1.0, "grad_norm": 1.5112252625112261, "learning_rate": 5.0508123472514655e-12, "loss": 0.4912, "step": 12886 }, { "epoch": 1.0, "grad_norm": 1.1867831726870262, "learning_rate": 2.841082049620525e-12, "loss": 0.5362, "step": 12887 }, { "epoch": 1.0, "grad_norm": 1.3153736807745786, "learning_rate": 1.2627031664713685e-12, "loss": 0.5467, "step": 12888 }, { "epoch": 1.0, "grad_norm": 1.3907085779341495, "learning_rate": 3.156757966138457e-13, "loss": 0.5349, "step": 12889 }, { "epoch": 1.0, "grad_norm": 1.3798019914655004, "learning_rate": 0.0, "loss": 0.4959, "step": 12890 }, { "epoch": 1.0, "step": 12890, "total_flos": 4.01818824385495e+16, "train_loss": 0.5671361568664931, "train_runtime": 271490.8709, "train_samples_per_second": 6.077, "train_steps_per_second": 0.047 } ], "logging_steps": 1.0, "max_steps": 12890, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 700, "total_flos": 4.01818824385495e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }