{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.498960498960499, "eval_steps": 500, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005544774050457444, "grad_norm": 1.7043471336364746, "learning_rate": 8.000000000000001e-07, "loss": 1.8725, "step": 1 }, { "epoch": 0.0011089548100914888, "grad_norm": 1.2943507432937622, "learning_rate": 1.6000000000000001e-06, "loss": 1.882, "step": 2 }, { "epoch": 0.0016634322151372332, "grad_norm": 1.2082455158233643, "learning_rate": 2.4000000000000003e-06, "loss": 1.8304, "step": 3 }, { "epoch": 0.0022179096201829776, "grad_norm": 0.9585644602775574, "learning_rate": 3.2000000000000003e-06, "loss": 1.8958, "step": 4 }, { "epoch": 0.0027723870252287217, "grad_norm": 1.1721417903900146, "learning_rate": 4.000000000000001e-06, "loss": 1.9696, "step": 5 }, { "epoch": 0.0033268644302744664, "grad_norm": 0.9636610150337219, "learning_rate": 4.800000000000001e-06, "loss": 1.8921, "step": 6 }, { "epoch": 0.0038813418353202105, "grad_norm": 1.1441367864608765, "learning_rate": 5.600000000000001e-06, "loss": 1.8852, "step": 7 }, { "epoch": 0.004435819240365955, "grad_norm": 0.7332591414451599, "learning_rate": 6.4000000000000006e-06, "loss": 1.8977, "step": 8 }, { "epoch": 0.0049902966454117, "grad_norm": 0.736126184463501, "learning_rate": 7.2000000000000005e-06, "loss": 1.8914, "step": 9 }, { "epoch": 0.0055447740504574435, "grad_norm": 0.5888351798057556, "learning_rate": 8.000000000000001e-06, "loss": 1.959, "step": 10 }, { "epoch": 0.006099251455503188, "grad_norm": 0.5506263971328735, "learning_rate": 8.8e-06, "loss": 1.8849, "step": 11 }, { "epoch": 0.006653728860548933, "grad_norm": 0.5089067220687866, "learning_rate": 9.600000000000001e-06, "loss": 1.8318, "step": 12 }, { "epoch": 0.007208206265594677, "grad_norm": 0.5495124459266663, "learning_rate": 1.04e-05, "loss": 1.8303, "step": 13 }, { "epoch": 0.007762683670640421, "grad_norm": 0.5372406840324402, "learning_rate": 1.1200000000000001e-05, "loss": 1.837, "step": 14 }, { "epoch": 0.008317161075686166, "grad_norm": 0.4969247877597809, "learning_rate": 1.2e-05, "loss": 1.8305, "step": 15 }, { "epoch": 0.00887163848073191, "grad_norm": 0.43892425298690796, "learning_rate": 1.2800000000000001e-05, "loss": 1.8299, "step": 16 }, { "epoch": 0.009426115885777655, "grad_norm": 0.4186934530735016, "learning_rate": 1.3600000000000002e-05, "loss": 1.8882, "step": 17 }, { "epoch": 0.0099805932908234, "grad_norm": 0.36783891916275024, "learning_rate": 1.4400000000000001e-05, "loss": 1.7997, "step": 18 }, { "epoch": 0.010535070695869144, "grad_norm": 0.3592284023761749, "learning_rate": 1.5200000000000002e-05, "loss": 1.7928, "step": 19 }, { "epoch": 0.011089548100914887, "grad_norm": 0.4156821072101593, "learning_rate": 1.6000000000000003e-05, "loss": 1.8382, "step": 20 }, { "epoch": 0.011644025505960632, "grad_norm": 0.36838528513908386, "learning_rate": 1.6800000000000002e-05, "loss": 1.765, "step": 21 }, { "epoch": 0.012198502911006376, "grad_norm": 0.4337383508682251, "learning_rate": 1.76e-05, "loss": 1.7961, "step": 22 }, { "epoch": 0.01275298031605212, "grad_norm": 0.34529271721839905, "learning_rate": 1.8400000000000003e-05, "loss": 1.7795, "step": 23 }, { "epoch": 0.013307457721097865, "grad_norm": 0.3445529341697693, "learning_rate": 1.9200000000000003e-05, "loss": 1.8074, "step": 24 }, { "epoch": 0.01386193512614361, "grad_norm": 0.32090720534324646, "learning_rate": 2e-05, "loss": 1.7261, "step": 25 }, { "epoch": 0.014416412531189355, "grad_norm": 0.30814656615257263, "learning_rate": 2.08e-05, "loss": 1.7856, "step": 26 }, { "epoch": 0.0149708899362351, "grad_norm": 0.3047591745853424, "learning_rate": 2.1600000000000003e-05, "loss": 1.7895, "step": 27 }, { "epoch": 0.015525367341280842, "grad_norm": 0.2949264347553253, "learning_rate": 2.2400000000000002e-05, "loss": 1.8077, "step": 28 }, { "epoch": 0.016079844746326587, "grad_norm": 0.2805996239185333, "learning_rate": 2.32e-05, "loss": 1.7806, "step": 29 }, { "epoch": 0.01663432215137233, "grad_norm": 0.2810855805873871, "learning_rate": 2.4e-05, "loss": 1.7969, "step": 30 }, { "epoch": 0.017188799556418076, "grad_norm": 0.2863365411758423, "learning_rate": 2.4800000000000003e-05, "loss": 1.8053, "step": 31 }, { "epoch": 0.01774327696146382, "grad_norm": 0.28471997380256653, "learning_rate": 2.5600000000000002e-05, "loss": 1.7866, "step": 32 }, { "epoch": 0.018297754366509565, "grad_norm": 0.2715449631214142, "learning_rate": 2.6400000000000005e-05, "loss": 1.7546, "step": 33 }, { "epoch": 0.01885223177155531, "grad_norm": 0.256256639957428, "learning_rate": 2.7200000000000004e-05, "loss": 1.7548, "step": 34 }, { "epoch": 0.019406709176601054, "grad_norm": 0.26935723423957825, "learning_rate": 2.8e-05, "loss": 1.7742, "step": 35 }, { "epoch": 0.0199611865816468, "grad_norm": 0.24071869254112244, "learning_rate": 2.8800000000000002e-05, "loss": 1.7903, "step": 36 }, { "epoch": 0.020515663986692544, "grad_norm": 0.26816120743751526, "learning_rate": 2.96e-05, "loss": 1.83, "step": 37 }, { "epoch": 0.021070141391738288, "grad_norm": 0.2523341774940491, "learning_rate": 3.0400000000000004e-05, "loss": 1.7478, "step": 38 }, { "epoch": 0.02162461879678403, "grad_norm": 0.25324249267578125, "learning_rate": 3.1200000000000006e-05, "loss": 1.763, "step": 39 }, { "epoch": 0.022179096201829774, "grad_norm": 0.2640393376350403, "learning_rate": 3.2000000000000005e-05, "loss": 1.7722, "step": 40 }, { "epoch": 0.02273357360687552, "grad_norm": 0.2618652880191803, "learning_rate": 3.28e-05, "loss": 1.693, "step": 41 }, { "epoch": 0.023288051011921263, "grad_norm": 0.2571040093898773, "learning_rate": 3.3600000000000004e-05, "loss": 1.6989, "step": 42 }, { "epoch": 0.023842528416967008, "grad_norm": 0.23714734613895416, "learning_rate": 3.44e-05, "loss": 1.769, "step": 43 }, { "epoch": 0.024397005822012752, "grad_norm": 0.2353614717721939, "learning_rate": 3.52e-05, "loss": 1.7387, "step": 44 }, { "epoch": 0.024951483227058497, "grad_norm": 0.2511078715324402, "learning_rate": 3.6e-05, "loss": 1.7478, "step": 45 }, { "epoch": 0.02550596063210424, "grad_norm": 0.2509211003780365, "learning_rate": 3.680000000000001e-05, "loss": 1.7231, "step": 46 }, { "epoch": 0.026060438037149986, "grad_norm": 0.2628484070301056, "learning_rate": 3.76e-05, "loss": 1.8243, "step": 47 }, { "epoch": 0.02661491544219573, "grad_norm": 0.23542599380016327, "learning_rate": 3.8400000000000005e-05, "loss": 1.6862, "step": 48 }, { "epoch": 0.027169392847241475, "grad_norm": 0.2473030835390091, "learning_rate": 3.9200000000000004e-05, "loss": 1.6858, "step": 49 }, { "epoch": 0.02772387025228722, "grad_norm": 0.24878697097301483, "learning_rate": 4e-05, "loss": 1.7847, "step": 50 }, { "epoch": 0.028278347657332965, "grad_norm": 0.23445457220077515, "learning_rate": 4.08e-05, "loss": 1.7048, "step": 51 }, { "epoch": 0.02883282506237871, "grad_norm": 0.23827125132083893, "learning_rate": 4.16e-05, "loss": 1.6579, "step": 52 }, { "epoch": 0.029387302467424454, "grad_norm": 0.2579360902309418, "learning_rate": 4.240000000000001e-05, "loss": 1.7823, "step": 53 }, { "epoch": 0.0299417798724702, "grad_norm": 0.2626456320285797, "learning_rate": 4.3200000000000007e-05, "loss": 1.7795, "step": 54 }, { "epoch": 0.03049625727751594, "grad_norm": 0.24642065167427063, "learning_rate": 4.4000000000000006e-05, "loss": 1.7575, "step": 55 }, { "epoch": 0.031050734682561684, "grad_norm": 0.24688799679279327, "learning_rate": 4.4800000000000005e-05, "loss": 1.7619, "step": 56 }, { "epoch": 0.03160521208760743, "grad_norm": 0.24448993802070618, "learning_rate": 4.56e-05, "loss": 1.7114, "step": 57 }, { "epoch": 0.03215968949265317, "grad_norm": 0.24135121703147888, "learning_rate": 4.64e-05, "loss": 1.7762, "step": 58 }, { "epoch": 0.03271416689769892, "grad_norm": 0.24882449209690094, "learning_rate": 4.72e-05, "loss": 1.6934, "step": 59 }, { "epoch": 0.03326864430274466, "grad_norm": 0.24142266809940338, "learning_rate": 4.8e-05, "loss": 1.6882, "step": 60 }, { "epoch": 0.03382312170779041, "grad_norm": 0.2599814236164093, "learning_rate": 4.88e-05, "loss": 1.755, "step": 61 }, { "epoch": 0.03437759911283615, "grad_norm": 0.2435712218284607, "learning_rate": 4.9600000000000006e-05, "loss": 1.7064, "step": 62 }, { "epoch": 0.034932076517881896, "grad_norm": 0.25077876448631287, "learning_rate": 5.0400000000000005e-05, "loss": 1.6786, "step": 63 }, { "epoch": 0.03548655392292764, "grad_norm": 0.25001490116119385, "learning_rate": 5.1200000000000004e-05, "loss": 1.6983, "step": 64 }, { "epoch": 0.036041031327973386, "grad_norm": 0.24909815192222595, "learning_rate": 5.2000000000000004e-05, "loss": 1.6403, "step": 65 }, { "epoch": 0.03659550873301913, "grad_norm": 0.2543540298938751, "learning_rate": 5.280000000000001e-05, "loss": 1.6812, "step": 66 }, { "epoch": 0.037149986138064875, "grad_norm": 0.26552706956863403, "learning_rate": 5.360000000000001e-05, "loss": 1.7326, "step": 67 }, { "epoch": 0.03770446354311062, "grad_norm": 0.24254681169986725, "learning_rate": 5.440000000000001e-05, "loss": 1.7514, "step": 68 }, { "epoch": 0.038258940948156364, "grad_norm": 0.2548975944519043, "learning_rate": 5.52e-05, "loss": 1.6883, "step": 69 }, { "epoch": 0.03881341835320211, "grad_norm": 0.25074952840805054, "learning_rate": 5.6e-05, "loss": 1.7236, "step": 70 }, { "epoch": 0.03936789575824785, "grad_norm": 0.26199671626091003, "learning_rate": 5.6800000000000005e-05, "loss": 1.6943, "step": 71 }, { "epoch": 0.0399223731632936, "grad_norm": 0.27566343545913696, "learning_rate": 5.7600000000000004e-05, "loss": 1.6859, "step": 72 }, { "epoch": 0.04047685056833934, "grad_norm": 0.26068150997161865, "learning_rate": 5.84e-05, "loss": 1.7378, "step": 73 }, { "epoch": 0.04103132797338509, "grad_norm": 0.2568175196647644, "learning_rate": 5.92e-05, "loss": 1.6894, "step": 74 }, { "epoch": 0.04158580537843083, "grad_norm": 0.2596546709537506, "learning_rate": 6.000000000000001e-05, "loss": 1.7519, "step": 75 }, { "epoch": 0.042140282783476576, "grad_norm": 0.2714129686355591, "learning_rate": 6.080000000000001e-05, "loss": 1.7159, "step": 76 }, { "epoch": 0.04269476018852232, "grad_norm": 0.2649083733558655, "learning_rate": 6.16e-05, "loss": 1.7031, "step": 77 }, { "epoch": 0.04324923759356806, "grad_norm": 0.30052971839904785, "learning_rate": 6.240000000000001e-05, "loss": 1.7358, "step": 78 }, { "epoch": 0.0438037149986138, "grad_norm": 0.30009427666664124, "learning_rate": 6.32e-05, "loss": 1.6739, "step": 79 }, { "epoch": 0.04435819240365955, "grad_norm": 0.27122125029563904, "learning_rate": 6.400000000000001e-05, "loss": 1.721, "step": 80 }, { "epoch": 0.04491266980870529, "grad_norm": 0.30771657824516296, "learning_rate": 6.48e-05, "loss": 1.8069, "step": 81 }, { "epoch": 0.04546714721375104, "grad_norm": 0.24620802700519562, "learning_rate": 6.56e-05, "loss": 1.6847, "step": 82 }, { "epoch": 0.04602162461879678, "grad_norm": 0.2720058262348175, "learning_rate": 6.64e-05, "loss": 1.7179, "step": 83 }, { "epoch": 0.046576102023842526, "grad_norm": 0.24807985126972198, "learning_rate": 6.720000000000001e-05, "loss": 1.6593, "step": 84 }, { "epoch": 0.04713057942888827, "grad_norm": 0.2534205913543701, "learning_rate": 6.8e-05, "loss": 1.7475, "step": 85 }, { "epoch": 0.047685056833934016, "grad_norm": 0.28452959656715393, "learning_rate": 6.88e-05, "loss": 1.6402, "step": 86 }, { "epoch": 0.04823953423897976, "grad_norm": 0.2749129831790924, "learning_rate": 6.960000000000001e-05, "loss": 1.7025, "step": 87 }, { "epoch": 0.048794011644025505, "grad_norm": 0.26466885209083557, "learning_rate": 7.04e-05, "loss": 1.7151, "step": 88 }, { "epoch": 0.04934848904907125, "grad_norm": 0.32394999265670776, "learning_rate": 7.120000000000001e-05, "loss": 1.6569, "step": 89 }, { "epoch": 0.049902966454116994, "grad_norm": 0.23801451921463013, "learning_rate": 7.2e-05, "loss": 1.7009, "step": 90 }, { "epoch": 0.05045744385916274, "grad_norm": 0.33740007877349854, "learning_rate": 7.280000000000001e-05, "loss": 1.6932, "step": 91 }, { "epoch": 0.05101192126420848, "grad_norm": 0.2623404562473297, "learning_rate": 7.360000000000001e-05, "loss": 1.7311, "step": 92 }, { "epoch": 0.05156639866925423, "grad_norm": 0.292054146528244, "learning_rate": 7.44e-05, "loss": 1.6297, "step": 93 }, { "epoch": 0.05212087607429997, "grad_norm": 0.3140726387500763, "learning_rate": 7.52e-05, "loss": 1.7442, "step": 94 }, { "epoch": 0.05267535347934572, "grad_norm": 0.2532927989959717, "learning_rate": 7.6e-05, "loss": 1.736, "step": 95 }, { "epoch": 0.05322983088439146, "grad_norm": 0.31100720167160034, "learning_rate": 7.680000000000001e-05, "loss": 1.6931, "step": 96 }, { "epoch": 0.053784308289437206, "grad_norm": 0.275055855512619, "learning_rate": 7.76e-05, "loss": 1.6661, "step": 97 }, { "epoch": 0.05433878569448295, "grad_norm": 0.3106204569339752, "learning_rate": 7.840000000000001e-05, "loss": 1.7421, "step": 98 }, { "epoch": 0.054893263099528695, "grad_norm": 0.30006468296051025, "learning_rate": 7.92e-05, "loss": 1.686, "step": 99 }, { "epoch": 0.05544774050457444, "grad_norm": 0.2701585292816162, "learning_rate": 8e-05, "loss": 1.6339, "step": 100 }, { "epoch": 0.056002217909620185, "grad_norm": 0.31820419430732727, "learning_rate": 7.999993193868717e-05, "loss": 1.772, "step": 101 }, { "epoch": 0.05655669531466593, "grad_norm": 0.25506505370140076, "learning_rate": 7.999972775498027e-05, "loss": 1.6857, "step": 102 }, { "epoch": 0.057111172719711674, "grad_norm": 0.27725860476493835, "learning_rate": 7.999938744957418e-05, "loss": 1.756, "step": 103 }, { "epoch": 0.05766565012475742, "grad_norm": 0.237404003739357, "learning_rate": 7.999891102362694e-05, "loss": 1.6907, "step": 104 }, { "epoch": 0.05822012752980316, "grad_norm": 0.26032620668411255, "learning_rate": 7.999829847875989e-05, "loss": 1.6277, "step": 105 }, { "epoch": 0.05877460493484891, "grad_norm": 0.23683027923107147, "learning_rate": 7.999754981705756e-05, "loss": 1.6519, "step": 106 }, { "epoch": 0.05932908233989465, "grad_norm": 0.2389203906059265, "learning_rate": 7.999666504106769e-05, "loss": 1.6287, "step": 107 }, { "epoch": 0.0598835597449404, "grad_norm": 0.25654640793800354, "learning_rate": 7.999564415380122e-05, "loss": 1.7226, "step": 108 }, { "epoch": 0.060438037149986135, "grad_norm": 0.25321975350379944, "learning_rate": 7.99944871587323e-05, "loss": 1.6724, "step": 109 }, { "epoch": 0.06099251455503188, "grad_norm": 0.25723859667778015, "learning_rate": 7.999319405979828e-05, "loss": 1.7587, "step": 110 }, { "epoch": 0.061546991960077624, "grad_norm": 0.23742374777793884, "learning_rate": 7.999176486139964e-05, "loss": 1.7245, "step": 111 }, { "epoch": 0.06210146936512337, "grad_norm": 0.23199070990085602, "learning_rate": 7.999019956840004e-05, "loss": 1.7447, "step": 112 }, { "epoch": 0.06265594677016911, "grad_norm": 0.2442471832036972, "learning_rate": 7.998849818612628e-05, "loss": 1.8053, "step": 113 }, { "epoch": 0.06321042417521486, "grad_norm": 0.22759103775024414, "learning_rate": 7.998666072036827e-05, "loss": 1.6593, "step": 114 }, { "epoch": 0.0637649015802606, "grad_norm": 0.23776134848594666, "learning_rate": 7.998468717737903e-05, "loss": 1.7303, "step": 115 }, { "epoch": 0.06431937898530635, "grad_norm": 0.2140997052192688, "learning_rate": 7.998257756387466e-05, "loss": 1.6146, "step": 116 }, { "epoch": 0.06487385639035209, "grad_norm": 0.2312529981136322, "learning_rate": 7.99803318870343e-05, "loss": 1.691, "step": 117 }, { "epoch": 0.06542833379539784, "grad_norm": 0.23457646369934082, "learning_rate": 7.997795015450015e-05, "loss": 1.6844, "step": 118 }, { "epoch": 0.06598281120044358, "grad_norm": 0.25258150696754456, "learning_rate": 7.997543237437738e-05, "loss": 1.7305, "step": 119 }, { "epoch": 0.06653728860548933, "grad_norm": 0.21662160754203796, "learning_rate": 7.99727785552342e-05, "loss": 1.6637, "step": 120 }, { "epoch": 0.06709176601053507, "grad_norm": 0.23025107383728027, "learning_rate": 7.99699887061017e-05, "loss": 1.7312, "step": 121 }, { "epoch": 0.06764624341558081, "grad_norm": 0.2787688374519348, "learning_rate": 7.996706283647393e-05, "loss": 1.7159, "step": 122 }, { "epoch": 0.06820072082062656, "grad_norm": 0.26639994978904724, "learning_rate": 7.996400095630781e-05, "loss": 1.7311, "step": 123 }, { "epoch": 0.0687551982256723, "grad_norm": 0.28086450695991516, "learning_rate": 7.996080307602312e-05, "loss": 1.6401, "step": 124 }, { "epoch": 0.06930967563071805, "grad_norm": 0.24936339259147644, "learning_rate": 7.995746920650248e-05, "loss": 1.6197, "step": 125 }, { "epoch": 0.06986415303576379, "grad_norm": 0.263729065656662, "learning_rate": 7.995399935909122e-05, "loss": 1.6693, "step": 126 }, { "epoch": 0.07041863044080954, "grad_norm": 0.3544696271419525, "learning_rate": 7.99503935455975e-05, "loss": 1.6949, "step": 127 }, { "epoch": 0.07097310784585528, "grad_norm": 0.23669736087322235, "learning_rate": 7.994665177829211e-05, "loss": 1.6262, "step": 128 }, { "epoch": 0.07152758525090103, "grad_norm": 0.3165992498397827, "learning_rate": 7.994277406990857e-05, "loss": 1.6604, "step": 129 }, { "epoch": 0.07208206265594677, "grad_norm": 0.29037636518478394, "learning_rate": 7.993876043364294e-05, "loss": 1.5977, "step": 130 }, { "epoch": 0.07263654006099252, "grad_norm": 0.26414918899536133, "learning_rate": 7.993461088315389e-05, "loss": 1.7664, "step": 131 }, { "epoch": 0.07319101746603826, "grad_norm": 0.3492623567581177, "learning_rate": 7.993032543256263e-05, "loss": 1.7055, "step": 132 }, { "epoch": 0.073745494871084, "grad_norm": 0.2867588698863983, "learning_rate": 7.992590409645282e-05, "loss": 1.8092, "step": 133 }, { "epoch": 0.07429997227612975, "grad_norm": 0.248403400182724, "learning_rate": 7.992134688987056e-05, "loss": 1.6885, "step": 134 }, { "epoch": 0.0748544496811755, "grad_norm": 0.306547075510025, "learning_rate": 7.991665382832433e-05, "loss": 1.7218, "step": 135 }, { "epoch": 0.07540892708622124, "grad_norm": 0.25006142258644104, "learning_rate": 7.99118249277849e-05, "loss": 1.6728, "step": 136 }, { "epoch": 0.07596340449126698, "grad_norm": 0.29479724168777466, "learning_rate": 7.990686020468536e-05, "loss": 1.6751, "step": 137 }, { "epoch": 0.07651788189631273, "grad_norm": 0.23760248720645905, "learning_rate": 7.990175967592098e-05, "loss": 1.625, "step": 138 }, { "epoch": 0.07707235930135847, "grad_norm": 0.30315011739730835, "learning_rate": 7.98965233588492e-05, "loss": 1.6881, "step": 139 }, { "epoch": 0.07762683670640422, "grad_norm": 0.22332574427127838, "learning_rate": 7.989115127128955e-05, "loss": 1.6179, "step": 140 }, { "epoch": 0.07818131411144996, "grad_norm": 0.28878095746040344, "learning_rate": 7.98856434315236e-05, "loss": 1.7134, "step": 141 }, { "epoch": 0.0787357915164957, "grad_norm": 0.231205016374588, "learning_rate": 7.987999985829486e-05, "loss": 1.6805, "step": 142 }, { "epoch": 0.07929026892154145, "grad_norm": 0.29243049025535583, "learning_rate": 7.987422057080881e-05, "loss": 1.7045, "step": 143 }, { "epoch": 0.0798447463265872, "grad_norm": 0.23502500355243683, "learning_rate": 7.986830558873275e-05, "loss": 1.6984, "step": 144 }, { "epoch": 0.08039922373163294, "grad_norm": 0.23500199615955353, "learning_rate": 7.986225493219573e-05, "loss": 1.5982, "step": 145 }, { "epoch": 0.08095370113667869, "grad_norm": 0.2353154569864273, "learning_rate": 7.985606862178855e-05, "loss": 1.7826, "step": 146 }, { "epoch": 0.08150817854172443, "grad_norm": 0.2630767524242401, "learning_rate": 7.984974667856362e-05, "loss": 1.7355, "step": 147 }, { "epoch": 0.08206265594677017, "grad_norm": 0.221221461892128, "learning_rate": 7.984328912403494e-05, "loss": 1.7203, "step": 148 }, { "epoch": 0.08261713335181592, "grad_norm": 0.23368635773658752, "learning_rate": 7.983669598017798e-05, "loss": 1.6725, "step": 149 }, { "epoch": 0.08317161075686166, "grad_norm": 0.22409488260746002, "learning_rate": 7.982996726942963e-05, "loss": 1.685, "step": 150 }, { "epoch": 0.08372608816190741, "grad_norm": 0.2248145341873169, "learning_rate": 7.982310301468815e-05, "loss": 1.6743, "step": 151 }, { "epoch": 0.08428056556695315, "grad_norm": 0.2202092409133911, "learning_rate": 7.981610323931306e-05, "loss": 1.6568, "step": 152 }, { "epoch": 0.0848350429719989, "grad_norm": 0.2243170440196991, "learning_rate": 7.980896796712504e-05, "loss": 1.7107, "step": 153 }, { "epoch": 0.08538952037704464, "grad_norm": 0.23059862852096558, "learning_rate": 7.980169722240589e-05, "loss": 1.6998, "step": 154 }, { "epoch": 0.08594399778209039, "grad_norm": 0.2216973900794983, "learning_rate": 7.979429102989842e-05, "loss": 1.6527, "step": 155 }, { "epoch": 0.08649847518713612, "grad_norm": 0.23553825914859772, "learning_rate": 7.978674941480643e-05, "loss": 1.6883, "step": 156 }, { "epoch": 0.08705295259218186, "grad_norm": 0.2300165742635727, "learning_rate": 7.977907240279449e-05, "loss": 1.6638, "step": 157 }, { "epoch": 0.0876074299972276, "grad_norm": 0.23822426795959473, "learning_rate": 7.977126001998798e-05, "loss": 1.6435, "step": 158 }, { "epoch": 0.08816190740227335, "grad_norm": 0.2425059974193573, "learning_rate": 7.976331229297298e-05, "loss": 1.6745, "step": 159 }, { "epoch": 0.0887163848073191, "grad_norm": 0.26218435168266296, "learning_rate": 7.975522924879609e-05, "loss": 1.6546, "step": 160 }, { "epoch": 0.08927086221236484, "grad_norm": 0.23087595403194427, "learning_rate": 7.974701091496448e-05, "loss": 1.6581, "step": 161 }, { "epoch": 0.08982533961741059, "grad_norm": 0.24700067937374115, "learning_rate": 7.973865731944565e-05, "loss": 1.7501, "step": 162 }, { "epoch": 0.09037981702245633, "grad_norm": 0.22620323300361633, "learning_rate": 7.973016849066742e-05, "loss": 1.698, "step": 163 }, { "epoch": 0.09093429442750207, "grad_norm": 0.21732601523399353, "learning_rate": 7.972154445751788e-05, "loss": 1.622, "step": 164 }, { "epoch": 0.09148877183254782, "grad_norm": 0.22213229537010193, "learning_rate": 7.971278524934515e-05, "loss": 1.7224, "step": 165 }, { "epoch": 0.09204324923759356, "grad_norm": 0.27407029271125793, "learning_rate": 7.970389089595738e-05, "loss": 1.679, "step": 166 }, { "epoch": 0.09259772664263931, "grad_norm": 0.24043461680412292, "learning_rate": 7.969486142762266e-05, "loss": 1.6364, "step": 167 }, { "epoch": 0.09315220404768505, "grad_norm": 0.22668756544589996, "learning_rate": 7.968569687506886e-05, "loss": 1.6504, "step": 168 }, { "epoch": 0.0937066814527308, "grad_norm": 0.2548286020755768, "learning_rate": 7.967639726948355e-05, "loss": 1.6703, "step": 169 }, { "epoch": 0.09426115885777654, "grad_norm": 0.2330275923013687, "learning_rate": 7.96669626425139e-05, "loss": 1.691, "step": 170 }, { "epoch": 0.09481563626282229, "grad_norm": 0.2295777052640915, "learning_rate": 7.965739302626656e-05, "loss": 1.66, "step": 171 }, { "epoch": 0.09537011366786803, "grad_norm": 0.23224836587905884, "learning_rate": 7.964768845330756e-05, "loss": 1.6317, "step": 172 }, { "epoch": 0.09592459107291378, "grad_norm": 0.21784305572509766, "learning_rate": 7.963784895666221e-05, "loss": 1.674, "step": 173 }, { "epoch": 0.09647906847795952, "grad_norm": 0.25221794843673706, "learning_rate": 7.962787456981498e-05, "loss": 1.6848, "step": 174 }, { "epoch": 0.09703354588300526, "grad_norm": 0.2345496118068695, "learning_rate": 7.961776532670931e-05, "loss": 1.6349, "step": 175 }, { "epoch": 0.09758802328805101, "grad_norm": 0.23866072297096252, "learning_rate": 7.960752126174765e-05, "loss": 1.6078, "step": 176 }, { "epoch": 0.09814250069309675, "grad_norm": 0.2709258794784546, "learning_rate": 7.959714240979124e-05, "loss": 1.6811, "step": 177 }, { "epoch": 0.0986969780981425, "grad_norm": 0.22360965609550476, "learning_rate": 7.958662880615997e-05, "loss": 1.633, "step": 178 }, { "epoch": 0.09925145550318824, "grad_norm": 0.25358742475509644, "learning_rate": 7.957598048663234e-05, "loss": 1.6449, "step": 179 }, { "epoch": 0.09980593290823399, "grad_norm": 0.2268538922071457, "learning_rate": 7.956519748744525e-05, "loss": 1.6675, "step": 180 }, { "epoch": 0.10036041031327973, "grad_norm": 0.26430633664131165, "learning_rate": 7.9554279845294e-05, "loss": 1.7128, "step": 181 }, { "epoch": 0.10091488771832548, "grad_norm": 0.22235046327114105, "learning_rate": 7.9543227597332e-05, "loss": 1.6033, "step": 182 }, { "epoch": 0.10146936512337122, "grad_norm": 0.26423388719558716, "learning_rate": 7.953204078117081e-05, "loss": 1.6274, "step": 183 }, { "epoch": 0.10202384252841697, "grad_norm": 0.22570672631263733, "learning_rate": 7.952071943487987e-05, "loss": 1.6938, "step": 184 }, { "epoch": 0.10257831993346271, "grad_norm": 0.25327280163764954, "learning_rate": 7.95092635969865e-05, "loss": 1.7074, "step": 185 }, { "epoch": 0.10313279733850846, "grad_norm": 0.21103128790855408, "learning_rate": 7.949767330647562e-05, "loss": 1.6202, "step": 186 }, { "epoch": 0.1036872747435542, "grad_norm": 0.24825690686702728, "learning_rate": 7.94859486027898e-05, "loss": 1.6757, "step": 187 }, { "epoch": 0.10424175214859994, "grad_norm": 0.2204912304878235, "learning_rate": 7.947408952582892e-05, "loss": 1.6779, "step": 188 }, { "epoch": 0.10479622955364569, "grad_norm": 0.26558375358581543, "learning_rate": 7.946209611595026e-05, "loss": 1.6829, "step": 189 }, { "epoch": 0.10535070695869143, "grad_norm": 0.2372429221868515, "learning_rate": 7.944996841396815e-05, "loss": 1.6825, "step": 190 }, { "epoch": 0.10590518436373718, "grad_norm": 0.21156929433345795, "learning_rate": 7.943770646115396e-05, "loss": 1.6063, "step": 191 }, { "epoch": 0.10645966176878292, "grad_norm": 0.21675823628902435, "learning_rate": 7.94253102992359e-05, "loss": 1.612, "step": 192 }, { "epoch": 0.10701413917382867, "grad_norm": 0.2156941145658493, "learning_rate": 7.941277997039894e-05, "loss": 1.6382, "step": 193 }, { "epoch": 0.10756861657887441, "grad_norm": 0.20940068364143372, "learning_rate": 7.940011551728463e-05, "loss": 1.6607, "step": 194 }, { "epoch": 0.10812309398392016, "grad_norm": 0.21804174780845642, "learning_rate": 7.93873169829909e-05, "loss": 1.6386, "step": 195 }, { "epoch": 0.1086775713889659, "grad_norm": 0.21801279485225677, "learning_rate": 7.937438441107203e-05, "loss": 1.6612, "step": 196 }, { "epoch": 0.10923204879401165, "grad_norm": 0.254651814699173, "learning_rate": 7.93613178455384e-05, "loss": 1.6548, "step": 197 }, { "epoch": 0.10978652619905739, "grad_norm": 0.20725683867931366, "learning_rate": 7.934811733085637e-05, "loss": 1.6508, "step": 198 }, { "epoch": 0.11034100360410314, "grad_norm": 0.25397831201553345, "learning_rate": 7.933478291194821e-05, "loss": 1.6434, "step": 199 }, { "epoch": 0.11089548100914888, "grad_norm": 0.21976791322231293, "learning_rate": 7.932131463419177e-05, "loss": 1.6073, "step": 200 }, { "epoch": 0.11144995841419462, "grad_norm": 0.23310157656669617, "learning_rate": 7.930771254342051e-05, "loss": 1.6755, "step": 201 }, { "epoch": 0.11200443581924037, "grad_norm": 0.24408988654613495, "learning_rate": 7.929397668592323e-05, "loss": 1.6681, "step": 202 }, { "epoch": 0.11255891322428611, "grad_norm": 0.22306199371814728, "learning_rate": 7.928010710844397e-05, "loss": 1.6328, "step": 203 }, { "epoch": 0.11311339062933186, "grad_norm": 0.2562360465526581, "learning_rate": 7.926610385818179e-05, "loss": 1.639, "step": 204 }, { "epoch": 0.1136678680343776, "grad_norm": 0.22006537020206451, "learning_rate": 7.925196698279068e-05, "loss": 1.6636, "step": 205 }, { "epoch": 0.11422234543942335, "grad_norm": 0.23495469987392426, "learning_rate": 7.923769653037935e-05, "loss": 1.6408, "step": 206 }, { "epoch": 0.11477682284446909, "grad_norm": 0.215364009141922, "learning_rate": 7.92232925495111e-05, "loss": 1.6994, "step": 207 }, { "epoch": 0.11533130024951484, "grad_norm": 0.2487768530845642, "learning_rate": 7.920875508920361e-05, "loss": 1.6307, "step": 208 }, { "epoch": 0.11588577765456058, "grad_norm": 0.2132566124200821, "learning_rate": 7.919408419892881e-05, "loss": 1.6538, "step": 209 }, { "epoch": 0.11644025505960633, "grad_norm": 0.22761502861976624, "learning_rate": 7.917927992861272e-05, "loss": 1.6135, "step": 210 }, { "epoch": 0.11699473246465207, "grad_norm": 0.22848926484584808, "learning_rate": 7.916434232863522e-05, "loss": 1.7293, "step": 211 }, { "epoch": 0.11754920986969782, "grad_norm": 0.21311335265636444, "learning_rate": 7.914927144982995e-05, "loss": 1.6792, "step": 212 }, { "epoch": 0.11810368727474356, "grad_norm": 0.23164795339107513, "learning_rate": 7.913406734348412e-05, "loss": 1.5472, "step": 213 }, { "epoch": 0.1186581646797893, "grad_norm": 0.21141847968101501, "learning_rate": 7.911873006133827e-05, "loss": 1.6259, "step": 214 }, { "epoch": 0.11921264208483505, "grad_norm": 0.2537563443183899, "learning_rate": 7.910325965558621e-05, "loss": 1.7058, "step": 215 }, { "epoch": 0.1197671194898808, "grad_norm": 0.2353929728269577, "learning_rate": 7.908765617887473e-05, "loss": 1.6479, "step": 216 }, { "epoch": 0.12032159689492654, "grad_norm": 0.20736263692378998, "learning_rate": 7.907191968430347e-05, "loss": 1.603, "step": 217 }, { "epoch": 0.12087607429997227, "grad_norm": 0.23365424573421478, "learning_rate": 7.905605022542478e-05, "loss": 1.6792, "step": 218 }, { "epoch": 0.12143055170501801, "grad_norm": 0.22553478181362152, "learning_rate": 7.904004785624345e-05, "loss": 1.7089, "step": 219 }, { "epoch": 0.12198502911006376, "grad_norm": 0.2107611447572708, "learning_rate": 7.902391263121662e-05, "loss": 1.6541, "step": 220 }, { "epoch": 0.1225395065151095, "grad_norm": 0.21160966157913208, "learning_rate": 7.900764460525349e-05, "loss": 1.6459, "step": 221 }, { "epoch": 0.12309398392015525, "grad_norm": 0.21517297625541687, "learning_rate": 7.899124383371524e-05, "loss": 1.6354, "step": 222 }, { "epoch": 0.12364846132520099, "grad_norm": 0.21948044002056122, "learning_rate": 7.897471037241476e-05, "loss": 1.7011, "step": 223 }, { "epoch": 0.12420293873024674, "grad_norm": 0.21453773975372314, "learning_rate": 7.895804427761651e-05, "loss": 1.6508, "step": 224 }, { "epoch": 0.12475741613529248, "grad_norm": 0.23449410498142242, "learning_rate": 7.894124560603631e-05, "loss": 1.6891, "step": 225 }, { "epoch": 0.12531189354033823, "grad_norm": 0.22934886813163757, "learning_rate": 7.892431441484113e-05, "loss": 1.6989, "step": 226 }, { "epoch": 0.12586637094538397, "grad_norm": 0.24871671199798584, "learning_rate": 7.890725076164894e-05, "loss": 1.6596, "step": 227 }, { "epoch": 0.12642084835042972, "grad_norm": 0.21111659705638885, "learning_rate": 7.889005470452845e-05, "loss": 1.6158, "step": 228 }, { "epoch": 0.12697532575547546, "grad_norm": 0.24391771852970123, "learning_rate": 7.8872726301999e-05, "loss": 1.6943, "step": 229 }, { "epoch": 0.1275298031605212, "grad_norm": 0.2066636085510254, "learning_rate": 7.885526561303024e-05, "loss": 1.6627, "step": 230 }, { "epoch": 0.12808428056556695, "grad_norm": 0.2221907377243042, "learning_rate": 7.883767269704209e-05, "loss": 1.6358, "step": 231 }, { "epoch": 0.1286387579706127, "grad_norm": 0.21195903420448303, "learning_rate": 7.881994761390437e-05, "loss": 1.6259, "step": 232 }, { "epoch": 0.12919323537565844, "grad_norm": 0.22241343557834625, "learning_rate": 7.88020904239367e-05, "loss": 1.7442, "step": 233 }, { "epoch": 0.12974771278070418, "grad_norm": 0.20912249386310577, "learning_rate": 7.878410118790827e-05, "loss": 1.6546, "step": 234 }, { "epoch": 0.13030219018574993, "grad_norm": 0.21954858303070068, "learning_rate": 7.876597996703763e-05, "loss": 1.6339, "step": 235 }, { "epoch": 0.13085666759079567, "grad_norm": 0.23016227781772614, "learning_rate": 7.874772682299251e-05, "loss": 1.6333, "step": 236 }, { "epoch": 0.13141114499584142, "grad_norm": 0.22401848435401917, "learning_rate": 7.872934181788953e-05, "loss": 1.7135, "step": 237 }, { "epoch": 0.13196562240088716, "grad_norm": 0.23134422302246094, "learning_rate": 7.871082501429409e-05, "loss": 1.704, "step": 238 }, { "epoch": 0.1325200998059329, "grad_norm": 0.24263045191764832, "learning_rate": 7.869217647522006e-05, "loss": 1.7007, "step": 239 }, { "epoch": 0.13307457721097865, "grad_norm": 0.21756958961486816, "learning_rate": 7.867339626412965e-05, "loss": 1.6961, "step": 240 }, { "epoch": 0.1336290546160244, "grad_norm": 0.23123596608638763, "learning_rate": 7.865448444493317e-05, "loss": 1.6698, "step": 241 }, { "epoch": 0.13418353202107014, "grad_norm": 0.21551208198070526, "learning_rate": 7.863544108198877e-05, "loss": 1.6982, "step": 242 }, { "epoch": 0.13473800942611588, "grad_norm": 0.22842957079410553, "learning_rate": 7.861626624010226e-05, "loss": 1.5909, "step": 243 }, { "epoch": 0.13529248683116163, "grad_norm": 0.22002427279949188, "learning_rate": 7.85969599845269e-05, "loss": 1.649, "step": 244 }, { "epoch": 0.13584696423620737, "grad_norm": 0.23170796036720276, "learning_rate": 7.857752238096313e-05, "loss": 1.685, "step": 245 }, { "epoch": 0.13640144164125312, "grad_norm": 0.21300175786018372, "learning_rate": 7.855795349555839e-05, "loss": 1.6501, "step": 246 }, { "epoch": 0.13695591904629886, "grad_norm": 0.2336280345916748, "learning_rate": 7.853825339490689e-05, "loss": 1.6482, "step": 247 }, { "epoch": 0.1375103964513446, "grad_norm": 0.23497241735458374, "learning_rate": 7.851842214604937e-05, "loss": 1.5985, "step": 248 }, { "epoch": 0.13806487385639035, "grad_norm": 0.21944530308246613, "learning_rate": 7.849845981647285e-05, "loss": 1.6658, "step": 249 }, { "epoch": 0.1386193512614361, "grad_norm": 0.21688306331634521, "learning_rate": 7.847836647411049e-05, "loss": 1.6014, "step": 250 }, { "epoch": 0.13917382866648184, "grad_norm": 0.21929487586021423, "learning_rate": 7.84581421873412e-05, "loss": 1.6299, "step": 251 }, { "epoch": 0.13972830607152759, "grad_norm": 0.21958981454372406, "learning_rate": 7.843778702498961e-05, "loss": 1.6961, "step": 252 }, { "epoch": 0.14028278347657333, "grad_norm": 0.21822874248027802, "learning_rate": 7.841730105632563e-05, "loss": 1.6779, "step": 253 }, { "epoch": 0.14083726088161908, "grad_norm": 0.21482256054878235, "learning_rate": 7.839668435106437e-05, "loss": 1.6304, "step": 254 }, { "epoch": 0.14139173828666482, "grad_norm": 0.22191710770130157, "learning_rate": 7.837593697936582e-05, "loss": 1.6461, "step": 255 }, { "epoch": 0.14194621569171056, "grad_norm": 0.227654829621315, "learning_rate": 7.835505901183468e-05, "loss": 1.7217, "step": 256 }, { "epoch": 0.1425006930967563, "grad_norm": 0.2576013505458832, "learning_rate": 7.833405051952002e-05, "loss": 1.7567, "step": 257 }, { "epoch": 0.14305517050180205, "grad_norm": 0.20585085451602936, "learning_rate": 7.831291157391513e-05, "loss": 1.6422, "step": 258 }, { "epoch": 0.1436096479068478, "grad_norm": 0.22537145018577576, "learning_rate": 7.82916422469572e-05, "loss": 1.6604, "step": 259 }, { "epoch": 0.14416412531189354, "grad_norm": 0.23208315670490265, "learning_rate": 7.827024261102718e-05, "loss": 1.615, "step": 260 }, { "epoch": 0.1447186027169393, "grad_norm": 0.2133701741695404, "learning_rate": 7.824871273894943e-05, "loss": 1.6413, "step": 261 }, { "epoch": 0.14527308012198503, "grad_norm": 0.2303103655576706, "learning_rate": 7.82270527039915e-05, "loss": 1.661, "step": 262 }, { "epoch": 0.14582755752703078, "grad_norm": 0.22526273131370544, "learning_rate": 7.820526257986393e-05, "loss": 1.6806, "step": 263 }, { "epoch": 0.14638203493207652, "grad_norm": 0.22275319695472717, "learning_rate": 7.818334244071994e-05, "loss": 1.7037, "step": 264 }, { "epoch": 0.14693651233712227, "grad_norm": 0.22790545225143433, "learning_rate": 7.81612923611552e-05, "loss": 1.654, "step": 265 }, { "epoch": 0.147490989742168, "grad_norm": 0.22253917157649994, "learning_rate": 7.813911241620755e-05, "loss": 1.6492, "step": 266 }, { "epoch": 0.14804546714721376, "grad_norm": 0.22290851175785065, "learning_rate": 7.811680268135684e-05, "loss": 1.6355, "step": 267 }, { "epoch": 0.1485999445522595, "grad_norm": 0.22694170475006104, "learning_rate": 7.809436323252456e-05, "loss": 1.622, "step": 268 }, { "epoch": 0.14915442195730524, "grad_norm": 0.21007151901721954, "learning_rate": 7.80717941460736e-05, "loss": 1.6312, "step": 269 }, { "epoch": 0.149708899362351, "grad_norm": 0.2363370954990387, "learning_rate": 7.804909549880806e-05, "loss": 1.6222, "step": 270 }, { "epoch": 0.15026337676739673, "grad_norm": 0.26099053025245667, "learning_rate": 7.802626736797292e-05, "loss": 1.6424, "step": 271 }, { "epoch": 0.15081785417244248, "grad_norm": 0.21376170217990875, "learning_rate": 7.800330983125381e-05, "loss": 1.6055, "step": 272 }, { "epoch": 0.15137233157748822, "grad_norm": 0.26935890316963196, "learning_rate": 7.798022296677675e-05, "loss": 1.6858, "step": 273 }, { "epoch": 0.15192680898253397, "grad_norm": 0.22003325819969177, "learning_rate": 7.795700685310783e-05, "loss": 1.6803, "step": 274 }, { "epoch": 0.1524812863875797, "grad_norm": 0.25127747654914856, "learning_rate": 7.793366156925302e-05, "loss": 1.5456, "step": 275 }, { "epoch": 0.15303576379262546, "grad_norm": 0.21749310195446014, "learning_rate": 7.791018719465785e-05, "loss": 1.6904, "step": 276 }, { "epoch": 0.1535902411976712, "grad_norm": 0.2340991199016571, "learning_rate": 7.788658380920716e-05, "loss": 1.6946, "step": 277 }, { "epoch": 0.15414471860271695, "grad_norm": 0.21612657606601715, "learning_rate": 7.786285149322483e-05, "loss": 1.5836, "step": 278 }, { "epoch": 0.1546991960077627, "grad_norm": 0.2601798474788666, "learning_rate": 7.783899032747346e-05, "loss": 1.679, "step": 279 }, { "epoch": 0.15525367341280844, "grad_norm": 0.24051488935947418, "learning_rate": 7.78150003931542e-05, "loss": 1.6504, "step": 280 }, { "epoch": 0.15580815081785418, "grad_norm": 0.22417373955249786, "learning_rate": 7.779088177190636e-05, "loss": 1.6767, "step": 281 }, { "epoch": 0.15636262822289992, "grad_norm": 0.23958833515644073, "learning_rate": 7.776663454580718e-05, "loss": 1.6636, "step": 282 }, { "epoch": 0.15691710562794567, "grad_norm": 0.22947099804878235, "learning_rate": 7.774225879737156e-05, "loss": 1.6653, "step": 283 }, { "epoch": 0.1574715830329914, "grad_norm": 0.23993003368377686, "learning_rate": 7.771775460955178e-05, "loss": 1.6637, "step": 284 }, { "epoch": 0.15802606043803716, "grad_norm": 0.2175922393798828, "learning_rate": 7.76931220657372e-05, "loss": 1.6086, "step": 285 }, { "epoch": 0.1585805378430829, "grad_norm": 0.22795039415359497, "learning_rate": 7.766836124975399e-05, "loss": 1.6274, "step": 286 }, { "epoch": 0.15913501524812865, "grad_norm": 0.2554066479206085, "learning_rate": 7.764347224586482e-05, "loss": 1.6059, "step": 287 }, { "epoch": 0.1596894926531744, "grad_norm": 0.2622605860233307, "learning_rate": 7.761845513876861e-05, "loss": 1.6815, "step": 288 }, { "epoch": 0.16024397005822014, "grad_norm": 0.25482630729675293, "learning_rate": 7.759331001360021e-05, "loss": 1.7245, "step": 289 }, { "epoch": 0.16079844746326588, "grad_norm": 0.23500695824623108, "learning_rate": 7.756803695593015e-05, "loss": 1.6039, "step": 290 }, { "epoch": 0.16135292486831163, "grad_norm": 0.24643713235855103, "learning_rate": 7.754263605176429e-05, "loss": 1.6804, "step": 291 }, { "epoch": 0.16190740227335737, "grad_norm": 0.301843523979187, "learning_rate": 7.751710738754357e-05, "loss": 1.6837, "step": 292 }, { "epoch": 0.16246187967840311, "grad_norm": 0.22402901947498322, "learning_rate": 7.749145105014372e-05, "loss": 1.7017, "step": 293 }, { "epoch": 0.16301635708344886, "grad_norm": 0.30275362730026245, "learning_rate": 7.746566712687493e-05, "loss": 1.7249, "step": 294 }, { "epoch": 0.1635708344884946, "grad_norm": 0.20885106921195984, "learning_rate": 7.74397557054816e-05, "loss": 1.6964, "step": 295 }, { "epoch": 0.16412531189354035, "grad_norm": 0.28100892901420593, "learning_rate": 7.741371687414198e-05, "loss": 1.6428, "step": 296 }, { "epoch": 0.1646797892985861, "grad_norm": 0.24655266106128693, "learning_rate": 7.738755072146794e-05, "loss": 1.638, "step": 297 }, { "epoch": 0.16523426670363184, "grad_norm": 0.2542823255062103, "learning_rate": 7.736125733650461e-05, "loss": 1.6958, "step": 298 }, { "epoch": 0.16578874410867758, "grad_norm": 0.24665483832359314, "learning_rate": 7.733483680873009e-05, "loss": 1.6095, "step": 299 }, { "epoch": 0.16634322151372333, "grad_norm": 0.25384095311164856, "learning_rate": 7.730828922805517e-05, "loss": 1.6943, "step": 300 }, { "epoch": 0.16689769891876907, "grad_norm": 0.26568636298179626, "learning_rate": 7.728161468482304e-05, "loss": 1.693, "step": 301 }, { "epoch": 0.16745217632381482, "grad_norm": 0.2455059140920639, "learning_rate": 7.72548132698089e-05, "loss": 1.6415, "step": 302 }, { "epoch": 0.16800665372886056, "grad_norm": 0.24293363094329834, "learning_rate": 7.722788507421971e-05, "loss": 1.6402, "step": 303 }, { "epoch": 0.1685611311339063, "grad_norm": 0.22879062592983246, "learning_rate": 7.720083018969393e-05, "loss": 1.6153, "step": 304 }, { "epoch": 0.16911560853895205, "grad_norm": 0.21951867640018463, "learning_rate": 7.717364870830107e-05, "loss": 1.6482, "step": 305 }, { "epoch": 0.1696700859439978, "grad_norm": 0.2079135924577713, "learning_rate": 7.71463407225415e-05, "loss": 1.5899, "step": 306 }, { "epoch": 0.17022456334904354, "grad_norm": 0.20707400143146515, "learning_rate": 7.71189063253461e-05, "loss": 1.6667, "step": 307 }, { "epoch": 0.17077904075408928, "grad_norm": 0.22458939254283905, "learning_rate": 7.709134561007592e-05, "loss": 1.5537, "step": 308 }, { "epoch": 0.17133351815913503, "grad_norm": 0.21969960629940033, "learning_rate": 7.706365867052188e-05, "loss": 1.6693, "step": 309 }, { "epoch": 0.17188799556418077, "grad_norm": 0.22047635912895203, "learning_rate": 7.703584560090447e-05, "loss": 1.5548, "step": 310 }, { "epoch": 0.1724424729692265, "grad_norm": 0.22892658412456512, "learning_rate": 7.700790649587336e-05, "loss": 1.6342, "step": 311 }, { "epoch": 0.17299695037427223, "grad_norm": 0.23613335192203522, "learning_rate": 7.697984145050718e-05, "loss": 1.6324, "step": 312 }, { "epoch": 0.17355142777931798, "grad_norm": 0.2484220266342163, "learning_rate": 7.695165056031313e-05, "loss": 1.6467, "step": 313 }, { "epoch": 0.17410590518436372, "grad_norm": 0.2812964618206024, "learning_rate": 7.692333392122663e-05, "loss": 1.6744, "step": 314 }, { "epoch": 0.17466038258940947, "grad_norm": 0.2211124300956726, "learning_rate": 7.689489162961109e-05, "loss": 1.6213, "step": 315 }, { "epoch": 0.1752148599944552, "grad_norm": 0.22334423661231995, "learning_rate": 7.686632378225748e-05, "loss": 1.6909, "step": 316 }, { "epoch": 0.17576933739950096, "grad_norm": 0.2386290282011032, "learning_rate": 7.683763047638407e-05, "loss": 1.6193, "step": 317 }, { "epoch": 0.1763238148045467, "grad_norm": 0.20850642025470734, "learning_rate": 7.680881180963605e-05, "loss": 1.6541, "step": 318 }, { "epoch": 0.17687829220959245, "grad_norm": 0.22296078503131866, "learning_rate": 7.677986788008524e-05, "loss": 1.6285, "step": 319 }, { "epoch": 0.1774327696146382, "grad_norm": 0.20673969388008118, "learning_rate": 7.675079878622974e-05, "loss": 1.6835, "step": 320 }, { "epoch": 0.17798724701968394, "grad_norm": 0.21489155292510986, "learning_rate": 7.672160462699359e-05, "loss": 1.6354, "step": 321 }, { "epoch": 0.17854172442472968, "grad_norm": 0.21602220833301544, "learning_rate": 7.669228550172639e-05, "loss": 1.5907, "step": 322 }, { "epoch": 0.17909620182977543, "grad_norm": 0.2056579738855362, "learning_rate": 7.666284151020309e-05, "loss": 1.6304, "step": 323 }, { "epoch": 0.17965067923482117, "grad_norm": 0.22408543527126312, "learning_rate": 7.663327275262353e-05, "loss": 1.636, "step": 324 }, { "epoch": 0.18020515663986691, "grad_norm": 0.20583994686603546, "learning_rate": 7.66035793296121e-05, "loss": 1.6401, "step": 325 }, { "epoch": 0.18075963404491266, "grad_norm": 0.21680684387683868, "learning_rate": 7.657376134221749e-05, "loss": 1.6827, "step": 326 }, { "epoch": 0.1813141114499584, "grad_norm": 0.218284010887146, "learning_rate": 7.654381889191225e-05, "loss": 1.585, "step": 327 }, { "epoch": 0.18186858885500415, "grad_norm": 0.20787712931632996, "learning_rate": 7.651375208059252e-05, "loss": 1.6359, "step": 328 }, { "epoch": 0.1824230662600499, "grad_norm": 0.216603085398674, "learning_rate": 7.648356101057764e-05, "loss": 1.6884, "step": 329 }, { "epoch": 0.18297754366509564, "grad_norm": 0.2224433422088623, "learning_rate": 7.645324578460978e-05, "loss": 1.6869, "step": 330 }, { "epoch": 0.18353202107014138, "grad_norm": 0.21461333334445953, "learning_rate": 7.642280650585366e-05, "loss": 1.5748, "step": 331 }, { "epoch": 0.18408649847518713, "grad_norm": 0.20232297480106354, "learning_rate": 7.639224327789613e-05, "loss": 1.6181, "step": 332 }, { "epoch": 0.18464097588023287, "grad_norm": 0.2159082591533661, "learning_rate": 7.636155620474589e-05, "loss": 1.6346, "step": 333 }, { "epoch": 0.18519545328527862, "grad_norm": 0.21551144123077393, "learning_rate": 7.633074539083302e-05, "loss": 1.6951, "step": 334 }, { "epoch": 0.18574993069032436, "grad_norm": 0.2233298122882843, "learning_rate": 7.629981094100878e-05, "loss": 1.6491, "step": 335 }, { "epoch": 0.1863044080953701, "grad_norm": 0.21482495963573456, "learning_rate": 7.626875296054512e-05, "loss": 1.6596, "step": 336 }, { "epoch": 0.18685888550041585, "grad_norm": 0.2133273333311081, "learning_rate": 7.623757155513439e-05, "loss": 1.6508, "step": 337 }, { "epoch": 0.1874133629054616, "grad_norm": 0.20683740079402924, "learning_rate": 7.620626683088894e-05, "loss": 1.6145, "step": 338 }, { "epoch": 0.18796784031050734, "grad_norm": 0.20707263052463531, "learning_rate": 7.617483889434083e-05, "loss": 1.6415, "step": 339 }, { "epoch": 0.18852231771555308, "grad_norm": 0.2513361871242523, "learning_rate": 7.614328785244138e-05, "loss": 1.7166, "step": 340 }, { "epoch": 0.18907679512059883, "grad_norm": 0.21265187859535217, "learning_rate": 7.611161381256084e-05, "loss": 1.6454, "step": 341 }, { "epoch": 0.18963127252564457, "grad_norm": 0.28207194805145264, "learning_rate": 7.607981688248807e-05, "loss": 1.6473, "step": 342 }, { "epoch": 0.19018574993069032, "grad_norm": 0.256237268447876, "learning_rate": 7.604789717043011e-05, "loss": 1.7602, "step": 343 }, { "epoch": 0.19074022733573606, "grad_norm": 0.2375311702489853, "learning_rate": 7.601585478501181e-05, "loss": 1.6399, "step": 344 }, { "epoch": 0.1912947047407818, "grad_norm": 0.23850996792316437, "learning_rate": 7.598368983527554e-05, "loss": 1.6538, "step": 345 }, { "epoch": 0.19184918214582755, "grad_norm": 0.2116571068763733, "learning_rate": 7.595140243068072e-05, "loss": 1.6129, "step": 346 }, { "epoch": 0.1924036595508733, "grad_norm": 0.2089197337627411, "learning_rate": 7.591899268110352e-05, "loss": 1.6216, "step": 347 }, { "epoch": 0.19295813695591904, "grad_norm": 0.20319993793964386, "learning_rate": 7.588646069683642e-05, "loss": 1.6086, "step": 348 }, { "epoch": 0.19351261436096479, "grad_norm": 0.2238406538963318, "learning_rate": 7.585380658858793e-05, "loss": 1.6992, "step": 349 }, { "epoch": 0.19406709176601053, "grad_norm": 0.21192164719104767, "learning_rate": 7.58210304674821e-05, "loss": 1.6756, "step": 350 }, { "epoch": 0.19462156917105627, "grad_norm": 0.21115197241306305, "learning_rate": 7.578813244505823e-05, "loss": 1.5859, "step": 351 }, { "epoch": 0.19517604657610202, "grad_norm": 0.20488910377025604, "learning_rate": 7.575511263327044e-05, "loss": 1.6174, "step": 352 }, { "epoch": 0.19573052398114776, "grad_norm": 0.21583472192287445, "learning_rate": 7.572197114448733e-05, "loss": 1.639, "step": 353 }, { "epoch": 0.1962850013861935, "grad_norm": 0.22085356712341309, "learning_rate": 7.568870809149155e-05, "loss": 1.7244, "step": 354 }, { "epoch": 0.19683947879123925, "grad_norm": 0.20893336832523346, "learning_rate": 7.565532358747949e-05, "loss": 1.6678, "step": 355 }, { "epoch": 0.197393956196285, "grad_norm": 0.20535239577293396, "learning_rate": 7.562181774606075e-05, "loss": 1.5594, "step": 356 }, { "epoch": 0.19794843360133074, "grad_norm": 0.23027966916561127, "learning_rate": 7.558819068125796e-05, "loss": 1.7349, "step": 357 }, { "epoch": 0.1985029110063765, "grad_norm": 0.20420025289058685, "learning_rate": 7.555444250750618e-05, "loss": 1.6259, "step": 358 }, { "epoch": 0.19905738841142223, "grad_norm": 0.21578669548034668, "learning_rate": 7.552057333965271e-05, "loss": 1.652, "step": 359 }, { "epoch": 0.19961186581646798, "grad_norm": 0.21920384466648102, "learning_rate": 7.548658329295651e-05, "loss": 1.6564, "step": 360 }, { "epoch": 0.20016634322151372, "grad_norm": 0.23153354227542877, "learning_rate": 7.545247248308798e-05, "loss": 1.6196, "step": 361 }, { "epoch": 0.20072082062655947, "grad_norm": 0.22002752125263214, "learning_rate": 7.541824102612839e-05, "loss": 1.6524, "step": 362 }, { "epoch": 0.2012752980316052, "grad_norm": 0.21228551864624023, "learning_rate": 7.53838890385697e-05, "loss": 1.7047, "step": 363 }, { "epoch": 0.20182977543665095, "grad_norm": 0.21426419913768768, "learning_rate": 7.534941663731394e-05, "loss": 1.6433, "step": 364 }, { "epoch": 0.2023842528416967, "grad_norm": 0.20719939470291138, "learning_rate": 7.531482393967295e-05, "loss": 1.6065, "step": 365 }, { "epoch": 0.20293873024674244, "grad_norm": 0.21919012069702148, "learning_rate": 7.528011106336797e-05, "loss": 1.6418, "step": 366 }, { "epoch": 0.2034932076517882, "grad_norm": 0.201614648103714, "learning_rate": 7.524527812652917e-05, "loss": 1.5683, "step": 367 }, { "epoch": 0.20404768505683393, "grad_norm": 0.20898142457008362, "learning_rate": 7.521032524769537e-05, "loss": 1.6671, "step": 368 }, { "epoch": 0.20460216246187968, "grad_norm": 0.2096984088420868, "learning_rate": 7.517525254581346e-05, "loss": 1.5993, "step": 369 }, { "epoch": 0.20515663986692542, "grad_norm": 0.21319280564785004, "learning_rate": 7.514006014023817e-05, "loss": 1.5941, "step": 370 }, { "epoch": 0.20571111727197117, "grad_norm": 0.2070310115814209, "learning_rate": 7.510474815073157e-05, "loss": 1.6734, "step": 371 }, { "epoch": 0.2062655946770169, "grad_norm": 0.20351865887641907, "learning_rate": 7.506931669746266e-05, "loss": 1.6756, "step": 372 }, { "epoch": 0.20682007208206266, "grad_norm": 0.20868165791034698, "learning_rate": 7.503376590100702e-05, "loss": 1.6391, "step": 373 }, { "epoch": 0.2073745494871084, "grad_norm": 0.24216507375240326, "learning_rate": 7.499809588234634e-05, "loss": 1.7327, "step": 374 }, { "epoch": 0.20792902689215415, "grad_norm": 0.21316882967948914, "learning_rate": 7.496230676286802e-05, "loss": 1.6154, "step": 375 }, { "epoch": 0.2084835042971999, "grad_norm": 0.23029237985610962, "learning_rate": 7.492639866436479e-05, "loss": 1.6068, "step": 376 }, { "epoch": 0.20903798170224563, "grad_norm": 0.21493342518806458, "learning_rate": 7.489037170903429e-05, "loss": 1.6474, "step": 377 }, { "epoch": 0.20959245910729138, "grad_norm": 0.2048824578523636, "learning_rate": 7.485422601947858e-05, "loss": 1.5741, "step": 378 }, { "epoch": 0.21014693651233712, "grad_norm": 0.20953327417373657, "learning_rate": 7.481796171870383e-05, "loss": 1.609, "step": 379 }, { "epoch": 0.21070141391738287, "grad_norm": 0.21259397268295288, "learning_rate": 7.478157893011984e-05, "loss": 1.681, "step": 380 }, { "epoch": 0.2112558913224286, "grad_norm": 0.2035396844148636, "learning_rate": 7.474507777753962e-05, "loss": 1.5764, "step": 381 }, { "epoch": 0.21181036872747436, "grad_norm": 0.21311511099338531, "learning_rate": 7.470845838517899e-05, "loss": 1.6694, "step": 382 }, { "epoch": 0.2123648461325201, "grad_norm": 0.21380078792572021, "learning_rate": 7.467172087765616e-05, "loss": 1.6693, "step": 383 }, { "epoch": 0.21291932353756585, "grad_norm": 0.20704282820224762, "learning_rate": 7.463486537999125e-05, "loss": 1.6214, "step": 384 }, { "epoch": 0.2134738009426116, "grad_norm": 0.20259855687618256, "learning_rate": 7.459789201760596e-05, "loss": 1.6286, "step": 385 }, { "epoch": 0.21402827834765734, "grad_norm": 0.2159855216741562, "learning_rate": 7.456080091632305e-05, "loss": 1.6627, "step": 386 }, { "epoch": 0.21458275575270308, "grad_norm": 0.21545730531215668, "learning_rate": 7.452359220236601e-05, "loss": 1.6175, "step": 387 }, { "epoch": 0.21513723315774883, "grad_norm": 0.21161332726478577, "learning_rate": 7.44862660023585e-05, "loss": 1.6789, "step": 388 }, { "epoch": 0.21569171056279457, "grad_norm": 0.2151462882757187, "learning_rate": 7.444882244332403e-05, "loss": 1.6574, "step": 389 }, { "epoch": 0.21624618796784031, "grad_norm": 0.23342940211296082, "learning_rate": 7.441126165268552e-05, "loss": 1.6155, "step": 390 }, { "epoch": 0.21680066537288606, "grad_norm": 0.2156396359205246, "learning_rate": 7.437358375826476e-05, "loss": 1.6173, "step": 391 }, { "epoch": 0.2173551427779318, "grad_norm": 0.2337697595357895, "learning_rate": 7.433578888828215e-05, "loss": 1.6365, "step": 392 }, { "epoch": 0.21790962018297755, "grad_norm": 0.21427282691001892, "learning_rate": 7.429787717135608e-05, "loss": 1.6423, "step": 393 }, { "epoch": 0.2184640975880233, "grad_norm": 0.23731502890586853, "learning_rate": 7.425984873650262e-05, "loss": 1.6152, "step": 394 }, { "epoch": 0.21901857499306904, "grad_norm": 0.21108436584472656, "learning_rate": 7.422170371313501e-05, "loss": 1.6023, "step": 395 }, { "epoch": 0.21957305239811478, "grad_norm": 0.24385766685009003, "learning_rate": 7.418344223106331e-05, "loss": 1.6968, "step": 396 }, { "epoch": 0.22012752980316053, "grad_norm": 0.22708410024642944, "learning_rate": 7.414506442049382e-05, "loss": 1.6314, "step": 397 }, { "epoch": 0.22068200720820627, "grad_norm": 0.2248944193124771, "learning_rate": 7.410657041202877e-05, "loss": 1.6085, "step": 398 }, { "epoch": 0.22123648461325202, "grad_norm": 0.2051655650138855, "learning_rate": 7.406796033666577e-05, "loss": 1.6075, "step": 399 }, { "epoch": 0.22179096201829776, "grad_norm": 0.24941745400428772, "learning_rate": 7.402923432579749e-05, "loss": 1.7027, "step": 400 }, { "epoch": 0.2223454394233435, "grad_norm": 0.20685605704784393, "learning_rate": 7.399039251121104e-05, "loss": 1.5774, "step": 401 }, { "epoch": 0.22289991682838925, "grad_norm": 0.25279372930526733, "learning_rate": 7.395143502508767e-05, "loss": 1.659, "step": 402 }, { "epoch": 0.223454394233435, "grad_norm": 0.21988433599472046, "learning_rate": 7.391236200000227e-05, "loss": 1.6034, "step": 403 }, { "epoch": 0.22400887163848074, "grad_norm": 0.25263452529907227, "learning_rate": 7.387317356892294e-05, "loss": 1.6467, "step": 404 }, { "epoch": 0.22456334904352648, "grad_norm": 0.2200496941804886, "learning_rate": 7.383386986521044e-05, "loss": 1.5981, "step": 405 }, { "epoch": 0.22511782644857223, "grad_norm": 0.22581440210342407, "learning_rate": 7.379445102261787e-05, "loss": 1.6179, "step": 406 }, { "epoch": 0.22567230385361797, "grad_norm": 0.2082045078277588, "learning_rate": 7.375491717529014e-05, "loss": 1.6553, "step": 407 }, { "epoch": 0.22622678125866372, "grad_norm": 0.21761249005794525, "learning_rate": 7.371526845776351e-05, "loss": 1.5159, "step": 408 }, { "epoch": 0.22678125866370946, "grad_norm": 0.22059603035449982, "learning_rate": 7.36755050049652e-05, "loss": 1.6327, "step": 409 }, { "epoch": 0.2273357360687552, "grad_norm": 0.21581657230854034, "learning_rate": 7.363562695221285e-05, "loss": 1.6646, "step": 410 }, { "epoch": 0.22789021347380095, "grad_norm": 0.21198253333568573, "learning_rate": 7.359563443521407e-05, "loss": 1.6374, "step": 411 }, { "epoch": 0.2284446908788467, "grad_norm": 0.21386800706386566, "learning_rate": 7.3555527590066e-05, "loss": 1.6085, "step": 412 }, { "epoch": 0.22899916828389244, "grad_norm": 0.21344758570194244, "learning_rate": 7.351530655325492e-05, "loss": 1.6691, "step": 413 }, { "epoch": 0.22955364568893818, "grad_norm": 0.21234236657619476, "learning_rate": 7.347497146165562e-05, "loss": 1.5652, "step": 414 }, { "epoch": 0.23010812309398393, "grad_norm": 0.21020184457302094, "learning_rate": 7.343452245253108e-05, "loss": 1.6386, "step": 415 }, { "epoch": 0.23066260049902967, "grad_norm": 0.22808673977851868, "learning_rate": 7.339395966353193e-05, "loss": 1.6169, "step": 416 }, { "epoch": 0.23121707790407542, "grad_norm": 0.21194703876972198, "learning_rate": 7.335328323269599e-05, "loss": 1.6279, "step": 417 }, { "epoch": 0.23177155530912116, "grad_norm": 0.2219015657901764, "learning_rate": 7.331249329844784e-05, "loss": 1.6187, "step": 418 }, { "epoch": 0.2323260327141669, "grad_norm": 0.2099056988954544, "learning_rate": 7.327158999959831e-05, "loss": 1.6131, "step": 419 }, { "epoch": 0.23288051011921265, "grad_norm": 0.2129475176334381, "learning_rate": 7.323057347534401e-05, "loss": 1.7079, "step": 420 }, { "epoch": 0.2334349875242584, "grad_norm": 0.20943042635917664, "learning_rate": 7.318944386526683e-05, "loss": 1.6559, "step": 421 }, { "epoch": 0.23398946492930414, "grad_norm": 0.21280522644519806, "learning_rate": 7.314820130933358e-05, "loss": 1.6158, "step": 422 }, { "epoch": 0.2345439423343499, "grad_norm": 0.21108831465244293, "learning_rate": 7.310684594789535e-05, "loss": 1.5913, "step": 423 }, { "epoch": 0.23509841973939563, "grad_norm": 0.21344444155693054, "learning_rate": 7.306537792168717e-05, "loss": 1.6157, "step": 424 }, { "epoch": 0.23565289714444138, "grad_norm": 0.20498408377170563, "learning_rate": 7.302379737182746e-05, "loss": 1.6026, "step": 425 }, { "epoch": 0.23620737454948712, "grad_norm": 0.21187523007392883, "learning_rate": 7.298210443981754e-05, "loss": 1.6558, "step": 426 }, { "epoch": 0.23676185195453286, "grad_norm": 0.2079460322856903, "learning_rate": 7.29402992675412e-05, "loss": 1.7147, "step": 427 }, { "epoch": 0.2373163293595786, "grad_norm": 0.22233690321445465, "learning_rate": 7.289838199726419e-05, "loss": 1.681, "step": 428 }, { "epoch": 0.23787080676462435, "grad_norm": 0.21120913326740265, "learning_rate": 7.285635277163373e-05, "loss": 1.6054, "step": 429 }, { "epoch": 0.2384252841696701, "grad_norm": 0.22804483771324158, "learning_rate": 7.281421173367805e-05, "loss": 1.716, "step": 430 }, { "epoch": 0.23897976157471584, "grad_norm": 0.21392664313316345, "learning_rate": 7.277195902680584e-05, "loss": 1.6429, "step": 431 }, { "epoch": 0.2395342389797616, "grad_norm": 0.21854659914970398, "learning_rate": 7.272959479480584e-05, "loss": 1.6437, "step": 432 }, { "epoch": 0.24008871638480733, "grad_norm": 0.21218635141849518, "learning_rate": 7.268711918184635e-05, "loss": 1.7084, "step": 433 }, { "epoch": 0.24064319378985308, "grad_norm": 0.21876314282417297, "learning_rate": 7.26445323324746e-05, "loss": 1.6482, "step": 434 }, { "epoch": 0.24119767119489882, "grad_norm": 0.2061976045370102, "learning_rate": 7.260183439161651e-05, "loss": 1.6213, "step": 435 }, { "epoch": 0.24175214859994454, "grad_norm": 0.22555728256702423, "learning_rate": 7.255902550457592e-05, "loss": 1.7004, "step": 436 }, { "epoch": 0.24230662600499028, "grad_norm": 0.20105816423892975, "learning_rate": 7.251610581703432e-05, "loss": 1.5968, "step": 437 }, { "epoch": 0.24286110341003603, "grad_norm": 0.22831952571868896, "learning_rate": 7.24730754750502e-05, "loss": 1.6453, "step": 438 }, { "epoch": 0.24341558081508177, "grad_norm": 0.20313981175422668, "learning_rate": 7.242993462505861e-05, "loss": 1.6452, "step": 439 }, { "epoch": 0.24397005822012752, "grad_norm": 0.21168267726898193, "learning_rate": 7.238668341387078e-05, "loss": 1.6479, "step": 440 }, { "epoch": 0.24452453562517326, "grad_norm": 0.20957493782043457, "learning_rate": 7.234332198867334e-05, "loss": 1.63, "step": 441 }, { "epoch": 0.245079013030219, "grad_norm": 0.2083057314157486, "learning_rate": 7.22998504970281e-05, "loss": 1.6362, "step": 442 }, { "epoch": 0.24563349043526475, "grad_norm": 0.23519518971443176, "learning_rate": 7.22562690868714e-05, "loss": 1.7074, "step": 443 }, { "epoch": 0.2461879678403105, "grad_norm": 0.21451757848262787, "learning_rate": 7.221257790651364e-05, "loss": 1.5854, "step": 444 }, { "epoch": 0.24674244524535624, "grad_norm": 0.21790243685245514, "learning_rate": 7.216877710463877e-05, "loss": 1.5891, "step": 445 }, { "epoch": 0.24729692265040198, "grad_norm": 0.2403048574924469, "learning_rate": 7.212486683030377e-05, "loss": 1.6683, "step": 446 }, { "epoch": 0.24785140005544773, "grad_norm": 0.252909779548645, "learning_rate": 7.208084723293823e-05, "loss": 1.5939, "step": 447 }, { "epoch": 0.24840587746049347, "grad_norm": 0.22861145436763763, "learning_rate": 7.203671846234371e-05, "loss": 1.6284, "step": 448 }, { "epoch": 0.24896035486553922, "grad_norm": 0.2679903209209442, "learning_rate": 7.199248066869331e-05, "loss": 1.6749, "step": 449 }, { "epoch": 0.24951483227058496, "grad_norm": 0.19942152500152588, "learning_rate": 7.194813400253114e-05, "loss": 1.5894, "step": 450 }, { "epoch": 0.25006930967563074, "grad_norm": 0.23093563318252563, "learning_rate": 7.190367861477183e-05, "loss": 1.5947, "step": 451 }, { "epoch": 0.25062378708067645, "grad_norm": 0.22997967898845673, "learning_rate": 7.185911465669998e-05, "loss": 1.6138, "step": 452 }, { "epoch": 0.2511782644857222, "grad_norm": 0.2587045729160309, "learning_rate": 7.181444227996966e-05, "loss": 1.6803, "step": 453 }, { "epoch": 0.25173274189076794, "grad_norm": 0.2321499139070511, "learning_rate": 7.17696616366039e-05, "loss": 1.7357, "step": 454 }, { "epoch": 0.2522872192958137, "grad_norm": 0.24048465490341187, "learning_rate": 7.172477287899418e-05, "loss": 1.6514, "step": 455 }, { "epoch": 0.25284169670085943, "grad_norm": 0.22340674698352814, "learning_rate": 7.167977615989985e-05, "loss": 1.6029, "step": 456 }, { "epoch": 0.2533961741059052, "grad_norm": 0.21953530609607697, "learning_rate": 7.163467163244775e-05, "loss": 1.5879, "step": 457 }, { "epoch": 0.2539506515109509, "grad_norm": 0.22932195663452148, "learning_rate": 7.158945945013151e-05, "loss": 1.6812, "step": 458 }, { "epoch": 0.2545051289159967, "grad_norm": 0.21497461199760437, "learning_rate": 7.15441397668112e-05, "loss": 1.6981, "step": 459 }, { "epoch": 0.2550596063210424, "grad_norm": 0.22530704736709595, "learning_rate": 7.149871273671262e-05, "loss": 1.6335, "step": 460 }, { "epoch": 0.2556140837260882, "grad_norm": 0.20990800857543945, "learning_rate": 7.145317851442696e-05, "loss": 1.6352, "step": 461 }, { "epoch": 0.2561685611311339, "grad_norm": 0.2138289213180542, "learning_rate": 7.140753725491019e-05, "loss": 1.6216, "step": 462 }, { "epoch": 0.25672303853617967, "grad_norm": 0.2041904479265213, "learning_rate": 7.136178911348248e-05, "loss": 1.6162, "step": 463 }, { "epoch": 0.2572775159412254, "grad_norm": 0.22262172400951385, "learning_rate": 7.131593424582777e-05, "loss": 1.633, "step": 464 }, { "epoch": 0.25783199334627116, "grad_norm": 0.20571322739124298, "learning_rate": 7.12699728079932e-05, "loss": 1.6332, "step": 465 }, { "epoch": 0.2583864707513169, "grad_norm": 0.22316017746925354, "learning_rate": 7.122390495638853e-05, "loss": 1.6925, "step": 466 }, { "epoch": 0.25894094815636265, "grad_norm": 0.20561595261096954, "learning_rate": 7.117773084778568e-05, "loss": 1.5525, "step": 467 }, { "epoch": 0.25949542556140837, "grad_norm": 0.2053271234035492, "learning_rate": 7.113145063931821e-05, "loss": 1.6606, "step": 468 }, { "epoch": 0.26004990296645414, "grad_norm": 0.2116890847682953, "learning_rate": 7.108506448848069e-05, "loss": 1.5761, "step": 469 }, { "epoch": 0.26060438037149986, "grad_norm": 0.20428834855556488, "learning_rate": 7.103857255312823e-05, "loss": 1.6077, "step": 470 }, { "epoch": 0.2611588577765456, "grad_norm": 0.20274734497070312, "learning_rate": 7.099197499147594e-05, "loss": 1.5833, "step": 471 }, { "epoch": 0.26171333518159134, "grad_norm": 0.20062290132045746, "learning_rate": 7.094527196209838e-05, "loss": 1.5884, "step": 472 }, { "epoch": 0.2622678125866371, "grad_norm": 0.2172449678182602, "learning_rate": 7.089846362392904e-05, "loss": 1.7173, "step": 473 }, { "epoch": 0.26282228999168283, "grad_norm": 0.21342062950134277, "learning_rate": 7.085155013625974e-05, "loss": 1.6288, "step": 474 }, { "epoch": 0.2633767673967286, "grad_norm": 0.2020542323589325, "learning_rate": 7.080453165874018e-05, "loss": 1.5916, "step": 475 }, { "epoch": 0.2639312448017743, "grad_norm": 0.20829129219055176, "learning_rate": 7.07574083513773e-05, "loss": 1.6227, "step": 476 }, { "epoch": 0.2644857222068201, "grad_norm": 0.20422028005123138, "learning_rate": 7.071018037453485e-05, "loss": 1.63, "step": 477 }, { "epoch": 0.2650401996118658, "grad_norm": 0.19959598779678345, "learning_rate": 7.066284788893268e-05, "loss": 1.6, "step": 478 }, { "epoch": 0.2655946770169116, "grad_norm": 0.21311348676681519, "learning_rate": 7.061541105564642e-05, "loss": 1.6359, "step": 479 }, { "epoch": 0.2661491544219573, "grad_norm": 0.21512793004512787, "learning_rate": 7.056787003610667e-05, "loss": 1.6067, "step": 480 }, { "epoch": 0.2667036318270031, "grad_norm": 0.2027646005153656, "learning_rate": 7.052022499209864e-05, "loss": 1.6362, "step": 481 }, { "epoch": 0.2672581092320488, "grad_norm": 0.23081980645656586, "learning_rate": 7.047247608576157e-05, "loss": 1.6021, "step": 482 }, { "epoch": 0.26781258663709456, "grad_norm": 0.20609967410564423, "learning_rate": 7.04246234795881e-05, "loss": 1.6012, "step": 483 }, { "epoch": 0.2683670640421403, "grad_norm": 0.20472081005573273, "learning_rate": 7.03766673364238e-05, "loss": 1.5602, "step": 484 }, { "epoch": 0.26892154144718605, "grad_norm": 0.2030676007270813, "learning_rate": 7.032860781946657e-05, "loss": 1.6053, "step": 485 }, { "epoch": 0.26947601885223177, "grad_norm": 0.22049987316131592, "learning_rate": 7.028044509226612e-05, "loss": 1.6074, "step": 486 }, { "epoch": 0.27003049625727754, "grad_norm": 0.20548531413078308, "learning_rate": 7.023217931872334e-05, "loss": 1.5976, "step": 487 }, { "epoch": 0.27058497366232326, "grad_norm": 0.22220125794410706, "learning_rate": 7.018381066308984e-05, "loss": 1.589, "step": 488 }, { "epoch": 0.27113945106736903, "grad_norm": 0.209039568901062, "learning_rate": 7.013533928996733e-05, "loss": 1.6254, "step": 489 }, { "epoch": 0.27169392847241475, "grad_norm": 0.21839891374111176, "learning_rate": 7.00867653643071e-05, "loss": 1.6726, "step": 490 }, { "epoch": 0.2722484058774605, "grad_norm": 0.20095285773277283, "learning_rate": 7.003808905140936e-05, "loss": 1.5836, "step": 491 }, { "epoch": 0.27280288328250624, "grad_norm": 0.19979004561901093, "learning_rate": 6.998931051692283e-05, "loss": 1.5875, "step": 492 }, { "epoch": 0.273357360687552, "grad_norm": 0.20679202675819397, "learning_rate": 6.994042992684406e-05, "loss": 1.5333, "step": 493 }, { "epoch": 0.2739118380925977, "grad_norm": 0.1942681074142456, "learning_rate": 6.989144744751689e-05, "loss": 1.5397, "step": 494 }, { "epoch": 0.2744663154976435, "grad_norm": 0.210038959980011, "learning_rate": 6.984236324563192e-05, "loss": 1.6923, "step": 495 }, { "epoch": 0.2750207929026892, "grad_norm": 0.213044673204422, "learning_rate": 6.979317748822594e-05, "loss": 1.6881, "step": 496 }, { "epoch": 0.275575270307735, "grad_norm": 0.21259163320064545, "learning_rate": 6.974389034268127e-05, "loss": 1.6463, "step": 497 }, { "epoch": 0.2761297477127807, "grad_norm": 0.2128421813249588, "learning_rate": 6.969450197672534e-05, "loss": 1.6319, "step": 498 }, { "epoch": 0.2766842251178264, "grad_norm": 0.21015048027038574, "learning_rate": 6.964501255842995e-05, "loss": 1.5996, "step": 499 }, { "epoch": 0.2772387025228722, "grad_norm": 0.20773227512836456, "learning_rate": 6.959542225621087e-05, "loss": 1.6476, "step": 500 }, { "epoch": 0.2777931799279179, "grad_norm": 0.21952128410339355, "learning_rate": 6.954573123882718e-05, "loss": 1.5959, "step": 501 }, { "epoch": 0.2783476573329637, "grad_norm": 0.20505724847316742, "learning_rate": 6.949593967538062e-05, "loss": 1.583, "step": 502 }, { "epoch": 0.2789021347380094, "grad_norm": 0.21379536390304565, "learning_rate": 6.94460477353152e-05, "loss": 1.6331, "step": 503 }, { "epoch": 0.27945661214305517, "grad_norm": 0.22115319967269897, "learning_rate": 6.939605558841644e-05, "loss": 1.5612, "step": 504 }, { "epoch": 0.2800110895481009, "grad_norm": 0.2217654585838318, "learning_rate": 6.934596340481088e-05, "loss": 1.5818, "step": 505 }, { "epoch": 0.28056556695314666, "grad_norm": 0.21580982208251953, "learning_rate": 6.929577135496556e-05, "loss": 1.6032, "step": 506 }, { "epoch": 0.2811200443581924, "grad_norm": 0.2084866166114807, "learning_rate": 6.924547960968726e-05, "loss": 1.5739, "step": 507 }, { "epoch": 0.28167452176323815, "grad_norm": 0.20981299877166748, "learning_rate": 6.919508834012213e-05, "loss": 1.6299, "step": 508 }, { "epoch": 0.28222899916828387, "grad_norm": 0.20049329102039337, "learning_rate": 6.914459771775496e-05, "loss": 1.6606, "step": 509 }, { "epoch": 0.28278347657332964, "grad_norm": 0.22596615552902222, "learning_rate": 6.909400791440864e-05, "loss": 1.5644, "step": 510 }, { "epoch": 0.28333795397837536, "grad_norm": 0.20176082849502563, "learning_rate": 6.904331910224361e-05, "loss": 1.5579, "step": 511 }, { "epoch": 0.28389243138342113, "grad_norm": 0.22138462960720062, "learning_rate": 6.899253145375723e-05, "loss": 1.6555, "step": 512 }, { "epoch": 0.28444690878846685, "grad_norm": 0.2055101990699768, "learning_rate": 6.894164514178317e-05, "loss": 1.591, "step": 513 }, { "epoch": 0.2850013861935126, "grad_norm": 0.2184118628501892, "learning_rate": 6.88906603394909e-05, "loss": 1.6579, "step": 514 }, { "epoch": 0.28555586359855833, "grad_norm": 0.21609501540660858, "learning_rate": 6.883957722038508e-05, "loss": 1.6977, "step": 515 }, { "epoch": 0.2861103410036041, "grad_norm": 0.20950822532176971, "learning_rate": 6.878839595830489e-05, "loss": 1.6842, "step": 516 }, { "epoch": 0.2866648184086498, "grad_norm": 0.20638826489448547, "learning_rate": 6.873711672742353e-05, "loss": 1.5596, "step": 517 }, { "epoch": 0.2872192958136956, "grad_norm": 0.20869210362434387, "learning_rate": 6.868573970224757e-05, "loss": 1.5563, "step": 518 }, { "epoch": 0.2877737732187413, "grad_norm": 0.20834721624851227, "learning_rate": 6.863426505761645e-05, "loss": 1.6254, "step": 519 }, { "epoch": 0.2883282506237871, "grad_norm": 0.20829017460346222, "learning_rate": 6.85826929687017e-05, "loss": 1.6216, "step": 520 }, { "epoch": 0.2888827280288328, "grad_norm": 0.20338068902492523, "learning_rate": 6.853102361100657e-05, "loss": 1.5475, "step": 521 }, { "epoch": 0.2894372054338786, "grad_norm": 0.21138431131839752, "learning_rate": 6.847925716036526e-05, "loss": 1.625, "step": 522 }, { "epoch": 0.2899916828389243, "grad_norm": 0.21660204231739044, "learning_rate": 6.842739379294241e-05, "loss": 1.5498, "step": 523 }, { "epoch": 0.29054616024397006, "grad_norm": 0.2074098438024521, "learning_rate": 6.837543368523244e-05, "loss": 1.6134, "step": 524 }, { "epoch": 0.2911006376490158, "grad_norm": 0.20286381244659424, "learning_rate": 6.832337701405904e-05, "loss": 1.6212, "step": 525 }, { "epoch": 0.29165511505406155, "grad_norm": 0.21887677907943726, "learning_rate": 6.827122395657445e-05, "loss": 1.6228, "step": 526 }, { "epoch": 0.29220959245910727, "grad_norm": 0.20947344601154327, "learning_rate": 6.821897469025895e-05, "loss": 1.6106, "step": 527 }, { "epoch": 0.29276406986415304, "grad_norm": 0.20572635531425476, "learning_rate": 6.816662939292024e-05, "loss": 1.5996, "step": 528 }, { "epoch": 0.29331854726919876, "grad_norm": 0.20719175040721893, "learning_rate": 6.81141882426928e-05, "loss": 1.6113, "step": 529 }, { "epoch": 0.29387302467424453, "grad_norm": 0.2026231288909912, "learning_rate": 6.80616514180373e-05, "loss": 1.5744, "step": 530 }, { "epoch": 0.29442750207929025, "grad_norm": 0.19878005981445312, "learning_rate": 6.800901909774e-05, "loss": 1.5897, "step": 531 }, { "epoch": 0.294981979484336, "grad_norm": 0.2110818773508072, "learning_rate": 6.795629146091215e-05, "loss": 1.5741, "step": 532 }, { "epoch": 0.29553645688938174, "grad_norm": 0.20320414006710052, "learning_rate": 6.790346868698936e-05, "loss": 1.6345, "step": 533 }, { "epoch": 0.2960909342944275, "grad_norm": 0.19918134808540344, "learning_rate": 6.785055095573098e-05, "loss": 1.5811, "step": 534 }, { "epoch": 0.2966454116994732, "grad_norm": 0.20493772625923157, "learning_rate": 6.779753844721955e-05, "loss": 1.5529, "step": 535 }, { "epoch": 0.297199889104519, "grad_norm": 0.20805048942565918, "learning_rate": 6.774443134186008e-05, "loss": 1.5974, "step": 536 }, { "epoch": 0.2977543665095647, "grad_norm": 0.21310682594776154, "learning_rate": 6.769122982037959e-05, "loss": 1.6811, "step": 537 }, { "epoch": 0.2983088439146105, "grad_norm": 0.21554915606975555, "learning_rate": 6.763793406382629e-05, "loss": 1.6707, "step": 538 }, { "epoch": 0.2988633213196562, "grad_norm": 0.21235989034175873, "learning_rate": 6.758454425356917e-05, "loss": 1.6141, "step": 539 }, { "epoch": 0.299417798724702, "grad_norm": 0.2248426377773285, "learning_rate": 6.753106057129725e-05, "loss": 1.6711, "step": 540 }, { "epoch": 0.2999722761297477, "grad_norm": 0.20382218062877655, "learning_rate": 6.747748319901902e-05, "loss": 1.5494, "step": 541 }, { "epoch": 0.30052675353479347, "grad_norm": 0.21593333780765533, "learning_rate": 6.74238123190618e-05, "loss": 1.6111, "step": 542 }, { "epoch": 0.3010812309398392, "grad_norm": 0.20532597601413727, "learning_rate": 6.737004811407109e-05, "loss": 1.5824, "step": 543 }, { "epoch": 0.30163570834488496, "grad_norm": 0.23563292622566223, "learning_rate": 6.731619076701002e-05, "loss": 1.5641, "step": 544 }, { "epoch": 0.3021901857499307, "grad_norm": 0.2265416830778122, "learning_rate": 6.726224046115871e-05, "loss": 1.7597, "step": 545 }, { "epoch": 0.30274466315497645, "grad_norm": 0.21692894399166107, "learning_rate": 6.720819738011355e-05, "loss": 1.5962, "step": 546 }, { "epoch": 0.30329914056002216, "grad_norm": 0.2314797043800354, "learning_rate": 6.715406170778671e-05, "loss": 1.5945, "step": 547 }, { "epoch": 0.30385361796506793, "grad_norm": 0.21994145214557648, "learning_rate": 6.709983362840544e-05, "loss": 1.6904, "step": 548 }, { "epoch": 0.30440809537011365, "grad_norm": 0.21997472643852234, "learning_rate": 6.704551332651144e-05, "loss": 1.6322, "step": 549 }, { "epoch": 0.3049625727751594, "grad_norm": 0.22159717977046967, "learning_rate": 6.699110098696029e-05, "loss": 1.6348, "step": 550 }, { "epoch": 0.30551705018020514, "grad_norm": 0.20532500743865967, "learning_rate": 6.693659679492072e-05, "loss": 1.5946, "step": 551 }, { "epoch": 0.3060715275852509, "grad_norm": 0.22436174750328064, "learning_rate": 6.688200093587409e-05, "loss": 1.5677, "step": 552 }, { "epoch": 0.30662600499029663, "grad_norm": 0.20958715677261353, "learning_rate": 6.682731359561369e-05, "loss": 1.5468, "step": 553 }, { "epoch": 0.3071804823953424, "grad_norm": 0.23304277658462524, "learning_rate": 6.677253496024412e-05, "loss": 1.6636, "step": 554 }, { "epoch": 0.3077349598003881, "grad_norm": 0.2050829380750656, "learning_rate": 6.671766521618069e-05, "loss": 1.5629, "step": 555 }, { "epoch": 0.3082894372054339, "grad_norm": 0.22310495376586914, "learning_rate": 6.666270455014874e-05, "loss": 1.6744, "step": 556 }, { "epoch": 0.3088439146104796, "grad_norm": 0.21029001474380493, "learning_rate": 6.6607653149183e-05, "loss": 1.6397, "step": 557 }, { "epoch": 0.3093983920155254, "grad_norm": 0.21131139993667603, "learning_rate": 6.655251120062702e-05, "loss": 1.62, "step": 558 }, { "epoch": 0.3099528694205711, "grad_norm": 0.21726414561271667, "learning_rate": 6.649727889213246e-05, "loss": 1.601, "step": 559 }, { "epoch": 0.31050734682561687, "grad_norm": 0.2165653556585312, "learning_rate": 6.644195641165851e-05, "loss": 1.577, "step": 560 }, { "epoch": 0.3110618242306626, "grad_norm": 0.20710572600364685, "learning_rate": 6.63865439474712e-05, "loss": 1.5241, "step": 561 }, { "epoch": 0.31161630163570836, "grad_norm": 0.20715083181858063, "learning_rate": 6.633104168814276e-05, "loss": 1.5959, "step": 562 }, { "epoch": 0.3121707790407541, "grad_norm": 0.2072436511516571, "learning_rate": 6.627544982255105e-05, "loss": 1.669, "step": 563 }, { "epoch": 0.31272525644579985, "grad_norm": 0.22219659388065338, "learning_rate": 6.62197685398788e-05, "loss": 1.6437, "step": 564 }, { "epoch": 0.31327973385084557, "grad_norm": 0.209031879901886, "learning_rate": 6.616399802961312e-05, "loss": 1.6819, "step": 565 }, { "epoch": 0.31383421125589134, "grad_norm": 0.21048156917095184, "learning_rate": 6.610813848154467e-05, "loss": 1.6213, "step": 566 }, { "epoch": 0.31438868866093705, "grad_norm": 0.20893770456314087, "learning_rate": 6.605219008576718e-05, "loss": 1.6092, "step": 567 }, { "epoch": 0.3149431660659828, "grad_norm": 0.20855359733104706, "learning_rate": 6.599615303267672e-05, "loss": 1.6285, "step": 568 }, { "epoch": 0.31549764347102854, "grad_norm": 0.20627255737781525, "learning_rate": 6.594002751297106e-05, "loss": 1.5787, "step": 569 }, { "epoch": 0.3160521208760743, "grad_norm": 0.21437032520771027, "learning_rate": 6.588381371764903e-05, "loss": 1.5824, "step": 570 }, { "epoch": 0.31660659828112003, "grad_norm": 0.21238544583320618, "learning_rate": 6.582751183800983e-05, "loss": 1.6169, "step": 571 }, { "epoch": 0.3171610756861658, "grad_norm": 0.20815801620483398, "learning_rate": 6.57711220656525e-05, "loss": 1.5866, "step": 572 }, { "epoch": 0.3177155530912115, "grad_norm": 0.21221747994422913, "learning_rate": 6.57146445924751e-05, "loss": 1.6005, "step": 573 }, { "epoch": 0.3182700304962573, "grad_norm": 0.22167359292507172, "learning_rate": 6.565807961067421e-05, "loss": 1.661, "step": 574 }, { "epoch": 0.318824507901303, "grad_norm": 0.22634592652320862, "learning_rate": 6.560142731274416e-05, "loss": 1.6441, "step": 575 }, { "epoch": 0.3193789853063488, "grad_norm": 0.20446883141994476, "learning_rate": 6.554468789147644e-05, "loss": 1.5857, "step": 576 }, { "epoch": 0.3199334627113945, "grad_norm": 0.22486351430416107, "learning_rate": 6.548786153995901e-05, "loss": 1.6204, "step": 577 }, { "epoch": 0.3204879401164403, "grad_norm": 0.2034136950969696, "learning_rate": 6.543094845157569e-05, "loss": 1.5765, "step": 578 }, { "epoch": 0.321042417521486, "grad_norm": 0.21498997509479523, "learning_rate": 6.537394882000545e-05, "loss": 1.5481, "step": 579 }, { "epoch": 0.32159689492653176, "grad_norm": 0.2029666006565094, "learning_rate": 6.531686283922179e-05, "loss": 1.578, "step": 580 }, { "epoch": 0.3221513723315775, "grad_norm": 0.20419777929782867, "learning_rate": 6.525969070349205e-05, "loss": 1.5908, "step": 581 }, { "epoch": 0.32270584973662325, "grad_norm": 0.2099248170852661, "learning_rate": 6.520243260737674e-05, "loss": 1.5928, "step": 582 }, { "epoch": 0.32326032714166897, "grad_norm": 0.21225160360336304, "learning_rate": 6.514508874572893e-05, "loss": 1.5871, "step": 583 }, { "epoch": 0.32381480454671474, "grad_norm": 0.20643840730190277, "learning_rate": 6.508765931369356e-05, "loss": 1.5607, "step": 584 }, { "epoch": 0.32436928195176046, "grad_norm": 0.2086772918701172, "learning_rate": 6.503014450670674e-05, "loss": 1.5538, "step": 585 }, { "epoch": 0.32492375935680623, "grad_norm": 0.2010130137205124, "learning_rate": 6.497254452049513e-05, "loss": 1.5856, "step": 586 }, { "epoch": 0.32547823676185195, "grad_norm": 0.2193068265914917, "learning_rate": 6.491485955107526e-05, "loss": 1.7064, "step": 587 }, { "epoch": 0.3260327141668977, "grad_norm": 0.2186783403158188, "learning_rate": 6.485708979475291e-05, "loss": 1.5548, "step": 588 }, { "epoch": 0.32658719157194344, "grad_norm": 0.213240385055542, "learning_rate": 6.47992354481223e-05, "loss": 1.6239, "step": 589 }, { "epoch": 0.3271416689769892, "grad_norm": 0.2521916627883911, "learning_rate": 6.474129670806561e-05, "loss": 1.6636, "step": 590 }, { "epoch": 0.3276961463820349, "grad_norm": 0.21178847551345825, "learning_rate": 6.468327377175214e-05, "loss": 1.5943, "step": 591 }, { "epoch": 0.3282506237870807, "grad_norm": 0.2146124243736267, "learning_rate": 6.462516683663778e-05, "loss": 1.615, "step": 592 }, { "epoch": 0.3288051011921264, "grad_norm": 0.20442825555801392, "learning_rate": 6.456697610046423e-05, "loss": 1.5938, "step": 593 }, { "epoch": 0.3293595785971722, "grad_norm": 0.2278599590063095, "learning_rate": 6.450870176125838e-05, "loss": 1.6331, "step": 594 }, { "epoch": 0.3299140560022179, "grad_norm": 0.2228085696697235, "learning_rate": 6.445034401733164e-05, "loss": 1.6489, "step": 595 }, { "epoch": 0.3304685334072637, "grad_norm": 0.24472852051258087, "learning_rate": 6.439190306727926e-05, "loss": 1.5416, "step": 596 }, { "epoch": 0.3310230108123094, "grad_norm": 0.22040173411369324, "learning_rate": 6.433337910997958e-05, "loss": 1.5253, "step": 597 }, { "epoch": 0.33157748821735517, "grad_norm": 0.27518245577812195, "learning_rate": 6.427477234459353e-05, "loss": 1.6411, "step": 598 }, { "epoch": 0.3321319656224009, "grad_norm": 0.2157251238822937, "learning_rate": 6.421608297056374e-05, "loss": 1.6153, "step": 599 }, { "epoch": 0.33268644302744665, "grad_norm": 0.2443874329328537, "learning_rate": 6.415731118761401e-05, "loss": 1.5696, "step": 600 }, { "epoch": 0.33324092043249237, "grad_norm": 0.22603949904441833, "learning_rate": 6.409845719574857e-05, "loss": 1.6176, "step": 601 }, { "epoch": 0.33379539783753814, "grad_norm": 0.20818479359149933, "learning_rate": 6.403952119525143e-05, "loss": 1.5308, "step": 602 }, { "epoch": 0.33434987524258386, "grad_norm": 0.2425011247396469, "learning_rate": 6.398050338668567e-05, "loss": 1.5735, "step": 603 }, { "epoch": 0.33490435264762963, "grad_norm": 0.21690739691257477, "learning_rate": 6.392140397089275e-05, "loss": 1.6721, "step": 604 }, { "epoch": 0.33545883005267535, "grad_norm": 0.24877040088176727, "learning_rate": 6.386222314899187e-05, "loss": 1.6139, "step": 605 }, { "epoch": 0.3360133074577211, "grad_norm": 0.2150987684726715, "learning_rate": 6.380296112237926e-05, "loss": 1.5697, "step": 606 }, { "epoch": 0.33656778486276684, "grad_norm": 0.22079655528068542, "learning_rate": 6.374361809272749e-05, "loss": 1.6363, "step": 607 }, { "epoch": 0.3371222622678126, "grad_norm": 0.20685946941375732, "learning_rate": 6.368419426198475e-05, "loss": 1.6146, "step": 608 }, { "epoch": 0.33767673967285833, "grad_norm": 0.21263012290000916, "learning_rate": 6.362468983237427e-05, "loss": 1.5774, "step": 609 }, { "epoch": 0.3382312170779041, "grad_norm": 0.21780216693878174, "learning_rate": 6.356510500639353e-05, "loss": 1.6147, "step": 610 }, { "epoch": 0.3387856944829498, "grad_norm": 0.21480686962604523, "learning_rate": 6.350543998681358e-05, "loss": 1.6554, "step": 611 }, { "epoch": 0.3393401718879956, "grad_norm": 0.2028435915708542, "learning_rate": 6.344569497667843e-05, "loss": 1.546, "step": 612 }, { "epoch": 0.3398946492930413, "grad_norm": 0.22665052115917206, "learning_rate": 6.338587017930425e-05, "loss": 1.571, "step": 613 }, { "epoch": 0.3404491266980871, "grad_norm": 0.20434421300888062, "learning_rate": 6.332596579827876e-05, "loss": 1.586, "step": 614 }, { "epoch": 0.3410036041031328, "grad_norm": 0.20660161972045898, "learning_rate": 6.326598203746049e-05, "loss": 1.5352, "step": 615 }, { "epoch": 0.34155808150817857, "grad_norm": 0.19511684775352478, "learning_rate": 6.320591910097813e-05, "loss": 1.5336, "step": 616 }, { "epoch": 0.3421125589132243, "grad_norm": 0.21570967137813568, "learning_rate": 6.314577719322978e-05, "loss": 1.625, "step": 617 }, { "epoch": 0.34266703631827006, "grad_norm": 0.20801737904548645, "learning_rate": 6.308555651888233e-05, "loss": 1.5529, "step": 618 }, { "epoch": 0.3432215137233158, "grad_norm": 0.21180784702301025, "learning_rate": 6.302525728287064e-05, "loss": 1.6666, "step": 619 }, { "epoch": 0.34377599112836155, "grad_norm": 0.20075224339962006, "learning_rate": 6.296487969039701e-05, "loss": 1.6188, "step": 620 }, { "epoch": 0.34433046853340726, "grad_norm": 0.22708876430988312, "learning_rate": 6.290442394693033e-05, "loss": 1.6372, "step": 621 }, { "epoch": 0.344884945938453, "grad_norm": 0.20980872213840485, "learning_rate": 6.284389025820547e-05, "loss": 1.584, "step": 622 }, { "epoch": 0.34543942334349875, "grad_norm": 0.20637759566307068, "learning_rate": 6.278327883022255e-05, "loss": 1.5419, "step": 623 }, { "epoch": 0.34599390074854447, "grad_norm": 0.2151980698108673, "learning_rate": 6.272258986924624e-05, "loss": 1.6066, "step": 624 }, { "epoch": 0.34654837815359024, "grad_norm": 0.21410825848579407, "learning_rate": 6.266182358180504e-05, "loss": 1.657, "step": 625 }, { "epoch": 0.34710285555863596, "grad_norm": 0.20414431393146515, "learning_rate": 6.260098017469063e-05, "loss": 1.6131, "step": 626 }, { "epoch": 0.34765733296368173, "grad_norm": 0.19871225953102112, "learning_rate": 6.254005985495711e-05, "loss": 1.5843, "step": 627 }, { "epoch": 0.34821181036872745, "grad_norm": 0.20605245232582092, "learning_rate": 6.247906282992034e-05, "loss": 1.6049, "step": 628 }, { "epoch": 0.3487662877737732, "grad_norm": 0.20710204541683197, "learning_rate": 6.241798930715719e-05, "loss": 1.6005, "step": 629 }, { "epoch": 0.34932076517881894, "grad_norm": 0.212701216340065, "learning_rate": 6.235683949450486e-05, "loss": 1.5958, "step": 630 }, { "epoch": 0.3498752425838647, "grad_norm": 0.2133304476737976, "learning_rate": 6.229561360006019e-05, "loss": 1.6924, "step": 631 }, { "epoch": 0.3504297199889104, "grad_norm": 0.208997905254364, "learning_rate": 6.223431183217892e-05, "loss": 1.6412, "step": 632 }, { "epoch": 0.3509841973939562, "grad_norm": 0.2044183760881424, "learning_rate": 6.217293439947498e-05, "loss": 1.6015, "step": 633 }, { "epoch": 0.3515386747990019, "grad_norm": 0.2030702829360962, "learning_rate": 6.211148151081978e-05, "loss": 1.5971, "step": 634 }, { "epoch": 0.3520931522040477, "grad_norm": 0.20268899202346802, "learning_rate": 6.204995337534159e-05, "loss": 1.5271, "step": 635 }, { "epoch": 0.3526476296090934, "grad_norm": 0.2157873958349228, "learning_rate": 6.198835020242467e-05, "loss": 1.6008, "step": 636 }, { "epoch": 0.3532021070141392, "grad_norm": 0.20912663638591766, "learning_rate": 6.192667220170863e-05, "loss": 1.5696, "step": 637 }, { "epoch": 0.3537565844191849, "grad_norm": 0.21048259735107422, "learning_rate": 6.18649195830878e-05, "loss": 1.5968, "step": 638 }, { "epoch": 0.35431106182423067, "grad_norm": 0.21538151800632477, "learning_rate": 6.180309255671035e-05, "loss": 1.5876, "step": 639 }, { "epoch": 0.3548655392292764, "grad_norm": 0.22569550573825836, "learning_rate": 6.174119133297775e-05, "loss": 1.5357, "step": 640 }, { "epoch": 0.35542001663432216, "grad_norm": 0.20763731002807617, "learning_rate": 6.167921612254391e-05, "loss": 1.6395, "step": 641 }, { "epoch": 0.3559744940393679, "grad_norm": 0.23909436166286469, "learning_rate": 6.161716713631453e-05, "loss": 1.6401, "step": 642 }, { "epoch": 0.35652897144441364, "grad_norm": 0.20901797711849213, "learning_rate": 6.155504458544641e-05, "loss": 1.6965, "step": 643 }, { "epoch": 0.35708344884945936, "grad_norm": 0.21390089392662048, "learning_rate": 6.149284868134663e-05, "loss": 1.5991, "step": 644 }, { "epoch": 0.35763792625450513, "grad_norm": 0.2071395069360733, "learning_rate": 6.143057963567198e-05, "loss": 1.6255, "step": 645 }, { "epoch": 0.35819240365955085, "grad_norm": 0.23318380117416382, "learning_rate": 6.136823766032808e-05, "loss": 1.6149, "step": 646 }, { "epoch": 0.3587468810645966, "grad_norm": 0.21669737994670868, "learning_rate": 6.130582296746876e-05, "loss": 1.6068, "step": 647 }, { "epoch": 0.35930135846964234, "grad_norm": 0.20202122628688812, "learning_rate": 6.124333576949533e-05, "loss": 1.5232, "step": 648 }, { "epoch": 0.3598558358746881, "grad_norm": 0.20784695446491241, "learning_rate": 6.118077627905584e-05, "loss": 1.5963, "step": 649 }, { "epoch": 0.36041031327973383, "grad_norm": 0.20467756688594818, "learning_rate": 6.111814470904431e-05, "loss": 1.5672, "step": 650 }, { "epoch": 0.3609647906847796, "grad_norm": 0.20664702355861664, "learning_rate": 6.105544127260012e-05, "loss": 1.5897, "step": 651 }, { "epoch": 0.3615192680898253, "grad_norm": 0.21211595833301544, "learning_rate": 6.0992666183107134e-05, "loss": 1.5792, "step": 652 }, { "epoch": 0.3620737454948711, "grad_norm": 0.22114971280097961, "learning_rate": 6.092981965419313e-05, "loss": 1.5907, "step": 653 }, { "epoch": 0.3626282228999168, "grad_norm": 0.22703395783901215, "learning_rate": 6.086690189972898e-05, "loss": 1.5498, "step": 654 }, { "epoch": 0.3631827003049626, "grad_norm": 0.22778987884521484, "learning_rate": 6.080391313382793e-05, "loss": 1.6132, "step": 655 }, { "epoch": 0.3637371777100083, "grad_norm": 0.21941795945167542, "learning_rate": 6.074085357084487e-05, "loss": 1.6606, "step": 656 }, { "epoch": 0.36429165511505407, "grad_norm": 0.20801624655723572, "learning_rate": 6.0677723425375636e-05, "loss": 1.558, "step": 657 }, { "epoch": 0.3648461325200998, "grad_norm": 0.22957980632781982, "learning_rate": 6.061452291225627e-05, "loss": 1.4949, "step": 658 }, { "epoch": 0.36540060992514556, "grad_norm": 0.20350852608680725, "learning_rate": 6.055125224656225e-05, "loss": 1.5406, "step": 659 }, { "epoch": 0.3659550873301913, "grad_norm": 0.21100889146327972, "learning_rate": 6.048791164360781e-05, "loss": 1.5888, "step": 660 }, { "epoch": 0.36650956473523705, "grad_norm": 0.21262866258621216, "learning_rate": 6.0424501318945194e-05, "loss": 1.61, "step": 661 }, { "epoch": 0.36706404214028276, "grad_norm": 0.22323627769947052, "learning_rate": 6.036102148836387e-05, "loss": 1.6143, "step": 662 }, { "epoch": 0.36761851954532854, "grad_norm": 0.22186584770679474, "learning_rate": 6.02974723678899e-05, "loss": 1.5533, "step": 663 }, { "epoch": 0.36817299695037425, "grad_norm": 0.2202465534210205, "learning_rate": 6.0233854173785086e-05, "loss": 1.6063, "step": 664 }, { "epoch": 0.36872747435542, "grad_norm": 0.2212614119052887, "learning_rate": 6.017016712254635e-05, "loss": 1.5558, "step": 665 }, { "epoch": 0.36928195176046574, "grad_norm": 0.21273072063922882, "learning_rate": 6.0106411430904865e-05, "loss": 1.5717, "step": 666 }, { "epoch": 0.3698364291655115, "grad_norm": 0.24066902697086334, "learning_rate": 6.004258731582546e-05, "loss": 1.6385, "step": 667 }, { "epoch": 0.37039090657055723, "grad_norm": 0.2065984606742859, "learning_rate": 5.997869499450581e-05, "loss": 1.6573, "step": 668 }, { "epoch": 0.370945383975603, "grad_norm": 0.29595062136650085, "learning_rate": 5.991473468437562e-05, "loss": 1.6945, "step": 669 }, { "epoch": 0.3714998613806487, "grad_norm": 0.20868165791034698, "learning_rate": 5.985070660309609e-05, "loss": 1.5274, "step": 670 }, { "epoch": 0.3720543387856945, "grad_norm": 0.2694888412952423, "learning_rate": 5.978661096855893e-05, "loss": 1.5829, "step": 671 }, { "epoch": 0.3726088161907402, "grad_norm": 0.21415413916110992, "learning_rate": 5.972244799888583e-05, "loss": 1.6068, "step": 672 }, { "epoch": 0.373163293595786, "grad_norm": 0.2538868486881256, "learning_rate": 5.9658217912427554e-05, "loss": 1.586, "step": 673 }, { "epoch": 0.3737177710008317, "grad_norm": 0.22458183765411377, "learning_rate": 5.959392092776333e-05, "loss": 1.5573, "step": 674 }, { "epoch": 0.3742722484058775, "grad_norm": 0.23578275740146637, "learning_rate": 5.952955726370001e-05, "loss": 1.5725, "step": 675 }, { "epoch": 0.3748267258109232, "grad_norm": 0.24922388792037964, "learning_rate": 5.946512713927135e-05, "loss": 1.6212, "step": 676 }, { "epoch": 0.37538120321596896, "grad_norm": 0.21490588784217834, "learning_rate": 5.940063077373732e-05, "loss": 1.61, "step": 677 }, { "epoch": 0.3759356806210147, "grad_norm": 0.26785025000572205, "learning_rate": 5.933606838658328e-05, "loss": 1.5897, "step": 678 }, { "epoch": 0.37649015802606045, "grad_norm": 0.2139958143234253, "learning_rate": 5.927144019751925e-05, "loss": 1.5807, "step": 679 }, { "epoch": 0.37704463543110617, "grad_norm": 0.25820040702819824, "learning_rate": 5.9206746426479215e-05, "loss": 1.5907, "step": 680 }, { "epoch": 0.37759911283615194, "grad_norm": 0.21500596404075623, "learning_rate": 5.9141987293620334e-05, "loss": 1.5283, "step": 681 }, { "epoch": 0.37815359024119766, "grad_norm": 0.21151086688041687, "learning_rate": 5.907716301932217e-05, "loss": 1.644, "step": 682 }, { "epoch": 0.37870806764624343, "grad_norm": 0.24681684374809265, "learning_rate": 5.901227382418599e-05, "loss": 1.55, "step": 683 }, { "epoch": 0.37926254505128915, "grad_norm": 0.20953518152236938, "learning_rate": 5.894731992903399e-05, "loss": 1.5991, "step": 684 }, { "epoch": 0.3798170224563349, "grad_norm": 0.2509237825870514, "learning_rate": 5.888230155490853e-05, "loss": 1.6488, "step": 685 }, { "epoch": 0.38037149986138064, "grad_norm": 0.22760917246341705, "learning_rate": 5.8817218923071406e-05, "loss": 1.5948, "step": 686 }, { "epoch": 0.3809259772664264, "grad_norm": 0.20528970658779144, "learning_rate": 5.875207225500308e-05, "loss": 1.53, "step": 687 }, { "epoch": 0.3814804546714721, "grad_norm": 0.3011578619480133, "learning_rate": 5.8686861772401974e-05, "loss": 1.6489, "step": 688 }, { "epoch": 0.3820349320765179, "grad_norm": 0.20146943628787994, "learning_rate": 5.8621587697183595e-05, "loss": 1.5495, "step": 689 }, { "epoch": 0.3825894094815636, "grad_norm": 0.24754847586154938, "learning_rate": 5.8556250251479945e-05, "loss": 1.6115, "step": 690 }, { "epoch": 0.3831438868866094, "grad_norm": 0.21535150706768036, "learning_rate": 5.84908496576386e-05, "loss": 1.6324, "step": 691 }, { "epoch": 0.3836983642916551, "grad_norm": 0.21527227759361267, "learning_rate": 5.8425386138222116e-05, "loss": 1.5944, "step": 692 }, { "epoch": 0.3842528416967009, "grad_norm": 0.2230699062347412, "learning_rate": 5.8359859916007116e-05, "loss": 1.5704, "step": 693 }, { "epoch": 0.3848073191017466, "grad_norm": 0.20711487531661987, "learning_rate": 5.8294271213983646e-05, "loss": 1.614, "step": 694 }, { "epoch": 0.38536179650679236, "grad_norm": 0.23301945626735687, "learning_rate": 5.822862025535436e-05, "loss": 1.6114, "step": 695 }, { "epoch": 0.3859162739118381, "grad_norm": 0.22132247686386108, "learning_rate": 5.816290726353378e-05, "loss": 1.6219, "step": 696 }, { "epoch": 0.38647075131688385, "grad_norm": 0.22144806385040283, "learning_rate": 5.809713246214756e-05, "loss": 1.6163, "step": 697 }, { "epoch": 0.38702522872192957, "grad_norm": 0.21476450562477112, "learning_rate": 5.8031296075031625e-05, "loss": 1.5815, "step": 698 }, { "epoch": 0.38757970612697534, "grad_norm": 0.2073630541563034, "learning_rate": 5.7965398326231535e-05, "loss": 1.5726, "step": 699 }, { "epoch": 0.38813418353202106, "grad_norm": 0.23282212018966675, "learning_rate": 5.7899439440001656e-05, "loss": 1.5905, "step": 700 }, { "epoch": 0.38868866093706683, "grad_norm": 0.2030622363090515, "learning_rate": 5.7833419640804426e-05, "loss": 1.606, "step": 701 }, { "epoch": 0.38924313834211255, "grad_norm": 0.22797855734825134, "learning_rate": 5.7767339153309526e-05, "loss": 1.6297, "step": 702 }, { "epoch": 0.3897976157471583, "grad_norm": 0.21430207788944244, "learning_rate": 5.770119820239321e-05, "loss": 1.647, "step": 703 }, { "epoch": 0.39035209315220404, "grad_norm": 0.21015015244483948, "learning_rate": 5.7634997013137465e-05, "loss": 1.5667, "step": 704 }, { "epoch": 0.3909065705572498, "grad_norm": 0.2301322966814041, "learning_rate": 5.7568735810829294e-05, "loss": 1.6313, "step": 705 }, { "epoch": 0.3914610479622955, "grad_norm": 0.21517878770828247, "learning_rate": 5.750241482095993e-05, "loss": 1.671, "step": 706 }, { "epoch": 0.3920155253673413, "grad_norm": 0.2164376974105835, "learning_rate": 5.743603426922401e-05, "loss": 1.6186, "step": 707 }, { "epoch": 0.392570002772387, "grad_norm": 0.2114233523607254, "learning_rate": 5.736959438151895e-05, "loss": 1.7116, "step": 708 }, { "epoch": 0.3931244801774328, "grad_norm": 0.22350315749645233, "learning_rate": 5.730309538394404e-05, "loss": 1.6869, "step": 709 }, { "epoch": 0.3936789575824785, "grad_norm": 0.2107817828655243, "learning_rate": 5.723653750279974e-05, "loss": 1.6738, "step": 710 }, { "epoch": 0.3942334349875243, "grad_norm": 0.20846299827098846, "learning_rate": 5.716992096458686e-05, "loss": 1.5339, "step": 711 }, { "epoch": 0.39478791239257, "grad_norm": 0.2204621285200119, "learning_rate": 5.710324599600589e-05, "loss": 1.5552, "step": 712 }, { "epoch": 0.39534238979761577, "grad_norm": 0.23123124241828918, "learning_rate": 5.7036512823956085e-05, "loss": 1.7236, "step": 713 }, { "epoch": 0.3958968672026615, "grad_norm": 0.24838416278362274, "learning_rate": 5.696972167553485e-05, "loss": 1.6249, "step": 714 }, { "epoch": 0.39645134460770726, "grad_norm": 0.2113085836172104, "learning_rate": 5.6902872778036825e-05, "loss": 1.5164, "step": 715 }, { "epoch": 0.397005822012753, "grad_norm": 0.2129892259836197, "learning_rate": 5.6835966358953186e-05, "loss": 1.6242, "step": 716 }, { "epoch": 0.39756029941779875, "grad_norm": 0.21519723534584045, "learning_rate": 5.67690026459709e-05, "loss": 1.5984, "step": 717 }, { "epoch": 0.39811477682284446, "grad_norm": 0.20214593410491943, "learning_rate": 5.670198186697185e-05, "loss": 1.6229, "step": 718 }, { "epoch": 0.39866925422789024, "grad_norm": 0.21002614498138428, "learning_rate": 5.6634904250032166e-05, "loss": 1.6142, "step": 719 }, { "epoch": 0.39922373163293595, "grad_norm": 0.20920030772686005, "learning_rate": 5.656777002342136e-05, "loss": 1.5819, "step": 720 }, { "epoch": 0.3997782090379817, "grad_norm": 0.21228238940238953, "learning_rate": 5.650057941560164e-05, "loss": 1.6284, "step": 721 }, { "epoch": 0.40033268644302744, "grad_norm": 0.331759512424469, "learning_rate": 5.643333265522702e-05, "loss": 1.5552, "step": 722 }, { "epoch": 0.4008871638480732, "grad_norm": 0.20200534164905548, "learning_rate": 5.636602997114268e-05, "loss": 1.545, "step": 723 }, { "epoch": 0.40144164125311893, "grad_norm": 0.2071356177330017, "learning_rate": 5.629867159238404e-05, "loss": 1.5618, "step": 724 }, { "epoch": 0.4019961186581647, "grad_norm": 0.2137397825717926, "learning_rate": 5.62312577481761e-05, "loss": 1.5858, "step": 725 }, { "epoch": 0.4025505960632104, "grad_norm": 0.2188277244567871, "learning_rate": 5.616378866793259e-05, "loss": 1.6123, "step": 726 }, { "epoch": 0.4031050734682562, "grad_norm": 0.21393775939941406, "learning_rate": 5.609626458125521e-05, "loss": 1.6253, "step": 727 }, { "epoch": 0.4036595508733019, "grad_norm": 0.2320735603570938, "learning_rate": 5.6028685717932895e-05, "loss": 1.5904, "step": 728 }, { "epoch": 0.4042140282783477, "grad_norm": 0.22155635058879852, "learning_rate": 5.596105230794091e-05, "loss": 1.6295, "step": 729 }, { "epoch": 0.4047685056833934, "grad_norm": 0.21663862466812134, "learning_rate": 5.589336458144023e-05, "loss": 1.5455, "step": 730 }, { "epoch": 0.40532298308843917, "grad_norm": 0.2116219699382782, "learning_rate": 5.582562276877659e-05, "loss": 1.6089, "step": 731 }, { "epoch": 0.4058774604934849, "grad_norm": 0.21755720674991608, "learning_rate": 5.575782710047985e-05, "loss": 1.5895, "step": 732 }, { "epoch": 0.40643193789853066, "grad_norm": 0.21230702102184296, "learning_rate": 5.5689977807263105e-05, "loss": 1.5425, "step": 733 }, { "epoch": 0.4069864153035764, "grad_norm": 0.21914036571979523, "learning_rate": 5.5622075120021976e-05, "loss": 1.659, "step": 734 }, { "epoch": 0.40754089270862215, "grad_norm": 0.2092135101556778, "learning_rate": 5.5554119269833746e-05, "loss": 1.5671, "step": 735 }, { "epoch": 0.40809537011366787, "grad_norm": 0.20937521755695343, "learning_rate": 5.548611048795663e-05, "loss": 1.552, "step": 736 }, { "epoch": 0.40864984751871364, "grad_norm": 0.20987878739833832, "learning_rate": 5.5418049005828994e-05, "loss": 1.565, "step": 737 }, { "epoch": 0.40920432492375935, "grad_norm": 0.22819262742996216, "learning_rate": 5.534993505506851e-05, "loss": 1.5928, "step": 738 }, { "epoch": 0.4097588023288051, "grad_norm": 0.2156732827425003, "learning_rate": 5.5281768867471455e-05, "loss": 1.6158, "step": 739 }, { "epoch": 0.41031327973385084, "grad_norm": 0.21924850344657898, "learning_rate": 5.521355067501181e-05, "loss": 1.5497, "step": 740 }, { "epoch": 0.4108677571388966, "grad_norm": 0.20754189789295197, "learning_rate": 5.5145280709840566e-05, "loss": 1.5645, "step": 741 }, { "epoch": 0.41142223454394233, "grad_norm": 0.22220148146152496, "learning_rate": 5.5076959204284915e-05, "loss": 1.5868, "step": 742 }, { "epoch": 0.4119767119489881, "grad_norm": 0.20873475074768066, "learning_rate": 5.5008586390847404e-05, "loss": 1.616, "step": 743 }, { "epoch": 0.4125311893540338, "grad_norm": 0.2225092649459839, "learning_rate": 5.494016250220521e-05, "loss": 1.6132, "step": 744 }, { "epoch": 0.4130856667590796, "grad_norm": 0.20154620707035065, "learning_rate": 5.487168777120932e-05, "loss": 1.5352, "step": 745 }, { "epoch": 0.4136401441641253, "grad_norm": 0.20975901186466217, "learning_rate": 5.480316243088375e-05, "loss": 1.5762, "step": 746 }, { "epoch": 0.41419462156917103, "grad_norm": 0.2048894613981247, "learning_rate": 5.4734586714424706e-05, "loss": 1.5744, "step": 747 }, { "epoch": 0.4147490989742168, "grad_norm": 0.2058396339416504, "learning_rate": 5.466596085519988e-05, "loss": 1.6035, "step": 748 }, { "epoch": 0.4153035763792625, "grad_norm": 0.21916894614696503, "learning_rate": 5.459728508674756e-05, "loss": 1.6108, "step": 749 }, { "epoch": 0.4158580537843083, "grad_norm": 0.2189890742301941, "learning_rate": 5.4528559642775885e-05, "loss": 1.6031, "step": 750 }, { "epoch": 0.416412531189354, "grad_norm": 0.2107512503862381, "learning_rate": 5.445978475716207e-05, "loss": 1.5418, "step": 751 }, { "epoch": 0.4169670085943998, "grad_norm": 0.20556119084358215, "learning_rate": 5.4390960663951565e-05, "loss": 1.597, "step": 752 }, { "epoch": 0.4175214859994455, "grad_norm": 0.20787164568901062, "learning_rate": 5.4322087597357264e-05, "loss": 1.6246, "step": 753 }, { "epoch": 0.41807596340449127, "grad_norm": 0.22257111966609955, "learning_rate": 5.4253165791758743e-05, "loss": 1.5532, "step": 754 }, { "epoch": 0.418630440809537, "grad_norm": 0.20410916209220886, "learning_rate": 5.4184195481701425e-05, "loss": 1.5668, "step": 755 }, { "epoch": 0.41918491821458276, "grad_norm": 0.234087273478508, "learning_rate": 5.411517690189581e-05, "loss": 1.5941, "step": 756 }, { "epoch": 0.4197393956196285, "grad_norm": 0.21471847593784332, "learning_rate": 5.404611028721665e-05, "loss": 1.5839, "step": 757 }, { "epoch": 0.42029387302467425, "grad_norm": 0.2194783240556717, "learning_rate": 5.3976995872702174e-05, "loss": 1.664, "step": 758 }, { "epoch": 0.42084835042971996, "grad_norm": 0.21059168875217438, "learning_rate": 5.390783389355326e-05, "loss": 1.5688, "step": 759 }, { "epoch": 0.42140282783476574, "grad_norm": 0.22869731485843658, "learning_rate": 5.3838624585132666e-05, "loss": 1.5823, "step": 760 }, { "epoch": 0.42195730523981145, "grad_norm": 0.21715888381004333, "learning_rate": 5.3769368182964226e-05, "loss": 1.6105, "step": 761 }, { "epoch": 0.4225117826448572, "grad_norm": 0.2125747799873352, "learning_rate": 5.3700064922732e-05, "loss": 1.5806, "step": 762 }, { "epoch": 0.42306626004990294, "grad_norm": 0.20356424152851105, "learning_rate": 5.363071504027956e-05, "loss": 1.5695, "step": 763 }, { "epoch": 0.4236207374549487, "grad_norm": 0.21644245088100433, "learning_rate": 5.3561318771609076e-05, "loss": 1.6602, "step": 764 }, { "epoch": 0.42417521485999443, "grad_norm": 0.21127082407474518, "learning_rate": 5.349187635288063e-05, "loss": 1.554, "step": 765 }, { "epoch": 0.4247296922650402, "grad_norm": 0.22828276455402374, "learning_rate": 5.3422388020411325e-05, "loss": 1.6498, "step": 766 }, { "epoch": 0.4252841696700859, "grad_norm": 0.22633306682109833, "learning_rate": 5.3352854010674515e-05, "loss": 1.6079, "step": 767 }, { "epoch": 0.4258386470751317, "grad_norm": 0.2162792831659317, "learning_rate": 5.3283274560299e-05, "loss": 1.6315, "step": 768 }, { "epoch": 0.4263931244801774, "grad_norm": 0.23089319467544556, "learning_rate": 5.321364990606821e-05, "loss": 1.638, "step": 769 }, { "epoch": 0.4269476018852232, "grad_norm": 0.2013845294713974, "learning_rate": 5.3143980284919424e-05, "loss": 1.5903, "step": 770 }, { "epoch": 0.4275020792902689, "grad_norm": 0.21818867325782776, "learning_rate": 5.307426593394294e-05, "loss": 1.5645, "step": 771 }, { "epoch": 0.42805655669531467, "grad_norm": 0.2172323316335678, "learning_rate": 5.300450709038126e-05, "loss": 1.5598, "step": 772 }, { "epoch": 0.4286110341003604, "grad_norm": 0.21557918190956116, "learning_rate": 5.29347039916283e-05, "loss": 1.5937, "step": 773 }, { "epoch": 0.42916551150540616, "grad_norm": 0.6955245733261108, "learning_rate": 5.286485687522861e-05, "loss": 1.6414, "step": 774 }, { "epoch": 0.4297199889104519, "grad_norm": 0.21430703997612, "learning_rate": 5.27949659788765e-05, "loss": 1.5782, "step": 775 }, { "epoch": 0.43027446631549765, "grad_norm": 0.2163209319114685, "learning_rate": 5.272503154041527e-05, "loss": 1.5992, "step": 776 }, { "epoch": 0.43082894372054337, "grad_norm": 0.21228471398353577, "learning_rate": 5.265505379783642e-05, "loss": 1.5764, "step": 777 }, { "epoch": 0.43138342112558914, "grad_norm": 0.21286910772323608, "learning_rate": 5.258503298927879e-05, "loss": 1.5892, "step": 778 }, { "epoch": 0.43193789853063486, "grad_norm": 0.24243953824043274, "learning_rate": 5.25149693530278e-05, "loss": 1.6385, "step": 779 }, { "epoch": 0.43249237593568063, "grad_norm": 0.20691627264022827, "learning_rate": 5.244486312751459e-05, "loss": 1.5992, "step": 780 }, { "epoch": 0.43304685334072635, "grad_norm": 0.21702216565608978, "learning_rate": 5.237471455131526e-05, "loss": 1.5866, "step": 781 }, { "epoch": 0.4336013307457721, "grad_norm": 0.22178849577903748, "learning_rate": 5.230452386315e-05, "loss": 1.6211, "step": 782 }, { "epoch": 0.43415580815081783, "grad_norm": 0.2103152573108673, "learning_rate": 5.223429130188235e-05, "loss": 1.5822, "step": 783 }, { "epoch": 0.4347102855558636, "grad_norm": 0.2260008603334427, "learning_rate": 5.216401710651831e-05, "loss": 1.5565, "step": 784 }, { "epoch": 0.4352647629609093, "grad_norm": 0.22004656493663788, "learning_rate": 5.20937015162056e-05, "loss": 1.6813, "step": 785 }, { "epoch": 0.4358192403659551, "grad_norm": 0.21554167568683624, "learning_rate": 5.202334477023277e-05, "loss": 1.6137, "step": 786 }, { "epoch": 0.4363737177710008, "grad_norm": 0.2462761402130127, "learning_rate": 5.195294710802845e-05, "loss": 1.6396, "step": 787 }, { "epoch": 0.4369281951760466, "grad_norm": 0.20096971094608307, "learning_rate": 5.1882508769160504e-05, "loss": 1.5586, "step": 788 }, { "epoch": 0.4374826725810923, "grad_norm": 0.21837452054023743, "learning_rate": 5.1812029993335226e-05, "loss": 1.5882, "step": 789 }, { "epoch": 0.4380371499861381, "grad_norm": 0.20460233092308044, "learning_rate": 5.174151102039653e-05, "loss": 1.5031, "step": 790 }, { "epoch": 0.4385916273911838, "grad_norm": 0.2106686383485794, "learning_rate": 5.167095209032509e-05, "loss": 1.5377, "step": 791 }, { "epoch": 0.43914610479622956, "grad_norm": 0.24171209335327148, "learning_rate": 5.160035344323758e-05, "loss": 1.6242, "step": 792 }, { "epoch": 0.4397005822012753, "grad_norm": 0.21198348701000214, "learning_rate": 5.152971531938583e-05, "loss": 1.5975, "step": 793 }, { "epoch": 0.44025505960632105, "grad_norm": 0.2270112782716751, "learning_rate": 5.145903795915603e-05, "loss": 1.5772, "step": 794 }, { "epoch": 0.44080953701136677, "grad_norm": 0.20378631353378296, "learning_rate": 5.138832160306785e-05, "loss": 1.5778, "step": 795 }, { "epoch": 0.44136401441641254, "grad_norm": 0.23949572443962097, "learning_rate": 5.1317566491773714e-05, "loss": 1.6321, "step": 796 }, { "epoch": 0.44191849182145826, "grad_norm": 0.20986564457416534, "learning_rate": 5.1246772866057885e-05, "loss": 1.5273, "step": 797 }, { "epoch": 0.44247296922650403, "grad_norm": 0.20840664207935333, "learning_rate": 5.117594096683574e-05, "loss": 1.5354, "step": 798 }, { "epoch": 0.44302744663154975, "grad_norm": 0.24688231945037842, "learning_rate": 5.1105071035152884e-05, "loss": 1.637, "step": 799 }, { "epoch": 0.4435819240365955, "grad_norm": 0.20614393055438995, "learning_rate": 5.1034163312184325e-05, "loss": 1.5573, "step": 800 }, { "epoch": 0.44413640144164124, "grad_norm": 0.22784316539764404, "learning_rate": 5.096321803923372e-05, "loss": 1.6117, "step": 801 }, { "epoch": 0.444690878846687, "grad_norm": 0.22619880735874176, "learning_rate": 5.089223545773248e-05, "loss": 1.5151, "step": 802 }, { "epoch": 0.4452453562517327, "grad_norm": 0.2142421305179596, "learning_rate": 5.082121580923899e-05, "loss": 1.6123, "step": 803 }, { "epoch": 0.4457998336567785, "grad_norm": 0.21790969371795654, "learning_rate": 5.0750159335437775e-05, "loss": 1.5859, "step": 804 }, { "epoch": 0.4463543110618242, "grad_norm": 0.21449251472949982, "learning_rate": 5.067906627813868e-05, "loss": 1.6656, "step": 805 }, { "epoch": 0.44690878846687, "grad_norm": 0.221047043800354, "learning_rate": 5.0607936879276055e-05, "loss": 1.5889, "step": 806 }, { "epoch": 0.4474632658719157, "grad_norm": 0.22916845977306366, "learning_rate": 5.0536771380907885e-05, "loss": 1.5901, "step": 807 }, { "epoch": 0.4480177432769615, "grad_norm": 0.20636655390262604, "learning_rate": 5.046557002521507e-05, "loss": 1.5883, "step": 808 }, { "epoch": 0.4485722206820072, "grad_norm": 0.23739656805992126, "learning_rate": 5.039433305450047e-05, "loss": 1.6089, "step": 809 }, { "epoch": 0.44912669808705297, "grad_norm": 0.2077774852514267, "learning_rate": 5.03230607111882e-05, "loss": 1.5868, "step": 810 }, { "epoch": 0.4496811754920987, "grad_norm": 0.21799373626708984, "learning_rate": 5.025175323782267e-05, "loss": 1.565, "step": 811 }, { "epoch": 0.45023565289714446, "grad_norm": 0.21249794960021973, "learning_rate": 5.0180410877067955e-05, "loss": 1.5063, "step": 812 }, { "epoch": 0.4507901303021902, "grad_norm": 0.21143661439418793, "learning_rate": 5.0109033871706754e-05, "loss": 1.62, "step": 813 }, { "epoch": 0.45134460770723595, "grad_norm": 0.21807655692100525, "learning_rate": 5.0037622464639724e-05, "loss": 1.585, "step": 814 }, { "epoch": 0.45189908511228166, "grad_norm": 0.2183266580104828, "learning_rate": 4.996617689888455e-05, "loss": 1.6266, "step": 815 }, { "epoch": 0.45245356251732743, "grad_norm": 0.20334866642951965, "learning_rate": 4.989469741757519e-05, "loss": 1.6247, "step": 816 }, { "epoch": 0.45300803992237315, "grad_norm": 0.21051491796970367, "learning_rate": 4.9823184263961e-05, "loss": 1.5917, "step": 817 }, { "epoch": 0.4535625173274189, "grad_norm": 0.2286101132631302, "learning_rate": 4.975163768140596e-05, "loss": 1.6318, "step": 818 }, { "epoch": 0.45411699473246464, "grad_norm": 0.20518524944782257, "learning_rate": 4.9680057913387775e-05, "loss": 1.5723, "step": 819 }, { "epoch": 0.4546714721375104, "grad_norm": 0.2158391773700714, "learning_rate": 4.960844520349709e-05, "loss": 1.6655, "step": 820 }, { "epoch": 0.45522594954255613, "grad_norm": 0.20380595326423645, "learning_rate": 4.953679979543666e-05, "loss": 1.5681, "step": 821 }, { "epoch": 0.4557804269476019, "grad_norm": 0.2011675089597702, "learning_rate": 4.946512193302051e-05, "loss": 1.5654, "step": 822 }, { "epoch": 0.4563349043526476, "grad_norm": 0.21012163162231445, "learning_rate": 4.939341186017313e-05, "loss": 1.6037, "step": 823 }, { "epoch": 0.4568893817576934, "grad_norm": 0.2087511122226715, "learning_rate": 4.932166982092858e-05, "loss": 1.5656, "step": 824 }, { "epoch": 0.4574438591627391, "grad_norm": 0.21227777004241943, "learning_rate": 4.924989605942973e-05, "loss": 1.6027, "step": 825 }, { "epoch": 0.4579983365677849, "grad_norm": 0.20791299641132355, "learning_rate": 4.9178090819927414e-05, "loss": 1.5582, "step": 826 }, { "epoch": 0.4585528139728306, "grad_norm": 0.2057400643825531, "learning_rate": 4.910625434677956e-05, "loss": 1.6189, "step": 827 }, { "epoch": 0.45910729137787637, "grad_norm": 0.20824283361434937, "learning_rate": 4.903438688445043e-05, "loss": 1.5573, "step": 828 }, { "epoch": 0.4596617687829221, "grad_norm": 0.20382678508758545, "learning_rate": 4.896248867750969e-05, "loss": 1.5942, "step": 829 }, { "epoch": 0.46021624618796786, "grad_norm": 0.21372130513191223, "learning_rate": 4.8890559970631667e-05, "loss": 1.5624, "step": 830 }, { "epoch": 0.4607707235930136, "grad_norm": 0.20644085109233856, "learning_rate": 4.881860100859446e-05, "loss": 1.6326, "step": 831 }, { "epoch": 0.46132520099805935, "grad_norm": 0.20801600813865662, "learning_rate": 4.874661203627917e-05, "loss": 1.5424, "step": 832 }, { "epoch": 0.46187967840310507, "grad_norm": 0.21283288300037384, "learning_rate": 4.867459329866897e-05, "loss": 1.6228, "step": 833 }, { "epoch": 0.46243415580815084, "grad_norm": 0.21098175644874573, "learning_rate": 4.860254504084835e-05, "loss": 1.5839, "step": 834 }, { "epoch": 0.46298863321319655, "grad_norm": 0.20932459831237793, "learning_rate": 4.853046750800228e-05, "loss": 1.5911, "step": 835 }, { "epoch": 0.4635431106182423, "grad_norm": 0.2054491639137268, "learning_rate": 4.8458360945415317e-05, "loss": 1.6222, "step": 836 }, { "epoch": 0.46409758802328804, "grad_norm": 0.19949305057525635, "learning_rate": 4.838622559847084e-05, "loss": 1.5386, "step": 837 }, { "epoch": 0.4646520654283338, "grad_norm": 0.21647600829601288, "learning_rate": 4.831406171265015e-05, "loss": 1.5216, "step": 838 }, { "epoch": 0.46520654283337953, "grad_norm": 0.20176930725574493, "learning_rate": 4.824186953353171e-05, "loss": 1.5054, "step": 839 }, { "epoch": 0.4657610202384253, "grad_norm": 0.20189246535301208, "learning_rate": 4.816964930679024e-05, "loss": 1.5422, "step": 840 }, { "epoch": 0.466315497643471, "grad_norm": 0.2124892771244049, "learning_rate": 4.8097401278195904e-05, "loss": 1.6112, "step": 841 }, { "epoch": 0.4668699750485168, "grad_norm": 0.20635344088077545, "learning_rate": 4.8025125693613485e-05, "loss": 1.55, "step": 842 }, { "epoch": 0.4674244524535625, "grad_norm": 0.20471568405628204, "learning_rate": 4.7952822799001564e-05, "loss": 1.5934, "step": 843 }, { "epoch": 0.4679789298586083, "grad_norm": 0.20640766620635986, "learning_rate": 4.78804928404116e-05, "loss": 1.5859, "step": 844 }, { "epoch": 0.468533407263654, "grad_norm": 0.2087356001138687, "learning_rate": 4.780813606398722e-05, "loss": 1.6468, "step": 845 }, { "epoch": 0.4690878846686998, "grad_norm": 0.20477420091629028, "learning_rate": 4.7735752715963265e-05, "loss": 1.5926, "step": 846 }, { "epoch": 0.4696423620737455, "grad_norm": 0.20607273280620575, "learning_rate": 4.766334304266503e-05, "loss": 1.5688, "step": 847 }, { "epoch": 0.47019683947879126, "grad_norm": 0.20167583227157593, "learning_rate": 4.7590907290507396e-05, "loss": 1.5408, "step": 848 }, { "epoch": 0.470751316883837, "grad_norm": 0.22671446204185486, "learning_rate": 4.751844570599395e-05, "loss": 1.5693, "step": 849 }, { "epoch": 0.47130579428888275, "grad_norm": 0.2094809114933014, "learning_rate": 4.7445958535716265e-05, "loss": 1.586, "step": 850 }, { "epoch": 0.47186027169392847, "grad_norm": 0.21534985303878784, "learning_rate": 4.73734460263529e-05, "loss": 1.6699, "step": 851 }, { "epoch": 0.47241474909897424, "grad_norm": 0.21628102660179138, "learning_rate": 4.730090842466871e-05, "loss": 1.6307, "step": 852 }, { "epoch": 0.47296922650401996, "grad_norm": 0.21070408821105957, "learning_rate": 4.7228345977513886e-05, "loss": 1.5859, "step": 853 }, { "epoch": 0.47352370390906573, "grad_norm": 0.21383161842823029, "learning_rate": 4.715575893182324e-05, "loss": 1.672, "step": 854 }, { "epoch": 0.47407818131411145, "grad_norm": 0.2086363434791565, "learning_rate": 4.7083147534615224e-05, "loss": 1.5431, "step": 855 }, { "epoch": 0.4746326587191572, "grad_norm": 0.1970445066690445, "learning_rate": 4.70105120329912e-05, "loss": 1.4742, "step": 856 }, { "epoch": 0.47518713612420294, "grad_norm": 0.20317339897155762, "learning_rate": 4.6937852674134555e-05, "loss": 1.5643, "step": 857 }, { "epoch": 0.4757416135292487, "grad_norm": 0.20548830926418304, "learning_rate": 4.6865169705309815e-05, "loss": 1.5369, "step": 858 }, { "epoch": 0.4762960909342944, "grad_norm": 0.20916157960891724, "learning_rate": 4.679246337386195e-05, "loss": 1.544, "step": 859 }, { "epoch": 0.4768505683393402, "grad_norm": 0.2089296132326126, "learning_rate": 4.671973392721535e-05, "loss": 1.5906, "step": 860 }, { "epoch": 0.4774050457443859, "grad_norm": 0.21110884845256805, "learning_rate": 4.6646981612873105e-05, "loss": 1.6617, "step": 861 }, { "epoch": 0.4779595231494317, "grad_norm": 0.20762455463409424, "learning_rate": 4.65742066784161e-05, "loss": 1.5733, "step": 862 }, { "epoch": 0.4785140005544774, "grad_norm": 0.20924974977970123, "learning_rate": 4.650140937150222e-05, "loss": 1.6104, "step": 863 }, { "epoch": 0.4790684779595232, "grad_norm": 0.21603722870349884, "learning_rate": 4.642858993986549e-05, "loss": 1.5838, "step": 864 }, { "epoch": 0.4796229553645689, "grad_norm": 0.20661696791648865, "learning_rate": 4.635574863131522e-05, "loss": 1.6063, "step": 865 }, { "epoch": 0.48017743276961466, "grad_norm": 0.2147568166255951, "learning_rate": 4.6282885693735145e-05, "loss": 1.5424, "step": 866 }, { "epoch": 0.4807319101746604, "grad_norm": 0.20484501123428345, "learning_rate": 4.621000137508263e-05, "loss": 1.5754, "step": 867 }, { "epoch": 0.48128638757970615, "grad_norm": 0.22287361323833466, "learning_rate": 4.61370959233878e-05, "loss": 1.6868, "step": 868 }, { "epoch": 0.48184086498475187, "grad_norm": 0.21111956238746643, "learning_rate": 4.6064169586752706e-05, "loss": 1.4827, "step": 869 }, { "epoch": 0.48239534238979764, "grad_norm": 0.20634272694587708, "learning_rate": 4.599122261335044e-05, "loss": 1.5634, "step": 870 }, { "epoch": 0.48294981979484336, "grad_norm": 0.2151550054550171, "learning_rate": 4.591825525142433e-05, "loss": 1.5334, "step": 871 }, { "epoch": 0.4835042971998891, "grad_norm": 0.21538786590099335, "learning_rate": 4.584526774928713e-05, "loss": 1.563, "step": 872 }, { "epoch": 0.48405877460493485, "grad_norm": 0.2079581320285797, "learning_rate": 4.5772260355320075e-05, "loss": 1.5912, "step": 873 }, { "epoch": 0.48461325200998057, "grad_norm": 0.21243925392627716, "learning_rate": 4.5699233317972145e-05, "loss": 1.5245, "step": 874 }, { "epoch": 0.48516772941502634, "grad_norm": 0.20607562363147736, "learning_rate": 4.562618688575911e-05, "loss": 1.5385, "step": 875 }, { "epoch": 0.48572220682007206, "grad_norm": 0.2081446647644043, "learning_rate": 4.555312130726279e-05, "loss": 1.589, "step": 876 }, { "epoch": 0.48627668422511783, "grad_norm": 0.20849229395389557, "learning_rate": 4.5480036831130144e-05, "loss": 1.6432, "step": 877 }, { "epoch": 0.48683116163016354, "grad_norm": 0.220262348651886, "learning_rate": 4.540693370607244e-05, "loss": 1.5364, "step": 878 }, { "epoch": 0.4873856390352093, "grad_norm": 0.2163216471672058, "learning_rate": 4.533381218086443e-05, "loss": 1.6256, "step": 879 }, { "epoch": 0.48794011644025503, "grad_norm": 0.21877123415470123, "learning_rate": 4.5260672504343436e-05, "loss": 1.6153, "step": 880 }, { "epoch": 0.4884945938453008, "grad_norm": 0.20430409908294678, "learning_rate": 4.518751492540859e-05, "loss": 1.5887, "step": 881 }, { "epoch": 0.4890490712503465, "grad_norm": 0.21504053473472595, "learning_rate": 4.5114339693019924e-05, "loss": 1.6251, "step": 882 }, { "epoch": 0.4896035486553923, "grad_norm": 0.21704961359500885, "learning_rate": 4.504114705619758e-05, "loss": 1.6214, "step": 883 }, { "epoch": 0.490158026060438, "grad_norm": 0.2103128433227539, "learning_rate": 4.4967937264020896e-05, "loss": 1.5078, "step": 884 }, { "epoch": 0.4907125034654838, "grad_norm": 0.21497170627117157, "learning_rate": 4.4894710565627585e-05, "loss": 1.6292, "step": 885 }, { "epoch": 0.4912669808705295, "grad_norm": 0.20846691727638245, "learning_rate": 4.4821467210212924e-05, "loss": 1.5809, "step": 886 }, { "epoch": 0.4918214582755753, "grad_norm": 0.20751187205314636, "learning_rate": 4.474820744702887e-05, "loss": 1.5428, "step": 887 }, { "epoch": 0.492375935680621, "grad_norm": 0.22106949985027313, "learning_rate": 4.4674931525383176e-05, "loss": 1.57, "step": 888 }, { "epoch": 0.49293041308566676, "grad_norm": 0.21874569356441498, "learning_rate": 4.460163969463864e-05, "loss": 1.565, "step": 889 }, { "epoch": 0.4934848904907125, "grad_norm": 0.21572619676589966, "learning_rate": 4.452833220421216e-05, "loss": 1.5302, "step": 890 }, { "epoch": 0.49403936789575825, "grad_norm": 0.2403019368648529, "learning_rate": 4.445500930357393e-05, "loss": 1.5533, "step": 891 }, { "epoch": 0.49459384530080397, "grad_norm": 0.2276349812746048, "learning_rate": 4.438167124224663e-05, "loss": 1.5833, "step": 892 }, { "epoch": 0.49514832270584974, "grad_norm": 0.20668740570545197, "learning_rate": 4.430831826980445e-05, "loss": 1.5467, "step": 893 }, { "epoch": 0.49570280011089546, "grad_norm": 0.21800528466701508, "learning_rate": 4.4234950635872406e-05, "loss": 1.5423, "step": 894 }, { "epoch": 0.49625727751594123, "grad_norm": 0.21564136445522308, "learning_rate": 4.416156859012534e-05, "loss": 1.5414, "step": 895 }, { "epoch": 0.49681175492098695, "grad_norm": 0.21869921684265137, "learning_rate": 4.4088172382287205e-05, "loss": 1.5703, "step": 896 }, { "epoch": 0.4973662323260327, "grad_norm": 0.2215128093957901, "learning_rate": 4.401476226213009e-05, "loss": 1.5246, "step": 897 }, { "epoch": 0.49792070973107844, "grad_norm": 0.24564118683338165, "learning_rate": 4.394133847947346e-05, "loss": 1.6237, "step": 898 }, { "epoch": 0.4984751871361242, "grad_norm": 0.20941196382045746, "learning_rate": 4.386790128418328e-05, "loss": 1.5816, "step": 899 }, { "epoch": 0.4990296645411699, "grad_norm": 0.21743817627429962, "learning_rate": 4.3794450926171106e-05, "loss": 1.5763, "step": 900 }, { "epoch": 0.4995841419462157, "grad_norm": 0.2064601182937622, "learning_rate": 4.3720987655393384e-05, "loss": 1.549, "step": 901 }, { "epoch": 0.5001386193512615, "grad_norm": 0.2088851034641266, "learning_rate": 4.36475117218504e-05, "loss": 1.5859, "step": 902 }, { "epoch": 0.5006930967563071, "grad_norm": 0.21022847294807434, "learning_rate": 4.357402337558561e-05, "loss": 1.5749, "step": 903 }, { "epoch": 0.5012475741613529, "grad_norm": 0.21769824624061584, "learning_rate": 4.350052286668466e-05, "loss": 1.6526, "step": 904 }, { "epoch": 0.5018020515663987, "grad_norm": 0.2308962196111679, "learning_rate": 4.342701044527461e-05, "loss": 1.6642, "step": 905 }, { "epoch": 0.5023565289714444, "grad_norm": 0.21604010462760925, "learning_rate": 4.335348636152306e-05, "loss": 1.6659, "step": 906 }, { "epoch": 0.5029110063764901, "grad_norm": 0.20606866478919983, "learning_rate": 4.3279950865637296e-05, "loss": 1.5198, "step": 907 }, { "epoch": 0.5034654837815359, "grad_norm": 0.20940081775188446, "learning_rate": 4.320640420786344e-05, "loss": 1.572, "step": 908 }, { "epoch": 0.5040199611865817, "grad_norm": 0.22230613231658936, "learning_rate": 4.313284663848558e-05, "loss": 1.5601, "step": 909 }, { "epoch": 0.5045744385916274, "grad_norm": 0.21619489789009094, "learning_rate": 4.305927840782497e-05, "loss": 1.6177, "step": 910 }, { "epoch": 0.5051289159966731, "grad_norm": 0.2147083431482315, "learning_rate": 4.298569976623912e-05, "loss": 1.6277, "step": 911 }, { "epoch": 0.5056833934017189, "grad_norm": 0.21617257595062256, "learning_rate": 4.291211096412099e-05, "loss": 1.6043, "step": 912 }, { "epoch": 0.5062378708067646, "grad_norm": 0.20666974782943726, "learning_rate": 4.283851225189807e-05, "loss": 1.5511, "step": 913 }, { "epoch": 0.5067923482118104, "grad_norm": 0.22238971292972565, "learning_rate": 4.276490388003164e-05, "loss": 1.6442, "step": 914 }, { "epoch": 0.5073468256168561, "grad_norm": 0.21716102957725525, "learning_rate": 4.269128609901581e-05, "loss": 1.5887, "step": 915 }, { "epoch": 0.5079013030219018, "grad_norm": 0.20268242061138153, "learning_rate": 4.261765915937674e-05, "loss": 1.5302, "step": 916 }, { "epoch": 0.5084557804269476, "grad_norm": 0.21744856238365173, "learning_rate": 4.254402331167171e-05, "loss": 1.5459, "step": 917 }, { "epoch": 0.5090102578319934, "grad_norm": 0.20796500146389008, "learning_rate": 4.247037880648836e-05, "loss": 1.5942, "step": 918 }, { "epoch": 0.509564735237039, "grad_norm": 0.22836215794086456, "learning_rate": 4.239672589444376e-05, "loss": 1.6449, "step": 919 }, { "epoch": 0.5101192126420848, "grad_norm": 0.20672906935214996, "learning_rate": 4.232306482618362e-05, "loss": 1.5657, "step": 920 }, { "epoch": 0.5106736900471306, "grad_norm": 0.21046310663223267, "learning_rate": 4.22493958523814e-05, "loss": 1.5618, "step": 921 }, { "epoch": 0.5112281674521764, "grad_norm": 0.2109476923942566, "learning_rate": 4.2175719223737426e-05, "loss": 1.5723, "step": 922 }, { "epoch": 0.511782644857222, "grad_norm": 0.2226995974779129, "learning_rate": 4.210203519097813e-05, "loss": 1.6005, "step": 923 }, { "epoch": 0.5123371222622678, "grad_norm": 0.21203988790512085, "learning_rate": 4.202834400485508e-05, "loss": 1.5898, "step": 924 }, { "epoch": 0.5128915996673136, "grad_norm": 0.21835650503635406, "learning_rate": 4.1954645916144245e-05, "loss": 1.5458, "step": 925 }, { "epoch": 0.5134460770723593, "grad_norm": 0.22072114050388336, "learning_rate": 4.188094117564505e-05, "loss": 1.5888, "step": 926 }, { "epoch": 0.514000554477405, "grad_norm": 0.20924319326877594, "learning_rate": 4.1807230034179566e-05, "loss": 1.5856, "step": 927 }, { "epoch": 0.5145550318824508, "grad_norm": 0.21937541663646698, "learning_rate": 4.1733512742591646e-05, "loss": 1.5418, "step": 928 }, { "epoch": 0.5151095092874965, "grad_norm": 0.2233019769191742, "learning_rate": 4.165978955174606e-05, "loss": 1.6723, "step": 929 }, { "epoch": 0.5156639866925423, "grad_norm": 0.22729487717151642, "learning_rate": 4.15860607125277e-05, "loss": 1.5608, "step": 930 }, { "epoch": 0.516218464097588, "grad_norm": 0.20513324439525604, "learning_rate": 4.151232647584061e-05, "loss": 1.5652, "step": 931 }, { "epoch": 0.5167729415026338, "grad_norm": 0.2118518203496933, "learning_rate": 4.143858709260726e-05, "loss": 1.6201, "step": 932 }, { "epoch": 0.5173274189076795, "grad_norm": 0.20294101536273956, "learning_rate": 4.136484281376758e-05, "loss": 1.5273, "step": 933 }, { "epoch": 0.5178818963127253, "grad_norm": 0.21510812640190125, "learning_rate": 4.1291093890278244e-05, "loss": 1.5741, "step": 934 }, { "epoch": 0.518436373717771, "grad_norm": 0.20787851512432098, "learning_rate": 4.1217340573111625e-05, "loss": 1.6002, "step": 935 }, { "epoch": 0.5189908511228167, "grad_norm": 0.20799653232097626, "learning_rate": 4.114358311325513e-05, "loss": 1.541, "step": 936 }, { "epoch": 0.5195453285278625, "grad_norm": 0.20286118984222412, "learning_rate": 4.106982176171025e-05, "loss": 1.4923, "step": 937 }, { "epoch": 0.5200998059329083, "grad_norm": 0.2020486742258072, "learning_rate": 4.099605676949169e-05, "loss": 1.4772, "step": 938 }, { "epoch": 0.5206542833379539, "grad_norm": 0.21534226834774017, "learning_rate": 4.0922288387626536e-05, "loss": 1.5663, "step": 939 }, { "epoch": 0.5212087607429997, "grad_norm": 0.24164481461048126, "learning_rate": 4.0848516867153474e-05, "loss": 1.6155, "step": 940 }, { "epoch": 0.5217632381480455, "grad_norm": 0.20027059316635132, "learning_rate": 4.077474245912182e-05, "loss": 1.5143, "step": 941 }, { "epoch": 0.5223177155530913, "grad_norm": 0.2217823565006256, "learning_rate": 4.070096541459071e-05, "loss": 1.5583, "step": 942 }, { "epoch": 0.5228721929581369, "grad_norm": 0.21679724752902985, "learning_rate": 4.0627185984628295e-05, "loss": 1.5588, "step": 943 }, { "epoch": 0.5234266703631827, "grad_norm": 0.20818306505680084, "learning_rate": 4.055340442031079e-05, "loss": 1.5417, "step": 944 }, { "epoch": 0.5239811477682285, "grad_norm": 0.21085157990455627, "learning_rate": 4.0479620972721726e-05, "loss": 1.5735, "step": 945 }, { "epoch": 0.5245356251732742, "grad_norm": 0.21629397571086884, "learning_rate": 4.040583589295101e-05, "loss": 1.6296, "step": 946 }, { "epoch": 0.5250901025783199, "grad_norm": 0.2124132364988327, "learning_rate": 4.03320494320941e-05, "loss": 1.5781, "step": 947 }, { "epoch": 0.5256445799833657, "grad_norm": 0.20531564950942993, "learning_rate": 4.0258261841251175e-05, "loss": 1.5726, "step": 948 }, { "epoch": 0.5261990573884114, "grad_norm": 0.20956727862358093, "learning_rate": 4.018447337152626e-05, "loss": 1.5653, "step": 949 }, { "epoch": 0.5267535347934572, "grad_norm": 0.20362792909145355, "learning_rate": 4.0110684274026365e-05, "loss": 1.576, "step": 950 }, { "epoch": 0.5273080121985029, "grad_norm": 0.20399406552314758, "learning_rate": 4.00368947998606e-05, "loss": 1.5732, "step": 951 }, { "epoch": 0.5278624896035486, "grad_norm": 0.21037329733371735, "learning_rate": 3.9963105200139415e-05, "loss": 1.5909, "step": 952 }, { "epoch": 0.5284169670085944, "grad_norm": 0.2091919481754303, "learning_rate": 3.9889315725973655e-05, "loss": 1.5478, "step": 953 }, { "epoch": 0.5289714444136402, "grad_norm": 0.2109626978635788, "learning_rate": 3.981552662847375e-05, "loss": 1.5515, "step": 954 }, { "epoch": 0.5295259218186859, "grad_norm": 0.21235403418540955, "learning_rate": 3.9741738158748824e-05, "loss": 1.5435, "step": 955 }, { "epoch": 0.5300803992237316, "grad_norm": 0.2205905020236969, "learning_rate": 3.966795056790591e-05, "loss": 1.6312, "step": 956 }, { "epoch": 0.5306348766287774, "grad_norm": 0.2057129591703415, "learning_rate": 3.959416410704901e-05, "loss": 1.5718, "step": 957 }, { "epoch": 0.5311893540338232, "grad_norm": 0.2083214968442917, "learning_rate": 3.952037902727829e-05, "loss": 1.5801, "step": 958 }, { "epoch": 0.5317438314388688, "grad_norm": 0.21389059722423553, "learning_rate": 3.944659557968922e-05, "loss": 1.5781, "step": 959 }, { "epoch": 0.5322983088439146, "grad_norm": 0.20008264482021332, "learning_rate": 3.937281401537171e-05, "loss": 1.5221, "step": 960 }, { "epoch": 0.5328527862489604, "grad_norm": 0.22723457217216492, "learning_rate": 3.92990345854093e-05, "loss": 1.6409, "step": 961 }, { "epoch": 0.5334072636540061, "grad_norm": 0.21602332592010498, "learning_rate": 3.922525754087819e-05, "loss": 1.624, "step": 962 }, { "epoch": 0.5339617410590518, "grad_norm": 0.2093724012374878, "learning_rate": 3.915148313284653e-05, "loss": 1.5583, "step": 963 }, { "epoch": 0.5345162184640976, "grad_norm": 0.20897488296031952, "learning_rate": 3.907771161237347e-05, "loss": 1.6248, "step": 964 }, { "epoch": 0.5350706958691434, "grad_norm": 0.21805384755134583, "learning_rate": 3.900394323050833e-05, "loss": 1.6227, "step": 965 }, { "epoch": 0.5356251732741891, "grad_norm": 0.22409921884536743, "learning_rate": 3.893017823828977e-05, "loss": 1.5582, "step": 966 }, { "epoch": 0.5361796506792348, "grad_norm": 0.22005674242973328, "learning_rate": 3.8856416886744874e-05, "loss": 1.6032, "step": 967 }, { "epoch": 0.5367341280842806, "grad_norm": 0.20452825725078583, "learning_rate": 3.878265942688838e-05, "loss": 1.5501, "step": 968 }, { "epoch": 0.5372886054893263, "grad_norm": 0.21274332702159882, "learning_rate": 3.8708906109721776e-05, "loss": 1.5386, "step": 969 }, { "epoch": 0.5378430828943721, "grad_norm": 0.21413595974445343, "learning_rate": 3.863515718623242e-05, "loss": 1.6382, "step": 970 }, { "epoch": 0.5383975602994178, "grad_norm": 0.22043178975582123, "learning_rate": 3.856141290739276e-05, "loss": 1.5306, "step": 971 }, { "epoch": 0.5389520377044635, "grad_norm": 0.2085334211587906, "learning_rate": 3.8487673524159404e-05, "loss": 1.5452, "step": 972 }, { "epoch": 0.5395065151095093, "grad_norm": 0.2164732664823532, "learning_rate": 3.8413939287472305e-05, "loss": 1.6273, "step": 973 }, { "epoch": 0.5400609925145551, "grad_norm": 0.23173940181732178, "learning_rate": 3.8340210448253945e-05, "loss": 1.629, "step": 974 }, { "epoch": 0.5406154699196007, "grad_norm": 0.20250822603702545, "learning_rate": 3.826648725740836e-05, "loss": 1.5313, "step": 975 }, { "epoch": 0.5411699473246465, "grad_norm": 0.20862893760204315, "learning_rate": 3.819276996582045e-05, "loss": 1.574, "step": 976 }, { "epoch": 0.5417244247296923, "grad_norm": 0.2386544793844223, "learning_rate": 3.8119058824354966e-05, "loss": 1.6064, "step": 977 }, { "epoch": 0.5422789021347381, "grad_norm": 0.2093050479888916, "learning_rate": 3.804535408385577e-05, "loss": 1.5596, "step": 978 }, { "epoch": 0.5428333795397837, "grad_norm": 0.22440102696418762, "learning_rate": 3.7971655995144937e-05, "loss": 1.5967, "step": 979 }, { "epoch": 0.5433878569448295, "grad_norm": 0.21898169815540314, "learning_rate": 3.789796480902188e-05, "loss": 1.5639, "step": 980 }, { "epoch": 0.5439423343498753, "grad_norm": 0.21143299341201782, "learning_rate": 3.782428077626259e-05, "loss": 1.5549, "step": 981 }, { "epoch": 0.544496811754921, "grad_norm": 0.21122704446315765, "learning_rate": 3.775060414761861e-05, "loss": 1.5778, "step": 982 }, { "epoch": 0.5450512891599667, "grad_norm": 0.22585324943065643, "learning_rate": 3.767693517381638e-05, "loss": 1.6119, "step": 983 }, { "epoch": 0.5456057665650125, "grad_norm": 0.1983765810728073, "learning_rate": 3.760327410555625e-05, "loss": 1.4554, "step": 984 }, { "epoch": 0.5461602439700582, "grad_norm": 0.210382342338562, "learning_rate": 3.752962119351166e-05, "loss": 1.562, "step": 985 }, { "epoch": 0.546714721375104, "grad_norm": 0.2129284292459488, "learning_rate": 3.745597668832831e-05, "loss": 1.6004, "step": 986 }, { "epoch": 0.5472691987801497, "grad_norm": 0.20643486082553864, "learning_rate": 3.738234084062327e-05, "loss": 1.5137, "step": 987 }, { "epoch": 0.5478236761851955, "grad_norm": 0.20947743952274323, "learning_rate": 3.730871390098419e-05, "loss": 1.5833, "step": 988 }, { "epoch": 0.5483781535902412, "grad_norm": 0.2061213254928589, "learning_rate": 3.723509611996837e-05, "loss": 1.5231, "step": 989 }, { "epoch": 0.548932630995287, "grad_norm": 0.21855583786964417, "learning_rate": 3.716148774810194e-05, "loss": 1.5806, "step": 990 }, { "epoch": 0.5494871084003327, "grad_norm": 0.21223531663417816, "learning_rate": 3.708788903587904e-05, "loss": 1.5188, "step": 991 }, { "epoch": 0.5500415858053784, "grad_norm": 0.20913442969322205, "learning_rate": 3.701430023376089e-05, "loss": 1.6229, "step": 992 }, { "epoch": 0.5505960632104242, "grad_norm": 0.2168632298707962, "learning_rate": 3.6940721592175026e-05, "loss": 1.5483, "step": 993 }, { "epoch": 0.55115054061547, "grad_norm": 0.21936000883579254, "learning_rate": 3.686715336151443e-05, "loss": 1.5724, "step": 994 }, { "epoch": 0.5517050180205156, "grad_norm": 0.21921208500862122, "learning_rate": 3.6793595792136565e-05, "loss": 1.6074, "step": 995 }, { "epoch": 0.5522594954255614, "grad_norm": 0.21475403010845184, "learning_rate": 3.672004913436271e-05, "loss": 1.6166, "step": 996 }, { "epoch": 0.5528139728306072, "grad_norm": 0.2194800078868866, "learning_rate": 3.664651363847695e-05, "loss": 1.5691, "step": 997 }, { "epoch": 0.5533684502356528, "grad_norm": 0.21276038885116577, "learning_rate": 3.65729895547254e-05, "loss": 1.5916, "step": 998 }, { "epoch": 0.5539229276406986, "grad_norm": 0.2259768396615982, "learning_rate": 3.649947713331536e-05, "loss": 1.5617, "step": 999 }, { "epoch": 0.5544774050457444, "grad_norm": 0.21104209125041962, "learning_rate": 3.64259766244144e-05, "loss": 1.5882, "step": 1000 }, { "epoch": 0.5550318824507902, "grad_norm": 0.21075722575187683, "learning_rate": 3.63524882781496e-05, "loss": 1.6089, "step": 1001 }, { "epoch": 0.5555863598558358, "grad_norm": 0.23340259492397308, "learning_rate": 3.627901234460663e-05, "loss": 1.6235, "step": 1002 }, { "epoch": 0.5561408372608816, "grad_norm": 0.22706355154514313, "learning_rate": 3.6205549073828894e-05, "loss": 1.5388, "step": 1003 }, { "epoch": 0.5566953146659274, "grad_norm": 0.22355878353118896, "learning_rate": 3.613209871581674e-05, "loss": 1.5329, "step": 1004 }, { "epoch": 0.5572497920709731, "grad_norm": 0.22762571275234222, "learning_rate": 3.6058661520526555e-05, "loss": 1.5367, "step": 1005 }, { "epoch": 0.5578042694760188, "grad_norm": 0.21231630444526672, "learning_rate": 3.598523773786991e-05, "loss": 1.5366, "step": 1006 }, { "epoch": 0.5583587468810646, "grad_norm": 0.22039973735809326, "learning_rate": 3.591182761771281e-05, "loss": 1.6085, "step": 1007 }, { "epoch": 0.5589132242861103, "grad_norm": 0.22262850403785706, "learning_rate": 3.5838431409874666e-05, "loss": 1.5297, "step": 1008 }, { "epoch": 0.5594677016911561, "grad_norm": 0.20765410363674164, "learning_rate": 3.576504936412762e-05, "loss": 1.5488, "step": 1009 }, { "epoch": 0.5600221790962018, "grad_norm": 0.21379908919334412, "learning_rate": 3.5691681730195564e-05, "loss": 1.581, "step": 1010 }, { "epoch": 0.5605766565012475, "grad_norm": 0.2365419566631317, "learning_rate": 3.561832875775338e-05, "loss": 1.6571, "step": 1011 }, { "epoch": 0.5611311339062933, "grad_norm": 0.2150985300540924, "learning_rate": 3.5544990696426077e-05, "loss": 1.546, "step": 1012 }, { "epoch": 0.5616856113113391, "grad_norm": 0.20792889595031738, "learning_rate": 3.5471667795787845e-05, "loss": 1.5404, "step": 1013 }, { "epoch": 0.5622400887163848, "grad_norm": 0.23230226337909698, "learning_rate": 3.5398360305361375e-05, "loss": 1.5393, "step": 1014 }, { "epoch": 0.5627945661214305, "grad_norm": 0.20976661145687103, "learning_rate": 3.532506847461684e-05, "loss": 1.5753, "step": 1015 }, { "epoch": 0.5633490435264763, "grad_norm": 0.21504442393779755, "learning_rate": 3.5251792552971146e-05, "loss": 1.5944, "step": 1016 }, { "epoch": 0.5639035209315221, "grad_norm": 0.21869289875030518, "learning_rate": 3.517853278978708e-05, "loss": 1.5166, "step": 1017 }, { "epoch": 0.5644579983365677, "grad_norm": 0.22892804443836212, "learning_rate": 3.510528943437243e-05, "loss": 1.6643, "step": 1018 }, { "epoch": 0.5650124757416135, "grad_norm": 0.21030141413211823, "learning_rate": 3.503206273597913e-05, "loss": 1.5408, "step": 1019 }, { "epoch": 0.5655669531466593, "grad_norm": 0.23448987305164337, "learning_rate": 3.495885294380243e-05, "loss": 1.5352, "step": 1020 }, { "epoch": 0.566121430551705, "grad_norm": 0.20386260747909546, "learning_rate": 3.488566030698008e-05, "loss": 1.5978, "step": 1021 }, { "epoch": 0.5666759079567507, "grad_norm": 0.22522738575935364, "learning_rate": 3.481248507459143e-05, "loss": 1.6675, "step": 1022 }, { "epoch": 0.5672303853617965, "grad_norm": 0.2048107385635376, "learning_rate": 3.4739327495656584e-05, "loss": 1.4263, "step": 1023 }, { "epoch": 0.5677848627668423, "grad_norm": 0.21060484647750854, "learning_rate": 3.466618781913559e-05, "loss": 1.5268, "step": 1024 }, { "epoch": 0.568339340171888, "grad_norm": 0.22075191140174866, "learning_rate": 3.459306629392757e-05, "loss": 1.6015, "step": 1025 }, { "epoch": 0.5688938175769337, "grad_norm": 0.20704005658626556, "learning_rate": 3.4519963168869855e-05, "loss": 1.4969, "step": 1026 }, { "epoch": 0.5694482949819795, "grad_norm": 0.21155935525894165, "learning_rate": 3.444687869273722e-05, "loss": 1.5355, "step": 1027 }, { "epoch": 0.5700027723870252, "grad_norm": 0.21162542700767517, "learning_rate": 3.437381311424091e-05, "loss": 1.5899, "step": 1028 }, { "epoch": 0.570557249792071, "grad_norm": 0.2058079093694687, "learning_rate": 3.4300766682027875e-05, "loss": 1.4967, "step": 1029 }, { "epoch": 0.5711117271971167, "grad_norm": 0.21342620253562927, "learning_rate": 3.422773964467994e-05, "loss": 1.5941, "step": 1030 }, { "epoch": 0.5716662046021624, "grad_norm": 0.21482619643211365, "learning_rate": 3.4154732250712876e-05, "loss": 1.5887, "step": 1031 }, { "epoch": 0.5722206820072082, "grad_norm": 0.20948712527751923, "learning_rate": 3.4081744748575684e-05, "loss": 1.5695, "step": 1032 }, { "epoch": 0.572775159412254, "grad_norm": 0.2241201102733612, "learning_rate": 3.400877738664958e-05, "loss": 1.5542, "step": 1033 }, { "epoch": 0.5733296368172996, "grad_norm": 0.21154385805130005, "learning_rate": 3.39358304132473e-05, "loss": 1.5349, "step": 1034 }, { "epoch": 0.5738841142223454, "grad_norm": 0.22654007375240326, "learning_rate": 3.386290407661221e-05, "loss": 1.6839, "step": 1035 }, { "epoch": 0.5744385916273912, "grad_norm": 0.2175297737121582, "learning_rate": 3.3789998624917376e-05, "loss": 1.5791, "step": 1036 }, { "epoch": 0.574993069032437, "grad_norm": 0.2157989740371704, "learning_rate": 3.3717114306264875e-05, "loss": 1.5716, "step": 1037 }, { "epoch": 0.5755475464374826, "grad_norm": 0.2078414410352707, "learning_rate": 3.364425136868479e-05, "loss": 1.586, "step": 1038 }, { "epoch": 0.5761020238425284, "grad_norm": 0.2155274897813797, "learning_rate": 3.357141006013451e-05, "loss": 1.5882, "step": 1039 }, { "epoch": 0.5766565012475742, "grad_norm": 0.21952180564403534, "learning_rate": 3.349859062849779e-05, "loss": 1.6528, "step": 1040 }, { "epoch": 0.5772109786526199, "grad_norm": 0.2106204628944397, "learning_rate": 3.3425793321583914e-05, "loss": 1.5356, "step": 1041 }, { "epoch": 0.5777654560576656, "grad_norm": 0.2236543446779251, "learning_rate": 3.335301838712692e-05, "loss": 1.5639, "step": 1042 }, { "epoch": 0.5783199334627114, "grad_norm": 0.21134650707244873, "learning_rate": 3.328026607278466e-05, "loss": 1.5635, "step": 1043 }, { "epoch": 0.5788744108677571, "grad_norm": 0.21437637507915497, "learning_rate": 3.3207536626138046e-05, "loss": 1.557, "step": 1044 }, { "epoch": 0.5794288882728029, "grad_norm": 0.21564576029777527, "learning_rate": 3.313483029469019e-05, "loss": 1.548, "step": 1045 }, { "epoch": 0.5799833656778486, "grad_norm": 0.21482598781585693, "learning_rate": 3.306214732586546e-05, "loss": 1.5844, "step": 1046 }, { "epoch": 0.5805378430828944, "grad_norm": 0.20883773267269135, "learning_rate": 3.2989487967008806e-05, "loss": 1.5653, "step": 1047 }, { "epoch": 0.5810923204879401, "grad_norm": 0.20583604276180267, "learning_rate": 3.291685246538478e-05, "loss": 1.5298, "step": 1048 }, { "epoch": 0.5816467978929859, "grad_norm": 0.20545633137226105, "learning_rate": 3.2844241068176766e-05, "loss": 1.5625, "step": 1049 }, { "epoch": 0.5822012752980316, "grad_norm": 0.2127029448747635, "learning_rate": 3.277165402248612e-05, "loss": 1.6048, "step": 1050 }, { "epoch": 0.5827557527030773, "grad_norm": 0.20467516779899597, "learning_rate": 3.26990915753313e-05, "loss": 1.5292, "step": 1051 }, { "epoch": 0.5833102301081231, "grad_norm": 0.20428596436977386, "learning_rate": 3.2626553973647115e-05, "loss": 1.5542, "step": 1052 }, { "epoch": 0.5838647075131689, "grad_norm": 0.21240785717964172, "learning_rate": 3.255404146428375e-05, "loss": 1.5601, "step": 1053 }, { "epoch": 0.5844191849182145, "grad_norm": 0.20584693551063538, "learning_rate": 3.248155429400605e-05, "loss": 1.5476, "step": 1054 }, { "epoch": 0.5849736623232603, "grad_norm": 0.21363112330436707, "learning_rate": 3.2409092709492624e-05, "loss": 1.6318, "step": 1055 }, { "epoch": 0.5855281397283061, "grad_norm": 0.22125588357448578, "learning_rate": 3.233665695733498e-05, "loss": 1.5496, "step": 1056 }, { "epoch": 0.5860826171333519, "grad_norm": 0.22506000101566315, "learning_rate": 3.2264247284036755e-05, "loss": 1.547, "step": 1057 }, { "epoch": 0.5866370945383975, "grad_norm": 0.21581967175006866, "learning_rate": 3.2191863936012794e-05, "loss": 1.5397, "step": 1058 }, { "epoch": 0.5871915719434433, "grad_norm": 0.21308638155460358, "learning_rate": 3.211950715958841e-05, "loss": 1.5873, "step": 1059 }, { "epoch": 0.5877460493484891, "grad_norm": 0.20769579708576202, "learning_rate": 3.2047177200998456e-05, "loss": 1.5548, "step": 1060 }, { "epoch": 0.5883005267535348, "grad_norm": 0.20457741618156433, "learning_rate": 3.197487430638652e-05, "loss": 1.55, "step": 1061 }, { "epoch": 0.5888550041585805, "grad_norm": 0.21090541779994965, "learning_rate": 3.1902598721804096e-05, "loss": 1.6617, "step": 1062 }, { "epoch": 0.5894094815636263, "grad_norm": 0.21099694073200226, "learning_rate": 3.1830350693209774e-05, "loss": 1.5077, "step": 1063 }, { "epoch": 0.589963958968672, "grad_norm": 0.21446220576763153, "learning_rate": 3.1758130466468293e-05, "loss": 1.6078, "step": 1064 }, { "epoch": 0.5905184363737178, "grad_norm": 0.21016094088554382, "learning_rate": 3.168593828734986e-05, "loss": 1.5688, "step": 1065 }, { "epoch": 0.5910729137787635, "grad_norm": 0.21141645312309265, "learning_rate": 3.161377440152918e-05, "loss": 1.5527, "step": 1066 }, { "epoch": 0.5916273911838092, "grad_norm": 0.2074812948703766, "learning_rate": 3.154163905458469e-05, "loss": 1.5399, "step": 1067 }, { "epoch": 0.592181868588855, "grad_norm": 0.2177177220582962, "learning_rate": 3.146953249199774e-05, "loss": 1.599, "step": 1068 }, { "epoch": 0.5927363459939008, "grad_norm": 0.20859509706497192, "learning_rate": 3.139745495915166e-05, "loss": 1.5818, "step": 1069 }, { "epoch": 0.5932908233989465, "grad_norm": 0.20710986852645874, "learning_rate": 3.1325406701331056e-05, "loss": 1.559, "step": 1070 }, { "epoch": 0.5938453008039922, "grad_norm": 0.21879532933235168, "learning_rate": 3.1253387963720835e-05, "loss": 1.5392, "step": 1071 }, { "epoch": 0.594399778209038, "grad_norm": 0.2117648422718048, "learning_rate": 3.118139899140553e-05, "loss": 1.6009, "step": 1072 }, { "epoch": 0.5949542556140838, "grad_norm": 0.21254301071166992, "learning_rate": 3.110944002936835e-05, "loss": 1.5663, "step": 1073 }, { "epoch": 0.5955087330191294, "grad_norm": 0.2058122456073761, "learning_rate": 3.1037511322490324e-05, "loss": 1.5066, "step": 1074 }, { "epoch": 0.5960632104241752, "grad_norm": 0.2119075357913971, "learning_rate": 3.096561311554959e-05, "loss": 1.5925, "step": 1075 }, { "epoch": 0.596617687829221, "grad_norm": 0.2224556803703308, "learning_rate": 3.089374565322045e-05, "loss": 1.6041, "step": 1076 }, { "epoch": 0.5971721652342667, "grad_norm": 0.21044711768627167, "learning_rate": 3.082190918007259e-05, "loss": 1.5778, "step": 1077 }, { "epoch": 0.5977266426393124, "grad_norm": 0.21917963027954102, "learning_rate": 3.0750103940570284e-05, "loss": 1.668, "step": 1078 }, { "epoch": 0.5982811200443582, "grad_norm": 0.21519285440444946, "learning_rate": 3.067833017907144e-05, "loss": 1.567, "step": 1079 }, { "epoch": 0.598835597449404, "grad_norm": 0.21112053096294403, "learning_rate": 3.0606588139826884e-05, "loss": 1.5426, "step": 1080 }, { "epoch": 0.5993900748544497, "grad_norm": 0.21877877414226532, "learning_rate": 3.0534878066979494e-05, "loss": 1.5749, "step": 1081 }, { "epoch": 0.5999445522594954, "grad_norm": 0.2124583125114441, "learning_rate": 3.046320020456334e-05, "loss": 1.4941, "step": 1082 }, { "epoch": 0.6004990296645412, "grad_norm": 0.21378760039806366, "learning_rate": 3.0391554796502925e-05, "loss": 1.5903, "step": 1083 }, { "epoch": 0.6010535070695869, "grad_norm": 0.21284620463848114, "learning_rate": 3.031994208661223e-05, "loss": 1.5439, "step": 1084 }, { "epoch": 0.6016079844746327, "grad_norm": 0.2211058884859085, "learning_rate": 3.0248362318594055e-05, "loss": 1.5932, "step": 1085 }, { "epoch": 0.6021624618796784, "grad_norm": 0.20590293407440186, "learning_rate": 3.0176815736039007e-05, "loss": 1.5591, "step": 1086 }, { "epoch": 0.6027169392847241, "grad_norm": 0.21580100059509277, "learning_rate": 3.010530258242483e-05, "loss": 1.5505, "step": 1087 }, { "epoch": 0.6032714166897699, "grad_norm": 0.2178206592798233, "learning_rate": 3.0033823101115473e-05, "loss": 1.4999, "step": 1088 }, { "epoch": 0.6038258940948157, "grad_norm": 0.22579710185527802, "learning_rate": 2.9962377535360286e-05, "loss": 1.622, "step": 1089 }, { "epoch": 0.6043803714998613, "grad_norm": 0.22102071344852448, "learning_rate": 2.9890966128293243e-05, "loss": 1.5054, "step": 1090 }, { "epoch": 0.6049348489049071, "grad_norm": 0.2073606252670288, "learning_rate": 2.9819589122932055e-05, "loss": 1.4963, "step": 1091 }, { "epoch": 0.6054893263099529, "grad_norm": 0.20773765444755554, "learning_rate": 2.9748246762177326e-05, "loss": 1.5046, "step": 1092 }, { "epoch": 0.6060438037149987, "grad_norm": 0.2178242802619934, "learning_rate": 2.9676939288811825e-05, "loss": 1.623, "step": 1093 }, { "epoch": 0.6065982811200443, "grad_norm": 0.21794721484184265, "learning_rate": 2.960566694549954e-05, "loss": 1.5074, "step": 1094 }, { "epoch": 0.6071527585250901, "grad_norm": 0.22172760963439941, "learning_rate": 2.953442997478494e-05, "loss": 1.6091, "step": 1095 }, { "epoch": 0.6077072359301359, "grad_norm": 0.20793460309505463, "learning_rate": 2.9463228619092132e-05, "loss": 1.5604, "step": 1096 }, { "epoch": 0.6082617133351816, "grad_norm": 0.2183390110731125, "learning_rate": 2.9392063120723962e-05, "loss": 1.4959, "step": 1097 }, { "epoch": 0.6088161907402273, "grad_norm": 0.2197534292936325, "learning_rate": 2.9320933721861335e-05, "loss": 1.536, "step": 1098 }, { "epoch": 0.6093706681452731, "grad_norm": 0.2130371481180191, "learning_rate": 2.9249840664562242e-05, "loss": 1.6402, "step": 1099 }, { "epoch": 0.6099251455503188, "grad_norm": 0.21440336108207703, "learning_rate": 2.917878419076102e-05, "loss": 1.5631, "step": 1100 }, { "epoch": 0.6104796229553646, "grad_norm": 0.22678157687187195, "learning_rate": 2.9107764542267536e-05, "loss": 1.5603, "step": 1101 }, { "epoch": 0.6110341003604103, "grad_norm": 0.21499857306480408, "learning_rate": 2.903678196076628e-05, "loss": 1.642, "step": 1102 }, { "epoch": 0.611588577765456, "grad_norm": 0.20951659977436066, "learning_rate": 2.896583668781569e-05, "loss": 1.5311, "step": 1103 }, { "epoch": 0.6121430551705018, "grad_norm": 0.2207638919353485, "learning_rate": 2.8894928964847133e-05, "loss": 1.5827, "step": 1104 }, { "epoch": 0.6126975325755476, "grad_norm": 0.2097187638282776, "learning_rate": 2.8824059033164262e-05, "loss": 1.5433, "step": 1105 }, { "epoch": 0.6132520099805933, "grad_norm": 0.20598512887954712, "learning_rate": 2.875322713394213e-05, "loss": 1.5966, "step": 1106 }, { "epoch": 0.613806487385639, "grad_norm": 0.20717155933380127, "learning_rate": 2.8682433508226303e-05, "loss": 1.4861, "step": 1107 }, { "epoch": 0.6143609647906848, "grad_norm": 0.20514699816703796, "learning_rate": 2.8611678396932164e-05, "loss": 1.5664, "step": 1108 }, { "epoch": 0.6149154421957306, "grad_norm": 0.21211731433868408, "learning_rate": 2.8540962040843982e-05, "loss": 1.5026, "step": 1109 }, { "epoch": 0.6154699196007762, "grad_norm": 0.21061904728412628, "learning_rate": 2.8470284680614172e-05, "loss": 1.5795, "step": 1110 }, { "epoch": 0.616024397005822, "grad_norm": 0.2107418179512024, "learning_rate": 2.8399646556762435e-05, "loss": 1.6298, "step": 1111 }, { "epoch": 0.6165788744108678, "grad_norm": 0.20848070085048676, "learning_rate": 2.832904790967492e-05, "loss": 1.5619, "step": 1112 }, { "epoch": 0.6171333518159136, "grad_norm": 0.21443457901477814, "learning_rate": 2.8258488979603488e-05, "loss": 1.5624, "step": 1113 }, { "epoch": 0.6176878292209592, "grad_norm": 0.21567028760910034, "learning_rate": 2.8187970006664784e-05, "loss": 1.5691, "step": 1114 }, { "epoch": 0.618242306626005, "grad_norm": 0.20712579786777496, "learning_rate": 2.8117491230839502e-05, "loss": 1.5106, "step": 1115 }, { "epoch": 0.6187967840310508, "grad_norm": 0.2100321501493454, "learning_rate": 2.8047052891971565e-05, "loss": 1.5948, "step": 1116 }, { "epoch": 0.6193512614360965, "grad_norm": 0.21011658012866974, "learning_rate": 2.7976655229767248e-05, "loss": 1.5616, "step": 1117 }, { "epoch": 0.6199057388411422, "grad_norm": 0.22047735750675201, "learning_rate": 2.790629848379442e-05, "loss": 1.6123, "step": 1118 }, { "epoch": 0.620460216246188, "grad_norm": 0.2138945609331131, "learning_rate": 2.78359828934817e-05, "loss": 1.5528, "step": 1119 }, { "epoch": 0.6210146936512337, "grad_norm": 0.216939777135849, "learning_rate": 2.7765708698117663e-05, "loss": 1.5472, "step": 1120 }, { "epoch": 0.6215691710562794, "grad_norm": 0.22868293523788452, "learning_rate": 2.7695476136850018e-05, "loss": 1.5588, "step": 1121 }, { "epoch": 0.6221236484613252, "grad_norm": 0.2176186591386795, "learning_rate": 2.7625285448684757e-05, "loss": 1.5469, "step": 1122 }, { "epoch": 0.622678125866371, "grad_norm": 0.21710044145584106, "learning_rate": 2.7555136872485415e-05, "loss": 1.5845, "step": 1123 }, { "epoch": 0.6232326032714167, "grad_norm": 0.21951667964458466, "learning_rate": 2.748503064697221e-05, "loss": 1.5436, "step": 1124 }, { "epoch": 0.6237870806764624, "grad_norm": 0.2149830311536789, "learning_rate": 2.7414967010721212e-05, "loss": 1.5896, "step": 1125 }, { "epoch": 0.6243415580815082, "grad_norm": 0.21103326976299286, "learning_rate": 2.7344946202163592e-05, "loss": 1.6166, "step": 1126 }, { "epoch": 0.6248960354865539, "grad_norm": 0.22287732362747192, "learning_rate": 2.727496845958474e-05, "loss": 1.5062, "step": 1127 }, { "epoch": 0.6254505128915997, "grad_norm": 0.2150767594575882, "learning_rate": 2.7205034021123505e-05, "loss": 1.5854, "step": 1128 }, { "epoch": 0.6260049902966454, "grad_norm": 0.21351411938667297, "learning_rate": 2.71351431247714e-05, "loss": 1.6161, "step": 1129 }, { "epoch": 0.6265594677016911, "grad_norm": 0.2130012810230255, "learning_rate": 2.7065296008371703e-05, "loss": 1.5533, "step": 1130 }, { "epoch": 0.6271139451067369, "grad_norm": 0.22186146676540375, "learning_rate": 2.699549290961876e-05, "loss": 1.5779, "step": 1131 }, { "epoch": 0.6276684225117827, "grad_norm": 0.2098151296377182, "learning_rate": 2.6925734066057076e-05, "loss": 1.5365, "step": 1132 }, { "epoch": 0.6282228999168283, "grad_norm": 0.22018581628799438, "learning_rate": 2.6856019715080576e-05, "loss": 1.5884, "step": 1133 }, { "epoch": 0.6287773773218741, "grad_norm": 0.2144719660282135, "learning_rate": 2.6786350093931805e-05, "loss": 1.5707, "step": 1134 }, { "epoch": 0.6293318547269199, "grad_norm": 0.2225160151720047, "learning_rate": 2.6716725439701013e-05, "loss": 1.6019, "step": 1135 }, { "epoch": 0.6298863321319657, "grad_norm": 0.21820110082626343, "learning_rate": 2.66471459893255e-05, "loss": 1.6112, "step": 1136 }, { "epoch": 0.6304408095370113, "grad_norm": 0.2195034772157669, "learning_rate": 2.6577611979588685e-05, "loss": 1.5993, "step": 1137 }, { "epoch": 0.6309952869420571, "grad_norm": 0.214851975440979, "learning_rate": 2.6508123647119376e-05, "loss": 1.544, "step": 1138 }, { "epoch": 0.6315497643471029, "grad_norm": 0.21557554602622986, "learning_rate": 2.643868122839093e-05, "loss": 1.569, "step": 1139 }, { "epoch": 0.6321042417521486, "grad_norm": 0.21042174100875854, "learning_rate": 2.6369284959720447e-05, "loss": 1.5386, "step": 1140 }, { "epoch": 0.6326587191571943, "grad_norm": 0.20624153316020966, "learning_rate": 2.629993507726801e-05, "loss": 1.5237, "step": 1141 }, { "epoch": 0.6332131965622401, "grad_norm": 0.208335280418396, "learning_rate": 2.6230631817035784e-05, "loss": 1.5465, "step": 1142 }, { "epoch": 0.6337676739672858, "grad_norm": 0.21543239057064056, "learning_rate": 2.6161375414867337e-05, "loss": 1.5521, "step": 1143 }, { "epoch": 0.6343221513723316, "grad_norm": 0.21806393563747406, "learning_rate": 2.6092166106446753e-05, "loss": 1.504, "step": 1144 }, { "epoch": 0.6348766287773773, "grad_norm": 0.20788079500198364, "learning_rate": 2.602300412729784e-05, "loss": 1.5175, "step": 1145 }, { "epoch": 0.635431106182423, "grad_norm": 0.21668802201747894, "learning_rate": 2.5953889712783364e-05, "loss": 1.5238, "step": 1146 }, { "epoch": 0.6359855835874688, "grad_norm": 0.219620943069458, "learning_rate": 2.5884823098104202e-05, "loss": 1.5186, "step": 1147 }, { "epoch": 0.6365400609925146, "grad_norm": 0.22174449265003204, "learning_rate": 2.5815804518298575e-05, "loss": 1.6057, "step": 1148 }, { "epoch": 0.6370945383975603, "grad_norm": 0.2142484486103058, "learning_rate": 2.5746834208241266e-05, "loss": 1.5701, "step": 1149 }, { "epoch": 0.637649015802606, "grad_norm": 0.21401485800743103, "learning_rate": 2.5677912402642742e-05, "loss": 1.5556, "step": 1150 }, { "epoch": 0.6382034932076518, "grad_norm": 0.21097297966480255, "learning_rate": 2.560903933604844e-05, "loss": 1.561, "step": 1151 }, { "epoch": 0.6387579706126976, "grad_norm": 0.21315309405326843, "learning_rate": 2.554021524283794e-05, "loss": 1.5939, "step": 1152 }, { "epoch": 0.6393124480177432, "grad_norm": 0.21164047718048096, "learning_rate": 2.5471440357224112e-05, "loss": 1.5832, "step": 1153 }, { "epoch": 0.639866925422789, "grad_norm": 0.1997397392988205, "learning_rate": 2.5402714913252463e-05, "loss": 1.4702, "step": 1154 }, { "epoch": 0.6404214028278348, "grad_norm": 0.2216353565454483, "learning_rate": 2.5334039144800128e-05, "loss": 1.6679, "step": 1155 }, { "epoch": 0.6409758802328805, "grad_norm": 0.20689931511878967, "learning_rate": 2.5265413285575297e-05, "loss": 1.5088, "step": 1156 }, { "epoch": 0.6415303576379262, "grad_norm": 0.21255846321582794, "learning_rate": 2.5196837569116267e-05, "loss": 1.5848, "step": 1157 }, { "epoch": 0.642084835042972, "grad_norm": 0.19903656840324402, "learning_rate": 2.5128312228790684e-05, "loss": 1.5309, "step": 1158 }, { "epoch": 0.6426393124480178, "grad_norm": 0.2166842818260193, "learning_rate": 2.505983749779481e-05, "loss": 1.6536, "step": 1159 }, { "epoch": 0.6431937898530635, "grad_norm": 0.21221613883972168, "learning_rate": 2.4991413609152606e-05, "loss": 1.5502, "step": 1160 }, { "epoch": 0.6437482672581092, "grad_norm": 0.2121618390083313, "learning_rate": 2.4923040795715095e-05, "loss": 1.566, "step": 1161 }, { "epoch": 0.644302744663155, "grad_norm": 0.21857164800167084, "learning_rate": 2.485471929015944e-05, "loss": 1.6212, "step": 1162 }, { "epoch": 0.6448572220682007, "grad_norm": 0.21056576073169708, "learning_rate": 2.4786449324988203e-05, "loss": 1.5362, "step": 1163 }, { "epoch": 0.6454116994732465, "grad_norm": 0.21527813374996185, "learning_rate": 2.4718231132528562e-05, "loss": 1.5372, "step": 1164 }, { "epoch": 0.6459661768782922, "grad_norm": 0.20520104467868805, "learning_rate": 2.4650064944931495e-05, "loss": 1.5219, "step": 1165 }, { "epoch": 0.6465206542833379, "grad_norm": 0.20994901657104492, "learning_rate": 2.4581950994171013e-05, "loss": 1.5247, "step": 1166 }, { "epoch": 0.6470751316883837, "grad_norm": 0.21351756155490875, "learning_rate": 2.4513889512043383e-05, "loss": 1.572, "step": 1167 }, { "epoch": 0.6476296090934295, "grad_norm": 0.21030496060848236, "learning_rate": 2.444588073016627e-05, "loss": 1.5433, "step": 1168 }, { "epoch": 0.6481840864984751, "grad_norm": 0.2144717425107956, "learning_rate": 2.4377924879978045e-05, "loss": 1.563, "step": 1169 }, { "epoch": 0.6487385639035209, "grad_norm": 0.20969878137111664, "learning_rate": 2.4310022192736905e-05, "loss": 1.5527, "step": 1170 }, { "epoch": 0.6492930413085667, "grad_norm": 0.20833313465118408, "learning_rate": 2.4242172899520164e-05, "loss": 1.5475, "step": 1171 }, { "epoch": 0.6498475187136125, "grad_norm": 0.21155394613742828, "learning_rate": 2.417437723122343e-05, "loss": 1.565, "step": 1172 }, { "epoch": 0.6504019961186581, "grad_norm": 0.2123716175556183, "learning_rate": 2.4106635418559786e-05, "loss": 1.5333, "step": 1173 }, { "epoch": 0.6509564735237039, "grad_norm": 0.21838416159152985, "learning_rate": 2.40389476920591e-05, "loss": 1.6055, "step": 1174 }, { "epoch": 0.6515109509287497, "grad_norm": 0.21394363045692444, "learning_rate": 2.397131428206711e-05, "loss": 1.5401, "step": 1175 }, { "epoch": 0.6520654283337954, "grad_norm": 0.21762172877788544, "learning_rate": 2.3903735418744783e-05, "loss": 1.6162, "step": 1176 }, { "epoch": 0.6526199057388411, "grad_norm": 0.21325650811195374, "learning_rate": 2.3836211332067426e-05, "loss": 1.5997, "step": 1177 }, { "epoch": 0.6531743831438869, "grad_norm": 0.2168748527765274, "learning_rate": 2.376874225182391e-05, "loss": 1.5998, "step": 1178 }, { "epoch": 0.6537288605489326, "grad_norm": 0.22087283432483673, "learning_rate": 2.3701328407615977e-05, "loss": 1.6222, "step": 1179 }, { "epoch": 0.6542833379539784, "grad_norm": 0.21224388480186462, "learning_rate": 2.3633970028857327e-05, "loss": 1.4607, "step": 1180 }, { "epoch": 0.6548378153590241, "grad_norm": 0.21814611554145813, "learning_rate": 2.356666734477298e-05, "loss": 1.575, "step": 1181 }, { "epoch": 0.6553922927640699, "grad_norm": 0.21967004239559174, "learning_rate": 2.3499420584398377e-05, "loss": 1.5645, "step": 1182 }, { "epoch": 0.6559467701691156, "grad_norm": 0.2046661376953125, "learning_rate": 2.343222997657865e-05, "loss": 1.5153, "step": 1183 }, { "epoch": 0.6565012475741614, "grad_norm": 0.21323862671852112, "learning_rate": 2.336509574996784e-05, "loss": 1.5637, "step": 1184 }, { "epoch": 0.6570557249792071, "grad_norm": 0.21644696593284607, "learning_rate": 2.3298018133028166e-05, "loss": 1.5501, "step": 1185 }, { "epoch": 0.6576102023842528, "grad_norm": 0.22033213078975677, "learning_rate": 2.3230997354029116e-05, "loss": 1.5611, "step": 1186 }, { "epoch": 0.6581646797892986, "grad_norm": 0.2171270102262497, "learning_rate": 2.3164033641046824e-05, "loss": 1.578, "step": 1187 }, { "epoch": 0.6587191571943444, "grad_norm": 0.2252931147813797, "learning_rate": 2.309712722196319e-05, "loss": 1.62, "step": 1188 }, { "epoch": 0.65927363459939, "grad_norm": 0.20670726895332336, "learning_rate": 2.303027832446516e-05, "loss": 1.4831, "step": 1189 }, { "epoch": 0.6598281120044358, "grad_norm": 0.2236548513174057, "learning_rate": 2.296348717604392e-05, "loss": 1.6453, "step": 1190 }, { "epoch": 0.6603825894094816, "grad_norm": 0.22350645065307617, "learning_rate": 2.2896754003994124e-05, "loss": 1.5666, "step": 1191 }, { "epoch": 0.6609370668145274, "grad_norm": 0.21167486906051636, "learning_rate": 2.2830079035413153e-05, "loss": 1.5366, "step": 1192 }, { "epoch": 0.661491544219573, "grad_norm": 0.2091861069202423, "learning_rate": 2.2763462497200282e-05, "loss": 1.5729, "step": 1193 }, { "epoch": 0.6620460216246188, "grad_norm": 0.20955854654312134, "learning_rate": 2.2696904616055953e-05, "loss": 1.5978, "step": 1194 }, { "epoch": 0.6626004990296646, "grad_norm": 0.20902609825134277, "learning_rate": 2.2630405618481052e-05, "loss": 1.5898, "step": 1195 }, { "epoch": 0.6631549764347103, "grad_norm": 0.2072347104549408, "learning_rate": 2.2563965730775995e-05, "loss": 1.517, "step": 1196 }, { "epoch": 0.663709453839756, "grad_norm": 0.20107337832450867, "learning_rate": 2.2497585179040087e-05, "loss": 1.5093, "step": 1197 }, { "epoch": 0.6642639312448018, "grad_norm": 0.2148445099592209, "learning_rate": 2.243126418917071e-05, "loss": 1.5597, "step": 1198 }, { "epoch": 0.6648184086498475, "grad_norm": 0.20870907604694366, "learning_rate": 2.236500298686253e-05, "loss": 1.5352, "step": 1199 }, { "epoch": 0.6653728860548933, "grad_norm": 0.21237953007221222, "learning_rate": 2.229880179760681e-05, "loss": 1.5499, "step": 1200 }, { "epoch": 0.665927363459939, "grad_norm": 0.21513739228248596, "learning_rate": 2.2232660846690477e-05, "loss": 1.5859, "step": 1201 }, { "epoch": 0.6664818408649847, "grad_norm": 0.22024357318878174, "learning_rate": 2.2166580359195594e-05, "loss": 1.6493, "step": 1202 }, { "epoch": 0.6670363182700305, "grad_norm": 0.21224506199359894, "learning_rate": 2.210056055999835e-05, "loss": 1.588, "step": 1203 }, { "epoch": 0.6675907956750763, "grad_norm": 0.21365848183631897, "learning_rate": 2.2034601673768475e-05, "loss": 1.5165, "step": 1204 }, { "epoch": 0.668145273080122, "grad_norm": 0.21693235635757446, "learning_rate": 2.1968703924968388e-05, "loss": 1.6435, "step": 1205 }, { "epoch": 0.6686997504851677, "grad_norm": 0.2145548313856125, "learning_rate": 2.1902867537852453e-05, "loss": 1.5788, "step": 1206 }, { "epoch": 0.6692542278902135, "grad_norm": 0.21968676149845123, "learning_rate": 2.1837092736466233e-05, "loss": 1.6045, "step": 1207 }, { "epoch": 0.6698087052952593, "grad_norm": 0.2109319120645523, "learning_rate": 2.1771379744645643e-05, "loss": 1.5182, "step": 1208 }, { "epoch": 0.6703631827003049, "grad_norm": 0.21400365233421326, "learning_rate": 2.170572878601636e-05, "loss": 1.5532, "step": 1209 }, { "epoch": 0.6709176601053507, "grad_norm": 0.20740005373954773, "learning_rate": 2.1640140083992908e-05, "loss": 1.5601, "step": 1210 }, { "epoch": 0.6714721375103965, "grad_norm": 0.21574941277503967, "learning_rate": 2.1574613861777904e-05, "loss": 1.5462, "step": 1211 }, { "epoch": 0.6720266149154422, "grad_norm": 0.2179066389799118, "learning_rate": 2.1509150342361407e-05, "loss": 1.6072, "step": 1212 }, { "epoch": 0.6725810923204879, "grad_norm": 0.21342717111110687, "learning_rate": 2.144374974852007e-05, "loss": 1.6028, "step": 1213 }, { "epoch": 0.6731355697255337, "grad_norm": 0.21398906409740448, "learning_rate": 2.1378412302816408e-05, "loss": 1.595, "step": 1214 }, { "epoch": 0.6736900471305795, "grad_norm": 0.20966409146785736, "learning_rate": 2.1313138227598053e-05, "loss": 1.5391, "step": 1215 }, { "epoch": 0.6742445245356252, "grad_norm": 0.20586159825325012, "learning_rate": 2.1247927744996913e-05, "loss": 1.5392, "step": 1216 }, { "epoch": 0.6747990019406709, "grad_norm": 0.21044990420341492, "learning_rate": 2.11827810769286e-05, "loss": 1.6125, "step": 1217 }, { "epoch": 0.6753534793457167, "grad_norm": 0.22008857131004333, "learning_rate": 2.111769844509149e-05, "loss": 1.5878, "step": 1218 }, { "epoch": 0.6759079567507624, "grad_norm": 0.21583212912082672, "learning_rate": 2.105268007096603e-05, "loss": 1.5553, "step": 1219 }, { "epoch": 0.6764624341558082, "grad_norm": 0.20590542256832123, "learning_rate": 2.0987726175814025e-05, "loss": 1.5257, "step": 1220 }, { "epoch": 0.6770169115608539, "grad_norm": 0.21506734192371368, "learning_rate": 2.0922836980677844e-05, "loss": 1.6258, "step": 1221 }, { "epoch": 0.6775713889658996, "grad_norm": 0.21825283765792847, "learning_rate": 2.085801270637968e-05, "loss": 1.5429, "step": 1222 }, { "epoch": 0.6781258663709454, "grad_norm": 0.2100907862186432, "learning_rate": 2.0793253573520785e-05, "loss": 1.5905, "step": 1223 }, { "epoch": 0.6786803437759912, "grad_norm": 0.21609370410442352, "learning_rate": 2.0728559802480754e-05, "loss": 1.6102, "step": 1224 }, { "epoch": 0.6792348211810368, "grad_norm": 0.21235951781272888, "learning_rate": 2.0663931613416746e-05, "loss": 1.4997, "step": 1225 }, { "epoch": 0.6797892985860826, "grad_norm": 0.20869648456573486, "learning_rate": 2.0599369226262693e-05, "loss": 1.5351, "step": 1226 }, { "epoch": 0.6803437759911284, "grad_norm": 0.21568715572357178, "learning_rate": 2.053487286072865e-05, "loss": 1.5294, "step": 1227 }, { "epoch": 0.6808982533961742, "grad_norm": 0.21656538546085358, "learning_rate": 2.0470442736300013e-05, "loss": 1.5462, "step": 1228 }, { "epoch": 0.6814527308012198, "grad_norm": 0.21754758059978485, "learning_rate": 2.0406079072236684e-05, "loss": 1.5732, "step": 1229 }, { "epoch": 0.6820072082062656, "grad_norm": 0.21004228293895721, "learning_rate": 2.0341782087572453e-05, "loss": 1.5723, "step": 1230 }, { "epoch": 0.6825616856113114, "grad_norm": 0.22100019454956055, "learning_rate": 2.0277552001114183e-05, "loss": 1.5375, "step": 1231 }, { "epoch": 0.6831161630163571, "grad_norm": 0.21162226796150208, "learning_rate": 2.0213389031441072e-05, "loss": 1.5535, "step": 1232 }, { "epoch": 0.6836706404214028, "grad_norm": 0.21510981023311615, "learning_rate": 2.0149293396903936e-05, "loss": 1.5419, "step": 1233 }, { "epoch": 0.6842251178264486, "grad_norm": 0.22378797829151154, "learning_rate": 2.0085265315624375e-05, "loss": 1.5729, "step": 1234 }, { "epoch": 0.6847795952314943, "grad_norm": 0.2192632257938385, "learning_rate": 2.002130500549422e-05, "loss": 1.4287, "step": 1235 }, { "epoch": 0.6853340726365401, "grad_norm": 0.22077576816082, "learning_rate": 1.995741268417455e-05, "loss": 1.5635, "step": 1236 }, { "epoch": 0.6858885500415858, "grad_norm": 0.22820354998111725, "learning_rate": 1.9893588569095148e-05, "loss": 1.5514, "step": 1237 }, { "epoch": 0.6864430274466315, "grad_norm": 0.22799494862556458, "learning_rate": 1.9829832877453673e-05, "loss": 1.6064, "step": 1238 }, { "epoch": 0.6869975048516773, "grad_norm": 0.21330177783966064, "learning_rate": 1.976614582621492e-05, "loss": 1.5972, "step": 1239 }, { "epoch": 0.6875519822567231, "grad_norm": 0.22794507443904877, "learning_rate": 1.9702527632110128e-05, "loss": 1.5362, "step": 1240 }, { "epoch": 0.6881064596617688, "grad_norm": 0.2161223590373993, "learning_rate": 1.9638978511636133e-05, "loss": 1.5349, "step": 1241 }, { "epoch": 0.6886609370668145, "grad_norm": 0.21102319657802582, "learning_rate": 1.9575498681054816e-05, "loss": 1.5777, "step": 1242 }, { "epoch": 0.6892154144718603, "grad_norm": 0.2153400331735611, "learning_rate": 1.9512088356392206e-05, "loss": 1.5413, "step": 1243 }, { "epoch": 0.689769891876906, "grad_norm": 0.23227231204509735, "learning_rate": 1.9448747753437766e-05, "loss": 1.5721, "step": 1244 }, { "epoch": 0.6903243692819517, "grad_norm": 0.21719689667224884, "learning_rate": 1.938547708774373e-05, "loss": 1.5343, "step": 1245 }, { "epoch": 0.6908788466869975, "grad_norm": 0.20928537845611572, "learning_rate": 1.9322276574624374e-05, "loss": 1.5005, "step": 1246 }, { "epoch": 0.6914333240920433, "grad_norm": 0.23064006865024567, "learning_rate": 1.925914642915514e-05, "loss": 1.5943, "step": 1247 }, { "epoch": 0.6919878014970889, "grad_norm": 0.22050850093364716, "learning_rate": 1.919608686617208e-05, "loss": 1.5496, "step": 1248 }, { "epoch": 0.6925422789021347, "grad_norm": 0.22092626988887787, "learning_rate": 1.9133098100271018e-05, "loss": 1.6028, "step": 1249 }, { "epoch": 0.6930967563071805, "grad_norm": 0.23645174503326416, "learning_rate": 1.9070180345806867e-05, "loss": 1.5723, "step": 1250 }, { "epoch": 0.6936512337122263, "grad_norm": 0.22588878870010376, "learning_rate": 1.9007333816892886e-05, "loss": 1.59, "step": 1251 }, { "epoch": 0.6942057111172719, "grad_norm": 0.2189507782459259, "learning_rate": 1.8944558727399894e-05, "loss": 1.5932, "step": 1252 }, { "epoch": 0.6947601885223177, "grad_norm": 0.21700263023376465, "learning_rate": 1.8881855290955702e-05, "loss": 1.5475, "step": 1253 }, { "epoch": 0.6953146659273635, "grad_norm": 0.21820667386054993, "learning_rate": 1.8819223720944176e-05, "loss": 1.534, "step": 1254 }, { "epoch": 0.6958691433324092, "grad_norm": 0.22174763679504395, "learning_rate": 1.875666423050467e-05, "loss": 1.6515, "step": 1255 }, { "epoch": 0.6964236207374549, "grad_norm": 0.22205999493598938, "learning_rate": 1.8694177032531247e-05, "loss": 1.6275, "step": 1256 }, { "epoch": 0.6969780981425007, "grad_norm": 0.21358875930309296, "learning_rate": 1.863176233967193e-05, "loss": 1.6198, "step": 1257 }, { "epoch": 0.6975325755475464, "grad_norm": 0.21394740045070648, "learning_rate": 1.856942036432804e-05, "loss": 1.5314, "step": 1258 }, { "epoch": 0.6980870529525922, "grad_norm": 0.21883390843868256, "learning_rate": 1.850715131865336e-05, "loss": 1.6133, "step": 1259 }, { "epoch": 0.6986415303576379, "grad_norm": 0.21168088912963867, "learning_rate": 1.8444955414553595e-05, "loss": 1.6031, "step": 1260 }, { "epoch": 0.6991960077626836, "grad_norm": 0.2189856618642807, "learning_rate": 1.8382832863685477e-05, "loss": 1.5858, "step": 1261 }, { "epoch": 0.6997504851677294, "grad_norm": 0.20865415036678314, "learning_rate": 1.8320783877456107e-05, "loss": 1.5458, "step": 1262 }, { "epoch": 0.7003049625727752, "grad_norm": 0.21896782517433167, "learning_rate": 1.825880866702226e-05, "loss": 1.5763, "step": 1263 }, { "epoch": 0.7008594399778209, "grad_norm": 0.2206234335899353, "learning_rate": 1.8196907443289656e-05, "loss": 1.5721, "step": 1264 }, { "epoch": 0.7014139173828666, "grad_norm": 0.217427596449852, "learning_rate": 1.813508041691222e-05, "loss": 1.6216, "step": 1265 }, { "epoch": 0.7019683947879124, "grad_norm": 0.2126351296901703, "learning_rate": 1.8073327798291387e-05, "loss": 1.5936, "step": 1266 }, { "epoch": 0.7025228721929582, "grad_norm": 0.2115585058927536, "learning_rate": 1.8011649797575343e-05, "loss": 1.5893, "step": 1267 }, { "epoch": 0.7030773495980038, "grad_norm": 0.21458736062049866, "learning_rate": 1.7950046624658418e-05, "loss": 1.4703, "step": 1268 }, { "epoch": 0.7036318270030496, "grad_norm": 0.21214710175991058, "learning_rate": 1.788851848918022e-05, "loss": 1.6151, "step": 1269 }, { "epoch": 0.7041863044080954, "grad_norm": 0.21619988977909088, "learning_rate": 1.7827065600525043e-05, "loss": 1.4581, "step": 1270 }, { "epoch": 0.7047407818131411, "grad_norm": 0.21274301409721375, "learning_rate": 1.77656881678211e-05, "loss": 1.5052, "step": 1271 }, { "epoch": 0.7052952592181868, "grad_norm": 0.2131747454404831, "learning_rate": 1.7704386399939818e-05, "loss": 1.5616, "step": 1272 }, { "epoch": 0.7058497366232326, "grad_norm": 0.22459973394870758, "learning_rate": 1.7643160505495146e-05, "loss": 1.6162, "step": 1273 }, { "epoch": 0.7064042140282784, "grad_norm": 0.21642784774303436, "learning_rate": 1.7582010692842823e-05, "loss": 1.5565, "step": 1274 }, { "epoch": 0.7069586914333241, "grad_norm": 0.22068682312965393, "learning_rate": 1.7520937170079667e-05, "loss": 1.5865, "step": 1275 }, { "epoch": 0.7075131688383698, "grad_norm": 0.2210376113653183, "learning_rate": 1.7459940145042904e-05, "loss": 1.5605, "step": 1276 }, { "epoch": 0.7080676462434156, "grad_norm": 0.21524764597415924, "learning_rate": 1.7399019825309387e-05, "loss": 1.568, "step": 1277 }, { "epoch": 0.7086221236484613, "grad_norm": 0.20720575749874115, "learning_rate": 1.733817641819496e-05, "loss": 1.5723, "step": 1278 }, { "epoch": 0.7091766010535071, "grad_norm": 0.22238507866859436, "learning_rate": 1.7277410130753775e-05, "loss": 1.5773, "step": 1279 }, { "epoch": 0.7097310784585528, "grad_norm": 0.20956286787986755, "learning_rate": 1.7216721169777452e-05, "loss": 1.5252, "step": 1280 }, { "epoch": 0.7102855558635985, "grad_norm": 0.21438319981098175, "learning_rate": 1.7156109741794533e-05, "loss": 1.4835, "step": 1281 }, { "epoch": 0.7108400332686443, "grad_norm": 0.21574389934539795, "learning_rate": 1.709557605306967e-05, "loss": 1.588, "step": 1282 }, { "epoch": 0.7113945106736901, "grad_norm": 0.2138158082962036, "learning_rate": 1.7035120309602994e-05, "loss": 1.5567, "step": 1283 }, { "epoch": 0.7119489880787357, "grad_norm": 0.21723328530788422, "learning_rate": 1.6974742717129373e-05, "loss": 1.5732, "step": 1284 }, { "epoch": 0.7125034654837815, "grad_norm": 0.2203451544046402, "learning_rate": 1.6914443481117678e-05, "loss": 1.6133, "step": 1285 }, { "epoch": 0.7130579428888273, "grad_norm": 0.20896215736865997, "learning_rate": 1.6854222806770228e-05, "loss": 1.5527, "step": 1286 }, { "epoch": 0.7136124202938731, "grad_norm": 0.20967014133930206, "learning_rate": 1.679408089902188e-05, "loss": 1.616, "step": 1287 }, { "epoch": 0.7141668976989187, "grad_norm": 0.21301911771297455, "learning_rate": 1.673401796253952e-05, "loss": 1.5695, "step": 1288 }, { "epoch": 0.7147213751039645, "grad_norm": 0.22176018357276917, "learning_rate": 1.667403420172125e-05, "loss": 1.5707, "step": 1289 }, { "epoch": 0.7152758525090103, "grad_norm": 0.20806194841861725, "learning_rate": 1.6614129820695755e-05, "loss": 1.547, "step": 1290 }, { "epoch": 0.715830329914056, "grad_norm": 0.21885931491851807, "learning_rate": 1.6554305023321587e-05, "loss": 1.5618, "step": 1291 }, { "epoch": 0.7163848073191017, "grad_norm": 0.21963781118392944, "learning_rate": 1.6494560013186413e-05, "loss": 1.5697, "step": 1292 }, { "epoch": 0.7169392847241475, "grad_norm": 0.20642498135566711, "learning_rate": 1.6434894993606474e-05, "loss": 1.5343, "step": 1293 }, { "epoch": 0.7174937621291932, "grad_norm": 0.21944020688533783, "learning_rate": 1.6375310167625736e-05, "loss": 1.589, "step": 1294 }, { "epoch": 0.718048239534239, "grad_norm": 0.20910033583641052, "learning_rate": 1.631580573801526e-05, "loss": 1.5539, "step": 1295 }, { "epoch": 0.7186027169392847, "grad_norm": 0.21879972517490387, "learning_rate": 1.625638190727253e-05, "loss": 1.557, "step": 1296 }, { "epoch": 0.7191571943443305, "grad_norm": 0.21549879014492035, "learning_rate": 1.6197038877620745e-05, "loss": 1.5955, "step": 1297 }, { "epoch": 0.7197116717493762, "grad_norm": 0.21491822600364685, "learning_rate": 1.6137776851008135e-05, "loss": 1.5934, "step": 1298 }, { "epoch": 0.720266149154422, "grad_norm": 0.21684475243091583, "learning_rate": 1.607859602910726e-05, "loss": 1.5928, "step": 1299 }, { "epoch": 0.7208206265594677, "grad_norm": 0.2135036587715149, "learning_rate": 1.601949661331434e-05, "loss": 1.5901, "step": 1300 }, { "epoch": 0.7213751039645134, "grad_norm": 0.21174384653568268, "learning_rate": 1.596047880474859e-05, "loss": 1.5478, "step": 1301 }, { "epoch": 0.7219295813695592, "grad_norm": 0.21973606944084167, "learning_rate": 1.5901542804251446e-05, "loss": 1.5873, "step": 1302 }, { "epoch": 0.722484058774605, "grad_norm": 0.21199335157871246, "learning_rate": 1.5842688812385997e-05, "loss": 1.5681, "step": 1303 }, { "epoch": 0.7230385361796506, "grad_norm": 0.20926666259765625, "learning_rate": 1.578391702943628e-05, "loss": 1.5635, "step": 1304 }, { "epoch": 0.7235930135846964, "grad_norm": 0.20591633021831512, "learning_rate": 1.5725227655406485e-05, "loss": 1.5811, "step": 1305 }, { "epoch": 0.7241474909897422, "grad_norm": 0.21061578392982483, "learning_rate": 1.5666620890020423e-05, "loss": 1.5401, "step": 1306 }, { "epoch": 0.724701968394788, "grad_norm": 0.21330875158309937, "learning_rate": 1.5608096932720758e-05, "loss": 1.5631, "step": 1307 }, { "epoch": 0.7252564457998336, "grad_norm": 0.21160577237606049, "learning_rate": 1.5549655982668365e-05, "loss": 1.5541, "step": 1308 }, { "epoch": 0.7258109232048794, "grad_norm": 0.21944500505924225, "learning_rate": 1.549129823874164e-05, "loss": 1.5407, "step": 1309 }, { "epoch": 0.7263654006099252, "grad_norm": 0.21115024387836456, "learning_rate": 1.543302389953578e-05, "loss": 1.5101, "step": 1310 }, { "epoch": 0.7269198780149709, "grad_norm": 0.21656592190265656, "learning_rate": 1.5374833163362228e-05, "loss": 1.5505, "step": 1311 }, { "epoch": 0.7274743554200166, "grad_norm": 0.20774325728416443, "learning_rate": 1.5316726228247874e-05, "loss": 1.5188, "step": 1312 }, { "epoch": 0.7280288328250624, "grad_norm": 0.22355897724628448, "learning_rate": 1.525870329193441e-05, "loss": 1.5638, "step": 1313 }, { "epoch": 0.7285833102301081, "grad_norm": 0.20786559581756592, "learning_rate": 1.5200764551877707e-05, "loss": 1.5139, "step": 1314 }, { "epoch": 0.7291377876351539, "grad_norm": 0.21378779411315918, "learning_rate": 1.51429102052471e-05, "loss": 1.5272, "step": 1315 }, { "epoch": 0.7296922650401996, "grad_norm": 0.2113804817199707, "learning_rate": 1.5085140448924738e-05, "loss": 1.5503, "step": 1316 }, { "epoch": 0.7302467424452453, "grad_norm": 0.2093944549560547, "learning_rate": 1.5027455479504896e-05, "loss": 1.5027, "step": 1317 }, { "epoch": 0.7308012198502911, "grad_norm": 0.2111719399690628, "learning_rate": 1.4969855493293275e-05, "loss": 1.4586, "step": 1318 }, { "epoch": 0.7313556972553369, "grad_norm": 0.21029268205165863, "learning_rate": 1.491234068630646e-05, "loss": 1.6253, "step": 1319 }, { "epoch": 0.7319101746603826, "grad_norm": 0.2066301554441452, "learning_rate": 1.4854911254271086e-05, "loss": 1.4414, "step": 1320 }, { "epoch": 0.7324646520654283, "grad_norm": 0.2162986844778061, "learning_rate": 1.4797567392623275e-05, "loss": 1.5728, "step": 1321 }, { "epoch": 0.7330191294704741, "grad_norm": 0.21040524542331696, "learning_rate": 1.4740309296507969e-05, "loss": 1.5839, "step": 1322 }, { "epoch": 0.7335736068755199, "grad_norm": 0.22399760782718658, "learning_rate": 1.4683137160778218e-05, "loss": 1.6573, "step": 1323 }, { "epoch": 0.7341280842805655, "grad_norm": 0.20881177484989166, "learning_rate": 1.4626051179994569e-05, "loss": 1.5418, "step": 1324 }, { "epoch": 0.7346825616856113, "grad_norm": 0.21787725389003754, "learning_rate": 1.4569051548424323e-05, "loss": 1.6005, "step": 1325 }, { "epoch": 0.7352370390906571, "grad_norm": 0.2111303210258484, "learning_rate": 1.4512138460041004e-05, "loss": 1.5171, "step": 1326 }, { "epoch": 0.7357915164957028, "grad_norm": 0.20768657326698303, "learning_rate": 1.4455312108523587e-05, "loss": 1.5723, "step": 1327 }, { "epoch": 0.7363459939007485, "grad_norm": 0.20773515105247498, "learning_rate": 1.4398572687255858e-05, "loss": 1.5349, "step": 1328 }, { "epoch": 0.7369004713057943, "grad_norm": 0.20800316333770752, "learning_rate": 1.43419203893258e-05, "loss": 1.5316, "step": 1329 }, { "epoch": 0.73745494871084, "grad_norm": 0.21494507789611816, "learning_rate": 1.42853554075249e-05, "loss": 1.5631, "step": 1330 }, { "epoch": 0.7380094261158858, "grad_norm": 0.2120676040649414, "learning_rate": 1.4228877934347506e-05, "loss": 1.5709, "step": 1331 }, { "epoch": 0.7385639035209315, "grad_norm": 0.20988710224628448, "learning_rate": 1.4172488161990168e-05, "loss": 1.5334, "step": 1332 }, { "epoch": 0.7391183809259773, "grad_norm": 0.21031689643859863, "learning_rate": 1.4116186282350981e-05, "loss": 1.5124, "step": 1333 }, { "epoch": 0.739672858331023, "grad_norm": 0.21639370918273926, "learning_rate": 1.4059972487028936e-05, "loss": 1.567, "step": 1334 }, { "epoch": 0.7402273357360688, "grad_norm": 0.21508349478244781, "learning_rate": 1.4003846967323283e-05, "loss": 1.5132, "step": 1335 }, { "epoch": 0.7407818131411145, "grad_norm": 0.21374095976352692, "learning_rate": 1.3947809914232808e-05, "loss": 1.555, "step": 1336 }, { "epoch": 0.7413362905461602, "grad_norm": 0.22325333952903748, "learning_rate": 1.3891861518455342e-05, "loss": 1.5703, "step": 1337 }, { "epoch": 0.741890767951206, "grad_norm": 0.22245821356773376, "learning_rate": 1.3836001970386898e-05, "loss": 1.5424, "step": 1338 }, { "epoch": 0.7424452453562518, "grad_norm": 0.2141106277704239, "learning_rate": 1.3780231460121204e-05, "loss": 1.601, "step": 1339 }, { "epoch": 0.7429997227612974, "grad_norm": 0.21222813427448273, "learning_rate": 1.3724550177448968e-05, "loss": 1.5908, "step": 1340 }, { "epoch": 0.7435542001663432, "grad_norm": 0.21440914273262024, "learning_rate": 1.3668958311857247e-05, "loss": 1.6119, "step": 1341 }, { "epoch": 0.744108677571389, "grad_norm": 0.21355807781219482, "learning_rate": 1.3613456052528822e-05, "loss": 1.5494, "step": 1342 }, { "epoch": 0.7446631549764348, "grad_norm": 0.22582007944583893, "learning_rate": 1.3558043588341488e-05, "loss": 1.6013, "step": 1343 }, { "epoch": 0.7452176323814804, "grad_norm": 0.22058305144309998, "learning_rate": 1.3502721107867536e-05, "loss": 1.6472, "step": 1344 }, { "epoch": 0.7457721097865262, "grad_norm": 0.21767617762088776, "learning_rate": 1.3447488799372997e-05, "loss": 1.5594, "step": 1345 }, { "epoch": 0.746326587191572, "grad_norm": 0.21455064415931702, "learning_rate": 1.339234685081702e-05, "loss": 1.4785, "step": 1346 }, { "epoch": 0.7468810645966177, "grad_norm": 0.2150326520204544, "learning_rate": 1.3337295449851282e-05, "loss": 1.5622, "step": 1347 }, { "epoch": 0.7474355420016634, "grad_norm": 0.21228265762329102, "learning_rate": 1.328233478381932e-05, "loss": 1.5579, "step": 1348 }, { "epoch": 0.7479900194067092, "grad_norm": 0.21863175928592682, "learning_rate": 1.3227465039755889e-05, "loss": 1.5563, "step": 1349 }, { "epoch": 0.748544496811755, "grad_norm": 0.22516675293445587, "learning_rate": 1.3172686404386323e-05, "loss": 1.5843, "step": 1350 }, { "epoch": 0.7490989742168007, "grad_norm": 0.21071338653564453, "learning_rate": 1.3117999064125923e-05, "loss": 1.4893, "step": 1351 }, { "epoch": 0.7496534516218464, "grad_norm": 0.21058571338653564, "learning_rate": 1.3063403205079302e-05, "loss": 1.4929, "step": 1352 }, { "epoch": 0.7502079290268922, "grad_norm": 0.2110683172941208, "learning_rate": 1.300889901303973e-05, "loss": 1.5716, "step": 1353 }, { "epoch": 0.7507624064319379, "grad_norm": 0.22310392558574677, "learning_rate": 1.2954486673488554e-05, "loss": 1.5591, "step": 1354 }, { "epoch": 0.7513168838369837, "grad_norm": 0.21150308847427368, "learning_rate": 1.290016637159457e-05, "loss": 1.5552, "step": 1355 }, { "epoch": 0.7518713612420294, "grad_norm": 0.2172410786151886, "learning_rate": 1.2845938292213296e-05, "loss": 1.6018, "step": 1356 }, { "epoch": 0.7524258386470751, "grad_norm": 0.21438592672348022, "learning_rate": 1.2791802619886457e-05, "loss": 1.5254, "step": 1357 }, { "epoch": 0.7529803160521209, "grad_norm": 0.2097012847661972, "learning_rate": 1.2737759538841297e-05, "loss": 1.4957, "step": 1358 }, { "epoch": 0.7535347934571667, "grad_norm": 0.22424669563770294, "learning_rate": 1.2683809232989975e-05, "loss": 1.5994, "step": 1359 }, { "epoch": 0.7540892708622123, "grad_norm": 0.23129421472549438, "learning_rate": 1.2629951885928931e-05, "loss": 1.6463, "step": 1360 }, { "epoch": 0.7546437482672581, "grad_norm": 0.2219121903181076, "learning_rate": 1.2576187680938214e-05, "loss": 1.641, "step": 1361 }, { "epoch": 0.7551982256723039, "grad_norm": 0.21605202555656433, "learning_rate": 1.2522516800980995e-05, "loss": 1.6419, "step": 1362 }, { "epoch": 0.7557527030773497, "grad_norm": 0.2168269157409668, "learning_rate": 1.2468939428702762e-05, "loss": 1.5664, "step": 1363 }, { "epoch": 0.7563071804823953, "grad_norm": 0.21760636568069458, "learning_rate": 1.2415455746430846e-05, "loss": 1.5758, "step": 1364 }, { "epoch": 0.7568616578874411, "grad_norm": 0.2120659053325653, "learning_rate": 1.2362065936173728e-05, "loss": 1.584, "step": 1365 }, { "epoch": 0.7574161352924869, "grad_norm": 0.20472942292690277, "learning_rate": 1.230877017962043e-05, "loss": 1.5304, "step": 1366 }, { "epoch": 0.7579706126975326, "grad_norm": 0.21528665721416473, "learning_rate": 1.2255568658139918e-05, "loss": 1.6002, "step": 1367 }, { "epoch": 0.7585250901025783, "grad_norm": 0.21428853273391724, "learning_rate": 1.2202461552780473e-05, "loss": 1.6109, "step": 1368 }, { "epoch": 0.7590795675076241, "grad_norm": 0.21304574608802795, "learning_rate": 1.214944904426902e-05, "loss": 1.5883, "step": 1369 }, { "epoch": 0.7596340449126698, "grad_norm": 0.22913344204425812, "learning_rate": 1.209653131301066e-05, "loss": 1.6736, "step": 1370 }, { "epoch": 0.7601885223177155, "grad_norm": 0.2191346287727356, "learning_rate": 1.2043708539087865e-05, "loss": 1.5518, "step": 1371 }, { "epoch": 0.7607429997227613, "grad_norm": 0.21301528811454773, "learning_rate": 1.1990980902260008e-05, "loss": 1.5366, "step": 1372 }, { "epoch": 0.761297477127807, "grad_norm": 0.21601028740406036, "learning_rate": 1.1938348581962713e-05, "loss": 1.5593, "step": 1373 }, { "epoch": 0.7618519545328528, "grad_norm": 0.21673454344272614, "learning_rate": 1.1885811757307209e-05, "loss": 1.589, "step": 1374 }, { "epoch": 0.7624064319378985, "grad_norm": 0.20797033607959747, "learning_rate": 1.1833370607079778e-05, "loss": 1.5379, "step": 1375 }, { "epoch": 0.7629609093429442, "grad_norm": 0.22476620972156525, "learning_rate": 1.1781025309741056e-05, "loss": 1.5536, "step": 1376 }, { "epoch": 0.76351538674799, "grad_norm": 0.2215532660484314, "learning_rate": 1.1728776043425563e-05, "loss": 1.5604, "step": 1377 }, { "epoch": 0.7640698641530358, "grad_norm": 0.214621901512146, "learning_rate": 1.1676622985940983e-05, "loss": 1.5223, "step": 1378 }, { "epoch": 0.7646243415580815, "grad_norm": 0.2120085209608078, "learning_rate": 1.1624566314767573e-05, "loss": 1.5189, "step": 1379 }, { "epoch": 0.7651788189631272, "grad_norm": 0.21431514620780945, "learning_rate": 1.1572606207057607e-05, "loss": 1.5565, "step": 1380 }, { "epoch": 0.765733296368173, "grad_norm": 0.21584293246269226, "learning_rate": 1.152074283963475e-05, "loss": 1.5717, "step": 1381 }, { "epoch": 0.7662877737732188, "grad_norm": 0.22375768423080444, "learning_rate": 1.1468976388993438e-05, "loss": 1.6198, "step": 1382 }, { "epoch": 0.7668422511782644, "grad_norm": 0.22090241312980652, "learning_rate": 1.1417307031298304e-05, "loss": 1.5913, "step": 1383 }, { "epoch": 0.7673967285833102, "grad_norm": 0.20872661471366882, "learning_rate": 1.1365734942383565e-05, "loss": 1.5244, "step": 1384 }, { "epoch": 0.767951205988356, "grad_norm": 0.218302920460701, "learning_rate": 1.131426029775244e-05, "loss": 1.5787, "step": 1385 }, { "epoch": 0.7685056833934018, "grad_norm": 0.21635451912879944, "learning_rate": 1.1262883272576492e-05, "loss": 1.5958, "step": 1386 }, { "epoch": 0.7690601607984474, "grad_norm": 0.21018511056900024, "learning_rate": 1.1211604041695114e-05, "loss": 1.5487, "step": 1387 }, { "epoch": 0.7696146382034932, "grad_norm": 0.23798473179340363, "learning_rate": 1.1160422779614928e-05, "loss": 1.6402, "step": 1388 }, { "epoch": 0.770169115608539, "grad_norm": 0.21271711587905884, "learning_rate": 1.1109339660509098e-05, "loss": 1.5796, "step": 1389 }, { "epoch": 0.7707235930135847, "grad_norm": 0.20341956615447998, "learning_rate": 1.1058354858216842e-05, "loss": 1.5225, "step": 1390 }, { "epoch": 0.7712780704186304, "grad_norm": 0.23549644649028778, "learning_rate": 1.1007468546242786e-05, "loss": 1.5797, "step": 1391 }, { "epoch": 0.7718325478236762, "grad_norm": 0.22186161577701569, "learning_rate": 1.0956680897756394e-05, "loss": 1.5828, "step": 1392 }, { "epoch": 0.7723870252287219, "grad_norm": 0.22768884897232056, "learning_rate": 1.0905992085591373e-05, "loss": 1.5781, "step": 1393 }, { "epoch": 0.7729415026337677, "grad_norm": 0.20948225259780884, "learning_rate": 1.0855402282245047e-05, "loss": 1.5476, "step": 1394 }, { "epoch": 0.7734959800388134, "grad_norm": 0.21078969538211823, "learning_rate": 1.0804911659877874e-05, "loss": 1.4978, "step": 1395 }, { "epoch": 0.7740504574438591, "grad_norm": 0.22138863801956177, "learning_rate": 1.0754520390312755e-05, "loss": 1.5183, "step": 1396 }, { "epoch": 0.7746049348489049, "grad_norm": 0.2178879678249359, "learning_rate": 1.0704228645034464e-05, "loss": 1.5743, "step": 1397 }, { "epoch": 0.7751594122539507, "grad_norm": 0.20594577491283417, "learning_rate": 1.0654036595189124e-05, "loss": 1.4646, "step": 1398 }, { "epoch": 0.7757138896589963, "grad_norm": 0.21161887049674988, "learning_rate": 1.0603944411583576e-05, "loss": 1.5743, "step": 1399 }, { "epoch": 0.7762683670640421, "grad_norm": 0.21460282802581787, "learning_rate": 1.0553952264684804e-05, "loss": 1.628, "step": 1400 }, { "epoch": 0.7768228444690879, "grad_norm": 0.2193065732717514, "learning_rate": 1.0504060324619374e-05, "loss": 1.5382, "step": 1401 }, { "epoch": 0.7773773218741337, "grad_norm": 0.21738368272781372, "learning_rate": 1.0454268761172824e-05, "loss": 1.5957, "step": 1402 }, { "epoch": 0.7779317992791793, "grad_norm": 0.21127985417842865, "learning_rate": 1.040457774378913e-05, "loss": 1.5551, "step": 1403 }, { "epoch": 0.7784862766842251, "grad_norm": 0.20940497517585754, "learning_rate": 1.0354987441570064e-05, "loss": 1.5498, "step": 1404 }, { "epoch": 0.7790407540892709, "grad_norm": 0.21508832275867462, "learning_rate": 1.0305498023274674e-05, "loss": 1.6172, "step": 1405 }, { "epoch": 0.7795952314943166, "grad_norm": 0.21197104454040527, "learning_rate": 1.025610965731874e-05, "loss": 1.5321, "step": 1406 }, { "epoch": 0.7801497088993623, "grad_norm": 0.21619556844234467, "learning_rate": 1.0206822511774073e-05, "loss": 1.5089, "step": 1407 }, { "epoch": 0.7807041863044081, "grad_norm": 0.2153014987707138, "learning_rate": 1.015763675436808e-05, "loss": 1.5755, "step": 1408 }, { "epoch": 0.7812586637094538, "grad_norm": 0.20888642966747284, "learning_rate": 1.0108552552483122e-05, "loss": 1.5995, "step": 1409 }, { "epoch": 0.7818131411144996, "grad_norm": 0.2115897834300995, "learning_rate": 1.0059570073155953e-05, "loss": 1.5222, "step": 1410 }, { "epoch": 0.7823676185195453, "grad_norm": 0.20816390216350555, "learning_rate": 1.0010689483077187e-05, "loss": 1.517, "step": 1411 }, { "epoch": 0.782922095924591, "grad_norm": 0.21498888731002808, "learning_rate": 9.961910948590643e-06, "loss": 1.5944, "step": 1412 }, { "epoch": 0.7834765733296368, "grad_norm": 0.2085815966129303, "learning_rate": 9.91323463569292e-06, "loss": 1.4606, "step": 1413 }, { "epoch": 0.7840310507346826, "grad_norm": 0.21854795515537262, "learning_rate": 9.864660710032669e-06, "loss": 1.6048, "step": 1414 }, { "epoch": 0.7845855281397283, "grad_norm": 0.21158936619758606, "learning_rate": 9.816189336910166e-06, "loss": 1.5845, "step": 1415 }, { "epoch": 0.785140005544774, "grad_norm": 0.21523204445838928, "learning_rate": 9.767820681276671e-06, "loss": 1.5289, "step": 1416 }, { "epoch": 0.7856944829498198, "grad_norm": 0.2180699110031128, "learning_rate": 9.719554907733895e-06, "loss": 1.5658, "step": 1417 }, { "epoch": 0.7862489603548656, "grad_norm": 0.21391287446022034, "learning_rate": 9.671392180533443e-06, "loss": 1.579, "step": 1418 }, { "epoch": 0.7868034377599112, "grad_norm": 0.20858129858970642, "learning_rate": 9.62333266357622e-06, "loss": 1.5127, "step": 1419 }, { "epoch": 0.787357915164957, "grad_norm": 0.2106478065252304, "learning_rate": 9.575376520411907e-06, "loss": 1.5466, "step": 1420 }, { "epoch": 0.7879123925700028, "grad_norm": 0.20766615867614746, "learning_rate": 9.527523914238452e-06, "loss": 1.5704, "step": 1421 }, { "epoch": 0.7884668699750486, "grad_norm": 0.21175894141197205, "learning_rate": 9.479775007901378e-06, "loss": 1.5494, "step": 1422 }, { "epoch": 0.7890213473800942, "grad_norm": 0.2133675217628479, "learning_rate": 9.43212996389335e-06, "loss": 1.5659, "step": 1423 }, { "epoch": 0.78957582478514, "grad_norm": 0.2181132733821869, "learning_rate": 9.384588944353594e-06, "loss": 1.6126, "step": 1424 }, { "epoch": 0.7901303021901858, "grad_norm": 0.22013451159000397, "learning_rate": 9.337152111067312e-06, "loss": 1.5657, "step": 1425 }, { "epoch": 0.7906847795952315, "grad_norm": 0.22332167625427246, "learning_rate": 9.28981962546517e-06, "loss": 1.5943, "step": 1426 }, { "epoch": 0.7912392570002772, "grad_norm": 0.20821382105350494, "learning_rate": 9.242591648622698e-06, "loss": 1.5187, "step": 1427 }, { "epoch": 0.791793734405323, "grad_norm": 0.2182924449443817, "learning_rate": 9.19546834125983e-06, "loss": 1.585, "step": 1428 }, { "epoch": 0.7923482118103687, "grad_norm": 0.20459789037704468, "learning_rate": 9.148449863740279e-06, "loss": 1.52, "step": 1429 }, { "epoch": 0.7929026892154145, "grad_norm": 0.218237042427063, "learning_rate": 9.101536376070981e-06, "loss": 1.5027, "step": 1430 }, { "epoch": 0.7934571666204602, "grad_norm": 0.2164810746908188, "learning_rate": 9.05472803790163e-06, "loss": 1.5413, "step": 1431 }, { "epoch": 0.794011644025506, "grad_norm": 0.21726112067699432, "learning_rate": 9.008025008524068e-06, "loss": 1.5963, "step": 1432 }, { "epoch": 0.7945661214305517, "grad_norm": 0.22692659497261047, "learning_rate": 8.96142744687178e-06, "loss": 1.5632, "step": 1433 }, { "epoch": 0.7951205988355975, "grad_norm": 0.21242159605026245, "learning_rate": 8.914935511519313e-06, "loss": 1.5366, "step": 1434 }, { "epoch": 0.7956750762406432, "grad_norm": 0.214253231883049, "learning_rate": 8.868549360681791e-06, "loss": 1.5982, "step": 1435 }, { "epoch": 0.7962295536456889, "grad_norm": 0.21711012721061707, "learning_rate": 8.822269152214327e-06, "loss": 1.5109, "step": 1436 }, { "epoch": 0.7967840310507347, "grad_norm": 0.22008074820041656, "learning_rate": 8.776095043611494e-06, "loss": 1.5869, "step": 1437 }, { "epoch": 0.7973385084557805, "grad_norm": 0.21308183670043945, "learning_rate": 8.730027192006809e-06, "loss": 1.5816, "step": 1438 }, { "epoch": 0.7978929858608261, "grad_norm": 0.21756571531295776, "learning_rate": 8.684065754172235e-06, "loss": 1.5787, "step": 1439 }, { "epoch": 0.7984474632658719, "grad_norm": 0.21786224842071533, "learning_rate": 8.638210886517524e-06, "loss": 1.5704, "step": 1440 }, { "epoch": 0.7990019406709177, "grad_norm": 0.21699659526348114, "learning_rate": 8.592462745089815e-06, "loss": 1.5663, "step": 1441 }, { "epoch": 0.7995564180759634, "grad_norm": 0.21736855804920197, "learning_rate": 8.546821485573033e-06, "loss": 1.5873, "step": 1442 }, { "epoch": 0.8001108954810091, "grad_norm": 0.20840026438236237, "learning_rate": 8.501287263287383e-06, "loss": 1.543, "step": 1443 }, { "epoch": 0.8006653728860549, "grad_norm": 0.21741265058517456, "learning_rate": 8.45586023318882e-06, "loss": 1.5909, "step": 1444 }, { "epoch": 0.8012198502911007, "grad_norm": 0.20903275907039642, "learning_rate": 8.410540549868478e-06, "loss": 1.5251, "step": 1445 }, { "epoch": 0.8017743276961464, "grad_norm": 0.21058255434036255, "learning_rate": 8.365328367552257e-06, "loss": 1.5511, "step": 1446 }, { "epoch": 0.8023288051011921, "grad_norm": 0.21151955425739288, "learning_rate": 8.320223840100152e-06, "loss": 1.6012, "step": 1447 }, { "epoch": 0.8028832825062379, "grad_norm": 0.2185509204864502, "learning_rate": 8.27522712100584e-06, "loss": 1.5945, "step": 1448 }, { "epoch": 0.8034377599112836, "grad_norm": 0.22160445153713226, "learning_rate": 8.230338363396107e-06, "loss": 1.5107, "step": 1449 }, { "epoch": 0.8039922373163294, "grad_norm": 0.2089175581932068, "learning_rate": 8.185557720030347e-06, "loss": 1.5586, "step": 1450 }, { "epoch": 0.8045467147213751, "grad_norm": 0.20976027846336365, "learning_rate": 8.140885343300034e-06, "loss": 1.5384, "step": 1451 }, { "epoch": 0.8051011921264208, "grad_norm": 0.21759721636772156, "learning_rate": 8.09632138522817e-06, "loss": 1.5683, "step": 1452 }, { "epoch": 0.8056556695314666, "grad_norm": 0.20990976691246033, "learning_rate": 8.051865997468856e-06, "loss": 1.5553, "step": 1453 }, { "epoch": 0.8062101469365124, "grad_norm": 0.21652382612228394, "learning_rate": 8.007519331306701e-06, "loss": 1.5073, "step": 1454 }, { "epoch": 0.806764624341558, "grad_norm": 0.21122099459171295, "learning_rate": 7.9632815376563e-06, "loss": 1.5442, "step": 1455 }, { "epoch": 0.8073191017466038, "grad_norm": 0.21625083684921265, "learning_rate": 7.919152767061765e-06, "loss": 1.5177, "step": 1456 }, { "epoch": 0.8078735791516496, "grad_norm": 0.21200041472911835, "learning_rate": 7.875133169696231e-06, "loss": 1.62, "step": 1457 }, { "epoch": 0.8084280565566954, "grad_norm": 0.21699056029319763, "learning_rate": 7.83122289536125e-06, "loss": 1.6298, "step": 1458 }, { "epoch": 0.808982533961741, "grad_norm": 0.21260713040828705, "learning_rate": 7.78742209348637e-06, "loss": 1.5463, "step": 1459 }, { "epoch": 0.8095370113667868, "grad_norm": 0.21496307849884033, "learning_rate": 7.743730913128606e-06, "loss": 1.5319, "step": 1460 }, { "epoch": 0.8100914887718326, "grad_norm": 0.2123148888349533, "learning_rate": 7.700149502971901e-06, "loss": 1.5688, "step": 1461 }, { "epoch": 0.8106459661768783, "grad_norm": 0.2114924192428589, "learning_rate": 7.656678011326674e-06, "loss": 1.4625, "step": 1462 }, { "epoch": 0.811200443581924, "grad_norm": 0.21606843173503876, "learning_rate": 7.613316586129231e-06, "loss": 1.6005, "step": 1463 }, { "epoch": 0.8117549209869698, "grad_norm": 0.21045103669166565, "learning_rate": 7.570065374941386e-06, "loss": 1.5143, "step": 1464 }, { "epoch": 0.8123093983920155, "grad_norm": 0.21124376356601715, "learning_rate": 7.526924524949826e-06, "loss": 1.5344, "step": 1465 }, { "epoch": 0.8128638757970613, "grad_norm": 0.2107442021369934, "learning_rate": 7.483894182965699e-06, "loss": 1.5411, "step": 1466 }, { "epoch": 0.813418353202107, "grad_norm": 0.21770091354846954, "learning_rate": 7.440974495424087e-06, "loss": 1.6174, "step": 1467 }, { "epoch": 0.8139728306071528, "grad_norm": 0.21479737758636475, "learning_rate": 7.398165608383499e-06, "loss": 1.5734, "step": 1468 }, { "epoch": 0.8145273080121985, "grad_norm": 0.22530944645404816, "learning_rate": 7.355467667525404e-06, "loss": 1.5711, "step": 1469 }, { "epoch": 0.8150817854172443, "grad_norm": 0.2182978242635727, "learning_rate": 7.312880818153676e-06, "loss": 1.5933, "step": 1470 }, { "epoch": 0.81563626282229, "grad_norm": 0.20854482054710388, "learning_rate": 7.270405205194158e-06, "loss": 1.5411, "step": 1471 }, { "epoch": 0.8161907402273357, "grad_norm": 0.21064384281635284, "learning_rate": 7.228040973194175e-06, "loss": 1.5622, "step": 1472 }, { "epoch": 0.8167452176323815, "grad_norm": 0.2184503674507141, "learning_rate": 7.185788266321969e-06, "loss": 1.5769, "step": 1473 }, { "epoch": 0.8172996950374273, "grad_norm": 0.2059141993522644, "learning_rate": 7.143647228366277e-06, "loss": 1.5357, "step": 1474 }, { "epoch": 0.8178541724424729, "grad_norm": 0.21199941635131836, "learning_rate": 7.10161800273582e-06, "loss": 1.5796, "step": 1475 }, { "epoch": 0.8184086498475187, "grad_norm": 0.2183256447315216, "learning_rate": 7.059700732458807e-06, "loss": 1.5835, "step": 1476 }, { "epoch": 0.8189631272525645, "grad_norm": 0.21991661190986633, "learning_rate": 7.0178955601824816e-06, "loss": 1.6238, "step": 1477 }, { "epoch": 0.8195176046576103, "grad_norm": 0.2150619775056839, "learning_rate": 6.976202628172548e-06, "loss": 1.6039, "step": 1478 }, { "epoch": 0.8200720820626559, "grad_norm": 0.21119360625743866, "learning_rate": 6.934622078312836e-06, "loss": 1.5284, "step": 1479 }, { "epoch": 0.8206265594677017, "grad_norm": 0.21574904024600983, "learning_rate": 6.8931540521046536e-06, "loss": 1.5784, "step": 1480 }, { "epoch": 0.8211810368727475, "grad_norm": 0.21076872944831848, "learning_rate": 6.851798690666429e-06, "loss": 1.5671, "step": 1481 }, { "epoch": 0.8217355142777932, "grad_norm": 0.21518339216709137, "learning_rate": 6.810556134733173e-06, "loss": 1.5842, "step": 1482 }, { "epoch": 0.8222899916828389, "grad_norm": 0.21064461767673492, "learning_rate": 6.769426524656007e-06, "loss": 1.5794, "step": 1483 }, { "epoch": 0.8228444690878847, "grad_norm": 0.20996730029582977, "learning_rate": 6.728410000401693e-06, "loss": 1.5662, "step": 1484 }, { "epoch": 0.8233989464929304, "grad_norm": 0.21565823256969452, "learning_rate": 6.687506701552156e-06, "loss": 1.5227, "step": 1485 }, { "epoch": 0.8239534238979762, "grad_norm": 0.20833303034305573, "learning_rate": 6.6467167673040134e-06, "loss": 1.5601, "step": 1486 }, { "epoch": 0.8245079013030219, "grad_norm": 0.2161036729812622, "learning_rate": 6.606040336468091e-06, "loss": 1.5762, "step": 1487 }, { "epoch": 0.8250623787080676, "grad_norm": 0.20862828195095062, "learning_rate": 6.565477547468933e-06, "loss": 1.5346, "step": 1488 }, { "epoch": 0.8256168561131134, "grad_norm": 0.20955637097358704, "learning_rate": 6.52502853834438e-06, "loss": 1.5954, "step": 1489 }, { "epoch": 0.8261713335181592, "grad_norm": 0.21076586842536926, "learning_rate": 6.484693446745089e-06, "loss": 1.5716, "step": 1490 }, { "epoch": 0.8267258109232049, "grad_norm": 0.21574625372886658, "learning_rate": 6.4444724099339996e-06, "loss": 1.5426, "step": 1491 }, { "epoch": 0.8272802883282506, "grad_norm": 0.207157164812088, "learning_rate": 6.404365564785946e-06, "loss": 1.5192, "step": 1492 }, { "epoch": 0.8278347657332964, "grad_norm": 0.21191108226776123, "learning_rate": 6.364373047787156e-06, "loss": 1.6491, "step": 1493 }, { "epoch": 0.8283892431383421, "grad_norm": 0.21367955207824707, "learning_rate": 6.324494995034789e-06, "loss": 1.5497, "step": 1494 }, { "epoch": 0.8289437205433878, "grad_norm": 0.20905102789402008, "learning_rate": 6.28473154223649e-06, "loss": 1.5512, "step": 1495 }, { "epoch": 0.8294981979484336, "grad_norm": 0.21540366113185883, "learning_rate": 6.245082824709863e-06, "loss": 1.5571, "step": 1496 }, { "epoch": 0.8300526753534794, "grad_norm": 0.21667438745498657, "learning_rate": 6.205548977382143e-06, "loss": 1.6235, "step": 1497 }, { "epoch": 0.830607152758525, "grad_norm": 0.215713232755661, "learning_rate": 6.166130134789572e-06, "loss": 1.5784, "step": 1498 }, { "epoch": 0.8311616301635708, "grad_norm": 0.20865316689014435, "learning_rate": 6.126826431077071e-06, "loss": 1.5567, "step": 1499 }, { "epoch": 0.8317161075686166, "grad_norm": 0.2152477353811264, "learning_rate": 6.087637999997729e-06, "loss": 1.6069, "step": 1500 }, { "epoch": 0.0005544005544005544, "grad_norm": 0.2160109579563141, "learning_rate": 5.240839353917247e-05, "loss": 1.5558, "step": 1501 }, { "epoch": 0.0011088011088011087, "grad_norm": 0.2080707848072052, "learning_rate": 5.237431426727592e-05, "loss": 1.4763, "step": 1502 }, { "epoch": 0.0016632016632016633, "grad_norm": 0.2181939035654068, "learning_rate": 5.234022505971223e-05, "loss": 1.4663, "step": 1503 }, { "epoch": 0.0022176022176022174, "grad_norm": 0.22565068304538727, "learning_rate": 5.2306125943852505e-05, "loss": 1.5126, "step": 1504 }, { "epoch": 0.002772002772002772, "grad_norm": 0.21895378828048706, "learning_rate": 5.2272016947075854e-05, "loss": 1.4284, "step": 1505 }, { "epoch": 0.0033264033264033266, "grad_norm": 0.2262556552886963, "learning_rate": 5.2237898096769284e-05, "loss": 1.4929, "step": 1506 }, { "epoch": 0.0038808038808038807, "grad_norm": 0.23350562155246735, "learning_rate": 5.2203769420327745e-05, "loss": 1.4788, "step": 1507 }, { "epoch": 0.004435204435204435, "grad_norm": 0.22991852462291718, "learning_rate": 5.2169630945154055e-05, "loss": 1.5161, "step": 1508 }, { "epoch": 0.00498960498960499, "grad_norm": 0.22740978002548218, "learning_rate": 5.213548269865891e-05, "loss": 1.452, "step": 1509 }, { "epoch": 0.005544005544005544, "grad_norm": 0.23259387910366058, "learning_rate": 5.210132470826086e-05, "loss": 1.5036, "step": 1510 }, { "epoch": 0.006098406098406098, "grad_norm": 0.23216551542282104, "learning_rate": 5.206715700138624e-05, "loss": 1.5346, "step": 1511 }, { "epoch": 0.006652806652806653, "grad_norm": 0.23284415900707245, "learning_rate": 5.203297960546923e-05, "loss": 1.5223, "step": 1512 }, { "epoch": 0.007207207207207207, "grad_norm": 0.2359096258878708, "learning_rate": 5.1998792547951786e-05, "loss": 1.4985, "step": 1513 }, { "epoch": 0.0077616077616077615, "grad_norm": 0.23292788863182068, "learning_rate": 5.196459585628358e-05, "loss": 1.4901, "step": 1514 }, { "epoch": 0.008316008316008316, "grad_norm": 0.22852382063865662, "learning_rate": 5.193038955792207e-05, "loss": 1.5416, "step": 1515 }, { "epoch": 0.00887040887040887, "grad_norm": 0.23578786849975586, "learning_rate": 5.18961736803324e-05, "loss": 1.5084, "step": 1516 }, { "epoch": 0.009424809424809425, "grad_norm": 0.22275063395500183, "learning_rate": 5.1861948250987406e-05, "loss": 1.5299, "step": 1517 }, { "epoch": 0.00997920997920998, "grad_norm": 0.25028637051582336, "learning_rate": 5.18277132973676e-05, "loss": 1.5482, "step": 1518 }, { "epoch": 0.010533610533610533, "grad_norm": 0.2336578071117401, "learning_rate": 5.179346884696115e-05, "loss": 1.5558, "step": 1519 }, { "epoch": 0.011088011088011088, "grad_norm": 0.2411772906780243, "learning_rate": 5.1759214927263836e-05, "loss": 1.5077, "step": 1520 }, { "epoch": 0.011642411642411643, "grad_norm": 0.2362968772649765, "learning_rate": 5.172495156577903e-05, "loss": 1.533, "step": 1521 }, { "epoch": 0.012196812196812196, "grad_norm": 0.24614384770393372, "learning_rate": 5.1690678790017705e-05, "loss": 1.5646, "step": 1522 }, { "epoch": 0.012751212751212751, "grad_norm": 0.2757493853569031, "learning_rate": 5.165639662749839e-05, "loss": 1.5946, "step": 1523 }, { "epoch": 0.013305613305613306, "grad_norm": 0.3118709921836853, "learning_rate": 5.162210510574715e-05, "loss": 1.5103, "step": 1524 }, { "epoch": 0.01386001386001386, "grad_norm": 0.29125118255615234, "learning_rate": 5.1587804252297585e-05, "loss": 1.6195, "step": 1525 }, { "epoch": 0.014414414414414415, "grad_norm": 0.3565502166748047, "learning_rate": 5.1553494094690725e-05, "loss": 1.6053, "step": 1526 }, { "epoch": 0.01496881496881497, "grad_norm": 0.28563183546066284, "learning_rate": 5.151917466047514e-05, "loss": 1.5101, "step": 1527 }, { "epoch": 0.015523215523215523, "grad_norm": 0.3112012445926666, "learning_rate": 5.14848459772068e-05, "loss": 1.4989, "step": 1528 }, { "epoch": 0.016077616077616076, "grad_norm": 0.3166447579860687, "learning_rate": 5.145050807244913e-05, "loss": 1.492, "step": 1529 }, { "epoch": 0.016632016632016633, "grad_norm": 0.2751536965370178, "learning_rate": 5.141616097377295e-05, "loss": 1.4733, "step": 1530 }, { "epoch": 0.017186417186417186, "grad_norm": 0.341612845659256, "learning_rate": 5.138180470875646e-05, "loss": 1.5516, "step": 1531 }, { "epoch": 0.01774081774081774, "grad_norm": 0.2327624261379242, "learning_rate": 5.134743930498523e-05, "loss": 1.5544, "step": 1532 }, { "epoch": 0.018295218295218296, "grad_norm": 0.3177069127559662, "learning_rate": 5.131306479005215e-05, "loss": 1.5412, "step": 1533 }, { "epoch": 0.01884961884961885, "grad_norm": 0.2456265091896057, "learning_rate": 5.1278681191557445e-05, "loss": 1.5229, "step": 1534 }, { "epoch": 0.019404019404019403, "grad_norm": 0.25386062264442444, "learning_rate": 5.1244288537108614e-05, "loss": 1.5133, "step": 1535 }, { "epoch": 0.01995841995841996, "grad_norm": 0.31216731667518616, "learning_rate": 5.120988685432045e-05, "loss": 1.561, "step": 1536 }, { "epoch": 0.020512820512820513, "grad_norm": 0.23161141574382782, "learning_rate": 5.117547617081496e-05, "loss": 1.5507, "step": 1537 }, { "epoch": 0.021067221067221066, "grad_norm": 0.29082682728767395, "learning_rate": 5.114105651422142e-05, "loss": 1.5108, "step": 1538 }, { "epoch": 0.021621621621621623, "grad_norm": 0.24282529950141907, "learning_rate": 5.110662791217628e-05, "loss": 1.5547, "step": 1539 }, { "epoch": 0.022176022176022176, "grad_norm": 0.2670525312423706, "learning_rate": 5.10721903923232e-05, "loss": 1.5435, "step": 1540 }, { "epoch": 0.02273042273042273, "grad_norm": 0.27900922298431396, "learning_rate": 5.103774398231296e-05, "loss": 1.5044, "step": 1541 }, { "epoch": 0.023284823284823286, "grad_norm": 0.24645482003688812, "learning_rate": 5.100328870980353e-05, "loss": 1.5417, "step": 1542 }, { "epoch": 0.02383922383922384, "grad_norm": 0.26645559072494507, "learning_rate": 5.096882460245993e-05, "loss": 1.5127, "step": 1543 }, { "epoch": 0.024393624393624393, "grad_norm": 0.23979048430919647, "learning_rate": 5.0934351687954336e-05, "loss": 1.5096, "step": 1544 }, { "epoch": 0.02494802494802495, "grad_norm": 0.23912698030471802, "learning_rate": 5.089986999396597e-05, "loss": 1.5155, "step": 1545 }, { "epoch": 0.025502425502425503, "grad_norm": 0.2803586423397064, "learning_rate": 5.08653795481811e-05, "loss": 1.5175, "step": 1546 }, { "epoch": 0.026056826056826056, "grad_norm": 0.23462045192718506, "learning_rate": 5.0830880378293046e-05, "loss": 1.5086, "step": 1547 }, { "epoch": 0.026611226611226613, "grad_norm": 0.253179669380188, "learning_rate": 5.079637251200208e-05, "loss": 1.5137, "step": 1548 }, { "epoch": 0.027165627165627166, "grad_norm": 0.2448858767747879, "learning_rate": 5.0761855977015496e-05, "loss": 1.56, "step": 1549 }, { "epoch": 0.02772002772002772, "grad_norm": 0.2447686493396759, "learning_rate": 5.0727330801047556e-05, "loss": 1.4992, "step": 1550 }, { "epoch": 0.028274428274428276, "grad_norm": 0.24525319039821625, "learning_rate": 5.069279701181943e-05, "loss": 1.5274, "step": 1551 }, { "epoch": 0.02882882882882883, "grad_norm": 0.22166022658348083, "learning_rate": 5.065825463705923e-05, "loss": 1.4836, "step": 1552 }, { "epoch": 0.029383229383229383, "grad_norm": 0.24004797637462616, "learning_rate": 5.062370370450195e-05, "loss": 1.5382, "step": 1553 }, { "epoch": 0.02993762993762994, "grad_norm": 0.2243812531232834, "learning_rate": 5.0589144241889455e-05, "loss": 1.4457, "step": 1554 }, { "epoch": 0.030492030492030493, "grad_norm": 0.2552390694618225, "learning_rate": 5.055457627697045e-05, "loss": 1.4924, "step": 1555 }, { "epoch": 0.031046431046431046, "grad_norm": 0.22708649933338165, "learning_rate": 5.0519999837500474e-05, "loss": 1.5284, "step": 1556 }, { "epoch": 0.0316008316008316, "grad_norm": 0.2597266435623169, "learning_rate": 5.04854149512419e-05, "loss": 1.6339, "step": 1557 }, { "epoch": 0.03215523215523215, "grad_norm": 0.22946907579898834, "learning_rate": 5.045082164596383e-05, "loss": 1.5116, "step": 1558 }, { "epoch": 0.03270963270963271, "grad_norm": 0.22875460982322693, "learning_rate": 5.041621994944216e-05, "loss": 1.5387, "step": 1559 }, { "epoch": 0.033264033264033266, "grad_norm": 0.24510407447814941, "learning_rate": 5.038160988945951e-05, "loss": 1.5685, "step": 1560 }, { "epoch": 0.033818433818433816, "grad_norm": 0.22690841555595398, "learning_rate": 5.034699149380523e-05, "loss": 1.4985, "step": 1561 }, { "epoch": 0.03437283437283437, "grad_norm": 0.25019338726997375, "learning_rate": 5.031236479027536e-05, "loss": 1.5878, "step": 1562 }, { "epoch": 0.03492723492723493, "grad_norm": 0.24228759109973907, "learning_rate": 5.027772980667259e-05, "loss": 1.506, "step": 1563 }, { "epoch": 0.03548163548163548, "grad_norm": 0.2401711642742157, "learning_rate": 5.024308657080628e-05, "loss": 1.5543, "step": 1564 }, { "epoch": 0.036036036036036036, "grad_norm": 0.2637322247028351, "learning_rate": 5.02084351104924e-05, "loss": 1.5212, "step": 1565 }, { "epoch": 0.03659043659043659, "grad_norm": 0.23215605318546295, "learning_rate": 5.017377545355354e-05, "loss": 1.5183, "step": 1566 }, { "epoch": 0.03714483714483714, "grad_norm": 0.238456591963768, "learning_rate": 5.013910762781887e-05, "loss": 1.5244, "step": 1567 }, { "epoch": 0.0376992376992377, "grad_norm": 0.23898053169250488, "learning_rate": 5.010443166112408e-05, "loss": 1.536, "step": 1568 }, { "epoch": 0.038253638253638256, "grad_norm": 0.2242014855146408, "learning_rate": 5.006974758131148e-05, "loss": 1.5126, "step": 1569 }, { "epoch": 0.038808038808038806, "grad_norm": 0.25203588604927063, "learning_rate": 5.00350554162298e-05, "loss": 1.5606, "step": 1570 }, { "epoch": 0.03936243936243936, "grad_norm": 0.23183748126029968, "learning_rate": 5.000035519373433e-05, "loss": 1.4658, "step": 1571 }, { "epoch": 0.03991683991683992, "grad_norm": 0.23368437588214874, "learning_rate": 4.9965646941686806e-05, "loss": 1.4938, "step": 1572 }, { "epoch": 0.04047124047124047, "grad_norm": 0.23212756216526031, "learning_rate": 4.9930930687955385e-05, "loss": 1.5084, "step": 1573 }, { "epoch": 0.041025641025641026, "grad_norm": 0.22654865682125092, "learning_rate": 4.98962064604147e-05, "loss": 1.5725, "step": 1574 }, { "epoch": 0.04158004158004158, "grad_norm": 0.2407563030719757, "learning_rate": 4.986147428694573e-05, "loss": 1.6177, "step": 1575 }, { "epoch": 0.04213444213444213, "grad_norm": 0.2389269769191742, "learning_rate": 4.982673419543591e-05, "loss": 1.5341, "step": 1576 }, { "epoch": 0.04268884268884269, "grad_norm": 0.22539055347442627, "learning_rate": 4.979198621377894e-05, "loss": 1.4943, "step": 1577 }, { "epoch": 0.043243243243243246, "grad_norm": 0.24309422075748444, "learning_rate": 4.975723036987492e-05, "loss": 1.4989, "step": 1578 }, { "epoch": 0.043797643797643795, "grad_norm": 0.22778275609016418, "learning_rate": 4.972246669163026e-05, "loss": 1.572, "step": 1579 }, { "epoch": 0.04435204435204435, "grad_norm": 0.24670810997486115, "learning_rate": 4.9687695206957615e-05, "loss": 1.502, "step": 1580 }, { "epoch": 0.04490644490644491, "grad_norm": 0.23164881765842438, "learning_rate": 4.965291594377596e-05, "loss": 1.511, "step": 1581 }, { "epoch": 0.04546084546084546, "grad_norm": 0.2356569617986679, "learning_rate": 4.96181289300105e-05, "loss": 1.4987, "step": 1582 }, { "epoch": 0.046015246015246015, "grad_norm": 0.24136026203632355, "learning_rate": 4.958333419359262e-05, "loss": 1.5419, "step": 1583 }, { "epoch": 0.04656964656964657, "grad_norm": 0.23624971508979797, "learning_rate": 4.954853176245999e-05, "loss": 1.5114, "step": 1584 }, { "epoch": 0.04712404712404712, "grad_norm": 0.24542221426963806, "learning_rate": 4.9513721664556366e-05, "loss": 1.5203, "step": 1585 }, { "epoch": 0.04767844767844768, "grad_norm": 0.23352719843387604, "learning_rate": 4.9478903927831745e-05, "loss": 1.4603, "step": 1586 }, { "epoch": 0.048232848232848236, "grad_norm": 0.2294367402791977, "learning_rate": 4.944407858024219e-05, "loss": 1.5754, "step": 1587 }, { "epoch": 0.048787248787248785, "grad_norm": 0.22647875547409058, "learning_rate": 4.9409245649749905e-05, "loss": 1.5211, "step": 1588 }, { "epoch": 0.04934164934164934, "grad_norm": 0.2577221095561981, "learning_rate": 4.937440516432319e-05, "loss": 1.6394, "step": 1589 }, { "epoch": 0.0498960498960499, "grad_norm": 0.23961055278778076, "learning_rate": 4.933955715193639e-05, "loss": 1.5358, "step": 1590 }, { "epoch": 0.05045045045045045, "grad_norm": 0.23955407738685608, "learning_rate": 4.930470164056991e-05, "loss": 1.5162, "step": 1591 }, { "epoch": 0.051004851004851005, "grad_norm": 0.23558299243450165, "learning_rate": 4.926983865821017e-05, "loss": 1.5878, "step": 1592 }, { "epoch": 0.05155925155925156, "grad_norm": 0.22514784336090088, "learning_rate": 4.923496823284959e-05, "loss": 1.4693, "step": 1593 }, { "epoch": 0.05211365211365211, "grad_norm": 0.22910267114639282, "learning_rate": 4.920009039248656e-05, "loss": 1.5006, "step": 1594 }, { "epoch": 0.05266805266805267, "grad_norm": 0.22974339127540588, "learning_rate": 4.9165205165125436e-05, "loss": 1.4919, "step": 1595 }, { "epoch": 0.053222453222453225, "grad_norm": 0.23269398510456085, "learning_rate": 4.913031257877651e-05, "loss": 1.6078, "step": 1596 }, { "epoch": 0.053776853776853775, "grad_norm": 0.2222321331501007, "learning_rate": 4.9095412661455933e-05, "loss": 1.4849, "step": 1597 }, { "epoch": 0.05433125433125433, "grad_norm": 0.2347315102815628, "learning_rate": 4.9060505441185794e-05, "loss": 1.5575, "step": 1598 }, { "epoch": 0.05488565488565489, "grad_norm": 0.23612529039382935, "learning_rate": 4.9025590945994056e-05, "loss": 1.5393, "step": 1599 }, { "epoch": 0.05544005544005544, "grad_norm": 0.23861654102802277, "learning_rate": 4.8990669203914475e-05, "loss": 1.5072, "step": 1600 }, { "epoch": 0.055994455994455995, "grad_norm": 0.24118700623512268, "learning_rate": 4.895574024298667e-05, "loss": 1.5093, "step": 1601 }, { "epoch": 0.05654885654885655, "grad_norm": 0.2555467486381531, "learning_rate": 4.892080409125601e-05, "loss": 1.4938, "step": 1602 }, { "epoch": 0.0571032571032571, "grad_norm": 0.22886864840984344, "learning_rate": 4.8885860776773675e-05, "loss": 1.5223, "step": 1603 }, { "epoch": 0.05765765765765766, "grad_norm": 0.24102212488651276, "learning_rate": 4.88509103275966e-05, "loss": 1.4797, "step": 1604 }, { "epoch": 0.058212058212058215, "grad_norm": 0.2412523478269577, "learning_rate": 4.8815952771787396e-05, "loss": 1.5216, "step": 1605 }, { "epoch": 0.058766458766458765, "grad_norm": 0.24153347313404083, "learning_rate": 4.878098813741446e-05, "loss": 1.5564, "step": 1606 }, { "epoch": 0.05932085932085932, "grad_norm": 0.2564477324485779, "learning_rate": 4.874601645255181e-05, "loss": 1.5507, "step": 1607 }, { "epoch": 0.05987525987525988, "grad_norm": 0.2371818721294403, "learning_rate": 4.871103774527914e-05, "loss": 1.5564, "step": 1608 }, { "epoch": 0.06042966042966043, "grad_norm": 0.2681926488876343, "learning_rate": 4.8676052043681796e-05, "loss": 1.5429, "step": 1609 }, { "epoch": 0.060984060984060985, "grad_norm": 0.23062719404697418, "learning_rate": 4.864105937585072e-05, "loss": 1.5252, "step": 1610 }, { "epoch": 0.06153846153846154, "grad_norm": 0.24026815593242645, "learning_rate": 4.860605976988249e-05, "loss": 1.5088, "step": 1611 }, { "epoch": 0.06209286209286209, "grad_norm": 0.24074113368988037, "learning_rate": 4.8571053253879184e-05, "loss": 1.5441, "step": 1612 }, { "epoch": 0.06264726264726264, "grad_norm": 0.23633922636508942, "learning_rate": 4.8536039855948495e-05, "loss": 1.5376, "step": 1613 }, { "epoch": 0.0632016632016632, "grad_norm": 0.246513232588768, "learning_rate": 4.85010196042036e-05, "loss": 1.4954, "step": 1614 }, { "epoch": 0.06375606375606375, "grad_norm": 0.25328004360198975, "learning_rate": 4.8465992526763194e-05, "loss": 1.4648, "step": 1615 }, { "epoch": 0.0643104643104643, "grad_norm": 0.23787277936935425, "learning_rate": 4.843095865175147e-05, "loss": 1.4624, "step": 1616 }, { "epoch": 0.06486486486486487, "grad_norm": 0.28395015001296997, "learning_rate": 4.839591800729804e-05, "loss": 1.5397, "step": 1617 }, { "epoch": 0.06541926541926542, "grad_norm": 0.22746042907238007, "learning_rate": 4.836087062153799e-05, "loss": 1.4719, "step": 1618 }, { "epoch": 0.06597366597366597, "grad_norm": 0.30903488397598267, "learning_rate": 4.832581652261178e-05, "loss": 1.6077, "step": 1619 }, { "epoch": 0.06652806652806653, "grad_norm": 0.24124033749103546, "learning_rate": 4.8290755738665306e-05, "loss": 1.5053, "step": 1620 }, { "epoch": 0.06708246708246708, "grad_norm": 0.27052849531173706, "learning_rate": 4.825568829784978e-05, "loss": 1.5197, "step": 1621 }, { "epoch": 0.06763686763686763, "grad_norm": 0.27808162569999695, "learning_rate": 4.82206142283218e-05, "loss": 1.5196, "step": 1622 }, { "epoch": 0.0681912681912682, "grad_norm": 0.22812043130397797, "learning_rate": 4.818553355824329e-05, "loss": 1.516, "step": 1623 }, { "epoch": 0.06874566874566874, "grad_norm": 0.2735099196434021, "learning_rate": 4.8150446315781436e-05, "loss": 1.5255, "step": 1624 }, { "epoch": 0.0693000693000693, "grad_norm": 0.24241039156913757, "learning_rate": 4.811535252910871e-05, "loss": 1.5783, "step": 1625 }, { "epoch": 0.06985446985446986, "grad_norm": 0.24059553444385529, "learning_rate": 4.8080252226402884e-05, "loss": 1.5004, "step": 1626 }, { "epoch": 0.07040887040887041, "grad_norm": 0.2772434949874878, "learning_rate": 4.8045145435846896e-05, "loss": 1.533, "step": 1627 }, { "epoch": 0.07096327096327096, "grad_norm": 0.24444307386875153, "learning_rate": 4.801003218562895e-05, "loss": 1.5005, "step": 1628 }, { "epoch": 0.07151767151767152, "grad_norm": 0.3272010087966919, "learning_rate": 4.797491250394238e-05, "loss": 1.5825, "step": 1629 }, { "epoch": 0.07207207207207207, "grad_norm": 0.25123974680900574, "learning_rate": 4.793978641898575e-05, "loss": 1.5807, "step": 1630 }, { "epoch": 0.07262647262647262, "grad_norm": 0.2576984167098999, "learning_rate": 4.790465395896271e-05, "loss": 1.5032, "step": 1631 }, { "epoch": 0.07318087318087318, "grad_norm": 0.293028861284256, "learning_rate": 4.7869515152082056e-05, "loss": 1.5517, "step": 1632 }, { "epoch": 0.07373527373527373, "grad_norm": 0.23065245151519775, "learning_rate": 4.783437002655768e-05, "loss": 1.5369, "step": 1633 }, { "epoch": 0.07428967428967428, "grad_norm": 0.3111153542995453, "learning_rate": 4.779921861060853e-05, "loss": 1.4929, "step": 1634 }, { "epoch": 0.07484407484407485, "grad_norm": 0.2615680396556854, "learning_rate": 4.7764060932458615e-05, "loss": 1.5728, "step": 1635 }, { "epoch": 0.0753984753984754, "grad_norm": 0.2610320448875427, "learning_rate": 4.7728897020336985e-05, "loss": 1.5559, "step": 1636 }, { "epoch": 0.07595287595287595, "grad_norm": 0.27500590682029724, "learning_rate": 4.7693726902477676e-05, "loss": 1.5183, "step": 1637 }, { "epoch": 0.07650727650727651, "grad_norm": 0.24821346998214722, "learning_rate": 4.765855060711972e-05, "loss": 1.5283, "step": 1638 }, { "epoch": 0.07706167706167706, "grad_norm": 0.2438855618238449, "learning_rate": 4.762336816250709e-05, "loss": 1.4856, "step": 1639 }, { "epoch": 0.07761607761607761, "grad_norm": 0.2547777593135834, "learning_rate": 4.758817959688872e-05, "loss": 1.5003, "step": 1640 }, { "epoch": 0.07817047817047817, "grad_norm": 0.24655909836292267, "learning_rate": 4.755298493851845e-05, "loss": 1.4848, "step": 1641 }, { "epoch": 0.07872487872487872, "grad_norm": 0.24383637309074402, "learning_rate": 4.751778421565501e-05, "loss": 1.494, "step": 1642 }, { "epoch": 0.07927927927927927, "grad_norm": 0.23917736113071442, "learning_rate": 4.748257745656199e-05, "loss": 1.4843, "step": 1643 }, { "epoch": 0.07983367983367984, "grad_norm": 0.24065594375133514, "learning_rate": 4.744736468950784e-05, "loss": 1.5374, "step": 1644 }, { "epoch": 0.08038808038808039, "grad_norm": 0.22580263018608093, "learning_rate": 4.741214594276585e-05, "loss": 1.4158, "step": 1645 }, { "epoch": 0.08094248094248094, "grad_norm": 0.24750377237796783, "learning_rate": 4.737692124461406e-05, "loss": 1.5305, "step": 1646 }, { "epoch": 0.0814968814968815, "grad_norm": 0.226656973361969, "learning_rate": 4.734169062333534e-05, "loss": 1.4258, "step": 1647 }, { "epoch": 0.08205128205128205, "grad_norm": 0.23321537673473358, "learning_rate": 4.73064541072173e-05, "loss": 1.4666, "step": 1648 }, { "epoch": 0.0826056826056826, "grad_norm": 0.27421921491622925, "learning_rate": 4.727121172455226e-05, "loss": 1.5928, "step": 1649 }, { "epoch": 0.08316008316008316, "grad_norm": 0.2396385222673416, "learning_rate": 4.723596350363727e-05, "loss": 1.5201, "step": 1650 }, { "epoch": 0.08371448371448371, "grad_norm": 0.2699028551578522, "learning_rate": 4.7200709472774104e-05, "loss": 1.5241, "step": 1651 }, { "epoch": 0.08426888426888426, "grad_norm": 0.2384534776210785, "learning_rate": 4.716544966026911e-05, "loss": 1.5247, "step": 1652 }, { "epoch": 0.08482328482328483, "grad_norm": 0.2358793318271637, "learning_rate": 4.713018409443338e-05, "loss": 1.4825, "step": 1653 }, { "epoch": 0.08537768537768538, "grad_norm": 0.2665063142776489, "learning_rate": 4.709491280358255e-05, "loss": 1.5231, "step": 1654 }, { "epoch": 0.08593208593208593, "grad_norm": 0.23082147538661957, "learning_rate": 4.70596358160369e-05, "loss": 1.5718, "step": 1655 }, { "epoch": 0.08648648648648649, "grad_norm": 0.26113733649253845, "learning_rate": 4.7024353160121246e-05, "loss": 1.5361, "step": 1656 }, { "epoch": 0.08704088704088704, "grad_norm": 0.24743878841400146, "learning_rate": 4.698906486416498e-05, "loss": 1.5258, "step": 1657 }, { "epoch": 0.08759528759528759, "grad_norm": 0.22798630595207214, "learning_rate": 4.695377095650202e-05, "loss": 1.489, "step": 1658 }, { "epoch": 0.08814968814968815, "grad_norm": 0.26702702045440674, "learning_rate": 4.691847146547077e-05, "loss": 1.5345, "step": 1659 }, { "epoch": 0.0887040887040887, "grad_norm": 0.2464192807674408, "learning_rate": 4.688316641941417e-05, "loss": 1.5123, "step": 1660 }, { "epoch": 0.08925848925848925, "grad_norm": 0.24607868492603302, "learning_rate": 4.684785584667955e-05, "loss": 1.6203, "step": 1661 }, { "epoch": 0.08981288981288982, "grad_norm": 0.2512187361717224, "learning_rate": 4.6812539775618725e-05, "loss": 1.5422, "step": 1662 }, { "epoch": 0.09036729036729037, "grad_norm": 0.23812708258628845, "learning_rate": 4.6777218234587915e-05, "loss": 1.5922, "step": 1663 }, { "epoch": 0.09092169092169092, "grad_norm": 0.23268821835517883, "learning_rate": 4.67418912519477e-05, "loss": 1.5076, "step": 1664 }, { "epoch": 0.09147609147609148, "grad_norm": 0.24235466122627258, "learning_rate": 4.6706558856063114e-05, "loss": 1.6064, "step": 1665 }, { "epoch": 0.09203049203049203, "grad_norm": 0.22621028125286102, "learning_rate": 4.6671221075303416e-05, "loss": 1.4482, "step": 1666 }, { "epoch": 0.09258489258489258, "grad_norm": 0.24217869341373444, "learning_rate": 4.6635877938042296e-05, "loss": 1.5858, "step": 1667 }, { "epoch": 0.09313929313929314, "grad_norm": 0.23775462806224823, "learning_rate": 4.660052947265768e-05, "loss": 1.5253, "step": 1668 }, { "epoch": 0.0936936936936937, "grad_norm": 0.23735234141349792, "learning_rate": 4.65651757075318e-05, "loss": 1.4924, "step": 1669 }, { "epoch": 0.09424809424809424, "grad_norm": 0.25632667541503906, "learning_rate": 4.652981667105115e-05, "loss": 1.5294, "step": 1670 }, { "epoch": 0.09480249480249481, "grad_norm": 0.2412187159061432, "learning_rate": 4.6494452391606434e-05, "loss": 1.502, "step": 1671 }, { "epoch": 0.09535689535689536, "grad_norm": 0.2430036962032318, "learning_rate": 4.6459082897592575e-05, "loss": 1.5277, "step": 1672 }, { "epoch": 0.09591129591129591, "grad_norm": 0.25209280848503113, "learning_rate": 4.642370821740868e-05, "loss": 1.5468, "step": 1673 }, { "epoch": 0.09646569646569647, "grad_norm": 0.24189290404319763, "learning_rate": 4.638832837945803e-05, "loss": 1.4973, "step": 1674 }, { "epoch": 0.09702009702009702, "grad_norm": 0.247356578707695, "learning_rate": 4.6352943412148044e-05, "loss": 1.5341, "step": 1675 }, { "epoch": 0.09757449757449757, "grad_norm": 0.23435866832733154, "learning_rate": 4.6317553343890244e-05, "loss": 1.5131, "step": 1676 }, { "epoch": 0.09812889812889813, "grad_norm": 0.23501558601856232, "learning_rate": 4.628215820310028e-05, "loss": 1.5537, "step": 1677 }, { "epoch": 0.09868329868329868, "grad_norm": 0.24884603917598724, "learning_rate": 4.624675801819783e-05, "loss": 1.572, "step": 1678 }, { "epoch": 0.09923769923769923, "grad_norm": 0.2383822798728943, "learning_rate": 4.6211352817606675e-05, "loss": 1.4968, "step": 1679 }, { "epoch": 0.0997920997920998, "grad_norm": 0.23201164603233337, "learning_rate": 4.617594262975457e-05, "loss": 1.4662, "step": 1680 }, { "epoch": 0.10034650034650035, "grad_norm": 0.23198291659355164, "learning_rate": 4.614052748307331e-05, "loss": 1.4821, "step": 1681 }, { "epoch": 0.1009009009009009, "grad_norm": 0.2490098774433136, "learning_rate": 4.6105107405998666e-05, "loss": 1.5801, "step": 1682 }, { "epoch": 0.10145530145530146, "grad_norm": 0.2421119511127472, "learning_rate": 4.6069682426970363e-05, "loss": 1.5037, "step": 1683 }, { "epoch": 0.10200970200970201, "grad_norm": 0.23865409195423126, "learning_rate": 4.6034252574432045e-05, "loss": 1.5474, "step": 1684 }, { "epoch": 0.10256410256410256, "grad_norm": 0.2445671111345291, "learning_rate": 4.5998817876831326e-05, "loss": 1.5644, "step": 1685 }, { "epoch": 0.10311850311850312, "grad_norm": 0.25363487005233765, "learning_rate": 4.596337836261962e-05, "loss": 1.5046, "step": 1686 }, { "epoch": 0.10367290367290367, "grad_norm": 0.23598498106002808, "learning_rate": 4.592793406025229e-05, "loss": 1.4994, "step": 1687 }, { "epoch": 0.10422730422730422, "grad_norm": 0.2493610829114914, "learning_rate": 4.589248499818851e-05, "loss": 1.4859, "step": 1688 }, { "epoch": 0.10478170478170479, "grad_norm": 0.23938624560832977, "learning_rate": 4.585703120489126e-05, "loss": 1.5674, "step": 1689 }, { "epoch": 0.10533610533610534, "grad_norm": 0.26452407240867615, "learning_rate": 4.582157270882736e-05, "loss": 1.5616, "step": 1690 }, { "epoch": 0.10589050589050589, "grad_norm": 0.24048514664173126, "learning_rate": 4.5786109538467366e-05, "loss": 1.5294, "step": 1691 }, { "epoch": 0.10644490644490645, "grad_norm": 0.2471228837966919, "learning_rate": 4.5750641722285615e-05, "loss": 1.5334, "step": 1692 }, { "epoch": 0.106999306999307, "grad_norm": 0.24606989324092865, "learning_rate": 4.571516928876015e-05, "loss": 1.4702, "step": 1693 }, { "epoch": 0.10755370755370755, "grad_norm": 0.2339276373386383, "learning_rate": 4.567969226637275e-05, "loss": 1.525, "step": 1694 }, { "epoch": 0.10810810810810811, "grad_norm": 0.23296022415161133, "learning_rate": 4.564421068360886e-05, "loss": 1.4668, "step": 1695 }, { "epoch": 0.10866250866250866, "grad_norm": 0.24360519647598267, "learning_rate": 4.560872456895759e-05, "loss": 1.4837, "step": 1696 }, { "epoch": 0.10921690921690921, "grad_norm": 0.24057696759700775, "learning_rate": 4.5573233950911675e-05, "loss": 1.5589, "step": 1697 }, { "epoch": 0.10977130977130978, "grad_norm": 0.24787548184394836, "learning_rate": 4.553773885796748e-05, "loss": 1.4862, "step": 1698 }, { "epoch": 0.11032571032571033, "grad_norm": 0.24439023435115814, "learning_rate": 4.5502239318624975e-05, "loss": 1.551, "step": 1699 }, { "epoch": 0.11088011088011088, "grad_norm": 0.23626047372817993, "learning_rate": 4.5466735361387697e-05, "loss": 1.5673, "step": 1700 }, { "epoch": 0.11143451143451144, "grad_norm": 0.2404555380344391, "learning_rate": 4.54312270147627e-05, "loss": 1.4432, "step": 1701 }, { "epoch": 0.11198891198891199, "grad_norm": 0.2425653636455536, "learning_rate": 4.539571430726059e-05, "loss": 1.527, "step": 1702 }, { "epoch": 0.11254331254331254, "grad_norm": 0.24132198095321655, "learning_rate": 4.5360197267395465e-05, "loss": 1.5271, "step": 1703 }, { "epoch": 0.1130977130977131, "grad_norm": 0.25043484568595886, "learning_rate": 4.532467592368491e-05, "loss": 1.4719, "step": 1704 }, { "epoch": 0.11365211365211365, "grad_norm": 0.24764753878116608, "learning_rate": 4.528915030464995e-05, "loss": 1.504, "step": 1705 }, { "epoch": 0.1142065142065142, "grad_norm": 0.23885896801948547, "learning_rate": 4.5253620438815066e-05, "loss": 1.557, "step": 1706 }, { "epoch": 0.11476091476091477, "grad_norm": 0.2617689371109009, "learning_rate": 4.521808635470813e-05, "loss": 1.6442, "step": 1707 }, { "epoch": 0.11531531531531532, "grad_norm": 0.2249898612499237, "learning_rate": 4.51825480808604e-05, "loss": 1.4618, "step": 1708 }, { "epoch": 0.11586971586971587, "grad_norm": 0.24235212802886963, "learning_rate": 4.5147005645806515e-05, "loss": 1.5605, "step": 1709 }, { "epoch": 0.11642411642411643, "grad_norm": 0.24356012046337128, "learning_rate": 4.511145907808444e-05, "loss": 1.4986, "step": 1710 }, { "epoch": 0.11697851697851698, "grad_norm": 0.23570431768894196, "learning_rate": 4.507590840623546e-05, "loss": 1.494, "step": 1711 }, { "epoch": 0.11753291753291753, "grad_norm": 0.2461371123790741, "learning_rate": 4.5040353658804155e-05, "loss": 1.5085, "step": 1712 }, { "epoch": 0.1180873180873181, "grad_norm": 0.2513980269432068, "learning_rate": 4.500479486433839e-05, "loss": 1.5236, "step": 1713 }, { "epoch": 0.11864171864171864, "grad_norm": 0.25200584530830383, "learning_rate": 4.4969232051389276e-05, "loss": 1.5267, "step": 1714 }, { "epoch": 0.1191961191961192, "grad_norm": 0.2579837441444397, "learning_rate": 4.493366524851112e-05, "loss": 1.4894, "step": 1715 }, { "epoch": 0.11975051975051976, "grad_norm": 0.23613086342811584, "learning_rate": 4.4898094484261475e-05, "loss": 1.4962, "step": 1716 }, { "epoch": 0.12030492030492031, "grad_norm": 0.23936963081359863, "learning_rate": 4.4862519787201066e-05, "loss": 1.5206, "step": 1717 }, { "epoch": 0.12085932085932086, "grad_norm": 0.2370792180299759, "learning_rate": 4.4826941185893735e-05, "loss": 1.4904, "step": 1718 }, { "epoch": 0.12141372141372142, "grad_norm": 0.22792117297649384, "learning_rate": 4.479135870890652e-05, "loss": 1.5012, "step": 1719 }, { "epoch": 0.12196812196812197, "grad_norm": 0.2386564016342163, "learning_rate": 4.475577238480952e-05, "loss": 1.4689, "step": 1720 }, { "epoch": 0.12252252252252252, "grad_norm": 0.2350076138973236, "learning_rate": 4.472018224217596e-05, "loss": 1.5176, "step": 1721 }, { "epoch": 0.12307692307692308, "grad_norm": 0.23799531161785126, "learning_rate": 4.468458830958213e-05, "loss": 1.4751, "step": 1722 }, { "epoch": 0.12363132363132363, "grad_norm": 0.24065637588500977, "learning_rate": 4.464899061560731e-05, "loss": 1.5211, "step": 1723 }, { "epoch": 0.12418572418572418, "grad_norm": 0.24629543721675873, "learning_rate": 4.461338918883387e-05, "loss": 1.5812, "step": 1724 }, { "epoch": 0.12474012474012475, "grad_norm": 0.24027995765209198, "learning_rate": 4.457778405784712e-05, "loss": 1.5109, "step": 1725 }, { "epoch": 0.12529452529452528, "grad_norm": 0.23162126541137695, "learning_rate": 4.4542175251235387e-05, "loss": 1.4878, "step": 1726 }, { "epoch": 0.12584892584892585, "grad_norm": 0.23792633414268494, "learning_rate": 4.4506562797589934e-05, "loss": 1.5453, "step": 1727 }, { "epoch": 0.1264033264033264, "grad_norm": 0.23414942622184753, "learning_rate": 4.4470946725504936e-05, "loss": 1.5394, "step": 1728 }, { "epoch": 0.12695772695772695, "grad_norm": 0.2346811443567276, "learning_rate": 4.4435327063577505e-05, "loss": 1.5706, "step": 1729 }, { "epoch": 0.1275121275121275, "grad_norm": 0.23633529245853424, "learning_rate": 4.43997038404076e-05, "loss": 1.5063, "step": 1730 }, { "epoch": 0.12806652806652807, "grad_norm": 0.24427150189876556, "learning_rate": 4.4364077084598074e-05, "loss": 1.5013, "step": 1731 }, { "epoch": 0.1286209286209286, "grad_norm": 0.24648012220859528, "learning_rate": 4.43284468247546e-05, "loss": 1.5591, "step": 1732 }, { "epoch": 0.12917532917532917, "grad_norm": 0.2427440583705902, "learning_rate": 4.429281308948564e-05, "loss": 1.4942, "step": 1733 }, { "epoch": 0.12972972972972974, "grad_norm": 0.2318679690361023, "learning_rate": 4.4257175907402515e-05, "loss": 1.4916, "step": 1734 }, { "epoch": 0.13028413028413027, "grad_norm": 0.2468242049217224, "learning_rate": 4.422153530711921e-05, "loss": 1.5722, "step": 1735 }, { "epoch": 0.13083853083853084, "grad_norm": 0.23176340758800507, "learning_rate": 4.418589131725257e-05, "loss": 1.543, "step": 1736 }, { "epoch": 0.1313929313929314, "grad_norm": 0.23824675381183624, "learning_rate": 4.415024396642207e-05, "loss": 1.5199, "step": 1737 }, { "epoch": 0.13194733194733194, "grad_norm": 0.2343084067106247, "learning_rate": 4.411459328324994e-05, "loss": 1.5434, "step": 1738 }, { "epoch": 0.1325017325017325, "grad_norm": 0.24300968647003174, "learning_rate": 4.407893929636107e-05, "loss": 1.5244, "step": 1739 }, { "epoch": 0.13305613305613306, "grad_norm": 0.23985837399959564, "learning_rate": 4.404328203438296e-05, "loss": 1.5392, "step": 1740 }, { "epoch": 0.1336105336105336, "grad_norm": 0.24163936078548431, "learning_rate": 4.400762152594581e-05, "loss": 1.519, "step": 1741 }, { "epoch": 0.13416493416493416, "grad_norm": 0.24336585402488708, "learning_rate": 4.39719577996824e-05, "loss": 1.5373, "step": 1742 }, { "epoch": 0.13471933471933473, "grad_norm": 0.2352674901485443, "learning_rate": 4.393629088422804e-05, "loss": 1.5558, "step": 1743 }, { "epoch": 0.13527373527373526, "grad_norm": 0.233093723654747, "learning_rate": 4.390062080822072e-05, "loss": 1.4829, "step": 1744 }, { "epoch": 0.13582813582813583, "grad_norm": 0.23982946574687958, "learning_rate": 4.386494760030084e-05, "loss": 1.4693, "step": 1745 }, { "epoch": 0.1363825363825364, "grad_norm": 0.2503097653388977, "learning_rate": 4.3829271289111384e-05, "loss": 1.5743, "step": 1746 }, { "epoch": 0.13693693693693693, "grad_norm": 0.2348773330450058, "learning_rate": 4.3793591903297825e-05, "loss": 1.4904, "step": 1747 }, { "epoch": 0.1374913374913375, "grad_norm": 0.23469799757003784, "learning_rate": 4.375790947150808e-05, "loss": 1.5358, "step": 1748 }, { "epoch": 0.13804573804573805, "grad_norm": 0.24238461256027222, "learning_rate": 4.3722224022392555e-05, "loss": 1.5216, "step": 1749 }, { "epoch": 0.1386001386001386, "grad_norm": 0.2337646335363388, "learning_rate": 4.3686535584604e-05, "loss": 1.5154, "step": 1750 }, { "epoch": 0.13915453915453915, "grad_norm": 0.23583389818668365, "learning_rate": 4.3650844186797666e-05, "loss": 1.5066, "step": 1751 }, { "epoch": 0.13970893970893972, "grad_norm": 0.24508389830589294, "learning_rate": 4.361514985763109e-05, "loss": 1.4856, "step": 1752 }, { "epoch": 0.14026334026334025, "grad_norm": 0.2271859496831894, "learning_rate": 4.357945262576421e-05, "loss": 1.4521, "step": 1753 }, { "epoch": 0.14081774081774082, "grad_norm": 0.23057958483695984, "learning_rate": 4.3543752519859306e-05, "loss": 1.4738, "step": 1754 }, { "epoch": 0.14137214137214138, "grad_norm": 0.23309603333473206, "learning_rate": 4.350804956858093e-05, "loss": 1.4947, "step": 1755 }, { "epoch": 0.14192654192654192, "grad_norm": 0.23328322172164917, "learning_rate": 4.3472343800595936e-05, "loss": 1.4989, "step": 1756 }, { "epoch": 0.14248094248094248, "grad_norm": 0.24792000651359558, "learning_rate": 4.343663524457344e-05, "loss": 1.5045, "step": 1757 }, { "epoch": 0.14303534303534304, "grad_norm": 0.24662478268146515, "learning_rate": 4.340092392918478e-05, "loss": 1.5543, "step": 1758 }, { "epoch": 0.14358974358974358, "grad_norm": 0.25486528873443604, "learning_rate": 4.336520988310355e-05, "loss": 1.574, "step": 1759 }, { "epoch": 0.14414414414414414, "grad_norm": 0.22766557335853577, "learning_rate": 4.3329493135005485e-05, "loss": 1.4893, "step": 1760 }, { "epoch": 0.1446985446985447, "grad_norm": 0.23879112303256989, "learning_rate": 4.329377371356855e-05, "loss": 1.5346, "step": 1761 }, { "epoch": 0.14525294525294524, "grad_norm": 0.24858912825584412, "learning_rate": 4.325805164747278e-05, "loss": 1.5427, "step": 1762 }, { "epoch": 0.1458073458073458, "grad_norm": 0.2679620385169983, "learning_rate": 4.32223269654004e-05, "loss": 1.5459, "step": 1763 }, { "epoch": 0.14636174636174637, "grad_norm": 0.23490265011787415, "learning_rate": 4.318659969603571e-05, "loss": 1.5441, "step": 1764 }, { "epoch": 0.1469161469161469, "grad_norm": 0.23390935361385345, "learning_rate": 4.315086986806509e-05, "loss": 1.5425, "step": 1765 }, { "epoch": 0.14747054747054747, "grad_norm": 0.2335522174835205, "learning_rate": 4.3115137510176954e-05, "loss": 1.5266, "step": 1766 }, { "epoch": 0.14802494802494803, "grad_norm": 0.24284155666828156, "learning_rate": 4.307940265106178e-05, "loss": 1.5315, "step": 1767 }, { "epoch": 0.14857934857934857, "grad_norm": 0.23893727362155914, "learning_rate": 4.3043665319412045e-05, "loss": 1.4494, "step": 1768 }, { "epoch": 0.14913374913374913, "grad_norm": 0.23251236975193024, "learning_rate": 4.3007925543922194e-05, "loss": 1.549, "step": 1769 }, { "epoch": 0.1496881496881497, "grad_norm": 0.2353825718164444, "learning_rate": 4.2972183353288635e-05, "loss": 1.5089, "step": 1770 }, { "epoch": 0.15024255024255023, "grad_norm": 0.23454025387763977, "learning_rate": 4.293643877620977e-05, "loss": 1.502, "step": 1771 }, { "epoch": 0.1507969507969508, "grad_norm": 0.2456715703010559, "learning_rate": 4.2900691841385836e-05, "loss": 1.5064, "step": 1772 }, { "epoch": 0.15135135135135136, "grad_norm": 0.24128831923007965, "learning_rate": 4.2864942577519e-05, "loss": 1.5586, "step": 1773 }, { "epoch": 0.1519057519057519, "grad_norm": 0.23839378356933594, "learning_rate": 4.282919101331333e-05, "loss": 1.4957, "step": 1774 }, { "epoch": 0.15246015246015246, "grad_norm": 0.26576605439186096, "learning_rate": 4.279343717747469e-05, "loss": 1.5002, "step": 1775 }, { "epoch": 0.15301455301455302, "grad_norm": 0.2448890507221222, "learning_rate": 4.275768109871079e-05, "loss": 1.5439, "step": 1776 }, { "epoch": 0.15356895356895356, "grad_norm": 0.25102654099464417, "learning_rate": 4.272192280573114e-05, "loss": 1.6086, "step": 1777 }, { "epoch": 0.15412335412335412, "grad_norm": 0.2524796426296234, "learning_rate": 4.268616232724703e-05, "loss": 1.5564, "step": 1778 }, { "epoch": 0.1546777546777547, "grad_norm": 0.23300211131572723, "learning_rate": 4.265039969197151e-05, "loss": 1.4663, "step": 1779 }, { "epoch": 0.15523215523215522, "grad_norm": 0.23859506845474243, "learning_rate": 4.261463492861934e-05, "loss": 1.5091, "step": 1780 }, { "epoch": 0.15578655578655579, "grad_norm": 0.23729431629180908, "learning_rate": 4.2578868065906996e-05, "loss": 1.5135, "step": 1781 }, { "epoch": 0.15634095634095635, "grad_norm": 0.23727060854434967, "learning_rate": 4.2543099132552674e-05, "loss": 1.5014, "step": 1782 }, { "epoch": 0.15689535689535689, "grad_norm": 0.23521551489830017, "learning_rate": 4.250732815727619e-05, "loss": 1.4986, "step": 1783 }, { "epoch": 0.15744975744975745, "grad_norm": 0.24837486445903778, "learning_rate": 4.247155516879902e-05, "loss": 1.5406, "step": 1784 }, { "epoch": 0.158004158004158, "grad_norm": 0.2510465383529663, "learning_rate": 4.243578019584424e-05, "loss": 1.5522, "step": 1785 }, { "epoch": 0.15855855855855855, "grad_norm": 0.24216195940971375, "learning_rate": 4.240000326713654e-05, "loss": 1.5457, "step": 1786 }, { "epoch": 0.1591129591129591, "grad_norm": 0.24078871309757233, "learning_rate": 4.236422441140217e-05, "loss": 1.4664, "step": 1787 }, { "epoch": 0.15966735966735968, "grad_norm": 0.2495059072971344, "learning_rate": 4.232844365736892e-05, "loss": 1.5306, "step": 1788 }, { "epoch": 0.1602217602217602, "grad_norm": 0.23124068975448608, "learning_rate": 4.229266103376612e-05, "loss": 1.533, "step": 1789 }, { "epoch": 0.16077616077616078, "grad_norm": 0.25814753770828247, "learning_rate": 4.2256876569324596e-05, "loss": 1.5456, "step": 1790 }, { "epoch": 0.16133056133056134, "grad_norm": 0.2408868670463562, "learning_rate": 4.222109029277665e-05, "loss": 1.5401, "step": 1791 }, { "epoch": 0.16188496188496188, "grad_norm": 0.2344392091035843, "learning_rate": 4.218530223285602e-05, "loss": 1.49, "step": 1792 }, { "epoch": 0.16243936243936244, "grad_norm": 0.260307639837265, "learning_rate": 4.2149512418297924e-05, "loss": 1.5052, "step": 1793 }, { "epoch": 0.162993762993763, "grad_norm": 0.2375381737947464, "learning_rate": 4.211372087783893e-05, "loss": 1.5177, "step": 1794 }, { "epoch": 0.16354816354816354, "grad_norm": 0.24120914936065674, "learning_rate": 4.2077927640217046e-05, "loss": 1.503, "step": 1795 }, { "epoch": 0.1641025641025641, "grad_norm": 0.24641527235507965, "learning_rate": 4.20421327341716e-05, "loss": 1.492, "step": 1796 }, { "epoch": 0.16465696465696467, "grad_norm": 0.22899188101291656, "learning_rate": 4.200633618844327e-05, "loss": 1.4746, "step": 1797 }, { "epoch": 0.1652113652113652, "grad_norm": 0.23603221774101257, "learning_rate": 4.1970538031774094e-05, "loss": 1.4926, "step": 1798 }, { "epoch": 0.16576576576576577, "grad_norm": 0.2547254264354706, "learning_rate": 4.1934738292907335e-05, "loss": 1.509, "step": 1799 }, { "epoch": 0.16632016632016633, "grad_norm": 0.24782003462314606, "learning_rate": 4.189893700058757e-05, "loss": 1.5429, "step": 1800 }, { "epoch": 0.16687456687456687, "grad_norm": 0.23784643411636353, "learning_rate": 4.1863134183560614e-05, "loss": 1.5129, "step": 1801 }, { "epoch": 0.16742896742896743, "grad_norm": 0.24207793176174164, "learning_rate": 4.1827329870573495e-05, "loss": 1.5007, "step": 1802 }, { "epoch": 0.167983367983368, "grad_norm": 0.24227993190288544, "learning_rate": 4.1791524090374465e-05, "loss": 1.5221, "step": 1803 }, { "epoch": 0.16853776853776853, "grad_norm": 0.24385443329811096, "learning_rate": 4.1755716871712914e-05, "loss": 1.5247, "step": 1804 }, { "epoch": 0.1690921690921691, "grad_norm": 0.229455828666687, "learning_rate": 4.1719908243339456e-05, "loss": 1.4452, "step": 1805 }, { "epoch": 0.16964656964656966, "grad_norm": 0.24333839118480682, "learning_rate": 4.168409823400575e-05, "loss": 1.4744, "step": 1806 }, { "epoch": 0.1702009702009702, "grad_norm": 0.24000468850135803, "learning_rate": 4.164828687246464e-05, "loss": 1.4976, "step": 1807 }, { "epoch": 0.17075537075537076, "grad_norm": 0.23323607444763184, "learning_rate": 4.161247418747001e-05, "loss": 1.4862, "step": 1808 }, { "epoch": 0.17130977130977132, "grad_norm": 0.25204262137413025, "learning_rate": 4.157666020777683e-05, "loss": 1.5488, "step": 1809 }, { "epoch": 0.17186417186417186, "grad_norm": 0.24729879200458527, "learning_rate": 4.154084496214107e-05, "loss": 1.4971, "step": 1810 }, { "epoch": 0.17241857241857242, "grad_norm": 0.23654821515083313, "learning_rate": 4.1505028479319804e-05, "loss": 1.5323, "step": 1811 }, { "epoch": 0.17297297297297298, "grad_norm": 0.23780423402786255, "learning_rate": 4.146921078807097e-05, "loss": 1.4631, "step": 1812 }, { "epoch": 0.17352737352737352, "grad_norm": 0.24382425844669342, "learning_rate": 4.143339191715361e-05, "loss": 1.4875, "step": 1813 }, { "epoch": 0.17408177408177408, "grad_norm": 0.2383335679769516, "learning_rate": 4.139757189532763e-05, "loss": 1.525, "step": 1814 }, { "epoch": 0.17463617463617465, "grad_norm": 0.23220306634902954, "learning_rate": 4.136175075135387e-05, "loss": 1.4611, "step": 1815 }, { "epoch": 0.17519057519057518, "grad_norm": 0.247987762093544, "learning_rate": 4.132592851399409e-05, "loss": 1.5614, "step": 1816 }, { "epoch": 0.17574497574497575, "grad_norm": 0.24563199281692505, "learning_rate": 4.129010521201092e-05, "loss": 1.5672, "step": 1817 }, { "epoch": 0.1762993762993763, "grad_norm": 0.2311255931854248, "learning_rate": 4.125428087416786e-05, "loss": 1.493, "step": 1818 }, { "epoch": 0.17685377685377685, "grad_norm": 0.24840684235095978, "learning_rate": 4.121845552922918e-05, "loss": 1.4945, "step": 1819 }, { "epoch": 0.1774081774081774, "grad_norm": 0.2520841062068939, "learning_rate": 4.1182629205960066e-05, "loss": 1.513, "step": 1820 }, { "epoch": 0.17796257796257797, "grad_norm": 0.24336931109428406, "learning_rate": 4.11468019331264e-05, "loss": 1.4973, "step": 1821 }, { "epoch": 0.1785169785169785, "grad_norm": 0.2441493719816208, "learning_rate": 4.111097373949485e-05, "loss": 1.4859, "step": 1822 }, { "epoch": 0.17907137907137907, "grad_norm": 0.2503821551799774, "learning_rate": 4.1075144653832846e-05, "loss": 1.5853, "step": 1823 }, { "epoch": 0.17962577962577964, "grad_norm": 0.24211685359477997, "learning_rate": 4.103931470490849e-05, "loss": 1.5061, "step": 1824 }, { "epoch": 0.18018018018018017, "grad_norm": 0.23480790853500366, "learning_rate": 4.100348392149064e-05, "loss": 1.453, "step": 1825 }, { "epoch": 0.18073458073458074, "grad_norm": 0.2344498336315155, "learning_rate": 4.0967652332348765e-05, "loss": 1.4802, "step": 1826 }, { "epoch": 0.1812889812889813, "grad_norm": 0.23497405648231506, "learning_rate": 4.0931819966253e-05, "loss": 1.4518, "step": 1827 }, { "epoch": 0.18184338184338183, "grad_norm": 0.2339646965265274, "learning_rate": 4.0895986851974115e-05, "loss": 1.4791, "step": 1828 }, { "epoch": 0.1823977823977824, "grad_norm": 0.24394597113132477, "learning_rate": 4.086015301828348e-05, "loss": 1.5645, "step": 1829 }, { "epoch": 0.18295218295218296, "grad_norm": 0.24641010165214539, "learning_rate": 4.082431849395304e-05, "loss": 1.5, "step": 1830 }, { "epoch": 0.1835065835065835, "grad_norm": 0.23395437002182007, "learning_rate": 4.078848330775528e-05, "loss": 1.4479, "step": 1831 }, { "epoch": 0.18406098406098406, "grad_norm": 0.23971296846866608, "learning_rate": 4.075264748846323e-05, "loss": 1.5588, "step": 1832 }, { "epoch": 0.18461538461538463, "grad_norm": 0.24130338430404663, "learning_rate": 4.071681106485042e-05, "loss": 1.4989, "step": 1833 }, { "epoch": 0.18516978516978516, "grad_norm": 0.23449750244617462, "learning_rate": 4.068097406569087e-05, "loss": 1.511, "step": 1834 }, { "epoch": 0.18572418572418573, "grad_norm": 0.23731078207492828, "learning_rate": 4.064513651975909e-05, "loss": 1.4682, "step": 1835 }, { "epoch": 0.1862785862785863, "grad_norm": 0.23681940138339996, "learning_rate": 4.0609298455829966e-05, "loss": 1.4879, "step": 1836 }, { "epoch": 0.18683298683298682, "grad_norm": 0.24243465065956116, "learning_rate": 4.0573459902678866e-05, "loss": 1.5081, "step": 1837 }, { "epoch": 0.1873873873873874, "grad_norm": 0.23885251581668854, "learning_rate": 4.053762088908148e-05, "loss": 1.5038, "step": 1838 }, { "epoch": 0.18794178794178795, "grad_norm": 0.23990903794765472, "learning_rate": 4.050178144381396e-05, "loss": 1.511, "step": 1839 }, { "epoch": 0.1884961884961885, "grad_norm": 0.23311123251914978, "learning_rate": 4.0465941595652725e-05, "loss": 1.4236, "step": 1840 }, { "epoch": 0.18905058905058905, "grad_norm": 0.24595153331756592, "learning_rate": 4.0430101373374536e-05, "loss": 1.4995, "step": 1841 }, { "epoch": 0.18960498960498962, "grad_norm": 0.2332063913345337, "learning_rate": 4.039426080575649e-05, "loss": 1.5391, "step": 1842 }, { "epoch": 0.19015939015939015, "grad_norm": 0.2442326694726944, "learning_rate": 4.03584199215759e-05, "loss": 1.4831, "step": 1843 }, { "epoch": 0.19071379071379072, "grad_norm": 0.25192317366600037, "learning_rate": 4.03225787496104e-05, "loss": 1.5815, "step": 1844 }, { "epoch": 0.19126819126819128, "grad_norm": 0.23741456866264343, "learning_rate": 4.0286737318637826e-05, "loss": 1.5164, "step": 1845 }, { "epoch": 0.19182259182259181, "grad_norm": 0.2436688095331192, "learning_rate": 4.02508956574362e-05, "loss": 1.5242, "step": 1846 }, { "epoch": 0.19237699237699238, "grad_norm": 0.24163664877414703, "learning_rate": 4.021505379478376e-05, "loss": 1.5417, "step": 1847 }, { "epoch": 0.19293139293139294, "grad_norm": 0.23189128935337067, "learning_rate": 4.017921175945887e-05, "loss": 1.3813, "step": 1848 }, { "epoch": 0.19348579348579348, "grad_norm": 0.237282857298851, "learning_rate": 4.014336958024009e-05, "loss": 1.546, "step": 1849 }, { "epoch": 0.19404019404019404, "grad_norm": 0.24780717492103577, "learning_rate": 4.010752728590604e-05, "loss": 1.5906, "step": 1850 }, { "epoch": 0.1945945945945946, "grad_norm": 0.25032544136047363, "learning_rate": 4.007168490523547e-05, "loss": 1.6188, "step": 1851 }, { "epoch": 0.19514899514899514, "grad_norm": 0.23743852972984314, "learning_rate": 4.003584246700718e-05, "loss": 1.5123, "step": 1852 }, { "epoch": 0.1957033957033957, "grad_norm": 0.2762291431427002, "learning_rate": 4e-05, "loss": 1.5657, "step": 1853 }, { "epoch": 0.19625779625779627, "grad_norm": 0.2416423261165619, "learning_rate": 3.9964157532992834e-05, "loss": 1.4887, "step": 1854 }, { "epoch": 0.1968121968121968, "grad_norm": 0.22938209772109985, "learning_rate": 3.9928315094764545e-05, "loss": 1.4747, "step": 1855 }, { "epoch": 0.19736659736659737, "grad_norm": 0.22495947778224945, "learning_rate": 3.9892472714093974e-05, "loss": 1.495, "step": 1856 }, { "epoch": 0.19792099792099793, "grad_norm": 0.23761068284511566, "learning_rate": 3.985663041975993e-05, "loss": 1.5267, "step": 1857 }, { "epoch": 0.19847539847539847, "grad_norm": 0.23502880334854126, "learning_rate": 3.9820788240541145e-05, "loss": 1.5076, "step": 1858 }, { "epoch": 0.19902979902979903, "grad_norm": 0.23836706578731537, "learning_rate": 3.978494620521627e-05, "loss": 1.537, "step": 1859 }, { "epoch": 0.1995841995841996, "grad_norm": 0.2333037257194519, "learning_rate": 3.974910434256383e-05, "loss": 1.5186, "step": 1860 }, { "epoch": 0.20013860013860013, "grad_norm": 0.24056397378444672, "learning_rate": 3.971326268136218e-05, "loss": 1.5308, "step": 1861 }, { "epoch": 0.2006930006930007, "grad_norm": 0.23831728100776672, "learning_rate": 3.96774212503896e-05, "loss": 1.4924, "step": 1862 }, { "epoch": 0.20124740124740126, "grad_norm": 0.2567371129989624, "learning_rate": 3.96415800784241e-05, "loss": 1.526, "step": 1863 }, { "epoch": 0.2018018018018018, "grad_norm": 0.2384667843580246, "learning_rate": 3.960573919424353e-05, "loss": 1.4853, "step": 1864 }, { "epoch": 0.20235620235620236, "grad_norm": 0.24428196251392365, "learning_rate": 3.956989862662548e-05, "loss": 1.5021, "step": 1865 }, { "epoch": 0.20291060291060292, "grad_norm": 0.23921051621437073, "learning_rate": 3.953405840434729e-05, "loss": 1.4594, "step": 1866 }, { "epoch": 0.20346500346500346, "grad_norm": 0.2328265905380249, "learning_rate": 3.9498218556186055e-05, "loss": 1.4605, "step": 1867 }, { "epoch": 0.20401940401940402, "grad_norm": 0.23491501808166504, "learning_rate": 3.946237911091852e-05, "loss": 1.5453, "step": 1868 }, { "epoch": 0.20457380457380459, "grad_norm": 0.2391657829284668, "learning_rate": 3.9426540097321154e-05, "loss": 1.5038, "step": 1869 }, { "epoch": 0.20512820512820512, "grad_norm": 0.22968065738677979, "learning_rate": 3.939070154417005e-05, "loss": 1.5481, "step": 1870 }, { "epoch": 0.20568260568260568, "grad_norm": 0.25142428278923035, "learning_rate": 3.9354863480240925e-05, "loss": 1.561, "step": 1871 }, { "epoch": 0.20623700623700625, "grad_norm": 0.24254752695560455, "learning_rate": 3.931902593430914e-05, "loss": 1.5635, "step": 1872 }, { "epoch": 0.20679140679140678, "grad_norm": 0.24585455656051636, "learning_rate": 3.92831889351496e-05, "loss": 1.4705, "step": 1873 }, { "epoch": 0.20734580734580735, "grad_norm": 0.2334129512310028, "learning_rate": 3.9247352511536785e-05, "loss": 1.5409, "step": 1874 }, { "epoch": 0.2079002079002079, "grad_norm": 0.24198950827121735, "learning_rate": 3.921151669224474e-05, "loss": 1.5238, "step": 1875 }, { "epoch": 0.20845460845460845, "grad_norm": 0.2565530240535736, "learning_rate": 3.9175681506046966e-05, "loss": 1.546, "step": 1876 }, { "epoch": 0.209009009009009, "grad_norm": 0.24233520030975342, "learning_rate": 3.913984698171652e-05, "loss": 1.4994, "step": 1877 }, { "epoch": 0.20956340956340958, "grad_norm": 0.2374197095632553, "learning_rate": 3.910401314802588e-05, "loss": 1.5062, "step": 1878 }, { "epoch": 0.2101178101178101, "grad_norm": 0.24045586585998535, "learning_rate": 3.906818003374701e-05, "loss": 1.4875, "step": 1879 }, { "epoch": 0.21067221067221067, "grad_norm": 0.24725095927715302, "learning_rate": 3.903234766765125e-05, "loss": 1.5095, "step": 1880 }, { "epoch": 0.21122661122661124, "grad_norm": 0.24202238023281097, "learning_rate": 3.899651607850937e-05, "loss": 1.483, "step": 1881 }, { "epoch": 0.21178101178101177, "grad_norm": 0.23895207047462463, "learning_rate": 3.8960685295091514e-05, "loss": 1.5884, "step": 1882 }, { "epoch": 0.21233541233541234, "grad_norm": 0.24083030223846436, "learning_rate": 3.892485534616717e-05, "loss": 1.4398, "step": 1883 }, { "epoch": 0.2128898128898129, "grad_norm": 0.24503645300865173, "learning_rate": 3.8889026260505163e-05, "loss": 1.4255, "step": 1884 }, { "epoch": 0.21344421344421344, "grad_norm": 0.243561789393425, "learning_rate": 3.8853198066873614e-05, "loss": 1.5023, "step": 1885 }, { "epoch": 0.213998613998614, "grad_norm": 0.25789567828178406, "learning_rate": 3.881737079403994e-05, "loss": 1.4915, "step": 1886 }, { "epoch": 0.21455301455301456, "grad_norm": 0.2317241132259369, "learning_rate": 3.878154447077083e-05, "loss": 1.4411, "step": 1887 }, { "epoch": 0.2151074151074151, "grad_norm": 0.24554963409900665, "learning_rate": 3.874571912583217e-05, "loss": 1.4999, "step": 1888 }, { "epoch": 0.21566181566181566, "grad_norm": 0.25356563925743103, "learning_rate": 3.8709894787989096e-05, "loss": 1.5121, "step": 1889 }, { "epoch": 0.21621621621621623, "grad_norm": 0.2475980818271637, "learning_rate": 3.867407148600593e-05, "loss": 1.5479, "step": 1890 }, { "epoch": 0.21677061677061676, "grad_norm": 0.2546240985393524, "learning_rate": 3.863824924864615e-05, "loss": 1.5202, "step": 1891 }, { "epoch": 0.21732501732501733, "grad_norm": 0.2595331072807312, "learning_rate": 3.860242810467238e-05, "loss": 1.5717, "step": 1892 }, { "epoch": 0.2178794178794179, "grad_norm": 0.23848137259483337, "learning_rate": 3.856660808284639e-05, "loss": 1.5155, "step": 1893 }, { "epoch": 0.21843381843381843, "grad_norm": 0.2522137463092804, "learning_rate": 3.853078921192903e-05, "loss": 1.4841, "step": 1894 }, { "epoch": 0.218988218988219, "grad_norm": 0.2507745027542114, "learning_rate": 3.849497152068021e-05, "loss": 1.5134, "step": 1895 }, { "epoch": 0.21954261954261955, "grad_norm": 0.2380571812391281, "learning_rate": 3.845915503785893e-05, "loss": 1.4629, "step": 1896 }, { "epoch": 0.2200970200970201, "grad_norm": 0.2607712745666504, "learning_rate": 3.842333979222319e-05, "loss": 1.5857, "step": 1897 }, { "epoch": 0.22065142065142065, "grad_norm": 0.23979657888412476, "learning_rate": 3.8387525812529995e-05, "loss": 1.4991, "step": 1898 }, { "epoch": 0.22120582120582122, "grad_norm": 0.2478070855140686, "learning_rate": 3.8351713127535375e-05, "loss": 1.5575, "step": 1899 }, { "epoch": 0.22176022176022175, "grad_norm": 0.27279654145240784, "learning_rate": 3.831590176599426e-05, "loss": 1.5349, "step": 1900 }, { "epoch": 0.22231462231462232, "grad_norm": 0.2409866750240326, "learning_rate": 3.828009175666056e-05, "loss": 1.5778, "step": 1901 }, { "epoch": 0.22286902286902288, "grad_norm": 0.26726746559143066, "learning_rate": 3.824428312828709e-05, "loss": 1.5323, "step": 1902 }, { "epoch": 0.22342342342342342, "grad_norm": 0.2427883893251419, "learning_rate": 3.8208475909625555e-05, "loss": 1.4914, "step": 1903 }, { "epoch": 0.22397782397782398, "grad_norm": 0.25623229146003723, "learning_rate": 3.8172670129426525e-05, "loss": 1.529, "step": 1904 }, { "epoch": 0.22453222453222454, "grad_norm": 0.24120859801769257, "learning_rate": 3.8136865816439406e-05, "loss": 1.5035, "step": 1905 }, { "epoch": 0.22508662508662508, "grad_norm": 0.23750057816505432, "learning_rate": 3.810106299941244e-05, "loss": 1.4797, "step": 1906 }, { "epoch": 0.22564102564102564, "grad_norm": 0.231480672955513, "learning_rate": 3.8065261707092665e-05, "loss": 1.4404, "step": 1907 }, { "epoch": 0.2261954261954262, "grad_norm": 0.24299313127994537, "learning_rate": 3.802946196822591e-05, "loss": 1.4796, "step": 1908 }, { "epoch": 0.22674982674982674, "grad_norm": 0.2305423468351364, "learning_rate": 3.799366381155673e-05, "loss": 1.4326, "step": 1909 }, { "epoch": 0.2273042273042273, "grad_norm": 0.2506888806819916, "learning_rate": 3.795786726582841e-05, "loss": 1.5478, "step": 1910 }, { "epoch": 0.22785862785862787, "grad_norm": 0.240033358335495, "learning_rate": 3.792207235978297e-05, "loss": 1.5263, "step": 1911 }, { "epoch": 0.2284130284130284, "grad_norm": 0.2436107099056244, "learning_rate": 3.7886279122161076e-05, "loss": 1.4648, "step": 1912 }, { "epoch": 0.22896742896742897, "grad_norm": 0.25216901302337646, "learning_rate": 3.785048758170209e-05, "loss": 1.5847, "step": 1913 }, { "epoch": 0.22952182952182953, "grad_norm": 0.27026569843292236, "learning_rate": 3.7814697767143985e-05, "loss": 1.5749, "step": 1914 }, { "epoch": 0.23007623007623007, "grad_norm": 0.23860034346580505, "learning_rate": 3.7778909707223366e-05, "loss": 1.5104, "step": 1915 }, { "epoch": 0.23063063063063063, "grad_norm": 0.24010181427001953, "learning_rate": 3.774312343067542e-05, "loss": 1.4519, "step": 1916 }, { "epoch": 0.2311850311850312, "grad_norm": 0.245852991938591, "learning_rate": 3.770733896623389e-05, "loss": 1.4838, "step": 1917 }, { "epoch": 0.23173943173943173, "grad_norm": 0.236805722117424, "learning_rate": 3.7671556342631095e-05, "loss": 1.4808, "step": 1918 }, { "epoch": 0.2322938322938323, "grad_norm": 0.23129390180110931, "learning_rate": 3.763577558859785e-05, "loss": 1.4882, "step": 1919 }, { "epoch": 0.23284823284823286, "grad_norm": 0.23539075255393982, "learning_rate": 3.759999673286348e-05, "loss": 1.4092, "step": 1920 }, { "epoch": 0.2334026334026334, "grad_norm": 0.24532531201839447, "learning_rate": 3.7564219804155774e-05, "loss": 1.5785, "step": 1921 }, { "epoch": 0.23395703395703396, "grad_norm": 0.23199212551116943, "learning_rate": 3.7528444831200985e-05, "loss": 1.4577, "step": 1922 }, { "epoch": 0.23451143451143452, "grad_norm": 0.2398049533367157, "learning_rate": 3.7492671842723815e-05, "loss": 1.4724, "step": 1923 }, { "epoch": 0.23506583506583506, "grad_norm": 0.2403053641319275, "learning_rate": 3.7456900867447326e-05, "loss": 1.4667, "step": 1924 }, { "epoch": 0.23562023562023562, "grad_norm": 0.2339790314435959, "learning_rate": 3.7421131934093004e-05, "loss": 1.5378, "step": 1925 }, { "epoch": 0.2361746361746362, "grad_norm": 0.23639380931854248, "learning_rate": 3.738536507138068e-05, "loss": 1.473, "step": 1926 }, { "epoch": 0.23672903672903672, "grad_norm": 0.2434394806623459, "learning_rate": 3.7349600308028504e-05, "loss": 1.488, "step": 1927 }, { "epoch": 0.2372834372834373, "grad_norm": 0.236919105052948, "learning_rate": 3.7313837672752974e-05, "loss": 1.451, "step": 1928 }, { "epoch": 0.23783783783783785, "grad_norm": 0.23392297327518463, "learning_rate": 3.727807719426887e-05, "loss": 1.4536, "step": 1929 }, { "epoch": 0.2383922383922384, "grad_norm": 0.23652836680412292, "learning_rate": 3.724231890128922e-05, "loss": 1.5359, "step": 1930 }, { "epoch": 0.23894663894663895, "grad_norm": 0.2469359040260315, "learning_rate": 3.720656282252533e-05, "loss": 1.4845, "step": 1931 }, { "epoch": 0.23950103950103951, "grad_norm": 0.23652082681655884, "learning_rate": 3.717080898668668e-05, "loss": 1.4706, "step": 1932 }, { "epoch": 0.24005544005544005, "grad_norm": 0.23702210187911987, "learning_rate": 3.7135057422481005e-05, "loss": 1.487, "step": 1933 }, { "epoch": 0.24060984060984061, "grad_norm": 0.23361067473888397, "learning_rate": 3.7099308158614184e-05, "loss": 1.4948, "step": 1934 }, { "epoch": 0.24116424116424118, "grad_norm": 0.2416001707315445, "learning_rate": 3.706356122379025e-05, "loss": 1.5821, "step": 1935 }, { "epoch": 0.2417186417186417, "grad_norm": 0.24221090972423553, "learning_rate": 3.702781664671138e-05, "loss": 1.5382, "step": 1936 }, { "epoch": 0.24227304227304228, "grad_norm": 0.24278214573860168, "learning_rate": 3.699207445607781e-05, "loss": 1.4853, "step": 1937 }, { "epoch": 0.24282744282744284, "grad_norm": 0.23186738789081573, "learning_rate": 3.695633468058797e-05, "loss": 1.5022, "step": 1938 }, { "epoch": 0.24338184338184338, "grad_norm": 0.2328137904405594, "learning_rate": 3.692059734893822e-05, "loss": 1.4733, "step": 1939 }, { "epoch": 0.24393624393624394, "grad_norm": 0.23375138640403748, "learning_rate": 3.688486248982305e-05, "loss": 1.4468, "step": 1940 }, { "epoch": 0.2444906444906445, "grad_norm": 0.2423093169927597, "learning_rate": 3.6849130131934924e-05, "loss": 1.5267, "step": 1941 }, { "epoch": 0.24504504504504504, "grad_norm": 0.23785637319087982, "learning_rate": 3.6813400303964294e-05, "loss": 1.5279, "step": 1942 }, { "epoch": 0.2455994455994456, "grad_norm": 0.24158895015716553, "learning_rate": 3.677767303459961e-05, "loss": 1.54, "step": 1943 }, { "epoch": 0.24615384615384617, "grad_norm": 0.24916300177574158, "learning_rate": 3.674194835252723e-05, "loss": 1.5099, "step": 1944 }, { "epoch": 0.2467082467082467, "grad_norm": 0.2550554871559143, "learning_rate": 3.6706226286431464e-05, "loss": 1.5847, "step": 1945 }, { "epoch": 0.24726264726264727, "grad_norm": 0.26122114062309265, "learning_rate": 3.667050686499452e-05, "loss": 1.5373, "step": 1946 }, { "epoch": 0.24781704781704783, "grad_norm": 0.26247137784957886, "learning_rate": 3.6634790116896465e-05, "loss": 1.5201, "step": 1947 }, { "epoch": 0.24837144837144837, "grad_norm": 0.2423628717660904, "learning_rate": 3.659907607081523e-05, "loss": 1.472, "step": 1948 }, { "epoch": 0.24892584892584893, "grad_norm": 0.2361409217119217, "learning_rate": 3.656336475542658e-05, "loss": 1.4643, "step": 1949 }, { "epoch": 0.2494802494802495, "grad_norm": 0.2555186450481415, "learning_rate": 3.6527656199404084e-05, "loss": 1.5888, "step": 1950 }, { "epoch": 0.25003465003465003, "grad_norm": 0.2588319480419159, "learning_rate": 3.649195043141909e-05, "loss": 1.5218, "step": 1951 }, { "epoch": 0.25058905058905057, "grad_norm": 0.243848979473114, "learning_rate": 3.6456247480140715e-05, "loss": 1.5381, "step": 1952 }, { "epoch": 0.25114345114345116, "grad_norm": 0.2595714032649994, "learning_rate": 3.6420547374235795e-05, "loss": 1.6138, "step": 1953 }, { "epoch": 0.2516978516978517, "grad_norm": 0.2524738311767578, "learning_rate": 3.638485014236892e-05, "loss": 1.4773, "step": 1954 }, { "epoch": 0.25225225225225223, "grad_norm": 0.23948527872562408, "learning_rate": 3.6349155813202354e-05, "loss": 1.5014, "step": 1955 }, { "epoch": 0.2528066528066528, "grad_norm": 0.23268122971057892, "learning_rate": 3.631346441539601e-05, "loss": 1.4811, "step": 1956 }, { "epoch": 0.25336105336105336, "grad_norm": 0.2428063005208969, "learning_rate": 3.627777597760746e-05, "loss": 1.5381, "step": 1957 }, { "epoch": 0.2539154539154539, "grad_norm": 0.2317744791507721, "learning_rate": 3.6242090528491926e-05, "loss": 1.4628, "step": 1958 }, { "epoch": 0.2544698544698545, "grad_norm": 0.23907609283924103, "learning_rate": 3.620640809670219e-05, "loss": 1.5911, "step": 1959 }, { "epoch": 0.255024255024255, "grad_norm": 0.23972199857234955, "learning_rate": 3.617072871088862e-05, "loss": 1.5331, "step": 1960 }, { "epoch": 0.25557865557865556, "grad_norm": 0.24425937235355377, "learning_rate": 3.613505239969918e-05, "loss": 1.4959, "step": 1961 }, { "epoch": 0.25613305613305615, "grad_norm": 0.2344840168952942, "learning_rate": 3.6099379191779296e-05, "loss": 1.4759, "step": 1962 }, { "epoch": 0.2566874566874567, "grad_norm": 0.24837380647659302, "learning_rate": 3.6063709115771965e-05, "loss": 1.507, "step": 1963 }, { "epoch": 0.2572418572418572, "grad_norm": 0.24232055246829987, "learning_rate": 3.602804220031762e-05, "loss": 1.4734, "step": 1964 }, { "epoch": 0.2577962577962578, "grad_norm": 0.23468682169914246, "learning_rate": 3.59923784740542e-05, "loss": 1.4937, "step": 1965 }, { "epoch": 0.25835065835065835, "grad_norm": 0.24601204693317413, "learning_rate": 3.595671796561706e-05, "loss": 1.5605, "step": 1966 }, { "epoch": 0.2589050589050589, "grad_norm": 0.24206316471099854, "learning_rate": 3.592106070363896e-05, "loss": 1.4341, "step": 1967 }, { "epoch": 0.2594594594594595, "grad_norm": 0.2431585192680359, "learning_rate": 3.588540671675006e-05, "loss": 1.4281, "step": 1968 }, { "epoch": 0.26001386001386, "grad_norm": 0.24677950143814087, "learning_rate": 3.584975603357793e-05, "loss": 1.5395, "step": 1969 }, { "epoch": 0.26056826056826055, "grad_norm": 0.2448202520608902, "learning_rate": 3.581410868274744e-05, "loss": 1.4758, "step": 1970 }, { "epoch": 0.26112266112266114, "grad_norm": 0.24564029276371002, "learning_rate": 3.577846469288079e-05, "loss": 1.5062, "step": 1971 }, { "epoch": 0.2616770616770617, "grad_norm": 0.2437591552734375, "learning_rate": 3.57428240925975e-05, "loss": 1.5241, "step": 1972 }, { "epoch": 0.2622314622314622, "grad_norm": 0.24159181118011475, "learning_rate": 3.5707186910514364e-05, "loss": 1.5628, "step": 1973 }, { "epoch": 0.2627858627858628, "grad_norm": 0.2343732714653015, "learning_rate": 3.5671553175245415e-05, "loss": 1.4952, "step": 1974 }, { "epoch": 0.26334026334026334, "grad_norm": 0.23274484276771545, "learning_rate": 3.563592291540193e-05, "loss": 1.4688, "step": 1975 }, { "epoch": 0.2638946638946639, "grad_norm": 0.24280990660190582, "learning_rate": 3.5600296159592406e-05, "loss": 1.4843, "step": 1976 }, { "epoch": 0.26444906444906446, "grad_norm": 0.23763741552829742, "learning_rate": 3.55646729364225e-05, "loss": 1.5105, "step": 1977 }, { "epoch": 0.265003465003465, "grad_norm": 0.23756805062294006, "learning_rate": 3.552905327449508e-05, "loss": 1.4849, "step": 1978 }, { "epoch": 0.26555786555786554, "grad_norm": 0.26990193128585815, "learning_rate": 3.549343720241008e-05, "loss": 1.5256, "step": 1979 }, { "epoch": 0.2661122661122661, "grad_norm": 0.247808039188385, "learning_rate": 3.545782474876462e-05, "loss": 1.5549, "step": 1980 }, { "epoch": 0.26666666666666666, "grad_norm": 0.26396945118904114, "learning_rate": 3.54222159421529e-05, "loss": 1.594, "step": 1981 }, { "epoch": 0.2672210672210672, "grad_norm": 0.24312561750411987, "learning_rate": 3.538661081116615e-05, "loss": 1.5494, "step": 1982 }, { "epoch": 0.2677754677754678, "grad_norm": 0.2550090253353119, "learning_rate": 3.535100938439269e-05, "loss": 1.5713, "step": 1983 }, { "epoch": 0.2683298683298683, "grad_norm": 0.23545357584953308, "learning_rate": 3.531541169041787e-05, "loss": 1.4983, "step": 1984 }, { "epoch": 0.26888426888426886, "grad_norm": 0.2458602339029312, "learning_rate": 3.5279817757824034e-05, "loss": 1.5139, "step": 1985 }, { "epoch": 0.26943866943866945, "grad_norm": 0.2540222108364105, "learning_rate": 3.524422761519048e-05, "loss": 1.5418, "step": 1986 }, { "epoch": 0.26999306999307, "grad_norm": 0.24538208544254303, "learning_rate": 3.520864129109349e-05, "loss": 1.5297, "step": 1987 }, { "epoch": 0.2705474705474705, "grad_norm": 0.24229013919830322, "learning_rate": 3.517305881410628e-05, "loss": 1.5313, "step": 1988 }, { "epoch": 0.2711018711018711, "grad_norm": 0.24148136377334595, "learning_rate": 3.513748021279895e-05, "loss": 1.512, "step": 1989 }, { "epoch": 0.27165627165627165, "grad_norm": 0.24261786043643951, "learning_rate": 3.510190551573854e-05, "loss": 1.4891, "step": 1990 }, { "epoch": 0.2722106722106722, "grad_norm": 0.24065755307674408, "learning_rate": 3.506633475148889e-05, "loss": 1.4829, "step": 1991 }, { "epoch": 0.2727650727650728, "grad_norm": 0.2447405904531479, "learning_rate": 3.503076794861074e-05, "loss": 1.4901, "step": 1992 }, { "epoch": 0.2733194733194733, "grad_norm": 0.23551733791828156, "learning_rate": 3.4995205135661624e-05, "loss": 1.4642, "step": 1993 }, { "epoch": 0.27387387387387385, "grad_norm": 0.23185968399047852, "learning_rate": 3.495964634119585e-05, "loss": 1.4213, "step": 1994 }, { "epoch": 0.27442827442827444, "grad_norm": 0.24040478467941284, "learning_rate": 3.492409159376456e-05, "loss": 1.4849, "step": 1995 }, { "epoch": 0.274982674982675, "grad_norm": 0.2485688328742981, "learning_rate": 3.488854092191558e-05, "loss": 1.5238, "step": 1996 }, { "epoch": 0.2755370755370755, "grad_norm": 0.2541649043560028, "learning_rate": 3.48529943541935e-05, "loss": 1.5055, "step": 1997 }, { "epoch": 0.2760914760914761, "grad_norm": 0.2477186918258667, "learning_rate": 3.48174519191396e-05, "loss": 1.5169, "step": 1998 }, { "epoch": 0.27664587664587664, "grad_norm": 0.23662298917770386, "learning_rate": 3.4781913645291866e-05, "loss": 1.3729, "step": 1999 }, { "epoch": 0.2772002772002772, "grad_norm": 0.24513690173625946, "learning_rate": 3.4746379561184934e-05, "loss": 1.4822, "step": 2000 }, { "epoch": 0.27775467775467777, "grad_norm": 0.2447725236415863, "learning_rate": 3.471084969535005e-05, "loss": 1.5268, "step": 2001 }, { "epoch": 0.2783090783090783, "grad_norm": 0.25396421551704407, "learning_rate": 3.46753240763151e-05, "loss": 1.5646, "step": 2002 }, { "epoch": 0.27886347886347884, "grad_norm": 0.23695114254951477, "learning_rate": 3.463980273260454e-05, "loss": 1.5055, "step": 2003 }, { "epoch": 0.27941787941787943, "grad_norm": 0.24153874814510345, "learning_rate": 3.460428569273942e-05, "loss": 1.5451, "step": 2004 }, { "epoch": 0.27997227997227997, "grad_norm": 0.24928006529808044, "learning_rate": 3.456877298523731e-05, "loss": 1.5748, "step": 2005 }, { "epoch": 0.2805266805266805, "grad_norm": 0.23034846782684326, "learning_rate": 3.453326463861231e-05, "loss": 1.4877, "step": 2006 }, { "epoch": 0.2810810810810811, "grad_norm": 0.24618618190288544, "learning_rate": 3.449776068137503e-05, "loss": 1.5352, "step": 2007 }, { "epoch": 0.28163548163548163, "grad_norm": 0.24578626453876495, "learning_rate": 3.446226114203253e-05, "loss": 1.5399, "step": 2008 }, { "epoch": 0.28218988218988217, "grad_norm": 0.2477465569972992, "learning_rate": 3.442676604908834e-05, "loss": 1.5671, "step": 2009 }, { "epoch": 0.28274428274428276, "grad_norm": 0.2803698778152466, "learning_rate": 3.439127543104243e-05, "loss": 1.5987, "step": 2010 }, { "epoch": 0.2832986832986833, "grad_norm": 0.2432841658592224, "learning_rate": 3.435578931639115e-05, "loss": 1.5266, "step": 2011 }, { "epoch": 0.28385308385308383, "grad_norm": 0.2531413435935974, "learning_rate": 3.432030773362725e-05, "loss": 1.431, "step": 2012 }, { "epoch": 0.2844074844074844, "grad_norm": 0.23896397650241852, "learning_rate": 3.428483071123986e-05, "loss": 1.4389, "step": 2013 }, { "epoch": 0.28496188496188496, "grad_norm": 0.24649015069007874, "learning_rate": 3.424935827771439e-05, "loss": 1.5552, "step": 2014 }, { "epoch": 0.2855162855162855, "grad_norm": 0.24773798882961273, "learning_rate": 3.4213890461532634e-05, "loss": 1.4448, "step": 2015 }, { "epoch": 0.2860706860706861, "grad_norm": 0.24518926441669464, "learning_rate": 3.417842729117264e-05, "loss": 1.4916, "step": 2016 }, { "epoch": 0.2866250866250866, "grad_norm": 0.24006028473377228, "learning_rate": 3.414296879510875e-05, "loss": 1.5036, "step": 2017 }, { "epoch": 0.28717948717948716, "grad_norm": 0.2587105929851532, "learning_rate": 3.4107515001811505e-05, "loss": 1.539, "step": 2018 }, { "epoch": 0.28773388773388775, "grad_norm": 0.24387259781360626, "learning_rate": 3.4072065939747716e-05, "loss": 1.5217, "step": 2019 }, { "epoch": 0.2882882882882883, "grad_norm": 0.2412419468164444, "learning_rate": 3.403662163738039e-05, "loss": 1.5463, "step": 2020 }, { "epoch": 0.2888426888426888, "grad_norm": 0.25685346126556396, "learning_rate": 3.4001182123168694e-05, "loss": 1.5457, "step": 2021 }, { "epoch": 0.2893970893970894, "grad_norm": 0.23482085764408112, "learning_rate": 3.396574742556796e-05, "loss": 1.4328, "step": 2022 }, { "epoch": 0.28995148995148995, "grad_norm": 0.23975978791713715, "learning_rate": 3.393031757302965e-05, "loss": 1.501, "step": 2023 }, { "epoch": 0.2905058905058905, "grad_norm": 0.24331505596637726, "learning_rate": 3.389489259400134e-05, "loss": 1.4572, "step": 2024 }, { "epoch": 0.2910602910602911, "grad_norm": 0.23449477553367615, "learning_rate": 3.3859472516926705e-05, "loss": 1.4669, "step": 2025 }, { "epoch": 0.2916146916146916, "grad_norm": 0.2451181858778, "learning_rate": 3.382405737024545e-05, "loss": 1.4681, "step": 2026 }, { "epoch": 0.29216909216909215, "grad_norm": 0.23755547404289246, "learning_rate": 3.378864718239334e-05, "loss": 1.4756, "step": 2027 }, { "epoch": 0.29272349272349274, "grad_norm": 0.2354809194803238, "learning_rate": 3.375324198180219e-05, "loss": 1.4428, "step": 2028 }, { "epoch": 0.2932778932778933, "grad_norm": 0.23908185958862305, "learning_rate": 3.371784179689973e-05, "loss": 1.5289, "step": 2029 }, { "epoch": 0.2938322938322938, "grad_norm": 0.23663519322872162, "learning_rate": 3.3682446656109756e-05, "loss": 1.5233, "step": 2030 }, { "epoch": 0.2943866943866944, "grad_norm": 0.22898779809474945, "learning_rate": 3.364705658785196e-05, "loss": 1.4469, "step": 2031 }, { "epoch": 0.29494109494109494, "grad_norm": 0.2507427930831909, "learning_rate": 3.361167162054198e-05, "loss": 1.4469, "step": 2032 }, { "epoch": 0.2954954954954955, "grad_norm": 0.23878999054431915, "learning_rate": 3.3576291782591325e-05, "loss": 1.5035, "step": 2033 }, { "epoch": 0.29604989604989607, "grad_norm": 0.25780054926872253, "learning_rate": 3.3540917102407445e-05, "loss": 1.4904, "step": 2034 }, { "epoch": 0.2966042966042966, "grad_norm": 0.24974551796913147, "learning_rate": 3.350554760839358e-05, "loss": 1.5542, "step": 2035 }, { "epoch": 0.29715869715869714, "grad_norm": 0.23620545864105225, "learning_rate": 3.3470183328948854e-05, "loss": 1.4665, "step": 2036 }, { "epoch": 0.29771309771309773, "grad_norm": 0.24705183506011963, "learning_rate": 3.343482429246821e-05, "loss": 1.529, "step": 2037 }, { "epoch": 0.29826749826749827, "grad_norm": 0.24671830236911774, "learning_rate": 3.339947052734233e-05, "loss": 1.5133, "step": 2038 }, { "epoch": 0.2988218988218988, "grad_norm": 0.24683672189712524, "learning_rate": 3.336412206195772e-05, "loss": 1.4926, "step": 2039 }, { "epoch": 0.2993762993762994, "grad_norm": 0.24571949243545532, "learning_rate": 3.3328778924696604e-05, "loss": 1.527, "step": 2040 }, { "epoch": 0.29993069993069993, "grad_norm": 0.23397287726402283, "learning_rate": 3.329344114393691e-05, "loss": 1.4393, "step": 2041 }, { "epoch": 0.30048510048510046, "grad_norm": 0.2449628859758377, "learning_rate": 3.325810874805231e-05, "loss": 1.5467, "step": 2042 }, { "epoch": 0.30103950103950106, "grad_norm": 0.2481212466955185, "learning_rate": 3.3222781765412105e-05, "loss": 1.5045, "step": 2043 }, { "epoch": 0.3015939015939016, "grad_norm": 0.249929741024971, "learning_rate": 3.318746022438128e-05, "loss": 1.5549, "step": 2044 }, { "epoch": 0.30214830214830213, "grad_norm": 0.25751230120658875, "learning_rate": 3.315214415332045e-05, "loss": 1.6122, "step": 2045 }, { "epoch": 0.3027027027027027, "grad_norm": 0.2472805380821228, "learning_rate": 3.311683358058584e-05, "loss": 1.5257, "step": 2046 }, { "epoch": 0.30325710325710326, "grad_norm": 0.23646825551986694, "learning_rate": 3.308152853452923e-05, "loss": 1.4976, "step": 2047 }, { "epoch": 0.3038115038115038, "grad_norm": 0.24284186959266663, "learning_rate": 3.304622904349799e-05, "loss": 1.5001, "step": 2048 }, { "epoch": 0.3043659043659044, "grad_norm": 0.2357424944639206, "learning_rate": 3.301093513583504e-05, "loss": 1.49, "step": 2049 }, { "epoch": 0.3049203049203049, "grad_norm": 0.25159215927124023, "learning_rate": 3.297564683987877e-05, "loss": 1.4943, "step": 2050 }, { "epoch": 0.30547470547470545, "grad_norm": 0.25652387738227844, "learning_rate": 3.294036418396311e-05, "loss": 1.6062, "step": 2051 }, { "epoch": 0.30602910602910605, "grad_norm": 0.24441581964492798, "learning_rate": 3.2905087196417456e-05, "loss": 1.5322, "step": 2052 }, { "epoch": 0.3065835065835066, "grad_norm": 0.24489133059978485, "learning_rate": 3.2869815905566626e-05, "loss": 1.5035, "step": 2053 }, { "epoch": 0.3071379071379071, "grad_norm": 0.23649930953979492, "learning_rate": 3.28345503397309e-05, "loss": 1.5086, "step": 2054 }, { "epoch": 0.3076923076923077, "grad_norm": 0.25667867064476013, "learning_rate": 3.2799290527225916e-05, "loss": 1.5664, "step": 2055 }, { "epoch": 0.30824670824670825, "grad_norm": 0.2522377371788025, "learning_rate": 3.2764036496362735e-05, "loss": 1.5255, "step": 2056 }, { "epoch": 0.3088011088011088, "grad_norm": 0.2377931773662567, "learning_rate": 3.272878827544777e-05, "loss": 1.4683, "step": 2057 }, { "epoch": 0.3093555093555094, "grad_norm": 0.24453473091125488, "learning_rate": 3.2693545892782726e-05, "loss": 1.5261, "step": 2058 }, { "epoch": 0.3099099099099099, "grad_norm": 0.2343941032886505, "learning_rate": 3.2658309376664666e-05, "loss": 1.4635, "step": 2059 }, { "epoch": 0.31046431046431044, "grad_norm": 0.24524693191051483, "learning_rate": 3.262307875538594e-05, "loss": 1.5096, "step": 2060 }, { "epoch": 0.31101871101871104, "grad_norm": 0.2511862516403198, "learning_rate": 3.2587854057234166e-05, "loss": 1.5723, "step": 2061 }, { "epoch": 0.31157311157311157, "grad_norm": 0.243792325258255, "learning_rate": 3.2552635310492164e-05, "loss": 1.5323, "step": 2062 }, { "epoch": 0.3121275121275121, "grad_norm": 0.24465428292751312, "learning_rate": 3.251742254343802e-05, "loss": 1.526, "step": 2063 }, { "epoch": 0.3126819126819127, "grad_norm": 0.24928951263427734, "learning_rate": 3.248221578434501e-05, "loss": 1.5422, "step": 2064 }, { "epoch": 0.31323631323631324, "grad_norm": 0.2402416169643402, "learning_rate": 3.244701506148156e-05, "loss": 1.4727, "step": 2065 }, { "epoch": 0.31379071379071377, "grad_norm": 0.2531827688217163, "learning_rate": 3.241182040311129e-05, "loss": 1.5447, "step": 2066 }, { "epoch": 0.31434511434511436, "grad_norm": 0.24015752971172333, "learning_rate": 3.2376631837492926e-05, "loss": 1.4873, "step": 2067 }, { "epoch": 0.3148995148995149, "grad_norm": 0.2419661581516266, "learning_rate": 3.23414493928803e-05, "loss": 1.5196, "step": 2068 }, { "epoch": 0.31545391545391543, "grad_norm": 0.25649067759513855, "learning_rate": 3.230627309752234e-05, "loss": 1.5897, "step": 2069 }, { "epoch": 0.316008316008316, "grad_norm": 0.23616193234920502, "learning_rate": 3.227110297966303e-05, "loss": 1.4572, "step": 2070 }, { "epoch": 0.31656271656271656, "grad_norm": 0.24475052952766418, "learning_rate": 3.223593906754139e-05, "loss": 1.5197, "step": 2071 }, { "epoch": 0.3171171171171171, "grad_norm": 0.24402351677417755, "learning_rate": 3.220078138939149e-05, "loss": 1.4853, "step": 2072 }, { "epoch": 0.3176715176715177, "grad_norm": 0.24022163450717926, "learning_rate": 3.216562997344234e-05, "loss": 1.4634, "step": 2073 }, { "epoch": 0.3182259182259182, "grad_norm": 0.24532344937324524, "learning_rate": 3.213048484791796e-05, "loss": 1.4937, "step": 2074 }, { "epoch": 0.31878031878031876, "grad_norm": 0.24136970937252045, "learning_rate": 3.2095346041037286e-05, "loss": 1.5123, "step": 2075 }, { "epoch": 0.31933471933471935, "grad_norm": 0.2419130802154541, "learning_rate": 3.206021358101426e-05, "loss": 1.5302, "step": 2076 }, { "epoch": 0.3198891198891199, "grad_norm": 0.2426813393831253, "learning_rate": 3.202508749605762e-05, "loss": 1.5524, "step": 2077 }, { "epoch": 0.3204435204435204, "grad_norm": 0.24929314851760864, "learning_rate": 3.198996781437106e-05, "loss": 1.515, "step": 2078 }, { "epoch": 0.320997920997921, "grad_norm": 0.2438211888074875, "learning_rate": 3.195485456415311e-05, "loss": 1.5266, "step": 2079 }, { "epoch": 0.32155232155232155, "grad_norm": 0.24355317652225494, "learning_rate": 3.191974777359712e-05, "loss": 1.6029, "step": 2080 }, { "epoch": 0.3221067221067221, "grad_norm": 0.2340444177389145, "learning_rate": 3.1884647470891296e-05, "loss": 1.4692, "step": 2081 }, { "epoch": 0.3226611226611227, "grad_norm": 0.23865939676761627, "learning_rate": 3.1849553684218584e-05, "loss": 1.4269, "step": 2082 }, { "epoch": 0.3232155232155232, "grad_norm": 0.2349475771188736, "learning_rate": 3.181446644175672e-05, "loss": 1.4979, "step": 2083 }, { "epoch": 0.32376992376992375, "grad_norm": 0.24846778810024261, "learning_rate": 3.177938577167821e-05, "loss": 1.5707, "step": 2084 }, { "epoch": 0.32432432432432434, "grad_norm": 0.24489082396030426, "learning_rate": 3.1744311702150235e-05, "loss": 1.4875, "step": 2085 }, { "epoch": 0.3248787248787249, "grad_norm": 0.2449137270450592, "learning_rate": 3.170924426133472e-05, "loss": 1.5487, "step": 2086 }, { "epoch": 0.3254331254331254, "grad_norm": 0.2664063274860382, "learning_rate": 3.167418347738824e-05, "loss": 1.5303, "step": 2087 }, { "epoch": 0.325987525987526, "grad_norm": 0.2455577403306961, "learning_rate": 3.163912937846203e-05, "loss": 1.5088, "step": 2088 }, { "epoch": 0.32654192654192654, "grad_norm": 0.24089355766773224, "learning_rate": 3.160408199270198e-05, "loss": 1.4883, "step": 2089 }, { "epoch": 0.3270963270963271, "grad_norm": 0.23775029182434082, "learning_rate": 3.156904134824853e-05, "loss": 1.5142, "step": 2090 }, { "epoch": 0.32765072765072767, "grad_norm": 0.2627474367618561, "learning_rate": 3.153400747323681e-05, "loss": 1.5385, "step": 2091 }, { "epoch": 0.3282051282051282, "grad_norm": 0.243947833776474, "learning_rate": 3.149898039579641e-05, "loss": 1.5404, "step": 2092 }, { "epoch": 0.32875952875952874, "grad_norm": 0.24070952832698822, "learning_rate": 3.146396014405152e-05, "loss": 1.4326, "step": 2093 }, { "epoch": 0.32931392931392933, "grad_norm": 0.24919423460960388, "learning_rate": 3.142894674612082e-05, "loss": 1.4982, "step": 2094 }, { "epoch": 0.32986832986832987, "grad_norm": 0.23329725861549377, "learning_rate": 3.139394023011752e-05, "loss": 1.4753, "step": 2095 }, { "epoch": 0.3304227304227304, "grad_norm": 0.23908893764019012, "learning_rate": 3.135894062414928e-05, "loss": 1.5252, "step": 2096 }, { "epoch": 0.330977130977131, "grad_norm": 0.2631895840167999, "learning_rate": 3.132394795631822e-05, "loss": 1.4646, "step": 2097 }, { "epoch": 0.33153153153153153, "grad_norm": 0.24497759342193604, "learning_rate": 3.128896225472087e-05, "loss": 1.4828, "step": 2098 }, { "epoch": 0.33208593208593207, "grad_norm": 0.2384822964668274, "learning_rate": 3.125398354744821e-05, "loss": 1.4582, "step": 2099 }, { "epoch": 0.33264033264033266, "grad_norm": 0.2602536678314209, "learning_rate": 3.1219011862585554e-05, "loss": 1.4918, "step": 2100 }, { "epoch": 0.3331947331947332, "grad_norm": 0.25207874178886414, "learning_rate": 3.118404722821262e-05, "loss": 1.4804, "step": 2101 }, { "epoch": 0.33374913374913373, "grad_norm": 0.2420540601015091, "learning_rate": 3.1149089672403423e-05, "loss": 1.5256, "step": 2102 }, { "epoch": 0.3343035343035343, "grad_norm": 0.2681584358215332, "learning_rate": 3.1114139223226345e-05, "loss": 1.5025, "step": 2103 }, { "epoch": 0.33485793485793486, "grad_norm": 0.23432348668575287, "learning_rate": 3.107919590874401e-05, "loss": 1.419, "step": 2104 }, { "epoch": 0.3354123354123354, "grad_norm": 0.2465900331735611, "learning_rate": 3.1044259757013347e-05, "loss": 1.4918, "step": 2105 }, { "epoch": 0.335966735966736, "grad_norm": 0.2464490532875061, "learning_rate": 3.100933079608553e-05, "loss": 1.4775, "step": 2106 }, { "epoch": 0.3365211365211365, "grad_norm": 0.23902343213558197, "learning_rate": 3.097440905400595e-05, "loss": 1.4586, "step": 2107 }, { "epoch": 0.33707553707553706, "grad_norm": 0.2496717870235443, "learning_rate": 3.093949455881421e-05, "loss": 1.5151, "step": 2108 }, { "epoch": 0.33762993762993765, "grad_norm": 0.23891068994998932, "learning_rate": 3.090458733854409e-05, "loss": 1.4362, "step": 2109 }, { "epoch": 0.3381843381843382, "grad_norm": 0.24659186601638794, "learning_rate": 3.086968742122351e-05, "loss": 1.6172, "step": 2110 }, { "epoch": 0.3387387387387387, "grad_norm": 0.24592448770999908, "learning_rate": 3.083479483487457e-05, "loss": 1.5172, "step": 2111 }, { "epoch": 0.3392931392931393, "grad_norm": 0.2452557384967804, "learning_rate": 3.0799909607513444e-05, "loss": 1.5278, "step": 2112 }, { "epoch": 0.33984753984753985, "grad_norm": 0.2611304521560669, "learning_rate": 3.076503176715042e-05, "loss": 1.5362, "step": 2113 }, { "epoch": 0.3404019404019404, "grad_norm": 0.24202358722686768, "learning_rate": 3.0730161341789835e-05, "loss": 1.486, "step": 2114 }, { "epoch": 0.340956340956341, "grad_norm": 0.2522967457771301, "learning_rate": 3.0695298359430095e-05, "loss": 1.4223, "step": 2115 }, { "epoch": 0.3415107415107415, "grad_norm": 0.2446078360080719, "learning_rate": 3.066044284806363e-05, "loss": 1.4744, "step": 2116 }, { "epoch": 0.34206514206514205, "grad_norm": 0.2501811385154724, "learning_rate": 3.062559483567682e-05, "loss": 1.5196, "step": 2117 }, { "epoch": 0.34261954261954264, "grad_norm": 0.26266831159591675, "learning_rate": 3.059075435025011e-05, "loss": 1.4637, "step": 2118 }, { "epoch": 0.3431739431739432, "grad_norm": 0.25267231464385986, "learning_rate": 3.055592141975783e-05, "loss": 1.5167, "step": 2119 }, { "epoch": 0.3437283437283437, "grad_norm": 0.24963614344596863, "learning_rate": 3.052109607216827e-05, "loss": 1.4685, "step": 2120 }, { "epoch": 0.3442827442827443, "grad_norm": 0.25220683217048645, "learning_rate": 3.0486278335443634e-05, "loss": 1.5133, "step": 2121 }, { "epoch": 0.34483714483714484, "grad_norm": 0.2585376501083374, "learning_rate": 3.0451468237540016e-05, "loss": 1.5018, "step": 2122 }, { "epoch": 0.3453915453915454, "grad_norm": 0.2521873712539673, "learning_rate": 3.041666580640738e-05, "loss": 1.5576, "step": 2123 }, { "epoch": 0.34594594594594597, "grad_norm": 0.2547726035118103, "learning_rate": 3.0381871069989514e-05, "loss": 1.5226, "step": 2124 }, { "epoch": 0.3465003465003465, "grad_norm": 0.25323522090911865, "learning_rate": 3.034708405622405e-05, "loss": 1.5377, "step": 2125 }, { "epoch": 0.34705474705474704, "grad_norm": 0.24524371325969696, "learning_rate": 3.031230479304239e-05, "loss": 1.5115, "step": 2126 }, { "epoch": 0.34760914760914763, "grad_norm": 0.251549631357193, "learning_rate": 3.0277533308369754e-05, "loss": 1.5274, "step": 2127 }, { "epoch": 0.34816354816354816, "grad_norm": 0.24416305124759674, "learning_rate": 3.0242769630125092e-05, "loss": 1.4714, "step": 2128 }, { "epoch": 0.3487179487179487, "grad_norm": 0.24270397424697876, "learning_rate": 3.0208013786221072e-05, "loss": 1.5101, "step": 2129 }, { "epoch": 0.3492723492723493, "grad_norm": 0.2527427077293396, "learning_rate": 3.017326580456411e-05, "loss": 1.4917, "step": 2130 }, { "epoch": 0.3498267498267498, "grad_norm": 0.25532466173171997, "learning_rate": 3.013852571305428e-05, "loss": 1.5148, "step": 2131 }, { "epoch": 0.35038115038115036, "grad_norm": 0.2418915331363678, "learning_rate": 3.010379353958532e-05, "loss": 1.4503, "step": 2132 }, { "epoch": 0.35093555093555096, "grad_norm": 0.24791748821735382, "learning_rate": 3.0069069312044636e-05, "loss": 1.505, "step": 2133 }, { "epoch": 0.3514899514899515, "grad_norm": 0.2552606165409088, "learning_rate": 3.0034353058313214e-05, "loss": 1.5293, "step": 2134 }, { "epoch": 0.352044352044352, "grad_norm": 0.2621256113052368, "learning_rate": 2.9999644806265675e-05, "loss": 1.5634, "step": 2135 }, { "epoch": 0.3525987525987526, "grad_norm": 0.25225964188575745, "learning_rate": 2.9964944583770193e-05, "loss": 1.5677, "step": 2136 }, { "epoch": 0.35315315315315315, "grad_norm": 0.24795326590538025, "learning_rate": 2.993025241868853e-05, "loss": 1.4796, "step": 2137 }, { "epoch": 0.3537075537075537, "grad_norm": 0.23735886812210083, "learning_rate": 2.9895568338875915e-05, "loss": 1.5127, "step": 2138 }, { "epoch": 0.3542619542619543, "grad_norm": 0.2496514767408371, "learning_rate": 2.9860892372181146e-05, "loss": 1.5152, "step": 2139 }, { "epoch": 0.3548163548163548, "grad_norm": 0.2430220991373062, "learning_rate": 2.9826224546446472e-05, "loss": 1.5439, "step": 2140 }, { "epoch": 0.35537075537075535, "grad_norm": 0.2505948841571808, "learning_rate": 2.9791564889507607e-05, "loss": 1.462, "step": 2141 }, { "epoch": 0.35592515592515594, "grad_norm": 0.2661728262901306, "learning_rate": 2.975691342919373e-05, "loss": 1.5804, "step": 2142 }, { "epoch": 0.3564795564795565, "grad_norm": 0.24620886147022247, "learning_rate": 2.972227019332742e-05, "loss": 1.5373, "step": 2143 }, { "epoch": 0.357033957033957, "grad_norm": 0.2625667154788971, "learning_rate": 2.968763520972465e-05, "loss": 1.5646, "step": 2144 }, { "epoch": 0.3575883575883576, "grad_norm": 0.2646366357803345, "learning_rate": 2.965300850619478e-05, "loss": 1.5187, "step": 2145 }, { "epoch": 0.35814275814275814, "grad_norm": 0.25516343116760254, "learning_rate": 2.9618390110540497e-05, "loss": 1.582, "step": 2146 }, { "epoch": 0.3586971586971587, "grad_norm": 0.2546088695526123, "learning_rate": 2.9583780050557857e-05, "loss": 1.5022, "step": 2147 }, { "epoch": 0.35925155925155927, "grad_norm": 0.2564910054206848, "learning_rate": 2.9549178354036195e-05, "loss": 1.4597, "step": 2148 }, { "epoch": 0.3598059598059598, "grad_norm": 0.23713424801826477, "learning_rate": 2.9514585048758123e-05, "loss": 1.4122, "step": 2149 }, { "epoch": 0.36036036036036034, "grad_norm": 0.23953530192375183, "learning_rate": 2.9480000162499536e-05, "loss": 1.5136, "step": 2150 }, { "epoch": 0.36091476091476093, "grad_norm": 0.24593326449394226, "learning_rate": 2.944542372302956e-05, "loss": 1.5019, "step": 2151 }, { "epoch": 0.36146916146916147, "grad_norm": 0.24657000601291656, "learning_rate": 2.941085575811056e-05, "loss": 1.502, "step": 2152 }, { "epoch": 0.362023562023562, "grad_norm": 0.2646307945251465, "learning_rate": 2.9376296295498056e-05, "loss": 1.5801, "step": 2153 }, { "epoch": 0.3625779625779626, "grad_norm": 0.24299390614032745, "learning_rate": 2.934174536294077e-05, "loss": 1.4557, "step": 2154 }, { "epoch": 0.36313236313236313, "grad_norm": 0.2365856021642685, "learning_rate": 2.9307202988180578e-05, "loss": 1.3735, "step": 2155 }, { "epoch": 0.36368676368676367, "grad_norm": 0.2475372701883316, "learning_rate": 2.9272669198952454e-05, "loss": 1.498, "step": 2156 }, { "epoch": 0.36424116424116426, "grad_norm": 0.25320836901664734, "learning_rate": 2.923814402298451e-05, "loss": 1.503, "step": 2157 }, { "epoch": 0.3647955647955648, "grad_norm": 0.2417120337486267, "learning_rate": 2.9203627487997937e-05, "loss": 1.4888, "step": 2158 }, { "epoch": 0.36534996534996533, "grad_norm": 0.24594061076641083, "learning_rate": 2.916911962170697e-05, "loss": 1.4461, "step": 2159 }, { "epoch": 0.3659043659043659, "grad_norm": 0.2541366517543793, "learning_rate": 2.913462045181891e-05, "loss": 1.5204, "step": 2160 }, { "epoch": 0.36645876645876646, "grad_norm": 0.25532305240631104, "learning_rate": 2.910013000603404e-05, "loss": 1.4785, "step": 2161 }, { "epoch": 0.367013167013167, "grad_norm": 0.24607594311237335, "learning_rate": 2.9065648312045677e-05, "loss": 1.4834, "step": 2162 }, { "epoch": 0.3675675675675676, "grad_norm": 0.26791536808013916, "learning_rate": 2.9031175397540097e-05, "loss": 1.5346, "step": 2163 }, { "epoch": 0.3681219681219681, "grad_norm": 0.2421042025089264, "learning_rate": 2.89967112901965e-05, "loss": 1.5164, "step": 2164 }, { "epoch": 0.36867636867636866, "grad_norm": 0.2403196096420288, "learning_rate": 2.8962256017687066e-05, "loss": 1.4459, "step": 2165 }, { "epoch": 0.36923076923076925, "grad_norm": 0.2512976825237274, "learning_rate": 2.8927809607676807e-05, "loss": 1.4996, "step": 2166 }, { "epoch": 0.3697851697851698, "grad_norm": 0.2484413981437683, "learning_rate": 2.8893372087823724e-05, "loss": 1.4732, "step": 2167 }, { "epoch": 0.3703395703395703, "grad_norm": 0.24748267233371735, "learning_rate": 2.885894348577859e-05, "loss": 1.4958, "step": 2168 }, { "epoch": 0.3708939708939709, "grad_norm": 0.2434426248073578, "learning_rate": 2.8824523829185046e-05, "loss": 1.4759, "step": 2169 }, { "epoch": 0.37144837144837145, "grad_norm": 0.24941876530647278, "learning_rate": 2.879011314567957e-05, "loss": 1.4725, "step": 2170 }, { "epoch": 0.372002772002772, "grad_norm": 0.24453915655612946, "learning_rate": 2.8755711462891393e-05, "loss": 1.498, "step": 2171 }, { "epoch": 0.3725571725571726, "grad_norm": 0.244066521525383, "learning_rate": 2.8721318808442568e-05, "loss": 1.4667, "step": 2172 }, { "epoch": 0.3731115731115731, "grad_norm": 0.24692927300930023, "learning_rate": 2.8686935209947857e-05, "loss": 1.4848, "step": 2173 }, { "epoch": 0.37366597366597365, "grad_norm": 0.24672725796699524, "learning_rate": 2.865256069501478e-05, "loss": 1.5445, "step": 2174 }, { "epoch": 0.37422037422037424, "grad_norm": 0.24522803723812103, "learning_rate": 2.8618195291243554e-05, "loss": 1.4904, "step": 2175 }, { "epoch": 0.3747747747747748, "grad_norm": 0.24777989089488983, "learning_rate": 2.8583839026227066e-05, "loss": 1.4827, "step": 2176 }, { "epoch": 0.3753291753291753, "grad_norm": 0.2669071555137634, "learning_rate": 2.854949192755088e-05, "loss": 1.5275, "step": 2177 }, { "epoch": 0.3758835758835759, "grad_norm": 0.24552519619464874, "learning_rate": 2.851515402279322e-05, "loss": 1.4673, "step": 2178 }, { "epoch": 0.37643797643797644, "grad_norm": 0.2597273588180542, "learning_rate": 2.848082533952488e-05, "loss": 1.4818, "step": 2179 }, { "epoch": 0.376992376992377, "grad_norm": 0.23745884001255035, "learning_rate": 2.8446505905309295e-05, "loss": 1.4583, "step": 2180 }, { "epoch": 0.37754677754677757, "grad_norm": 0.24379105865955353, "learning_rate": 2.841219574770242e-05, "loss": 1.5209, "step": 2181 }, { "epoch": 0.3781011781011781, "grad_norm": 0.236824169754982, "learning_rate": 2.8377894894252844e-05, "loss": 1.4489, "step": 2182 }, { "epoch": 0.37865557865557864, "grad_norm": 0.2622652053833008, "learning_rate": 2.8343603372501606e-05, "loss": 1.5815, "step": 2183 }, { "epoch": 0.37920997920997923, "grad_norm": 0.24711135029792786, "learning_rate": 2.830932120998231e-05, "loss": 1.5952, "step": 2184 }, { "epoch": 0.37976437976437977, "grad_norm": 0.2437206506729126, "learning_rate": 2.827504843422099e-05, "loss": 1.4968, "step": 2185 }, { "epoch": 0.3803187803187803, "grad_norm": 0.24244265258312225, "learning_rate": 2.824078507273618e-05, "loss": 1.4356, "step": 2186 }, { "epoch": 0.3808731808731809, "grad_norm": 0.25723302364349365, "learning_rate": 2.8206531153038863e-05, "loss": 1.5071, "step": 2187 }, { "epoch": 0.38142758142758143, "grad_norm": 0.2378910928964615, "learning_rate": 2.8172286702632407e-05, "loss": 1.4513, "step": 2188 }, { "epoch": 0.38198198198198197, "grad_norm": 0.2421424835920334, "learning_rate": 2.8138051749012604e-05, "loss": 1.4911, "step": 2189 }, { "epoch": 0.38253638253638256, "grad_norm": 0.24509172141551971, "learning_rate": 2.8103826319667617e-05, "loss": 1.5577, "step": 2190 }, { "epoch": 0.3830907830907831, "grad_norm": 0.24877887964248657, "learning_rate": 2.8069610442077944e-05, "loss": 1.5437, "step": 2191 }, { "epoch": 0.38364518364518363, "grad_norm": 0.2372499704360962, "learning_rate": 2.8035404143716433e-05, "loss": 1.5138, "step": 2192 }, { "epoch": 0.3841995841995842, "grad_norm": 0.24694252014160156, "learning_rate": 2.800120745204823e-05, "loss": 1.4985, "step": 2193 }, { "epoch": 0.38475398475398476, "grad_norm": 0.24533367156982422, "learning_rate": 2.7967020394530772e-05, "loss": 1.557, "step": 2194 }, { "epoch": 0.3853083853083853, "grad_norm": 0.23841796815395355, "learning_rate": 2.7932842998613775e-05, "loss": 1.5061, "step": 2195 }, { "epoch": 0.3858627858627859, "grad_norm": 0.24892525374889374, "learning_rate": 2.789867529173915e-05, "loss": 1.5156, "step": 2196 }, { "epoch": 0.3864171864171864, "grad_norm": 0.25081667304039, "learning_rate": 2.7864517301341086e-05, "loss": 1.5064, "step": 2197 }, { "epoch": 0.38697158697158696, "grad_norm": 0.2593148946762085, "learning_rate": 2.783036905484594e-05, "loss": 1.5624, "step": 2198 }, { "epoch": 0.38752598752598755, "grad_norm": 0.24643583595752716, "learning_rate": 2.7796230579672265e-05, "loss": 1.527, "step": 2199 }, { "epoch": 0.3880803880803881, "grad_norm": 0.24408619105815887, "learning_rate": 2.776210190323072e-05, "loss": 1.5022, "step": 2200 }, { "epoch": 0.3886347886347886, "grad_norm": 0.23719295859336853, "learning_rate": 2.7727983052924153e-05, "loss": 1.4373, "step": 2201 }, { "epoch": 0.3891891891891892, "grad_norm": 0.2583237290382385, "learning_rate": 2.7693874056147505e-05, "loss": 1.5502, "step": 2202 }, { "epoch": 0.38974358974358975, "grad_norm": 0.23981530964374542, "learning_rate": 2.765977494028778e-05, "loss": 1.5017, "step": 2203 }, { "epoch": 0.3902979902979903, "grad_norm": 0.24570141732692719, "learning_rate": 2.7625685732724084e-05, "loss": 1.5571, "step": 2204 }, { "epoch": 0.3908523908523909, "grad_norm": 0.2476937621831894, "learning_rate": 2.759160646082754e-05, "loss": 1.4971, "step": 2205 }, { "epoch": 0.3914067914067914, "grad_norm": 0.23947833478450775, "learning_rate": 2.755753715196131e-05, "loss": 1.4607, "step": 2206 }, { "epoch": 0.39196119196119195, "grad_norm": 0.2423194944858551, "learning_rate": 2.7523477833480554e-05, "loss": 1.4961, "step": 2207 }, { "epoch": 0.39251559251559254, "grad_norm": 0.25477972626686096, "learning_rate": 2.7489428532732402e-05, "loss": 1.5305, "step": 2208 }, { "epoch": 0.3930699930699931, "grad_norm": 0.24010416865348816, "learning_rate": 2.745538927705595e-05, "loss": 1.515, "step": 2209 }, { "epoch": 0.3936243936243936, "grad_norm": 0.24376268684864044, "learning_rate": 2.7421360093782233e-05, "loss": 1.5071, "step": 2210 }, { "epoch": 0.3941787941787942, "grad_norm": 0.24127423763275146, "learning_rate": 2.738734101023417e-05, "loss": 1.4866, "step": 2211 }, { "epoch": 0.39473319473319474, "grad_norm": 0.2399214655160904, "learning_rate": 2.7353332053726594e-05, "loss": 1.5024, "step": 2212 }, { "epoch": 0.3952875952875953, "grad_norm": 0.2611742615699768, "learning_rate": 2.7319333251566225e-05, "loss": 1.5402, "step": 2213 }, { "epoch": 0.39584199584199586, "grad_norm": 0.24111641943454742, "learning_rate": 2.72853446310516e-05, "loss": 1.4874, "step": 2214 }, { "epoch": 0.3963963963963964, "grad_norm": 0.24981702864170074, "learning_rate": 2.7251366219473082e-05, "loss": 1.4697, "step": 2215 }, { "epoch": 0.39695079695079694, "grad_norm": 0.24776077270507812, "learning_rate": 2.7217398044112848e-05, "loss": 1.5034, "step": 2216 }, { "epoch": 0.3975051975051975, "grad_norm": 0.24590174853801727, "learning_rate": 2.718344013224485e-05, "loss": 1.4879, "step": 2217 }, { "epoch": 0.39805959805959806, "grad_norm": 0.24167826771736145, "learning_rate": 2.71494925111348e-05, "loss": 1.4857, "step": 2218 }, { "epoch": 0.3986139986139986, "grad_norm": 0.255820631980896, "learning_rate": 2.7115555208040167e-05, "loss": 1.4898, "step": 2219 }, { "epoch": 0.3991683991683992, "grad_norm": 0.25160956382751465, "learning_rate": 2.7081628250210095e-05, "loss": 1.4886, "step": 2220 }, { "epoch": 0.3997227997227997, "grad_norm": 0.24128420650959015, "learning_rate": 2.704771166488545e-05, "loss": 1.4996, "step": 2221 }, { "epoch": 0.40027720027720026, "grad_norm": 0.2524079382419586, "learning_rate": 2.7013805479298773e-05, "loss": 1.51, "step": 2222 }, { "epoch": 0.40083160083160085, "grad_norm": 0.25827160477638245, "learning_rate": 2.6979909720674233e-05, "loss": 1.5132, "step": 2223 }, { "epoch": 0.4013860013860014, "grad_norm": 0.24078689515590668, "learning_rate": 2.6946024416227648e-05, "loss": 1.443, "step": 2224 }, { "epoch": 0.4019404019404019, "grad_norm": 0.25596845149993896, "learning_rate": 2.691214959316642e-05, "loss": 1.502, "step": 2225 }, { "epoch": 0.4024948024948025, "grad_norm": 0.24278657138347626, "learning_rate": 2.6878285278689563e-05, "loss": 1.509, "step": 2226 }, { "epoch": 0.40304920304920305, "grad_norm": 0.24795490503311157, "learning_rate": 2.684443149998762e-05, "loss": 1.5277, "step": 2227 }, { "epoch": 0.4036036036036036, "grad_norm": 0.24315330386161804, "learning_rate": 2.6810588284242716e-05, "loss": 1.4525, "step": 2228 }, { "epoch": 0.4041580041580042, "grad_norm": 0.24117666482925415, "learning_rate": 2.6776755658628477e-05, "loss": 1.4767, "step": 2229 }, { "epoch": 0.4047124047124047, "grad_norm": 0.2394149899482727, "learning_rate": 2.6742933650309994e-05, "loss": 1.4466, "step": 2230 }, { "epoch": 0.40526680526680525, "grad_norm": 0.24937331676483154, "learning_rate": 2.6709122286443885e-05, "loss": 1.5266, "step": 2231 }, { "epoch": 0.40582120582120584, "grad_norm": 0.24798640608787537, "learning_rate": 2.6675321594178175e-05, "loss": 1.5391, "step": 2232 }, { "epoch": 0.4063756063756064, "grad_norm": 0.23749056458473206, "learning_rate": 2.6641531600652348e-05, "loss": 1.4674, "step": 2233 }, { "epoch": 0.4069300069300069, "grad_norm": 0.24988266825675964, "learning_rate": 2.66077523329973e-05, "loss": 1.5406, "step": 2234 }, { "epoch": 0.4074844074844075, "grad_norm": 0.25045865774154663, "learning_rate": 2.6573983818335302e-05, "loss": 1.4799, "step": 2235 }, { "epoch": 0.40803880803880804, "grad_norm": 0.2517133057117462, "learning_rate": 2.654022608378e-05, "loss": 1.4369, "step": 2236 }, { "epoch": 0.4085932085932086, "grad_norm": 0.2736227512359619, "learning_rate": 2.6506479156436367e-05, "loss": 1.5238, "step": 2237 }, { "epoch": 0.40914760914760917, "grad_norm": 0.25867921113967896, "learning_rate": 2.647274306340072e-05, "loss": 1.4502, "step": 2238 }, { "epoch": 0.4097020097020097, "grad_norm": 0.2537078261375427, "learning_rate": 2.6439017831760674e-05, "loss": 1.5216, "step": 2239 }, { "epoch": 0.41025641025641024, "grad_norm": 0.24368835985660553, "learning_rate": 2.6405303488595104e-05, "loss": 1.5214, "step": 2240 }, { "epoch": 0.41081081081081083, "grad_norm": 0.2472364604473114, "learning_rate": 2.637160006097416e-05, "loss": 1.4834, "step": 2241 }, { "epoch": 0.41136521136521137, "grad_norm": 0.24850264191627502, "learning_rate": 2.6337907575959226e-05, "loss": 1.5275, "step": 2242 }, { "epoch": 0.4119196119196119, "grad_norm": 0.24505524337291718, "learning_rate": 2.630422606060291e-05, "loss": 1.5067, "step": 2243 }, { "epoch": 0.4124740124740125, "grad_norm": 0.2500763535499573, "learning_rate": 2.6270555541948975e-05, "loss": 1.5312, "step": 2244 }, { "epoch": 0.41302841302841303, "grad_norm": 0.24269507825374603, "learning_rate": 2.6236896047032404e-05, "loss": 1.427, "step": 2245 }, { "epoch": 0.41358281358281357, "grad_norm": 0.24373352527618408, "learning_rate": 2.620324760287929e-05, "loss": 1.5043, "step": 2246 }, { "epoch": 0.41413721413721416, "grad_norm": 0.24181844294071198, "learning_rate": 2.616961023650686e-05, "loss": 1.4616, "step": 2247 }, { "epoch": 0.4146916146916147, "grad_norm": 0.2511554956436157, "learning_rate": 2.6135983974923466e-05, "loss": 1.4643, "step": 2248 }, { "epoch": 0.41524601524601523, "grad_norm": 0.256124347448349, "learning_rate": 2.610236884512853e-05, "loss": 1.5547, "step": 2249 }, { "epoch": 0.4158004158004158, "grad_norm": 0.24149104952812195, "learning_rate": 2.6068764874112528e-05, "loss": 1.4821, "step": 2250 }, { "epoch": 0.41635481635481636, "grad_norm": 0.254535049200058, "learning_rate": 2.6035172088856993e-05, "loss": 1.4343, "step": 2251 }, { "epoch": 0.4169092169092169, "grad_norm": 0.2482708990573883, "learning_rate": 2.6001590516334464e-05, "loss": 1.48, "step": 2252 }, { "epoch": 0.4174636174636175, "grad_norm": 0.23738013207912445, "learning_rate": 2.596802018350848e-05, "loss": 1.4425, "step": 2253 }, { "epoch": 0.418018018018018, "grad_norm": 0.2620631456375122, "learning_rate": 2.5934461117333573e-05, "loss": 1.4934, "step": 2254 }, { "epoch": 0.41857241857241856, "grad_norm": 0.24778437614440918, "learning_rate": 2.5900913344755186e-05, "loss": 1.4381, "step": 2255 }, { "epoch": 0.41912681912681915, "grad_norm": 0.2398407906293869, "learning_rate": 2.586737689270974e-05, "loss": 1.4754, "step": 2256 }, { "epoch": 0.4196812196812197, "grad_norm": 0.2570435404777527, "learning_rate": 2.583385178812453e-05, "loss": 1.483, "step": 2257 }, { "epoch": 0.4202356202356202, "grad_norm": 0.2511042058467865, "learning_rate": 2.580033805791779e-05, "loss": 1.5109, "step": 2258 }, { "epoch": 0.4207900207900208, "grad_norm": 0.24296511709690094, "learning_rate": 2.5766835728998558e-05, "loss": 1.514, "step": 2259 }, { "epoch": 0.42134442134442135, "grad_norm": 0.2554164528846741, "learning_rate": 2.5733344828266763e-05, "loss": 1.5532, "step": 2260 }, { "epoch": 0.4218988218988219, "grad_norm": 0.2651786208152771, "learning_rate": 2.569986538261314e-05, "loss": 1.5478, "step": 2261 }, { "epoch": 0.4224532224532225, "grad_norm": 0.24324794113636017, "learning_rate": 2.5666397418919227e-05, "loss": 1.5053, "step": 2262 }, { "epoch": 0.423007623007623, "grad_norm": 0.25034764409065247, "learning_rate": 2.563294096405734e-05, "loss": 1.5332, "step": 2263 }, { "epoch": 0.42356202356202355, "grad_norm": 0.25336191058158875, "learning_rate": 2.5599496044890568e-05, "loss": 1.4522, "step": 2264 }, { "epoch": 0.42411642411642414, "grad_norm": 0.241899311542511, "learning_rate": 2.556606268827273e-05, "loss": 1.4677, "step": 2265 }, { "epoch": 0.4246708246708247, "grad_norm": 0.2674090564250946, "learning_rate": 2.5532640921048356e-05, "loss": 1.5427, "step": 2266 }, { "epoch": 0.4252252252252252, "grad_norm": 0.2400083839893341, "learning_rate": 2.549923077005267e-05, "loss": 1.4724, "step": 2267 }, { "epoch": 0.4257796257796258, "grad_norm": 0.2435728907585144, "learning_rate": 2.546583226211158e-05, "loss": 1.4468, "step": 2268 }, { "epoch": 0.42633402633402634, "grad_norm": 0.24947336316108704, "learning_rate": 2.5432445424041648e-05, "loss": 1.4932, "step": 2269 }, { "epoch": 0.4268884268884269, "grad_norm": 0.2623175382614136, "learning_rate": 2.539907028265004e-05, "loss": 1.4907, "step": 2270 }, { "epoch": 0.42744282744282747, "grad_norm": 0.24072036147117615, "learning_rate": 2.5365706864734563e-05, "loss": 1.4664, "step": 2271 }, { "epoch": 0.427997227997228, "grad_norm": 0.2598769962787628, "learning_rate": 2.533235519708358e-05, "loss": 1.4741, "step": 2272 }, { "epoch": 0.42855162855162854, "grad_norm": 0.25220799446105957, "learning_rate": 2.529901530647603e-05, "loss": 1.4528, "step": 2273 }, { "epoch": 0.42910602910602913, "grad_norm": 0.2480127364397049, "learning_rate": 2.5265687219681426e-05, "loss": 1.4453, "step": 2274 }, { "epoch": 0.42966042966042967, "grad_norm": 0.2453289031982422, "learning_rate": 2.5232370963459774e-05, "loss": 1.4694, "step": 2275 }, { "epoch": 0.4302148302148302, "grad_norm": 0.26884186267852783, "learning_rate": 2.519906656456157e-05, "loss": 1.5072, "step": 2276 }, { "epoch": 0.4307692307692308, "grad_norm": 0.25229039788246155, "learning_rate": 2.5165774049727815e-05, "loss": 1.5043, "step": 2277 }, { "epoch": 0.43132363132363133, "grad_norm": 0.24530819058418274, "learning_rate": 2.5132493445689958e-05, "loss": 1.4694, "step": 2278 }, { "epoch": 0.43187803187803186, "grad_norm": 0.2492954134941101, "learning_rate": 2.5099224779169875e-05, "loss": 1.4923, "step": 2279 }, { "epoch": 0.43243243243243246, "grad_norm": 0.24679702520370483, "learning_rate": 2.5065968076879874e-05, "loss": 1.4773, "step": 2280 }, { "epoch": 0.432986832986833, "grad_norm": 0.2495153546333313, "learning_rate": 2.5032723365522653e-05, "loss": 1.5131, "step": 2281 }, { "epoch": 0.43354123354123353, "grad_norm": 0.24070455133914948, "learning_rate": 2.4999490671791266e-05, "loss": 1.5361, "step": 2282 }, { "epoch": 0.4340956340956341, "grad_norm": 0.2685326337814331, "learning_rate": 2.496627002236914e-05, "loss": 1.5445, "step": 2283 }, { "epoch": 0.43465003465003466, "grad_norm": 0.24034175276756287, "learning_rate": 2.4933061443930012e-05, "loss": 1.3819, "step": 2284 }, { "epoch": 0.4352044352044352, "grad_norm": 0.25140661001205444, "learning_rate": 2.4899864963137942e-05, "loss": 1.4835, "step": 2285 }, { "epoch": 0.4357588357588358, "grad_norm": 0.2567353844642639, "learning_rate": 2.4866680606647276e-05, "loss": 1.5394, "step": 2286 }, { "epoch": 0.4363132363132363, "grad_norm": 0.25162211060523987, "learning_rate": 2.4833508401102603e-05, "loss": 1.4312, "step": 2287 }, { "epoch": 0.43686763686763685, "grad_norm": 0.2480967789888382, "learning_rate": 2.480034837313877e-05, "loss": 1.5406, "step": 2288 }, { "epoch": 0.43742203742203745, "grad_norm": 0.2571992576122284, "learning_rate": 2.4767200549380873e-05, "loss": 1.5609, "step": 2289 }, { "epoch": 0.437976437976438, "grad_norm": 0.24016328155994415, "learning_rate": 2.473406495644418e-05, "loss": 1.4501, "step": 2290 }, { "epoch": 0.4385308385308385, "grad_norm": 0.24095511436462402, "learning_rate": 2.4700941620934122e-05, "loss": 1.4842, "step": 2291 }, { "epoch": 0.4390852390852391, "grad_norm": 0.2448769360780716, "learning_rate": 2.466783056944632e-05, "loss": 1.3976, "step": 2292 }, { "epoch": 0.43963963963963965, "grad_norm": 0.24332155287265778, "learning_rate": 2.4634731828566533e-05, "loss": 1.4711, "step": 2293 }, { "epoch": 0.4401940401940402, "grad_norm": 0.24825070798397064, "learning_rate": 2.4601645424870606e-05, "loss": 1.5271, "step": 2294 }, { "epoch": 0.4407484407484408, "grad_norm": 0.25368747115135193, "learning_rate": 2.4568571384924505e-05, "loss": 1.5143, "step": 2295 }, { "epoch": 0.4413028413028413, "grad_norm": 0.24270454049110413, "learning_rate": 2.453550973528425e-05, "loss": 1.5246, "step": 2296 }, { "epoch": 0.44185724185724184, "grad_norm": 0.23317597806453705, "learning_rate": 2.4502460502495925e-05, "loss": 1.4798, "step": 2297 }, { "epoch": 0.44241164241164244, "grad_norm": 0.24395090341567993, "learning_rate": 2.4469423713095655e-05, "loss": 1.4931, "step": 2298 }, { "epoch": 0.44296604296604297, "grad_norm": 0.24514877796173096, "learning_rate": 2.443639939360954e-05, "loss": 1.4965, "step": 2299 }, { "epoch": 0.4435204435204435, "grad_norm": 0.24590745568275452, "learning_rate": 2.4403387570553694e-05, "loss": 1.4529, "step": 2300 }, { "epoch": 0.4440748440748441, "grad_norm": 0.24439887702465057, "learning_rate": 2.4370388270434208e-05, "loss": 1.5099, "step": 2301 }, { "epoch": 0.44462924462924464, "grad_norm": 0.24853557348251343, "learning_rate": 2.433740151974707e-05, "loss": 1.5255, "step": 2302 }, { "epoch": 0.44518364518364517, "grad_norm": 0.2561310827732086, "learning_rate": 2.430442734497823e-05, "loss": 1.5205, "step": 2303 }, { "epoch": 0.44573804573804576, "grad_norm": 0.2356433868408203, "learning_rate": 2.4271465772603553e-05, "loss": 1.4305, "step": 2304 }, { "epoch": 0.4462924462924463, "grad_norm": 0.24362589418888092, "learning_rate": 2.4238516829088763e-05, "loss": 1.5347, "step": 2305 }, { "epoch": 0.44684684684684683, "grad_norm": 0.2500246465206146, "learning_rate": 2.4205580540889422e-05, "loss": 1.4943, "step": 2306 }, { "epoch": 0.4474012474012474, "grad_norm": 0.24313810467720032, "learning_rate": 2.4172656934450972e-05, "loss": 1.4591, "step": 2307 }, { "epoch": 0.44795564795564796, "grad_norm": 0.23804335296154022, "learning_rate": 2.413974603620866e-05, "loss": 1.4951, "step": 2308 }, { "epoch": 0.4485100485100485, "grad_norm": 0.24687306582927704, "learning_rate": 2.4106847872587507e-05, "loss": 1.4815, "step": 2309 }, { "epoch": 0.4490644490644491, "grad_norm": 0.24419593811035156, "learning_rate": 2.4073962470002343e-05, "loss": 1.4866, "step": 2310 }, { "epoch": 0.4496188496188496, "grad_norm": 0.2432204931974411, "learning_rate": 2.404108985485772e-05, "loss": 1.5024, "step": 2311 }, { "epoch": 0.45017325017325016, "grad_norm": 0.2391861081123352, "learning_rate": 2.400823005354795e-05, "loss": 1.4004, "step": 2312 }, { "epoch": 0.45072765072765075, "grad_norm": 0.25889071822166443, "learning_rate": 2.397538309245705e-05, "loss": 1.5283, "step": 2313 }, { "epoch": 0.4512820512820513, "grad_norm": 0.241373673081398, "learning_rate": 2.3942548997958702e-05, "loss": 1.4516, "step": 2314 }, { "epoch": 0.4518364518364518, "grad_norm": 0.24054937064647675, "learning_rate": 2.3909727796416294e-05, "loss": 1.4596, "step": 2315 }, { "epoch": 0.4523908523908524, "grad_norm": 0.24515198171138763, "learning_rate": 2.3876919514182823e-05, "loss": 1.4388, "step": 2316 }, { "epoch": 0.45294525294525295, "grad_norm": 0.2591582238674164, "learning_rate": 2.3844124177600962e-05, "loss": 1.5488, "step": 2317 }, { "epoch": 0.4534996534996535, "grad_norm": 0.24447019398212433, "learning_rate": 2.3811341813002933e-05, "loss": 1.4187, "step": 2318 }, { "epoch": 0.4540540540540541, "grad_norm": 0.2537336051464081, "learning_rate": 2.3778572446710597e-05, "loss": 1.5009, "step": 2319 }, { "epoch": 0.4546084546084546, "grad_norm": 0.2570236325263977, "learning_rate": 2.374581610503535e-05, "loss": 1.5503, "step": 2320 }, { "epoch": 0.45516285516285515, "grad_norm": 0.2470417022705078, "learning_rate": 2.371307281427812e-05, "loss": 1.5156, "step": 2321 }, { "epoch": 0.45571725571725574, "grad_norm": 0.25512251257896423, "learning_rate": 2.3680342600729382e-05, "loss": 1.5622, "step": 2322 }, { "epoch": 0.4562716562716563, "grad_norm": 0.24980416893959045, "learning_rate": 2.3647625490669085e-05, "loss": 1.5199, "step": 2323 }, { "epoch": 0.4568260568260568, "grad_norm": 0.2384525090456009, "learning_rate": 2.3614921510366676e-05, "loss": 1.4834, "step": 2324 }, { "epoch": 0.4573804573804574, "grad_norm": 0.2454557865858078, "learning_rate": 2.358223068608106e-05, "loss": 1.4931, "step": 2325 }, { "epoch": 0.45793485793485794, "grad_norm": 0.2473176270723343, "learning_rate": 2.3549553044060557e-05, "loss": 1.4954, "step": 2326 }, { "epoch": 0.4584892584892585, "grad_norm": 0.2480940818786621, "learning_rate": 2.3516888610542926e-05, "loss": 1.5065, "step": 2327 }, { "epoch": 0.45904365904365907, "grad_norm": 0.23897838592529297, "learning_rate": 2.3484237411755322e-05, "loss": 1.5045, "step": 2328 }, { "epoch": 0.4595980595980596, "grad_norm": 0.24588221311569214, "learning_rate": 2.3451599473914248e-05, "loss": 1.5182, "step": 2329 }, { "epoch": 0.46015246015246014, "grad_norm": 0.2433626651763916, "learning_rate": 2.3418974823225585e-05, "loss": 1.4927, "step": 2330 }, { "epoch": 0.46070686070686073, "grad_norm": 0.25784170627593994, "learning_rate": 2.338636348588453e-05, "loss": 1.5791, "step": 2331 }, { "epoch": 0.46126126126126127, "grad_norm": 0.24226407706737518, "learning_rate": 2.335376548807559e-05, "loss": 1.5106, "step": 2332 }, { "epoch": 0.4618156618156618, "grad_norm": 0.25261199474334717, "learning_rate": 2.332118085597259e-05, "loss": 1.48, "step": 2333 }, { "epoch": 0.4623700623700624, "grad_norm": 0.24431148171424866, "learning_rate": 2.3288609615738574e-05, "loss": 1.4843, "step": 2334 }, { "epoch": 0.46292446292446293, "grad_norm": 0.2392502725124359, "learning_rate": 2.3256051793525874e-05, "loss": 1.4468, "step": 2335 }, { "epoch": 0.46347886347886347, "grad_norm": 0.24358941614627838, "learning_rate": 2.3223507415476035e-05, "loss": 1.495, "step": 2336 }, { "epoch": 0.46403326403326406, "grad_norm": 0.24001990258693695, "learning_rate": 2.3190976507719805e-05, "loss": 1.47, "step": 2337 }, { "epoch": 0.4645876645876646, "grad_norm": 0.24004314839839935, "learning_rate": 2.3158459096377136e-05, "loss": 1.5167, "step": 2338 }, { "epoch": 0.46514206514206513, "grad_norm": 0.251761257648468, "learning_rate": 2.3125955207557088e-05, "loss": 1.4908, "step": 2339 }, { "epoch": 0.4656964656964657, "grad_norm": 0.24197779595851898, "learning_rate": 2.3093464867357923e-05, "loss": 1.4029, "step": 2340 }, { "epoch": 0.46625086625086626, "grad_norm": 0.23290924727916718, "learning_rate": 2.3060988101866997e-05, "loss": 1.387, "step": 2341 }, { "epoch": 0.4668052668052668, "grad_norm": 0.2407480925321579, "learning_rate": 2.3028524937160777e-05, "loss": 1.5154, "step": 2342 }, { "epoch": 0.4673596673596674, "grad_norm": 0.24823831021785736, "learning_rate": 2.2996075399304796e-05, "loss": 1.4293, "step": 2343 }, { "epoch": 0.4679140679140679, "grad_norm": 0.23478254675865173, "learning_rate": 2.2963639514353674e-05, "loss": 1.4336, "step": 2344 }, { "epoch": 0.46846846846846846, "grad_norm": 0.2441752701997757, "learning_rate": 2.2931217308351023e-05, "loss": 1.4814, "step": 2345 }, { "epoch": 0.46902286902286905, "grad_norm": 0.24491608142852783, "learning_rate": 2.2898808807329505e-05, "loss": 1.4409, "step": 2346 }, { "epoch": 0.4695772695772696, "grad_norm": 0.24464474618434906, "learning_rate": 2.2866414037310775e-05, "loss": 1.488, "step": 2347 }, { "epoch": 0.4701316701316701, "grad_norm": 0.25816524028778076, "learning_rate": 2.2834033024305457e-05, "loss": 1.5401, "step": 2348 }, { "epoch": 0.4706860706860707, "grad_norm": 0.2725927233695984, "learning_rate": 2.2801665794313125e-05, "loss": 1.5226, "step": 2349 }, { "epoch": 0.47124047124047125, "grad_norm": 0.2505475580692291, "learning_rate": 2.27693123733223e-05, "loss": 1.554, "step": 2350 }, { "epoch": 0.4717948717948718, "grad_norm": 0.2606370747089386, "learning_rate": 2.2736972787310405e-05, "loss": 1.5111, "step": 2351 }, { "epoch": 0.4723492723492724, "grad_norm": 0.26247549057006836, "learning_rate": 2.2704647062243757e-05, "loss": 1.5115, "step": 2352 }, { "epoch": 0.4729036729036729, "grad_norm": 0.2565583288669586, "learning_rate": 2.2672335224077554e-05, "loss": 1.5056, "step": 2353 }, { "epoch": 0.47345807345807345, "grad_norm": 0.247178852558136, "learning_rate": 2.264003729875581e-05, "loss": 1.5197, "step": 2354 }, { "epoch": 0.47401247401247404, "grad_norm": 0.26213309168815613, "learning_rate": 2.2607753312211396e-05, "loss": 1.5015, "step": 2355 }, { "epoch": 0.4745668745668746, "grad_norm": 0.2632255554199219, "learning_rate": 2.2575483290366e-05, "loss": 1.5274, "step": 2356 }, { "epoch": 0.4751212751212751, "grad_norm": 0.2477589249610901, "learning_rate": 2.2543227259130074e-05, "loss": 1.5077, "step": 2357 }, { "epoch": 0.4756756756756757, "grad_norm": 0.2577234208583832, "learning_rate": 2.251098524440286e-05, "loss": 1.4906, "step": 2358 }, { "epoch": 0.47623007623007624, "grad_norm": 0.2688502073287964, "learning_rate": 2.247875727207231e-05, "loss": 1.4825, "step": 2359 }, { "epoch": 0.4767844767844768, "grad_norm": 0.2517628073692322, "learning_rate": 2.244654336801513e-05, "loss": 1.5557, "step": 2360 }, { "epoch": 0.47733887733887737, "grad_norm": 0.2668411433696747, "learning_rate": 2.2414343558096735e-05, "loss": 1.514, "step": 2361 }, { "epoch": 0.4778932778932779, "grad_norm": 0.2638101279735565, "learning_rate": 2.2382157868171196e-05, "loss": 1.4488, "step": 2362 }, { "epoch": 0.47844767844767844, "grad_norm": 0.2618071138858795, "learning_rate": 2.234998632408127e-05, "loss": 1.4796, "step": 2363 }, { "epoch": 0.47900207900207903, "grad_norm": 0.24607138335704803, "learning_rate": 2.231782895165835e-05, "loss": 1.4408, "step": 2364 }, { "epoch": 0.47955647955647956, "grad_norm": 0.267446905374527, "learning_rate": 2.228568577672245e-05, "loss": 1.501, "step": 2365 }, { "epoch": 0.4801108801108801, "grad_norm": 0.28419119119644165, "learning_rate": 2.2253556825082176e-05, "loss": 1.5556, "step": 2366 }, { "epoch": 0.4806652806652807, "grad_norm": 0.24734729528427124, "learning_rate": 2.222144212253473e-05, "loss": 1.4127, "step": 2367 }, { "epoch": 0.48121968121968123, "grad_norm": 0.24796847999095917, "learning_rate": 2.2189341694865867e-05, "loss": 1.458, "step": 2368 }, { "epoch": 0.48177408177408176, "grad_norm": 0.26452696323394775, "learning_rate": 2.2157255567849864e-05, "loss": 1.4877, "step": 2369 }, { "epoch": 0.48232848232848236, "grad_norm": 0.2553812265396118, "learning_rate": 2.212518376724952e-05, "loss": 1.4751, "step": 2370 }, { "epoch": 0.4828828828828829, "grad_norm": 0.25002917647361755, "learning_rate": 2.209312631881616e-05, "loss": 1.5005, "step": 2371 }, { "epoch": 0.4834372834372834, "grad_norm": 0.24362796545028687, "learning_rate": 2.2061083248289553e-05, "loss": 1.4761, "step": 2372 }, { "epoch": 0.483991683991684, "grad_norm": 0.2532689869403839, "learning_rate": 2.202905458139796e-05, "loss": 1.4906, "step": 2373 }, { "epoch": 0.48454608454608455, "grad_norm": 0.26276910305023193, "learning_rate": 2.1997040343858015e-05, "loss": 1.4908, "step": 2374 }, { "epoch": 0.4851004851004851, "grad_norm": 0.2650887966156006, "learning_rate": 2.1965040561374823e-05, "loss": 1.5502, "step": 2375 }, { "epoch": 0.4856548856548857, "grad_norm": 0.25577637553215027, "learning_rate": 2.193305525964186e-05, "loss": 1.4266, "step": 2376 }, { "epoch": 0.4862092862092862, "grad_norm": 0.24950306117534637, "learning_rate": 2.1901084464340977e-05, "loss": 1.4948, "step": 2377 }, { "epoch": 0.48676368676368675, "grad_norm": 0.2548583149909973, "learning_rate": 2.1869128201142404e-05, "loss": 1.4876, "step": 2378 }, { "epoch": 0.48731808731808735, "grad_norm": 0.24919457733631134, "learning_rate": 2.1837186495704613e-05, "loss": 1.4818, "step": 2379 }, { "epoch": 0.4878724878724879, "grad_norm": 0.25619757175445557, "learning_rate": 2.1805259373674518e-05, "loss": 1.5021, "step": 2380 }, { "epoch": 0.4884268884268884, "grad_norm": 0.2412337213754654, "learning_rate": 2.177334686068723e-05, "loss": 1.469, "step": 2381 }, { "epoch": 0.488981288981289, "grad_norm": 0.25425437092781067, "learning_rate": 2.174144898236616e-05, "loss": 1.5223, "step": 2382 }, { "epoch": 0.48953568953568954, "grad_norm": 0.254067987203598, "learning_rate": 2.1709565764322983e-05, "loss": 1.5254, "step": 2383 }, { "epoch": 0.4900900900900901, "grad_norm": 0.2596265375614166, "learning_rate": 2.1677697232157558e-05, "loss": 1.4359, "step": 2384 }, { "epoch": 0.49064449064449067, "grad_norm": 0.25426945090293884, "learning_rate": 2.1645843411457987e-05, "loss": 1.4728, "step": 2385 }, { "epoch": 0.4911988911988912, "grad_norm": 0.2617255747318268, "learning_rate": 2.1614004327800552e-05, "loss": 1.5147, "step": 2386 }, { "epoch": 0.49175329175329174, "grad_norm": 0.26310354471206665, "learning_rate": 2.158218000674971e-05, "loss": 1.5434, "step": 2387 }, { "epoch": 0.49230769230769234, "grad_norm": 0.2539352774620056, "learning_rate": 2.1550370473858072e-05, "loss": 1.5178, "step": 2388 }, { "epoch": 0.49286209286209287, "grad_norm": 0.255906879901886, "learning_rate": 2.1518575754666324e-05, "loss": 1.5534, "step": 2389 }, { "epoch": 0.4934164934164934, "grad_norm": 0.2647375464439392, "learning_rate": 2.1486795874703317e-05, "loss": 1.5322, "step": 2390 }, { "epoch": 0.493970893970894, "grad_norm": 0.26155751943588257, "learning_rate": 2.1455030859485967e-05, "loss": 1.5019, "step": 2391 }, { "epoch": 0.49452529452529453, "grad_norm": 0.23829710483551025, "learning_rate": 2.1423280734519244e-05, "loss": 1.4731, "step": 2392 }, { "epoch": 0.49507969507969507, "grad_norm": 0.2774219214916229, "learning_rate": 2.139154552529619e-05, "loss": 1.5312, "step": 2393 }, { "epoch": 0.49563409563409566, "grad_norm": 0.24211084842681885, "learning_rate": 2.1359825257297818e-05, "loss": 1.4379, "step": 2394 }, { "epoch": 0.4961884961884962, "grad_norm": 0.24709615111351013, "learning_rate": 2.132811995599318e-05, "loss": 1.4631, "step": 2395 }, { "epoch": 0.49674289674289673, "grad_norm": 0.2408011555671692, "learning_rate": 2.1296429646839344e-05, "loss": 1.5023, "step": 2396 }, { "epoch": 0.4972972972972973, "grad_norm": 0.25827330350875854, "learning_rate": 2.1264754355281293e-05, "loss": 1.5409, "step": 2397 }, { "epoch": 0.49785169785169786, "grad_norm": 0.25079503655433655, "learning_rate": 2.1233094106751944e-05, "loss": 1.5202, "step": 2398 }, { "epoch": 0.4984060984060984, "grad_norm": 0.2534651458263397, "learning_rate": 2.120144892667217e-05, "loss": 1.5153, "step": 2399 }, { "epoch": 0.498960498960499, "grad_norm": 0.2588454484939575, "learning_rate": 2.116981884045073e-05, "loss": 1.537, "step": 2400 } ], "logging_steps": 1, "max_steps": 3606, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.320030015179981e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }