{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3657, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00027344818156959256, "grad_norm": 0.5185216069221497, "learning_rate": 5.000000000000001e-07, "loss": 1.952, "step": 1 }, { "epoch": 0.0005468963631391851, "grad_norm": 0.48264726996421814, "learning_rate": 1.0000000000000002e-06, "loss": 1.9283, "step": 2 }, { "epoch": 0.0008203445447087777, "grad_norm": 0.3865518867969513, "learning_rate": 1.5e-06, "loss": 1.8876, "step": 3 }, { "epoch": 0.0010937927262783702, "grad_norm": 0.34167248010635376, "learning_rate": 2.0000000000000003e-06, "loss": 1.9148, "step": 4 }, { "epoch": 0.0013672409078479629, "grad_norm": 0.6942479014396667, "learning_rate": 2.5e-06, "loss": 1.946, "step": 5 }, { "epoch": 0.0016406890894175555, "grad_norm": 0.34968069195747375, "learning_rate": 3e-06, "loss": 1.9213, "step": 6 }, { "epoch": 0.0019141372709871479, "grad_norm": 0.48089227080345154, "learning_rate": 3.5000000000000004e-06, "loss": 1.8915, "step": 7 }, { "epoch": 0.0021875854525567405, "grad_norm": 0.34136202931404114, "learning_rate": 4.000000000000001e-06, "loss": 1.9219, "step": 8 }, { "epoch": 0.002461033634126333, "grad_norm": 0.529236912727356, "learning_rate": 4.5e-06, "loss": 1.9322, "step": 9 }, { "epoch": 0.0027344818156959257, "grad_norm": 0.4663859009742737, "learning_rate": 5e-06, "loss": 1.8154, "step": 10 }, { "epoch": 0.0030079299972655183, "grad_norm": 0.9236092567443848, "learning_rate": 5.500000000000001e-06, "loss": 1.9887, "step": 11 }, { "epoch": 0.003281378178835111, "grad_norm": 0.43542373180389404, "learning_rate": 6e-06, "loss": 1.963, "step": 12 }, { "epoch": 0.003554826360404703, "grad_norm": 0.33968186378479004, "learning_rate": 6.5000000000000004e-06, "loss": 1.7446, "step": 13 }, { "epoch": 0.0038282745419742957, "grad_norm": 0.2824525535106659, "learning_rate": 7.000000000000001e-06, "loss": 1.8923, "step": 14 }, { "epoch": 0.004101722723543888, "grad_norm": 0.2588050663471222, "learning_rate": 7.5e-06, "loss": 1.7194, "step": 15 }, { "epoch": 0.004375170905113481, "grad_norm": 0.3694664537906647, "learning_rate": 8.000000000000001e-06, "loss": 1.8807, "step": 16 }, { "epoch": 0.004648619086683074, "grad_norm": 0.2177015244960785, "learning_rate": 8.500000000000002e-06, "loss": 1.8355, "step": 17 }, { "epoch": 0.004922067268252666, "grad_norm": 0.395831435918808, "learning_rate": 9e-06, "loss": 1.9272, "step": 18 }, { "epoch": 0.005195515449822259, "grad_norm": 0.23232288658618927, "learning_rate": 9.5e-06, "loss": 1.8483, "step": 19 }, { "epoch": 0.005468963631391851, "grad_norm": 0.211470827460289, "learning_rate": 1e-05, "loss": 1.8662, "step": 20 }, { "epoch": 0.005742411812961444, "grad_norm": 0.21474166214466095, "learning_rate": 1.05e-05, "loss": 1.8321, "step": 21 }, { "epoch": 0.006015859994531037, "grad_norm": 0.22423861920833588, "learning_rate": 1.1000000000000001e-05, "loss": 1.8677, "step": 22 }, { "epoch": 0.006289308176100629, "grad_norm": 0.22832632064819336, "learning_rate": 1.1500000000000002e-05, "loss": 1.8584, "step": 23 }, { "epoch": 0.006562756357670222, "grad_norm": 0.2289043813943863, "learning_rate": 1.2e-05, "loss": 1.9455, "step": 24 }, { "epoch": 0.006836204539239814, "grad_norm": 0.21795853972434998, "learning_rate": 1.25e-05, "loss": 1.8726, "step": 25 }, { "epoch": 0.007109652720809406, "grad_norm": 0.2133929580450058, "learning_rate": 1.3000000000000001e-05, "loss": 1.8685, "step": 26 }, { "epoch": 0.007383100902378999, "grad_norm": 0.18260058760643005, "learning_rate": 1.3500000000000001e-05, "loss": 1.8412, "step": 27 }, { "epoch": 0.0076565490839485915, "grad_norm": 0.18040010333061218, "learning_rate": 1.4000000000000001e-05, "loss": 1.8779, "step": 28 }, { "epoch": 0.007929997265518185, "grad_norm": 0.17110276222229004, "learning_rate": 1.45e-05, "loss": 1.8708, "step": 29 }, { "epoch": 0.008203445447087777, "grad_norm": 0.17357124388217926, "learning_rate": 1.5e-05, "loss": 1.8665, "step": 30 }, { "epoch": 0.00847689362865737, "grad_norm": 0.1595795750617981, "learning_rate": 1.55e-05, "loss": 1.861, "step": 31 }, { "epoch": 0.008750341810226962, "grad_norm": 0.18393680453300476, "learning_rate": 1.6000000000000003e-05, "loss": 1.9642, "step": 32 }, { "epoch": 0.009023789991796555, "grad_norm": 0.17827364802360535, "learning_rate": 1.65e-05, "loss": 1.8671, "step": 33 }, { "epoch": 0.009297238173366147, "grad_norm": 0.1620740294456482, "learning_rate": 1.7000000000000003e-05, "loss": 1.8421, "step": 34 }, { "epoch": 0.009570686354935739, "grad_norm": 0.15932734310626984, "learning_rate": 1.75e-05, "loss": 1.6872, "step": 35 }, { "epoch": 0.009844134536505332, "grad_norm": 0.1550036072731018, "learning_rate": 1.8e-05, "loss": 1.7036, "step": 36 }, { "epoch": 0.010117582718074924, "grad_norm": 0.17445598542690277, "learning_rate": 1.85e-05, "loss": 1.9004, "step": 37 }, { "epoch": 0.010391030899644518, "grad_norm": 0.15758390724658966, "learning_rate": 1.9e-05, "loss": 1.8423, "step": 38 }, { "epoch": 0.01066447908121411, "grad_norm": 0.16238977015018463, "learning_rate": 1.9500000000000003e-05, "loss": 1.7722, "step": 39 }, { "epoch": 0.010937927262783703, "grad_norm": 0.14447326958179474, "learning_rate": 2e-05, "loss": 1.7876, "step": 40 }, { "epoch": 0.011211375444353295, "grad_norm": 0.1725250482559204, "learning_rate": 2.05e-05, "loss": 1.7639, "step": 41 }, { "epoch": 0.011484823625922888, "grad_norm": 0.14800074696540833, "learning_rate": 2.1e-05, "loss": 1.7457, "step": 42 }, { "epoch": 0.01175827180749248, "grad_norm": 0.16171656548976898, "learning_rate": 2.15e-05, "loss": 1.8488, "step": 43 }, { "epoch": 0.012031719989062073, "grad_norm": 0.17418281733989716, "learning_rate": 2.2000000000000003e-05, "loss": 1.8017, "step": 44 }, { "epoch": 0.012305168170631665, "grad_norm": 0.1688196063041687, "learning_rate": 2.25e-05, "loss": 1.9224, "step": 45 }, { "epoch": 0.012578616352201259, "grad_norm": 0.1520584672689438, "learning_rate": 2.3000000000000003e-05, "loss": 1.8044, "step": 46 }, { "epoch": 0.01285206453377085, "grad_norm": 0.1390339732170105, "learning_rate": 2.35e-05, "loss": 1.709, "step": 47 }, { "epoch": 0.013125512715340444, "grad_norm": 0.17691773176193237, "learning_rate": 2.4e-05, "loss": 1.8488, "step": 48 }, { "epoch": 0.013398960896910036, "grad_norm": 0.1481151133775711, "learning_rate": 2.45e-05, "loss": 1.9118, "step": 49 }, { "epoch": 0.013672409078479627, "grad_norm": 0.14990705251693726, "learning_rate": 2.5e-05, "loss": 1.7629, "step": 50 }, { "epoch": 0.01394585726004922, "grad_norm": 0.15109795331954956, "learning_rate": 2.5500000000000003e-05, "loss": 1.7675, "step": 51 }, { "epoch": 0.014219305441618812, "grad_norm": 0.15651960670948029, "learning_rate": 2.6000000000000002e-05, "loss": 1.7662, "step": 52 }, { "epoch": 0.014492753623188406, "grad_norm": 0.15988552570343018, "learning_rate": 2.6500000000000004e-05, "loss": 1.7708, "step": 53 }, { "epoch": 0.014766201804757998, "grad_norm": 0.1471298187971115, "learning_rate": 2.7000000000000002e-05, "loss": 1.7393, "step": 54 }, { "epoch": 0.015039649986327591, "grad_norm": 0.1633843183517456, "learning_rate": 2.7500000000000004e-05, "loss": 1.8271, "step": 55 }, { "epoch": 0.015313098167897183, "grad_norm": 0.1614934802055359, "learning_rate": 2.8000000000000003e-05, "loss": 1.7251, "step": 56 }, { "epoch": 0.015586546349466776, "grad_norm": 0.15689364075660706, "learning_rate": 2.8499999999999998e-05, "loss": 1.7851, "step": 57 }, { "epoch": 0.01585999453103637, "grad_norm": 0.1521284133195877, "learning_rate": 2.9e-05, "loss": 1.8146, "step": 58 }, { "epoch": 0.01613344271260596, "grad_norm": 0.15829654037952423, "learning_rate": 2.95e-05, "loss": 1.8003, "step": 59 }, { "epoch": 0.016406890894175553, "grad_norm": 0.1448163092136383, "learning_rate": 3e-05, "loss": 1.7512, "step": 60 }, { "epoch": 0.016680339075745145, "grad_norm": 0.1601008176803589, "learning_rate": 3.05e-05, "loss": 1.7764, "step": 61 }, { "epoch": 0.01695378725731474, "grad_norm": 0.14987356960773468, "learning_rate": 3.1e-05, "loss": 1.7891, "step": 62 }, { "epoch": 0.017227235438884332, "grad_norm": 0.16058160364627838, "learning_rate": 3.15e-05, "loss": 1.7912, "step": 63 }, { "epoch": 0.017500683620453924, "grad_norm": 0.15416061878204346, "learning_rate": 3.2000000000000005e-05, "loss": 1.8189, "step": 64 }, { "epoch": 0.017774131802023516, "grad_norm": 0.15167462825775146, "learning_rate": 3.2500000000000004e-05, "loss": 1.6827, "step": 65 }, { "epoch": 0.01804757998359311, "grad_norm": 0.1508171409368515, "learning_rate": 3.3e-05, "loss": 1.7364, "step": 66 }, { "epoch": 0.018321028165162703, "grad_norm": 0.15617215633392334, "learning_rate": 3.35e-05, "loss": 1.7085, "step": 67 }, { "epoch": 0.018594476346732294, "grad_norm": 0.15548895299434662, "learning_rate": 3.4000000000000007e-05, "loss": 1.8138, "step": 68 }, { "epoch": 0.018867924528301886, "grad_norm": 0.17112572491168976, "learning_rate": 3.45e-05, "loss": 1.8459, "step": 69 }, { "epoch": 0.019141372709871478, "grad_norm": 0.18400102853775024, "learning_rate": 3.5e-05, "loss": 1.9113, "step": 70 }, { "epoch": 0.019414820891441073, "grad_norm": 0.16024211049079895, "learning_rate": 3.55e-05, "loss": 1.7861, "step": 71 }, { "epoch": 0.019688269073010665, "grad_norm": 0.16651467978954315, "learning_rate": 3.6e-05, "loss": 1.626, "step": 72 }, { "epoch": 0.019961717254580257, "grad_norm": 0.15933339297771454, "learning_rate": 3.65e-05, "loss": 1.7464, "step": 73 }, { "epoch": 0.02023516543614985, "grad_norm": 0.15582682192325592, "learning_rate": 3.7e-05, "loss": 1.7713, "step": 74 }, { "epoch": 0.020508613617719443, "grad_norm": 0.15848350524902344, "learning_rate": 3.7500000000000003e-05, "loss": 1.7918, "step": 75 }, { "epoch": 0.020782061799289035, "grad_norm": 0.1563899666070938, "learning_rate": 3.8e-05, "loss": 1.7257, "step": 76 }, { "epoch": 0.021055509980858627, "grad_norm": 0.17177645862102509, "learning_rate": 3.85e-05, "loss": 1.784, "step": 77 }, { "epoch": 0.02132895816242822, "grad_norm": 0.15895935893058777, "learning_rate": 3.9000000000000006e-05, "loss": 1.7906, "step": 78 }, { "epoch": 0.021602406343997814, "grad_norm": 0.15804462134838104, "learning_rate": 3.9500000000000005e-05, "loss": 1.6952, "step": 79 }, { "epoch": 0.021875854525567406, "grad_norm": 0.15477772057056427, "learning_rate": 4e-05, "loss": 1.6538, "step": 80 }, { "epoch": 0.022149302707136997, "grad_norm": 0.1593542844057083, "learning_rate": 4.05e-05, "loss": 1.727, "step": 81 }, { "epoch": 0.02242275088870659, "grad_norm": 0.164969801902771, "learning_rate": 4.1e-05, "loss": 1.6443, "step": 82 }, { "epoch": 0.02269619907027618, "grad_norm": 0.16219045221805573, "learning_rate": 4.15e-05, "loss": 1.7775, "step": 83 }, { "epoch": 0.022969647251845776, "grad_norm": 0.15507090091705322, "learning_rate": 4.2e-05, "loss": 1.6936, "step": 84 }, { "epoch": 0.023243095433415368, "grad_norm": 0.17881833016872406, "learning_rate": 4.25e-05, "loss": 1.8355, "step": 85 }, { "epoch": 0.02351654361498496, "grad_norm": 0.16070392727851868, "learning_rate": 4.3e-05, "loss": 1.6853, "step": 86 }, { "epoch": 0.02378999179655455, "grad_norm": 0.16585201025009155, "learning_rate": 4.35e-05, "loss": 1.7516, "step": 87 }, { "epoch": 0.024063439978124147, "grad_norm": 0.1578633040189743, "learning_rate": 4.4000000000000006e-05, "loss": 1.7062, "step": 88 }, { "epoch": 0.02433688815969374, "grad_norm": 0.17426982522010803, "learning_rate": 4.4500000000000004e-05, "loss": 1.7312, "step": 89 }, { "epoch": 0.02461033634126333, "grad_norm": 0.16550804674625397, "learning_rate": 4.5e-05, "loss": 1.7708, "step": 90 }, { "epoch": 0.024883784522832922, "grad_norm": 0.17182576656341553, "learning_rate": 4.55e-05, "loss": 1.7964, "step": 91 }, { "epoch": 0.025157232704402517, "grad_norm": 0.1642204076051712, "learning_rate": 4.600000000000001e-05, "loss": 1.7858, "step": 92 }, { "epoch": 0.02543068088597211, "grad_norm": 0.17677852511405945, "learning_rate": 4.6500000000000005e-05, "loss": 1.701, "step": 93 }, { "epoch": 0.0257041290675417, "grad_norm": 0.17346423864364624, "learning_rate": 4.7e-05, "loss": 1.8136, "step": 94 }, { "epoch": 0.025977577249111292, "grad_norm": 0.17641915380954742, "learning_rate": 4.75e-05, "loss": 1.7921, "step": 95 }, { "epoch": 0.026251025430680888, "grad_norm": 0.19822247326374054, "learning_rate": 4.8e-05, "loss": 1.7887, "step": 96 }, { "epoch": 0.02652447361225048, "grad_norm": 0.1817287653684616, "learning_rate": 4.85e-05, "loss": 1.7063, "step": 97 }, { "epoch": 0.02679792179382007, "grad_norm": 0.1644916832447052, "learning_rate": 4.9e-05, "loss": 1.684, "step": 98 }, { "epoch": 0.027071369975389663, "grad_norm": 0.19094440340995789, "learning_rate": 4.9500000000000004e-05, "loss": 1.8106, "step": 99 }, { "epoch": 0.027344818156959255, "grad_norm": 0.16572299599647522, "learning_rate": 5e-05, "loss": 1.6812, "step": 100 }, { "epoch": 0.02761826633852885, "grad_norm": 0.16459035873413086, "learning_rate": 5e-05, "loss": 1.697, "step": 101 }, { "epoch": 0.02789171452009844, "grad_norm": 0.18967324495315552, "learning_rate": 5e-05, "loss": 1.7078, "step": 102 }, { "epoch": 0.028165162701668033, "grad_norm": 0.1775873899459839, "learning_rate": 5e-05, "loss": 1.7995, "step": 103 }, { "epoch": 0.028438610883237625, "grad_norm": 0.18641340732574463, "learning_rate": 5e-05, "loss": 1.8392, "step": 104 }, { "epoch": 0.02871205906480722, "grad_norm": 0.17424939572811127, "learning_rate": 5e-05, "loss": 1.7695, "step": 105 }, { "epoch": 0.028985507246376812, "grad_norm": 0.19932492077350616, "learning_rate": 5e-05, "loss": 1.6987, "step": 106 }, { "epoch": 0.029258955427946404, "grad_norm": 0.1666601300239563, "learning_rate": 5e-05, "loss": 1.6425, "step": 107 }, { "epoch": 0.029532403609515995, "grad_norm": 0.19102485477924347, "learning_rate": 5e-05, "loss": 1.7754, "step": 108 }, { "epoch": 0.02980585179108559, "grad_norm": 0.17251844704151154, "learning_rate": 5e-05, "loss": 1.7315, "step": 109 }, { "epoch": 0.030079299972655182, "grad_norm": 0.1837855726480484, "learning_rate": 5e-05, "loss": 1.805, "step": 110 }, { "epoch": 0.030352748154224774, "grad_norm": 0.19414560496807098, "learning_rate": 5e-05, "loss": 1.6129, "step": 111 }, { "epoch": 0.030626196335794366, "grad_norm": 0.16896690428256989, "learning_rate": 5e-05, "loss": 1.8306, "step": 112 }, { "epoch": 0.03089964451736396, "grad_norm": 0.18901929259300232, "learning_rate": 5e-05, "loss": 1.7591, "step": 113 }, { "epoch": 0.031173092698933553, "grad_norm": 0.17992505431175232, "learning_rate": 5e-05, "loss": 1.7676, "step": 114 }, { "epoch": 0.031446540880503145, "grad_norm": 0.1731676608324051, "learning_rate": 5e-05, "loss": 1.7749, "step": 115 }, { "epoch": 0.03171998906207274, "grad_norm": 0.17806388437747955, "learning_rate": 5e-05, "loss": 1.7912, "step": 116 }, { "epoch": 0.03199343724364233, "grad_norm": 0.176969513297081, "learning_rate": 5e-05, "loss": 1.7966, "step": 117 }, { "epoch": 0.03226688542521192, "grad_norm": 0.18301358819007874, "learning_rate": 5e-05, "loss": 1.7454, "step": 118 }, { "epoch": 0.03254033360678151, "grad_norm": 0.18621793389320374, "learning_rate": 5e-05, "loss": 1.8087, "step": 119 }, { "epoch": 0.03281378178835111, "grad_norm": 0.17661884427070618, "learning_rate": 5e-05, "loss": 1.8529, "step": 120 }, { "epoch": 0.0330872299699207, "grad_norm": 0.1590997874736786, "learning_rate": 5e-05, "loss": 1.6437, "step": 121 }, { "epoch": 0.03336067815149029, "grad_norm": 0.15839175879955292, "learning_rate": 5e-05, "loss": 1.6921, "step": 122 }, { "epoch": 0.033634126333059886, "grad_norm": 0.1695318967103958, "learning_rate": 5e-05, "loss": 1.7514, "step": 123 }, { "epoch": 0.03390757451462948, "grad_norm": 0.17511457204818726, "learning_rate": 5e-05, "loss": 1.7665, "step": 124 }, { "epoch": 0.03418102269619907, "grad_norm": 0.16365903615951538, "learning_rate": 5e-05, "loss": 1.7289, "step": 125 }, { "epoch": 0.034454470877768664, "grad_norm": 0.16163618862628937, "learning_rate": 5e-05, "loss": 1.7243, "step": 126 }, { "epoch": 0.03472791905933825, "grad_norm": 0.1713578999042511, "learning_rate": 5e-05, "loss": 1.8056, "step": 127 }, { "epoch": 0.03500136724090785, "grad_norm": 0.16412892937660217, "learning_rate": 5e-05, "loss": 1.7385, "step": 128 }, { "epoch": 0.03527481542247744, "grad_norm": 0.16879543662071228, "learning_rate": 5e-05, "loss": 1.7961, "step": 129 }, { "epoch": 0.03554826360404703, "grad_norm": 0.1659931093454361, "learning_rate": 5e-05, "loss": 1.6833, "step": 130 }, { "epoch": 0.035821711785616626, "grad_norm": 0.16397275030612946, "learning_rate": 5e-05, "loss": 1.7543, "step": 131 }, { "epoch": 0.03609515996718622, "grad_norm": 0.17216797173023224, "learning_rate": 5e-05, "loss": 1.8237, "step": 132 }, { "epoch": 0.03636860814875581, "grad_norm": 0.15959759056568146, "learning_rate": 5e-05, "loss": 1.6513, "step": 133 }, { "epoch": 0.036642056330325405, "grad_norm": 0.17499953508377075, "learning_rate": 5e-05, "loss": 1.7132, "step": 134 }, { "epoch": 0.03691550451189499, "grad_norm": 0.17289526760578156, "learning_rate": 5e-05, "loss": 1.6642, "step": 135 }, { "epoch": 0.03718895269346459, "grad_norm": 0.1796165257692337, "learning_rate": 5e-05, "loss": 1.6765, "step": 136 }, { "epoch": 0.037462400875034184, "grad_norm": 0.20457540452480316, "learning_rate": 5e-05, "loss": 1.7167, "step": 137 }, { "epoch": 0.03773584905660377, "grad_norm": 0.18604455888271332, "learning_rate": 5e-05, "loss": 1.7398, "step": 138 }, { "epoch": 0.03800929723817337, "grad_norm": 0.20968973636627197, "learning_rate": 5e-05, "loss": 1.7422, "step": 139 }, { "epoch": 0.038282745419742956, "grad_norm": 0.21795013546943665, "learning_rate": 5e-05, "loss": 1.7568, "step": 140 }, { "epoch": 0.03855619360131255, "grad_norm": 0.19364839792251587, "learning_rate": 5e-05, "loss": 1.7591, "step": 141 }, { "epoch": 0.038829641782882146, "grad_norm": 0.2641007602214813, "learning_rate": 5e-05, "loss": 1.8116, "step": 142 }, { "epoch": 0.039103089964451734, "grad_norm": 0.17127980291843414, "learning_rate": 5e-05, "loss": 1.7593, "step": 143 }, { "epoch": 0.03937653814602133, "grad_norm": 0.2609815001487732, "learning_rate": 5e-05, "loss": 1.7366, "step": 144 }, { "epoch": 0.039649986327590925, "grad_norm": 0.1736164689064026, "learning_rate": 5e-05, "loss": 1.6295, "step": 145 }, { "epoch": 0.03992343450916051, "grad_norm": 0.17097944021224976, "learning_rate": 5e-05, "loss": 1.6762, "step": 146 }, { "epoch": 0.04019688269073011, "grad_norm": 0.19413994252681732, "learning_rate": 5e-05, "loss": 1.7808, "step": 147 }, { "epoch": 0.0404703308722997, "grad_norm": 0.19383320212364197, "learning_rate": 5e-05, "loss": 1.7765, "step": 148 }, { "epoch": 0.04074377905386929, "grad_norm": 0.18459807336330414, "learning_rate": 5e-05, "loss": 1.7168, "step": 149 }, { "epoch": 0.04101722723543889, "grad_norm": 0.18464331328868866, "learning_rate": 5e-05, "loss": 1.8481, "step": 150 }, { "epoch": 0.041290675417008475, "grad_norm": 0.18524886667728424, "learning_rate": 5e-05, "loss": 1.7175, "step": 151 }, { "epoch": 0.04156412359857807, "grad_norm": 0.1744994819164276, "learning_rate": 5e-05, "loss": 1.7207, "step": 152 }, { "epoch": 0.04183757178014766, "grad_norm": 0.1815788894891739, "learning_rate": 5e-05, "loss": 1.6548, "step": 153 }, { "epoch": 0.042111019961717254, "grad_norm": 0.21097545325756073, "learning_rate": 5e-05, "loss": 1.7963, "step": 154 }, { "epoch": 0.04238446814328685, "grad_norm": 0.17117224633693695, "learning_rate": 5e-05, "loss": 1.7514, "step": 155 }, { "epoch": 0.04265791632485644, "grad_norm": 0.17618225514888763, "learning_rate": 5e-05, "loss": 1.7029, "step": 156 }, { "epoch": 0.04293136450642603, "grad_norm": 0.223564013838768, "learning_rate": 5e-05, "loss": 1.8719, "step": 157 }, { "epoch": 0.04320481268799563, "grad_norm": 0.17528285086154938, "learning_rate": 5e-05, "loss": 1.8264, "step": 158 }, { "epoch": 0.043478260869565216, "grad_norm": 0.20005618035793304, "learning_rate": 5e-05, "loss": 1.7559, "step": 159 }, { "epoch": 0.04375170905113481, "grad_norm": 0.18977142870426178, "learning_rate": 5e-05, "loss": 1.7066, "step": 160 }, { "epoch": 0.0440251572327044, "grad_norm": 0.18722118437290192, "learning_rate": 5e-05, "loss": 1.7769, "step": 161 }, { "epoch": 0.044298605414273995, "grad_norm": 0.16823123395442963, "learning_rate": 5e-05, "loss": 1.7666, "step": 162 }, { "epoch": 0.04457205359584359, "grad_norm": 0.18529324233531952, "learning_rate": 5e-05, "loss": 1.6337, "step": 163 }, { "epoch": 0.04484550177741318, "grad_norm": 0.18550924956798553, "learning_rate": 5e-05, "loss": 1.6754, "step": 164 }, { "epoch": 0.045118949958982774, "grad_norm": 0.15453596413135529, "learning_rate": 5e-05, "loss": 1.6601, "step": 165 }, { "epoch": 0.04539239814055236, "grad_norm": 0.17858926951885223, "learning_rate": 5e-05, "loss": 1.6343, "step": 166 }, { "epoch": 0.04566584632212196, "grad_norm": 0.1769731193780899, "learning_rate": 5e-05, "loss": 1.6508, "step": 167 }, { "epoch": 0.04593929450369155, "grad_norm": 0.16900819540023804, "learning_rate": 5e-05, "loss": 1.7167, "step": 168 }, { "epoch": 0.04621274268526114, "grad_norm": 0.18043388426303864, "learning_rate": 5e-05, "loss": 1.75, "step": 169 }, { "epoch": 0.046486190866830736, "grad_norm": 0.2040599286556244, "learning_rate": 5e-05, "loss": 1.7314, "step": 170 }, { "epoch": 0.04675963904840033, "grad_norm": 0.17198055982589722, "learning_rate": 5e-05, "loss": 1.6897, "step": 171 }, { "epoch": 0.04703308722996992, "grad_norm": 0.16982243955135345, "learning_rate": 5e-05, "loss": 1.7046, "step": 172 }, { "epoch": 0.047306535411539515, "grad_norm": 0.1677250862121582, "learning_rate": 5e-05, "loss": 1.7385, "step": 173 }, { "epoch": 0.0475799835931091, "grad_norm": 0.16259929537773132, "learning_rate": 5e-05, "loss": 1.7417, "step": 174 }, { "epoch": 0.0478534317746787, "grad_norm": 0.1767575442790985, "learning_rate": 5e-05, "loss": 1.7981, "step": 175 }, { "epoch": 0.04812687995624829, "grad_norm": 0.17178016901016235, "learning_rate": 5e-05, "loss": 1.7238, "step": 176 }, { "epoch": 0.04840032813781788, "grad_norm": 0.1756935715675354, "learning_rate": 5e-05, "loss": 1.6331, "step": 177 }, { "epoch": 0.04867377631938748, "grad_norm": 0.15742701292037964, "learning_rate": 5e-05, "loss": 1.7367, "step": 178 }, { "epoch": 0.04894722450095707, "grad_norm": 0.16502848267555237, "learning_rate": 5e-05, "loss": 1.6908, "step": 179 }, { "epoch": 0.04922067268252666, "grad_norm": 0.1676185131072998, "learning_rate": 5e-05, "loss": 1.6611, "step": 180 }, { "epoch": 0.049494120864096255, "grad_norm": 0.19482578337192535, "learning_rate": 5e-05, "loss": 1.7986, "step": 181 }, { "epoch": 0.049767569045665844, "grad_norm": 0.15637359023094177, "learning_rate": 5e-05, "loss": 1.7225, "step": 182 }, { "epoch": 0.05004101722723544, "grad_norm": 0.17269264161586761, "learning_rate": 5e-05, "loss": 1.692, "step": 183 }, { "epoch": 0.050314465408805034, "grad_norm": 0.17836996912956238, "learning_rate": 5e-05, "loss": 1.7457, "step": 184 }, { "epoch": 0.05058791359037462, "grad_norm": 0.1829950511455536, "learning_rate": 5e-05, "loss": 1.7991, "step": 185 }, { "epoch": 0.05086136177194422, "grad_norm": 0.18094682693481445, "learning_rate": 5e-05, "loss": 1.7173, "step": 186 }, { "epoch": 0.051134809953513806, "grad_norm": 0.14723382890224457, "learning_rate": 5e-05, "loss": 1.6355, "step": 187 }, { "epoch": 0.0514082581350834, "grad_norm": 0.18650388717651367, "learning_rate": 5e-05, "loss": 1.7328, "step": 188 }, { "epoch": 0.051681706316652996, "grad_norm": 0.1545383185148239, "learning_rate": 5e-05, "loss": 1.6386, "step": 189 }, { "epoch": 0.051955154498222585, "grad_norm": 0.18034450709819794, "learning_rate": 5e-05, "loss": 1.7728, "step": 190 }, { "epoch": 0.05222860267979218, "grad_norm": 0.17649757862091064, "learning_rate": 5e-05, "loss": 1.8434, "step": 191 }, { "epoch": 0.052502050861361775, "grad_norm": 0.1520988494157791, "learning_rate": 5e-05, "loss": 1.6414, "step": 192 }, { "epoch": 0.05277549904293136, "grad_norm": 0.1721171736717224, "learning_rate": 5e-05, "loss": 1.7516, "step": 193 }, { "epoch": 0.05304894722450096, "grad_norm": 0.16801105439662933, "learning_rate": 5e-05, "loss": 1.741, "step": 194 }, { "epoch": 0.05332239540607055, "grad_norm": 0.1697547286748886, "learning_rate": 5e-05, "loss": 1.7154, "step": 195 }, { "epoch": 0.05359584358764014, "grad_norm": 0.163418710231781, "learning_rate": 5e-05, "loss": 1.7957, "step": 196 }, { "epoch": 0.05386929176920974, "grad_norm": 0.15881727635860443, "learning_rate": 5e-05, "loss": 1.71, "step": 197 }, { "epoch": 0.054142739950779326, "grad_norm": 0.18331541121006012, "learning_rate": 5e-05, "loss": 1.7412, "step": 198 }, { "epoch": 0.05441618813234892, "grad_norm": 0.15311068296432495, "learning_rate": 5e-05, "loss": 1.7389, "step": 199 }, { "epoch": 0.05468963631391851, "grad_norm": 0.17510437965393066, "learning_rate": 5e-05, "loss": 1.7061, "step": 200 }, { "epoch": 0.054963084495488104, "grad_norm": 0.1618979126214981, "learning_rate": 5e-05, "loss": 1.7949, "step": 201 }, { "epoch": 0.0552365326770577, "grad_norm": 0.16283638775348663, "learning_rate": 5e-05, "loss": 1.6262, "step": 202 }, { "epoch": 0.05550998085862729, "grad_norm": 0.17936623096466064, "learning_rate": 5e-05, "loss": 1.7559, "step": 203 }, { "epoch": 0.05578342904019688, "grad_norm": 0.15585114061832428, "learning_rate": 5e-05, "loss": 1.6809, "step": 204 }, { "epoch": 0.05605687722176648, "grad_norm": 0.16536429524421692, "learning_rate": 5e-05, "loss": 1.8167, "step": 205 }, { "epoch": 0.056330325403336066, "grad_norm": 0.16469433903694153, "learning_rate": 5e-05, "loss": 1.762, "step": 206 }, { "epoch": 0.05660377358490566, "grad_norm": 0.15904970467090607, "learning_rate": 5e-05, "loss": 1.7354, "step": 207 }, { "epoch": 0.05687722176647525, "grad_norm": 0.15505826473236084, "learning_rate": 5e-05, "loss": 1.6751, "step": 208 }, { "epoch": 0.057150669948044845, "grad_norm": 0.1706695258617401, "learning_rate": 5e-05, "loss": 1.8423, "step": 209 }, { "epoch": 0.05742411812961444, "grad_norm": 0.1645784229040146, "learning_rate": 5e-05, "loss": 1.7291, "step": 210 }, { "epoch": 0.05769756631118403, "grad_norm": 0.15510506927967072, "learning_rate": 5e-05, "loss": 1.6877, "step": 211 }, { "epoch": 0.057971014492753624, "grad_norm": 0.16437038779258728, "learning_rate": 5e-05, "loss": 1.6895, "step": 212 }, { "epoch": 0.05824446267432322, "grad_norm": 0.15057580173015594, "learning_rate": 5e-05, "loss": 1.6953, "step": 213 }, { "epoch": 0.05851791085589281, "grad_norm": 0.17037834227085114, "learning_rate": 5e-05, "loss": 1.6958, "step": 214 }, { "epoch": 0.0587913590374624, "grad_norm": 0.1691209077835083, "learning_rate": 5e-05, "loss": 1.6792, "step": 215 }, { "epoch": 0.05906480721903199, "grad_norm": 0.18123316764831543, "learning_rate": 5e-05, "loss": 1.7774, "step": 216 }, { "epoch": 0.059338255400601586, "grad_norm": 0.17319968342781067, "learning_rate": 5e-05, "loss": 1.7516, "step": 217 }, { "epoch": 0.05961170358217118, "grad_norm": 0.16802047193050385, "learning_rate": 5e-05, "loss": 1.6909, "step": 218 }, { "epoch": 0.05988515176374077, "grad_norm": 0.16676077246665955, "learning_rate": 5e-05, "loss": 1.613, "step": 219 }, { "epoch": 0.060158599945310365, "grad_norm": 0.15997642278671265, "learning_rate": 5e-05, "loss": 1.6497, "step": 220 }, { "epoch": 0.06043204812687995, "grad_norm": 0.1547921746969223, "learning_rate": 5e-05, "loss": 1.6874, "step": 221 }, { "epoch": 0.06070549630844955, "grad_norm": 0.17867213487625122, "learning_rate": 5e-05, "loss": 1.6957, "step": 222 }, { "epoch": 0.060978944490019144, "grad_norm": 0.15124128758907318, "learning_rate": 5e-05, "loss": 1.6269, "step": 223 }, { "epoch": 0.06125239267158873, "grad_norm": 0.15765389800071716, "learning_rate": 5e-05, "loss": 1.7699, "step": 224 }, { "epoch": 0.06152584085315833, "grad_norm": 0.18673233687877655, "learning_rate": 5e-05, "loss": 1.8988, "step": 225 }, { "epoch": 0.06179928903472792, "grad_norm": 0.148275688290596, "learning_rate": 5e-05, "loss": 1.6565, "step": 226 }, { "epoch": 0.06207273721629751, "grad_norm": 0.17454782128334045, "learning_rate": 5e-05, "loss": 1.7273, "step": 227 }, { "epoch": 0.062346185397867106, "grad_norm": 0.15484969317913055, "learning_rate": 5e-05, "loss": 1.6832, "step": 228 }, { "epoch": 0.0626196335794367, "grad_norm": 0.16521553695201874, "learning_rate": 5e-05, "loss": 1.886, "step": 229 }, { "epoch": 0.06289308176100629, "grad_norm": 0.1734921932220459, "learning_rate": 5e-05, "loss": 1.8453, "step": 230 }, { "epoch": 0.06316652994257588, "grad_norm": 0.16037142276763916, "learning_rate": 5e-05, "loss": 1.6604, "step": 231 }, { "epoch": 0.06343997812414548, "grad_norm": 0.194137305021286, "learning_rate": 5e-05, "loss": 1.7101, "step": 232 }, { "epoch": 0.06371342630571507, "grad_norm": 0.16026362776756287, "learning_rate": 5e-05, "loss": 1.7296, "step": 233 }, { "epoch": 0.06398687448728466, "grad_norm": 0.1773460954427719, "learning_rate": 5e-05, "loss": 1.6812, "step": 234 }, { "epoch": 0.06426032266885426, "grad_norm": 0.19645363092422485, "learning_rate": 5e-05, "loss": 1.8694, "step": 235 }, { "epoch": 0.06453377085042385, "grad_norm": 0.16966943442821503, "learning_rate": 5e-05, "loss": 1.6903, "step": 236 }, { "epoch": 0.06480721903199343, "grad_norm": 0.21228881180286407, "learning_rate": 5e-05, "loss": 1.7405, "step": 237 }, { "epoch": 0.06508066721356302, "grad_norm": 0.16073106229305267, "learning_rate": 5e-05, "loss": 1.7031, "step": 238 }, { "epoch": 0.06535411539513263, "grad_norm": 0.1698150485754013, "learning_rate": 5e-05, "loss": 1.6663, "step": 239 }, { "epoch": 0.06562756357670221, "grad_norm": 0.1578008383512497, "learning_rate": 5e-05, "loss": 1.6899, "step": 240 }, { "epoch": 0.0659010117582718, "grad_norm": 0.1813380867242813, "learning_rate": 5e-05, "loss": 1.7034, "step": 241 }, { "epoch": 0.0661744599398414, "grad_norm": 0.16481561958789825, "learning_rate": 5e-05, "loss": 1.7453, "step": 242 }, { "epoch": 0.06644790812141099, "grad_norm": 0.15525515377521515, "learning_rate": 5e-05, "loss": 1.7296, "step": 243 }, { "epoch": 0.06672135630298058, "grad_norm": 0.19922387599945068, "learning_rate": 5e-05, "loss": 1.7433, "step": 244 }, { "epoch": 0.06699480448455018, "grad_norm": 0.18391215801239014, "learning_rate": 5e-05, "loss": 1.7203, "step": 245 }, { "epoch": 0.06726825266611977, "grad_norm": 0.15986333787441254, "learning_rate": 5e-05, "loss": 1.6375, "step": 246 }, { "epoch": 0.06754170084768936, "grad_norm": 0.15006937086582184, "learning_rate": 5e-05, "loss": 1.6448, "step": 247 }, { "epoch": 0.06781514902925896, "grad_norm": 0.19520802795886993, "learning_rate": 5e-05, "loss": 1.6555, "step": 248 }, { "epoch": 0.06808859721082855, "grad_norm": 0.14804011583328247, "learning_rate": 5e-05, "loss": 1.682, "step": 249 }, { "epoch": 0.06836204539239814, "grad_norm": 0.18628214299678802, "learning_rate": 5e-05, "loss": 1.7169, "step": 250 }, { "epoch": 0.06863549357396773, "grad_norm": 0.15606792271137238, "learning_rate": 5e-05, "loss": 1.7105, "step": 251 }, { "epoch": 0.06890894175553733, "grad_norm": 0.17179042100906372, "learning_rate": 5e-05, "loss": 1.703, "step": 252 }, { "epoch": 0.06918238993710692, "grad_norm": 0.1775059700012207, "learning_rate": 5e-05, "loss": 1.6497, "step": 253 }, { "epoch": 0.0694558381186765, "grad_norm": 0.15647803246974945, "learning_rate": 5e-05, "loss": 1.6498, "step": 254 }, { "epoch": 0.06972928630024611, "grad_norm": 0.1764082908630371, "learning_rate": 5e-05, "loss": 1.7309, "step": 255 }, { "epoch": 0.0700027344818157, "grad_norm": 0.1760226935148239, "learning_rate": 5e-05, "loss": 1.7507, "step": 256 }, { "epoch": 0.07027618266338528, "grad_norm": 0.1542169153690338, "learning_rate": 5e-05, "loss": 1.569, "step": 257 }, { "epoch": 0.07054963084495489, "grad_norm": 0.21275527775287628, "learning_rate": 5e-05, "loss": 1.7928, "step": 258 }, { "epoch": 0.07082307902652447, "grad_norm": 0.16845186054706573, "learning_rate": 5e-05, "loss": 1.7246, "step": 259 }, { "epoch": 0.07109652720809406, "grad_norm": 0.2582179605960846, "learning_rate": 5e-05, "loss": 1.6447, "step": 260 }, { "epoch": 0.07136997538966366, "grad_norm": 0.16100846230983734, "learning_rate": 5e-05, "loss": 1.7214, "step": 261 }, { "epoch": 0.07164342357123325, "grad_norm": 0.19200801849365234, "learning_rate": 5e-05, "loss": 1.7948, "step": 262 }, { "epoch": 0.07191687175280284, "grad_norm": 0.2010166496038437, "learning_rate": 5e-05, "loss": 1.6726, "step": 263 }, { "epoch": 0.07219031993437244, "grad_norm": 0.15795177221298218, "learning_rate": 5e-05, "loss": 1.7046, "step": 264 }, { "epoch": 0.07246376811594203, "grad_norm": 0.1857866495847702, "learning_rate": 5e-05, "loss": 1.6915, "step": 265 }, { "epoch": 0.07273721629751162, "grad_norm": 0.16975003480911255, "learning_rate": 5e-05, "loss": 1.6847, "step": 266 }, { "epoch": 0.07301066447908121, "grad_norm": 0.14858882129192352, "learning_rate": 5e-05, "loss": 1.7224, "step": 267 }, { "epoch": 0.07328411266065081, "grad_norm": 0.20861102640628815, "learning_rate": 5e-05, "loss": 1.6836, "step": 268 }, { "epoch": 0.0735575608422204, "grad_norm": 0.1622302383184433, "learning_rate": 5e-05, "loss": 1.6482, "step": 269 }, { "epoch": 0.07383100902378999, "grad_norm": 0.1789664328098297, "learning_rate": 5e-05, "loss": 1.7508, "step": 270 }, { "epoch": 0.07410445720535959, "grad_norm": 0.16430623829364777, "learning_rate": 5e-05, "loss": 1.735, "step": 271 }, { "epoch": 0.07437790538692918, "grad_norm": 0.15610988438129425, "learning_rate": 5e-05, "loss": 1.6569, "step": 272 }, { "epoch": 0.07465135356849877, "grad_norm": 0.16325096786022186, "learning_rate": 5e-05, "loss": 1.6165, "step": 273 }, { "epoch": 0.07492480175006837, "grad_norm": 0.15770940482616425, "learning_rate": 5e-05, "loss": 1.727, "step": 274 }, { "epoch": 0.07519824993163796, "grad_norm": 0.15711399912834167, "learning_rate": 5e-05, "loss": 1.6336, "step": 275 }, { "epoch": 0.07547169811320754, "grad_norm": 0.16381201148033142, "learning_rate": 5e-05, "loss": 1.69, "step": 276 }, { "epoch": 0.07574514629477715, "grad_norm": 0.15251821279525757, "learning_rate": 5e-05, "loss": 1.6241, "step": 277 }, { "epoch": 0.07601859447634673, "grad_norm": 0.177230566740036, "learning_rate": 5e-05, "loss": 1.7341, "step": 278 }, { "epoch": 0.07629204265791632, "grad_norm": 0.16340211033821106, "learning_rate": 5e-05, "loss": 1.726, "step": 279 }, { "epoch": 0.07656549083948591, "grad_norm": 0.15226496756076813, "learning_rate": 5e-05, "loss": 1.7703, "step": 280 }, { "epoch": 0.07683893902105551, "grad_norm": 0.1857025921344757, "learning_rate": 5e-05, "loss": 1.6637, "step": 281 }, { "epoch": 0.0771123872026251, "grad_norm": 0.1753539890050888, "learning_rate": 5e-05, "loss": 1.8154, "step": 282 }, { "epoch": 0.07738583538419469, "grad_norm": 0.16672004759311676, "learning_rate": 5e-05, "loss": 1.6617, "step": 283 }, { "epoch": 0.07765928356576429, "grad_norm": 0.186684250831604, "learning_rate": 5e-05, "loss": 1.6501, "step": 284 }, { "epoch": 0.07793273174733388, "grad_norm": 0.18949177861213684, "learning_rate": 5e-05, "loss": 1.7384, "step": 285 }, { "epoch": 0.07820617992890347, "grad_norm": 0.1607401967048645, "learning_rate": 5e-05, "loss": 1.719, "step": 286 }, { "epoch": 0.07847962811047307, "grad_norm": 0.15836010873317719, "learning_rate": 5e-05, "loss": 1.6533, "step": 287 }, { "epoch": 0.07875307629204266, "grad_norm": 0.17572252452373505, "learning_rate": 5e-05, "loss": 1.7285, "step": 288 }, { "epoch": 0.07902652447361225, "grad_norm": 0.159013569355011, "learning_rate": 5e-05, "loss": 1.704, "step": 289 }, { "epoch": 0.07929997265518185, "grad_norm": 0.16590869426727295, "learning_rate": 5e-05, "loss": 1.7192, "step": 290 }, { "epoch": 0.07957342083675144, "grad_norm": 0.1848197877407074, "learning_rate": 5e-05, "loss": 1.7157, "step": 291 }, { "epoch": 0.07984686901832103, "grad_norm": 0.1681578904390335, "learning_rate": 5e-05, "loss": 1.729, "step": 292 }, { "epoch": 0.08012031719989061, "grad_norm": 0.18026727437973022, "learning_rate": 5e-05, "loss": 1.6524, "step": 293 }, { "epoch": 0.08039376538146022, "grad_norm": 0.18624775111675262, "learning_rate": 5e-05, "loss": 1.7211, "step": 294 }, { "epoch": 0.0806672135630298, "grad_norm": 0.17745369672775269, "learning_rate": 5e-05, "loss": 1.7268, "step": 295 }, { "epoch": 0.0809406617445994, "grad_norm": 0.196435809135437, "learning_rate": 5e-05, "loss": 1.6314, "step": 296 }, { "epoch": 0.081214109926169, "grad_norm": 0.15979966521263123, "learning_rate": 5e-05, "loss": 1.73, "step": 297 }, { "epoch": 0.08148755810773858, "grad_norm": 0.16943073272705078, "learning_rate": 5e-05, "loss": 1.7083, "step": 298 }, { "epoch": 0.08176100628930817, "grad_norm": 0.18523457646369934, "learning_rate": 5e-05, "loss": 1.7286, "step": 299 }, { "epoch": 0.08203445447087777, "grad_norm": 0.15192170441150665, "learning_rate": 5e-05, "loss": 1.7399, "step": 300 }, { "epoch": 0.08230790265244736, "grad_norm": 0.18083369731903076, "learning_rate": 5e-05, "loss": 1.7006, "step": 301 }, { "epoch": 0.08258135083401695, "grad_norm": 0.16836769878864288, "learning_rate": 5e-05, "loss": 1.8089, "step": 302 }, { "epoch": 0.08285479901558655, "grad_norm": 0.15962150692939758, "learning_rate": 5e-05, "loss": 1.7109, "step": 303 }, { "epoch": 0.08312824719715614, "grad_norm": 0.18868067860603333, "learning_rate": 5e-05, "loss": 1.7697, "step": 304 }, { "epoch": 0.08340169537872573, "grad_norm": 0.16335056722164154, "learning_rate": 5e-05, "loss": 1.7866, "step": 305 }, { "epoch": 0.08367514356029532, "grad_norm": 0.23511578142642975, "learning_rate": 5e-05, "loss": 1.8343, "step": 306 }, { "epoch": 0.08394859174186492, "grad_norm": 0.15618833899497986, "learning_rate": 5e-05, "loss": 1.633, "step": 307 }, { "epoch": 0.08422203992343451, "grad_norm": 0.16993245482444763, "learning_rate": 5e-05, "loss": 1.7104, "step": 308 }, { "epoch": 0.0844954881050041, "grad_norm": 0.18835750222206116, "learning_rate": 5e-05, "loss": 1.7113, "step": 309 }, { "epoch": 0.0847689362865737, "grad_norm": 0.16639582812786102, "learning_rate": 5e-05, "loss": 1.816, "step": 310 }, { "epoch": 0.08504238446814329, "grad_norm": 0.15975739061832428, "learning_rate": 5e-05, "loss": 1.5883, "step": 311 }, { "epoch": 0.08531583264971287, "grad_norm": 0.20657338201999664, "learning_rate": 5e-05, "loss": 1.6521, "step": 312 }, { "epoch": 0.08558928083128248, "grad_norm": 0.15396147966384888, "learning_rate": 5e-05, "loss": 1.7602, "step": 313 }, { "epoch": 0.08586272901285207, "grad_norm": 0.18306365609169006, "learning_rate": 5e-05, "loss": 1.6811, "step": 314 }, { "epoch": 0.08613617719442165, "grad_norm": 0.1487811654806137, "learning_rate": 5e-05, "loss": 1.5397, "step": 315 }, { "epoch": 0.08640962537599126, "grad_norm": 0.14201436936855316, "learning_rate": 5e-05, "loss": 1.5702, "step": 316 }, { "epoch": 0.08668307355756084, "grad_norm": 0.17401176691055298, "learning_rate": 5e-05, "loss": 1.6514, "step": 317 }, { "epoch": 0.08695652173913043, "grad_norm": 0.16217194497585297, "learning_rate": 5e-05, "loss": 1.7284, "step": 318 }, { "epoch": 0.08722996992070002, "grad_norm": 0.1471739411354065, "learning_rate": 5e-05, "loss": 1.6665, "step": 319 }, { "epoch": 0.08750341810226962, "grad_norm": 0.1779485046863556, "learning_rate": 5e-05, "loss": 1.7358, "step": 320 }, { "epoch": 0.08777686628383921, "grad_norm": 0.15634022653102875, "learning_rate": 5e-05, "loss": 1.6477, "step": 321 }, { "epoch": 0.0880503144654088, "grad_norm": 0.1602734923362732, "learning_rate": 5e-05, "loss": 1.7381, "step": 322 }, { "epoch": 0.0883237626469784, "grad_norm": 0.1770986020565033, "learning_rate": 5e-05, "loss": 1.7719, "step": 323 }, { "epoch": 0.08859721082854799, "grad_norm": 0.1577000766992569, "learning_rate": 5e-05, "loss": 1.7571, "step": 324 }, { "epoch": 0.08887065901011758, "grad_norm": 0.158709317445755, "learning_rate": 5e-05, "loss": 1.7026, "step": 325 }, { "epoch": 0.08914410719168718, "grad_norm": 0.1561996042728424, "learning_rate": 5e-05, "loss": 1.602, "step": 326 }, { "epoch": 0.08941755537325677, "grad_norm": 0.14826953411102295, "learning_rate": 5e-05, "loss": 1.7566, "step": 327 }, { "epoch": 0.08969100355482636, "grad_norm": 0.1718713790178299, "learning_rate": 5e-05, "loss": 1.7978, "step": 328 }, { "epoch": 0.08996445173639596, "grad_norm": 0.1710497885942459, "learning_rate": 5e-05, "loss": 1.7204, "step": 329 }, { "epoch": 0.09023789991796555, "grad_norm": 0.1561228483915329, "learning_rate": 5e-05, "loss": 1.7124, "step": 330 }, { "epoch": 0.09051134809953514, "grad_norm": 0.18225868046283722, "learning_rate": 5e-05, "loss": 1.7286, "step": 331 }, { "epoch": 0.09078479628110472, "grad_norm": 0.1495756208896637, "learning_rate": 5e-05, "loss": 1.6065, "step": 332 }, { "epoch": 0.09105824446267433, "grad_norm": 0.15466246008872986, "learning_rate": 5e-05, "loss": 1.7078, "step": 333 }, { "epoch": 0.09133169264424391, "grad_norm": 0.15771101415157318, "learning_rate": 5e-05, "loss": 1.691, "step": 334 }, { "epoch": 0.0916051408258135, "grad_norm": 0.1566590517759323, "learning_rate": 5e-05, "loss": 1.8164, "step": 335 }, { "epoch": 0.0918785890073831, "grad_norm": 0.1567210704088211, "learning_rate": 5e-05, "loss": 1.6887, "step": 336 }, { "epoch": 0.09215203718895269, "grad_norm": 0.1516759693622589, "learning_rate": 5e-05, "loss": 1.5887, "step": 337 }, { "epoch": 0.09242548537052228, "grad_norm": 0.15259622037410736, "learning_rate": 5e-05, "loss": 1.7337, "step": 338 }, { "epoch": 0.09269893355209188, "grad_norm": 0.14548294246196747, "learning_rate": 5e-05, "loss": 1.6738, "step": 339 }, { "epoch": 0.09297238173366147, "grad_norm": 0.14872747659683228, "learning_rate": 5e-05, "loss": 1.7034, "step": 340 }, { "epoch": 0.09324582991523106, "grad_norm": 0.1520431935787201, "learning_rate": 5e-05, "loss": 1.6948, "step": 341 }, { "epoch": 0.09351927809680066, "grad_norm": 0.15893866121768951, "learning_rate": 5e-05, "loss": 1.7664, "step": 342 }, { "epoch": 0.09379272627837025, "grad_norm": 0.1537715345621109, "learning_rate": 5e-05, "loss": 1.6939, "step": 343 }, { "epoch": 0.09406617445993984, "grad_norm": 0.15221412479877472, "learning_rate": 5e-05, "loss": 1.7216, "step": 344 }, { "epoch": 0.09433962264150944, "grad_norm": 0.16553561389446259, "learning_rate": 5e-05, "loss": 1.7498, "step": 345 }, { "epoch": 0.09461307082307903, "grad_norm": 0.148160919547081, "learning_rate": 5e-05, "loss": 1.6938, "step": 346 }, { "epoch": 0.09488651900464862, "grad_norm": 0.14013923704624176, "learning_rate": 5e-05, "loss": 1.5675, "step": 347 }, { "epoch": 0.0951599671862182, "grad_norm": 0.14623858034610748, "learning_rate": 5e-05, "loss": 1.6729, "step": 348 }, { "epoch": 0.09543341536778781, "grad_norm": 0.16254989802837372, "learning_rate": 5e-05, "loss": 1.8071, "step": 349 }, { "epoch": 0.0957068635493574, "grad_norm": 0.1588207483291626, "learning_rate": 5e-05, "loss": 1.6073, "step": 350 }, { "epoch": 0.09598031173092698, "grad_norm": 0.15622512996196747, "learning_rate": 5e-05, "loss": 1.7482, "step": 351 }, { "epoch": 0.09625375991249659, "grad_norm": 0.15577097237110138, "learning_rate": 5e-05, "loss": 1.7231, "step": 352 }, { "epoch": 0.09652720809406617, "grad_norm": 0.1480865478515625, "learning_rate": 5e-05, "loss": 1.6877, "step": 353 }, { "epoch": 0.09680065627563576, "grad_norm": 0.15626661479473114, "learning_rate": 5e-05, "loss": 1.7725, "step": 354 }, { "epoch": 0.09707410445720537, "grad_norm": 0.17652183771133423, "learning_rate": 5e-05, "loss": 1.6995, "step": 355 }, { "epoch": 0.09734755263877495, "grad_norm": 0.15815986692905426, "learning_rate": 5e-05, "loss": 1.7932, "step": 356 }, { "epoch": 0.09762100082034454, "grad_norm": 0.1668958067893982, "learning_rate": 5e-05, "loss": 1.7085, "step": 357 }, { "epoch": 0.09789444900191414, "grad_norm": 0.15382306277751923, "learning_rate": 5e-05, "loss": 1.776, "step": 358 }, { "epoch": 0.09816789718348373, "grad_norm": 0.1443256139755249, "learning_rate": 5e-05, "loss": 1.6132, "step": 359 }, { "epoch": 0.09844134536505332, "grad_norm": 0.14953619241714478, "learning_rate": 5e-05, "loss": 1.6197, "step": 360 }, { "epoch": 0.09871479354662291, "grad_norm": 0.16327831149101257, "learning_rate": 5e-05, "loss": 1.6632, "step": 361 }, { "epoch": 0.09898824172819251, "grad_norm": 0.16656070947647095, "learning_rate": 5e-05, "loss": 1.732, "step": 362 }, { "epoch": 0.0992616899097621, "grad_norm": 0.14592863619327545, "learning_rate": 5e-05, "loss": 1.6502, "step": 363 }, { "epoch": 0.09953513809133169, "grad_norm": 0.152685284614563, "learning_rate": 5e-05, "loss": 1.7162, "step": 364 }, { "epoch": 0.09980858627290129, "grad_norm": 0.14510676264762878, "learning_rate": 5e-05, "loss": 1.5949, "step": 365 }, { "epoch": 0.10008203445447088, "grad_norm": 0.16652299463748932, "learning_rate": 5e-05, "loss": 1.7656, "step": 366 }, { "epoch": 0.10035548263604047, "grad_norm": 0.1451842486858368, "learning_rate": 5e-05, "loss": 1.6449, "step": 367 }, { "epoch": 0.10062893081761007, "grad_norm": 0.18037185072898865, "learning_rate": 5e-05, "loss": 1.7886, "step": 368 }, { "epoch": 0.10090237899917966, "grad_norm": 0.14976035058498383, "learning_rate": 5e-05, "loss": 1.5979, "step": 369 }, { "epoch": 0.10117582718074924, "grad_norm": 0.15457609295845032, "learning_rate": 5e-05, "loss": 1.6899, "step": 370 }, { "epoch": 0.10144927536231885, "grad_norm": 0.19134309887886047, "learning_rate": 5e-05, "loss": 1.7097, "step": 371 }, { "epoch": 0.10172272354388844, "grad_norm": 0.14750947058200836, "learning_rate": 5e-05, "loss": 1.6993, "step": 372 }, { "epoch": 0.10199617172545802, "grad_norm": 0.1667216420173645, "learning_rate": 5e-05, "loss": 1.7053, "step": 373 }, { "epoch": 0.10226961990702761, "grad_norm": 0.1675473004579544, "learning_rate": 5e-05, "loss": 1.6688, "step": 374 }, { "epoch": 0.10254306808859721, "grad_norm": 0.14588837325572968, "learning_rate": 5e-05, "loss": 1.6963, "step": 375 }, { "epoch": 0.1028165162701668, "grad_norm": 0.17661041021347046, "learning_rate": 5e-05, "loss": 1.6563, "step": 376 }, { "epoch": 0.10308996445173639, "grad_norm": 0.16083909571170807, "learning_rate": 5e-05, "loss": 1.6893, "step": 377 }, { "epoch": 0.10336341263330599, "grad_norm": 0.14849358797073364, "learning_rate": 5e-05, "loss": 1.6621, "step": 378 }, { "epoch": 0.10363686081487558, "grad_norm": 0.15476635098457336, "learning_rate": 5e-05, "loss": 1.7159, "step": 379 }, { "epoch": 0.10391030899644517, "grad_norm": 0.16712796688079834, "learning_rate": 5e-05, "loss": 1.728, "step": 380 }, { "epoch": 0.10418375717801477, "grad_norm": 0.15185219049453735, "learning_rate": 5e-05, "loss": 1.674, "step": 381 }, { "epoch": 0.10445720535958436, "grad_norm": 0.16834664344787598, "learning_rate": 5e-05, "loss": 1.7249, "step": 382 }, { "epoch": 0.10473065354115395, "grad_norm": 0.15080732107162476, "learning_rate": 5e-05, "loss": 1.7108, "step": 383 }, { "epoch": 0.10500410172272355, "grad_norm": 0.17864330112934113, "learning_rate": 5e-05, "loss": 1.743, "step": 384 }, { "epoch": 0.10527754990429314, "grad_norm": 0.1494104564189911, "learning_rate": 5e-05, "loss": 1.7065, "step": 385 }, { "epoch": 0.10555099808586273, "grad_norm": 0.1590685397386551, "learning_rate": 5e-05, "loss": 1.6841, "step": 386 }, { "epoch": 0.10582444626743231, "grad_norm": 0.15133905410766602, "learning_rate": 5e-05, "loss": 1.7493, "step": 387 }, { "epoch": 0.10609789444900192, "grad_norm": 0.16563621163368225, "learning_rate": 5e-05, "loss": 1.7181, "step": 388 }, { "epoch": 0.1063713426305715, "grad_norm": 0.14327523112297058, "learning_rate": 5e-05, "loss": 1.6653, "step": 389 }, { "epoch": 0.1066447908121411, "grad_norm": 0.14864635467529297, "learning_rate": 5e-05, "loss": 1.5826, "step": 390 }, { "epoch": 0.1069182389937107, "grad_norm": 0.15405511856079102, "learning_rate": 5e-05, "loss": 1.7333, "step": 391 }, { "epoch": 0.10719168717528028, "grad_norm": 0.1498226821422577, "learning_rate": 5e-05, "loss": 1.7149, "step": 392 }, { "epoch": 0.10746513535684987, "grad_norm": 0.14723950624465942, "learning_rate": 5e-05, "loss": 1.7088, "step": 393 }, { "epoch": 0.10773858353841947, "grad_norm": 0.15685135126113892, "learning_rate": 5e-05, "loss": 1.717, "step": 394 }, { "epoch": 0.10801203171998906, "grad_norm": 0.16080626845359802, "learning_rate": 5e-05, "loss": 1.7603, "step": 395 }, { "epoch": 0.10828547990155865, "grad_norm": 0.17556969821453094, "learning_rate": 5e-05, "loss": 1.7453, "step": 396 }, { "epoch": 0.10855892808312825, "grad_norm": 0.16690774261951447, "learning_rate": 5e-05, "loss": 1.7831, "step": 397 }, { "epoch": 0.10883237626469784, "grad_norm": 0.16693341732025146, "learning_rate": 5e-05, "loss": 1.7015, "step": 398 }, { "epoch": 0.10910582444626743, "grad_norm": 0.15416108071804047, "learning_rate": 5e-05, "loss": 1.6887, "step": 399 }, { "epoch": 0.10937927262783702, "grad_norm": 0.15936125814914703, "learning_rate": 5e-05, "loss": 1.6774, "step": 400 }, { "epoch": 0.10965272080940662, "grad_norm": 0.15093334019184113, "learning_rate": 5e-05, "loss": 1.6507, "step": 401 }, { "epoch": 0.10992616899097621, "grad_norm": 0.1748283952474594, "learning_rate": 5e-05, "loss": 1.6893, "step": 402 }, { "epoch": 0.1101996171725458, "grad_norm": 0.17557309567928314, "learning_rate": 5e-05, "loss": 1.7279, "step": 403 }, { "epoch": 0.1104730653541154, "grad_norm": 0.15320509672164917, "learning_rate": 5e-05, "loss": 1.6621, "step": 404 }, { "epoch": 0.11074651353568499, "grad_norm": 0.1499612033367157, "learning_rate": 5e-05, "loss": 1.7715, "step": 405 }, { "epoch": 0.11101996171725458, "grad_norm": 0.16688010096549988, "learning_rate": 5e-05, "loss": 1.7823, "step": 406 }, { "epoch": 0.11129340989882418, "grad_norm": 0.157332643866539, "learning_rate": 5e-05, "loss": 1.6124, "step": 407 }, { "epoch": 0.11156685808039377, "grad_norm": 0.14416515827178955, "learning_rate": 5e-05, "loss": 1.6094, "step": 408 }, { "epoch": 0.11184030626196335, "grad_norm": 0.1920742392539978, "learning_rate": 5e-05, "loss": 1.7436, "step": 409 }, { "epoch": 0.11211375444353296, "grad_norm": 0.1514004021883011, "learning_rate": 5e-05, "loss": 1.7049, "step": 410 }, { "epoch": 0.11238720262510254, "grad_norm": 0.16829192638397217, "learning_rate": 5e-05, "loss": 1.7128, "step": 411 }, { "epoch": 0.11266065080667213, "grad_norm": 0.16184952855110168, "learning_rate": 5e-05, "loss": 1.697, "step": 412 }, { "epoch": 0.11293409898824174, "grad_norm": 0.15670904517173767, "learning_rate": 5e-05, "loss": 1.6045, "step": 413 }, { "epoch": 0.11320754716981132, "grad_norm": 0.17832054197788239, "learning_rate": 5e-05, "loss": 1.7311, "step": 414 }, { "epoch": 0.11348099535138091, "grad_norm": 0.15141215920448303, "learning_rate": 5e-05, "loss": 1.7269, "step": 415 }, { "epoch": 0.1137544435329505, "grad_norm": 0.16881325840950012, "learning_rate": 5e-05, "loss": 1.753, "step": 416 }, { "epoch": 0.1140278917145201, "grad_norm": 0.14302976429462433, "learning_rate": 5e-05, "loss": 1.6762, "step": 417 }, { "epoch": 0.11430133989608969, "grad_norm": 0.15213017165660858, "learning_rate": 5e-05, "loss": 1.6379, "step": 418 }, { "epoch": 0.11457478807765928, "grad_norm": 0.14254313707351685, "learning_rate": 5e-05, "loss": 1.702, "step": 419 }, { "epoch": 0.11484823625922888, "grad_norm": 0.14474402368068695, "learning_rate": 5e-05, "loss": 1.6177, "step": 420 }, { "epoch": 0.11512168444079847, "grad_norm": 0.1474299430847168, "learning_rate": 5e-05, "loss": 1.6375, "step": 421 }, { "epoch": 0.11539513262236806, "grad_norm": 0.15810638666152954, "learning_rate": 5e-05, "loss": 1.7308, "step": 422 }, { "epoch": 0.11566858080393766, "grad_norm": 0.15957583487033844, "learning_rate": 5e-05, "loss": 1.662, "step": 423 }, { "epoch": 0.11594202898550725, "grad_norm": 0.1710730344057083, "learning_rate": 5e-05, "loss": 1.8709, "step": 424 }, { "epoch": 0.11621547716707684, "grad_norm": 0.15612466633319855, "learning_rate": 5e-05, "loss": 1.6212, "step": 425 }, { "epoch": 0.11648892534864644, "grad_norm": 0.1628880500793457, "learning_rate": 5e-05, "loss": 1.8161, "step": 426 }, { "epoch": 0.11676237353021603, "grad_norm": 0.15160787105560303, "learning_rate": 5e-05, "loss": 1.738, "step": 427 }, { "epoch": 0.11703582171178561, "grad_norm": 0.15054672956466675, "learning_rate": 5e-05, "loss": 1.6689, "step": 428 }, { "epoch": 0.1173092698933552, "grad_norm": 0.16296370327472687, "learning_rate": 5e-05, "loss": 1.652, "step": 429 }, { "epoch": 0.1175827180749248, "grad_norm": 0.14984969794750214, "learning_rate": 5e-05, "loss": 1.678, "step": 430 }, { "epoch": 0.1178561662564944, "grad_norm": 0.1585860401391983, "learning_rate": 5e-05, "loss": 1.5993, "step": 431 }, { "epoch": 0.11812961443806398, "grad_norm": 0.16059096157550812, "learning_rate": 5e-05, "loss": 1.6874, "step": 432 }, { "epoch": 0.11840306261963358, "grad_norm": 0.16525696218013763, "learning_rate": 5e-05, "loss": 1.6795, "step": 433 }, { "epoch": 0.11867651080120317, "grad_norm": 0.18887005746364594, "learning_rate": 5e-05, "loss": 1.6089, "step": 434 }, { "epoch": 0.11894995898277276, "grad_norm": 0.15065988898277283, "learning_rate": 5e-05, "loss": 1.6332, "step": 435 }, { "epoch": 0.11922340716434236, "grad_norm": 0.19893988966941833, "learning_rate": 5e-05, "loss": 1.721, "step": 436 }, { "epoch": 0.11949685534591195, "grad_norm": 0.14734850823879242, "learning_rate": 5e-05, "loss": 1.6332, "step": 437 }, { "epoch": 0.11977030352748154, "grad_norm": 0.18723982572555542, "learning_rate": 5e-05, "loss": 1.777, "step": 438 }, { "epoch": 0.12004375170905114, "grad_norm": 0.1655622124671936, "learning_rate": 5e-05, "loss": 1.835, "step": 439 }, { "epoch": 0.12031719989062073, "grad_norm": 0.1495118886232376, "learning_rate": 5e-05, "loss": 1.5813, "step": 440 }, { "epoch": 0.12059064807219032, "grad_norm": 0.1608804315328598, "learning_rate": 5e-05, "loss": 1.6728, "step": 441 }, { "epoch": 0.1208640962537599, "grad_norm": 0.1557897925376892, "learning_rate": 5e-05, "loss": 1.7919, "step": 442 }, { "epoch": 0.12113754443532951, "grad_norm": 0.14694997668266296, "learning_rate": 5e-05, "loss": 1.6916, "step": 443 }, { "epoch": 0.1214109926168991, "grad_norm": 0.15379074215888977, "learning_rate": 5e-05, "loss": 1.7453, "step": 444 }, { "epoch": 0.12168444079846868, "grad_norm": 0.1691819578409195, "learning_rate": 5e-05, "loss": 1.7035, "step": 445 }, { "epoch": 0.12195788898003829, "grad_norm": 0.16844218969345093, "learning_rate": 5e-05, "loss": 1.6465, "step": 446 }, { "epoch": 0.12223133716160788, "grad_norm": 0.1534705013036728, "learning_rate": 5e-05, "loss": 1.7147, "step": 447 }, { "epoch": 0.12250478534317746, "grad_norm": 0.16286121308803558, "learning_rate": 5e-05, "loss": 1.795, "step": 448 }, { "epoch": 0.12277823352474707, "grad_norm": 0.1552685797214508, "learning_rate": 5e-05, "loss": 1.7379, "step": 449 }, { "epoch": 0.12305168170631665, "grad_norm": 0.15220941603183746, "learning_rate": 5e-05, "loss": 1.5912, "step": 450 }, { "epoch": 0.12332512988788624, "grad_norm": 0.15760937333106995, "learning_rate": 5e-05, "loss": 1.723, "step": 451 }, { "epoch": 0.12359857806945584, "grad_norm": 0.14814068377017975, "learning_rate": 5e-05, "loss": 1.6482, "step": 452 }, { "epoch": 0.12387202625102543, "grad_norm": 0.15846781432628632, "learning_rate": 5e-05, "loss": 1.6243, "step": 453 }, { "epoch": 0.12414547443259502, "grad_norm": 0.14988292753696442, "learning_rate": 5e-05, "loss": 1.6261, "step": 454 }, { "epoch": 0.12441892261416461, "grad_norm": 0.16977062821388245, "learning_rate": 5e-05, "loss": 1.6638, "step": 455 }, { "epoch": 0.12469237079573421, "grad_norm": 0.16247521340847015, "learning_rate": 5e-05, "loss": 1.6939, "step": 456 }, { "epoch": 0.1249658189773038, "grad_norm": 0.15425889194011688, "learning_rate": 5e-05, "loss": 1.6645, "step": 457 }, { "epoch": 0.1252392671588734, "grad_norm": 0.18221181631088257, "learning_rate": 5e-05, "loss": 1.7, "step": 458 }, { "epoch": 0.12551271534044298, "grad_norm": 0.14771276712417603, "learning_rate": 5e-05, "loss": 1.7018, "step": 459 }, { "epoch": 0.12578616352201258, "grad_norm": 0.175098717212677, "learning_rate": 5e-05, "loss": 1.78, "step": 460 }, { "epoch": 0.12605961170358218, "grad_norm": 0.16985467076301575, "learning_rate": 5e-05, "loss": 1.6305, "step": 461 }, { "epoch": 0.12633305988515175, "grad_norm": 0.1552826166152954, "learning_rate": 5e-05, "loss": 1.6986, "step": 462 }, { "epoch": 0.12660650806672136, "grad_norm": 0.17271657288074493, "learning_rate": 5e-05, "loss": 1.7182, "step": 463 }, { "epoch": 0.12687995624829096, "grad_norm": 0.1511317491531372, "learning_rate": 5e-05, "loss": 1.6274, "step": 464 }, { "epoch": 0.12715340442986053, "grad_norm": 0.1526670604944229, "learning_rate": 5e-05, "loss": 1.6481, "step": 465 }, { "epoch": 0.12742685261143014, "grad_norm": 0.15212751924991608, "learning_rate": 5e-05, "loss": 1.7155, "step": 466 }, { "epoch": 0.12770030079299974, "grad_norm": 0.16328099370002747, "learning_rate": 5e-05, "loss": 1.7052, "step": 467 }, { "epoch": 0.1279737489745693, "grad_norm": 0.14235898852348328, "learning_rate": 5e-05, "loss": 1.6514, "step": 468 }, { "epoch": 0.12824719715613891, "grad_norm": 0.17346210777759552, "learning_rate": 5e-05, "loss": 1.6711, "step": 469 }, { "epoch": 0.12852064533770852, "grad_norm": 0.17061196267604828, "learning_rate": 5e-05, "loss": 1.6815, "step": 470 }, { "epoch": 0.1287940935192781, "grad_norm": 0.16796523332595825, "learning_rate": 5e-05, "loss": 1.6304, "step": 471 }, { "epoch": 0.1290675417008477, "grad_norm": 0.17930328845977783, "learning_rate": 5e-05, "loss": 1.7399, "step": 472 }, { "epoch": 0.1293409898824173, "grad_norm": 0.16334758698940277, "learning_rate": 5e-05, "loss": 1.6031, "step": 473 }, { "epoch": 0.12961443806398687, "grad_norm": 0.18290555477142334, "learning_rate": 5e-05, "loss": 1.7326, "step": 474 }, { "epoch": 0.12988788624555647, "grad_norm": 0.15675663948059082, "learning_rate": 5e-05, "loss": 1.6692, "step": 475 }, { "epoch": 0.13016133442712605, "grad_norm": 0.1945638507604599, "learning_rate": 5e-05, "loss": 1.6891, "step": 476 }, { "epoch": 0.13043478260869565, "grad_norm": 0.19286639988422394, "learning_rate": 5e-05, "loss": 1.7076, "step": 477 }, { "epoch": 0.13070823079026525, "grad_norm": 0.17249007523059845, "learning_rate": 5e-05, "loss": 1.706, "step": 478 }, { "epoch": 0.13098167897183483, "grad_norm": 0.1968424767255783, "learning_rate": 5e-05, "loss": 1.6126, "step": 479 }, { "epoch": 0.13125512715340443, "grad_norm": 0.19258543848991394, "learning_rate": 5e-05, "loss": 1.7572, "step": 480 }, { "epoch": 0.13152857533497403, "grad_norm": 0.16817843914031982, "learning_rate": 5e-05, "loss": 1.6691, "step": 481 }, { "epoch": 0.1318020235165436, "grad_norm": 0.21118032932281494, "learning_rate": 5e-05, "loss": 1.6716, "step": 482 }, { "epoch": 0.1320754716981132, "grad_norm": 0.15985234081745148, "learning_rate": 5e-05, "loss": 1.7348, "step": 483 }, { "epoch": 0.1323489198796828, "grad_norm": 0.20505684614181519, "learning_rate": 5e-05, "loss": 1.7032, "step": 484 }, { "epoch": 0.13262236806125238, "grad_norm": 0.18702927231788635, "learning_rate": 5e-05, "loss": 1.6254, "step": 485 }, { "epoch": 0.13289581624282198, "grad_norm": 0.17232342064380646, "learning_rate": 5e-05, "loss": 1.7039, "step": 486 }, { "epoch": 0.1331692644243916, "grad_norm": 0.20174479484558105, "learning_rate": 5e-05, "loss": 1.7167, "step": 487 }, { "epoch": 0.13344271260596116, "grad_norm": 0.15040408074855804, "learning_rate": 5e-05, "loss": 1.7868, "step": 488 }, { "epoch": 0.13371616078753076, "grad_norm": 0.2027222216129303, "learning_rate": 5e-05, "loss": 1.7302, "step": 489 }, { "epoch": 0.13398960896910037, "grad_norm": 0.1705685704946518, "learning_rate": 5e-05, "loss": 1.7658, "step": 490 }, { "epoch": 0.13426305715066994, "grad_norm": 0.15521638095378876, "learning_rate": 5e-05, "loss": 1.7048, "step": 491 }, { "epoch": 0.13453650533223954, "grad_norm": 0.16530796885490417, "learning_rate": 5e-05, "loss": 1.7096, "step": 492 }, { "epoch": 0.13480995351380914, "grad_norm": 0.1526857316493988, "learning_rate": 5e-05, "loss": 1.6364, "step": 493 }, { "epoch": 0.13508340169537872, "grad_norm": 0.15197399258613586, "learning_rate": 5e-05, "loss": 1.7544, "step": 494 }, { "epoch": 0.13535684987694832, "grad_norm": 0.17518053948879242, "learning_rate": 5e-05, "loss": 1.6008, "step": 495 }, { "epoch": 0.13563029805851792, "grad_norm": 0.18786345422267914, "learning_rate": 5e-05, "loss": 1.6955, "step": 496 }, { "epoch": 0.1359037462400875, "grad_norm": 0.16572856903076172, "learning_rate": 5e-05, "loss": 1.8124, "step": 497 }, { "epoch": 0.1361771944216571, "grad_norm": 0.20353186130523682, "learning_rate": 5e-05, "loss": 1.6766, "step": 498 }, { "epoch": 0.1364506426032267, "grad_norm": 0.15591098368167877, "learning_rate": 5e-05, "loss": 1.6693, "step": 499 }, { "epoch": 0.13672409078479628, "grad_norm": 0.17146877944469452, "learning_rate": 5e-05, "loss": 1.5863, "step": 500 }, { "epoch": 0.13699753896636588, "grad_norm": 0.1932230293750763, "learning_rate": 5e-05, "loss": 1.6442, "step": 501 }, { "epoch": 0.13727098714793545, "grad_norm": 0.14423272013664246, "learning_rate": 5e-05, "loss": 1.6168, "step": 502 }, { "epoch": 0.13754443532950505, "grad_norm": 0.15283560752868652, "learning_rate": 5e-05, "loss": 1.661, "step": 503 }, { "epoch": 0.13781788351107466, "grad_norm": 0.19705916941165924, "learning_rate": 5e-05, "loss": 1.7297, "step": 504 }, { "epoch": 0.13809133169264423, "grad_norm": 0.16028250753879547, "learning_rate": 5e-05, "loss": 1.764, "step": 505 }, { "epoch": 0.13836477987421383, "grad_norm": 0.18517790734767914, "learning_rate": 5e-05, "loss": 1.6758, "step": 506 }, { "epoch": 0.13863822805578344, "grad_norm": 0.17298881709575653, "learning_rate": 5e-05, "loss": 1.6147, "step": 507 }, { "epoch": 0.138911676237353, "grad_norm": 0.1618654429912567, "learning_rate": 5e-05, "loss": 1.7223, "step": 508 }, { "epoch": 0.1391851244189226, "grad_norm": 0.17122521996498108, "learning_rate": 5e-05, "loss": 1.6623, "step": 509 }, { "epoch": 0.13945857260049221, "grad_norm": 0.1496722251176834, "learning_rate": 5e-05, "loss": 1.6599, "step": 510 }, { "epoch": 0.1397320207820618, "grad_norm": 0.14283980429172516, "learning_rate": 5e-05, "loss": 1.6108, "step": 511 }, { "epoch": 0.1400054689636314, "grad_norm": 0.15560725331306458, "learning_rate": 5e-05, "loss": 1.6407, "step": 512 }, { "epoch": 0.140278917145201, "grad_norm": 0.14533740282058716, "learning_rate": 5e-05, "loss": 1.6877, "step": 513 }, { "epoch": 0.14055236532677057, "grad_norm": 0.15537096560001373, "learning_rate": 5e-05, "loss": 1.6337, "step": 514 }, { "epoch": 0.14082581350834017, "grad_norm": 0.14118176698684692, "learning_rate": 5e-05, "loss": 1.6312, "step": 515 }, { "epoch": 0.14109926168990977, "grad_norm": 0.14146625995635986, "learning_rate": 5e-05, "loss": 1.5738, "step": 516 }, { "epoch": 0.14137270987147935, "grad_norm": 0.15033139288425446, "learning_rate": 5e-05, "loss": 1.7774, "step": 517 }, { "epoch": 0.14164615805304895, "grad_norm": 0.141265869140625, "learning_rate": 5e-05, "loss": 1.6147, "step": 518 }, { "epoch": 0.14191960623461855, "grad_norm": 0.14538274705410004, "learning_rate": 5e-05, "loss": 1.6542, "step": 519 }, { "epoch": 0.14219305441618812, "grad_norm": 0.15939506888389587, "learning_rate": 5e-05, "loss": 1.6933, "step": 520 }, { "epoch": 0.14246650259775773, "grad_norm": 0.1489834487438202, "learning_rate": 5e-05, "loss": 1.6392, "step": 521 }, { "epoch": 0.14273995077932733, "grad_norm": 0.18363149464130402, "learning_rate": 5e-05, "loss": 1.7042, "step": 522 }, { "epoch": 0.1430133989608969, "grad_norm": 0.16449148952960968, "learning_rate": 5e-05, "loss": 1.6792, "step": 523 }, { "epoch": 0.1432868471424665, "grad_norm": 0.15585970878601074, "learning_rate": 5e-05, "loss": 1.6823, "step": 524 }, { "epoch": 0.1435602953240361, "grad_norm": 0.1625368297100067, "learning_rate": 5e-05, "loss": 1.6709, "step": 525 }, { "epoch": 0.14383374350560568, "grad_norm": 0.16228622198104858, "learning_rate": 5e-05, "loss": 1.7291, "step": 526 }, { "epoch": 0.14410719168717528, "grad_norm": 0.16529570519924164, "learning_rate": 5e-05, "loss": 1.7617, "step": 527 }, { "epoch": 0.1443806398687449, "grad_norm": 0.1438167542219162, "learning_rate": 5e-05, "loss": 1.6428, "step": 528 }, { "epoch": 0.14465408805031446, "grad_norm": 0.16496515274047852, "learning_rate": 5e-05, "loss": 1.771, "step": 529 }, { "epoch": 0.14492753623188406, "grad_norm": 0.16147810220718384, "learning_rate": 5e-05, "loss": 1.6306, "step": 530 }, { "epoch": 0.14520098441345364, "grad_norm": 0.14431102573871613, "learning_rate": 5e-05, "loss": 1.5672, "step": 531 }, { "epoch": 0.14547443259502324, "grad_norm": 0.1534319818019867, "learning_rate": 5e-05, "loss": 1.7233, "step": 532 }, { "epoch": 0.14574788077659284, "grad_norm": 0.15924040973186493, "learning_rate": 5e-05, "loss": 1.6973, "step": 533 }, { "epoch": 0.14602132895816242, "grad_norm": 0.14544963836669922, "learning_rate": 5e-05, "loss": 1.6773, "step": 534 }, { "epoch": 0.14629477713973202, "grad_norm": 0.1628415733575821, "learning_rate": 5e-05, "loss": 1.6427, "step": 535 }, { "epoch": 0.14656822532130162, "grad_norm": 0.1434401571750641, "learning_rate": 5e-05, "loss": 1.6043, "step": 536 }, { "epoch": 0.1468416735028712, "grad_norm": 0.17323043942451477, "learning_rate": 5e-05, "loss": 1.7487, "step": 537 }, { "epoch": 0.1471151216844408, "grad_norm": 0.15013372898101807, "learning_rate": 5e-05, "loss": 1.6936, "step": 538 }, { "epoch": 0.1473885698660104, "grad_norm": 0.14302760362625122, "learning_rate": 5e-05, "loss": 1.6237, "step": 539 }, { "epoch": 0.14766201804757997, "grad_norm": 0.16000758111476898, "learning_rate": 5e-05, "loss": 1.7007, "step": 540 }, { "epoch": 0.14793546622914958, "grad_norm": 0.1614782214164734, "learning_rate": 5e-05, "loss": 1.794, "step": 541 }, { "epoch": 0.14820891441071918, "grad_norm": 0.17829090356826782, "learning_rate": 5e-05, "loss": 1.7155, "step": 542 }, { "epoch": 0.14848236259228875, "grad_norm": 0.16465310752391815, "learning_rate": 5e-05, "loss": 1.6136, "step": 543 }, { "epoch": 0.14875581077385835, "grad_norm": 0.17050433158874512, "learning_rate": 5e-05, "loss": 1.6685, "step": 544 }, { "epoch": 0.14902925895542796, "grad_norm": 0.1535949409008026, "learning_rate": 5e-05, "loss": 1.6712, "step": 545 }, { "epoch": 0.14930270713699753, "grad_norm": 0.1774369478225708, "learning_rate": 5e-05, "loss": 1.6754, "step": 546 }, { "epoch": 0.14957615531856713, "grad_norm": 0.14995476603507996, "learning_rate": 5e-05, "loss": 1.6214, "step": 547 }, { "epoch": 0.14984960350013674, "grad_norm": 0.16445566713809967, "learning_rate": 5e-05, "loss": 1.6705, "step": 548 }, { "epoch": 0.1501230516817063, "grad_norm": 0.1804472953081131, "learning_rate": 5e-05, "loss": 1.7822, "step": 549 }, { "epoch": 0.1503964998632759, "grad_norm": 0.15526773035526276, "learning_rate": 5e-05, "loss": 1.6319, "step": 550 }, { "epoch": 0.15066994804484551, "grad_norm": 0.1779652237892151, "learning_rate": 5e-05, "loss": 1.6667, "step": 551 }, { "epoch": 0.1509433962264151, "grad_norm": 0.15680311620235443, "learning_rate": 5e-05, "loss": 1.663, "step": 552 }, { "epoch": 0.1512168444079847, "grad_norm": 0.15443289279937744, "learning_rate": 5e-05, "loss": 1.65, "step": 553 }, { "epoch": 0.1514902925895543, "grad_norm": 0.16194204986095428, "learning_rate": 5e-05, "loss": 1.7575, "step": 554 }, { "epoch": 0.15176374077112387, "grad_norm": 0.1542530059814453, "learning_rate": 5e-05, "loss": 1.6408, "step": 555 }, { "epoch": 0.15203718895269347, "grad_norm": 0.14716754853725433, "learning_rate": 5e-05, "loss": 1.6898, "step": 556 }, { "epoch": 0.15231063713426304, "grad_norm": 0.15577493607997894, "learning_rate": 5e-05, "loss": 1.6614, "step": 557 }, { "epoch": 0.15258408531583265, "grad_norm": 0.14232535660266876, "learning_rate": 5e-05, "loss": 1.6025, "step": 558 }, { "epoch": 0.15285753349740225, "grad_norm": 0.15629936754703522, "learning_rate": 5e-05, "loss": 1.6389, "step": 559 }, { "epoch": 0.15313098167897182, "grad_norm": 0.15671175718307495, "learning_rate": 5e-05, "loss": 1.6616, "step": 560 }, { "epoch": 0.15340442986054142, "grad_norm": 0.15339986979961395, "learning_rate": 5e-05, "loss": 1.7227, "step": 561 }, { "epoch": 0.15367787804211103, "grad_norm": 0.14904935657978058, "learning_rate": 5e-05, "loss": 1.7057, "step": 562 }, { "epoch": 0.1539513262236806, "grad_norm": 0.16447263956069946, "learning_rate": 5e-05, "loss": 1.7246, "step": 563 }, { "epoch": 0.1542247744052502, "grad_norm": 0.15060292184352875, "learning_rate": 5e-05, "loss": 1.731, "step": 564 }, { "epoch": 0.1544982225868198, "grad_norm": 0.15564222633838654, "learning_rate": 5e-05, "loss": 1.662, "step": 565 }, { "epoch": 0.15477167076838938, "grad_norm": 0.1540018767118454, "learning_rate": 5e-05, "loss": 1.6793, "step": 566 }, { "epoch": 0.15504511894995898, "grad_norm": 0.14922669529914856, "learning_rate": 5e-05, "loss": 1.6619, "step": 567 }, { "epoch": 0.15531856713152858, "grad_norm": 0.1617313027381897, "learning_rate": 5e-05, "loss": 1.6571, "step": 568 }, { "epoch": 0.15559201531309816, "grad_norm": 0.15420418977737427, "learning_rate": 5e-05, "loss": 1.6897, "step": 569 }, { "epoch": 0.15586546349466776, "grad_norm": 0.16023240983486176, "learning_rate": 5e-05, "loss": 1.7376, "step": 570 }, { "epoch": 0.15613891167623736, "grad_norm": 0.17280931770801544, "learning_rate": 5e-05, "loss": 1.7339, "step": 571 }, { "epoch": 0.15641235985780694, "grad_norm": 0.15177179872989655, "learning_rate": 5e-05, "loss": 1.7808, "step": 572 }, { "epoch": 0.15668580803937654, "grad_norm": 0.14648132026195526, "learning_rate": 5e-05, "loss": 1.6798, "step": 573 }, { "epoch": 0.15695925622094614, "grad_norm": 0.1551511436700821, "learning_rate": 5e-05, "loss": 1.8416, "step": 574 }, { "epoch": 0.15723270440251572, "grad_norm": 0.14707240462303162, "learning_rate": 5e-05, "loss": 1.6569, "step": 575 }, { "epoch": 0.15750615258408532, "grad_norm": 0.15362726151943207, "learning_rate": 5e-05, "loss": 1.6738, "step": 576 }, { "epoch": 0.15777960076565492, "grad_norm": 0.1611640602350235, "learning_rate": 5e-05, "loss": 1.6615, "step": 577 }, { "epoch": 0.1580530489472245, "grad_norm": 0.14445427060127258, "learning_rate": 5e-05, "loss": 1.6479, "step": 578 }, { "epoch": 0.1583264971287941, "grad_norm": 0.1491660177707672, "learning_rate": 5e-05, "loss": 1.6616, "step": 579 }, { "epoch": 0.1585999453103637, "grad_norm": 0.16628406941890717, "learning_rate": 5e-05, "loss": 1.7884, "step": 580 }, { "epoch": 0.15887339349193327, "grad_norm": 0.1535821259021759, "learning_rate": 5e-05, "loss": 1.7276, "step": 581 }, { "epoch": 0.15914684167350288, "grad_norm": 0.14763818681240082, "learning_rate": 5e-05, "loss": 1.526, "step": 582 }, { "epoch": 0.15942028985507245, "grad_norm": 0.15529736876487732, "learning_rate": 5e-05, "loss": 1.7708, "step": 583 }, { "epoch": 0.15969373803664205, "grad_norm": 0.15036877989768982, "learning_rate": 5e-05, "loss": 1.7016, "step": 584 }, { "epoch": 0.15996718621821165, "grad_norm": 0.15483739972114563, "learning_rate": 5e-05, "loss": 1.5965, "step": 585 }, { "epoch": 0.16024063439978123, "grad_norm": 0.15531662106513977, "learning_rate": 5e-05, "loss": 1.6769, "step": 586 }, { "epoch": 0.16051408258135083, "grad_norm": 0.1560511589050293, "learning_rate": 5e-05, "loss": 1.7538, "step": 587 }, { "epoch": 0.16078753076292043, "grad_norm": 0.1831214427947998, "learning_rate": 5e-05, "loss": 1.7097, "step": 588 }, { "epoch": 0.16106097894449, "grad_norm": 0.15891355276107788, "learning_rate": 5e-05, "loss": 1.6161, "step": 589 }, { "epoch": 0.1613344271260596, "grad_norm": 0.1625254601240158, "learning_rate": 5e-05, "loss": 1.644, "step": 590 }, { "epoch": 0.1616078753076292, "grad_norm": 0.15376971662044525, "learning_rate": 5e-05, "loss": 1.701, "step": 591 }, { "epoch": 0.1618813234891988, "grad_norm": 0.14270102977752686, "learning_rate": 5e-05, "loss": 1.5089, "step": 592 }, { "epoch": 0.1621547716707684, "grad_norm": 0.14729395508766174, "learning_rate": 5e-05, "loss": 1.6989, "step": 593 }, { "epoch": 0.162428219852338, "grad_norm": 0.1675315499305725, "learning_rate": 5e-05, "loss": 1.7195, "step": 594 }, { "epoch": 0.16270166803390756, "grad_norm": 0.14959470927715302, "learning_rate": 5e-05, "loss": 1.678, "step": 595 }, { "epoch": 0.16297511621547717, "grad_norm": 0.15854990482330322, "learning_rate": 5e-05, "loss": 1.6452, "step": 596 }, { "epoch": 0.16324856439704677, "grad_norm": 0.151190385222435, "learning_rate": 5e-05, "loss": 1.6751, "step": 597 }, { "epoch": 0.16352201257861634, "grad_norm": 0.1632450520992279, "learning_rate": 5e-05, "loss": 1.662, "step": 598 }, { "epoch": 0.16379546076018595, "grad_norm": 0.1520984023809433, "learning_rate": 5e-05, "loss": 1.7377, "step": 599 }, { "epoch": 0.16406890894175555, "grad_norm": 0.14863604307174683, "learning_rate": 5e-05, "loss": 1.5927, "step": 600 }, { "epoch": 0.16434235712332512, "grad_norm": 0.15424251556396484, "learning_rate": 5e-05, "loss": 1.6265, "step": 601 }, { "epoch": 0.16461580530489472, "grad_norm": 0.15759313106536865, "learning_rate": 5e-05, "loss": 1.6659, "step": 602 }, { "epoch": 0.16488925348646433, "grad_norm": 0.1515471190214157, "learning_rate": 5e-05, "loss": 1.6585, "step": 603 }, { "epoch": 0.1651627016680339, "grad_norm": 0.16653236746788025, "learning_rate": 5e-05, "loss": 1.6593, "step": 604 }, { "epoch": 0.1654361498496035, "grad_norm": 0.15483258664608002, "learning_rate": 5e-05, "loss": 1.7138, "step": 605 }, { "epoch": 0.1657095980311731, "grad_norm": 0.14448527991771698, "learning_rate": 5e-05, "loss": 1.6627, "step": 606 }, { "epoch": 0.16598304621274268, "grad_norm": 0.15589672327041626, "learning_rate": 5e-05, "loss": 1.5996, "step": 607 }, { "epoch": 0.16625649439431228, "grad_norm": 0.16023333370685577, "learning_rate": 5e-05, "loss": 1.6351, "step": 608 }, { "epoch": 0.16652994257588188, "grad_norm": 0.16108067333698273, "learning_rate": 5e-05, "loss": 1.716, "step": 609 }, { "epoch": 0.16680339075745146, "grad_norm": 0.14648942649364471, "learning_rate": 5e-05, "loss": 1.662, "step": 610 }, { "epoch": 0.16707683893902106, "grad_norm": 0.14486436545848846, "learning_rate": 5e-05, "loss": 1.6219, "step": 611 }, { "epoch": 0.16735028712059064, "grad_norm": 0.15563012659549713, "learning_rate": 5e-05, "loss": 1.6803, "step": 612 }, { "epoch": 0.16762373530216024, "grad_norm": 0.1571030616760254, "learning_rate": 5e-05, "loss": 1.7195, "step": 613 }, { "epoch": 0.16789718348372984, "grad_norm": 0.14509615302085876, "learning_rate": 5e-05, "loss": 1.6462, "step": 614 }, { "epoch": 0.1681706316652994, "grad_norm": 0.15147417783737183, "learning_rate": 5e-05, "loss": 1.7463, "step": 615 }, { "epoch": 0.16844407984686902, "grad_norm": 0.1549796462059021, "learning_rate": 5e-05, "loss": 1.6997, "step": 616 }, { "epoch": 0.16871752802843862, "grad_norm": 0.16344091296195984, "learning_rate": 5e-05, "loss": 1.6065, "step": 617 }, { "epoch": 0.1689909762100082, "grad_norm": 0.1531122624874115, "learning_rate": 5e-05, "loss": 1.5878, "step": 618 }, { "epoch": 0.1692644243915778, "grad_norm": 0.17184039950370789, "learning_rate": 5e-05, "loss": 1.6921, "step": 619 }, { "epoch": 0.1695378725731474, "grad_norm": 0.16684702038764954, "learning_rate": 5e-05, "loss": 1.7952, "step": 620 }, { "epoch": 0.16981132075471697, "grad_norm": 0.1621457189321518, "learning_rate": 5e-05, "loss": 1.6935, "step": 621 }, { "epoch": 0.17008476893628657, "grad_norm": 0.15157224237918854, "learning_rate": 5e-05, "loss": 1.747, "step": 622 }, { "epoch": 0.17035821711785618, "grad_norm": 0.1498030573129654, "learning_rate": 5e-05, "loss": 1.6138, "step": 623 }, { "epoch": 0.17063166529942575, "grad_norm": 0.15279249846935272, "learning_rate": 5e-05, "loss": 1.6191, "step": 624 }, { "epoch": 0.17090511348099535, "grad_norm": 0.15653859078884125, "learning_rate": 5e-05, "loss": 1.6271, "step": 625 }, { "epoch": 0.17117856166256495, "grad_norm": 0.17365135252475739, "learning_rate": 5e-05, "loss": 1.733, "step": 626 }, { "epoch": 0.17145200984413453, "grad_norm": 0.14621610939502716, "learning_rate": 5e-05, "loss": 1.5571, "step": 627 }, { "epoch": 0.17172545802570413, "grad_norm": 0.1693270057439804, "learning_rate": 5e-05, "loss": 1.7436, "step": 628 }, { "epoch": 0.17199890620727373, "grad_norm": 0.15240801870822906, "learning_rate": 5e-05, "loss": 1.7051, "step": 629 }, { "epoch": 0.1722723543888433, "grad_norm": 0.14783060550689697, "learning_rate": 5e-05, "loss": 1.6765, "step": 630 }, { "epoch": 0.1725458025704129, "grad_norm": 0.15038736164569855, "learning_rate": 5e-05, "loss": 1.5878, "step": 631 }, { "epoch": 0.1728192507519825, "grad_norm": 0.16009370982646942, "learning_rate": 5e-05, "loss": 1.7588, "step": 632 }, { "epoch": 0.17309269893355209, "grad_norm": 0.15497462451457977, "learning_rate": 5e-05, "loss": 1.6953, "step": 633 }, { "epoch": 0.1733661471151217, "grad_norm": 0.18641214072704315, "learning_rate": 5e-05, "loss": 1.6218, "step": 634 }, { "epoch": 0.1736395952966913, "grad_norm": 0.1536916345357895, "learning_rate": 5e-05, "loss": 1.6898, "step": 635 }, { "epoch": 0.17391304347826086, "grad_norm": 0.15958872437477112, "learning_rate": 5e-05, "loss": 1.6823, "step": 636 }, { "epoch": 0.17418649165983047, "grad_norm": 0.16902394592761993, "learning_rate": 5e-05, "loss": 1.7029, "step": 637 }, { "epoch": 0.17445993984140004, "grad_norm": 0.1553962379693985, "learning_rate": 5e-05, "loss": 1.7663, "step": 638 }, { "epoch": 0.17473338802296964, "grad_norm": 0.1627410352230072, "learning_rate": 5e-05, "loss": 1.5562, "step": 639 }, { "epoch": 0.17500683620453925, "grad_norm": 0.16163355112075806, "learning_rate": 5e-05, "loss": 1.6688, "step": 640 }, { "epoch": 0.17528028438610882, "grad_norm": 0.14881187677383423, "learning_rate": 5e-05, "loss": 1.6582, "step": 641 }, { "epoch": 0.17555373256767842, "grad_norm": 0.1964666247367859, "learning_rate": 5e-05, "loss": 1.6861, "step": 642 }, { "epoch": 0.17582718074924802, "grad_norm": 0.15720294415950775, "learning_rate": 5e-05, "loss": 1.6249, "step": 643 }, { "epoch": 0.1761006289308176, "grad_norm": 0.15254618227481842, "learning_rate": 5e-05, "loss": 1.6902, "step": 644 }, { "epoch": 0.1763740771123872, "grad_norm": 0.19420553743839264, "learning_rate": 5e-05, "loss": 1.7313, "step": 645 }, { "epoch": 0.1766475252939568, "grad_norm": 0.1590331643819809, "learning_rate": 5e-05, "loss": 1.6135, "step": 646 }, { "epoch": 0.17692097347552638, "grad_norm": 0.18741661310195923, "learning_rate": 5e-05, "loss": 1.6959, "step": 647 }, { "epoch": 0.17719442165709598, "grad_norm": 0.16829299926757812, "learning_rate": 5e-05, "loss": 1.7166, "step": 648 }, { "epoch": 0.17746786983866558, "grad_norm": 0.16889558732509613, "learning_rate": 5e-05, "loss": 1.6289, "step": 649 }, { "epoch": 0.17774131802023516, "grad_norm": 0.18458721041679382, "learning_rate": 5e-05, "loss": 1.6351, "step": 650 }, { "epoch": 0.17801476620180476, "grad_norm": 0.1489873081445694, "learning_rate": 5e-05, "loss": 1.6426, "step": 651 }, { "epoch": 0.17828821438337436, "grad_norm": 0.1619219332933426, "learning_rate": 5e-05, "loss": 1.6849, "step": 652 }, { "epoch": 0.17856166256494393, "grad_norm": 0.15107835829257965, "learning_rate": 5e-05, "loss": 1.6455, "step": 653 }, { "epoch": 0.17883511074651354, "grad_norm": 0.15547437965869904, "learning_rate": 5e-05, "loss": 1.7137, "step": 654 }, { "epoch": 0.17910855892808314, "grad_norm": 0.18254241347312927, "learning_rate": 5e-05, "loss": 1.7565, "step": 655 }, { "epoch": 0.1793820071096527, "grad_norm": 0.15430989861488342, "learning_rate": 5e-05, "loss": 1.7157, "step": 656 }, { "epoch": 0.17965545529122232, "grad_norm": 0.14642038941383362, "learning_rate": 5e-05, "loss": 1.6962, "step": 657 }, { "epoch": 0.17992890347279192, "grad_norm": 0.15086182951927185, "learning_rate": 5e-05, "loss": 1.6187, "step": 658 }, { "epoch": 0.1802023516543615, "grad_norm": 0.1483525186777115, "learning_rate": 5e-05, "loss": 1.6572, "step": 659 }, { "epoch": 0.1804757998359311, "grad_norm": 0.1709260642528534, "learning_rate": 5e-05, "loss": 1.6813, "step": 660 }, { "epoch": 0.1807492480175007, "grad_norm": 0.17065215110778809, "learning_rate": 5e-05, "loss": 1.7901, "step": 661 }, { "epoch": 0.18102269619907027, "grad_norm": 0.15385740995407104, "learning_rate": 5e-05, "loss": 1.6925, "step": 662 }, { "epoch": 0.18129614438063987, "grad_norm": 0.16739514470100403, "learning_rate": 5e-05, "loss": 1.6025, "step": 663 }, { "epoch": 0.18156959256220945, "grad_norm": 0.16431251168251038, "learning_rate": 5e-05, "loss": 1.6316, "step": 664 }, { "epoch": 0.18184304074377905, "grad_norm": 0.15166325867176056, "learning_rate": 5e-05, "loss": 1.7378, "step": 665 }, { "epoch": 0.18211648892534865, "grad_norm": 0.1769624650478363, "learning_rate": 5e-05, "loss": 1.6963, "step": 666 }, { "epoch": 0.18238993710691823, "grad_norm": 0.17551544308662415, "learning_rate": 5e-05, "loss": 1.6722, "step": 667 }, { "epoch": 0.18266338528848783, "grad_norm": 0.1519336998462677, "learning_rate": 5e-05, "loss": 1.7812, "step": 668 }, { "epoch": 0.18293683347005743, "grad_norm": 0.1523965448141098, "learning_rate": 5e-05, "loss": 1.6374, "step": 669 }, { "epoch": 0.183210281651627, "grad_norm": 0.16304127871990204, "learning_rate": 5e-05, "loss": 1.6436, "step": 670 }, { "epoch": 0.1834837298331966, "grad_norm": 0.1389431655406952, "learning_rate": 5e-05, "loss": 1.6136, "step": 671 }, { "epoch": 0.1837571780147662, "grad_norm": 0.15939609706401825, "learning_rate": 5e-05, "loss": 1.6407, "step": 672 }, { "epoch": 0.18403062619633578, "grad_norm": 0.15122279524803162, "learning_rate": 5e-05, "loss": 1.605, "step": 673 }, { "epoch": 0.18430407437790539, "grad_norm": 0.14799101650714874, "learning_rate": 5e-05, "loss": 1.6199, "step": 674 }, { "epoch": 0.184577522559475, "grad_norm": 0.17853890359401703, "learning_rate": 5e-05, "loss": 1.7246, "step": 675 }, { "epoch": 0.18485097074104456, "grad_norm": 0.14898012578487396, "learning_rate": 5e-05, "loss": 1.7212, "step": 676 }, { "epoch": 0.18512441892261416, "grad_norm": 0.1488298773765564, "learning_rate": 5e-05, "loss": 1.6592, "step": 677 }, { "epoch": 0.18539786710418377, "grad_norm": 0.14956548810005188, "learning_rate": 5e-05, "loss": 1.6025, "step": 678 }, { "epoch": 0.18567131528575334, "grad_norm": 0.14764147996902466, "learning_rate": 5e-05, "loss": 1.6695, "step": 679 }, { "epoch": 0.18594476346732294, "grad_norm": 0.16988615691661835, "learning_rate": 5e-05, "loss": 1.7755, "step": 680 }, { "epoch": 0.18621821164889255, "grad_norm": 0.1601964235305786, "learning_rate": 5e-05, "loss": 1.7565, "step": 681 }, { "epoch": 0.18649165983046212, "grad_norm": 0.17461629211902618, "learning_rate": 5e-05, "loss": 1.7552, "step": 682 }, { "epoch": 0.18676510801203172, "grad_norm": 0.15619756281375885, "learning_rate": 5e-05, "loss": 1.7162, "step": 683 }, { "epoch": 0.18703855619360132, "grad_norm": 0.15257790684700012, "learning_rate": 5e-05, "loss": 1.7154, "step": 684 }, { "epoch": 0.1873120043751709, "grad_norm": 0.15032611787319183, "learning_rate": 5e-05, "loss": 1.5825, "step": 685 }, { "epoch": 0.1875854525567405, "grad_norm": 0.1578359454870224, "learning_rate": 5e-05, "loss": 1.706, "step": 686 }, { "epoch": 0.1878589007383101, "grad_norm": 0.16593731939792633, "learning_rate": 5e-05, "loss": 1.624, "step": 687 }, { "epoch": 0.18813234891987968, "grad_norm": 0.17991593480110168, "learning_rate": 5e-05, "loss": 1.7293, "step": 688 }, { "epoch": 0.18840579710144928, "grad_norm": 0.15078522264957428, "learning_rate": 5e-05, "loss": 1.6419, "step": 689 }, { "epoch": 0.18867924528301888, "grad_norm": 0.20055970549583435, "learning_rate": 5e-05, "loss": 1.7743, "step": 690 }, { "epoch": 0.18895269346458846, "grad_norm": 0.14143189787864685, "learning_rate": 5e-05, "loss": 1.7, "step": 691 }, { "epoch": 0.18922614164615806, "grad_norm": 0.17427237331867218, "learning_rate": 5e-05, "loss": 1.6503, "step": 692 }, { "epoch": 0.18949958982772763, "grad_norm": 0.18519847095012665, "learning_rate": 5e-05, "loss": 1.7039, "step": 693 }, { "epoch": 0.18977303800929723, "grad_norm": 0.15196627378463745, "learning_rate": 5e-05, "loss": 1.7847, "step": 694 }, { "epoch": 0.19004648619086684, "grad_norm": 0.17246317863464355, "learning_rate": 5e-05, "loss": 1.7097, "step": 695 }, { "epoch": 0.1903199343724364, "grad_norm": 0.16350534558296204, "learning_rate": 5e-05, "loss": 1.6783, "step": 696 }, { "epoch": 0.190593382554006, "grad_norm": 0.15077216923236847, "learning_rate": 5e-05, "loss": 1.7, "step": 697 }, { "epoch": 0.19086683073557562, "grad_norm": 0.17161305248737335, "learning_rate": 5e-05, "loss": 1.757, "step": 698 }, { "epoch": 0.1911402789171452, "grad_norm": 0.15666204690933228, "learning_rate": 5e-05, "loss": 1.6094, "step": 699 }, { "epoch": 0.1914137270987148, "grad_norm": 0.15116041898727417, "learning_rate": 5e-05, "loss": 1.7604, "step": 700 }, { "epoch": 0.1916871752802844, "grad_norm": 0.17067068815231323, "learning_rate": 5e-05, "loss": 1.6355, "step": 701 }, { "epoch": 0.19196062346185397, "grad_norm": 0.1541707068681717, "learning_rate": 5e-05, "loss": 1.7279, "step": 702 }, { "epoch": 0.19223407164342357, "grad_norm": 0.154357448220253, "learning_rate": 5e-05, "loss": 1.5981, "step": 703 }, { "epoch": 0.19250751982499317, "grad_norm": 0.1600044220685959, "learning_rate": 5e-05, "loss": 1.7863, "step": 704 }, { "epoch": 0.19278096800656275, "grad_norm": 0.15702758729457855, "learning_rate": 5e-05, "loss": 1.752, "step": 705 }, { "epoch": 0.19305441618813235, "grad_norm": 0.15750904381275177, "learning_rate": 5e-05, "loss": 1.6763, "step": 706 }, { "epoch": 0.19332786436970195, "grad_norm": 0.14942318201065063, "learning_rate": 5e-05, "loss": 1.5828, "step": 707 }, { "epoch": 0.19360131255127153, "grad_norm": 0.14567220211029053, "learning_rate": 5e-05, "loss": 1.647, "step": 708 }, { "epoch": 0.19387476073284113, "grad_norm": 0.15003998577594757, "learning_rate": 5e-05, "loss": 1.6545, "step": 709 }, { "epoch": 0.19414820891441073, "grad_norm": 0.1552526205778122, "learning_rate": 5e-05, "loss": 1.7039, "step": 710 }, { "epoch": 0.1944216570959803, "grad_norm": 0.16224539279937744, "learning_rate": 5e-05, "loss": 1.6792, "step": 711 }, { "epoch": 0.1946951052775499, "grad_norm": 0.15637609362602234, "learning_rate": 5e-05, "loss": 1.7131, "step": 712 }, { "epoch": 0.1949685534591195, "grad_norm": 0.14550897479057312, "learning_rate": 5e-05, "loss": 1.6662, "step": 713 }, { "epoch": 0.19524200164068908, "grad_norm": 0.14311423897743225, "learning_rate": 5e-05, "loss": 1.5609, "step": 714 }, { "epoch": 0.19551544982225869, "grad_norm": 0.1632792055606842, "learning_rate": 5e-05, "loss": 1.716, "step": 715 }, { "epoch": 0.1957888980038283, "grad_norm": 0.14958421885967255, "learning_rate": 5e-05, "loss": 1.6868, "step": 716 }, { "epoch": 0.19606234618539786, "grad_norm": 0.15640190243721008, "learning_rate": 5e-05, "loss": 1.6763, "step": 717 }, { "epoch": 0.19633579436696746, "grad_norm": 0.1410387009382248, "learning_rate": 5e-05, "loss": 1.6593, "step": 718 }, { "epoch": 0.19660924254853704, "grad_norm": 0.14812639355659485, "learning_rate": 5e-05, "loss": 1.6907, "step": 719 }, { "epoch": 0.19688269073010664, "grad_norm": 0.15623793005943298, "learning_rate": 5e-05, "loss": 1.6879, "step": 720 }, { "epoch": 0.19715613891167624, "grad_norm": 0.1545989066362381, "learning_rate": 5e-05, "loss": 1.7276, "step": 721 }, { "epoch": 0.19742958709324582, "grad_norm": 0.15051992237567902, "learning_rate": 5e-05, "loss": 1.7184, "step": 722 }, { "epoch": 0.19770303527481542, "grad_norm": 0.14712779223918915, "learning_rate": 5e-05, "loss": 1.6231, "step": 723 }, { "epoch": 0.19797648345638502, "grad_norm": 0.15088264644145966, "learning_rate": 5e-05, "loss": 1.662, "step": 724 }, { "epoch": 0.1982499316379546, "grad_norm": 0.14665238559246063, "learning_rate": 5e-05, "loss": 1.6626, "step": 725 }, { "epoch": 0.1985233798195242, "grad_norm": 0.16032913327217102, "learning_rate": 5e-05, "loss": 1.7283, "step": 726 }, { "epoch": 0.1987968280010938, "grad_norm": 0.15149696171283722, "learning_rate": 5e-05, "loss": 1.6387, "step": 727 }, { "epoch": 0.19907027618266337, "grad_norm": 0.15610548853874207, "learning_rate": 5e-05, "loss": 1.7084, "step": 728 }, { "epoch": 0.19934372436423298, "grad_norm": 0.16325855255126953, "learning_rate": 5e-05, "loss": 1.567, "step": 729 }, { "epoch": 0.19961717254580258, "grad_norm": 0.1529161036014557, "learning_rate": 5e-05, "loss": 1.6343, "step": 730 }, { "epoch": 0.19989062072737215, "grad_norm": 0.1644391119480133, "learning_rate": 5e-05, "loss": 1.6918, "step": 731 }, { "epoch": 0.20016406890894176, "grad_norm": 0.14825589954853058, "learning_rate": 5e-05, "loss": 1.6954, "step": 732 }, { "epoch": 0.20043751709051136, "grad_norm": 0.15434464812278748, "learning_rate": 5e-05, "loss": 1.751, "step": 733 }, { "epoch": 0.20071096527208093, "grad_norm": 0.14505796134471893, "learning_rate": 5e-05, "loss": 1.6398, "step": 734 }, { "epoch": 0.20098441345365053, "grad_norm": 0.14808180928230286, "learning_rate": 5e-05, "loss": 1.6475, "step": 735 }, { "epoch": 0.20125786163522014, "grad_norm": 0.15832236409187317, "learning_rate": 5e-05, "loss": 1.6888, "step": 736 }, { "epoch": 0.2015313098167897, "grad_norm": 0.15063433349132538, "learning_rate": 5e-05, "loss": 1.714, "step": 737 }, { "epoch": 0.2018047579983593, "grad_norm": 0.14673678576946259, "learning_rate": 5e-05, "loss": 1.6566, "step": 738 }, { "epoch": 0.20207820617992892, "grad_norm": 0.16134969890117645, "learning_rate": 5e-05, "loss": 1.618, "step": 739 }, { "epoch": 0.2023516543614985, "grad_norm": 0.15674619376659393, "learning_rate": 5e-05, "loss": 1.6548, "step": 740 }, { "epoch": 0.2026251025430681, "grad_norm": 0.16632983088493347, "learning_rate": 5e-05, "loss": 1.6608, "step": 741 }, { "epoch": 0.2028985507246377, "grad_norm": 0.15681079030036926, "learning_rate": 5e-05, "loss": 1.6452, "step": 742 }, { "epoch": 0.20317199890620727, "grad_norm": 0.17244625091552734, "learning_rate": 5e-05, "loss": 1.6092, "step": 743 }, { "epoch": 0.20344544708777687, "grad_norm": 0.17212654650211334, "learning_rate": 5e-05, "loss": 1.6497, "step": 744 }, { "epoch": 0.20371889526934644, "grad_norm": 0.14502903819084167, "learning_rate": 5e-05, "loss": 1.6578, "step": 745 }, { "epoch": 0.20399234345091605, "grad_norm": 0.19574536383152008, "learning_rate": 5e-05, "loss": 1.7769, "step": 746 }, { "epoch": 0.20426579163248565, "grad_norm": 0.16160033643245697, "learning_rate": 5e-05, "loss": 1.6885, "step": 747 }, { "epoch": 0.20453923981405522, "grad_norm": 0.15171994268894196, "learning_rate": 5e-05, "loss": 1.6761, "step": 748 }, { "epoch": 0.20481268799562483, "grad_norm": 0.16578879952430725, "learning_rate": 5e-05, "loss": 1.6024, "step": 749 }, { "epoch": 0.20508613617719443, "grad_norm": 0.1527402251958847, "learning_rate": 5e-05, "loss": 1.6214, "step": 750 }, { "epoch": 0.205359584358764, "grad_norm": 0.15434326231479645, "learning_rate": 5e-05, "loss": 1.591, "step": 751 }, { "epoch": 0.2056330325403336, "grad_norm": 0.15459883213043213, "learning_rate": 5e-05, "loss": 1.604, "step": 752 }, { "epoch": 0.2059064807219032, "grad_norm": 0.17635492980480194, "learning_rate": 5e-05, "loss": 1.7674, "step": 753 }, { "epoch": 0.20617992890347278, "grad_norm": 0.1530025154352188, "learning_rate": 5e-05, "loss": 1.5982, "step": 754 }, { "epoch": 0.20645337708504238, "grad_norm": 0.16364556550979614, "learning_rate": 5e-05, "loss": 1.5648, "step": 755 }, { "epoch": 0.20672682526661199, "grad_norm": 0.1753033846616745, "learning_rate": 5e-05, "loss": 1.6603, "step": 756 }, { "epoch": 0.20700027344818156, "grad_norm": 0.17838945984840393, "learning_rate": 5e-05, "loss": 1.6572, "step": 757 }, { "epoch": 0.20727372162975116, "grad_norm": 0.20289616286754608, "learning_rate": 5e-05, "loss": 1.713, "step": 758 }, { "epoch": 0.20754716981132076, "grad_norm": 0.1460665762424469, "learning_rate": 5e-05, "loss": 1.6701, "step": 759 }, { "epoch": 0.20782061799289034, "grad_norm": 0.1894819289445877, "learning_rate": 5e-05, "loss": 1.6389, "step": 760 }, { "epoch": 0.20809406617445994, "grad_norm": 0.17014984786510468, "learning_rate": 5e-05, "loss": 1.6229, "step": 761 }, { "epoch": 0.20836751435602954, "grad_norm": 0.14133282005786896, "learning_rate": 5e-05, "loss": 1.4855, "step": 762 }, { "epoch": 0.20864096253759912, "grad_norm": 0.17830294370651245, "learning_rate": 5e-05, "loss": 1.8294, "step": 763 }, { "epoch": 0.20891441071916872, "grad_norm": 0.17372062802314758, "learning_rate": 5e-05, "loss": 1.6214, "step": 764 }, { "epoch": 0.20918785890073832, "grad_norm": 0.14572595059871674, "learning_rate": 5e-05, "loss": 1.6391, "step": 765 }, { "epoch": 0.2094613070823079, "grad_norm": 0.19754233956336975, "learning_rate": 5e-05, "loss": 1.7374, "step": 766 }, { "epoch": 0.2097347552638775, "grad_norm": 0.17328870296478271, "learning_rate": 5e-05, "loss": 1.6893, "step": 767 }, { "epoch": 0.2100082034454471, "grad_norm": 0.15320096909999847, "learning_rate": 5e-05, "loss": 1.6703, "step": 768 }, { "epoch": 0.21028165162701667, "grad_norm": 0.19963279366493225, "learning_rate": 5e-05, "loss": 1.8128, "step": 769 }, { "epoch": 0.21055509980858628, "grad_norm": 0.15706828236579895, "learning_rate": 5e-05, "loss": 1.6998, "step": 770 }, { "epoch": 0.21082854799015588, "grad_norm": 0.18131129443645477, "learning_rate": 5e-05, "loss": 1.6364, "step": 771 }, { "epoch": 0.21110199617172545, "grad_norm": 0.19108878076076508, "learning_rate": 5e-05, "loss": 1.7074, "step": 772 }, { "epoch": 0.21137544435329506, "grad_norm": 0.14925052225589752, "learning_rate": 5e-05, "loss": 1.6908, "step": 773 }, { "epoch": 0.21164889253486463, "grad_norm": 0.1783328801393509, "learning_rate": 5e-05, "loss": 1.7353, "step": 774 }, { "epoch": 0.21192234071643423, "grad_norm": 0.17631973326206207, "learning_rate": 5e-05, "loss": 1.683, "step": 775 }, { "epoch": 0.21219578889800383, "grad_norm": 0.15554718673229218, "learning_rate": 5e-05, "loss": 1.6239, "step": 776 }, { "epoch": 0.2124692370795734, "grad_norm": 0.1897638440132141, "learning_rate": 5e-05, "loss": 1.713, "step": 777 }, { "epoch": 0.212742685261143, "grad_norm": 0.14565777778625488, "learning_rate": 5e-05, "loss": 1.5607, "step": 778 }, { "epoch": 0.2130161334427126, "grad_norm": 0.14770552515983582, "learning_rate": 5e-05, "loss": 1.6054, "step": 779 }, { "epoch": 0.2132895816242822, "grad_norm": 0.1784149557352066, "learning_rate": 5e-05, "loss": 1.7054, "step": 780 }, { "epoch": 0.2135630298058518, "grad_norm": 0.15223895013332367, "learning_rate": 5e-05, "loss": 1.756, "step": 781 }, { "epoch": 0.2138364779874214, "grad_norm": 0.15329818427562714, "learning_rate": 5e-05, "loss": 1.7035, "step": 782 }, { "epoch": 0.21410992616899097, "grad_norm": 0.1558707058429718, "learning_rate": 5e-05, "loss": 1.6835, "step": 783 }, { "epoch": 0.21438337435056057, "grad_norm": 0.15981443226337433, "learning_rate": 5e-05, "loss": 1.7418, "step": 784 }, { "epoch": 0.21465682253213017, "grad_norm": 0.1494167298078537, "learning_rate": 5e-05, "loss": 1.5906, "step": 785 }, { "epoch": 0.21493027071369974, "grad_norm": 0.15564508736133575, "learning_rate": 5e-05, "loss": 1.6873, "step": 786 }, { "epoch": 0.21520371889526935, "grad_norm": 0.1639227420091629, "learning_rate": 5e-05, "loss": 1.7083, "step": 787 }, { "epoch": 0.21547716707683895, "grad_norm": 0.14128105342388153, "learning_rate": 5e-05, "loss": 1.608, "step": 788 }, { "epoch": 0.21575061525840852, "grad_norm": 0.16327965259552002, "learning_rate": 5e-05, "loss": 1.6022, "step": 789 }, { "epoch": 0.21602406343997813, "grad_norm": 0.1559935361146927, "learning_rate": 5e-05, "loss": 1.6483, "step": 790 }, { "epoch": 0.21629751162154773, "grad_norm": 0.1511768251657486, "learning_rate": 5e-05, "loss": 1.5955, "step": 791 }, { "epoch": 0.2165709598031173, "grad_norm": 0.16056060791015625, "learning_rate": 5e-05, "loss": 1.6471, "step": 792 }, { "epoch": 0.2168444079846869, "grad_norm": 0.16120120882987976, "learning_rate": 5e-05, "loss": 1.8007, "step": 793 }, { "epoch": 0.2171178561662565, "grad_norm": 0.1515311449766159, "learning_rate": 5e-05, "loss": 1.6438, "step": 794 }, { "epoch": 0.21739130434782608, "grad_norm": 0.15457142889499664, "learning_rate": 5e-05, "loss": 1.7115, "step": 795 }, { "epoch": 0.21766475252939568, "grad_norm": 0.15303310751914978, "learning_rate": 5e-05, "loss": 1.6677, "step": 796 }, { "epoch": 0.21793820071096529, "grad_norm": 0.1515769511461258, "learning_rate": 5e-05, "loss": 1.7371, "step": 797 }, { "epoch": 0.21821164889253486, "grad_norm": 0.1483011692762375, "learning_rate": 5e-05, "loss": 1.6359, "step": 798 }, { "epoch": 0.21848509707410446, "grad_norm": 0.1543753296136856, "learning_rate": 5e-05, "loss": 1.7473, "step": 799 }, { "epoch": 0.21875854525567404, "grad_norm": 0.14442037045955658, "learning_rate": 5e-05, "loss": 1.6086, "step": 800 }, { "epoch": 0.21903199343724364, "grad_norm": 0.14965881407260895, "learning_rate": 5e-05, "loss": 1.6522, "step": 801 }, { "epoch": 0.21930544161881324, "grad_norm": 0.16104258596897125, "learning_rate": 5e-05, "loss": 1.5625, "step": 802 }, { "epoch": 0.21957888980038281, "grad_norm": 0.16634979844093323, "learning_rate": 5e-05, "loss": 1.6574, "step": 803 }, { "epoch": 0.21985233798195242, "grad_norm": 0.1422521471977234, "learning_rate": 5e-05, "loss": 1.6317, "step": 804 }, { "epoch": 0.22012578616352202, "grad_norm": 0.17888352274894714, "learning_rate": 5e-05, "loss": 1.7694, "step": 805 }, { "epoch": 0.2203992343450916, "grad_norm": 0.16180840134620667, "learning_rate": 5e-05, "loss": 1.6889, "step": 806 }, { "epoch": 0.2206726825266612, "grad_norm": 0.1583503931760788, "learning_rate": 5e-05, "loss": 1.6678, "step": 807 }, { "epoch": 0.2209461307082308, "grad_norm": 0.19179575145244598, "learning_rate": 5e-05, "loss": 1.7058, "step": 808 }, { "epoch": 0.22121957888980037, "grad_norm": 0.14800883829593658, "learning_rate": 5e-05, "loss": 1.6305, "step": 809 }, { "epoch": 0.22149302707136997, "grad_norm": 0.17706219851970673, "learning_rate": 5e-05, "loss": 1.691, "step": 810 }, { "epoch": 0.22176647525293958, "grad_norm": 0.1569800078868866, "learning_rate": 5e-05, "loss": 1.5772, "step": 811 }, { "epoch": 0.22203992343450915, "grad_norm": 0.1444752961397171, "learning_rate": 5e-05, "loss": 1.6368, "step": 812 }, { "epoch": 0.22231337161607875, "grad_norm": 0.15736012160778046, "learning_rate": 5e-05, "loss": 1.5703, "step": 813 }, { "epoch": 0.22258681979764836, "grad_norm": 0.18501217663288116, "learning_rate": 5e-05, "loss": 1.6198, "step": 814 }, { "epoch": 0.22286026797921793, "grad_norm": 0.14157734811306, "learning_rate": 5e-05, "loss": 1.672, "step": 815 }, { "epoch": 0.22313371616078753, "grad_norm": 0.16897475719451904, "learning_rate": 5e-05, "loss": 1.6156, "step": 816 }, { "epoch": 0.22340716434235713, "grad_norm": 0.15974189341068268, "learning_rate": 5e-05, "loss": 1.5237, "step": 817 }, { "epoch": 0.2236806125239267, "grad_norm": 0.14449931681156158, "learning_rate": 5e-05, "loss": 1.6376, "step": 818 }, { "epoch": 0.2239540607054963, "grad_norm": 0.1783899962902069, "learning_rate": 5e-05, "loss": 1.707, "step": 819 }, { "epoch": 0.2242275088870659, "grad_norm": 0.15125927329063416, "learning_rate": 5e-05, "loss": 1.6233, "step": 820 }, { "epoch": 0.2245009570686355, "grad_norm": 0.15262438356876373, "learning_rate": 5e-05, "loss": 1.6687, "step": 821 }, { "epoch": 0.2247744052502051, "grad_norm": 0.16108544170856476, "learning_rate": 5e-05, "loss": 1.647, "step": 822 }, { "epoch": 0.2250478534317747, "grad_norm": 0.1504691243171692, "learning_rate": 5e-05, "loss": 1.6531, "step": 823 }, { "epoch": 0.22532130161334427, "grad_norm": 0.1500689536333084, "learning_rate": 5e-05, "loss": 1.5914, "step": 824 }, { "epoch": 0.22559474979491387, "grad_norm": 0.15878424048423767, "learning_rate": 5e-05, "loss": 1.6758, "step": 825 }, { "epoch": 0.22586819797648347, "grad_norm": 0.15859322249889374, "learning_rate": 5e-05, "loss": 1.6787, "step": 826 }, { "epoch": 0.22614164615805304, "grad_norm": 0.15319658815860748, "learning_rate": 5e-05, "loss": 1.6832, "step": 827 }, { "epoch": 0.22641509433962265, "grad_norm": 0.15381459891796112, "learning_rate": 5e-05, "loss": 1.6422, "step": 828 }, { "epoch": 0.22668854252119222, "grad_norm": 0.16640834510326385, "learning_rate": 5e-05, "loss": 1.7306, "step": 829 }, { "epoch": 0.22696199070276182, "grad_norm": 0.1528118997812271, "learning_rate": 5e-05, "loss": 1.6858, "step": 830 }, { "epoch": 0.22723543888433143, "grad_norm": 0.15058903396129608, "learning_rate": 5e-05, "loss": 1.6336, "step": 831 }, { "epoch": 0.227508887065901, "grad_norm": 0.1552654355764389, "learning_rate": 5e-05, "loss": 1.677, "step": 832 }, { "epoch": 0.2277823352474706, "grad_norm": 0.16159558296203613, "learning_rate": 5e-05, "loss": 1.682, "step": 833 }, { "epoch": 0.2280557834290402, "grad_norm": 0.14523838460445404, "learning_rate": 5e-05, "loss": 1.5887, "step": 834 }, { "epoch": 0.22832923161060978, "grad_norm": 0.16024388372898102, "learning_rate": 5e-05, "loss": 1.7047, "step": 835 }, { "epoch": 0.22860267979217938, "grad_norm": 0.15451057255268097, "learning_rate": 5e-05, "loss": 1.6663, "step": 836 }, { "epoch": 0.22887612797374898, "grad_norm": 0.14806225895881653, "learning_rate": 5e-05, "loss": 1.6782, "step": 837 }, { "epoch": 0.22914957615531856, "grad_norm": 0.15798652172088623, "learning_rate": 5e-05, "loss": 1.7092, "step": 838 }, { "epoch": 0.22942302433688816, "grad_norm": 0.15452131628990173, "learning_rate": 5e-05, "loss": 1.7137, "step": 839 }, { "epoch": 0.22969647251845776, "grad_norm": 0.15365323424339294, "learning_rate": 5e-05, "loss": 1.7464, "step": 840 }, { "epoch": 0.22996992070002734, "grad_norm": 0.14897583425045013, "learning_rate": 5e-05, "loss": 1.6712, "step": 841 }, { "epoch": 0.23024336888159694, "grad_norm": 0.1519531011581421, "learning_rate": 5e-05, "loss": 1.7296, "step": 842 }, { "epoch": 0.23051681706316654, "grad_norm": 0.14781694114208221, "learning_rate": 5e-05, "loss": 1.6932, "step": 843 }, { "epoch": 0.23079026524473611, "grad_norm": 0.1538834124803543, "learning_rate": 5e-05, "loss": 1.6682, "step": 844 }, { "epoch": 0.23106371342630572, "grad_norm": 0.14690068364143372, "learning_rate": 5e-05, "loss": 1.6194, "step": 845 }, { "epoch": 0.23133716160787532, "grad_norm": 0.14810669422149658, "learning_rate": 5e-05, "loss": 1.6565, "step": 846 }, { "epoch": 0.2316106097894449, "grad_norm": 0.1599397510290146, "learning_rate": 5e-05, "loss": 1.7253, "step": 847 }, { "epoch": 0.2318840579710145, "grad_norm": 0.14897406101226807, "learning_rate": 5e-05, "loss": 1.6383, "step": 848 }, { "epoch": 0.2321575061525841, "grad_norm": 0.14833606779575348, "learning_rate": 5e-05, "loss": 1.5975, "step": 849 }, { "epoch": 0.23243095433415367, "grad_norm": 0.15134021639823914, "learning_rate": 5e-05, "loss": 1.6181, "step": 850 }, { "epoch": 0.23270440251572327, "grad_norm": 0.1754770278930664, "learning_rate": 5e-05, "loss": 1.6905, "step": 851 }, { "epoch": 0.23297785069729288, "grad_norm": 0.15956006944179535, "learning_rate": 5e-05, "loss": 1.7139, "step": 852 }, { "epoch": 0.23325129887886245, "grad_norm": 0.1927836388349533, "learning_rate": 5e-05, "loss": 1.7274, "step": 853 }, { "epoch": 0.23352474706043205, "grad_norm": 0.17408336699008942, "learning_rate": 5e-05, "loss": 1.6502, "step": 854 }, { "epoch": 0.23379819524200163, "grad_norm": 0.15393292903900146, "learning_rate": 5e-05, "loss": 1.6537, "step": 855 }, { "epoch": 0.23407164342357123, "grad_norm": 0.1970151662826538, "learning_rate": 5e-05, "loss": 1.6247, "step": 856 }, { "epoch": 0.23434509160514083, "grad_norm": 0.14329130947589874, "learning_rate": 5e-05, "loss": 1.5819, "step": 857 }, { "epoch": 0.2346185397867104, "grad_norm": 0.14831414818763733, "learning_rate": 5e-05, "loss": 1.6632, "step": 858 }, { "epoch": 0.23489198796828, "grad_norm": 0.16687729954719543, "learning_rate": 5e-05, "loss": 1.6668, "step": 859 }, { "epoch": 0.2351654361498496, "grad_norm": 0.14544977247714996, "learning_rate": 5e-05, "loss": 1.6716, "step": 860 }, { "epoch": 0.23543888433141918, "grad_norm": 0.15175144374370575, "learning_rate": 5e-05, "loss": 1.7159, "step": 861 }, { "epoch": 0.2357123325129888, "grad_norm": 0.17007999122142792, "learning_rate": 5e-05, "loss": 1.8236, "step": 862 }, { "epoch": 0.2359857806945584, "grad_norm": 0.1416562795639038, "learning_rate": 5e-05, "loss": 1.6291, "step": 863 }, { "epoch": 0.23625922887612796, "grad_norm": 0.16543252766132355, "learning_rate": 5e-05, "loss": 1.695, "step": 864 }, { "epoch": 0.23653267705769757, "grad_norm": 0.16213998198509216, "learning_rate": 5e-05, "loss": 1.7187, "step": 865 }, { "epoch": 0.23680612523926717, "grad_norm": 0.15842589735984802, "learning_rate": 5e-05, "loss": 1.7074, "step": 866 }, { "epoch": 0.23707957342083674, "grad_norm": 0.16753311455249786, "learning_rate": 5e-05, "loss": 1.6487, "step": 867 }, { "epoch": 0.23735302160240634, "grad_norm": 0.15180423855781555, "learning_rate": 5e-05, "loss": 1.6396, "step": 868 }, { "epoch": 0.23762646978397595, "grad_norm": 0.1585211455821991, "learning_rate": 5e-05, "loss": 1.6959, "step": 869 }, { "epoch": 0.23789991796554552, "grad_norm": 0.1608756184577942, "learning_rate": 5e-05, "loss": 1.7013, "step": 870 }, { "epoch": 0.23817336614711512, "grad_norm": 0.14348694682121277, "learning_rate": 5e-05, "loss": 1.5302, "step": 871 }, { "epoch": 0.23844681432868473, "grad_norm": 0.14560513198375702, "learning_rate": 5e-05, "loss": 1.6678, "step": 872 }, { "epoch": 0.2387202625102543, "grad_norm": 0.1609930694103241, "learning_rate": 5e-05, "loss": 1.6599, "step": 873 }, { "epoch": 0.2389937106918239, "grad_norm": 0.15456150472164154, "learning_rate": 5e-05, "loss": 1.6087, "step": 874 }, { "epoch": 0.2392671588733935, "grad_norm": 0.14654546976089478, "learning_rate": 5e-05, "loss": 1.6199, "step": 875 }, { "epoch": 0.23954060705496308, "grad_norm": 0.14911627769470215, "learning_rate": 5e-05, "loss": 1.7099, "step": 876 }, { "epoch": 0.23981405523653268, "grad_norm": 0.17598745226860046, "learning_rate": 5e-05, "loss": 1.6854, "step": 877 }, { "epoch": 0.24008750341810228, "grad_norm": 0.1485157608985901, "learning_rate": 5e-05, "loss": 1.652, "step": 878 }, { "epoch": 0.24036095159967186, "grad_norm": 0.19263607263565063, "learning_rate": 5e-05, "loss": 1.6623, "step": 879 }, { "epoch": 0.24063439978124146, "grad_norm": 0.15963439643383026, "learning_rate": 5e-05, "loss": 1.7693, "step": 880 }, { "epoch": 0.24090784796281103, "grad_norm": 0.1660141795873642, "learning_rate": 5e-05, "loss": 1.5823, "step": 881 }, { "epoch": 0.24118129614438064, "grad_norm": 0.16761627793312073, "learning_rate": 5e-05, "loss": 1.6647, "step": 882 }, { "epoch": 0.24145474432595024, "grad_norm": 0.1527351289987564, "learning_rate": 5e-05, "loss": 1.5621, "step": 883 }, { "epoch": 0.2417281925075198, "grad_norm": 0.1732681542634964, "learning_rate": 5e-05, "loss": 1.8039, "step": 884 }, { "epoch": 0.24200164068908941, "grad_norm": 0.15724296867847443, "learning_rate": 5e-05, "loss": 1.5098, "step": 885 }, { "epoch": 0.24227508887065902, "grad_norm": 0.1623707413673401, "learning_rate": 5e-05, "loss": 1.7287, "step": 886 }, { "epoch": 0.2425485370522286, "grad_norm": 0.15608251094818115, "learning_rate": 5e-05, "loss": 1.6904, "step": 887 }, { "epoch": 0.2428219852337982, "grad_norm": 0.16214315593242645, "learning_rate": 5e-05, "loss": 1.6739, "step": 888 }, { "epoch": 0.2430954334153678, "grad_norm": 0.15404769778251648, "learning_rate": 5e-05, "loss": 1.6115, "step": 889 }, { "epoch": 0.24336888159693737, "grad_norm": 0.17034713923931122, "learning_rate": 5e-05, "loss": 1.6412, "step": 890 }, { "epoch": 0.24364232977850697, "grad_norm": 0.14412027597427368, "learning_rate": 5e-05, "loss": 1.5481, "step": 891 }, { "epoch": 0.24391577796007657, "grad_norm": 0.1591709554195404, "learning_rate": 5e-05, "loss": 1.7115, "step": 892 }, { "epoch": 0.24418922614164615, "grad_norm": 0.18128591775894165, "learning_rate": 5e-05, "loss": 1.6538, "step": 893 }, { "epoch": 0.24446267432321575, "grad_norm": 0.1539849489927292, "learning_rate": 5e-05, "loss": 1.6899, "step": 894 }, { "epoch": 0.24473612250478535, "grad_norm": 0.14450113475322723, "learning_rate": 5e-05, "loss": 1.5931, "step": 895 }, { "epoch": 0.24500957068635493, "grad_norm": 0.152107372879982, "learning_rate": 5e-05, "loss": 1.6001, "step": 896 }, { "epoch": 0.24528301886792453, "grad_norm": 0.14510129392147064, "learning_rate": 5e-05, "loss": 1.6048, "step": 897 }, { "epoch": 0.24555646704949413, "grad_norm": 0.1490333080291748, "learning_rate": 5e-05, "loss": 1.6107, "step": 898 }, { "epoch": 0.2458299152310637, "grad_norm": 0.14951343834400177, "learning_rate": 5e-05, "loss": 1.7005, "step": 899 }, { "epoch": 0.2461033634126333, "grad_norm": 0.1492195427417755, "learning_rate": 5e-05, "loss": 1.6831, "step": 900 }, { "epoch": 0.2463768115942029, "grad_norm": 0.15410034358501434, "learning_rate": 5e-05, "loss": 1.6933, "step": 901 }, { "epoch": 0.24665025977577248, "grad_norm": 0.14823299646377563, "learning_rate": 5e-05, "loss": 1.637, "step": 902 }, { "epoch": 0.2469237079573421, "grad_norm": 0.14969344437122345, "learning_rate": 5e-05, "loss": 1.6546, "step": 903 }, { "epoch": 0.2471971561389117, "grad_norm": 0.14941661059856415, "learning_rate": 5e-05, "loss": 1.6124, "step": 904 }, { "epoch": 0.24747060432048126, "grad_norm": 0.1853816956281662, "learning_rate": 5e-05, "loss": 1.6285, "step": 905 }, { "epoch": 0.24774405250205087, "grad_norm": 0.15117646753787994, "learning_rate": 5e-05, "loss": 1.6274, "step": 906 }, { "epoch": 0.24801750068362047, "grad_norm": 0.1522308588027954, "learning_rate": 5e-05, "loss": 1.735, "step": 907 }, { "epoch": 0.24829094886519004, "grad_norm": 0.13882996141910553, "learning_rate": 5e-05, "loss": 1.5867, "step": 908 }, { "epoch": 0.24856439704675964, "grad_norm": 0.1577073335647583, "learning_rate": 5e-05, "loss": 1.6647, "step": 909 }, { "epoch": 0.24883784522832922, "grad_norm": 0.15113559365272522, "learning_rate": 5e-05, "loss": 1.6303, "step": 910 }, { "epoch": 0.24911129340989882, "grad_norm": 0.15019430220127106, "learning_rate": 5e-05, "loss": 1.6976, "step": 911 }, { "epoch": 0.24938474159146842, "grad_norm": 0.14112697541713715, "learning_rate": 5e-05, "loss": 1.54, "step": 912 }, { "epoch": 0.249658189773038, "grad_norm": 0.14087800681591034, "learning_rate": 5e-05, "loss": 1.5316, "step": 913 }, { "epoch": 0.2499316379546076, "grad_norm": 0.14896658062934875, "learning_rate": 5e-05, "loss": 1.561, "step": 914 }, { "epoch": 0.2502050861361772, "grad_norm": 0.15148289501667023, "learning_rate": 5e-05, "loss": 1.6698, "step": 915 }, { "epoch": 0.2504785343177468, "grad_norm": 0.1539052277803421, "learning_rate": 5e-05, "loss": 1.6017, "step": 916 }, { "epoch": 0.2507519824993164, "grad_norm": 0.16018076241016388, "learning_rate": 5e-05, "loss": 1.6956, "step": 917 }, { "epoch": 0.25102543068088595, "grad_norm": 0.17696644365787506, "learning_rate": 5e-05, "loss": 1.7196, "step": 918 }, { "epoch": 0.25129887886245555, "grad_norm": 0.1509835124015808, "learning_rate": 5e-05, "loss": 1.6243, "step": 919 }, { "epoch": 0.25157232704402516, "grad_norm": 0.15202875435352325, "learning_rate": 5e-05, "loss": 1.6868, "step": 920 }, { "epoch": 0.25184577522559476, "grad_norm": 0.16320888698101044, "learning_rate": 5e-05, "loss": 1.697, "step": 921 }, { "epoch": 0.25211922340716436, "grad_norm": 0.15281890332698822, "learning_rate": 5e-05, "loss": 1.5824, "step": 922 }, { "epoch": 0.2523926715887339, "grad_norm": 0.1686651110649109, "learning_rate": 5e-05, "loss": 1.6362, "step": 923 }, { "epoch": 0.2526661197703035, "grad_norm": 0.16698114573955536, "learning_rate": 5e-05, "loss": 1.6557, "step": 924 }, { "epoch": 0.2529395679518731, "grad_norm": 0.16273106634616852, "learning_rate": 5e-05, "loss": 1.7441, "step": 925 }, { "epoch": 0.2532130161334427, "grad_norm": 0.16961312294006348, "learning_rate": 5e-05, "loss": 1.7053, "step": 926 }, { "epoch": 0.2534864643150123, "grad_norm": 0.1489211916923523, "learning_rate": 5e-05, "loss": 1.6257, "step": 927 }, { "epoch": 0.2537599124965819, "grad_norm": 0.15058746933937073, "learning_rate": 5e-05, "loss": 1.61, "step": 928 }, { "epoch": 0.25403336067815147, "grad_norm": 0.16194166243076324, "learning_rate": 5e-05, "loss": 1.6667, "step": 929 }, { "epoch": 0.25430680885972107, "grad_norm": 0.1765557825565338, "learning_rate": 5e-05, "loss": 1.6572, "step": 930 }, { "epoch": 0.25458025704129067, "grad_norm": 0.14746791124343872, "learning_rate": 5e-05, "loss": 1.6596, "step": 931 }, { "epoch": 0.25485370522286027, "grad_norm": 0.14352820813655853, "learning_rate": 5e-05, "loss": 1.5291, "step": 932 }, { "epoch": 0.2551271534044299, "grad_norm": 0.15766541659832, "learning_rate": 5e-05, "loss": 1.6496, "step": 933 }, { "epoch": 0.2554006015859995, "grad_norm": 0.1461249589920044, "learning_rate": 5e-05, "loss": 1.6093, "step": 934 }, { "epoch": 0.255674049767569, "grad_norm": 0.15647587180137634, "learning_rate": 5e-05, "loss": 1.7245, "step": 935 }, { "epoch": 0.2559474979491386, "grad_norm": 0.16043874621391296, "learning_rate": 5e-05, "loss": 1.7584, "step": 936 }, { "epoch": 0.2562209461307082, "grad_norm": 0.16398166120052338, "learning_rate": 5e-05, "loss": 1.7888, "step": 937 }, { "epoch": 0.25649439431227783, "grad_norm": 0.1568370908498764, "learning_rate": 5e-05, "loss": 1.6886, "step": 938 }, { "epoch": 0.25676784249384743, "grad_norm": 0.14432042837142944, "learning_rate": 5e-05, "loss": 1.5691, "step": 939 }, { "epoch": 0.25704129067541703, "grad_norm": 0.15498638153076172, "learning_rate": 5e-05, "loss": 1.6414, "step": 940 }, { "epoch": 0.2573147388569866, "grad_norm": 0.1573934257030487, "learning_rate": 5e-05, "loss": 1.7171, "step": 941 }, { "epoch": 0.2575881870385562, "grad_norm": 0.16507598757743835, "learning_rate": 5e-05, "loss": 1.7697, "step": 942 }, { "epoch": 0.2578616352201258, "grad_norm": 0.16209067404270172, "learning_rate": 5e-05, "loss": 1.7732, "step": 943 }, { "epoch": 0.2581350834016954, "grad_norm": 0.16477470099925995, "learning_rate": 5e-05, "loss": 1.6439, "step": 944 }, { "epoch": 0.258408531583265, "grad_norm": 0.15217360854148865, "learning_rate": 5e-05, "loss": 1.6341, "step": 945 }, { "epoch": 0.2586819797648346, "grad_norm": 0.16021955013275146, "learning_rate": 5e-05, "loss": 1.6829, "step": 946 }, { "epoch": 0.25895542794640414, "grad_norm": 0.16192196309566498, "learning_rate": 5e-05, "loss": 1.6225, "step": 947 }, { "epoch": 0.25922887612797374, "grad_norm": 0.15227045118808746, "learning_rate": 5e-05, "loss": 1.6503, "step": 948 }, { "epoch": 0.25950232430954334, "grad_norm": 0.17690598964691162, "learning_rate": 5e-05, "loss": 1.7092, "step": 949 }, { "epoch": 0.25977577249111294, "grad_norm": 0.1463916003704071, "learning_rate": 5e-05, "loss": 1.5948, "step": 950 }, { "epoch": 0.26004922067268255, "grad_norm": 0.16608351469039917, "learning_rate": 5e-05, "loss": 1.6339, "step": 951 }, { "epoch": 0.2603226688542521, "grad_norm": 0.16047005355358124, "learning_rate": 5e-05, "loss": 1.6928, "step": 952 }, { "epoch": 0.2605961170358217, "grad_norm": 0.1434023380279541, "learning_rate": 5e-05, "loss": 1.5613, "step": 953 }, { "epoch": 0.2608695652173913, "grad_norm": 0.1558619737625122, "learning_rate": 5e-05, "loss": 1.6868, "step": 954 }, { "epoch": 0.2611430133989609, "grad_norm": 0.14743700623512268, "learning_rate": 5e-05, "loss": 1.612, "step": 955 }, { "epoch": 0.2614164615805305, "grad_norm": 0.15097011625766754, "learning_rate": 5e-05, "loss": 1.5625, "step": 956 }, { "epoch": 0.2616899097621001, "grad_norm": 0.14291320741176605, "learning_rate": 5e-05, "loss": 1.6787, "step": 957 }, { "epoch": 0.26196335794366965, "grad_norm": 0.15289993584156036, "learning_rate": 5e-05, "loss": 1.754, "step": 958 }, { "epoch": 0.26223680612523925, "grad_norm": 0.15618576109409332, "learning_rate": 5e-05, "loss": 1.7212, "step": 959 }, { "epoch": 0.26251025430680885, "grad_norm": 0.14384949207305908, "learning_rate": 5e-05, "loss": 1.6108, "step": 960 }, { "epoch": 0.26278370248837846, "grad_norm": 0.15583495795726776, "learning_rate": 5e-05, "loss": 1.6998, "step": 961 }, { "epoch": 0.26305715066994806, "grad_norm": 0.1544669270515442, "learning_rate": 5e-05, "loss": 1.646, "step": 962 }, { "epoch": 0.26333059885151766, "grad_norm": 0.15387408435344696, "learning_rate": 5e-05, "loss": 1.7415, "step": 963 }, { "epoch": 0.2636040470330872, "grad_norm": 0.15197156369686127, "learning_rate": 5e-05, "loss": 1.6324, "step": 964 }, { "epoch": 0.2638774952146568, "grad_norm": 0.15596544742584229, "learning_rate": 5e-05, "loss": 1.6116, "step": 965 }, { "epoch": 0.2641509433962264, "grad_norm": 0.15174002945423126, "learning_rate": 5e-05, "loss": 1.6552, "step": 966 }, { "epoch": 0.264424391577796, "grad_norm": 0.16541074216365814, "learning_rate": 5e-05, "loss": 1.7092, "step": 967 }, { "epoch": 0.2646978397593656, "grad_norm": 0.15054158866405487, "learning_rate": 5e-05, "loss": 1.6584, "step": 968 }, { "epoch": 0.2649712879409352, "grad_norm": 0.16845272481441498, "learning_rate": 5e-05, "loss": 1.7485, "step": 969 }, { "epoch": 0.26524473612250477, "grad_norm": 0.15951582789421082, "learning_rate": 5e-05, "loss": 1.6003, "step": 970 }, { "epoch": 0.26551818430407437, "grad_norm": 0.15601181983947754, "learning_rate": 5e-05, "loss": 1.6222, "step": 971 }, { "epoch": 0.26579163248564397, "grad_norm": 0.1529085487127304, "learning_rate": 5e-05, "loss": 1.687, "step": 972 }, { "epoch": 0.26606508066721357, "grad_norm": 0.15299002826213837, "learning_rate": 5e-05, "loss": 1.716, "step": 973 }, { "epoch": 0.2663385288487832, "grad_norm": 0.17247562110424042, "learning_rate": 5e-05, "loss": 1.6112, "step": 974 }, { "epoch": 0.2666119770303528, "grad_norm": 0.16275718808174133, "learning_rate": 5e-05, "loss": 1.6481, "step": 975 }, { "epoch": 0.2668854252119223, "grad_norm": 0.18116293847560883, "learning_rate": 5e-05, "loss": 1.5794, "step": 976 }, { "epoch": 0.2671588733934919, "grad_norm": 0.14527344703674316, "learning_rate": 5e-05, "loss": 1.6521, "step": 977 }, { "epoch": 0.2674323215750615, "grad_norm": 0.15483027696609497, "learning_rate": 5e-05, "loss": 1.637, "step": 978 }, { "epoch": 0.26770576975663113, "grad_norm": 0.15565639734268188, "learning_rate": 5e-05, "loss": 1.6142, "step": 979 }, { "epoch": 0.26797921793820073, "grad_norm": 0.15443767607212067, "learning_rate": 5e-05, "loss": 1.5859, "step": 980 }, { "epoch": 0.2682526661197703, "grad_norm": 0.15313716232776642, "learning_rate": 5e-05, "loss": 1.6508, "step": 981 }, { "epoch": 0.2685261143013399, "grad_norm": 0.15430563688278198, "learning_rate": 5e-05, "loss": 1.6352, "step": 982 }, { "epoch": 0.2687995624829095, "grad_norm": 0.15906836092472076, "learning_rate": 5e-05, "loss": 1.7201, "step": 983 }, { "epoch": 0.2690730106644791, "grad_norm": 0.151002898812294, "learning_rate": 5e-05, "loss": 1.7418, "step": 984 }, { "epoch": 0.2693464588460487, "grad_norm": 0.1649433970451355, "learning_rate": 5e-05, "loss": 1.6364, "step": 985 }, { "epoch": 0.2696199070276183, "grad_norm": 0.15000388026237488, "learning_rate": 5e-05, "loss": 1.5924, "step": 986 }, { "epoch": 0.26989335520918784, "grad_norm": 0.1750802844762802, "learning_rate": 5e-05, "loss": 1.621, "step": 987 }, { "epoch": 0.27016680339075744, "grad_norm": 0.14742594957351685, "learning_rate": 5e-05, "loss": 1.6103, "step": 988 }, { "epoch": 0.27044025157232704, "grad_norm": 0.1554790437221527, "learning_rate": 5e-05, "loss": 1.6996, "step": 989 }, { "epoch": 0.27071369975389664, "grad_norm": 0.16735535860061646, "learning_rate": 5e-05, "loss": 1.6705, "step": 990 }, { "epoch": 0.27098714793546624, "grad_norm": 0.1675184816122055, "learning_rate": 5e-05, "loss": 1.7114, "step": 991 }, { "epoch": 0.27126059611703585, "grad_norm": 0.1542404443025589, "learning_rate": 5e-05, "loss": 1.5953, "step": 992 }, { "epoch": 0.2715340442986054, "grad_norm": 0.1605929136276245, "learning_rate": 5e-05, "loss": 1.5541, "step": 993 }, { "epoch": 0.271807492480175, "grad_norm": 0.17429675161838531, "learning_rate": 5e-05, "loss": 1.6393, "step": 994 }, { "epoch": 0.2720809406617446, "grad_norm": 0.16409185528755188, "learning_rate": 5e-05, "loss": 1.7244, "step": 995 }, { "epoch": 0.2723543888433142, "grad_norm": 0.16131426393985748, "learning_rate": 5e-05, "loss": 1.6339, "step": 996 }, { "epoch": 0.2726278370248838, "grad_norm": 0.14714978635311127, "learning_rate": 5e-05, "loss": 1.5154, "step": 997 }, { "epoch": 0.2729012852064534, "grad_norm": 0.14420582354068756, "learning_rate": 5e-05, "loss": 1.654, "step": 998 }, { "epoch": 0.27317473338802295, "grad_norm": 0.15642261505126953, "learning_rate": 5e-05, "loss": 1.5478, "step": 999 }, { "epoch": 0.27344818156959255, "grad_norm": 0.14731954038143158, "learning_rate": 5e-05, "loss": 1.5659, "step": 1000 }, { "epoch": 0.27372162975116215, "grad_norm": 0.16006474196910858, "learning_rate": 5e-05, "loss": 1.6048, "step": 1001 }, { "epoch": 0.27399507793273176, "grad_norm": 0.15757764875888824, "learning_rate": 5e-05, "loss": 1.6572, "step": 1002 }, { "epoch": 0.27426852611430136, "grad_norm": 0.15396282076835632, "learning_rate": 5e-05, "loss": 1.7424, "step": 1003 }, { "epoch": 0.2745419742958709, "grad_norm": 0.15785497426986694, "learning_rate": 5e-05, "loss": 1.7142, "step": 1004 }, { "epoch": 0.2748154224774405, "grad_norm": 0.14254657924175262, "learning_rate": 5e-05, "loss": 1.5812, "step": 1005 }, { "epoch": 0.2750888706590101, "grad_norm": 0.1501447856426239, "learning_rate": 5e-05, "loss": 1.7112, "step": 1006 }, { "epoch": 0.2753623188405797, "grad_norm": 0.16908079385757446, "learning_rate": 5e-05, "loss": 1.7688, "step": 1007 }, { "epoch": 0.2756357670221493, "grad_norm": 0.1612824648618698, "learning_rate": 5e-05, "loss": 1.6473, "step": 1008 }, { "epoch": 0.2759092152037189, "grad_norm": 0.1502557396888733, "learning_rate": 5e-05, "loss": 1.6008, "step": 1009 }, { "epoch": 0.27618266338528846, "grad_norm": 0.1549758017063141, "learning_rate": 5e-05, "loss": 1.6473, "step": 1010 }, { "epoch": 0.27645611156685806, "grad_norm": 0.1512656807899475, "learning_rate": 5e-05, "loss": 1.7336, "step": 1011 }, { "epoch": 0.27672955974842767, "grad_norm": 0.1543295830488205, "learning_rate": 5e-05, "loss": 1.6293, "step": 1012 }, { "epoch": 0.27700300792999727, "grad_norm": 0.15347859263420105, "learning_rate": 5e-05, "loss": 1.589, "step": 1013 }, { "epoch": 0.27727645611156687, "grad_norm": 0.15677101910114288, "learning_rate": 5e-05, "loss": 1.7283, "step": 1014 }, { "epoch": 0.2775499042931365, "grad_norm": 0.16513799130916595, "learning_rate": 5e-05, "loss": 1.6725, "step": 1015 }, { "epoch": 0.277823352474706, "grad_norm": 0.15116244554519653, "learning_rate": 5e-05, "loss": 1.639, "step": 1016 }, { "epoch": 0.2780968006562756, "grad_norm": 0.1464737504720688, "learning_rate": 5e-05, "loss": 1.6598, "step": 1017 }, { "epoch": 0.2783702488378452, "grad_norm": 0.1716412454843521, "learning_rate": 5e-05, "loss": 1.6941, "step": 1018 }, { "epoch": 0.2786436970194148, "grad_norm": 0.15639688074588776, "learning_rate": 5e-05, "loss": 1.6334, "step": 1019 }, { "epoch": 0.27891714520098443, "grad_norm": 0.1594480276107788, "learning_rate": 5e-05, "loss": 1.6916, "step": 1020 }, { "epoch": 0.27919059338255403, "grad_norm": 0.14737538993358612, "learning_rate": 5e-05, "loss": 1.5393, "step": 1021 }, { "epoch": 0.2794640415641236, "grad_norm": 0.15937146544456482, "learning_rate": 5e-05, "loss": 1.7071, "step": 1022 }, { "epoch": 0.2797374897456932, "grad_norm": 0.15143409371376038, "learning_rate": 5e-05, "loss": 1.6276, "step": 1023 }, { "epoch": 0.2800109379272628, "grad_norm": 0.16173769533634186, "learning_rate": 5e-05, "loss": 1.6131, "step": 1024 }, { "epoch": 0.2802843861088324, "grad_norm": 0.14734511077404022, "learning_rate": 5e-05, "loss": 1.6051, "step": 1025 }, { "epoch": 0.280557834290402, "grad_norm": 0.1571275144815445, "learning_rate": 5e-05, "loss": 1.7169, "step": 1026 }, { "epoch": 0.2808312824719716, "grad_norm": 0.15668824315071106, "learning_rate": 5e-05, "loss": 1.8378, "step": 1027 }, { "epoch": 0.28110473065354113, "grad_norm": 0.15768049657344818, "learning_rate": 5e-05, "loss": 1.6911, "step": 1028 }, { "epoch": 0.28137817883511074, "grad_norm": 0.15700852870941162, "learning_rate": 5e-05, "loss": 1.7142, "step": 1029 }, { "epoch": 0.28165162701668034, "grad_norm": 0.14887461066246033, "learning_rate": 5e-05, "loss": 1.6241, "step": 1030 }, { "epoch": 0.28192507519824994, "grad_norm": 0.15155330300331116, "learning_rate": 5e-05, "loss": 1.7063, "step": 1031 }, { "epoch": 0.28219852337981954, "grad_norm": 0.1655176430940628, "learning_rate": 5e-05, "loss": 1.6063, "step": 1032 }, { "epoch": 0.2824719715613891, "grad_norm": 0.1478133499622345, "learning_rate": 5e-05, "loss": 1.6977, "step": 1033 }, { "epoch": 0.2827454197429587, "grad_norm": 0.14731581509113312, "learning_rate": 5e-05, "loss": 1.6238, "step": 1034 }, { "epoch": 0.2830188679245283, "grad_norm": 0.16165371239185333, "learning_rate": 5e-05, "loss": 1.5795, "step": 1035 }, { "epoch": 0.2832923161060979, "grad_norm": 0.1721602976322174, "learning_rate": 5e-05, "loss": 1.7447, "step": 1036 }, { "epoch": 0.2835657642876675, "grad_norm": 0.14430297911167145, "learning_rate": 5e-05, "loss": 1.5664, "step": 1037 }, { "epoch": 0.2838392124692371, "grad_norm": 0.15474802255630493, "learning_rate": 5e-05, "loss": 1.6093, "step": 1038 }, { "epoch": 0.28411266065080665, "grad_norm": 0.16973662376403809, "learning_rate": 5e-05, "loss": 1.601, "step": 1039 }, { "epoch": 0.28438610883237625, "grad_norm": 0.170043483376503, "learning_rate": 5e-05, "loss": 1.6629, "step": 1040 }, { "epoch": 0.28465955701394585, "grad_norm": 0.1592838615179062, "learning_rate": 5e-05, "loss": 1.6638, "step": 1041 }, { "epoch": 0.28493300519551545, "grad_norm": 0.1679082214832306, "learning_rate": 5e-05, "loss": 1.6667, "step": 1042 }, { "epoch": 0.28520645337708506, "grad_norm": 0.17934280633926392, "learning_rate": 5e-05, "loss": 1.6224, "step": 1043 }, { "epoch": 0.28547990155865466, "grad_norm": 0.14445669949054718, "learning_rate": 5e-05, "loss": 1.6861, "step": 1044 }, { "epoch": 0.2857533497402242, "grad_norm": 0.15397311747074127, "learning_rate": 5e-05, "loss": 1.7506, "step": 1045 }, { "epoch": 0.2860267979217938, "grad_norm": 0.15878726541996002, "learning_rate": 5e-05, "loss": 1.7283, "step": 1046 }, { "epoch": 0.2863002461033634, "grad_norm": 0.15634529292583466, "learning_rate": 5e-05, "loss": 1.6385, "step": 1047 }, { "epoch": 0.286573694284933, "grad_norm": 0.15060849487781525, "learning_rate": 5e-05, "loss": 1.6744, "step": 1048 }, { "epoch": 0.2868471424665026, "grad_norm": 0.16258785128593445, "learning_rate": 5e-05, "loss": 1.6701, "step": 1049 }, { "epoch": 0.2871205906480722, "grad_norm": 0.14815811812877655, "learning_rate": 5e-05, "loss": 1.5947, "step": 1050 }, { "epoch": 0.28739403882964176, "grad_norm": 0.17626261711120605, "learning_rate": 5e-05, "loss": 1.6407, "step": 1051 }, { "epoch": 0.28766748701121136, "grad_norm": 0.1558643877506256, "learning_rate": 5e-05, "loss": 1.6002, "step": 1052 }, { "epoch": 0.28794093519278097, "grad_norm": 0.14758311212062836, "learning_rate": 5e-05, "loss": 1.5985, "step": 1053 }, { "epoch": 0.28821438337435057, "grad_norm": 0.16778376698493958, "learning_rate": 5e-05, "loss": 1.6799, "step": 1054 }, { "epoch": 0.28848783155592017, "grad_norm": 0.16925834119319916, "learning_rate": 5e-05, "loss": 1.6951, "step": 1055 }, { "epoch": 0.2887612797374898, "grad_norm": 0.16058339178562164, "learning_rate": 5e-05, "loss": 1.6221, "step": 1056 }, { "epoch": 0.2890347279190593, "grad_norm": 0.16010229289531708, "learning_rate": 5e-05, "loss": 1.6665, "step": 1057 }, { "epoch": 0.2893081761006289, "grad_norm": 0.15044192969799042, "learning_rate": 5e-05, "loss": 1.6247, "step": 1058 }, { "epoch": 0.2895816242821985, "grad_norm": 0.14547835290431976, "learning_rate": 5e-05, "loss": 1.6111, "step": 1059 }, { "epoch": 0.2898550724637681, "grad_norm": 0.15208472311496735, "learning_rate": 5e-05, "loss": 1.7198, "step": 1060 }, { "epoch": 0.29012852064533773, "grad_norm": 0.15193967521190643, "learning_rate": 5e-05, "loss": 1.6775, "step": 1061 }, { "epoch": 0.2904019688269073, "grad_norm": 0.15141284465789795, "learning_rate": 5e-05, "loss": 1.6727, "step": 1062 }, { "epoch": 0.2906754170084769, "grad_norm": 0.16151681542396545, "learning_rate": 5e-05, "loss": 1.7054, "step": 1063 }, { "epoch": 0.2909488651900465, "grad_norm": 0.15598168969154358, "learning_rate": 5e-05, "loss": 1.6536, "step": 1064 }, { "epoch": 0.2912223133716161, "grad_norm": 0.147295281291008, "learning_rate": 5e-05, "loss": 1.658, "step": 1065 }, { "epoch": 0.2914957615531857, "grad_norm": 0.1515830159187317, "learning_rate": 5e-05, "loss": 1.6548, "step": 1066 }, { "epoch": 0.2917692097347553, "grad_norm": 0.1516982913017273, "learning_rate": 5e-05, "loss": 1.6866, "step": 1067 }, { "epoch": 0.29204265791632483, "grad_norm": 0.16418761014938354, "learning_rate": 5e-05, "loss": 1.7676, "step": 1068 }, { "epoch": 0.29231610609789443, "grad_norm": 0.1566287726163864, "learning_rate": 5e-05, "loss": 1.6747, "step": 1069 }, { "epoch": 0.29258955427946404, "grad_norm": 0.16158966720104218, "learning_rate": 5e-05, "loss": 1.6606, "step": 1070 }, { "epoch": 0.29286300246103364, "grad_norm": 0.1478201299905777, "learning_rate": 5e-05, "loss": 1.708, "step": 1071 }, { "epoch": 0.29313645064260324, "grad_norm": 0.15915492177009583, "learning_rate": 5e-05, "loss": 1.6656, "step": 1072 }, { "epoch": 0.29340989882417284, "grad_norm": 0.16170357167720795, "learning_rate": 5e-05, "loss": 1.6458, "step": 1073 }, { "epoch": 0.2936833470057424, "grad_norm": 0.1563751995563507, "learning_rate": 5e-05, "loss": 1.6192, "step": 1074 }, { "epoch": 0.293956795187312, "grad_norm": 0.1516135334968567, "learning_rate": 5e-05, "loss": 1.6441, "step": 1075 }, { "epoch": 0.2942302433688816, "grad_norm": 0.14756453037261963, "learning_rate": 5e-05, "loss": 1.5897, "step": 1076 }, { "epoch": 0.2945036915504512, "grad_norm": 0.1514975130558014, "learning_rate": 5e-05, "loss": 1.6956, "step": 1077 }, { "epoch": 0.2947771397320208, "grad_norm": 0.1433197408914566, "learning_rate": 5e-05, "loss": 1.5484, "step": 1078 }, { "epoch": 0.2950505879135904, "grad_norm": 0.14783865213394165, "learning_rate": 5e-05, "loss": 1.6135, "step": 1079 }, { "epoch": 0.29532403609515995, "grad_norm": 0.1563805192708969, "learning_rate": 5e-05, "loss": 1.6578, "step": 1080 }, { "epoch": 0.29559748427672955, "grad_norm": 0.15550269186496735, "learning_rate": 5e-05, "loss": 1.6739, "step": 1081 }, { "epoch": 0.29587093245829915, "grad_norm": 0.1647748500108719, "learning_rate": 5e-05, "loss": 1.8268, "step": 1082 }, { "epoch": 0.29614438063986875, "grad_norm": 0.14956046640872955, "learning_rate": 5e-05, "loss": 1.6989, "step": 1083 }, { "epoch": 0.29641782882143836, "grad_norm": 0.1558298021554947, "learning_rate": 5e-05, "loss": 1.6784, "step": 1084 }, { "epoch": 0.2966912770030079, "grad_norm": 0.16264335811138153, "learning_rate": 5e-05, "loss": 1.6733, "step": 1085 }, { "epoch": 0.2969647251845775, "grad_norm": 0.15490394830703735, "learning_rate": 5e-05, "loss": 1.6337, "step": 1086 }, { "epoch": 0.2972381733661471, "grad_norm": 0.15151673555374146, "learning_rate": 5e-05, "loss": 1.6566, "step": 1087 }, { "epoch": 0.2975116215477167, "grad_norm": 0.15661917626857758, "learning_rate": 5e-05, "loss": 1.6329, "step": 1088 }, { "epoch": 0.2977850697292863, "grad_norm": 0.1620190590620041, "learning_rate": 5e-05, "loss": 1.6779, "step": 1089 }, { "epoch": 0.2980585179108559, "grad_norm": 0.15718665719032288, "learning_rate": 5e-05, "loss": 1.6008, "step": 1090 }, { "epoch": 0.29833196609242546, "grad_norm": 0.15119051933288574, "learning_rate": 5e-05, "loss": 1.6514, "step": 1091 }, { "epoch": 0.29860541427399506, "grad_norm": 0.172483429312706, "learning_rate": 5e-05, "loss": 1.6253, "step": 1092 }, { "epoch": 0.29887886245556466, "grad_norm": 0.17236994206905365, "learning_rate": 5e-05, "loss": 1.6927, "step": 1093 }, { "epoch": 0.29915231063713427, "grad_norm": 0.1658979058265686, "learning_rate": 5e-05, "loss": 1.6684, "step": 1094 }, { "epoch": 0.29942575881870387, "grad_norm": 0.16414695978164673, "learning_rate": 5e-05, "loss": 1.5933, "step": 1095 }, { "epoch": 0.29969920700027347, "grad_norm": 0.156681627035141, "learning_rate": 5e-05, "loss": 1.6016, "step": 1096 }, { "epoch": 0.299972655181843, "grad_norm": 0.14425235986709595, "learning_rate": 5e-05, "loss": 1.5941, "step": 1097 }, { "epoch": 0.3002461033634126, "grad_norm": 0.16902539134025574, "learning_rate": 5e-05, "loss": 1.5992, "step": 1098 }, { "epoch": 0.3005195515449822, "grad_norm": 0.17275625467300415, "learning_rate": 5e-05, "loss": 1.7861, "step": 1099 }, { "epoch": 0.3007929997265518, "grad_norm": 0.15677183866500854, "learning_rate": 5e-05, "loss": 1.6925, "step": 1100 }, { "epoch": 0.3010664479081214, "grad_norm": 0.1713411509990692, "learning_rate": 5e-05, "loss": 1.6661, "step": 1101 }, { "epoch": 0.30133989608969103, "grad_norm": 0.15756134688854218, "learning_rate": 5e-05, "loss": 1.6203, "step": 1102 }, { "epoch": 0.3016133442712606, "grad_norm": 0.15823891758918762, "learning_rate": 5e-05, "loss": 1.6594, "step": 1103 }, { "epoch": 0.3018867924528302, "grad_norm": 0.15777519345283508, "learning_rate": 5e-05, "loss": 1.6548, "step": 1104 }, { "epoch": 0.3021602406343998, "grad_norm": 0.15784233808517456, "learning_rate": 5e-05, "loss": 1.6849, "step": 1105 }, { "epoch": 0.3024336888159694, "grad_norm": 0.1561124622821808, "learning_rate": 5e-05, "loss": 1.6548, "step": 1106 }, { "epoch": 0.302707136997539, "grad_norm": 0.1538151651620865, "learning_rate": 5e-05, "loss": 1.6838, "step": 1107 }, { "epoch": 0.3029805851791086, "grad_norm": 0.14938168227672577, "learning_rate": 5e-05, "loss": 1.6129, "step": 1108 }, { "epoch": 0.30325403336067813, "grad_norm": 0.1569468080997467, "learning_rate": 5e-05, "loss": 1.6651, "step": 1109 }, { "epoch": 0.30352748154224773, "grad_norm": 0.15352605283260345, "learning_rate": 5e-05, "loss": 1.7078, "step": 1110 }, { "epoch": 0.30380092972381734, "grad_norm": 0.15539826452732086, "learning_rate": 5e-05, "loss": 1.643, "step": 1111 }, { "epoch": 0.30407437790538694, "grad_norm": 0.14606958627700806, "learning_rate": 5e-05, "loss": 1.6365, "step": 1112 }, { "epoch": 0.30434782608695654, "grad_norm": 0.15335801243782043, "learning_rate": 5e-05, "loss": 1.6818, "step": 1113 }, { "epoch": 0.3046212742685261, "grad_norm": 0.15371185541152954, "learning_rate": 5e-05, "loss": 1.6413, "step": 1114 }, { "epoch": 0.3048947224500957, "grad_norm": 0.15357623994350433, "learning_rate": 5e-05, "loss": 1.6887, "step": 1115 }, { "epoch": 0.3051681706316653, "grad_norm": 0.14678330719470978, "learning_rate": 5e-05, "loss": 1.7126, "step": 1116 }, { "epoch": 0.3054416188132349, "grad_norm": 0.1587059050798416, "learning_rate": 5e-05, "loss": 1.6373, "step": 1117 }, { "epoch": 0.3057150669948045, "grad_norm": 0.1466260552406311, "learning_rate": 5e-05, "loss": 1.6006, "step": 1118 }, { "epoch": 0.3059885151763741, "grad_norm": 0.14602194726467133, "learning_rate": 5e-05, "loss": 1.6627, "step": 1119 }, { "epoch": 0.30626196335794365, "grad_norm": 0.1472383588552475, "learning_rate": 5e-05, "loss": 1.5288, "step": 1120 }, { "epoch": 0.30653541153951325, "grad_norm": 0.1505843549966812, "learning_rate": 5e-05, "loss": 1.6624, "step": 1121 }, { "epoch": 0.30680885972108285, "grad_norm": 0.15707598626613617, "learning_rate": 5e-05, "loss": 1.6482, "step": 1122 }, { "epoch": 0.30708230790265245, "grad_norm": 0.15016111731529236, "learning_rate": 5e-05, "loss": 1.5778, "step": 1123 }, { "epoch": 0.30735575608422205, "grad_norm": 0.1513780951499939, "learning_rate": 5e-05, "loss": 1.5779, "step": 1124 }, { "epoch": 0.30762920426579166, "grad_norm": 0.15368494391441345, "learning_rate": 5e-05, "loss": 1.6531, "step": 1125 }, { "epoch": 0.3079026524473612, "grad_norm": 0.1544412523508072, "learning_rate": 5e-05, "loss": 1.7159, "step": 1126 }, { "epoch": 0.3081761006289308, "grad_norm": 0.14770282804965973, "learning_rate": 5e-05, "loss": 1.6338, "step": 1127 }, { "epoch": 0.3084495488105004, "grad_norm": 0.14697682857513428, "learning_rate": 5e-05, "loss": 1.6817, "step": 1128 }, { "epoch": 0.30872299699207, "grad_norm": 0.15029069781303406, "learning_rate": 5e-05, "loss": 1.7186, "step": 1129 }, { "epoch": 0.3089964451736396, "grad_norm": 0.15909984707832336, "learning_rate": 5e-05, "loss": 1.7119, "step": 1130 }, { "epoch": 0.3092698933552092, "grad_norm": 0.14838500320911407, "learning_rate": 5e-05, "loss": 1.615, "step": 1131 }, { "epoch": 0.30954334153677876, "grad_norm": 0.15336006879806519, "learning_rate": 5e-05, "loss": 1.6544, "step": 1132 }, { "epoch": 0.30981678971834836, "grad_norm": 0.15481799840927124, "learning_rate": 5e-05, "loss": 1.6279, "step": 1133 }, { "epoch": 0.31009023789991796, "grad_norm": 0.1593121588230133, "learning_rate": 5e-05, "loss": 1.6697, "step": 1134 }, { "epoch": 0.31036368608148757, "grad_norm": 0.16930562257766724, "learning_rate": 5e-05, "loss": 1.6706, "step": 1135 }, { "epoch": 0.31063713426305717, "grad_norm": 0.15615466237068176, "learning_rate": 5e-05, "loss": 1.6018, "step": 1136 }, { "epoch": 0.31091058244462677, "grad_norm": 0.15975496172904968, "learning_rate": 5e-05, "loss": 1.696, "step": 1137 }, { "epoch": 0.3111840306261963, "grad_norm": 0.14851494133472443, "learning_rate": 5e-05, "loss": 1.5931, "step": 1138 }, { "epoch": 0.3114574788077659, "grad_norm": 0.1657806932926178, "learning_rate": 5e-05, "loss": 1.7412, "step": 1139 }, { "epoch": 0.3117309269893355, "grad_norm": 0.15335924923419952, "learning_rate": 5e-05, "loss": 1.6903, "step": 1140 }, { "epoch": 0.3120043751709051, "grad_norm": 0.15214680135250092, "learning_rate": 5e-05, "loss": 1.7338, "step": 1141 }, { "epoch": 0.3122778233524747, "grad_norm": 0.16068771481513977, "learning_rate": 5e-05, "loss": 1.712, "step": 1142 }, { "epoch": 0.3125512715340443, "grad_norm": 0.15814535319805145, "learning_rate": 5e-05, "loss": 1.7013, "step": 1143 }, { "epoch": 0.3128247197156139, "grad_norm": 0.1487572193145752, "learning_rate": 5e-05, "loss": 1.6186, "step": 1144 }, { "epoch": 0.3130981678971835, "grad_norm": 0.15032756328582764, "learning_rate": 5e-05, "loss": 1.6842, "step": 1145 }, { "epoch": 0.3133716160787531, "grad_norm": 0.15075697004795074, "learning_rate": 5e-05, "loss": 1.536, "step": 1146 }, { "epoch": 0.3136450642603227, "grad_norm": 0.15556646883487701, "learning_rate": 5e-05, "loss": 1.6043, "step": 1147 }, { "epoch": 0.3139185124418923, "grad_norm": 0.16194835305213928, "learning_rate": 5e-05, "loss": 1.7423, "step": 1148 }, { "epoch": 0.31419196062346183, "grad_norm": 0.1608458310365677, "learning_rate": 5e-05, "loss": 1.6599, "step": 1149 }, { "epoch": 0.31446540880503143, "grad_norm": 0.16816149652004242, "learning_rate": 5e-05, "loss": 1.7054, "step": 1150 }, { "epoch": 0.31473885698660103, "grad_norm": 0.14813366532325745, "learning_rate": 5e-05, "loss": 1.7635, "step": 1151 }, { "epoch": 0.31501230516817064, "grad_norm": 0.16501514613628387, "learning_rate": 5e-05, "loss": 1.6414, "step": 1152 }, { "epoch": 0.31528575334974024, "grad_norm": 0.15683704614639282, "learning_rate": 5e-05, "loss": 1.6307, "step": 1153 }, { "epoch": 0.31555920153130984, "grad_norm": 0.1599043607711792, "learning_rate": 5e-05, "loss": 1.678, "step": 1154 }, { "epoch": 0.3158326497128794, "grad_norm": 0.15843378007411957, "learning_rate": 5e-05, "loss": 1.6729, "step": 1155 }, { "epoch": 0.316106097894449, "grad_norm": 0.14226911962032318, "learning_rate": 5e-05, "loss": 1.584, "step": 1156 }, { "epoch": 0.3163795460760186, "grad_norm": 0.17751207947731018, "learning_rate": 5e-05, "loss": 1.6461, "step": 1157 }, { "epoch": 0.3166529942575882, "grad_norm": 0.1599876433610916, "learning_rate": 5e-05, "loss": 1.698, "step": 1158 }, { "epoch": 0.3169264424391578, "grad_norm": 0.14740917086601257, "learning_rate": 5e-05, "loss": 1.6424, "step": 1159 }, { "epoch": 0.3171998906207274, "grad_norm": 0.15979237854480743, "learning_rate": 5e-05, "loss": 1.6335, "step": 1160 }, { "epoch": 0.31747333880229694, "grad_norm": 0.16154277324676514, "learning_rate": 5e-05, "loss": 1.7442, "step": 1161 }, { "epoch": 0.31774678698386655, "grad_norm": 0.15989361703395844, "learning_rate": 5e-05, "loss": 1.639, "step": 1162 }, { "epoch": 0.31802023516543615, "grad_norm": 0.15814067423343658, "learning_rate": 5e-05, "loss": 1.7051, "step": 1163 }, { "epoch": 0.31829368334700575, "grad_norm": 0.15459835529327393, "learning_rate": 5e-05, "loss": 1.5727, "step": 1164 }, { "epoch": 0.31856713152857535, "grad_norm": 0.15153050422668457, "learning_rate": 5e-05, "loss": 1.6371, "step": 1165 }, { "epoch": 0.3188405797101449, "grad_norm": 0.1500144898891449, "learning_rate": 5e-05, "loss": 1.6598, "step": 1166 }, { "epoch": 0.3191140278917145, "grad_norm": 0.15817582607269287, "learning_rate": 5e-05, "loss": 1.6488, "step": 1167 }, { "epoch": 0.3193874760732841, "grad_norm": 0.15822099149227142, "learning_rate": 5e-05, "loss": 1.5906, "step": 1168 }, { "epoch": 0.3196609242548537, "grad_norm": 0.14896897971630096, "learning_rate": 5e-05, "loss": 1.6791, "step": 1169 }, { "epoch": 0.3199343724364233, "grad_norm": 0.1585737019777298, "learning_rate": 5e-05, "loss": 1.7674, "step": 1170 }, { "epoch": 0.3202078206179929, "grad_norm": 0.16357015073299408, "learning_rate": 5e-05, "loss": 1.6621, "step": 1171 }, { "epoch": 0.32048126879956246, "grad_norm": 0.14537674188613892, "learning_rate": 5e-05, "loss": 1.5276, "step": 1172 }, { "epoch": 0.32075471698113206, "grad_norm": 0.15648779273033142, "learning_rate": 5e-05, "loss": 1.6323, "step": 1173 }, { "epoch": 0.32102816516270166, "grad_norm": 0.14875225722789764, "learning_rate": 5e-05, "loss": 1.6257, "step": 1174 }, { "epoch": 0.32130161334427126, "grad_norm": 0.14988994598388672, "learning_rate": 5e-05, "loss": 1.642, "step": 1175 }, { "epoch": 0.32157506152584087, "grad_norm": 0.14954978227615356, "learning_rate": 5e-05, "loss": 1.6477, "step": 1176 }, { "epoch": 0.32184850970741047, "grad_norm": 0.15913893282413483, "learning_rate": 5e-05, "loss": 1.7523, "step": 1177 }, { "epoch": 0.32212195788898, "grad_norm": 0.17450571060180664, "learning_rate": 5e-05, "loss": 1.7723, "step": 1178 }, { "epoch": 0.3223954060705496, "grad_norm": 0.14600080251693726, "learning_rate": 5e-05, "loss": 1.6497, "step": 1179 }, { "epoch": 0.3226688542521192, "grad_norm": 0.14759889245033264, "learning_rate": 5e-05, "loss": 1.5792, "step": 1180 }, { "epoch": 0.3229423024336888, "grad_norm": 0.17649635672569275, "learning_rate": 5e-05, "loss": 1.7302, "step": 1181 }, { "epoch": 0.3232157506152584, "grad_norm": 0.15906043350696564, "learning_rate": 5e-05, "loss": 1.6046, "step": 1182 }, { "epoch": 0.323489198796828, "grad_norm": 0.14562730491161346, "learning_rate": 5e-05, "loss": 1.566, "step": 1183 }, { "epoch": 0.3237626469783976, "grad_norm": 0.15766160190105438, "learning_rate": 5e-05, "loss": 1.6397, "step": 1184 }, { "epoch": 0.3240360951599672, "grad_norm": 0.16190050542354584, "learning_rate": 5e-05, "loss": 1.5876, "step": 1185 }, { "epoch": 0.3243095433415368, "grad_norm": 0.15443246066570282, "learning_rate": 5e-05, "loss": 1.6817, "step": 1186 }, { "epoch": 0.3245829915231064, "grad_norm": 0.1686965525150299, "learning_rate": 5e-05, "loss": 1.5725, "step": 1187 }, { "epoch": 0.324856439704676, "grad_norm": 0.16837440431118011, "learning_rate": 5e-05, "loss": 1.681, "step": 1188 }, { "epoch": 0.3251298878862456, "grad_norm": 0.15975917875766754, "learning_rate": 5e-05, "loss": 1.6988, "step": 1189 }, { "epoch": 0.32540333606781513, "grad_norm": 0.16166743636131287, "learning_rate": 5e-05, "loss": 1.6254, "step": 1190 }, { "epoch": 0.32567678424938473, "grad_norm": 0.15348750352859497, "learning_rate": 5e-05, "loss": 1.6757, "step": 1191 }, { "epoch": 0.32595023243095433, "grad_norm": 0.16833312809467316, "learning_rate": 5e-05, "loss": 1.6262, "step": 1192 }, { "epoch": 0.32622368061252394, "grad_norm": 0.1633581817150116, "learning_rate": 5e-05, "loss": 1.7312, "step": 1193 }, { "epoch": 0.32649712879409354, "grad_norm": 0.14953148365020752, "learning_rate": 5e-05, "loss": 1.5993, "step": 1194 }, { "epoch": 0.3267705769756631, "grad_norm": 0.1685009002685547, "learning_rate": 5e-05, "loss": 1.7026, "step": 1195 }, { "epoch": 0.3270440251572327, "grad_norm": 0.1520211100578308, "learning_rate": 5e-05, "loss": 1.6178, "step": 1196 }, { "epoch": 0.3273174733388023, "grad_norm": 0.15548743307590485, "learning_rate": 5e-05, "loss": 1.6335, "step": 1197 }, { "epoch": 0.3275909215203719, "grad_norm": 0.1540190726518631, "learning_rate": 5e-05, "loss": 1.7661, "step": 1198 }, { "epoch": 0.3278643697019415, "grad_norm": 0.15100663900375366, "learning_rate": 5e-05, "loss": 1.5361, "step": 1199 }, { "epoch": 0.3281378178835111, "grad_norm": 0.15432646870613098, "learning_rate": 5e-05, "loss": 1.7029, "step": 1200 }, { "epoch": 0.32841126606508064, "grad_norm": 0.15209780633449554, "learning_rate": 5e-05, "loss": 1.6857, "step": 1201 }, { "epoch": 0.32868471424665024, "grad_norm": 0.1498226374387741, "learning_rate": 5e-05, "loss": 1.7318, "step": 1202 }, { "epoch": 0.32895816242821985, "grad_norm": 0.149881511926651, "learning_rate": 5e-05, "loss": 1.6591, "step": 1203 }, { "epoch": 0.32923161060978945, "grad_norm": 0.15110871195793152, "learning_rate": 5e-05, "loss": 1.6238, "step": 1204 }, { "epoch": 0.32950505879135905, "grad_norm": 0.1504354178905487, "learning_rate": 5e-05, "loss": 1.6124, "step": 1205 }, { "epoch": 0.32977850697292865, "grad_norm": 0.16923676431179047, "learning_rate": 5e-05, "loss": 1.7914, "step": 1206 }, { "epoch": 0.3300519551544982, "grad_norm": 0.17524860799312592, "learning_rate": 5e-05, "loss": 1.6875, "step": 1207 }, { "epoch": 0.3303254033360678, "grad_norm": 0.1538068652153015, "learning_rate": 5e-05, "loss": 1.6375, "step": 1208 }, { "epoch": 0.3305988515176374, "grad_norm": 0.15115374326705933, "learning_rate": 5e-05, "loss": 1.6759, "step": 1209 }, { "epoch": 0.330872299699207, "grad_norm": 0.1774495393037796, "learning_rate": 5e-05, "loss": 1.6865, "step": 1210 }, { "epoch": 0.3311457478807766, "grad_norm": 0.16683407127857208, "learning_rate": 5e-05, "loss": 1.6031, "step": 1211 }, { "epoch": 0.3314191960623462, "grad_norm": 0.16064640879631042, "learning_rate": 5e-05, "loss": 1.7608, "step": 1212 }, { "epoch": 0.33169264424391576, "grad_norm": 0.16114073991775513, "learning_rate": 5e-05, "loss": 1.6658, "step": 1213 }, { "epoch": 0.33196609242548536, "grad_norm": 0.161068856716156, "learning_rate": 5e-05, "loss": 1.6989, "step": 1214 }, { "epoch": 0.33223954060705496, "grad_norm": 0.155606210231781, "learning_rate": 5e-05, "loss": 1.6235, "step": 1215 }, { "epoch": 0.33251298878862456, "grad_norm": 0.18368175625801086, "learning_rate": 5e-05, "loss": 1.6516, "step": 1216 }, { "epoch": 0.33278643697019417, "grad_norm": 0.1500716358423233, "learning_rate": 5e-05, "loss": 1.6361, "step": 1217 }, { "epoch": 0.33305988515176377, "grad_norm": 0.15658661723136902, "learning_rate": 5e-05, "loss": 1.5645, "step": 1218 }, { "epoch": 0.3333333333333333, "grad_norm": 0.16541090607643127, "learning_rate": 5e-05, "loss": 1.7556, "step": 1219 }, { "epoch": 0.3336067815149029, "grad_norm": 0.15288498997688293, "learning_rate": 5e-05, "loss": 1.6349, "step": 1220 }, { "epoch": 0.3338802296964725, "grad_norm": 0.16073960065841675, "learning_rate": 5e-05, "loss": 1.6157, "step": 1221 }, { "epoch": 0.3341536778780421, "grad_norm": 0.1543978601694107, "learning_rate": 5e-05, "loss": 1.6628, "step": 1222 }, { "epoch": 0.3344271260596117, "grad_norm": 0.14811809360980988, "learning_rate": 5e-05, "loss": 1.5668, "step": 1223 }, { "epoch": 0.33470057424118127, "grad_norm": 0.15395627915859222, "learning_rate": 5e-05, "loss": 1.6593, "step": 1224 }, { "epoch": 0.3349740224227509, "grad_norm": 0.16432489454746246, "learning_rate": 5e-05, "loss": 1.6334, "step": 1225 }, { "epoch": 0.3352474706043205, "grad_norm": 0.15168853104114532, "learning_rate": 5e-05, "loss": 1.6247, "step": 1226 }, { "epoch": 0.3355209187858901, "grad_norm": 0.15608245134353638, "learning_rate": 5e-05, "loss": 1.6651, "step": 1227 }, { "epoch": 0.3357943669674597, "grad_norm": 0.16598603129386902, "learning_rate": 5e-05, "loss": 1.7173, "step": 1228 }, { "epoch": 0.3360678151490293, "grad_norm": 0.14476749300956726, "learning_rate": 5e-05, "loss": 1.6363, "step": 1229 }, { "epoch": 0.3363412633305988, "grad_norm": 0.16102361679077148, "learning_rate": 5e-05, "loss": 1.6763, "step": 1230 }, { "epoch": 0.33661471151216843, "grad_norm": 0.16813768446445465, "learning_rate": 5e-05, "loss": 1.6754, "step": 1231 }, { "epoch": 0.33688815969373803, "grad_norm": 0.15631164610385895, "learning_rate": 5e-05, "loss": 1.6529, "step": 1232 }, { "epoch": 0.33716160787530763, "grad_norm": 0.1545805037021637, "learning_rate": 5e-05, "loss": 1.7156, "step": 1233 }, { "epoch": 0.33743505605687724, "grad_norm": 0.17586275935173035, "learning_rate": 5e-05, "loss": 1.6615, "step": 1234 }, { "epoch": 0.33770850423844684, "grad_norm": 0.15202665328979492, "learning_rate": 5e-05, "loss": 1.6291, "step": 1235 }, { "epoch": 0.3379819524200164, "grad_norm": 0.16369853913784027, "learning_rate": 5e-05, "loss": 1.6827, "step": 1236 }, { "epoch": 0.338255400601586, "grad_norm": 0.15997561812400818, "learning_rate": 5e-05, "loss": 1.7339, "step": 1237 }, { "epoch": 0.3385288487831556, "grad_norm": 0.14640896022319794, "learning_rate": 5e-05, "loss": 1.623, "step": 1238 }, { "epoch": 0.3388022969647252, "grad_norm": 0.1671455204486847, "learning_rate": 5e-05, "loss": 1.6769, "step": 1239 }, { "epoch": 0.3390757451462948, "grad_norm": 0.16016924381256104, "learning_rate": 5e-05, "loss": 1.7308, "step": 1240 }, { "epoch": 0.3393491933278644, "grad_norm": 0.16020850837230682, "learning_rate": 5e-05, "loss": 1.7288, "step": 1241 }, { "epoch": 0.33962264150943394, "grad_norm": 0.14866626262664795, "learning_rate": 5e-05, "loss": 1.5764, "step": 1242 }, { "epoch": 0.33989608969100354, "grad_norm": 0.17916180193424225, "learning_rate": 5e-05, "loss": 1.7789, "step": 1243 }, { "epoch": 0.34016953787257315, "grad_norm": 0.14788402616977692, "learning_rate": 5e-05, "loss": 1.63, "step": 1244 }, { "epoch": 0.34044298605414275, "grad_norm": 0.15443041920661926, "learning_rate": 5e-05, "loss": 1.6499, "step": 1245 }, { "epoch": 0.34071643423571235, "grad_norm": 0.15852518379688263, "learning_rate": 5e-05, "loss": 1.6446, "step": 1246 }, { "epoch": 0.3409898824172819, "grad_norm": 0.15751297771930695, "learning_rate": 5e-05, "loss": 1.6294, "step": 1247 }, { "epoch": 0.3412633305988515, "grad_norm": 0.15762586891651154, "learning_rate": 5e-05, "loss": 1.6864, "step": 1248 }, { "epoch": 0.3415367787804211, "grad_norm": 0.15331421792507172, "learning_rate": 5e-05, "loss": 1.6898, "step": 1249 }, { "epoch": 0.3418102269619907, "grad_norm": 0.165785014629364, "learning_rate": 5e-05, "loss": 1.703, "step": 1250 }, { "epoch": 0.3420836751435603, "grad_norm": 0.1514601707458496, "learning_rate": 5e-05, "loss": 1.6489, "step": 1251 }, { "epoch": 0.3423571233251299, "grad_norm": 0.187610924243927, "learning_rate": 5e-05, "loss": 1.6109, "step": 1252 }, { "epoch": 0.34263057150669946, "grad_norm": 0.1479722261428833, "learning_rate": 5e-05, "loss": 1.6528, "step": 1253 }, { "epoch": 0.34290401968826906, "grad_norm": 0.1570201814174652, "learning_rate": 5e-05, "loss": 1.6706, "step": 1254 }, { "epoch": 0.34317746786983866, "grad_norm": 0.1705133467912674, "learning_rate": 5e-05, "loss": 1.7262, "step": 1255 }, { "epoch": 0.34345091605140826, "grad_norm": 0.15776453912258148, "learning_rate": 5e-05, "loss": 1.6374, "step": 1256 }, { "epoch": 0.34372436423297786, "grad_norm": 0.16335074603557587, "learning_rate": 5e-05, "loss": 1.6261, "step": 1257 }, { "epoch": 0.34399781241454747, "grad_norm": 0.17720459401607513, "learning_rate": 5e-05, "loss": 1.5963, "step": 1258 }, { "epoch": 0.344271260596117, "grad_norm": 0.14796927571296692, "learning_rate": 5e-05, "loss": 1.5923, "step": 1259 }, { "epoch": 0.3445447087776866, "grad_norm": 0.16118891537189484, "learning_rate": 5e-05, "loss": 1.5986, "step": 1260 }, { "epoch": 0.3448181569592562, "grad_norm": 0.16453957557678223, "learning_rate": 5e-05, "loss": 1.7498, "step": 1261 }, { "epoch": 0.3450916051408258, "grad_norm": 0.15277455747127533, "learning_rate": 5e-05, "loss": 1.6196, "step": 1262 }, { "epoch": 0.3453650533223954, "grad_norm": 0.1496528536081314, "learning_rate": 5e-05, "loss": 1.635, "step": 1263 }, { "epoch": 0.345638501503965, "grad_norm": 0.1616893708705902, "learning_rate": 5e-05, "loss": 1.591, "step": 1264 }, { "epoch": 0.34591194968553457, "grad_norm": 0.1540791094303131, "learning_rate": 5e-05, "loss": 1.6713, "step": 1265 }, { "epoch": 0.34618539786710417, "grad_norm": 0.15063992142677307, "learning_rate": 5e-05, "loss": 1.6739, "step": 1266 }, { "epoch": 0.3464588460486738, "grad_norm": 0.15687578916549683, "learning_rate": 5e-05, "loss": 1.7105, "step": 1267 }, { "epoch": 0.3467322942302434, "grad_norm": 0.14987704157829285, "learning_rate": 5e-05, "loss": 1.7308, "step": 1268 }, { "epoch": 0.347005742411813, "grad_norm": 0.1480540633201599, "learning_rate": 5e-05, "loss": 1.7043, "step": 1269 }, { "epoch": 0.3472791905933826, "grad_norm": 0.1529364138841629, "learning_rate": 5e-05, "loss": 1.5753, "step": 1270 }, { "epoch": 0.3475526387749521, "grad_norm": 0.14511734247207642, "learning_rate": 5e-05, "loss": 1.5622, "step": 1271 }, { "epoch": 0.34782608695652173, "grad_norm": 0.14741328358650208, "learning_rate": 5e-05, "loss": 1.6804, "step": 1272 }, { "epoch": 0.34809953513809133, "grad_norm": 0.14820526540279388, "learning_rate": 5e-05, "loss": 1.5904, "step": 1273 }, { "epoch": 0.34837298331966093, "grad_norm": 0.1521809697151184, "learning_rate": 5e-05, "loss": 1.7002, "step": 1274 }, { "epoch": 0.34864643150123054, "grad_norm": 0.1577511727809906, "learning_rate": 5e-05, "loss": 1.6897, "step": 1275 }, { "epoch": 0.3489198796828001, "grad_norm": 0.14861604571342468, "learning_rate": 5e-05, "loss": 1.6671, "step": 1276 }, { "epoch": 0.3491933278643697, "grad_norm": 0.15119168162345886, "learning_rate": 5e-05, "loss": 1.6387, "step": 1277 }, { "epoch": 0.3494667760459393, "grad_norm": 0.1554817259311676, "learning_rate": 5e-05, "loss": 1.5855, "step": 1278 }, { "epoch": 0.3497402242275089, "grad_norm": 0.151669442653656, "learning_rate": 5e-05, "loss": 1.6634, "step": 1279 }, { "epoch": 0.3500136724090785, "grad_norm": 0.1499747335910797, "learning_rate": 5e-05, "loss": 1.627, "step": 1280 }, { "epoch": 0.3502871205906481, "grad_norm": 0.15303084254264832, "learning_rate": 5e-05, "loss": 1.6594, "step": 1281 }, { "epoch": 0.35056056877221764, "grad_norm": 0.1563277691602707, "learning_rate": 5e-05, "loss": 1.8, "step": 1282 }, { "epoch": 0.35083401695378724, "grad_norm": 0.16208653151988983, "learning_rate": 5e-05, "loss": 1.7052, "step": 1283 }, { "epoch": 0.35110746513535684, "grad_norm": 0.14594794809818268, "learning_rate": 5e-05, "loss": 1.5883, "step": 1284 }, { "epoch": 0.35138091331692645, "grad_norm": 0.1515040099620819, "learning_rate": 5e-05, "loss": 1.681, "step": 1285 }, { "epoch": 0.35165436149849605, "grad_norm": 0.15303464233875275, "learning_rate": 5e-05, "loss": 1.6776, "step": 1286 }, { "epoch": 0.35192780968006565, "grad_norm": 0.15074992179870605, "learning_rate": 5e-05, "loss": 1.6131, "step": 1287 }, { "epoch": 0.3522012578616352, "grad_norm": 0.15136325359344482, "learning_rate": 5e-05, "loss": 1.6393, "step": 1288 }, { "epoch": 0.3524747060432048, "grad_norm": 0.161278635263443, "learning_rate": 5e-05, "loss": 1.5921, "step": 1289 }, { "epoch": 0.3527481542247744, "grad_norm": 0.16516782343387604, "learning_rate": 5e-05, "loss": 1.6514, "step": 1290 }, { "epoch": 0.353021602406344, "grad_norm": 0.1504773199558258, "learning_rate": 5e-05, "loss": 1.6053, "step": 1291 }, { "epoch": 0.3532950505879136, "grad_norm": 0.15545381605625153, "learning_rate": 5e-05, "loss": 1.6231, "step": 1292 }, { "epoch": 0.3535684987694832, "grad_norm": 0.14545585215091705, "learning_rate": 5e-05, "loss": 1.6238, "step": 1293 }, { "epoch": 0.35384194695105275, "grad_norm": 0.15290163457393646, "learning_rate": 5e-05, "loss": 1.6625, "step": 1294 }, { "epoch": 0.35411539513262236, "grad_norm": 0.15580037236213684, "learning_rate": 5e-05, "loss": 1.6871, "step": 1295 }, { "epoch": 0.35438884331419196, "grad_norm": 0.15643970668315887, "learning_rate": 5e-05, "loss": 1.6451, "step": 1296 }, { "epoch": 0.35466229149576156, "grad_norm": 0.14689311385154724, "learning_rate": 5e-05, "loss": 1.612, "step": 1297 }, { "epoch": 0.35493573967733116, "grad_norm": 0.152411088347435, "learning_rate": 5e-05, "loss": 1.5722, "step": 1298 }, { "epoch": 0.35520918785890077, "grad_norm": 0.16959436237812042, "learning_rate": 5e-05, "loss": 1.7114, "step": 1299 }, { "epoch": 0.3554826360404703, "grad_norm": 0.15447081625461578, "learning_rate": 5e-05, "loss": 1.7206, "step": 1300 }, { "epoch": 0.3557560842220399, "grad_norm": 0.17055825889110565, "learning_rate": 5e-05, "loss": 1.5612, "step": 1301 }, { "epoch": 0.3560295324036095, "grad_norm": 0.157841756939888, "learning_rate": 5e-05, "loss": 1.6815, "step": 1302 }, { "epoch": 0.3563029805851791, "grad_norm": 0.17073825001716614, "learning_rate": 5e-05, "loss": 1.6889, "step": 1303 }, { "epoch": 0.3565764287667487, "grad_norm": 0.17082183063030243, "learning_rate": 5e-05, "loss": 1.6979, "step": 1304 }, { "epoch": 0.35684987694831827, "grad_norm": 0.15137112140655518, "learning_rate": 5e-05, "loss": 1.6226, "step": 1305 }, { "epoch": 0.35712332512988787, "grad_norm": 0.15363116562366486, "learning_rate": 5e-05, "loss": 1.6242, "step": 1306 }, { "epoch": 0.35739677331145747, "grad_norm": 0.15610884130001068, "learning_rate": 5e-05, "loss": 1.5751, "step": 1307 }, { "epoch": 0.3576702214930271, "grad_norm": 0.15649494528770447, "learning_rate": 5e-05, "loss": 1.6683, "step": 1308 }, { "epoch": 0.3579436696745967, "grad_norm": 0.1602485328912735, "learning_rate": 5e-05, "loss": 1.7109, "step": 1309 }, { "epoch": 0.3582171178561663, "grad_norm": 0.16601736843585968, "learning_rate": 5e-05, "loss": 1.6664, "step": 1310 }, { "epoch": 0.3584905660377358, "grad_norm": 0.15012311935424805, "learning_rate": 5e-05, "loss": 1.5708, "step": 1311 }, { "epoch": 0.3587640142193054, "grad_norm": 0.16618122160434723, "learning_rate": 5e-05, "loss": 1.7141, "step": 1312 }, { "epoch": 0.35903746240087503, "grad_norm": 0.1614745706319809, "learning_rate": 5e-05, "loss": 1.6907, "step": 1313 }, { "epoch": 0.35931091058244463, "grad_norm": 0.1518169492483139, "learning_rate": 5e-05, "loss": 1.6233, "step": 1314 }, { "epoch": 0.35958435876401423, "grad_norm": 0.16219857335090637, "learning_rate": 5e-05, "loss": 1.6707, "step": 1315 }, { "epoch": 0.35985780694558384, "grad_norm": 0.15628166496753693, "learning_rate": 5e-05, "loss": 1.7647, "step": 1316 }, { "epoch": 0.3601312551271534, "grad_norm": 0.14191019535064697, "learning_rate": 5e-05, "loss": 1.5061, "step": 1317 }, { "epoch": 0.360404703308723, "grad_norm": 0.148356094956398, "learning_rate": 5e-05, "loss": 1.6642, "step": 1318 }, { "epoch": 0.3606781514902926, "grad_norm": 0.14994607865810394, "learning_rate": 5e-05, "loss": 1.655, "step": 1319 }, { "epoch": 0.3609515996718622, "grad_norm": 0.15752872824668884, "learning_rate": 5e-05, "loss": 1.5816, "step": 1320 }, { "epoch": 0.3612250478534318, "grad_norm": 0.15151529014110565, "learning_rate": 5e-05, "loss": 1.6372, "step": 1321 }, { "epoch": 0.3614984960350014, "grad_norm": 0.15662340819835663, "learning_rate": 5e-05, "loss": 1.6894, "step": 1322 }, { "epoch": 0.36177194421657094, "grad_norm": 0.16049139201641083, "learning_rate": 5e-05, "loss": 1.616, "step": 1323 }, { "epoch": 0.36204539239814054, "grad_norm": 0.15439613163471222, "learning_rate": 5e-05, "loss": 1.6672, "step": 1324 }, { "epoch": 0.36231884057971014, "grad_norm": 0.15884248912334442, "learning_rate": 5e-05, "loss": 1.6366, "step": 1325 }, { "epoch": 0.36259228876127975, "grad_norm": 0.15194907784461975, "learning_rate": 5e-05, "loss": 1.7004, "step": 1326 }, { "epoch": 0.36286573694284935, "grad_norm": 0.1542571783065796, "learning_rate": 5e-05, "loss": 1.6714, "step": 1327 }, { "epoch": 0.3631391851244189, "grad_norm": 0.15640923380851746, "learning_rate": 5e-05, "loss": 1.7018, "step": 1328 }, { "epoch": 0.3634126333059885, "grad_norm": 0.15510603785514832, "learning_rate": 5e-05, "loss": 1.6173, "step": 1329 }, { "epoch": 0.3636860814875581, "grad_norm": 0.14958932995796204, "learning_rate": 5e-05, "loss": 1.6507, "step": 1330 }, { "epoch": 0.3639595296691277, "grad_norm": 0.15672361850738525, "learning_rate": 5e-05, "loss": 1.6679, "step": 1331 }, { "epoch": 0.3642329778506973, "grad_norm": 0.16806292533874512, "learning_rate": 5e-05, "loss": 1.6097, "step": 1332 }, { "epoch": 0.3645064260322669, "grad_norm": 0.14968685805797577, "learning_rate": 5e-05, "loss": 1.6181, "step": 1333 }, { "epoch": 0.36477987421383645, "grad_norm": 0.14979314804077148, "learning_rate": 5e-05, "loss": 1.705, "step": 1334 }, { "epoch": 0.36505332239540605, "grad_norm": 0.14740002155303955, "learning_rate": 5e-05, "loss": 1.6532, "step": 1335 }, { "epoch": 0.36532677057697566, "grad_norm": 0.15307697653770447, "learning_rate": 5e-05, "loss": 1.5616, "step": 1336 }, { "epoch": 0.36560021875854526, "grad_norm": 0.165946364402771, "learning_rate": 5e-05, "loss": 1.7643, "step": 1337 }, { "epoch": 0.36587366694011486, "grad_norm": 0.1671787053346634, "learning_rate": 5e-05, "loss": 1.7023, "step": 1338 }, { "epoch": 0.36614711512168446, "grad_norm": 0.1597784012556076, "learning_rate": 5e-05, "loss": 1.6975, "step": 1339 }, { "epoch": 0.366420563303254, "grad_norm": 0.1569102704524994, "learning_rate": 5e-05, "loss": 1.7043, "step": 1340 }, { "epoch": 0.3666940114848236, "grad_norm": 0.1625957041978836, "learning_rate": 5e-05, "loss": 1.6712, "step": 1341 }, { "epoch": 0.3669674596663932, "grad_norm": 0.16208802163600922, "learning_rate": 5e-05, "loss": 1.6891, "step": 1342 }, { "epoch": 0.3672409078479628, "grad_norm": 0.1636815369129181, "learning_rate": 5e-05, "loss": 1.7703, "step": 1343 }, { "epoch": 0.3675143560295324, "grad_norm": 0.1954984962940216, "learning_rate": 5e-05, "loss": 1.6462, "step": 1344 }, { "epoch": 0.367787804211102, "grad_norm": 0.16720376908779144, "learning_rate": 5e-05, "loss": 1.6952, "step": 1345 }, { "epoch": 0.36806125239267157, "grad_norm": 0.1478559672832489, "learning_rate": 5e-05, "loss": 1.6585, "step": 1346 }, { "epoch": 0.36833470057424117, "grad_norm": 0.18975849449634552, "learning_rate": 5e-05, "loss": 1.6765, "step": 1347 }, { "epoch": 0.36860814875581077, "grad_norm": 0.1447249799966812, "learning_rate": 5e-05, "loss": 1.633, "step": 1348 }, { "epoch": 0.3688815969373804, "grad_norm": 0.19498814642429352, "learning_rate": 5e-05, "loss": 1.7036, "step": 1349 }, { "epoch": 0.36915504511895, "grad_norm": 0.16138319671154022, "learning_rate": 5e-05, "loss": 1.6062, "step": 1350 }, { "epoch": 0.3694284933005196, "grad_norm": 0.15296588838100433, "learning_rate": 5e-05, "loss": 1.6813, "step": 1351 }, { "epoch": 0.3697019414820891, "grad_norm": 0.1728629469871521, "learning_rate": 5e-05, "loss": 1.723, "step": 1352 }, { "epoch": 0.3699753896636587, "grad_norm": 0.17153020203113556, "learning_rate": 5e-05, "loss": 1.6255, "step": 1353 }, { "epoch": 0.37024883784522833, "grad_norm": 0.1482992023229599, "learning_rate": 5e-05, "loss": 1.5935, "step": 1354 }, { "epoch": 0.37052228602679793, "grad_norm": 0.19278480112552643, "learning_rate": 5e-05, "loss": 1.7194, "step": 1355 }, { "epoch": 0.37079573420836753, "grad_norm": 0.15969671308994293, "learning_rate": 5e-05, "loss": 1.7102, "step": 1356 }, { "epoch": 0.3710691823899371, "grad_norm": 0.1841089278459549, "learning_rate": 5e-05, "loss": 1.6842, "step": 1357 }, { "epoch": 0.3713426305715067, "grad_norm": 0.1696590632200241, "learning_rate": 5e-05, "loss": 1.6301, "step": 1358 }, { "epoch": 0.3716160787530763, "grad_norm": 0.15877504646778107, "learning_rate": 5e-05, "loss": 1.6211, "step": 1359 }, { "epoch": 0.3718895269346459, "grad_norm": 0.17480207979679108, "learning_rate": 5e-05, "loss": 1.5707, "step": 1360 }, { "epoch": 0.3721629751162155, "grad_norm": 0.1526334136724472, "learning_rate": 5e-05, "loss": 1.6062, "step": 1361 }, { "epoch": 0.3724364232977851, "grad_norm": 0.1520829051733017, "learning_rate": 5e-05, "loss": 1.713, "step": 1362 }, { "epoch": 0.37270987147935464, "grad_norm": 0.17460103332996368, "learning_rate": 5e-05, "loss": 1.5544, "step": 1363 }, { "epoch": 0.37298331966092424, "grad_norm": 0.16600289940834045, "learning_rate": 5e-05, "loss": 1.6942, "step": 1364 }, { "epoch": 0.37325676784249384, "grad_norm": 0.15874885022640228, "learning_rate": 5e-05, "loss": 1.5408, "step": 1365 }, { "epoch": 0.37353021602406344, "grad_norm": 0.15118831396102905, "learning_rate": 5e-05, "loss": 1.6963, "step": 1366 }, { "epoch": 0.37380366420563305, "grad_norm": 0.16530974209308624, "learning_rate": 5e-05, "loss": 1.6564, "step": 1367 }, { "epoch": 0.37407711238720265, "grad_norm": 0.16382399201393127, "learning_rate": 5e-05, "loss": 1.5668, "step": 1368 }, { "epoch": 0.3743505605687722, "grad_norm": 0.16075266897678375, "learning_rate": 5e-05, "loss": 1.5714, "step": 1369 }, { "epoch": 0.3746240087503418, "grad_norm": 0.16619382798671722, "learning_rate": 5e-05, "loss": 1.6636, "step": 1370 }, { "epoch": 0.3748974569319114, "grad_norm": 0.16388960182666779, "learning_rate": 5e-05, "loss": 1.5565, "step": 1371 }, { "epoch": 0.375170905113481, "grad_norm": 0.17126598954200745, "learning_rate": 5e-05, "loss": 1.6027, "step": 1372 }, { "epoch": 0.3754443532950506, "grad_norm": 0.15221962332725525, "learning_rate": 5e-05, "loss": 1.7189, "step": 1373 }, { "epoch": 0.3757178014766202, "grad_norm": 0.1918559968471527, "learning_rate": 5e-05, "loss": 1.6066, "step": 1374 }, { "epoch": 0.37599124965818975, "grad_norm": 0.1808595359325409, "learning_rate": 5e-05, "loss": 1.7059, "step": 1375 }, { "epoch": 0.37626469783975935, "grad_norm": 0.16812893748283386, "learning_rate": 5e-05, "loss": 1.8234, "step": 1376 }, { "epoch": 0.37653814602132896, "grad_norm": 0.17743167281150818, "learning_rate": 5e-05, "loss": 1.6533, "step": 1377 }, { "epoch": 0.37681159420289856, "grad_norm": 0.16644933819770813, "learning_rate": 5e-05, "loss": 1.6681, "step": 1378 }, { "epoch": 0.37708504238446816, "grad_norm": 0.1525644063949585, "learning_rate": 5e-05, "loss": 1.6302, "step": 1379 }, { "epoch": 0.37735849056603776, "grad_norm": 0.1539047807455063, "learning_rate": 5e-05, "loss": 1.6559, "step": 1380 }, { "epoch": 0.3776319387476073, "grad_norm": 0.17377246916294098, "learning_rate": 5e-05, "loss": 1.6563, "step": 1381 }, { "epoch": 0.3779053869291769, "grad_norm": 0.1529269814491272, "learning_rate": 5e-05, "loss": 1.6278, "step": 1382 }, { "epoch": 0.3781788351107465, "grad_norm": 0.17307884991168976, "learning_rate": 5e-05, "loss": 1.8326, "step": 1383 }, { "epoch": 0.3784522832923161, "grad_norm": 0.15691839158535004, "learning_rate": 5e-05, "loss": 1.652, "step": 1384 }, { "epoch": 0.3787257314738857, "grad_norm": 0.16453763842582703, "learning_rate": 5e-05, "loss": 1.6961, "step": 1385 }, { "epoch": 0.37899917965545526, "grad_norm": 0.17871487140655518, "learning_rate": 5e-05, "loss": 1.6779, "step": 1386 }, { "epoch": 0.37927262783702487, "grad_norm": 0.15350158512592316, "learning_rate": 5e-05, "loss": 1.6196, "step": 1387 }, { "epoch": 0.37954607601859447, "grad_norm": 0.1600034087896347, "learning_rate": 5e-05, "loss": 1.6356, "step": 1388 }, { "epoch": 0.37981952420016407, "grad_norm": 0.17216049134731293, "learning_rate": 5e-05, "loss": 1.6116, "step": 1389 }, { "epoch": 0.3800929723817337, "grad_norm": 0.15600605309009552, "learning_rate": 5e-05, "loss": 1.6993, "step": 1390 }, { "epoch": 0.3803664205633033, "grad_norm": 0.14978420734405518, "learning_rate": 5e-05, "loss": 1.7346, "step": 1391 }, { "epoch": 0.3806398687448728, "grad_norm": 0.176108255982399, "learning_rate": 5e-05, "loss": 1.6644, "step": 1392 }, { "epoch": 0.3809133169264424, "grad_norm": 0.1513052135705948, "learning_rate": 5e-05, "loss": 1.648, "step": 1393 }, { "epoch": 0.381186765108012, "grad_norm": 0.16158834099769592, "learning_rate": 5e-05, "loss": 1.6868, "step": 1394 }, { "epoch": 0.38146021328958163, "grad_norm": 0.1669863909482956, "learning_rate": 5e-05, "loss": 1.6286, "step": 1395 }, { "epoch": 0.38173366147115123, "grad_norm": 0.16049465537071228, "learning_rate": 5e-05, "loss": 1.7456, "step": 1396 }, { "epoch": 0.38200710965272083, "grad_norm": 0.17042703926563263, "learning_rate": 5e-05, "loss": 1.663, "step": 1397 }, { "epoch": 0.3822805578342904, "grad_norm": 0.1597413718700409, "learning_rate": 5e-05, "loss": 1.6331, "step": 1398 }, { "epoch": 0.38255400601586, "grad_norm": 0.15566933155059814, "learning_rate": 5e-05, "loss": 1.6423, "step": 1399 }, { "epoch": 0.3828274541974296, "grad_norm": 0.15969716012477875, "learning_rate": 5e-05, "loss": 1.7039, "step": 1400 }, { "epoch": 0.3831009023789992, "grad_norm": 0.15060196816921234, "learning_rate": 5e-05, "loss": 1.6484, "step": 1401 }, { "epoch": 0.3833743505605688, "grad_norm": 0.15443742275238037, "learning_rate": 5e-05, "loss": 1.5378, "step": 1402 }, { "epoch": 0.3836477987421384, "grad_norm": 0.15732194483280182, "learning_rate": 5e-05, "loss": 1.6646, "step": 1403 }, { "epoch": 0.38392124692370794, "grad_norm": 0.15805989503860474, "learning_rate": 5e-05, "loss": 1.5314, "step": 1404 }, { "epoch": 0.38419469510527754, "grad_norm": 0.1553335189819336, "learning_rate": 5e-05, "loss": 1.6566, "step": 1405 }, { "epoch": 0.38446814328684714, "grad_norm": 0.16199639439582825, "learning_rate": 5e-05, "loss": 1.6392, "step": 1406 }, { "epoch": 0.38474159146841674, "grad_norm": 0.15157069265842438, "learning_rate": 5e-05, "loss": 1.5499, "step": 1407 }, { "epoch": 0.38501503964998635, "grad_norm": 0.15281042456626892, "learning_rate": 5e-05, "loss": 1.6623, "step": 1408 }, { "epoch": 0.3852884878315559, "grad_norm": 0.16575610637664795, "learning_rate": 5e-05, "loss": 1.6758, "step": 1409 }, { "epoch": 0.3855619360131255, "grad_norm": 0.17022867500782013, "learning_rate": 5e-05, "loss": 1.7497, "step": 1410 }, { "epoch": 0.3858353841946951, "grad_norm": 0.1417384147644043, "learning_rate": 5e-05, "loss": 1.615, "step": 1411 }, { "epoch": 0.3861088323762647, "grad_norm": 0.18117718398571014, "learning_rate": 5e-05, "loss": 1.7536, "step": 1412 }, { "epoch": 0.3863822805578343, "grad_norm": 0.15728381276130676, "learning_rate": 5e-05, "loss": 1.577, "step": 1413 }, { "epoch": 0.3866557287394039, "grad_norm": 0.163984015583992, "learning_rate": 5e-05, "loss": 1.6305, "step": 1414 }, { "epoch": 0.38692917692097345, "grad_norm": 0.16205452382564545, "learning_rate": 5e-05, "loss": 1.5719, "step": 1415 }, { "epoch": 0.38720262510254305, "grad_norm": 0.16791151463985443, "learning_rate": 5e-05, "loss": 1.6911, "step": 1416 }, { "epoch": 0.38747607328411265, "grad_norm": 0.165225088596344, "learning_rate": 5e-05, "loss": 1.5915, "step": 1417 }, { "epoch": 0.38774952146568226, "grad_norm": 0.1673455834388733, "learning_rate": 5e-05, "loss": 1.6484, "step": 1418 }, { "epoch": 0.38802296964725186, "grad_norm": 0.15316179394721985, "learning_rate": 5e-05, "loss": 1.6133, "step": 1419 }, { "epoch": 0.38829641782882146, "grad_norm": 0.17565707862377167, "learning_rate": 5e-05, "loss": 1.7247, "step": 1420 }, { "epoch": 0.388569866010391, "grad_norm": 0.15834634006023407, "learning_rate": 5e-05, "loss": 1.6689, "step": 1421 }, { "epoch": 0.3888433141919606, "grad_norm": 0.16178841888904572, "learning_rate": 5e-05, "loss": 1.6276, "step": 1422 }, { "epoch": 0.3891167623735302, "grad_norm": 0.15787442028522491, "learning_rate": 5e-05, "loss": 1.6655, "step": 1423 }, { "epoch": 0.3893902105550998, "grad_norm": 0.17375800013542175, "learning_rate": 5e-05, "loss": 1.6643, "step": 1424 }, { "epoch": 0.3896636587366694, "grad_norm": 0.1527184098958969, "learning_rate": 5e-05, "loss": 1.5199, "step": 1425 }, { "epoch": 0.389937106918239, "grad_norm": 0.17630016803741455, "learning_rate": 5e-05, "loss": 1.9053, "step": 1426 }, { "epoch": 0.39021055509980856, "grad_norm": 0.16821622848510742, "learning_rate": 5e-05, "loss": 1.6229, "step": 1427 }, { "epoch": 0.39048400328137817, "grad_norm": 0.1571454554796219, "learning_rate": 5e-05, "loss": 1.5945, "step": 1428 }, { "epoch": 0.39075745146294777, "grad_norm": 0.16041232645511627, "learning_rate": 5e-05, "loss": 1.6161, "step": 1429 }, { "epoch": 0.39103089964451737, "grad_norm": 0.15614961087703705, "learning_rate": 5e-05, "loss": 1.6234, "step": 1430 }, { "epoch": 0.391304347826087, "grad_norm": 0.1785866618156433, "learning_rate": 5e-05, "loss": 1.5821, "step": 1431 }, { "epoch": 0.3915777960076566, "grad_norm": 0.1583407074213028, "learning_rate": 5e-05, "loss": 1.6031, "step": 1432 }, { "epoch": 0.3918512441892261, "grad_norm": 0.15513792634010315, "learning_rate": 5e-05, "loss": 1.6701, "step": 1433 }, { "epoch": 0.3921246923707957, "grad_norm": 0.15645377337932587, "learning_rate": 5e-05, "loss": 1.6072, "step": 1434 }, { "epoch": 0.3923981405523653, "grad_norm": 0.17222340404987335, "learning_rate": 5e-05, "loss": 1.7521, "step": 1435 }, { "epoch": 0.39267158873393493, "grad_norm": 0.16205964982509613, "learning_rate": 5e-05, "loss": 1.5481, "step": 1436 }, { "epoch": 0.39294503691550453, "grad_norm": 0.16628527641296387, "learning_rate": 5e-05, "loss": 1.6072, "step": 1437 }, { "epoch": 0.3932184850970741, "grad_norm": 0.15461629629135132, "learning_rate": 5e-05, "loss": 1.6637, "step": 1438 }, { "epoch": 0.3934919332786437, "grad_norm": 0.17771334946155548, "learning_rate": 5e-05, "loss": 1.7054, "step": 1439 }, { "epoch": 0.3937653814602133, "grad_norm": 0.16035428643226624, "learning_rate": 5e-05, "loss": 1.6933, "step": 1440 }, { "epoch": 0.3940388296417829, "grad_norm": 0.15316614508628845, "learning_rate": 5e-05, "loss": 1.4949, "step": 1441 }, { "epoch": 0.3943122778233525, "grad_norm": 0.15236356854438782, "learning_rate": 5e-05, "loss": 1.6445, "step": 1442 }, { "epoch": 0.3945857260049221, "grad_norm": 0.15374061465263367, "learning_rate": 5e-05, "loss": 1.7432, "step": 1443 }, { "epoch": 0.39485917418649163, "grad_norm": 0.15344202518463135, "learning_rate": 5e-05, "loss": 1.584, "step": 1444 }, { "epoch": 0.39513262236806124, "grad_norm": 0.15145696699619293, "learning_rate": 5e-05, "loss": 1.6804, "step": 1445 }, { "epoch": 0.39540607054963084, "grad_norm": 0.15984676778316498, "learning_rate": 5e-05, "loss": 1.7636, "step": 1446 }, { "epoch": 0.39567951873120044, "grad_norm": 0.14700670540332794, "learning_rate": 5e-05, "loss": 1.5888, "step": 1447 }, { "epoch": 0.39595296691277004, "grad_norm": 0.1534331887960434, "learning_rate": 5e-05, "loss": 1.5919, "step": 1448 }, { "epoch": 0.39622641509433965, "grad_norm": 0.15421104431152344, "learning_rate": 5e-05, "loss": 1.5659, "step": 1449 }, { "epoch": 0.3964998632759092, "grad_norm": 0.17723333835601807, "learning_rate": 5e-05, "loss": 1.6337, "step": 1450 }, { "epoch": 0.3967733114574788, "grad_norm": 0.1645742654800415, "learning_rate": 5e-05, "loss": 1.6573, "step": 1451 }, { "epoch": 0.3970467596390484, "grad_norm": 0.1458151489496231, "learning_rate": 5e-05, "loss": 1.4709, "step": 1452 }, { "epoch": 0.397320207820618, "grad_norm": 0.15043888986110687, "learning_rate": 5e-05, "loss": 1.6267, "step": 1453 }, { "epoch": 0.3975936560021876, "grad_norm": 0.16159576177597046, "learning_rate": 5e-05, "loss": 1.7, "step": 1454 }, { "epoch": 0.3978671041837572, "grad_norm": 0.15626250207424164, "learning_rate": 5e-05, "loss": 1.6397, "step": 1455 }, { "epoch": 0.39814055236532675, "grad_norm": 0.15309499204158783, "learning_rate": 5e-05, "loss": 1.6503, "step": 1456 }, { "epoch": 0.39841400054689635, "grad_norm": 0.1638927310705185, "learning_rate": 5e-05, "loss": 1.6134, "step": 1457 }, { "epoch": 0.39868744872846595, "grad_norm": 0.156220942735672, "learning_rate": 5e-05, "loss": 1.6544, "step": 1458 }, { "epoch": 0.39896089691003556, "grad_norm": 0.15519675612449646, "learning_rate": 5e-05, "loss": 1.6289, "step": 1459 }, { "epoch": 0.39923434509160516, "grad_norm": 0.16165190935134888, "learning_rate": 5e-05, "loss": 1.6655, "step": 1460 }, { "epoch": 0.39950779327317476, "grad_norm": 0.14794325828552246, "learning_rate": 5e-05, "loss": 1.6321, "step": 1461 }, { "epoch": 0.3997812414547443, "grad_norm": 0.1512671858072281, "learning_rate": 5e-05, "loss": 1.6119, "step": 1462 }, { "epoch": 0.4000546896363139, "grad_norm": 0.14749833941459656, "learning_rate": 5e-05, "loss": 1.6302, "step": 1463 }, { "epoch": 0.4003281378178835, "grad_norm": 0.1552843600511551, "learning_rate": 5e-05, "loss": 1.6113, "step": 1464 }, { "epoch": 0.4006015859994531, "grad_norm": 0.15012173354625702, "learning_rate": 5e-05, "loss": 1.5396, "step": 1465 }, { "epoch": 0.4008750341810227, "grad_norm": 0.15311282873153687, "learning_rate": 5e-05, "loss": 1.6266, "step": 1466 }, { "epoch": 0.40114848236259226, "grad_norm": 0.15125569701194763, "learning_rate": 5e-05, "loss": 1.5979, "step": 1467 }, { "epoch": 0.40142193054416186, "grad_norm": 0.1545860469341278, "learning_rate": 5e-05, "loss": 1.6872, "step": 1468 }, { "epoch": 0.40169537872573147, "grad_norm": 0.1852022111415863, "learning_rate": 5e-05, "loss": 1.6847, "step": 1469 }, { "epoch": 0.40196882690730107, "grad_norm": 0.1469736099243164, "learning_rate": 5e-05, "loss": 1.58, "step": 1470 }, { "epoch": 0.40224227508887067, "grad_norm": 0.17848168313503265, "learning_rate": 5e-05, "loss": 1.6899, "step": 1471 }, { "epoch": 0.4025157232704403, "grad_norm": 0.16272708773612976, "learning_rate": 5e-05, "loss": 1.6655, "step": 1472 }, { "epoch": 0.4027891714520098, "grad_norm": 0.1590905487537384, "learning_rate": 5e-05, "loss": 1.6239, "step": 1473 }, { "epoch": 0.4030626196335794, "grad_norm": 0.16318002343177795, "learning_rate": 5e-05, "loss": 1.6324, "step": 1474 }, { "epoch": 0.403336067815149, "grad_norm": 0.1604347825050354, "learning_rate": 5e-05, "loss": 1.6793, "step": 1475 }, { "epoch": 0.4036095159967186, "grad_norm": 0.1481696516275406, "learning_rate": 5e-05, "loss": 1.6391, "step": 1476 }, { "epoch": 0.40388296417828823, "grad_norm": 0.1675114631652832, "learning_rate": 5e-05, "loss": 1.5925, "step": 1477 }, { "epoch": 0.40415641235985783, "grad_norm": 0.15092286467552185, "learning_rate": 5e-05, "loss": 1.6294, "step": 1478 }, { "epoch": 0.4044298605414274, "grad_norm": 0.1638384312391281, "learning_rate": 5e-05, "loss": 1.6552, "step": 1479 }, { "epoch": 0.404703308722997, "grad_norm": 0.1602986752986908, "learning_rate": 5e-05, "loss": 1.6264, "step": 1480 }, { "epoch": 0.4049767569045666, "grad_norm": 0.1523655354976654, "learning_rate": 5e-05, "loss": 1.6013, "step": 1481 }, { "epoch": 0.4052502050861362, "grad_norm": 0.1629789173603058, "learning_rate": 5e-05, "loss": 1.6301, "step": 1482 }, { "epoch": 0.4055236532677058, "grad_norm": 0.14943981170654297, "learning_rate": 5e-05, "loss": 1.5619, "step": 1483 }, { "epoch": 0.4057971014492754, "grad_norm": 0.15840235352516174, "learning_rate": 5e-05, "loss": 1.6715, "step": 1484 }, { "epoch": 0.40607054963084493, "grad_norm": 0.15214814245700836, "learning_rate": 5e-05, "loss": 1.6118, "step": 1485 }, { "epoch": 0.40634399781241454, "grad_norm": 0.15232683718204498, "learning_rate": 5e-05, "loss": 1.604, "step": 1486 }, { "epoch": 0.40661744599398414, "grad_norm": 0.15846048295497894, "learning_rate": 5e-05, "loss": 1.6588, "step": 1487 }, { "epoch": 0.40689089417555374, "grad_norm": 0.15377342700958252, "learning_rate": 5e-05, "loss": 1.5945, "step": 1488 }, { "epoch": 0.40716434235712334, "grad_norm": 0.14967899024486542, "learning_rate": 5e-05, "loss": 1.5724, "step": 1489 }, { "epoch": 0.4074377905386929, "grad_norm": 0.15246756374835968, "learning_rate": 5e-05, "loss": 1.651, "step": 1490 }, { "epoch": 0.4077112387202625, "grad_norm": 0.1686270534992218, "learning_rate": 5e-05, "loss": 1.519, "step": 1491 }, { "epoch": 0.4079846869018321, "grad_norm": 0.1610393226146698, "learning_rate": 5e-05, "loss": 1.6535, "step": 1492 }, { "epoch": 0.4082581350834017, "grad_norm": 0.17595553398132324, "learning_rate": 5e-05, "loss": 1.6219, "step": 1493 }, { "epoch": 0.4085315832649713, "grad_norm": 0.1568283587694168, "learning_rate": 5e-05, "loss": 1.6203, "step": 1494 }, { "epoch": 0.4088050314465409, "grad_norm": 0.15611572563648224, "learning_rate": 5e-05, "loss": 1.7053, "step": 1495 }, { "epoch": 0.40907847962811045, "grad_norm": 0.16013391315937042, "learning_rate": 5e-05, "loss": 1.6718, "step": 1496 }, { "epoch": 0.40935192780968005, "grad_norm": 0.14747720956802368, "learning_rate": 5e-05, "loss": 1.5962, "step": 1497 }, { "epoch": 0.40962537599124965, "grad_norm": 0.1694934070110321, "learning_rate": 5e-05, "loss": 1.6709, "step": 1498 }, { "epoch": 0.40989882417281925, "grad_norm": 0.17449548840522766, "learning_rate": 5e-05, "loss": 1.6565, "step": 1499 }, { "epoch": 0.41017227235438886, "grad_norm": 0.15884292125701904, "learning_rate": 5e-05, "loss": 1.6468, "step": 1500 }, { "epoch": 0.41044572053595846, "grad_norm": 0.1549280881881714, "learning_rate": 5e-05, "loss": 1.5994, "step": 1501 }, { "epoch": 0.410719168717528, "grad_norm": 0.15527546405792236, "learning_rate": 5e-05, "loss": 1.555, "step": 1502 }, { "epoch": 0.4109926168990976, "grad_norm": 0.1679718941450119, "learning_rate": 5e-05, "loss": 1.6539, "step": 1503 }, { "epoch": 0.4112660650806672, "grad_norm": 0.14265212416648865, "learning_rate": 5e-05, "loss": 1.434, "step": 1504 }, { "epoch": 0.4115395132622368, "grad_norm": 0.1542886644601822, "learning_rate": 5e-05, "loss": 1.5932, "step": 1505 }, { "epoch": 0.4118129614438064, "grad_norm": 0.1690497249364853, "learning_rate": 5e-05, "loss": 1.6344, "step": 1506 }, { "epoch": 0.412086409625376, "grad_norm": 0.1507546305656433, "learning_rate": 5e-05, "loss": 1.5773, "step": 1507 }, { "epoch": 0.41235985780694556, "grad_norm": 0.15997721254825592, "learning_rate": 5e-05, "loss": 1.694, "step": 1508 }, { "epoch": 0.41263330598851516, "grad_norm": 0.15315738320350647, "learning_rate": 5e-05, "loss": 1.6413, "step": 1509 }, { "epoch": 0.41290675417008477, "grad_norm": 0.14834025502204895, "learning_rate": 5e-05, "loss": 1.6389, "step": 1510 }, { "epoch": 0.41318020235165437, "grad_norm": 0.15537337958812714, "learning_rate": 5e-05, "loss": 1.6266, "step": 1511 }, { "epoch": 0.41345365053322397, "grad_norm": 0.15986233949661255, "learning_rate": 5e-05, "loss": 1.5657, "step": 1512 }, { "epoch": 0.4137270987147936, "grad_norm": 0.15568973124027252, "learning_rate": 5e-05, "loss": 1.6403, "step": 1513 }, { "epoch": 0.4140005468963631, "grad_norm": 0.16333115100860596, "learning_rate": 5e-05, "loss": 1.6504, "step": 1514 }, { "epoch": 0.4142739950779327, "grad_norm": 0.1546471118927002, "learning_rate": 5e-05, "loss": 1.6104, "step": 1515 }, { "epoch": 0.4145474432595023, "grad_norm": 0.16248218715190887, "learning_rate": 5e-05, "loss": 1.6648, "step": 1516 }, { "epoch": 0.4148208914410719, "grad_norm": 0.16074463725090027, "learning_rate": 5e-05, "loss": 1.6331, "step": 1517 }, { "epoch": 0.41509433962264153, "grad_norm": 0.15557120740413666, "learning_rate": 5e-05, "loss": 1.6255, "step": 1518 }, { "epoch": 0.4153677878042111, "grad_norm": 0.15570150315761566, "learning_rate": 5e-05, "loss": 1.6743, "step": 1519 }, { "epoch": 0.4156412359857807, "grad_norm": 0.17130300402641296, "learning_rate": 5e-05, "loss": 1.6354, "step": 1520 }, { "epoch": 0.4159146841673503, "grad_norm": 0.1627659946680069, "learning_rate": 5e-05, "loss": 1.6975, "step": 1521 }, { "epoch": 0.4161881323489199, "grad_norm": 0.1583612710237503, "learning_rate": 5e-05, "loss": 1.6427, "step": 1522 }, { "epoch": 0.4164615805304895, "grad_norm": 0.15877516567707062, "learning_rate": 5e-05, "loss": 1.6016, "step": 1523 }, { "epoch": 0.4167350287120591, "grad_norm": 0.16064798831939697, "learning_rate": 5e-05, "loss": 1.605, "step": 1524 }, { "epoch": 0.41700847689362863, "grad_norm": 0.16150295734405518, "learning_rate": 5e-05, "loss": 1.7196, "step": 1525 }, { "epoch": 0.41728192507519823, "grad_norm": 0.1557587832212448, "learning_rate": 5e-05, "loss": 1.6024, "step": 1526 }, { "epoch": 0.41755537325676784, "grad_norm": 0.14889568090438843, "learning_rate": 5e-05, "loss": 1.6842, "step": 1527 }, { "epoch": 0.41782882143833744, "grad_norm": 0.15397529304027557, "learning_rate": 5e-05, "loss": 1.6452, "step": 1528 }, { "epoch": 0.41810226961990704, "grad_norm": 0.14965930581092834, "learning_rate": 5e-05, "loss": 1.599, "step": 1529 }, { "epoch": 0.41837571780147664, "grad_norm": 0.14950762689113617, "learning_rate": 5e-05, "loss": 1.6073, "step": 1530 }, { "epoch": 0.4186491659830462, "grad_norm": 0.1583251804113388, "learning_rate": 5e-05, "loss": 1.661, "step": 1531 }, { "epoch": 0.4189226141646158, "grad_norm": 0.1470499336719513, "learning_rate": 5e-05, "loss": 1.4718, "step": 1532 }, { "epoch": 0.4191960623461854, "grad_norm": 0.15115216374397278, "learning_rate": 5e-05, "loss": 1.6577, "step": 1533 }, { "epoch": 0.419469510527755, "grad_norm": 0.1576387733221054, "learning_rate": 5e-05, "loss": 1.6743, "step": 1534 }, { "epoch": 0.4197429587093246, "grad_norm": 0.15287528932094574, "learning_rate": 5e-05, "loss": 1.619, "step": 1535 }, { "epoch": 0.4200164068908942, "grad_norm": 0.15793505311012268, "learning_rate": 5e-05, "loss": 1.604, "step": 1536 }, { "epoch": 0.42028985507246375, "grad_norm": 0.14916017651557922, "learning_rate": 5e-05, "loss": 1.6273, "step": 1537 }, { "epoch": 0.42056330325403335, "grad_norm": 0.17765149474143982, "learning_rate": 5e-05, "loss": 1.7046, "step": 1538 }, { "epoch": 0.42083675143560295, "grad_norm": 0.15169952809810638, "learning_rate": 5e-05, "loss": 1.5879, "step": 1539 }, { "epoch": 0.42111019961717255, "grad_norm": 0.15626677870750427, "learning_rate": 5e-05, "loss": 1.5916, "step": 1540 }, { "epoch": 0.42138364779874216, "grad_norm": 0.17149586975574493, "learning_rate": 5e-05, "loss": 1.5988, "step": 1541 }, { "epoch": 0.42165709598031176, "grad_norm": 0.15350784361362457, "learning_rate": 5e-05, "loss": 1.7237, "step": 1542 }, { "epoch": 0.4219305441618813, "grad_norm": 0.1620538979768753, "learning_rate": 5e-05, "loss": 1.6646, "step": 1543 }, { "epoch": 0.4222039923434509, "grad_norm": 0.19055283069610596, "learning_rate": 5e-05, "loss": 1.6122, "step": 1544 }, { "epoch": 0.4224774405250205, "grad_norm": 0.16404415667057037, "learning_rate": 5e-05, "loss": 1.718, "step": 1545 }, { "epoch": 0.4227508887065901, "grad_norm": 0.18905016779899597, "learning_rate": 5e-05, "loss": 1.6112, "step": 1546 }, { "epoch": 0.4230243368881597, "grad_norm": 0.15663960576057434, "learning_rate": 5e-05, "loss": 1.5915, "step": 1547 }, { "epoch": 0.42329778506972926, "grad_norm": 0.15725626051425934, "learning_rate": 5e-05, "loss": 1.6258, "step": 1548 }, { "epoch": 0.42357123325129886, "grad_norm": 0.14791657030582428, "learning_rate": 5e-05, "loss": 1.6198, "step": 1549 }, { "epoch": 0.42384468143286846, "grad_norm": 0.1585807204246521, "learning_rate": 5e-05, "loss": 1.6669, "step": 1550 }, { "epoch": 0.42411812961443807, "grad_norm": 0.1611957997083664, "learning_rate": 5e-05, "loss": 1.6661, "step": 1551 }, { "epoch": 0.42439157779600767, "grad_norm": 0.15032406151294708, "learning_rate": 5e-05, "loss": 1.6543, "step": 1552 }, { "epoch": 0.42466502597757727, "grad_norm": 0.15738679468631744, "learning_rate": 5e-05, "loss": 1.6384, "step": 1553 }, { "epoch": 0.4249384741591468, "grad_norm": 0.1558157354593277, "learning_rate": 5e-05, "loss": 1.6816, "step": 1554 }, { "epoch": 0.4252119223407164, "grad_norm": 0.15282602608203888, "learning_rate": 5e-05, "loss": 1.6191, "step": 1555 }, { "epoch": 0.425485370522286, "grad_norm": 0.16039276123046875, "learning_rate": 5e-05, "loss": 1.6248, "step": 1556 }, { "epoch": 0.4257588187038556, "grad_norm": 0.1603170931339264, "learning_rate": 5e-05, "loss": 1.6796, "step": 1557 }, { "epoch": 0.4260322668854252, "grad_norm": 0.16182708740234375, "learning_rate": 5e-05, "loss": 1.62, "step": 1558 }, { "epoch": 0.42630571506699483, "grad_norm": 0.15992410480976105, "learning_rate": 5e-05, "loss": 1.5315, "step": 1559 }, { "epoch": 0.4265791632485644, "grad_norm": 0.15438152849674225, "learning_rate": 5e-05, "loss": 1.6707, "step": 1560 }, { "epoch": 0.426852611430134, "grad_norm": 0.1619657427072525, "learning_rate": 5e-05, "loss": 1.6402, "step": 1561 }, { "epoch": 0.4271260596117036, "grad_norm": 0.16311468183994293, "learning_rate": 5e-05, "loss": 1.6292, "step": 1562 }, { "epoch": 0.4273995077932732, "grad_norm": 0.15871946513652802, "learning_rate": 5e-05, "loss": 1.7462, "step": 1563 }, { "epoch": 0.4276729559748428, "grad_norm": 0.17374621331691742, "learning_rate": 5e-05, "loss": 1.6766, "step": 1564 }, { "epoch": 0.4279464041564124, "grad_norm": 0.1653493344783783, "learning_rate": 5e-05, "loss": 1.6076, "step": 1565 }, { "epoch": 0.42821985233798193, "grad_norm": 0.16838444769382477, "learning_rate": 5e-05, "loss": 1.754, "step": 1566 }, { "epoch": 0.42849330051955153, "grad_norm": 0.14829325675964355, "learning_rate": 5e-05, "loss": 1.5052, "step": 1567 }, { "epoch": 0.42876674870112114, "grad_norm": 0.1606733649969101, "learning_rate": 5e-05, "loss": 1.7269, "step": 1568 }, { "epoch": 0.42904019688269074, "grad_norm": 0.16772723197937012, "learning_rate": 5e-05, "loss": 1.7469, "step": 1569 }, { "epoch": 0.42931364506426034, "grad_norm": 0.1608705222606659, "learning_rate": 5e-05, "loss": 1.6371, "step": 1570 }, { "epoch": 0.42958709324582994, "grad_norm": 0.16294115781784058, "learning_rate": 5e-05, "loss": 1.5675, "step": 1571 }, { "epoch": 0.4298605414273995, "grad_norm": 0.16304922103881836, "learning_rate": 5e-05, "loss": 1.7321, "step": 1572 }, { "epoch": 0.4301339896089691, "grad_norm": 0.18761757016181946, "learning_rate": 5e-05, "loss": 1.7382, "step": 1573 }, { "epoch": 0.4304074377905387, "grad_norm": 0.18934416770935059, "learning_rate": 5e-05, "loss": 1.5394, "step": 1574 }, { "epoch": 0.4306808859721083, "grad_norm": 0.16312015056610107, "learning_rate": 5e-05, "loss": 1.6435, "step": 1575 }, { "epoch": 0.4309543341536779, "grad_norm": 0.20754271745681763, "learning_rate": 5e-05, "loss": 1.625, "step": 1576 }, { "epoch": 0.43122778233524744, "grad_norm": 0.19495531916618347, "learning_rate": 5e-05, "loss": 1.6369, "step": 1577 }, { "epoch": 0.43150123051681705, "grad_norm": 0.1650926023721695, "learning_rate": 5e-05, "loss": 1.7474, "step": 1578 }, { "epoch": 0.43177467869838665, "grad_norm": 0.20554234087467194, "learning_rate": 5e-05, "loss": 1.5715, "step": 1579 }, { "epoch": 0.43204812687995625, "grad_norm": 0.18859036266803741, "learning_rate": 5e-05, "loss": 1.6938, "step": 1580 }, { "epoch": 0.43232157506152585, "grad_norm": 0.16272279620170593, "learning_rate": 5e-05, "loss": 1.583, "step": 1581 }, { "epoch": 0.43259502324309546, "grad_norm": 0.20159992575645447, "learning_rate": 5e-05, "loss": 1.594, "step": 1582 }, { "epoch": 0.432868471424665, "grad_norm": 0.16306354105472565, "learning_rate": 5e-05, "loss": 1.6359, "step": 1583 }, { "epoch": 0.4331419196062346, "grad_norm": 0.16837598383426666, "learning_rate": 5e-05, "loss": 1.6118, "step": 1584 }, { "epoch": 0.4334153677878042, "grad_norm": 0.19100947678089142, "learning_rate": 5e-05, "loss": 1.6267, "step": 1585 }, { "epoch": 0.4336888159693738, "grad_norm": 0.15260100364685059, "learning_rate": 5e-05, "loss": 1.5452, "step": 1586 }, { "epoch": 0.4339622641509434, "grad_norm": 0.15616540610790253, "learning_rate": 5e-05, "loss": 1.7126, "step": 1587 }, { "epoch": 0.434235712332513, "grad_norm": 0.16332991421222687, "learning_rate": 5e-05, "loss": 1.6253, "step": 1588 }, { "epoch": 0.43450916051408256, "grad_norm": 0.16623741388320923, "learning_rate": 5e-05, "loss": 1.6323, "step": 1589 }, { "epoch": 0.43478260869565216, "grad_norm": 0.15148966014385223, "learning_rate": 5e-05, "loss": 1.6107, "step": 1590 }, { "epoch": 0.43505605687722176, "grad_norm": 0.1654033362865448, "learning_rate": 5e-05, "loss": 1.5779, "step": 1591 }, { "epoch": 0.43532950505879137, "grad_norm": 0.17548300325870514, "learning_rate": 5e-05, "loss": 1.6037, "step": 1592 }, { "epoch": 0.43560295324036097, "grad_norm": 0.1487036496400833, "learning_rate": 5e-05, "loss": 1.5709, "step": 1593 }, { "epoch": 0.43587640142193057, "grad_norm": 0.17089228332042694, "learning_rate": 5e-05, "loss": 1.6508, "step": 1594 }, { "epoch": 0.4361498496035001, "grad_norm": 0.17811010777950287, "learning_rate": 5e-05, "loss": 1.5717, "step": 1595 }, { "epoch": 0.4364232977850697, "grad_norm": 0.16123969852924347, "learning_rate": 5e-05, "loss": 1.657, "step": 1596 }, { "epoch": 0.4366967459666393, "grad_norm": 0.15267348289489746, "learning_rate": 5e-05, "loss": 1.637, "step": 1597 }, { "epoch": 0.4369701941482089, "grad_norm": 0.19433604180812836, "learning_rate": 5e-05, "loss": 1.5795, "step": 1598 }, { "epoch": 0.4372436423297785, "grad_norm": 0.15938717126846313, "learning_rate": 5e-05, "loss": 1.719, "step": 1599 }, { "epoch": 0.4375170905113481, "grad_norm": 0.16735820472240448, "learning_rate": 5e-05, "loss": 1.7389, "step": 1600 }, { "epoch": 0.4377905386929177, "grad_norm": 0.19536766409873962, "learning_rate": 5e-05, "loss": 1.7317, "step": 1601 }, { "epoch": 0.4380639868744873, "grad_norm": 0.18374434113502502, "learning_rate": 5e-05, "loss": 1.541, "step": 1602 }, { "epoch": 0.4383374350560569, "grad_norm": 0.15462787449359894, "learning_rate": 5e-05, "loss": 1.6388, "step": 1603 }, { "epoch": 0.4386108832376265, "grad_norm": 0.18189293146133423, "learning_rate": 5e-05, "loss": 1.6179, "step": 1604 }, { "epoch": 0.4388843314191961, "grad_norm": 0.17478562891483307, "learning_rate": 5e-05, "loss": 1.6332, "step": 1605 }, { "epoch": 0.43915777960076563, "grad_norm": 0.16604724526405334, "learning_rate": 5e-05, "loss": 1.6758, "step": 1606 }, { "epoch": 0.43943122778233523, "grad_norm": 0.14763274788856506, "learning_rate": 5e-05, "loss": 1.5179, "step": 1607 }, { "epoch": 0.43970467596390483, "grad_norm": 0.18372495472431183, "learning_rate": 5e-05, "loss": 1.6489, "step": 1608 }, { "epoch": 0.43997812414547444, "grad_norm": 0.19224227964878082, "learning_rate": 5e-05, "loss": 1.6223, "step": 1609 }, { "epoch": 0.44025157232704404, "grad_norm": 0.1518845111131668, "learning_rate": 5e-05, "loss": 1.6314, "step": 1610 }, { "epoch": 0.44052502050861364, "grad_norm": 0.18382224440574646, "learning_rate": 5e-05, "loss": 1.6356, "step": 1611 }, { "epoch": 0.4407984686901832, "grad_norm": 0.18745778501033783, "learning_rate": 5e-05, "loss": 1.6189, "step": 1612 }, { "epoch": 0.4410719168717528, "grad_norm": 0.1606750637292862, "learning_rate": 5e-05, "loss": 1.7485, "step": 1613 }, { "epoch": 0.4413453650533224, "grad_norm": 0.176387757062912, "learning_rate": 5e-05, "loss": 1.6141, "step": 1614 }, { "epoch": 0.441618813234892, "grad_norm": 0.17925933003425598, "learning_rate": 5e-05, "loss": 1.6457, "step": 1615 }, { "epoch": 0.4418922614164616, "grad_norm": 0.15073414146900177, "learning_rate": 5e-05, "loss": 1.5818, "step": 1616 }, { "epoch": 0.4421657095980312, "grad_norm": 0.18990382552146912, "learning_rate": 5e-05, "loss": 1.6838, "step": 1617 }, { "epoch": 0.44243915777960074, "grad_norm": 0.1909227818250656, "learning_rate": 5e-05, "loss": 1.7176, "step": 1618 }, { "epoch": 0.44271260596117035, "grad_norm": 0.14983290433883667, "learning_rate": 5e-05, "loss": 1.5743, "step": 1619 }, { "epoch": 0.44298605414273995, "grad_norm": 0.18672508001327515, "learning_rate": 5e-05, "loss": 1.7437, "step": 1620 }, { "epoch": 0.44325950232430955, "grad_norm": 0.1633402407169342, "learning_rate": 5e-05, "loss": 1.6302, "step": 1621 }, { "epoch": 0.44353295050587915, "grad_norm": 0.15954262018203735, "learning_rate": 5e-05, "loss": 1.6534, "step": 1622 }, { "epoch": 0.44380639868744876, "grad_norm": 0.17406605184078217, "learning_rate": 5e-05, "loss": 1.6408, "step": 1623 }, { "epoch": 0.4440798468690183, "grad_norm": 0.15827330946922302, "learning_rate": 5e-05, "loss": 1.7256, "step": 1624 }, { "epoch": 0.4443532950505879, "grad_norm": 0.1576947122812271, "learning_rate": 5e-05, "loss": 1.6882, "step": 1625 }, { "epoch": 0.4446267432321575, "grad_norm": 0.161362424492836, "learning_rate": 5e-05, "loss": 1.5669, "step": 1626 }, { "epoch": 0.4449001914137271, "grad_norm": 0.15085799992084503, "learning_rate": 5e-05, "loss": 1.6761, "step": 1627 }, { "epoch": 0.4451736395952967, "grad_norm": 0.14607954025268555, "learning_rate": 5e-05, "loss": 1.5689, "step": 1628 }, { "epoch": 0.44544708777686626, "grad_norm": 0.149903804063797, "learning_rate": 5e-05, "loss": 1.6317, "step": 1629 }, { "epoch": 0.44572053595843586, "grad_norm": 0.15304753184318542, "learning_rate": 5e-05, "loss": 1.6986, "step": 1630 }, { "epoch": 0.44599398414000546, "grad_norm": 0.15582279860973358, "learning_rate": 5e-05, "loss": 1.6101, "step": 1631 }, { "epoch": 0.44626743232157506, "grad_norm": 0.15496088564395905, "learning_rate": 5e-05, "loss": 1.6459, "step": 1632 }, { "epoch": 0.44654088050314467, "grad_norm": 0.1606382578611374, "learning_rate": 5e-05, "loss": 1.6516, "step": 1633 }, { "epoch": 0.44681432868471427, "grad_norm": 0.16134901344776154, "learning_rate": 5e-05, "loss": 1.7784, "step": 1634 }, { "epoch": 0.4470877768662838, "grad_norm": 0.16512495279312134, "learning_rate": 5e-05, "loss": 1.6066, "step": 1635 }, { "epoch": 0.4473612250478534, "grad_norm": 0.15310251712799072, "learning_rate": 5e-05, "loss": 1.6425, "step": 1636 }, { "epoch": 0.447634673229423, "grad_norm": 0.1573248952627182, "learning_rate": 5e-05, "loss": 1.6168, "step": 1637 }, { "epoch": 0.4479081214109926, "grad_norm": 0.15116660296916962, "learning_rate": 5e-05, "loss": 1.5542, "step": 1638 }, { "epoch": 0.4481815695925622, "grad_norm": 0.1584998220205307, "learning_rate": 5e-05, "loss": 1.6503, "step": 1639 }, { "epoch": 0.4484550177741318, "grad_norm": 0.15654048323631287, "learning_rate": 5e-05, "loss": 1.6022, "step": 1640 }, { "epoch": 0.44872846595570137, "grad_norm": 0.1555994600057602, "learning_rate": 5e-05, "loss": 1.5843, "step": 1641 }, { "epoch": 0.449001914137271, "grad_norm": 0.1540311872959137, "learning_rate": 5e-05, "loss": 1.6218, "step": 1642 }, { "epoch": 0.4492753623188406, "grad_norm": 0.16408203542232513, "learning_rate": 5e-05, "loss": 1.6489, "step": 1643 }, { "epoch": 0.4495488105004102, "grad_norm": 0.15430676937103271, "learning_rate": 5e-05, "loss": 1.6169, "step": 1644 }, { "epoch": 0.4498222586819798, "grad_norm": 0.15662290155887604, "learning_rate": 5e-05, "loss": 1.7067, "step": 1645 }, { "epoch": 0.4500957068635494, "grad_norm": 0.16638533771038055, "learning_rate": 5e-05, "loss": 1.6786, "step": 1646 }, { "epoch": 0.45036915504511893, "grad_norm": 0.15840692818164825, "learning_rate": 5e-05, "loss": 1.5811, "step": 1647 }, { "epoch": 0.45064260322668853, "grad_norm": 0.15642379224300385, "learning_rate": 5e-05, "loss": 1.6151, "step": 1648 }, { "epoch": 0.45091605140825813, "grad_norm": 0.15364129841327667, "learning_rate": 5e-05, "loss": 1.5745, "step": 1649 }, { "epoch": 0.45118949958982774, "grad_norm": 0.16230598092079163, "learning_rate": 5e-05, "loss": 1.5954, "step": 1650 }, { "epoch": 0.45146294777139734, "grad_norm": 0.16471756994724274, "learning_rate": 5e-05, "loss": 1.6509, "step": 1651 }, { "epoch": 0.45173639595296694, "grad_norm": 0.16866976022720337, "learning_rate": 5e-05, "loss": 1.7927, "step": 1652 }, { "epoch": 0.4520098441345365, "grad_norm": 0.16828210651874542, "learning_rate": 5e-05, "loss": 1.7027, "step": 1653 }, { "epoch": 0.4522832923161061, "grad_norm": 0.15160506963729858, "learning_rate": 5e-05, "loss": 1.6926, "step": 1654 }, { "epoch": 0.4525567404976757, "grad_norm": 0.1498889923095703, "learning_rate": 5e-05, "loss": 1.6022, "step": 1655 }, { "epoch": 0.4528301886792453, "grad_norm": 0.16272372007369995, "learning_rate": 5e-05, "loss": 1.6532, "step": 1656 }, { "epoch": 0.4531036368608149, "grad_norm": 0.16485629975795746, "learning_rate": 5e-05, "loss": 1.6615, "step": 1657 }, { "epoch": 0.45337708504238444, "grad_norm": 0.15402807295322418, "learning_rate": 5e-05, "loss": 1.6162, "step": 1658 }, { "epoch": 0.45365053322395404, "grad_norm": 0.14957159757614136, "learning_rate": 5e-05, "loss": 1.646, "step": 1659 }, { "epoch": 0.45392398140552365, "grad_norm": 0.15733934938907623, "learning_rate": 5e-05, "loss": 1.5475, "step": 1660 }, { "epoch": 0.45419742958709325, "grad_norm": 0.16374224424362183, "learning_rate": 5e-05, "loss": 1.6937, "step": 1661 }, { "epoch": 0.45447087776866285, "grad_norm": 0.1516236960887909, "learning_rate": 5e-05, "loss": 1.5963, "step": 1662 }, { "epoch": 0.45474432595023245, "grad_norm": 0.1528540402650833, "learning_rate": 5e-05, "loss": 1.5636, "step": 1663 }, { "epoch": 0.455017774131802, "grad_norm": 0.1831309050321579, "learning_rate": 5e-05, "loss": 1.7391, "step": 1664 }, { "epoch": 0.4552912223133716, "grad_norm": 0.1571461260318756, "learning_rate": 5e-05, "loss": 1.56, "step": 1665 }, { "epoch": 0.4555646704949412, "grad_norm": 0.17489275336265564, "learning_rate": 5e-05, "loss": 1.7431, "step": 1666 }, { "epoch": 0.4558381186765108, "grad_norm": 0.16362959146499634, "learning_rate": 5e-05, "loss": 1.6041, "step": 1667 }, { "epoch": 0.4561115668580804, "grad_norm": 0.1590225100517273, "learning_rate": 5e-05, "loss": 1.7021, "step": 1668 }, { "epoch": 0.45638501503965, "grad_norm": 0.1953715831041336, "learning_rate": 5e-05, "loss": 1.7408, "step": 1669 }, { "epoch": 0.45665846322121956, "grad_norm": 0.1608944684267044, "learning_rate": 5e-05, "loss": 1.6262, "step": 1670 }, { "epoch": 0.45693191140278916, "grad_norm": 0.16777822375297546, "learning_rate": 5e-05, "loss": 1.6342, "step": 1671 }, { "epoch": 0.45720535958435876, "grad_norm": 0.16327768564224243, "learning_rate": 5e-05, "loss": 1.6965, "step": 1672 }, { "epoch": 0.45747880776592836, "grad_norm": 0.15659572184085846, "learning_rate": 5e-05, "loss": 1.6095, "step": 1673 }, { "epoch": 0.45775225594749797, "grad_norm": 0.17218998074531555, "learning_rate": 5e-05, "loss": 1.6831, "step": 1674 }, { "epoch": 0.45802570412906757, "grad_norm": 0.1536104679107666, "learning_rate": 5e-05, "loss": 1.5643, "step": 1675 }, { "epoch": 0.4582991523106371, "grad_norm": 0.15404142439365387, "learning_rate": 5e-05, "loss": 1.5892, "step": 1676 }, { "epoch": 0.4585726004922067, "grad_norm": 0.15226425230503082, "learning_rate": 5e-05, "loss": 1.582, "step": 1677 }, { "epoch": 0.4588460486737763, "grad_norm": 0.1722135990858078, "learning_rate": 5e-05, "loss": 1.6726, "step": 1678 }, { "epoch": 0.4591194968553459, "grad_norm": 0.14889506995677948, "learning_rate": 5e-05, "loss": 1.5599, "step": 1679 }, { "epoch": 0.4593929450369155, "grad_norm": 0.15106317400932312, "learning_rate": 5e-05, "loss": 1.6128, "step": 1680 }, { "epoch": 0.45966639321848507, "grad_norm": 0.1514967978000641, "learning_rate": 5e-05, "loss": 1.5257, "step": 1681 }, { "epoch": 0.45993984140005467, "grad_norm": 0.1474781632423401, "learning_rate": 5e-05, "loss": 1.5199, "step": 1682 }, { "epoch": 0.4602132895816243, "grad_norm": 0.15649180114269257, "learning_rate": 5e-05, "loss": 1.7461, "step": 1683 }, { "epoch": 0.4604867377631939, "grad_norm": 0.15975254774093628, "learning_rate": 5e-05, "loss": 1.6344, "step": 1684 }, { "epoch": 0.4607601859447635, "grad_norm": 0.15253420174121857, "learning_rate": 5e-05, "loss": 1.5446, "step": 1685 }, { "epoch": 0.4610336341263331, "grad_norm": 0.16303133964538574, "learning_rate": 5e-05, "loss": 1.6806, "step": 1686 }, { "epoch": 0.4613070823079026, "grad_norm": 0.15411800146102905, "learning_rate": 5e-05, "loss": 1.6656, "step": 1687 }, { "epoch": 0.46158053048947223, "grad_norm": 0.15066716074943542, "learning_rate": 5e-05, "loss": 1.6403, "step": 1688 }, { "epoch": 0.46185397867104183, "grad_norm": 0.15379618108272552, "learning_rate": 5e-05, "loss": 1.5969, "step": 1689 }, { "epoch": 0.46212742685261143, "grad_norm": 0.15352702140808105, "learning_rate": 5e-05, "loss": 1.6403, "step": 1690 }, { "epoch": 0.46240087503418104, "grad_norm": 0.158903107047081, "learning_rate": 5e-05, "loss": 1.6892, "step": 1691 }, { "epoch": 0.46267432321575064, "grad_norm": 0.15684601664543152, "learning_rate": 5e-05, "loss": 1.6744, "step": 1692 }, { "epoch": 0.4629477713973202, "grad_norm": 0.1492249220609665, "learning_rate": 5e-05, "loss": 1.613, "step": 1693 }, { "epoch": 0.4632212195788898, "grad_norm": 0.1778024435043335, "learning_rate": 5e-05, "loss": 1.7122, "step": 1694 }, { "epoch": 0.4634946677604594, "grad_norm": 0.16280174255371094, "learning_rate": 5e-05, "loss": 1.6553, "step": 1695 }, { "epoch": 0.463768115942029, "grad_norm": 0.1631561666727066, "learning_rate": 5e-05, "loss": 1.646, "step": 1696 }, { "epoch": 0.4640415641235986, "grad_norm": 0.1574755609035492, "learning_rate": 5e-05, "loss": 1.7003, "step": 1697 }, { "epoch": 0.4643150123051682, "grad_norm": 0.15220339596271515, "learning_rate": 5e-05, "loss": 1.5751, "step": 1698 }, { "epoch": 0.46458846048673774, "grad_norm": 0.14914649724960327, "learning_rate": 5e-05, "loss": 1.498, "step": 1699 }, { "epoch": 0.46486190866830734, "grad_norm": 0.19843582808971405, "learning_rate": 5e-05, "loss": 1.7326, "step": 1700 }, { "epoch": 0.46513535684987695, "grad_norm": 0.1573631912469864, "learning_rate": 5e-05, "loss": 1.6891, "step": 1701 }, { "epoch": 0.46540880503144655, "grad_norm": 0.16770999133586884, "learning_rate": 5e-05, "loss": 1.6852, "step": 1702 }, { "epoch": 0.46568225321301615, "grad_norm": 0.17956487834453583, "learning_rate": 5e-05, "loss": 1.6751, "step": 1703 }, { "epoch": 0.46595570139458575, "grad_norm": 0.1517421156167984, "learning_rate": 5e-05, "loss": 1.6928, "step": 1704 }, { "epoch": 0.4662291495761553, "grad_norm": 0.164058655500412, "learning_rate": 5e-05, "loss": 1.6313, "step": 1705 }, { "epoch": 0.4665025977577249, "grad_norm": 0.16080081462860107, "learning_rate": 5e-05, "loss": 1.5901, "step": 1706 }, { "epoch": 0.4667760459392945, "grad_norm": 0.149660125374794, "learning_rate": 5e-05, "loss": 1.6411, "step": 1707 }, { "epoch": 0.4670494941208641, "grad_norm": 0.1580061912536621, "learning_rate": 5e-05, "loss": 1.6815, "step": 1708 }, { "epoch": 0.4673229423024337, "grad_norm": 0.16102102398872375, "learning_rate": 5e-05, "loss": 1.6063, "step": 1709 }, { "epoch": 0.46759639048400325, "grad_norm": 0.16004008054733276, "learning_rate": 5e-05, "loss": 1.5849, "step": 1710 }, { "epoch": 0.46786983866557286, "grad_norm": 0.14842753112316132, "learning_rate": 5e-05, "loss": 1.6713, "step": 1711 }, { "epoch": 0.46814328684714246, "grad_norm": 0.17115214467048645, "learning_rate": 5e-05, "loss": 1.7549, "step": 1712 }, { "epoch": 0.46841673502871206, "grad_norm": 0.16055969893932343, "learning_rate": 5e-05, "loss": 1.7193, "step": 1713 }, { "epoch": 0.46869018321028166, "grad_norm": 0.14992430806159973, "learning_rate": 5e-05, "loss": 1.6529, "step": 1714 }, { "epoch": 0.46896363139185127, "grad_norm": 0.1594884842634201, "learning_rate": 5e-05, "loss": 1.5389, "step": 1715 }, { "epoch": 0.4692370795734208, "grad_norm": 0.15089979767799377, "learning_rate": 5e-05, "loss": 1.5729, "step": 1716 }, { "epoch": 0.4695105277549904, "grad_norm": 0.15678352117538452, "learning_rate": 5e-05, "loss": 1.6118, "step": 1717 }, { "epoch": 0.46978397593656, "grad_norm": 0.15891622006893158, "learning_rate": 5e-05, "loss": 1.6665, "step": 1718 }, { "epoch": 0.4700574241181296, "grad_norm": 0.15619421005249023, "learning_rate": 5e-05, "loss": 1.657, "step": 1719 }, { "epoch": 0.4703308722996992, "grad_norm": 0.15846212208271027, "learning_rate": 5e-05, "loss": 1.649, "step": 1720 }, { "epoch": 0.4706043204812688, "grad_norm": 0.1617954969406128, "learning_rate": 5e-05, "loss": 1.665, "step": 1721 }, { "epoch": 0.47087776866283837, "grad_norm": 0.15690676867961884, "learning_rate": 5e-05, "loss": 1.5683, "step": 1722 }, { "epoch": 0.47115121684440797, "grad_norm": 0.15898874402046204, "learning_rate": 5e-05, "loss": 1.5895, "step": 1723 }, { "epoch": 0.4714246650259776, "grad_norm": 0.1485942155122757, "learning_rate": 5e-05, "loss": 1.5482, "step": 1724 }, { "epoch": 0.4716981132075472, "grad_norm": 0.15413321554660797, "learning_rate": 5e-05, "loss": 1.6355, "step": 1725 }, { "epoch": 0.4719715613891168, "grad_norm": 0.16040156781673431, "learning_rate": 5e-05, "loss": 1.6984, "step": 1726 }, { "epoch": 0.4722450095706864, "grad_norm": 0.1513047069311142, "learning_rate": 5e-05, "loss": 1.5586, "step": 1727 }, { "epoch": 0.4725184577522559, "grad_norm": 0.1601109653711319, "learning_rate": 5e-05, "loss": 1.7306, "step": 1728 }, { "epoch": 0.47279190593382553, "grad_norm": 0.15264980494976044, "learning_rate": 5e-05, "loss": 1.6797, "step": 1729 }, { "epoch": 0.47306535411539513, "grad_norm": 0.15382279455661774, "learning_rate": 5e-05, "loss": 1.7204, "step": 1730 }, { "epoch": 0.47333880229696473, "grad_norm": 0.15572312474250793, "learning_rate": 5e-05, "loss": 1.5855, "step": 1731 }, { "epoch": 0.47361225047853434, "grad_norm": 0.15744370222091675, "learning_rate": 5e-05, "loss": 1.6487, "step": 1732 }, { "epoch": 0.47388569866010394, "grad_norm": 0.1619214117527008, "learning_rate": 5e-05, "loss": 1.6556, "step": 1733 }, { "epoch": 0.4741591468416735, "grad_norm": 0.1546183079481125, "learning_rate": 5e-05, "loss": 1.585, "step": 1734 }, { "epoch": 0.4744325950232431, "grad_norm": 0.1744302213191986, "learning_rate": 5e-05, "loss": 1.7555, "step": 1735 }, { "epoch": 0.4747060432048127, "grad_norm": 0.16364151239395142, "learning_rate": 5e-05, "loss": 1.6622, "step": 1736 }, { "epoch": 0.4749794913863823, "grad_norm": 0.14857520163059235, "learning_rate": 5e-05, "loss": 1.5803, "step": 1737 }, { "epoch": 0.4752529395679519, "grad_norm": 0.1706264317035675, "learning_rate": 5e-05, "loss": 1.6314, "step": 1738 }, { "epoch": 0.47552638774952144, "grad_norm": 0.15458574891090393, "learning_rate": 5e-05, "loss": 1.5577, "step": 1739 }, { "epoch": 0.47579983593109104, "grad_norm": 0.1472868025302887, "learning_rate": 5e-05, "loss": 1.5581, "step": 1740 }, { "epoch": 0.47607328411266064, "grad_norm": 0.16088488698005676, "learning_rate": 5e-05, "loss": 1.6711, "step": 1741 }, { "epoch": 0.47634673229423025, "grad_norm": 0.16059233248233795, "learning_rate": 5e-05, "loss": 1.6239, "step": 1742 }, { "epoch": 0.47662018047579985, "grad_norm": 0.15403006970882416, "learning_rate": 5e-05, "loss": 1.5916, "step": 1743 }, { "epoch": 0.47689362865736945, "grad_norm": 0.15987733006477356, "learning_rate": 5e-05, "loss": 1.5877, "step": 1744 }, { "epoch": 0.477167076838939, "grad_norm": 0.1711868941783905, "learning_rate": 5e-05, "loss": 1.6065, "step": 1745 }, { "epoch": 0.4774405250205086, "grad_norm": 0.16569697856903076, "learning_rate": 5e-05, "loss": 1.6181, "step": 1746 }, { "epoch": 0.4777139732020782, "grad_norm": 0.15485012531280518, "learning_rate": 5e-05, "loss": 1.6009, "step": 1747 }, { "epoch": 0.4779874213836478, "grad_norm": 0.16187280416488647, "learning_rate": 5e-05, "loss": 1.5976, "step": 1748 }, { "epoch": 0.4782608695652174, "grad_norm": 0.1522330790758133, "learning_rate": 5e-05, "loss": 1.7015, "step": 1749 }, { "epoch": 0.478534317746787, "grad_norm": 0.1496533900499344, "learning_rate": 5e-05, "loss": 1.6114, "step": 1750 }, { "epoch": 0.47880776592835655, "grad_norm": 0.16666877269744873, "learning_rate": 5e-05, "loss": 1.6728, "step": 1751 }, { "epoch": 0.47908121410992616, "grad_norm": 0.1623934507369995, "learning_rate": 5e-05, "loss": 1.5624, "step": 1752 }, { "epoch": 0.47935466229149576, "grad_norm": 0.1671043336391449, "learning_rate": 5e-05, "loss": 1.6066, "step": 1753 }, { "epoch": 0.47962811047306536, "grad_norm": 0.16084609925746918, "learning_rate": 5e-05, "loss": 1.5419, "step": 1754 }, { "epoch": 0.47990155865463496, "grad_norm": 0.18174776434898376, "learning_rate": 5e-05, "loss": 1.6554, "step": 1755 }, { "epoch": 0.48017500683620457, "grad_norm": 0.15653514862060547, "learning_rate": 5e-05, "loss": 1.5938, "step": 1756 }, { "epoch": 0.4804484550177741, "grad_norm": 0.16829371452331543, "learning_rate": 5e-05, "loss": 1.6566, "step": 1757 }, { "epoch": 0.4807219031993437, "grad_norm": 0.16628386080265045, "learning_rate": 5e-05, "loss": 1.6325, "step": 1758 }, { "epoch": 0.4809953513809133, "grad_norm": 0.15540596842765808, "learning_rate": 5e-05, "loss": 1.5648, "step": 1759 }, { "epoch": 0.4812687995624829, "grad_norm": 0.18352627754211426, "learning_rate": 5e-05, "loss": 1.7056, "step": 1760 }, { "epoch": 0.4815422477440525, "grad_norm": 0.16480763256549835, "learning_rate": 5e-05, "loss": 1.6272, "step": 1761 }, { "epoch": 0.48181569592562207, "grad_norm": 0.16389521956443787, "learning_rate": 5e-05, "loss": 1.7246, "step": 1762 }, { "epoch": 0.48208914410719167, "grad_norm": 0.14809884130954742, "learning_rate": 5e-05, "loss": 1.5785, "step": 1763 }, { "epoch": 0.48236259228876127, "grad_norm": 0.1639075130224228, "learning_rate": 5e-05, "loss": 1.5561, "step": 1764 }, { "epoch": 0.4826360404703309, "grad_norm": 0.17026962339878082, "learning_rate": 5e-05, "loss": 1.599, "step": 1765 }, { "epoch": 0.4829094886519005, "grad_norm": 0.16182227432727814, "learning_rate": 5e-05, "loss": 1.5801, "step": 1766 }, { "epoch": 0.4831829368334701, "grad_norm": 0.17417606711387634, "learning_rate": 5e-05, "loss": 1.5894, "step": 1767 }, { "epoch": 0.4834563850150396, "grad_norm": 0.1606941670179367, "learning_rate": 5e-05, "loss": 1.6389, "step": 1768 }, { "epoch": 0.4837298331966092, "grad_norm": 0.152814581990242, "learning_rate": 5e-05, "loss": 1.6011, "step": 1769 }, { "epoch": 0.48400328137817883, "grad_norm": 0.16164222359657288, "learning_rate": 5e-05, "loss": 1.5673, "step": 1770 }, { "epoch": 0.48427672955974843, "grad_norm": 0.16344451904296875, "learning_rate": 5e-05, "loss": 1.6209, "step": 1771 }, { "epoch": 0.48455017774131803, "grad_norm": 0.1603250950574875, "learning_rate": 5e-05, "loss": 1.6105, "step": 1772 }, { "epoch": 0.48482362592288764, "grad_norm": 0.16991026699543, "learning_rate": 5e-05, "loss": 1.6723, "step": 1773 }, { "epoch": 0.4850970741044572, "grad_norm": 0.1601463109254837, "learning_rate": 5e-05, "loss": 1.5577, "step": 1774 }, { "epoch": 0.4853705222860268, "grad_norm": 0.16674864292144775, "learning_rate": 5e-05, "loss": 1.6598, "step": 1775 }, { "epoch": 0.4856439704675964, "grad_norm": 0.16150332987308502, "learning_rate": 5e-05, "loss": 1.6948, "step": 1776 }, { "epoch": 0.485917418649166, "grad_norm": 0.16552074253559113, "learning_rate": 5e-05, "loss": 1.6746, "step": 1777 }, { "epoch": 0.4861908668307356, "grad_norm": 0.1552310287952423, "learning_rate": 5e-05, "loss": 1.5961, "step": 1778 }, { "epoch": 0.4864643150123052, "grad_norm": 0.16018088161945343, "learning_rate": 5e-05, "loss": 1.6024, "step": 1779 }, { "epoch": 0.48673776319387474, "grad_norm": 0.16856145858764648, "learning_rate": 5e-05, "loss": 1.6187, "step": 1780 }, { "epoch": 0.48701121137544434, "grad_norm": 0.16507881879806519, "learning_rate": 5e-05, "loss": 1.6644, "step": 1781 }, { "epoch": 0.48728465955701394, "grad_norm": 0.1524297147989273, "learning_rate": 5e-05, "loss": 1.6013, "step": 1782 }, { "epoch": 0.48755810773858355, "grad_norm": 0.16545777022838593, "learning_rate": 5e-05, "loss": 1.6813, "step": 1783 }, { "epoch": 0.48783155592015315, "grad_norm": 0.17331644892692566, "learning_rate": 5e-05, "loss": 1.6797, "step": 1784 }, { "epoch": 0.48810500410172275, "grad_norm": 0.16021324694156647, "learning_rate": 5e-05, "loss": 1.5828, "step": 1785 }, { "epoch": 0.4883784522832923, "grad_norm": 0.15434862673282623, "learning_rate": 5e-05, "loss": 1.5449, "step": 1786 }, { "epoch": 0.4886519004648619, "grad_norm": 0.15682034194469452, "learning_rate": 5e-05, "loss": 1.6503, "step": 1787 }, { "epoch": 0.4889253486464315, "grad_norm": 0.17640839517116547, "learning_rate": 5e-05, "loss": 1.7292, "step": 1788 }, { "epoch": 0.4891987968280011, "grad_norm": 0.16206398606300354, "learning_rate": 5e-05, "loss": 1.5844, "step": 1789 }, { "epoch": 0.4894722450095707, "grad_norm": 0.15871930122375488, "learning_rate": 5e-05, "loss": 1.685, "step": 1790 }, { "epoch": 0.48974569319114025, "grad_norm": 0.16885024309158325, "learning_rate": 5e-05, "loss": 1.7065, "step": 1791 }, { "epoch": 0.49001914137270985, "grad_norm": 0.16324368119239807, "learning_rate": 5e-05, "loss": 1.6381, "step": 1792 }, { "epoch": 0.49029258955427946, "grad_norm": 0.15525028109550476, "learning_rate": 5e-05, "loss": 1.6529, "step": 1793 }, { "epoch": 0.49056603773584906, "grad_norm": 0.15970031917095184, "learning_rate": 5e-05, "loss": 1.751, "step": 1794 }, { "epoch": 0.49083948591741866, "grad_norm": 0.16955524682998657, "learning_rate": 5e-05, "loss": 1.7307, "step": 1795 }, { "epoch": 0.49111293409898826, "grad_norm": 0.153351828455925, "learning_rate": 5e-05, "loss": 1.5379, "step": 1796 }, { "epoch": 0.4913863822805578, "grad_norm": 0.16103506088256836, "learning_rate": 5e-05, "loss": 1.6945, "step": 1797 }, { "epoch": 0.4916598304621274, "grad_norm": 0.17233219742774963, "learning_rate": 5e-05, "loss": 1.6656, "step": 1798 }, { "epoch": 0.491933278643697, "grad_norm": 0.15539239346981049, "learning_rate": 5e-05, "loss": 1.6, "step": 1799 }, { "epoch": 0.4922067268252666, "grad_norm": 0.1534857302904129, "learning_rate": 5e-05, "loss": 1.5498, "step": 1800 }, { "epoch": 0.4924801750068362, "grad_norm": 0.18539227545261383, "learning_rate": 5e-05, "loss": 1.5413, "step": 1801 }, { "epoch": 0.4927536231884058, "grad_norm": 0.15599320828914642, "learning_rate": 5e-05, "loss": 1.7108, "step": 1802 }, { "epoch": 0.49302707136997537, "grad_norm": 0.15700851380825043, "learning_rate": 5e-05, "loss": 1.6198, "step": 1803 }, { "epoch": 0.49330051955154497, "grad_norm": 0.15811684727668762, "learning_rate": 5e-05, "loss": 1.5085, "step": 1804 }, { "epoch": 0.49357396773311457, "grad_norm": 0.15955877304077148, "learning_rate": 5e-05, "loss": 1.5673, "step": 1805 }, { "epoch": 0.4938474159146842, "grad_norm": 0.15809409320354462, "learning_rate": 5e-05, "loss": 1.6075, "step": 1806 }, { "epoch": 0.4941208640962538, "grad_norm": 0.17937301099300385, "learning_rate": 5e-05, "loss": 1.5832, "step": 1807 }, { "epoch": 0.4943943122778234, "grad_norm": 0.16430814564228058, "learning_rate": 5e-05, "loss": 1.6945, "step": 1808 }, { "epoch": 0.4946677604593929, "grad_norm": 0.1564474105834961, "learning_rate": 5e-05, "loss": 1.6259, "step": 1809 }, { "epoch": 0.4949412086409625, "grad_norm": 0.17254872620105743, "learning_rate": 5e-05, "loss": 1.7017, "step": 1810 }, { "epoch": 0.49521465682253213, "grad_norm": 0.16017508506774902, "learning_rate": 5e-05, "loss": 1.6124, "step": 1811 }, { "epoch": 0.49548810500410173, "grad_norm": 0.15552417933940887, "learning_rate": 5e-05, "loss": 1.5943, "step": 1812 }, { "epoch": 0.49576155318567133, "grad_norm": 0.14992982149124146, "learning_rate": 5e-05, "loss": 1.5989, "step": 1813 }, { "epoch": 0.49603500136724094, "grad_norm": 0.14720216393470764, "learning_rate": 5e-05, "loss": 1.4911, "step": 1814 }, { "epoch": 0.4963084495488105, "grad_norm": 0.15855912864208221, "learning_rate": 5e-05, "loss": 1.5896, "step": 1815 }, { "epoch": 0.4965818977303801, "grad_norm": 0.1551193743944168, "learning_rate": 5e-05, "loss": 1.58, "step": 1816 }, { "epoch": 0.4968553459119497, "grad_norm": 0.15198324620723724, "learning_rate": 5e-05, "loss": 1.6181, "step": 1817 }, { "epoch": 0.4971287940935193, "grad_norm": 0.1556388884782791, "learning_rate": 5e-05, "loss": 1.6119, "step": 1818 }, { "epoch": 0.4974022422750889, "grad_norm": 0.1501542180776596, "learning_rate": 5e-05, "loss": 1.5566, "step": 1819 }, { "epoch": 0.49767569045665844, "grad_norm": 0.1648813784122467, "learning_rate": 5e-05, "loss": 1.6296, "step": 1820 }, { "epoch": 0.49794913863822804, "grad_norm": 0.15707071125507355, "learning_rate": 5e-05, "loss": 1.6604, "step": 1821 }, { "epoch": 0.49822258681979764, "grad_norm": 0.1539454311132431, "learning_rate": 5e-05, "loss": 1.6051, "step": 1822 }, { "epoch": 0.49849603500136724, "grad_norm": 0.15157034993171692, "learning_rate": 5e-05, "loss": 1.6315, "step": 1823 }, { "epoch": 0.49876948318293685, "grad_norm": 0.15487664937973022, "learning_rate": 5e-05, "loss": 1.676, "step": 1824 }, { "epoch": 0.49904293136450645, "grad_norm": 0.15155303478240967, "learning_rate": 5e-05, "loss": 1.5685, "step": 1825 }, { "epoch": 0.499316379546076, "grad_norm": 0.14079029858112335, "learning_rate": 5e-05, "loss": 1.5199, "step": 1826 }, { "epoch": 0.4995898277276456, "grad_norm": 0.15700970590114594, "learning_rate": 5e-05, "loss": 1.6312, "step": 1827 }, { "epoch": 0.4998632759092152, "grad_norm": 0.1630944162607193, "learning_rate": 5e-05, "loss": 1.7161, "step": 1828 }, { "epoch": 0.5001367240907848, "grad_norm": 0.15556710958480835, "learning_rate": 5e-05, "loss": 1.6417, "step": 1829 }, { "epoch": 0.5004101722723544, "grad_norm": 0.15315458178520203, "learning_rate": 5e-05, "loss": 1.6343, "step": 1830 }, { "epoch": 0.500683620453924, "grad_norm": 0.15903054177761078, "learning_rate": 5e-05, "loss": 1.6433, "step": 1831 }, { "epoch": 0.5009570686354936, "grad_norm": 0.16064853966236115, "learning_rate": 5e-05, "loss": 1.6739, "step": 1832 }, { "epoch": 0.5012305168170632, "grad_norm": 0.14736616611480713, "learning_rate": 5e-05, "loss": 1.547, "step": 1833 }, { "epoch": 0.5015039649986328, "grad_norm": 0.17235925793647766, "learning_rate": 5e-05, "loss": 1.5768, "step": 1834 }, { "epoch": 0.5017774131802023, "grad_norm": 0.15816731750965118, "learning_rate": 5e-05, "loss": 1.6226, "step": 1835 }, { "epoch": 0.5020508613617719, "grad_norm": 0.15823869407176971, "learning_rate": 5e-05, "loss": 1.6582, "step": 1836 }, { "epoch": 0.5023243095433415, "grad_norm": 0.14915357530117035, "learning_rate": 5e-05, "loss": 1.5551, "step": 1837 }, { "epoch": 0.5025977577249111, "grad_norm": 0.15896442532539368, "learning_rate": 5e-05, "loss": 1.6161, "step": 1838 }, { "epoch": 0.5028712059064807, "grad_norm": 0.15622593462467194, "learning_rate": 5e-05, "loss": 1.5816, "step": 1839 }, { "epoch": 0.5031446540880503, "grad_norm": 0.16117733716964722, "learning_rate": 5e-05, "loss": 1.6266, "step": 1840 }, { "epoch": 0.5034181022696199, "grad_norm": 0.15878431499004364, "learning_rate": 5e-05, "loss": 1.6835, "step": 1841 }, { "epoch": 0.5036915504511895, "grad_norm": 0.15273039042949677, "learning_rate": 5e-05, "loss": 1.6389, "step": 1842 }, { "epoch": 0.5039649986327591, "grad_norm": 0.1612463891506195, "learning_rate": 5e-05, "loss": 1.6066, "step": 1843 }, { "epoch": 0.5042384468143287, "grad_norm": 0.17296503484249115, "learning_rate": 5e-05, "loss": 1.664, "step": 1844 }, { "epoch": 0.5045118949958983, "grad_norm": 0.1609872728586197, "learning_rate": 5e-05, "loss": 1.727, "step": 1845 }, { "epoch": 0.5047853431774678, "grad_norm": 0.18367880582809448, "learning_rate": 5e-05, "loss": 1.6324, "step": 1846 }, { "epoch": 0.5050587913590374, "grad_norm": 0.1622532606124878, "learning_rate": 5e-05, "loss": 1.6035, "step": 1847 }, { "epoch": 0.505332239540607, "grad_norm": 0.16167710721492767, "learning_rate": 5e-05, "loss": 1.7208, "step": 1848 }, { "epoch": 0.5056056877221766, "grad_norm": 0.17007885873317719, "learning_rate": 5e-05, "loss": 1.4988, "step": 1849 }, { "epoch": 0.5058791359037462, "grad_norm": 0.1878514140844345, "learning_rate": 5e-05, "loss": 1.698, "step": 1850 }, { "epoch": 0.5061525840853158, "grad_norm": 0.1594289392232895, "learning_rate": 5e-05, "loss": 1.6375, "step": 1851 }, { "epoch": 0.5064260322668854, "grad_norm": 0.17619235813617706, "learning_rate": 5e-05, "loss": 1.5621, "step": 1852 }, { "epoch": 0.506699480448455, "grad_norm": 0.15811192989349365, "learning_rate": 5e-05, "loss": 1.4968, "step": 1853 }, { "epoch": 0.5069729286300246, "grad_norm": 0.15754997730255127, "learning_rate": 5e-05, "loss": 1.6578, "step": 1854 }, { "epoch": 0.5072463768115942, "grad_norm": 0.17825719714164734, "learning_rate": 5e-05, "loss": 1.6794, "step": 1855 }, { "epoch": 0.5075198249931638, "grad_norm": 0.1577797830104828, "learning_rate": 5e-05, "loss": 1.5553, "step": 1856 }, { "epoch": 0.5077932731747334, "grad_norm": 0.164150670170784, "learning_rate": 5e-05, "loss": 1.6357, "step": 1857 }, { "epoch": 0.5080667213563029, "grad_norm": 0.16957907378673553, "learning_rate": 5e-05, "loss": 1.6714, "step": 1858 }, { "epoch": 0.5083401695378725, "grad_norm": 0.15430040657520294, "learning_rate": 5e-05, "loss": 1.5349, "step": 1859 }, { "epoch": 0.5086136177194421, "grad_norm": 0.1596827656030655, "learning_rate": 5e-05, "loss": 1.6752, "step": 1860 }, { "epoch": 0.5088870659010117, "grad_norm": 0.15648046135902405, "learning_rate": 5e-05, "loss": 1.5519, "step": 1861 }, { "epoch": 0.5091605140825813, "grad_norm": 0.154343381524086, "learning_rate": 5e-05, "loss": 1.5832, "step": 1862 }, { "epoch": 0.5094339622641509, "grad_norm": 0.15734641253948212, "learning_rate": 5e-05, "loss": 1.6465, "step": 1863 }, { "epoch": 0.5097074104457205, "grad_norm": 0.16264241933822632, "learning_rate": 5e-05, "loss": 1.7062, "step": 1864 }, { "epoch": 0.5099808586272901, "grad_norm": 0.1580963432788849, "learning_rate": 5e-05, "loss": 1.6073, "step": 1865 }, { "epoch": 0.5102543068088597, "grad_norm": 0.15470340847969055, "learning_rate": 5e-05, "loss": 1.591, "step": 1866 }, { "epoch": 0.5105277549904293, "grad_norm": 0.18182386457920074, "learning_rate": 5e-05, "loss": 1.7503, "step": 1867 }, { "epoch": 0.510801203171999, "grad_norm": 0.16213788092136383, "learning_rate": 5e-05, "loss": 1.5868, "step": 1868 }, { "epoch": 0.5110746513535686, "grad_norm": 0.1534777283668518, "learning_rate": 5e-05, "loss": 1.5489, "step": 1869 }, { "epoch": 0.511348099535138, "grad_norm": 0.1524716168642044, "learning_rate": 5e-05, "loss": 1.5555, "step": 1870 }, { "epoch": 0.5116215477167076, "grad_norm": 0.15701556205749512, "learning_rate": 5e-05, "loss": 1.5622, "step": 1871 }, { "epoch": 0.5118949958982772, "grad_norm": 0.1485898792743683, "learning_rate": 5e-05, "loss": 1.5619, "step": 1872 }, { "epoch": 0.5121684440798469, "grad_norm": 0.16039031744003296, "learning_rate": 5e-05, "loss": 1.6172, "step": 1873 }, { "epoch": 0.5124418922614165, "grad_norm": 0.167929545044899, "learning_rate": 5e-05, "loss": 1.6437, "step": 1874 }, { "epoch": 0.5127153404429861, "grad_norm": 0.16350795328617096, "learning_rate": 5e-05, "loss": 1.6416, "step": 1875 }, { "epoch": 0.5129887886245557, "grad_norm": 0.15972229838371277, "learning_rate": 5e-05, "loss": 1.7117, "step": 1876 }, { "epoch": 0.5132622368061253, "grad_norm": 0.15788406133651733, "learning_rate": 5e-05, "loss": 1.5657, "step": 1877 }, { "epoch": 0.5135356849876949, "grad_norm": 0.1540132462978363, "learning_rate": 5e-05, "loss": 1.6087, "step": 1878 }, { "epoch": 0.5138091331692645, "grad_norm": 0.16247431933879852, "learning_rate": 5e-05, "loss": 1.6432, "step": 1879 }, { "epoch": 0.5140825813508341, "grad_norm": 0.15716880559921265, "learning_rate": 5e-05, "loss": 1.5565, "step": 1880 }, { "epoch": 0.5143560295324036, "grad_norm": 0.15379799902439117, "learning_rate": 5e-05, "loss": 1.6237, "step": 1881 }, { "epoch": 0.5146294777139732, "grad_norm": 0.15428978204727173, "learning_rate": 5e-05, "loss": 1.5128, "step": 1882 }, { "epoch": 0.5149029258955428, "grad_norm": 0.15110410749912262, "learning_rate": 5e-05, "loss": 1.5246, "step": 1883 }, { "epoch": 0.5151763740771124, "grad_norm": 0.15420377254486084, "learning_rate": 5e-05, "loss": 1.6952, "step": 1884 }, { "epoch": 0.515449822258682, "grad_norm": 0.15766222774982452, "learning_rate": 5e-05, "loss": 1.6611, "step": 1885 }, { "epoch": 0.5157232704402516, "grad_norm": 0.1532919555902481, "learning_rate": 5e-05, "loss": 1.5322, "step": 1886 }, { "epoch": 0.5159967186218212, "grad_norm": 0.15919700264930725, "learning_rate": 5e-05, "loss": 1.6387, "step": 1887 }, { "epoch": 0.5162701668033908, "grad_norm": 0.15913300216197968, "learning_rate": 5e-05, "loss": 1.6679, "step": 1888 }, { "epoch": 0.5165436149849604, "grad_norm": 0.16154128313064575, "learning_rate": 5e-05, "loss": 1.72, "step": 1889 }, { "epoch": 0.51681706316653, "grad_norm": 0.16438822448253632, "learning_rate": 5e-05, "loss": 1.7099, "step": 1890 }, { "epoch": 0.5170905113480996, "grad_norm": 0.1546657532453537, "learning_rate": 5e-05, "loss": 1.6098, "step": 1891 }, { "epoch": 0.5173639595296692, "grad_norm": 0.15779191255569458, "learning_rate": 5e-05, "loss": 1.6176, "step": 1892 }, { "epoch": 0.5176374077112387, "grad_norm": 0.15966758131980896, "learning_rate": 5e-05, "loss": 1.665, "step": 1893 }, { "epoch": 0.5179108558928083, "grad_norm": 0.17288130521774292, "learning_rate": 5e-05, "loss": 1.6342, "step": 1894 }, { "epoch": 0.5181843040743779, "grad_norm": 0.16509395837783813, "learning_rate": 5e-05, "loss": 1.649, "step": 1895 }, { "epoch": 0.5184577522559475, "grad_norm": 0.14995107054710388, "learning_rate": 5e-05, "loss": 1.4966, "step": 1896 }, { "epoch": 0.5187312004375171, "grad_norm": 0.15514351427555084, "learning_rate": 5e-05, "loss": 1.5691, "step": 1897 }, { "epoch": 0.5190046486190867, "grad_norm": 0.15947668254375458, "learning_rate": 5e-05, "loss": 1.534, "step": 1898 }, { "epoch": 0.5192780968006563, "grad_norm": 0.16156336665153503, "learning_rate": 5e-05, "loss": 1.7277, "step": 1899 }, { "epoch": 0.5195515449822259, "grad_norm": 0.17509359121322632, "learning_rate": 5e-05, "loss": 1.8131, "step": 1900 }, { "epoch": 0.5198249931637955, "grad_norm": 0.1574561595916748, "learning_rate": 5e-05, "loss": 1.618, "step": 1901 }, { "epoch": 0.5200984413453651, "grad_norm": 0.1618000864982605, "learning_rate": 5e-05, "loss": 1.5925, "step": 1902 }, { "epoch": 0.5203718895269347, "grad_norm": 0.15169738233089447, "learning_rate": 5e-05, "loss": 1.6704, "step": 1903 }, { "epoch": 0.5206453377085042, "grad_norm": 0.15747463703155518, "learning_rate": 5e-05, "loss": 1.6319, "step": 1904 }, { "epoch": 0.5209187858900738, "grad_norm": 0.15733812749385834, "learning_rate": 5e-05, "loss": 1.6501, "step": 1905 }, { "epoch": 0.5211922340716434, "grad_norm": 0.15596060454845428, "learning_rate": 5e-05, "loss": 1.6661, "step": 1906 }, { "epoch": 0.521465682253213, "grad_norm": 0.15629130601882935, "learning_rate": 5e-05, "loss": 1.652, "step": 1907 }, { "epoch": 0.5217391304347826, "grad_norm": 0.15206284821033478, "learning_rate": 5e-05, "loss": 1.5696, "step": 1908 }, { "epoch": 0.5220125786163522, "grad_norm": 0.16571788489818573, "learning_rate": 5e-05, "loss": 1.6589, "step": 1909 }, { "epoch": 0.5222860267979218, "grad_norm": 0.1548844426870346, "learning_rate": 5e-05, "loss": 1.6127, "step": 1910 }, { "epoch": 0.5225594749794914, "grad_norm": 0.16375820338726044, "learning_rate": 5e-05, "loss": 1.5907, "step": 1911 }, { "epoch": 0.522832923161061, "grad_norm": 0.15710307657718658, "learning_rate": 5e-05, "loss": 1.5947, "step": 1912 }, { "epoch": 0.5231063713426306, "grad_norm": 0.15390416979789734, "learning_rate": 5e-05, "loss": 1.6072, "step": 1913 }, { "epoch": 0.5233798195242002, "grad_norm": 0.15938615798950195, "learning_rate": 5e-05, "loss": 1.6348, "step": 1914 }, { "epoch": 0.5236532677057698, "grad_norm": 0.14976823329925537, "learning_rate": 5e-05, "loss": 1.5721, "step": 1915 }, { "epoch": 0.5239267158873393, "grad_norm": 0.15962772071361542, "learning_rate": 5e-05, "loss": 1.6077, "step": 1916 }, { "epoch": 0.5242001640689089, "grad_norm": 0.1549055576324463, "learning_rate": 5e-05, "loss": 1.5865, "step": 1917 }, { "epoch": 0.5244736122504785, "grad_norm": 0.1605559140443802, "learning_rate": 5e-05, "loss": 1.5902, "step": 1918 }, { "epoch": 0.5247470604320481, "grad_norm": 0.15829849243164062, "learning_rate": 5e-05, "loss": 1.6124, "step": 1919 }, { "epoch": 0.5250205086136177, "grad_norm": 0.1654774397611618, "learning_rate": 5e-05, "loss": 1.6368, "step": 1920 }, { "epoch": 0.5252939567951873, "grad_norm": 0.16315412521362305, "learning_rate": 5e-05, "loss": 1.6415, "step": 1921 }, { "epoch": 0.5255674049767569, "grad_norm": 0.14765046536922455, "learning_rate": 5e-05, "loss": 1.6004, "step": 1922 }, { "epoch": 0.5258408531583265, "grad_norm": 0.15793031454086304, "learning_rate": 5e-05, "loss": 1.6902, "step": 1923 }, { "epoch": 0.5261143013398961, "grad_norm": 0.165854349732399, "learning_rate": 5e-05, "loss": 1.6598, "step": 1924 }, { "epoch": 0.5263877495214657, "grad_norm": 0.15993940830230713, "learning_rate": 5e-05, "loss": 1.6446, "step": 1925 }, { "epoch": 0.5266611977030353, "grad_norm": 0.1579129993915558, "learning_rate": 5e-05, "loss": 1.6392, "step": 1926 }, { "epoch": 0.5269346458846048, "grad_norm": 0.1714421659708023, "learning_rate": 5e-05, "loss": 1.6083, "step": 1927 }, { "epoch": 0.5272080940661744, "grad_norm": 0.15757611393928528, "learning_rate": 5e-05, "loss": 1.6344, "step": 1928 }, { "epoch": 0.527481542247744, "grad_norm": 0.15477389097213745, "learning_rate": 5e-05, "loss": 1.6428, "step": 1929 }, { "epoch": 0.5277549904293136, "grad_norm": 0.14917831122875214, "learning_rate": 5e-05, "loss": 1.4957, "step": 1930 }, { "epoch": 0.5280284386108832, "grad_norm": 0.17004439234733582, "learning_rate": 5e-05, "loss": 1.7057, "step": 1931 }, { "epoch": 0.5283018867924528, "grad_norm": 0.1737544983625412, "learning_rate": 5e-05, "loss": 1.6653, "step": 1932 }, { "epoch": 0.5285753349740224, "grad_norm": 0.15638841688632965, "learning_rate": 5e-05, "loss": 1.581, "step": 1933 }, { "epoch": 0.528848783155592, "grad_norm": 0.19510416686534882, "learning_rate": 5e-05, "loss": 1.7202, "step": 1934 }, { "epoch": 0.5291222313371616, "grad_norm": 0.16090962290763855, "learning_rate": 5e-05, "loss": 1.6374, "step": 1935 }, { "epoch": 0.5293956795187312, "grad_norm": 0.1696002036333084, "learning_rate": 5e-05, "loss": 1.7889, "step": 1936 }, { "epoch": 0.5296691277003008, "grad_norm": 0.16609039902687073, "learning_rate": 5e-05, "loss": 1.6189, "step": 1937 }, { "epoch": 0.5299425758818704, "grad_norm": 0.1632707715034485, "learning_rate": 5e-05, "loss": 1.6715, "step": 1938 }, { "epoch": 0.5302160240634399, "grad_norm": 0.1675024926662445, "learning_rate": 5e-05, "loss": 1.6406, "step": 1939 }, { "epoch": 0.5304894722450095, "grad_norm": 0.1700305938720703, "learning_rate": 5e-05, "loss": 1.5702, "step": 1940 }, { "epoch": 0.5307629204265791, "grad_norm": 0.16249525547027588, "learning_rate": 5e-05, "loss": 1.6847, "step": 1941 }, { "epoch": 0.5310363686081487, "grad_norm": 0.16194301843643188, "learning_rate": 5e-05, "loss": 1.5392, "step": 1942 }, { "epoch": 0.5313098167897183, "grad_norm": 0.16195613145828247, "learning_rate": 5e-05, "loss": 1.6185, "step": 1943 }, { "epoch": 0.5315832649712879, "grad_norm": 0.15754897892475128, "learning_rate": 5e-05, "loss": 1.6188, "step": 1944 }, { "epoch": 0.5318567131528575, "grad_norm": 0.16981816291809082, "learning_rate": 5e-05, "loss": 1.6742, "step": 1945 }, { "epoch": 0.5321301613344271, "grad_norm": 0.16634345054626465, "learning_rate": 5e-05, "loss": 1.5054, "step": 1946 }, { "epoch": 0.5324036095159967, "grad_norm": 0.16170655190944672, "learning_rate": 5e-05, "loss": 1.6678, "step": 1947 }, { "epoch": 0.5326770576975663, "grad_norm": 0.1492849886417389, "learning_rate": 5e-05, "loss": 1.6136, "step": 1948 }, { "epoch": 0.532950505879136, "grad_norm": 0.18234175443649292, "learning_rate": 5e-05, "loss": 1.7196, "step": 1949 }, { "epoch": 0.5332239540607056, "grad_norm": 0.158365860581398, "learning_rate": 5e-05, "loss": 1.4944, "step": 1950 }, { "epoch": 0.533497402242275, "grad_norm": 0.1587635576725006, "learning_rate": 5e-05, "loss": 1.6334, "step": 1951 }, { "epoch": 0.5337708504238446, "grad_norm": 0.18955689668655396, "learning_rate": 5e-05, "loss": 1.6253, "step": 1952 }, { "epoch": 0.5340442986054142, "grad_norm": 0.1624482125043869, "learning_rate": 5e-05, "loss": 1.6511, "step": 1953 }, { "epoch": 0.5343177467869838, "grad_norm": 0.1539737582206726, "learning_rate": 5e-05, "loss": 1.4702, "step": 1954 }, { "epoch": 0.5345911949685535, "grad_norm": 0.17508608102798462, "learning_rate": 5e-05, "loss": 1.6406, "step": 1955 }, { "epoch": 0.534864643150123, "grad_norm": 0.15725727379322052, "learning_rate": 5e-05, "loss": 1.5327, "step": 1956 }, { "epoch": 0.5351380913316927, "grad_norm": 0.16343533992767334, "learning_rate": 5e-05, "loss": 1.6075, "step": 1957 }, { "epoch": 0.5354115395132623, "grad_norm": 0.16122648119926453, "learning_rate": 5e-05, "loss": 1.577, "step": 1958 }, { "epoch": 0.5356849876948319, "grad_norm": 0.16027076542377472, "learning_rate": 5e-05, "loss": 1.6378, "step": 1959 }, { "epoch": 0.5359584358764015, "grad_norm": 0.15738053619861603, "learning_rate": 5e-05, "loss": 1.702, "step": 1960 }, { "epoch": 0.5362318840579711, "grad_norm": 0.15926112234592438, "learning_rate": 5e-05, "loss": 1.6522, "step": 1961 }, { "epoch": 0.5365053322395406, "grad_norm": 0.15066231787204742, "learning_rate": 5e-05, "loss": 1.4936, "step": 1962 }, { "epoch": 0.5367787804211102, "grad_norm": 0.14897476136684418, "learning_rate": 5e-05, "loss": 1.5098, "step": 1963 }, { "epoch": 0.5370522286026798, "grad_norm": 0.1546986699104309, "learning_rate": 5e-05, "loss": 1.5904, "step": 1964 }, { "epoch": 0.5373256767842494, "grad_norm": 0.15693390369415283, "learning_rate": 5e-05, "loss": 1.6098, "step": 1965 }, { "epoch": 0.537599124965819, "grad_norm": 0.16184082627296448, "learning_rate": 5e-05, "loss": 1.7093, "step": 1966 }, { "epoch": 0.5378725731473886, "grad_norm": 0.157283753156662, "learning_rate": 5e-05, "loss": 1.7046, "step": 1967 }, { "epoch": 0.5381460213289582, "grad_norm": 0.16195395588874817, "learning_rate": 5e-05, "loss": 1.6401, "step": 1968 }, { "epoch": 0.5384194695105278, "grad_norm": 0.16151578724384308, "learning_rate": 5e-05, "loss": 1.6325, "step": 1969 }, { "epoch": 0.5386929176920974, "grad_norm": 0.1499180942773819, "learning_rate": 5e-05, "loss": 1.5444, "step": 1970 }, { "epoch": 0.538966365873667, "grad_norm": 0.16066747903823853, "learning_rate": 5e-05, "loss": 1.5921, "step": 1971 }, { "epoch": 0.5392398140552366, "grad_norm": 0.15843364596366882, "learning_rate": 5e-05, "loss": 1.5952, "step": 1972 }, { "epoch": 0.5395132622368062, "grad_norm": 0.15702027082443237, "learning_rate": 5e-05, "loss": 1.5061, "step": 1973 }, { "epoch": 0.5397867104183757, "grad_norm": 0.15099968016147614, "learning_rate": 5e-05, "loss": 1.6227, "step": 1974 }, { "epoch": 0.5400601585999453, "grad_norm": 0.15741010010242462, "learning_rate": 5e-05, "loss": 1.647, "step": 1975 }, { "epoch": 0.5403336067815149, "grad_norm": 0.1735711693763733, "learning_rate": 5e-05, "loss": 1.5785, "step": 1976 }, { "epoch": 0.5406070549630845, "grad_norm": 0.1666189730167389, "learning_rate": 5e-05, "loss": 1.6775, "step": 1977 }, { "epoch": 0.5408805031446541, "grad_norm": 0.17314961552619934, "learning_rate": 5e-05, "loss": 1.6201, "step": 1978 }, { "epoch": 0.5411539513262237, "grad_norm": 0.17330363392829895, "learning_rate": 5e-05, "loss": 1.6672, "step": 1979 }, { "epoch": 0.5414273995077933, "grad_norm": 0.16131727397441864, "learning_rate": 5e-05, "loss": 1.6451, "step": 1980 }, { "epoch": 0.5417008476893629, "grad_norm": 0.16308879852294922, "learning_rate": 5e-05, "loss": 1.5366, "step": 1981 }, { "epoch": 0.5419742958709325, "grad_norm": 0.1608009785413742, "learning_rate": 5e-05, "loss": 1.636, "step": 1982 }, { "epoch": 0.5422477440525021, "grad_norm": 0.1665000170469284, "learning_rate": 5e-05, "loss": 1.6159, "step": 1983 }, { "epoch": 0.5425211922340717, "grad_norm": 0.15207409858703613, "learning_rate": 5e-05, "loss": 1.5366, "step": 1984 }, { "epoch": 0.5427946404156412, "grad_norm": 0.17597833275794983, "learning_rate": 5e-05, "loss": 1.6525, "step": 1985 }, { "epoch": 0.5430680885972108, "grad_norm": 0.15393896400928497, "learning_rate": 5e-05, "loss": 1.5363, "step": 1986 }, { "epoch": 0.5433415367787804, "grad_norm": 0.17519448697566986, "learning_rate": 5e-05, "loss": 1.6806, "step": 1987 }, { "epoch": 0.54361498496035, "grad_norm": 0.1612955778837204, "learning_rate": 5e-05, "loss": 1.531, "step": 1988 }, { "epoch": 0.5438884331419196, "grad_norm": 0.15321476757526398, "learning_rate": 5e-05, "loss": 1.4887, "step": 1989 }, { "epoch": 0.5441618813234892, "grad_norm": 0.1556374877691269, "learning_rate": 5e-05, "loss": 1.625, "step": 1990 }, { "epoch": 0.5444353295050588, "grad_norm": 0.1725417673587799, "learning_rate": 5e-05, "loss": 1.6286, "step": 1991 }, { "epoch": 0.5447087776866284, "grad_norm": 0.15200626850128174, "learning_rate": 5e-05, "loss": 1.5596, "step": 1992 }, { "epoch": 0.544982225868198, "grad_norm": 0.1692056953907013, "learning_rate": 5e-05, "loss": 1.7184, "step": 1993 }, { "epoch": 0.5452556740497676, "grad_norm": 0.15530715882778168, "learning_rate": 5e-05, "loss": 1.493, "step": 1994 }, { "epoch": 0.5455291222313372, "grad_norm": 0.1559220254421234, "learning_rate": 5e-05, "loss": 1.5517, "step": 1995 }, { "epoch": 0.5458025704129068, "grad_norm": 0.15481142699718475, "learning_rate": 5e-05, "loss": 1.5246, "step": 1996 }, { "epoch": 0.5460760185944763, "grad_norm": 0.1693953573703766, "learning_rate": 5e-05, "loss": 1.6766, "step": 1997 }, { "epoch": 0.5463494667760459, "grad_norm": 0.14999975264072418, "learning_rate": 5e-05, "loss": 1.5715, "step": 1998 }, { "epoch": 0.5466229149576155, "grad_norm": 0.17395493388175964, "learning_rate": 5e-05, "loss": 1.6317, "step": 1999 }, { "epoch": 0.5468963631391851, "grad_norm": 0.16458867490291595, "learning_rate": 5e-05, "loss": 1.5627, "step": 2000 }, { "epoch": 0.5471698113207547, "grad_norm": 0.15648111701011658, "learning_rate": 5e-05, "loss": 1.6565, "step": 2001 }, { "epoch": 0.5474432595023243, "grad_norm": 0.16169799864292145, "learning_rate": 5e-05, "loss": 1.6626, "step": 2002 }, { "epoch": 0.5477167076838939, "grad_norm": 0.17330998182296753, "learning_rate": 5e-05, "loss": 1.6268, "step": 2003 }, { "epoch": 0.5479901558654635, "grad_norm": 0.1588200032711029, "learning_rate": 5e-05, "loss": 1.5532, "step": 2004 }, { "epoch": 0.5482636040470331, "grad_norm": 0.16601422429084778, "learning_rate": 5e-05, "loss": 1.5976, "step": 2005 }, { "epoch": 0.5485370522286027, "grad_norm": 0.17725588381290436, "learning_rate": 5e-05, "loss": 1.6347, "step": 2006 }, { "epoch": 0.5488105004101723, "grad_norm": 0.15480291843414307, "learning_rate": 5e-05, "loss": 1.585, "step": 2007 }, { "epoch": 0.5490839485917418, "grad_norm": 0.1640474647283554, "learning_rate": 5e-05, "loss": 1.4857, "step": 2008 }, { "epoch": 0.5493573967733114, "grad_norm": 0.17515790462493896, "learning_rate": 5e-05, "loss": 1.6093, "step": 2009 }, { "epoch": 0.549630844954881, "grad_norm": 0.15881620347499847, "learning_rate": 5e-05, "loss": 1.5243, "step": 2010 }, { "epoch": 0.5499042931364506, "grad_norm": 0.1577688604593277, "learning_rate": 5e-05, "loss": 1.6015, "step": 2011 }, { "epoch": 0.5501777413180202, "grad_norm": 0.16591089963912964, "learning_rate": 5e-05, "loss": 1.6122, "step": 2012 }, { "epoch": 0.5504511894995898, "grad_norm": 0.16563619673252106, "learning_rate": 5e-05, "loss": 1.6371, "step": 2013 }, { "epoch": 0.5507246376811594, "grad_norm": 0.17261956632137299, "learning_rate": 5e-05, "loss": 1.6834, "step": 2014 }, { "epoch": 0.550998085862729, "grad_norm": 0.15990975499153137, "learning_rate": 5e-05, "loss": 1.6495, "step": 2015 }, { "epoch": 0.5512715340442986, "grad_norm": 0.15920180082321167, "learning_rate": 5e-05, "loss": 1.6299, "step": 2016 }, { "epoch": 0.5515449822258682, "grad_norm": 0.15567980706691742, "learning_rate": 5e-05, "loss": 1.5456, "step": 2017 }, { "epoch": 0.5518184304074378, "grad_norm": 0.155466690659523, "learning_rate": 5e-05, "loss": 1.6061, "step": 2018 }, { "epoch": 0.5520918785890074, "grad_norm": 0.16620704531669617, "learning_rate": 5e-05, "loss": 1.6645, "step": 2019 }, { "epoch": 0.5523653267705769, "grad_norm": 0.16219502687454224, "learning_rate": 5e-05, "loss": 1.7455, "step": 2020 }, { "epoch": 0.5526387749521465, "grad_norm": 0.1692618727684021, "learning_rate": 5e-05, "loss": 1.6036, "step": 2021 }, { "epoch": 0.5529122231337161, "grad_norm": 0.1535937339067459, "learning_rate": 5e-05, "loss": 1.614, "step": 2022 }, { "epoch": 0.5531856713152857, "grad_norm": 0.16089798510074615, "learning_rate": 5e-05, "loss": 1.585, "step": 2023 }, { "epoch": 0.5534591194968553, "grad_norm": 0.16011877357959747, "learning_rate": 5e-05, "loss": 1.6358, "step": 2024 }, { "epoch": 0.5537325676784249, "grad_norm": 0.1536594033241272, "learning_rate": 5e-05, "loss": 1.6066, "step": 2025 }, { "epoch": 0.5540060158599945, "grad_norm": 0.15482646226882935, "learning_rate": 5e-05, "loss": 1.5291, "step": 2026 }, { "epoch": 0.5542794640415641, "grad_norm": 0.16264241933822632, "learning_rate": 5e-05, "loss": 1.5984, "step": 2027 }, { "epoch": 0.5545529122231337, "grad_norm": 0.16012269258499146, "learning_rate": 5e-05, "loss": 1.6847, "step": 2028 }, { "epoch": 0.5548263604047033, "grad_norm": 0.16615892946720123, "learning_rate": 5e-05, "loss": 1.5873, "step": 2029 }, { "epoch": 0.555099808586273, "grad_norm": 0.15969812870025635, "learning_rate": 5e-05, "loss": 1.5295, "step": 2030 }, { "epoch": 0.5553732567678425, "grad_norm": 0.17502804100513458, "learning_rate": 5e-05, "loss": 1.6885, "step": 2031 }, { "epoch": 0.555646704949412, "grad_norm": 0.15848223865032196, "learning_rate": 5e-05, "loss": 1.5479, "step": 2032 }, { "epoch": 0.5559201531309816, "grad_norm": 0.16243097186088562, "learning_rate": 5e-05, "loss": 1.6315, "step": 2033 }, { "epoch": 0.5561936013125512, "grad_norm": 0.15360400080680847, "learning_rate": 5e-05, "loss": 1.5927, "step": 2034 }, { "epoch": 0.5564670494941208, "grad_norm": 0.17348462343215942, "learning_rate": 5e-05, "loss": 1.6338, "step": 2035 }, { "epoch": 0.5567404976756904, "grad_norm": 0.14669735729694366, "learning_rate": 5e-05, "loss": 1.5976, "step": 2036 }, { "epoch": 0.55701394585726, "grad_norm": 0.17374375462532043, "learning_rate": 5e-05, "loss": 1.6696, "step": 2037 }, { "epoch": 0.5572873940388297, "grad_norm": 0.1698565036058426, "learning_rate": 5e-05, "loss": 1.7155, "step": 2038 }, { "epoch": 0.5575608422203993, "grad_norm": 0.16311299800872803, "learning_rate": 5e-05, "loss": 1.5581, "step": 2039 }, { "epoch": 0.5578342904019689, "grad_norm": 0.1789819747209549, "learning_rate": 5e-05, "loss": 1.6041, "step": 2040 }, { "epoch": 0.5581077385835385, "grad_norm": 0.15918207168579102, "learning_rate": 5e-05, "loss": 1.6056, "step": 2041 }, { "epoch": 0.5583811867651081, "grad_norm": 0.1748282015323639, "learning_rate": 5e-05, "loss": 1.6903, "step": 2042 }, { "epoch": 0.5586546349466776, "grad_norm": 0.15822182595729828, "learning_rate": 5e-05, "loss": 1.6031, "step": 2043 }, { "epoch": 0.5589280831282472, "grad_norm": 0.15568383038043976, "learning_rate": 5e-05, "loss": 1.6616, "step": 2044 }, { "epoch": 0.5592015313098168, "grad_norm": 0.1490369737148285, "learning_rate": 5e-05, "loss": 1.554, "step": 2045 }, { "epoch": 0.5594749794913864, "grad_norm": 0.1597105860710144, "learning_rate": 5e-05, "loss": 1.5907, "step": 2046 }, { "epoch": 0.559748427672956, "grad_norm": 0.15021972358226776, "learning_rate": 5e-05, "loss": 1.5364, "step": 2047 }, { "epoch": 0.5600218758545256, "grad_norm": 0.16100069880485535, "learning_rate": 5e-05, "loss": 1.6302, "step": 2048 }, { "epoch": 0.5602953240360952, "grad_norm": 0.1672513782978058, "learning_rate": 5e-05, "loss": 1.6362, "step": 2049 }, { "epoch": 0.5605687722176648, "grad_norm": 0.14868006110191345, "learning_rate": 5e-05, "loss": 1.5026, "step": 2050 }, { "epoch": 0.5608422203992344, "grad_norm": 0.15839874744415283, "learning_rate": 5e-05, "loss": 1.676, "step": 2051 }, { "epoch": 0.561115668580804, "grad_norm": 0.15299920737743378, "learning_rate": 5e-05, "loss": 1.6009, "step": 2052 }, { "epoch": 0.5613891167623736, "grad_norm": 0.15907810628414154, "learning_rate": 5e-05, "loss": 1.6038, "step": 2053 }, { "epoch": 0.5616625649439432, "grad_norm": 0.15567612648010254, "learning_rate": 5e-05, "loss": 1.6237, "step": 2054 }, { "epoch": 0.5619360131255127, "grad_norm": 0.1545831263065338, "learning_rate": 5e-05, "loss": 1.5655, "step": 2055 }, { "epoch": 0.5622094613070823, "grad_norm": 0.15580891072750092, "learning_rate": 5e-05, "loss": 1.5292, "step": 2056 }, { "epoch": 0.5624829094886519, "grad_norm": 0.1534157246351242, "learning_rate": 5e-05, "loss": 1.5798, "step": 2057 }, { "epoch": 0.5627563576702215, "grad_norm": 0.17224334180355072, "learning_rate": 5e-05, "loss": 1.705, "step": 2058 }, { "epoch": 0.5630298058517911, "grad_norm": 0.15283888578414917, "learning_rate": 5e-05, "loss": 1.6189, "step": 2059 }, { "epoch": 0.5633032540333607, "grad_norm": 0.1531190425157547, "learning_rate": 5e-05, "loss": 1.4706, "step": 2060 }, { "epoch": 0.5635767022149303, "grad_norm": 0.15777452290058136, "learning_rate": 5e-05, "loss": 1.5925, "step": 2061 }, { "epoch": 0.5638501503964999, "grad_norm": 0.16896897554397583, "learning_rate": 5e-05, "loss": 1.6667, "step": 2062 }, { "epoch": 0.5641235985780695, "grad_norm": 0.15329276025295258, "learning_rate": 5e-05, "loss": 1.5491, "step": 2063 }, { "epoch": 0.5643970467596391, "grad_norm": 0.16101738810539246, "learning_rate": 5e-05, "loss": 1.6393, "step": 2064 }, { "epoch": 0.5646704949412087, "grad_norm": 0.1673561930656433, "learning_rate": 5e-05, "loss": 1.6073, "step": 2065 }, { "epoch": 0.5649439431227782, "grad_norm": 0.1541615128517151, "learning_rate": 5e-05, "loss": 1.559, "step": 2066 }, { "epoch": 0.5652173913043478, "grad_norm": 0.16007159650325775, "learning_rate": 5e-05, "loss": 1.584, "step": 2067 }, { "epoch": 0.5654908394859174, "grad_norm": 0.1529233604669571, "learning_rate": 5e-05, "loss": 1.6093, "step": 2068 }, { "epoch": 0.565764287667487, "grad_norm": 0.1614188849925995, "learning_rate": 5e-05, "loss": 1.6406, "step": 2069 }, { "epoch": 0.5660377358490566, "grad_norm": 0.15437230467796326, "learning_rate": 5e-05, "loss": 1.6067, "step": 2070 }, { "epoch": 0.5663111840306262, "grad_norm": 0.15617471933364868, "learning_rate": 5e-05, "loss": 1.6215, "step": 2071 }, { "epoch": 0.5665846322121958, "grad_norm": 0.15771955251693726, "learning_rate": 5e-05, "loss": 1.5109, "step": 2072 }, { "epoch": 0.5668580803937654, "grad_norm": 0.1565471887588501, "learning_rate": 5e-05, "loss": 1.5407, "step": 2073 }, { "epoch": 0.567131528575335, "grad_norm": 0.16085505485534668, "learning_rate": 5e-05, "loss": 1.6118, "step": 2074 }, { "epoch": 0.5674049767569046, "grad_norm": 0.15981802344322205, "learning_rate": 5e-05, "loss": 1.6008, "step": 2075 }, { "epoch": 0.5676784249384742, "grad_norm": 0.16176685690879822, "learning_rate": 5e-05, "loss": 1.6723, "step": 2076 }, { "epoch": 0.5679518731200438, "grad_norm": 0.17309653759002686, "learning_rate": 5e-05, "loss": 1.7196, "step": 2077 }, { "epoch": 0.5682253213016133, "grad_norm": 0.15585723519325256, "learning_rate": 5e-05, "loss": 1.5884, "step": 2078 }, { "epoch": 0.5684987694831829, "grad_norm": 0.1600128412246704, "learning_rate": 5e-05, "loss": 1.6116, "step": 2079 }, { "epoch": 0.5687722176647525, "grad_norm": 0.16564618051052094, "learning_rate": 5e-05, "loss": 1.5695, "step": 2080 }, { "epoch": 0.5690456658463221, "grad_norm": 0.15419046580791473, "learning_rate": 5e-05, "loss": 1.5458, "step": 2081 }, { "epoch": 0.5693191140278917, "grad_norm": 0.17247512936592102, "learning_rate": 5e-05, "loss": 1.6021, "step": 2082 }, { "epoch": 0.5695925622094613, "grad_norm": 0.16553881764411926, "learning_rate": 5e-05, "loss": 1.5771, "step": 2083 }, { "epoch": 0.5698660103910309, "grad_norm": 0.16219867765903473, "learning_rate": 5e-05, "loss": 1.6749, "step": 2084 }, { "epoch": 0.5701394585726005, "grad_norm": 0.17330633103847504, "learning_rate": 5e-05, "loss": 1.6143, "step": 2085 }, { "epoch": 0.5704129067541701, "grad_norm": 0.16274374723434448, "learning_rate": 5e-05, "loss": 1.6205, "step": 2086 }, { "epoch": 0.5706863549357397, "grad_norm": 0.15556305646896362, "learning_rate": 5e-05, "loss": 1.5875, "step": 2087 }, { "epoch": 0.5709598031173093, "grad_norm": 0.15635040402412415, "learning_rate": 5e-05, "loss": 1.6349, "step": 2088 }, { "epoch": 0.5712332512988788, "grad_norm": 0.160813108086586, "learning_rate": 5e-05, "loss": 1.604, "step": 2089 }, { "epoch": 0.5715066994804484, "grad_norm": 0.15628398954868317, "learning_rate": 5e-05, "loss": 1.5023, "step": 2090 }, { "epoch": 0.571780147662018, "grad_norm": 0.18704648315906525, "learning_rate": 5e-05, "loss": 1.6758, "step": 2091 }, { "epoch": 0.5720535958435876, "grad_norm": 0.1672285944223404, "learning_rate": 5e-05, "loss": 1.643, "step": 2092 }, { "epoch": 0.5723270440251572, "grad_norm": 0.15511822700500488, "learning_rate": 5e-05, "loss": 1.6143, "step": 2093 }, { "epoch": 0.5726004922067268, "grad_norm": 0.16968627274036407, "learning_rate": 5e-05, "loss": 1.649, "step": 2094 }, { "epoch": 0.5728739403882964, "grad_norm": 0.16604706645011902, "learning_rate": 5e-05, "loss": 1.6111, "step": 2095 }, { "epoch": 0.573147388569866, "grad_norm": 0.16677343845367432, "learning_rate": 5e-05, "loss": 1.6142, "step": 2096 }, { "epoch": 0.5734208367514356, "grad_norm": 0.16563932597637177, "learning_rate": 5e-05, "loss": 1.6043, "step": 2097 }, { "epoch": 0.5736942849330052, "grad_norm": 0.15738388895988464, "learning_rate": 5e-05, "loss": 1.5803, "step": 2098 }, { "epoch": 0.5739677331145748, "grad_norm": 0.15241163969039917, "learning_rate": 5e-05, "loss": 1.5756, "step": 2099 }, { "epoch": 0.5742411812961444, "grad_norm": 0.1751902997493744, "learning_rate": 5e-05, "loss": 1.6738, "step": 2100 }, { "epoch": 0.5745146294777139, "grad_norm": 0.15986758470535278, "learning_rate": 5e-05, "loss": 1.5693, "step": 2101 }, { "epoch": 0.5747880776592835, "grad_norm": 0.19663068652153015, "learning_rate": 5e-05, "loss": 1.59, "step": 2102 }, { "epoch": 0.5750615258408531, "grad_norm": 0.15884363651275635, "learning_rate": 5e-05, "loss": 1.6378, "step": 2103 }, { "epoch": 0.5753349740224227, "grad_norm": 0.1701025813817978, "learning_rate": 5e-05, "loss": 1.6226, "step": 2104 }, { "epoch": 0.5756084222039923, "grad_norm": 0.1757257878780365, "learning_rate": 5e-05, "loss": 1.7625, "step": 2105 }, { "epoch": 0.5758818703855619, "grad_norm": 0.15682871639728546, "learning_rate": 5e-05, "loss": 1.6039, "step": 2106 }, { "epoch": 0.5761553185671315, "grad_norm": 0.15779271721839905, "learning_rate": 5e-05, "loss": 1.6915, "step": 2107 }, { "epoch": 0.5764287667487011, "grad_norm": 0.15956608951091766, "learning_rate": 5e-05, "loss": 1.622, "step": 2108 }, { "epoch": 0.5767022149302707, "grad_norm": 0.15776754915714264, "learning_rate": 5e-05, "loss": 1.6435, "step": 2109 }, { "epoch": 0.5769756631118403, "grad_norm": 0.17491094768047333, "learning_rate": 5e-05, "loss": 1.5058, "step": 2110 }, { "epoch": 0.5772491112934099, "grad_norm": 0.15431523323059082, "learning_rate": 5e-05, "loss": 1.6608, "step": 2111 }, { "epoch": 0.5775225594749795, "grad_norm": 0.15970687568187714, "learning_rate": 5e-05, "loss": 1.6203, "step": 2112 }, { "epoch": 0.577796007656549, "grad_norm": 0.16885408759117126, "learning_rate": 5e-05, "loss": 1.6354, "step": 2113 }, { "epoch": 0.5780694558381186, "grad_norm": 0.1878814995288849, "learning_rate": 5e-05, "loss": 1.6947, "step": 2114 }, { "epoch": 0.5783429040196882, "grad_norm": 0.17708611488342285, "learning_rate": 5e-05, "loss": 1.5832, "step": 2115 }, { "epoch": 0.5786163522012578, "grad_norm": 0.16223332285881042, "learning_rate": 5e-05, "loss": 1.7159, "step": 2116 }, { "epoch": 0.5788898003828274, "grad_norm": 0.1625894010066986, "learning_rate": 5e-05, "loss": 1.6246, "step": 2117 }, { "epoch": 0.579163248564397, "grad_norm": 0.1753711998462677, "learning_rate": 5e-05, "loss": 1.629, "step": 2118 }, { "epoch": 0.5794366967459667, "grad_norm": 0.15611834824085236, "learning_rate": 5e-05, "loss": 1.6562, "step": 2119 }, { "epoch": 0.5797101449275363, "grad_norm": 0.1676332801580429, "learning_rate": 5e-05, "loss": 1.6435, "step": 2120 }, { "epoch": 0.5799835931091059, "grad_norm": 0.16473741829395294, "learning_rate": 5e-05, "loss": 1.5665, "step": 2121 }, { "epoch": 0.5802570412906755, "grad_norm": 0.1505088359117508, "learning_rate": 5e-05, "loss": 1.5784, "step": 2122 }, { "epoch": 0.5805304894722451, "grad_norm": 0.1565895527601242, "learning_rate": 5e-05, "loss": 1.6327, "step": 2123 }, { "epoch": 0.5808039376538146, "grad_norm": 0.16196846961975098, "learning_rate": 5e-05, "loss": 1.503, "step": 2124 }, { "epoch": 0.5810773858353842, "grad_norm": 0.1584513634443283, "learning_rate": 5e-05, "loss": 1.6884, "step": 2125 }, { "epoch": 0.5813508340169538, "grad_norm": 0.1517631560564041, "learning_rate": 5e-05, "loss": 1.4673, "step": 2126 }, { "epoch": 0.5816242821985234, "grad_norm": 0.15401685237884521, "learning_rate": 5e-05, "loss": 1.5718, "step": 2127 }, { "epoch": 0.581897730380093, "grad_norm": 0.1545703262090683, "learning_rate": 5e-05, "loss": 1.6156, "step": 2128 }, { "epoch": 0.5821711785616626, "grad_norm": 0.1678171306848526, "learning_rate": 5e-05, "loss": 1.7368, "step": 2129 }, { "epoch": 0.5824446267432322, "grad_norm": 0.15641961991786957, "learning_rate": 5e-05, "loss": 1.7208, "step": 2130 }, { "epoch": 0.5827180749248018, "grad_norm": 0.16505669057369232, "learning_rate": 5e-05, "loss": 1.6648, "step": 2131 }, { "epoch": 0.5829915231063714, "grad_norm": 0.15975305438041687, "learning_rate": 5e-05, "loss": 1.577, "step": 2132 }, { "epoch": 0.583264971287941, "grad_norm": 0.15941214561462402, "learning_rate": 5e-05, "loss": 1.6433, "step": 2133 }, { "epoch": 0.5835384194695106, "grad_norm": 0.15701207518577576, "learning_rate": 5e-05, "loss": 1.5609, "step": 2134 }, { "epoch": 0.5838118676510802, "grad_norm": 0.1584734469652176, "learning_rate": 5e-05, "loss": 1.5644, "step": 2135 }, { "epoch": 0.5840853158326497, "grad_norm": 0.149795264005661, "learning_rate": 5e-05, "loss": 1.5459, "step": 2136 }, { "epoch": 0.5843587640142193, "grad_norm": 0.16000156104564667, "learning_rate": 5e-05, "loss": 1.623, "step": 2137 }, { "epoch": 0.5846322121957889, "grad_norm": 0.14752228558063507, "learning_rate": 5e-05, "loss": 1.4835, "step": 2138 }, { "epoch": 0.5849056603773585, "grad_norm": 0.1613810956478119, "learning_rate": 5e-05, "loss": 1.5714, "step": 2139 }, { "epoch": 0.5851791085589281, "grad_norm": 0.15406599640846252, "learning_rate": 5e-05, "loss": 1.5929, "step": 2140 }, { "epoch": 0.5854525567404977, "grad_norm": 0.15958181023597717, "learning_rate": 5e-05, "loss": 1.6232, "step": 2141 }, { "epoch": 0.5857260049220673, "grad_norm": 0.16171377897262573, "learning_rate": 5e-05, "loss": 1.7343, "step": 2142 }, { "epoch": 0.5859994531036369, "grad_norm": 0.15244810283184052, "learning_rate": 5e-05, "loss": 1.6367, "step": 2143 }, { "epoch": 0.5862729012852065, "grad_norm": 0.16295252740383148, "learning_rate": 5e-05, "loss": 1.6822, "step": 2144 }, { "epoch": 0.5865463494667761, "grad_norm": 0.16270601749420166, "learning_rate": 5e-05, "loss": 1.5552, "step": 2145 }, { "epoch": 0.5868197976483457, "grad_norm": 0.1615038365125656, "learning_rate": 5e-05, "loss": 1.6273, "step": 2146 }, { "epoch": 0.5870932458299152, "grad_norm": 0.17501547932624817, "learning_rate": 5e-05, "loss": 1.6598, "step": 2147 }, { "epoch": 0.5873666940114848, "grad_norm": 0.15854844450950623, "learning_rate": 5e-05, "loss": 1.6657, "step": 2148 }, { "epoch": 0.5876401421930544, "grad_norm": 0.15156009793281555, "learning_rate": 5e-05, "loss": 1.6571, "step": 2149 }, { "epoch": 0.587913590374624, "grad_norm": 0.15848124027252197, "learning_rate": 5e-05, "loss": 1.6176, "step": 2150 }, { "epoch": 0.5881870385561936, "grad_norm": 0.15376807749271393, "learning_rate": 5e-05, "loss": 1.659, "step": 2151 }, { "epoch": 0.5884604867377632, "grad_norm": 0.15940286219120026, "learning_rate": 5e-05, "loss": 1.6093, "step": 2152 }, { "epoch": 0.5887339349193328, "grad_norm": 0.1563788503408432, "learning_rate": 5e-05, "loss": 1.7226, "step": 2153 }, { "epoch": 0.5890073831009024, "grad_norm": 0.14890335500240326, "learning_rate": 5e-05, "loss": 1.5368, "step": 2154 }, { "epoch": 0.589280831282472, "grad_norm": 0.16119109094142914, "learning_rate": 5e-05, "loss": 1.64, "step": 2155 }, { "epoch": 0.5895542794640416, "grad_norm": 0.15738506615161896, "learning_rate": 5e-05, "loss": 1.6306, "step": 2156 }, { "epoch": 0.5898277276456112, "grad_norm": 0.15417338907718658, "learning_rate": 5e-05, "loss": 1.5886, "step": 2157 }, { "epoch": 0.5901011758271808, "grad_norm": 0.15935613214969635, "learning_rate": 5e-05, "loss": 1.7004, "step": 2158 }, { "epoch": 0.5903746240087503, "grad_norm": 0.15843814611434937, "learning_rate": 5e-05, "loss": 1.684, "step": 2159 }, { "epoch": 0.5906480721903199, "grad_norm": 0.15500840544700623, "learning_rate": 5e-05, "loss": 1.6215, "step": 2160 }, { "epoch": 0.5909215203718895, "grad_norm": 0.15437741577625275, "learning_rate": 5e-05, "loss": 1.5473, "step": 2161 }, { "epoch": 0.5911949685534591, "grad_norm": 0.17060165107250214, "learning_rate": 5e-05, "loss": 1.6937, "step": 2162 }, { "epoch": 0.5914684167350287, "grad_norm": 0.15199008584022522, "learning_rate": 5e-05, "loss": 1.5557, "step": 2163 }, { "epoch": 0.5917418649165983, "grad_norm": 0.16401898860931396, "learning_rate": 5e-05, "loss": 1.6955, "step": 2164 }, { "epoch": 0.5920153130981679, "grad_norm": 0.16618569195270538, "learning_rate": 5e-05, "loss": 1.6962, "step": 2165 }, { "epoch": 0.5922887612797375, "grad_norm": 0.1566200703382492, "learning_rate": 5e-05, "loss": 1.6495, "step": 2166 }, { "epoch": 0.5925622094613071, "grad_norm": 0.15811263024806976, "learning_rate": 5e-05, "loss": 1.6775, "step": 2167 }, { "epoch": 0.5928356576428767, "grad_norm": 0.16176708042621613, "learning_rate": 5e-05, "loss": 1.5412, "step": 2168 }, { "epoch": 0.5931091058244463, "grad_norm": 0.15909145772457123, "learning_rate": 5e-05, "loss": 1.6401, "step": 2169 }, { "epoch": 0.5933825540060158, "grad_norm": 0.15169541537761688, "learning_rate": 5e-05, "loss": 1.6311, "step": 2170 }, { "epoch": 0.5936560021875854, "grad_norm": 0.1585574597120285, "learning_rate": 5e-05, "loss": 1.613, "step": 2171 }, { "epoch": 0.593929450369155, "grad_norm": 0.1560421884059906, "learning_rate": 5e-05, "loss": 1.581, "step": 2172 }, { "epoch": 0.5942028985507246, "grad_norm": 0.1590547412633896, "learning_rate": 5e-05, "loss": 1.581, "step": 2173 }, { "epoch": 0.5944763467322942, "grad_norm": 0.1633371263742447, "learning_rate": 5e-05, "loss": 1.5969, "step": 2174 }, { "epoch": 0.5947497949138638, "grad_norm": 0.16914770007133484, "learning_rate": 5e-05, "loss": 1.6087, "step": 2175 }, { "epoch": 0.5950232430954334, "grad_norm": 0.15823769569396973, "learning_rate": 5e-05, "loss": 1.6317, "step": 2176 }, { "epoch": 0.595296691277003, "grad_norm": 0.1536397933959961, "learning_rate": 5e-05, "loss": 1.5132, "step": 2177 }, { "epoch": 0.5955701394585726, "grad_norm": 0.16653746366500854, "learning_rate": 5e-05, "loss": 1.5937, "step": 2178 }, { "epoch": 0.5958435876401422, "grad_norm": 0.17655491828918457, "learning_rate": 5e-05, "loss": 1.7522, "step": 2179 }, { "epoch": 0.5961170358217118, "grad_norm": 0.16123391687870026, "learning_rate": 5e-05, "loss": 1.6493, "step": 2180 }, { "epoch": 0.5963904840032814, "grad_norm": 0.16177600622177124, "learning_rate": 5e-05, "loss": 1.6801, "step": 2181 }, { "epoch": 0.5966639321848509, "grad_norm": 0.16101764142513275, "learning_rate": 5e-05, "loss": 1.6148, "step": 2182 }, { "epoch": 0.5969373803664205, "grad_norm": 0.15031690895557404, "learning_rate": 5e-05, "loss": 1.5556, "step": 2183 }, { "epoch": 0.5972108285479901, "grad_norm": 0.15750014781951904, "learning_rate": 5e-05, "loss": 1.6032, "step": 2184 }, { "epoch": 0.5974842767295597, "grad_norm": 0.16449135541915894, "learning_rate": 5e-05, "loss": 1.6762, "step": 2185 }, { "epoch": 0.5977577249111293, "grad_norm": 0.1698635369539261, "learning_rate": 5e-05, "loss": 1.6279, "step": 2186 }, { "epoch": 0.5980311730926989, "grad_norm": 0.16983488202095032, "learning_rate": 5e-05, "loss": 1.5975, "step": 2187 }, { "epoch": 0.5983046212742685, "grad_norm": 0.16298316419124603, "learning_rate": 5e-05, "loss": 1.603, "step": 2188 }, { "epoch": 0.5985780694558381, "grad_norm": 0.16714924573898315, "learning_rate": 5e-05, "loss": 1.6296, "step": 2189 }, { "epoch": 0.5988515176374077, "grad_norm": 0.16718335449695587, "learning_rate": 5e-05, "loss": 1.7187, "step": 2190 }, { "epoch": 0.5991249658189773, "grad_norm": 0.16621999442577362, "learning_rate": 5e-05, "loss": 1.631, "step": 2191 }, { "epoch": 0.5993984140005469, "grad_norm": 0.15571008622646332, "learning_rate": 5e-05, "loss": 1.6298, "step": 2192 }, { "epoch": 0.5996718621821165, "grad_norm": 0.16420917212963104, "learning_rate": 5e-05, "loss": 1.721, "step": 2193 }, { "epoch": 0.599945310363686, "grad_norm": 0.174006387591362, "learning_rate": 5e-05, "loss": 1.6524, "step": 2194 }, { "epoch": 0.6002187585452556, "grad_norm": 0.1774541437625885, "learning_rate": 5e-05, "loss": 1.7686, "step": 2195 }, { "epoch": 0.6004922067268252, "grad_norm": 0.1665443480014801, "learning_rate": 5e-05, "loss": 1.6123, "step": 2196 }, { "epoch": 0.6007656549083948, "grad_norm": 0.1589983105659485, "learning_rate": 5e-05, "loss": 1.6771, "step": 2197 }, { "epoch": 0.6010391030899644, "grad_norm": 0.171598419547081, "learning_rate": 5e-05, "loss": 1.6487, "step": 2198 }, { "epoch": 0.601312551271534, "grad_norm": 0.16494937241077423, "learning_rate": 5e-05, "loss": 1.5661, "step": 2199 }, { "epoch": 0.6015859994531036, "grad_norm": 0.16418486833572388, "learning_rate": 5e-05, "loss": 1.5924, "step": 2200 }, { "epoch": 0.6018594476346733, "grad_norm": 0.16469943523406982, "learning_rate": 5e-05, "loss": 1.6722, "step": 2201 }, { "epoch": 0.6021328958162429, "grad_norm": 0.1632019281387329, "learning_rate": 5e-05, "loss": 1.5975, "step": 2202 }, { "epoch": 0.6024063439978125, "grad_norm": 0.1550121158361435, "learning_rate": 5e-05, "loss": 1.6071, "step": 2203 }, { "epoch": 0.6026797921793821, "grad_norm": 0.16901268064975739, "learning_rate": 5e-05, "loss": 1.7557, "step": 2204 }, { "epoch": 0.6029532403609515, "grad_norm": 0.16295814514160156, "learning_rate": 5e-05, "loss": 1.6243, "step": 2205 }, { "epoch": 0.6032266885425212, "grad_norm": 0.15237173438072205, "learning_rate": 5e-05, "loss": 1.5015, "step": 2206 }, { "epoch": 0.6035001367240908, "grad_norm": 0.17193639278411865, "learning_rate": 5e-05, "loss": 1.6746, "step": 2207 }, { "epoch": 0.6037735849056604, "grad_norm": 0.154008686542511, "learning_rate": 5e-05, "loss": 1.5166, "step": 2208 }, { "epoch": 0.60404703308723, "grad_norm": 0.1634528785943985, "learning_rate": 5e-05, "loss": 1.6208, "step": 2209 }, { "epoch": 0.6043204812687996, "grad_norm": 0.16617950797080994, "learning_rate": 5e-05, "loss": 1.6649, "step": 2210 }, { "epoch": 0.6045939294503692, "grad_norm": 0.1584254801273346, "learning_rate": 5e-05, "loss": 1.6875, "step": 2211 }, { "epoch": 0.6048673776319388, "grad_norm": 0.15937229990959167, "learning_rate": 5e-05, "loss": 1.5211, "step": 2212 }, { "epoch": 0.6051408258135084, "grad_norm": 0.15841583907604218, "learning_rate": 5e-05, "loss": 1.5409, "step": 2213 }, { "epoch": 0.605414273995078, "grad_norm": 0.15823234617710114, "learning_rate": 5e-05, "loss": 1.6317, "step": 2214 }, { "epoch": 0.6056877221766476, "grad_norm": 0.15863433480262756, "learning_rate": 5e-05, "loss": 1.59, "step": 2215 }, { "epoch": 0.6059611703582172, "grad_norm": 0.16127796471118927, "learning_rate": 5e-05, "loss": 1.6605, "step": 2216 }, { "epoch": 0.6062346185397867, "grad_norm": 0.15523792803287506, "learning_rate": 5e-05, "loss": 1.551, "step": 2217 }, { "epoch": 0.6065080667213563, "grad_norm": 0.16734172403812408, "learning_rate": 5e-05, "loss": 1.5959, "step": 2218 }, { "epoch": 0.6067815149029259, "grad_norm": 0.16805590689182281, "learning_rate": 5e-05, "loss": 1.6665, "step": 2219 }, { "epoch": 0.6070549630844955, "grad_norm": 0.1635158360004425, "learning_rate": 5e-05, "loss": 1.7377, "step": 2220 }, { "epoch": 0.6073284112660651, "grad_norm": 0.17108480632305145, "learning_rate": 5e-05, "loss": 1.6587, "step": 2221 }, { "epoch": 0.6076018594476347, "grad_norm": 0.1763361096382141, "learning_rate": 5e-05, "loss": 1.735, "step": 2222 }, { "epoch": 0.6078753076292043, "grad_norm": 0.153673455119133, "learning_rate": 5e-05, "loss": 1.5807, "step": 2223 }, { "epoch": 0.6081487558107739, "grad_norm": 0.17884378135204315, "learning_rate": 5e-05, "loss": 1.7078, "step": 2224 }, { "epoch": 0.6084222039923435, "grad_norm": 0.2724404036998749, "learning_rate": 5e-05, "loss": 1.72, "step": 2225 }, { "epoch": 0.6086956521739131, "grad_norm": 0.16155698895454407, "learning_rate": 5e-05, "loss": 1.6043, "step": 2226 }, { "epoch": 0.6089691003554827, "grad_norm": 0.17532581090927124, "learning_rate": 5e-05, "loss": 1.6597, "step": 2227 }, { "epoch": 0.6092425485370522, "grad_norm": 0.1655532866716385, "learning_rate": 5e-05, "loss": 1.6069, "step": 2228 }, { "epoch": 0.6095159967186218, "grad_norm": 0.158722385764122, "learning_rate": 5e-05, "loss": 1.5885, "step": 2229 }, { "epoch": 0.6097894449001914, "grad_norm": 0.17299902439117432, "learning_rate": 5e-05, "loss": 1.5863, "step": 2230 }, { "epoch": 0.610062893081761, "grad_norm": 0.17708896100521088, "learning_rate": 5e-05, "loss": 1.656, "step": 2231 }, { "epoch": 0.6103363412633306, "grad_norm": 0.16440723836421967, "learning_rate": 5e-05, "loss": 1.6651, "step": 2232 }, { "epoch": 0.6106097894449002, "grad_norm": 0.16679327189922333, "learning_rate": 5e-05, "loss": 1.6108, "step": 2233 }, { "epoch": 0.6108832376264698, "grad_norm": 0.18106336891651154, "learning_rate": 5e-05, "loss": 1.6973, "step": 2234 }, { "epoch": 0.6111566858080394, "grad_norm": 0.1723693609237671, "learning_rate": 5e-05, "loss": 1.6928, "step": 2235 }, { "epoch": 0.611430133989609, "grad_norm": 0.1619867980480194, "learning_rate": 5e-05, "loss": 1.5845, "step": 2236 }, { "epoch": 0.6117035821711786, "grad_norm": 0.18252336978912354, "learning_rate": 5e-05, "loss": 1.6004, "step": 2237 }, { "epoch": 0.6119770303527482, "grad_norm": 0.166873037815094, "learning_rate": 5e-05, "loss": 1.6284, "step": 2238 }, { "epoch": 0.6122504785343178, "grad_norm": 0.16315174102783203, "learning_rate": 5e-05, "loss": 1.607, "step": 2239 }, { "epoch": 0.6125239267158873, "grad_norm": 0.17975984513759613, "learning_rate": 5e-05, "loss": 1.6128, "step": 2240 }, { "epoch": 0.6127973748974569, "grad_norm": 0.1571521908044815, "learning_rate": 5e-05, "loss": 1.6401, "step": 2241 }, { "epoch": 0.6130708230790265, "grad_norm": 0.16314658522605896, "learning_rate": 5e-05, "loss": 1.5639, "step": 2242 }, { "epoch": 0.6133442712605961, "grad_norm": 0.1598280519247055, "learning_rate": 5e-05, "loss": 1.5334, "step": 2243 }, { "epoch": 0.6136177194421657, "grad_norm": 0.15419380366802216, "learning_rate": 5e-05, "loss": 1.5093, "step": 2244 }, { "epoch": 0.6138911676237353, "grad_norm": 0.15684397518634796, "learning_rate": 5e-05, "loss": 1.6236, "step": 2245 }, { "epoch": 0.6141646158053049, "grad_norm": 0.1650727540254593, "learning_rate": 5e-05, "loss": 1.7009, "step": 2246 }, { "epoch": 0.6144380639868745, "grad_norm": 0.16277483105659485, "learning_rate": 5e-05, "loss": 1.6878, "step": 2247 }, { "epoch": 0.6147115121684441, "grad_norm": 0.1605841964483261, "learning_rate": 5e-05, "loss": 1.6445, "step": 2248 }, { "epoch": 0.6149849603500137, "grad_norm": 0.16627003252506256, "learning_rate": 5e-05, "loss": 1.6217, "step": 2249 }, { "epoch": 0.6152584085315833, "grad_norm": 0.15815889835357666, "learning_rate": 5e-05, "loss": 1.6161, "step": 2250 }, { "epoch": 0.6155318567131528, "grad_norm": 0.15406928956508636, "learning_rate": 5e-05, "loss": 1.6202, "step": 2251 }, { "epoch": 0.6158053048947224, "grad_norm": 0.1534588783979416, "learning_rate": 5e-05, "loss": 1.5494, "step": 2252 }, { "epoch": 0.616078753076292, "grad_norm": 0.15792147815227509, "learning_rate": 5e-05, "loss": 1.5803, "step": 2253 }, { "epoch": 0.6163522012578616, "grad_norm": 0.15371176600456238, "learning_rate": 5e-05, "loss": 1.5671, "step": 2254 }, { "epoch": 0.6166256494394312, "grad_norm": 0.15262968838214874, "learning_rate": 5e-05, "loss": 1.5196, "step": 2255 }, { "epoch": 0.6168990976210008, "grad_norm": 0.15933872759342194, "learning_rate": 5e-05, "loss": 1.5735, "step": 2256 }, { "epoch": 0.6171725458025704, "grad_norm": 0.15623560547828674, "learning_rate": 5e-05, "loss": 1.529, "step": 2257 }, { "epoch": 0.61744599398414, "grad_norm": 0.15916681289672852, "learning_rate": 5e-05, "loss": 1.5828, "step": 2258 }, { "epoch": 0.6177194421657096, "grad_norm": 0.15046894550323486, "learning_rate": 5e-05, "loss": 1.6456, "step": 2259 }, { "epoch": 0.6179928903472792, "grad_norm": 0.15784558653831482, "learning_rate": 5e-05, "loss": 1.5167, "step": 2260 }, { "epoch": 0.6182663385288488, "grad_norm": 0.16395771503448486, "learning_rate": 5e-05, "loss": 1.5806, "step": 2261 }, { "epoch": 0.6185397867104184, "grad_norm": 0.15254901349544525, "learning_rate": 5e-05, "loss": 1.5617, "step": 2262 }, { "epoch": 0.6188132348919879, "grad_norm": 0.15680168569087982, "learning_rate": 5e-05, "loss": 1.5795, "step": 2263 }, { "epoch": 0.6190866830735575, "grad_norm": 0.1724022775888443, "learning_rate": 5e-05, "loss": 1.6245, "step": 2264 }, { "epoch": 0.6193601312551271, "grad_norm": 0.1564149111509323, "learning_rate": 5e-05, "loss": 1.5742, "step": 2265 }, { "epoch": 0.6196335794366967, "grad_norm": 0.18149738013744354, "learning_rate": 5e-05, "loss": 1.5351, "step": 2266 }, { "epoch": 0.6199070276182663, "grad_norm": 0.15708599984645844, "learning_rate": 5e-05, "loss": 1.6182, "step": 2267 }, { "epoch": 0.6201804757998359, "grad_norm": 0.16995084285736084, "learning_rate": 5e-05, "loss": 1.6893, "step": 2268 }, { "epoch": 0.6204539239814055, "grad_norm": 0.16211822628974915, "learning_rate": 5e-05, "loss": 1.702, "step": 2269 }, { "epoch": 0.6207273721629751, "grad_norm": 0.16163219511508942, "learning_rate": 5e-05, "loss": 1.6688, "step": 2270 }, { "epoch": 0.6210008203445447, "grad_norm": 0.16371332108974457, "learning_rate": 5e-05, "loss": 1.5554, "step": 2271 }, { "epoch": 0.6212742685261143, "grad_norm": 0.1574903279542923, "learning_rate": 5e-05, "loss": 1.6372, "step": 2272 }, { "epoch": 0.6215477167076839, "grad_norm": 0.15815797448158264, "learning_rate": 5e-05, "loss": 1.5674, "step": 2273 }, { "epoch": 0.6218211648892535, "grad_norm": 0.1578124612569809, "learning_rate": 5e-05, "loss": 1.5226, "step": 2274 }, { "epoch": 0.622094613070823, "grad_norm": 0.1595599204301834, "learning_rate": 5e-05, "loss": 1.5657, "step": 2275 }, { "epoch": 0.6223680612523926, "grad_norm": 0.15654776990413666, "learning_rate": 5e-05, "loss": 1.6009, "step": 2276 }, { "epoch": 0.6226415094339622, "grad_norm": 0.1564917415380478, "learning_rate": 5e-05, "loss": 1.6166, "step": 2277 }, { "epoch": 0.6229149576155318, "grad_norm": 0.1595001518726349, "learning_rate": 5e-05, "loss": 1.5389, "step": 2278 }, { "epoch": 0.6231884057971014, "grad_norm": 0.16073504090309143, "learning_rate": 5e-05, "loss": 1.5948, "step": 2279 }, { "epoch": 0.623461853978671, "grad_norm": 0.15986725687980652, "learning_rate": 5e-05, "loss": 1.6057, "step": 2280 }, { "epoch": 0.6237353021602406, "grad_norm": 0.1577160656452179, "learning_rate": 5e-05, "loss": 1.6144, "step": 2281 }, { "epoch": 0.6240087503418102, "grad_norm": 0.1593600958585739, "learning_rate": 5e-05, "loss": 1.6538, "step": 2282 }, { "epoch": 0.6242821985233798, "grad_norm": 0.15497250854969025, "learning_rate": 5e-05, "loss": 1.5714, "step": 2283 }, { "epoch": 0.6245556467049495, "grad_norm": 0.1659761667251587, "learning_rate": 5e-05, "loss": 1.6146, "step": 2284 }, { "epoch": 0.624829094886519, "grad_norm": 0.16305339336395264, "learning_rate": 5e-05, "loss": 1.6935, "step": 2285 }, { "epoch": 0.6251025430680885, "grad_norm": 0.16828620433807373, "learning_rate": 5e-05, "loss": 1.6341, "step": 2286 }, { "epoch": 0.6253759912496581, "grad_norm": 0.16056501865386963, "learning_rate": 5e-05, "loss": 1.6103, "step": 2287 }, { "epoch": 0.6256494394312277, "grad_norm": 0.1658848077058792, "learning_rate": 5e-05, "loss": 1.6548, "step": 2288 }, { "epoch": 0.6259228876127974, "grad_norm": 0.1599251627922058, "learning_rate": 5e-05, "loss": 1.5715, "step": 2289 }, { "epoch": 0.626196335794367, "grad_norm": 0.15655700862407684, "learning_rate": 5e-05, "loss": 1.5145, "step": 2290 }, { "epoch": 0.6264697839759366, "grad_norm": 0.16632331907749176, "learning_rate": 5e-05, "loss": 1.6599, "step": 2291 }, { "epoch": 0.6267432321575062, "grad_norm": 0.16444595158100128, "learning_rate": 5e-05, "loss": 1.6591, "step": 2292 }, { "epoch": 0.6270166803390758, "grad_norm": 0.1640361100435257, "learning_rate": 5e-05, "loss": 1.5751, "step": 2293 }, { "epoch": 0.6272901285206454, "grad_norm": 0.16003577411174774, "learning_rate": 5e-05, "loss": 1.6096, "step": 2294 }, { "epoch": 0.627563576702215, "grad_norm": 0.16625234484672546, "learning_rate": 5e-05, "loss": 1.6685, "step": 2295 }, { "epoch": 0.6278370248837846, "grad_norm": 0.15295128524303436, "learning_rate": 5e-05, "loss": 1.569, "step": 2296 }, { "epoch": 0.6281104730653542, "grad_norm": 0.16834108531475067, "learning_rate": 5e-05, "loss": 1.6813, "step": 2297 }, { "epoch": 0.6283839212469237, "grad_norm": 0.16331374645233154, "learning_rate": 5e-05, "loss": 1.5879, "step": 2298 }, { "epoch": 0.6286573694284933, "grad_norm": 0.17699094116687775, "learning_rate": 5e-05, "loss": 1.673, "step": 2299 }, { "epoch": 0.6289308176100629, "grad_norm": 0.1626228392124176, "learning_rate": 5e-05, "loss": 1.6703, "step": 2300 }, { "epoch": 0.6292042657916325, "grad_norm": 0.16163185238838196, "learning_rate": 5e-05, "loss": 1.6249, "step": 2301 }, { "epoch": 0.6294777139732021, "grad_norm": 0.17444822192192078, "learning_rate": 5e-05, "loss": 1.5542, "step": 2302 }, { "epoch": 0.6297511621547717, "grad_norm": 0.1661493182182312, "learning_rate": 5e-05, "loss": 1.6295, "step": 2303 }, { "epoch": 0.6300246103363413, "grad_norm": 0.1628965586423874, "learning_rate": 5e-05, "loss": 1.5952, "step": 2304 }, { "epoch": 0.6302980585179109, "grad_norm": 0.1656888723373413, "learning_rate": 5e-05, "loss": 1.594, "step": 2305 }, { "epoch": 0.6305715066994805, "grad_norm": 0.15425047278404236, "learning_rate": 5e-05, "loss": 1.4985, "step": 2306 }, { "epoch": 0.6308449548810501, "grad_norm": 0.15567493438720703, "learning_rate": 5e-05, "loss": 1.5571, "step": 2307 }, { "epoch": 0.6311184030626197, "grad_norm": 0.17899873852729797, "learning_rate": 5e-05, "loss": 1.6662, "step": 2308 }, { "epoch": 0.6313918512441892, "grad_norm": 0.15900376439094543, "learning_rate": 5e-05, "loss": 1.6517, "step": 2309 }, { "epoch": 0.6316652994257588, "grad_norm": 0.15737038850784302, "learning_rate": 5e-05, "loss": 1.5635, "step": 2310 }, { "epoch": 0.6319387476073284, "grad_norm": 0.16273202002048492, "learning_rate": 5e-05, "loss": 1.5987, "step": 2311 }, { "epoch": 0.632212195788898, "grad_norm": 0.15607643127441406, "learning_rate": 5e-05, "loss": 1.5978, "step": 2312 }, { "epoch": 0.6324856439704676, "grad_norm": 0.16705310344696045, "learning_rate": 5e-05, "loss": 1.6226, "step": 2313 }, { "epoch": 0.6327590921520372, "grad_norm": 0.15748460590839386, "learning_rate": 5e-05, "loss": 1.5379, "step": 2314 }, { "epoch": 0.6330325403336068, "grad_norm": 0.16817143559455872, "learning_rate": 5e-05, "loss": 1.6564, "step": 2315 }, { "epoch": 0.6333059885151764, "grad_norm": 0.15591007471084595, "learning_rate": 5e-05, "loss": 1.539, "step": 2316 }, { "epoch": 0.633579436696746, "grad_norm": 0.1618664264678955, "learning_rate": 5e-05, "loss": 1.5866, "step": 2317 }, { "epoch": 0.6338528848783156, "grad_norm": 0.1688256859779358, "learning_rate": 5e-05, "loss": 1.7367, "step": 2318 }, { "epoch": 0.6341263330598852, "grad_norm": 0.17073442041873932, "learning_rate": 5e-05, "loss": 1.6241, "step": 2319 }, { "epoch": 0.6343997812414548, "grad_norm": 0.16293637454509735, "learning_rate": 5e-05, "loss": 1.5913, "step": 2320 }, { "epoch": 0.6346732294230243, "grad_norm": 0.15301881730556488, "learning_rate": 5e-05, "loss": 1.5685, "step": 2321 }, { "epoch": 0.6349466776045939, "grad_norm": 0.1594955027103424, "learning_rate": 5e-05, "loss": 1.6564, "step": 2322 }, { "epoch": 0.6352201257861635, "grad_norm": 0.15445536375045776, "learning_rate": 5e-05, "loss": 1.5536, "step": 2323 }, { "epoch": 0.6354935739677331, "grad_norm": 0.166269913315773, "learning_rate": 5e-05, "loss": 1.6064, "step": 2324 }, { "epoch": 0.6357670221493027, "grad_norm": 0.15730029344558716, "learning_rate": 5e-05, "loss": 1.5387, "step": 2325 }, { "epoch": 0.6360404703308723, "grad_norm": 0.15941552817821503, "learning_rate": 5e-05, "loss": 1.6213, "step": 2326 }, { "epoch": 0.6363139185124419, "grad_norm": 0.16579844057559967, "learning_rate": 5e-05, "loss": 1.5993, "step": 2327 }, { "epoch": 0.6365873666940115, "grad_norm": 0.17182214558124542, "learning_rate": 5e-05, "loss": 1.6941, "step": 2328 }, { "epoch": 0.6368608148755811, "grad_norm": 0.1528700590133667, "learning_rate": 5e-05, "loss": 1.5408, "step": 2329 }, { "epoch": 0.6371342630571507, "grad_norm": 0.15841877460479736, "learning_rate": 5e-05, "loss": 1.5145, "step": 2330 }, { "epoch": 0.6374077112387203, "grad_norm": 0.17149239778518677, "learning_rate": 5e-05, "loss": 1.7896, "step": 2331 }, { "epoch": 0.6376811594202898, "grad_norm": 0.17149433493614197, "learning_rate": 5e-05, "loss": 1.6635, "step": 2332 }, { "epoch": 0.6379546076018594, "grad_norm": 0.1553071290254593, "learning_rate": 5e-05, "loss": 1.6115, "step": 2333 }, { "epoch": 0.638228055783429, "grad_norm": 0.17740413546562195, "learning_rate": 5e-05, "loss": 1.6419, "step": 2334 }, { "epoch": 0.6385015039649986, "grad_norm": 0.16571162641048431, "learning_rate": 5e-05, "loss": 1.6287, "step": 2335 }, { "epoch": 0.6387749521465682, "grad_norm": 0.16401976346969604, "learning_rate": 5e-05, "loss": 1.5881, "step": 2336 }, { "epoch": 0.6390484003281378, "grad_norm": 0.16186074912548065, "learning_rate": 5e-05, "loss": 1.644, "step": 2337 }, { "epoch": 0.6393218485097074, "grad_norm": 0.16843372583389282, "learning_rate": 5e-05, "loss": 1.708, "step": 2338 }, { "epoch": 0.639595296691277, "grad_norm": 0.1627928465604782, "learning_rate": 5e-05, "loss": 1.623, "step": 2339 }, { "epoch": 0.6398687448728466, "grad_norm": 0.15548692643642426, "learning_rate": 5e-05, "loss": 1.6938, "step": 2340 }, { "epoch": 0.6401421930544162, "grad_norm": 0.15752455592155457, "learning_rate": 5e-05, "loss": 1.6264, "step": 2341 }, { "epoch": 0.6404156412359858, "grad_norm": 0.1618221253156662, "learning_rate": 5e-05, "loss": 1.6214, "step": 2342 }, { "epoch": 0.6406890894175554, "grad_norm": 0.15189188718795776, "learning_rate": 5e-05, "loss": 1.6024, "step": 2343 }, { "epoch": 0.6409625375991249, "grad_norm": 0.16147050261497498, "learning_rate": 5e-05, "loss": 1.6617, "step": 2344 }, { "epoch": 0.6412359857806945, "grad_norm": 0.16319455206394196, "learning_rate": 5e-05, "loss": 1.6428, "step": 2345 }, { "epoch": 0.6415094339622641, "grad_norm": 0.15911765396595, "learning_rate": 5e-05, "loss": 1.5411, "step": 2346 }, { "epoch": 0.6417828821438337, "grad_norm": 0.15844199061393738, "learning_rate": 5e-05, "loss": 1.5859, "step": 2347 }, { "epoch": 0.6420563303254033, "grad_norm": 0.16009950637817383, "learning_rate": 5e-05, "loss": 1.6266, "step": 2348 }, { "epoch": 0.6423297785069729, "grad_norm": 0.15512198209762573, "learning_rate": 5e-05, "loss": 1.6013, "step": 2349 }, { "epoch": 0.6426032266885425, "grad_norm": 0.1645142138004303, "learning_rate": 5e-05, "loss": 1.545, "step": 2350 }, { "epoch": 0.6428766748701121, "grad_norm": 0.15614928305149078, "learning_rate": 5e-05, "loss": 1.6131, "step": 2351 }, { "epoch": 0.6431501230516817, "grad_norm": 0.16520023345947266, "learning_rate": 5e-05, "loss": 1.6631, "step": 2352 }, { "epoch": 0.6434235712332513, "grad_norm": 0.164496511220932, "learning_rate": 5e-05, "loss": 1.6092, "step": 2353 }, { "epoch": 0.6436970194148209, "grad_norm": 0.16124795377254486, "learning_rate": 5e-05, "loss": 1.5647, "step": 2354 }, { "epoch": 0.6439704675963905, "grad_norm": 0.1593211442232132, "learning_rate": 5e-05, "loss": 1.5575, "step": 2355 }, { "epoch": 0.64424391577796, "grad_norm": 0.15209631621837616, "learning_rate": 5e-05, "loss": 1.5939, "step": 2356 }, { "epoch": 0.6445173639595296, "grad_norm": 0.1607188880443573, "learning_rate": 5e-05, "loss": 1.6497, "step": 2357 }, { "epoch": 0.6447908121410992, "grad_norm": 0.1642126739025116, "learning_rate": 5e-05, "loss": 1.6879, "step": 2358 }, { "epoch": 0.6450642603226688, "grad_norm": 0.15646396577358246, "learning_rate": 5e-05, "loss": 1.5971, "step": 2359 }, { "epoch": 0.6453377085042384, "grad_norm": 0.16889351606369019, "learning_rate": 5e-05, "loss": 1.6108, "step": 2360 }, { "epoch": 0.645611156685808, "grad_norm": 0.15696920454502106, "learning_rate": 5e-05, "loss": 1.6325, "step": 2361 }, { "epoch": 0.6458846048673776, "grad_norm": 0.17429569363594055, "learning_rate": 5e-05, "loss": 1.6995, "step": 2362 }, { "epoch": 0.6461580530489472, "grad_norm": 0.16672879457473755, "learning_rate": 5e-05, "loss": 1.6846, "step": 2363 }, { "epoch": 0.6464315012305168, "grad_norm": 0.15300826728343964, "learning_rate": 5e-05, "loss": 1.5222, "step": 2364 }, { "epoch": 0.6467049494120864, "grad_norm": 0.16159702837467194, "learning_rate": 5e-05, "loss": 1.6988, "step": 2365 }, { "epoch": 0.646978397593656, "grad_norm": 0.1599888950586319, "learning_rate": 5e-05, "loss": 1.6864, "step": 2366 }, { "epoch": 0.6472518457752255, "grad_norm": 0.15492287278175354, "learning_rate": 5e-05, "loss": 1.5455, "step": 2367 }, { "epoch": 0.6475252939567951, "grad_norm": 0.16030439734458923, "learning_rate": 5e-05, "loss": 1.6159, "step": 2368 }, { "epoch": 0.6477987421383647, "grad_norm": 0.16615253686904907, "learning_rate": 5e-05, "loss": 1.6585, "step": 2369 }, { "epoch": 0.6480721903199343, "grad_norm": 0.15947528183460236, "learning_rate": 5e-05, "loss": 1.5924, "step": 2370 }, { "epoch": 0.648345638501504, "grad_norm": 0.1585034430027008, "learning_rate": 5e-05, "loss": 1.5964, "step": 2371 }, { "epoch": 0.6486190866830736, "grad_norm": 0.15772749483585358, "learning_rate": 5e-05, "loss": 1.5869, "step": 2372 }, { "epoch": 0.6488925348646432, "grad_norm": 0.16060665249824524, "learning_rate": 5e-05, "loss": 1.6111, "step": 2373 }, { "epoch": 0.6491659830462128, "grad_norm": 0.15704810619354248, "learning_rate": 5e-05, "loss": 1.6257, "step": 2374 }, { "epoch": 0.6494394312277824, "grad_norm": 0.16538581252098083, "learning_rate": 5e-05, "loss": 1.5543, "step": 2375 }, { "epoch": 0.649712879409352, "grad_norm": 0.15830865502357483, "learning_rate": 5e-05, "loss": 1.6593, "step": 2376 }, { "epoch": 0.6499863275909216, "grad_norm": 0.17541712522506714, "learning_rate": 5e-05, "loss": 1.8108, "step": 2377 }, { "epoch": 0.6502597757724912, "grad_norm": 0.15826934576034546, "learning_rate": 5e-05, "loss": 1.6312, "step": 2378 }, { "epoch": 0.6505332239540607, "grad_norm": 0.1586860567331314, "learning_rate": 5e-05, "loss": 1.6282, "step": 2379 }, { "epoch": 0.6508066721356303, "grad_norm": 0.15930341184139252, "learning_rate": 5e-05, "loss": 1.6039, "step": 2380 }, { "epoch": 0.6510801203171999, "grad_norm": 0.16288048028945923, "learning_rate": 5e-05, "loss": 1.64, "step": 2381 }, { "epoch": 0.6513535684987695, "grad_norm": 0.15390625596046448, "learning_rate": 5e-05, "loss": 1.5283, "step": 2382 }, { "epoch": 0.6516270166803391, "grad_norm": 0.15672685205936432, "learning_rate": 5e-05, "loss": 1.5796, "step": 2383 }, { "epoch": 0.6519004648619087, "grad_norm": 0.16665740311145782, "learning_rate": 5e-05, "loss": 1.6128, "step": 2384 }, { "epoch": 0.6521739130434783, "grad_norm": 0.16864562034606934, "learning_rate": 5e-05, "loss": 1.5693, "step": 2385 }, { "epoch": 0.6524473612250479, "grad_norm": 0.17148347198963165, "learning_rate": 5e-05, "loss": 1.6832, "step": 2386 }, { "epoch": 0.6527208094066175, "grad_norm": 0.15762430429458618, "learning_rate": 5e-05, "loss": 1.5791, "step": 2387 }, { "epoch": 0.6529942575881871, "grad_norm": 0.16125012934207916, "learning_rate": 5e-05, "loss": 1.6339, "step": 2388 }, { "epoch": 0.6532677057697567, "grad_norm": 0.17223118245601654, "learning_rate": 5e-05, "loss": 1.6022, "step": 2389 }, { "epoch": 0.6535411539513262, "grad_norm": 0.16604146361351013, "learning_rate": 5e-05, "loss": 1.6289, "step": 2390 }, { "epoch": 0.6538146021328958, "grad_norm": 0.16918644309043884, "learning_rate": 5e-05, "loss": 1.6586, "step": 2391 }, { "epoch": 0.6540880503144654, "grad_norm": 0.16766510903835297, "learning_rate": 5e-05, "loss": 1.6329, "step": 2392 }, { "epoch": 0.654361498496035, "grad_norm": 0.1656326800584793, "learning_rate": 5e-05, "loss": 1.6551, "step": 2393 }, { "epoch": 0.6546349466776046, "grad_norm": 0.16642306745052338, "learning_rate": 5e-05, "loss": 1.589, "step": 2394 }, { "epoch": 0.6549083948591742, "grad_norm": 0.15996260941028595, "learning_rate": 5e-05, "loss": 1.5731, "step": 2395 }, { "epoch": 0.6551818430407438, "grad_norm": 0.16654618084430695, "learning_rate": 5e-05, "loss": 1.7088, "step": 2396 }, { "epoch": 0.6554552912223134, "grad_norm": 0.16626910865306854, "learning_rate": 5e-05, "loss": 1.5951, "step": 2397 }, { "epoch": 0.655728739403883, "grad_norm": 0.16408471763134003, "learning_rate": 5e-05, "loss": 1.5914, "step": 2398 }, { "epoch": 0.6560021875854526, "grad_norm": 0.15765917301177979, "learning_rate": 5e-05, "loss": 1.5525, "step": 2399 }, { "epoch": 0.6562756357670222, "grad_norm": 0.15567491948604584, "learning_rate": 5e-05, "loss": 1.5724, "step": 2400 }, { "epoch": 0.6565490839485918, "grad_norm": 0.1593087911605835, "learning_rate": 5e-05, "loss": 1.6831, "step": 2401 }, { "epoch": 0.6568225321301613, "grad_norm": 0.17593322694301605, "learning_rate": 5e-05, "loss": 1.6298, "step": 2402 }, { "epoch": 0.6570959803117309, "grad_norm": 0.16059966385364532, "learning_rate": 5e-05, "loss": 1.6232, "step": 2403 }, { "epoch": 0.6573694284933005, "grad_norm": 0.15784414112567902, "learning_rate": 5e-05, "loss": 1.5765, "step": 2404 }, { "epoch": 0.6576428766748701, "grad_norm": 0.16477881371974945, "learning_rate": 5e-05, "loss": 1.6309, "step": 2405 }, { "epoch": 0.6579163248564397, "grad_norm": 0.15968461334705353, "learning_rate": 5e-05, "loss": 1.5916, "step": 2406 }, { "epoch": 0.6581897730380093, "grad_norm": 0.15735867619514465, "learning_rate": 5e-05, "loss": 1.5926, "step": 2407 }, { "epoch": 0.6584632212195789, "grad_norm": 0.15733473002910614, "learning_rate": 5e-05, "loss": 1.6399, "step": 2408 }, { "epoch": 0.6587366694011485, "grad_norm": 0.16281574964523315, "learning_rate": 5e-05, "loss": 1.7522, "step": 2409 }, { "epoch": 0.6590101175827181, "grad_norm": 0.17003872990608215, "learning_rate": 5e-05, "loss": 1.6451, "step": 2410 }, { "epoch": 0.6592835657642877, "grad_norm": 0.15980958938598633, "learning_rate": 5e-05, "loss": 1.6791, "step": 2411 }, { "epoch": 0.6595570139458573, "grad_norm": 0.15881507098674774, "learning_rate": 5e-05, "loss": 1.6218, "step": 2412 }, { "epoch": 0.6598304621274268, "grad_norm": 0.1568790227174759, "learning_rate": 5e-05, "loss": 1.6652, "step": 2413 }, { "epoch": 0.6601039103089964, "grad_norm": 0.16632792353630066, "learning_rate": 5e-05, "loss": 1.6607, "step": 2414 }, { "epoch": 0.660377358490566, "grad_norm": 0.15554708242416382, "learning_rate": 5e-05, "loss": 1.6423, "step": 2415 }, { "epoch": 0.6606508066721356, "grad_norm": 0.16607338190078735, "learning_rate": 5e-05, "loss": 1.5636, "step": 2416 }, { "epoch": 0.6609242548537052, "grad_norm": 0.1500883400440216, "learning_rate": 5e-05, "loss": 1.5665, "step": 2417 }, { "epoch": 0.6611977030352748, "grad_norm": 0.1519675999879837, "learning_rate": 5e-05, "loss": 1.5243, "step": 2418 }, { "epoch": 0.6614711512168444, "grad_norm": 0.1638784110546112, "learning_rate": 5e-05, "loss": 1.6462, "step": 2419 }, { "epoch": 0.661744599398414, "grad_norm": 0.15792463719844818, "learning_rate": 5e-05, "loss": 1.5241, "step": 2420 }, { "epoch": 0.6620180475799836, "grad_norm": 0.15407374501228333, "learning_rate": 5e-05, "loss": 1.5822, "step": 2421 }, { "epoch": 0.6622914957615532, "grad_norm": 0.1588737964630127, "learning_rate": 5e-05, "loss": 1.7011, "step": 2422 }, { "epoch": 0.6625649439431228, "grad_norm": 0.15952999889850616, "learning_rate": 5e-05, "loss": 1.5825, "step": 2423 }, { "epoch": 0.6628383921246924, "grad_norm": 0.1603320688009262, "learning_rate": 5e-05, "loss": 1.5174, "step": 2424 }, { "epoch": 0.6631118403062619, "grad_norm": 0.1592378169298172, "learning_rate": 5e-05, "loss": 1.5717, "step": 2425 }, { "epoch": 0.6633852884878315, "grad_norm": 0.15770871937274933, "learning_rate": 5e-05, "loss": 1.603, "step": 2426 }, { "epoch": 0.6636587366694011, "grad_norm": 0.14396196603775024, "learning_rate": 5e-05, "loss": 1.4309, "step": 2427 }, { "epoch": 0.6639321848509707, "grad_norm": 0.15671992301940918, "learning_rate": 5e-05, "loss": 1.6278, "step": 2428 }, { "epoch": 0.6642056330325403, "grad_norm": 0.157623752951622, "learning_rate": 5e-05, "loss": 1.5608, "step": 2429 }, { "epoch": 0.6644790812141099, "grad_norm": 0.1747284233570099, "learning_rate": 5e-05, "loss": 1.6987, "step": 2430 }, { "epoch": 0.6647525293956795, "grad_norm": 0.16402825713157654, "learning_rate": 5e-05, "loss": 1.6901, "step": 2431 }, { "epoch": 0.6650259775772491, "grad_norm": 0.15663176774978638, "learning_rate": 5e-05, "loss": 1.5185, "step": 2432 }, { "epoch": 0.6652994257588187, "grad_norm": 0.14828836917877197, "learning_rate": 5e-05, "loss": 1.525, "step": 2433 }, { "epoch": 0.6655728739403883, "grad_norm": 0.15145589411258698, "learning_rate": 5e-05, "loss": 1.5436, "step": 2434 }, { "epoch": 0.6658463221219579, "grad_norm": 0.166320338845253, "learning_rate": 5e-05, "loss": 1.6605, "step": 2435 }, { "epoch": 0.6661197703035275, "grad_norm": 0.15976975858211517, "learning_rate": 5e-05, "loss": 1.6091, "step": 2436 }, { "epoch": 0.666393218485097, "grad_norm": 0.1547703742980957, "learning_rate": 5e-05, "loss": 1.594, "step": 2437 }, { "epoch": 0.6666666666666666, "grad_norm": 0.15781809389591217, "learning_rate": 5e-05, "loss": 1.6204, "step": 2438 }, { "epoch": 0.6669401148482362, "grad_norm": 0.16567228734493256, "learning_rate": 5e-05, "loss": 1.5822, "step": 2439 }, { "epoch": 0.6672135630298058, "grad_norm": 0.16364511847496033, "learning_rate": 5e-05, "loss": 1.606, "step": 2440 }, { "epoch": 0.6674870112113754, "grad_norm": 0.17084093391895294, "learning_rate": 5e-05, "loss": 1.6213, "step": 2441 }, { "epoch": 0.667760459392945, "grad_norm": 0.16154657304286957, "learning_rate": 5e-05, "loss": 1.5344, "step": 2442 }, { "epoch": 0.6680339075745146, "grad_norm": 0.1677396446466446, "learning_rate": 5e-05, "loss": 1.5043, "step": 2443 }, { "epoch": 0.6683073557560842, "grad_norm": 0.16632121801376343, "learning_rate": 5e-05, "loss": 1.6781, "step": 2444 }, { "epoch": 0.6685808039376538, "grad_norm": 0.16119202971458435, "learning_rate": 5e-05, "loss": 1.5862, "step": 2445 }, { "epoch": 0.6688542521192234, "grad_norm": 0.16189579665660858, "learning_rate": 5e-05, "loss": 1.5532, "step": 2446 }, { "epoch": 0.669127700300793, "grad_norm": 0.16972647607326508, "learning_rate": 5e-05, "loss": 1.634, "step": 2447 }, { "epoch": 0.6694011484823625, "grad_norm": 0.1550510823726654, "learning_rate": 5e-05, "loss": 1.6588, "step": 2448 }, { "epoch": 0.6696745966639321, "grad_norm": 0.16665950417518616, "learning_rate": 5e-05, "loss": 1.6175, "step": 2449 }, { "epoch": 0.6699480448455017, "grad_norm": 0.17640990018844604, "learning_rate": 5e-05, "loss": 1.5892, "step": 2450 }, { "epoch": 0.6702214930270713, "grad_norm": 0.1729980856180191, "learning_rate": 5e-05, "loss": 1.6211, "step": 2451 }, { "epoch": 0.670494941208641, "grad_norm": 0.1840631514787674, "learning_rate": 5e-05, "loss": 1.6796, "step": 2452 }, { "epoch": 0.6707683893902106, "grad_norm": 0.16333408653736115, "learning_rate": 5e-05, "loss": 1.6035, "step": 2453 }, { "epoch": 0.6710418375717802, "grad_norm": 0.15522709488868713, "learning_rate": 5e-05, "loss": 1.5799, "step": 2454 }, { "epoch": 0.6713152857533498, "grad_norm": 0.1594085544347763, "learning_rate": 5e-05, "loss": 1.6033, "step": 2455 }, { "epoch": 0.6715887339349194, "grad_norm": 0.174288809299469, "learning_rate": 5e-05, "loss": 1.5725, "step": 2456 }, { "epoch": 0.671862182116489, "grad_norm": 0.1599048525094986, "learning_rate": 5e-05, "loss": 1.5534, "step": 2457 }, { "epoch": 0.6721356302980586, "grad_norm": 0.17705102264881134, "learning_rate": 5e-05, "loss": 1.6193, "step": 2458 }, { "epoch": 0.6724090784796282, "grad_norm": 0.17394457757472992, "learning_rate": 5e-05, "loss": 1.5991, "step": 2459 }, { "epoch": 0.6726825266611977, "grad_norm": 0.17613448202610016, "learning_rate": 5e-05, "loss": 1.6804, "step": 2460 }, { "epoch": 0.6729559748427673, "grad_norm": 0.1572728008031845, "learning_rate": 5e-05, "loss": 1.6653, "step": 2461 }, { "epoch": 0.6732294230243369, "grad_norm": 0.17541466653347015, "learning_rate": 5e-05, "loss": 1.5699, "step": 2462 }, { "epoch": 0.6735028712059065, "grad_norm": 0.1598716527223587, "learning_rate": 5e-05, "loss": 1.5847, "step": 2463 }, { "epoch": 0.6737763193874761, "grad_norm": 0.16459186375141144, "learning_rate": 5e-05, "loss": 1.5796, "step": 2464 }, { "epoch": 0.6740497675690457, "grad_norm": 0.16075541079044342, "learning_rate": 5e-05, "loss": 1.6633, "step": 2465 }, { "epoch": 0.6743232157506153, "grad_norm": 0.16022174060344696, "learning_rate": 5e-05, "loss": 1.574, "step": 2466 }, { "epoch": 0.6745966639321849, "grad_norm": 0.16279369592666626, "learning_rate": 5e-05, "loss": 1.6139, "step": 2467 }, { "epoch": 0.6748701121137545, "grad_norm": 0.15538600087165833, "learning_rate": 5e-05, "loss": 1.6165, "step": 2468 }, { "epoch": 0.6751435602953241, "grad_norm": 0.15767286717891693, "learning_rate": 5e-05, "loss": 1.6236, "step": 2469 }, { "epoch": 0.6754170084768937, "grad_norm": 0.15616439282894135, "learning_rate": 5e-05, "loss": 1.6096, "step": 2470 }, { "epoch": 0.6756904566584632, "grad_norm": 0.1593167930841446, "learning_rate": 5e-05, "loss": 1.5664, "step": 2471 }, { "epoch": 0.6759639048400328, "grad_norm": 0.15188159048557281, "learning_rate": 5e-05, "loss": 1.6384, "step": 2472 }, { "epoch": 0.6762373530216024, "grad_norm": 0.1587432473897934, "learning_rate": 5e-05, "loss": 1.6228, "step": 2473 }, { "epoch": 0.676510801203172, "grad_norm": 0.15247757732868195, "learning_rate": 5e-05, "loss": 1.4628, "step": 2474 }, { "epoch": 0.6767842493847416, "grad_norm": 0.1572297066450119, "learning_rate": 5e-05, "loss": 1.5996, "step": 2475 }, { "epoch": 0.6770576975663112, "grad_norm": 0.1596899777650833, "learning_rate": 5e-05, "loss": 1.6908, "step": 2476 }, { "epoch": 0.6773311457478808, "grad_norm": 0.15775376558303833, "learning_rate": 5e-05, "loss": 1.5248, "step": 2477 }, { "epoch": 0.6776045939294504, "grad_norm": 0.16116631031036377, "learning_rate": 5e-05, "loss": 1.5714, "step": 2478 }, { "epoch": 0.67787804211102, "grad_norm": 0.15727153420448303, "learning_rate": 5e-05, "loss": 1.5822, "step": 2479 }, { "epoch": 0.6781514902925896, "grad_norm": 0.15772849321365356, "learning_rate": 5e-05, "loss": 1.64, "step": 2480 }, { "epoch": 0.6784249384741592, "grad_norm": 0.15587928891181946, "learning_rate": 5e-05, "loss": 1.5437, "step": 2481 }, { "epoch": 0.6786983866557288, "grad_norm": 0.16047807037830353, "learning_rate": 5e-05, "loss": 1.5917, "step": 2482 }, { "epoch": 0.6789718348372983, "grad_norm": 0.1718113124370575, "learning_rate": 5e-05, "loss": 1.6332, "step": 2483 }, { "epoch": 0.6792452830188679, "grad_norm": 0.1675024926662445, "learning_rate": 5e-05, "loss": 1.6186, "step": 2484 }, { "epoch": 0.6795187312004375, "grad_norm": 0.16398410499095917, "learning_rate": 5e-05, "loss": 1.6521, "step": 2485 }, { "epoch": 0.6797921793820071, "grad_norm": 0.18062223494052887, "learning_rate": 5e-05, "loss": 1.5848, "step": 2486 }, { "epoch": 0.6800656275635767, "grad_norm": 0.15654748678207397, "learning_rate": 5e-05, "loss": 1.5452, "step": 2487 }, { "epoch": 0.6803390757451463, "grad_norm": 0.1599476933479309, "learning_rate": 5e-05, "loss": 1.6255, "step": 2488 }, { "epoch": 0.6806125239267159, "grad_norm": 0.16449692845344543, "learning_rate": 5e-05, "loss": 1.6717, "step": 2489 }, { "epoch": 0.6808859721082855, "grad_norm": 0.16174077987670898, "learning_rate": 5e-05, "loss": 1.5248, "step": 2490 }, { "epoch": 0.6811594202898551, "grad_norm": 0.1614287793636322, "learning_rate": 5e-05, "loss": 1.5408, "step": 2491 }, { "epoch": 0.6814328684714247, "grad_norm": 0.1738748550415039, "learning_rate": 5e-05, "loss": 1.6189, "step": 2492 }, { "epoch": 0.6817063166529943, "grad_norm": 0.1673515886068344, "learning_rate": 5e-05, "loss": 1.68, "step": 2493 }, { "epoch": 0.6819797648345638, "grad_norm": 0.16801506280899048, "learning_rate": 5e-05, "loss": 1.6737, "step": 2494 }, { "epoch": 0.6822532130161334, "grad_norm": 0.1836167722940445, "learning_rate": 5e-05, "loss": 1.6566, "step": 2495 }, { "epoch": 0.682526661197703, "grad_norm": 0.1564912497997284, "learning_rate": 5e-05, "loss": 1.5649, "step": 2496 }, { "epoch": 0.6828001093792726, "grad_norm": 0.161629781126976, "learning_rate": 5e-05, "loss": 1.6278, "step": 2497 }, { "epoch": 0.6830735575608422, "grad_norm": 0.18047556281089783, "learning_rate": 5e-05, "loss": 1.623, "step": 2498 }, { "epoch": 0.6833470057424118, "grad_norm": 0.1663326621055603, "learning_rate": 5e-05, "loss": 1.6222, "step": 2499 }, { "epoch": 0.6836204539239814, "grad_norm": 0.16856786608695984, "learning_rate": 5e-05, "loss": 1.6573, "step": 2500 }, { "epoch": 0.683893902105551, "grad_norm": 0.17234691977500916, "learning_rate": 5e-05, "loss": 1.6514, "step": 2501 }, { "epoch": 0.6841673502871206, "grad_norm": 0.15840747952461243, "learning_rate": 5e-05, "loss": 1.5428, "step": 2502 }, { "epoch": 0.6844407984686902, "grad_norm": 0.17479360103607178, "learning_rate": 5e-05, "loss": 1.6271, "step": 2503 }, { "epoch": 0.6847142466502598, "grad_norm": 0.16867142915725708, "learning_rate": 5e-05, "loss": 1.5434, "step": 2504 }, { "epoch": 0.6849876948318294, "grad_norm": 0.16198626160621643, "learning_rate": 5e-05, "loss": 1.6075, "step": 2505 }, { "epoch": 0.6852611430133989, "grad_norm": 0.16754168272018433, "learning_rate": 5e-05, "loss": 1.7111, "step": 2506 }, { "epoch": 0.6855345911949685, "grad_norm": 0.1667136549949646, "learning_rate": 5e-05, "loss": 1.582, "step": 2507 }, { "epoch": 0.6858080393765381, "grad_norm": 0.15942475199699402, "learning_rate": 5e-05, "loss": 1.6081, "step": 2508 }, { "epoch": 0.6860814875581077, "grad_norm": 0.1539759784936905, "learning_rate": 5e-05, "loss": 1.5774, "step": 2509 }, { "epoch": 0.6863549357396773, "grad_norm": 0.1718335896730423, "learning_rate": 5e-05, "loss": 1.7125, "step": 2510 }, { "epoch": 0.6866283839212469, "grad_norm": 0.16027317941188812, "learning_rate": 5e-05, "loss": 1.634, "step": 2511 }, { "epoch": 0.6869018321028165, "grad_norm": 0.1722375601530075, "learning_rate": 5e-05, "loss": 1.6105, "step": 2512 }, { "epoch": 0.6871752802843861, "grad_norm": 0.15657594799995422, "learning_rate": 5e-05, "loss": 1.5136, "step": 2513 }, { "epoch": 0.6874487284659557, "grad_norm": 0.16483189165592194, "learning_rate": 5e-05, "loss": 1.629, "step": 2514 }, { "epoch": 0.6877221766475253, "grad_norm": 0.1574966162443161, "learning_rate": 5e-05, "loss": 1.5831, "step": 2515 }, { "epoch": 0.6879956248290949, "grad_norm": 0.16608235239982605, "learning_rate": 5e-05, "loss": 1.6769, "step": 2516 }, { "epoch": 0.6882690730106645, "grad_norm": 0.1636161059141159, "learning_rate": 5e-05, "loss": 1.6551, "step": 2517 }, { "epoch": 0.688542521192234, "grad_norm": 0.15895555913448334, "learning_rate": 5e-05, "loss": 1.5672, "step": 2518 }, { "epoch": 0.6888159693738036, "grad_norm": 0.17441825568675995, "learning_rate": 5e-05, "loss": 1.6559, "step": 2519 }, { "epoch": 0.6890894175553732, "grad_norm": 0.15818622708320618, "learning_rate": 5e-05, "loss": 1.6146, "step": 2520 }, { "epoch": 0.6893628657369428, "grad_norm": 0.16466504335403442, "learning_rate": 5e-05, "loss": 1.6309, "step": 2521 }, { "epoch": 0.6896363139185124, "grad_norm": 0.1637655645608902, "learning_rate": 5e-05, "loss": 1.6369, "step": 2522 }, { "epoch": 0.689909762100082, "grad_norm": 0.17039266228675842, "learning_rate": 5e-05, "loss": 1.6315, "step": 2523 }, { "epoch": 0.6901832102816516, "grad_norm": 0.1624341756105423, "learning_rate": 5e-05, "loss": 1.5878, "step": 2524 }, { "epoch": 0.6904566584632212, "grad_norm": 0.17213301360607147, "learning_rate": 5e-05, "loss": 1.6135, "step": 2525 }, { "epoch": 0.6907301066447908, "grad_norm": 0.17695990204811096, "learning_rate": 5e-05, "loss": 1.6008, "step": 2526 }, { "epoch": 0.6910035548263604, "grad_norm": 0.1731920838356018, "learning_rate": 5e-05, "loss": 1.7317, "step": 2527 }, { "epoch": 0.69127700300793, "grad_norm": 0.16777953505516052, "learning_rate": 5e-05, "loss": 1.608, "step": 2528 }, { "epoch": 0.6915504511894995, "grad_norm": 0.16122910380363464, "learning_rate": 5e-05, "loss": 1.5529, "step": 2529 }, { "epoch": 0.6918238993710691, "grad_norm": 0.16527071595191956, "learning_rate": 5e-05, "loss": 1.621, "step": 2530 }, { "epoch": 0.6920973475526387, "grad_norm": 0.1596560925245285, "learning_rate": 5e-05, "loss": 1.6079, "step": 2531 }, { "epoch": 0.6923707957342083, "grad_norm": 0.15807481110095978, "learning_rate": 5e-05, "loss": 1.5349, "step": 2532 }, { "epoch": 0.692644243915778, "grad_norm": 0.16306331753730774, "learning_rate": 5e-05, "loss": 1.6461, "step": 2533 }, { "epoch": 0.6929176920973475, "grad_norm": 0.16564686596393585, "learning_rate": 5e-05, "loss": 1.5821, "step": 2534 }, { "epoch": 0.6931911402789172, "grad_norm": 0.1704019457101822, "learning_rate": 5e-05, "loss": 1.5851, "step": 2535 }, { "epoch": 0.6934645884604868, "grad_norm": 0.15806765854358673, "learning_rate": 5e-05, "loss": 1.6363, "step": 2536 }, { "epoch": 0.6937380366420564, "grad_norm": 0.15724487602710724, "learning_rate": 5e-05, "loss": 1.608, "step": 2537 }, { "epoch": 0.694011484823626, "grad_norm": 0.16402623057365417, "learning_rate": 5e-05, "loss": 1.5813, "step": 2538 }, { "epoch": 0.6942849330051956, "grad_norm": 0.15913555026054382, "learning_rate": 5e-05, "loss": 1.6284, "step": 2539 }, { "epoch": 0.6945583811867652, "grad_norm": 0.16074508428573608, "learning_rate": 5e-05, "loss": 1.528, "step": 2540 }, { "epoch": 0.6948318293683347, "grad_norm": 0.15752652287483215, "learning_rate": 5e-05, "loss": 1.5023, "step": 2541 }, { "epoch": 0.6951052775499043, "grad_norm": 0.15707096457481384, "learning_rate": 5e-05, "loss": 1.6251, "step": 2542 }, { "epoch": 0.6953787257314739, "grad_norm": 0.15630987286567688, "learning_rate": 5e-05, "loss": 1.5003, "step": 2543 }, { "epoch": 0.6956521739130435, "grad_norm": 0.15872447192668915, "learning_rate": 5e-05, "loss": 1.5698, "step": 2544 }, { "epoch": 0.6959256220946131, "grad_norm": 0.16033059358596802, "learning_rate": 5e-05, "loss": 1.5535, "step": 2545 }, { "epoch": 0.6961990702761827, "grad_norm": 0.1571992188692093, "learning_rate": 5e-05, "loss": 1.573, "step": 2546 }, { "epoch": 0.6964725184577523, "grad_norm": 0.15957675874233246, "learning_rate": 5e-05, "loss": 1.6449, "step": 2547 }, { "epoch": 0.6967459666393219, "grad_norm": 0.17184406518936157, "learning_rate": 5e-05, "loss": 1.7537, "step": 2548 }, { "epoch": 0.6970194148208915, "grad_norm": 0.1593320220708847, "learning_rate": 5e-05, "loss": 1.6327, "step": 2549 }, { "epoch": 0.6972928630024611, "grad_norm": 0.16837316751480103, "learning_rate": 5e-05, "loss": 1.6125, "step": 2550 }, { "epoch": 0.6975663111840307, "grad_norm": 0.1569320261478424, "learning_rate": 5e-05, "loss": 1.6095, "step": 2551 }, { "epoch": 0.6978397593656002, "grad_norm": 0.1526784896850586, "learning_rate": 5e-05, "loss": 1.5959, "step": 2552 }, { "epoch": 0.6981132075471698, "grad_norm": 0.16312266886234283, "learning_rate": 5e-05, "loss": 1.6402, "step": 2553 }, { "epoch": 0.6983866557287394, "grad_norm": 0.16702058911323547, "learning_rate": 5e-05, "loss": 1.5529, "step": 2554 }, { "epoch": 0.698660103910309, "grad_norm": 0.16597692668437958, "learning_rate": 5e-05, "loss": 1.6619, "step": 2555 }, { "epoch": 0.6989335520918786, "grad_norm": 0.16194956004619598, "learning_rate": 5e-05, "loss": 1.7461, "step": 2556 }, { "epoch": 0.6992070002734482, "grad_norm": 0.15624722838401794, "learning_rate": 5e-05, "loss": 1.6271, "step": 2557 }, { "epoch": 0.6994804484550178, "grad_norm": 0.15689989924430847, "learning_rate": 5e-05, "loss": 1.5491, "step": 2558 }, { "epoch": 0.6997538966365874, "grad_norm": 0.1616222858428955, "learning_rate": 5e-05, "loss": 1.5906, "step": 2559 }, { "epoch": 0.700027344818157, "grad_norm": 0.15930163860321045, "learning_rate": 5e-05, "loss": 1.5583, "step": 2560 }, { "epoch": 0.7003007929997266, "grad_norm": 0.16471946239471436, "learning_rate": 5e-05, "loss": 1.6327, "step": 2561 }, { "epoch": 0.7005742411812962, "grad_norm": 0.16374364495277405, "learning_rate": 5e-05, "loss": 1.6228, "step": 2562 }, { "epoch": 0.7008476893628658, "grad_norm": 0.1631327122449875, "learning_rate": 5e-05, "loss": 1.6547, "step": 2563 }, { "epoch": 0.7011211375444353, "grad_norm": 0.15994025766849518, "learning_rate": 5e-05, "loss": 1.5833, "step": 2564 }, { "epoch": 0.7013945857260049, "grad_norm": 0.16121256351470947, "learning_rate": 5e-05, "loss": 1.6305, "step": 2565 }, { "epoch": 0.7016680339075745, "grad_norm": 0.16354970633983612, "learning_rate": 5e-05, "loss": 1.5552, "step": 2566 }, { "epoch": 0.7019414820891441, "grad_norm": 0.161824032664299, "learning_rate": 5e-05, "loss": 1.6749, "step": 2567 }, { "epoch": 0.7022149302707137, "grad_norm": 0.17040131986141205, "learning_rate": 5e-05, "loss": 1.6291, "step": 2568 }, { "epoch": 0.7024883784522833, "grad_norm": 0.1799769103527069, "learning_rate": 5e-05, "loss": 1.657, "step": 2569 }, { "epoch": 0.7027618266338529, "grad_norm": 0.15718813240528107, "learning_rate": 5e-05, "loss": 1.5858, "step": 2570 }, { "epoch": 0.7030352748154225, "grad_norm": 0.16162623465061188, "learning_rate": 5e-05, "loss": 1.6128, "step": 2571 }, { "epoch": 0.7033087229969921, "grad_norm": 0.16257646679878235, "learning_rate": 5e-05, "loss": 1.624, "step": 2572 }, { "epoch": 0.7035821711785617, "grad_norm": 0.16675184667110443, "learning_rate": 5e-05, "loss": 1.6738, "step": 2573 }, { "epoch": 0.7038556193601313, "grad_norm": 0.17329177260398865, "learning_rate": 5e-05, "loss": 1.6952, "step": 2574 }, { "epoch": 0.7041290675417008, "grad_norm": 0.15674902498722076, "learning_rate": 5e-05, "loss": 1.5858, "step": 2575 }, { "epoch": 0.7044025157232704, "grad_norm": 0.15690606832504272, "learning_rate": 5e-05, "loss": 1.5772, "step": 2576 }, { "epoch": 0.70467596390484, "grad_norm": 0.16210415959358215, "learning_rate": 5e-05, "loss": 1.6705, "step": 2577 }, { "epoch": 0.7049494120864096, "grad_norm": 0.16525010764598846, "learning_rate": 5e-05, "loss": 1.6188, "step": 2578 }, { "epoch": 0.7052228602679792, "grad_norm": 0.1738116294145584, "learning_rate": 5e-05, "loss": 1.5788, "step": 2579 }, { "epoch": 0.7054963084495488, "grad_norm": 0.1632319688796997, "learning_rate": 5e-05, "loss": 1.6327, "step": 2580 }, { "epoch": 0.7057697566311184, "grad_norm": 0.16341067850589752, "learning_rate": 5e-05, "loss": 1.6567, "step": 2581 }, { "epoch": 0.706043204812688, "grad_norm": 0.17945139110088348, "learning_rate": 5e-05, "loss": 1.6919, "step": 2582 }, { "epoch": 0.7063166529942576, "grad_norm": 0.15467429161071777, "learning_rate": 5e-05, "loss": 1.618, "step": 2583 }, { "epoch": 0.7065901011758272, "grad_norm": 0.16300177574157715, "learning_rate": 5e-05, "loss": 1.6526, "step": 2584 }, { "epoch": 0.7068635493573968, "grad_norm": 0.15929557383060455, "learning_rate": 5e-05, "loss": 1.5512, "step": 2585 }, { "epoch": 0.7071369975389664, "grad_norm": 0.16156993806362152, "learning_rate": 5e-05, "loss": 1.6352, "step": 2586 }, { "epoch": 0.7074104457205359, "grad_norm": 0.15560369193553925, "learning_rate": 5e-05, "loss": 1.5727, "step": 2587 }, { "epoch": 0.7076838939021055, "grad_norm": 0.16337203979492188, "learning_rate": 5e-05, "loss": 1.561, "step": 2588 }, { "epoch": 0.7079573420836751, "grad_norm": 0.1646268218755722, "learning_rate": 5e-05, "loss": 1.5965, "step": 2589 }, { "epoch": 0.7082307902652447, "grad_norm": 0.16066445410251617, "learning_rate": 5e-05, "loss": 1.6132, "step": 2590 }, { "epoch": 0.7085042384468143, "grad_norm": 0.14791713654994965, "learning_rate": 5e-05, "loss": 1.5, "step": 2591 }, { "epoch": 0.7087776866283839, "grad_norm": 0.15416625142097473, "learning_rate": 5e-05, "loss": 1.5718, "step": 2592 }, { "epoch": 0.7090511348099535, "grad_norm": 0.1604636311531067, "learning_rate": 5e-05, "loss": 1.6064, "step": 2593 }, { "epoch": 0.7093245829915231, "grad_norm": 0.15781953930854797, "learning_rate": 5e-05, "loss": 1.5769, "step": 2594 }, { "epoch": 0.7095980311730927, "grad_norm": 0.15604273974895477, "learning_rate": 5e-05, "loss": 1.588, "step": 2595 }, { "epoch": 0.7098714793546623, "grad_norm": 0.15518909692764282, "learning_rate": 5e-05, "loss": 1.59, "step": 2596 }, { "epoch": 0.7101449275362319, "grad_norm": 0.16104191541671753, "learning_rate": 5e-05, "loss": 1.6747, "step": 2597 }, { "epoch": 0.7104183757178015, "grad_norm": 0.16382278501987457, "learning_rate": 5e-05, "loss": 1.6737, "step": 2598 }, { "epoch": 0.710691823899371, "grad_norm": 0.16717152297496796, "learning_rate": 5e-05, "loss": 1.6239, "step": 2599 }, { "epoch": 0.7109652720809406, "grad_norm": 0.16594311594963074, "learning_rate": 5e-05, "loss": 1.7318, "step": 2600 }, { "epoch": 0.7112387202625102, "grad_norm": 0.16552476584911346, "learning_rate": 5e-05, "loss": 1.7376, "step": 2601 }, { "epoch": 0.7115121684440798, "grad_norm": 0.17463426291942596, "learning_rate": 5e-05, "loss": 1.6286, "step": 2602 }, { "epoch": 0.7117856166256494, "grad_norm": 0.159657821059227, "learning_rate": 5e-05, "loss": 1.5664, "step": 2603 }, { "epoch": 0.712059064807219, "grad_norm": 0.16044557094573975, "learning_rate": 5e-05, "loss": 1.5265, "step": 2604 }, { "epoch": 0.7123325129887886, "grad_norm": 0.15536926686763763, "learning_rate": 5e-05, "loss": 1.5381, "step": 2605 }, { "epoch": 0.7126059611703582, "grad_norm": 0.1617160588502884, "learning_rate": 5e-05, "loss": 1.6025, "step": 2606 }, { "epoch": 0.7128794093519278, "grad_norm": 0.16367921233177185, "learning_rate": 5e-05, "loss": 1.7194, "step": 2607 }, { "epoch": 0.7131528575334974, "grad_norm": 0.16112937033176422, "learning_rate": 5e-05, "loss": 1.4914, "step": 2608 }, { "epoch": 0.713426305715067, "grad_norm": 0.16444730758666992, "learning_rate": 5e-05, "loss": 1.625, "step": 2609 }, { "epoch": 0.7136997538966365, "grad_norm": 0.16794142127037048, "learning_rate": 5e-05, "loss": 1.6348, "step": 2610 }, { "epoch": 0.7139732020782061, "grad_norm": 0.1628817617893219, "learning_rate": 5e-05, "loss": 1.6083, "step": 2611 }, { "epoch": 0.7142466502597757, "grad_norm": 0.18700134754180908, "learning_rate": 5e-05, "loss": 1.6996, "step": 2612 }, { "epoch": 0.7145200984413453, "grad_norm": 0.15966500341892242, "learning_rate": 5e-05, "loss": 1.6113, "step": 2613 }, { "epoch": 0.7147935466229149, "grad_norm": 0.18392211198806763, "learning_rate": 5e-05, "loss": 1.6337, "step": 2614 }, { "epoch": 0.7150669948044845, "grad_norm": 0.17130230367183685, "learning_rate": 5e-05, "loss": 1.6511, "step": 2615 }, { "epoch": 0.7153404429860541, "grad_norm": 0.16397760808467865, "learning_rate": 5e-05, "loss": 1.6283, "step": 2616 }, { "epoch": 0.7156138911676238, "grad_norm": 0.16686753928661346, "learning_rate": 5e-05, "loss": 1.6066, "step": 2617 }, { "epoch": 0.7158873393491934, "grad_norm": 0.1645701676607132, "learning_rate": 5e-05, "loss": 1.6268, "step": 2618 }, { "epoch": 0.716160787530763, "grad_norm": 0.1619407832622528, "learning_rate": 5e-05, "loss": 1.6309, "step": 2619 }, { "epoch": 0.7164342357123326, "grad_norm": 0.17755883932113647, "learning_rate": 5e-05, "loss": 1.6246, "step": 2620 }, { "epoch": 0.7167076838939022, "grad_norm": 0.17687168717384338, "learning_rate": 5e-05, "loss": 1.6039, "step": 2621 }, { "epoch": 0.7169811320754716, "grad_norm": 0.16282087564468384, "learning_rate": 5e-05, "loss": 1.5709, "step": 2622 }, { "epoch": 0.7172545802570413, "grad_norm": 0.1705690324306488, "learning_rate": 5e-05, "loss": 1.4957, "step": 2623 }, { "epoch": 0.7175280284386109, "grad_norm": 0.17099738121032715, "learning_rate": 5e-05, "loss": 1.5331, "step": 2624 }, { "epoch": 0.7178014766201805, "grad_norm": 0.1703016757965088, "learning_rate": 5e-05, "loss": 1.6962, "step": 2625 }, { "epoch": 0.7180749248017501, "grad_norm": 0.1631249189376831, "learning_rate": 5e-05, "loss": 1.5764, "step": 2626 }, { "epoch": 0.7183483729833197, "grad_norm": 0.1670769453048706, "learning_rate": 5e-05, "loss": 1.5328, "step": 2627 }, { "epoch": 0.7186218211648893, "grad_norm": 0.15617656707763672, "learning_rate": 5e-05, "loss": 1.5934, "step": 2628 }, { "epoch": 0.7188952693464589, "grad_norm": 0.15787526965141296, "learning_rate": 5e-05, "loss": 1.5847, "step": 2629 }, { "epoch": 0.7191687175280285, "grad_norm": 0.160158172249794, "learning_rate": 5e-05, "loss": 1.5974, "step": 2630 }, { "epoch": 0.7194421657095981, "grad_norm": 0.16433510184288025, "learning_rate": 5e-05, "loss": 1.573, "step": 2631 }, { "epoch": 0.7197156138911677, "grad_norm": 0.15415172278881073, "learning_rate": 5e-05, "loss": 1.5221, "step": 2632 }, { "epoch": 0.7199890620727372, "grad_norm": 0.16252250969409943, "learning_rate": 5e-05, "loss": 1.6093, "step": 2633 }, { "epoch": 0.7202625102543068, "grad_norm": 0.1922326236963272, "learning_rate": 5e-05, "loss": 1.5648, "step": 2634 }, { "epoch": 0.7205359584358764, "grad_norm": 0.16628074645996094, "learning_rate": 5e-05, "loss": 1.5623, "step": 2635 }, { "epoch": 0.720809406617446, "grad_norm": 0.19099196791648865, "learning_rate": 5e-05, "loss": 1.6423, "step": 2636 }, { "epoch": 0.7210828547990156, "grad_norm": 0.20283576846122742, "learning_rate": 5e-05, "loss": 1.6107, "step": 2637 }, { "epoch": 0.7213563029805852, "grad_norm": 0.15855461359024048, "learning_rate": 5e-05, "loss": 1.6438, "step": 2638 }, { "epoch": 0.7216297511621548, "grad_norm": 0.21607458591461182, "learning_rate": 5e-05, "loss": 1.6484, "step": 2639 }, { "epoch": 0.7219031993437244, "grad_norm": 0.1732168048620224, "learning_rate": 5e-05, "loss": 1.5945, "step": 2640 }, { "epoch": 0.722176647525294, "grad_norm": 0.18066106736660004, "learning_rate": 5e-05, "loss": 1.6576, "step": 2641 }, { "epoch": 0.7224500957068636, "grad_norm": 0.1668356955051422, "learning_rate": 5e-05, "loss": 1.6666, "step": 2642 }, { "epoch": 0.7227235438884332, "grad_norm": 0.17484630644321442, "learning_rate": 5e-05, "loss": 1.5694, "step": 2643 }, { "epoch": 0.7229969920700028, "grad_norm": 0.16421037912368774, "learning_rate": 5e-05, "loss": 1.5838, "step": 2644 }, { "epoch": 0.7232704402515723, "grad_norm": 0.16860564053058624, "learning_rate": 5e-05, "loss": 1.6144, "step": 2645 }, { "epoch": 0.7235438884331419, "grad_norm": 0.15611010789871216, "learning_rate": 5e-05, "loss": 1.5933, "step": 2646 }, { "epoch": 0.7238173366147115, "grad_norm": 0.1699523776769638, "learning_rate": 5e-05, "loss": 1.6102, "step": 2647 }, { "epoch": 0.7240907847962811, "grad_norm": 0.15928548574447632, "learning_rate": 5e-05, "loss": 1.5433, "step": 2648 }, { "epoch": 0.7243642329778507, "grad_norm": 0.15467146039009094, "learning_rate": 5e-05, "loss": 1.5628, "step": 2649 }, { "epoch": 0.7246376811594203, "grad_norm": 0.1566615253686905, "learning_rate": 5e-05, "loss": 1.6284, "step": 2650 }, { "epoch": 0.7249111293409899, "grad_norm": 0.1829017996788025, "learning_rate": 5e-05, "loss": 1.5719, "step": 2651 }, { "epoch": 0.7251845775225595, "grad_norm": 0.20479872822761536, "learning_rate": 5e-05, "loss": 1.483, "step": 2652 }, { "epoch": 0.7254580257041291, "grad_norm": 0.16582070291042328, "learning_rate": 5e-05, "loss": 1.6091, "step": 2653 }, { "epoch": 0.7257314738856987, "grad_norm": 0.16045349836349487, "learning_rate": 5e-05, "loss": 1.603, "step": 2654 }, { "epoch": 0.7260049220672683, "grad_norm": 0.21731440722942352, "learning_rate": 5e-05, "loss": 1.5638, "step": 2655 }, { "epoch": 0.7262783702488378, "grad_norm": 0.16208265721797943, "learning_rate": 5e-05, "loss": 1.5898, "step": 2656 }, { "epoch": 0.7265518184304074, "grad_norm": 0.1588432937860489, "learning_rate": 5e-05, "loss": 1.5425, "step": 2657 }, { "epoch": 0.726825266611977, "grad_norm": 0.1615055352449417, "learning_rate": 5e-05, "loss": 1.6128, "step": 2658 }, { "epoch": 0.7270987147935466, "grad_norm": 0.14946486055850983, "learning_rate": 5e-05, "loss": 1.5624, "step": 2659 }, { "epoch": 0.7273721629751162, "grad_norm": 0.15998360514640808, "learning_rate": 5e-05, "loss": 1.7129, "step": 2660 }, { "epoch": 0.7276456111566858, "grad_norm": 0.15510539710521698, "learning_rate": 5e-05, "loss": 1.6051, "step": 2661 }, { "epoch": 0.7279190593382554, "grad_norm": 0.15238802134990692, "learning_rate": 5e-05, "loss": 1.5503, "step": 2662 }, { "epoch": 0.728192507519825, "grad_norm": 0.16379080712795258, "learning_rate": 5e-05, "loss": 1.5657, "step": 2663 }, { "epoch": 0.7284659557013946, "grad_norm": 0.1604292243719101, "learning_rate": 5e-05, "loss": 1.5491, "step": 2664 }, { "epoch": 0.7287394038829642, "grad_norm": 0.16709019243717194, "learning_rate": 5e-05, "loss": 1.5678, "step": 2665 }, { "epoch": 0.7290128520645338, "grad_norm": 0.15374033153057098, "learning_rate": 5e-05, "loss": 1.5231, "step": 2666 }, { "epoch": 0.7292863002461034, "grad_norm": 0.165635883808136, "learning_rate": 5e-05, "loss": 1.6576, "step": 2667 }, { "epoch": 0.7295597484276729, "grad_norm": 0.15312857925891876, "learning_rate": 5e-05, "loss": 1.5964, "step": 2668 }, { "epoch": 0.7298331966092425, "grad_norm": 0.16065338253974915, "learning_rate": 5e-05, "loss": 1.6054, "step": 2669 }, { "epoch": 0.7301066447908121, "grad_norm": 0.16220992803573608, "learning_rate": 5e-05, "loss": 1.6551, "step": 2670 }, { "epoch": 0.7303800929723817, "grad_norm": 0.16524186730384827, "learning_rate": 5e-05, "loss": 1.5719, "step": 2671 }, { "epoch": 0.7306535411539513, "grad_norm": 0.1688978374004364, "learning_rate": 5e-05, "loss": 1.5573, "step": 2672 }, { "epoch": 0.7309269893355209, "grad_norm": 0.16838806867599487, "learning_rate": 5e-05, "loss": 1.6978, "step": 2673 }, { "epoch": 0.7312004375170905, "grad_norm": 0.15754219889640808, "learning_rate": 5e-05, "loss": 1.5535, "step": 2674 }, { "epoch": 0.7314738856986601, "grad_norm": 0.17631256580352783, "learning_rate": 5e-05, "loss": 1.6562, "step": 2675 }, { "epoch": 0.7317473338802297, "grad_norm": 0.17020723223686218, "learning_rate": 5e-05, "loss": 1.6649, "step": 2676 }, { "epoch": 0.7320207820617993, "grad_norm": 0.17932482063770294, "learning_rate": 5e-05, "loss": 1.6102, "step": 2677 }, { "epoch": 0.7322942302433689, "grad_norm": 0.1644601821899414, "learning_rate": 5e-05, "loss": 1.5042, "step": 2678 }, { "epoch": 0.7325676784249385, "grad_norm": 0.18472503125667572, "learning_rate": 5e-05, "loss": 1.5805, "step": 2679 }, { "epoch": 0.732841126606508, "grad_norm": 0.16369123756885529, "learning_rate": 5e-05, "loss": 1.6079, "step": 2680 }, { "epoch": 0.7331145747880776, "grad_norm": 0.16636909544467926, "learning_rate": 5e-05, "loss": 1.5607, "step": 2681 }, { "epoch": 0.7333880229696472, "grad_norm": 0.16647064685821533, "learning_rate": 5e-05, "loss": 1.5819, "step": 2682 }, { "epoch": 0.7336614711512168, "grad_norm": 0.17708119750022888, "learning_rate": 5e-05, "loss": 1.6366, "step": 2683 }, { "epoch": 0.7339349193327864, "grad_norm": 0.15985938906669617, "learning_rate": 5e-05, "loss": 1.6187, "step": 2684 }, { "epoch": 0.734208367514356, "grad_norm": 0.1802692860364914, "learning_rate": 5e-05, "loss": 1.5459, "step": 2685 }, { "epoch": 0.7344818156959256, "grad_norm": 0.156411275267601, "learning_rate": 5e-05, "loss": 1.5549, "step": 2686 }, { "epoch": 0.7347552638774952, "grad_norm": 0.1642201840877533, "learning_rate": 5e-05, "loss": 1.6463, "step": 2687 }, { "epoch": 0.7350287120590648, "grad_norm": 0.16783371567726135, "learning_rate": 5e-05, "loss": 1.5811, "step": 2688 }, { "epoch": 0.7353021602406344, "grad_norm": 0.15901906788349152, "learning_rate": 5e-05, "loss": 1.5787, "step": 2689 }, { "epoch": 0.735575608422204, "grad_norm": 0.1638929694890976, "learning_rate": 5e-05, "loss": 1.6526, "step": 2690 }, { "epoch": 0.7358490566037735, "grad_norm": 0.15809820592403412, "learning_rate": 5e-05, "loss": 1.6082, "step": 2691 }, { "epoch": 0.7361225047853431, "grad_norm": 0.1644376665353775, "learning_rate": 5e-05, "loss": 1.5305, "step": 2692 }, { "epoch": 0.7363959529669127, "grad_norm": 0.16332222521305084, "learning_rate": 5e-05, "loss": 1.6534, "step": 2693 }, { "epoch": 0.7366694011484823, "grad_norm": 0.18329092860221863, "learning_rate": 5e-05, "loss": 1.603, "step": 2694 }, { "epoch": 0.7369428493300519, "grad_norm": 0.1716667115688324, "learning_rate": 5e-05, "loss": 1.7023, "step": 2695 }, { "epoch": 0.7372162975116215, "grad_norm": 0.16460417211055756, "learning_rate": 5e-05, "loss": 1.6165, "step": 2696 }, { "epoch": 0.7374897456931911, "grad_norm": 0.16401362419128418, "learning_rate": 5e-05, "loss": 1.5556, "step": 2697 }, { "epoch": 0.7377631938747607, "grad_norm": 0.15876410901546478, "learning_rate": 5e-05, "loss": 1.5701, "step": 2698 }, { "epoch": 0.7380366420563303, "grad_norm": 0.18006089329719543, "learning_rate": 5e-05, "loss": 1.6061, "step": 2699 }, { "epoch": 0.7383100902379, "grad_norm": 0.16289302706718445, "learning_rate": 5e-05, "loss": 1.6073, "step": 2700 }, { "epoch": 0.7385835384194696, "grad_norm": 0.15855976939201355, "learning_rate": 5e-05, "loss": 1.5566, "step": 2701 }, { "epoch": 0.7388569866010392, "grad_norm": 0.16138465702533722, "learning_rate": 5e-05, "loss": 1.6222, "step": 2702 }, { "epoch": 0.7391304347826086, "grad_norm": 0.1679658591747284, "learning_rate": 5e-05, "loss": 1.6472, "step": 2703 }, { "epoch": 0.7394038829641782, "grad_norm": 0.16092632710933685, "learning_rate": 5e-05, "loss": 1.6278, "step": 2704 }, { "epoch": 0.7396773311457479, "grad_norm": 0.17156141996383667, "learning_rate": 5e-05, "loss": 1.6397, "step": 2705 }, { "epoch": 0.7399507793273175, "grad_norm": 0.17136305570602417, "learning_rate": 5e-05, "loss": 1.6552, "step": 2706 }, { "epoch": 0.7402242275088871, "grad_norm": 0.16168268024921417, "learning_rate": 5e-05, "loss": 1.5059, "step": 2707 }, { "epoch": 0.7404976756904567, "grad_norm": 0.16185155510902405, "learning_rate": 5e-05, "loss": 1.5465, "step": 2708 }, { "epoch": 0.7407711238720263, "grad_norm": 0.15480349957942963, "learning_rate": 5e-05, "loss": 1.5585, "step": 2709 }, { "epoch": 0.7410445720535959, "grad_norm": 0.16501635313034058, "learning_rate": 5e-05, "loss": 1.6614, "step": 2710 }, { "epoch": 0.7413180202351655, "grad_norm": 0.15960432589054108, "learning_rate": 5e-05, "loss": 1.5774, "step": 2711 }, { "epoch": 0.7415914684167351, "grad_norm": 0.16600382328033447, "learning_rate": 5e-05, "loss": 1.6463, "step": 2712 }, { "epoch": 0.7418649165983047, "grad_norm": 0.16479994356632233, "learning_rate": 5e-05, "loss": 1.6348, "step": 2713 }, { "epoch": 0.7421383647798742, "grad_norm": 0.15985356271266937, "learning_rate": 5e-05, "loss": 1.5811, "step": 2714 }, { "epoch": 0.7424118129614438, "grad_norm": 0.16183726489543915, "learning_rate": 5e-05, "loss": 1.6204, "step": 2715 }, { "epoch": 0.7426852611430134, "grad_norm": 0.1608124077320099, "learning_rate": 5e-05, "loss": 1.5919, "step": 2716 }, { "epoch": 0.742958709324583, "grad_norm": 0.16460396349430084, "learning_rate": 5e-05, "loss": 1.5784, "step": 2717 }, { "epoch": 0.7432321575061526, "grad_norm": 0.1570330709218979, "learning_rate": 5e-05, "loss": 1.577, "step": 2718 }, { "epoch": 0.7435056056877222, "grad_norm": 0.17111632227897644, "learning_rate": 5e-05, "loss": 1.5384, "step": 2719 }, { "epoch": 0.7437790538692918, "grad_norm": 0.1649581640958786, "learning_rate": 5e-05, "loss": 1.5283, "step": 2720 }, { "epoch": 0.7440525020508614, "grad_norm": 0.16623181104660034, "learning_rate": 5e-05, "loss": 1.6578, "step": 2721 }, { "epoch": 0.744325950232431, "grad_norm": 0.16478942334651947, "learning_rate": 5e-05, "loss": 1.6588, "step": 2722 }, { "epoch": 0.7445993984140006, "grad_norm": 0.16344524919986725, "learning_rate": 5e-05, "loss": 1.6124, "step": 2723 }, { "epoch": 0.7448728465955702, "grad_norm": 0.1609012335538864, "learning_rate": 5e-05, "loss": 1.6005, "step": 2724 }, { "epoch": 0.7451462947771398, "grad_norm": 0.17075218260288239, "learning_rate": 5e-05, "loss": 1.7217, "step": 2725 }, { "epoch": 0.7454197429587093, "grad_norm": 0.16447240114212036, "learning_rate": 5e-05, "loss": 1.6142, "step": 2726 }, { "epoch": 0.7456931911402789, "grad_norm": 0.15842659771442413, "learning_rate": 5e-05, "loss": 1.566, "step": 2727 }, { "epoch": 0.7459666393218485, "grad_norm": 0.1670447736978531, "learning_rate": 5e-05, "loss": 1.6299, "step": 2728 }, { "epoch": 0.7462400875034181, "grad_norm": 0.1572618931531906, "learning_rate": 5e-05, "loss": 1.5555, "step": 2729 }, { "epoch": 0.7465135356849877, "grad_norm": 0.16439321637153625, "learning_rate": 5e-05, "loss": 1.6149, "step": 2730 }, { "epoch": 0.7467869838665573, "grad_norm": 0.15821623802185059, "learning_rate": 5e-05, "loss": 1.6452, "step": 2731 }, { "epoch": 0.7470604320481269, "grad_norm": 0.16016224026679993, "learning_rate": 5e-05, "loss": 1.5371, "step": 2732 }, { "epoch": 0.7473338802296965, "grad_norm": 0.1666966676712036, "learning_rate": 5e-05, "loss": 1.5912, "step": 2733 }, { "epoch": 0.7476073284112661, "grad_norm": 0.1630515605211258, "learning_rate": 5e-05, "loss": 1.5903, "step": 2734 }, { "epoch": 0.7478807765928357, "grad_norm": 0.1738271415233612, "learning_rate": 5e-05, "loss": 1.6498, "step": 2735 }, { "epoch": 0.7481542247744053, "grad_norm": 0.16823971271514893, "learning_rate": 5e-05, "loss": 1.6878, "step": 2736 }, { "epoch": 0.7484276729559748, "grad_norm": 0.16174787282943726, "learning_rate": 5e-05, "loss": 1.5538, "step": 2737 }, { "epoch": 0.7487011211375444, "grad_norm": 0.15992052853107452, "learning_rate": 5e-05, "loss": 1.5307, "step": 2738 }, { "epoch": 0.748974569319114, "grad_norm": 0.17191752791404724, "learning_rate": 5e-05, "loss": 1.6057, "step": 2739 }, { "epoch": 0.7492480175006836, "grad_norm": 0.16447550058364868, "learning_rate": 5e-05, "loss": 1.6311, "step": 2740 }, { "epoch": 0.7495214656822532, "grad_norm": 0.16319876909255981, "learning_rate": 5e-05, "loss": 1.5762, "step": 2741 }, { "epoch": 0.7497949138638228, "grad_norm": 0.16644878685474396, "learning_rate": 5e-05, "loss": 1.5566, "step": 2742 }, { "epoch": 0.7500683620453924, "grad_norm": 0.16339637339115143, "learning_rate": 5e-05, "loss": 1.6049, "step": 2743 }, { "epoch": 0.750341810226962, "grad_norm": 0.1628854125738144, "learning_rate": 5e-05, "loss": 1.5755, "step": 2744 }, { "epoch": 0.7506152584085316, "grad_norm": 0.15712201595306396, "learning_rate": 5e-05, "loss": 1.5888, "step": 2745 }, { "epoch": 0.7508887065901012, "grad_norm": 0.15863806009292603, "learning_rate": 5e-05, "loss": 1.5662, "step": 2746 }, { "epoch": 0.7511621547716708, "grad_norm": 0.16975349187850952, "learning_rate": 5e-05, "loss": 1.6032, "step": 2747 }, { "epoch": 0.7514356029532404, "grad_norm": 0.15435737371444702, "learning_rate": 5e-05, "loss": 1.5288, "step": 2748 }, { "epoch": 0.7517090511348099, "grad_norm": 0.16747300326824188, "learning_rate": 5e-05, "loss": 1.59, "step": 2749 }, { "epoch": 0.7519824993163795, "grad_norm": 0.17562800645828247, "learning_rate": 5e-05, "loss": 1.6135, "step": 2750 }, { "epoch": 0.7522559474979491, "grad_norm": 0.1631203293800354, "learning_rate": 5e-05, "loss": 1.6197, "step": 2751 }, { "epoch": 0.7525293956795187, "grad_norm": 0.17813941836357117, "learning_rate": 5e-05, "loss": 1.607, "step": 2752 }, { "epoch": 0.7528028438610883, "grad_norm": 0.15916167199611664, "learning_rate": 5e-05, "loss": 1.6042, "step": 2753 }, { "epoch": 0.7530762920426579, "grad_norm": 0.18315176665782928, "learning_rate": 5e-05, "loss": 1.6809, "step": 2754 }, { "epoch": 0.7533497402242275, "grad_norm": 0.16371318697929382, "learning_rate": 5e-05, "loss": 1.58, "step": 2755 }, { "epoch": 0.7536231884057971, "grad_norm": 0.1591997742652893, "learning_rate": 5e-05, "loss": 1.5595, "step": 2756 }, { "epoch": 0.7538966365873667, "grad_norm": 0.1630491465330124, "learning_rate": 5e-05, "loss": 1.6129, "step": 2757 }, { "epoch": 0.7541700847689363, "grad_norm": 0.1691833734512329, "learning_rate": 5e-05, "loss": 1.6539, "step": 2758 }, { "epoch": 0.7544435329505059, "grad_norm": 0.16124382615089417, "learning_rate": 5e-05, "loss": 1.6068, "step": 2759 }, { "epoch": 0.7547169811320755, "grad_norm": 0.16120778024196625, "learning_rate": 5e-05, "loss": 1.6477, "step": 2760 }, { "epoch": 0.754990429313645, "grad_norm": 0.16382825374603271, "learning_rate": 5e-05, "loss": 1.6095, "step": 2761 }, { "epoch": 0.7552638774952146, "grad_norm": 0.16977065801620483, "learning_rate": 5e-05, "loss": 1.6844, "step": 2762 }, { "epoch": 0.7555373256767842, "grad_norm": 0.16113348305225372, "learning_rate": 5e-05, "loss": 1.6008, "step": 2763 }, { "epoch": 0.7558107738583538, "grad_norm": 0.17500002682209015, "learning_rate": 5e-05, "loss": 1.5729, "step": 2764 }, { "epoch": 0.7560842220399234, "grad_norm": 0.16505929827690125, "learning_rate": 5e-05, "loss": 1.5522, "step": 2765 }, { "epoch": 0.756357670221493, "grad_norm": 0.16675904393196106, "learning_rate": 5e-05, "loss": 1.6734, "step": 2766 }, { "epoch": 0.7566311184030626, "grad_norm": 0.20538048446178436, "learning_rate": 5e-05, "loss": 1.6777, "step": 2767 }, { "epoch": 0.7569045665846322, "grad_norm": 0.17019402980804443, "learning_rate": 5e-05, "loss": 1.6098, "step": 2768 }, { "epoch": 0.7571780147662018, "grad_norm": 0.18559719622135162, "learning_rate": 5e-05, "loss": 1.6489, "step": 2769 }, { "epoch": 0.7574514629477714, "grad_norm": 0.16804775595664978, "learning_rate": 5e-05, "loss": 1.5647, "step": 2770 }, { "epoch": 0.757724911129341, "grad_norm": 0.1561867594718933, "learning_rate": 5e-05, "loss": 1.5402, "step": 2771 }, { "epoch": 0.7579983593109105, "grad_norm": 0.1730283796787262, "learning_rate": 5e-05, "loss": 1.6805, "step": 2772 }, { "epoch": 0.7582718074924801, "grad_norm": 0.17423667013645172, "learning_rate": 5e-05, "loss": 1.6453, "step": 2773 }, { "epoch": 0.7585452556740497, "grad_norm": 0.1615607887506485, "learning_rate": 5e-05, "loss": 1.5584, "step": 2774 }, { "epoch": 0.7588187038556193, "grad_norm": 0.16401253640651703, "learning_rate": 5e-05, "loss": 1.5841, "step": 2775 }, { "epoch": 0.7590921520371889, "grad_norm": 0.1691298633813858, "learning_rate": 5e-05, "loss": 1.6887, "step": 2776 }, { "epoch": 0.7593656002187585, "grad_norm": 0.15560850501060486, "learning_rate": 5e-05, "loss": 1.4923, "step": 2777 }, { "epoch": 0.7596390484003281, "grad_norm": 0.1585303395986557, "learning_rate": 5e-05, "loss": 1.5857, "step": 2778 }, { "epoch": 0.7599124965818977, "grad_norm": 0.16869275271892548, "learning_rate": 5e-05, "loss": 1.5873, "step": 2779 }, { "epoch": 0.7601859447634673, "grad_norm": 0.17015528678894043, "learning_rate": 5e-05, "loss": 1.7419, "step": 2780 }, { "epoch": 0.760459392945037, "grad_norm": 0.1577947735786438, "learning_rate": 5e-05, "loss": 1.5961, "step": 2781 }, { "epoch": 0.7607328411266066, "grad_norm": 0.16766513884067535, "learning_rate": 5e-05, "loss": 1.6623, "step": 2782 }, { "epoch": 0.7610062893081762, "grad_norm": 0.15825945138931274, "learning_rate": 5e-05, "loss": 1.6, "step": 2783 }, { "epoch": 0.7612797374897456, "grad_norm": 0.16338619589805603, "learning_rate": 5e-05, "loss": 1.677, "step": 2784 }, { "epoch": 0.7615531856713152, "grad_norm": 0.16439124941825867, "learning_rate": 5e-05, "loss": 1.6444, "step": 2785 }, { "epoch": 0.7618266338528848, "grad_norm": 0.1587378978729248, "learning_rate": 5e-05, "loss": 1.6477, "step": 2786 }, { "epoch": 0.7621000820344545, "grad_norm": 0.16001760959625244, "learning_rate": 5e-05, "loss": 1.638, "step": 2787 }, { "epoch": 0.762373530216024, "grad_norm": 0.16488565504550934, "learning_rate": 5e-05, "loss": 1.5758, "step": 2788 }, { "epoch": 0.7626469783975937, "grad_norm": 0.17125697433948517, "learning_rate": 5e-05, "loss": 1.6488, "step": 2789 }, { "epoch": 0.7629204265791633, "grad_norm": 0.16552145779132843, "learning_rate": 5e-05, "loss": 1.6288, "step": 2790 }, { "epoch": 0.7631938747607329, "grad_norm": 0.16142013669013977, "learning_rate": 5e-05, "loss": 1.5717, "step": 2791 }, { "epoch": 0.7634673229423025, "grad_norm": 0.17424419522285461, "learning_rate": 5e-05, "loss": 1.6741, "step": 2792 }, { "epoch": 0.7637407711238721, "grad_norm": 0.1656377911567688, "learning_rate": 5e-05, "loss": 1.5975, "step": 2793 }, { "epoch": 0.7640142193054417, "grad_norm": 0.1610795110464096, "learning_rate": 5e-05, "loss": 1.6058, "step": 2794 }, { "epoch": 0.7642876674870112, "grad_norm": 0.160441592335701, "learning_rate": 5e-05, "loss": 1.5343, "step": 2795 }, { "epoch": 0.7645611156685808, "grad_norm": 0.16838759183883667, "learning_rate": 5e-05, "loss": 1.5573, "step": 2796 }, { "epoch": 0.7648345638501504, "grad_norm": 0.16315698623657227, "learning_rate": 5e-05, "loss": 1.5484, "step": 2797 }, { "epoch": 0.76510801203172, "grad_norm": 0.17179346084594727, "learning_rate": 5e-05, "loss": 1.7635, "step": 2798 }, { "epoch": 0.7653814602132896, "grad_norm": 0.15899762511253357, "learning_rate": 5e-05, "loss": 1.645, "step": 2799 }, { "epoch": 0.7656549083948592, "grad_norm": 0.1585722267627716, "learning_rate": 5e-05, "loss": 1.5657, "step": 2800 }, { "epoch": 0.7659283565764288, "grad_norm": 0.16446919739246368, "learning_rate": 5e-05, "loss": 1.6099, "step": 2801 }, { "epoch": 0.7662018047579984, "grad_norm": 0.16195422410964966, "learning_rate": 5e-05, "loss": 1.5312, "step": 2802 }, { "epoch": 0.766475252939568, "grad_norm": 0.1638331413269043, "learning_rate": 5e-05, "loss": 1.573, "step": 2803 }, { "epoch": 0.7667487011211376, "grad_norm": 0.16117534041404724, "learning_rate": 5e-05, "loss": 1.5535, "step": 2804 }, { "epoch": 0.7670221493027072, "grad_norm": 0.1906486600637436, "learning_rate": 5e-05, "loss": 1.6745, "step": 2805 }, { "epoch": 0.7672955974842768, "grad_norm": 0.1637091189622879, "learning_rate": 5e-05, "loss": 1.6076, "step": 2806 }, { "epoch": 0.7675690456658463, "grad_norm": 0.16975869238376617, "learning_rate": 5e-05, "loss": 1.5807, "step": 2807 }, { "epoch": 0.7678424938474159, "grad_norm": 0.16839958727359772, "learning_rate": 5e-05, "loss": 1.5136, "step": 2808 }, { "epoch": 0.7681159420289855, "grad_norm": 0.16886620223522186, "learning_rate": 5e-05, "loss": 1.6107, "step": 2809 }, { "epoch": 0.7683893902105551, "grad_norm": 0.16913080215454102, "learning_rate": 5e-05, "loss": 1.547, "step": 2810 }, { "epoch": 0.7686628383921247, "grad_norm": 0.17105981707572937, "learning_rate": 5e-05, "loss": 1.6501, "step": 2811 }, { "epoch": 0.7689362865736943, "grad_norm": 0.16617277264595032, "learning_rate": 5e-05, "loss": 1.6319, "step": 2812 }, { "epoch": 0.7692097347552639, "grad_norm": 0.16640552878379822, "learning_rate": 5e-05, "loss": 1.5152, "step": 2813 }, { "epoch": 0.7694831829368335, "grad_norm": 0.1717168390750885, "learning_rate": 5e-05, "loss": 1.5904, "step": 2814 }, { "epoch": 0.7697566311184031, "grad_norm": 0.15507300198078156, "learning_rate": 5e-05, "loss": 1.4552, "step": 2815 }, { "epoch": 0.7700300792999727, "grad_norm": 0.16422231495380402, "learning_rate": 5e-05, "loss": 1.6003, "step": 2816 }, { "epoch": 0.7703035274815423, "grad_norm": 0.16993476450443268, "learning_rate": 5e-05, "loss": 1.5668, "step": 2817 }, { "epoch": 0.7705769756631118, "grad_norm": 0.16527430713176727, "learning_rate": 5e-05, "loss": 1.5809, "step": 2818 }, { "epoch": 0.7708504238446814, "grad_norm": 0.16234123706817627, "learning_rate": 5e-05, "loss": 1.5393, "step": 2819 }, { "epoch": 0.771123872026251, "grad_norm": 0.17365649342536926, "learning_rate": 5e-05, "loss": 1.5758, "step": 2820 }, { "epoch": 0.7713973202078206, "grad_norm": 0.17129479348659515, "learning_rate": 5e-05, "loss": 1.6084, "step": 2821 }, { "epoch": 0.7716707683893902, "grad_norm": 0.16345179080963135, "learning_rate": 5e-05, "loss": 1.5886, "step": 2822 }, { "epoch": 0.7719442165709598, "grad_norm": 0.17358049750328064, "learning_rate": 5e-05, "loss": 1.6457, "step": 2823 }, { "epoch": 0.7722176647525294, "grad_norm": 0.17579670250415802, "learning_rate": 5e-05, "loss": 1.7123, "step": 2824 }, { "epoch": 0.772491112934099, "grad_norm": 0.1691892147064209, "learning_rate": 5e-05, "loss": 1.6226, "step": 2825 }, { "epoch": 0.7727645611156686, "grad_norm": 0.17657522857189178, "learning_rate": 5e-05, "loss": 1.6072, "step": 2826 }, { "epoch": 0.7730380092972382, "grad_norm": 0.17486031353473663, "learning_rate": 5e-05, "loss": 1.5497, "step": 2827 }, { "epoch": 0.7733114574788078, "grad_norm": 0.16051729023456573, "learning_rate": 5e-05, "loss": 1.4969, "step": 2828 }, { "epoch": 0.7735849056603774, "grad_norm": 0.16621479392051697, "learning_rate": 5e-05, "loss": 1.6151, "step": 2829 }, { "epoch": 0.7738583538419469, "grad_norm": 0.16369469463825226, "learning_rate": 5e-05, "loss": 1.6707, "step": 2830 }, { "epoch": 0.7741318020235165, "grad_norm": 0.1664346307516098, "learning_rate": 5e-05, "loss": 1.552, "step": 2831 }, { "epoch": 0.7744052502050861, "grad_norm": 0.1688821017742157, "learning_rate": 5e-05, "loss": 1.566, "step": 2832 }, { "epoch": 0.7746786983866557, "grad_norm": 0.15843422710895538, "learning_rate": 5e-05, "loss": 1.6504, "step": 2833 }, { "epoch": 0.7749521465682253, "grad_norm": 0.16451962292194366, "learning_rate": 5e-05, "loss": 1.5611, "step": 2834 }, { "epoch": 0.7752255947497949, "grad_norm": 0.17255376279354095, "learning_rate": 5e-05, "loss": 1.5388, "step": 2835 }, { "epoch": 0.7754990429313645, "grad_norm": 0.1702892780303955, "learning_rate": 5e-05, "loss": 1.6152, "step": 2836 }, { "epoch": 0.7757724911129341, "grad_norm": 0.16479864716529846, "learning_rate": 5e-05, "loss": 1.677, "step": 2837 }, { "epoch": 0.7760459392945037, "grad_norm": 0.16758742928504944, "learning_rate": 5e-05, "loss": 1.5406, "step": 2838 }, { "epoch": 0.7763193874760733, "grad_norm": 0.1617206186056137, "learning_rate": 5e-05, "loss": 1.635, "step": 2839 }, { "epoch": 0.7765928356576429, "grad_norm": 0.1629093438386917, "learning_rate": 5e-05, "loss": 1.5516, "step": 2840 }, { "epoch": 0.7768662838392125, "grad_norm": 0.16901403665542603, "learning_rate": 5e-05, "loss": 1.6223, "step": 2841 }, { "epoch": 0.777139732020782, "grad_norm": 0.16104839742183685, "learning_rate": 5e-05, "loss": 1.6432, "step": 2842 }, { "epoch": 0.7774131802023516, "grad_norm": 0.15974918007850647, "learning_rate": 5e-05, "loss": 1.5034, "step": 2843 }, { "epoch": 0.7776866283839212, "grad_norm": 0.1669033169746399, "learning_rate": 5e-05, "loss": 1.6008, "step": 2844 }, { "epoch": 0.7779600765654908, "grad_norm": 0.17953120172023773, "learning_rate": 5e-05, "loss": 1.6714, "step": 2845 }, { "epoch": 0.7782335247470604, "grad_norm": 0.16718675196170807, "learning_rate": 5e-05, "loss": 1.5205, "step": 2846 }, { "epoch": 0.77850697292863, "grad_norm": 0.16724152863025665, "learning_rate": 5e-05, "loss": 1.5884, "step": 2847 }, { "epoch": 0.7787804211101996, "grad_norm": 0.16082365810871124, "learning_rate": 5e-05, "loss": 1.6615, "step": 2848 }, { "epoch": 0.7790538692917692, "grad_norm": 0.15853464603424072, "learning_rate": 5e-05, "loss": 1.5306, "step": 2849 }, { "epoch": 0.7793273174733388, "grad_norm": 0.1667121946811676, "learning_rate": 5e-05, "loss": 1.5941, "step": 2850 }, { "epoch": 0.7796007656549084, "grad_norm": 0.1697937250137329, "learning_rate": 5e-05, "loss": 1.6127, "step": 2851 }, { "epoch": 0.779874213836478, "grad_norm": 0.158981055021286, "learning_rate": 5e-05, "loss": 1.5672, "step": 2852 }, { "epoch": 0.7801476620180475, "grad_norm": 0.1826319545507431, "learning_rate": 5e-05, "loss": 1.6604, "step": 2853 }, { "epoch": 0.7804211101996171, "grad_norm": 0.17479664087295532, "learning_rate": 5e-05, "loss": 1.6348, "step": 2854 }, { "epoch": 0.7806945583811867, "grad_norm": 0.17905382812023163, "learning_rate": 5e-05, "loss": 1.5434, "step": 2855 }, { "epoch": 0.7809680065627563, "grad_norm": 0.1738382875919342, "learning_rate": 5e-05, "loss": 1.6045, "step": 2856 }, { "epoch": 0.7812414547443259, "grad_norm": 0.16724707186222076, "learning_rate": 5e-05, "loss": 1.5591, "step": 2857 }, { "epoch": 0.7815149029258955, "grad_norm": 0.17346110939979553, "learning_rate": 5e-05, "loss": 1.5721, "step": 2858 }, { "epoch": 0.7817883511074651, "grad_norm": 0.16252191364765167, "learning_rate": 5e-05, "loss": 1.5935, "step": 2859 }, { "epoch": 0.7820617992890347, "grad_norm": 0.1672922968864441, "learning_rate": 5e-05, "loss": 1.6467, "step": 2860 }, { "epoch": 0.7823352474706043, "grad_norm": 0.1666378527879715, "learning_rate": 5e-05, "loss": 1.6392, "step": 2861 }, { "epoch": 0.782608695652174, "grad_norm": 0.1650533229112625, "learning_rate": 5e-05, "loss": 1.5737, "step": 2862 }, { "epoch": 0.7828821438337435, "grad_norm": 0.16479378938674927, "learning_rate": 5e-05, "loss": 1.6066, "step": 2863 }, { "epoch": 0.7831555920153132, "grad_norm": 0.16486263275146484, "learning_rate": 5e-05, "loss": 1.5617, "step": 2864 }, { "epoch": 0.7834290401968826, "grad_norm": 0.1614285111427307, "learning_rate": 5e-05, "loss": 1.6226, "step": 2865 }, { "epoch": 0.7837024883784522, "grad_norm": 0.176860049366951, "learning_rate": 5e-05, "loss": 1.5129, "step": 2866 }, { "epoch": 0.7839759365600218, "grad_norm": 0.16381007432937622, "learning_rate": 5e-05, "loss": 1.6524, "step": 2867 }, { "epoch": 0.7842493847415914, "grad_norm": 0.15981391072273254, "learning_rate": 5e-05, "loss": 1.5611, "step": 2868 }, { "epoch": 0.784522832923161, "grad_norm": 0.17717792093753815, "learning_rate": 5e-05, "loss": 1.691, "step": 2869 }, { "epoch": 0.7847962811047307, "grad_norm": 0.15418937802314758, "learning_rate": 5e-05, "loss": 1.5502, "step": 2870 }, { "epoch": 0.7850697292863003, "grad_norm": 0.17375600337982178, "learning_rate": 5e-05, "loss": 1.6708, "step": 2871 }, { "epoch": 0.7853431774678699, "grad_norm": 0.16681233048439026, "learning_rate": 5e-05, "loss": 1.6323, "step": 2872 }, { "epoch": 0.7856166256494395, "grad_norm": 0.15948568284511566, "learning_rate": 5e-05, "loss": 1.5582, "step": 2873 }, { "epoch": 0.7858900738310091, "grad_norm": 0.16835032403469086, "learning_rate": 5e-05, "loss": 1.5979, "step": 2874 }, { "epoch": 0.7861635220125787, "grad_norm": 0.16147814691066742, "learning_rate": 5e-05, "loss": 1.5618, "step": 2875 }, { "epoch": 0.7864369701941482, "grad_norm": 0.15999336540699005, "learning_rate": 5e-05, "loss": 1.5351, "step": 2876 }, { "epoch": 0.7867104183757178, "grad_norm": 0.17828235030174255, "learning_rate": 5e-05, "loss": 1.6319, "step": 2877 }, { "epoch": 0.7869838665572874, "grad_norm": 0.16279451549053192, "learning_rate": 5e-05, "loss": 1.5923, "step": 2878 }, { "epoch": 0.787257314738857, "grad_norm": 0.16363869607448578, "learning_rate": 5e-05, "loss": 1.6645, "step": 2879 }, { "epoch": 0.7875307629204266, "grad_norm": 0.17845980823040009, "learning_rate": 5e-05, "loss": 1.6098, "step": 2880 }, { "epoch": 0.7878042111019962, "grad_norm": 0.17278538644313812, "learning_rate": 5e-05, "loss": 1.6211, "step": 2881 }, { "epoch": 0.7880776592835658, "grad_norm": 0.15572743117809296, "learning_rate": 5e-05, "loss": 1.5377, "step": 2882 }, { "epoch": 0.7883511074651354, "grad_norm": 0.18251000344753265, "learning_rate": 5e-05, "loss": 1.769, "step": 2883 }, { "epoch": 0.788624555646705, "grad_norm": 0.17334499955177307, "learning_rate": 5e-05, "loss": 1.6428, "step": 2884 }, { "epoch": 0.7888980038282746, "grad_norm": 0.17535856366157532, "learning_rate": 5e-05, "loss": 1.7187, "step": 2885 }, { "epoch": 0.7891714520098442, "grad_norm": 0.16317224502563477, "learning_rate": 5e-05, "loss": 1.4863, "step": 2886 }, { "epoch": 0.7894449001914138, "grad_norm": 0.18952353298664093, "learning_rate": 5e-05, "loss": 1.7284, "step": 2887 }, { "epoch": 0.7897183483729833, "grad_norm": 0.17403848469257355, "learning_rate": 5e-05, "loss": 1.5511, "step": 2888 }, { "epoch": 0.7899917965545529, "grad_norm": 0.17016200721263885, "learning_rate": 5e-05, "loss": 1.5673, "step": 2889 }, { "epoch": 0.7902652447361225, "grad_norm": 0.16698943078517914, "learning_rate": 5e-05, "loss": 1.5506, "step": 2890 }, { "epoch": 0.7905386929176921, "grad_norm": 0.1672915667295456, "learning_rate": 5e-05, "loss": 1.5712, "step": 2891 }, { "epoch": 0.7908121410992617, "grad_norm": 0.17247925698757172, "learning_rate": 5e-05, "loss": 1.4931, "step": 2892 }, { "epoch": 0.7910855892808313, "grad_norm": 0.15129195153713226, "learning_rate": 5e-05, "loss": 1.5219, "step": 2893 }, { "epoch": 0.7913590374624009, "grad_norm": 0.16134491562843323, "learning_rate": 5e-05, "loss": 1.5749, "step": 2894 }, { "epoch": 0.7916324856439705, "grad_norm": 0.17356084287166595, "learning_rate": 5e-05, "loss": 1.607, "step": 2895 }, { "epoch": 0.7919059338255401, "grad_norm": 0.16742588579654694, "learning_rate": 5e-05, "loss": 1.515, "step": 2896 }, { "epoch": 0.7921793820071097, "grad_norm": 0.16483165323734283, "learning_rate": 5e-05, "loss": 1.5906, "step": 2897 }, { "epoch": 0.7924528301886793, "grad_norm": 0.1711149662733078, "learning_rate": 5e-05, "loss": 1.6759, "step": 2898 }, { "epoch": 0.7927262783702488, "grad_norm": 0.1694236695766449, "learning_rate": 5e-05, "loss": 1.5804, "step": 2899 }, { "epoch": 0.7929997265518184, "grad_norm": 0.15667274594306946, "learning_rate": 5e-05, "loss": 1.5717, "step": 2900 }, { "epoch": 0.793273174733388, "grad_norm": 0.18144182860851288, "learning_rate": 5e-05, "loss": 1.5945, "step": 2901 }, { "epoch": 0.7935466229149576, "grad_norm": 0.16796544194221497, "learning_rate": 5e-05, "loss": 1.6503, "step": 2902 }, { "epoch": 0.7938200710965272, "grad_norm": 0.16234607994556427, "learning_rate": 5e-05, "loss": 1.6427, "step": 2903 }, { "epoch": 0.7940935192780968, "grad_norm": 0.16550585627555847, "learning_rate": 5e-05, "loss": 1.6079, "step": 2904 }, { "epoch": 0.7943669674596664, "grad_norm": 0.1626591831445694, "learning_rate": 5e-05, "loss": 1.5215, "step": 2905 }, { "epoch": 0.794640415641236, "grad_norm": 0.1648007035255432, "learning_rate": 5e-05, "loss": 1.4877, "step": 2906 }, { "epoch": 0.7949138638228056, "grad_norm": 0.1771146059036255, "learning_rate": 5e-05, "loss": 1.5802, "step": 2907 }, { "epoch": 0.7951873120043752, "grad_norm": 0.16548393666744232, "learning_rate": 5e-05, "loss": 1.6135, "step": 2908 }, { "epoch": 0.7954607601859448, "grad_norm": 0.18224813044071198, "learning_rate": 5e-05, "loss": 1.5849, "step": 2909 }, { "epoch": 0.7957342083675144, "grad_norm": 0.19087089598178864, "learning_rate": 5e-05, "loss": 1.5959, "step": 2910 }, { "epoch": 0.7960076565490839, "grad_norm": 0.16123077273368835, "learning_rate": 5e-05, "loss": 1.5631, "step": 2911 }, { "epoch": 0.7962811047306535, "grad_norm": 0.18250906467437744, "learning_rate": 5e-05, "loss": 1.6527, "step": 2912 }, { "epoch": 0.7965545529122231, "grad_norm": 0.18301981687545776, "learning_rate": 5e-05, "loss": 1.6438, "step": 2913 }, { "epoch": 0.7968280010937927, "grad_norm": 0.15846803784370422, "learning_rate": 5e-05, "loss": 1.5266, "step": 2914 }, { "epoch": 0.7971014492753623, "grad_norm": 0.16750794649124146, "learning_rate": 5e-05, "loss": 1.6276, "step": 2915 }, { "epoch": 0.7973748974569319, "grad_norm": 0.16995219886302948, "learning_rate": 5e-05, "loss": 1.7035, "step": 2916 }, { "epoch": 0.7976483456385015, "grad_norm": 0.16649973392486572, "learning_rate": 5e-05, "loss": 1.5616, "step": 2917 }, { "epoch": 0.7979217938200711, "grad_norm": 0.14843766391277313, "learning_rate": 5e-05, "loss": 1.428, "step": 2918 }, { "epoch": 0.7981952420016407, "grad_norm": 0.17352357506752014, "learning_rate": 5e-05, "loss": 1.6872, "step": 2919 }, { "epoch": 0.7984686901832103, "grad_norm": 0.16598494350910187, "learning_rate": 5e-05, "loss": 1.6047, "step": 2920 }, { "epoch": 0.7987421383647799, "grad_norm": 0.1559354066848755, "learning_rate": 5e-05, "loss": 1.5798, "step": 2921 }, { "epoch": 0.7990155865463495, "grad_norm": 0.1671651154756546, "learning_rate": 5e-05, "loss": 1.599, "step": 2922 }, { "epoch": 0.799289034727919, "grad_norm": 0.15780937671661377, "learning_rate": 5e-05, "loss": 1.5554, "step": 2923 }, { "epoch": 0.7995624829094886, "grad_norm": 0.16018469631671906, "learning_rate": 5e-05, "loss": 1.5614, "step": 2924 }, { "epoch": 0.7998359310910582, "grad_norm": 0.16393598914146423, "learning_rate": 5e-05, "loss": 1.6024, "step": 2925 }, { "epoch": 0.8001093792726278, "grad_norm": 0.15699848532676697, "learning_rate": 5e-05, "loss": 1.5343, "step": 2926 }, { "epoch": 0.8003828274541974, "grad_norm": 0.1754962056875229, "learning_rate": 5e-05, "loss": 1.6674, "step": 2927 }, { "epoch": 0.800656275635767, "grad_norm": 0.16628246009349823, "learning_rate": 5e-05, "loss": 1.6243, "step": 2928 }, { "epoch": 0.8009297238173366, "grad_norm": 0.1607159823179245, "learning_rate": 5e-05, "loss": 1.6163, "step": 2929 }, { "epoch": 0.8012031719989062, "grad_norm": 0.17041060328483582, "learning_rate": 5e-05, "loss": 1.6411, "step": 2930 }, { "epoch": 0.8014766201804758, "grad_norm": 0.1616741120815277, "learning_rate": 5e-05, "loss": 1.4652, "step": 2931 }, { "epoch": 0.8017500683620454, "grad_norm": 0.16323620080947876, "learning_rate": 5e-05, "loss": 1.6336, "step": 2932 }, { "epoch": 0.802023516543615, "grad_norm": 0.16059629619121552, "learning_rate": 5e-05, "loss": 1.6735, "step": 2933 }, { "epoch": 0.8022969647251845, "grad_norm": 0.17597347497940063, "learning_rate": 5e-05, "loss": 1.6076, "step": 2934 }, { "epoch": 0.8025704129067541, "grad_norm": 0.16613614559173584, "learning_rate": 5e-05, "loss": 1.607, "step": 2935 }, { "epoch": 0.8028438610883237, "grad_norm": 0.17486806213855743, "learning_rate": 5e-05, "loss": 1.6023, "step": 2936 }, { "epoch": 0.8031173092698933, "grad_norm": 0.1755034178495407, "learning_rate": 5e-05, "loss": 1.601, "step": 2937 }, { "epoch": 0.8033907574514629, "grad_norm": 0.16530238091945648, "learning_rate": 5e-05, "loss": 1.5338, "step": 2938 }, { "epoch": 0.8036642056330325, "grad_norm": 0.18131126463413239, "learning_rate": 5e-05, "loss": 1.7062, "step": 2939 }, { "epoch": 0.8039376538146021, "grad_norm": 0.1728467047214508, "learning_rate": 5e-05, "loss": 1.6545, "step": 2940 }, { "epoch": 0.8042111019961717, "grad_norm": 0.15401597321033478, "learning_rate": 5e-05, "loss": 1.4845, "step": 2941 }, { "epoch": 0.8044845501777413, "grad_norm": 0.18329520523548126, "learning_rate": 5e-05, "loss": 1.6014, "step": 2942 }, { "epoch": 0.8047579983593109, "grad_norm": 0.16439451277256012, "learning_rate": 5e-05, "loss": 1.5848, "step": 2943 }, { "epoch": 0.8050314465408805, "grad_norm": 0.16697899997234344, "learning_rate": 5e-05, "loss": 1.6285, "step": 2944 }, { "epoch": 0.8053048947224501, "grad_norm": 0.1735549122095108, "learning_rate": 5e-05, "loss": 1.7079, "step": 2945 }, { "epoch": 0.8055783429040196, "grad_norm": 0.16093853116035461, "learning_rate": 5e-05, "loss": 1.5457, "step": 2946 }, { "epoch": 0.8058517910855892, "grad_norm": 0.1812943071126938, "learning_rate": 5e-05, "loss": 1.7846, "step": 2947 }, { "epoch": 0.8061252392671588, "grad_norm": 0.1680847555398941, "learning_rate": 5e-05, "loss": 1.6172, "step": 2948 }, { "epoch": 0.8063986874487284, "grad_norm": 0.1760333627462387, "learning_rate": 5e-05, "loss": 1.6645, "step": 2949 }, { "epoch": 0.806672135630298, "grad_norm": 0.16148562729358673, "learning_rate": 5e-05, "loss": 1.5527, "step": 2950 }, { "epoch": 0.8069455838118677, "grad_norm": 0.16784906387329102, "learning_rate": 5e-05, "loss": 1.6316, "step": 2951 }, { "epoch": 0.8072190319934373, "grad_norm": 0.16758820414543152, "learning_rate": 5e-05, "loss": 1.566, "step": 2952 }, { "epoch": 0.8074924801750069, "grad_norm": 0.16588006913661957, "learning_rate": 5e-05, "loss": 1.5411, "step": 2953 }, { "epoch": 0.8077659283565765, "grad_norm": 0.15759024024009705, "learning_rate": 5e-05, "loss": 1.497, "step": 2954 }, { "epoch": 0.8080393765381461, "grad_norm": 0.1606815755367279, "learning_rate": 5e-05, "loss": 1.5874, "step": 2955 }, { "epoch": 0.8083128247197157, "grad_norm": 0.171406090259552, "learning_rate": 5e-05, "loss": 1.6286, "step": 2956 }, { "epoch": 0.8085862729012852, "grad_norm": 0.1659841537475586, "learning_rate": 5e-05, "loss": 1.642, "step": 2957 }, { "epoch": 0.8088597210828548, "grad_norm": 0.1642453819513321, "learning_rate": 5e-05, "loss": 1.6138, "step": 2958 }, { "epoch": 0.8091331692644244, "grad_norm": 0.1737990528345108, "learning_rate": 5e-05, "loss": 1.6755, "step": 2959 }, { "epoch": 0.809406617445994, "grad_norm": 0.16041895747184753, "learning_rate": 5e-05, "loss": 1.5752, "step": 2960 }, { "epoch": 0.8096800656275636, "grad_norm": 0.16759610176086426, "learning_rate": 5e-05, "loss": 1.5574, "step": 2961 }, { "epoch": 0.8099535138091332, "grad_norm": 0.17192721366882324, "learning_rate": 5e-05, "loss": 1.6143, "step": 2962 }, { "epoch": 0.8102269619907028, "grad_norm": 0.17108719050884247, "learning_rate": 5e-05, "loss": 1.668, "step": 2963 }, { "epoch": 0.8105004101722724, "grad_norm": 0.1675434559583664, "learning_rate": 5e-05, "loss": 1.596, "step": 2964 }, { "epoch": 0.810773858353842, "grad_norm": 0.16867220401763916, "learning_rate": 5e-05, "loss": 1.6051, "step": 2965 }, { "epoch": 0.8110473065354116, "grad_norm": 0.16799405217170715, "learning_rate": 5e-05, "loss": 1.6316, "step": 2966 }, { "epoch": 0.8113207547169812, "grad_norm": 0.1661425083875656, "learning_rate": 5e-05, "loss": 1.6354, "step": 2967 }, { "epoch": 0.8115942028985508, "grad_norm": 0.17176170647144318, "learning_rate": 5e-05, "loss": 1.5873, "step": 2968 }, { "epoch": 0.8118676510801203, "grad_norm": 0.16512513160705566, "learning_rate": 5e-05, "loss": 1.5885, "step": 2969 }, { "epoch": 0.8121410992616899, "grad_norm": 0.1690187305212021, "learning_rate": 5e-05, "loss": 1.6306, "step": 2970 }, { "epoch": 0.8124145474432595, "grad_norm": 0.17420509457588196, "learning_rate": 5e-05, "loss": 1.49, "step": 2971 }, { "epoch": 0.8126879956248291, "grad_norm": 0.16634975373744965, "learning_rate": 5e-05, "loss": 1.5559, "step": 2972 }, { "epoch": 0.8129614438063987, "grad_norm": 0.1683768481016159, "learning_rate": 5e-05, "loss": 1.5532, "step": 2973 }, { "epoch": 0.8132348919879683, "grad_norm": 0.16414394974708557, "learning_rate": 5e-05, "loss": 1.5992, "step": 2974 }, { "epoch": 0.8135083401695379, "grad_norm": 0.16777318716049194, "learning_rate": 5e-05, "loss": 1.6227, "step": 2975 }, { "epoch": 0.8137817883511075, "grad_norm": 0.16975970566272736, "learning_rate": 5e-05, "loss": 1.589, "step": 2976 }, { "epoch": 0.8140552365326771, "grad_norm": 0.16181518137454987, "learning_rate": 5e-05, "loss": 1.6104, "step": 2977 }, { "epoch": 0.8143286847142467, "grad_norm": 0.16820965707302094, "learning_rate": 5e-05, "loss": 1.597, "step": 2978 }, { "epoch": 0.8146021328958163, "grad_norm": 0.17384564876556396, "learning_rate": 5e-05, "loss": 1.61, "step": 2979 }, { "epoch": 0.8148755810773858, "grad_norm": 0.17363837361335754, "learning_rate": 5e-05, "loss": 1.6574, "step": 2980 }, { "epoch": 0.8151490292589554, "grad_norm": 0.17065000534057617, "learning_rate": 5e-05, "loss": 1.6128, "step": 2981 }, { "epoch": 0.815422477440525, "grad_norm": 0.1675603985786438, "learning_rate": 5e-05, "loss": 1.6028, "step": 2982 }, { "epoch": 0.8156959256220946, "grad_norm": 0.16641339659690857, "learning_rate": 5e-05, "loss": 1.6176, "step": 2983 }, { "epoch": 0.8159693738036642, "grad_norm": 0.16285206377506256, "learning_rate": 5e-05, "loss": 1.6211, "step": 2984 }, { "epoch": 0.8162428219852338, "grad_norm": 0.1820947378873825, "learning_rate": 5e-05, "loss": 1.6473, "step": 2985 }, { "epoch": 0.8165162701668034, "grad_norm": 0.16042275726795197, "learning_rate": 5e-05, "loss": 1.5818, "step": 2986 }, { "epoch": 0.816789718348373, "grad_norm": 0.1615065634250641, "learning_rate": 5e-05, "loss": 1.5165, "step": 2987 }, { "epoch": 0.8170631665299426, "grad_norm": 0.17090201377868652, "learning_rate": 5e-05, "loss": 1.653, "step": 2988 }, { "epoch": 0.8173366147115122, "grad_norm": 0.1570872813463211, "learning_rate": 5e-05, "loss": 1.5234, "step": 2989 }, { "epoch": 0.8176100628930818, "grad_norm": 0.16220347583293915, "learning_rate": 5e-05, "loss": 1.6026, "step": 2990 }, { "epoch": 0.8178835110746514, "grad_norm": 0.17709346115589142, "learning_rate": 5e-05, "loss": 1.5405, "step": 2991 }, { "epoch": 0.8181569592562209, "grad_norm": 0.15809805691242218, "learning_rate": 5e-05, "loss": 1.5744, "step": 2992 }, { "epoch": 0.8184304074377905, "grad_norm": 0.17646804451942444, "learning_rate": 5e-05, "loss": 1.6328, "step": 2993 }, { "epoch": 0.8187038556193601, "grad_norm": 0.19047993421554565, "learning_rate": 5e-05, "loss": 1.6451, "step": 2994 }, { "epoch": 0.8189773038009297, "grad_norm": 0.16458064317703247, "learning_rate": 5e-05, "loss": 1.6942, "step": 2995 }, { "epoch": 0.8192507519824993, "grad_norm": 0.17090070247650146, "learning_rate": 5e-05, "loss": 1.623, "step": 2996 }, { "epoch": 0.8195242001640689, "grad_norm": 0.1800215095281601, "learning_rate": 5e-05, "loss": 1.5693, "step": 2997 }, { "epoch": 0.8197976483456385, "grad_norm": 0.16883789002895355, "learning_rate": 5e-05, "loss": 1.5624, "step": 2998 }, { "epoch": 0.8200710965272081, "grad_norm": 0.16943061351776123, "learning_rate": 5e-05, "loss": 1.6307, "step": 2999 }, { "epoch": 0.8203445447087777, "grad_norm": 0.1675194501876831, "learning_rate": 5e-05, "loss": 1.5902, "step": 3000 }, { "epoch": 0.8206179928903473, "grad_norm": 0.16143114864826202, "learning_rate": 5e-05, "loss": 1.5696, "step": 3001 }, { "epoch": 0.8208914410719169, "grad_norm": 0.16645751893520355, "learning_rate": 5e-05, "loss": 1.6472, "step": 3002 }, { "epoch": 0.8211648892534865, "grad_norm": 0.16916631162166595, "learning_rate": 5e-05, "loss": 1.5685, "step": 3003 }, { "epoch": 0.821438337435056, "grad_norm": 0.1605367660522461, "learning_rate": 5e-05, "loss": 1.6069, "step": 3004 }, { "epoch": 0.8217117856166256, "grad_norm": 0.1694014072418213, "learning_rate": 5e-05, "loss": 1.6411, "step": 3005 }, { "epoch": 0.8219852337981952, "grad_norm": 0.15868619084358215, "learning_rate": 5e-05, "loss": 1.5634, "step": 3006 }, { "epoch": 0.8222586819797648, "grad_norm": 0.16402362287044525, "learning_rate": 5e-05, "loss": 1.6254, "step": 3007 }, { "epoch": 0.8225321301613344, "grad_norm": 0.16820335388183594, "learning_rate": 5e-05, "loss": 1.6344, "step": 3008 }, { "epoch": 0.822805578342904, "grad_norm": 0.1659119576215744, "learning_rate": 5e-05, "loss": 1.6044, "step": 3009 }, { "epoch": 0.8230790265244736, "grad_norm": 0.177097886800766, "learning_rate": 5e-05, "loss": 1.7114, "step": 3010 }, { "epoch": 0.8233524747060432, "grad_norm": 0.16565869748592377, "learning_rate": 5e-05, "loss": 1.5614, "step": 3011 }, { "epoch": 0.8236259228876128, "grad_norm": 0.16346898674964905, "learning_rate": 5e-05, "loss": 1.6737, "step": 3012 }, { "epoch": 0.8238993710691824, "grad_norm": 0.16942520439624786, "learning_rate": 5e-05, "loss": 1.6536, "step": 3013 }, { "epoch": 0.824172819250752, "grad_norm": 0.16573195159435272, "learning_rate": 5e-05, "loss": 1.5418, "step": 3014 }, { "epoch": 0.8244462674323215, "grad_norm": 0.16428519785404205, "learning_rate": 5e-05, "loss": 1.5565, "step": 3015 }, { "epoch": 0.8247197156138911, "grad_norm": 0.1609538197517395, "learning_rate": 5e-05, "loss": 1.5613, "step": 3016 }, { "epoch": 0.8249931637954607, "grad_norm": 0.16682593524456024, "learning_rate": 5e-05, "loss": 1.6829, "step": 3017 }, { "epoch": 0.8252666119770303, "grad_norm": 0.16691312193870544, "learning_rate": 5e-05, "loss": 1.5133, "step": 3018 }, { "epoch": 0.8255400601585999, "grad_norm": 0.16984447836875916, "learning_rate": 5e-05, "loss": 1.6283, "step": 3019 }, { "epoch": 0.8258135083401695, "grad_norm": 0.1617315411567688, "learning_rate": 5e-05, "loss": 1.5689, "step": 3020 }, { "epoch": 0.8260869565217391, "grad_norm": 0.17331373691558838, "learning_rate": 5e-05, "loss": 1.635, "step": 3021 }, { "epoch": 0.8263604047033087, "grad_norm": 0.1681394726037979, "learning_rate": 5e-05, "loss": 1.6635, "step": 3022 }, { "epoch": 0.8266338528848783, "grad_norm": 0.15742860734462738, "learning_rate": 5e-05, "loss": 1.4818, "step": 3023 }, { "epoch": 0.8269073010664479, "grad_norm": 0.18009516596794128, "learning_rate": 5e-05, "loss": 1.6404, "step": 3024 }, { "epoch": 0.8271807492480175, "grad_norm": 0.16786369681358337, "learning_rate": 5e-05, "loss": 1.5971, "step": 3025 }, { "epoch": 0.8274541974295871, "grad_norm": 0.16309179365634918, "learning_rate": 5e-05, "loss": 1.5124, "step": 3026 }, { "epoch": 0.8277276456111566, "grad_norm": 0.15728282928466797, "learning_rate": 5e-05, "loss": 1.5749, "step": 3027 }, { "epoch": 0.8280010937927262, "grad_norm": 0.16731971502304077, "learning_rate": 5e-05, "loss": 1.5051, "step": 3028 }, { "epoch": 0.8282745419742958, "grad_norm": 0.17260520160198212, "learning_rate": 5e-05, "loss": 1.5307, "step": 3029 }, { "epoch": 0.8285479901558654, "grad_norm": 0.16881436109542847, "learning_rate": 5e-05, "loss": 1.6312, "step": 3030 }, { "epoch": 0.828821438337435, "grad_norm": 0.16781583428382874, "learning_rate": 5e-05, "loss": 1.5454, "step": 3031 }, { "epoch": 0.8290948865190046, "grad_norm": 0.16846032440662384, "learning_rate": 5e-05, "loss": 1.6269, "step": 3032 }, { "epoch": 0.8293683347005743, "grad_norm": 0.1699826866388321, "learning_rate": 5e-05, "loss": 1.6368, "step": 3033 }, { "epoch": 0.8296417828821439, "grad_norm": 0.16753311455249786, "learning_rate": 5e-05, "loss": 1.6949, "step": 3034 }, { "epoch": 0.8299152310637135, "grad_norm": 0.17179977893829346, "learning_rate": 5e-05, "loss": 1.6023, "step": 3035 }, { "epoch": 0.8301886792452831, "grad_norm": 0.1626930981874466, "learning_rate": 5e-05, "loss": 1.6162, "step": 3036 }, { "epoch": 0.8304621274268527, "grad_norm": 0.1600092500448227, "learning_rate": 5e-05, "loss": 1.5765, "step": 3037 }, { "epoch": 0.8307355756084221, "grad_norm": 0.1597457230091095, "learning_rate": 5e-05, "loss": 1.6114, "step": 3038 }, { "epoch": 0.8310090237899918, "grad_norm": 0.16018109023571014, "learning_rate": 5e-05, "loss": 1.6279, "step": 3039 }, { "epoch": 0.8312824719715614, "grad_norm": 0.15814651548862457, "learning_rate": 5e-05, "loss": 1.5031, "step": 3040 }, { "epoch": 0.831555920153131, "grad_norm": 0.1627386063337326, "learning_rate": 5e-05, "loss": 1.5399, "step": 3041 }, { "epoch": 0.8318293683347006, "grad_norm": 0.17200924456119537, "learning_rate": 5e-05, "loss": 1.6497, "step": 3042 }, { "epoch": 0.8321028165162702, "grad_norm": 0.16421593725681305, "learning_rate": 5e-05, "loss": 1.653, "step": 3043 }, { "epoch": 0.8323762646978398, "grad_norm": 0.16235610842704773, "learning_rate": 5e-05, "loss": 1.498, "step": 3044 }, { "epoch": 0.8326497128794094, "grad_norm": 0.16004261374473572, "learning_rate": 5e-05, "loss": 1.5331, "step": 3045 }, { "epoch": 0.832923161060979, "grad_norm": 0.17818929255008698, "learning_rate": 5e-05, "loss": 1.6328, "step": 3046 }, { "epoch": 0.8331966092425486, "grad_norm": 0.17339615523815155, "learning_rate": 5e-05, "loss": 1.6716, "step": 3047 }, { "epoch": 0.8334700574241182, "grad_norm": 0.16837354004383087, "learning_rate": 5e-05, "loss": 1.6282, "step": 3048 }, { "epoch": 0.8337435056056878, "grad_norm": 0.16777902841567993, "learning_rate": 5e-05, "loss": 1.596, "step": 3049 }, { "epoch": 0.8340169537872573, "grad_norm": 0.16719725728034973, "learning_rate": 5e-05, "loss": 1.6055, "step": 3050 }, { "epoch": 0.8342904019688269, "grad_norm": 0.16437400877475739, "learning_rate": 5e-05, "loss": 1.6341, "step": 3051 }, { "epoch": 0.8345638501503965, "grad_norm": 0.1672685593366623, "learning_rate": 5e-05, "loss": 1.6135, "step": 3052 }, { "epoch": 0.8348372983319661, "grad_norm": 0.16196739673614502, "learning_rate": 5e-05, "loss": 1.5584, "step": 3053 }, { "epoch": 0.8351107465135357, "grad_norm": 0.15699756145477295, "learning_rate": 5e-05, "loss": 1.5569, "step": 3054 }, { "epoch": 0.8353841946951053, "grad_norm": 0.1717248409986496, "learning_rate": 5e-05, "loss": 1.6093, "step": 3055 }, { "epoch": 0.8356576428766749, "grad_norm": 0.17324498295783997, "learning_rate": 5e-05, "loss": 1.6929, "step": 3056 }, { "epoch": 0.8359310910582445, "grad_norm": 0.17252692580223083, "learning_rate": 5e-05, "loss": 1.6724, "step": 3057 }, { "epoch": 0.8362045392398141, "grad_norm": 0.17730343341827393, "learning_rate": 5e-05, "loss": 1.6094, "step": 3058 }, { "epoch": 0.8364779874213837, "grad_norm": 0.15670277178287506, "learning_rate": 5e-05, "loss": 1.5384, "step": 3059 }, { "epoch": 0.8367514356029533, "grad_norm": 0.171427920460701, "learning_rate": 5e-05, "loss": 1.5808, "step": 3060 }, { "epoch": 0.8370248837845229, "grad_norm": 0.1834273487329483, "learning_rate": 5e-05, "loss": 1.6069, "step": 3061 }, { "epoch": 0.8372983319660924, "grad_norm": 0.1576196849346161, "learning_rate": 5e-05, "loss": 1.6032, "step": 3062 }, { "epoch": 0.837571780147662, "grad_norm": 0.17979919910430908, "learning_rate": 5e-05, "loss": 1.6884, "step": 3063 }, { "epoch": 0.8378452283292316, "grad_norm": 0.16846168041229248, "learning_rate": 5e-05, "loss": 1.4665, "step": 3064 }, { "epoch": 0.8381186765108012, "grad_norm": 0.17571622133255005, "learning_rate": 5e-05, "loss": 1.5971, "step": 3065 }, { "epoch": 0.8383921246923708, "grad_norm": 0.1625003069639206, "learning_rate": 5e-05, "loss": 1.6018, "step": 3066 }, { "epoch": 0.8386655728739404, "grad_norm": 0.16521279513835907, "learning_rate": 5e-05, "loss": 1.5108, "step": 3067 }, { "epoch": 0.83893902105551, "grad_norm": 0.19198334217071533, "learning_rate": 5e-05, "loss": 1.6102, "step": 3068 }, { "epoch": 0.8392124692370796, "grad_norm": 0.16152355074882507, "learning_rate": 5e-05, "loss": 1.5801, "step": 3069 }, { "epoch": 0.8394859174186492, "grad_norm": 0.159002423286438, "learning_rate": 5e-05, "loss": 1.5931, "step": 3070 }, { "epoch": 0.8397593656002188, "grad_norm": 0.16774891316890717, "learning_rate": 5e-05, "loss": 1.613, "step": 3071 }, { "epoch": 0.8400328137817884, "grad_norm": 0.17755338549613953, "learning_rate": 5e-05, "loss": 1.6408, "step": 3072 }, { "epoch": 0.8403062619633579, "grad_norm": 0.15868759155273438, "learning_rate": 5e-05, "loss": 1.6276, "step": 3073 }, { "epoch": 0.8405797101449275, "grad_norm": 0.1694575846195221, "learning_rate": 5e-05, "loss": 1.6421, "step": 3074 }, { "epoch": 0.8408531583264971, "grad_norm": 0.1652621626853943, "learning_rate": 5e-05, "loss": 1.5798, "step": 3075 }, { "epoch": 0.8411266065080667, "grad_norm": 0.1710357666015625, "learning_rate": 5e-05, "loss": 1.5947, "step": 3076 }, { "epoch": 0.8414000546896363, "grad_norm": 0.16072213649749756, "learning_rate": 5e-05, "loss": 1.5291, "step": 3077 }, { "epoch": 0.8416735028712059, "grad_norm": 0.18232187628746033, "learning_rate": 5e-05, "loss": 1.6492, "step": 3078 }, { "epoch": 0.8419469510527755, "grad_norm": 0.1688283234834671, "learning_rate": 5e-05, "loss": 1.6395, "step": 3079 }, { "epoch": 0.8422203992343451, "grad_norm": 0.170608788728714, "learning_rate": 5e-05, "loss": 1.6103, "step": 3080 }, { "epoch": 0.8424938474159147, "grad_norm": 0.16807672381401062, "learning_rate": 5e-05, "loss": 1.535, "step": 3081 }, { "epoch": 0.8427672955974843, "grad_norm": 0.17390793561935425, "learning_rate": 5e-05, "loss": 1.6763, "step": 3082 }, { "epoch": 0.8430407437790539, "grad_norm": 0.1748964488506317, "learning_rate": 5e-05, "loss": 1.6523, "step": 3083 }, { "epoch": 0.8433141919606235, "grad_norm": 0.17339888215065002, "learning_rate": 5e-05, "loss": 1.4646, "step": 3084 }, { "epoch": 0.843587640142193, "grad_norm": 0.16728384792804718, "learning_rate": 5e-05, "loss": 1.5108, "step": 3085 }, { "epoch": 0.8438610883237626, "grad_norm": 0.1811836063861847, "learning_rate": 5e-05, "loss": 1.6013, "step": 3086 }, { "epoch": 0.8441345365053322, "grad_norm": 0.15502838790416718, "learning_rate": 5e-05, "loss": 1.5567, "step": 3087 }, { "epoch": 0.8444079846869018, "grad_norm": 0.17469522356987, "learning_rate": 5e-05, "loss": 1.6128, "step": 3088 }, { "epoch": 0.8446814328684714, "grad_norm": 0.1718490719795227, "learning_rate": 5e-05, "loss": 1.6778, "step": 3089 }, { "epoch": 0.844954881050041, "grad_norm": 0.1672654151916504, "learning_rate": 5e-05, "loss": 1.5281, "step": 3090 }, { "epoch": 0.8452283292316106, "grad_norm": 0.18899762630462646, "learning_rate": 5e-05, "loss": 1.5707, "step": 3091 }, { "epoch": 0.8455017774131802, "grad_norm": 0.16991832852363586, "learning_rate": 5e-05, "loss": 1.6655, "step": 3092 }, { "epoch": 0.8457752255947498, "grad_norm": 0.16526849567890167, "learning_rate": 5e-05, "loss": 1.5944, "step": 3093 }, { "epoch": 0.8460486737763194, "grad_norm": 0.1665259599685669, "learning_rate": 5e-05, "loss": 1.6564, "step": 3094 }, { "epoch": 0.846322121957889, "grad_norm": 0.16578637063503265, "learning_rate": 5e-05, "loss": 1.5696, "step": 3095 }, { "epoch": 0.8465955701394585, "grad_norm": 0.15842179954051971, "learning_rate": 5e-05, "loss": 1.5215, "step": 3096 }, { "epoch": 0.8468690183210281, "grad_norm": 0.1621725708246231, "learning_rate": 5e-05, "loss": 1.6389, "step": 3097 }, { "epoch": 0.8471424665025977, "grad_norm": 0.15811792016029358, "learning_rate": 5e-05, "loss": 1.5293, "step": 3098 }, { "epoch": 0.8474159146841673, "grad_norm": 0.17002242803573608, "learning_rate": 5e-05, "loss": 1.5781, "step": 3099 }, { "epoch": 0.8476893628657369, "grad_norm": 0.16223548352718353, "learning_rate": 5e-05, "loss": 1.5776, "step": 3100 }, { "epoch": 0.8479628110473065, "grad_norm": 0.17608049511909485, "learning_rate": 5e-05, "loss": 1.5716, "step": 3101 }, { "epoch": 0.8482362592288761, "grad_norm": 0.16243280470371246, "learning_rate": 5e-05, "loss": 1.582, "step": 3102 }, { "epoch": 0.8485097074104457, "grad_norm": 0.1691652089357376, "learning_rate": 5e-05, "loss": 1.5969, "step": 3103 }, { "epoch": 0.8487831555920153, "grad_norm": 0.17655591666698456, "learning_rate": 5e-05, "loss": 1.5828, "step": 3104 }, { "epoch": 0.8490566037735849, "grad_norm": 0.16086623072624207, "learning_rate": 5e-05, "loss": 1.5631, "step": 3105 }, { "epoch": 0.8493300519551545, "grad_norm": 0.1816483736038208, "learning_rate": 5e-05, "loss": 1.723, "step": 3106 }, { "epoch": 0.8496035001367241, "grad_norm": 0.17240236699581146, "learning_rate": 5e-05, "loss": 1.669, "step": 3107 }, { "epoch": 0.8498769483182936, "grad_norm": 0.16476856172084808, "learning_rate": 5e-05, "loss": 1.5872, "step": 3108 }, { "epoch": 0.8501503964998632, "grad_norm": 0.15981562435626984, "learning_rate": 5e-05, "loss": 1.5647, "step": 3109 }, { "epoch": 0.8504238446814328, "grad_norm": 0.1652696579694748, "learning_rate": 5e-05, "loss": 1.5882, "step": 3110 }, { "epoch": 0.8506972928630024, "grad_norm": 0.16290616989135742, "learning_rate": 5e-05, "loss": 1.6325, "step": 3111 }, { "epoch": 0.850970741044572, "grad_norm": 0.16608236730098724, "learning_rate": 5e-05, "loss": 1.6638, "step": 3112 }, { "epoch": 0.8512441892261416, "grad_norm": 0.16186489164829254, "learning_rate": 5e-05, "loss": 1.5525, "step": 3113 }, { "epoch": 0.8515176374077112, "grad_norm": 0.1654919683933258, "learning_rate": 5e-05, "loss": 1.5305, "step": 3114 }, { "epoch": 0.8517910855892808, "grad_norm": 0.16424906253814697, "learning_rate": 5e-05, "loss": 1.5848, "step": 3115 }, { "epoch": 0.8520645337708505, "grad_norm": 0.16823723912239075, "learning_rate": 5e-05, "loss": 1.5737, "step": 3116 }, { "epoch": 0.85233798195242, "grad_norm": 0.16373160481452942, "learning_rate": 5e-05, "loss": 1.6274, "step": 3117 }, { "epoch": 0.8526114301339897, "grad_norm": 0.18197789788246155, "learning_rate": 5e-05, "loss": 1.6227, "step": 3118 }, { "epoch": 0.8528848783155591, "grad_norm": 0.15607775747776031, "learning_rate": 5e-05, "loss": 1.5371, "step": 3119 }, { "epoch": 0.8531583264971287, "grad_norm": 0.16798537969589233, "learning_rate": 5e-05, "loss": 1.5632, "step": 3120 }, { "epoch": 0.8534317746786984, "grad_norm": 0.164012148976326, "learning_rate": 5e-05, "loss": 1.5629, "step": 3121 }, { "epoch": 0.853705222860268, "grad_norm": 0.176783949136734, "learning_rate": 5e-05, "loss": 1.5977, "step": 3122 }, { "epoch": 0.8539786710418376, "grad_norm": 0.15592290461063385, "learning_rate": 5e-05, "loss": 1.5529, "step": 3123 }, { "epoch": 0.8542521192234072, "grad_norm": 0.18195119500160217, "learning_rate": 5e-05, "loss": 1.6908, "step": 3124 }, { "epoch": 0.8545255674049768, "grad_norm": 0.174040749669075, "learning_rate": 5e-05, "loss": 1.5646, "step": 3125 }, { "epoch": 0.8547990155865464, "grad_norm": 0.16298840939998627, "learning_rate": 5e-05, "loss": 1.5824, "step": 3126 }, { "epoch": 0.855072463768116, "grad_norm": 0.16243524849414825, "learning_rate": 5e-05, "loss": 1.603, "step": 3127 }, { "epoch": 0.8553459119496856, "grad_norm": 0.16204892098903656, "learning_rate": 5e-05, "loss": 1.595, "step": 3128 }, { "epoch": 0.8556193601312552, "grad_norm": 0.16708557307720184, "learning_rate": 5e-05, "loss": 1.6143, "step": 3129 }, { "epoch": 0.8558928083128248, "grad_norm": 0.1747390627861023, "learning_rate": 5e-05, "loss": 1.5863, "step": 3130 }, { "epoch": 0.8561662564943943, "grad_norm": 0.17126700282096863, "learning_rate": 5e-05, "loss": 1.6091, "step": 3131 }, { "epoch": 0.8564397046759639, "grad_norm": 0.1731352061033249, "learning_rate": 5e-05, "loss": 1.6073, "step": 3132 }, { "epoch": 0.8567131528575335, "grad_norm": 0.17451922595500946, "learning_rate": 5e-05, "loss": 1.7312, "step": 3133 }, { "epoch": 0.8569866010391031, "grad_norm": 0.156746968626976, "learning_rate": 5e-05, "loss": 1.4434, "step": 3134 }, { "epoch": 0.8572600492206727, "grad_norm": 0.1696559190750122, "learning_rate": 5e-05, "loss": 1.6185, "step": 3135 }, { "epoch": 0.8575334974022423, "grad_norm": 0.1689455807209015, "learning_rate": 5e-05, "loss": 1.5066, "step": 3136 }, { "epoch": 0.8578069455838119, "grad_norm": 0.16820566356182098, "learning_rate": 5e-05, "loss": 1.5294, "step": 3137 }, { "epoch": 0.8580803937653815, "grad_norm": 0.18438181281089783, "learning_rate": 5e-05, "loss": 1.6513, "step": 3138 }, { "epoch": 0.8583538419469511, "grad_norm": 0.1611601859331131, "learning_rate": 5e-05, "loss": 1.4604, "step": 3139 }, { "epoch": 0.8586272901285207, "grad_norm": 0.16456107795238495, "learning_rate": 5e-05, "loss": 1.5664, "step": 3140 }, { "epoch": 0.8589007383100903, "grad_norm": 0.16825945675373077, "learning_rate": 5e-05, "loss": 1.5777, "step": 3141 }, { "epoch": 0.8591741864916599, "grad_norm": 0.16525815427303314, "learning_rate": 5e-05, "loss": 1.5464, "step": 3142 }, { "epoch": 0.8594476346732294, "grad_norm": 0.17219798266887665, "learning_rate": 5e-05, "loss": 1.6255, "step": 3143 }, { "epoch": 0.859721082854799, "grad_norm": 0.1633169800043106, "learning_rate": 5e-05, "loss": 1.5723, "step": 3144 }, { "epoch": 0.8599945310363686, "grad_norm": 0.15741117298603058, "learning_rate": 5e-05, "loss": 1.5098, "step": 3145 }, { "epoch": 0.8602679792179382, "grad_norm": 0.17558981478214264, "learning_rate": 5e-05, "loss": 1.6152, "step": 3146 }, { "epoch": 0.8605414273995078, "grad_norm": 0.16872353851795197, "learning_rate": 5e-05, "loss": 1.6884, "step": 3147 }, { "epoch": 0.8608148755810774, "grad_norm": 0.18673337996006012, "learning_rate": 5e-05, "loss": 1.6064, "step": 3148 }, { "epoch": 0.861088323762647, "grad_norm": 0.16644282639026642, "learning_rate": 5e-05, "loss": 1.518, "step": 3149 }, { "epoch": 0.8613617719442166, "grad_norm": 0.16721606254577637, "learning_rate": 5e-05, "loss": 1.5891, "step": 3150 }, { "epoch": 0.8616352201257862, "grad_norm": 0.17002660036087036, "learning_rate": 5e-05, "loss": 1.5312, "step": 3151 }, { "epoch": 0.8619086683073558, "grad_norm": 0.16748939454555511, "learning_rate": 5e-05, "loss": 1.6485, "step": 3152 }, { "epoch": 0.8621821164889254, "grad_norm": 0.15232722461223602, "learning_rate": 5e-05, "loss": 1.4527, "step": 3153 }, { "epoch": 0.8624555646704949, "grad_norm": 0.16974018514156342, "learning_rate": 5e-05, "loss": 1.5227, "step": 3154 }, { "epoch": 0.8627290128520645, "grad_norm": 0.16853350400924683, "learning_rate": 5e-05, "loss": 1.5233, "step": 3155 }, { "epoch": 0.8630024610336341, "grad_norm": 0.16759264469146729, "learning_rate": 5e-05, "loss": 1.6178, "step": 3156 }, { "epoch": 0.8632759092152037, "grad_norm": 0.18532946705818176, "learning_rate": 5e-05, "loss": 1.4664, "step": 3157 }, { "epoch": 0.8635493573967733, "grad_norm": 0.16451618075370789, "learning_rate": 5e-05, "loss": 1.5393, "step": 3158 }, { "epoch": 0.8638228055783429, "grad_norm": 0.17090554535388947, "learning_rate": 5e-05, "loss": 1.6055, "step": 3159 }, { "epoch": 0.8640962537599125, "grad_norm": 0.16245882213115692, "learning_rate": 5e-05, "loss": 1.5864, "step": 3160 }, { "epoch": 0.8643697019414821, "grad_norm": 0.16497839987277985, "learning_rate": 5e-05, "loss": 1.5725, "step": 3161 }, { "epoch": 0.8646431501230517, "grad_norm": 0.1678629368543625, "learning_rate": 5e-05, "loss": 1.6436, "step": 3162 }, { "epoch": 0.8649165983046213, "grad_norm": 0.17666424810886383, "learning_rate": 5e-05, "loss": 1.5639, "step": 3163 }, { "epoch": 0.8651900464861909, "grad_norm": 0.16444604098796844, "learning_rate": 5e-05, "loss": 1.6184, "step": 3164 }, { "epoch": 0.8654634946677605, "grad_norm": 0.1679319143295288, "learning_rate": 5e-05, "loss": 1.5206, "step": 3165 }, { "epoch": 0.86573694284933, "grad_norm": 0.16246174275875092, "learning_rate": 5e-05, "loss": 1.5794, "step": 3166 }, { "epoch": 0.8660103910308996, "grad_norm": 0.17036394774913788, "learning_rate": 5e-05, "loss": 1.6325, "step": 3167 }, { "epoch": 0.8662838392124692, "grad_norm": 0.1656508892774582, "learning_rate": 5e-05, "loss": 1.5285, "step": 3168 }, { "epoch": 0.8665572873940388, "grad_norm": 0.17411690950393677, "learning_rate": 5e-05, "loss": 1.5536, "step": 3169 }, { "epoch": 0.8668307355756084, "grad_norm": 0.1696336418390274, "learning_rate": 5e-05, "loss": 1.5778, "step": 3170 }, { "epoch": 0.867104183757178, "grad_norm": 0.17345334589481354, "learning_rate": 5e-05, "loss": 1.5805, "step": 3171 }, { "epoch": 0.8673776319387476, "grad_norm": 0.1722104400396347, "learning_rate": 5e-05, "loss": 1.6734, "step": 3172 }, { "epoch": 0.8676510801203172, "grad_norm": 0.17595550417900085, "learning_rate": 5e-05, "loss": 1.6609, "step": 3173 }, { "epoch": 0.8679245283018868, "grad_norm": 0.1680113971233368, "learning_rate": 5e-05, "loss": 1.6642, "step": 3174 }, { "epoch": 0.8681979764834564, "grad_norm": 0.16808103024959564, "learning_rate": 5e-05, "loss": 1.5834, "step": 3175 }, { "epoch": 0.868471424665026, "grad_norm": 0.16665758192539215, "learning_rate": 5e-05, "loss": 1.6229, "step": 3176 }, { "epoch": 0.8687448728465955, "grad_norm": 0.16951392590999603, "learning_rate": 5e-05, "loss": 1.5884, "step": 3177 }, { "epoch": 0.8690183210281651, "grad_norm": 0.16685131192207336, "learning_rate": 5e-05, "loss": 1.632, "step": 3178 }, { "epoch": 0.8692917692097347, "grad_norm": 0.1711168885231018, "learning_rate": 5e-05, "loss": 1.6524, "step": 3179 }, { "epoch": 0.8695652173913043, "grad_norm": 0.16593927145004272, "learning_rate": 5e-05, "loss": 1.6038, "step": 3180 }, { "epoch": 0.8698386655728739, "grad_norm": 0.16577638685703278, "learning_rate": 5e-05, "loss": 1.6469, "step": 3181 }, { "epoch": 0.8701121137544435, "grad_norm": 0.17342355847358704, "learning_rate": 5e-05, "loss": 1.6564, "step": 3182 }, { "epoch": 0.8703855619360131, "grad_norm": 0.15923261642456055, "learning_rate": 5e-05, "loss": 1.5506, "step": 3183 }, { "epoch": 0.8706590101175827, "grad_norm": 0.18326669931411743, "learning_rate": 5e-05, "loss": 1.5866, "step": 3184 }, { "epoch": 0.8709324582991523, "grad_norm": 0.17581258714199066, "learning_rate": 5e-05, "loss": 1.5521, "step": 3185 }, { "epoch": 0.8712059064807219, "grad_norm": 0.1683703362941742, "learning_rate": 5e-05, "loss": 1.6314, "step": 3186 }, { "epoch": 0.8714793546622915, "grad_norm": 0.17400458455085754, "learning_rate": 5e-05, "loss": 1.586, "step": 3187 }, { "epoch": 0.8717528028438611, "grad_norm": 0.1864955574274063, "learning_rate": 5e-05, "loss": 1.6241, "step": 3188 }, { "epoch": 0.8720262510254306, "grad_norm": 0.16646967828273773, "learning_rate": 5e-05, "loss": 1.568, "step": 3189 }, { "epoch": 0.8722996992070002, "grad_norm": 0.17674346268177032, "learning_rate": 5e-05, "loss": 1.6457, "step": 3190 }, { "epoch": 0.8725731473885698, "grad_norm": 0.18272368609905243, "learning_rate": 5e-05, "loss": 1.6196, "step": 3191 }, { "epoch": 0.8728465955701394, "grad_norm": 0.16906492412090302, "learning_rate": 5e-05, "loss": 1.5682, "step": 3192 }, { "epoch": 0.873120043751709, "grad_norm": 0.16809342801570892, "learning_rate": 5e-05, "loss": 1.5799, "step": 3193 }, { "epoch": 0.8733934919332786, "grad_norm": 0.1753772646188736, "learning_rate": 5e-05, "loss": 1.5376, "step": 3194 }, { "epoch": 0.8736669401148482, "grad_norm": 0.16110126674175262, "learning_rate": 5e-05, "loss": 1.6114, "step": 3195 }, { "epoch": 0.8739403882964178, "grad_norm": 0.16913153231143951, "learning_rate": 5e-05, "loss": 1.5882, "step": 3196 }, { "epoch": 0.8742138364779874, "grad_norm": 0.17413096129894257, "learning_rate": 5e-05, "loss": 1.577, "step": 3197 }, { "epoch": 0.874487284659557, "grad_norm": 0.16093304753303528, "learning_rate": 5e-05, "loss": 1.5553, "step": 3198 }, { "epoch": 0.8747607328411267, "grad_norm": 0.1645004004240036, "learning_rate": 5e-05, "loss": 1.5537, "step": 3199 }, { "epoch": 0.8750341810226961, "grad_norm": 0.1645384430885315, "learning_rate": 5e-05, "loss": 1.629, "step": 3200 }, { "epoch": 0.8753076292042657, "grad_norm": 0.17203739285469055, "learning_rate": 5e-05, "loss": 1.6529, "step": 3201 }, { "epoch": 0.8755810773858353, "grad_norm": 0.1711164116859436, "learning_rate": 5e-05, "loss": 1.63, "step": 3202 }, { "epoch": 0.875854525567405, "grad_norm": 0.17426781356334686, "learning_rate": 5e-05, "loss": 1.7013, "step": 3203 }, { "epoch": 0.8761279737489746, "grad_norm": 0.1662566065788269, "learning_rate": 5e-05, "loss": 1.5933, "step": 3204 }, { "epoch": 0.8764014219305442, "grad_norm": 0.17674902081489563, "learning_rate": 5e-05, "loss": 1.5578, "step": 3205 }, { "epoch": 0.8766748701121138, "grad_norm": 0.15999096632003784, "learning_rate": 5e-05, "loss": 1.6195, "step": 3206 }, { "epoch": 0.8769483182936834, "grad_norm": 0.16697584092617035, "learning_rate": 5e-05, "loss": 1.589, "step": 3207 }, { "epoch": 0.877221766475253, "grad_norm": 0.16489756107330322, "learning_rate": 5e-05, "loss": 1.6149, "step": 3208 }, { "epoch": 0.8774952146568226, "grad_norm": 0.16519208252429962, "learning_rate": 5e-05, "loss": 1.5, "step": 3209 }, { "epoch": 0.8777686628383922, "grad_norm": 0.17452332377433777, "learning_rate": 5e-05, "loss": 1.6418, "step": 3210 }, { "epoch": 0.8780421110199618, "grad_norm": 0.16637583076953888, "learning_rate": 5e-05, "loss": 1.5775, "step": 3211 }, { "epoch": 0.8783155592015313, "grad_norm": 0.15892595052719116, "learning_rate": 5e-05, "loss": 1.498, "step": 3212 }, { "epoch": 0.8785890073831009, "grad_norm": 0.17607276141643524, "learning_rate": 5e-05, "loss": 1.6636, "step": 3213 }, { "epoch": 0.8788624555646705, "grad_norm": 0.1674719601869583, "learning_rate": 5e-05, "loss": 1.6106, "step": 3214 }, { "epoch": 0.8791359037462401, "grad_norm": 0.17102982103824615, "learning_rate": 5e-05, "loss": 1.6174, "step": 3215 }, { "epoch": 0.8794093519278097, "grad_norm": 0.16860932111740112, "learning_rate": 5e-05, "loss": 1.5382, "step": 3216 }, { "epoch": 0.8796828001093793, "grad_norm": 0.15637625753879547, "learning_rate": 5e-05, "loss": 1.5326, "step": 3217 }, { "epoch": 0.8799562482909489, "grad_norm": 0.16792798042297363, "learning_rate": 5e-05, "loss": 1.6486, "step": 3218 }, { "epoch": 0.8802296964725185, "grad_norm": 0.17826606333255768, "learning_rate": 5e-05, "loss": 1.5327, "step": 3219 }, { "epoch": 0.8805031446540881, "grad_norm": 0.1626116782426834, "learning_rate": 5e-05, "loss": 1.5084, "step": 3220 }, { "epoch": 0.8807765928356577, "grad_norm": 0.16171365976333618, "learning_rate": 5e-05, "loss": 1.559, "step": 3221 }, { "epoch": 0.8810500410172273, "grad_norm": 0.16499905288219452, "learning_rate": 5e-05, "loss": 1.4079, "step": 3222 }, { "epoch": 0.8813234891987969, "grad_norm": 0.16512024402618408, "learning_rate": 5e-05, "loss": 1.5858, "step": 3223 }, { "epoch": 0.8815969373803664, "grad_norm": 0.1706002801656723, "learning_rate": 5e-05, "loss": 1.6038, "step": 3224 }, { "epoch": 0.881870385561936, "grad_norm": 0.17629894614219666, "learning_rate": 5e-05, "loss": 1.6537, "step": 3225 }, { "epoch": 0.8821438337435056, "grad_norm": 0.16706202924251556, "learning_rate": 5e-05, "loss": 1.5556, "step": 3226 }, { "epoch": 0.8824172819250752, "grad_norm": 0.1704121083021164, "learning_rate": 5e-05, "loss": 1.6341, "step": 3227 }, { "epoch": 0.8826907301066448, "grad_norm": 0.17939800024032593, "learning_rate": 5e-05, "loss": 1.4981, "step": 3228 }, { "epoch": 0.8829641782882144, "grad_norm": 0.17186492681503296, "learning_rate": 5e-05, "loss": 1.5591, "step": 3229 }, { "epoch": 0.883237626469784, "grad_norm": 0.160826176404953, "learning_rate": 5e-05, "loss": 1.5478, "step": 3230 }, { "epoch": 0.8835110746513536, "grad_norm": 0.190667062997818, "learning_rate": 5e-05, "loss": 1.5104, "step": 3231 }, { "epoch": 0.8837845228329232, "grad_norm": 0.18037305772304535, "learning_rate": 5e-05, "loss": 1.6009, "step": 3232 }, { "epoch": 0.8840579710144928, "grad_norm": 0.15924717485904694, "learning_rate": 5e-05, "loss": 1.5045, "step": 3233 }, { "epoch": 0.8843314191960624, "grad_norm": 0.19811680912971497, "learning_rate": 5e-05, "loss": 1.5899, "step": 3234 }, { "epoch": 0.8846048673776319, "grad_norm": 0.1798917055130005, "learning_rate": 5e-05, "loss": 1.5403, "step": 3235 }, { "epoch": 0.8848783155592015, "grad_norm": 0.17577169835567474, "learning_rate": 5e-05, "loss": 1.5191, "step": 3236 }, { "epoch": 0.8851517637407711, "grad_norm": 0.18266037106513977, "learning_rate": 5e-05, "loss": 1.5779, "step": 3237 }, { "epoch": 0.8854252119223407, "grad_norm": 0.1692240834236145, "learning_rate": 5e-05, "loss": 1.5353, "step": 3238 }, { "epoch": 0.8856986601039103, "grad_norm": 0.1703694760799408, "learning_rate": 5e-05, "loss": 1.5582, "step": 3239 }, { "epoch": 0.8859721082854799, "grad_norm": 0.1754840612411499, "learning_rate": 5e-05, "loss": 1.5573, "step": 3240 }, { "epoch": 0.8862455564670495, "grad_norm": 0.16203297674655914, "learning_rate": 5e-05, "loss": 1.5276, "step": 3241 }, { "epoch": 0.8865190046486191, "grad_norm": 0.15817001461982727, "learning_rate": 5e-05, "loss": 1.583, "step": 3242 }, { "epoch": 0.8867924528301887, "grad_norm": 0.1767439991235733, "learning_rate": 5e-05, "loss": 1.6644, "step": 3243 }, { "epoch": 0.8870659010117583, "grad_norm": 0.16701658070087433, "learning_rate": 5e-05, "loss": 1.5825, "step": 3244 }, { "epoch": 0.8873393491933279, "grad_norm": 0.16390787065029144, "learning_rate": 5e-05, "loss": 1.6174, "step": 3245 }, { "epoch": 0.8876127973748975, "grad_norm": 0.1741890013217926, "learning_rate": 5e-05, "loss": 1.5714, "step": 3246 }, { "epoch": 0.887886245556467, "grad_norm": 0.1701374500989914, "learning_rate": 5e-05, "loss": 1.6041, "step": 3247 }, { "epoch": 0.8881596937380366, "grad_norm": 0.18417386710643768, "learning_rate": 5e-05, "loss": 1.6007, "step": 3248 }, { "epoch": 0.8884331419196062, "grad_norm": 0.18053127825260162, "learning_rate": 5e-05, "loss": 1.5415, "step": 3249 }, { "epoch": 0.8887065901011758, "grad_norm": 0.16399511694908142, "learning_rate": 5e-05, "loss": 1.5805, "step": 3250 }, { "epoch": 0.8889800382827454, "grad_norm": 0.1606212705373764, "learning_rate": 5e-05, "loss": 1.5433, "step": 3251 }, { "epoch": 0.889253486464315, "grad_norm": 0.16091781854629517, "learning_rate": 5e-05, "loss": 1.5801, "step": 3252 }, { "epoch": 0.8895269346458846, "grad_norm": 0.17079661786556244, "learning_rate": 5e-05, "loss": 1.6032, "step": 3253 }, { "epoch": 0.8898003828274542, "grad_norm": 0.16236819326877594, "learning_rate": 5e-05, "loss": 1.5309, "step": 3254 }, { "epoch": 0.8900738310090238, "grad_norm": 0.16366977989673615, "learning_rate": 5e-05, "loss": 1.4854, "step": 3255 }, { "epoch": 0.8903472791905934, "grad_norm": 0.1692305952310562, "learning_rate": 5e-05, "loss": 1.5546, "step": 3256 }, { "epoch": 0.890620727372163, "grad_norm": 0.1591336876153946, "learning_rate": 5e-05, "loss": 1.5724, "step": 3257 }, { "epoch": 0.8908941755537325, "grad_norm": 0.1661178022623062, "learning_rate": 5e-05, "loss": 1.6642, "step": 3258 }, { "epoch": 0.8911676237353021, "grad_norm": 0.1741829365491867, "learning_rate": 5e-05, "loss": 1.5808, "step": 3259 }, { "epoch": 0.8914410719168717, "grad_norm": 0.16046129167079926, "learning_rate": 5e-05, "loss": 1.6136, "step": 3260 }, { "epoch": 0.8917145200984413, "grad_norm": 0.16220787167549133, "learning_rate": 5e-05, "loss": 1.5145, "step": 3261 }, { "epoch": 0.8919879682800109, "grad_norm": 0.17300353944301605, "learning_rate": 5e-05, "loss": 1.5805, "step": 3262 }, { "epoch": 0.8922614164615805, "grad_norm": 0.1636408567428589, "learning_rate": 5e-05, "loss": 1.5679, "step": 3263 }, { "epoch": 0.8925348646431501, "grad_norm": 0.1789669394493103, "learning_rate": 5e-05, "loss": 1.7185, "step": 3264 }, { "epoch": 0.8928083128247197, "grad_norm": 0.161798357963562, "learning_rate": 5e-05, "loss": 1.4734, "step": 3265 }, { "epoch": 0.8930817610062893, "grad_norm": 0.1663116067647934, "learning_rate": 5e-05, "loss": 1.559, "step": 3266 }, { "epoch": 0.8933552091878589, "grad_norm": 0.16649827361106873, "learning_rate": 5e-05, "loss": 1.5498, "step": 3267 }, { "epoch": 0.8936286573694285, "grad_norm": 0.1723203808069229, "learning_rate": 5e-05, "loss": 1.623, "step": 3268 }, { "epoch": 0.8939021055509981, "grad_norm": 0.16643695533275604, "learning_rate": 5e-05, "loss": 1.6505, "step": 3269 }, { "epoch": 0.8941755537325676, "grad_norm": 0.16589291393756866, "learning_rate": 5e-05, "loss": 1.6633, "step": 3270 }, { "epoch": 0.8944490019141372, "grad_norm": 0.1624186635017395, "learning_rate": 5e-05, "loss": 1.5022, "step": 3271 }, { "epoch": 0.8947224500957068, "grad_norm": 0.1685740202665329, "learning_rate": 5e-05, "loss": 1.6225, "step": 3272 }, { "epoch": 0.8949958982772764, "grad_norm": 0.16466566920280457, "learning_rate": 5e-05, "loss": 1.6597, "step": 3273 }, { "epoch": 0.895269346458846, "grad_norm": 0.16794006526470184, "learning_rate": 5e-05, "loss": 1.5795, "step": 3274 }, { "epoch": 0.8955427946404156, "grad_norm": 0.1675807386636734, "learning_rate": 5e-05, "loss": 1.6751, "step": 3275 }, { "epoch": 0.8958162428219852, "grad_norm": 0.1637728214263916, "learning_rate": 5e-05, "loss": 1.5746, "step": 3276 }, { "epoch": 0.8960896910035548, "grad_norm": 0.16383953392505646, "learning_rate": 5e-05, "loss": 1.5741, "step": 3277 }, { "epoch": 0.8963631391851244, "grad_norm": 0.16199614107608795, "learning_rate": 5e-05, "loss": 1.5629, "step": 3278 }, { "epoch": 0.896636587366694, "grad_norm": 0.17193222045898438, "learning_rate": 5e-05, "loss": 1.6165, "step": 3279 }, { "epoch": 0.8969100355482637, "grad_norm": 0.16081291437149048, "learning_rate": 5e-05, "loss": 1.5044, "step": 3280 }, { "epoch": 0.8971834837298331, "grad_norm": 0.16551733016967773, "learning_rate": 5e-05, "loss": 1.6052, "step": 3281 }, { "epoch": 0.8974569319114027, "grad_norm": 0.1634800136089325, "learning_rate": 5e-05, "loss": 1.5435, "step": 3282 }, { "epoch": 0.8977303800929723, "grad_norm": 0.17315998673439026, "learning_rate": 5e-05, "loss": 1.6515, "step": 3283 }, { "epoch": 0.898003828274542, "grad_norm": 0.17361579835414886, "learning_rate": 5e-05, "loss": 1.6037, "step": 3284 }, { "epoch": 0.8982772764561116, "grad_norm": 0.17537641525268555, "learning_rate": 5e-05, "loss": 1.6171, "step": 3285 }, { "epoch": 0.8985507246376812, "grad_norm": 0.17007243633270264, "learning_rate": 5e-05, "loss": 1.6756, "step": 3286 }, { "epoch": 0.8988241728192508, "grad_norm": 0.1736474186182022, "learning_rate": 5e-05, "loss": 1.5852, "step": 3287 }, { "epoch": 0.8990976210008204, "grad_norm": 0.1645202934741974, "learning_rate": 5e-05, "loss": 1.6534, "step": 3288 }, { "epoch": 0.89937106918239, "grad_norm": 0.16424302756786346, "learning_rate": 5e-05, "loss": 1.5194, "step": 3289 }, { "epoch": 0.8996445173639596, "grad_norm": 0.17903803288936615, "learning_rate": 5e-05, "loss": 1.5683, "step": 3290 }, { "epoch": 0.8999179655455292, "grad_norm": 0.16927549242973328, "learning_rate": 5e-05, "loss": 1.6283, "step": 3291 }, { "epoch": 0.9001914137270988, "grad_norm": 0.16738973557949066, "learning_rate": 5e-05, "loss": 1.6196, "step": 3292 }, { "epoch": 0.9004648619086683, "grad_norm": 0.16482320427894592, "learning_rate": 5e-05, "loss": 1.5135, "step": 3293 }, { "epoch": 0.9007383100902379, "grad_norm": 0.1613280475139618, "learning_rate": 5e-05, "loss": 1.6036, "step": 3294 }, { "epoch": 0.9010117582718075, "grad_norm": 0.16160857677459717, "learning_rate": 5e-05, "loss": 1.5348, "step": 3295 }, { "epoch": 0.9012852064533771, "grad_norm": 0.1576218158006668, "learning_rate": 5e-05, "loss": 1.5171, "step": 3296 }, { "epoch": 0.9015586546349467, "grad_norm": 0.1682513952255249, "learning_rate": 5e-05, "loss": 1.5429, "step": 3297 }, { "epoch": 0.9018321028165163, "grad_norm": 0.16176675260066986, "learning_rate": 5e-05, "loss": 1.5041, "step": 3298 }, { "epoch": 0.9021055509980859, "grad_norm": 0.16141022741794586, "learning_rate": 5e-05, "loss": 1.5655, "step": 3299 }, { "epoch": 0.9023789991796555, "grad_norm": 0.16335152089595795, "learning_rate": 5e-05, "loss": 1.6064, "step": 3300 }, { "epoch": 0.9026524473612251, "grad_norm": 0.16691720485687256, "learning_rate": 5e-05, "loss": 1.6316, "step": 3301 }, { "epoch": 0.9029258955427947, "grad_norm": 0.16636385023593903, "learning_rate": 5e-05, "loss": 1.59, "step": 3302 }, { "epoch": 0.9031993437243643, "grad_norm": 0.1710832118988037, "learning_rate": 5e-05, "loss": 1.5199, "step": 3303 }, { "epoch": 0.9034727919059339, "grad_norm": 0.169814333319664, "learning_rate": 5e-05, "loss": 1.5754, "step": 3304 }, { "epoch": 0.9037462400875034, "grad_norm": 0.1706944704055786, "learning_rate": 5e-05, "loss": 1.6101, "step": 3305 }, { "epoch": 0.904019688269073, "grad_norm": 0.15784287452697754, "learning_rate": 5e-05, "loss": 1.5232, "step": 3306 }, { "epoch": 0.9042931364506426, "grad_norm": 0.1703675389289856, "learning_rate": 5e-05, "loss": 1.6001, "step": 3307 }, { "epoch": 0.9045665846322122, "grad_norm": 0.16346101462841034, "learning_rate": 5e-05, "loss": 1.5885, "step": 3308 }, { "epoch": 0.9048400328137818, "grad_norm": 0.18370665609836578, "learning_rate": 5e-05, "loss": 1.6754, "step": 3309 }, { "epoch": 0.9051134809953514, "grad_norm": 0.16520695388317108, "learning_rate": 5e-05, "loss": 1.5961, "step": 3310 }, { "epoch": 0.905386929176921, "grad_norm": 0.17497234046459198, "learning_rate": 5e-05, "loss": 1.5912, "step": 3311 }, { "epoch": 0.9056603773584906, "grad_norm": 0.16902846097946167, "learning_rate": 5e-05, "loss": 1.6057, "step": 3312 }, { "epoch": 0.9059338255400602, "grad_norm": 0.16770271956920624, "learning_rate": 5e-05, "loss": 1.6274, "step": 3313 }, { "epoch": 0.9062072737216298, "grad_norm": 0.16241686046123505, "learning_rate": 5e-05, "loss": 1.5494, "step": 3314 }, { "epoch": 0.9064807219031994, "grad_norm": 0.16672220826148987, "learning_rate": 5e-05, "loss": 1.6654, "step": 3315 }, { "epoch": 0.9067541700847689, "grad_norm": 0.1626524180173874, "learning_rate": 5e-05, "loss": 1.5578, "step": 3316 }, { "epoch": 0.9070276182663385, "grad_norm": 0.16794568300247192, "learning_rate": 5e-05, "loss": 1.6463, "step": 3317 }, { "epoch": 0.9073010664479081, "grad_norm": 0.16510622203350067, "learning_rate": 5e-05, "loss": 1.597, "step": 3318 }, { "epoch": 0.9075745146294777, "grad_norm": 0.16373200714588165, "learning_rate": 5e-05, "loss": 1.6206, "step": 3319 }, { "epoch": 0.9078479628110473, "grad_norm": 0.16739732027053833, "learning_rate": 5e-05, "loss": 1.6041, "step": 3320 }, { "epoch": 0.9081214109926169, "grad_norm": 0.17469698190689087, "learning_rate": 5e-05, "loss": 1.6364, "step": 3321 }, { "epoch": 0.9083948591741865, "grad_norm": 0.1681319773197174, "learning_rate": 5e-05, "loss": 1.5485, "step": 3322 }, { "epoch": 0.9086683073557561, "grad_norm": 0.1650519222021103, "learning_rate": 5e-05, "loss": 1.5211, "step": 3323 }, { "epoch": 0.9089417555373257, "grad_norm": 0.167753666639328, "learning_rate": 5e-05, "loss": 1.6338, "step": 3324 }, { "epoch": 0.9092152037188953, "grad_norm": 0.164119154214859, "learning_rate": 5e-05, "loss": 1.5542, "step": 3325 }, { "epoch": 0.9094886519004649, "grad_norm": 0.16457271575927734, "learning_rate": 5e-05, "loss": 1.6337, "step": 3326 }, { "epoch": 0.9097621000820345, "grad_norm": 0.17210492491722107, "learning_rate": 5e-05, "loss": 1.6361, "step": 3327 }, { "epoch": 0.910035548263604, "grad_norm": 0.17178331315517426, "learning_rate": 5e-05, "loss": 1.6849, "step": 3328 }, { "epoch": 0.9103089964451736, "grad_norm": 0.1700570285320282, "learning_rate": 5e-05, "loss": 1.6171, "step": 3329 }, { "epoch": 0.9105824446267432, "grad_norm": 0.159864142537117, "learning_rate": 5e-05, "loss": 1.5302, "step": 3330 }, { "epoch": 0.9108558928083128, "grad_norm": 0.16526418924331665, "learning_rate": 5e-05, "loss": 1.5488, "step": 3331 }, { "epoch": 0.9111293409898824, "grad_norm": 0.16879980266094208, "learning_rate": 5e-05, "loss": 1.6115, "step": 3332 }, { "epoch": 0.911402789171452, "grad_norm": 0.1696682572364807, "learning_rate": 5e-05, "loss": 1.6198, "step": 3333 }, { "epoch": 0.9116762373530216, "grad_norm": 0.1723857969045639, "learning_rate": 5e-05, "loss": 1.4767, "step": 3334 }, { "epoch": 0.9119496855345912, "grad_norm": 0.16016238927841187, "learning_rate": 5e-05, "loss": 1.5293, "step": 3335 }, { "epoch": 0.9122231337161608, "grad_norm": 0.1660720705986023, "learning_rate": 5e-05, "loss": 1.5294, "step": 3336 }, { "epoch": 0.9124965818977304, "grad_norm": 0.16862016916275024, "learning_rate": 5e-05, "loss": 1.5987, "step": 3337 }, { "epoch": 0.9127700300793, "grad_norm": 0.17382635176181793, "learning_rate": 5e-05, "loss": 1.6834, "step": 3338 }, { "epoch": 0.9130434782608695, "grad_norm": 0.18816977739334106, "learning_rate": 5e-05, "loss": 1.5269, "step": 3339 }, { "epoch": 0.9133169264424391, "grad_norm": 0.17092393338680267, "learning_rate": 5e-05, "loss": 1.5092, "step": 3340 }, { "epoch": 0.9135903746240087, "grad_norm": 0.1623528152704239, "learning_rate": 5e-05, "loss": 1.4633, "step": 3341 }, { "epoch": 0.9138638228055783, "grad_norm": 0.16653479635715485, "learning_rate": 5e-05, "loss": 1.5334, "step": 3342 }, { "epoch": 0.9141372709871479, "grad_norm": 0.1750710904598236, "learning_rate": 5e-05, "loss": 1.6056, "step": 3343 }, { "epoch": 0.9144107191687175, "grad_norm": 0.1836736500263214, "learning_rate": 5e-05, "loss": 1.6098, "step": 3344 }, { "epoch": 0.9146841673502871, "grad_norm": 0.1607818603515625, "learning_rate": 5e-05, "loss": 1.5236, "step": 3345 }, { "epoch": 0.9149576155318567, "grad_norm": 0.1604374796152115, "learning_rate": 5e-05, "loss": 1.4829, "step": 3346 }, { "epoch": 0.9152310637134263, "grad_norm": 0.16865043342113495, "learning_rate": 5e-05, "loss": 1.6105, "step": 3347 }, { "epoch": 0.9155045118949959, "grad_norm": 0.17235668003559113, "learning_rate": 5e-05, "loss": 1.6007, "step": 3348 }, { "epoch": 0.9157779600765655, "grad_norm": 0.16054973006248474, "learning_rate": 5e-05, "loss": 1.5466, "step": 3349 }, { "epoch": 0.9160514082581351, "grad_norm": 0.1764586865901947, "learning_rate": 5e-05, "loss": 1.6551, "step": 3350 }, { "epoch": 0.9163248564397046, "grad_norm": 0.15826088190078735, "learning_rate": 5e-05, "loss": 1.538, "step": 3351 }, { "epoch": 0.9165983046212742, "grad_norm": 0.16789792478084564, "learning_rate": 5e-05, "loss": 1.5845, "step": 3352 }, { "epoch": 0.9168717528028438, "grad_norm": 0.1768852025270462, "learning_rate": 5e-05, "loss": 1.5847, "step": 3353 }, { "epoch": 0.9171452009844134, "grad_norm": 0.16304756700992584, "learning_rate": 5e-05, "loss": 1.5508, "step": 3354 }, { "epoch": 0.917418649165983, "grad_norm": 0.16250407695770264, "learning_rate": 5e-05, "loss": 1.5004, "step": 3355 }, { "epoch": 0.9176920973475526, "grad_norm": 0.18279612064361572, "learning_rate": 5e-05, "loss": 1.6446, "step": 3356 }, { "epoch": 0.9179655455291222, "grad_norm": 0.16867072880268097, "learning_rate": 5e-05, "loss": 1.5235, "step": 3357 }, { "epoch": 0.9182389937106918, "grad_norm": 0.16388383507728577, "learning_rate": 5e-05, "loss": 1.5635, "step": 3358 }, { "epoch": 0.9185124418922614, "grad_norm": 0.16723370552062988, "learning_rate": 5e-05, "loss": 1.6193, "step": 3359 }, { "epoch": 0.918785890073831, "grad_norm": 0.1766999363899231, "learning_rate": 5e-05, "loss": 1.6716, "step": 3360 }, { "epoch": 0.9190593382554006, "grad_norm": 0.16779550909996033, "learning_rate": 5e-05, "loss": 1.5566, "step": 3361 }, { "epoch": 0.9193327864369701, "grad_norm": 0.16861297190189362, "learning_rate": 5e-05, "loss": 1.6456, "step": 3362 }, { "epoch": 0.9196062346185397, "grad_norm": 0.18011167645454407, "learning_rate": 5e-05, "loss": 1.6536, "step": 3363 }, { "epoch": 0.9198796828001093, "grad_norm": 0.1704559475183487, "learning_rate": 5e-05, "loss": 1.6436, "step": 3364 }, { "epoch": 0.920153130981679, "grad_norm": 0.16970516741275787, "learning_rate": 5e-05, "loss": 1.5132, "step": 3365 }, { "epoch": 0.9204265791632485, "grad_norm": 0.1771494746208191, "learning_rate": 5e-05, "loss": 1.569, "step": 3366 }, { "epoch": 0.9207000273448182, "grad_norm": 0.17308154702186584, "learning_rate": 5e-05, "loss": 1.6102, "step": 3367 }, { "epoch": 0.9209734755263878, "grad_norm": 0.1670798808336258, "learning_rate": 5e-05, "loss": 1.5677, "step": 3368 }, { "epoch": 0.9212469237079574, "grad_norm": 0.1702473908662796, "learning_rate": 5e-05, "loss": 1.56, "step": 3369 }, { "epoch": 0.921520371889527, "grad_norm": 0.1635974645614624, "learning_rate": 5e-05, "loss": 1.5166, "step": 3370 }, { "epoch": 0.9217938200710966, "grad_norm": 0.16810756921768188, "learning_rate": 5e-05, "loss": 1.5151, "step": 3371 }, { "epoch": 0.9220672682526662, "grad_norm": 0.15942378342151642, "learning_rate": 5e-05, "loss": 1.5737, "step": 3372 }, { "epoch": 0.9223407164342358, "grad_norm": 0.17249250411987305, "learning_rate": 5e-05, "loss": 1.5527, "step": 3373 }, { "epoch": 0.9226141646158053, "grad_norm": 0.16102191805839539, "learning_rate": 5e-05, "loss": 1.5104, "step": 3374 }, { "epoch": 0.9228876127973749, "grad_norm": 0.17065097391605377, "learning_rate": 5e-05, "loss": 1.5749, "step": 3375 }, { "epoch": 0.9231610609789445, "grad_norm": 0.17183524370193481, "learning_rate": 5e-05, "loss": 1.5149, "step": 3376 }, { "epoch": 0.9234345091605141, "grad_norm": 0.17372307181358337, "learning_rate": 5e-05, "loss": 1.6725, "step": 3377 }, { "epoch": 0.9237079573420837, "grad_norm": 0.17314964532852173, "learning_rate": 5e-05, "loss": 1.5872, "step": 3378 }, { "epoch": 0.9239814055236533, "grad_norm": 0.17013640701770782, "learning_rate": 5e-05, "loss": 1.6004, "step": 3379 }, { "epoch": 0.9242548537052229, "grad_norm": 0.16398966312408447, "learning_rate": 5e-05, "loss": 1.5362, "step": 3380 }, { "epoch": 0.9245283018867925, "grad_norm": 0.16314879059791565, "learning_rate": 5e-05, "loss": 1.473, "step": 3381 }, { "epoch": 0.9248017500683621, "grad_norm": 0.164910227060318, "learning_rate": 5e-05, "loss": 1.5161, "step": 3382 }, { "epoch": 0.9250751982499317, "grad_norm": 0.16402222216129303, "learning_rate": 5e-05, "loss": 1.5689, "step": 3383 }, { "epoch": 0.9253486464315013, "grad_norm": 0.1658565104007721, "learning_rate": 5e-05, "loss": 1.5275, "step": 3384 }, { "epoch": 0.9256220946130709, "grad_norm": 0.1655322015285492, "learning_rate": 5e-05, "loss": 1.5571, "step": 3385 }, { "epoch": 0.9258955427946404, "grad_norm": 0.16581609845161438, "learning_rate": 5e-05, "loss": 1.6255, "step": 3386 }, { "epoch": 0.92616899097621, "grad_norm": 0.1806444674730301, "learning_rate": 5e-05, "loss": 1.6025, "step": 3387 }, { "epoch": 0.9264424391577796, "grad_norm": 0.1640556901693344, "learning_rate": 5e-05, "loss": 1.5615, "step": 3388 }, { "epoch": 0.9267158873393492, "grad_norm": 0.15630288422107697, "learning_rate": 5e-05, "loss": 1.5421, "step": 3389 }, { "epoch": 0.9269893355209188, "grad_norm": 0.16430017352104187, "learning_rate": 5e-05, "loss": 1.594, "step": 3390 }, { "epoch": 0.9272627837024884, "grad_norm": 0.17217886447906494, "learning_rate": 5e-05, "loss": 1.5472, "step": 3391 }, { "epoch": 0.927536231884058, "grad_norm": 0.17046159505844116, "learning_rate": 5e-05, "loss": 1.5272, "step": 3392 }, { "epoch": 0.9278096800656276, "grad_norm": 0.16787946224212646, "learning_rate": 5e-05, "loss": 1.5965, "step": 3393 }, { "epoch": 0.9280831282471972, "grad_norm": 0.1614540070295334, "learning_rate": 5e-05, "loss": 1.4654, "step": 3394 }, { "epoch": 0.9283565764287668, "grad_norm": 0.16552212834358215, "learning_rate": 5e-05, "loss": 1.5901, "step": 3395 }, { "epoch": 0.9286300246103364, "grad_norm": 0.1740752011537552, "learning_rate": 5e-05, "loss": 1.6107, "step": 3396 }, { "epoch": 0.9289034727919059, "grad_norm": 0.16223378479480743, "learning_rate": 5e-05, "loss": 1.5006, "step": 3397 }, { "epoch": 0.9291769209734755, "grad_norm": 0.18281352519989014, "learning_rate": 5e-05, "loss": 1.6221, "step": 3398 }, { "epoch": 0.9294503691550451, "grad_norm": 0.17504733800888062, "learning_rate": 5e-05, "loss": 1.5975, "step": 3399 }, { "epoch": 0.9297238173366147, "grad_norm": 0.1651788204908371, "learning_rate": 5e-05, "loss": 1.5912, "step": 3400 }, { "epoch": 0.9299972655181843, "grad_norm": 0.172093003988266, "learning_rate": 5e-05, "loss": 1.6516, "step": 3401 }, { "epoch": 0.9302707136997539, "grad_norm": 0.17026016116142273, "learning_rate": 5e-05, "loss": 1.5742, "step": 3402 }, { "epoch": 0.9305441618813235, "grad_norm": 0.17041485011577606, "learning_rate": 5e-05, "loss": 1.6175, "step": 3403 }, { "epoch": 0.9308176100628931, "grad_norm": 0.17498993873596191, "learning_rate": 5e-05, "loss": 1.554, "step": 3404 }, { "epoch": 0.9310910582444627, "grad_norm": 0.1785575896501541, "learning_rate": 5e-05, "loss": 1.5705, "step": 3405 }, { "epoch": 0.9313645064260323, "grad_norm": 0.17882578074932098, "learning_rate": 5e-05, "loss": 1.5676, "step": 3406 }, { "epoch": 0.9316379546076019, "grad_norm": 0.16166472434997559, "learning_rate": 5e-05, "loss": 1.5654, "step": 3407 }, { "epoch": 0.9319114027891715, "grad_norm": 0.1717950403690338, "learning_rate": 5e-05, "loss": 1.5881, "step": 3408 }, { "epoch": 0.932184850970741, "grad_norm": 0.17605219781398773, "learning_rate": 5e-05, "loss": 1.5802, "step": 3409 }, { "epoch": 0.9324582991523106, "grad_norm": 0.16096574068069458, "learning_rate": 5e-05, "loss": 1.51, "step": 3410 }, { "epoch": 0.9327317473338802, "grad_norm": 0.16242116689682007, "learning_rate": 5e-05, "loss": 1.5665, "step": 3411 }, { "epoch": 0.9330051955154498, "grad_norm": 0.17469222843647003, "learning_rate": 5e-05, "loss": 1.601, "step": 3412 }, { "epoch": 0.9332786436970194, "grad_norm": 0.16520720720291138, "learning_rate": 5e-05, "loss": 1.5964, "step": 3413 }, { "epoch": 0.933552091878589, "grad_norm": 0.17878501117229462, "learning_rate": 5e-05, "loss": 1.7176, "step": 3414 }, { "epoch": 0.9338255400601586, "grad_norm": 0.16792459785938263, "learning_rate": 5e-05, "loss": 1.5655, "step": 3415 }, { "epoch": 0.9340989882417282, "grad_norm": 0.1701999306678772, "learning_rate": 5e-05, "loss": 1.5766, "step": 3416 }, { "epoch": 0.9343724364232978, "grad_norm": 0.17062143981456757, "learning_rate": 5e-05, "loss": 1.6155, "step": 3417 }, { "epoch": 0.9346458846048674, "grad_norm": 0.17067895829677582, "learning_rate": 5e-05, "loss": 1.5856, "step": 3418 }, { "epoch": 0.934919332786437, "grad_norm": 0.17343777418136597, "learning_rate": 5e-05, "loss": 1.5841, "step": 3419 }, { "epoch": 0.9351927809680065, "grad_norm": 0.16160957515239716, "learning_rate": 5e-05, "loss": 1.5255, "step": 3420 }, { "epoch": 0.9354662291495761, "grad_norm": 0.15786494314670563, "learning_rate": 5e-05, "loss": 1.507, "step": 3421 }, { "epoch": 0.9357396773311457, "grad_norm": 0.18263064324855804, "learning_rate": 5e-05, "loss": 1.7325, "step": 3422 }, { "epoch": 0.9360131255127153, "grad_norm": 0.16316089034080505, "learning_rate": 5e-05, "loss": 1.421, "step": 3423 }, { "epoch": 0.9362865736942849, "grad_norm": 0.16691453754901886, "learning_rate": 5e-05, "loss": 1.6236, "step": 3424 }, { "epoch": 0.9365600218758545, "grad_norm": 0.16807349026203156, "learning_rate": 5e-05, "loss": 1.5508, "step": 3425 }, { "epoch": 0.9368334700574241, "grad_norm": 0.16728545725345612, "learning_rate": 5e-05, "loss": 1.4871, "step": 3426 }, { "epoch": 0.9371069182389937, "grad_norm": 0.1841699630022049, "learning_rate": 5e-05, "loss": 1.5571, "step": 3427 }, { "epoch": 0.9373803664205633, "grad_norm": 0.17346476018428802, "learning_rate": 5e-05, "loss": 1.5599, "step": 3428 }, { "epoch": 0.9376538146021329, "grad_norm": 0.17243662476539612, "learning_rate": 5e-05, "loss": 1.6099, "step": 3429 }, { "epoch": 0.9379272627837025, "grad_norm": 0.16930198669433594, "learning_rate": 5e-05, "loss": 1.6239, "step": 3430 }, { "epoch": 0.9382007109652721, "grad_norm": 0.17017267644405365, "learning_rate": 5e-05, "loss": 1.6719, "step": 3431 }, { "epoch": 0.9384741591468416, "grad_norm": 0.16782057285308838, "learning_rate": 5e-05, "loss": 1.6005, "step": 3432 }, { "epoch": 0.9387476073284112, "grad_norm": 0.16345928609371185, "learning_rate": 5e-05, "loss": 1.6027, "step": 3433 }, { "epoch": 0.9390210555099808, "grad_norm": 0.16896604001522064, "learning_rate": 5e-05, "loss": 1.6702, "step": 3434 }, { "epoch": 0.9392945036915504, "grad_norm": 0.16361366212368011, "learning_rate": 5e-05, "loss": 1.6016, "step": 3435 }, { "epoch": 0.93956795187312, "grad_norm": 0.1642320603132248, "learning_rate": 5e-05, "loss": 1.5547, "step": 3436 }, { "epoch": 0.9398414000546896, "grad_norm": 0.1652129888534546, "learning_rate": 5e-05, "loss": 1.5415, "step": 3437 }, { "epoch": 0.9401148482362592, "grad_norm": 0.16417425870895386, "learning_rate": 5e-05, "loss": 1.5715, "step": 3438 }, { "epoch": 0.9403882964178288, "grad_norm": 0.16755367815494537, "learning_rate": 5e-05, "loss": 1.4629, "step": 3439 }, { "epoch": 0.9406617445993984, "grad_norm": 0.1799638420343399, "learning_rate": 5e-05, "loss": 1.5797, "step": 3440 }, { "epoch": 0.940935192780968, "grad_norm": 0.17268924415111542, "learning_rate": 5e-05, "loss": 1.4751, "step": 3441 }, { "epoch": 0.9412086409625376, "grad_norm": 0.16716521978378296, "learning_rate": 5e-05, "loss": 1.5751, "step": 3442 }, { "epoch": 0.9414820891441071, "grad_norm": 0.17508552968502045, "learning_rate": 5e-05, "loss": 1.5959, "step": 3443 }, { "epoch": 0.9417555373256767, "grad_norm": 0.1688762605190277, "learning_rate": 5e-05, "loss": 1.6451, "step": 3444 }, { "epoch": 0.9420289855072463, "grad_norm": 0.17425693571567535, "learning_rate": 5e-05, "loss": 1.6965, "step": 3445 }, { "epoch": 0.9423024336888159, "grad_norm": 0.16022168099880219, "learning_rate": 5e-05, "loss": 1.5167, "step": 3446 }, { "epoch": 0.9425758818703855, "grad_norm": 0.16879145801067352, "learning_rate": 5e-05, "loss": 1.5844, "step": 3447 }, { "epoch": 0.9428493300519551, "grad_norm": 0.17180216312408447, "learning_rate": 5e-05, "loss": 1.5252, "step": 3448 }, { "epoch": 0.9431227782335248, "grad_norm": 0.17670823633670807, "learning_rate": 5e-05, "loss": 1.5545, "step": 3449 }, { "epoch": 0.9433962264150944, "grad_norm": 0.1751013547182083, "learning_rate": 5e-05, "loss": 1.6204, "step": 3450 }, { "epoch": 0.943669674596664, "grad_norm": 0.16705964505672455, "learning_rate": 5e-05, "loss": 1.5458, "step": 3451 }, { "epoch": 0.9439431227782336, "grad_norm": 0.17305395007133484, "learning_rate": 5e-05, "loss": 1.5924, "step": 3452 }, { "epoch": 0.9442165709598032, "grad_norm": 0.17383334040641785, "learning_rate": 5e-05, "loss": 1.6049, "step": 3453 }, { "epoch": 0.9444900191413728, "grad_norm": 0.16880303621292114, "learning_rate": 5e-05, "loss": 1.5726, "step": 3454 }, { "epoch": 0.9447634673229423, "grad_norm": 0.17519088089466095, "learning_rate": 5e-05, "loss": 1.5866, "step": 3455 }, { "epoch": 0.9450369155045119, "grad_norm": 0.1687891036272049, "learning_rate": 5e-05, "loss": 1.5832, "step": 3456 }, { "epoch": 0.9453103636860815, "grad_norm": 0.16175159811973572, "learning_rate": 5e-05, "loss": 1.6019, "step": 3457 }, { "epoch": 0.9455838118676511, "grad_norm": 0.17194457352161407, "learning_rate": 5e-05, "loss": 1.6486, "step": 3458 }, { "epoch": 0.9458572600492207, "grad_norm": 0.17335595190525055, "learning_rate": 5e-05, "loss": 1.6298, "step": 3459 }, { "epoch": 0.9461307082307903, "grad_norm": 0.17388460040092468, "learning_rate": 5e-05, "loss": 1.6029, "step": 3460 }, { "epoch": 0.9464041564123599, "grad_norm": 0.15883868932724, "learning_rate": 5e-05, "loss": 1.5193, "step": 3461 }, { "epoch": 0.9466776045939295, "grad_norm": 0.16564515233039856, "learning_rate": 5e-05, "loss": 1.5726, "step": 3462 }, { "epoch": 0.9469510527754991, "grad_norm": 0.1713998019695282, "learning_rate": 5e-05, "loss": 1.619, "step": 3463 }, { "epoch": 0.9472245009570687, "grad_norm": 0.16872616112232208, "learning_rate": 5e-05, "loss": 1.6437, "step": 3464 }, { "epoch": 0.9474979491386383, "grad_norm": 0.17497317492961884, "learning_rate": 5e-05, "loss": 1.533, "step": 3465 }, { "epoch": 0.9477713973202079, "grad_norm": 0.15996572375297546, "learning_rate": 5e-05, "loss": 1.4727, "step": 3466 }, { "epoch": 0.9480448455017774, "grad_norm": 0.1631600260734558, "learning_rate": 5e-05, "loss": 1.5921, "step": 3467 }, { "epoch": 0.948318293683347, "grad_norm": 0.1694975644350052, "learning_rate": 5e-05, "loss": 1.6251, "step": 3468 }, { "epoch": 0.9485917418649166, "grad_norm": 0.1703629493713379, "learning_rate": 5e-05, "loss": 1.6061, "step": 3469 }, { "epoch": 0.9488651900464862, "grad_norm": 0.16327685117721558, "learning_rate": 5e-05, "loss": 1.5616, "step": 3470 }, { "epoch": 0.9491386382280558, "grad_norm": 0.16655370593070984, "learning_rate": 5e-05, "loss": 1.6223, "step": 3471 }, { "epoch": 0.9494120864096254, "grad_norm": 0.18266244232654572, "learning_rate": 5e-05, "loss": 1.5366, "step": 3472 }, { "epoch": 0.949685534591195, "grad_norm": 0.1686059832572937, "learning_rate": 5e-05, "loss": 1.6063, "step": 3473 }, { "epoch": 0.9499589827727646, "grad_norm": 0.17475081980228424, "learning_rate": 5e-05, "loss": 1.5283, "step": 3474 }, { "epoch": 0.9502324309543342, "grad_norm": 0.17627473175525665, "learning_rate": 5e-05, "loss": 1.5529, "step": 3475 }, { "epoch": 0.9505058791359038, "grad_norm": 0.1728144884109497, "learning_rate": 5e-05, "loss": 1.4832, "step": 3476 }, { "epoch": 0.9507793273174734, "grad_norm": 0.19535605609416962, "learning_rate": 5e-05, "loss": 1.5747, "step": 3477 }, { "epoch": 0.9510527754990429, "grad_norm": 0.2011018991470337, "learning_rate": 5e-05, "loss": 1.5601, "step": 3478 }, { "epoch": 0.9513262236806125, "grad_norm": 0.17405074834823608, "learning_rate": 5e-05, "loss": 1.6053, "step": 3479 }, { "epoch": 0.9515996718621821, "grad_norm": 0.20217041671276093, "learning_rate": 5e-05, "loss": 1.607, "step": 3480 }, { "epoch": 0.9518731200437517, "grad_norm": 0.17352132499217987, "learning_rate": 5e-05, "loss": 1.5501, "step": 3481 }, { "epoch": 0.9521465682253213, "grad_norm": 0.17558707296848297, "learning_rate": 5e-05, "loss": 1.6479, "step": 3482 }, { "epoch": 0.9524200164068909, "grad_norm": 0.17117784917354584, "learning_rate": 5e-05, "loss": 1.5656, "step": 3483 }, { "epoch": 0.9526934645884605, "grad_norm": 0.16838552057743073, "learning_rate": 5e-05, "loss": 1.5381, "step": 3484 }, { "epoch": 0.9529669127700301, "grad_norm": 0.17733819782733917, "learning_rate": 5e-05, "loss": 1.6453, "step": 3485 }, { "epoch": 0.9532403609515997, "grad_norm": 0.1712433099746704, "learning_rate": 5e-05, "loss": 1.5944, "step": 3486 }, { "epoch": 0.9535138091331693, "grad_norm": 0.19022230803966522, "learning_rate": 5e-05, "loss": 1.6319, "step": 3487 }, { "epoch": 0.9537872573147389, "grad_norm": 0.17896117269992828, "learning_rate": 5e-05, "loss": 1.6098, "step": 3488 }, { "epoch": 0.9540607054963085, "grad_norm": 0.17199327051639557, "learning_rate": 5e-05, "loss": 1.589, "step": 3489 }, { "epoch": 0.954334153677878, "grad_norm": 0.19092747569084167, "learning_rate": 5e-05, "loss": 1.522, "step": 3490 }, { "epoch": 0.9546076018594476, "grad_norm": 0.17024657130241394, "learning_rate": 5e-05, "loss": 1.6758, "step": 3491 }, { "epoch": 0.9548810500410172, "grad_norm": 0.17147701978683472, "learning_rate": 5e-05, "loss": 1.5168, "step": 3492 }, { "epoch": 0.9551544982225868, "grad_norm": 0.18577666580677032, "learning_rate": 5e-05, "loss": 1.5943, "step": 3493 }, { "epoch": 0.9554279464041564, "grad_norm": 0.16658198833465576, "learning_rate": 5e-05, "loss": 1.6217, "step": 3494 }, { "epoch": 0.955701394585726, "grad_norm": 0.17051103711128235, "learning_rate": 5e-05, "loss": 1.521, "step": 3495 }, { "epoch": 0.9559748427672956, "grad_norm": 0.1750299036502838, "learning_rate": 5e-05, "loss": 1.541, "step": 3496 }, { "epoch": 0.9562482909488652, "grad_norm": 0.185097336769104, "learning_rate": 5e-05, "loss": 1.664, "step": 3497 }, { "epoch": 0.9565217391304348, "grad_norm": 0.17204828560352325, "learning_rate": 5e-05, "loss": 1.6225, "step": 3498 }, { "epoch": 0.9567951873120044, "grad_norm": 0.17451748251914978, "learning_rate": 5e-05, "loss": 1.5786, "step": 3499 }, { "epoch": 0.957068635493574, "grad_norm": 0.20204168558120728, "learning_rate": 5e-05, "loss": 1.7499, "step": 3500 }, { "epoch": 0.9573420836751435, "grad_norm": 0.17379069328308105, "learning_rate": 5e-05, "loss": 1.585, "step": 3501 }, { "epoch": 0.9576155318567131, "grad_norm": 0.1748073250055313, "learning_rate": 5e-05, "loss": 1.5567, "step": 3502 }, { "epoch": 0.9578889800382827, "grad_norm": 0.20730933547019958, "learning_rate": 5e-05, "loss": 1.7386, "step": 3503 }, { "epoch": 0.9581624282198523, "grad_norm": 0.1670350432395935, "learning_rate": 5e-05, "loss": 1.5471, "step": 3504 }, { "epoch": 0.9584358764014219, "grad_norm": 0.16766297817230225, "learning_rate": 5e-05, "loss": 1.4913, "step": 3505 }, { "epoch": 0.9587093245829915, "grad_norm": 0.18351885676383972, "learning_rate": 5e-05, "loss": 1.5318, "step": 3506 }, { "epoch": 0.9589827727645611, "grad_norm": 0.15731576085090637, "learning_rate": 5e-05, "loss": 1.4561, "step": 3507 }, { "epoch": 0.9592562209461307, "grad_norm": 0.18964198231697083, "learning_rate": 5e-05, "loss": 1.6603, "step": 3508 }, { "epoch": 0.9595296691277003, "grad_norm": 0.19028975069522858, "learning_rate": 5e-05, "loss": 1.6721, "step": 3509 }, { "epoch": 0.9598031173092699, "grad_norm": 0.16938328742980957, "learning_rate": 5e-05, "loss": 1.5652, "step": 3510 }, { "epoch": 0.9600765654908395, "grad_norm": 0.17705866694450378, "learning_rate": 5e-05, "loss": 1.5832, "step": 3511 }, { "epoch": 0.9603500136724091, "grad_norm": 0.1780325025320053, "learning_rate": 5e-05, "loss": 1.5572, "step": 3512 }, { "epoch": 0.9606234618539786, "grad_norm": 0.18230192363262177, "learning_rate": 5e-05, "loss": 1.6136, "step": 3513 }, { "epoch": 0.9608969100355482, "grad_norm": 0.18078622221946716, "learning_rate": 5e-05, "loss": 1.5788, "step": 3514 }, { "epoch": 0.9611703582171178, "grad_norm": 0.17788712680339813, "learning_rate": 5e-05, "loss": 1.4601, "step": 3515 }, { "epoch": 0.9614438063986874, "grad_norm": 0.17222706973552704, "learning_rate": 5e-05, "loss": 1.586, "step": 3516 }, { "epoch": 0.961717254580257, "grad_norm": 0.1748836636543274, "learning_rate": 5e-05, "loss": 1.6527, "step": 3517 }, { "epoch": 0.9619907027618266, "grad_norm": 0.18419338762760162, "learning_rate": 5e-05, "loss": 1.6447, "step": 3518 }, { "epoch": 0.9622641509433962, "grad_norm": 0.17671184241771698, "learning_rate": 5e-05, "loss": 1.6371, "step": 3519 }, { "epoch": 0.9625375991249658, "grad_norm": 0.1626741737127304, "learning_rate": 5e-05, "loss": 1.6034, "step": 3520 }, { "epoch": 0.9628110473065354, "grad_norm": 0.17750827968120575, "learning_rate": 5e-05, "loss": 1.5709, "step": 3521 }, { "epoch": 0.963084495488105, "grad_norm": 0.18092156946659088, "learning_rate": 5e-05, "loss": 1.5488, "step": 3522 }, { "epoch": 0.9633579436696746, "grad_norm": 0.20844420790672302, "learning_rate": 5e-05, "loss": 1.6209, "step": 3523 }, { "epoch": 0.9636313918512441, "grad_norm": 0.1704307496547699, "learning_rate": 5e-05, "loss": 1.6154, "step": 3524 }, { "epoch": 0.9639048400328137, "grad_norm": 0.17518380284309387, "learning_rate": 5e-05, "loss": 1.4978, "step": 3525 }, { "epoch": 0.9641782882143833, "grad_norm": 0.18488885462284088, "learning_rate": 5e-05, "loss": 1.5973, "step": 3526 }, { "epoch": 0.9644517363959529, "grad_norm": 0.1840328723192215, "learning_rate": 5e-05, "loss": 1.6942, "step": 3527 }, { "epoch": 0.9647251845775225, "grad_norm": 0.16851232945919037, "learning_rate": 5e-05, "loss": 1.5614, "step": 3528 }, { "epoch": 0.9649986327590921, "grad_norm": 0.1788729429244995, "learning_rate": 5e-05, "loss": 1.5971, "step": 3529 }, { "epoch": 0.9652720809406617, "grad_norm": 0.17521923780441284, "learning_rate": 5e-05, "loss": 1.5597, "step": 3530 }, { "epoch": 0.9655455291222313, "grad_norm": 0.16558045148849487, "learning_rate": 5e-05, "loss": 1.5802, "step": 3531 }, { "epoch": 0.965818977303801, "grad_norm": 0.16922442615032196, "learning_rate": 5e-05, "loss": 1.5096, "step": 3532 }, { "epoch": 0.9660924254853706, "grad_norm": 0.16690833866596222, "learning_rate": 5e-05, "loss": 1.5354, "step": 3533 }, { "epoch": 0.9663658736669402, "grad_norm": 0.17629151046276093, "learning_rate": 5e-05, "loss": 1.5951, "step": 3534 }, { "epoch": 0.9666393218485098, "grad_norm": 0.1715892106294632, "learning_rate": 5e-05, "loss": 1.6381, "step": 3535 }, { "epoch": 0.9669127700300792, "grad_norm": 0.17085449397563934, "learning_rate": 5e-05, "loss": 1.5276, "step": 3536 }, { "epoch": 0.9671862182116489, "grad_norm": 0.17155250906944275, "learning_rate": 5e-05, "loss": 1.5824, "step": 3537 }, { "epoch": 0.9674596663932185, "grad_norm": 0.16368617117404938, "learning_rate": 5e-05, "loss": 1.5595, "step": 3538 }, { "epoch": 0.9677331145747881, "grad_norm": 0.17515048384666443, "learning_rate": 5e-05, "loss": 1.5234, "step": 3539 }, { "epoch": 0.9680065627563577, "grad_norm": 0.17045453190803528, "learning_rate": 5e-05, "loss": 1.5847, "step": 3540 }, { "epoch": 0.9682800109379273, "grad_norm": 0.17592334747314453, "learning_rate": 5e-05, "loss": 1.681, "step": 3541 }, { "epoch": 0.9685534591194969, "grad_norm": 0.174262136220932, "learning_rate": 5e-05, "loss": 1.6834, "step": 3542 }, { "epoch": 0.9688269073010665, "grad_norm": 0.16702409088611603, "learning_rate": 5e-05, "loss": 1.5888, "step": 3543 }, { "epoch": 0.9691003554826361, "grad_norm": 0.16639576852321625, "learning_rate": 5e-05, "loss": 1.57, "step": 3544 }, { "epoch": 0.9693738036642057, "grad_norm": 0.16943658888339996, "learning_rate": 5e-05, "loss": 1.605, "step": 3545 }, { "epoch": 0.9696472518457753, "grad_norm": 0.16538041830062866, "learning_rate": 5e-05, "loss": 1.562, "step": 3546 }, { "epoch": 0.9699207000273449, "grad_norm": 0.16645492613315582, "learning_rate": 5e-05, "loss": 1.5321, "step": 3547 }, { "epoch": 0.9701941482089144, "grad_norm": 0.17388178408145905, "learning_rate": 5e-05, "loss": 1.576, "step": 3548 }, { "epoch": 0.970467596390484, "grad_norm": 0.17088045179843903, "learning_rate": 5e-05, "loss": 1.5195, "step": 3549 }, { "epoch": 0.9707410445720536, "grad_norm": 0.17386609315872192, "learning_rate": 5e-05, "loss": 1.5194, "step": 3550 }, { "epoch": 0.9710144927536232, "grad_norm": 0.1669374406337738, "learning_rate": 5e-05, "loss": 1.4916, "step": 3551 }, { "epoch": 0.9712879409351928, "grad_norm": 0.18806956708431244, "learning_rate": 5e-05, "loss": 1.4877, "step": 3552 }, { "epoch": 0.9715613891167624, "grad_norm": 0.1629197746515274, "learning_rate": 5e-05, "loss": 1.5365, "step": 3553 }, { "epoch": 0.971834837298332, "grad_norm": 0.16897207498550415, "learning_rate": 5e-05, "loss": 1.556, "step": 3554 }, { "epoch": 0.9721082854799016, "grad_norm": 0.16960465908050537, "learning_rate": 5e-05, "loss": 1.5432, "step": 3555 }, { "epoch": 0.9723817336614712, "grad_norm": 0.16694605350494385, "learning_rate": 5e-05, "loss": 1.6413, "step": 3556 }, { "epoch": 0.9726551818430408, "grad_norm": 0.162724107503891, "learning_rate": 5e-05, "loss": 1.491, "step": 3557 }, { "epoch": 0.9729286300246104, "grad_norm": 0.1615595519542694, "learning_rate": 5e-05, "loss": 1.5386, "step": 3558 }, { "epoch": 0.9732020782061799, "grad_norm": 0.15930253267288208, "learning_rate": 5e-05, "loss": 1.5991, "step": 3559 }, { "epoch": 0.9734755263877495, "grad_norm": 0.16046123206615448, "learning_rate": 5e-05, "loss": 1.5796, "step": 3560 }, { "epoch": 0.9737489745693191, "grad_norm": 0.16169172525405884, "learning_rate": 5e-05, "loss": 1.5435, "step": 3561 }, { "epoch": 0.9740224227508887, "grad_norm": 0.17044523358345032, "learning_rate": 5e-05, "loss": 1.5045, "step": 3562 }, { "epoch": 0.9742958709324583, "grad_norm": 0.16750258207321167, "learning_rate": 5e-05, "loss": 1.6184, "step": 3563 }, { "epoch": 0.9745693191140279, "grad_norm": 0.16423483192920685, "learning_rate": 5e-05, "loss": 1.5574, "step": 3564 }, { "epoch": 0.9748427672955975, "grad_norm": 0.16018086671829224, "learning_rate": 5e-05, "loss": 1.4814, "step": 3565 }, { "epoch": 0.9751162154771671, "grad_norm": 0.16692084074020386, "learning_rate": 5e-05, "loss": 1.565, "step": 3566 }, { "epoch": 0.9753896636587367, "grad_norm": 0.16623759269714355, "learning_rate": 5e-05, "loss": 1.5177, "step": 3567 }, { "epoch": 0.9756631118403063, "grad_norm": 0.1683354675769806, "learning_rate": 5e-05, "loss": 1.5655, "step": 3568 }, { "epoch": 0.9759365600218759, "grad_norm": 0.17353872954845428, "learning_rate": 5e-05, "loss": 1.5703, "step": 3569 }, { "epoch": 0.9762100082034455, "grad_norm": 0.1665719896554947, "learning_rate": 5e-05, "loss": 1.5936, "step": 3570 }, { "epoch": 0.976483456385015, "grad_norm": 0.17849120497703552, "learning_rate": 5e-05, "loss": 1.6144, "step": 3571 }, { "epoch": 0.9767569045665846, "grad_norm": 0.17340973019599915, "learning_rate": 5e-05, "loss": 1.6221, "step": 3572 }, { "epoch": 0.9770303527481542, "grad_norm": 0.17012323439121246, "learning_rate": 5e-05, "loss": 1.5603, "step": 3573 }, { "epoch": 0.9773038009297238, "grad_norm": 0.1630171239376068, "learning_rate": 5e-05, "loss": 1.4353, "step": 3574 }, { "epoch": 0.9775772491112934, "grad_norm": 0.16621048748493195, "learning_rate": 5e-05, "loss": 1.5738, "step": 3575 }, { "epoch": 0.977850697292863, "grad_norm": 0.16702908277511597, "learning_rate": 5e-05, "loss": 1.5689, "step": 3576 }, { "epoch": 0.9781241454744326, "grad_norm": 0.18123649060726166, "learning_rate": 5e-05, "loss": 1.687, "step": 3577 }, { "epoch": 0.9783975936560022, "grad_norm": 0.1809684783220291, "learning_rate": 5e-05, "loss": 1.5811, "step": 3578 }, { "epoch": 0.9786710418375718, "grad_norm": 0.18062925338745117, "learning_rate": 5e-05, "loss": 1.6317, "step": 3579 }, { "epoch": 0.9789444900191414, "grad_norm": 0.1767573207616806, "learning_rate": 5e-05, "loss": 1.6907, "step": 3580 }, { "epoch": 0.979217938200711, "grad_norm": 0.17363345623016357, "learning_rate": 5e-05, "loss": 1.6019, "step": 3581 }, { "epoch": 0.9794913863822805, "grad_norm": 0.173218235373497, "learning_rate": 5e-05, "loss": 1.5115, "step": 3582 }, { "epoch": 0.9797648345638501, "grad_norm": 0.16840875148773193, "learning_rate": 5e-05, "loss": 1.5806, "step": 3583 }, { "epoch": 0.9800382827454197, "grad_norm": 0.17666743695735931, "learning_rate": 5e-05, "loss": 1.6649, "step": 3584 }, { "epoch": 0.9803117309269893, "grad_norm": 0.1744329184293747, "learning_rate": 5e-05, "loss": 1.5703, "step": 3585 }, { "epoch": 0.9805851791085589, "grad_norm": 0.1757236123085022, "learning_rate": 5e-05, "loss": 1.6247, "step": 3586 }, { "epoch": 0.9808586272901285, "grad_norm": 0.17235401272773743, "learning_rate": 5e-05, "loss": 1.6508, "step": 3587 }, { "epoch": 0.9811320754716981, "grad_norm": 0.1712830811738968, "learning_rate": 5e-05, "loss": 1.6403, "step": 3588 }, { "epoch": 0.9814055236532677, "grad_norm": 0.16565677523612976, "learning_rate": 5e-05, "loss": 1.5304, "step": 3589 }, { "epoch": 0.9816789718348373, "grad_norm": 0.1652953326702118, "learning_rate": 5e-05, "loss": 1.553, "step": 3590 }, { "epoch": 0.9819524200164069, "grad_norm": 0.17286698520183563, "learning_rate": 5e-05, "loss": 1.5573, "step": 3591 }, { "epoch": 0.9822258681979765, "grad_norm": 0.17360328137874603, "learning_rate": 5e-05, "loss": 1.6319, "step": 3592 }, { "epoch": 0.9824993163795461, "grad_norm": 0.1673014909029007, "learning_rate": 5e-05, "loss": 1.5778, "step": 3593 }, { "epoch": 0.9827727645611156, "grad_norm": 0.17071346938610077, "learning_rate": 5e-05, "loss": 1.6138, "step": 3594 }, { "epoch": 0.9830462127426852, "grad_norm": 0.1741405427455902, "learning_rate": 5e-05, "loss": 1.5566, "step": 3595 }, { "epoch": 0.9833196609242548, "grad_norm": 0.17240159213542938, "learning_rate": 5e-05, "loss": 1.6181, "step": 3596 }, { "epoch": 0.9835931091058244, "grad_norm": 0.1678202599287033, "learning_rate": 5e-05, "loss": 1.5552, "step": 3597 }, { "epoch": 0.983866557287394, "grad_norm": 0.17836597561836243, "learning_rate": 5e-05, "loss": 1.5859, "step": 3598 }, { "epoch": 0.9841400054689636, "grad_norm": 0.17026908695697784, "learning_rate": 5e-05, "loss": 1.6616, "step": 3599 }, { "epoch": 0.9844134536505332, "grad_norm": 0.17007942497730255, "learning_rate": 5e-05, "loss": 1.48, "step": 3600 }, { "epoch": 0.9846869018321028, "grad_norm": 0.1805422455072403, "learning_rate": 5e-05, "loss": 1.6306, "step": 3601 }, { "epoch": 0.9849603500136724, "grad_norm": 0.16335126757621765, "learning_rate": 5e-05, "loss": 1.5206, "step": 3602 }, { "epoch": 0.985233798195242, "grad_norm": 0.1635698527097702, "learning_rate": 5e-05, "loss": 1.5101, "step": 3603 }, { "epoch": 0.9855072463768116, "grad_norm": 0.18003027141094208, "learning_rate": 5e-05, "loss": 1.643, "step": 3604 }, { "epoch": 0.9857806945583811, "grad_norm": 0.17599767446517944, "learning_rate": 5e-05, "loss": 1.6374, "step": 3605 }, { "epoch": 0.9860541427399507, "grad_norm": 0.1730010062456131, "learning_rate": 5e-05, "loss": 1.5686, "step": 3606 }, { "epoch": 0.9863275909215203, "grad_norm": 0.17399701476097107, "learning_rate": 5e-05, "loss": 1.6065, "step": 3607 }, { "epoch": 0.9866010391030899, "grad_norm": 0.18367905914783478, "learning_rate": 5e-05, "loss": 1.7215, "step": 3608 }, { "epoch": 0.9868744872846595, "grad_norm": 0.18548168241977692, "learning_rate": 5e-05, "loss": 1.5337, "step": 3609 }, { "epoch": 0.9871479354662291, "grad_norm": 0.1609538048505783, "learning_rate": 5e-05, "loss": 1.5586, "step": 3610 }, { "epoch": 0.9874213836477987, "grad_norm": 0.17349039018154144, "learning_rate": 5e-05, "loss": 1.659, "step": 3611 }, { "epoch": 0.9876948318293683, "grad_norm": 0.17284166812896729, "learning_rate": 5e-05, "loss": 1.6581, "step": 3612 }, { "epoch": 0.987968280010938, "grad_norm": 0.16577011346817017, "learning_rate": 5e-05, "loss": 1.4734, "step": 3613 }, { "epoch": 0.9882417281925076, "grad_norm": 0.17548039555549622, "learning_rate": 5e-05, "loss": 1.664, "step": 3614 }, { "epoch": 0.9885151763740772, "grad_norm": 0.16056503355503082, "learning_rate": 5e-05, "loss": 1.5385, "step": 3615 }, { "epoch": 0.9887886245556468, "grad_norm": 0.1729920357465744, "learning_rate": 5e-05, "loss": 1.6822, "step": 3616 }, { "epoch": 0.9890620727372162, "grad_norm": 0.1658102124929428, "learning_rate": 5e-05, "loss": 1.5311, "step": 3617 }, { "epoch": 0.9893355209187858, "grad_norm": 0.15829437971115112, "learning_rate": 5e-05, "loss": 1.5528, "step": 3618 }, { "epoch": 0.9896089691003555, "grad_norm": 0.160248264670372, "learning_rate": 5e-05, "loss": 1.5386, "step": 3619 }, { "epoch": 0.989882417281925, "grad_norm": 0.1627732813358307, "learning_rate": 5e-05, "loss": 1.5654, "step": 3620 }, { "epoch": 0.9901558654634947, "grad_norm": 0.17312705516815186, "learning_rate": 5e-05, "loss": 1.4819, "step": 3621 }, { "epoch": 0.9904293136450643, "grad_norm": 0.1666928231716156, "learning_rate": 5e-05, "loss": 1.5008, "step": 3622 }, { "epoch": 0.9907027618266339, "grad_norm": 0.16598451137542725, "learning_rate": 5e-05, "loss": 1.4918, "step": 3623 }, { "epoch": 0.9909762100082035, "grad_norm": 0.17471113801002502, "learning_rate": 5e-05, "loss": 1.6568, "step": 3624 }, { "epoch": 0.9912496581897731, "grad_norm": 0.16479487717151642, "learning_rate": 5e-05, "loss": 1.5205, "step": 3625 }, { "epoch": 0.9915231063713427, "grad_norm": 0.18188925087451935, "learning_rate": 5e-05, "loss": 1.6185, "step": 3626 }, { "epoch": 0.9917965545529123, "grad_norm": 0.1632193922996521, "learning_rate": 5e-05, "loss": 1.5691, "step": 3627 }, { "epoch": 0.9920700027344819, "grad_norm": 0.16891783475875854, "learning_rate": 5e-05, "loss": 1.53, "step": 3628 }, { "epoch": 0.9923434509160514, "grad_norm": 0.17035579681396484, "learning_rate": 5e-05, "loss": 1.5631, "step": 3629 }, { "epoch": 0.992616899097621, "grad_norm": 0.16007426381111145, "learning_rate": 5e-05, "loss": 1.414, "step": 3630 }, { "epoch": 0.9928903472791906, "grad_norm": 0.17161479592323303, "learning_rate": 5e-05, "loss": 1.5988, "step": 3631 }, { "epoch": 0.9931637954607602, "grad_norm": 0.16341786086559296, "learning_rate": 5e-05, "loss": 1.5492, "step": 3632 }, { "epoch": 0.9934372436423298, "grad_norm": 0.17597921192646027, "learning_rate": 5e-05, "loss": 1.6547, "step": 3633 }, { "epoch": 0.9937106918238994, "grad_norm": 0.18249720335006714, "learning_rate": 5e-05, "loss": 1.7218, "step": 3634 }, { "epoch": 0.993984140005469, "grad_norm": 0.17179979383945465, "learning_rate": 5e-05, "loss": 1.6261, "step": 3635 }, { "epoch": 0.9942575881870386, "grad_norm": 0.16686037182807922, "learning_rate": 5e-05, "loss": 1.5381, "step": 3636 }, { "epoch": 0.9945310363686082, "grad_norm": 0.17167183756828308, "learning_rate": 5e-05, "loss": 1.6132, "step": 3637 }, { "epoch": 0.9948044845501778, "grad_norm": 0.1685621589422226, "learning_rate": 5e-05, "loss": 1.5806, "step": 3638 }, { "epoch": 0.9950779327317474, "grad_norm": 0.17752127349376678, "learning_rate": 5e-05, "loss": 1.5842, "step": 3639 }, { "epoch": 0.9953513809133169, "grad_norm": 0.18407252430915833, "learning_rate": 5e-05, "loss": 1.5769, "step": 3640 }, { "epoch": 0.9956248290948865, "grad_norm": 0.17489740252494812, "learning_rate": 5e-05, "loss": 1.6766, "step": 3641 }, { "epoch": 0.9958982772764561, "grad_norm": 0.1652628481388092, "learning_rate": 5e-05, "loss": 1.5431, "step": 3642 }, { "epoch": 0.9961717254580257, "grad_norm": 0.16765595972537994, "learning_rate": 5e-05, "loss": 1.5277, "step": 3643 }, { "epoch": 0.9964451736395953, "grad_norm": 0.18051624298095703, "learning_rate": 5e-05, "loss": 1.7006, "step": 3644 }, { "epoch": 0.9967186218211649, "grad_norm": 0.16528740525245667, "learning_rate": 5e-05, "loss": 1.5304, "step": 3645 }, { "epoch": 0.9969920700027345, "grad_norm": 0.18011535704135895, "learning_rate": 5e-05, "loss": 1.6645, "step": 3646 }, { "epoch": 0.9972655181843041, "grad_norm": 0.17780528962612152, "learning_rate": 5e-05, "loss": 1.5679, "step": 3647 }, { "epoch": 0.9975389663658737, "grad_norm": 0.16724787652492523, "learning_rate": 5e-05, "loss": 1.5967, "step": 3648 }, { "epoch": 0.9978124145474433, "grad_norm": 0.1816672682762146, "learning_rate": 5e-05, "loss": 1.5915, "step": 3649 }, { "epoch": 0.9980858627290129, "grad_norm": 0.1704689860343933, "learning_rate": 5e-05, "loss": 1.4793, "step": 3650 }, { "epoch": 0.9983593109105825, "grad_norm": 0.16653746366500854, "learning_rate": 5e-05, "loss": 1.5062, "step": 3651 }, { "epoch": 0.998632759092152, "grad_norm": 0.17958907783031464, "learning_rate": 5e-05, "loss": 1.5776, "step": 3652 }, { "epoch": 0.9989062072737216, "grad_norm": 0.1751994490623474, "learning_rate": 5e-05, "loss": 1.5787, "step": 3653 }, { "epoch": 0.9991796554552912, "grad_norm": 0.16929645836353302, "learning_rate": 5e-05, "loss": 1.5965, "step": 3654 }, { "epoch": 0.9994531036368608, "grad_norm": 0.19185209274291992, "learning_rate": 5e-05, "loss": 1.5234, "step": 3655 }, { "epoch": 0.9997265518184304, "grad_norm": 0.17126961052417755, "learning_rate": 5e-05, "loss": 1.5285, "step": 3656 }, { "epoch": 1.0, "grad_norm": 0.17371578514575958, "learning_rate": 5e-05, "loss": 1.5768, "step": 3657 }, { "epoch": 1.0, "step": 3657, "total_flos": 2.81863078907845e+18, "train_loss": 1.6374119961558387, "train_runtime": 72812.9252, "train_samples_per_second": 0.402, "train_steps_per_second": 0.05 } ], "logging_steps": 1, "max_steps": 3657, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.81863078907845e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }