|
[ |
|
{ |
|
"loss": 36.4684, |
|
"grad_norm": 1.4972327947616577, |
|
"learning_rate": 9.991248796709548e-05, |
|
"epoch": 0.0 |
|
}, |
|
{ |
|
"loss": 31.4702, |
|
"grad_norm": 1.1359542608261108, |
|
"learning_rate": 9.982497593419096e-05, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 27.0139, |
|
"grad_norm": 0.8641749024391174, |
|
"learning_rate": 9.973746390128643e-05, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 25.5819, |
|
"grad_norm": 0.7870637774467468, |
|
"learning_rate": 9.964995186838191e-05, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 23.737, |
|
"grad_norm": 0.5421485900878906, |
|
"learning_rate": 9.956243983547739e-05, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 22.9208, |
|
"grad_norm": 0.6402917504310608, |
|
"learning_rate": 9.947492780257286e-05, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 22.6313, |
|
"grad_norm": 4.473743915557861, |
|
"learning_rate": 9.938741576966833e-05, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 21.2445, |
|
"grad_norm": 1.5480146408081055, |
|
"learning_rate": 9.929990373676381e-05, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 21.5249, |
|
"grad_norm": 1.6474212408065796, |
|
"learning_rate": 9.921239170385929e-05, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 20.416, |
|
"grad_norm": 5.127786636352539, |
|
"learning_rate": 9.912487967095477e-05, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 20.4307, |
|
"grad_norm": 3.4422194957733154, |
|
"learning_rate": 9.903736763805023e-05, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 20.303, |
|
"grad_norm": 4.862687587738037, |
|
"learning_rate": 9.894985560514571e-05, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 18.9257, |
|
"grad_norm": 3.3842506408691406, |
|
"learning_rate": 9.886234357224119e-05, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 19.3255, |
|
"grad_norm": 2.574763774871826, |
|
"learning_rate": 9.877483153933667e-05, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 18.568, |
|
"grad_norm": 3.362725019454956, |
|
"learning_rate": 9.868731950643215e-05, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 19.2249, |
|
"grad_norm": 2.039949417114258, |
|
"learning_rate": 9.859980747352761e-05, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 18.892, |
|
"grad_norm": 6.239978313446045, |
|
"learning_rate": 9.851229544062309e-05, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 18.1108, |
|
"grad_norm": 4.516480445861816, |
|
"learning_rate": 9.842478340771857e-05, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 17.5294, |
|
"grad_norm": 2.1653459072113037, |
|
"learning_rate": 9.833727137481405e-05, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 16.4828, |
|
"grad_norm": 3.5177245140075684, |
|
"learning_rate": 9.824975934190951e-05, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 17.0135, |
|
"grad_norm": 3.044818162918091, |
|
"learning_rate": 9.8162247309005e-05, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 17.2804, |
|
"grad_norm": 2.429781198501587, |
|
"learning_rate": 9.807473527610047e-05, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 16.9488, |
|
"grad_norm": 4.289863586425781, |
|
"learning_rate": 9.798722324319595e-05, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 16.413, |
|
"grad_norm": 2.322678804397583, |
|
"learning_rate": 9.789971121029142e-05, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 15.9682, |
|
"grad_norm": 2.0535218715667725, |
|
"learning_rate": 9.78121991773869e-05, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 15.9078, |
|
"grad_norm": 1.9909405708312988, |
|
"learning_rate": 9.772468714448237e-05, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 16.3041, |
|
"grad_norm": 2.9589896202087402, |
|
"learning_rate": 9.763717511157785e-05, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 15.9803, |
|
"grad_norm": 4.50626277923584, |
|
"learning_rate": 9.754966307867332e-05, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 15.3465, |
|
"grad_norm": 2.2582998275756836, |
|
"learning_rate": 9.74621510457688e-05, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 15.8187, |
|
"grad_norm": 1.8218069076538086, |
|
"learning_rate": 9.737463901286428e-05, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 15.6423, |
|
"grad_norm": 3.2282557487487793, |
|
"learning_rate": 9.728712697995976e-05, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 15.2293, |
|
"grad_norm": 4.152139663696289, |
|
"learning_rate": 9.719961494705523e-05, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 14.9266, |
|
"grad_norm": 2.4058563709259033, |
|
"learning_rate": 9.71121029141507e-05, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 14.9444, |
|
"grad_norm": 2.390054941177368, |
|
"learning_rate": 9.702459088124618e-05, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 14.3572, |
|
"grad_norm": 2.8402915000915527, |
|
"learning_rate": 9.693707884834166e-05, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 14.2444, |
|
"grad_norm": 3.193218469619751, |
|
"learning_rate": 9.684956681543714e-05, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 14.4161, |
|
"grad_norm": 3.0809693336486816, |
|
"learning_rate": 9.67620547825326e-05, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 15.3421, |
|
"grad_norm": 2.5853307247161865, |
|
"learning_rate": 9.667454274962808e-05, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 14.4311, |
|
"grad_norm": 4.195634365081787, |
|
"learning_rate": 9.658703071672356e-05, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 14.6836, |
|
"grad_norm": 3.293510913848877, |
|
"learning_rate": 9.649951868381904e-05, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 13.8319, |
|
"grad_norm": 2.9272682666778564, |
|
"learning_rate": 9.64120066509145e-05, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 14.566, |
|
"grad_norm": 2.871901512145996, |
|
"learning_rate": 9.632449461800998e-05, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 14.1236, |
|
"grad_norm": 5.13899564743042, |
|
"learning_rate": 9.623698258510546e-05, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 14.5302, |
|
"grad_norm": 2.652040481567383, |
|
"learning_rate": 9.614947055220094e-05, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 13.8852, |
|
"grad_norm": 2.9902889728546143, |
|
"learning_rate": 9.606195851929642e-05, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 14.0293, |
|
"grad_norm": 2.3652427196502686, |
|
"learning_rate": 9.597444648639187e-05, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 14.2171, |
|
"grad_norm": 4.3896484375, |
|
"learning_rate": 9.588693445348735e-05, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 13.8609, |
|
"grad_norm": 3.040400505065918, |
|
"learning_rate": 9.579942242058283e-05, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 13.7735, |
|
"grad_norm": 4.223784446716309, |
|
"learning_rate": 9.571191038767831e-05, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 13.8871, |
|
"grad_norm": 3.0033748149871826, |
|
"learning_rate": 9.562439835477379e-05, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 13.934, |
|
"grad_norm": 1.834015965461731, |
|
"learning_rate": 9.553688632186925e-05, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.9919, |
|
"grad_norm": 4.704802513122559, |
|
"learning_rate": 9.544937428896473e-05, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 12.8476, |
|
"grad_norm": 2.105950355529785, |
|
"learning_rate": 9.536186225606021e-05, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 13.0775, |
|
"grad_norm": 3.732581615447998, |
|
"learning_rate": 9.527435022315569e-05, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 14.248, |
|
"grad_norm": 3.9151251316070557, |
|
"learning_rate": 9.518683819025115e-05, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 13.519, |
|
"grad_norm": 2.424039602279663, |
|
"learning_rate": 9.509932615734663e-05, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.9035, |
|
"grad_norm": 2.8388330936431885, |
|
"learning_rate": 9.501181412444211e-05, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 13.5495, |
|
"grad_norm": 4.111719131469727, |
|
"learning_rate": 9.492430209153759e-05, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 13.0792, |
|
"grad_norm": 5.089598655700684, |
|
"learning_rate": 9.483679005863306e-05, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.7144, |
|
"grad_norm": 2.015564203262329, |
|
"learning_rate": 9.474927802572854e-05, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 13.1144, |
|
"grad_norm": 1.9832412004470825, |
|
"learning_rate": 9.466176599282401e-05, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 12.9708, |
|
"grad_norm": 1.9776819944381714, |
|
"learning_rate": 9.457425395991949e-05, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 13.1545, |
|
"grad_norm": 3.082418918609619, |
|
"learning_rate": 9.448674192701497e-05, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 12.8948, |
|
"grad_norm": 2.824528217315674, |
|
"learning_rate": 9.439922989411044e-05, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 12.8128, |
|
"grad_norm": 3.653470754623413, |
|
"learning_rate": 9.431171786120592e-05, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 12.4615, |
|
"grad_norm": 2.4570350646972656, |
|
"learning_rate": 9.42242058283014e-05, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 13.7183, |
|
"grad_norm": 1.996759057044983, |
|
"learning_rate": 9.413669379539687e-05, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 13.286, |
|
"grad_norm": 2.3849940299987793, |
|
"learning_rate": 9.404918176249234e-05, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 12.6206, |
|
"grad_norm": 3.374633550643921, |
|
"learning_rate": 9.396166972958782e-05, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 13.1151, |
|
"grad_norm": 4.5953216552734375, |
|
"learning_rate": 9.38741576966833e-05, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 12.6137, |
|
"grad_norm": 2.402780532836914, |
|
"learning_rate": 9.378664566377878e-05, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 12.1367, |
|
"grad_norm": 5.434263706207275, |
|
"learning_rate": 9.369913363087424e-05, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 12.4959, |
|
"grad_norm": 3.73447585105896, |
|
"learning_rate": 9.361162159796972e-05, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 12.1629, |
|
"grad_norm": 3.205071449279785, |
|
"learning_rate": 9.35241095650652e-05, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 12.0657, |
|
"grad_norm": 4.104920864105225, |
|
"learning_rate": 9.343659753216068e-05, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.9909, |
|
"grad_norm": 4.132589817047119, |
|
"learning_rate": 9.334908549925616e-05, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.9682, |
|
"grad_norm": 2.3729248046875, |
|
"learning_rate": 9.326157346635162e-05, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 12.2075, |
|
"grad_norm": 3.024388313293457, |
|
"learning_rate": 9.31740614334471e-05, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 11.4629, |
|
"grad_norm": 2.923081159591675, |
|
"learning_rate": 9.308654940054258e-05, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 12.6273, |
|
"grad_norm": 4.349196434020996, |
|
"learning_rate": 9.299903736763806e-05, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 12.0323, |
|
"grad_norm": 3.275175094604492, |
|
"learning_rate": 9.291152533473352e-05, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 12.1019, |
|
"grad_norm": 1.8104184865951538, |
|
"learning_rate": 9.2824013301829e-05, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 12.131, |
|
"grad_norm": 3.931492567062378, |
|
"learning_rate": 9.273650126892448e-05, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 12.6479, |
|
"grad_norm": 4.626213550567627, |
|
"learning_rate": 9.264898923601996e-05, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 12.0639, |
|
"grad_norm": 2.5656702518463135, |
|
"learning_rate": 9.256147720311543e-05, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 11.9819, |
|
"grad_norm": 3.8051023483276367, |
|
"learning_rate": 9.24739651702109e-05, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 12.7138, |
|
"grad_norm": 2.1373887062072754, |
|
"learning_rate": 9.238645313730638e-05, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 12.1889, |
|
"grad_norm": 4.774439334869385, |
|
"learning_rate": 9.229894110440186e-05, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 12.3925, |
|
"grad_norm": 3.0765390396118164, |
|
"learning_rate": 9.221142907149734e-05, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 11.4008, |
|
"grad_norm": 3.136746644973755, |
|
"learning_rate": 9.212391703859281e-05, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 11.8306, |
|
"grad_norm": 1.836838722229004, |
|
"learning_rate": 9.203640500568829e-05, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 11.7773, |
|
"grad_norm": 3.790940523147583, |
|
"learning_rate": 9.194889297278376e-05, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 11.8051, |
|
"grad_norm": 3.1878066062927246, |
|
"learning_rate": 9.186138093987924e-05, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 12.5683, |
|
"grad_norm": 3.5691912174224854, |
|
"learning_rate": 9.177386890697471e-05, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 12.0541, |
|
"grad_norm": 3.9797616004943848, |
|
"learning_rate": 9.168635687407019e-05, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.8673, |
|
"grad_norm": 6.183890342712402, |
|
"learning_rate": 9.159884484116567e-05, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.3265, |
|
"grad_norm": 3.011223316192627, |
|
"learning_rate": 9.151133280826115e-05, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.5664, |
|
"grad_norm": 2.2235491275787354, |
|
"learning_rate": 9.142382077535661e-05, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 11.5695, |
|
"grad_norm": 2.199366807937622, |
|
"learning_rate": 9.133630874245209e-05, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 12.1804, |
|
"grad_norm": 2.8299245834350586, |
|
"learning_rate": 9.124879670954757e-05, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 11.4799, |
|
"grad_norm": 3.164628744125366, |
|
"learning_rate": 9.116128467664305e-05, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 11.4195, |
|
"grad_norm": 4.022547245025635, |
|
"learning_rate": 9.107377264373851e-05, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 11.5764, |
|
"grad_norm": 2.569967031478882, |
|
"learning_rate": 9.098626061083399e-05, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 11.4122, |
|
"grad_norm": 2.7668631076812744, |
|
"learning_rate": 9.089874857792947e-05, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 11.9738, |
|
"grad_norm": 4.17225980758667, |
|
"learning_rate": 9.081123654502495e-05, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 11.2846, |
|
"grad_norm": 3.6021440029144287, |
|
"learning_rate": 9.072372451212043e-05, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 11.885, |
|
"grad_norm": 5.99414587020874, |
|
"learning_rate": 9.06362124792159e-05, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 11.4829, |
|
"grad_norm": 3.0609118938446045, |
|
"learning_rate": 9.054870044631137e-05, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 11.2748, |
|
"grad_norm": 3.083606243133545, |
|
"learning_rate": 9.046118841340685e-05, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.9556, |
|
"grad_norm": 2.1071770191192627, |
|
"learning_rate": 9.037367638050233e-05, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 11.9664, |
|
"grad_norm": 3.2089502811431885, |
|
"learning_rate": 9.02861643475978e-05, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 11.0907, |
|
"grad_norm": 2.714460611343384, |
|
"learning_rate": 9.019865231469327e-05, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 11.0296, |
|
"grad_norm": 4.843391418457031, |
|
"learning_rate": 9.011114028178875e-05, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.6882, |
|
"grad_norm": 2.8939428329467773, |
|
"learning_rate": 9.002362824888423e-05, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 11.4392, |
|
"grad_norm": 5.056521892547607, |
|
"learning_rate": 8.99361162159797e-05, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 11.1842, |
|
"grad_norm": 2.7797389030456543, |
|
"learning_rate": 8.984860418307518e-05, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.97, |
|
"grad_norm": 4.099424839019775, |
|
"learning_rate": 8.976109215017066e-05, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 11.3728, |
|
"grad_norm": 3.803455114364624, |
|
"learning_rate": 8.967358011726613e-05, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 11.1566, |
|
"grad_norm": 6.033726215362549, |
|
"learning_rate": 8.958606808436161e-05, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.635, |
|
"grad_norm": 3.339327335357666, |
|
"learning_rate": 8.949855605145708e-05, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 11.2816, |
|
"grad_norm": 2.3768680095672607, |
|
"learning_rate": 8.941104401855256e-05, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.8236, |
|
"grad_norm": 3.4453046321868896, |
|
"learning_rate": 8.932353198564804e-05, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.9037, |
|
"grad_norm": 3.0895841121673584, |
|
"learning_rate": 8.923601995274352e-05, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.4346, |
|
"grad_norm": 3.26282000541687, |
|
"learning_rate": 8.914850791983898e-05, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.2253, |
|
"grad_norm": 3.158858299255371, |
|
"learning_rate": 8.906099588693446e-05, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.9327, |
|
"grad_norm": 2.569925308227539, |
|
"learning_rate": 8.897348385402994e-05, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 11.1466, |
|
"grad_norm": 4.456540107727051, |
|
"learning_rate": 8.888597182112542e-05, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.6713, |
|
"grad_norm": 2.9973337650299072, |
|
"learning_rate": 8.879845978822088e-05, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.6667, |
|
"grad_norm": 4.433472156524658, |
|
"learning_rate": 8.871094775531636e-05, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 11.0465, |
|
"grad_norm": 3.661515474319458, |
|
"learning_rate": 8.862343572241184e-05, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.2861, |
|
"grad_norm": 2.8008625507354736, |
|
"learning_rate": 8.853592368950732e-05, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.6822, |
|
"grad_norm": 3.843266487121582, |
|
"learning_rate": 8.84484116566028e-05, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.726, |
|
"grad_norm": 3.4649717807769775, |
|
"learning_rate": 8.836089962369826e-05, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.6911, |
|
"grad_norm": 4.743326187133789, |
|
"learning_rate": 8.827338759079374e-05, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.1019, |
|
"grad_norm": 2.6317293643951416, |
|
"learning_rate": 8.818587555788922e-05, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.4147, |
|
"grad_norm": 3.893660306930542, |
|
"learning_rate": 8.80983635249847e-05, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.5977, |
|
"grad_norm": 2.704558849334717, |
|
"learning_rate": 8.801085149208017e-05, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.7126, |
|
"grad_norm": 3.4808812141418457, |
|
"learning_rate": 8.792333945917563e-05, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.4511, |
|
"grad_norm": 2.971688985824585, |
|
"learning_rate": 8.783582742627111e-05, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.3621, |
|
"grad_norm": 3.7666103839874268, |
|
"learning_rate": 8.774831539336659e-05, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.3416, |
|
"grad_norm": 2.951805353164673, |
|
"learning_rate": 8.766080336046207e-05, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.5537, |
|
"grad_norm": 3.5080454349517822, |
|
"learning_rate": 8.757329132755753e-05, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 10.3536, |
|
"grad_norm": 3.521519660949707, |
|
"learning_rate": 8.748577929465301e-05, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.3231, |
|
"grad_norm": 3.646610736846924, |
|
"learning_rate": 8.739826726174849e-05, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.6433, |
|
"grad_norm": 3.4696707725524902, |
|
"learning_rate": 8.731075522884397e-05, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 10.1692, |
|
"grad_norm": 3.852370500564575, |
|
"learning_rate": 8.722324319593944e-05, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.7841, |
|
"grad_norm": 3.693451404571533, |
|
"learning_rate": 8.713573116303491e-05, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.7817, |
|
"grad_norm": 3.032994508743286, |
|
"learning_rate": 8.704821913013039e-05, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.7416, |
|
"grad_norm": 3.537693500518799, |
|
"learning_rate": 8.696070709722587e-05, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.9999, |
|
"grad_norm": 2.624573230743408, |
|
"learning_rate": 8.687319506432135e-05, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 10.3777, |
|
"grad_norm": 2.453648328781128, |
|
"learning_rate": 8.678568303141682e-05, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.2463, |
|
"grad_norm": 3.5459659099578857, |
|
"learning_rate": 8.66981709985123e-05, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 9.69, |
|
"grad_norm": 2.9005305767059326, |
|
"learning_rate": 8.661065896560777e-05, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.5555, |
|
"grad_norm": 4.305134296417236, |
|
"learning_rate": 8.652314693270325e-05, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 11.4746, |
|
"grad_norm": 3.8746566772460938, |
|
"learning_rate": 8.643563489979872e-05, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 10.4112, |
|
"grad_norm": 3.0006351470947266, |
|
"learning_rate": 8.63481228668942e-05, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 10.7159, |
|
"grad_norm": 3.4273717403411865, |
|
"learning_rate": 8.626061083398968e-05, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.8815, |
|
"grad_norm": 3.3976597785949707, |
|
"learning_rate": 8.617309880108515e-05, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 10.0256, |
|
"grad_norm": 4.364745140075684, |
|
"learning_rate": 8.608558676818062e-05, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 10.2598, |
|
"grad_norm": 2.6873209476470947, |
|
"learning_rate": 8.59980747352761e-05, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 10.3362, |
|
"grad_norm": 4.00089693069458, |
|
"learning_rate": 8.591056270237158e-05, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 10.2189, |
|
"grad_norm": 2.858186721801758, |
|
"learning_rate": 8.582305066946706e-05, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.3866, |
|
"grad_norm": 3.203000783920288, |
|
"learning_rate": 8.573553863656252e-05, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.0813, |
|
"grad_norm": 3.210279941558838, |
|
"learning_rate": 8.5648026603658e-05, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.5642, |
|
"grad_norm": 3.2169432640075684, |
|
"learning_rate": 8.556051457075348e-05, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 9.8901, |
|
"grad_norm": 3.107404947280884, |
|
"learning_rate": 8.547300253784896e-05, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.1058, |
|
"grad_norm": 2.7491989135742188, |
|
"learning_rate": 8.538549050494444e-05, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.1777, |
|
"grad_norm": 3.140073299407959, |
|
"learning_rate": 8.52979784720399e-05, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 9.4428, |
|
"grad_norm": 3.9033658504486084, |
|
"learning_rate": 8.521046643913538e-05, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 10.4304, |
|
"grad_norm": 3.4388954639434814, |
|
"learning_rate": 8.512295440623086e-05, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.7865, |
|
"grad_norm": 2.7577993869781494, |
|
"learning_rate": 8.503544237332634e-05, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 10.5389, |
|
"grad_norm": 4.365457534790039, |
|
"learning_rate": 8.49479303404218e-05, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.6268, |
|
"grad_norm": 4.908252239227295, |
|
"learning_rate": 8.486041830751728e-05, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.8142, |
|
"grad_norm": 3.5492117404937744, |
|
"learning_rate": 8.477290627461276e-05, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.1744, |
|
"grad_norm": 3.34104061126709, |
|
"learning_rate": 8.468539424170824e-05, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.793, |
|
"grad_norm": 5.443964958190918, |
|
"learning_rate": 8.459788220880371e-05, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.6955, |
|
"grad_norm": 3.092270851135254, |
|
"learning_rate": 8.451037017589919e-05, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.7381, |
|
"grad_norm": 3.322415828704834, |
|
"learning_rate": 8.442285814299467e-05, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 10.1758, |
|
"grad_norm": 3.5836918354034424, |
|
"learning_rate": 8.433534611009014e-05, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 10.0565, |
|
"grad_norm": 4.64646053314209, |
|
"learning_rate": 8.424783407718562e-05, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 9.3562, |
|
"grad_norm": 2.8691656589508057, |
|
"learning_rate": 8.416032204428109e-05, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 10.1164, |
|
"grad_norm": 2.6130857467651367, |
|
"learning_rate": 8.407281001137657e-05, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.1353, |
|
"grad_norm": 2.950364112854004, |
|
"learning_rate": 8.398529797847205e-05, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.2614, |
|
"grad_norm": 3.1866071224212646, |
|
"learning_rate": 8.389778594556752e-05, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.7335, |
|
"grad_norm": 3.584228038787842, |
|
"learning_rate": 8.381027391266299e-05, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.7923, |
|
"grad_norm": 2.941434860229492, |
|
"learning_rate": 8.372276187975847e-05, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.6495, |
|
"grad_norm": 3.9578118324279785, |
|
"learning_rate": 8.363524984685395e-05, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.7038, |
|
"grad_norm": 3.197563648223877, |
|
"learning_rate": 8.354773781394943e-05, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.9406, |
|
"grad_norm": 3.8146650791168213, |
|
"learning_rate": 8.346022578104489e-05, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.8941, |
|
"grad_norm": 3.293826103210449, |
|
"learning_rate": 8.337271374814037e-05, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.463, |
|
"grad_norm": 2.8410701751708984, |
|
"learning_rate": 8.328520171523585e-05, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.9774, |
|
"grad_norm": 4.301900386810303, |
|
"learning_rate": 8.319768968233133e-05, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.8438, |
|
"grad_norm": 3.798737049102783, |
|
"learning_rate": 8.311017764942681e-05, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.9238, |
|
"grad_norm": 3.634910821914673, |
|
"learning_rate": 8.302266561652227e-05, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.3031, |
|
"grad_norm": 4.557560443878174, |
|
"learning_rate": 8.293515358361775e-05, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.7714, |
|
"grad_norm": 3.100658893585205, |
|
"learning_rate": 8.284764155071323e-05, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.6701, |
|
"grad_norm": 3.0376410484313965, |
|
"learning_rate": 8.276012951780871e-05, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.7965, |
|
"grad_norm": 2.64803147315979, |
|
"learning_rate": 8.267261748490418e-05, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.3502, |
|
"grad_norm": 3.5259008407592773, |
|
"learning_rate": 8.258510545199965e-05, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.3924, |
|
"grad_norm": 3.604329824447632, |
|
"learning_rate": 8.249759341909513e-05, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.1481, |
|
"grad_norm": 2.6112060546875, |
|
"learning_rate": 8.241008138619061e-05, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 10.2096, |
|
"grad_norm": 3.718703031539917, |
|
"learning_rate": 8.232256935328608e-05, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 9.0669, |
|
"grad_norm": 4.43959903717041, |
|
"learning_rate": 8.223505732038156e-05, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 9.2528, |
|
"grad_norm": 3.4342939853668213, |
|
"learning_rate": 8.214754528747703e-05, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.4184, |
|
"grad_norm": 4.191211700439453, |
|
"learning_rate": 8.206003325457251e-05, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.2057, |
|
"grad_norm": 3.076712131500244, |
|
"learning_rate": 8.197252122166799e-05, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.3174, |
|
"grad_norm": 3.668440341949463, |
|
"learning_rate": 8.188500918876346e-05, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.8369, |
|
"grad_norm": 3.419703483581543, |
|
"learning_rate": 8.179749715585894e-05, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.2614, |
|
"grad_norm": 4.150201797485352, |
|
"learning_rate": 8.170998512295442e-05, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.1372, |
|
"grad_norm": 3.2589640617370605, |
|
"learning_rate": 8.16224730900499e-05, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.4359, |
|
"grad_norm": 3.1012041568756104, |
|
"learning_rate": 8.153496105714536e-05, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.6403, |
|
"grad_norm": 3.882509708404541, |
|
"learning_rate": 8.144744902424084e-05, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.6448, |
|
"grad_norm": 2.656543254852295, |
|
"learning_rate": 8.135993699133632e-05, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.5709, |
|
"grad_norm": 3.2577645778656006, |
|
"learning_rate": 8.12724249584318e-05, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.4838, |
|
"grad_norm": 2.737210512161255, |
|
"learning_rate": 8.118491292552726e-05, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.0991, |
|
"grad_norm": 2.2185497283935547, |
|
"learning_rate": 8.109740089262274e-05, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.1741, |
|
"grad_norm": 2.766544818878174, |
|
"learning_rate": 8.100988885971822e-05, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.9664, |
|
"grad_norm": 3.627641201019287, |
|
"learning_rate": 8.09223768268137e-05, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.3467, |
|
"grad_norm": 3.600707769393921, |
|
"learning_rate": 8.083486479390916e-05, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.8328, |
|
"grad_norm": 5.097866058349609, |
|
"learning_rate": 8.074735276100464e-05, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.289, |
|
"grad_norm": 3.3913521766662598, |
|
"learning_rate": 8.065984072810012e-05, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.2046, |
|
"grad_norm": 3.586367130279541, |
|
"learning_rate": 8.05723286951956e-05, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.1802, |
|
"grad_norm": 5.786179542541504, |
|
"learning_rate": 8.048481666229108e-05, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.6482, |
|
"grad_norm": 3.158339023590088, |
|
"learning_rate": 8.039730462938655e-05, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.0124, |
|
"grad_norm": 3.3116583824157715, |
|
"learning_rate": 8.030979259648202e-05, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 8.7543, |
|
"grad_norm": 2.555194616317749, |
|
"learning_rate": 8.02222805635775e-05, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.043, |
|
"grad_norm": 3.2205519676208496, |
|
"learning_rate": 8.013476853067298e-05, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.5348, |
|
"grad_norm": 3.4175057411193848, |
|
"learning_rate": 8.004725649776845e-05, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.177, |
|
"grad_norm": 4.694581985473633, |
|
"learning_rate": 7.995974446486391e-05, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.2863, |
|
"grad_norm": 2.7787346839904785, |
|
"learning_rate": 7.987223243195939e-05, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 8.6984, |
|
"grad_norm": 3.4298195838928223, |
|
"learning_rate": 7.978472039905487e-05, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.0926, |
|
"grad_norm": 4.21417760848999, |
|
"learning_rate": 7.969720836615035e-05, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 8.8851, |
|
"grad_norm": 3.0844244956970215, |
|
"learning_rate": 7.960969633324581e-05, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.7628, |
|
"grad_norm": 3.0156939029693604, |
|
"learning_rate": 7.95221843003413e-05, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 8.6529, |
|
"grad_norm": 3.9500784873962402, |
|
"learning_rate": 7.943467226743677e-05, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 9.0502, |
|
"grad_norm": 4.802796840667725, |
|
"learning_rate": 7.934716023453225e-05, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.6458, |
|
"grad_norm": 4.273401260375977, |
|
"learning_rate": 7.925964820162772e-05, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.8679, |
|
"grad_norm": 4.070954322814941, |
|
"learning_rate": 7.91721361687232e-05, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.8355, |
|
"grad_norm": 3.3995022773742676, |
|
"learning_rate": 7.908462413581867e-05, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 8.7255, |
|
"grad_norm": 2.974888801574707, |
|
"learning_rate": 7.899711210291415e-05, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 9.6256, |
|
"grad_norm": 2.521350145339966, |
|
"learning_rate": 7.890960007000963e-05, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 8.9677, |
|
"grad_norm": 2.659583330154419, |
|
"learning_rate": 7.88220880371051e-05, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.339, |
|
"grad_norm": 4.531926155090332, |
|
"learning_rate": 7.873457600420058e-05, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.469, |
|
"grad_norm": 3.573625087738037, |
|
"learning_rate": 7.864706397129606e-05, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.2697, |
|
"grad_norm": 3.5155880451202393, |
|
"learning_rate": 7.855955193839153e-05, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 8.5749, |
|
"grad_norm": 3.201718330383301, |
|
"learning_rate": 7.8472039905487e-05, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 8.9228, |
|
"grad_norm": 3.8670506477355957, |
|
"learning_rate": 7.838452787258248e-05, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.5243, |
|
"grad_norm": 3.4351415634155273, |
|
"learning_rate": 7.829701583967796e-05, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 8.7689, |
|
"grad_norm": 4.182631492614746, |
|
"learning_rate": 7.820950380677344e-05, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.2565, |
|
"grad_norm": 3.6523499488830566, |
|
"learning_rate": 7.81219917738689e-05, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.9147, |
|
"grad_norm": 3.6572344303131104, |
|
"learning_rate": 7.803447974096438e-05, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.6875, |
|
"grad_norm": 4.45376443862915, |
|
"learning_rate": 7.794696770805986e-05, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 9.782, |
|
"grad_norm": 4.446099758148193, |
|
"learning_rate": 7.785945567515534e-05, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 9.0401, |
|
"grad_norm": 3.134500026702881, |
|
"learning_rate": 7.777194364225082e-05, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.3041, |
|
"grad_norm": 4.3101325035095215, |
|
"learning_rate": 7.768443160934628e-05, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 8.3818, |
|
"grad_norm": 2.935241222381592, |
|
"learning_rate": 7.759691957644176e-05, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.3778, |
|
"grad_norm": 3.966174364089966, |
|
"learning_rate": 7.750940754353724e-05, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 9.0559, |
|
"grad_norm": 3.758314609527588, |
|
"learning_rate": 7.742189551063272e-05, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 8.5828, |
|
"grad_norm": 3.2531213760375977, |
|
"learning_rate": 7.733438347772818e-05, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 8.6358, |
|
"grad_norm": 3.9096357822418213, |
|
"learning_rate": 7.724687144482366e-05, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 9.0841, |
|
"grad_norm": 2.787165641784668, |
|
"learning_rate": 7.715935941191914e-05, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.7611, |
|
"grad_norm": 3.6336965560913086, |
|
"learning_rate": 7.707184737901462e-05, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.3819, |
|
"grad_norm": 4.785186290740967, |
|
"learning_rate": 7.698433534611009e-05, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 9.3396, |
|
"grad_norm": 3.7301132678985596, |
|
"learning_rate": 7.689682331320557e-05, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.7932, |
|
"grad_norm": 3.769679307937622, |
|
"learning_rate": 7.680931128030104e-05, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 9.5408, |
|
"grad_norm": 3.249382257461548, |
|
"learning_rate": 7.672179924739652e-05, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 9.1383, |
|
"grad_norm": 3.562981128692627, |
|
"learning_rate": 7.6634287214492e-05, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.5737, |
|
"grad_norm": 3.2148962020874023, |
|
"learning_rate": 7.654677518158747e-05, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.5483, |
|
"grad_norm": 2.9571826457977295, |
|
"learning_rate": 7.645926314868295e-05, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.9157, |
|
"grad_norm": 3.3202896118164062, |
|
"learning_rate": 7.637175111577843e-05, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.9654, |
|
"grad_norm": 4.197299957275391, |
|
"learning_rate": 7.62842390828739e-05, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.642, |
|
"grad_norm": 2.9648005962371826, |
|
"learning_rate": 7.619672704996937e-05, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.5579, |
|
"grad_norm": 2.793729066848755, |
|
"learning_rate": 7.610921501706485e-05, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 9.0535, |
|
"grad_norm": 3.039337158203125, |
|
"learning_rate": 7.602170298416033e-05, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.4261, |
|
"grad_norm": 3.472973346710205, |
|
"learning_rate": 7.59341909512558e-05, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.5207, |
|
"grad_norm": 2.588060140609741, |
|
"learning_rate": 7.584667891835127e-05, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 9.4719, |
|
"grad_norm": 3.702918529510498, |
|
"learning_rate": 7.575916688544675e-05, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.2056, |
|
"grad_norm": 3.087986946105957, |
|
"learning_rate": 7.567165485254223e-05, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.9777, |
|
"grad_norm": 3.231987476348877, |
|
"learning_rate": 7.558414281963771e-05, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.946, |
|
"grad_norm": 3.1620264053344727, |
|
"learning_rate": 7.549663078673317e-05, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.3374, |
|
"grad_norm": 3.0438194274902344, |
|
"learning_rate": 7.540911875382865e-05, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 8.4711, |
|
"grad_norm": 3.3557493686676025, |
|
"learning_rate": 7.532160672092413e-05, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 8.5348, |
|
"grad_norm": 3.693506956100464, |
|
"learning_rate": 7.523409468801961e-05, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 9.2969, |
|
"grad_norm": 4.126795291900635, |
|
"learning_rate": 7.514658265511509e-05, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.6828, |
|
"grad_norm": 3.4798762798309326, |
|
"learning_rate": 7.505907062221055e-05, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 9.885, |
|
"grad_norm": 3.5834882259368896, |
|
"learning_rate": 7.497155858930603e-05, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.7332, |
|
"grad_norm": 3.054962396621704, |
|
"learning_rate": 7.488404655640151e-05, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.8859, |
|
"grad_norm": 2.3702313899993896, |
|
"learning_rate": 7.479653452349699e-05, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.9641, |
|
"grad_norm": 3.573233127593994, |
|
"learning_rate": 7.470902249059246e-05, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 9.0286, |
|
"grad_norm": 2.7246625423431396, |
|
"learning_rate": 7.462151045768794e-05, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.4259, |
|
"grad_norm": 3.090899705886841, |
|
"learning_rate": 7.453399842478341e-05, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.5746, |
|
"grad_norm": 2.8535008430480957, |
|
"learning_rate": 7.444648639187889e-05, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.0497, |
|
"grad_norm": 3.7636609077453613, |
|
"learning_rate": 7.435897435897436e-05, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 8.7239, |
|
"grad_norm": 3.038818597793579, |
|
"learning_rate": 7.427146232606984e-05, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.0842, |
|
"grad_norm": 3.275329351425171, |
|
"learning_rate": 7.418395029316532e-05, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 8.9054, |
|
"grad_norm": 2.4956889152526855, |
|
"learning_rate": 7.40964382602608e-05, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.8721, |
|
"grad_norm": 2.9423913955688477, |
|
"learning_rate": 7.400892622735627e-05, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.9035, |
|
"grad_norm": 4.2211785316467285, |
|
"learning_rate": 7.392141419445174e-05, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.9983, |
|
"grad_norm": 3.3558285236358643, |
|
"learning_rate": 7.383390216154722e-05, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.4671, |
|
"grad_norm": 3.1967856884002686, |
|
"learning_rate": 7.37463901286427e-05, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 8.4608, |
|
"grad_norm": 3.5259337425231934, |
|
"learning_rate": 7.365887809573818e-05, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 9.1684, |
|
"grad_norm": 3.226388692855835, |
|
"learning_rate": 7.357136606283364e-05, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 8.4309, |
|
"grad_norm": 3.7550246715545654, |
|
"learning_rate": 7.348385402992912e-05, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.4427, |
|
"grad_norm": 4.338967800140381, |
|
"learning_rate": 7.33963419970246e-05, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.9643, |
|
"grad_norm": 3.764723777770996, |
|
"learning_rate": 7.330882996412008e-05, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.346, |
|
"grad_norm": 3.2704851627349854, |
|
"learning_rate": 7.322131793121554e-05, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.3154, |
|
"grad_norm": 2.8961048126220703, |
|
"learning_rate": 7.313380589831102e-05, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.9944, |
|
"grad_norm": 3.3732376098632812, |
|
"learning_rate": 7.30462938654065e-05, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.4122, |
|
"grad_norm": 3.7773525714874268, |
|
"learning_rate": 7.295878183250198e-05, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 9.2683, |
|
"grad_norm": 3.716183662414551, |
|
"learning_rate": 7.287126979959746e-05, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.6879, |
|
"grad_norm": 3.6532111167907715, |
|
"learning_rate": 7.278375776669292e-05, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.6653, |
|
"grad_norm": 3.4833829402923584, |
|
"learning_rate": 7.26962457337884e-05, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.8866, |
|
"grad_norm": 4.338618278503418, |
|
"learning_rate": 7.260873370088388e-05, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.9239, |
|
"grad_norm": 3.3099405765533447, |
|
"learning_rate": 7.252122166797936e-05, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.8475, |
|
"grad_norm": 3.2691259384155273, |
|
"learning_rate": 7.243370963507483e-05, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 9.0336, |
|
"grad_norm": 3.4680464267730713, |
|
"learning_rate": 7.23461976021703e-05, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 9.1274, |
|
"grad_norm": 3.6281306743621826, |
|
"learning_rate": 7.225868556926578e-05, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.557, |
|
"grad_norm": 3.129265546798706, |
|
"learning_rate": 7.217117353636126e-05, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.2588, |
|
"grad_norm": 2.781096935272217, |
|
"learning_rate": 7.208366150345673e-05, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 8.6785, |
|
"grad_norm": 2.085003137588501, |
|
"learning_rate": 7.19961494705522e-05, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 8.2642, |
|
"grad_norm": 3.204002618789673, |
|
"learning_rate": 7.190863743764767e-05, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 7.9569, |
|
"grad_norm": 2.3955442905426025, |
|
"learning_rate": 7.182112540474315e-05, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.3701, |
|
"grad_norm": 3.8179776668548584, |
|
"learning_rate": 7.173361337183863e-05, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.4552, |
|
"grad_norm": 3.572737216949463, |
|
"learning_rate": 7.16461013389341e-05, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.5619, |
|
"grad_norm": 3.7918508052825928, |
|
"learning_rate": 7.155858930602957e-05, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.9817, |
|
"grad_norm": 3.5314037799835205, |
|
"learning_rate": 7.147107727312505e-05, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.3192, |
|
"grad_norm": 3.137615442276001, |
|
"learning_rate": 7.138356524022053e-05, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 9.0373, |
|
"grad_norm": 3.58949613571167, |
|
"learning_rate": 7.129605320731601e-05, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.5971, |
|
"grad_norm": 3.062047243118286, |
|
"learning_rate": 7.120854117441148e-05, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.334, |
|
"grad_norm": 3.8008644580841064, |
|
"learning_rate": 7.112102914150696e-05, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.4934, |
|
"grad_norm": 3.4640395641326904, |
|
"learning_rate": 7.103351710860243e-05, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.5304, |
|
"grad_norm": 3.6595981121063232, |
|
"learning_rate": 7.094600507569791e-05, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.6947, |
|
"grad_norm": 4.255160331726074, |
|
"learning_rate": 7.085849304279338e-05, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.5373, |
|
"grad_norm": 2.957233428955078, |
|
"learning_rate": 7.077098100988886e-05, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.919, |
|
"grad_norm": 4.1049933433532715, |
|
"learning_rate": 7.068346897698434e-05, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.6536, |
|
"grad_norm": 3.6588120460510254, |
|
"learning_rate": 7.059595694407982e-05, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.1786, |
|
"grad_norm": 2.536498785018921, |
|
"learning_rate": 7.050844491117528e-05, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.9825, |
|
"grad_norm": 3.442955255508423, |
|
"learning_rate": 7.042093287827076e-05, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 8.7311, |
|
"grad_norm": 2.923522710800171, |
|
"learning_rate": 7.033342084536624e-05, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 8.276, |
|
"grad_norm": 3.6458773612976074, |
|
"learning_rate": 7.024590881246172e-05, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 8.5271, |
|
"grad_norm": 3.239694356918335, |
|
"learning_rate": 7.01583967795572e-05, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.2755, |
|
"grad_norm": 3.39280366897583, |
|
"learning_rate": 7.007088474665266e-05, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 9.2045, |
|
"grad_norm": 3.4279630184173584, |
|
"learning_rate": 6.998337271374814e-05, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.5647, |
|
"grad_norm": 2.416999578475952, |
|
"learning_rate": 6.989586068084362e-05, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.7283, |
|
"grad_norm": 2.8094992637634277, |
|
"learning_rate": 6.98083486479391e-05, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.6518, |
|
"grad_norm": 4.319655418395996, |
|
"learning_rate": 6.972083661503456e-05, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.2852, |
|
"grad_norm": 3.9317190647125244, |
|
"learning_rate": 6.963332458213004e-05, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.6298, |
|
"grad_norm": 4.8585405349731445, |
|
"learning_rate": 6.954581254922552e-05, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.2455, |
|
"grad_norm": 2.6159684658050537, |
|
"learning_rate": 6.9458300516321e-05, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.7473, |
|
"grad_norm": 2.344099521636963, |
|
"learning_rate": 6.937078848341647e-05, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 7.9803, |
|
"grad_norm": 3.1866703033447266, |
|
"learning_rate": 6.928327645051194e-05, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.79, |
|
"grad_norm": 3.943319320678711, |
|
"learning_rate": 6.919576441760742e-05, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.8112, |
|
"grad_norm": 2.919020891189575, |
|
"learning_rate": 6.91082523847029e-05, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.5764, |
|
"grad_norm": 3.47027325630188, |
|
"learning_rate": 6.902074035179837e-05, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 9.4408, |
|
"grad_norm": 3.2260677814483643, |
|
"learning_rate": 6.893322831889385e-05, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.9655, |
|
"grad_norm": 3.2517478466033936, |
|
"learning_rate": 6.884571628598933e-05, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.8759, |
|
"grad_norm": 4.705760478973389, |
|
"learning_rate": 6.87582042530848e-05, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.8426, |
|
"grad_norm": 2.7460803985595703, |
|
"learning_rate": 6.867069222018028e-05, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.6733, |
|
"grad_norm": 3.944464921951294, |
|
"learning_rate": 6.858318018727575e-05, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.56, |
|
"grad_norm": 3.393721342086792, |
|
"learning_rate": 6.849566815437123e-05, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 7.8694, |
|
"grad_norm": 2.579340696334839, |
|
"learning_rate": 6.84081561214667e-05, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.2998, |
|
"grad_norm": 3.6678457260131836, |
|
"learning_rate": 6.832064408856219e-05, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.5947, |
|
"grad_norm": 3.218284845352173, |
|
"learning_rate": 6.823313205565765e-05, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.5014, |
|
"grad_norm": 3.5185766220092773, |
|
"learning_rate": 6.814562002275313e-05, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 9.1655, |
|
"grad_norm": 3.5601882934570312, |
|
"learning_rate": 6.805810798984861e-05, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.6889, |
|
"grad_norm": 3.317361354827881, |
|
"learning_rate": 6.797059595694409e-05, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.9274, |
|
"grad_norm": 3.271773338317871, |
|
"learning_rate": 6.788308392403955e-05, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.733, |
|
"grad_norm": 3.0022764205932617, |
|
"learning_rate": 6.779557189113503e-05, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 8.5665, |
|
"grad_norm": 3.5991992950439453, |
|
"learning_rate": 6.770805985823051e-05, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 8.3149, |
|
"grad_norm": 3.060124158859253, |
|
"learning_rate": 6.762054782532599e-05, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 7.9419, |
|
"grad_norm": 3.116497278213501, |
|
"learning_rate": 6.753303579242147e-05, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 8.456, |
|
"grad_norm": 3.201129198074341, |
|
"learning_rate": 6.744552375951693e-05, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.5843, |
|
"grad_norm": 3.7871932983398438, |
|
"learning_rate": 6.735801172661241e-05, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.1042, |
|
"grad_norm": 3.8025078773498535, |
|
"learning_rate": 6.727049969370789e-05, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 9.0008, |
|
"grad_norm": 2.9040756225585938, |
|
"learning_rate": 6.718298766080337e-05, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 7.971, |
|
"grad_norm": 5.227065086364746, |
|
"learning_rate": 6.709547562789884e-05, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 9.1628, |
|
"grad_norm": 2.822517156600952, |
|
"learning_rate": 6.700796359499431e-05, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 7.7244, |
|
"grad_norm": 2.9904367923736572, |
|
"learning_rate": 6.69204515620898e-05, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.0893, |
|
"grad_norm": 4.167274475097656, |
|
"learning_rate": 6.683293952918527e-05, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.3331, |
|
"grad_norm": 3.8134043216705322, |
|
"learning_rate": 6.674542749628074e-05, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 7.8118, |
|
"grad_norm": 3.0692172050476074, |
|
"learning_rate": 6.665791546337622e-05, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.4811, |
|
"grad_norm": 4.14231014251709, |
|
"learning_rate": 6.65704034304717e-05, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.0494, |
|
"grad_norm": 3.4583489894866943, |
|
"learning_rate": 6.648289139756717e-05, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.2668, |
|
"grad_norm": 3.468843460083008, |
|
"learning_rate": 6.639537936466265e-05, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.384, |
|
"grad_norm": 3.0200271606445312, |
|
"learning_rate": 6.630786733175812e-05, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.5672, |
|
"grad_norm": 3.9895946979522705, |
|
"learning_rate": 6.62203552988536e-05, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.4798, |
|
"grad_norm": 2.92266583442688, |
|
"learning_rate": 6.613284326594908e-05, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.749, |
|
"grad_norm": 3.8905258178710938, |
|
"learning_rate": 6.604533123304456e-05, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 7.8761, |
|
"grad_norm": 3.545311212539673, |
|
"learning_rate": 6.595781920014002e-05, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.3273, |
|
"grad_norm": 2.925837516784668, |
|
"learning_rate": 6.58703071672355e-05, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.3426, |
|
"grad_norm": 3.527435064315796, |
|
"learning_rate": 6.578279513433098e-05, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 7.5962, |
|
"grad_norm": 2.926382064819336, |
|
"learning_rate": 6.569528310142646e-05, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.2975, |
|
"grad_norm": 3.4969446659088135, |
|
"learning_rate": 6.560777106852192e-05, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.4417, |
|
"grad_norm": 3.466707229614258, |
|
"learning_rate": 6.55202590356174e-05, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.1161, |
|
"grad_norm": 4.119028091430664, |
|
"learning_rate": 6.543274700271288e-05, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.0248, |
|
"grad_norm": 3.2728042602539062, |
|
"learning_rate": 6.534523496980836e-05, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.0926, |
|
"grad_norm": 2.8251736164093018, |
|
"learning_rate": 6.525772293690382e-05, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.6109, |
|
"grad_norm": 3.521144151687622, |
|
"learning_rate": 6.51702109039993e-05, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.6268, |
|
"grad_norm": 3.15901780128479, |
|
"learning_rate": 6.508269887109478e-05, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.2584, |
|
"grad_norm": 3.5901992321014404, |
|
"learning_rate": 6.499518683819026e-05, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.5285, |
|
"grad_norm": 4.662459850311279, |
|
"learning_rate": 6.490767480528574e-05, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 7.8559, |
|
"grad_norm": 2.72666597366333, |
|
"learning_rate": 6.48201627723812e-05, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.145, |
|
"grad_norm": 3.6170144081115723, |
|
"learning_rate": 6.473265073947668e-05, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 7.838, |
|
"grad_norm": 2.9118199348449707, |
|
"learning_rate": 6.464513870657216e-05, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.4171, |
|
"grad_norm": 3.7052972316741943, |
|
"learning_rate": 6.455762667366764e-05, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.2865, |
|
"grad_norm": 4.498712062835693, |
|
"learning_rate": 6.447011464076311e-05, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.6456, |
|
"grad_norm": 3.1900229454040527, |
|
"learning_rate": 6.438260260785859e-05, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.1772, |
|
"grad_norm": 4.92230224609375, |
|
"learning_rate": 6.429509057495407e-05, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 9.221, |
|
"grad_norm": 3.758399724960327, |
|
"learning_rate": 6.420757854204954e-05, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.8449, |
|
"grad_norm": 3.110145092010498, |
|
"learning_rate": 6.412006650914501e-05, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.5491, |
|
"grad_norm": 3.1985270977020264, |
|
"learning_rate": 6.403255447624049e-05, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.0487, |
|
"grad_norm": 4.918299674987793, |
|
"learning_rate": 6.394504244333595e-05, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.6751, |
|
"grad_norm": 3.328449010848999, |
|
"learning_rate": 6.385753041043143e-05, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.0822, |
|
"grad_norm": 2.8385417461395264, |
|
"learning_rate": 6.377001837752691e-05, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.0838, |
|
"grad_norm": 3.5397825241088867, |
|
"learning_rate": 6.368250634462238e-05, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.3657, |
|
"grad_norm": 3.8638100624084473, |
|
"learning_rate": 6.359499431171786e-05, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.5573, |
|
"grad_norm": 3.129281759262085, |
|
"learning_rate": 6.350748227881333e-05, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.424, |
|
"grad_norm": 4.496127605438232, |
|
"learning_rate": 6.341997024590881e-05, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.4377, |
|
"grad_norm": 5.132551670074463, |
|
"learning_rate": 6.333245821300429e-05, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.1084, |
|
"grad_norm": 2.994011402130127, |
|
"learning_rate": 6.324494618009976e-05, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 8.0609, |
|
"grad_norm": 3.976611375808716, |
|
"learning_rate": 6.315743414719524e-05, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.0137, |
|
"grad_norm": 4.869803428649902, |
|
"learning_rate": 6.306992211429072e-05, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.2885, |
|
"grad_norm": 3.4231982231140137, |
|
"learning_rate": 6.29824100813862e-05, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.2444, |
|
"grad_norm": 5.1861252784729, |
|
"learning_rate": 6.289489804848166e-05, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 7.7026, |
|
"grad_norm": 4.288048267364502, |
|
"learning_rate": 6.280738601557714e-05, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 7.9181, |
|
"grad_norm": 3.64320969581604, |
|
"learning_rate": 6.271987398267262e-05, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.2069, |
|
"grad_norm": 4.117647647857666, |
|
"learning_rate": 6.26323619497681e-05, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.505, |
|
"grad_norm": 3.528850793838501, |
|
"learning_rate": 6.254484991686356e-05, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.1734, |
|
"grad_norm": 2.9336414337158203, |
|
"learning_rate": 6.245733788395904e-05, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 7.9808, |
|
"grad_norm": 4.607523441314697, |
|
"learning_rate": 6.236982585105452e-05, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.2398, |
|
"grad_norm": 5.0170464515686035, |
|
"learning_rate": 6.228231381815e-05, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.3433, |
|
"grad_norm": 3.7535080909729004, |
|
"learning_rate": 6.219480178524548e-05, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 7.8144, |
|
"grad_norm": 3.409480333328247, |
|
"learning_rate": 6.210728975234094e-05, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.5735, |
|
"grad_norm": 3.7058238983154297, |
|
"learning_rate": 6.201977771943642e-05, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.4169, |
|
"grad_norm": 3.1820621490478516, |
|
"learning_rate": 6.19322656865319e-05, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 7.9047, |
|
"grad_norm": 2.8989903926849365, |
|
"learning_rate": 6.184475365362738e-05, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.1377, |
|
"grad_norm": 3.903512477874756, |
|
"learning_rate": 6.175724162072284e-05, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.6631, |
|
"grad_norm": 2.916041374206543, |
|
"learning_rate": 6.166972958781832e-05, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.2377, |
|
"grad_norm": 5.932418346405029, |
|
"learning_rate": 6.15822175549138e-05, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.9961, |
|
"grad_norm": 4.3133368492126465, |
|
"learning_rate": 6.149470552200928e-05, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.0339, |
|
"grad_norm": 3.1874802112579346, |
|
"learning_rate": 6.140719348910475e-05, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 7.3347, |
|
"grad_norm": 3.9368977546691895, |
|
"learning_rate": 6.131968145620023e-05, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 7.723, |
|
"grad_norm": 3.000967025756836, |
|
"learning_rate": 6.12321694232957e-05, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.3258, |
|
"grad_norm": 3.96174693107605, |
|
"learning_rate": 6.114465739039118e-05, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.4037, |
|
"grad_norm": 3.6735053062438965, |
|
"learning_rate": 6.105714535748666e-05, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.2432, |
|
"grad_norm": 4.765099048614502, |
|
"learning_rate": 6.0969633324582134e-05, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.232, |
|
"grad_norm": 3.118907928466797, |
|
"learning_rate": 6.0882121291677607e-05, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.4367, |
|
"grad_norm": 2.7283709049224854, |
|
"learning_rate": 6.0794609258773085e-05, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.062, |
|
"grad_norm": 2.874713182449341, |
|
"learning_rate": 6.070709722586856e-05, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.2398, |
|
"grad_norm": 3.3554372787475586, |
|
"learning_rate": 6.0619585192964036e-05, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.0336, |
|
"grad_norm": 3.0796210765838623, |
|
"learning_rate": 6.053207316005951e-05, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.6318, |
|
"grad_norm": 4.566615581512451, |
|
"learning_rate": 6.044456112715499e-05, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 7.8031, |
|
"grad_norm": 2.9705634117126465, |
|
"learning_rate": 6.035704909425046e-05, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.2148, |
|
"grad_norm": 2.8812005519866943, |
|
"learning_rate": 6.026953706134594e-05, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 7.8284, |
|
"grad_norm": 3.389988899230957, |
|
"learning_rate": 6.018202502844141e-05, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 7.9364, |
|
"grad_norm": 4.094693660736084, |
|
"learning_rate": 6.009451299553689e-05, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.1086, |
|
"grad_norm": 2.9179611206054688, |
|
"learning_rate": 6.000700096263236e-05, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.5533, |
|
"grad_norm": 3.0995657444000244, |
|
"learning_rate": 5.991948892972784e-05, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.1136, |
|
"grad_norm": 4.079578399658203, |
|
"learning_rate": 5.983197689682332e-05, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.379, |
|
"grad_norm": 3.150442600250244, |
|
"learning_rate": 5.974446486391879e-05, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.6851, |
|
"grad_norm": 3.904902458190918, |
|
"learning_rate": 5.965695283101427e-05, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 8.5392, |
|
"grad_norm": 2.8424036502838135, |
|
"learning_rate": 5.956944079810974e-05, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.9675, |
|
"grad_norm": 5.174964904785156, |
|
"learning_rate": 5.948192876520522e-05, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 7.8097, |
|
"grad_norm": 3.6166417598724365, |
|
"learning_rate": 5.9394416732300694e-05, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 7.981, |
|
"grad_norm": 3.1801164150238037, |
|
"learning_rate": 5.930690469939617e-05, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.2149, |
|
"grad_norm": 3.975576400756836, |
|
"learning_rate": 5.9219392666491645e-05, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.5473, |
|
"grad_norm": 4.039759159088135, |
|
"learning_rate": 5.9131880633587123e-05, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.2659, |
|
"grad_norm": 4.9490861892700195, |
|
"learning_rate": 5.9044368600682596e-05, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.0904, |
|
"grad_norm": 4.2978715896606445, |
|
"learning_rate": 5.8956856567778074e-05, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.0685, |
|
"grad_norm": 3.037668466567993, |
|
"learning_rate": 5.8869344534873547e-05, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 7.7553, |
|
"grad_norm": 3.496307849884033, |
|
"learning_rate": 5.8781832501969025e-05, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.5136, |
|
"grad_norm": 3.175560712814331, |
|
"learning_rate": 5.8694320469064504e-05, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 7.8844, |
|
"grad_norm": 3.7230336666107178, |
|
"learning_rate": 5.8606808436159976e-05, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.2197, |
|
"grad_norm": 4.2161359786987305, |
|
"learning_rate": 5.8519296403255455e-05, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.2073, |
|
"grad_norm": 4.122830867767334, |
|
"learning_rate": 5.843178437035093e-05, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 7.9876, |
|
"grad_norm": 4.076393127441406, |
|
"learning_rate": 5.8344272337446406e-05, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 7.7404, |
|
"grad_norm": 4.162572860717773, |
|
"learning_rate": 5.825676030454188e-05, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 7.9991, |
|
"grad_norm": 3.4230401515960693, |
|
"learning_rate": 5.816924827163736e-05, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.0793, |
|
"grad_norm": 4.0326313972473145, |
|
"learning_rate": 5.808173623873283e-05, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 7.3234, |
|
"grad_norm": 2.669609308242798, |
|
"learning_rate": 5.799422420582831e-05, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.4489, |
|
"grad_norm": 3.926006555557251, |
|
"learning_rate": 5.790671217292378e-05, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.2047, |
|
"grad_norm": 2.7743630409240723, |
|
"learning_rate": 5.781920014001926e-05, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.6027, |
|
"grad_norm": 3.9714138507843018, |
|
"learning_rate": 5.773168810711473e-05, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.5097, |
|
"grad_norm": 3.775052070617676, |
|
"learning_rate": 5.764417607421021e-05, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.1056, |
|
"grad_norm": 4.158542633056641, |
|
"learning_rate": 5.755666404130568e-05, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 8.2198, |
|
"grad_norm": 3.647034168243408, |
|
"learning_rate": 5.746915200840116e-05, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.5833, |
|
"grad_norm": 3.3458187580108643, |
|
"learning_rate": 5.738163997549664e-05, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.8895, |
|
"grad_norm": 3.5432512760162354, |
|
"learning_rate": 5.729412794259211e-05, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.9663, |
|
"grad_norm": 3.1249189376831055, |
|
"learning_rate": 5.720661590968759e-05, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.6285, |
|
"grad_norm": 2.8004276752471924, |
|
"learning_rate": 5.7119103876783063e-05, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.2953, |
|
"grad_norm": 3.2479677200317383, |
|
"learning_rate": 5.703159184387854e-05, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.0079, |
|
"grad_norm": 3.8008508682250977, |
|
"learning_rate": 5.6944079810974014e-05, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 7.8035, |
|
"grad_norm": 3.2461721897125244, |
|
"learning_rate": 5.685656777806949e-05, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 8.6322, |
|
"grad_norm": 3.8512370586395264, |
|
"learning_rate": 5.6769055745164965e-05, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.7099, |
|
"grad_norm": 3.9859845638275146, |
|
"learning_rate": 5.6681543712260444e-05, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 7.9929, |
|
"grad_norm": 3.455918550491333, |
|
"learning_rate": 5.6594031679355916e-05, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 7.9348, |
|
"grad_norm": 3.744387626647949, |
|
"learning_rate": 5.6506519646451395e-05, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 7.9457, |
|
"grad_norm": 5.055604934692383, |
|
"learning_rate": 5.641900761354687e-05, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 8.2247, |
|
"grad_norm": 3.072326183319092, |
|
"learning_rate": 5.6331495580642346e-05, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 7.7124, |
|
"grad_norm": 4.150148868560791, |
|
"learning_rate": 5.624398354773782e-05, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 8.1994, |
|
"grad_norm": 5.2460503578186035, |
|
"learning_rate": 5.61564715148333e-05, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.1123, |
|
"grad_norm": 3.8343966007232666, |
|
"learning_rate": 5.6068959481928776e-05, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 7.9948, |
|
"grad_norm": 3.488602638244629, |
|
"learning_rate": 5.5981447449024235e-05, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 7.745, |
|
"grad_norm": 2.6132748126983643, |
|
"learning_rate": 5.5893935416119714e-05, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.4735, |
|
"grad_norm": 3.123828172683716, |
|
"learning_rate": 5.580642338321519e-05, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.5695, |
|
"grad_norm": 3.747915506362915, |
|
"learning_rate": 5.5718911350310665e-05, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 8.3462, |
|
"grad_norm": 4.172099590301514, |
|
"learning_rate": 5.5631399317406144e-05, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.639, |
|
"grad_norm": 3.172137498855591, |
|
"learning_rate": 5.5543887284501616e-05, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.9555, |
|
"grad_norm": 4.053969383239746, |
|
"learning_rate": 5.5456375251597095e-05, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.2866, |
|
"grad_norm": 3.186673164367676, |
|
"learning_rate": 5.536886321869257e-05, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7438, |
|
"grad_norm": 3.2737646102905273, |
|
"learning_rate": 5.5281351185788046e-05, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7122, |
|
"grad_norm": 3.1801161766052246, |
|
"learning_rate": 5.519383915288352e-05, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.6629, |
|
"grad_norm": 3.773719072341919, |
|
"learning_rate": 5.5106327119979e-05, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.2615, |
|
"grad_norm": 4.548736095428467, |
|
"learning_rate": 5.501881508707447e-05, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.0535, |
|
"grad_norm": 3.921649694442749, |
|
"learning_rate": 5.493130305416995e-05, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.4305, |
|
"grad_norm": 4.346540451049805, |
|
"learning_rate": 5.484379102126542e-05, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.423, |
|
"grad_norm": 4.634354114532471, |
|
"learning_rate": 5.47562789883609e-05, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 7.8966, |
|
"grad_norm": 3.5531675815582275, |
|
"learning_rate": 5.466876695545637e-05, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 7.7126, |
|
"grad_norm": 4.377911567687988, |
|
"learning_rate": 5.458125492255185e-05, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.2728, |
|
"grad_norm": 3.366030216217041, |
|
"learning_rate": 5.449374288964733e-05, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.1801, |
|
"grad_norm": 3.4603772163391113, |
|
"learning_rate": 5.44062308567428e-05, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.2143, |
|
"grad_norm": 4.528195381164551, |
|
"learning_rate": 5.431871882383828e-05, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 7.359, |
|
"grad_norm": 2.4803977012634277, |
|
"learning_rate": 5.423120679093375e-05, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.1361, |
|
"grad_norm": 4.201333999633789, |
|
"learning_rate": 5.414369475802923e-05, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 7.8974, |
|
"grad_norm": 4.272532939910889, |
|
"learning_rate": 5.40561827251247e-05, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 7.502, |
|
"grad_norm": 3.4006450176239014, |
|
"learning_rate": 5.396867069222018e-05, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 7.78, |
|
"grad_norm": 3.902611255645752, |
|
"learning_rate": 5.3881158659315654e-05, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.0953, |
|
"grad_norm": 2.6970345973968506, |
|
"learning_rate": 5.379364662641113e-05, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.7703, |
|
"grad_norm": 3.610957145690918, |
|
"learning_rate": 5.3706134593506605e-05, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 8.7352, |
|
"grad_norm": 4.159451961517334, |
|
"learning_rate": 5.3618622560602084e-05, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.5264, |
|
"grad_norm": 2.7696640491485596, |
|
"learning_rate": 5.3531110527697556e-05, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.776, |
|
"grad_norm": 5.263556003570557, |
|
"learning_rate": 5.3443598494793035e-05, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.4893, |
|
"grad_norm": 3.3409626483917236, |
|
"learning_rate": 5.3356086461888514e-05, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.9241, |
|
"grad_norm": 5.305122375488281, |
|
"learning_rate": 5.3268574428983986e-05, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.9385, |
|
"grad_norm": 4.231367588043213, |
|
"learning_rate": 5.3181062396079465e-05, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.1538, |
|
"grad_norm": 3.7227704524993896, |
|
"learning_rate": 5.309355036317494e-05, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.5085, |
|
"grad_norm": 3.6258912086486816, |
|
"learning_rate": 5.3006038330270416e-05, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.6825, |
|
"grad_norm": 3.3270792961120605, |
|
"learning_rate": 5.291852629736589e-05, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.8874, |
|
"grad_norm": 2.983099937438965, |
|
"learning_rate": 5.2831014264461367e-05, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.0616, |
|
"grad_norm": 3.8440752029418945, |
|
"learning_rate": 5.274350223155684e-05, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 7.7685, |
|
"grad_norm": 5.8492608070373535, |
|
"learning_rate": 5.265599019865232e-05, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 7.5809, |
|
"grad_norm": 3.308460235595703, |
|
"learning_rate": 5.256847816574779e-05, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 8.7381, |
|
"grad_norm": 3.017559766769409, |
|
"learning_rate": 5.248096613284327e-05, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 7.9422, |
|
"grad_norm": 4.227987766265869, |
|
"learning_rate": 5.239345409993874e-05, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 7.9202, |
|
"grad_norm": 3.1066997051239014, |
|
"learning_rate": 5.230594206703422e-05, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 7.7366, |
|
"grad_norm": 3.3069207668304443, |
|
"learning_rate": 5.221843003412969e-05, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.3042, |
|
"grad_norm": 3.065303087234497, |
|
"learning_rate": 5.213091800122517e-05, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.2732, |
|
"grad_norm": 3.6093387603759766, |
|
"learning_rate": 5.204340596832065e-05, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.1332, |
|
"grad_norm": 4.356596946716309, |
|
"learning_rate": 5.195589393541612e-05, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.9349, |
|
"grad_norm": 4.91728401184082, |
|
"learning_rate": 5.18683819025116e-05, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.0487, |
|
"grad_norm": 4.411836624145508, |
|
"learning_rate": 5.178086986960707e-05, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.7545, |
|
"grad_norm": 3.488790512084961, |
|
"learning_rate": 5.169335783670255e-05, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.7508, |
|
"grad_norm": 5.54533576965332, |
|
"learning_rate": 5.1605845803798024e-05, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.8403, |
|
"grad_norm": 2.8527212142944336, |
|
"learning_rate": 5.15183337708935e-05, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 8.1535, |
|
"grad_norm": 3.892737865447998, |
|
"learning_rate": 5.1430821737988975e-05, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.6217, |
|
"grad_norm": 3.9077818393707275, |
|
"learning_rate": 5.1343309705084454e-05, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.6306, |
|
"grad_norm": 3.9363648891448975, |
|
"learning_rate": 5.1255797672179926e-05, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 8.5425, |
|
"grad_norm": 4.692113399505615, |
|
"learning_rate": 5.1168285639275405e-05, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.7507, |
|
"grad_norm": 5.601973056793213, |
|
"learning_rate": 5.108077360637088e-05, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 7.8534, |
|
"grad_norm": 3.5403494834899902, |
|
"learning_rate": 5.0993261573466356e-05, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 7.9312, |
|
"grad_norm": 4.555025100708008, |
|
"learning_rate": 5.0905749540561834e-05, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 7.624, |
|
"grad_norm": 5.721600532531738, |
|
"learning_rate": 5.0818237507657307e-05, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 7.9349, |
|
"grad_norm": 3.4647514820098877, |
|
"learning_rate": 5.0730725474752785e-05, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 7.5857, |
|
"grad_norm": 3.362941026687622, |
|
"learning_rate": 5.064321344184826e-05, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.3246, |
|
"grad_norm": 5.352531433105469, |
|
"learning_rate": 5.0555701408943736e-05, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 7.8738, |
|
"grad_norm": 3.2162294387817383, |
|
"learning_rate": 5.046818937603921e-05, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.0784, |
|
"grad_norm": 3.607652187347412, |
|
"learning_rate": 5.038067734313469e-05, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.0385, |
|
"grad_norm": 3.6921122074127197, |
|
"learning_rate": 5.029316531023016e-05, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.9548, |
|
"grad_norm": 5.187925338745117, |
|
"learning_rate": 5.020565327732564e-05, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.9085, |
|
"grad_norm": 4.099059581756592, |
|
"learning_rate": 5.011814124442111e-05, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 7.7387, |
|
"grad_norm": 3.0415878295898438, |
|
"learning_rate": 5.003062921151659e-05, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.5766, |
|
"grad_norm": 4.777284622192383, |
|
"learning_rate": 4.994311717861206e-05, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 7.697, |
|
"grad_norm": 5.438363075256348, |
|
"learning_rate": 4.9855605145707534e-05, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.4595, |
|
"grad_norm": 5.054925441741943, |
|
"learning_rate": 4.976809311280301e-05, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.3592, |
|
"grad_norm": 2.9228146076202393, |
|
"learning_rate": 4.9680581079898485e-05, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.7911, |
|
"grad_norm": 4.529871940612793, |
|
"learning_rate": 4.9593069046993964e-05, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.0316, |
|
"grad_norm": 3.9995975494384766, |
|
"learning_rate": 4.9505557014089436e-05, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.5984, |
|
"grad_norm": 3.9212229251861572, |
|
"learning_rate": 4.9418044981184915e-05, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.1606, |
|
"grad_norm": 3.479395866394043, |
|
"learning_rate": 4.933053294828039e-05, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 7.7609, |
|
"grad_norm": 3.5287656784057617, |
|
"learning_rate": 4.9243020915375866e-05, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.2939, |
|
"grad_norm": 3.2169201374053955, |
|
"learning_rate": 4.915550888247134e-05, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.8161, |
|
"grad_norm": 4.046046733856201, |
|
"learning_rate": 4.906799684956682e-05, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 8.1367, |
|
"grad_norm": 3.9905033111572266, |
|
"learning_rate": 4.898048481666229e-05, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.522, |
|
"grad_norm": 3.0949547290802, |
|
"learning_rate": 4.889297278375777e-05, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.8009, |
|
"grad_norm": 3.2042038440704346, |
|
"learning_rate": 4.8805460750853247e-05, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.9891, |
|
"grad_norm": 3.542771100997925, |
|
"learning_rate": 4.871794871794872e-05, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 8.1601, |
|
"grad_norm": 4.720103740692139, |
|
"learning_rate": 4.86304366850442e-05, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.5776, |
|
"grad_norm": 3.65787672996521, |
|
"learning_rate": 4.854292465213967e-05, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.4047, |
|
"grad_norm": 3.9372549057006836, |
|
"learning_rate": 4.845541261923515e-05, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0766, |
|
"grad_norm": 3.362112045288086, |
|
"learning_rate": 4.836790058633062e-05, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 7.8371, |
|
"grad_norm": 5.547123908996582, |
|
"learning_rate": 4.82803885534261e-05, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0129, |
|
"grad_norm": 4.756041526794434, |
|
"learning_rate": 4.819287652052157e-05, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 7.8304, |
|
"grad_norm": 3.8089821338653564, |
|
"learning_rate": 4.810536448761705e-05, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.7565, |
|
"grad_norm": 3.8562700748443604, |
|
"learning_rate": 4.801785245471252e-05, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.4297, |
|
"grad_norm": 4.8232831954956055, |
|
"learning_rate": 4.7930340421808e-05, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.4436, |
|
"grad_norm": 4.951693058013916, |
|
"learning_rate": 4.7842828388903474e-05, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.9573, |
|
"grad_norm": 3.800071954727173, |
|
"learning_rate": 4.775531635599895e-05, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.4168, |
|
"grad_norm": 4.224662780761719, |
|
"learning_rate": 4.766780432309443e-05, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 8.4404, |
|
"grad_norm": 3.3358187675476074, |
|
"learning_rate": 4.7580292290189904e-05, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.4616, |
|
"grad_norm": 4.352634906768799, |
|
"learning_rate": 4.749278025728538e-05, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.8744, |
|
"grad_norm": 3.5693962574005127, |
|
"learning_rate": 4.7405268224380855e-05, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.5451, |
|
"grad_norm": 3.5086276531219482, |
|
"learning_rate": 4.7317756191476334e-05, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.4284, |
|
"grad_norm": 3.0168793201446533, |
|
"learning_rate": 4.7230244158571806e-05, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.7376, |
|
"grad_norm": 4.352570056915283, |
|
"learning_rate": 4.7142732125667285e-05, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.64, |
|
"grad_norm": 5.351820468902588, |
|
"learning_rate": 4.705522009276276e-05, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.9318, |
|
"grad_norm": 3.993790626525879, |
|
"learning_rate": 4.6967708059858236e-05, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.7602, |
|
"grad_norm": 3.1628670692443848, |
|
"learning_rate": 4.688019602695371e-05, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.2848, |
|
"grad_norm": 2.481705665588379, |
|
"learning_rate": 4.679268399404919e-05, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.8586, |
|
"grad_norm": 3.944296360015869, |
|
"learning_rate": 4.670517196114466e-05, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.7223, |
|
"grad_norm": 4.099398136138916, |
|
"learning_rate": 4.661765992824014e-05, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.6033, |
|
"grad_norm": 2.781362533569336, |
|
"learning_rate": 4.653014789533561e-05, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 8.4421, |
|
"grad_norm": 4.035131454467773, |
|
"learning_rate": 4.644263586243109e-05, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.6095, |
|
"grad_norm": 3.3464620113372803, |
|
"learning_rate": 4.635512382952657e-05, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.8892, |
|
"grad_norm": 4.8561553955078125, |
|
"learning_rate": 4.626761179662204e-05, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.7162, |
|
"grad_norm": 6.795714378356934, |
|
"learning_rate": 4.618009976371752e-05, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.7384, |
|
"grad_norm": 3.0965943336486816, |
|
"learning_rate": 4.609258773081299e-05, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.9766, |
|
"grad_norm": 3.1002793312072754, |
|
"learning_rate": 4.600507569790847e-05, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.4392, |
|
"grad_norm": 6.083471298217773, |
|
"learning_rate": 4.5917563665003935e-05, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.5822, |
|
"grad_norm": 4.11601448059082, |
|
"learning_rate": 4.5830051632099414e-05, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.5988, |
|
"grad_norm": 4.361574172973633, |
|
"learning_rate": 4.5742539599194886e-05, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 8.1273, |
|
"grad_norm": 4.6307549476623535, |
|
"learning_rate": 4.5655027566290365e-05, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 7.3601, |
|
"grad_norm": 3.4341373443603516, |
|
"learning_rate": 4.5567515533385844e-05, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 7.9766, |
|
"grad_norm": 3.7583069801330566, |
|
"learning_rate": 4.5480003500481316e-05, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.8755, |
|
"grad_norm": 3.212942123413086, |
|
"learning_rate": 4.5392491467576795e-05, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.8139, |
|
"grad_norm": 2.9877207279205322, |
|
"learning_rate": 4.530497943467227e-05, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.8379, |
|
"grad_norm": 4.133498191833496, |
|
"learning_rate": 4.5217467401767746e-05, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.9657, |
|
"grad_norm": 3.252624273300171, |
|
"learning_rate": 4.512995536886322e-05, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.7005, |
|
"grad_norm": 3.70926833152771, |
|
"learning_rate": 4.50424433359587e-05, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.6194, |
|
"grad_norm": 4.198193073272705, |
|
"learning_rate": 4.495493130305417e-05, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 8.1874, |
|
"grad_norm": 3.5660247802734375, |
|
"learning_rate": 4.486741927014965e-05, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 8.0731, |
|
"grad_norm": 3.6867547035217285, |
|
"learning_rate": 4.477990723724512e-05, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.4702, |
|
"grad_norm": 3.8409180641174316, |
|
"learning_rate": 4.46923952043406e-05, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.1333, |
|
"grad_norm": 3.7179150581359863, |
|
"learning_rate": 4.460488317143607e-05, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.4353, |
|
"grad_norm": 4.092810153961182, |
|
"learning_rate": 4.451737113853155e-05, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 8.1888, |
|
"grad_norm": 4.3642754554748535, |
|
"learning_rate": 4.442985910562702e-05, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 8.1823, |
|
"grad_norm": 3.4664993286132812, |
|
"learning_rate": 4.43423470727225e-05, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.6325, |
|
"grad_norm": 4.143255710601807, |
|
"learning_rate": 4.425483503981798e-05, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.9794, |
|
"grad_norm": 3.8068184852600098, |
|
"learning_rate": 4.416732300691345e-05, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.5482, |
|
"grad_norm": 3.6255953311920166, |
|
"learning_rate": 4.407981097400893e-05, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.1437, |
|
"grad_norm": 4.526164531707764, |
|
"learning_rate": 4.39922989411044e-05, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.3931, |
|
"grad_norm": 3.652649402618408, |
|
"learning_rate": 4.390478690819988e-05, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.3862, |
|
"grad_norm": 4.751399993896484, |
|
"learning_rate": 4.3817274875295354e-05, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.8723, |
|
"grad_norm": 3.011975049972534, |
|
"learning_rate": 4.372976284239083e-05, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 8.0483, |
|
"grad_norm": 4.407155513763428, |
|
"learning_rate": 4.3642250809486305e-05, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.6125, |
|
"grad_norm": 3.762749195098877, |
|
"learning_rate": 4.3554738776581784e-05, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.7699, |
|
"grad_norm": 5.391783714294434, |
|
"learning_rate": 4.3467226743677256e-05, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.6641, |
|
"grad_norm": 3.509794235229492, |
|
"learning_rate": 4.3379714710772735e-05, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 8.4195, |
|
"grad_norm": 4.34732723236084, |
|
"learning_rate": 4.329220267786821e-05, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.6044, |
|
"grad_norm": 4.418550491333008, |
|
"learning_rate": 4.3204690644963686e-05, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.8304, |
|
"grad_norm": 3.9914748668670654, |
|
"learning_rate": 4.3117178612059165e-05, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.4516, |
|
"grad_norm": 4.141488075256348, |
|
"learning_rate": 4.302966657915464e-05, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.7451, |
|
"grad_norm": 3.61734938621521, |
|
"learning_rate": 4.2942154546250116e-05, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 8.0466, |
|
"grad_norm": 3.956249475479126, |
|
"learning_rate": 4.285464251334559e-05, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.5964, |
|
"grad_norm": 3.214452028274536, |
|
"learning_rate": 4.276713048044107e-05, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.7055, |
|
"grad_norm": 3.8038113117218018, |
|
"learning_rate": 4.267961844753654e-05, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.9609, |
|
"grad_norm": 4.2961626052856445, |
|
"learning_rate": 4.259210641463202e-05, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.7958, |
|
"grad_norm": 2.900935649871826, |
|
"learning_rate": 4.250459438172749e-05, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.7953, |
|
"grad_norm": 3.369781970977783, |
|
"learning_rate": 4.241708234882297e-05, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.5888, |
|
"grad_norm": 6.093942642211914, |
|
"learning_rate": 4.232957031591844e-05, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 8.1158, |
|
"grad_norm": 4.063805103302002, |
|
"learning_rate": 4.224205828301392e-05, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.6127, |
|
"grad_norm": 3.981023073196411, |
|
"learning_rate": 4.215454625010939e-05, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.3727, |
|
"grad_norm": 3.273742437362671, |
|
"learning_rate": 4.206703421720487e-05, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.7617, |
|
"grad_norm": 4.247544765472412, |
|
"learning_rate": 4.197952218430034e-05, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 8.2201, |
|
"grad_norm": 5.181518077850342, |
|
"learning_rate": 4.1892010151395815e-05, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 8.0504, |
|
"grad_norm": 3.4994397163391113, |
|
"learning_rate": 4.1804498118491294e-05, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 8.2552, |
|
"grad_norm": 4.784666061401367, |
|
"learning_rate": 4.1716986085586766e-05, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.746, |
|
"grad_norm": 4.549380779266357, |
|
"learning_rate": 4.1629474052682245e-05, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.6646, |
|
"grad_norm": 3.586853504180908, |
|
"learning_rate": 4.154196201977772e-05, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.454, |
|
"grad_norm": 4.0881500244140625, |
|
"learning_rate": 4.1454449986873196e-05, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.6951, |
|
"grad_norm": 3.7725558280944824, |
|
"learning_rate": 4.136693795396867e-05, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.7406, |
|
"grad_norm": 4.566652297973633, |
|
"learning_rate": 4.127942592106415e-05, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 8.0458, |
|
"grad_norm": 4.562892913818359, |
|
"learning_rate": 4.119191388815962e-05, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 8.314, |
|
"grad_norm": 3.1217896938323975, |
|
"learning_rate": 4.11044018552551e-05, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.931, |
|
"grad_norm": 3.4693222045898438, |
|
"learning_rate": 4.101688982235058e-05, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.6126, |
|
"grad_norm": 3.7778282165527344, |
|
"learning_rate": 4.092937778944605e-05, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.4983, |
|
"grad_norm": 6.494439125061035, |
|
"learning_rate": 4.084186575654153e-05, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.801, |
|
"grad_norm": 3.602264165878296, |
|
"learning_rate": 4.0754353723637e-05, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.405, |
|
"grad_norm": 4.2882795333862305, |
|
"learning_rate": 4.066684169073248e-05, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.5115, |
|
"grad_norm": 4.935623645782471, |
|
"learning_rate": 4.057932965782795e-05, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.5315, |
|
"grad_norm": 5.16713809967041, |
|
"learning_rate": 4.049181762492343e-05, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.8313, |
|
"grad_norm": 3.440279960632324, |
|
"learning_rate": 4.04043055920189e-05, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.7669, |
|
"grad_norm": 4.02671480178833, |
|
"learning_rate": 4.031679355911438e-05, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.6988, |
|
"grad_norm": 5.945104598999023, |
|
"learning_rate": 4.022928152620985e-05, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.2314, |
|
"grad_norm": 4.557019233703613, |
|
"learning_rate": 4.014176949330533e-05, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.5578, |
|
"grad_norm": 3.9793171882629395, |
|
"learning_rate": 4.0054257460400804e-05, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.1794, |
|
"grad_norm": 3.178558349609375, |
|
"learning_rate": 3.996674542749628e-05, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.842, |
|
"grad_norm": 4.609609127044678, |
|
"learning_rate": 3.987923339459176e-05, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.4484, |
|
"grad_norm": 3.5374889373779297, |
|
"learning_rate": 3.9791721361687234e-05, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.2917, |
|
"grad_norm": 4.768485069274902, |
|
"learning_rate": 3.970420932878271e-05, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.4525, |
|
"grad_norm": 3.342456102371216, |
|
"learning_rate": 3.9616697295878185e-05, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.6611, |
|
"grad_norm": 4.111917018890381, |
|
"learning_rate": 3.9529185262973664e-05, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.9292, |
|
"grad_norm": 5.008895397186279, |
|
"learning_rate": 3.9441673230069136e-05, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.9246, |
|
"grad_norm": 4.372122287750244, |
|
"learning_rate": 3.9354161197164615e-05, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.6795, |
|
"grad_norm": 3.406059503555298, |
|
"learning_rate": 3.926664916426009e-05, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.5926, |
|
"grad_norm": 4.412403583526611, |
|
"learning_rate": 3.9179137131355566e-05, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 8.0002, |
|
"grad_norm": 4.203276634216309, |
|
"learning_rate": 3.909162509845104e-05, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.8556, |
|
"grad_norm": 3.7347216606140137, |
|
"learning_rate": 3.900411306554652e-05, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 8.2028, |
|
"grad_norm": 4.552736282348633, |
|
"learning_rate": 3.891660103264199e-05, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 8.1277, |
|
"grad_norm": 4.882839679718018, |
|
"learning_rate": 3.882908899973747e-05, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.74, |
|
"grad_norm": 4.639001846313477, |
|
"learning_rate": 3.875032817012339e-05, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 8.0942, |
|
"grad_norm": 5.097876071929932, |
|
"learning_rate": 3.866281613721887e-05, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.9073, |
|
"grad_norm": 3.200108051300049, |
|
"learning_rate": 3.8575304104314344e-05, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.5499, |
|
"grad_norm": 3.8395094871520996, |
|
"learning_rate": 3.848779207140982e-05, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.5298, |
|
"grad_norm": 3.6033782958984375, |
|
"learning_rate": 3.8400280038505295e-05, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.3608, |
|
"grad_norm": 4.341715335845947, |
|
"learning_rate": 3.8312768005600774e-05, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 8.2837, |
|
"grad_norm": 2.746906042098999, |
|
"learning_rate": 3.822525597269625e-05, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.0553, |
|
"grad_norm": 4.10823392868042, |
|
"learning_rate": 3.8137743939791725e-05, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.4852, |
|
"grad_norm": 3.21799635887146, |
|
"learning_rate": 3.8050231906887204e-05, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.4002, |
|
"grad_norm": 4.537161827087402, |
|
"learning_rate": 3.796271987398267e-05, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.4377, |
|
"grad_norm": 4.020664691925049, |
|
"learning_rate": 3.787520784107815e-05, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.6523, |
|
"grad_norm": 3.1800293922424316, |
|
"learning_rate": 3.778769580817362e-05, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.4824, |
|
"grad_norm": 3.2757511138916016, |
|
"learning_rate": 3.77001837752691e-05, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.8269, |
|
"grad_norm": 3.6784262657165527, |
|
"learning_rate": 3.761267174236457e-05, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.9757, |
|
"grad_norm": 3.4948902130126953, |
|
"learning_rate": 3.752515970946005e-05, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.8251, |
|
"grad_norm": 5.0971598625183105, |
|
"learning_rate": 3.743764767655553e-05, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.7561, |
|
"grad_norm": 4.533854961395264, |
|
"learning_rate": 3.7350135643651e-05, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.8986, |
|
"grad_norm": 4.550451278686523, |
|
"learning_rate": 3.726262361074648e-05, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.2438, |
|
"grad_norm": 3.8077099323272705, |
|
"learning_rate": 3.717511157784195e-05, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.7242, |
|
"grad_norm": 5.2727203369140625, |
|
"learning_rate": 3.708759954493743e-05, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.8602, |
|
"grad_norm": 2.9006500244140625, |
|
"learning_rate": 3.7000087512032903e-05, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.0401, |
|
"grad_norm": 4.919744491577148, |
|
"learning_rate": 3.691257547912838e-05, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.5799, |
|
"grad_norm": 3.297295093536377, |
|
"learning_rate": 3.6825063446223854e-05, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.355, |
|
"grad_norm": 2.9851813316345215, |
|
"learning_rate": 3.673755141331933e-05, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.715, |
|
"grad_norm": 3.619997262954712, |
|
"learning_rate": 3.6650039380414805e-05, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.8093, |
|
"grad_norm": 4.266133785247803, |
|
"learning_rate": 3.6562527347510284e-05, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.5723, |
|
"grad_norm": 3.513849973678589, |
|
"learning_rate": 3.6475015314605756e-05, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.006, |
|
"grad_norm": 3.6736350059509277, |
|
"learning_rate": 3.6387503281701235e-05, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.8925, |
|
"grad_norm": 3.4943020343780518, |
|
"learning_rate": 3.629999124879671e-05, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.3886, |
|
"grad_norm": 5.898230075836182, |
|
"learning_rate": 3.6212479215892186e-05, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.9521, |
|
"grad_norm": 3.2569427490234375, |
|
"learning_rate": 3.6124967182987665e-05, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.1573, |
|
"grad_norm": 6.18344259262085, |
|
"learning_rate": 3.603745515008314e-05, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.3595, |
|
"grad_norm": 6.704586982727051, |
|
"learning_rate": 3.5949943117178616e-05, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 8.1131, |
|
"grad_norm": 3.768490791320801, |
|
"learning_rate": 3.586243108427409e-05, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.8278, |
|
"grad_norm": 4.432671070098877, |
|
"learning_rate": 3.577491905136957e-05, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.4794, |
|
"grad_norm": 3.835556745529175, |
|
"learning_rate": 3.568740701846504e-05, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.5471, |
|
"grad_norm": 5.500497817993164, |
|
"learning_rate": 3.559989498556052e-05, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.9875, |
|
"grad_norm": 4.727583408355713, |
|
"learning_rate": 3.551238295265599e-05, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 6.9965, |
|
"grad_norm": 5.54524040222168, |
|
"learning_rate": 3.542487091975147e-05, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.6091, |
|
"grad_norm": 3.945673942565918, |
|
"learning_rate": 3.533735888684694e-05, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.6881, |
|
"grad_norm": 3.220522880554199, |
|
"learning_rate": 3.524984685394242e-05, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.5224, |
|
"grad_norm": 5.061761856079102, |
|
"learning_rate": 3.516233482103789e-05, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.2034, |
|
"grad_norm": 4.419524192810059, |
|
"learning_rate": 3.507482278813337e-05, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.8167, |
|
"grad_norm": 4.390359878540039, |
|
"learning_rate": 3.498731075522885e-05, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.9913, |
|
"grad_norm": 3.729773998260498, |
|
"learning_rate": 3.489979872232432e-05, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.6947, |
|
"grad_norm": 4.854176044464111, |
|
"learning_rate": 3.48122866894198e-05, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.7003, |
|
"grad_norm": 3.3899290561676025, |
|
"learning_rate": 3.472477465651527e-05, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.6489, |
|
"grad_norm": 4.47396993637085, |
|
"learning_rate": 3.463726262361075e-05, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.8908, |
|
"grad_norm": 3.3266396522521973, |
|
"learning_rate": 3.4549750590706224e-05, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.4745, |
|
"grad_norm": 4.091291904449463, |
|
"learning_rate": 3.44622385578017e-05, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.5307, |
|
"grad_norm": 7.771108627319336, |
|
"learning_rate": 3.4374726524897175e-05, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.7252, |
|
"grad_norm": 4.1433305740356445, |
|
"learning_rate": 3.4287214491992654e-05, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 8.1189, |
|
"grad_norm": 3.5036942958831787, |
|
"learning_rate": 3.4199702459088126e-05, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.6836, |
|
"grad_norm": 4.437150478363037, |
|
"learning_rate": 3.4112190426183605e-05, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.8543, |
|
"grad_norm": 6.440913200378418, |
|
"learning_rate": 3.402467839327908e-05, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.8673, |
|
"grad_norm": 4.657886981964111, |
|
"learning_rate": 3.393716636037455e-05, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 8.4281, |
|
"grad_norm": 4.122070789337158, |
|
"learning_rate": 3.384965432747003e-05, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.1948, |
|
"grad_norm": 3.2325737476348877, |
|
"learning_rate": 3.37621422945655e-05, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.3229, |
|
"grad_norm": 3.874630928039551, |
|
"learning_rate": 3.367463026166098e-05, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.5072, |
|
"grad_norm": 4.308450222015381, |
|
"learning_rate": 3.358711822875645e-05, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.5892, |
|
"grad_norm": 3.9709150791168213, |
|
"learning_rate": 3.349960619585193e-05, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.9792, |
|
"grad_norm": 6.5298919677734375, |
|
"learning_rate": 3.34120941629474e-05, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.688, |
|
"grad_norm": 4.508563041687012, |
|
"learning_rate": 3.332458213004288e-05, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.1915, |
|
"grad_norm": 3.5211637020111084, |
|
"learning_rate": 3.3237070097138354e-05, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.4623, |
|
"grad_norm": 4.973934173583984, |
|
"learning_rate": 3.314955806423383e-05, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.4097, |
|
"grad_norm": 4.810267448425293, |
|
"learning_rate": 3.3062046031329305e-05, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.5439, |
|
"grad_norm": 3.942003011703491, |
|
"learning_rate": 3.2974533998424783e-05, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.2904, |
|
"grad_norm": 3.7207398414611816, |
|
"learning_rate": 3.288702196552026e-05, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.0411, |
|
"grad_norm": 3.197200298309326, |
|
"learning_rate": 3.2799509932615734e-05, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 6.7708, |
|
"grad_norm": 5.261172294616699, |
|
"learning_rate": 3.2711997899711213e-05, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.2953, |
|
"grad_norm": 7.287022590637207, |
|
"learning_rate": 3.2624485866806686e-05, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.7778, |
|
"grad_norm": 3.6490862369537354, |
|
"learning_rate": 3.2536973833902164e-05, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.0521, |
|
"grad_norm": 3.474090337753296, |
|
"learning_rate": 3.2449461800997637e-05, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.6243, |
|
"grad_norm": 4.992802143096924, |
|
"learning_rate": 3.2361949768093115e-05, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.7397, |
|
"grad_norm": 4.16194486618042, |
|
"learning_rate": 3.227443773518859e-05, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.8743, |
|
"grad_norm": 4.265628814697266, |
|
"learning_rate": 3.2186925702284066e-05, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.5843, |
|
"grad_norm": 4.442827224731445, |
|
"learning_rate": 3.209941366937954e-05, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.1913, |
|
"grad_norm": 3.7389514446258545, |
|
"learning_rate": 3.201190163647502e-05, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.4241, |
|
"grad_norm": 4.544101238250732, |
|
"learning_rate": 3.192438960357049e-05, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.4453, |
|
"grad_norm": 3.6654653549194336, |
|
"learning_rate": 3.183687757066597e-05, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.4288, |
|
"grad_norm": 3.525256872177124, |
|
"learning_rate": 3.174936553776145e-05, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.387, |
|
"grad_norm": 4.041418075561523, |
|
"learning_rate": 3.166185350485692e-05, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.4435, |
|
"grad_norm": 4.415677547454834, |
|
"learning_rate": 3.15743414719524e-05, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.6321, |
|
"grad_norm": 3.649733066558838, |
|
"learning_rate": 3.148682943904787e-05, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.1816, |
|
"grad_norm": 4.361470699310303, |
|
"learning_rate": 3.139931740614335e-05, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.8665, |
|
"grad_norm": 2.8240556716918945, |
|
"learning_rate": 3.131180537323882e-05, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 8.0303, |
|
"grad_norm": 6.444936275482178, |
|
"learning_rate": 3.12242933403343e-05, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.834, |
|
"grad_norm": 4.267172813415527, |
|
"learning_rate": 3.113678130742977e-05, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.5162, |
|
"grad_norm": 4.9462480545043945, |
|
"learning_rate": 3.104926927452525e-05, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.3831, |
|
"grad_norm": 3.944603204727173, |
|
"learning_rate": 3.0961757241620723e-05, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.0951, |
|
"grad_norm": 4.1821608543396, |
|
"learning_rate": 3.08742452087162e-05, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.4743, |
|
"grad_norm": 4.054866790771484, |
|
"learning_rate": 3.0786733175811675e-05, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.4355, |
|
"grad_norm": 4.87803316116333, |
|
"learning_rate": 3.0699221142907153e-05, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.8149, |
|
"grad_norm": 4.6143388748168945, |
|
"learning_rate": 3.0611709110002626e-05, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.2776, |
|
"grad_norm": 3.6637542247772217, |
|
"learning_rate": 3.0524197077098104e-05, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.5867, |
|
"grad_norm": 4.739266872406006, |
|
"learning_rate": 3.043668504419358e-05, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 8.031, |
|
"grad_norm": 4.118218898773193, |
|
"learning_rate": 3.0349173011289055e-05, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.5086, |
|
"grad_norm": 3.7304162979125977, |
|
"learning_rate": 3.026166097838453e-05, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.0712, |
|
"grad_norm": 3.3575172424316406, |
|
"learning_rate": 3.0174148945480006e-05, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.8495, |
|
"grad_norm": 3.6715874671936035, |
|
"learning_rate": 3.0086636912575482e-05, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.8997, |
|
"grad_norm": 3.8344626426696777, |
|
"learning_rate": 2.9999124879670954e-05, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.6391, |
|
"grad_norm": 5.086608409881592, |
|
"learning_rate": 2.991161284676643e-05, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.6541, |
|
"grad_norm": 5.020079135894775, |
|
"learning_rate": 2.9824100813861905e-05, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.9148, |
|
"grad_norm": 4.90994119644165, |
|
"learning_rate": 2.973658878095738e-05, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.3306, |
|
"grad_norm": 7.108256816864014, |
|
"learning_rate": 2.9649076748052856e-05, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.4177, |
|
"grad_norm": 3.922966480255127, |
|
"learning_rate": 2.956156471514833e-05, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.6952, |
|
"grad_norm": 4.077629566192627, |
|
"learning_rate": 2.9474052682243807e-05, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 8.2089, |
|
"grad_norm": 3.003819227218628, |
|
"learning_rate": 2.9386540649339283e-05, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 6.8683, |
|
"grad_norm": 3.8509228229522705, |
|
"learning_rate": 2.9299028616434758e-05, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.1152, |
|
"grad_norm": 3.316972017288208, |
|
"learning_rate": 2.9211516583530234e-05, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.7425, |
|
"grad_norm": 5.465259552001953, |
|
"learning_rate": 2.9124004550625713e-05, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.3628, |
|
"grad_norm": 3.923509120941162, |
|
"learning_rate": 2.9036492517721188e-05, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.382, |
|
"grad_norm": 4.779471397399902, |
|
"learning_rate": 2.8948980484816664e-05, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.4485, |
|
"grad_norm": 6.00252628326416, |
|
"learning_rate": 2.886146845191214e-05, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.4238, |
|
"grad_norm": 4.734460353851318, |
|
"learning_rate": 2.8773956419007615e-05, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.4597, |
|
"grad_norm": 4.662705898284912, |
|
"learning_rate": 2.868644438610309e-05, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.7309, |
|
"grad_norm": 3.3174445629119873, |
|
"learning_rate": 2.8598932353198566e-05, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.7715, |
|
"grad_norm": 3.2781224250793457, |
|
"learning_rate": 2.851142032029404e-05, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.6348, |
|
"grad_norm": 5.909160137176514, |
|
"learning_rate": 2.8423908287389517e-05, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.2795, |
|
"grad_norm": 4.939976215362549, |
|
"learning_rate": 2.8336396254484992e-05, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.9346, |
|
"grad_norm": 4.42500114440918, |
|
"learning_rate": 2.8248884221580468e-05, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.4785, |
|
"grad_norm": 3.704190731048584, |
|
"learning_rate": 2.8161372188675943e-05, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.456, |
|
"grad_norm": 3.73481822013855, |
|
"learning_rate": 2.807386015577142e-05, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.4728, |
|
"grad_norm": 4.051381587982178, |
|
"learning_rate": 2.7986348122866894e-05, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.6646, |
|
"grad_norm": 3.674975633621216, |
|
"learning_rate": 2.7898836089962373e-05, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.4139, |
|
"grad_norm": 4.6207709312438965, |
|
"learning_rate": 2.781132405705785e-05, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.3962, |
|
"grad_norm": 3.7129499912261963, |
|
"learning_rate": 2.7723812024153324e-05, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.423, |
|
"grad_norm": 4.65708589553833, |
|
"learning_rate": 2.76362999912488e-05, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.577, |
|
"grad_norm": 5.05981969833374, |
|
"learning_rate": 2.7548787958344275e-05, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.4925, |
|
"grad_norm": 4.692249774932861, |
|
"learning_rate": 2.746127592543975e-05, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.4587, |
|
"grad_norm": 4.2007856369018555, |
|
"learning_rate": 2.7373763892535226e-05, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.7241, |
|
"grad_norm": 6.081201553344727, |
|
"learning_rate": 2.72862518596307e-05, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.6261, |
|
"grad_norm": 3.801405429840088, |
|
"learning_rate": 2.7198739826726177e-05, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.199, |
|
"grad_norm": 4.788170337677002, |
|
"learning_rate": 2.7111227793821653e-05, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.6249, |
|
"grad_norm": 3.934465169906616, |
|
"learning_rate": 2.7023715760917128e-05, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.1697, |
|
"grad_norm": 3.270228147506714, |
|
"learning_rate": 2.6936203728012604e-05, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.3199, |
|
"grad_norm": 4.648608207702637, |
|
"learning_rate": 2.684869169510808e-05, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.0883, |
|
"grad_norm": 3.7127881050109863, |
|
"learning_rate": 2.6761179662203555e-05, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.6402, |
|
"grad_norm": 4.310494899749756, |
|
"learning_rate": 2.6673667629299033e-05, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 8.1804, |
|
"grad_norm": 3.9658172130584717, |
|
"learning_rate": 2.658615559639451e-05, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.4997, |
|
"grad_norm": 6.007218837738037, |
|
"learning_rate": 2.6498643563489984e-05, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.1157, |
|
"grad_norm": 4.3563995361328125, |
|
"learning_rate": 2.641113153058546e-05, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.6341, |
|
"grad_norm": 3.9191131591796875, |
|
"learning_rate": 2.6323619497680935e-05, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.3165, |
|
"grad_norm": 6.353770732879639, |
|
"learning_rate": 2.623610746477641e-05, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.2788, |
|
"grad_norm": 4.23541784286499, |
|
"learning_rate": 2.6148595431871886e-05, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.4787, |
|
"grad_norm": 7.060284614562988, |
|
"learning_rate": 2.6061083398967362e-05, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.7702, |
|
"grad_norm": 3.484837055206299, |
|
"learning_rate": 2.597357136606283e-05, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.3549, |
|
"grad_norm": 6.6010589599609375, |
|
"learning_rate": 2.5886059333158306e-05, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.2694, |
|
"grad_norm": 4.792263984680176, |
|
"learning_rate": 2.5798547300253785e-05, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.479, |
|
"grad_norm": 4.992294788360596, |
|
"learning_rate": 2.571103526734926e-05, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.5955, |
|
"grad_norm": 5.028162956237793, |
|
"learning_rate": 2.5623523234444736e-05, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.1964, |
|
"grad_norm": 4.13356876373291, |
|
"learning_rate": 2.553601120154021e-05, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 6.9294, |
|
"grad_norm": 3.5145249366760254, |
|
"learning_rate": 2.5448499168635687e-05, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.4454, |
|
"grad_norm": 3.8901588916778564, |
|
"learning_rate": 2.5360987135731163e-05, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.738, |
|
"grad_norm": 4.009905815124512, |
|
"learning_rate": 2.5273475102826638e-05, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.3253, |
|
"grad_norm": 4.332956314086914, |
|
"learning_rate": 2.5185963069922114e-05, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.1716, |
|
"grad_norm": 3.688816785812378, |
|
"learning_rate": 2.509845103701759e-05, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 6.8345, |
|
"grad_norm": 7.10718297958374, |
|
"learning_rate": 2.5010939004113065e-05, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.7885, |
|
"grad_norm": 5.8644585609436035, |
|
"learning_rate": 2.492342697120854e-05, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.3728, |
|
"grad_norm": 2.958936929702759, |
|
"learning_rate": 2.4835914938304016e-05, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.2452, |
|
"grad_norm": 3.498347520828247, |
|
"learning_rate": 2.474840290539949e-05, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.0227, |
|
"grad_norm": 4.527777671813965, |
|
"learning_rate": 2.4660890872494967e-05, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.0811, |
|
"grad_norm": 4.315553665161133, |
|
"learning_rate": 2.4573378839590446e-05, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.5339, |
|
"grad_norm": 3.5268032550811768, |
|
"learning_rate": 2.448586680668592e-05, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.5172, |
|
"grad_norm": 4.606849670410156, |
|
"learning_rate": 2.4398354773781397e-05, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.5034, |
|
"grad_norm": 3.0682761669158936, |
|
"learning_rate": 2.4310842740876872e-05, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.0923, |
|
"grad_norm": 3.9300010204315186, |
|
"learning_rate": 2.4223330707972348e-05, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.3782, |
|
"grad_norm": 5.444020748138428, |
|
"learning_rate": 2.4135818675067823e-05, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 8.0698, |
|
"grad_norm": 3.9157919883728027, |
|
"learning_rate": 2.40483066421633e-05, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.4659, |
|
"grad_norm": 4.808152675628662, |
|
"learning_rate": 2.3960794609258774e-05, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.2596, |
|
"grad_norm": 4.249693870544434, |
|
"learning_rate": 2.387328257635425e-05, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.0133, |
|
"grad_norm": 4.091562271118164, |
|
"learning_rate": 2.3785770543449725e-05, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.9337, |
|
"grad_norm": 3.689053535461426, |
|
"learning_rate": 2.36982585105452e-05, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.954, |
|
"grad_norm": 3.9822888374328613, |
|
"learning_rate": 2.3610746477640676e-05, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 8.0586, |
|
"grad_norm": 4.524798393249512, |
|
"learning_rate": 2.352323444473615e-05, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.6348, |
|
"grad_norm": 4.638789653778076, |
|
"learning_rate": 2.3435722411831627e-05, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.1951, |
|
"grad_norm": 3.9606380462646484, |
|
"learning_rate": 2.3348210378927106e-05, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.1919, |
|
"grad_norm": 4.085976600646973, |
|
"learning_rate": 2.326069834602258e-05, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.7985, |
|
"grad_norm": 4.817371845245361, |
|
"learning_rate": 2.3173186313118057e-05, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.4659, |
|
"grad_norm": 4.804962635040283, |
|
"learning_rate": 2.3085674280213533e-05, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.6245, |
|
"grad_norm": 5.15590763092041, |
|
"learning_rate": 2.2998162247309005e-05, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.1799, |
|
"grad_norm": 5.4307122230529785, |
|
"learning_rate": 2.291065021440448e-05, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 6.737, |
|
"grad_norm": 3.417074680328369, |
|
"learning_rate": 2.2823138181499956e-05, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.3044, |
|
"grad_norm": 5.047757148742676, |
|
"learning_rate": 2.273562614859543e-05, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.0146, |
|
"grad_norm": 5.767230033874512, |
|
"learning_rate": 2.2648114115690907e-05, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.5375, |
|
"grad_norm": 4.92877197265625, |
|
"learning_rate": 2.2560602082786382e-05, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.5536, |
|
"grad_norm": 3.8499937057495117, |
|
"learning_rate": 2.2473090049881858e-05, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.2617, |
|
"grad_norm": 3.698652505874634, |
|
"learning_rate": 2.2385578016977337e-05, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.3101, |
|
"grad_norm": 3.8474197387695312, |
|
"learning_rate": 2.2298065984072812e-05, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.4945, |
|
"grad_norm": 4.18773889541626, |
|
"learning_rate": 2.2210553951168288e-05, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.1942, |
|
"grad_norm": 4.604954242706299, |
|
"learning_rate": 2.2123041918263763e-05, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.3004, |
|
"grad_norm": 4.48193359375, |
|
"learning_rate": 2.203552988535924e-05, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.2258, |
|
"grad_norm": 3.1619014739990234, |
|
"learning_rate": 2.1948017852454714e-05, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.1968, |
|
"grad_norm": 4.031898021697998, |
|
"learning_rate": 2.186050581955019e-05, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.7404, |
|
"grad_norm": 4.030830383300781, |
|
"learning_rate": 2.1772993786645665e-05, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.1855, |
|
"grad_norm": 3.3764097690582275, |
|
"learning_rate": 2.168548175374114e-05, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.0507, |
|
"grad_norm": 5.506438732147217, |
|
"learning_rate": 2.1597969720836616e-05, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 6.5909, |
|
"grad_norm": 4.797235012054443, |
|
"learning_rate": 2.151045768793209e-05, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.418, |
|
"grad_norm": 4.5042853355407715, |
|
"learning_rate": 2.1422945655027567e-05, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.3702, |
|
"grad_norm": 3.449220657348633, |
|
"learning_rate": 2.1335433622123043e-05, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.2734, |
|
"grad_norm": 5.276688098907471, |
|
"learning_rate": 2.1247921589218518e-05, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.1368, |
|
"grad_norm": 4.960446834564209, |
|
"learning_rate": 2.1160409556313997e-05, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.3468, |
|
"grad_norm": 4.041114330291748, |
|
"learning_rate": 2.1072897523409473e-05, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.5724, |
|
"grad_norm": 5.667148113250732, |
|
"learning_rate": 2.0985385490504945e-05, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.1379, |
|
"grad_norm": 3.245389223098755, |
|
"learning_rate": 2.089787345760042e-05, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 6.9722, |
|
"grad_norm": 4.715411186218262, |
|
"learning_rate": 2.0810361424695896e-05, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.4667, |
|
"grad_norm": 3.4023447036743164, |
|
"learning_rate": 2.072284939179137e-05, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.3368, |
|
"grad_norm": 4.798887252807617, |
|
"learning_rate": 2.0635337358886847e-05, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.224, |
|
"grad_norm": 4.741410255432129, |
|
"learning_rate": 2.0547825325982322e-05, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.0656, |
|
"grad_norm": 3.5715346336364746, |
|
"learning_rate": 2.046906449636825e-05, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.0197, |
|
"grad_norm": 4.428717613220215, |
|
"learning_rate": 2.0381552463463727e-05, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.4517, |
|
"grad_norm": 4.353855133056641, |
|
"learning_rate": 2.0294040430559202e-05, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.5488, |
|
"grad_norm": 3.331164598464966, |
|
"learning_rate": 2.0206528397654678e-05, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 6.8231, |
|
"grad_norm": 4.357122898101807, |
|
"learning_rate": 2.0119016364750153e-05, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.8218, |
|
"grad_norm": 3.6374125480651855, |
|
"learning_rate": 2.003150433184563e-05, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.2913, |
|
"grad_norm": 3.414724826812744, |
|
"learning_rate": 1.9943992298941104e-05, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.5022, |
|
"grad_norm": 3.073855400085449, |
|
"learning_rate": 1.985648026603658e-05, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.1292, |
|
"grad_norm": 5.69718074798584, |
|
"learning_rate": 1.976896823313206e-05, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.5066, |
|
"grad_norm": 3.6818926334381104, |
|
"learning_rate": 1.9681456200227534e-05, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.1924, |
|
"grad_norm": 6.099584579467773, |
|
"learning_rate": 1.959394416732301e-05, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.7311, |
|
"grad_norm": 4.388739109039307, |
|
"learning_rate": 1.9506432134418485e-05, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.8034, |
|
"grad_norm": 4.578341007232666, |
|
"learning_rate": 1.941892010151396e-05, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.1698, |
|
"grad_norm": 3.4639930725097656, |
|
"learning_rate": 1.9331408068609436e-05, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.0058, |
|
"grad_norm": 4.414987564086914, |
|
"learning_rate": 1.924389603570491e-05, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.3363, |
|
"grad_norm": 4.268624305725098, |
|
"learning_rate": 1.9156384002800387e-05, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.2589, |
|
"grad_norm": 6.716452598571777, |
|
"learning_rate": 1.9068871969895863e-05, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.2501, |
|
"grad_norm": 5.058889865875244, |
|
"learning_rate": 1.8981359936991335e-05, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.3893, |
|
"grad_norm": 6.656921863555908, |
|
"learning_rate": 1.889384790408681e-05, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.2942, |
|
"grad_norm": 4.824561595916748, |
|
"learning_rate": 1.8806335871182286e-05, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.6477, |
|
"grad_norm": 5.925858020782471, |
|
"learning_rate": 1.8718823838277765e-05, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 6.9424, |
|
"grad_norm": 3.955688714981079, |
|
"learning_rate": 1.863131180537324e-05, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 6.8707, |
|
"grad_norm": 3.95426869392395, |
|
"learning_rate": 1.8543799772468716e-05, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.5012, |
|
"grad_norm": 5.377491474151611, |
|
"learning_rate": 1.845628773956419e-05, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.6028, |
|
"grad_norm": 4.264338970184326, |
|
"learning_rate": 1.8368775706659667e-05, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 6.8808, |
|
"grad_norm": 5.041021347045898, |
|
"learning_rate": 1.8281263673755142e-05, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 8.0187, |
|
"grad_norm": 6.484523773193359, |
|
"learning_rate": 1.8193751640850618e-05, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 6.9991, |
|
"grad_norm": 4.5790205001831055, |
|
"learning_rate": 1.8106239607946093e-05, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.2012, |
|
"grad_norm": 4.204977989196777, |
|
"learning_rate": 1.801872757504157e-05, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.6324, |
|
"grad_norm": 3.803563356399536, |
|
"learning_rate": 1.7931215542137044e-05, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 6.6331, |
|
"grad_norm": 3.6445772647857666, |
|
"learning_rate": 1.784370350923252e-05, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.4633, |
|
"grad_norm": 3.9381942749023438, |
|
"learning_rate": 1.7756191476327995e-05, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.4318, |
|
"grad_norm": 3.573315382003784, |
|
"learning_rate": 1.766867944342347e-05, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.3045, |
|
"grad_norm": 3.7262725830078125, |
|
"learning_rate": 1.7581167410518946e-05, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 6.9975, |
|
"grad_norm": 4.73222541809082, |
|
"learning_rate": 1.7493655377614425e-05, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 6.7643, |
|
"grad_norm": 4.269005298614502, |
|
"learning_rate": 1.74061433447099e-05, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.7472, |
|
"grad_norm": 4.969855785369873, |
|
"learning_rate": 1.7318631311805376e-05, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.0408, |
|
"grad_norm": 4.290554046630859, |
|
"learning_rate": 1.723111927890085e-05, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.5848, |
|
"grad_norm": 4.593362808227539, |
|
"learning_rate": 1.7143607245996327e-05, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.2043, |
|
"grad_norm": 3.8505163192749023, |
|
"learning_rate": 1.7056095213091803e-05, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.076, |
|
"grad_norm": 5.526023864746094, |
|
"learning_rate": 1.6968583180187275e-05, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.5282, |
|
"grad_norm": 4.70090389251709, |
|
"learning_rate": 1.688107114728275e-05, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.1022, |
|
"grad_norm": 5.819429397583008, |
|
"learning_rate": 1.6793559114378226e-05, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.5449, |
|
"grad_norm": 4.2631707191467285, |
|
"learning_rate": 1.67060470814737e-05, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.5941, |
|
"grad_norm": 5.127431392669678, |
|
"learning_rate": 1.6618535048569177e-05, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.1741, |
|
"grad_norm": 5.605392932891846, |
|
"learning_rate": 1.6531023015664652e-05, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.5186, |
|
"grad_norm": 5.392033576965332, |
|
"learning_rate": 1.644351098276013e-05, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.4369, |
|
"grad_norm": 4.743539810180664, |
|
"learning_rate": 1.6355998949855607e-05, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.0646, |
|
"grad_norm": 4.0009684562683105, |
|
"learning_rate": 1.6268486916951082e-05, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.3658, |
|
"grad_norm": 4.551602363586426, |
|
"learning_rate": 1.6180974884046558e-05, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.7071, |
|
"grad_norm": 3.369328737258911, |
|
"learning_rate": 1.6093462851142033e-05, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.5386, |
|
"grad_norm": 3.6127750873565674, |
|
"learning_rate": 1.600595081823751e-05, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.3196, |
|
"grad_norm": 4.915907382965088, |
|
"learning_rate": 1.5918438785332984e-05, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.1598, |
|
"grad_norm": 5.295419216156006, |
|
"learning_rate": 1.583092675242846e-05, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.2027, |
|
"grad_norm": 5.066037654876709, |
|
"learning_rate": 1.5743414719523935e-05, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.4418, |
|
"grad_norm": 5.553489685058594, |
|
"learning_rate": 1.565590268661941e-05, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 6.8532, |
|
"grad_norm": 4.176399230957031, |
|
"learning_rate": 1.5568390653714886e-05, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.1576, |
|
"grad_norm": 5.018221855163574, |
|
"learning_rate": 1.5480878620810362e-05, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.5015, |
|
"grad_norm": 3.439542293548584, |
|
"learning_rate": 1.5393366587905837e-05, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 6.8427, |
|
"grad_norm": 3.641223907470703, |
|
"learning_rate": 1.5305854555001313e-05, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.7335, |
|
"grad_norm": 5.225297451019287, |
|
"learning_rate": 1.521834252209679e-05, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.1007, |
|
"grad_norm": 3.5159335136413574, |
|
"learning_rate": 1.5130830489192265e-05, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.3267, |
|
"grad_norm": 4.219715118408203, |
|
"learning_rate": 1.5043318456287741e-05, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.0066, |
|
"grad_norm": 4.482273101806641, |
|
"learning_rate": 1.4955806423383215e-05, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.4307, |
|
"grad_norm": 4.263273239135742, |
|
"learning_rate": 1.486829439047869e-05, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.3687, |
|
"grad_norm": 4.202017784118652, |
|
"learning_rate": 1.4780782357574166e-05, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.6405, |
|
"grad_norm": 5.738183498382568, |
|
"learning_rate": 1.4693270324669641e-05, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.3089, |
|
"grad_norm": 5.287261962890625, |
|
"learning_rate": 1.4605758291765117e-05, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.0879, |
|
"grad_norm": 5.147162914276123, |
|
"learning_rate": 1.4518246258860594e-05, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.145, |
|
"grad_norm": 3.873149871826172, |
|
"learning_rate": 1.443073422595607e-05, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.1013, |
|
"grad_norm": 4.64039945602417, |
|
"learning_rate": 1.4343222193051545e-05, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.0642, |
|
"grad_norm": 3.6532037258148193, |
|
"learning_rate": 1.425571016014702e-05, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.1647, |
|
"grad_norm": 3.756361484527588, |
|
"learning_rate": 1.4168198127242496e-05, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.0501, |
|
"grad_norm": 3.5314979553222656, |
|
"learning_rate": 1.4080686094337972e-05, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.4017, |
|
"grad_norm": 3.386040687561035, |
|
"learning_rate": 1.3993174061433447e-05, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.4412, |
|
"grad_norm": 3.566223382949829, |
|
"learning_rate": 1.3905662028528924e-05, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.0911, |
|
"grad_norm": 5.274896621704102, |
|
"learning_rate": 1.38181499956244e-05, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.0295, |
|
"grad_norm": 5.615356922149658, |
|
"learning_rate": 1.3730637962719875e-05, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.1455, |
|
"grad_norm": 4.624752521514893, |
|
"learning_rate": 1.364312592981535e-05, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.1833, |
|
"grad_norm": 4.156666278839111, |
|
"learning_rate": 1.3555613896910826e-05, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.8835, |
|
"grad_norm": 3.4591434001922607, |
|
"learning_rate": 1.3468101864006302e-05, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.1062, |
|
"grad_norm": 3.3804733753204346, |
|
"learning_rate": 1.3380589831101777e-05, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.365, |
|
"grad_norm": 3.7281017303466797, |
|
"learning_rate": 1.3293077798197254e-05, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.0466, |
|
"grad_norm": 3.4281463623046875, |
|
"learning_rate": 1.320556576529273e-05, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.4435, |
|
"grad_norm": 3.9861958026885986, |
|
"learning_rate": 1.3118053732388205e-05, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.3772, |
|
"grad_norm": 5.1440253257751465, |
|
"learning_rate": 1.3030541699483681e-05, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.2704, |
|
"grad_norm": 3.7356927394866943, |
|
"learning_rate": 1.2943029666579153e-05, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.4473, |
|
"grad_norm": 3.138427257537842, |
|
"learning_rate": 1.285551763367463e-05, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.654, |
|
"grad_norm": 5.250783920288086, |
|
"learning_rate": 1.2768005600770106e-05, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.2723, |
|
"grad_norm": 3.7493326663970947, |
|
"learning_rate": 1.2680493567865581e-05, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.2502, |
|
"grad_norm": 4.482826232910156, |
|
"learning_rate": 1.2592981534961057e-05, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 6.9983, |
|
"grad_norm": 4.741217613220215, |
|
"learning_rate": 1.2505469502056532e-05, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.1721, |
|
"grad_norm": 5.053958892822266, |
|
"learning_rate": 1.2417957469152008e-05, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.2113, |
|
"grad_norm": 5.000698089599609, |
|
"learning_rate": 1.2330445436247483e-05, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.0898, |
|
"grad_norm": 5.456648826599121, |
|
"learning_rate": 1.224293340334296e-05, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.2822, |
|
"grad_norm": 3.733816146850586, |
|
"learning_rate": 1.2155421370438436e-05, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.4952, |
|
"grad_norm": 4.114339351654053, |
|
"learning_rate": 1.2067909337533912e-05, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 6.9912, |
|
"grad_norm": 3.963610887527466, |
|
"learning_rate": 1.1980397304629387e-05, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.2077, |
|
"grad_norm": 4.697625637054443, |
|
"learning_rate": 1.1892885271724863e-05, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 6.9453, |
|
"grad_norm": 3.8456337451934814, |
|
"learning_rate": 1.1805373238820338e-05, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.6018, |
|
"grad_norm": 3.9979872703552246, |
|
"learning_rate": 1.1717861205915814e-05, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.078, |
|
"grad_norm": 4.1047563552856445, |
|
"learning_rate": 1.163034917301129e-05, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 6.7399, |
|
"grad_norm": 5.3073248863220215, |
|
"learning_rate": 1.1542837140106766e-05, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 6.9167, |
|
"grad_norm": 5.714503765106201, |
|
"learning_rate": 1.145532510720224e-05, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.4903, |
|
"grad_norm": 3.9626924991607666, |
|
"learning_rate": 1.1367813074297716e-05, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.4558, |
|
"grad_norm": 4.751763343811035, |
|
"learning_rate": 1.1280301041393191e-05, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.5696, |
|
"grad_norm": 3.73614501953125, |
|
"learning_rate": 1.1192789008488668e-05, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.1071, |
|
"grad_norm": 3.236339569091797, |
|
"learning_rate": 1.1105276975584144e-05, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.6115, |
|
"grad_norm": 4.271381855010986, |
|
"learning_rate": 1.101776494267962e-05, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.1791, |
|
"grad_norm": 3.6989824771881104, |
|
"learning_rate": 1.0930252909775095e-05, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.0511, |
|
"grad_norm": 3.856694221496582, |
|
"learning_rate": 1.084274087687057e-05, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.2655, |
|
"grad_norm": 4.834972858428955, |
|
"learning_rate": 1.0755228843966046e-05, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.0369, |
|
"grad_norm": 4.6722211837768555, |
|
"learning_rate": 1.0667716811061521e-05, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.1936, |
|
"grad_norm": 4.993673324584961, |
|
"learning_rate": 1.0580204778156999e-05, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.3347, |
|
"grad_norm": 3.4490904808044434, |
|
"learning_rate": 1.0492692745252472e-05, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.736, |
|
"grad_norm": 3.283051013946533, |
|
"learning_rate": 1.0405180712347948e-05, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.0317, |
|
"grad_norm": 3.656076431274414, |
|
"learning_rate": 1.0317668679443423e-05, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 6.866, |
|
"grad_norm": 3.4769787788391113, |
|
"learning_rate": 1.0230156646538899e-05, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.4499, |
|
"grad_norm": 3.384229898452759, |
|
"learning_rate": 1.0142644613634374e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 6.9746, |
|
"grad_norm": 4.784582614898682, |
|
"learning_rate": 1.0055132580729852e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.0138, |
|
"grad_norm": 4.076469898223877, |
|
"learning_rate": 9.967620547825327e-06, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.2836, |
|
"grad_norm": 5.0796709060668945, |
|
"learning_rate": 9.880108514920803e-06, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.0612, |
|
"grad_norm": 4.263620853424072, |
|
"learning_rate": 9.792596482016278e-06, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.0573, |
|
"grad_norm": 4.355484485626221, |
|
"learning_rate": 9.705084449111754e-06, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.3385, |
|
"grad_norm": 6.618645668029785, |
|
"learning_rate": 9.617572416207229e-06, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.5994, |
|
"grad_norm": 4.804537296295166, |
|
"learning_rate": 9.530060383302705e-06, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 8.1717, |
|
"grad_norm": 4.777498722076416, |
|
"learning_rate": 9.44254835039818e-06, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.1703, |
|
"grad_norm": 3.5699825286865234, |
|
"learning_rate": 9.355036317493656e-06, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.0822, |
|
"grad_norm": 6.044339179992676, |
|
"learning_rate": 9.267524284589131e-06, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.0899, |
|
"grad_norm": 3.558217763900757, |
|
"learning_rate": 9.180012251684607e-06, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 6.9441, |
|
"grad_norm": 4.0059075355529785, |
|
"learning_rate": 9.092500218780082e-06, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.1065, |
|
"grad_norm": 5.324728012084961, |
|
"learning_rate": 9.004988185875558e-06, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 6.91, |
|
"grad_norm": 3.852426767349243, |
|
"learning_rate": 8.917476152971035e-06, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.7985, |
|
"grad_norm": 5.7546844482421875, |
|
"learning_rate": 8.82996412006651e-06, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.098, |
|
"grad_norm": 4.994897842407227, |
|
"learning_rate": 8.742452087161986e-06, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.4831, |
|
"grad_norm": 4.3503522872924805, |
|
"learning_rate": 8.654940054257461e-06, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.3563, |
|
"grad_norm": 3.4878551959991455, |
|
"learning_rate": 8.567428021352937e-06, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.8125, |
|
"grad_norm": 4.518803596496582, |
|
"learning_rate": 8.47991598844841e-06, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.1354, |
|
"grad_norm": 3.4671084880828857, |
|
"learning_rate": 8.392403955543888e-06, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.1782, |
|
"grad_norm": 5.328606128692627, |
|
"learning_rate": 8.304891922639363e-06, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.7318, |
|
"grad_norm": 5.223174095153809, |
|
"learning_rate": 8.217379889734839e-06, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 6.9753, |
|
"grad_norm": 3.5544798374176025, |
|
"learning_rate": 8.129867856830314e-06, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.5763, |
|
"grad_norm": 8.088041305541992, |
|
"learning_rate": 8.04235582392579e-06, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.9196, |
|
"grad_norm": 4.860823631286621, |
|
"learning_rate": 7.954843791021265e-06, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 6.7199, |
|
"grad_norm": 2.9834036827087402, |
|
"learning_rate": 7.867331758116741e-06, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.0354, |
|
"grad_norm": 3.6943893432617188, |
|
"learning_rate": 7.779819725212218e-06, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.7071, |
|
"grad_norm": 4.091310977935791, |
|
"learning_rate": 7.692307692307694e-06, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.2135, |
|
"grad_norm": 3.6830339431762695, |
|
"learning_rate": 7.604795659403169e-06, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.5218, |
|
"grad_norm": 3.4381253719329834, |
|
"learning_rate": 7.517283626498645e-06, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.1017, |
|
"grad_norm": 5.597609519958496, |
|
"learning_rate": 7.429771593594119e-06, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 6.7574, |
|
"grad_norm": 3.175727128982544, |
|
"learning_rate": 7.342259560689595e-06, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 6.8652, |
|
"grad_norm": 3.873260021209717, |
|
"learning_rate": 7.25474752778507e-06, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.0743, |
|
"grad_norm": 3.3759090900421143, |
|
"learning_rate": 7.167235494880546e-06, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.4346, |
|
"grad_norm": 4.680045127868652, |
|
"learning_rate": 7.079723461976022e-06, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.1374, |
|
"grad_norm": 5.331534385681152, |
|
"learning_rate": 6.992211429071498e-06, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.1309, |
|
"grad_norm": 5.658579349517822, |
|
"learning_rate": 6.904699396166973e-06, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 6.9677, |
|
"grad_norm": 4.185191631317139, |
|
"learning_rate": 6.8171873632624495e-06, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 6.9958, |
|
"grad_norm": 3.82647967338562, |
|
"learning_rate": 6.729675330357925e-06, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.1796, |
|
"grad_norm": 4.288081645965576, |
|
"learning_rate": 6.6421632974534005e-06, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.0815, |
|
"grad_norm": 3.3872106075286865, |
|
"learning_rate": 6.554651264548876e-06, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 6.9035, |
|
"grad_norm": 3.4726109504699707, |
|
"learning_rate": 6.467139231644351e-06, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.4375, |
|
"grad_norm": 5.091712951660156, |
|
"learning_rate": 6.379627198739826e-06, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.4423, |
|
"grad_norm": 3.271453380584717, |
|
"learning_rate": 6.2921151658353025e-06, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 6.976, |
|
"grad_norm": 3.8439278602600098, |
|
"learning_rate": 6.204603132930778e-06, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.7446, |
|
"grad_norm": 3.4631197452545166, |
|
"learning_rate": 6.1170911000262535e-06, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.7139, |
|
"grad_norm": 3.5582733154296875, |
|
"learning_rate": 6.02957906712173e-06, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.0974, |
|
"grad_norm": 4.480440139770508, |
|
"learning_rate": 5.942067034217205e-06, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.6834, |
|
"grad_norm": 4.127463340759277, |
|
"learning_rate": 5.854555001312681e-06, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.0067, |
|
"grad_norm": 4.044102191925049, |
|
"learning_rate": 5.767042968408156e-06, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.4619, |
|
"grad_norm": 5.20751953125, |
|
"learning_rate": 5.679530935503632e-06, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 6.9783, |
|
"grad_norm": 4.303256511688232, |
|
"learning_rate": 5.592018902599107e-06, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.093, |
|
"grad_norm": 5.665140151977539, |
|
"learning_rate": 5.504506869694583e-06, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.5041, |
|
"grad_norm": 4.066624164581299, |
|
"learning_rate": 5.416994836790059e-06, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 6.906, |
|
"grad_norm": 4.449793815612793, |
|
"learning_rate": 5.329482803885535e-06, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.4408, |
|
"grad_norm": 4.4521074295043945, |
|
"learning_rate": 5.2419707709810094e-06, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.1617, |
|
"grad_norm": 3.7591211795806885, |
|
"learning_rate": 5.154458738076486e-06, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 6.955, |
|
"grad_norm": 5.360795974731445, |
|
"learning_rate": 5.066946705171961e-06, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.3291, |
|
"grad_norm": 3.872117280960083, |
|
"learning_rate": 4.979434672267437e-06, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.7188, |
|
"grad_norm": 5.078587055206299, |
|
"learning_rate": 4.891922639362913e-06, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.1934, |
|
"grad_norm": 5.7633056640625, |
|
"learning_rate": 4.804410606458389e-06, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.0959, |
|
"grad_norm": 3.960428476333618, |
|
"learning_rate": 4.716898573553863e-06, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 6.9419, |
|
"grad_norm": 6.363913536071777, |
|
"learning_rate": 4.62938654064934e-06, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.0335, |
|
"grad_norm": 4.09603214263916, |
|
"learning_rate": 4.541874507744815e-06, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.2072, |
|
"grad_norm": 3.217400312423706, |
|
"learning_rate": 4.454362474840291e-06, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.3142, |
|
"grad_norm": 4.389254570007324, |
|
"learning_rate": 4.366850441935766e-06, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.3642, |
|
"grad_norm": 4.192555904388428, |
|
"learning_rate": 4.2793384090312425e-06, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 6.8518, |
|
"grad_norm": 4.6586809158325195, |
|
"learning_rate": 4.191826376126717e-06, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.4924, |
|
"grad_norm": 3.969644784927368, |
|
"learning_rate": 4.104314343222193e-06, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 6.8693, |
|
"grad_norm": 3.7710835933685303, |
|
"learning_rate": 4.016802310317669e-06, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 6.8957, |
|
"grad_norm": 3.549421548843384, |
|
"learning_rate": 3.9292902774131446e-06, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.6237, |
|
"grad_norm": 3.6362552642822266, |
|
"learning_rate": 3.84177824450862e-06, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.2684, |
|
"grad_norm": 4.023890972137451, |
|
"learning_rate": 3.754266211604096e-06, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.2039, |
|
"grad_norm": 3.6492927074432373, |
|
"learning_rate": 3.666754178699571e-06, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.7765, |
|
"grad_norm": 4.3814191818237305, |
|
"learning_rate": 3.579242145795047e-06, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.3461, |
|
"grad_norm": 5.455050468444824, |
|
"learning_rate": 3.4917301128905225e-06, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.2237, |
|
"grad_norm": 4.942239761352539, |
|
"learning_rate": 3.4042180799859984e-06, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 6.8001, |
|
"grad_norm": 3.596323251724243, |
|
"learning_rate": 3.316706047081474e-06, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.3173, |
|
"grad_norm": 3.8507444858551025, |
|
"learning_rate": 3.229194014176949e-06, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.1595, |
|
"grad_norm": 4.1059651374816895, |
|
"learning_rate": 3.141681981272425e-06, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.279, |
|
"grad_norm": 3.6779584884643555, |
|
"learning_rate": 3.0541699483679005e-06, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 6.9319, |
|
"grad_norm": 3.716569423675537, |
|
"learning_rate": 2.9666579154633764e-06, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.1694, |
|
"grad_norm": 4.070470333099365, |
|
"learning_rate": 2.8791458825588523e-06, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.8244, |
|
"grad_norm": 4.256218910217285, |
|
"learning_rate": 2.7916338496543274e-06, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.5893, |
|
"grad_norm": 3.4462125301361084, |
|
"learning_rate": 2.7041218167498033e-06, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 6.8421, |
|
"grad_norm": 3.3473029136657715, |
|
"learning_rate": 2.616609783845279e-06, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 6.735, |
|
"grad_norm": 4.084453582763672, |
|
"learning_rate": 2.5290977509407543e-06, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.4639, |
|
"grad_norm": 4.453617572784424, |
|
"learning_rate": 2.4415857180362303e-06, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.1289, |
|
"grad_norm": 3.5332283973693848, |
|
"learning_rate": 2.3540736851317058e-06, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 6.9, |
|
"grad_norm": 4.8280792236328125, |
|
"learning_rate": 2.2665616522271813e-06, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.534, |
|
"grad_norm": 4.160041809082031, |
|
"learning_rate": 2.179049619322657e-06, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.4365, |
|
"grad_norm": 4.7804975509643555, |
|
"learning_rate": 2.0915375864181323e-06, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.4622, |
|
"grad_norm": 5.68775749206543, |
|
"learning_rate": 2.0040255535136082e-06, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 6.9841, |
|
"grad_norm": null, |
|
"learning_rate": 1.9252647238995363e-06, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.0386, |
|
"grad_norm": 3.1843979358673096, |
|
"learning_rate": 1.8377526909950118e-06, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.1348, |
|
"grad_norm": 3.309314489364624, |
|
"learning_rate": 1.7502406580904876e-06, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.5823, |
|
"grad_norm": 3.1254711151123047, |
|
"learning_rate": 1.6627286251859633e-06, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.4536, |
|
"grad_norm": 4.925593852996826, |
|
"learning_rate": 1.5752165922814386e-06, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.7934, |
|
"grad_norm": 3.3663341999053955, |
|
"learning_rate": 1.4877045593769145e-06, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.2564, |
|
"grad_norm": 4.316028594970703, |
|
"learning_rate": 1.40019252647239e-06, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.0841, |
|
"grad_norm": 3.023416757583618, |
|
"learning_rate": 1.3126804935678657e-06, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.0354, |
|
"grad_norm": 4.041641712188721, |
|
"learning_rate": 1.2251684606633412e-06, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.2408, |
|
"grad_norm": 4.036230564117432, |
|
"learning_rate": 1.137656427758817e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 6.8542, |
|
"grad_norm": 3.975757360458374, |
|
"learning_rate": 1.0501443948542925e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.384, |
|
"grad_norm": 4.212265968322754, |
|
"learning_rate": 9.626323619497682e-07, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.5291, |
|
"grad_norm": 4.709102630615234, |
|
"learning_rate": 8.751203290452438e-07, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.2415, |
|
"grad_norm": 4.24073600769043, |
|
"learning_rate": 7.876082961407193e-07, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.117, |
|
"grad_norm": 4.139495849609375, |
|
"learning_rate": 7.00096263236195e-07, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.4584, |
|
"grad_norm": 3.581001043319702, |
|
"learning_rate": 6.125842303316706e-07, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.0442, |
|
"grad_norm": 3.6776018142700195, |
|
"learning_rate": 5.250721974271462e-07, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.1353, |
|
"grad_norm": 3.3257029056549072, |
|
"learning_rate": 4.375601645226219e-07, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.4847, |
|
"grad_norm": 4.7782697677612305, |
|
"learning_rate": 3.500481316180975e-07, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.4365, |
|
"grad_norm": 3.6555185317993164, |
|
"learning_rate": 2.625360987135731e-07, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.0578, |
|
"grad_norm": 3.675234079360962, |
|
"learning_rate": 1.7502406580904875e-07, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"loss": 6.8152, |
|
"grad_norm": 4.117830276489258, |
|
"learning_rate": 8.751203290452438e-08, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"train_runtime": 132410.4061, |
|
"train_samples_per_second": 2.762, |
|
"train_steps_per_second": 0.086, |
|
"train_loss": 8.721853916044362, |
|
"epoch": 3.0 |
|
} |
|
] |