|
[ |
|
{ |
|
"loss": 30.8234, |
|
"grad_norm": 0.5755094885826111, |
|
"learning_rate": 0.0009991248796709547, |
|
"epoch": 0.0 |
|
}, |
|
{ |
|
"loss": 24.1229, |
|
"grad_norm": 0.2920963764190674, |
|
"learning_rate": 0.0009982497593419095, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 22.7986, |
|
"grad_norm": 0.5106011629104614, |
|
"learning_rate": 0.0009973746390128642, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 21.4924, |
|
"grad_norm": 0.9322375059127808, |
|
"learning_rate": 0.000996499518683819, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 20.7911, |
|
"grad_norm": 0.8529098629951477, |
|
"learning_rate": 0.0009956243983547737, |
|
"epoch": 0.01 |
|
}, |
|
{ |
|
"loss": 19.338, |
|
"grad_norm": 0.776152491569519, |
|
"learning_rate": 0.0009947492780257286, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 19.0175, |
|
"grad_norm": 2.11796498298645, |
|
"learning_rate": 0.0009938741576966832, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 18.2997, |
|
"grad_norm": 1.3791886568069458, |
|
"learning_rate": 0.0009929990373676381, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 17.2791, |
|
"grad_norm": 1.3849037885665894, |
|
"learning_rate": 0.0009921239170385928, |
|
"epoch": 0.02 |
|
}, |
|
{ |
|
"loss": 17.3609, |
|
"grad_norm": 1.1861941814422607, |
|
"learning_rate": 0.0009912487967095476, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 17.1215, |
|
"grad_norm": 1.494122862815857, |
|
"learning_rate": 0.0009903736763805023, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.3944, |
|
"grad_norm": 1.5872834920883179, |
|
"learning_rate": 0.0009894985560514572, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 16.0054, |
|
"grad_norm": 1.2658979892730713, |
|
"learning_rate": 0.0009886234357224118, |
|
"epoch": 0.03 |
|
}, |
|
{ |
|
"loss": 15.5523, |
|
"grad_norm": 0.8640480041503906, |
|
"learning_rate": 0.0009877483153933667, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 16.2465, |
|
"grad_norm": 0.8946548700332642, |
|
"learning_rate": 0.0009868731950643213, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 15.0235, |
|
"grad_norm": 0.9279372692108154, |
|
"learning_rate": 0.0009859980747352762, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 15.7517, |
|
"grad_norm": 0.8807494044303894, |
|
"learning_rate": 0.0009851229544062309, |
|
"epoch": 0.04 |
|
}, |
|
{ |
|
"loss": 14.6884, |
|
"grad_norm": 0.683822751045227, |
|
"learning_rate": 0.0009842478340771857, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 14.0949, |
|
"grad_norm": 1.1334095001220703, |
|
"learning_rate": 0.0009833727137481404, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 14.3378, |
|
"grad_norm": 1.1247657537460327, |
|
"learning_rate": 0.0009824975934190953, |
|
"epoch": 0.05 |
|
}, |
|
{ |
|
"loss": 13.7597, |
|
"grad_norm": 0.9332773685455322, |
|
"learning_rate": 0.00098162247309005, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 14.5567, |
|
"grad_norm": 0.8742538690567017, |
|
"learning_rate": 0.0009807473527610048, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 14.0188, |
|
"grad_norm": 1.5592143535614014, |
|
"learning_rate": 0.0009798722324319594, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 13.9401, |
|
"grad_norm": 0.9473065733909607, |
|
"learning_rate": 0.0009789971121029143, |
|
"epoch": 0.06 |
|
}, |
|
{ |
|
"loss": 13.5177, |
|
"grad_norm": 0.5469663143157959, |
|
"learning_rate": 0.000978121991773869, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.513, |
|
"grad_norm": 1.7497597932815552, |
|
"learning_rate": 0.0009772468714448236, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.6579, |
|
"grad_norm": 0.7552927136421204, |
|
"learning_rate": 0.0009763717511157785, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.0899, |
|
"grad_norm": 0.5602779984474182, |
|
"learning_rate": 0.0009754966307867332, |
|
"epoch": 0.07 |
|
}, |
|
{ |
|
"loss": 13.8637, |
|
"grad_norm": 0.6577705144882202, |
|
"learning_rate": 0.000974621510457688, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 14.2909, |
|
"grad_norm": 1.0710817575454712, |
|
"learning_rate": 0.0009737463901286428, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.3632, |
|
"grad_norm": 0.48803457617759705, |
|
"learning_rate": 0.0009728712697995975, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.5002, |
|
"grad_norm": 0.9970788359642029, |
|
"learning_rate": 0.0009719961494705523, |
|
"epoch": 0.08 |
|
}, |
|
{ |
|
"loss": 13.6276, |
|
"grad_norm": 0.9624769687652588, |
|
"learning_rate": 0.000971121029141507, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 13.7281, |
|
"grad_norm": 0.8082631230354309, |
|
"learning_rate": 0.0009702459088124618, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 13.0793, |
|
"grad_norm": 0.6732771396636963, |
|
"learning_rate": 0.0009693707884834166, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 12.6621, |
|
"grad_norm": 0.8451002240180969, |
|
"learning_rate": 0.0009684956681543713, |
|
"epoch": 0.09 |
|
}, |
|
{ |
|
"loss": 13.2374, |
|
"grad_norm": 1.1656385660171509, |
|
"learning_rate": 0.0009676205478253261, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.7625, |
|
"grad_norm": 0.9667061567306519, |
|
"learning_rate": 0.0009667454274962808, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 13.0046, |
|
"grad_norm": 0.9311497807502747, |
|
"learning_rate": 0.0009658703071672355, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.9037, |
|
"grad_norm": 1.1891040802001953, |
|
"learning_rate": 0.0009649951868381903, |
|
"epoch": 0.1 |
|
}, |
|
{ |
|
"loss": 12.6521, |
|
"grad_norm": 1.1127817630767822, |
|
"learning_rate": 0.000964120066509145, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 13.2942, |
|
"grad_norm": 0.6665758490562439, |
|
"learning_rate": 0.0009632449461800998, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 12.4443, |
|
"grad_norm": 0.8878126740455627, |
|
"learning_rate": 0.0009623698258510546, |
|
"epoch": 0.11 |
|
}, |
|
{ |
|
"loss": 13.0001, |
|
"grad_norm": 1.5000464916229248, |
|
"learning_rate": 0.0009614947055220093, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.2303, |
|
"grad_norm": 1.1078687906265259, |
|
"learning_rate": 0.0009606195851929641, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.1915, |
|
"grad_norm": 0.8044748306274414, |
|
"learning_rate": 0.0009597444648639187, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 12.7246, |
|
"grad_norm": 0.9232500195503235, |
|
"learning_rate": 0.0009588693445348735, |
|
"epoch": 0.12 |
|
}, |
|
{ |
|
"loss": 11.9769, |
|
"grad_norm": 0.7413458824157715, |
|
"learning_rate": 0.0009579942242058283, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.8006, |
|
"grad_norm": 1.1132707595825195, |
|
"learning_rate": 0.000957119103876783, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.4323, |
|
"grad_norm": 0.7814503312110901, |
|
"learning_rate": 0.0009562439835477378, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.3482, |
|
"grad_norm": 0.8854762315750122, |
|
"learning_rate": 0.0009553688632186925, |
|
"epoch": 0.13 |
|
}, |
|
{ |
|
"loss": 12.5045, |
|
"grad_norm": 0.704131007194519, |
|
"learning_rate": 0.0009544937428896473, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 12.1405, |
|
"grad_norm": 0.7020297050476074, |
|
"learning_rate": 0.0009536186225606021, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 11.5427, |
|
"grad_norm": 0.398807168006897, |
|
"learning_rate": 0.0009527435022315568, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 12.655, |
|
"grad_norm": 1.0002299547195435, |
|
"learning_rate": 0.0009518683819025116, |
|
"epoch": 0.14 |
|
}, |
|
{ |
|
"loss": 11.9656, |
|
"grad_norm": 0.7870428562164307, |
|
"learning_rate": 0.0009509932615734664, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.4639, |
|
"grad_norm": 0.9154604077339172, |
|
"learning_rate": 0.0009501181412444211, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 11.6344, |
|
"grad_norm": 1.1896569728851318, |
|
"learning_rate": 0.0009492430209153759, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.4516, |
|
"grad_norm": 0.8169024586677551, |
|
"learning_rate": 0.0009483679005863306, |
|
"epoch": 0.15 |
|
}, |
|
{ |
|
"loss": 12.1848, |
|
"grad_norm": 0.8429264426231384, |
|
"learning_rate": 0.0009474927802572854, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 11.2014, |
|
"grad_norm": 0.8499436378479004, |
|
"learning_rate": 0.0009466176599282402, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 12.2217, |
|
"grad_norm": 0.8969743251800537, |
|
"learning_rate": 0.0009457425395991948, |
|
"epoch": 0.16 |
|
}, |
|
{ |
|
"loss": 11.7729, |
|
"grad_norm": 1.0959218740463257, |
|
"learning_rate": 0.0009448674192701496, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.6254, |
|
"grad_norm": 1.1692876815795898, |
|
"learning_rate": 0.0009439922989411043, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.5698, |
|
"grad_norm": 1.9476372003555298, |
|
"learning_rate": 0.0009431171786120591, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.4321, |
|
"grad_norm": 1.1742662191390991, |
|
"learning_rate": 0.0009422420582830139, |
|
"epoch": 0.17 |
|
}, |
|
{ |
|
"loss": 11.3224, |
|
"grad_norm": 0.9839737415313721, |
|
"learning_rate": 0.0009413669379539686, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.8269, |
|
"grad_norm": 0.9094179272651672, |
|
"learning_rate": 0.0009404918176249234, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.8652, |
|
"grad_norm": 0.9139958620071411, |
|
"learning_rate": 0.0009396166972958782, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.5493, |
|
"grad_norm": 0.7938945889472961, |
|
"learning_rate": 0.0009387415769668329, |
|
"epoch": 0.18 |
|
}, |
|
{ |
|
"loss": 11.413, |
|
"grad_norm": 0.8102487921714783, |
|
"learning_rate": 0.0009378664566377877, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.4015, |
|
"grad_norm": 0.5892770290374756, |
|
"learning_rate": 0.0009369913363087424, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 10.8455, |
|
"grad_norm": 0.7269143462181091, |
|
"learning_rate": 0.0009361162159796972, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 11.5612, |
|
"grad_norm": 0.8169882893562317, |
|
"learning_rate": 0.000935241095650652, |
|
"epoch": 0.19 |
|
}, |
|
{ |
|
"loss": 10.545, |
|
"grad_norm": 0.8424365520477295, |
|
"learning_rate": 0.0009343659753216067, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 10.8486, |
|
"grad_norm": 0.855518102645874, |
|
"learning_rate": 0.0009334908549925615, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 10.3733, |
|
"grad_norm": 1.1463903188705444, |
|
"learning_rate": 0.0009326157346635162, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 10.794, |
|
"grad_norm": 0.7493767142295837, |
|
"learning_rate": 0.000931740614334471, |
|
"epoch": 0.2 |
|
}, |
|
{ |
|
"loss": 10.5943, |
|
"grad_norm": 0.8767346739768982, |
|
"learning_rate": 0.0009308654940054258, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 11.4169, |
|
"grad_norm": 1.0650781393051147, |
|
"learning_rate": 0.0009299903736763805, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 10.8176, |
|
"grad_norm": 0.8954362869262695, |
|
"learning_rate": 0.0009291152533473353, |
|
"epoch": 0.21 |
|
}, |
|
{ |
|
"loss": 10.9644, |
|
"grad_norm": 0.697245180606842, |
|
"learning_rate": 0.0009282401330182901, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 11.0427, |
|
"grad_norm": 1.5471469163894653, |
|
"learning_rate": 0.0009273650126892448, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.8293, |
|
"grad_norm": 0.7173879146575928, |
|
"learning_rate": 0.0009264898923601996, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.744, |
|
"grad_norm": 1.1271495819091797, |
|
"learning_rate": 0.0009256147720311543, |
|
"epoch": 0.22 |
|
}, |
|
{ |
|
"loss": 10.3733, |
|
"grad_norm": 0.7106486558914185, |
|
"learning_rate": 0.0009247396517021091, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 10.9536, |
|
"grad_norm": 1.1200592517852783, |
|
"learning_rate": 0.0009238645313730638, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 10.4749, |
|
"grad_norm": 1.0028458833694458, |
|
"learning_rate": 0.0009229894110440185, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 11.4667, |
|
"grad_norm": 1.187585711479187, |
|
"learning_rate": 0.0009221142907149733, |
|
"epoch": 0.23 |
|
}, |
|
{ |
|
"loss": 10.3349, |
|
"grad_norm": 0.8691514134407043, |
|
"learning_rate": 0.000921239170385928, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.6188, |
|
"grad_norm": 0.8789599537849426, |
|
"learning_rate": 0.0009203640500568828, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.454, |
|
"grad_norm": 0.8376362919807434, |
|
"learning_rate": 0.0009194889297278376, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.2419, |
|
"grad_norm": 1.0760575532913208, |
|
"learning_rate": 0.0009186138093987923, |
|
"epoch": 0.24 |
|
}, |
|
{ |
|
"loss": 10.8593, |
|
"grad_norm": 0.709028422832489, |
|
"learning_rate": 0.0009177386890697471, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.073, |
|
"grad_norm": 1.0934019088745117, |
|
"learning_rate": 0.0009168635687407019, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 10.5596, |
|
"grad_norm": 0.7833492159843445, |
|
"learning_rate": 0.0009159884484116566, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.2079, |
|
"grad_norm": 0.8762934803962708, |
|
"learning_rate": 0.0009151133280826114, |
|
"epoch": 0.25 |
|
}, |
|
{ |
|
"loss": 11.2229, |
|
"grad_norm": 0.8059395551681519, |
|
"learning_rate": 0.0009142382077535661, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 10.8706, |
|
"grad_norm": 1.0892099142074585, |
|
"learning_rate": 0.0009133630874245209, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 10.9983, |
|
"grad_norm": 0.7471132278442383, |
|
"learning_rate": 0.0009124879670954757, |
|
"epoch": 0.26 |
|
}, |
|
{ |
|
"loss": 11.4291, |
|
"grad_norm": 0.9766479730606079, |
|
"learning_rate": 0.0009116128467664304, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.5895, |
|
"grad_norm": 0.7469794154167175, |
|
"learning_rate": 0.0009107377264373852, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 9.9826, |
|
"grad_norm": 0.9510082602500916, |
|
"learning_rate": 0.00090986260610834, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.1785, |
|
"grad_norm": 0.8061089515686035, |
|
"learning_rate": 0.0009089874857792947, |
|
"epoch": 0.27 |
|
}, |
|
{ |
|
"loss": 10.5502, |
|
"grad_norm": 0.7467952966690063, |
|
"learning_rate": 0.0009081123654502495, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.4848, |
|
"grad_norm": 0.9167515635490417, |
|
"learning_rate": 0.0009072372451212042, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.7841, |
|
"grad_norm": 1.0157630443572998, |
|
"learning_rate": 0.000906362124792159, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.6985, |
|
"grad_norm": 0.8764671087265015, |
|
"learning_rate": 0.0009054870044631138, |
|
"epoch": 0.28 |
|
}, |
|
{ |
|
"loss": 10.4706, |
|
"grad_norm": 0.7716103196144104, |
|
"learning_rate": 0.0009046118841340685, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.4371, |
|
"grad_norm": 0.83449387550354, |
|
"learning_rate": 0.0009037367638050233, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.2414, |
|
"grad_norm": 0.785839855670929, |
|
"learning_rate": 0.000902861643475978, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.0213, |
|
"grad_norm": 0.7405595183372498, |
|
"learning_rate": 0.0009019865231469327, |
|
"epoch": 0.29 |
|
}, |
|
{ |
|
"loss": 10.2501, |
|
"grad_norm": 0.929263710975647, |
|
"learning_rate": 0.0009011114028178875, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.6749, |
|
"grad_norm": 0.9185034036636353, |
|
"learning_rate": 0.0009002362824888422, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.4313, |
|
"grad_norm": 0.7888991832733154, |
|
"learning_rate": 0.000899361162159797, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 10.4389, |
|
"grad_norm": 0.9736090302467346, |
|
"learning_rate": 0.0008984860418307517, |
|
"epoch": 0.3 |
|
}, |
|
{ |
|
"loss": 9.9148, |
|
"grad_norm": 0.7677895426750183, |
|
"learning_rate": 0.0008976109215017065, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 9.7635, |
|
"grad_norm": 0.9090219736099243, |
|
"learning_rate": 0.0008967358011726613, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.0211, |
|
"grad_norm": 0.7184523344039917, |
|
"learning_rate": 0.000895860680843616, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 9.9932, |
|
"grad_norm": 1.0859735012054443, |
|
"learning_rate": 0.0008949855605145708, |
|
"epoch": 0.31 |
|
}, |
|
{ |
|
"loss": 10.2804, |
|
"grad_norm": 1.0252892971038818, |
|
"learning_rate": 0.0008941104401855256, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.0543, |
|
"grad_norm": 1.1707403659820557, |
|
"learning_rate": 0.0008932353198564803, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 10.6658, |
|
"grad_norm": 0.6616178750991821, |
|
"learning_rate": 0.0008923601995274351, |
|
"epoch": 0.32 |
|
}, |
|
{ |
|
"loss": 9.8623, |
|
"grad_norm": 1.9947571754455566, |
|
"learning_rate": 0.0008914850791983898, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.1607, |
|
"grad_norm": 1.3363871574401855, |
|
"learning_rate": 0.0008906099588693446, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.1063, |
|
"grad_norm": 1.0039112567901611, |
|
"learning_rate": 0.0008897348385402994, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 9.7059, |
|
"grad_norm": 1.0225836038589478, |
|
"learning_rate": 0.0008888597182112541, |
|
"epoch": 0.33 |
|
}, |
|
{ |
|
"loss": 10.2506, |
|
"grad_norm": 1.1005779504776, |
|
"learning_rate": 0.0008879845978822089, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.3011, |
|
"grad_norm": 1.1654433012008667, |
|
"learning_rate": 0.0008871094775531636, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 10.088, |
|
"grad_norm": 0.9155218601226807, |
|
"learning_rate": 0.0008862343572241184, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 9.8835, |
|
"grad_norm": 1.2090198993682861, |
|
"learning_rate": 0.0008853592368950732, |
|
"epoch": 0.34 |
|
}, |
|
{ |
|
"loss": 9.6644, |
|
"grad_norm": 1.5198620557785034, |
|
"learning_rate": 0.0008844841165660279, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 9.6799, |
|
"grad_norm": 1.0043960809707642, |
|
"learning_rate": 0.0008836089962369827, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 10.0658, |
|
"grad_norm": 1.0404608249664307, |
|
"learning_rate": 0.0008827338759079375, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 9.9551, |
|
"grad_norm": 1.0412163734436035, |
|
"learning_rate": 0.0008818587555788922, |
|
"epoch": 0.35 |
|
}, |
|
{ |
|
"loss": 9.4082, |
|
"grad_norm": 0.9032560586929321, |
|
"learning_rate": 0.000880983635249847, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.2566, |
|
"grad_norm": 1.2763034105300903, |
|
"learning_rate": 0.0008801085149208016, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 9.8585, |
|
"grad_norm": 0.8143719434738159, |
|
"learning_rate": 0.0008792333945917563, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 9.5974, |
|
"grad_norm": 1.3916654586791992, |
|
"learning_rate": 0.000878358274262711, |
|
"epoch": 0.36 |
|
}, |
|
{ |
|
"loss": 10.611, |
|
"grad_norm": 1.2270894050598145, |
|
"learning_rate": 0.0008774831539336658, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 9.4489, |
|
"grad_norm": 1.339573621749878, |
|
"learning_rate": 0.0008766080336046206, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 9.769, |
|
"grad_norm": 1.023978352546692, |
|
"learning_rate": 0.0008757329132755753, |
|
"epoch": 0.37 |
|
}, |
|
{ |
|
"loss": 9.7854, |
|
"grad_norm": 1.1513617038726807, |
|
"learning_rate": 0.0008748577929465301, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.4378, |
|
"grad_norm": 0.9918627142906189, |
|
"learning_rate": 0.0008739826726174849, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.6902, |
|
"grad_norm": 0.9365573525428772, |
|
"learning_rate": 0.0008731075522884396, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.5533, |
|
"grad_norm": 1.1697934865951538, |
|
"learning_rate": 0.0008722324319593944, |
|
"epoch": 0.38 |
|
}, |
|
{ |
|
"loss": 9.5204, |
|
"grad_norm": 1.2257342338562012, |
|
"learning_rate": 0.0008713573116303491, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.636, |
|
"grad_norm": 1.0158884525299072, |
|
"learning_rate": 0.0008704821913013039, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.8914, |
|
"grad_norm": 1.4228135347366333, |
|
"learning_rate": 0.0008696070709722587, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.3714, |
|
"grad_norm": 1.2829135656356812, |
|
"learning_rate": 0.0008687319506432134, |
|
"epoch": 0.39 |
|
}, |
|
{ |
|
"loss": 9.7498, |
|
"grad_norm": 1.2624573707580566, |
|
"learning_rate": 0.0008678568303141682, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 9.8928, |
|
"grad_norm": 1.3651659488677979, |
|
"learning_rate": 0.000866981709985123, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.3697, |
|
"grad_norm": 1.1383252143859863, |
|
"learning_rate": 0.0008661065896560777, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 10.1876, |
|
"grad_norm": 1.1688463687896729, |
|
"learning_rate": 0.0008652314693270325, |
|
"epoch": 0.4 |
|
}, |
|
{ |
|
"loss": 9.7974, |
|
"grad_norm": 1.1377474069595337, |
|
"learning_rate": 0.0008643563489979872, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.5742, |
|
"grad_norm": 1.0107587575912476, |
|
"learning_rate": 0.000863481228668942, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.9821, |
|
"grad_norm": 1.3488329648971558, |
|
"learning_rate": 0.0008626061083398968, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.3107, |
|
"grad_norm": 1.0305010080337524, |
|
"learning_rate": 0.0008617309880108515, |
|
"epoch": 0.41 |
|
}, |
|
{ |
|
"loss": 9.3456, |
|
"grad_norm": 0.8658286929130554, |
|
"learning_rate": 0.0008608558676818063, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 9.3709, |
|
"grad_norm": 1.1033709049224854, |
|
"learning_rate": 0.000859980747352761, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 9.5077, |
|
"grad_norm": 1.1051572561264038, |
|
"learning_rate": 0.0008591056270237157, |
|
"epoch": 0.42 |
|
}, |
|
{ |
|
"loss": 9.1458, |
|
"grad_norm": 1.3423538208007812, |
|
"learning_rate": 0.0008582305066946705, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 9.657, |
|
"grad_norm": 1.1479153633117676, |
|
"learning_rate": 0.0008573553863656252, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 10.5804, |
|
"grad_norm": 1.1615872383117676, |
|
"learning_rate": 0.00085648026603658, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 8.2792, |
|
"grad_norm": 1.212221384048462, |
|
"learning_rate": 0.0008556051457075347, |
|
"epoch": 0.43 |
|
}, |
|
{ |
|
"loss": 9.3785, |
|
"grad_norm": 1.0849367380142212, |
|
"learning_rate": 0.0008547300253784895, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 9.4097, |
|
"grad_norm": 1.119325041770935, |
|
"learning_rate": 0.0008538549050494443, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 9.3308, |
|
"grad_norm": 1.3356918096542358, |
|
"learning_rate": 0.000852979784720399, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 9.4548, |
|
"grad_norm": 0.9954844117164612, |
|
"learning_rate": 0.0008521046643913538, |
|
"epoch": 0.44 |
|
}, |
|
{ |
|
"loss": 8.9297, |
|
"grad_norm": 0.8752724528312683, |
|
"learning_rate": 0.0008512295440623086, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.1389, |
|
"grad_norm": 1.2811753749847412, |
|
"learning_rate": 0.0008503544237332633, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.3155, |
|
"grad_norm": 1.253055453300476, |
|
"learning_rate": 0.0008494793034042181, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.548, |
|
"grad_norm": 1.2081260681152344, |
|
"learning_rate": 0.0008486041830751728, |
|
"epoch": 0.45 |
|
}, |
|
{ |
|
"loss": 9.0236, |
|
"grad_norm": 1.3752362728118896, |
|
"learning_rate": 0.0008477290627461276, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.0533, |
|
"grad_norm": 1.057065725326538, |
|
"learning_rate": 0.0008468539424170824, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.0675, |
|
"grad_norm": 1.0036309957504272, |
|
"learning_rate": 0.0008459788220880371, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.5195, |
|
"grad_norm": 1.3881008625030518, |
|
"learning_rate": 0.0008451037017589919, |
|
"epoch": 0.46 |
|
}, |
|
{ |
|
"loss": 9.3519, |
|
"grad_norm": 1.4355233907699585, |
|
"learning_rate": 0.0008442285814299467, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 9.6383, |
|
"grad_norm": 0.9438649415969849, |
|
"learning_rate": 0.0008433534611009014, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 9.2643, |
|
"grad_norm": 0.8599776029586792, |
|
"learning_rate": 0.0008424783407718562, |
|
"epoch": 0.47 |
|
}, |
|
{ |
|
"loss": 8.9869, |
|
"grad_norm": 1.1090342998504639, |
|
"learning_rate": 0.0008416032204428109, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.2475, |
|
"grad_norm": 1.272929310798645, |
|
"learning_rate": 0.0008407281001137657, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.5772, |
|
"grad_norm": 0.9889743328094482, |
|
"learning_rate": 0.0008398529797847205, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.9227, |
|
"grad_norm": 1.2748692035675049, |
|
"learning_rate": 0.0008389778594556752, |
|
"epoch": 0.48 |
|
}, |
|
{ |
|
"loss": 9.9915, |
|
"grad_norm": 1.4889165163040161, |
|
"learning_rate": 0.00083810273912663, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.0012, |
|
"grad_norm": 1.2172118425369263, |
|
"learning_rate": 0.0008372276187975846, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.2968, |
|
"grad_norm": 1.0313849449157715, |
|
"learning_rate": 0.0008363524984685394, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 8.9158, |
|
"grad_norm": 1.3325482606887817, |
|
"learning_rate": 0.0008354773781394942, |
|
"epoch": 0.49 |
|
}, |
|
{ |
|
"loss": 9.0097, |
|
"grad_norm": 1.5407133102416992, |
|
"learning_rate": 0.0008346022578104489, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.0166, |
|
"grad_norm": 1.1565685272216797, |
|
"learning_rate": 0.0008337271374814037, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.1856, |
|
"grad_norm": 1.0405404567718506, |
|
"learning_rate": 0.0008328520171523584, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 9.2405, |
|
"grad_norm": 1.465058445930481, |
|
"learning_rate": 0.0008319768968233132, |
|
"epoch": 0.5 |
|
}, |
|
{ |
|
"loss": 8.835, |
|
"grad_norm": 0.9321463704109192, |
|
"learning_rate": 0.000831101776494268, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.4076, |
|
"grad_norm": 1.1780034303665161, |
|
"learning_rate": 0.0008302266561652227, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 9.5994, |
|
"grad_norm": 1.488897681236267, |
|
"learning_rate": 0.0008293515358361775, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 8.6378, |
|
"grad_norm": 1.0508447885513306, |
|
"learning_rate": 0.0008284764155071323, |
|
"epoch": 0.51 |
|
}, |
|
{ |
|
"loss": 8.7946, |
|
"grad_norm": 1.2236040830612183, |
|
"learning_rate": 0.000827601295178087, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.4619, |
|
"grad_norm": 1.0602221488952637, |
|
"learning_rate": 0.0008267261748490418, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 8.927, |
|
"grad_norm": 1.476576328277588, |
|
"learning_rate": 0.0008258510545199965, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 8.766, |
|
"grad_norm": 1.2723809480667114, |
|
"learning_rate": 0.0008249759341909513, |
|
"epoch": 0.52 |
|
}, |
|
{ |
|
"loss": 9.1577, |
|
"grad_norm": 1.2955093383789062, |
|
"learning_rate": 0.0008241008138619061, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 8.8254, |
|
"grad_norm": 1.1421802043914795, |
|
"learning_rate": 0.0008232256935328608, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 9.3559, |
|
"grad_norm": 1.2015204429626465, |
|
"learning_rate": 0.0008223505732038156, |
|
"epoch": 0.53 |
|
}, |
|
{ |
|
"loss": 8.7055, |
|
"grad_norm": 1.02347993850708, |
|
"learning_rate": 0.0008214754528747703, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.1773, |
|
"grad_norm": 1.0733789205551147, |
|
"learning_rate": 0.0008206003325457251, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.4909, |
|
"grad_norm": 1.140329360961914, |
|
"learning_rate": 0.0008197252122166799, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 8.4982, |
|
"grad_norm": 0.8933946490287781, |
|
"learning_rate": 0.0008188500918876346, |
|
"epoch": 0.54 |
|
}, |
|
{ |
|
"loss": 9.4497, |
|
"grad_norm": 1.3848881721496582, |
|
"learning_rate": 0.0008179749715585894, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.5758, |
|
"grad_norm": 1.175162672996521, |
|
"learning_rate": 0.0008170998512295442, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.5138, |
|
"grad_norm": 1.1983882188796997, |
|
"learning_rate": 0.0008162247309004989, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.0283, |
|
"grad_norm": 0.9055472612380981, |
|
"learning_rate": 0.0008153496105714536, |
|
"epoch": 0.55 |
|
}, |
|
{ |
|
"loss": 9.2822, |
|
"grad_norm": 0.8885380029678345, |
|
"learning_rate": 0.0008144744902424083, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 8.9084, |
|
"grad_norm": 1.0463942289352417, |
|
"learning_rate": 0.0008135993699133631, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.0612, |
|
"grad_norm": 1.1517601013183594, |
|
"learning_rate": 0.0008127242495843179, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 9.7954, |
|
"grad_norm": 1.6062026023864746, |
|
"learning_rate": 0.0008118491292552726, |
|
"epoch": 0.56 |
|
}, |
|
{ |
|
"loss": 8.823, |
|
"grad_norm": 1.079883098602295, |
|
"learning_rate": 0.0008109740089262274, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 8.6287, |
|
"grad_norm": 0.8593969345092773, |
|
"learning_rate": 0.0008100988885971821, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.046, |
|
"grad_norm": 1.5058172941207886, |
|
"learning_rate": 0.0008092237682681369, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 8.4422, |
|
"grad_norm": 1.0326484441757202, |
|
"learning_rate": 0.0008083486479390917, |
|
"epoch": 0.57 |
|
}, |
|
{ |
|
"loss": 9.5016, |
|
"grad_norm": 0.9177812337875366, |
|
"learning_rate": 0.0008074735276100464, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 8.4734, |
|
"grad_norm": 1.1267443895339966, |
|
"learning_rate": 0.0008065984072810012, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 8.5878, |
|
"grad_norm": 0.9788813591003418, |
|
"learning_rate": 0.000805723286951956, |
|
"epoch": 0.58 |
|
}, |
|
{ |
|
"loss": 9.1188, |
|
"grad_norm": 1.1300309896469116, |
|
"learning_rate": 0.0008048481666229107, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 8.7167, |
|
"grad_norm": 0.9951778650283813, |
|
"learning_rate": 0.0008039730462938655, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 9.1088, |
|
"grad_norm": 0.9415300488471985, |
|
"learning_rate": 0.0008030979259648202, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 8.4083, |
|
"grad_norm": 0.990203320980072, |
|
"learning_rate": 0.000802222805635775, |
|
"epoch": 0.59 |
|
}, |
|
{ |
|
"loss": 8.4926, |
|
"grad_norm": 1.0430456399917603, |
|
"learning_rate": 0.0008013476853067298, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 9.3307, |
|
"grad_norm": 0.9623116254806519, |
|
"learning_rate": 0.0008004725649776845, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 8.8633, |
|
"grad_norm": 1.0354257822036743, |
|
"learning_rate": 0.0007995974446486392, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 8.7932, |
|
"grad_norm": 1.1962673664093018, |
|
"learning_rate": 0.0007987223243195939, |
|
"epoch": 0.6 |
|
}, |
|
{ |
|
"loss": 8.4265, |
|
"grad_norm": 1.0186195373535156, |
|
"learning_rate": 0.0007978472039905487, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 8.4596, |
|
"grad_norm": 1.2448772192001343, |
|
"learning_rate": 0.0007969720836615035, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 9.0019, |
|
"grad_norm": 1.11643385887146, |
|
"learning_rate": 0.0007960969633324582, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 8.7469, |
|
"grad_norm": 1.9622658491134644, |
|
"learning_rate": 0.000795221843003413, |
|
"epoch": 0.61 |
|
}, |
|
{ |
|
"loss": 8.208, |
|
"grad_norm": 0.9547304511070251, |
|
"learning_rate": 0.0007943467226743676, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.3751, |
|
"grad_norm": 0.8313985466957092, |
|
"learning_rate": 0.0007934716023453224, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.6238, |
|
"grad_norm": 0.9323874711990356, |
|
"learning_rate": 0.0007925964820162772, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 9.0078, |
|
"grad_norm": 1.0662554502487183, |
|
"learning_rate": 0.0007917213616872319, |
|
"epoch": 0.62 |
|
}, |
|
{ |
|
"loss": 8.7407, |
|
"grad_norm": 1.197045087814331, |
|
"learning_rate": 0.0007908462413581867, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 8.9698, |
|
"grad_norm": 1.0494697093963623, |
|
"learning_rate": 0.0007899711210291415, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 8.56, |
|
"grad_norm": 0.9860395789146423, |
|
"learning_rate": 0.0007890960007000962, |
|
"epoch": 0.63 |
|
}, |
|
{ |
|
"loss": 8.624, |
|
"grad_norm": 0.8026842474937439, |
|
"learning_rate": 0.000788220880371051, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 9.1911, |
|
"grad_norm": 1.0249046087265015, |
|
"learning_rate": 0.0007873457600420057, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 8.552, |
|
"grad_norm": 1.3037137985229492, |
|
"learning_rate": 0.0007864706397129605, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 8.6872, |
|
"grad_norm": 1.1018158197402954, |
|
"learning_rate": 0.0007855955193839153, |
|
"epoch": 0.64 |
|
}, |
|
{ |
|
"loss": 8.5007, |
|
"grad_norm": 0.9974724054336548, |
|
"learning_rate": 0.00078472039905487, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.3866, |
|
"grad_norm": 1.2537139654159546, |
|
"learning_rate": 0.0007838452787258248, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 8.9869, |
|
"grad_norm": 1.2758492231369019, |
|
"learning_rate": 0.0007829701583967795, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 8.266, |
|
"grad_norm": 0.9684768915176392, |
|
"learning_rate": 0.0007820950380677343, |
|
"epoch": 0.65 |
|
}, |
|
{ |
|
"loss": 9.0718, |
|
"grad_norm": 1.0212547779083252, |
|
"learning_rate": 0.0007812199177386891, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.1438, |
|
"grad_norm": 1.2493318319320679, |
|
"learning_rate": 0.0007803447974096438, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.4132, |
|
"grad_norm": 0.8168124556541443, |
|
"learning_rate": 0.0007794696770805986, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.4466, |
|
"grad_norm": 1.2837003469467163, |
|
"learning_rate": 0.0007785945567515534, |
|
"epoch": 0.66 |
|
}, |
|
{ |
|
"loss": 8.6008, |
|
"grad_norm": 1.1589733362197876, |
|
"learning_rate": 0.0007777194364225081, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 8.7002, |
|
"grad_norm": 1.036216378211975, |
|
"learning_rate": 0.0007768443160934629, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 8.9616, |
|
"grad_norm": 0.9488565921783447, |
|
"learning_rate": 0.0007759691957644176, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 8.9011, |
|
"grad_norm": 1.1349655389785767, |
|
"learning_rate": 0.0007750940754353724, |
|
"epoch": 0.67 |
|
}, |
|
{ |
|
"loss": 8.7398, |
|
"grad_norm": 1.3466508388519287, |
|
"learning_rate": 0.0007742189551063272, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 8.1787, |
|
"grad_norm": 1.1343966722488403, |
|
"learning_rate": 0.0007733438347772819, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 8.4513, |
|
"grad_norm": 0.9983484148979187, |
|
"learning_rate": 0.0007724687144482366, |
|
"epoch": 0.68 |
|
}, |
|
{ |
|
"loss": 8.6249, |
|
"grad_norm": 1.4816855192184448, |
|
"learning_rate": 0.0007715935941191913, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.9094, |
|
"grad_norm": 1.0790578126907349, |
|
"learning_rate": 0.0007707184737901461, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.0177, |
|
"grad_norm": 1.2572119235992432, |
|
"learning_rate": 0.0007698433534611009, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.5014, |
|
"grad_norm": 1.123079776763916, |
|
"learning_rate": 0.0007689682331320556, |
|
"epoch": 0.69 |
|
}, |
|
{ |
|
"loss": 8.2177, |
|
"grad_norm": 0.8789654970169067, |
|
"learning_rate": 0.0007680931128030104, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.3753, |
|
"grad_norm": 0.9512013792991638, |
|
"learning_rate": 0.0007672179924739651, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.5434, |
|
"grad_norm": 1.929919719696045, |
|
"learning_rate": 0.0007663428721449199, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.5505, |
|
"grad_norm": 1.1756147146224976, |
|
"learning_rate": 0.0007654677518158747, |
|
"epoch": 0.7 |
|
}, |
|
{ |
|
"loss": 8.8823, |
|
"grad_norm": 1.1833679676055908, |
|
"learning_rate": 0.0007645926314868294, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.6715, |
|
"grad_norm": 1.4701839685440063, |
|
"learning_rate": 0.0007637175111577842, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.7559, |
|
"grad_norm": 0.9352959990501404, |
|
"learning_rate": 0.0007629299028616435, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 9.5594, |
|
"grad_norm": 1.0391898155212402, |
|
"learning_rate": 0.0007620547825325983, |
|
"epoch": 0.71 |
|
}, |
|
{ |
|
"loss": 8.3431, |
|
"grad_norm": 1.0766905546188354, |
|
"learning_rate": 0.000761179662203553, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.3928, |
|
"grad_norm": 1.10299551486969, |
|
"learning_rate": 0.0007603045418745078, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.9913, |
|
"grad_norm": 1.1581339836120605, |
|
"learning_rate": 0.0007594294215454624, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.5142, |
|
"grad_norm": 1.086441993713379, |
|
"learning_rate": 0.0007585543012164172, |
|
"epoch": 0.72 |
|
}, |
|
{ |
|
"loss": 8.7005, |
|
"grad_norm": 0.9478667974472046, |
|
"learning_rate": 0.000757679180887372, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.608, |
|
"grad_norm": 1.0929220914840698, |
|
"learning_rate": 0.0007568040605583267, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.1125, |
|
"grad_norm": 1.217629313468933, |
|
"learning_rate": 0.0007559289402292815, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 8.4331, |
|
"grad_norm": 1.2786823511123657, |
|
"learning_rate": 0.0007550538199002362, |
|
"epoch": 0.73 |
|
}, |
|
{ |
|
"loss": 9.1985, |
|
"grad_norm": 1.0184354782104492, |
|
"learning_rate": 0.000754178699571191, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 8.6549, |
|
"grad_norm": 0.93660968542099, |
|
"learning_rate": 0.0007533035792421458, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 8.7819, |
|
"grad_norm": 1.0092636346817017, |
|
"learning_rate": 0.0007524284589131005, |
|
"epoch": 0.74 |
|
}, |
|
{ |
|
"loss": 8.3759, |
|
"grad_norm": 1.2108792066574097, |
|
"learning_rate": 0.0007515533385840553, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.4973, |
|
"grad_norm": 0.9994498491287231, |
|
"learning_rate": 0.00075067821825501, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.3731, |
|
"grad_norm": 1.153273344039917, |
|
"learning_rate": 0.0007498030979259648, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.4148, |
|
"grad_norm": 1.051223874092102, |
|
"learning_rate": 0.0007489279775969196, |
|
"epoch": 0.75 |
|
}, |
|
{ |
|
"loss": 8.6672, |
|
"grad_norm": 1.4810237884521484, |
|
"learning_rate": 0.0007480528572678743, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.6439, |
|
"grad_norm": 1.021606206893921, |
|
"learning_rate": 0.0007471777369388291, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.7591, |
|
"grad_norm": 0.8680776357650757, |
|
"learning_rate": 0.0007463026166097839, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 9.0187, |
|
"grad_norm": 1.0177042484283447, |
|
"learning_rate": 0.0007454274962807386, |
|
"epoch": 0.76 |
|
}, |
|
{ |
|
"loss": 8.9481, |
|
"grad_norm": 1.2384392023086548, |
|
"learning_rate": 0.0007445523759516934, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 8.6184, |
|
"grad_norm": 1.3748959302902222, |
|
"learning_rate": 0.0007436772556226481, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 8.3906, |
|
"grad_norm": 1.042493462562561, |
|
"learning_rate": 0.0007428021352936029, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 9.3308, |
|
"grad_norm": 1.0647776126861572, |
|
"learning_rate": 0.0007419270149645576, |
|
"epoch": 0.77 |
|
}, |
|
{ |
|
"loss": 8.332, |
|
"grad_norm": 1.2385993003845215, |
|
"learning_rate": 0.0007410518946355123, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.3127, |
|
"grad_norm": 1.0191227197647095, |
|
"learning_rate": 0.0007401767743064671, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.3151, |
|
"grad_norm": 0.8735216856002808, |
|
"learning_rate": 0.0007393016539774218, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 8.701, |
|
"grad_norm": 1.202993392944336, |
|
"learning_rate": 0.0007384265336483766, |
|
"epoch": 0.78 |
|
}, |
|
{ |
|
"loss": 7.8262, |
|
"grad_norm": 0.9682905673980713, |
|
"learning_rate": 0.0007375514133193314, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 8.4729, |
|
"grad_norm": 1.2290154695510864, |
|
"learning_rate": 0.0007366762929902861, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 8.9253, |
|
"grad_norm": 1.0369175672531128, |
|
"learning_rate": 0.0007358011726612409, |
|
"epoch": 0.79 |
|
}, |
|
{ |
|
"loss": 9.2036, |
|
"grad_norm": 1.0748445987701416, |
|
"learning_rate": 0.0007349260523321957, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.2364, |
|
"grad_norm": 1.147964596748352, |
|
"learning_rate": 0.0007340509320031504, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 9.006, |
|
"grad_norm": 1.0363622903823853, |
|
"learning_rate": 0.0007331758116741052, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.7969, |
|
"grad_norm": 1.2576889991760254, |
|
"learning_rate": 0.0007323006913450599, |
|
"epoch": 0.8 |
|
}, |
|
{ |
|
"loss": 8.4052, |
|
"grad_norm": 1.1075588464736938, |
|
"learning_rate": 0.0007314255710160147, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.5912, |
|
"grad_norm": 1.0697672367095947, |
|
"learning_rate": 0.0007305504506869695, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.7837, |
|
"grad_norm": 1.0865002870559692, |
|
"learning_rate": 0.0007296753303579242, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.0798, |
|
"grad_norm": 1.3645957708358765, |
|
"learning_rate": 0.000728800210028879, |
|
"epoch": 0.81 |
|
}, |
|
{ |
|
"loss": 8.2649, |
|
"grad_norm": 1.0889688730239868, |
|
"learning_rate": 0.0007279250896998337, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 7.902, |
|
"grad_norm": 0.9943633675575256, |
|
"learning_rate": 0.0007270499693707885, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.493, |
|
"grad_norm": 1.3548861742019653, |
|
"learning_rate": 0.0007261748490417433, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 9.2024, |
|
"grad_norm": 1.1603728532791138, |
|
"learning_rate": 0.000725299728712698, |
|
"epoch": 0.82 |
|
}, |
|
{ |
|
"loss": 8.7272, |
|
"grad_norm": 1.2872350215911865, |
|
"learning_rate": 0.0007244246083836528, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.8292, |
|
"grad_norm": 1.0431410074234009, |
|
"learning_rate": 0.0007235494880546076, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.0473, |
|
"grad_norm": 0.9648978114128113, |
|
"learning_rate": 0.0007226743677255623, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.134, |
|
"grad_norm": 0.8962783217430115, |
|
"learning_rate": 0.0007217992473965171, |
|
"epoch": 0.83 |
|
}, |
|
{ |
|
"loss": 8.2796, |
|
"grad_norm": 0.8879069685935974, |
|
"learning_rate": 0.0007209241270674718, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 8.6275, |
|
"grad_norm": 1.0046008825302124, |
|
"learning_rate": 0.0007200490067384265, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 8.2847, |
|
"grad_norm": 1.1034067869186401, |
|
"learning_rate": 0.0007191738864093813, |
|
"epoch": 0.84 |
|
}, |
|
{ |
|
"loss": 8.723, |
|
"grad_norm": 0.9179050326347351, |
|
"learning_rate": 0.000718298766080336, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.2843, |
|
"grad_norm": 1.0402296781539917, |
|
"learning_rate": 0.0007174236457512908, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.2487, |
|
"grad_norm": 1.2751373052597046, |
|
"learning_rate": 0.0007165485254222455, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.3491, |
|
"grad_norm": 0.8596373200416565, |
|
"learning_rate": 0.0007156734050932003, |
|
"epoch": 0.85 |
|
}, |
|
{ |
|
"loss": 8.4695, |
|
"grad_norm": 1.0553058385849, |
|
"learning_rate": 0.0007147982847641551, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.74, |
|
"grad_norm": 1.0505644083023071, |
|
"learning_rate": 0.0007139231644351098, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.3704, |
|
"grad_norm": 1.4136569499969482, |
|
"learning_rate": 0.0007130480441060646, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 7.9998, |
|
"grad_norm": 0.9397268295288086, |
|
"learning_rate": 0.0007121729237770194, |
|
"epoch": 0.86 |
|
}, |
|
{ |
|
"loss": 8.5978, |
|
"grad_norm": 1.1479915380477905, |
|
"learning_rate": 0.0007112978034479741, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.6225, |
|
"grad_norm": 1.0489866733551025, |
|
"learning_rate": 0.0007104226831189289, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.3155, |
|
"grad_norm": 0.9371022582054138, |
|
"learning_rate": 0.0007095475627898836, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.3844, |
|
"grad_norm": 1.1981381177902222, |
|
"learning_rate": 0.0007086724424608384, |
|
"epoch": 0.87 |
|
}, |
|
{ |
|
"loss": 8.5061, |
|
"grad_norm": 0.8924277424812317, |
|
"learning_rate": 0.0007077973221317932, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.1918, |
|
"grad_norm": 1.4077969789505005, |
|
"learning_rate": 0.0007069222018027479, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.3377, |
|
"grad_norm": 1.1926066875457764, |
|
"learning_rate": 0.0007060470814737027, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.4682, |
|
"grad_norm": 1.1524171829223633, |
|
"learning_rate": 0.0007051719611446574, |
|
"epoch": 0.88 |
|
}, |
|
{ |
|
"loss": 8.5678, |
|
"grad_norm": 1.0660207271575928, |
|
"learning_rate": 0.0007042968408156122, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 7.9908, |
|
"grad_norm": 1.1786776781082153, |
|
"learning_rate": 0.000703421720486567, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 9.0339, |
|
"grad_norm": 0.9970653057098389, |
|
"learning_rate": 0.0007025466001575217, |
|
"epoch": 0.89 |
|
}, |
|
{ |
|
"loss": 8.6511, |
|
"grad_norm": 1.171247124671936, |
|
"learning_rate": 0.0007016714798284765, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.0249, |
|
"grad_norm": 1.1036537885665894, |
|
"learning_rate": 0.0007007963594994313, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.2895, |
|
"grad_norm": 1.4363912343978882, |
|
"learning_rate": 0.000699921239170386, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.4263, |
|
"grad_norm": 1.2977561950683594, |
|
"learning_rate": 0.0006990461188413408, |
|
"epoch": 0.9 |
|
}, |
|
{ |
|
"loss": 8.3236, |
|
"grad_norm": 1.2732399702072144, |
|
"learning_rate": 0.0006981709985122954, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.0876, |
|
"grad_norm": 0.8092446327209473, |
|
"learning_rate": 0.0006972958781832502, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.3052, |
|
"grad_norm": 1.0607753992080688, |
|
"learning_rate": 0.000696420757854205, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.2821, |
|
"grad_norm": 1.2833763360977173, |
|
"learning_rate": 0.0006955456375251597, |
|
"epoch": 0.91 |
|
}, |
|
{ |
|
"loss": 8.0437, |
|
"grad_norm": 1.2291605472564697, |
|
"learning_rate": 0.0006946705171961145, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 7.9172, |
|
"grad_norm": 0.9950680732727051, |
|
"learning_rate": 0.0006937953968670692, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 7.8579, |
|
"grad_norm": 1.170876145362854, |
|
"learning_rate": 0.000692920276538024, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.7343, |
|
"grad_norm": 1.0266340970993042, |
|
"learning_rate": 0.0006920451562089788, |
|
"epoch": 0.92 |
|
}, |
|
{ |
|
"loss": 8.3685, |
|
"grad_norm": 1.1194366216659546, |
|
"learning_rate": 0.0006911700358799335, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.8983, |
|
"grad_norm": 1.130362868309021, |
|
"learning_rate": 0.0006902949155508883, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.3624, |
|
"grad_norm": 1.2582019567489624, |
|
"learning_rate": 0.000689419795221843, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.5332, |
|
"grad_norm": 1.0985493659973145, |
|
"learning_rate": 0.0006885446748927978, |
|
"epoch": 0.93 |
|
}, |
|
{ |
|
"loss": 8.263, |
|
"grad_norm": 1.0480501651763916, |
|
"learning_rate": 0.0006876695545637526, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.1911, |
|
"grad_norm": 1.085471510887146, |
|
"learning_rate": 0.0006867944342347073, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.6767, |
|
"grad_norm": 1.109959602355957, |
|
"learning_rate": 0.0006859193139056621, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 8.1904, |
|
"grad_norm": 0.9299295544624329, |
|
"learning_rate": 0.0006850441935766169, |
|
"epoch": 0.94 |
|
}, |
|
{ |
|
"loss": 7.9858, |
|
"grad_norm": 1.3819242715835571, |
|
"learning_rate": 0.0006841690732475716, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.3134, |
|
"grad_norm": 1.499324083328247, |
|
"learning_rate": 0.0006832939529185264, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.1389, |
|
"grad_norm": 1.0068879127502441, |
|
"learning_rate": 0.0006824188325894811, |
|
"epoch": 0.95 |
|
}, |
|
{ |
|
"loss": 8.0979, |
|
"grad_norm": 1.232861876487732, |
|
"learning_rate": 0.0006815437122604359, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.1456, |
|
"grad_norm": 1.020922064781189, |
|
"learning_rate": 0.0006806685919313907, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.1438, |
|
"grad_norm": 1.2880629301071167, |
|
"learning_rate": 0.0006797934716023453, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 7.8589, |
|
"grad_norm": 1.2720872163772583, |
|
"learning_rate": 0.0006789183512733001, |
|
"epoch": 0.96 |
|
}, |
|
{ |
|
"loss": 8.338, |
|
"grad_norm": 1.1569981575012207, |
|
"learning_rate": 0.0006780432309442548, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 7.6167, |
|
"grad_norm": 1.0755385160446167, |
|
"learning_rate": 0.0006771681106152095, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 9.1889, |
|
"grad_norm": 1.1371173858642578, |
|
"learning_rate": 0.0006762929902861643, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 8.1603, |
|
"grad_norm": 1.2543790340423584, |
|
"learning_rate": 0.000675417869957119, |
|
"epoch": 0.97 |
|
}, |
|
{ |
|
"loss": 8.1684, |
|
"grad_norm": 1.665987491607666, |
|
"learning_rate": 0.0006745427496280738, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.4957, |
|
"grad_norm": 1.1479765176773071, |
|
"learning_rate": 0.0006736676292990285, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 7.998, |
|
"grad_norm": 1.1416277885437012, |
|
"learning_rate": 0.0006727925089699833, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.4458, |
|
"grad_norm": 1.2610832452774048, |
|
"learning_rate": 0.0006719173886409381, |
|
"epoch": 0.98 |
|
}, |
|
{ |
|
"loss": 8.2715, |
|
"grad_norm": 1.2478748559951782, |
|
"learning_rate": 0.0006710422683118928, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.0882, |
|
"grad_norm": 0.9021313190460205, |
|
"learning_rate": 0.0006701671479828476, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.2404, |
|
"grad_norm": 1.0023951530456543, |
|
"learning_rate": 0.0006692920276538024, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.681, |
|
"grad_norm": 1.3342375755310059, |
|
"learning_rate": 0.0006684169073247571, |
|
"epoch": 0.99 |
|
}, |
|
{ |
|
"loss": 8.024, |
|
"grad_norm": 1.0199118852615356, |
|
"learning_rate": 0.0006675417869957119, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.3688, |
|
"grad_norm": 0.893786609172821, |
|
"learning_rate": 0.0006666666666666666, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 8.0561, |
|
"grad_norm": 1.2774296998977661, |
|
"learning_rate": 0.0006657915463376214, |
|
"epoch": 1.0 |
|
}, |
|
{ |
|
"loss": 7.8444, |
|
"grad_norm": 1.0824223756790161, |
|
"learning_rate": 0.0006649164260085762, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.1771, |
|
"grad_norm": 0.869452178478241, |
|
"learning_rate": 0.0006640413056795309, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 7.6838, |
|
"grad_norm": 1.1132241487503052, |
|
"learning_rate": 0.0006631661853504857, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 7.9475, |
|
"grad_norm": 1.2853749990463257, |
|
"learning_rate": 0.0006622910650214405, |
|
"epoch": 1.01 |
|
}, |
|
{ |
|
"loss": 8.8546, |
|
"grad_norm": 1.2339048385620117, |
|
"learning_rate": 0.0006614159446923952, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.1339, |
|
"grad_norm": 1.2211487293243408, |
|
"learning_rate": 0.00066054082436335, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 7.402, |
|
"grad_norm": 1.0966975688934326, |
|
"learning_rate": 0.0006596657040343047, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.1777, |
|
"grad_norm": 1.0253325700759888, |
|
"learning_rate": 0.0006587905837052595, |
|
"epoch": 1.02 |
|
}, |
|
{ |
|
"loss": 8.2748, |
|
"grad_norm": 1.2987836599349976, |
|
"learning_rate": 0.0006579154633762143, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.3225, |
|
"grad_norm": 0.945371687412262, |
|
"learning_rate": 0.000657040343047169, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.4416, |
|
"grad_norm": 1.0868079662322998, |
|
"learning_rate": 0.0006561652227181238, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.1007, |
|
"grad_norm": 1.0190479755401611, |
|
"learning_rate": 0.0006552901023890784, |
|
"epoch": 1.03 |
|
}, |
|
{ |
|
"loss": 8.1317, |
|
"grad_norm": 1.0896625518798828, |
|
"learning_rate": 0.0006544149820600332, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 7.7364, |
|
"grad_norm": 1.1690502166748047, |
|
"learning_rate": 0.000653539861730988, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 7.8173, |
|
"grad_norm": 1.0521645545959473, |
|
"learning_rate": 0.0006526647414019427, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 7.6212, |
|
"grad_norm": 1.3057899475097656, |
|
"learning_rate": 0.0006517896210728975, |
|
"epoch": 1.04 |
|
}, |
|
{ |
|
"loss": 8.0228, |
|
"grad_norm": 0.968885064125061, |
|
"learning_rate": 0.0006509145007438522, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 7.8535, |
|
"grad_norm": 1.1838873624801636, |
|
"learning_rate": 0.000650039380414807, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.1991, |
|
"grad_norm": 1.0967016220092773, |
|
"learning_rate": 0.0006491642600857618, |
|
"epoch": 1.05 |
|
}, |
|
{ |
|
"loss": 8.1515, |
|
"grad_norm": 1.0798629522323608, |
|
"learning_rate": 0.0006482891397567165, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.291, |
|
"grad_norm": 1.1506596803665161, |
|
"learning_rate": 0.0006474140194276713, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 7.956, |
|
"grad_norm": 1.0459505319595337, |
|
"learning_rate": 0.0006465388990986261, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.4393, |
|
"grad_norm": 1.070776343345642, |
|
"learning_rate": 0.0006456637787695808, |
|
"epoch": 1.06 |
|
}, |
|
{ |
|
"loss": 8.5445, |
|
"grad_norm": 1.3064284324645996, |
|
"learning_rate": 0.0006447886584405356, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.701, |
|
"grad_norm": 1.0707839727401733, |
|
"learning_rate": 0.0006439135381114903, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 7.4342, |
|
"grad_norm": 1.123377799987793, |
|
"learning_rate": 0.0006430384177824451, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.4883, |
|
"grad_norm": 1.7230886220932007, |
|
"learning_rate": 0.0006421632974533999, |
|
"epoch": 1.07 |
|
}, |
|
{ |
|
"loss": 8.5288, |
|
"grad_norm": 0.9721227288246155, |
|
"learning_rate": 0.0006412881771243546, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 7.8249, |
|
"grad_norm": 1.2729851007461548, |
|
"learning_rate": 0.0006404130567953094, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 8.3277, |
|
"grad_norm": 0.9693044424057007, |
|
"learning_rate": 0.0006395379364662642, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 7.8798, |
|
"grad_norm": 1.104020118713379, |
|
"learning_rate": 0.0006386628161372189, |
|
"epoch": 1.08 |
|
}, |
|
{ |
|
"loss": 7.899, |
|
"grad_norm": 1.0556141138076782, |
|
"learning_rate": 0.0006377876958081737, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.6403, |
|
"grad_norm": 1.227303147315979, |
|
"learning_rate": 0.0006369125754791284, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.7407, |
|
"grad_norm": 1.2486103773117065, |
|
"learning_rate": 0.0006360374551500832, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.226, |
|
"grad_norm": 1.1452488899230957, |
|
"learning_rate": 0.000635162334821038, |
|
"epoch": 1.09 |
|
}, |
|
{ |
|
"loss": 8.5083, |
|
"grad_norm": 1.466182827949524, |
|
"learning_rate": 0.0006342872144919927, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 7.8041, |
|
"grad_norm": 1.2693302631378174, |
|
"learning_rate": 0.0006334120941629474, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 7.918, |
|
"grad_norm": 1.1236190795898438, |
|
"learning_rate": 0.0006325369738339021, |
|
"epoch": 1.1 |
|
}, |
|
{ |
|
"loss": 7.8792, |
|
"grad_norm": 0.9166776537895203, |
|
"learning_rate": 0.0006316618535048569, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.3714, |
|
"grad_norm": 1.2021427154541016, |
|
"learning_rate": 0.0006307867331758117, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.5282, |
|
"grad_norm": 1.1508140563964844, |
|
"learning_rate": 0.0006299116128467664, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 7.7235, |
|
"grad_norm": 1.044027328491211, |
|
"learning_rate": 0.0006290364925177212, |
|
"epoch": 1.11 |
|
}, |
|
{ |
|
"loss": 8.0483, |
|
"grad_norm": 1.00051748752594, |
|
"learning_rate": 0.000628161372188676, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.0003, |
|
"grad_norm": 1.0397716760635376, |
|
"learning_rate": 0.0006272862518596307, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.274, |
|
"grad_norm": 1.0577192306518555, |
|
"learning_rate": 0.0006264111315305855, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 7.8435, |
|
"grad_norm": 1.1829681396484375, |
|
"learning_rate": 0.0006255360112015402, |
|
"epoch": 1.12 |
|
}, |
|
{ |
|
"loss": 8.5019, |
|
"grad_norm": 1.9353641271591187, |
|
"learning_rate": 0.000624660890872495, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.4582, |
|
"grad_norm": 1.237269639968872, |
|
"learning_rate": 0.0006237857705434498, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.0735, |
|
"grad_norm": 1.1674834489822388, |
|
"learning_rate": 0.0006229106502144045, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.3781, |
|
"grad_norm": 1.32883620262146, |
|
"learning_rate": 0.0006220355298853593, |
|
"epoch": 1.13 |
|
}, |
|
{ |
|
"loss": 8.723, |
|
"grad_norm": 1.3197271823883057, |
|
"learning_rate": 0.000621160409556314, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.414, |
|
"grad_norm": 1.137764573097229, |
|
"learning_rate": 0.0006202852892272688, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 7.9197, |
|
"grad_norm": 1.1574738025665283, |
|
"learning_rate": 0.0006194101688982236, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 8.09, |
|
"grad_norm": 1.0444676876068115, |
|
"learning_rate": 0.0006185350485691783, |
|
"epoch": 1.14 |
|
}, |
|
{ |
|
"loss": 7.3329, |
|
"grad_norm": 0.8655235767364502, |
|
"learning_rate": 0.0006176599282401331, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.4163, |
|
"grad_norm": 0.9860300421714783, |
|
"learning_rate": 0.0006167848079110879, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.2608, |
|
"grad_norm": 1.1680139303207397, |
|
"learning_rate": 0.0006159096875820426, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 7.9283, |
|
"grad_norm": 1.545938491821289, |
|
"learning_rate": 0.0006150345672529974, |
|
"epoch": 1.15 |
|
}, |
|
{ |
|
"loss": 8.4113, |
|
"grad_norm": 1.2768994569778442, |
|
"learning_rate": 0.0006141594469239521, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.2389, |
|
"grad_norm": 1.0001721382141113, |
|
"learning_rate": 0.0006132843265949069, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.397, |
|
"grad_norm": 1.8651808500289917, |
|
"learning_rate": 0.0006124092062658617, |
|
"epoch": 1.16 |
|
}, |
|
{ |
|
"loss": 8.003, |
|
"grad_norm": 0.947693407535553, |
|
"learning_rate": 0.0006115340859368163, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 7.5861, |
|
"grad_norm": 1.1168384552001953, |
|
"learning_rate": 0.0006106589656077711, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.7788, |
|
"grad_norm": 1.1341112852096558, |
|
"learning_rate": 0.0006097838452787258, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 7.9428, |
|
"grad_norm": 1.2905473709106445, |
|
"learning_rate": 0.0006089087249496806, |
|
"epoch": 1.17 |
|
}, |
|
{ |
|
"loss": 8.6196, |
|
"grad_norm": 0.9961435794830322, |
|
"learning_rate": 0.0006080336046206354, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.224, |
|
"grad_norm": 1.3134316205978394, |
|
"learning_rate": 0.0006071584842915901, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 7.9156, |
|
"grad_norm": 1.5898418426513672, |
|
"learning_rate": 0.0006062833639625449, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 8.2147, |
|
"grad_norm": 0.99250727891922, |
|
"learning_rate": 0.0006054082436334996, |
|
"epoch": 1.18 |
|
}, |
|
{ |
|
"loss": 7.6957, |
|
"grad_norm": 1.2642431259155273, |
|
"learning_rate": 0.0006045331233044544, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 7.7926, |
|
"grad_norm": 1.314082384109497, |
|
"learning_rate": 0.0006036580029754092, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 7.9682, |
|
"grad_norm": 1.1342573165893555, |
|
"learning_rate": 0.0006027828826463639, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.0208, |
|
"grad_norm": 1.3015680313110352, |
|
"learning_rate": 0.0006019077623173187, |
|
"epoch": 1.19 |
|
}, |
|
{ |
|
"loss": 8.3608, |
|
"grad_norm": 0.9990431666374207, |
|
"learning_rate": 0.0006010326419882735, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.2009, |
|
"grad_norm": 0.9804344773292542, |
|
"learning_rate": 0.0006001575216592282, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.0484, |
|
"grad_norm": 1.1591954231262207, |
|
"learning_rate": 0.0005992824013301829, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 8.116, |
|
"grad_norm": 1.042474627494812, |
|
"learning_rate": 0.0005984072810011376, |
|
"epoch": 1.2 |
|
}, |
|
{ |
|
"loss": 7.9246, |
|
"grad_norm": 1.8579179048538208, |
|
"learning_rate": 0.0005975321606720924, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.9183, |
|
"grad_norm": 0.8727061748504639, |
|
"learning_rate": 0.0005966570403430472, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.675, |
|
"grad_norm": 1.0189380645751953, |
|
"learning_rate": 0.0005957819200140019, |
|
"epoch": 1.21 |
|
}, |
|
{ |
|
"loss": 7.6222, |
|
"grad_norm": 1.0766206979751587, |
|
"learning_rate": 0.0005949067996849567, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 7.6455, |
|
"grad_norm": 1.121745228767395, |
|
"learning_rate": 0.0005940316793559114, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.1449, |
|
"grad_norm": 1.2497507333755493, |
|
"learning_rate": 0.0005931565590268662, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.3586, |
|
"grad_norm": 1.301903486251831, |
|
"learning_rate": 0.000592281438697821, |
|
"epoch": 1.22 |
|
}, |
|
{ |
|
"loss": 8.163, |
|
"grad_norm": 1.1964079141616821, |
|
"learning_rate": 0.0005914063183687757, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.2938, |
|
"grad_norm": 1.1423827409744263, |
|
"learning_rate": 0.0005905311980397304, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.165, |
|
"grad_norm": 1.119884967803955, |
|
"learning_rate": 0.0005896560777106851, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 7.7234, |
|
"grad_norm": 1.4375518560409546, |
|
"learning_rate": 0.0005887809573816399, |
|
"epoch": 1.23 |
|
}, |
|
{ |
|
"loss": 8.0758, |
|
"grad_norm": 1.1417185068130493, |
|
"learning_rate": 0.0005879058370525947, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 7.9137, |
|
"grad_norm": 1.048060417175293, |
|
"learning_rate": 0.0005870307167235494, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.4029, |
|
"grad_norm": 0.9880658388137817, |
|
"learning_rate": 0.0005861555963945042, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.4489, |
|
"grad_norm": 1.000611424446106, |
|
"learning_rate": 0.000585280476065459, |
|
"epoch": 1.24 |
|
}, |
|
{ |
|
"loss": 8.2688, |
|
"grad_norm": 1.3099920749664307, |
|
"learning_rate": 0.0005844053557364137, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 7.7948, |
|
"grad_norm": 0.8548302054405212, |
|
"learning_rate": 0.0005835302354073685, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.442, |
|
"grad_norm": 1.1732860803604126, |
|
"learning_rate": 0.0005826551150783232, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 7.6346, |
|
"grad_norm": 0.803125262260437, |
|
"learning_rate": 0.000581779994749278, |
|
"epoch": 1.25 |
|
}, |
|
{ |
|
"loss": 8.0567, |
|
"grad_norm": 1.258419156074524, |
|
"learning_rate": 0.0005809048744202328, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.1142, |
|
"grad_norm": 1.1331418752670288, |
|
"learning_rate": 0.0005800297540911875, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 8.5457, |
|
"grad_norm": 1.5619804859161377, |
|
"learning_rate": 0.0005791546337621423, |
|
"epoch": 1.26 |
|
}, |
|
{ |
|
"loss": 7.9416, |
|
"grad_norm": 1.880534052848816, |
|
"learning_rate": 0.000578279513433097, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.8216, |
|
"grad_norm": 1.2279471158981323, |
|
"learning_rate": 0.0005774043931040518, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.8216, |
|
"grad_norm": 1.1597974300384521, |
|
"learning_rate": 0.0005765292727750066, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.9033, |
|
"grad_norm": 1.1710484027862549, |
|
"learning_rate": 0.0005756541524459613, |
|
"epoch": 1.27 |
|
}, |
|
{ |
|
"loss": 7.6036, |
|
"grad_norm": 1.0655231475830078, |
|
"learning_rate": 0.0005747790321169161, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.5982, |
|
"grad_norm": 1.0066710710525513, |
|
"learning_rate": 0.0005739039117878709, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.738, |
|
"grad_norm": 1.1333460807800293, |
|
"learning_rate": 0.0005730287914588256, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 8.0025, |
|
"grad_norm": 1.468841791152954, |
|
"learning_rate": 0.0005721536711297804, |
|
"epoch": 1.28 |
|
}, |
|
{ |
|
"loss": 7.4888, |
|
"grad_norm": 1.1363178491592407, |
|
"learning_rate": 0.0005712785508007351, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 7.3176, |
|
"grad_norm": 1.1589970588684082, |
|
"learning_rate": 0.0005704034304716899, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 7.6323, |
|
"grad_norm": 0.9033693075180054, |
|
"learning_rate": 0.0005695283101426447, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 7.8839, |
|
"grad_norm": 1.2384039163589478, |
|
"learning_rate": 0.0005686531898135993, |
|
"epoch": 1.29 |
|
}, |
|
{ |
|
"loss": 7.8408, |
|
"grad_norm": 1.3826912641525269, |
|
"learning_rate": 0.0005677780694845541, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 7.4433, |
|
"grad_norm": 1.1403487920761108, |
|
"learning_rate": 0.0005669029491555088, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.5407, |
|
"grad_norm": 1.037423014640808, |
|
"learning_rate": 0.0005660278288264636, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 8.0943, |
|
"grad_norm": 1.4421013593673706, |
|
"learning_rate": 0.0005651527084974184, |
|
"epoch": 1.3 |
|
}, |
|
{ |
|
"loss": 7.7771, |
|
"grad_norm": 1.2977713346481323, |
|
"learning_rate": 0.0005642775881683731, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 7.54, |
|
"grad_norm": 1.049196720123291, |
|
"learning_rate": 0.0005634024678393279, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 7.4699, |
|
"grad_norm": 1.0489652156829834, |
|
"learning_rate": 0.0005625273475102827, |
|
"epoch": 1.31 |
|
}, |
|
{ |
|
"loss": 7.9441, |
|
"grad_norm": 1.1373968124389648, |
|
"learning_rate": 0.0005616522271812374, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 7.2627, |
|
"grad_norm": 1.0570902824401855, |
|
"learning_rate": 0.0005607771068521922, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 7.7472, |
|
"grad_norm": 1.0547776222229004, |
|
"learning_rate": 0.0005599019865231469, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 7.8815, |
|
"grad_norm": 1.2481534481048584, |
|
"learning_rate": 0.0005590268661941017, |
|
"epoch": 1.32 |
|
}, |
|
{ |
|
"loss": 8.2547, |
|
"grad_norm": 1.1728442907333374, |
|
"learning_rate": 0.0005581517458650565, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.5035, |
|
"grad_norm": 1.0567808151245117, |
|
"learning_rate": 0.0005572766255360112, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.9982, |
|
"grad_norm": 0.8234537243843079, |
|
"learning_rate": 0.000556401505206966, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.5333, |
|
"grad_norm": 1.09587824344635, |
|
"learning_rate": 0.0005555263848779207, |
|
"epoch": 1.33 |
|
}, |
|
{ |
|
"loss": 7.768, |
|
"grad_norm": 1.3897008895874023, |
|
"learning_rate": 0.0005546512645488755, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7645, |
|
"grad_norm": 1.1089082956314087, |
|
"learning_rate": 0.0005537761442198303, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7809, |
|
"grad_norm": 1.2678576707839966, |
|
"learning_rate": 0.000552901023890785, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 7.7376, |
|
"grad_norm": 1.3946635723114014, |
|
"learning_rate": 0.0005520259035617398, |
|
"epoch": 1.34 |
|
}, |
|
{ |
|
"loss": 8.2773, |
|
"grad_norm": 1.3742512464523315, |
|
"learning_rate": 0.0005511507832326946, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.7902, |
|
"grad_norm": 1.416434645652771, |
|
"learning_rate": 0.0005502756629036493, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.6157, |
|
"grad_norm": 1.0419012308120728, |
|
"learning_rate": 0.0005494005425746041, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 7.5897, |
|
"grad_norm": 1.7180145978927612, |
|
"learning_rate": 0.0005485254222455588, |
|
"epoch": 1.35 |
|
}, |
|
{ |
|
"loss": 8.0068, |
|
"grad_norm": 1.6651771068572998, |
|
"learning_rate": 0.0005476503019165136, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 7.4023, |
|
"grad_norm": 1.0715596675872803, |
|
"learning_rate": 0.0005467751815874683, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.0369, |
|
"grad_norm": 1.208898901939392, |
|
"learning_rate": 0.000545900061258423, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 7.6188, |
|
"grad_norm": 0.9920070767402649, |
|
"learning_rate": 0.0005450249409293778, |
|
"epoch": 1.36 |
|
}, |
|
{ |
|
"loss": 8.6854, |
|
"grad_norm": 1.174086570739746, |
|
"learning_rate": 0.0005441498206003325, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 7.5733, |
|
"grad_norm": 1.244912028312683, |
|
"learning_rate": 0.0005432747002712873, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 7.389, |
|
"grad_norm": 1.5966273546218872, |
|
"learning_rate": 0.0005423995799422421, |
|
"epoch": 1.37 |
|
}, |
|
{ |
|
"loss": 8.1756, |
|
"grad_norm": 1.0320965051651, |
|
"learning_rate": 0.0005415244596131968, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.897, |
|
"grad_norm": 1.2478450536727905, |
|
"learning_rate": 0.0005406493392841516, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 7.6083, |
|
"grad_norm": 1.4347364902496338, |
|
"learning_rate": 0.0005397742189551064, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 7.9916, |
|
"grad_norm": 1.1878119707107544, |
|
"learning_rate": 0.0005388990986260611, |
|
"epoch": 1.38 |
|
}, |
|
{ |
|
"loss": 8.1032, |
|
"grad_norm": 1.3169543743133545, |
|
"learning_rate": 0.0005380239782970159, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.3094, |
|
"grad_norm": 1.271192193031311, |
|
"learning_rate": 0.0005371488579679706, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.2947, |
|
"grad_norm": 1.484824299812317, |
|
"learning_rate": 0.0005362737376389254, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.7483, |
|
"grad_norm": 1.0237884521484375, |
|
"learning_rate": 0.0005353986173098802, |
|
"epoch": 1.39 |
|
}, |
|
{ |
|
"loss": 7.7284, |
|
"grad_norm": 1.141897201538086, |
|
"learning_rate": 0.0005345234969808349, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.9684, |
|
"grad_norm": 1.2076783180236816, |
|
"learning_rate": 0.0005336483766517897, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.4731, |
|
"grad_norm": 1.0815685987472534, |
|
"learning_rate": 0.0005327732563227444, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 7.6468, |
|
"grad_norm": 1.9115163087844849, |
|
"learning_rate": 0.0005318981359936992, |
|
"epoch": 1.4 |
|
}, |
|
{ |
|
"loss": 8.179, |
|
"grad_norm": 1.1872133016586304, |
|
"learning_rate": 0.000531023015664654, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 8.1254, |
|
"grad_norm": 1.144726037979126, |
|
"learning_rate": 0.0005301478953356087, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.7947, |
|
"grad_norm": 1.562495231628418, |
|
"learning_rate": 0.0005292727750065635, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.2917, |
|
"grad_norm": 1.20420241355896, |
|
"learning_rate": 0.0005283976546775183, |
|
"epoch": 1.41 |
|
}, |
|
{ |
|
"loss": 7.9956, |
|
"grad_norm": 1.0302613973617554, |
|
"learning_rate": 0.000527522534348473, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 7.8058, |
|
"grad_norm": 1.161452293395996, |
|
"learning_rate": 0.0005266474140194278, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 8.2652, |
|
"grad_norm": 1.2876991033554077, |
|
"learning_rate": 0.0005257722936903825, |
|
"epoch": 1.42 |
|
}, |
|
{ |
|
"loss": 8.0375, |
|
"grad_norm": 1.1002925634384155, |
|
"learning_rate": 0.0005248971733613372, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 7.82, |
|
"grad_norm": 1.0201154947280884, |
|
"learning_rate": 0.000524022053032292, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.3203, |
|
"grad_norm": 1.1177037954330444, |
|
"learning_rate": 0.0005231469327032467, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 7.9789, |
|
"grad_norm": 1.4295682907104492, |
|
"learning_rate": 0.0005222718123742015, |
|
"epoch": 1.43 |
|
}, |
|
{ |
|
"loss": 8.0088, |
|
"grad_norm": 1.4420737028121948, |
|
"learning_rate": 0.0005213966920451562, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.8298, |
|
"grad_norm": 1.1020231246948242, |
|
"learning_rate": 0.000520521571716111, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.8801, |
|
"grad_norm": 1.4339189529418945, |
|
"learning_rate": 0.0005196464513870657, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 7.6756, |
|
"grad_norm": 1.5243607759475708, |
|
"learning_rate": 0.0005187713310580204, |
|
"epoch": 1.44 |
|
}, |
|
{ |
|
"loss": 8.1007, |
|
"grad_norm": 0.9880979657173157, |
|
"learning_rate": 0.0005178962107289752, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.7396, |
|
"grad_norm": 1.1447367668151855, |
|
"learning_rate": 0.0005170210903999299, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.8537, |
|
"grad_norm": 1.384048342704773, |
|
"learning_rate": 0.0005161459700708847, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.8855, |
|
"grad_norm": 1.3757721185684204, |
|
"learning_rate": 0.0005152708497418395, |
|
"epoch": 1.45 |
|
}, |
|
{ |
|
"loss": 7.8651, |
|
"grad_norm": 1.1160024404525757, |
|
"learning_rate": 0.0005143957294127942, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.8378, |
|
"grad_norm": 0.9774546027183533, |
|
"learning_rate": 0.000513520609083749, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.9251, |
|
"grad_norm": 1.5181477069854736, |
|
"learning_rate": 0.0005126454887547038, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 8.6781, |
|
"grad_norm": 1.203229308128357, |
|
"learning_rate": 0.0005117703684256585, |
|
"epoch": 1.46 |
|
}, |
|
{ |
|
"loss": 7.6571, |
|
"grad_norm": 1.0401496887207031, |
|
"learning_rate": 0.0005108952480966133, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 7.3908, |
|
"grad_norm": 1.3228225708007812, |
|
"learning_rate": 0.000510020127767568, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 8.1244, |
|
"grad_norm": 1.3072296380996704, |
|
"learning_rate": 0.0005091450074385228, |
|
"epoch": 1.47 |
|
}, |
|
{ |
|
"loss": 7.7535, |
|
"grad_norm": 1.9105629920959473, |
|
"learning_rate": 0.0005082698871094776, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.2387, |
|
"grad_norm": 1.3035160303115845, |
|
"learning_rate": 0.0005073947667804323, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 7.998, |
|
"grad_norm": 0.9805745482444763, |
|
"learning_rate": 0.0005065196464513871, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.0499, |
|
"grad_norm": 1.28218412399292, |
|
"learning_rate": 0.0005056445261223418, |
|
"epoch": 1.48 |
|
}, |
|
{ |
|
"loss": 8.0939, |
|
"grad_norm": 1.289697527885437, |
|
"learning_rate": 0.0005047694057932966, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.8801, |
|
"grad_norm": 1.3982206583023071, |
|
"learning_rate": 0.0005038942854642513, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.5012, |
|
"grad_norm": 1.1884011030197144, |
|
"learning_rate": 0.000503019165135206, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 7.7792, |
|
"grad_norm": 1.2014328241348267, |
|
"learning_rate": 0.0005021440448061608, |
|
"epoch": 1.49 |
|
}, |
|
{ |
|
"loss": 8.3151, |
|
"grad_norm": 1.2958098649978638, |
|
"learning_rate": 0.0005012689244771155, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 7.3702, |
|
"grad_norm": 1.1195346117019653, |
|
"learning_rate": 0.0005003938041480703, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 8.0952, |
|
"grad_norm": 1.2185337543487549, |
|
"learning_rate": 0.0004995186838190251, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 7.6605, |
|
"grad_norm": 1.1054099798202515, |
|
"learning_rate": 0.0004986435634899798, |
|
"epoch": 1.5 |
|
}, |
|
{ |
|
"loss": 7.8926, |
|
"grad_norm": 1.3183029890060425, |
|
"learning_rate": 0.0004977684431609346, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.8356, |
|
"grad_norm": 1.3786067962646484, |
|
"learning_rate": 0.0004968933228318894, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.7605, |
|
"grad_norm": 1.3373888731002808, |
|
"learning_rate": 0.0004960182025028441, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 7.9272, |
|
"grad_norm": 1.5524091720581055, |
|
"learning_rate": 0.0004951430821737989, |
|
"epoch": 1.51 |
|
}, |
|
{ |
|
"loss": 8.1264, |
|
"grad_norm": 0.927689790725708, |
|
"learning_rate": 0.0004942679618447536, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.1456, |
|
"grad_norm": 1.4429559707641602, |
|
"learning_rate": 0.0004933928415157084, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.5349, |
|
"grad_norm": 1.17830228805542, |
|
"learning_rate": 0.0004925177211866632, |
|
"epoch": 1.52 |
|
}, |
|
{ |
|
"loss": 8.4138, |
|
"grad_norm": 1.7398778200149536, |
|
"learning_rate": 0.0004916426008576179, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.6329, |
|
"grad_norm": 1.101945161819458, |
|
"learning_rate": 0.0004907674805285727, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 8.2694, |
|
"grad_norm": 1.2424931526184082, |
|
"learning_rate": 0.0004898923601995274, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.2639, |
|
"grad_norm": 0.8726850748062134, |
|
"learning_rate": 0.0004890172398704822, |
|
"epoch": 1.53 |
|
}, |
|
{ |
|
"loss": 7.5542, |
|
"grad_norm": 1.020978331565857, |
|
"learning_rate": 0.0004881421195414369, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.3334, |
|
"grad_norm": 1.058136224746704, |
|
"learning_rate": 0.0004872669992123917, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.6285, |
|
"grad_norm": 1.7856310606002808, |
|
"learning_rate": 0.00048639187888334644, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.8873, |
|
"grad_norm": 1.1540299654006958, |
|
"learning_rate": 0.0004855167585543012, |
|
"epoch": 1.54 |
|
}, |
|
{ |
|
"loss": 7.5676, |
|
"grad_norm": 1.4844547510147095, |
|
"learning_rate": 0.00048464163822525597, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0284, |
|
"grad_norm": 1.1018364429473877, |
|
"learning_rate": 0.00048376651789621073, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 7.8478, |
|
"grad_norm": 1.4421080350875854, |
|
"learning_rate": 0.0004828913975671655, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 8.0614, |
|
"grad_norm": 1.322413444519043, |
|
"learning_rate": 0.00048201627723812025, |
|
"epoch": 1.55 |
|
}, |
|
{ |
|
"loss": 7.9015, |
|
"grad_norm": 1.1930081844329834, |
|
"learning_rate": 0.000481141156909075, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.843, |
|
"grad_norm": 1.2846688032150269, |
|
"learning_rate": 0.0004802660365800298, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.8268, |
|
"grad_norm": 2.0413529872894287, |
|
"learning_rate": 0.00047939091625098454, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.3241, |
|
"grad_norm": 1.058362364768982, |
|
"learning_rate": 0.0004785157959219393, |
|
"epoch": 1.56 |
|
}, |
|
{ |
|
"loss": 7.8329, |
|
"grad_norm": 1.725417971611023, |
|
"learning_rate": 0.00047764067559289406, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.6295, |
|
"grad_norm": 1.1373404264450073, |
|
"learning_rate": 0.00047676555526384877, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.4763, |
|
"grad_norm": 1.1107378005981445, |
|
"learning_rate": 0.00047589043493480353, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 7.8846, |
|
"grad_norm": 1.2450941801071167, |
|
"learning_rate": 0.0004750153146057583, |
|
"epoch": 1.57 |
|
}, |
|
{ |
|
"loss": 8.4109, |
|
"grad_norm": 1.0643541812896729, |
|
"learning_rate": 0.00047414019427671305, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.9126, |
|
"grad_norm": 1.2940372228622437, |
|
"learning_rate": 0.0004732650739476678, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.7132, |
|
"grad_norm": 2.6067655086517334, |
|
"learning_rate": 0.0004723899536186226, |
|
"epoch": 1.58 |
|
}, |
|
{ |
|
"loss": 7.5708, |
|
"grad_norm": 0.9783037304878235, |
|
"learning_rate": 0.00047151483328957734, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.2771, |
|
"grad_norm": 1.037582278251648, |
|
"learning_rate": 0.0004706397129605321, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.7599, |
|
"grad_norm": 1.0178707838058472, |
|
"learning_rate": 0.00046976459263148686, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.1538, |
|
"grad_norm": 1.558307409286499, |
|
"learning_rate": 0.0004688894723024416, |
|
"epoch": 1.59 |
|
}, |
|
{ |
|
"loss": 7.5229, |
|
"grad_norm": 1.1060800552368164, |
|
"learning_rate": 0.0004680143519733964, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.5813, |
|
"grad_norm": 1.8988709449768066, |
|
"learning_rate": 0.00046713923164435115, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.8319, |
|
"grad_norm": 1.6066781282424927, |
|
"learning_rate": 0.00046626411131530586, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.8222, |
|
"grad_norm": 1.4711729288101196, |
|
"learning_rate": 0.0004653889909862606, |
|
"epoch": 1.6 |
|
}, |
|
{ |
|
"loss": 7.6115, |
|
"grad_norm": 1.3585811853408813, |
|
"learning_rate": 0.0004645138706572154, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.7618, |
|
"grad_norm": 1.1487444639205933, |
|
"learning_rate": 0.00046363875032817014, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.868, |
|
"grad_norm": 1.4386248588562012, |
|
"learning_rate": 0.0004627636299991249, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 7.7931, |
|
"grad_norm": 1.0714224576950073, |
|
"learning_rate": 0.00046188850967007967, |
|
"epoch": 1.61 |
|
}, |
|
{ |
|
"loss": 8.2688, |
|
"grad_norm": 1.6375863552093506, |
|
"learning_rate": 0.00046101338934103443, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.621, |
|
"grad_norm": 1.024120807647705, |
|
"learning_rate": 0.0004601382690119892, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 8.2226, |
|
"grad_norm": 1.2234493494033813, |
|
"learning_rate": 0.0004592631486829439, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.596, |
|
"grad_norm": 1.0593066215515137, |
|
"learning_rate": 0.00045838802835389866, |
|
"epoch": 1.62 |
|
}, |
|
{ |
|
"loss": 7.7407, |
|
"grad_norm": 1.2529680728912354, |
|
"learning_rate": 0.0004575129080248534, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 7.221, |
|
"grad_norm": 1.1312929391860962, |
|
"learning_rate": 0.0004566377876958082, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 7.6136, |
|
"grad_norm": 1.4004312753677368, |
|
"learning_rate": 0.00045576266736676294, |
|
"epoch": 1.63 |
|
}, |
|
{ |
|
"loss": 7.9718, |
|
"grad_norm": 1.4514151811599731, |
|
"learning_rate": 0.00045488754703771765, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.3337, |
|
"grad_norm": 1.1595350503921509, |
|
"learning_rate": 0.0004540124267086724, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.7414, |
|
"grad_norm": 1.1403205394744873, |
|
"learning_rate": 0.0004531373063796272, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 7.7323, |
|
"grad_norm": 1.677051305770874, |
|
"learning_rate": 0.00045226218605058194, |
|
"epoch": 1.64 |
|
}, |
|
{ |
|
"loss": 8.0048, |
|
"grad_norm": 1.338146686553955, |
|
"learning_rate": 0.0004513870657215367, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.9544, |
|
"grad_norm": 1.0941588878631592, |
|
"learning_rate": 0.00045051194539249146, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 8.1043, |
|
"grad_norm": 1.224746584892273, |
|
"learning_rate": 0.0004496368250634462, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 8.0849, |
|
"grad_norm": 1.5772489309310913, |
|
"learning_rate": 0.000448761704734401, |
|
"epoch": 1.65 |
|
}, |
|
{ |
|
"loss": 7.3165, |
|
"grad_norm": 1.4434912204742432, |
|
"learning_rate": 0.00044788658440535575, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.8826, |
|
"grad_norm": 0.9971029162406921, |
|
"learning_rate": 0.0004470114640763105, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.7822, |
|
"grad_norm": 1.061712384223938, |
|
"learning_rate": 0.00044613634374726527, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.8387, |
|
"grad_norm": 1.6292518377304077, |
|
"learning_rate": 0.00044526122341822003, |
|
"epoch": 1.66 |
|
}, |
|
{ |
|
"loss": 7.3463, |
|
"grad_norm": 1.0507898330688477, |
|
"learning_rate": 0.00044438610308917474, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.693, |
|
"grad_norm": 1.332474708557129, |
|
"learning_rate": 0.0004435109827601295, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.4542, |
|
"grad_norm": 1.3393101692199707, |
|
"learning_rate": 0.00044263586243108426, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.4236, |
|
"grad_norm": 1.4949504137039185, |
|
"learning_rate": 0.000441760742102039, |
|
"epoch": 1.67 |
|
}, |
|
{ |
|
"loss": 7.4087, |
|
"grad_norm": 1.3824454545974731, |
|
"learning_rate": 0.0004408856217729938, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.3, |
|
"grad_norm": 1.3991942405700684, |
|
"learning_rate": 0.00044001050144394855, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.1648, |
|
"grad_norm": 1.3270092010498047, |
|
"learning_rate": 0.0004391353811149033, |
|
"epoch": 1.68 |
|
}, |
|
{ |
|
"loss": 7.753, |
|
"grad_norm": 1.1912864446640015, |
|
"learning_rate": 0.00043826026078585807, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.6531, |
|
"grad_norm": 1.2112165689468384, |
|
"learning_rate": 0.00043738514045681283, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 8.0168, |
|
"grad_norm": 1.0204828977584839, |
|
"learning_rate": 0.0004365100201277676, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.8334, |
|
"grad_norm": 1.8065035343170166, |
|
"learning_rate": 0.00043563489979872236, |
|
"epoch": 1.69 |
|
}, |
|
{ |
|
"loss": 7.9395, |
|
"grad_norm": 1.1826367378234863, |
|
"learning_rate": 0.0004347597794696771, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 8.0071, |
|
"grad_norm": 0.9689782857894897, |
|
"learning_rate": 0.00043388465914063183, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.5284, |
|
"grad_norm": 0.9889323115348816, |
|
"learning_rate": 0.0004330095388115866, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.7318, |
|
"grad_norm": 1.4257516860961914, |
|
"learning_rate": 0.00043213441848254135, |
|
"epoch": 1.7 |
|
}, |
|
{ |
|
"loss": 7.6343, |
|
"grad_norm": 1.623134970664978, |
|
"learning_rate": 0.0004312592981534961, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.9645, |
|
"grad_norm": 1.2686361074447632, |
|
"learning_rate": 0.0004303841778244509, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.5339, |
|
"grad_norm": 1.5115247964859009, |
|
"learning_rate": 0.00042950905749540564, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.7401, |
|
"grad_norm": 1.285506010055542, |
|
"learning_rate": 0.0004286339371663604, |
|
"epoch": 1.71 |
|
}, |
|
{ |
|
"loss": 7.6018, |
|
"grad_norm": 1.4150651693344116, |
|
"learning_rate": 0.00042775881683731516, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.4015, |
|
"grad_norm": 1.485231637954712, |
|
"learning_rate": 0.0004268836965082699, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 8.4429, |
|
"grad_norm": 2.1629021167755127, |
|
"learning_rate": 0.0004260085761792247, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.8298, |
|
"grad_norm": 1.1586624383926392, |
|
"learning_rate": 0.00042513345585017945, |
|
"epoch": 1.72 |
|
}, |
|
{ |
|
"loss": 7.8121, |
|
"grad_norm": 1.0134670734405518, |
|
"learning_rate": 0.0004242583355211342, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.8337, |
|
"grad_norm": 1.257633090019226, |
|
"learning_rate": 0.00042338321519208897, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.6701, |
|
"grad_norm": 1.212266445159912, |
|
"learning_rate": 0.0004225080948630437, |
|
"epoch": 1.73 |
|
}, |
|
{ |
|
"loss": 7.706, |
|
"grad_norm": 1.2191237211227417, |
|
"learning_rate": 0.00042163297453399844, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.4639, |
|
"grad_norm": 1.476140022277832, |
|
"learning_rate": 0.0004207578542049532, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.8126, |
|
"grad_norm": 1.0655369758605957, |
|
"learning_rate": 0.0004198827338759079, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.4091, |
|
"grad_norm": 1.3340696096420288, |
|
"learning_rate": 0.00041900761354686267, |
|
"epoch": 1.74 |
|
}, |
|
{ |
|
"loss": 7.5701, |
|
"grad_norm": 1.3290128707885742, |
|
"learning_rate": 0.00041813249321781743, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.5513, |
|
"grad_norm": 1.1993497610092163, |
|
"learning_rate": 0.0004172573728887722, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.5115, |
|
"grad_norm": 0.9953559041023254, |
|
"learning_rate": 0.00041638225255972696, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 8.0513, |
|
"grad_norm": 1.1929738521575928, |
|
"learning_rate": 0.0004155071322306817, |
|
"epoch": 1.75 |
|
}, |
|
{ |
|
"loss": 7.4431, |
|
"grad_norm": 1.0211223363876343, |
|
"learning_rate": 0.0004146320119016365, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.4024, |
|
"grad_norm": 1.0484708547592163, |
|
"learning_rate": 0.00041375689157259124, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.4321, |
|
"grad_norm": 1.2012499570846558, |
|
"learning_rate": 0.000412881771243546, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.2608, |
|
"grad_norm": 0.9850478768348694, |
|
"learning_rate": 0.0004120066509145007, |
|
"epoch": 1.76 |
|
}, |
|
{ |
|
"loss": 7.4744, |
|
"grad_norm": 1.1142171621322632, |
|
"learning_rate": 0.00041113153058545547, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.8258, |
|
"grad_norm": 1.0107368230819702, |
|
"learning_rate": 0.00041025641025641023, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 8.0338, |
|
"grad_norm": 1.3827756643295288, |
|
"learning_rate": 0.000409381289927365, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.2029, |
|
"grad_norm": 1.056078553199768, |
|
"learning_rate": 0.00040850616959831976, |
|
"epoch": 1.77 |
|
}, |
|
{ |
|
"loss": 7.9763, |
|
"grad_norm": 1.3796826601028442, |
|
"learning_rate": 0.0004076310492692745, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.39, |
|
"grad_norm": 1.5586506128311157, |
|
"learning_rate": 0.0004067559289402293, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.7479, |
|
"grad_norm": 1.3467471599578857, |
|
"learning_rate": 0.00040588080861118404, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.5281, |
|
"grad_norm": 1.5824648141860962, |
|
"learning_rate": 0.0004050056882821388, |
|
"epoch": 1.78 |
|
}, |
|
{ |
|
"loss": 7.4095, |
|
"grad_norm": 1.5600448846817017, |
|
"learning_rate": 0.00040413056795309357, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.6296, |
|
"grad_norm": 1.4003773927688599, |
|
"learning_rate": 0.00040325544762404833, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.2299, |
|
"grad_norm": 1.1784484386444092, |
|
"learning_rate": 0.0004023803272950031, |
|
"epoch": 1.79 |
|
}, |
|
{ |
|
"loss": 7.2215, |
|
"grad_norm": 1.0865730047225952, |
|
"learning_rate": 0.0004015052069659578, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.619, |
|
"grad_norm": 1.3708497285842896, |
|
"learning_rate": 0.00040063008663691256, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.6305, |
|
"grad_norm": 1.3728278875350952, |
|
"learning_rate": 0.0003997549663078673, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.4218, |
|
"grad_norm": 1.385901689529419, |
|
"learning_rate": 0.0003988798459788221, |
|
"epoch": 1.8 |
|
}, |
|
{ |
|
"loss": 7.7959, |
|
"grad_norm": 1.5370672941207886, |
|
"learning_rate": 0.00039800472564977685, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.8249, |
|
"grad_norm": 1.039469838142395, |
|
"learning_rate": 0.0003971296053207316, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.4311, |
|
"grad_norm": 1.4947952032089233, |
|
"learning_rate": 0.00039625448499168637, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.7797, |
|
"grad_norm": 1.2262136936187744, |
|
"learning_rate": 0.00039546687669554567, |
|
"epoch": 1.81 |
|
}, |
|
{ |
|
"loss": 7.8844, |
|
"grad_norm": 1.5757509469985962, |
|
"learning_rate": 0.00039459175636650043, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.8737, |
|
"grad_norm": 1.2183258533477783, |
|
"learning_rate": 0.0003937166360374552, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.3515, |
|
"grad_norm": 1.3697617053985596, |
|
"learning_rate": 0.00039284151570840995, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.1169, |
|
"grad_norm": 1.3007692098617554, |
|
"learning_rate": 0.00039196639537936466, |
|
"epoch": 1.82 |
|
}, |
|
{ |
|
"loss": 7.2926, |
|
"grad_norm": 1.3538720607757568, |
|
"learning_rate": 0.0003910912750503194, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.8445, |
|
"grad_norm": 1.4245976209640503, |
|
"learning_rate": 0.0003902161547212742, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.456, |
|
"grad_norm": 1.323899269104004, |
|
"learning_rate": 0.00038934103439222894, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.6163, |
|
"grad_norm": 1.2635420560836792, |
|
"learning_rate": 0.0003884659140631837, |
|
"epoch": 1.83 |
|
}, |
|
{ |
|
"loss": 7.5885, |
|
"grad_norm": 1.4714936017990112, |
|
"learning_rate": 0.00038759079373413847, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.8382, |
|
"grad_norm": 1.1696442365646362, |
|
"learning_rate": 0.00038671567340509323, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.4885, |
|
"grad_norm": 1.3797491788864136, |
|
"learning_rate": 0.000385840553076048, |
|
"epoch": 1.84 |
|
}, |
|
{ |
|
"loss": 7.4614, |
|
"grad_norm": 1.0410481691360474, |
|
"learning_rate": 0.00038496543274700275, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 6.9584, |
|
"grad_norm": 1.7356559038162231, |
|
"learning_rate": 0.0003840903124179575, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.8161, |
|
"grad_norm": 1.326489806175232, |
|
"learning_rate": 0.0003832151920889123, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.6985, |
|
"grad_norm": 1.3822075128555298, |
|
"learning_rate": 0.00038234007175986704, |
|
"epoch": 1.85 |
|
}, |
|
{ |
|
"loss": 7.9532, |
|
"grad_norm": 1.2612171173095703, |
|
"learning_rate": 0.00038146495143082175, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.3309, |
|
"grad_norm": 1.8743207454681396, |
|
"learning_rate": 0.0003805898311017765, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.8573, |
|
"grad_norm": 1.515641212463379, |
|
"learning_rate": 0.0003797147107727312, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 8.0815, |
|
"grad_norm": 1.970818281173706, |
|
"learning_rate": 0.000378839590443686, |
|
"epoch": 1.86 |
|
}, |
|
{ |
|
"loss": 7.7197, |
|
"grad_norm": 1.6418136358261108, |
|
"learning_rate": 0.00037796447011464074, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.6527, |
|
"grad_norm": 1.3693944215774536, |
|
"learning_rate": 0.0003770893497855955, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.7717, |
|
"grad_norm": 1.311493992805481, |
|
"learning_rate": 0.00037621422945655026, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 8.0735, |
|
"grad_norm": 1.593992829322815, |
|
"learning_rate": 0.000375339109127505, |
|
"epoch": 1.87 |
|
}, |
|
{ |
|
"loss": 7.4285, |
|
"grad_norm": 1.212729573249817, |
|
"learning_rate": 0.0003744639887984598, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.7873, |
|
"grad_norm": 1.1326895952224731, |
|
"learning_rate": 0.00037358886846941455, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.3515, |
|
"grad_norm": 1.3937299251556396, |
|
"learning_rate": 0.0003727137481403693, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.353, |
|
"grad_norm": 1.5152568817138672, |
|
"learning_rate": 0.00037183862781132407, |
|
"epoch": 1.88 |
|
}, |
|
{ |
|
"loss": 7.8015, |
|
"grad_norm": 1.207973599433899, |
|
"learning_rate": 0.0003709635074822788, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.6713, |
|
"grad_norm": 1.003139615058899, |
|
"learning_rate": 0.00037008838715323354, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.9247, |
|
"grad_norm": 1.1870025396347046, |
|
"learning_rate": 0.0003692132668241883, |
|
"epoch": 1.89 |
|
}, |
|
{ |
|
"loss": 7.4496, |
|
"grad_norm": 1.237275242805481, |
|
"learning_rate": 0.00036833814649514307, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.2638, |
|
"grad_norm": 1.7287304401397705, |
|
"learning_rate": 0.00036746302616609783, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.7464, |
|
"grad_norm": 1.5875813961029053, |
|
"learning_rate": 0.0003665879058370526, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.683, |
|
"grad_norm": 1.7219480276107788, |
|
"learning_rate": 0.00036571278550800735, |
|
"epoch": 1.9 |
|
}, |
|
{ |
|
"loss": 7.6059, |
|
"grad_norm": 1.3815206289291382, |
|
"learning_rate": 0.0003648376651789621, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.3258, |
|
"grad_norm": 1.1902978420257568, |
|
"learning_rate": 0.0003639625448499169, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.4436, |
|
"grad_norm": 1.6532816886901855, |
|
"learning_rate": 0.00036308742452087164, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.438, |
|
"grad_norm": 1.1358212232589722, |
|
"learning_rate": 0.0003622123041918264, |
|
"epoch": 1.91 |
|
}, |
|
{ |
|
"loss": 7.9777, |
|
"grad_norm": 1.3459230661392212, |
|
"learning_rate": 0.00036133718386278116, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.9087, |
|
"grad_norm": 1.0352368354797363, |
|
"learning_rate": 0.0003604620635337359, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.5855, |
|
"grad_norm": 1.2582918405532837, |
|
"learning_rate": 0.00035958694320469063, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.4576, |
|
"grad_norm": 1.1787996292114258, |
|
"learning_rate": 0.0003587118228756454, |
|
"epoch": 1.92 |
|
}, |
|
{ |
|
"loss": 7.2572, |
|
"grad_norm": 1.2917609214782715, |
|
"learning_rate": 0.00035783670254660015, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.6433, |
|
"grad_norm": 1.1689330339431763, |
|
"learning_rate": 0.0003569615822175549, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.6579, |
|
"grad_norm": 1.2844352722167969, |
|
"learning_rate": 0.0003560864618885097, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.5178, |
|
"grad_norm": 1.498838186264038, |
|
"learning_rate": 0.00035521134155946444, |
|
"epoch": 1.93 |
|
}, |
|
{ |
|
"loss": 7.0155, |
|
"grad_norm": 1.3718552589416504, |
|
"learning_rate": 0.0003543362212304192, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.7558, |
|
"grad_norm": 1.2343835830688477, |
|
"learning_rate": 0.00035346110090137396, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.4386, |
|
"grad_norm": 1.307979702949524, |
|
"learning_rate": 0.0003525859805723287, |
|
"epoch": 1.94 |
|
}, |
|
{ |
|
"loss": 7.4287, |
|
"grad_norm": 1.46335768699646, |
|
"learning_rate": 0.0003517108602432835, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.0541, |
|
"grad_norm": 1.4892301559448242, |
|
"learning_rate": 0.00035083573991423825, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.6458, |
|
"grad_norm": 1.3297821283340454, |
|
"learning_rate": 0.000349960619585193, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.3704, |
|
"grad_norm": 1.9190036058425903, |
|
"learning_rate": 0.0003490854992561477, |
|
"epoch": 1.95 |
|
}, |
|
{ |
|
"loss": 7.9292, |
|
"grad_norm": 1.1013009548187256, |
|
"learning_rate": 0.0003482103789271025, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.8039, |
|
"grad_norm": 1.284121036529541, |
|
"learning_rate": 0.00034733525859805724, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.7188, |
|
"grad_norm": 1.118995189666748, |
|
"learning_rate": 0.000346460138269012, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.3617, |
|
"grad_norm": 1.5446746349334717, |
|
"learning_rate": 0.00034558501793996676, |
|
"epoch": 1.96 |
|
}, |
|
{ |
|
"loss": 7.5614, |
|
"grad_norm": 1.254835844039917, |
|
"learning_rate": 0.0003447098976109215, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.9923, |
|
"grad_norm": 2.215224266052246, |
|
"learning_rate": 0.0003438347772818763, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.6609, |
|
"grad_norm": 1.2917975187301636, |
|
"learning_rate": 0.00034295965695283105, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 6.9695, |
|
"grad_norm": 1.3251945972442627, |
|
"learning_rate": 0.0003420845366237858, |
|
"epoch": 1.97 |
|
}, |
|
{ |
|
"loss": 7.4109, |
|
"grad_norm": 1.5397628545761108, |
|
"learning_rate": 0.0003412094162947406, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.3063, |
|
"grad_norm": 1.1789202690124512, |
|
"learning_rate": 0.00034033429596569534, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.8137, |
|
"grad_norm": 1.6068191528320312, |
|
"learning_rate": 0.00033945917563665004, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.5466, |
|
"grad_norm": 1.2397950887680054, |
|
"learning_rate": 0.00033858405530760475, |
|
"epoch": 1.98 |
|
}, |
|
{ |
|
"loss": 7.9522, |
|
"grad_norm": 1.5175119638442993, |
|
"learning_rate": 0.0003377089349785595, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.6781, |
|
"grad_norm": 1.315258502960205, |
|
"learning_rate": 0.0003368338146495143, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.7292, |
|
"grad_norm": 2.664515256881714, |
|
"learning_rate": 0.00033595869432046904, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 8.1965, |
|
"grad_norm": 1.405129313468933, |
|
"learning_rate": 0.0003350835739914238, |
|
"epoch": 1.99 |
|
}, |
|
{ |
|
"loss": 7.4133, |
|
"grad_norm": 1.0774602890014648, |
|
"learning_rate": 0.00033420845366237856, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 8.1777, |
|
"grad_norm": 1.75553560256958, |
|
"learning_rate": 0.0003333333333333333, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.5693, |
|
"grad_norm": 1.857081651687622, |
|
"learning_rate": 0.0003324582130042881, |
|
"epoch": 2.0 |
|
}, |
|
{ |
|
"loss": 7.4888, |
|
"grad_norm": 1.0721529722213745, |
|
"learning_rate": 0.00033158309267524285, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.1311, |
|
"grad_norm": 1.0766797065734863, |
|
"learning_rate": 0.0003307079723461976, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.5107, |
|
"grad_norm": 1.4615150690078735, |
|
"learning_rate": 0.00032983285201715237, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.5258, |
|
"grad_norm": 1.4252068996429443, |
|
"learning_rate": 0.00032895773168810713, |
|
"epoch": 2.01 |
|
}, |
|
{ |
|
"loss": 7.6049, |
|
"grad_norm": 1.2926585674285889, |
|
"learning_rate": 0.0003280826113590619, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.2436, |
|
"grad_norm": 1.6630724668502808, |
|
"learning_rate": 0.0003272074910300166, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 6.951, |
|
"grad_norm": 1.2705895900726318, |
|
"learning_rate": 0.00032633237070097136, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.4782, |
|
"grad_norm": 1.6801918745040894, |
|
"learning_rate": 0.0003254572503719261, |
|
"epoch": 2.02 |
|
}, |
|
{ |
|
"loss": 7.7247, |
|
"grad_norm": 1.2789455652236938, |
|
"learning_rate": 0.0003245821300428809, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.65, |
|
"grad_norm": 1.0772324800491333, |
|
"learning_rate": 0.00032370700971383565, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.3484, |
|
"grad_norm": 1.218855857849121, |
|
"learning_rate": 0.0003228318893847904, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.7201, |
|
"grad_norm": 1.7484831809997559, |
|
"learning_rate": 0.00032195676905574517, |
|
"epoch": 2.03 |
|
}, |
|
{ |
|
"loss": 7.606, |
|
"grad_norm": 1.4081809520721436, |
|
"learning_rate": 0.00032108164872669993, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.4735, |
|
"grad_norm": 1.2214211225509644, |
|
"learning_rate": 0.0003202065283976547, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.3052, |
|
"grad_norm": 2.243197441101074, |
|
"learning_rate": 0.00031933140806860946, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.2611, |
|
"grad_norm": 1.0560696125030518, |
|
"learning_rate": 0.0003184562877395642, |
|
"epoch": 2.04 |
|
}, |
|
{ |
|
"loss": 7.3347, |
|
"grad_norm": 1.3903985023498535, |
|
"learning_rate": 0.000317581167410519, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.4106, |
|
"grad_norm": 1.285888910293579, |
|
"learning_rate": 0.0003167060470814737, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.3237, |
|
"grad_norm": 1.6455745697021484, |
|
"learning_rate": 0.00031583092675242845, |
|
"epoch": 2.05 |
|
}, |
|
{ |
|
"loss": 7.4445, |
|
"grad_norm": 1.3552714586257935, |
|
"learning_rate": 0.0003149558064233832, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.3175, |
|
"grad_norm": 1.4250375032424927, |
|
"learning_rate": 0.000314080686094338, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.5334, |
|
"grad_norm": 1.8445017337799072, |
|
"learning_rate": 0.00031320556576529274, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.7627, |
|
"grad_norm": 1.1116868257522583, |
|
"learning_rate": 0.0003123304454362475, |
|
"epoch": 2.06 |
|
}, |
|
{ |
|
"loss": 7.5347, |
|
"grad_norm": 1.1636768579483032, |
|
"learning_rate": 0.00031145532510720226, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.6581, |
|
"grad_norm": 1.4612860679626465, |
|
"learning_rate": 0.000310580204778157, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.6164, |
|
"grad_norm": 1.4403191804885864, |
|
"learning_rate": 0.0003097050844491118, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.3776, |
|
"grad_norm": 1.366955041885376, |
|
"learning_rate": 0.00030882996412006655, |
|
"epoch": 2.07 |
|
}, |
|
{ |
|
"loss": 7.556, |
|
"grad_norm": 1.4476971626281738, |
|
"learning_rate": 0.0003079548437910213, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.6019, |
|
"grad_norm": 1.4753084182739258, |
|
"learning_rate": 0.00030707972346197607, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.8493, |
|
"grad_norm": 1.2335758209228516, |
|
"learning_rate": 0.00030620460313293083, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.9252, |
|
"grad_norm": 1.3958989381790161, |
|
"learning_rate": 0.00030532948280388554, |
|
"epoch": 2.08 |
|
}, |
|
{ |
|
"loss": 7.2945, |
|
"grad_norm": 1.4621672630310059, |
|
"learning_rate": 0.0003044543624748403, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.3977, |
|
"grad_norm": 1.428195834159851, |
|
"learning_rate": 0.00030357924214579506, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.74, |
|
"grad_norm": 1.363600492477417, |
|
"learning_rate": 0.0003027041218167498, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.4894, |
|
"grad_norm": 1.2117736339569092, |
|
"learning_rate": 0.0003018290014877046, |
|
"epoch": 2.09 |
|
}, |
|
{ |
|
"loss": 7.5678, |
|
"grad_norm": 1.9844530820846558, |
|
"learning_rate": 0.00030095388115865935, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.6681, |
|
"grad_norm": 1.3558523654937744, |
|
"learning_rate": 0.0003000787608296141, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 7.9793, |
|
"grad_norm": 1.3802049160003662, |
|
"learning_rate": 0.0002992036405005688, |
|
"epoch": 2.1 |
|
}, |
|
{ |
|
"loss": 8.1848, |
|
"grad_norm": 1.845702886581421, |
|
"learning_rate": 0.0002983285201715236, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.2184, |
|
"grad_norm": 1.4479707479476929, |
|
"learning_rate": 0.00029745339984247834, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.4373, |
|
"grad_norm": 1.9233028888702393, |
|
"learning_rate": 0.0002965782795134331, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.2478, |
|
"grad_norm": 1.3621513843536377, |
|
"learning_rate": 0.00029570315918438786, |
|
"epoch": 2.11 |
|
}, |
|
{ |
|
"loss": 7.5867, |
|
"grad_norm": 1.449763298034668, |
|
"learning_rate": 0.00029482803885534257, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.2909, |
|
"grad_norm": 1.543834924697876, |
|
"learning_rate": 0.00029395291852629733, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.5481, |
|
"grad_norm": 1.2582162618637085, |
|
"learning_rate": 0.0002930777981972521, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.2092, |
|
"grad_norm": 1.25532865524292, |
|
"learning_rate": 0.00029220267786820686, |
|
"epoch": 2.12 |
|
}, |
|
{ |
|
"loss": 7.5117, |
|
"grad_norm": 1.4368300437927246, |
|
"learning_rate": 0.0002913275575391616, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.8661, |
|
"grad_norm": 1.4054632186889648, |
|
"learning_rate": 0.0002904524372101164, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 7.7641, |
|
"grad_norm": 1.4426825046539307, |
|
"learning_rate": 0.00028957731688107114, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 6.9808, |
|
"grad_norm": 1.6069836616516113, |
|
"learning_rate": 0.0002887021965520259, |
|
"epoch": 2.13 |
|
}, |
|
{ |
|
"loss": 8.0412, |
|
"grad_norm": 1.603289246559143, |
|
"learning_rate": 0.00028782707622298067, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.7541, |
|
"grad_norm": 1.2069703340530396, |
|
"learning_rate": 0.00028695195589393543, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.5413, |
|
"grad_norm": 1.2976186275482178, |
|
"learning_rate": 0.0002860768355648902, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.6833, |
|
"grad_norm": 1.4646226167678833, |
|
"learning_rate": 0.00028520171523584495, |
|
"epoch": 2.14 |
|
}, |
|
{ |
|
"loss": 7.3603, |
|
"grad_norm": 1.3783011436462402, |
|
"learning_rate": 0.00028432659490679966, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.1131, |
|
"grad_norm": 1.1677837371826172, |
|
"learning_rate": 0.0002834514745777544, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.8353, |
|
"grad_norm": 1.5966696739196777, |
|
"learning_rate": 0.0002825763542487092, |
|
"epoch": 2.15 |
|
}, |
|
{ |
|
"loss": 7.651, |
|
"grad_norm": 1.3074275255203247, |
|
"learning_rate": 0.00028170123391966394, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 6.8535, |
|
"grad_norm": 1.2238943576812744, |
|
"learning_rate": 0.0002808261135906187, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.1677, |
|
"grad_norm": 1.2107079029083252, |
|
"learning_rate": 0.00027995099326157347, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.1232, |
|
"grad_norm": 1.482686996459961, |
|
"learning_rate": 0.00027907587293252823, |
|
"epoch": 2.16 |
|
}, |
|
{ |
|
"loss": 7.6958, |
|
"grad_norm": 1.9235337972640991, |
|
"learning_rate": 0.000278200752603483, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.5763, |
|
"grad_norm": 1.0629470348358154, |
|
"learning_rate": 0.00027732563227443775, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.417, |
|
"grad_norm": 1.4404977560043335, |
|
"learning_rate": 0.0002764505119453925, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.4457, |
|
"grad_norm": 1.6266590356826782, |
|
"learning_rate": 0.0002755753916163473, |
|
"epoch": 2.17 |
|
}, |
|
{ |
|
"loss": 7.6768, |
|
"grad_norm": 1.4418647289276123, |
|
"learning_rate": 0.00027470027128730204, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.7301, |
|
"grad_norm": 1.7269823551177979, |
|
"learning_rate": 0.0002738251509582568, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.1704, |
|
"grad_norm": 1.9527968168258667, |
|
"learning_rate": 0.0002729500306292115, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 8.0284, |
|
"grad_norm": 1.1195765733718872, |
|
"learning_rate": 0.00027207491030016627, |
|
"epoch": 2.18 |
|
}, |
|
{ |
|
"loss": 7.876, |
|
"grad_norm": 1.381032109260559, |
|
"learning_rate": 0.00027119978997112103, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.4609, |
|
"grad_norm": 2.2558112144470215, |
|
"learning_rate": 0.0002703246696420758, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.524, |
|
"grad_norm": 1.0892398357391357, |
|
"learning_rate": 0.00026944954931303056, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.1756, |
|
"grad_norm": 1.432793140411377, |
|
"learning_rate": 0.0002685744289839853, |
|
"epoch": 2.19 |
|
}, |
|
{ |
|
"loss": 7.4677, |
|
"grad_norm": 2.4381473064422607, |
|
"learning_rate": 0.0002676993086549401, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.2004, |
|
"grad_norm": 1.0947704315185547, |
|
"learning_rate": 0.00026682418832589484, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.6084, |
|
"grad_norm": 1.1396403312683105, |
|
"learning_rate": 0.0002659490679968496, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.4592, |
|
"grad_norm": 1.7132469415664673, |
|
"learning_rate": 0.00026507394766780437, |
|
"epoch": 2.2 |
|
}, |
|
{ |
|
"loss": 7.6666, |
|
"grad_norm": 1.507416844367981, |
|
"learning_rate": 0.00026419882733875913, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.9483, |
|
"grad_norm": 1.997502326965332, |
|
"learning_rate": 0.0002633237070097139, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 6.8979, |
|
"grad_norm": 1.180274486541748, |
|
"learning_rate": 0.0002624485866806686, |
|
"epoch": 2.21 |
|
}, |
|
{ |
|
"loss": 7.5387, |
|
"grad_norm": 1.4130629301071167, |
|
"learning_rate": 0.00026157346635162336, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.7374, |
|
"grad_norm": 1.9466407299041748, |
|
"learning_rate": 0.0002606983460225781, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.2489, |
|
"grad_norm": 1.2844946384429932, |
|
"learning_rate": 0.00025982322569353283, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.2583, |
|
"grad_norm": 1.4728493690490723, |
|
"learning_rate": 0.0002589481053644876, |
|
"epoch": 2.22 |
|
}, |
|
{ |
|
"loss": 7.1689, |
|
"grad_norm": 1.505767583847046, |
|
"learning_rate": 0.00025807298503544235, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.3824, |
|
"grad_norm": 1.164609432220459, |
|
"learning_rate": 0.0002571978647063971, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 8.208, |
|
"grad_norm": 1.3337666988372803, |
|
"learning_rate": 0.0002563227443773519, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.1503, |
|
"grad_norm": 1.2840052843093872, |
|
"learning_rate": 0.00025544762404830664, |
|
"epoch": 2.23 |
|
}, |
|
{ |
|
"loss": 7.7838, |
|
"grad_norm": 1.6767994165420532, |
|
"learning_rate": 0.0002545725037192614, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.4818, |
|
"grad_norm": 1.2790688276290894, |
|
"learning_rate": 0.00025369738339021616, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.1404, |
|
"grad_norm": 1.9306037425994873, |
|
"learning_rate": 0.0002528222630611709, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 6.9151, |
|
"grad_norm": 1.0568101406097412, |
|
"learning_rate": 0.00025194714273212563, |
|
"epoch": 2.24 |
|
}, |
|
{ |
|
"loss": 7.5813, |
|
"grad_norm": 1.8494940996170044, |
|
"learning_rate": 0.0002510720224030804, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.1433, |
|
"grad_norm": 1.2321641445159912, |
|
"learning_rate": 0.00025019690207403515, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.0211, |
|
"grad_norm": 1.5231260061264038, |
|
"learning_rate": 0.0002493217817449899, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.5108, |
|
"grad_norm": 1.6787548065185547, |
|
"learning_rate": 0.0002484466614159447, |
|
"epoch": 2.25 |
|
}, |
|
{ |
|
"loss": 7.5859, |
|
"grad_norm": 1.8862128257751465, |
|
"learning_rate": 0.00024757154108689944, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.0871, |
|
"grad_norm": 1.5295615196228027, |
|
"learning_rate": 0.0002466964207578542, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.2151, |
|
"grad_norm": 1.6439179182052612, |
|
"learning_rate": 0.00024582130042880896, |
|
"epoch": 2.26 |
|
}, |
|
{ |
|
"loss": 7.851, |
|
"grad_norm": 1.5902001857757568, |
|
"learning_rate": 0.0002449461800997637, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.695, |
|
"grad_norm": 1.447240948677063, |
|
"learning_rate": 0.00024407105977071846, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.218, |
|
"grad_norm": 1.7448298931121826, |
|
"learning_rate": 0.00024319593944167322, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.4559, |
|
"grad_norm": 1.7815390825271606, |
|
"learning_rate": 0.00024232081911262798, |
|
"epoch": 2.27 |
|
}, |
|
{ |
|
"loss": 7.5519, |
|
"grad_norm": 1.746805191040039, |
|
"learning_rate": 0.00024144569878358275, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.4818, |
|
"grad_norm": 1.771155834197998, |
|
"learning_rate": 0.0002405705784545375, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.8775, |
|
"grad_norm": 1.2886364459991455, |
|
"learning_rate": 0.00023969545812549227, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.0862, |
|
"grad_norm": 1.3562748432159424, |
|
"learning_rate": 0.00023882033779644703, |
|
"epoch": 2.28 |
|
}, |
|
{ |
|
"loss": 7.4458, |
|
"grad_norm": 1.5549288988113403, |
|
"learning_rate": 0.00023794521746740177, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.5017, |
|
"grad_norm": 1.3231199979782104, |
|
"learning_rate": 0.00023707009713835653, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 6.9317, |
|
"grad_norm": 1.0973995923995972, |
|
"learning_rate": 0.0002361949768093113, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.2512, |
|
"grad_norm": 1.161665916442871, |
|
"learning_rate": 0.00023531985648026605, |
|
"epoch": 2.29 |
|
}, |
|
{ |
|
"loss": 7.3376, |
|
"grad_norm": 1.1249802112579346, |
|
"learning_rate": 0.0002344447361512208, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.6856, |
|
"grad_norm": 1.4549752473831177, |
|
"learning_rate": 0.00023356961582217557, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.6518, |
|
"grad_norm": 1.2443310022354126, |
|
"learning_rate": 0.0002326944954931303, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.9287, |
|
"grad_norm": 1.2414274215698242, |
|
"learning_rate": 0.00023181937516408507, |
|
"epoch": 2.3 |
|
}, |
|
{ |
|
"loss": 7.4844, |
|
"grad_norm": 1.250632882118225, |
|
"learning_rate": 0.00023094425483503983, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 6.9439, |
|
"grad_norm": 1.5678353309631348, |
|
"learning_rate": 0.0002300691345059946, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.2214, |
|
"grad_norm": 1.2777363061904907, |
|
"learning_rate": 0.00022919401417694933, |
|
"epoch": 2.31 |
|
}, |
|
{ |
|
"loss": 7.6909, |
|
"grad_norm": 1.1702243089675903, |
|
"learning_rate": 0.0002283188938479041, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.843, |
|
"grad_norm": 1.1647387742996216, |
|
"learning_rate": 0.00022744377351885883, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.5598, |
|
"grad_norm": 1.5888360738754272, |
|
"learning_rate": 0.0002265686531898136, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 7.4084, |
|
"grad_norm": 1.2132010459899902, |
|
"learning_rate": 0.00022569353286076835, |
|
"epoch": 2.32 |
|
}, |
|
{ |
|
"loss": 8.0077, |
|
"grad_norm": 1.3676106929779053, |
|
"learning_rate": 0.0002248184125317231, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.4475, |
|
"grad_norm": 1.4785172939300537, |
|
"learning_rate": 0.00022394329220267787, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.4934, |
|
"grad_norm": 1.6854803562164307, |
|
"learning_rate": 0.00022306817187363264, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.5371, |
|
"grad_norm": 1.3336540460586548, |
|
"learning_rate": 0.00022219305154458737, |
|
"epoch": 2.33 |
|
}, |
|
{ |
|
"loss": 7.091, |
|
"grad_norm": 1.5374839305877686, |
|
"learning_rate": 0.00022131793121554213, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.5715, |
|
"grad_norm": 1.259857177734375, |
|
"learning_rate": 0.0002204428108864969, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.5012, |
|
"grad_norm": 1.435889482498169, |
|
"learning_rate": 0.00021956769055745166, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.5925, |
|
"grad_norm": 1.6067544221878052, |
|
"learning_rate": 0.00021869257022840642, |
|
"epoch": 2.34 |
|
}, |
|
{ |
|
"loss": 7.2756, |
|
"grad_norm": 1.2057377099990845, |
|
"learning_rate": 0.00021781744989936118, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.0737, |
|
"grad_norm": 1.0249065160751343, |
|
"learning_rate": 0.00021694232957031591, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.2857, |
|
"grad_norm": 1.1336891651153564, |
|
"learning_rate": 0.00021606720924127068, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 7.0709, |
|
"grad_norm": 1.1853156089782715, |
|
"learning_rate": 0.00021519208891222544, |
|
"epoch": 2.35 |
|
}, |
|
{ |
|
"loss": 6.9118, |
|
"grad_norm": 1.4682341814041138, |
|
"learning_rate": 0.0002143169685831802, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.3363, |
|
"grad_norm": 1.3039721250534058, |
|
"learning_rate": 0.00021344184825413496, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.2827, |
|
"grad_norm": 1.28932785987854, |
|
"learning_rate": 0.00021256672792508972, |
|
"epoch": 2.36 |
|
}, |
|
{ |
|
"loss": 7.6069, |
|
"grad_norm": 1.7343271970748901, |
|
"learning_rate": 0.00021169160759604448, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.3543, |
|
"grad_norm": 1.9730132818222046, |
|
"learning_rate": 0.00021081648726699922, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.3351, |
|
"grad_norm": 2.070822238922119, |
|
"learning_rate": 0.00020994136693795395, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.3199, |
|
"grad_norm": 1.1327873468399048, |
|
"learning_rate": 0.00020906624660890872, |
|
"epoch": 2.37 |
|
}, |
|
{ |
|
"loss": 7.4058, |
|
"grad_norm": 1.3796617984771729, |
|
"learning_rate": 0.00020819112627986348, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.3027, |
|
"grad_norm": 1.8397942781448364, |
|
"learning_rate": 0.00020731600595081824, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.6354, |
|
"grad_norm": 1.4503923654556274, |
|
"learning_rate": 0.0002065283976546775, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.2284, |
|
"grad_norm": 1.550950527191162, |
|
"learning_rate": 0.00020565327732563227, |
|
"epoch": 2.38 |
|
}, |
|
{ |
|
"loss": 7.3061, |
|
"grad_norm": 1.5306216478347778, |
|
"learning_rate": 0.00020477815699658703, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.3337, |
|
"grad_norm": 1.269167184829712, |
|
"learning_rate": 0.0002039030366675418, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.7686, |
|
"grad_norm": 1.600019931793213, |
|
"learning_rate": 0.00020302791633849656, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.35, |
|
"grad_norm": 1.5773662328720093, |
|
"learning_rate": 0.0002021527960094513, |
|
"epoch": 2.39 |
|
}, |
|
{ |
|
"loss": 7.3691, |
|
"grad_norm": 1.547160029411316, |
|
"learning_rate": 0.00020127767568040605, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.4863, |
|
"grad_norm": 1.4968856573104858, |
|
"learning_rate": 0.00020040255535136081, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.9482, |
|
"grad_norm": 1.2087891101837158, |
|
"learning_rate": 0.00019952743502231558, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.0255, |
|
"grad_norm": 1.290597677230835, |
|
"learning_rate": 0.00019865231469327034, |
|
"epoch": 2.4 |
|
}, |
|
{ |
|
"loss": 7.178, |
|
"grad_norm": 1.5743247270584106, |
|
"learning_rate": 0.0001977771943642251, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.6474, |
|
"grad_norm": 1.5197412967681885, |
|
"learning_rate": 0.00019690207403517984, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.3527, |
|
"grad_norm": 1.4716495275497437, |
|
"learning_rate": 0.0001960269537061346, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.6313, |
|
"grad_norm": 1.9746785163879395, |
|
"learning_rate": 0.00019515183337708936, |
|
"epoch": 2.41 |
|
}, |
|
{ |
|
"loss": 7.6972, |
|
"grad_norm": 1.2683417797088623, |
|
"learning_rate": 0.00019427671304804412, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.1378, |
|
"grad_norm": 1.1373748779296875, |
|
"learning_rate": 0.00019340159271899888, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 7.0196, |
|
"grad_norm": 1.4191349744796753, |
|
"learning_rate": 0.00019252647238995364, |
|
"epoch": 2.42 |
|
}, |
|
{ |
|
"loss": 6.9102, |
|
"grad_norm": 1.6580002307891846, |
|
"learning_rate": 0.00019165135206090838, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.5105, |
|
"grad_norm": 1.2877469062805176, |
|
"learning_rate": 0.00019077623173186314, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 8.0212, |
|
"grad_norm": 1.2933236360549927, |
|
"learning_rate": 0.00018990111140281788, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.2108, |
|
"grad_norm": 1.6515684127807617, |
|
"learning_rate": 0.00018902599107377264, |
|
"epoch": 2.43 |
|
}, |
|
{ |
|
"loss": 7.2944, |
|
"grad_norm": 1.443547010421753, |
|
"learning_rate": 0.0001881508707447274, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 6.9623, |
|
"grad_norm": 1.5022013187408447, |
|
"learning_rate": 0.00018727575041568216, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.5751, |
|
"grad_norm": 1.639228343963623, |
|
"learning_rate": 0.0001864006300866369, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.6183, |
|
"grad_norm": 1.3685816526412964, |
|
"learning_rate": 0.00018552550975759166, |
|
"epoch": 2.44 |
|
}, |
|
{ |
|
"loss": 7.7862, |
|
"grad_norm": 1.4008909463882446, |
|
"learning_rate": 0.00018465038942854642, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.3036, |
|
"grad_norm": 1.4068384170532227, |
|
"learning_rate": 0.00018377526909950118, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.3222, |
|
"grad_norm": 1.4874199628829956, |
|
"learning_rate": 0.00018290014877045594, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.4538, |
|
"grad_norm": 2.161606788635254, |
|
"learning_rate": 0.0001820250284414107, |
|
"epoch": 2.45 |
|
}, |
|
{ |
|
"loss": 7.099, |
|
"grad_norm": 1.4761602878570557, |
|
"learning_rate": 0.00018114990811236544, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.6725, |
|
"grad_norm": 1.3598577976226807, |
|
"learning_rate": 0.0001802747877833202, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.4651, |
|
"grad_norm": 1.352389931678772, |
|
"learning_rate": 0.00017939966745427496, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.0266, |
|
"grad_norm": 1.302270770072937, |
|
"learning_rate": 0.00017852454712522973, |
|
"epoch": 2.46 |
|
}, |
|
{ |
|
"loss": 7.4879, |
|
"grad_norm": 1.2166621685028076, |
|
"learning_rate": 0.0001776494267961845, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 6.7354, |
|
"grad_norm": 1.4442105293273926, |
|
"learning_rate": 0.00017677430646713925, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.1184, |
|
"grad_norm": 1.6301904916763306, |
|
"learning_rate": 0.000175899186138094, |
|
"epoch": 2.47 |
|
}, |
|
{ |
|
"loss": 7.4326, |
|
"grad_norm": 1.2478090524673462, |
|
"learning_rate": 0.00017502406580904875, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.6185, |
|
"grad_norm": 1.2676613330841064, |
|
"learning_rate": 0.0001741489454800035, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.439, |
|
"grad_norm": 1.4324458837509155, |
|
"learning_rate": 0.00017327382515095827, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.7999, |
|
"grad_norm": 1.634446382522583, |
|
"learning_rate": 0.00017239870482191303, |
|
"epoch": 2.48 |
|
}, |
|
{ |
|
"loss": 7.3043, |
|
"grad_norm": 1.2877479791641235, |
|
"learning_rate": 0.0001715235844928678, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.054, |
|
"grad_norm": 1.7003803253173828, |
|
"learning_rate": 0.00017064846416382255, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.1568, |
|
"grad_norm": 1.8888310194015503, |
|
"learning_rate": 0.00016977334383477726, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.3495, |
|
"grad_norm": 1.2593083381652832, |
|
"learning_rate": 0.00016889822350573202, |
|
"epoch": 2.49 |
|
}, |
|
{ |
|
"loss": 7.4716, |
|
"grad_norm": 1.4410508871078491, |
|
"learning_rate": 0.00016802310317668679, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.5133, |
|
"grad_norm": 1.20904541015625, |
|
"learning_rate": 0.00016714798284764155, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.3222, |
|
"grad_norm": 1.4503611326217651, |
|
"learning_rate": 0.0001662728625185963, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.6387, |
|
"grad_norm": 1.3705183267593384, |
|
"learning_rate": 0.00016539774218955107, |
|
"epoch": 2.5 |
|
}, |
|
{ |
|
"loss": 7.0609, |
|
"grad_norm": 1.2106906175613403, |
|
"learning_rate": 0.0001645226218605058, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.342, |
|
"grad_norm": 1.5564229488372803, |
|
"learning_rate": 0.00016364750153146057, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.8121, |
|
"grad_norm": 1.6493812799453735, |
|
"learning_rate": 0.00016277238120241533, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.3909, |
|
"grad_norm": 1.9025623798370361, |
|
"learning_rate": 0.0001618972608733701, |
|
"epoch": 2.51 |
|
}, |
|
{ |
|
"loss": 7.0106, |
|
"grad_norm": 1.2934685945510864, |
|
"learning_rate": 0.00016102214054432485, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.5199, |
|
"grad_norm": 1.2549662590026855, |
|
"learning_rate": 0.00016014702021527962, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.3509, |
|
"grad_norm": 1.2111480236053467, |
|
"learning_rate": 0.00015927189988623435, |
|
"epoch": 2.52 |
|
}, |
|
{ |
|
"loss": 7.5281, |
|
"grad_norm": 2.2498984336853027, |
|
"learning_rate": 0.0001583967795571891, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.5218, |
|
"grad_norm": 1.4710973501205444, |
|
"learning_rate": 0.00015752165922814387, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.1575, |
|
"grad_norm": 1.4040391445159912, |
|
"learning_rate": 0.00015664653889909864, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.3097, |
|
"grad_norm": 2.3657708168029785, |
|
"learning_rate": 0.0001557714185700534, |
|
"epoch": 2.53 |
|
}, |
|
{ |
|
"loss": 7.3235, |
|
"grad_norm": 1.8456711769104004, |
|
"learning_rate": 0.00015489629824100816, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.1772, |
|
"grad_norm": 1.3032398223876953, |
|
"learning_rate": 0.0001540211779119629, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.331, |
|
"grad_norm": 1.2472988367080688, |
|
"learning_rate": 0.00015314605758291766, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 6.9758, |
|
"grad_norm": 1.1861238479614258, |
|
"learning_rate": 0.00015227093725387242, |
|
"epoch": 2.54 |
|
}, |
|
{ |
|
"loss": 7.357, |
|
"grad_norm": 1.2937425374984741, |
|
"learning_rate": 0.00015139581692482718, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.6132, |
|
"grad_norm": 1.5241109132766724, |
|
"learning_rate": 0.00015052069659578194, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.1769, |
|
"grad_norm": 1.2426915168762207, |
|
"learning_rate": 0.00014964557626673668, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 7.2242, |
|
"grad_norm": 1.5336363315582275, |
|
"learning_rate": 0.0001487704559376914, |
|
"epoch": 2.55 |
|
}, |
|
{ |
|
"loss": 8.0839, |
|
"grad_norm": 1.6944379806518555, |
|
"learning_rate": 0.00014789533560864617, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.2667, |
|
"grad_norm": 1.6602429151535034, |
|
"learning_rate": 0.00014702021527960093, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.4821, |
|
"grad_norm": 1.331986665725708, |
|
"learning_rate": 0.0001461450949505557, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.4808, |
|
"grad_norm": 1.4923409223556519, |
|
"learning_rate": 0.00014526997462151046, |
|
"epoch": 2.56 |
|
}, |
|
{ |
|
"loss": 7.3579, |
|
"grad_norm": 1.5323739051818848, |
|
"learning_rate": 0.00014439485429246522, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.1833, |
|
"grad_norm": 1.0281411409378052, |
|
"learning_rate": 0.00014351973396341998, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.521, |
|
"grad_norm": 1.777385950088501, |
|
"learning_rate": 0.00014264461363437472, |
|
"epoch": 2.57 |
|
}, |
|
{ |
|
"loss": 7.5531, |
|
"grad_norm": 1.7528423070907593, |
|
"learning_rate": 0.00014176949330532948, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.3295, |
|
"grad_norm": 1.665503740310669, |
|
"learning_rate": 0.00014089437297628424, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 6.9815, |
|
"grad_norm": 1.4323763847351074, |
|
"learning_rate": 0.000140019252647239, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.7957, |
|
"grad_norm": 1.2623038291931152, |
|
"learning_rate": 0.00013914413231819376, |
|
"epoch": 2.58 |
|
}, |
|
{ |
|
"loss": 7.2667, |
|
"grad_norm": 1.3770829439163208, |
|
"learning_rate": 0.00013826901198914853, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.2641, |
|
"grad_norm": 1.495597243309021, |
|
"learning_rate": 0.00013739389166010326, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.6276, |
|
"grad_norm": 1.0396783351898193, |
|
"learning_rate": 0.00013651877133105802, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 7.4811, |
|
"grad_norm": 1.5590603351593018, |
|
"learning_rate": 0.00013564365100201278, |
|
"epoch": 2.59 |
|
}, |
|
{ |
|
"loss": 6.9941, |
|
"grad_norm": 1.266262173652649, |
|
"learning_rate": 0.00013476853067296755, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.0138, |
|
"grad_norm": 1.3331608772277832, |
|
"learning_rate": 0.0001338934103439223, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.6792, |
|
"grad_norm": 1.54330575466156, |
|
"learning_rate": 0.00013301829001487707, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.5151, |
|
"grad_norm": 1.266360878944397, |
|
"learning_rate": 0.0001321431696858318, |
|
"epoch": 2.6 |
|
}, |
|
{ |
|
"loss": 7.6357, |
|
"grad_norm": 1.1992617845535278, |
|
"learning_rate": 0.00013126804935678657, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.6848, |
|
"grad_norm": 1.6269259452819824, |
|
"learning_rate": 0.00013039292902774133, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.3941, |
|
"grad_norm": 1.4221471548080444, |
|
"learning_rate": 0.00012951780869869606, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.5638, |
|
"grad_norm": 1.31778085231781, |
|
"learning_rate": 0.00012864268836965082, |
|
"epoch": 2.61 |
|
}, |
|
{ |
|
"loss": 7.3716, |
|
"grad_norm": 1.4217979907989502, |
|
"learning_rate": 0.00012776756804060559, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.7403, |
|
"grad_norm": 1.549012541770935, |
|
"learning_rate": 0.00012689244771156032, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.5079, |
|
"grad_norm": 1.7808821201324463, |
|
"learning_rate": 0.00012601732738251508, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.338, |
|
"grad_norm": 1.6030139923095703, |
|
"learning_rate": 0.00012514220705346984, |
|
"epoch": 2.62 |
|
}, |
|
{ |
|
"loss": 7.2113, |
|
"grad_norm": 1.688103437423706, |
|
"learning_rate": 0.0001242670867244246, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.5297, |
|
"grad_norm": 1.4482861757278442, |
|
"learning_rate": 0.00012339196639537937, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.6226, |
|
"grad_norm": 1.481149435043335, |
|
"learning_rate": 0.00012251684606633413, |
|
"epoch": 2.63 |
|
}, |
|
{ |
|
"loss": 7.1199, |
|
"grad_norm": 1.5914816856384277, |
|
"learning_rate": 0.00012164172573728888, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.5294, |
|
"grad_norm": 1.6436686515808105, |
|
"learning_rate": 0.00012076660540824364, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.7319, |
|
"grad_norm": 1.422884225845337, |
|
"learning_rate": 0.00011989148507919839, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.5878, |
|
"grad_norm": 1.2468681335449219, |
|
"learning_rate": 0.00011901636475015315, |
|
"epoch": 2.64 |
|
}, |
|
{ |
|
"loss": 7.4093, |
|
"grad_norm": 1.6080206632614136, |
|
"learning_rate": 0.00011814124442110791, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 6.927, |
|
"grad_norm": 1.2568819522857666, |
|
"learning_rate": 0.00011726612409206266, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.524, |
|
"grad_norm": 1.4558569192886353, |
|
"learning_rate": 0.00011639100376301742, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 6.7721, |
|
"grad_norm": 1.3554805517196655, |
|
"learning_rate": 0.00011551588343397218, |
|
"epoch": 2.65 |
|
}, |
|
{ |
|
"loss": 7.5129, |
|
"grad_norm": 2.061342239379883, |
|
"learning_rate": 0.00011464076310492692, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.271, |
|
"grad_norm": 1.7581554651260376, |
|
"learning_rate": 0.00011376564277588168, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.4605, |
|
"grad_norm": 1.3818498849868774, |
|
"learning_rate": 0.00011289052244683644, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.2747, |
|
"grad_norm": 1.4640157222747803, |
|
"learning_rate": 0.00011201540211779119, |
|
"epoch": 2.66 |
|
}, |
|
{ |
|
"loss": 7.4137, |
|
"grad_norm": 1.628440499305725, |
|
"learning_rate": 0.00011114028178874595, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.1947, |
|
"grad_norm": 2.1291253566741943, |
|
"learning_rate": 0.00011026516145970071, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.3972, |
|
"grad_norm": 1.53203284740448, |
|
"learning_rate": 0.00010939004113065546, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.1343, |
|
"grad_norm": 1.7009447813034058, |
|
"learning_rate": 0.00010851492080161022, |
|
"epoch": 2.67 |
|
}, |
|
{ |
|
"loss": 7.4999, |
|
"grad_norm": 1.981833815574646, |
|
"learning_rate": 0.00010763980047256499, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.0649, |
|
"grad_norm": 1.4151135683059692, |
|
"learning_rate": 0.00010676468014351973, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.4975, |
|
"grad_norm": 1.8214997053146362, |
|
"learning_rate": 0.0001058895598144745, |
|
"epoch": 2.68 |
|
}, |
|
{ |
|
"loss": 7.1928, |
|
"grad_norm": 1.475014328956604, |
|
"learning_rate": 0.00010501443948542926, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 6.7309, |
|
"grad_norm": 1.500470757484436, |
|
"learning_rate": 0.00010413931915638399, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.2154, |
|
"grad_norm": 1.0923032760620117, |
|
"learning_rate": 0.00010326419882733875, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.4584, |
|
"grad_norm": 1.476189136505127, |
|
"learning_rate": 0.00010238907849829352, |
|
"epoch": 2.69 |
|
}, |
|
{ |
|
"loss": 7.5696, |
|
"grad_norm": 1.3299099206924438, |
|
"learning_rate": 0.00010151395816924828, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.4462, |
|
"grad_norm": 1.248026967048645, |
|
"learning_rate": 0.00010063883784020303, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.057, |
|
"grad_norm": 1.5154845714569092, |
|
"learning_rate": 9.976371751115779e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.4942, |
|
"grad_norm": 1.504868745803833, |
|
"learning_rate": 9.888859718211255e-05, |
|
"epoch": 2.7 |
|
}, |
|
{ |
|
"loss": 7.7042, |
|
"grad_norm": 1.2087482213974, |
|
"learning_rate": 9.80134768530673e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.7138, |
|
"grad_norm": 2.066254138946533, |
|
"learning_rate": 9.713835652402206e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.4746, |
|
"grad_norm": 1.2078548669815063, |
|
"learning_rate": 9.626323619497682e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.5682, |
|
"grad_norm": 1.2530779838562012, |
|
"learning_rate": 9.538811586593157e-05, |
|
"epoch": 2.71 |
|
}, |
|
{ |
|
"loss": 7.4491, |
|
"grad_norm": 1.5170719623565674, |
|
"learning_rate": 9.451299553688632e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.2938, |
|
"grad_norm": 1.2933870553970337, |
|
"learning_rate": 9.363787520784108e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.1455, |
|
"grad_norm": 1.212755799293518, |
|
"learning_rate": 9.276275487879583e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.3702, |
|
"grad_norm": 1.4118942022323608, |
|
"learning_rate": 9.188763454975059e-05, |
|
"epoch": 2.72 |
|
}, |
|
{ |
|
"loss": 7.1194, |
|
"grad_norm": 1.575276494026184, |
|
"learning_rate": 9.101251422070535e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.046, |
|
"grad_norm": 1.3244752883911133, |
|
"learning_rate": 9.01373938916601e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 6.875, |
|
"grad_norm": 1.369280219078064, |
|
"learning_rate": 8.926227356261486e-05, |
|
"epoch": 2.73 |
|
}, |
|
{ |
|
"loss": 7.4045, |
|
"grad_norm": 1.3210042715072632, |
|
"learning_rate": 8.838715323356962e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.5159, |
|
"grad_norm": 1.4352552890777588, |
|
"learning_rate": 8.751203290452437e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.2315, |
|
"grad_norm": 1.4860197305679321, |
|
"learning_rate": 8.663691257547913e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 6.8597, |
|
"grad_norm": 1.2331523895263672, |
|
"learning_rate": 8.57617922464339e-05, |
|
"epoch": 2.74 |
|
}, |
|
{ |
|
"loss": 7.3485, |
|
"grad_norm": 1.2187525033950806, |
|
"learning_rate": 8.488667191738863e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.388, |
|
"grad_norm": 1.1800241470336914, |
|
"learning_rate": 8.401155158834339e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 6.9186, |
|
"grad_norm": 1.3542723655700684, |
|
"learning_rate": 8.313643125929815e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 6.9582, |
|
"grad_norm": 1.3839143514633179, |
|
"learning_rate": 8.22613109302529e-05, |
|
"epoch": 2.75 |
|
}, |
|
{ |
|
"loss": 7.4176, |
|
"grad_norm": 1.4546840190887451, |
|
"learning_rate": 8.138619060120766e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.2731, |
|
"grad_norm": 1.3623560667037964, |
|
"learning_rate": 8.051107027216243e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.1633, |
|
"grad_norm": 1.9331005811691284, |
|
"learning_rate": 7.963594994311717e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 6.8972, |
|
"grad_norm": 1.2791029214859009, |
|
"learning_rate": 7.876082961407194e-05, |
|
"epoch": 2.76 |
|
}, |
|
{ |
|
"loss": 7.1043, |
|
"grad_norm": 1.6202424764633179, |
|
"learning_rate": 7.78857092850267e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.0727, |
|
"grad_norm": 1.0835381746292114, |
|
"learning_rate": 7.701058895598145e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.0958, |
|
"grad_norm": 1.2778371572494507, |
|
"learning_rate": 7.613546862693621e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.2219, |
|
"grad_norm": 1.9295389652252197, |
|
"learning_rate": 7.526034829789097e-05, |
|
"epoch": 2.77 |
|
}, |
|
{ |
|
"loss": 7.0189, |
|
"grad_norm": 1.9394477605819702, |
|
"learning_rate": 7.43852279688457e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.0144, |
|
"grad_norm": 1.4238934516906738, |
|
"learning_rate": 7.351010763980047e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 7.2353, |
|
"grad_norm": 1.350537657737732, |
|
"learning_rate": 7.263498731075523e-05, |
|
"epoch": 2.78 |
|
}, |
|
{ |
|
"loss": 6.7353, |
|
"grad_norm": 1.3214153051376343, |
|
"learning_rate": 7.175986698170999e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.4143, |
|
"grad_norm": 2.469216823577881, |
|
"learning_rate": 7.088474665266474e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.4276, |
|
"grad_norm": 1.414184808731079, |
|
"learning_rate": 7.00096263236195e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 6.9842, |
|
"grad_norm": 1.4708011150360107, |
|
"learning_rate": 6.913450599457426e-05, |
|
"epoch": 2.79 |
|
}, |
|
{ |
|
"loss": 7.572, |
|
"grad_norm": 1.449560284614563, |
|
"learning_rate": 6.825938566552901e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.3449, |
|
"grad_norm": 1.1261264085769653, |
|
"learning_rate": 6.738426533648377e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.1776, |
|
"grad_norm": 1.5502110719680786, |
|
"learning_rate": 6.650914500743853e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.0565, |
|
"grad_norm": 1.3916562795639038, |
|
"learning_rate": 6.563402467839328e-05, |
|
"epoch": 2.8 |
|
}, |
|
{ |
|
"loss": 7.0882, |
|
"grad_norm": 1.361229658126831, |
|
"learning_rate": 6.475890434934803e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 6.981, |
|
"grad_norm": 1.6100305318832397, |
|
"learning_rate": 6.388378402030279e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.2502, |
|
"grad_norm": 1.5449306964874268, |
|
"learning_rate": 6.300866369125754e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.4208, |
|
"grad_norm": 1.3188410997390747, |
|
"learning_rate": 6.21335433622123e-05, |
|
"epoch": 2.81 |
|
}, |
|
{ |
|
"loss": 7.2957, |
|
"grad_norm": 1.543289303779602, |
|
"learning_rate": 6.125842303316706e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.0319, |
|
"grad_norm": 1.1590594053268433, |
|
"learning_rate": 6.038330270412182e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.23, |
|
"grad_norm": 1.1623939275741577, |
|
"learning_rate": 5.9508182375076575e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.1254, |
|
"grad_norm": 1.6204333305358887, |
|
"learning_rate": 5.863306204603133e-05, |
|
"epoch": 2.82 |
|
}, |
|
{ |
|
"loss": 7.4319, |
|
"grad_norm": 1.5845638513565063, |
|
"learning_rate": 5.775794171698609e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.4574, |
|
"grad_norm": 1.3281787633895874, |
|
"learning_rate": 5.688282138794084e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 6.8629, |
|
"grad_norm": 1.6502999067306519, |
|
"learning_rate": 5.6007701058895595e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.1493, |
|
"grad_norm": 1.7768168449401855, |
|
"learning_rate": 5.513258072985036e-05, |
|
"epoch": 2.83 |
|
}, |
|
{ |
|
"loss": 7.1971, |
|
"grad_norm": 1.1763763427734375, |
|
"learning_rate": 5.425746040080511e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.4182, |
|
"grad_norm": 1.4033911228179932, |
|
"learning_rate": 5.338234007175987e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 6.8175, |
|
"grad_norm": 1.5407586097717285, |
|
"learning_rate": 5.250721974271463e-05, |
|
"epoch": 2.84 |
|
}, |
|
{ |
|
"loss": 7.5091, |
|
"grad_norm": 1.5829062461853027, |
|
"learning_rate": 5.163209941366938e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.0728, |
|
"grad_norm": 1.3185957670211792, |
|
"learning_rate": 5.075697908462414e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.1931, |
|
"grad_norm": 1.1996837854385376, |
|
"learning_rate": 4.9881858755578894e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.2327, |
|
"grad_norm": 1.6188883781433105, |
|
"learning_rate": 4.900673842653365e-05, |
|
"epoch": 2.85 |
|
}, |
|
{ |
|
"loss": 7.2432, |
|
"grad_norm": 1.7829197645187378, |
|
"learning_rate": 4.813161809748841e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 6.8231, |
|
"grad_norm": 1.3998175859451294, |
|
"learning_rate": 4.725649776844316e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.5838, |
|
"grad_norm": 1.6664845943450928, |
|
"learning_rate": 4.6381377439397914e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.3804, |
|
"grad_norm": 1.2328096628189087, |
|
"learning_rate": 4.5506257110352676e-05, |
|
"epoch": 2.86 |
|
}, |
|
{ |
|
"loss": 7.1497, |
|
"grad_norm": 1.5543657541275024, |
|
"learning_rate": 4.463113678130743e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.5067, |
|
"grad_norm": 2.0711114406585693, |
|
"learning_rate": 4.3756016452262186e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.1481, |
|
"grad_norm": 2.340829372406006, |
|
"learning_rate": 4.288089612321695e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.2767, |
|
"grad_norm": 1.3014119863510132, |
|
"learning_rate": 4.2005775794171696e-05, |
|
"epoch": 2.87 |
|
}, |
|
{ |
|
"loss": 7.2583, |
|
"grad_norm": 1.186070442199707, |
|
"learning_rate": 4.113065546512645e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.7179, |
|
"grad_norm": 1.4286901950836182, |
|
"learning_rate": 4.025553513608121e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 6.9271, |
|
"grad_norm": 1.561988115310669, |
|
"learning_rate": 3.938041480703597e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 6.9378, |
|
"grad_norm": 1.2756584882736206, |
|
"learning_rate": 3.8505294477990723e-05, |
|
"epoch": 2.88 |
|
}, |
|
{ |
|
"loss": 7.8091, |
|
"grad_norm": 1.5452569723129272, |
|
"learning_rate": 3.7630174148945485e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 6.7905, |
|
"grad_norm": 1.2616968154907227, |
|
"learning_rate": 3.6755053819900234e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 7.3958, |
|
"grad_norm": 1.1684807538986206, |
|
"learning_rate": 3.5879933490854995e-05, |
|
"epoch": 2.89 |
|
}, |
|
{ |
|
"loss": 6.9238, |
|
"grad_norm": 1.351366639137268, |
|
"learning_rate": 3.500481316180975e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.4026, |
|
"grad_norm": 1.2473573684692383, |
|
"learning_rate": 3.4129692832764505e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.4247, |
|
"grad_norm": 1.5123474597930908, |
|
"learning_rate": 3.325457250371927e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.0967, |
|
"grad_norm": 1.1452938318252563, |
|
"learning_rate": 3.2379452174674016e-05, |
|
"epoch": 2.9 |
|
}, |
|
{ |
|
"loss": 7.0357, |
|
"grad_norm": 1.1505627632141113, |
|
"learning_rate": 3.150433184562877e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.4973, |
|
"grad_norm": 1.438091516494751, |
|
"learning_rate": 3.062921151658353e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.4715, |
|
"grad_norm": 1.1489310264587402, |
|
"learning_rate": 2.9754091187538288e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.0076, |
|
"grad_norm": 1.3423534631729126, |
|
"learning_rate": 2.8878970858493046e-05, |
|
"epoch": 2.91 |
|
}, |
|
{ |
|
"loss": 7.0935, |
|
"grad_norm": 1.2484374046325684, |
|
"learning_rate": 2.8003850529447798e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.1792, |
|
"grad_norm": 1.310231328010559, |
|
"learning_rate": 2.7128730200402556e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.3469, |
|
"grad_norm": 1.417974591255188, |
|
"learning_rate": 2.6253609871357314e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.2473, |
|
"grad_norm": 1.3878840208053589, |
|
"learning_rate": 2.537848954231207e-05, |
|
"epoch": 2.92 |
|
}, |
|
{ |
|
"loss": 7.1321, |
|
"grad_norm": 1.6403028964996338, |
|
"learning_rate": 2.459088124617135e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.6076, |
|
"grad_norm": 1.2110294103622437, |
|
"learning_rate": 2.3715760917126104e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.3466, |
|
"grad_norm": 1.203755497932434, |
|
"learning_rate": 2.2840640588080863e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.4367, |
|
"grad_norm": 1.2081892490386963, |
|
"learning_rate": 2.1965520259035618e-05, |
|
"epoch": 2.93 |
|
}, |
|
{ |
|
"loss": 7.6191, |
|
"grad_norm": 1.2515225410461426, |
|
"learning_rate": 2.1090399929990373e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.2915, |
|
"grad_norm": 1.2461618185043335, |
|
"learning_rate": 2.021527960094513e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.0825, |
|
"grad_norm": 1.3424855470657349, |
|
"learning_rate": 1.9340159271899886e-05, |
|
"epoch": 2.94 |
|
}, |
|
{ |
|
"loss": 7.6924, |
|
"grad_norm": 1.2109103202819824, |
|
"learning_rate": 1.846503894285464e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.531, |
|
"grad_norm": 1.2161798477172852, |
|
"learning_rate": 1.75899186138094e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.1992, |
|
"grad_norm": 1.347778081893921, |
|
"learning_rate": 1.6714798284764158e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.7785, |
|
"grad_norm": 1.2869161367416382, |
|
"learning_rate": 1.583967795571891e-05, |
|
"epoch": 2.95 |
|
}, |
|
{ |
|
"loss": 7.6703, |
|
"grad_norm": 1.1452679634094238, |
|
"learning_rate": 1.4964557626673668e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.3311, |
|
"grad_norm": 1.7757437229156494, |
|
"learning_rate": 1.4089437297628423e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.4272, |
|
"grad_norm": 1.2730258703231812, |
|
"learning_rate": 1.3214316968583182e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 6.8195, |
|
"grad_norm": 1.0826276540756226, |
|
"learning_rate": 1.2339196639537937e-05, |
|
"epoch": 2.96 |
|
}, |
|
{ |
|
"loss": 7.1219, |
|
"grad_norm": 1.3847414255142212, |
|
"learning_rate": 1.1464076310492692e-05, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.5912, |
|
"grad_norm": 1.4612926244735718, |
|
"learning_rate": 1.0588955981447449e-05, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 6.9373, |
|
"grad_norm": 1.5692036151885986, |
|
"learning_rate": 9.713835652402205e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.7104, |
|
"grad_norm": 1.4740134477615356, |
|
"learning_rate": 8.838715323356962e-06, |
|
"epoch": 2.97 |
|
}, |
|
{ |
|
"loss": 7.1918, |
|
"grad_norm": 1.026573657989502, |
|
"learning_rate": 7.963594994311717e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 6.8717, |
|
"grad_norm": 1.1959487199783325, |
|
"learning_rate": 7.088474665266474e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.4154, |
|
"grad_norm": 1.1354584693908691, |
|
"learning_rate": 6.213354336221231e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 7.1622, |
|
"grad_norm": 1.3372441530227661, |
|
"learning_rate": 5.338234007175987e-06, |
|
"epoch": 2.98 |
|
}, |
|
{ |
|
"loss": 6.9564, |
|
"grad_norm": 1.1713366508483887, |
|
"learning_rate": 4.463113678130743e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.462, |
|
"grad_norm": 1.8238294124603271, |
|
"learning_rate": 3.587993349085499e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.5493, |
|
"grad_norm": 1.3313993215560913, |
|
"learning_rate": 2.7128730200402555e-06, |
|
"epoch": 2.99 |
|
}, |
|
{ |
|
"loss": 7.2399, |
|
"grad_norm": 1.1780248880386353, |
|
"learning_rate": 1.8377526909950118e-06, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"loss": 6.879, |
|
"grad_norm": 1.2703826427459717, |
|
"learning_rate": 9.626323619497682e-07, |
|
"epoch": 3.0 |
|
}, |
|
{ |
|
"train_runtime": 104781.7564, |
|
"train_samples_per_second": 3.49, |
|
"train_steps_per_second": 0.109, |
|
"train_loss": 8.437174775609405, |
|
"epoch": 3.0 |
|
} |
|
] |