{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996163752945689,
  "eval_steps": 500,
  "global_step": 1140,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008768564695566394,
      "grad_norm": 3.8354088038954104,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 0.8827,
      "step": 1
    },
    {
      "epoch": 0.0017537129391132788,
      "grad_norm": 3.854484535409196,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.8816,
      "step": 2
    },
    {
      "epoch": 0.0026305694086699184,
      "grad_norm": 3.871894613191576,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 0.8801,
      "step": 3
    },
    {
      "epoch": 0.0035074258782265577,
      "grad_norm": 4.015192807591418,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.8778,
      "step": 4
    },
    {
      "epoch": 0.004384282347783197,
      "grad_norm": 3.8093684146898625,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.8711,
      "step": 5
    },
    {
      "epoch": 0.005261138817339837,
      "grad_norm": 3.8610474891808035,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 0.8774,
      "step": 6
    },
    {
      "epoch": 0.0061379952868964765,
      "grad_norm": 3.7967273935876027,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 0.8669,
      "step": 7
    },
    {
      "epoch": 0.007014851756453115,
      "grad_norm": 3.6775126026184703,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.8605,
      "step": 8
    },
    {
      "epoch": 0.007891708226009755,
      "grad_norm": 3.8340713786963674,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 0.8735,
      "step": 9
    },
    {
      "epoch": 0.008768564695566394,
      "grad_norm": 3.7479501504503463,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.8843,
      "step": 10
    },
    {
      "epoch": 0.009645421165123035,
      "grad_norm": 3.6317203672346734,
      "learning_rate": 5.5e-07,
      "loss": 0.8637,
      "step": 11
    },
    {
      "epoch": 0.010522277634679673,
      "grad_norm": 3.512911808429478,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.8649,
      "step": 12
    },
    {
      "epoch": 0.011399134104236312,
      "grad_norm": 3.5056527507086486,
      "learning_rate": 6.5e-07,
      "loss": 0.8514,
      "step": 13
    },
    {
      "epoch": 0.012275990573792953,
      "grad_norm": 3.150666271402955,
      "learning_rate": 7.000000000000001e-07,
      "loss": 0.844,
      "step": 14
    },
    {
      "epoch": 0.013152847043349592,
      "grad_norm": 2.92608322776606,
      "learning_rate": 7.5e-07,
      "loss": 0.8382,
      "step": 15
    },
    {
      "epoch": 0.01402970351290623,
      "grad_norm": 3.0202821236842246,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.8419,
      "step": 16
    },
    {
      "epoch": 0.014906559982462871,
      "grad_norm": 2.9419098502173515,
      "learning_rate": 8.500000000000001e-07,
      "loss": 0.8362,
      "step": 17
    },
    {
      "epoch": 0.01578341645201951,
      "grad_norm": 2.7926753613205433,
      "learning_rate": 9.000000000000001e-07,
      "loss": 0.825,
      "step": 18
    },
    {
      "epoch": 0.01666027292157615,
      "grad_norm": 2.4471605086654096,
      "learning_rate": 9.500000000000001e-07,
      "loss": 0.7904,
      "step": 19
    },
    {
      "epoch": 0.017537129391132788,
      "grad_norm": 1.8918627793518321,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.7968,
      "step": 20
    },
    {
      "epoch": 0.018413985860689427,
      "grad_norm": 1.713937144355921,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 0.7828,
      "step": 21
    },
    {
      "epoch": 0.01929084233024607,
      "grad_norm": 1.4451729443975803,
      "learning_rate": 1.1e-06,
      "loss": 0.78,
      "step": 22
    },
    {
      "epoch": 0.020167698799802708,
      "grad_norm": 1.0866085026095695,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 0.7807,
      "step": 23
    },
    {
      "epoch": 0.021044555269359347,
      "grad_norm": 1.022948274017058,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.758,
      "step": 24
    },
    {
      "epoch": 0.021921411738915986,
      "grad_norm": 0.976807823206357,
      "learning_rate": 1.25e-06,
      "loss": 0.7783,
      "step": 25
    },
    {
      "epoch": 0.022798268208472625,
      "grad_norm": 2.5562950715507275,
      "learning_rate": 1.3e-06,
      "loss": 0.7815,
      "step": 26
    },
    {
      "epoch": 0.023675124678029263,
      "grad_norm": 1.7956421603987698,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.759,
      "step": 27
    },
    {
      "epoch": 0.024551981147585906,
      "grad_norm": 1.3622207205502601,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.7551,
      "step": 28
    },
    {
      "epoch": 0.025428837617142545,
      "grad_norm": 0.9842354354215974,
      "learning_rate": 1.45e-06,
      "loss": 0.7625,
      "step": 29
    },
    {
      "epoch": 0.026305694086699184,
      "grad_norm": 0.7679059075291825,
      "learning_rate": 1.5e-06,
      "loss": 0.7513,
      "step": 30
    },
    {
      "epoch": 0.027182550556255822,
      "grad_norm": 0.709914193704945,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 0.7309,
      "step": 31
    },
    {
      "epoch": 0.02805940702581246,
      "grad_norm": 0.5711165082308596,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.7358,
      "step": 32
    },
    {
      "epoch": 0.0289362634953691,
      "grad_norm": 0.6732600160748007,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 0.746,
      "step": 33
    },
    {
      "epoch": 0.029813119964925743,
      "grad_norm": 0.519623223105866,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.7408,
      "step": 34
    },
    {
      "epoch": 0.03068997643448238,
      "grad_norm": 0.4967853550459734,
      "learning_rate": 1.75e-06,
      "loss": 0.7284,
      "step": 35
    },
    {
      "epoch": 0.03156683290403902,
      "grad_norm": 0.4558474579400771,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.7337,
      "step": 36
    },
    {
      "epoch": 0.03244368937359566,
      "grad_norm": 0.5187940265183988,
      "learning_rate": 1.85e-06,
      "loss": 0.7459,
      "step": 37
    },
    {
      "epoch": 0.0333205458431523,
      "grad_norm": 0.46649520265418404,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.7238,
      "step": 38
    },
    {
      "epoch": 0.03419740231270894,
      "grad_norm": 0.4621107554297482,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 0.7243,
      "step": 39
    },
    {
      "epoch": 0.035074258782265576,
      "grad_norm": 0.4493723053379801,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7395,
      "step": 40
    },
    {
      "epoch": 0.035951115251822215,
      "grad_norm": 0.4196555282378131,
      "learning_rate": 2.05e-06,
      "loss": 0.7371,
      "step": 41
    },
    {
      "epoch": 0.036827971721378853,
      "grad_norm": 0.3836269605839978,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.7172,
      "step": 42
    },
    {
      "epoch": 0.0377048281909355,
      "grad_norm": 0.38056806308372326,
      "learning_rate": 2.15e-06,
      "loss": 0.7163,
      "step": 43
    },
    {
      "epoch": 0.03858168466049214,
      "grad_norm": 0.3561457145290273,
      "learning_rate": 2.2e-06,
      "loss": 0.6986,
      "step": 44
    },
    {
      "epoch": 0.03945854113004878,
      "grad_norm": 0.3723153937166507,
      "learning_rate": 2.25e-06,
      "loss": 0.7154,
      "step": 45
    },
    {
      "epoch": 0.040335397599605416,
      "grad_norm": 0.36630666691552083,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.7201,
      "step": 46
    },
    {
      "epoch": 0.041212254069162055,
      "grad_norm": 0.3482645877468935,
      "learning_rate": 2.35e-06,
      "loss": 0.7213,
      "step": 47
    },
    {
      "epoch": 0.042089110538718694,
      "grad_norm": 0.35892687942862245,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.7167,
      "step": 48
    },
    {
      "epoch": 0.04296596700827533,
      "grad_norm": 0.3353339246028489,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.7154,
      "step": 49
    },
    {
      "epoch": 0.04384282347783197,
      "grad_norm": 0.3327601533732165,
      "learning_rate": 2.5e-06,
      "loss": 0.7149,
      "step": 50
    },
    {
      "epoch": 0.04471967994738861,
      "grad_norm": 0.31047839521651305,
      "learning_rate": 2.55e-06,
      "loss": 0.7022,
      "step": 51
    },
    {
      "epoch": 0.04559653641694525,
      "grad_norm": 0.3140715368302216,
      "learning_rate": 2.6e-06,
      "loss": 0.7024,
      "step": 52
    },
    {
      "epoch": 0.04647339288650189,
      "grad_norm": 0.3070088967685052,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.7116,
      "step": 53
    },
    {
      "epoch": 0.04735024935605853,
      "grad_norm": 0.29688015435603987,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.7068,
      "step": 54
    },
    {
      "epoch": 0.04822710582561517,
      "grad_norm": 0.312569173156887,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.708,
      "step": 55
    },
    {
      "epoch": 0.04910396229517181,
      "grad_norm": 0.3212155084231398,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.6895,
      "step": 56
    },
    {
      "epoch": 0.04998081876472845,
      "grad_norm": 0.30141336197411556,
      "learning_rate": 2.85e-06,
      "loss": 0.714,
      "step": 57
    },
    {
      "epoch": 0.05085767523428509,
      "grad_norm": 0.2678799864293998,
      "learning_rate": 2.9e-06,
      "loss": 0.6864,
      "step": 58
    },
    {
      "epoch": 0.05173453170384173,
      "grad_norm": 0.2763602360222888,
      "learning_rate": 2.95e-06,
      "loss": 0.6955,
      "step": 59
    },
    {
      "epoch": 0.05261138817339837,
      "grad_norm": 0.2960116429627635,
      "learning_rate": 3e-06,
      "loss": 0.69,
      "step": 60
    },
    {
      "epoch": 0.053488244642955006,
      "grad_norm": 0.3126860845251708,
      "learning_rate": 3.05e-06,
      "loss": 0.7008,
      "step": 61
    },
    {
      "epoch": 0.054365101112511645,
      "grad_norm": 0.2684477743603555,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.7065,
      "step": 62
    },
    {
      "epoch": 0.055241957582068284,
      "grad_norm": 0.2831279869843839,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.6908,
      "step": 63
    },
    {
      "epoch": 0.05611881405162492,
      "grad_norm": 0.28914936357131454,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.6847,
      "step": 64
    },
    {
      "epoch": 0.05699567052118156,
      "grad_norm": 0.2664694092243829,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.6975,
      "step": 65
    },
    {
      "epoch": 0.0578725269907382,
      "grad_norm": 0.2670931319561963,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.6957,
      "step": 66
    },
    {
      "epoch": 0.058749383460294846,
      "grad_norm": 0.25481964712146327,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.6907,
      "step": 67
    },
    {
      "epoch": 0.059626239929851485,
      "grad_norm": 0.2917224006438053,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.6889,
      "step": 68
    },
    {
      "epoch": 0.060503096399408124,
      "grad_norm": 0.27794604488949715,
      "learning_rate": 3.45e-06,
      "loss": 0.6815,
      "step": 69
    },
    {
      "epoch": 0.06137995286896476,
      "grad_norm": 0.24963117175569036,
      "learning_rate": 3.5e-06,
      "loss": 0.6883,
      "step": 70
    },
    {
      "epoch": 0.0622568093385214,
      "grad_norm": 0.2893133633641976,
      "learning_rate": 3.5500000000000003e-06,
      "loss": 0.6792,
      "step": 71
    },
    {
      "epoch": 0.06313366580807804,
      "grad_norm": 0.2826308836822568,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.7028,
      "step": 72
    },
    {
      "epoch": 0.06401052227763468,
      "grad_norm": 0.2640935466003184,
      "learning_rate": 3.65e-06,
      "loss": 0.6916,
      "step": 73
    },
    {
      "epoch": 0.06488737874719132,
      "grad_norm": 0.24415033172628944,
      "learning_rate": 3.7e-06,
      "loss": 0.6839,
      "step": 74
    },
    {
      "epoch": 0.06576423521674796,
      "grad_norm": 0.3112401087242733,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.7021,
      "step": 75
    },
    {
      "epoch": 0.0666410916863046,
      "grad_norm": 0.2875281112172732,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.6929,
      "step": 76
    },
    {
      "epoch": 0.06751794815586123,
      "grad_norm": 0.2874092373703745,
      "learning_rate": 3.85e-06,
      "loss": 0.6788,
      "step": 77
    },
    {
      "epoch": 0.06839480462541787,
      "grad_norm": 0.26681007920352356,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.6881,
      "step": 78
    },
    {
      "epoch": 0.06927166109497451,
      "grad_norm": 0.25207102904583284,
      "learning_rate": 3.95e-06,
      "loss": 0.6852,
      "step": 79
    },
    {
      "epoch": 0.07014851756453115,
      "grad_norm": 0.2747607135538642,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6864,
      "step": 80
    },
    {
      "epoch": 0.07102537403408779,
      "grad_norm": 0.26361955079133653,
      "learning_rate": 4.05e-06,
      "loss": 0.685,
      "step": 81
    },
    {
      "epoch": 0.07190223050364443,
      "grad_norm": 0.33310729956901713,
      "learning_rate": 4.1e-06,
      "loss": 0.6803,
      "step": 82
    },
    {
      "epoch": 0.07277908697320107,
      "grad_norm": 0.2453664087918243,
      "learning_rate": 4.15e-06,
      "loss": 0.6761,
      "step": 83
    },
    {
      "epoch": 0.07365594344275771,
      "grad_norm": 0.2908734202511105,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.6931,
      "step": 84
    },
    {
      "epoch": 0.07453279991231436,
      "grad_norm": 0.2786719287704165,
      "learning_rate": 4.25e-06,
      "loss": 0.6874,
      "step": 85
    },
    {
      "epoch": 0.075409656381871,
      "grad_norm": 0.271512101257661,
      "learning_rate": 4.3e-06,
      "loss": 0.6775,
      "step": 86
    },
    {
      "epoch": 0.07628651285142764,
      "grad_norm": 0.2947304767213564,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.6865,
      "step": 87
    },
    {
      "epoch": 0.07716336932098428,
      "grad_norm": 0.25160176616217883,
      "learning_rate": 4.4e-06,
      "loss": 0.6785,
      "step": 88
    },
    {
      "epoch": 0.07804022579054092,
      "grad_norm": 0.32459153781403244,
      "learning_rate": 4.450000000000001e-06,
      "loss": 0.6773,
      "step": 89
    },
    {
      "epoch": 0.07891708226009755,
      "grad_norm": 0.2487028104553641,
      "learning_rate": 4.5e-06,
      "loss": 0.6812,
      "step": 90
    },
    {
      "epoch": 0.07979393872965419,
      "grad_norm": 0.2925038544983962,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.6791,
      "step": 91
    },
    {
      "epoch": 0.08067079519921083,
      "grad_norm": 0.28005649996035475,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.6704,
      "step": 92
    },
    {
      "epoch": 0.08154765166876747,
      "grad_norm": 0.3264776457957641,
      "learning_rate": 4.65e-06,
      "loss": 0.6772,
      "step": 93
    },
    {
      "epoch": 0.08242450813832411,
      "grad_norm": 0.2533079586966528,
      "learning_rate": 4.7e-06,
      "loss": 0.6792,
      "step": 94
    },
    {
      "epoch": 0.08330136460788075,
      "grad_norm": 0.25651763696878965,
      "learning_rate": 4.75e-06,
      "loss": 0.6607,
      "step": 95
    },
    {
      "epoch": 0.08417822107743739,
      "grad_norm": 0.2546288408258964,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.6669,
      "step": 96
    },
    {
      "epoch": 0.08505507754699403,
      "grad_norm": 0.25215356470309513,
      "learning_rate": 4.85e-06,
      "loss": 0.6846,
      "step": 97
    },
    {
      "epoch": 0.08593193401655067,
      "grad_norm": 0.28631928221309494,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.6717,
      "step": 98
    },
    {
      "epoch": 0.0868087904861073,
      "grad_norm": 0.27212851090592044,
      "learning_rate": 4.95e-06,
      "loss": 0.6804,
      "step": 99
    },
    {
      "epoch": 0.08768564695566394,
      "grad_norm": 0.29348118762199116,
      "learning_rate": 5e-06,
      "loss": 0.669,
      "step": 100
    },
    {
      "epoch": 0.08856250342522058,
      "grad_norm": 0.30678288402779474,
      "learning_rate": 4.999998880733363e-06,
      "loss": 0.6631,
      "step": 101
    },
    {
      "epoch": 0.08943935989477722,
      "grad_norm": 0.3011120934546324,
      "learning_rate": 4.999995522934454e-06,
      "loss": 0.679,
      "step": 102
    },
    {
      "epoch": 0.09031621636433386,
      "grad_norm": 0.31706623056013666,
      "learning_rate": 4.9999899266062804e-06,
      "loss": 0.6723,
      "step": 103
    },
    {
      "epoch": 0.0911930728338905,
      "grad_norm": 0.3120471729111099,
      "learning_rate": 4.999982091753851e-06,
      "loss": 0.6613,
      "step": 104
    },
    {
      "epoch": 0.09206992930344714,
      "grad_norm": 0.2905613969012575,
      "learning_rate": 4.999972018384183e-06,
      "loss": 0.6611,
      "step": 105
    },
    {
      "epoch": 0.09294678577300378,
      "grad_norm": 0.28925318733211003,
      "learning_rate": 4.999959706506297e-06,
      "loss": 0.6695,
      "step": 106
    },
    {
      "epoch": 0.09382364224256041,
      "grad_norm": 0.28085987028825943,
      "learning_rate": 4.999945156131215e-06,
      "loss": 0.6502,
      "step": 107
    },
    {
      "epoch": 0.09470049871211705,
      "grad_norm": 0.30971852568333075,
      "learning_rate": 4.9999283672719665e-06,
      "loss": 0.672,
      "step": 108
    },
    {
      "epoch": 0.0955773551816737,
      "grad_norm": 0.32363303577963826,
      "learning_rate": 4.999909339943585e-06,
      "loss": 0.673,
      "step": 109
    },
    {
      "epoch": 0.09645421165123035,
      "grad_norm": 0.29549042512555623,
      "learning_rate": 4.999888074163108e-06,
      "loss": 0.6591,
      "step": 110
    },
    {
      "epoch": 0.09733106812078698,
      "grad_norm": 0.33514032815726946,
      "learning_rate": 4.999864569949576e-06,
      "loss": 0.6673,
      "step": 111
    },
    {
      "epoch": 0.09820792459034362,
      "grad_norm": 0.3092438114721304,
      "learning_rate": 4.999838827324036e-06,
      "loss": 0.6641,
      "step": 112
    },
    {
      "epoch": 0.09908478105990026,
      "grad_norm": 0.35403209993563217,
      "learning_rate": 4.999810846309539e-06,
      "loss": 0.6597,
      "step": 113
    },
    {
      "epoch": 0.0999616375294569,
      "grad_norm": 0.2964896689419525,
      "learning_rate": 4.999780626931136e-06,
      "loss": 0.67,
      "step": 114
    },
    {
      "epoch": 0.10083849399901354,
      "grad_norm": 0.3484706075226941,
      "learning_rate": 4.999748169215891e-06,
      "loss": 0.6745,
      "step": 115
    },
    {
      "epoch": 0.10171535046857018,
      "grad_norm": 0.33505074735981694,
      "learning_rate": 4.999713473192863e-06,
      "loss": 0.6591,
      "step": 116
    },
    {
      "epoch": 0.10259220693812682,
      "grad_norm": 0.27082614750107925,
      "learning_rate": 4.999676538893121e-06,
      "loss": 0.6621,
      "step": 117
    },
    {
      "epoch": 0.10346906340768346,
      "grad_norm": 0.3506965847465109,
      "learning_rate": 4.999637366349736e-06,
      "loss": 0.6733,
      "step": 118
    },
    {
      "epoch": 0.1043459198772401,
      "grad_norm": 0.27422374937685745,
      "learning_rate": 4.999595955597784e-06,
      "loss": 0.655,
      "step": 119
    },
    {
      "epoch": 0.10522277634679673,
      "grad_norm": 0.33620430443399,
      "learning_rate": 4.999552306674345e-06,
      "loss": 0.6755,
      "step": 120
    },
    {
      "epoch": 0.10609963281635337,
      "grad_norm": 0.2837804889330797,
      "learning_rate": 4.999506419618502e-06,
      "loss": 0.6579,
      "step": 121
    },
    {
      "epoch": 0.10697648928591001,
      "grad_norm": 0.37952040871876175,
      "learning_rate": 4.999458294471342e-06,
      "loss": 0.6692,
      "step": 122
    },
    {
      "epoch": 0.10785334575546665,
      "grad_norm": 0.2690864525050558,
      "learning_rate": 4.99940793127596e-06,
      "loss": 0.6494,
      "step": 123
    },
    {
      "epoch": 0.10873020222502329,
      "grad_norm": 0.3635002166658454,
      "learning_rate": 4.999355330077449e-06,
      "loss": 0.6611,
      "step": 124
    },
    {
      "epoch": 0.10960705869457993,
      "grad_norm": 0.29302462194523843,
      "learning_rate": 4.999300490922911e-06,
      "loss": 0.6526,
      "step": 125
    },
    {
      "epoch": 0.11048391516413657,
      "grad_norm": 0.3058787861740299,
      "learning_rate": 4.999243413861447e-06,
      "loss": 0.659,
      "step": 126
    },
    {
      "epoch": 0.1113607716336932,
      "grad_norm": 0.332548080761125,
      "learning_rate": 4.9991840989441665e-06,
      "loss": 0.6659,
      "step": 127
    },
    {
      "epoch": 0.11223762810324985,
      "grad_norm": 0.29432766212441813,
      "learning_rate": 4.999122546224181e-06,
      "loss": 0.6447,
      "step": 128
    },
    {
      "epoch": 0.11311448457280648,
      "grad_norm": 0.29523416391879537,
      "learning_rate": 4.999058755756605e-06,
      "loss": 0.6587,
      "step": 129
    },
    {
      "epoch": 0.11399134104236312,
      "grad_norm": 0.32423165831626255,
      "learning_rate": 4.998992727598557e-06,
      "loss": 0.6564,
      "step": 130
    },
    {
      "epoch": 0.11486819751191976,
      "grad_norm": 0.34859884756639065,
      "learning_rate": 4.99892446180916e-06,
      "loss": 0.653,
      "step": 131
    },
    {
      "epoch": 0.1157450539814764,
      "grad_norm": 0.30133447855543133,
      "learning_rate": 4.99885395844954e-06,
      "loss": 0.647,
      "step": 132
    },
    {
      "epoch": 0.11662191045103305,
      "grad_norm": 0.3600942516700186,
      "learning_rate": 4.998781217582827e-06,
      "loss": 0.6581,
      "step": 133
    },
    {
      "epoch": 0.11749876692058969,
      "grad_norm": 0.29960571448156953,
      "learning_rate": 4.998706239274153e-06,
      "loss": 0.6623,
      "step": 134
    },
    {
      "epoch": 0.11837562339014633,
      "grad_norm": 0.2992208264370026,
      "learning_rate": 4.998629023590656e-06,
      "loss": 0.6538,
      "step": 135
    },
    {
      "epoch": 0.11925247985970297,
      "grad_norm": 0.36522912538035174,
      "learning_rate": 4.998549570601475e-06,
      "loss": 0.6566,
      "step": 136
    },
    {
      "epoch": 0.12012933632925961,
      "grad_norm": 0.2988448634710597,
      "learning_rate": 4.998467880377754e-06,
      "loss": 0.673,
      "step": 137
    },
    {
      "epoch": 0.12100619279881625,
      "grad_norm": 0.32912250244162505,
      "learning_rate": 4.998383952992639e-06,
      "loss": 0.6482,
      "step": 138
    },
    {
      "epoch": 0.12188304926837289,
      "grad_norm": 0.37178534793553225,
      "learning_rate": 4.998297788521279e-06,
      "loss": 0.6546,
      "step": 139
    },
    {
      "epoch": 0.12275990573792953,
      "grad_norm": 0.28062782891296695,
      "learning_rate": 4.998209387040829e-06,
      "loss": 0.6527,
      "step": 140
    },
    {
      "epoch": 0.12363676220748616,
      "grad_norm": 0.33723394797540485,
      "learning_rate": 4.998118748630443e-06,
      "loss": 0.6391,
      "step": 141
    },
    {
      "epoch": 0.1245136186770428,
      "grad_norm": 0.2834572318610097,
      "learning_rate": 4.99802587337128e-06,
      "loss": 0.6443,
      "step": 142
    },
    {
      "epoch": 0.12539047514659943,
      "grad_norm": 0.321495289367043,
      "learning_rate": 4.997930761346502e-06,
      "loss": 0.6507,
      "step": 143
    },
    {
      "epoch": 0.12626733161615608,
      "grad_norm": 0.3419910878952078,
      "learning_rate": 4.997833412641274e-06,
      "loss": 0.6543,
      "step": 144
    },
    {
      "epoch": 0.1271441880857127,
      "grad_norm": 0.28772221770446305,
      "learning_rate": 4.9977338273427625e-06,
      "loss": 0.6522,
      "step": 145
    },
    {
      "epoch": 0.12802104455526936,
      "grad_norm": 0.29706932671928316,
      "learning_rate": 4.997632005540139e-06,
      "loss": 0.6677,
      "step": 146
    },
    {
      "epoch": 0.128897901024826,
      "grad_norm": 0.29918610448467253,
      "learning_rate": 4.997527947324573e-06,
      "loss": 0.6475,
      "step": 147
    },
    {
      "epoch": 0.12977475749438264,
      "grad_norm": 0.33103419851925103,
      "learning_rate": 4.997421652789243e-06,
      "loss": 0.67,
      "step": 148
    },
    {
      "epoch": 0.1306516139639393,
      "grad_norm": 0.27012500247528487,
      "learning_rate": 4.9973131220293255e-06,
      "loss": 0.647,
      "step": 149
    },
    {
      "epoch": 0.13152847043349591,
      "grad_norm": 0.297677443804652,
      "learning_rate": 4.9972023551419995e-06,
      "loss": 0.6519,
      "step": 150
    },
    {
      "epoch": 0.13240532690305257,
      "grad_norm": 0.27386600476743567,
      "learning_rate": 4.997089352226448e-06,
      "loss": 0.6562,
      "step": 151
    },
    {
      "epoch": 0.1332821833726092,
      "grad_norm": 0.3025435071675535,
      "learning_rate": 4.996974113383854e-06,
      "loss": 0.6485,
      "step": 152
    },
    {
      "epoch": 0.13415903984216584,
      "grad_norm": 0.2928572797854547,
      "learning_rate": 4.996856638717406e-06,
      "loss": 0.641,
      "step": 153
    },
    {
      "epoch": 0.13503589631172247,
      "grad_norm": 0.28232417223789874,
      "learning_rate": 4.996736928332292e-06,
      "loss": 0.6358,
      "step": 154
    },
    {
      "epoch": 0.13591275278127912,
      "grad_norm": 0.33877806926878856,
      "learning_rate": 4.9966149823357e-06,
      "loss": 0.6558,
      "step": 155
    },
    {
      "epoch": 0.13678960925083575,
      "grad_norm": 0.27274924720742,
      "learning_rate": 4.996490800836825e-06,
      "loss": 0.6553,
      "step": 156
    },
    {
      "epoch": 0.1376664657203924,
      "grad_norm": 0.3145522020468823,
      "learning_rate": 4.996364383946859e-06,
      "loss": 0.6458,
      "step": 157
    },
    {
      "epoch": 0.13854332218994903,
      "grad_norm": 0.28298098932682264,
      "learning_rate": 4.996235731778997e-06,
      "loss": 0.6467,
      "step": 158
    },
    {
      "epoch": 0.13942017865950568,
      "grad_norm": 0.3289393703740858,
      "learning_rate": 4.996104844448438e-06,
      "loss": 0.6522,
      "step": 159
    },
    {
      "epoch": 0.1402970351290623,
      "grad_norm": 0.3242491154179804,
      "learning_rate": 4.995971722072379e-06,
      "loss": 0.6579,
      "step": 160
    },
    {
      "epoch": 0.14117389159861896,
      "grad_norm": 0.350063023556927,
      "learning_rate": 4.995836364770018e-06,
      "loss": 0.6639,
      "step": 161
    },
    {
      "epoch": 0.14205074806817558,
      "grad_norm": 0.26800977502782475,
      "learning_rate": 4.995698772662558e-06,
      "loss": 0.6564,
      "step": 162
    },
    {
      "epoch": 0.14292760453773223,
      "grad_norm": 0.37123972908338404,
      "learning_rate": 4.9955589458732e-06,
      "loss": 0.6521,
      "step": 163
    },
    {
      "epoch": 0.14380446100728886,
      "grad_norm": 0.25568101611736427,
      "learning_rate": 4.995416884527147e-06,
      "loss": 0.6489,
      "step": 164
    },
    {
      "epoch": 0.1446813174768455,
      "grad_norm": 0.3502739955437778,
      "learning_rate": 4.9952725887516015e-06,
      "loss": 0.6389,
      "step": 165
    },
    {
      "epoch": 0.14555817394640214,
      "grad_norm": 0.2695951493086468,
      "learning_rate": 4.99512605867577e-06,
      "loss": 0.6409,
      "step": 166
    },
    {
      "epoch": 0.1464350304159588,
      "grad_norm": 0.33224546665642934,
      "learning_rate": 4.994977294430856e-06,
      "loss": 0.6478,
      "step": 167
    },
    {
      "epoch": 0.14731188688551541,
      "grad_norm": 0.26336591640433304,
      "learning_rate": 4.994826296150064e-06,
      "loss": 0.6416,
      "step": 168
    },
    {
      "epoch": 0.14818874335507207,
      "grad_norm": 0.3158628283831438,
      "learning_rate": 4.9946730639686025e-06,
      "loss": 0.6397,
      "step": 169
    },
    {
      "epoch": 0.14906559982462872,
      "grad_norm": 0.29572803602407627,
      "learning_rate": 4.9945175980236745e-06,
      "loss": 0.6356,
      "step": 170
    },
    {
      "epoch": 0.14994245629418534,
      "grad_norm": 0.3344536076519792,
      "learning_rate": 4.99435989845449e-06,
      "loss": 0.6494,
      "step": 171
    },
    {
      "epoch": 0.150819312763742,
      "grad_norm": 0.2811402499936693,
      "learning_rate": 4.994199965402252e-06,
      "loss": 0.6472,
      "step": 172
    },
    {
      "epoch": 0.15169616923329862,
      "grad_norm": 0.30351530565920815,
      "learning_rate": 4.994037799010168e-06,
      "loss": 0.6514,
      "step": 173
    },
    {
      "epoch": 0.15257302570285527,
      "grad_norm": 0.2667020904201129,
      "learning_rate": 4.993873399423445e-06,
      "loss": 0.642,
      "step": 174
    },
    {
      "epoch": 0.1534498821724119,
      "grad_norm": 0.3062654941965369,
      "learning_rate": 4.993706766789287e-06,
      "loss": 0.6398,
      "step": 175
    },
    {
      "epoch": 0.15432673864196855,
      "grad_norm": 0.28228507467929365,
      "learning_rate": 4.993537901256898e-06,
      "loss": 0.6446,
      "step": 176
    },
    {
      "epoch": 0.15520359511152518,
      "grad_norm": 0.3157908119401443,
      "learning_rate": 4.993366802977486e-06,
      "loss": 0.645,
      "step": 177
    },
    {
      "epoch": 0.15608045158108183,
      "grad_norm": 0.29612114085869035,
      "learning_rate": 4.993193472104253e-06,
      "loss": 0.6379,
      "step": 178
    },
    {
      "epoch": 0.15695730805063846,
      "grad_norm": 0.31715005105530436,
      "learning_rate": 4.9930179087924e-06,
      "loss": 0.6446,
      "step": 179
    },
    {
      "epoch": 0.1578341645201951,
      "grad_norm": 0.3010974405602859,
      "learning_rate": 4.992840113199131e-06,
      "loss": 0.6273,
      "step": 180
    },
    {
      "epoch": 0.15871102098975173,
      "grad_norm": 0.3097310667014726,
      "learning_rate": 4.992660085483645e-06,
      "loss": 0.6477,
      "step": 181
    },
    {
      "epoch": 0.15958787745930839,
      "grad_norm": 0.25428924204211556,
      "learning_rate": 4.992477825807142e-06,
      "loss": 0.6562,
      "step": 182
    },
    {
      "epoch": 0.160464733928865,
      "grad_norm": 0.30870425916577926,
      "learning_rate": 4.992293334332821e-06,
      "loss": 0.6528,
      "step": 183
    },
    {
      "epoch": 0.16134159039842166,
      "grad_norm": 0.2915653234864446,
      "learning_rate": 4.992106611225875e-06,
      "loss": 0.6491,
      "step": 184
    },
    {
      "epoch": 0.1622184468679783,
      "grad_norm": 0.3032380988277513,
      "learning_rate": 4.991917656653501e-06,
      "loss": 0.6523,
      "step": 185
    },
    {
      "epoch": 0.16309530333753494,
      "grad_norm": 0.2986663700583823,
      "learning_rate": 4.991726470784891e-06,
      "loss": 0.6333,
      "step": 186
    },
    {
      "epoch": 0.16397215980709157,
      "grad_norm": 0.28321065505069615,
      "learning_rate": 4.9915330537912346e-06,
      "loss": 0.6411,
      "step": 187
    },
    {
      "epoch": 0.16484901627664822,
      "grad_norm": 0.358610834369166,
      "learning_rate": 4.99133740584572e-06,
      "loss": 0.6404,
      "step": 188
    },
    {
      "epoch": 0.16572587274620484,
      "grad_norm": 0.30976208589225795,
      "learning_rate": 4.991139527123534e-06,
      "loss": 0.6405,
      "step": 189
    },
    {
      "epoch": 0.1666027292157615,
      "grad_norm": 0.34149502314365515,
      "learning_rate": 4.990939417801859e-06,
      "loss": 0.6384,
      "step": 190
    },
    {
      "epoch": 0.16747958568531812,
      "grad_norm": 0.2959951500432587,
      "learning_rate": 4.9907370780598754e-06,
      "loss": 0.6469,
      "step": 191
    },
    {
      "epoch": 0.16835644215487477,
      "grad_norm": 0.3302476980977895,
      "learning_rate": 4.990532508078761e-06,
      "loss": 0.6359,
      "step": 192
    },
    {
      "epoch": 0.1692332986244314,
      "grad_norm": 0.3944297035939378,
      "learning_rate": 4.990325708041691e-06,
      "loss": 0.6502,
      "step": 193
    },
    {
      "epoch": 0.17011015509398805,
      "grad_norm": 0.360231124267091,
      "learning_rate": 4.990116678133836e-06,
      "loss": 0.6424,
      "step": 194
    },
    {
      "epoch": 0.1709870115635447,
      "grad_norm": 0.33832741778437936,
      "learning_rate": 4.989905418542366e-06,
      "loss": 0.6352,
      "step": 195
    },
    {
      "epoch": 0.17186386803310133,
      "grad_norm": 0.36238295597291414,
      "learning_rate": 4.989691929456443e-06,
      "loss": 0.6499,
      "step": 196
    },
    {
      "epoch": 0.17274072450265798,
      "grad_norm": 0.32684488652867627,
      "learning_rate": 4.98947621106723e-06,
      "loss": 0.6475,
      "step": 197
    },
    {
      "epoch": 0.1736175809722146,
      "grad_norm": 0.2757346118610075,
      "learning_rate": 4.989258263567884e-06,
      "loss": 0.6355,
      "step": 198
    },
    {
      "epoch": 0.17449443744177126,
      "grad_norm": 0.29755713041423115,
      "learning_rate": 4.989038087153556e-06,
      "loss": 0.6336,
      "step": 199
    },
    {
      "epoch": 0.17537129391132789,
      "grad_norm": 0.29151765698243737,
      "learning_rate": 4.988815682021398e-06,
      "loss": 0.6471,
      "step": 200
    },
    {
      "epoch": 0.17624815038088454,
      "grad_norm": 0.28111823253643253,
      "learning_rate": 4.988591048370552e-06,
      "loss": 0.6407,
      "step": 201
    },
    {
      "epoch": 0.17712500685044116,
      "grad_norm": 0.2656165957748681,
      "learning_rate": 4.988364186402159e-06,
      "loss": 0.6326,
      "step": 202
    },
    {
      "epoch": 0.17800186331999782,
      "grad_norm": 0.3028986715129606,
      "learning_rate": 4.988135096319355e-06,
      "loss": 0.6348,
      "step": 203
    },
    {
      "epoch": 0.17887871978955444,
      "grad_norm": 0.29924585956112065,
      "learning_rate": 4.987903778327269e-06,
      "loss": 0.6488,
      "step": 204
    },
    {
      "epoch": 0.1797555762591111,
      "grad_norm": 0.2747438588784908,
      "learning_rate": 4.987670232633027e-06,
      "loss": 0.6353,
      "step": 205
    },
    {
      "epoch": 0.18063243272866772,
      "grad_norm": 0.30887265845064044,
      "learning_rate": 4.987434459445748e-06,
      "loss": 0.6428,
      "step": 206
    },
    {
      "epoch": 0.18150928919822437,
      "grad_norm": 0.3193061834187564,
      "learning_rate": 4.987196458976548e-06,
      "loss": 0.6467,
      "step": 207
    },
    {
      "epoch": 0.182386145667781,
      "grad_norm": 0.2769424032566695,
      "learning_rate": 4.9869562314385335e-06,
      "loss": 0.6407,
      "step": 208
    },
    {
      "epoch": 0.18326300213733765,
      "grad_norm": 0.3406015148633883,
      "learning_rate": 4.986713777046809e-06,
      "loss": 0.6443,
      "step": 209
    },
    {
      "epoch": 0.18413985860689427,
      "grad_norm": 0.271878066659463,
      "learning_rate": 4.986469096018472e-06,
      "loss": 0.6328,
      "step": 210
    },
    {
      "epoch": 0.18501671507645093,
      "grad_norm": 0.2987491049335003,
      "learning_rate": 4.9862221885726115e-06,
      "loss": 0.6478,
      "step": 211
    },
    {
      "epoch": 0.18589357154600755,
      "grad_norm": 0.3087618217189243,
      "learning_rate": 4.985973054930313e-06,
      "loss": 0.6363,
      "step": 212
    },
    {
      "epoch": 0.1867704280155642,
      "grad_norm": 0.28612704652497223,
      "learning_rate": 4.985721695314653e-06,
      "loss": 0.6409,
      "step": 213
    },
    {
      "epoch": 0.18764728448512083,
      "grad_norm": 0.26033127989473615,
      "learning_rate": 4.985468109950704e-06,
      "loss": 0.6495,
      "step": 214
    },
    {
      "epoch": 0.18852414095467748,
      "grad_norm": 0.29345494621139656,
      "learning_rate": 4.985212299065528e-06,
      "loss": 0.648,
      "step": 215
    },
    {
      "epoch": 0.1894009974242341,
      "grad_norm": 0.30811406203792147,
      "learning_rate": 4.984954262888182e-06,
      "loss": 0.639,
      "step": 216
    },
    {
      "epoch": 0.19027785389379076,
      "grad_norm": 0.3312828084167346,
      "learning_rate": 4.9846940016497146e-06,
      "loss": 0.6403,
      "step": 217
    },
    {
      "epoch": 0.1911547103633474,
      "grad_norm": 0.29106752415257064,
      "learning_rate": 4.984431515583169e-06,
      "loss": 0.6457,
      "step": 218
    },
    {
      "epoch": 0.19203156683290404,
      "grad_norm": 0.2950307203873666,
      "learning_rate": 4.984166804923576e-06,
      "loss": 0.6366,
      "step": 219
    },
    {
      "epoch": 0.1929084233024607,
      "grad_norm": 0.33001978484003053,
      "learning_rate": 4.983899869907963e-06,
      "loss": 0.6519,
      "step": 220
    },
    {
      "epoch": 0.19378527977201732,
      "grad_norm": 0.25712182858786903,
      "learning_rate": 4.983630710775346e-06,
      "loss": 0.6302,
      "step": 221
    },
    {
      "epoch": 0.19466213624157397,
      "grad_norm": 0.33700258932320354,
      "learning_rate": 4.983359327766735e-06,
      "loss": 0.6382,
      "step": 222
    },
    {
      "epoch": 0.1955389927111306,
      "grad_norm": 0.3195952299259763,
      "learning_rate": 4.983085721125128e-06,
      "loss": 0.6408,
      "step": 223
    },
    {
      "epoch": 0.19641584918068725,
      "grad_norm": 0.2820582636542398,
      "learning_rate": 4.982809891095519e-06,
      "loss": 0.6196,
      "step": 224
    },
    {
      "epoch": 0.19729270565024387,
      "grad_norm": 0.30343326038998625,
      "learning_rate": 4.982531837924887e-06,
      "loss": 0.6361,
      "step": 225
    },
    {
      "epoch": 0.19816956211980052,
      "grad_norm": 0.2724213298701267,
      "learning_rate": 4.9822515618622055e-06,
      "loss": 0.6455,
      "step": 226
    },
    {
      "epoch": 0.19904641858935715,
      "grad_norm": 0.28433275446155476,
      "learning_rate": 4.9819690631584375e-06,
      "loss": 0.6329,
      "step": 227
    },
    {
      "epoch": 0.1999232750589138,
      "grad_norm": 0.2641523923467397,
      "learning_rate": 4.981684342066536e-06,
      "loss": 0.6301,
      "step": 228
    },
    {
      "epoch": 0.20080013152847043,
      "grad_norm": 0.29243768749633176,
      "learning_rate": 4.9813973988414454e-06,
      "loss": 0.6369,
      "step": 229
    },
    {
      "epoch": 0.20167698799802708,
      "grad_norm": 0.27139535071517695,
      "learning_rate": 4.981108233740096e-06,
      "loss": 0.6279,
      "step": 230
    },
    {
      "epoch": 0.2025538444675837,
      "grad_norm": 0.27525475223350887,
      "learning_rate": 4.980816847021412e-06,
      "loss": 0.6429,
      "step": 231
    },
    {
      "epoch": 0.20343070093714036,
      "grad_norm": 0.3427701449667448,
      "learning_rate": 4.980523238946304e-06,
      "loss": 0.6438,
      "step": 232
    },
    {
      "epoch": 0.20430755740669698,
      "grad_norm": 0.2574596630900604,
      "learning_rate": 4.980227409777673e-06,
      "loss": 0.6278,
      "step": 233
    },
    {
      "epoch": 0.20518441387625364,
      "grad_norm": 0.3069435432493287,
      "learning_rate": 4.9799293597804086e-06,
      "loss": 0.645,
      "step": 234
    },
    {
      "epoch": 0.20606127034581026,
      "grad_norm": 0.2861360169316533,
      "learning_rate": 4.979629089221387e-06,
      "loss": 0.646,
      "step": 235
    },
    {
      "epoch": 0.2069381268153669,
      "grad_norm": 0.258606470239814,
      "learning_rate": 4.9793265983694775e-06,
      "loss": 0.638,
      "step": 236
    },
    {
      "epoch": 0.20781498328492354,
      "grad_norm": 0.2852233202848665,
      "learning_rate": 4.9790218874955325e-06,
      "loss": 0.6233,
      "step": 237
    },
    {
      "epoch": 0.2086918397544802,
      "grad_norm": 0.27593128237727194,
      "learning_rate": 4.978714956872394e-06,
      "loss": 0.64,
      "step": 238
    },
    {
      "epoch": 0.20956869622403682,
      "grad_norm": 0.2721892419938629,
      "learning_rate": 4.978405806774892e-06,
      "loss": 0.6242,
      "step": 239
    },
    {
      "epoch": 0.21044555269359347,
      "grad_norm": 0.26477694173686633,
      "learning_rate": 4.978094437479843e-06,
      "loss": 0.6409,
      "step": 240
    },
    {
      "epoch": 0.2113224091631501,
      "grad_norm": 0.29511740452877416,
      "learning_rate": 4.977780849266054e-06,
      "loss": 0.6397,
      "step": 241
    },
    {
      "epoch": 0.21219926563270675,
      "grad_norm": 0.3137075106480887,
      "learning_rate": 4.977465042414314e-06,
      "loss": 0.6185,
      "step": 242
    },
    {
      "epoch": 0.2130761221022634,
      "grad_norm": 0.2841757272525764,
      "learning_rate": 4.9771470172073985e-06,
      "loss": 0.6394,
      "step": 243
    },
    {
      "epoch": 0.21395297857182002,
      "grad_norm": 0.289636229771129,
      "learning_rate": 4.976826773930076e-06,
      "loss": 0.6314,
      "step": 244
    },
    {
      "epoch": 0.21482983504137668,
      "grad_norm": 0.30163996035868273,
      "learning_rate": 4.976504312869093e-06,
      "loss": 0.6347,
      "step": 245
    },
    {
      "epoch": 0.2157066915109333,
      "grad_norm": 0.261372963985366,
      "learning_rate": 4.976179634313187e-06,
      "loss": 0.6378,
      "step": 246
    },
    {
      "epoch": 0.21658354798048995,
      "grad_norm": 0.3277256326536918,
      "learning_rate": 4.97585273855308e-06,
      "loss": 0.6326,
      "step": 247
    },
    {
      "epoch": 0.21746040445004658,
      "grad_norm": 0.2609300415027874,
      "learning_rate": 4.975523625881478e-06,
      "loss": 0.643,
      "step": 248
    },
    {
      "epoch": 0.21833726091960323,
      "grad_norm": 0.360435554160976,
      "learning_rate": 4.975192296593072e-06,
      "loss": 0.6301,
      "step": 249
    },
    {
      "epoch": 0.21921411738915986,
      "grad_norm": 0.33545569496984357,
      "learning_rate": 4.97485875098454e-06,
      "loss": 0.6263,
      "step": 250
    },
    {
      "epoch": 0.2200909738587165,
      "grad_norm": 0.3109257543138659,
      "learning_rate": 4.974522989354544e-06,
      "loss": 0.6409,
      "step": 251
    },
    {
      "epoch": 0.22096783032827313,
      "grad_norm": 0.324992218124581,
      "learning_rate": 4.974185012003727e-06,
      "loss": 0.634,
      "step": 252
    },
    {
      "epoch": 0.2218446867978298,
      "grad_norm": 0.32486130027399085,
      "learning_rate": 4.97384481923472e-06,
      "loss": 0.6164,
      "step": 253
    },
    {
      "epoch": 0.2227215432673864,
      "grad_norm": 0.37258515700556377,
      "learning_rate": 4.973502411352136e-06,
      "loss": 0.6387,
      "step": 254
    },
    {
      "epoch": 0.22359839973694307,
      "grad_norm": 0.29043553996012594,
      "learning_rate": 4.97315778866257e-06,
      "loss": 0.6287,
      "step": 255
    },
    {
      "epoch": 0.2244752562064997,
      "grad_norm": 0.36257038619483317,
      "learning_rate": 4.972810951474605e-06,
      "loss": 0.6343,
      "step": 256
    },
    {
      "epoch": 0.22535211267605634,
      "grad_norm": 0.2772793728031826,
      "learning_rate": 4.972461900098801e-06,
      "loss": 0.6289,
      "step": 257
    },
    {
      "epoch": 0.22622896914561297,
      "grad_norm": 0.35920004083908574,
      "learning_rate": 4.972110634847703e-06,
      "loss": 0.6532,
      "step": 258
    },
    {
      "epoch": 0.22710582561516962,
      "grad_norm": 0.29471007707943336,
      "learning_rate": 4.97175715603584e-06,
      "loss": 0.6431,
      "step": 259
    },
    {
      "epoch": 0.22798268208472625,
      "grad_norm": 0.3052965075835166,
      "learning_rate": 4.971401463979722e-06,
      "loss": 0.6373,
      "step": 260
    },
    {
      "epoch": 0.2288595385542829,
      "grad_norm": 0.27702925326859024,
      "learning_rate": 4.971043558997839e-06,
      "loss": 0.6254,
      "step": 261
    },
    {
      "epoch": 0.22973639502383952,
      "grad_norm": 0.30905022457424325,
      "learning_rate": 4.9706834414106645e-06,
      "loss": 0.6377,
      "step": 262
    },
    {
      "epoch": 0.23061325149339618,
      "grad_norm": 0.2820956276882666,
      "learning_rate": 4.970321111540652e-06,
      "loss": 0.6303,
      "step": 263
    },
    {
      "epoch": 0.2314901079629528,
      "grad_norm": 0.3394900289735489,
      "learning_rate": 4.969956569712238e-06,
      "loss": 0.6394,
      "step": 264
    },
    {
      "epoch": 0.23236696443250945,
      "grad_norm": 0.26647926556067275,
      "learning_rate": 4.969589816251837e-06,
      "loss": 0.6202,
      "step": 265
    },
    {
      "epoch": 0.2332438209020661,
      "grad_norm": 0.3281231898594553,
      "learning_rate": 4.9692208514878445e-06,
      "loss": 0.6343,
      "step": 266
    },
    {
      "epoch": 0.23412067737162273,
      "grad_norm": 0.32675488207496506,
      "learning_rate": 4.968849675750638e-06,
      "loss": 0.6106,
      "step": 267
    },
    {
      "epoch": 0.23499753384117938,
      "grad_norm": 0.28838375524590465,
      "learning_rate": 4.9684762893725715e-06,
      "loss": 0.6191,
      "step": 268
    },
    {
      "epoch": 0.235874390310736,
      "grad_norm": 0.3568027126734991,
      "learning_rate": 4.968100692687981e-06,
      "loss": 0.6492,
      "step": 269
    },
    {
      "epoch": 0.23675124678029266,
      "grad_norm": 0.28443576918161984,
      "learning_rate": 4.967722886033181e-06,
      "loss": 0.6332,
      "step": 270
    },
    {
      "epoch": 0.2376281032498493,
      "grad_norm": 0.34347891151295074,
      "learning_rate": 4.967342869746463e-06,
      "loss": 0.6302,
      "step": 271
    },
    {
      "epoch": 0.23850495971940594,
      "grad_norm": 0.26856199334324765,
      "learning_rate": 4.9669606441681005e-06,
      "loss": 0.6253,
      "step": 272
    },
    {
      "epoch": 0.23938181618896257,
      "grad_norm": 0.28792821400673596,
      "learning_rate": 4.966576209640344e-06,
      "loss": 0.617,
      "step": 273
    },
    {
      "epoch": 0.24025867265851922,
      "grad_norm": 0.2749481611356667,
      "learning_rate": 4.966189566507418e-06,
      "loss": 0.6386,
      "step": 274
    },
    {
      "epoch": 0.24113552912807584,
      "grad_norm": 0.2499995559979677,
      "learning_rate": 4.965800715115531e-06,
      "loss": 0.6281,
      "step": 275
    },
    {
      "epoch": 0.2420123855976325,
      "grad_norm": 0.2802197876098476,
      "learning_rate": 4.965409655812865e-06,
      "loss": 0.6356,
      "step": 276
    },
    {
      "epoch": 0.24288924206718912,
      "grad_norm": 0.27112050232805884,
      "learning_rate": 4.965016388949579e-06,
      "loss": 0.6366,
      "step": 277
    },
    {
      "epoch": 0.24376609853674577,
      "grad_norm": 0.28745747065199806,
      "learning_rate": 4.96462091487781e-06,
      "loss": 0.6245,
      "step": 278
    },
    {
      "epoch": 0.2446429550063024,
      "grad_norm": 0.29635776688822807,
      "learning_rate": 4.96422323395167e-06,
      "loss": 0.6413,
      "step": 279
    },
    {
      "epoch": 0.24551981147585905,
      "grad_norm": 0.3376283192201481,
      "learning_rate": 4.963823346527249e-06,
      "loss": 0.6322,
      "step": 280
    },
    {
      "epoch": 0.24639666794541568,
      "grad_norm": 0.30520044326595835,
      "learning_rate": 4.96342125296261e-06,
      "loss": 0.6173,
      "step": 281
    },
    {
      "epoch": 0.24727352441497233,
      "grad_norm": 0.34476437566601653,
      "learning_rate": 4.963016953617794e-06,
      "loss": 0.6172,
      "step": 282
    },
    {
      "epoch": 0.24815038088452895,
      "grad_norm": 0.2611205789369605,
      "learning_rate": 4.962610448854816e-06,
      "loss": 0.6246,
      "step": 283
    },
    {
      "epoch": 0.2490272373540856,
      "grad_norm": 0.3294938430549001,
      "learning_rate": 4.962201739037665e-06,
      "loss": 0.632,
      "step": 284
    },
    {
      "epoch": 0.24990409382364223,
      "grad_norm": 0.2716869569081184,
      "learning_rate": 4.961790824532306e-06,
      "loss": 0.6285,
      "step": 285
    },
    {
      "epoch": 0.25078095029319886,
      "grad_norm": 0.33415021484488,
      "learning_rate": 4.961377705706677e-06,
      "loss": 0.6295,
      "step": 286
    },
    {
      "epoch": 0.2516578067627555,
      "grad_norm": 0.3077857421614378,
      "learning_rate": 4.960962382930691e-06,
      "loss": 0.6273,
      "step": 287
    },
    {
      "epoch": 0.25253466323231216,
      "grad_norm": 0.3027918805177667,
      "learning_rate": 4.960544856576232e-06,
      "loss": 0.629,
      "step": 288
    },
    {
      "epoch": 0.2534115197018688,
      "grad_norm": 0.2916258020649895,
      "learning_rate": 4.960125127017159e-06,
      "loss": 0.6427,
      "step": 289
    },
    {
      "epoch": 0.2542883761714254,
      "grad_norm": 0.3152484231550671,
      "learning_rate": 4.959703194629304e-06,
      "loss": 0.6348,
      "step": 290
    },
    {
      "epoch": 0.25516523264098206,
      "grad_norm": 0.32915709407999866,
      "learning_rate": 4.959279059790471e-06,
      "loss": 0.632,
      "step": 291
    },
    {
      "epoch": 0.2560420891105387,
      "grad_norm": 0.2817567268029023,
      "learning_rate": 4.958852722880435e-06,
      "loss": 0.6112,
      "step": 292
    },
    {
      "epoch": 0.25691894558009537,
      "grad_norm": 0.3538236182060425,
      "learning_rate": 4.958424184280946e-06,
      "loss": 0.6241,
      "step": 293
    },
    {
      "epoch": 0.257795802049652,
      "grad_norm": 0.2864183700965389,
      "learning_rate": 4.957993444375719e-06,
      "loss": 0.6277,
      "step": 294
    },
    {
      "epoch": 0.2586726585192086,
      "grad_norm": 0.33515303575483923,
      "learning_rate": 4.95756050355045e-06,
      "loss": 0.6277,
      "step": 295
    },
    {
      "epoch": 0.2595495149887653,
      "grad_norm": 0.31975746198582533,
      "learning_rate": 4.957125362192794e-06,
      "loss": 0.6114,
      "step": 296
    },
    {
      "epoch": 0.2604263714583219,
      "grad_norm": 0.34329553758734277,
      "learning_rate": 4.956688020692386e-06,
      "loss": 0.6457,
      "step": 297
    },
    {
      "epoch": 0.2613032279278786,
      "grad_norm": 0.3122307785419701,
      "learning_rate": 4.956248479440827e-06,
      "loss": 0.6272,
      "step": 298
    },
    {
      "epoch": 0.2621800843974352,
      "grad_norm": 0.3126439049869492,
      "learning_rate": 4.955806738831687e-06,
      "loss": 0.634,
      "step": 299
    },
    {
      "epoch": 0.26305694086699183,
      "grad_norm": 0.30725526373905826,
      "learning_rate": 4.955362799260507e-06,
      "loss": 0.6269,
      "step": 300
    },
    {
      "epoch": 0.2639337973365485,
      "grad_norm": 0.2952615284346605,
      "learning_rate": 4.954916661124797e-06,
      "loss": 0.6129,
      "step": 301
    },
    {
      "epoch": 0.26481065380610513,
      "grad_norm": 0.3284069744839045,
      "learning_rate": 4.954468324824035e-06,
      "loss": 0.613,
      "step": 302
    },
    {
      "epoch": 0.26568751027566173,
      "grad_norm": 0.34051928196991404,
      "learning_rate": 4.954017790759666e-06,
      "loss": 0.6192,
      "step": 303
    },
    {
      "epoch": 0.2665643667452184,
      "grad_norm": 0.30608255552211977,
      "learning_rate": 4.953565059335104e-06,
      "loss": 0.6244,
      "step": 304
    },
    {
      "epoch": 0.26744122321477504,
      "grad_norm": 0.31501722301988566,
      "learning_rate": 4.953110130955733e-06,
      "loss": 0.6236,
      "step": 305
    },
    {
      "epoch": 0.2683180796843317,
      "grad_norm": 0.2978345978834651,
      "learning_rate": 4.9526530060289e-06,
      "loss": 0.6254,
      "step": 306
    },
    {
      "epoch": 0.2691949361538883,
      "grad_norm": 0.2935986604058687,
      "learning_rate": 4.952193684963922e-06,
      "loss": 0.6113,
      "step": 307
    },
    {
      "epoch": 0.27007179262344494,
      "grad_norm": 0.294670736028252,
      "learning_rate": 4.95173216817208e-06,
      "loss": 0.6335,
      "step": 308
    },
    {
      "epoch": 0.2709486490930016,
      "grad_norm": 0.2746280487759909,
      "learning_rate": 4.951268456066623e-06,
      "loss": 0.6211,
      "step": 309
    },
    {
      "epoch": 0.27182550556255825,
      "grad_norm": 0.2823209312944346,
      "learning_rate": 4.950802549062764e-06,
      "loss": 0.621,
      "step": 310
    },
    {
      "epoch": 0.27270236203211484,
      "grad_norm": 0.2811005060766513,
      "learning_rate": 4.950334447577685e-06,
      "loss": 0.6291,
      "step": 311
    },
    {
      "epoch": 0.2735792185016715,
      "grad_norm": 0.31377780747479117,
      "learning_rate": 4.9498641520305264e-06,
      "loss": 0.6308,
      "step": 312
    },
    {
      "epoch": 0.27445607497122815,
      "grad_norm": 0.263859895152384,
      "learning_rate": 4.949391662842401e-06,
      "loss": 0.6238,
      "step": 313
    },
    {
      "epoch": 0.2753329314407848,
      "grad_norm": 0.3124591272767995,
      "learning_rate": 4.948916980436379e-06,
      "loss": 0.6254,
      "step": 314
    },
    {
      "epoch": 0.27620978791034145,
      "grad_norm": 0.2762091249470148,
      "learning_rate": 4.948440105237499e-06,
      "loss": 0.6297,
      "step": 315
    },
    {
      "epoch": 0.27708664437989805,
      "grad_norm": 0.30510467983773004,
      "learning_rate": 4.947961037672761e-06,
      "loss": 0.6301,
      "step": 316
    },
    {
      "epoch": 0.2779635008494547,
      "grad_norm": 0.2894218681866538,
      "learning_rate": 4.947479778171127e-06,
      "loss": 0.6215,
      "step": 317
    },
    {
      "epoch": 0.27884035731901136,
      "grad_norm": 0.278604444379188,
      "learning_rate": 4.946996327163526e-06,
      "loss": 0.6193,
      "step": 318
    },
    {
      "epoch": 0.279717213788568,
      "grad_norm": 0.29226196825962947,
      "learning_rate": 4.946510685082844e-06,
      "loss": 0.6205,
      "step": 319
    },
    {
      "epoch": 0.2805940702581246,
      "grad_norm": 0.2956824922950759,
      "learning_rate": 4.946022852363932e-06,
      "loss": 0.6238,
      "step": 320
    },
    {
      "epoch": 0.28147092672768126,
      "grad_norm": 0.28796938907697983,
      "learning_rate": 4.945532829443604e-06,
      "loss": 0.6176,
      "step": 321
    },
    {
      "epoch": 0.2823477831972379,
      "grad_norm": 0.2688847498978228,
      "learning_rate": 4.945040616760629e-06,
      "loss": 0.6178,
      "step": 322
    },
    {
      "epoch": 0.28322463966679456,
      "grad_norm": 0.3167327299209847,
      "learning_rate": 4.944546214755744e-06,
      "loss": 0.6315,
      "step": 323
    },
    {
      "epoch": 0.28410149613635116,
      "grad_norm": 0.28346482132020456,
      "learning_rate": 4.9440496238716415e-06,
      "loss": 0.6281,
      "step": 324
    },
    {
      "epoch": 0.2849783526059078,
      "grad_norm": 0.2862108698161924,
      "learning_rate": 4.943550844552978e-06,
      "loss": 0.6445,
      "step": 325
    },
    {
      "epoch": 0.28585520907546447,
      "grad_norm": 0.3168994194030117,
      "learning_rate": 4.943049877246363e-06,
      "loss": 0.6336,
      "step": 326
    },
    {
      "epoch": 0.2867320655450211,
      "grad_norm": 0.3098419113094991,
      "learning_rate": 4.942546722400373e-06,
      "loss": 0.6194,
      "step": 327
    },
    {
      "epoch": 0.2876089220145777,
      "grad_norm": 0.3076330226750193,
      "learning_rate": 4.942041380465539e-06,
      "loss": 0.6332,
      "step": 328
    },
    {
      "epoch": 0.28848577848413437,
      "grad_norm": 0.3073675940253473,
      "learning_rate": 4.941533851894349e-06,
      "loss": 0.6329,
      "step": 329
    },
    {
      "epoch": 0.289362634953691,
      "grad_norm": 0.27407015238515836,
      "learning_rate": 4.9410241371412525e-06,
      "loss": 0.6292,
      "step": 330
    },
    {
      "epoch": 0.2902394914232477,
      "grad_norm": 0.3233677059379673,
      "learning_rate": 4.9405122366626545e-06,
      "loss": 0.6407,
      "step": 331
    },
    {
      "epoch": 0.2911163478928043,
      "grad_norm": 0.3056326849325438,
      "learning_rate": 4.939998150916917e-06,
      "loss": 0.6314,
      "step": 332
    },
    {
      "epoch": 0.2919932043623609,
      "grad_norm": 0.3140138519054107,
      "learning_rate": 4.93948188036436e-06,
      "loss": 0.6583,
      "step": 333
    },
    {
      "epoch": 0.2928700608319176,
      "grad_norm": 0.2967689552064628,
      "learning_rate": 4.938963425467258e-06,
      "loss": 0.6349,
      "step": 334
    },
    {
      "epoch": 0.29374691730147423,
      "grad_norm": 0.35320572702474673,
      "learning_rate": 4.938442786689843e-06,
      "loss": 0.6248,
      "step": 335
    },
    {
      "epoch": 0.29462377377103083,
      "grad_norm": 0.2958836632865014,
      "learning_rate": 4.9379199644983025e-06,
      "loss": 0.6255,
      "step": 336
    },
    {
      "epoch": 0.2955006302405875,
      "grad_norm": 0.3054952399371344,
      "learning_rate": 4.937394959360777e-06,
      "loss": 0.6119,
      "step": 337
    },
    {
      "epoch": 0.29637748671014413,
      "grad_norm": 0.34308383177638463,
      "learning_rate": 4.9368677717473645e-06,
      "loss": 0.6468,
      "step": 338
    },
    {
      "epoch": 0.2972543431797008,
      "grad_norm": 0.2648620374237178,
      "learning_rate": 4.936338402130115e-06,
      "loss": 0.6203,
      "step": 339
    },
    {
      "epoch": 0.29813119964925744,
      "grad_norm": 0.2976099930186866,
      "learning_rate": 4.935806850983034e-06,
      "loss": 0.6348,
      "step": 340
    },
    {
      "epoch": 0.29900805611881404,
      "grad_norm": 0.285144357181017,
      "learning_rate": 4.935273118782078e-06,
      "loss": 0.6115,
      "step": 341
    },
    {
      "epoch": 0.2998849125883707,
      "grad_norm": 0.3079688238524965,
      "learning_rate": 4.934737206005159e-06,
      "loss": 0.6254,
      "step": 342
    },
    {
      "epoch": 0.30076176905792734,
      "grad_norm": 0.27719094781494596,
      "learning_rate": 4.93419911313214e-06,
      "loss": 0.6386,
      "step": 343
    },
    {
      "epoch": 0.301638625527484,
      "grad_norm": 0.29796636665366355,
      "learning_rate": 4.933658840644837e-06,
      "loss": 0.6268,
      "step": 344
    },
    {
      "epoch": 0.3025154819970406,
      "grad_norm": 0.27509893042636935,
      "learning_rate": 4.933116389027017e-06,
      "loss": 0.621,
      "step": 345
    },
    {
      "epoch": 0.30339233846659724,
      "grad_norm": 0.31224342373584874,
      "learning_rate": 4.932571758764398e-06,
      "loss": 0.6312,
      "step": 346
    },
    {
      "epoch": 0.3042691949361539,
      "grad_norm": 0.2689144896057607,
      "learning_rate": 4.93202495034465e-06,
      "loss": 0.6115,
      "step": 347
    },
    {
      "epoch": 0.30514605140571055,
      "grad_norm": 0.2558266510993566,
      "learning_rate": 4.931475964257391e-06,
      "loss": 0.6245,
      "step": 348
    },
    {
      "epoch": 0.30602290787526715,
      "grad_norm": 0.25500762407211314,
      "learning_rate": 4.930924800994192e-06,
      "loss": 0.6091,
      "step": 349
    },
    {
      "epoch": 0.3068997643448238,
      "grad_norm": 0.2717131638453367,
      "learning_rate": 4.9303714610485705e-06,
      "loss": 0.6281,
      "step": 350
    },
    {
      "epoch": 0.30777662081438045,
      "grad_norm": 0.2729400616989181,
      "learning_rate": 4.929815944915997e-06,
      "loss": 0.6083,
      "step": 351
    },
    {
      "epoch": 0.3086534772839371,
      "grad_norm": 0.26000631857019024,
      "learning_rate": 4.929258253093885e-06,
      "loss": 0.6198,
      "step": 352
    },
    {
      "epoch": 0.3095303337534937,
      "grad_norm": 0.2740884453189882,
      "learning_rate": 4.9286983860816e-06,
      "loss": 0.6338,
      "step": 353
    },
    {
      "epoch": 0.31040719022305036,
      "grad_norm": 0.27150990388252366,
      "learning_rate": 4.928136344380457e-06,
      "loss": 0.6162,
      "step": 354
    },
    {
      "epoch": 0.311284046692607,
      "grad_norm": 0.26286571771385,
      "learning_rate": 4.9275721284937115e-06,
      "loss": 0.629,
      "step": 355
    },
    {
      "epoch": 0.31216090316216366,
      "grad_norm": 0.27510252961865267,
      "learning_rate": 4.9270057389265734e-06,
      "loss": 0.633,
      "step": 356
    },
    {
      "epoch": 0.31303775963172026,
      "grad_norm": 0.2825214790660817,
      "learning_rate": 4.926437176186193e-06,
      "loss": 0.6263,
      "step": 357
    },
    {
      "epoch": 0.3139146161012769,
      "grad_norm": 0.29292375908331497,
      "learning_rate": 4.92586644078167e-06,
      "loss": 0.6313,
      "step": 358
    },
    {
      "epoch": 0.31479147257083356,
      "grad_norm": 0.2760563004495057,
      "learning_rate": 4.925293533224049e-06,
      "loss": 0.6174,
      "step": 359
    },
    {
      "epoch": 0.3156683290403902,
      "grad_norm": 0.29078508943452525,
      "learning_rate": 4.924718454026318e-06,
      "loss": 0.6156,
      "step": 360
    },
    {
      "epoch": 0.3165451855099468,
      "grad_norm": 0.2878769173523044,
      "learning_rate": 4.924141203703412e-06,
      "loss": 0.6047,
      "step": 361
    },
    {
      "epoch": 0.31742204197950347,
      "grad_norm": 0.27485843884417593,
      "learning_rate": 4.923561782772206e-06,
      "loss": 0.6293,
      "step": 362
    },
    {
      "epoch": 0.3182988984490601,
      "grad_norm": 0.2865164028316351,
      "learning_rate": 4.922980191751524e-06,
      "loss": 0.6269,
      "step": 363
    },
    {
      "epoch": 0.31917575491861677,
      "grad_norm": 0.27991173694279825,
      "learning_rate": 4.922396431162129e-06,
      "loss": 0.6143,
      "step": 364
    },
    {
      "epoch": 0.3200526113881734,
      "grad_norm": 0.279639353480309,
      "learning_rate": 4.921810501526728e-06,
      "loss": 0.635,
      "step": 365
    },
    {
      "epoch": 0.32092946785773,
      "grad_norm": 0.2830142803081013,
      "learning_rate": 4.921222403369971e-06,
      "loss": 0.6157,
      "step": 366
    },
    {
      "epoch": 0.3218063243272867,
      "grad_norm": 0.2684155306717856,
      "learning_rate": 4.920632137218447e-06,
      "loss": 0.6294,
      "step": 367
    },
    {
      "epoch": 0.3226831807968433,
      "grad_norm": 0.2983455576981931,
      "learning_rate": 4.920039703600691e-06,
      "loss": 0.624,
      "step": 368
    },
    {
      "epoch": 0.3235600372664,
      "grad_norm": 0.2948947231333358,
      "learning_rate": 4.9194451030471735e-06,
      "loss": 0.6102,
      "step": 369
    },
    {
      "epoch": 0.3244368937359566,
      "grad_norm": 0.2826890911442374,
      "learning_rate": 4.918848336090309e-06,
      "loss": 0.6236,
      "step": 370
    },
    {
      "epoch": 0.32531375020551323,
      "grad_norm": 0.32269493597939386,
      "learning_rate": 4.91824940326445e-06,
      "loss": 0.6139,
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.3261906066750699, |
|
"grad_norm": 0.2734983777513044, |
|
"learning_rate": 4.91764830510589e-06, |
|
"loss": 0.6166, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.32706746314462654, |
|
"grad_norm": 0.36983262498880637, |
|
"learning_rate": 4.917045042152858e-06, |
|
"loss": 0.6186, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.32794431961418313, |
|
"grad_norm": 0.2751996219950251, |
|
"learning_rate": 4.916439614945527e-06, |
|
"loss": 0.6412, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3288211760837398, |
|
"grad_norm": 0.319865198714037, |
|
"learning_rate": 4.915832024026002e-06, |
|
"loss": 0.627, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.32969803255329644, |
|
"grad_norm": 0.29823421688781576, |
|
"learning_rate": 4.915222269938328e-06, |
|
"loss": 0.6181, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3305748890228531, |
|
"grad_norm": 0.27335542421500575, |
|
"learning_rate": 4.914610353228488e-06, |
|
"loss": 0.6202, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3314517454924097, |
|
"grad_norm": 0.3824213724235341, |
|
"learning_rate": 4.913996274444401e-06, |
|
"loss": 0.608, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.33232860196196634, |
|
"grad_norm": 0.3269271239671324, |
|
"learning_rate": 4.913380034135919e-06, |
|
"loss": 0.6229, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.333205458431523, |
|
"grad_norm": 0.2832871290462529, |
|
"learning_rate": 4.912761632854834e-06, |
|
"loss": 0.618, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.33408231490107965, |
|
"grad_norm": 0.329936751234759, |
|
"learning_rate": 4.912141071154869e-06, |
|
"loss": 0.6231, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.33495917137063624, |
|
"grad_norm": 0.2752693680315103, |
|
"learning_rate": 4.911518349591685e-06, |
|
"loss": 0.6234, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3358360278401929, |
|
"grad_norm": 0.3136704903953731, |
|
"learning_rate": 4.9108934687228735e-06, |
|
"loss": 0.6248, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.33671288430974955, |
|
"grad_norm": 0.2947450161853734, |
|
"learning_rate": 4.910266429107962e-06, |
|
"loss": 0.6291, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.3375897407793062, |
|
"grad_norm": 0.27963622109645897, |
|
"learning_rate": 4.90963723130841e-06, |
|
"loss": 0.6168, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3384665972488628, |
|
"grad_norm": 0.2755048673546131, |
|
"learning_rate": 4.90900587588761e-06, |
|
"loss": 0.6022, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.33934345371841945, |
|
"grad_norm": 0.28857281828902753, |
|
"learning_rate": 4.908372363410886e-06, |
|
"loss": 0.6254, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3402203101879761, |
|
"grad_norm": 0.28648556573019374, |
|
"learning_rate": 4.907736694445492e-06, |
|
"loss": 0.6175, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.34109716665753276, |
|
"grad_norm": 0.26925532018377424, |
|
"learning_rate": 4.9070988695606156e-06, |
|
"loss": 0.6176, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.3419740231270894, |
|
"grad_norm": 0.2832182299890066, |
|
"learning_rate": 4.906458889327375e-06, |
|
"loss": 0.6291, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.342850879596646, |
|
"grad_norm": 0.24545023229724808, |
|
"learning_rate": 4.905816754318815e-06, |
|
"loss": 0.621, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.34372773606620266, |
|
"grad_norm": 0.27071805276574584, |
|
"learning_rate": 4.905172465109912e-06, |
|
"loss": 0.6235, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3446045925357593, |
|
"grad_norm": 0.2686211222363871, |
|
"learning_rate": 4.904526022277572e-06, |
|
"loss": 0.6259, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.34548144900531597, |
|
"grad_norm": 0.2788582786567745, |
|
"learning_rate": 4.903877426400629e-06, |
|
"loss": 0.6113, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.34635830547487256, |
|
"grad_norm": 0.2882303517807228, |
|
"learning_rate": 4.903226678059842e-06, |
|
"loss": 0.6325, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3472351619444292, |
|
"grad_norm": 0.26417391198725343, |
|
"learning_rate": 4.902573777837902e-06, |
|
"loss": 0.6171, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.34811201841398587, |
|
"grad_norm": 0.27931172516771346, |
|
"learning_rate": 4.901918726319424e-06, |
|
"loss": 0.6041, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3489888748835425, |
|
"grad_norm": 0.24713049818043734, |
|
"learning_rate": 4.901261524090949e-06, |
|
"loss": 0.6099, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3498657313530991, |
|
"grad_norm": 0.29086241382146505, |
|
"learning_rate": 4.900602171740946e-06, |
|
"loss": 0.6258, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.35074258782265577, |
|
"grad_norm": 0.26291418203363, |
|
"learning_rate": 4.899940669859807e-06, |
|
"loss": 0.6117, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3516194442922124, |
|
"grad_norm": 0.3216617316096804, |
|
"learning_rate": 4.89927701903985e-06, |
|
"loss": 0.6187, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3524963007617691, |
|
"grad_norm": 0.27295463776878537, |
|
"learning_rate": 4.898611219875316e-06, |
|
"loss": 0.6132, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3533731572313257, |
|
"grad_norm": 0.2853334578601736, |
|
"learning_rate": 4.897943272962372e-06, |
|
"loss": 0.6148, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3542500137008823, |
|
"grad_norm": 0.31932832747253076, |
|
"learning_rate": 4.897273178899105e-06, |
|
"loss": 0.6187, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.355126870170439, |
|
"grad_norm": 0.28031643219296354, |
|
"learning_rate": 4.896600938285526e-06, |
|
"loss": 0.6236, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.35600372663999563, |
|
"grad_norm": 0.26831626886851945, |
|
"learning_rate": 4.89592655172357e-06, |
|
"loss": 0.6102, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.35688058310955223, |
|
"grad_norm": 0.2951228212133584, |
|
"learning_rate": 4.895250019817089e-06, |
|
"loss": 0.6164, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3577574395791089, |
|
"grad_norm": 0.27330142007513136, |
|
"learning_rate": 4.894571343171862e-06, |
|
"loss": 0.6023, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.35863429604866554, |
|
"grad_norm": 0.3204620119402923, |
|
"learning_rate": 4.893890522395582e-06, |
|
"loss": 0.62, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3595111525182222, |
|
"grad_norm": 0.261478566125417, |
|
"learning_rate": 4.893207558097867e-06, |
|
"loss": 0.6294, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.36038800898777884, |
|
"grad_norm": 0.250895473885103, |
|
"learning_rate": 4.892522450890251e-06, |
|
"loss": 0.6152, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.36126486545733544, |
|
"grad_norm": 0.2634865561040139, |
|
"learning_rate": 4.89183520138619e-06, |
|
"loss": 0.6157, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3621417219268921, |
|
"grad_norm": 0.26459491662331874, |
|
"learning_rate": 4.891145810201054e-06, |
|
"loss": 0.609, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.36301857839644874, |
|
"grad_norm": 0.24301745655990745, |
|
"learning_rate": 4.8904542779521346e-06, |
|
"loss": 0.6082, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3638954348660054, |
|
"grad_norm": 0.2692643109083729, |
|
"learning_rate": 4.8897606052586384e-06, |
|
"loss": 0.6226, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.364772291335562, |
|
"grad_norm": 0.24024671108707563, |
|
"learning_rate": 4.889064792741689e-06, |
|
"loss": 0.6153, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.36564914780511865, |
|
"grad_norm": 0.273288282597359, |
|
"learning_rate": 4.888366841024327e-06, |
|
"loss": 0.6334, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3665260042746753, |
|
"grad_norm": 0.2713735341001686, |
|
"learning_rate": 4.887666750731507e-06, |
|
"loss": 0.6204, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.36740286074423195, |
|
"grad_norm": 0.2749014394381958, |
|
"learning_rate": 4.8869645224901e-06, |
|
"loss": 0.6017, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.36827971721378855, |
|
"grad_norm": 0.27621114898765087, |
|
"learning_rate": 4.8862601569288885e-06, |
|
"loss": 0.6193, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3691565736833452, |
|
"grad_norm": 0.25931507650511326, |
|
"learning_rate": 4.885553654678573e-06, |
|
"loss": 0.6233, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.37003343015290185, |
|
"grad_norm": 0.28686169175433923, |
|
"learning_rate": 4.884845016371763e-06, |
|
"loss": 0.6197, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3709102866224585, |
|
"grad_norm": 0.27025382919889446, |
|
"learning_rate": 4.884134242642985e-06, |
|
"loss": 0.6033, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3717871430920151, |
|
"grad_norm": 0.275669477293775, |
|
"learning_rate": 4.883421334128674e-06, |
|
"loss": 0.6172, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.37266399956157176, |
|
"grad_norm": 0.26014021950194516, |
|
"learning_rate": 4.8827062914671775e-06, |
|
"loss": 0.6207, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3735408560311284, |
|
"grad_norm": 0.2986829920255015, |
|
"learning_rate": 4.881989115298755e-06, |
|
"loss": 0.6034, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.37441771250068506, |
|
"grad_norm": 0.28151692244357057, |
|
"learning_rate": 4.881269806265575e-06, |
|
"loss": 0.6133, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.37529456897024166, |
|
"grad_norm": 0.2932206682237993, |
|
"learning_rate": 4.8805483650117154e-06, |
|
"loss": 0.6132, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3761714254397983, |
|
"grad_norm": 0.3164265338412961, |
|
"learning_rate": 4.879824792183166e-06, |
|
"loss": 0.6077, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.37704828190935497, |
|
"grad_norm": 0.3636164115457003, |
|
"learning_rate": 4.879099088427824e-06, |
|
"loss": 0.6179, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3779251383789116, |
|
"grad_norm": 0.2891875334309757, |
|
"learning_rate": 4.878371254395492e-06, |
|
"loss": 0.6197, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3788019948484682, |
|
"grad_norm": 0.3816104662619605, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"loss": 0.6197, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.37967885131802487, |
|
"grad_norm": 0.29131497715708005, |
|
"learning_rate": 4.876909198108619e-06, |
|
"loss": 0.6159, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3805557077875815, |
|
"grad_norm": 0.3138520265609416, |
|
"learning_rate": 4.876174977163222e-06, |
|
"loss": 0.6139, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3814325642571382, |
|
"grad_norm": 0.28035852092093033, |
|
"learning_rate": 4.875438628559124e-06, |
|
"loss": 0.6183, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3823094207266948, |
|
"grad_norm": 0.3120106817898386, |
|
"learning_rate": 4.874700152955661e-06, |
|
"loss": 0.6052, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3831862771962514, |
|
"grad_norm": 0.29139666929908226, |
|
"learning_rate": 4.873959551014075e-06, |
|
"loss": 0.6058, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3840631336658081, |
|
"grad_norm": 0.31305383154436955, |
|
"learning_rate": 4.873216823397511e-06, |
|
"loss": 0.6094, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.38493999013536473, |
|
"grad_norm": 0.3052879988977325, |
|
"learning_rate": 4.872471970771015e-06, |
|
"loss": 0.6063, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3858168466049214, |
|
"grad_norm": 0.2965934350138861, |
|
"learning_rate": 4.871724993801541e-06, |
|
"loss": 0.6054, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.386693703074478, |
|
"grad_norm": 0.26339362714008424, |
|
"learning_rate": 4.870975893157941e-06, |
|
"loss": 0.6152, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.38757055954403463, |
|
"grad_norm": 0.27556079714679943, |
|
"learning_rate": 4.870224669510968e-06, |
|
"loss": 0.6158, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3884474160135913, |
|
"grad_norm": 0.29125701036171053, |
|
"learning_rate": 4.86947132353328e-06, |
|
"loss": 0.6202, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.38932427248314794, |
|
"grad_norm": 0.2966406156980298, |
|
"learning_rate": 4.868715855899432e-06, |
|
"loss": 0.6265, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.39020112895270453, |
|
"grad_norm": 0.27733217518457043, |
|
"learning_rate": 4.867958267285879e-06, |
|
"loss": 0.6068, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3910779854222612, |
|
"grad_norm": 0.2919788828093281, |
|
"learning_rate": 4.8671985583709765e-06, |
|
"loss": 0.6208, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.39195484189181784, |
|
"grad_norm": 0.29327731039840055, |
|
"learning_rate": 4.866436729834979e-06, |
|
"loss": 0.6175, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3928316983613745, |
|
"grad_norm": 0.2568832744529454, |
|
"learning_rate": 4.865672782360037e-06, |
|
"loss": 0.6177, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3937085548309311, |
|
"grad_norm": 0.283654204460893, |
|
"learning_rate": 4.8649067166301985e-06, |
|
"loss": 0.6203, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.39458541130048774, |
|
"grad_norm": 0.26828805221375346, |
|
"learning_rate": 4.864138533331411e-06, |
|
"loss": 0.6118, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3954622677700444, |
|
"grad_norm": 0.2597158618871073, |
|
"learning_rate": 4.863368233151514e-06, |
|
"loss": 0.6169, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.39633912423960105, |
|
"grad_norm": 0.28436035142498156, |
|
"learning_rate": 4.862595816780246e-06, |
|
"loss": 0.632, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.39721598070915765, |
|
"grad_norm": 0.2652505819829089, |
|
"learning_rate": 4.861821284909238e-06, |
|
"loss": 0.6289, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3980928371787143, |
|
"grad_norm": 0.29252031992594624, |
|
"learning_rate": 4.861044638232016e-06, |
|
"loss": 0.6328, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.39896969364827095, |
|
"grad_norm": 0.2994469365008051, |
|
"learning_rate": 4.860265877444001e-06, |
|
"loss": 0.617, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3998465501178276, |
|
"grad_norm": 0.2776900829822044, |
|
"learning_rate": 4.8594850032425036e-06, |
|
"loss": 0.608, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.4007234065873842, |
|
"grad_norm": 0.2753322141436327, |
|
"learning_rate": 4.858702016326731e-06, |
|
"loss": 0.607, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.40160026305694085, |
|
"grad_norm": 0.2738219915396828, |
|
"learning_rate": 4.857916917397779e-06, |
|
"loss": 0.6043, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4024771195264975, |
|
"grad_norm": 0.27192665887665013, |
|
"learning_rate": 4.857129707158637e-06, |
|
"loss": 0.6376, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.40335397599605416, |
|
"grad_norm": 0.27689826150792163, |
|
"learning_rate": 4.8563403863141825e-06, |
|
"loss": 0.6172, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4042308324656108, |
|
"grad_norm": 0.311644665297658, |
|
"learning_rate": 4.855548955571183e-06, |
|
"loss": 0.6106, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4051076889351674, |
|
"grad_norm": 0.2912453467934098, |
|
"learning_rate": 4.854755415638298e-06, |
|
"loss": 0.6129, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.40598454540472406, |
|
"grad_norm": 0.302939167109194, |
|
"learning_rate": 4.853959767226072e-06, |
|
"loss": 0.6301, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.4068614018742807, |
|
"grad_norm": 0.261297831693092, |
|
"learning_rate": 4.85316201104694e-06, |
|
"loss": 0.6136, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.40773825834383737, |
|
"grad_norm": 0.3154856081824323, |
|
"learning_rate": 4.852362147815225e-06, |
|
"loss": 0.6171, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.40861511481339396, |
|
"grad_norm": 0.29411022742744497, |
|
"learning_rate": 4.8515601782471325e-06, |
|
"loss": 0.6085, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.4094919712829506, |
|
"grad_norm": 0.3027595832299397, |
|
"learning_rate": 4.8507561030607576e-06, |
|
"loss": 0.6151, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.41036882775250727, |
|
"grad_norm": 0.3003092813187261, |
|
"learning_rate": 4.84994992297608e-06, |
|
"loss": 0.6071, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.4112456842220639, |
|
"grad_norm": 0.27374249219050456, |
|
"learning_rate": 4.849141638714965e-06, |
|
"loss": 0.6166, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.4121225406916205, |
|
"grad_norm": 0.3064667255626573, |
|
"learning_rate": 4.84833125100116e-06, |
|
"loss": 0.6024, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4129993971611772, |
|
"grad_norm": 0.28188617697439766, |
|
"learning_rate": 4.847518760560297e-06, |
|
"loss": 0.6134, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.4138762536307338, |
|
"grad_norm": 0.27693005272362925, |
|
"learning_rate": 4.846704168119892e-06, |
|
"loss": 0.5984, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.4147531101002905, |
|
"grad_norm": 0.3011450154809493, |
|
"learning_rate": 4.84588747440934e-06, |
|
"loss": 0.5932, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.4156299665698471, |
|
"grad_norm": 0.25715138595393167, |
|
"learning_rate": 4.845068680159921e-06, |
|
"loss": 0.6101, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.41650682303940373, |
|
"grad_norm": 0.2963493163477849, |
|
"learning_rate": 4.844247786104794e-06, |
|
"loss": 0.6081, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.4173836795089604, |
|
"grad_norm": 0.29399759702492007, |
|
"learning_rate": 4.8434247929789975e-06, |
|
"loss": 0.6046, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.41826053597851703, |
|
"grad_norm": 0.3126535237916745, |
|
"learning_rate": 4.842599701519451e-06, |
|
"loss": 0.6304, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.41913739244807363, |
|
"grad_norm": 0.29299694878032745, |
|
"learning_rate": 4.841772512464953e-06, |
|
"loss": 0.6168, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.4200142489176303, |
|
"grad_norm": 0.289486342187316, |
|
"learning_rate": 4.840943226556178e-06, |
|
"loss": 0.6031, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.42089110538718694, |
|
"grad_norm": 0.30359254383613277, |
|
"learning_rate": 4.840111844535682e-06, |
|
"loss": 0.5994, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4217679618567436, |
|
"grad_norm": 0.2641793447534652, |
|
"learning_rate": 4.839278367147894e-06, |
|
"loss": 0.6036, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.4226448183263002, |
|
"grad_norm": 0.29968320834098117, |
|
"learning_rate": 4.838442795139121e-06, |
|
"loss": 0.6193, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.42352167479585684, |
|
"grad_norm": 0.30614554761610074, |
|
"learning_rate": 4.837605129257546e-06, |
|
"loss": 0.6115, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.4243985312654135, |
|
"grad_norm": 0.29316129861054724, |
|
"learning_rate": 4.836765370253223e-06, |
|
"loss": 0.6039, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.42527538773497015, |
|
"grad_norm": 0.35388210389950725, |
|
"learning_rate": 4.835923518878088e-06, |
|
"loss": 0.6089, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4261522442045268, |
|
"grad_norm": 0.27541931694811506, |
|
"learning_rate": 4.835079575885944e-06, |
|
"loss": 0.6129, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4270291006740834, |
|
"grad_norm": 0.3408256598988536, |
|
"learning_rate": 4.834233542032468e-06, |
|
"loss": 0.6165, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.42790595714364005, |
|
"grad_norm": 0.30259946435062773, |
|
"learning_rate": 4.83338541807521e-06, |
|
"loss": 0.6111, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.4287828136131967, |
|
"grad_norm": 0.2871132966743198, |
|
"learning_rate": 4.832535204773593e-06, |
|
"loss": 0.6273, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.42965967008275335, |
|
"grad_norm": 0.3457337315321895, |
|
"learning_rate": 4.8316829028889076e-06, |
|
"loss": 0.6005, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.43053652655230995, |
|
"grad_norm": 0.2668696078107318, |
|
"learning_rate": 4.830828513184317e-06, |
|
"loss": 0.6122, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4314133830218666, |
|
"grad_norm": 0.321068645111551, |
|
"learning_rate": 4.829972036424854e-06, |
|
"loss": 0.6058, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.43229023949142326, |
|
"grad_norm": 0.26125737492647644, |
|
"learning_rate": 4.829113473377417e-06, |
|
"loss": 0.6143, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4331670959609799, |
|
"grad_norm": 0.32002755047063874, |
|
"learning_rate": 4.828252824810777e-06, |
|
"loss": 0.6061, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.4340439524305365, |
|
"grad_norm": 0.2863878470189295, |
|
"learning_rate": 4.82739009149557e-06, |
|
"loss": 0.5977, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.43492080890009316, |
|
"grad_norm": 0.31874371835878795, |
|
"learning_rate": 4.826525274204297e-06, |
|
"loss": 0.608, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.4357976653696498, |
|
"grad_norm": 0.2956391151217163, |
|
"learning_rate": 4.825658373711328e-06, |
|
"loss": 0.6107, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.43667452183920646, |
|
"grad_norm": 0.288406786632812, |
|
"learning_rate": 4.824789390792899e-06, |
|
"loss": 0.6094, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.43755137830876306, |
|
"grad_norm": 0.33737182032602686, |
|
"learning_rate": 4.823918326227106e-06, |
|
"loss": 0.5971, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.4384282347783197, |
|
"grad_norm": 0.25632117321609454, |
|
"learning_rate": 4.823045180793914e-06, |
|
"loss": 0.6044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.43930509124787637, |
|
"grad_norm": 0.2978956835348055, |
|
"learning_rate": 4.8221699552751465e-06, |
|
"loss": 0.6009, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.440181947717433, |
|
"grad_norm": 0.30339339194561, |
|
"learning_rate": 4.821292650454495e-06, |
|
"loss": 0.6113, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4410588041869896, |
|
"grad_norm": 0.3083549716587437, |
|
"learning_rate": 4.8204132671175085e-06, |
|
"loss": 0.6074, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.44193566065654627, |
|
"grad_norm": 0.291272682255802, |
|
"learning_rate": 4.819531806051599e-06, |
|
"loss": 0.606, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4428125171261029, |
|
"grad_norm": 0.3183233272727026, |
|
"learning_rate": 4.818648268046038e-06, |
|
"loss": 0.6145, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4436893735956596, |
|
"grad_norm": 0.27989457450916727, |
|
"learning_rate": 4.817762653891957e-06, |
|
"loss": 0.6095, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4445662300652162, |
|
"grad_norm": 0.32106502207942483, |
|
"learning_rate": 4.816874964382346e-06, |
|
"loss": 0.6096, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4454430865347728, |
|
"grad_norm": 0.2690675603747584, |
|
"learning_rate": 4.815985200312057e-06, |
|
"loss": 0.5986, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4463199430043295, |
|
"grad_norm": 0.2818980909126885, |
|
"learning_rate": 4.815093362477793e-06, |
|
"loss": 0.6136, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.44719679947388613, |
|
"grad_norm": 0.29748447845455983, |
|
"learning_rate": 4.8141994516781196e-06, |
|
"loss": 0.6162, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4480736559434428, |
|
"grad_norm": 0.3107094817046459, |
|
"learning_rate": 4.813303468713456e-06, |
|
"loss": 0.5939, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4489505124129994, |
|
"grad_norm": 0.27493905192543294, |
|
"learning_rate": 4.812405414386078e-06, |
|
"loss": 0.6054, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.44982736888255603, |
|
"grad_norm": 0.28885594119974684, |
|
"learning_rate": 4.811505289500113e-06, |
|
"loss": 0.611, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4507042253521127, |
|
"grad_norm": 0.2724458036095346, |
|
"learning_rate": 4.810603094861548e-06, |
|
"loss": 0.6296, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.45158108182166934, |
|
"grad_norm": 0.3171235548951884, |
|
"learning_rate": 4.809698831278217e-06, |
|
"loss": 0.6137, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.45245793829122594, |
|
"grad_norm": 0.2975607228468226, |
|
"learning_rate": 4.808792499559812e-06, |
|
"loss": 0.6081, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4533347947607826, |
|
"grad_norm": 0.29553804453973653, |
|
"learning_rate": 4.807884100517873e-06, |
|
"loss": 0.6106, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.45421165123033924, |
|
"grad_norm": 0.29283068458115197, |
|
"learning_rate": 4.8069736349657935e-06, |
|
"loss": 0.6144, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4550885076998959, |
|
"grad_norm": 0.3123674697628625, |
|
"learning_rate": 4.806061103718816e-06, |
|
"loss": 0.6024, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4559653641694525, |
|
"grad_norm": 0.3185535504257689, |
|
"learning_rate": 4.805146507594034e-06, |
|
"loss": 0.6031, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.45684222063900914, |
|
"grad_norm": 0.32719458735857726, |
|
"learning_rate": 4.804229847410388e-06, |
|
"loss": 0.614, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4577190771085658, |
|
"grad_norm": 0.2756686412179773, |
|
"learning_rate": 4.803311123988668e-06, |
|
"loss": 0.6143, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.45859593357812245, |
|
"grad_norm": 0.3193363571929515, |
|
"learning_rate": 4.802390338151512e-06, |
|
"loss": 0.5962, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.45947279004767905, |
|
"grad_norm": 0.27470129307670516, |
|
"learning_rate": 4.801467490723402e-06, |
|
"loss": 0.6118, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4603496465172357, |
|
"grad_norm": 0.3268257836594815, |
|
"learning_rate": 4.800542582530668e-06, |
|
"loss": 0.6091, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.46122650298679235, |
|
"grad_norm": 0.2636715015821582, |
|
"learning_rate": 4.799615614401488e-06, |
|
"loss": 0.6113, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.462103359456349, |
|
"grad_norm": 0.3309929173426789, |
|
"learning_rate": 4.79868658716588e-06, |
|
"loss": 0.6063, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4629802159259056, |
|
"grad_norm": 0.2705433155095911, |
|
"learning_rate": 4.7977555016557054e-06, |
|
"loss": 0.6115, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.46385707239546226, |
|
"grad_norm": 0.2986983107432822, |
|
"learning_rate": 4.796822358704673e-06, |
|
"loss": 0.624, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4647339288650189, |
|
"grad_norm": 0.27153673858142124, |
|
"learning_rate": 4.7958871591483305e-06, |
|
"loss": 0.6144, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.46561078533457556, |
|
"grad_norm": 0.2774095045069063, |
|
"learning_rate": 4.794949903824069e-06, |
|
"loss": 0.6082, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4664876418041322, |
|
"grad_norm": 0.28167525290961587, |
|
"learning_rate": 4.794010593571118e-06, |
|
"loss": 0.6106, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4673644982736888, |
|
"grad_norm": 0.2626835693504621, |
|
"learning_rate": 4.793069229230548e-06, |
|
"loss": 0.6142, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.46824135474324546, |
|
"grad_norm": 0.27619948959341917, |
|
"learning_rate": 4.792125811645271e-06, |
|
"loss": 0.6073, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4691182112128021, |
|
"grad_norm": 0.2913249262978291, |
|
"learning_rate": 4.791180341660035e-06, |
|
"loss": 0.6034, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.46999506768235877, |
|
"grad_norm": 0.2792318560656134, |
|
"learning_rate": 4.790232820121426e-06, |
|
"loss": 0.6002, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.47087192415191537, |
|
"grad_norm": 0.2690237732263836, |
|
"learning_rate": 4.789283247877867e-06, |
|
"loss": 0.6128, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.471748780621472, |
|
"grad_norm": 0.2875784864108413, |
|
"learning_rate": 4.7883316257796195e-06, |
|
"loss": 0.6125, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.47262563709102867, |
|
"grad_norm": 0.3494280106540881, |
|
"learning_rate": 4.787377954678776e-06, |
|
"loss": 0.6079, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4735024935605853, |
|
"grad_norm": 0.27811345732659243, |
|
"learning_rate": 4.786422235429269e-06, |
|
"loss": 0.6118, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4743793500301419, |
|
"grad_norm": 0.33921109846320074, |
|
"learning_rate": 4.785464468886859e-06, |
|
"loss": 0.6176, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4752562064996986, |
|
"grad_norm": 0.29592545517880114, |
|
"learning_rate": 4.784504655909146e-06, |
|
"loss": 0.6131, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4761330629692552, |
|
"grad_norm": 0.29373530511374163, |
|
"learning_rate": 4.783542797355558e-06, |
|
"loss": 0.6082, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4770099194388119, |
|
"grad_norm": 0.2999691792256973, |
|
"learning_rate": 4.782578894087357e-06, |
|
"loss": 0.5981, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4778867759083685, |
|
"grad_norm": 0.2694268894908227, |
|
"learning_rate": 4.781612946967632e-06, |
|
"loss": 0.6055, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.47876363237792513, |
|
"grad_norm": 0.2970836241532985, |
|
"learning_rate": 4.780644956861307e-06, |
|
"loss": 0.6002, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4796404888474818, |
|
"grad_norm": 0.3413332201519291, |
|
"learning_rate": 4.7796749246351335e-06, |
|
"loss": 0.6103, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.48051734531703844, |
|
"grad_norm": 0.27732196553749033, |
|
"learning_rate": 4.77870285115769e-06, |
|
"loss": 0.5972, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.48139420178659503, |
|
"grad_norm": 0.32594912225980904, |
|
"learning_rate": 4.777728737299387e-06, |
|
"loss": 0.6275, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4822710582561517, |
|
"grad_norm": 0.28158230943213153, |
|
"learning_rate": 4.776752583932455e-06, |
|
"loss": 0.6215, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.48314791472570834, |
|
"grad_norm": 0.3244722564822324, |
|
"learning_rate": 4.775774391930956e-06, |
|
"loss": 0.5947, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.484024771195265, |
|
"grad_norm": 0.26397208532030864, |
|
"learning_rate": 4.774794162170777e-06, |
|
"loss": 0.611, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4849016276648216, |
|
"grad_norm": 0.2816890422555255, |
|
"learning_rate": 4.773811895529629e-06, |
|
"loss": 0.5942, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.48577848413437824, |
|
"grad_norm": 0.28224512879430635, |
|
"learning_rate": 4.772827592887046e-06, |
|
"loss": 0.5918, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4866553406039349, |
|
"grad_norm": 0.2978578883597439, |
|
"learning_rate": 4.771841255124385e-06, |
|
"loss": 0.6031, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.48753219707349155, |
|
"grad_norm": 0.3212067488646109, |
|
"learning_rate": 4.770852883124827e-06, |
|
"loss": 0.6066, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4884090535430482, |
|
"grad_norm": 0.3047898856904216, |
|
"learning_rate": 4.769862477773374e-06, |
|
"loss": 0.6097, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4892859100126048, |
|
"grad_norm": 0.32816575436148626, |
|
"learning_rate": 4.768870039956846e-06, |
|
"loss": 0.6078, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.49016276648216145, |
|
"grad_norm": 0.30333447423661625, |
|
"learning_rate": 4.767875570563887e-06, |
|
"loss": 0.6103, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4910396229517181, |
|
"grad_norm": 0.32463487013229164, |
|
"learning_rate": 4.766879070484957e-06, |
|
"loss": 0.5925, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.49191647942127475, |
|
"grad_norm": 0.27125555349656966, |
|
"learning_rate": 4.765880540612336e-06, |
|
"loss": 0.6095, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.49279333589083135, |
|
"grad_norm": 0.29571340419933284, |
|
"learning_rate": 4.764879981840121e-06, |
|
"loss": 0.6061, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.493670192360388, |
|
"grad_norm": 0.28779220439984465, |
|
"learning_rate": 4.763877395064225e-06, |
|
"loss": 0.6164, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.49454704882994466, |
|
"grad_norm": 0.3023002461106019, |
|
"learning_rate": 4.762872781182378e-06, |
|
"loss": 0.6099, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4954239052995013, |
|
"grad_norm": 0.2852998688047179, |
|
"learning_rate": 4.761866141094126e-06, |
|
"loss": 0.6151, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4963007617690579, |
|
"grad_norm": 0.27004415072990756, |
|
"learning_rate": 4.7608574757008245e-06, |
|
"loss": 0.6056, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.49717761823861456, |
|
"grad_norm": 0.26583697629837466, |
|
"learning_rate": 4.759846785905649e-06, |
|
"loss": 0.6073, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4980544747081712, |
|
"grad_norm": 0.29963137609858226, |
|
"learning_rate": 4.758834072613583e-06, |
|
"loss": 0.6175, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.49893133117772787, |
|
"grad_norm": 0.2777428291092147, |
|
"learning_rate": 4.757819336731424e-06, |
|
"loss": 0.6084, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.49980818764728446, |
|
"grad_norm": 0.286537576055084, |
|
"learning_rate": 4.756802579167781e-06, |
|
"loss": 0.6122, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5006850441168411, |
|
"grad_norm": 0.2900434750609322, |
|
"learning_rate": 4.755783800833071e-06, |
|
"loss": 0.61, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5015619005863977, |
|
"grad_norm": 0.29602981997833644, |
|
"learning_rate": 4.754763002639522e-06, |
|
"loss": 0.5979, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5024387570559544, |
|
"grad_norm": 0.2850500950921633, |
|
"learning_rate": 4.75374018550117e-06, |
|
"loss": 0.616, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.503315613525511, |
|
"grad_norm": 0.2747595431255721, |
|
"learning_rate": 4.752715350333858e-06, |
|
"loss": 0.6082, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5041924699950677, |
|
"grad_norm": 0.30963433949041175, |
|
"learning_rate": 4.75168849805524e-06, |
|
"loss": 0.6062, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5050693264646243, |
|
"grad_norm": 0.28817154630491854, |
|
"learning_rate": 4.750659629584772e-06, |
|
"loss": 0.615, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5059461829341809, |
|
"grad_norm": 0.29777143797501865, |
|
"learning_rate": 4.749628745843715e-06, |
|
"loss": 0.6093, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5068230394037376, |
|
"grad_norm": 0.2761328411528336, |
|
"learning_rate": 4.748595847755137e-06, |
|
"loss": 0.5949, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5076998958732942, |
|
"grad_norm": 0.27941749417554973, |
|
"learning_rate": 4.74756093624391e-06, |
|
"loss": 0.6165, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5085767523428508, |
|
"grad_norm": 0.28883681834919644, |
|
"learning_rate": 4.746524012236706e-06, |
|
"loss": 0.6012, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5094536088124075, |
|
"grad_norm": 0.2712633209555587, |
|
"learning_rate": 4.7454850766620005e-06, |
|
"loss": 0.5898, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5103304652819641, |
|
"grad_norm": 0.29386364789948854, |
|
"learning_rate": 4.7444441304500714e-06, |
|
"loss": 0.6057, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5112073217515208, |
|
"grad_norm": 0.27998562308750735, |
|
"learning_rate": 4.743401174532994e-06, |
|
"loss": 0.597, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5120841782210774, |
|
"grad_norm": 0.2944531079667381, |
|
"learning_rate": 4.742356209844646e-06, |
|
"loss": 0.5915, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.512961034690634, |
|
"grad_norm": 0.29506045387008756, |
|
"learning_rate": 4.741309237320703e-06, |
|
"loss": 0.6178, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5138378911601907, |
|
"grad_norm": 0.299236621784075, |
|
"learning_rate": 4.740260257898638e-06, |
|
"loss": 0.6121, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5147147476297473, |
|
"grad_norm": 0.303688650889379, |
|
"learning_rate": 4.739209272517721e-06, |
|
"loss": 0.5982, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.515591604099304, |
|
"grad_norm": 0.2925779066404172, |
|
"learning_rate": 4.738156282119018e-06, |
|
"loss": 0.5936, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5164684605688606, |
|
"grad_norm": 0.3374725318718031, |
|
"learning_rate": 4.73710128764539e-06, |
|
"loss": 0.6001, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.5173453170384172, |
|
"grad_norm": 0.28811046561615106, |
|
"learning_rate": 4.736044290041496e-06, |
|
"loss": 0.61, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.518222173507974, |
|
"grad_norm": 0.32139851009391945, |
|
"learning_rate": 4.7349852902537814e-06, |
|
"loss": 0.5931, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5190990299775305, |
|
"grad_norm": 0.27307295767087736, |
|
"learning_rate": 4.733924289230493e-06, |
|
"loss": 0.6035, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5199758864470871, |
|
"grad_norm": 0.3098223534082736, |
|
"learning_rate": 4.7328612879216615e-06, |
|
"loss": 0.6082, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5208527429166439, |
|
"grad_norm": 0.2808341207944162, |
|
"learning_rate": 4.731796287279115e-06, |
|
"loss": 0.5965, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5217295993862004, |
|
"grad_norm": 0.3093125993326785, |
|
"learning_rate": 4.730729288256468e-06, |
|
"loss": 0.6018, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5226064558557572, |
|
"grad_norm": 0.30147164249765196, |
|
"learning_rate": 4.729660291809126e-06, |
|
"loss": 0.6072, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.5234833123253138, |
|
"grad_norm": 0.2893545075475105, |
|
"learning_rate": 4.728589298894284e-06, |
|
"loss": 0.5894, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5243601687948704, |
|
"grad_norm": 0.29778530349250987, |
|
"learning_rate": 4.72751631047092e-06, |
|
"loss": 0.5941, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5252370252644271, |
|
"grad_norm": 0.2822751104373634, |
|
"learning_rate": 4.726441327499805e-06, |
|
"loss": 0.6056, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5261138817339837, |
|
"grad_norm": 0.30381920940202223, |
|
"learning_rate": 4.725364350943492e-06, |
|
"loss": 0.6016, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5269907382035403, |
|
"grad_norm": 0.2728312952142679, |
|
"learning_rate": 4.72428538176632e-06, |
|
"loss": 0.6033, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.527867594673097, |
|
"grad_norm": 0.2920360605636878, |
|
"learning_rate": 4.723204420934413e-06, |
|
"loss": 0.614, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5287444511426536, |
|
"grad_norm": 0.282387818364113, |
|
"learning_rate": 4.722121469415677e-06, |
|
"loss": 0.5901, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5296213076122103, |
|
"grad_norm": 0.2954181717364726, |
|
"learning_rate": 4.721036528179802e-06, |
|
"loss": 0.6043, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5304981640817669, |
|
"grad_norm": 0.3084979402180987, |
|
"learning_rate": 4.719949598198258e-06, |
|
"loss": 0.5931, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5313750205513235, |
|
"grad_norm": 0.3252699365181927, |
|
"learning_rate": 4.718860680444297e-06, |
|
"loss": 0.6181, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5322518770208802, |
|
"grad_norm": 0.28357295095306256, |
|
"learning_rate": 4.717769775892951e-06, |
|
"loss": 0.5903, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5331287334904368, |
|
"grad_norm": 0.3569079908279582, |
|
"learning_rate": 4.7166768855210294e-06, |
|
"loss": 0.5939, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.5340055899599935, |
|
"grad_norm": 0.31741200071485426, |
|
"learning_rate": 4.715582010307121e-06, |
|
"loss": 0.5897, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5348824464295501, |
|
"grad_norm": 0.3218789245412814, |
|
"learning_rate": 4.714485151231593e-06, |
|
"loss": 0.5926, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5357593028991067, |
|
"grad_norm": 0.2824610260583936, |
|
"learning_rate": 4.713386309276585e-06, |
|
"loss": 0.6039, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5366361593686634, |
|
"grad_norm": 0.3111981063952015, |
|
"learning_rate": 4.712285485426017e-06, |
|
"loss": 0.6012, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.53751301583822, |
|
"grad_norm": 0.2719370118974663, |
|
"learning_rate": 4.7111826806655804e-06, |
|
"loss": 0.5912, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5383898723077766, |
|
"grad_norm": 0.3161533458613161, |
|
"learning_rate": 4.710077895982741e-06, |
|
"loss": 0.5962, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5392667287773333, |
|
"grad_norm": 0.26701338476822095, |
|
"learning_rate": 4.708971132366739e-06, |
|
"loss": 0.6025, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5401435852468899, |
|
"grad_norm": 0.28447205168753736, |
|
"learning_rate": 4.707862390808583e-06, |
|
"loss": 0.5959, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5410204417164466, |
|
"grad_norm": 0.26585350433139904, |
|
"learning_rate": 4.706751672301058e-06, |
|
"loss": 0.5946, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5418972981860032, |
|
"grad_norm": 0.28276117956241253, |
|
"learning_rate": 4.705638977838712e-06, |
|
"loss": 0.5986, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5427741546555598, |
|
"grad_norm": 0.2752743049051474, |
|
"learning_rate": 4.704524308417872e-06, |
|
"loss": 0.6044, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5436510111251165, |
|
"grad_norm": 0.2744635750786116, |
|
"learning_rate": 4.703407665036622e-06, |
|
"loss": 0.6, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5445278675946731, |
|
"grad_norm": 0.2942835089324837, |
|
"learning_rate": 4.702289048694824e-06, |
|
"loss": 0.6163, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5454047240642297, |
|
"grad_norm": 0.29074004193212294, |
|
"learning_rate": 4.7011684603940985e-06, |
|
"loss": 0.61, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5462815805337864, |
|
"grad_norm": 0.265548853050648, |
|
"learning_rate": 4.700045901137838e-06, |
|
"loss": 0.6003, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.547158437003343, |
|
"grad_norm": 0.28147341099339, |
|
"learning_rate": 4.6989213719311956e-06, |
|
"loss": 0.6057, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5480352934728997, |
|
"grad_norm": 0.25061686481638634, |
|
"learning_rate": 4.697794873781089e-06, |
|
"loss": 0.6103, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5489121499424563, |
|
"grad_norm": 0.28270079603778164, |
|
"learning_rate": 4.696666407696201e-06, |
|
"loss": 0.5999, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5497890064120129, |
|
"grad_norm": 0.25832596909684546, |
|
"learning_rate": 4.695535974686975e-06, |
|
"loss": 0.5989, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5506658628815696, |
|
"grad_norm": 0.28610489660664173, |
|
"learning_rate": 4.694403575765615e-06, |
|
"loss": 0.6039, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5515427193511262, |
|
"grad_norm": 0.26039812165621273, |
|
"learning_rate": 4.693269211946086e-06, |
|
"loss": 0.5999, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5524195758206829, |
|
"grad_norm": 0.2802813802636672, |
|
"learning_rate": 4.692132884244113e-06, |
|
"loss": 0.5957, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5532964322902395, |
|
"grad_norm": 0.28045233973715045, |
|
"learning_rate": 4.69099459367718e-06, |
|
"loss": 0.6057, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5541732887597961, |
|
"grad_norm": 0.2850165288729873, |
|
"learning_rate": 4.689854341264525e-06, |
|
"loss": 0.6062, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5550501452293528, |
|
"grad_norm": 0.318532937146288, |
|
"learning_rate": 4.688712128027147e-06, |
|
"loss": 0.615, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5559270016989094, |
|
"grad_norm": 0.2700297126701359, |
|
"learning_rate": 4.687567954987798e-06, |
|
"loss": 0.6027, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.556803858168466, |
|
"grad_norm": 0.2709567537114069, |
|
"learning_rate": 4.686421823170987e-06, |
|
"loss": 0.606, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5576807146380227, |
|
"grad_norm": 0.30943308206128534, |
|
"learning_rate": 4.685273733602975e-06, |
|
"loss": 0.6122, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5585575711075793, |
|
"grad_norm": 0.2866407684585244, |
|
"learning_rate": 4.6841236873117765e-06, |
|
"loss": 0.5983, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.559434427577136, |
|
"grad_norm": 0.30074858616349, |
|
"learning_rate": 4.6829716853271576e-06, |
|
"loss": 0.6112, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5603112840466926, |
|
"grad_norm": 0.27481764632891953, |
|
"learning_rate": 4.681817728680638e-06, |
|
"loss": 0.5923, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5611881405162492, |
|
"grad_norm": 0.30985792219487485, |
|
"learning_rate": 4.680661818405485e-06, |
|
"loss": 0.6083, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5620649969858059, |
|
"grad_norm": 0.30548099410676144, |
|
"learning_rate": 4.679503955536715e-06, |
|
"loss": 0.6105, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5629418534553625, |
|
"grad_norm": 0.27736446160459594, |
|
"learning_rate": 4.678344141111096e-06, |
|
"loss": 0.6176, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5638187099249191, |
|
"grad_norm": 0.313370779146898, |
|
"learning_rate": 4.6771823761671386e-06, |
|
"loss": 0.6035, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5646955663944758, |
|
"grad_norm": 0.27389315771120454, |
|
"learning_rate": 4.676018661745104e-06, |
|
"loss": 0.6118, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5655724228640324, |
|
"grad_norm": 0.3272671136560007, |
|
"learning_rate": 4.674852998886998e-06, |
|
"loss": 0.6059, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5664492793335891, |
|
"grad_norm": 0.29110434636858074, |
|
"learning_rate": 4.6736853886365704e-06, |
|
"loss": 0.5957, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5673261358031457, |
|
"grad_norm": 0.27566640053494834, |
|
"learning_rate": 4.672515832039315e-06, |
|
"loss": 0.5847, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5682029922727023, |
|
"grad_norm": 0.3439499837560115, |
|
"learning_rate": 4.671344330142468e-06, |
|
"loss": 0.6066, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.569079848742259, |
|
"grad_norm": 0.2831795036732806, |
|
"learning_rate": 4.670170883995007e-06, |
|
"loss": 0.5875, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5699567052118156, |
|
"grad_norm": 0.3084275937304928, |
|
"learning_rate": 4.668995494647653e-06, |
|
"loss": 0.6046, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5708335616813722, |
|
"grad_norm": 0.2876312566066635, |
|
"learning_rate": 4.667818163152864e-06, |
|
"loss": 0.609, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5717104181509289, |
|
"grad_norm": 0.27641311480374825, |
|
"learning_rate": 4.6666388905648394e-06, |
|
"loss": 0.6084, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5725872746204855, |
|
"grad_norm": 0.2760161681243495, |
|
"learning_rate": 4.665457677939515e-06, |
|
"loss": 0.6036, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5734641310900422, |
|
"grad_norm": 0.2664014070652965, |
|
"learning_rate": 4.664274526334563e-06, |
|
"loss": 0.6047, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5743409875595988, |
|
"grad_norm": 0.27367722811571643, |
|
"learning_rate": 4.663089436809395e-06, |
|
"loss": 0.607, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5752178440291554, |
|
"grad_norm": 0.2971494077897638, |
|
"learning_rate": 4.661902410425156e-06, |
|
"loss": 0.5851, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5760947004987121, |
|
"grad_norm": 0.28359506675344376, |
|
"learning_rate": 4.660713448244723e-06, |
|
"loss": 0.5911, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5769715569682687, |
|
"grad_norm": 0.27646693971859265, |
|
"learning_rate": 4.6595225513327105e-06, |
|
"loss": 0.601, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5778484134378254, |
|
"grad_norm": 0.2707379861432875, |
|
"learning_rate": 4.658329720755464e-06, |
|
"loss": 0.5905, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.578725269907382, |
|
"grad_norm": 0.301271851117793, |
|
"learning_rate": 4.657134957581057e-06, |
|
"loss": 0.6023, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5796021263769386, |
|
"grad_norm": 0.30214846729641187, |
|
"learning_rate": 4.6559382628793e-06, |
|
"loss": 0.6095, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5804789828464954, |
|
"grad_norm": 0.2880769859831512, |
|
"learning_rate": 4.6547396377217265e-06, |
|
"loss": 0.6012, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.581355839316052, |
|
"grad_norm": 0.3363251460755209, |
|
"learning_rate": 4.653539083181603e-06, |
|
"loss": 0.5963, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5822326957856085, |
|
"grad_norm": 0.3446871487238731, |
|
"learning_rate": 4.652336600333921e-06, |
|
"loss": 0.5992, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5831095522551653, |
|
"grad_norm": 0.3016824402176579, |
|
"learning_rate": 4.651132190255401e-06, |
|
"loss": 0.6016, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5839864087247219, |
|
"grad_norm": 0.31791554379394255, |
|
"learning_rate": 4.649925854024486e-06, |
|
"loss": 0.5943, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5848632651942786, |
|
"grad_norm": 0.3603510668723624, |
|
"learning_rate": 4.648717592721347e-06, |
|
"loss": 0.6086, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5857401216638352, |
|
"grad_norm": 0.25073578292290827, |
|
"learning_rate": 4.647507407427877e-06, |
|
"loss": 0.5965, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5866169781333918, |
|
"grad_norm": 0.3401292596267892, |
|
"learning_rate": 4.646295299227691e-06, |
|
"loss": 0.5896, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5874938346029485, |
|
"grad_norm": 0.26798950974238206, |
|
"learning_rate": 4.645081269206128e-06, |
|
"loss": 0.5913, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5883706910725051, |
|
"grad_norm": 0.2712753517614824, |
|
"learning_rate": 4.643865318450247e-06, |
|
"loss": 0.5948, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5892475475420617, |
|
"grad_norm": 0.31478669896326056, |
|
"learning_rate": 4.642647448048824e-06, |
|
"loss": 0.6036, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5901244040116184, |
|
"grad_norm": 0.2853149586152437, |
|
"learning_rate": 4.641427659092359e-06, |
|
"loss": 0.5852, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.591001260481175, |
|
"grad_norm": 0.31928733056145026, |
|
"learning_rate": 4.6402059526730656e-06, |
|
"loss": 0.596, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5918781169507317, |
|
"grad_norm": 0.28886504451895006, |
|
"learning_rate": 4.638982329884878e-06, |
|
"loss": 0.5867, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5927549734202883, |
|
"grad_norm": 0.34332786639440344, |
|
"learning_rate": 4.637756791823443e-06, |
|
"loss": 0.5951, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5936318298898449, |
|
"grad_norm": 0.31536294202913445, |
|
"learning_rate": 4.6365293395861225e-06, |
|
"loss": 0.6005, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5945086863594016, |
|
"grad_norm": 0.36612645695214535, |
|
"learning_rate": 4.6352999742719954e-06, |
|
"loss": 0.6125, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5953855428289582, |
|
"grad_norm": 0.2865910172606529, |
|
"learning_rate": 4.634068696981852e-06, |
|
"loss": 0.6096, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5962623992985149, |
|
"grad_norm": 0.3077121674916666, |
|
"learning_rate": 4.632835508818192e-06, |
|
"loss": 0.5891, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5971392557680715, |
|
"grad_norm": 0.2930520316480949, |
|
"learning_rate": 4.631600410885231e-06, |
|
"loss": 0.5918, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5980161122376281, |
|
"grad_norm": 0.3412197822800723, |
|
"learning_rate": 4.630363404288891e-06, |
|
"loss": 0.5998, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5988929687071848, |
|
"grad_norm": 0.2869686807201651, |
|
"learning_rate": 4.629124490136804e-06, |
|
"loss": 0.5952, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5997698251767414, |
|
"grad_norm": 0.3044523168792968, |
|
"learning_rate": 4.627883669538311e-06, |
|
"loss": 0.6058, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.600646681646298, |
|
"grad_norm": 0.298754941767322, |
|
"learning_rate": 4.626640943604459e-06, |
|
"loss": 0.6099, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6015235381158547, |
|
"grad_norm": 0.30823608651620477, |
|
"learning_rate": 4.625396313448e-06, |
|
"loss": 0.5913, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.6024003945854113, |
|
"grad_norm": 0.2745802532714142, |
|
"learning_rate": 4.624149780183395e-06, |
|
"loss": 0.5904, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.603277251054968, |
|
"grad_norm": 0.2894557068485525, |
|
"learning_rate": 4.622901344926805e-06, |
|
"loss": 0.6006, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6041541075245246, |
|
"grad_norm": 0.2844643276622375, |
|
"learning_rate": 4.621651008796095e-06, |
|
"loss": 0.5972, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6050309639940812, |
|
"grad_norm": 0.3111750841694179, |
|
"learning_rate": 4.620398772910833e-06, |
|
"loss": 0.5911, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6059078204636379, |
|
"grad_norm": 0.30229136138256857, |
|
"learning_rate": 4.619144638392289e-06, |
|
"loss": 0.6063, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.6067846769331945, |
|
"grad_norm": 0.2903177693650587, |
|
"learning_rate": 4.6178886063634295e-06, |
|
"loss": 0.6022, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6076615334027511, |
|
"grad_norm": 0.29466063932438424, |
|
"learning_rate": 4.616630677948924e-06, |
|
"loss": 0.609, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6085383898723078, |
|
"grad_norm": 0.29795014881552045, |
|
"learning_rate": 4.615370854275138e-06, |
|
"loss": 0.5923, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.6094152463418644, |
|
"grad_norm": 0.2835342651327551, |
|
"learning_rate": 4.614109136470133e-06, |
|
"loss": 0.5941, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6102921028114211, |
|
"grad_norm": 0.2914927284695803, |
|
"learning_rate": 4.612845525663671e-06, |
|
"loss": 0.5915, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6111689592809777, |
|
"grad_norm": 0.27150994490869584, |
|
"learning_rate": 4.611580022987202e-06, |
|
"loss": 0.5903, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6120458157505343, |
|
"grad_norm": 0.27427922033901636, |
|
"learning_rate": 4.610312629573877e-06, |
|
"loss": 0.5826, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.612922672220091, |
|
"grad_norm": 0.3257835351903302, |
|
"learning_rate": 4.609043346558536e-06, |
|
"loss": 0.608, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6137995286896476, |
|
"grad_norm": 0.27542786817313375, |
|
"learning_rate": 4.607772175077712e-06, |
|
"loss": 0.5914, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6146763851592043, |
|
"grad_norm": 0.32541464673918596, |
|
"learning_rate": 4.606499116269628e-06, |
|
"loss": 0.6004, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6155532416287609, |
|
"grad_norm": 0.2775394483279354, |
|
"learning_rate": 4.605224171274198e-06, |
|
"loss": 0.6042, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6164300980983175, |
|
"grad_norm": 0.3010566442707075, |
|
"learning_rate": 4.603947341233026e-06, |
|
"loss": 0.5893, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6173069545678742, |
|
"grad_norm": 0.28841806172316603, |
|
"learning_rate": 4.602668627289401e-06, |
|
"loss": 0.5932, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.6181838110374308, |
|
"grad_norm": 0.32720143492110876, |
|
"learning_rate": 4.601388030588303e-06, |
|
"loss": 0.594, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6190606675069874, |
|
"grad_norm": 0.2629157828769276, |
|
"learning_rate": 4.600105552276393e-06, |
|
"loss": 0.5962, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6199375239765441, |
|
"grad_norm": 0.2976311641314985, |
|
"learning_rate": 4.598821193502019e-06, |
|
"loss": 0.5993, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6208143804461007, |
|
"grad_norm": 0.3223849407278096, |
|
"learning_rate": 4.597534955415214e-06, |
|
"loss": 0.6023, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6216912369156574, |
|
"grad_norm": 0.3228934470983084, |
|
"learning_rate": 4.596246839167692e-06, |
|
"loss": 0.6058, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.622568093385214, |
|
"grad_norm": 0.2842350311614894, |
|
"learning_rate": 4.59495684591285e-06, |
|
"loss": 0.5965, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6234449498547706, |
|
"grad_norm": 0.30037127301855626, |
|
"learning_rate": 4.593664976805765e-06, |
|
"loss": 0.5912, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6243218063243273, |
|
"grad_norm": 0.29537031301186273, |
|
"learning_rate": 4.592371233003195e-06, |
|
"loss": 0.5847, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6251986627938839, |
|
"grad_norm": 0.3099776656835445, |
|
"learning_rate": 4.5910756156635725e-06, |
|
"loss": 0.6061, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.6260755192634405, |
|
"grad_norm": 0.3343474177937486, |
|
"learning_rate": 4.589778125947012e-06, |
|
"loss": 0.5775, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.6269523757329972, |
|
"grad_norm": 0.26492597760028275, |
|
"learning_rate": 4.588478765015304e-06, |
|
"loss": 0.6008, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6278292322025538, |
|
"grad_norm": 0.2996728173414987, |
|
"learning_rate": 4.587177534031914e-06, |
|
"loss": 0.5868, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6287060886721105, |
|
"grad_norm": 0.269698012084879, |
|
"learning_rate": 4.585874434161979e-06, |
|
"loss": 0.5908, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.6295829451416671, |
|
"grad_norm": 0.3120812259438331, |
|
"learning_rate": 4.584569466572313e-06, |
|
"loss": 0.5964, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.6304598016112237, |
|
"grad_norm": 0.306605213663903, |
|
"learning_rate": 4.583262632431402e-06, |
|
"loss": 0.587, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.6313366580807804, |
|
"grad_norm": 0.31045769873517814, |
|
"learning_rate": 4.581953932909403e-06, |
|
"loss": 0.5924, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.632213514550337, |
|
"grad_norm": 0.30956000847409926, |
|
"learning_rate": 4.580643369178142e-06, |
|
"loss": 0.5905, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6330903710198936, |
|
"grad_norm": 0.2980650280091205, |
|
"learning_rate": 4.579330942411115e-06, |
|
"loss": 0.5961, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6339672274894503, |
|
"grad_norm": 0.2784986194522932, |
|
"learning_rate": 4.578016653783488e-06, |
|
"loss": 0.5962, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6348440839590069, |
|
"grad_norm": 0.32816601752120567, |
|
"learning_rate": 4.57670050447209e-06, |
|
"loss": 0.6149, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6357209404285636, |
|
"grad_norm": 0.2822290286934802, |
|
"learning_rate": 4.575382495655421e-06, |
|
"loss": 0.5915, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6365977968981202, |
|
"grad_norm": 0.2993973936416954, |
|
"learning_rate": 4.574062628513643e-06, |
|
"loss": 0.59, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6374746533676768, |
|
"grad_norm": 0.27875804168057794, |
|
"learning_rate": 4.572740904228582e-06, |
|
"loss": 0.6018, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6383515098372335, |
|
"grad_norm": 0.3144256132274513, |
|
"learning_rate": 4.571417323983727e-06, |
|
"loss": 0.6056, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6392283663067901, |
|
"grad_norm": 0.2763723528672814, |
|
"learning_rate": 4.570091888964231e-06, |
|
"loss": 0.5943, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.6401052227763468, |
|
"grad_norm": 0.3001278571328794, |
|
"learning_rate": 4.5687646003569055e-06, |
|
"loss": 0.588, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6409820792459034, |
|
"grad_norm": 0.2847820308061442, |
|
"learning_rate": 4.567435459350222e-06, |
|
"loss": 0.5971, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.64185893571546, |
|
"grad_norm": 0.292512543142512, |
|
"learning_rate": 4.566104467134311e-06, |
|
"loss": 0.5864, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.6427357921850168, |
|
"grad_norm": 0.28968651062565176, |
|
"learning_rate": 4.564771624900961e-06, |
|
"loss": 0.62, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6436126486545733, |
|
"grad_norm": 0.3004795852693458, |
|
"learning_rate": 4.563436933843617e-06, |
|
"loss": 0.5964, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.64448950512413, |
|
"grad_norm": 0.2865806085716862, |
|
"learning_rate": 4.562100395157379e-06, |
|
"loss": 0.6026, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6453663615936867, |
|
"grad_norm": 0.2842649974188147, |
|
"learning_rate": 4.560762010039001e-06, |
|
"loss": 0.5913, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.6462432180632433, |
|
"grad_norm": 0.28683866497814775, |
|
"learning_rate": 4.5594217796868915e-06, |
|
"loss": 0.5951, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6471200745328, |
|
"grad_norm": 0.2764873070461295, |
|
"learning_rate": 4.558079705301109e-06, |
|
"loss": 0.6053, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6479969310023566, |
|
"grad_norm": 0.27004479414645, |
|
"learning_rate": 4.556735788083366e-06, |
|
"loss": 0.6039, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6488737874719132, |
|
"grad_norm": 0.29052397029213667, |
|
"learning_rate": 4.555390029237026e-06, |
|
"loss": 0.601, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6497506439414699, |
|
"grad_norm": 0.2947691340138793, |
|
"learning_rate": 4.554042429967095e-06, |
|
"loss": 0.6025, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.6506275004110265, |
|
"grad_norm": 0.2792458027197797, |
|
"learning_rate": 4.552692991480234e-06, |
|
"loss": 0.6014, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6515043568805831, |
|
"grad_norm": 0.3382217380230472, |
|
"learning_rate": 4.551341714984748e-06, |
|
"loss": 0.5955, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.6523812133501398, |
|
"grad_norm": 0.2966197192699023, |
|
"learning_rate": 4.549988601690588e-06, |
|
"loss": 0.5935, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.6532580698196964, |
|
"grad_norm": 0.31516646846151397, |
|
"learning_rate": 4.54863365280935e-06, |
|
"loss": 0.597, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6541349262892531, |
|
"grad_norm": 0.28496714910224397, |
|
"learning_rate": 4.547276869554272e-06, |
|
"loss": 0.5814, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6550117827588097, |
|
"grad_norm": 0.30669749001026353, |
|
"learning_rate": 4.545918253140236e-06, |
|
"loss": 0.5952, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6558886392283663, |
|
"grad_norm": 0.2812261666412913, |
|
"learning_rate": 4.544557804783765e-06, |
|
"loss": 0.6162, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.656765495697923, |
|
"grad_norm": 0.27761745178740765, |
|
"learning_rate": 4.543195525703024e-06, |
|
"loss": 0.5807, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6576423521674796, |
|
"grad_norm": 0.31002121863979637, |
|
"learning_rate": 4.541831417117815e-06, |
|
"loss": 0.5851, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6585192086370363, |
|
"grad_norm": 0.29034303454873894, |
|
"learning_rate": 4.540465480249579e-06, |
|
"loss": 0.6019, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6593960651065929, |
|
"grad_norm": 0.30559901683462565, |
|
"learning_rate": 4.539097716321394e-06, |
|
"loss": 0.5866, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6602729215761495, |
|
"grad_norm": 0.2641221990159659, |
|
"learning_rate": 4.537728126557974e-06, |
|
"loss": 0.5972, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6611497780457062, |
|
"grad_norm": 0.3227708789669896, |
|
"learning_rate": 4.536356712185668e-06, |
|
"loss": 0.5796, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6620266345152628, |
|
"grad_norm": 0.294701481555053, |
|
"learning_rate": 4.534983474432458e-06, |
|
"loss": 0.6149, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6629034909848194, |
|
"grad_norm": 0.32377533070879033, |
|
"learning_rate": 4.533608414527961e-06, |
|
"loss": 0.5891, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6637803474543761, |
|
"grad_norm": 0.3042889879699245, |
|
"learning_rate": 4.532231533703423e-06, |
|
"loss": 0.5913, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6646572039239327, |
|
"grad_norm": 0.31760559251266973, |
|
"learning_rate": 4.53085283319172e-06, |
|
"loss": 0.6096, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6655340603934894, |
|
"grad_norm": 0.3078941609749165, |
|
"learning_rate": 4.529472314227362e-06, |
|
"loss": 0.5905, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.666410916863046, |
|
"grad_norm": 0.30990175786815527, |
|
"learning_rate": 4.528089978046481e-06, |
|
"loss": 0.5991, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6672877733326026, |
|
"grad_norm": 0.32903820758007046, |
|
"learning_rate": 4.5267058258868414e-06, |
|
"loss": 0.5882, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6681646298021593, |
|
"grad_norm": 0.29452587669480845, |
|
"learning_rate": 4.52531985898783e-06, |
|
"loss": 0.5803, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6690414862717159, |
|
"grad_norm": 0.30776706716693625, |
|
"learning_rate": 4.52393207859046e-06, |
|
"loss": 0.577, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6699183427412725, |
|
"grad_norm": 0.31422641761257675, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 0.6018, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6707951992108292, |
|
"grad_norm": 0.3173718550935184, |
|
"learning_rate": 4.521151082272817e-06, |
|
"loss": 0.5882, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6716720556803858, |
|
"grad_norm": 0.2986562015643124, |
|
"learning_rate": 4.519757868842685e-06, |
|
"loss": 0.579, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6725489121499425, |
|
"grad_norm": 0.3090764441547647, |
|
"learning_rate": 4.518362846894475e-06, |
|
"loss": 0.5985, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6734257686194991, |
|
"grad_norm": 0.30790241933986734, |
|
"learning_rate": 4.516966017677308e-06, |
|
"loss": 0.5863, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6743026250890557, |
|
"grad_norm": 0.2994056106304016, |
|
"learning_rate": 4.515567382441923e-06, |
|
"loss": 0.5991, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6751794815586124, |
|
"grad_norm": 0.2958764046270931, |
|
"learning_rate": 4.514166942440679e-06, |
|
"loss": 0.5963, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.676056338028169, |
|
"grad_norm": 0.28788185549499157, |
|
"learning_rate": 4.512764698927545e-06, |
|
"loss": 0.6064, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6769331944977256, |
|
"grad_norm": 0.29708423016925406, |
|
"learning_rate": 4.511360653158111e-06, |
|
"loss": 0.5947, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6778100509672823, |
|
"grad_norm": 0.30991902940049315, |
|
"learning_rate": 4.509954806389577e-06, |
|
"loss": 0.5987, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6786869074368389, |
|
"grad_norm": 0.2873916475278516, |
|
"learning_rate": 4.508547159880758e-06, |
|
"loss": 0.5924, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6795637639063956, |
|
"grad_norm": 0.3007245570293541, |
|
"learning_rate": 4.50713771489208e-06, |
|
"loss": 0.6015, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6804406203759522, |
|
"grad_norm": 0.30867041078073276, |
|
"learning_rate": 4.505726472685577e-06, |
|
"loss": 0.5957, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6813174768455088, |
|
"grad_norm": 0.31345922212682475, |
|
"learning_rate": 4.504313434524894e-06, |
|
"loss": 0.6006, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6821943333150655, |
|
"grad_norm": 0.29707717549610757, |
|
"learning_rate": 4.502898601675285e-06, |
|
"loss": 0.5778, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6830711897846221, |
|
"grad_norm": 0.3796068136152165, |
|
"learning_rate": 4.501481975403611e-06, |
|
"loss": 0.5991, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6839480462541788, |
|
"grad_norm": 0.28337342976468866, |
|
"learning_rate": 4.5000635569783365e-06, |
|
"loss": 0.5948, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6848249027237354, |
|
"grad_norm": 0.31230108669893153, |
|
"learning_rate": 4.498643347669533e-06, |
|
"loss": 0.5925, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.685701759193292, |
|
"grad_norm": 0.27904331433791485, |
|
"learning_rate": 4.497221348748874e-06, |
|
"loss": 0.5916, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6865786156628487, |
|
"grad_norm": 0.2942542969448629, |
|
"learning_rate": 4.4957975614896386e-06, |
|
"loss": 0.5992, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6874554721324053, |
|
"grad_norm": 0.2908765617548673, |
|
"learning_rate": 4.494371987166703e-06, |
|
"loss": 0.6065, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.6883323286019619, |
|
"grad_norm": 0.2840490179126863, |
|
"learning_rate": 4.492944627056544e-06, |
|
"loss": 0.5902, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6892091850715186, |
|
"grad_norm": 0.2727369127304506, |
|
"learning_rate": 4.491515482437242e-06, |
|
"loss": 0.5867, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6900860415410752, |
|
"grad_norm": 0.28769481832954025, |
|
"learning_rate": 4.4900845545884695e-06, |
|
"loss": 0.5922, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6909628980106319, |
|
"grad_norm": 0.2906309237155975, |
|
"learning_rate": 4.4886518447915e-06, |
|
"loss": 0.5887, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6918397544801885, |
|
"grad_norm": 0.2948842293422461, |
|
"learning_rate": 4.487217354329201e-06, |
|
"loss": 0.6006, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6927166109497451, |
|
"grad_norm": 0.302074977476922, |
|
"learning_rate": 4.4857810844860325e-06, |
|
"loss": 0.5866, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6935934674193018, |
|
"grad_norm": 0.32893770275300094, |
|
"learning_rate": 4.484343036548051e-06, |
|
"loss": 0.5976, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6944703238888584, |
|
"grad_norm": 0.2778002794834819, |
|
"learning_rate": 4.482903211802904e-06, |
|
"loss": 0.584, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.695347180358415, |
|
"grad_norm": 0.294631010190205, |
|
"learning_rate": 4.481461611539829e-06, |
|
"loss": 0.5796, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6962240368279717, |
|
"grad_norm": 0.26497721691156156, |
|
"learning_rate": 4.480018237049655e-06, |
|
"loss": 0.5921, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6971008932975283, |
|
"grad_norm": 0.2571147884128945, |
|
"learning_rate": 4.4785730896247985e-06, |
|
"loss": 0.5967, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.697977749767085, |
|
"grad_norm": 0.27928133327664356, |
|
"learning_rate": 4.477126170559262e-06, |
|
"loss": 0.5933, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6988546062366416, |
|
"grad_norm": 0.2678842819485542, |
|
"learning_rate": 4.475677481148638e-06, |
|
"loss": 0.6041, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6997314627061982, |
|
"grad_norm": 0.2891606093702898, |
|
"learning_rate": 4.474227022690102e-06, |
|
"loss": 0.5957, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.700608319175755, |
|
"grad_norm": 0.288045727848727, |
|
"learning_rate": 4.4727747964824135e-06, |
|
"loss": 0.5904, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.7014851756453115, |
|
"grad_norm": 0.31585634496103415, |
|
"learning_rate": 4.471320803825915e-06, |
|
"loss": 0.5976, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7023620321148683, |
|
"grad_norm": 0.2748185200755283, |
|
"learning_rate": 4.469865046022531e-06, |
|
"loss": 0.5752, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7032388885844248, |
|
"grad_norm": 0.3355774877957403, |
|
"learning_rate": 4.468407524375767e-06, |
|
"loss": 0.5983, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7041157450539814, |
|
"grad_norm": 0.29100988533473726, |
|
"learning_rate": 4.466948240190707e-06, |
|
"loss": 0.5942, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7049926015235382, |
|
"grad_norm": 0.32395113661904446, |
|
"learning_rate": 4.465487194774012e-06, |
|
"loss": 0.5934, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.7058694579930948, |
|
"grad_norm": 0.27010926989878575, |
|
"learning_rate": 4.464024389433924e-06, |
|
"loss": 0.5965, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7067463144626513, |
|
"grad_norm": 0.31589368881558894, |
|
"learning_rate": 4.462559825480257e-06, |
|
"loss": 0.5892, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7076231709322081, |
|
"grad_norm": 0.2696414843727876, |
|
"learning_rate": 4.461093504224401e-06, |
|
"loss": 0.5995, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.7085000274017647, |
|
"grad_norm": 0.2953330107498836, |
|
"learning_rate": 4.459625426979319e-06, |
|
"loss": 0.5918, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7093768838713214, |
|
"grad_norm": 0.281894292123873, |
|
"learning_rate": 4.458155595059549e-06, |
|
"loss": 0.5955, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.710253740340878, |
|
"grad_norm": 0.27376761478776995, |
|
"learning_rate": 4.4566840097811956e-06, |
|
"loss": 0.5871, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7111305968104346, |
|
"grad_norm": 0.27713167306531405, |
|
"learning_rate": 4.455210672461938e-06, |
|
"loss": 0.595, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7120074532799913, |
|
"grad_norm": 0.27385713088626723, |
|
"learning_rate": 4.453735584421021e-06, |
|
"loss": 0.5899, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7128843097495479, |
|
"grad_norm": 0.29840396727897567, |
|
"learning_rate": 4.452258746979258e-06, |
|
"loss": 0.5844, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7137611662191045, |
|
"grad_norm": 0.28333795883109736, |
|
"learning_rate": 4.4507801614590285e-06, |
|
"loss": 0.5939, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7146380226886612, |
|
"grad_norm": 0.3089268512848077, |
|
"learning_rate": 4.449299829184278e-06, |
|
"loss": 0.5859, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7155148791582178, |
|
"grad_norm": 0.2808961599877815, |
|
"learning_rate": 4.447817751480516e-06, |
|
"loss": 0.5871, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7163917356277745, |
|
"grad_norm": 0.30287533725577037, |
|
"learning_rate": 4.446333929674816e-06, |
|
"loss": 0.593, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7172685920973311, |
|
"grad_norm": 0.30584446638710266, |
|
"learning_rate": 4.444848365095809e-06, |
|
"loss": 0.5917, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7181454485668877, |
|
"grad_norm": 0.27241453105670504, |
|
"learning_rate": 4.44336105907369e-06, |
|
"loss": 0.5896, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.7190223050364444, |
|
"grad_norm": 0.36474064413319707, |
|
"learning_rate": 4.4418720129402145e-06, |
|
"loss": 0.5861, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.719899161506001, |
|
"grad_norm": 0.2832577542195539, |
|
"learning_rate": 4.4403812280286915e-06, |
|
"loss": 0.5905, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7207760179755577, |
|
"grad_norm": 0.32117553322486775, |
|
"learning_rate": 4.4388887056739926e-06, |
|
"loss": 0.5801, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7216528744451143, |
|
"grad_norm": 0.27537463782509236, |
|
"learning_rate": 4.43739444721254e-06, |
|
"loss": 0.587, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7225297309146709, |
|
"grad_norm": 0.3274304411602489, |
|
"learning_rate": 4.435898453982313e-06, |
|
"loss": 0.6024, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7234065873842276, |
|
"grad_norm": 0.3232032167824163, |
|
"learning_rate": 4.434400727322844e-06, |
|
"loss": 0.6145, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7242834438537842, |
|
"grad_norm": 0.3431783037261662, |
|
"learning_rate": 4.432901268575218e-06, |
|
"loss": 0.5937, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7251603003233408, |
|
"grad_norm": 0.30897032551229503, |
|
"learning_rate": 4.43140007908207e-06, |
|
"loss": 0.598, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7260371567928975, |
|
"grad_norm": 0.2934772547759602, |
|
"learning_rate": 4.429897160187584e-06, |
|
"loss": 0.5918, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7269140132624541, |
|
"grad_norm": 0.31389790755569874, |
|
"learning_rate": 4.4283925132374946e-06, |
|
"loss": 0.5832, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7277908697320108, |
|
"grad_norm": 0.29548260652561004, |
|
"learning_rate": 4.426886139579083e-06, |
|
"loss": 0.5937, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7286677262015674, |
|
"grad_norm": 0.3162599265610075, |
|
"learning_rate": 4.425378040561175e-06, |
|
"loss": 0.5889, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.729544582671124, |
|
"grad_norm": 0.3057143041654656, |
|
"learning_rate": 4.423868217534144e-06, |
|
"loss": 0.5848, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7304214391406807, |
|
"grad_norm": 0.29540394945672244, |
|
"learning_rate": 4.4223566718499055e-06, |
|
"loss": 0.5926, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7312982956102373, |
|
"grad_norm": 0.30681513325771914, |
|
"learning_rate": 4.420843404861917e-06, |
|
"loss": 0.5838, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7321751520797939, |
|
"grad_norm": 0.29780757398255076, |
|
"learning_rate": 4.419328417925177e-06, |
|
"loss": 0.5922, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7330520085493506, |
|
"grad_norm": 0.28283439818927025, |
|
"learning_rate": 4.417811712396226e-06, |
|
"loss": 0.5875, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7339288650189072, |
|
"grad_norm": 0.30029201304931724, |
|
"learning_rate": 4.416293289633144e-06, |
|
"loss": 0.5989, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7348057214884639, |
|
"grad_norm": 0.29188774973524867, |
|
"learning_rate": 4.414773150995543e-06, |
|
"loss": 0.5878, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7356825779580205, |
|
"grad_norm": 0.3037257039566602, |
|
"learning_rate": 4.413251297844579e-06, |
|
"loss": 0.5849, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.7365594344275771, |
|
"grad_norm": 0.31802355671271254, |
|
"learning_rate": 4.411727731542937e-06, |
|
"loss": 0.5873, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7374362908971338, |
|
"grad_norm": 0.31892860544931334, |
|
"learning_rate": 4.410202453454841e-06, |
|
"loss": 0.5784, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7383131473666904, |
|
"grad_norm": 0.31731371407494563, |
|
"learning_rate": 4.408675464946043e-06, |
|
"loss": 0.5973, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.739190003836247, |
|
"grad_norm": 0.2807004884396655, |
|
"learning_rate": 4.40714676738383e-06, |
|
"loss": 0.5842, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.7400668603058037, |
|
"grad_norm": 0.3102700515568577, |
|
"learning_rate": 4.405616362137017e-06, |
|
"loss": 0.584, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.7409437167753603, |
|
"grad_norm": 0.28221217756766914, |
|
"learning_rate": 4.404084250575952e-06, |
|
"loss": 0.599, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.741820573244917, |
|
"grad_norm": 0.284085524365953, |
|
"learning_rate": 4.4025504340725056e-06, |
|
"loss": 0.5799, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.7426974297144736, |
|
"grad_norm": 0.35367792241463614, |
|
"learning_rate": 4.401014914000078e-06, |
|
"loss": 0.5724, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.7435742861840302, |
|
"grad_norm": 0.26695572041406385, |
|
"learning_rate": 4.3994776917335945e-06, |
|
"loss": 0.5864, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.7444511426535869, |
|
"grad_norm": 0.3230503614090004, |
|
"learning_rate": 4.397938768649505e-06, |
|
"loss": 0.5781, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.7453279991231435, |
|
"grad_norm": 0.32670313161244324, |
|
"learning_rate": 4.39639814612578e-06, |
|
"loss": 0.5921, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7462048555927002, |
|
"grad_norm": 0.2965265275169285, |
|
"learning_rate": 4.394855825541915e-06, |
|
"loss": 0.5847, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.7470817120622568, |
|
"grad_norm": 0.3364787473225747, |
|
"learning_rate": 4.393311808278924e-06, |
|
"loss": 0.6032, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.7479585685318134, |
|
"grad_norm": 0.2925797984612242, |
|
"learning_rate": 4.391766095719341e-06, |
|
"loss": 0.5966, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.7488354250013701, |
|
"grad_norm": 0.36558987387215064, |
|
"learning_rate": 4.390218689247216e-06, |
|
"loss": 0.5965, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.7497122814709267, |
|
"grad_norm": 0.31214927998435166, |
|
"learning_rate": 4.388669590248119e-06, |
|
"loss": 0.5799, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7505891379404833, |
|
"grad_norm": 0.36912682982458045, |
|
"learning_rate": 4.387118800109133e-06, |
|
"loss": 0.5994, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.75146599441004, |
|
"grad_norm": 0.33858825867324854, |
|
"learning_rate": 4.385566320218857e-06, |
|
"loss": 0.5894, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.7523428508795966, |
|
"grad_norm": 0.3095865037795698, |
|
"learning_rate": 4.384012151967401e-06, |
|
"loss": 0.5808, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.7532197073491533, |
|
"grad_norm": 0.3163720033341599, |
|
"learning_rate": 4.382456296746389e-06, |
|
"loss": 0.61, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.7540965638187099, |
|
"grad_norm": 0.30746322298068, |
|
"learning_rate": 4.3808987559489536e-06, |
|
"loss": 0.5901, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7549734202882665, |
|
"grad_norm": 0.3216332568956709, |
|
"learning_rate": 4.379339530969738e-06, |
|
"loss": 0.5824, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.7558502767578232, |
|
"grad_norm": 0.2924396456503393, |
|
"learning_rate": 4.377778623204894e-06, |
|
"loss": 0.587, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.7567271332273798, |
|
"grad_norm": 0.3102518126275497, |
|
"learning_rate": 4.3762160340520765e-06, |
|
"loss": 0.5722, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.7576039896969364, |
|
"grad_norm": 0.29990520801248277, |
|
"learning_rate": 4.374651764910452e-06, |
|
"loss": 0.5867, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.7584808461664931, |
|
"grad_norm": 0.2742400854190758, |
|
"learning_rate": 4.373085817180684e-06, |
|
"loss": 0.5897, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7593577026360497, |
|
"grad_norm": 0.2966143324054175, |
|
"learning_rate": 4.371518192264946e-06, |
|
"loss": 0.593, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.7602345591056064, |
|
"grad_norm": 0.2659050257990803, |
|
"learning_rate": 4.3699488915669106e-06, |
|
"loss": 0.5933, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.761111415575163, |
|
"grad_norm": 0.28333909213084835, |
|
"learning_rate": 4.368377916491749e-06, |
|
"loss": 0.5937, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.7619882720447196, |
|
"grad_norm": 0.294367790561846, |
|
"learning_rate": 4.366805268446132e-06, |
|
"loss": 0.5908, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.7628651285142763, |
|
"grad_norm": 0.2892104769841804, |
|
"learning_rate": 4.365230948838232e-06, |
|
"loss": 0.5749, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7637419849838329, |
|
"grad_norm": 0.2992157610185369, |
|
"learning_rate": 4.3636549590777144e-06, |
|
"loss": 0.6038, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.7646188414533897, |
|
"grad_norm": 0.2849149162166013, |
|
"learning_rate": 4.362077300575742e-06, |
|
"loss": 0.5838, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.7654956979229462, |
|
"grad_norm": 0.27419838720395556, |
|
"learning_rate": 4.360497974744971e-06, |
|
"loss": 0.5792, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.7663725543925028, |
|
"grad_norm": 0.2719357502719954, |
|
"learning_rate": 4.35891698299955e-06, |
|
"loss": 0.5879, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.7672494108620596, |
|
"grad_norm": 0.29276621658420166, |
|
"learning_rate": 4.357334326755123e-06, |
|
"loss": 0.5903, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7681262673316162, |
|
"grad_norm": 0.29234711934765684, |
|
"learning_rate": 4.3557500074288175e-06, |
|
"loss": 0.58, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7690031238011727, |
|
"grad_norm": 0.2900743371372321, |
|
"learning_rate": 4.354164026439256e-06, |
|
"loss": 0.5798, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7698799802707295, |
|
"grad_norm": 0.26606697197934875, |
|
"learning_rate": 4.352576385206547e-06, |
|
"loss": 0.6049, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.770756836740286, |
|
"grad_norm": 0.30681607920100556, |
|
"learning_rate": 4.350987085152286e-06, |
|
"loss": 0.5963, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7716336932098428, |
|
"grad_norm": 0.28024451945836265, |
|
"learning_rate": 4.349396127699552e-06, |
|
"loss": 0.6063, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7725105496793994, |
|
"grad_norm": 0.284435176139814, |
|
"learning_rate": 4.347803514272911e-06, |
|
"loss": 0.5847, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.773387406148956, |
|
"grad_norm": 0.2787875052171573, |
|
"learning_rate": 4.34620924629841e-06, |
|
"loss": 0.5909, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7742642626185127, |
|
"grad_norm": 0.28222554386796406, |
|
"learning_rate": 4.344613325203577e-06, |
|
"loss": 0.5815, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7751411190880693, |
|
"grad_norm": 0.30850175508825417, |
|
"learning_rate": 4.343015752417421e-06, |
|
"loss": 0.5761, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7760179755576259, |
|
"grad_norm": 0.27711497578948074, |
|
"learning_rate": 4.341416529370431e-06, |
|
"loss": 0.5851, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7768948320271826, |
|
"grad_norm": 0.2945928621135004, |
|
"learning_rate": 4.339815657494571e-06, |
|
"loss": 0.5922, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7777716884967392, |
|
"grad_norm": 0.2843169638684151, |
|
"learning_rate": 4.338213138223285e-06, |
|
"loss": 0.5835, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7786485449662959, |
|
"grad_norm": 0.2840612846899258, |
|
"learning_rate": 4.336608972991489e-06, |
|
"loss": 0.596, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7795254014358525, |
|
"grad_norm": 0.2677194609487142, |
|
"learning_rate": 4.335003163235574e-06, |
|
"loss": 0.5794, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7804022579054091, |
|
"grad_norm": 0.31211329913480695, |
|
"learning_rate": 4.3333957103934025e-06, |
|
"loss": 0.5765, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7812791143749658, |
|
"grad_norm": 0.28583623636409483, |
|
"learning_rate": 4.33178661590431e-06, |
|
"loss": 0.6016, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7821559708445224, |
|
"grad_norm": 0.31500304190137224, |
|
"learning_rate": 4.330175881209102e-06, |
|
"loss": 0.5877, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.783032827314079, |
|
"grad_norm": 0.2811796495740926, |
|
"learning_rate": 4.32856350775005e-06, |
|
"loss": 0.5881, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7839096837836357, |
|
"grad_norm": 0.29273259848443445, |
|
"learning_rate": 4.3269494969708954e-06, |
|
"loss": 0.5921, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7847865402531923, |
|
"grad_norm": 0.27373150864211443, |
|
"learning_rate": 4.325333850316846e-06, |
|
"loss": 0.6, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.785663396722749, |
|
"grad_norm": 0.3128309122282222, |
|
"learning_rate": 4.323716569234572e-06, |
|
"loss": 0.5904, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7865402531923056, |
|
"grad_norm": 0.2825745062634813, |
|
"learning_rate": 4.32209765517221e-06, |
|
"loss": 0.5816, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7874171096618622, |
|
"grad_norm": 0.3282727674741808, |
|
"learning_rate": 4.320477109579354e-06, |
|
"loss": 0.5882, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7882939661314189, |
|
"grad_norm": 0.2940095641373108, |
|
"learning_rate": 4.318854933907065e-06, |
|
"loss": 0.5985, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.7891708226009755, |
|
"grad_norm": 0.31182474508449737, |
|
"learning_rate": 4.317231129607859e-06, |
|
"loss": 0.5843, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7900476790705322, |
|
"grad_norm": 0.26489892008261595, |
|
"learning_rate": 4.315605698135714e-06, |
|
"loss": 0.591, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7909245355400888, |
|
"grad_norm": 0.32933790566988397, |
|
"learning_rate": 4.313978640946061e-06, |
|
"loss": 0.5826, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7918013920096454, |
|
"grad_norm": 0.2790564068544957, |
|
"learning_rate": 4.312349959495791e-06, |
|
"loss": 0.5897, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7926782484792021, |
|
"grad_norm": 0.29278849432785253, |
|
"learning_rate": 4.310719655243243e-06, |
|
"loss": 0.5929, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7935551049487587, |
|
"grad_norm": 0.2898094197798441, |
|
"learning_rate": 4.309087729648217e-06, |
|
"loss": 0.575, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7944319614183153, |
|
"grad_norm": 0.2962974584908221, |
|
"learning_rate": 4.30745418417196e-06, |
|
"loss": 0.5874, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.795308817887872, |
|
"grad_norm": 0.2894965323690623, |
|
"learning_rate": 4.305819020277169e-06, |
|
"loss": 0.5769, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7961856743574286, |
|
"grad_norm": 0.2744231484838131, |
|
"learning_rate": 4.304182239427992e-06, |
|
"loss": 0.5943, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7970625308269853, |
|
"grad_norm": 0.2766245048172803, |
|
"learning_rate": 4.302543843090026e-06, |
|
"loss": 0.5814, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.7979393872965419, |
|
"grad_norm": 0.2842673020480384, |
|
"learning_rate": 4.30090383273031e-06, |
|
"loss": 0.5912, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7988162437660985, |
|
"grad_norm": 0.28199584242917014, |
|
"learning_rate": 4.2992622098173335e-06, |
|
"loss": 0.5809, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7996931002356552, |
|
"grad_norm": 0.2820675876804688, |
|
"learning_rate": 4.297618975821027e-06, |
|
"loss": 0.5917, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.8005699567052118, |
|
"grad_norm": 0.2728605500328137, |
|
"learning_rate": 4.2959741322127635e-06, |
|
"loss": 0.5764, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.8014468131747684, |
|
"grad_norm": 0.27169399222059704, |
|
"learning_rate": 4.294327680465358e-06, |
|
"loss": 0.5849, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.8023236696443251, |
|
"grad_norm": 0.28063665744680427, |
|
"learning_rate": 4.292679622053066e-06, |
|
"loss": 0.58, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8032005261138817, |
|
"grad_norm": 0.25926421536726935, |
|
"learning_rate": 4.29102995845158e-06, |
|
"loss": 0.5787, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.8040773825834384, |
|
"grad_norm": 0.29001417666592577, |
|
"learning_rate": 4.289378691138032e-06, |
|
"loss": 0.5868, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.804954239052995, |
|
"grad_norm": 0.27215185007216747, |
|
"learning_rate": 4.287725821590987e-06, |
|
"loss": 0.5894, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.8058310955225516, |
|
"grad_norm": 0.3050881231274449, |
|
"learning_rate": 4.286071351290447e-06, |
|
"loss": 0.5911, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.8067079519921083, |
|
"grad_norm": 0.2873456207891206, |
|
"learning_rate": 4.2844152817178476e-06, |
|
"loss": 0.5835, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8075848084616649, |
|
"grad_norm": 0.2626365139918821, |
|
"learning_rate": 4.282757614356055e-06, |
|
"loss": 0.5794, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.8084616649312216, |
|
"grad_norm": 0.28122583577721894, |
|
"learning_rate": 4.281098350689367e-06, |
|
"loss": 0.581, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.8093385214007782, |
|
"grad_norm": 0.2955727164056087, |
|
"learning_rate": 4.279437492203509e-06, |
|
"loss": 0.6024, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.8102153778703348, |
|
"grad_norm": 0.2928465088558078, |
|
"learning_rate": 4.277775040385636e-06, |
|
"loss": 0.5777, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.8110922343398915, |
|
"grad_norm": 0.279748286657514, |
|
"learning_rate": 4.276110996724332e-06, |
|
"loss": 0.5983, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8119690908094481, |
|
"grad_norm": 0.3064104243975942, |
|
"learning_rate": 4.274445362709602e-06, |
|
"loss": 0.5959, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.8128459472790047, |
|
"grad_norm": 0.2705400124701495, |
|
"learning_rate": 4.272778139832876e-06, |
|
"loss": 0.5964, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.8137228037485614, |
|
"grad_norm": 0.3030828027995252, |
|
"learning_rate": 4.271109329587009e-06, |
|
"loss": 0.5784, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.814599660218118, |
|
"grad_norm": 0.2629159770264448, |
|
"learning_rate": 4.2694389334662745e-06, |
|
"loss": 0.5845, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8154765166876747, |
|
"grad_norm": 0.3351422353981342, |
|
"learning_rate": 4.267766952966369e-06, |
|
"loss": 0.5949, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8163533731572313, |
|
"grad_norm": 0.2760441532769009, |
|
"learning_rate": 4.2660933895844055e-06, |
|
"loss": 0.5904, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8172302296267879, |
|
"grad_norm": 0.30558832310943446, |
|
"learning_rate": 4.264418244818914e-06, |
|
"loss": 0.5839, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8181070860963446, |
|
"grad_norm": 0.28070458613560756, |
|
"learning_rate": 4.262741520169844e-06, |
|
"loss": 0.5791, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.8189839425659012, |
|
"grad_norm": 0.2735766456330096, |
|
"learning_rate": 4.261063217138554e-06, |
|
"loss": 0.5836, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8198607990354578, |
|
"grad_norm": 0.3038178849716158, |
|
"learning_rate": 4.259383337227821e-06, |
|
"loss": 0.5885, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8207376555050145, |
|
"grad_norm": 0.26590487432268695, |
|
"learning_rate": 4.25770188194183e-06, |
|
"loss": 0.6035, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.8216145119745711, |
|
"grad_norm": 0.31271672720672494, |
|
"learning_rate": 4.25601885278618e-06, |
|
"loss": 0.5926, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.8224913684441278, |
|
"grad_norm": 0.26261561071530615, |
|
"learning_rate": 4.254334251267877e-06, |
|
"loss": 0.5996, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.8233682249136844, |
|
"grad_norm": 0.2891665251939073, |
|
"learning_rate": 4.252648078895336e-06, |
|
"loss": 0.5876, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.824245081383241, |
|
"grad_norm": 0.2897735311167941, |
|
"learning_rate": 4.2509603371783776e-06, |
|
"loss": 0.5892, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8251219378527977, |
|
"grad_norm": 0.28026024666883764, |
|
"learning_rate": 4.249271027628228e-06, |
|
"loss": 0.587, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.8259987943223543, |
|
"grad_norm": 0.2765283292737123, |
|
"learning_rate": 4.24758015175752e-06, |
|
"loss": 0.5769, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.826875650791911, |
|
"grad_norm": 0.2921232680301083, |
|
"learning_rate": 4.245887711080283e-06, |
|
"loss": 0.5854, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.8277525072614677, |
|
"grad_norm": 0.3005072830624817, |
|
"learning_rate": 4.2441937071119524e-06, |
|
"loss": 0.5802, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.8286293637310242, |
|
"grad_norm": 0.27059131939602343, |
|
"learning_rate": 4.242498141369361e-06, |
|
"loss": 0.5837, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.829506220200581, |
|
"grad_norm": 0.3038588097565146, |
|
"learning_rate": 4.240801015370743e-06, |
|
"loss": 0.5869, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.8303830766701376, |
|
"grad_norm": 0.31875741653821127, |
|
"learning_rate": 4.239102330635726e-06, |
|
"loss": 0.5836, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.8312599331396942, |
|
"grad_norm": 0.26475770270890336, |
|
"learning_rate": 4.2374020886853354e-06, |
|
"loss": 0.5796, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.8321367896092509, |
|
"grad_norm": 0.31635648581412845, |
|
"learning_rate": 4.235700291041989e-06, |
|
"loss": 0.5732, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.8330136460788075, |
|
"grad_norm": 0.27123635854757305, |
|
"learning_rate": 4.233996939229502e-06, |
|
"loss": 0.5977, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8338905025483642, |
|
"grad_norm": 0.3356358824197267, |
|
"learning_rate": 4.232292034773076e-06, |
|
"loss": 0.5871, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.8347673590179208, |
|
"grad_norm": 0.2723531290949244, |
|
"learning_rate": 4.230585579199306e-06, |
|
"loss": 0.5916, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.8356442154874774, |
|
"grad_norm": 0.2975424730057694, |
|
"learning_rate": 4.228877574036175e-06, |
|
"loss": 0.592, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.8365210719570341, |
|
"grad_norm": 0.28108527975014536, |
|
"learning_rate": 4.227168020813053e-06, |
|
"loss": 0.5788, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.8373979284265907, |
|
"grad_norm": 0.26358656072328285, |
|
"learning_rate": 4.225456921060698e-06, |
|
"loss": 0.5728, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8382747848961473, |
|
"grad_norm": 0.2793044648839571, |
|
"learning_rate": 4.223744276311249e-06, |
|
"loss": 0.5714, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.839151641365704, |
|
"grad_norm": 0.30214577120239683, |
|
"learning_rate": 4.222030088098233e-06, |
|
"loss": 0.5993, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.8400284978352606, |
|
"grad_norm": 0.2639515397393347, |
|
"learning_rate": 4.220314357956557e-06, |
|
"loss": 0.5994, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.8409053543048173, |
|
"grad_norm": 0.3298154347341819, |
|
"learning_rate": 4.218597087422508e-06, |
|
"loss": 0.5877, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.8417822107743739, |
|
"grad_norm": 0.28203599665081885, |
|
"learning_rate": 4.216878278033753e-06, |
|
"loss": 0.5865, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8426590672439305, |
|
"grad_norm": 0.2746406409148874, |
|
"learning_rate": 4.2151579313293364e-06, |
|
"loss": 0.5881, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.8435359237134872, |
|
"grad_norm": 0.33875497622714734, |
|
"learning_rate": 4.2134360488496804e-06, |
|
"loss": 0.6029, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.8444127801830438, |
|
"grad_norm": 0.2875141188036911, |
|
"learning_rate": 4.211712632136581e-06, |
|
"loss": 0.5845, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.8452896366526004, |
|
"grad_norm": 0.32374197566257723, |
|
"learning_rate": 4.209987682733207e-06, |
|
"loss": 0.589, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.8461664931221571, |
|
"grad_norm": 0.26718900480287466, |
|
"learning_rate": 4.208261202184104e-06, |
|
"loss": 0.5844, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.8470433495917137, |
|
"grad_norm": 0.29759515513279916, |
|
"learning_rate": 4.206533192035184e-06, |
|
"loss": 0.5817, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.8479202060612704, |
|
"grad_norm": 0.28330165664862006, |
|
"learning_rate": 4.20480365383373e-06, |
|
"loss": 0.5853, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.848797062530827, |
|
"grad_norm": 0.26991723910735316, |
|
"learning_rate": 4.203072589128394e-06, |
|
"loss": 0.5847, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.8496739190003836, |
|
"grad_norm": 0.28120405866784015, |
|
"learning_rate": 4.201339999469194e-06, |
|
"loss": 0.5771, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.8505507754699403, |
|
"grad_norm": 0.29731566030764794, |
|
"learning_rate": 4.199605886407515e-06, |
|
"loss": 0.5872, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8514276319394969, |
|
"grad_norm": 0.29823098898704575, |
|
"learning_rate": 4.197870251496104e-06, |
|
"loss": 0.585, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.8523044884090536, |
|
"grad_norm": 0.29246400163730035, |
|
"learning_rate": 4.196133096289071e-06, |
|
"loss": 0.5728, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.8531813448786102, |
|
"grad_norm": 0.31038345035918974, |
|
"learning_rate": 4.194394422341888e-06, |
|
"loss": 0.588, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.8540582013481668, |
|
"grad_norm": 0.29419655403066824, |
|
"learning_rate": 4.192654231211389e-06, |
|
"loss": 0.5802, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.8549350578177235, |
|
"grad_norm": 0.28924212129082133, |
|
"learning_rate": 4.190912524455762e-06, |
|
"loss": 0.5957, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.8558119142872801, |
|
"grad_norm": 0.3433724407789192, |
|
"learning_rate": 4.189169303634555e-06, |
|
"loss": 0.5943, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.8566887707568367, |
|
"grad_norm": 0.3447246872111939, |
|
"learning_rate": 4.187424570308671e-06, |
|
"loss": 0.5679, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.8575656272263934, |
|
"grad_norm": 0.2717297839127488, |
|
"learning_rate": 4.185678326040369e-06, |
|
"loss": 0.5839, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.85844248369595, |
|
"grad_norm": 0.3149777108439808, |
|
"learning_rate": 4.1839305723932565e-06, |
|
"loss": 0.5684, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.8593193401655067, |
|
"grad_norm": 0.3196280126814673, |
|
"learning_rate": 4.1821813109322975e-06, |
|
"loss": 0.5845, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8601961966350633, |
|
"grad_norm": 0.3166850113740036, |
|
"learning_rate": 4.180430543223803e-06, |
|
"loss": 0.5722, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.8610730531046199, |
|
"grad_norm": 0.30727325041845543, |
|
"learning_rate": 4.178678270835435e-06, |
|
"loss": 0.582, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.8619499095741766, |
|
"grad_norm": 0.34738075452538025, |
|
"learning_rate": 4.1769244953361995e-06, |
|
"loss": 0.5789, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.8628267660437332, |
|
"grad_norm": 0.3029018585056203, |
|
"learning_rate": 4.1751692182964524e-06, |
|
"loss": 0.5906, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.8637036225132898, |
|
"grad_norm": 0.27172806950560857, |
|
"learning_rate": 4.1734124412878915e-06, |
|
"loss": 0.5864, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8645804789828465, |
|
"grad_norm": 0.3078626255245488, |
|
"learning_rate": 4.171654165883558e-06, |
|
"loss": 0.5961, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.8654573354524031, |
|
"grad_norm": 0.28755523271585887, |
|
"learning_rate": 4.169894393657834e-06, |
|
"loss": 0.5881, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.8663341919219598, |
|
"grad_norm": 0.3081436303822685, |
|
"learning_rate": 4.168133126186445e-06, |
|
"loss": 0.5818, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.8672110483915164, |
|
"grad_norm": 0.2785218381541765, |
|
"learning_rate": 4.166370365046452e-06, |
|
"loss": 0.5828, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.868087904861073, |
|
"grad_norm": 0.3391784184001714, |
|
"learning_rate": 4.164606111816256e-06, |
|
"loss": 0.5867, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8689647613306297, |
|
"grad_norm": 0.27636992919331915, |
|
"learning_rate": 4.162840368075591e-06, |
|
"loss": 0.599, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.8698416178001863, |
|
"grad_norm": 0.28517927301055196, |
|
"learning_rate": 4.161073135405529e-06, |
|
"loss": 0.5831, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.870718474269743, |
|
"grad_norm": 0.29490820494014364, |
|
"learning_rate": 4.1593044153884745e-06, |
|
"loss": 0.5757, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.8715953307392996, |
|
"grad_norm": 0.2780476402469785, |
|
"learning_rate": 4.157534209608161e-06, |
|
"loss": 0.5964, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.8724721872088562, |
|
"grad_norm": 0.29068689725516644, |
|
"learning_rate": 4.155762519649654e-06, |
|
"loss": 0.5805, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8733490436784129, |
|
"grad_norm": 0.26095614944942314, |
|
"learning_rate": 4.15398934709935e-06, |
|
"loss": 0.5841, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.8742259001479695, |
|
"grad_norm": 0.31389428529448765, |
|
"learning_rate": 4.1522146935449705e-06, |
|
"loss": 0.5846, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.8751027566175261, |
|
"grad_norm": 0.26816106638671405, |
|
"learning_rate": 4.150438560575563e-06, |
|
"loss": 0.5833, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.8759796130870828, |
|
"grad_norm": 0.31604277041792156, |
|
"learning_rate": 4.1486609497815025e-06, |
|
"loss": 0.5888, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.8768564695566394, |
|
"grad_norm": 0.3606037237047822, |
|
"learning_rate": 4.146881862754485e-06, |
|
"loss": 0.5942, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8777333260261961, |
|
"grad_norm": 0.28543513756367406, |
|
"learning_rate": 4.145101301087527e-06, |
|
"loss": 0.5915, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8786101824957527, |
|
"grad_norm": 0.3462271962536017, |
|
"learning_rate": 4.143319266374969e-06, |
|
"loss": 0.5942, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8794870389653093, |
|
"grad_norm": 0.2833352289445499, |
|
"learning_rate": 4.141535760212467e-06, |
|
"loss": 0.5863, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.880363895434866, |
|
"grad_norm": 0.35489814354695126, |
|
"learning_rate": 4.139750784196998e-06, |
|
"loss": 0.5924, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8812407519044226, |
|
"grad_norm": 0.2942335535458572, |
|
"learning_rate": 4.137964339926852e-06, |
|
"loss": 0.5892, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8821176083739792, |
|
"grad_norm": 0.32828822885224784, |
|
"learning_rate": 4.136176429001634e-06, |
|
"loss": 0.5909, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8829944648435359, |
|
"grad_norm": 0.3123727759868493, |
|
"learning_rate": 4.134387053022266e-06, |
|
"loss": 0.5845, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.8838713213130925, |
|
"grad_norm": 0.2862421766790686, |
|
"learning_rate": 4.132596213590977e-06, |
|
"loss": 0.5848, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8847481777826492, |
|
"grad_norm": 0.32232750817039807, |
|
"learning_rate": 4.1308039123113084e-06, |
|
"loss": 0.5869, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8856250342522058, |
|
"grad_norm": 0.28776404090006724, |
|
"learning_rate": 4.129010150788112e-06, |
|
"loss": 0.5992, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8865018907217624, |
|
"grad_norm": 0.3257967217812331, |
|
"learning_rate": 4.127214930627545e-06, |
|
"loss": 0.5828, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8873787471913192, |
|
"grad_norm": 0.3065300730664574, |
|
"learning_rate": 4.125418253437071e-06, |
|
"loss": 0.578, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8882556036608757, |
|
"grad_norm": 0.29218143100925903, |
|
"learning_rate": 4.123620120825459e-06, |
|
"loss": 0.5939, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8891324601304323, |
|
"grad_norm": 0.28565794045128473, |
|
"learning_rate": 4.121820534402781e-06, |
|
"loss": 0.5868, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.890009316599989, |
|
"grad_norm": 0.30898296228273797, |
|
"learning_rate": 4.120019495780412e-06, |
|
"loss": 0.582, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8908861730695457, |
|
"grad_norm": 0.2911662733325922, |
|
"learning_rate": 4.118217006571023e-06, |
|
"loss": 0.5923, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8917630295391024, |
|
"grad_norm": 0.2843342810887561, |
|
"learning_rate": 4.116413068388589e-06, |
|
"loss": 0.5754, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.892639886008659, |
|
"grad_norm": 0.334401955522752, |
|
"learning_rate": 4.11460768284838e-06, |
|
"loss": 0.5895, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8935167424782156, |
|
"grad_norm": 0.2600873368987441, |
|
"learning_rate": 4.11280085156696e-06, |
|
"loss": 0.5858, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.8943935989477723, |
|
"grad_norm": 0.3051388251322737, |
|
"learning_rate": 4.110992576162193e-06, |
|
"loss": 0.5861, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8952704554173289, |
|
"grad_norm": 0.30230682759222505, |
|
"learning_rate": 4.109182858253231e-06, |
|
"loss": 0.5857, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8961473118868856, |
|
"grad_norm": 0.27145584987414345, |
|
"learning_rate": 4.107371699460521e-06, |
|
"loss": 0.5827, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8970241683564422, |
|
"grad_norm": 0.2886096599363367, |
|
"learning_rate": 4.1055591014057964e-06, |
|
"loss": 0.5732, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.8979010248259988, |
|
"grad_norm": 0.2643618798342576, |
|
"learning_rate": 4.103745065712083e-06, |
|
"loss": 0.581, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8987778812955555, |
|
"grad_norm": 0.27612674007258925, |
|
"learning_rate": 4.101929594003694e-06, |
|
"loss": 0.5774, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8996547377651121, |
|
"grad_norm": 0.2694404941538916, |
|
"learning_rate": 4.100112687906224e-06, |
|
"loss": 0.5792, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.9005315942346687, |
|
"grad_norm": 0.26812897420311116, |
|
"learning_rate": 4.098294349046556e-06, |
|
"loss": 0.5945, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.9014084507042254, |
|
"grad_norm": 0.2744007605554886, |
|
"learning_rate": 4.0964745790528564e-06, |
|
"loss": 0.5712, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.902285307173782, |
|
"grad_norm": 0.2614641549143825, |
|
"learning_rate": 4.09465337955457e-06, |
|
"loss": 0.5756, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.9031621636433387, |
|
"grad_norm": 0.25643605179903173, |
|
"learning_rate": 4.092830752182423e-06, |
|
"loss": 0.593, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9040390201128953, |
|
"grad_norm": 0.26698048225450505, |
|
"learning_rate": 4.091006698568419e-06, |
|
"loss": 0.5877, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.9049158765824519, |
|
"grad_norm": 0.2655671129093472, |
|
"learning_rate": 4.0891812203458425e-06, |
|
"loss": 0.5701, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.9057927330520086, |
|
"grad_norm": 0.2706223562384906, |
|
"learning_rate": 4.08735431914925e-06, |
|
"loss": 0.5818, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.9066695895215652, |
|
"grad_norm": 0.26684323937974636, |
|
"learning_rate": 4.085525996614472e-06, |
|
"loss": 0.5878, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.9075464459911218, |
|
"grad_norm": 0.24564951471442678, |
|
"learning_rate": 4.083696254378615e-06, |
|
"loss": 0.5967, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9084233024606785, |
|
"grad_norm": 0.2761933648093443, |
|
"learning_rate": 4.081865094080053e-06, |
|
"loss": 0.576, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.9093001589302351, |
|
"grad_norm": 0.2722027493749199, |
|
"learning_rate": 4.080032517358431e-06, |
|
"loss": 0.579, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.9101770153997918, |
|
"grad_norm": 0.5039307385586534, |
|
"learning_rate": 4.078198525854664e-06, |
|
"loss": 0.5943, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.9110538718693484, |
|
"grad_norm": 0.26519176650439175, |
|
"learning_rate": 4.0763631212109315e-06, |
|
"loss": 0.5893, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.911930728338905, |
|
"grad_norm": 0.2644411261920598, |
|
"learning_rate": 4.074526305070679e-06, |
|
"loss": 0.5791, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9128075848084617, |
|
"grad_norm": 0.27917354228958563, |
|
"learning_rate": 4.072688079078616e-06, |
|
"loss": 0.5847, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.9136844412780183, |
|
"grad_norm": 0.27274252297201695, |
|
"learning_rate": 4.070848444880716e-06, |
|
"loss": 0.5695, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.914561297747575, |
|
"grad_norm": 0.26541238057197397, |
|
"learning_rate": 4.06900740412421e-06, |
|
"loss": 0.5858, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.9154381542171316, |
|
"grad_norm": 0.2687466193673103, |
|
"learning_rate": 4.0671649584575925e-06, |
|
"loss": 0.5832, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.9163150106866882, |
|
"grad_norm": 0.27584447196087264, |
|
"learning_rate": 4.065321109530612e-06, |
|
"loss": 0.5828, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9171918671562449, |
|
"grad_norm": 0.27618254494046185, |
|
"learning_rate": 4.063475858994276e-06, |
|
"loss": 0.5829, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.9180687236258015, |
|
"grad_norm": 0.2800627797716068, |
|
"learning_rate": 4.061629208500847e-06, |
|
"loss": 0.5813, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.9189455800953581, |
|
"grad_norm": 0.2731973027581407, |
|
"learning_rate": 4.059781159703839e-06, |
|
"loss": 0.5907, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.9198224365649148, |
|
"grad_norm": 0.2817329916742434, |
|
"learning_rate": 4.057931714258022e-06, |
|
"loss": 0.5845, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.9206992930344714, |
|
"grad_norm": 0.2624010665247189, |
|
"learning_rate": 4.056080873819412e-06, |
|
"loss": 0.579, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9215761495040281, |
|
"grad_norm": 0.26121937584936983, |
|
"learning_rate": 4.054228640045275e-06, |
|
"loss": 0.5857, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.9224530059735847, |
|
"grad_norm": 0.2832895486337394, |
|
"learning_rate": 4.052375014594129e-06, |
|
"loss": 0.5957, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.9233298624431413, |
|
"grad_norm": 0.27671228904328893, |
|
"learning_rate": 4.0505199991257325e-06, |
|
"loss": 0.5791, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.924206718912698, |
|
"grad_norm": 0.266998502123574, |
|
"learning_rate": 4.048663595301093e-06, |
|
"loss": 0.5896, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.9250835753822546, |
|
"grad_norm": 0.3094016546060802, |
|
"learning_rate": 4.046805804782456e-06, |
|
"loss": 0.5788, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.9259604318518112, |
|
"grad_norm": 0.2782662002801493, |
|
"learning_rate": 4.0449466292333166e-06, |
|
"loss": 0.5888, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.9268372883213679, |
|
"grad_norm": 0.27821869081922773, |
|
"learning_rate": 4.043086070318401e-06, |
|
"loss": 0.5879, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.9277141447909245, |
|
"grad_norm": 0.32143887759720546, |
|
"learning_rate": 4.04122412970368e-06, |
|
"loss": 0.5884, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.9285910012604812, |
|
"grad_norm": 0.2598221780539352, |
|
"learning_rate": 4.039360809056361e-06, |
|
"loss": 0.58, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.9294678577300378, |
|
"grad_norm": 0.3300275262996093, |
|
"learning_rate": 4.037496110044885e-06, |
|
"loss": 0.5963, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9303447141995944, |
|
"grad_norm": 0.2723517740568475, |
|
"learning_rate": 4.035630034338928e-06, |
|
"loss": 0.5684, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.9312215706691511, |
|
"grad_norm": 0.26174388908838997, |
|
"learning_rate": 4.033762583609398e-06, |
|
"loss": 0.5741, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.9320984271387077, |
|
"grad_norm": 0.2879705808043353, |
|
"learning_rate": 4.031893759528439e-06, |
|
"loss": 0.5651, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.9329752836082644, |
|
"grad_norm": 0.27573911638107307, |
|
"learning_rate": 4.030023563769418e-06, |
|
"loss": 0.5738, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.933852140077821, |
|
"grad_norm": 0.270890009890323, |
|
"learning_rate": 4.028151998006934e-06, |
|
"loss": 0.5748, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.9347289965473776, |
|
"grad_norm": 0.2651359065699047, |
|
"learning_rate": 4.026279063916811e-06, |
|
"loss": 0.5815, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.9356058530169343, |
|
"grad_norm": 0.285792627094006, |
|
"learning_rate": 4.024404763176101e-06, |
|
"loss": 0.5714, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.9364827094864909, |
|
"grad_norm": 0.25220096965602506, |
|
"learning_rate": 4.022529097463076e-06, |
|
"loss": 0.5761, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.9373595659560475, |
|
"grad_norm": 0.2572736434059626, |
|
"learning_rate": 4.020652068457234e-06, |
|
"loss": 0.5813, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.9382364224256042, |
|
"grad_norm": 0.2769717174034421, |
|
"learning_rate": 4.018773677839289e-06, |
|
"loss": 0.5902, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9391132788951608, |
|
"grad_norm": 0.2638965107730823, |
|
"learning_rate": 4.016893927291179e-06, |
|
"loss": 0.5774, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.9399901353647175, |
|
"grad_norm": 0.26364544697361064, |
|
"learning_rate": 4.015012818496057e-06, |
|
"loss": 0.5885, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.9408669918342741, |
|
"grad_norm": 0.2782490552191973, |
|
"learning_rate": 4.013130353138293e-06, |
|
"loss": 0.5734, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.9417438483038307, |
|
"grad_norm": 0.2939309170345373, |
|
"learning_rate": 4.011246532903472e-06, |
|
"loss": 0.5863, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.9426207047733874, |
|
"grad_norm": 0.27682818038097917, |
|
"learning_rate": 4.00936135947839e-06, |
|
"loss": 0.5878, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.943497561242944, |
|
"grad_norm": 0.27100650217384786, |
|
"learning_rate": 4.007474834551059e-06, |
|
"loss": 0.5788, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.9443744177125006, |
|
"grad_norm": 0.3179264915740243, |
|
"learning_rate": 4.005586959810697e-06, |
|
"loss": 0.5697, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.9452512741820573, |
|
"grad_norm": 0.26927348365153236, |
|
"learning_rate": 4.003697736947731e-06, |
|
"loss": 0.5683, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.9461281306516139, |
|
"grad_norm": 0.2755764124341007, |
|
"learning_rate": 4.001807167653798e-06, |
|
"loss": 0.5794, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.9470049871211706, |
|
"grad_norm": 0.2908090312996085, |
|
"learning_rate": 3.999915253621739e-06, |
|
"loss": 0.586, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9478818435907272, |
|
"grad_norm": 0.2545666408606057, |
|
"learning_rate": 3.998021996545599e-06, |
|
"loss": 0.5831, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.9487587000602838, |
|
"grad_norm": 0.29377943743323887, |
|
"learning_rate": 3.9961273981206245e-06, |
|
"loss": 0.585, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.9496355565298406, |
|
"grad_norm": 0.26968750170325856, |
|
"learning_rate": 3.994231460043265e-06, |
|
"loss": 0.5782, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.9505124129993971, |
|
"grad_norm": 0.2911018694543167, |
|
"learning_rate": 3.9923341840111675e-06, |
|
"loss": 0.5813, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.9513892694689537, |
|
"grad_norm": 0.32080813736390973, |
|
"learning_rate": 3.99043557172318e-06, |
|
"loss": 0.5836, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.9522661259385105, |
|
"grad_norm": 0.2894185491332872, |
|
"learning_rate": 3.988535624879344e-06, |
|
"loss": 0.583, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.953142982408067, |
|
"grad_norm": 0.3036439907360394, |
|
"learning_rate": 3.986634345180899e-06, |
|
"loss": 0.5753, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.9540198388776238, |
|
"grad_norm": 0.30256015219807453, |
|
"learning_rate": 3.984731734330273e-06, |
|
"loss": 0.5787, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.9548966953471804, |
|
"grad_norm": 0.2684694121785645, |
|
"learning_rate": 3.982827794031091e-06, |
|
"loss": 0.5811, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.955773551816737, |
|
"grad_norm": 0.3047268297869491, |
|
"learning_rate": 3.980922525988167e-06, |
|
"loss": 0.5757, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9566504082862937, |
|
"grad_norm": 0.2680829692432763, |
|
"learning_rate": 3.979015931907502e-06, |
|
"loss": 0.5938, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.9575272647558503, |
|
"grad_norm": 0.28352806229638294, |
|
"learning_rate": 3.977108013496286e-06, |
|
"loss": 0.5648, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.958404121225407, |
|
"grad_norm": 0.27134893274934896, |
|
"learning_rate": 3.975198772462896e-06, |
|
"loss": 0.5959, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.9592809776949636, |
|
"grad_norm": 0.27670636726963027, |
|
"learning_rate": 3.973288210516889e-06, |
|
"loss": 0.5825, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.9601578341645202, |
|
"grad_norm": 0.27577855913411087, |
|
"learning_rate": 3.971376329369011e-06, |
|
"loss": 0.5763, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.9610346906340769, |
|
"grad_norm": 0.2613562238768912, |
|
"learning_rate": 3.969463130731183e-06, |
|
"loss": 0.587, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.9619115471036335, |
|
"grad_norm": 0.30682832359084977, |
|
"learning_rate": 3.96754861631651e-06, |
|
"loss": 0.6012, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.9627884035731901, |
|
"grad_norm": 0.2753727317824162, |
|
"learning_rate": 3.965632787839274e-06, |
|
"loss": 0.593, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.9636652600427468, |
|
"grad_norm": 0.2896526629743159, |
|
"learning_rate": 3.963715647014932e-06, |
|
"loss": 0.5823, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.9645421165123034, |
|
"grad_norm": 0.28810606366408137, |
|
"learning_rate": 3.961797195560118e-06, |
|
"loss": 0.5844, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9654189729818601, |
|
"grad_norm": 0.2603559754869869, |
|
"learning_rate": 3.959877435192639e-06, |
|
"loss": 0.5803, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.9662958294514167, |
|
"grad_norm": 0.28655269690518276, |
|
"learning_rate": 3.957956367631475e-06, |
|
"loss": 0.5707, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.9671726859209733, |
|
"grad_norm": 0.3009451530592475, |
|
"learning_rate": 3.956033994596773e-06, |
|
"loss": 0.5771, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.96804954239053, |
|
"grad_norm": 0.2577540703327921, |
|
"learning_rate": 3.954110317809854e-06, |
|
"loss": 0.576, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.9689263988600866, |
|
"grad_norm": 0.29870257898995317, |
|
"learning_rate": 3.952185338993202e-06, |
|
"loss": 0.5872, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9698032553296432, |
|
"grad_norm": 0.2768702174324288, |
|
"learning_rate": 3.95025905987047e-06, |
|
"loss": 0.5831, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.9706801117991999, |
|
"grad_norm": 0.288774627238478, |
|
"learning_rate": 3.948331482166473e-06, |
|
"loss": 0.5951, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.9715569682687565, |
|
"grad_norm": 0.324678524263679, |
|
"learning_rate": 3.94640260760719e-06, |
|
"loss": 0.5734, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.9724338247383132, |
|
"grad_norm": 0.2777093036856744, |
|
"learning_rate": 3.944472437919761e-06, |
|
"loss": 0.5846, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.9733106812078698, |
|
"grad_norm": 0.337073965677139, |
|
"learning_rate": 3.942540974832486e-06, |
|
"loss": 0.5904, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9741875376774264, |
|
"grad_norm": 0.2919504390486104, |
|
"learning_rate": 3.9406082200748216e-06, |
|
"loss": 0.5901, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.9750643941469831, |
|
"grad_norm": 0.26917415244282195, |
|
"learning_rate": 3.938674175377383e-06, |
|
"loss": 0.5727, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.9759412506165397, |
|
"grad_norm": 0.2968354712585106, |
|
"learning_rate": 3.93673884247194e-06, |
|
"loss": 0.5684, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.9768181070860964, |
|
"grad_norm": 0.26666333819741744, |
|
"learning_rate": 3.934802223091415e-06, |
|
"loss": 0.582, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.977694963555653, |
|
"grad_norm": 0.2648009228041306, |
|
"learning_rate": 3.932864318969882e-06, |
|
"loss": 0.5732, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.9785718200252096, |
|
"grad_norm": 0.26447715765911384, |
|
"learning_rate": 3.930925131842567e-06, |
|
"loss": 0.581, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.9794486764947663, |
|
"grad_norm": 0.26650421292261106, |
|
"learning_rate": 3.928984663445844e-06, |
|
"loss": 0.578, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.9803255329643229, |
|
"grad_norm": 0.27399427740484344, |
|
"learning_rate": 3.927042915517234e-06, |
|
"loss": 0.5841, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.9812023894338795, |
|
"grad_norm": 0.29486187077568676, |
|
"learning_rate": 3.925099889795404e-06, |
|
"loss": 0.5791, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.9820792459034362, |
|
"grad_norm": 0.27626862187200796, |
|
"learning_rate": 3.9231555880201655e-06, |
|
"loss": 0.5758, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9829561023729928, |
|
"grad_norm": 0.2709394700881976, |
|
"learning_rate": 3.9212100119324704e-06, |
|
"loss": 0.5725, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.9838329588425495, |
|
"grad_norm": 0.257787971984586, |
|
"learning_rate": 3.919263163274416e-06, |
|
"loss": 0.5733, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.9847098153121061, |
|
"grad_norm": 0.2854496376494655, |
|
"learning_rate": 3.917315043789235e-06, |
|
"loss": 0.5696, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.9855866717816627, |
|
"grad_norm": 0.2566199610678738, |
|
"learning_rate": 3.9153656552212995e-06, |
|
"loss": 0.5813, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.9864635282512194, |
|
"grad_norm": 0.2555880030988225, |
|
"learning_rate": 3.913414999316118e-06, |
|
"loss": 0.5945, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.987340384720776, |
|
"grad_norm": 0.2577195559469773, |
|
"learning_rate": 3.911463077820336e-06, |
|
"loss": 0.5675, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9882172411903326, |
|
"grad_norm": 0.26851748898394834, |
|
"learning_rate": 3.909509892481726e-06, |
|
"loss": 0.5807, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9890940976598893, |
|
"grad_norm": 0.2617539578196299, |
|
"learning_rate": 3.907555445049198e-06, |
|
"loss": 0.5684, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9899709541294459, |
|
"grad_norm": 0.2586839170532308, |
|
"learning_rate": 3.905599737272791e-06, |
|
"loss": 0.5801, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9908478105990026, |
|
"grad_norm": 0.25049955800874396, |
|
"learning_rate": 3.903642770903671e-06, |
|
"loss": 0.5762, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9917246670685592, |
|
"grad_norm": 0.27270516361418773, |
|
"learning_rate": 3.901684547694133e-06, |
|
"loss": 0.5878, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9926015235381158, |
|
"grad_norm": 0.2816673997379789, |
|
"learning_rate": 3.899725069397593e-06, |
|
"loss": 0.5927, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9934783800076725, |
|
"grad_norm": 0.2679288547921494, |
|
"learning_rate": 3.897764337768597e-06, |
|
"loss": 0.5772, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9943552364772291, |
|
"grad_norm": 0.27040765991438753, |
|
"learning_rate": 3.895802354562808e-06, |
|
"loss": 0.5623, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9952320929467857, |
|
"grad_norm": 0.29605913619532825, |
|
"learning_rate": 3.893839121537015e-06, |
|
"loss": 0.5868, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9961089494163424, |
|
"grad_norm": 0.27461413478738583, |
|
"learning_rate": 3.89187464044912e-06, |
|
"loss": 0.5871, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.996985805885899, |
|
"grad_norm": 0.28648748056684925, |
|
"learning_rate": 3.8899089130581465e-06, |
|
"loss": 0.5753, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9978626623554557, |
|
"grad_norm": 0.2925165297373746, |
|
"learning_rate": 3.8879419411242335e-06, |
|
"loss": 0.5828, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9987395188250123, |
|
"grad_norm": 0.29352029461564516, |
|
"learning_rate": 3.885973726408634e-06, |
|
"loss": 0.5842, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9996163752945689, |
|
"grad_norm": 0.28650442615475913, |
|
"learning_rate": 3.884004270673711e-06, |
|
"loss": 0.5803, |
|
"step": 1140 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 3420, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1140, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3818875539947520.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|