{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1090,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001834862385321101,
      "grad_norm": 0.5285698626925894,
      "learning_rate": 1.8315018315018315e-07,
      "loss": 2.1792,
      "step": 1
    },
    {
      "epoch": 0.01834862385321101,
      "grad_norm": 0.5570483132219919,
      "learning_rate": 1.8315018315018316e-06,
      "loss": 2.1303,
      "step": 10
    },
    {
      "epoch": 0.03669724770642202,
      "grad_norm": 0.43928839139908665,
      "learning_rate": 3.663003663003663e-06,
      "loss": 2.143,
      "step": 20
    },
    {
      "epoch": 0.05504587155963303,
      "grad_norm": 0.4259147884901134,
      "learning_rate": 5.494505494505494e-06,
      "loss": 2.0498,
      "step": 30
    },
    {
      "epoch": 0.07339449541284404,
      "grad_norm": 0.3256334236213354,
      "learning_rate": 7.326007326007326e-06,
      "loss": 2.1005,
      "step": 40
    },
    {
      "epoch": 0.09174311926605505,
      "grad_norm": 0.22687147012199685,
      "learning_rate": 9.157509157509158e-06,
      "loss": 2.032,
      "step": 50
    },
    {
      "epoch": 0.11009174311926606,
      "grad_norm": 0.20407900243790855,
      "learning_rate": 1.0989010989010989e-05,
      "loss": 2.0265,
      "step": 60
    },
    {
      "epoch": 0.12844036697247707,
      "grad_norm": 0.17272636847518857,
      "learning_rate": 1.282051282051282e-05,
      "loss": 2.0593,
      "step": 70
    },
    {
      "epoch": 0.14678899082568808,
      "grad_norm": 0.15668903317199465,
      "learning_rate": 1.4652014652014653e-05,
      "loss": 2.009,
      "step": 80
    },
    {
      "epoch": 0.1651376146788991,
      "grad_norm": 0.13299844821079024,
      "learning_rate": 1.6483516483516486e-05,
      "loss": 2.0319,
      "step": 90
    },
    {
      "epoch": 0.1834862385321101,
      "grad_norm": 0.130419681456145,
      "learning_rate": 1.8315018315018315e-05,
      "loss": 2.0057,
      "step": 100
    },
    {
      "epoch": 0.2018348623853211,
      "grad_norm": 0.13084640056684232,
      "learning_rate": 2.0146520146520148e-05,
      "loss": 2.0418,
      "step": 110
    },
    {
      "epoch": 0.22018348623853212,
      "grad_norm": 0.13600193042807687,
      "learning_rate": 2.1978021978021977e-05,
      "loss": 1.9938,
      "step": 120
    },
    {
      "epoch": 0.23853211009174313,
      "grad_norm": 0.13634836092075994,
      "learning_rate": 2.380952380952381e-05,
      "loss": 1.9859,
      "step": 130
    },
    {
      "epoch": 0.25688073394495414,
      "grad_norm": 0.1422134721246097,
      "learning_rate": 2.564102564102564e-05,
      "loss": 2.0146,
      "step": 140
    },
    {
      "epoch": 0.27522935779816515,
      "grad_norm": 0.15208974396882444,
      "learning_rate": 2.7472527472527476e-05,
      "loss": 2.0141,
      "step": 150
    },
    {
      "epoch": 0.29357798165137616,
      "grad_norm": 0.14861311683006395,
      "learning_rate": 2.9304029304029305e-05,
      "loss": 1.9914,
      "step": 160
    },
    {
      "epoch": 0.3119266055045872,
      "grad_norm": 0.154560148894899,
      "learning_rate": 3.113553113553114e-05,
      "loss": 1.9883,
      "step": 170
    },
    {
      "epoch": 0.3302752293577982,
      "grad_norm": 0.14659138270717795,
      "learning_rate": 3.296703296703297e-05,
      "loss": 1.98,
      "step": 180
    },
    {
      "epoch": 0.3486238532110092,
      "grad_norm": 0.14930915757375882,
      "learning_rate": 3.47985347985348e-05,
      "loss": 2.0039,
      "step": 190
    },
    {
      "epoch": 0.3669724770642202,
      "grad_norm": 0.15222292163468532,
      "learning_rate": 3.663003663003663e-05,
      "loss": 1.9972,
      "step": 200
    },
    {
      "epoch": 0.3853211009174312,
      "grad_norm": 0.1794465203569635,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.961,
      "step": 210
    },
    {
      "epoch": 0.4036697247706422,
      "grad_norm": 0.16111917245406526,
      "learning_rate": 4.0293040293040296e-05,
      "loss": 1.9689,
      "step": 220
    },
    {
      "epoch": 0.42201834862385323,
      "grad_norm": 0.16917075207279755,
      "learning_rate": 4.212454212454213e-05,
      "loss": 1.955,
      "step": 230
    },
    {
      "epoch": 0.44036697247706424,
      "grad_norm": 0.16035752232756367,
      "learning_rate": 4.3956043956043955e-05,
      "loss": 1.9784,
      "step": 240
    },
    {
      "epoch": 0.45871559633027525,
      "grad_norm": 0.15704206875567997,
      "learning_rate": 4.578754578754579e-05,
      "loss": 1.9898,
      "step": 250
    },
    {
      "epoch": 0.47706422018348627,
      "grad_norm": 0.17540759793553962,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.9835,
      "step": 260
    },
    {
      "epoch": 0.4954128440366973,
      "grad_norm": 0.15564282446806355,
      "learning_rate": 4.945054945054945e-05,
      "loss": 1.9849,
      "step": 270
    },
    {
      "epoch": 0.5137614678899083,
      "grad_norm": 0.1731016854986911,
      "learning_rate": 4.9998994546487535e-05,
      "loss": 1.984,
      "step": 280
    },
    {
      "epoch": 0.5321100917431193,
      "grad_norm": 0.16384332591458595,
      "learning_rate": 4.999407007091408e-05,
      "loss": 1.9846,
      "step": 290
    },
    {
      "epoch": 0.5504587155963303,
      "grad_norm": 0.16272027999288557,
      "learning_rate": 4.998504270550914e-05,
      "loss": 1.9773,
      "step": 300
    },
    {
      "epoch": 0.5688073394495413,
      "grad_norm": 0.1895435253320987,
      "learning_rate": 4.997191393215565e-05,
      "loss": 1.9544,
      "step": 310
    },
    {
      "epoch": 0.5871559633027523,
      "grad_norm": 0.153237870046719,
      "learning_rate": 4.995468590600123e-05,
      "loss": 1.9627,
      "step": 320
    },
    {
      "epoch": 0.6055045871559633,
      "grad_norm": 0.15686668474912246,
      "learning_rate": 4.9933361455104425e-05,
      "loss": 1.9977,
      "step": 330
    },
    {
      "epoch": 0.6238532110091743,
      "grad_norm": 0.16748791541980212,
      "learning_rate": 4.990794407997044e-05,
      "loss": 1.9657,
      "step": 340
    },
    {
      "epoch": 0.6422018348623854,
      "grad_norm": 0.1467745903785114,
      "learning_rate": 4.9878437952976563e-05,
      "loss": 1.9625,
      "step": 350
    },
    {
      "epoch": 0.6605504587155964,
      "grad_norm": 0.14836352242961773,
      "learning_rate": 4.984484791768721e-05,
      "loss": 1.9865,
      "step": 360
    },
    {
      "epoch": 0.6788990825688074,
      "grad_norm": 0.15162372625275922,
      "learning_rate": 4.980717948805884e-05,
      "loss": 1.9534,
      "step": 370
    },
    {
      "epoch": 0.6972477064220184,
      "grad_norm": 0.15628620060925916,
      "learning_rate": 4.9765438847534825e-05,
      "loss": 1.976,
      "step": 380
    },
    {
      "epoch": 0.7155963302752294,
      "grad_norm": 0.15072809988708077,
      "learning_rate": 4.9719632848030405e-05,
      "loss": 1.9598,
      "step": 390
    },
    {
      "epoch": 0.7339449541284404,
      "grad_norm": 0.14670892778532468,
      "learning_rate": 4.966976900880791e-05,
      "loss": 2.0001,
      "step": 400
    },
    {
      "epoch": 0.7522935779816514,
      "grad_norm": 0.15020025168070633,
      "learning_rate": 4.9615855515242434e-05,
      "loss": 1.9876,
      "step": 410
    },
    {
      "epoch": 0.7706422018348624,
      "grad_norm": 0.14345074549432202,
      "learning_rate": 4.955790121747821e-05,
      "loss": 1.9839,
      "step": 420
    },
    {
      "epoch": 0.7889908256880734,
      "grad_norm": 0.15176626779360905,
      "learning_rate": 4.949591562897574e-05,
      "loss": 1.9738,
      "step": 430
    },
    {
      "epoch": 0.8073394495412844,
      "grad_norm": 0.15234818977369388,
      "learning_rate": 4.942990892495021e-05,
      "loss": 2.0086,
      "step": 440
    },
    {
      "epoch": 0.8256880733944955,
      "grad_norm": 0.14268394349668895,
      "learning_rate": 4.9359891940701086e-05,
      "loss": 1.9797,
      "step": 450
    },
    {
      "epoch": 0.8440366972477065,
      "grad_norm": 0.15852698385870923,
      "learning_rate": 4.9285876169833544e-05,
      "loss": 1.9532,
      "step": 460
    },
    {
      "epoch": 0.8623853211009175,
      "grad_norm": 0.15294331275157483,
      "learning_rate": 4.920787376237168e-05,
      "loss": 1.992,
      "step": 470
    },
    {
      "epoch": 0.8807339449541285,
      "grad_norm": 0.15177058058835824,
      "learning_rate": 4.9125897522764044e-05,
      "loss": 1.9902,
      "step": 480
    },
    {
      "epoch": 0.8990825688073395,
      "grad_norm": 0.17927892676829146,
      "learning_rate": 4.9039960907781746e-05,
      "loss": 1.9694,
      "step": 490
    },
    {
      "epoch": 0.9174311926605505,
      "grad_norm": 0.14983509077226922,
      "learning_rate": 4.895007802430944e-05,
      "loss": 1.9506,
      "step": 500
    },
    {
      "epoch": 0.9357798165137615,
      "grad_norm": 0.16471555436795565,
      "learning_rate": 4.885626362702966e-05,
      "loss": 1.9841,
      "step": 510
    },
    {
      "epoch": 0.9541284403669725,
      "grad_norm": 0.1472502715125968,
      "learning_rate": 4.8758533116000696e-05,
      "loss": 1.9832,
      "step": 520
    },
    {
      "epoch": 0.9724770642201835,
      "grad_norm": 0.15070629202588426,
      "learning_rate": 4.86569025341287e-05,
      "loss": 1.9774,
      "step": 530
    },
    {
      "epoch": 0.9908256880733946,
      "grad_norm": 0.14316992610781615,
      "learning_rate": 4.855138856453408e-05,
      "loss": 1.9692,
      "step": 540
    },
    {
      "epoch": 1.0091743119266054,
      "grad_norm": 0.14585698001384728,
      "learning_rate": 4.844200852781295e-05,
      "loss": 1.9598,
      "step": 550
    },
    {
      "epoch": 1.0275229357798166,
      "grad_norm": 0.15101510050666817,
      "learning_rate": 4.8328780379193885e-05,
      "loss": 1.9773,
      "step": 560
    },
    {
      "epoch": 1.0458715596330275,
      "grad_norm": 0.15415482414608545,
      "learning_rate": 4.821172270559039e-05,
      "loss": 1.9699,
      "step": 570
    },
    {
      "epoch": 1.0642201834862386,
      "grad_norm": 0.15483797940425081,
      "learning_rate": 4.8090854722549914e-05,
      "loss": 1.993,
      "step": 580
    },
    {
      "epoch": 1.0825688073394495,
      "grad_norm": 0.15815400043247343,
      "learning_rate": 4.796619627109944e-05,
      "loss": 1.939,
      "step": 590
    },
    {
      "epoch": 1.1009174311926606,
      "grad_norm": 0.1645249210360427,
      "learning_rate": 4.7837767814488486e-05,
      "loss": 1.9623,
      "step": 600
    },
    {
      "epoch": 1.1192660550458715,
      "grad_norm": 0.1647619918199487,
      "learning_rate": 4.770559043483003e-05,
      "loss": 1.9816,
      "step": 610
    },
    {
      "epoch": 1.1376146788990826,
      "grad_norm": 0.1679679060709561,
      "learning_rate": 4.7569685829639734e-05,
      "loss": 1.9611,
      "step": 620
    },
    {
      "epoch": 1.1559633027522935,
      "grad_norm": 0.1676170375866798,
      "learning_rate": 4.743007630827423e-05,
      "loss": 1.959,
      "step": 630
    },
    {
      "epoch": 1.1743119266055047,
      "grad_norm": 0.14905089408466188,
      "learning_rate": 4.7286784788268904e-05,
      "loss": 1.9269,
      "step": 640
    },
    {
      "epoch": 1.1926605504587156,
      "grad_norm": 0.162817312686442,
      "learning_rate": 4.713983479157592e-05,
      "loss": 1.9638,
      "step": 650
    },
    {
      "epoch": 1.2110091743119267,
      "grad_norm": 0.18370789078991157,
      "learning_rate": 4.698925044070296e-05,
      "loss": 1.9494,
      "step": 660
    },
    {
      "epoch": 1.2293577981651376,
      "grad_norm": 0.15795771627671365,
      "learning_rate": 4.683505645475339e-05,
      "loss": 1.96,
      "step": 670
    },
    {
      "epoch": 1.2477064220183487,
      "grad_norm": 0.15531127222157262,
      "learning_rate": 4.6677278145368554e-05,
      "loss": 1.969,
      "step": 680
    },
    {
      "epoch": 1.2660550458715596,
      "grad_norm": 0.15599004624517146,
      "learning_rate": 4.65159414125727e-05,
      "loss": 1.9435,
      "step": 690
    },
    {
      "epoch": 1.2844036697247707,
      "grad_norm": 0.16702581452844592,
      "learning_rate": 4.6351072740521415e-05,
      "loss": 1.9323,
      "step": 700
    },
    {
      "epoch": 1.3027522935779816,
      "grad_norm": 0.15898151168116278,
      "learning_rate": 4.6182699193154125e-05,
      "loss": 1.9442,
      "step": 710
    },
    {
      "epoch": 1.3211009174311927,
      "grad_norm": 0.15969818021992918,
      "learning_rate": 4.601084840975139e-05,
      "loss": 1.973,
      "step": 720
    },
    {
      "epoch": 1.3394495412844036,
      "grad_norm": 0.15378573113733301,
      "learning_rate": 4.583554860039784e-05,
      "loss": 1.9366,
      "step": 730
    },
    {
      "epoch": 1.3577981651376148,
      "grad_norm": 0.15121087319924134,
      "learning_rate": 4.565682854135132e-05,
      "loss": 1.9698,
      "step": 740
    },
    {
      "epoch": 1.3761467889908257,
      "grad_norm": 0.15898725981112244,
      "learning_rate": 4.547471757031919e-05,
      "loss": 1.9604,
      "step": 750
    },
    {
      "epoch": 1.3944954128440368,
      "grad_norm": 0.15306064791439133,
      "learning_rate": 4.528924558164233e-05,
      "loss": 1.962,
      "step": 760
    },
    {
      "epoch": 1.4128440366972477,
      "grad_norm": 0.16371512899536222,
      "learning_rate": 4.510044302138793e-05,
      "loss": 1.9793,
      "step": 770
    },
    {
      "epoch": 1.4311926605504588,
      "grad_norm": 0.15788911392614066,
      "learning_rate": 4.490834088235157e-05,
      "loss": 1.9801,
      "step": 780
    },
    {
      "epoch": 1.4495412844036697,
      "grad_norm": 0.16398987312372718,
      "learning_rate": 4.4712970698969645e-05,
      "loss": 1.9236,
      "step": 790
    },
    {
      "epoch": 1.4678899082568808,
      "grad_norm": 0.15020427466606878,
      "learning_rate": 4.451436454214285e-05,
      "loss": 1.9438,
      "step": 800
    },
    {
      "epoch": 1.4862385321100917,
      "grad_norm": 0.1528061160370563,
      "learning_rate": 4.4312555013971534e-05,
      "loss": 1.9364,
      "step": 810
    },
    {
      "epoch": 1.5045871559633026,
      "grad_norm": 0.15222177715019794,
      "learning_rate": 4.4107575242404013e-05,
      "loss": 1.9399,
      "step": 820
    },
    {
      "epoch": 1.5229357798165137,
      "grad_norm": 0.1540985658911008,
      "learning_rate": 4.38994588757984e-05,
      "loss": 1.9483,
      "step": 830
    },
    {
      "epoch": 1.5412844036697249,
      "grad_norm": 0.1599074284657503,
      "learning_rate": 4.3688240077399074e-05,
      "loss": 1.9748,
      "step": 840
    },
    {
      "epoch": 1.5596330275229358,
      "grad_norm": 0.1516707606485465,
      "learning_rate": 4.3473953519728685e-05,
      "loss": 1.9213,
      "step": 850
    },
    {
      "epoch": 1.5779816513761467,
      "grad_norm": 0.15122814764762293,
      "learning_rate": 4.325663437889643e-05,
      "loss": 1.9893,
      "step": 860
    },
    {
      "epoch": 1.5963302752293578,
      "grad_norm": 0.15959987778428422,
      "learning_rate": 4.30363183288238e-05,
      "loss": 1.9602,
      "step": 870
    },
    {
      "epoch": 1.614678899082569,
      "grad_norm": 0.15266741829090746,
      "learning_rate": 4.2813041535388496e-05,
      "loss": 1.9529,
      "step": 880
    },
    {
      "epoch": 1.6330275229357798,
      "grad_norm": 0.16386079884752924,
      "learning_rate": 4.258684065048766e-05,
      "loss": 1.9606,
      "step": 890
    },
    {
      "epoch": 1.6513761467889907,
      "grad_norm": 0.15583648586262694,
      "learning_rate": 4.23577528060213e-05,
      "loss": 1.9581,
      "step": 900
    },
    {
      "epoch": 1.6697247706422018,
      "grad_norm": 0.1585736145888436,
      "learning_rate": 4.212581560779689e-05,
      "loss": 1.9552,
      "step": 910
    },
    {
      "epoch": 1.688073394495413,
      "grad_norm": 0.16965341576331627,
      "learning_rate": 4.1891067129356276e-05,
      "loss": 1.9296,
      "step": 920
    },
    {
      "epoch": 1.7064220183486238,
      "grad_norm": 0.15927292960064882,
      "learning_rate": 4.165354590572564e-05,
      "loss": 1.9762,
      "step": 930
    },
    {
      "epoch": 1.7247706422018347,
      "grad_norm": 0.15450025224674474,
      "learning_rate": 4.14132909270899e-05,
      "loss": 1.9429,
      "step": 940
    },
    {
      "epoch": 1.7431192660550459,
      "grad_norm": 0.15355485102004446,
      "learning_rate": 4.117034163239219e-05,
      "loss": 1.9233,
      "step": 950
    },
    {
      "epoch": 1.761467889908257,
      "grad_norm": 0.15539982061507696,
      "learning_rate": 4.092473790285986e-05,
      "loss": 1.944,
      "step": 960
    },
    {
      "epoch": 1.7798165137614679,
      "grad_norm": 0.1551191369865497,
      "learning_rate": 4.0676520055457765e-05,
      "loss": 1.945,
      "step": 970
    },
    {
      "epoch": 1.7981651376146788,
      "grad_norm": 0.15695341020759185,
      "learning_rate": 4.0425728836270037e-05,
      "loss": 1.9656,
      "step": 980
    },
    {
      "epoch": 1.81651376146789,
      "grad_norm": 0.16047022345268508,
      "learning_rate": 4.017240541381146e-05,
      "loss": 1.9546,
      "step": 990
    },
    {
      "epoch": 1.834862385321101,
      "grad_norm": 0.1561566895170307,
      "learning_rate": 3.9916591372269434e-05,
      "loss": 1.9363,
      "step": 1000
    },
    {
      "epoch": 1.853211009174312,
      "grad_norm": 0.15117586495122826,
      "learning_rate": 3.9658328704677794e-05,
      "loss": 1.978,
      "step": 1010
    },
    {
      "epoch": 1.8715596330275228,
      "grad_norm": 0.1605982135175342,
      "learning_rate": 3.939765980602342e-05,
      "loss": 1.9713,
      "step": 1020
    },
    {
      "epoch": 1.889908256880734,
      "grad_norm": 0.1602836931731585,
      "learning_rate": 3.913462746628691e-05,
      "loss": 2.0041,
      "step": 1030
    },
    {
      "epoch": 1.908256880733945,
      "grad_norm": 0.16286472480475192,
      "learning_rate": 3.886927486341844e-05,
      "loss": 1.9352,
      "step": 1040
    },
    {
      "epoch": 1.926605504587156,
      "grad_norm": 0.15449199584717296,
      "learning_rate": 3.860164555624988e-05,
      "loss": 1.97,
      "step": 1050
    },
    {
      "epoch": 1.9449541284403669,
      "grad_norm": 0.14684566302878604,
      "learning_rate": 3.833178347734443e-05,
      "loss": 1.9433,
      "step": 1060
    },
    {
      "epoch": 1.963302752293578,
      "grad_norm": 0.16167305499507623,
      "learning_rate": 3.80597329257849e-05,
      "loss": 1.9782,
      "step": 1070
    },
    {
      "epoch": 1.981651376146789,
      "grad_norm": 0.1544185075202575,
      "learning_rate": 3.778553855990176e-05,
      "loss": 1.9253,
      "step": 1080
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.16072854970224754,
      "learning_rate": 3.750924538994235e-05,
      "loss": 1.9578,
      "step": 1090
    }
  ],
  "logging_steps": 10,
  "max_steps": 2725,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5869823436193792e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}