|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9959925193694897, |
|
"eval_steps": 100, |
|
"global_step": 233, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004274646005877639, |
|
"grad_norm": 3.4727758395385346, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -0.9238853454589844, |
|
"logits/rejected": -0.9009266495704651, |
|
"logps/chosen": -211.83998107910156, |
|
"logps/rejected": -194.95265197753906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008549292011755277, |
|
"grad_norm": 3.5000648062483686, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -0.9474210739135742, |
|
"logits/rejected": -0.9417086243629456, |
|
"logps/chosen": -160.0943603515625, |
|
"logps/rejected": -163.26644897460938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.012823938017632914, |
|
"grad_norm": 3.8566721368935113, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -0.8552289009094238, |
|
"logits/rejected": -0.9027292132377625, |
|
"logps/chosen": -197.13523864746094, |
|
"logps/rejected": -191.77366638183594, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00209163804538548, |
|
"rewards/margins": 0.0021166829392313957, |
|
"rewards/rejected": -2.5045330403372645e-05, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.017098584023510555, |
|
"grad_norm": 3.527297888533762, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/chosen": -0.9195055961608887, |
|
"logits/rejected": -0.9506024122238159, |
|
"logps/chosen": -175.96563720703125, |
|
"logps/rejected": -177.187255859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0005994887324050069, |
|
"rewards/margins": 0.001228818204253912, |
|
"rewards/rejected": -0.0006293297046795487, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 3.274108961837268, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.9131849408149719, |
|
"logits/rejected": -0.9851359128952026, |
|
"logps/chosen": -196.52279663085938, |
|
"logps/rejected": -209.4899444580078, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0014956831000745296, |
|
"rewards/margins": -0.002281556138768792, |
|
"rewards/rejected": 0.0007858729222789407, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02564787603526583, |
|
"grad_norm": 3.4643988401861643, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.0323811769485474, |
|
"logits/rejected": -1.0281962156295776, |
|
"logps/chosen": -175.13864135742188, |
|
"logps/rejected": -171.71237182617188, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.0016992997843772173, |
|
"rewards/margins": -0.0023347530514001846, |
|
"rewards/rejected": 0.000635453499853611, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.029922522041143467, |
|
"grad_norm": 3.753822101296772, |
|
"learning_rate": 2.916666666666667e-07, |
|
"logits/chosen": -0.8140788078308105, |
|
"logits/rejected": -0.8268399238586426, |
|
"logps/chosen": -204.0390625, |
|
"logps/rejected": -210.50558471679688, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0002044451393885538, |
|
"rewards/margins": 0.0006307458970695734, |
|
"rewards/rejected": -0.0004263008013367653, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03419716804702111, |
|
"grad_norm": 3.1848827253835568, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -0.9922436475753784, |
|
"logits/rejected": -0.9979274868965149, |
|
"logps/chosen": -192.83494567871094, |
|
"logps/rejected": -200.88128662109375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.002161582000553608, |
|
"rewards/margins": 0.0022183258552104235, |
|
"rewards/rejected": -5.674359272234142e-05, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03847181405289874, |
|
"grad_norm": 3.7147220039656568, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -0.9252921342849731, |
|
"logits/rejected": -0.9685516357421875, |
|
"logps/chosen": -175.70448303222656, |
|
"logps/rejected": -180.89736938476562, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0024666767567396164, |
|
"rewards/margins": 0.002756566507741809, |
|
"rewards/rejected": -0.0002898902166634798, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 3.553251668230928, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.9595114588737488, |
|
"logits/rejected": -0.9833444356918335, |
|
"logps/chosen": -208.72735595703125, |
|
"logps/rejected": -214.8730926513672, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.002427927916869521, |
|
"rewards/margins": 0.001882559503428638, |
|
"rewards/rejected": 0.0005453681806102395, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04702110606465402, |
|
"grad_norm": 3.4673888891096216, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/chosen": -0.986074686050415, |
|
"logits/rejected": -0.9903304576873779, |
|
"logps/chosen": -138.227783203125, |
|
"logps/rejected": -137.13824462890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0006541174370795488, |
|
"rewards/margins": -0.0011124282609671354, |
|
"rewards/rejected": 0.0017665456980466843, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05129575207053166, |
|
"grad_norm": 3.412261478741586, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.896647036075592, |
|
"logits/rejected": -0.9640191197395325, |
|
"logps/chosen": -157.36685180664062, |
|
"logps/rejected": -180.9624481201172, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0007026732200756669, |
|
"rewards/margins": 0.0007272702641785145, |
|
"rewards/rejected": -2.459682582411915e-05, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.055570398076409296, |
|
"grad_norm": 3.5015942981464434, |
|
"learning_rate": 5.416666666666666e-07, |
|
"logits/chosen": -0.8603953123092651, |
|
"logits/rejected": -0.8457555770874023, |
|
"logps/chosen": -190.04727172851562, |
|
"logps/rejected": -196.87872314453125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0017774368170648813, |
|
"rewards/margins": 0.002889451337978244, |
|
"rewards/rejected": -0.0011120146373286843, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.059845044082286934, |
|
"grad_norm": 3.3564122983724283, |
|
"learning_rate": 5.833333333333334e-07, |
|
"logits/chosen": -0.9946928024291992, |
|
"logits/rejected": -0.9674972295761108, |
|
"logps/chosen": -173.98526000976562, |
|
"logps/rejected": -167.90187072753906, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00402639526873827, |
|
"rewards/margins": 0.00235772505402565, |
|
"rewards/rejected": 0.0016686702147126198, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 3.6183547057085903, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -0.9302492737770081, |
|
"logits/rejected": -0.9131873846054077, |
|
"logps/chosen": -172.501953125, |
|
"logps/rejected": -165.2920684814453, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.008232050575315952, |
|
"rewards/margins": 0.0026909802109003067, |
|
"rewards/rejected": 0.005541070364415646, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06839433609404222, |
|
"grad_norm": 4.094403587057344, |
|
"learning_rate": 6.666666666666666e-07, |
|
"logits/chosen": -0.8987658023834229, |
|
"logits/rejected": -0.918194591999054, |
|
"logps/chosen": -182.8192901611328, |
|
"logps/rejected": -188.6702423095703, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0033055683597922325, |
|
"rewards/margins": 0.006322154775261879, |
|
"rewards/rejected": -0.003016585949808359, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07266898209991986, |
|
"grad_norm": 3.7556102735295602, |
|
"learning_rate": 7.083333333333334e-07, |
|
"logits/chosen": -0.7985554933547974, |
|
"logits/rejected": -0.8355307579040527, |
|
"logps/chosen": -218.515869140625, |
|
"logps/rejected": -218.05130004882812, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.002777060493826866, |
|
"rewards/margins": 0.0028633405454456806, |
|
"rewards/rejected": -8.627981878817081e-05, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07694362810579748, |
|
"grad_norm": 3.7267439469140835, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": -1.0510368347167969, |
|
"logits/rejected": -1.1025066375732422, |
|
"logps/chosen": -187.49362182617188, |
|
"logps/rejected": -213.5237274169922, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.003624723292887211, |
|
"rewards/margins": 0.0034801624715328217, |
|
"rewards/rejected": 0.00014456117060035467, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08121827411167512, |
|
"grad_norm": 3.6435544044761947, |
|
"learning_rate": 7.916666666666666e-07, |
|
"logits/chosen": -1.0699188709259033, |
|
"logits/rejected": -1.0673398971557617, |
|
"logps/chosen": -185.699951171875, |
|
"logps/rejected": -175.41836547851562, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.01173433754593134, |
|
"rewards/margins": 0.009952141903340816, |
|
"rewards/rejected": 0.001782197505235672, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 3.56212142993403, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.9693958759307861, |
|
"logits/rejected": -1.0447947978973389, |
|
"logps/chosen": -160.5248260498047, |
|
"logps/rejected": -177.9250030517578, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0071954806335270405, |
|
"rewards/margins": 0.007437723223119974, |
|
"rewards/rejected": -0.00024224258959293365, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0897675661234304, |
|
"grad_norm": 3.599911110818667, |
|
"learning_rate": 8.75e-07, |
|
"logits/chosen": -0.8949970006942749, |
|
"logits/rejected": -0.9538885951042175, |
|
"logps/chosen": -155.24188232421875, |
|
"logps/rejected": -175.83969116210938, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0026197312399744987, |
|
"rewards/margins": 0.013713551685214043, |
|
"rewards/rejected": -0.016333281993865967, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09404221212930804, |
|
"grad_norm": 3.809589692220953, |
|
"learning_rate": 9.166666666666665e-07, |
|
"logits/chosen": -0.8426035642623901, |
|
"logits/rejected": -0.909124493598938, |
|
"logps/chosen": -162.82546997070312, |
|
"logps/rejected": -168.50677490234375, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.014116955921053886, |
|
"rewards/margins": 0.008742437697947025, |
|
"rewards/rejected": 0.005374519154429436, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09831685813518568, |
|
"grad_norm": 3.8481134168387325, |
|
"learning_rate": 9.583333333333334e-07, |
|
"logits/chosen": -0.9963463544845581, |
|
"logits/rejected": -1.030158281326294, |
|
"logps/chosen": -212.16732788085938, |
|
"logps/rejected": -226.55050659179688, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.005188916344195604, |
|
"rewards/margins": 0.037282973527908325, |
|
"rewards/rejected": -0.04247189313173294, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10259150414106331, |
|
"grad_norm": 4.017050664188984, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9375415444374084, |
|
"logits/rejected": -0.9786323308944702, |
|
"logps/chosen": -161.66697692871094, |
|
"logps/rejected": -171.43328857421875, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0014034155756235123, |
|
"rewards/margins": 0.029357939958572388, |
|
"rewards/rejected": -0.03076135367155075, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 4.219585279560121, |
|
"learning_rate": 9.999435142363483e-07, |
|
"logits/chosen": -0.9440574049949646, |
|
"logits/rejected": -0.97591233253479, |
|
"logps/chosen": -142.18214416503906, |
|
"logps/rejected": -145.74217224121094, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.006380043923854828, |
|
"rewards/margins": 0.02844325453042984, |
|
"rewards/rejected": -0.022063210606575012, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11114079615281859, |
|
"grad_norm": 4.465336404650454, |
|
"learning_rate": 9.997740697079592e-07, |
|
"logits/chosen": -0.907569408416748, |
|
"logits/rejected": -0.9431344270706177, |
|
"logps/chosen": -186.16468811035156, |
|
"logps/rejected": -188.70187377929688, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03334157541394234, |
|
"rewards/margins": 0.04588525742292404, |
|
"rewards/rejected": -0.07922682911157608, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11541544215869623, |
|
"grad_norm": 4.035688150172887, |
|
"learning_rate": 9.994917046996472e-07, |
|
"logits/chosen": -0.9081155061721802, |
|
"logits/rejected": -0.9375332593917847, |
|
"logps/chosen": -196.47586059570312, |
|
"logps/rejected": -210.2967071533203, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.06558644771575928, |
|
"rewards/margins": 0.024703964591026306, |
|
"rewards/rejected": -0.09029041230678558, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.11969008816457387, |
|
"grad_norm": 4.589583975444085, |
|
"learning_rate": 9.990964830098245e-07, |
|
"logits/chosen": -0.9100086688995361, |
|
"logits/rejected": -0.9473557472229004, |
|
"logps/chosen": -183.28317260742188, |
|
"logps/rejected": -191.90957641601562, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0952601209282875, |
|
"rewards/margins": 0.06084320694208145, |
|
"rewards/rejected": -0.15610332787036896, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12396473417045151, |
|
"grad_norm": 4.479468138978008, |
|
"learning_rate": 9.985884939360872e-07, |
|
"logits/chosen": -1.1165940761566162, |
|
"logits/rejected": -1.1295504570007324, |
|
"logps/chosen": -166.12542724609375, |
|
"logps/rejected": -165.3243408203125, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11186902225017548, |
|
"rewards/margins": 0.050340794026851654, |
|
"rewards/rejected": -0.16220980882644653, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 4.703342289738615, |
|
"learning_rate": 9.97967852255038e-07, |
|
"logits/chosen": -0.9528751969337463, |
|
"logits/rejected": -0.9631531238555908, |
|
"logps/chosen": -254.89320373535156, |
|
"logps/rejected": -258.4338073730469, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22431208193302155, |
|
"rewards/margins": 0.050895195454359055, |
|
"rewards/rejected": -0.2752072513103485, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13251402618220678, |
|
"grad_norm": 4.869138630164683, |
|
"learning_rate": 9.972346981963546e-07, |
|
"logits/chosen": -1.059159755706787, |
|
"logits/rejected": -1.1036772727966309, |
|
"logps/chosen": -245.163330078125, |
|
"logps/rejected": -268.007568359375, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.28532153367996216, |
|
"rewards/margins": 0.1269759237766266, |
|
"rewards/rejected": -0.41229742765426636, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13678867218808444, |
|
"grad_norm": 4.841549329203085, |
|
"learning_rate": 9.96389197411104e-07, |
|
"logits/chosen": -0.9731124043464661, |
|
"logits/rejected": -1.025037169456482, |
|
"logps/chosen": -209.7532958984375, |
|
"logps/rejected": -234.02642822265625, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.20661108195781708, |
|
"rewards/margins": 0.18127745389938354, |
|
"rewards/rejected": -0.38788852095603943, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14106331819396206, |
|
"grad_norm": 4.527086707272363, |
|
"learning_rate": 9.954315409343168e-07, |
|
"logits/chosen": -0.9516006708145142, |
|
"logits/rejected": -1.0085594654083252, |
|
"logps/chosen": -233.74896240234375, |
|
"logps/rejected": -257.0697937011719, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3886350095272064, |
|
"rewards/margins": 0.19288921356201172, |
|
"rewards/rejected": -0.5815242528915405, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.14533796419983971, |
|
"grad_norm": 5.675688535211087, |
|
"learning_rate": 9.943619451418224e-07, |
|
"logits/chosen": -0.9171434640884399, |
|
"logits/rejected": -0.9520907998085022, |
|
"logps/chosen": -232.1197967529297, |
|
"logps/rejected": -252.1339874267578, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4231939911842346, |
|
"rewards/margins": 0.2075667530298233, |
|
"rewards/rejected": -0.6307607293128967, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 4.555434497600014, |
|
"learning_rate": 9.931806517013612e-07, |
|
"logits/chosen": -0.9599072933197021, |
|
"logits/rejected": -0.9873026013374329, |
|
"logps/chosen": -235.87911987304688, |
|
"logps/rejected": -277.68585205078125, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5353493690490723, |
|
"rewards/margins": 0.26459625363349915, |
|
"rewards/rejected": -0.799945592880249, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15388725621159496, |
|
"grad_norm": 4.824318328500942, |
|
"learning_rate": 9.918879275179817e-07, |
|
"logits/chosen": -1.1668760776519775, |
|
"logits/rejected": -1.1293714046478271, |
|
"logps/chosen": -288.35406494140625, |
|
"logps/rejected": -298.5234375, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6688686609268188, |
|
"rewards/margins": 0.22619600594043732, |
|
"rewards/rejected": -0.8950645923614502, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15816190221747262, |
|
"grad_norm": 4.401543973490666, |
|
"learning_rate": 9.904840646737345e-07, |
|
"logits/chosen": -0.9521760940551758, |
|
"logits/rejected": -0.9997081756591797, |
|
"logps/chosen": -282.0852355957031, |
|
"logps/rejected": -336.65020751953125, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7313822507858276, |
|
"rewards/margins": 0.29729628562927246, |
|
"rewards/rejected": -1.0286785364151, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16243654822335024, |
|
"grad_norm": 5.15798330777588, |
|
"learning_rate": 9.889693803616791e-07, |
|
"logits/chosen": -1.0276933908462524, |
|
"logits/rejected": -1.045649766921997, |
|
"logps/chosen": -311.87677001953125, |
|
"logps/rejected": -334.0548095703125, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0708937644958496, |
|
"rewards/margins": 0.2531777620315552, |
|
"rewards/rejected": -1.3240714073181152, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1667111942292279, |
|
"grad_norm": 4.8993868987417555, |
|
"learning_rate": 9.873442168142157e-07, |
|
"logits/chosen": -0.909888505935669, |
|
"logits/rejected": -0.9343925714492798, |
|
"logps/chosen": -254.18350219726562, |
|
"logps/rejected": -285.18243408203125, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9324233531951904, |
|
"rewards/margins": 0.22102315723896027, |
|
"rewards/rejected": -1.1534464359283447, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 4.965329298176294, |
|
"learning_rate": 9.856089412257604e-07, |
|
"logits/chosen": -0.8430695533752441, |
|
"logits/rejected": -0.8712520599365234, |
|
"logps/chosen": -278.5356750488281, |
|
"logps/rejected": -313.9254455566406, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.097804069519043, |
|
"rewards/margins": 0.29625624418258667, |
|
"rewards/rejected": -1.3940601348876953, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17526048624098317, |
|
"grad_norm": 4.964663460213464, |
|
"learning_rate": 9.8376394566978e-07, |
|
"logits/chosen": -0.9349880218505859, |
|
"logits/rejected": -0.9195177555084229, |
|
"logps/chosen": -353.0047607421875, |
|
"logps/rejected": -375.0325927734375, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4320145845413208, |
|
"rewards/margins": 0.2865561842918396, |
|
"rewards/rejected": -1.7185708284378052, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1795351322468608, |
|
"grad_norm": 4.673829673061791, |
|
"learning_rate": 9.818096470102066e-07, |
|
"logits/chosen": -0.9460776448249817, |
|
"logits/rejected": -1.0075451135635376, |
|
"logps/chosen": -326.8829040527344, |
|
"logps/rejected": -359.4068908691406, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3413481712341309, |
|
"rewards/margins": 0.43105652928352356, |
|
"rewards/rejected": -1.772404670715332, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18380977825273845, |
|
"grad_norm": 4.756386075263894, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits/chosen": -0.8998066186904907, |
|
"logits/rejected": -0.9348124265670776, |
|
"logps/chosen": -347.86090087890625, |
|
"logps/rejected": -441.64501953125, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.69937264919281, |
|
"rewards/margins": 0.7839919924736023, |
|
"rewards/rejected": -2.4833645820617676, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18808442425861607, |
|
"grad_norm": 5.666891206447458, |
|
"learning_rate": 9.775749312176248e-07, |
|
"logits/chosen": -0.8193731307983398, |
|
"logits/rejected": -0.8275444507598877, |
|
"logps/chosen": -334.3702697753906, |
|
"logps/rejected": -402.1867370605469, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.655839204788208, |
|
"rewards/margins": 0.6519087553024292, |
|
"rewards/rejected": -2.3077480792999268, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 5.611553010112512, |
|
"learning_rate": 9.752954708892377e-07, |
|
"logits/chosen": -0.8545299172401428, |
|
"logits/rejected": -0.9027716517448425, |
|
"logps/chosen": -371.7701721191406, |
|
"logps/rejected": -439.71881103515625, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.9063547849655151, |
|
"rewards/margins": 0.5792344808578491, |
|
"rewards/rejected": -2.4855895042419434, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19663371627037135, |
|
"grad_norm": 5.062237682542423, |
|
"learning_rate": 9.729086208503173e-07, |
|
"logits/chosen": -0.9441611766815186, |
|
"logits/rejected": -0.956858217716217, |
|
"logps/chosen": -451.3914794921875, |
|
"logps/rejected": -498.17999267578125, |
|
"loss": 0.5592, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6227638721466064, |
|
"rewards/margins": 0.4997914731502533, |
|
"rewards/rejected": -3.1225552558898926, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.200908362276249, |
|
"grad_norm": 5.547722907580694, |
|
"learning_rate": 9.70414920393052e-07, |
|
"logits/chosen": -0.8402402400970459, |
|
"logits/rejected": -0.8305561542510986, |
|
"logps/chosen": -410.6358642578125, |
|
"logps/rejected": -456.8866882324219, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.3196969032287598, |
|
"rewards/margins": 0.5383195281028748, |
|
"rewards/rejected": -2.8580164909362793, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.20518300828212663, |
|
"grad_norm": 5.843768728466239, |
|
"learning_rate": 9.678149329517409e-07, |
|
"logits/chosen": -0.9230031967163086, |
|
"logits/rejected": -0.9459983706474304, |
|
"logps/chosen": -421.91253662109375, |
|
"logps/rejected": -464.15460205078125, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3388688564300537, |
|
"rewards/margins": 0.5758055448532104, |
|
"rewards/rejected": -2.9146745204925537, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.20945765428800428, |
|
"grad_norm": 6.600018288252386, |
|
"learning_rate": 9.651092459754877e-07, |
|
"logits/chosen": -0.7874542474746704, |
|
"logits/rejected": -0.7807765007019043, |
|
"logps/chosen": -553.3658447265625, |
|
"logps/rejected": -578.9154052734375, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.3889737129211426, |
|
"rewards/margins": 0.2704327702522278, |
|
"rewards/rejected": -3.6594066619873047, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 6.555644686187637, |
|
"learning_rate": 9.62298470795473e-07, |
|
"logits/chosen": -0.7596021890640259, |
|
"logits/rejected": -0.8105506896972656, |
|
"logps/chosen": -396.783935546875, |
|
"logps/rejected": -437.79541015625, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.4116053581237793, |
|
"rewards/margins": 0.3920546770095825, |
|
"rewards/rejected": -2.8036601543426514, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21800694629975956, |
|
"grad_norm": 6.744038117473701, |
|
"learning_rate": 9.59383242486827e-07, |
|
"logits/chosen": -0.8625648617744446, |
|
"logits/rejected": -0.8875184059143066, |
|
"logps/chosen": -505.1508483886719, |
|
"logps/rejected": -608.79248046875, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.0446105003356934, |
|
"rewards/margins": 0.9900886416435242, |
|
"rewards/rejected": -4.034698963165283, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22228159230563718, |
|
"grad_norm": 5.565039920114929, |
|
"learning_rate": 9.56364219725138e-07, |
|
"logits/chosen": -0.8463042974472046, |
|
"logits/rejected": -0.8962733745574951, |
|
"logps/chosen": -499.99041748046875, |
|
"logps/rejected": -634.49072265625, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.071751832962036, |
|
"rewards/margins": 1.300065279006958, |
|
"rewards/rejected": -4.371817588806152, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22655623831151483, |
|
"grad_norm": 9.993449327747468, |
|
"learning_rate": 9.532420846376315e-07, |
|
"logits/chosen": -0.7763329744338989, |
|
"logits/rejected": -0.8177902698516846, |
|
"logps/chosen": -433.4925842285156, |
|
"logps/rejected": -530.1204223632812, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.5958311557769775, |
|
"rewards/margins": 0.9427847862243652, |
|
"rewards/rejected": -3.5386157035827637, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23083088431739246, |
|
"grad_norm": 6.914149355278302, |
|
"learning_rate": 9.500175426490454e-07, |
|
"logits/chosen": -0.7263307571411133, |
|
"logits/rejected": -0.7950284481048584, |
|
"logps/chosen": -590.033447265625, |
|
"logps/rejected": -696.9810791015625, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.79225492477417, |
|
"rewards/margins": 0.9605345726013184, |
|
"rewards/rejected": -4.75278902053833, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 6.4385225722348425, |
|
"learning_rate": 9.466913223222465e-07, |
|
"logits/chosen": -0.73805832862854, |
|
"logits/rejected": -0.8121139407157898, |
|
"logps/chosen": -527.0991821289062, |
|
"logps/rejected": -672.7268676757812, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.397312641143799, |
|
"rewards/margins": 1.3639640808105469, |
|
"rewards/rejected": -4.761276721954346, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23938017632914774, |
|
"grad_norm": 7.766777578194787, |
|
"learning_rate": 9.432641751936162e-07, |
|
"logits/chosen": -0.8009728193283081, |
|
"logits/rejected": -0.8259899020195007, |
|
"logps/chosen": -421.18414306640625, |
|
"logps/rejected": -515.0050659179688, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.522728204727173, |
|
"rewards/margins": 0.8710657954216003, |
|
"rewards/rejected": -3.393793821334839, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2436548223350254, |
|
"grad_norm": 6.5333907258413655, |
|
"learning_rate": 9.397368756032444e-07, |
|
"logits/chosen": -0.7609117031097412, |
|
"logits/rejected": -0.7754147052764893, |
|
"logps/chosen": -436.06427001953125, |
|
"logps/rejected": -512.485107421875, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.599217414855957, |
|
"rewards/margins": 0.6933461427688599, |
|
"rewards/rejected": -3.2925636768341064, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24792946834090301, |
|
"grad_norm": 7.290942942753059, |
|
"learning_rate": 9.36110220519976e-07, |
|
"logits/chosen": -0.7123927474021912, |
|
"logits/rejected": -0.7812705039978027, |
|
"logps/chosen": -428.41351318359375, |
|
"logps/rejected": -493.216552734375, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.501229763031006, |
|
"rewards/margins": 0.6336008906364441, |
|
"rewards/rejected": -3.1348307132720947, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.25220411434678064, |
|
"grad_norm": 7.291539302989226, |
|
"learning_rate": 9.323850293613379e-07, |
|
"logits/chosen": -0.8743740916252136, |
|
"logits/rejected": -0.8304850459098816, |
|
"logps/chosen": -416.0941467285156, |
|
"logps/rejected": -461.4928283691406, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.469886064529419, |
|
"rewards/margins": 0.6079959869384766, |
|
"rewards/rejected": -3.0778818130493164, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 6.444086326444115, |
|
"learning_rate": 9.285621438083997e-07, |
|
"logits/chosen": -0.7638828754425049, |
|
"logits/rejected": -0.830043375492096, |
|
"logps/chosen": -462.22723388671875, |
|
"logps/rejected": -567.3082275390625, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.678903818130493, |
|
"rewards/margins": 0.9149044752120972, |
|
"rewards/rejected": -3.59380841255188, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26075340635853594, |
|
"grad_norm": 6.239441558672264, |
|
"learning_rate": 9.246424276156006e-07, |
|
"logits/chosen": -0.7686220407485962, |
|
"logits/rejected": -0.786496102809906, |
|
"logps/chosen": -426.57977294921875, |
|
"logps/rejected": -539.332763671875, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.421875, |
|
"rewards/margins": 1.0811634063720703, |
|
"rewards/rejected": -3.5030384063720703, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.26502805236441357, |
|
"grad_norm": 6.955390195667037, |
|
"learning_rate": 9.206267664155906e-07, |
|
"logits/chosen": -0.8518524765968323, |
|
"logits/rejected": -0.8896721005439758, |
|
"logps/chosen": -490.898681640625, |
|
"logps/rejected": -571.5889282226562, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7304701805114746, |
|
"rewards/margins": 0.8642421364784241, |
|
"rewards/rejected": -3.594712495803833, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2693026983702912, |
|
"grad_norm": 6.71732914608112, |
|
"learning_rate": 9.165160675191271e-07, |
|
"logits/chosen": -0.7856395244598389, |
|
"logits/rejected": -0.8273566961288452, |
|
"logps/chosen": -406.04241943359375, |
|
"logps/rejected": -512.580810546875, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.3751254081726074, |
|
"rewards/margins": 1.008442759513855, |
|
"rewards/rejected": -3.3835678100585938, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2735773443761689, |
|
"grad_norm": 8.23163566342515, |
|
"learning_rate": 9.123112597100757e-07, |
|
"logits/chosen": -0.7550954818725586, |
|
"logits/rejected": -0.7312250733375549, |
|
"logps/chosen": -428.09210205078125, |
|
"logps/rejected": -472.0053405761719, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.460500717163086, |
|
"rewards/margins": 0.6525804996490479, |
|
"rewards/rejected": -3.1130809783935547, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 6.484017150981526, |
|
"learning_rate": 9.080132930355566e-07, |
|
"logits/chosen": -0.7198902368545532, |
|
"logits/rejected": -0.7333334684371948, |
|
"logps/chosen": -447.80694580078125, |
|
"logps/rejected": -543.547119140625, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3654160499572754, |
|
"rewards/margins": 1.1327059268951416, |
|
"rewards/rejected": -3.498121738433838, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2821266363879241, |
|
"grad_norm": 7.88288469592664, |
|
"learning_rate": 9.036231385912889e-07, |
|
"logits/chosen": -0.787277102470398, |
|
"logits/rejected": -0.8082758188247681, |
|
"logps/chosen": -542.9320678710938, |
|
"logps/rejected": -589.2691650390625, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.2829174995422363, |
|
"rewards/margins": 0.4686228036880493, |
|
"rewards/rejected": -3.751540184020996, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.28640128239380175, |
|
"grad_norm": 6.87598900963406, |
|
"learning_rate": 8.991417883021779e-07, |
|
"logits/chosen": -0.7320197820663452, |
|
"logits/rejected": -0.7914742231369019, |
|
"logps/chosen": -322.4740295410156, |
|
"logps/rejected": -403.00982666015625, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.867960810661316, |
|
"rewards/margins": 0.7985786199569702, |
|
"rewards/rejected": -2.666539430618286, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.29067592839967943, |
|
"grad_norm": 6.988332805689512, |
|
"learning_rate": 8.945702546981968e-07, |
|
"logits/chosen": -0.7299609780311584, |
|
"logits/rejected": -0.7391811013221741, |
|
"logps/chosen": -424.68255615234375, |
|
"logps/rejected": -520.7315673828125, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.4903666973114014, |
|
"rewards/margins": 0.9038018584251404, |
|
"rewards/rejected": -3.3941686153411865, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.29495057440555705, |
|
"grad_norm": 7.25643319614823, |
|
"learning_rate": 8.899095706856121e-07, |
|
"logits/chosen": -0.8242793679237366, |
|
"logits/rejected": -0.8567203879356384, |
|
"logps/chosen": -416.467041015625, |
|
"logps/rejected": -556.6646118164062, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.350522994995117, |
|
"rewards/margins": 1.3620076179504395, |
|
"rewards/rejected": -3.7125303745269775, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 7.602549324125367, |
|
"learning_rate": 8.851607893136064e-07, |
|
"logits/chosen": -0.7457299828529358, |
|
"logits/rejected": -0.7355296611785889, |
|
"logps/chosen": -458.4794006347656, |
|
"logps/rejected": -523.21484375, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.8070144653320312, |
|
"rewards/margins": 0.6739380955696106, |
|
"rewards/rejected": -3.480952739715576, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3034998664173123, |
|
"grad_norm": 7.842587956186825, |
|
"learning_rate": 8.803249835363484e-07, |
|
"logits/chosen": -0.7719243168830872, |
|
"logits/rejected": -0.8175538778305054, |
|
"logps/chosen": -391.3406982421875, |
|
"logps/rejected": -472.3711242675781, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.26741099357605, |
|
"rewards/margins": 0.7575722336769104, |
|
"rewards/rejected": -3.0249834060668945, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3077745124231899, |
|
"grad_norm": 8.33328622593718, |
|
"learning_rate": 8.754032459705671e-07, |
|
"logits/chosen": -0.7375326752662659, |
|
"logits/rejected": -0.7411423921585083, |
|
"logps/chosen": -552.6005249023438, |
|
"logps/rejected": -658.5523071289062, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.2394614219665527, |
|
"rewards/margins": 1.1419782638549805, |
|
"rewards/rejected": -4.381440162658691, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3120491584290676, |
|
"grad_norm": 8.00590533781815, |
|
"learning_rate": 8.703966886486818e-07, |
|
"logits/chosen": -0.7447977066040039, |
|
"logits/rejected": -0.8021827340126038, |
|
"logps/chosen": -528.2827758789062, |
|
"logps/rejected": -663.02099609375, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.100710868835449, |
|
"rewards/margins": 1.4192044734954834, |
|
"rewards/rejected": -4.519914627075195, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.31632380443494523, |
|
"grad_norm": 9.911560282455111, |
|
"learning_rate": 8.653064427675469e-07, |
|
"logits/chosen": -0.7718651294708252, |
|
"logits/rejected": -0.7922145128250122, |
|
"logps/chosen": -473.9974365234375, |
|
"logps/rejected": -587.6644897460938, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9299654960632324, |
|
"rewards/margins": 1.1681456565856934, |
|
"rewards/rejected": -4.098111152648926, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 7.86727024374843, |
|
"learning_rate": 8.601336584328658e-07, |
|
"logits/chosen": -0.6917619705200195, |
|
"logits/rejected": -0.6980517506599426, |
|
"logps/chosen": -500.0274963378906, |
|
"logps/rejected": -590.8597412109375, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.1366689205169678, |
|
"rewards/margins": 0.9159411191940308, |
|
"rewards/rejected": -4.052610397338867, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3248730964467005, |
|
"grad_norm": 9.289091160075673, |
|
"learning_rate": 8.548795043993315e-07, |
|
"logits/chosen": -0.7438817620277405, |
|
"logits/rejected": -0.7294880747795105, |
|
"logps/chosen": -521.81005859375, |
|
"logps/rejected": -567.1892700195312, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.433103322982788, |
|
"rewards/margins": 0.4686957001686096, |
|
"rewards/rejected": -3.901798725128174, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.32914774245257816, |
|
"grad_norm": 8.347011918030578, |
|
"learning_rate": 8.495451678065561e-07, |
|
"logits/chosen": -0.7081446647644043, |
|
"logits/rejected": -0.7084572315216064, |
|
"logps/chosen": -471.94879150390625, |
|
"logps/rejected": -576.2655639648438, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.9118268489837646, |
|
"rewards/margins": 1.087062954902649, |
|
"rewards/rejected": -3.9988901615142822, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3334223884584558, |
|
"grad_norm": 9.178664809436675, |
|
"learning_rate": 8.441318539108432e-07, |
|
"logits/chosen": -0.672901451587677, |
|
"logits/rejected": -0.6473367214202881, |
|
"logps/chosen": -446.5679931640625, |
|
"logps/rejected": -525.839599609375, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.884944200515747, |
|
"rewards/margins": 0.8302309513092041, |
|
"rewards/rejected": -3.7151753902435303, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3376970344643334, |
|
"grad_norm": 9.222651758068919, |
|
"learning_rate": 8.386407858128706e-07, |
|
"logits/chosen": -0.7438699007034302, |
|
"logits/rejected": -0.7355214357376099, |
|
"logps/chosen": -530.9581909179688, |
|
"logps/rejected": -658.5861206054688, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.4786720275878906, |
|
"rewards/margins": 1.2334851026535034, |
|
"rewards/rejected": -4.712156772613525, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 8.799544386144294, |
|
"learning_rate": 8.330732041813366e-07, |
|
"logits/chosen": -0.5365869402885437, |
|
"logits/rejected": -0.5661185383796692, |
|
"logps/chosen": -488.5903015136719, |
|
"logps/rejected": -571.9346923828125, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.123514175415039, |
|
"rewards/margins": 0.8028107285499573, |
|
"rewards/rejected": -3.9263250827789307, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3462463264760887, |
|
"grad_norm": 9.030244312954085, |
|
"learning_rate": 8.274303669726426e-07, |
|
"logits/chosen": -0.6187846660614014, |
|
"logits/rejected": -0.6990691423416138, |
|
"logps/chosen": -469.09161376953125, |
|
"logps/rejected": -603.3698120117188, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.157665729522705, |
|
"rewards/margins": 1.1742490530014038, |
|
"rewards/rejected": -4.331915378570557, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.35052097248196634, |
|
"grad_norm": 8.841699455559503, |
|
"learning_rate": 8.217135491466636e-07, |
|
"logits/chosen": -0.473153293132782, |
|
"logits/rejected": -0.5449516177177429, |
|
"logps/chosen": -491.49249267578125, |
|
"logps/rejected": -654.7282104492188, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.222175121307373, |
|
"rewards/margins": 1.6088354587554932, |
|
"rewards/rejected": -4.831010341644287, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.35479561848784397, |
|
"grad_norm": 11.098615894997318, |
|
"learning_rate": 8.159240423786819e-07, |
|
"logits/chosen": -0.6635532379150391, |
|
"logits/rejected": -0.6708536148071289, |
|
"logps/chosen": -529.8382568359375, |
|
"logps/rejected": -615.276123046875, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.458679437637329, |
|
"rewards/margins": 0.8304582238197327, |
|
"rewards/rejected": -4.289137840270996, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3590702644937216, |
|
"grad_norm": 10.69217432485512, |
|
"learning_rate": 8.100631547675416e-07, |
|
"logits/chosen": -0.5764239430427551, |
|
"logits/rejected": -0.6042333245277405, |
|
"logps/chosen": -538.3191528320312, |
|
"logps/rejected": -671.1778564453125, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.707031726837158, |
|
"rewards/margins": 1.2638828754425049, |
|
"rewards/rejected": -4.970914363861084, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 10.389532135595351, |
|
"learning_rate": 8.041322105400921e-07, |
|
"logits/chosen": -0.5952804088592529, |
|
"logits/rejected": -0.5918059349060059, |
|
"logps/chosen": -468.830322265625, |
|
"logps/rejected": -555.058349609375, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.2899224758148193, |
|
"rewards/margins": 0.8072735071182251, |
|
"rewards/rejected": -4.097196102142334, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3676195565054769, |
|
"grad_norm": 9.76247745990493, |
|
"learning_rate": 7.981325497519891e-07, |
|
"logits/chosen": -0.5011740922927856, |
|
"logits/rejected": -0.5748673677444458, |
|
"logps/chosen": -568.4143676757812, |
|
"logps/rejected": -665.891357421875, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.9403202533721924, |
|
"rewards/margins": 0.8801581859588623, |
|
"rewards/rejected": -4.820478439331055, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.3718942025113545, |
|
"grad_norm": 9.276763138531336, |
|
"learning_rate": 7.920655279849171e-07, |
|
"logits/chosen": -0.6208050847053528, |
|
"logits/rejected": -0.6661792993545532, |
|
"logps/chosen": -454.78558349609375, |
|
"logps/rejected": -583.1072387695312, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.0670785903930664, |
|
"rewards/margins": 1.2415242195129395, |
|
"rewards/rejected": -4.308602333068848, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.37616884851723215, |
|
"grad_norm": 8.47887809958896, |
|
"learning_rate": 7.859325160403071e-07, |
|
"logits/chosen": -0.5842097401618958, |
|
"logits/rejected": -0.6111244559288025, |
|
"logps/chosen": -513.4686279296875, |
|
"logps/rejected": -631.78515625, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.507704973220825, |
|
"rewards/margins": 1.1260159015655518, |
|
"rewards/rejected": -4.633721351623535, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3804434945231098, |
|
"grad_norm": 9.042970665585285, |
|
"learning_rate": 7.797348996296114e-07, |
|
"logits/chosen": -0.594511091709137, |
|
"logits/rejected": -0.5762075185775757, |
|
"logps/chosen": -528.5706787109375, |
|
"logps/rejected": -640.2254638671875, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.5566911697387695, |
|
"rewards/margins": 1.1777138710021973, |
|
"rewards/rejected": -4.734404563903809, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 9.591838712191619, |
|
"learning_rate": 7.734740790612136e-07, |
|
"logits/chosen": -0.5243846774101257, |
|
"logits/rejected": -0.5419484376907349, |
|
"logps/chosen": -597.5997924804688, |
|
"logps/rejected": -721.8450927734375, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.259735107421875, |
|
"rewards/margins": 1.2303788661956787, |
|
"rewards/rejected": -5.490115165710449, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3889927865348651, |
|
"grad_norm": 9.197817870604572, |
|
"learning_rate": 7.671514689240365e-07, |
|
"logits/chosen": -0.5726766586303711, |
|
"logits/rejected": -0.6172913312911987, |
|
"logps/chosen": -557.262939453125, |
|
"logps/rejected": -697.6727294921875, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.7903552055358887, |
|
"rewards/margins": 1.3101625442504883, |
|
"rewards/rejected": -5.100517749786377, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.3932674325407427, |
|
"grad_norm": 11.581331107950847, |
|
"learning_rate": 7.607684977679283e-07, |
|
"logits/chosen": -0.6335964202880859, |
|
"logits/rejected": -0.6610329747200012, |
|
"logps/chosen": -519.31103515625, |
|
"logps/rejected": -657.5548706054688, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.4710004329681396, |
|
"rewards/margins": 1.4494860172271729, |
|
"rewards/rejected": -4.920486927032471, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.3975420785466204, |
|
"grad_norm": 10.631351767452296, |
|
"learning_rate": 7.543266077808892e-07, |
|
"logits/chosen": -0.427675724029541, |
|
"logits/rejected": -0.45514771342277527, |
|
"logps/chosen": -571.4942626953125, |
|
"logps/rejected": -717.7467041015625, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.948040723800659, |
|
"rewards/margins": 1.4244039058685303, |
|
"rewards/rejected": -5.372445106506348, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.401816724552498, |
|
"grad_norm": 12.057725080831089, |
|
"learning_rate": 7.478272544632202e-07, |
|
"logits/chosen": -0.5969647765159607, |
|
"logits/rejected": -0.6751678586006165, |
|
"logps/chosen": -643.1666870117188, |
|
"logps/rejected": -773.894287109375, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.552432537078857, |
|
"rewards/margins": 1.3130940198898315, |
|
"rewards/rejected": -5.8655266761779785, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 10.936442473029212, |
|
"learning_rate": 7.412719062986631e-07, |
|
"logits/chosen": -0.4887186288833618, |
|
"logits/rejected": -0.4894056022167206, |
|
"logps/chosen": -555.25341796875, |
|
"logps/rejected": -653.7760009765625, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.9043784141540527, |
|
"rewards/margins": 1.035258412361145, |
|
"rewards/rejected": -4.939637184143066, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.41036601656425326, |
|
"grad_norm": 18.608338586356762, |
|
"learning_rate": 7.346620444226059e-07, |
|
"logits/chosen": -0.5932431221008301, |
|
"logits/rejected": -0.6164640784263611, |
|
"logps/chosen": -586.3929443359375, |
|
"logps/rejected": -703.4130249023438, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.206478118896484, |
|
"rewards/margins": 1.083022117614746, |
|
"rewards/rejected": -5.2895002365112305, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41464066257013094, |
|
"grad_norm": 12.657787002066415, |
|
"learning_rate": 7.279991622874318e-07, |
|
"logits/chosen": -0.5697692632675171, |
|
"logits/rejected": -0.6259853839874268, |
|
"logps/chosen": -585.4468994140625, |
|
"logps/rejected": -732.0520629882812, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.9570021629333496, |
|
"rewards/margins": 1.447950839996338, |
|
"rewards/rejected": -5.404953479766846, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.41891530857600856, |
|
"grad_norm": 13.367477990049776, |
|
"learning_rate": 7.212847653250828e-07, |
|
"logits/chosen": -0.6540141105651855, |
|
"logits/rejected": -0.6565195322036743, |
|
"logps/chosen": -731.0140380859375, |
|
"logps/rejected": -848.3659057617188, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.200318336486816, |
|
"rewards/margins": 1.2101118564605713, |
|
"rewards/rejected": -6.410430908203125, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.4231899545818862, |
|
"grad_norm": 10.139524171463488, |
|
"learning_rate": 7.145203706069182e-07, |
|
"logits/chosen": -0.7252554893493652, |
|
"logits/rejected": -0.7746644616127014, |
|
"logps/chosen": -669.2903442382812, |
|
"logps/rejected": -828.5930786132812, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.771048545837402, |
|
"rewards/margins": 1.5108307600021362, |
|
"rewards/rejected": -6.281879425048828, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 12.356398801857143, |
|
"learning_rate": 7.077075065009433e-07, |
|
"logits/chosen": -0.557784914970398, |
|
"logits/rejected": -0.5647093057632446, |
|
"logps/chosen": -607.4837646484375, |
|
"logps/rejected": -718.2990112304688, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.555881500244141, |
|
"rewards/margins": 1.080979585647583, |
|
"rewards/rejected": -5.636861801147461, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"eval_logits/chosen": -0.5001155734062195, |
|
"eval_logits/rejected": -0.5150425434112549, |
|
"eval_logps/chosen": -704.6570434570312, |
|
"eval_logps/rejected": -829.715087890625, |
|
"eval_loss": 0.4167614281177521, |
|
"eval_rewards/accuracies": 0.8145161271095276, |
|
"eval_rewards/chosen": -4.996352672576904, |
|
"eval_rewards/margins": 1.3122578859329224, |
|
"eval_rewards/rejected": -6.308610916137695, |
|
"eval_runtime": 165.659, |
|
"eval_samples_per_second": 11.838, |
|
"eval_steps_per_second": 0.374, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4317392465936415, |
|
"grad_norm": 13.314006294995002, |
|
"learning_rate": 7.008477123264847e-07, |
|
"logits/chosen": -0.6598826050758362, |
|
"logits/rejected": -0.6927035450935364, |
|
"logps/chosen": -734.70556640625, |
|
"logps/rejected": -900.852294921875, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.161342620849609, |
|
"rewards/margins": 1.7760246992111206, |
|
"rewards/rejected": -6.9373674392700195, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4360138925995191, |
|
"grad_norm": 13.821741873689282, |
|
"learning_rate": 6.939425380063923e-07, |
|
"logits/chosen": -0.6629341244697571, |
|
"logits/rejected": -0.7558687925338745, |
|
"logps/chosen": -699.57177734375, |
|
"logps/rejected": -886.5426025390625, |
|
"loss": 0.3874, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.201181888580322, |
|
"rewards/margins": 1.6117980480194092, |
|
"rewards/rejected": -6.8129801750183105, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.44028853860539674, |
|
"grad_norm": 14.52198562884218, |
|
"learning_rate": 6.869935437168449e-07, |
|
"logits/chosen": -0.4441612958908081, |
|
"logits/rejected": -0.4517134428024292, |
|
"logps/chosen": -648.8721313476562, |
|
"logps/rejected": -743.1588745117188, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.671912670135498, |
|
"rewards/margins": 1.0545084476470947, |
|
"rewards/rejected": -5.726420879364014, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44456318461127436, |
|
"grad_norm": 15.339894722169653, |
|
"learning_rate": 6.80002299534838e-07, |
|
"logits/chosen": -0.719368577003479, |
|
"logits/rejected": -0.7461254596710205, |
|
"logps/chosen": -573.4705810546875, |
|
"logps/rejected": -651.0980224609375, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.8566808700561523, |
|
"rewards/margins": 0.8586393594741821, |
|
"rewards/rejected": -4.715320587158203, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 10.956142784913482, |
|
"learning_rate": 6.72970385083438e-07, |
|
"logits/chosen": -0.641654372215271, |
|
"logits/rejected": -0.6621043682098389, |
|
"logps/chosen": -592.4070434570312, |
|
"logps/rejected": -721.1480102539062, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.002495288848877, |
|
"rewards/margins": 1.1794517040252686, |
|
"rewards/rejected": -5.181946754455566, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.45311247662302967, |
|
"grad_norm": 14.08687818754259, |
|
"learning_rate": 6.658993891748759e-07, |
|
"logits/chosen": -0.6141338348388672, |
|
"logits/rejected": -0.5712395310401917, |
|
"logps/chosen": -525.6826171875, |
|
"logps/rejected": -657.1926879882812, |
|
"loss": 0.3788, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.3286538124084473, |
|
"rewards/margins": 1.5682119131088257, |
|
"rewards/rejected": -4.896864891052246, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4573871226289073, |
|
"grad_norm": 12.007137757034995, |
|
"learning_rate": 6.587909094515663e-07, |
|
"logits/chosen": -0.6399226188659668, |
|
"logits/rejected": -0.6818464994430542, |
|
"logps/chosen": -515.7030639648438, |
|
"logps/rejected": -624.790283203125, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.5857155323028564, |
|
"rewards/margins": 0.9131308794021606, |
|
"rewards/rejected": -4.498846530914307, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4616617686347849, |
|
"grad_norm": 11.626806758384587, |
|
"learning_rate": 6.516465520251313e-07, |
|
"logits/chosen": -0.6572325229644775, |
|
"logits/rejected": -0.7261943221092224, |
|
"logps/chosen": -557.6213989257812, |
|
"logps/rejected": -685.3796997070312, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.802943468093872, |
|
"rewards/margins": 1.3063392639160156, |
|
"rewards/rejected": -5.109282970428467, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.46593641464066254, |
|
"grad_norm": 11.769626267692969, |
|
"learning_rate": 6.444679311135112e-07, |
|
"logits/chosen": -0.6812455058097839, |
|
"logits/rejected": -0.6769453287124634, |
|
"logps/chosen": -545.5555419921875, |
|
"logps/rejected": -670.9700317382812, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.524083137512207, |
|
"rewards/margins": 1.1972600221633911, |
|
"rewards/rejected": -4.721343040466309, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 11.834467781345984, |
|
"learning_rate": 6.372566686762426e-07, |
|
"logits/chosen": -0.6734607219696045, |
|
"logits/rejected": -0.6938244104385376, |
|
"logps/chosen": -631.7657470703125, |
|
"logps/rejected": -778.4968872070312, |
|
"loss": 0.3988, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.416792869567871, |
|
"rewards/margins": 1.5345261096954346, |
|
"rewards/rejected": -5.951319217681885, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.47448570665241785, |
|
"grad_norm": 10.600986700850507, |
|
"learning_rate": 6.30014394047988e-07, |
|
"logits/chosen": -0.7839672565460205, |
|
"logits/rejected": -0.7656916379928589, |
|
"logps/chosen": -520.810791015625, |
|
"logps/rejected": -590.8253173828125, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.642113447189331, |
|
"rewards/margins": 0.7910320162773132, |
|
"rewards/rejected": -4.433145999908447, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.4787603526582955, |
|
"grad_norm": 13.974810391572062, |
|
"learning_rate": 6.227427435703995e-07, |
|
"logits/chosen": -0.6362528204917908, |
|
"logits/rejected": -0.7391636371612549, |
|
"logps/chosen": -589.657470703125, |
|
"logps/rejected": -778.1963500976562, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.09708309173584, |
|
"rewards/margins": 1.6432350873947144, |
|
"rewards/rejected": -5.7403178215026855, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.4830349986641731, |
|
"grad_norm": 12.06902931160219, |
|
"learning_rate": 6.154433602223978e-07, |
|
"logits/chosen": -0.7784813046455383, |
|
"logits/rejected": -0.8440088033676147, |
|
"logps/chosen": -634.3173828125, |
|
"logps/rejected": -829.8695068359375, |
|
"loss": 0.4383, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.257462024688721, |
|
"rewards/margins": 1.7875339984893799, |
|
"rewards/rejected": -6.0449957847595215, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.4873096446700508, |
|
"grad_norm": 13.358425807533337, |
|
"learning_rate": 6.081178932489535e-07, |
|
"logits/chosen": -0.7081687450408936, |
|
"logits/rejected": -0.7073873281478882, |
|
"logps/chosen": -569.8103637695312, |
|
"logps/rejected": -694.3984375, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.89652943611145, |
|
"rewards/margins": 1.3301247358322144, |
|
"rewards/rejected": -5.226654529571533, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 11.586882789419233, |
|
"learning_rate": 6.00767997788451e-07, |
|
"logits/chosen": -0.5270929336547852, |
|
"logits/rejected": -0.5626642107963562, |
|
"logps/chosen": -693.6819458007812, |
|
"logps/rejected": -889.004150390625, |
|
"loss": 0.3575, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.817679405212402, |
|
"rewards/margins": 1.964691162109375, |
|
"rewards/rejected": -6.782370090484619, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49585893668180603, |
|
"grad_norm": 12.667062697493666, |
|
"learning_rate": 5.933953344987214e-07, |
|
"logits/chosen": -0.6200395226478577, |
|
"logits/rejected": -0.6530672311782837, |
|
"logps/chosen": -617.082763671875, |
|
"logps/rejected": -743.7801513671875, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.380313873291016, |
|
"rewards/margins": 1.2766342163085938, |
|
"rewards/rejected": -5.656947612762451, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5001335826876837, |
|
"grad_norm": 13.06084918611884, |
|
"learning_rate": 5.860015691818292e-07, |
|
"logits/chosen": -0.5794460773468018, |
|
"logits/rejected": -0.6392884850502014, |
|
"logps/chosen": -523.0586547851562, |
|
"logps/rejected": -706.2977294921875, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.7082810401916504, |
|
"rewards/margins": 1.7129367589950562, |
|
"rewards/rejected": -5.421217918395996, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5044082286935613, |
|
"grad_norm": 14.158925761660381, |
|
"learning_rate": 5.78588372407695e-07, |
|
"logits/chosen": -0.591346025466919, |
|
"logits/rejected": -0.5808792114257812, |
|
"logps/chosen": -661.6780395507812, |
|
"logps/rejected": -753.6257934570312, |
|
"loss": 0.3814, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.592419147491455, |
|
"rewards/margins": 1.0543147325515747, |
|
"rewards/rejected": -5.646734237670898, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.508682874699439, |
|
"grad_norm": 13.571207607427793, |
|
"learning_rate": 5.711574191366427e-07, |
|
"logits/chosen": -0.4889651834964752, |
|
"logits/rejected": -0.44250980019569397, |
|
"logps/chosen": -608.6267700195312, |
|
"logps/rejected": -910.5017700195312, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.152202606201172, |
|
"rewards/margins": 0.7526392936706543, |
|
"rewards/rejected": -4.904841423034668, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 10.741928061872432, |
|
"learning_rate": 5.637103883409525e-07, |
|
"logits/chosen": -0.5629594922065735, |
|
"logits/rejected": -0.6181632876396179, |
|
"logps/chosen": -604.459228515625, |
|
"logps/rejected": -852.540283203125, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.461546421051025, |
|
"rewards/margins": 2.3289871215820312, |
|
"rewards/rejected": -6.790533542633057, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5172321667111942, |
|
"grad_norm": 13.589230759795878, |
|
"learning_rate": 5.562489626255103e-07, |
|
"logits/chosen": -0.6361875534057617, |
|
"logits/rejected": -0.6799750924110413, |
|
"logps/chosen": -612.7998657226562, |
|
"logps/rejected": -803.4393920898438, |
|
"loss": 0.3612, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.2646894454956055, |
|
"rewards/margins": 1.7221603393554688, |
|
"rewards/rejected": -5.986849784851074, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5215068127170719, |
|
"grad_norm": 13.130136917819023, |
|
"learning_rate": 5.48774827847634e-07, |
|
"logits/chosen": -0.6019195914268494, |
|
"logits/rejected": -0.6733092665672302, |
|
"logps/chosen": -578.5673828125, |
|
"logps/rejected": -739.2437133789062, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.263480186462402, |
|
"rewards/margins": 1.4445068836212158, |
|
"rewards/rejected": -5.707987308502197, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5257814587229495, |
|
"grad_norm": 12.568726768016017, |
|
"learning_rate": 5.412896727361662e-07, |
|
"logits/chosen": -0.5387797951698303, |
|
"logits/rejected": -0.6281207799911499, |
|
"logps/chosen": -604.70703125, |
|
"logps/rejected": -767.2006225585938, |
|
"loss": 0.3866, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.197920322418213, |
|
"rewards/margins": 1.5229953527450562, |
|
"rewards/rejected": -5.720915794372559, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5300561047288271, |
|
"grad_norm": 12.66743855092958, |
|
"learning_rate": 5.337951885099166e-07, |
|
"logits/chosen": -0.7120057940483093, |
|
"logits/rejected": -0.6868148446083069, |
|
"logps/chosen": -564.8189086914062, |
|
"logps/rejected": -678.998779296875, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.940401077270508, |
|
"rewards/margins": 1.179326057434082, |
|
"rewards/rejected": -5.11972713470459, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 14.46489476063489, |
|
"learning_rate": 5.262930684955438e-07, |
|
"logits/chosen": -0.7230139970779419, |
|
"logits/rejected": -0.7383438348770142, |
|
"logps/chosen": -680.5816040039062, |
|
"logps/rejected": -828.6448974609375, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.831777572631836, |
|
"rewards/margins": 1.4383575916290283, |
|
"rewards/rejected": -6.270134925842285, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5386053967405824, |
|
"grad_norm": 13.241409444585061, |
|
"learning_rate": 5.187850077449603e-07, |
|
"logits/chosen": -0.49940329790115356, |
|
"logits/rejected": -0.5305464267730713, |
|
"logps/chosen": -678.576416015625, |
|
"logps/rejected": -828.8834838867188, |
|
"loss": 0.3599, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.9777750968933105, |
|
"rewards/margins": 1.5080969333648682, |
|
"rewards/rejected": -6.485872268676758, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5428800427464601, |
|
"grad_norm": 15.600684361137235, |
|
"learning_rate": 5.11272702652346e-07, |
|
"logits/chosen": -0.766007125377655, |
|
"logits/rejected": -0.8170765042304993, |
|
"logps/chosen": -783.025146484375, |
|
"logps/rejected": -941.555908203125, |
|
"loss": 0.3807, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.462682247161865, |
|
"rewards/margins": 1.6811782121658325, |
|
"rewards/rejected": -7.143860816955566, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5471546887523377, |
|
"grad_norm": 12.46259382895567, |
|
"learning_rate": 5.03757850570861e-07, |
|
"logits/chosen": -0.6791242361068726, |
|
"logits/rejected": -0.6803139448165894, |
|
"logps/chosen": -693.43115234375, |
|
"logps/rejected": -792.7020263671875, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.814350128173828, |
|
"rewards/margins": 1.0507954359054565, |
|
"rewards/rejected": -5.865145206451416, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5514293347582153, |
|
"grad_norm": 14.68281824566638, |
|
"learning_rate": 4.962421494291391e-07, |
|
"logits/chosen": -0.6624226570129395, |
|
"logits/rejected": -0.8003214597702026, |
|
"logps/chosen": -641.5405883789062, |
|
"logps/rejected": -848.9283447265625, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.510129928588867, |
|
"rewards/margins": 1.8167277574539185, |
|
"rewards/rejected": -6.326857566833496, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 14.156910665906969, |
|
"learning_rate": 4.88727297347654e-07, |
|
"logits/chosen": -0.6747015714645386, |
|
"logits/rejected": -0.6304070353507996, |
|
"logps/chosen": -673.6571655273438, |
|
"logps/rejected": -818.0390625, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.6810078620910645, |
|
"rewards/margins": 1.7004587650299072, |
|
"rewards/rejected": -6.381466388702393, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5599786267699706, |
|
"grad_norm": 13.513462308218608, |
|
"learning_rate": 4.812149922550397e-07, |
|
"logits/chosen": -0.5138005614280701, |
|
"logits/rejected": -0.5008392333984375, |
|
"logps/chosen": -603.177490234375, |
|
"logps/rejected": -718.6824951171875, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.324260234832764, |
|
"rewards/margins": 1.1721910238265991, |
|
"rewards/rejected": -5.496450901031494, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5642532727758482, |
|
"grad_norm": 13.859777475577925, |
|
"learning_rate": 4.7370693150445615e-07, |
|
"logits/chosen": -0.7230309247970581, |
|
"logits/rejected": -0.7601820826530457, |
|
"logps/chosen": -678.0418090820312, |
|
"logps/rejected": -836.7296142578125, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.642270565032959, |
|
"rewards/margins": 1.615240216255188, |
|
"rewards/rejected": -6.257511138916016, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5685279187817259, |
|
"grad_norm": 12.304755311149645, |
|
"learning_rate": 4.6620481149008364e-07, |
|
"logits/chosen": -0.5858466029167175, |
|
"logits/rejected": -0.5665376782417297, |
|
"logps/chosen": -551.0853271484375, |
|
"logps/rejected": -661.145263671875, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.0769124031066895, |
|
"rewards/margins": 1.150291919708252, |
|
"rewards/rejected": -5.227204322814941, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5728025647876035, |
|
"grad_norm": 13.318626812706627, |
|
"learning_rate": 4.5871032726383385e-07, |
|
"logits/chosen": -0.6011719703674316, |
|
"logits/rejected": -0.6539227962493896, |
|
"logps/chosen": -613.9602661132812, |
|
"logps/rejected": -801.2767333984375, |
|
"loss": 0.3028, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.4908127784729, |
|
"rewards/margins": 1.866058588027954, |
|
"rewards/rejected": -6.356871604919434, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 15.02191791227977, |
|
"learning_rate": 4.512251721523659e-07, |
|
"logits/chosen": -0.5807833671569824, |
|
"logits/rejected": -0.5801360607147217, |
|
"logps/chosen": -585.0089111328125, |
|
"logps/rejected": -690.991455078125, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.436345100402832, |
|
"rewards/margins": 0.9718096852302551, |
|
"rewards/rejected": -5.40815544128418, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5813518567993589, |
|
"grad_norm": 13.719743061532137, |
|
"learning_rate": 4.4375103737448967e-07, |
|
"logits/chosen": -0.6176421642303467, |
|
"logits/rejected": -0.5977914333343506, |
|
"logps/chosen": -647.6025390625, |
|
"logps/rejected": -780.4196166992188, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.731601715087891, |
|
"rewards/margins": 1.3735716342926025, |
|
"rewards/rejected": -6.105173110961914, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.5856265028052364, |
|
"grad_norm": 13.899011782478087, |
|
"learning_rate": 4.362896116590475e-07, |
|
"logits/chosen": -0.6031906604766846, |
|
"logits/rejected": -0.6842055320739746, |
|
"logps/chosen": -619.6199340820312, |
|
"logps/rejected": -823.1647338867188, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.415548324584961, |
|
"rewards/margins": 1.8599358797073364, |
|
"rewards/rejected": -6.275484085083008, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.5899011488111141, |
|
"grad_norm": 14.094976818965387, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits/chosen": -0.5712490081787109, |
|
"logits/rejected": -0.6076186299324036, |
|
"logps/chosen": -635.4631958007812, |
|
"logps/rejected": -767.619873046875, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.872575759887695, |
|
"rewards/margins": 1.2816861867904663, |
|
"rewards/rejected": -6.154261589050293, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5941757948169917, |
|
"grad_norm": 15.923719497368527, |
|
"learning_rate": 4.2141162759230503e-07, |
|
"logits/chosen": -0.4579673409461975, |
|
"logits/rejected": -0.5088114738464355, |
|
"logps/chosen": -541.15185546875, |
|
"logps/rejected": -640.8451538085938, |
|
"loss": 0.3694, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.0685858726501465, |
|
"rewards/margins": 0.9016439914703369, |
|
"rewards/rejected": -4.970229625701904, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 15.626619862394913, |
|
"learning_rate": 4.139984308181708e-07, |
|
"logits/chosen": -0.6617997884750366, |
|
"logits/rejected": -0.6638819575309753, |
|
"logps/chosen": -747.4265747070312, |
|
"logps/rejected": -863.458251953125, |
|
"loss": 0.3971, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.498664379119873, |
|
"rewards/margins": 1.1953853368759155, |
|
"rewards/rejected": -6.694049835205078, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.602725086828747, |
|
"grad_norm": 16.11770967148462, |
|
"learning_rate": 4.0660466550127853e-07, |
|
"logits/chosen": -0.7728097438812256, |
|
"logits/rejected": -0.8388174772262573, |
|
"logps/chosen": -708.0802001953125, |
|
"logps/rejected": -857.48486328125, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.071959972381592, |
|
"rewards/margins": 1.4134087562561035, |
|
"rewards/rejected": -6.485368251800537, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6069997328346246, |
|
"grad_norm": 13.358737574089647, |
|
"learning_rate": 3.9923200221154914e-07, |
|
"logits/chosen": -0.5902035236358643, |
|
"logits/rejected": -0.600926399230957, |
|
"logps/chosen": -655.1759033203125, |
|
"logps/rejected": -776.4323120117188, |
|
"loss": 0.4008, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.78019905090332, |
|
"rewards/margins": 1.229543685913086, |
|
"rewards/rejected": -6.009742736816406, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6112743788405023, |
|
"grad_norm": 20.882100986314004, |
|
"learning_rate": 3.918821067510464e-07, |
|
"logits/chosen": -0.5608689188957214, |
|
"logits/rejected": -0.5520298480987549, |
|
"logps/chosen": -606.8939208984375, |
|
"logps/rejected": -732.2677001953125, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.506969928741455, |
|
"rewards/margins": 1.263184666633606, |
|
"rewards/rejected": -5.77015495300293, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6155490248463799, |
|
"grad_norm": 15.142641307206523, |
|
"learning_rate": 3.845566397776021e-07, |
|
"logits/chosen": -0.5451078414916992, |
|
"logits/rejected": -0.5328483581542969, |
|
"logps/chosen": -578.9205932617188, |
|
"logps/rejected": -720.2644653320312, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.13190221786499, |
|
"rewards/margins": 1.3544728755950928, |
|
"rewards/rejected": -5.486375331878662, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 14.102739323418549, |
|
"learning_rate": 3.772572564296004e-07, |
|
"logits/chosen": -0.5815902948379517, |
|
"logits/rejected": -0.6612125635147095, |
|
"logps/chosen": -647.4329833984375, |
|
"logps/rejected": -800.5177001953125, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.570836067199707, |
|
"rewards/margins": 1.5209659337997437, |
|
"rewards/rejected": -6.091801643371582, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6240983168581352, |
|
"grad_norm": 16.783278936540377, |
|
"learning_rate": 3.699856059520118e-07, |
|
"logits/chosen": -0.5741180777549744, |
|
"logits/rejected": -0.6261047720909119, |
|
"logps/chosen": -518.5027465820312, |
|
"logps/rejected": -752.7945556640625, |
|
"loss": 0.339, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.5159010887145996, |
|
"rewards/margins": 2.189849615097046, |
|
"rewards/rejected": -5.705749988555908, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6283729628640128, |
|
"grad_norm": 16.241290219398206, |
|
"learning_rate": 3.627433313237576e-07, |
|
"logits/chosen": -0.6445101499557495, |
|
"logits/rejected": -0.6295093297958374, |
|
"logps/chosen": -611.40576171875, |
|
"logps/rejected": -746.1281127929688, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.344273090362549, |
|
"rewards/margins": 1.294838309288025, |
|
"rewards/rejected": -5.639111518859863, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6326476088698905, |
|
"grad_norm": 16.58217058287404, |
|
"learning_rate": 3.5553206888648885e-07, |
|
"logits/chosen": -0.5924898386001587, |
|
"logits/rejected": -0.6705700755119324, |
|
"logps/chosen": -561.9385986328125, |
|
"logps/rejected": -790.1613159179688, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.8234567642211914, |
|
"rewards/margins": 2.038635015487671, |
|
"rewards/rejected": -5.862092018127441, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6369222548757681, |
|
"grad_norm": 13.631809000580427, |
|
"learning_rate": 3.483534479748688e-07, |
|
"logits/chosen": -0.6043068170547485, |
|
"logits/rejected": -0.6237097978591919, |
|
"logps/chosen": -599.3536376953125, |
|
"logps/rejected": -737.3873291015625, |
|
"loss": 0.3333, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.288971424102783, |
|
"rewards/margins": 1.361509919166565, |
|
"rewards/rejected": -5.650481224060059, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 13.48441747872976, |
|
"learning_rate": 3.412090905484337e-07, |
|
"logits/chosen": -0.5789849758148193, |
|
"logits/rejected": -0.5934211015701294, |
|
"logps/chosen": -637.4574584960938, |
|
"logps/rejected": -799.4398193359375, |
|
"loss": 0.3633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.6277079582214355, |
|
"rewards/margins": 1.6034901142120361, |
|
"rewards/rejected": -6.231198310852051, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6454715468875234, |
|
"grad_norm": 13.54705279065067, |
|
"learning_rate": 3.3410061082512417e-07, |
|
"logits/chosen": -0.7143419981002808, |
|
"logits/rejected": -0.7396361231803894, |
|
"logps/chosen": -644.5235595703125, |
|
"logps/rejected": -819.0350341796875, |
|
"loss": 0.3787, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.598511695861816, |
|
"rewards/margins": 1.7460615634918213, |
|
"rewards/rejected": -6.344573020935059, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.649746192893401, |
|
"grad_norm": 13.97282414379501, |
|
"learning_rate": 3.270296149165619e-07, |
|
"logits/chosen": -0.7898523807525635, |
|
"logits/rejected": -0.78404700756073, |
|
"logps/chosen": -746.5989990234375, |
|
"logps/rejected": -925.236328125, |
|
"loss": 0.3681, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.577144145965576, |
|
"rewards/margins": 1.7709450721740723, |
|
"rewards/rejected": -7.348089218139648, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6540208388992786, |
|
"grad_norm": 15.30647255299837, |
|
"learning_rate": 3.1999770046516194e-07, |
|
"logits/chosen": -0.6549022197723389, |
|
"logits/rejected": -0.6652963161468506, |
|
"logps/chosen": -734.189453125, |
|
"logps/rejected": -882.783203125, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.586746692657471, |
|
"rewards/margins": 1.5474714040756226, |
|
"rewards/rejected": -7.134217739105225, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6582954849051563, |
|
"grad_norm": 15.480021561153041, |
|
"learning_rate": 3.1300645628315526e-07, |
|
"logits/chosen": -0.6595125794410706, |
|
"logits/rejected": -0.684340238571167, |
|
"logps/chosen": -692.1060180664062, |
|
"logps/rejected": -861.9107055664062, |
|
"loss": 0.3519, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.179555416107178, |
|
"rewards/margins": 1.7508639097213745, |
|
"rewards/rejected": -6.930419445037842, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 14.370880370113698, |
|
"learning_rate": 3.060574619936075e-07, |
|
"logits/chosen": -0.6609420776367188, |
|
"logits/rejected": -0.6882165670394897, |
|
"logps/chosen": -755.37109375, |
|
"logps/rejected": -923.8385620117188, |
|
"loss": 0.3964, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.7476301193237305, |
|
"rewards/margins": 1.664482831954956, |
|
"rewards/rejected": -7.412113189697266, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6668447769169116, |
|
"grad_norm": 15.740330800217551, |
|
"learning_rate": 2.9915228767351535e-07, |
|
"logits/chosen": -0.6638086438179016, |
|
"logits/rejected": -0.6543954014778137, |
|
"logps/chosen": -691.0468139648438, |
|
"logps/rejected": -834.0437622070312, |
|
"loss": 0.3507, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.920919895172119, |
|
"rewards/margins": 1.5358891487121582, |
|
"rewards/rejected": -6.4568095207214355, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6711194229227893, |
|
"grad_norm": 16.901891917408413, |
|
"learning_rate": 2.922924934990568e-07, |
|
"logits/chosen": -0.7027104496955872, |
|
"logits/rejected": -0.7690137624740601, |
|
"logps/chosen": -740.62353515625, |
|
"logps/rejected": -911.9879150390625, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.2726335525512695, |
|
"rewards/margins": 1.703195571899414, |
|
"rewards/rejected": -6.975828647613525, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6753940689286668, |
|
"grad_norm": 14.502221464305004, |
|
"learning_rate": 2.8547962939308186e-07, |
|
"logits/chosen": -0.7240225672721863, |
|
"logits/rejected": -0.743172287940979, |
|
"logps/chosen": -638.63720703125, |
|
"logps/rejected": -785.5480346679688, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.341987133026123, |
|
"rewards/margins": 1.3872699737548828, |
|
"rewards/rejected": -5.729257583618164, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6796687149345445, |
|
"grad_norm": 13.076811073919702, |
|
"learning_rate": 2.7871523467491725e-07, |
|
"logits/chosen": -0.5847674608230591, |
|
"logits/rejected": -0.6150667667388916, |
|
"logps/chosen": -558.647216796875, |
|
"logps/rejected": -742.8478393554688, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.8698740005493164, |
|
"rewards/margins": 1.8015196323394775, |
|
"rewards/rejected": -5.671393394470215, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 13.435743880369799, |
|
"learning_rate": 2.720008377125682e-07, |
|
"logits/chosen": -0.7234424352645874, |
|
"logits/rejected": -0.7753596305847168, |
|
"logps/chosen": -599.20947265625, |
|
"logps/rejected": -840.508544921875, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.066052436828613, |
|
"rewards/margins": 2.2670648097991943, |
|
"rewards/rejected": -6.3331170082092285, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6882180069462998, |
|
"grad_norm": 14.725846441531868, |
|
"learning_rate": 2.6533795557739405e-07, |
|
"logits/chosen": -0.5986216068267822, |
|
"logits/rejected": -0.6119877099990845, |
|
"logps/chosen": -587.1053466796875, |
|
"logps/rejected": -751.2557983398438, |
|
"loss": 0.3576, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.9571621417999268, |
|
"rewards/margins": 1.737083077430725, |
|
"rewards/rejected": -5.694245338439941, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.6924926529521774, |
|
"grad_norm": 18.854853550251143, |
|
"learning_rate": 2.5872809370133704e-07, |
|
"logits/chosen": -0.7047430872917175, |
|
"logits/rejected": -0.7392921447753906, |
|
"logps/chosen": -541.9876098632812, |
|
"logps/rejected": -679.9630737304688, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.7241249084472656, |
|
"rewards/margins": 1.3988580703735352, |
|
"rewards/rejected": -5.122982978820801, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.696767298958055, |
|
"grad_norm": 15.327216222305758, |
|
"learning_rate": 2.521727455367797e-07, |
|
"logits/chosen": -0.4683057963848114, |
|
"logits/rejected": -0.4958358705043793, |
|
"logps/chosen": -477.89453125, |
|
"logps/rejected": -642.5132446289062, |
|
"loss": 0.3217, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.4097371101379395, |
|
"rewards/margins": 1.6171811819076538, |
|
"rewards/rejected": -5.026918411254883, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7010419449639327, |
|
"grad_norm": 15.107047920787036, |
|
"learning_rate": 2.456733922191108e-07, |
|
"logits/chosen": -0.6403992176055908, |
|
"logits/rejected": -0.7081367373466492, |
|
"logps/chosen": -535.940673828125, |
|
"logps/rejected": -721.487060546875, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.6437511444091797, |
|
"rewards/margins": 1.7465698719024658, |
|
"rewards/rejected": -5.390320777893066, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 14.789037132111227, |
|
"learning_rate": 2.3923150223207173e-07, |
|
"logits/chosen": -0.6419979333877563, |
|
"logits/rejected": -0.6663538217544556, |
|
"logps/chosen": -611.316162109375, |
|
"logps/rejected": -779.5244140625, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.9267449378967285, |
|
"rewards/margins": 1.7568156719207764, |
|
"rewards/rejected": -5.683561325073242, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7095912369756879, |
|
"grad_norm": 11.678139693168967, |
|
"learning_rate": 2.3284853107596347e-07, |
|
"logits/chosen": -0.643075704574585, |
|
"logits/rejected": -0.677239179611206, |
|
"logps/chosen": -605.615478515625, |
|
"logps/rejected": -788.3361206054688, |
|
"loss": 0.3161, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.1840410232543945, |
|
"rewards/margins": 1.828648567199707, |
|
"rewards/rejected": -6.012689590454102, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7138658829815656, |
|
"grad_norm": 14.838448866889486, |
|
"learning_rate": 2.2652592093878665e-07, |
|
"logits/chosen": -0.5908488631248474, |
|
"logits/rejected": -0.6062439680099487, |
|
"logps/chosen": -623.344482421875, |
|
"logps/rejected": -775.658447265625, |
|
"loss": 0.3693, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.541435241699219, |
|
"rewards/margins": 1.4988759756088257, |
|
"rewards/rejected": -6.040311336517334, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7181405289874432, |
|
"grad_norm": 13.768660505175903, |
|
"learning_rate": 2.202651003703885e-07, |
|
"logits/chosen": -0.5698331594467163, |
|
"logits/rejected": -0.5812557339668274, |
|
"logps/chosen": -609.9556274414062, |
|
"logps/rejected": -810.02587890625, |
|
"loss": 0.3727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.267837047576904, |
|
"rewards/margins": 1.995699167251587, |
|
"rewards/rejected": -6.2635369300842285, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7224151749933209, |
|
"grad_norm": 12.956408292396839, |
|
"learning_rate": 2.1406748395969305e-07, |
|
"logits/chosen": -0.6224421858787537, |
|
"logits/rejected": -0.6563930511474609, |
|
"logps/chosen": -620.7723388671875, |
|
"logps/rejected": -781.536865234375, |
|
"loss": 0.3362, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.473085403442383, |
|
"rewards/margins": 1.6361382007598877, |
|
"rewards/rejected": -6.109223365783691, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 15.331044331855917, |
|
"learning_rate": 2.0793447201508286e-07, |
|
"logits/chosen": -0.6418094635009766, |
|
"logits/rejected": -0.6333540678024292, |
|
"logps/chosen": -693.9434204101562, |
|
"logps/rejected": -780.4866333007812, |
|
"loss": 0.3273, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.1741743087768555, |
|
"rewards/margins": 0.9175342321395874, |
|
"rewards/rejected": -6.091708660125732, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7309644670050761, |
|
"grad_norm": 14.142847544336131, |
|
"learning_rate": 2.01867450248011e-07, |
|
"logits/chosen": -0.6202086210250854, |
|
"logits/rejected": -0.6713452935218811, |
|
"logps/chosen": -733.3516235351562, |
|
"logps/rejected": -912.0302734375, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.687126636505127, |
|
"rewards/margins": 1.6365104913711548, |
|
"rewards/rejected": -7.32363748550415, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7352391130109538, |
|
"grad_norm": 14.223351065221193, |
|
"learning_rate": 1.9586778945990783e-07, |
|
"logits/chosen": -0.5605691075325012, |
|
"logits/rejected": -0.6431994438171387, |
|
"logps/chosen": -720.2319946289062, |
|
"logps/rejected": -907.8939819335938, |
|
"loss": 0.3518, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.346857070922852, |
|
"rewards/margins": 1.8520584106445312, |
|
"rewards/rejected": -7.198915481567383, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7395137590168315, |
|
"grad_norm": 14.283973424406769, |
|
"learning_rate": 1.899368452324584e-07, |
|
"logits/chosen": -0.8039106130599976, |
|
"logits/rejected": -0.8154680728912354, |
|
"logps/chosen": -702.9293823242188, |
|
"logps/rejected": -874.7135009765625, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -5.166163444519043, |
|
"rewards/margins": 1.7623482942581177, |
|
"rewards/rejected": -6.928511142730713, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.743788405022709, |
|
"grad_norm": 13.648679506818656, |
|
"learning_rate": 1.840759576213181e-07, |
|
"logits/chosen": -0.5466803908348083, |
|
"logits/rejected": -0.6139577031135559, |
|
"logps/chosen": -643.9953002929688, |
|
"logps/rejected": -851.5556640625, |
|
"loss": 0.3272, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.4559197425842285, |
|
"rewards/margins": 2.084817886352539, |
|
"rewards/rejected": -6.540737152099609, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 16.8663479372205, |
|
"learning_rate": 1.7828645085333644e-07, |
|
"logits/chosen": -0.6725043654441833, |
|
"logits/rejected": -0.7195257544517517, |
|
"logps/chosen": -706.1845703125, |
|
"logps/rejected": -901.8247680664062, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.220101356506348, |
|
"rewards/margins": 1.968306541442871, |
|
"rewards/rejected": -7.188408374786377, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7523376970344643, |
|
"grad_norm": 13.612220492425323, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": -0.7102064490318298, |
|
"logits/rejected": -0.7330564260482788, |
|
"logps/chosen": -644.0183715820312, |
|
"logps/rejected": -833.1116943359375, |
|
"loss": 0.3007, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.664500713348389, |
|
"rewards/margins": 1.9048974514007568, |
|
"rewards/rejected": -6.569398403167725, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.756612343040342, |
|
"grad_norm": 13.592118995239437, |
|
"learning_rate": 1.6692679581866332e-07, |
|
"logits/chosen": -0.5269302725791931, |
|
"logits/rejected": -0.562321662902832, |
|
"logps/chosen": -647.377685546875, |
|
"logps/rejected": -876.7937622070312, |
|
"loss": 0.3336, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.665207862854004, |
|
"rewards/margins": 2.2923545837402344, |
|
"rewards/rejected": -6.9575629234313965, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7608869890462197, |
|
"grad_norm": 13.100123198154126, |
|
"learning_rate": 1.6135921418712955e-07, |
|
"logits/chosen": -0.6257606744766235, |
|
"logits/rejected": -0.6605125069618225, |
|
"logps/chosen": -589.7310791015625, |
|
"logps/rejected": -762.8815307617188, |
|
"loss": 0.3326, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.342212200164795, |
|
"rewards/margins": 1.6928372383117676, |
|
"rewards/rejected": -6.035048961639404, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7651616350520972, |
|
"grad_norm": 24.95386459216583, |
|
"learning_rate": 1.558681460891567e-07, |
|
"logits/chosen": -0.6882709860801697, |
|
"logits/rejected": -0.7164211273193359, |
|
"logps/chosen": -729.4912719726562, |
|
"logps/rejected": -960.5554809570312, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.27896785736084, |
|
"rewards/margins": 2.3151795864105225, |
|
"rewards/rejected": -7.594147682189941, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 14.508183670705439, |
|
"learning_rate": 1.5045483219344385e-07, |
|
"logits/chosen": -0.4706317186355591, |
|
"logits/rejected": -0.4956177771091461, |
|
"logps/chosen": -686.981201171875, |
|
"logps/rejected": -892.425537109375, |
|
"loss": 0.3878, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.139954090118408, |
|
"rewards/margins": 1.9214184284210205, |
|
"rewards/rejected": -7.061371803283691, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7737109270638525, |
|
"grad_norm": 13.601053697523751, |
|
"learning_rate": 1.4512049560066835e-07, |
|
"logits/chosen": -0.5556597113609314, |
|
"logits/rejected": -0.6318129301071167, |
|
"logps/chosen": -582.2374877929688, |
|
"logps/rejected": -793.1754760742188, |
|
"loss": 0.3245, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.254069805145264, |
|
"rewards/margins": 2.0026259422302246, |
|
"rewards/rejected": -6.256695747375488, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.7779855730697302, |
|
"grad_norm": 18.93784031657066, |
|
"learning_rate": 1.3986634156713417e-07, |
|
"logits/chosen": -0.5807328224182129, |
|
"logits/rejected": -0.5342915654182434, |
|
"logps/chosen": -602.649658203125, |
|
"logps/rejected": -747.00732421875, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.532341957092285, |
|
"rewards/margins": 1.6315253973007202, |
|
"rewards/rejected": -6.163866996765137, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7822602190756078, |
|
"grad_norm": 16.431500936058505, |
|
"learning_rate": 1.34693557232453e-07, |
|
"logits/chosen": -0.6352940797805786, |
|
"logits/rejected": -0.6816412210464478, |
|
"logps/chosen": -679.6211547851562, |
|
"logps/rejected": -891.3534545898438, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.584761619567871, |
|
"rewards/margins": 2.096449375152588, |
|
"rewards/rejected": -6.681210994720459, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7865348650814854, |
|
"grad_norm": 17.829780427983952, |
|
"learning_rate": 1.2960331135131823e-07, |
|
"logits/chosen": -0.6611433029174805, |
|
"logits/rejected": -0.6609802842140198, |
|
"logps/chosen": -662.570068359375, |
|
"logps/rejected": -845.5758666992188, |
|
"loss": 0.3201, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.930952072143555, |
|
"rewards/margins": 1.9189307689666748, |
|
"rewards/rejected": -6.849882125854492, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 14.013649032140211, |
|
"learning_rate": 1.2459675402943288e-07, |
|
"logits/chosen": -0.5998414754867554, |
|
"logits/rejected": -0.6097269058227539, |
|
"logps/chosen": -691.530517578125, |
|
"logps/rejected": -900.010986328125, |
|
"loss": 0.2947, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.21755838394165, |
|
"rewards/margins": 2.078969955444336, |
|
"rewards/rejected": -7.296527862548828, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7950841570932408, |
|
"grad_norm": 13.880919593847631, |
|
"learning_rate": 1.1967501646365146e-07, |
|
"logits/chosen": -0.6878648996353149, |
|
"logits/rejected": -0.7795136570930481, |
|
"logps/chosen": -629.9570922851562, |
|
"logps/rejected": -834.251708984375, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.382741451263428, |
|
"rewards/margins": 2.003836154937744, |
|
"rewards/rejected": -6.386577606201172, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.7993588030991183, |
|
"grad_norm": 15.353298997567329, |
|
"learning_rate": 1.1483921068639351e-07, |
|
"logits/chosen": -0.6340612173080444, |
|
"logits/rejected": -0.6507092714309692, |
|
"logps/chosen": -723.2861938476562, |
|
"logps/rejected": -939.2916259765625, |
|
"loss": 0.3424, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.30019474029541, |
|
"rewards/margins": 2.066894769668579, |
|
"rewards/rejected": -7.36708927154541, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.803633449104996, |
|
"grad_norm": 14.566838990350824, |
|
"learning_rate": 1.1009042931438783e-07, |
|
"logits/chosen": -0.6575983762741089, |
|
"logits/rejected": -0.699353814125061, |
|
"logps/chosen": -693.1535034179688, |
|
"logps/rejected": -887.3889770507812, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.220640182495117, |
|
"rewards/margins": 1.8395450115203857, |
|
"rewards/rejected": -7.060185432434082, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8079080951108736, |
|
"grad_norm": 17.580820182141434, |
|
"learning_rate": 1.0542974530180327e-07, |
|
"logits/chosen": -0.6650811433792114, |
|
"logits/rejected": -0.7292627692222595, |
|
"logps/chosen": -640.0955200195312, |
|
"logps/rejected": -821.3978881835938, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.4905171394348145, |
|
"rewards/margins": 1.7442741394042969, |
|
"rewards/rejected": -6.234791278839111, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 14.138278292631155, |
|
"learning_rate": 1.0085821169782199e-07, |
|
"logits/chosen": -0.6633419394493103, |
|
"logits/rejected": -0.7350410223007202, |
|
"logps/chosen": -550.8514404296875, |
|
"logps/rejected": -755.83056640625, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.9601831436157227, |
|
"rewards/margins": 1.8831403255462646, |
|
"rewards/rejected": -5.843323707580566, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8164573871226289, |
|
"grad_norm": 15.051759686903543, |
|
"learning_rate": 9.637686140871121e-08, |
|
"logits/chosen": -0.5633993148803711, |
|
"logits/rejected": -0.5642579793930054, |
|
"logps/chosen": -751.5147705078125, |
|
"logps/rejected": -917.2871704101562, |
|
"loss": 0.3641, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.359073162078857, |
|
"rewards/margins": 1.6620866060256958, |
|
"rewards/rejected": -7.021159648895264, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8207320331285065, |
|
"grad_norm": 16.652659340140023, |
|
"learning_rate": 9.198670696444338e-08, |
|
"logits/chosen": -0.6166589260101318, |
|
"logits/rejected": -0.6604666709899902, |
|
"logps/chosen": -641.2590942382812, |
|
"logps/rejected": -832.0524291992188, |
|
"loss": 0.3808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.821630954742432, |
|
"rewards/margins": 1.7895194292068481, |
|
"rewards/rejected": -6.611149787902832, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8250066791343842, |
|
"grad_norm": 14.1280180622041, |
|
"learning_rate": 8.768874028992429e-08, |
|
"logits/chosen": -0.6036140322685242, |
|
"logits/rejected": -0.6334025859832764, |
|
"logps/chosen": -613.3704223632812, |
|
"logps/rejected": -792.6769409179688, |
|
"loss": 0.3289, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.494506359100342, |
|
"rewards/margins": 1.7201225757598877, |
|
"rewards/rejected": -6.21462869644165, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8292813251402619, |
|
"grad_norm": 13.810909832189019, |
|
"learning_rate": 8.348393248087287e-08, |
|
"logits/chosen": -0.5372692346572876, |
|
"logits/rejected": -0.5193148255348206, |
|
"logps/chosen": -559.5550537109375, |
|
"logps/rejected": -737.1176147460938, |
|
"loss": 0.3486, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.210740089416504, |
|
"rewards/margins": 1.7630614042282104, |
|
"rewards/rejected": -5.973801612854004, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 13.364037119659574, |
|
"learning_rate": 7.937323358440934e-08, |
|
"logits/chosen": -0.7003932595252991, |
|
"logits/rejected": -0.6555891633033752, |
|
"logps/chosen": -687.826171875, |
|
"logps/rejected": -845.4681396484375, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.774288654327393, |
|
"rewards/margins": 1.7430295944213867, |
|
"rewards/rejected": -6.517318248748779, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8378306171520171, |
|
"grad_norm": 14.21133162771402, |
|
"learning_rate": 7.535757238439938e-08, |
|
"logits/chosen": -0.7010968923568726, |
|
"logits/rejected": -0.7519139647483826, |
|
"logps/chosen": -618.968017578125, |
|
"logps/rejected": -890.8580322265625, |
|
"loss": 0.2995, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.3997673988342285, |
|
"rewards/margins": 2.6948697566986084, |
|
"rewards/rejected": -7.094637393951416, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 14.29704216155569, |
|
"learning_rate": 7.143785619160026e-08, |
|
"logits/chosen": -0.8667165637016296, |
|
"logits/rejected": -0.9397881031036377, |
|
"logps/chosen": -635.5562133789062, |
|
"logps/rejected": -869.5856323242188, |
|
"loss": 0.2775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.424742221832275, |
|
"rewards/margins": 2.1135940551757812, |
|
"rewards/rejected": -6.538336277008057, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8463799091637724, |
|
"grad_norm": 15.830532425162781, |
|
"learning_rate": 6.761497063866206e-08, |
|
"logits/chosen": -0.715203583240509, |
|
"logits/rejected": -0.7201322317123413, |
|
"logps/chosen": -675.7356567382812, |
|
"logps/rejected": -823.666748046875, |
|
"loss": 0.3961, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.689385890960693, |
|
"rewards/margins": 1.436130404472351, |
|
"rewards/rejected": -6.125515937805176, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.85065455516965, |
|
"grad_norm": 12.71855284005859, |
|
"learning_rate": 6.388977948002406e-08, |
|
"logits/chosen": -0.6863987445831299, |
|
"logits/rejected": -0.7026057839393616, |
|
"logps/chosen": -635.3793334960938, |
|
"logps/rejected": -810.3753051757812, |
|
"loss": 0.3249, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.71897554397583, |
|
"rewards/margins": 1.7087393999099731, |
|
"rewards/rejected": -6.427714824676514, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 14.663842598726813, |
|
"learning_rate": 6.026312439675551e-08, |
|
"logits/chosen": -0.6016858220100403, |
|
"logits/rejected": -0.6423814296722412, |
|
"logps/chosen": -533.045654296875, |
|
"logps/rejected": -688.97705078125, |
|
"loss": 0.343, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.7861201763153076, |
|
"rewards/margins": 1.5125634670257568, |
|
"rewards/rejected": -5.298683166503906, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"eval_logits/chosen": -0.5621978044509888, |
|
"eval_logits/rejected": -0.5776455998420715, |
|
"eval_logps/chosen": -698.124755859375, |
|
"eval_logps/rejected": -878.510498046875, |
|
"eval_loss": 0.3298446834087372, |
|
"eval_rewards/accuracies": 0.8951612710952759, |
|
"eval_rewards/chosen": -4.931028842926025, |
|
"eval_rewards/margins": 1.8655366897583008, |
|
"eval_rewards/rejected": -6.796565055847168, |
|
"eval_runtime": 148.1942, |
|
"eval_samples_per_second": 13.233, |
|
"eval_steps_per_second": 0.418, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8592038471814053, |
|
"grad_norm": 13.178173716498932, |
|
"learning_rate": 5.6735824806383945e-08, |
|
"logits/chosen": -0.8137510418891907, |
|
"logits/rejected": -0.8618326783180237, |
|
"logps/chosen": -762.1471557617188, |
|
"logps/rejected": -987.831787109375, |
|
"loss": 0.3117, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.3300957679748535, |
|
"rewards/margins": 2.290207862854004, |
|
"rewards/rejected": -7.620304107666016, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.863478493187283, |
|
"grad_norm": 15.015075721065077, |
|
"learning_rate": 5.3308677677753324e-08, |
|
"logits/chosen": -0.6007865071296692, |
|
"logits/rejected": -0.6010035872459412, |
|
"logps/chosen": -607.5897827148438, |
|
"logps/rejected": -783.7905883789062, |
|
"loss": 0.3751, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.329671382904053, |
|
"rewards/margins": 1.7175862789154053, |
|
"rewards/rejected": -6.047257423400879, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.8677531391931605, |
|
"grad_norm": 17.440815026489904, |
|
"learning_rate": 4.9982457350954576e-08, |
|
"logits/chosen": -0.6124709844589233, |
|
"logits/rejected": -0.5998551249504089, |
|
"logps/chosen": -768.2071533203125, |
|
"logps/rejected": -909.81982421875, |
|
"loss": 0.337, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.643802642822266, |
|
"rewards/margins": 1.5375871658325195, |
|
"rewards/rejected": -7.181390285491943, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.8720277851990382, |
|
"grad_norm": 13.117492290397454, |
|
"learning_rate": 4.675791536236856e-08, |
|
"logits/chosen": -0.6630779504776001, |
|
"logits/rejected": -0.7016023397445679, |
|
"logps/chosen": -576.6640014648438, |
|
"logps/rejected": -781.410888671875, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.391974449157715, |
|
"rewards/margins": 2.0928616523742676, |
|
"rewards/rejected": -6.484836101531982, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 16.38485979196371, |
|
"learning_rate": 4.3635780274861864e-08, |
|
"logits/chosen": -0.6497581005096436, |
|
"logits/rejected": -0.7152493596076965, |
|
"logps/chosen": -604.6185302734375, |
|
"logps/rejected": -769.5409545898438, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.433528900146484, |
|
"rewards/margins": 1.5042006969451904, |
|
"rewards/rejected": -5.937729835510254, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8805770772107935, |
|
"grad_norm": 19.27882004478496, |
|
"learning_rate": 4.0616757513173115e-08, |
|
"logits/chosen": -0.5923482775688171, |
|
"logits/rejected": -0.6674529910087585, |
|
"logps/chosen": -730.7793579101562, |
|
"logps/rejected": -982.1051025390625, |
|
"loss": 0.3504, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.950905799865723, |
|
"rewards/margins": 2.3350894451141357, |
|
"rewards/rejected": -7.2859954833984375, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.8848517232166712, |
|
"grad_norm": 12.70289041030948, |
|
"learning_rate": 3.7701529204526846e-08, |
|
"logits/chosen": -0.5751956105232239, |
|
"logits/rejected": -0.6102803349494934, |
|
"logps/chosen": -638.7919311523438, |
|
"logps/rejected": -788.8345336914062, |
|
"loss": 0.3242, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.849457740783691, |
|
"rewards/margins": 1.458791971206665, |
|
"rewards/rejected": -6.308249473571777, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.8891263692225487, |
|
"grad_norm": 16.266039867395605, |
|
"learning_rate": 3.4890754024512246e-08, |
|
"logits/chosen": -0.6625305414199829, |
|
"logits/rejected": -0.6873234510421753, |
|
"logps/chosen": -717.0994873046875, |
|
"logps/rejected": -884.3046875, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.226888179779053, |
|
"rewards/margins": 1.590306043624878, |
|
"rewards/rejected": -6.81719446182251, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.8934010152284264, |
|
"grad_norm": 14.509559337608318, |
|
"learning_rate": 3.218506704825924e-08, |
|
"logits/chosen": -0.5810579061508179, |
|
"logits/rejected": -0.6108168363571167, |
|
"logps/chosen": -675.04150390625, |
|
"logps/rejected": -836.3502197265625, |
|
"loss": 0.3439, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.011352062225342, |
|
"rewards/margins": 1.6481198072433472, |
|
"rewards/rejected": -6.6594719886779785, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 17.634651177078286, |
|
"learning_rate": 2.958507960694784e-08, |
|
"logits/chosen": -0.5604880452156067, |
|
"logits/rejected": -0.5632505416870117, |
|
"logps/chosen": -718.5660400390625, |
|
"logps/rejected": -901.1178588867188, |
|
"loss": 0.3646, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.241810321807861, |
|
"rewards/margins": 1.8300410509109497, |
|
"rewards/rejected": -7.071850776672363, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9019503072401817, |
|
"grad_norm": 14.379481683528487, |
|
"learning_rate": 2.7091379149682682e-08, |
|
"logits/chosen": -0.7952392101287842, |
|
"logits/rejected": -0.8281516432762146, |
|
"logps/chosen": -715.3723754882812, |
|
"logps/rejected": -877.9434814453125, |
|
"loss": 0.3452, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.230836868286133, |
|
"rewards/margins": 1.5704870223999023, |
|
"rewards/rejected": -6.801323413848877, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9062249532460593, |
|
"grad_norm": 12.38379892339324, |
|
"learning_rate": 2.470452911076226e-08, |
|
"logits/chosen": -0.6212865114212036, |
|
"logits/rejected": -0.7052218317985535, |
|
"logps/chosen": -568.6085205078125, |
|
"logps/rejected": -806.6718139648438, |
|
"loss": 0.3225, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.2210798263549805, |
|
"rewards/margins": 2.2609493732452393, |
|
"rewards/rejected": -6.482028484344482, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9104995992519369, |
|
"grad_norm": 13.671352372902698, |
|
"learning_rate": 2.2425068782375378e-08, |
|
"logits/chosen": -0.708694577217102, |
|
"logits/rejected": -0.7406002283096313, |
|
"logps/chosen": -631.50537109375, |
|
"logps/rejected": -812.782958984375, |
|
"loss": 0.307, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.614077091217041, |
|
"rewards/margins": 1.823103427886963, |
|
"rewards/rejected": -6.4371795654296875, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9147742452578146, |
|
"grad_norm": 14.646295067927662, |
|
"learning_rate": 2.025351319275137e-08, |
|
"logits/chosen": -0.6601104736328125, |
|
"logits/rejected": -0.6835007667541504, |
|
"logps/chosen": -590.8656005859375, |
|
"logps/rejected": -773.0665283203125, |
|
"loss": 0.3485, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.47109317779541, |
|
"rewards/margins": 1.788784384727478, |
|
"rewards/rejected": -6.259877681732178, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 13.735401688183746, |
|
"learning_rate": 1.8190352989793322e-08, |
|
"logits/chosen": -0.6832427978515625, |
|
"logits/rejected": -0.7862576246261597, |
|
"logps/chosen": -725.4074096679688, |
|
"logps/rejected": -984.6351318359375, |
|
"loss": 0.3231, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.303395748138428, |
|
"rewards/margins": 2.5184988975524902, |
|
"rewards/rejected": -7.821893692016602, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9233235372695698, |
|
"grad_norm": 15.601926902374625, |
|
"learning_rate": 1.623605433021985e-08, |
|
"logits/chosen": -0.7613773345947266, |
|
"logits/rejected": -0.8268823623657227, |
|
"logps/chosen": -661.9906005859375, |
|
"logps/rejected": -919.4451904296875, |
|
"loss": 0.3372, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.652050018310547, |
|
"rewards/margins": 2.513514995574951, |
|
"rewards/rejected": -7.1655659675598145, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9275981832754475, |
|
"grad_norm": 16.26278938580055, |
|
"learning_rate": 1.4391058774239629e-08, |
|
"logits/chosen": -0.6215861439704895, |
|
"logits/rejected": -0.684840977191925, |
|
"logps/chosen": -776.4395751953125, |
|
"logps/rejected": -996.853271484375, |
|
"loss": 0.326, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.608438491821289, |
|
"rewards/margins": 1.9786306619644165, |
|
"rewards/rejected": -7.587069511413574, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9318728292813251, |
|
"grad_norm": 14.96580166748689, |
|
"learning_rate": 1.2655783185784252e-08, |
|
"logits/chosen": -0.5001079440116882, |
|
"logits/rejected": -0.5907378792762756, |
|
"logps/chosen": -636.527099609375, |
|
"logps/rejected": -864.0882568359375, |
|
"loss": 0.3191, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.651758670806885, |
|
"rewards/margins": 2.046351432800293, |
|
"rewards/rejected": -6.698110103607178, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9361474752872028, |
|
"grad_norm": 16.265287798595743, |
|
"learning_rate": 1.1030619638320804e-08, |
|
"logits/chosen": -0.7028571963310242, |
|
"logits/rejected": -0.7375434041023254, |
|
"logps/chosen": -659.0259399414062, |
|
"logps/rejected": -862.6585693359375, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.674835205078125, |
|
"rewards/margins": 1.9501476287841797, |
|
"rewards/rejected": -6.624982833862305, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 18.704093009678243, |
|
"learning_rate": 9.515935326265378e-09, |
|
"logits/chosen": -0.6694950461387634, |
|
"logits/rejected": -0.7062525749206543, |
|
"logps/chosen": -723.8511962890625, |
|
"logps/rejected": -943.264892578125, |
|
"loss": 0.3529, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.229786396026611, |
|
"rewards/margins": 2.252092123031616, |
|
"rewards/rejected": -7.481878757476807, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.944696767298958, |
|
"grad_norm": 14.334384768178085, |
|
"learning_rate": 8.11207248201834e-09, |
|
"logits/chosen": -0.5764753818511963, |
|
"logits/rejected": -0.5958765745162964, |
|
"logps/chosen": -652.0119018554688, |
|
"logps/rejected": -832.4238891601562, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.68829870223999, |
|
"rewards/margins": 1.8251862525939941, |
|
"rewards/rejected": -6.513484001159668, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9489714133048357, |
|
"grad_norm": 13.331601354010285, |
|
"learning_rate": 6.819348298638839e-09, |
|
"logits/chosen": -0.5833301544189453, |
|
"logits/rejected": -0.5820556879043579, |
|
"logps/chosen": -636.1702270507812, |
|
"logps/rejected": -775.4622192382812, |
|
"loss": 0.3313, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.623978137969971, |
|
"rewards/margins": 1.5225858688354492, |
|
"rewards/rejected": -6.14656400680542, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.9532460593107134, |
|
"grad_norm": 14.100864285899743, |
|
"learning_rate": 5.638054858177643e-09, |
|
"logits/chosen": -0.6460739374160767, |
|
"logits/rejected": -0.704484760761261, |
|
"logps/chosen": -666.4550170898438, |
|
"logps/rejected": -879.8217163085938, |
|
"loss": 0.3125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.722194194793701, |
|
"rewards/margins": 2.0787832736968994, |
|
"rewards/rejected": -6.8009772300720215, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.957520705316591, |
|
"grad_norm": 14.340696291340606, |
|
"learning_rate": 4.568459065683205e-09, |
|
"logits/chosen": -0.6707419753074646, |
|
"logits/rejected": -0.6990107893943787, |
|
"logps/chosen": -594.0106201171875, |
|
"logps/rejected": -786.5427856445312, |
|
"loss": 0.2979, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.234139442443848, |
|
"rewards/margins": 1.8917232751846313, |
|
"rewards/rejected": -6.1258625984191895, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 16.87207011175805, |
|
"learning_rate": 3.6108025888958447e-09, |
|
"logits/chosen": -0.6689302921295166, |
|
"logits/rejected": -0.7221664786338806, |
|
"logps/chosen": -647.2908935546875, |
|
"logps/rejected": -820.8074340820312, |
|
"loss": 0.3808, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.4107489585876465, |
|
"rewards/margins": 1.711435317993164, |
|
"rewards/rejected": -6.1221842765808105, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9660699973283462, |
|
"grad_norm": 15.439317380628985, |
|
"learning_rate": 2.7653018036454256e-09, |
|
"logits/chosen": -0.7938471436500549, |
|
"logits/rejected": -0.8257592916488647, |
|
"logps/chosen": -685.37255859375, |
|
"logps/rejected": -856.272705078125, |
|
"loss": 0.376, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.837329864501953, |
|
"rewards/margins": 1.7641983032226562, |
|
"rewards/rejected": -6.601528167724609, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.9703446433342239, |
|
"grad_norm": 15.341084234652751, |
|
"learning_rate": 2.0321477449619096e-09, |
|
"logits/chosen": -0.6373786330223083, |
|
"logits/rejected": -0.6358063220977783, |
|
"logps/chosen": -671.7559814453125, |
|
"logps/rejected": -808.6800537109375, |
|
"loss": 0.3511, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.954955101013184, |
|
"rewards/margins": 1.4025709629058838, |
|
"rewards/rejected": -6.357525825500488, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.9746192893401016, |
|
"grad_norm": 16.08851675396365, |
|
"learning_rate": 1.4115060639128818e-09, |
|
"logits/chosen": -0.7105420827865601, |
|
"logits/rejected": -0.7698283195495605, |
|
"logps/chosen": -771.7847900390625, |
|
"logps/rejected": -1010.12255859375, |
|
"loss": 0.3947, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.364959716796875, |
|
"rewards/margins": 2.1707944869995117, |
|
"rewards/rejected": -7.535754203796387, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.9788939353459791, |
|
"grad_norm": 16.317073121348123, |
|
"learning_rate": 9.035169901754902e-10, |
|
"logits/chosen": -0.6789236664772034, |
|
"logits/rejected": -0.718908429145813, |
|
"logps/chosen": -675.6837158203125, |
|
"logps/rejected": -947.82958984375, |
|
"loss": 0.3487, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.9360833168029785, |
|
"rewards/margins": 2.77742600440979, |
|
"rewards/rejected": -7.713509559631348, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 14.913455884524495, |
|
"learning_rate": 5.082953003528456e-10, |
|
"logits/chosen": -0.7133156061172485, |
|
"logits/rejected": -0.7598533630371094, |
|
"logps/chosen": -662.4967651367188, |
|
"logps/rejected": -898.2874145507812, |
|
"loss": 0.3301, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.673389911651611, |
|
"rewards/margins": 2.1788995265960693, |
|
"rewards/rejected": -6.852289199829102, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9874432273577345, |
|
"grad_norm": 13.34444116932449, |
|
"learning_rate": 2.2593029204076574e-10, |
|
"logits/chosen": -0.7370655536651611, |
|
"logits/rejected": -0.7865728139877319, |
|
"logps/chosen": -603.37109375, |
|
"logps/rejected": -828.8916015625, |
|
"loss": 0.3555, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.3032660484313965, |
|
"rewards/margins": 2.1432766914367676, |
|
"rewards/rejected": -6.446542739868164, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.9917178733636121, |
|
"grad_norm": 14.986063948498115, |
|
"learning_rate": 5.648576365169244e-11, |
|
"logits/chosen": -0.7017238140106201, |
|
"logits/rejected": -0.7418109178543091, |
|
"logps/chosen": -690.6285400390625, |
|
"logps/rejected": -863.64208984375, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.160223960876465, |
|
"rewards/margins": 1.6970188617706299, |
|
"rewards/rejected": -6.857243061065674, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.9959925193694897, |
|
"grad_norm": 14.725841841963547, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.540886402130127, |
|
"logits/rejected": -0.5910319089889526, |
|
"logps/chosen": -539.7564697265625, |
|
"logps/rejected": -689.2047119140625, |
|
"loss": 0.3329, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.104672431945801, |
|
"rewards/margins": 1.427870750427246, |
|
"rewards/rejected": -5.532542705535889, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.9959925193694897, |
|
"step": 233, |
|
"total_flos": 0.0, |
|
"train_loss": 0.45488236134655996, |
|
"train_runtime": 10577.9564, |
|
"train_samples_per_second": 5.66, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 233, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|