|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.6577353477478027, |
|
"logits/rejected": -2.043900489807129, |
|
"logps/chosen": -505.98724365234375, |
|
"logps/rejected": -319.40179443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.477527141571045, |
|
"logits/rejected": -2.134815216064453, |
|
"logps/chosen": -285.37506103515625, |
|
"logps/rejected": -191.59552001953125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.2777777910232544, |
|
"rewards/chosen": -1.798523953766562e-05, |
|
"rewards/margins": -2.5926061425707303e-05, |
|
"rewards/rejected": 7.940820069052279e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.360628843307495, |
|
"logits/rejected": -2.1267056465148926, |
|
"logps/chosen": -271.4191589355469, |
|
"logps/rejected": -208.81991577148438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00010290103818988428, |
|
"rewards/margins": 2.105976818711497e-05, |
|
"rewards/rejected": 8.184127364074811e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.278747797012329, |
|
"logits/rejected": -2.2493417263031006, |
|
"logps/chosen": -269.8002624511719, |
|
"logps/rejected": -288.9651794433594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00028741464484483004, |
|
"rewards/margins": 0.00019061131752096117, |
|
"rewards/rejected": 9.68033418757841e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.5087287425994873, |
|
"logits/rejected": -2.340841293334961, |
|
"logps/chosen": -210.5767059326172, |
|
"logps/rejected": -181.60897827148438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0001595711219124496, |
|
"rewards/margins": 0.00010818429291248322, |
|
"rewards/rejected": 5.138682899996638e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.362971067428589, |
|
"logits/rejected": -2.338986873626709, |
|
"logps/chosen": -195.54049682617188, |
|
"logps/rejected": -211.3101806640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0001843376230681315, |
|
"rewards/margins": 0.00030581915052607656, |
|
"rewards/rejected": -0.0001214815056300722, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.495109796524048, |
|
"logits/rejected": -2.304320812225342, |
|
"logps/chosen": -244.63357543945312, |
|
"logps/rejected": -277.849853515625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.000983591889962554, |
|
"rewards/margins": 1.5824800357222557e-05, |
|
"rewards/rejected": -0.0009994168067350984, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.2505688667297363, |
|
"logits/rejected": -2.2676665782928467, |
|
"logps/chosen": -229.9136199951172, |
|
"logps/rejected": -216.98001098632812, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0010834664572030306, |
|
"rewards/margins": 0.0009649285930208862, |
|
"rewards/rejected": -0.0020483951084315777, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.4005239009857178, |
|
"logits/rejected": -2.3944191932678223, |
|
"logps/chosen": -266.11859130859375, |
|
"logps/rejected": -262.9701232910156, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0014705440262332559, |
|
"rewards/margins": 0.002042059786617756, |
|
"rewards/rejected": -0.00351260369643569, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.3290226459503174, |
|
"logits/rejected": -1.9915828704833984, |
|
"logps/chosen": -268.29278564453125, |
|
"logps/rejected": -180.13323974609375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002903540385887027, |
|
"rewards/margins": 0.002247781725600362, |
|
"rewards/rejected": -0.005151322111487389, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.3091585636138916, |
|
"logits/rejected": -2.312863826751709, |
|
"logps/chosen": -275.3695373535156, |
|
"logps/rejected": -251.3533477783203, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.003613454522565007, |
|
"rewards/margins": 0.0010787051869556308, |
|
"rewards/rejected": -0.004692160524427891, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.258510112762451, |
|
"logits/rejected": -2.1448404788970947, |
|
"logps/chosen": -211.90158081054688, |
|
"logps/rejected": -180.40475463867188, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0001777430734364316, |
|
"rewards/margins": 0.0037381600122898817, |
|
"rewards/rejected": -0.003560416866093874, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.158743381500244, |
|
"logits/rejected": -2.169588088989258, |
|
"logps/chosen": -238.22067260742188, |
|
"logps/rejected": -270.46417236328125, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0018882494186982512, |
|
"rewards/margins": 0.005054115317761898, |
|
"rewards/rejected": -0.006942364387214184, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -2.4432594776153564, |
|
"logits/rejected": -2.315918445587158, |
|
"logps/chosen": -237.79501342773438, |
|
"logps/rejected": -196.10853576660156, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0036071953363716602, |
|
"rewards/margins": 0.005986797157675028, |
|
"rewards/rejected": -0.009593991562724113, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -2.3266425132751465, |
|
"logits/rejected": -2.025289535522461, |
|
"logps/chosen": -207.1509552001953, |
|
"logps/rejected": -187.79754638671875, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.015933997929096222, |
|
"rewards/margins": 0.004653518553823233, |
|
"rewards/rejected": -0.02058752067387104, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -2.303725481033325, |
|
"logits/rejected": -2.190458297729492, |
|
"logps/chosen": -239.7921600341797, |
|
"logps/rejected": -213.1902618408203, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01170186698436737, |
|
"rewards/margins": 0.013611750677227974, |
|
"rewards/rejected": -0.025313619524240494, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -2.1805403232574463, |
|
"logits/rejected": -2.2514309883117676, |
|
"logps/chosen": -323.44732666015625, |
|
"logps/rejected": -342.3636779785156, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.07079382240772247, |
|
"rewards/margins": 0.012775696814060211, |
|
"rewards/rejected": -0.08356951922178268, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -2.377260446548462, |
|
"logits/rejected": -1.992498755455017, |
|
"logps/chosen": -361.740966796875, |
|
"logps/rejected": -316.5578308105469, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.09483315795660019, |
|
"rewards/margins": -0.003298636060208082, |
|
"rewards/rejected": -0.09153451770544052, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -2.125253677368164, |
|
"logits/rejected": -2.1080162525177, |
|
"logps/chosen": -345.4566955566406, |
|
"logps/rejected": -362.8345642089844, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.11889471858739853, |
|
"rewards/margins": 0.017375323921442032, |
|
"rewards/rejected": -0.13627007603645325, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.872900366783142, |
|
"logits/rejected": -1.8023853302001953, |
|
"logps/chosen": -607.0579223632812, |
|
"logps/rejected": -631.1737060546875, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.37075790762901306, |
|
"rewards/margins": 0.051843322813510895, |
|
"rewards/rejected": -0.42260122299194336, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -2.2707138061523438, |
|
"logits/rejected": -2.1255440711975098, |
|
"logps/chosen": -350.2916564941406, |
|
"logps/rejected": -372.66778564453125, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09592956304550171, |
|
"rewards/margins": 0.014601891860365868, |
|
"rewards/rejected": -0.11053146421909332, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -2.4437155723571777, |
|
"logits/rejected": -2.0841479301452637, |
|
"logps/chosen": -348.2846984863281, |
|
"logps/rejected": -332.79693603515625, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07477747648954391, |
|
"rewards/margins": 0.06754375249147415, |
|
"rewards/rejected": -0.14232121407985687, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -2.2131965160369873, |
|
"logits/rejected": -1.79428231716156, |
|
"logps/chosen": -453.1966247558594, |
|
"logps/rejected": -504.7650451660156, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2166297435760498, |
|
"rewards/margins": 0.09042102098464966, |
|
"rewards/rejected": -0.30705076456069946, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -2.114473342895508, |
|
"logits/rejected": -2.1522789001464844, |
|
"logps/chosen": -447.54510498046875, |
|
"logps/rejected": -531.5616455078125, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.24870488047599792, |
|
"rewards/margins": 0.05732503533363342, |
|
"rewards/rejected": -0.30602994561195374, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -2.0410103797912598, |
|
"logits/rejected": -1.8702392578125, |
|
"logps/chosen": -417.52557373046875, |
|
"logps/rejected": -418.7784118652344, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.21090665459632874, |
|
"rewards/margins": 0.014016765169799328, |
|
"rewards/rejected": -0.22492341697216034, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.9370673894882202, |
|
"logits/rejected": -1.845969796180725, |
|
"logps/chosen": -467.31817626953125, |
|
"logps/rejected": -516.6582641601562, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2973518967628479, |
|
"rewards/margins": 0.04663931205868721, |
|
"rewards/rejected": -0.34399116039276123, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.7340936660766602, |
|
"logits/rejected": -1.4695367813110352, |
|
"logps/chosen": -1242.895751953125, |
|
"logps/rejected": -1449.67138671875, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9694870114326477, |
|
"rewards/margins": 0.2230859100818634, |
|
"rewards/rejected": -1.192572832107544, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.9577367305755615, |
|
"logits/rejected": -1.7384374141693115, |
|
"logps/chosen": -651.7098999023438, |
|
"logps/rejected": -757.5595703125, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.40336376428604126, |
|
"rewards/margins": 0.14299169182777405, |
|
"rewards/rejected": -0.5463554263114929, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.6682405471801758, |
|
"logits/rejected": -1.7225072383880615, |
|
"logps/chosen": -636.3197021484375, |
|
"logps/rejected": -823.1611328125, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4699520468711853, |
|
"rewards/margins": 0.15158866345882416, |
|
"rewards/rejected": -0.6215407252311707, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.7123403549194336, |
|
"logits/rejected": -1.4536840915679932, |
|
"logps/chosen": -984.7986450195312, |
|
"logps/rejected": -1116.86328125, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7080470323562622, |
|
"rewards/margins": 0.18753795325756073, |
|
"rewards/rejected": -0.8955849409103394, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.5288642644882202, |
|
"logits/rejected": -1.4273738861083984, |
|
"logps/chosen": -1024.952880859375, |
|
"logps/rejected": -1212.50439453125, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.734005331993103, |
|
"rewards/margins": 0.26781877875328064, |
|
"rewards/rejected": -1.0018240213394165, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.244638204574585, |
|
"logits/rejected": -1.2806625366210938, |
|
"logps/chosen": -906.6008911132812, |
|
"logps/rejected": -1112.8660888671875, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.713959813117981, |
|
"rewards/margins": 0.16486592590808868, |
|
"rewards/rejected": -0.8788257837295532, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.2582708597183228, |
|
"logits/rejected": -0.9609068632125854, |
|
"logps/chosen": -789.2811889648438, |
|
"logps/rejected": -924.0436401367188, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.53520667552948, |
|
"rewards/margins": 0.21356996893882751, |
|
"rewards/rejected": -0.7487767338752747, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.5856783390045166, |
|
"logits/rejected": -1.6001255512237549, |
|
"logps/chosen": -586.2677001953125, |
|
"logps/rejected": -678.0260009765625, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.31349319219589233, |
|
"rewards/margins": 0.13062533736228943, |
|
"rewards/rejected": -0.44411858916282654, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.710599660873413, |
|
"logits/rejected": -1.4391021728515625, |
|
"logps/chosen": -415.0328674316406, |
|
"logps/rejected": -529.9607543945312, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2211925983428955, |
|
"rewards/margins": 0.15037958323955536, |
|
"rewards/rejected": -0.3715721666812897, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.8084720373153687, |
|
"logits/rejected": -1.506037712097168, |
|
"logps/chosen": -570.231201171875, |
|
"logps/rejected": -703.8088989257812, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.31438136100769043, |
|
"rewards/margins": 0.16334742307662964, |
|
"rewards/rejected": -0.4777289032936096, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.4908154010772705, |
|
"logits/rejected": -1.2486298084259033, |
|
"logps/chosen": -1008.43310546875, |
|
"logps/rejected": -1258.6966552734375, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7509051561355591, |
|
"rewards/margins": 0.28672417998313904, |
|
"rewards/rejected": -1.0376293659210205, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.114203929901123, |
|
"logits/rejected": -1.0544617176055908, |
|
"logps/chosen": -935.9404296875, |
|
"logps/rejected": -1014.9347534179688, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7481463551521301, |
|
"rewards/margins": 0.08286388218402863, |
|
"rewards/rejected": -0.8310102224349976, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -0.82923823595047, |
|
"logits/rejected": -0.7714365124702454, |
|
"logps/chosen": -1037.5648193359375, |
|
"logps/rejected": -1317.840087890625, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8485895991325378, |
|
"rewards/margins": 0.2712119519710541, |
|
"rewards/rejected": -1.1198015213012695, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -0.4438857138156891, |
|
"logits/rejected": -0.24459032714366913, |
|
"logps/chosen": -1160.9703369140625, |
|
"logps/rejected": -1448.4993896484375, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9693711996078491, |
|
"rewards/margins": 0.3293563425540924, |
|
"rewards/rejected": -1.2987276315689087, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.0683261156082153, |
|
"logits/rejected": -1.0331823825836182, |
|
"logps/chosen": -835.9192504882812, |
|
"logps/rejected": -1117.007568359375, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.6222294569015503, |
|
"rewards/margins": 0.26769739389419556, |
|
"rewards/rejected": -0.8899267911911011, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.524524450302124, |
|
"logits/rejected": -1.3127562999725342, |
|
"logps/chosen": -700.7545166015625, |
|
"logps/rejected": -866.1266479492188, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4409456253051758, |
|
"rewards/margins": 0.15933458507061005, |
|
"rewards/rejected": -0.600280225276947, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.8277499675750732, |
|
"logits/rejected": -1.6153600215911865, |
|
"logps/chosen": -648.5438842773438, |
|
"logps/rejected": -690.0060424804688, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3811865448951721, |
|
"rewards/margins": 0.06636995077133179, |
|
"rewards/rejected": -0.4475564956665039, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.4686410427093506, |
|
"logits/rejected": -1.4586213827133179, |
|
"logps/chosen": -1219.008544921875, |
|
"logps/rejected": -1602.6322021484375, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9192007184028625, |
|
"rewards/margins": 0.37095293402671814, |
|
"rewards/rejected": -1.2901536226272583, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.156723976135254, |
|
"logits/rejected": -0.8702109456062317, |
|
"logps/chosen": -1515.7261962890625, |
|
"logps/rejected": -1630.477783203125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.331788420677185, |
|
"rewards/margins": 0.15234079957008362, |
|
"rewards/rejected": -1.4841291904449463, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.4883257150650024, |
|
"logits/rejected": -1.2767736911773682, |
|
"logps/chosen": -1196.55517578125, |
|
"logps/rejected": -1548.430419921875, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9334648847579956, |
|
"rewards/margins": 0.3865690231323242, |
|
"rewards/rejected": -1.3200337886810303, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.688001275062561, |
|
"logits/rejected": -1.5989247560501099, |
|
"logps/chosen": -713.7869873046875, |
|
"logps/rejected": -806.5700073242188, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.4852449297904968, |
|
"rewards/margins": 0.12311458587646484, |
|
"rewards/rejected": -0.6083595752716064, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.6326487064361572, |
|
"logits/rejected": -1.5402270555496216, |
|
"logps/chosen": -553.8263549804688, |
|
"logps/rejected": -636.650390625, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3886161148548126, |
|
"rewards/margins": 0.10035456717014313, |
|
"rewards/rejected": -0.48897066712379456, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.2314743995666504, |
|
"logits/rejected": -1.1016968488693237, |
|
"logps/chosen": -1220.119140625, |
|
"logps/rejected": -1464.5045166015625, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0543556213378906, |
|
"rewards/margins": 0.22365888953208923, |
|
"rewards/rejected": -1.2780145406723022, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.1137750148773193, |
|
"logits/rejected": -0.8115717768669128, |
|
"logps/chosen": -1465.813720703125, |
|
"logps/rejected": -1871.6728515625, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2194162607192993, |
|
"rewards/margins": 0.45649608969688416, |
|
"rewards/rejected": -1.6759124994277954, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.160712480545044, |
|
"logits/rejected": -1.0044102668762207, |
|
"logps/chosen": -1403.710693359375, |
|
"logps/rejected": -1854.1767578125, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2094306945800781, |
|
"rewards/margins": 0.44932323694229126, |
|
"rewards/rejected": -1.658753752708435, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.4455738067626953, |
|
"logits/rejected": -1.3064179420471191, |
|
"logps/chosen": -1048.9417724609375, |
|
"logps/rejected": -1300.092529296875, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8072590827941895, |
|
"rewards/margins": 0.2859098017215729, |
|
"rewards/rejected": -1.0931689739227295, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.168351650238037, |
|
"logits/rejected": -1.0151941776275635, |
|
"logps/chosen": -1202.1624755859375, |
|
"logps/rejected": -1375.6861572265625, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.973349928855896, |
|
"rewards/margins": 0.1988798826932907, |
|
"rewards/rejected": -1.1722297668457031, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.2055370807647705, |
|
"logits/rejected": -1.1897690296173096, |
|
"logps/chosen": -1302.2347412109375, |
|
"logps/rejected": -1396.9376220703125, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.0556827783584595, |
|
"rewards/margins": 0.08378318697214127, |
|
"rewards/rejected": -1.1394660472869873, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.2467204332351685, |
|
"logits/rejected": -0.9966660737991333, |
|
"logps/chosen": -1237.6484375, |
|
"logps/rejected": -1357.805908203125, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.9747149348258972, |
|
"rewards/margins": 0.1926373690366745, |
|
"rewards/rejected": -1.167352318763733, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.0729809999465942, |
|
"logits/rejected": -0.880784809589386, |
|
"logps/chosen": -1176.840576171875, |
|
"logps/rejected": -1703.155029296875, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9750388860702515, |
|
"rewards/margins": 0.547395646572113, |
|
"rewards/rejected": -1.5224344730377197, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.343149185180664, |
|
"logits/rejected": -1.3310397863388062, |
|
"logps/chosen": -1393.66748046875, |
|
"logps/rejected": -1949.219482421875, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0957911014556885, |
|
"rewards/margins": 0.5795475244522095, |
|
"rewards/rejected": -1.6753385066986084, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.2191109657287598, |
|
"logits/rejected": -1.1489986181259155, |
|
"logps/chosen": -1165.780517578125, |
|
"logps/rejected": -1509.78173828125, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9111903309822083, |
|
"rewards/margins": 0.37165799736976624, |
|
"rewards/rejected": -1.2828481197357178, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.2665627002716064, |
|
"logits/rejected": -1.0979241132736206, |
|
"logps/chosen": -1186.7427978515625, |
|
"logps/rejected": -1492.552001953125, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.9691814184188843, |
|
"rewards/margins": 0.35359352827072144, |
|
"rewards/rejected": -1.3227750062942505, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.089277982711792, |
|
"logits/rejected": -0.9281560778617859, |
|
"logps/chosen": -1328.541015625, |
|
"logps/rejected": -1635.415771484375, |
|
"loss": 0.5736, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0775573253631592, |
|
"rewards/margins": 0.35047078132629395, |
|
"rewards/rejected": -1.4280281066894531, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.0954294204711914, |
|
"logits/rejected": -0.9223726391792297, |
|
"logps/chosen": -1286.0498046875, |
|
"logps/rejected": -1733.7152099609375, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0697466135025024, |
|
"rewards/margins": 0.43960708379745483, |
|
"rewards/rejected": -1.5093533992767334, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.2414100170135498, |
|
"logits/rejected": -1.0788267850875854, |
|
"logps/chosen": -808.4361572265625, |
|
"logps/rejected": -1251.864990234375, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.566831648349762, |
|
"rewards/margins": 0.4803600311279297, |
|
"rewards/rejected": -1.0471916198730469, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.262020230293274, |
|
"logits/rejected": -1.148590087890625, |
|
"logps/chosen": -931.0750122070312, |
|
"logps/rejected": -1080.2626953125, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.6786974668502808, |
|
"rewards/margins": 0.17992933094501495, |
|
"rewards/rejected": -0.8586267232894897, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.380244255065918, |
|
"logits/rejected": -1.1846643686294556, |
|
"logps/chosen": -1102.616943359375, |
|
"logps/rejected": -1461.800048828125, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.832280158996582, |
|
"rewards/margins": 0.3882806599140167, |
|
"rewards/rejected": -1.2205607891082764, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.1531554460525513, |
|
"logits/rejected": -0.73602694272995, |
|
"logps/chosen": -1035.087158203125, |
|
"logps/rejected": -1619.76171875, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7359131574630737, |
|
"rewards/margins": 0.6530500650405884, |
|
"rewards/rejected": -1.388963222503662, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.1167099475860596, |
|
"logits/rejected": -0.873482346534729, |
|
"logps/chosen": -1361.4727783203125, |
|
"logps/rejected": -1630.313232421875, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0866434574127197, |
|
"rewards/margins": 0.33569416403770447, |
|
"rewards/rejected": -1.4223374128341675, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -0.7387585639953613, |
|
"logits/rejected": -0.3248792886734009, |
|
"logps/chosen": -1398.619384765625, |
|
"logps/rejected": -2202.85498046875, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1486316919326782, |
|
"rewards/margins": 0.7962583303451538, |
|
"rewards/rejected": -1.944890022277832, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -0.6219555139541626, |
|
"logits/rejected": -0.4327964782714844, |
|
"logps/chosen": -1998.990234375, |
|
"logps/rejected": -2370.507080078125, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.755999207496643, |
|
"rewards/margins": 0.406169593334198, |
|
"rewards/rejected": -2.1621687412261963, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -0.9830737113952637, |
|
"logits/rejected": -0.8169624209403992, |
|
"logps/chosen": -1028.17578125, |
|
"logps/rejected": -1266.951904296875, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8314197659492493, |
|
"rewards/margins": 0.2545499801635742, |
|
"rewards/rejected": -1.0859696865081787, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.0446968078613281, |
|
"logits/rejected": -0.7680097818374634, |
|
"logps/chosen": -963.1312255859375, |
|
"logps/rejected": -1276.6685791015625, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7486265301704407, |
|
"rewards/margins": 0.34474682807922363, |
|
"rewards/rejected": -1.0933732986450195, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -0.9387643933296204, |
|
"logits/rejected": -0.8350385427474976, |
|
"logps/chosen": -1224.355712890625, |
|
"logps/rejected": -1547.6513671875, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9411084055900574, |
|
"rewards/margins": 0.34462398290634155, |
|
"rewards/rejected": -1.285732388496399, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -0.7177497148513794, |
|
"logits/rejected": -0.5354570746421814, |
|
"logps/chosen": -1501.9898681640625, |
|
"logps/rejected": -2006.5120849609375, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.279309630393982, |
|
"rewards/margins": 0.5385541319847107, |
|
"rewards/rejected": -1.8178638219833374, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -0.14772020280361176, |
|
"logits/rejected": 0.09259579330682755, |
|
"logps/chosen": -1891.1800537109375, |
|
"logps/rejected": -2354.27734375, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.666395902633667, |
|
"rewards/margins": 0.5180760025978088, |
|
"rewards/rejected": -2.184471845626831, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -0.7175928354263306, |
|
"logits/rejected": -0.47789135575294495, |
|
"logps/chosen": -1339.34765625, |
|
"logps/rejected": -1721.939453125, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.0828118324279785, |
|
"rewards/margins": 0.42446571588516235, |
|
"rewards/rejected": -1.5072776079177856, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.1001973152160645, |
|
"logits/rejected": -0.7717048525810242, |
|
"logps/chosen": -975.1065673828125, |
|
"logps/rejected": -1379.069580078125, |
|
"loss": 0.5754, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7290834188461304, |
|
"rewards/margins": 0.4491938054561615, |
|
"rewards/rejected": -1.1782772541046143, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -0.9087737202644348, |
|
"logits/rejected": -0.9751186370849609, |
|
"logps/chosen": -962.6619873046875, |
|
"logps/rejected": -1202.0980224609375, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7626216411590576, |
|
"rewards/margins": 0.23821432888507843, |
|
"rewards/rejected": -1.0008360147476196, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -0.6515249013900757, |
|
"logits/rejected": -0.5174766778945923, |
|
"logps/chosen": -1231.8673095703125, |
|
"logps/rejected": -1379.020263671875, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.020787000656128, |
|
"rewards/margins": 0.1627589762210846, |
|
"rewards/rejected": -1.1835458278656006, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.1023533344268799, |
|
"logits/rejected": -0.8313691020011902, |
|
"logps/chosen": -1354.640380859375, |
|
"logps/rejected": -1333.6419677734375, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0642707347869873, |
|
"rewards/margins": 0.0644897073507309, |
|
"rewards/rejected": -1.1287604570388794, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -0.8817610740661621, |
|
"logits/rejected": -0.6308177709579468, |
|
"logps/chosen": -1051.3311767578125, |
|
"logps/rejected": -1573.681396484375, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7915970087051392, |
|
"rewards/margins": 0.5443645715713501, |
|
"rewards/rejected": -1.3359615802764893, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.0322520732879639, |
|
"logits/rejected": -0.9708759188652039, |
|
"logps/chosen": -1105.9547119140625, |
|
"logps/rejected": -1240.3804931640625, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8885539174079895, |
|
"rewards/margins": 0.14831490814685822, |
|
"rewards/rejected": -1.0368688106536865, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -0.8422597646713257, |
|
"logits/rejected": -0.9403928518295288, |
|
"logps/chosen": -1141.7333984375, |
|
"logps/rejected": -1371.9580078125, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8957103490829468, |
|
"rewards/margins": 0.22806143760681152, |
|
"rewards/rejected": -1.1237719058990479, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -0.6579400300979614, |
|
"logits/rejected": -0.38535481691360474, |
|
"logps/chosen": -1287.460205078125, |
|
"logps/rejected": -1504.084228515625, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.095157265663147, |
|
"rewards/margins": 0.25700071454048157, |
|
"rewards/rejected": -1.3521578311920166, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -0.7758182287216187, |
|
"logits/rejected": -0.67876136302948, |
|
"logps/chosen": -1263.397216796875, |
|
"logps/rejected": -1797.307373046875, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.020475149154663, |
|
"rewards/margins": 0.5204795002937317, |
|
"rewards/rejected": -1.540954828262329, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.0213903188705444, |
|
"logits/rejected": -0.8918190002441406, |
|
"logps/chosen": -1065.27734375, |
|
"logps/rejected": -1240.601806640625, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8754297494888306, |
|
"rewards/margins": 0.1893172711133957, |
|
"rewards/rejected": -1.0647470951080322, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.0888749361038208, |
|
"logits/rejected": -0.9006432294845581, |
|
"logps/chosen": -1159.646728515625, |
|
"logps/rejected": -1520.9205322265625, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.891791045665741, |
|
"rewards/margins": 0.37499967217445374, |
|
"rewards/rejected": -1.2667908668518066, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -0.7614107131958008, |
|
"logits/rejected": -0.4498376250267029, |
|
"logps/chosen": -1577.4564208984375, |
|
"logps/rejected": -2064.27587890625, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2812590599060059, |
|
"rewards/margins": 0.5300472974777222, |
|
"rewards/rejected": -1.811306357383728, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.0388845205307007, |
|
"logits/rejected": -0.6018816232681274, |
|
"logps/chosen": -1428.4647216796875, |
|
"logps/rejected": -2024.8896484375, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1658397912979126, |
|
"rewards/margins": 0.6608397364616394, |
|
"rewards/rejected": -1.8266795873641968, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -0.6871947050094604, |
|
"logits/rejected": -0.6853007674217224, |
|
"logps/chosen": -1243.111572265625, |
|
"logps/rejected": -1829.707275390625, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0525375604629517, |
|
"rewards/margins": 0.5433839559555054, |
|
"rewards/rejected": -1.595921277999878, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -0.8195822834968567, |
|
"logits/rejected": -0.6704593896865845, |
|
"logps/chosen": -1643.8544921875, |
|
"logps/rejected": -2129.94189453125, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3954288959503174, |
|
"rewards/margins": 0.46880459785461426, |
|
"rewards/rejected": -1.8642336130142212, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.0698474645614624, |
|
"logits/rejected": -0.9074158668518066, |
|
"logps/chosen": -1074.9571533203125, |
|
"logps/rejected": -1626.334228515625, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8372681736946106, |
|
"rewards/margins": 0.602739691734314, |
|
"rewards/rejected": -1.4400079250335693, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.0306494235992432, |
|
"logits/rejected": -0.7876461148262024, |
|
"logps/chosen": -1247.2880859375, |
|
"logps/rejected": -1743.0517578125, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0142000913619995, |
|
"rewards/margins": 0.5253391265869141, |
|
"rewards/rejected": -1.5395392179489136, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -0.7109737396240234, |
|
"logits/rejected": -0.4215407371520996, |
|
"logps/chosen": -1115.1761474609375, |
|
"logps/rejected": -1571.439697265625, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8916100263595581, |
|
"rewards/margins": 0.515656590461731, |
|
"rewards/rejected": -1.407266616821289, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -0.9104745984077454, |
|
"logits/rejected": -0.6836374402046204, |
|
"logps/chosen": -1387.066650390625, |
|
"logps/rejected": -2122.8583984375, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1135450601577759, |
|
"rewards/margins": 0.7385995388031006, |
|
"rewards/rejected": -1.8521445989608765, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -0.7981353998184204, |
|
"logits/rejected": -0.47408953309059143, |
|
"logps/chosen": -1591.907958984375, |
|
"logps/rejected": -2268.22802734375, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3141790628433228, |
|
"rewards/margins": 0.6781451106071472, |
|
"rewards/rejected": -1.9923241138458252, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -0.7773085832595825, |
|
"logits/rejected": -0.8014146089553833, |
|
"logps/chosen": -1200.51904296875, |
|
"logps/rejected": -1684.487060546875, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.948921799659729, |
|
"rewards/margins": 0.48516377806663513, |
|
"rewards/rejected": -1.434085488319397, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -0.7013689279556274, |
|
"logits/rejected": -0.48669877648353577, |
|
"logps/chosen": -1563.8013916015625, |
|
"logps/rejected": -1946.0748291015625, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3094342947006226, |
|
"rewards/margins": 0.41629427671432495, |
|
"rewards/rejected": -1.7257286310195923, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -0.8420946002006531, |
|
"logits/rejected": -0.7498366832733154, |
|
"logps/chosen": -1459.593994140625, |
|
"logps/rejected": -1710.6732177734375, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2246272563934326, |
|
"rewards/margins": 0.26000121235847473, |
|
"rewards/rejected": -1.484628438949585, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -0.6557348370552063, |
|
"logits/rejected": -0.6690261363983154, |
|
"logps/chosen": -1101.353759765625, |
|
"logps/rejected": -1434.849365234375, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.974203884601593, |
|
"rewards/margins": 0.33285054564476013, |
|
"rewards/rejected": -1.3070546388626099, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.0794492959976196, |
|
"logits/rejected": -0.7161605954170227, |
|
"logps/chosen": -1141.7310791015625, |
|
"logps/rejected": -1709.1611328125, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9028726816177368, |
|
"rewards/margins": 0.6150614619255066, |
|
"rewards/rejected": -1.5179340839385986, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -0.6755369305610657, |
|
"logits/rejected": -0.619501531124115, |
|
"logps/chosen": -1471.1715087890625, |
|
"logps/rejected": -2048.2431640625, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2707023620605469, |
|
"rewards/margins": 0.5611073970794678, |
|
"rewards/rejected": -1.8318097591400146, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -0.8728944659233093, |
|
"logits/rejected": -1.02140212059021, |
|
"logps/chosen": -1593.0389404296875, |
|
"logps/rejected": -1743.603759765625, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.3282172679901123, |
|
"rewards/margins": 0.18238051235675812, |
|
"rewards/rejected": -1.510597825050354, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.2148396968841553, |
|
"logits/rejected": -0.9437984228134155, |
|
"logps/chosen": -1290.468994140625, |
|
"logps/rejected": -1955.718017578125, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0306814908981323, |
|
"rewards/margins": 0.7142370939254761, |
|
"rewards/rejected": -1.7449188232421875, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -0.9041656255722046, |
|
"logits/rejected": -0.7784754633903503, |
|
"logps/chosen": -1627.837890625, |
|
"logps/rejected": -2467.47314453125, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3451955318450928, |
|
"rewards/margins": 0.8315626978874207, |
|
"rewards/rejected": -2.176758289337158, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -0.508884608745575, |
|
"logits/rejected": -0.4718368649482727, |
|
"logps/chosen": -1368.59765625, |
|
"logps/rejected": -2104.90625, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1711418628692627, |
|
"rewards/margins": 0.7298630475997925, |
|
"rewards/rejected": -1.9010050296783447, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -0.5781607031822205, |
|
"logits/rejected": -0.6443449258804321, |
|
"logps/chosen": -1434.3941650390625, |
|
"logps/rejected": -2030.039306640625, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2758595943450928, |
|
"rewards/margins": 0.5422929525375366, |
|
"rewards/rejected": -1.818152666091919, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -0.8322515487670898, |
|
"logits/rejected": -0.5321582555770874, |
|
"logps/chosen": -1379.1712646484375, |
|
"logps/rejected": -1931.9146728515625, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1463197469711304, |
|
"rewards/margins": 0.5763110518455505, |
|
"rewards/rejected": -1.7226308584213257, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -0.9420675039291382, |
|
"logits/rejected": -0.7674391269683838, |
|
"logps/chosen": -1645.792236328125, |
|
"logps/rejected": -2230.914306640625, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3653483390808105, |
|
"rewards/margins": 0.5703693628311157, |
|
"rewards/rejected": -1.9357175827026367, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -0.8753671646118164, |
|
"logits/rejected": -0.8147989511489868, |
|
"logps/chosen": -1408.5784912109375, |
|
"logps/rejected": -2042.339599609375, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.158811092376709, |
|
"rewards/margins": 0.6254128217697144, |
|
"rewards/rejected": -1.7842239141464233, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -0.6400734186172485, |
|
"logits/rejected": -0.46232056617736816, |
|
"logps/chosen": -1486.711669921875, |
|
"logps/rejected": -1575.9361572265625, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -1.3110835552215576, |
|
"rewards/margins": 0.12779514491558075, |
|
"rewards/rejected": -1.4388787746429443, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -0.616904616355896, |
|
"logits/rejected": -0.3994078040122986, |
|
"logps/chosen": -1475.5816650390625, |
|
"logps/rejected": -2202.4833984375, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2749942541122437, |
|
"rewards/margins": 0.7362874746322632, |
|
"rewards/rejected": -2.011281728744507, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -0.8570725321769714, |
|
"logits/rejected": -0.636644184589386, |
|
"logps/chosen": -1252.1573486328125, |
|
"logps/rejected": -1564.4869384765625, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.0256534814834595, |
|
"rewards/margins": 0.33503228425979614, |
|
"rewards/rejected": -1.3606857061386108, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -0.7760205268859863, |
|
"logits/rejected": -0.4805734157562256, |
|
"logps/chosen": -1414.7236328125, |
|
"logps/rejected": -1822.532958984375, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1799442768096924, |
|
"rewards/margins": 0.46841782331466675, |
|
"rewards/rejected": -1.648362159729004, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -0.7541752457618713, |
|
"logits/rejected": -0.5927517414093018, |
|
"logps/chosen": -1383.7718505859375, |
|
"logps/rejected": -1702.1370849609375, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.173661231994629, |
|
"rewards/margins": 0.3563198149204254, |
|
"rewards/rejected": -1.529981017112732, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -0.8271347284317017, |
|
"logits/rejected": -0.5372225046157837, |
|
"logps/chosen": -1264.5931396484375, |
|
"logps/rejected": -1810.603759765625, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0567820072174072, |
|
"rewards/margins": 0.5766392946243286, |
|
"rewards/rejected": -1.633421540260315, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -0.7219616770744324, |
|
"logits/rejected": -0.44911837577819824, |
|
"logps/chosen": -1458.1873779296875, |
|
"logps/rejected": -2138.194091796875, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1980092525482178, |
|
"rewards/margins": 0.7267267107963562, |
|
"rewards/rejected": -1.9247362613677979, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -0.6613628268241882, |
|
"logits/rejected": -0.41062062978744507, |
|
"logps/chosen": -1361.629150390625, |
|
"logps/rejected": -1998.5576171875, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1172220706939697, |
|
"rewards/margins": 0.6446647644042969, |
|
"rewards/rejected": -1.7618869543075562, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -0.6104007959365845, |
|
"logits/rejected": -0.5428999662399292, |
|
"logps/chosen": -1572.9007568359375, |
|
"logps/rejected": -1579.3857421875, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.3415987491607666, |
|
"rewards/margins": 0.03751998022198677, |
|
"rewards/rejected": -1.37911856174469, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -0.4355439245700836, |
|
"logits/rejected": -0.3004533052444458, |
|
"logps/chosen": -1881.150390625, |
|
"logps/rejected": -2220.178955078125, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6670087575912476, |
|
"rewards/margins": 0.3249967694282532, |
|
"rewards/rejected": -1.9920055866241455, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -0.7097476124763489, |
|
"logits/rejected": -0.6131819486618042, |
|
"logps/chosen": -1349.8577880859375, |
|
"logps/rejected": -1754.811279296875, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1261099576950073, |
|
"rewards/margins": 0.42261672019958496, |
|
"rewards/rejected": -1.5487267971038818, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -0.6964675188064575, |
|
"logits/rejected": -0.35203319787979126, |
|
"logps/chosen": -1499.8782958984375, |
|
"logps/rejected": -2223.939697265625, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.267060399055481, |
|
"rewards/margins": 0.7385483980178833, |
|
"rewards/rejected": -2.0056090354919434, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -0.9956095814704895, |
|
"logits/rejected": -0.6606365442276001, |
|
"logps/chosen": -1594.940673828125, |
|
"logps/rejected": -1980.383056640625, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3295806646347046, |
|
"rewards/margins": 0.44766488671302795, |
|
"rewards/rejected": -1.777245283126831, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -0.9308391809463501, |
|
"logits/rejected": -0.8904244303703308, |
|
"logps/chosen": -1657.0823974609375, |
|
"logps/rejected": -1868.926025390625, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3185545206069946, |
|
"rewards/margins": 0.27054083347320557, |
|
"rewards/rejected": -1.5890953540802002, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -0.625472903251648, |
|
"logits/rejected": -0.4992523789405823, |
|
"logps/chosen": -1419.646240234375, |
|
"logps/rejected": -2127.7783203125, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.194054365158081, |
|
"rewards/margins": 0.7004168629646301, |
|
"rewards/rejected": -1.8944714069366455, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -0.8765754699707031, |
|
"logits/rejected": -0.6600515842437744, |
|
"logps/chosen": -1335.862060546875, |
|
"logps/rejected": -1862.95703125, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.117206335067749, |
|
"rewards/margins": 0.5470607280731201, |
|
"rewards/rejected": -1.6642669439315796, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -0.9478354454040527, |
|
"logits/rejected": -0.7755793929100037, |
|
"logps/chosen": -1195.031005859375, |
|
"logps/rejected": -1684.8265380859375, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9703825116157532, |
|
"rewards/margins": 0.4978027939796448, |
|
"rewards/rejected": -1.4681851863861084, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8340710401535034, |
|
"logits/rejected": -0.6496783494949341, |
|
"logps/chosen": -1408.339111328125, |
|
"logps/rejected": -1972.5435791015625, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1645435094833374, |
|
"rewards/margins": 0.58301842212677, |
|
"rewards/rejected": -1.7475616931915283, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6029718887329102, |
|
"train_runtime": 12868.9286, |
|
"train_samples_per_second": 1.166, |
|
"train_steps_per_second": 0.097 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|