|
{ |
|
"best_metric": 0.4541548192501068, |
|
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-180", |
|
"epoch": 2.926829268292683, |
|
"eval_steps": 30, |
|
"global_step": 180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.378021240234375, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"logits/chosen": -1.830958604812622, |
|
"logits/rejected": -1.8507845401763916, |
|
"logps/chosen": -28.701984405517578, |
|
"logps/rejected": -54.28569793701172, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": 0.0008967495523393154, |
|
"rewards/margins": 0.0014666033675894141, |
|
"rewards/rejected": -0.0005698538152500987, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.193418502807617, |
|
"learning_rate": 4.887323943661972e-06, |
|
"logits/chosen": -1.7550897598266602, |
|
"logits/rejected": -1.770708680152893, |
|
"logps/chosen": -47.344207763671875, |
|
"logps/rejected": -64.0368423461914, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.017231885343790054, |
|
"rewards/margins": 0.01606021076440811, |
|
"rewards/rejected": 0.0011716745793819427, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.308932304382324, |
|
"learning_rate": 4.746478873239437e-06, |
|
"logits/chosen": -1.781267762184143, |
|
"logits/rejected": -1.8114898204803467, |
|
"logps/chosen": -54.274559020996094, |
|
"logps/rejected": -95.20500183105469, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0641159638762474, |
|
"rewards/margins": 0.061691801995038986, |
|
"rewards/rejected": 0.0024241588544100523, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/chosen": -1.7831767797470093, |
|
"eval_logits/rejected": -1.8043663501739502, |
|
"eval_logps/chosen": -55.16960906982422, |
|
"eval_logps/rejected": -97.32585144042969, |
|
"eval_loss": 0.6523757576942444, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.09036973863840103, |
|
"eval_rewards/margins": 0.08673857897520065, |
|
"eval_rewards/rejected": 0.0036311547737568617, |
|
"eval_runtime": 8.141, |
|
"eval_samples_per_second": 3.439, |
|
"eval_steps_per_second": 1.72, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.6056338028169015e-06, |
|
"logits/chosen": -1.889905333518982, |
|
"logits/rejected": -1.9024461507797241, |
|
"logps/chosen": -27.918941497802734, |
|
"logps/rejected": -42.093284606933594, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.054457180202007294, |
|
"rewards/margins": 0.0539846234023571, |
|
"rewards/rejected": 0.0004725646285805851, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.53225326538086, |
|
"learning_rate": 4.464788732394367e-06, |
|
"logits/chosen": -1.8278567790985107, |
|
"logits/rejected": -1.849957823753357, |
|
"logps/chosen": -43.8238639831543, |
|
"logps/rejected": -68.02179718017578, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.13941256701946259, |
|
"rewards/margins": 0.13133978843688965, |
|
"rewards/rejected": 0.008072790689766407, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.436968803405762, |
|
"learning_rate": 4.3239436619718315e-06, |
|
"logits/chosen": -1.805991768836975, |
|
"logits/rejected": -1.8437427282333374, |
|
"logps/chosen": -43.8873291015625, |
|
"logps/rejected": -95.2943115234375, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.18793432414531708, |
|
"rewards/margins": 0.21308371424674988, |
|
"rewards/rejected": -0.025149401277303696, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -1.7877694368362427, |
|
"eval_logits/rejected": -1.8098936080932617, |
|
"eval_logps/chosen": -53.567203521728516, |
|
"eval_logps/rejected": -97.33795928955078, |
|
"eval_loss": 0.5890871286392212, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.25061002373695374, |
|
"eval_rewards/margins": 0.2481890469789505, |
|
"eval_rewards/rejected": 0.002420984674245119, |
|
"eval_runtime": 8.1404, |
|
"eval_samples_per_second": 3.44, |
|
"eval_steps_per_second": 1.72, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.183098591549296e-06, |
|
"logits/chosen": -1.8344879150390625, |
|
"logits/rejected": -1.8489716053009033, |
|
"logps/chosen": -40.38930892944336, |
|
"logps/rejected": -60.9084358215332, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.19739331305027008, |
|
"rewards/margins": 0.22638121247291565, |
|
"rewards/rejected": -0.028987903147935867, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 5.49536657333374, |
|
"learning_rate": 4.042253521126761e-06, |
|
"logits/chosen": -1.7903095483779907, |
|
"logits/rejected": -1.8362411260604858, |
|
"logps/chosen": -44.288116455078125, |
|
"logps/rejected": -90.21073913574219, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.34061312675476074, |
|
"rewards/margins": 0.40679749846458435, |
|
"rewards/rejected": -0.06618441641330719, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 13.401692390441895, |
|
"learning_rate": 3.901408450704225e-06, |
|
"logits/chosen": -1.8004281520843506, |
|
"logits/rejected": -1.8247934579849243, |
|
"logps/chosen": -42.32465362548828, |
|
"logps/rejected": -70.9749984741211, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.3678433299064636, |
|
"rewards/margins": 0.4186524450778961, |
|
"rewards/rejected": -0.05080908536911011, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_logits/chosen": -1.7943389415740967, |
|
"eval_logits/rejected": -1.8181126117706299, |
|
"eval_logps/chosen": -51.677486419677734, |
|
"eval_logps/rejected": -97.63689422607422, |
|
"eval_loss": 0.529485821723938, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.4395819306373596, |
|
"eval_rewards/margins": 0.4670555889606476, |
|
"eval_rewards/rejected": -0.027473628520965576, |
|
"eval_runtime": 8.1412, |
|
"eval_samples_per_second": 3.439, |
|
"eval_steps_per_second": 1.72, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 5.040858745574951, |
|
"learning_rate": 3.7605633802816903e-06, |
|
"logits/chosen": -1.8601042032241821, |
|
"logits/rejected": -1.8790462017059326, |
|
"logps/chosen": -43.77570343017578, |
|
"logps/rejected": -70.64997863769531, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.36673134565353394, |
|
"rewards/margins": 0.42903366684913635, |
|
"rewards/rejected": -0.06230226159095764, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 11.182683944702148, |
|
"learning_rate": 3.6197183098591553e-06, |
|
"logits/chosen": -1.8602203130722046, |
|
"logits/rejected": -1.8786903619766235, |
|
"logps/chosen": -29.601736068725586, |
|
"logps/rejected": -66.1338882446289, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.3122637867927551, |
|
"rewards/margins": 0.2756831645965576, |
|
"rewards/rejected": 0.03658062964677811, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 3.9169583320617676, |
|
"learning_rate": 3.47887323943662e-06, |
|
"logits/chosen": -1.8304624557495117, |
|
"logits/rejected": -1.8451646566390991, |
|
"logps/chosen": -31.413599014282227, |
|
"logps/rejected": -56.841880798339844, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.21899382770061493, |
|
"rewards/margins": 0.2744571566581726, |
|
"rewards/rejected": -0.05546332150697708, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_logits/chosen": -1.80086350440979, |
|
"eval_logits/rejected": -1.8260576725006104, |
|
"eval_logps/chosen": -50.32191848754883, |
|
"eval_logps/rejected": -98.02101135253906, |
|
"eval_loss": 0.49604225158691406, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.5751391053199768, |
|
"eval_rewards/margins": 0.6410244107246399, |
|
"eval_rewards/rejected": -0.0658852607011795, |
|
"eval_runtime": 8.1445, |
|
"eval_samples_per_second": 3.438, |
|
"eval_steps_per_second": 1.719, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.4047716856002808, |
|
"learning_rate": 3.338028169014085e-06, |
|
"logits/chosen": -1.8776130676269531, |
|
"logits/rejected": -1.8995519876480103, |
|
"logps/chosen": -22.69371795654297, |
|
"logps/rejected": -53.5282096862793, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.35938918590545654, |
|
"rewards/margins": 0.5045264959335327, |
|
"rewards/rejected": -0.14513733983039856, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.7528722882270813, |
|
"learning_rate": 3.1971830985915496e-06, |
|
"logits/chosen": -1.8126357793807983, |
|
"logits/rejected": -1.832371711730957, |
|
"logps/chosen": -38.33379364013672, |
|
"logps/rejected": -67.96979522705078, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.5568062663078308, |
|
"rewards/margins": 0.6818712949752808, |
|
"rewards/rejected": -0.12506499886512756, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 3.405579090118408, |
|
"learning_rate": 3.056338028169014e-06, |
|
"logits/chosen": -1.8196109533309937, |
|
"logits/rejected": -1.8556429147720337, |
|
"logps/chosen": -36.78864669799805, |
|
"logps/rejected": -83.05890655517578, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.542107105255127, |
|
"rewards/margins": 0.6411095857620239, |
|
"rewards/rejected": -0.09900249540805817, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_logits/chosen": -1.805869698524475, |
|
"eval_logits/rejected": -1.8330577611923218, |
|
"eval_logps/chosen": -49.10601043701172, |
|
"eval_logps/rejected": -98.84068298339844, |
|
"eval_loss": 0.4709201455116272, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.6967297196388245, |
|
"eval_rewards/margins": 0.8445812463760376, |
|
"eval_rewards/rejected": -0.1478516012430191, |
|
"eval_runtime": 8.1382, |
|
"eval_samples_per_second": 3.441, |
|
"eval_steps_per_second": 1.72, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 7.778740882873535, |
|
"learning_rate": 2.915492957746479e-06, |
|
"logits/chosen": -1.848589301109314, |
|
"logits/rejected": -1.8790754079818726, |
|
"logps/chosen": -36.49171447753906, |
|
"logps/rejected": -72.55968475341797, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.49555450677871704, |
|
"rewards/margins": 0.6891830563545227, |
|
"rewards/rejected": -0.1936284601688385, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 4.058627605438232, |
|
"learning_rate": 2.774647887323944e-06, |
|
"logits/chosen": -1.812421441078186, |
|
"logits/rejected": -1.8415311574935913, |
|
"logps/chosen": -45.62999725341797, |
|
"logps/rejected": -87.85527038574219, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.7084562182426453, |
|
"rewards/margins": 0.9553689956665039, |
|
"rewards/rejected": -0.24691279232501984, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6338028169014084e-06, |
|
"logits/chosen": -1.8475942611694336, |
|
"logits/rejected": -1.8678725957870483, |
|
"logps/chosen": -40.53328323364258, |
|
"logps/rejected": -64.86616516113281, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.5022943019866943, |
|
"rewards/margins": 0.7252141833305359, |
|
"rewards/rejected": -0.22291991114616394, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_logits/chosen": -1.8136398792266846, |
|
"eval_logits/rejected": -1.8424787521362305, |
|
"eval_logps/chosen": -48.19547653198242, |
|
"eval_logps/rejected": -99.7900161743164, |
|
"eval_loss": 0.4541548192501068, |
|
"eval_rewards/accuracies": 0.4642857015132904, |
|
"eval_rewards/chosen": 0.7877826690673828, |
|
"eval_rewards/margins": 1.0305674076080322, |
|
"eval_rewards/rejected": -0.24278469383716583, |
|
"eval_runtime": 8.1397, |
|
"eval_samples_per_second": 3.44, |
|
"eval_steps_per_second": 1.72, |
|
"step": 180 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 366, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 90, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|