|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 44028, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9772871808848918e-05, |
|
"loss": 3.0067, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.954574361769783e-05, |
|
"loss": 2.1686, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9318615426546744e-05, |
|
"loss": 1.9283, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.909148723539566e-05, |
|
"loss": 1.8097, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8864359044244573e-05, |
|
"loss": 1.6774, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8637230853093485e-05, |
|
"loss": 1.6303, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.84101026619424e-05, |
|
"loss": 1.5638, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8182974470791318e-05, |
|
"loss": 1.5224, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.795584627964023e-05, |
|
"loss": 1.4723, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7728718088489147e-05, |
|
"loss": 1.4697, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.750158989733806e-05, |
|
"loss": 1.4104, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7274461706186973e-05, |
|
"loss": 1.4252, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.704733351503589e-05, |
|
"loss": 1.3764, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.68202053238848e-05, |
|
"loss": 1.3702, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6593077132733718e-05, |
|
"loss": 1.3184, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.636594894158263e-05, |
|
"loss": 1.2977, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6138820750431547e-05, |
|
"loss": 1.3242, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.591169255928046e-05, |
|
"loss": 1.2685, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5684564368129372e-05, |
|
"loss": 1.2796, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.545743617697829e-05, |
|
"loss": 1.2629, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5230307985827202e-05, |
|
"loss": 1.2414, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5003179794676118e-05, |
|
"loss": 1.2226, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.477605160352503e-05, |
|
"loss": 1.0322, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4548923412373945e-05, |
|
"loss": 1.0281, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4321795221222858e-05, |
|
"loss": 1.0143, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.4094667030071774e-05, |
|
"loss": 1.0324, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3867538838920689e-05, |
|
"loss": 0.996, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.3640410647769601e-05, |
|
"loss": 1.0049, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.3413282456618518e-05, |
|
"loss": 0.9879, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.318615426546743e-05, |
|
"loss": 0.9806, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.2959026074316345e-05, |
|
"loss": 0.986, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.2731897883165258e-05, |
|
"loss": 0.9894, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.2504769692014174e-05, |
|
"loss": 1.0172, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2277641500863089e-05, |
|
"loss": 0.9919, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.2050513309712001e-05, |
|
"loss": 1.0085, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.1823385118560918e-05, |
|
"loss": 0.9667, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.159625692740983e-05, |
|
"loss": 0.989, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.1369128736258745e-05, |
|
"loss": 0.985, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.1142000545107661e-05, |
|
"loss": 1.0043, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.0914872353956574e-05, |
|
"loss": 0.9636, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.0687744162805489e-05, |
|
"loss": 0.9883, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0460615971654401e-05, |
|
"loss": 0.964, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0233487780503318e-05, |
|
"loss": 0.951, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.000635958935223e-05, |
|
"loss": 0.9524, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.779231398201145e-06, |
|
"loss": 0.7409, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.55210320705006e-06, |
|
"loss": 0.7549, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.324975015898974e-06, |
|
"loss": 0.7276, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.097846824747889e-06, |
|
"loss": 0.7692, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.870718633596803e-06, |
|
"loss": 0.7532, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.643590442445718e-06, |
|
"loss": 0.7775, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 8.416462251294632e-06, |
|
"loss": 0.7352, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.189334060143545e-06, |
|
"loss": 0.7427, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.96220586899246e-06, |
|
"loss": 0.7589, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 7.735077677841374e-06, |
|
"loss": 0.7267, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 7.507949486690289e-06, |
|
"loss": 0.7502, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.280821295539203e-06, |
|
"loss": 0.7225, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.0536931043881176e-06, |
|
"loss": 0.7362, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.826564913237031e-06, |
|
"loss": 0.7324, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.599436722085946e-06, |
|
"loss": 0.7083, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 6.3723085309348594e-06, |
|
"loss": 0.7398, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.145180339783774e-06, |
|
"loss": 0.7333, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.918052148632689e-06, |
|
"loss": 0.7494, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.690923957481603e-06, |
|
"loss": 0.7604, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.4637957663305175e-06, |
|
"loss": 0.7495, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.236667575179431e-06, |
|
"loss": 0.7622, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.009539384028346e-06, |
|
"loss": 0.731, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.78241119287726e-06, |
|
"loss": 0.5705, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.555283001726175e-06, |
|
"loss": 0.5773, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.3281548105750885e-06, |
|
"loss": 0.5764, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.101026619424003e-06, |
|
"loss": 0.5782, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.8738984282729175e-06, |
|
"loss": 0.582, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.6467702371218316e-06, |
|
"loss": 0.5784, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.419642045970746e-06, |
|
"loss": 0.5942, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.1925138548196606e-06, |
|
"loss": 0.5642, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.9653856636685747e-06, |
|
"loss": 0.5889, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.7382574725174893e-06, |
|
"loss": 0.5779, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.5111292813664034e-06, |
|
"loss": 0.5782, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.2840010902153175e-06, |
|
"loss": 0.5962, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.056872899064232e-06, |
|
"loss": 0.5722, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.8297447079131463e-06, |
|
"loss": 0.5736, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.6026165167620606e-06, |
|
"loss": 0.569, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.375488325610975e-06, |
|
"loss": 0.5749, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.1483601344598892e-06, |
|
"loss": 0.5859, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 9.212319433088035e-07, |
|
"loss": 0.5674, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.94103752157718e-07, |
|
"loss": 0.5916, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.6697556100663217e-07, |
|
"loss": 0.5707, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.398473698555465e-07, |
|
"loss": 0.5624, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.2719178704460798e-08, |
|
"loss": 0.5748, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 44028, |
|
"total_flos": 5.176959239737958e+16, |
|
"train_loss": 0.9650859335002343, |
|
"train_runtime": 62054.88, |
|
"train_samples_per_second": 8.514, |
|
"train_steps_per_second": 0.71 |
|
} |
|
], |
|
"max_steps": 44028, |
|
"num_train_epochs": 4, |
|
"total_flos": 5.176959239737958e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|