|
{
  "best_metric": 76.59314722877838,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Japanese-GSD/checkpoint-4000",
  "epoch": 29.41176470588235,
  "global_step": 6500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.45,
      "learning_rate": 7.52e-05,
      "loss": 3.6592,
      "step": 100
    },
    {
      "epoch": 0.9,
      "learning_rate": 7.949530201342283e-05,
      "loss": 1.457,
      "step": 200
    },
    {
      "epoch": 1.36,
      "learning_rate": 7.895838926174497e-05,
      "loss": 1.1567,
      "step": 300
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.842147651006712e-05,
      "loss": 1.0594,
      "step": 400
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.788456375838927e-05,
      "loss": 0.9461,
      "step": 500
    },
    {
      "epoch": 2.26,
      "eval_las": 72.71913404411167,
      "eval_loss": 0.9768415689468384,
      "eval_runtime": 3.6781,
      "eval_samples_per_second": 137.841,
      "eval_steps_per_second": 17.4,
      "eval_uas": 77.79767233661593,
      "step": 500
    },
    {
      "epoch": 2.71,
      "learning_rate": 7.734765100671142e-05,
      "loss": 0.8798,
      "step": 600
    },
    {
      "epoch": 3.17,
      "learning_rate": 7.681073825503357e-05,
      "loss": 0.8167,
      "step": 700
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.627382550335572e-05,
      "loss": 0.7486,
      "step": 800
    },
    {
      "epoch": 4.07,
      "learning_rate": 7.573691275167786e-05,
      "loss": 0.7195,
      "step": 900
    },
    {
      "epoch": 4.52,
      "learning_rate": 7.52e-05,
      "loss": 0.6067,
      "step": 1000
    },
    {
      "epoch": 4.52,
      "eval_las": 75.72230813054448,
      "eval_loss": 0.9424547553062439,
      "eval_runtime": 3.6759,
      "eval_samples_per_second": 137.927,
      "eval_steps_per_second": 17.411,
      "eval_uas": 80.39391226499552,
      "step": 1000
    },
    {
      "epoch": 4.98,
      "learning_rate": 7.466308724832215e-05,
      "loss": 0.6358,
      "step": 1100
    },
    {
      "epoch": 5.43,
      "learning_rate": 7.41261744966443e-05,
      "loss": 0.4864,
      "step": 1200
    },
    {
      "epoch": 5.88,
      "learning_rate": 7.358926174496644e-05,
      "loss": 0.5311,
      "step": 1300
    },
    {
      "epoch": 6.33,
      "learning_rate": 7.305234899328859e-05,
      "loss": 0.4331,
      "step": 1400
    },
    {
      "epoch": 6.79,
      "learning_rate": 7.251543624161074e-05,
      "loss": 0.4255,
      "step": 1500
    },
    {
      "epoch": 6.79,
      "eval_las": 75.43745421990722,
      "eval_loss": 1.1136951446533203,
      "eval_runtime": 3.6925,
      "eval_samples_per_second": 137.307,
      "eval_steps_per_second": 17.333,
      "eval_uas": 80.19044518596891,
      "step": 1500
    },
    {
      "epoch": 7.24,
      "learning_rate": 7.197852348993289e-05,
      "loss": 0.3744,
      "step": 1600
    },
    {
      "epoch": 7.69,
      "learning_rate": 7.144161073825504e-05,
      "loss": 0.3474,
      "step": 1700
    },
    {
      "epoch": 8.14,
      "learning_rate": 7.090469798657718e-05,
      "loss": 0.3244,
      "step": 1800
    },
    {
      "epoch": 8.6,
      "learning_rate": 7.036778523489933e-05,
      "loss": 0.2716,
      "step": 1900
    },
    {
      "epoch": 9.05,
      "learning_rate": 6.983087248322148e-05,
      "loss": 0.2877,
      "step": 2000
    },
    {
      "epoch": 9.05,
      "eval_las": 75.58395051680637,
      "eval_loss": 1.3644814491271973,
      "eval_runtime": 3.6735,
      "eval_samples_per_second": 138.014,
      "eval_steps_per_second": 17.422,
      "eval_uas": 80.19858386912998,
      "step": 2000
    },
    {
      "epoch": 9.5,
      "learning_rate": 6.929395973154363e-05,
      "loss": 0.2162,
      "step": 2100
    },
    {
      "epoch": 9.95,
      "learning_rate": 6.875704697986578e-05,
      "loss": 0.2386,
      "step": 2200
    },
    {
      "epoch": 10.41,
      "learning_rate": 6.822013422818793e-05,
      "loss": 0.1861,
      "step": 2300
    },
    {
      "epoch": 10.86,
      "learning_rate": 6.768322147651007e-05,
      "loss": 0.1989,
      "step": 2400
    },
    {
      "epoch": 11.31,
      "learning_rate": 6.714630872483222e-05,
      "loss": 0.1631,
      "step": 2500
    },
    {
      "epoch": 11.31,
      "eval_las": 75.29095792300807,
      "eval_loss": 1.5608779191970825,
      "eval_runtime": 3.672,
      "eval_samples_per_second": 138.07,
      "eval_steps_per_second": 17.429,
      "eval_uas": 80.17416781964678,
      "step": 2500
    },
    {
      "epoch": 11.76,
      "learning_rate": 6.660939597315437e-05,
      "loss": 0.1637,
      "step": 2600
    },
    {
      "epoch": 12.22,
      "learning_rate": 6.607248322147652e-05,
      "loss": 0.1513,
      "step": 2700
    },
    {
      "epoch": 12.67,
      "learning_rate": 6.553557046979867e-05,
      "loss": 0.1318,
      "step": 2800
    },
    {
      "epoch": 13.12,
      "learning_rate": 6.499865771812081e-05,
      "loss": 0.1326,
      "step": 2900
    },
    {
      "epoch": 13.57,
      "learning_rate": 6.446174496644296e-05,
      "loss": 0.1156,
      "step": 3000
    },
    {
      "epoch": 13.57,
      "eval_las": 75.71416944738341,
      "eval_loss": 1.6040022373199463,
      "eval_runtime": 3.6712,
      "eval_samples_per_second": 138.103,
      "eval_steps_per_second": 17.433,
      "eval_uas": 80.20672255229104,
      "step": 3000
    },
    {
      "epoch": 14.03,
      "learning_rate": 6.392483221476511e-05,
      "loss": 0.117,
      "step": 3100
    },
    {
      "epoch": 14.48,
      "learning_rate": 6.338791946308726e-05,
      "loss": 0.0966,
      "step": 3200
    },
    {
      "epoch": 14.93,
      "learning_rate": 6.28510067114094e-05,
      "loss": 0.1057,
      "step": 3300
    },
    {
      "epoch": 15.38,
      "learning_rate": 6.231409395973154e-05,
      "loss": 0.091,
      "step": 3400
    },
    {
      "epoch": 15.84,
      "learning_rate": 6.177718120805369e-05,
      "loss": 0.09,
      "step": 3500
    },
    {
      "epoch": 15.84,
      "eval_las": 75.79555627899406,
      "eval_loss": 1.8544515371322632,
      "eval_runtime": 3.6721,
      "eval_samples_per_second": 138.068,
      "eval_steps_per_second": 17.429,
      "eval_uas": 80.03581020590869,
      "step": 3500
    },
    {
      "epoch": 16.29,
      "learning_rate": 6.124026845637584e-05,
      "loss": 0.0816,
      "step": 3600
    },
    {
      "epoch": 16.74,
      "learning_rate": 6.070335570469799e-05,
      "loss": 0.0809,
      "step": 3700
    },
    {
      "epoch": 17.19,
      "learning_rate": 6.0166442953020136e-05,
      "loss": 0.0753,
      "step": 3800
    },
    {
      "epoch": 17.65,
      "learning_rate": 5.962953020134229e-05,
      "loss": 0.073,
      "step": 3900
    },
    {
      "epoch": 18.1,
      "learning_rate": 5.909261744966444e-05,
      "loss": 0.07,
      "step": 4000
    },
    {
      "epoch": 18.1,
      "eval_las": 76.59314722877838,
      "eval_loss": 2.0029706954956055,
      "eval_runtime": 3.6667,
      "eval_samples_per_second": 138.273,
      "eval_steps_per_second": 17.455,
      "eval_uas": 80.7682916904045,
      "step": 4000
    },
    {
      "epoch": 18.55,
      "learning_rate": 5.855570469798659e-05,
      "loss": 0.0641,
      "step": 4100
    },
    {
      "epoch": 19.0,
      "learning_rate": 5.8018791946308735e-05,
      "loss": 0.0687,
      "step": 4200
    },
    {
      "epoch": 19.46,
      "learning_rate": 5.7481879194630884e-05,
      "loss": 0.0548,
      "step": 4300
    },
    {
      "epoch": 19.91,
      "learning_rate": 5.694496644295303e-05,
      "loss": 0.0608,
      "step": 4400
    },
    {
      "epoch": 20.36,
      "learning_rate": 5.6408053691275166e-05,
      "loss": 0.0526,
      "step": 4500
    },
    {
      "epoch": 20.36,
      "eval_las": 75.82811101163833,
      "eval_loss": 2.0474750995635986,
      "eval_runtime": 3.6759,
      "eval_samples_per_second": 137.925,
      "eval_steps_per_second": 17.411,
      "eval_uas": 80.42646699763978,
      "step": 4500
    },
    {
      "epoch": 20.81,
      "learning_rate": 5.5871140939597315e-05,
      "loss": 0.0574,
      "step": 4600
    },
    {
      "epoch": 21.27,
      "learning_rate": 5.533422818791946e-05,
      "loss": 0.0506,
      "step": 4700
    },
    {
      "epoch": 21.72,
      "learning_rate": 5.479731543624161e-05,
      "loss": 0.0506,
      "step": 4800
    },
    {
      "epoch": 22.17,
      "learning_rate": 5.426040268456376e-05,
      "loss": 0.0478,
      "step": 4900
    },
    {
      "epoch": 22.62,
      "learning_rate": 5.372348993288591e-05,
      "loss": 0.0455,
      "step": 5000
    },
    {
      "epoch": 22.62,
      "eval_las": 75.73044681370554,
      "eval_loss": 2.1534557342529297,
      "eval_runtime": 3.6739,
      "eval_samples_per_second": 138.001,
      "eval_steps_per_second": 17.42,
      "eval_uas": 80.47529909660616,
      "step": 5000
    },
    {
      "epoch": 23.08,
      "learning_rate": 5.3186577181208056e-05,
      "loss": 0.0446,
      "step": 5100
    },
    {
      "epoch": 23.53,
      "learning_rate": 5.2649664429530204e-05,
      "loss": 0.0409,
      "step": 5200
    },
    {
      "epoch": 23.98,
      "learning_rate": 5.211275167785235e-05,
      "loss": 0.0448,
      "step": 5300
    },
    {
      "epoch": 24.43,
      "learning_rate": 5.15758389261745e-05,
      "loss": 0.0379,
      "step": 5400
    },
    {
      "epoch": 24.89,
      "learning_rate": 5.1038926174496656e-05,
      "loss": 0.0416,
      "step": 5500
    },
    {
      "epoch": 24.89,
      "eval_las": 76.22690648653048,
      "eval_loss": 2.1511826515197754,
      "eval_runtime": 3.6773,
      "eval_samples_per_second": 137.872,
      "eval_steps_per_second": 17.404,
      "eval_uas": 80.80898510620982,
      "step": 5500
    },
    {
      "epoch": 25.34,
      "learning_rate": 5.050201342281879e-05,
      "loss": 0.0352,
      "step": 5600
    },
    {
      "epoch": 25.79,
      "learning_rate": 4.996510067114094e-05,
      "loss": 0.0369,
      "step": 5700
    },
    {
      "epoch": 26.24,
      "learning_rate": 4.942818791946309e-05,
      "loss": 0.0354,
      "step": 5800
    },
    {
      "epoch": 26.7,
      "learning_rate": 4.8891275167785235e-05,
      "loss": 0.0342,
      "step": 5900
    },
    {
      "epoch": 27.15,
      "learning_rate": 4.835436241610738e-05,
      "loss": 0.0342,
      "step": 6000
    },
    {
      "epoch": 27.15,
      "eval_las": 75.9908846748596,
      "eval_loss": 2.3812143802642822,
      "eval_runtime": 3.6674,
      "eval_samples_per_second": 138.245,
      "eval_steps_per_second": 17.451,
      "eval_uas": 80.6299340766664,
      "step": 6000
    },
    {
      "epoch": 27.6,
      "learning_rate": 4.781744966442953e-05,
      "loss": 0.0296,
      "step": 6100
    },
    {
      "epoch": 28.05,
      "learning_rate": 4.728053691275168e-05,
      "loss": 0.0304,
      "step": 6200
    },
    {
      "epoch": 28.51,
      "learning_rate": 4.674362416107383e-05,
      "loss": 0.0304,
      "step": 6300
    },
    {
      "epoch": 28.96,
      "learning_rate": 4.6206711409395976e-05,
      "loss": 0.0319,
      "step": 6400
    },
    {
      "epoch": 29.41,
      "learning_rate": 4.5669798657718125e-05,
      "loss": 0.0264,
      "step": 6500
    },
    {
      "epoch": 29.41,
      "eval_las": 75.8769431106047,
      "eval_loss": 2.4017791748046875,
      "eval_runtime": 3.6647,
      "eval_samples_per_second": 138.347,
      "eval_steps_per_second": 17.464,
      "eval_uas": 80.4671604134451,
      "step": 6500
    },
    {
      "epoch": 29.41,
      "step": 6500,
      "total_flos": 3.4624658436900864e+16,
      "train_loss": 0.30513293038881745,
      "train_runtime": 3575.6323,
      "train_samples_per_second": 134.242,
      "train_steps_per_second": 4.195
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 68,
  "total_flos": 3.4624658436900864e+16,
  "trial_name": null,
  "trial_params": null
}