{
  "best_metric": 82.41013261020537,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Arabic-PADT/checkpoint-3000",
  "epoch": 28.94736842105263,
  "global_step": 5500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.53,
      "learning_rate": 7.76e-05,
      "loss": 3.3094,
      "step": 100
    },
    {
      "epoch": 1.05,
      "learning_rate": 7.947919463087248e-05,
      "loss": 1.0202,
      "step": 200
    },
    {
      "epoch": 1.58,
      "learning_rate": 7.894228187919463e-05,
      "loss": 0.7642,
      "step": 300
    },
    {
      "epoch": 2.11,
      "learning_rate": 7.840536912751678e-05,
      "loss": 0.697,
      "step": 400
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.786845637583893e-05,
      "loss": 0.5389,
      "step": 500
    },
    {
      "epoch": 2.63,
      "eval_las": 81.76527001554284,
      "eval_loss": 0.8606711626052856,
      "eval_runtime": 7.3104,
      "eval_samples_per_second": 124.343,
      "eval_steps_per_second": 15.594,
      "eval_uas": 86.67614669797281,
      "step": 500
    },
    {
      "epoch": 3.16,
      "learning_rate": 7.733154362416108e-05,
      "loss": 0.5056,
      "step": 600
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.679463087248322e-05,
      "loss": 0.408,
      "step": 700
    },
    {
      "epoch": 4.21,
      "learning_rate": 7.625771812080537e-05,
      "loss": 0.3727,
      "step": 800
    },
    {
      "epoch": 4.74,
      "learning_rate": 7.572080536912752e-05,
      "loss": 0.312,
      "step": 900
    },
    {
      "epoch": 5.26,
      "learning_rate": 7.518389261744967e-05,
      "loss": 0.2764,
      "step": 1000
    },
    {
      "epoch": 5.26,
      "eval_las": 81.93723337411951,
      "eval_loss": 1.1254280805587769,
      "eval_runtime": 7.2968,
      "eval_samples_per_second": 124.575,
      "eval_steps_per_second": 15.623,
      "eval_uas": 87.01676642746122,
      "step": 1000
    },
    {
      "epoch": 5.79,
      "learning_rate": 7.464697986577182e-05,
      "loss": 0.2391,
      "step": 1100
    },
    {
      "epoch": 6.32,
      "learning_rate": 7.411006711409397e-05,
      "loss": 0.2091,
      "step": 1200
    },
    {
      "epoch": 6.84,
      "learning_rate": 7.357315436241611e-05,
      "loss": 0.1907,
      "step": 1300
    },
    {
      "epoch": 7.37,
      "learning_rate": 7.303624161073826e-05,
      "loss": 0.1597,
      "step": 1400
    },
    {
      "epoch": 7.89,
      "learning_rate": 7.249932885906041e-05,
      "loss": 0.1574,
      "step": 1500
    },
    {
      "epoch": 7.89,
      "eval_las": 81.97691722609875,
      "eval_loss": 1.3033808469772339,
      "eval_runtime": 7.3011,
      "eval_samples_per_second": 124.502,
      "eval_steps_per_second": 15.614,
      "eval_uas": 86.96385462482225,
      "step": 1500
    },
    {
      "epoch": 8.42,
      "learning_rate": 7.196241610738256e-05,
      "loss": 0.132,
      "step": 1600
    },
    {
      "epoch": 8.95,
      "learning_rate": 7.142550335570471e-05,
      "loss": 0.1349,
      "step": 1700
    },
    {
      "epoch": 9.47,
      "learning_rate": 7.088859060402686e-05,
      "loss": 0.1038,
      "step": 1800
    },
    {
      "epoch": 10.0,
      "learning_rate": 7.0351677852349e-05,
      "loss": 0.117,
      "step": 1900
    },
    {
      "epoch": 10.53,
      "learning_rate": 6.981476510067114e-05,
      "loss": 0.0903,
      "step": 2000
    },
    {
      "epoch": 10.53,
      "eval_las": 82.0099871027481,
      "eval_loss": 1.4602577686309814,
      "eval_runtime": 7.3008,
      "eval_samples_per_second": 124.507,
      "eval_steps_per_second": 15.615,
      "eval_uas": 87.14573894639372,
      "step": 2000
    },
    {
      "epoch": 11.05,
      "learning_rate": 6.927785234899329e-05,
      "loss": 0.1005,
      "step": 2100
    },
    {
      "epoch": 11.58,
      "learning_rate": 6.874093959731543e-05,
      "loss": 0.0844,
      "step": 2200
    },
    {
      "epoch": 12.11,
      "learning_rate": 6.820402684563758e-05,
      "loss": 0.0867,
      "step": 2300
    },
    {
      "epoch": 12.63,
      "learning_rate": 6.766711409395973e-05,
      "loss": 0.0742,
      "step": 2400
    },
    {
      "epoch": 13.16,
      "learning_rate": 6.713020134228188e-05,
      "loss": 0.0739,
      "step": 2500
    },
    {
      "epoch": 13.16,
      "eval_las": 81.9537683124442,
      "eval_loss": 1.6436030864715576,
      "eval_runtime": 7.3127,
      "eval_samples_per_second": 124.304,
      "eval_steps_per_second": 15.589,
      "eval_uas": 87.02668739045603,
      "step": 2500
    },
    {
      "epoch": 13.68,
      "learning_rate": 6.659328859060403e-05,
      "loss": 0.0676,
      "step": 2600
    },
    {
      "epoch": 14.21,
      "learning_rate": 6.605637583892618e-05,
      "loss": 0.0669,
      "step": 2700
    },
    {
      "epoch": 14.74,
      "learning_rate": 6.551946308724832e-05,
      "loss": 0.0645,
      "step": 2800
    },
    {
      "epoch": 15.26,
      "learning_rate": 6.498255033557047e-05,
      "loss": 0.0588,
      "step": 2900
    },
    {
      "epoch": 15.79,
      "learning_rate": 6.444563758389262e-05,
      "loss": 0.0564,
      "step": 3000
    },
    {
      "epoch": 15.79,
      "eval_las": 82.41013261020537,
      "eval_loss": 1.7569572925567627,
      "eval_runtime": 7.2931,
      "eval_samples_per_second": 124.639,
      "eval_steps_per_second": 15.631,
      "eval_uas": 87.33754423096002,
      "step": 3000
    },
    {
      "epoch": 16.32,
      "learning_rate": 6.390872483221477e-05,
      "loss": 0.0517,
      "step": 3100
    },
    {
      "epoch": 16.84,
      "learning_rate": 6.337181208053692e-05,
      "loss": 0.0542,
      "step": 3200
    },
    {
      "epoch": 17.37,
      "learning_rate": 6.283489932885907e-05,
      "loss": 0.0473,
      "step": 3300
    },
    {
      "epoch": 17.89,
      "learning_rate": 6.229798657718121e-05,
      "loss": 0.0494,
      "step": 3400
    },
    {
      "epoch": 18.42,
      "learning_rate": 6.176107382550336e-05,
      "loss": 0.0452,
      "step": 3500
    },
    {
      "epoch": 18.42,
      "eval_las": 81.9537683124442,
      "eval_loss": 1.7844057083129883,
      "eval_runtime": 7.3028,
      "eval_samples_per_second": 124.473,
      "eval_steps_per_second": 15.61,
      "eval_uas": 86.84149608121962,
      "step": 3500
    },
    {
      "epoch": 18.95,
      "learning_rate": 6.122416107382551e-05,
      "loss": 0.0473,
      "step": 3600
    },
    {
      "epoch": 19.47,
      "learning_rate": 6.068724832214766e-05,
      "loss": 0.0402,
      "step": 3700
    },
    {
      "epoch": 20.0,
      "learning_rate": 6.015033557046981e-05,
      "loss": 0.0426,
      "step": 3800
    },
    {
      "epoch": 20.53,
      "learning_rate": 5.9613422818791955e-05,
      "loss": 0.0375,
      "step": 3900
    },
    {
      "epoch": 21.05,
      "learning_rate": 5.90765100671141e-05,
      "loss": 0.0402,
      "step": 4000
    },
    {
      "epoch": 21.05,
      "eval_las": 82.2844670789378,
      "eval_loss": 1.867564082145691,
      "eval_runtime": 7.2792,
      "eval_samples_per_second": 124.877,
      "eval_steps_per_second": 15.661,
      "eval_uas": 87.14573894639372,
      "step": 4000
    },
    {
      "epoch": 21.58,
      "learning_rate": 5.853959731543625e-05,
      "loss": 0.037,
      "step": 4100
    },
    {
      "epoch": 22.11,
      "learning_rate": 5.80026845637584e-05,
      "loss": 0.0369,
      "step": 4200
    },
    {
      "epoch": 22.63,
      "learning_rate": 5.7465771812080534e-05,
      "loss": 0.0348,
      "step": 4300
    },
    {
      "epoch": 23.16,
      "learning_rate": 5.692885906040268e-05,
      "loss": 0.0359,
      "step": 4400
    },
    {
      "epoch": 23.68,
      "learning_rate": 5.639194630872483e-05,
      "loss": 0.0329,
      "step": 4500
    },
    {
      "epoch": 23.68,
      "eval_las": 82.05297794239227,
      "eval_loss": 1.9352997541427612,
      "eval_runtime": 7.2823,
      "eval_samples_per_second": 124.824,
      "eval_steps_per_second": 15.654,
      "eval_uas": 87.08952015608982,
      "step": 4500
    },
    {
      "epoch": 24.21,
      "learning_rate": 5.5855033557046986e-05,
      "loss": 0.0311,
      "step": 4600
    },
    {
      "epoch": 24.74,
      "learning_rate": 5.532348993288591e-05,
      "loss": 0.0309,
      "step": 4700
    },
    {
      "epoch": 25.26,
      "learning_rate": 5.478657718120806e-05,
      "loss": 0.0302,
      "step": 4800
    },
    {
      "epoch": 25.79,
      "learning_rate": 5.424966442953021e-05,
      "loss": 0.0281,
      "step": 4900
    },
    {
      "epoch": 26.32,
      "learning_rate": 5.371275167785236e-05,
      "loss": 0.0286,
      "step": 5000
    },
    {
      "epoch": 26.32,
      "eval_las": 82.0463639670624,
      "eval_loss": 1.9560511112213135,
      "eval_runtime": 7.3048,
      "eval_samples_per_second": 124.438,
      "eval_steps_per_second": 15.606,
      "eval_uas": 87.08952015608982,
      "step": 5000
    },
    {
      "epoch": 26.84,
      "learning_rate": 5.31758389261745e-05,
      "loss": 0.027,
      "step": 5100
    },
    {
      "epoch": 27.37,
      "learning_rate": 5.263892617449665e-05,
      "loss": 0.0265,
      "step": 5200
    },
    {
      "epoch": 27.89,
      "learning_rate": 5.2102013422818795e-05,
      "loss": 0.0268,
      "step": 5300
    },
    {
      "epoch": 28.42,
      "learning_rate": 5.1565100671140944e-05,
      "loss": 0.0251,
      "step": 5400
    },
    {
      "epoch": 28.95,
      "learning_rate": 5.102818791946309e-05,
      "loss": 0.0245,
      "step": 5500
    },
    {
      "epoch": 28.95,
      "eval_las": 82.21832732563908,
      "eval_loss": 2.1567718982696533,
      "eval_runtime": 7.2888,
      "eval_samples_per_second": 124.712,
      "eval_steps_per_second": 15.64,
      "eval_uas": 87.03660835345084,
      "step": 5500
    },
    {
      "epoch": 28.95,
      "step": 5500,
      "total_flos": 2.936455296878592e+16,
      "train_loss": 0.20698133672367441,
      "train_runtime": 3488.4911,
      "train_samples_per_second": 137.595,
      "train_steps_per_second": 4.3
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 79,
  "total_flos": 2.936455296878592e+16,
  "trial_name": null,
  "trial_params": null
}