{ "best_metric": 82.41013261020537, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Arabic-PADT/checkpoint-3000", "epoch": 28.94736842105263, "global_step": 5500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 7.76e-05, "loss": 3.3094, "step": 100 }, { "epoch": 1.05, "learning_rate": 7.947919463087248e-05, "loss": 1.0202, "step": 200 }, { "epoch": 1.58, "learning_rate": 7.894228187919463e-05, "loss": 0.7642, "step": 300 }, { "epoch": 2.11, "learning_rate": 7.840536912751678e-05, "loss": 0.697, "step": 400 }, { "epoch": 2.63, "learning_rate": 7.786845637583893e-05, "loss": 0.5389, "step": 500 }, { "epoch": 2.63, "eval_las": 81.76527001554284, "eval_loss": 0.8606711626052856, "eval_runtime": 7.3104, "eval_samples_per_second": 124.343, "eval_steps_per_second": 15.594, "eval_uas": 86.67614669797281, "step": 500 }, { "epoch": 3.16, "learning_rate": 7.733154362416108e-05, "loss": 0.5056, "step": 600 }, { "epoch": 3.68, "learning_rate": 7.679463087248322e-05, "loss": 0.408, "step": 700 }, { "epoch": 4.21, "learning_rate": 7.625771812080537e-05, "loss": 0.3727, "step": 800 }, { "epoch": 4.74, "learning_rate": 7.572080536912752e-05, "loss": 0.312, "step": 900 }, { "epoch": 5.26, "learning_rate": 7.518389261744967e-05, "loss": 0.2764, "step": 1000 }, { "epoch": 5.26, "eval_las": 81.93723337411951, "eval_loss": 1.1254280805587769, "eval_runtime": 7.2968, "eval_samples_per_second": 124.575, "eval_steps_per_second": 15.623, "eval_uas": 87.01676642746122, "step": 1000 }, { "epoch": 5.79, "learning_rate": 7.464697986577182e-05, "loss": 0.2391, "step": 1100 }, { "epoch": 6.32, "learning_rate": 7.411006711409397e-05, "loss": 0.2091, "step": 1200 }, { "epoch": 6.84, "learning_rate": 7.357315436241611e-05, "loss": 0.1907, "step": 1300 }, { "epoch": 7.37, "learning_rate": 7.303624161073826e-05, "loss": 0.1597, "step": 1400 }, { "epoch": 7.89, "learning_rate": 7.249932885906041e-05, "loss": 0.1574, "step": 1500 }, { "epoch": 7.89, "eval_las": 81.97691722609875, "eval_loss": 1.3033808469772339, "eval_runtime": 7.3011, "eval_samples_per_second": 124.502, "eval_steps_per_second": 15.614, "eval_uas": 86.96385462482225, "step": 1500 }, { "epoch": 8.42, "learning_rate": 7.196241610738256e-05, "loss": 0.132, "step": 1600 }, { "epoch": 8.95, "learning_rate": 7.142550335570471e-05, "loss": 0.1349, "step": 1700 }, { "epoch": 9.47, "learning_rate": 7.088859060402686e-05, "loss": 0.1038, "step": 1800 }, { "epoch": 10.0, "learning_rate": 7.0351677852349e-05, "loss": 0.117, "step": 1900 }, { "epoch": 10.53, "learning_rate": 6.981476510067114e-05, "loss": 0.0903, "step": 2000 }, { "epoch": 10.53, "eval_las": 82.0099871027481, "eval_loss": 1.4602577686309814, "eval_runtime": 7.3008, "eval_samples_per_second": 124.507, "eval_steps_per_second": 15.615, "eval_uas": 87.14573894639372, "step": 2000 }, { "epoch": 11.05, "learning_rate": 6.927785234899329e-05, "loss": 0.1005, "step": 2100 }, { "epoch": 11.58, "learning_rate": 6.874093959731543e-05, "loss": 0.0844, "step": 2200 }, { "epoch": 12.11, "learning_rate": 6.820402684563758e-05, "loss": 0.0867, "step": 2300 }, { "epoch": 12.63, "learning_rate": 6.766711409395973e-05, "loss": 0.0742, "step": 2400 }, { "epoch": 13.16, "learning_rate": 6.713020134228188e-05, "loss": 0.0739, "step": 2500 }, { "epoch": 13.16, "eval_las": 81.9537683124442, "eval_loss": 1.6436030864715576, "eval_runtime": 7.3127, "eval_samples_per_second": 124.304, "eval_steps_per_second": 15.589, "eval_uas": 87.02668739045603, "step": 2500 }, { "epoch": 13.68, "learning_rate": 6.659328859060403e-05, "loss": 0.0676, "step": 2600 }, { "epoch": 14.21, "learning_rate": 6.605637583892618e-05, "loss": 0.0669, "step": 2700 }, { "epoch": 14.74, "learning_rate": 6.551946308724832e-05, "loss": 0.0645, "step": 2800 }, { "epoch": 15.26, "learning_rate": 6.498255033557047e-05, "loss": 0.0588, "step": 2900 }, { "epoch": 15.79, "learning_rate": 6.444563758389262e-05, "loss": 0.0564, "step": 3000 }, { "epoch": 15.79, "eval_las": 82.41013261020537, "eval_loss": 1.7569572925567627, "eval_runtime": 7.2931, "eval_samples_per_second": 124.639, "eval_steps_per_second": 15.631, "eval_uas": 87.33754423096002, "step": 3000 }, { "epoch": 16.32, "learning_rate": 6.390872483221477e-05, "loss": 0.0517, "step": 3100 }, { "epoch": 16.84, "learning_rate": 6.337181208053692e-05, "loss": 0.0542, "step": 3200 }, { "epoch": 17.37, "learning_rate": 6.283489932885907e-05, "loss": 0.0473, "step": 3300 }, { "epoch": 17.89, "learning_rate": 6.229798657718121e-05, "loss": 0.0494, "step": 3400 }, { "epoch": 18.42, "learning_rate": 6.176107382550336e-05, "loss": 0.0452, "step": 3500 }, { "epoch": 18.42, "eval_las": 81.9537683124442, "eval_loss": 1.7844057083129883, "eval_runtime": 7.3028, "eval_samples_per_second": 124.473, "eval_steps_per_second": 15.61, "eval_uas": 86.84149608121962, "step": 3500 }, { "epoch": 18.95, "learning_rate": 6.122416107382551e-05, "loss": 0.0473, "step": 3600 }, { "epoch": 19.47, "learning_rate": 6.068724832214766e-05, "loss": 0.0402, "step": 3700 }, { "epoch": 20.0, "learning_rate": 6.015033557046981e-05, "loss": 0.0426, "step": 3800 }, { "epoch": 20.53, "learning_rate": 5.9613422818791955e-05, "loss": 0.0375, "step": 3900 }, { "epoch": 21.05, "learning_rate": 5.90765100671141e-05, "loss": 0.0402, "step": 4000 }, { "epoch": 21.05, "eval_las": 82.2844670789378, "eval_loss": 1.867564082145691, "eval_runtime": 7.2792, "eval_samples_per_second": 124.877, "eval_steps_per_second": 15.661, "eval_uas": 87.14573894639372, "step": 4000 }, { "epoch": 21.58, "learning_rate": 5.853959731543625e-05, "loss": 0.037, "step": 4100 }, { "epoch": 22.11, "learning_rate": 5.80026845637584e-05, "loss": 0.0369, "step": 4200 }, { "epoch": 22.63, "learning_rate": 5.7465771812080534e-05, "loss": 0.0348, "step": 4300 }, { "epoch": 23.16, "learning_rate": 5.692885906040268e-05, "loss": 0.0359, "step": 4400 }, { "epoch": 23.68, "learning_rate": 5.639194630872483e-05, "loss": 0.0329, "step": 4500 }, { "epoch": 23.68, "eval_las": 82.05297794239227, "eval_loss": 1.9352997541427612, "eval_runtime": 7.2823, "eval_samples_per_second": 124.824, "eval_steps_per_second": 15.654, "eval_uas": 87.08952015608982, "step": 4500 }, { "epoch": 24.21, "learning_rate": 5.5855033557046986e-05, "loss": 0.0311, "step": 4600 }, { "epoch": 24.74, "learning_rate": 5.532348993288591e-05, "loss": 0.0309, "step": 4700 }, { "epoch": 25.26, "learning_rate": 5.478657718120806e-05, "loss": 0.0302, "step": 4800 }, { "epoch": 25.79, "learning_rate": 5.424966442953021e-05, "loss": 0.0281, "step": 4900 }, { "epoch": 26.32, "learning_rate": 5.371275167785236e-05, "loss": 0.0286, "step": 5000 }, { "epoch": 26.32, "eval_las": 82.0463639670624, "eval_loss": 1.9560511112213135, "eval_runtime": 7.3048, "eval_samples_per_second": 124.438, "eval_steps_per_second": 15.606, "eval_uas": 87.08952015608982, "step": 5000 }, { "epoch": 26.84, "learning_rate": 5.31758389261745e-05, "loss": 0.027, "step": 5100 }, { "epoch": 27.37, "learning_rate": 5.263892617449665e-05, "loss": 0.0265, "step": 5200 }, { "epoch": 27.89, "learning_rate": 5.2102013422818795e-05, "loss": 0.0268, "step": 5300 }, { "epoch": 28.42, "learning_rate": 5.1565100671140944e-05, "loss": 0.0251, "step": 5400 }, { "epoch": 28.95, "learning_rate": 5.102818791946309e-05, "loss": 0.0245, "step": 5500 }, { "epoch": 28.95, "eval_las": 82.21832732563908, "eval_loss": 2.1567718982696533, "eval_runtime": 7.2888, "eval_samples_per_second": 124.712, "eval_steps_per_second": 15.64, "eval_uas": 87.03660835345084, "step": 5500 }, { "epoch": 28.95, "step": 5500, "total_flos": 2.936455296878592e+16, "train_loss": 0.20698133672367441, "train_runtime": 3488.4911, "train_samples_per_second": 137.595, "train_steps_per_second": 4.3 } ], "max_steps": 15000, "num_train_epochs": 79, "total_flos": 2.936455296878592e+16, "trial_name": null, "trial_params": null }