{ "best_metric": 68.56690419635788, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Tamil-TTB/checkpoint-1500", "epoch": 307.6923076923077, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.69, "learning_rate": 7.680000000000001e-05, "loss": 2.8111, "step": 100 }, { "epoch": 15.38, "learning_rate": 7.948456375838927e-05, "loss": 0.2559, "step": 200 }, { "epoch": 23.08, "learning_rate": 7.894765100671142e-05, "loss": 0.1068, "step": 300 }, { "epoch": 30.77, "learning_rate": 7.841073825503357e-05, "loss": 0.0779, "step": 400 }, { "epoch": 38.46, "learning_rate": 7.78738255033557e-05, "loss": 0.056, "step": 500 }, { "epoch": 38.46, "eval_las": 67.2209026128266, "eval_loss": 4.200492858886719, "eval_runtime": 0.5418, "eval_samples_per_second": 147.645, "eval_steps_per_second": 18.456, "eval_uas": 78.4639746634996, "step": 500 }, { "epoch": 46.15, "learning_rate": 7.733691275167786e-05, "loss": 0.0501, "step": 600 }, { "epoch": 53.85, "learning_rate": 7.680000000000001e-05, "loss": 0.0416, "step": 700 }, { "epoch": 61.54, "learning_rate": 7.626308724832216e-05, "loss": 0.036, "step": 800 }, { "epoch": 69.23, "learning_rate": 7.57261744966443e-05, "loss": 0.0322, "step": 900 }, { "epoch": 76.92, "learning_rate": 7.518926174496645e-05, "loss": 0.0312, "step": 1000 }, { "epoch": 76.92, "eval_las": 66.0332541567696, "eval_loss": 5.273917198181152, "eval_runtime": 0.5321, "eval_samples_per_second": 150.34, "eval_steps_per_second": 18.793, "eval_uas": 76.72209026128266, "step": 1000 }, { "epoch": 84.62, "learning_rate": 7.46523489932886e-05, "loss": 0.0244, "step": 1100 }, { "epoch": 92.31, "learning_rate": 7.411543624161075e-05, "loss": 0.0265, "step": 1200 }, { "epoch": 100.0, "learning_rate": 7.35785234899329e-05, "loss": 0.021, "step": 1300 }, { "epoch": 107.69, "learning_rate": 7.304161073825505e-05, "loss": 0.0229, "step": 1400 }, { "epoch": 115.38, "learning_rate": 7.25046979865772e-05, "loss": 0.0168, "step": 1500 }, { "epoch": 115.38, "eval_las": 68.56690419635788, "eval_loss": 6.0272111892700195, "eval_runtime": 0.533, "eval_samples_per_second": 150.087, "eval_steps_per_second": 18.761, "eval_uas": 78.06809184481394, "step": 1500 }, { "epoch": 123.08, "learning_rate": 7.196778523489934e-05, "loss": 0.0195, "step": 1600 }, { "epoch": 130.77, "learning_rate": 7.143087248322148e-05, "loss": 0.0163, "step": 1700 }, { "epoch": 138.46, "learning_rate": 7.089395973154363e-05, "loss": 0.0133, "step": 1800 }, { "epoch": 146.15, "learning_rate": 7.035704697986578e-05, "loss": 0.0172, "step": 1900 }, { "epoch": 153.85, "learning_rate": 6.982013422818792e-05, "loss": 0.0135, "step": 2000 }, { "epoch": 153.85, "eval_las": 66.5083135391924, "eval_loss": 5.791055202484131, "eval_runtime": 0.5426, "eval_samples_per_second": 147.447, "eval_steps_per_second": 18.431, "eval_uas": 77.0387965162312, "step": 2000 }, { "epoch": 161.54, "learning_rate": 6.928322147651007e-05, "loss": 0.0131, "step": 2100 }, { "epoch": 169.23, "learning_rate": 6.874630872483222e-05, "loss": 0.0114, "step": 2200 }, { "epoch": 176.92, "learning_rate": 6.820939597315437e-05, "loss": 0.0117, "step": 2300 }, { "epoch": 184.62, "learning_rate": 6.767248322147652e-05, "loss": 0.0149, "step": 2400 }, { "epoch": 192.31, "learning_rate": 6.713557046979866e-05, "loss": 0.0093, "step": 2500 }, { "epoch": 192.31, "eval_las": 66.19160728424386, "eval_loss": 6.622511386871338, "eval_runtime": 0.5411, "eval_samples_per_second": 147.836, "eval_steps_per_second": 18.48, "eval_uas": 76.8012668250198, "step": 2500 }, { "epoch": 200.0, "learning_rate": 6.659865771812081e-05, "loss": 0.0145, "step": 2600 }, { "epoch": 207.69, "learning_rate": 6.606174496644296e-05, "loss": 0.0111, "step": 2700 }, { "epoch": 215.38, "learning_rate": 6.55248322147651e-05, "loss": 0.0091, "step": 2800 }, { "epoch": 223.08, "learning_rate": 6.498791946308724e-05, "loss": 0.01, "step": 2900 }, { "epoch": 230.77, "learning_rate": 6.445100671140939e-05, "loss": 0.01, "step": 3000 }, { "epoch": 230.77, "eval_las": 65.63737133808392, "eval_loss": 6.7730560302734375, "eval_runtime": 0.5346, "eval_samples_per_second": 149.632, "eval_steps_per_second": 18.704, "eval_uas": 77.0387965162312, "step": 3000 }, { "epoch": 238.46, "learning_rate": 6.391409395973154e-05, "loss": 0.0092, "step": 3100 }, { "epoch": 246.15, "learning_rate": 6.337718120805369e-05, "loss": 0.007, "step": 3200 }, { "epoch": 253.85, "learning_rate": 6.284026845637584e-05, "loss": 0.007, "step": 3300 }, { "epoch": 261.54, "learning_rate": 6.230335570469799e-05, "loss": 0.007, "step": 3400 }, { "epoch": 269.23, "learning_rate": 6.176644295302013e-05, "loss": 0.0061, "step": 3500 }, { "epoch": 269.23, "eval_las": 66.270783847981, "eval_loss": 6.5091376304626465, "eval_runtime": 0.5325, "eval_samples_per_second": 150.229, "eval_steps_per_second": 18.779, "eval_uas": 76.64291369754552, "step": 3500 }, { "epoch": 276.92, "learning_rate": 6.122953020134228e-05, "loss": 0.008, "step": 3600 }, { "epoch": 284.62, "learning_rate": 6.069261744966444e-05, "loss": 0.0092, "step": 3700 }, { "epoch": 292.31, "learning_rate": 6.0155704697986585e-05, "loss": 0.007, "step": 3800 }, { "epoch": 300.0, "learning_rate": 5.9618791946308734e-05, "loss": 0.0076, "step": 3900 }, { "epoch": 307.69, "learning_rate": 5.9081879194630875e-05, "loss": 0.0101, "step": 4000 }, { "epoch": 307.69, "eval_las": 66.7458432304038, "eval_loss": 7.320127010345459, "eval_runtime": 0.5431, "eval_samples_per_second": 147.315, "eval_steps_per_second": 18.414, "eval_uas": 76.40538400633412, "step": 4000 }, { "epoch": 307.69, "step": 4000, "total_flos": 2.0552849401921536e+16, "train_loss": 0.0970867464542389, "train_runtime": 2080.6987, "train_samples_per_second": 230.692, "train_steps_per_second": 7.209 } ], "max_steps": 15000, "num_train_epochs": 1154, "total_flos": 2.0552849401921536e+16, "trial_name": null, "trial_params": null }