|
{ |
|
"best_metric": 0.31333859510655093, |
|
"best_model_checkpoint": "xls-r-greek-cretan/checkpoint-3894", |
|
"epoch": 35.0, |
|
"eval_steps": 500, |
|
"global_step": 4130, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.9992756772417789, |
|
"eval_loss": 3.404292583465576, |
|
"eval_runtime": 6.277, |
|
"eval_samples_per_second": 37.597, |
|
"eval_steps_per_second": 4.779, |
|
"eval_wer": 1.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 5.627, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.8620889468347096, |
|
"eval_loss": 2.5985116958618164, |
|
"eval_runtime": 6.1017, |
|
"eval_samples_per_second": 38.678, |
|
"eval_steps_per_second": 4.917, |
|
"eval_wer": 0.9952644041041832, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.17398232652469942, |
|
"eval_loss": 0.6798810958862305, |
|
"eval_runtime": 5.9946, |
|
"eval_samples_per_second": 39.369, |
|
"eval_steps_per_second": 5.004, |
|
"eval_wer": 0.5603788476716653, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 1.9128, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.15094886281326958, |
|
"eval_loss": 0.5401029586791992, |
|
"eval_runtime": 5.974, |
|
"eval_samples_per_second": 39.504, |
|
"eval_steps_per_second": 5.022, |
|
"eval_wer": 0.5027624309392266, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.13906996957844414, |
|
"eval_loss": 0.49666497111320496, |
|
"eval_runtime": 5.4532, |
|
"eval_samples_per_second": 43.277, |
|
"eval_steps_per_second": 5.501, |
|
"eval_wer": 0.48066298342541436, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0002917355371900826, |
|
"loss": 1.029, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.1282051282051282, |
|
"eval_loss": 0.5171706080436707, |
|
"eval_runtime": 5.3095, |
|
"eval_samples_per_second": 44.449, |
|
"eval_steps_per_second": 5.65, |
|
"eval_wer": 0.4451460142067877, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0002752066115702479, |
|
"loss": 0.8154, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.13066782558307982, |
|
"eval_loss": 0.5484737753868103, |
|
"eval_runtime": 6.0637, |
|
"eval_samples_per_second": 38.92, |
|
"eval_steps_per_second": 4.947, |
|
"eval_wer": 0.43804262036306235, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.11299435028248588, |
|
"eval_loss": 0.455568790435791, |
|
"eval_runtime": 5.7805, |
|
"eval_samples_per_second": 40.827, |
|
"eval_steps_per_second": 5.19, |
|
"eval_wer": 0.40331491712707185, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00025867768595041324, |
|
"loss": 0.68, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.11806460959003331, |
|
"eval_loss": 0.47916799783706665, |
|
"eval_runtime": 5.4838, |
|
"eval_samples_per_second": 43.036, |
|
"eval_steps_per_second": 5.471, |
|
"eval_wer": 0.4112075769534333, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.10922787193973635, |
|
"eval_loss": 0.43157267570495605, |
|
"eval_runtime": 5.6241, |
|
"eval_samples_per_second": 41.962, |
|
"eval_steps_per_second": 5.334, |
|
"eval_wer": 0.39068666140489344, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.0002421487603305785, |
|
"loss": 0.5853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.10705490366507316, |
|
"eval_loss": 0.46444636583328247, |
|
"eval_runtime": 5.4324, |
|
"eval_samples_per_second": 43.443, |
|
"eval_steps_per_second": 5.522, |
|
"eval_wer": 0.36937647987371747, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00022561983471074378, |
|
"loss": 0.534, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.11313921483413009, |
|
"eval_loss": 0.5147783756256104, |
|
"eval_runtime": 5.7969, |
|
"eval_samples_per_second": 40.711, |
|
"eval_steps_per_second": 5.175, |
|
"eval_wer": 0.3764798737174428, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.10893814283644793, |
|
"eval_loss": 0.49593451619148254, |
|
"eval_runtime": 5.8062, |
|
"eval_samples_per_second": 40.647, |
|
"eval_steps_per_second": 5.167, |
|
"eval_wer": 0.3772691397000789, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.0002090909090909091, |
|
"loss": 0.4826, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.10850354918151528, |
|
"eval_loss": 0.48721909523010254, |
|
"eval_runtime": 5.799, |
|
"eval_samples_per_second": 40.697, |
|
"eval_steps_per_second": 5.173, |
|
"eval_wer": 0.3788476716653512, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.10256410256410256, |
|
"eval_loss": 0.45876312255859375, |
|
"eval_runtime": 5.8742, |
|
"eval_samples_per_second": 40.176, |
|
"eval_steps_per_second": 5.107, |
|
"eval_wer": 0.36306235201262826, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.00019256198347107438, |
|
"loss": 0.4211, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.10155005070259307, |
|
"eval_loss": 0.4505126476287842, |
|
"eval_runtime": 5.2544, |
|
"eval_samples_per_second": 44.915, |
|
"eval_steps_per_second": 5.71, |
|
"eval_wer": 0.3606945540647198, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.00017603305785123967, |
|
"loss": 0.396, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.09923221787628567, |
|
"eval_loss": 0.46898314356803894, |
|
"eval_runtime": 5.9134, |
|
"eval_samples_per_second": 39.91, |
|
"eval_steps_per_second": 5.073, |
|
"eval_wer": 0.3606945540647198, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.09937708242792989, |
|
"eval_loss": 0.45210039615631104, |
|
"eval_runtime": 6.1856, |
|
"eval_samples_per_second": 38.153, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.3638516179952644, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.00015950413223140495, |
|
"loss": 0.3619, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.09937708242792989, |
|
"eval_loss": 0.4832761585712433, |
|
"eval_runtime": 6.2572, |
|
"eval_samples_per_second": 37.716, |
|
"eval_steps_per_second": 4.794, |
|
"eval_wer": 0.35438042620363064, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.09097493843256556, |
|
"eval_loss": 0.44832131266593933, |
|
"eval_runtime": 5.7413, |
|
"eval_samples_per_second": 41.106, |
|
"eval_steps_per_second": 5.225, |
|
"eval_wer": 0.3346487766377269, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 0.00014297520661157024, |
|
"loss": 0.33, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.09865275966970882, |
|
"eval_loss": 0.4619905650615692, |
|
"eval_runtime": 5.4696, |
|
"eval_samples_per_second": 43.148, |
|
"eval_steps_per_second": 5.485, |
|
"eval_wer": 0.3425414364640884, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.09532087498189193, |
|
"eval_loss": 0.4783581793308258, |
|
"eval_runtime": 5.7494, |
|
"eval_samples_per_second": 41.048, |
|
"eval_steps_per_second": 5.218, |
|
"eval_wer": 0.33859510655090763, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 0.00012644628099173552, |
|
"loss": 0.3199, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.09633492684340142, |
|
"eval_loss": 0.5089753866195679, |
|
"eval_runtime": 6.2898, |
|
"eval_samples_per_second": 37.521, |
|
"eval_steps_per_second": 4.77, |
|
"eval_wer": 0.34333070244672453, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 0.00010991735537190081, |
|
"loss": 0.2793, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.0948862813269593, |
|
"eval_loss": 0.5048560500144958, |
|
"eval_runtime": 6.0494, |
|
"eval_samples_per_second": 39.012, |
|
"eval_steps_per_second": 4.959, |
|
"eval_wer": 0.33859510655090763, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.09198899029407503, |
|
"eval_loss": 0.5005324482917786, |
|
"eval_runtime": 6.5065, |
|
"eval_samples_per_second": 36.271, |
|
"eval_steps_per_second": 4.611, |
|
"eval_wer": 0.324388318863457, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 9.338842975206611e-05, |
|
"loss": 0.2769, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.08851224105461393, |
|
"eval_loss": 0.503860354423523, |
|
"eval_runtime": 5.4619, |
|
"eval_samples_per_second": 43.208, |
|
"eval_steps_per_second": 5.493, |
|
"eval_wer": 0.3267561168113654, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 0.0938722294654498, |
|
"eval_loss": 0.5316025614738464, |
|
"eval_runtime": 5.994, |
|
"eval_samples_per_second": 39.373, |
|
"eval_steps_per_second": 5.005, |
|
"eval_wer": 0.32991318074191, |
|
"step": 3186 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 7.68595041322314e-05, |
|
"loss": 0.2329, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.09256844850065189, |
|
"eval_loss": 0.5238826274871826, |
|
"eval_runtime": 5.6891, |
|
"eval_samples_per_second": 41.483, |
|
"eval_steps_per_second": 5.273, |
|
"eval_wer": 0.3314917127071823, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 6.033057851239669e-05, |
|
"loss": 0.2327, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 0.09184412574243082, |
|
"eval_loss": 0.4961460530757904, |
|
"eval_runtime": 5.7688, |
|
"eval_samples_per_second": 40.91, |
|
"eval_steps_per_second": 5.2, |
|
"eval_wer": 0.3228097868981847, |
|
"step": 3422 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 0.09068520932927712, |
|
"eval_loss": 0.4970700740814209, |
|
"eval_runtime": 6.2622, |
|
"eval_samples_per_second": 37.686, |
|
"eval_steps_per_second": 4.791, |
|
"eval_wer": 0.3267561168113654, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 30.51, |
|
"learning_rate": 4.380165289256198e-05, |
|
"loss": 0.2087, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 0.09083007388092133, |
|
"eval_loss": 0.5133862495422363, |
|
"eval_runtime": 5.6526, |
|
"eval_samples_per_second": 41.751, |
|
"eval_steps_per_second": 5.307, |
|
"eval_wer": 0.3259668508287293, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 0.08851224105461393, |
|
"eval_loss": 0.522043764591217, |
|
"eval_runtime": 5.6904, |
|
"eval_samples_per_second": 41.474, |
|
"eval_steps_per_second": 5.272, |
|
"eval_wer": 0.3212312549329124, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 2.727272727272727e-05, |
|
"loss": 0.1856, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 0.08836737650296972, |
|
"eval_loss": 0.5135778188705444, |
|
"eval_runtime": 6.0356, |
|
"eval_samples_per_second": 39.101, |
|
"eval_steps_per_second": 4.971, |
|
"eval_wer": 0.31333859510655093, |
|
"step": 3894 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"learning_rate": 1.0743801652892562e-05, |
|
"loss": 0.1803, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 0.08749818919310445, |
|
"eval_loss": 0.5257639288902283, |
|
"eval_runtime": 5.8083, |
|
"eval_samples_per_second": 40.631, |
|
"eval_steps_per_second": 5.165, |
|
"eval_wer": 0.31333859510655093, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 0.08764305374474866, |
|
"eval_loss": 0.5268439650535583, |
|
"eval_runtime": 6.0191, |
|
"eval_samples_per_second": 39.208, |
|
"eval_steps_per_second": 4.984, |
|
"eval_wer": 0.3164956590370955, |
|
"step": 4130 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 4130, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 35, |
|
"save_steps": 500, |
|
"total_flos": 4.2609001975048284e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|