wav2vec2-xls-r-1b-arabic / trainer_state.json
AndrewMcDowell's picture
End of training
69b978b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.999581414817914,
"global_step": 17910,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 4.9000000000000005e-05,
"loss": 5.3488,
"step": 100
},
{
"epoch": 0.33,
"learning_rate": 9.900000000000001e-05,
"loss": 3.2572,
"step": 200
},
{
"epoch": 0.5,
"learning_rate": 0.000149,
"loss": 2.4392,
"step": 300
},
{
"epoch": 0.67,
"learning_rate": 0.000199,
"loss": 2.2566,
"step": 400
},
{
"epoch": 0.84,
"learning_rate": 0.000249,
"loss": 2.2416,
"step": 500
},
{
"epoch": 0.84,
"eval_loss": 1.2866647243499756,
"eval_runtime": 434.4225,
"eval_samples_per_second": 23.912,
"eval_steps_per_second": 2.99,
"eval_wer": 0.8874521338587047,
"step": 500
},
{
"epoch": 1.01,
"learning_rate": 0.000299,
"loss": 2.2596,
"step": 600
},
{
"epoch": 1.17,
"learning_rate": 0.00034899999999999997,
"loss": 2.2575,
"step": 700
},
{
"epoch": 1.34,
"learning_rate": 0.00039900000000000005,
"loss": 2.2978,
"step": 800
},
{
"epoch": 1.51,
"learning_rate": 0.000449,
"loss": 2.2998,
"step": 900
},
{
"epoch": 1.67,
"learning_rate": 0.000499,
"loss": 2.3089,
"step": 1000
},
{
"epoch": 1.67,
"eval_loss": 1.8336485624313354,
"eval_runtime": 430.7741,
"eval_samples_per_second": 24.115,
"eval_steps_per_second": 3.016,
"eval_wer": 0.9547514660451005,
"step": 1000
},
{
"epoch": 1.84,
"learning_rate": 0.000549,
"loss": 2.3156,
"step": 1100
},
{
"epoch": 2.01,
"learning_rate": 0.000599,
"loss": 2.3298,
"step": 1200
},
{
"epoch": 2.18,
"learning_rate": 0.0006490000000000001,
"loss": 2.3174,
"step": 1300
},
{
"epoch": 2.34,
"learning_rate": 0.000699,
"loss": 2.349,
"step": 1400
},
{
"epoch": 2.51,
"learning_rate": 0.000749,
"loss": 2.3614,
"step": 1500
},
{
"epoch": 2.51,
"eval_loss": 1.5936506986618042,
"eval_runtime": 430.1389,
"eval_samples_per_second": 24.15,
"eval_steps_per_second": 3.02,
"eval_wer": 0.9468893945279983,
"step": 1500
},
{
"epoch": 2.68,
"learning_rate": 0.000799,
"loss": 2.3597,
"step": 1600
},
{
"epoch": 2.85,
"learning_rate": 0.000849,
"loss": 2.397,
"step": 1700
},
{
"epoch": 3.02,
"learning_rate": 0.0008990000000000001,
"loss": 2.4454,
"step": 1800
},
{
"epoch": 3.18,
"learning_rate": 0.000949,
"loss": 2.4806,
"step": 1900
},
{
"epoch": 3.35,
"learning_rate": 0.000999,
"loss": 2.5234,
"step": 2000
},
{
"epoch": 3.35,
"eval_loss": 1.9764641523361206,
"eval_runtime": 434.6883,
"eval_samples_per_second": 23.898,
"eval_steps_per_second": 2.988,
"eval_wer": 0.9866992248922434,
"step": 2000
},
{
"epoch": 3.52,
"learning_rate": 0.0009753148614609572,
"loss": 2.5559,
"step": 2100
},
{
"epoch": 3.68,
"learning_rate": 0.0009501259445843828,
"loss": 2.5543,
"step": 2200
},
{
"epoch": 3.85,
"learning_rate": 0.0009249370277078086,
"loss": 2.5819,
"step": 2300
},
{
"epoch": 4.02,
"learning_rate": 0.0008997481108312343,
"loss": 2.5837,
"step": 2400
},
{
"epoch": 4.19,
"learning_rate": 0.00087455919395466,
"loss": 2.5373,
"step": 2500
},
{
"epoch": 4.19,
"eval_loss": 1.9062319993972778,
"eval_runtime": 430.3022,
"eval_samples_per_second": 24.141,
"eval_steps_per_second": 3.019,
"eval_wer": 0.9916014577205542,
"step": 2500
},
{
"epoch": 4.35,
"learning_rate": 0.0008493702770780856,
"loss": 2.5617,
"step": 2600
},
{
"epoch": 4.52,
"learning_rate": 0.0008241813602015113,
"loss": 2.5553,
"step": 2700
},
{
"epoch": 4.69,
"learning_rate": 0.0007989924433249371,
"loss": 2.549,
"step": 2800
},
{
"epoch": 4.86,
"learning_rate": 0.0007738035264483628,
"loss": 2.5636,
"step": 2900
},
{
"epoch": 5.03,
"learning_rate": 0.0007486146095717884,
"loss": 2.5703,
"step": 3000
},
{
"epoch": 5.03,
"eval_loss": 1.977164387702942,
"eval_runtime": 431.1212,
"eval_samples_per_second": 24.095,
"eval_steps_per_second": 3.013,
"eval_wer": 0.9914719647779197,
"step": 3000
},
{
"epoch": 5.19,
"learning_rate": 0.0007234256926952141,
"loss": 2.5526,
"step": 3100
},
{
"epoch": 5.36,
"learning_rate": 0.0006982367758186398,
"loss": 2.5277,
"step": 3200
},
{
"epoch": 5.53,
"learning_rate": 0.0006730478589420656,
"loss": 2.4969,
"step": 3300
},
{
"epoch": 5.69,
"learning_rate": 0.0006478589420654912,
"loss": 2.4849,
"step": 3400
},
{
"epoch": 5.86,
"learning_rate": 0.0006226700251889169,
"loss": 2.4656,
"step": 3500
},
{
"epoch": 5.86,
"eval_loss": 1.8083465099334717,
"eval_runtime": 432.7157,
"eval_samples_per_second": 24.007,
"eval_steps_per_second": 3.002,
"eval_wer": 0.9829069315722293,
"step": 3500
},
{
"epoch": 6.03,
"learning_rate": 0.0005974811083123426,
"loss": 2.4858,
"step": 3600
},
{
"epoch": 6.2,
"learning_rate": 0.0005722921914357682,
"loss": 2.4552,
"step": 3700
},
{
"epoch": 6.37,
"learning_rate": 0.0005471032745591939,
"loss": 2.4302,
"step": 3800
},
{
"epoch": 6.53,
"learning_rate": 0.0005219143576826196,
"loss": 2.4397,
"step": 3900
},
{
"epoch": 6.7,
"learning_rate": 0.0004967254408060454,
"loss": 2.4339,
"step": 4000
},
{
"epoch": 6.7,
"eval_loss": 1.754757046699524,
"eval_runtime": 442.6863,
"eval_samples_per_second": 23.466,
"eval_steps_per_second": 2.934,
"eval_wer": 0.9752483489649814,
"step": 4000
},
{
"epoch": 6.87,
"learning_rate": 0.00047153652392947104,
"loss": 2.4069,
"step": 4100
},
{
"epoch": 7.04,
"learning_rate": 0.0004463476070528967,
"loss": 2.3863,
"step": 4200
},
{
"epoch": 7.2,
"learning_rate": 0.00042115869017632243,
"loss": 2.3614,
"step": 4300
},
{
"epoch": 7.37,
"learning_rate": 0.0003959697732997481,
"loss": 2.3534,
"step": 4400
},
{
"epoch": 7.54,
"learning_rate": 0.00037078085642317383,
"loss": 2.344,
"step": 4500
},
{
"epoch": 7.54,
"eval_loss": 1.6146422624588013,
"eval_runtime": 432.0313,
"eval_samples_per_second": 24.045,
"eval_steps_per_second": 3.007,
"eval_wer": 0.9638344710213294,
"step": 4500
},
{
"epoch": 7.7,
"learning_rate": 0.0003455919395465995,
"loss": 2.3322,
"step": 4600
},
{
"epoch": 7.87,
"learning_rate": 0.0003204030226700252,
"loss": 2.315,
"step": 4700
},
{
"epoch": 8.04,
"learning_rate": 0.00029521410579345085,
"loss": 2.3035,
"step": 4800
},
{
"epoch": 8.21,
"learning_rate": 0.0002700251889168766,
"loss": 2.2715,
"step": 4900
},
{
"epoch": 8.38,
"learning_rate": 0.00024483627204030224,
"loss": 2.2677,
"step": 5000
},
{
"epoch": 8.38,
"eval_loss": 1.5104962587356567,
"eval_runtime": 431.1839,
"eval_samples_per_second": 24.092,
"eval_steps_per_second": 3.013,
"eval_wer": 0.9499232291840095,
"step": 5000
},
{
"epoch": 8.54,
"learning_rate": 0.00021964735516372797,
"loss": 2.266,
"step": 5100
},
{
"epoch": 8.71,
"learning_rate": 0.00019445843828715364,
"loss": 2.2473,
"step": 5200
},
{
"epoch": 8.88,
"learning_rate": 0.00016926952141057937,
"loss": 2.2419,
"step": 5300
},
{
"epoch": 9.05,
"learning_rate": 0.00014408060453400504,
"loss": 2.2305,
"step": 5400
},
{
"epoch": 9.21,
"learning_rate": 0.00011889168765743074,
"loss": 2.2074,
"step": 5500
},
{
"epoch": 9.21,
"eval_loss": 1.4190884828567505,
"eval_runtime": 440.4999,
"eval_samples_per_second": 23.582,
"eval_steps_per_second": 2.949,
"eval_wer": 0.9356790054942006,
"step": 5500
},
{
"epoch": 9.38,
"learning_rate": 0.000773852922690132,
"loss": 2.3213,
"step": 5600
},
{
"epoch": 9.55,
"learning_rate": 0.0007675675675675676,
"loss": 2.3412,
"step": 5700
},
{
"epoch": 9.71,
"learning_rate": 0.0007612822124450032,
"loss": 2.3508,
"step": 5800
},
{
"epoch": 9.88,
"learning_rate": 0.0007549968573224387,
"loss": 2.3767,
"step": 5900
},
{
"epoch": 10.05,
"learning_rate": 0.0007487115021998742,
"loss": 2.3768,
"step": 6000
},
{
"epoch": 10.05,
"eval_loss": 1.6662662029266357,
"eval_runtime": 424.8561,
"eval_samples_per_second": 24.451,
"eval_steps_per_second": 3.058,
"eval_wer": 0.9664798268494367,
"step": 6000
},
{
"epoch": 10.22,
"learning_rate": 0.0007424261470773099,
"loss": 2.3681,
"step": 6100
},
{
"epoch": 10.39,
"learning_rate": 0.0007361407919547455,
"loss": 2.389,
"step": 6200
},
{
"epoch": 10.55,
"learning_rate": 0.0007299182903834066,
"loss": 2.3671,
"step": 6300
},
{
"epoch": 10.72,
"learning_rate": 0.0007236329352608423,
"loss": 2.3784,
"step": 6400
},
{
"epoch": 10.89,
"learning_rate": 0.0007173475801382778,
"loss": 2.3804,
"step": 6500
},
{
"epoch": 10.89,
"eval_loss": 1.6570764780044556,
"eval_runtime": 413.7975,
"eval_samples_per_second": 25.104,
"eval_steps_per_second": 3.139,
"eval_wer": 0.9719740274155059,
"step": 6500
},
{
"epoch": 11.06,
"learning_rate": 0.0007110622250157134,
"loss": 2.3757,
"step": 6600
},
{
"epoch": 11.22,
"learning_rate": 0.0007047768698931489,
"loss": 2.3487,
"step": 6700
},
{
"epoch": 11.39,
"learning_rate": 0.0006984915147705846,
"loss": 2.3473,
"step": 6800
},
{
"epoch": 11.56,
"learning_rate": 0.0006922061596480202,
"loss": 2.3481,
"step": 6900
},
{
"epoch": 11.72,
"learning_rate": 0.0006859208045254557,
"loss": 2.3237,
"step": 7000
},
{
"epoch": 11.72,
"eval_loss": 1.604884147644043,
"eval_runtime": 412.0866,
"eval_samples_per_second": 25.208,
"eval_steps_per_second": 3.152,
"eval_wer": 0.9637049780786947,
"step": 7000
},
{
"epoch": 11.89,
"learning_rate": 0.0006796354494028913,
"loss": 2.3379,
"step": 7100
},
{
"epoch": 12.06,
"learning_rate": 0.0006733500942803269,
"loss": 2.3362,
"step": 7200
},
{
"epoch": 12.23,
"learning_rate": 0.0006670647391577624,
"loss": 2.3148,
"step": 7300
},
{
"epoch": 12.4,
"learning_rate": 0.000660779384035198,
"loss": 2.3242,
"step": 7400
},
{
"epoch": 12.56,
"learning_rate": 0.0006544940289126335,
"loss": 2.317,
"step": 7500
},
{
"epoch": 12.56,
"eval_loss": 1.5874534845352173,
"eval_runtime": 412.3759,
"eval_samples_per_second": 25.191,
"eval_steps_per_second": 3.15,
"eval_wer": 0.9655363782673845,
"step": 7500
},
{
"epoch": 12.73,
"learning_rate": 0.0006482086737900693,
"loss": 2.3342,
"step": 7600
},
{
"epoch": 12.9,
"learning_rate": 0.0006419233186675048,
"loss": 2.3229,
"step": 7700
},
{
"epoch": 13.07,
"learning_rate": 0.0006356379635449403,
"loss": 2.3128,
"step": 7800
},
{
"epoch": 13.23,
"learning_rate": 0.0006293526084223759,
"loss": 2.2992,
"step": 7900
},
{
"epoch": 13.4,
"learning_rate": 0.0006230672532998114,
"loss": 2.2988,
"step": 8000
},
{
"epoch": 13.4,
"eval_loss": 1.5357071161270142,
"eval_runtime": 411.1707,
"eval_samples_per_second": 25.264,
"eval_steps_per_second": 3.159,
"eval_wer": 0.9603381615701945,
"step": 8000
},
{
"epoch": 13.57,
"learning_rate": 0.000616781898177247,
"loss": 2.3041,
"step": 8100
},
{
"epoch": 13.74,
"learning_rate": 0.0006104965430546826,
"loss": 2.2905,
"step": 8200
},
{
"epoch": 13.9,
"learning_rate": 0.0006043368950345695,
"loss": 2.2946,
"step": 8300
},
{
"epoch": 14.07,
"learning_rate": 0.000598051539912005,
"loss": 2.3022,
"step": 8400
},
{
"epoch": 14.24,
"learning_rate": 0.0005917661847894407,
"loss": 2.2906,
"step": 8500
},
{
"epoch": 14.24,
"eval_loss": 1.5637153387069702,
"eval_runtime": 411.7098,
"eval_samples_per_second": 25.231,
"eval_steps_per_second": 3.155,
"eval_wer": 0.9592097230700927,
"step": 8500
},
{
"epoch": 14.41,
"learning_rate": 0.0005854808296668762,
"loss": 2.2918,
"step": 8600
},
{
"epoch": 14.57,
"learning_rate": 0.0005791954745443117,
"loss": 2.2805,
"step": 8700
},
{
"epoch": 14.74,
"learning_rate": 0.0005729101194217473,
"loss": 2.2951,
"step": 8800
},
{
"epoch": 14.91,
"learning_rate": 0.0005666247642991829,
"loss": 2.2876,
"step": 8900
},
{
"epoch": 15.08,
"learning_rate": 0.0005603394091766186,
"loss": 2.2848,
"step": 9000
},
{
"epoch": 15.08,
"eval_loss": 1.5325744152069092,
"eval_runtime": 411.6076,
"eval_samples_per_second": 25.238,
"eval_steps_per_second": 3.156,
"eval_wer": 0.9537340214958285,
"step": 9000
},
{
"epoch": 15.24,
"learning_rate": 0.0005540540540540541,
"loss": 2.2537,
"step": 9100
},
{
"epoch": 15.41,
"learning_rate": 0.0005477686989314896,
"loss": 2.2504,
"step": 9200
},
{
"epoch": 15.58,
"learning_rate": 0.0005414833438089252,
"loss": 2.2542,
"step": 9300
},
{
"epoch": 15.75,
"learning_rate": 0.0005351979886863608,
"loss": 2.2455,
"step": 9400
},
{
"epoch": 15.91,
"learning_rate": 0.0005289126335637963,
"loss": 2.2381,
"step": 9500
},
{
"epoch": 15.91,
"eval_loss": 1.563069462776184,
"eval_runtime": 410.1326,
"eval_samples_per_second": 25.328,
"eval_steps_per_second": 3.167,
"eval_wer": 0.9508296797824518,
"step": 9500
},
{
"epoch": 16.08,
"learning_rate": 0.0005226272784412319,
"loss": 2.2406,
"step": 9600
},
{
"epoch": 16.25,
"learning_rate": 0.0005163419233186674,
"loss": 2.2265,
"step": 9700
},
{
"epoch": 16.42,
"learning_rate": 0.0005100565681961032,
"loss": 2.2221,
"step": 9800
},
{
"epoch": 16.58,
"learning_rate": 0.0005037712130735387,
"loss": 2.2122,
"step": 9900
},
{
"epoch": 16.75,
"learning_rate": 0.0004974858579509742,
"loss": 2.2072,
"step": 10000
},
{
"epoch": 16.75,
"eval_loss": 1.4565062522888184,
"eval_runtime": 409.2254,
"eval_samples_per_second": 25.385,
"eval_steps_per_second": 3.174,
"eval_wer": 0.9395452947814344,
"step": 10000
},
{
"epoch": 16.92,
"learning_rate": 0.0004912005028284098,
"loss": 2.1876,
"step": 10100
},
{
"epoch": 17.09,
"learning_rate": 0.00048491514770584537,
"loss": 2.2144,
"step": 10200
},
{
"epoch": 17.25,
"learning_rate": 0.00047862979258328096,
"loss": 2.1943,
"step": 10300
},
{
"epoch": 17.42,
"learning_rate": 0.00047234443746071655,
"loss": 2.1901,
"step": 10400
},
{
"epoch": 17.59,
"learning_rate": 0.00046605908233815214,
"loss": 2.197,
"step": 10500
},
{
"epoch": 17.59,
"eval_loss": 1.430406093597412,
"eval_runtime": 410.1605,
"eval_samples_per_second": 25.327,
"eval_steps_per_second": 3.167,
"eval_wer": 0.9405997373143163,
"step": 10500
},
{
"epoch": 17.76,
"learning_rate": 0.0004597737272155877,
"loss": 2.1872,
"step": 10600
},
{
"epoch": 17.92,
"learning_rate": 0.0004534883720930232,
"loss": 2.2033,
"step": 10700
},
{
"epoch": 18.09,
"learning_rate": 0.00044720301697045886,
"loss": 2.1865,
"step": 10800
},
{
"epoch": 18.26,
"learning_rate": 0.0004409176618478944,
"loss": 2.194,
"step": 10900
},
{
"epoch": 18.43,
"learning_rate": 0.00043463230672533,
"loss": 2.198,
"step": 11000
},
{
"epoch": 18.43,
"eval_loss": 1.423040747642517,
"eval_runtime": 411.9246,
"eval_samples_per_second": 25.218,
"eval_steps_per_second": 3.153,
"eval_wer": 0.9382318663632832,
"step": 11000
},
{
"epoch": 18.59,
"learning_rate": 0.0004283469516027655,
"loss": 2.1784,
"step": 11100
},
{
"epoch": 18.76,
"learning_rate": 0.00042206159648020117,
"loss": 2.1739,
"step": 11200
},
{
"epoch": 18.93,
"learning_rate": 0.0004157762413576367,
"loss": 2.1686,
"step": 11300
},
{
"epoch": 19.1,
"learning_rate": 0.0004094908862350723,
"loss": 2.1639,
"step": 11400
},
{
"epoch": 19.26,
"learning_rate": 0.00040320553111250783,
"loss": 2.1668,
"step": 11500
},
{
"epoch": 19.26,
"eval_loss": 1.3998422622680664,
"eval_runtime": 412.8679,
"eval_samples_per_second": 25.161,
"eval_steps_per_second": 3.146,
"eval_wer": 0.9314982333462827,
"step": 11500
},
{
"epoch": 19.43,
"learning_rate": 0.00039692017598994347,
"loss": 2.1694,
"step": 11600
},
{
"epoch": 19.6,
"learning_rate": 0.000390634820867379,
"loss": 2.1492,
"step": 11700
},
{
"epoch": 19.77,
"learning_rate": 0.0003843494657448146,
"loss": 2.1465,
"step": 11800
},
{
"epoch": 19.93,
"learning_rate": 0.00037806411062225013,
"loss": 2.1484,
"step": 11900
},
{
"epoch": 20.1,
"learning_rate": 0.0003718416090509114,
"loss": 2.1498,
"step": 12000
},
{
"epoch": 20.1,
"eval_loss": 1.3919602632522583,
"eval_runtime": 412.6773,
"eval_samples_per_second": 25.172,
"eval_steps_per_second": 3.148,
"eval_wer": 0.9257635458867491,
"step": 12000
},
{
"epoch": 20.27,
"learning_rate": 0.00036555625392834694,
"loss": 2.1295,
"step": 12100
},
{
"epoch": 20.44,
"learning_rate": 0.00035927089880578253,
"loss": 2.1346,
"step": 12200
},
{
"epoch": 20.6,
"learning_rate": 0.0003529855436832181,
"loss": 2.1227,
"step": 12300
},
{
"epoch": 20.77,
"learning_rate": 0.00034670018856065366,
"loss": 2.1205,
"step": 12400
},
{
"epoch": 20.94,
"learning_rate": 0.00034041483343808925,
"loss": 2.1244,
"step": 12500
},
{
"epoch": 20.94,
"eval_loss": 1.3584457635879517,
"eval_runtime": 410.923,
"eval_samples_per_second": 25.28,
"eval_steps_per_second": 3.161,
"eval_wer": 0.9152561185415394,
"step": 12500
},
{
"epoch": 21.11,
"learning_rate": 0.00033412947831552484,
"loss": 2.1163,
"step": 12600
},
{
"epoch": 21.27,
"learning_rate": 0.00032784412319296043,
"loss": 2.1141,
"step": 12700
},
{
"epoch": 21.44,
"learning_rate": 0.00032155876807039597,
"loss": 2.1122,
"step": 12800
},
{
"epoch": 21.61,
"learning_rate": 0.00031527341294783156,
"loss": 2.0937,
"step": 12900
},
{
"epoch": 21.78,
"learning_rate": 0.00030898805782526715,
"loss": 2.0953,
"step": 13000
},
{
"epoch": 21.78,
"eval_loss": 1.327351450920105,
"eval_runtime": 411.8656,
"eval_samples_per_second": 25.222,
"eval_steps_per_second": 3.154,
"eval_wer": 0.905377656917698,
"step": 13000
},
{
"epoch": 21.94,
"learning_rate": 0.00030270270270270274,
"loss": 2.096,
"step": 13100
},
{
"epoch": 22.11,
"learning_rate": 0.0002964173475801383,
"loss": 2.1102,
"step": 13200
},
{
"epoch": 22.28,
"learning_rate": 0.00029013199245757386,
"loss": 2.0892,
"step": 13300
},
{
"epoch": 22.45,
"learning_rate": 0.00028384663733500945,
"loss": 2.0805,
"step": 13400
},
{
"epoch": 22.61,
"learning_rate": 0.00027756128221244504,
"loss": 2.0762,
"step": 13500
},
{
"epoch": 22.61,
"eval_loss": 1.2932939529418945,
"eval_runtime": 410.2802,
"eval_samples_per_second": 25.319,
"eval_steps_per_second": 3.166,
"eval_wer": 0.9073015520654124,
"step": 13500
},
{
"epoch": 22.78,
"learning_rate": 0.0002712759270898806,
"loss": 2.0867,
"step": 13600
},
{
"epoch": 22.95,
"learning_rate": 0.00026499057196731617,
"loss": 2.0757,
"step": 13700
},
{
"epoch": 23.12,
"learning_rate": 0.00025870521684475176,
"loss": 2.0883,
"step": 13800
},
{
"epoch": 23.28,
"learning_rate": 0.0002524198617221873,
"loss": 2.0696,
"step": 13900
},
{
"epoch": 23.45,
"learning_rate": 0.0002461345065996229,
"loss": 2.0587,
"step": 14000
},
{
"epoch": 23.45,
"eval_loss": 1.2515921592712402,
"eval_runtime": 410.7551,
"eval_samples_per_second": 25.29,
"eval_steps_per_second": 3.162,
"eval_wer": 0.8944447527609746,
"step": 14000
},
{
"epoch": 23.62,
"learning_rate": 0.00023984915147705848,
"loss": 2.0661,
"step": 14100
},
{
"epoch": 23.79,
"learning_rate": 0.00023356379635449404,
"loss": 2.0529,
"step": 14200
},
{
"epoch": 23.95,
"learning_rate": 0.00022727844123192963,
"loss": 2.0509,
"step": 14300
},
{
"epoch": 24.12,
"learning_rate": 0.0002209930861093652,
"loss": 2.0481,
"step": 14400
},
{
"epoch": 24.29,
"learning_rate": 0.00021470773098680078,
"loss": 2.0363,
"step": 14500
},
{
"epoch": 24.29,
"eval_loss": 1.2214268445968628,
"eval_runtime": 412.9112,
"eval_samples_per_second": 25.158,
"eval_steps_per_second": 3.146,
"eval_wer": 0.8901529866622269,
"step": 14500
},
{
"epoch": 24.46,
"learning_rate": 0.00020842237586423635,
"loss": 2.0412,
"step": 14600
},
{
"epoch": 24.62,
"learning_rate": 0.00020213702074167188,
"loss": 2.0264,
"step": 14700
},
{
"epoch": 24.79,
"learning_rate": 0.00019585166561910747,
"loss": 2.0373,
"step": 14800
},
{
"epoch": 24.96,
"learning_rate": 0.00018956631049654304,
"loss": 2.0373,
"step": 14900
},
{
"epoch": 25.13,
"learning_rate": 0.00018328095537397863,
"loss": 2.0302,
"step": 15000
},
{
"epoch": 25.13,
"eval_loss": 1.2087428569793701,
"eval_runtime": 412.179,
"eval_samples_per_second": 25.203,
"eval_steps_per_second": 3.152,
"eval_wer": 0.8871191520062157,
"step": 15000
},
{
"epoch": 25.29,
"learning_rate": 0.0001769956002514142,
"loss": 2.0109,
"step": 15100
},
{
"epoch": 25.46,
"learning_rate": 0.00017071024512884978,
"loss": 2.0215,
"step": 15200
},
{
"epoch": 25.63,
"learning_rate": 0.00016442489000628534,
"loss": 2.0137,
"step": 15300
},
{
"epoch": 25.8,
"learning_rate": 0.00015813953488372093,
"loss": 2.0084,
"step": 15400
},
{
"epoch": 25.96,
"learning_rate": 0.0001518541797611565,
"loss": 2.0071,
"step": 15500
},
{
"epoch": 25.96,
"eval_loss": 1.1953096389770508,
"eval_runtime": 413.1745,
"eval_samples_per_second": 25.142,
"eval_steps_per_second": 3.144,
"eval_wer": 0.8785726177923303,
"step": 15500
},
{
"epoch": 26.13,
"learning_rate": 0.00014556882463859208,
"loss": 2.0112,
"step": 15600
},
{
"epoch": 26.3,
"learning_rate": 0.00013928346951602765,
"loss": 2.0077,
"step": 15700
},
{
"epoch": 26.47,
"learning_rate": 0.00013299811439346324,
"loss": 2.0052,
"step": 15800
},
{
"epoch": 26.63,
"learning_rate": 0.0001267127592708988,
"loss": 1.9947,
"step": 15900
},
{
"epoch": 26.8,
"learning_rate": 0.00012042740414833438,
"loss": 1.9882,
"step": 16000
},
{
"epoch": 26.8,
"eval_loss": 1.1737616062164307,
"eval_runtime": 414.4584,
"eval_samples_per_second": 25.064,
"eval_steps_per_second": 3.134,
"eval_wer": 0.8711730210703517,
"step": 16000
},
{
"epoch": 26.97,
"learning_rate": 0.0001142049025769956,
"loss": 1.9926,
"step": 16100
},
{
"epoch": 27.14,
"learning_rate": 0.00010791954745443117,
"loss": 2.0032,
"step": 16200
},
{
"epoch": 27.3,
"learning_rate": 0.00010163419233186675,
"loss": 1.996,
"step": 16300
},
{
"epoch": 27.47,
"learning_rate": 9.534883720930233e-05,
"loss": 1.9746,
"step": 16400
},
{
"epoch": 27.64,
"learning_rate": 8.906348208673789e-05,
"loss": 1.9772,
"step": 16500
},
{
"epoch": 27.64,
"eval_loss": 1.164720892906189,
"eval_runtime": 425.7197,
"eval_samples_per_second": 24.401,
"eval_steps_per_second": 3.051,
"eval_wer": 0.867214236824093,
"step": 16500
},
{
"epoch": 27.81,
"learning_rate": 8.277812696417347e-05,
"loss": 1.9759,
"step": 16600
},
{
"epoch": 27.97,
"learning_rate": 7.649277184160904e-05,
"loss": 1.9657,
"step": 16700
},
{
"epoch": 28.14,
"learning_rate": 7.020741671904462e-05,
"loss": 1.9806,
"step": 16800
},
{
"epoch": 28.31,
"learning_rate": 6.39220615964802e-05,
"loss": 1.9802,
"step": 16900
},
{
"epoch": 28.48,
"learning_rate": 5.763670647391578e-05,
"loss": 1.9585,
"step": 17000
},
{
"epoch": 28.48,
"eval_loss": 1.1459153890609741,
"eval_runtime": 417.3472,
"eval_samples_per_second": 24.891,
"eval_steps_per_second": 3.113,
"eval_wer": 0.8634774404794938,
"step": 17000
},
{
"epoch": 28.64,
"learning_rate": 5.135135135135136e-05,
"loss": 1.9573,
"step": 17100
},
{
"epoch": 28.81,
"learning_rate": 4.506599622878693e-05,
"loss": 1.962,
"step": 17200
},
{
"epoch": 28.98,
"learning_rate": 3.8780641106222504e-05,
"loss": 1.9653,
"step": 17300
},
{
"epoch": 29.15,
"learning_rate": 3.249528598365808e-05,
"loss": 1.9631,
"step": 17400
},
{
"epoch": 29.31,
"learning_rate": 2.620993086109365e-05,
"loss": 1.944,
"step": 17500
},
{
"epoch": 29.31,
"eval_loss": 1.1414194107055664,
"eval_runtime": 414.6007,
"eval_samples_per_second": 25.055,
"eval_steps_per_second": 3.133,
"eval_wer": 0.8616275412989992,
"step": 17500
},
{
"epoch": 29.48,
"learning_rate": 1.9924575738529227e-05,
"loss": 1.9547,
"step": 17600
},
{
"epoch": 29.65,
"learning_rate": 1.3639220615964803e-05,
"loss": 1.9557,
"step": 17700
},
{
"epoch": 29.82,
"learning_rate": 7.353865493400377e-06,
"loss": 1.9464,
"step": 17800
},
{
"epoch": 29.98,
"learning_rate": 1.0685103708359522e-06,
"loss": 1.9654,
"step": 17900
},
{
"epoch": 30.0,
"step": 17910,
"total_flos": 3.015307099908152e+20,
"train_loss": 1.4937853462266097,
"train_runtime": 52137.1608,
"train_samples_per_second": 21.986,
"train_steps_per_second": 0.344
}
],
"max_steps": 17910,
"num_train_epochs": 30,
"total_flos": 3.015307099908152e+20,
"trial_name": null,
"trial_params": null
}