|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 100, |
|
"global_step": 1160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 9.273743629455566, |
|
"learning_rate": 9e-07, |
|
"loss": 9.21, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 10.659689903259277, |
|
"learning_rate": 1.9e-06, |
|
"loss": 9.0596, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 13.765968322753906, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 8.6308, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 13.644205093383789, |
|
"learning_rate": 3.8e-06, |
|
"loss": 7.9066, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 18.20844078063965, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 7.8077, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 18.099712371826172, |
|
"learning_rate": 5.8e-06, |
|
"loss": 6.6628, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.206896551724138, |
|
"grad_norm": 10.263163566589355, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 4.6321, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 4.330440998077393, |
|
"learning_rate": 7.8e-06, |
|
"loss": 3.9137, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 1.776586651802063, |
|
"learning_rate": 8.8e-06, |
|
"loss": 3.3387, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 4.104515075683594, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 3.158, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"eval_loss": 3.6802542209625244, |
|
"eval_runtime": 14.7124, |
|
"eval_samples_per_second": 91.352, |
|
"eval_steps_per_second": 2.855, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.896551724137931, |
|
"grad_norm": 0.8036434650421143, |
|
"learning_rate": 1.08e-05, |
|
"loss": 3.0952, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 0.9299842119216919, |
|
"learning_rate": 1.18e-05, |
|
"loss": 3.0977, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 2.4941225051879883, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 3.0263, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 1.423953652381897, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 3.0224, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 1.3000880479812622, |
|
"learning_rate": 1.48e-05, |
|
"loss": 3.0211, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 1.0403013229370117, |
|
"learning_rate": 1.58e-05, |
|
"loss": 3.0063, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.9310344827586206, |
|
"grad_norm": 0.8328425884246826, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 2.9927, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.103448275862069, |
|
"grad_norm": 1.010672926902771, |
|
"learning_rate": 1.78e-05, |
|
"loss": 3.0028, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.2758620689655173, |
|
"grad_norm": 2.069948434829712, |
|
"learning_rate": 1.88e-05, |
|
"loss": 2.9785, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.4482758620689653, |
|
"grad_norm": 1.1152503490447998, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 2.9744, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.4482758620689653, |
|
"eval_loss": 3.1165249347686768, |
|
"eval_runtime": 14.3806, |
|
"eval_samples_per_second": 93.459, |
|
"eval_steps_per_second": 2.921, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.6206896551724137, |
|
"grad_norm": 1.179228663444519, |
|
"learning_rate": 2.08e-05, |
|
"loss": 2.9759, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.793103448275862, |
|
"grad_norm": 0.9685543179512024, |
|
"learning_rate": 2.18e-05, |
|
"loss": 2.9684, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.9655172413793105, |
|
"grad_norm": 0.710486114025116, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 2.9692, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 0.4123358726501465, |
|
"learning_rate": 2.38e-05, |
|
"loss": 2.9609, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.310344827586207, |
|
"grad_norm": 1.9703031778335571, |
|
"learning_rate": 2.48e-05, |
|
"loss": 2.9573, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.482758620689655, |
|
"grad_norm": 1.264237403869629, |
|
"learning_rate": 2.58e-05, |
|
"loss": 2.952, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.655172413793103, |
|
"grad_norm": 0.4067547619342804, |
|
"learning_rate": 2.6800000000000004e-05, |
|
"loss": 2.9432, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.827586206896552, |
|
"grad_norm": 0.5488032698631287, |
|
"learning_rate": 2.7800000000000005e-05, |
|
"loss": 2.9385, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3628854751586914, |
|
"learning_rate": 2.88e-05, |
|
"loss": 2.9384, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.172413793103448, |
|
"grad_norm": 0.5179504752159119, |
|
"learning_rate": 2.98e-05, |
|
"loss": 2.9266, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.172413793103448, |
|
"eval_loss": 3.017453908920288, |
|
"eval_runtime": 14.4537, |
|
"eval_samples_per_second": 92.987, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.344827586206897, |
|
"grad_norm": 1.5788885354995728, |
|
"learning_rate": 3.08e-05, |
|
"loss": 2.9229, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.517241379310345, |
|
"grad_norm": 1.777976155281067, |
|
"learning_rate": 3.18e-05, |
|
"loss": 2.8996, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.689655172413794, |
|
"grad_norm": 0.7765872478485107, |
|
"learning_rate": 3.2800000000000004e-05, |
|
"loss": 2.8698, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.862068965517241, |
|
"grad_norm": 1.5203367471694946, |
|
"learning_rate": 3.38e-05, |
|
"loss": 2.8412, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.0344827586206895, |
|
"grad_norm": 3.4122729301452637, |
|
"learning_rate": 3.48e-05, |
|
"loss": 2.7877, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.206896551724138, |
|
"grad_norm": 1.2957384586334229, |
|
"learning_rate": 3.58e-05, |
|
"loss": 2.6514, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.379310344827586, |
|
"grad_norm": 1.9932421445846558, |
|
"learning_rate": 3.68e-05, |
|
"loss": 2.5492, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.551724137931035, |
|
"grad_norm": 2.7136945724487305, |
|
"learning_rate": 3.7800000000000004e-05, |
|
"loss": 2.3894, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.724137931034483, |
|
"grad_norm": 3.479182720184326, |
|
"learning_rate": 3.88e-05, |
|
"loss": 2.2604, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.896551724137931, |
|
"grad_norm": 1.5610332489013672, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 2.1336, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.896551724137931, |
|
"eval_loss": 2.2134621143341064, |
|
"eval_runtime": 14.4845, |
|
"eval_samples_per_second": 92.789, |
|
"eval_steps_per_second": 2.9, |
|
"eval_wer": 1.011654374943094, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.068965517241379, |
|
"grad_norm": 1.7063915729522705, |
|
"learning_rate": 4.08e-05, |
|
"loss": 2.0117, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.241379310344827, |
|
"grad_norm": 2.4562268257141113, |
|
"learning_rate": 4.18e-05, |
|
"loss": 1.827, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.413793103448276, |
|
"grad_norm": 2.533010482788086, |
|
"learning_rate": 4.2800000000000004e-05, |
|
"loss": 1.6851, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.586206896551724, |
|
"grad_norm": 3.3842196464538574, |
|
"learning_rate": 4.38e-05, |
|
"loss": 1.5765, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.758620689655173, |
|
"grad_norm": 4.379862308502197, |
|
"learning_rate": 4.4800000000000005e-05, |
|
"loss": 1.4748, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.931034482758621, |
|
"grad_norm": 2.6355247497558594, |
|
"learning_rate": 4.58e-05, |
|
"loss": 1.3422, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.10344827586207, |
|
"grad_norm": 3.2931857109069824, |
|
"learning_rate": 4.6800000000000006e-05, |
|
"loss": 1.2681, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.275862068965518, |
|
"grad_norm": 1.8078395128250122, |
|
"learning_rate": 4.78e-05, |
|
"loss": 1.1503, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.448275862068966, |
|
"grad_norm": 2.258090019226074, |
|
"learning_rate": 4.88e-05, |
|
"loss": 1.0564, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"grad_norm": 2.1892685890197754, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 1.0119, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"eval_loss": 1.022745966911316, |
|
"eval_runtime": 14.558, |
|
"eval_samples_per_second": 92.321, |
|
"eval_steps_per_second": 2.885, |
|
"eval_wer": 0.825093326049349, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.793103448275861, |
|
"grad_norm": 1.7447644472122192, |
|
"learning_rate": 5.08e-05, |
|
"loss": 0.9426, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.96551724137931, |
|
"grad_norm": 2.7097115516662598, |
|
"learning_rate": 5.1800000000000005e-05, |
|
"loss": 0.8773, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.137931034482758, |
|
"grad_norm": 1.6821448802947998, |
|
"learning_rate": 5.28e-05, |
|
"loss": 0.7822, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.310344827586206, |
|
"grad_norm": 1.5076041221618652, |
|
"learning_rate": 5.380000000000001e-05, |
|
"loss": 0.7334, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.482758620689655, |
|
"grad_norm": 2.951249361038208, |
|
"learning_rate": 5.4800000000000004e-05, |
|
"loss": 0.6852, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.655172413793103, |
|
"grad_norm": 1.5876814126968384, |
|
"learning_rate": 5.580000000000001e-05, |
|
"loss": 0.653, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.827586206896552, |
|
"grad_norm": 2.462275743484497, |
|
"learning_rate": 5.68e-05, |
|
"loss": 0.6415, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.9461971521377563, |
|
"learning_rate": 5.7799999999999995e-05, |
|
"loss": 0.6092, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.172413793103448, |
|
"grad_norm": 1.474002480506897, |
|
"learning_rate": 5.88e-05, |
|
"loss": 0.5086, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.344827586206897, |
|
"grad_norm": 1.8599964380264282, |
|
"learning_rate": 5.9800000000000003e-05, |
|
"loss": 0.4995, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.344827586206897, |
|
"eval_loss": 0.7699851989746094, |
|
"eval_runtime": 14.4827, |
|
"eval_samples_per_second": 92.8, |
|
"eval_steps_per_second": 2.9, |
|
"eval_wer": 0.6573795866338887, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.517241379310345, |
|
"grad_norm": 1.4216625690460205, |
|
"learning_rate": 6.08e-05, |
|
"loss": 0.4884, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.689655172413794, |
|
"grad_norm": 2.1162352561950684, |
|
"learning_rate": 6.18e-05, |
|
"loss": 0.4398, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.862068965517242, |
|
"grad_norm": 1.4033368825912476, |
|
"learning_rate": 6.280000000000001e-05, |
|
"loss": 0.4458, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.03448275862069, |
|
"grad_norm": 1.6064072847366333, |
|
"learning_rate": 6.38e-05, |
|
"loss": 0.4414, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.206896551724139, |
|
"grad_norm": 2.22825288772583, |
|
"learning_rate": 6.48e-05, |
|
"loss": 0.3528, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.379310344827585, |
|
"grad_norm": 1.4636731147766113, |
|
"learning_rate": 6.58e-05, |
|
"loss": 0.3699, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.551724137931034, |
|
"grad_norm": 1.3389075994491577, |
|
"learning_rate": 6.680000000000001e-05, |
|
"loss": 0.3621, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.724137931034482, |
|
"grad_norm": 2.6080307960510254, |
|
"learning_rate": 6.780000000000001e-05, |
|
"loss": 0.338, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 11.89655172413793, |
|
"grad_norm": 1.3124964237213135, |
|
"learning_rate": 6.879999999999999e-05, |
|
"loss": 0.3143, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.068965517241379, |
|
"grad_norm": 1.1012139320373535, |
|
"learning_rate": 6.98e-05, |
|
"loss": 0.3233, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.068965517241379, |
|
"eval_loss": 0.49695342779159546, |
|
"eval_runtime": 14.4504, |
|
"eval_samples_per_second": 93.008, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 0.5240826732222525, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.241379310344827, |
|
"grad_norm": 2.3211851119995117, |
|
"learning_rate": 7.08e-05, |
|
"loss": 0.2827, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.413793103448276, |
|
"grad_norm": 1.2446343898773193, |
|
"learning_rate": 7.18e-05, |
|
"loss": 0.2478, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.586206896551724, |
|
"grad_norm": 1.1632747650146484, |
|
"learning_rate": 7.280000000000001e-05, |
|
"loss": 0.2761, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.758620689655173, |
|
"grad_norm": 1.39242684841156, |
|
"learning_rate": 7.38e-05, |
|
"loss": 0.2893, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.931034482758621, |
|
"grad_norm": 1.546246886253357, |
|
"learning_rate": 7.48e-05, |
|
"loss": 0.2382, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.10344827586207, |
|
"grad_norm": 1.10295832157135, |
|
"learning_rate": 7.58e-05, |
|
"loss": 0.2563, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.275862068965518, |
|
"grad_norm": 1.3670897483825684, |
|
"learning_rate": 7.680000000000001e-05, |
|
"loss": 0.2438, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.448275862068966, |
|
"grad_norm": 1.2440053224563599, |
|
"learning_rate": 7.780000000000001e-05, |
|
"loss": 0.2149, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.620689655172415, |
|
"grad_norm": 1.1088565587997437, |
|
"learning_rate": 7.88e-05, |
|
"loss": 0.2341, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 13.793103448275861, |
|
"grad_norm": 1.2869517803192139, |
|
"learning_rate": 7.98e-05, |
|
"loss": 0.2452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.793103448275861, |
|
"eval_loss": 0.4584949016571045, |
|
"eval_runtime": 14.4542, |
|
"eval_samples_per_second": 92.983, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 0.49084949467358646, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.96551724137931, |
|
"grad_norm": 1.8122053146362305, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 0.2153, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.137931034482758, |
|
"grad_norm": 1.5954018831253052, |
|
"learning_rate": 8.18e-05, |
|
"loss": 0.1834, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.310344827586206, |
|
"grad_norm": 1.2168644666671753, |
|
"learning_rate": 8.28e-05, |
|
"loss": 0.2001, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.482758620689655, |
|
"grad_norm": 2.418687343597412, |
|
"learning_rate": 8.38e-05, |
|
"loss": 0.2027, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.655172413793103, |
|
"grad_norm": 4.489363670349121, |
|
"learning_rate": 8.48e-05, |
|
"loss": 0.1739, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.827586206896552, |
|
"grad_norm": 1.143477201461792, |
|
"learning_rate": 8.58e-05, |
|
"loss": 0.2029, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1375572681427002, |
|
"learning_rate": 8.680000000000001e-05, |
|
"loss": 0.2061, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.172413793103448, |
|
"grad_norm": 1.596352458000183, |
|
"learning_rate": 8.78e-05, |
|
"loss": 0.1517, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.344827586206897, |
|
"grad_norm": 1.0544416904449463, |
|
"learning_rate": 8.88e-05, |
|
"loss": 0.1687, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.517241379310345, |
|
"grad_norm": 0.9486349821090698, |
|
"learning_rate": 8.98e-05, |
|
"loss": 0.181, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.517241379310345, |
|
"eval_loss": 0.4625524878501892, |
|
"eval_runtime": 14.6126, |
|
"eval_samples_per_second": 91.975, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.4813803150323227, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.689655172413794, |
|
"grad_norm": 2.3056046962738037, |
|
"learning_rate": 9.080000000000001e-05, |
|
"loss": 0.1855, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 15.862068965517242, |
|
"grad_norm": 1.6048717498779297, |
|
"learning_rate": 9.180000000000001e-05, |
|
"loss": 0.1783, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.03448275862069, |
|
"grad_norm": 0.8028109073638916, |
|
"learning_rate": 9.28e-05, |
|
"loss": 0.1877, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.20689655172414, |
|
"grad_norm": 1.15639066696167, |
|
"learning_rate": 9.38e-05, |
|
"loss": 0.1278, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.379310344827587, |
|
"grad_norm": 1.2636216878890991, |
|
"learning_rate": 9.48e-05, |
|
"loss": 0.1544, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.551724137931036, |
|
"grad_norm": 1.0739763975143433, |
|
"learning_rate": 9.58e-05, |
|
"loss": 0.1659, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 16.724137931034484, |
|
"grad_norm": 1.9558533430099487, |
|
"learning_rate": 9.680000000000001e-05, |
|
"loss": 0.1622, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 16.896551724137932, |
|
"grad_norm": 1.494171142578125, |
|
"learning_rate": 9.78e-05, |
|
"loss": 0.1448, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.06896551724138, |
|
"grad_norm": 0.8110372424125671, |
|
"learning_rate": 9.88e-05, |
|
"loss": 0.1522, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.24137931034483, |
|
"grad_norm": 1.800034761428833, |
|
"learning_rate": 9.98e-05, |
|
"loss": 0.1419, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.24137931034483, |
|
"eval_loss": 0.4916817247867584, |
|
"eval_runtime": 14.4094, |
|
"eval_samples_per_second": 93.273, |
|
"eval_steps_per_second": 2.915, |
|
"eval_wer": 0.4774651734498771, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.413793103448278, |
|
"grad_norm": 1.428348422050476, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.1284, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.586206896551722, |
|
"grad_norm": 1.0604817867279053, |
|
"learning_rate": 8.875e-05, |
|
"loss": 0.1525, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 17.75862068965517, |
|
"grad_norm": 0.9464378952980042, |
|
"learning_rate": 8.25e-05, |
|
"loss": 0.1326, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 17.93103448275862, |
|
"grad_norm": 1.1999770402908325, |
|
"learning_rate": 7.625e-05, |
|
"loss": 0.1266, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.103448275862068, |
|
"grad_norm": 0.815773606300354, |
|
"learning_rate": 7e-05, |
|
"loss": 0.118, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.275862068965516, |
|
"grad_norm": 0.8608018755912781, |
|
"learning_rate": 6.375e-05, |
|
"loss": 0.1203, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.448275862068964, |
|
"grad_norm": 1.4489638805389404, |
|
"learning_rate": 5.7499999999999995e-05, |
|
"loss": 0.1175, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 18.620689655172413, |
|
"grad_norm": 0.9969916939735413, |
|
"learning_rate": 5.125e-05, |
|
"loss": 0.1075, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 18.79310344827586, |
|
"grad_norm": 1.1026653051376343, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.109, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 18.96551724137931, |
|
"grad_norm": 1.557403802871704, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.1175, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 18.96551724137931, |
|
"eval_loss": 0.42788705229759216, |
|
"eval_runtime": 14.5391, |
|
"eval_samples_per_second": 92.441, |
|
"eval_steps_per_second": 2.889, |
|
"eval_wer": 0.4359464627151052, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.137931034482758, |
|
"grad_norm": 0.6065557599067688, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0727, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.310344827586206, |
|
"grad_norm": 0.7714155912399292, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.0907, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.482758620689655, |
|
"grad_norm": 1.672736644744873, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0953, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 19.655172413793103, |
|
"grad_norm": 0.798320472240448, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.084, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 19.82758620689655, |
|
"grad_norm": 0.8147380352020264, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0907, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.1494929790496826, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.0897, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1160, |
|
"total_flos": 2.2493941126178012e+18, |
|
"train_loss": 1.5806312146885642, |
|
"train_runtime": 1021.9437, |
|
"train_samples_per_second": 72.333, |
|
"train_steps_per_second": 1.135 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2493941126178012e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|