|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.999886413115499, |
|
"global_step": 26410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9999242711094284e-05, |
|
"loss": 1.3163, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9963650132525562e-05, |
|
"loss": 1.0093, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.99265429761454e-05, |
|
"loss": 0.8939, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9888678530859524e-05, |
|
"loss": 0.8368, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.985081408557365e-05, |
|
"loss": 0.8794, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9812949640287772e-05, |
|
"loss": 0.843, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9775085195001894e-05, |
|
"loss": 0.8488, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9737220749716017e-05, |
|
"loss": 0.8465, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9699356304430143e-05, |
|
"loss": 0.8591, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9661491859144265e-05, |
|
"loss": 0.8978, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9623627413858387e-05, |
|
"loss": 0.8035, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9585762968572513e-05, |
|
"loss": 0.8153, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9547898523286635e-05, |
|
"loss": 0.8079, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9510034078000758e-05, |
|
"loss": 0.8092, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.947216963271488e-05, |
|
"loss": 0.8294, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9434305187429006e-05, |
|
"loss": 0.8099, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.939644074214313e-05, |
|
"loss": 0.85, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.935857629685725e-05, |
|
"loss": 0.8509, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9320711851571376e-05, |
|
"loss": 0.8229, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.92828474062855e-05, |
|
"loss": 0.8251, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9244982960999625e-05, |
|
"loss": 0.8116, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9207118515713747e-05, |
|
"loss": 0.8277, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.916925407042787e-05, |
|
"loss": 0.8004, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9131389625141995e-05, |
|
"loss": 0.8347, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9093525179856118e-05, |
|
"loss": 0.8103, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.905566073457024e-05, |
|
"loss": 0.8566, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9017796289284362e-05, |
|
"loss": 0.7921, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8979931843998488e-05, |
|
"loss": 0.7959, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.894206739871261e-05, |
|
"loss": 0.7827, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8904202953426733e-05, |
|
"loss": 0.785, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.886633850814086e-05, |
|
"loss": 0.8061, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.882847406285498e-05, |
|
"loss": 0.7717, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8790609617569103e-05, |
|
"loss": 0.8362, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8752745172283226e-05, |
|
"loss": 0.7975, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.871488072699735e-05, |
|
"loss": 0.8123, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.8677016281711474e-05, |
|
"loss": 0.8526, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8639151836425596e-05, |
|
"loss": 0.808, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8601287391139722e-05, |
|
"loss": 0.7989, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8563422945853844e-05, |
|
"loss": 0.7755, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8525558500567967e-05, |
|
"loss": 0.8467, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8487694055282093e-05, |
|
"loss": 0.8221, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8449829609996215e-05, |
|
"loss": 0.7851, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.841196516471034e-05, |
|
"loss": 0.8159, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.837410071942446e-05, |
|
"loss": 0.8027, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8336236274138586e-05, |
|
"loss": 0.7585, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8298371828852708e-05, |
|
"loss": 0.8287, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8260507383566834e-05, |
|
"loss": 0.8127, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8222642938280956e-05, |
|
"loss": 0.8015, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.818477849299508e-05, |
|
"loss": 0.8228, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8146914047709204e-05, |
|
"loss": 0.7865, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8109049602423327e-05, |
|
"loss": 0.8093, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.807118515713745e-05, |
|
"loss": 0.7812, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.803332071185157e-05, |
|
"loss": 0.805, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.7995456266565697e-05, |
|
"loss": 0.7844, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.795759182127982e-05, |
|
"loss": 0.7976, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7919727375993942e-05, |
|
"loss": 0.7683, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7881862930708068e-05, |
|
"loss": 0.8157, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.784399848542219e-05, |
|
"loss": 0.7801, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.7806134040136312e-05, |
|
"loss": 0.7817, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.7768269594850435e-05, |
|
"loss": 0.8286, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.773040514956456e-05, |
|
"loss": 0.7994, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7692540704278683e-05, |
|
"loss": 0.758, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7654676258992805e-05, |
|
"loss": 0.8116, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.761681181370693e-05, |
|
"loss": 0.7762, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7578947368421054e-05, |
|
"loss": 0.7603, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7541082923135176e-05, |
|
"loss": 0.825, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7503218477849302e-05, |
|
"loss": 0.7649, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7465354032563424e-05, |
|
"loss": 0.7886, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.742748958727755e-05, |
|
"loss": 0.796, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7389625141991672e-05, |
|
"loss": 0.7712, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7351760696705795e-05, |
|
"loss": 0.8061, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7313896251419917e-05, |
|
"loss": 0.7691, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7276031806134043e-05, |
|
"loss": 0.8038, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7238167360848165e-05, |
|
"loss": 0.7679, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7200302915562288e-05, |
|
"loss": 0.7909, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7162438470276413e-05, |
|
"loss": 0.8001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7124574024990536e-05, |
|
"loss": 0.7369, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7086709579704658e-05, |
|
"loss": 0.7716, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.704884513441878e-05, |
|
"loss": 0.8003, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7010980689132906e-05, |
|
"loss": 0.7973, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.697311624384703e-05, |
|
"loss": 0.7745, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.693525179856115e-05, |
|
"loss": 0.7266, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6897387353275277e-05, |
|
"loss": 0.7917, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.68595229079894e-05, |
|
"loss": 0.7393, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.682165846270352e-05, |
|
"loss": 0.7822, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6783794017417647e-05, |
|
"loss": 0.7919, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.674592957213177e-05, |
|
"loss": 0.7943, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6708065126845892e-05, |
|
"loss": 0.7731, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6670200681560014e-05, |
|
"loss": 0.7623, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.663233623627414e-05, |
|
"loss": 0.7891, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.6594471790988263e-05, |
|
"loss": 0.7769, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.655660734570239e-05, |
|
"loss": 0.7958, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.651874290041651e-05, |
|
"loss": 0.7555, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6480878455130633e-05, |
|
"loss": 0.7993, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.644301400984476e-05, |
|
"loss": 0.7916, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.640514956455888e-05, |
|
"loss": 0.8101, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6367285119273004e-05, |
|
"loss": 0.8066, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6329420673987126e-05, |
|
"loss": 0.761, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6291556228701252e-05, |
|
"loss": 0.7919, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6253691783415374e-05, |
|
"loss": 0.7484, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.6215827338129497e-05, |
|
"loss": 0.809, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.6177962892843622e-05, |
|
"loss": 0.7683, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6140098447557745e-05, |
|
"loss": 0.7622, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6102234002271867e-05, |
|
"loss": 0.7618, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6064369556985993e-05, |
|
"loss": 0.7687, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.6026505111700115e-05, |
|
"loss": 0.7598, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.5988640666414238e-05, |
|
"loss": 0.7245, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.595077622112836e-05, |
|
"loss": 0.7751, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5912911775842486e-05, |
|
"loss": 0.7903, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5875047330556608e-05, |
|
"loss": 0.7399, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.583718288527073e-05, |
|
"loss": 0.7529, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.5799318439984856e-05, |
|
"loss": 0.7777, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.576145399469898e-05, |
|
"loss": 0.7411, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.5723589549413105e-05, |
|
"loss": 0.7805, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.5685725104127224e-05, |
|
"loss": 0.7585, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.564786065884135e-05, |
|
"loss": 0.7908, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.560999621355547e-05, |
|
"loss": 0.7302, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.5572131768269597e-05, |
|
"loss": 0.7492, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.553426732298372e-05, |
|
"loss": 0.8124, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.5496402877697842e-05, |
|
"loss": 0.7609, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.5458538432411968e-05, |
|
"loss": 0.7383, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.542067398712609e-05, |
|
"loss": 0.7585, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.538356683074593e-05, |
|
"loss": 0.7852, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5345702385460056e-05, |
|
"loss": 0.7737, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5307837940174178e-05, |
|
"loss": 0.7881, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.52699734948883e-05, |
|
"loss": 0.7716, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5232109049602425e-05, |
|
"loss": 0.7478, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5194244604316549e-05, |
|
"loss": 0.7681, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5156380159030673e-05, |
|
"loss": 0.7703, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5118515713744795e-05, |
|
"loss": 0.8031, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.508065126845892e-05, |
|
"loss": 0.7812, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5042786823173042e-05, |
|
"loss": 0.8151, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5004922377887166e-05, |
|
"loss": 0.8384, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4967057932601288e-05, |
|
"loss": 0.7861, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4929193487315412e-05, |
|
"loss": 0.7473, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4891329042029536e-05, |
|
"loss": 0.7741, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4853464596743659e-05, |
|
"loss": 0.799, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4815600151457783e-05, |
|
"loss": 0.7929, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4777735706171905e-05, |
|
"loss": 0.7594, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4739871260886029e-05, |
|
"loss": 0.736, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4702006815600151e-05, |
|
"loss": 0.7608, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4664142370314276e-05, |
|
"loss": 0.7927, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.46262779250284e-05, |
|
"loss": 0.7753, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4588413479742522e-05, |
|
"loss": 0.7476, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4550549034456646e-05, |
|
"loss": 0.7907, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4512684589170768e-05, |
|
"loss": 0.7874, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4474820143884894e-05, |
|
"loss": 0.7626, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4436955698599018e-05, |
|
"loss": 0.7527, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.439909125331314e-05, |
|
"loss": 0.7451, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4361226808027265e-05, |
|
"loss": 0.7615, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.4323362362741387e-05, |
|
"loss": 0.7365, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.4285497917455511e-05, |
|
"loss": 0.7627, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4247633472169634e-05, |
|
"loss": 0.7769, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4209769026883758e-05, |
|
"loss": 0.7253, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4171904581597882e-05, |
|
"loss": 0.7422, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4134040136312004e-05, |
|
"loss": 0.7459, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4096175691026128e-05, |
|
"loss": 0.7355, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.405831124574025e-05, |
|
"loss": 0.7658, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4020446800454375e-05, |
|
"loss": 0.8133, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.3982582355168497e-05, |
|
"loss": 0.7574, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3944717909882621e-05, |
|
"loss": 0.7755, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3906853464596745e-05, |
|
"loss": 0.7434, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3868989019310868e-05, |
|
"loss": 0.7565, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3831124574024992e-05, |
|
"loss": 0.7705, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3793260128739114e-05, |
|
"loss": 0.7699, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3755395683453238e-05, |
|
"loss": 0.7834, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3718288527073081e-05, |
|
"loss": 0.7788, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3680424081787202e-05, |
|
"loss": 0.7329, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3642559636501328e-05, |
|
"loss": 0.7774, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3604695191215448e-05, |
|
"loss": 0.7427, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3566830745929574e-05, |
|
"loss": 0.7482, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.3528966300643698e-05, |
|
"loss": 0.7888, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.349110185535782e-05, |
|
"loss": 0.7535, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3453237410071945e-05, |
|
"loss": 0.7456, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3415372964786067e-05, |
|
"loss": 0.7964, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3377508519500191e-05, |
|
"loss": 0.7913, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3339644074214313e-05, |
|
"loss": 0.7745, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3301779628928438e-05, |
|
"loss": 0.7595, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3263915183642562e-05, |
|
"loss": 0.7117, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3226050738356684e-05, |
|
"loss": 0.8142, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3188186293070808e-05, |
|
"loss": 0.7539, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.315032184778493e-05, |
|
"loss": 0.7135, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3112457402499055e-05, |
|
"loss": 0.7883, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3074592957213177e-05, |
|
"loss": 0.7335, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.3036728511927301e-05, |
|
"loss": 0.771, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2998864066641425e-05, |
|
"loss": 0.7341, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2960999621355547e-05, |
|
"loss": 0.7177, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2923135176069672e-05, |
|
"loss": 0.7784, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2885270730783794e-05, |
|
"loss": 0.7906, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2847406285497918e-05, |
|
"loss": 0.7762, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.280954184021204e-05, |
|
"loss": 0.7482, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2771677394926164e-05, |
|
"loss": 0.7069, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2734570238546007e-05, |
|
"loss": 0.761, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.269670579326013e-05, |
|
"loss": 0.7375, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2658841347974254e-05, |
|
"loss": 0.7937, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2620976902688378e-05, |
|
"loss": 0.7983, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.25831124574025e-05, |
|
"loss": 0.7339, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2545248012116624e-05, |
|
"loss": 0.7671, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2507383566830747e-05, |
|
"loss": 0.7311, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2469519121544871e-05, |
|
"loss": 0.7652, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2431654676258993e-05, |
|
"loss": 0.7549, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2393790230973117e-05, |
|
"loss": 0.7802, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2355925785687241e-05, |
|
"loss": 0.7573, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2318061340401364e-05, |
|
"loss": 0.695, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2280196895115488e-05, |
|
"loss": 0.7214, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.224233244982961e-05, |
|
"loss": 0.772, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2204468004543734e-05, |
|
"loss": 0.7855, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2166603559257857e-05, |
|
"loss": 0.7345, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.212873911397198e-05, |
|
"loss": 0.7416, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2090874668686105e-05, |
|
"loss": 0.7723, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.2053010223400227e-05, |
|
"loss": 0.7446, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.2015145778114351e-05, |
|
"loss": 0.7657, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1977281332828474e-05, |
|
"loss": 0.7944, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1939416887542598e-05, |
|
"loss": 0.7598, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1901552442256724e-05, |
|
"loss": 0.7212, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1863687996970846e-05, |
|
"loss": 0.7564, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.182582355168497e-05, |
|
"loss": 0.7552, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.1787959106399092e-05, |
|
"loss": 0.7582, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1750094661113217e-05, |
|
"loss": 0.7506, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1712230215827339e-05, |
|
"loss": 0.7738, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1674365770541463e-05, |
|
"loss": 0.7295, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1636501325255587e-05, |
|
"loss": 0.7627, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.159863687996971e-05, |
|
"loss": 0.7918, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1560772434683833e-05, |
|
"loss": 0.7235, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1522907989397956e-05, |
|
"loss": 0.7988, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.148504354411208e-05, |
|
"loss": 0.7646, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1447179098826202e-05, |
|
"loss": 0.7794, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1409314653540326e-05, |
|
"loss": 0.79, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.137145020825445e-05, |
|
"loss": 0.7291, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.1333585762968573e-05, |
|
"loss": 0.7682, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.1295721317682697e-05, |
|
"loss": 0.8078, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.125785687239682e-05, |
|
"loss": 0.8196, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1219992427110943e-05, |
|
"loss": 0.7173, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1182127981825066e-05, |
|
"loss": 0.7908, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.114426353653919e-05, |
|
"loss": 0.7784, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1106399091253316e-05, |
|
"loss": 0.7317, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1068534645967436e-05, |
|
"loss": 0.7497, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1030670200681562e-05, |
|
"loss": 0.7504, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0992805755395683e-05, |
|
"loss": 0.7962, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0954941310109809e-05, |
|
"loss": 0.7615, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0917076864823933e-05, |
|
"loss": 0.7485, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0879969708443772e-05, |
|
"loss": 0.7088, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0842105263157896e-05, |
|
"loss": 0.7479, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0804240817872019e-05, |
|
"loss": 0.7571, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0766376372586143e-05, |
|
"loss": 0.7576, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0728511927300267e-05, |
|
"loss": 0.7267, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.069064748201439e-05, |
|
"loss": 0.7521, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0652783036728513e-05, |
|
"loss": 0.7616, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0614918591442636e-05, |
|
"loss": 0.7232, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.057705414615676e-05, |
|
"loss": 0.797, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0539189700870882e-05, |
|
"loss": 0.7575, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0501325255585006e-05, |
|
"loss": 0.7384, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.046346081029913e-05, |
|
"loss": 0.7523, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0425596365013253e-05, |
|
"loss": 0.7271, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0387731919727377e-05, |
|
"loss": 0.7746, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.03498674744415e-05, |
|
"loss": 0.7463, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0312003029155623e-05, |
|
"loss": 0.7234, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0274138583869746e-05, |
|
"loss": 0.7462, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.023627413858387e-05, |
|
"loss": 0.7391, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0198409693297995e-05, |
|
"loss": 0.7761, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0160545248012116e-05, |
|
"loss": 0.7849, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0122680802726242e-05, |
|
"loss": 0.7391, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0084816357440364e-05, |
|
"loss": 0.7203, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0046951912154488e-05, |
|
"loss": 0.7439, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0009087466868612e-05, |
|
"loss": 0.7551, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.971223021582735e-06, |
|
"loss": 0.6935, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.933358576296857e-06, |
|
"loss": 0.7541, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.895494131010981e-06, |
|
"loss": 0.7828, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.857629685725105e-06, |
|
"loss": 0.7204, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.819765240439228e-06, |
|
"loss": 0.7374, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.781900795153352e-06, |
|
"loss": 0.7477, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.744036349867474e-06, |
|
"loss": 0.7516, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.706171904581598e-06, |
|
"loss": 0.727, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.668307459295722e-06, |
|
"loss": 0.7343, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.630443014009846e-06, |
|
"loss": 0.7739, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.592578568723969e-06, |
|
"loss": 0.7447, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.554714123438093e-06, |
|
"loss": 0.7607, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.516849678152215e-06, |
|
"loss": 0.79, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.47898523286634e-06, |
|
"loss": 0.7209, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.441120787580462e-06, |
|
"loss": 0.7745, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.403256342294586e-06, |
|
"loss": 0.7179, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.36539189700871e-06, |
|
"loss": 0.774, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.327527451722832e-06, |
|
"loss": 0.7592, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.289663006436956e-06, |
|
"loss": 0.7598, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.25179856115108e-06, |
|
"loss": 0.7665, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.213934115865203e-06, |
|
"loss": 0.7529, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.176069670579327e-06, |
|
"loss": 0.6873, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.138205225293451e-06, |
|
"loss": 0.7574, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.100340780007573e-06, |
|
"loss": 0.7215, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.062476334721697e-06, |
|
"loss": 0.7411, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.02461188943582e-06, |
|
"loss": 0.7627, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.986747444149944e-06, |
|
"loss": 0.7313, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.948882998864066e-06, |
|
"loss": 0.765, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.911018553578192e-06, |
|
"loss": 0.7696, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.873154108292314e-06, |
|
"loss": 0.7305, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.835289663006439e-06, |
|
"loss": 0.7653, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.797425217720561e-06, |
|
"loss": 0.7444, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.759560772434685e-06, |
|
"loss": 0.7858, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.721696327148807e-06, |
|
"loss": 0.7634, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.683831881862931e-06, |
|
"loss": 0.7471, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.645967436577056e-06, |
|
"loss": 0.7065, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.608102991291178e-06, |
|
"loss": 0.7659, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.570238546005302e-06, |
|
"loss": 0.7085, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.532374100719424e-06, |
|
"loss": 0.7312, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.494509655433548e-06, |
|
"loss": 0.814, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.45664521014767e-06, |
|
"loss": 0.7194, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.418780764861797e-06, |
|
"loss": 0.7266, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.380916319575919e-06, |
|
"loss": 0.7345, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.343051874290043e-06, |
|
"loss": 0.7121, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.305187429004165e-06, |
|
"loss": 0.7716, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.26732298371829e-06, |
|
"loss": 0.7601, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.229458538432412e-06, |
|
"loss": 0.736, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.191594093146536e-06, |
|
"loss": 0.7659, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.15372964786066e-06, |
|
"loss": 0.7504, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.115865202574782e-06, |
|
"loss": 0.7367, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.078000757288907e-06, |
|
"loss": 0.739, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.040136312003029e-06, |
|
"loss": 0.7421, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.002271866717153e-06, |
|
"loss": 0.7181, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.964407421431277e-06, |
|
"loss": 0.7699, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.926542976145401e-06, |
|
"loss": 0.7725, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.888678530859524e-06, |
|
"loss": 0.7658, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.850814085573648e-06, |
|
"loss": 0.6961, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.813706929193487e-06, |
|
"loss": 0.7598, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.775842483907611e-06, |
|
"loss": 0.7684, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.737978038621735e-06, |
|
"loss": 0.7258, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.700113593335858e-06, |
|
"loss": 0.7602, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.662249148049982e-06, |
|
"loss": 0.775, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.624384702764105e-06, |
|
"loss": 0.7311, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.586520257478228e-06, |
|
"loss": 0.7252, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.548655812192352e-06, |
|
"loss": 0.7078, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.5107913669064756e-06, |
|
"loss": 0.7607, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.472926921620599e-06, |
|
"loss": 0.7462, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.435062476334722e-06, |
|
"loss": 0.7574, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.397198031048845e-06, |
|
"loss": 0.7223, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.3593335857629685e-06, |
|
"loss": 0.7306, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.321469140477092e-06, |
|
"loss": 0.7638, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.283604695191217e-06, |
|
"loss": 0.7236, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.24574024990534e-06, |
|
"loss": 0.7381, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.207875804619463e-06, |
|
"loss": 0.7257, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.170011359333586e-06, |
|
"loss": 0.7516, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.132904202953427e-06, |
|
"loss": 0.7189, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.095039757667551e-06, |
|
"loss": 0.7672, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.057175312381674e-06, |
|
"loss": 0.7289, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.019310867095797e-06, |
|
"loss": 0.7194, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.9814464218099205e-06, |
|
"loss": 0.6933, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.943581976524045e-06, |
|
"loss": 0.8152, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.905717531238168e-06, |
|
"loss": 0.76, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.867853085952292e-06, |
|
"loss": 0.7271, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.829988640666415e-06, |
|
"loss": 0.7494, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.792124195380538e-06, |
|
"loss": 0.7738, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.754259750094662e-06, |
|
"loss": 0.7071, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.716395304808785e-06, |
|
"loss": 0.7339, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.678530859522908e-06, |
|
"loss": 0.7506, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.640666414237032e-06, |
|
"loss": 0.7121, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.602801968951155e-06, |
|
"loss": 0.7018, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.564937523665279e-06, |
|
"loss": 0.7677, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.527073078379403e-06, |
|
"loss": 0.6925, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.489208633093526e-06, |
|
"loss": 0.7205, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.451344187807649e-06, |
|
"loss": 0.7163, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.413479742521772e-06, |
|
"loss": 0.752, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.3756152972358965e-06, |
|
"loss": 0.7706, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.33775085195002e-06, |
|
"loss": 0.7556, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.299886406664143e-06, |
|
"loss": 0.7626, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.262021961378266e-06, |
|
"loss": 0.7447, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.224157516092389e-06, |
|
"loss": 0.741, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.186293070806513e-06, |
|
"loss": 0.7567, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.1484286255206375e-06, |
|
"loss": 0.7364, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.110564180234761e-06, |
|
"loss": 0.739, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.072699734948884e-06, |
|
"loss": 0.7309, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.034835289663007e-06, |
|
"loss": 0.7385, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.9969708443771305e-06, |
|
"loss": 0.7588, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.959106399091254e-06, |
|
"loss": 0.7442, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.921241953805377e-06, |
|
"loss": 0.7871, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.883377508519501e-06, |
|
"loss": 0.7817, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.845513063233624e-06, |
|
"loss": 0.751, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.8076486179477474e-06, |
|
"loss": 0.7379, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.769784172661871e-06, |
|
"loss": 0.7151, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.731919727375994e-06, |
|
"loss": 0.7989, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.694055282090117e-06, |
|
"loss": 0.752, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.656190836804242e-06, |
|
"loss": 0.755, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.618326391518365e-06, |
|
"loss": 0.7355, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.5804619462324885e-06, |
|
"loss": 0.668, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.542597500946612e-06, |
|
"loss": 0.763, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.504733055660735e-06, |
|
"loss": 0.7369, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.466868610374858e-06, |
|
"loss": 0.7389, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.429004165088982e-06, |
|
"loss": 0.7435, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.3911397198031055e-06, |
|
"loss": 0.7103, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.353275274517229e-06, |
|
"loss": 0.7, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.315410829231352e-06, |
|
"loss": 0.7269, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.277546383945475e-06, |
|
"loss": 0.713, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.2396819386595984e-06, |
|
"loss": 0.7442, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.201817493373722e-06, |
|
"loss": 0.731, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.163953048087847e-06, |
|
"loss": 0.7389, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.12608860280197e-06, |
|
"loss": 0.7348, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.088224157516093e-06, |
|
"loss": 0.7607, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.050359712230216e-06, |
|
"loss": 0.7151, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.0124952669443395e-06, |
|
"loss": 0.7002, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.974630821658464e-06, |
|
"loss": 0.7734, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.936766376372587e-06, |
|
"loss": 0.7506, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.89890193108671e-06, |
|
"loss": 0.7369, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.861037485800833e-06, |
|
"loss": 0.7118, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.8231730405149565e-06, |
|
"loss": 0.7152, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.78530859522908e-06, |
|
"loss": 0.7366, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.747444149943204e-06, |
|
"loss": 0.7086, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.709579704657327e-06, |
|
"loss": 0.7306, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.67171525937145e-06, |
|
"loss": 0.7266, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.633850814085574e-06, |
|
"loss": 0.7241, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.596743657705415e-06, |
|
"loss": 0.708, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.558879212419539e-06, |
|
"loss": 0.7544, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.521014767133662e-06, |
|
"loss": 0.739, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.483150321847785e-06, |
|
"loss": 0.7492, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.4452858765619086e-06, |
|
"loss": 0.7323, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.407421431276032e-06, |
|
"loss": 0.7431, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.369556985990155e-06, |
|
"loss": 0.7302, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.331692540704279e-06, |
|
"loss": 0.7112, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.293828095418402e-06, |
|
"loss": 0.7359, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.2559636501325256e-06, |
|
"loss": 0.7274, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.21809920484665e-06, |
|
"loss": 0.7307, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.180234759560773e-06, |
|
"loss": 0.7339, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.142370314274896e-06, |
|
"loss": 0.7743, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.104505868989019e-06, |
|
"loss": 0.7424, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.066641423703143e-06, |
|
"loss": 0.7152, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.028776978417267e-06, |
|
"loss": 0.707, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.99091253313139e-06, |
|
"loss": 0.7538, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.953048087845514e-06, |
|
"loss": 0.7293, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.915183642559637e-06, |
|
"loss": 0.7524, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.87731919727376e-06, |
|
"loss": 0.7938, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.839454751987884e-06, |
|
"loss": 0.7266, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.8015903067020073e-06, |
|
"loss": 0.7566, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.7637258614161305e-06, |
|
"loss": 0.713, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.725861416130254e-06, |
|
"loss": 0.7546, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.6879969708443774e-06, |
|
"loss": 0.6955, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6501325255585006e-06, |
|
"loss": 0.7084, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6122680802726247e-06, |
|
"loss": 0.7447, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.574403634986748e-06, |
|
"loss": 0.7173, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.536539189700871e-06, |
|
"loss": 0.7324, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.498674744414995e-06, |
|
"loss": 0.7561, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.460810299129118e-06, |
|
"loss": 0.7505, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.4229458538432413e-06, |
|
"loss": 0.7337, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.3850814085573645e-06, |
|
"loss": 0.7753, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.3472169632714886e-06, |
|
"loss": 0.7703, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.309352517985612e-06, |
|
"loss": 0.7384, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.271488072699735e-06, |
|
"loss": 0.7759, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.2336236274138587e-06, |
|
"loss": 0.7446, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.195759182127982e-06, |
|
"loss": 0.7437, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 0.7817, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.1200302915562292e-06, |
|
"loss": 0.7396, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0821658462703525e-06, |
|
"loss": 0.743, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.0443014009844757e-06, |
|
"loss": 0.7597, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.0064369556985994e-06, |
|
"loss": 0.6844, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.9685725104127226e-06, |
|
"loss": 0.7737, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.930708065126846e-06, |
|
"loss": 0.7024, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.89284361984097e-06, |
|
"loss": 0.7136, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.854979174555093e-06, |
|
"loss": 0.7482, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.8171147292692164e-06, |
|
"loss": 0.7608, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.7792502839833396e-06, |
|
"loss": 0.7442, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7413858386974632e-06, |
|
"loss": 0.6687, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.703521393411587e-06, |
|
"loss": 0.7594, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.66565694812571e-06, |
|
"loss": 0.7042, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.6277925028398338e-06, |
|
"loss": 0.7556, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.589928057553957e-06, |
|
"loss": 0.7357, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5520636122680802e-06, |
|
"loss": 0.7292, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5141991669822043e-06, |
|
"loss": 0.6899, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4763347216963275e-06, |
|
"loss": 0.729, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4384702764104508e-06, |
|
"loss": 0.7473, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.400605831124574e-06, |
|
"loss": 0.7264, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3627413858386977e-06, |
|
"loss": 0.7314, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.3256342294585385e-06, |
|
"loss": 0.6819, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.287769784172662e-06, |
|
"loss": 0.7637, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2499053388867854e-06, |
|
"loss": 0.727, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2120408936009086e-06, |
|
"loss": 0.7044, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1741764483150323e-06, |
|
"loss": 0.6926, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.136312003029156e-06, |
|
"loss": 0.7406, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.098447557743279e-06, |
|
"loss": 0.7572, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.060583112457403e-06, |
|
"loss": 0.7239, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.022718667171526e-06, |
|
"loss": 0.7353, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9848542218856497e-06, |
|
"loss": 0.7287, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.946989776599773e-06, |
|
"loss": 0.7461, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.909125331313896e-06, |
|
"loss": 0.7489, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.8712608860280198e-06, |
|
"loss": 0.7156, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.8333964407421435e-06, |
|
"loss": 0.6765, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7955319954562667e-06, |
|
"loss": 0.7496, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7576675501703901e-06, |
|
"loss": 0.7269, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.7198031048845134e-06, |
|
"loss": 0.728, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.681938659598637e-06, |
|
"loss": 0.7914, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6440742143127605e-06, |
|
"loss": 0.7419, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6062097690268837e-06, |
|
"loss": 0.7509, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.5683453237410074e-06, |
|
"loss": 0.7372, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.5304808784551308e-06, |
|
"loss": 0.7159, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.4926164331692542e-06, |
|
"loss": 0.7272, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4547519878833777e-06, |
|
"loss": 0.735, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.416887542597501e-06, |
|
"loss": 0.715, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3790230973116246e-06, |
|
"loss": 0.7507, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.341158652025748e-06, |
|
"loss": 0.7723, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3032942067398712e-06, |
|
"loss": 0.7658, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2654297614539949e-06, |
|
"loss": 0.7727, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2275653161681181e-06, |
|
"loss": 0.748, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1897008708822416e-06, |
|
"loss": 0.7135, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1518364255963652e-06, |
|
"loss": 0.76, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1139719803104887e-06, |
|
"loss": 0.7439, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.0761075350246119e-06, |
|
"loss": 0.7496, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0382430897387353e-06, |
|
"loss": 0.7631, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.000378644452859e-06, |
|
"loss": 0.7033, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.625141991669822e-07, |
|
"loss": 0.717, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.246497538811057e-07, |
|
"loss": 0.7329, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.867853085952291e-07, |
|
"loss": 0.7065, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.489208633093526e-07, |
|
"loss": 0.702, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.110564180234761e-07, |
|
"loss": 0.7161, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.731919727375994e-07, |
|
"loss": 0.725, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.353275274517229e-07, |
|
"loss": 0.7256, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.974630821658464e-07, |
|
"loss": 0.7395, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.595986368799697e-07, |
|
"loss": 0.794, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.217341915940932e-07, |
|
"loss": 0.7349, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.838697463082166e-07, |
|
"loss": 0.7394, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.460053010223401e-07, |
|
"loss": 0.7345, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.081408557364635e-07, |
|
"loss": 0.6888, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.702764104505869e-07, |
|
"loss": 0.7347, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.324119651647104e-07, |
|
"loss": 0.7246, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.945475198788338e-07, |
|
"loss": 0.7438, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.5668307459295723e-07, |
|
"loss": 0.7108, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.1881862930708067e-07, |
|
"loss": 0.7208, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.809541840212041e-07, |
|
"loss": 0.7633, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.4308973873532756e-07, |
|
"loss": 0.7707, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.05225293449451e-07, |
|
"loss": 0.7524, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6736084816357444e-07, |
|
"loss": 0.707, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.2949640287769786e-07, |
|
"loss": 0.7751, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.163195759182128e-08, |
|
"loss": 0.7672, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.376751230594472e-08, |
|
"loss": 0.6998, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5903067020068158e-08, |
|
"loss": 0.7069, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 26410, |
|
"total_flos": 6.728145843607406e+16, |
|
"train_loss": 0.7582937260732105, |
|
"train_runtime": 12885.6962, |
|
"train_samples_per_second": 65.589, |
|
"train_steps_per_second": 2.05 |
|
} |
|
], |
|
"max_steps": 26410, |
|
"num_train_epochs": 2, |
|
"total_flos": 6.728145843607406e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|