{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999886413115499, "global_step": 26410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9999242711094284e-05, "loss": 1.3163, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9963650132525562e-05, "loss": 1.0093, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.99265429761454e-05, "loss": 0.8939, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.9888678530859524e-05, "loss": 0.8368, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.985081408557365e-05, "loss": 0.8794, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.9812949640287772e-05, "loss": 0.843, "step": 250 }, { "epoch": 0.02, "learning_rate": 1.9775085195001894e-05, "loss": 0.8488, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.9737220749716017e-05, "loss": 0.8465, "step": 350 }, { "epoch": 0.03, "learning_rate": 1.9699356304430143e-05, "loss": 0.8591, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.9661491859144265e-05, "loss": 0.8978, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.9623627413858387e-05, "loss": 0.8035, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.9585762968572513e-05, "loss": 0.8153, "step": 550 }, { "epoch": 0.05, "learning_rate": 1.9547898523286635e-05, "loss": 0.8079, "step": 600 }, { "epoch": 0.05, "learning_rate": 1.9510034078000758e-05, "loss": 0.8092, "step": 650 }, { "epoch": 0.05, "learning_rate": 1.947216963271488e-05, "loss": 0.8294, "step": 700 }, { "epoch": 0.06, "learning_rate": 1.9434305187429006e-05, "loss": 0.8099, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.939644074214313e-05, "loss": 0.85, "step": 800 }, { "epoch": 0.06, "learning_rate": 1.935857629685725e-05, "loss": 0.8509, "step": 850 }, { "epoch": 0.07, "learning_rate": 1.9320711851571376e-05, "loss": 0.8229, "step": 900 }, { "epoch": 0.07, "learning_rate": 1.92828474062855e-05, "loss": 0.8251, "step": 950 }, { "epoch": 0.08, "learning_rate": 1.9244982960999625e-05, "loss": 0.8116, "step": 1000 }, { "epoch": 0.08, "learning_rate": 1.9207118515713747e-05, "loss": 0.8277, "step": 1050 }, { "epoch": 0.08, "learning_rate": 1.916925407042787e-05, "loss": 0.8004, "step": 1100 }, { "epoch": 0.09, "learning_rate": 1.9131389625141995e-05, "loss": 0.8347, "step": 1150 }, { "epoch": 0.09, "learning_rate": 1.9093525179856118e-05, "loss": 0.8103, "step": 1200 }, { "epoch": 0.09, "learning_rate": 1.905566073457024e-05, "loss": 0.8566, "step": 1250 }, { "epoch": 0.1, "learning_rate": 1.9017796289284362e-05, "loss": 0.7921, "step": 1300 }, { "epoch": 0.1, "learning_rate": 1.8979931843998488e-05, "loss": 0.7959, "step": 1350 }, { "epoch": 0.11, "learning_rate": 1.894206739871261e-05, "loss": 0.7827, "step": 1400 }, { "epoch": 0.11, "learning_rate": 1.8904202953426733e-05, "loss": 0.785, "step": 1450 }, { "epoch": 0.11, "learning_rate": 1.886633850814086e-05, "loss": 0.8061, "step": 1500 }, { "epoch": 0.12, "learning_rate": 1.882847406285498e-05, "loss": 0.7717, "step": 1550 }, { "epoch": 0.12, "learning_rate": 1.8790609617569103e-05, "loss": 0.8362, "step": 1600 }, { "epoch": 0.12, "learning_rate": 1.8752745172283226e-05, "loss": 0.7975, "step": 1650 }, { "epoch": 0.13, "learning_rate": 1.871488072699735e-05, "loss": 0.8123, "step": 1700 }, { "epoch": 0.13, "learning_rate": 1.8677016281711474e-05, "loss": 0.8526, "step": 1750 }, { "epoch": 0.14, "learning_rate": 1.8639151836425596e-05, "loss": 0.808, "step": 1800 }, { "epoch": 0.14, "learning_rate": 1.8601287391139722e-05, "loss": 0.7989, "step": 1850 }, { "epoch": 0.14, "learning_rate": 1.8563422945853844e-05, "loss": 0.7755, "step": 1900 }, { "epoch": 0.15, "learning_rate": 1.8525558500567967e-05, "loss": 0.8467, "step": 1950 }, { "epoch": 0.15, "learning_rate": 1.8487694055282093e-05, "loss": 0.8221, "step": 2000 }, { "epoch": 0.16, "learning_rate": 1.8449829609996215e-05, "loss": 0.7851, "step": 2050 }, { "epoch": 0.16, "learning_rate": 1.841196516471034e-05, "loss": 0.8159, "step": 2100 }, { "epoch": 0.16, "learning_rate": 1.837410071942446e-05, "loss": 0.8027, "step": 2150 }, { "epoch": 0.17, "learning_rate": 1.8336236274138586e-05, "loss": 0.7585, "step": 2200 }, { "epoch": 0.17, "learning_rate": 1.8298371828852708e-05, "loss": 0.8287, "step": 2250 }, { "epoch": 0.17, "learning_rate": 1.8260507383566834e-05, "loss": 0.8127, "step": 2300 }, { "epoch": 0.18, "learning_rate": 1.8222642938280956e-05, "loss": 0.8015, "step": 2350 }, { "epoch": 0.18, "learning_rate": 1.818477849299508e-05, "loss": 0.8228, "step": 2400 }, { "epoch": 0.19, "learning_rate": 1.8146914047709204e-05, "loss": 0.7865, "step": 2450 }, { "epoch": 0.19, "learning_rate": 1.8109049602423327e-05, "loss": 0.8093, "step": 2500 }, { "epoch": 0.19, "learning_rate": 1.807118515713745e-05, "loss": 0.7812, "step": 2550 }, { "epoch": 0.2, "learning_rate": 1.803332071185157e-05, "loss": 0.805, "step": 2600 }, { "epoch": 0.2, "learning_rate": 1.7995456266565697e-05, "loss": 0.7844, "step": 2650 }, { "epoch": 0.2, "learning_rate": 1.795759182127982e-05, "loss": 0.7976, "step": 2700 }, { "epoch": 0.21, "learning_rate": 1.7919727375993942e-05, "loss": 0.7683, "step": 2750 }, { "epoch": 0.21, "learning_rate": 1.7881862930708068e-05, "loss": 0.8157, "step": 2800 }, { "epoch": 0.22, "learning_rate": 1.784399848542219e-05, "loss": 0.7801, "step": 2850 }, { "epoch": 0.22, "learning_rate": 1.7806134040136312e-05, "loss": 0.7817, "step": 2900 }, { "epoch": 0.22, "learning_rate": 1.7768269594850435e-05, "loss": 0.8286, "step": 2950 }, { "epoch": 0.23, "learning_rate": 1.773040514956456e-05, "loss": 0.7994, "step": 3000 }, { "epoch": 0.23, "learning_rate": 1.7692540704278683e-05, "loss": 0.758, "step": 3050 }, { "epoch": 0.23, "learning_rate": 1.7654676258992805e-05, "loss": 0.8116, "step": 3100 }, { "epoch": 0.24, "learning_rate": 1.761681181370693e-05, "loss": 0.7762, "step": 3150 }, { "epoch": 0.24, "learning_rate": 1.7578947368421054e-05, "loss": 0.7603, "step": 3200 }, { "epoch": 0.25, "learning_rate": 1.7541082923135176e-05, "loss": 0.825, "step": 3250 }, { "epoch": 0.25, "learning_rate": 1.7503218477849302e-05, "loss": 0.7649, "step": 3300 }, { "epoch": 0.25, "learning_rate": 1.7465354032563424e-05, "loss": 0.7886, "step": 3350 }, { "epoch": 0.26, "learning_rate": 1.742748958727755e-05, "loss": 0.796, "step": 3400 }, { "epoch": 0.26, "learning_rate": 1.7389625141991672e-05, "loss": 0.7712, "step": 3450 }, { "epoch": 0.27, "learning_rate": 1.7351760696705795e-05, "loss": 0.8061, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.7313896251419917e-05, "loss": 0.7691, "step": 3550 }, { "epoch": 0.27, "learning_rate": 1.7276031806134043e-05, "loss": 0.8038, "step": 3600 }, { "epoch": 0.28, "learning_rate": 1.7238167360848165e-05, "loss": 0.7679, "step": 3650 }, { "epoch": 0.28, "learning_rate": 1.7200302915562288e-05, "loss": 0.7909, "step": 3700 }, { "epoch": 0.28, "learning_rate": 1.7162438470276413e-05, "loss": 0.8001, "step": 3750 }, { "epoch": 0.29, "learning_rate": 1.7124574024990536e-05, "loss": 0.7369, "step": 3800 }, { "epoch": 0.29, "learning_rate": 1.7086709579704658e-05, "loss": 0.7716, "step": 3850 }, { "epoch": 0.3, "learning_rate": 1.704884513441878e-05, "loss": 0.8003, "step": 3900 }, { "epoch": 0.3, "learning_rate": 1.7010980689132906e-05, "loss": 0.7973, "step": 3950 }, { "epoch": 0.3, "learning_rate": 1.697311624384703e-05, "loss": 0.7745, "step": 4000 }, { "epoch": 0.31, "learning_rate": 1.693525179856115e-05, "loss": 0.7266, "step": 4050 }, { "epoch": 0.31, "learning_rate": 1.6897387353275277e-05, "loss": 0.7917, "step": 4100 }, { "epoch": 0.31, "learning_rate": 1.68595229079894e-05, "loss": 0.7393, "step": 4150 }, { "epoch": 0.32, "learning_rate": 1.682165846270352e-05, "loss": 0.7822, "step": 4200 }, { "epoch": 0.32, "learning_rate": 1.6783794017417647e-05, "loss": 0.7919, "step": 4250 }, { "epoch": 0.33, "learning_rate": 1.674592957213177e-05, "loss": 0.7943, "step": 4300 }, { "epoch": 0.33, "learning_rate": 1.6708065126845892e-05, "loss": 0.7731, "step": 4350 }, { "epoch": 0.33, "learning_rate": 1.6670200681560014e-05, "loss": 0.7623, "step": 4400 }, { "epoch": 0.34, "learning_rate": 1.663233623627414e-05, "loss": 0.7891, "step": 4450 }, { "epoch": 0.34, "learning_rate": 1.6594471790988263e-05, "loss": 0.7769, "step": 4500 }, { "epoch": 0.34, "learning_rate": 1.655660734570239e-05, "loss": 0.7958, "step": 4550 }, { "epoch": 0.35, "learning_rate": 1.651874290041651e-05, "loss": 0.7555, "step": 4600 }, { "epoch": 0.35, "learning_rate": 1.6480878455130633e-05, "loss": 0.7993, "step": 4650 }, { "epoch": 0.36, "learning_rate": 1.644301400984476e-05, "loss": 0.7916, "step": 4700 }, { "epoch": 0.36, "learning_rate": 1.640514956455888e-05, "loss": 0.8101, "step": 4750 }, { "epoch": 0.36, "learning_rate": 1.6367285119273004e-05, "loss": 0.8066, "step": 4800 }, { "epoch": 0.37, "learning_rate": 1.6329420673987126e-05, "loss": 0.761, "step": 4850 }, { "epoch": 0.37, "learning_rate": 1.6291556228701252e-05, "loss": 0.7919, "step": 4900 }, { "epoch": 0.37, "learning_rate": 1.6253691783415374e-05, "loss": 0.7484, "step": 4950 }, { "epoch": 0.38, "learning_rate": 1.6215827338129497e-05, "loss": 0.809, "step": 5000 }, { "epoch": 0.38, "learning_rate": 1.6177962892843622e-05, "loss": 0.7683, "step": 5050 }, { "epoch": 0.39, "learning_rate": 1.6140098447557745e-05, "loss": 0.7622, "step": 5100 }, { "epoch": 0.39, "learning_rate": 1.6102234002271867e-05, "loss": 0.7618, "step": 5150 }, { "epoch": 0.39, "learning_rate": 1.6064369556985993e-05, "loss": 0.7687, "step": 5200 }, { "epoch": 0.4, "learning_rate": 1.6026505111700115e-05, "loss": 0.7598, "step": 5250 }, { "epoch": 0.4, "learning_rate": 1.5988640666414238e-05, "loss": 0.7245, "step": 5300 }, { "epoch": 0.41, "learning_rate": 1.595077622112836e-05, "loss": 0.7751, "step": 5350 }, { "epoch": 0.41, "learning_rate": 1.5912911775842486e-05, "loss": 0.7903, "step": 5400 }, { "epoch": 0.41, "learning_rate": 1.5875047330556608e-05, "loss": 0.7399, "step": 5450 }, { "epoch": 0.42, "learning_rate": 1.583718288527073e-05, "loss": 0.7529, "step": 5500 }, { "epoch": 0.42, "learning_rate": 1.5799318439984856e-05, "loss": 0.7777, "step": 5550 }, { "epoch": 0.42, "learning_rate": 1.576145399469898e-05, "loss": 0.7411, "step": 5600 }, { "epoch": 0.43, "learning_rate": 1.5723589549413105e-05, "loss": 0.7805, "step": 5650 }, { "epoch": 0.43, "learning_rate": 1.5685725104127224e-05, "loss": 0.7585, "step": 5700 }, { "epoch": 0.44, "learning_rate": 1.564786065884135e-05, "loss": 0.7908, "step": 5750 }, { "epoch": 0.44, "learning_rate": 1.560999621355547e-05, "loss": 0.7302, "step": 5800 }, { "epoch": 0.44, "learning_rate": 1.5572131768269597e-05, "loss": 0.7492, "step": 5850 }, { "epoch": 0.45, "learning_rate": 1.553426732298372e-05, "loss": 0.8124, "step": 5900 }, { "epoch": 0.45, "learning_rate": 1.5496402877697842e-05, "loss": 0.7609, "step": 5950 }, { "epoch": 0.45, "learning_rate": 1.5458538432411968e-05, "loss": 0.7383, "step": 6000 }, { "epoch": 0.46, "learning_rate": 1.542067398712609e-05, "loss": 0.7585, "step": 6050 }, { "epoch": 0.46, "learning_rate": 1.538356683074593e-05, "loss": 0.7852, "step": 6100 }, { "epoch": 0.47, "learning_rate": 1.5345702385460056e-05, "loss": 0.7737, "step": 6150 }, { "epoch": 0.47, "learning_rate": 1.5307837940174178e-05, "loss": 0.7881, "step": 6200 }, { "epoch": 0.47, "learning_rate": 1.52699734948883e-05, "loss": 0.7716, "step": 6250 }, { "epoch": 0.48, "learning_rate": 1.5232109049602425e-05, "loss": 0.7478, "step": 6300 }, { "epoch": 0.48, "learning_rate": 1.5194244604316549e-05, "loss": 0.7681, "step": 6350 }, { "epoch": 0.48, "learning_rate": 1.5156380159030673e-05, "loss": 0.7703, "step": 6400 }, { "epoch": 0.49, "learning_rate": 1.5118515713744795e-05, "loss": 0.8031, "step": 6450 }, { "epoch": 0.49, "learning_rate": 1.508065126845892e-05, "loss": 0.7812, "step": 6500 }, { "epoch": 0.5, "learning_rate": 1.5042786823173042e-05, "loss": 0.8151, "step": 6550 }, { "epoch": 0.5, "learning_rate": 1.5004922377887166e-05, "loss": 0.8384, "step": 6600 }, { "epoch": 0.5, "learning_rate": 1.4967057932601288e-05, "loss": 0.7861, "step": 6650 }, { "epoch": 0.51, "learning_rate": 1.4929193487315412e-05, "loss": 0.7473, "step": 6700 }, { "epoch": 0.51, "learning_rate": 1.4891329042029536e-05, "loss": 0.7741, "step": 6750 }, { "epoch": 0.51, "learning_rate": 1.4853464596743659e-05, "loss": 0.799, "step": 6800 }, { "epoch": 0.52, "learning_rate": 1.4815600151457783e-05, "loss": 0.7929, "step": 6850 }, { "epoch": 0.52, "learning_rate": 1.4777735706171905e-05, "loss": 0.7594, "step": 6900 }, { "epoch": 0.53, "learning_rate": 1.4739871260886029e-05, "loss": 0.736, "step": 6950 }, { "epoch": 0.53, "learning_rate": 1.4702006815600151e-05, "loss": 0.7608, "step": 7000 }, { "epoch": 0.53, "learning_rate": 1.4664142370314276e-05, "loss": 0.7927, "step": 7050 }, { "epoch": 0.54, "learning_rate": 1.46262779250284e-05, "loss": 0.7753, "step": 7100 }, { "epoch": 0.54, "learning_rate": 1.4588413479742522e-05, "loss": 0.7476, "step": 7150 }, { "epoch": 0.55, "learning_rate": 1.4550549034456646e-05, "loss": 0.7907, "step": 7200 }, { "epoch": 0.55, "learning_rate": 1.4512684589170768e-05, "loss": 0.7874, "step": 7250 }, { "epoch": 0.55, "learning_rate": 1.4474820143884894e-05, "loss": 0.7626, "step": 7300 }, { "epoch": 0.56, "learning_rate": 1.4436955698599018e-05, "loss": 0.7527, "step": 7350 }, { "epoch": 0.56, "learning_rate": 1.439909125331314e-05, "loss": 0.7451, "step": 7400 }, { "epoch": 0.56, "learning_rate": 1.4361226808027265e-05, "loss": 0.7615, "step": 7450 }, { "epoch": 0.57, "learning_rate": 1.4323362362741387e-05, "loss": 0.7365, "step": 7500 }, { "epoch": 0.57, "learning_rate": 1.4285497917455511e-05, "loss": 0.7627, "step": 7550 }, { "epoch": 0.58, "learning_rate": 1.4247633472169634e-05, "loss": 0.7769, "step": 7600 }, { "epoch": 0.58, "learning_rate": 1.4209769026883758e-05, "loss": 0.7253, "step": 7650 }, { "epoch": 0.58, "learning_rate": 1.4171904581597882e-05, "loss": 0.7422, "step": 7700 }, { "epoch": 0.59, "learning_rate": 1.4134040136312004e-05, "loss": 0.7459, "step": 7750 }, { "epoch": 0.59, "learning_rate": 1.4096175691026128e-05, "loss": 0.7355, "step": 7800 }, { "epoch": 0.59, "learning_rate": 1.405831124574025e-05, "loss": 0.7658, "step": 7850 }, { "epoch": 0.6, "learning_rate": 1.4020446800454375e-05, "loss": 0.8133, "step": 7900 }, { "epoch": 0.6, "learning_rate": 1.3982582355168497e-05, "loss": 0.7574, "step": 7950 }, { "epoch": 0.61, "learning_rate": 1.3944717909882621e-05, "loss": 0.7755, "step": 8000 }, { "epoch": 0.61, "learning_rate": 1.3906853464596745e-05, "loss": 0.7434, "step": 8050 }, { "epoch": 0.61, "learning_rate": 1.3868989019310868e-05, "loss": 0.7565, "step": 8100 }, { "epoch": 0.62, "learning_rate": 1.3831124574024992e-05, "loss": 0.7705, "step": 8150 }, { "epoch": 0.62, "learning_rate": 1.3793260128739114e-05, "loss": 0.7699, "step": 8200 }, { "epoch": 0.62, "learning_rate": 1.3755395683453238e-05, "loss": 0.7834, "step": 8250 }, { "epoch": 0.63, "learning_rate": 1.3718288527073081e-05, "loss": 0.7788, "step": 8300 }, { "epoch": 0.63, "learning_rate": 1.3680424081787202e-05, "loss": 0.7329, "step": 8350 }, { "epoch": 0.64, "learning_rate": 1.3642559636501328e-05, "loss": 0.7774, "step": 8400 }, { "epoch": 0.64, "learning_rate": 1.3604695191215448e-05, "loss": 0.7427, "step": 8450 }, { "epoch": 0.64, "learning_rate": 1.3566830745929574e-05, "loss": 0.7482, "step": 8500 }, { "epoch": 0.65, "learning_rate": 1.3528966300643698e-05, "loss": 0.7888, "step": 8550 }, { "epoch": 0.65, "learning_rate": 1.349110185535782e-05, "loss": 0.7535, "step": 8600 }, { "epoch": 0.66, "learning_rate": 1.3453237410071945e-05, "loss": 0.7456, "step": 8650 }, { "epoch": 0.66, "learning_rate": 1.3415372964786067e-05, "loss": 0.7964, "step": 8700 }, { "epoch": 0.66, "learning_rate": 1.3377508519500191e-05, "loss": 0.7913, "step": 8750 }, { "epoch": 0.67, "learning_rate": 1.3339644074214313e-05, "loss": 0.7745, "step": 8800 }, { "epoch": 0.67, "learning_rate": 1.3301779628928438e-05, "loss": 0.7595, "step": 8850 }, { "epoch": 0.67, "learning_rate": 1.3263915183642562e-05, "loss": 0.7117, "step": 8900 }, { "epoch": 0.68, "learning_rate": 1.3226050738356684e-05, "loss": 0.8142, "step": 8950 }, { "epoch": 0.68, "learning_rate": 1.3188186293070808e-05, "loss": 0.7539, "step": 9000 }, { "epoch": 0.69, "learning_rate": 1.315032184778493e-05, "loss": 0.7135, "step": 9050 }, { "epoch": 0.69, "learning_rate": 1.3112457402499055e-05, "loss": 0.7883, "step": 9100 }, { "epoch": 0.69, "learning_rate": 1.3074592957213177e-05, "loss": 0.7335, "step": 9150 }, { "epoch": 0.7, "learning_rate": 1.3036728511927301e-05, "loss": 0.771, "step": 9200 }, { "epoch": 0.7, "learning_rate": 1.2998864066641425e-05, "loss": 0.7341, "step": 9250 }, { "epoch": 0.7, "learning_rate": 1.2960999621355547e-05, "loss": 0.7177, "step": 9300 }, { "epoch": 0.71, "learning_rate": 1.2923135176069672e-05, "loss": 0.7784, "step": 9350 }, { "epoch": 0.71, "learning_rate": 1.2885270730783794e-05, "loss": 0.7906, "step": 9400 }, { "epoch": 0.72, "learning_rate": 1.2847406285497918e-05, "loss": 0.7762, "step": 9450 }, { "epoch": 0.72, "learning_rate": 1.280954184021204e-05, "loss": 0.7482, "step": 9500 }, { "epoch": 0.72, "learning_rate": 1.2771677394926164e-05, "loss": 0.7069, "step": 9550 }, { "epoch": 0.73, "learning_rate": 1.2734570238546007e-05, "loss": 0.761, "step": 9600 }, { "epoch": 0.73, "learning_rate": 1.269670579326013e-05, "loss": 0.7375, "step": 9650 }, { "epoch": 0.73, "learning_rate": 1.2658841347974254e-05, "loss": 0.7937, "step": 9700 }, { "epoch": 0.74, "learning_rate": 1.2620976902688378e-05, "loss": 0.7983, "step": 9750 }, { "epoch": 0.74, "learning_rate": 1.25831124574025e-05, "loss": 0.7339, "step": 9800 }, { "epoch": 0.75, "learning_rate": 1.2545248012116624e-05, "loss": 0.7671, "step": 9850 }, { "epoch": 0.75, "learning_rate": 1.2507383566830747e-05, "loss": 0.7311, "step": 9900 }, { "epoch": 0.75, "learning_rate": 1.2469519121544871e-05, "loss": 0.7652, "step": 9950 }, { "epoch": 0.76, "learning_rate": 1.2431654676258993e-05, "loss": 0.7549, "step": 10000 }, { "epoch": 0.76, "learning_rate": 1.2393790230973117e-05, "loss": 0.7802, "step": 10050 }, { "epoch": 0.76, "learning_rate": 1.2355925785687241e-05, "loss": 0.7573, "step": 10100 }, { "epoch": 0.77, "learning_rate": 1.2318061340401364e-05, "loss": 0.695, "step": 10150 }, { "epoch": 0.77, "learning_rate": 1.2280196895115488e-05, "loss": 0.7214, "step": 10200 }, { "epoch": 0.78, "learning_rate": 1.224233244982961e-05, "loss": 0.772, "step": 10250 }, { "epoch": 0.78, "learning_rate": 1.2204468004543734e-05, "loss": 0.7855, "step": 10300 }, { "epoch": 0.78, "learning_rate": 1.2166603559257857e-05, "loss": 0.7345, "step": 10350 }, { "epoch": 0.79, "learning_rate": 1.212873911397198e-05, "loss": 0.7416, "step": 10400 }, { "epoch": 0.79, "learning_rate": 1.2090874668686105e-05, "loss": 0.7723, "step": 10450 }, { "epoch": 0.8, "learning_rate": 1.2053010223400227e-05, "loss": 0.7446, "step": 10500 }, { "epoch": 0.8, "learning_rate": 1.2015145778114351e-05, "loss": 0.7657, "step": 10550 }, { "epoch": 0.8, "learning_rate": 1.1977281332828474e-05, "loss": 0.7944, "step": 10600 }, { "epoch": 0.81, "learning_rate": 1.1939416887542598e-05, "loss": 0.7598, "step": 10650 }, { "epoch": 0.81, "learning_rate": 1.1901552442256724e-05, "loss": 0.7212, "step": 10700 }, { "epoch": 0.81, "learning_rate": 1.1863687996970846e-05, "loss": 0.7564, "step": 10750 }, { "epoch": 0.82, "learning_rate": 1.182582355168497e-05, "loss": 0.7552, "step": 10800 }, { "epoch": 0.82, "learning_rate": 1.1787959106399092e-05, "loss": 0.7582, "step": 10850 }, { "epoch": 0.83, "learning_rate": 1.1750094661113217e-05, "loss": 0.7506, "step": 10900 }, { "epoch": 0.83, "learning_rate": 1.1712230215827339e-05, "loss": 0.7738, "step": 10950 }, { "epoch": 0.83, "learning_rate": 1.1674365770541463e-05, "loss": 0.7295, "step": 11000 }, { "epoch": 0.84, "learning_rate": 1.1636501325255587e-05, "loss": 0.7627, "step": 11050 }, { "epoch": 0.84, "learning_rate": 1.159863687996971e-05, "loss": 0.7918, "step": 11100 }, { "epoch": 0.84, "learning_rate": 1.1560772434683833e-05, "loss": 0.7235, "step": 11150 }, { "epoch": 0.85, "learning_rate": 1.1522907989397956e-05, "loss": 0.7988, "step": 11200 }, { "epoch": 0.85, "learning_rate": 1.148504354411208e-05, "loss": 0.7646, "step": 11250 }, { "epoch": 0.86, "learning_rate": 1.1447179098826202e-05, "loss": 0.7794, "step": 11300 }, { "epoch": 0.86, "learning_rate": 1.1409314653540326e-05, "loss": 0.79, "step": 11350 }, { "epoch": 0.86, "learning_rate": 1.137145020825445e-05, "loss": 0.7291, "step": 11400 }, { "epoch": 0.87, "learning_rate": 1.1333585762968573e-05, "loss": 0.7682, "step": 11450 }, { "epoch": 0.87, "learning_rate": 1.1295721317682697e-05, "loss": 0.8078, "step": 11500 }, { "epoch": 0.87, "learning_rate": 1.125785687239682e-05, "loss": 0.8196, "step": 11550 }, { "epoch": 0.88, "learning_rate": 1.1219992427110943e-05, "loss": 0.7173, "step": 11600 }, { "epoch": 0.88, "learning_rate": 1.1182127981825066e-05, "loss": 0.7908, "step": 11650 }, { "epoch": 0.89, "learning_rate": 1.114426353653919e-05, "loss": 0.7784, "step": 11700 }, { "epoch": 0.89, "learning_rate": 1.1106399091253316e-05, "loss": 0.7317, "step": 11750 }, { "epoch": 0.89, "learning_rate": 1.1068534645967436e-05, "loss": 0.7497, "step": 11800 }, { "epoch": 0.9, "learning_rate": 1.1030670200681562e-05, "loss": 0.7504, "step": 11850 }, { "epoch": 0.9, "learning_rate": 1.0992805755395683e-05, "loss": 0.7962, "step": 11900 }, { "epoch": 0.9, "learning_rate": 1.0954941310109809e-05, "loss": 0.7615, "step": 11950 }, { "epoch": 0.91, "learning_rate": 1.0917076864823933e-05, "loss": 0.7485, "step": 12000 }, { "epoch": 0.91, "learning_rate": 1.0879969708443772e-05, "loss": 0.7088, "step": 12050 }, { "epoch": 0.92, "learning_rate": 1.0842105263157896e-05, "loss": 0.7479, "step": 12100 }, { "epoch": 0.92, "learning_rate": 1.0804240817872019e-05, "loss": 0.7571, "step": 12150 }, { "epoch": 0.92, "learning_rate": 1.0766376372586143e-05, "loss": 0.7576, "step": 12200 }, { "epoch": 0.93, "learning_rate": 1.0728511927300267e-05, "loss": 0.7267, "step": 12250 }, { "epoch": 0.93, "learning_rate": 1.069064748201439e-05, "loss": 0.7521, "step": 12300 }, { "epoch": 0.94, "learning_rate": 1.0652783036728513e-05, "loss": 0.7616, "step": 12350 }, { "epoch": 0.94, "learning_rate": 1.0614918591442636e-05, "loss": 0.7232, "step": 12400 }, { "epoch": 0.94, "learning_rate": 1.057705414615676e-05, "loss": 0.797, "step": 12450 }, { "epoch": 0.95, "learning_rate": 1.0539189700870882e-05, "loss": 0.7575, "step": 12500 }, { "epoch": 0.95, "learning_rate": 1.0501325255585006e-05, "loss": 0.7384, "step": 12550 }, { "epoch": 0.95, "learning_rate": 1.046346081029913e-05, "loss": 0.7523, "step": 12600 }, { "epoch": 0.96, "learning_rate": 1.0425596365013253e-05, "loss": 0.7271, "step": 12650 }, { "epoch": 0.96, "learning_rate": 1.0387731919727377e-05, "loss": 0.7746, "step": 12700 }, { "epoch": 0.97, "learning_rate": 1.03498674744415e-05, "loss": 0.7463, "step": 12750 }, { "epoch": 0.97, "learning_rate": 1.0312003029155623e-05, "loss": 0.7234, "step": 12800 }, { "epoch": 0.97, "learning_rate": 1.0274138583869746e-05, "loss": 0.7462, "step": 12850 }, { "epoch": 0.98, "learning_rate": 1.023627413858387e-05, "loss": 0.7391, "step": 12900 }, { "epoch": 0.98, "learning_rate": 1.0198409693297995e-05, "loss": 0.7761, "step": 12950 }, { "epoch": 0.98, "learning_rate": 1.0160545248012116e-05, "loss": 0.7849, "step": 13000 }, { "epoch": 0.99, "learning_rate": 1.0122680802726242e-05, "loss": 0.7391, "step": 13050 }, { "epoch": 0.99, "learning_rate": 1.0084816357440364e-05, "loss": 0.7203, "step": 13100 }, { "epoch": 1.0, "learning_rate": 1.0046951912154488e-05, "loss": 0.7439, "step": 13150 }, { "epoch": 1.0, "learning_rate": 1.0009087466868612e-05, "loss": 0.7551, "step": 13200 }, { "epoch": 1.0, "learning_rate": 9.971223021582735e-06, "loss": 0.6935, "step": 13250 }, { "epoch": 1.01, "learning_rate": 9.933358576296857e-06, "loss": 0.7541, "step": 13300 }, { "epoch": 1.01, "learning_rate": 9.895494131010981e-06, "loss": 0.7828, "step": 13350 }, { "epoch": 1.01, "learning_rate": 9.857629685725105e-06, "loss": 0.7204, "step": 13400 }, { "epoch": 1.02, "learning_rate": 9.819765240439228e-06, "loss": 0.7374, "step": 13450 }, { "epoch": 1.02, "learning_rate": 9.781900795153352e-06, "loss": 0.7477, "step": 13500 }, { "epoch": 1.03, "learning_rate": 9.744036349867474e-06, "loss": 0.7516, "step": 13550 }, { "epoch": 1.03, "learning_rate": 9.706171904581598e-06, "loss": 0.727, "step": 13600 }, { "epoch": 1.03, "learning_rate": 9.668307459295722e-06, "loss": 0.7343, "step": 13650 }, { "epoch": 1.04, "learning_rate": 9.630443014009846e-06, "loss": 0.7739, "step": 13700 }, { "epoch": 1.04, "learning_rate": 9.592578568723969e-06, "loss": 0.7447, "step": 13750 }, { "epoch": 1.04, "learning_rate": 9.554714123438093e-06, "loss": 0.7607, "step": 13800 }, { "epoch": 1.05, "learning_rate": 9.516849678152215e-06, "loss": 0.79, "step": 13850 }, { "epoch": 1.05, "learning_rate": 9.47898523286634e-06, "loss": 0.7209, "step": 13900 }, { "epoch": 1.06, "learning_rate": 9.441120787580462e-06, "loss": 0.7745, "step": 13950 }, { "epoch": 1.06, "learning_rate": 9.403256342294586e-06, "loss": 0.7179, "step": 14000 }, { "epoch": 1.06, "learning_rate": 9.36539189700871e-06, "loss": 0.774, "step": 14050 }, { "epoch": 1.07, "learning_rate": 9.327527451722832e-06, "loss": 0.7592, "step": 14100 }, { "epoch": 1.07, "learning_rate": 9.289663006436956e-06, "loss": 0.7598, "step": 14150 }, { "epoch": 1.08, "learning_rate": 9.25179856115108e-06, "loss": 0.7665, "step": 14200 }, { "epoch": 1.08, "learning_rate": 9.213934115865203e-06, "loss": 0.7529, "step": 14250 }, { "epoch": 1.08, "learning_rate": 9.176069670579327e-06, "loss": 0.6873, "step": 14300 }, { "epoch": 1.09, "learning_rate": 9.138205225293451e-06, "loss": 0.7574, "step": 14350 }, { "epoch": 1.09, "learning_rate": 9.100340780007573e-06, "loss": 0.7215, "step": 14400 }, { "epoch": 1.09, "learning_rate": 9.062476334721697e-06, "loss": 0.7411, "step": 14450 }, { "epoch": 1.1, "learning_rate": 9.02461188943582e-06, "loss": 0.7627, "step": 14500 }, { "epoch": 1.1, "learning_rate": 8.986747444149944e-06, "loss": 0.7313, "step": 14550 }, { "epoch": 1.11, "learning_rate": 8.948882998864066e-06, "loss": 0.765, "step": 14600 }, { "epoch": 1.11, "learning_rate": 8.911018553578192e-06, "loss": 0.7696, "step": 14650 }, { "epoch": 1.11, "learning_rate": 8.873154108292314e-06, "loss": 0.7305, "step": 14700 }, { "epoch": 1.12, "learning_rate": 8.835289663006439e-06, "loss": 0.7653, "step": 14750 }, { "epoch": 1.12, "learning_rate": 8.797425217720561e-06, "loss": 0.7444, "step": 14800 }, { "epoch": 1.12, "learning_rate": 8.759560772434685e-06, "loss": 0.7858, "step": 14850 }, { "epoch": 1.13, "learning_rate": 8.721696327148807e-06, "loss": 0.7634, "step": 14900 }, { "epoch": 1.13, "learning_rate": 8.683831881862931e-06, "loss": 0.7471, "step": 14950 }, { "epoch": 1.14, "learning_rate": 8.645967436577056e-06, "loss": 0.7065, "step": 15000 }, { "epoch": 1.14, "learning_rate": 8.608102991291178e-06, "loss": 0.7659, "step": 15050 }, { "epoch": 1.14, "learning_rate": 8.570238546005302e-06, "loss": 0.7085, "step": 15100 }, { "epoch": 1.15, "learning_rate": 8.532374100719424e-06, "loss": 0.7312, "step": 15150 }, { "epoch": 1.15, "learning_rate": 8.494509655433548e-06, "loss": 0.814, "step": 15200 }, { "epoch": 1.15, "learning_rate": 8.45664521014767e-06, "loss": 0.7194, "step": 15250 }, { "epoch": 1.16, "learning_rate": 8.418780764861797e-06, "loss": 0.7266, "step": 15300 }, { "epoch": 1.16, "learning_rate": 8.380916319575919e-06, "loss": 0.7345, "step": 15350 }, { "epoch": 1.17, "learning_rate": 8.343051874290043e-06, "loss": 0.7121, "step": 15400 }, { "epoch": 1.17, "learning_rate": 8.305187429004165e-06, "loss": 0.7716, "step": 15450 }, { "epoch": 1.17, "learning_rate": 8.26732298371829e-06, "loss": 0.7601, "step": 15500 }, { "epoch": 1.18, "learning_rate": 8.229458538432412e-06, "loss": 0.736, "step": 15550 }, { "epoch": 1.18, "learning_rate": 8.191594093146536e-06, "loss": 0.7659, "step": 15600 }, { "epoch": 1.19, "learning_rate": 8.15372964786066e-06, "loss": 0.7504, "step": 15650 }, { "epoch": 1.19, "learning_rate": 8.115865202574782e-06, "loss": 0.7367, "step": 15700 }, { "epoch": 1.19, "learning_rate": 8.078000757288907e-06, "loss": 0.739, "step": 15750 }, { "epoch": 1.2, "learning_rate": 8.040136312003029e-06, "loss": 0.7421, "step": 15800 }, { "epoch": 1.2, "learning_rate": 8.002271866717153e-06, "loss": 0.7181, "step": 15850 }, { "epoch": 1.2, "learning_rate": 7.964407421431277e-06, "loss": 0.7699, "step": 15900 }, { "epoch": 1.21, "learning_rate": 7.926542976145401e-06, "loss": 0.7725, "step": 15950 }, { "epoch": 1.21, "learning_rate": 7.888678530859524e-06, "loss": 0.7658, "step": 16000 }, { "epoch": 1.22, "learning_rate": 7.850814085573648e-06, "loss": 0.6961, "step": 16050 }, { "epoch": 1.22, "learning_rate": 7.813706929193487e-06, "loss": 0.7598, "step": 16100 }, { "epoch": 1.22, "learning_rate": 7.775842483907611e-06, "loss": 0.7684, "step": 16150 }, { "epoch": 1.23, "learning_rate": 7.737978038621735e-06, "loss": 0.7258, "step": 16200 }, { "epoch": 1.23, "learning_rate": 7.700113593335858e-06, "loss": 0.7602, "step": 16250 }, { "epoch": 1.23, "learning_rate": 7.662249148049982e-06, "loss": 0.775, "step": 16300 }, { "epoch": 1.24, "learning_rate": 7.624384702764105e-06, "loss": 0.7311, "step": 16350 }, { "epoch": 1.24, "learning_rate": 7.586520257478228e-06, "loss": 0.7252, "step": 16400 }, { "epoch": 1.25, "learning_rate": 7.548655812192352e-06, "loss": 0.7078, "step": 16450 }, { "epoch": 1.25, "learning_rate": 7.5107913669064756e-06, "loss": 0.7607, "step": 16500 }, { "epoch": 1.25, "learning_rate": 7.472926921620599e-06, "loss": 0.7462, "step": 16550 }, { "epoch": 1.26, "learning_rate": 7.435062476334722e-06, "loss": 0.7574, "step": 16600 }, { "epoch": 1.26, "learning_rate": 7.397198031048845e-06, "loss": 0.7223, "step": 16650 }, { "epoch": 1.26, "learning_rate": 7.3593335857629685e-06, "loss": 0.7306, "step": 16700 }, { "epoch": 1.27, "learning_rate": 7.321469140477092e-06, "loss": 0.7638, "step": 16750 }, { "epoch": 1.27, "learning_rate": 7.283604695191217e-06, "loss": 0.7236, "step": 16800 }, { "epoch": 1.28, "learning_rate": 7.24574024990534e-06, "loss": 0.7381, "step": 16850 }, { "epoch": 1.28, "learning_rate": 7.207875804619463e-06, "loss": 0.7257, "step": 16900 }, { "epoch": 1.28, "learning_rate": 7.170011359333586e-06, "loss": 0.7516, "step": 16950 }, { "epoch": 1.29, "learning_rate": 7.132904202953427e-06, "loss": 0.7189, "step": 17000 }, { "epoch": 1.29, "learning_rate": 7.095039757667551e-06, "loss": 0.7672, "step": 17050 }, { "epoch": 1.29, "learning_rate": 7.057175312381674e-06, "loss": 0.7289, "step": 17100 }, { "epoch": 1.3, "learning_rate": 7.019310867095797e-06, "loss": 0.7194, "step": 17150 }, { "epoch": 1.3, "learning_rate": 6.9814464218099205e-06, "loss": 0.6933, "step": 17200 }, { "epoch": 1.31, "learning_rate": 6.943581976524045e-06, "loss": 0.8152, "step": 17250 }, { "epoch": 1.31, "learning_rate": 6.905717531238168e-06, "loss": 0.76, "step": 17300 }, { "epoch": 1.31, "learning_rate": 6.867853085952292e-06, "loss": 0.7271, "step": 17350 }, { "epoch": 1.32, "learning_rate": 6.829988640666415e-06, "loss": 0.7494, "step": 17400 }, { "epoch": 1.32, "learning_rate": 6.792124195380538e-06, "loss": 0.7738, "step": 17450 }, { "epoch": 1.33, "learning_rate": 6.754259750094662e-06, "loss": 0.7071, "step": 17500 }, { "epoch": 1.33, "learning_rate": 6.716395304808785e-06, "loss": 0.7339, "step": 17550 }, { "epoch": 1.33, "learning_rate": 6.678530859522908e-06, "loss": 0.7506, "step": 17600 }, { "epoch": 1.34, "learning_rate": 6.640666414237032e-06, "loss": 0.7121, "step": 17650 }, { "epoch": 1.34, "learning_rate": 6.602801968951155e-06, "loss": 0.7018, "step": 17700 }, { "epoch": 1.34, "learning_rate": 6.564937523665279e-06, "loss": 0.7677, "step": 17750 }, { "epoch": 1.35, "learning_rate": 6.527073078379403e-06, "loss": 0.6925, "step": 17800 }, { "epoch": 1.35, "learning_rate": 6.489208633093526e-06, "loss": 0.7205, "step": 17850 }, { "epoch": 1.36, "learning_rate": 6.451344187807649e-06, "loss": 0.7163, "step": 17900 }, { "epoch": 1.36, "learning_rate": 6.413479742521772e-06, "loss": 0.752, "step": 17950 }, { "epoch": 1.36, "learning_rate": 6.3756152972358965e-06, "loss": 0.7706, "step": 18000 }, { "epoch": 1.37, "learning_rate": 6.33775085195002e-06, "loss": 0.7556, "step": 18050 }, { "epoch": 1.37, "learning_rate": 6.299886406664143e-06, "loss": 0.7626, "step": 18100 }, { "epoch": 1.37, "learning_rate": 6.262021961378266e-06, "loss": 0.7447, "step": 18150 }, { "epoch": 1.38, "learning_rate": 6.224157516092389e-06, "loss": 0.741, "step": 18200 }, { "epoch": 1.38, "learning_rate": 6.186293070806513e-06, "loss": 0.7567, "step": 18250 }, { "epoch": 1.39, "learning_rate": 6.1484286255206375e-06, "loss": 0.7364, "step": 18300 }, { "epoch": 1.39, "learning_rate": 6.110564180234761e-06, "loss": 0.739, "step": 18350 }, { "epoch": 1.39, "learning_rate": 6.072699734948884e-06, "loss": 0.7309, "step": 18400 }, { "epoch": 1.4, "learning_rate": 6.034835289663007e-06, "loss": 0.7385, "step": 18450 }, { "epoch": 1.4, "learning_rate": 5.9969708443771305e-06, "loss": 0.7588, "step": 18500 }, { "epoch": 1.4, "learning_rate": 5.959106399091254e-06, "loss": 0.7442, "step": 18550 }, { "epoch": 1.41, "learning_rate": 5.921241953805377e-06, "loss": 0.7871, "step": 18600 }, { "epoch": 1.41, "learning_rate": 5.883377508519501e-06, "loss": 0.7817, "step": 18650 }, { "epoch": 1.42, "learning_rate": 5.845513063233624e-06, "loss": 0.751, "step": 18700 }, { "epoch": 1.42, "learning_rate": 5.8076486179477474e-06, "loss": 0.7379, "step": 18750 }, { "epoch": 1.42, "learning_rate": 5.769784172661871e-06, "loss": 0.7151, "step": 18800 }, { "epoch": 1.43, "learning_rate": 5.731919727375994e-06, "loss": 0.7989, "step": 18850 }, { "epoch": 1.43, "learning_rate": 5.694055282090117e-06, "loss": 0.752, "step": 18900 }, { "epoch": 1.43, "learning_rate": 5.656190836804242e-06, "loss": 0.755, "step": 18950 }, { "epoch": 1.44, "learning_rate": 5.618326391518365e-06, "loss": 0.7355, "step": 19000 }, { "epoch": 1.44, "learning_rate": 5.5804619462324885e-06, "loss": 0.668, "step": 19050 }, { "epoch": 1.45, "learning_rate": 5.542597500946612e-06, "loss": 0.763, "step": 19100 }, { "epoch": 1.45, "learning_rate": 5.504733055660735e-06, "loss": 0.7369, "step": 19150 }, { "epoch": 1.45, "learning_rate": 5.466868610374858e-06, "loss": 0.7389, "step": 19200 }, { "epoch": 1.46, "learning_rate": 5.429004165088982e-06, "loss": 0.7435, "step": 19250 }, { "epoch": 1.46, "learning_rate": 5.3911397198031055e-06, "loss": 0.7103, "step": 19300 }, { "epoch": 1.47, "learning_rate": 5.353275274517229e-06, "loss": 0.7, "step": 19350 }, { "epoch": 1.47, "learning_rate": 5.315410829231352e-06, "loss": 0.7269, "step": 19400 }, { "epoch": 1.47, "learning_rate": 5.277546383945475e-06, "loss": 0.713, "step": 19450 }, { "epoch": 1.48, "learning_rate": 5.2396819386595984e-06, "loss": 0.7442, "step": 19500 }, { "epoch": 1.48, "learning_rate": 5.201817493373722e-06, "loss": 0.731, "step": 19550 }, { "epoch": 1.48, "learning_rate": 5.163953048087847e-06, "loss": 0.7389, "step": 19600 }, { "epoch": 1.49, "learning_rate": 5.12608860280197e-06, "loss": 0.7348, "step": 19650 }, { "epoch": 1.49, "learning_rate": 5.088224157516093e-06, "loss": 0.7607, "step": 19700 }, { "epoch": 1.5, "learning_rate": 5.050359712230216e-06, "loss": 0.7151, "step": 19750 }, { "epoch": 1.5, "learning_rate": 5.0124952669443395e-06, "loss": 0.7002, "step": 19800 }, { "epoch": 1.5, "learning_rate": 4.974630821658464e-06, "loss": 0.7734, "step": 19850 }, { "epoch": 1.51, "learning_rate": 4.936766376372587e-06, "loss": 0.7506, "step": 19900 }, { "epoch": 1.51, "learning_rate": 4.89890193108671e-06, "loss": 0.7369, "step": 19950 }, { "epoch": 1.51, "learning_rate": 4.861037485800833e-06, "loss": 0.7118, "step": 20000 }, { "epoch": 1.52, "learning_rate": 4.8231730405149565e-06, "loss": 0.7152, "step": 20050 }, { "epoch": 1.52, "learning_rate": 4.78530859522908e-06, "loss": 0.7366, "step": 20100 }, { "epoch": 1.53, "learning_rate": 4.747444149943204e-06, "loss": 0.7086, "step": 20150 }, { "epoch": 1.53, "learning_rate": 4.709579704657327e-06, "loss": 0.7306, "step": 20200 }, { "epoch": 1.53, "learning_rate": 4.67171525937145e-06, "loss": 0.7266, "step": 20250 }, { "epoch": 1.54, "learning_rate": 4.633850814085574e-06, "loss": 0.7241, "step": 20300 }, { "epoch": 1.54, "learning_rate": 4.596743657705415e-06, "loss": 0.708, "step": 20350 }, { "epoch": 1.54, "learning_rate": 4.558879212419539e-06, "loss": 0.7544, "step": 20400 }, { "epoch": 1.55, "learning_rate": 4.521014767133662e-06, "loss": 0.739, "step": 20450 }, { "epoch": 1.55, "learning_rate": 4.483150321847785e-06, "loss": 0.7492, "step": 20500 }, { "epoch": 1.56, "learning_rate": 4.4452858765619086e-06, "loss": 0.7323, "step": 20550 }, { "epoch": 1.56, "learning_rate": 4.407421431276032e-06, "loss": 0.7431, "step": 20600 }, { "epoch": 1.56, "learning_rate": 4.369556985990155e-06, "loss": 0.7302, "step": 20650 }, { "epoch": 1.57, "learning_rate": 4.331692540704279e-06, "loss": 0.7112, "step": 20700 }, { "epoch": 1.57, "learning_rate": 4.293828095418402e-06, "loss": 0.7359, "step": 20750 }, { "epoch": 1.58, "learning_rate": 4.2559636501325256e-06, "loss": 0.7274, "step": 20800 }, { "epoch": 1.58, "learning_rate": 4.21809920484665e-06, "loss": 0.7307, "step": 20850 }, { "epoch": 1.58, "learning_rate": 4.180234759560773e-06, "loss": 0.7339, "step": 20900 }, { "epoch": 1.59, "learning_rate": 4.142370314274896e-06, "loss": 0.7743, "step": 20950 }, { "epoch": 1.59, "learning_rate": 4.104505868989019e-06, "loss": 0.7424, "step": 21000 }, { "epoch": 1.59, "learning_rate": 4.066641423703143e-06, "loss": 0.7152, "step": 21050 }, { "epoch": 1.6, "learning_rate": 4.028776978417267e-06, "loss": 0.707, "step": 21100 }, { "epoch": 1.6, "learning_rate": 3.99091253313139e-06, "loss": 0.7538, "step": 21150 }, { "epoch": 1.61, "learning_rate": 3.953048087845514e-06, "loss": 0.7293, "step": 21200 }, { "epoch": 1.61, "learning_rate": 3.915183642559637e-06, "loss": 0.7524, "step": 21250 }, { "epoch": 1.61, "learning_rate": 3.87731919727376e-06, "loss": 0.7938, "step": 21300 }, { "epoch": 1.62, "learning_rate": 3.839454751987884e-06, "loss": 0.7266, "step": 21350 }, { "epoch": 1.62, "learning_rate": 3.8015903067020073e-06, "loss": 0.7566, "step": 21400 }, { "epoch": 1.62, "learning_rate": 3.7637258614161305e-06, "loss": 0.713, "step": 21450 }, { "epoch": 1.63, "learning_rate": 3.725861416130254e-06, "loss": 0.7546, "step": 21500 }, { "epoch": 1.63, "learning_rate": 3.6879969708443774e-06, "loss": 0.6955, "step": 21550 }, { "epoch": 1.64, "learning_rate": 3.6501325255585006e-06, "loss": 0.7084, "step": 21600 }, { "epoch": 1.64, "learning_rate": 3.6122680802726247e-06, "loss": 0.7447, "step": 21650 }, { "epoch": 1.64, "learning_rate": 3.574403634986748e-06, "loss": 0.7173, "step": 21700 }, { "epoch": 1.65, "learning_rate": 3.536539189700871e-06, "loss": 0.7324, "step": 21750 }, { "epoch": 1.65, "learning_rate": 3.498674744414995e-06, "loss": 0.7561, "step": 21800 }, { "epoch": 1.65, "learning_rate": 3.460810299129118e-06, "loss": 0.7505, "step": 21850 }, { "epoch": 1.66, "learning_rate": 3.4229458538432413e-06, "loss": 0.7337, "step": 21900 }, { "epoch": 1.66, "learning_rate": 3.3850814085573645e-06, "loss": 0.7753, "step": 21950 }, { "epoch": 1.67, "learning_rate": 3.3472169632714886e-06, "loss": 0.7703, "step": 22000 }, { "epoch": 1.67, "learning_rate": 3.309352517985612e-06, "loss": 0.7384, "step": 22050 }, { "epoch": 1.67, "learning_rate": 3.271488072699735e-06, "loss": 0.7759, "step": 22100 }, { "epoch": 1.68, "learning_rate": 3.2336236274138587e-06, "loss": 0.7446, "step": 22150 }, { "epoch": 1.68, "learning_rate": 3.195759182127982e-06, "loss": 0.7437, "step": 22200 }, { "epoch": 1.68, "learning_rate": 3.157894736842105e-06, "loss": 0.7817, "step": 22250 }, { "epoch": 1.69, "learning_rate": 3.1200302915562292e-06, "loss": 0.7396, "step": 22300 }, { "epoch": 1.69, "learning_rate": 3.0821658462703525e-06, "loss": 0.743, "step": 22350 }, { "epoch": 1.7, "learning_rate": 3.0443014009844757e-06, "loss": 0.7597, "step": 22400 }, { "epoch": 1.7, "learning_rate": 3.0064369556985994e-06, "loss": 0.6844, "step": 22450 }, { "epoch": 1.7, "learning_rate": 2.9685725104127226e-06, "loss": 0.7737, "step": 22500 }, { "epoch": 1.71, "learning_rate": 2.930708065126846e-06, "loss": 0.7024, "step": 22550 }, { "epoch": 1.71, "learning_rate": 2.89284361984097e-06, "loss": 0.7136, "step": 22600 }, { "epoch": 1.72, "learning_rate": 2.854979174555093e-06, "loss": 0.7482, "step": 22650 }, { "epoch": 1.72, "learning_rate": 2.8171147292692164e-06, "loss": 0.7608, "step": 22700 }, { "epoch": 1.72, "learning_rate": 2.7792502839833396e-06, "loss": 0.7442, "step": 22750 }, { "epoch": 1.73, "learning_rate": 2.7413858386974632e-06, "loss": 0.6687, "step": 22800 }, { "epoch": 1.73, "learning_rate": 2.703521393411587e-06, "loss": 0.7594, "step": 22850 }, { "epoch": 1.73, "learning_rate": 2.66565694812571e-06, "loss": 0.7042, "step": 22900 }, { "epoch": 1.74, "learning_rate": 2.6277925028398338e-06, "loss": 0.7556, "step": 22950 }, { "epoch": 1.74, "learning_rate": 2.589928057553957e-06, "loss": 0.7357, "step": 23000 }, { "epoch": 1.75, "learning_rate": 2.5520636122680802e-06, "loss": 0.7292, "step": 23050 }, { "epoch": 1.75, "learning_rate": 2.5141991669822043e-06, "loss": 0.6899, "step": 23100 }, { "epoch": 1.75, "learning_rate": 2.4763347216963275e-06, "loss": 0.729, "step": 23150 }, { "epoch": 1.76, "learning_rate": 2.4384702764104508e-06, "loss": 0.7473, "step": 23200 }, { "epoch": 1.76, "learning_rate": 2.400605831124574e-06, "loss": 0.7264, "step": 23250 }, { "epoch": 1.76, "learning_rate": 2.3627413858386977e-06, "loss": 0.7314, "step": 23300 }, { "epoch": 1.77, "learning_rate": 2.3256342294585385e-06, "loss": 0.6819, "step": 23350 }, { "epoch": 1.77, "learning_rate": 2.287769784172662e-06, "loss": 0.7637, "step": 23400 }, { "epoch": 1.78, "learning_rate": 2.2499053388867854e-06, "loss": 0.727, "step": 23450 }, { "epoch": 1.78, "learning_rate": 2.2120408936009086e-06, "loss": 0.7044, "step": 23500 }, { "epoch": 1.78, "learning_rate": 2.1741764483150323e-06, "loss": 0.6926, "step": 23550 }, { "epoch": 1.79, "learning_rate": 2.136312003029156e-06, "loss": 0.7406, "step": 23600 }, { "epoch": 1.79, "learning_rate": 2.098447557743279e-06, "loss": 0.7572, "step": 23650 }, { "epoch": 1.79, "learning_rate": 2.060583112457403e-06, "loss": 0.7239, "step": 23700 }, { "epoch": 1.8, "learning_rate": 2.022718667171526e-06, "loss": 0.7353, "step": 23750 }, { "epoch": 1.8, "learning_rate": 1.9848542218856497e-06, "loss": 0.7287, "step": 23800 }, { "epoch": 1.81, "learning_rate": 1.946989776599773e-06, "loss": 0.7461, "step": 23850 }, { "epoch": 1.81, "learning_rate": 1.909125331313896e-06, "loss": 0.7489, "step": 23900 }, { "epoch": 1.81, "learning_rate": 1.8712608860280198e-06, "loss": 0.7156, "step": 23950 }, { "epoch": 1.82, "learning_rate": 1.8333964407421435e-06, "loss": 0.6765, "step": 24000 }, { "epoch": 1.82, "learning_rate": 1.7955319954562667e-06, "loss": 0.7496, "step": 24050 }, { "epoch": 1.82, "learning_rate": 1.7576675501703901e-06, "loss": 0.7269, "step": 24100 }, { "epoch": 1.83, "learning_rate": 1.7198031048845134e-06, "loss": 0.728, "step": 24150 }, { "epoch": 1.83, "learning_rate": 1.681938659598637e-06, "loss": 0.7914, "step": 24200 }, { "epoch": 1.84, "learning_rate": 1.6440742143127605e-06, "loss": 0.7419, "step": 24250 }, { "epoch": 1.84, "learning_rate": 1.6062097690268837e-06, "loss": 0.7509, "step": 24300 }, { "epoch": 1.84, "learning_rate": 1.5683453237410074e-06, "loss": 0.7372, "step": 24350 }, { "epoch": 1.85, "learning_rate": 1.5304808784551308e-06, "loss": 0.7159, "step": 24400 }, { "epoch": 1.85, "learning_rate": 1.4926164331692542e-06, "loss": 0.7272, "step": 24450 }, { "epoch": 1.86, "learning_rate": 1.4547519878833777e-06, "loss": 0.735, "step": 24500 }, { "epoch": 1.86, "learning_rate": 1.416887542597501e-06, "loss": 0.715, "step": 24550 }, { "epoch": 1.86, "learning_rate": 1.3790230973116246e-06, "loss": 0.7507, "step": 24600 }, { "epoch": 1.87, "learning_rate": 1.341158652025748e-06, "loss": 0.7723, "step": 24650 }, { "epoch": 1.87, "learning_rate": 1.3032942067398712e-06, "loss": 0.7658, "step": 24700 }, { "epoch": 1.87, "learning_rate": 1.2654297614539949e-06, "loss": 0.7727, "step": 24750 }, { "epoch": 1.88, "learning_rate": 1.2275653161681181e-06, "loss": 0.748, "step": 24800 }, { "epoch": 1.88, "learning_rate": 1.1897008708822416e-06, "loss": 0.7135, "step": 24850 }, { "epoch": 1.89, "learning_rate": 1.1518364255963652e-06, "loss": 0.76, "step": 24900 }, { "epoch": 1.89, "learning_rate": 1.1139719803104887e-06, "loss": 0.7439, "step": 24950 }, { "epoch": 1.89, "learning_rate": 1.0761075350246119e-06, "loss": 0.7496, "step": 25000 }, { "epoch": 1.9, "learning_rate": 1.0382430897387353e-06, "loss": 0.7631, "step": 25050 }, { "epoch": 1.9, "learning_rate": 1.000378644452859e-06, "loss": 0.7033, "step": 25100 }, { "epoch": 1.9, "learning_rate": 9.625141991669822e-07, "loss": 0.717, "step": 25150 }, { "epoch": 1.91, "learning_rate": 9.246497538811057e-07, "loss": 0.7329, "step": 25200 }, { "epoch": 1.91, "learning_rate": 8.867853085952291e-07, "loss": 0.7065, "step": 25250 }, { "epoch": 1.92, "learning_rate": 8.489208633093526e-07, "loss": 0.702, "step": 25300 }, { "epoch": 1.92, "learning_rate": 8.110564180234761e-07, "loss": 0.7161, "step": 25350 }, { "epoch": 1.92, "learning_rate": 7.731919727375994e-07, "loss": 0.725, "step": 25400 }, { "epoch": 1.93, "learning_rate": 7.353275274517229e-07, "loss": 0.7256, "step": 25450 }, { "epoch": 1.93, "learning_rate": 6.974630821658464e-07, "loss": 0.7395, "step": 25500 }, { "epoch": 1.93, "learning_rate": 6.595986368799697e-07, "loss": 0.794, "step": 25550 }, { "epoch": 1.94, "learning_rate": 6.217341915940932e-07, "loss": 0.7349, "step": 25600 }, { "epoch": 1.94, "learning_rate": 5.838697463082166e-07, "loss": 0.7394, "step": 25650 }, { "epoch": 1.95, "learning_rate": 5.460053010223401e-07, "loss": 0.7345, "step": 25700 }, { "epoch": 1.95, "learning_rate": 5.081408557364635e-07, "loss": 0.6888, "step": 25750 }, { "epoch": 1.95, "learning_rate": 4.702764104505869e-07, "loss": 0.7347, "step": 25800 }, { "epoch": 1.96, "learning_rate": 4.324119651647104e-07, "loss": 0.7246, "step": 25850 }, { "epoch": 1.96, "learning_rate": 3.945475198788338e-07, "loss": 0.7438, "step": 25900 }, { "epoch": 1.97, "learning_rate": 3.5668307459295723e-07, "loss": 0.7108, "step": 25950 }, { "epoch": 1.97, "learning_rate": 3.1881862930708067e-07, "loss": 0.7208, "step": 26000 }, { "epoch": 1.97, "learning_rate": 2.809541840212041e-07, "loss": 0.7633, "step": 26050 }, { "epoch": 1.98, "learning_rate": 2.4308973873532756e-07, "loss": 0.7707, "step": 26100 }, { "epoch": 1.98, "learning_rate": 2.05225293449451e-07, "loss": 0.7524, "step": 26150 }, { "epoch": 1.98, "learning_rate": 1.6736084816357444e-07, "loss": 0.707, "step": 26200 }, { "epoch": 1.99, "learning_rate": 1.2949640287769786e-07, "loss": 0.7751, "step": 26250 }, { "epoch": 1.99, "learning_rate": 9.163195759182128e-08, "loss": 0.7672, "step": 26300 }, { "epoch": 2.0, "learning_rate": 5.376751230594472e-08, "loss": 0.6998, "step": 26350 }, { "epoch": 2.0, "learning_rate": 1.5903067020068158e-08, "loss": 0.7069, "step": 26400 }, { "epoch": 2.0, "step": 26410, "total_flos": 6.728145843607406e+16, "train_loss": 0.7582937260732105, "train_runtime": 12885.6962, "train_samples_per_second": 65.589, "train_steps_per_second": 2.05 } ], "max_steps": 26410, "num_train_epochs": 2, "total_flos": 6.728145843607406e+16, "trial_name": null, "trial_params": null }