codebert-base-mlm-Malicious_URLs / trainer_state.json
DunnBC22's picture
All Dunn!!!
b10d4f1
raw
history blame
65.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.999886413115499,
"global_step": 26410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9999242711094284e-05,
"loss": 1.3163,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.9963650132525562e-05,
"loss": 1.0093,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.99265429761454e-05,
"loss": 0.8939,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.9888678530859524e-05,
"loss": 0.8368,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 1.985081408557365e-05,
"loss": 0.8794,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.9812949640287772e-05,
"loss": 0.843,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 1.9775085195001894e-05,
"loss": 0.8488,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.9737220749716017e-05,
"loss": 0.8465,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 1.9699356304430143e-05,
"loss": 0.8591,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 1.9661491859144265e-05,
"loss": 0.8978,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 1.9623627413858387e-05,
"loss": 0.8035,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 1.9585762968572513e-05,
"loss": 0.8153,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 1.9547898523286635e-05,
"loss": 0.8079,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 1.9510034078000758e-05,
"loss": 0.8092,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 1.947216963271488e-05,
"loss": 0.8294,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 1.9434305187429006e-05,
"loss": 0.8099,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 1.939644074214313e-05,
"loss": 0.85,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 1.935857629685725e-05,
"loss": 0.8509,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 1.9320711851571376e-05,
"loss": 0.8229,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 1.92828474062855e-05,
"loss": 0.8251,
"step": 950
},
{
"epoch": 0.08,
"learning_rate": 1.9244982960999625e-05,
"loss": 0.8116,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 1.9207118515713747e-05,
"loss": 0.8277,
"step": 1050
},
{
"epoch": 0.08,
"learning_rate": 1.916925407042787e-05,
"loss": 0.8004,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 1.9131389625141995e-05,
"loss": 0.8347,
"step": 1150
},
{
"epoch": 0.09,
"learning_rate": 1.9093525179856118e-05,
"loss": 0.8103,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 1.905566073457024e-05,
"loss": 0.8566,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 1.9017796289284362e-05,
"loss": 0.7921,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 1.8979931843998488e-05,
"loss": 0.7959,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 1.894206739871261e-05,
"loss": 0.7827,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 1.8904202953426733e-05,
"loss": 0.785,
"step": 1450
},
{
"epoch": 0.11,
"learning_rate": 1.886633850814086e-05,
"loss": 0.8061,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 1.882847406285498e-05,
"loss": 0.7717,
"step": 1550
},
{
"epoch": 0.12,
"learning_rate": 1.8790609617569103e-05,
"loss": 0.8362,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 1.8752745172283226e-05,
"loss": 0.7975,
"step": 1650
},
{
"epoch": 0.13,
"learning_rate": 1.871488072699735e-05,
"loss": 0.8123,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 1.8677016281711474e-05,
"loss": 0.8526,
"step": 1750
},
{
"epoch": 0.14,
"learning_rate": 1.8639151836425596e-05,
"loss": 0.808,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 1.8601287391139722e-05,
"loss": 0.7989,
"step": 1850
},
{
"epoch": 0.14,
"learning_rate": 1.8563422945853844e-05,
"loss": 0.7755,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 1.8525558500567967e-05,
"loss": 0.8467,
"step": 1950
},
{
"epoch": 0.15,
"learning_rate": 1.8487694055282093e-05,
"loss": 0.8221,
"step": 2000
},
{
"epoch": 0.16,
"learning_rate": 1.8449829609996215e-05,
"loss": 0.7851,
"step": 2050
},
{
"epoch": 0.16,
"learning_rate": 1.841196516471034e-05,
"loss": 0.8159,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 1.837410071942446e-05,
"loss": 0.8027,
"step": 2150
},
{
"epoch": 0.17,
"learning_rate": 1.8336236274138586e-05,
"loss": 0.7585,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 1.8298371828852708e-05,
"loss": 0.8287,
"step": 2250
},
{
"epoch": 0.17,
"learning_rate": 1.8260507383566834e-05,
"loss": 0.8127,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 1.8222642938280956e-05,
"loss": 0.8015,
"step": 2350
},
{
"epoch": 0.18,
"learning_rate": 1.818477849299508e-05,
"loss": 0.8228,
"step": 2400
},
{
"epoch": 0.19,
"learning_rate": 1.8146914047709204e-05,
"loss": 0.7865,
"step": 2450
},
{
"epoch": 0.19,
"learning_rate": 1.8109049602423327e-05,
"loss": 0.8093,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 1.807118515713745e-05,
"loss": 0.7812,
"step": 2550
},
{
"epoch": 0.2,
"learning_rate": 1.803332071185157e-05,
"loss": 0.805,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 1.7995456266565697e-05,
"loss": 0.7844,
"step": 2650
},
{
"epoch": 0.2,
"learning_rate": 1.795759182127982e-05,
"loss": 0.7976,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 1.7919727375993942e-05,
"loss": 0.7683,
"step": 2750
},
{
"epoch": 0.21,
"learning_rate": 1.7881862930708068e-05,
"loss": 0.8157,
"step": 2800
},
{
"epoch": 0.22,
"learning_rate": 1.784399848542219e-05,
"loss": 0.7801,
"step": 2850
},
{
"epoch": 0.22,
"learning_rate": 1.7806134040136312e-05,
"loss": 0.7817,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 1.7768269594850435e-05,
"loss": 0.8286,
"step": 2950
},
{
"epoch": 0.23,
"learning_rate": 1.773040514956456e-05,
"loss": 0.7994,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 1.7692540704278683e-05,
"loss": 0.758,
"step": 3050
},
{
"epoch": 0.23,
"learning_rate": 1.7654676258992805e-05,
"loss": 0.8116,
"step": 3100
},
{
"epoch": 0.24,
"learning_rate": 1.761681181370693e-05,
"loss": 0.7762,
"step": 3150
},
{
"epoch": 0.24,
"learning_rate": 1.7578947368421054e-05,
"loss": 0.7603,
"step": 3200
},
{
"epoch": 0.25,
"learning_rate": 1.7541082923135176e-05,
"loss": 0.825,
"step": 3250
},
{
"epoch": 0.25,
"learning_rate": 1.7503218477849302e-05,
"loss": 0.7649,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 1.7465354032563424e-05,
"loss": 0.7886,
"step": 3350
},
{
"epoch": 0.26,
"learning_rate": 1.742748958727755e-05,
"loss": 0.796,
"step": 3400
},
{
"epoch": 0.26,
"learning_rate": 1.7389625141991672e-05,
"loss": 0.7712,
"step": 3450
},
{
"epoch": 0.27,
"learning_rate": 1.7351760696705795e-05,
"loss": 0.8061,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 1.7313896251419917e-05,
"loss": 0.7691,
"step": 3550
},
{
"epoch": 0.27,
"learning_rate": 1.7276031806134043e-05,
"loss": 0.8038,
"step": 3600
},
{
"epoch": 0.28,
"learning_rate": 1.7238167360848165e-05,
"loss": 0.7679,
"step": 3650
},
{
"epoch": 0.28,
"learning_rate": 1.7200302915562288e-05,
"loss": 0.7909,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 1.7162438470276413e-05,
"loss": 0.8001,
"step": 3750
},
{
"epoch": 0.29,
"learning_rate": 1.7124574024990536e-05,
"loss": 0.7369,
"step": 3800
},
{
"epoch": 0.29,
"learning_rate": 1.7086709579704658e-05,
"loss": 0.7716,
"step": 3850
},
{
"epoch": 0.3,
"learning_rate": 1.704884513441878e-05,
"loss": 0.8003,
"step": 3900
},
{
"epoch": 0.3,
"learning_rate": 1.7010980689132906e-05,
"loss": 0.7973,
"step": 3950
},
{
"epoch": 0.3,
"learning_rate": 1.697311624384703e-05,
"loss": 0.7745,
"step": 4000
},
{
"epoch": 0.31,
"learning_rate": 1.693525179856115e-05,
"loss": 0.7266,
"step": 4050
},
{
"epoch": 0.31,
"learning_rate": 1.6897387353275277e-05,
"loss": 0.7917,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 1.68595229079894e-05,
"loss": 0.7393,
"step": 4150
},
{
"epoch": 0.32,
"learning_rate": 1.682165846270352e-05,
"loss": 0.7822,
"step": 4200
},
{
"epoch": 0.32,
"learning_rate": 1.6783794017417647e-05,
"loss": 0.7919,
"step": 4250
},
{
"epoch": 0.33,
"learning_rate": 1.674592957213177e-05,
"loss": 0.7943,
"step": 4300
},
{
"epoch": 0.33,
"learning_rate": 1.6708065126845892e-05,
"loss": 0.7731,
"step": 4350
},
{
"epoch": 0.33,
"learning_rate": 1.6670200681560014e-05,
"loss": 0.7623,
"step": 4400
},
{
"epoch": 0.34,
"learning_rate": 1.663233623627414e-05,
"loss": 0.7891,
"step": 4450
},
{
"epoch": 0.34,
"learning_rate": 1.6594471790988263e-05,
"loss": 0.7769,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 1.655660734570239e-05,
"loss": 0.7958,
"step": 4550
},
{
"epoch": 0.35,
"learning_rate": 1.651874290041651e-05,
"loss": 0.7555,
"step": 4600
},
{
"epoch": 0.35,
"learning_rate": 1.6480878455130633e-05,
"loss": 0.7993,
"step": 4650
},
{
"epoch": 0.36,
"learning_rate": 1.644301400984476e-05,
"loss": 0.7916,
"step": 4700
},
{
"epoch": 0.36,
"learning_rate": 1.640514956455888e-05,
"loss": 0.8101,
"step": 4750
},
{
"epoch": 0.36,
"learning_rate": 1.6367285119273004e-05,
"loss": 0.8066,
"step": 4800
},
{
"epoch": 0.37,
"learning_rate": 1.6329420673987126e-05,
"loss": 0.761,
"step": 4850
},
{
"epoch": 0.37,
"learning_rate": 1.6291556228701252e-05,
"loss": 0.7919,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 1.6253691783415374e-05,
"loss": 0.7484,
"step": 4950
},
{
"epoch": 0.38,
"learning_rate": 1.6215827338129497e-05,
"loss": 0.809,
"step": 5000
},
{
"epoch": 0.38,
"learning_rate": 1.6177962892843622e-05,
"loss": 0.7683,
"step": 5050
},
{
"epoch": 0.39,
"learning_rate": 1.6140098447557745e-05,
"loss": 0.7622,
"step": 5100
},
{
"epoch": 0.39,
"learning_rate": 1.6102234002271867e-05,
"loss": 0.7618,
"step": 5150
},
{
"epoch": 0.39,
"learning_rate": 1.6064369556985993e-05,
"loss": 0.7687,
"step": 5200
},
{
"epoch": 0.4,
"learning_rate": 1.6026505111700115e-05,
"loss": 0.7598,
"step": 5250
},
{
"epoch": 0.4,
"learning_rate": 1.5988640666414238e-05,
"loss": 0.7245,
"step": 5300
},
{
"epoch": 0.41,
"learning_rate": 1.595077622112836e-05,
"loss": 0.7751,
"step": 5350
},
{
"epoch": 0.41,
"learning_rate": 1.5912911775842486e-05,
"loss": 0.7903,
"step": 5400
},
{
"epoch": 0.41,
"learning_rate": 1.5875047330556608e-05,
"loss": 0.7399,
"step": 5450
},
{
"epoch": 0.42,
"learning_rate": 1.583718288527073e-05,
"loss": 0.7529,
"step": 5500
},
{
"epoch": 0.42,
"learning_rate": 1.5799318439984856e-05,
"loss": 0.7777,
"step": 5550
},
{
"epoch": 0.42,
"learning_rate": 1.576145399469898e-05,
"loss": 0.7411,
"step": 5600
},
{
"epoch": 0.43,
"learning_rate": 1.5723589549413105e-05,
"loss": 0.7805,
"step": 5650
},
{
"epoch": 0.43,
"learning_rate": 1.5685725104127224e-05,
"loss": 0.7585,
"step": 5700
},
{
"epoch": 0.44,
"learning_rate": 1.564786065884135e-05,
"loss": 0.7908,
"step": 5750
},
{
"epoch": 0.44,
"learning_rate": 1.560999621355547e-05,
"loss": 0.7302,
"step": 5800
},
{
"epoch": 0.44,
"learning_rate": 1.5572131768269597e-05,
"loss": 0.7492,
"step": 5850
},
{
"epoch": 0.45,
"learning_rate": 1.553426732298372e-05,
"loss": 0.8124,
"step": 5900
},
{
"epoch": 0.45,
"learning_rate": 1.5496402877697842e-05,
"loss": 0.7609,
"step": 5950
},
{
"epoch": 0.45,
"learning_rate": 1.5458538432411968e-05,
"loss": 0.7383,
"step": 6000
},
{
"epoch": 0.46,
"learning_rate": 1.542067398712609e-05,
"loss": 0.7585,
"step": 6050
},
{
"epoch": 0.46,
"learning_rate": 1.538356683074593e-05,
"loss": 0.7852,
"step": 6100
},
{
"epoch": 0.47,
"learning_rate": 1.5345702385460056e-05,
"loss": 0.7737,
"step": 6150
},
{
"epoch": 0.47,
"learning_rate": 1.5307837940174178e-05,
"loss": 0.7881,
"step": 6200
},
{
"epoch": 0.47,
"learning_rate": 1.52699734948883e-05,
"loss": 0.7716,
"step": 6250
},
{
"epoch": 0.48,
"learning_rate": 1.5232109049602425e-05,
"loss": 0.7478,
"step": 6300
},
{
"epoch": 0.48,
"learning_rate": 1.5194244604316549e-05,
"loss": 0.7681,
"step": 6350
},
{
"epoch": 0.48,
"learning_rate": 1.5156380159030673e-05,
"loss": 0.7703,
"step": 6400
},
{
"epoch": 0.49,
"learning_rate": 1.5118515713744795e-05,
"loss": 0.8031,
"step": 6450
},
{
"epoch": 0.49,
"learning_rate": 1.508065126845892e-05,
"loss": 0.7812,
"step": 6500
},
{
"epoch": 0.5,
"learning_rate": 1.5042786823173042e-05,
"loss": 0.8151,
"step": 6550
},
{
"epoch": 0.5,
"learning_rate": 1.5004922377887166e-05,
"loss": 0.8384,
"step": 6600
},
{
"epoch": 0.5,
"learning_rate": 1.4967057932601288e-05,
"loss": 0.7861,
"step": 6650
},
{
"epoch": 0.51,
"learning_rate": 1.4929193487315412e-05,
"loss": 0.7473,
"step": 6700
},
{
"epoch": 0.51,
"learning_rate": 1.4891329042029536e-05,
"loss": 0.7741,
"step": 6750
},
{
"epoch": 0.51,
"learning_rate": 1.4853464596743659e-05,
"loss": 0.799,
"step": 6800
},
{
"epoch": 0.52,
"learning_rate": 1.4815600151457783e-05,
"loss": 0.7929,
"step": 6850
},
{
"epoch": 0.52,
"learning_rate": 1.4777735706171905e-05,
"loss": 0.7594,
"step": 6900
},
{
"epoch": 0.53,
"learning_rate": 1.4739871260886029e-05,
"loss": 0.736,
"step": 6950
},
{
"epoch": 0.53,
"learning_rate": 1.4702006815600151e-05,
"loss": 0.7608,
"step": 7000
},
{
"epoch": 0.53,
"learning_rate": 1.4664142370314276e-05,
"loss": 0.7927,
"step": 7050
},
{
"epoch": 0.54,
"learning_rate": 1.46262779250284e-05,
"loss": 0.7753,
"step": 7100
},
{
"epoch": 0.54,
"learning_rate": 1.4588413479742522e-05,
"loss": 0.7476,
"step": 7150
},
{
"epoch": 0.55,
"learning_rate": 1.4550549034456646e-05,
"loss": 0.7907,
"step": 7200
},
{
"epoch": 0.55,
"learning_rate": 1.4512684589170768e-05,
"loss": 0.7874,
"step": 7250
},
{
"epoch": 0.55,
"learning_rate": 1.4474820143884894e-05,
"loss": 0.7626,
"step": 7300
},
{
"epoch": 0.56,
"learning_rate": 1.4436955698599018e-05,
"loss": 0.7527,
"step": 7350
},
{
"epoch": 0.56,
"learning_rate": 1.439909125331314e-05,
"loss": 0.7451,
"step": 7400
},
{
"epoch": 0.56,
"learning_rate": 1.4361226808027265e-05,
"loss": 0.7615,
"step": 7450
},
{
"epoch": 0.57,
"learning_rate": 1.4323362362741387e-05,
"loss": 0.7365,
"step": 7500
},
{
"epoch": 0.57,
"learning_rate": 1.4285497917455511e-05,
"loss": 0.7627,
"step": 7550
},
{
"epoch": 0.58,
"learning_rate": 1.4247633472169634e-05,
"loss": 0.7769,
"step": 7600
},
{
"epoch": 0.58,
"learning_rate": 1.4209769026883758e-05,
"loss": 0.7253,
"step": 7650
},
{
"epoch": 0.58,
"learning_rate": 1.4171904581597882e-05,
"loss": 0.7422,
"step": 7700
},
{
"epoch": 0.59,
"learning_rate": 1.4134040136312004e-05,
"loss": 0.7459,
"step": 7750
},
{
"epoch": 0.59,
"learning_rate": 1.4096175691026128e-05,
"loss": 0.7355,
"step": 7800
},
{
"epoch": 0.59,
"learning_rate": 1.405831124574025e-05,
"loss": 0.7658,
"step": 7850
},
{
"epoch": 0.6,
"learning_rate": 1.4020446800454375e-05,
"loss": 0.8133,
"step": 7900
},
{
"epoch": 0.6,
"learning_rate": 1.3982582355168497e-05,
"loss": 0.7574,
"step": 7950
},
{
"epoch": 0.61,
"learning_rate": 1.3944717909882621e-05,
"loss": 0.7755,
"step": 8000
},
{
"epoch": 0.61,
"learning_rate": 1.3906853464596745e-05,
"loss": 0.7434,
"step": 8050
},
{
"epoch": 0.61,
"learning_rate": 1.3868989019310868e-05,
"loss": 0.7565,
"step": 8100
},
{
"epoch": 0.62,
"learning_rate": 1.3831124574024992e-05,
"loss": 0.7705,
"step": 8150
},
{
"epoch": 0.62,
"learning_rate": 1.3793260128739114e-05,
"loss": 0.7699,
"step": 8200
},
{
"epoch": 0.62,
"learning_rate": 1.3755395683453238e-05,
"loss": 0.7834,
"step": 8250
},
{
"epoch": 0.63,
"learning_rate": 1.3718288527073081e-05,
"loss": 0.7788,
"step": 8300
},
{
"epoch": 0.63,
"learning_rate": 1.3680424081787202e-05,
"loss": 0.7329,
"step": 8350
},
{
"epoch": 0.64,
"learning_rate": 1.3642559636501328e-05,
"loss": 0.7774,
"step": 8400
},
{
"epoch": 0.64,
"learning_rate": 1.3604695191215448e-05,
"loss": 0.7427,
"step": 8450
},
{
"epoch": 0.64,
"learning_rate": 1.3566830745929574e-05,
"loss": 0.7482,
"step": 8500
},
{
"epoch": 0.65,
"learning_rate": 1.3528966300643698e-05,
"loss": 0.7888,
"step": 8550
},
{
"epoch": 0.65,
"learning_rate": 1.349110185535782e-05,
"loss": 0.7535,
"step": 8600
},
{
"epoch": 0.66,
"learning_rate": 1.3453237410071945e-05,
"loss": 0.7456,
"step": 8650
},
{
"epoch": 0.66,
"learning_rate": 1.3415372964786067e-05,
"loss": 0.7964,
"step": 8700
},
{
"epoch": 0.66,
"learning_rate": 1.3377508519500191e-05,
"loss": 0.7913,
"step": 8750
},
{
"epoch": 0.67,
"learning_rate": 1.3339644074214313e-05,
"loss": 0.7745,
"step": 8800
},
{
"epoch": 0.67,
"learning_rate": 1.3301779628928438e-05,
"loss": 0.7595,
"step": 8850
},
{
"epoch": 0.67,
"learning_rate": 1.3263915183642562e-05,
"loss": 0.7117,
"step": 8900
},
{
"epoch": 0.68,
"learning_rate": 1.3226050738356684e-05,
"loss": 0.8142,
"step": 8950
},
{
"epoch": 0.68,
"learning_rate": 1.3188186293070808e-05,
"loss": 0.7539,
"step": 9000
},
{
"epoch": 0.69,
"learning_rate": 1.315032184778493e-05,
"loss": 0.7135,
"step": 9050
},
{
"epoch": 0.69,
"learning_rate": 1.3112457402499055e-05,
"loss": 0.7883,
"step": 9100
},
{
"epoch": 0.69,
"learning_rate": 1.3074592957213177e-05,
"loss": 0.7335,
"step": 9150
},
{
"epoch": 0.7,
"learning_rate": 1.3036728511927301e-05,
"loss": 0.771,
"step": 9200
},
{
"epoch": 0.7,
"learning_rate": 1.2998864066641425e-05,
"loss": 0.7341,
"step": 9250
},
{
"epoch": 0.7,
"learning_rate": 1.2960999621355547e-05,
"loss": 0.7177,
"step": 9300
},
{
"epoch": 0.71,
"learning_rate": 1.2923135176069672e-05,
"loss": 0.7784,
"step": 9350
},
{
"epoch": 0.71,
"learning_rate": 1.2885270730783794e-05,
"loss": 0.7906,
"step": 9400
},
{
"epoch": 0.72,
"learning_rate": 1.2847406285497918e-05,
"loss": 0.7762,
"step": 9450
},
{
"epoch": 0.72,
"learning_rate": 1.280954184021204e-05,
"loss": 0.7482,
"step": 9500
},
{
"epoch": 0.72,
"learning_rate": 1.2771677394926164e-05,
"loss": 0.7069,
"step": 9550
},
{
"epoch": 0.73,
"learning_rate": 1.2734570238546007e-05,
"loss": 0.761,
"step": 9600
},
{
"epoch": 0.73,
"learning_rate": 1.269670579326013e-05,
"loss": 0.7375,
"step": 9650
},
{
"epoch": 0.73,
"learning_rate": 1.2658841347974254e-05,
"loss": 0.7937,
"step": 9700
},
{
"epoch": 0.74,
"learning_rate": 1.2620976902688378e-05,
"loss": 0.7983,
"step": 9750
},
{
"epoch": 0.74,
"learning_rate": 1.25831124574025e-05,
"loss": 0.7339,
"step": 9800
},
{
"epoch": 0.75,
"learning_rate": 1.2545248012116624e-05,
"loss": 0.7671,
"step": 9850
},
{
"epoch": 0.75,
"learning_rate": 1.2507383566830747e-05,
"loss": 0.7311,
"step": 9900
},
{
"epoch": 0.75,
"learning_rate": 1.2469519121544871e-05,
"loss": 0.7652,
"step": 9950
},
{
"epoch": 0.76,
"learning_rate": 1.2431654676258993e-05,
"loss": 0.7549,
"step": 10000
},
{
"epoch": 0.76,
"learning_rate": 1.2393790230973117e-05,
"loss": 0.7802,
"step": 10050
},
{
"epoch": 0.76,
"learning_rate": 1.2355925785687241e-05,
"loss": 0.7573,
"step": 10100
},
{
"epoch": 0.77,
"learning_rate": 1.2318061340401364e-05,
"loss": 0.695,
"step": 10150
},
{
"epoch": 0.77,
"learning_rate": 1.2280196895115488e-05,
"loss": 0.7214,
"step": 10200
},
{
"epoch": 0.78,
"learning_rate": 1.224233244982961e-05,
"loss": 0.772,
"step": 10250
},
{
"epoch": 0.78,
"learning_rate": 1.2204468004543734e-05,
"loss": 0.7855,
"step": 10300
},
{
"epoch": 0.78,
"learning_rate": 1.2166603559257857e-05,
"loss": 0.7345,
"step": 10350
},
{
"epoch": 0.79,
"learning_rate": 1.212873911397198e-05,
"loss": 0.7416,
"step": 10400
},
{
"epoch": 0.79,
"learning_rate": 1.2090874668686105e-05,
"loss": 0.7723,
"step": 10450
},
{
"epoch": 0.8,
"learning_rate": 1.2053010223400227e-05,
"loss": 0.7446,
"step": 10500
},
{
"epoch": 0.8,
"learning_rate": 1.2015145778114351e-05,
"loss": 0.7657,
"step": 10550
},
{
"epoch": 0.8,
"learning_rate": 1.1977281332828474e-05,
"loss": 0.7944,
"step": 10600
},
{
"epoch": 0.81,
"learning_rate": 1.1939416887542598e-05,
"loss": 0.7598,
"step": 10650
},
{
"epoch": 0.81,
"learning_rate": 1.1901552442256724e-05,
"loss": 0.7212,
"step": 10700
},
{
"epoch": 0.81,
"learning_rate": 1.1863687996970846e-05,
"loss": 0.7564,
"step": 10750
},
{
"epoch": 0.82,
"learning_rate": 1.182582355168497e-05,
"loss": 0.7552,
"step": 10800
},
{
"epoch": 0.82,
"learning_rate": 1.1787959106399092e-05,
"loss": 0.7582,
"step": 10850
},
{
"epoch": 0.83,
"learning_rate": 1.1750094661113217e-05,
"loss": 0.7506,
"step": 10900
},
{
"epoch": 0.83,
"learning_rate": 1.1712230215827339e-05,
"loss": 0.7738,
"step": 10950
},
{
"epoch": 0.83,
"learning_rate": 1.1674365770541463e-05,
"loss": 0.7295,
"step": 11000
},
{
"epoch": 0.84,
"learning_rate": 1.1636501325255587e-05,
"loss": 0.7627,
"step": 11050
},
{
"epoch": 0.84,
"learning_rate": 1.159863687996971e-05,
"loss": 0.7918,
"step": 11100
},
{
"epoch": 0.84,
"learning_rate": 1.1560772434683833e-05,
"loss": 0.7235,
"step": 11150
},
{
"epoch": 0.85,
"learning_rate": 1.1522907989397956e-05,
"loss": 0.7988,
"step": 11200
},
{
"epoch": 0.85,
"learning_rate": 1.148504354411208e-05,
"loss": 0.7646,
"step": 11250
},
{
"epoch": 0.86,
"learning_rate": 1.1447179098826202e-05,
"loss": 0.7794,
"step": 11300
},
{
"epoch": 0.86,
"learning_rate": 1.1409314653540326e-05,
"loss": 0.79,
"step": 11350
},
{
"epoch": 0.86,
"learning_rate": 1.137145020825445e-05,
"loss": 0.7291,
"step": 11400
},
{
"epoch": 0.87,
"learning_rate": 1.1333585762968573e-05,
"loss": 0.7682,
"step": 11450
},
{
"epoch": 0.87,
"learning_rate": 1.1295721317682697e-05,
"loss": 0.8078,
"step": 11500
},
{
"epoch": 0.87,
"learning_rate": 1.125785687239682e-05,
"loss": 0.8196,
"step": 11550
},
{
"epoch": 0.88,
"learning_rate": 1.1219992427110943e-05,
"loss": 0.7173,
"step": 11600
},
{
"epoch": 0.88,
"learning_rate": 1.1182127981825066e-05,
"loss": 0.7908,
"step": 11650
},
{
"epoch": 0.89,
"learning_rate": 1.114426353653919e-05,
"loss": 0.7784,
"step": 11700
},
{
"epoch": 0.89,
"learning_rate": 1.1106399091253316e-05,
"loss": 0.7317,
"step": 11750
},
{
"epoch": 0.89,
"learning_rate": 1.1068534645967436e-05,
"loss": 0.7497,
"step": 11800
},
{
"epoch": 0.9,
"learning_rate": 1.1030670200681562e-05,
"loss": 0.7504,
"step": 11850
},
{
"epoch": 0.9,
"learning_rate": 1.0992805755395683e-05,
"loss": 0.7962,
"step": 11900
},
{
"epoch": 0.9,
"learning_rate": 1.0954941310109809e-05,
"loss": 0.7615,
"step": 11950
},
{
"epoch": 0.91,
"learning_rate": 1.0917076864823933e-05,
"loss": 0.7485,
"step": 12000
},
{
"epoch": 0.91,
"learning_rate": 1.0879969708443772e-05,
"loss": 0.7088,
"step": 12050
},
{
"epoch": 0.92,
"learning_rate": 1.0842105263157896e-05,
"loss": 0.7479,
"step": 12100
},
{
"epoch": 0.92,
"learning_rate": 1.0804240817872019e-05,
"loss": 0.7571,
"step": 12150
},
{
"epoch": 0.92,
"learning_rate": 1.0766376372586143e-05,
"loss": 0.7576,
"step": 12200
},
{
"epoch": 0.93,
"learning_rate": 1.0728511927300267e-05,
"loss": 0.7267,
"step": 12250
},
{
"epoch": 0.93,
"learning_rate": 1.069064748201439e-05,
"loss": 0.7521,
"step": 12300
},
{
"epoch": 0.94,
"learning_rate": 1.0652783036728513e-05,
"loss": 0.7616,
"step": 12350
},
{
"epoch": 0.94,
"learning_rate": 1.0614918591442636e-05,
"loss": 0.7232,
"step": 12400
},
{
"epoch": 0.94,
"learning_rate": 1.057705414615676e-05,
"loss": 0.797,
"step": 12450
},
{
"epoch": 0.95,
"learning_rate": 1.0539189700870882e-05,
"loss": 0.7575,
"step": 12500
},
{
"epoch": 0.95,
"learning_rate": 1.0501325255585006e-05,
"loss": 0.7384,
"step": 12550
},
{
"epoch": 0.95,
"learning_rate": 1.046346081029913e-05,
"loss": 0.7523,
"step": 12600
},
{
"epoch": 0.96,
"learning_rate": 1.0425596365013253e-05,
"loss": 0.7271,
"step": 12650
},
{
"epoch": 0.96,
"learning_rate": 1.0387731919727377e-05,
"loss": 0.7746,
"step": 12700
},
{
"epoch": 0.97,
"learning_rate": 1.03498674744415e-05,
"loss": 0.7463,
"step": 12750
},
{
"epoch": 0.97,
"learning_rate": 1.0312003029155623e-05,
"loss": 0.7234,
"step": 12800
},
{
"epoch": 0.97,
"learning_rate": 1.0274138583869746e-05,
"loss": 0.7462,
"step": 12850
},
{
"epoch": 0.98,
"learning_rate": 1.023627413858387e-05,
"loss": 0.7391,
"step": 12900
},
{
"epoch": 0.98,
"learning_rate": 1.0198409693297995e-05,
"loss": 0.7761,
"step": 12950
},
{
"epoch": 0.98,
"learning_rate": 1.0160545248012116e-05,
"loss": 0.7849,
"step": 13000
},
{
"epoch": 0.99,
"learning_rate": 1.0122680802726242e-05,
"loss": 0.7391,
"step": 13050
},
{
"epoch": 0.99,
"learning_rate": 1.0084816357440364e-05,
"loss": 0.7203,
"step": 13100
},
{
"epoch": 1.0,
"learning_rate": 1.0046951912154488e-05,
"loss": 0.7439,
"step": 13150
},
{
"epoch": 1.0,
"learning_rate": 1.0009087466868612e-05,
"loss": 0.7551,
"step": 13200
},
{
"epoch": 1.0,
"learning_rate": 9.971223021582735e-06,
"loss": 0.6935,
"step": 13250
},
{
"epoch": 1.01,
"learning_rate": 9.933358576296857e-06,
"loss": 0.7541,
"step": 13300
},
{
"epoch": 1.01,
"learning_rate": 9.895494131010981e-06,
"loss": 0.7828,
"step": 13350
},
{
"epoch": 1.01,
"learning_rate": 9.857629685725105e-06,
"loss": 0.7204,
"step": 13400
},
{
"epoch": 1.02,
"learning_rate": 9.819765240439228e-06,
"loss": 0.7374,
"step": 13450
},
{
"epoch": 1.02,
"learning_rate": 9.781900795153352e-06,
"loss": 0.7477,
"step": 13500
},
{
"epoch": 1.03,
"learning_rate": 9.744036349867474e-06,
"loss": 0.7516,
"step": 13550
},
{
"epoch": 1.03,
"learning_rate": 9.706171904581598e-06,
"loss": 0.727,
"step": 13600
},
{
"epoch": 1.03,
"learning_rate": 9.668307459295722e-06,
"loss": 0.7343,
"step": 13650
},
{
"epoch": 1.04,
"learning_rate": 9.630443014009846e-06,
"loss": 0.7739,
"step": 13700
},
{
"epoch": 1.04,
"learning_rate": 9.592578568723969e-06,
"loss": 0.7447,
"step": 13750
},
{
"epoch": 1.04,
"learning_rate": 9.554714123438093e-06,
"loss": 0.7607,
"step": 13800
},
{
"epoch": 1.05,
"learning_rate": 9.516849678152215e-06,
"loss": 0.79,
"step": 13850
},
{
"epoch": 1.05,
"learning_rate": 9.47898523286634e-06,
"loss": 0.7209,
"step": 13900
},
{
"epoch": 1.06,
"learning_rate": 9.441120787580462e-06,
"loss": 0.7745,
"step": 13950
},
{
"epoch": 1.06,
"learning_rate": 9.403256342294586e-06,
"loss": 0.7179,
"step": 14000
},
{
"epoch": 1.06,
"learning_rate": 9.36539189700871e-06,
"loss": 0.774,
"step": 14050
},
{
"epoch": 1.07,
"learning_rate": 9.327527451722832e-06,
"loss": 0.7592,
"step": 14100
},
{
"epoch": 1.07,
"learning_rate": 9.289663006436956e-06,
"loss": 0.7598,
"step": 14150
},
{
"epoch": 1.08,
"learning_rate": 9.25179856115108e-06,
"loss": 0.7665,
"step": 14200
},
{
"epoch": 1.08,
"learning_rate": 9.213934115865203e-06,
"loss": 0.7529,
"step": 14250
},
{
"epoch": 1.08,
"learning_rate": 9.176069670579327e-06,
"loss": 0.6873,
"step": 14300
},
{
"epoch": 1.09,
"learning_rate": 9.138205225293451e-06,
"loss": 0.7574,
"step": 14350
},
{
"epoch": 1.09,
"learning_rate": 9.100340780007573e-06,
"loss": 0.7215,
"step": 14400
},
{
"epoch": 1.09,
"learning_rate": 9.062476334721697e-06,
"loss": 0.7411,
"step": 14450
},
{
"epoch": 1.1,
"learning_rate": 9.02461188943582e-06,
"loss": 0.7627,
"step": 14500
},
{
"epoch": 1.1,
"learning_rate": 8.986747444149944e-06,
"loss": 0.7313,
"step": 14550
},
{
"epoch": 1.11,
"learning_rate": 8.948882998864066e-06,
"loss": 0.765,
"step": 14600
},
{
"epoch": 1.11,
"learning_rate": 8.911018553578192e-06,
"loss": 0.7696,
"step": 14650
},
{
"epoch": 1.11,
"learning_rate": 8.873154108292314e-06,
"loss": 0.7305,
"step": 14700
},
{
"epoch": 1.12,
"learning_rate": 8.835289663006439e-06,
"loss": 0.7653,
"step": 14750
},
{
"epoch": 1.12,
"learning_rate": 8.797425217720561e-06,
"loss": 0.7444,
"step": 14800
},
{
"epoch": 1.12,
"learning_rate": 8.759560772434685e-06,
"loss": 0.7858,
"step": 14850
},
{
"epoch": 1.13,
"learning_rate": 8.721696327148807e-06,
"loss": 0.7634,
"step": 14900
},
{
"epoch": 1.13,
"learning_rate": 8.683831881862931e-06,
"loss": 0.7471,
"step": 14950
},
{
"epoch": 1.14,
"learning_rate": 8.645967436577056e-06,
"loss": 0.7065,
"step": 15000
},
{
"epoch": 1.14,
"learning_rate": 8.608102991291178e-06,
"loss": 0.7659,
"step": 15050
},
{
"epoch": 1.14,
"learning_rate": 8.570238546005302e-06,
"loss": 0.7085,
"step": 15100
},
{
"epoch": 1.15,
"learning_rate": 8.532374100719424e-06,
"loss": 0.7312,
"step": 15150
},
{
"epoch": 1.15,
"learning_rate": 8.494509655433548e-06,
"loss": 0.814,
"step": 15200
},
{
"epoch": 1.15,
"learning_rate": 8.45664521014767e-06,
"loss": 0.7194,
"step": 15250
},
{
"epoch": 1.16,
"learning_rate": 8.418780764861797e-06,
"loss": 0.7266,
"step": 15300
},
{
"epoch": 1.16,
"learning_rate": 8.380916319575919e-06,
"loss": 0.7345,
"step": 15350
},
{
"epoch": 1.17,
"learning_rate": 8.343051874290043e-06,
"loss": 0.7121,
"step": 15400
},
{
"epoch": 1.17,
"learning_rate": 8.305187429004165e-06,
"loss": 0.7716,
"step": 15450
},
{
"epoch": 1.17,
"learning_rate": 8.26732298371829e-06,
"loss": 0.7601,
"step": 15500
},
{
"epoch": 1.18,
"learning_rate": 8.229458538432412e-06,
"loss": 0.736,
"step": 15550
},
{
"epoch": 1.18,
"learning_rate": 8.191594093146536e-06,
"loss": 0.7659,
"step": 15600
},
{
"epoch": 1.19,
"learning_rate": 8.15372964786066e-06,
"loss": 0.7504,
"step": 15650
},
{
"epoch": 1.19,
"learning_rate": 8.115865202574782e-06,
"loss": 0.7367,
"step": 15700
},
{
"epoch": 1.19,
"learning_rate": 8.078000757288907e-06,
"loss": 0.739,
"step": 15750
},
{
"epoch": 1.2,
"learning_rate": 8.040136312003029e-06,
"loss": 0.7421,
"step": 15800
},
{
"epoch": 1.2,
"learning_rate": 8.002271866717153e-06,
"loss": 0.7181,
"step": 15850
},
{
"epoch": 1.2,
"learning_rate": 7.964407421431277e-06,
"loss": 0.7699,
"step": 15900
},
{
"epoch": 1.21,
"learning_rate": 7.926542976145401e-06,
"loss": 0.7725,
"step": 15950
},
{
"epoch": 1.21,
"learning_rate": 7.888678530859524e-06,
"loss": 0.7658,
"step": 16000
},
{
"epoch": 1.22,
"learning_rate": 7.850814085573648e-06,
"loss": 0.6961,
"step": 16050
},
{
"epoch": 1.22,
"learning_rate": 7.813706929193487e-06,
"loss": 0.7598,
"step": 16100
},
{
"epoch": 1.22,
"learning_rate": 7.775842483907611e-06,
"loss": 0.7684,
"step": 16150
},
{
"epoch": 1.23,
"learning_rate": 7.737978038621735e-06,
"loss": 0.7258,
"step": 16200
},
{
"epoch": 1.23,
"learning_rate": 7.700113593335858e-06,
"loss": 0.7602,
"step": 16250
},
{
"epoch": 1.23,
"learning_rate": 7.662249148049982e-06,
"loss": 0.775,
"step": 16300
},
{
"epoch": 1.24,
"learning_rate": 7.624384702764105e-06,
"loss": 0.7311,
"step": 16350
},
{
"epoch": 1.24,
"learning_rate": 7.586520257478228e-06,
"loss": 0.7252,
"step": 16400
},
{
"epoch": 1.25,
"learning_rate": 7.548655812192352e-06,
"loss": 0.7078,
"step": 16450
},
{
"epoch": 1.25,
"learning_rate": 7.5107913669064756e-06,
"loss": 0.7607,
"step": 16500
},
{
"epoch": 1.25,
"learning_rate": 7.472926921620599e-06,
"loss": 0.7462,
"step": 16550
},
{
"epoch": 1.26,
"learning_rate": 7.435062476334722e-06,
"loss": 0.7574,
"step": 16600
},
{
"epoch": 1.26,
"learning_rate": 7.397198031048845e-06,
"loss": 0.7223,
"step": 16650
},
{
"epoch": 1.26,
"learning_rate": 7.3593335857629685e-06,
"loss": 0.7306,
"step": 16700
},
{
"epoch": 1.27,
"learning_rate": 7.321469140477092e-06,
"loss": 0.7638,
"step": 16750
},
{
"epoch": 1.27,
"learning_rate": 7.283604695191217e-06,
"loss": 0.7236,
"step": 16800
},
{
"epoch": 1.28,
"learning_rate": 7.24574024990534e-06,
"loss": 0.7381,
"step": 16850
},
{
"epoch": 1.28,
"learning_rate": 7.207875804619463e-06,
"loss": 0.7257,
"step": 16900
},
{
"epoch": 1.28,
"learning_rate": 7.170011359333586e-06,
"loss": 0.7516,
"step": 16950
},
{
"epoch": 1.29,
"learning_rate": 7.132904202953427e-06,
"loss": 0.7189,
"step": 17000
},
{
"epoch": 1.29,
"learning_rate": 7.095039757667551e-06,
"loss": 0.7672,
"step": 17050
},
{
"epoch": 1.29,
"learning_rate": 7.057175312381674e-06,
"loss": 0.7289,
"step": 17100
},
{
"epoch": 1.3,
"learning_rate": 7.019310867095797e-06,
"loss": 0.7194,
"step": 17150
},
{
"epoch": 1.3,
"learning_rate": 6.9814464218099205e-06,
"loss": 0.6933,
"step": 17200
},
{
"epoch": 1.31,
"learning_rate": 6.943581976524045e-06,
"loss": 0.8152,
"step": 17250
},
{
"epoch": 1.31,
"learning_rate": 6.905717531238168e-06,
"loss": 0.76,
"step": 17300
},
{
"epoch": 1.31,
"learning_rate": 6.867853085952292e-06,
"loss": 0.7271,
"step": 17350
},
{
"epoch": 1.32,
"learning_rate": 6.829988640666415e-06,
"loss": 0.7494,
"step": 17400
},
{
"epoch": 1.32,
"learning_rate": 6.792124195380538e-06,
"loss": 0.7738,
"step": 17450
},
{
"epoch": 1.33,
"learning_rate": 6.754259750094662e-06,
"loss": 0.7071,
"step": 17500
},
{
"epoch": 1.33,
"learning_rate": 6.716395304808785e-06,
"loss": 0.7339,
"step": 17550
},
{
"epoch": 1.33,
"learning_rate": 6.678530859522908e-06,
"loss": 0.7506,
"step": 17600
},
{
"epoch": 1.34,
"learning_rate": 6.640666414237032e-06,
"loss": 0.7121,
"step": 17650
},
{
"epoch": 1.34,
"learning_rate": 6.602801968951155e-06,
"loss": 0.7018,
"step": 17700
},
{
"epoch": 1.34,
"learning_rate": 6.564937523665279e-06,
"loss": 0.7677,
"step": 17750
},
{
"epoch": 1.35,
"learning_rate": 6.527073078379403e-06,
"loss": 0.6925,
"step": 17800
},
{
"epoch": 1.35,
"learning_rate": 6.489208633093526e-06,
"loss": 0.7205,
"step": 17850
},
{
"epoch": 1.36,
"learning_rate": 6.451344187807649e-06,
"loss": 0.7163,
"step": 17900
},
{
"epoch": 1.36,
"learning_rate": 6.413479742521772e-06,
"loss": 0.752,
"step": 17950
},
{
"epoch": 1.36,
"learning_rate": 6.3756152972358965e-06,
"loss": 0.7706,
"step": 18000
},
{
"epoch": 1.37,
"learning_rate": 6.33775085195002e-06,
"loss": 0.7556,
"step": 18050
},
{
"epoch": 1.37,
"learning_rate": 6.299886406664143e-06,
"loss": 0.7626,
"step": 18100
},
{
"epoch": 1.37,
"learning_rate": 6.262021961378266e-06,
"loss": 0.7447,
"step": 18150
},
{
"epoch": 1.38,
"learning_rate": 6.224157516092389e-06,
"loss": 0.741,
"step": 18200
},
{
"epoch": 1.38,
"learning_rate": 6.186293070806513e-06,
"loss": 0.7567,
"step": 18250
},
{
"epoch": 1.39,
"learning_rate": 6.1484286255206375e-06,
"loss": 0.7364,
"step": 18300
},
{
"epoch": 1.39,
"learning_rate": 6.110564180234761e-06,
"loss": 0.739,
"step": 18350
},
{
"epoch": 1.39,
"learning_rate": 6.072699734948884e-06,
"loss": 0.7309,
"step": 18400
},
{
"epoch": 1.4,
"learning_rate": 6.034835289663007e-06,
"loss": 0.7385,
"step": 18450
},
{
"epoch": 1.4,
"learning_rate": 5.9969708443771305e-06,
"loss": 0.7588,
"step": 18500
},
{
"epoch": 1.4,
"learning_rate": 5.959106399091254e-06,
"loss": 0.7442,
"step": 18550
},
{
"epoch": 1.41,
"learning_rate": 5.921241953805377e-06,
"loss": 0.7871,
"step": 18600
},
{
"epoch": 1.41,
"learning_rate": 5.883377508519501e-06,
"loss": 0.7817,
"step": 18650
},
{
"epoch": 1.42,
"learning_rate": 5.845513063233624e-06,
"loss": 0.751,
"step": 18700
},
{
"epoch": 1.42,
"learning_rate": 5.8076486179477474e-06,
"loss": 0.7379,
"step": 18750
},
{
"epoch": 1.42,
"learning_rate": 5.769784172661871e-06,
"loss": 0.7151,
"step": 18800
},
{
"epoch": 1.43,
"learning_rate": 5.731919727375994e-06,
"loss": 0.7989,
"step": 18850
},
{
"epoch": 1.43,
"learning_rate": 5.694055282090117e-06,
"loss": 0.752,
"step": 18900
},
{
"epoch": 1.43,
"learning_rate": 5.656190836804242e-06,
"loss": 0.755,
"step": 18950
},
{
"epoch": 1.44,
"learning_rate": 5.618326391518365e-06,
"loss": 0.7355,
"step": 19000
},
{
"epoch": 1.44,
"learning_rate": 5.5804619462324885e-06,
"loss": 0.668,
"step": 19050
},
{
"epoch": 1.45,
"learning_rate": 5.542597500946612e-06,
"loss": 0.763,
"step": 19100
},
{
"epoch": 1.45,
"learning_rate": 5.504733055660735e-06,
"loss": 0.7369,
"step": 19150
},
{
"epoch": 1.45,
"learning_rate": 5.466868610374858e-06,
"loss": 0.7389,
"step": 19200
},
{
"epoch": 1.46,
"learning_rate": 5.429004165088982e-06,
"loss": 0.7435,
"step": 19250
},
{
"epoch": 1.46,
"learning_rate": 5.3911397198031055e-06,
"loss": 0.7103,
"step": 19300
},
{
"epoch": 1.47,
"learning_rate": 5.353275274517229e-06,
"loss": 0.7,
"step": 19350
},
{
"epoch": 1.47,
"learning_rate": 5.315410829231352e-06,
"loss": 0.7269,
"step": 19400
},
{
"epoch": 1.47,
"learning_rate": 5.277546383945475e-06,
"loss": 0.713,
"step": 19450
},
{
"epoch": 1.48,
"learning_rate": 5.2396819386595984e-06,
"loss": 0.7442,
"step": 19500
},
{
"epoch": 1.48,
"learning_rate": 5.201817493373722e-06,
"loss": 0.731,
"step": 19550
},
{
"epoch": 1.48,
"learning_rate": 5.163953048087847e-06,
"loss": 0.7389,
"step": 19600
},
{
"epoch": 1.49,
"learning_rate": 5.12608860280197e-06,
"loss": 0.7348,
"step": 19650
},
{
"epoch": 1.49,
"learning_rate": 5.088224157516093e-06,
"loss": 0.7607,
"step": 19700
},
{
"epoch": 1.5,
"learning_rate": 5.050359712230216e-06,
"loss": 0.7151,
"step": 19750
},
{
"epoch": 1.5,
"learning_rate": 5.0124952669443395e-06,
"loss": 0.7002,
"step": 19800
},
{
"epoch": 1.5,
"learning_rate": 4.974630821658464e-06,
"loss": 0.7734,
"step": 19850
},
{
"epoch": 1.51,
"learning_rate": 4.936766376372587e-06,
"loss": 0.7506,
"step": 19900
},
{
"epoch": 1.51,
"learning_rate": 4.89890193108671e-06,
"loss": 0.7369,
"step": 19950
},
{
"epoch": 1.51,
"learning_rate": 4.861037485800833e-06,
"loss": 0.7118,
"step": 20000
},
{
"epoch": 1.52,
"learning_rate": 4.8231730405149565e-06,
"loss": 0.7152,
"step": 20050
},
{
"epoch": 1.52,
"learning_rate": 4.78530859522908e-06,
"loss": 0.7366,
"step": 20100
},
{
"epoch": 1.53,
"learning_rate": 4.747444149943204e-06,
"loss": 0.7086,
"step": 20150
},
{
"epoch": 1.53,
"learning_rate": 4.709579704657327e-06,
"loss": 0.7306,
"step": 20200
},
{
"epoch": 1.53,
"learning_rate": 4.67171525937145e-06,
"loss": 0.7266,
"step": 20250
},
{
"epoch": 1.54,
"learning_rate": 4.633850814085574e-06,
"loss": 0.7241,
"step": 20300
},
{
"epoch": 1.54,
"learning_rate": 4.596743657705415e-06,
"loss": 0.708,
"step": 20350
},
{
"epoch": 1.54,
"learning_rate": 4.558879212419539e-06,
"loss": 0.7544,
"step": 20400
},
{
"epoch": 1.55,
"learning_rate": 4.521014767133662e-06,
"loss": 0.739,
"step": 20450
},
{
"epoch": 1.55,
"learning_rate": 4.483150321847785e-06,
"loss": 0.7492,
"step": 20500
},
{
"epoch": 1.56,
"learning_rate": 4.4452858765619086e-06,
"loss": 0.7323,
"step": 20550
},
{
"epoch": 1.56,
"learning_rate": 4.407421431276032e-06,
"loss": 0.7431,
"step": 20600
},
{
"epoch": 1.56,
"learning_rate": 4.369556985990155e-06,
"loss": 0.7302,
"step": 20650
},
{
"epoch": 1.57,
"learning_rate": 4.331692540704279e-06,
"loss": 0.7112,
"step": 20700
},
{
"epoch": 1.57,
"learning_rate": 4.293828095418402e-06,
"loss": 0.7359,
"step": 20750
},
{
"epoch": 1.58,
"learning_rate": 4.2559636501325256e-06,
"loss": 0.7274,
"step": 20800
},
{
"epoch": 1.58,
"learning_rate": 4.21809920484665e-06,
"loss": 0.7307,
"step": 20850
},
{
"epoch": 1.58,
"learning_rate": 4.180234759560773e-06,
"loss": 0.7339,
"step": 20900
},
{
"epoch": 1.59,
"learning_rate": 4.142370314274896e-06,
"loss": 0.7743,
"step": 20950
},
{
"epoch": 1.59,
"learning_rate": 4.104505868989019e-06,
"loss": 0.7424,
"step": 21000
},
{
"epoch": 1.59,
"learning_rate": 4.066641423703143e-06,
"loss": 0.7152,
"step": 21050
},
{
"epoch": 1.6,
"learning_rate": 4.028776978417267e-06,
"loss": 0.707,
"step": 21100
},
{
"epoch": 1.6,
"learning_rate": 3.99091253313139e-06,
"loss": 0.7538,
"step": 21150
},
{
"epoch": 1.61,
"learning_rate": 3.953048087845514e-06,
"loss": 0.7293,
"step": 21200
},
{
"epoch": 1.61,
"learning_rate": 3.915183642559637e-06,
"loss": 0.7524,
"step": 21250
},
{
"epoch": 1.61,
"learning_rate": 3.87731919727376e-06,
"loss": 0.7938,
"step": 21300
},
{
"epoch": 1.62,
"learning_rate": 3.839454751987884e-06,
"loss": 0.7266,
"step": 21350
},
{
"epoch": 1.62,
"learning_rate": 3.8015903067020073e-06,
"loss": 0.7566,
"step": 21400
},
{
"epoch": 1.62,
"learning_rate": 3.7637258614161305e-06,
"loss": 0.713,
"step": 21450
},
{
"epoch": 1.63,
"learning_rate": 3.725861416130254e-06,
"loss": 0.7546,
"step": 21500
},
{
"epoch": 1.63,
"learning_rate": 3.6879969708443774e-06,
"loss": 0.6955,
"step": 21550
},
{
"epoch": 1.64,
"learning_rate": 3.6501325255585006e-06,
"loss": 0.7084,
"step": 21600
},
{
"epoch": 1.64,
"learning_rate": 3.6122680802726247e-06,
"loss": 0.7447,
"step": 21650
},
{
"epoch": 1.64,
"learning_rate": 3.574403634986748e-06,
"loss": 0.7173,
"step": 21700
},
{
"epoch": 1.65,
"learning_rate": 3.536539189700871e-06,
"loss": 0.7324,
"step": 21750
},
{
"epoch": 1.65,
"learning_rate": 3.498674744414995e-06,
"loss": 0.7561,
"step": 21800
},
{
"epoch": 1.65,
"learning_rate": 3.460810299129118e-06,
"loss": 0.7505,
"step": 21850
},
{
"epoch": 1.66,
"learning_rate": 3.4229458538432413e-06,
"loss": 0.7337,
"step": 21900
},
{
"epoch": 1.66,
"learning_rate": 3.3850814085573645e-06,
"loss": 0.7753,
"step": 21950
},
{
"epoch": 1.67,
"learning_rate": 3.3472169632714886e-06,
"loss": 0.7703,
"step": 22000
},
{
"epoch": 1.67,
"learning_rate": 3.309352517985612e-06,
"loss": 0.7384,
"step": 22050
},
{
"epoch": 1.67,
"learning_rate": 3.271488072699735e-06,
"loss": 0.7759,
"step": 22100
},
{
"epoch": 1.68,
"learning_rate": 3.2336236274138587e-06,
"loss": 0.7446,
"step": 22150
},
{
"epoch": 1.68,
"learning_rate": 3.195759182127982e-06,
"loss": 0.7437,
"step": 22200
},
{
"epoch": 1.68,
"learning_rate": 3.157894736842105e-06,
"loss": 0.7817,
"step": 22250
},
{
"epoch": 1.69,
"learning_rate": 3.1200302915562292e-06,
"loss": 0.7396,
"step": 22300
},
{
"epoch": 1.69,
"learning_rate": 3.0821658462703525e-06,
"loss": 0.743,
"step": 22350
},
{
"epoch": 1.7,
"learning_rate": 3.0443014009844757e-06,
"loss": 0.7597,
"step": 22400
},
{
"epoch": 1.7,
"learning_rate": 3.0064369556985994e-06,
"loss": 0.6844,
"step": 22450
},
{
"epoch": 1.7,
"learning_rate": 2.9685725104127226e-06,
"loss": 0.7737,
"step": 22500
},
{
"epoch": 1.71,
"learning_rate": 2.930708065126846e-06,
"loss": 0.7024,
"step": 22550
},
{
"epoch": 1.71,
"learning_rate": 2.89284361984097e-06,
"loss": 0.7136,
"step": 22600
},
{
"epoch": 1.72,
"learning_rate": 2.854979174555093e-06,
"loss": 0.7482,
"step": 22650
},
{
"epoch": 1.72,
"learning_rate": 2.8171147292692164e-06,
"loss": 0.7608,
"step": 22700
},
{
"epoch": 1.72,
"learning_rate": 2.7792502839833396e-06,
"loss": 0.7442,
"step": 22750
},
{
"epoch": 1.73,
"learning_rate": 2.7413858386974632e-06,
"loss": 0.6687,
"step": 22800
},
{
"epoch": 1.73,
"learning_rate": 2.703521393411587e-06,
"loss": 0.7594,
"step": 22850
},
{
"epoch": 1.73,
"learning_rate": 2.66565694812571e-06,
"loss": 0.7042,
"step": 22900
},
{
"epoch": 1.74,
"learning_rate": 2.6277925028398338e-06,
"loss": 0.7556,
"step": 22950
},
{
"epoch": 1.74,
"learning_rate": 2.589928057553957e-06,
"loss": 0.7357,
"step": 23000
},
{
"epoch": 1.75,
"learning_rate": 2.5520636122680802e-06,
"loss": 0.7292,
"step": 23050
},
{
"epoch": 1.75,
"learning_rate": 2.5141991669822043e-06,
"loss": 0.6899,
"step": 23100
},
{
"epoch": 1.75,
"learning_rate": 2.4763347216963275e-06,
"loss": 0.729,
"step": 23150
},
{
"epoch": 1.76,
"learning_rate": 2.4384702764104508e-06,
"loss": 0.7473,
"step": 23200
},
{
"epoch": 1.76,
"learning_rate": 2.400605831124574e-06,
"loss": 0.7264,
"step": 23250
},
{
"epoch": 1.76,
"learning_rate": 2.3627413858386977e-06,
"loss": 0.7314,
"step": 23300
},
{
"epoch": 1.77,
"learning_rate": 2.3256342294585385e-06,
"loss": 0.6819,
"step": 23350
},
{
"epoch": 1.77,
"learning_rate": 2.287769784172662e-06,
"loss": 0.7637,
"step": 23400
},
{
"epoch": 1.78,
"learning_rate": 2.2499053388867854e-06,
"loss": 0.727,
"step": 23450
},
{
"epoch": 1.78,
"learning_rate": 2.2120408936009086e-06,
"loss": 0.7044,
"step": 23500
},
{
"epoch": 1.78,
"learning_rate": 2.1741764483150323e-06,
"loss": 0.6926,
"step": 23550
},
{
"epoch": 1.79,
"learning_rate": 2.136312003029156e-06,
"loss": 0.7406,
"step": 23600
},
{
"epoch": 1.79,
"learning_rate": 2.098447557743279e-06,
"loss": 0.7572,
"step": 23650
},
{
"epoch": 1.79,
"learning_rate": 2.060583112457403e-06,
"loss": 0.7239,
"step": 23700
},
{
"epoch": 1.8,
"learning_rate": 2.022718667171526e-06,
"loss": 0.7353,
"step": 23750
},
{
"epoch": 1.8,
"learning_rate": 1.9848542218856497e-06,
"loss": 0.7287,
"step": 23800
},
{
"epoch": 1.81,
"learning_rate": 1.946989776599773e-06,
"loss": 0.7461,
"step": 23850
},
{
"epoch": 1.81,
"learning_rate": 1.909125331313896e-06,
"loss": 0.7489,
"step": 23900
},
{
"epoch": 1.81,
"learning_rate": 1.8712608860280198e-06,
"loss": 0.7156,
"step": 23950
},
{
"epoch": 1.82,
"learning_rate": 1.8333964407421435e-06,
"loss": 0.6765,
"step": 24000
},
{
"epoch": 1.82,
"learning_rate": 1.7955319954562667e-06,
"loss": 0.7496,
"step": 24050
},
{
"epoch": 1.82,
"learning_rate": 1.7576675501703901e-06,
"loss": 0.7269,
"step": 24100
},
{
"epoch": 1.83,
"learning_rate": 1.7198031048845134e-06,
"loss": 0.728,
"step": 24150
},
{
"epoch": 1.83,
"learning_rate": 1.681938659598637e-06,
"loss": 0.7914,
"step": 24200
},
{
"epoch": 1.84,
"learning_rate": 1.6440742143127605e-06,
"loss": 0.7419,
"step": 24250
},
{
"epoch": 1.84,
"learning_rate": 1.6062097690268837e-06,
"loss": 0.7509,
"step": 24300
},
{
"epoch": 1.84,
"learning_rate": 1.5683453237410074e-06,
"loss": 0.7372,
"step": 24350
},
{
"epoch": 1.85,
"learning_rate": 1.5304808784551308e-06,
"loss": 0.7159,
"step": 24400
},
{
"epoch": 1.85,
"learning_rate": 1.4926164331692542e-06,
"loss": 0.7272,
"step": 24450
},
{
"epoch": 1.86,
"learning_rate": 1.4547519878833777e-06,
"loss": 0.735,
"step": 24500
},
{
"epoch": 1.86,
"learning_rate": 1.416887542597501e-06,
"loss": 0.715,
"step": 24550
},
{
"epoch": 1.86,
"learning_rate": 1.3790230973116246e-06,
"loss": 0.7507,
"step": 24600
},
{
"epoch": 1.87,
"learning_rate": 1.341158652025748e-06,
"loss": 0.7723,
"step": 24650
},
{
"epoch": 1.87,
"learning_rate": 1.3032942067398712e-06,
"loss": 0.7658,
"step": 24700
},
{
"epoch": 1.87,
"learning_rate": 1.2654297614539949e-06,
"loss": 0.7727,
"step": 24750
},
{
"epoch": 1.88,
"learning_rate": 1.2275653161681181e-06,
"loss": 0.748,
"step": 24800
},
{
"epoch": 1.88,
"learning_rate": 1.1897008708822416e-06,
"loss": 0.7135,
"step": 24850
},
{
"epoch": 1.89,
"learning_rate": 1.1518364255963652e-06,
"loss": 0.76,
"step": 24900
},
{
"epoch": 1.89,
"learning_rate": 1.1139719803104887e-06,
"loss": 0.7439,
"step": 24950
},
{
"epoch": 1.89,
"learning_rate": 1.0761075350246119e-06,
"loss": 0.7496,
"step": 25000
},
{
"epoch": 1.9,
"learning_rate": 1.0382430897387353e-06,
"loss": 0.7631,
"step": 25050
},
{
"epoch": 1.9,
"learning_rate": 1.000378644452859e-06,
"loss": 0.7033,
"step": 25100
},
{
"epoch": 1.9,
"learning_rate": 9.625141991669822e-07,
"loss": 0.717,
"step": 25150
},
{
"epoch": 1.91,
"learning_rate": 9.246497538811057e-07,
"loss": 0.7329,
"step": 25200
},
{
"epoch": 1.91,
"learning_rate": 8.867853085952291e-07,
"loss": 0.7065,
"step": 25250
},
{
"epoch": 1.92,
"learning_rate": 8.489208633093526e-07,
"loss": 0.702,
"step": 25300
},
{
"epoch": 1.92,
"learning_rate": 8.110564180234761e-07,
"loss": 0.7161,
"step": 25350
},
{
"epoch": 1.92,
"learning_rate": 7.731919727375994e-07,
"loss": 0.725,
"step": 25400
},
{
"epoch": 1.93,
"learning_rate": 7.353275274517229e-07,
"loss": 0.7256,
"step": 25450
},
{
"epoch": 1.93,
"learning_rate": 6.974630821658464e-07,
"loss": 0.7395,
"step": 25500
},
{
"epoch": 1.93,
"learning_rate": 6.595986368799697e-07,
"loss": 0.794,
"step": 25550
},
{
"epoch": 1.94,
"learning_rate": 6.217341915940932e-07,
"loss": 0.7349,
"step": 25600
},
{
"epoch": 1.94,
"learning_rate": 5.838697463082166e-07,
"loss": 0.7394,
"step": 25650
},
{
"epoch": 1.95,
"learning_rate": 5.460053010223401e-07,
"loss": 0.7345,
"step": 25700
},
{
"epoch": 1.95,
"learning_rate": 5.081408557364635e-07,
"loss": 0.6888,
"step": 25750
},
{
"epoch": 1.95,
"learning_rate": 4.702764104505869e-07,
"loss": 0.7347,
"step": 25800
},
{
"epoch": 1.96,
"learning_rate": 4.324119651647104e-07,
"loss": 0.7246,
"step": 25850
},
{
"epoch": 1.96,
"learning_rate": 3.945475198788338e-07,
"loss": 0.7438,
"step": 25900
},
{
"epoch": 1.97,
"learning_rate": 3.5668307459295723e-07,
"loss": 0.7108,
"step": 25950
},
{
"epoch": 1.97,
"learning_rate": 3.1881862930708067e-07,
"loss": 0.7208,
"step": 26000
},
{
"epoch": 1.97,
"learning_rate": 2.809541840212041e-07,
"loss": 0.7633,
"step": 26050
},
{
"epoch": 1.98,
"learning_rate": 2.4308973873532756e-07,
"loss": 0.7707,
"step": 26100
},
{
"epoch": 1.98,
"learning_rate": 2.05225293449451e-07,
"loss": 0.7524,
"step": 26150
},
{
"epoch": 1.98,
"learning_rate": 1.6736084816357444e-07,
"loss": 0.707,
"step": 26200
},
{
"epoch": 1.99,
"learning_rate": 1.2949640287769786e-07,
"loss": 0.7751,
"step": 26250
},
{
"epoch": 1.99,
"learning_rate": 9.163195759182128e-08,
"loss": 0.7672,
"step": 26300
},
{
"epoch": 2.0,
"learning_rate": 5.376751230594472e-08,
"loss": 0.6998,
"step": 26350
},
{
"epoch": 2.0,
"learning_rate": 1.5903067020068158e-08,
"loss": 0.7069,
"step": 26400
},
{
"epoch": 2.0,
"step": 26410,
"total_flos": 6.728145843607406e+16,
"train_loss": 0.7582937260732105,
"train_runtime": 12885.6962,
"train_samples_per_second": 65.589,
"train_steps_per_second": 2.05
}
],
"max_steps": 26410,
"num_train_epochs": 2,
"total_flos": 6.728145843607406e+16,
"trial_name": null,
"trial_params": null
}